diff options
Diffstat (limited to 'fs')
64 files changed, 1624 insertions, 1497 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index c95295c65045..e83aa5ebe861 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -626,8 +626,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, | |||
626 | return NULL; | 626 | return NULL; |
627 | 627 | ||
628 | error: | 628 | error: |
629 | if (fid) | 629 | p9_client_clunk(fid); |
630 | p9_client_clunk(fid); | ||
631 | 630 | ||
632 | return ERR_PTR(result); | 631 | return ERR_PTR(result); |
633 | } | 632 | } |
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 87ee5ccee348..ed8feb052df9 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c | |||
@@ -125,8 +125,8 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
125 | inode->i_ino); | 125 | inode->i_ino); |
126 | if (err) { | 126 | if (err) { |
127 | inode_dec_link_count(inode); | 127 | inode_dec_link_count(inode); |
128 | iput(inode); | ||
129 | mutex_unlock(&info->bfs_lock); | 128 | mutex_unlock(&info->bfs_lock); |
129 | iput(inode); | ||
130 | return err; | 130 | return err; |
131 | } | 131 | } |
132 | mutex_unlock(&info->bfs_lock); | 132 | mutex_unlock(&info->bfs_lock); |
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index c3e174b35fe6..19caf7c962ac 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
@@ -107,7 +107,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs) | |||
107 | BUG_ON(bip == NULL); | 107 | BUG_ON(bip == NULL); |
108 | 108 | ||
109 | /* A cloned bio doesn't own the integrity metadata */ | 109 | /* A cloned bio doesn't own the integrity metadata */ |
110 | if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL) | 110 | if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY) |
111 | && bip->bip_buf != NULL) | ||
111 | kfree(bip->bip_buf); | 112 | kfree(bip->bip_buf); |
112 | 113 | ||
113 | mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); | 114 | mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); |
@@ -150,6 +151,24 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, | |||
150 | } | 151 | } |
151 | EXPORT_SYMBOL(bio_integrity_add_page); | 152 | EXPORT_SYMBOL(bio_integrity_add_page); |
152 | 153 | ||
154 | static int bdev_integrity_enabled(struct block_device *bdev, int rw) | ||
155 | { | ||
156 | struct blk_integrity *bi = bdev_get_integrity(bdev); | ||
157 | |||
158 | if (bi == NULL) | ||
159 | return 0; | ||
160 | |||
161 | if (rw == READ && bi->verify_fn != NULL && | ||
162 | (bi->flags & INTEGRITY_FLAG_READ)) | ||
163 | return 1; | ||
164 | |||
165 | if (rw == WRITE && bi->generate_fn != NULL && | ||
166 | (bi->flags & INTEGRITY_FLAG_WRITE)) | ||
167 | return 1; | ||
168 | |||
169 | return 0; | ||
170 | } | ||
171 | |||
153 | /** | 172 | /** |
154 | * bio_integrity_enabled - Check whether integrity can be passed | 173 | * bio_integrity_enabled - Check whether integrity can be passed |
155 | * @bio: bio to check | 174 | * @bio: bio to check |
@@ -313,6 +332,14 @@ static void bio_integrity_generate(struct bio *bio) | |||
313 | } | 332 | } |
314 | } | 333 | } |
315 | 334 | ||
335 | static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) | ||
336 | { | ||
337 | if (bi) | ||
338 | return bi->tuple_size; | ||
339 | |||
340 | return 0; | ||
341 | } | ||
342 | |||
316 | /** | 343 | /** |
317 | * bio_integrity_prep - Prepare bio for integrity I/O | 344 | * bio_integrity_prep - Prepare bio for integrity I/O |
318 | * @bio: bio to prepare | 345 | * @bio: bio to prepare |
@@ -30,7 +30,7 @@ | |||
30 | 30 | ||
31 | static struct kmem_cache *bio_slab __read_mostly; | 31 | static struct kmem_cache *bio_slab __read_mostly; |
32 | 32 | ||
33 | mempool_t *bio_split_pool __read_mostly; | 33 | static mempool_t *bio_split_pool __read_mostly; |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * if you change this list, also change bvec_alloc or things will | 36 | * if you change this list, also change bvec_alloc or things will |
@@ -60,25 +60,46 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct | |||
60 | struct bio_vec *bvl; | 60 | struct bio_vec *bvl; |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * see comment near bvec_array define! | 63 | * If 'bs' is given, lookup the pool and do the mempool alloc. |
64 | * If not, this is a bio_kmalloc() allocation and just do a | ||
65 | * kzalloc() for the exact number of vecs right away. | ||
64 | */ | 66 | */ |
65 | switch (nr) { | 67 | if (bs) { |
66 | case 1 : *idx = 0; break; | 68 | /* |
67 | case 2 ... 4: *idx = 1; break; | 69 | * see comment near bvec_array define! |
68 | case 5 ... 16: *idx = 2; break; | 70 | */ |
69 | case 17 ... 64: *idx = 3; break; | 71 | switch (nr) { |
70 | case 65 ... 128: *idx = 4; break; | 72 | case 1: |
71 | case 129 ... BIO_MAX_PAGES: *idx = 5; break; | 73 | *idx = 0; |
74 | break; | ||
75 | case 2 ... 4: | ||
76 | *idx = 1; | ||
77 | break; | ||
78 | case 5 ... 16: | ||
79 | *idx = 2; | ||
80 | break; | ||
81 | case 17 ... 64: | ||
82 | *idx = 3; | ||
83 | break; | ||
84 | case 65 ... 128: | ||
85 | *idx = 4; | ||
86 | break; | ||
87 | case 129 ... BIO_MAX_PAGES: | ||
88 | *idx = 5; | ||
89 | break; | ||
72 | default: | 90 | default: |
73 | return NULL; | 91 | return NULL; |
74 | } | 92 | } |
75 | /* | ||
76 | * idx now points to the pool we want to allocate from | ||
77 | */ | ||
78 | 93 | ||
79 | bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); | 94 | /* |
80 | if (bvl) | 95 | * idx now points to the pool we want to allocate from |
81 | memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); | 96 | */ |
97 | bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); | ||
98 | if (bvl) | ||
99 | memset(bvl, 0, | ||
100 | bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); | ||
101 | } else | ||
102 | bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); | ||
82 | 103 | ||
83 | return bvl; | 104 | return bvl; |
84 | } | 105 | } |
@@ -107,10 +128,17 @@ static void bio_fs_destructor(struct bio *bio) | |||
107 | bio_free(bio, fs_bio_set); | 128 | bio_free(bio, fs_bio_set); |
108 | } | 129 | } |
109 | 130 | ||
131 | static void bio_kmalloc_destructor(struct bio *bio) | ||
132 | { | ||
133 | kfree(bio->bi_io_vec); | ||
134 | kfree(bio); | ||
135 | } | ||
136 | |||
110 | void bio_init(struct bio *bio) | 137 | void bio_init(struct bio *bio) |
111 | { | 138 | { |
112 | memset(bio, 0, sizeof(*bio)); | 139 | memset(bio, 0, sizeof(*bio)); |
113 | bio->bi_flags = 1 << BIO_UPTODATE; | 140 | bio->bi_flags = 1 << BIO_UPTODATE; |
141 | bio->bi_comp_cpu = -1; | ||
114 | atomic_set(&bio->bi_cnt, 1); | 142 | atomic_set(&bio->bi_cnt, 1); |
115 | } | 143 | } |
116 | 144 | ||
@@ -118,19 +146,25 @@ void bio_init(struct bio *bio) | |||
118 | * bio_alloc_bioset - allocate a bio for I/O | 146 | * bio_alloc_bioset - allocate a bio for I/O |
119 | * @gfp_mask: the GFP_ mask given to the slab allocator | 147 | * @gfp_mask: the GFP_ mask given to the slab allocator |
120 | * @nr_iovecs: number of iovecs to pre-allocate | 148 | * @nr_iovecs: number of iovecs to pre-allocate |
121 | * @bs: the bio_set to allocate from | 149 | * @bs: the bio_set to allocate from. If %NULL, just use kmalloc |
122 | * | 150 | * |
123 | * Description: | 151 | * Description: |
124 | * bio_alloc_bioset will first try it's on mempool to satisfy the allocation. | 152 | * bio_alloc_bioset will first try its own mempool to satisfy the allocation. |
125 | * If %__GFP_WAIT is set then we will block on the internal pool waiting | 153 | * If %__GFP_WAIT is set then we will block on the internal pool waiting |
126 | * for a &struct bio to become free. | 154 | * for a &struct bio to become free. If a %NULL @bs is passed in, we will |
155 | * fall back to just using @kmalloc to allocate the required memory. | ||
127 | * | 156 | * |
128 | * allocate bio and iovecs from the memory pools specified by the | 157 | * allocate bio and iovecs from the memory pools specified by the |
129 | * bio_set structure. | 158 | * bio_set structure, or @kmalloc if none given. |
130 | **/ | 159 | **/ |
131 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | 160 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) |
132 | { | 161 | { |
133 | struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); | 162 | struct bio *bio; |
163 | |||
164 | if (bs) | ||
165 | bio = mempool_alloc(bs->bio_pool, gfp_mask); | ||
166 | else | ||
167 | bio = kmalloc(sizeof(*bio), gfp_mask); | ||
134 | 168 | ||
135 | if (likely(bio)) { | 169 | if (likely(bio)) { |
136 | struct bio_vec *bvl = NULL; | 170 | struct bio_vec *bvl = NULL; |
@@ -141,7 +175,10 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | |||
141 | 175 | ||
142 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); | 176 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); |
143 | if (unlikely(!bvl)) { | 177 | if (unlikely(!bvl)) { |
144 | mempool_free(bio, bs->bio_pool); | 178 | if (bs) |
179 | mempool_free(bio, bs->bio_pool); | ||
180 | else | ||
181 | kfree(bio); | ||
145 | bio = NULL; | 182 | bio = NULL; |
146 | goto out; | 183 | goto out; |
147 | } | 184 | } |
@@ -164,6 +201,23 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) | |||
164 | return bio; | 201 | return bio; |
165 | } | 202 | } |
166 | 203 | ||
204 | /* | ||
205 | * Like bio_alloc(), but doesn't use a mempool backing. This means that | ||
206 | * it CAN fail, but while bio_alloc() can only be used for allocations | ||
207 | * that have a short (finite) life span, bio_kmalloc() should be used | ||
208 | * for more permanent bio allocations (like allocating some bio's for | ||
209 | * initalization or setup purposes). | ||
210 | */ | ||
211 | struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) | ||
212 | { | ||
213 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); | ||
214 | |||
215 | if (bio) | ||
216 | bio->bi_destructor = bio_kmalloc_destructor; | ||
217 | |||
218 | return bio; | ||
219 | } | ||
220 | |||
167 | void zero_fill_bio(struct bio *bio) | 221 | void zero_fill_bio(struct bio *bio) |
168 | { | 222 | { |
169 | unsigned long flags; | 223 | unsigned long flags; |
@@ -208,14 +262,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio) | |||
208 | return bio->bi_phys_segments; | 262 | return bio->bi_phys_segments; |
209 | } | 263 | } |
210 | 264 | ||
211 | inline int bio_hw_segments(struct request_queue *q, struct bio *bio) | ||
212 | { | ||
213 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | ||
214 | blk_recount_segments(q, bio); | ||
215 | |||
216 | return bio->bi_hw_segments; | ||
217 | } | ||
218 | |||
219 | /** | 265 | /** |
220 | * __bio_clone - clone a bio | 266 | * __bio_clone - clone a bio |
221 | * @bio: destination bio | 267 | * @bio: destination bio |
@@ -350,8 +396,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
350 | */ | 396 | */ |
351 | 397 | ||
352 | while (bio->bi_phys_segments >= q->max_phys_segments | 398 | while (bio->bi_phys_segments >= q->max_phys_segments |
353 | || bio->bi_hw_segments >= q->max_hw_segments | 399 | || bio->bi_phys_segments >= q->max_hw_segments) { |
354 | || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) { | ||
355 | 400 | ||
356 | if (retried_segments) | 401 | if (retried_segments) |
357 | return 0; | 402 | return 0; |
@@ -395,13 +440,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
395 | } | 440 | } |
396 | 441 | ||
397 | /* If we may be able to merge these biovecs, force a recount */ | 442 | /* If we may be able to merge these biovecs, force a recount */ |
398 | if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) || | 443 | if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) |
399 | BIOVEC_VIRT_MERGEABLE(bvec-1, bvec))) | ||
400 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); | 444 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); |
401 | 445 | ||
402 | bio->bi_vcnt++; | 446 | bio->bi_vcnt++; |
403 | bio->bi_phys_segments++; | 447 | bio->bi_phys_segments++; |
404 | bio->bi_hw_segments++; | ||
405 | done: | 448 | done: |
406 | bio->bi_size += len; | 449 | bio->bi_size += len; |
407 | return len; | 450 | return len; |
@@ -449,16 +492,19 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, | |||
449 | 492 | ||
450 | struct bio_map_data { | 493 | struct bio_map_data { |
451 | struct bio_vec *iovecs; | 494 | struct bio_vec *iovecs; |
452 | int nr_sgvecs; | ||
453 | struct sg_iovec *sgvecs; | 495 | struct sg_iovec *sgvecs; |
496 | int nr_sgvecs; | ||
497 | int is_our_pages; | ||
454 | }; | 498 | }; |
455 | 499 | ||
456 | static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, | 500 | static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, |
457 | struct sg_iovec *iov, int iov_count) | 501 | struct sg_iovec *iov, int iov_count, |
502 | int is_our_pages) | ||
458 | { | 503 | { |
459 | memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); | 504 | memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); |
460 | memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); | 505 | memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); |
461 | bmd->nr_sgvecs = iov_count; | 506 | bmd->nr_sgvecs = iov_count; |
507 | bmd->is_our_pages = is_our_pages; | ||
462 | bio->bi_private = bmd; | 508 | bio->bi_private = bmd; |
463 | } | 509 | } |
464 | 510 | ||
@@ -493,7 +539,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, | |||
493 | } | 539 | } |
494 | 540 | ||
495 | static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, | 541 | static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, |
496 | struct sg_iovec *iov, int iov_count, int uncopy) | 542 | struct sg_iovec *iov, int iov_count, int uncopy, |
543 | int do_free_page) | ||
497 | { | 544 | { |
498 | int ret = 0, i; | 545 | int ret = 0, i; |
499 | struct bio_vec *bvec; | 546 | struct bio_vec *bvec; |
@@ -536,7 +583,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, | |||
536 | } | 583 | } |
537 | } | 584 | } |
538 | 585 | ||
539 | if (uncopy) | 586 | if (do_free_page) |
540 | __free_page(bvec->bv_page); | 587 | __free_page(bvec->bv_page); |
541 | } | 588 | } |
542 | 589 | ||
@@ -553,10 +600,11 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, | |||
553 | int bio_uncopy_user(struct bio *bio) | 600 | int bio_uncopy_user(struct bio *bio) |
554 | { | 601 | { |
555 | struct bio_map_data *bmd = bio->bi_private; | 602 | struct bio_map_data *bmd = bio->bi_private; |
556 | int ret; | 603 | int ret = 0; |
557 | |||
558 | ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1); | ||
559 | 604 | ||
605 | if (!bio_flagged(bio, BIO_NULL_MAPPED)) | ||
606 | ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, | ||
607 | bmd->nr_sgvecs, 1, bmd->is_our_pages); | ||
560 | bio_free_map_data(bmd); | 608 | bio_free_map_data(bmd); |
561 | bio_put(bio); | 609 | bio_put(bio); |
562 | return ret; | 610 | return ret; |
@@ -565,16 +613,20 @@ int bio_uncopy_user(struct bio *bio) | |||
565 | /** | 613 | /** |
566 | * bio_copy_user_iov - copy user data to bio | 614 | * bio_copy_user_iov - copy user data to bio |
567 | * @q: destination block queue | 615 | * @q: destination block queue |
616 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | ||
568 | * @iov: the iovec. | 617 | * @iov: the iovec. |
569 | * @iov_count: number of elements in the iovec | 618 | * @iov_count: number of elements in the iovec |
570 | * @write_to_vm: bool indicating writing to pages or not | 619 | * @write_to_vm: bool indicating writing to pages or not |
620 | * @gfp_mask: memory allocation flags | ||
571 | * | 621 | * |
572 | * Prepares and returns a bio for indirect user io, bouncing data | 622 | * Prepares and returns a bio for indirect user io, bouncing data |
573 | * to/from kernel pages as necessary. Must be paired with | 623 | * to/from kernel pages as necessary. Must be paired with |
574 | * call bio_uncopy_user() on io completion. | 624 | * call bio_uncopy_user() on io completion. |
575 | */ | 625 | */ |
576 | struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | 626 | struct bio *bio_copy_user_iov(struct request_queue *q, |
577 | int iov_count, int write_to_vm) | 627 | struct rq_map_data *map_data, |
628 | struct sg_iovec *iov, int iov_count, | ||
629 | int write_to_vm, gfp_t gfp_mask) | ||
578 | { | 630 | { |
579 | struct bio_map_data *bmd; | 631 | struct bio_map_data *bmd; |
580 | struct bio_vec *bvec; | 632 | struct bio_vec *bvec; |
@@ -597,25 +649,38 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
597 | len += iov[i].iov_len; | 649 | len += iov[i].iov_len; |
598 | } | 650 | } |
599 | 651 | ||
600 | bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL); | 652 | bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); |
601 | if (!bmd) | 653 | if (!bmd) |
602 | return ERR_PTR(-ENOMEM); | 654 | return ERR_PTR(-ENOMEM); |
603 | 655 | ||
604 | ret = -ENOMEM; | 656 | ret = -ENOMEM; |
605 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 657 | bio = bio_alloc(gfp_mask, nr_pages); |
606 | if (!bio) | 658 | if (!bio) |
607 | goto out_bmd; | 659 | goto out_bmd; |
608 | 660 | ||
609 | bio->bi_rw |= (!write_to_vm << BIO_RW); | 661 | bio->bi_rw |= (!write_to_vm << BIO_RW); |
610 | 662 | ||
611 | ret = 0; | 663 | ret = 0; |
664 | i = 0; | ||
612 | while (len) { | 665 | while (len) { |
613 | unsigned int bytes = PAGE_SIZE; | 666 | unsigned int bytes; |
667 | |||
668 | if (map_data) | ||
669 | bytes = 1U << (PAGE_SHIFT + map_data->page_order); | ||
670 | else | ||
671 | bytes = PAGE_SIZE; | ||
614 | 672 | ||
615 | if (bytes > len) | 673 | if (bytes > len) |
616 | bytes = len; | 674 | bytes = len; |
617 | 675 | ||
618 | page = alloc_page(q->bounce_gfp | GFP_KERNEL); | 676 | if (map_data) { |
677 | if (i == map_data->nr_entries) { | ||
678 | ret = -ENOMEM; | ||
679 | break; | ||
680 | } | ||
681 | page = map_data->pages[i++]; | ||
682 | } else | ||
683 | page = alloc_page(q->bounce_gfp | gfp_mask); | ||
619 | if (!page) { | 684 | if (!page) { |
620 | ret = -ENOMEM; | 685 | ret = -ENOMEM; |
621 | break; | 686 | break; |
@@ -634,16 +699,17 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
634 | * success | 699 | * success |
635 | */ | 700 | */ |
636 | if (!write_to_vm) { | 701 | if (!write_to_vm) { |
637 | ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0); | 702 | ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); |
638 | if (ret) | 703 | if (ret) |
639 | goto cleanup; | 704 | goto cleanup; |
640 | } | 705 | } |
641 | 706 | ||
642 | bio_set_map_data(bmd, bio, iov, iov_count); | 707 | bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1); |
643 | return bio; | 708 | return bio; |
644 | cleanup: | 709 | cleanup: |
645 | bio_for_each_segment(bvec, bio, i) | 710 | if (!map_data) |
646 | __free_page(bvec->bv_page); | 711 | bio_for_each_segment(bvec, bio, i) |
712 | __free_page(bvec->bv_page); | ||
647 | 713 | ||
648 | bio_put(bio); | 714 | bio_put(bio); |
649 | out_bmd: | 715 | out_bmd: |
@@ -654,29 +720,32 @@ out_bmd: | |||
654 | /** | 720 | /** |
655 | * bio_copy_user - copy user data to bio | 721 | * bio_copy_user - copy user data to bio |
656 | * @q: destination block queue | 722 | * @q: destination block queue |
723 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | ||
657 | * @uaddr: start of user address | 724 | * @uaddr: start of user address |
658 | * @len: length in bytes | 725 | * @len: length in bytes |
659 | * @write_to_vm: bool indicating writing to pages or not | 726 | * @write_to_vm: bool indicating writing to pages or not |
727 | * @gfp_mask: memory allocation flags | ||
660 | * | 728 | * |
661 | * Prepares and returns a bio for indirect user io, bouncing data | 729 | * Prepares and returns a bio for indirect user io, bouncing data |
662 | * to/from kernel pages as necessary. Must be paired with | 730 | * to/from kernel pages as necessary. Must be paired with |
663 | * call bio_uncopy_user() on io completion. | 731 | * call bio_uncopy_user() on io completion. |
664 | */ | 732 | */ |
665 | struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, | 733 | struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, |
666 | unsigned int len, int write_to_vm) | 734 | unsigned long uaddr, unsigned int len, |
735 | int write_to_vm, gfp_t gfp_mask) | ||
667 | { | 736 | { |
668 | struct sg_iovec iov; | 737 | struct sg_iovec iov; |
669 | 738 | ||
670 | iov.iov_base = (void __user *)uaddr; | 739 | iov.iov_base = (void __user *)uaddr; |
671 | iov.iov_len = len; | 740 | iov.iov_len = len; |
672 | 741 | ||
673 | return bio_copy_user_iov(q, &iov, 1, write_to_vm); | 742 | return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); |
674 | } | 743 | } |
675 | 744 | ||
676 | static struct bio *__bio_map_user_iov(struct request_queue *q, | 745 | static struct bio *__bio_map_user_iov(struct request_queue *q, |
677 | struct block_device *bdev, | 746 | struct block_device *bdev, |
678 | struct sg_iovec *iov, int iov_count, | 747 | struct sg_iovec *iov, int iov_count, |
679 | int write_to_vm) | 748 | int write_to_vm, gfp_t gfp_mask) |
680 | { | 749 | { |
681 | int i, j; | 750 | int i, j; |
682 | int nr_pages = 0; | 751 | int nr_pages = 0; |
@@ -702,12 +771,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
702 | if (!nr_pages) | 771 | if (!nr_pages) |
703 | return ERR_PTR(-EINVAL); | 772 | return ERR_PTR(-EINVAL); |
704 | 773 | ||
705 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 774 | bio = bio_alloc(gfp_mask, nr_pages); |
706 | if (!bio) | 775 | if (!bio) |
707 | return ERR_PTR(-ENOMEM); | 776 | return ERR_PTR(-ENOMEM); |
708 | 777 | ||
709 | ret = -ENOMEM; | 778 | ret = -ENOMEM; |
710 | pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); | 779 | pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); |
711 | if (!pages) | 780 | if (!pages) |
712 | goto out; | 781 | goto out; |
713 | 782 | ||
@@ -786,19 +855,21 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
786 | * @uaddr: start of user address | 855 | * @uaddr: start of user address |
787 | * @len: length in bytes | 856 | * @len: length in bytes |
788 | * @write_to_vm: bool indicating writing to pages or not | 857 | * @write_to_vm: bool indicating writing to pages or not |
858 | * @gfp_mask: memory allocation flags | ||
789 | * | 859 | * |
790 | * Map the user space address into a bio suitable for io to a block | 860 | * Map the user space address into a bio suitable for io to a block |
791 | * device. Returns an error pointer in case of error. | 861 | * device. Returns an error pointer in case of error. |
792 | */ | 862 | */ |
793 | struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, | 863 | struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, |
794 | unsigned long uaddr, unsigned int len, int write_to_vm) | 864 | unsigned long uaddr, unsigned int len, int write_to_vm, |
865 | gfp_t gfp_mask) | ||
795 | { | 866 | { |
796 | struct sg_iovec iov; | 867 | struct sg_iovec iov; |
797 | 868 | ||
798 | iov.iov_base = (void __user *)uaddr; | 869 | iov.iov_base = (void __user *)uaddr; |
799 | iov.iov_len = len; | 870 | iov.iov_len = len; |
800 | 871 | ||
801 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); | 872 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); |
802 | } | 873 | } |
803 | 874 | ||
804 | /** | 875 | /** |
@@ -808,18 +879,19 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, | |||
808 | * @iov: the iovec. | 879 | * @iov: the iovec. |
809 | * @iov_count: number of elements in the iovec | 880 | * @iov_count: number of elements in the iovec |
810 | * @write_to_vm: bool indicating writing to pages or not | 881 | * @write_to_vm: bool indicating writing to pages or not |
882 | * @gfp_mask: memory allocation flags | ||
811 | * | 883 | * |
812 | * Map the user space address into a bio suitable for io to a block | 884 | * Map the user space address into a bio suitable for io to a block |
813 | * device. Returns an error pointer in case of error. | 885 | * device. Returns an error pointer in case of error. |
814 | */ | 886 | */ |
815 | struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, | 887 | struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, |
816 | struct sg_iovec *iov, int iov_count, | 888 | struct sg_iovec *iov, int iov_count, |
817 | int write_to_vm) | 889 | int write_to_vm, gfp_t gfp_mask) |
818 | { | 890 | { |
819 | struct bio *bio; | 891 | struct bio *bio; |
820 | 892 | ||
821 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); | 893 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm, |
822 | 894 | gfp_mask); | |
823 | if (IS_ERR(bio)) | 895 | if (IS_ERR(bio)) |
824 | return bio; | 896 | return bio; |
825 | 897 | ||
@@ -976,48 +1048,13 @@ static void bio_copy_kern_endio(struct bio *bio, int err) | |||
976 | struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | 1048 | struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, |
977 | gfp_t gfp_mask, int reading) | 1049 | gfp_t gfp_mask, int reading) |
978 | { | 1050 | { |
979 | unsigned long kaddr = (unsigned long)data; | ||
980 | unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
981 | unsigned long start = kaddr >> PAGE_SHIFT; | ||
982 | const int nr_pages = end - start; | ||
983 | struct bio *bio; | 1051 | struct bio *bio; |
984 | struct bio_vec *bvec; | 1052 | struct bio_vec *bvec; |
985 | struct bio_map_data *bmd; | 1053 | int i; |
986 | int i, ret; | ||
987 | struct sg_iovec iov; | ||
988 | |||
989 | iov.iov_base = data; | ||
990 | iov.iov_len = len; | ||
991 | |||
992 | bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask); | ||
993 | if (!bmd) | ||
994 | return ERR_PTR(-ENOMEM); | ||
995 | |||
996 | ret = -ENOMEM; | ||
997 | bio = bio_alloc(gfp_mask, nr_pages); | ||
998 | if (!bio) | ||
999 | goto out_bmd; | ||
1000 | |||
1001 | while (len) { | ||
1002 | struct page *page; | ||
1003 | unsigned int bytes = PAGE_SIZE; | ||
1004 | |||
1005 | if (bytes > len) | ||
1006 | bytes = len; | ||
1007 | |||
1008 | page = alloc_page(q->bounce_gfp | gfp_mask); | ||
1009 | if (!page) { | ||
1010 | ret = -ENOMEM; | ||
1011 | goto cleanup; | ||
1012 | } | ||
1013 | |||
1014 | if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { | ||
1015 | ret = -EINVAL; | ||
1016 | goto cleanup; | ||
1017 | } | ||
1018 | 1054 | ||
1019 | len -= bytes; | 1055 | bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); |
1020 | } | 1056 | if (IS_ERR(bio)) |
1057 | return bio; | ||
1021 | 1058 | ||
1022 | if (!reading) { | 1059 | if (!reading) { |
1023 | void *p = data; | 1060 | void *p = data; |
@@ -1030,20 +1067,9 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | |||
1030 | } | 1067 | } |
1031 | } | 1068 | } |
1032 | 1069 | ||
1033 | bio->bi_private = bmd; | ||
1034 | bio->bi_end_io = bio_copy_kern_endio; | 1070 | bio->bi_end_io = bio_copy_kern_endio; |
1035 | 1071 | ||
1036 | bio_set_map_data(bmd, bio, &iov, 1); | ||
1037 | return bio; | 1072 | return bio; |
1038 | cleanup: | ||
1039 | bio_for_each_segment(bvec, bio, i) | ||
1040 | __free_page(bvec->bv_page); | ||
1041 | |||
1042 | bio_put(bio); | ||
1043 | out_bmd: | ||
1044 | bio_free_map_data(bmd); | ||
1045 | |||
1046 | return ERR_PTR(ret); | ||
1047 | } | 1073 | } |
1048 | 1074 | ||
1049 | /* | 1075 | /* |
@@ -1230,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err) | |||
1230 | * split a bio - only worry about a bio with a single page | 1256 | * split a bio - only worry about a bio with a single page |
1231 | * in it's iovec | 1257 | * in it's iovec |
1232 | */ | 1258 | */ |
1233 | struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | 1259 | struct bio_pair *bio_split(struct bio *bi, int first_sectors) |
1234 | { | 1260 | { |
1235 | struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO); | 1261 | struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); |
1236 | 1262 | ||
1237 | if (!bp) | 1263 | if (!bp) |
1238 | return bp; | 1264 | return bp; |
@@ -1266,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | |||
1266 | bp->bio2.bi_end_io = bio_pair_end_2; | 1292 | bp->bio2.bi_end_io = bio_pair_end_2; |
1267 | 1293 | ||
1268 | bp->bio1.bi_private = bi; | 1294 | bp->bio1.bi_private = bi; |
1269 | bp->bio2.bi_private = pool; | 1295 | bp->bio2.bi_private = bio_split_pool; |
1270 | 1296 | ||
1271 | if (bio_integrity(bi)) | 1297 | if (bio_integrity(bi)) |
1272 | bio_integrity_split(bi, bp, first_sectors); | 1298 | bio_integrity_split(bi, bp, first_sectors); |
@@ -1274,6 +1300,42 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | |||
1274 | return bp; | 1300 | return bp; |
1275 | } | 1301 | } |
1276 | 1302 | ||
1303 | /** | ||
1304 | * bio_sector_offset - Find hardware sector offset in bio | ||
1305 | * @bio: bio to inspect | ||
1306 | * @index: bio_vec index | ||
1307 | * @offset: offset in bv_page | ||
1308 | * | ||
1309 | * Return the number of hardware sectors between beginning of bio | ||
1310 | * and an end point indicated by a bio_vec index and an offset | ||
1311 | * within that vector's page. | ||
1312 | */ | ||
1313 | sector_t bio_sector_offset(struct bio *bio, unsigned short index, | ||
1314 | unsigned int offset) | ||
1315 | { | ||
1316 | unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue); | ||
1317 | struct bio_vec *bv; | ||
1318 | sector_t sectors; | ||
1319 | int i; | ||
1320 | |||
1321 | sectors = 0; | ||
1322 | |||
1323 | if (index >= bio->bi_idx) | ||
1324 | index = bio->bi_vcnt - 1; | ||
1325 | |||
1326 | __bio_for_each_segment(bv, bio, i, 0) { | ||
1327 | if (i == index) { | ||
1328 | if (offset > bv->bv_offset) | ||
1329 | sectors += (offset - bv->bv_offset) / sector_sz; | ||
1330 | break; | ||
1331 | } | ||
1332 | |||
1333 | sectors += bv->bv_len / sector_sz; | ||
1334 | } | ||
1335 | |||
1336 | return sectors; | ||
1337 | } | ||
1338 | EXPORT_SYMBOL(bio_sector_offset); | ||
1277 | 1339 | ||
1278 | /* | 1340 | /* |
1279 | * create memory pools for biovec's in a bio_set. | 1341 | * create memory pools for biovec's in a bio_set. |
@@ -1376,6 +1438,7 @@ static int __init init_bio(void) | |||
1376 | subsys_initcall(init_bio); | 1438 | subsys_initcall(init_bio); |
1377 | 1439 | ||
1378 | EXPORT_SYMBOL(bio_alloc); | 1440 | EXPORT_SYMBOL(bio_alloc); |
1441 | EXPORT_SYMBOL(bio_kmalloc); | ||
1379 | EXPORT_SYMBOL(bio_put); | 1442 | EXPORT_SYMBOL(bio_put); |
1380 | EXPORT_SYMBOL(bio_free); | 1443 | EXPORT_SYMBOL(bio_free); |
1381 | EXPORT_SYMBOL(bio_endio); | 1444 | EXPORT_SYMBOL(bio_endio); |
@@ -1383,7 +1446,6 @@ EXPORT_SYMBOL(bio_init); | |||
1383 | EXPORT_SYMBOL(__bio_clone); | 1446 | EXPORT_SYMBOL(__bio_clone); |
1384 | EXPORT_SYMBOL(bio_clone); | 1447 | EXPORT_SYMBOL(bio_clone); |
1385 | EXPORT_SYMBOL(bio_phys_segments); | 1448 | EXPORT_SYMBOL(bio_phys_segments); |
1386 | EXPORT_SYMBOL(bio_hw_segments); | ||
1387 | EXPORT_SYMBOL(bio_add_page); | 1449 | EXPORT_SYMBOL(bio_add_page); |
1388 | EXPORT_SYMBOL(bio_add_pc_page); | 1450 | EXPORT_SYMBOL(bio_add_pc_page); |
1389 | EXPORT_SYMBOL(bio_get_nr_vecs); | 1451 | EXPORT_SYMBOL(bio_get_nr_vecs); |
@@ -1393,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern); | |||
1393 | EXPORT_SYMBOL(bio_copy_kern); | 1455 | EXPORT_SYMBOL(bio_copy_kern); |
1394 | EXPORT_SYMBOL(bio_pair_release); | 1456 | EXPORT_SYMBOL(bio_pair_release); |
1395 | EXPORT_SYMBOL(bio_split); | 1457 | EXPORT_SYMBOL(bio_split); |
1396 | EXPORT_SYMBOL(bio_split_pool); | ||
1397 | EXPORT_SYMBOL(bio_copy_user); | 1458 | EXPORT_SYMBOL(bio_copy_user); |
1398 | EXPORT_SYMBOL(bio_uncopy_user); | 1459 | EXPORT_SYMBOL(bio_uncopy_user); |
1399 | EXPORT_SYMBOL(bioset_create); | 1460 | EXPORT_SYMBOL(bioset_create); |
diff --git a/fs/block_dev.c b/fs/block_dev.c index aff54219e049..d84f0469a016 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -540,22 +540,6 @@ EXPORT_SYMBOL(bd_release); | |||
540 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 | 540 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 |
541 | */ | 541 | */ |
542 | 542 | ||
543 | static struct kobject *bdev_get_kobj(struct block_device *bdev) | ||
544 | { | ||
545 | if (bdev->bd_contains != bdev) | ||
546 | return kobject_get(&bdev->bd_part->dev.kobj); | ||
547 | else | ||
548 | return kobject_get(&bdev->bd_disk->dev.kobj); | ||
549 | } | ||
550 | |||
551 | static struct kobject *bdev_get_holder(struct block_device *bdev) | ||
552 | { | ||
553 | if (bdev->bd_contains != bdev) | ||
554 | return kobject_get(bdev->bd_part->holder_dir); | ||
555 | else | ||
556 | return kobject_get(bdev->bd_disk->holder_dir); | ||
557 | } | ||
558 | |||
559 | static int add_symlink(struct kobject *from, struct kobject *to) | 543 | static int add_symlink(struct kobject *from, struct kobject *to) |
560 | { | 544 | { |
561 | if (!from || !to) | 545 | if (!from || !to) |
@@ -604,11 +588,11 @@ static int bd_holder_grab_dirs(struct block_device *bdev, | |||
604 | if (!bo->hdev) | 588 | if (!bo->hdev) |
605 | goto fail_put_sdir; | 589 | goto fail_put_sdir; |
606 | 590 | ||
607 | bo->sdev = bdev_get_kobj(bdev); | 591 | bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); |
608 | if (!bo->sdev) | 592 | if (!bo->sdev) |
609 | goto fail_put_hdev; | 593 | goto fail_put_hdev; |
610 | 594 | ||
611 | bo->hdir = bdev_get_holder(bdev); | 595 | bo->hdir = kobject_get(bdev->bd_part->holder_dir); |
612 | if (!bo->hdir) | 596 | if (!bo->hdir) |
613 | goto fail_put_sdev; | 597 | goto fail_put_sdev; |
614 | 598 | ||
@@ -868,6 +852,87 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) | |||
868 | 852 | ||
869 | EXPORT_SYMBOL(open_by_devnum); | 853 | EXPORT_SYMBOL(open_by_devnum); |
870 | 854 | ||
855 | /** | ||
856 | * flush_disk - invalidates all buffer-cache entries on a disk | ||
857 | * | ||
858 | * @bdev: struct block device to be flushed | ||
859 | * | ||
860 | * Invalidates all buffer-cache entries on a disk. It should be called | ||
861 | * when a disk has been changed -- either by a media change or online | ||
862 | * resize. | ||
863 | */ | ||
864 | static void flush_disk(struct block_device *bdev) | ||
865 | { | ||
866 | if (__invalidate_device(bdev)) { | ||
867 | char name[BDEVNAME_SIZE] = ""; | ||
868 | |||
869 | if (bdev->bd_disk) | ||
870 | disk_name(bdev->bd_disk, 0, name); | ||
871 | printk(KERN_WARNING "VFS: busy inodes on changed media or " | ||
872 | "resized disk %s\n", name); | ||
873 | } | ||
874 | |||
875 | if (!bdev->bd_disk) | ||
876 | return; | ||
877 | if (disk_partitionable(bdev->bd_disk)) | ||
878 | bdev->bd_invalidated = 1; | ||
879 | } | ||
880 | |||
881 | /** | ||
882 | * check_disk_size_change - checks for disk size change and adjusts bdev size. | ||
883 | * @disk: struct gendisk to check | ||
884 | * @bdev: struct bdev to adjust. | ||
885 | * | ||
886 | * This routine checks to see if the bdev size does not match the disk size | ||
887 | * and adjusts it if it differs. | ||
888 | */ | ||
889 | void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) | ||
890 | { | ||
891 | loff_t disk_size, bdev_size; | ||
892 | |||
893 | disk_size = (loff_t)get_capacity(disk) << 9; | ||
894 | bdev_size = i_size_read(bdev->bd_inode); | ||
895 | if (disk_size != bdev_size) { | ||
896 | char name[BDEVNAME_SIZE]; | ||
897 | |||
898 | disk_name(disk, 0, name); | ||
899 | printk(KERN_INFO | ||
900 | "%s: detected capacity change from %lld to %lld\n", | ||
901 | name, bdev_size, disk_size); | ||
902 | i_size_write(bdev->bd_inode, disk_size); | ||
903 | flush_disk(bdev); | ||
904 | } | ||
905 | } | ||
906 | EXPORT_SYMBOL(check_disk_size_change); | ||
907 | |||
908 | /** | ||
909 | * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back | ||
910 | * @disk: struct gendisk to be revalidated | ||
911 | * | ||
912 | * This routine is a wrapper for lower-level driver's revalidate_disk | ||
913 | * call-backs. It is used to do common pre and post operations needed | ||
914 | * for all revalidate_disk operations. | ||
915 | */ | ||
916 | int revalidate_disk(struct gendisk *disk) | ||
917 | { | ||
918 | struct block_device *bdev; | ||
919 | int ret = 0; | ||
920 | |||
921 | if (disk->fops->revalidate_disk) | ||
922 | ret = disk->fops->revalidate_disk(disk); | ||
923 | |||
924 | bdev = bdget_disk(disk, 0); | ||
925 | if (!bdev) | ||
926 | return ret; | ||
927 | |||
928 | mutex_lock(&bdev->bd_mutex); | ||
929 | check_disk_size_change(disk, bdev); | ||
930 | mutex_unlock(&bdev->bd_mutex); | ||
931 | bdput(bdev); | ||
932 | return ret; | ||
933 | } | ||
934 | EXPORT_SYMBOL(revalidate_disk); | ||
935 | |||
871 | /* | 936 | /* |
872 | * This routine checks whether a removable media has been changed, | 937 | * This routine checks whether a removable media has been changed, |
873 | * and invalidates all buffer-cache-entries in that case. This | 938 | * and invalidates all buffer-cache-entries in that case. This |
@@ -887,13 +952,9 @@ int check_disk_change(struct block_device *bdev) | |||
887 | if (!bdops->media_changed(bdev->bd_disk)) | 952 | if (!bdops->media_changed(bdev->bd_disk)) |
888 | return 0; | 953 | return 0; |
889 | 954 | ||
890 | if (__invalidate_device(bdev)) | 955 | flush_disk(bdev); |
891 | printk("VFS: busy inodes on changed media.\n"); | ||
892 | |||
893 | if (bdops->revalidate_disk) | 956 | if (bdops->revalidate_disk) |
894 | bdops->revalidate_disk(bdev->bd_disk); | 957 | bdops->revalidate_disk(bdev->bd_disk); |
895 | if (bdev->bd_disk->minors > 1) | ||
896 | bdev->bd_invalidated = 1; | ||
897 | return 1; | 958 | return 1; |
898 | } | 959 | } |
899 | 960 | ||
@@ -927,10 +988,10 @@ static int __blkdev_put(struct block_device *bdev, int for_part); | |||
927 | 988 | ||
928 | static int do_open(struct block_device *bdev, struct file *file, int for_part) | 989 | static int do_open(struct block_device *bdev, struct file *file, int for_part) |
929 | { | 990 | { |
930 | struct module *owner = NULL; | ||
931 | struct gendisk *disk; | 991 | struct gendisk *disk; |
992 | struct hd_struct *part = NULL; | ||
932 | int ret; | 993 | int ret; |
933 | int part; | 994 | int partno; |
934 | int perm = 0; | 995 | int perm = 0; |
935 | 996 | ||
936 | if (file->f_mode & FMODE_READ) | 997 | if (file->f_mode & FMODE_READ) |
@@ -948,25 +1009,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
948 | 1009 | ||
949 | ret = -ENXIO; | 1010 | ret = -ENXIO; |
950 | file->f_mapping = bdev->bd_inode->i_mapping; | 1011 | file->f_mapping = bdev->bd_inode->i_mapping; |
1012 | |||
951 | lock_kernel(); | 1013 | lock_kernel(); |
952 | disk = get_gendisk(bdev->bd_dev, &part); | 1014 | |
953 | if (!disk) { | 1015 | disk = get_gendisk(bdev->bd_dev, &partno); |
954 | unlock_kernel(); | 1016 | if (!disk) |
955 | bdput(bdev); | 1017 | goto out_unlock_kernel; |
956 | return ret; | 1018 | part = disk_get_part(disk, partno); |
957 | } | 1019 | if (!part) |
958 | owner = disk->fops->owner; | 1020 | goto out_unlock_kernel; |
959 | 1021 | ||
960 | mutex_lock_nested(&bdev->bd_mutex, for_part); | 1022 | mutex_lock_nested(&bdev->bd_mutex, for_part); |
961 | if (!bdev->bd_openers) { | 1023 | if (!bdev->bd_openers) { |
962 | bdev->bd_disk = disk; | 1024 | bdev->bd_disk = disk; |
1025 | bdev->bd_part = part; | ||
963 | bdev->bd_contains = bdev; | 1026 | bdev->bd_contains = bdev; |
964 | if (!part) { | 1027 | if (!partno) { |
965 | struct backing_dev_info *bdi; | 1028 | struct backing_dev_info *bdi; |
966 | if (disk->fops->open) { | 1029 | if (disk->fops->open) { |
967 | ret = disk->fops->open(bdev->bd_inode, file); | 1030 | ret = disk->fops->open(bdev->bd_inode, file); |
968 | if (ret) | 1031 | if (ret) |
969 | goto out_first; | 1032 | goto out_clear; |
970 | } | 1033 | } |
971 | if (!bdev->bd_openers) { | 1034 | if (!bdev->bd_openers) { |
972 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); | 1035 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); |
@@ -978,36 +1041,36 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
978 | if (bdev->bd_invalidated) | 1041 | if (bdev->bd_invalidated) |
979 | rescan_partitions(disk, bdev); | 1042 | rescan_partitions(disk, bdev); |
980 | } else { | 1043 | } else { |
981 | struct hd_struct *p; | ||
982 | struct block_device *whole; | 1044 | struct block_device *whole; |
983 | whole = bdget_disk(disk, 0); | 1045 | whole = bdget_disk(disk, 0); |
984 | ret = -ENOMEM; | 1046 | ret = -ENOMEM; |
985 | if (!whole) | 1047 | if (!whole) |
986 | goto out_first; | 1048 | goto out_clear; |
987 | BUG_ON(for_part); | 1049 | BUG_ON(for_part); |
988 | ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); | 1050 | ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); |
989 | if (ret) | 1051 | if (ret) |
990 | goto out_first; | 1052 | goto out_clear; |
991 | bdev->bd_contains = whole; | 1053 | bdev->bd_contains = whole; |
992 | p = disk->part[part - 1]; | ||
993 | bdev->bd_inode->i_data.backing_dev_info = | 1054 | bdev->bd_inode->i_data.backing_dev_info = |
994 | whole->bd_inode->i_data.backing_dev_info; | 1055 | whole->bd_inode->i_data.backing_dev_info; |
995 | if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { | 1056 | if (!(disk->flags & GENHD_FL_UP) || |
1057 | !part || !part->nr_sects) { | ||
996 | ret = -ENXIO; | 1058 | ret = -ENXIO; |
997 | goto out_first; | 1059 | goto out_clear; |
998 | } | 1060 | } |
999 | kobject_get(&p->dev.kobj); | 1061 | bd_set_size(bdev, (loff_t)part->nr_sects << 9); |
1000 | bdev->bd_part = p; | ||
1001 | bd_set_size(bdev, (loff_t) p->nr_sects << 9); | ||
1002 | } | 1062 | } |
1003 | } else { | 1063 | } else { |
1064 | disk_put_part(part); | ||
1004 | put_disk(disk); | 1065 | put_disk(disk); |
1005 | module_put(owner); | 1066 | module_put(disk->fops->owner); |
1067 | part = NULL; | ||
1068 | disk = NULL; | ||
1006 | if (bdev->bd_contains == bdev) { | 1069 | if (bdev->bd_contains == bdev) { |
1007 | if (bdev->bd_disk->fops->open) { | 1070 | if (bdev->bd_disk->fops->open) { |
1008 | ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); | 1071 | ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); |
1009 | if (ret) | 1072 | if (ret) |
1010 | goto out; | 1073 | goto out_unlock_bdev; |
1011 | } | 1074 | } |
1012 | if (bdev->bd_invalidated) | 1075 | if (bdev->bd_invalidated) |
1013 | rescan_partitions(bdev->bd_disk, bdev); | 1076 | rescan_partitions(bdev->bd_disk, bdev); |
@@ -1020,19 +1083,24 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
1020 | unlock_kernel(); | 1083 | unlock_kernel(); |
1021 | return 0; | 1084 | return 0; |
1022 | 1085 | ||
1023 | out_first: | 1086 | out_clear: |
1024 | bdev->bd_disk = NULL; | 1087 | bdev->bd_disk = NULL; |
1088 | bdev->bd_part = NULL; | ||
1025 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1089 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
1026 | if (bdev != bdev->bd_contains) | 1090 | if (bdev != bdev->bd_contains) |
1027 | __blkdev_put(bdev->bd_contains, 1); | 1091 | __blkdev_put(bdev->bd_contains, 1); |
1028 | bdev->bd_contains = NULL; | 1092 | bdev->bd_contains = NULL; |
1029 | put_disk(disk); | 1093 | out_unlock_bdev: |
1030 | module_put(owner); | ||
1031 | out: | ||
1032 | mutex_unlock(&bdev->bd_mutex); | 1094 | mutex_unlock(&bdev->bd_mutex); |
1095 | out_unlock_kernel: | ||
1033 | unlock_kernel(); | 1096 | unlock_kernel(); |
1034 | if (ret) | 1097 | |
1035 | bdput(bdev); | 1098 | disk_put_part(part); |
1099 | if (disk) | ||
1100 | module_put(disk->fops->owner); | ||
1101 | put_disk(disk); | ||
1102 | bdput(bdev); | ||
1103 | |||
1036 | return ret; | 1104 | return ret; |
1037 | } | 1105 | } |
1038 | 1106 | ||
@@ -1117,11 +1185,8 @@ static int __blkdev_put(struct block_device *bdev, int for_part) | |||
1117 | 1185 | ||
1118 | put_disk(disk); | 1186 | put_disk(disk); |
1119 | module_put(owner); | 1187 | module_put(owner); |
1120 | 1188 | disk_put_part(bdev->bd_part); | |
1121 | if (bdev->bd_contains != bdev) { | 1189 | bdev->bd_part = NULL; |
1122 | kobject_put(&bdev->bd_part->dev.kobj); | ||
1123 | bdev->bd_part = NULL; | ||
1124 | } | ||
1125 | bdev->bd_disk = NULL; | 1190 | bdev->bd_disk = NULL; |
1126 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1191 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
1127 | if (bdev != bdev->bd_contains) | 1192 | if (bdev != bdev->bd_contains) |
@@ -1197,10 +1262,9 @@ EXPORT_SYMBOL(ioctl_by_bdev); | |||
1197 | 1262 | ||
1198 | /** | 1263 | /** |
1199 | * lookup_bdev - lookup a struct block_device by name | 1264 | * lookup_bdev - lookup a struct block_device by name |
1265 | * @pathname: special file representing the block device | ||
1200 | * | 1266 | * |
1201 | * @path: special file representing the block device | 1267 | * Get a reference to the blockdevice at @pathname in the current |
1202 | * | ||
1203 | * Get a reference to the blockdevice at @path in the current | ||
1204 | * namespace if possible and return it. Return ERR_PTR(error) | 1268 | * namespace if possible and return it. Return ERR_PTR(error) |
1205 | * otherwise. | 1269 | * otherwise. |
1206 | */ | 1270 | */ |
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index f9e4ad97a79e..06e521a945c3 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES | |||
@@ -9,7 +9,10 @@ files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit | |||
9 | on parent directory when server supports Unix Extensions but not POSIX | 9 | on parent directory when server supports Unix Extensions but not POSIX |
10 | create. Update cifs.upcall version to handle new Kerberos sec flags | 10 | create. Update cifs.upcall version to handle new Kerberos sec flags |
11 | (this requires update of cifs.upcall program from Samba). Fix memory leak | 11 | (this requires update of cifs.upcall program from Samba). Fix memory leak |
12 | on dns_upcall (resolving DFS referralls). | 12 | on dns_upcall (resolving DFS referralls). Fix plain text password |
13 | authentication (requires setting SecurityFlags to 0x30030 to enable | ||
14 | lanman and plain text though). Fix writes to be at correct offset when | ||
15 | file is open with O_APPEND and file is on a directio (forcediretio) mount. | ||
13 | 16 | ||
14 | Version 1.53 | 17 | Version 1.53 |
15 | ------------ | 18 | ------------ |
diff --git a/fs/cifs/README b/fs/cifs/README index 68b5c1169d9d..bd2343d4c6a6 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -542,10 +542,20 @@ SecurityFlags Flags which control security negotiation and | |||
542 | hashing mechanisms (as "must use") on the other hand | 542 | hashing mechanisms (as "must use") on the other hand |
543 | does not make much sense. Default flags are | 543 | does not make much sense. Default flags are |
544 | 0x07007 | 544 | 0x07007 |
545 | (NTLM, NTLMv2 and packet signing allowed). Maximum | 545 | (NTLM, NTLMv2 and packet signing allowed). The maximum |
546 | allowable flags if you want to allow mounts to servers | 546 | allowable flags if you want to allow mounts to servers |
547 | using weaker password hashes is 0x37037 (lanman, | 547 | using weaker password hashes is 0x37037 (lanman, |
548 | plaintext, ntlm, ntlmv2, signing allowed): | 548 | plaintext, ntlm, ntlmv2, signing allowed). Some |
549 | SecurityFlags require the corresponding menuconfig | ||
550 | options to be enabled (lanman and plaintext require | ||
551 | CONFIG_CIFS_WEAK_PW_HASH for example). Enabling | ||
552 | plaintext authentication currently requires also | ||
553 | enabling lanman authentication in the security flags | ||
554 | because the cifs module only supports sending | ||
555 | laintext passwords using the older lanman dialect | ||
556 | form of the session setup SMB. (e.g. for authentication | ||
557 | using plain text passwords, set the SecurityFlags | ||
558 | to 0x30030): | ||
549 | 559 | ||
550 | may use packet signing 0x00001 | 560 | may use packet signing 0x00001 |
551 | must use packet signing 0x01001 | 561 | must use packet signing 0x01001 |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 83fd40dc1ef0..bd5f13d38450 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -294,6 +294,7 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key) | |||
294 | 294 | ||
295 | if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) | 295 | if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) |
296 | if (extended_security & CIFSSEC_MAY_PLNTXT) { | 296 | if (extended_security & CIFSSEC_MAY_PLNTXT) { |
297 | memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); | ||
297 | memcpy(lnm_session_key, password_with_pad, | 298 | memcpy(lnm_session_key, password_with_pad, |
298 | CIFS_ENCPWD_SIZE); | 299 | CIFS_ENCPWD_SIZE); |
299 | return; | 300 | return; |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ff14d14903a0..cbefe1f1f9fe 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -833,6 +833,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
833 | return -EBADF; | 833 | return -EBADF; |
834 | open_file = (struct cifsFileInfo *) file->private_data; | 834 | open_file = (struct cifsFileInfo *) file->private_data; |
835 | 835 | ||
836 | rc = generic_write_checks(file, poffset, &write_size, 0); | ||
837 | if (rc) | ||
838 | return rc; | ||
839 | |||
836 | xid = GetXid(); | 840 | xid = GetXid(); |
837 | 841 | ||
838 | if (*poffset > file->f_path.dentry->d_inode->i_size) | 842 | if (*poffset > file->f_path.dentry->d_inode->i_size) |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index b537fad3bf50..252fdc0567f1 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -409,6 +409,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
409 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 409 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
410 | char lnm_session_key[CIFS_SESS_KEY_SIZE]; | 410 | char lnm_session_key[CIFS_SESS_KEY_SIZE]; |
411 | 411 | ||
412 | pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; | ||
413 | |||
412 | /* no capabilities flags in old lanman negotiation */ | 414 | /* no capabilities flags in old lanman negotiation */ |
413 | 415 | ||
414 | pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); | 416 | pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); |
diff --git a/fs/dcache.c b/fs/dcache.c index 80e93956aced..e7a1a99b7464 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -1395,6 +1395,10 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
1395 | if (dentry->d_parent != parent) | 1395 | if (dentry->d_parent != parent) |
1396 | goto next; | 1396 | goto next; |
1397 | 1397 | ||
1398 | /* non-existing due to RCU? */ | ||
1399 | if (d_unhashed(dentry)) | ||
1400 | goto next; | ||
1401 | |||
1398 | /* | 1402 | /* |
1399 | * It is safe to compare names since d_move() cannot | 1403 | * It is safe to compare names since d_move() cannot |
1400 | * change the qstr (protected by d_lock). | 1404 | * change the qstr (protected by d_lock). |
@@ -1410,10 +1414,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
1410 | goto next; | 1414 | goto next; |
1411 | } | 1415 | } |
1412 | 1416 | ||
1413 | if (!d_unhashed(dentry)) { | 1417 | atomic_inc(&dentry->d_count); |
1414 | atomic_inc(&dentry->d_count); | 1418 | found = dentry; |
1415 | found = dentry; | ||
1416 | } | ||
1417 | spin_unlock(&dentry->d_lock); | 1419 | spin_unlock(&dentry->d_lock); |
1418 | break; | 1420 | break; |
1419 | next: | 1421 | next: |
@@ -752,11 +752,11 @@ static int exec_mmap(struct mm_struct *mm) | |||
752 | tsk->active_mm = mm; | 752 | tsk->active_mm = mm; |
753 | activate_mm(active_mm, mm); | 753 | activate_mm(active_mm, mm); |
754 | task_unlock(tsk); | 754 | task_unlock(tsk); |
755 | mm_update_next_owner(old_mm); | ||
756 | arch_pick_mmap_layout(mm); | 755 | arch_pick_mmap_layout(mm); |
757 | if (old_mm) { | 756 | if (old_mm) { |
758 | up_read(&old_mm->mmap_sem); | 757 | up_read(&old_mm->mmap_sem); |
759 | BUG_ON(active_mm != old_mm); | 758 | BUG_ON(active_mm != old_mm); |
759 | mm_update_next_owner(old_mm); | ||
760 | mmput(old_mm); | 760 | mmput(old_mm); |
761 | return 0; | 761 | return 0; |
762 | } | 762 | } |
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 302e95c4af7e..fb98b3d847ed 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
8 | #include <linux/msdos_fs.h> | 8 | #include <linux/msdos_fs.h> |
9 | #include <linux/blkdev.h> | ||
9 | 10 | ||
10 | struct fatent_operations { | 11 | struct fatent_operations { |
11 | void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); | 12 | void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); |
@@ -535,6 +536,7 @@ int fat_free_clusters(struct inode *inode, int cluster) | |||
535 | struct fat_entry fatent; | 536 | struct fat_entry fatent; |
536 | struct buffer_head *bhs[MAX_BUF_PER_PAGE]; | 537 | struct buffer_head *bhs[MAX_BUF_PER_PAGE]; |
537 | int i, err, nr_bhs; | 538 | int i, err, nr_bhs; |
539 | int first_cl = cluster; | ||
538 | 540 | ||
539 | nr_bhs = 0; | 541 | nr_bhs = 0; |
540 | fatent_init(&fatent); | 542 | fatent_init(&fatent); |
@@ -551,6 +553,18 @@ int fat_free_clusters(struct inode *inode, int cluster) | |||
551 | goto error; | 553 | goto error; |
552 | } | 554 | } |
553 | 555 | ||
556 | /* | ||
557 | * Issue discard for the sectors we no longer care about, | ||
558 | * batching contiguous clusters into one request | ||
559 | */ | ||
560 | if (cluster != fatent.entry + 1) { | ||
561 | int nr_clus = fatent.entry - first_cl + 1; | ||
562 | |||
563 | sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), | ||
564 | nr_clus * sbi->sec_per_clus); | ||
565 | first_cl = cluster; | ||
566 | } | ||
567 | |||
554 | ops->ent_put(&fatent, FAT_ENT_FREE); | 568 | ops->ent_put(&fatent, FAT_ENT_FREE); |
555 | if (sbi->free_clusters != -1) { | 569 | if (sbi->free_clusters != -1) { |
556 | sbi->free_clusters++; | 570 | sbi->free_clusters++; |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 13391e546616..c962283d4e7f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1265,6 +1265,8 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, | |||
1265 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; | 1265 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; |
1266 | if (time_before(now, holdtime)) | 1266 | if (time_before(now, holdtime)) |
1267 | delay = holdtime - now; | 1267 | delay = holdtime - now; |
1268 | if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) | ||
1269 | delay = gl->gl_ops->go_min_hold_time; | ||
1268 | 1270 | ||
1269 | spin_lock(&gl->gl_spin); | 1271 | spin_lock(&gl->gl_spin); |
1270 | handle_callback(gl, state, 1, delay); | 1272 | handle_callback(gl, state, 1, delay); |
@@ -1578,8 +1580,6 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) | |||
1578 | *p++ = 'a'; | 1580 | *p++ = 'a'; |
1579 | if (flags & GL_EXACT) | 1581 | if (flags & GL_EXACT) |
1580 | *p++ = 'E'; | 1582 | *p++ = 'E'; |
1581 | if (flags & GL_ATIME) | ||
1582 | *p++ = 'a'; | ||
1583 | if (flags & GL_NOCACHE) | 1583 | if (flags & GL_NOCACHE) |
1584 | *p++ = 'c'; | 1584 | *p++ = 'c'; |
1585 | if (test_bit(HIF_HOLDER, &iflags)) | 1585 | if (test_bit(HIF_HOLDER, &iflags)) |
@@ -1816,15 +1816,17 @@ restart: | |||
1816 | if (gl) { | 1816 | if (gl) { |
1817 | gi->gl = hlist_entry(gl->gl_list.next, | 1817 | gi->gl = hlist_entry(gl->gl_list.next, |
1818 | struct gfs2_glock, gl_list); | 1818 | struct gfs2_glock, gl_list); |
1819 | if (gi->gl) | 1819 | } else { |
1820 | gfs2_glock_hold(gi->gl); | 1820 | gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, |
1821 | struct gfs2_glock, gl_list); | ||
1821 | } | 1822 | } |
1823 | if (gi->gl) | ||
1824 | gfs2_glock_hold(gi->gl); | ||
1822 | read_unlock(gl_lock_addr(gi->hash)); | 1825 | read_unlock(gl_lock_addr(gi->hash)); |
1823 | if (gl) | 1826 | if (gl) |
1824 | gfs2_glock_put(gl); | 1827 | gfs2_glock_put(gl); |
1825 | if (gl && gi->gl == NULL) | ||
1826 | gi->hash++; | ||
1827 | while (gi->gl == NULL) { | 1828 | while (gi->gl == NULL) { |
1829 | gi->hash++; | ||
1828 | if (gi->hash >= GFS2_GL_HASH_SIZE) | 1830 | if (gi->hash >= GFS2_GL_HASH_SIZE) |
1829 | return 1; | 1831 | return 1; |
1830 | read_lock(gl_lock_addr(gi->hash)); | 1832 | read_lock(gl_lock_addr(gi->hash)); |
@@ -1833,7 +1835,6 @@ restart: | |||
1833 | if (gi->gl) | 1835 | if (gi->gl) |
1834 | gfs2_glock_hold(gi->gl); | 1836 | gfs2_glock_hold(gi->gl); |
1835 | read_unlock(gl_lock_addr(gi->hash)); | 1837 | read_unlock(gl_lock_addr(gi->hash)); |
1836 | gi->hash++; | ||
1837 | } | 1838 | } |
1838 | 1839 | ||
1839 | if (gi->sdp != gi->gl->gl_sbd) | 1840 | if (gi->sdp != gi->gl->gl_sbd) |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 971d92af70fc..695c6b193611 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -24,7 +24,6 @@ | |||
24 | #define GL_ASYNC 0x00000040 | 24 | #define GL_ASYNC 0x00000040 |
25 | #define GL_EXACT 0x00000080 | 25 | #define GL_EXACT 0x00000080 |
26 | #define GL_SKIP 0x00000100 | 26 | #define GL_SKIP 0x00000100 |
27 | #define GL_ATIME 0x00000200 | ||
28 | #define GL_NOCACHE 0x00000400 | 27 | #define GL_NOCACHE 0x00000400 |
29 | 28 | ||
30 | #define GLR_TRYFAILED 13 | 29 | #define GLR_TRYFAILED 13 |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 448697a5c462..f566ec1b4e8e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -386,20 +386,21 @@ struct gfs2_statfs_change_host { | |||
386 | #define GFS2_DATA_ORDERED 2 | 386 | #define GFS2_DATA_ORDERED 2 |
387 | 387 | ||
388 | struct gfs2_args { | 388 | struct gfs2_args { |
389 | char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ | 389 | char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ |
390 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ | 390 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ |
391 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ | 391 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ |
392 | int ar_spectator; /* Don't get a journal because we're always RO */ | 392 | unsigned int ar_spectator:1; /* Don't get a journal */ |
393 | int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */ | 393 | unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */ |
394 | int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */ | 394 | unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ |
395 | int ar_localcaching; /* Local-style caching (dangerous on multihost) */ | 395 | unsigned int ar_localcaching:1; /* Local caching */ |
396 | int ar_debug; /* Oops on errors instead of trying to be graceful */ | 396 | unsigned int ar_debug:1; /* Oops on errors */ |
397 | int ar_upgrade; /* Upgrade ondisk/multihost format */ | 397 | unsigned int ar_upgrade:1; /* Upgrade ondisk format */ |
398 | unsigned int ar_num_glockd; /* Number of glockd threads */ | 398 | unsigned int ar_posix_acl:1; /* Enable posix acls */ |
399 | int ar_posix_acl; /* Enable posix acls */ | 399 | unsigned int ar_quota:2; /* off/account/on */ |
400 | int ar_quota; /* off/account/on */ | 400 | unsigned int ar_suiddir:1; /* suiddir support */ |
401 | int ar_suiddir; /* suiddir support */ | 401 | unsigned int ar_data:2; /* ordered/writeback */ |
402 | int ar_data; /* ordered/writeback */ | 402 | unsigned int ar_meta:1; /* mount metafs */ |
403 | unsigned int ar_num_glockd; /* Number of glockd threads */ | ||
403 | }; | 404 | }; |
404 | 405 | ||
405 | struct gfs2_tune { | 406 | struct gfs2_tune { |
@@ -419,7 +420,6 @@ struct gfs2_tune { | |||
419 | unsigned int gt_quota_scale_den; /* Denominator */ | 420 | unsigned int gt_quota_scale_den; /* Denominator */ |
420 | unsigned int gt_quota_cache_secs; | 421 | unsigned int gt_quota_cache_secs; |
421 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ | 422 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ |
422 | unsigned int gt_atime_quantum; /* Min secs between atime updates */ | ||
423 | unsigned int gt_new_files_jdata; | 423 | unsigned int gt_new_files_jdata; |
424 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ | 424 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ |
425 | unsigned int gt_stall_secs; /* Detects trouble! */ | 425 | unsigned int gt_stall_secs; /* Detects trouble! */ |
@@ -432,7 +432,7 @@ enum { | |||
432 | SDF_JOURNAL_CHECKED = 0, | 432 | SDF_JOURNAL_CHECKED = 0, |
433 | SDF_JOURNAL_LIVE = 1, | 433 | SDF_JOURNAL_LIVE = 1, |
434 | SDF_SHUTDOWN = 2, | 434 | SDF_SHUTDOWN = 2, |
435 | SDF_NOATIME = 3, | 435 | SDF_NOBARRIERS = 3, |
436 | }; | 436 | }; |
437 | 437 | ||
438 | #define GFS2_FSNAME_LEN 256 | 438 | #define GFS2_FSNAME_LEN 256 |
@@ -461,7 +461,6 @@ struct gfs2_sb_host { | |||
461 | 461 | ||
462 | struct gfs2_sbd { | 462 | struct gfs2_sbd { |
463 | struct super_block *sd_vfs; | 463 | struct super_block *sd_vfs; |
464 | struct super_block *sd_vfs_meta; | ||
465 | struct kobject sd_kobj; | 464 | struct kobject sd_kobj; |
466 | unsigned long sd_flags; /* SDF_... */ | 465 | unsigned long sd_flags; /* SDF_... */ |
467 | struct gfs2_sb_host sd_sb; | 466 | struct gfs2_sb_host sd_sb; |
@@ -499,7 +498,9 @@ struct gfs2_sbd { | |||
499 | 498 | ||
500 | /* Inode Stuff */ | 499 | /* Inode Stuff */ |
501 | 500 | ||
502 | struct inode *sd_master_dir; | 501 | struct dentry *sd_master_dir; |
502 | struct dentry *sd_root_dir; | ||
503 | |||
503 | struct inode *sd_jindex; | 504 | struct inode *sd_jindex; |
504 | struct inode *sd_inum_inode; | 505 | struct inode *sd_inum_inode; |
505 | struct inode *sd_statfs_inode; | 506 | struct inode *sd_statfs_inode; |
@@ -634,7 +635,6 @@ struct gfs2_sbd { | |||
634 | /* Debugging crud */ | 635 | /* Debugging crud */ |
635 | 636 | ||
636 | unsigned long sd_last_warning; | 637 | unsigned long sd_last_warning; |
637 | struct vfsmount *sd_gfs2mnt; | ||
638 | struct dentry *debugfs_dir; /* debugfs directory */ | 638 | struct dentry *debugfs_dir; /* debugfs directory */ |
639 | struct dentry *debugfs_dentry_glocks; /* for debugfs */ | 639 | struct dentry *debugfs_dentry_glocks; /* for debugfs */ |
640 | }; | 640 | }; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 8b0806a32948..7cee695fa441 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/crc32.h> | 18 | #include <linux/crc32.h> |
19 | #include <linux/lm_interface.h> | 19 | #include <linux/lm_interface.h> |
20 | #include <linux/security.h> | 20 | #include <linux/security.h> |
21 | #include <linux/time.h> | ||
21 | 22 | ||
22 | #include "gfs2.h" | 23 | #include "gfs2.h" |
23 | #include "incore.h" | 24 | #include "incore.h" |
@@ -249,6 +250,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
249 | { | 250 | { |
250 | struct gfs2_dinode_host *di = &ip->i_di; | 251 | struct gfs2_dinode_host *di = &ip->i_di; |
251 | const struct gfs2_dinode *str = buf; | 252 | const struct gfs2_dinode *str = buf; |
253 | struct timespec atime; | ||
252 | u16 height, depth; | 254 | u16 height, depth; |
253 | 255 | ||
254 | if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) | 256 | if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) |
@@ -275,8 +277,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
275 | di->di_size = be64_to_cpu(str->di_size); | 277 | di->di_size = be64_to_cpu(str->di_size); |
276 | i_size_write(&ip->i_inode, di->di_size); | 278 | i_size_write(&ip->i_inode, di->di_size); |
277 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); | 279 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); |
278 | ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); | 280 | atime.tv_sec = be64_to_cpu(str->di_atime); |
279 | ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); | 281 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); |
282 | if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) | ||
283 | ip->i_inode.i_atime = atime; | ||
280 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); | 284 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); |
281 | ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); | 285 | ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); |
282 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); | 286 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); |
@@ -1033,13 +1037,11 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
1033 | 1037 | ||
1034 | if (bh) | 1038 | if (bh) |
1035 | brelse(bh); | 1039 | brelse(bh); |
1036 | if (!inode) | ||
1037 | return ERR_PTR(-ENOMEM); | ||
1038 | return inode; | 1040 | return inode; |
1039 | 1041 | ||
1040 | fail_gunlock2: | 1042 | fail_gunlock2: |
1041 | gfs2_glock_dq_uninit(ghs + 1); | 1043 | gfs2_glock_dq_uninit(ghs + 1); |
1042 | if (inode) | 1044 | if (inode && !IS_ERR(inode)) |
1043 | iput(inode); | 1045 | iput(inode); |
1044 | fail_gunlock: | 1046 | fail_gunlock: |
1045 | gfs2_glock_dq(ghs); | 1047 | gfs2_glock_dq(ghs); |
@@ -1140,54 +1142,6 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
1140 | return 0; | 1142 | return 0; |
1141 | } | 1143 | } |
1142 | 1144 | ||
1143 | /* | ||
1144 | * gfs2_ok_to_move - check if it's ok to move a directory to another directory | ||
1145 | * @this: move this | ||
1146 | * @to: to here | ||
1147 | * | ||
1148 | * Follow @to back to the root and make sure we don't encounter @this | ||
1149 | * Assumes we already hold the rename lock. | ||
1150 | * | ||
1151 | * Returns: errno | ||
1152 | */ | ||
1153 | |||
1154 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | ||
1155 | { | ||
1156 | struct inode *dir = &to->i_inode; | ||
1157 | struct super_block *sb = dir->i_sb; | ||
1158 | struct inode *tmp; | ||
1159 | struct qstr dotdot; | ||
1160 | int error = 0; | ||
1161 | |||
1162 | gfs2_str2qstr(&dotdot, ".."); | ||
1163 | |||
1164 | igrab(dir); | ||
1165 | |||
1166 | for (;;) { | ||
1167 | if (dir == &this->i_inode) { | ||
1168 | error = -EINVAL; | ||
1169 | break; | ||
1170 | } | ||
1171 | if (dir == sb->s_root->d_inode) { | ||
1172 | error = 0; | ||
1173 | break; | ||
1174 | } | ||
1175 | |||
1176 | tmp = gfs2_lookupi(dir, &dotdot, 1); | ||
1177 | if (IS_ERR(tmp)) { | ||
1178 | error = PTR_ERR(tmp); | ||
1179 | break; | ||
1180 | } | ||
1181 | |||
1182 | iput(dir); | ||
1183 | dir = tmp; | ||
1184 | } | ||
1185 | |||
1186 | iput(dir); | ||
1187 | |||
1188 | return error; | ||
1189 | } | ||
1190 | |||
1191 | /** | 1145 | /** |
1192 | * gfs2_readlinki - return the contents of a symlink | 1146 | * gfs2_readlinki - return the contents of a symlink |
1193 | * @ip: the symlink's inode | 1147 | * @ip: the symlink's inode |
@@ -1207,8 +1161,8 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) | |||
1207 | unsigned int x; | 1161 | unsigned int x; |
1208 | int error; | 1162 | int error; |
1209 | 1163 | ||
1210 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); | 1164 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); |
1211 | error = gfs2_glock_nq_atime(&i_gh); | 1165 | error = gfs2_glock_nq(&i_gh); |
1212 | if (error) { | 1166 | if (error) { |
1213 | gfs2_holder_uninit(&i_gh); | 1167 | gfs2_holder_uninit(&i_gh); |
1214 | return error; | 1168 | return error; |
@@ -1243,101 +1197,6 @@ out: | |||
1243 | return error; | 1197 | return error; |
1244 | } | 1198 | } |
1245 | 1199 | ||
1246 | /** | ||
1247 | * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and | ||
1248 | * conditionally update the inode's atime | ||
1249 | * @gh: the holder to acquire | ||
1250 | * | ||
1251 | * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap | ||
1252 | * Update if the difference between the current time and the inode's current | ||
1253 | * atime is greater than an interval specified at mount. | ||
1254 | * | ||
1255 | * Returns: errno | ||
1256 | */ | ||
1257 | |||
1258 | int gfs2_glock_nq_atime(struct gfs2_holder *gh) | ||
1259 | { | ||
1260 | struct gfs2_glock *gl = gh->gh_gl; | ||
1261 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
1262 | struct gfs2_inode *ip = gl->gl_object; | ||
1263 | s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum); | ||
1264 | unsigned int state; | ||
1265 | int flags; | ||
1266 | int error; | ||
1267 | struct timespec tv = CURRENT_TIME; | ||
1268 | |||
1269 | if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) || | ||
1270 | gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) || | ||
1271 | gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops)) | ||
1272 | return -EINVAL; | ||
1273 | |||
1274 | state = gh->gh_state; | ||
1275 | flags = gh->gh_flags; | ||
1276 | |||
1277 | error = gfs2_glock_nq(gh); | ||
1278 | if (error) | ||
1279 | return error; | ||
1280 | |||
1281 | if (test_bit(SDF_NOATIME, &sdp->sd_flags) || | ||
1282 | (sdp->sd_vfs->s_flags & MS_RDONLY)) | ||
1283 | return 0; | ||
1284 | |||
1285 | if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { | ||
1286 | gfs2_glock_dq(gh); | ||
1287 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY, | ||
1288 | gh); | ||
1289 | error = gfs2_glock_nq(gh); | ||
1290 | if (error) | ||
1291 | return error; | ||
1292 | |||
1293 | /* Verify that atime hasn't been updated while we were | ||
1294 | trying to get exclusive lock. */ | ||
1295 | |||
1296 | tv = CURRENT_TIME; | ||
1297 | if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { | ||
1298 | struct buffer_head *dibh; | ||
1299 | struct gfs2_dinode *di; | ||
1300 | |||
1301 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
1302 | if (error == -EROFS) | ||
1303 | return 0; | ||
1304 | if (error) | ||
1305 | goto fail; | ||
1306 | |||
1307 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
1308 | if (error) | ||
1309 | goto fail_end_trans; | ||
1310 | |||
1311 | ip->i_inode.i_atime = tv; | ||
1312 | |||
1313 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1314 | di = (struct gfs2_dinode *)dibh->b_data; | ||
1315 | di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); | ||
1316 | di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); | ||
1317 | brelse(dibh); | ||
1318 | |||
1319 | gfs2_trans_end(sdp); | ||
1320 | } | ||
1321 | |||
1322 | /* If someone else has asked for the glock, | ||
1323 | unlock and let them have it. Then reacquire | ||
1324 | in the original state. */ | ||
1325 | if (gfs2_glock_is_blocking(gl)) { | ||
1326 | gfs2_glock_dq(gh); | ||
1327 | gfs2_holder_reinit(state, flags, gh); | ||
1328 | return gfs2_glock_nq(gh); | ||
1329 | } | ||
1330 | } | ||
1331 | |||
1332 | return 0; | ||
1333 | |||
1334 | fail_end_trans: | ||
1335 | gfs2_trans_end(sdp); | ||
1336 | fail: | ||
1337 | gfs2_glock_dq(gh); | ||
1338 | return error; | ||
1339 | } | ||
1340 | |||
1341 | static int | 1200 | static int |
1342 | __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) | 1201 | __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) |
1343 | { | 1202 | { |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 58f9607d6a86..2d43f69610a0 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -91,9 +91,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | |||
91 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | 91 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, |
92 | const struct gfs2_inode *ip); | 92 | const struct gfs2_inode *ip); |
93 | int gfs2_permission(struct inode *inode, int mask); | 93 | int gfs2_permission(struct inode *inode, int mask); |
94 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); | ||
95 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); | 94 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); |
96 | int gfs2_glock_nq_atime(struct gfs2_holder *gh); | ||
97 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); | 95 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); |
98 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); | 96 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); |
99 | void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); | 97 | void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 6c6af9f5e3ab..ad305854bdc6 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/delay.h> | 18 | #include <linux/delay.h> |
19 | #include <linux/kthread.h> | 19 | #include <linux/kthread.h> |
20 | #include <linux/freezer.h> | 20 | #include <linux/freezer.h> |
21 | #include <linux/bio.h> | ||
21 | 22 | ||
22 | #include "gfs2.h" | 23 | #include "gfs2.h" |
23 | #include "incore.h" | 24 | #include "incore.h" |
@@ -584,7 +585,6 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
584 | memset(bh->b_data, 0, bh->b_size); | 585 | memset(bh->b_data, 0, bh->b_size); |
585 | set_buffer_uptodate(bh); | 586 | set_buffer_uptodate(bh); |
586 | clear_buffer_dirty(bh); | 587 | clear_buffer_dirty(bh); |
587 | unlock_buffer(bh); | ||
588 | 588 | ||
589 | gfs2_ail1_empty(sdp, 0); | 589 | gfs2_ail1_empty(sdp, 0); |
590 | tail = current_tail(sdp); | 590 | tail = current_tail(sdp); |
@@ -601,8 +601,23 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
601 | hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); | 601 | hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); |
602 | lh->lh_hash = cpu_to_be32(hash); | 602 | lh->lh_hash = cpu_to_be32(hash); |
603 | 603 | ||
604 | set_buffer_dirty(bh); | 604 | bh->b_end_io = end_buffer_write_sync; |
605 | if (sync_dirty_buffer(bh)) | 605 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) |
606 | goto skip_barrier; | ||
607 | get_bh(bh); | ||
608 | submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh); | ||
609 | wait_on_buffer(bh); | ||
610 | if (buffer_eopnotsupp(bh)) { | ||
611 | clear_buffer_eopnotsupp(bh); | ||
612 | set_buffer_uptodate(bh); | ||
613 | set_bit(SDF_NOBARRIERS, &sdp->sd_flags); | ||
614 | lock_buffer(bh); | ||
615 | skip_barrier: | ||
616 | get_bh(bh); | ||
617 | submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh); | ||
618 | wait_on_buffer(bh); | ||
619 | } | ||
620 | if (!buffer_uptodate(bh)) | ||
606 | gfs2_io_error_bh(sdp, bh); | 621 | gfs2_io_error_bh(sdp, bh); |
607 | brelse(bh); | 622 | brelse(bh); |
608 | 623 | ||
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index b941f9f9f958..df48333e6f01 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c | |||
@@ -42,6 +42,7 @@ enum { | |||
42 | Opt_nosuiddir, | 42 | Opt_nosuiddir, |
43 | Opt_data_writeback, | 43 | Opt_data_writeback, |
44 | Opt_data_ordered, | 44 | Opt_data_ordered, |
45 | Opt_meta, | ||
45 | Opt_err, | 46 | Opt_err, |
46 | }; | 47 | }; |
47 | 48 | ||
@@ -66,6 +67,7 @@ static match_table_t tokens = { | |||
66 | {Opt_nosuiddir, "nosuiddir"}, | 67 | {Opt_nosuiddir, "nosuiddir"}, |
67 | {Opt_data_writeback, "data=writeback"}, | 68 | {Opt_data_writeback, "data=writeback"}, |
68 | {Opt_data_ordered, "data=ordered"}, | 69 | {Opt_data_ordered, "data=ordered"}, |
70 | {Opt_meta, "meta"}, | ||
69 | {Opt_err, NULL} | 71 | {Opt_err, NULL} |
70 | }; | 72 | }; |
71 | 73 | ||
@@ -239,6 +241,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) | |||
239 | case Opt_data_ordered: | 241 | case Opt_data_ordered: |
240 | args->ar_data = GFS2_DATA_ORDERED; | 242 | args->ar_data = GFS2_DATA_ORDERED; |
241 | break; | 243 | break; |
244 | case Opt_meta: | ||
245 | if (remount && args->ar_meta != 1) | ||
246 | goto cant_remount; | ||
247 | args->ar_meta = 1; | ||
248 | break; | ||
242 | case Opt_err: | 249 | case Opt_err: |
243 | default: | 250 | default: |
244 | fs_info(sdp, "unknown option: %s\n", o); | 251 | fs_info(sdp, "unknown option: %s\n", o); |
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index e64a1b04117a..27563816e1c5 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
@@ -512,8 +512,8 @@ static int gfs2_readpage(struct file *file, struct page *page) | |||
512 | int error; | 512 | int error; |
513 | 513 | ||
514 | unlock_page(page); | 514 | unlock_page(page); |
515 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 515 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
516 | error = gfs2_glock_nq_atime(&gh); | 516 | error = gfs2_glock_nq(&gh); |
517 | if (unlikely(error)) | 517 | if (unlikely(error)) |
518 | goto out; | 518 | goto out; |
519 | error = AOP_TRUNCATED_PAGE; | 519 | error = AOP_TRUNCATED_PAGE; |
@@ -594,8 +594,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
594 | struct gfs2_holder gh; | 594 | struct gfs2_holder gh; |
595 | int ret; | 595 | int ret; |
596 | 596 | ||
597 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 597 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
598 | ret = gfs2_glock_nq_atime(&gh); | 598 | ret = gfs2_glock_nq(&gh); |
599 | if (unlikely(ret)) | 599 | if (unlikely(ret)) |
600 | goto out_uninit; | 600 | goto out_uninit; |
601 | if (!gfs2_is_stuffed(ip)) | 601 | if (!gfs2_is_stuffed(ip)) |
@@ -636,8 +636,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
636 | unsigned to = from + len; | 636 | unsigned to = from + len; |
637 | struct page *page; | 637 | struct page *page; |
638 | 638 | ||
639 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh); | 639 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); |
640 | error = gfs2_glock_nq_atime(&ip->i_gh); | 640 | error = gfs2_glock_nq(&ip->i_gh); |
641 | if (unlikely(error)) | 641 | if (unlikely(error)) |
642 | goto out_uninit; | 642 | goto out_uninit; |
643 | 643 | ||
@@ -975,7 +975,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) | |||
975 | if (gfs2_is_stuffed(ip)) | 975 | if (gfs2_is_stuffed(ip)) |
976 | return 0; | 976 | return 0; |
977 | 977 | ||
978 | if (offset > i_size_read(&ip->i_inode)) | 978 | if (offset >= i_size_read(&ip->i_inode)) |
979 | return 0; | 979 | return 0; |
980 | return 1; | 980 | return 1; |
981 | } | 981 | } |
@@ -1000,8 +1000,8 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
1000 | * unfortunately have the option of only flushing a range like | 1000 | * unfortunately have the option of only flushing a range like |
1001 | * the VFS does. | 1001 | * the VFS does. |
1002 | */ | 1002 | */ |
1003 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); | 1003 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); |
1004 | rv = gfs2_glock_nq_atime(&gh); | 1004 | rv = gfs2_glock_nq(&gh); |
1005 | if (rv) | 1005 | if (rv) |
1006 | return rv; | 1006 | return rv; |
1007 | rv = gfs2_ok_for_dio(ip, rw, offset); | 1007 | rv = gfs2_ok_for_dio(ip, rw, offset); |
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index e9a366d4411c..3a747f8e2188 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
@@ -89,8 +89,8 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
89 | u64 offset = file->f_pos; | 89 | u64 offset = file->f_pos; |
90 | int error; | 90 | int error; |
91 | 91 | ||
92 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); | 92 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
93 | error = gfs2_glock_nq_atime(&d_gh); | 93 | error = gfs2_glock_nq(&d_gh); |
94 | if (error) { | 94 | if (error) { |
95 | gfs2_holder_uninit(&d_gh); | 95 | gfs2_holder_uninit(&d_gh); |
96 | return error; | 96 | return error; |
@@ -153,8 +153,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) | |||
153 | int error; | 153 | int error; |
154 | u32 fsflags; | 154 | u32 fsflags; |
155 | 155 | ||
156 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 156 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
157 | error = gfs2_glock_nq_atime(&gh); | 157 | error = gfs2_glock_nq(&gh); |
158 | if (error) | 158 | if (error) |
159 | return error; | 159 | return error; |
160 | 160 | ||
@@ -351,8 +351,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
351 | struct gfs2_alloc *al; | 351 | struct gfs2_alloc *al; |
352 | int ret; | 352 | int ret; |
353 | 353 | ||
354 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh); | 354 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
355 | ret = gfs2_glock_nq_atime(&gh); | 355 | ret = gfs2_glock_nq(&gh); |
356 | if (ret) | 356 | if (ret) |
357 | goto out; | 357 | goto out; |
358 | 358 | ||
@@ -434,8 +434,8 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
434 | struct gfs2_holder i_gh; | 434 | struct gfs2_holder i_gh; |
435 | int error; | 435 | int error; |
436 | 436 | ||
437 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); | 437 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); |
438 | error = gfs2_glock_nq_atime(&i_gh); | 438 | error = gfs2_glock_nq(&i_gh); |
439 | if (error) { | 439 | if (error) { |
440 | gfs2_holder_uninit(&i_gh); | 440 | gfs2_holder_uninit(&i_gh); |
441 | return error; | 441 | return error; |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b4d1d6490633..b117fcf2c4f5 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -40,6 +40,44 @@ | |||
40 | #define DO 0 | 40 | #define DO 0 |
41 | #define UNDO 1 | 41 | #define UNDO 1 |
42 | 42 | ||
43 | static const u32 gfs2_old_fs_formats[] = { | ||
44 | 0 | ||
45 | }; | ||
46 | |||
47 | static const u32 gfs2_old_multihost_formats[] = { | ||
48 | 0 | ||
49 | }; | ||
50 | |||
51 | /** | ||
52 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | ||
53 | * @gt: tune | ||
54 | * | ||
55 | */ | ||
56 | |||
57 | static void gfs2_tune_init(struct gfs2_tune *gt) | ||
58 | { | ||
59 | spin_lock_init(>->gt_spin); | ||
60 | |||
61 | gt->gt_demote_secs = 300; | ||
62 | gt->gt_incore_log_blocks = 1024; | ||
63 | gt->gt_log_flush_secs = 60; | ||
64 | gt->gt_recoverd_secs = 60; | ||
65 | gt->gt_logd_secs = 1; | ||
66 | gt->gt_quotad_secs = 5; | ||
67 | gt->gt_quota_simul_sync = 64; | ||
68 | gt->gt_quota_warn_period = 10; | ||
69 | gt->gt_quota_scale_num = 1; | ||
70 | gt->gt_quota_scale_den = 1; | ||
71 | gt->gt_quota_cache_secs = 300; | ||
72 | gt->gt_quota_quantum = 60; | ||
73 | gt->gt_new_files_jdata = 0; | ||
74 | gt->gt_max_readahead = 1 << 18; | ||
75 | gt->gt_stall_secs = 600; | ||
76 | gt->gt_complain_secs = 10; | ||
77 | gt->gt_statfs_quantum = 30; | ||
78 | gt->gt_statfs_slow = 0; | ||
79 | } | ||
80 | |||
43 | static struct gfs2_sbd *init_sbd(struct super_block *sb) | 81 | static struct gfs2_sbd *init_sbd(struct super_block *sb) |
44 | { | 82 | { |
45 | struct gfs2_sbd *sdp; | 83 | struct gfs2_sbd *sdp; |
@@ -96,21 +134,271 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
96 | return sdp; | 134 | return sdp; |
97 | } | 135 | } |
98 | 136 | ||
99 | static void init_vfs(struct super_block *sb, unsigned noatime) | 137 | |
138 | /** | ||
139 | * gfs2_check_sb - Check superblock | ||
140 | * @sdp: the filesystem | ||
141 | * @sb: The superblock | ||
142 | * @silent: Don't print a message if the check fails | ||
143 | * | ||
144 | * Checks the version code of the FS is one that we understand how to | ||
145 | * read and that the sizes of the various on-disk structures have not | ||
146 | * changed. | ||
147 | */ | ||
148 | |||
149 | static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | ||
100 | { | 150 | { |
101 | struct gfs2_sbd *sdp = sb->s_fs_info; | 151 | unsigned int x; |
102 | 152 | ||
103 | sb->s_magic = GFS2_MAGIC; | 153 | if (sb->sb_magic != GFS2_MAGIC || |
104 | sb->s_op = &gfs2_super_ops; | 154 | sb->sb_type != GFS2_METATYPE_SB) { |
105 | sb->s_export_op = &gfs2_export_ops; | 155 | if (!silent) |
106 | sb->s_time_gran = 1; | 156 | printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); |
107 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 157 | return -EINVAL; |
158 | } | ||
159 | |||
160 | /* If format numbers match exactly, we're done. */ | ||
161 | |||
162 | if (sb->sb_fs_format == GFS2_FORMAT_FS && | ||
163 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | ||
164 | return 0; | ||
165 | |||
166 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | ||
167 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
168 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
169 | break; | ||
170 | |||
171 | if (!gfs2_old_fs_formats[x]) { | ||
172 | printk(KERN_WARNING | ||
173 | "GFS2: code version (%u, %u) is incompatible " | ||
174 | "with ondisk format (%u, %u)\n", | ||
175 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
176 | sb->sb_fs_format, sb->sb_multihost_format); | ||
177 | printk(KERN_WARNING | ||
178 | "GFS2: I don't know how to upgrade this FS\n"); | ||
179 | return -EINVAL; | ||
180 | } | ||
181 | } | ||
182 | |||
183 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
184 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
185 | if (gfs2_old_multihost_formats[x] == | ||
186 | sb->sb_multihost_format) | ||
187 | break; | ||
188 | |||
189 | if (!gfs2_old_multihost_formats[x]) { | ||
190 | printk(KERN_WARNING | ||
191 | "GFS2: code version (%u, %u) is incompatible " | ||
192 | "with ondisk format (%u, %u)\n", | ||
193 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
194 | sb->sb_fs_format, sb->sb_multihost_format); | ||
195 | printk(KERN_WARNING | ||
196 | "GFS2: I don't know how to upgrade this FS\n"); | ||
197 | return -EINVAL; | ||
198 | } | ||
199 | } | ||
200 | |||
201 | if (!sdp->sd_args.ar_upgrade) { | ||
202 | printk(KERN_WARNING | ||
203 | "GFS2: code version (%u, %u) is incompatible " | ||
204 | "with ondisk format (%u, %u)\n", | ||
205 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
206 | sb->sb_fs_format, sb->sb_multihost_format); | ||
207 | printk(KERN_INFO | ||
208 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
209 | "the FS\n"); | ||
210 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
211 | return -EINVAL; | ||
212 | } | ||
213 | |||
214 | return 0; | ||
215 | } | ||
216 | |||
217 | static void end_bio_io_page(struct bio *bio, int error) | ||
218 | { | ||
219 | struct page *page = bio->bi_private; | ||
108 | 220 | ||
109 | if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) | 221 | if (!error) |
110 | set_bit(noatime, &sdp->sd_flags); | 222 | SetPageUptodate(page); |
223 | else | ||
224 | printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); | ||
225 | unlock_page(page); | ||
226 | } | ||
227 | |||
228 | static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) | ||
229 | { | ||
230 | const struct gfs2_sb *str = buf; | ||
231 | |||
232 | sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); | ||
233 | sb->sb_type = be32_to_cpu(str->sb_header.mh_type); | ||
234 | sb->sb_format = be32_to_cpu(str->sb_header.mh_format); | ||
235 | sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); | ||
236 | sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); | ||
237 | sb->sb_bsize = be32_to_cpu(str->sb_bsize); | ||
238 | sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); | ||
239 | sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); | ||
240 | sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); | ||
241 | sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); | ||
242 | sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); | ||
243 | |||
244 | memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); | ||
245 | memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); | ||
246 | } | ||
247 | |||
248 | /** | ||
249 | * gfs2_read_super - Read the gfs2 super block from disk | ||
250 | * @sdp: The GFS2 super block | ||
251 | * @sector: The location of the super block | ||
252 | * @error: The error code to return | ||
253 | * | ||
254 | * This uses the bio functions to read the super block from disk | ||
255 | * because we want to be 100% sure that we never read cached data. | ||
256 | * A super block is read twice only during each GFS2 mount and is | ||
257 | * never written to by the filesystem. The first time its read no | ||
258 | * locks are held, and the only details which are looked at are those | ||
259 | * relating to the locking protocol. Once locking is up and working, | ||
260 | * the sb is read again under the lock to establish the location of | ||
261 | * the master directory (contains pointers to journals etc) and the | ||
262 | * root directory. | ||
263 | * | ||
264 | * Returns: 0 on success or error | ||
265 | */ | ||
266 | |||
267 | static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | ||
268 | { | ||
269 | struct super_block *sb = sdp->sd_vfs; | ||
270 | struct gfs2_sb *p; | ||
271 | struct page *page; | ||
272 | struct bio *bio; | ||
273 | |||
274 | page = alloc_page(GFP_NOFS); | ||
275 | if (unlikely(!page)) | ||
276 | return -ENOBUFS; | ||
277 | |||
278 | ClearPageUptodate(page); | ||
279 | ClearPageDirty(page); | ||
280 | lock_page(page); | ||
281 | |||
282 | bio = bio_alloc(GFP_NOFS, 1); | ||
283 | if (unlikely(!bio)) { | ||
284 | __free_page(page); | ||
285 | return -ENOBUFS; | ||
286 | } | ||
111 | 287 | ||
112 | /* Don't let the VFS update atimes. GFS2 handles this itself. */ | 288 | bio->bi_sector = sector * (sb->s_blocksize >> 9); |
113 | sb->s_flags |= MS_NOATIME | MS_NODIRATIME; | 289 | bio->bi_bdev = sb->s_bdev; |
290 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
291 | |||
292 | bio->bi_end_io = end_bio_io_page; | ||
293 | bio->bi_private = page; | ||
294 | submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); | ||
295 | wait_on_page_locked(page); | ||
296 | bio_put(bio); | ||
297 | if (!PageUptodate(page)) { | ||
298 | __free_page(page); | ||
299 | return -EIO; | ||
300 | } | ||
301 | p = kmap(page); | ||
302 | gfs2_sb_in(&sdp->sd_sb, p); | ||
303 | kunmap(page); | ||
304 | __free_page(page); | ||
305 | return 0; | ||
306 | } | ||
307 | /** | ||
308 | * gfs2_read_sb - Read super block | ||
309 | * @sdp: The GFS2 superblock | ||
310 | * @gl: the glock for the superblock (assumed to be held) | ||
311 | * @silent: Don't print message if mount fails | ||
312 | * | ||
313 | */ | ||
314 | |||
315 | static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | ||
316 | { | ||
317 | u32 hash_blocks, ind_blocks, leaf_blocks; | ||
318 | u32 tmp_blocks; | ||
319 | unsigned int x; | ||
320 | int error; | ||
321 | |||
322 | error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); | ||
323 | if (error) { | ||
324 | if (!silent) | ||
325 | fs_err(sdp, "can't read superblock\n"); | ||
326 | return error; | ||
327 | } | ||
328 | |||
329 | error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); | ||
330 | if (error) | ||
331 | return error; | ||
332 | |||
333 | sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - | ||
334 | GFS2_BASIC_BLOCK_SHIFT; | ||
335 | sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; | ||
336 | sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - | ||
337 | sizeof(struct gfs2_dinode)) / sizeof(u64); | ||
338 | sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - | ||
339 | sizeof(struct gfs2_meta_header)) / sizeof(u64); | ||
340 | sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); | ||
341 | sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; | ||
342 | sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; | ||
343 | sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); | ||
344 | sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - | ||
345 | sizeof(struct gfs2_meta_header)) / | ||
346 | sizeof(struct gfs2_quota_change); | ||
347 | |||
348 | /* Compute maximum reservation required to add a entry to a directory */ | ||
349 | |||
350 | hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), | ||
351 | sdp->sd_jbsize); | ||
352 | |||
353 | ind_blocks = 0; | ||
354 | for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { | ||
355 | tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); | ||
356 | ind_blocks += tmp_blocks; | ||
357 | } | ||
358 | |||
359 | leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; | ||
360 | |||
361 | sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; | ||
362 | |||
363 | sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - | ||
364 | sizeof(struct gfs2_dinode); | ||
365 | sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; | ||
366 | for (x = 2;; x++) { | ||
367 | u64 space, d; | ||
368 | u32 m; | ||
369 | |||
370 | space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; | ||
371 | d = space; | ||
372 | m = do_div(d, sdp->sd_inptrs); | ||
373 | |||
374 | if (d != sdp->sd_heightsize[x - 1] || m) | ||
375 | break; | ||
376 | sdp->sd_heightsize[x] = space; | ||
377 | } | ||
378 | sdp->sd_max_height = x; | ||
379 | sdp->sd_heightsize[x] = ~0; | ||
380 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); | ||
381 | |||
382 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - | ||
383 | sizeof(struct gfs2_dinode); | ||
384 | sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; | ||
385 | for (x = 2;; x++) { | ||
386 | u64 space, d; | ||
387 | u32 m; | ||
388 | |||
389 | space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; | ||
390 | d = space; | ||
391 | m = do_div(d, sdp->sd_inptrs); | ||
392 | |||
393 | if (d != sdp->sd_jheightsize[x - 1] || m) | ||
394 | break; | ||
395 | sdp->sd_jheightsize[x] = space; | ||
396 | } | ||
397 | sdp->sd_max_jheight = x; | ||
398 | sdp->sd_jheightsize[x] = ~0; | ||
399 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); | ||
400 | |||
401 | return 0; | ||
114 | } | 402 | } |
115 | 403 | ||
116 | static int init_names(struct gfs2_sbd *sdp, int silent) | 404 | static int init_names(struct gfs2_sbd *sdp, int silent) |
@@ -224,51 +512,59 @@ fail: | |||
224 | return error; | 512 | return error; |
225 | } | 513 | } |
226 | 514 | ||
227 | static inline struct inode *gfs2_lookup_root(struct super_block *sb, | 515 | static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, |
228 | u64 no_addr) | 516 | u64 no_addr, const char *name) |
229 | { | 517 | { |
230 | return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); | 518 | struct gfs2_sbd *sdp = sb->s_fs_info; |
519 | struct dentry *dentry; | ||
520 | struct inode *inode; | ||
521 | |||
522 | inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); | ||
523 | if (IS_ERR(inode)) { | ||
524 | fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); | ||
525 | return PTR_ERR(inode); | ||
526 | } | ||
527 | dentry = d_alloc_root(inode); | ||
528 | if (!dentry) { | ||
529 | fs_err(sdp, "can't alloc %s dentry\n", name); | ||
530 | iput(inode); | ||
531 | return -ENOMEM; | ||
532 | } | ||
533 | dentry->d_op = &gfs2_dops; | ||
534 | *dptr = dentry; | ||
535 | return 0; | ||
231 | } | 536 | } |
232 | 537 | ||
233 | static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) | 538 | static int init_sb(struct gfs2_sbd *sdp, int silent) |
234 | { | 539 | { |
235 | struct super_block *sb = sdp->sd_vfs; | 540 | struct super_block *sb = sdp->sd_vfs; |
236 | struct gfs2_holder sb_gh; | 541 | struct gfs2_holder sb_gh; |
237 | u64 no_addr; | 542 | u64 no_addr; |
238 | struct inode *inode; | 543 | int ret; |
239 | int error = 0; | ||
240 | 544 | ||
241 | if (undo) { | 545 | ret = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, |
242 | if (sb->s_root) { | 546 | LM_ST_SHARED, 0, &sb_gh); |
243 | dput(sb->s_root); | 547 | if (ret) { |
244 | sb->s_root = NULL; | 548 | fs_err(sdp, "can't acquire superblock glock: %d\n", ret); |
245 | } | 549 | return ret; |
246 | return 0; | ||
247 | } | 550 | } |
248 | 551 | ||
249 | error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, | 552 | ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); |
250 | LM_ST_SHARED, 0, &sb_gh); | 553 | if (ret) { |
251 | if (error) { | 554 | fs_err(sdp, "can't read superblock: %d\n", ret); |
252 | fs_err(sdp, "can't acquire superblock glock: %d\n", error); | ||
253 | return error; | ||
254 | } | ||
255 | |||
256 | error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); | ||
257 | if (error) { | ||
258 | fs_err(sdp, "can't read superblock: %d\n", error); | ||
259 | goto out; | 555 | goto out; |
260 | } | 556 | } |
261 | 557 | ||
262 | /* Set up the buffer cache and SB for real */ | 558 | /* Set up the buffer cache and SB for real */ |
263 | if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { | 559 | if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { |
264 | error = -EINVAL; | 560 | ret = -EINVAL; |
265 | fs_err(sdp, "FS block size (%u) is too small for device " | 561 | fs_err(sdp, "FS block size (%u) is too small for device " |
266 | "block size (%u)\n", | 562 | "block size (%u)\n", |
267 | sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); | 563 | sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); |
268 | goto out; | 564 | goto out; |
269 | } | 565 | } |
270 | if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { | 566 | if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { |
271 | error = -EINVAL; | 567 | ret = -EINVAL; |
272 | fs_err(sdp, "FS block size (%u) is too big for machine " | 568 | fs_err(sdp, "FS block size (%u) is too big for machine " |
273 | "page size (%u)\n", | 569 | "page size (%u)\n", |
274 | sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); | 570 | sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); |
@@ -278,26 +574,21 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) | |||
278 | 574 | ||
279 | /* Get the root inode */ | 575 | /* Get the root inode */ |
280 | no_addr = sdp->sd_sb.sb_root_dir.no_addr; | 576 | no_addr = sdp->sd_sb.sb_root_dir.no_addr; |
281 | if (sb->s_type == &gfs2meta_fs_type) | 577 | ret = gfs2_lookup_root(sb, &sdp->sd_root_dir, no_addr, "root"); |
282 | no_addr = sdp->sd_sb.sb_master_dir.no_addr; | 578 | if (ret) |
283 | inode = gfs2_lookup_root(sb, no_addr); | ||
284 | if (IS_ERR(inode)) { | ||
285 | error = PTR_ERR(inode); | ||
286 | fs_err(sdp, "can't read in root inode: %d\n", error); | ||
287 | goto out; | 579 | goto out; |
288 | } | ||
289 | 580 | ||
290 | sb->s_root = d_alloc_root(inode); | 581 | /* Get the master inode */ |
291 | if (!sb->s_root) { | 582 | no_addr = sdp->sd_sb.sb_master_dir.no_addr; |
292 | fs_err(sdp, "can't get root dentry\n"); | 583 | ret = gfs2_lookup_root(sb, &sdp->sd_master_dir, no_addr, "master"); |
293 | error = -ENOMEM; | 584 | if (ret) { |
294 | iput(inode); | 585 | dput(sdp->sd_root_dir); |
295 | } else | 586 | goto out; |
296 | sb->s_root->d_op = &gfs2_dops; | 587 | } |
297 | 588 | sb->s_root = dget(sdp->sd_args.ar_meta ? sdp->sd_master_dir : sdp->sd_root_dir); | |
298 | out: | 589 | out: |
299 | gfs2_glock_dq_uninit(&sb_gh); | 590 | gfs2_glock_dq_uninit(&sb_gh); |
300 | return error; | 591 | return ret; |
301 | } | 592 | } |
302 | 593 | ||
303 | /** | 594 | /** |
@@ -372,6 +663,7 @@ static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) | |||
372 | 663 | ||
373 | static int init_journal(struct gfs2_sbd *sdp, int undo) | 664 | static int init_journal(struct gfs2_sbd *sdp, int undo) |
374 | { | 665 | { |
666 | struct inode *master = sdp->sd_master_dir->d_inode; | ||
375 | struct gfs2_holder ji_gh; | 667 | struct gfs2_holder ji_gh; |
376 | struct task_struct *p; | 668 | struct task_struct *p; |
377 | struct gfs2_inode *ip; | 669 | struct gfs2_inode *ip; |
@@ -383,7 +675,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
383 | goto fail_recoverd; | 675 | goto fail_recoverd; |
384 | } | 676 | } |
385 | 677 | ||
386 | sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex"); | 678 | sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); |
387 | if (IS_ERR(sdp->sd_jindex)) { | 679 | if (IS_ERR(sdp->sd_jindex)) { |
388 | fs_err(sdp, "can't lookup journal index: %d\n", error); | 680 | fs_err(sdp, "can't lookup journal index: %d\n", error); |
389 | return PTR_ERR(sdp->sd_jindex); | 681 | return PTR_ERR(sdp->sd_jindex); |
@@ -506,25 +798,17 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
506 | { | 798 | { |
507 | int error = 0; | 799 | int error = 0; |
508 | struct gfs2_inode *ip; | 800 | struct gfs2_inode *ip; |
509 | struct inode *inode; | 801 | struct inode *master = sdp->sd_master_dir->d_inode; |
510 | 802 | ||
511 | if (undo) | 803 | if (undo) |
512 | goto fail_qinode; | 804 | goto fail_qinode; |
513 | 805 | ||
514 | inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr); | ||
515 | if (IS_ERR(inode)) { | ||
516 | error = PTR_ERR(inode); | ||
517 | fs_err(sdp, "can't read in master directory: %d\n", error); | ||
518 | goto fail; | ||
519 | } | ||
520 | sdp->sd_master_dir = inode; | ||
521 | |||
522 | error = init_journal(sdp, undo); | 806 | error = init_journal(sdp, undo); |
523 | if (error) | 807 | if (error) |
524 | goto fail_master; | 808 | goto fail; |
525 | 809 | ||
526 | /* Read in the master inode number inode */ | 810 | /* Read in the master inode number inode */ |
527 | sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum"); | 811 | sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum"); |
528 | if (IS_ERR(sdp->sd_inum_inode)) { | 812 | if (IS_ERR(sdp->sd_inum_inode)) { |
529 | error = PTR_ERR(sdp->sd_inum_inode); | 813 | error = PTR_ERR(sdp->sd_inum_inode); |
530 | fs_err(sdp, "can't read in inum inode: %d\n", error); | 814 | fs_err(sdp, "can't read in inum inode: %d\n", error); |
@@ -533,7 +817,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
533 | 817 | ||
534 | 818 | ||
535 | /* Read in the master statfs inode */ | 819 | /* Read in the master statfs inode */ |
536 | sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs"); | 820 | sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); |
537 | if (IS_ERR(sdp->sd_statfs_inode)) { | 821 | if (IS_ERR(sdp->sd_statfs_inode)) { |
538 | error = PTR_ERR(sdp->sd_statfs_inode); | 822 | error = PTR_ERR(sdp->sd_statfs_inode); |
539 | fs_err(sdp, "can't read in statfs inode: %d\n", error); | 823 | fs_err(sdp, "can't read in statfs inode: %d\n", error); |
@@ -541,7 +825,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
541 | } | 825 | } |
542 | 826 | ||
543 | /* Read in the resource index inode */ | 827 | /* Read in the resource index inode */ |
544 | sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex"); | 828 | sdp->sd_rindex = gfs2_lookup_simple(master, "rindex"); |
545 | if (IS_ERR(sdp->sd_rindex)) { | 829 | if (IS_ERR(sdp->sd_rindex)) { |
546 | error = PTR_ERR(sdp->sd_rindex); | 830 | error = PTR_ERR(sdp->sd_rindex); |
547 | fs_err(sdp, "can't get resource index inode: %d\n", error); | 831 | fs_err(sdp, "can't get resource index inode: %d\n", error); |
@@ -552,7 +836,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
552 | sdp->sd_rindex_uptodate = 0; | 836 | sdp->sd_rindex_uptodate = 0; |
553 | 837 | ||
554 | /* Read in the quota inode */ | 838 | /* Read in the quota inode */ |
555 | sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota"); | 839 | sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota"); |
556 | if (IS_ERR(sdp->sd_quota_inode)) { | 840 | if (IS_ERR(sdp->sd_quota_inode)) { |
557 | error = PTR_ERR(sdp->sd_quota_inode); | 841 | error = PTR_ERR(sdp->sd_quota_inode); |
558 | fs_err(sdp, "can't get quota file inode: %d\n", error); | 842 | fs_err(sdp, "can't get quota file inode: %d\n", error); |
@@ -571,8 +855,6 @@ fail_inum: | |||
571 | iput(sdp->sd_inum_inode); | 855 | iput(sdp->sd_inum_inode); |
572 | fail_journal: | 856 | fail_journal: |
573 | init_journal(sdp, UNDO); | 857 | init_journal(sdp, UNDO); |
574 | fail_master: | ||
575 | iput(sdp->sd_master_dir); | ||
576 | fail: | 858 | fail: |
577 | return error; | 859 | return error; |
578 | } | 860 | } |
@@ -583,6 +865,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) | |||
583 | char buf[30]; | 865 | char buf[30]; |
584 | int error = 0; | 866 | int error = 0; |
585 | struct gfs2_inode *ip; | 867 | struct gfs2_inode *ip; |
868 | struct inode *master = sdp->sd_master_dir->d_inode; | ||
586 | 869 | ||
587 | if (sdp->sd_args.ar_spectator) | 870 | if (sdp->sd_args.ar_spectator) |
588 | return 0; | 871 | return 0; |
@@ -590,7 +873,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) | |||
590 | if (undo) | 873 | if (undo) |
591 | goto fail_qc_gh; | 874 | goto fail_qc_gh; |
592 | 875 | ||
593 | pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node"); | 876 | pn = gfs2_lookup_simple(master, "per_node"); |
594 | if (IS_ERR(pn)) { | 877 | if (IS_ERR(pn)) { |
595 | error = PTR_ERR(pn); | 878 | error = PTR_ERR(pn); |
596 | fs_err(sdp, "can't find per_node directory: %d\n", error); | 879 | fs_err(sdp, "can't find per_node directory: %d\n", error); |
@@ -800,7 +1083,11 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
800 | goto fail; | 1083 | goto fail; |
801 | } | 1084 | } |
802 | 1085 | ||
803 | init_vfs(sb, SDF_NOATIME); | 1086 | sb->s_magic = GFS2_MAGIC; |
1087 | sb->s_op = &gfs2_super_ops; | ||
1088 | sb->s_export_op = &gfs2_export_ops; | ||
1089 | sb->s_time_gran = 1; | ||
1090 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
804 | 1091 | ||
805 | /* Set up the buffer cache and fill in some fake block size values | 1092 | /* Set up the buffer cache and fill in some fake block size values |
806 | to allow us to read-in the on-disk superblock. */ | 1093 | to allow us to read-in the on-disk superblock. */ |
@@ -828,7 +1115,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
828 | if (error) | 1115 | if (error) |
829 | goto fail_lm; | 1116 | goto fail_lm; |
830 | 1117 | ||
831 | error = init_sb(sdp, silent, DO); | 1118 | error = init_sb(sdp, silent); |
832 | if (error) | 1119 | if (error) |
833 | goto fail_locking; | 1120 | goto fail_locking; |
834 | 1121 | ||
@@ -869,7 +1156,11 @@ fail_per_node: | |||
869 | fail_inodes: | 1156 | fail_inodes: |
870 | init_inodes(sdp, UNDO); | 1157 | init_inodes(sdp, UNDO); |
871 | fail_sb: | 1158 | fail_sb: |
872 | init_sb(sdp, 0, UNDO); | 1159 | if (sdp->sd_root_dir) |
1160 | dput(sdp->sd_root_dir); | ||
1161 | if (sdp->sd_master_dir) | ||
1162 | dput(sdp->sd_master_dir); | ||
1163 | sb->s_root = NULL; | ||
873 | fail_locking: | 1164 | fail_locking: |
874 | init_locking(sdp, &mount_gh, UNDO); | 1165 | init_locking(sdp, &mount_gh, UNDO); |
875 | fail_lm: | 1166 | fail_lm: |
@@ -887,151 +1178,63 @@ fail: | |||
887 | } | 1178 | } |
888 | 1179 | ||
889 | static int gfs2_get_sb(struct file_system_type *fs_type, int flags, | 1180 | static int gfs2_get_sb(struct file_system_type *fs_type, int flags, |
890 | const char *dev_name, void *data, struct vfsmount *mnt) | 1181 | const char *dev_name, void *data, struct vfsmount *mnt) |
891 | { | 1182 | { |
892 | struct super_block *sb; | 1183 | return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); |
893 | struct gfs2_sbd *sdp; | ||
894 | int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); | ||
895 | if (error) | ||
896 | goto out; | ||
897 | sb = mnt->mnt_sb; | ||
898 | sdp = sb->s_fs_info; | ||
899 | sdp->sd_gfs2mnt = mnt; | ||
900 | out: | ||
901 | return error; | ||
902 | } | 1184 | } |
903 | 1185 | ||
904 | static int fill_super_meta(struct super_block *sb, struct super_block *new, | 1186 | static struct super_block *get_gfs2_sb(const char *dev_name) |
905 | void *data, int silent) | ||
906 | { | 1187 | { |
907 | struct gfs2_sbd *sdp = sb->s_fs_info; | 1188 | struct super_block *sb; |
908 | struct inode *inode; | ||
909 | int error = 0; | ||
910 | |||
911 | new->s_fs_info = sdp; | ||
912 | sdp->sd_vfs_meta = sb; | ||
913 | |||
914 | init_vfs(new, SDF_NOATIME); | ||
915 | |||
916 | /* Get the master inode */ | ||
917 | inode = igrab(sdp->sd_master_dir); | ||
918 | |||
919 | new->s_root = d_alloc_root(inode); | ||
920 | if (!new->s_root) { | ||
921 | fs_err(sdp, "can't get root dentry\n"); | ||
922 | error = -ENOMEM; | ||
923 | iput(inode); | ||
924 | } else | ||
925 | new->s_root->d_op = &gfs2_dops; | ||
926 | |||
927 | return error; | ||
928 | } | ||
929 | |||
930 | static int set_bdev_super(struct super_block *s, void *data) | ||
931 | { | ||
932 | s->s_bdev = data; | ||
933 | s->s_dev = s->s_bdev->bd_dev; | ||
934 | return 0; | ||
935 | } | ||
936 | |||
937 | static int test_bdev_super(struct super_block *s, void *data) | ||
938 | { | ||
939 | return s->s_bdev == data; | ||
940 | } | ||
941 | |||
942 | static struct super_block* get_gfs2_sb(const char *dev_name) | ||
943 | { | ||
944 | struct kstat stat; | ||
945 | struct nameidata nd; | 1189 | struct nameidata nd; |
946 | struct super_block *sb = NULL, *s; | ||
947 | int error; | 1190 | int error; |
948 | 1191 | ||
949 | error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); | 1192 | error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); |
950 | if (error) { | 1193 | if (error) { |
951 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n", | 1194 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", |
952 | dev_name); | 1195 | dev_name, error); |
953 | goto out; | 1196 | return NULL; |
954 | } | ||
955 | error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat); | ||
956 | |||
957 | list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) { | ||
958 | if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || | ||
959 | (S_ISDIR(stat.mode) && | ||
960 | s == nd.path.dentry->d_inode->i_sb)) { | ||
961 | sb = s; | ||
962 | goto free_nd; | ||
963 | } | ||
964 | } | 1197 | } |
965 | 1198 | sb = nd.path.dentry->d_inode->i_sb; | |
966 | printk(KERN_WARNING "GFS2: Unrecognized block device or " | 1199 | if (sb && (sb->s_type == &gfs2_fs_type)) |
967 | "mount point %s\n", dev_name); | 1200 | atomic_inc(&sb->s_active); |
968 | 1201 | else | |
969 | free_nd: | 1202 | sb = NULL; |
970 | path_put(&nd.path); | 1203 | path_put(&nd.path); |
971 | out: | ||
972 | return sb; | 1204 | return sb; |
973 | } | 1205 | } |
974 | 1206 | ||
975 | static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, | 1207 | static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, |
976 | const char *dev_name, void *data, struct vfsmount *mnt) | 1208 | const char *dev_name, void *data, struct vfsmount *mnt) |
977 | { | 1209 | { |
978 | int error = 0; | 1210 | struct super_block *sb = NULL; |
979 | struct super_block *sb = NULL, *new; | ||
980 | struct gfs2_sbd *sdp; | 1211 | struct gfs2_sbd *sdp; |
981 | 1212 | ||
982 | sb = get_gfs2_sb(dev_name); | 1213 | sb = get_gfs2_sb(dev_name); |
983 | if (!sb) { | 1214 | if (!sb) { |
984 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); | 1215 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); |
985 | error = -ENOENT; | 1216 | return -ENOENT; |
986 | goto error; | ||
987 | } | 1217 | } |
988 | sdp = sb->s_fs_info; | 1218 | sdp = sb->s_fs_info; |
989 | if (sdp->sd_vfs_meta) { | 1219 | mnt->mnt_sb = sb; |
990 | printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n"); | 1220 | mnt->mnt_root = dget(sdp->sd_master_dir); |
991 | error = -EBUSY; | 1221 | return 0; |
992 | goto error; | ||
993 | } | ||
994 | down(&sb->s_bdev->bd_mount_sem); | ||
995 | new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev); | ||
996 | up(&sb->s_bdev->bd_mount_sem); | ||
997 | if (IS_ERR(new)) { | ||
998 | error = PTR_ERR(new); | ||
999 | goto error; | ||
1000 | } | ||
1001 | new->s_flags = flags; | ||
1002 | strlcpy(new->s_id, sb->s_id, sizeof(new->s_id)); | ||
1003 | sb_set_blocksize(new, sb->s_blocksize); | ||
1004 | error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0); | ||
1005 | if (error) { | ||
1006 | up_write(&new->s_umount); | ||
1007 | deactivate_super(new); | ||
1008 | goto error; | ||
1009 | } | ||
1010 | |||
1011 | new->s_flags |= MS_ACTIVE; | ||
1012 | |||
1013 | /* Grab a reference to the gfs2 mount point */ | ||
1014 | atomic_inc(&sdp->sd_gfs2mnt->mnt_count); | ||
1015 | return simple_set_mnt(mnt, new); | ||
1016 | error: | ||
1017 | return error; | ||
1018 | } | 1222 | } |
1019 | 1223 | ||
1020 | static void gfs2_kill_sb(struct super_block *sb) | 1224 | static void gfs2_kill_sb(struct super_block *sb) |
1021 | { | 1225 | { |
1022 | if (sb->s_fs_info) { | 1226 | struct gfs2_sbd *sdp = sb->s_fs_info; |
1023 | gfs2_delete_debugfs_file(sb->s_fs_info); | 1227 | if (sdp) { |
1024 | gfs2_meta_syncfs(sb->s_fs_info); | 1228 | gfs2_meta_syncfs(sdp); |
1229 | dput(sdp->sd_root_dir); | ||
1230 | dput(sdp->sd_master_dir); | ||
1231 | sdp->sd_root_dir = NULL; | ||
1232 | sdp->sd_master_dir = NULL; | ||
1025 | } | 1233 | } |
1234 | shrink_dcache_sb(sb); | ||
1026 | kill_block_super(sb); | 1235 | kill_block_super(sb); |
1027 | } | 1236 | if (sdp) |
1028 | 1237 | gfs2_delete_debugfs_file(sdp); | |
1029 | static void gfs2_kill_sb_meta(struct super_block *sb) | ||
1030 | { | ||
1031 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
1032 | generic_shutdown_super(sb); | ||
1033 | sdp->sd_vfs_meta = NULL; | ||
1034 | atomic_dec(&sdp->sd_gfs2mnt->mnt_count); | ||
1035 | } | 1238 | } |
1036 | 1239 | ||
1037 | struct file_system_type gfs2_fs_type = { | 1240 | struct file_system_type gfs2_fs_type = { |
@@ -1046,7 +1249,6 @@ struct file_system_type gfs2meta_fs_type = { | |||
1046 | .name = "gfs2meta", | 1249 | .name = "gfs2meta", |
1047 | .fs_flags = FS_REQUIRES_DEV, | 1250 | .fs_flags = FS_REQUIRES_DEV, |
1048 | .get_sb = gfs2_get_sb_meta, | 1251 | .get_sb = gfs2_get_sb_meta, |
1049 | .kill_sb = gfs2_kill_sb_meta, | ||
1050 | .owner = THIS_MODULE, | 1252 | .owner = THIS_MODULE, |
1051 | }; | 1253 | }; |
1052 | 1254 | ||
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index e2c62f73a778..534e1e2c65ca 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -159,9 +159,13 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
159 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); | 159 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); |
160 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); | 160 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); |
161 | 161 | ||
162 | error = gfs2_glock_nq_m(2, ghs); | 162 | error = gfs2_glock_nq(ghs); /* parent */ |
163 | if (error) | 163 | if (error) |
164 | goto out; | 164 | goto out_parent; |
165 | |||
166 | error = gfs2_glock_nq(ghs + 1); /* child */ | ||
167 | if (error) | ||
168 | goto out_child; | ||
165 | 169 | ||
166 | error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); | 170 | error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); |
167 | if (error) | 171 | if (error) |
@@ -245,8 +249,10 @@ out_alloc: | |||
245 | if (alloc_required) | 249 | if (alloc_required) |
246 | gfs2_alloc_put(dip); | 250 | gfs2_alloc_put(dip); |
247 | out_gunlock: | 251 | out_gunlock: |
248 | gfs2_glock_dq_m(2, ghs); | 252 | gfs2_glock_dq(ghs + 1); |
249 | out: | 253 | out_child: |
254 | gfs2_glock_dq(ghs); | ||
255 | out_parent: | ||
250 | gfs2_holder_uninit(ghs); | 256 | gfs2_holder_uninit(ghs); |
251 | gfs2_holder_uninit(ghs + 1); | 257 | gfs2_holder_uninit(ghs + 1); |
252 | if (!error) { | 258 | if (!error) { |
@@ -302,7 +308,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
302 | 308 | ||
303 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); | 309 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); |
304 | if (error) | 310 | if (error) |
305 | goto out_rgrp; | 311 | goto out_gunlock; |
306 | 312 | ||
307 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); | 313 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); |
308 | if (error) | 314 | if (error) |
@@ -316,6 +322,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
316 | 322 | ||
317 | out_end_trans: | 323 | out_end_trans: |
318 | gfs2_trans_end(sdp); | 324 | gfs2_trans_end(sdp); |
325 | out_gunlock: | ||
319 | gfs2_glock_dq(ghs + 2); | 326 | gfs2_glock_dq(ghs + 2); |
320 | out_rgrp: | 327 | out_rgrp: |
321 | gfs2_holder_uninit(ghs + 2); | 328 | gfs2_holder_uninit(ghs + 2); |
@@ -485,7 +492,6 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
485 | struct gfs2_holder ri_gh; | 492 | struct gfs2_holder ri_gh; |
486 | int error; | 493 | int error; |
487 | 494 | ||
488 | |||
489 | error = gfs2_rindex_hold(sdp, &ri_gh); | 495 | error = gfs2_rindex_hold(sdp, &ri_gh); |
490 | if (error) | 496 | if (error) |
491 | return error; | 497 | return error; |
@@ -495,9 +501,17 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
495 | rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); | 501 | rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); |
496 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); | 502 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); |
497 | 503 | ||
498 | error = gfs2_glock_nq_m(3, ghs); | 504 | error = gfs2_glock_nq(ghs); /* parent */ |
499 | if (error) | 505 | if (error) |
500 | goto out; | 506 | goto out_parent; |
507 | |||
508 | error = gfs2_glock_nq(ghs + 1); /* child */ | ||
509 | if (error) | ||
510 | goto out_child; | ||
511 | |||
512 | error = gfs2_glock_nq(ghs + 2); /* rgrp */ | ||
513 | if (error) | ||
514 | goto out_rgrp; | ||
501 | 515 | ||
502 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); | 516 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); |
503 | if (error) | 517 | if (error) |
@@ -523,11 +537,15 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
523 | gfs2_trans_end(sdp); | 537 | gfs2_trans_end(sdp); |
524 | 538 | ||
525 | out_gunlock: | 539 | out_gunlock: |
526 | gfs2_glock_dq_m(3, ghs); | 540 | gfs2_glock_dq(ghs + 2); |
527 | out: | 541 | out_rgrp: |
528 | gfs2_holder_uninit(ghs); | ||
529 | gfs2_holder_uninit(ghs + 1); | ||
530 | gfs2_holder_uninit(ghs + 2); | 542 | gfs2_holder_uninit(ghs + 2); |
543 | gfs2_glock_dq(ghs + 1); | ||
544 | out_child: | ||
545 | gfs2_holder_uninit(ghs + 1); | ||
546 | gfs2_glock_dq(ghs); | ||
547 | out_parent: | ||
548 | gfs2_holder_uninit(ghs); | ||
531 | gfs2_glock_dq_uninit(&ri_gh); | 549 | gfs2_glock_dq_uninit(&ri_gh); |
532 | return error; | 550 | return error; |
533 | } | 551 | } |
@@ -571,6 +589,54 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, | |||
571 | return 0; | 589 | return 0; |
572 | } | 590 | } |
573 | 591 | ||
592 | /* | ||
593 | * gfs2_ok_to_move - check if it's ok to move a directory to another directory | ||
594 | * @this: move this | ||
595 | * @to: to here | ||
596 | * | ||
597 | * Follow @to back to the root and make sure we don't encounter @this | ||
598 | * Assumes we already hold the rename lock. | ||
599 | * | ||
600 | * Returns: errno | ||
601 | */ | ||
602 | |||
603 | static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | ||
604 | { | ||
605 | struct inode *dir = &to->i_inode; | ||
606 | struct super_block *sb = dir->i_sb; | ||
607 | struct inode *tmp; | ||
608 | struct qstr dotdot; | ||
609 | int error = 0; | ||
610 | |||
611 | gfs2_str2qstr(&dotdot, ".."); | ||
612 | |||
613 | igrab(dir); | ||
614 | |||
615 | for (;;) { | ||
616 | if (dir == &this->i_inode) { | ||
617 | error = -EINVAL; | ||
618 | break; | ||
619 | } | ||
620 | if (dir == sb->s_root->d_inode) { | ||
621 | error = 0; | ||
622 | break; | ||
623 | } | ||
624 | |||
625 | tmp = gfs2_lookupi(dir, &dotdot, 1); | ||
626 | if (IS_ERR(tmp)) { | ||
627 | error = PTR_ERR(tmp); | ||
628 | break; | ||
629 | } | ||
630 | |||
631 | iput(dir); | ||
632 | dir = tmp; | ||
633 | } | ||
634 | |||
635 | iput(dir); | ||
636 | |||
637 | return error; | ||
638 | } | ||
639 | |||
574 | /** | 640 | /** |
575 | * gfs2_rename - Rename a file | 641 | * gfs2_rename - Rename a file |
576 | * @odir: Parent directory of old file name | 642 | * @odir: Parent directory of old file name |
@@ -589,7 +655,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
589 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); | 655 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); |
590 | struct gfs2_inode *nip = NULL; | 656 | struct gfs2_inode *nip = NULL; |
591 | struct gfs2_sbd *sdp = GFS2_SB(odir); | 657 | struct gfs2_sbd *sdp = GFS2_SB(odir); |
592 | struct gfs2_holder ghs[5], r_gh; | 658 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; |
593 | struct gfs2_rgrpd *nrgd; | 659 | struct gfs2_rgrpd *nrgd; |
594 | unsigned int num_gh; | 660 | unsigned int num_gh; |
595 | int dir_rename = 0; | 661 | int dir_rename = 0; |
@@ -603,19 +669,20 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
603 | return 0; | 669 | return 0; |
604 | } | 670 | } |
605 | 671 | ||
606 | /* Make sure we aren't trying to move a dirctory into it's subdir */ | ||
607 | |||
608 | if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) { | ||
609 | dir_rename = 1; | ||
610 | 672 | ||
611 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0, | 673 | if (odip != ndip) { |
612 | &r_gh); | 674 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, |
675 | 0, &r_gh); | ||
613 | if (error) | 676 | if (error) |
614 | goto out; | 677 | goto out; |
615 | 678 | ||
616 | error = gfs2_ok_to_move(ip, ndip); | 679 | if (S_ISDIR(ip->i_inode.i_mode)) { |
617 | if (error) | 680 | dir_rename = 1; |
618 | goto out_gunlock_r; | 681 | /* don't move a dirctory into it's subdir */ |
682 | error = gfs2_ok_to_move(ip, ndip); | ||
683 | if (error) | ||
684 | goto out_gunlock_r; | ||
685 | } | ||
619 | } | 686 | } |
620 | 687 | ||
621 | num_gh = 1; | 688 | num_gh = 1; |
@@ -639,9 +706,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
639 | gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); | 706 | gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); |
640 | } | 707 | } |
641 | 708 | ||
642 | error = gfs2_glock_nq_m(num_gh, ghs); | 709 | for (x = 0; x < num_gh; x++) { |
643 | if (error) | 710 | error = gfs2_glock_nq(ghs + x); |
644 | goto out_uninit; | 711 | if (error) |
712 | goto out_gunlock; | ||
713 | } | ||
645 | 714 | ||
646 | /* Check out the old directory */ | 715 | /* Check out the old directory */ |
647 | 716 | ||
@@ -804,12 +873,12 @@ out_alloc: | |||
804 | if (alloc_required) | 873 | if (alloc_required) |
805 | gfs2_alloc_put(ndip); | 874 | gfs2_alloc_put(ndip); |
806 | out_gunlock: | 875 | out_gunlock: |
807 | gfs2_glock_dq_m(num_gh, ghs); | 876 | while (x--) { |
808 | out_uninit: | 877 | gfs2_glock_dq(ghs + x); |
809 | for (x = 0; x < num_gh; x++) | ||
810 | gfs2_holder_uninit(ghs + x); | 878 | gfs2_holder_uninit(ghs + x); |
879 | } | ||
811 | out_gunlock_r: | 880 | out_gunlock_r: |
812 | if (dir_rename) | 881 | if (r_gh.gh_gl) |
813 | gfs2_glock_dq_uninit(&r_gh); | 882 | gfs2_glock_dq_uninit(&r_gh); |
814 | out: | 883 | out: |
815 | return error; | 884 | return error; |
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index f66ea0f7a356..d5355d9b5926 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/gfs2_ondisk.h> | 20 | #include <linux/gfs2_ondisk.h> |
21 | #include <linux/crc32.h> | 21 | #include <linux/crc32.h> |
22 | #include <linux/lm_interface.h> | 22 | #include <linux/lm_interface.h> |
23 | #include <linux/time.h> | ||
23 | 24 | ||
24 | #include "gfs2.h" | 25 | #include "gfs2.h" |
25 | #include "incore.h" | 26 | #include "incore.h" |
@@ -38,6 +39,7 @@ | |||
38 | #include "dir.h" | 39 | #include "dir.h" |
39 | #include "eattr.h" | 40 | #include "eattr.h" |
40 | #include "bmap.h" | 41 | #include "bmap.h" |
42 | #include "meta_io.h" | ||
41 | 43 | ||
42 | /** | 44 | /** |
43 | * gfs2_write_inode - Make sure the inode is stable on the disk | 45 | * gfs2_write_inode - Make sure the inode is stable on the disk |
@@ -50,16 +52,74 @@ | |||
50 | static int gfs2_write_inode(struct inode *inode, int sync) | 52 | static int gfs2_write_inode(struct inode *inode, int sync) |
51 | { | 53 | { |
52 | struct gfs2_inode *ip = GFS2_I(inode); | 54 | struct gfs2_inode *ip = GFS2_I(inode); |
53 | 55 | struct gfs2_sbd *sdp = GFS2_SB(inode); | |
54 | /* Check this is a "normal" inode */ | 56 | struct gfs2_holder gh; |
55 | if (test_bit(GIF_USER, &ip->i_flags)) { | 57 | struct buffer_head *bh; |
56 | if (current->flags & PF_MEMALLOC) | 58 | struct timespec atime; |
57 | return 0; | 59 | struct gfs2_dinode *di; |
58 | if (sync) | 60 | int ret = 0; |
59 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | 61 | |
62 | /* Check this is a "normal" inode, etc */ | ||
63 | if (!test_bit(GIF_USER, &ip->i_flags) || | ||
64 | (current->flags & PF_MEMALLOC)) | ||
65 | return 0; | ||
66 | ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
67 | if (ret) | ||
68 | goto do_flush; | ||
69 | ret = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
70 | if (ret) | ||
71 | goto do_unlock; | ||
72 | ret = gfs2_meta_inode_buffer(ip, &bh); | ||
73 | if (ret == 0) { | ||
74 | di = (struct gfs2_dinode *)bh->b_data; | ||
75 | atime.tv_sec = be64_to_cpu(di->di_atime); | ||
76 | atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); | ||
77 | if (timespec_compare(&inode->i_atime, &atime) > 0) { | ||
78 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
79 | gfs2_dinode_out(ip, bh->b_data); | ||
80 | } | ||
81 | brelse(bh); | ||
60 | } | 82 | } |
83 | gfs2_trans_end(sdp); | ||
84 | do_unlock: | ||
85 | gfs2_glock_dq_uninit(&gh); | ||
86 | do_flush: | ||
87 | if (sync != 0) | ||
88 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | ||
89 | return ret; | ||
90 | } | ||
61 | 91 | ||
62 | return 0; | 92 | /** |
93 | * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one | ||
94 | * @sdp: the filesystem | ||
95 | * | ||
96 | * Returns: errno | ||
97 | */ | ||
98 | |||
99 | static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | ||
100 | { | ||
101 | struct gfs2_holder t_gh; | ||
102 | int error; | ||
103 | |||
104 | gfs2_quota_sync(sdp); | ||
105 | gfs2_statfs_sync(sdp); | ||
106 | |||
107 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, | ||
108 | &t_gh); | ||
109 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
110 | return error; | ||
111 | |||
112 | gfs2_meta_syncfs(sdp); | ||
113 | gfs2_log_shutdown(sdp); | ||
114 | |||
115 | clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); | ||
116 | |||
117 | if (t_gh.gh_gl) | ||
118 | gfs2_glock_dq_uninit(&t_gh); | ||
119 | |||
120 | gfs2_quota_cleanup(sdp); | ||
121 | |||
122 | return error; | ||
63 | } | 123 | } |
64 | 124 | ||
65 | /** | 125 | /** |
@@ -73,12 +133,6 @@ static void gfs2_put_super(struct super_block *sb) | |||
73 | struct gfs2_sbd *sdp = sb->s_fs_info; | 133 | struct gfs2_sbd *sdp = sb->s_fs_info; |
74 | int error; | 134 | int error; |
75 | 135 | ||
76 | if (!sdp) | ||
77 | return; | ||
78 | |||
79 | if (!strncmp(sb->s_type->name, "gfs2meta", 8)) | ||
80 | return; /* Nothing to do */ | ||
81 | |||
82 | /* Unfreeze the filesystem, if we need to */ | 136 | /* Unfreeze the filesystem, if we need to */ |
83 | 137 | ||
84 | mutex_lock(&sdp->sd_freeze_lock); | 138 | mutex_lock(&sdp->sd_freeze_lock); |
@@ -101,7 +155,6 @@ static void gfs2_put_super(struct super_block *sb) | |||
101 | 155 | ||
102 | /* Release stuff */ | 156 | /* Release stuff */ |
103 | 157 | ||
104 | iput(sdp->sd_master_dir); | ||
105 | iput(sdp->sd_jindex); | 158 | iput(sdp->sd_jindex); |
106 | iput(sdp->sd_inum_inode); | 159 | iput(sdp->sd_inum_inode); |
107 | iput(sdp->sd_statfs_inode); | 160 | iput(sdp->sd_statfs_inode); |
@@ -152,6 +205,7 @@ static void gfs2_write_super(struct super_block *sb) | |||
152 | * | 205 | * |
153 | * Flushes the log to disk. | 206 | * Flushes the log to disk. |
154 | */ | 207 | */ |
208 | |||
155 | static int gfs2_sync_fs(struct super_block *sb, int wait) | 209 | static int gfs2_sync_fs(struct super_block *sb, int wait) |
156 | { | 210 | { |
157 | sb->s_dirt = 0; | 211 | sb->s_dirt = 0; |
@@ -270,14 +324,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
270 | } | 324 | } |
271 | } | 325 | } |
272 | 326 | ||
273 | if (*flags & (MS_NOATIME | MS_NODIRATIME)) | ||
274 | set_bit(SDF_NOATIME, &sdp->sd_flags); | ||
275 | else | ||
276 | clear_bit(SDF_NOATIME, &sdp->sd_flags); | ||
277 | |||
278 | /* Don't let the VFS update atimes. GFS2 handles this itself. */ | ||
279 | *flags |= MS_NOATIME | MS_NODIRATIME; | ||
280 | |||
281 | return error; | 327 | return error; |
282 | } | 328 | } |
283 | 329 | ||
@@ -295,6 +341,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
295 | * inode's blocks, or alternatively pass the baton on to another | 341 | * inode's blocks, or alternatively pass the baton on to another |
296 | * node for later deallocation. | 342 | * node for later deallocation. |
297 | */ | 343 | */ |
344 | |||
298 | static void gfs2_drop_inode(struct inode *inode) | 345 | static void gfs2_drop_inode(struct inode *inode) |
299 | { | 346 | { |
300 | struct gfs2_inode *ip = GFS2_I(inode); | 347 | struct gfs2_inode *ip = GFS2_I(inode); |
@@ -333,6 +380,16 @@ static void gfs2_clear_inode(struct inode *inode) | |||
333 | } | 380 | } |
334 | } | 381 | } |
335 | 382 | ||
383 | static int is_ancestor(const struct dentry *d1, const struct dentry *d2) | ||
384 | { | ||
385 | do { | ||
386 | if (d1 == d2) | ||
387 | return 1; | ||
388 | d1 = d1->d_parent; | ||
389 | } while (!IS_ROOT(d1)); | ||
390 | return 0; | ||
391 | } | ||
392 | |||
336 | /** | 393 | /** |
337 | * gfs2_show_options - Show mount options for /proc/mounts | 394 | * gfs2_show_options - Show mount options for /proc/mounts |
338 | * @s: seq_file structure | 395 | * @s: seq_file structure |
@@ -346,6 +403,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
346 | struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; | 403 | struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; |
347 | struct gfs2_args *args = &sdp->sd_args; | 404 | struct gfs2_args *args = &sdp->sd_args; |
348 | 405 | ||
406 | if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) | ||
407 | seq_printf(s, ",meta"); | ||
349 | if (args->ar_lockproto[0]) | 408 | if (args->ar_lockproto[0]) |
350 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); | 409 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); |
351 | if (args->ar_locktable[0]) | 410 | if (args->ar_locktable[0]) |
@@ -414,6 +473,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
414 | * conversion on the iopen lock, but we can change that later. This | 473 | * conversion on the iopen lock, but we can change that later. This |
415 | * is safe, just less efficient. | 474 | * is safe, just less efficient. |
416 | */ | 475 | */ |
476 | |||
417 | static void gfs2_delete_inode(struct inode *inode) | 477 | static void gfs2_delete_inode(struct inode *inode) |
418 | { | 478 | { |
419 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; | 479 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; |
@@ -478,8 +538,6 @@ out: | |||
478 | clear_inode(inode); | 538 | clear_inode(inode); |
479 | } | 539 | } |
480 | 540 | ||
481 | |||
482 | |||
483 | static struct inode *gfs2_alloc_inode(struct super_block *sb) | 541 | static struct inode *gfs2_alloc_inode(struct super_block *sb) |
484 | { | 542 | { |
485 | struct gfs2_inode *ip; | 543 | struct gfs2_inode *ip; |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ca831991cbc2..c3ba3d9d0aac 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -33,313 +33,6 @@ | |||
33 | #include "trans.h" | 33 | #include "trans.h" |
34 | #include "util.h" | 34 | #include "util.h" |
35 | 35 | ||
36 | static const u32 gfs2_old_fs_formats[] = { | ||
37 | 0 | ||
38 | }; | ||
39 | |||
40 | static const u32 gfs2_old_multihost_formats[] = { | ||
41 | 0 | ||
42 | }; | ||
43 | |||
44 | /** | ||
45 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | ||
46 | * @gt: tune | ||
47 | * | ||
48 | */ | ||
49 | |||
50 | void gfs2_tune_init(struct gfs2_tune *gt) | ||
51 | { | ||
52 | spin_lock_init(>->gt_spin); | ||
53 | |||
54 | gt->gt_demote_secs = 300; | ||
55 | gt->gt_incore_log_blocks = 1024; | ||
56 | gt->gt_log_flush_secs = 60; | ||
57 | gt->gt_recoverd_secs = 60; | ||
58 | gt->gt_logd_secs = 1; | ||
59 | gt->gt_quotad_secs = 5; | ||
60 | gt->gt_quota_simul_sync = 64; | ||
61 | gt->gt_quota_warn_period = 10; | ||
62 | gt->gt_quota_scale_num = 1; | ||
63 | gt->gt_quota_scale_den = 1; | ||
64 | gt->gt_quota_cache_secs = 300; | ||
65 | gt->gt_quota_quantum = 60; | ||
66 | gt->gt_atime_quantum = 3600; | ||
67 | gt->gt_new_files_jdata = 0; | ||
68 | gt->gt_max_readahead = 1 << 18; | ||
69 | gt->gt_stall_secs = 600; | ||
70 | gt->gt_complain_secs = 10; | ||
71 | gt->gt_statfs_quantum = 30; | ||
72 | gt->gt_statfs_slow = 0; | ||
73 | } | ||
74 | |||
75 | /** | ||
76 | * gfs2_check_sb - Check superblock | ||
77 | * @sdp: the filesystem | ||
78 | * @sb: The superblock | ||
79 | * @silent: Don't print a message if the check fails | ||
80 | * | ||
81 | * Checks the version code of the FS is one that we understand how to | ||
82 | * read and that the sizes of the various on-disk structures have not | ||
83 | * changed. | ||
84 | */ | ||
85 | |||
86 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | ||
87 | { | ||
88 | unsigned int x; | ||
89 | |||
90 | if (sb->sb_magic != GFS2_MAGIC || | ||
91 | sb->sb_type != GFS2_METATYPE_SB) { | ||
92 | if (!silent) | ||
93 | printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); | ||
94 | return -EINVAL; | ||
95 | } | ||
96 | |||
97 | /* If format numbers match exactly, we're done. */ | ||
98 | |||
99 | if (sb->sb_fs_format == GFS2_FORMAT_FS && | ||
100 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | ||
101 | return 0; | ||
102 | |||
103 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | ||
104 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
105 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
106 | break; | ||
107 | |||
108 | if (!gfs2_old_fs_formats[x]) { | ||
109 | printk(KERN_WARNING | ||
110 | "GFS2: code version (%u, %u) is incompatible " | ||
111 | "with ondisk format (%u, %u)\n", | ||
112 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
113 | sb->sb_fs_format, sb->sb_multihost_format); | ||
114 | printk(KERN_WARNING | ||
115 | "GFS2: I don't know how to upgrade this FS\n"); | ||
116 | return -EINVAL; | ||
117 | } | ||
118 | } | ||
119 | |||
120 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
121 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
122 | if (gfs2_old_multihost_formats[x] == | ||
123 | sb->sb_multihost_format) | ||
124 | break; | ||
125 | |||
126 | if (!gfs2_old_multihost_formats[x]) { | ||
127 | printk(KERN_WARNING | ||
128 | "GFS2: code version (%u, %u) is incompatible " | ||
129 | "with ondisk format (%u, %u)\n", | ||
130 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
131 | sb->sb_fs_format, sb->sb_multihost_format); | ||
132 | printk(KERN_WARNING | ||
133 | "GFS2: I don't know how to upgrade this FS\n"); | ||
134 | return -EINVAL; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | if (!sdp->sd_args.ar_upgrade) { | ||
139 | printk(KERN_WARNING | ||
140 | "GFS2: code version (%u, %u) is incompatible " | ||
141 | "with ondisk format (%u, %u)\n", | ||
142 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
143 | sb->sb_fs_format, sb->sb_multihost_format); | ||
144 | printk(KERN_INFO | ||
145 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
146 | "the FS\n"); | ||
147 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
148 | return -EINVAL; | ||
149 | } | ||
150 | |||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | |||
155 | static void end_bio_io_page(struct bio *bio, int error) | ||
156 | { | ||
157 | struct page *page = bio->bi_private; | ||
158 | |||
159 | if (!error) | ||
160 | SetPageUptodate(page); | ||
161 | else | ||
162 | printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); | ||
163 | unlock_page(page); | ||
164 | } | ||
165 | |||
166 | static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) | ||
167 | { | ||
168 | const struct gfs2_sb *str = buf; | ||
169 | |||
170 | sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); | ||
171 | sb->sb_type = be32_to_cpu(str->sb_header.mh_type); | ||
172 | sb->sb_format = be32_to_cpu(str->sb_header.mh_format); | ||
173 | sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); | ||
174 | sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); | ||
175 | sb->sb_bsize = be32_to_cpu(str->sb_bsize); | ||
176 | sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); | ||
177 | sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); | ||
178 | sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); | ||
179 | sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); | ||
180 | sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); | ||
181 | |||
182 | memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); | ||
183 | memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); | ||
184 | } | ||
185 | |||
186 | /** | ||
187 | * gfs2_read_super - Read the gfs2 super block from disk | ||
188 | * @sdp: The GFS2 super block | ||
189 | * @sector: The location of the super block | ||
190 | * @error: The error code to return | ||
191 | * | ||
192 | * This uses the bio functions to read the super block from disk | ||
193 | * because we want to be 100% sure that we never read cached data. | ||
194 | * A super block is read twice only during each GFS2 mount and is | ||
195 | * never written to by the filesystem. The first time its read no | ||
196 | * locks are held, and the only details which are looked at are those | ||
197 | * relating to the locking protocol. Once locking is up and working, | ||
198 | * the sb is read again under the lock to establish the location of | ||
199 | * the master directory (contains pointers to journals etc) and the | ||
200 | * root directory. | ||
201 | * | ||
202 | * Returns: 0 on success or error | ||
203 | */ | ||
204 | |||
205 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | ||
206 | { | ||
207 | struct super_block *sb = sdp->sd_vfs; | ||
208 | struct gfs2_sb *p; | ||
209 | struct page *page; | ||
210 | struct bio *bio; | ||
211 | |||
212 | page = alloc_page(GFP_NOFS); | ||
213 | if (unlikely(!page)) | ||
214 | return -ENOBUFS; | ||
215 | |||
216 | ClearPageUptodate(page); | ||
217 | ClearPageDirty(page); | ||
218 | lock_page(page); | ||
219 | |||
220 | bio = bio_alloc(GFP_NOFS, 1); | ||
221 | if (unlikely(!bio)) { | ||
222 | __free_page(page); | ||
223 | return -ENOBUFS; | ||
224 | } | ||
225 | |||
226 | bio->bi_sector = sector * (sb->s_blocksize >> 9); | ||
227 | bio->bi_bdev = sb->s_bdev; | ||
228 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
229 | |||
230 | bio->bi_end_io = end_bio_io_page; | ||
231 | bio->bi_private = page; | ||
232 | submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); | ||
233 | wait_on_page_locked(page); | ||
234 | bio_put(bio); | ||
235 | if (!PageUptodate(page)) { | ||
236 | __free_page(page); | ||
237 | return -EIO; | ||
238 | } | ||
239 | p = kmap(page); | ||
240 | gfs2_sb_in(&sdp->sd_sb, p); | ||
241 | kunmap(page); | ||
242 | __free_page(page); | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | /** | ||
247 | * gfs2_read_sb - Read super block | ||
248 | * @sdp: The GFS2 superblock | ||
249 | * @gl: the glock for the superblock (assumed to be held) | ||
250 | * @silent: Don't print message if mount fails | ||
251 | * | ||
252 | */ | ||
253 | |||
254 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | ||
255 | { | ||
256 | u32 hash_blocks, ind_blocks, leaf_blocks; | ||
257 | u32 tmp_blocks; | ||
258 | unsigned int x; | ||
259 | int error; | ||
260 | |||
261 | error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); | ||
262 | if (error) { | ||
263 | if (!silent) | ||
264 | fs_err(sdp, "can't read superblock\n"); | ||
265 | return error; | ||
266 | } | ||
267 | |||
268 | error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); | ||
269 | if (error) | ||
270 | return error; | ||
271 | |||
272 | sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - | ||
273 | GFS2_BASIC_BLOCK_SHIFT; | ||
274 | sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; | ||
275 | sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - | ||
276 | sizeof(struct gfs2_dinode)) / sizeof(u64); | ||
277 | sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - | ||
278 | sizeof(struct gfs2_meta_header)) / sizeof(u64); | ||
279 | sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); | ||
280 | sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; | ||
281 | sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; | ||
282 | sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); | ||
283 | sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - | ||
284 | sizeof(struct gfs2_meta_header)) / | ||
285 | sizeof(struct gfs2_quota_change); | ||
286 | |||
287 | /* Compute maximum reservation required to add a entry to a directory */ | ||
288 | |||
289 | hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), | ||
290 | sdp->sd_jbsize); | ||
291 | |||
292 | ind_blocks = 0; | ||
293 | for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { | ||
294 | tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); | ||
295 | ind_blocks += tmp_blocks; | ||
296 | } | ||
297 | |||
298 | leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; | ||
299 | |||
300 | sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; | ||
301 | |||
302 | sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - | ||
303 | sizeof(struct gfs2_dinode); | ||
304 | sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; | ||
305 | for (x = 2;; x++) { | ||
306 | u64 space, d; | ||
307 | u32 m; | ||
308 | |||
309 | space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; | ||
310 | d = space; | ||
311 | m = do_div(d, sdp->sd_inptrs); | ||
312 | |||
313 | if (d != sdp->sd_heightsize[x - 1] || m) | ||
314 | break; | ||
315 | sdp->sd_heightsize[x] = space; | ||
316 | } | ||
317 | sdp->sd_max_height = x; | ||
318 | sdp->sd_heightsize[x] = ~0; | ||
319 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); | ||
320 | |||
321 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - | ||
322 | sizeof(struct gfs2_dinode); | ||
323 | sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; | ||
324 | for (x = 2;; x++) { | ||
325 | u64 space, d; | ||
326 | u32 m; | ||
327 | |||
328 | space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; | ||
329 | d = space; | ||
330 | m = do_div(d, sdp->sd_inptrs); | ||
331 | |||
332 | if (d != sdp->sd_jheightsize[x - 1] || m) | ||
333 | break; | ||
334 | sdp->sd_jheightsize[x] = space; | ||
335 | } | ||
336 | sdp->sd_max_jheight = x; | ||
337 | sdp->sd_jheightsize[x] = ~0; | ||
338 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); | ||
339 | |||
340 | return 0; | ||
341 | } | ||
342 | |||
343 | /** | 36 | /** |
344 | * gfs2_jindex_hold - Grab a lock on the jindex | 37 | * gfs2_jindex_hold - Grab a lock on the jindex |
345 | * @sdp: The GFS2 superblock | 38 | * @sdp: The GFS2 superblock |
@@ -581,39 +274,6 @@ fail: | |||
581 | return error; | 274 | return error; |
582 | } | 275 | } |
583 | 276 | ||
584 | /** | ||
585 | * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one | ||
586 | * @sdp: the filesystem | ||
587 | * | ||
588 | * Returns: errno | ||
589 | */ | ||
590 | |||
591 | int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | ||
592 | { | ||
593 | struct gfs2_holder t_gh; | ||
594 | int error; | ||
595 | |||
596 | gfs2_quota_sync(sdp); | ||
597 | gfs2_statfs_sync(sdp); | ||
598 | |||
599 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, | ||
600 | &t_gh); | ||
601 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
602 | return error; | ||
603 | |||
604 | gfs2_meta_syncfs(sdp); | ||
605 | gfs2_log_shutdown(sdp); | ||
606 | |||
607 | clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); | ||
608 | |||
609 | if (t_gh.gh_gl) | ||
610 | gfs2_glock_dq_uninit(&t_gh); | ||
611 | |||
612 | gfs2_quota_cleanup(sdp); | ||
613 | |||
614 | return error; | ||
615 | } | ||
616 | |||
617 | static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) | 277 | static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) |
618 | { | 278 | { |
619 | const struct gfs2_statfs_change *str = buf; | 279 | const struct gfs2_statfs_change *str = buf; |
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 44361ecc44f7..50a4c9b1215e 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h | |||
@@ -12,11 +12,6 @@ | |||
12 | 12 | ||
13 | #include "incore.h" | 13 | #include "incore.h" |
14 | 14 | ||
15 | void gfs2_tune_init(struct gfs2_tune *gt); | ||
16 | |||
17 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); | ||
18 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); | ||
19 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector); | ||
20 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); | 15 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); |
21 | 16 | ||
22 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) | 17 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) |
@@ -40,7 +35,6 @@ int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, | |||
40 | struct gfs2_inode **ipp); | 35 | struct gfs2_inode **ipp); |
41 | 36 | ||
42 | int gfs2_make_fs_rw(struct gfs2_sbd *sdp); | 37 | int gfs2_make_fs_rw(struct gfs2_sbd *sdp); |
43 | int gfs2_make_fs_ro(struct gfs2_sbd *sdp); | ||
44 | 38 | ||
45 | int gfs2_statfs_init(struct gfs2_sbd *sdp); | 39 | int gfs2_statfs_init(struct gfs2_sbd *sdp); |
46 | void gfs2_statfs_change(struct gfs2_sbd *sdp, | 40 | void gfs2_statfs_change(struct gfs2_sbd *sdp, |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 74846559fc3f..7e1879f1a02c 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -269,14 +269,6 @@ ARGS_ATTR(quota, "%u\n"); | |||
269 | ARGS_ATTR(suiddir, "%d\n"); | 269 | ARGS_ATTR(suiddir, "%d\n"); |
270 | ARGS_ATTR(data, "%d\n"); | 270 | ARGS_ATTR(data, "%d\n"); |
271 | 271 | ||
272 | /* one oddball doesn't fit the macro mold */ | ||
273 | static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf) | ||
274 | { | ||
275 | return snprintf(buf, PAGE_SIZE, "%d\n", | ||
276 | !!test_bit(SDF_NOATIME, &sdp->sd_flags)); | ||
277 | } | ||
278 | static struct args_attr args_attr_noatime = __ATTR_RO(noatime); | ||
279 | |||
280 | static struct attribute *args_attrs[] = { | 272 | static struct attribute *args_attrs[] = { |
281 | &args_attr_lockproto.attr, | 273 | &args_attr_lockproto.attr, |
282 | &args_attr_locktable.attr, | 274 | &args_attr_locktable.attr, |
@@ -292,7 +284,6 @@ static struct attribute *args_attrs[] = { | |||
292 | &args_attr_quota.attr, | 284 | &args_attr_quota.attr, |
293 | &args_attr_suiddir.attr, | 285 | &args_attr_suiddir.attr, |
294 | &args_attr_data.attr, | 286 | &args_attr_data.attr, |
295 | &args_attr_noatime.attr, | ||
296 | NULL, | 287 | NULL, |
297 | }; | 288 | }; |
298 | 289 | ||
@@ -407,7 +398,6 @@ TUNE_ATTR(incore_log_blocks, 0); | |||
407 | TUNE_ATTR(log_flush_secs, 0); | 398 | TUNE_ATTR(log_flush_secs, 0); |
408 | TUNE_ATTR(quota_warn_period, 0); | 399 | TUNE_ATTR(quota_warn_period, 0); |
409 | TUNE_ATTR(quota_quantum, 0); | 400 | TUNE_ATTR(quota_quantum, 0); |
410 | TUNE_ATTR(atime_quantum, 0); | ||
411 | TUNE_ATTR(max_readahead, 0); | 401 | TUNE_ATTR(max_readahead, 0); |
412 | TUNE_ATTR(complain_secs, 0); | 402 | TUNE_ATTR(complain_secs, 0); |
413 | TUNE_ATTR(statfs_slow, 0); | 403 | TUNE_ATTR(statfs_slow, 0); |
@@ -427,7 +417,6 @@ static struct attribute *tune_attrs[] = { | |||
427 | &tune_attr_log_flush_secs.attr, | 417 | &tune_attr_log_flush_secs.attr, |
428 | &tune_attr_quota_warn_period.attr, | 418 | &tune_attr_quota_warn_period.attr, |
429 | &tune_attr_quota_quantum.attr, | 419 | &tune_attr_quota_quantum.attr, |
430 | &tune_attr_atime_quantum.attr, | ||
431 | &tune_attr_max_readahead.attr, | 420 | &tune_attr_max_readahead.attr, |
432 | &tune_attr_complain_secs.attr, | 421 | &tune_attr_complain_secs.attr, |
433 | &tune_attr_statfs_slow.attr, | 422 | &tune_attr_statfs_slow.attr, |
diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 60249429a253..d85c7d931cdf 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c | |||
@@ -323,7 +323,7 @@ out: | |||
323 | } | 323 | } |
324 | 324 | ||
325 | /* | 325 | /* |
326 | * remove_kevent - cleans up and ultimately frees the given kevent | 326 | * remove_kevent - cleans up the given kevent |
327 | * | 327 | * |
328 | * Caller must hold dev->ev_mutex. | 328 | * Caller must hold dev->ev_mutex. |
329 | */ | 329 | */ |
@@ -334,7 +334,13 @@ static void remove_kevent(struct inotify_device *dev, | |||
334 | 334 | ||
335 | dev->event_count--; | 335 | dev->event_count--; |
336 | dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; | 336 | dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; |
337 | } | ||
337 | 338 | ||
339 | /* | ||
340 | * free_kevent - frees the given kevent. | ||
341 | */ | ||
342 | static void free_kevent(struct inotify_kernel_event *kevent) | ||
343 | { | ||
338 | kfree(kevent->name); | 344 | kfree(kevent->name); |
339 | kmem_cache_free(event_cachep, kevent); | 345 | kmem_cache_free(event_cachep, kevent); |
340 | } | 346 | } |
@@ -350,6 +356,7 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev) | |||
350 | struct inotify_kernel_event *kevent; | 356 | struct inotify_kernel_event *kevent; |
351 | kevent = inotify_dev_get_event(dev); | 357 | kevent = inotify_dev_get_event(dev); |
352 | remove_kevent(dev, kevent); | 358 | remove_kevent(dev, kevent); |
359 | free_kevent(kevent); | ||
353 | } | 360 | } |
354 | } | 361 | } |
355 | 362 | ||
@@ -433,17 +440,15 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
433 | dev = file->private_data; | 440 | dev = file->private_data; |
434 | 441 | ||
435 | while (1) { | 442 | while (1) { |
436 | int events; | ||
437 | 443 | ||
438 | prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); | 444 | prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); |
439 | 445 | ||
440 | mutex_lock(&dev->ev_mutex); | 446 | mutex_lock(&dev->ev_mutex); |
441 | events = !list_empty(&dev->events); | 447 | if (!list_empty(&dev->events)) { |
442 | mutex_unlock(&dev->ev_mutex); | ||
443 | if (events) { | ||
444 | ret = 0; | 448 | ret = 0; |
445 | break; | 449 | break; |
446 | } | 450 | } |
451 | mutex_unlock(&dev->ev_mutex); | ||
447 | 452 | ||
448 | if (file->f_flags & O_NONBLOCK) { | 453 | if (file->f_flags & O_NONBLOCK) { |
449 | ret = -EAGAIN; | 454 | ret = -EAGAIN; |
@@ -462,7 +467,6 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
462 | if (ret) | 467 | if (ret) |
463 | return ret; | 468 | return ret; |
464 | 469 | ||
465 | mutex_lock(&dev->ev_mutex); | ||
466 | while (1) { | 470 | while (1) { |
467 | struct inotify_kernel_event *kevent; | 471 | struct inotify_kernel_event *kevent; |
468 | 472 | ||
@@ -481,6 +485,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
481 | } | 485 | } |
482 | break; | 486 | break; |
483 | } | 487 | } |
488 | remove_kevent(dev, kevent); | ||
489 | |||
490 | /* | ||
491 | * Must perform the copy_to_user outside the mutex in order | ||
492 | * to avoid a lock order reversal with mmap_sem. | ||
493 | */ | ||
494 | mutex_unlock(&dev->ev_mutex); | ||
484 | 495 | ||
485 | if (copy_to_user(buf, &kevent->event, event_size)) { | 496 | if (copy_to_user(buf, &kevent->event, event_size)) { |
486 | ret = -EFAULT; | 497 | ret = -EFAULT; |
@@ -498,7 +509,9 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
498 | count -= kevent->event.len; | 509 | count -= kevent->event.len; |
499 | } | 510 | } |
500 | 511 | ||
501 | remove_kevent(dev, kevent); | 512 | free_kevent(kevent); |
513 | |||
514 | mutex_lock(&dev->ev_mutex); | ||
502 | } | 515 | } |
503 | mutex_unlock(&dev->ev_mutex); | 516 | mutex_unlock(&dev->ev_mutex); |
504 | 517 | ||
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9abcd2b329f7..e9b20173fef3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -1279,6 +1279,12 @@ static int nfs_parse_mount_options(char *raw, | |||
1279 | } | 1279 | } |
1280 | } | 1280 | } |
1281 | 1281 | ||
1282 | if (errors > 0) { | ||
1283 | dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n", | ||
1284 | errors, (errors == 1 ? "" : "s")); | ||
1285 | if (!sloppy) | ||
1286 | return 0; | ||
1287 | } | ||
1282 | return 1; | 1288 | return 1; |
1283 | 1289 | ||
1284 | out_nomem: | 1290 | out_nomem: |
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index b6ed38380ab8..54b8b4140c8f 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c | |||
@@ -443,7 +443,7 @@ init_state(struct posix_acl_state *state, int cnt) | |||
443 | * enough space for either: | 443 | * enough space for either: |
444 | */ | 444 | */ |
445 | alloc = sizeof(struct posix_ace_state_array) | 445 | alloc = sizeof(struct posix_ace_state_array) |
446 | + cnt*sizeof(struct posix_ace_state); | 446 | + cnt*sizeof(struct posix_user_ace_state); |
447 | state->users = kzalloc(alloc, GFP_KERNEL); | 447 | state->users = kzalloc(alloc, GFP_KERNEL); |
448 | if (!state->users) | 448 | if (!state->users) |
449 | return -ENOMEM; | 449 | return -ENOMEM; |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2e51adac65de..e5b51ffafc6c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -867,11 +867,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
867 | int slack_bytes; | 867 | int slack_bytes; |
868 | __be32 status; | 868 | __be32 status; |
869 | 869 | ||
870 | status = nfserr_resource; | ||
871 | cstate = cstate_alloc(); | ||
872 | if (cstate == NULL) | ||
873 | goto out; | ||
874 | |||
875 | resp->xbuf = &rqstp->rq_res; | 870 | resp->xbuf = &rqstp->rq_res; |
876 | resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; | 871 | resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; |
877 | resp->tagp = resp->p; | 872 | resp->tagp = resp->p; |
@@ -890,6 +885,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
890 | if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) | 885 | if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) |
891 | goto out; | 886 | goto out; |
892 | 887 | ||
888 | status = nfserr_resource; | ||
889 | cstate = cstate_alloc(); | ||
890 | if (cstate == NULL) | ||
891 | goto out; | ||
892 | |||
893 | status = nfs_ok; | 893 | status = nfs_ok; |
894 | while (!status && resp->opcnt < args->opcnt) { | 894 | while (!status && resp->opcnt < args->opcnt) { |
895 | op = &args->ops[resp->opcnt++]; | 895 | op = &args->ops[resp->opcnt++]; |
@@ -957,9 +957,9 @@ encode_op: | |||
957 | nfsd4_increment_op_stats(op->opnum); | 957 | nfsd4_increment_op_stats(op->opnum); |
958 | } | 958 | } |
959 | 959 | ||
960 | cstate_free(cstate); | ||
960 | out: | 961 | out: |
961 | nfsd4_release_compoundargs(args); | 962 | nfsd4_release_compoundargs(args); |
962 | cstate_free(cstate); | ||
963 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); | 963 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); |
964 | return status; | 964 | return status; |
965 | } | 965 | } |
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h index 3a8af75351e8..4087fbdac327 100644 --- a/fs/ntfs/usnjrnl.h +++ b/fs/ntfs/usnjrnl.h | |||
@@ -113,7 +113,7 @@ typedef struct { | |||
113 | * Reason flags (32-bit). Cumulative flags describing the change(s) to the | 113 | * Reason flags (32-bit). Cumulative flags describing the change(s) to the |
114 | * file since it was last opened. I think the names speak for themselves but | 114 | * file since it was last opened. I think the names speak for themselves but |
115 | * if you disagree check out the descriptions in the Linux NTFS project NTFS | 115 | * if you disagree check out the descriptions in the Linux NTFS project NTFS |
116 | * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html | 116 | * documentation: http://www.linux-ntfs.org/ |
117 | */ | 117 | */ |
118 | enum { | 118 | enum { |
119 | USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), | 119 | USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), |
@@ -145,7 +145,7 @@ typedef le32 USN_REASON_FLAGS; | |||
145 | * Source info flags (32-bit). Information about the source of the change(s) | 145 | * Source info flags (32-bit). Information about the source of the change(s) |
146 | * to the file. For detailed descriptions of what these mean, see the Linux | 146 | * to the file. For detailed descriptions of what these mean, see the Linux |
147 | * NTFS project NTFS documentation: | 147 | * NTFS project NTFS documentation: |
148 | * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html | 148 | * http://www.linux-ntfs.org/ |
149 | */ | 149 | */ |
150 | enum { | 150 | enum { |
151 | USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), | 151 | USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 506c24fb5078..a53da1466277 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -594,7 +594,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | |||
594 | goto bail; | 594 | goto bail; |
595 | } | 595 | } |
596 | 596 | ||
597 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno) { | 597 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) { |
598 | ocfs2_error(inode->i_sb, | 598 | ocfs2_error(inode->i_sb, |
599 | "Inode %llu has a hole at block %llu\n", | 599 | "Inode %llu has a hole at block %llu\n", |
600 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 600 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 7d6b34e201db..7408227c49c9 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -120,22 +120,21 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) = | |||
120 | * a pointer to that same buffer (for convenience). | 120 | * a pointer to that same buffer (for convenience). |
121 | */ | 121 | */ |
122 | 122 | ||
123 | char *disk_name(struct gendisk *hd, int part, char *buf) | 123 | char *disk_name(struct gendisk *hd, int partno, char *buf) |
124 | { | 124 | { |
125 | if (!part) | 125 | if (!partno) |
126 | snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); | 126 | snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); |
127 | else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) | 127 | else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) |
128 | snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, part); | 128 | snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); |
129 | else | 129 | else |
130 | snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, part); | 130 | snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); |
131 | 131 | ||
132 | return buf; | 132 | return buf; |
133 | } | 133 | } |
134 | 134 | ||
135 | const char *bdevname(struct block_device *bdev, char *buf) | 135 | const char *bdevname(struct block_device *bdev, char *buf) |
136 | { | 136 | { |
137 | int part = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor; | 137 | return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); |
138 | return disk_name(bdev->bd_disk, part, buf); | ||
139 | } | 138 | } |
140 | 139 | ||
141 | EXPORT_SYMBOL(bdevname); | 140 | EXPORT_SYMBOL(bdevname); |
@@ -169,7 +168,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
169 | if (isdigit(state->name[strlen(state->name)-1])) | 168 | if (isdigit(state->name[strlen(state->name)-1])) |
170 | sprintf(state->name, "p"); | 169 | sprintf(state->name, "p"); |
171 | 170 | ||
172 | state->limit = hd->minors; | 171 | state->limit = disk_max_parts(hd); |
173 | i = res = err = 0; | 172 | i = res = err = 0; |
174 | while (!res && check_part[i]) { | 173 | while (!res && check_part[i]) { |
175 | memset(&state->parts, 0, sizeof(state->parts)); | 174 | memset(&state->parts, 0, sizeof(state->parts)); |
@@ -204,21 +203,22 @@ static ssize_t part_start_show(struct device *dev, | |||
204 | return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); | 203 | return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); |
205 | } | 204 | } |
206 | 205 | ||
207 | static ssize_t part_size_show(struct device *dev, | 206 | ssize_t part_size_show(struct device *dev, |
208 | struct device_attribute *attr, char *buf) | 207 | struct device_attribute *attr, char *buf) |
209 | { | 208 | { |
210 | struct hd_struct *p = dev_to_part(dev); | 209 | struct hd_struct *p = dev_to_part(dev); |
211 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); | 210 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); |
212 | } | 211 | } |
213 | 212 | ||
214 | static ssize_t part_stat_show(struct device *dev, | 213 | ssize_t part_stat_show(struct device *dev, |
215 | struct device_attribute *attr, char *buf) | 214 | struct device_attribute *attr, char *buf) |
216 | { | 215 | { |
217 | struct hd_struct *p = dev_to_part(dev); | 216 | struct hd_struct *p = dev_to_part(dev); |
217 | int cpu; | ||
218 | 218 | ||
219 | preempt_disable(); | 219 | cpu = part_stat_lock(); |
220 | part_round_stats(p); | 220 | part_round_stats(cpu, p); |
221 | preempt_enable(); | 221 | part_stat_unlock(); |
222 | return sprintf(buf, | 222 | return sprintf(buf, |
223 | "%8lu %8lu %8llu %8u " | 223 | "%8lu %8lu %8llu %8u " |
224 | "%8lu %8lu %8llu %8u " | 224 | "%8lu %8lu %8llu %8u " |
@@ -238,17 +238,17 @@ static ssize_t part_stat_show(struct device *dev, | |||
238 | } | 238 | } |
239 | 239 | ||
240 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 240 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
241 | static ssize_t part_fail_show(struct device *dev, | 241 | ssize_t part_fail_show(struct device *dev, |
242 | struct device_attribute *attr, char *buf) | 242 | struct device_attribute *attr, char *buf) |
243 | { | 243 | { |
244 | struct hd_struct *p = dev_to_part(dev); | 244 | struct hd_struct *p = dev_to_part(dev); |
245 | 245 | ||
246 | return sprintf(buf, "%d\n", p->make_it_fail); | 246 | return sprintf(buf, "%d\n", p->make_it_fail); |
247 | } | 247 | } |
248 | 248 | ||
249 | static ssize_t part_fail_store(struct device *dev, | 249 | ssize_t part_fail_store(struct device *dev, |
250 | struct device_attribute *attr, | 250 | struct device_attribute *attr, |
251 | const char *buf, size_t count) | 251 | const char *buf, size_t count) |
252 | { | 252 | { |
253 | struct hd_struct *p = dev_to_part(dev); | 253 | struct hd_struct *p = dev_to_part(dev); |
254 | int i; | 254 | int i; |
@@ -300,40 +300,34 @@ struct device_type part_type = { | |||
300 | .release = part_release, | 300 | .release = part_release, |
301 | }; | 301 | }; |
302 | 302 | ||
303 | static inline void partition_sysfs_add_subdir(struct hd_struct *p) | 303 | static void delete_partition_rcu_cb(struct rcu_head *head) |
304 | { | ||
305 | struct kobject *k; | ||
306 | |||
307 | k = kobject_get(&p->dev.kobj); | ||
308 | p->holder_dir = kobject_create_and_add("holders", k); | ||
309 | kobject_put(k); | ||
310 | } | ||
311 | |||
312 | static inline void disk_sysfs_add_subdirs(struct gendisk *disk) | ||
313 | { | 304 | { |
314 | struct kobject *k; | 305 | struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); |
315 | 306 | ||
316 | k = kobject_get(&disk->dev.kobj); | 307 | part->start_sect = 0; |
317 | disk->holder_dir = kobject_create_and_add("holders", k); | 308 | part->nr_sects = 0; |
318 | disk->slave_dir = kobject_create_and_add("slaves", k); | 309 | part_stat_set_all(part, 0); |
319 | kobject_put(k); | 310 | put_device(part_to_dev(part)); |
320 | } | 311 | } |
321 | 312 | ||
322 | void delete_partition(struct gendisk *disk, int part) | 313 | void delete_partition(struct gendisk *disk, int partno) |
323 | { | 314 | { |
324 | struct hd_struct *p = disk->part[part-1]; | 315 | struct disk_part_tbl *ptbl = disk->part_tbl; |
316 | struct hd_struct *part; | ||
325 | 317 | ||
326 | if (!p) | 318 | if (partno >= ptbl->len) |
327 | return; | 319 | return; |
328 | if (!p->nr_sects) | 320 | |
321 | part = ptbl->part[partno]; | ||
322 | if (!part) | ||
329 | return; | 323 | return; |
330 | disk->part[part-1] = NULL; | 324 | |
331 | p->start_sect = 0; | 325 | blk_free_devt(part_devt(part)); |
332 | p->nr_sects = 0; | 326 | rcu_assign_pointer(ptbl->part[partno], NULL); |
333 | part_stat_set_all(p, 0); | 327 | kobject_put(part->holder_dir); |
334 | kobject_put(p->holder_dir); | 328 | device_del(part_to_dev(part)); |
335 | device_del(&p->dev); | 329 | |
336 | put_device(&p->dev); | 330 | call_rcu(&part->rcu_head, delete_partition_rcu_cb); |
337 | } | 331 | } |
338 | 332 | ||
339 | static ssize_t whole_disk_show(struct device *dev, | 333 | static ssize_t whole_disk_show(struct device *dev, |
@@ -344,102 +338,132 @@ static ssize_t whole_disk_show(struct device *dev, | |||
344 | static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, | 338 | static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, |
345 | whole_disk_show, NULL); | 339 | whole_disk_show, NULL); |
346 | 340 | ||
347 | int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) | 341 | int add_partition(struct gendisk *disk, int partno, |
342 | sector_t start, sector_t len, int flags) | ||
348 | { | 343 | { |
349 | struct hd_struct *p; | 344 | struct hd_struct *p; |
345 | dev_t devt = MKDEV(0, 0); | ||
346 | struct device *ddev = disk_to_dev(disk); | ||
347 | struct device *pdev; | ||
348 | struct disk_part_tbl *ptbl; | ||
349 | const char *dname; | ||
350 | int err; | 350 | int err; |
351 | 351 | ||
352 | err = disk_expand_part_tbl(disk, partno); | ||
353 | if (err) | ||
354 | return err; | ||
355 | ptbl = disk->part_tbl; | ||
356 | |||
357 | if (ptbl->part[partno]) | ||
358 | return -EBUSY; | ||
359 | |||
352 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 360 | p = kzalloc(sizeof(*p), GFP_KERNEL); |
353 | if (!p) | 361 | if (!p) |
354 | return -ENOMEM; | 362 | return -ENOMEM; |
355 | 363 | ||
356 | if (!init_part_stats(p)) { | 364 | if (!init_part_stats(p)) { |
357 | err = -ENOMEM; | 365 | err = -ENOMEM; |
358 | goto out0; | 366 | goto out_free; |
359 | } | 367 | } |
368 | pdev = part_to_dev(p); | ||
369 | |||
360 | p->start_sect = start; | 370 | p->start_sect = start; |
361 | p->nr_sects = len; | 371 | p->nr_sects = len; |
362 | p->partno = part; | 372 | p->partno = partno; |
363 | p->policy = disk->policy; | 373 | p->policy = get_disk_ro(disk); |
364 | 374 | ||
365 | if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) | 375 | dname = dev_name(ddev); |
366 | snprintf(p->dev.bus_id, BUS_ID_SIZE, | 376 | if (isdigit(dname[strlen(dname) - 1])) |
367 | "%sp%d", disk->dev.bus_id, part); | 377 | snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno); |
368 | else | 378 | else |
369 | snprintf(p->dev.bus_id, BUS_ID_SIZE, | 379 | snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno); |
370 | "%s%d", disk->dev.bus_id, part); | 380 | |
381 | device_initialize(pdev); | ||
382 | pdev->class = &block_class; | ||
383 | pdev->type = &part_type; | ||
384 | pdev->parent = ddev; | ||
371 | 385 | ||
372 | device_initialize(&p->dev); | 386 | err = blk_alloc_devt(p, &devt); |
373 | p->dev.devt = MKDEV(disk->major, disk->first_minor + part); | 387 | if (err) |
374 | p->dev.class = &block_class; | 388 | goto out_free; |
375 | p->dev.type = &part_type; | 389 | pdev->devt = devt; |
376 | p->dev.parent = &disk->dev; | ||
377 | disk->part[part-1] = p; | ||
378 | 390 | ||
379 | /* delay uevent until 'holders' subdir is created */ | 391 | /* delay uevent until 'holders' subdir is created */ |
380 | p->dev.uevent_suppress = 1; | 392 | pdev->uevent_suppress = 1; |
381 | err = device_add(&p->dev); | 393 | err = device_add(pdev); |
382 | if (err) | 394 | if (err) |
383 | goto out1; | 395 | goto out_put; |
384 | partition_sysfs_add_subdir(p); | 396 | |
385 | p->dev.uevent_suppress = 0; | 397 | err = -ENOMEM; |
398 | p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); | ||
399 | if (!p->holder_dir) | ||
400 | goto out_del; | ||
401 | |||
402 | pdev->uevent_suppress = 0; | ||
386 | if (flags & ADDPART_FLAG_WHOLEDISK) { | 403 | if (flags & ADDPART_FLAG_WHOLEDISK) { |
387 | err = device_create_file(&p->dev, &dev_attr_whole_disk); | 404 | err = device_create_file(pdev, &dev_attr_whole_disk); |
388 | if (err) | 405 | if (err) |
389 | goto out2; | 406 | goto out_del; |
390 | } | 407 | } |
391 | 408 | ||
409 | /* everything is up and running, commence */ | ||
410 | INIT_RCU_HEAD(&p->rcu_head); | ||
411 | rcu_assign_pointer(ptbl->part[partno], p); | ||
412 | |||
392 | /* suppress uevent if the disk supresses it */ | 413 | /* suppress uevent if the disk supresses it */ |
393 | if (!disk->dev.uevent_suppress) | 414 | if (!ddev->uevent_suppress) |
394 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); | 415 | kobject_uevent(&pdev->kobj, KOBJ_ADD); |
395 | 416 | ||
396 | return 0; | 417 | return 0; |
397 | 418 | ||
398 | out2: | 419 | out_free: |
399 | device_del(&p->dev); | ||
400 | out1: | ||
401 | put_device(&p->dev); | ||
402 | free_part_stats(p); | ||
403 | out0: | ||
404 | kfree(p); | 420 | kfree(p); |
405 | return err; | 421 | return err; |
422 | out_del: | ||
423 | kobject_put(p->holder_dir); | ||
424 | device_del(pdev); | ||
425 | out_put: | ||
426 | put_device(pdev); | ||
427 | blk_free_devt(devt); | ||
428 | return err; | ||
406 | } | 429 | } |
407 | 430 | ||
408 | /* Not exported, helper to add_disk(). */ | 431 | /* Not exported, helper to add_disk(). */ |
409 | void register_disk(struct gendisk *disk) | 432 | void register_disk(struct gendisk *disk) |
410 | { | 433 | { |
434 | struct device *ddev = disk_to_dev(disk); | ||
411 | struct block_device *bdev; | 435 | struct block_device *bdev; |
436 | struct disk_part_iter piter; | ||
437 | struct hd_struct *part; | ||
412 | char *s; | 438 | char *s; |
413 | int i; | ||
414 | struct hd_struct *p; | ||
415 | int err; | 439 | int err; |
416 | 440 | ||
417 | disk->dev.parent = disk->driverfs_dev; | 441 | ddev->parent = disk->driverfs_dev; |
418 | disk->dev.devt = MKDEV(disk->major, disk->first_minor); | ||
419 | 442 | ||
420 | strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); | 443 | strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE); |
421 | /* ewww... some of these buggers have / in the name... */ | 444 | /* ewww... some of these buggers have / in the name... */ |
422 | s = strchr(disk->dev.bus_id, '/'); | 445 | s = strchr(ddev->bus_id, '/'); |
423 | if (s) | 446 | if (s) |
424 | *s = '!'; | 447 | *s = '!'; |
425 | 448 | ||
426 | /* delay uevents, until we scanned partition table */ | 449 | /* delay uevents, until we scanned partition table */ |
427 | disk->dev.uevent_suppress = 1; | 450 | ddev->uevent_suppress = 1; |
428 | 451 | ||
429 | if (device_add(&disk->dev)) | 452 | if (device_add(ddev)) |
430 | return; | 453 | return; |
431 | #ifndef CONFIG_SYSFS_DEPRECATED | 454 | #ifndef CONFIG_SYSFS_DEPRECATED |
432 | err = sysfs_create_link(block_depr, &disk->dev.kobj, | 455 | err = sysfs_create_link(block_depr, &ddev->kobj, |
433 | kobject_name(&disk->dev.kobj)); | 456 | kobject_name(&ddev->kobj)); |
434 | if (err) { | 457 | if (err) { |
435 | device_del(&disk->dev); | 458 | device_del(ddev); |
436 | return; | 459 | return; |
437 | } | 460 | } |
438 | #endif | 461 | #endif |
439 | disk_sysfs_add_subdirs(disk); | 462 | disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); |
463 | disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); | ||
440 | 464 | ||
441 | /* No minors to use for partitions */ | 465 | /* No minors to use for partitions */ |
442 | if (disk->minors == 1) | 466 | if (!disk_partitionable(disk)) |
443 | goto exit; | 467 | goto exit; |
444 | 468 | ||
445 | /* No such device (e.g., media were just removed) */ | 469 | /* No such device (e.g., media were just removed) */ |
@@ -458,50 +482,66 @@ void register_disk(struct gendisk *disk) | |||
458 | 482 | ||
459 | exit: | 483 | exit: |
460 | /* announce disk after possible partitions are created */ | 484 | /* announce disk after possible partitions are created */ |
461 | disk->dev.uevent_suppress = 0; | 485 | ddev->uevent_suppress = 0; |
462 | kobject_uevent(&disk->dev.kobj, KOBJ_ADD); | 486 | kobject_uevent(&ddev->kobj, KOBJ_ADD); |
463 | 487 | ||
464 | /* announce possible partitions */ | 488 | /* announce possible partitions */ |
465 | for (i = 1; i < disk->minors; i++) { | 489 | disk_part_iter_init(&piter, disk, 0); |
466 | p = disk->part[i-1]; | 490 | while ((part = disk_part_iter_next(&piter))) |
467 | if (!p || !p->nr_sects) | 491 | kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); |
468 | continue; | 492 | disk_part_iter_exit(&piter); |
469 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); | ||
470 | } | ||
471 | } | 493 | } |
472 | 494 | ||
473 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | 495 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) |
474 | { | 496 | { |
497 | struct disk_part_iter piter; | ||
498 | struct hd_struct *part; | ||
475 | struct parsed_partitions *state; | 499 | struct parsed_partitions *state; |
476 | int p, res; | 500 | int p, highest, res; |
477 | 501 | ||
478 | if (bdev->bd_part_count) | 502 | if (bdev->bd_part_count) |
479 | return -EBUSY; | 503 | return -EBUSY; |
480 | res = invalidate_partition(disk, 0); | 504 | res = invalidate_partition(disk, 0); |
481 | if (res) | 505 | if (res) |
482 | return res; | 506 | return res; |
483 | bdev->bd_invalidated = 0; | 507 | |
484 | for (p = 1; p < disk->minors; p++) | 508 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); |
485 | delete_partition(disk, p); | 509 | while ((part = disk_part_iter_next(&piter))) |
510 | delete_partition(disk, part->partno); | ||
511 | disk_part_iter_exit(&piter); | ||
512 | |||
486 | if (disk->fops->revalidate_disk) | 513 | if (disk->fops->revalidate_disk) |
487 | disk->fops->revalidate_disk(disk); | 514 | disk->fops->revalidate_disk(disk); |
515 | check_disk_size_change(disk, bdev); | ||
516 | bdev->bd_invalidated = 0; | ||
488 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) | 517 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) |
489 | return 0; | 518 | return 0; |
490 | if (IS_ERR(state)) /* I/O error reading the partition table */ | 519 | if (IS_ERR(state)) /* I/O error reading the partition table */ |
491 | return -EIO; | 520 | return -EIO; |
492 | 521 | ||
493 | /* tell userspace that the media / partition table may have changed */ | 522 | /* tell userspace that the media / partition table may have changed */ |
494 | kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); | 523 | kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); |
495 | 524 | ||
525 | /* Detect the highest partition number and preallocate | ||
526 | * disk->part_tbl. This is an optimization and not strictly | ||
527 | * necessary. | ||
528 | */ | ||
529 | for (p = 1, highest = 0; p < state->limit; p++) | ||
530 | if (state->parts[p].size) | ||
531 | highest = p; | ||
532 | |||
533 | disk_expand_part_tbl(disk, highest); | ||
534 | |||
535 | /* add partitions */ | ||
496 | for (p = 1; p < state->limit; p++) { | 536 | for (p = 1; p < state->limit; p++) { |
497 | sector_t size = state->parts[p].size; | 537 | sector_t size = state->parts[p].size; |
498 | sector_t from = state->parts[p].from; | 538 | sector_t from = state->parts[p].from; |
499 | if (!size) | 539 | if (!size) |
500 | continue; | 540 | continue; |
501 | if (from + size > get_capacity(disk)) { | 541 | if (from + size > get_capacity(disk)) { |
502 | printk(KERN_ERR " %s: p%d exceeds device capacity\n", | 542 | printk(KERN_WARNING |
543 | "%s: p%d exceeds device capacity\n", | ||
503 | disk->disk_name, p); | 544 | disk->disk_name, p); |
504 | continue; | ||
505 | } | 545 | } |
506 | res = add_partition(disk, p, from, size, state->parts[p].flags); | 546 | res = add_partition(disk, p, from, size, state->parts[p].flags); |
507 | if (res) { | 547 | if (res) { |
@@ -541,25 +581,31 @@ EXPORT_SYMBOL(read_dev_sector); | |||
541 | 581 | ||
542 | void del_gendisk(struct gendisk *disk) | 582 | void del_gendisk(struct gendisk *disk) |
543 | { | 583 | { |
544 | int p; | 584 | struct disk_part_iter piter; |
585 | struct hd_struct *part; | ||
545 | 586 | ||
546 | /* invalidate stuff */ | 587 | /* invalidate stuff */ |
547 | for (p = disk->minors - 1; p > 0; p--) { | 588 | disk_part_iter_init(&piter, disk, |
548 | invalidate_partition(disk, p); | 589 | DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); |
549 | delete_partition(disk, p); | 590 | while ((part = disk_part_iter_next(&piter))) { |
591 | invalidate_partition(disk, part->partno); | ||
592 | delete_partition(disk, part->partno); | ||
550 | } | 593 | } |
594 | disk_part_iter_exit(&piter); | ||
595 | |||
551 | invalidate_partition(disk, 0); | 596 | invalidate_partition(disk, 0); |
552 | disk->capacity = 0; | 597 | blk_free_devt(disk_to_dev(disk)->devt); |
598 | set_capacity(disk, 0); | ||
553 | disk->flags &= ~GENHD_FL_UP; | 599 | disk->flags &= ~GENHD_FL_UP; |
554 | unlink_gendisk(disk); | 600 | unlink_gendisk(disk); |
555 | disk_stat_set_all(disk, 0); | 601 | part_stat_set_all(&disk->part0, 0); |
556 | disk->stamp = 0; | 602 | disk->part0.stamp = 0; |
557 | 603 | ||
558 | kobject_put(disk->holder_dir); | 604 | kobject_put(disk->part0.holder_dir); |
559 | kobject_put(disk->slave_dir); | 605 | kobject_put(disk->slave_dir); |
560 | disk->driverfs_dev = NULL; | 606 | disk->driverfs_dev = NULL; |
561 | #ifndef CONFIG_SYSFS_DEPRECATED | 607 | #ifndef CONFIG_SYSFS_DEPRECATED |
562 | sysfs_remove_link(block_depr, disk->dev.bus_id); | 608 | sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); |
563 | #endif | 609 | #endif |
564 | device_del(&disk->dev); | 610 | device_del(disk_to_dev(disk)); |
565 | } | 611 | } |
diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 17ae8ecd9e8b..98dbe1a84528 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h | |||
@@ -5,15 +5,13 @@ | |||
5 | * add_gd_partition adds a partitions details to the devices partition | 5 | * add_gd_partition adds a partitions details to the devices partition |
6 | * description. | 6 | * description. |
7 | */ | 7 | */ |
8 | enum { MAX_PART = 256 }; | ||
9 | |||
10 | struct parsed_partitions { | 8 | struct parsed_partitions { |
11 | char name[BDEVNAME_SIZE]; | 9 | char name[BDEVNAME_SIZE]; |
12 | struct { | 10 | struct { |
13 | sector_t from; | 11 | sector_t from; |
14 | sector_t size; | 12 | sector_t size; |
15 | int flags; | 13 | int flags; |
16 | } parts[MAX_PART]; | 14 | } parts[DISK_MAX_PARTS]; |
17 | int next; | 15 | int next; |
18 | int limit; | 16 | int limit; |
19 | }; | 17 | }; |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 0d6eb33597c6..71c9be59c9c2 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | |||
337 | return 0; | 337 | return 0; |
338 | } | 338 | } |
339 | 339 | ||
340 | /* | ||
341 | * Use precise platform statistics if available: | ||
342 | */ | ||
343 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
344 | static cputime_t task_utime(struct task_struct *p) | ||
345 | { | ||
346 | return p->utime; | ||
347 | } | ||
348 | |||
349 | static cputime_t task_stime(struct task_struct *p) | ||
350 | { | ||
351 | return p->stime; | ||
352 | } | ||
353 | #else | ||
354 | static cputime_t task_utime(struct task_struct *p) | ||
355 | { | ||
356 | clock_t utime = cputime_to_clock_t(p->utime), | ||
357 | total = utime + cputime_to_clock_t(p->stime); | ||
358 | u64 temp; | ||
359 | |||
360 | /* | ||
361 | * Use CFS's precise accounting: | ||
362 | */ | ||
363 | temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); | ||
364 | |||
365 | if (total) { | ||
366 | temp *= utime; | ||
367 | do_div(temp, total); | ||
368 | } | ||
369 | utime = (clock_t)temp; | ||
370 | |||
371 | p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); | ||
372 | return p->prev_utime; | ||
373 | } | ||
374 | |||
375 | static cputime_t task_stime(struct task_struct *p) | ||
376 | { | ||
377 | clock_t stime; | ||
378 | |||
379 | /* | ||
380 | * Use CFS's precise accounting. (we subtract utime from | ||
381 | * the total, to make sure the total observed by userspace | ||
382 | * grows monotonically - apps rely on that): | ||
383 | */ | ||
384 | stime = nsec_to_clock_t(p->se.sum_exec_runtime) - | ||
385 | cputime_to_clock_t(task_utime(p)); | ||
386 | |||
387 | if (stime >= 0) | ||
388 | p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); | ||
389 | |||
390 | return p->prev_stime; | ||
391 | } | ||
392 | #endif | ||
393 | |||
394 | static cputime_t task_gtime(struct task_struct *p) | ||
395 | { | ||
396 | return p->gtime; | ||
397 | } | ||
398 | |||
399 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | 340 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, |
400 | struct pid *pid, struct task_struct *task, int whole) | 341 | struct pid *pid, struct task_struct *task, int whole) |
401 | { | 342 | { |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index bca0f81eb687..7821589a17d5 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -547,8 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
547 | 547 | ||
548 | for (tmp = dir->subdir; tmp; tmp = tmp->next) | 548 | for (tmp = dir->subdir; tmp; tmp = tmp->next) |
549 | if (strcmp(tmp->name, dp->name) == 0) { | 549 | if (strcmp(tmp->name, dp->name) == 0) { |
550 | printk(KERN_WARNING "proc_dir_entry '%s' already " | 550 | printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", |
551 | "registered\n", dp->name); | 551 | dir->name, dp->name); |
552 | dump_stack(); | 552 | dump_stack(); |
553 | break; | 553 | break; |
554 | } | 554 | } |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index ded969862960..29e20c6b1f7f 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/tty.h> | 24 | #include <linux/tty.h> |
25 | #include <linux/string.h> | 25 | #include <linux/string.h> |
26 | #include <linux/mman.h> | 26 | #include <linux/mman.h> |
27 | #include <linux/quicklist.h> | ||
27 | #include <linux/proc_fs.h> | 28 | #include <linux/proc_fs.h> |
28 | #include <linux/ioport.h> | 29 | #include <linux/ioport.h> |
29 | #include <linux/mm.h> | 30 | #include <linux/mm.h> |
@@ -182,6 +183,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
182 | "SReclaimable: %8lu kB\n" | 183 | "SReclaimable: %8lu kB\n" |
183 | "SUnreclaim: %8lu kB\n" | 184 | "SUnreclaim: %8lu kB\n" |
184 | "PageTables: %8lu kB\n" | 185 | "PageTables: %8lu kB\n" |
186 | #ifdef CONFIG_QUICKLIST | ||
187 | "Quicklists: %8lu kB\n" | ||
188 | #endif | ||
185 | "NFS_Unstable: %8lu kB\n" | 189 | "NFS_Unstable: %8lu kB\n" |
186 | "Bounce: %8lu kB\n" | 190 | "Bounce: %8lu kB\n" |
187 | "WritebackTmp: %8lu kB\n" | 191 | "WritebackTmp: %8lu kB\n" |
@@ -214,6 +218,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
214 | K(global_page_state(NR_SLAB_RECLAIMABLE)), | 218 | K(global_page_state(NR_SLAB_RECLAIMABLE)), |
215 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), | 219 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), |
216 | K(global_page_state(NR_PAGETABLE)), | 220 | K(global_page_state(NR_PAGETABLE)), |
221 | #ifdef CONFIG_QUICKLIST | ||
222 | K(quicklist_total_size()), | ||
223 | #endif | ||
217 | K(global_page_state(NR_UNSTABLE_NFS)), | 224 | K(global_page_state(NR_UNSTABLE_NFS)), |
218 | K(global_page_state(NR_BOUNCE)), | 225 | K(global_page_state(NR_BOUNCE)), |
219 | K(global_page_state(NR_WRITEBACK_TEMP)), | 226 | K(global_page_state(NR_WRITEBACK_TEMP)), |
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 52312ec93ff4..5145cb9125af 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
@@ -58,7 +58,7 @@ const struct inode_operations ramfs_file_inode_operations = { | |||
58 | * size 0 on the assumption that it's going to be used for an mmap of shared | 58 | * size 0 on the assumption that it's going to be used for an mmap of shared |
59 | * memory | 59 | * memory |
60 | */ | 60 | */ |
61 | static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) | 61 | int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) |
62 | { | 62 | { |
63 | struct pagevec lru_pvec; | 63 | struct pagevec lru_pvec; |
64 | unsigned long npages, xpages, loop, limit; | 64 | unsigned long npages, xpages, loop, limit; |
diff --git a/fs/splice.c b/fs/splice.c index 1bbc6f4bb09c..a1e701c27156 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -898,6 +898,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
898 | if (unlikely(!(out->f_mode & FMODE_WRITE))) | 898 | if (unlikely(!(out->f_mode & FMODE_WRITE))) |
899 | return -EBADF; | 899 | return -EBADF; |
900 | 900 | ||
901 | if (unlikely(out->f_flags & O_APPEND)) | ||
902 | return -EINVAL; | ||
903 | |||
901 | ret = rw_verify_area(WRITE, out, ppos, len); | 904 | ret = rw_verify_area(WRITE, out, ppos, len); |
902 | if (unlikely(ret < 0)) | 905 | if (unlikely(ret < 0)) |
903 | return ret; | 906 | return ret; |
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 154098157473..73db464cd08b 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) | |||
302 | int subtract_lebs; | 302 | int subtract_lebs; |
303 | long long available; | 303 | long long available; |
304 | 304 | ||
305 | /* | ||
306 | * Force the amount available to the total size reported if the used | ||
307 | * space is zero. | ||
308 | */ | ||
309 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && | ||
310 | c->budg_data_growth + c->budg_dd_growth == 0) { | ||
311 | /* Do the same calculation as for c->block_cnt */ | ||
312 | available = c->main_lebs - 2; | ||
313 | available *= c->leb_size - c->dark_wm; | ||
314 | return available; | ||
315 | } | ||
316 | |||
317 | available = c->main_bytes - c->lst.total_used; | 305 | available = c->main_bytes - c->lst.total_used; |
318 | 306 | ||
319 | /* | 307 | /* |
@@ -714,34 +702,106 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, | |||
714 | } | 702 | } |
715 | 703 | ||
716 | /** | 704 | /** |
717 | * ubifs_budg_get_free_space - return amount of free space. | 705 | * ubifs_reported_space - calculate reported free space. |
706 | * @c: the UBIFS file-system description object | ||
707 | * @free: amount of free space | ||
708 | * | ||
709 | * This function calculates amount of free space which will be reported to | ||
710 | * user-space. User-space application tend to expect that if the file-system | ||
711 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
712 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
713 | * node and it has to write indexind nodes as well. This introduces additional | ||
714 | * overhead, and UBIFS it has to report sligtly less free space to meet the | ||
715 | * above expectetion. | ||
716 | * | ||
717 | * This function assumes free space is made up of uncompressed data nodes and | ||
718 | * full index nodes (one per data node, tripled because we always allow enough | ||
719 | * space to write the index thrice). | ||
720 | * | ||
721 | * Note, the calculation is pessimistic, which means that most of the time | ||
722 | * UBIFS reports less space than it actually has. | ||
723 | */ | ||
724 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) | ||
725 | { | ||
726 | int divisor, factor, f; | ||
727 | |||
728 | /* | ||
729 | * Reported space size is @free * X, where X is UBIFS block size | ||
730 | * divided by UBIFS block size + all overhead one data block | ||
731 | * introduces. The overhead is the node header + indexing overhead. | ||
732 | * | ||
733 | * Indexing overhead calculations are based on the following formula: | ||
734 | * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number | ||
735 | * of data nodes, f - fanout. Because effective UBIFS fanout is twice | ||
736 | * as less than maximum fanout, we assume that each data node | ||
737 | * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. | ||
738 | * Note, the multiplier 3 is because UBIFS reseves thrice as more space | ||
739 | * for the index. | ||
740 | */ | ||
741 | f = c->fanout > 3 ? c->fanout >> 1 : 2; | ||
742 | factor = UBIFS_BLOCK_SIZE; | ||
743 | divisor = UBIFS_MAX_DATA_NODE_SZ; | ||
744 | divisor += (c->max_idx_node_sz * 3) / (f - 1); | ||
745 | free *= factor; | ||
746 | do_div(free, divisor); | ||
747 | return free; | ||
748 | } | ||
749 | |||
750 | /** | ||
751 | * ubifs_get_free_space - return amount of free space. | ||
718 | * @c: UBIFS file-system description object | 752 | * @c: UBIFS file-system description object |
719 | * | 753 | * |
720 | * This function returns amount of free space on the file-system. | 754 | * This function calculates amount of free space to report to user-space. |
755 | * | ||
756 | * Because UBIFS may introduce substantial overhead (the index, node headers, | ||
757 | * alighment, wastage at the end of eraseblocks, etc), it cannot report real | ||
758 | * amount of free flash space it has (well, because not all dirty space is | ||
759 | * reclamable, UBIFS does not actually know the real amount). If UBIFS did so, | ||
760 | * it would bread user expectetion about what free space is. Users seem to | ||
761 | * accustomed to assume that if the file-system reports N bytes of free space, | ||
762 | * they would be able to fit a file of N bytes to the FS. This almost works for | ||
763 | * traditional file-systems, because they have way less overhead than UBIFS. | ||
764 | * So, to keep users happy, UBIFS tries to take the overhead into account. | ||
721 | */ | 765 | */ |
722 | long long ubifs_budg_get_free_space(struct ubifs_info *c) | 766 | long long ubifs_get_free_space(struct ubifs_info *c) |
723 | { | 767 | { |
724 | int min_idx_lebs, rsvd_idx_lebs; | 768 | int min_idx_lebs, rsvd_idx_lebs, lebs; |
725 | long long available, outstanding, free; | 769 | long long available, outstanding, free; |
726 | 770 | ||
727 | /* Do exactly the same calculations as in 'do_budget_space()' */ | ||
728 | spin_lock(&c->space_lock); | 771 | spin_lock(&c->space_lock); |
729 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 772 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
773 | outstanding = c->budg_data_growth + c->budg_dd_growth; | ||
730 | 774 | ||
731 | if (min_idx_lebs > c->lst.idx_lebs) | 775 | /* |
732 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | 776 | * Force the amount available to the total size reported if the used |
733 | else | 777 | * space is zero. |
734 | rsvd_idx_lebs = 0; | 778 | */ |
735 | 779 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) { | |
736 | if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt | ||
737 | - c->lst.taken_empty_lebs) { | ||
738 | spin_unlock(&c->space_lock); | 780 | spin_unlock(&c->space_lock); |
739 | return 0; | 781 | return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT; |
740 | } | 782 | } |
741 | 783 | ||
742 | available = ubifs_calc_available(c, min_idx_lebs); | 784 | available = ubifs_calc_available(c, min_idx_lebs); |
743 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 785 | |
744 | c->min_idx_lebs = min_idx_lebs; | 786 | /* |
787 | * When reporting free space to user-space, UBIFS guarantees that it is | ||
788 | * possible to write a file of free space size. This means that for | ||
789 | * empty LEBs we may use more precise calculations than | ||
790 | * 'ubifs_calc_available()' is using. Namely, we know that in empty | ||
791 | * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm. | ||
792 | * Thus, amend the available space. | ||
793 | * | ||
794 | * Note, the calculations below are similar to what we have in | ||
795 | * 'do_budget_space()', so refer there for comments. | ||
796 | */ | ||
797 | if (min_idx_lebs > c->lst.idx_lebs) | ||
798 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | ||
799 | else | ||
800 | rsvd_idx_lebs = 0; | ||
801 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | ||
802 | c->lst.taken_empty_lebs; | ||
803 | lebs -= rsvd_idx_lebs; | ||
804 | available += lebs * (c->dark_wm - c->leb_overhead); | ||
745 | spin_unlock(&c->space_lock); | 805 | spin_unlock(&c->space_lock); |
746 | 806 | ||
747 | if (available > outstanding) | 807 | if (available > outstanding) |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index b9cb77473758..d7f7645779f2 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -538,7 +538,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
538 | printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); | 538 | printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); |
539 | for (i = 0; i < n; i++) | 539 | for (i = 0; i < n; i++) |
540 | printk(KERN_DEBUG "\t ino %llu\n", | 540 | printk(KERN_DEBUG "\t ino %llu\n", |
541 | le64_to_cpu(orph->inos[i])); | 541 | (unsigned long long)le64_to_cpu(orph->inos[i])); |
542 | break; | 542 | break; |
543 | } | 543 | } |
544 | default: | 544 | default: |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 5c96f1fb7016..526c01ec8003 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -426,7 +426,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
426 | 426 | ||
427 | while (1) { | 427 | while (1) { |
428 | dbg_gen("feed '%s', ino %llu, new f_pos %#x", | 428 | dbg_gen("feed '%s', ino %llu, new f_pos %#x", |
429 | dent->name, le64_to_cpu(dent->inum), | 429 | dent->name, (unsigned long long)le64_to_cpu(dent->inum), |
430 | key_hash_flash(c, &dent->key)); | 430 | key_hash_flash(c, &dent->key)); |
431 | ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); | 431 | ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); |
432 | 432 | ||
@@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) | |||
587 | if (err) { | 587 | if (err) { |
588 | if (err != -ENOSPC) | 588 | if (err != -ENOSPC) |
589 | return err; | 589 | return err; |
590 | err = 0; | ||
591 | budgeted = 0; | 590 | budgeted = 0; |
592 | } | 591 | } |
593 | 592 | ||
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 4071d1cae29f..3d698e2022b1 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
793 | int err; | 793 | int err; |
794 | struct ubifs_budget_req req; | 794 | struct ubifs_budget_req req; |
795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; | 795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; |
796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1); | 796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1; |
797 | struct ubifs_inode *ui = ubifs_inode(inode); | 797 | struct ubifs_inode *ui = ubifs_inode(inode); |
798 | 798 | ||
799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); | 799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); |
@@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
811 | /* A funny way to budget for truncation node */ | 811 | /* A funny way to budget for truncation node */ |
812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; | 812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; |
813 | err = ubifs_budget_space(c, &req); | 813 | err = ubifs_budget_space(c, &req); |
814 | if (err) | 814 | if (err) { |
815 | return err; | 815 | /* |
816 | * Treat truncations to zero as deletion and always allow them, | ||
817 | * just like we do for '->unlink()'. | ||
818 | */ | ||
819 | if (new_size || err != -ENOSPC) | ||
820 | return err; | ||
821 | budgeted = 0; | ||
822 | } | ||
816 | 823 | ||
817 | err = vmtruncate(inode, new_size); | 824 | err = vmtruncate(inode, new_size); |
818 | if (err) | 825 | if (err) |
@@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
869 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); | 876 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); |
870 | mutex_unlock(&ui->ui_mutex); | 877 | mutex_unlock(&ui->ui_mutex); |
871 | out_budg: | 878 | out_budg: |
872 | ubifs_release_budget(c, &req); | 879 | if (budgeted) |
880 | ubifs_release_budget(c, &req); | ||
881 | else { | ||
882 | c->nospace = c->nospace_rp = 0; | ||
883 | smp_wmb(); | ||
884 | } | ||
873 | return err; | 885 | return err; |
874 | } | 886 | } |
875 | 887 | ||
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index adee7b5ddeab..47814cde2407 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty | 211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty |
212 | * or do not have an LEB which satisfies the @min_space criteria. | 212 | * or do not have an LEB which satisfies the @min_space criteria. |
213 | * | 213 | * |
214 | * Note: | 214 | * Note, LEBs which have less than dead watermark of free + dirty space are |
215 | * o LEBs which have less than dead watermark of dirty space are never picked | 215 | * never picked by this function. |
216 | * by this function; | ||
217 | * | ||
218 | * Returns zero and the LEB properties of | ||
219 | * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a | ||
220 | * negative error code in case of other failures. The returned LEB is marked as | ||
221 | * "taken". | ||
222 | * | 216 | * |
223 | * The additional @pick_free argument controls if this function has to return a | 217 | * The additional @pick_free argument controls if this function has to return a |
224 | * free or freeable LEB if one is present. For example, GC must to set it to %1, | 218 | * free or freeable LEB if one is present. For example, GC must to set it to %1, |
@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
231 | * | 225 | * |
232 | * In addition @pick_free is set to %2 by the recovery process in order to | 226 | * In addition @pick_free is set to %2 by the recovery process in order to |
233 | * recover gc_lnum in which case an index LEB must not be returned. | 227 | * recover gc_lnum in which case an index LEB must not be returned. |
228 | * | ||
229 | * This function returns zero and the LEB properties of found dirty LEB in case | ||
230 | * of success, %-ENOSPC if no dirty LEB was found and a negative error code in | ||
231 | * case of other failures. The returned LEB is marked as "taken". | ||
234 | */ | 232 | */ |
235 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | 233 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, |
236 | int min_space, int pick_free) | 234 | int min_space, int pick_free) |
@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
245 | int lebs, rsvd_idx_lebs = 0; | 243 | int lebs, rsvd_idx_lebs = 0; |
246 | 244 | ||
247 | spin_lock(&c->space_lock); | 245 | spin_lock(&c->space_lock); |
248 | lebs = c->lst.empty_lebs; | 246 | lebs = c->lst.empty_lebs + c->idx_gc_cnt; |
249 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; | 247 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; |
250 | 248 | ||
251 | /* | 249 | /* |
@@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
317 | lp = idx_lp; | 315 | lp = idx_lp; |
318 | 316 | ||
319 | if (lp) { | 317 | if (lp) { |
320 | ubifs_assert(lp->dirty >= c->dead_wm); | 318 | ubifs_assert(lp->free + lp->dirty >= c->dead_wm); |
321 | goto found; | 319 | goto found; |
322 | } | 320 | } |
323 | 321 | ||
@@ -509,7 +507,6 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | |||
509 | rsvd_idx_lebs = 0; | 507 | rsvd_idx_lebs = 0; |
510 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | 508 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - |
511 | c->lst.taken_empty_lebs; | 509 | c->lst.taken_empty_lebs; |
512 | ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs); | ||
513 | if (rsvd_idx_lebs < lebs) | 510 | if (rsvd_idx_lebs < lebs) |
514 | /* | 511 | /* |
515 | * OK to allocate an empty LEB, but we still don't want to go | 512 | * OK to allocate an empty LEB, but we still don't want to go |
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index d0f3dac29081..02aba36fe3d4 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
@@ -334,15 +334,21 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
334 | 334 | ||
335 | err = move_nodes(c, sleb); | 335 | err = move_nodes(c, sleb); |
336 | if (err) | 336 | if (err) |
337 | goto out; | 337 | goto out_inc_seq; |
338 | 338 | ||
339 | err = gc_sync_wbufs(c); | 339 | err = gc_sync_wbufs(c); |
340 | if (err) | 340 | if (err) |
341 | goto out; | 341 | goto out_inc_seq; |
342 | 342 | ||
343 | err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); | 343 | err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); |
344 | if (err) | 344 | if (err) |
345 | goto out; | 345 | goto out_inc_seq; |
346 | |||
347 | /* Allow for races with TNC */ | ||
348 | c->gced_lnum = lnum; | ||
349 | smp_wmb(); | ||
350 | c->gc_seq += 1; | ||
351 | smp_wmb(); | ||
346 | 352 | ||
347 | if (c->gc_lnum == -1) { | 353 | if (c->gc_lnum == -1) { |
348 | c->gc_lnum = lnum; | 354 | c->gc_lnum = lnum; |
@@ -363,6 +369,14 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
363 | out: | 369 | out: |
364 | ubifs_scan_destroy(sleb); | 370 | ubifs_scan_destroy(sleb); |
365 | return err; | 371 | return err; |
372 | |||
373 | out_inc_seq: | ||
374 | /* We may have moved at least some nodes so allow for races with TNC */ | ||
375 | c->gced_lnum = lnum; | ||
376 | smp_wmb(); | ||
377 | c->gc_seq += 1; | ||
378 | smp_wmb(); | ||
379 | goto out; | ||
366 | } | 380 | } |
367 | 381 | ||
368 | /** | 382 | /** |
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 87dabf9fe742..4c12a9215d7f 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h | |||
@@ -284,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c, | |||
284 | } | 284 | } |
285 | 285 | ||
286 | /** | 286 | /** |
287 | * ubifs_reported_space - calculate reported free space. | ||
288 | * @c: the UBIFS file-system description object | ||
289 | * @free: amount of free space | ||
290 | * | ||
291 | * This function calculates amount of free space which will be reported to | ||
292 | * user-space. User-space application tend to expect that if the file-system | ||
293 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
294 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
295 | * node and it has to write indexind nodes as well. This introduces additional | ||
296 | * overhead, and UBIFS it has to report sligtly less free space to meet the | ||
297 | * above expectetion. | ||
298 | * | ||
299 | * This function assumes free space is made up of uncompressed data nodes and | ||
300 | * full index nodes (one per data node, doubled because we always allow enough | ||
301 | * space to write the index twice). | ||
302 | * | ||
303 | * Note, the calculation is pessimistic, which means that most of the time | ||
304 | * UBIFS reports less space than it actually has. | ||
305 | */ | ||
306 | static inline long long ubifs_reported_space(const struct ubifs_info *c, | ||
307 | uint64_t free) | ||
308 | { | ||
309 | int divisor, factor; | ||
310 | |||
311 | divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3); | ||
312 | factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; | ||
313 | do_div(free, divisor); | ||
314 | |||
315 | return free * factor; | ||
316 | } | ||
317 | |||
318 | /** | ||
319 | * ubifs_current_time - round current time to time granularity. | 287 | * ubifs_current_time - round current time to time granularity. |
320 | * @inode: inode | 288 | * @inode: inode |
321 | */ | 289 | */ |
@@ -325,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode) | |||
325 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; | 293 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; |
326 | } | 294 | } |
327 | 295 | ||
296 | /** | ||
297 | * ubifs_tnc_lookup - look up a file-system node. | ||
298 | * @c: UBIFS file-system description object | ||
299 | * @key: node key to lookup | ||
300 | * @node: the node is returned here | ||
301 | * | ||
302 | * This function look up and reads node with key @key. The caller has to make | ||
303 | * sure the @node buffer is large enough to fit the node. Returns zero in case | ||
304 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
305 | * case of failure. | ||
306 | */ | ||
307 | static inline int ubifs_tnc_lookup(struct ubifs_info *c, | ||
308 | const union ubifs_key *key, void *node) | ||
309 | { | ||
310 | return ubifs_tnc_locate(c, key, node, NULL, NULL); | ||
311 | } | ||
312 | |||
328 | #endif /* __UBIFS_MISC_H__ */ | 313 | #endif /* __UBIFS_MISC_H__ */ |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f71e6b8822c4..3f4902060c7a 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -370,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
370 | { | 370 | { |
371 | struct ubifs_info *c = dentry->d_sb->s_fs_info; | 371 | struct ubifs_info *c = dentry->d_sb->s_fs_info; |
372 | unsigned long long free; | 372 | unsigned long long free; |
373 | __le32 *uuid = (__le32 *)c->uuid; | ||
373 | 374 | ||
374 | free = ubifs_budg_get_free_space(c); | 375 | free = ubifs_get_free_space(c); |
375 | dbg_gen("free space %lld bytes (%lld blocks)", | 376 | dbg_gen("free space %lld bytes (%lld blocks)", |
376 | free, free >> UBIFS_BLOCK_SHIFT); | 377 | free, free >> UBIFS_BLOCK_SHIFT); |
377 | 378 | ||
@@ -386,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
386 | buf->f_files = 0; | 387 | buf->f_files = 0; |
387 | buf->f_ffree = 0; | 388 | buf->f_ffree = 0; |
388 | buf->f_namelen = UBIFS_MAX_NLEN; | 389 | buf->f_namelen = UBIFS_MAX_NLEN; |
389 | 390 | buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); | |
391 | buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); | ||
390 | return 0; | 392 | return 0; |
391 | } | 393 | } |
392 | 394 | ||
@@ -530,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c) | |||
530 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); | 532 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); |
531 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); | 533 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); |
532 | 534 | ||
535 | /* | ||
536 | * Calculate how many bytes would be wasted at the end of LEB if it was | ||
537 | * fully filled with data nodes of maximum size. This is used in | ||
538 | * calculations when reporting free space. | ||
539 | */ | ||
540 | c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; | ||
533 | return 0; | 541 | return 0; |
534 | } | 542 | } |
535 | 543 | ||
@@ -647,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c) | |||
647 | * internally because it does not make much sense for UBIFS, but it is | 655 | * internally because it does not make much sense for UBIFS, but it is |
648 | * necessary to report something for the 'statfs()' call. | 656 | * necessary to report something for the 'statfs()' call. |
649 | * | 657 | * |
650 | * Subtract the LEB reserved for GC and the LEB which is reserved for | 658 | * Subtract the LEB reserved for GC, the LEB which is reserved for |
651 | * deletions. | 659 | * deletions, and assume only one journal head is available. |
652 | * | ||
653 | * Review 'ubifs_calc_available()' if changing this calculation. | ||
654 | */ | 660 | */ |
655 | tmp64 = c->main_lebs - 2; | 661 | tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1; |
656 | tmp64 *= (uint64_t)c->leb_size - c->dark_wm; | 662 | tmp64 *= (uint64_t)c->leb_size - c->leb_overhead; |
657 | tmp64 = ubifs_reported_space(c, tmp64); | 663 | tmp64 = ubifs_reported_space(c, tmp64); |
658 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; | 664 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; |
659 | 665 | ||
@@ -1018,14 +1024,13 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1018 | goto out_dereg; | 1024 | goto out_dereg; |
1019 | } | 1025 | } |
1020 | 1026 | ||
1027 | sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); | ||
1021 | if (!mounted_read_only) { | 1028 | if (!mounted_read_only) { |
1022 | err = alloc_wbufs(c); | 1029 | err = alloc_wbufs(c); |
1023 | if (err) | 1030 | if (err) |
1024 | goto out_cbuf; | 1031 | goto out_cbuf; |
1025 | 1032 | ||
1026 | /* Create background thread */ | 1033 | /* Create background thread */ |
1027 | sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, | ||
1028 | c->vi.vol_id); | ||
1029 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | 1034 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); |
1030 | if (!c->bgt) | 1035 | if (!c->bgt) |
1031 | c->bgt = ERR_PTR(-EINVAL); | 1036 | c->bgt = ERR_PTR(-EINVAL); |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index e909f4a96443..7634c5970887 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, | |||
506 | if (keys_cmp(c, key, &node_key) != 0) | 506 | if (keys_cmp(c, key, &node_key) != 0) |
507 | ret = 0; | 507 | ret = 0; |
508 | } | 508 | } |
509 | if (ret == 0) | 509 | if (ret == 0 && c->replaying) |
510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", | 510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", |
511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); | 511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); |
512 | return ret; | 512 | return ret; |
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, | |||
1382 | } | 1382 | } |
1383 | 1383 | ||
1384 | /** | 1384 | /** |
1385 | * ubifs_tnc_lookup - look up a file-system node. | 1385 | * maybe_leb_gced - determine if a LEB may have been garbage collected. |
1386 | * @c: UBIFS file-system description object | 1386 | * @c: UBIFS file-system description object |
1387 | * @key: node key to lookup | 1387 | * @lnum: LEB number |
1388 | * @node: the node is returned here | 1388 | * @gc_seq1: garbage collection sequence number |
1389 | * | 1389 | * |
1390 | * This function look up and reads node with key @key. The caller has to make | 1390 | * This function determines if @lnum may have been garbage collected since |
1391 | * sure the @node buffer is large enough to fit the node. Returns zero in case | 1391 | * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise |
1392 | * of success, %-ENOENT if the node was not found, and a negative error code in | 1392 | * %0 is returned. |
1393 | * case of failure. | ||
1394 | */ | 1393 | */ |
1395 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | 1394 | static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) |
1396 | void *node) | ||
1397 | { | 1395 | { |
1398 | int found, n, err; | 1396 | int gc_seq2, gced_lnum; |
1399 | struct ubifs_znode *znode; | ||
1400 | struct ubifs_zbranch zbr, *zt; | ||
1401 | 1397 | ||
1402 | mutex_lock(&c->tnc_mutex); | 1398 | gced_lnum = c->gced_lnum; |
1403 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1399 | smp_rmb(); |
1404 | if (!found) { | 1400 | gc_seq2 = c->gc_seq; |
1405 | err = -ENOENT; | 1401 | /* Same seq means no GC */ |
1406 | goto out; | 1402 | if (gc_seq1 == gc_seq2) |
1407 | } else if (found < 0) { | 1403 | return 0; |
1408 | err = found; | 1404 | /* Different by more than 1 means we don't know */ |
1409 | goto out; | 1405 | if (gc_seq1 + 1 != gc_seq2) |
1410 | } | 1406 | return 1; |
1411 | zt = &znode->zbranch[n]; | 1407 | /* |
1412 | if (is_hash_key(c, key)) { | 1408 | * We have seen the sequence number has increased by 1. Now we need to |
1413 | /* | 1409 | * be sure we read the right LEB number, so read it again. |
1414 | * In this case the leaf node cache gets used, so we pass the | 1410 | */ |
1415 | * address of the zbranch and keep the mutex locked | 1411 | smp_rmb(); |
1416 | */ | 1412 | if (gced_lnum != c->gced_lnum) |
1417 | err = tnc_read_node_nm(c, zt, node); | 1413 | return 1; |
1418 | goto out; | 1414 | /* Finally we can check lnum */ |
1419 | } | 1415 | if (gced_lnum == lnum) |
1420 | zbr = znode->zbranch[n]; | 1416 | return 1; |
1421 | mutex_unlock(&c->tnc_mutex); | 1417 | return 0; |
1422 | |||
1423 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1424 | return err; | ||
1425 | |||
1426 | out: | ||
1427 | mutex_unlock(&c->tnc_mutex); | ||
1428 | return err; | ||
1429 | } | 1418 | } |
1430 | 1419 | ||
1431 | /** | 1420 | /** |
@@ -1436,16 +1425,19 @@ out: | |||
1436 | * @lnum: LEB number is returned here | 1425 | * @lnum: LEB number is returned here |
1437 | * @offs: offset is returned here | 1426 | * @offs: offset is returned here |
1438 | * | 1427 | * |
1439 | * This function is the same as 'ubifs_tnc_lookup()' but it returns the node | 1428 | * This function look up and reads node with key @key. The caller has to make |
1440 | * location also. See 'ubifs_tnc_lookup()'. | 1429 | * sure the @node buffer is large enough to fit the node. Returns zero in case |
1430 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
1431 | * case of failure. The node location can be returned in @lnum and @offs. | ||
1441 | */ | 1432 | */ |
1442 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1433 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
1443 | void *node, int *lnum, int *offs) | 1434 | void *node, int *lnum, int *offs) |
1444 | { | 1435 | { |
1445 | int found, n, err; | 1436 | int found, n, err, safely = 0, gc_seq1; |
1446 | struct ubifs_znode *znode; | 1437 | struct ubifs_znode *znode; |
1447 | struct ubifs_zbranch zbr, *zt; | 1438 | struct ubifs_zbranch zbr, *zt; |
1448 | 1439 | ||
1440 | again: | ||
1449 | mutex_lock(&c->tnc_mutex); | 1441 | mutex_lock(&c->tnc_mutex); |
1450 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1442 | found = ubifs_lookup_level0(c, key, &znode, &n); |
1451 | if (!found) { | 1443 | if (!found) { |
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | |||
1456 | goto out; | 1448 | goto out; |
1457 | } | 1449 | } |
1458 | zt = &znode->zbranch[n]; | 1450 | zt = &znode->zbranch[n]; |
1451 | if (lnum) { | ||
1452 | *lnum = zt->lnum; | ||
1453 | *offs = zt->offs; | ||
1454 | } | ||
1459 | if (is_hash_key(c, key)) { | 1455 | if (is_hash_key(c, key)) { |
1460 | /* | 1456 | /* |
1461 | * In this case the leaf node cache gets used, so we pass the | 1457 | * In this case the leaf node cache gets used, so we pass the |
1462 | * address of the zbranch and keep the mutex locked | 1458 | * address of the zbranch and keep the mutex locked |
1463 | */ | 1459 | */ |
1464 | *lnum = zt->lnum; | ||
1465 | *offs = zt->offs; | ||
1466 | err = tnc_read_node_nm(c, zt, node); | 1460 | err = tnc_read_node_nm(c, zt, node); |
1467 | goto out; | 1461 | goto out; |
1468 | } | 1462 | } |
1463 | if (safely) { | ||
1464 | err = ubifs_tnc_read_node(c, zt, node); | ||
1465 | goto out; | ||
1466 | } | ||
1467 | /* Drop the TNC mutex prematurely and race with garbage collection */ | ||
1469 | zbr = znode->zbranch[n]; | 1468 | zbr = znode->zbranch[n]; |
1469 | gc_seq1 = c->gc_seq; | ||
1470 | mutex_unlock(&c->tnc_mutex); | 1470 | mutex_unlock(&c->tnc_mutex); |
1471 | 1471 | ||
1472 | *lnum = zbr.lnum; | 1472 | if (ubifs_get_wbuf(c, zbr.lnum)) { |
1473 | *offs = zbr.offs; | 1473 | /* We do not GC journal heads */ |
1474 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1475 | return err; | ||
1476 | } | ||
1474 | 1477 | ||
1475 | err = ubifs_tnc_read_node(c, &zbr, node); | 1478 | err = fallible_read_node(c, key, &zbr, node); |
1476 | return err; | 1479 | if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) { |
1480 | /* | ||
1481 | * The node may have been GC'ed out from under us so try again | ||
1482 | * while keeping the TNC mutex locked. | ||
1483 | */ | ||
1484 | safely = 1; | ||
1485 | goto again; | ||
1486 | } | ||
1487 | return 0; | ||
1477 | 1488 | ||
1478 | out: | 1489 | out: |
1479 | mutex_unlock(&c->tnc_mutex); | 1490 | mutex_unlock(&c->tnc_mutex); |
@@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
1498 | { | 1509 | { |
1499 | int found, n, err; | 1510 | int found, n, err; |
1500 | struct ubifs_znode *znode; | 1511 | struct ubifs_znode *znode; |
1501 | struct ubifs_zbranch zbr; | ||
1502 | 1512 | ||
1503 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); | 1513 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); |
1504 | mutex_lock(&c->tnc_mutex); | 1514 | mutex_lock(&c->tnc_mutex); |
@@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
1522 | goto out_unlock; | 1532 | goto out_unlock; |
1523 | } | 1533 | } |
1524 | 1534 | ||
1525 | zbr = znode->zbranch[n]; | 1535 | err = tnc_read_node_nm(c, &znode->zbranch[n], node); |
1526 | mutex_unlock(&c->tnc_mutex); | ||
1527 | |||
1528 | err = tnc_read_node_nm(c, &zbr, node); | ||
1529 | return err; | ||
1530 | 1536 | ||
1531 | out_unlock: | 1537 | out_unlock: |
1532 | mutex_unlock(&c->tnc_mutex); | 1538 | mutex_unlock(&c->tnc_mutex); |
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index bd2121f3426e..a9ecbd9af20d 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
@@ -87,7 +87,7 @@ | |||
87 | #define UBIFS_SK_LEN 8 | 87 | #define UBIFS_SK_LEN 8 |
88 | 88 | ||
89 | /* Minimum index tree fanout */ | 89 | /* Minimum index tree fanout */ |
90 | #define UBIFS_MIN_FANOUT 2 | 90 | #define UBIFS_MIN_FANOUT 3 |
91 | 91 | ||
92 | /* Maximum number of levels in UBIFS indexing B-tree */ | 92 | /* Maximum number of levels in UBIFS indexing B-tree */ |
93 | #define UBIFS_MAX_LEVELS 512 | 93 | #define UBIFS_MAX_LEVELS 512 |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index d7f706f7a302..17c620b93eec 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -995,6 +995,9 @@ struct ubifs_mount_opts { | |||
995 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary | 995 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary |
996 | * @max_inode_sz: maximum possible inode size in bytes | 996 | * @max_inode_sz: maximum possible inode size in bytes |
997 | * @max_znode_sz: size of znode in bytes | 997 | * @max_znode_sz: size of znode in bytes |
998 | * | ||
999 | * @leb_overhead: how many bytes are wasted in an LEB when it is filled with | ||
1000 | * data nodes of maximum size - used in free space reporting | ||
998 | * @dead_wm: LEB dead space watermark | 1001 | * @dead_wm: LEB dead space watermark |
999 | * @dark_wm: LEB dark space watermark | 1002 | * @dark_wm: LEB dark space watermark |
1000 | * @block_cnt: count of 4KiB blocks on the FS | 1003 | * @block_cnt: count of 4KiB blocks on the FS |
@@ -1028,6 +1031,8 @@ struct ubifs_mount_opts { | |||
1028 | * @sbuf: a buffer of LEB size used by GC and replay for scanning | 1031 | * @sbuf: a buffer of LEB size used by GC and replay for scanning |
1029 | * @idx_gc: list of index LEBs that have been garbage collected | 1032 | * @idx_gc: list of index LEBs that have been garbage collected |
1030 | * @idx_gc_cnt: number of elements on the idx_gc list | 1033 | * @idx_gc_cnt: number of elements on the idx_gc list |
1034 | * @gc_seq: incremented for every non-index LEB garbage collected | ||
1035 | * @gced_lnum: last non-index LEB that was garbage collected | ||
1031 | * | 1036 | * |
1032 | * @infos_list: links all 'ubifs_info' objects | 1037 | * @infos_list: links all 'ubifs_info' objects |
1033 | * @umount_mutex: serializes shrinker and un-mount | 1038 | * @umount_mutex: serializes shrinker and un-mount |
@@ -1224,6 +1229,8 @@ struct ubifs_info { | |||
1224 | int max_idx_node_sz; | 1229 | int max_idx_node_sz; |
1225 | long long max_inode_sz; | 1230 | long long max_inode_sz; |
1226 | int max_znode_sz; | 1231 | int max_znode_sz; |
1232 | |||
1233 | int leb_overhead; | ||
1227 | int dead_wm; | 1234 | int dead_wm; |
1228 | int dark_wm; | 1235 | int dark_wm; |
1229 | int block_cnt; | 1236 | int block_cnt; |
@@ -1257,6 +1264,8 @@ struct ubifs_info { | |||
1257 | void *sbuf; | 1264 | void *sbuf; |
1258 | struct list_head idx_gc; | 1265 | struct list_head idx_gc; |
1259 | int idx_gc_cnt; | 1266 | int idx_gc_cnt; |
1267 | volatile int gc_seq; | ||
1268 | volatile int gced_lnum; | ||
1260 | 1269 | ||
1261 | struct list_head infos_list; | 1270 | struct list_head infos_list; |
1262 | struct mutex umount_mutex; | 1271 | struct mutex umount_mutex; |
@@ -1434,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, | |||
1434 | struct ubifs_budget_req *req); | 1443 | struct ubifs_budget_req *req); |
1435 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, | 1444 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, |
1436 | struct ubifs_budget_req *req); | 1445 | struct ubifs_budget_req *req); |
1437 | long long ubifs_budg_get_free_space(struct ubifs_info *c); | 1446 | long long ubifs_get_free_space(struct ubifs_info *c); |
1438 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); | 1447 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); |
1439 | void ubifs_convert_page_budget(struct ubifs_info *c); | 1448 | void ubifs_convert_page_budget(struct ubifs_info *c); |
1449 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); | ||
1440 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); | 1450 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); |
1441 | 1451 | ||
1442 | /* find.c */ | 1452 | /* find.c */ |
@@ -1451,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); | |||
1451 | /* tnc.c */ | 1461 | /* tnc.c */ |
1452 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | 1462 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, |
1453 | struct ubifs_znode **zn, int *n); | 1463 | struct ubifs_znode **zn, int *n); |
1454 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | ||
1455 | void *node); | ||
1456 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | 1464 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, |
1457 | void *node, const struct qstr *nm); | 1465 | void *node, const struct qstr *nm); |
1458 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1466 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
diff --git a/fs/udf/file.c b/fs/udf/file.c index 0ed6e146a0d9..eb91f3b70320 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -211,6 +211,7 @@ const struct file_operations udf_file_operations = { | |||
211 | .release = udf_release_file, | 211 | .release = udf_release_file, |
212 | .fsync = udf_fsync_file, | 212 | .fsync = udf_fsync_file, |
213 | .splice_read = generic_file_splice_read, | 213 | .splice_read = generic_file_splice_read, |
214 | .llseek = generic_file_llseek, | ||
214 | }; | 215 | }; |
215 | 216 | ||
216 | const struct inode_operations udf_file_inode_operations = { | 217 | const struct inode_operations udf_file_inode_operations = { |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index eb9cfa23dc3d..a4f2b3ce45b0 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -76,11 +76,24 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
76 | *err = -ENOSPC; | 76 | *err = -ENOSPC; |
77 | 77 | ||
78 | iinfo = UDF_I(inode); | 78 | iinfo = UDF_I(inode); |
79 | iinfo->i_unique = 0; | 79 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { |
80 | iinfo->i_lenExtents = 0; | 80 | iinfo->i_efe = 1; |
81 | iinfo->i_next_alloc_block = 0; | 81 | if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) |
82 | iinfo->i_next_alloc_goal = 0; | 82 | sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; |
83 | iinfo->i_strat4096 = 0; | 83 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - |
84 | sizeof(struct extendedFileEntry), | ||
85 | GFP_KERNEL); | ||
86 | } else { | ||
87 | iinfo->i_efe = 0; | ||
88 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
89 | sizeof(struct fileEntry), | ||
90 | GFP_KERNEL); | ||
91 | } | ||
92 | if (!iinfo->i_ext.i_data) { | ||
93 | iput(inode); | ||
94 | *err = -ENOMEM; | ||
95 | return NULL; | ||
96 | } | ||
84 | 97 | ||
85 | block = udf_new_block(dir->i_sb, NULL, | 98 | block = udf_new_block(dir->i_sb, NULL, |
86 | dinfo->i_location.partitionReferenceNum, | 99 | dinfo->i_location.partitionReferenceNum, |
@@ -111,6 +124,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
111 | lvhd->uniqueID = cpu_to_le64(uniqueID); | 124 | lvhd->uniqueID = cpu_to_le64(uniqueID); |
112 | mark_buffer_dirty(sbi->s_lvid_bh); | 125 | mark_buffer_dirty(sbi->s_lvid_bh); |
113 | } | 126 | } |
127 | mutex_unlock(&sbi->s_alloc_mutex); | ||
114 | inode->i_mode = mode; | 128 | inode->i_mode = mode; |
115 | inode->i_uid = current->fsuid; | 129 | inode->i_uid = current->fsuid; |
116 | if (dir->i_mode & S_ISGID) { | 130 | if (dir->i_mode & S_ISGID) { |
@@ -129,25 +143,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
129 | iinfo->i_lenEAttr = 0; | 143 | iinfo->i_lenEAttr = 0; |
130 | iinfo->i_lenAlloc = 0; | 144 | iinfo->i_lenAlloc = 0; |
131 | iinfo->i_use = 0; | 145 | iinfo->i_use = 0; |
132 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { | ||
133 | iinfo->i_efe = 1; | ||
134 | if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) | ||
135 | sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; | ||
136 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
137 | sizeof(struct extendedFileEntry), | ||
138 | GFP_KERNEL); | ||
139 | } else { | ||
140 | iinfo->i_efe = 0; | ||
141 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
142 | sizeof(struct fileEntry), | ||
143 | GFP_KERNEL); | ||
144 | } | ||
145 | if (!iinfo->i_ext.i_data) { | ||
146 | iput(inode); | ||
147 | *err = -ENOMEM; | ||
148 | mutex_unlock(&sbi->s_alloc_mutex); | ||
149 | return NULL; | ||
150 | } | ||
151 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) | 146 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) |
152 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; | 147 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; |
153 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) | 148 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) |
@@ -158,7 +153,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
158 | iinfo->i_crtime = current_fs_time(inode->i_sb); | 153 | iinfo->i_crtime = current_fs_time(inode->i_sb); |
159 | insert_inode_hash(inode); | 154 | insert_inode_hash(inode); |
160 | mark_inode_dirty(inode); | 155 | mark_inode_dirty(inode); |
161 | mutex_unlock(&sbi->s_alloc_mutex); | ||
162 | 156 | ||
163 | if (DQUOT_ALLOC_INODE(inode)) { | 157 | if (DQUOT_ALLOC_INODE(inode)) { |
164 | DQUOT_DROP(inode); | 158 | DQUOT_DROP(inode); |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index f42f80a3b1fa..a44d68eb50b5 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -1338,6 +1338,10 @@ __xfs_get_blocks( | |||
1338 | offset = (xfs_off_t)iblock << inode->i_blkbits; | 1338 | offset = (xfs_off_t)iblock << inode->i_blkbits; |
1339 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); | 1339 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); |
1340 | size = bh_result->b_size; | 1340 | size = bh_result->b_size; |
1341 | |||
1342 | if (!create && direct && offset >= i_size_read(inode)) | ||
1343 | return 0; | ||
1344 | |||
1341 | error = xfs_iomap(XFS_I(inode), offset, size, | 1345 | error = xfs_iomap(XFS_I(inode), offset, size, |
1342 | create ? flags : BMAPI_READ, &iomap, &niomap); | 1346 | create ? flags : BMAPI_READ, &iomap, &niomap); |
1343 | if (error) | 1347 | if (error) |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 986061ae1b9b..36d5fcd3f593 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -1001,12 +1001,13 @@ xfs_buf_iodone_work( | |||
1001 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the | 1001 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the |
1002 | * ordered flag and reissue them. Because we can't tell the higher | 1002 | * ordered flag and reissue them. Because we can't tell the higher |
1003 | * layers directly that they should not issue ordered I/O anymore, they | 1003 | * layers directly that they should not issue ordered I/O anymore, they |
1004 | * need to check if the ordered flag was cleared during I/O completion. | 1004 | * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion. |
1005 | */ | 1005 | */ |
1006 | if ((bp->b_error == EOPNOTSUPP) && | 1006 | if ((bp->b_error == EOPNOTSUPP) && |
1007 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { | 1007 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { |
1008 | XB_TRACE(bp, "ordered_retry", bp->b_iodone); | 1008 | XB_TRACE(bp, "ordered_retry", bp->b_iodone); |
1009 | bp->b_flags &= ~XBF_ORDERED; | 1009 | bp->b_flags &= ~XBF_ORDERED; |
1010 | bp->b_flags |= _XFS_BARRIER_FAILED; | ||
1010 | xfs_buf_iorequest(bp); | 1011 | xfs_buf_iorequest(bp); |
1011 | } else if (bp->b_iodone) | 1012 | } else if (bp->b_iodone) |
1012 | (*(bp->b_iodone))(bp); | 1013 | (*(bp->b_iodone))(bp); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index fe0109956656..456519a088c7 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -85,6 +85,14 @@ typedef enum { | |||
85 | * modifications being lost. | 85 | * modifications being lost. |
86 | */ | 86 | */ |
87 | _XBF_PAGE_LOCKED = (1 << 22), | 87 | _XBF_PAGE_LOCKED = (1 << 22), |
88 | |||
89 | /* | ||
90 | * If we try a barrier write, but it fails we have to communicate | ||
91 | * this to the upper layers. Unfortunately b_error gets overwritten | ||
92 | * when the buffer is re-issued so we have to add another flag to | ||
93 | * keep this information. | ||
94 | */ | ||
95 | _XFS_BARRIER_FAILED = (1 << 23), | ||
88 | } xfs_buf_flags_t; | 96 | } xfs_buf_flags_t; |
89 | 97 | ||
90 | typedef enum { | 98 | typedef enum { |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 73c65f19e549..18d3c8487835 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -1302,9 +1302,29 @@ xfs_fs_remount( | |||
1302 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1302 | mp->m_flags &= ~XFS_MOUNT_BARRIER; |
1303 | break; | 1303 | break; |
1304 | default: | 1304 | default: |
1305 | /* | ||
1306 | * Logically we would return an error here to prevent | ||
1307 | * users from believing they might have changed | ||
1308 | * mount options using remount which can't be changed. | ||
1309 | * | ||
1310 | * But unfortunately mount(8) adds all options from | ||
1311 | * mtab and fstab to the mount arguments in some cases | ||
1312 | * so we can't blindly reject options, but have to | ||
1313 | * check for each specified option if it actually | ||
1314 | * differs from the currently set option and only | ||
1315 | * reject it if that's the case. | ||
1316 | * | ||
1317 | * Until that is implemented we return success for | ||
1318 | * every remount request, and silently ignore all | ||
1319 | * options that we can't actually change. | ||
1320 | */ | ||
1321 | #if 0 | ||
1305 | printk(KERN_INFO | 1322 | printk(KERN_INFO |
1306 | "XFS: mount option \"%s\" not supported for remount\n", p); | 1323 | "XFS: mount option \"%s\" not supported for remount\n", p); |
1307 | return -EINVAL; | 1324 | return -EINVAL; |
1325 | #else | ||
1326 | return 0; | ||
1327 | #endif | ||
1308 | } | 1328 | } |
1309 | } | 1329 | } |
1310 | 1330 | ||
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 608c30c3f76b..002fc2617c8e 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -732,6 +732,7 @@ xfs_buf_item_init( | |||
732 | bip->bli_item.li_ops = &xfs_buf_item_ops; | 732 | bip->bli_item.li_ops = &xfs_buf_item_ops; |
733 | bip->bli_item.li_mountp = mp; | 733 | bip->bli_item.li_mountp = mp; |
734 | bip->bli_buf = bp; | 734 | bip->bli_buf = bp; |
735 | xfs_buf_hold(bp); | ||
735 | bip->bli_format.blf_type = XFS_LI_BUF; | 736 | bip->bli_format.blf_type = XFS_LI_BUF; |
736 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); | 737 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); |
737 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); | 738 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); |
@@ -867,6 +868,21 @@ xfs_buf_item_dirty( | |||
867 | return (bip->bli_flags & XFS_BLI_DIRTY); | 868 | return (bip->bli_flags & XFS_BLI_DIRTY); |
868 | } | 869 | } |
869 | 870 | ||
871 | STATIC void | ||
872 | xfs_buf_item_free( | ||
873 | xfs_buf_log_item_t *bip) | ||
874 | { | ||
875 | #ifdef XFS_TRANS_DEBUG | ||
876 | kmem_free(bip->bli_orig); | ||
877 | kmem_free(bip->bli_logged); | ||
878 | #endif /* XFS_TRANS_DEBUG */ | ||
879 | |||
880 | #ifdef XFS_BLI_TRACE | ||
881 | ktrace_free(bip->bli_trace); | ||
882 | #endif | ||
883 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
884 | } | ||
885 | |||
870 | /* | 886 | /* |
871 | * This is called when the buf log item is no longer needed. It should | 887 | * This is called when the buf log item is no longer needed. It should |
872 | * free the buf log item associated with the given buffer and clear | 888 | * free the buf log item associated with the given buffer and clear |
@@ -887,18 +903,8 @@ xfs_buf_item_relse( | |||
887 | (XFS_BUF_IODONE_FUNC(bp) != NULL)) { | 903 | (XFS_BUF_IODONE_FUNC(bp) != NULL)) { |
888 | XFS_BUF_CLR_IODONE_FUNC(bp); | 904 | XFS_BUF_CLR_IODONE_FUNC(bp); |
889 | } | 905 | } |
890 | 906 | xfs_buf_rele(bp); | |
891 | #ifdef XFS_TRANS_DEBUG | 907 | xfs_buf_item_free(bip); |
892 | kmem_free(bip->bli_orig); | ||
893 | bip->bli_orig = NULL; | ||
894 | kmem_free(bip->bli_logged); | ||
895 | bip->bli_logged = NULL; | ||
896 | #endif /* XFS_TRANS_DEBUG */ | ||
897 | |||
898 | #ifdef XFS_BLI_TRACE | ||
899 | ktrace_free(bip->bli_trace); | ||
900 | #endif | ||
901 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
902 | } | 908 | } |
903 | 909 | ||
904 | 910 | ||
@@ -1120,6 +1126,7 @@ xfs_buf_iodone( | |||
1120 | 1126 | ||
1121 | ASSERT(bip->bli_buf == bp); | 1127 | ASSERT(bip->bli_buf == bp); |
1122 | 1128 | ||
1129 | xfs_buf_rele(bp); | ||
1123 | mp = bip->bli_item.li_mountp; | 1130 | mp = bip->bli_item.li_mountp; |
1124 | 1131 | ||
1125 | /* | 1132 | /* |
@@ -1136,18 +1143,7 @@ xfs_buf_iodone( | |||
1136 | * xfs_trans_delete_ail() drops the AIL lock. | 1143 | * xfs_trans_delete_ail() drops the AIL lock. |
1137 | */ | 1144 | */ |
1138 | xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); | 1145 | xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); |
1139 | 1146 | xfs_buf_item_free(bip); | |
1140 | #ifdef XFS_TRANS_DEBUG | ||
1141 | kmem_free(bip->bli_orig); | ||
1142 | bip->bli_orig = NULL; | ||
1143 | kmem_free(bip->bli_logged); | ||
1144 | bip->bli_logged = NULL; | ||
1145 | #endif /* XFS_TRANS_DEBUG */ | ||
1146 | |||
1147 | #ifdef XFS_BLI_TRACE | ||
1148 | ktrace_free(bip->bli_trace); | ||
1149 | #endif | ||
1150 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
1151 | } | 1147 | } |
1152 | 1148 | ||
1153 | #if defined(XFS_BLI_TRACE) | 1149 | #if defined(XFS_BLI_TRACE) |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 760f4c5b5160..75b0cd4da0ea 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -149,7 +149,14 @@ xfs_swap_extents( | |||
149 | 149 | ||
150 | sbp = &sxp->sx_stat; | 150 | sbp = &sxp->sx_stat; |
151 | 151 | ||
152 | xfs_lock_two_inodes(ip, tip, lock_flags); | 152 | /* |
153 | * we have to do two separate lock calls here to keep lockdep | ||
154 | * happy. If we try to get all the locks in one call, lock will | ||
155 | * report false positives when we drop the ILOCK and regain them | ||
156 | * below. | ||
157 | */ | ||
158 | xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); | ||
159 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); | ||
153 | locked = 1; | 160 | locked = 1; |
154 | 161 | ||
155 | /* Verify that both files have the same format */ | 162 | /* Verify that both files have the same format */ |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 00e80df9dd9d..dbd9cef852ec 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -4118,7 +4118,7 @@ xfs_iext_indirect_to_direct( | |||
4118 | ASSERT(nextents <= XFS_LINEAR_EXTS); | 4118 | ASSERT(nextents <= XFS_LINEAR_EXTS); |
4119 | size = nextents * sizeof(xfs_bmbt_rec_t); | 4119 | size = nextents * sizeof(xfs_bmbt_rec_t); |
4120 | 4120 | ||
4121 | xfs_iext_irec_compact_full(ifp); | 4121 | xfs_iext_irec_compact_pages(ifp); |
4122 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); | 4122 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); |
4123 | 4123 | ||
4124 | ep = ifp->if_u1.if_ext_irec->er_extbuf; | 4124 | ep = ifp->if_u1.if_ext_irec->er_extbuf; |
@@ -4449,8 +4449,7 @@ xfs_iext_irec_remove( | |||
4449 | * compaction policy is as follows: | 4449 | * compaction policy is as follows: |
4450 | * | 4450 | * |
4451 | * Full Compaction: Extents fit into a single page (or inline buffer) | 4451 | * Full Compaction: Extents fit into a single page (or inline buffer) |
4452 | * Full Compaction: Extents occupy less than 10% of allocated space | 4452 | * Partial Compaction: Extents occupy less than 50% of allocated space |
4453 | * Partial Compaction: Extents occupy > 10% and < 50% of allocated space | ||
4454 | * No Compaction: Extents occupy at least 50% of allocated space | 4453 | * No Compaction: Extents occupy at least 50% of allocated space |
4455 | */ | 4454 | */ |
4456 | void | 4455 | void |
@@ -4471,8 +4470,6 @@ xfs_iext_irec_compact( | |||
4471 | xfs_iext_direct_to_inline(ifp, nextents); | 4470 | xfs_iext_direct_to_inline(ifp, nextents); |
4472 | } else if (nextents <= XFS_LINEAR_EXTS) { | 4471 | } else if (nextents <= XFS_LINEAR_EXTS) { |
4473 | xfs_iext_indirect_to_direct(ifp); | 4472 | xfs_iext_indirect_to_direct(ifp); |
4474 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) { | ||
4475 | xfs_iext_irec_compact_full(ifp); | ||
4476 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { | 4473 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { |
4477 | xfs_iext_irec_compact_pages(ifp); | 4474 | xfs_iext_irec_compact_pages(ifp); |
4478 | } | 4475 | } |
@@ -4496,7 +4493,7 @@ xfs_iext_irec_compact_pages( | |||
4496 | erp_next = erp + 1; | 4493 | erp_next = erp + 1; |
4497 | if (erp_next->er_extcount <= | 4494 | if (erp_next->er_extcount <= |
4498 | (XFS_LINEAR_EXTS - erp->er_extcount)) { | 4495 | (XFS_LINEAR_EXTS - erp->er_extcount)) { |
4499 | memmove(&erp->er_extbuf[erp->er_extcount], | 4496 | memcpy(&erp->er_extbuf[erp->er_extcount], |
4500 | erp_next->er_extbuf, erp_next->er_extcount * | 4497 | erp_next->er_extbuf, erp_next->er_extcount * |
4501 | sizeof(xfs_bmbt_rec_t)); | 4498 | sizeof(xfs_bmbt_rec_t)); |
4502 | erp->er_extcount += erp_next->er_extcount; | 4499 | erp->er_extcount += erp_next->er_extcount; |
@@ -4516,91 +4513,6 @@ xfs_iext_irec_compact_pages( | |||
4516 | } | 4513 | } |
4517 | 4514 | ||
4518 | /* | 4515 | /* |
4519 | * Fully compact the extent records managed by the indirection array. | ||
4520 | */ | ||
4521 | void | ||
4522 | xfs_iext_irec_compact_full( | ||
4523 | xfs_ifork_t *ifp) /* inode fork pointer */ | ||
4524 | { | ||
4525 | xfs_bmbt_rec_host_t *ep, *ep_next; /* extent record pointers */ | ||
4526 | xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */ | ||
4527 | int erp_idx = 0; /* extent irec index */ | ||
4528 | int ext_avail; /* empty entries in ex list */ | ||
4529 | int ext_diff; /* number of exts to add */ | ||
4530 | int nlists; /* number of irec's (ex lists) */ | ||
4531 | |||
4532 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
4533 | |||
4534 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
4535 | erp = ifp->if_u1.if_ext_irec; | ||
4536 | ep = &erp->er_extbuf[erp->er_extcount]; | ||
4537 | erp_next = erp + 1; | ||
4538 | ep_next = erp_next->er_extbuf; | ||
4539 | |||
4540 | while (erp_idx < nlists - 1) { | ||
4541 | /* | ||
4542 | * Check how many extent records are available in this irec. | ||
4543 | * If there is none skip the whole exercise. | ||
4544 | */ | ||
4545 | ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; | ||
4546 | if (ext_avail) { | ||
4547 | |||
4548 | /* | ||
4549 | * Copy over as many as possible extent records into | ||
4550 | * the previous page. | ||
4551 | */ | ||
4552 | ext_diff = MIN(ext_avail, erp_next->er_extcount); | ||
4553 | memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); | ||
4554 | erp->er_extcount += ext_diff; | ||
4555 | erp_next->er_extcount -= ext_diff; | ||
4556 | |||
4557 | /* | ||
4558 | * If the next irec is empty now we can simply | ||
4559 | * remove it. | ||
4560 | */ | ||
4561 | if (erp_next->er_extcount == 0) { | ||
4562 | /* | ||
4563 | * Free page before removing extent record | ||
4564 | * so er_extoffs don't get modified in | ||
4565 | * xfs_iext_irec_remove. | ||
4566 | */ | ||
4567 | kmem_free(erp_next->er_extbuf); | ||
4568 | erp_next->er_extbuf = NULL; | ||
4569 | xfs_iext_irec_remove(ifp, erp_idx + 1); | ||
4570 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
4571 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
4572 | |||
4573 | /* | ||
4574 | * If the next irec is not empty move up the content | ||
4575 | * that has not been copied to the previous page to | ||
4576 | * the beggining of this one. | ||
4577 | */ | ||
4578 | } else { | ||
4579 | memmove(erp_next->er_extbuf, &ep_next[ext_diff], | ||
4580 | erp_next->er_extcount * | ||
4581 | sizeof(xfs_bmbt_rec_t)); | ||
4582 | ep_next = erp_next->er_extbuf; | ||
4583 | memset(&ep_next[erp_next->er_extcount], 0, | ||
4584 | (XFS_LINEAR_EXTS - | ||
4585 | erp_next->er_extcount) * | ||
4586 | sizeof(xfs_bmbt_rec_t)); | ||
4587 | } | ||
4588 | } | ||
4589 | |||
4590 | if (erp->er_extcount == XFS_LINEAR_EXTS) { | ||
4591 | erp_idx++; | ||
4592 | if (erp_idx < nlists) | ||
4593 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
4594 | else | ||
4595 | break; | ||
4596 | } | ||
4597 | ep = &erp->er_extbuf[erp->er_extcount]; | ||
4598 | erp_next = erp + 1; | ||
4599 | ep_next = erp_next->er_extbuf; | ||
4600 | } | ||
4601 | } | ||
4602 | |||
4603 | /* | ||
4604 | * This is called to update the er_extoff field in the indirection | 4516 | * This is called to update the er_extoff field in the indirection |
4605 | * array when extents have been added or removed from one of the | 4517 | * array when extents have been added or removed from one of the |
4606 | * extent lists. erp_idx contains the irec index to begin updating | 4518 | * extent lists. erp_idx contains the irec index to begin updating |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ccba14eb9dbe..0b02c6443551 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -124,16 +124,27 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, | |||
124 | STATIC int xlog_iclogs_empty(xlog_t *log); | 124 | STATIC int xlog_iclogs_empty(xlog_t *log); |
125 | 125 | ||
126 | #if defined(XFS_LOG_TRACE) | 126 | #if defined(XFS_LOG_TRACE) |
127 | |||
128 | #define XLOG_TRACE_LOGGRANT_SIZE 2048 | ||
129 | #define XLOG_TRACE_ICLOG_SIZE 256 | ||
130 | |||
131 | void | ||
132 | xlog_trace_loggrant_alloc(xlog_t *log) | ||
133 | { | ||
134 | log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS); | ||
135 | } | ||
136 | |||
137 | void | ||
138 | xlog_trace_loggrant_dealloc(xlog_t *log) | ||
139 | { | ||
140 | ktrace_free(log->l_grant_trace); | ||
141 | } | ||
142 | |||
127 | void | 143 | void |
128 | xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) | 144 | xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) |
129 | { | 145 | { |
130 | unsigned long cnts; | 146 | unsigned long cnts; |
131 | 147 | ||
132 | if (!log->l_grant_trace) { | ||
133 | log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP); | ||
134 | if (!log->l_grant_trace) | ||
135 | return; | ||
136 | } | ||
137 | /* ticket counts are 1 byte each */ | 148 | /* ticket counts are 1 byte each */ |
138 | cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; | 149 | cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; |
139 | 150 | ||
@@ -157,10 +168,20 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) | |||
157 | } | 168 | } |
158 | 169 | ||
159 | void | 170 | void |
171 | xlog_trace_iclog_alloc(xlog_in_core_t *iclog) | ||
172 | { | ||
173 | iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS); | ||
174 | } | ||
175 | |||
176 | void | ||
177 | xlog_trace_iclog_dealloc(xlog_in_core_t *iclog) | ||
178 | { | ||
179 | ktrace_free(iclog->ic_trace); | ||
180 | } | ||
181 | |||
182 | void | ||
160 | xlog_trace_iclog(xlog_in_core_t *iclog, uint state) | 183 | xlog_trace_iclog(xlog_in_core_t *iclog, uint state) |
161 | { | 184 | { |
162 | if (!iclog->ic_trace) | ||
163 | iclog->ic_trace = ktrace_alloc(256, KM_NOFS); | ||
164 | ktrace_enter(iclog->ic_trace, | 185 | ktrace_enter(iclog->ic_trace, |
165 | (void *)((unsigned long)state), | 186 | (void *)((unsigned long)state), |
166 | (void *)((unsigned long)current_pid()), | 187 | (void *)((unsigned long)current_pid()), |
@@ -170,8 +191,15 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state) | |||
170 | (void *)NULL, (void *)NULL); | 191 | (void *)NULL, (void *)NULL); |
171 | } | 192 | } |
172 | #else | 193 | #else |
194 | |||
195 | #define xlog_trace_loggrant_alloc(log) | ||
196 | #define xlog_trace_loggrant_dealloc(log) | ||
173 | #define xlog_trace_loggrant(log,tic,string) | 197 | #define xlog_trace_loggrant(log,tic,string) |
198 | |||
199 | #define xlog_trace_iclog_alloc(iclog) | ||
200 | #define xlog_trace_iclog_dealloc(iclog) | ||
174 | #define xlog_trace_iclog(iclog,state) | 201 | #define xlog_trace_iclog(iclog,state) |
202 | |||
175 | #endif /* XFS_LOG_TRACE */ | 203 | #endif /* XFS_LOG_TRACE */ |
176 | 204 | ||
177 | 205 | ||
@@ -1005,11 +1033,12 @@ xlog_iodone(xfs_buf_t *bp) | |||
1005 | l = iclog->ic_log; | 1033 | l = iclog->ic_log; |
1006 | 1034 | ||
1007 | /* | 1035 | /* |
1008 | * If the ordered flag has been removed by a lower | 1036 | * If the _XFS_BARRIER_FAILED flag was set by a lower |
1009 | * layer, it means the underlyin device no longer supports | 1037 | * layer, it means the underlying device no longer supports |
1010 | * barrier I/O. Warn loudly and turn off barriers. | 1038 | * barrier I/O. Warn loudly and turn off barriers. |
1011 | */ | 1039 | */ |
1012 | if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) { | 1040 | if (bp->b_flags & _XFS_BARRIER_FAILED) { |
1041 | bp->b_flags &= ~_XFS_BARRIER_FAILED; | ||
1013 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1042 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; |
1014 | xfs_fs_cmn_err(CE_WARN, l->l_mp, | 1043 | xfs_fs_cmn_err(CE_WARN, l->l_mp, |
1015 | "xlog_iodone: Barriers are no longer supported" | 1044 | "xlog_iodone: Barriers are no longer supported" |
@@ -1231,6 +1260,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1231 | spin_lock_init(&log->l_grant_lock); | 1260 | spin_lock_init(&log->l_grant_lock); |
1232 | sv_init(&log->l_flush_wait, 0, "flush_wait"); | 1261 | sv_init(&log->l_flush_wait, 0, "flush_wait"); |
1233 | 1262 | ||
1263 | xlog_trace_loggrant_alloc(log); | ||
1234 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | 1264 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ |
1235 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | 1265 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); |
1236 | 1266 | ||
@@ -1285,6 +1315,8 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1285 | sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); | 1315 | sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); |
1286 | sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); | 1316 | sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); |
1287 | 1317 | ||
1318 | xlog_trace_iclog_alloc(iclog); | ||
1319 | |||
1288 | iclogp = &iclog->ic_next; | 1320 | iclogp = &iclog->ic_next; |
1289 | } | 1321 | } |
1290 | *iclogp = log->l_iclog; /* complete ring */ | 1322 | *iclogp = log->l_iclog; /* complete ring */ |
@@ -1565,11 +1597,7 @@ xlog_dealloc_log(xlog_t *log) | |||
1565 | sv_destroy(&iclog->ic_force_wait); | 1597 | sv_destroy(&iclog->ic_force_wait); |
1566 | sv_destroy(&iclog->ic_write_wait); | 1598 | sv_destroy(&iclog->ic_write_wait); |
1567 | xfs_buf_free(iclog->ic_bp); | 1599 | xfs_buf_free(iclog->ic_bp); |
1568 | #ifdef XFS_LOG_TRACE | 1600 | xlog_trace_iclog_dealloc(iclog); |
1569 | if (iclog->ic_trace != NULL) { | ||
1570 | ktrace_free(iclog->ic_trace); | ||
1571 | } | ||
1572 | #endif | ||
1573 | next_iclog = iclog->ic_next; | 1601 | next_iclog = iclog->ic_next; |
1574 | kmem_free(iclog); | 1602 | kmem_free(iclog); |
1575 | iclog = next_iclog; | 1603 | iclog = next_iclog; |
@@ -1578,14 +1606,7 @@ xlog_dealloc_log(xlog_t *log) | |||
1578 | spinlock_destroy(&log->l_grant_lock); | 1606 | spinlock_destroy(&log->l_grant_lock); |
1579 | 1607 | ||
1580 | xfs_buf_free(log->l_xbuf); | 1608 | xfs_buf_free(log->l_xbuf); |
1581 | #ifdef XFS_LOG_TRACE | 1609 | xlog_trace_loggrant_dealloc(log); |
1582 | if (log->l_trace != NULL) { | ||
1583 | ktrace_free(log->l_trace); | ||
1584 | } | ||
1585 | if (log->l_grant_trace != NULL) { | ||
1586 | ktrace_free(log->l_grant_trace); | ||
1587 | } | ||
1588 | #endif | ||
1589 | log->l_mp->m_log = NULL; | 1610 | log->l_mp->m_log = NULL; |
1590 | kmem_free(log); | 1611 | kmem_free(log); |
1591 | } /* xlog_dealloc_log */ | 1612 | } /* xlog_dealloc_log */ |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index c8a5b22ee3e3..e7d8f84443fa 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -448,7 +448,6 @@ typedef struct log { | |||
448 | int l_grant_write_bytes; | 448 | int l_grant_write_bytes; |
449 | 449 | ||
450 | #ifdef XFS_LOG_TRACE | 450 | #ifdef XFS_LOG_TRACE |
451 | struct ktrace *l_trace; | ||
452 | struct ktrace *l_grant_trace; | 451 | struct ktrace *l_grant_trace; |
453 | #endif | 452 | #endif |
454 | 453 | ||
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index aa238c8fbd7a..8b6812f66a15 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -1838,6 +1838,12 @@ again: | |||
1838 | #endif | 1838 | #endif |
1839 | } | 1839 | } |
1840 | 1840 | ||
1841 | /* | ||
1842 | * xfs_lock_two_inodes() can only be used to lock one type of lock | ||
1843 | * at a time - the iolock or the ilock, but not both at once. If | ||
1844 | * we lock both at once, lockdep will report false positives saying | ||
1845 | * we have violated locking orders. | ||
1846 | */ | ||
1841 | void | 1847 | void |
1842 | xfs_lock_two_inodes( | 1848 | xfs_lock_two_inodes( |
1843 | xfs_inode_t *ip0, | 1849 | xfs_inode_t *ip0, |
@@ -1848,6 +1854,8 @@ xfs_lock_two_inodes( | |||
1848 | int attempts = 0; | 1854 | int attempts = 0; |
1849 | xfs_log_item_t *lp; | 1855 | xfs_log_item_t *lp; |
1850 | 1856 | ||
1857 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) | ||
1858 | ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); | ||
1851 | ASSERT(ip0->i_ino != ip1->i_ino); | 1859 | ASSERT(ip0->i_ino != ip1->i_ino); |
1852 | 1860 | ||
1853 | if (ip0->i_ino > ip1->i_ino) { | 1861 | if (ip0->i_ino > ip1->i_ino) { |
@@ -3152,6 +3160,13 @@ error1: /* Just cancel transaction */ | |||
3152 | /* | 3160 | /* |
3153 | * Zero file bytes between startoff and endoff inclusive. | 3161 | * Zero file bytes between startoff and endoff inclusive. |
3154 | * The iolock is held exclusive and no blocks are buffered. | 3162 | * The iolock is held exclusive and no blocks are buffered. |
3163 | * | ||
3164 | * This function is used by xfs_free_file_space() to zero | ||
3165 | * partial blocks when the range to free is not block aligned. | ||
3166 | * When unreserving space with boundaries that are not block | ||
3167 | * aligned we round up the start and round down the end | ||
3168 | * boundaries and then use this function to zero the parts of | ||
3169 | * the blocks that got dropped during the rounding. | ||
3155 | */ | 3170 | */ |
3156 | STATIC int | 3171 | STATIC int |
3157 | xfs_zero_remaining_bytes( | 3172 | xfs_zero_remaining_bytes( |
@@ -3168,6 +3183,17 @@ xfs_zero_remaining_bytes( | |||
3168 | int nimap; | 3183 | int nimap; |
3169 | int error = 0; | 3184 | int error = 0; |
3170 | 3185 | ||
3186 | /* | ||
3187 | * Avoid doing I/O beyond eof - it's not necessary | ||
3188 | * since nothing can read beyond eof. The space will | ||
3189 | * be zeroed when the file is extended anyway. | ||
3190 | */ | ||
3191 | if (startoff >= ip->i_size) | ||
3192 | return 0; | ||
3193 | |||
3194 | if (endoff > ip->i_size) | ||
3195 | endoff = ip->i_size; | ||
3196 | |||
3171 | bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, | 3197 | bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, |
3172 | XFS_IS_REALTIME_INODE(ip) ? | 3198 | XFS_IS_REALTIME_INODE(ip) ? |
3173 | mp->m_rtdev_targp : mp->m_ddev_targp); | 3199 | mp->m_rtdev_targp : mp->m_ddev_targp); |