diff options
Diffstat (limited to 'fs')
102 files changed, 4902 insertions, 3225 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 0ce72dcd6b96..84ab76a206a0 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
| @@ -930,7 +930,7 @@ config PROC_KCORE | |||
| 930 | 930 | ||
| 931 | config PROC_VMCORE | 931 | config PROC_VMCORE |
| 932 | bool "/proc/vmcore support (EXPERIMENTAL)" | 932 | bool "/proc/vmcore support (EXPERIMENTAL)" |
| 933 | depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP | 933 | depends on PROC_FS && CRASH_DUMP |
| 934 | default y | 934 | default y |
| 935 | help | 935 | help |
| 936 | Exports the dump image of crashed kernel in ELF format. | 936 | Exports the dump image of crashed kernel in ELF format. |
diff --git a/fs/Makefile b/fs/Makefile index 1e7a11bd4da1..277b079dec9e 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
| @@ -19,6 +19,7 @@ else | |||
| 19 | obj-y += no-block.o | 19 | obj-y += no-block.o |
| 20 | endif | 20 | endif |
| 21 | 21 | ||
| 22 | obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o | ||
| 22 | obj-$(CONFIG_INOTIFY) += inotify.o | 23 | obj-$(CONFIG_INOTIFY) += inotify.o |
| 23 | obj-$(CONFIG_INOTIFY_USER) += inotify_user.o | 24 | obj-$(CONFIG_INOTIFY_USER) += inotify_user.o |
| 24 | obj-$(CONFIG_EPOLL) += eventpoll.o | 25 | obj-$(CONFIG_EPOLL) += eventpoll.o |
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c new file mode 100644 index 000000000000..63e2ee63058d --- /dev/null +++ b/fs/bio-integrity.c | |||
| @@ -0,0 +1,719 @@ | |||
| 1 | /* | ||
| 2 | * bio-integrity.c - bio data integrity extensions | ||
| 3 | * | ||
| 4 | * Copyright (C) 2007, 2008 Oracle Corporation | ||
| 5 | * Written by: Martin K. Petersen <martin.petersen@oracle.com> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License version | ||
| 9 | * 2 as published by the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, but | ||
| 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU General Public License | ||
| 17 | * along with this program; see the file COPYING. If not, write to | ||
| 18 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, | ||
| 19 | * USA. | ||
| 20 | * | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <linux/blkdev.h> | ||
| 24 | #include <linux/mempool.h> | ||
| 25 | #include <linux/bio.h> | ||
| 26 | #include <linux/workqueue.h> | ||
| 27 | |||
| 28 | static struct kmem_cache *bio_integrity_slab __read_mostly; | ||
| 29 | static struct workqueue_struct *kintegrityd_wq; | ||
| 30 | |||
| 31 | /** | ||
| 32 | * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio | ||
| 33 | * @bio: bio to attach integrity metadata to | ||
| 34 | * @gfp_mask: Memory allocation mask | ||
| 35 | * @nr_vecs: Number of integrity metadata scatter-gather elements | ||
| 36 | * @bs: bio_set to allocate from | ||
| 37 | * | ||
| 38 | * Description: This function prepares a bio for attaching integrity | ||
| 39 | * metadata. nr_vecs specifies the maximum number of pages containing | ||
| 40 | * integrity metadata that can be attached. | ||
| 41 | */ | ||
| 42 | struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, | ||
| 43 | gfp_t gfp_mask, | ||
| 44 | unsigned int nr_vecs, | ||
| 45 | struct bio_set *bs) | ||
| 46 | { | ||
| 47 | struct bio_integrity_payload *bip; | ||
| 48 | struct bio_vec *iv; | ||
| 49 | unsigned long idx; | ||
| 50 | |||
| 51 | BUG_ON(bio == NULL); | ||
| 52 | |||
| 53 | bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); | ||
| 54 | if (unlikely(bip == NULL)) { | ||
| 55 | printk(KERN_ERR "%s: could not alloc bip\n", __func__); | ||
| 56 | return NULL; | ||
| 57 | } | ||
| 58 | |||
| 59 | memset(bip, 0, sizeof(*bip)); | ||
| 60 | |||
| 61 | iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, bs); | ||
| 62 | if (unlikely(iv == NULL)) { | ||
| 63 | printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__); | ||
| 64 | mempool_free(bip, bs->bio_integrity_pool); | ||
| 65 | return NULL; | ||
| 66 | } | ||
| 67 | |||
| 68 | bip->bip_pool = idx; | ||
| 69 | bip->bip_vec = iv; | ||
| 70 | bip->bip_bio = bio; | ||
| 71 | bio->bi_integrity = bip; | ||
| 72 | |||
| 73 | return bip; | ||
| 74 | } | ||
| 75 | EXPORT_SYMBOL(bio_integrity_alloc_bioset); | ||
| 76 | |||
| 77 | /** | ||
| 78 | * bio_integrity_alloc - Allocate integrity payload and attach it to bio | ||
| 79 | * @bio: bio to attach integrity metadata to | ||
| 80 | * @gfp_mask: Memory allocation mask | ||
| 81 | * @nr_vecs: Number of integrity metadata scatter-gather elements | ||
| 82 | * | ||
| 83 | * Description: This function prepares a bio for attaching integrity | ||
| 84 | * metadata. nr_vecs specifies the maximum number of pages containing | ||
| 85 | * integrity metadata that can be attached. | ||
| 86 | */ | ||
| 87 | struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, | ||
| 88 | gfp_t gfp_mask, | ||
| 89 | unsigned int nr_vecs) | ||
| 90 | { | ||
| 91 | return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set); | ||
| 92 | } | ||
| 93 | EXPORT_SYMBOL(bio_integrity_alloc); | ||
| 94 | |||
| 95 | /** | ||
| 96 | * bio_integrity_free - Free bio integrity payload | ||
| 97 | * @bio: bio containing bip to be freed | ||
| 98 | * @bs: bio_set this bio was allocated from | ||
| 99 | * | ||
| 100 | * Description: Used to free the integrity portion of a bio. Usually | ||
| 101 | * called from bio_free(). | ||
| 102 | */ | ||
| 103 | void bio_integrity_free(struct bio *bio, struct bio_set *bs) | ||
| 104 | { | ||
| 105 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 106 | |||
| 107 | BUG_ON(bip == NULL); | ||
| 108 | |||
| 109 | /* A cloned bio doesn't own the integrity metadata */ | ||
| 110 | if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL) | ||
| 111 | kfree(bip->bip_buf); | ||
| 112 | |||
| 113 | mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); | ||
| 114 | mempool_free(bip, bs->bio_integrity_pool); | ||
| 115 | |||
| 116 | bio->bi_integrity = NULL; | ||
| 117 | } | ||
| 118 | EXPORT_SYMBOL(bio_integrity_free); | ||
| 119 | |||
| 120 | /** | ||
| 121 | * bio_integrity_add_page - Attach integrity metadata | ||
| 122 | * @bio: bio to update | ||
| 123 | * @page: page containing integrity metadata | ||
| 124 | * @len: number of bytes of integrity metadata in page | ||
| 125 | * @offset: start offset within page | ||
| 126 | * | ||
| 127 | * Description: Attach a page containing integrity metadata to bio. | ||
| 128 | */ | ||
| 129 | int bio_integrity_add_page(struct bio *bio, struct page *page, | ||
| 130 | unsigned int len, unsigned int offset) | ||
| 131 | { | ||
| 132 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 133 | struct bio_vec *iv; | ||
| 134 | |||
| 135 | if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) { | ||
| 136 | printk(KERN_ERR "%s: bip_vec full\n", __func__); | ||
| 137 | return 0; | ||
| 138 | } | ||
| 139 | |||
| 140 | iv = bip_vec_idx(bip, bip->bip_vcnt); | ||
| 141 | BUG_ON(iv == NULL); | ||
| 142 | BUG_ON(iv->bv_page != NULL); | ||
| 143 | |||
| 144 | iv->bv_page = page; | ||
| 145 | iv->bv_len = len; | ||
| 146 | iv->bv_offset = offset; | ||
| 147 | bip->bip_vcnt++; | ||
| 148 | |||
| 149 | return len; | ||
| 150 | } | ||
| 151 | EXPORT_SYMBOL(bio_integrity_add_page); | ||
| 152 | |||
| 153 | /** | ||
| 154 | * bio_integrity_enabled - Check whether integrity can be passed | ||
| 155 | * @bio: bio to check | ||
| 156 | * | ||
| 157 | * Description: Determines whether bio_integrity_prep() can be called | ||
| 158 | * on this bio or not. bio data direction and target device must be | ||
| 159 | * set prior to calling. The functions honors the write_generate and | ||
| 160 | * read_verify flags in sysfs. | ||
| 161 | */ | ||
| 162 | int bio_integrity_enabled(struct bio *bio) | ||
| 163 | { | ||
| 164 | /* Already protected? */ | ||
| 165 | if (bio_integrity(bio)) | ||
| 166 | return 0; | ||
| 167 | |||
| 168 | return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio)); | ||
| 169 | } | ||
| 170 | EXPORT_SYMBOL(bio_integrity_enabled); | ||
| 171 | |||
| 172 | /** | ||
| 173 | * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto | ||
| 174 | * @bi: blk_integrity profile for device | ||
| 175 | * @sectors: Number of 512 sectors to convert | ||
| 176 | * | ||
| 177 | * Description: The block layer calculates everything in 512 byte | ||
| 178 | * sectors but integrity metadata is done in terms of the hardware | ||
| 179 | * sector size of the storage device. Convert the block layer sectors | ||
| 180 | * to physical sectors. | ||
| 181 | */ | ||
| 182 | static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi, | ||
| 183 | unsigned int sectors) | ||
| 184 | { | ||
| 185 | /* At this point there are only 512b or 4096b DIF/EPP devices */ | ||
| 186 | if (bi->sector_size == 4096) | ||
| 187 | return sectors >>= 3; | ||
| 188 | |||
| 189 | return sectors; | ||
| 190 | } | ||
| 191 | |||
| 192 | /** | ||
| 193 | * bio_integrity_tag_size - Retrieve integrity tag space | ||
| 194 | * @bio: bio to inspect | ||
| 195 | * | ||
| 196 | * Description: Returns the maximum number of tag bytes that can be | ||
| 197 | * attached to this bio. Filesystems can use this to determine how | ||
| 198 | * much metadata to attach to an I/O. | ||
| 199 | */ | ||
| 200 | unsigned int bio_integrity_tag_size(struct bio *bio) | ||
| 201 | { | ||
| 202 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | ||
| 203 | |||
| 204 | BUG_ON(bio->bi_size == 0); | ||
| 205 | |||
| 206 | return bi->tag_size * (bio->bi_size / bi->sector_size); | ||
| 207 | } | ||
| 208 | EXPORT_SYMBOL(bio_integrity_tag_size); | ||
| 209 | |||
| 210 | int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set) | ||
| 211 | { | ||
| 212 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 213 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | ||
| 214 | unsigned int nr_sectors; | ||
| 215 | |||
| 216 | BUG_ON(bip->bip_buf == NULL); | ||
| 217 | |||
| 218 | if (bi->tag_size == 0) | ||
| 219 | return -1; | ||
| 220 | |||
| 221 | nr_sectors = bio_integrity_hw_sectors(bi, | ||
| 222 | DIV_ROUND_UP(len, bi->tag_size)); | ||
| 223 | |||
| 224 | if (nr_sectors * bi->tuple_size > bip->bip_size) { | ||
| 225 | printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", | ||
| 226 | __func__, nr_sectors * bi->tuple_size, bip->bip_size); | ||
| 227 | return -1; | ||
| 228 | } | ||
| 229 | |||
| 230 | if (set) | ||
| 231 | bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors); | ||
| 232 | else | ||
| 233 | bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors); | ||
| 234 | |||
| 235 | return 0; | ||
| 236 | } | ||
| 237 | |||
| 238 | /** | ||
| 239 | * bio_integrity_set_tag - Attach a tag buffer to a bio | ||
| 240 | * @bio: bio to attach buffer to | ||
| 241 | * @tag_buf: Pointer to a buffer containing tag data | ||
| 242 | * @len: Length of the included buffer | ||
| 243 | * | ||
| 244 | * Description: Use this function to tag a bio by leveraging the extra | ||
| 245 | * space provided by devices formatted with integrity protection. The | ||
| 246 | * size of the integrity buffer must be <= to the size reported by | ||
| 247 | * bio_integrity_tag_size(). | ||
| 248 | */ | ||
| 249 | int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len) | ||
| 250 | { | ||
| 251 | BUG_ON(bio_data_dir(bio) != WRITE); | ||
| 252 | |||
| 253 | return bio_integrity_tag(bio, tag_buf, len, 1); | ||
| 254 | } | ||
| 255 | EXPORT_SYMBOL(bio_integrity_set_tag); | ||
| 256 | |||
| 257 | /** | ||
| 258 | * bio_integrity_get_tag - Retrieve a tag buffer from a bio | ||
| 259 | * @bio: bio to retrieve buffer from | ||
| 260 | * @tag_buf: Pointer to a buffer for the tag data | ||
| 261 | * @len: Length of the target buffer | ||
| 262 | * | ||
| 263 | * Description: Use this function to retrieve the tag buffer from a | ||
| 264 | * completed I/O. The size of the integrity buffer must be <= to the | ||
| 265 | * size reported by bio_integrity_tag_size(). | ||
| 266 | */ | ||
| 267 | int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len) | ||
| 268 | { | ||
| 269 | BUG_ON(bio_data_dir(bio) != READ); | ||
| 270 | |||
| 271 | return bio_integrity_tag(bio, tag_buf, len, 0); | ||
| 272 | } | ||
| 273 | EXPORT_SYMBOL(bio_integrity_get_tag); | ||
| 274 | |||
| 275 | /** | ||
| 276 | * bio_integrity_generate - Generate integrity metadata for a bio | ||
| 277 | * @bio: bio to generate integrity metadata for | ||
| 278 | * | ||
| 279 | * Description: Generates integrity metadata for a bio by calling the | ||
| 280 | * block device's generation callback function. The bio must have a | ||
| 281 | * bip attached with enough room to accommodate the generated | ||
| 282 | * integrity metadata. | ||
| 283 | */ | ||
| 284 | static void bio_integrity_generate(struct bio *bio) | ||
| 285 | { | ||
| 286 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | ||
| 287 | struct blk_integrity_exchg bix; | ||
| 288 | struct bio_vec *bv; | ||
| 289 | sector_t sector = bio->bi_sector; | ||
| 290 | unsigned int i, sectors, total; | ||
| 291 | void *prot_buf = bio->bi_integrity->bip_buf; | ||
| 292 | |||
| 293 | total = 0; | ||
| 294 | bix.disk_name = bio->bi_bdev->bd_disk->disk_name; | ||
| 295 | bix.sector_size = bi->sector_size; | ||
| 296 | |||
| 297 | bio_for_each_segment(bv, bio, i) { | ||
| 298 | void *kaddr = kmap_atomic(bv->bv_page, KM_USER0); | ||
| 299 | bix.data_buf = kaddr + bv->bv_offset; | ||
| 300 | bix.data_size = bv->bv_len; | ||
| 301 | bix.prot_buf = prot_buf; | ||
| 302 | bix.sector = sector; | ||
| 303 | |||
| 304 | bi->generate_fn(&bix); | ||
| 305 | |||
| 306 | sectors = bv->bv_len / bi->sector_size; | ||
| 307 | sector += sectors; | ||
| 308 | prot_buf += sectors * bi->tuple_size; | ||
| 309 | total += sectors * bi->tuple_size; | ||
| 310 | BUG_ON(total > bio->bi_integrity->bip_size); | ||
| 311 | |||
| 312 | kunmap_atomic(kaddr, KM_USER0); | ||
| 313 | } | ||
| 314 | } | ||
| 315 | |||
| 316 | /** | ||
| 317 | * bio_integrity_prep - Prepare bio for integrity I/O | ||
| 318 | * @bio: bio to prepare | ||
| 319 | * | ||
| 320 | * Description: Allocates a buffer for integrity metadata, maps the | ||
| 321 | * pages and attaches them to a bio. The bio must have data | ||
| 322 | * direction, target device and start sector set priot to calling. In | ||
| 323 | * the WRITE case, integrity metadata will be generated using the | ||
| 324 | * block device's integrity function. In the READ case, the buffer | ||
| 325 | * will be prepared for DMA and a suitable end_io handler set up. | ||
| 326 | */ | ||
| 327 | int bio_integrity_prep(struct bio *bio) | ||
| 328 | { | ||
| 329 | struct bio_integrity_payload *bip; | ||
| 330 | struct blk_integrity *bi; | ||
| 331 | struct request_queue *q; | ||
| 332 | void *buf; | ||
| 333 | unsigned long start, end; | ||
| 334 | unsigned int len, nr_pages; | ||
| 335 | unsigned int bytes, offset, i; | ||
| 336 | unsigned int sectors; | ||
| 337 | |||
| 338 | bi = bdev_get_integrity(bio->bi_bdev); | ||
| 339 | q = bdev_get_queue(bio->bi_bdev); | ||
| 340 | BUG_ON(bi == NULL); | ||
| 341 | BUG_ON(bio_integrity(bio)); | ||
| 342 | |||
| 343 | sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio)); | ||
| 344 | |||
| 345 | /* Allocate kernel buffer for protection data */ | ||
| 346 | len = sectors * blk_integrity_tuple_size(bi); | ||
| 347 | buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp); | ||
| 348 | if (unlikely(buf == NULL)) { | ||
| 349 | printk(KERN_ERR "could not allocate integrity buffer\n"); | ||
| 350 | return -EIO; | ||
| 351 | } | ||
| 352 | |||
| 353 | end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
| 354 | start = ((unsigned long) buf) >> PAGE_SHIFT; | ||
| 355 | nr_pages = end - start; | ||
| 356 | |||
| 357 | /* Allocate bio integrity payload and integrity vectors */ | ||
| 358 | bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages); | ||
| 359 | if (unlikely(bip == NULL)) { | ||
| 360 | printk(KERN_ERR "could not allocate data integrity bioset\n"); | ||
| 361 | kfree(buf); | ||
| 362 | return -EIO; | ||
| 363 | } | ||
| 364 | |||
| 365 | bip->bip_buf = buf; | ||
| 366 | bip->bip_size = len; | ||
| 367 | bip->bip_sector = bio->bi_sector; | ||
| 368 | |||
| 369 | /* Map it */ | ||
| 370 | offset = offset_in_page(buf); | ||
| 371 | for (i = 0 ; i < nr_pages ; i++) { | ||
| 372 | int ret; | ||
| 373 | bytes = PAGE_SIZE - offset; | ||
| 374 | |||
| 375 | if (len <= 0) | ||
| 376 | break; | ||
| 377 | |||
| 378 | if (bytes > len) | ||
| 379 | bytes = len; | ||
| 380 | |||
| 381 | ret = bio_integrity_add_page(bio, virt_to_page(buf), | ||
| 382 | bytes, offset); | ||
| 383 | |||
| 384 | if (ret == 0) | ||
| 385 | return 0; | ||
| 386 | |||
| 387 | if (ret < bytes) | ||
| 388 | break; | ||
| 389 | |||
| 390 | buf += bytes; | ||
| 391 | len -= bytes; | ||
| 392 | offset = 0; | ||
| 393 | } | ||
| 394 | |||
| 395 | /* Install custom I/O completion handler if read verify is enabled */ | ||
| 396 | if (bio_data_dir(bio) == READ) { | ||
| 397 | bip->bip_end_io = bio->bi_end_io; | ||
| 398 | bio->bi_end_io = bio_integrity_endio; | ||
| 399 | } | ||
| 400 | |||
| 401 | /* Auto-generate integrity metadata if this is a write */ | ||
| 402 | if (bio_data_dir(bio) == WRITE) | ||
| 403 | bio_integrity_generate(bio); | ||
| 404 | |||
| 405 | return 0; | ||
| 406 | } | ||
| 407 | EXPORT_SYMBOL(bio_integrity_prep); | ||
| 408 | |||
| 409 | /** | ||
| 410 | * bio_integrity_verify - Verify integrity metadata for a bio | ||
| 411 | * @bio: bio to verify | ||
| 412 | * | ||
| 413 | * Description: This function is called to verify the integrity of a | ||
| 414 | * bio. The data in the bio io_vec is compared to the integrity | ||
| 415 | * metadata returned by the HBA. | ||
| 416 | */ | ||
| 417 | static int bio_integrity_verify(struct bio *bio) | ||
| 418 | { | ||
| 419 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | ||
| 420 | struct blk_integrity_exchg bix; | ||
| 421 | struct bio_vec *bv; | ||
| 422 | sector_t sector = bio->bi_integrity->bip_sector; | ||
| 423 | unsigned int i, sectors, total, ret; | ||
| 424 | void *prot_buf = bio->bi_integrity->bip_buf; | ||
| 425 | |||
| 426 | ret = total = 0; | ||
| 427 | bix.disk_name = bio->bi_bdev->bd_disk->disk_name; | ||
| 428 | bix.sector_size = bi->sector_size; | ||
| 429 | |||
| 430 | bio_for_each_segment(bv, bio, i) { | ||
| 431 | void *kaddr = kmap_atomic(bv->bv_page, KM_USER0); | ||
| 432 | bix.data_buf = kaddr + bv->bv_offset; | ||
| 433 | bix.data_size = bv->bv_len; | ||
| 434 | bix.prot_buf = prot_buf; | ||
| 435 | bix.sector = sector; | ||
| 436 | |||
| 437 | ret = bi->verify_fn(&bix); | ||
| 438 | |||
| 439 | if (ret) { | ||
| 440 | kunmap_atomic(kaddr, KM_USER0); | ||
| 441 | break; | ||
| 442 | } | ||
| 443 | |||
| 444 | sectors = bv->bv_len / bi->sector_size; | ||
| 445 | sector += sectors; | ||
| 446 | prot_buf += sectors * bi->tuple_size; | ||
| 447 | total += sectors * bi->tuple_size; | ||
| 448 | BUG_ON(total > bio->bi_integrity->bip_size); | ||
| 449 | |||
| 450 | kunmap_atomic(kaddr, KM_USER0); | ||
| 451 | } | ||
| 452 | |||
| 453 | return ret; | ||
| 454 | } | ||
| 455 | |||
| 456 | /** | ||
| 457 | * bio_integrity_verify_fn - Integrity I/O completion worker | ||
| 458 | * @work: Work struct stored in bio to be verified | ||
| 459 | * | ||
| 460 | * Description: This workqueue function is called to complete a READ | ||
| 461 | * request. The function verifies the transferred integrity metadata | ||
| 462 | * and then calls the original bio end_io function. | ||
| 463 | */ | ||
| 464 | static void bio_integrity_verify_fn(struct work_struct *work) | ||
| 465 | { | ||
| 466 | struct bio_integrity_payload *bip = | ||
| 467 | container_of(work, struct bio_integrity_payload, bip_work); | ||
| 468 | struct bio *bio = bip->bip_bio; | ||
| 469 | int error = bip->bip_error; | ||
| 470 | |||
| 471 | if (bio_integrity_verify(bio)) { | ||
| 472 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
| 473 | error = -EIO; | ||
| 474 | } | ||
| 475 | |||
| 476 | /* Restore original bio completion handler */ | ||
| 477 | bio->bi_end_io = bip->bip_end_io; | ||
| 478 | |||
| 479 | if (bio->bi_end_io) | ||
| 480 | bio->bi_end_io(bio, error); | ||
| 481 | } | ||
| 482 | |||
| 483 | /** | ||
| 484 | * bio_integrity_endio - Integrity I/O completion function | ||
| 485 | * @bio: Protected bio | ||
| 486 | * @error: Pointer to errno | ||
| 487 | * | ||
| 488 | * Description: Completion for integrity I/O | ||
| 489 | * | ||
| 490 | * Normally I/O completion is done in interrupt context. However, | ||
| 491 | * verifying I/O integrity is a time-consuming task which must be run | ||
| 492 | * in process context. This function postpones completion | ||
| 493 | * accordingly. | ||
| 494 | */ | ||
| 495 | void bio_integrity_endio(struct bio *bio, int error) | ||
| 496 | { | ||
| 497 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 498 | |||
| 499 | BUG_ON(bip->bip_bio != bio); | ||
| 500 | |||
| 501 | bip->bip_error = error; | ||
| 502 | INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); | ||
| 503 | queue_work(kintegrityd_wq, &bip->bip_work); | ||
| 504 | } | ||
| 505 | EXPORT_SYMBOL(bio_integrity_endio); | ||
| 506 | |||
| 507 | /** | ||
| 508 | * bio_integrity_mark_head - Advance bip_vec skip bytes | ||
| 509 | * @bip: Integrity vector to advance | ||
| 510 | * @skip: Number of bytes to advance it | ||
| 511 | */ | ||
| 512 | void bio_integrity_mark_head(struct bio_integrity_payload *bip, | ||
| 513 | unsigned int skip) | ||
| 514 | { | ||
| 515 | struct bio_vec *iv; | ||
| 516 | unsigned int i; | ||
| 517 | |||
| 518 | bip_for_each_vec(iv, bip, i) { | ||
| 519 | if (skip == 0) { | ||
| 520 | bip->bip_idx = i; | ||
| 521 | return; | ||
| 522 | } else if (skip >= iv->bv_len) { | ||
| 523 | skip -= iv->bv_len; | ||
| 524 | } else { /* skip < iv->bv_len) */ | ||
| 525 | iv->bv_offset += skip; | ||
| 526 | iv->bv_len -= skip; | ||
| 527 | bip->bip_idx = i; | ||
| 528 | return; | ||
| 529 | } | ||
| 530 | } | ||
| 531 | } | ||
| 532 | |||
| 533 | /** | ||
| 534 | * bio_integrity_mark_tail - Truncate bip_vec to be len bytes long | ||
| 535 | * @bip: Integrity vector to truncate | ||
| 536 | * @len: New length of integrity vector | ||
| 537 | */ | ||
| 538 | void bio_integrity_mark_tail(struct bio_integrity_payload *bip, | ||
| 539 | unsigned int len) | ||
| 540 | { | ||
| 541 | struct bio_vec *iv; | ||
| 542 | unsigned int i; | ||
| 543 | |||
| 544 | bip_for_each_vec(iv, bip, i) { | ||
| 545 | if (len == 0) { | ||
| 546 | bip->bip_vcnt = i; | ||
| 547 | return; | ||
| 548 | } else if (len >= iv->bv_len) { | ||
| 549 | len -= iv->bv_len; | ||
| 550 | } else { /* len < iv->bv_len) */ | ||
| 551 | iv->bv_len = len; | ||
| 552 | len = 0; | ||
| 553 | } | ||
| 554 | } | ||
| 555 | } | ||
| 556 | |||
| 557 | /** | ||
| 558 | * bio_integrity_advance - Advance integrity vector | ||
| 559 | * @bio: bio whose integrity vector to update | ||
| 560 | * @bytes_done: number of data bytes that have been completed | ||
| 561 | * | ||
| 562 | * Description: This function calculates how many integrity bytes the | ||
| 563 | * number of completed data bytes correspond to and advances the | ||
| 564 | * integrity vector accordingly. | ||
| 565 | */ | ||
| 566 | void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) | ||
| 567 | { | ||
| 568 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 569 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | ||
| 570 | unsigned int nr_sectors; | ||
| 571 | |||
| 572 | BUG_ON(bip == NULL); | ||
| 573 | BUG_ON(bi == NULL); | ||
| 574 | |||
| 575 | nr_sectors = bio_integrity_hw_sectors(bi, bytes_done >> 9); | ||
| 576 | bio_integrity_mark_head(bip, nr_sectors * bi->tuple_size); | ||
| 577 | } | ||
| 578 | EXPORT_SYMBOL(bio_integrity_advance); | ||
| 579 | |||
| 580 | /** | ||
| 581 | * bio_integrity_trim - Trim integrity vector | ||
| 582 | * @bio: bio whose integrity vector to update | ||
| 583 | * @offset: offset to first data sector | ||
| 584 | * @sectors: number of data sectors | ||
| 585 | * | ||
| 586 | * Description: Used to trim the integrity vector in a cloned bio. | ||
| 587 | * The ivec will be advanced corresponding to 'offset' data sectors | ||
| 588 | * and the length will be truncated corresponding to 'len' data | ||
| 589 | * sectors. | ||
| 590 | */ | ||
| 591 | void bio_integrity_trim(struct bio *bio, unsigned int offset, | ||
| 592 | unsigned int sectors) | ||
| 593 | { | ||
| 594 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 595 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | ||
| 596 | unsigned int nr_sectors; | ||
| 597 | |||
| 598 | BUG_ON(bip == NULL); | ||
| 599 | BUG_ON(bi == NULL); | ||
| 600 | BUG_ON(!bio_flagged(bio, BIO_CLONED)); | ||
| 601 | |||
| 602 | nr_sectors = bio_integrity_hw_sectors(bi, sectors); | ||
| 603 | bip->bip_sector = bip->bip_sector + offset; | ||
| 604 | bio_integrity_mark_head(bip, offset * bi->tuple_size); | ||
| 605 | bio_integrity_mark_tail(bip, sectors * bi->tuple_size); | ||
| 606 | } | ||
| 607 | EXPORT_SYMBOL(bio_integrity_trim); | ||
| 608 | |||
| 609 | /** | ||
| 610 | * bio_integrity_split - Split integrity metadata | ||
| 611 | * @bio: Protected bio | ||
| 612 | * @bp: Resulting bio_pair | ||
| 613 | * @sectors: Offset | ||
| 614 | * | ||
| 615 | * Description: Splits an integrity page into a bio_pair. | ||
| 616 | */ | ||
| 617 | void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) | ||
| 618 | { | ||
| 619 | struct blk_integrity *bi; | ||
| 620 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 621 | unsigned int nr_sectors; | ||
| 622 | |||
| 623 | if (bio_integrity(bio) == 0) | ||
| 624 | return; | ||
| 625 | |||
| 626 | bi = bdev_get_integrity(bio->bi_bdev); | ||
| 627 | BUG_ON(bi == NULL); | ||
| 628 | BUG_ON(bip->bip_vcnt != 1); | ||
| 629 | |||
| 630 | nr_sectors = bio_integrity_hw_sectors(bi, sectors); | ||
| 631 | |||
| 632 | bp->bio1.bi_integrity = &bp->bip1; | ||
| 633 | bp->bio2.bi_integrity = &bp->bip2; | ||
| 634 | |||
| 635 | bp->iv1 = bip->bip_vec[0]; | ||
| 636 | bp->iv2 = bip->bip_vec[0]; | ||
| 637 | |||
| 638 | bp->bip1.bip_vec = &bp->iv1; | ||
| 639 | bp->bip2.bip_vec = &bp->iv2; | ||
| 640 | |||
| 641 | bp->iv1.bv_len = sectors * bi->tuple_size; | ||
| 642 | bp->iv2.bv_offset += sectors * bi->tuple_size; | ||
| 643 | bp->iv2.bv_len -= sectors * bi->tuple_size; | ||
| 644 | |||
| 645 | bp->bip1.bip_sector = bio->bi_integrity->bip_sector; | ||
| 646 | bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors; | ||
| 647 | |||
| 648 | bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1; | ||
| 649 | bp->bip1.bip_idx = bp->bip2.bip_idx = 0; | ||
| 650 | } | ||
| 651 | EXPORT_SYMBOL(bio_integrity_split); | ||
| 652 | |||
| 653 | /** | ||
| 654 | * bio_integrity_clone - Callback for cloning bios with integrity metadata | ||
| 655 | * @bio: New bio | ||
| 656 | * @bio_src: Original bio | ||
| 657 | * @bs: bio_set to allocate bip from | ||
| 658 | * | ||
| 659 | * Description: Called to allocate a bip when cloning a bio | ||
| 660 | */ | ||
| 661 | int bio_integrity_clone(struct bio *bio, struct bio *bio_src, | ||
| 662 | struct bio_set *bs) | ||
| 663 | { | ||
| 664 | struct bio_integrity_payload *bip_src = bio_src->bi_integrity; | ||
| 665 | struct bio_integrity_payload *bip; | ||
| 666 | |||
| 667 | BUG_ON(bip_src == NULL); | ||
| 668 | |||
| 669 | bip = bio_integrity_alloc_bioset(bio, GFP_NOIO, bip_src->bip_vcnt, bs); | ||
| 670 | |||
| 671 | if (bip == NULL) | ||
| 672 | return -EIO; | ||
| 673 | |||
| 674 | memcpy(bip->bip_vec, bip_src->bip_vec, | ||
| 675 | bip_src->bip_vcnt * sizeof(struct bio_vec)); | ||
| 676 | |||
| 677 | bip->bip_sector = bip_src->bip_sector; | ||
| 678 | bip->bip_vcnt = bip_src->bip_vcnt; | ||
| 679 | bip->bip_idx = bip_src->bip_idx; | ||
| 680 | |||
| 681 | return 0; | ||
| 682 | } | ||
| 683 | EXPORT_SYMBOL(bio_integrity_clone); | ||
| 684 | |||
| 685 | int bioset_integrity_create(struct bio_set *bs, int pool_size) | ||
| 686 | { | ||
| 687 | bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, | ||
| 688 | bio_integrity_slab); | ||
| 689 | if (!bs->bio_integrity_pool) | ||
| 690 | return -1; | ||
| 691 | |||
| 692 | return 0; | ||
| 693 | } | ||
| 694 | EXPORT_SYMBOL(bioset_integrity_create); | ||
| 695 | |||
| 696 | void bioset_integrity_free(struct bio_set *bs) | ||
| 697 | { | ||
| 698 | if (bs->bio_integrity_pool) | ||
| 699 | mempool_destroy(bs->bio_integrity_pool); | ||
| 700 | } | ||
| 701 | EXPORT_SYMBOL(bioset_integrity_free); | ||
| 702 | |||
| 703 | void __init bio_integrity_init_slab(void) | ||
| 704 | { | ||
| 705 | bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, | ||
| 706 | SLAB_HWCACHE_ALIGN|SLAB_PANIC); | ||
| 707 | } | ||
| 708 | EXPORT_SYMBOL(bio_integrity_init_slab); | ||
| 709 | |||
| 710 | static int __init integrity_init(void) | ||
| 711 | { | ||
| 712 | kintegrityd_wq = create_workqueue("kintegrityd"); | ||
| 713 | |||
| 714 | if (!kintegrityd_wq) | ||
| 715 | panic("Failed to create kintegrityd\n"); | ||
| 716 | |||
| 717 | return 0; | ||
| 718 | } | ||
| 719 | subsys_initcall(integrity_init); | ||
| @@ -28,25 +28,10 @@ | |||
| 28 | #include <linux/blktrace_api.h> | 28 | #include <linux/blktrace_api.h> |
| 29 | #include <scsi/sg.h> /* for struct sg_iovec */ | 29 | #include <scsi/sg.h> /* for struct sg_iovec */ |
| 30 | 30 | ||
| 31 | #define BIO_POOL_SIZE 2 | ||
| 32 | |||
| 33 | static struct kmem_cache *bio_slab __read_mostly; | 31 | static struct kmem_cache *bio_slab __read_mostly; |
| 34 | 32 | ||
| 35 | #define BIOVEC_NR_POOLS 6 | ||
| 36 | |||
| 37 | /* | ||
| 38 | * a small number of entries is fine, not going to be performance critical. | ||
| 39 | * basically we just need to survive | ||
| 40 | */ | ||
| 41 | #define BIO_SPLIT_ENTRIES 2 | ||
| 42 | mempool_t *bio_split_pool __read_mostly; | 33 | mempool_t *bio_split_pool __read_mostly; |
| 43 | 34 | ||
| 44 | struct biovec_slab { | ||
| 45 | int nr_vecs; | ||
| 46 | char *name; | ||
| 47 | struct kmem_cache *slab; | ||
| 48 | }; | ||
| 49 | |||
| 50 | /* | 35 | /* |
| 51 | * if you change this list, also change bvec_alloc or things will | 36 | * if you change this list, also change bvec_alloc or things will |
| 52 | * break badly! cannot be bigger than what you can fit into an | 37 | * break badly! cannot be bigger than what you can fit into an |
| @@ -60,23 +45,17 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { | |||
| 60 | #undef BV | 45 | #undef BV |
| 61 | 46 | ||
| 62 | /* | 47 | /* |
| 63 | * bio_set is used to allow other portions of the IO system to | ||
| 64 | * allocate their own private memory pools for bio and iovec structures. | ||
| 65 | * These memory pools in turn all allocate from the bio_slab | ||
| 66 | * and the bvec_slabs[]. | ||
| 67 | */ | ||
| 68 | struct bio_set { | ||
| 69 | mempool_t *bio_pool; | ||
| 70 | mempool_t *bvec_pools[BIOVEC_NR_POOLS]; | ||
| 71 | }; | ||
| 72 | |||
| 73 | /* | ||
| 74 | * fs_bio_set is the bio_set containing bio and iovec memory pools used by | 48 | * fs_bio_set is the bio_set containing bio and iovec memory pools used by |
| 75 | * IO code that does not need private memory pools. | 49 | * IO code that does not need private memory pools. |
| 76 | */ | 50 | */ |
| 77 | static struct bio_set *fs_bio_set; | 51 | struct bio_set *fs_bio_set; |
| 52 | |||
| 53 | unsigned int bvec_nr_vecs(unsigned short idx) | ||
| 54 | { | ||
| 55 | return bvec_slabs[idx].nr_vecs; | ||
| 56 | } | ||
| 78 | 57 | ||
| 79 | static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) | 58 | struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) |
| 80 | { | 59 | { |
| 81 | struct bio_vec *bvl; | 60 | struct bio_vec *bvl; |
| 82 | 61 | ||
| @@ -117,6 +96,9 @@ void bio_free(struct bio *bio, struct bio_set *bio_set) | |||
| 117 | mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); | 96 | mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); |
| 118 | } | 97 | } |
| 119 | 98 | ||
| 99 | if (bio_integrity(bio)) | ||
| 100 | bio_integrity_free(bio, bio_set); | ||
| 101 | |||
| 120 | mempool_free(bio, bio_set->bio_pool); | 102 | mempool_free(bio, bio_set->bio_pool); |
| 121 | } | 103 | } |
| 122 | 104 | ||
| @@ -275,9 +257,19 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) | |||
| 275 | { | 257 | { |
| 276 | struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); | 258 | struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); |
| 277 | 259 | ||
| 278 | if (b) { | 260 | if (!b) |
| 279 | b->bi_destructor = bio_fs_destructor; | 261 | return NULL; |
| 280 | __bio_clone(b, bio); | 262 | |
| 263 | b->bi_destructor = bio_fs_destructor; | ||
| 264 | __bio_clone(b, bio); | ||
| 265 | |||
| 266 | if (bio_integrity(bio)) { | ||
| 267 | int ret; | ||
| 268 | |||
| 269 | ret = bio_integrity_clone(b, bio, fs_bio_set); | ||
| 270 | |||
| 271 | if (ret < 0) | ||
| 272 | return NULL; | ||
| 281 | } | 273 | } |
| 282 | 274 | ||
| 283 | return b; | 275 | return b; |
| @@ -333,10 +325,19 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
| 333 | if (page == prev->bv_page && | 325 | if (page == prev->bv_page && |
| 334 | offset == prev->bv_offset + prev->bv_len) { | 326 | offset == prev->bv_offset + prev->bv_len) { |
| 335 | prev->bv_len += len; | 327 | prev->bv_len += len; |
| 336 | if (q->merge_bvec_fn && | 328 | |
| 337 | q->merge_bvec_fn(q, bio, prev) < len) { | 329 | if (q->merge_bvec_fn) { |
| 338 | prev->bv_len -= len; | 330 | struct bvec_merge_data bvm = { |
| 339 | return 0; | 331 | .bi_bdev = bio->bi_bdev, |
| 332 | .bi_sector = bio->bi_sector, | ||
| 333 | .bi_size = bio->bi_size, | ||
| 334 | .bi_rw = bio->bi_rw, | ||
| 335 | }; | ||
| 336 | |||
| 337 | if (q->merge_bvec_fn(q, &bvm, prev) < len) { | ||
| 338 | prev->bv_len -= len; | ||
| 339 | return 0; | ||
| 340 | } | ||
| 340 | } | 341 | } |
| 341 | 342 | ||
| 342 | goto done; | 343 | goto done; |
| @@ -377,11 +378,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
| 377 | * queue to get further control | 378 | * queue to get further control |
| 378 | */ | 379 | */ |
| 379 | if (q->merge_bvec_fn) { | 380 | if (q->merge_bvec_fn) { |
| 381 | struct bvec_merge_data bvm = { | ||
| 382 | .bi_bdev = bio->bi_bdev, | ||
| 383 | .bi_sector = bio->bi_sector, | ||
| 384 | .bi_size = bio->bi_size, | ||
| 385 | .bi_rw = bio->bi_rw, | ||
| 386 | }; | ||
| 387 | |||
| 380 | /* | 388 | /* |
| 381 | * merge_bvec_fn() returns number of bytes it can accept | 389 | * merge_bvec_fn() returns number of bytes it can accept |
| 382 | * at this offset | 390 | * at this offset |
| 383 | */ | 391 | */ |
| 384 | if (q->merge_bvec_fn(q, bio, bvec) < len) { | 392 | if (q->merge_bvec_fn(q, &bvm, bvec) < len) { |
| 385 | bvec->bv_page = NULL; | 393 | bvec->bv_page = NULL; |
| 386 | bvec->bv_len = 0; | 394 | bvec->bv_len = 0; |
| 387 | bvec->bv_offset = 0; | 395 | bvec->bv_offset = 0; |
| @@ -1249,6 +1257,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | |||
| 1249 | bp->bio1.bi_private = bi; | 1257 | bp->bio1.bi_private = bi; |
| 1250 | bp->bio2.bi_private = pool; | 1258 | bp->bio2.bi_private = pool; |
| 1251 | 1259 | ||
| 1260 | if (bio_integrity(bi)) | ||
| 1261 | bio_integrity_split(bi, bp, first_sectors); | ||
| 1262 | |||
| 1252 | return bp; | 1263 | return bp; |
| 1253 | } | 1264 | } |
| 1254 | 1265 | ||
| @@ -1290,6 +1301,7 @@ void bioset_free(struct bio_set *bs) | |||
| 1290 | if (bs->bio_pool) | 1301 | if (bs->bio_pool) |
| 1291 | mempool_destroy(bs->bio_pool); | 1302 | mempool_destroy(bs->bio_pool); |
| 1292 | 1303 | ||
| 1304 | bioset_integrity_free(bs); | ||
| 1293 | biovec_free_pools(bs); | 1305 | biovec_free_pools(bs); |
| 1294 | 1306 | ||
| 1295 | kfree(bs); | 1307 | kfree(bs); |
| @@ -1306,6 +1318,9 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size) | |||
| 1306 | if (!bs->bio_pool) | 1318 | if (!bs->bio_pool) |
| 1307 | goto bad; | 1319 | goto bad; |
| 1308 | 1320 | ||
| 1321 | if (bioset_integrity_create(bs, bio_pool_size)) | ||
| 1322 | goto bad; | ||
| 1323 | |||
| 1309 | if (!biovec_create_pools(bs, bvec_pool_size)) | 1324 | if (!biovec_create_pools(bs, bvec_pool_size)) |
| 1310 | return bs; | 1325 | return bs; |
| 1311 | 1326 | ||
| @@ -1332,6 +1347,7 @@ static int __init init_bio(void) | |||
| 1332 | { | 1347 | { |
| 1333 | bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC); | 1348 | bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC); |
| 1334 | 1349 | ||
| 1350 | bio_integrity_init_slab(); | ||
| 1335 | biovec_init_slabs(); | 1351 | biovec_init_slabs(); |
| 1336 | 1352 | ||
| 1337 | fs_bio_set = bioset_create(BIO_POOL_SIZE, 2); | 1353 | fs_bio_set = bioset_create(BIO_POOL_SIZE, 2); |
diff --git a/fs/buffer.c b/fs/buffer.c index 0f51c0f7c266..d48caee12e2a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -1464,7 +1464,7 @@ static void invalidate_bh_lru(void *arg) | |||
| 1464 | 1464 | ||
| 1465 | void invalidate_bh_lrus(void) | 1465 | void invalidate_bh_lrus(void) |
| 1466 | { | 1466 | { |
| 1467 | on_each_cpu(invalidate_bh_lru, NULL, 1, 1); | 1467 | on_each_cpu(invalidate_bh_lru, NULL, 1); |
| 1468 | } | 1468 | } |
| 1469 | EXPORT_SYMBOL_GPL(invalidate_bh_lrus); | 1469 | EXPORT_SYMBOL_GPL(invalidate_bh_lrus); |
| 1470 | 1470 | ||
| @@ -1691,11 +1691,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page, | |||
| 1691 | */ | 1691 | */ |
| 1692 | clear_buffer_dirty(bh); | 1692 | clear_buffer_dirty(bh); |
| 1693 | set_buffer_uptodate(bh); | 1693 | set_buffer_uptodate(bh); |
| 1694 | } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { | 1694 | } else if ((!buffer_mapped(bh) || buffer_delay(bh)) && |
| 1695 | buffer_dirty(bh)) { | ||
| 1695 | WARN_ON(bh->b_size != blocksize); | 1696 | WARN_ON(bh->b_size != blocksize); |
| 1696 | err = get_block(inode, block, bh, 1); | 1697 | err = get_block(inode, block, bh, 1); |
| 1697 | if (err) | 1698 | if (err) |
| 1698 | goto recover; | 1699 | goto recover; |
| 1700 | clear_buffer_delay(bh); | ||
| 1699 | if (buffer_new(bh)) { | 1701 | if (buffer_new(bh)) { |
| 1700 | /* blockdev mappings never come here */ | 1702 | /* blockdev mappings never come here */ |
| 1701 | clear_buffer_new(bh); | 1703 | clear_buffer_new(bh); |
| @@ -1774,7 +1776,8 @@ recover: | |||
| 1774 | bh = head; | 1776 | bh = head; |
| 1775 | /* Recovery: lock and submit the mapped buffers */ | 1777 | /* Recovery: lock and submit the mapped buffers */ |
| 1776 | do { | 1778 | do { |
| 1777 | if (buffer_mapped(bh) && buffer_dirty(bh)) { | 1779 | if (buffer_mapped(bh) && buffer_dirty(bh) && |
| 1780 | !buffer_delay(bh)) { | ||
| 1778 | lock_buffer(bh); | 1781 | lock_buffer(bh); |
| 1779 | mark_buffer_async_write(bh); | 1782 | mark_buffer_async_write(bh); |
| 1780 | } else { | 1783 | } else { |
| @@ -2061,6 +2064,7 @@ int generic_write_end(struct file *file, struct address_space *mapping, | |||
| 2061 | struct page *page, void *fsdata) | 2064 | struct page *page, void *fsdata) |
| 2062 | { | 2065 | { |
| 2063 | struct inode *inode = mapping->host; | 2066 | struct inode *inode = mapping->host; |
| 2067 | int i_size_changed = 0; | ||
| 2064 | 2068 | ||
| 2065 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 2069 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); |
| 2066 | 2070 | ||
| @@ -2073,12 +2077,21 @@ int generic_write_end(struct file *file, struct address_space *mapping, | |||
| 2073 | */ | 2077 | */ |
| 2074 | if (pos+copied > inode->i_size) { | 2078 | if (pos+copied > inode->i_size) { |
| 2075 | i_size_write(inode, pos+copied); | 2079 | i_size_write(inode, pos+copied); |
| 2076 | mark_inode_dirty(inode); | 2080 | i_size_changed = 1; |
| 2077 | } | 2081 | } |
| 2078 | 2082 | ||
| 2079 | unlock_page(page); | 2083 | unlock_page(page); |
| 2080 | page_cache_release(page); | 2084 | page_cache_release(page); |
| 2081 | 2085 | ||
| 2086 | /* | ||
| 2087 | * Don't mark the inode dirty under page lock. First, it unnecessarily | ||
| 2088 | * makes the holding time of page lock longer. Second, it forces lock | ||
| 2089 | * ordering of page lock and transaction start for journaling | ||
| 2090 | * filesystems. | ||
| 2091 | */ | ||
| 2092 | if (i_size_changed) | ||
| 2093 | mark_inode_dirty(inode); | ||
| 2094 | |||
| 2082 | return copied; | 2095 | return copied; |
| 2083 | } | 2096 | } |
| 2084 | EXPORT_SYMBOL(generic_write_end); | 2097 | EXPORT_SYMBOL(generic_write_end); |
diff --git a/fs/char_dev.c b/fs/char_dev.c index 68e510b88457..3cb7cda3d780 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
| @@ -373,6 +373,8 @@ static int chrdev_open(struct inode *inode, struct file *filp) | |||
| 373 | return -ENXIO; | 373 | return -ENXIO; |
| 374 | new = container_of(kobj, struct cdev, kobj); | 374 | new = container_of(kobj, struct cdev, kobj); |
| 375 | spin_lock(&cdev_lock); | 375 | spin_lock(&cdev_lock); |
| 376 | /* Check i_cdev again in case somebody beat us to it while | ||
| 377 | we dropped the lock. */ | ||
| 376 | p = inode->i_cdev; | 378 | p = inode->i_cdev; |
| 377 | if (!p) { | 379 | if (!p) { |
| 378 | inode->i_cdev = p = new; | 380 | inode->i_cdev = p = new; |
| @@ -392,11 +394,8 @@ static int chrdev_open(struct inode *inode, struct file *filp) | |||
| 392 | cdev_put(p); | 394 | cdev_put(p); |
| 393 | return -ENXIO; | 395 | return -ENXIO; |
| 394 | } | 396 | } |
| 395 | if (filp->f_op->open) { | 397 | if (filp->f_op->open) |
| 396 | lock_kernel(); | ||
| 397 | ret = filp->f_op->open(inode,filp); | 398 | ret = filp->f_op->open(inode,filp); |
| 398 | unlock_kernel(); | ||
| 399 | } | ||
| 400 | if (ret) | 399 | if (ret) |
| 401 | cdev_put(p); | 400 | cdev_put(p); |
| 402 | return ret; | 401 | return ret; |
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 34902cff5400..0e9fc2ba90ee 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
| @@ -34,11 +34,11 @@ | |||
| 34 | static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { | 34 | static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { |
| 35 | {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"}, | 35 | {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"}, |
| 36 | {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"}, | 36 | {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"}, |
| 37 | {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"}, | 37 | {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"}, |
| 38 | {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(18), 0, 0, 0, 0} }, "sys"}, | 38 | {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"}, |
| 39 | {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(544), 0, 0, 0} }, "root"}, | 39 | {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"}, |
| 40 | {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(545), 0, 0, 0} }, "users"}, | 40 | {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"}, |
| 41 | {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(546), 0, 0, 0} }, "guest"} } | 41 | {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} } |
| 42 | ; | 42 | ; |
| 43 | 43 | ||
| 44 | 44 | ||
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 86b4d5f405ae..22857c639df5 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
| @@ -612,7 +612,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) | |||
| 612 | if (retval < 0) | 612 | if (retval < 0) |
| 613 | return (loff_t)retval; | 613 | return (loff_t)retval; |
| 614 | } | 614 | } |
| 615 | return remote_llseek(file, offset, origin); | 615 | return generic_file_llseek_unlocked(file, offset, origin); |
| 616 | } | 616 | } |
| 617 | 617 | ||
| 618 | struct file_system_type cifs_fs_type = { | 618 | struct file_system_type cifs_fs_type = { |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 722be543ceec..2e904bd111c8 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
| @@ -219,15 +219,15 @@ int cifs_get_inode_info_unix(struct inode **pinode, | |||
| 219 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data, | 219 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data, |
| 220 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | 220 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & |
| 221 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 221 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
| 222 | if (rc) { | 222 | if (rc == -EREMOTE && !is_dfs_referral) { |
| 223 | if (rc == -EREMOTE && !is_dfs_referral) { | 223 | is_dfs_referral = true; |
| 224 | is_dfs_referral = true; | 224 | cFYI(DBG2, ("DFS ref")); |
| 225 | cFYI(DBG2, ("DFS ref")); | 225 | /* for DFS, server does not give us real inode data */ |
| 226 | /* for DFS, server does not give us real inode data */ | 226 | fill_fake_finddataunix(&find_data, sb); |
| 227 | fill_fake_finddataunix(&find_data, sb); | 227 | rc = 0; |
| 228 | rc = 0; | 228 | } else if (rc) |
| 229 | } | 229 | goto cgiiu_exit; |
| 230 | } | 230 | |
| 231 | num_of_bytes = le64_to_cpu(find_data.NumOfBytes); | 231 | num_of_bytes = le64_to_cpu(find_data.NumOfBytes); |
| 232 | end_of_file = le64_to_cpu(find_data.EndOfFile); | 232 | end_of_file = le64_to_cpu(find_data.EndOfFile); |
| 233 | 233 | ||
| @@ -236,7 +236,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, | |||
| 236 | *pinode = new_inode(sb); | 236 | *pinode = new_inode(sb); |
| 237 | if (*pinode == NULL) { | 237 | if (*pinode == NULL) { |
| 238 | rc = -ENOMEM; | 238 | rc = -ENOMEM; |
| 239 | goto cgiiu_exit; | 239 | goto cgiiu_exit; |
| 240 | } | 240 | } |
| 241 | /* Is an i_ino of zero legal? */ | 241 | /* Is an i_ino of zero legal? */ |
| 242 | /* note ino incremented to unique num in new_inode */ | 242 | /* note ino incremented to unique num in new_inode */ |
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index ebbcf38fd33b..f976f303c196 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/poll.h> | 15 | #include <linux/poll.h> |
| 16 | #include <linux/signal.h> | 16 | #include <linux/signal.h> |
| 17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
| 18 | #include <linux/smp_lock.h> | ||
| 18 | #include <linux/dlm.h> | 19 | #include <linux/dlm.h> |
| 19 | #include <linux/dlm_device.h> | 20 | #include <linux/dlm_device.h> |
| 20 | 21 | ||
| @@ -618,13 +619,17 @@ static int device_open(struct inode *inode, struct file *file) | |||
| 618 | struct dlm_user_proc *proc; | 619 | struct dlm_user_proc *proc; |
| 619 | struct dlm_ls *ls; | 620 | struct dlm_ls *ls; |
| 620 | 621 | ||
| 622 | lock_kernel(); | ||
| 621 | ls = dlm_find_lockspace_device(iminor(inode)); | 623 | ls = dlm_find_lockspace_device(iminor(inode)); |
| 622 | if (!ls) | 624 | if (!ls) { |
| 625 | unlock_kernel(); | ||
| 623 | return -ENOENT; | 626 | return -ENOENT; |
| 627 | } | ||
| 624 | 628 | ||
| 625 | proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); | 629 | proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); |
| 626 | if (!proc) { | 630 | if (!proc) { |
| 627 | dlm_put_lockspace(ls); | 631 | dlm_put_lockspace(ls); |
| 632 | unlock_kernel(); | ||
| 628 | return -ENOMEM; | 633 | return -ENOMEM; |
| 629 | } | 634 | } |
| 630 | 635 | ||
| @@ -636,6 +641,7 @@ static int device_open(struct inode *inode, struct file *file) | |||
| 636 | spin_lock_init(&proc->locks_spin); | 641 | spin_lock_init(&proc->locks_spin); |
| 637 | init_waitqueue_head(&proc->wait); | 642 | init_waitqueue_head(&proc->wait); |
| 638 | file->private_data = proc; | 643 | file->private_data = proc; |
| 644 | unlock_kernel(); | ||
| 639 | 645 | ||
| 640 | return 0; | 646 | return 0; |
| 641 | } | 647 | } |
| @@ -870,6 +876,7 @@ static unsigned int device_poll(struct file *file, poll_table *wait) | |||
| 870 | 876 | ||
| 871 | static int ctl_device_open(struct inode *inode, struct file *file) | 877 | static int ctl_device_open(struct inode *inode, struct file *file) |
| 872 | { | 878 | { |
| 879 | cycle_kernel_lock(); | ||
| 873 | file->private_data = NULL; | 880 | file->private_data = NULL; |
| 874 | return 0; | 881 | return 0; |
| 875 | } | 882 | } |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 2258b8f654a6..24749bf0668f 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/security.h> | 30 | #include <linux/security.h> |
| 31 | #include <linux/compat.h> | 31 | #include <linux/compat.h> |
| 32 | #include <linux/fs_stack.h> | 32 | #include <linux/fs_stack.h> |
| 33 | #include <linux/smp_lock.h> | ||
| 33 | #include "ecryptfs_kernel.h" | 34 | #include "ecryptfs_kernel.h" |
| 34 | 35 | ||
| 35 | /** | 36 | /** |
| @@ -277,9 +278,11 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag) | |||
| 277 | int rc = 0; | 278 | int rc = 0; |
| 278 | struct file *lower_file = NULL; | 279 | struct file *lower_file = NULL; |
| 279 | 280 | ||
| 281 | lock_kernel(); | ||
| 280 | lower_file = ecryptfs_file_to_lower(file); | 282 | lower_file = ecryptfs_file_to_lower(file); |
| 281 | if (lower_file->f_op && lower_file->f_op->fasync) | 283 | if (lower_file->f_op && lower_file->f_op->fasync) |
| 282 | rc = lower_file->f_op->fasync(fd, lower_file, flag); | 284 | rc = lower_file->f_op->fasync(fd, lower_file, flag); |
| 285 | unlock_kernel(); | ||
| 283 | return rc; | 286 | return rc; |
| 284 | } | 287 | } |
| 285 | 288 | ||
| @@ -610,7 +610,7 @@ int setup_arg_pages(struct linux_binprm *bprm, | |||
| 610 | bprm->exec -= stack_shift; | 610 | bprm->exec -= stack_shift; |
| 611 | 611 | ||
| 612 | down_write(&mm->mmap_sem); | 612 | down_write(&mm->mmap_sem); |
| 613 | vm_flags = vma->vm_flags; | 613 | vm_flags = VM_STACK_FLAGS; |
| 614 | 614 | ||
| 615 | /* | 615 | /* |
| 616 | * Adjust stack execute permissions; explicitly enable for | 616 | * Adjust stack execute permissions; explicitly enable for |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 9cc80b9cc8d8..495ab21b9832 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
| @@ -47,7 +47,7 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block, | |||
| 47 | ext4_group_t block_group) | 47 | ext4_group_t block_group) |
| 48 | { | 48 | { |
| 49 | ext4_group_t actual_group; | 49 | ext4_group_t actual_group; |
| 50 | ext4_get_group_no_and_offset(sb, block, &actual_group, 0); | 50 | ext4_get_group_no_and_offset(sb, block, &actual_group, NULL); |
| 51 | if (actual_group == block_group) | 51 | if (actual_group == block_group) |
| 52 | return 1; | 52 | return 1; |
| 53 | return 0; | 53 | return 0; |
| @@ -121,12 +121,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
| 121 | le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); | 121 | le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); |
| 122 | } | 122 | } |
| 123 | } else { /* For META_BG_BLOCK_GROUPS */ | 123 | } else { /* For META_BG_BLOCK_GROUPS */ |
| 124 | int group_rel = (block_group - | 124 | bit_max += ext4_bg_num_gdb(sb, block_group); |
| 125 | le32_to_cpu(sbi->s_es->s_first_meta_bg)) % | ||
| 126 | EXT4_DESC_PER_BLOCK(sb); | ||
| 127 | if (group_rel == 0 || group_rel == 1 || | ||
| 128 | (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1)) | ||
| 129 | bit_max += 1; | ||
| 130 | } | 125 | } |
| 131 | 126 | ||
| 132 | if (block_group == sbi->s_groups_count - 1) { | 127 | if (block_group == sbi->s_groups_count - 1) { |
| @@ -295,7 +290,7 @@ err_out: | |||
| 295 | return 0; | 290 | return 0; |
| 296 | } | 291 | } |
| 297 | /** | 292 | /** |
| 298 | * read_block_bitmap() | 293 | * ext4_read_block_bitmap() |
| 299 | * @sb: super block | 294 | * @sb: super block |
| 300 | * @block_group: given block group | 295 | * @block_group: given block group |
| 301 | * | 296 | * |
| @@ -305,7 +300,7 @@ err_out: | |||
| 305 | * Return buffer_head on success or NULL in case of failure. | 300 | * Return buffer_head on success or NULL in case of failure. |
| 306 | */ | 301 | */ |
| 307 | struct buffer_head * | 302 | struct buffer_head * |
| 308 | read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | 303 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) |
| 309 | { | 304 | { |
| 310 | struct ext4_group_desc * desc; | 305 | struct ext4_group_desc * desc; |
| 311 | struct buffer_head * bh = NULL; | 306 | struct buffer_head * bh = NULL; |
| @@ -409,8 +404,7 @@ restart: | |||
| 409 | prev = rsv; | 404 | prev = rsv; |
| 410 | } | 405 | } |
| 411 | printk("Window map complete.\n"); | 406 | printk("Window map complete.\n"); |
| 412 | if (bad) | 407 | BUG_ON(bad); |
| 413 | BUG(); | ||
| 414 | } | 408 | } |
| 415 | #define rsv_window_dump(root, verbose) \ | 409 | #define rsv_window_dump(root, verbose) \ |
| 416 | __rsv_window_dump((root), (verbose), __func__) | 410 | __rsv_window_dump((root), (verbose), __func__) |
| @@ -694,7 +688,7 @@ do_more: | |||
| 694 | count -= overflow; | 688 | count -= overflow; |
| 695 | } | 689 | } |
| 696 | brelse(bitmap_bh); | 690 | brelse(bitmap_bh); |
| 697 | bitmap_bh = read_block_bitmap(sb, block_group); | 691 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
| 698 | if (!bitmap_bh) | 692 | if (!bitmap_bh) |
| 699 | goto error_return; | 693 | goto error_return; |
| 700 | desc = ext4_get_group_desc (sb, block_group, &gd_bh); | 694 | desc = ext4_get_group_desc (sb, block_group, &gd_bh); |
| @@ -810,6 +804,13 @@ do_more: | |||
| 810 | spin_unlock(sb_bgl_lock(sbi, block_group)); | 804 | spin_unlock(sb_bgl_lock(sbi, block_group)); |
| 811 | percpu_counter_add(&sbi->s_freeblocks_counter, count); | 805 | percpu_counter_add(&sbi->s_freeblocks_counter, count); |
| 812 | 806 | ||
| 807 | if (sbi->s_log_groups_per_flex) { | ||
| 808 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | ||
| 809 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
| 810 | sbi->s_flex_groups[flex_group].free_blocks += count; | ||
| 811 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
| 812 | } | ||
| 813 | |||
| 813 | /* We dirtied the bitmap block */ | 814 | /* We dirtied the bitmap block */ |
| 814 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | 815 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); |
| 815 | err = ext4_journal_dirty_metadata(handle, bitmap_bh); | 816 | err = ext4_journal_dirty_metadata(handle, bitmap_bh); |
| @@ -1598,23 +1599,35 @@ out: | |||
| 1598 | 1599 | ||
| 1599 | /** | 1600 | /** |
| 1600 | * ext4_has_free_blocks() | 1601 | * ext4_has_free_blocks() |
| 1601 | * @sbi: in-core super block structure. | 1602 | * @sbi: in-core super block structure. |
| 1603 | * @nblocks: number of needed blocks | ||
| 1602 | * | 1604 | * |
| 1603 | * Check if filesystem has at least 1 free block available for allocation. | 1605 | * Check if filesystem has free blocks available for allocation. |
| 1606 | * Return the number of blocks available for allocation for this request | ||
| 1607 | * On success, return nblocks | ||
| 1604 | */ | 1608 | */ |
| 1605 | static int ext4_has_free_blocks(struct ext4_sb_info *sbi) | 1609 | ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, |
| 1610 | ext4_fsblk_t nblocks) | ||
| 1606 | { | 1611 | { |
| 1607 | ext4_fsblk_t free_blocks, root_blocks; | 1612 | ext4_fsblk_t free_blocks; |
| 1613 | ext4_fsblk_t root_blocks = 0; | ||
| 1608 | 1614 | ||
| 1609 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 1615 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); |
| 1610 | root_blocks = ext4_r_blocks_count(sbi->s_es); | 1616 | |
| 1611 | if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && | 1617 | if (!capable(CAP_SYS_RESOURCE) && |
| 1612 | sbi->s_resuid != current->fsuid && | 1618 | sbi->s_resuid != current->fsuid && |
| 1613 | (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { | 1619 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) |
| 1614 | return 0; | 1620 | root_blocks = ext4_r_blocks_count(sbi->s_es); |
| 1615 | } | 1621 | #ifdef CONFIG_SMP |
| 1616 | return 1; | 1622 | if (free_blocks - root_blocks < FBC_BATCH) |
| 1617 | } | 1623 | free_blocks = |
| 1624 | percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); | ||
| 1625 | #endif | ||
| 1626 | if (free_blocks - root_blocks < nblocks) | ||
| 1627 | return free_blocks - root_blocks; | ||
| 1628 | return nblocks; | ||
| 1629 | } | ||
| 1630 | |||
| 1618 | 1631 | ||
| 1619 | /** | 1632 | /** |
| 1620 | * ext4_should_retry_alloc() | 1633 | * ext4_should_retry_alloc() |
| @@ -1630,7 +1643,7 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi) | |||
| 1630 | */ | 1643 | */ |
| 1631 | int ext4_should_retry_alloc(struct super_block *sb, int *retries) | 1644 | int ext4_should_retry_alloc(struct super_block *sb, int *retries) |
| 1632 | { | 1645 | { |
| 1633 | if (!ext4_has_free_blocks(EXT4_SB(sb)) || (*retries)++ > 3) | 1646 | if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3) |
| 1634 | return 0; | 1647 | return 0; |
| 1635 | 1648 | ||
| 1636 | jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); | 1649 | jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); |
| @@ -1639,20 +1652,24 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) | |||
| 1639 | } | 1652 | } |
| 1640 | 1653 | ||
| 1641 | /** | 1654 | /** |
| 1642 | * ext4_new_blocks_old() -- core block(s) allocation function | 1655 | * ext4_old_new_blocks() -- core block bitmap based block allocation function |
| 1656 | * | ||
| 1643 | * @handle: handle to this transaction | 1657 | * @handle: handle to this transaction |
| 1644 | * @inode: file inode | 1658 | * @inode: file inode |
| 1645 | * @goal: given target block(filesystem wide) | 1659 | * @goal: given target block(filesystem wide) |
| 1646 | * @count: target number of blocks to allocate | 1660 | * @count: target number of blocks to allocate |
| 1647 | * @errp: error code | 1661 | * @errp: error code |
| 1648 | * | 1662 | * |
| 1649 | * ext4_new_blocks uses a goal block to assist allocation. It tries to | 1663 | * ext4_old_new_blocks uses a goal block to assist allocation and look up |
| 1650 | * allocate block(s) from the block group contains the goal block first. If that | 1664 | * the block bitmap directly to do block allocation. It tries to |
| 1651 | * fails, it will try to allocate block(s) from other block groups without | 1665 | * allocate block(s) from the block group contains the goal block first. If |
| 1652 | * any specific goal block. | 1666 | * that fails, it will try to allocate block(s) from other block groups |
| 1667 | * without any specific goal block. | ||
| 1668 | * | ||
| 1669 | * This function is called when -o nomballoc mount option is enabled | ||
| 1653 | * | 1670 | * |
| 1654 | */ | 1671 | */ |
| 1655 | ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, | 1672 | ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, |
| 1656 | ext4_fsblk_t goal, unsigned long *count, int *errp) | 1673 | ext4_fsblk_t goal, unsigned long *count, int *errp) |
| 1657 | { | 1674 | { |
| 1658 | struct buffer_head *bitmap_bh = NULL; | 1675 | struct buffer_head *bitmap_bh = NULL; |
| @@ -1676,13 +1693,26 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, | |||
| 1676 | ext4_group_t ngroups; | 1693 | ext4_group_t ngroups; |
| 1677 | unsigned long num = *count; | 1694 | unsigned long num = *count; |
| 1678 | 1695 | ||
| 1679 | *errp = -ENOSPC; | ||
| 1680 | sb = inode->i_sb; | 1696 | sb = inode->i_sb; |
| 1681 | if (!sb) { | 1697 | if (!sb) { |
| 1698 | *errp = -ENODEV; | ||
| 1682 | printk("ext4_new_block: nonexistent device"); | 1699 | printk("ext4_new_block: nonexistent device"); |
| 1683 | return 0; | 1700 | return 0; |
| 1684 | } | 1701 | } |
| 1685 | 1702 | ||
| 1703 | sbi = EXT4_SB(sb); | ||
| 1704 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) { | ||
| 1705 | /* | ||
| 1706 | * With delalloc we already reserved the blocks | ||
| 1707 | */ | ||
| 1708 | *count = ext4_has_free_blocks(sbi, *count); | ||
| 1709 | } | ||
| 1710 | if (*count == 0) { | ||
| 1711 | *errp = -ENOSPC; | ||
| 1712 | return 0; /*return with ENOSPC error */ | ||
| 1713 | } | ||
| 1714 | num = *count; | ||
| 1715 | |||
| 1686 | /* | 1716 | /* |
| 1687 | * Check quota for allocation of this block. | 1717 | * Check quota for allocation of this block. |
| 1688 | */ | 1718 | */ |
| @@ -1706,11 +1736,6 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, | |||
| 1706 | if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) | 1736 | if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) |
| 1707 | my_rsv = &block_i->rsv_window_node; | 1737 | my_rsv = &block_i->rsv_window_node; |
| 1708 | 1738 | ||
| 1709 | if (!ext4_has_free_blocks(sbi)) { | ||
| 1710 | *errp = -ENOSPC; | ||
| 1711 | goto out; | ||
| 1712 | } | ||
| 1713 | |||
| 1714 | /* | 1739 | /* |
| 1715 | * First, test whether the goal block is free. | 1740 | * First, test whether the goal block is free. |
| 1716 | */ | 1741 | */ |
| @@ -1734,7 +1759,7 @@ retry_alloc: | |||
| 1734 | my_rsv = NULL; | 1759 | my_rsv = NULL; |
| 1735 | 1760 | ||
| 1736 | if (free_blocks > 0) { | 1761 | if (free_blocks > 0) { |
| 1737 | bitmap_bh = read_block_bitmap(sb, group_no); | 1762 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); |
| 1738 | if (!bitmap_bh) | 1763 | if (!bitmap_bh) |
| 1739 | goto io_error; | 1764 | goto io_error; |
| 1740 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, | 1765 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, |
| @@ -1770,7 +1795,7 @@ retry_alloc: | |||
| 1770 | continue; | 1795 | continue; |
| 1771 | 1796 | ||
| 1772 | brelse(bitmap_bh); | 1797 | brelse(bitmap_bh); |
| 1773 | bitmap_bh = read_block_bitmap(sb, group_no); | 1798 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); |
| 1774 | if (!bitmap_bh) | 1799 | if (!bitmap_bh) |
| 1775 | goto io_error; | 1800 | goto io_error; |
| 1776 | /* | 1801 | /* |
| @@ -1882,7 +1907,15 @@ allocated: | |||
| 1882 | le16_add_cpu(&gdp->bg_free_blocks_count, -num); | 1907 | le16_add_cpu(&gdp->bg_free_blocks_count, -num); |
| 1883 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); | 1908 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); |
| 1884 | spin_unlock(sb_bgl_lock(sbi, group_no)); | 1909 | spin_unlock(sb_bgl_lock(sbi, group_no)); |
| 1885 | percpu_counter_sub(&sbi->s_freeblocks_counter, num); | 1910 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) |
| 1911 | percpu_counter_sub(&sbi->s_freeblocks_counter, num); | ||
| 1912 | |||
| 1913 | if (sbi->s_log_groups_per_flex) { | ||
| 1914 | ext4_group_t flex_group = ext4_flex_group(sbi, group_no); | ||
| 1915 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
| 1916 | sbi->s_flex_groups[flex_group].free_blocks -= num; | ||
| 1917 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
| 1918 | } | ||
| 1886 | 1919 | ||
| 1887 | BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); | 1920 | BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); |
| 1888 | err = ext4_journal_dirty_metadata(handle, gdp_bh); | 1921 | err = ext4_journal_dirty_metadata(handle, gdp_bh); |
| @@ -1915,46 +1948,104 @@ out: | |||
| 1915 | return 0; | 1948 | return 0; |
| 1916 | } | 1949 | } |
| 1917 | 1950 | ||
| 1918 | ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode, | 1951 | #define EXT4_META_BLOCK 0x1 |
| 1919 | ext4_fsblk_t goal, int *errp) | 1952 | |
| 1953 | static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, | ||
| 1954 | ext4_lblk_t iblock, ext4_fsblk_t goal, | ||
| 1955 | unsigned long *count, int *errp, int flags) | ||
| 1920 | { | 1956 | { |
| 1921 | struct ext4_allocation_request ar; | 1957 | struct ext4_allocation_request ar; |
| 1922 | ext4_fsblk_t ret; | 1958 | ext4_fsblk_t ret; |
| 1923 | 1959 | ||
| 1924 | if (!test_opt(inode->i_sb, MBALLOC)) { | 1960 | if (!test_opt(inode->i_sb, MBALLOC)) { |
| 1925 | unsigned long count = 1; | 1961 | return ext4_old_new_blocks(handle, inode, goal, count, errp); |
| 1926 | ret = ext4_new_blocks_old(handle, inode, goal, &count, errp); | ||
| 1927 | return ret; | ||
| 1928 | } | 1962 | } |
| 1929 | 1963 | ||
| 1930 | memset(&ar, 0, sizeof(ar)); | 1964 | memset(&ar, 0, sizeof(ar)); |
| 1965 | /* Fill with neighbour allocated blocks */ | ||
| 1966 | |||
| 1931 | ar.inode = inode; | 1967 | ar.inode = inode; |
| 1932 | ar.goal = goal; | 1968 | ar.goal = goal; |
| 1933 | ar.len = 1; | 1969 | ar.len = *count; |
| 1970 | ar.logical = iblock; | ||
| 1971 | |||
| 1972 | if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK)) | ||
| 1973 | /* enable in-core preallocation for data block allocation */ | ||
| 1974 | ar.flags = EXT4_MB_HINT_DATA; | ||
| 1975 | else | ||
| | | 1976 | /* disable in-core preallocation for metadata and non-regular files */ |
| 1977 | ar.flags = 0; | ||
| 1978 | |||
| 1934 | ret = ext4_mb_new_blocks(handle, &ar, errp); | 1979 | ret = ext4_mb_new_blocks(handle, &ar, errp); |
| 1980 | *count = ar.len; | ||
| 1935 | return ret; | 1981 | return ret; |
| 1936 | } | 1982 | } |
| 1937 | 1983 | ||
| 1938 | ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | 1984 | /* |
| 1985 | * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks | ||
| 1986 | * | ||
| 1987 | * @handle: handle to this transaction | ||
| 1988 | * @inode: file inode | ||
| 1989 | * @goal: given target block(filesystem wide) | ||
| | | 1990 | * @count: total number of blocks needed |
| | | 1991 | * @errp: error code |
| | | 1992 | * |
| | | 1993 | * Return 1st allocated block number on success, *count stores total count, |
| | | 1994 | * error is stored in errp pointer |
| 1995 | */ | ||
| 1996 | ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | ||
| 1939 | ext4_fsblk_t goal, unsigned long *count, int *errp) | 1997 | ext4_fsblk_t goal, unsigned long *count, int *errp) |
| 1940 | { | 1998 | { |
| 1941 | struct ext4_allocation_request ar; | ||
| 1942 | ext4_fsblk_t ret; | 1999 | ext4_fsblk_t ret; |
| 1943 | 2000 | ret = do_blk_alloc(handle, inode, 0, goal, | |
| 1944 | if (!test_opt(inode->i_sb, MBALLOC)) { | 2001 | count, errp, EXT4_META_BLOCK); |
| 1945 | ret = ext4_new_blocks_old(handle, inode, goal, count, errp); | 2002 | /* |
| 1946 | return ret; | 2003 | * Account for the allocated meta blocks |
| 2004 | */ | ||
| 2005 | if (!(*errp)) { | ||
| 2006 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 2007 | EXT4_I(inode)->i_allocated_meta_blocks += *count; | ||
| 2008 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 1947 | } | 2009 | } |
| 1948 | |||
| 1949 | memset(&ar, 0, sizeof(ar)); | ||
| 1950 | ar.inode = inode; | ||
| 1951 | ar.goal = goal; | ||
| 1952 | ar.len = *count; | ||
| 1953 | ret = ext4_mb_new_blocks(handle, &ar, errp); | ||
| 1954 | *count = ar.len; | ||
| 1955 | return ret; | 2010 | return ret; |
| 1956 | } | 2011 | } |
| 1957 | 2012 | ||
| 2013 | /* | ||
| 2014 | * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks | ||
| 2015 | * | ||
| 2016 | * @handle: handle to this transaction | ||
| 2017 | * @inode: file inode | ||
| 2018 | * @goal: given target block(filesystem wide) | ||
| 2019 | * @errp: error code | ||
| 2020 | * | ||
| 2021 | * Return allocated block number on success | ||
| 2022 | */ | ||
| 2023 | ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode, | ||
| 2024 | ext4_fsblk_t goal, int *errp) | ||
| 2025 | { | ||
| 2026 | unsigned long count = 1; | ||
| 2027 | return ext4_new_meta_blocks(handle, inode, goal, &count, errp); | ||
| 2028 | } | ||
| 2029 | |||
| 2030 | /* | ||
| 2031 | * ext4_new_blocks() -- allocate data blocks | ||
| 2032 | * | ||
| 2033 | * @handle: handle to this transaction | ||
| 2034 | * @inode: file inode | ||
| 2035 | * @goal: given target block(filesystem wide) | ||
| | | 2036 | * @count: total number of blocks needed |
| | | 2037 | * @errp: error code |
| | | 2038 | * |
| | | 2039 | * Return 1st allocated block number on success, *count stores total count, |
| | | 2040 | * error is stored in errp pointer |
| 2041 | */ | ||
| 2042 | |||
| 2043 | ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | ||
| 2044 | ext4_lblk_t iblock, ext4_fsblk_t goal, | ||
| 2045 | unsigned long *count, int *errp) | ||
| 2046 | { | ||
| 2047 | return do_blk_alloc(handle, inode, iblock, goal, count, errp, 0); | ||
| 2048 | } | ||
| 1958 | 2049 | ||
| 1959 | /** | 2050 | /** |
| 1960 | * ext4_count_free_blocks() -- count filesystem free blocks | 2051 | * ext4_count_free_blocks() -- count filesystem free blocks |
| @@ -1986,7 +2077,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
| 1986 | continue; | 2077 | continue; |
| 1987 | desc_count += le16_to_cpu(gdp->bg_free_blocks_count); | 2078 | desc_count += le16_to_cpu(gdp->bg_free_blocks_count); |
| 1988 | brelse(bitmap_bh); | 2079 | brelse(bitmap_bh); |
| 1989 | bitmap_bh = read_block_bitmap(sb, i); | 2080 | bitmap_bh = ext4_read_block_bitmap(sb, i); |
| 1990 | if (bitmap_bh == NULL) | 2081 | if (bitmap_bh == NULL) |
| 1991 | continue; | 2082 | continue; |
| 1992 | 2083 | ||
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 2bf0331ea194..d3d23d73c08b 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
| @@ -129,7 +129,8 @@ static int ext4_readdir(struct file * filp, | |||
| 129 | struct buffer_head *bh = NULL; | 129 | struct buffer_head *bh = NULL; |
| 130 | 130 | ||
| 131 | map_bh.b_state = 0; | 131 | map_bh.b_state = 0; |
| 132 | err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0); | 132 | err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, |
| 133 | 0, 0, 0); | ||
| 133 | if (err > 0) { | 134 | if (err > 0) { |
| 134 | pgoff_t index = map_bh.b_blocknr >> | 135 | pgoff_t index = map_bh.b_blocknr >> |
| 135 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 136 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
| @@ -272,7 +273,7 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
| 272 | 273 | ||
| 273 | while (n) { | 274 | while (n) { |
| 274 | /* Do the node's children first */ | 275 | /* Do the node's children first */ |
| 275 | if ((n)->rb_left) { | 276 | if (n->rb_left) { |
| 276 | n = n->rb_left; | 277 | n = n->rb_left; |
| 277 | continue; | 278 | continue; |
| 278 | } | 279 | } |
| @@ -301,24 +302,18 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
| 301 | parent->rb_right = NULL; | 302 | parent->rb_right = NULL; |
| 302 | n = parent; | 303 | n = parent; |
| 303 | } | 304 | } |
| 304 | root->rb_node = NULL; | ||
| 305 | } | 305 | } |
| 306 | 306 | ||
| 307 | 307 | ||
| 308 | static struct dir_private_info *create_dir_info(loff_t pos) | 308 | static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos) |
| 309 | { | 309 | { |
| 310 | struct dir_private_info *p; | 310 | struct dir_private_info *p; |
| 311 | 311 | ||
| 312 | p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); | 312 | p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); |
| 313 | if (!p) | 313 | if (!p) |
| 314 | return NULL; | 314 | return NULL; |
| 315 | p->root.rb_node = NULL; | ||
| 316 | p->curr_node = NULL; | ||
| 317 | p->extra_fname = NULL; | ||
| 318 | p->last_pos = 0; | ||
| 319 | p->curr_hash = pos2maj_hash(pos); | 315 | p->curr_hash = pos2maj_hash(pos); |
| 320 | p->curr_minor_hash = pos2min_hash(pos); | 316 | p->curr_minor_hash = pos2min_hash(pos); |
| 321 | p->next_hash = 0; | ||
| 322 | return p; | 317 | return p; |
| 323 | } | 318 | } |
| 324 | 319 | ||
| @@ -433,7 +428,7 @@ static int ext4_dx_readdir(struct file * filp, | |||
| 433 | int ret; | 428 | int ret; |
| 434 | 429 | ||
| 435 | if (!info) { | 430 | if (!info) { |
| 436 | info = create_dir_info(filp->f_pos); | 431 | info = ext4_htree_create_dir_info(filp->f_pos); |
| 437 | if (!info) | 432 | if (!info) |
| 438 | return -ENOMEM; | 433 | return -ENOMEM; |
| 439 | filp->private_data = info; | 434 | filp->private_data = info; |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8158083f7ac0..303e41cf7b14 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | #include "ext4_i.h" | 22 | #include "ext4_i.h" |
| 23 | 23 | ||
| 24 | /* | 24 | /* |
| 25 | * The second extended filesystem constants/structures | 25 | * The fourth extended filesystem constants/structures |
| 26 | */ | 26 | */ |
| 27 | 27 | ||
| 28 | /* | 28 | /* |
| @@ -45,7 +45,7 @@ | |||
| 45 | #define ext4_debug(f, a...) \ | 45 | #define ext4_debug(f, a...) \ |
| 46 | do { \ | 46 | do { \ |
| 47 | printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ | 47 | printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ |
| 48 | __FILE__, __LINE__, __FUNCTION__); \ | 48 | __FILE__, __LINE__, __func__); \ |
| 49 | printk (KERN_DEBUG f, ## a); \ | 49 | printk (KERN_DEBUG f, ## a); \ |
| 50 | } while (0) | 50 | } while (0) |
| 51 | #else | 51 | #else |
| @@ -74,6 +74,9 @@ | |||
| 74 | #define EXT4_MB_HINT_GOAL_ONLY 256 | 74 | #define EXT4_MB_HINT_GOAL_ONLY 256 |
| 75 | /* goal is meaningful */ | 75 | /* goal is meaningful */ |
| 76 | #define EXT4_MB_HINT_TRY_GOAL 512 | 76 | #define EXT4_MB_HINT_TRY_GOAL 512 |
| 77 | /* blocks already pre-reserved by delayed allocation */ | ||
| 78 | #define EXT4_MB_DELALLOC_RESERVED 1024 | ||
| 79 | |||
| 77 | 80 | ||
| 78 | struct ext4_allocation_request { | 81 | struct ext4_allocation_request { |
| 79 | /* target inode for block we're allocating */ | 82 | /* target inode for block we're allocating */ |
| @@ -170,6 +173,15 @@ struct ext4_group_desc | |||
| 170 | __u32 bg_reserved2[3]; | 173 | __u32 bg_reserved2[3]; |
| 171 | }; | 174 | }; |
| 172 | 175 | ||
| 176 | /* | ||
| 177 | * Structure of a flex block group info | ||
| 178 | */ | ||
| 179 | |||
| 180 | struct flex_groups { | ||
| 181 | __u32 free_inodes; | ||
| 182 | __u32 free_blocks; | ||
| 183 | }; | ||
| 184 | |||
| 173 | #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ | 185 | #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ |
| 174 | #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ | 186 | #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ |
| 175 | #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ | 187 | #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ |
| @@ -527,6 +539,7 @@ do { \ | |||
| 527 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 539 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
| 528 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 540 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
| 529 | #define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ | 541 | #define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ |
| 542 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | ||
| 530 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ | 543 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ |
| 531 | #ifndef _LINUX_EXT2_FS_H | 544 | #ifndef _LINUX_EXT2_FS_H |
| 532 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt | 545 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt |
| @@ -647,7 +660,10 @@ struct ext4_super_block { | |||
| 647 | __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ | 660 | __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ |
| 648 | __le64 s_mmp_block; /* Block for multi-mount protection */ | 661 | __le64 s_mmp_block; /* Block for multi-mount protection */ |
| 649 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ | 662 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ |
| 650 | __u32 s_reserved[163]; /* Padding to the end of the block */ | 663 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ |
| 664 | __u8 s_reserved_char_pad2; | ||
| 665 | __le16 s_reserved_pad; | ||
| 666 | __u32 s_reserved[162]; /* Padding to the end of the block */ | ||
| 651 | }; | 667 | }; |
| 652 | 668 | ||
| 653 | #ifdef __KERNEL__ | 669 | #ifdef __KERNEL__ |
| @@ -958,12 +974,17 @@ extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, | |||
| 958 | extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); | 974 | extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); |
| 959 | extern unsigned long ext4_bg_num_gdb(struct super_block *sb, | 975 | extern unsigned long ext4_bg_num_gdb(struct super_block *sb, |
| 960 | ext4_group_t group); | 976 | ext4_group_t group); |
| 961 | extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, | 977 | extern ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode, |
| 962 | ext4_fsblk_t goal, int *errp); | 978 | ext4_fsblk_t goal, int *errp); |
| 963 | extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, | 979 | extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, |
| 964 | ext4_fsblk_t goal, unsigned long *count, int *errp); | 980 | ext4_fsblk_t goal, unsigned long *count, int *errp); |
| 965 | extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, | 981 | extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, |
| 982 | ext4_lblk_t iblock, ext4_fsblk_t goal, | ||
| 983 | unsigned long *count, int *errp); | ||
| 984 | extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, | ||
| 966 | ext4_fsblk_t goal, unsigned long *count, int *errp); | 985 | ext4_fsblk_t goal, unsigned long *count, int *errp); |
| 986 | extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | ||
| 987 | ext4_fsblk_t nblocks); | ||
| 967 | extern void ext4_free_blocks (handle_t *handle, struct inode *inode, | 988 | extern void ext4_free_blocks (handle_t *handle, struct inode *inode, |
| 968 | ext4_fsblk_t block, unsigned long count, int metadata); | 989 | ext4_fsblk_t block, unsigned long count, int metadata); |
| 969 | extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, | 990 | extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, |
| @@ -1016,9 +1037,14 @@ extern int __init init_ext4_mballoc(void); | |||
| 1016 | extern void exit_ext4_mballoc(void); | 1037 | extern void exit_ext4_mballoc(void); |
| 1017 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, | 1038 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, |
| 1018 | unsigned long, unsigned long, int, unsigned long *); | 1039 | unsigned long, unsigned long, int, unsigned long *); |
| 1040 | extern int ext4_mb_add_more_groupinfo(struct super_block *sb, | ||
| 1041 | ext4_group_t i, struct ext4_group_desc *desc); | ||
| 1042 | extern void ext4_mb_update_group_info(struct ext4_group_info *grp, | ||
| 1043 | ext4_grpblk_t add); | ||
| 1019 | 1044 | ||
| 1020 | 1045 | ||
| 1021 | /* inode.c */ | 1046 | /* inode.c */ |
| 1047 | void ext4_da_release_space(struct inode *inode, int used, int to_free); | ||
| 1022 | int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, | 1048 | int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, |
| 1023 | struct buffer_head *bh, ext4_fsblk_t blocknr); | 1049 | struct buffer_head *bh, ext4_fsblk_t blocknr); |
| 1024 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, | 1050 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, |
| @@ -1033,19 +1059,23 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
| 1033 | extern struct inode *ext4_iget(struct super_block *, unsigned long); | 1059 | extern struct inode *ext4_iget(struct super_block *, unsigned long); |
| 1034 | extern int ext4_write_inode (struct inode *, int); | 1060 | extern int ext4_write_inode (struct inode *, int); |
| 1035 | extern int ext4_setattr (struct dentry *, struct iattr *); | 1061 | extern int ext4_setattr (struct dentry *, struct iattr *); |
| 1062 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
| 1063 | struct kstat *stat); | ||
| 1036 | extern void ext4_delete_inode (struct inode *); | 1064 | extern void ext4_delete_inode (struct inode *); |
| 1037 | extern int ext4_sync_inode (handle_t *, struct inode *); | 1065 | extern int ext4_sync_inode (handle_t *, struct inode *); |
| 1038 | extern void ext4_discard_reservation (struct inode *); | 1066 | extern void ext4_discard_reservation (struct inode *); |
| 1039 | extern void ext4_dirty_inode(struct inode *); | 1067 | extern void ext4_dirty_inode(struct inode *); |
| 1040 | extern int ext4_change_inode_journal_flag(struct inode *, int); | 1068 | extern int ext4_change_inode_journal_flag(struct inode *, int); |
| 1041 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1069 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
| 1070 | extern int ext4_can_truncate(struct inode *inode); | ||
| 1042 | extern void ext4_truncate (struct inode *); | 1071 | extern void ext4_truncate (struct inode *); |
| 1043 | extern void ext4_set_inode_flags(struct inode *); | 1072 | extern void ext4_set_inode_flags(struct inode *); |
| 1044 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1073 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
| 1045 | extern void ext4_set_aops(struct inode *inode); | 1074 | extern void ext4_set_aops(struct inode *inode); |
| 1046 | extern int ext4_writepage_trans_blocks(struct inode *); | 1075 | extern int ext4_writepage_trans_blocks(struct inode *); |
| 1047 | extern int ext4_block_truncate_page(handle_t *handle, struct page *page, | 1076 | extern int ext4_block_truncate_page(handle_t *handle, |
| 1048 | struct address_space *mapping, loff_t from); | 1077 | struct address_space *mapping, loff_t from); |
| 1078 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); | ||
| 1049 | 1079 | ||
| 1050 | /* ioctl.c */ | 1080 | /* ioctl.c */ |
| 1051 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | 1081 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); |
| @@ -1159,10 +1189,21 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb, | |||
| 1159 | } | 1189 | } |
| 1160 | 1190 | ||
| 1161 | 1191 | ||
| 1192 | static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, | ||
| 1193 | ext4_group_t block_group) | ||
| 1194 | { | ||
| 1195 | return block_group >> sbi->s_log_groups_per_flex; | ||
| 1196 | } | ||
| 1197 | |||
| 1198 | static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi) | ||
| 1199 | { | ||
| 1200 | return 1 << sbi->s_log_groups_per_flex; | ||
| 1201 | } | ||
| 1202 | |||
| 1162 | #define ext4_std_error(sb, errno) \ | 1203 | #define ext4_std_error(sb, errno) \ |
| 1163 | do { \ | 1204 | do { \ |
| 1164 | if ((errno)) \ | 1205 | if ((errno)) \ |
| 1165 | __ext4_std_error((sb), __FUNCTION__, (errno)); \ | 1206 | __ext4_std_error((sb), __func__, (errno)); \ |
| 1166 | } while (0) | 1207 | } while (0) |
| 1167 | 1208 | ||
| 1168 | /* | 1209 | /* |
| @@ -1191,7 +1232,7 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 1191 | ext4_lblk_t iblock, | 1232 | ext4_lblk_t iblock, |
| 1192 | unsigned long max_blocks, struct buffer_head *bh_result, | 1233 | unsigned long max_blocks, struct buffer_head *bh_result, |
| 1193 | int create, int extend_disksize); | 1234 | int create, int extend_disksize); |
| 1194 | extern void ext4_ext_truncate(struct inode *, struct page *); | 1235 | extern void ext4_ext_truncate(struct inode *); |
| 1195 | extern void ext4_ext_init(struct super_block *); | 1236 | extern void ext4_ext_init(struct super_block *); |
| 1196 | extern void ext4_ext_release(struct super_block *); | 1237 | extern void ext4_ext_release(struct super_block *); |
| 1197 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, | 1238 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, |
| @@ -1199,7 +1240,7 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, | |||
| 1199 | extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, | 1240 | extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, |
| 1200 | sector_t block, unsigned long max_blocks, | 1241 | sector_t block, unsigned long max_blocks, |
| 1201 | struct buffer_head *bh, int create, | 1242 | struct buffer_head *bh, int create, |
| 1202 | int extend_disksize); | 1243 | int extend_disksize, int flag); |
| 1203 | #endif /* __KERNEL__ */ | 1244 | #endif /* __KERNEL__ */ |
| 1204 | 1245 | ||
| 1205 | #endif /* _EXT4_H */ | 1246 | #endif /* _EXT4_H */ |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 75333b595fab..6c166c0a54b7 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
| @@ -212,6 +212,7 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) | |||
| 212 | (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); | 212 | (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); |
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); | ||
| 215 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); | 216 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); |
| 216 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); | 217 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); |
| 217 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 218 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h index 26a4ae255d79..ef7409f0e7e4 100644 --- a/fs/ext4/ext4_i.h +++ b/fs/ext4/ext4_i.h | |||
| @@ -79,7 +79,7 @@ struct ext4_ext_cache { | |||
| 79 | }; | 79 | }; |
| 80 | 80 | ||
| 81 | /* | 81 | /* |
| 82 | * third extended file system inode data in memory | 82 | * fourth extended file system inode data in memory |
| 83 | */ | 83 | */ |
| 84 | struct ext4_inode_info { | 84 | struct ext4_inode_info { |
| 85 | __le32 i_data[15]; /* unconverted */ | 85 | __le32 i_data[15]; /* unconverted */ |
| @@ -150,6 +150,7 @@ struct ext4_inode_info { | |||
| 150 | */ | 150 | */ |
| 151 | struct rw_semaphore i_data_sem; | 151 | struct rw_semaphore i_data_sem; |
| 152 | struct inode vfs_inode; | 152 | struct inode vfs_inode; |
| 153 | struct jbd2_inode jinode; | ||
| 153 | 154 | ||
| 154 | unsigned long i_ext_generation; | 155 | unsigned long i_ext_generation; |
| 155 | struct ext4_ext_cache i_cached_extent; | 156 | struct ext4_ext_cache i_cached_extent; |
| @@ -162,6 +163,13 @@ struct ext4_inode_info { | |||
| 162 | /* mballoc */ | 163 | /* mballoc */ |
| 163 | struct list_head i_prealloc_list; | 164 | struct list_head i_prealloc_list; |
| 164 | spinlock_t i_prealloc_lock; | 165 | spinlock_t i_prealloc_lock; |
| 166 | |||
| 167 | /* allocation reservation info for delalloc */ | ||
| 168 | unsigned long i_reserved_data_blocks; | ||
| 169 | unsigned long i_reserved_meta_blocks; | ||
| 170 | unsigned long i_allocated_meta_blocks; | ||
| 171 | unsigned short i_delalloc_reserved_flag; | ||
| 172 | spinlock_t i_block_reservation_lock; | ||
| 165 | }; | 173 | }; |
| 166 | 174 | ||
| 167 | #endif /* _EXT4_I */ | 175 | #endif /* _EXT4_I */ |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 9255a7d28b24..eb8bc3afe6e9 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
| @@ -142,19 +142,17 @@ int __ext4_journal_dirty_metadata(const char *where, | |||
| 142 | handle_t *handle, struct buffer_head *bh); | 142 | handle_t *handle, struct buffer_head *bh); |
| 143 | 143 | ||
| 144 | #define ext4_journal_get_undo_access(handle, bh) \ | 144 | #define ext4_journal_get_undo_access(handle, bh) \ |
| 145 | __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh)) | 145 | __ext4_journal_get_undo_access(__func__, (handle), (bh)) |
| 146 | #define ext4_journal_get_write_access(handle, bh) \ | 146 | #define ext4_journal_get_write_access(handle, bh) \ |
| 147 | __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh)) | 147 | __ext4_journal_get_write_access(__func__, (handle), (bh)) |
| 148 | #define ext4_journal_revoke(handle, blocknr, bh) \ | 148 | #define ext4_journal_revoke(handle, blocknr, bh) \ |
| 149 | __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) | 149 | __ext4_journal_revoke(__func__, (handle), (blocknr), (bh)) |
| 150 | #define ext4_journal_get_create_access(handle, bh) \ | 150 | #define ext4_journal_get_create_access(handle, bh) \ |
| 151 | __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh)) | 151 | __ext4_journal_get_create_access(__func__, (handle), (bh)) |
| 152 | #define ext4_journal_dirty_metadata(handle, bh) \ | 152 | #define ext4_journal_dirty_metadata(handle, bh) \ |
| 153 | __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) | 153 | __ext4_journal_dirty_metadata(__func__, (handle), (bh)) |
| 154 | #define ext4_journal_forget(handle, bh) \ | 154 | #define ext4_journal_forget(handle, bh) \ |
| 155 | __ext4_journal_forget(__FUNCTION__, (handle), (bh)) | 155 | __ext4_journal_forget(__func__, (handle), (bh)) |
| 156 | |||
| 157 | int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh); | ||
| 158 | 156 | ||
| 159 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); | 157 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); |
| 160 | int __ext4_journal_stop(const char *where, handle_t *handle); | 158 | int __ext4_journal_stop(const char *where, handle_t *handle); |
| @@ -165,7 +163,7 @@ static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) | |||
| 165 | } | 163 | } |
| 166 | 164 | ||
| 167 | #define ext4_journal_stop(handle) \ | 165 | #define ext4_journal_stop(handle) \ |
| 168 | __ext4_journal_stop(__FUNCTION__, (handle)) | 166 | __ext4_journal_stop(__func__, (handle)) |
| 169 | 167 | ||
| 170 | static inline handle_t *ext4_journal_current_handle(void) | 168 | static inline handle_t *ext4_journal_current_handle(void) |
| 171 | { | 169 | { |
| @@ -192,6 +190,11 @@ static inline int ext4_journal_force_commit(journal_t *journal) | |||
| 192 | return jbd2_journal_force_commit(journal); | 190 | return jbd2_journal_force_commit(journal); |
| 193 | } | 191 | } |
| 194 | 192 | ||
| 193 | static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) | ||
| 194 | { | ||
| 195 | return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode); | ||
| 196 | } | ||
| 197 | |||
| 195 | /* super.c */ | 198 | /* super.c */ |
| 196 | int ext4_force_commit(struct super_block *sb); | 199 | int ext4_force_commit(struct super_block *sb); |
| 197 | 200 | ||
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 5802e69f2191..6300226d5531 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h | |||
| @@ -25,7 +25,7 @@ | |||
| 25 | #include <linux/rbtree.h> | 25 | #include <linux/rbtree.h> |
| 26 | 26 | ||
| 27 | /* | 27 | /* |
| 28 | * third extended-fs super-block data in memory | 28 | * fourth extended-fs super-block data in memory |
| 29 | */ | 29 | */ |
| 30 | struct ext4_sb_info { | 30 | struct ext4_sb_info { |
| 31 | unsigned long s_desc_size; /* Size of a group descriptor in bytes */ | 31 | unsigned long s_desc_size; /* Size of a group descriptor in bytes */ |
| @@ -143,6 +143,9 @@ struct ext4_sb_info { | |||
| 143 | 143 | ||
| 144 | /* locality groups */ | 144 | /* locality groups */ |
| 145 | struct ext4_locality_group *s_locality_groups; | 145 | struct ext4_locality_group *s_locality_groups; |
| 146 | |||
| 147 | unsigned int s_log_groups_per_flex; | ||
| 148 | struct flex_groups *s_flex_groups; | ||
| 146 | }; | 149 | }; |
| 147 | 150 | ||
| 148 | #endif /* _EXT4_SB */ | 151 | #endif /* _EXT4_SB */ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 47929c4e3dae..42c4c0c892ed 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
| @@ -92,17 +92,16 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) | |||
| 92 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | 92 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); |
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed) | 95 | static int ext4_ext_journal_restart(handle_t *handle, int needed) |
| 96 | { | 96 | { |
| 97 | int err; | 97 | int err; |
| 98 | 98 | ||
| 99 | if (handle->h_buffer_credits > needed) | 99 | if (handle->h_buffer_credits > needed) |
| 100 | return handle; | 100 | return 0; |
| 101 | if (!ext4_journal_extend(handle, needed)) | 101 | err = ext4_journal_extend(handle, needed); |
| 102 | return handle; | 102 | if (err) |
| 103 | err = ext4_journal_restart(handle, needed); | 103 | return err; |
| 104 | 104 | return ext4_journal_restart(handle, needed); | |
| 105 | return handle; | ||
| 106 | } | 105 | } |
| 107 | 106 | ||
| 108 | /* | 107 | /* |
| @@ -180,15 +179,18 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
| 180 | return bg_start + colour + block; | 179 | return bg_start + colour + block; |
| 181 | } | 180 | } |
| 182 | 181 | ||
| 182 | /* | ||
| 183 | * Allocation for a meta data block | ||
| 184 | */ | ||
| 183 | static ext4_fsblk_t | 185 | static ext4_fsblk_t |
| 184 | ext4_ext_new_block(handle_t *handle, struct inode *inode, | 186 | ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, |
| 185 | struct ext4_ext_path *path, | 187 | struct ext4_ext_path *path, |
| 186 | struct ext4_extent *ex, int *err) | 188 | struct ext4_extent *ex, int *err) |
| 187 | { | 189 | { |
| 188 | ext4_fsblk_t goal, newblock; | 190 | ext4_fsblk_t goal, newblock; |
| 189 | 191 | ||
| 190 | goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); | 192 | goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); |
| 191 | newblock = ext4_new_block(handle, inode, goal, err); | 193 | newblock = ext4_new_meta_block(handle, inode, goal, err); |
| 192 | return newblock; | 194 | return newblock; |
| 193 | } | 195 | } |
| 194 | 196 | ||
| @@ -246,6 +248,36 @@ static int ext4_ext_space_root_idx(struct inode *inode) | |||
| 246 | return size; | 248 | return size; |
| 247 | } | 249 | } |
| 248 | 250 | ||
| 251 | /* | ||
| 252 | * Calculate the number of metadata blocks needed | ||
| 253 | * to allocate @blocks | ||
| | | 255 | * Worst case is one block per extent |
| 255 | */ | ||
| 256 | int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) | ||
| 257 | { | ||
| 258 | int lcap, icap, rcap, leafs, idxs, num; | ||
| 259 | int newextents = blocks; | ||
| 260 | |||
| 261 | rcap = ext4_ext_space_root_idx(inode); | ||
| 262 | lcap = ext4_ext_space_block(inode); | ||
| 263 | icap = ext4_ext_space_block_idx(inode); | ||
| 264 | |||
| 265 | /* number of new leaf blocks needed */ | ||
| 266 | num = leafs = (newextents + lcap - 1) / lcap; | ||
| 267 | |||
| 268 | /* | ||
| | | 269 | * Worst case, we need separate index block(s) |
| 270 | * to link all new leaf blocks | ||
| 271 | */ | ||
| 272 | idxs = (leafs + icap - 1) / icap; | ||
| 273 | do { | ||
| 274 | num += idxs; | ||
| 275 | idxs = (idxs + icap - 1) / icap; | ||
| 276 | } while (idxs > rcap); | ||
| 277 | |||
| 278 | return num; | ||
| 279 | } | ||
| 280 | |||
| 249 | static int | 281 | static int |
| 250 | ext4_ext_max_entries(struct inode *inode, int depth) | 282 | ext4_ext_max_entries(struct inode *inode, int depth) |
| 251 | { | 283 | { |
| @@ -524,6 +556,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
| 524 | alloc = 1; | 556 | alloc = 1; |
| 525 | } | 557 | } |
| 526 | path[0].p_hdr = eh; | 558 | path[0].p_hdr = eh; |
| 559 | path[0].p_bh = NULL; | ||
| 527 | 560 | ||
| 528 | i = depth; | 561 | i = depth; |
| 529 | /* walk through the tree */ | 562 | /* walk through the tree */ |
| @@ -552,12 +585,14 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
| 552 | } | 585 | } |
| 553 | 586 | ||
| 554 | path[ppos].p_depth = i; | 587 | path[ppos].p_depth = i; |
| 555 | path[ppos].p_hdr = eh; | ||
| 556 | path[ppos].p_ext = NULL; | 588 | path[ppos].p_ext = NULL; |
| 557 | path[ppos].p_idx = NULL; | 589 | path[ppos].p_idx = NULL; |
| 558 | 590 | ||
| 559 | /* find extent */ | 591 | /* find extent */ |
| 560 | ext4_ext_binsearch(inode, path + ppos, block); | 592 | ext4_ext_binsearch(inode, path + ppos, block); |
| 593 | /* if not an empty leaf */ | ||
| 594 | if (path[ppos].p_ext) | ||
| 595 | path[ppos].p_block = ext_pblock(path[ppos].p_ext); | ||
| 561 | 596 | ||
| 562 | ext4_ext_show_path(inode, path); | 597 | ext4_ext_show_path(inode, path); |
| 563 | 598 | ||
| @@ -688,7 +723,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
| 688 | /* allocate all needed blocks */ | 723 | /* allocate all needed blocks */ |
| 689 | ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); | 724 | ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); |
| 690 | for (a = 0; a < depth - at; a++) { | 725 | for (a = 0; a < depth - at; a++) { |
| 691 | newblock = ext4_ext_new_block(handle, inode, path, newext, &err); | 726 | newblock = ext4_ext_new_meta_block(handle, inode, path, |
| 727 | newext, &err); | ||
| 692 | if (newblock == 0) | 728 | if (newblock == 0) |
| 693 | goto cleanup; | 729 | goto cleanup; |
| 694 | ablocks[a] = newblock; | 730 | ablocks[a] = newblock; |
| @@ -884,7 +920,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
| 884 | ext4_fsblk_t newblock; | 920 | ext4_fsblk_t newblock; |
| 885 | int err = 0; | 921 | int err = 0; |
| 886 | 922 | ||
| 887 | newblock = ext4_ext_new_block(handle, inode, path, newext, &err); | 923 | newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err); |
| 888 | if (newblock == 0) | 924 | if (newblock == 0) |
| 889 | return err; | 925 | return err; |
| 890 | 926 | ||
| @@ -981,6 +1017,8 @@ repeat: | |||
| 981 | /* if we found index with free entry, then use that | 1017 | /* if we found index with free entry, then use that |
| 982 | * entry: create all needed subtree and add new leaf */ | 1018 | * entry: create all needed subtree and add new leaf */ |
| 983 | err = ext4_ext_split(handle, inode, path, newext, i); | 1019 | err = ext4_ext_split(handle, inode, path, newext, i); |
| 1020 | if (err) | ||
| 1021 | goto out; | ||
| 984 | 1022 | ||
| 985 | /* refill path */ | 1023 | /* refill path */ |
| 986 | ext4_ext_drop_refs(path); | 1024 | ext4_ext_drop_refs(path); |
| @@ -1883,11 +1921,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 1883 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | 1921 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); |
| 1884 | #endif | 1922 | #endif |
| 1885 | 1923 | ||
| 1886 | handle = ext4_ext_journal_restart(handle, credits); | 1924 | err = ext4_ext_journal_restart(handle, credits); |
| 1887 | if (IS_ERR(handle)) { | 1925 | if (err) |
| 1888 | err = PTR_ERR(handle); | ||
| 1889 | goto out; | 1926 | goto out; |
| 1890 | } | ||
| 1891 | 1927 | ||
| 1892 | err = ext4_ext_get_access(handle, inode, path + depth); | 1928 | err = ext4_ext_get_access(handle, inode, path + depth); |
| 1893 | if (err) | 1929 | if (err) |
| @@ -2529,6 +2565,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2529 | int err = 0, depth, ret; | 2565 | int err = 0, depth, ret; |
| 2530 | unsigned long allocated = 0; | 2566 | unsigned long allocated = 0; |
| 2531 | struct ext4_allocation_request ar; | 2567 | struct ext4_allocation_request ar; |
| 2568 | loff_t disksize; | ||
| 2532 | 2569 | ||
| 2533 | __clear_bit(BH_New, &bh_result->b_state); | 2570 | __clear_bit(BH_New, &bh_result->b_state); |
| 2534 | ext_debug("blocks %u/%lu requested for inode %u\n", | 2571 | ext_debug("blocks %u/%lu requested for inode %u\n", |
| @@ -2616,8 +2653,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2616 | */ | 2653 | */ |
| 2617 | if (allocated > max_blocks) | 2654 | if (allocated > max_blocks) |
| 2618 | allocated = max_blocks; | 2655 | allocated = max_blocks; |
| 2619 | /* mark the buffer unwritten */ | 2656 | set_buffer_unwritten(bh_result); |
| 2620 | __set_bit(BH_Unwritten, &bh_result->b_state); | ||
| 2621 | goto out2; | 2657 | goto out2; |
| 2622 | } | 2658 | } |
| 2623 | 2659 | ||
| @@ -2716,14 +2752,19 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2716 | goto out2; | 2752 | goto out2; |
| 2717 | } | 2753 | } |
| 2718 | 2754 | ||
| 2719 | if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize) | ||
| 2720 | EXT4_I(inode)->i_disksize = inode->i_size; | ||
| 2721 | |||
| 2722 | /* previous routine could use block we allocated */ | 2755 | /* previous routine could use block we allocated */ |
| 2723 | newblock = ext_pblock(&newex); | 2756 | newblock = ext_pblock(&newex); |
| 2724 | allocated = ext4_ext_get_actual_len(&newex); | 2757 | allocated = ext4_ext_get_actual_len(&newex); |
| 2725 | outnew: | 2758 | outnew: |
| 2726 | __set_bit(BH_New, &bh_result->b_state); | 2759 | if (extend_disksize) { |
| 2760 | disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits; | ||
| 2761 | if (disksize > i_size_read(inode)) | ||
| 2762 | disksize = i_size_read(inode); | ||
| 2763 | if (disksize > EXT4_I(inode)->i_disksize) | ||
| 2764 | EXT4_I(inode)->i_disksize = disksize; | ||
| 2765 | } | ||
| 2766 | |||
| 2767 | set_buffer_new(bh_result); | ||
| 2727 | 2768 | ||
| 2728 | /* Cache only when it is _not_ an uninitialized extent */ | 2769 | /* Cache only when it is _not_ an uninitialized extent */ |
| 2729 | if (create != EXT4_CREATE_UNINITIALIZED_EXT) | 2770 | if (create != EXT4_CREATE_UNINITIALIZED_EXT) |
| @@ -2733,7 +2774,7 @@ out: | |||
| 2733 | if (allocated > max_blocks) | 2774 | if (allocated > max_blocks) |
| 2734 | allocated = max_blocks; | 2775 | allocated = max_blocks; |
| 2735 | ext4_ext_show_leaf(inode, path); | 2776 | ext4_ext_show_leaf(inode, path); |
| 2736 | __set_bit(BH_Mapped, &bh_result->b_state); | 2777 | set_buffer_mapped(bh_result); |
| 2737 | bh_result->b_bdev = inode->i_sb->s_bdev; | 2778 | bh_result->b_bdev = inode->i_sb->s_bdev; |
| 2738 | bh_result->b_blocknr = newblock; | 2779 | bh_result->b_blocknr = newblock; |
| 2739 | out2: | 2780 | out2: |
| @@ -2744,7 +2785,7 @@ out2: | |||
| 2744 | return err ? err : allocated; | 2785 | return err ? err : allocated; |
| 2745 | } | 2786 | } |
| 2746 | 2787 | ||
| 2747 | void ext4_ext_truncate(struct inode * inode, struct page *page) | 2788 | void ext4_ext_truncate(struct inode *inode) |
| 2748 | { | 2789 | { |
| 2749 | struct address_space *mapping = inode->i_mapping; | 2790 | struct address_space *mapping = inode->i_mapping; |
| 2750 | struct super_block *sb = inode->i_sb; | 2791 | struct super_block *sb = inode->i_sb; |
| @@ -2757,18 +2798,14 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) | |||
| 2757 | */ | 2798 | */ |
| 2758 | err = ext4_writepage_trans_blocks(inode) + 3; | 2799 | err = ext4_writepage_trans_blocks(inode) + 3; |
| 2759 | handle = ext4_journal_start(inode, err); | 2800 | handle = ext4_journal_start(inode, err); |
| 2760 | if (IS_ERR(handle)) { | 2801 | if (IS_ERR(handle)) |
| 2761 | if (page) { | ||
| 2762 | clear_highpage(page); | ||
| 2763 | flush_dcache_page(page); | ||
| 2764 | unlock_page(page); | ||
| 2765 | page_cache_release(page); | ||
| 2766 | } | ||
| 2767 | return; | 2802 | return; |
| 2768 | } | ||
| 2769 | 2803 | ||
| 2770 | if (page) | 2804 | if (inode->i_size & (sb->s_blocksize - 1)) |
| 2771 | ext4_block_truncate_page(handle, page, mapping, inode->i_size); | 2805 | ext4_block_truncate_page(handle, mapping, inode->i_size); |
| 2806 | |||
| 2807 | if (ext4_orphan_add(handle, inode)) | ||
| 2808 | goto out_stop; | ||
| 2772 | 2809 | ||
| 2773 | down_write(&EXT4_I(inode)->i_data_sem); | 2810 | down_write(&EXT4_I(inode)->i_data_sem); |
| 2774 | ext4_ext_invalidate_cache(inode); | 2811 | ext4_ext_invalidate_cache(inode); |
| @@ -2780,8 +2817,6 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) | |||
| 2780 | * Probably we need not scan at all, | 2817 | * Probably we need not scan at all, |
| 2781 | * because page truncation is enough. | 2818 | * because page truncation is enough. |
| 2782 | */ | 2819 | */ |
| 2783 | if (ext4_orphan_add(handle, inode)) | ||
| 2784 | goto out_stop; | ||
| 2785 | 2820 | ||
| 2786 | /* we have to know where to truncate from in crash case */ | 2821 | /* we have to know where to truncate from in crash case */ |
| 2787 | EXT4_I(inode)->i_disksize = inode->i_size; | 2822 | EXT4_I(inode)->i_disksize = inode->i_size; |
| @@ -2798,6 +2833,7 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) | |||
| 2798 | handle->h_sync = 1; | 2833 | handle->h_sync = 1; |
| 2799 | 2834 | ||
| 2800 | out_stop: | 2835 | out_stop: |
| 2836 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 2801 | /* | 2837 | /* |
| 2802 | * If this was a simple ftruncate() and the file will remain alive, | 2838 | * If this was a simple ftruncate() and the file will remain alive, |
| 2803 | * then we need to clear up the orphan record which we created above. | 2839 | * then we need to clear up the orphan record which we created above. |
| @@ -2808,7 +2844,6 @@ out_stop: | |||
| 2808 | if (inode->i_nlink) | 2844 | if (inode->i_nlink) |
| 2809 | ext4_orphan_del(handle, inode); | 2845 | ext4_orphan_del(handle, inode); |
| 2810 | 2846 | ||
| 2811 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 2812 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 2847 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
| 2813 | ext4_mark_inode_dirty(handle, inode); | 2848 | ext4_mark_inode_dirty(handle, inode); |
| 2814 | ext4_journal_stop(handle); | 2849 | ext4_journal_stop(handle); |
| @@ -2911,7 +2946,7 @@ retry: | |||
| 2911 | } | 2946 | } |
| 2912 | ret = ext4_get_blocks_wrap(handle, inode, block, | 2947 | ret = ext4_get_blocks_wrap(handle, inode, block, |
| 2913 | max_blocks, &map_bh, | 2948 | max_blocks, &map_bh, |
| 2914 | EXT4_CREATE_UNINITIALIZED_EXT, 0); | 2949 | EXT4_CREATE_UNINITIALIZED_EXT, 0, 0); |
| 2915 | if (ret <= 0) { | 2950 | if (ret <= 0) { |
| 2916 | #ifdef EXT4FS_DEBUG | 2951 | #ifdef EXT4FS_DEBUG |
| 2917 | WARN_ON(ret <= 0); | 2952 | WARN_ON(ret <= 0); |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 4159be6366ab..430eb7978db4 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
| @@ -123,6 +123,23 @@ force_commit: | |||
| 123 | return ret; | 123 | return ret; |
| 124 | } | 124 | } |
| 125 | 125 | ||
| 126 | static struct vm_operations_struct ext4_file_vm_ops = { | ||
| 127 | .fault = filemap_fault, | ||
| 128 | .page_mkwrite = ext4_page_mkwrite, | ||
| 129 | }; | ||
| 130 | |||
| 131 | static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | ||
| 132 | { | ||
| 133 | struct address_space *mapping = file->f_mapping; | ||
| 134 | |||
| 135 | if (!mapping->a_ops->readpage) | ||
| 136 | return -ENOEXEC; | ||
| 137 | file_accessed(file); | ||
| 138 | vma->vm_ops = &ext4_file_vm_ops; | ||
| 139 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
| 140 | return 0; | ||
| 141 | } | ||
| 142 | |||
| 126 | const struct file_operations ext4_file_operations = { | 143 | const struct file_operations ext4_file_operations = { |
| 127 | .llseek = generic_file_llseek, | 144 | .llseek = generic_file_llseek, |
| 128 | .read = do_sync_read, | 145 | .read = do_sync_read, |
| @@ -133,7 +150,7 @@ const struct file_operations ext4_file_operations = { | |||
| 133 | #ifdef CONFIG_COMPAT | 150 | #ifdef CONFIG_COMPAT |
| 134 | .compat_ioctl = ext4_compat_ioctl, | 151 | .compat_ioctl = ext4_compat_ioctl, |
| 135 | #endif | 152 | #endif |
| 136 | .mmap = generic_file_mmap, | 153 | .mmap = ext4_file_mmap, |
| 137 | .open = generic_file_open, | 154 | .open = generic_file_open, |
| 138 | .release = ext4_release_file, | 155 | .release = ext4_release_file, |
| 139 | .fsync = ext4_sync_file, | 156 | .fsync = ext4_sync_file, |
| @@ -144,6 +161,7 @@ const struct file_operations ext4_file_operations = { | |||
| 144 | const struct inode_operations ext4_file_inode_operations = { | 161 | const struct inode_operations ext4_file_inode_operations = { |
| 145 | .truncate = ext4_truncate, | 162 | .truncate = ext4_truncate, |
| 146 | .setattr = ext4_setattr, | 163 | .setattr = ext4_setattr, |
| 164 | .getattr = ext4_getattr, | ||
| 147 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 165 | #ifdef CONFIG_EXT4DEV_FS_XATTR |
| 148 | .setxattr = generic_setxattr, | 166 | .setxattr = generic_setxattr, |
| 149 | .getxattr = generic_getxattr, | 167 | .getxattr = generic_getxattr, |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 1c8ba48d4f8d..a45c3737ad31 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
| 28 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
| 29 | #include <linux/jbd2.h> | 29 | #include <linux/jbd2.h> |
| 30 | #include <linux/blkdev.h> | ||
| 30 | #include "ext4.h" | 31 | #include "ext4.h" |
| 31 | #include "ext4_jbd2.h" | 32 | #include "ext4_jbd2.h" |
| 32 | 33 | ||
| @@ -45,6 +46,7 @@ | |||
| 45 | int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | 46 | int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) |
| 46 | { | 47 | { |
| 47 | struct inode *inode = dentry->d_inode; | 48 | struct inode *inode = dentry->d_inode; |
| 49 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | ||
| 48 | int ret = 0; | 50 | int ret = 0; |
| 49 | 51 | ||
| 50 | J_ASSERT(ext4_journal_current_handle() == NULL); | 52 | J_ASSERT(ext4_journal_current_handle() == NULL); |
| @@ -85,6 +87,8 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
| 85 | .nr_to_write = 0, /* sys_fsync did this */ | 87 | .nr_to_write = 0, /* sys_fsync did this */ |
| 86 | }; | 88 | }; |
| 87 | ret = sync_inode(inode, &wbc); | 89 | ret = sync_inode(inode, &wbc); |
| 90 | if (journal && (journal->j_flags & JBD2_BARRIER)) | ||
| 91 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | ||
| 88 | } | 92 | } |
| 89 | out: | 93 | out: |
| 90 | return ret; | 94 | return ret; |
diff --git a/fs/ext4/group.h b/fs/ext4/group.h index 7eb0604e7eea..c2c0a8d06d0e 100644 --- a/fs/ext4/group.h +++ b/fs/ext4/group.h | |||
| @@ -13,7 +13,7 @@ extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, | |||
| 13 | struct ext4_group_desc *gdp); | 13 | struct ext4_group_desc *gdp); |
| 14 | extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, | 14 | extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, |
| 15 | struct ext4_group_desc *gdp); | 15 | struct ext4_group_desc *gdp); |
| 16 | struct buffer_head *read_block_bitmap(struct super_block *sb, | 16 | struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, |
| 17 | ext4_group_t block_group); | 17 | ext4_group_t block_group); |
| 18 | extern unsigned ext4_init_block_bitmap(struct super_block *sb, | 18 | extern unsigned ext4_init_block_bitmap(struct super_block *sb, |
| 19 | struct buffer_head *bh, | 19 | struct buffer_head *bh, |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index c6efbab0c801..a92eb305344f 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
| @@ -157,6 +157,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
| 157 | struct ext4_super_block * es; | 157 | struct ext4_super_block * es; |
| 158 | struct ext4_sb_info *sbi; | 158 | struct ext4_sb_info *sbi; |
| 159 | int fatal = 0, err; | 159 | int fatal = 0, err; |
| 160 | ext4_group_t flex_group; | ||
| 160 | 161 | ||
| 161 | if (atomic_read(&inode->i_count) > 1) { | 162 | if (atomic_read(&inode->i_count) > 1) { |
| 162 | printk ("ext4_free_inode: inode has count=%d\n", | 163 | printk ("ext4_free_inode: inode has count=%d\n", |
| @@ -232,6 +233,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
| 232 | if (is_directory) | 233 | if (is_directory) |
| 233 | percpu_counter_dec(&sbi->s_dirs_counter); | 234 | percpu_counter_dec(&sbi->s_dirs_counter); |
| 234 | 235 | ||
| 236 | if (sbi->s_log_groups_per_flex) { | ||
| 237 | flex_group = ext4_flex_group(sbi, block_group); | ||
| 238 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
| 239 | sbi->s_flex_groups[flex_group].free_inodes++; | ||
| 240 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
| 241 | } | ||
| 235 | } | 242 | } |
| 236 | BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); | 243 | BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); |
| 237 | err = ext4_journal_dirty_metadata(handle, bh2); | 244 | err = ext4_journal_dirty_metadata(handle, bh2); |
| @@ -286,6 +293,80 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, | |||
| 286 | return ret; | 293 | return ret; |
| 287 | } | 294 | } |
| 288 | 295 | ||
| 296 | #define free_block_ratio 10 | ||
| 297 | |||
| 298 | static int find_group_flex(struct super_block *sb, struct inode *parent, | ||
| 299 | ext4_group_t *best_group) | ||
| 300 | { | ||
| 301 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 302 | struct ext4_group_desc *desc; | ||
| 303 | struct buffer_head *bh; | ||
| 304 | struct flex_groups *flex_group = sbi->s_flex_groups; | ||
| 305 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | ||
| 306 | ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); | ||
| 307 | ext4_group_t ngroups = sbi->s_groups_count; | ||
| 308 | int flex_size = ext4_flex_bg_size(sbi); | ||
| 309 | ext4_group_t best_flex = parent_fbg_group; | ||
| 310 | int blocks_per_flex = sbi->s_blocks_per_group * flex_size; | ||
| 311 | int flexbg_free_blocks; | ||
| 312 | int flex_freeb_ratio; | ||
| 313 | ext4_group_t n_fbg_groups; | ||
| 314 | ext4_group_t i; | ||
| 315 | |||
| 316 | n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >> | ||
| 317 | sbi->s_log_groups_per_flex; | ||
| 318 | |||
| 319 | find_close_to_parent: | ||
| 320 | flexbg_free_blocks = flex_group[best_flex].free_blocks; | ||
| 321 | flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; | ||
| 322 | if (flex_group[best_flex].free_inodes && | ||
| 323 | flex_freeb_ratio > free_block_ratio) | ||
| 324 | goto found_flexbg; | ||
| 325 | |||
| 326 | if (best_flex && best_flex == parent_fbg_group) { | ||
| 327 | best_flex--; | ||
| 328 | goto find_close_to_parent; | ||
| 329 | } | ||
| 330 | |||
| 331 | for (i = 0; i < n_fbg_groups; i++) { | ||
| 332 | if (i == parent_fbg_group || i == parent_fbg_group - 1) | ||
| 333 | continue; | ||
| 334 | |||
| 335 | flexbg_free_blocks = flex_group[i].free_blocks; | ||
| 336 | flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; | ||
| 337 | |||
| 338 | if (flex_freeb_ratio > free_block_ratio && | ||
| 339 | flex_group[i].free_inodes) { | ||
| 340 | best_flex = i; | ||
| 341 | goto found_flexbg; | ||
| 342 | } | ||
| 343 | |||
| 344 | if (best_flex < 0 || | ||
| 345 | (flex_group[i].free_blocks > | ||
| 346 | flex_group[best_flex].free_blocks && | ||
| 347 | flex_group[i].free_inodes)) | ||
| 348 | best_flex = i; | ||
| 349 | } | ||
| 350 | |||
| 351 | if (!flex_group[best_flex].free_inodes || | ||
| 352 | !flex_group[best_flex].free_blocks) | ||
| 353 | return -1; | ||
| 354 | |||
| 355 | found_flexbg: | ||
| 356 | for (i = best_flex * flex_size; i < ngroups && | ||
| 357 | i < (best_flex + 1) * flex_size; i++) { | ||
| 358 | desc = ext4_get_group_desc(sb, i, &bh); | ||
| 359 | if (le16_to_cpu(desc->bg_free_inodes_count)) { | ||
| 360 | *best_group = i; | ||
| 361 | goto out; | ||
| 362 | } | ||
| 363 | } | ||
| 364 | |||
| 365 | return -1; | ||
| 366 | out: | ||
| 367 | return 0; | ||
| 368 | } | ||
| 369 | |||
| 289 | /* | 370 | /* |
| 290 | * Orlov's allocator for directories. | 371 | * Orlov's allocator for directories. |
| 291 | * | 372 | * |
| @@ -501,6 +582,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
| 501 | struct inode *ret; | 582 | struct inode *ret; |
| 502 | ext4_group_t i; | 583 | ext4_group_t i; |
| 503 | int free = 0; | 584 | int free = 0; |
| 585 | ext4_group_t flex_group; | ||
| 504 | 586 | ||
| 505 | /* Cannot create files in a deleted directory */ | 587 | /* Cannot create files in a deleted directory */ |
| 506 | if (!dir || !dir->i_nlink) | 588 | if (!dir || !dir->i_nlink) |
| @@ -514,6 +596,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
| 514 | 596 | ||
| 515 | sbi = EXT4_SB(sb); | 597 | sbi = EXT4_SB(sb); |
| 516 | es = sbi->s_es; | 598 | es = sbi->s_es; |
| 599 | |||
| 600 | if (sbi->s_log_groups_per_flex) { | ||
| 601 | ret2 = find_group_flex(sb, dir, &group); | ||
| 602 | goto got_group; | ||
| 603 | } | ||
| 604 | |||
| 517 | if (S_ISDIR(mode)) { | 605 | if (S_ISDIR(mode)) { |
| 518 | if (test_opt (sb, OLDALLOC)) | 606 | if (test_opt (sb, OLDALLOC)) |
| 519 | ret2 = find_group_dir(sb, dir, &group); | 607 | ret2 = find_group_dir(sb, dir, &group); |
| @@ -522,6 +610,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
| 522 | } else | 610 | } else |
| 523 | ret2 = find_group_other(sb, dir, &group); | 611 | ret2 = find_group_other(sb, dir, &group); |
| 524 | 612 | ||
| 613 | got_group: | ||
| 525 | err = -ENOSPC; | 614 | err = -ENOSPC; |
| 526 | if (ret2 == -1) | 615 | if (ret2 == -1) |
| 527 | goto out; | 616 | goto out; |
| @@ -600,7 +689,7 @@ got: | |||
| 600 | /* We may have to initialize the block bitmap if it isn't already */ | 689 | /* We may have to initialize the block bitmap if it isn't already */ |
| 601 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && | 690 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && |
| 602 | gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 691 | gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
| 603 | struct buffer_head *block_bh = read_block_bitmap(sb, group); | 692 | struct buffer_head *block_bh = ext4_read_block_bitmap(sb, group); |
| 604 | 693 | ||
| 605 | BUFFER_TRACE(block_bh, "get block bitmap access"); | 694 | BUFFER_TRACE(block_bh, "get block bitmap access"); |
| 606 | err = ext4_journal_get_write_access(handle, block_bh); | 695 | err = ext4_journal_get_write_access(handle, block_bh); |
| @@ -676,6 +765,13 @@ got: | |||
| 676 | percpu_counter_inc(&sbi->s_dirs_counter); | 765 | percpu_counter_inc(&sbi->s_dirs_counter); |
| 677 | sb->s_dirt = 1; | 766 | sb->s_dirt = 1; |
| 678 | 767 | ||
| 768 | if (sbi->s_log_groups_per_flex) { | ||
| 769 | flex_group = ext4_flex_group(sbi, group); | ||
| 770 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
| 771 | sbi->s_flex_groups[flex_group].free_inodes--; | ||
| 772 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
| 773 | } | ||
| 774 | |||
| 679 | inode->i_uid = current->fsuid; | 775 | inode->i_uid = current->fsuid; |
| 680 | if (test_opt (sb, GRPID)) | 776 | if (test_opt (sb, GRPID)) |
| 681 | inode->i_gid = dir->i_gid; | 777 | inode->i_gid = dir->i_gid; |
| @@ -740,14 +836,10 @@ got: | |||
| 740 | goto fail_free_drop; | 836 | goto fail_free_drop; |
| 741 | 837 | ||
| 742 | if (test_opt(sb, EXTENTS)) { | 838 | if (test_opt(sb, EXTENTS)) { |
| 743 | /* set extent flag only for diretory, file and normal symlink*/ | 839 | /* set extent flag only for directory, file and normal symlink*/ |
| 744 | if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { | 840 | if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { |
| 745 | EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; | 841 | EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; |
| 746 | ext4_ext_tree_init(handle, inode); | 842 | ext4_ext_tree_init(handle, inode); |
| 747 | err = ext4_update_incompat_feature(handle, sb, | ||
| 748 | EXT4_FEATURE_INCOMPAT_EXTENTS); | ||
| 749 | if (err) | ||
| 750 | goto fail_free_drop; | ||
| 751 | } | 843 | } |
| 752 | } | 844 | } |
| 753 | 845 | ||
| @@ -817,6 +909,14 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) | |||
| 817 | if (IS_ERR(inode)) | 909 | if (IS_ERR(inode)) |
| 818 | goto iget_failed; | 910 | goto iget_failed; |
| 819 | 911 | ||
| 912 | /* | ||
| 913 | * If the orphans has i_nlinks > 0 then it should be able to be | ||
| 914 | * truncated, otherwise it won't be removed from the orphan list | ||
| 915 | * during processing and an infinite loop will result. | ||
| 916 | */ | ||
| 917 | if (inode->i_nlink && !ext4_can_truncate(inode)) | ||
| 918 | goto bad_orphan; | ||
| 919 | |||
| 820 | if (NEXT_ORPHAN(inode) > max_ino) | 920 | if (NEXT_ORPHAN(inode) > max_ino) |
| 821 | goto bad_orphan; | 921 | goto bad_orphan; |
| 822 | brelse(bitmap_bh); | 922 | brelse(bitmap_bh); |
| @@ -838,6 +938,7 @@ bad_orphan: | |||
| 838 | printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", | 938 | printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", |
| 839 | NEXT_ORPHAN(inode)); | 939 | NEXT_ORPHAN(inode)); |
| 840 | printk(KERN_NOTICE "max_ino=%lu\n", max_ino); | 940 | printk(KERN_NOTICE "max_ino=%lu\n", max_ino); |
| 941 | printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink); | ||
| 841 | /* Avoid freeing blocks if we got a bad deleted inode */ | 942 | /* Avoid freeing blocks if we got a bad deleted inode */ |
| 842 | if (inode->i_nlink == 0) | 943 | if (inode->i_nlink == 0) |
| 843 | inode->i_blocks = 0; | 944 | inode->i_blocks = 0; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8d9707746413..8ca2763df091 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -32,12 +32,23 @@ | |||
| 32 | #include <linux/string.h> | 32 | #include <linux/string.h> |
| 33 | #include <linux/buffer_head.h> | 33 | #include <linux/buffer_head.h> |
| 34 | #include <linux/writeback.h> | 34 | #include <linux/writeback.h> |
| 35 | #include <linux/pagevec.h> | ||
| 35 | #include <linux/mpage.h> | 36 | #include <linux/mpage.h> |
| 36 | #include <linux/uio.h> | 37 | #include <linux/uio.h> |
| 37 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
| 38 | #include "ext4_jbd2.h" | 39 | #include "ext4_jbd2.h" |
| 39 | #include "xattr.h" | 40 | #include "xattr.h" |
| 40 | #include "acl.h" | 41 | #include "acl.h" |
| 42 | #include "ext4_extents.h" | ||
| 43 | |||
| 44 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | ||
| 45 | loff_t new_size) | ||
| 46 | { | ||
| 47 | return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, | ||
| 48 | new_size); | ||
| 49 | } | ||
| 50 | |||
| 51 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | ||
| 41 | 52 | ||
| 42 | /* | 53 | /* |
| 43 | * Test whether an inode is a fast symlink. | 54 | * Test whether an inode is a fast symlink. |
| @@ -181,6 +192,8 @@ void ext4_delete_inode (struct inode * inode) | |||
| 181 | { | 192 | { |
| 182 | handle_t *handle; | 193 | handle_t *handle; |
| 183 | 194 | ||
| 195 | if (ext4_should_order_data(inode)) | ||
| 196 | ext4_begin_ordered_truncate(inode, 0); | ||
| 184 | truncate_inode_pages(&inode->i_data, 0); | 197 | truncate_inode_pages(&inode->i_data, 0); |
| 185 | 198 | ||
| 186 | if (is_bad_inode(inode)) | 199 | if (is_bad_inode(inode)) |
| @@ -508,11 +521,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks, | |||
| 508 | * direct blocks | 521 | * direct blocks |
| 509 | */ | 522 | */ |
| 510 | static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | 523 | static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, |
| 511 | ext4_fsblk_t goal, int indirect_blks, int blks, | 524 | ext4_lblk_t iblock, ext4_fsblk_t goal, |
| 512 | ext4_fsblk_t new_blocks[4], int *err) | 525 | int indirect_blks, int blks, |
| 526 | ext4_fsblk_t new_blocks[4], int *err) | ||
| 513 | { | 527 | { |
| 514 | int target, i; | 528 | int target, i; |
| 515 | unsigned long count = 0; | 529 | unsigned long count = 0, blk_allocated = 0; |
| 516 | int index = 0; | 530 | int index = 0; |
| 517 | ext4_fsblk_t current_block = 0; | 531 | ext4_fsblk_t current_block = 0; |
| 518 | int ret = 0; | 532 | int ret = 0; |
| @@ -525,12 +539,13 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
| 525 | * the first direct block of this branch. That's the | 539 | * the first direct block of this branch. That's the |
| 526 | * minimum number of blocks need to allocate(required) | 540 | * minimum number of blocks need to allocate(required) |
| 527 | */ | 541 | */ |
| 528 | target = blks + indirect_blks; | 542 | /* first we try to allocate the indirect blocks */ |
| 529 | 543 | target = indirect_blks; | |
| 530 | while (1) { | 544 | while (target > 0) { |
| 531 | count = target; | 545 | count = target; |
| 532 | /* allocating blocks for indirect blocks and direct blocks */ | 546 | /* allocating blocks for indirect blocks and direct blocks */ |
| 533 | current_block = ext4_new_blocks(handle,inode,goal,&count,err); | 547 | current_block = ext4_new_meta_blocks(handle, inode, |
| 548 | goal, &count, err); | ||
| 534 | if (*err) | 549 | if (*err) |
| 535 | goto failed_out; | 550 | goto failed_out; |
| 536 | 551 | ||
| @@ -540,16 +555,48 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
| 540 | new_blocks[index++] = current_block++; | 555 | new_blocks[index++] = current_block++; |
| 541 | count--; | 556 | count--; |
| 542 | } | 557 | } |
| 543 | 558 | if (count > 0) { | |
| 544 | if (count > 0) | 559 | /* |
| 560 | * save the new block number | ||
| 561 | * for the first direct block | ||
| 562 | */ | ||
| 563 | new_blocks[index] = current_block; | ||
| 564 | printk(KERN_INFO "%s returned more blocks than " | ||
| 565 | "requested\n", __func__); | ||
| 566 | WARN_ON(1); | ||
| 545 | break; | 567 | break; |
| 568 | } | ||
| 546 | } | 569 | } |
| 547 | 570 | ||
| 548 | /* save the new block number for the first direct block */ | 571 | target = blks - count ; |
| 549 | new_blocks[index] = current_block; | 572 | blk_allocated = count; |
| 550 | 573 | if (!target) | |
| 574 | goto allocated; | ||
| 575 | /* Now allocate data blocks */ | ||
| 576 | count = target; | ||
| 577 | /* allocating blocks for data blocks */ | ||
| 578 | current_block = ext4_new_blocks(handle, inode, iblock, | ||
| 579 | goal, &count, err); | ||
| 580 | if (*err && (target == blks)) { | ||
| 581 | /* | ||
| 582 | * if the allocation failed and we didn't allocate | ||
| 583 | * any blocks before | ||
| 584 | */ | ||
| 585 | goto failed_out; | ||
| 586 | } | ||
| 587 | if (!*err) { | ||
| 588 | if (target == blks) { | ||
| 589 | /* | ||
| 590 | * save the new block number | ||
| 591 | * for the first direct block | ||
| 592 | */ | ||
| 593 | new_blocks[index] = current_block; | ||
| 594 | } | ||
| 595 | blk_allocated += count; | ||
| 596 | } | ||
| 597 | allocated: | ||
| 551 | /* total number of blocks allocated for direct blocks */ | 598 | /* total number of blocks allocated for direct blocks */ |
| 552 | ret = count; | 599 | ret = blk_allocated; |
| 553 | *err = 0; | 600 | *err = 0; |
| 554 | return ret; | 601 | return ret; |
| 555 | failed_out: | 602 | failed_out: |
| @@ -584,8 +631,9 @@ failed_out: | |||
| 584 | * as described above and return 0. | 631 | * as described above and return 0. |
| 585 | */ | 632 | */ |
| 586 | static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | 633 | static int ext4_alloc_branch(handle_t *handle, struct inode *inode, |
| 587 | int indirect_blks, int *blks, ext4_fsblk_t goal, | 634 | ext4_lblk_t iblock, int indirect_blks, |
| 588 | ext4_lblk_t *offsets, Indirect *branch) | 635 | int *blks, ext4_fsblk_t goal, |
| 636 | ext4_lblk_t *offsets, Indirect *branch) | ||
| 589 | { | 637 | { |
| 590 | int blocksize = inode->i_sb->s_blocksize; | 638 | int blocksize = inode->i_sb->s_blocksize; |
| 591 | int i, n = 0; | 639 | int i, n = 0; |
| @@ -595,7 +643,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
| 595 | ext4_fsblk_t new_blocks[4]; | 643 | ext4_fsblk_t new_blocks[4]; |
| 596 | ext4_fsblk_t current_block; | 644 | ext4_fsblk_t current_block; |
| 597 | 645 | ||
| 598 | num = ext4_alloc_blocks(handle, inode, goal, indirect_blks, | 646 | num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks, |
| 599 | *blks, new_blocks, &err); | 647 | *blks, new_blocks, &err); |
| 600 | if (err) | 648 | if (err) |
| 601 | return err; | 649 | return err; |
| @@ -799,6 +847,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
| 799 | struct ext4_inode_info *ei = EXT4_I(inode); | 847 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 800 | int count = 0; | 848 | int count = 0; |
| 801 | ext4_fsblk_t first_block = 0; | 849 | ext4_fsblk_t first_block = 0; |
| 850 | loff_t disksize; | ||
| 802 | 851 | ||
| 803 | 852 | ||
| 804 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); | 853 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); |
| @@ -855,8 +904,9 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
| 855 | /* | 904 | /* |
| 856 | * Block out ext4_truncate while we alter the tree | 905 | * Block out ext4_truncate while we alter the tree |
| 857 | */ | 906 | */ |
| 858 | err = ext4_alloc_branch(handle, inode, indirect_blks, &count, goal, | 907 | err = ext4_alloc_branch(handle, inode, iblock, indirect_blks, |
| 859 | offsets + (partial - chain), partial); | 908 | &count, goal, |
| 909 | offsets + (partial - chain), partial); | ||
| 860 | 910 | ||
| 861 | /* | 911 | /* |
| 862 | * The ext4_splice_branch call will free and forget any buffers | 912 | * The ext4_splice_branch call will free and forget any buffers |
| @@ -873,8 +923,13 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
| 873 | * protect it if you're about to implement concurrent | 923 | * protect it if you're about to implement concurrent |
| 874 | * ext4_get_block() -bzzz | 924 | * ext4_get_block() -bzzz |
| 875 | */ | 925 | */ |
| 876 | if (!err && extend_disksize && inode->i_size > ei->i_disksize) | 926 | if (!err && extend_disksize) { |
| 877 | ei->i_disksize = inode->i_size; | 927 | disksize = ((loff_t) iblock + count) << inode->i_blkbits; |
| 928 | if (disksize > i_size_read(inode)) | ||
| 929 | disksize = i_size_read(inode); | ||
| 930 | if (disksize > ei->i_disksize) | ||
| 931 | ei->i_disksize = disksize; | ||
| 932 | } | ||
| 878 | if (err) | 933 | if (err) |
| 879 | goto cleanup; | 934 | goto cleanup; |
| 880 | 935 | ||
| @@ -934,7 +989,7 @@ out: | |||
| 934 | */ | 989 | */ |
| 935 | int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | 990 | int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, |
| 936 | unsigned long max_blocks, struct buffer_head *bh, | 991 | unsigned long max_blocks, struct buffer_head *bh, |
| 937 | int create, int extend_disksize) | 992 | int create, int extend_disksize, int flag) |
| 938 | { | 993 | { |
| 939 | int retval; | 994 | int retval; |
| 940 | 995 | ||
| @@ -975,6 +1030,15 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
| 975 | * with create == 1 flag. | 1030 | * with create == 1 flag. |
| 976 | */ | 1031 | */ |
| 977 | down_write((&EXT4_I(inode)->i_data_sem)); | 1032 | down_write((&EXT4_I(inode)->i_data_sem)); |
| 1033 | |||
| 1034 | /* | ||
| 1035 | * if the caller is from delayed allocation writeout path | ||
| 1036 | * we have already reserved fs blocks for allocation | ||
| 1037 | * let the underlying get_block() function know to | ||
| 1038 | * avoid double accounting | ||
| 1039 | */ | ||
| 1040 | if (flag) | ||
| 1041 | EXT4_I(inode)->i_delalloc_reserved_flag = 1; | ||
| 978 | /* | 1042 | /* |
| 979 | * We need to check for EXT4 here because migrate | 1043 | * We need to check for EXT4 here because migrate |
| 980 | * could have changed the inode type in between | 1044 | * could have changed the inode type in between |
| @@ -996,6 +1060,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
| 996 | ~EXT4_EXT_MIGRATE; | 1060 | ~EXT4_EXT_MIGRATE; |
| 997 | } | 1061 | } |
| 998 | } | 1062 | } |
| 1063 | |||
| 1064 | if (flag) { | ||
| 1065 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; | ||
| 1066 | /* | ||
| 1067 | * Update reserved blocks/metadata blocks | ||
| 1068 | * after successful block allocation | ||
| 1069 | * which were deferred till now | ||
| 1070 | */ | ||
| 1071 | if ((retval > 0) && buffer_delay(bh)) | ||
| 1072 | ext4_da_release_space(inode, retval, 0); | ||
| 1073 | } | ||
| 1074 | |||
| 999 | up_write((&EXT4_I(inode)->i_data_sem)); | 1075 | up_write((&EXT4_I(inode)->i_data_sem)); |
| 1000 | return retval; | 1076 | return retval; |
| 1001 | } | 1077 | } |
| @@ -1021,7 +1097,7 @@ static int ext4_get_block(struct inode *inode, sector_t iblock, | |||
| 1021 | } | 1097 | } |
| 1022 | 1098 | ||
| 1023 | ret = ext4_get_blocks_wrap(handle, inode, iblock, | 1099 | ret = ext4_get_blocks_wrap(handle, inode, iblock, |
| 1024 | max_blocks, bh_result, create, 0); | 1100 | max_blocks, bh_result, create, 0, 0); |
| 1025 | if (ret > 0) { | 1101 | if (ret > 0) { |
| 1026 | bh_result->b_size = (ret << inode->i_blkbits); | 1102 | bh_result->b_size = (ret << inode->i_blkbits); |
| 1027 | ret = 0; | 1103 | ret = 0; |
| @@ -1047,7 +1123,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
| 1047 | dummy.b_blocknr = -1000; | 1123 | dummy.b_blocknr = -1000; |
| 1048 | buffer_trace_init(&dummy.b_history); | 1124 | buffer_trace_init(&dummy.b_history); |
| 1049 | err = ext4_get_blocks_wrap(handle, inode, block, 1, | 1125 | err = ext4_get_blocks_wrap(handle, inode, block, 1, |
| 1050 | &dummy, create, 1); | 1126 | &dummy, create, 1, 0); |
| 1051 | /* | 1127 | /* |
| 1052 | * ext4_get_blocks_handle() returns number of blocks | 1128 | * ext4_get_blocks_handle() returns number of blocks |
| 1053 | * mapped. 0 in case of a HOLE. | 1129 | * mapped. 0 in case of a HOLE. |
| @@ -1203,19 +1279,20 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
| 1203 | to = from + len; | 1279 | to = from + len; |
| 1204 | 1280 | ||
| 1205 | retry: | 1281 | retry: |
| 1206 | page = __grab_cache_page(mapping, index); | ||
| 1207 | if (!page) | ||
| 1208 | return -ENOMEM; | ||
| 1209 | *pagep = page; | ||
| 1210 | |||
| 1211 | handle = ext4_journal_start(inode, needed_blocks); | 1282 | handle = ext4_journal_start(inode, needed_blocks); |
| 1212 | if (IS_ERR(handle)) { | 1283 | if (IS_ERR(handle)) { |
| 1213 | unlock_page(page); | ||
| 1214 | page_cache_release(page); | ||
| 1215 | ret = PTR_ERR(handle); | 1284 | ret = PTR_ERR(handle); |
| 1216 | goto out; | 1285 | goto out; |
| 1217 | } | 1286 | } |
| 1218 | 1287 | ||
| 1288 | page = __grab_cache_page(mapping, index); | ||
| 1289 | if (!page) { | ||
| 1290 | ext4_journal_stop(handle); | ||
| 1291 | ret = -ENOMEM; | ||
| 1292 | goto out; | ||
| 1293 | } | ||
| 1294 | *pagep = page; | ||
| 1295 | |||
| 1219 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 1296 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
| 1220 | ext4_get_block); | 1297 | ext4_get_block); |
| 1221 | 1298 | ||
| @@ -1225,8 +1302,8 @@ retry: | |||
| 1225 | } | 1302 | } |
| 1226 | 1303 | ||
| 1227 | if (ret) { | 1304 | if (ret) { |
| 1228 | ext4_journal_stop(handle); | ||
| 1229 | unlock_page(page); | 1305 | unlock_page(page); |
| 1306 | ext4_journal_stop(handle); | ||
| 1230 | page_cache_release(page); | 1307 | page_cache_release(page); |
| 1231 | } | 1308 | } |
| 1232 | 1309 | ||
| @@ -1236,15 +1313,6 @@ out: | |||
| 1236 | return ret; | 1313 | return ret; |
| 1237 | } | 1314 | } |
| 1238 | 1315 | ||
| 1239 | int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh) | ||
| 1240 | { | ||
| 1241 | int err = jbd2_journal_dirty_data(handle, bh); | ||
| 1242 | if (err) | ||
| 1243 | ext4_journal_abort_handle(__func__, __func__, | ||
| 1244 | bh, handle, err); | ||
| 1245 | return err; | ||
| 1246 | } | ||
| 1247 | |||
| 1248 | /* For write_end() in data=journal mode */ | 1316 | /* For write_end() in data=journal mode */ |
| 1249 | static int write_end_fn(handle_t *handle, struct buffer_head *bh) | 1317 | static int write_end_fn(handle_t *handle, struct buffer_head *bh) |
| 1250 | { | 1318 | { |
| @@ -1255,29 +1323,6 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh) | |||
| 1255 | } | 1323 | } |
| 1256 | 1324 | ||
| 1257 | /* | 1325 | /* |
| 1258 | * Generic write_end handler for ordered and writeback ext4 journal modes. | ||
| 1259 | * We can't use generic_write_end, because that unlocks the page and we need to | ||
| 1260 | * unlock the page after ext4_journal_stop, but ext4_journal_stop must run | ||
| 1261 | * after block_write_end. | ||
| 1262 | */ | ||
| 1263 | static int ext4_generic_write_end(struct file *file, | ||
| 1264 | struct address_space *mapping, | ||
| 1265 | loff_t pos, unsigned len, unsigned copied, | ||
| 1266 | struct page *page, void *fsdata) | ||
| 1267 | { | ||
| 1268 | struct inode *inode = file->f_mapping->host; | ||
| 1269 | |||
| 1270 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | ||
| 1271 | |||
| 1272 | if (pos+copied > inode->i_size) { | ||
| 1273 | i_size_write(inode, pos+copied); | ||
| 1274 | mark_inode_dirty(inode); | ||
| 1275 | } | ||
| 1276 | |||
| 1277 | return copied; | ||
| 1278 | } | ||
| 1279 | |||
| 1280 | /* | ||
| 1281 | * We need to pick up the new inode size which generic_commit_write gave us | 1326 | * We need to pick up the new inode size which generic_commit_write gave us |
| 1282 | * `file' can be NULL - eg, when called from page_symlink(). | 1327 | * `file' can be NULL - eg, when called from page_symlink(). |
| 1283 | * | 1328 | * |
| @@ -1290,15 +1335,14 @@ static int ext4_ordered_write_end(struct file *file, | |||
| 1290 | struct page *page, void *fsdata) | 1335 | struct page *page, void *fsdata) |
| 1291 | { | 1336 | { |
| 1292 | handle_t *handle = ext4_journal_current_handle(); | 1337 | handle_t *handle = ext4_journal_current_handle(); |
| 1293 | struct inode *inode = file->f_mapping->host; | 1338 | struct inode *inode = mapping->host; |
| 1294 | unsigned from, to; | 1339 | unsigned from, to; |
| 1295 | int ret = 0, ret2; | 1340 | int ret = 0, ret2; |
| 1296 | 1341 | ||
| 1297 | from = pos & (PAGE_CACHE_SIZE - 1); | 1342 | from = pos & (PAGE_CACHE_SIZE - 1); |
| 1298 | to = from + len; | 1343 | to = from + len; |
| 1299 | 1344 | ||
| 1300 | ret = walk_page_buffers(handle, page_buffers(page), | 1345 | ret = ext4_jbd2_file_inode(handle, inode); |
| 1301 | from, to, NULL, ext4_journal_dirty_data); | ||
| 1302 | 1346 | ||
| 1303 | if (ret == 0) { | 1347 | if (ret == 0) { |
| 1304 | /* | 1348 | /* |
| @@ -1311,7 +1355,7 @@ static int ext4_ordered_write_end(struct file *file, | |||
| 1311 | new_i_size = pos + copied; | 1355 | new_i_size = pos + copied; |
| 1312 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1356 | if (new_i_size > EXT4_I(inode)->i_disksize) |
| 1313 | EXT4_I(inode)->i_disksize = new_i_size; | 1357 | EXT4_I(inode)->i_disksize = new_i_size; |
| 1314 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, | 1358 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
| 1315 | page, fsdata); | 1359 | page, fsdata); |
| 1316 | copied = ret2; | 1360 | copied = ret2; |
| 1317 | if (ret2 < 0) | 1361 | if (ret2 < 0) |
| @@ -1320,8 +1364,6 @@ static int ext4_ordered_write_end(struct file *file, | |||
| 1320 | ret2 = ext4_journal_stop(handle); | 1364 | ret2 = ext4_journal_stop(handle); |
| 1321 | if (!ret) | 1365 | if (!ret) |
| 1322 | ret = ret2; | 1366 | ret = ret2; |
| 1323 | unlock_page(page); | ||
| 1324 | page_cache_release(page); | ||
| 1325 | 1367 | ||
| 1326 | return ret ? ret : copied; | 1368 | return ret ? ret : copied; |
| 1327 | } | 1369 | } |
| @@ -1332,7 +1374,7 @@ static int ext4_writeback_write_end(struct file *file, | |||
| 1332 | struct page *page, void *fsdata) | 1374 | struct page *page, void *fsdata) |
| 1333 | { | 1375 | { |
| 1334 | handle_t *handle = ext4_journal_current_handle(); | 1376 | handle_t *handle = ext4_journal_current_handle(); |
| 1335 | struct inode *inode = file->f_mapping->host; | 1377 | struct inode *inode = mapping->host; |
| 1336 | int ret = 0, ret2; | 1378 | int ret = 0, ret2; |
| 1337 | loff_t new_i_size; | 1379 | loff_t new_i_size; |
| 1338 | 1380 | ||
| @@ -1340,7 +1382,7 @@ static int ext4_writeback_write_end(struct file *file, | |||
| 1340 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1382 | if (new_i_size > EXT4_I(inode)->i_disksize) |
| 1341 | EXT4_I(inode)->i_disksize = new_i_size; | 1383 | EXT4_I(inode)->i_disksize = new_i_size; |
| 1342 | 1384 | ||
| 1343 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, | 1385 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
| 1344 | page, fsdata); | 1386 | page, fsdata); |
| 1345 | copied = ret2; | 1387 | copied = ret2; |
| 1346 | if (ret2 < 0) | 1388 | if (ret2 < 0) |
| @@ -1349,8 +1391,6 @@ static int ext4_writeback_write_end(struct file *file, | |||
| 1349 | ret2 = ext4_journal_stop(handle); | 1391 | ret2 = ext4_journal_stop(handle); |
| 1350 | if (!ret) | 1392 | if (!ret) |
| 1351 | ret = ret2; | 1393 | ret = ret2; |
| 1352 | unlock_page(page); | ||
| 1353 | page_cache_release(page); | ||
| 1354 | 1394 | ||
| 1355 | return ret ? ret : copied; | 1395 | return ret ? ret : copied; |
| 1356 | } | 1396 | } |
| @@ -1389,14 +1429,965 @@ static int ext4_journalled_write_end(struct file *file, | |||
| 1389 | ret = ret2; | 1429 | ret = ret2; |
| 1390 | } | 1430 | } |
| 1391 | 1431 | ||
| 1432 | unlock_page(page); | ||
| 1392 | ret2 = ext4_journal_stop(handle); | 1433 | ret2 = ext4_journal_stop(handle); |
| 1393 | if (!ret) | 1434 | if (!ret) |
| 1394 | ret = ret2; | 1435 | ret = ret2; |
| 1395 | unlock_page(page); | ||
| 1396 | page_cache_release(page); | 1436 | page_cache_release(page); |
| 1397 | 1437 | ||
| 1398 | return ret ? ret : copied; | 1438 | return ret ? ret : copied; |
| 1399 | } | 1439 | } |
| 1440 | /* | ||
| 1441 | * Calculate the number of metadata blocks need to reserve | ||
| 1442 | * to allocate @blocks for non extent file based file | ||
| 1443 | */ | ||
| 1444 | static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) | ||
| 1445 | { | ||
| 1446 | int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
| 1447 | int ind_blks, dind_blks, tind_blks; | ||
| 1448 | |||
| 1449 | /* number of new indirect blocks needed */ | ||
| 1450 | ind_blks = (blocks + icap - 1) / icap; | ||
| 1451 | |||
| 1452 | dind_blks = (ind_blks + icap - 1) / icap; | ||
| 1453 | |||
| 1454 | tind_blks = 1; | ||
| 1455 | |||
| 1456 | return ind_blks + dind_blks + tind_blks; | ||
| 1457 | } | ||
| 1458 | |||
| 1459 | /* | ||
| 1460 | * Calculate the number of metadata blocks need to reserve | ||
| 1461 | * to allocate given number of blocks | ||
| 1462 | */ | ||
| 1463 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) | ||
| 1464 | { | ||
| 1465 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | ||
| 1466 | return ext4_ext_calc_metadata_amount(inode, blocks); | ||
| 1467 | |||
| 1468 | return ext4_indirect_calc_metadata_amount(inode, blocks); | ||
| 1469 | } | ||
| 1470 | |||
| 1471 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | ||
| 1472 | { | ||
| 1473 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 1474 | unsigned long md_needed, mdblocks, total = 0; | ||
| 1475 | |||
| 1476 | /* | ||
| 1477 | * recalculate the amount of metadata blocks to reserve | ||
| 1478 | * in order to allocate nrblocks | ||
| 1479 | * worse case is one extent per block | ||
| 1480 | */ | ||
| 1481 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 1482 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; | ||
| 1483 | mdblocks = ext4_calc_metadata_amount(inode, total); | ||
| 1484 | BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); | ||
| 1485 | |||
| 1486 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; | ||
| 1487 | total = md_needed + nrblocks; | ||
| 1488 | |||
| 1489 | if (ext4_has_free_blocks(sbi, total) < total) { | ||
| 1490 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 1491 | return -ENOSPC; | ||
| 1492 | } | ||
| 1493 | |||
| 1494 | /* reduce fs free blocks counter */ | ||
| 1495 | percpu_counter_sub(&sbi->s_freeblocks_counter, total); | ||
| 1496 | |||
| 1497 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; | ||
| 1498 | EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; | ||
| 1499 | |||
| 1500 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 1501 | return 0; /* success */ | ||
| 1502 | } | ||
| 1503 | |||
| 1504 | void ext4_da_release_space(struct inode *inode, int used, int to_free) | ||
| 1505 | { | ||
| 1506 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 1507 | int total, mdb, mdb_free, release; | ||
| 1508 | |||
| 1509 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 1510 | /* recalculate the number of metablocks still need to be reserved */ | ||
| 1511 | total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free; | ||
| 1512 | mdb = ext4_calc_metadata_amount(inode, total); | ||
| 1513 | |||
| 1514 | /* figure out how many metablocks to release */ | ||
| 1515 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | ||
| 1516 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; | ||
| 1517 | |||
| 1518 | /* Account for allocated meta_blocks */ | ||
| 1519 | mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; | ||
| 1520 | |||
| 1521 | release = to_free + mdb_free; | ||
| 1522 | |||
| 1523 | /* update fs free blocks counter for truncate case */ | ||
| 1524 | percpu_counter_add(&sbi->s_freeblocks_counter, release); | ||
| 1525 | |||
| 1526 | /* update per-inode reservations */ | ||
| 1527 | BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks); | ||
| 1528 | EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free); | ||
| 1529 | |||
| 1530 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | ||
| 1531 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
| 1532 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
| 1533 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 1534 | } | ||
| 1535 | |||
| 1536 | static void ext4_da_page_release_reservation(struct page *page, | ||
| 1537 | unsigned long offset) | ||
| 1538 | { | ||
| 1539 | int to_release = 0; | ||
| 1540 | struct buffer_head *head, *bh; | ||
| 1541 | unsigned int curr_off = 0; | ||
| 1542 | |||
| 1543 | head = page_buffers(page); | ||
| 1544 | bh = head; | ||
| 1545 | do { | ||
| 1546 | unsigned int next_off = curr_off + bh->b_size; | ||
| 1547 | |||
| 1548 | if ((offset <= curr_off) && (buffer_delay(bh))) { | ||
| 1549 | to_release++; | ||
| 1550 | clear_buffer_delay(bh); | ||
| 1551 | } | ||
| 1552 | curr_off = next_off; | ||
| 1553 | } while ((bh = bh->b_this_page) != head); | ||
| 1554 | ext4_da_release_space(page->mapping->host, 0, to_release); | ||
| 1555 | } | ||
| 1556 | |||
| 1557 | /* | ||
| 1558 | * Delayed allocation stuff | ||
| 1559 | */ | ||
| 1560 | |||
| 1561 | struct mpage_da_data { | ||
| 1562 | struct inode *inode; | ||
| 1563 | struct buffer_head lbh; /* extent of blocks */ | ||
| 1564 | unsigned long first_page, next_page; /* extent of pages */ | ||
| 1565 | get_block_t *get_block; | ||
| 1566 | struct writeback_control *wbc; | ||
| 1567 | }; | ||
| 1568 | |||
| 1569 | /* | ||
| 1570 | * mpage_da_submit_io - walks through extent of pages and try to write | ||
| 1571 | * them with __mpage_writepage() | ||
| 1572 | * | ||
| 1573 | * @mpd->inode: inode | ||
| 1574 | * @mpd->first_page: first page of the extent | ||
| 1575 | * @mpd->next_page: page after the last page of the extent | ||
| 1576 | * @mpd->get_block: the filesystem's block mapper function | ||
| 1577 | * | ||
| 1578 | * By the time mpage_da_submit_io() is called we expect all blocks | ||
| 1579 | * to be allocated. this may be wrong if allocation failed. | ||
| 1580 | * | ||
| 1581 | * As pages are already locked by write_cache_pages(), we can't use it | ||
| 1582 | */ | ||
| 1583 | static int mpage_da_submit_io(struct mpage_da_data *mpd) | ||
| 1584 | { | ||
| 1585 | struct address_space *mapping = mpd->inode->i_mapping; | ||
| 1586 | struct mpage_data mpd_pp = { | ||
| 1587 | .bio = NULL, | ||
| 1588 | .last_block_in_bio = 0, | ||
| 1589 | .get_block = mpd->get_block, | ||
| 1590 | .use_writepage = 1, | ||
| 1591 | }; | ||
| 1592 | int ret = 0, err, nr_pages, i; | ||
| 1593 | unsigned long index, end; | ||
| 1594 | struct pagevec pvec; | ||
| 1595 | |||
| 1596 | BUG_ON(mpd->next_page <= mpd->first_page); | ||
| 1597 | |||
| 1598 | pagevec_init(&pvec, 0); | ||
| 1599 | index = mpd->first_page; | ||
| 1600 | end = mpd->next_page - 1; | ||
| 1601 | |||
| 1602 | while (index <= end) { | ||
| 1603 | /* XXX: optimize tail */ | ||
| 1604 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
| 1605 | if (nr_pages == 0) | ||
| 1606 | break; | ||
| 1607 | for (i = 0; i < nr_pages; i++) { | ||
| 1608 | struct page *page = pvec.pages[i]; | ||
| 1609 | |||
| 1610 | index = page->index; | ||
| 1611 | if (index > end) | ||
| 1612 | break; | ||
| 1613 | index++; | ||
| 1614 | |||
| 1615 | err = __mpage_writepage(page, mpd->wbc, &mpd_pp); | ||
| 1616 | |||
| 1617 | /* | ||
| 1618 | * In error case, we have to continue because | ||
| 1619 | * remaining pages are still locked | ||
| 1620 | * XXX: unlock and re-dirty them? | ||
| 1621 | */ | ||
| 1622 | if (ret == 0) | ||
| 1623 | ret = err; | ||
| 1624 | } | ||
| 1625 | pagevec_release(&pvec); | ||
| 1626 | } | ||
| 1627 | if (mpd_pp.bio) | ||
| 1628 | mpage_bio_submit(WRITE, mpd_pp.bio); | ||
| 1629 | |||
| 1630 | return ret; | ||
| 1631 | } | ||
| 1632 | |||
| 1633 | /* | ||
| 1634 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers | ||
| 1635 | * | ||
| 1636 | * @mpd->inode - inode to walk through | ||
| 1637 | * @exbh->b_blocknr - first block on a disk | ||
| 1638 | * @exbh->b_size - amount of space in bytes | ||
| 1639 | * @logical - first logical block to start assignment with | ||
| 1640 | * | ||
| 1641 | * the function goes through all passed space and put actual disk | ||
| 1642 | * block numbers into buffer heads, dropping BH_Delay | ||
| 1643 | */ | ||
| 1644 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | ||
| 1645 | struct buffer_head *exbh) | ||
| 1646 | { | ||
| 1647 | struct inode *inode = mpd->inode; | ||
| 1648 | struct address_space *mapping = inode->i_mapping; | ||
| 1649 | int blocks = exbh->b_size >> inode->i_blkbits; | ||
| 1650 | sector_t pblock = exbh->b_blocknr, cur_logical; | ||
| 1651 | struct buffer_head *head, *bh; | ||
| 1652 | unsigned long index, end; | ||
| 1653 | struct pagevec pvec; | ||
| 1654 | int nr_pages, i; | ||
| 1655 | |||
| 1656 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 1657 | end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 1658 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 1659 | |||
| 1660 | pagevec_init(&pvec, 0); | ||
| 1661 | |||
| 1662 | while (index <= end) { | ||
| 1663 | /* XXX: optimize tail */ | ||
| 1664 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
| 1665 | if (nr_pages == 0) | ||
| 1666 | break; | ||
| 1667 | for (i = 0; i < nr_pages; i++) { | ||
| 1668 | struct page *page = pvec.pages[i]; | ||
| 1669 | |||
| 1670 | index = page->index; | ||
| 1671 | if (index > end) | ||
| 1672 | break; | ||
| 1673 | index++; | ||
| 1674 | |||
| 1675 | BUG_ON(!PageLocked(page)); | ||
| 1676 | BUG_ON(PageWriteback(page)); | ||
| 1677 | BUG_ON(!page_has_buffers(page)); | ||
| 1678 | |||
| 1679 | bh = page_buffers(page); | ||
| 1680 | head = bh; | ||
| 1681 | |||
| 1682 | /* skip blocks out of the range */ | ||
| 1683 | do { | ||
| 1684 | if (cur_logical >= logical) | ||
| 1685 | break; | ||
| 1686 | cur_logical++; | ||
| 1687 | } while ((bh = bh->b_this_page) != head); | ||
| 1688 | |||
| 1689 | do { | ||
| 1690 | if (cur_logical >= logical + blocks) | ||
| 1691 | break; | ||
| 1692 | if (buffer_delay(bh)) { | ||
| 1693 | bh->b_blocknr = pblock; | ||
| 1694 | clear_buffer_delay(bh); | ||
| 1695 | } else if (buffer_mapped(bh)) | ||
| 1696 | BUG_ON(bh->b_blocknr != pblock); | ||
| 1697 | |||
| 1698 | cur_logical++; | ||
| 1699 | pblock++; | ||
| 1700 | } while ((bh = bh->b_this_page) != head); | ||
| 1701 | } | ||
| 1702 | pagevec_release(&pvec); | ||
| 1703 | } | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | |||
| 1707 | /* | ||
| 1708 | * __unmap_underlying_blocks - just a helper function to unmap | ||
| 1709 | * set of blocks described by @bh | ||
| 1710 | */ | ||
| 1711 | static inline void __unmap_underlying_blocks(struct inode *inode, | ||
| 1712 | struct buffer_head *bh) | ||
| 1713 | { | ||
| 1714 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
| 1715 | int blocks, i; | ||
| 1716 | |||
| 1717 | blocks = bh->b_size >> inode->i_blkbits; | ||
| 1718 | for (i = 0; i < blocks; i++) | ||
| 1719 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); | ||
| 1720 | } | ||
| 1721 | |||
| 1722 | /* | ||
| 1723 | * mpage_da_map_blocks - go through given space | ||
| 1724 | * | ||
| 1725 | * @mpd->lbh - bh describing space | ||
| 1726 | * @mpd->get_block - the filesystem's block mapper function | ||
| 1727 | * | ||
| 1728 | * The function skips space we know is already mapped to disk blocks. | ||
| 1729 | * | ||
| 1730 | * The function ignores errors ->get_block() returns, thus real | ||
| 1731 | * error handling is postponed to __mpage_writepage() | ||
| 1732 | */ | ||
| 1733 | static void mpage_da_map_blocks(struct mpage_da_data *mpd) | ||
| 1734 | { | ||
| 1735 | struct buffer_head *lbh = &mpd->lbh; | ||
| 1736 | int err = 0, remain = lbh->b_size; | ||
| 1737 | sector_t next = lbh->b_blocknr; | ||
| 1738 | struct buffer_head new; | ||
| 1739 | |||
| 1740 | /* | ||
| 1741 | * We consider only non-mapped and non-allocated blocks | ||
| 1742 | */ | ||
| 1743 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) | ||
| 1744 | return; | ||
| 1745 | |||
| 1746 | while (remain) { | ||
| 1747 | new.b_state = lbh->b_state; | ||
| 1748 | new.b_blocknr = 0; | ||
| 1749 | new.b_size = remain; | ||
| 1750 | err = mpd->get_block(mpd->inode, next, &new, 1); | ||
| 1751 | if (err) { | ||
| 1752 | /* | ||
| 1753 | * Rather than implement own error handling | ||
| 1754 | * here, we just leave remaining blocks | ||
| 1755 | * unallocated and try again with ->writepage() | ||
| 1756 | */ | ||
| 1757 | break; | ||
| 1758 | } | ||
| 1759 | BUG_ON(new.b_size == 0); | ||
| 1760 | |||
| 1761 | if (buffer_new(&new)) | ||
| 1762 | __unmap_underlying_blocks(mpd->inode, &new); | ||
| 1763 | |||
| 1764 | /* | ||
| 1765 | * If blocks are delayed marked, we need to | ||
| 1766 | * put actual blocknr and drop delayed bit | ||
| 1767 | */ | ||
| 1768 | if (buffer_delay(lbh)) | ||
| 1769 | mpage_put_bnr_to_bhs(mpd, next, &new); | ||
| 1770 | |||
| 1771 | /* go for the remaining blocks */ | ||
| 1772 | next += new.b_size >> mpd->inode->i_blkbits; | ||
| 1773 | remain -= new.b_size; | ||
| 1774 | } | ||
| 1775 | } | ||
| 1776 | |||
| 1777 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay)) | ||
| 1778 | |||
| 1779 | /* | ||
| 1780 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks | ||
| 1781 | * | ||
| 1782 | * @mpd->lbh - extent of blocks | ||
| 1783 | * @logical - logical number of the block in the file | ||
| 1784 | * @bh - bh of the block (used to access block's state) | ||
| 1785 | * | ||
| 1786 | * the function is used to collect contig. blocks in same state | ||
| 1787 | */ | ||
| 1788 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | ||
| 1789 | sector_t logical, struct buffer_head *bh) | ||
| 1790 | { | ||
| 1791 | struct buffer_head *lbh = &mpd->lbh; | ||
| 1792 | sector_t next; | ||
| 1793 | |||
| 1794 | next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); | ||
| 1795 | |||
| 1796 | /* | ||
| 1797 | * First block in the extent | ||
| 1798 | */ | ||
| 1799 | if (lbh->b_size == 0) { | ||
| 1800 | lbh->b_blocknr = logical; | ||
| 1801 | lbh->b_size = bh->b_size; | ||
| 1802 | lbh->b_state = bh->b_state & BH_FLAGS; | ||
| 1803 | return; | ||
| 1804 | } | ||
| 1805 | |||
| 1806 | /* | ||
| 1807 | * Can we merge the block to our big extent? | ||
| 1808 | */ | ||
| 1809 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { | ||
| 1810 | lbh->b_size += bh->b_size; | ||
| 1811 | return; | ||
| 1812 | } | ||
| 1813 | |||
| 1814 | /* | ||
| 1815 | * We couldn't merge the block to our extent, so we | ||
| 1816 | * need to flush current extent and start new one | ||
| 1817 | */ | ||
| 1818 | mpage_da_map_blocks(mpd); | ||
| 1819 | |||
| 1820 | /* | ||
| 1821 | * Now start a new extent | ||
| 1822 | */ | ||
| 1823 | lbh->b_size = bh->b_size; | ||
| 1824 | lbh->b_state = bh->b_state & BH_FLAGS; | ||
| 1825 | lbh->b_blocknr = logical; | ||
| 1826 | } | ||
| 1827 | |||
| 1828 | /* | ||
| 1829 | * __mpage_da_writepage - finds extent of pages and blocks | ||
| 1830 | * | ||
| 1831 | * @page: page to consider | ||
| 1832 | * @wbc: not used, we just follow rules | ||
| 1833 | * @data: context | ||
| 1834 | * | ||
| 1835 | * The function finds extents of pages and scan them for all blocks. | ||
| 1836 | */ | ||
| 1837 | static int __mpage_da_writepage(struct page *page, | ||
| 1838 | struct writeback_control *wbc, void *data) | ||
| 1839 | { | ||
| 1840 | struct mpage_da_data *mpd = data; | ||
| 1841 | struct inode *inode = mpd->inode; | ||
| 1842 | struct buffer_head *bh, *head, fake; | ||
| 1843 | sector_t logical; | ||
| 1844 | |||
| 1845 | /* | ||
| 1846 | * Can we merge this page to current extent? | ||
| 1847 | */ | ||
| 1848 | if (mpd->next_page != page->index) { | ||
| 1849 | /* | ||
| 1850 | * Nope, we can't. So, we map non-allocated blocks | ||
| 1851 | * and start IO on them using __mpage_writepage() | ||
| 1852 | */ | ||
| 1853 | if (mpd->next_page != mpd->first_page) { | ||
| 1854 | mpage_da_map_blocks(mpd); | ||
| 1855 | mpage_da_submit_io(mpd); | ||
| 1856 | } | ||
| 1857 | |||
| 1858 | /* | ||
| 1859 | * Start next extent of pages ... | ||
| 1860 | */ | ||
| 1861 | mpd->first_page = page->index; | ||
| 1862 | |||
| 1863 | /* | ||
| 1864 | * ... and blocks | ||
| 1865 | */ | ||
| 1866 | mpd->lbh.b_size = 0; | ||
| 1867 | mpd->lbh.b_state = 0; | ||
| 1868 | mpd->lbh.b_blocknr = 0; | ||
| 1869 | } | ||
| 1870 | |||
| 1871 | mpd->next_page = page->index + 1; | ||
| 1872 | logical = (sector_t) page->index << | ||
| 1873 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 1874 | |||
| 1875 | if (!page_has_buffers(page)) { | ||
| 1876 | /* | ||
| 1877 | * There is no attached buffer heads yet (mmap?) | ||
| 1878 | * we treat the page asfull of dirty blocks | ||
| 1879 | */ | ||
| 1880 | bh = &fake; | ||
| 1881 | bh->b_size = PAGE_CACHE_SIZE; | ||
| 1882 | bh->b_state = 0; | ||
| 1883 | set_buffer_dirty(bh); | ||
| 1884 | set_buffer_uptodate(bh); | ||
| 1885 | mpage_add_bh_to_extent(mpd, logical, bh); | ||
| 1886 | } else { | ||
| 1887 | /* | ||
| 1888 | * Page with regular buffer heads, just add all dirty ones | ||
| 1889 | */ | ||
| 1890 | head = page_buffers(page); | ||
| 1891 | bh = head; | ||
| 1892 | do { | ||
| 1893 | BUG_ON(buffer_locked(bh)); | ||
| 1894 | if (buffer_dirty(bh)) | ||
| 1895 | mpage_add_bh_to_extent(mpd, logical, bh); | ||
| 1896 | logical++; | ||
| 1897 | } while ((bh = bh->b_this_page) != head); | ||
| 1898 | } | ||
| 1899 | |||
| 1900 | return 0; | ||
| 1901 | } | ||
| 1902 | |||
| 1903 | /* | ||
| 1904 | * mpage_da_writepages - walk the list of dirty pages of the given | ||
| 1905 | * address space, allocates non-allocated blocks, maps newly-allocated | ||
| 1906 | * blocks to existing bhs and issue IO them | ||
| 1907 | * | ||
| 1908 | * @mapping: address space structure to write | ||
| 1909 | * @wbc: subtract the number of written pages from *@wbc->nr_to_write | ||
| 1910 | * @get_block: the filesystem's block mapper function. | ||
| 1911 | * | ||
| 1912 | * This is a library function, which implements the writepages() | ||
| 1913 | * address_space_operation. | ||
| 1914 | * | ||
| 1915 | * In order to avoid duplication of logic that deals with partial pages, | ||
| 1916 | * multiple bio per page, etc, we find non-allocated blocks, allocate | ||
| 1917 | * them with minimal calls to ->get_block() and re-use __mpage_writepage() | ||
| 1918 | * | ||
| 1919 | * It's important that we call __mpage_writepage() only once for each | ||
| 1920 | * involved page, otherwise we'd have to implement more complicated logic | ||
| 1921 | * to deal with pages w/o PG_lock or w/ PG_writeback and so on. | ||
| 1922 | * | ||
| 1923 | * See comments to mpage_writepages() | ||
| 1924 | */ | ||
| 1925 | static int mpage_da_writepages(struct address_space *mapping, | ||
| 1926 | struct writeback_control *wbc, | ||
| 1927 | get_block_t get_block) | ||
| 1928 | { | ||
| 1929 | struct mpage_da_data mpd; | ||
| 1930 | int ret; | ||
| 1931 | |||
| 1932 | if (!get_block) | ||
| 1933 | return generic_writepages(mapping, wbc); | ||
| 1934 | |||
| 1935 | mpd.wbc = wbc; | ||
| 1936 | mpd.inode = mapping->host; | ||
| 1937 | mpd.lbh.b_size = 0; | ||
| 1938 | mpd.lbh.b_state = 0; | ||
| 1939 | mpd.lbh.b_blocknr = 0; | ||
| 1940 | mpd.first_page = 0; | ||
| 1941 | mpd.next_page = 0; | ||
| 1942 | mpd.get_block = get_block; | ||
| 1943 | |||
| 1944 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); | ||
| 1945 | |||
| 1946 | /* | ||
| 1947 | * Handle last extent of pages | ||
| 1948 | */ | ||
| 1949 | if (mpd.next_page != mpd.first_page) { | ||
| 1950 | mpage_da_map_blocks(&mpd); | ||
| 1951 | mpage_da_submit_io(&mpd); | ||
| 1952 | } | ||
| 1953 | |||
| 1954 | return ret; | ||
| 1955 | } | ||
| 1956 | |||
| 1957 | /* | ||
| 1958 | * this is a special callback for ->write_begin() only | ||
| 1959 | * it's intention is to return mapped block or reserve space | ||
| 1960 | */ | ||
| 1961 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | ||
| 1962 | struct buffer_head *bh_result, int create) | ||
| 1963 | { | ||
| 1964 | int ret = 0; | ||
| 1965 | |||
| 1966 | BUG_ON(create == 0); | ||
| 1967 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); | ||
| 1968 | |||
| 1969 | /* | ||
| 1970 | * first, we need to know whether the block is allocated already | ||
| 1971 | * preallocated blocks are unmapped but should treated | ||
| 1972 | * the same as allocated blocks. | ||
| 1973 | */ | ||
| 1974 | ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0); | ||
| 1975 | if ((ret == 0) && !buffer_delay(bh_result)) { | ||
| 1976 | /* the block isn't (pre)allocated yet, let's reserve space */ | ||
| 1977 | /* | ||
| 1978 | * XXX: __block_prepare_write() unmaps passed block, | ||
| 1979 | * is it OK? | ||
| 1980 | */ | ||
| 1981 | ret = ext4_da_reserve_space(inode, 1); | ||
| 1982 | if (ret) | ||
| 1983 | /* not enough space to reserve */ | ||
| 1984 | return ret; | ||
| 1985 | |||
| 1986 | map_bh(bh_result, inode->i_sb, 0); | ||
| 1987 | set_buffer_new(bh_result); | ||
| 1988 | set_buffer_delay(bh_result); | ||
| 1989 | } else if (ret > 0) { | ||
| 1990 | bh_result->b_size = (ret << inode->i_blkbits); | ||
| 1991 | ret = 0; | ||
| 1992 | } | ||
| 1993 | |||
| 1994 | return ret; | ||
| 1995 | } | ||
| 1996 | #define EXT4_DELALLOC_RSVED 1 | ||
| 1997 | static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | ||
| 1998 | struct buffer_head *bh_result, int create) | ||
| 1999 | { | ||
| 2000 | int ret; | ||
| 2001 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
| 2002 | loff_t disksize = EXT4_I(inode)->i_disksize; | ||
| 2003 | handle_t *handle = NULL; | ||
| 2004 | |||
| 2005 | handle = ext4_journal_current_handle(); | ||
| 2006 | if (!handle) { | ||
| 2007 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
| 2008 | bh_result, 0, 0, 0); | ||
| 2009 | BUG_ON(!ret); | ||
| 2010 | } else { | ||
| 2011 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
| 2012 | bh_result, create, 0, EXT4_DELALLOC_RSVED); | ||
| 2013 | } | ||
| 2014 | |||
| 2015 | if (ret > 0) { | ||
| 2016 | bh_result->b_size = (ret << inode->i_blkbits); | ||
| 2017 | |||
| 2018 | /* | ||
| 2019 | * Update on-disk size along with block allocation | ||
| 2020 | * we don't use 'extend_disksize' as size may change | ||
| 2021 | * within already allocated block -bzzz | ||
| 2022 | */ | ||
| 2023 | disksize = ((loff_t) iblock + ret) << inode->i_blkbits; | ||
| 2024 | if (disksize > i_size_read(inode)) | ||
| 2025 | disksize = i_size_read(inode); | ||
| 2026 | if (disksize > EXT4_I(inode)->i_disksize) { | ||
| 2027 | /* | ||
| 2028 | * XXX: replace with spinlock if seen contended -bzzz | ||
| 2029 | */ | ||
| 2030 | down_write(&EXT4_I(inode)->i_data_sem); | ||
| 2031 | if (disksize > EXT4_I(inode)->i_disksize) | ||
| 2032 | EXT4_I(inode)->i_disksize = disksize; | ||
| 2033 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 2034 | |||
| 2035 | if (EXT4_I(inode)->i_disksize == disksize) { | ||
| 2036 | ret = ext4_mark_inode_dirty(handle, inode); | ||
| 2037 | return ret; | ||
| 2038 | } | ||
| 2039 | } | ||
| 2040 | ret = 0; | ||
| 2041 | } | ||
| 2042 | return ret; | ||
| 2043 | } | ||
| 2044 | |||
| 2045 | static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | ||
| 2046 | { | ||
| 2047 | /* | ||
| 2048 | * unmapped buffer is possible for holes. | ||
| 2049 | * delay buffer is possible with delayed allocation | ||
| 2050 | */ | ||
| 2051 | return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)); | ||
| 2052 | } | ||
| 2053 | |||
| 2054 | static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, | ||
| 2055 | struct buffer_head *bh_result, int create) | ||
| 2056 | { | ||
| 2057 | int ret = 0; | ||
| 2058 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
| 2059 | |||
| 2060 | /* | ||
| 2061 | * we don't want to do block allocation in writepage | ||
| 2062 | * so call get_block_wrap with create = 0 | ||
| 2063 | */ | ||
| 2064 | ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks, | ||
| 2065 | bh_result, 0, 0, 0); | ||
| 2066 | if (ret > 0) { | ||
| 2067 | bh_result->b_size = (ret << inode->i_blkbits); | ||
| 2068 | ret = 0; | ||
| 2069 | } | ||
| 2070 | return ret; | ||
| 2071 | } | ||
| 2072 | |||
| 2073 | /* | ||
| 2074 | * get called vi ext4_da_writepages after taking page lock (have journal handle) | ||
| 2075 | * get called via journal_submit_inode_data_buffers (no journal handle) | ||
| 2076 | * get called via shrink_page_list via pdflush (no journal handle) | ||
| 2077 | * or grab_page_cache when doing write_begin (have journal handle) | ||
| 2078 | */ | ||
| 2079 | static int ext4_da_writepage(struct page *page, | ||
| 2080 | struct writeback_control *wbc) | ||
| 2081 | { | ||
| 2082 | int ret = 0; | ||
| 2083 | loff_t size; | ||
| 2084 | unsigned long len; | ||
| 2085 | struct buffer_head *page_bufs; | ||
| 2086 | struct inode *inode = page->mapping->host; | ||
| 2087 | |||
| 2088 | size = i_size_read(inode); | ||
| 2089 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
| 2090 | len = size & ~PAGE_CACHE_MASK; | ||
| 2091 | else | ||
| 2092 | len = PAGE_CACHE_SIZE; | ||
| 2093 | |||
| 2094 | if (page_has_buffers(page)) { | ||
| 2095 | page_bufs = page_buffers(page); | ||
| 2096 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
| 2097 | ext4_bh_unmapped_or_delay)) { | ||
| 2098 | /* | ||
| 2099 | * We don't want to do block allocation | ||
| 2100 | * So redirty the page and return | ||
| 2101 | * We may reach here when we do a journal commit | ||
| 2102 | * via journal_submit_inode_data_buffers. | ||
| 2103 | * If we don't have mapping block we just ignore | ||
| 2104 | * them. We can also reach here via shrink_page_list | ||
| 2105 | */ | ||
| 2106 | redirty_page_for_writepage(wbc, page); | ||
| 2107 | unlock_page(page); | ||
| 2108 | return 0; | ||
| 2109 | } | ||
| 2110 | } else { | ||
| 2111 | /* | ||
| 2112 | * The test for page_has_buffers() is subtle: | ||
| 2113 | * We know the page is dirty but it lost buffers. That means | ||
| 2114 | * that at some moment in time after write_begin()/write_end() | ||
| 2115 | * has been called all buffers have been clean and thus they | ||
| 2116 | * must have been written at least once. So they are all | ||
| 2117 | * mapped and we can happily proceed with mapping them | ||
| 2118 | * and writing the page. | ||
| 2119 | * | ||
| 2120 | * Try to initialize the buffer_heads and check whether | ||
| 2121 | * all are mapped and non delay. We don't want to | ||
| 2122 | * do block allocation here. | ||
| 2123 | */ | ||
| 2124 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | ||
| 2125 | ext4_normal_get_block_write); | ||
| 2126 | if (!ret) { | ||
| 2127 | page_bufs = page_buffers(page); | ||
| 2128 | /* check whether all are mapped and non delay */ | ||
| 2129 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
| 2130 | ext4_bh_unmapped_or_delay)) { | ||
| 2131 | redirty_page_for_writepage(wbc, page); | ||
| 2132 | unlock_page(page); | ||
| 2133 | return 0; | ||
| 2134 | } | ||
| 2135 | } else { | ||
| 2136 | /* | ||
| 2137 | * We can't do block allocation here | ||
| 2138 | * so just redity the page and unlock | ||
| 2139 | * and return | ||
| 2140 | */ | ||
| 2141 | redirty_page_for_writepage(wbc, page); | ||
| 2142 | unlock_page(page); | ||
| 2143 | return 0; | ||
| 2144 | } | ||
| 2145 | } | ||
| 2146 | |||
| 2147 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | ||
| 2148 | ret = nobh_writepage(page, ext4_normal_get_block_write, wbc); | ||
| 2149 | else | ||
| 2150 | ret = block_write_full_page(page, | ||
| 2151 | ext4_normal_get_block_write, | ||
| 2152 | wbc); | ||
| 2153 | |||
| 2154 | return ret; | ||
| 2155 | } | ||
| 2156 | |||
| 2157 | /* | ||
| 2158 | * For now just follow the DIO way to estimate the max credits | ||
| 2159 | * needed to write out EXT4_MAX_WRITEBACK_PAGES. | ||
| 2160 | * todo: need to calculate the max credits need for | ||
| 2161 | * extent based files, currently the DIO credits is based on | ||
| 2162 | * indirect-blocks mapping way. | ||
| 2163 | * | ||
| 2164 | * Probably should have a generic way to calculate credits | ||
| 2165 | * for DIO, writepages, and truncate | ||
| 2166 | */ | ||
| 2167 | #define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS | ||
| 2168 | #define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS | ||
| 2169 | |||
| 2170 | static int ext4_da_writepages(struct address_space *mapping, | ||
| 2171 | struct writeback_control *wbc) | ||
| 2172 | { | ||
| 2173 | struct inode *inode = mapping->host; | ||
| 2174 | handle_t *handle = NULL; | ||
| 2175 | int needed_blocks; | ||
| 2176 | int ret = 0; | ||
| 2177 | long to_write; | ||
| 2178 | loff_t range_start = 0; | ||
| 2179 | |||
| 2180 | /* | ||
| 2181 | * No pages to write? This is mainly a kludge to avoid starting | ||
| 2182 | * a transaction for special inodes like journal inode on last iput() | ||
| 2183 | * because that could violate lock ordering on umount | ||
| 2184 | */ | ||
| 2185 | if (!mapping->nrpages) | ||
| 2186 | return 0; | ||
| 2187 | |||
| 2188 | /* | ||
| 2189 | * Estimate the worse case needed credits to write out | ||
| 2190 | * EXT4_MAX_BUF_BLOCKS pages | ||
| 2191 | */ | ||
| 2192 | needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; | ||
| 2193 | |||
| 2194 | to_write = wbc->nr_to_write; | ||
| 2195 | if (!wbc->range_cyclic) { | ||
| 2196 | /* | ||
| 2197 | * If range_cyclic is not set force range_cont | ||
| 2198 | * and save the old writeback_index | ||
| 2199 | */ | ||
| 2200 | wbc->range_cont = 1; | ||
| 2201 | range_start = wbc->range_start; | ||
| 2202 | } | ||
| 2203 | |||
| 2204 | while (!ret && to_write) { | ||
| 2205 | /* start a new transaction*/ | ||
| 2206 | handle = ext4_journal_start(inode, needed_blocks); | ||
| 2207 | if (IS_ERR(handle)) { | ||
| 2208 | ret = PTR_ERR(handle); | ||
| 2209 | goto out_writepages; | ||
| 2210 | } | ||
| 2211 | if (ext4_should_order_data(inode)) { | ||
| 2212 | /* | ||
| 2213 | * With ordered mode we need to add | ||
| 2214 | * the inode to the journal handle | ||
| 2215 | * when we do block allocation. | ||
| 2216 | */ | ||
| 2217 | ret = ext4_jbd2_file_inode(handle, inode); | ||
| 2218 | if (ret) { | ||
| 2219 | ext4_journal_stop(handle); | ||
| 2220 | goto out_writepages; | ||
| 2221 | } | ||
| 2222 | |||
| 2223 | } | ||
| 2224 | /* | ||
| 2225 | * set the max dirty pages could be write at a time | ||
| 2226 | * to fit into the reserved transaction credits | ||
| 2227 | */ | ||
| 2228 | if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES) | ||
| 2229 | wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES; | ||
| 2230 | |||
| 2231 | to_write -= wbc->nr_to_write; | ||
| 2232 | ret = mpage_da_writepages(mapping, wbc, | ||
| 2233 | ext4_da_get_block_write); | ||
| 2234 | ext4_journal_stop(handle); | ||
| 2235 | if (wbc->nr_to_write) { | ||
| 2236 | /* | ||
| 2237 | * There is no more writeout needed | ||
| 2238 | * or we requested for a noblocking writeout | ||
| 2239 | * and we found the device congested | ||
| 2240 | */ | ||
| 2241 | to_write += wbc->nr_to_write; | ||
| 2242 | break; | ||
| 2243 | } | ||
| 2244 | wbc->nr_to_write = to_write; | ||
| 2245 | } | ||
| 2246 | |||
| 2247 | out_writepages: | ||
| 2248 | wbc->nr_to_write = to_write; | ||
| 2249 | if (range_start) | ||
| 2250 | wbc->range_start = range_start; | ||
| 2251 | return ret; | ||
| 2252 | } | ||
| 2253 | |||
| 2254 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | ||
| 2255 | loff_t pos, unsigned len, unsigned flags, | ||
| 2256 | struct page **pagep, void **fsdata) | ||
| 2257 | { | ||
| 2258 | int ret, retries = 0; | ||
| 2259 | struct page *page; | ||
| 2260 | pgoff_t index; | ||
| 2261 | unsigned from, to; | ||
| 2262 | struct inode *inode = mapping->host; | ||
| 2263 | handle_t *handle; | ||
| 2264 | |||
| 2265 | index = pos >> PAGE_CACHE_SHIFT; | ||
| 2266 | from = pos & (PAGE_CACHE_SIZE - 1); | ||
| 2267 | to = from + len; | ||
| 2268 | |||
| 2269 | retry: | ||
| 2270 | /* | ||
| 2271 | * With delayed allocation, we don't log the i_disksize update | ||
| 2272 | * if there is delayed block allocation. But we still need | ||
| 2273 | * to journalling the i_disksize update if writes to the end | ||
| 2274 | * of file which has an already mapped buffer. | ||
| 2275 | */ | ||
| 2276 | handle = ext4_journal_start(inode, 1); | ||
| 2277 | if (IS_ERR(handle)) { | ||
| 2278 | ret = PTR_ERR(handle); | ||
| 2279 | goto out; | ||
| 2280 | } | ||
| 2281 | |||
| 2282 | page = __grab_cache_page(mapping, index); | ||
| 2283 | if (!page) | ||
| 2284 | return -ENOMEM; | ||
| 2285 | *pagep = page; | ||
| 2286 | |||
| 2287 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | ||
| 2288 | ext4_da_get_block_prep); | ||
| 2289 | if (ret < 0) { | ||
| 2290 | unlock_page(page); | ||
| 2291 | ext4_journal_stop(handle); | ||
| 2292 | page_cache_release(page); | ||
| 2293 | } | ||
| 2294 | |||
| 2295 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | ||
| 2296 | goto retry; | ||
| 2297 | out: | ||
| 2298 | return ret; | ||
| 2299 | } | ||
| 2300 | |||
| 2301 | /* | ||
| 2302 | * Check if we should update i_disksize | ||
| 2303 | * when write to the end of file but not require block allocation | ||
| 2304 | */ | ||
| 2305 | static int ext4_da_should_update_i_disksize(struct page *page, | ||
| 2306 | unsigned long offset) | ||
| 2307 | { | ||
| 2308 | struct buffer_head *bh; | ||
| 2309 | struct inode *inode = page->mapping->host; | ||
| 2310 | unsigned int idx; | ||
| 2311 | int i; | ||
| 2312 | |||
| 2313 | bh = page_buffers(page); | ||
| 2314 | idx = offset >> inode->i_blkbits; | ||
| 2315 | |||
| 2316 | for (i=0; i < idx; i++) | ||
| 2317 | bh = bh->b_this_page; | ||
| 2318 | |||
| 2319 | if (!buffer_mapped(bh) || (buffer_delay(bh))) | ||
| 2320 | return 0; | ||
| 2321 | return 1; | ||
| 2322 | } | ||
| 2323 | |||
| 2324 | static int ext4_da_write_end(struct file *file, | ||
| 2325 | struct address_space *mapping, | ||
| 2326 | loff_t pos, unsigned len, unsigned copied, | ||
| 2327 | struct page *page, void *fsdata) | ||
| 2328 | { | ||
| 2329 | struct inode *inode = mapping->host; | ||
| 2330 | int ret = 0, ret2; | ||
| 2331 | handle_t *handle = ext4_journal_current_handle(); | ||
| 2332 | loff_t new_i_size; | ||
| 2333 | unsigned long start, end; | ||
| 2334 | |||
| 2335 | start = pos & (PAGE_CACHE_SIZE - 1); | ||
| 2336 | end = start + copied -1; | ||
| 2337 | |||
| 2338 | /* | ||
| 2339 | * generic_write_end() will run mark_inode_dirty() if i_size | ||
| 2340 | * changes. So let's piggyback the i_disksize mark_inode_dirty | ||
| 2341 | * into that. | ||
| 2342 | */ | ||
| 2343 | |||
| 2344 | new_i_size = pos + copied; | ||
| 2345 | if (new_i_size > EXT4_I(inode)->i_disksize) { | ||
| 2346 | if (ext4_da_should_update_i_disksize(page, end)) { | ||
| 2347 | down_write(&EXT4_I(inode)->i_data_sem); | ||
| 2348 | if (new_i_size > EXT4_I(inode)->i_disksize) { | ||
| 2349 | /* | ||
| 2350 | * Updating i_disksize when extending file | ||
| 2351 | * without needing block allocation | ||
| 2352 | */ | ||
| 2353 | if (ext4_should_order_data(inode)) | ||
| 2354 | ret = ext4_jbd2_file_inode(handle, | ||
| 2355 | inode); | ||
| 2356 | |||
| 2357 | EXT4_I(inode)->i_disksize = new_i_size; | ||
| 2358 | } | ||
| 2359 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 2360 | } | ||
| 2361 | } | ||
| 2362 | ret2 = generic_write_end(file, mapping, pos, len, copied, | ||
| 2363 | page, fsdata); | ||
| 2364 | copied = ret2; | ||
| 2365 | if (ret2 < 0) | ||
| 2366 | ret = ret2; | ||
| 2367 | ret2 = ext4_journal_stop(handle); | ||
| 2368 | if (!ret) | ||
| 2369 | ret = ret2; | ||
| 2370 | |||
| 2371 | return ret ? ret : copied; | ||
| 2372 | } | ||
| 2373 | |||
static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
{
	BUG_ON(!PageLocked(page));

	/*
	 * Drop reserved blocks: only a page that still has buffers attached
	 * is passed to ext4_da_page_release_reservation().
	 */
	if (page_has_buffers(page))
		ext4_da_page_release_reservation(page, offset);

	/* The regular invalidation runs in every case. */
	ext4_invalidatepage(page, offset);
}
| 2390 | |||
| 1400 | 2391 | ||
| 1401 | /* | 2392 | /* |
| 1402 | * bmap() is special. It gets used by applications such as lilo and by | 2393 | * bmap() is special. It gets used by applications such as lilo and by |
| @@ -1418,6 +2409,16 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
| 1418 | journal_t *journal; | 2409 | journal_t *journal; |
| 1419 | int err; | 2410 | int err; |
| 1420 | 2411 | ||
| 2412 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && | ||
| 2413 | test_opt(inode->i_sb, DELALLOC)) { | ||
| 2414 | /* | ||
| 2415 | * With delalloc we want to sync the file | ||
| 2416 | * so that we can make sure we allocate | ||
| 2417 | * blocks for file | ||
| 2418 | */ | ||
| 2419 | filemap_write_and_wait(mapping); | ||
| 2420 | } | ||
| 2421 | |||
| 1421 | if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { | 2422 | if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { |
| 1422 | /* | 2423 | /* |
| 1423 | * This is a REALLY heavyweight approach, but the use of | 2424 | * This is a REALLY heavyweight approach, but the use of |
| @@ -1462,21 +2463,17 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) | |||
| 1462 | return 0; | 2463 | return 0; |
| 1463 | } | 2464 | } |
| 1464 | 2465 | ||
| 1465 | static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) | ||
| 1466 | { | ||
| 1467 | if (buffer_mapped(bh)) | ||
| 1468 | return ext4_journal_dirty_data(handle, bh); | ||
| 1469 | return 0; | ||
| 1470 | } | ||
| 1471 | |||
| 1472 | /* | 2466 | /* |
| 1473 | * Note that we always start a transaction even if we're not journalling | 2467 | * Note that we don't need to start a transaction unless we're journaling data |
| 1474 | * data. This is to preserve ordering: any hole instantiation within | 2468 | * because we should have holes filled from ext4_page_mkwrite(). We even don't |
| 1475 | * __block_write_full_page -> ext4_get_block() should be journalled | 2469 | * need to file the inode to the transaction's list in ordered mode because if |
| 1476 | * along with the data so we don't crash and then get metadata which | 2470 | * we are writing back data added by write(), the inode is already there and if |
| 1477 | * refers to old data. | 2471 | * we are writing back data modified via mmap(), noone guarantees in which |
| 2472 | * transaction the data will hit the disk. In case we are journaling data, we | ||
| 2473 | * cannot start transaction directly because transaction start ranks above page | ||
| 2474 | * lock so we have to do some magic. | ||
| 1478 | * | 2475 | * |
| 1479 | * In all journalling modes block_write_full_page() will start the I/O. | 2476 | * In all journaling modes block_write_full_page() will start the I/O. |
| 1480 | * | 2477 | * |
| 1481 | * Problem: | 2478 | * Problem: |
| 1482 | * | 2479 | * |
| @@ -1518,105 +2515,103 @@ static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) | |||
| 1518 | * disastrous. Any write() or metadata operation will sync the fs for | 2515 | * disastrous. Any write() or metadata operation will sync the fs for |
| 1519 | * us. | 2516 | * us. |
| 1520 | * | 2517 | * |
| 1521 | * AKPM2: if all the page's buffers are mapped to disk and !data=journal, | ||
| 1522 | * we don't need to open a transaction here. | ||
| 1523 | */ | 2518 | */ |
| 1524 | static int ext4_ordered_writepage(struct page *page, | 2519 | static int __ext4_normal_writepage(struct page *page, |
| 1525 | struct writeback_control *wbc) | 2520 | struct writeback_control *wbc) |
| 1526 | { | 2521 | { |
| 1527 | struct inode *inode = page->mapping->host; | 2522 | struct inode *inode = page->mapping->host; |
| 1528 | struct buffer_head *page_bufs; | ||
| 1529 | handle_t *handle = NULL; | ||
| 1530 | int ret = 0; | ||
| 1531 | int err; | ||
| 1532 | |||
| 1533 | J_ASSERT(PageLocked(page)); | ||
| 1534 | |||
| 1535 | /* | ||
| 1536 | * We give up here if we're reentered, because it might be for a | ||
| 1537 | * different filesystem. | ||
| 1538 | */ | ||
| 1539 | if (ext4_journal_current_handle()) | ||
| 1540 | goto out_fail; | ||
| 1541 | 2523 | ||
| 1542 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 2524 | if (test_opt(inode->i_sb, NOBH)) |
| 2525 | return nobh_writepage(page, | ||
| 2526 | ext4_normal_get_block_write, wbc); | ||
| 2527 | else | ||
| 2528 | return block_write_full_page(page, | ||
| 2529 | ext4_normal_get_block_write, | ||
| 2530 | wbc); | ||
| 2531 | } | ||
| 1543 | 2532 | ||
| 1544 | if (IS_ERR(handle)) { | 2533 | static int ext4_normal_writepage(struct page *page, |
| 1545 | ret = PTR_ERR(handle); | 2534 | struct writeback_control *wbc) |
| 1546 | goto out_fail; | 2535 | { |
| 1547 | } | 2536 | struct inode *inode = page->mapping->host; |
| 2537 | loff_t size = i_size_read(inode); | ||
| 2538 | loff_t len; | ||
| 1548 | 2539 | ||
| 1549 | if (!page_has_buffers(page)) { | 2540 | J_ASSERT(PageLocked(page)); |
| 1550 | create_empty_buffers(page, inode->i_sb->s_blocksize, | 2541 | if (page->index == size >> PAGE_CACHE_SHIFT) |
| 1551 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 2542 | len = size & ~PAGE_CACHE_MASK; |
| 2543 | else | ||
| 2544 | len = PAGE_CACHE_SIZE; | ||
| 2545 | |||
| 2546 | if (page_has_buffers(page)) { | ||
| 2547 | /* if page has buffers it should all be mapped | ||
| 2548 | * and allocated. If there are not buffers attached | ||
| 2549 | * to the page we know the page is dirty but it lost | ||
| 2550 | * buffers. That means that at some moment in time | ||
| 2551 | * after write_begin() / write_end() has been called | ||
| 2552 | * all buffers have been clean and thus they must have been | ||
| 2553 | * written at least once. So they are all mapped and we can | ||
| 2554 | * happily proceed with mapping them and writing the page. | ||
| 2555 | */ | ||
| 2556 | BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
| 2557 | ext4_bh_unmapped_or_delay)); | ||
| 1552 | } | 2558 | } |
| 1553 | page_bufs = page_buffers(page); | ||
| 1554 | walk_page_buffers(handle, page_bufs, 0, | ||
| 1555 | PAGE_CACHE_SIZE, NULL, bget_one); | ||
| 1556 | |||
| 1557 | ret = block_write_full_page(page, ext4_get_block, wbc); | ||
| 1558 | 2559 | ||
| 1559 | /* | 2560 | if (!ext4_journal_current_handle()) |
| 1560 | * The page can become unlocked at any point now, and | 2561 | return __ext4_normal_writepage(page, wbc); |
| 1561 | * truncate can then come in and change things. So we | ||
| 1562 | * can't touch *page from now on. But *page_bufs is | ||
| 1563 | * safe due to elevated refcount. | ||
| 1564 | */ | ||
| 1565 | 2562 | ||
| 1566 | /* | ||
| 1567 | * And attach them to the current transaction. But only if | ||
| 1568 | * block_write_full_page() succeeded. Otherwise they are unmapped, | ||
| 1569 | * and generally junk. | ||
| 1570 | */ | ||
| 1571 | if (ret == 0) { | ||
| 1572 | err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, | ||
| 1573 | NULL, jbd2_journal_dirty_data_fn); | ||
| 1574 | if (!ret) | ||
| 1575 | ret = err; | ||
| 1576 | } | ||
| 1577 | walk_page_buffers(handle, page_bufs, 0, | ||
| 1578 | PAGE_CACHE_SIZE, NULL, bput_one); | ||
| 1579 | err = ext4_journal_stop(handle); | ||
| 1580 | if (!ret) | ||
| 1581 | ret = err; | ||
| 1582 | return ret; | ||
| 1583 | |||
| 1584 | out_fail: | ||
| 1585 | redirty_page_for_writepage(wbc, page); | 2563 | redirty_page_for_writepage(wbc, page); |
| 1586 | unlock_page(page); | 2564 | unlock_page(page); |
| 1587 | return ret; | 2565 | return 0; |
| 1588 | } | 2566 | } |
| 1589 | 2567 | ||
| 1590 | static int ext4_writeback_writepage(struct page *page, | 2568 | static int __ext4_journalled_writepage(struct page *page, |
| 1591 | struct writeback_control *wbc) | 2569 | struct writeback_control *wbc) |
| 1592 | { | 2570 | { |
| 1593 | struct inode *inode = page->mapping->host; | 2571 | struct address_space *mapping = page->mapping; |
| 2572 | struct inode *inode = mapping->host; | ||
| 2573 | struct buffer_head *page_bufs; | ||
| 1594 | handle_t *handle = NULL; | 2574 | handle_t *handle = NULL; |
| 1595 | int ret = 0; | 2575 | int ret = 0; |
| 1596 | int err; | 2576 | int err; |
| 1597 | 2577 | ||
| 1598 | if (ext4_journal_current_handle()) | 2578 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, |
| 1599 | goto out_fail; | 2579 | ext4_normal_get_block_write); |
| 2580 | if (ret != 0) | ||
| 2581 | goto out_unlock; | ||
| 2582 | |||
| 2583 | page_bufs = page_buffers(page); | ||
| 2584 | walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, | ||
| 2585 | bget_one); | ||
| 2586 | /* As soon as we unlock the page, it can go away, but we have | ||
| 2587 | * references to buffers so we are safe */ | ||
| 2588 | unlock_page(page); | ||
| 1600 | 2589 | ||
| 1601 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 2590 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); |
| 1602 | if (IS_ERR(handle)) { | 2591 | if (IS_ERR(handle)) { |
| 1603 | ret = PTR_ERR(handle); | 2592 | ret = PTR_ERR(handle); |
| 1604 | goto out_fail; | 2593 | goto out; |
| 1605 | } | 2594 | } |
| 1606 | 2595 | ||
| 1607 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2596 | ret = walk_page_buffers(handle, page_bufs, 0, |
| 1608 | ret = nobh_writepage(page, ext4_get_block, wbc); | 2597 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); |
| 1609 | else | ||
| 1610 | ret = block_write_full_page(page, ext4_get_block, wbc); | ||
| 1611 | 2598 | ||
| 2599 | err = walk_page_buffers(handle, page_bufs, 0, | ||
| 2600 | PAGE_CACHE_SIZE, NULL, write_end_fn); | ||
| 2601 | if (ret == 0) | ||
| 2602 | ret = err; | ||
| 1612 | err = ext4_journal_stop(handle); | 2603 | err = ext4_journal_stop(handle); |
| 1613 | if (!ret) | 2604 | if (!ret) |
| 1614 | ret = err; | 2605 | ret = err; |
| 1615 | return ret; | ||
| 1616 | 2606 | ||
| 1617 | out_fail: | 2607 | walk_page_buffers(handle, page_bufs, 0, |
| 1618 | redirty_page_for_writepage(wbc, page); | 2608 | PAGE_CACHE_SIZE, NULL, bput_one); |
| 2609 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | ||
| 2610 | goto out; | ||
| 2611 | |||
| 2612 | out_unlock: | ||
| 1619 | unlock_page(page); | 2613 | unlock_page(page); |
| 2614 | out: | ||
| 1620 | return ret; | 2615 | return ret; |
| 1621 | } | 2616 | } |
| 1622 | 2617 | ||
| @@ -1624,59 +2619,53 @@ static int ext4_journalled_writepage(struct page *page, | |||
| 1624 | struct writeback_control *wbc) | 2619 | struct writeback_control *wbc) |
| 1625 | { | 2620 | { |
| 1626 | struct inode *inode = page->mapping->host; | 2621 | struct inode *inode = page->mapping->host; |
| 1627 | handle_t *handle = NULL; | 2622 | loff_t size = i_size_read(inode); |
| 1628 | int ret = 0; | 2623 | loff_t len; |
| 1629 | int err; | ||
| 1630 | 2624 | ||
| 1631 | if (ext4_journal_current_handle()) | 2625 | J_ASSERT(PageLocked(page)); |
| 1632 | goto no_write; | 2626 | if (page->index == size >> PAGE_CACHE_SHIFT) |
| 2627 | len = size & ~PAGE_CACHE_MASK; | ||
| 2628 | else | ||
| 2629 | len = PAGE_CACHE_SIZE; | ||
| 2630 | |||
| 2631 | if (page_has_buffers(page)) { | ||
| 2632 | /* if page has buffers it should all be mapped | ||
| 2633 | * and allocated. If there are not buffers attached | ||
| 2634 | * to the page we know the page is dirty but it lost | ||
| 2635 | * buffers. That means that at some moment in time | ||
| 2636 | * after write_begin() / write_end() has been called | ||
| 2637 | * all buffers have been clean and thus they must have been | ||
| 2638 | * written at least once. So they are all mapped and we can | ||
| 2639 | * happily proceed with mapping them and writing the page. | ||
| 2640 | */ | ||
| 2641 | BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
| 2642 | ext4_bh_unmapped_or_delay)); | ||
| 2643 | } | ||
| 1633 | 2644 | ||
| 1634 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 2645 | if (ext4_journal_current_handle()) |
| 1635 | if (IS_ERR(handle)) { | ||
| 1636 | ret = PTR_ERR(handle); | ||
| 1637 | goto no_write; | 2646 | goto no_write; |
| 1638 | } | ||
| 1639 | 2647 | ||
| 1640 | if (!page_has_buffers(page) || PageChecked(page)) { | 2648 | if (PageChecked(page)) { |
| 1641 | /* | 2649 | /* |
| 1642 | * It's mmapped pagecache. Add buffers and journal it. There | 2650 | * It's mmapped pagecache. Add buffers and journal it. There |
| 1643 | * doesn't seem much point in redirtying the page here. | 2651 | * doesn't seem much point in redirtying the page here. |
| 1644 | */ | 2652 | */ |
| 1645 | ClearPageChecked(page); | 2653 | ClearPageChecked(page); |
| 1646 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | 2654 | return __ext4_journalled_writepage(page, wbc); |
| 1647 | ext4_get_block); | ||
| 1648 | if (ret != 0) { | ||
| 1649 | ext4_journal_stop(handle); | ||
| 1650 | goto out_unlock; | ||
| 1651 | } | ||
| 1652 | ret = walk_page_buffers(handle, page_buffers(page), 0, | ||
| 1653 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); | ||
| 1654 | |||
| 1655 | err = walk_page_buffers(handle, page_buffers(page), 0, | ||
| 1656 | PAGE_CACHE_SIZE, NULL, write_end_fn); | ||
| 1657 | if (ret == 0) | ||
| 1658 | ret = err; | ||
| 1659 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | ||
| 1660 | unlock_page(page); | ||
| 1661 | } else { | 2655 | } else { |
| 1662 | /* | 2656 | /* |
| 1663 | * It may be a page full of checkpoint-mode buffers. We don't | 2657 | * It may be a page full of checkpoint-mode buffers. We don't |
| 1664 | * really know unless we go poke around in the buffer_heads. | 2658 | * really know unless we go poke around in the buffer_heads. |
| 1665 | * But block_write_full_page will do the right thing. | 2659 | * But block_write_full_page will do the right thing. |
| 1666 | */ | 2660 | */ |
| 1667 | ret = block_write_full_page(page, ext4_get_block, wbc); | 2661 | return block_write_full_page(page, |
| 2662 | ext4_normal_get_block_write, | ||
| 2663 | wbc); | ||
| 1668 | } | 2664 | } |
| 1669 | err = ext4_journal_stop(handle); | ||
| 1670 | if (!ret) | ||
| 1671 | ret = err; | ||
| 1672 | out: | ||
| 1673 | return ret; | ||
| 1674 | |||
| 1675 | no_write: | 2665 | no_write: |
| 1676 | redirty_page_for_writepage(wbc, page); | 2666 | redirty_page_for_writepage(wbc, page); |
| 1677 | out_unlock: | ||
| 1678 | unlock_page(page); | 2667 | unlock_page(page); |
| 1679 | goto out; | 2668 | return 0; |
| 1680 | } | 2669 | } |
| 1681 | 2670 | ||
| 1682 | static int ext4_readpage(struct file *file, struct page *page) | 2671 | static int ext4_readpage(struct file *file, struct page *page) |
| @@ -1819,7 +2808,7 @@ static int ext4_journalled_set_page_dirty(struct page *page) | |||
| 1819 | static const struct address_space_operations ext4_ordered_aops = { | 2808 | static const struct address_space_operations ext4_ordered_aops = { |
| 1820 | .readpage = ext4_readpage, | 2809 | .readpage = ext4_readpage, |
| 1821 | .readpages = ext4_readpages, | 2810 | .readpages = ext4_readpages, |
| 1822 | .writepage = ext4_ordered_writepage, | 2811 | .writepage = ext4_normal_writepage, |
| 1823 | .sync_page = block_sync_page, | 2812 | .sync_page = block_sync_page, |
| 1824 | .write_begin = ext4_write_begin, | 2813 | .write_begin = ext4_write_begin, |
| 1825 | .write_end = ext4_ordered_write_end, | 2814 | .write_end = ext4_ordered_write_end, |
| @@ -1833,7 +2822,7 @@ static const struct address_space_operations ext4_ordered_aops = { | |||
| 1833 | static const struct address_space_operations ext4_writeback_aops = { | 2822 | static const struct address_space_operations ext4_writeback_aops = { |
| 1834 | .readpage = ext4_readpage, | 2823 | .readpage = ext4_readpage, |
| 1835 | .readpages = ext4_readpages, | 2824 | .readpages = ext4_readpages, |
| 1836 | .writepage = ext4_writeback_writepage, | 2825 | .writepage = ext4_normal_writepage, |
| 1837 | .sync_page = block_sync_page, | 2826 | .sync_page = block_sync_page, |
| 1838 | .write_begin = ext4_write_begin, | 2827 | .write_begin = ext4_write_begin, |
| 1839 | .write_end = ext4_writeback_write_end, | 2828 | .write_end = ext4_writeback_write_end, |
| @@ -1857,10 +2846,31 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
| 1857 | .releasepage = ext4_releasepage, | 2846 | .releasepage = ext4_releasepage, |
| 1858 | }; | 2847 | }; |
| 1859 | 2848 | ||
| 2849 | static const struct address_space_operations ext4_da_aops = { | ||
| 2850 | .readpage = ext4_readpage, | ||
| 2851 | .readpages = ext4_readpages, | ||
| 2852 | .writepage = ext4_da_writepage, | ||
| 2853 | .writepages = ext4_da_writepages, | ||
| 2854 | .sync_page = block_sync_page, | ||
| 2855 | .write_begin = ext4_da_write_begin, | ||
| 2856 | .write_end = ext4_da_write_end, | ||
| 2857 | .bmap = ext4_bmap, | ||
| 2858 | .invalidatepage = ext4_da_invalidatepage, | ||
| 2859 | .releasepage = ext4_releasepage, | ||
| 2860 | .direct_IO = ext4_direct_IO, | ||
| 2861 | .migratepage = buffer_migrate_page, | ||
| 2862 | }; | ||
| 2863 | |||
| 1860 | void ext4_set_aops(struct inode *inode) | 2864 | void ext4_set_aops(struct inode *inode) |
| 1861 | { | 2865 | { |
| 1862 | if (ext4_should_order_data(inode)) | 2866 | if (ext4_should_order_data(inode) && |
| 2867 | test_opt(inode->i_sb, DELALLOC)) | ||
| 2868 | inode->i_mapping->a_ops = &ext4_da_aops; | ||
| 2869 | else if (ext4_should_order_data(inode)) | ||
| 1863 | inode->i_mapping->a_ops = &ext4_ordered_aops; | 2870 | inode->i_mapping->a_ops = &ext4_ordered_aops; |
| 2871 | else if (ext4_should_writeback_data(inode) && | ||
| 2872 | test_opt(inode->i_sb, DELALLOC)) | ||
| 2873 | inode->i_mapping->a_ops = &ext4_da_aops; | ||
| 1864 | else if (ext4_should_writeback_data(inode)) | 2874 | else if (ext4_should_writeback_data(inode)) |
| 1865 | inode->i_mapping->a_ops = &ext4_writeback_aops; | 2875 | inode->i_mapping->a_ops = &ext4_writeback_aops; |
| 1866 | else | 2876 | else |
| @@ -1873,7 +2883,7 @@ void ext4_set_aops(struct inode *inode) | |||
| 1873 | * This required during truncate. We need to physically zero the tail end | 2883 | * This required during truncate. We need to physically zero the tail end |
| 1874 | * of that block so it doesn't yield old data if the file is later grown. | 2884 | * of that block so it doesn't yield old data if the file is later grown. |
| 1875 | */ | 2885 | */ |
| 1876 | int ext4_block_truncate_page(handle_t *handle, struct page *page, | 2886 | int ext4_block_truncate_page(handle_t *handle, |
| 1877 | struct address_space *mapping, loff_t from) | 2887 | struct address_space *mapping, loff_t from) |
| 1878 | { | 2888 | { |
| 1879 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 2889 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
| @@ -1882,8 +2892,13 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, | |||
| 1882 | ext4_lblk_t iblock; | 2892 | ext4_lblk_t iblock; |
| 1883 | struct inode *inode = mapping->host; | 2893 | struct inode *inode = mapping->host; |
| 1884 | struct buffer_head *bh; | 2894 | struct buffer_head *bh; |
| 2895 | struct page *page; | ||
| 1885 | int err = 0; | 2896 | int err = 0; |
| 1886 | 2897 | ||
| 2898 | page = grab_cache_page(mapping, from >> PAGE_CACHE_SHIFT); | ||
| 2899 | if (!page) | ||
| 2900 | return -EINVAL; | ||
| 2901 | |||
| 1887 | blocksize = inode->i_sb->s_blocksize; | 2902 | blocksize = inode->i_sb->s_blocksize; |
| 1888 | length = blocksize - (offset & (blocksize - 1)); | 2903 | length = blocksize - (offset & (blocksize - 1)); |
| 1889 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 2904 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
| @@ -1956,7 +2971,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, | |||
| 1956 | err = ext4_journal_dirty_metadata(handle, bh); | 2971 | err = ext4_journal_dirty_metadata(handle, bh); |
| 1957 | } else { | 2972 | } else { |
| 1958 | if (ext4_should_order_data(inode)) | 2973 | if (ext4_should_order_data(inode)) |
| 1959 | err = ext4_journal_dirty_data(handle, bh); | 2974 | err = ext4_jbd2_file_inode(handle, inode); |
| 1960 | mark_buffer_dirty(bh); | 2975 | mark_buffer_dirty(bh); |
| 1961 | } | 2976 | } |
| 1962 | 2977 | ||
| @@ -2179,7 +3194,21 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
| 2179 | 3194 | ||
| 2180 | if (this_bh) { | 3195 | if (this_bh) { |
| 2181 | BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); | 3196 | BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); |
| 2182 | ext4_journal_dirty_metadata(handle, this_bh); | 3197 | |
| 3198 | /* | ||
| 3199 | * The buffer head should have an attached journal head at this | ||
| 3200 | * point. However, if the data is corrupted and an indirect | ||
| 3201 | * block pointed to itself, it would have been detached when | ||
| 3202 | * the block was cleared. Check for this instead of OOPSing. | ||
| 3203 | */ | ||
| 3204 | if (bh2jh(this_bh)) | ||
| 3205 | ext4_journal_dirty_metadata(handle, this_bh); | ||
| 3206 | else | ||
| 3207 | ext4_error(inode->i_sb, __func__, | ||
| 3208 | "circular indirect block detected, " | ||
| 3209 | "inode=%lu, block=%llu", | ||
| 3210 | inode->i_ino, | ||
| 3211 | (unsigned long long) this_bh->b_blocknr); | ||
| 2183 | } | 3212 | } |
| 2184 | } | 3213 | } |
| 2185 | 3214 | ||
| @@ -2305,6 +3334,19 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
| 2305 | } | 3334 | } |
| 2306 | } | 3335 | } |
| 2307 | 3336 | ||
| 3337 | int ext4_can_truncate(struct inode *inode) | ||
| 3338 | { | ||
| 3339 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
| 3340 | return 0; | ||
| 3341 | if (S_ISREG(inode->i_mode)) | ||
| 3342 | return 1; | ||
| 3343 | if (S_ISDIR(inode->i_mode)) | ||
| 3344 | return 1; | ||
| 3345 | if (S_ISLNK(inode->i_mode)) | ||
| 3346 | return !ext4_inode_is_fast_symlink(inode); | ||
| 3347 | return 0; | ||
| 3348 | } | ||
| 3349 | |||
| 2308 | /* | 3350 | /* |
| 2309 | * ext4_truncate() | 3351 | * ext4_truncate() |
| 2310 | * | 3352 | * |
| @@ -2347,51 +3389,25 @@ void ext4_truncate(struct inode *inode) | |||
| 2347 | int n; | 3389 | int n; |
| 2348 | ext4_lblk_t last_block; | 3390 | ext4_lblk_t last_block; |
| 2349 | unsigned blocksize = inode->i_sb->s_blocksize; | 3391 | unsigned blocksize = inode->i_sb->s_blocksize; |
| 2350 | struct page *page; | ||
| 2351 | 3392 | ||
| 2352 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 3393 | if (!ext4_can_truncate(inode)) |
| 2353 | S_ISLNK(inode->i_mode))) | ||
| 2354 | return; | ||
| 2355 | if (ext4_inode_is_fast_symlink(inode)) | ||
| 2356 | return; | ||
| 2357 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
| 2358 | return; | 3394 | return; |
| 2359 | 3395 | ||
| 2360 | /* | ||
| 2361 | * We have to lock the EOF page here, because lock_page() nests | ||
| 2362 | * outside jbd2_journal_start(). | ||
| 2363 | */ | ||
| 2364 | if ((inode->i_size & (blocksize - 1)) == 0) { | ||
| 2365 | /* Block boundary? Nothing to do */ | ||
| 2366 | page = NULL; | ||
| 2367 | } else { | ||
| 2368 | page = grab_cache_page(mapping, | ||
| 2369 | inode->i_size >> PAGE_CACHE_SHIFT); | ||
| 2370 | if (!page) | ||
| 2371 | return; | ||
| 2372 | } | ||
| 2373 | |||
| 2374 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 3396 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
| 2375 | ext4_ext_truncate(inode, page); | 3397 | ext4_ext_truncate(inode); |
| 2376 | return; | 3398 | return; |
| 2377 | } | 3399 | } |
| 2378 | 3400 | ||
| 2379 | handle = start_transaction(inode); | 3401 | handle = start_transaction(inode); |
| 2380 | if (IS_ERR(handle)) { | 3402 | if (IS_ERR(handle)) |
| 2381 | if (page) { | ||
| 2382 | clear_highpage(page); | ||
| 2383 | flush_dcache_page(page); | ||
| 2384 | unlock_page(page); | ||
| 2385 | page_cache_release(page); | ||
| 2386 | } | ||
| 2387 | return; /* AKPM: return what? */ | 3403 | return; /* AKPM: return what? */ |
| 2388 | } | ||
| 2389 | 3404 | ||
| 2390 | last_block = (inode->i_size + blocksize-1) | 3405 | last_block = (inode->i_size + blocksize-1) |
| 2391 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | 3406 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); |
| 2392 | 3407 | ||
| 2393 | if (page) | 3408 | if (inode->i_size & (blocksize - 1)) |
| 2394 | ext4_block_truncate_page(handle, page, mapping, inode->i_size); | 3409 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) |
| 3410 | goto out_stop; | ||
| 2395 | 3411 | ||
| 2396 | n = ext4_block_to_path(inode, last_block, offsets, NULL); | 3412 | n = ext4_block_to_path(inode, last_block, offsets, NULL); |
| 2397 | if (n == 0) | 3413 | if (n == 0) |
| @@ -2410,6 +3426,11 @@ void ext4_truncate(struct inode *inode) | |||
| 2410 | goto out_stop; | 3426 | goto out_stop; |
| 2411 | 3427 | ||
| 2412 | /* | 3428 | /* |
| 3429 | * From here we block out all ext4_get_block() callers who want to | ||
| 3430 | * modify the block allocation tree. | ||
| 3431 | */ | ||
| 3432 | down_write(&ei->i_data_sem); | ||
| 3433 | /* | ||
| 2413 | * The orphan list entry will now protect us from any crash which | 3434 | * The orphan list entry will now protect us from any crash which |
| 2414 | * occurs before the truncate completes, so it is now safe to propagate | 3435 | * occurs before the truncate completes, so it is now safe to propagate |
| 2415 | * the new, shorter inode size (held for now in i_size) into the | 3436 | * the new, shorter inode size (held for now in i_size) into the |
| @@ -2418,12 +3439,6 @@ void ext4_truncate(struct inode *inode) | |||
| 2418 | */ | 3439 | */ |
| 2419 | ei->i_disksize = inode->i_size; | 3440 | ei->i_disksize = inode->i_size; |
| 2420 | 3441 | ||
| 2421 | /* | ||
| 2422 | * From here we block out all ext4_get_block() callers who want to | ||
| 2423 | * modify the block allocation tree. | ||
| 2424 | */ | ||
| 2425 | down_write(&ei->i_data_sem); | ||
| 2426 | |||
| 2427 | if (n == 1) { /* direct blocks */ | 3442 | if (n == 1) { /* direct blocks */ |
| 2428 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | 3443 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], |
| 2429 | i_data + EXT4_NDIR_BLOCKS); | 3444 | i_data + EXT4_NDIR_BLOCKS); |
| @@ -3107,7 +4122,14 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
| 3107 | * be freed, so we have a strong guarantee that no future commit will | 4122 | * be freed, so we have a strong guarantee that no future commit will |
| 3108 | * leave these blocks visible to the user.) | 4123 | * leave these blocks visible to the user.) |
| 3109 | * | 4124 | * |
| 3110 | * Called with inode->sem down. | 4125 | * Another thing we have to assure is that if we are in ordered mode |
| 4126 | * and inode is still attached to the committing transaction, we must | ||
| 4127 | * we start writeout of all the dirty pages which are being truncated. | ||
| 4128 | * This way we are sure that all the data written in the previous | ||
| 4129 | * transaction are already on disk (truncate waits for pages under | ||
| 4130 | * writeback). | ||
| 4131 | * | ||
| 4132 | * Called with inode->i_mutex down. | ||
| 3111 | */ | 4133 | */ |
| 3112 | int ext4_setattr(struct dentry *dentry, struct iattr *attr) | 4134 | int ext4_setattr(struct dentry *dentry, struct iattr *attr) |
| 3113 | { | 4135 | { |
| @@ -3173,6 +4195,22 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 3173 | if (!error) | 4195 | if (!error) |
| 3174 | error = rc; | 4196 | error = rc; |
| 3175 | ext4_journal_stop(handle); | 4197 | ext4_journal_stop(handle); |
| 4198 | |||
| 4199 | if (ext4_should_order_data(inode)) { | ||
| 4200 | error = ext4_begin_ordered_truncate(inode, | ||
| 4201 | attr->ia_size); | ||
| 4202 | if (error) { | ||
| 4203 | /* Do as much error cleanup as possible */ | ||
| 4204 | handle = ext4_journal_start(inode, 3); | ||
| 4205 | if (IS_ERR(handle)) { | ||
| 4206 | ext4_orphan_del(NULL, inode); | ||
| 4207 | goto err_out; | ||
| 4208 | } | ||
| 4209 | ext4_orphan_del(handle, inode); | ||
| 4210 | ext4_journal_stop(handle); | ||
| 4211 | goto err_out; | ||
| 4212 | } | ||
| 4213 | } | ||
| 3176 | } | 4214 | } |
| 3177 | 4215 | ||
| 3178 | rc = inode_setattr(inode, attr); | 4216 | rc = inode_setattr(inode, attr); |
| @@ -3193,6 +4231,32 @@ err_out: | |||
| 3193 | return error; | 4231 | return error; |
| 3194 | } | 4232 | } |
| 3195 | 4233 | ||
| 4234 | int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
| 4235 | struct kstat *stat) | ||
| 4236 | { | ||
| 4237 | struct inode *inode; | ||
| 4238 | unsigned long delalloc_blocks; | ||
| 4239 | |||
| 4240 | inode = dentry->d_inode; | ||
| 4241 | generic_fillattr(inode, stat); | ||
| 4242 | |||
| 4243 | /* | ||
| 4244 | * We can't update i_blocks if the block allocation is delayed; | ||
| 4245 | * otherwise, in the case of a system crash before the real block | ||
| 4246 | * allocation is done, we would have i_blocks inconsistent with | ||
| 4247 | * the on-disk file blocks. | ||
| 4248 | * We always keep i_blocks updated together with the real | ||
| 4249 | * allocation. But so as not to confuse the user, stat | ||
| 4250 | * will return a block count that includes the delayed | ||
| 4251 | * allocation blocks for this file. | ||
| 4252 | */ | ||
| 4253 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 4254 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | ||
| 4255 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 4256 | |||
| 4257 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | ||
| 4258 | return 0; | ||
| 4259 | } | ||
| 3196 | 4260 | ||
| 3197 | /* | 4261 | /* |
| 3198 | * How many blocks doth make a writepage()? | 4262 | * How many blocks doth make a writepage()? |
| @@ -3506,3 +4570,64 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
| 3506 | 4570 | ||
| 3507 | return err; | 4571 | return err; |
| 3508 | } | 4572 | } |
| 4573 | |||
| 4574 | static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) | ||
| 4575 | { | ||
| 4576 | return !buffer_mapped(bh); | ||
| 4577 | } | ||
| 4578 | |||
| 4579 | int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
| 4580 | { | ||
| 4581 | loff_t size; | ||
| 4582 | unsigned long len; | ||
| 4583 | int ret = -EINVAL; | ||
| 4584 | struct file *file = vma->vm_file; | ||
| 4585 | struct inode *inode = file->f_path.dentry->d_inode; | ||
| 4586 | struct address_space *mapping = inode->i_mapping; | ||
| 4587 | |||
| 4588 | /* | ||
| 4589 | * Get i_alloc_sem to stop truncates messing with the inode. We cannot | ||
| 4590 | * get i_mutex because we are already holding mmap_sem. | ||
| 4591 | */ | ||
| 4592 | down_read(&inode->i_alloc_sem); | ||
| 4593 | size = i_size_read(inode); | ||
| 4594 | if (page->mapping != mapping || size <= page_offset(page) | ||
| 4595 | || !PageUptodate(page)) { | ||
| 4596 | /* page got truncated from under us? */ | ||
| 4597 | goto out_unlock; | ||
| 4598 | } | ||
| 4599 | ret = 0; | ||
| 4600 | if (PageMappedToDisk(page)) | ||
| 4601 | goto out_unlock; | ||
| 4602 | |||
| 4603 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
| 4604 | len = size & ~PAGE_CACHE_MASK; | ||
| 4605 | else | ||
| 4606 | len = PAGE_CACHE_SIZE; | ||
| 4607 | |||
| 4608 | if (page_has_buffers(page)) { | ||
| 4609 | /* return if we have all the buffers mapped */ | ||
| 4610 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
| 4611 | ext4_bh_unmapped)) | ||
| 4612 | goto out_unlock; | ||
| 4613 | } | ||
| 4614 | /* | ||
| 4615 | * OK, we need to fill the hole... Do write_begin/write_end | ||
| 4616 | * to do block allocation/reservation. We are not holding | ||
| 4617 | * inode->i_mutex here, which allows parallel write_begin/ | ||
| 4618 | * write_end calls. lock_page prevents this from happening | ||
| 4619 | * on the same page, though. | ||
| 4620 | */ | ||
| 4621 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), | ||
| 4622 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
| 4623 | if (ret < 0) | ||
| 4624 | goto out_unlock; | ||
| 4625 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), | ||
| 4626 | len, len, page, NULL); | ||
| 4627 | if (ret < 0) | ||
| 4628 | goto out_unlock; | ||
| 4629 | ret = 0; | ||
| 4630 | out_unlock: | ||
| 4631 | up_read(&inode->i_alloc_sem); | ||
| 4632 | return ret; | ||
| 4633 | } | ||
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c9900aade150..8d141a25bbee 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
| @@ -381,22 +381,28 @@ static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr) | |||
| 381 | 381 | ||
| 382 | static inline int mb_find_next_zero_bit(void *addr, int max, int start) | 382 | static inline int mb_find_next_zero_bit(void *addr, int max, int start) |
| 383 | { | 383 | { |
| 384 | int fix = 0; | 384 | int fix = 0, ret, tmpmax; |
| 385 | addr = mb_correct_addr_and_bit(&fix, addr); | 385 | addr = mb_correct_addr_and_bit(&fix, addr); |
| 386 | max += fix; | 386 | tmpmax = max + fix; |
| 387 | start += fix; | 387 | start += fix; |
| 388 | 388 | ||
| 389 | return ext4_find_next_zero_bit(addr, max, start) - fix; | 389 | ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix; |
| 390 | if (ret > max) | ||
| 391 | return max; | ||
| 392 | return ret; | ||
| 390 | } | 393 | } |
| 391 | 394 | ||
| 392 | static inline int mb_find_next_bit(void *addr, int max, int start) | 395 | static inline int mb_find_next_bit(void *addr, int max, int start) |
| 393 | { | 396 | { |
| 394 | int fix = 0; | 397 | int fix = 0, ret, tmpmax; |
| 395 | addr = mb_correct_addr_and_bit(&fix, addr); | 398 | addr = mb_correct_addr_and_bit(&fix, addr); |
| 396 | max += fix; | 399 | tmpmax = max + fix; |
| 397 | start += fix; | 400 | start += fix; |
| 398 | 401 | ||
| 399 | return ext4_find_next_bit(addr, max, start) - fix; | 402 | ret = ext4_find_next_bit(addr, tmpmax, start) - fix; |
| 403 | if (ret > max) | ||
| 404 | return max; | ||
| 405 | return ret; | ||
| 400 | } | 406 | } |
| 401 | 407 | ||
| 402 | static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) | 408 | static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) |
| @@ -803,6 +809,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
| 803 | if (!buffer_uptodate(bh[i])) | 809 | if (!buffer_uptodate(bh[i])) |
| 804 | goto out; | 810 | goto out; |
| 805 | 811 | ||
| 812 | err = 0; | ||
| 806 | first_block = page->index * blocks_per_page; | 813 | first_block = page->index * blocks_per_page; |
| 807 | for (i = 0; i < blocks_per_page; i++) { | 814 | for (i = 0; i < blocks_per_page; i++) { |
| 808 | int group; | 815 | int group; |
| @@ -883,6 +890,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
| 883 | int pnum; | 890 | int pnum; |
| 884 | int poff; | 891 | int poff; |
| 885 | struct page *page; | 892 | struct page *page; |
| 893 | int ret; | ||
| 886 | 894 | ||
| 887 | mb_debug("load group %lu\n", group); | 895 | mb_debug("load group %lu\n", group); |
| 888 | 896 | ||
| @@ -914,15 +922,21 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
| 914 | if (page) { | 922 | if (page) { |
| 915 | BUG_ON(page->mapping != inode->i_mapping); | 923 | BUG_ON(page->mapping != inode->i_mapping); |
| 916 | if (!PageUptodate(page)) { | 924 | if (!PageUptodate(page)) { |
| 917 | ext4_mb_init_cache(page, NULL); | 925 | ret = ext4_mb_init_cache(page, NULL); |
| 926 | if (ret) { | ||
| 927 | unlock_page(page); | ||
| 928 | goto err; | ||
| 929 | } | ||
| 918 | mb_cmp_bitmaps(e4b, page_address(page) + | 930 | mb_cmp_bitmaps(e4b, page_address(page) + |
| 919 | (poff * sb->s_blocksize)); | 931 | (poff * sb->s_blocksize)); |
| 920 | } | 932 | } |
| 921 | unlock_page(page); | 933 | unlock_page(page); |
| 922 | } | 934 | } |
| 923 | } | 935 | } |
| 924 | if (page == NULL || !PageUptodate(page)) | 936 | if (page == NULL || !PageUptodate(page)) { |
| 937 | ret = -EIO; | ||
| 925 | goto err; | 938 | goto err; |
| 939 | } | ||
| 926 | e4b->bd_bitmap_page = page; | 940 | e4b->bd_bitmap_page = page; |
| 927 | e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); | 941 | e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); |
| 928 | mark_page_accessed(page); | 942 | mark_page_accessed(page); |
| @@ -938,14 +952,20 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
| 938 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | 952 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); |
| 939 | if (page) { | 953 | if (page) { |
| 940 | BUG_ON(page->mapping != inode->i_mapping); | 954 | BUG_ON(page->mapping != inode->i_mapping); |
| 941 | if (!PageUptodate(page)) | 955 | if (!PageUptodate(page)) { |
| 942 | ext4_mb_init_cache(page, e4b->bd_bitmap); | 956 | ret = ext4_mb_init_cache(page, e4b->bd_bitmap); |
| 943 | 957 | if (ret) { | |
| 958 | unlock_page(page); | ||
| 959 | goto err; | ||
| 960 | } | ||
| 961 | } | ||
| 944 | unlock_page(page); | 962 | unlock_page(page); |
| 945 | } | 963 | } |
| 946 | } | 964 | } |
| 947 | if (page == NULL || !PageUptodate(page)) | 965 | if (page == NULL || !PageUptodate(page)) { |
| 966 | ret = -EIO; | ||
| 948 | goto err; | 967 | goto err; |
| 968 | } | ||
| 949 | e4b->bd_buddy_page = page; | 969 | e4b->bd_buddy_page = page; |
| 950 | e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); | 970 | e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); |
| 951 | mark_page_accessed(page); | 971 | mark_page_accessed(page); |
| @@ -962,7 +982,7 @@ err: | |||
| 962 | page_cache_release(e4b->bd_buddy_page); | 982 | page_cache_release(e4b->bd_buddy_page); |
| 963 | e4b->bd_buddy = NULL; | 983 | e4b->bd_buddy = NULL; |
| 964 | e4b->bd_bitmap = NULL; | 984 | e4b->bd_bitmap = NULL; |
| 965 | return -EIO; | 985 | return ret; |
| 966 | } | 986 | } |
| 967 | 987 | ||
| 968 | static void ext4_mb_release_desc(struct ext4_buddy *e4b) | 988 | static void ext4_mb_release_desc(struct ext4_buddy *e4b) |
| @@ -1031,7 +1051,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) | |||
| 1031 | } | 1051 | } |
| 1032 | } | 1052 | } |
| 1033 | 1053 | ||
| 1034 | static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | 1054 | static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, |
| 1035 | int first, int count) | 1055 | int first, int count) |
| 1036 | { | 1056 | { |
| 1037 | int block = 0; | 1057 | int block = 0; |
| @@ -1071,11 +1091,12 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
| 1071 | blocknr += block; | 1091 | blocknr += block; |
| 1072 | blocknr += | 1092 | blocknr += |
| 1073 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 1093 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
| 1074 | 1094 | ext4_unlock_group(sb, e4b->bd_group); | |
| 1075 | ext4_error(sb, __func__, "double-free of inode" | 1095 | ext4_error(sb, __func__, "double-free of inode" |
| 1076 | " %lu's block %llu(bit %u in group %lu)\n", | 1096 | " %lu's block %llu(bit %u in group %lu)\n", |
| 1077 | inode ? inode->i_ino : 0, blocknr, block, | 1097 | inode ? inode->i_ino : 0, blocknr, block, |
| 1078 | e4b->bd_group); | 1098 | e4b->bd_group); |
| 1099 | ext4_lock_group(sb, e4b->bd_group); | ||
| 1079 | } | 1100 | } |
| 1080 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); | 1101 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); |
| 1081 | e4b->bd_info->bb_counters[order]++; | 1102 | e4b->bd_info->bb_counters[order]++; |
| @@ -1113,8 +1134,6 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
| 1113 | } while (1); | 1134 | } while (1); |
| 1114 | } | 1135 | } |
| 1115 | mb_check_buddy(e4b); | 1136 | mb_check_buddy(e4b); |
| 1116 | |||
| 1117 | return 0; | ||
| 1118 | } | 1137 | } |
| 1119 | 1138 | ||
| 1120 | static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | 1139 | static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, |
| @@ -1730,10 +1749,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
| 1730 | ac->ac_g_ex.fe_start = sbi->s_mb_last_start; | 1749 | ac->ac_g_ex.fe_start = sbi->s_mb_last_start; |
| 1731 | spin_unlock(&sbi->s_md_lock); | 1750 | spin_unlock(&sbi->s_md_lock); |
| 1732 | } | 1751 | } |
| 1733 | |||
| 1734 | /* searching for the right group start from the goal value specified */ | ||
| 1735 | group = ac->ac_g_ex.fe_group; | ||
| 1736 | |||
| 1737 | /* Let's just scan groups to find more-less suitable blocks */ | 1752 | /* Let's just scan groups to find more-less suitable blocks */ |
| 1738 | cr = ac->ac_2order ? 0 : 1; | 1753 | cr = ac->ac_2order ? 0 : 1; |
| 1739 | /* | 1754 | /* |
| @@ -1743,6 +1758,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
| 1743 | repeat: | 1758 | repeat: |
| 1744 | for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) { | 1759 | for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) { |
| 1745 | ac->ac_criteria = cr; | 1760 | ac->ac_criteria = cr; |
| 1761 | /* | ||
| 1762 | * searching for the right group start | ||
| 1763 | * from the goal value specified | ||
| 1764 | */ | ||
| 1765 | group = ac->ac_g_ex.fe_group; | ||
| 1766 | |||
| 1746 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { | 1767 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { |
| 1747 | struct ext4_group_info *grp; | 1768 | struct ext4_group_info *grp; |
| 1748 | struct ext4_group_desc *desc; | 1769 | struct ext4_group_desc *desc; |
| @@ -1963,6 +1984,8 @@ static int ext4_mb_seq_history_open(struct inode *inode, struct file *file) | |||
| 1963 | int rc; | 1984 | int rc; |
| 1964 | int size; | 1985 | int size; |
| 1965 | 1986 | ||
| 1987 | if (unlikely(sbi->s_mb_history == NULL)) | ||
| 1988 | return -ENOMEM; | ||
| 1966 | s = kmalloc(sizeof(*s), GFP_KERNEL); | 1989 | s = kmalloc(sizeof(*s), GFP_KERNEL); |
| 1967 | if (s == NULL) | 1990 | if (s == NULL) |
| 1968 | return -ENOMEM; | 1991 | return -ENOMEM; |
| @@ -2165,9 +2188,7 @@ static void ext4_mb_history_init(struct super_block *sb) | |||
| 2165 | sbi->s_mb_history_cur = 0; | 2188 | sbi->s_mb_history_cur = 0; |
| 2166 | spin_lock_init(&sbi->s_mb_history_lock); | 2189 | spin_lock_init(&sbi->s_mb_history_lock); |
| 2167 | i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); | 2190 | i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); |
| 2168 | sbi->s_mb_history = kmalloc(i, GFP_KERNEL); | 2191 | sbi->s_mb_history = kzalloc(i, GFP_KERNEL); |
| 2169 | if (likely(sbi->s_mb_history != NULL)) | ||
| 2170 | memset(sbi->s_mb_history, 0, i); | ||
| 2171 | /* if we can't allocate history, then we simply won't use it */ | 2192 | /* if we can't allocate history, then we simply won't use it */ |
| 2172 | } | 2193 | } |
| 2173 | 2194 | ||
| @@ -2215,21 +2236,192 @@ ext4_mb_store_history(struct ext4_allocation_context *ac) | |||
| 2215 | #define ext4_mb_history_init(sb) | 2236 | #define ext4_mb_history_init(sb) |
| 2216 | #endif | 2237 | #endif |
| 2217 | 2238 | ||
| 2239 | |||
| 2240 | /* Create and initialize ext4_group_info data for the given group. */ | ||
| 2241 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | ||
| 2242 | struct ext4_group_desc *desc) | ||
| 2243 | { | ||
| 2244 | int i, len; | ||
| 2245 | int metalen = 0; | ||
| 2246 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 2247 | struct ext4_group_info **meta_group_info; | ||
| 2248 | |||
| 2249 | /* | ||
| 2250 | * First check if this group is the first of a reserved block. | ||
| 2251 | * If it's true, we have to allocate a new table of pointers | ||
| 2252 | * to ext4_group_info structures | ||
| 2253 | */ | ||
| 2254 | if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { | ||
| 2255 | metalen = sizeof(*meta_group_info) << | ||
| 2256 | EXT4_DESC_PER_BLOCK_BITS(sb); | ||
| 2257 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | ||
| 2258 | if (meta_group_info == NULL) { | ||
| 2259 | printk(KERN_ERR "EXT4-fs: can't allocate mem for a " | ||
| 2260 | "buddy group\n"); | ||
| 2261 | goto exit_meta_group_info; | ||
| 2262 | } | ||
| 2263 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = | ||
| 2264 | meta_group_info; | ||
| 2265 | } | ||
| 2266 | |||
| 2267 | /* | ||
| 2268 | * calculate needed size. if change bb_counters size, | ||
| 2269 | * don't forget about ext4_mb_generate_buddy() | ||
| 2270 | */ | ||
| 2271 | len = offsetof(typeof(**meta_group_info), | ||
| 2272 | bb_counters[sb->s_blocksize_bits + 2]); | ||
| 2273 | |||
| 2274 | meta_group_info = | ||
| 2275 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | ||
| 2276 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); | ||
| 2277 | |||
| 2278 | meta_group_info[i] = kzalloc(len, GFP_KERNEL); | ||
| 2279 | if (meta_group_info[i] == NULL) { | ||
| 2280 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | ||
| 2281 | goto exit_group_info; | ||
| 2282 | } | ||
| 2283 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | ||
| 2284 | &(meta_group_info[i]->bb_state)); | ||
| 2285 | |||
| 2286 | /* | ||
| 2287 | * initialize bb_free to be able to skip | ||
| 2288 | * empty groups without initialization | ||
| 2289 | */ | ||
| 2290 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | ||
| 2291 | meta_group_info[i]->bb_free = | ||
| 2292 | ext4_free_blocks_after_init(sb, group, desc); | ||
| 2293 | } else { | ||
| 2294 | meta_group_info[i]->bb_free = | ||
| 2295 | le16_to_cpu(desc->bg_free_blocks_count); | ||
| 2296 | } | ||
| 2297 | |||
| 2298 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | ||
| 2299 | |||
| 2300 | #ifdef DOUBLE_CHECK | ||
| 2301 | { | ||
| 2302 | struct buffer_head *bh; | ||
| 2303 | meta_group_info[i]->bb_bitmap = | ||
| 2304 | kmalloc(sb->s_blocksize, GFP_KERNEL); | ||
| 2305 | BUG_ON(meta_group_info[i]->bb_bitmap == NULL); | ||
| 2306 | bh = ext4_read_block_bitmap(sb, group); | ||
| 2307 | BUG_ON(bh == NULL); | ||
| 2308 | memcpy(meta_group_info[i]->bb_bitmap, bh->b_data, | ||
| 2309 | sb->s_blocksize); | ||
| 2310 | put_bh(bh); | ||
| 2311 | } | ||
| 2312 | #endif | ||
| 2313 | |||
| 2314 | return 0; | ||
| 2315 | |||
| 2316 | exit_group_info: | ||
| 2317 | /* If a meta_group_info table has been allocated, release it now */ | ||
| 2318 | if (group % EXT4_DESC_PER_BLOCK(sb) == 0) | ||
| 2319 | kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); | ||
| 2320 | exit_meta_group_info: | ||
| 2321 | return -ENOMEM; | ||
| 2322 | } /* ext4_mb_add_groupinfo */ | ||
| 2323 | |||
| 2324 | /* | ||
| 2325 | * Add a group to the existing groups. | ||
| 2326 | * This function is used for online resize | ||
| 2327 | */ | ||
| 2328 | int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group, | ||
| 2329 | struct ext4_group_desc *desc) | ||
| 2330 | { | ||
| 2331 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 2332 | struct inode *inode = sbi->s_buddy_cache; | ||
| 2333 | int blocks_per_page; | ||
| 2334 | int block; | ||
| 2335 | int pnum; | ||
| 2336 | struct page *page; | ||
| 2337 | int err; | ||
| 2338 | |||
| 2339 | /* Add group based on group descriptor*/ | ||
| 2340 | err = ext4_mb_add_groupinfo(sb, group, desc); | ||
| 2341 | if (err) | ||
| 2342 | return err; | ||
| 2343 | |||
| 2344 | /* | ||
| 2345 | * Cache pages containing dynamic mb_alloc data (buddy and bitmap | ||
| 2346 | * data) are marked not up to date so that they will be re-initialized | ||
| 2347 | * during the next call to ext4_mb_load_buddy | ||
| 2348 | */ | ||
| 2349 | |||
| 2350 | /* Set buddy page as not up to date */ | ||
| 2351 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
| 2352 | block = group * 2; | ||
| 2353 | pnum = block / blocks_per_page; | ||
| 2354 | page = find_get_page(inode->i_mapping, pnum); | ||
| 2355 | if (page != NULL) { | ||
| 2356 | ClearPageUptodate(page); | ||
| 2357 | page_cache_release(page); | ||
| 2358 | } | ||
| 2359 | |||
| 2360 | /* Set bitmap page as not up to date */ | ||
| 2361 | block++; | ||
| 2362 | pnum = block / blocks_per_page; | ||
| 2363 | page = find_get_page(inode->i_mapping, pnum); | ||
| 2364 | if (page != NULL) { | ||
| 2365 | ClearPageUptodate(page); | ||
| 2366 | page_cache_release(page); | ||
| 2367 | } | ||
| 2368 | |||
| 2369 | return 0; | ||
| 2370 | } | ||
| 2371 | |||
| 2372 | /* | ||
| 2373 | * Update an existing group. | ||
| 2374 | * This function is used for online resize | ||
| 2375 | */ | ||
| 2376 | void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) | ||
| 2377 | { | ||
| 2378 | grp->bb_free += add; | ||
| 2379 | } | ||
| 2380 | |||
| 2218 | static int ext4_mb_init_backend(struct super_block *sb) | 2381 | static int ext4_mb_init_backend(struct super_block *sb) |
| 2219 | { | 2382 | { |
| 2220 | ext4_group_t i; | 2383 | ext4_group_t i; |
| 2221 | int j, len, metalen; | 2384 | int metalen; |
| 2222 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2385 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 2223 | int num_meta_group_infos = | 2386 | struct ext4_super_block *es = sbi->s_es; |
| 2224 | (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) >> | 2387 | int num_meta_group_infos; |
| 2225 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2388 | int num_meta_group_infos_max; |
| 2389 | int array_size; | ||
| 2226 | struct ext4_group_info **meta_group_info; | 2390 | struct ext4_group_info **meta_group_info; |
| 2391 | struct ext4_group_desc *desc; | ||
| 2392 | |||
| 2393 | /* This is the number of blocks used by GDT */ | ||
| 2394 | num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - | ||
| 2395 | 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); | ||
| 2396 | |||
| 2397 | /* | ||
| 2398 | * This is the total number of blocks used by GDT including | ||
| 2399 | * the number of reserved blocks for GDT. | ||
| 2400 | * The s_group_info array is allocated with this value | ||
| 2401 | * to allow a clean online resize without complex | ||
| 2402 | * pointer manipulation. | ||
| 2403 | * The drawback is the unused memory when no resize | ||
| 2404 | * occurs but it's very low in terms of pages | ||
| 2405 | * (see comments below) | ||
| 2406 | * Need to handle this properly when META_BG resizing is allowed | ||
| 2407 | */ | ||
| 2408 | num_meta_group_infos_max = num_meta_group_infos + | ||
| 2409 | le16_to_cpu(es->s_reserved_gdt_blocks); | ||
| 2227 | 2410 | ||
| 2411 | /* | ||
| 2412 | * array_size is the size of s_group_info array. We round it | ||
| 2413 | * to the next power of two because this approximation is done | ||
| 2414 | * internally by kmalloc so we can have some more memory | ||
| 2415 | * for free here (e.g. may be used for META_BG resize). | ||
| 2416 | */ | ||
| 2417 | array_size = 1; | ||
| 2418 | while (array_size < sizeof(*sbi->s_group_info) * | ||
| 2419 | num_meta_group_infos_max) | ||
| 2420 | array_size = array_size << 1; | ||
| 2228 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte | 2421 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte |
| 2229 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. | 2422 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. |
| 2230 | * So a two level scheme suffices for now. */ | 2423 | * So a two level scheme suffices for now. */ |
| 2231 | sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * | 2424 | sbi->s_group_info = kmalloc(array_size, GFP_KERNEL); |
| 2232 | num_meta_group_infos, GFP_KERNEL); | ||
| 2233 | if (sbi->s_group_info == NULL) { | 2425 | if (sbi->s_group_info == NULL) { |
| 2234 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); | 2426 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); |
| 2235 | return -ENOMEM; | 2427 | return -ENOMEM; |
| @@ -2256,63 +2448,15 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
| 2256 | sbi->s_group_info[i] = meta_group_info; | 2448 | sbi->s_group_info[i] = meta_group_info; |
| 2257 | } | 2449 | } |
| 2258 | 2450 | ||
| 2259 | /* | ||
| 2260 | * calculate needed size. if change bb_counters size, | ||
| 2261 | * don't forget about ext4_mb_generate_buddy() | ||
| 2262 | */ | ||
| 2263 | len = sizeof(struct ext4_group_info); | ||
| 2264 | len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2); | ||
| 2265 | for (i = 0; i < sbi->s_groups_count; i++) { | 2451 | for (i = 0; i < sbi->s_groups_count; i++) { |
| 2266 | struct ext4_group_desc *desc; | ||
| 2267 | |||
| 2268 | meta_group_info = | ||
| 2269 | sbi->s_group_info[i >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | ||
| 2270 | j = i & (EXT4_DESC_PER_BLOCK(sb) - 1); | ||
| 2271 | |||
| 2272 | meta_group_info[j] = kzalloc(len, GFP_KERNEL); | ||
| 2273 | if (meta_group_info[j] == NULL) { | ||
| 2274 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | ||
| 2275 | goto err_freebuddy; | ||
| 2276 | } | ||
| 2277 | desc = ext4_get_group_desc(sb, i, NULL); | 2452 | desc = ext4_get_group_desc(sb, i, NULL); |
| 2278 | if (desc == NULL) { | 2453 | if (desc == NULL) { |
| 2279 | printk(KERN_ERR | 2454 | printk(KERN_ERR |
| 2280 | "EXT4-fs: can't read descriptor %lu\n", i); | 2455 | "EXT4-fs: can't read descriptor %lu\n", i); |
| 2281 | i++; | ||
| 2282 | goto err_freebuddy; | 2456 | goto err_freebuddy; |
| 2283 | } | 2457 | } |
| 2284 | memset(meta_group_info[j], 0, len); | 2458 | if (ext4_mb_add_groupinfo(sb, i, desc) != 0) |
| 2285 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | 2459 | goto err_freebuddy; |
| 2286 | &(meta_group_info[j]->bb_state)); | ||
| 2287 | |||
| 2288 | /* | ||
| 2289 | * initialize bb_free to be able to skip | ||
| 2290 | * empty groups without initialization | ||
| 2291 | */ | ||
| 2292 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | ||
| 2293 | meta_group_info[j]->bb_free = | ||
| 2294 | ext4_free_blocks_after_init(sb, i, desc); | ||
| 2295 | } else { | ||
| 2296 | meta_group_info[j]->bb_free = | ||
| 2297 | le16_to_cpu(desc->bg_free_blocks_count); | ||
| 2298 | } | ||
| 2299 | |||
| 2300 | INIT_LIST_HEAD(&meta_group_info[j]->bb_prealloc_list); | ||
| 2301 | |||
| 2302 | #ifdef DOUBLE_CHECK | ||
| 2303 | { | ||
| 2304 | struct buffer_head *bh; | ||
| 2305 | meta_group_info[j]->bb_bitmap = | ||
| 2306 | kmalloc(sb->s_blocksize, GFP_KERNEL); | ||
| 2307 | BUG_ON(meta_group_info[j]->bb_bitmap == NULL); | ||
| 2308 | bh = read_block_bitmap(sb, i); | ||
| 2309 | BUG_ON(bh == NULL); | ||
| 2310 | memcpy(meta_group_info[j]->bb_bitmap, bh->b_data, | ||
| 2311 | sb->s_blocksize); | ||
| 2312 | put_bh(bh); | ||
| 2313 | } | ||
| 2314 | #endif | ||
| 2315 | |||
| 2316 | } | 2460 | } |
| 2317 | 2461 | ||
| 2318 | return 0; | 2462 | return 0; |
| @@ -2336,6 +2480,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
| 2336 | unsigned i; | 2480 | unsigned i; |
| 2337 | unsigned offset; | 2481 | unsigned offset; |
| 2338 | unsigned max; | 2482 | unsigned max; |
| 2483 | int ret; | ||
| 2339 | 2484 | ||
| 2340 | if (!test_opt(sb, MBALLOC)) | 2485 | if (!test_opt(sb, MBALLOC)) |
| 2341 | return 0; | 2486 | return 0; |
| @@ -2370,12 +2515,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
| 2370 | } while (i <= sb->s_blocksize_bits + 1); | 2515 | } while (i <= sb->s_blocksize_bits + 1); |
| 2371 | 2516 | ||
| 2372 | /* init file for buddy data */ | 2517 | /* init file for buddy data */ |
| 2373 | i = ext4_mb_init_backend(sb); | 2518 | ret = ext4_mb_init_backend(sb); |
| 2374 | if (i) { | 2519 | if (ret != 0) { |
| 2375 | clear_opt(sbi->s_mount_opt, MBALLOC); | 2520 | clear_opt(sbi->s_mount_opt, MBALLOC); |
| 2376 | kfree(sbi->s_mb_offsets); | 2521 | kfree(sbi->s_mb_offsets); |
| 2377 | kfree(sbi->s_mb_maxs); | 2522 | kfree(sbi->s_mb_maxs); |
| 2378 | return i; | 2523 | return ret; |
| 2379 | } | 2524 | } |
| 2380 | 2525 | ||
| 2381 | spin_lock_init(&sbi->s_md_lock); | 2526 | spin_lock_init(&sbi->s_md_lock); |
| @@ -2548,8 +2693,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
| 2548 | ext4_lock_group(sb, md->group); | 2693 | ext4_lock_group(sb, md->group); |
| 2549 | for (i = 0; i < md->num; i++) { | 2694 | for (i = 0; i < md->num; i++) { |
| 2550 | mb_debug(" %u", md->blocks[i]); | 2695 | mb_debug(" %u", md->blocks[i]); |
| 2551 | err = mb_free_blocks(NULL, &e4b, md->blocks[i], 1); | 2696 | mb_free_blocks(NULL, &e4b, md->blocks[i], 1); |
| 2552 | BUG_ON(err != 0); | ||
| 2553 | } | 2697 | } |
| 2554 | mb_debug("\n"); | 2698 | mb_debug("\n"); |
| 2555 | ext4_unlock_group(sb, md->group); | 2699 | ext4_unlock_group(sb, md->group); |
| @@ -2575,25 +2719,24 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
| 2575 | 2719 | ||
| 2576 | 2720 | ||
| 2577 | 2721 | ||
| 2578 | #define MB_PROC_VALUE_READ(name) \ | 2722 | #define MB_PROC_FOPS(name) \ |
| 2579 | static int ext4_mb_read_##name(char *page, char **start, \ | 2723 | static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \ |
| 2580 | off_t off, int count, int *eof, void *data) \ | ||
| 2581 | { \ | 2724 | { \ |
| 2582 | struct ext4_sb_info *sbi = data; \ | 2725 | struct ext4_sb_info *sbi = m->private; \ |
| 2583 | int len; \ | 2726 | \ |
| 2584 | *eof = 1; \ | 2727 | seq_printf(m, "%ld\n", sbi->s_mb_##name); \ |
| 2585 | if (off != 0) \ | 2728 | return 0; \ |
| 2586 | return 0; \ | 2729 | } \ |
| 2587 | len = sprintf(page, "%ld\n", sbi->s_mb_##name); \ | 2730 | \ |
| 2588 | *start = page; \ | 2731 | static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\ |
| 2589 | return len; \ | 2732 | { \ |
| 2590 | } | 2733 | return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\ |
| 2591 | 2734 | } \ | |
| 2592 | #define MB_PROC_VALUE_WRITE(name) \ | 2735 | \ |
| 2593 | static int ext4_mb_write_##name(struct file *file, \ | 2736 | static ssize_t ext4_mb_##name##_proc_write(struct file *file, \ |
| 2594 | const char __user *buf, unsigned long cnt, void *data) \ | 2737 | const char __user *buf, size_t cnt, loff_t *ppos) \ |
| 2595 | { \ | 2738 | { \ |
| 2596 | struct ext4_sb_info *sbi = data; \ | 2739 | struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\ |
| 2597 | char str[32]; \ | 2740 | char str[32]; \ |
| 2598 | long value; \ | 2741 | long value; \ |
| 2599 | if (cnt >= sizeof(str)) \ | 2742 | if (cnt >= sizeof(str)) \ |
| @@ -2605,31 +2748,32 @@ static int ext4_mb_write_##name(struct file *file, \ | |||
| 2605 | return -ERANGE; \ | 2748 | return -ERANGE; \ |
| 2606 | sbi->s_mb_##name = value; \ | 2749 | sbi->s_mb_##name = value; \ |
| 2607 | return cnt; \ | 2750 | return cnt; \ |
| 2608 | } | 2751 | } \ |
| 2752 | \ | ||
| 2753 | static const struct file_operations ext4_mb_##name##_proc_fops = { \ | ||
| 2754 | .owner = THIS_MODULE, \ | ||
| 2755 | .open = ext4_mb_##name##_proc_open, \ | ||
| 2756 | .read = seq_read, \ | ||
| 2757 | .llseek = seq_lseek, \ | ||
| 2758 | .release = single_release, \ | ||
| 2759 | .write = ext4_mb_##name##_proc_write, \ | ||
| 2760 | }; | ||
| 2609 | 2761 | ||
| 2610 | MB_PROC_VALUE_READ(stats); | 2762 | MB_PROC_FOPS(stats); |
| 2611 | MB_PROC_VALUE_WRITE(stats); | 2763 | MB_PROC_FOPS(max_to_scan); |
| 2612 | MB_PROC_VALUE_READ(max_to_scan); | 2764 | MB_PROC_FOPS(min_to_scan); |
| 2613 | MB_PROC_VALUE_WRITE(max_to_scan); | 2765 | MB_PROC_FOPS(order2_reqs); |
| 2614 | MB_PROC_VALUE_READ(min_to_scan); | 2766 | MB_PROC_FOPS(stream_request); |
| 2615 | MB_PROC_VALUE_WRITE(min_to_scan); | 2767 | MB_PROC_FOPS(group_prealloc); |
| 2616 | MB_PROC_VALUE_READ(order2_reqs); | ||
| 2617 | MB_PROC_VALUE_WRITE(order2_reqs); | ||
| 2618 | MB_PROC_VALUE_READ(stream_request); | ||
| 2619 | MB_PROC_VALUE_WRITE(stream_request); | ||
| 2620 | MB_PROC_VALUE_READ(group_prealloc); | ||
| 2621 | MB_PROC_VALUE_WRITE(group_prealloc); | ||
| 2622 | 2768 | ||
| 2623 | #define MB_PROC_HANDLER(name, var) \ | 2769 | #define MB_PROC_HANDLER(name, var) \ |
| 2624 | do { \ | 2770 | do { \ |
| 2625 | proc = create_proc_entry(name, mode, sbi->s_mb_proc); \ | 2771 | proc = proc_create_data(name, mode, sbi->s_mb_proc, \ |
| 2772 | &ext4_mb_##var##_proc_fops, sbi); \ | ||
| 2626 | if (proc == NULL) { \ | 2773 | if (proc == NULL) { \ |
| 2627 | printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ | 2774 | printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ |
| 2628 | goto err_out; \ | 2775 | goto err_out; \ |
| 2629 | } \ | 2776 | } \ |
| 2630 | proc->data = sbi; \ | ||
| 2631 | proc->read_proc = ext4_mb_read_##var ; \ | ||
| 2632 | proc->write_proc = ext4_mb_write_##var; \ | ||
| 2633 | } while (0) | 2777 | } while (0) |
| 2634 | 2778 | ||
| 2635 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) | 2779 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) |
| @@ -2639,6 +2783,10 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb) | |||
| 2639 | struct proc_dir_entry *proc; | 2783 | struct proc_dir_entry *proc; |
| 2640 | char devname[64]; | 2784 | char devname[64]; |
| 2641 | 2785 | ||
| 2786 | if (proc_root_ext4 == NULL) { | ||
| 2787 | sbi->s_mb_proc = NULL; | ||
| 2788 | return -EINVAL; | ||
| 2789 | } | ||
| 2642 | bdevname(sb->s_bdev, devname); | 2790 | bdevname(sb->s_bdev, devname); |
| 2643 | sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); | 2791 | sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); |
| 2644 | 2792 | ||
| @@ -2747,7 +2895,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
| 2747 | 2895 | ||
| 2748 | 2896 | ||
| 2749 | err = -EIO; | 2897 | err = -EIO; |
| 2750 | bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group); | 2898 | bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group); |
| 2751 | if (!bitmap_bh) | 2899 | if (!bitmap_bh) |
| 2752 | goto out_err; | 2900 | goto out_err; |
| 2753 | 2901 | ||
| @@ -2816,7 +2964,23 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
| 2816 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); | 2964 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); |
| 2817 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 2965 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); |
| 2818 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 2966 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); |
| 2819 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | 2967 | |
| 2968 | /* | ||
| 2969 | * The free blocks count has already been reduced/reserved | ||
| 2970 | * at write_begin() time for delayed allocation, so | ||
| 2971 | * do not account for it twice | ||
| 2972 | */ | ||
| 2973 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) | ||
| 2974 | percpu_counter_sub(&sbi->s_freeblocks_counter, | ||
| 2975 | ac->ac_b_ex.fe_len); | ||
| 2976 | |||
| 2977 | if (sbi->s_log_groups_per_flex) { | ||
| 2978 | ext4_group_t flex_group = ext4_flex_group(sbi, | ||
| 2979 | ac->ac_b_ex.fe_group); | ||
| 2980 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
| 2981 | sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len; | ||
| 2982 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
| 2983 | } | ||
| 2820 | 2984 | ||
| 2821 | err = ext4_journal_dirty_metadata(handle, bitmap_bh); | 2985 | err = ext4_journal_dirty_metadata(handle, bitmap_bh); |
| 2822 | if (err) | 2986 | if (err) |
| @@ -3473,8 +3637,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
| 3473 | if (bit >= end) | 3637 | if (bit >= end) |
| 3474 | break; | 3638 | break; |
| 3475 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); | 3639 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); |
| 3476 | if (next > end) | ||
| 3477 | next = end; | ||
| 3478 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + | 3640 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + |
| 3479 | le32_to_cpu(sbi->s_es->s_first_data_block); | 3641 | le32_to_cpu(sbi->s_es->s_first_data_block); |
| 3480 | mb_debug(" free preallocated %u/%u in group %u\n", | 3642 | mb_debug(" free preallocated %u/%u in group %u\n", |
| @@ -3569,7 +3731,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
| 3569 | if (list_empty(&grp->bb_prealloc_list)) | 3731 | if (list_empty(&grp->bb_prealloc_list)) |
| 3570 | return 0; | 3732 | return 0; |
| 3571 | 3733 | ||
| 3572 | bitmap_bh = read_block_bitmap(sb, group); | 3734 | bitmap_bh = ext4_read_block_bitmap(sb, group); |
| 3573 | if (bitmap_bh == NULL) { | 3735 | if (bitmap_bh == NULL) { |
| 3574 | /* error handling here */ | 3736 | /* error handling here */ |
| 3575 | ext4_mb_release_desc(&e4b); | 3737 | ext4_mb_release_desc(&e4b); |
| @@ -3743,7 +3905,7 @@ repeat: | |||
| 3743 | err = ext4_mb_load_buddy(sb, group, &e4b); | 3905 | err = ext4_mb_load_buddy(sb, group, &e4b); |
| 3744 | BUG_ON(err != 0); /* error handling here */ | 3906 | BUG_ON(err != 0); /* error handling here */ |
| 3745 | 3907 | ||
| 3746 | bitmap_bh = read_block_bitmap(sb, group); | 3908 | bitmap_bh = ext4_read_block_bitmap(sb, group); |
| 3747 | if (bitmap_bh == NULL) { | 3909 | if (bitmap_bh == NULL) { |
| 3748 | /* error handling here */ | 3910 | /* error handling here */ |
| 3749 | ext4_mb_release_desc(&e4b); | 3911 | ext4_mb_release_desc(&e4b); |
| @@ -4011,10 +4173,21 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
| 4011 | sbi = EXT4_SB(sb); | 4173 | sbi = EXT4_SB(sb); |
| 4012 | 4174 | ||
| 4013 | if (!test_opt(sb, MBALLOC)) { | 4175 | if (!test_opt(sb, MBALLOC)) { |
| 4014 | block = ext4_new_blocks_old(handle, ar->inode, ar->goal, | 4176 | block = ext4_old_new_blocks(handle, ar->inode, ar->goal, |
| 4015 | &(ar->len), errp); | 4177 | &(ar->len), errp); |
| 4016 | return block; | 4178 | return block; |
| 4017 | } | 4179 | } |
| 4180 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { | ||
| 4181 | /* | ||
| 4182 | * With delalloc we already reserved the blocks | ||
| 4183 | */ | ||
| 4184 | ar->len = ext4_has_free_blocks(sbi, ar->len); | ||
| 4185 | } | ||
| 4186 | |||
| 4187 | if (ar->len == 0) { | ||
| 4188 | *errp = -ENOSPC; | ||
| 4189 | return 0; | ||
| 4190 | } | ||
| 4018 | 4191 | ||
| 4019 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { | 4192 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { |
| 4020 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4193 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; |
| @@ -4026,10 +4199,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
| 4026 | } | 4199 | } |
| 4027 | inquota = ar->len; | 4200 | inquota = ar->len; |
| 4028 | 4201 | ||
| 4202 | if (EXT4_I(ar->inode)->i_delalloc_reserved_flag) | ||
| 4203 | ar->flags |= EXT4_MB_DELALLOC_RESERVED; | ||
| 4204 | |||
| 4029 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4205 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
| 4030 | if (!ac) { | 4206 | if (!ac) { |
| 4207 | ar->len = 0; | ||
| 4031 | *errp = -ENOMEM; | 4208 | *errp = -ENOMEM; |
| 4032 | return 0; | 4209 | goto out1; |
| 4033 | } | 4210 | } |
| 4034 | 4211 | ||
| 4035 | ext4_mb_poll_new_transaction(sb, handle); | 4212 | ext4_mb_poll_new_transaction(sb, handle); |
| @@ -4037,12 +4214,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
| 4037 | *errp = ext4_mb_initialize_context(ac, ar); | 4214 | *errp = ext4_mb_initialize_context(ac, ar); |
| 4038 | if (*errp) { | 4215 | if (*errp) { |
| 4039 | ar->len = 0; | 4216 | ar->len = 0; |
| 4040 | goto out; | 4217 | goto out2; |
| 4041 | } | 4218 | } |
| 4042 | 4219 | ||
| 4043 | ac->ac_op = EXT4_MB_HISTORY_PREALLOC; | 4220 | ac->ac_op = EXT4_MB_HISTORY_PREALLOC; |
| 4044 | if (!ext4_mb_use_preallocated(ac)) { | 4221 | if (!ext4_mb_use_preallocated(ac)) { |
| 4045 | |||
| 4046 | ac->ac_op = EXT4_MB_HISTORY_ALLOC; | 4222 | ac->ac_op = EXT4_MB_HISTORY_ALLOC; |
| 4047 | ext4_mb_normalize_request(ac, ar); | 4223 | ext4_mb_normalize_request(ac, ar); |
| 4048 | repeat: | 4224 | repeat: |
| @@ -4085,11 +4261,12 @@ repeat: | |||
| 4085 | 4261 | ||
| 4086 | ext4_mb_release_context(ac); | 4262 | ext4_mb_release_context(ac); |
| 4087 | 4263 | ||
| 4088 | out: | 4264 | out2: |
| 4265 | kmem_cache_free(ext4_ac_cachep, ac); | ||
| 4266 | out1: | ||
| 4089 | if (ar->len < inquota) | 4267 | if (ar->len < inquota) |
| 4090 | DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); | 4268 | DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); |
| 4091 | 4269 | ||
| 4092 | kmem_cache_free(ext4_ac_cachep, ac); | ||
| 4093 | return block; | 4270 | return block; |
| 4094 | } | 4271 | } |
| 4095 | static void ext4_mb_poll_new_transaction(struct super_block *sb, | 4272 | static void ext4_mb_poll_new_transaction(struct super_block *sb, |
| @@ -4242,7 +4419,7 @@ do_more: | |||
| 4242 | overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); | 4419 | overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); |
| 4243 | count -= overflow; | 4420 | count -= overflow; |
| 4244 | } | 4421 | } |
| 4245 | bitmap_bh = read_block_bitmap(sb, block_group); | 4422 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
| 4246 | if (!bitmap_bh) | 4423 | if (!bitmap_bh) |
| 4247 | goto error_return; | 4424 | goto error_return; |
| 4248 | gdp = ext4_get_group_desc(sb, block_group, &gd_bh); | 4425 | gdp = ext4_get_group_desc(sb, block_group, &gd_bh); |
| @@ -4309,10 +4486,9 @@ do_more: | |||
| 4309 | ext4_mb_free_metadata(handle, &e4b, block_group, bit, count); | 4486 | ext4_mb_free_metadata(handle, &e4b, block_group, bit, count); |
| 4310 | } else { | 4487 | } else { |
| 4311 | ext4_lock_group(sb, block_group); | 4488 | ext4_lock_group(sb, block_group); |
| 4312 | err = mb_free_blocks(inode, &e4b, bit, count); | 4489 | mb_free_blocks(inode, &e4b, bit, count); |
| 4313 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); | 4490 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); |
| 4314 | ext4_unlock_group(sb, block_group); | 4491 | ext4_unlock_group(sb, block_group); |
| 4315 | BUG_ON(err != 0); | ||
| 4316 | } | 4492 | } |
| 4317 | 4493 | ||
| 4318 | spin_lock(sb_bgl_lock(sbi, block_group)); | 4494 | spin_lock(sb_bgl_lock(sbi, block_group)); |
| @@ -4321,6 +4497,13 @@ do_more: | |||
| 4321 | spin_unlock(sb_bgl_lock(sbi, block_group)); | 4497 | spin_unlock(sb_bgl_lock(sbi, block_group)); |
| 4322 | percpu_counter_add(&sbi->s_freeblocks_counter, count); | 4498 | percpu_counter_add(&sbi->s_freeblocks_counter, count); |
| 4323 | 4499 | ||
| 4500 | if (sbi->s_log_groups_per_flex) { | ||
| 4501 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | ||
| 4502 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
| 4503 | sbi->s_flex_groups[flex_group].free_blocks += count; | ||
| 4504 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
| 4505 | } | ||
| 4506 | |||
| 4324 | ext4_mb_release_desc(&e4b); | 4507 | ext4_mb_release_desc(&e4b); |
| 4325 | 4508 | ||
| 4326 | *freed += count; | 4509 | *freed += count; |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index ab16beaa830d..387ad98350c3 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
| @@ -183,6 +183,16 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 183 | struct inode *inode); | 183 | struct inode *inode); |
| 184 | 184 | ||
| 185 | /* | 185 | /* |
| 186 | * p is at least 6 bytes before the end of page | ||
| 187 | */ | ||
| 188 | static inline struct ext4_dir_entry_2 * | ||
| 189 | ext4_next_entry(struct ext4_dir_entry_2 *p) | ||
| 190 | { | ||
| 191 | return (struct ext4_dir_entry_2 *)((char *)p + | ||
| 192 | ext4_rec_len_from_disk(p->rec_len)); | ||
| 193 | } | ||
| 194 | |||
| 195 | /* | ||
| 186 | * Future: use high four bits of block for coalesce-on-delete flags | 196 | * Future: use high four bits of block for coalesce-on-delete flags |
| 187 | * Mask them off for now. | 197 | * Mask them off for now. |
| 188 | */ | 198 | */ |
| @@ -231,13 +241,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) | |||
| 231 | { | 241 | { |
| 232 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - | 242 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - |
| 233 | EXT4_DIR_REC_LEN(2) - infosize; | 243 | EXT4_DIR_REC_LEN(2) - infosize; |
| 234 | return 0? 20: entry_space / sizeof(struct dx_entry); | 244 | return entry_space / sizeof(struct dx_entry); |
| 235 | } | 245 | } |
| 236 | 246 | ||
| 237 | static inline unsigned dx_node_limit (struct inode *dir) | 247 | static inline unsigned dx_node_limit (struct inode *dir) |
| 238 | { | 248 | { |
| 239 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); | 249 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); |
| 240 | return 0? 22: entry_space / sizeof(struct dx_entry); | 250 | return entry_space / sizeof(struct dx_entry); |
| 241 | } | 251 | } |
| 242 | 252 | ||
| 243 | /* | 253 | /* |
| @@ -554,15 +564,6 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, | |||
| 554 | 564 | ||
| 555 | 565 | ||
| 556 | /* | 566 | /* |
| 557 | * p is at least 6 bytes before the end of page | ||
| 558 | */ | ||
| 559 | static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p) | ||
| 560 | { | ||
| 561 | return (struct ext4_dir_entry_2 *)((char *)p + | ||
| 562 | ext4_rec_len_from_disk(p->rec_len)); | ||
| 563 | } | ||
| 564 | |||
| 565 | /* | ||
| 566 | * This function fills a red-black tree with information from a | 567 | * This function fills a red-black tree with information from a |
| 567 | * directory block. It returns the number directory entries loaded | 568 | * directory block. It returns the number directory entries loaded |
| 568 | * into the tree. If there is an error it is returned in err. | 569 | * into the tree. If there is an error it is returned in err. |
| @@ -993,19 +994,21 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
| 993 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 994 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
| 994 | top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - | 995 | top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - |
| 995 | EXT4_DIR_REC_LEN(0)); | 996 | EXT4_DIR_REC_LEN(0)); |
| 996 | for (; de < top; de = ext4_next_entry(de)) | 997 | for (; de < top; de = ext4_next_entry(de)) { |
| 997 | if (ext4_match (namelen, name, de)) { | 998 | int off = (block << EXT4_BLOCK_SIZE_BITS(sb)) |
| 998 | if (!ext4_check_dir_entry("ext4_find_entry", | 999 | + ((char *) de - bh->b_data); |
| 999 | dir, de, bh, | 1000 | |
| 1000 | (block<<EXT4_BLOCK_SIZE_BITS(sb)) | 1001 | if (!ext4_check_dir_entry(__func__, dir, de, bh, off)) { |
| 1001 | +((char *)de - bh->b_data))) { | 1002 | brelse(bh); |
| 1002 | brelse (bh); | ||
| 1003 | *err = ERR_BAD_DX_DIR; | 1003 | *err = ERR_BAD_DX_DIR; |
| 1004 | goto errout; | 1004 | goto errout; |
| 1005 | } | 1005 | } |
| 1006 | *res_dir = de; | 1006 | |
| 1007 | dx_release (frames); | 1007 | if (ext4_match(namelen, name, de)) { |
| 1008 | return bh; | 1008 | *res_dir = de; |
| 1009 | dx_release(frames); | ||
| 1010 | return bh; | ||
| 1011 | } | ||
| 1009 | } | 1012 | } |
| 1010 | brelse (bh); | 1013 | brelse (bh); |
| 1011 | /* Check to see if we should continue to search */ | 1014 | /* Check to see if we should continue to search */ |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9ff7b1c04239..f000fbe2cd93 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
| @@ -866,6 +866,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 866 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); | 866 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); |
| 867 | 867 | ||
| 868 | /* | 868 | /* |
| 869 | * We can allocate memory for mb_alloc based on the new group | ||
| 870 | * descriptor | ||
| 871 | */ | ||
| 872 | if (test_opt(sb, MBALLOC)) { | ||
| 873 | err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); | ||
| 874 | if (err) | ||
| 875 | goto exit_journal; | ||
| 876 | } | ||
| 877 | /* | ||
| 869 | * Make the new blocks and inodes valid next. We do this before | 878 | * Make the new blocks and inodes valid next. We do this before |
| 870 | * increasing the group count so that once the group is enabled, | 879 | * increasing the group count so that once the group is enabled, |
| 871 | * all of its blocks and inodes are already valid. | 880 | * all of its blocks and inodes are already valid. |
| @@ -957,6 +966,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
| 957 | handle_t *handle; | 966 | handle_t *handle; |
| 958 | int err; | 967 | int err; |
| 959 | unsigned long freed_blocks; | 968 | unsigned long freed_blocks; |
| 969 | ext4_group_t group; | ||
| 970 | struct ext4_group_info *grp; | ||
| 960 | 971 | ||
| 961 | /* We don't need to worry about locking wrt other resizers just | 972 | /* We don't need to worry about locking wrt other resizers just |
| 962 | * yet: we're going to revalidate es->s_blocks_count after | 973 | * yet: we're going to revalidate es->s_blocks_count after |
| @@ -988,7 +999,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
| 988 | } | 999 | } |
| 989 | 1000 | ||
| 990 | /* Handle the remaining blocks in the last group only. */ | 1001 | /* Handle the remaining blocks in the last group only. */ |
| 991 | ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); | 1002 | ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); |
| 992 | 1003 | ||
| 993 | if (last == 0) { | 1004 | if (last == 0) { |
| 994 | ext4_warning(sb, __func__, | 1005 | ext4_warning(sb, __func__, |
| @@ -1060,6 +1071,45 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
| 1060 | o_blocks_count + add); | 1071 | o_blocks_count + add); |
| 1061 | if ((err = ext4_journal_stop(handle))) | 1072 | if ((err = ext4_journal_stop(handle))) |
| 1062 | goto exit_put; | 1073 | goto exit_put; |
| 1074 | |||
| 1075 | /* | ||
| 1076 | * Mark mballoc pages as not up to date so that they will be updated | ||
| 1077 | * next time they are loaded by ext4_mb_load_buddy. | ||
| 1078 | */ | ||
| 1079 | if (test_opt(sb, MBALLOC)) { | ||
| 1080 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 1081 | struct inode *inode = sbi->s_buddy_cache; | ||
| 1082 | int blocks_per_page; | ||
| 1083 | int block; | ||
| 1084 | int pnum; | ||
| 1085 | struct page *page; | ||
| 1086 | |||
| 1087 | /* Set buddy page as not up to date */ | ||
| 1088 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
| 1089 | block = group * 2; | ||
| 1090 | pnum = block / blocks_per_page; | ||
| 1091 | page = find_get_page(inode->i_mapping, pnum); | ||
| 1092 | if (page != NULL) { | ||
| 1093 | ClearPageUptodate(page); | ||
| 1094 | page_cache_release(page); | ||
| 1095 | } | ||
| 1096 | |||
| 1097 | /* Set bitmap page as not up to date */ | ||
| 1098 | block++; | ||
| 1099 | pnum = block / blocks_per_page; | ||
| 1100 | page = find_get_page(inode->i_mapping, pnum); | ||
| 1101 | if (page != NULL) { | ||
| 1102 | ClearPageUptodate(page); | ||
| 1103 | page_cache_release(page); | ||
| 1104 | } | ||
| 1105 | |||
| 1106 | /* Get the info on the last group */ | ||
| 1107 | grp = ext4_get_group_info(sb, group); | ||
| 1108 | |||
| 1109 | /* Update free blocks in group info */ | ||
| 1110 | ext4_mb_update_group_info(grp, add); | ||
| 1111 | } | ||
| 1112 | |||
| 1063 | if (test_opt(sb, DEBUG)) | 1113 | if (test_opt(sb, DEBUG)) |
| 1064 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", | 1114 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", |
| 1065 | ext4_blocks_count(es)); | 1115 | ext4_blocks_count(es)); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 02bf24343979..1cb371dcd609 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -506,6 +506,7 @@ static void ext4_put_super (struct super_block * sb) | |||
| 506 | ext4_ext_release(sb); | 506 | ext4_ext_release(sb); |
| 507 | ext4_xattr_put_super(sb); | 507 | ext4_xattr_put_super(sb); |
| 508 | jbd2_journal_destroy(sbi->s_journal); | 508 | jbd2_journal_destroy(sbi->s_journal); |
| 509 | sbi->s_journal = NULL; | ||
| 509 | if (!(sb->s_flags & MS_RDONLY)) { | 510 | if (!(sb->s_flags & MS_RDONLY)) { |
| 510 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 511 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
| 511 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 512 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
| @@ -517,6 +518,7 @@ static void ext4_put_super (struct super_block * sb) | |||
| 517 | for (i = 0; i < sbi->s_gdb_count; i++) | 518 | for (i = 0; i < sbi->s_gdb_count; i++) |
| 518 | brelse(sbi->s_group_desc[i]); | 519 | brelse(sbi->s_group_desc[i]); |
| 519 | kfree(sbi->s_group_desc); | 520 | kfree(sbi->s_group_desc); |
| 521 | kfree(sbi->s_flex_groups); | ||
| 520 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 522 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
| 521 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 523 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
| 522 | percpu_counter_destroy(&sbi->s_dirs_counter); | 524 | percpu_counter_destroy(&sbi->s_dirs_counter); |
| @@ -571,6 +573,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
| 571 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); | 573 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); |
| 572 | INIT_LIST_HEAD(&ei->i_prealloc_list); | 574 | INIT_LIST_HEAD(&ei->i_prealloc_list); |
| 573 | spin_lock_init(&ei->i_prealloc_lock); | 575 | spin_lock_init(&ei->i_prealloc_lock); |
| 576 | jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); | ||
| 577 | ei->i_reserved_data_blocks = 0; | ||
| 578 | ei->i_reserved_meta_blocks = 0; | ||
| 579 | ei->i_allocated_meta_blocks = 0; | ||
| 580 | ei->i_delalloc_reserved_flag = 0; | ||
| 581 | spin_lock_init(&(ei->i_block_reservation_lock)); | ||
| 574 | return &ei->vfs_inode; | 582 | return &ei->vfs_inode; |
| 575 | } | 583 | } |
| 576 | 584 | ||
| @@ -635,6 +643,8 @@ static void ext4_clear_inode(struct inode *inode) | |||
| 635 | EXT4_I(inode)->i_block_alloc_info = NULL; | 643 | EXT4_I(inode)->i_block_alloc_info = NULL; |
| 636 | if (unlikely(rsv)) | 644 | if (unlikely(rsv)) |
| 637 | kfree(rsv); | 645 | kfree(rsv); |
| 646 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, | ||
| 647 | &EXT4_I(inode)->jinode); | ||
| 638 | } | 648 | } |
| 639 | 649 | ||
| 640 | static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) | 650 | static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) |
| @@ -671,7 +681,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 671 | unsigned long def_mount_opts; | 681 | unsigned long def_mount_opts; |
| 672 | struct super_block *sb = vfs->mnt_sb; | 682 | struct super_block *sb = vfs->mnt_sb; |
| 673 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 683 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 674 | journal_t *journal = sbi->s_journal; | ||
| 675 | struct ext4_super_block *es = sbi->s_es; | 684 | struct ext4_super_block *es = sbi->s_es; |
| 676 | 685 | ||
| 677 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | 686 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); |
| @@ -747,6 +756,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 747 | seq_puts(seq, ",nomballoc"); | 756 | seq_puts(seq, ",nomballoc"); |
| 748 | if (test_opt(sb, I_VERSION)) | 757 | if (test_opt(sb, I_VERSION)) |
| 749 | seq_puts(seq, ",i_version"); | 758 | seq_puts(seq, ",i_version"); |
| 759 | if (!test_opt(sb, DELALLOC)) | ||
| 760 | seq_puts(seq, ",nodelalloc"); | ||
| 761 | |||
| 750 | 762 | ||
| 751 | if (sbi->s_stripe) | 763 | if (sbi->s_stripe) |
| 752 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); | 764 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); |
| @@ -894,7 +906,7 @@ enum { | |||
| 894 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 906 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
| 895 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | 907 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
| 896 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, | 908 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, |
| 897 | Opt_mballoc, Opt_nomballoc, Opt_stripe, | 909 | Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, |
| 898 | }; | 910 | }; |
| 899 | 911 | ||
| 900 | static match_table_t tokens = { | 912 | static match_table_t tokens = { |
| @@ -953,6 +965,8 @@ static match_table_t tokens = { | |||
| 953 | {Opt_nomballoc, "nomballoc"}, | 965 | {Opt_nomballoc, "nomballoc"}, |
| 954 | {Opt_stripe, "stripe=%u"}, | 966 | {Opt_stripe, "stripe=%u"}, |
| 955 | {Opt_resize, "resize"}, | 967 | {Opt_resize, "resize"}, |
| 968 | {Opt_delalloc, "delalloc"}, | ||
| 969 | {Opt_nodelalloc, "nodelalloc"}, | ||
| 956 | {Opt_err, NULL}, | 970 | {Opt_err, NULL}, |
| 957 | }; | 971 | }; |
| 958 | 972 | ||
| @@ -990,6 +1004,7 @@ static int parse_options (char *options, struct super_block *sb, | |||
| 990 | int qtype, qfmt; | 1004 | int qtype, qfmt; |
| 991 | char *qname; | 1005 | char *qname; |
| 992 | #endif | 1006 | #endif |
| 1007 | ext4_fsblk_t last_block; | ||
| 993 | 1008 | ||
| 994 | if (!options) | 1009 | if (!options) |
| 995 | return 1; | 1010 | return 1; |
| @@ -1309,15 +1324,39 @@ set_qf_format: | |||
| 1309 | clear_opt(sbi->s_mount_opt, NOBH); | 1324 | clear_opt(sbi->s_mount_opt, NOBH); |
| 1310 | break; | 1325 | break; |
| 1311 | case Opt_extents: | 1326 | case Opt_extents: |
| 1327 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
| 1328 | EXT4_FEATURE_INCOMPAT_EXTENTS)) { | ||
| 1329 | ext4_warning(sb, __func__, | ||
| 1330 | "extents feature not enabled " | ||
| 1331 | "on this filesystem, use tune2fs\n"); | ||
| 1332 | return 0; | ||
| 1333 | } | ||
| 1312 | set_opt (sbi->s_mount_opt, EXTENTS); | 1334 | set_opt (sbi->s_mount_opt, EXTENTS); |
| 1313 | break; | 1335 | break; |
| 1314 | case Opt_noextents: | 1336 | case Opt_noextents: |
| 1337 | /* | ||
| 1338 | * When e2fsprogs support resizing an already existing | ||
| 1339 | * ext3 file system to greater than 2**32 we need to | ||
| 1340 | * add support to block allocator to handle growing | ||
| 1341 | * already existing block mapped inode so that blocks | ||
| 1342 | * allocated for them fall within 2**32 | ||
| 1343 | */ | ||
| 1344 | last_block = ext4_blocks_count(sbi->s_es) - 1; | ||
| 1345 | if (last_block > 0xffffffffULL) { | ||
| 1346 | printk(KERN_ERR "EXT4-fs: Filesystem too " | ||
| 1347 | "large to mount with " | ||
| 1348 | "-o noextents options\n"); | ||
| 1349 | return 0; | ||
| 1350 | } | ||
| 1315 | clear_opt (sbi->s_mount_opt, EXTENTS); | 1351 | clear_opt (sbi->s_mount_opt, EXTENTS); |
| 1316 | break; | 1352 | break; |
| 1317 | case Opt_i_version: | 1353 | case Opt_i_version: |
| 1318 | set_opt(sbi->s_mount_opt, I_VERSION); | 1354 | set_opt(sbi->s_mount_opt, I_VERSION); |
| 1319 | sb->s_flags |= MS_I_VERSION; | 1355 | sb->s_flags |= MS_I_VERSION; |
| 1320 | break; | 1356 | break; |
| 1357 | case Opt_nodelalloc: | ||
| 1358 | clear_opt(sbi->s_mount_opt, DELALLOC); | ||
| 1359 | break; | ||
| 1321 | case Opt_mballoc: | 1360 | case Opt_mballoc: |
| 1322 | set_opt(sbi->s_mount_opt, MBALLOC); | 1361 | set_opt(sbi->s_mount_opt, MBALLOC); |
| 1323 | break; | 1362 | break; |
| @@ -1331,6 +1370,9 @@ set_qf_format: | |||
| 1331 | return 0; | 1370 | return 0; |
| 1332 | sbi->s_stripe = option; | 1371 | sbi->s_stripe = option; |
| 1333 | break; | 1372 | break; |
| 1373 | case Opt_delalloc: | ||
| 1374 | set_opt(sbi->s_mount_opt, DELALLOC); | ||
| 1375 | break; | ||
| 1334 | default: | 1376 | default: |
| 1335 | printk (KERN_ERR | 1377 | printk (KERN_ERR |
| 1336 | "EXT4-fs: Unrecognized mount option \"%s\" " | 1378 | "EXT4-fs: Unrecognized mount option \"%s\" " |
| @@ -1443,6 +1485,54 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
| 1443 | return res; | 1485 | return res; |
| 1444 | } | 1486 | } |
| 1445 | 1487 | ||
| 1488 | static int ext4_fill_flex_info(struct super_block *sb) | ||
| 1489 | { | ||
| 1490 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 1491 | struct ext4_group_desc *gdp = NULL; | ||
| 1492 | struct buffer_head *bh; | ||
| 1493 | ext4_group_t flex_group_count; | ||
| 1494 | ext4_group_t flex_group; | ||
| 1495 | int groups_per_flex = 0; | ||
| 1496 | __u64 block_bitmap = 0; | ||
| 1497 | int i; | ||
| 1498 | |||
| 1499 | if (!sbi->s_es->s_log_groups_per_flex) { | ||
| 1500 | sbi->s_log_groups_per_flex = 0; | ||
| 1501 | return 1; | ||
| 1502 | } | ||
| 1503 | |||
| 1504 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | ||
| 1505 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | ||
| 1506 | |||
| 1507 | flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / | ||
| 1508 | groups_per_flex; | ||
| 1509 | sbi->s_flex_groups = kmalloc(flex_group_count * | ||
| 1510 | sizeof(struct flex_groups), GFP_KERNEL); | ||
| 1511 | if (sbi->s_flex_groups == NULL) { | ||
| 1512 | printk(KERN_ERR "EXT4-fs: not enough memory\n"); | ||
| 1513 | goto failed; | ||
| 1514 | } | ||
| 1515 | memset(sbi->s_flex_groups, 0, flex_group_count * | ||
| 1516 | sizeof(struct flex_groups)); | ||
| 1517 | |||
| 1518 | gdp = ext4_get_group_desc(sb, 1, &bh); | ||
| 1519 | block_bitmap = ext4_block_bitmap(sb, gdp) - 1; | ||
| 1520 | |||
| 1521 | for (i = 0; i < sbi->s_groups_count; i++) { | ||
| 1522 | gdp = ext4_get_group_desc(sb, i, &bh); | ||
| 1523 | |||
| 1524 | flex_group = ext4_flex_group(sbi, i); | ||
| 1525 | sbi->s_flex_groups[flex_group].free_inodes += | ||
| 1526 | le16_to_cpu(gdp->bg_free_inodes_count); | ||
| 1527 | sbi->s_flex_groups[flex_group].free_blocks += | ||
| 1528 | le16_to_cpu(gdp->bg_free_blocks_count); | ||
| 1529 | } | ||
| 1530 | |||
| 1531 | return 1; | ||
| 1532 | failed: | ||
| 1533 | return 0; | ||
| 1534 | } | ||
| 1535 | |||
| 1446 | __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, | 1536 | __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, |
| 1447 | struct ext4_group_desc *gdp) | 1537 | struct ext4_group_desc *gdp) |
| 1448 | { | 1538 | { |
| @@ -1810,8 +1900,8 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) | |||
| 1810 | } | 1900 | } |
| 1811 | 1901 | ||
| 1812 | static int ext4_fill_super (struct super_block *sb, void *data, int silent) | 1902 | static int ext4_fill_super (struct super_block *sb, void *data, int silent) |
| 1813 | __releases(kernel_sem) | 1903 | __releases(kernel_lock) |
| 1814 | __acquires(kernel_sem) | 1904 | __acquires(kernel_lock) |
| 1815 | 1905 | ||
| 1816 | { | 1906 | { |
| 1817 | struct buffer_head * bh; | 1907 | struct buffer_head * bh; |
| @@ -1851,11 +1941,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
| 1851 | goto out_fail; | 1941 | goto out_fail; |
| 1852 | } | 1942 | } |
| 1853 | 1943 | ||
| 1854 | if (!sb_set_blocksize(sb, blocksize)) { | ||
| 1855 | printk(KERN_ERR "EXT4-fs: bad blocksize %d.\n", blocksize); | ||
| 1856 | goto out_fail; | ||
| 1857 | } | ||
| 1858 | |||
| 1859 | /* | 1944 | /* |
| 1860 | * The ext4 superblock will not be buffer aligned for other than 1kB | 1945 | * The ext4 superblock will not be buffer aligned for other than 1kB |
| 1861 | * block sizes. We need to calculate the offset from buffer start. | 1946 | * block sizes. We need to calculate the offset from buffer start. |
| @@ -1919,15 +2004,28 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
| 1919 | 2004 | ||
| 1920 | /* | 2005 | /* |
| 1921 | * turn on extents feature by default in ext4 filesystem | 2006 | * turn on extents feature by default in ext4 filesystem |
| 1922 | * User -o noextents to turn it off | 2007 | * only if feature flag already set by mkfs or tune2fs. |
| 2008 | * Use -o noextents to turn it off | ||
| 1923 | */ | 2009 | */ |
| 1924 | set_opt(sbi->s_mount_opt, EXTENTS); | 2010 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) |
| 2011 | set_opt(sbi->s_mount_opt, EXTENTS); | ||
| 2012 | else | ||
| 2013 | ext4_warning(sb, __func__, | ||
| 2014 | "extents feature not enabled on this filesystem, " | ||
| 2015 | "use tune2fs.\n"); | ||
| 1925 | /* | 2016 | /* |
| 1926 | * turn on mballoc feature by default in ext4 filesystem | 2017 | * turn on mballoc code by default in ext4 filesystem |
| 1927 | * User -o nomballoc to turn it off | 2018 | * Use -o nomballoc to turn it off |
| 1928 | */ | 2019 | */ |
| 1929 | set_opt(sbi->s_mount_opt, MBALLOC); | 2020 | set_opt(sbi->s_mount_opt, MBALLOC); |
| 1930 | 2021 | ||
| 2022 | /* | ||
| 2023 | * enable delayed allocation by default | ||
| 2024 | * Use -o nodelalloc to turn it off | ||
| 2025 | */ | ||
| 2026 | set_opt(sbi->s_mount_opt, DELALLOC); | ||
| 2027 | |||
| 2028 | |||
| 1931 | if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, | 2029 | if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, |
| 1932 | NULL, 0)) | 2030 | NULL, 0)) |
| 1933 | goto failed_mount; | 2031 | goto failed_mount; |
| @@ -2138,6 +2236,14 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
| 2138 | printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); | 2236 | printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); |
| 2139 | goto failed_mount2; | 2237 | goto failed_mount2; |
| 2140 | } | 2238 | } |
| 2239 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | ||
| 2240 | if (!ext4_fill_flex_info(sb)) { | ||
| 2241 | printk(KERN_ERR | ||
| 2242 | "EXT4-fs: unable to initialize " | ||
| 2243 | "flex_bg meta info!\n"); | ||
| 2244 | goto failed_mount2; | ||
| 2245 | } | ||
| 2246 | |||
| 2141 | sbi->s_gdb_count = db_count; | 2247 | sbi->s_gdb_count = db_count; |
| 2142 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | 2248 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
| 2143 | spin_lock_init(&sbi->s_next_gen_lock); | 2249 | spin_lock_init(&sbi->s_next_gen_lock); |
| @@ -2358,6 +2464,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
| 2358 | test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": | 2464 | test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": |
| 2359 | "writeback"); | 2465 | "writeback"); |
| 2360 | 2466 | ||
| 2467 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
| 2468 | printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " | ||
| 2469 | "requested data journaling mode\n"); | ||
| 2470 | clear_opt(sbi->s_mount_opt, DELALLOC); | ||
| 2471 | } else if (test_opt(sb, DELALLOC)) | ||
| 2472 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | ||
| 2473 | |||
| 2361 | ext4_ext_init(sb); | 2474 | ext4_ext_init(sb); |
| 2362 | ext4_mb_init(sb, needs_recovery); | 2475 | ext4_mb_init(sb, needs_recovery); |
| 2363 | 2476 | ||
| @@ -2372,6 +2485,7 @@ cantfind_ext4: | |||
| 2372 | 2485 | ||
| 2373 | failed_mount4: | 2486 | failed_mount4: |
| 2374 | jbd2_journal_destroy(sbi->s_journal); | 2487 | jbd2_journal_destroy(sbi->s_journal); |
| 2488 | sbi->s_journal = NULL; | ||
| 2375 | failed_mount3: | 2489 | failed_mount3: |
| 2376 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 2490 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
| 2377 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 2491 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
| @@ -3325,7 +3439,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
| 3325 | err = ext4_journal_dirty_metadata(handle, bh); | 3439 | err = ext4_journal_dirty_metadata(handle, bh); |
| 3326 | else { | 3440 | else { |
| 3327 | /* Always do at least ordered writes for quotas */ | 3441 | /* Always do at least ordered writes for quotas */ |
| 3328 | err = ext4_journal_dirty_data(handle, bh); | 3442 | err = ext4_jbd2_file_inode(handle, inode); |
| 3329 | mark_buffer_dirty(bh); | 3443 | mark_buffer_dirty(bh); |
| 3330 | } | 3444 | } |
| 3331 | brelse(bh); | 3445 | brelse(bh); |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index ff08633f398e..93c5fdcdad2e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
| @@ -810,7 +810,7 @@ inserted: | |||
| 810 | /* We need to allocate a new block */ | 810 | /* We need to allocate a new block */ |
| 811 | ext4_fsblk_t goal = ext4_group_first_block_no(sb, | 811 | ext4_fsblk_t goal = ext4_group_first_block_no(sb, |
| 812 | EXT4_I(inode)->i_block_group); | 812 | EXT4_I(inode)->i_block_group); |
| 813 | ext4_fsblk_t block = ext4_new_block(handle, inode, | 813 | ext4_fsblk_t block = ext4_new_meta_block(handle, inode, |
| 814 | goal, &error); | 814 | goal, &error); |
| 815 | if (error) | 815 | if (error) |
| 816 | goto cleanup; | 816 | goto cleanup; |
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index fff33382cadc..ac1a52cf2a37 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c | |||
| @@ -13,13 +13,11 @@ | |||
| 13 | #include "ext4.h" | 13 | #include "ext4.h" |
| 14 | #include "xattr.h" | 14 | #include "xattr.h" |
| 15 | 15 | ||
| 16 | #define XATTR_TRUSTED_PREFIX "trusted." | ||
| 17 | |||
| 18 | static size_t | 16 | static size_t |
| 19 | ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, | 17 | ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, |
| 20 | const char *name, size_t name_len) | 18 | const char *name, size_t name_len) |
| 21 | { | 19 | { |
| 22 | const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; | 20 | const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; |
| 23 | const size_t total_len = prefix_len + name_len + 1; | 21 | const size_t total_len = prefix_len + name_len + 1; |
| 24 | 22 | ||
| 25 | if (!capable(CAP_SYS_ADMIN)) | 23 | if (!capable(CAP_SYS_ADMIN)) |
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index 67be723fcc4e..d91aa61b42aa 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c | |||
| @@ -12,13 +12,11 @@ | |||
| 12 | #include "ext4.h" | 12 | #include "ext4.h" |
| 13 | #include "xattr.h" | 13 | #include "xattr.h" |
| 14 | 14 | ||
| 15 | #define XATTR_USER_PREFIX "user." | ||
| 16 | |||
| 17 | static size_t | 15 | static size_t |
| 18 | ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, | 16 | ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, |
| 19 | const char *name, size_t name_len) | 17 | const char *name, size_t name_len) |
| 20 | { | 18 | { |
| 21 | const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; | 19 | const size_t prefix_len = XATTR_USER_PREFIX_LEN; |
| 22 | const size_t total_len = prefix_len + name_len + 1; | 20 | const size_t total_len = prefix_len + name_len + 1; |
| 23 | 21 | ||
| 24 | if (!test_opt(inode->i_sb, XATTR_USER)) | 22 | if (!test_opt(inode->i_sb, XATTR_USER)) |
diff --git a/fs/fat/cache.c b/fs/fat/cache.c index fda25479af26..3a9ecac8d61f 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c | |||
| @@ -61,7 +61,7 @@ void fat_cache_destroy(void) | |||
| 61 | 61 | ||
| 62 | static inline struct fat_cache *fat_cache_alloc(struct inode *inode) | 62 | static inline struct fat_cache *fat_cache_alloc(struct inode *inode) |
| 63 | { | 63 | { |
| 64 | return kmem_cache_alloc(fat_cache_cachep, GFP_KERNEL); | 64 | return kmem_cache_alloc(fat_cache_cachep, GFP_NOFS); |
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | static inline void fat_cache_free(struct fat_cache *cache) | 67 | static inline void fat_cache_free(struct fat_cache *cache) |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 486725ee99ae..34541d06e626 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
| @@ -472,7 +472,7 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, | |||
| 472 | loff_t cpos; | 472 | loff_t cpos; |
| 473 | int ret = 0; | 473 | int ret = 0; |
| 474 | 474 | ||
| 475 | lock_kernel(); | 475 | lock_super(sb); |
| 476 | 476 | ||
| 477 | cpos = filp->f_pos; | 477 | cpos = filp->f_pos; |
| 478 | /* Fake . and .. for the root directory. */ | 478 | /* Fake . and .. for the root directory. */ |
| @@ -654,7 +654,7 @@ FillFailed: | |||
| 654 | if (unicode) | 654 | if (unicode) |
| 655 | __putname(unicode); | 655 | __putname(unicode); |
| 656 | out: | 656 | out: |
| 657 | unlock_kernel(); | 657 | unlock_super(sb); |
| 658 | return ret; | 658 | return ret; |
| 659 | } | 659 | } |
| 660 | 660 | ||
diff --git a/fs/fat/file.c b/fs/fat/file.c index 771326b8047e..c672df4036e9 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
| @@ -11,7 +11,6 @@ | |||
| 11 | #include <linux/mount.h> | 11 | #include <linux/mount.h> |
| 12 | #include <linux/time.h> | 12 | #include <linux/time.h> |
| 13 | #include <linux/msdos_fs.h> | 13 | #include <linux/msdos_fs.h> |
| 14 | #include <linux/smp_lock.h> | ||
| 15 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
| 16 | #include <linux/writeback.h> | 15 | #include <linux/writeback.h> |
| 17 | #include <linux/backing-dev.h> | 16 | #include <linux/backing-dev.h> |
| @@ -242,9 +241,7 @@ void fat_truncate(struct inode *inode) | |||
| 242 | 241 | ||
| 243 | nr_clusters = (inode->i_size + (cluster_size - 1)) >> sbi->cluster_bits; | 242 | nr_clusters = (inode->i_size + (cluster_size - 1)) >> sbi->cluster_bits; |
| 244 | 243 | ||
| 245 | lock_kernel(); | ||
| 246 | fat_free(inode, nr_clusters); | 244 | fat_free(inode, nr_clusters); |
| 247 | unlock_kernel(); | ||
| 248 | fat_flush_inodes(inode->i_sb, inode, NULL); | 245 | fat_flush_inodes(inode->i_sb, inode, NULL); |
| 249 | } | 246 | } |
| 250 | 247 | ||
| @@ -310,8 +307,6 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 310 | int error = 0; | 307 | int error = 0; |
| 311 | unsigned int ia_valid; | 308 | unsigned int ia_valid; |
| 312 | 309 | ||
| 313 | lock_kernel(); | ||
| 314 | |||
| 315 | /* | 310 | /* |
| 316 | * Expand the file. Since inode_setattr() updates ->i_size | 311 | * Expand the file. Since inode_setattr() updates ->i_size |
| 317 | * before calling the ->truncate(), but FAT needs to fill the | 312 | * before calling the ->truncate(), but FAT needs to fill the |
| @@ -366,7 +361,6 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 366 | 361 | ||
| 367 | error = inode_setattr(inode, attr); | 362 | error = inode_setattr(inode, attr); |
| 368 | out: | 363 | out: |
| 369 | unlock_kernel(); | ||
| 370 | return error; | 364 | return error; |
| 371 | } | 365 | } |
| 372 | EXPORT_SYMBOL_GPL(fat_setattr); | 366 | EXPORT_SYMBOL_GPL(fat_setattr); |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 4e0a3dd9d677..46a4508ffd2e 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
| @@ -440,14 +440,13 @@ static void fat_delete_inode(struct inode *inode) | |||
| 440 | 440 | ||
| 441 | static void fat_clear_inode(struct inode *inode) | 441 | static void fat_clear_inode(struct inode *inode) |
| 442 | { | 442 | { |
| 443 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); | 443 | struct super_block *sb = inode->i_sb; |
| 444 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | ||
| 444 | 445 | ||
| 445 | lock_kernel(); | ||
| 446 | spin_lock(&sbi->inode_hash_lock); | 446 | spin_lock(&sbi->inode_hash_lock); |
| 447 | fat_cache_inval_inode(inode); | 447 | fat_cache_inval_inode(inode); |
| 448 | hlist_del_init(&MSDOS_I(inode)->i_fat_hash); | 448 | hlist_del_init(&MSDOS_I(inode)->i_fat_hash); |
| 449 | spin_unlock(&sbi->inode_hash_lock); | 449 | spin_unlock(&sbi->inode_hash_lock); |
| 450 | unlock_kernel(); | ||
| 451 | } | 450 | } |
| 452 | 451 | ||
| 453 | static void fat_write_super(struct super_block *sb) | 452 | static void fat_write_super(struct super_block *sb) |
| @@ -485,7 +484,7 @@ static struct kmem_cache *fat_inode_cachep; | |||
| 485 | static struct inode *fat_alloc_inode(struct super_block *sb) | 484 | static struct inode *fat_alloc_inode(struct super_block *sb) |
| 486 | { | 485 | { |
| 487 | struct msdos_inode_info *ei; | 486 | struct msdos_inode_info *ei; |
| 488 | ei = kmem_cache_alloc(fat_inode_cachep, GFP_KERNEL); | 487 | ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS); |
| 489 | if (!ei) | 488 | if (!ei) |
| 490 | return NULL; | 489 | return NULL; |
| 491 | return &ei->vfs_inode; | 490 | return &ei->vfs_inode; |
| @@ -567,7 +566,7 @@ retry: | |||
| 567 | if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) | 566 | if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) |
| 568 | return 0; | 567 | return 0; |
| 569 | 568 | ||
| 570 | lock_kernel(); | 569 | lock_super(sb); |
| 571 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); | 570 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); |
| 572 | if (!bh) { | 571 | if (!bh) { |
| 573 | printk(KERN_ERR "FAT: unable to read inode block " | 572 | printk(KERN_ERR "FAT: unable to read inode block " |
| @@ -579,7 +578,7 @@ retry: | |||
| 579 | if (i_pos != MSDOS_I(inode)->i_pos) { | 578 | if (i_pos != MSDOS_I(inode)->i_pos) { |
| 580 | spin_unlock(&sbi->inode_hash_lock); | 579 | spin_unlock(&sbi->inode_hash_lock); |
| 581 | brelse(bh); | 580 | brelse(bh); |
| 582 | unlock_kernel(); | 581 | unlock_super(sb); |
| 583 | goto retry; | 582 | goto retry; |
| 584 | } | 583 | } |
| 585 | 584 | ||
| @@ -606,7 +605,7 @@ retry: | |||
| 606 | err = sync_dirty_buffer(bh); | 605 | err = sync_dirty_buffer(bh); |
| 607 | brelse(bh); | 606 | brelse(bh); |
| 608 | out: | 607 | out: |
| 609 | unlock_kernel(); | 608 | unlock_super(sb); |
| 610 | return err; | 609 | return err; |
| 611 | } | 610 | } |
| 612 | 611 | ||
| @@ -736,6 +735,7 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable) | |||
| 736 | 735 | ||
| 737 | static struct dentry *fat_get_parent(struct dentry *child) | 736 | static struct dentry *fat_get_parent(struct dentry *child) |
| 738 | { | 737 | { |
| 738 | struct super_block *sb = child->d_sb; | ||
| 739 | struct buffer_head *bh; | 739 | struct buffer_head *bh; |
| 740 | struct msdos_dir_entry *de; | 740 | struct msdos_dir_entry *de; |
| 741 | loff_t i_pos; | 741 | loff_t i_pos; |
| @@ -743,14 +743,14 @@ static struct dentry *fat_get_parent(struct dentry *child) | |||
| 743 | struct inode *inode; | 743 | struct inode *inode; |
| 744 | int err; | 744 | int err; |
| 745 | 745 | ||
| 746 | lock_kernel(); | 746 | lock_super(sb); |
| 747 | 747 | ||
| 748 | err = fat_get_dotdot_entry(child->d_inode, &bh, &de, &i_pos); | 748 | err = fat_get_dotdot_entry(child->d_inode, &bh, &de, &i_pos); |
| 749 | if (err) { | 749 | if (err) { |
| 750 | parent = ERR_PTR(err); | 750 | parent = ERR_PTR(err); |
| 751 | goto out; | 751 | goto out; |
| 752 | } | 752 | } |
| 753 | inode = fat_build_inode(child->d_sb, de, i_pos); | 753 | inode = fat_build_inode(sb, de, i_pos); |
| 754 | brelse(bh); | 754 | brelse(bh); |
| 755 | if (IS_ERR(inode)) { | 755 | if (IS_ERR(inode)) { |
| 756 | parent = ERR_CAST(inode); | 756 | parent = ERR_CAST(inode); |
| @@ -762,7 +762,7 @@ static struct dentry *fat_get_parent(struct dentry *child) | |||
| 762 | parent = ERR_PTR(-ENOMEM); | 762 | parent = ERR_PTR(-ENOMEM); |
| 763 | } | 763 | } |
| 764 | out: | 764 | out: |
| 765 | unlock_kernel(); | 765 | unlock_super(sb); |
| 766 | 766 | ||
| 767 | return parent; | 767 | return parent; |
| 768 | } | 768 | } |
| @@ -1172,6 +1172,12 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1172 | long error; | 1172 | long error; |
| 1173 | char buf[50]; | 1173 | char buf[50]; |
| 1174 | 1174 | ||
| 1175 | /* | ||
| 1176 | * GFP_KERNEL is ok here, because while we do hold the | ||
| 1177 | * supeblock lock, memory pressure can't call back into | ||
| 1178 | * the filesystem, since we're only just about to mount | ||
| 1179 | * it and have no inodes etc active! | ||
| 1180 | */ | ||
| 1175 | sbi = kzalloc(sizeof(struct msdos_sb_info), GFP_KERNEL); | 1181 | sbi = kzalloc(sizeof(struct msdos_sb_info), GFP_KERNEL); |
| 1176 | if (!sbi) | 1182 | if (!sbi) |
| 1177 | return -ENOMEM; | 1183 | return -ENOMEM; |
diff --git a/fs/fcntl.c b/fs/fcntl.c index bfd776509a72..330a7d782591 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include <linux/fdtable.h> | 12 | #include <linux/fdtable.h> |
| 13 | #include <linux/capability.h> | 13 | #include <linux/capability.h> |
| 14 | #include <linux/dnotify.h> | 14 | #include <linux/dnotify.h> |
| 15 | #include <linux/smp_lock.h> | ||
| 16 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 17 | #include <linux/module.h> | 16 | #include <linux/module.h> |
| 18 | #include <linux/security.h> | 17 | #include <linux/security.h> |
| @@ -227,7 +226,6 @@ static int setfl(int fd, struct file * filp, unsigned long arg) | |||
| 227 | if (error) | 226 | if (error) |
| 228 | return error; | 227 | return error; |
| 229 | 228 | ||
| 230 | lock_kernel(); | ||
| 231 | if ((arg ^ filp->f_flags) & FASYNC) { | 229 | if ((arg ^ filp->f_flags) & FASYNC) { |
| 232 | if (filp->f_op && filp->f_op->fasync) { | 230 | if (filp->f_op && filp->f_op->fasync) { |
| 233 | error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); | 231 | error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); |
| @@ -238,7 +236,6 @@ static int setfl(int fd, struct file * filp, unsigned long arg) | |||
| 238 | 236 | ||
| 239 | filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); | 237 | filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); |
| 240 | out: | 238 | out: |
| 241 | unlock_kernel(); | ||
| 242 | return error; | 239 | return error; |
| 243 | } | 240 | } |
| 244 | 241 | ||
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 7f7947e3dfbb..ab2f57e3fb87 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
| @@ -14,23 +14,11 @@ config GFS2_FS | |||
| 14 | GFS is perfect consistency -- changes made to the filesystem on one | 14 | GFS is perfect consistency -- changes made to the filesystem on one |
| 15 | machine show up immediately on all other machines in the cluster. | 15 | machine show up immediately on all other machines in the cluster. |
| 16 | 16 | ||
| 17 | To use the GFS2 filesystem, you will need to enable one or more of | 17 | To use the GFS2 filesystem in a cluster, you will need to enable |
| 18 | the below locking modules. Documentation and utilities for GFS2 can | 18 | the locking module below. Documentation and utilities for GFS2 can |
| 19 | be found here: http://sources.redhat.com/cluster | 19 | be found here: http://sources.redhat.com/cluster |
| 20 | 20 | ||
| 21 | config GFS2_FS_LOCKING_NOLOCK | 21 | The "nolock" lock module is now built in to GFS2 by default. |
| 22 | tristate "GFS2 \"nolock\" locking module" | ||
| 23 | depends on GFS2_FS | ||
| 24 | help | ||
| 25 | Single node locking module for GFS2. | ||
| 26 | |||
| 27 | Use this module if you want to use GFS2 on a single node without | ||
| 28 | its clustering features. You can still take advantage of the | ||
| 29 | large file support, and upgrade to running a full cluster later on | ||
| 30 | if required. | ||
| 31 | |||
| 32 | If you will only be using GFS2 in cluster mode, you do not need this | ||
| 33 | module. | ||
| 34 | 22 | ||
| 35 | config GFS2_FS_LOCKING_DLM | 23 | config GFS2_FS_LOCKING_DLM |
| 36 | tristate "GFS2 DLM locking module" | 24 | tristate "GFS2 DLM locking module" |
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index e2350df02a07..ec65851ec80a 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile | |||
| @@ -5,6 +5,5 @@ gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ | |||
| 5 | ops_fstype.o ops_inode.o ops_super.o quota.o \ | 5 | ops_fstype.o ops_inode.o ops_super.o quota.o \ |
| 6 | recovery.o rgrp.o super.o sys.o trans.o util.o | 6 | recovery.o rgrp.o super.o sys.o trans.o util.o |
| 7 | 7 | ||
| 8 | obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/ | ||
| 9 | obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/ | 8 | obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/ |
| 10 | 9 | ||
diff --git a/fs/gfs2/gfs2.h b/fs/gfs2/gfs2.h index 3bb11c0f8b56..ef606e3a5cf4 100644 --- a/fs/gfs2/gfs2.h +++ b/fs/gfs2/gfs2.h | |||
| @@ -16,11 +16,6 @@ enum { | |||
| 16 | }; | 16 | }; |
| 17 | 17 | ||
| 18 | enum { | 18 | enum { |
| 19 | NO_WAIT = 0, | ||
| 20 | WAIT = 1, | ||
| 21 | }; | ||
| 22 | |||
| 23 | enum { | ||
| 24 | NO_FORCE = 0, | 19 | NO_FORCE = 0, |
| 25 | FORCE = 1, | 20 | FORCE = 1, |
| 26 | }; | 21 | }; |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d636b3e80f5d..13391e546616 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
| @@ -45,21 +45,19 @@ struct gfs2_gl_hash_bucket { | |||
| 45 | struct hlist_head hb_list; | 45 | struct hlist_head hb_list; |
| 46 | }; | 46 | }; |
| 47 | 47 | ||
| 48 | struct glock_iter { | 48 | struct gfs2_glock_iter { |
| 49 | int hash; /* hash bucket index */ | 49 | int hash; /* hash bucket index */ |
| 50 | struct gfs2_sbd *sdp; /* incore superblock */ | 50 | struct gfs2_sbd *sdp; /* incore superblock */ |
| 51 | struct gfs2_glock *gl; /* current glock struct */ | 51 | struct gfs2_glock *gl; /* current glock struct */ |
| 52 | struct seq_file *seq; /* sequence file for debugfs */ | 52 | char string[512]; /* scratch space */ |
| 53 | char string[512]; /* scratch space */ | ||
| 54 | }; | 53 | }; |
| 55 | 54 | ||
| 56 | typedef void (*glock_examiner) (struct gfs2_glock * gl); | 55 | typedef void (*glock_examiner) (struct gfs2_glock * gl); |
| 57 | 56 | ||
| 58 | static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); | 57 | static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); |
| 59 | static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); | 58 | static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); |
| 60 | static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); | 59 | #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) |
| 61 | static void gfs2_glock_drop_th(struct gfs2_glock *gl); | 60 | static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); |
| 62 | static void run_queue(struct gfs2_glock *gl); | ||
| 63 | 61 | ||
| 64 | static DECLARE_RWSEM(gfs2_umount_flush_sem); | 62 | static DECLARE_RWSEM(gfs2_umount_flush_sem); |
| 65 | static struct dentry *gfs2_root; | 63 | static struct dentry *gfs2_root; |
| @@ -123,33 +121,6 @@ static inline rwlock_t *gl_lock_addr(unsigned int x) | |||
| 123 | #endif | 121 | #endif |
| 124 | 122 | ||
| 125 | /** | 123 | /** |
| 126 | * relaxed_state_ok - is a requested lock compatible with the current lock mode? | ||
| 127 | * @actual: the current state of the lock | ||
| 128 | * @requested: the lock state that was requested by the caller | ||
| 129 | * @flags: the modifier flags passed in by the caller | ||
| 130 | * | ||
| 131 | * Returns: 1 if the locks are compatible, 0 otherwise | ||
| 132 | */ | ||
| 133 | |||
| 134 | static inline int relaxed_state_ok(unsigned int actual, unsigned requested, | ||
| 135 | int flags) | ||
| 136 | { | ||
| 137 | if (actual == requested) | ||
| 138 | return 1; | ||
| 139 | |||
| 140 | if (flags & GL_EXACT) | ||
| 141 | return 0; | ||
| 142 | |||
| 143 | if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED) | ||
| 144 | return 1; | ||
| 145 | |||
| 146 | if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY)) | ||
| 147 | return 1; | ||
| 148 | |||
| 149 | return 0; | ||
| 150 | } | ||
| 151 | |||
| 152 | /** | ||
| 153 | * gl_hash() - Turn glock number into hash bucket number | 124 | * gl_hash() - Turn glock number into hash bucket number |
| 154 | * @lock: The glock number | 125 | * @lock: The glock number |
| 155 | * | 126 | * |
| @@ -182,7 +153,7 @@ static void glock_free(struct gfs2_glock *gl) | |||
| 182 | struct gfs2_sbd *sdp = gl->gl_sbd; | 153 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| 183 | struct inode *aspace = gl->gl_aspace; | 154 | struct inode *aspace = gl->gl_aspace; |
| 184 | 155 | ||
| 185 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 156 | if (sdp->sd_lockstruct.ls_ops->lm_put_lock) |
| 186 | sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock); | 157 | sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock); |
| 187 | 158 | ||
| 188 | if (aspace) | 159 | if (aspace) |
| @@ -211,17 +182,14 @@ static void gfs2_glock_hold(struct gfs2_glock *gl) | |||
| 211 | int gfs2_glock_put(struct gfs2_glock *gl) | 182 | int gfs2_glock_put(struct gfs2_glock *gl) |
| 212 | { | 183 | { |
| 213 | int rv = 0; | 184 | int rv = 0; |
| 214 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 215 | 185 | ||
| 216 | write_lock(gl_lock_addr(gl->gl_hash)); | 186 | write_lock(gl_lock_addr(gl->gl_hash)); |
| 217 | if (atomic_dec_and_test(&gl->gl_ref)) { | 187 | if (atomic_dec_and_test(&gl->gl_ref)) { |
| 218 | hlist_del(&gl->gl_list); | 188 | hlist_del(&gl->gl_list); |
| 219 | write_unlock(gl_lock_addr(gl->gl_hash)); | 189 | write_unlock(gl_lock_addr(gl->gl_hash)); |
| 220 | gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); | 190 | GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_UNLOCKED); |
| 221 | gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); | 191 | GLOCK_BUG_ON(gl, !list_empty(&gl->gl_reclaim)); |
| 222 | gfs2_assert(sdp, list_empty(&gl->gl_holders)); | 192 | GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); |
| 223 | gfs2_assert(sdp, list_empty(&gl->gl_waiters1)); | ||
| 224 | gfs2_assert(sdp, list_empty(&gl->gl_waiters3)); | ||
| 225 | glock_free(gl); | 193 | glock_free(gl); |
| 226 | rv = 1; | 194 | rv = 1; |
| 227 | goto out; | 195 | goto out; |
| @@ -281,22 +249,401 @@ static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp, | |||
| 281 | return gl; | 249 | return gl; |
| 282 | } | 250 | } |
| 283 | 251 | ||
| 252 | /** | ||
| 253 | * may_grant - check if it's ok to grant a new lock | ||
| 254 | * @gl: The glock | ||
| 255 | * @gh: The lock request which we wish to grant | ||
| 256 | * | ||
| 257 | * Returns: true if it's ok to grant the lock | ||
| 258 | */ | ||
| 259 | |||
| 260 | static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh) | ||
| 261 | { | ||
| 262 | const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list); | ||
| 263 | if ((gh->gh_state == LM_ST_EXCLUSIVE || | ||
| 264 | gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head) | ||
| 265 | return 0; | ||
| 266 | if (gl->gl_state == gh->gh_state) | ||
| 267 | return 1; | ||
| 268 | if (gh->gh_flags & GL_EXACT) | ||
| 269 | return 0; | ||
| 270 | if (gl->gl_state == LM_ST_EXCLUSIVE) { | ||
| 271 | if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED) | ||
| 272 | return 1; | ||
| 273 | if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED) | ||
| 274 | return 1; | ||
| 275 | } | ||
| 276 | if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY)) | ||
| 277 | return 1; | ||
| 278 | return 0; | ||
| 279 | } | ||
| 280 | |||
| 281 | static void gfs2_holder_wake(struct gfs2_holder *gh) | ||
| 282 | { | ||
| 283 | clear_bit(HIF_WAIT, &gh->gh_iflags); | ||
| 284 | smp_mb__after_clear_bit(); | ||
| 285 | wake_up_bit(&gh->gh_iflags, HIF_WAIT); | ||
| 286 | } | ||
| 287 | |||
| 288 | /** | ||
| 289 | * do_promote - promote as many requests as possible on the current queue | ||
| 290 | * @gl: The glock | ||
| 291 | * | ||
| 292 | * Returns: true if there is a blocked holder at the head of the list | ||
| 293 | */ | ||
| 294 | |||
| 295 | static int do_promote(struct gfs2_glock *gl) | ||
| 296 | { | ||
| 297 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 298 | struct gfs2_holder *gh, *tmp; | ||
| 299 | int ret; | ||
| 300 | |||
| 301 | restart: | ||
| 302 | list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { | ||
| 303 | if (test_bit(HIF_HOLDER, &gh->gh_iflags)) | ||
| 304 | continue; | ||
| 305 | if (may_grant(gl, gh)) { | ||
| 306 | if (gh->gh_list.prev == &gl->gl_holders && | ||
| 307 | glops->go_lock) { | ||
| 308 | spin_unlock(&gl->gl_spin); | ||
| 309 | /* FIXME: eliminate this eventually */ | ||
| 310 | ret = glops->go_lock(gh); | ||
| 311 | spin_lock(&gl->gl_spin); | ||
| 312 | if (ret) { | ||
| 313 | gh->gh_error = ret; | ||
| 314 | list_del_init(&gh->gh_list); | ||
| 315 | gfs2_holder_wake(gh); | ||
| 316 | goto restart; | ||
| 317 | } | ||
| 318 | set_bit(HIF_HOLDER, &gh->gh_iflags); | ||
| 319 | gfs2_holder_wake(gh); | ||
| 320 | goto restart; | ||
| 321 | } | ||
| 322 | set_bit(HIF_HOLDER, &gh->gh_iflags); | ||
| 323 | gfs2_holder_wake(gh); | ||
| 324 | continue; | ||
| 325 | } | ||
| 326 | if (gh->gh_list.prev == &gl->gl_holders) | ||
| 327 | return 1; | ||
| 328 | break; | ||
| 329 | } | ||
| 330 | return 0; | ||
| 331 | } | ||
| 332 | |||
| 333 | /** | ||
| 334 | * do_error - Something unexpected has happened during a lock request | ||
| 335 | * | ||
| 336 | */ | ||
| 337 | |||
| 338 | static inline void do_error(struct gfs2_glock *gl, const int ret) | ||
| 339 | { | ||
| 340 | struct gfs2_holder *gh, *tmp; | ||
| 341 | |||
| 342 | list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { | ||
| 343 | if (test_bit(HIF_HOLDER, &gh->gh_iflags)) | ||
| 344 | continue; | ||
| 345 | if (ret & LM_OUT_ERROR) | ||
| 346 | gh->gh_error = -EIO; | ||
| 347 | else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) | ||
| 348 | gh->gh_error = GLR_TRYFAILED; | ||
| 349 | else | ||
| 350 | continue; | ||
| 351 | list_del_init(&gh->gh_list); | ||
| 352 | gfs2_holder_wake(gh); | ||
| 353 | } | ||
| 354 | } | ||
| 355 | |||
| 356 | /** | ||
| 357 | * find_first_waiter - find the first gh that's waiting for the glock | ||
| 358 | * @gl: the glock | ||
| 359 | */ | ||
| 360 | |||
| 361 | static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl) | ||
| 362 | { | ||
| 363 | struct gfs2_holder *gh; | ||
| 364 | |||
| 365 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | ||
| 366 | if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) | ||
| 367 | return gh; | ||
| 368 | } | ||
| 369 | return NULL; | ||
| 370 | } | ||
| 371 | |||
| 372 | /** | ||
| 373 | * state_change - record that the glock is now in a different state | ||
| 374 | * @gl: the glock | ||
| 375 | * @new_state: the new state | ||
| 376 | * | ||
| 377 | */ | ||
| 378 | |||
| 379 | static void state_change(struct gfs2_glock *gl, unsigned int new_state) | ||
| 380 | { | ||
| 381 | int held1, held2; | ||
| 382 | |||
| 383 | held1 = (gl->gl_state != LM_ST_UNLOCKED); | ||
| 384 | held2 = (new_state != LM_ST_UNLOCKED); | ||
| 385 | |||
| 386 | if (held1 != held2) { | ||
| 387 | if (held2) | ||
| 388 | gfs2_glock_hold(gl); | ||
| 389 | else | ||
| 390 | gfs2_glock_put(gl); | ||
| 391 | } | ||
| 392 | |||
| 393 | gl->gl_state = new_state; | ||
| 394 | gl->gl_tchange = jiffies; | ||
| 395 | } | ||
| 396 | |||
| 397 | static void gfs2_demote_wake(struct gfs2_glock *gl) | ||
| 398 | { | ||
| 399 | gl->gl_demote_state = LM_ST_EXCLUSIVE; | ||
| 400 | clear_bit(GLF_DEMOTE, &gl->gl_flags); | ||
| 401 | smp_mb__after_clear_bit(); | ||
| 402 | wake_up_bit(&gl->gl_flags, GLF_DEMOTE); | ||
| 403 | } | ||
| 404 | |||
| 405 | /** | ||
| 406 | * finish_xmote - The DLM has replied to one of our lock requests | ||
| 407 | * @gl: The glock | ||
| 408 | * @ret: The status from the DLM | ||
| 409 | * | ||
| 410 | */ | ||
| 411 | |||
| 412 | static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) | ||
| 413 | { | ||
| 414 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 415 | struct gfs2_holder *gh; | ||
| 416 | unsigned state = ret & LM_OUT_ST_MASK; | ||
| 417 | |||
| 418 | spin_lock(&gl->gl_spin); | ||
| 419 | state_change(gl, state); | ||
| 420 | gh = find_first_waiter(gl); | ||
| 421 | |||
| 422 | /* Demote to UN request arrived during demote to SH or DF */ | ||
| 423 | if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && | ||
| 424 | state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED) | ||
| 425 | gl->gl_target = LM_ST_UNLOCKED; | ||
| 426 | |||
| 427 | /* Check for state != intended state */ | ||
| 428 | if (unlikely(state != gl->gl_target)) { | ||
| 429 | if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { | ||
| 430 | /* move to back of queue and try next entry */ | ||
| 431 | if (ret & LM_OUT_CANCELED) { | ||
| 432 | if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0) | ||
| 433 | list_move_tail(&gh->gh_list, &gl->gl_holders); | ||
| 434 | gh = find_first_waiter(gl); | ||
| 435 | gl->gl_target = gh->gh_state; | ||
| 436 | goto retry; | ||
| 437 | } | ||
| 438 | /* Some error or failed "try lock" - report it */ | ||
| 439 | if ((ret & LM_OUT_ERROR) || | ||
| 440 | (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { | ||
| 441 | gl->gl_target = gl->gl_state; | ||
| 442 | do_error(gl, ret); | ||
| 443 | goto out; | ||
| 444 | } | ||
| 445 | } | ||
| 446 | switch(state) { | ||
| 447 | /* Unlocked due to conversion deadlock, try again */ | ||
| 448 | case LM_ST_UNLOCKED: | ||
| 449 | retry: | ||
| 450 | do_xmote(gl, gh, gl->gl_target); | ||
| 451 | break; | ||
| 452 | /* Conversion fails, unlock and try again */ | ||
| 453 | case LM_ST_SHARED: | ||
| 454 | case LM_ST_DEFERRED: | ||
| 455 | do_xmote(gl, gh, LM_ST_UNLOCKED); | ||
| 456 | break; | ||
| 457 | default: /* Everything else */ | ||
| 458 | printk(KERN_ERR "GFS2: wanted %u got %u\n", gl->gl_target, state); | ||
| 459 | GLOCK_BUG_ON(gl, 1); | ||
| 460 | } | ||
| 461 | spin_unlock(&gl->gl_spin); | ||
| 462 | gfs2_glock_put(gl); | ||
| 463 | return; | ||
| 464 | } | ||
| 465 | |||
| 466 | /* Fast path - we got what we asked for */ | ||
| 467 | if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) | ||
| 468 | gfs2_demote_wake(gl); | ||
| 469 | if (state != LM_ST_UNLOCKED) { | ||
| 470 | if (glops->go_xmote_bh) { | ||
| 471 | int rv; | ||
| 472 | spin_unlock(&gl->gl_spin); | ||
| 473 | rv = glops->go_xmote_bh(gl, gh); | ||
| 474 | if (rv == -EAGAIN) | ||
| 475 | return; | ||
| 476 | spin_lock(&gl->gl_spin); | ||
| 477 | if (rv) { | ||
| 478 | do_error(gl, rv); | ||
| 479 | goto out; | ||
| 480 | } | ||
| 481 | } | ||
| 482 | do_promote(gl); | ||
| 483 | } | ||
| 484 | out: | ||
| 485 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 486 | spin_unlock(&gl->gl_spin); | ||
| 487 | gfs2_glock_put(gl); | ||
| 488 | } | ||
| 489 | |||
| 490 | static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, | ||
| 491 | unsigned int cur_state, unsigned int req_state, | ||
| 492 | unsigned int flags) | ||
| 493 | { | ||
| 494 | int ret = LM_OUT_ERROR; | ||
| 495 | |||
| 496 | if (!sdp->sd_lockstruct.ls_ops->lm_lock) | ||
| 497 | return req_state == LM_ST_UNLOCKED ? 0 : req_state; | ||
| 498 | |||
| 499 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 500 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, | ||
| 501 | req_state, flags); | ||
| 502 | return ret; | ||
| 503 | } | ||
| 504 | |||
| 505 | /** | ||
| 506 | * do_xmote - Calls the DLM to change the state of a lock | ||
| 507 | * @gl: The lock state | ||
| 508 | * @gh: The holder (only for promotes) | ||
| 509 | * @target: The target lock state | ||
| 510 | * | ||
| 511 | */ | ||
| 512 | |||
| 513 | static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target) | ||
| 514 | { | ||
| 515 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 516 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 517 | unsigned int lck_flags = gh ? gh->gh_flags : 0; | ||
| 518 | int ret; | ||
| 519 | |||
| 520 | lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | | ||
| 521 | LM_FLAG_PRIORITY); | ||
| 522 | BUG_ON(gl->gl_state == target); | ||
| 523 | BUG_ON(gl->gl_state == gl->gl_target); | ||
| 524 | if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && | ||
| 525 | glops->go_inval) { | ||
| 526 | set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); | ||
| 527 | do_error(gl, 0); /* Fail queued try locks */ | ||
| 528 | } | ||
| 529 | spin_unlock(&gl->gl_spin); | ||
| 530 | if (glops->go_xmote_th) | ||
| 531 | glops->go_xmote_th(gl); | ||
| 532 | if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) | ||
| 533 | glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); | ||
| 534 | clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); | ||
| 535 | |||
| 536 | gfs2_glock_hold(gl); | ||
| 537 | if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED || | ||
| 538 | gl->gl_state == LM_ST_DEFERRED) && | ||
| 539 | !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) | ||
| 540 | lck_flags |= LM_FLAG_TRY_1CB; | ||
| 541 | ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, target, lck_flags); | ||
| 542 | |||
| 543 | if (!(ret & LM_OUT_ASYNC)) { | ||
| 544 | finish_xmote(gl, ret); | ||
| 545 | gfs2_glock_hold(gl); | ||
| 546 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | ||
| 547 | gfs2_glock_put(gl); | ||
| 548 | } else { | ||
| 549 | GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC); | ||
| 550 | } | ||
| 551 | spin_lock(&gl->gl_spin); | ||
| 552 | } | ||
| 553 | |||
| 554 | /** | ||
| 555 | * find_first_holder - find the first "holder" gh | ||
| 556 | * @gl: the glock | ||
| 557 | */ | ||
| 558 | |||
| 559 | static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) | ||
| 560 | { | ||
| 561 | struct gfs2_holder *gh; | ||
| 562 | |||
| 563 | if (!list_empty(&gl->gl_holders)) { | ||
| 564 | gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); | ||
| 565 | if (test_bit(HIF_HOLDER, &gh->gh_iflags)) | ||
| 566 | return gh; | ||
| 567 | } | ||
| 568 | return NULL; | ||
| 569 | } | ||
| 570 | |||
| 571 | /** | ||
| 572 | * run_queue - do all outstanding tasks related to a glock | ||
| 573 | * @gl: The glock in question | ||
| 574 | * @nonblock: True if we must not block in run_queue | ||
| 575 | * | ||
| 576 | */ | ||
| 577 | |||
| 578 | static void run_queue(struct gfs2_glock *gl, const int nonblock) | ||
| 579 | { | ||
| 580 | struct gfs2_holder *gh = NULL; | ||
| 581 | |||
| 582 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) | ||
| 583 | return; | ||
| 584 | |||
| 585 | GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); | ||
| 586 | |||
| 587 | if (test_bit(GLF_DEMOTE, &gl->gl_flags) && | ||
| 588 | gl->gl_demote_state != gl->gl_state) { | ||
| 589 | if (find_first_holder(gl)) | ||
| 590 | goto out; | ||
| 591 | if (nonblock) | ||
| 592 | goto out_sched; | ||
| 593 | set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); | ||
| 594 | GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); | ||
| 595 | gl->gl_target = gl->gl_demote_state; | ||
| 596 | } else { | ||
| 597 | if (test_bit(GLF_DEMOTE, &gl->gl_flags)) | ||
| 598 | gfs2_demote_wake(gl); | ||
| 599 | if (do_promote(gl) == 0) | ||
| 600 | goto out; | ||
| 601 | gh = find_first_waiter(gl); | ||
| 602 | gl->gl_target = gh->gh_state; | ||
| 603 | if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) | ||
| 604 | do_error(gl, 0); /* Fail queued try locks */ | ||
| 605 | } | ||
| 606 | do_xmote(gl, gh, gl->gl_target); | ||
| 607 | return; | ||
| 608 | |||
| 609 | out_sched: | ||
| 610 | gfs2_glock_hold(gl); | ||
| 611 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | ||
| 612 | gfs2_glock_put(gl); | ||
| 613 | out: | ||
| 614 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 615 | } | ||
| 616 | |||
| 284 | static void glock_work_func(struct work_struct *work) | 617 | static void glock_work_func(struct work_struct *work) |
| 285 | { | 618 | { |
| 619 | unsigned long delay = 0; | ||
| 286 | struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); | 620 | struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); |
| 287 | 621 | ||
| 622 | if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) | ||
| 623 | finish_xmote(gl, gl->gl_reply); | ||
| 288 | spin_lock(&gl->gl_spin); | 624 | spin_lock(&gl->gl_spin); |
| 289 | if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)) | 625 | if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && |
| 290 | set_bit(GLF_DEMOTE, &gl->gl_flags); | 626 | gl->gl_state != LM_ST_UNLOCKED && |
| 291 | run_queue(gl); | 627 | gl->gl_demote_state != LM_ST_EXCLUSIVE) { |
| 628 | unsigned long holdtime, now = jiffies; | ||
| 629 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; | ||
| 630 | if (time_before(now, holdtime)) | ||
| 631 | delay = holdtime - now; | ||
| 632 | set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags); | ||
| 633 | } | ||
| 634 | run_queue(gl, 0); | ||
| 292 | spin_unlock(&gl->gl_spin); | 635 | spin_unlock(&gl->gl_spin); |
| 293 | gfs2_glock_put(gl); | 636 | if (!delay || |
| 637 | queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) | ||
| 638 | gfs2_glock_put(gl); | ||
| 294 | } | 639 | } |
| 295 | 640 | ||
| 296 | static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, | 641 | static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, |
| 297 | void **lockp) | 642 | void **lockp) |
| 298 | { | 643 | { |
| 299 | int error = -EIO; | 644 | int error = -EIO; |
| 645 | if (!sdp->sd_lockstruct.ls_ops->lm_get_lock) | ||
| 646 | return 0; | ||
| 300 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 647 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 301 | error = sdp->sd_lockstruct.ls_ops->lm_get_lock( | 648 | error = sdp->sd_lockstruct.ls_ops->lm_get_lock( |
| 302 | sdp->sd_lockstruct.ls_lockspace, name, lockp); | 649 | sdp->sd_lockstruct.ls_lockspace, name, lockp); |
| @@ -342,12 +689,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
| 342 | gl->gl_name = name; | 689 | gl->gl_name = name; |
| 343 | atomic_set(&gl->gl_ref, 1); | 690 | atomic_set(&gl->gl_ref, 1); |
| 344 | gl->gl_state = LM_ST_UNLOCKED; | 691 | gl->gl_state = LM_ST_UNLOCKED; |
| 692 | gl->gl_target = LM_ST_UNLOCKED; | ||
| 345 | gl->gl_demote_state = LM_ST_EXCLUSIVE; | 693 | gl->gl_demote_state = LM_ST_EXCLUSIVE; |
| 346 | gl->gl_hash = hash; | 694 | gl->gl_hash = hash; |
| 347 | gl->gl_owner_pid = NULL; | ||
| 348 | gl->gl_ip = 0; | ||
| 349 | gl->gl_ops = glops; | 695 | gl->gl_ops = glops; |
| 350 | gl->gl_req_gh = NULL; | ||
| 351 | gl->gl_stamp = jiffies; | 696 | gl->gl_stamp = jiffies; |
| 352 | gl->gl_tchange = jiffies; | 697 | gl->gl_tchange = jiffies; |
| 353 | gl->gl_object = NULL; | 698 | gl->gl_object = NULL; |
| @@ -447,13 +792,6 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) | |||
| 447 | gh->gh_ip = 0; | 792 | gh->gh_ip = 0; |
| 448 | } | 793 | } |
| 449 | 794 | ||
| 450 | static void gfs2_holder_wake(struct gfs2_holder *gh) | ||
| 451 | { | ||
| 452 | clear_bit(HIF_WAIT, &gh->gh_iflags); | ||
| 453 | smp_mb__after_clear_bit(); | ||
| 454 | wake_up_bit(&gh->gh_iflags, HIF_WAIT); | ||
| 455 | } | ||
| 456 | |||
| 457 | static int just_schedule(void *word) | 795 | static int just_schedule(void *word) |
| 458 | { | 796 | { |
| 459 | schedule(); | 797 | schedule(); |
| @@ -466,14 +804,6 @@ static void wait_on_holder(struct gfs2_holder *gh) | |||
| 466 | wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); | 804 | wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); |
| 467 | } | 805 | } |
| 468 | 806 | ||
| 469 | static void gfs2_demote_wake(struct gfs2_glock *gl) | ||
| 470 | { | ||
| 471 | gl->gl_demote_state = LM_ST_EXCLUSIVE; | ||
| 472 | clear_bit(GLF_DEMOTE, &gl->gl_flags); | ||
| 473 | smp_mb__after_clear_bit(); | ||
| 474 | wake_up_bit(&gl->gl_flags, GLF_DEMOTE); | ||
| 475 | } | ||
| 476 | |||
| 477 | static void wait_on_demote(struct gfs2_glock *gl) | 807 | static void wait_on_demote(struct gfs2_glock *gl) |
| 478 | { | 808 | { |
| 479 | might_sleep(); | 809 | might_sleep(); |
| @@ -481,217 +811,6 @@ static void wait_on_demote(struct gfs2_glock *gl) | |||
| 481 | } | 811 | } |
| 482 | 812 | ||
| 483 | /** | 813 | /** |
| 484 | * rq_mutex - process a mutex request in the queue | ||
| 485 | * @gh: the glock holder | ||
| 486 | * | ||
| 487 | * Returns: 1 if the queue is blocked | ||
| 488 | */ | ||
| 489 | |||
| 490 | static int rq_mutex(struct gfs2_holder *gh) | ||
| 491 | { | ||
| 492 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 493 | |||
| 494 | list_del_init(&gh->gh_list); | ||
| 495 | /* gh->gh_error never examined. */ | ||
| 496 | set_bit(GLF_LOCK, &gl->gl_flags); | ||
| 497 | clear_bit(HIF_WAIT, &gh->gh_iflags); | ||
| 498 | smp_mb(); | ||
| 499 | wake_up_bit(&gh->gh_iflags, HIF_WAIT); | ||
| 500 | |||
| 501 | return 1; | ||
| 502 | } | ||
| 503 | |||
| 504 | /** | ||
| 505 | * rq_promote - process a promote request in the queue | ||
| 506 | * @gh: the glock holder | ||
| 507 | * | ||
| 508 | * Acquire a new inter-node lock, or change a lock state to more restrictive. | ||
| 509 | * | ||
| 510 | * Returns: 1 if the queue is blocked | ||
| 511 | */ | ||
| 512 | |||
| 513 | static int rq_promote(struct gfs2_holder *gh) | ||
| 514 | { | ||
| 515 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 516 | |||
| 517 | if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { | ||
| 518 | if (list_empty(&gl->gl_holders)) { | ||
| 519 | gl->gl_req_gh = gh; | ||
| 520 | set_bit(GLF_LOCK, &gl->gl_flags); | ||
| 521 | spin_unlock(&gl->gl_spin); | ||
| 522 | gfs2_glock_xmote_th(gh->gh_gl, gh); | ||
| 523 | spin_lock(&gl->gl_spin); | ||
| 524 | } | ||
| 525 | return 1; | ||
| 526 | } | ||
| 527 | |||
| 528 | if (list_empty(&gl->gl_holders)) { | ||
| 529 | set_bit(HIF_FIRST, &gh->gh_iflags); | ||
| 530 | set_bit(GLF_LOCK, &gl->gl_flags); | ||
| 531 | } else { | ||
| 532 | struct gfs2_holder *next_gh; | ||
| 533 | if (gh->gh_state == LM_ST_EXCLUSIVE) | ||
| 534 | return 1; | ||
| 535 | next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, | ||
| 536 | gh_list); | ||
| 537 | if (next_gh->gh_state == LM_ST_EXCLUSIVE) | ||
| 538 | return 1; | ||
| 539 | } | ||
| 540 | |||
| 541 | list_move_tail(&gh->gh_list, &gl->gl_holders); | ||
| 542 | gh->gh_error = 0; | ||
| 543 | set_bit(HIF_HOLDER, &gh->gh_iflags); | ||
| 544 | |||
| 545 | gfs2_holder_wake(gh); | ||
| 546 | |||
| 547 | return 0; | ||
| 548 | } | ||
| 549 | |||
| 550 | /** | ||
| 551 | * rq_demote - process a demote request in the queue | ||
| 552 | * @gh: the glock holder | ||
| 553 | * | ||
| 554 | * Returns: 1 if the queue is blocked | ||
| 555 | */ | ||
| 556 | |||
| 557 | static int rq_demote(struct gfs2_glock *gl) | ||
| 558 | { | ||
| 559 | if (!list_empty(&gl->gl_holders)) | ||
| 560 | return 1; | ||
| 561 | |||
| 562 | if (gl->gl_state == gl->gl_demote_state || | ||
| 563 | gl->gl_state == LM_ST_UNLOCKED) { | ||
| 564 | gfs2_demote_wake(gl); | ||
| 565 | return 0; | ||
| 566 | } | ||
| 567 | |||
| 568 | set_bit(GLF_LOCK, &gl->gl_flags); | ||
| 569 | set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); | ||
| 570 | |||
| 571 | if (gl->gl_demote_state == LM_ST_UNLOCKED || | ||
| 572 | gl->gl_state != LM_ST_EXCLUSIVE) { | ||
| 573 | spin_unlock(&gl->gl_spin); | ||
| 574 | gfs2_glock_drop_th(gl); | ||
| 575 | } else { | ||
| 576 | spin_unlock(&gl->gl_spin); | ||
| 577 | gfs2_glock_xmote_th(gl, NULL); | ||
| 578 | } | ||
| 579 | |||
| 580 | spin_lock(&gl->gl_spin); | ||
| 581 | clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); | ||
| 582 | |||
| 583 | return 0; | ||
| 584 | } | ||
| 585 | |||
| 586 | /** | ||
| 587 | * run_queue - process holder structures on a glock | ||
| 588 | * @gl: the glock | ||
| 589 | * | ||
| 590 | */ | ||
| 591 | static void run_queue(struct gfs2_glock *gl) | ||
| 592 | { | ||
| 593 | struct gfs2_holder *gh; | ||
| 594 | int blocked = 1; | ||
| 595 | |||
| 596 | for (;;) { | ||
| 597 | if (test_bit(GLF_LOCK, &gl->gl_flags)) | ||
| 598 | break; | ||
| 599 | |||
| 600 | if (!list_empty(&gl->gl_waiters1)) { | ||
| 601 | gh = list_entry(gl->gl_waiters1.next, | ||
| 602 | struct gfs2_holder, gh_list); | ||
| 603 | blocked = rq_mutex(gh); | ||
| 604 | } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { | ||
| 605 | blocked = rq_demote(gl); | ||
| 606 | if (test_bit(GLF_WAITERS2, &gl->gl_flags) && | ||
| 607 | !blocked) { | ||
| 608 | set_bit(GLF_DEMOTE, &gl->gl_flags); | ||
| 609 | gl->gl_demote_state = LM_ST_UNLOCKED; | ||
| 610 | } | ||
| 611 | clear_bit(GLF_WAITERS2, &gl->gl_flags); | ||
| 612 | } else if (!list_empty(&gl->gl_waiters3)) { | ||
| 613 | gh = list_entry(gl->gl_waiters3.next, | ||
| 614 | struct gfs2_holder, gh_list); | ||
| 615 | blocked = rq_promote(gh); | ||
| 616 | } else | ||
| 617 | break; | ||
| 618 | |||
| 619 | if (blocked) | ||
| 620 | break; | ||
| 621 | } | ||
| 622 | } | ||
| 623 | |||
| 624 | /** | ||
| 625 | * gfs2_glmutex_lock - acquire a local lock on a glock | ||
| 626 | * @gl: the glock | ||
| 627 | * | ||
| 628 | * Gives caller exclusive access to manipulate a glock structure. | ||
| 629 | */ | ||
| 630 | |||
| 631 | static void gfs2_glmutex_lock(struct gfs2_glock *gl) | ||
| 632 | { | ||
| 633 | spin_lock(&gl->gl_spin); | ||
| 634 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | ||
| 635 | struct gfs2_holder gh; | ||
| 636 | |||
| 637 | gfs2_holder_init(gl, 0, 0, &gh); | ||
| 638 | set_bit(HIF_WAIT, &gh.gh_iflags); | ||
| 639 | list_add_tail(&gh.gh_list, &gl->gl_waiters1); | ||
| 640 | spin_unlock(&gl->gl_spin); | ||
| 641 | wait_on_holder(&gh); | ||
| 642 | gfs2_holder_uninit(&gh); | ||
| 643 | } else { | ||
| 644 | gl->gl_owner_pid = get_pid(task_pid(current)); | ||
| 645 | gl->gl_ip = (unsigned long)__builtin_return_address(0); | ||
| 646 | spin_unlock(&gl->gl_spin); | ||
| 647 | } | ||
| 648 | } | ||
| 649 | |||
| 650 | /** | ||
| 651 | * gfs2_glmutex_trylock - try to acquire a local lock on a glock | ||
| 652 | * @gl: the glock | ||
| 653 | * | ||
| 654 | * Returns: 1 if the glock is acquired | ||
| 655 | */ | ||
| 656 | |||
| 657 | static int gfs2_glmutex_trylock(struct gfs2_glock *gl) | ||
| 658 | { | ||
| 659 | int acquired = 1; | ||
| 660 | |||
| 661 | spin_lock(&gl->gl_spin); | ||
| 662 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | ||
| 663 | acquired = 0; | ||
| 664 | } else { | ||
| 665 | gl->gl_owner_pid = get_pid(task_pid(current)); | ||
| 666 | gl->gl_ip = (unsigned long)__builtin_return_address(0); | ||
| 667 | } | ||
| 668 | spin_unlock(&gl->gl_spin); | ||
| 669 | |||
| 670 | return acquired; | ||
| 671 | } | ||
| 672 | |||
| 673 | /** | ||
| 674 | * gfs2_glmutex_unlock - release a local lock on a glock | ||
| 675 | * @gl: the glock | ||
| 676 | * | ||
| 677 | */ | ||
| 678 | |||
| 679 | static void gfs2_glmutex_unlock(struct gfs2_glock *gl) | ||
| 680 | { | ||
| 681 | struct pid *pid; | ||
| 682 | |||
| 683 | spin_lock(&gl->gl_spin); | ||
| 684 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 685 | pid = gl->gl_owner_pid; | ||
| 686 | gl->gl_owner_pid = NULL; | ||
| 687 | gl->gl_ip = 0; | ||
| 688 | run_queue(gl); | ||
| 689 | spin_unlock(&gl->gl_spin); | ||
| 690 | |||
| 691 | put_pid(pid); | ||
| 692 | } | ||
| 693 | |||
| 694 | /** | ||
| 695 | * handle_callback - process a demote request | 814 | * handle_callback - process a demote request |
| 696 | * @gl: the glock | 815 | * @gl: the glock |
| 697 | * @state: the state the caller wants us to change to | 816 | * @state: the state the caller wants us to change to |
| @@ -705,398 +824,45 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, | |||
| 705 | { | 824 | { |
| 706 | int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; | 825 | int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; |
| 707 | 826 | ||
| 708 | spin_lock(&gl->gl_spin); | ||
| 709 | set_bit(bit, &gl->gl_flags); | 827 | set_bit(bit, &gl->gl_flags); |
| 710 | if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { | 828 | if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { |
| 711 | gl->gl_demote_state = state; | 829 | gl->gl_demote_state = state; |
| 712 | gl->gl_demote_time = jiffies; | 830 | gl->gl_demote_time = jiffies; |
| 713 | if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && | 831 | if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && |
| 714 | gl->gl_object) { | 832 | gl->gl_object) |
| 715 | gfs2_glock_schedule_for_reclaim(gl); | 833 | gfs2_glock_schedule_for_reclaim(gl); |
| 716 | spin_unlock(&gl->gl_spin); | ||
| 717 | return; | ||
| 718 | } | ||
| 719 | } else if (gl->gl_demote_state != LM_ST_UNLOCKED && | 834 | } else if (gl->gl_demote_state != LM_ST_UNLOCKED && |
| 720 | gl->gl_demote_state != state) { | 835 | gl->gl_demote_state != state) { |
| 721 | if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) | 836 | gl->gl_demote_state = LM_ST_UNLOCKED; |
| 722 | set_bit(GLF_WAITERS2, &gl->gl_flags); | ||
| 723 | else | ||
| 724 | gl->gl_demote_state = LM_ST_UNLOCKED; | ||
| 725 | } | ||
| 726 | spin_unlock(&gl->gl_spin); | ||
| 727 | } | ||
| 728 | |||
| 729 | /** | ||
| 730 | * state_change - record that the glock is now in a different state | ||
| 731 | * @gl: the glock | ||
| 732 | * @new_state the new state | ||
| 733 | * | ||
| 734 | */ | ||
| 735 | |||
| 736 | static void state_change(struct gfs2_glock *gl, unsigned int new_state) | ||
| 737 | { | ||
| 738 | int held1, held2; | ||
| 739 | |||
| 740 | held1 = (gl->gl_state != LM_ST_UNLOCKED); | ||
| 741 | held2 = (new_state != LM_ST_UNLOCKED); | ||
| 742 | |||
| 743 | if (held1 != held2) { | ||
| 744 | if (held2) | ||
| 745 | gfs2_glock_hold(gl); | ||
| 746 | else | ||
| 747 | gfs2_glock_put(gl); | ||
| 748 | } | 837 | } |
| 749 | |||
| 750 | gl->gl_state = new_state; | ||
| 751 | gl->gl_tchange = jiffies; | ||
| 752 | } | 838 | } |
| 753 | 839 | ||
| 754 | /** | 840 | /** |
| 755 | * drop_bh - Called after a lock module unlock completes | 841 | * gfs2_glock_wait - wait on a glock acquisition |
| 756 | * @gl: the glock | ||
| 757 | * @ret: the return status | ||
| 758 | * | ||
| 759 | * Doesn't wake up the process waiting on the struct gfs2_holder (if any) | ||
| 760 | * Doesn't drop the reference on the glock the top half took out | ||
| 761 | * | ||
| 762 | */ | ||
| 763 | |||
| 764 | static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | ||
| 765 | { | ||
| 766 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 767 | struct gfs2_holder *gh = gl->gl_req_gh; | ||
| 768 | |||
| 769 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 770 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | ||
| 771 | gfs2_assert_warn(sdp, !ret); | ||
| 772 | |||
| 773 | state_change(gl, LM_ST_UNLOCKED); | ||
| 774 | |||
| 775 | if (test_and_clear_bit(GLF_CONV_DEADLK, &gl->gl_flags)) { | ||
| 776 | spin_lock(&gl->gl_spin); | ||
| 777 | gh->gh_error = 0; | ||
| 778 | spin_unlock(&gl->gl_spin); | ||
| 779 | gfs2_glock_xmote_th(gl, gl->gl_req_gh); | ||
| 780 | gfs2_glock_put(gl); | ||
| 781 | return; | ||
| 782 | } | ||
| 783 | |||
| 784 | spin_lock(&gl->gl_spin); | ||
| 785 | gfs2_demote_wake(gl); | ||
| 786 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 787 | spin_unlock(&gl->gl_spin); | ||
| 788 | gfs2_glock_put(gl); | ||
| 789 | } | ||
| 790 | |||
| 791 | /** | ||
| 792 | * xmote_bh - Called after the lock module is done acquiring a lock | ||
| 793 | * @gl: The glock in question | ||
| 794 | * @ret: the int returned from the lock module | ||
| 795 | * | ||
| 796 | */ | ||
| 797 | |||
| 798 | static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | ||
| 799 | { | ||
| 800 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 801 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 802 | struct gfs2_holder *gh = gl->gl_req_gh; | ||
| 803 | int op_done = 1; | ||
| 804 | |||
| 805 | if (!gh && (ret & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) { | ||
| 806 | drop_bh(gl, ret); | ||
| 807 | return; | ||
| 808 | } | ||
| 809 | |||
| 810 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 811 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | ||
| 812 | gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); | ||
| 813 | |||
| 814 | state_change(gl, ret & LM_OUT_ST_MASK); | ||
| 815 | |||
| 816 | /* Deal with each possible exit condition */ | ||
| 817 | |||
| 818 | if (!gh) { | ||
| 819 | gl->gl_stamp = jiffies; | ||
| 820 | if (ret & LM_OUT_CANCELED) { | ||
| 821 | op_done = 0; | ||
| 822 | } else { | ||
| 823 | spin_lock(&gl->gl_spin); | ||
| 824 | if (gl->gl_state != gl->gl_demote_state) { | ||
| 825 | spin_unlock(&gl->gl_spin); | ||
| 826 | gfs2_glock_drop_th(gl); | ||
| 827 | gfs2_glock_put(gl); | ||
| 828 | return; | ||
| 829 | } | ||
| 830 | gfs2_demote_wake(gl); | ||
| 831 | spin_unlock(&gl->gl_spin); | ||
| 832 | } | ||
| 833 | } else { | ||
| 834 | spin_lock(&gl->gl_spin); | ||
| 835 | if (ret & LM_OUT_CONV_DEADLK) { | ||
| 836 | gh->gh_error = 0; | ||
| 837 | set_bit(GLF_CONV_DEADLK, &gl->gl_flags); | ||
| 838 | spin_unlock(&gl->gl_spin); | ||
| 839 | gfs2_glock_drop_th(gl); | ||
| 840 | gfs2_glock_put(gl); | ||
| 841 | return; | ||
| 842 | } | ||
| 843 | list_del_init(&gh->gh_list); | ||
| 844 | gh->gh_error = -EIO; | ||
| 845 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 846 | goto out; | ||
| 847 | gh->gh_error = GLR_CANCELED; | ||
| 848 | if (ret & LM_OUT_CANCELED) | ||
| 849 | goto out; | ||
| 850 | if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { | ||
| 851 | list_add_tail(&gh->gh_list, &gl->gl_holders); | ||
| 852 | gh->gh_error = 0; | ||
| 853 | set_bit(HIF_HOLDER, &gh->gh_iflags); | ||
| 854 | set_bit(HIF_FIRST, &gh->gh_iflags); | ||
| 855 | op_done = 0; | ||
| 856 | goto out; | ||
| 857 | } | ||
| 858 | gh->gh_error = GLR_TRYFAILED; | ||
| 859 | if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) | ||
| 860 | goto out; | ||
| 861 | gh->gh_error = -EINVAL; | ||
| 862 | if (gfs2_assert_withdraw(sdp, 0) == -1) | ||
| 863 | fs_err(sdp, "ret = 0x%.8X\n", ret); | ||
| 864 | out: | ||
| 865 | spin_unlock(&gl->gl_spin); | ||
| 866 | } | ||
| 867 | |||
| 868 | if (glops->go_xmote_bh) | ||
| 869 | glops->go_xmote_bh(gl); | ||
| 870 | |||
| 871 | if (op_done) { | ||
| 872 | spin_lock(&gl->gl_spin); | ||
| 873 | gl->gl_req_gh = NULL; | ||
| 874 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 875 | spin_unlock(&gl->gl_spin); | ||
| 876 | } | ||
| 877 | |||
| 878 | gfs2_glock_put(gl); | ||
| 879 | |||
| 880 | if (gh) | ||
| 881 | gfs2_holder_wake(gh); | ||
| 882 | } | ||
| 883 | |||
| 884 | static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, | ||
| 885 | unsigned int cur_state, unsigned int req_state, | ||
| 886 | unsigned int flags) | ||
| 887 | { | ||
| 888 | int ret = 0; | ||
| 889 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 890 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, | ||
| 891 | req_state, flags); | ||
| 892 | return ret; | ||
| 893 | } | ||
| 894 | |||
| 895 | /** | ||
| 896 | * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock | ||
| 897 | * @gl: The glock in question | ||
| 898 | * @state: the requested state | ||
| 899 | * @flags: modifier flags to the lock call | ||
| 900 | * | ||
| 901 | */ | ||
| 902 | |||
| 903 | static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) | ||
| 904 | { | ||
| 905 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 906 | int flags = gh ? gh->gh_flags : 0; | ||
| 907 | unsigned state = gh ? gh->gh_state : gl->gl_demote_state; | ||
| 908 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 909 | int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | | ||
| 910 | LM_FLAG_NOEXP | LM_FLAG_ANY | | ||
| 911 | LM_FLAG_PRIORITY); | ||
| 912 | unsigned int lck_ret; | ||
| 913 | |||
| 914 | if (glops->go_xmote_th) | ||
| 915 | glops->go_xmote_th(gl); | ||
| 916 | if (state == LM_ST_DEFERRED && glops->go_inval) | ||
| 917 | glops->go_inval(gl, DIO_METADATA); | ||
| 918 | |||
| 919 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 920 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | ||
| 921 | gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); | ||
| 922 | gfs2_assert_warn(sdp, state != gl->gl_state); | ||
| 923 | |||
| 924 | gfs2_glock_hold(gl); | ||
| 925 | |||
| 926 | lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags); | ||
| 927 | |||
| 928 | if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR))) | ||
| 929 | return; | ||
| 930 | |||
| 931 | if (lck_ret & LM_OUT_ASYNC) | ||
| 932 | gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC); | ||
| 933 | else | ||
| 934 | xmote_bh(gl, lck_ret); | ||
| 935 | } | ||
| 936 | |||
| 937 | static unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock, | ||
| 938 | unsigned int cur_state) | ||
| 939 | { | ||
| 940 | int ret = 0; | ||
| 941 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 942 | ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state); | ||
| 943 | return ret; | ||
| 944 | } | ||
| 945 | |||
| 946 | /** | ||
| 947 | * gfs2_glock_drop_th - call into the lock module to unlock a lock | ||
| 948 | * @gl: the glock | ||
| 949 | * | ||
| 950 | */ | ||
| 951 | |||
| 952 | static void gfs2_glock_drop_th(struct gfs2_glock *gl) | ||
| 953 | { | ||
| 954 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 955 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 956 | unsigned int ret; | ||
| 957 | |||
| 958 | if (glops->go_xmote_th) | ||
| 959 | glops->go_xmote_th(gl); | ||
| 960 | if (glops->go_inval) | ||
| 961 | glops->go_inval(gl, DIO_METADATA); | ||
| 962 | |||
| 963 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 964 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | ||
| 965 | gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); | ||
| 966 | |||
| 967 | gfs2_glock_hold(gl); | ||
| 968 | |||
| 969 | ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state); | ||
| 970 | |||
| 971 | if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR))) | ||
| 972 | return; | ||
| 973 | |||
| 974 | if (!ret) | ||
| 975 | drop_bh(gl, ret); | ||
| 976 | else | ||
| 977 | gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC); | ||
| 978 | } | ||
| 979 | |||
| 980 | /** | ||
| 981 | * do_cancels - cancel requests for locks stuck waiting on an expire flag | ||
| 982 | * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock | ||
| 983 | * | ||
| 984 | * Don't cancel GL_NOCANCEL requests. | ||
| 985 | */ | ||
| 986 | |||
| 987 | static void do_cancels(struct gfs2_holder *gh) | ||
| 988 | { | ||
| 989 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 990 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 991 | |||
| 992 | spin_lock(&gl->gl_spin); | ||
| 993 | |||
| 994 | while (gl->gl_req_gh != gh && | ||
| 995 | !test_bit(HIF_HOLDER, &gh->gh_iflags) && | ||
| 996 | !list_empty(&gh->gh_list)) { | ||
| 997 | if (!(gl->gl_req_gh && (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) { | ||
| 998 | spin_unlock(&gl->gl_spin); | ||
| 999 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 1000 | sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock); | ||
| 1001 | msleep(100); | ||
| 1002 | spin_lock(&gl->gl_spin); | ||
| 1003 | } else { | ||
| 1004 | spin_unlock(&gl->gl_spin); | ||
| 1005 | msleep(100); | ||
| 1006 | spin_lock(&gl->gl_spin); | ||
| 1007 | } | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | spin_unlock(&gl->gl_spin); | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | /** | ||
| 1014 | * glock_wait_internal - wait on a glock acquisition | ||
| 1015 | * @gh: the glock holder | 842 | * @gh: the glock holder |
| 1016 | * | 843 | * |
| 1017 | * Returns: 0 on success | 844 | * Returns: 0 on success |
| 1018 | */ | 845 | */ |
| 1019 | 846 | ||
| 1020 | static int glock_wait_internal(struct gfs2_holder *gh) | 847 | int gfs2_glock_wait(struct gfs2_holder *gh) |
| 1021 | { | 848 | { |
| 1022 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 1023 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 1024 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 1025 | |||
| 1026 | if (test_bit(HIF_ABORTED, &gh->gh_iflags)) | ||
| 1027 | return -EIO; | ||
| 1028 | |||
| 1029 | if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { | ||
| 1030 | spin_lock(&gl->gl_spin); | ||
| 1031 | if (gl->gl_req_gh != gh && | ||
| 1032 | !test_bit(HIF_HOLDER, &gh->gh_iflags) && | ||
| 1033 | !list_empty(&gh->gh_list)) { | ||
| 1034 | list_del_init(&gh->gh_list); | ||
| 1035 | gh->gh_error = GLR_TRYFAILED; | ||
| 1036 | run_queue(gl); | ||
| 1037 | spin_unlock(&gl->gl_spin); | ||
| 1038 | return gh->gh_error; | ||
| 1039 | } | ||
| 1040 | spin_unlock(&gl->gl_spin); | ||
| 1041 | } | ||
| 1042 | |||
| 1043 | if (gh->gh_flags & LM_FLAG_PRIORITY) | ||
| 1044 | do_cancels(gh); | ||
| 1045 | |||
| 1046 | wait_on_holder(gh); | 849 | wait_on_holder(gh); |
| 1047 | if (gh->gh_error) | ||
| 1048 | return gh->gh_error; | ||
| 1049 | |||
| 1050 | gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags)); | ||
| 1051 | gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state, gh->gh_state, | ||
| 1052 | gh->gh_flags)); | ||
| 1053 | |||
| 1054 | if (test_bit(HIF_FIRST, &gh->gh_iflags)) { | ||
| 1055 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 1056 | |||
| 1057 | if (glops->go_lock) { | ||
| 1058 | gh->gh_error = glops->go_lock(gh); | ||
| 1059 | if (gh->gh_error) { | ||
| 1060 | spin_lock(&gl->gl_spin); | ||
| 1061 | list_del_init(&gh->gh_list); | ||
| 1062 | spin_unlock(&gl->gl_spin); | ||
| 1063 | } | ||
| 1064 | } | ||
| 1065 | |||
| 1066 | spin_lock(&gl->gl_spin); | ||
| 1067 | gl->gl_req_gh = NULL; | ||
| 1068 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 1069 | run_queue(gl); | ||
| 1070 | spin_unlock(&gl->gl_spin); | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | return gh->gh_error; | 850 | return gh->gh_error; |
| 1074 | } | 851 | } |
| 1075 | 852 | ||
| 1076 | static inline struct gfs2_holder * | 853 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) |
| 1077 | find_holder_by_owner(struct list_head *head, struct pid *pid) | ||
| 1078 | { | ||
| 1079 | struct gfs2_holder *gh; | ||
| 1080 | |||
| 1081 | list_for_each_entry(gh, head, gh_list) { | ||
| 1082 | if (gh->gh_owner_pid == pid) | ||
| 1083 | return gh; | ||
| 1084 | } | ||
| 1085 | |||
| 1086 | return NULL; | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | static void print_dbg(struct glock_iter *gi, const char *fmt, ...) | ||
| 1090 | { | 854 | { |
| 1091 | va_list args; | 855 | va_list args; |
| 1092 | 856 | ||
| 1093 | va_start(args, fmt); | 857 | va_start(args, fmt); |
| 1094 | if (gi) { | 858 | if (seq) { |
| 859 | struct gfs2_glock_iter *gi = seq->private; | ||
| 1095 | vsprintf(gi->string, fmt, args); | 860 | vsprintf(gi->string, fmt, args); |
| 1096 | seq_printf(gi->seq, gi->string); | 861 | seq_printf(seq, gi->string); |
| 1097 | } | 862 | } else { |
| 1098 | else | 863 | printk(KERN_ERR " "); |
| 1099 | vprintk(fmt, args); | 864 | vprintk(fmt, args); |
| 865 | } | ||
| 1100 | va_end(args); | 866 | va_end(args); |
| 1101 | } | 867 | } |
| 1102 | 868 | ||
| @@ -1104,50 +870,76 @@ static void print_dbg(struct glock_iter *gi, const char *fmt, ...) | |||
| 1104 | * add_to_queue - Add a holder to the wait queue (but look for recursion) | 870 | * add_to_queue - Add a holder to the wait queue (but look for recursion) |
| 1105 | * @gh: the holder structure to add | 871 | * @gh: the holder structure to add |
| 1106 | * | 872 | * |
| 873 | * Eventually we should move the recursive locking trap to a | ||
| 874 | * debugging option or something like that. This is the fast | ||
| 875 | * path and needs to have the minimum number of distractions. | ||
| 876 | * | ||
| 1107 | */ | 877 | */ |
| 1108 | 878 | ||
| 1109 | static void add_to_queue(struct gfs2_holder *gh) | 879 | static inline void add_to_queue(struct gfs2_holder *gh) |
| 1110 | { | 880 | { |
| 1111 | struct gfs2_glock *gl = gh->gh_gl; | 881 | struct gfs2_glock *gl = gh->gh_gl; |
| 1112 | struct gfs2_holder *existing; | 882 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| 883 | struct list_head *insert_pt = NULL; | ||
| 884 | struct gfs2_holder *gh2; | ||
| 885 | int try_lock = 0; | ||
| 1113 | 886 | ||
| 1114 | BUG_ON(gh->gh_owner_pid == NULL); | 887 | BUG_ON(gh->gh_owner_pid == NULL); |
| 1115 | if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) | 888 | if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) |
| 1116 | BUG(); | 889 | BUG(); |
| 1117 | 890 | ||
| 1118 | if (!(gh->gh_flags & GL_FLOCK)) { | 891 | if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { |
| 1119 | existing = find_holder_by_owner(&gl->gl_holders, | 892 | if (test_bit(GLF_LOCK, &gl->gl_flags)) |
| 1120 | gh->gh_owner_pid); | 893 | try_lock = 1; |
| 1121 | if (existing) { | 894 | if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) |
| 1122 | print_symbol(KERN_WARNING "original: %s\n", | 895 | goto fail; |
| 1123 | existing->gh_ip); | 896 | } |
| 1124 | printk(KERN_INFO "pid : %d\n", | 897 | |
| 1125 | pid_nr(existing->gh_owner_pid)); | 898 | list_for_each_entry(gh2, &gl->gl_holders, gh_list) { |
| 1126 | printk(KERN_INFO "lock type : %d lock state : %d\n", | 899 | if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && |
| 1127 | existing->gh_gl->gl_name.ln_type, | 900 | (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) |
| 1128 | existing->gh_gl->gl_state); | 901 | goto trap_recursive; |
| 1129 | print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); | 902 | if (try_lock && |
| 1130 | printk(KERN_INFO "pid : %d\n", | 903 | !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) && |
| 1131 | pid_nr(gh->gh_owner_pid)); | 904 | !may_grant(gl, gh)) { |
| 1132 | printk(KERN_INFO "lock type : %d lock state : %d\n", | 905 | fail: |
| 1133 | gl->gl_name.ln_type, gl->gl_state); | 906 | gh->gh_error = GLR_TRYFAILED; |
| 1134 | BUG(); | 907 | gfs2_holder_wake(gh); |
| 1135 | } | 908 | return; |
| 1136 | |||
| 1137 | existing = find_holder_by_owner(&gl->gl_waiters3, | ||
| 1138 | gh->gh_owner_pid); | ||
| 1139 | if (existing) { | ||
| 1140 | print_symbol(KERN_WARNING "original: %s\n", | ||
| 1141 | existing->gh_ip); | ||
| 1142 | print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); | ||
| 1143 | BUG(); | ||
| 1144 | } | 909 | } |
| 910 | if (test_bit(HIF_HOLDER, &gh2->gh_iflags)) | ||
| 911 | continue; | ||
| 912 | if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) | ||
| 913 | insert_pt = &gh2->gh_list; | ||
| 914 | } | ||
| 915 | if (likely(insert_pt == NULL)) { | ||
| 916 | list_add_tail(&gh->gh_list, &gl->gl_holders); | ||
| 917 | if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) | ||
| 918 | goto do_cancel; | ||
| 919 | return; | ||
| 920 | } | ||
| 921 | list_add_tail(&gh->gh_list, insert_pt); | ||
| 922 | do_cancel: | ||
| 923 | gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); | ||
| 924 | if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { | ||
| 925 | spin_unlock(&gl->gl_spin); | ||
| 926 | if (sdp->sd_lockstruct.ls_ops->lm_cancel) | ||
| 927 | sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock); | ||
| 928 | spin_lock(&gl->gl_spin); | ||
| 1145 | } | 929 | } |
| 930 | return; | ||
| 1146 | 931 | ||
| 1147 | if (gh->gh_flags & LM_FLAG_PRIORITY) | 932 | trap_recursive: |
| 1148 | list_add(&gh->gh_list, &gl->gl_waiters3); | 933 | print_symbol(KERN_ERR "original: %s\n", gh2->gh_ip); |
| 1149 | else | 934 | printk(KERN_ERR "pid: %d\n", pid_nr(gh2->gh_owner_pid)); |
| 1150 | list_add_tail(&gh->gh_list, &gl->gl_waiters3); | 935 | printk(KERN_ERR "lock type: %d req lock state : %d\n", |
| 936 | gh2->gh_gl->gl_name.ln_type, gh2->gh_state); | ||
| 937 | print_symbol(KERN_ERR "new: %s\n", gh->gh_ip); | ||
| 938 | printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); | ||
| 939 | printk(KERN_ERR "lock type: %d req lock state : %d\n", | ||
| 940 | gh->gh_gl->gl_name.ln_type, gh->gh_state); | ||
| 941 | __dump_glock(NULL, gl); | ||
| 942 | BUG(); | ||
| 1151 | } | 943 | } |
| 1152 | 944 | ||
| 1153 | /** | 945 | /** |
| @@ -1165,24 +957,16 @@ int gfs2_glock_nq(struct gfs2_holder *gh) | |||
| 1165 | struct gfs2_sbd *sdp = gl->gl_sbd; | 957 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| 1166 | int error = 0; | 958 | int error = 0; |
| 1167 | 959 | ||
| 1168 | restart: | 960 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 1169 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { | ||
| 1170 | set_bit(HIF_ABORTED, &gh->gh_iflags); | ||
| 1171 | return -EIO; | 961 | return -EIO; |
| 1172 | } | ||
| 1173 | 962 | ||
| 1174 | spin_lock(&gl->gl_spin); | 963 | spin_lock(&gl->gl_spin); |
| 1175 | add_to_queue(gh); | 964 | add_to_queue(gh); |
| 1176 | run_queue(gl); | 965 | run_queue(gl, 1); |
| 1177 | spin_unlock(&gl->gl_spin); | 966 | spin_unlock(&gl->gl_spin); |
| 1178 | 967 | ||
| 1179 | if (!(gh->gh_flags & GL_ASYNC)) { | 968 | if (!(gh->gh_flags & GL_ASYNC)) |
| 1180 | error = glock_wait_internal(gh); | 969 | error = gfs2_glock_wait(gh); |
| 1181 | if (error == GLR_CANCELED) { | ||
| 1182 | msleep(100); | ||
| 1183 | goto restart; | ||
| 1184 | } | ||
| 1185 | } | ||
| 1186 | 970 | ||
| 1187 | return error; | 971 | return error; |
| 1188 | } | 972 | } |
| @@ -1196,48 +980,7 @@ restart: | |||
| 1196 | 980 | ||
| 1197 | int gfs2_glock_poll(struct gfs2_holder *gh) | 981 | int gfs2_glock_poll(struct gfs2_holder *gh) |
| 1198 | { | 982 | { |
| 1199 | struct gfs2_glock *gl = gh->gh_gl; | 983 | return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1; |
| 1200 | int ready = 0; | ||
| 1201 | |||
| 1202 | spin_lock(&gl->gl_spin); | ||
| 1203 | |||
| 1204 | if (test_bit(HIF_HOLDER, &gh->gh_iflags)) | ||
| 1205 | ready = 1; | ||
| 1206 | else if (list_empty(&gh->gh_list)) { | ||
| 1207 | if (gh->gh_error == GLR_CANCELED) { | ||
| 1208 | spin_unlock(&gl->gl_spin); | ||
| 1209 | msleep(100); | ||
| 1210 | if (gfs2_glock_nq(gh)) | ||
| 1211 | return 1; | ||
| 1212 | return 0; | ||
| 1213 | } else | ||
| 1214 | ready = 1; | ||
| 1215 | } | ||
| 1216 | |||
| 1217 | spin_unlock(&gl->gl_spin); | ||
| 1218 | |||
| 1219 | return ready; | ||
| 1220 | } | ||
| 1221 | |||
| 1222 | /** | ||
| 1223 | * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC | ||
| 1224 | * @gh: the holder structure | ||
| 1225 | * | ||
| 1226 | * Returns: 0, GLR_TRYFAILED, or errno on failure | ||
| 1227 | */ | ||
| 1228 | |||
| 1229 | int gfs2_glock_wait(struct gfs2_holder *gh) | ||
| 1230 | { | ||
| 1231 | int error; | ||
| 1232 | |||
| 1233 | error = glock_wait_internal(gh); | ||
| 1234 | if (error == GLR_CANCELED) { | ||
| 1235 | msleep(100); | ||
| 1236 | gh->gh_flags &= ~GL_ASYNC; | ||
| 1237 | error = gfs2_glock_nq(gh); | ||
| 1238 | } | ||
| 1239 | |||
| 1240 | return error; | ||
| 1241 | } | 984 | } |
| 1242 | 985 | ||
| 1243 | /** | 986 | /** |
| @@ -1251,26 +994,30 @@ void gfs2_glock_dq(struct gfs2_holder *gh) | |||
| 1251 | struct gfs2_glock *gl = gh->gh_gl; | 994 | struct gfs2_glock *gl = gh->gh_gl; |
| 1252 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 995 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
| 1253 | unsigned delay = 0; | 996 | unsigned delay = 0; |
| 997 | int fast_path = 0; | ||
| 1254 | 998 | ||
| 999 | spin_lock(&gl->gl_spin); | ||
| 1255 | if (gh->gh_flags & GL_NOCACHE) | 1000 | if (gh->gh_flags & GL_NOCACHE) |
| 1256 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); | 1001 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); |
| 1257 | 1002 | ||
| 1258 | gfs2_glmutex_lock(gl); | ||
| 1259 | |||
| 1260 | spin_lock(&gl->gl_spin); | ||
| 1261 | list_del_init(&gh->gh_list); | 1003 | list_del_init(&gh->gh_list); |
| 1262 | 1004 | if (find_first_holder(gl) == NULL) { | |
| 1263 | if (list_empty(&gl->gl_holders)) { | ||
| 1264 | if (glops->go_unlock) { | 1005 | if (glops->go_unlock) { |
| 1006 | GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 1265 | spin_unlock(&gl->gl_spin); | 1007 | spin_unlock(&gl->gl_spin); |
| 1266 | glops->go_unlock(gh); | 1008 | glops->go_unlock(gh); |
| 1267 | spin_lock(&gl->gl_spin); | 1009 | spin_lock(&gl->gl_spin); |
| 1010 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 1268 | } | 1011 | } |
| 1269 | gl->gl_stamp = jiffies; | 1012 | gl->gl_stamp = jiffies; |
| 1013 | if (list_empty(&gl->gl_holders) && | ||
| 1014 | !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && | ||
| 1015 | !test_bit(GLF_DEMOTE, &gl->gl_flags)) | ||
| 1016 | fast_path = 1; | ||
| 1270 | } | 1017 | } |
| 1271 | |||
| 1272 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 1273 | spin_unlock(&gl->gl_spin); | 1018 | spin_unlock(&gl->gl_spin); |
| 1019 | if (likely(fast_path)) | ||
| 1020 | return; | ||
| 1274 | 1021 | ||
| 1275 | gfs2_glock_hold(gl); | 1022 | gfs2_glock_hold(gl); |
| 1276 | if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && | 1023 | if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && |
| @@ -1454,6 +1201,8 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) | |||
| 1454 | static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) | 1201 | static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) |
| 1455 | { | 1202 | { |
| 1456 | int error = -EIO; | 1203 | int error = -EIO; |
| 1204 | if (!sdp->sd_lockstruct.ls_ops->lm_hold_lvb) | ||
| 1205 | return 0; | ||
| 1457 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 1206 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 1458 | error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp); | 1207 | error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp); |
| 1459 | return error; | 1208 | return error; |
| @@ -1469,20 +1218,14 @@ int gfs2_lvb_hold(struct gfs2_glock *gl) | |||
| 1469 | { | 1218 | { |
| 1470 | int error; | 1219 | int error; |
| 1471 | 1220 | ||
| 1472 | gfs2_glmutex_lock(gl); | ||
| 1473 | |||
| 1474 | if (!atomic_read(&gl->gl_lvb_count)) { | 1221 | if (!atomic_read(&gl->gl_lvb_count)) { |
| 1475 | error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb); | 1222 | error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb); |
| 1476 | if (error) { | 1223 | if (error) |
| 1477 | gfs2_glmutex_unlock(gl); | ||
| 1478 | return error; | 1224 | return error; |
| 1479 | } | ||
| 1480 | gfs2_glock_hold(gl); | 1225 | gfs2_glock_hold(gl); |
| 1481 | } | 1226 | } |
| 1482 | atomic_inc(&gl->gl_lvb_count); | 1227 | atomic_inc(&gl->gl_lvb_count); |
| 1483 | 1228 | ||
| 1484 | gfs2_glmutex_unlock(gl); | ||
| 1485 | |||
| 1486 | return 0; | 1229 | return 0; |
| 1487 | } | 1230 | } |
| 1488 | 1231 | ||
| @@ -1497,17 +1240,13 @@ void gfs2_lvb_unhold(struct gfs2_glock *gl) | |||
| 1497 | struct gfs2_sbd *sdp = gl->gl_sbd; | 1240 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| 1498 | 1241 | ||
| 1499 | gfs2_glock_hold(gl); | 1242 | gfs2_glock_hold(gl); |
| 1500 | gfs2_glmutex_lock(gl); | ||
| 1501 | |||
| 1502 | gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0); | 1243 | gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0); |
| 1503 | if (atomic_dec_and_test(&gl->gl_lvb_count)) { | 1244 | if (atomic_dec_and_test(&gl->gl_lvb_count)) { |
| 1504 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 1245 | if (sdp->sd_lockstruct.ls_ops->lm_unhold_lvb) |
| 1505 | sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb); | 1246 | sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb); |
| 1506 | gl->gl_lvb = NULL; | 1247 | gl->gl_lvb = NULL; |
| 1507 | gfs2_glock_put(gl); | 1248 | gfs2_glock_put(gl); |
| 1508 | } | 1249 | } |
| 1509 | |||
| 1510 | gfs2_glmutex_unlock(gl); | ||
| 1511 | gfs2_glock_put(gl); | 1250 | gfs2_glock_put(gl); |
| 1512 | } | 1251 | } |
| 1513 | 1252 | ||
| @@ -1527,7 +1266,9 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, | |||
| 1527 | if (time_before(now, holdtime)) | 1266 | if (time_before(now, holdtime)) |
| 1528 | delay = holdtime - now; | 1267 | delay = holdtime - now; |
| 1529 | 1268 | ||
| 1269 | spin_lock(&gl->gl_spin); | ||
| 1530 | handle_callback(gl, state, 1, delay); | 1270 | handle_callback(gl, state, 1, delay); |
| 1271 | spin_unlock(&gl->gl_spin); | ||
| 1531 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) | 1272 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) |
| 1532 | gfs2_glock_put(gl); | 1273 | gfs2_glock_put(gl); |
| 1533 | } | 1274 | } |
| @@ -1568,7 +1309,8 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) | |||
| 1568 | gl = gfs2_glock_find(sdp, &async->lc_name); | 1309 | gl = gfs2_glock_find(sdp, &async->lc_name); |
| 1569 | if (gfs2_assert_warn(sdp, gl)) | 1310 | if (gfs2_assert_warn(sdp, gl)) |
| 1570 | return; | 1311 | return; |
| 1571 | xmote_bh(gl, async->lc_ret); | 1312 | gl->gl_reply = async->lc_ret; |
| 1313 | set_bit(GLF_REPLY_PENDING, &gl->gl_flags); | ||
| 1572 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | 1314 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) |
| 1573 | gfs2_glock_put(gl); | 1315 | gfs2_glock_put(gl); |
| 1574 | up_read(&gfs2_umount_flush_sem); | 1316 | up_read(&gfs2_umount_flush_sem); |
| @@ -1581,11 +1323,6 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) | |||
| 1581 | wake_up_process(sdp->sd_recoverd_process); | 1323 | wake_up_process(sdp->sd_recoverd_process); |
| 1582 | return; | 1324 | return; |
| 1583 | 1325 | ||
| 1584 | case LM_CB_DROPLOCKS: | ||
| 1585 | gfs2_gl_hash_clear(sdp, NO_WAIT); | ||
| 1586 | gfs2_quota_scan(sdp); | ||
| 1587 | return; | ||
| 1588 | |||
| 1589 | default: | 1326 | default: |
| 1590 | gfs2_assert_warn(sdp, 0); | 1327 | gfs2_assert_warn(sdp, 0); |
| 1591 | return; | 1328 | return; |
| @@ -1646,6 +1383,7 @@ void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) | |||
| 1646 | void gfs2_reclaim_glock(struct gfs2_sbd *sdp) | 1383 | void gfs2_reclaim_glock(struct gfs2_sbd *sdp) |
| 1647 | { | 1384 | { |
| 1648 | struct gfs2_glock *gl; | 1385 | struct gfs2_glock *gl; |
| 1386 | int done_callback = 0; | ||
| 1649 | 1387 | ||
| 1650 | spin_lock(&sdp->sd_reclaim_lock); | 1388 | spin_lock(&sdp->sd_reclaim_lock); |
| 1651 | if (list_empty(&sdp->sd_reclaim_list)) { | 1389 | if (list_empty(&sdp->sd_reclaim_list)) { |
| @@ -1660,14 +1398,16 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp) | |||
| 1660 | atomic_dec(&sdp->sd_reclaim_count); | 1398 | atomic_dec(&sdp->sd_reclaim_count); |
| 1661 | atomic_inc(&sdp->sd_reclaimed); | 1399 | atomic_inc(&sdp->sd_reclaimed); |
| 1662 | 1400 | ||
| 1663 | if (gfs2_glmutex_trylock(gl)) { | 1401 | spin_lock(&gl->gl_spin); |
| 1664 | if (list_empty(&gl->gl_holders) && | 1402 | if (find_first_holder(gl) == NULL && |
| 1665 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) | 1403 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) { |
| 1666 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); | 1404 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); |
| 1667 | gfs2_glmutex_unlock(gl); | 1405 | done_callback = 1; |
| 1668 | } | 1406 | } |
| 1669 | 1407 | spin_unlock(&gl->gl_spin); | |
| 1670 | gfs2_glock_put(gl); | 1408 | if (!done_callback || |
| 1409 | queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | ||
| 1410 | gfs2_glock_put(gl); | ||
| 1671 | } | 1411 | } |
| 1672 | 1412 | ||
| 1673 | /** | 1413 | /** |
| @@ -1724,18 +1464,14 @@ static void scan_glock(struct gfs2_glock *gl) | |||
| 1724 | { | 1464 | { |
| 1725 | if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) | 1465 | if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) |
| 1726 | return; | 1466 | return; |
| 1467 | if (test_bit(GLF_LOCK, &gl->gl_flags)) | ||
| 1468 | return; | ||
| 1727 | 1469 | ||
| 1728 | if (gfs2_glmutex_trylock(gl)) { | 1470 | spin_lock(&gl->gl_spin); |
| 1729 | if (list_empty(&gl->gl_holders) && | 1471 | if (find_first_holder(gl) == NULL && |
| 1730 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) | 1472 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) |
| 1731 | goto out_schedule; | 1473 | gfs2_glock_schedule_for_reclaim(gl); |
| 1732 | gfs2_glmutex_unlock(gl); | 1474 | spin_unlock(&gl->gl_spin); |
| 1733 | } | ||
| 1734 | return; | ||
| 1735 | |||
| 1736 | out_schedule: | ||
| 1737 | gfs2_glmutex_unlock(gl); | ||
| 1738 | gfs2_glock_schedule_for_reclaim(gl); | ||
| 1739 | } | 1475 | } |
| 1740 | 1476 | ||
| 1741 | /** | 1477 | /** |
| @@ -1760,12 +1496,13 @@ static void clear_glock(struct gfs2_glock *gl) | |||
| 1760 | spin_unlock(&sdp->sd_reclaim_lock); | 1496 | spin_unlock(&sdp->sd_reclaim_lock); |
| 1761 | } | 1497 | } |
| 1762 | 1498 | ||
| 1763 | if (gfs2_glmutex_trylock(gl)) { | 1499 | spin_lock(&gl->gl_spin); |
| 1764 | if (list_empty(&gl->gl_holders) && | 1500 | if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED) |
| 1765 | gl->gl_state != LM_ST_UNLOCKED) | 1501 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); |
| 1766 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); | 1502 | spin_unlock(&gl->gl_spin); |
| 1767 | gfs2_glmutex_unlock(gl); | 1503 | gfs2_glock_hold(gl); |
| 1768 | } | 1504 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) |
| 1505 | gfs2_glock_put(gl); | ||
| 1769 | } | 1506 | } |
| 1770 | 1507 | ||
| 1771 | /** | 1508 | /** |
| @@ -1773,11 +1510,10 @@ static void clear_glock(struct gfs2_glock *gl) | |||
| 1773 | * @sdp: the filesystem | 1510 | * @sdp: the filesystem |
| 1774 | * @wait: wait until it's all gone | 1511 | * @wait: wait until it's all gone |
| 1775 | * | 1512 | * |
| 1776 | * Called when unmounting the filesystem, or when inter-node lock manager | 1513 | * Called when unmounting the filesystem. |
| 1777 | * requests DROPLOCKS because it is running out of capacity. | ||
| 1778 | */ | 1514 | */ |
| 1779 | 1515 | ||
| 1780 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) | 1516 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) |
| 1781 | { | 1517 | { |
| 1782 | unsigned long t; | 1518 | unsigned long t; |
| 1783 | unsigned int x; | 1519 | unsigned int x; |
| @@ -1792,7 +1528,7 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) | |||
| 1792 | cont = 1; | 1528 | cont = 1; |
| 1793 | } | 1529 | } |
| 1794 | 1530 | ||
| 1795 | if (!wait || !cont) | 1531 | if (!cont) |
| 1796 | break; | 1532 | break; |
| 1797 | 1533 | ||
| 1798 | if (time_after_eq(jiffies, | 1534 | if (time_after_eq(jiffies, |
| @@ -1810,180 +1546,164 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) | |||
| 1810 | } | 1546 | } |
| 1811 | } | 1547 | } |
| 1812 | 1548 | ||
| 1813 | /* | 1549 | static const char *state2str(unsigned state) |
| 1814 | * Diagnostic routines to help debug distributed deadlock | ||
| 1815 | */ | ||
| 1816 | |||
| 1817 | static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt, | ||
| 1818 | unsigned long address) | ||
| 1819 | { | 1550 | { |
| 1820 | char buffer[KSYM_SYMBOL_LEN]; | 1551 | switch(state) { |
| 1821 | 1552 | case LM_ST_UNLOCKED: | |
| 1822 | sprint_symbol(buffer, address); | 1553 | return "UN"; |
| 1823 | print_dbg(gi, fmt, buffer); | 1554 | case LM_ST_SHARED: |
| 1555 | return "SH"; | ||
| 1556 | case LM_ST_DEFERRED: | ||
| 1557 | return "DF"; | ||
| 1558 | case LM_ST_EXCLUSIVE: | ||
| 1559 | return "EX"; | ||
| 1560 | } | ||
| 1561 | return "??"; | ||
| 1562 | } | ||
| 1563 | |||
| 1564 | static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) | ||
| 1565 | { | ||
| 1566 | char *p = buf; | ||
| 1567 | if (flags & LM_FLAG_TRY) | ||
| 1568 | *p++ = 't'; | ||
| 1569 | if (flags & LM_FLAG_TRY_1CB) | ||
| 1570 | *p++ = 'T'; | ||
| 1571 | if (flags & LM_FLAG_NOEXP) | ||
| 1572 | *p++ = 'e'; | ||
| 1573 | if (flags & LM_FLAG_ANY) | ||
| 1574 | *p++ = 'a'; | ||
| 1575 | if (flags & LM_FLAG_PRIORITY) | ||
| 1576 | *p++ = 'p'; | ||
| 1577 | if (flags & GL_ASYNC) | ||
| 1578 | *p++ = 'a'; | ||
| 1579 | if (flags & GL_EXACT) | ||
| 1580 | *p++ = 'E'; | ||
| 1581 | if (flags & GL_ATIME) | ||
| 1582 | *p++ = 'a'; | ||
| 1583 | if (flags & GL_NOCACHE) | ||
| 1584 | *p++ = 'c'; | ||
| 1585 | if (test_bit(HIF_HOLDER, &iflags)) | ||
| 1586 | *p++ = 'H'; | ||
| 1587 | if (test_bit(HIF_WAIT, &iflags)) | ||
| 1588 | *p++ = 'W'; | ||
| 1589 | if (test_bit(HIF_FIRST, &iflags)) | ||
| 1590 | *p++ = 'F'; | ||
| 1591 | *p = 0; | ||
| 1592 | return buf; | ||
| 1824 | } | 1593 | } |
| 1825 | 1594 | ||
| 1826 | /** | 1595 | /** |
| 1827 | * dump_holder - print information about a glock holder | 1596 | * dump_holder - print information about a glock holder |
| 1828 | * @str: a string naming the type of holder | 1597 | * @seq: the seq_file struct |
| 1829 | * @gh: the glock holder | 1598 | * @gh: the glock holder |
| 1830 | * | 1599 | * |
| 1831 | * Returns: 0 on success, -ENOBUFS when we run out of space | 1600 | * Returns: 0 on success, -ENOBUFS when we run out of space |
| 1832 | */ | 1601 | */ |
| 1833 | 1602 | ||
| 1834 | static int dump_holder(struct glock_iter *gi, char *str, | 1603 | static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) |
| 1835 | struct gfs2_holder *gh) | ||
| 1836 | { | 1604 | { |
| 1837 | unsigned int x; | 1605 | struct task_struct *gh_owner = NULL; |
| 1838 | struct task_struct *gh_owner; | 1606 | char buffer[KSYM_SYMBOL_LEN]; |
| 1607 | char flags_buf[32]; | ||
| 1839 | 1608 | ||
| 1840 | print_dbg(gi, " %s\n", str); | 1609 | sprint_symbol(buffer, gh->gh_ip); |
| 1841 | if (gh->gh_owner_pid) { | 1610 | if (gh->gh_owner_pid) |
| 1842 | print_dbg(gi, " owner = %ld ", | ||
| 1843 | (long)pid_nr(gh->gh_owner_pid)); | ||
| 1844 | gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); | 1611 | gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); |
| 1845 | if (gh_owner) | 1612 | gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n", |
| 1846 | print_dbg(gi, "(%s)\n", gh_owner->comm); | 1613 | state2str(gh->gh_state), |
| 1847 | else | 1614 | hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), |
| 1848 | print_dbg(gi, "(ended)\n"); | 1615 | gh->gh_error, |
| 1849 | } else | 1616 | gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, |
| 1850 | print_dbg(gi, " owner = -1\n"); | 1617 | gh_owner ? gh_owner->comm : "(ended)", buffer); |
| 1851 | print_dbg(gi, " gh_state = %u\n", gh->gh_state); | ||
| 1852 | print_dbg(gi, " gh_flags ="); | ||
| 1853 | for (x = 0; x < 32; x++) | ||
| 1854 | if (gh->gh_flags & (1 << x)) | ||
| 1855 | print_dbg(gi, " %u", x); | ||
| 1856 | print_dbg(gi, " \n"); | ||
| 1857 | print_dbg(gi, " error = %d\n", gh->gh_error); | ||
| 1858 | print_dbg(gi, " gh_iflags ="); | ||
| 1859 | for (x = 0; x < 32; x++) | ||
| 1860 | if (test_bit(x, &gh->gh_iflags)) | ||
| 1861 | print_dbg(gi, " %u", x); | ||
| 1862 | print_dbg(gi, " \n"); | ||
| 1863 | gfs2_print_symbol(gi, " initialized at: %s\n", gh->gh_ip); | ||
| 1864 | |||
| 1865 | return 0; | 1618 | return 0; |
| 1866 | } | 1619 | } |
| 1867 | 1620 | ||
| 1868 | /** | 1621 | static const char *gflags2str(char *buf, const unsigned long *gflags) |
| 1869 | * dump_inode - print information about an inode | 1622 | { |
| 1870 | * @ip: the inode | 1623 | char *p = buf; |
| 1871 | * | 1624 | if (test_bit(GLF_LOCK, gflags)) |
| 1872 | * Returns: 0 on success, -ENOBUFS when we run out of space | 1625 | *p++ = 'l'; |
| 1873 | */ | 1626 | if (test_bit(GLF_STICKY, gflags)) |
| 1874 | 1627 | *p++ = 's'; | |
| 1875 | static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip) | 1628 | if (test_bit(GLF_DEMOTE, gflags)) |
| 1876 | { | 1629 | *p++ = 'D'; |
| 1877 | unsigned int x; | 1630 | if (test_bit(GLF_PENDING_DEMOTE, gflags)) |
| 1878 | 1631 | *p++ = 'd'; | |
| 1879 | print_dbg(gi, " Inode:\n"); | 1632 | if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags)) |
| 1880 | print_dbg(gi, " num = %llu/%llu\n", | 1633 | *p++ = 'p'; |
| 1881 | (unsigned long long)ip->i_no_formal_ino, | 1634 | if (test_bit(GLF_DIRTY, gflags)) |
| 1882 | (unsigned long long)ip->i_no_addr); | 1635 | *p++ = 'y'; |
| 1883 | print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); | 1636 | if (test_bit(GLF_LFLUSH, gflags)) |
| 1884 | print_dbg(gi, " i_flags ="); | 1637 | *p++ = 'f'; |
| 1885 | for (x = 0; x < 32; x++) | 1638 | if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags)) |
| 1886 | if (test_bit(x, &ip->i_flags)) | 1639 | *p++ = 'i'; |
| 1887 | print_dbg(gi, " %u", x); | 1640 | if (test_bit(GLF_REPLY_PENDING, gflags)) |
| 1888 | print_dbg(gi, " \n"); | 1641 | *p++ = 'r'; |
| 1889 | return 0; | 1642 | *p = 0; |
| 1643 | return buf; | ||
| 1890 | } | 1644 | } |
| 1891 | 1645 | ||
| 1892 | /** | 1646 | /** |
| 1893 | * dump_glock - print information about a glock | 1647 | * __dump_glock - print information about a glock |
| 1648 | * @seq: The seq_file struct | ||
| 1894 | * @gl: the glock | 1649 | * @gl: the glock |
| 1895 | * @count: where we are in the buffer | 1650 | * |
| 1651 | * The file format is as follows: | ||
| 1652 | * One line per object, capital letters are used to indicate objects | ||
| 1653 | * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented, | ||
| 1654 | * other objects are indented by a single space and follow the glock to | ||
| 1655 | * which they are related. Fields are indicated by lower case letters | ||
| 1656 | * followed by a colon and the field value, except for strings which are in | ||
| 1657 | * [] so that its possible to see if they are composed of spaces for | ||
| 1658 | * example. The field's are n = number (id of the object), f = flags, | ||
| 1659 | * t = type, s = state, r = refcount, e = error, p = pid. | ||
| 1896 | * | 1660 | * |
| 1897 | * Returns: 0 on success, -ENOBUFS when we run out of space | 1661 | * Returns: 0 on success, -ENOBUFS when we run out of space |
| 1898 | */ | 1662 | */ |
| 1899 | 1663 | ||
| 1900 | static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) | 1664 | static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) |
| 1901 | { | 1665 | { |
| 1902 | struct gfs2_holder *gh; | 1666 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
| 1903 | unsigned int x; | 1667 | unsigned long long dtime; |
| 1904 | int error = -ENOBUFS; | 1668 | const struct gfs2_holder *gh; |
| 1905 | struct task_struct *gl_owner; | 1669 | char gflags_buf[32]; |
| 1670 | int error = 0; | ||
| 1906 | 1671 | ||
| 1907 | spin_lock(&gl->gl_spin); | 1672 | dtime = jiffies - gl->gl_demote_time; |
| 1673 | dtime *= 1000000/HZ; /* demote time in uSec */ | ||
| 1674 | if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) | ||
| 1675 | dtime = 0; | ||
| 1676 | gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu l:%d a:%d r:%d\n", | ||
| 1677 | state2str(gl->gl_state), | ||
| 1678 | gl->gl_name.ln_type, | ||
| 1679 | (unsigned long long)gl->gl_name.ln_number, | ||
| 1680 | gflags2str(gflags_buf, &gl->gl_flags), | ||
| 1681 | state2str(gl->gl_target), | ||
| 1682 | state2str(gl->gl_demote_state), dtime, | ||
| 1683 | atomic_read(&gl->gl_lvb_count), | ||
| 1684 | atomic_read(&gl->gl_ail_count), | ||
| 1685 | atomic_read(&gl->gl_ref)); | ||
| 1908 | 1686 | ||
| 1909 | print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type, | ||
| 1910 | (unsigned long long)gl->gl_name.ln_number); | ||
| 1911 | print_dbg(gi, " gl_flags ="); | ||
| 1912 | for (x = 0; x < 32; x++) { | ||
| 1913 | if (test_bit(x, &gl->gl_flags)) | ||
| 1914 | print_dbg(gi, " %u", x); | ||
| 1915 | } | ||
| 1916 | if (!test_bit(GLF_LOCK, &gl->gl_flags)) | ||
| 1917 | print_dbg(gi, " (unlocked)"); | ||
| 1918 | print_dbg(gi, " \n"); | ||
| 1919 | print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref)); | ||
| 1920 | print_dbg(gi, " gl_state = %u\n", gl->gl_state); | ||
| 1921 | if (gl->gl_owner_pid) { | ||
| 1922 | gl_owner = pid_task(gl->gl_owner_pid, PIDTYPE_PID); | ||
| 1923 | if (gl_owner) | ||
| 1924 | print_dbg(gi, " gl_owner = pid %d (%s)\n", | ||
| 1925 | pid_nr(gl->gl_owner_pid), gl_owner->comm); | ||
| 1926 | else | ||
| 1927 | print_dbg(gi, " gl_owner = %d (ended)\n", | ||
| 1928 | pid_nr(gl->gl_owner_pid)); | ||
| 1929 | } else | ||
| 1930 | print_dbg(gi, " gl_owner = -1\n"); | ||
| 1931 | print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); | ||
| 1932 | print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); | ||
| 1933 | print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); | ||
| 1934 | print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); | ||
| 1935 | print_dbg(gi, " reclaim = %s\n", | ||
| 1936 | (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); | ||
| 1937 | if (gl->gl_aspace) | ||
| 1938 | print_dbg(gi, " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, | ||
| 1939 | gl->gl_aspace->i_mapping->nrpages); | ||
| 1940 | else | ||
| 1941 | print_dbg(gi, " aspace = no\n"); | ||
| 1942 | print_dbg(gi, " ail = %d\n", atomic_read(&gl->gl_ail_count)); | ||
| 1943 | if (gl->gl_req_gh) { | ||
| 1944 | error = dump_holder(gi, "Request", gl->gl_req_gh); | ||
| 1945 | if (error) | ||
| 1946 | goto out; | ||
| 1947 | } | ||
| 1948 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | 1687 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { |
| 1949 | error = dump_holder(gi, "Holder", gh); | 1688 | error = dump_holder(seq, gh); |
| 1950 | if (error) | 1689 | if (error) |
| 1951 | goto out; | 1690 | goto out; |
| 1952 | } | 1691 | } |
| 1953 | list_for_each_entry(gh, &gl->gl_waiters1, gh_list) { | 1692 | if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) |
| 1954 | error = dump_holder(gi, "Waiter1", gh); | 1693 | error = glops->go_dump(seq, gl); |
| 1955 | if (error) | ||
| 1956 | goto out; | ||
| 1957 | } | ||
| 1958 | list_for_each_entry(gh, &gl->gl_waiters3, gh_list) { | ||
| 1959 | error = dump_holder(gi, "Waiter3", gh); | ||
| 1960 | if (error) | ||
| 1961 | goto out; | ||
| 1962 | } | ||
| 1963 | if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { | ||
| 1964 | print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", | ||
| 1965 | gl->gl_demote_state, (unsigned long long) | ||
| 1966 | (jiffies - gl->gl_demote_time)*(1000000/HZ)); | ||
| 1967 | } | ||
| 1968 | if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { | ||
| 1969 | if (!test_bit(GLF_LOCK, &gl->gl_flags) && | ||
| 1970 | list_empty(&gl->gl_holders)) { | ||
| 1971 | error = dump_inode(gi, gl->gl_object); | ||
| 1972 | if (error) | ||
| 1973 | goto out; | ||
| 1974 | } else { | ||
| 1975 | error = -ENOBUFS; | ||
| 1976 | print_dbg(gi, " Inode: busy\n"); | ||
| 1977 | } | ||
| 1978 | } | ||
| 1979 | |||
| 1980 | error = 0; | ||
| 1981 | |||
| 1982 | out: | 1694 | out: |
| 1983 | spin_unlock(&gl->gl_spin); | ||
| 1984 | return error; | 1695 | return error; |
| 1985 | } | 1696 | } |
| 1986 | 1697 | ||
| 1698 | static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) | ||
| 1699 | { | ||
| 1700 | int ret; | ||
| 1701 | spin_lock(&gl->gl_spin); | ||
| 1702 | ret = __dump_glock(seq, gl); | ||
| 1703 | spin_unlock(&gl->gl_spin); | ||
| 1704 | return ret; | ||
| 1705 | } | ||
| 1706 | |||
| 1987 | /** | 1707 | /** |
| 1988 | * gfs2_dump_lockstate - print out the current lockstate | 1708 | * gfs2_dump_lockstate - print out the current lockstate |
| 1989 | * @sdp: the filesystem | 1709 | * @sdp: the filesystem |
| @@ -2086,7 +1806,7 @@ void gfs2_glock_exit(void) | |||
| 2086 | module_param(scand_secs, uint, S_IRUGO|S_IWUSR); | 1806 | module_param(scand_secs, uint, S_IRUGO|S_IWUSR); |
| 2087 | MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs"); | 1807 | MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs"); |
| 2088 | 1808 | ||
| 2089 | static int gfs2_glock_iter_next(struct glock_iter *gi) | 1809 | static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) |
| 2090 | { | 1810 | { |
| 2091 | struct gfs2_glock *gl; | 1811 | struct gfs2_glock *gl; |
| 2092 | 1812 | ||
| @@ -2104,7 +1824,7 @@ restart: | |||
| 2104 | gfs2_glock_put(gl); | 1824 | gfs2_glock_put(gl); |
| 2105 | if (gl && gi->gl == NULL) | 1825 | if (gl && gi->gl == NULL) |
| 2106 | gi->hash++; | 1826 | gi->hash++; |
| 2107 | while(gi->gl == NULL) { | 1827 | while (gi->gl == NULL) { |
| 2108 | if (gi->hash >= GFS2_GL_HASH_SIZE) | 1828 | if (gi->hash >= GFS2_GL_HASH_SIZE) |
| 2109 | return 1; | 1829 | return 1; |
| 2110 | read_lock(gl_lock_addr(gi->hash)); | 1830 | read_lock(gl_lock_addr(gi->hash)); |
| @@ -2122,58 +1842,34 @@ restart: | |||
| 2122 | return 0; | 1842 | return 0; |
| 2123 | } | 1843 | } |
| 2124 | 1844 | ||
| 2125 | static void gfs2_glock_iter_free(struct glock_iter *gi) | 1845 | static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi) |
| 2126 | { | 1846 | { |
| 2127 | if (gi->gl) | 1847 | if (gi->gl) |
| 2128 | gfs2_glock_put(gi->gl); | 1848 | gfs2_glock_put(gi->gl); |
| 2129 | kfree(gi); | ||
| 2130 | } | ||
| 2131 | |||
| 2132 | static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp) | ||
| 2133 | { | ||
| 2134 | struct glock_iter *gi; | ||
| 2135 | |||
| 2136 | gi = kmalloc(sizeof (*gi), GFP_KERNEL); | ||
| 2137 | if (!gi) | ||
| 2138 | return NULL; | ||
| 2139 | |||
| 2140 | gi->sdp = sdp; | ||
| 2141 | gi->hash = 0; | ||
| 2142 | gi->seq = NULL; | ||
| 2143 | gi->gl = NULL; | 1849 | gi->gl = NULL; |
| 2144 | memset(gi->string, 0, sizeof(gi->string)); | ||
| 2145 | |||
| 2146 | if (gfs2_glock_iter_next(gi)) { | ||
| 2147 | gfs2_glock_iter_free(gi); | ||
| 2148 | return NULL; | ||
| 2149 | } | ||
| 2150 | |||
| 2151 | return gi; | ||
| 2152 | } | 1850 | } |
| 2153 | 1851 | ||
| 2154 | static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos) | 1852 | static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) |
| 2155 | { | 1853 | { |
| 2156 | struct glock_iter *gi; | 1854 | struct gfs2_glock_iter *gi = seq->private; |
| 2157 | loff_t n = *pos; | 1855 | loff_t n = *pos; |
| 2158 | 1856 | ||
| 2159 | gi = gfs2_glock_iter_init(file->private); | 1857 | gi->hash = 0; |
| 2160 | if (!gi) | ||
| 2161 | return NULL; | ||
| 2162 | 1858 | ||
| 2163 | while(n--) { | 1859 | do { |
| 2164 | if (gfs2_glock_iter_next(gi)) { | 1860 | if (gfs2_glock_iter_next(gi)) { |
| 2165 | gfs2_glock_iter_free(gi); | 1861 | gfs2_glock_iter_free(gi); |
| 2166 | return NULL; | 1862 | return NULL; |
| 2167 | } | 1863 | } |
| 2168 | } | 1864 | } while (n--); |
| 2169 | 1865 | ||
| 2170 | return gi; | 1866 | return gi->gl; |
| 2171 | } | 1867 | } |
| 2172 | 1868 | ||
| 2173 | static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, | 1869 | static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, |
| 2174 | loff_t *pos) | 1870 | loff_t *pos) |
| 2175 | { | 1871 | { |
| 2176 | struct glock_iter *gi = iter_ptr; | 1872 | struct gfs2_glock_iter *gi = seq->private; |
| 2177 | 1873 | ||
| 2178 | (*pos)++; | 1874 | (*pos)++; |
| 2179 | 1875 | ||
| @@ -2182,24 +1878,18 @@ static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, | |||
| 2182 | return NULL; | 1878 | return NULL; |
| 2183 | } | 1879 | } |
| 2184 | 1880 | ||
| 2185 | return gi; | 1881 | return gi->gl; |
| 2186 | } | 1882 | } |
| 2187 | 1883 | ||
| 2188 | static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) | 1884 | static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) |
| 2189 | { | 1885 | { |
| 2190 | struct glock_iter *gi = iter_ptr; | 1886 | struct gfs2_glock_iter *gi = seq->private; |
| 2191 | if (gi) | 1887 | gfs2_glock_iter_free(gi); |
| 2192 | gfs2_glock_iter_free(gi); | ||
| 2193 | } | 1888 | } |
| 2194 | 1889 | ||
| 2195 | static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) | 1890 | static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) |
| 2196 | { | 1891 | { |
| 2197 | struct glock_iter *gi = iter_ptr; | 1892 | return dump_glock(seq, iter_ptr); |
| 2198 | |||
| 2199 | gi->seq = file; | ||
| 2200 | dump_glock(gi, gi->gl); | ||
| 2201 | |||
| 2202 | return 0; | ||
| 2203 | } | 1893 | } |
| 2204 | 1894 | ||
| 2205 | static const struct seq_operations gfs2_glock_seq_ops = { | 1895 | static const struct seq_operations gfs2_glock_seq_ops = { |
| @@ -2211,17 +1901,14 @@ static const struct seq_operations gfs2_glock_seq_ops = { | |||
| 2211 | 1901 | ||
| 2212 | static int gfs2_debugfs_open(struct inode *inode, struct file *file) | 1902 | static int gfs2_debugfs_open(struct inode *inode, struct file *file) |
| 2213 | { | 1903 | { |
| 2214 | struct seq_file *seq; | 1904 | int ret = seq_open_private(file, &gfs2_glock_seq_ops, |
| 2215 | int ret; | 1905 | sizeof(struct gfs2_glock_iter)); |
| 2216 | 1906 | if (ret == 0) { | |
| 2217 | ret = seq_open(file, &gfs2_glock_seq_ops); | 1907 | struct seq_file *seq = file->private_data; |
| 2218 | if (ret) | 1908 | struct gfs2_glock_iter *gi = seq->private; |
| 2219 | return ret; | 1909 | gi->sdp = inode->i_private; |
| 2220 | 1910 | } | |
| 2221 | seq = file->private_data; | 1911 | return ret; |
| 2222 | seq->private = inode->i_private; | ||
| 2223 | |||
| 2224 | return 0; | ||
| 2225 | } | 1912 | } |
| 2226 | 1913 | ||
| 2227 | static const struct file_operations gfs2_debug_fops = { | 1914 | static const struct file_operations gfs2_debug_fops = { |
| @@ -2229,7 +1916,7 @@ static const struct file_operations gfs2_debug_fops = { | |||
| 2229 | .open = gfs2_debugfs_open, | 1916 | .open = gfs2_debugfs_open, |
| 2230 | .read = seq_read, | 1917 | .read = seq_read, |
| 2231 | .llseek = seq_lseek, | 1918 | .llseek = seq_lseek, |
| 2232 | .release = seq_release | 1919 | .release = seq_release_private, |
| 2233 | }; | 1920 | }; |
| 2234 | 1921 | ||
| 2235 | int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) | 1922 | int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index cdad3e6f8150..971d92af70fc 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
| @@ -26,11 +26,8 @@ | |||
| 26 | #define GL_SKIP 0x00000100 | 26 | #define GL_SKIP 0x00000100 |
| 27 | #define GL_ATIME 0x00000200 | 27 | #define GL_ATIME 0x00000200 |
| 28 | #define GL_NOCACHE 0x00000400 | 28 | #define GL_NOCACHE 0x00000400 |
| 29 | #define GL_FLOCK 0x00000800 | ||
| 30 | #define GL_NOCANCEL 0x00001000 | ||
| 31 | 29 | ||
| 32 | #define GLR_TRYFAILED 13 | 30 | #define GLR_TRYFAILED 13 |
| 33 | #define GLR_CANCELED 14 | ||
| 34 | 31 | ||
| 35 | static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) | 32 | static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) |
| 36 | { | 33 | { |
| @@ -41,6 +38,8 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock * | |||
| 41 | spin_lock(&gl->gl_spin); | 38 | spin_lock(&gl->gl_spin); |
| 42 | pid = task_pid(current); | 39 | pid = task_pid(current); |
| 43 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | 40 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { |
| 41 | if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) | ||
| 42 | break; | ||
| 44 | if (gh->gh_owner_pid == pid) | 43 | if (gh->gh_owner_pid == pid) |
| 45 | goto out; | 44 | goto out; |
| 46 | } | 45 | } |
| @@ -70,7 +69,7 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl) | |||
| 70 | { | 69 | { |
| 71 | int ret; | 70 | int ret; |
| 72 | spin_lock(&gl->gl_spin); | 71 | spin_lock(&gl->gl_spin); |
| 73 | ret = test_bit(GLF_DEMOTE, &gl->gl_flags) || !list_empty(&gl->gl_waiters3); | 72 | ret = test_bit(GLF_DEMOTE, &gl->gl_flags); |
| 74 | spin_unlock(&gl->gl_spin); | 73 | spin_unlock(&gl->gl_spin); |
| 75 | return ret; | 74 | return ret; |
| 76 | } | 75 | } |
| @@ -98,6 +97,7 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp, | |||
| 98 | int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 97 | int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); |
| 99 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 98 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); |
| 100 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); | 99 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); |
| 100 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); | ||
| 101 | 101 | ||
| 102 | /** | 102 | /** |
| 103 | * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock | 103 | * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock |
| @@ -130,10 +130,9 @@ int gfs2_lvb_hold(struct gfs2_glock *gl); | |||
| 130 | void gfs2_lvb_unhold(struct gfs2_glock *gl); | 130 | void gfs2_lvb_unhold(struct gfs2_glock *gl); |
| 131 | 131 | ||
| 132 | void gfs2_glock_cb(void *cb_data, unsigned int type, void *data); | 132 | void gfs2_glock_cb(void *cb_data, unsigned int type, void *data); |
| 133 | |||
| 134 | void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); | 133 | void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); |
| 135 | void gfs2_reclaim_glock(struct gfs2_sbd *sdp); | 134 | void gfs2_reclaim_glock(struct gfs2_sbd *sdp); |
| 136 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); | 135 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); |
| 137 | 136 | ||
| 138 | int __init gfs2_glock_init(void); | 137 | int __init gfs2_glock_init(void); |
| 139 | void gfs2_glock_exit(void); | 138 | void gfs2_glock_exit(void); |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 07d84d16cda4..c6c318c2a0f6 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/buffer_head.h> | 13 | #include <linux/buffer_head.h> |
| 14 | #include <linux/gfs2_ondisk.h> | 14 | #include <linux/gfs2_ondisk.h> |
| 15 | #include <linux/lm_interface.h> | 15 | #include <linux/lm_interface.h> |
| 16 | #include <linux/bio.h> | ||
| 16 | 17 | ||
| 17 | #include "gfs2.h" | 18 | #include "gfs2.h" |
| 18 | #include "incore.h" | 19 | #include "incore.h" |
| @@ -172,26 +173,6 @@ static void inode_go_sync(struct gfs2_glock *gl) | |||
| 172 | } | 173 | } |
| 173 | 174 | ||
| 174 | /** | 175 | /** |
| 175 | * inode_go_xmote_bh - After promoting/demoting a glock | ||
| 176 | * @gl: the glock | ||
| 177 | * | ||
| 178 | */ | ||
| 179 | |||
| 180 | static void inode_go_xmote_bh(struct gfs2_glock *gl) | ||
| 181 | { | ||
| 182 | struct gfs2_holder *gh = gl->gl_req_gh; | ||
| 183 | struct buffer_head *bh; | ||
| 184 | int error; | ||
| 185 | |||
| 186 | if (gl->gl_state != LM_ST_UNLOCKED && | ||
| 187 | (!gh || !(gh->gh_flags & GL_SKIP))) { | ||
| 188 | error = gfs2_meta_read(gl, gl->gl_name.ln_number, 0, &bh); | ||
| 189 | if (!error) | ||
| 190 | brelse(bh); | ||
| 191 | } | ||
| 192 | } | ||
| 193 | |||
| 194 | /** | ||
| 195 | * inode_go_inval - prepare a inode glock to be released | 176 | * inode_go_inval - prepare a inode glock to be released |
| 196 | * @gl: the glock | 177 | * @gl: the glock |
| 197 | * @flags: | 178 | * @flags: |
| @@ -267,6 +248,26 @@ static int inode_go_lock(struct gfs2_holder *gh) | |||
| 267 | } | 248 | } |
| 268 | 249 | ||
| 269 | /** | 250 | /** |
| 251 | * inode_go_dump - print information about an inode | ||
| 252 | * @seq: The iterator | ||
| 253 | * @ip: the inode | ||
| 254 | * | ||
| 255 | * Returns: 0 on success, -ENOBUFS when we run out of space | ||
| 256 | */ | ||
| 257 | |||
| 258 | static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) | ||
| 259 | { | ||
| 260 | const struct gfs2_inode *ip = gl->gl_object; | ||
| 261 | if (ip == NULL) | ||
| 262 | return 0; | ||
| 263 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%08lx\n", | ||
| 264 | (unsigned long long)ip->i_no_formal_ino, | ||
| 265 | (unsigned long long)ip->i_no_addr, | ||
| 266 | IF2DT(ip->i_inode.i_mode), ip->i_flags); | ||
| 267 | return 0; | ||
| 268 | } | ||
| 269 | |||
| 270 | /** | ||
| 270 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock | 271 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock |
| 271 | * @gl: the glock | 272 | * @gl: the glock |
| 272 | * | 273 | * |
| @@ -306,6 +307,22 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) | |||
| 306 | } | 307 | } |
| 307 | 308 | ||
| 308 | /** | 309 | /** |
| 310 | * rgrp_go_dump - print out an rgrp | ||
| 311 | * @seq: The iterator | ||
| 312 | * @gl: The glock in question | ||
| 313 | * | ||
| 314 | */ | ||
| 315 | |||
| 316 | static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) | ||
| 317 | { | ||
| 318 | const struct gfs2_rgrpd *rgd = gl->gl_object; | ||
| 319 | if (rgd == NULL) | ||
| 320 | return 0; | ||
| 321 | gfs2_print_dbg(seq, " R: n:%llu\n", (unsigned long long)rgd->rd_addr); | ||
| 322 | return 0; | ||
| 323 | } | ||
| 324 | |||
| 325 | /** | ||
| 309 | * trans_go_sync - promote/demote the transaction glock | 326 | * trans_go_sync - promote/demote the transaction glock |
| 310 | * @gl: the glock | 327 | * @gl: the glock |
| 311 | * @state: the requested state | 328 | * @state: the requested state |
| @@ -330,7 +347,7 @@ static void trans_go_sync(struct gfs2_glock *gl) | |||
| 330 | * | 347 | * |
| 331 | */ | 348 | */ |
| 332 | 349 | ||
| 333 | static void trans_go_xmote_bh(struct gfs2_glock *gl) | 350 | static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) |
| 334 | { | 351 | { |
| 335 | struct gfs2_sbd *sdp = gl->gl_sbd; | 352 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| 336 | struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); | 353 | struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); |
| @@ -338,8 +355,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) | |||
| 338 | struct gfs2_log_header_host head; | 355 | struct gfs2_log_header_host head; |
| 339 | int error; | 356 | int error; |
| 340 | 357 | ||
| 341 | if (gl->gl_state != LM_ST_UNLOCKED && | 358 | if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { |
| 342 | test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { | ||
| 343 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); | 359 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); |
| 344 | 360 | ||
| 345 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); | 361 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); |
| @@ -354,6 +370,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) | |||
| 354 | gfs2_log_pointers_init(sdp, head.lh_blkno); | 370 | gfs2_log_pointers_init(sdp, head.lh_blkno); |
| 355 | } | 371 | } |
| 356 | } | 372 | } |
| 373 | return 0; | ||
| 357 | } | 374 | } |
| 358 | 375 | ||
| 359 | /** | 376 | /** |
| @@ -375,12 +392,12 @@ const struct gfs2_glock_operations gfs2_meta_glops = { | |||
| 375 | 392 | ||
| 376 | const struct gfs2_glock_operations gfs2_inode_glops = { | 393 | const struct gfs2_glock_operations gfs2_inode_glops = { |
| 377 | .go_xmote_th = inode_go_sync, | 394 | .go_xmote_th = inode_go_sync, |
| 378 | .go_xmote_bh = inode_go_xmote_bh, | ||
| 379 | .go_inval = inode_go_inval, | 395 | .go_inval = inode_go_inval, |
| 380 | .go_demote_ok = inode_go_demote_ok, | 396 | .go_demote_ok = inode_go_demote_ok, |
| 381 | .go_lock = inode_go_lock, | 397 | .go_lock = inode_go_lock, |
| 398 | .go_dump = inode_go_dump, | ||
| 382 | .go_type = LM_TYPE_INODE, | 399 | .go_type = LM_TYPE_INODE, |
| 383 | .go_min_hold_time = HZ / 10, | 400 | .go_min_hold_time = HZ / 5, |
| 384 | }; | 401 | }; |
| 385 | 402 | ||
| 386 | const struct gfs2_glock_operations gfs2_rgrp_glops = { | 403 | const struct gfs2_glock_operations gfs2_rgrp_glops = { |
| @@ -389,8 +406,9 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { | |||
| 389 | .go_demote_ok = rgrp_go_demote_ok, | 406 | .go_demote_ok = rgrp_go_demote_ok, |
| 390 | .go_lock = rgrp_go_lock, | 407 | .go_lock = rgrp_go_lock, |
| 391 | .go_unlock = rgrp_go_unlock, | 408 | .go_unlock = rgrp_go_unlock, |
| 409 | .go_dump = rgrp_go_dump, | ||
| 392 | .go_type = LM_TYPE_RGRP, | 410 | .go_type = LM_TYPE_RGRP, |
| 393 | .go_min_hold_time = HZ / 10, | 411 | .go_min_hold_time = HZ / 5, |
| 394 | }; | 412 | }; |
| 395 | 413 | ||
| 396 | const struct gfs2_glock_operations gfs2_trans_glops = { | 414 | const struct gfs2_glock_operations gfs2_trans_glops = { |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index eabe5eac41da..448697a5c462 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
| @@ -77,7 +77,6 @@ struct gfs2_rgrp_host { | |||
| 77 | struct gfs2_rgrpd { | 77 | struct gfs2_rgrpd { |
| 78 | struct list_head rd_list; /* Link with superblock */ | 78 | struct list_head rd_list; /* Link with superblock */ |
| 79 | struct list_head rd_list_mru; | 79 | struct list_head rd_list_mru; |
| 80 | struct list_head rd_recent; /* Recently used rgrps */ | ||
| 81 | struct gfs2_glock *rd_gl; /* Glock for this rgrp */ | 80 | struct gfs2_glock *rd_gl; /* Glock for this rgrp */ |
| 82 | u64 rd_addr; /* grp block disk address */ | 81 | u64 rd_addr; /* grp block disk address */ |
| 83 | u64 rd_data0; /* first data location */ | 82 | u64 rd_data0; /* first data location */ |
| @@ -128,20 +127,20 @@ struct gfs2_bufdata { | |||
| 128 | 127 | ||
| 129 | struct gfs2_glock_operations { | 128 | struct gfs2_glock_operations { |
| 130 | void (*go_xmote_th) (struct gfs2_glock *gl); | 129 | void (*go_xmote_th) (struct gfs2_glock *gl); |
| 131 | void (*go_xmote_bh) (struct gfs2_glock *gl); | 130 | int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); |
| 132 | void (*go_inval) (struct gfs2_glock *gl, int flags); | 131 | void (*go_inval) (struct gfs2_glock *gl, int flags); |
| 133 | int (*go_demote_ok) (struct gfs2_glock *gl); | 132 | int (*go_demote_ok) (struct gfs2_glock *gl); |
| 134 | int (*go_lock) (struct gfs2_holder *gh); | 133 | int (*go_lock) (struct gfs2_holder *gh); |
| 135 | void (*go_unlock) (struct gfs2_holder *gh); | 134 | void (*go_unlock) (struct gfs2_holder *gh); |
| 135 | int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); | ||
| 136 | const int go_type; | 136 | const int go_type; |
| 137 | const unsigned long go_min_hold_time; | 137 | const unsigned long go_min_hold_time; |
| 138 | }; | 138 | }; |
| 139 | 139 | ||
| 140 | enum { | 140 | enum { |
| 141 | /* States */ | 141 | /* States */ |
| 142 | HIF_HOLDER = 6, | 142 | HIF_HOLDER = 6, /* Set for gh that "holds" the glock */ |
| 143 | HIF_FIRST = 7, | 143 | HIF_FIRST = 7, |
| 144 | HIF_ABORTED = 9, | ||
| 145 | HIF_WAIT = 10, | 144 | HIF_WAIT = 10, |
| 146 | }; | 145 | }; |
| 147 | 146 | ||
| @@ -154,20 +153,20 @@ struct gfs2_holder { | |||
| 154 | unsigned gh_flags; | 153 | unsigned gh_flags; |
| 155 | 154 | ||
| 156 | int gh_error; | 155 | int gh_error; |
| 157 | unsigned long gh_iflags; | 156 | unsigned long gh_iflags; /* HIF_... */ |
| 158 | unsigned long gh_ip; | 157 | unsigned long gh_ip; |
| 159 | }; | 158 | }; |
| 160 | 159 | ||
| 161 | enum { | 160 | enum { |
| 162 | GLF_LOCK = 1, | 161 | GLF_LOCK = 1, |
| 163 | GLF_STICKY = 2, | 162 | GLF_STICKY = 2, |
| 164 | GLF_DEMOTE = 3, | 163 | GLF_DEMOTE = 3, |
| 165 | GLF_PENDING_DEMOTE = 4, | 164 | GLF_PENDING_DEMOTE = 4, |
| 166 | GLF_DIRTY = 5, | 165 | GLF_DEMOTE_IN_PROGRESS = 5, |
| 167 | GLF_DEMOTE_IN_PROGRESS = 6, | 166 | GLF_DIRTY = 6, |
| 168 | GLF_LFLUSH = 7, | 167 | GLF_LFLUSH = 7, |
| 169 | GLF_WAITERS2 = 8, | 168 | GLF_INVALIDATE_IN_PROGRESS = 8, |
| 170 | GLF_CONV_DEADLK = 9, | 169 | GLF_REPLY_PENDING = 9, |
| 171 | }; | 170 | }; |
| 172 | 171 | ||
| 173 | struct gfs2_glock { | 172 | struct gfs2_glock { |
| @@ -179,19 +178,14 @@ struct gfs2_glock { | |||
| 179 | spinlock_t gl_spin; | 178 | spinlock_t gl_spin; |
| 180 | 179 | ||
| 181 | unsigned int gl_state; | 180 | unsigned int gl_state; |
| 181 | unsigned int gl_target; | ||
| 182 | unsigned int gl_reply; | ||
| 182 | unsigned int gl_hash; | 183 | unsigned int gl_hash; |
| 183 | unsigned int gl_demote_state; /* state requested by remote node */ | 184 | unsigned int gl_demote_state; /* state requested by remote node */ |
| 184 | unsigned long gl_demote_time; /* time of first demote request */ | 185 | unsigned long gl_demote_time; /* time of first demote request */ |
| 185 | struct pid *gl_owner_pid; | ||
| 186 | unsigned long gl_ip; | ||
| 187 | struct list_head gl_holders; | 186 | struct list_head gl_holders; |
| 188 | struct list_head gl_waiters1; /* HIF_MUTEX */ | ||
| 189 | struct list_head gl_waiters3; /* HIF_PROMOTE */ | ||
| 190 | 187 | ||
| 191 | const struct gfs2_glock_operations *gl_ops; | 188 | const struct gfs2_glock_operations *gl_ops; |
| 192 | |||
| 193 | struct gfs2_holder *gl_req_gh; | ||
| 194 | |||
| 195 | void *gl_lock; | 189 | void *gl_lock; |
| 196 | char *gl_lvb; | 190 | char *gl_lvb; |
| 197 | atomic_t gl_lvb_count; | 191 | atomic_t gl_lvb_count; |
| @@ -427,7 +421,6 @@ struct gfs2_tune { | |||
| 427 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ | 421 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ |
| 428 | unsigned int gt_atime_quantum; /* Min secs between atime updates */ | 422 | unsigned int gt_atime_quantum; /* Min secs between atime updates */ |
| 429 | unsigned int gt_new_files_jdata; | 423 | unsigned int gt_new_files_jdata; |
| 430 | unsigned int gt_new_files_directio; | ||
| 431 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ | 424 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ |
| 432 | unsigned int gt_stall_secs; /* Detects trouble! */ | 425 | unsigned int gt_stall_secs; /* Detects trouble! */ |
| 433 | unsigned int gt_complain_secs; | 426 | unsigned int gt_complain_secs; |
| @@ -534,7 +527,6 @@ struct gfs2_sbd { | |||
| 534 | struct mutex sd_rindex_mutex; | 527 | struct mutex sd_rindex_mutex; |
| 535 | struct list_head sd_rindex_list; | 528 | struct list_head sd_rindex_list; |
| 536 | struct list_head sd_rindex_mru_list; | 529 | struct list_head sd_rindex_mru_list; |
| 537 | struct list_head sd_rindex_recent_list; | ||
| 538 | struct gfs2_rgrpd *sd_rindex_forward; | 530 | struct gfs2_rgrpd *sd_rindex_forward; |
| 539 | unsigned int sd_rgrps; | 531 | unsigned int sd_rgrps; |
| 540 | 532 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 09453d057e41..6da0ab355b8a 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
| @@ -504,7 +504,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | |||
| 504 | } | 504 | } |
| 505 | 505 | ||
| 506 | if (!is_root) { | 506 | if (!is_root) { |
| 507 | error = permission(dir, MAY_EXEC, NULL); | 507 | error = gfs2_permission(dir, MAY_EXEC); |
| 508 | if (error) | 508 | if (error) |
| 509 | goto out; | 509 | goto out; |
| 510 | } | 510 | } |
| @@ -667,7 +667,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
| 667 | { | 667 | { |
| 668 | int error; | 668 | int error; |
| 669 | 669 | ||
| 670 | error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL); | 670 | error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); |
| 671 | if (error) | 671 | if (error) |
| 672 | return error; | 672 | return error; |
| 673 | 673 | ||
| @@ -789,13 +789,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, | |||
| 789 | if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) || | 789 | if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) || |
| 790 | gfs2_tune_get(sdp, gt_new_files_jdata)) | 790 | gfs2_tune_get(sdp, gt_new_files_jdata)) |
| 791 | di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); | 791 | di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); |
| 792 | if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) || | ||
| 793 | gfs2_tune_get(sdp, gt_new_files_directio)) | ||
| 794 | di->di_flags |= cpu_to_be32(GFS2_DIF_DIRECTIO); | ||
| 795 | } else if (S_ISDIR(mode)) { | 792 | } else if (S_ISDIR(mode)) { |
| 796 | di->di_flags |= cpu_to_be32(dip->i_di.di_flags & | 793 | di->di_flags |= cpu_to_be32(dip->i_di.di_flags & |
| 797 | GFS2_DIF_INHERIT_DIRECTIO); | ||
| 798 | di->di_flags |= cpu_to_be32(dip->i_di.di_flags & | ||
| 799 | GFS2_DIF_INHERIT_JDATA); | 794 | GFS2_DIF_INHERIT_JDATA); |
| 800 | } | 795 | } |
| 801 | 796 | ||
| @@ -1134,7 +1129,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
| 1134 | if (IS_APPEND(&dip->i_inode)) | 1129 | if (IS_APPEND(&dip->i_inode)) |
| 1135 | return -EPERM; | 1130 | return -EPERM; |
| 1136 | 1131 | ||
| 1137 | error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL); | 1132 | error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); |
| 1138 | if (error) | 1133 | if (error) |
| 1139 | return error; | 1134 | return error; |
| 1140 | 1135 | ||
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 580da454b38f..6074c2506f75 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
| @@ -72,7 +72,6 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip, | |||
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | 74 | ||
| 75 | void gfs2_inode_attr_in(struct gfs2_inode *ip); | ||
| 76 | void gfs2_set_iop(struct inode *inode); | 75 | void gfs2_set_iop(struct inode *inode); |
| 77 | struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, | 76 | struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, |
| 78 | u64 no_addr, u64 no_formal_ino, | 77 | u64 no_addr, u64 no_formal_ino, |
| @@ -91,6 +90,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | |||
| 91 | struct gfs2_inode *ip); | 90 | struct gfs2_inode *ip); |
| 92 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | 91 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, |
| 93 | const struct gfs2_inode *ip); | 92 | const struct gfs2_inode *ip); |
| 93 | int gfs2_permission(struct inode *inode, int mask); | ||
| 94 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); | 94 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); |
| 95 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); | 95 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); |
| 96 | int gfs2_glock_nq_atime(struct gfs2_holder *gh); | 96 | int gfs2_glock_nq_atime(struct gfs2_holder *gh); |
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c index 663fee728783..523243a13a21 100644 --- a/fs/gfs2/locking.c +++ b/fs/gfs2/locking.c | |||
| @@ -23,12 +23,54 @@ struct lmh_wrapper { | |||
| 23 | const struct lm_lockops *lw_ops; | 23 | const struct lm_lockops *lw_ops; |
| 24 | }; | 24 | }; |
| 25 | 25 | ||
| 26 | static int nolock_mount(char *table_name, char *host_data, | ||
| 27 | lm_callback_t cb, void *cb_data, | ||
| 28 | unsigned int min_lvb_size, int flags, | ||
| 29 | struct lm_lockstruct *lockstruct, | ||
| 30 | struct kobject *fskobj); | ||
| 31 | |||
| 26 | /* List of registered low-level locking protocols. A file system selects one | 32 | /* List of registered low-level locking protocols. A file system selects one |
| 27 | of them by name at mount time, e.g. lock_nolock, lock_dlm. */ | 33 | of them by name at mount time, e.g. lock_nolock, lock_dlm. */ |
| 28 | 34 | ||
| 35 | static const struct lm_lockops nolock_ops = { | ||
| 36 | .lm_proto_name = "lock_nolock", | ||
| 37 | .lm_mount = nolock_mount, | ||
| 38 | }; | ||
| 39 | |||
| 40 | static struct lmh_wrapper nolock_proto = { | ||
| 41 | .lw_list = LIST_HEAD_INIT(nolock_proto.lw_list), | ||
| 42 | .lw_ops = &nolock_ops, | ||
| 43 | }; | ||
| 44 | |||
| 29 | static LIST_HEAD(lmh_list); | 45 | static LIST_HEAD(lmh_list); |
| 30 | static DEFINE_MUTEX(lmh_lock); | 46 | static DEFINE_MUTEX(lmh_lock); |
| 31 | 47 | ||
| 48 | static int nolock_mount(char *table_name, char *host_data, | ||
| 49 | lm_callback_t cb, void *cb_data, | ||
| 50 | unsigned int min_lvb_size, int flags, | ||
| 51 | struct lm_lockstruct *lockstruct, | ||
| 52 | struct kobject *fskobj) | ||
| 53 | { | ||
| 54 | char *c; | ||
| 55 | unsigned int jid; | ||
| 56 | |||
| 57 | c = strstr(host_data, "jid="); | ||
| 58 | if (!c) | ||
| 59 | jid = 0; | ||
| 60 | else { | ||
| 61 | c += 4; | ||
| 62 | sscanf(c, "%u", &jid); | ||
| 63 | } | ||
| 64 | |||
| 65 | lockstruct->ls_jid = jid; | ||
| 66 | lockstruct->ls_first = 1; | ||
| 67 | lockstruct->ls_lvb_size = min_lvb_size; | ||
| 68 | lockstruct->ls_ops = &nolock_ops; | ||
| 69 | lockstruct->ls_flags = LM_LSFLAG_LOCAL; | ||
| 70 | |||
| 71 | return 0; | ||
| 72 | } | ||
| 73 | |||
| 32 | /** | 74 | /** |
| 33 | * gfs2_register_lockproto - Register a low-level locking protocol | 75 | * gfs2_register_lockproto - Register a low-level locking protocol |
| 34 | * @proto: the protocol definition | 76 | * @proto: the protocol definition |
| @@ -116,9 +158,13 @@ int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data, | |||
| 116 | int try = 0; | 158 | int try = 0; |
| 117 | int error, found; | 159 | int error, found; |
| 118 | 160 | ||
| 161 | |||
| 119 | retry: | 162 | retry: |
| 120 | mutex_lock(&lmh_lock); | 163 | mutex_lock(&lmh_lock); |
| 121 | 164 | ||
| 165 | if (list_empty(&nolock_proto.lw_list)) | ||
| 166 | list_add(&nolock_proto.lw_list, &lmh_list); | ||
| 167 | |||
| 122 | found = 0; | 168 | found = 0; |
| 123 | list_for_each_entry(lw, &lmh_list, lw_list) { | 169 | list_for_each_entry(lw, &lmh_list, lw_list) { |
| 124 | if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) { | 170 | if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) { |
| @@ -139,7 +185,8 @@ retry: | |||
| 139 | goto out; | 185 | goto out; |
| 140 | } | 186 | } |
| 141 | 187 | ||
| 142 | if (!try_module_get(lw->lw_ops->lm_owner)) { | 188 | if (lw->lw_ops->lm_owner && |
| 189 | !try_module_get(lw->lw_ops->lm_owner)) { | ||
| 143 | try = 0; | 190 | try = 0; |
| 144 | mutex_unlock(&lmh_lock); | 191 | mutex_unlock(&lmh_lock); |
| 145 | msleep(1000); | 192 | msleep(1000); |
| @@ -158,7 +205,8 @@ out: | |||
| 158 | void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct) | 205 | void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct) |
| 159 | { | 206 | { |
| 160 | mutex_lock(&lmh_lock); | 207 | mutex_lock(&lmh_lock); |
| 161 | lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace); | 208 | if (lockstruct->ls_ops->lm_unmount) |
| 209 | lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace); | ||
| 162 | if (lockstruct->ls_ops->lm_owner) | 210 | if (lockstruct->ls_ops->lm_owner) |
| 163 | module_put(lockstruct->ls_ops->lm_owner); | 211 | module_put(lockstruct->ls_ops->lm_owner); |
| 164 | mutex_unlock(&lmh_lock); | 212 | mutex_unlock(&lmh_lock); |
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c index cf7ea8abec87..2482c9047505 100644 --- a/fs/gfs2/locking/dlm/lock.c +++ b/fs/gfs2/locking/dlm/lock.c | |||
| @@ -11,46 +11,60 @@ | |||
| 11 | 11 | ||
| 12 | static char junk_lvb[GDLM_LVB_SIZE]; | 12 | static char junk_lvb[GDLM_LVB_SIZE]; |
| 13 | 13 | ||
| 14 | static void queue_complete(struct gdlm_lock *lp) | 14 | |
| 15 | /* convert dlm lock-mode to gfs lock-state */ | ||
| 16 | |||
| 17 | static s16 gdlm_make_lmstate(s16 dlmmode) | ||
| 15 | { | 18 | { |
| 16 | struct gdlm_ls *ls = lp->ls; | 19 | switch (dlmmode) { |
| 20 | case DLM_LOCK_IV: | ||
| 21 | case DLM_LOCK_NL: | ||
| 22 | return LM_ST_UNLOCKED; | ||
| 23 | case DLM_LOCK_EX: | ||
| 24 | return LM_ST_EXCLUSIVE; | ||
| 25 | case DLM_LOCK_CW: | ||
| 26 | return LM_ST_DEFERRED; | ||
| 27 | case DLM_LOCK_PR: | ||
| 28 | return LM_ST_SHARED; | ||
| 29 | } | ||
| 30 | gdlm_assert(0, "unknown DLM mode %d", dlmmode); | ||
| 31 | return -1; | ||
| 32 | } | ||
| 17 | 33 | ||
| 18 | clear_bit(LFL_ACTIVE, &lp->flags); | 34 | /* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm |
| 35 | thread gets to it. */ | ||
| 36 | |||
| 37 | static void queue_submit(struct gdlm_lock *lp) | ||
| 38 | { | ||
| 39 | struct gdlm_ls *ls = lp->ls; | ||
| 19 | 40 | ||
| 20 | spin_lock(&ls->async_lock); | 41 | spin_lock(&ls->async_lock); |
| 21 | list_add_tail(&lp->clist, &ls->complete); | 42 | list_add_tail(&lp->delay_list, &ls->submit); |
| 22 | spin_unlock(&ls->async_lock); | 43 | spin_unlock(&ls->async_lock); |
| 23 | wake_up(&ls->thread_wait); | 44 | wake_up(&ls->thread_wait); |
| 24 | } | 45 | } |
| 25 | 46 | ||
| 26 | static inline void gdlm_ast(void *astarg) | 47 | static void wake_up_ast(struct gdlm_lock *lp) |
| 27 | { | 48 | { |
| 28 | queue_complete(astarg); | 49 | clear_bit(LFL_AST_WAIT, &lp->flags); |
| 50 | smp_mb__after_clear_bit(); | ||
| 51 | wake_up_bit(&lp->flags, LFL_AST_WAIT); | ||
| 29 | } | 52 | } |
| 30 | 53 | ||
| 31 | static inline void gdlm_bast(void *astarg, int mode) | 54 | static void gdlm_delete_lp(struct gdlm_lock *lp) |
| 32 | { | 55 | { |
| 33 | struct gdlm_lock *lp = astarg; | ||
| 34 | struct gdlm_ls *ls = lp->ls; | 56 | struct gdlm_ls *ls = lp->ls; |
| 35 | 57 | ||
| 36 | if (!mode) { | ||
| 37 | printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n", | ||
| 38 | lp->lockname.ln_type, | ||
| 39 | (unsigned long long)lp->lockname.ln_number); | ||
| 40 | return; | ||
| 41 | } | ||
| 42 | |||
| 43 | spin_lock(&ls->async_lock); | 58 | spin_lock(&ls->async_lock); |
| 44 | if (!lp->bast_mode) { | 59 | if (!list_empty(&lp->delay_list)) |
| 45 | list_add_tail(&lp->blist, &ls->blocking); | 60 | list_del_init(&lp->delay_list); |
| 46 | lp->bast_mode = mode; | 61 | ls->all_locks_count--; |
| 47 | } else if (lp->bast_mode < mode) | ||
| 48 | lp->bast_mode = mode; | ||
| 49 | spin_unlock(&ls->async_lock); | 62 | spin_unlock(&ls->async_lock); |
| 50 | wake_up(&ls->thread_wait); | 63 | |
| 64 | kfree(lp); | ||
| 51 | } | 65 | } |
| 52 | 66 | ||
| 53 | void gdlm_queue_delayed(struct gdlm_lock *lp) | 67 | static void gdlm_queue_delayed(struct gdlm_lock *lp) |
| 54 | { | 68 | { |
| 55 | struct gdlm_ls *ls = lp->ls; | 69 | struct gdlm_ls *ls = lp->ls; |
| 56 | 70 | ||
| @@ -59,6 +73,236 @@ void gdlm_queue_delayed(struct gdlm_lock *lp) | |||
| 59 | spin_unlock(&ls->async_lock); | 73 | spin_unlock(&ls->async_lock); |
| 60 | } | 74 | } |
| 61 | 75 | ||
| 76 | static void process_complete(struct gdlm_lock *lp) | ||
| 77 | { | ||
| 78 | struct gdlm_ls *ls = lp->ls; | ||
| 79 | struct lm_async_cb acb; | ||
| 80 | |||
| 81 | memset(&acb, 0, sizeof(acb)); | ||
| 82 | |||
| 83 | if (lp->lksb.sb_status == -DLM_ECANCEL) { | ||
| 84 | log_info("complete dlm cancel %x,%llx flags %lx", | ||
| 85 | lp->lockname.ln_type, | ||
| 86 | (unsigned long long)lp->lockname.ln_number, | ||
| 87 | lp->flags); | ||
| 88 | |||
| 89 | lp->req = lp->cur; | ||
| 90 | acb.lc_ret |= LM_OUT_CANCELED; | ||
| 91 | if (lp->cur == DLM_LOCK_IV) | ||
| 92 | lp->lksb.sb_lkid = 0; | ||
| 93 | goto out; | ||
| 94 | } | ||
| 95 | |||
| 96 | if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) { | ||
| 97 | if (lp->lksb.sb_status != -DLM_EUNLOCK) { | ||
| 98 | log_info("unlock sb_status %d %x,%llx flags %lx", | ||
| 99 | lp->lksb.sb_status, lp->lockname.ln_type, | ||
| 100 | (unsigned long long)lp->lockname.ln_number, | ||
| 101 | lp->flags); | ||
| 102 | return; | ||
| 103 | } | ||
| 104 | |||
| 105 | lp->cur = DLM_LOCK_IV; | ||
| 106 | lp->req = DLM_LOCK_IV; | ||
| 107 | lp->lksb.sb_lkid = 0; | ||
| 108 | |||
| 109 | if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) { | ||
| 110 | gdlm_delete_lp(lp); | ||
| 111 | return; | ||
| 112 | } | ||
| 113 | goto out; | ||
| 114 | } | ||
| 115 | |||
| 116 | if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID) | ||
| 117 | memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE); | ||
| 118 | |||
| 119 | if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) { | ||
| 120 | if (lp->req == DLM_LOCK_PR) | ||
| 121 | lp->req = DLM_LOCK_CW; | ||
| 122 | else if (lp->req == DLM_LOCK_CW) | ||
| 123 | lp->req = DLM_LOCK_PR; | ||
| 124 | } | ||
| 125 | |||
| 126 | /* | ||
| 127 | * A canceled lock request. The lock was just taken off the delayed | ||
| 128 | * list and was never even submitted to dlm. | ||
| 129 | */ | ||
| 130 | |||
| 131 | if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) { | ||
| 132 | log_info("complete internal cancel %x,%llx", | ||
| 133 | lp->lockname.ln_type, | ||
| 134 | (unsigned long long)lp->lockname.ln_number); | ||
| 135 | lp->req = lp->cur; | ||
| 136 | acb.lc_ret |= LM_OUT_CANCELED; | ||
| 137 | goto out; | ||
| 138 | } | ||
| 139 | |||
| 140 | /* | ||
| 141 | * An error occurred. | ||
| 142 | */ | ||
| 143 | |||
| 144 | if (lp->lksb.sb_status) { | ||
| 145 | /* a "normal" error */ | ||
| 146 | if ((lp->lksb.sb_status == -EAGAIN) && | ||
| 147 | (lp->lkf & DLM_LKF_NOQUEUE)) { | ||
| 148 | lp->req = lp->cur; | ||
| 149 | if (lp->cur == DLM_LOCK_IV) | ||
| 150 | lp->lksb.sb_lkid = 0; | ||
| 151 | goto out; | ||
| 152 | } | ||
| 153 | |||
| 154 | /* this could only happen with cancels I think */ | ||
| 155 | log_info("ast sb_status %d %x,%llx flags %lx", | ||
| 156 | lp->lksb.sb_status, lp->lockname.ln_type, | ||
| 157 | (unsigned long long)lp->lockname.ln_number, | ||
| 158 | lp->flags); | ||
| 159 | return; | ||
| 160 | } | ||
| 161 | |||
| 162 | /* | ||
| 163 | * This is an AST for an EX->EX conversion for sync_lvb from GFS. | ||
| 164 | */ | ||
| 165 | |||
| 166 | if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) { | ||
| 167 | wake_up_ast(lp); | ||
| 168 | return; | ||
| 169 | } | ||
| 170 | |||
| 171 | /* | ||
| 172 | * A lock has been demoted to NL because it initially completed during | ||
| 173 | * BLOCK_LOCKS. Now it must be requested in the originally requested | ||
| 174 | * mode. | ||
| 175 | */ | ||
| 176 | |||
| 177 | if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) { | ||
| 178 | gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx", | ||
| 179 | lp->lockname.ln_type, | ||
| 180 | (unsigned long long)lp->lockname.ln_number); | ||
| 181 | gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx", | ||
| 182 | lp->lockname.ln_type, | ||
| 183 | (unsigned long long)lp->lockname.ln_number); | ||
| 184 | |||
| 185 | lp->cur = DLM_LOCK_NL; | ||
| 186 | lp->req = lp->prev_req; | ||
| 187 | lp->prev_req = DLM_LOCK_IV; | ||
| 188 | lp->lkf &= ~DLM_LKF_CONVDEADLK; | ||
| 189 | |||
| 190 | set_bit(LFL_NOCACHE, &lp->flags); | ||
| 191 | |||
| 192 | if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) && | ||
| 193 | !test_bit(LFL_NOBLOCK, &lp->flags)) | ||
| 194 | gdlm_queue_delayed(lp); | ||
| 195 | else | ||
| 196 | queue_submit(lp); | ||
| 197 | return; | ||
| 198 | } | ||
| 199 | |||
| 200 | /* | ||
| 201 | * A request is granted during dlm recovery. It may be granted | ||
| 202 | * because the locks of a failed node were cleared. In that case, | ||
| 203 | * there may be inconsistent data beneath this lock and we must wait | ||
| 204 | * for recovery to complete to use it. When gfs recovery is done this | ||
| 205 | * granted lock will be converted to NL and then reacquired in this | ||
| 206 | * granted state. | ||
| 207 | */ | ||
| 208 | |||
| 209 | if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) && | ||
| 210 | !test_bit(LFL_NOBLOCK, &lp->flags) && | ||
| 211 | lp->req != DLM_LOCK_NL) { | ||
| 212 | |||
| 213 | lp->cur = lp->req; | ||
| 214 | lp->prev_req = lp->req; | ||
| 215 | lp->req = DLM_LOCK_NL; | ||
| 216 | lp->lkf |= DLM_LKF_CONVERT; | ||
| 217 | lp->lkf &= ~DLM_LKF_CONVDEADLK; | ||
| 218 | |||
| 219 | log_debug("rereq %x,%llx id %x %d,%d", | ||
| 220 | lp->lockname.ln_type, | ||
| 221 | (unsigned long long)lp->lockname.ln_number, | ||
| 222 | lp->lksb.sb_lkid, lp->cur, lp->req); | ||
| 223 | |||
| 224 | set_bit(LFL_REREQUEST, &lp->flags); | ||
| 225 | queue_submit(lp); | ||
| 226 | return; | ||
| 227 | } | ||
| 228 | |||
| 229 | /* | ||
| 230 | * DLM demoted the lock to NL before it was granted so GFS must be | ||
| 231 | * told it cannot cache data for this lock. | ||
| 232 | */ | ||
| 233 | |||
| 234 | if (lp->lksb.sb_flags & DLM_SBF_DEMOTED) | ||
| 235 | set_bit(LFL_NOCACHE, &lp->flags); | ||
| 236 | |||
| 237 | out: | ||
| 238 | /* | ||
| 239 | * This is an internal lock_dlm lock | ||
| 240 | */ | ||
| 241 | |||
| 242 | if (test_bit(LFL_INLOCK, &lp->flags)) { | ||
| 243 | clear_bit(LFL_NOBLOCK, &lp->flags); | ||
| 244 | lp->cur = lp->req; | ||
| 245 | wake_up_ast(lp); | ||
| 246 | return; | ||
| 247 | } | ||
| 248 | |||
| 249 | /* | ||
| 250 | * Normal completion of a lock request. Tell GFS it now has the lock. | ||
| 251 | */ | ||
| 252 | |||
| 253 | clear_bit(LFL_NOBLOCK, &lp->flags); | ||
| 254 | lp->cur = lp->req; | ||
| 255 | |||
| 256 | acb.lc_name = lp->lockname; | ||
| 257 | acb.lc_ret |= gdlm_make_lmstate(lp->cur); | ||
| 258 | |||
| 259 | ls->fscb(ls->sdp, LM_CB_ASYNC, &acb); | ||
| 260 | } | ||
| 261 | |||
| 262 | static void gdlm_ast(void *astarg) | ||
| 263 | { | ||
| 264 | struct gdlm_lock *lp = astarg; | ||
| 265 | clear_bit(LFL_ACTIVE, &lp->flags); | ||
| 266 | process_complete(lp); | ||
| 267 | } | ||
| 268 | |||
| 269 | static void process_blocking(struct gdlm_lock *lp, int bast_mode) | ||
| 270 | { | ||
| 271 | struct gdlm_ls *ls = lp->ls; | ||
| 272 | unsigned int cb = 0; | ||
| 273 | |||
| 274 | switch (gdlm_make_lmstate(bast_mode)) { | ||
| 275 | case LM_ST_EXCLUSIVE: | ||
| 276 | cb = LM_CB_NEED_E; | ||
| 277 | break; | ||
| 278 | case LM_ST_DEFERRED: | ||
| 279 | cb = LM_CB_NEED_D; | ||
| 280 | break; | ||
| 281 | case LM_ST_SHARED: | ||
| 282 | cb = LM_CB_NEED_S; | ||
| 283 | break; | ||
| 284 | default: | ||
| 285 | gdlm_assert(0, "unknown bast mode %u", bast_mode); | ||
| 286 | } | ||
| 287 | |||
| 288 | ls->fscb(ls->sdp, cb, &lp->lockname); | ||
| 289 | } | ||
| 290 | |||
| 291 | |||
| 292 | static void gdlm_bast(void *astarg, int mode) | ||
| 293 | { | ||
| 294 | struct gdlm_lock *lp = astarg; | ||
| 295 | |||
| 296 | if (!mode) { | ||
| 297 | printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n", | ||
| 298 | lp->lockname.ln_type, | ||
| 299 | (unsigned long long)lp->lockname.ln_number); | ||
| 300 | return; | ||
| 301 | } | ||
| 302 | |||
| 303 | process_blocking(lp, mode); | ||
| 304 | } | ||
| 305 | |||
| 62 | /* convert gfs lock-state to dlm lock-mode */ | 306 | /* convert gfs lock-state to dlm lock-mode */ |
| 63 | 307 | ||
| 64 | static s16 make_mode(s16 lmstate) | 308 | static s16 make_mode(s16 lmstate) |
| @@ -77,24 +321,6 @@ static s16 make_mode(s16 lmstate) | |||
| 77 | return -1; | 321 | return -1; |
| 78 | } | 322 | } |
| 79 | 323 | ||
| 80 | /* convert dlm lock-mode to gfs lock-state */ | ||
| 81 | |||
| 82 | s16 gdlm_make_lmstate(s16 dlmmode) | ||
| 83 | { | ||
| 84 | switch (dlmmode) { | ||
| 85 | case DLM_LOCK_IV: | ||
| 86 | case DLM_LOCK_NL: | ||
| 87 | return LM_ST_UNLOCKED; | ||
| 88 | case DLM_LOCK_EX: | ||
| 89 | return LM_ST_EXCLUSIVE; | ||
| 90 | case DLM_LOCK_CW: | ||
| 91 | return LM_ST_DEFERRED; | ||
| 92 | case DLM_LOCK_PR: | ||
| 93 | return LM_ST_SHARED; | ||
| 94 | } | ||
| 95 | gdlm_assert(0, "unknown DLM mode %d", dlmmode); | ||
| 96 | return -1; | ||
| 97 | } | ||
| 98 | 324 | ||
| 99 | /* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and | 325 | /* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and |
| 100 | DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */ | 326 | DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */ |
| @@ -134,14 +360,6 @@ static inline unsigned int make_flags(struct gdlm_lock *lp, | |||
| 134 | 360 | ||
| 135 | if (lp->lksb.sb_lkid != 0) { | 361 | if (lp->lksb.sb_lkid != 0) { |
| 136 | lkf |= DLM_LKF_CONVERT; | 362 | lkf |= DLM_LKF_CONVERT; |
| 137 | |||
| 138 | /* Conversion deadlock avoidance by DLM */ | ||
| 139 | |||
| 140 | if (!(lp->ls->fsflags & LM_MFLAG_CONV_NODROP) && | ||
| 141 | !test_bit(LFL_FORCE_PROMOTE, &lp->flags) && | ||
| 142 | !(lkf & DLM_LKF_NOQUEUE) && | ||
| 143 | cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req) | ||
| 144 | lkf |= DLM_LKF_CONVDEADLK; | ||
| 145 | } | 363 | } |
| 146 | 364 | ||
| 147 | if (lp->lvb) | 365 | if (lp->lvb) |
| @@ -173,14 +391,9 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name, | |||
| 173 | make_strname(name, &lp->strname); | 391 | make_strname(name, &lp->strname); |
| 174 | lp->ls = ls; | 392 | lp->ls = ls; |
| 175 | lp->cur = DLM_LOCK_IV; | 393 | lp->cur = DLM_LOCK_IV; |
| 176 | lp->lvb = NULL; | ||
| 177 | lp->hold_null = NULL; | ||
| 178 | INIT_LIST_HEAD(&lp->clist); | ||
| 179 | INIT_LIST_HEAD(&lp->blist); | ||
| 180 | INIT_LIST_HEAD(&lp->delay_list); | 394 | INIT_LIST_HEAD(&lp->delay_list); |
| 181 | 395 | ||
| 182 | spin_lock(&ls->async_lock); | 396 | spin_lock(&ls->async_lock); |
| 183 | list_add(&lp->all_list, &ls->all_locks); | ||
| 184 | ls->all_locks_count++; | 397 | ls->all_locks_count++; |
| 185 | spin_unlock(&ls->async_lock); | 398 | spin_unlock(&ls->async_lock); |
| 186 | 399 | ||
| @@ -188,26 +401,6 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name, | |||
| 188 | return 0; | 401 | return 0; |
| 189 | } | 402 | } |
| 190 | 403 | ||
| 191 | void gdlm_delete_lp(struct gdlm_lock *lp) | ||
| 192 | { | ||
| 193 | struct gdlm_ls *ls = lp->ls; | ||
| 194 | |||
| 195 | spin_lock(&ls->async_lock); | ||
| 196 | if (!list_empty(&lp->clist)) | ||
| 197 | list_del_init(&lp->clist); | ||
| 198 | if (!list_empty(&lp->blist)) | ||
| 199 | list_del_init(&lp->blist); | ||
| 200 | if (!list_empty(&lp->delay_list)) | ||
| 201 | list_del_init(&lp->delay_list); | ||
| 202 | gdlm_assert(!list_empty(&lp->all_list), "%x,%llx", lp->lockname.ln_type, | ||
| 203 | (unsigned long long)lp->lockname.ln_number); | ||
| 204 | list_del_init(&lp->all_list); | ||
| 205 | ls->all_locks_count--; | ||
| 206 | spin_unlock(&ls->async_lock); | ||
| 207 | |||
| 208 | kfree(lp); | ||
| 209 | } | ||
| 210 | |||
| 211 | int gdlm_get_lock(void *lockspace, struct lm_lockname *name, | 404 | int gdlm_get_lock(void *lockspace, struct lm_lockname *name, |
| 212 | void **lockp) | 405 | void **lockp) |
| 213 | { | 406 | { |
| @@ -261,7 +454,7 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp) | |||
| 261 | 454 | ||
| 262 | if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) { | 455 | if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) { |
| 263 | lp->lksb.sb_status = -EAGAIN; | 456 | lp->lksb.sb_status = -EAGAIN; |
| 264 | queue_complete(lp); | 457 | gdlm_ast(lp); |
| 265 | error = 0; | 458 | error = 0; |
| 266 | } | 459 | } |
| 267 | 460 | ||
| @@ -308,6 +501,12 @@ unsigned int gdlm_lock(void *lock, unsigned int cur_state, | |||
| 308 | { | 501 | { |
| 309 | struct gdlm_lock *lp = lock; | 502 | struct gdlm_lock *lp = lock; |
| 310 | 503 | ||
| 504 | if (req_state == LM_ST_UNLOCKED) | ||
| 505 | return gdlm_unlock(lock, cur_state); | ||
| 506 | |||
| 507 | if (req_state == LM_ST_UNLOCKED) | ||
| 508 | return gdlm_unlock(lock, cur_state); | ||
| 509 | |||
| 311 | clear_bit(LFL_DLM_CANCEL, &lp->flags); | 510 | clear_bit(LFL_DLM_CANCEL, &lp->flags); |
| 312 | if (flags & LM_FLAG_NOEXP) | 511 | if (flags & LM_FLAG_NOEXP) |
| 313 | set_bit(LFL_NOBLOCK, &lp->flags); | 512 | set_bit(LFL_NOBLOCK, &lp->flags); |
| @@ -351,7 +550,7 @@ void gdlm_cancel(void *lock) | |||
| 351 | if (delay_list) { | 550 | if (delay_list) { |
| 352 | set_bit(LFL_CANCEL, &lp->flags); | 551 | set_bit(LFL_CANCEL, &lp->flags); |
| 353 | set_bit(LFL_ACTIVE, &lp->flags); | 552 | set_bit(LFL_ACTIVE, &lp->flags); |
| 354 | queue_complete(lp); | 553 | gdlm_ast(lp); |
| 355 | return; | 554 | return; |
| 356 | } | 555 | } |
| 357 | 556 | ||
| @@ -507,22 +706,3 @@ void gdlm_submit_delayed(struct gdlm_ls *ls) | |||
| 507 | wake_up(&ls->thread_wait); | 706 | wake_up(&ls->thread_wait); |
| 508 | } | 707 | } |
| 509 | 708 | ||
| 510 | int gdlm_release_all_locks(struct gdlm_ls *ls) | ||
| 511 | { | ||
| 512 | struct gdlm_lock *lp, *safe; | ||
| 513 | int count = 0; | ||
| 514 | |||
| 515 | spin_lock(&ls->async_lock); | ||
| 516 | list_for_each_entry_safe(lp, safe, &ls->all_locks, all_list) { | ||
| 517 | list_del_init(&lp->all_list); | ||
| 518 | |||
| 519 | if (lp->lvb && lp->lvb != junk_lvb) | ||
| 520 | kfree(lp->lvb); | ||
| 521 | kfree(lp); | ||
| 522 | count++; | ||
| 523 | } | ||
| 524 | spin_unlock(&ls->async_lock); | ||
| 525 | |||
| 526 | return count; | ||
| 527 | } | ||
| 528 | |||
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index a243cf69c54e..3c98e7c6f93b 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h | |||
| @@ -72,19 +72,12 @@ struct gdlm_ls { | |||
| 72 | int recover_jid_done; | 72 | int recover_jid_done; |
| 73 | int recover_jid_status; | 73 | int recover_jid_status; |
| 74 | spinlock_t async_lock; | 74 | spinlock_t async_lock; |
| 75 | struct list_head complete; | ||
| 76 | struct list_head blocking; | ||
| 77 | struct list_head delayed; | 75 | struct list_head delayed; |
| 78 | struct list_head submit; | 76 | struct list_head submit; |
| 79 | struct list_head all_locks; | ||
| 80 | u32 all_locks_count; | 77 | u32 all_locks_count; |
| 81 | wait_queue_head_t wait_control; | 78 | wait_queue_head_t wait_control; |
| 82 | struct task_struct *thread1; | 79 | struct task_struct *thread; |
| 83 | struct task_struct *thread2; | ||
| 84 | wait_queue_head_t thread_wait; | 80 | wait_queue_head_t thread_wait; |
| 85 | unsigned long drop_time; | ||
| 86 | int drop_locks_count; | ||
| 87 | int drop_locks_period; | ||
| 88 | }; | 81 | }; |
| 89 | 82 | ||
| 90 | enum { | 83 | enum { |
| @@ -117,12 +110,7 @@ struct gdlm_lock { | |||
| 117 | u32 lkf; /* dlm flags DLM_LKF_ */ | 110 | u32 lkf; /* dlm flags DLM_LKF_ */ |
| 118 | unsigned long flags; /* lock_dlm flags LFL_ */ | 111 | unsigned long flags; /* lock_dlm flags LFL_ */ |
| 119 | 112 | ||
| 120 | int bast_mode; /* protected by async_lock */ | ||
| 121 | |||
| 122 | struct list_head clist; /* complete */ | ||
| 123 | struct list_head blist; /* blocking */ | ||
| 124 | struct list_head delay_list; /* delayed */ | 113 | struct list_head delay_list; /* delayed */ |
| 125 | struct list_head all_list; /* all locks for the fs */ | ||
| 126 | struct gdlm_lock *hold_null; /* NL lock for hold_lvb */ | 114 | struct gdlm_lock *hold_null; /* NL lock for hold_lvb */ |
| 127 | }; | 115 | }; |
| 128 | 116 | ||
| @@ -159,11 +147,7 @@ void gdlm_release_threads(struct gdlm_ls *); | |||
| 159 | 147 | ||
| 160 | /* lock.c */ | 148 | /* lock.c */ |
| 161 | 149 | ||
| 162 | s16 gdlm_make_lmstate(s16); | ||
| 163 | void gdlm_queue_delayed(struct gdlm_lock *); | ||
| 164 | void gdlm_submit_delayed(struct gdlm_ls *); | 150 | void gdlm_submit_delayed(struct gdlm_ls *); |
| 165 | int gdlm_release_all_locks(struct gdlm_ls *); | ||
| 166 | void gdlm_delete_lp(struct gdlm_lock *); | ||
| 167 | unsigned int gdlm_do_lock(struct gdlm_lock *); | 151 | unsigned int gdlm_do_lock(struct gdlm_lock *); |
| 168 | 152 | ||
| 169 | int gdlm_get_lock(void *, struct lm_lockname *, void **); | 153 | int gdlm_get_lock(void *, struct lm_lockname *, void **); |
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index 470bdf650b50..09d78c216f48 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c | |||
| @@ -22,22 +22,14 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp, | |||
| 22 | if (!ls) | 22 | if (!ls) |
| 23 | return NULL; | 23 | return NULL; |
| 24 | 24 | ||
| 25 | ls->drop_locks_count = GDLM_DROP_COUNT; | ||
| 26 | ls->drop_locks_period = GDLM_DROP_PERIOD; | ||
| 27 | ls->fscb = cb; | 25 | ls->fscb = cb; |
| 28 | ls->sdp = sdp; | 26 | ls->sdp = sdp; |
| 29 | ls->fsflags = flags; | 27 | ls->fsflags = flags; |
| 30 | spin_lock_init(&ls->async_lock); | 28 | spin_lock_init(&ls->async_lock); |
| 31 | INIT_LIST_HEAD(&ls->complete); | ||
| 32 | INIT_LIST_HEAD(&ls->blocking); | ||
| 33 | INIT_LIST_HEAD(&ls->delayed); | 29 | INIT_LIST_HEAD(&ls->delayed); |
| 34 | INIT_LIST_HEAD(&ls->submit); | 30 | INIT_LIST_HEAD(&ls->submit); |
| 35 | INIT_LIST_HEAD(&ls->all_locks); | ||
| 36 | init_waitqueue_head(&ls->thread_wait); | 31 | init_waitqueue_head(&ls->thread_wait); |
| 37 | init_waitqueue_head(&ls->wait_control); | 32 | init_waitqueue_head(&ls->wait_control); |
| 38 | ls->thread1 = NULL; | ||
| 39 | ls->thread2 = NULL; | ||
| 40 | ls->drop_time = jiffies; | ||
| 41 | ls->jid = -1; | 33 | ls->jid = -1; |
| 42 | 34 | ||
| 43 | strncpy(buf, table_name, 256); | 35 | strncpy(buf, table_name, 256); |
| @@ -180,7 +172,6 @@ out: | |||
| 180 | static void gdlm_unmount(void *lockspace) | 172 | static void gdlm_unmount(void *lockspace) |
| 181 | { | 173 | { |
| 182 | struct gdlm_ls *ls = lockspace; | 174 | struct gdlm_ls *ls = lockspace; |
| 183 | int rv; | ||
| 184 | 175 | ||
| 185 | log_debug("unmount flags %lx", ls->flags); | 176 | log_debug("unmount flags %lx", ls->flags); |
| 186 | 177 | ||
| @@ -194,9 +185,7 @@ static void gdlm_unmount(void *lockspace) | |||
| 194 | gdlm_kobject_release(ls); | 185 | gdlm_kobject_release(ls); |
| 195 | dlm_release_lockspace(ls->dlm_lockspace, 2); | 186 | dlm_release_lockspace(ls->dlm_lockspace, 2); |
| 196 | gdlm_release_threads(ls); | 187 | gdlm_release_threads(ls); |
| 197 | rv = gdlm_release_all_locks(ls); | 188 | BUG_ON(ls->all_locks_count); |
| 198 | if (rv) | ||
| 199 | log_info("gdlm_unmount: %d stray locks freed", rv); | ||
| 200 | out: | 189 | out: |
| 201 | kfree(ls); | 190 | kfree(ls); |
| 202 | } | 191 | } |
| @@ -232,7 +221,6 @@ static void gdlm_withdraw(void *lockspace) | |||
| 232 | 221 | ||
| 233 | dlm_release_lockspace(ls->dlm_lockspace, 2); | 222 | dlm_release_lockspace(ls->dlm_lockspace, 2); |
| 234 | gdlm_release_threads(ls); | 223 | gdlm_release_threads(ls); |
| 235 | gdlm_release_all_locks(ls); | ||
| 236 | gdlm_kobject_release(ls); | 224 | gdlm_kobject_release(ls); |
| 237 | } | 225 | } |
| 238 | 226 | ||
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c index a4ff271df9ee..4ec571c3d8a9 100644 --- a/fs/gfs2/locking/dlm/sysfs.c +++ b/fs/gfs2/locking/dlm/sysfs.c | |||
| @@ -114,17 +114,6 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf) | |||
| 114 | return sprintf(buf, "%d\n", ls->recover_jid_status); | 114 | return sprintf(buf, "%d\n", ls->recover_jid_status); |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf) | ||
| 118 | { | ||
| 119 | return sprintf(buf, "%d\n", ls->drop_locks_count); | ||
| 120 | } | ||
| 121 | |||
| 122 | static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len) | ||
| 123 | { | ||
| 124 | ls->drop_locks_count = simple_strtol(buf, NULL, 0); | ||
| 125 | return len; | ||
| 126 | } | ||
| 127 | |||
| 128 | struct gdlm_attr { | 117 | struct gdlm_attr { |
| 129 | struct attribute attr; | 118 | struct attribute attr; |
| 130 | ssize_t (*show)(struct gdlm_ls *, char *); | 119 | ssize_t (*show)(struct gdlm_ls *, char *); |
| @@ -144,7 +133,6 @@ GDLM_ATTR(first_done, 0444, first_done_show, NULL); | |||
| 144 | GDLM_ATTR(recover, 0644, recover_show, recover_store); | 133 | GDLM_ATTR(recover, 0644, recover_show, recover_store); |
| 145 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); | 134 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); |
| 146 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); | 135 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); |
| 147 | GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store); | ||
| 148 | 136 | ||
| 149 | static struct attribute *gdlm_attrs[] = { | 137 | static struct attribute *gdlm_attrs[] = { |
| 150 | &gdlm_attr_proto_name.attr, | 138 | &gdlm_attr_proto_name.attr, |
| @@ -157,7 +145,6 @@ static struct attribute *gdlm_attrs[] = { | |||
| 157 | &gdlm_attr_recover.attr, | 145 | &gdlm_attr_recover.attr, |
| 158 | &gdlm_attr_recover_done.attr, | 146 | &gdlm_attr_recover_done.attr, |
| 159 | &gdlm_attr_recover_status.attr, | 147 | &gdlm_attr_recover_status.attr, |
| 160 | &gdlm_attr_drop_count.attr, | ||
| 161 | NULL, | 148 | NULL, |
| 162 | }; | 149 | }; |
| 163 | 150 | ||
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c index e53db6fd28ab..38823efd698c 100644 --- a/fs/gfs2/locking/dlm/thread.c +++ b/fs/gfs2/locking/dlm/thread.c | |||
| @@ -9,367 +9,60 @@ | |||
| 9 | 9 | ||
| 10 | #include "lock_dlm.h" | 10 | #include "lock_dlm.h" |
| 11 | 11 | ||
| 12 | /* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm | 12 | static inline int no_work(struct gdlm_ls *ls) |
| 13 | thread gets to it. */ | ||
| 14 | |||
| 15 | static void queue_submit(struct gdlm_lock *lp) | ||
| 16 | { | ||
| 17 | struct gdlm_ls *ls = lp->ls; | ||
| 18 | |||
| 19 | spin_lock(&ls->async_lock); | ||
| 20 | list_add_tail(&lp->delay_list, &ls->submit); | ||
| 21 | spin_unlock(&ls->async_lock); | ||
| 22 | wake_up(&ls->thread_wait); | ||
| 23 | } | ||
| 24 | |||
| 25 | static void process_blocking(struct gdlm_lock *lp, int bast_mode) | ||
| 26 | { | ||
| 27 | struct gdlm_ls *ls = lp->ls; | ||
| 28 | unsigned int cb = 0; | ||
| 29 | |||
| 30 | switch (gdlm_make_lmstate(bast_mode)) { | ||
| 31 | case LM_ST_EXCLUSIVE: | ||
| 32 | cb = LM_CB_NEED_E; | ||
| 33 | break; | ||
| 34 | case LM_ST_DEFERRED: | ||
| 35 | cb = LM_CB_NEED_D; | ||
| 36 | break; | ||
| 37 | case LM_ST_SHARED: | ||
| 38 | cb = LM_CB_NEED_S; | ||
| 39 | break; | ||
| 40 | default: | ||
| 41 | gdlm_assert(0, "unknown bast mode %u", lp->bast_mode); | ||
| 42 | } | ||
| 43 | |||
| 44 | ls->fscb(ls->sdp, cb, &lp->lockname); | ||
| 45 | } | ||
| 46 | |||
| 47 | static void wake_up_ast(struct gdlm_lock *lp) | ||
| 48 | { | ||
| 49 | clear_bit(LFL_AST_WAIT, &lp->flags); | ||
| 50 | smp_mb__after_clear_bit(); | ||
| 51 | wake_up_bit(&lp->flags, LFL_AST_WAIT); | ||
| 52 | } | ||
| 53 | |||
| 54 | static void process_complete(struct gdlm_lock *lp) | ||
| 55 | { | ||
| 56 | struct gdlm_ls *ls = lp->ls; | ||
| 57 | struct lm_async_cb acb; | ||
| 58 | s16 prev_mode = lp->cur; | ||
| 59 | |||
| 60 | memset(&acb, 0, sizeof(acb)); | ||
| 61 | |||
| 62 | if (lp->lksb.sb_status == -DLM_ECANCEL) { | ||
| 63 | log_info("complete dlm cancel %x,%llx flags %lx", | ||
| 64 | lp->lockname.ln_type, | ||
| 65 | (unsigned long long)lp->lockname.ln_number, | ||
| 66 | lp->flags); | ||
| 67 | |||
| 68 | lp->req = lp->cur; | ||
| 69 | acb.lc_ret |= LM_OUT_CANCELED; | ||
| 70 | if (lp->cur == DLM_LOCK_IV) | ||
| 71 | lp->lksb.sb_lkid = 0; | ||
| 72 | goto out; | ||
| 73 | } | ||
| 74 | |||
| 75 | if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) { | ||
| 76 | if (lp->lksb.sb_status != -DLM_EUNLOCK) { | ||
| 77 | log_info("unlock sb_status %d %x,%llx flags %lx", | ||
| 78 | lp->lksb.sb_status, lp->lockname.ln_type, | ||
| 79 | (unsigned long long)lp->lockname.ln_number, | ||
| 80 | lp->flags); | ||
| 81 | return; | ||
| 82 | } | ||
| 83 | |||
| 84 | lp->cur = DLM_LOCK_IV; | ||
| 85 | lp->req = DLM_LOCK_IV; | ||
| 86 | lp->lksb.sb_lkid = 0; | ||
| 87 | |||
| 88 | if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) { | ||
| 89 | gdlm_delete_lp(lp); | ||
| 90 | return; | ||
| 91 | } | ||
| 92 | goto out; | ||
| 93 | } | ||
| 94 | |||
| 95 | if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID) | ||
| 96 | memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE); | ||
| 97 | |||
| 98 | if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) { | ||
| 99 | if (lp->req == DLM_LOCK_PR) | ||
| 100 | lp->req = DLM_LOCK_CW; | ||
| 101 | else if (lp->req == DLM_LOCK_CW) | ||
| 102 | lp->req = DLM_LOCK_PR; | ||
| 103 | } | ||
| 104 | |||
| 105 | /* | ||
| 106 | * A canceled lock request. The lock was just taken off the delayed | ||
| 107 | * list and was never even submitted to dlm. | ||
| 108 | */ | ||
| 109 | |||
| 110 | if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) { | ||
| 111 | log_info("complete internal cancel %x,%llx", | ||
| 112 | lp->lockname.ln_type, | ||
| 113 | (unsigned long long)lp->lockname.ln_number); | ||
| 114 | lp->req = lp->cur; | ||
| 115 | acb.lc_ret |= LM_OUT_CANCELED; | ||
| 116 | goto out; | ||
| 117 | } | ||
| 118 | |||
| 119 | /* | ||
| 120 | * An error occured. | ||
| 121 | */ | ||
| 122 | |||
| 123 | if (lp->lksb.sb_status) { | ||
| 124 | /* a "normal" error */ | ||
| 125 | if ((lp->lksb.sb_status == -EAGAIN) && | ||
| 126 | (lp->lkf & DLM_LKF_NOQUEUE)) { | ||
| 127 | lp->req = lp->cur; | ||
| 128 | if (lp->cur == DLM_LOCK_IV) | ||
| 129 | lp->lksb.sb_lkid = 0; | ||
| 130 | goto out; | ||
| 131 | } | ||
| 132 | |||
| 133 | /* this could only happen with cancels I think */ | ||
| 134 | log_info("ast sb_status %d %x,%llx flags %lx", | ||
| 135 | lp->lksb.sb_status, lp->lockname.ln_type, | ||
| 136 | (unsigned long long)lp->lockname.ln_number, | ||
| 137 | lp->flags); | ||
| 138 | if (lp->lksb.sb_status == -EDEADLOCK && | ||
| 139 | lp->ls->fsflags & LM_MFLAG_CONV_NODROP) { | ||
| 140 | lp->req = lp->cur; | ||
| 141 | acb.lc_ret |= LM_OUT_CONV_DEADLK; | ||
| 142 | if (lp->cur == DLM_LOCK_IV) | ||
| 143 | lp->lksb.sb_lkid = 0; | ||
| 144 | goto out; | ||
| 145 | } else | ||
| 146 | return; | ||
| 147 | } | ||
| 148 | |||
| 149 | /* | ||
| 150 | * This is an AST for an EX->EX conversion for sync_lvb from GFS. | ||
| 151 | */ | ||
| 152 | |||
| 153 | if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) { | ||
| 154 | wake_up_ast(lp); | ||
| 155 | return; | ||
| 156 | } | ||
| 157 | |||
| 158 | /* | ||
| 159 | * A lock has been demoted to NL because it initially completed during | ||
| 160 | * BLOCK_LOCKS. Now it must be requested in the originally requested | ||
| 161 | * mode. | ||
| 162 | */ | ||
| 163 | |||
| 164 | if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) { | ||
| 165 | gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx", | ||
| 166 | lp->lockname.ln_type, | ||
| 167 | (unsigned long long)lp->lockname.ln_number); | ||
| 168 | gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx", | ||
| 169 | lp->lockname.ln_type, | ||
| 170 | (unsigned long long)lp->lockname.ln_number); | ||
| 171 | |||
| 172 | lp->cur = DLM_LOCK_NL; | ||
| 173 | lp->req = lp->prev_req; | ||
| 174 | lp->prev_req = DLM_LOCK_IV; | ||
| 175 | lp->lkf &= ~DLM_LKF_CONVDEADLK; | ||
| 176 | |||
| 177 | set_bit(LFL_NOCACHE, &lp->flags); | ||
| 178 | |||
| 179 | if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) && | ||
| 180 | !test_bit(LFL_NOBLOCK, &lp->flags)) | ||
| 181 | gdlm_queue_delayed(lp); | ||
| 182 | else | ||
| 183 | queue_submit(lp); | ||
| 184 | return; | ||
| 185 | } | ||
| 186 | |||
| 187 | /* | ||
| 188 | * A request is granted during dlm recovery. It may be granted | ||
| 189 | * because the locks of a failed node were cleared. In that case, | ||
| 190 | * there may be inconsistent data beneath this lock and we must wait | ||
| 191 | * for recovery to complete to use it. When gfs recovery is done this | ||
| 192 | * granted lock will be converted to NL and then reacquired in this | ||
| 193 | * granted state. | ||
| 194 | */ | ||
| 195 | |||
| 196 | if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) && | ||
| 197 | !test_bit(LFL_NOBLOCK, &lp->flags) && | ||
| 198 | lp->req != DLM_LOCK_NL) { | ||
| 199 | |||
| 200 | lp->cur = lp->req; | ||
| 201 | lp->prev_req = lp->req; | ||
| 202 | lp->req = DLM_LOCK_NL; | ||
| 203 | lp->lkf |= DLM_LKF_CONVERT; | ||
| 204 | lp->lkf &= ~DLM_LKF_CONVDEADLK; | ||
| 205 | |||
| 206 | log_debug("rereq %x,%llx id %x %d,%d", | ||
| 207 | lp->lockname.ln_type, | ||
| 208 | (unsigned long long)lp->lockname.ln_number, | ||
| 209 | lp->lksb.sb_lkid, lp->cur, lp->req); | ||
| 210 | |||
| 211 | set_bit(LFL_REREQUEST, &lp->flags); | ||
| 212 | queue_submit(lp); | ||
| 213 | return; | ||
| 214 | } | ||
| 215 | |||
| 216 | /* | ||
| 217 | * DLM demoted the lock to NL before it was granted so GFS must be | ||
| 218 | * told it cannot cache data for this lock. | ||
| 219 | */ | ||
| 220 | |||
| 221 | if (lp->lksb.sb_flags & DLM_SBF_DEMOTED) | ||
| 222 | set_bit(LFL_NOCACHE, &lp->flags); | ||
| 223 | |||
| 224 | out: | ||
| 225 | /* | ||
| 226 | * This is an internal lock_dlm lock | ||
| 227 | */ | ||
| 228 | |||
| 229 | if (test_bit(LFL_INLOCK, &lp->flags)) { | ||
| 230 | clear_bit(LFL_NOBLOCK, &lp->flags); | ||
| 231 | lp->cur = lp->req; | ||
| 232 | wake_up_ast(lp); | ||
| 233 | return; | ||
| 234 | } | ||
| 235 | |||
| 236 | /* | ||
| 237 | * Normal completion of a lock request. Tell GFS it now has the lock. | ||
| 238 | */ | ||
| 239 | |||
| 240 | clear_bit(LFL_NOBLOCK, &lp->flags); | ||
| 241 | lp->cur = lp->req; | ||
| 242 | |||
| 243 | acb.lc_name = lp->lockname; | ||
| 244 | acb.lc_ret |= gdlm_make_lmstate(lp->cur); | ||
| 245 | |||
| 246 | if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) && | ||
| 247 | (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL)) | ||
| 248 | acb.lc_ret |= LM_OUT_CACHEABLE; | ||
| 249 | |||
| 250 | ls->fscb(ls->sdp, LM_CB_ASYNC, &acb); | ||
| 251 | } | ||
| 252 | |||
| 253 | static inline int no_work(struct gdlm_ls *ls, int blocking) | ||
| 254 | { | 13 | { |
| 255 | int ret; | 14 | int ret; |
| 256 | 15 | ||
| 257 | spin_lock(&ls->async_lock); | 16 | spin_lock(&ls->async_lock); |
| 258 | ret = list_empty(&ls->complete) && list_empty(&ls->submit); | 17 | ret = list_empty(&ls->submit); |
| 259 | if (ret && blocking) | ||
| 260 | ret = list_empty(&ls->blocking); | ||
| 261 | spin_unlock(&ls->async_lock); | 18 | spin_unlock(&ls->async_lock); |
| 262 | 19 | ||
| 263 | return ret; | 20 | return ret; |
| 264 | } | 21 | } |
| 265 | 22 | ||
| 266 | static inline int check_drop(struct gdlm_ls *ls) | 23 | static int gdlm_thread(void *data) |
| 267 | { | ||
| 268 | if (!ls->drop_locks_count) | ||
| 269 | return 0; | ||
| 270 | |||
| 271 | if (time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ)) { | ||
| 272 | ls->drop_time = jiffies; | ||
| 273 | if (ls->all_locks_count >= ls->drop_locks_count) | ||
| 274 | return 1; | ||
| 275 | } | ||
| 276 | return 0; | ||
| 277 | } | ||
| 278 | |||
| 279 | static int gdlm_thread(void *data, int blist) | ||
| 280 | { | 24 | { |
| 281 | struct gdlm_ls *ls = (struct gdlm_ls *) data; | 25 | struct gdlm_ls *ls = (struct gdlm_ls *) data; |
| 282 | struct gdlm_lock *lp = NULL; | 26 | struct gdlm_lock *lp = NULL; |
| 283 | uint8_t complete, blocking, submit, drop; | ||
| 284 | |||
| 285 | /* Only thread1 is allowed to do blocking callbacks since gfs | ||
| 286 | may wait for a completion callback within a blocking cb. */ | ||
| 287 | 27 | ||
| 288 | while (!kthread_should_stop()) { | 28 | while (!kthread_should_stop()) { |
| 289 | wait_event_interruptible(ls->thread_wait, | 29 | wait_event_interruptible(ls->thread_wait, |
| 290 | !no_work(ls, blist) || kthread_should_stop()); | 30 | !no_work(ls) || kthread_should_stop()); |
| 291 | |||
| 292 | complete = blocking = submit = drop = 0; | ||
| 293 | 31 | ||
| 294 | spin_lock(&ls->async_lock); | 32 | spin_lock(&ls->async_lock); |
| 295 | 33 | ||
| 296 | if (blist && !list_empty(&ls->blocking)) { | 34 | if (!list_empty(&ls->submit)) { |
| 297 | lp = list_entry(ls->blocking.next, struct gdlm_lock, | ||
| 298 | blist); | ||
| 299 | list_del_init(&lp->blist); | ||
| 300 | blocking = lp->bast_mode; | ||
| 301 | lp->bast_mode = 0; | ||
| 302 | } else if (!list_empty(&ls->complete)) { | ||
| 303 | lp = list_entry(ls->complete.next, struct gdlm_lock, | ||
| 304 | clist); | ||
| 305 | list_del_init(&lp->clist); | ||
| 306 | complete = 1; | ||
| 307 | } else if (!list_empty(&ls->submit)) { | ||
| 308 | lp = list_entry(ls->submit.next, struct gdlm_lock, | 35 | lp = list_entry(ls->submit.next, struct gdlm_lock, |
| 309 | delay_list); | 36 | delay_list); |
| 310 | list_del_init(&lp->delay_list); | 37 | list_del_init(&lp->delay_list); |
| 311 | submit = 1; | 38 | spin_unlock(&ls->async_lock); |
| 39 | gdlm_do_lock(lp); | ||
| 40 | spin_lock(&ls->async_lock); | ||
| 312 | } | 41 | } |
| 313 | |||
| 314 | drop = check_drop(ls); | ||
| 315 | spin_unlock(&ls->async_lock); | 42 | spin_unlock(&ls->async_lock); |
| 316 | |||
| 317 | if (complete) | ||
| 318 | process_complete(lp); | ||
| 319 | |||
| 320 | else if (blocking) | ||
| 321 | process_blocking(lp, blocking); | ||
| 322 | |||
| 323 | else if (submit) | ||
| 324 | gdlm_do_lock(lp); | ||
| 325 | |||
| 326 | if (drop) | ||
| 327 | ls->fscb(ls->sdp, LM_CB_DROPLOCKS, NULL); | ||
| 328 | |||
| 329 | schedule(); | ||
| 330 | } | 43 | } |
| 331 | 44 | ||
| 332 | return 0; | 45 | return 0; |
| 333 | } | 46 | } |
| 334 | 47 | ||
| 335 | static int gdlm_thread1(void *data) | ||
| 336 | { | ||
| 337 | return gdlm_thread(data, 1); | ||
| 338 | } | ||
| 339 | |||
| 340 | static int gdlm_thread2(void *data) | ||
| 341 | { | ||
| 342 | return gdlm_thread(data, 0); | ||
| 343 | } | ||
| 344 | |||
| 345 | int gdlm_init_threads(struct gdlm_ls *ls) | 48 | int gdlm_init_threads(struct gdlm_ls *ls) |
| 346 | { | 49 | { |
| 347 | struct task_struct *p; | 50 | struct task_struct *p; |
| 348 | int error; | 51 | int error; |
| 349 | 52 | ||
| 350 | p = kthread_run(gdlm_thread1, ls, "lock_dlm1"); | 53 | p = kthread_run(gdlm_thread, ls, "lock_dlm"); |
| 351 | error = IS_ERR(p); | ||
| 352 | if (error) { | ||
| 353 | log_error("can't start lock_dlm1 thread %d", error); | ||
| 354 | return error; | ||
| 355 | } | ||
| 356 | ls->thread1 = p; | ||
| 357 | |||
| 358 | p = kthread_run(gdlm_thread2, ls, "lock_dlm2"); | ||
| 359 | error = IS_ERR(p); | 54 | error = IS_ERR(p); |
| 360 | if (error) { | 55 | if (error) { |
| 361 | log_error("can't start lock_dlm2 thread %d", error); | 56 | log_error("can't start lock_dlm thread %d", error); |
| 362 | kthread_stop(ls->thread1); | ||
| 363 | return error; | 57 | return error; |
| 364 | } | 58 | } |
| 365 | ls->thread2 = p; | 59 | ls->thread = p; |
| 366 | 60 | ||
| 367 | return 0; | 61 | return 0; |
| 368 | } | 62 | } |
| 369 | 63 | ||
| 370 | void gdlm_release_threads(struct gdlm_ls *ls) | 64 | void gdlm_release_threads(struct gdlm_ls *ls) |
| 371 | { | 65 | { |
| 372 | kthread_stop(ls->thread1); | 66 | kthread_stop(ls->thread); |
| 373 | kthread_stop(ls->thread2); | ||
| 374 | } | 67 | } |
| 375 | 68 | ||
diff --git a/fs/gfs2/locking/nolock/Makefile b/fs/gfs2/locking/nolock/Makefile deleted file mode 100644 index 35e9730bc3a8..000000000000 --- a/fs/gfs2/locking/nolock/Makefile +++ /dev/null | |||
| @@ -1,3 +0,0 @@ | |||
| 1 | obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += lock_nolock.o | ||
| 2 | lock_nolock-y := main.o | ||
| 3 | |||
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c deleted file mode 100644 index 284a5ece8d94..000000000000 --- a/fs/gfs2/locking/nolock/main.c +++ /dev/null | |||
| @@ -1,238 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/init.h> | ||
| 13 | #include <linux/types.h> | ||
| 14 | #include <linux/fs.h> | ||
| 15 | #include <linux/lm_interface.h> | ||
| 16 | |||
| 17 | struct nolock_lockspace { | ||
| 18 | unsigned int nl_lvb_size; | ||
| 19 | }; | ||
| 20 | |||
| 21 | static const struct lm_lockops nolock_ops; | ||
| 22 | |||
| 23 | static int nolock_mount(char *table_name, char *host_data, | ||
| 24 | lm_callback_t cb, void *cb_data, | ||
| 25 | unsigned int min_lvb_size, int flags, | ||
| 26 | struct lm_lockstruct *lockstruct, | ||
| 27 | struct kobject *fskobj) | ||
| 28 | { | ||
| 29 | char *c; | ||
| 30 | unsigned int jid; | ||
| 31 | struct nolock_lockspace *nl; | ||
| 32 | |||
| 33 | c = strstr(host_data, "jid="); | ||
| 34 | if (!c) | ||
| 35 | jid = 0; | ||
| 36 | else { | ||
| 37 | c += 4; | ||
| 38 | sscanf(c, "%u", &jid); | ||
| 39 | } | ||
| 40 | |||
| 41 | nl = kzalloc(sizeof(struct nolock_lockspace), GFP_KERNEL); | ||
| 42 | if (!nl) | ||
| 43 | return -ENOMEM; | ||
| 44 | |||
| 45 | nl->nl_lvb_size = min_lvb_size; | ||
| 46 | |||
| 47 | lockstruct->ls_jid = jid; | ||
| 48 | lockstruct->ls_first = 1; | ||
| 49 | lockstruct->ls_lvb_size = min_lvb_size; | ||
| 50 | lockstruct->ls_lockspace = nl; | ||
| 51 | lockstruct->ls_ops = &nolock_ops; | ||
| 52 | lockstruct->ls_flags = LM_LSFLAG_LOCAL; | ||
| 53 | |||
| 54 | return 0; | ||
| 55 | } | ||
| 56 | |||
| 57 | static void nolock_others_may_mount(void *lockspace) | ||
| 58 | { | ||
| 59 | } | ||
| 60 | |||
| 61 | static void nolock_unmount(void *lockspace) | ||
| 62 | { | ||
| 63 | struct nolock_lockspace *nl = lockspace; | ||
| 64 | kfree(nl); | ||
| 65 | } | ||
| 66 | |||
| 67 | static void nolock_withdraw(void *lockspace) | ||
| 68 | { | ||
| 69 | } | ||
| 70 | |||
| 71 | /** | ||
| 72 | * nolock_get_lock - get a lm_lock_t given a descripton of the lock | ||
| 73 | * @lockspace: the lockspace the lock lives in | ||
| 74 | * @name: the name of the lock | ||
| 75 | * @lockp: return the lm_lock_t here | ||
| 76 | * | ||
| 77 | * Returns: 0 on success, -EXXX on failure | ||
| 78 | */ | ||
| 79 | |||
| 80 | static int nolock_get_lock(void *lockspace, struct lm_lockname *name, | ||
| 81 | void **lockp) | ||
| 82 | { | ||
| 83 | *lockp = lockspace; | ||
| 84 | return 0; | ||
| 85 | } | ||
| 86 | |||
| 87 | /** | ||
| 88 | * nolock_put_lock - get rid of a lock structure | ||
| 89 | * @lock: the lock to throw away | ||
| 90 | * | ||
| 91 | */ | ||
| 92 | |||
| 93 | static void nolock_put_lock(void *lock) | ||
| 94 | { | ||
| 95 | } | ||
| 96 | |||
| 97 | /** | ||
| 98 | * nolock_lock - acquire a lock | ||
| 99 | * @lock: the lock to manipulate | ||
| 100 | * @cur_state: the current state | ||
| 101 | * @req_state: the requested state | ||
| 102 | * @flags: modifier flags | ||
| 103 | * | ||
| 104 | * Returns: A bitmap of LM_OUT_* | ||
| 105 | */ | ||
| 106 | |||
| 107 | static unsigned int nolock_lock(void *lock, unsigned int cur_state, | ||
| 108 | unsigned int req_state, unsigned int flags) | ||
| 109 | { | ||
| 110 | return req_state | LM_OUT_CACHEABLE; | ||
| 111 | } | ||
| 112 | |||
| 113 | /** | ||
| 114 | * nolock_unlock - unlock a lock | ||
| 115 | * @lock: the lock to manipulate | ||
| 116 | * @cur_state: the current state | ||
| 117 | * | ||
| 118 | * Returns: 0 | ||
| 119 | */ | ||
| 120 | |||
| 121 | static unsigned int nolock_unlock(void *lock, unsigned int cur_state) | ||
| 122 | { | ||
| 123 | return 0; | ||
| 124 | } | ||
| 125 | |||
| 126 | static void nolock_cancel(void *lock) | ||
| 127 | { | ||
| 128 | } | ||
| 129 | |||
| 130 | /** | ||
| 131 | * nolock_hold_lvb - hold on to a lock value block | ||
| 132 | * @lock: the lock the LVB is associated with | ||
| 133 | * @lvbp: return the lm_lvb_t here | ||
| 134 | * | ||
| 135 | * Returns: 0 on success, -EXXX on failure | ||
| 136 | */ | ||
| 137 | |||
| 138 | static int nolock_hold_lvb(void *lock, char **lvbp) | ||
| 139 | { | ||
| 140 | struct nolock_lockspace *nl = lock; | ||
| 141 | int error = 0; | ||
| 142 | |||
| 143 | *lvbp = kzalloc(nl->nl_lvb_size, GFP_NOFS); | ||
| 144 | if (!*lvbp) | ||
| 145 | error = -ENOMEM; | ||
| 146 | |||
| 147 | return error; | ||
| 148 | } | ||
| 149 | |||
| 150 | /** | ||
| 151 | * nolock_unhold_lvb - release a LVB | ||
| 152 | * @lock: the lock the LVB is associated with | ||
| 153 | * @lvb: the lock value block | ||
| 154 | * | ||
| 155 | */ | ||
| 156 | |||
| 157 | static void nolock_unhold_lvb(void *lock, char *lvb) | ||
| 158 | { | ||
| 159 | kfree(lvb); | ||
| 160 | } | ||
| 161 | |||
| 162 | static int nolock_plock_get(void *lockspace, struct lm_lockname *name, | ||
| 163 | struct file *file, struct file_lock *fl) | ||
| 164 | { | ||
| 165 | posix_test_lock(file, fl); | ||
| 166 | |||
| 167 | return 0; | ||
| 168 | } | ||
| 169 | |||
| 170 | static int nolock_plock(void *lockspace, struct lm_lockname *name, | ||
| 171 | struct file *file, int cmd, struct file_lock *fl) | ||
| 172 | { | ||
| 173 | int error; | ||
| 174 | error = posix_lock_file_wait(file, fl); | ||
| 175 | return error; | ||
| 176 | } | ||
| 177 | |||
| 178 | static int nolock_punlock(void *lockspace, struct lm_lockname *name, | ||
| 179 | struct file *file, struct file_lock *fl) | ||
| 180 | { | ||
| 181 | int error; | ||
| 182 | error = posix_lock_file_wait(file, fl); | ||
| 183 | return error; | ||
| 184 | } | ||
| 185 | |||
| 186 | static void nolock_recovery_done(void *lockspace, unsigned int jid, | ||
| 187 | unsigned int message) | ||
| 188 | { | ||
| 189 | } | ||
| 190 | |||
| 191 | static const struct lm_lockops nolock_ops = { | ||
| 192 | .lm_proto_name = "lock_nolock", | ||
| 193 | .lm_mount = nolock_mount, | ||
| 194 | .lm_others_may_mount = nolock_others_may_mount, | ||
| 195 | .lm_unmount = nolock_unmount, | ||
| 196 | .lm_withdraw = nolock_withdraw, | ||
| 197 | .lm_get_lock = nolock_get_lock, | ||
| 198 | .lm_put_lock = nolock_put_lock, | ||
| 199 | .lm_lock = nolock_lock, | ||
| 200 | .lm_unlock = nolock_unlock, | ||
| 201 | .lm_cancel = nolock_cancel, | ||
| 202 | .lm_hold_lvb = nolock_hold_lvb, | ||
| 203 | .lm_unhold_lvb = nolock_unhold_lvb, | ||
| 204 | .lm_plock_get = nolock_plock_get, | ||
| 205 | .lm_plock = nolock_plock, | ||
| 206 | .lm_punlock = nolock_punlock, | ||
| 207 | .lm_recovery_done = nolock_recovery_done, | ||
| 208 | .lm_owner = THIS_MODULE, | ||
| 209 | }; | ||
| 210 | |||
| 211 | static int __init init_nolock(void) | ||
| 212 | { | ||
| 213 | int error; | ||
| 214 | |||
| 215 | error = gfs2_register_lockproto(&nolock_ops); | ||
| 216 | if (error) { | ||
| 217 | printk(KERN_WARNING | ||
| 218 | "lock_nolock: can't register protocol: %d\n", error); | ||
| 219 | return error; | ||
| 220 | } | ||
| 221 | |||
| 222 | printk(KERN_INFO | ||
| 223 | "Lock_Nolock (built %s %s) installed\n", __DATE__, __TIME__); | ||
| 224 | return 0; | ||
| 225 | } | ||
| 226 | |||
| 227 | static void __exit exit_nolock(void) | ||
| 228 | { | ||
| 229 | gfs2_unregister_lockproto(&nolock_ops); | ||
| 230 | } | ||
| 231 | |||
| 232 | module_init(init_nolock); | ||
| 233 | module_exit(exit_nolock); | ||
| 234 | |||
| 235 | MODULE_DESCRIPTION("GFS Nolock Locking Module"); | ||
| 236 | MODULE_AUTHOR("Red Hat, Inc."); | ||
| 237 | MODULE_LICENSE("GPL"); | ||
| 238 | |||
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 548264b1836d..6c6af9f5e3ab 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
| @@ -87,6 +87,8 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd) | |||
| 87 | */ | 87 | */ |
| 88 | 88 | ||
| 89 | static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | 89 | static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) |
| 90 | __releases(&sdp->sd_log_lock) | ||
| 91 | __acquires(&sdp->sd_log_lock) | ||
| 90 | { | 92 | { |
| 91 | struct gfs2_bufdata *bd, *s; | 93 | struct gfs2_bufdata *bd, *s; |
| 92 | struct buffer_head *bh; | 94 | struct buffer_head *bh; |
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 771152816508..7c64510ccfd2 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | */ | 21 | */ |
| 22 | 22 | ||
| 23 | static inline void gfs2_log_lock(struct gfs2_sbd *sdp) | 23 | static inline void gfs2_log_lock(struct gfs2_sbd *sdp) |
| 24 | __acquires(&sdp->sd_log_lock) | ||
| 24 | { | 25 | { |
| 25 | spin_lock(&sdp->sd_log_lock); | 26 | spin_lock(&sdp->sd_log_lock); |
| 26 | } | 27 | } |
| @@ -32,6 +33,7 @@ static inline void gfs2_log_lock(struct gfs2_sbd *sdp) | |||
| 32 | */ | 33 | */ |
| 33 | 34 | ||
| 34 | static inline void gfs2_log_unlock(struct gfs2_sbd *sdp) | 35 | static inline void gfs2_log_unlock(struct gfs2_sbd *sdp) |
| 36 | __releases(&sdp->sd_log_lock) | ||
| 35 | { | 37 | { |
| 36 | spin_unlock(&sdp->sd_log_lock); | 38 | spin_unlock(&sdp->sd_log_lock); |
| 37 | } | 39 | } |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 053e2ebbbd50..bcc668d0fadd 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
| @@ -40,8 +40,6 @@ static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) | |||
| 40 | INIT_HLIST_NODE(&gl->gl_list); | 40 | INIT_HLIST_NODE(&gl->gl_list); |
| 41 | spin_lock_init(&gl->gl_spin); | 41 | spin_lock_init(&gl->gl_spin); |
| 42 | INIT_LIST_HEAD(&gl->gl_holders); | 42 | INIT_LIST_HEAD(&gl->gl_holders); |
| 43 | INIT_LIST_HEAD(&gl->gl_waiters1); | ||
| 44 | INIT_LIST_HEAD(&gl->gl_waiters3); | ||
| 45 | gl->gl_lvb = NULL; | 43 | gl->gl_lvb = NULL; |
| 46 | atomic_set(&gl->gl_lvb_count, 0); | 44 | atomic_set(&gl->gl_lvb_count, 0); |
| 47 | INIT_LIST_HEAD(&gl->gl_reclaim); | 45 | INIT_LIST_HEAD(&gl->gl_reclaim); |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 78d75f892f82..09853620c951 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
| @@ -129,7 +129,7 @@ void gfs2_meta_sync(struct gfs2_glock *gl) | |||
| 129 | } | 129 | } |
| 130 | 130 | ||
| 131 | /** | 131 | /** |
| 132 | * getbuf - Get a buffer with a given address space | 132 | * gfs2_getbuf - Get a buffer with a given address space |
| 133 | * @gl: the glock | 133 | * @gl: the glock |
| 134 | * @blkno: the block number (filesystem scope) | 134 | * @blkno: the block number (filesystem scope) |
| 135 | * @create: 1 if the buffer should be created | 135 | * @create: 1 if the buffer should be created |
| @@ -137,7 +137,7 @@ void gfs2_meta_sync(struct gfs2_glock *gl) | |||
| 137 | * Returns: the buffer | 137 | * Returns: the buffer |
| 138 | */ | 138 | */ |
| 139 | 139 | ||
| 140 | static struct buffer_head *getbuf(struct gfs2_glock *gl, u64 blkno, int create) | 140 | struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) |
| 141 | { | 141 | { |
| 142 | struct address_space *mapping = gl->gl_aspace->i_mapping; | 142 | struct address_space *mapping = gl->gl_aspace->i_mapping; |
| 143 | struct gfs2_sbd *sdp = gl->gl_sbd; | 143 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| @@ -205,7 +205,7 @@ static void meta_prep_new(struct buffer_head *bh) | |||
| 205 | struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) | 205 | struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) |
| 206 | { | 206 | { |
| 207 | struct buffer_head *bh; | 207 | struct buffer_head *bh; |
| 208 | bh = getbuf(gl, blkno, CREATE); | 208 | bh = gfs2_getbuf(gl, blkno, CREATE); |
| 209 | meta_prep_new(bh); | 209 | meta_prep_new(bh); |
| 210 | return bh; | 210 | return bh; |
| 211 | } | 211 | } |
| @@ -223,7 +223,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) | |||
| 223 | int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, | 223 | int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, |
| 224 | struct buffer_head **bhp) | 224 | struct buffer_head **bhp) |
| 225 | { | 225 | { |
| 226 | *bhp = getbuf(gl, blkno, CREATE); | 226 | *bhp = gfs2_getbuf(gl, blkno, CREATE); |
| 227 | if (!buffer_uptodate(*bhp)) { | 227 | if (!buffer_uptodate(*bhp)) { |
| 228 | ll_rw_block(READ_META, 1, bhp); | 228 | ll_rw_block(READ_META, 1, bhp); |
| 229 | if (flags & DIO_WAIT) { | 229 | if (flags & DIO_WAIT) { |
| @@ -346,7 +346,7 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) | |||
| 346 | struct buffer_head *bh; | 346 | struct buffer_head *bh; |
| 347 | 347 | ||
| 348 | while (blen) { | 348 | while (blen) { |
| 349 | bh = getbuf(ip->i_gl, bstart, NO_CREATE); | 349 | bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE); |
| 350 | if (bh) { | 350 | if (bh) { |
| 351 | lock_buffer(bh); | 351 | lock_buffer(bh); |
| 352 | gfs2_log_lock(sdp); | 352 | gfs2_log_lock(sdp); |
| @@ -421,7 +421,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) | |||
| 421 | if (extlen > max_ra) | 421 | if (extlen > max_ra) |
| 422 | extlen = max_ra; | 422 | extlen = max_ra; |
| 423 | 423 | ||
| 424 | first_bh = getbuf(gl, dblock, CREATE); | 424 | first_bh = gfs2_getbuf(gl, dblock, CREATE); |
| 425 | 425 | ||
| 426 | if (buffer_uptodate(first_bh)) | 426 | if (buffer_uptodate(first_bh)) |
| 427 | goto out; | 427 | goto out; |
| @@ -432,7 +432,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) | |||
| 432 | extlen--; | 432 | extlen--; |
| 433 | 433 | ||
| 434 | while (extlen) { | 434 | while (extlen) { |
| 435 | bh = getbuf(gl, dblock, CREATE); | 435 | bh = gfs2_getbuf(gl, dblock, CREATE); |
| 436 | 436 | ||
| 437 | if (!buffer_uptodate(bh) && !buffer_locked(bh)) | 437 | if (!buffer_uptodate(bh) && !buffer_locked(bh)) |
| 438 | ll_rw_block(READA, 1, &bh); | 438 | ll_rw_block(READA, 1, &bh); |
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 73e3b1c76fe1..b1a5f3674d43 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h | |||
| @@ -47,6 +47,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); | |||
| 47 | int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, | 47 | int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, |
| 48 | int flags, struct buffer_head **bhp); | 48 | int flags, struct buffer_head **bhp); |
| 49 | int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); | 49 | int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); |
| 50 | struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create); | ||
| 50 | 51 | ||
| 51 | void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, | 52 | void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, |
| 52 | int meta); | 53 | int meta); |
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index f55394e57cb2..e64a1b04117a 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
| @@ -499,34 +499,34 @@ static int __gfs2_readpage(void *file, struct page *page) | |||
| 499 | * @file: The file to read | 499 | * @file: The file to read |
| 500 | * @page: The page of the file | 500 | * @page: The page of the file |
| 501 | * | 501 | * |
| 502 | * This deals with the locking required. We use a trylock in order to | 502 | * This deals with the locking required. We have to unlock and |
| 503 | * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE | 503 | * relock the page in order to get the locking in the right |
| 504 | * in the event that we are unable to get the lock. | 504 | * order. |
| 505 | */ | 505 | */ |
| 506 | 506 | ||
| 507 | static int gfs2_readpage(struct file *file, struct page *page) | 507 | static int gfs2_readpage(struct file *file, struct page *page) |
| 508 | { | 508 | { |
| 509 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | 509 | struct address_space *mapping = page->mapping; |
| 510 | struct gfs2_holder *gh; | 510 | struct gfs2_inode *ip = GFS2_I(mapping->host); |
| 511 | struct gfs2_holder gh; | ||
| 511 | int error; | 512 | int error; |
| 512 | 513 | ||
| 513 | gh = gfs2_glock_is_locked_by_me(ip->i_gl); | 514 | unlock_page(page); |
| 514 | if (!gh) { | 515 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); |
| 515 | gh = kmalloc(sizeof(struct gfs2_holder), GFP_NOFS); | 516 | error = gfs2_glock_nq_atime(&gh); |
| 516 | if (!gh) | 517 | if (unlikely(error)) |
| 517 | return -ENOBUFS; | 518 | goto out; |
| 518 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, gh); | 519 | error = AOP_TRUNCATED_PAGE; |
| 520 | lock_page(page); | ||
| 521 | if (page->mapping == mapping && !PageUptodate(page)) | ||
| 522 | error = __gfs2_readpage(file, page); | ||
| 523 | else | ||
| 519 | unlock_page(page); | 524 | unlock_page(page); |
| 520 | error = gfs2_glock_nq_atime(gh); | 525 | gfs2_glock_dq(&gh); |
| 521 | if (likely(error != 0)) | ||
| 522 | goto out; | ||
| 523 | return AOP_TRUNCATED_PAGE; | ||
| 524 | } | ||
| 525 | error = __gfs2_readpage(file, page); | ||
| 526 | gfs2_glock_dq(gh); | ||
| 527 | out: | 526 | out: |
| 528 | gfs2_holder_uninit(gh); | 527 | gfs2_holder_uninit(&gh); |
| 529 | kfree(gh); | 528 | if (error && error != AOP_TRUNCATED_PAGE) |
| 529 | lock_page(page); | ||
| 530 | return error; | 530 | return error; |
| 531 | } | 531 | } |
| 532 | 532 | ||
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index e1b7d525a066..e9a366d4411c 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/uio.h> | 15 | #include <linux/uio.h> |
| 16 | #include <linux/blkdev.h> | 16 | #include <linux/blkdev.h> |
| 17 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
| 18 | #include <linux/mount.h> | ||
| 18 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
| 19 | #include <linux/gfs2_ondisk.h> | 20 | #include <linux/gfs2_ondisk.h> |
| 20 | #include <linux/ext2_fs.h> | 21 | #include <linux/ext2_fs.h> |
| @@ -62,11 +63,11 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) | |||
| 62 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, | 63 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, |
| 63 | &i_gh); | 64 | &i_gh); |
| 64 | if (!error) { | 65 | if (!error) { |
| 65 | error = remote_llseek(file, offset, origin); | 66 | error = generic_file_llseek_unlocked(file, offset, origin); |
| 66 | gfs2_glock_dq_uninit(&i_gh); | 67 | gfs2_glock_dq_uninit(&i_gh); |
| 67 | } | 68 | } |
| 68 | } else | 69 | } else |
| 69 | error = remote_llseek(file, offset, origin); | 70 | error = generic_file_llseek_unlocked(file, offset, origin); |
| 70 | 71 | ||
| 71 | return error; | 72 | return error; |
| 72 | } | 73 | } |
| @@ -133,7 +134,6 @@ static const u32 fsflags_to_gfs2[32] = { | |||
| 133 | [7] = GFS2_DIF_NOATIME, | 134 | [7] = GFS2_DIF_NOATIME, |
| 134 | [12] = GFS2_DIF_EXHASH, | 135 | [12] = GFS2_DIF_EXHASH, |
| 135 | [14] = GFS2_DIF_INHERIT_JDATA, | 136 | [14] = GFS2_DIF_INHERIT_JDATA, |
| 136 | [20] = GFS2_DIF_INHERIT_DIRECTIO, | ||
| 137 | }; | 137 | }; |
| 138 | 138 | ||
| 139 | static const u32 gfs2_to_fsflags[32] = { | 139 | static const u32 gfs2_to_fsflags[32] = { |
| @@ -142,7 +142,6 @@ static const u32 gfs2_to_fsflags[32] = { | |||
| 142 | [gfs2fl_AppendOnly] = FS_APPEND_FL, | 142 | [gfs2fl_AppendOnly] = FS_APPEND_FL, |
| 143 | [gfs2fl_NoAtime] = FS_NOATIME_FL, | 143 | [gfs2fl_NoAtime] = FS_NOATIME_FL, |
| 144 | [gfs2fl_ExHash] = FS_INDEX_FL, | 144 | [gfs2fl_ExHash] = FS_INDEX_FL, |
| 145 | [gfs2fl_InheritDirectio] = FS_DIRECTIO_FL, | ||
| 146 | [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, | 145 | [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, |
| 147 | }; | 146 | }; |
| 148 | 147 | ||
| @@ -160,12 +159,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) | |||
| 160 | return error; | 159 | return error; |
| 161 | 160 | ||
| 162 | fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags); | 161 | fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags); |
| 163 | if (!S_ISDIR(inode->i_mode)) { | 162 | if (!S_ISDIR(inode->i_mode) && ip->i_di.di_flags & GFS2_DIF_JDATA) |
| 164 | if (ip->i_di.di_flags & GFS2_DIF_JDATA) | 163 | fsflags |= FS_JOURNAL_DATA_FL; |
| 165 | fsflags |= FS_JOURNAL_DATA_FL; | ||
| 166 | if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO) | ||
| 167 | fsflags |= FS_DIRECTIO_FL; | ||
| 168 | } | ||
| 169 | if (put_user(fsflags, ptr)) | 164 | if (put_user(fsflags, ptr)) |
| 170 | error = -EFAULT; | 165 | error = -EFAULT; |
| 171 | 166 | ||
| @@ -194,13 +189,11 @@ void gfs2_set_inode_flags(struct inode *inode) | |||
| 194 | 189 | ||
| 195 | /* Flags that can be set by user space */ | 190 | /* Flags that can be set by user space */ |
| 196 | #define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \ | 191 | #define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \ |
| 197 | GFS2_DIF_DIRECTIO| \ | ||
| 198 | GFS2_DIF_IMMUTABLE| \ | 192 | GFS2_DIF_IMMUTABLE| \ |
| 199 | GFS2_DIF_APPENDONLY| \ | 193 | GFS2_DIF_APPENDONLY| \ |
| 200 | GFS2_DIF_NOATIME| \ | 194 | GFS2_DIF_NOATIME| \ |
| 201 | GFS2_DIF_SYNC| \ | 195 | GFS2_DIF_SYNC| \ |
| 202 | GFS2_DIF_SYSTEM| \ | 196 | GFS2_DIF_SYSTEM| \ |
| 203 | GFS2_DIF_INHERIT_DIRECTIO| \ | ||
| 204 | GFS2_DIF_INHERIT_JDATA) | 197 | GFS2_DIF_INHERIT_JDATA) |
| 205 | 198 | ||
| 206 | /** | 199 | /** |
| @@ -220,10 +213,14 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) | |||
| 220 | int error; | 213 | int error; |
| 221 | u32 new_flags, flags; | 214 | u32 new_flags, flags; |
| 222 | 215 | ||
| 223 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | 216 | error = mnt_want_write(filp->f_path.mnt); |
| 224 | if (error) | 217 | if (error) |
| 225 | return error; | 218 | return error; |
| 226 | 219 | ||
| 220 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
| 221 | if (error) | ||
| 222 | goto out_drop_write; | ||
| 223 | |||
| 227 | flags = ip->i_di.di_flags; | 224 | flags = ip->i_di.di_flags; |
| 228 | new_flags = (flags & ~mask) | (reqflags & mask); | 225 | new_flags = (flags & ~mask) | (reqflags & mask); |
| 229 | if ((new_flags ^ flags) == 0) | 226 | if ((new_flags ^ flags) == 0) |
| @@ -242,7 +239,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) | |||
| 242 | !capable(CAP_LINUX_IMMUTABLE)) | 239 | !capable(CAP_LINUX_IMMUTABLE)) |
| 243 | goto out; | 240 | goto out; |
| 244 | if (!IS_IMMUTABLE(inode)) { | 241 | if (!IS_IMMUTABLE(inode)) { |
| 245 | error = permission(inode, MAY_WRITE, NULL); | 242 | error = gfs2_permission(inode, MAY_WRITE); |
| 246 | if (error) | 243 | if (error) |
| 247 | goto out; | 244 | goto out; |
| 248 | } | 245 | } |
| @@ -272,6 +269,8 @@ out_trans_end: | |||
| 272 | gfs2_trans_end(sdp); | 269 | gfs2_trans_end(sdp); |
| 273 | out: | 270 | out: |
| 274 | gfs2_glock_dq_uninit(&gh); | 271 | gfs2_glock_dq_uninit(&gh); |
| 272 | out_drop_write: | ||
| 273 | mnt_drop_write(filp->f_path.mnt); | ||
| 275 | return error; | 274 | return error; |
| 276 | } | 275 | } |
| 277 | 276 | ||
| @@ -285,8 +284,6 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr) | |||
| 285 | if (!S_ISDIR(inode->i_mode)) { | 284 | if (!S_ISDIR(inode->i_mode)) { |
| 286 | if (gfsflags & GFS2_DIF_INHERIT_JDATA) | 285 | if (gfsflags & GFS2_DIF_INHERIT_JDATA) |
| 287 | gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA); | 286 | gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA); |
| 288 | if (gfsflags & GFS2_DIF_INHERIT_DIRECTIO) | ||
| 289 | gfsflags ^= (GFS2_DIF_DIRECTIO | GFS2_DIF_INHERIT_DIRECTIO); | ||
| 290 | return do_gfs2_set_flags(filp, gfsflags, ~0); | 287 | return do_gfs2_set_flags(filp, gfsflags, ~0); |
| 291 | } | 288 | } |
| 292 | return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA); | 289 | return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA); |
| @@ -487,11 +484,6 @@ static int gfs2_open(struct inode *inode, struct file *file) | |||
| 487 | goto fail_gunlock; | 484 | goto fail_gunlock; |
| 488 | } | 485 | } |
| 489 | 486 | ||
| 490 | /* Listen to the Direct I/O flag */ | ||
| 491 | |||
| 492 | if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO) | ||
| 493 | file->f_flags |= O_DIRECT; | ||
| 494 | |||
| 495 | gfs2_glock_dq_uninit(&i_gh); | 487 | gfs2_glock_dq_uninit(&i_gh); |
| 496 | } | 488 | } |
| 497 | 489 | ||
| @@ -669,8 +661,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) | |||
| 669 | int error = 0; | 661 | int error = 0; |
| 670 | 662 | ||
| 671 | state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; | 663 | state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; |
| 672 | flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE | 664 | flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; |
| 673 | | GL_FLOCK; | ||
| 674 | 665 | ||
| 675 | mutex_lock(&fp->f_fl_mutex); | 666 | mutex_lock(&fp->f_fl_mutex); |
| 676 | 667 | ||
| @@ -683,9 +674,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) | |||
| 683 | gfs2_glock_dq_wait(fl_gh); | 674 | gfs2_glock_dq_wait(fl_gh); |
| 684 | gfs2_holder_reinit(state, flags, fl_gh); | 675 | gfs2_holder_reinit(state, flags, fl_gh); |
| 685 | } else { | 676 | } else { |
| 686 | error = gfs2_glock_get(GFS2_SB(&ip->i_inode), | 677 | error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, |
| 687 | ip->i_no_addr, &gfs2_flock_glops, | 678 | &gfs2_flock_glops, CREATE, &gl); |
| 688 | CREATE, &gl); | ||
| 689 | if (error) | 679 | if (error) |
| 690 | goto out; | 680 | goto out; |
| 691 | gfs2_holder_init(gl, state, flags, fl_gh); | 681 | gfs2_holder_init(gl, state, flags, fl_gh); |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b2028c82e8d1..b4d1d6490633 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
| @@ -64,7 +64,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
| 64 | mutex_init(&sdp->sd_rindex_mutex); | 64 | mutex_init(&sdp->sd_rindex_mutex); |
| 65 | INIT_LIST_HEAD(&sdp->sd_rindex_list); | 65 | INIT_LIST_HEAD(&sdp->sd_rindex_list); |
| 66 | INIT_LIST_HEAD(&sdp->sd_rindex_mru_list); | 66 | INIT_LIST_HEAD(&sdp->sd_rindex_mru_list); |
| 67 | INIT_LIST_HEAD(&sdp->sd_rindex_recent_list); | ||
| 68 | 67 | ||
| 69 | INIT_LIST_HEAD(&sdp->sd_jindex_list); | 68 | INIT_LIST_HEAD(&sdp->sd_jindex_list); |
| 70 | spin_lock_init(&sdp->sd_jindex_spin); | 69 | spin_lock_init(&sdp->sd_jindex_spin); |
| @@ -364,6 +363,8 @@ static int map_journal_extents(struct gfs2_sbd *sdp) | |||
| 364 | 363 | ||
| 365 | static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) | 364 | static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) |
| 366 | { | 365 | { |
| 366 | if (!sdp->sd_lockstruct.ls_ops->lm_others_may_mount) | ||
| 367 | return; | ||
| 367 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 368 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 368 | sdp->sd_lockstruct.ls_ops->lm_others_may_mount( | 369 | sdp->sd_lockstruct.ls_ops->lm_others_may_mount( |
| 369 | sdp->sd_lockstruct.ls_lockspace); | 370 | sdp->sd_lockstruct.ls_lockspace); |
| @@ -741,8 +742,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | |||
| 741 | goto out; | 742 | goto out; |
| 742 | } | 743 | } |
| 743 | 744 | ||
| 744 | if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) || | 745 | if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) || |
| 745 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) || | ||
| 746 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= | 746 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= |
| 747 | GFS2_MIN_LVB_SIZE)) { | 747 | GFS2_MIN_LVB_SIZE)) { |
| 748 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); | 748 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); |
| @@ -873,7 +873,7 @@ fail_sb: | |||
| 873 | fail_locking: | 873 | fail_locking: |
| 874 | init_locking(sdp, &mount_gh, UNDO); | 874 | init_locking(sdp, &mount_gh, UNDO); |
| 875 | fail_lm: | 875 | fail_lm: |
| 876 | gfs2_gl_hash_clear(sdp, WAIT); | 876 | gfs2_gl_hash_clear(sdp); |
| 877 | gfs2_lm_unmount(sdp); | 877 | gfs2_lm_unmount(sdp); |
| 878 | while (invalidate_inodes(sb)) | 878 | while (invalidate_inodes(sb)) |
| 879 | yield(); | 879 | yield(); |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 2686ad4c0029..1e252dfc5294 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
| @@ -163,7 +163,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
| 163 | if (error) | 163 | if (error) |
| 164 | goto out; | 164 | goto out; |
| 165 | 165 | ||
| 166 | error = permission(dir, MAY_WRITE | MAY_EXEC, NULL); | 166 | error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); |
| 167 | if (error) | 167 | if (error) |
| 168 | goto out_gunlock; | 168 | goto out_gunlock; |
| 169 | 169 | ||
| @@ -669,7 +669,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 669 | } | 669 | } |
| 670 | } | 670 | } |
| 671 | } else { | 671 | } else { |
| 672 | error = permission(ndir, MAY_WRITE | MAY_EXEC, NULL); | 672 | error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC); |
| 673 | if (error) | 673 | if (error) |
| 674 | goto out_gunlock; | 674 | goto out_gunlock; |
| 675 | 675 | ||
| @@ -704,7 +704,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 704 | /* Check out the dir to be renamed */ | 704 | /* Check out the dir to be renamed */ |
| 705 | 705 | ||
| 706 | if (dir_rename) { | 706 | if (dir_rename) { |
| 707 | error = permission(odentry->d_inode, MAY_WRITE, NULL); | 707 | error = gfs2_permission(odentry->d_inode, MAY_WRITE); |
| 708 | if (error) | 708 | if (error) |
| 709 | goto out_gunlock; | 709 | goto out_gunlock; |
| 710 | } | 710 | } |
| @@ -891,7 +891,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
| 891 | * Returns: errno | 891 | * Returns: errno |
| 892 | */ | 892 | */ |
| 893 | 893 | ||
| 894 | static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd) | 894 | int gfs2_permission(struct inode *inode, int mask) |
| 895 | { | 895 | { |
| 896 | struct gfs2_inode *ip = GFS2_I(inode); | 896 | struct gfs2_inode *ip = GFS2_I(inode); |
| 897 | struct gfs2_holder i_gh; | 897 | struct gfs2_holder i_gh; |
| @@ -905,13 +905,22 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
| 905 | unlock = 1; | 905 | unlock = 1; |
| 906 | } | 906 | } |
| 907 | 907 | ||
| 908 | error = generic_permission(inode, mask, gfs2_check_acl); | 908 | if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) |
| 909 | error = -EACCES; | ||
| 910 | else | ||
| 911 | error = generic_permission(inode, mask, gfs2_check_acl); | ||
| 909 | if (unlock) | 912 | if (unlock) |
| 910 | gfs2_glock_dq_uninit(&i_gh); | 913 | gfs2_glock_dq_uninit(&i_gh); |
| 911 | 914 | ||
| 912 | return error; | 915 | return error; |
| 913 | } | 916 | } |
| 914 | 917 | ||
| 918 | static int gfs2_iop_permission(struct inode *inode, int mask, | ||
| 919 | struct nameidata *nd) | ||
| 920 | { | ||
| 921 | return gfs2_permission(inode, mask); | ||
| 922 | } | ||
| 923 | |||
| 915 | static int setattr_size(struct inode *inode, struct iattr *attr) | 924 | static int setattr_size(struct inode *inode, struct iattr *attr) |
| 916 | { | 925 | { |
| 917 | struct gfs2_inode *ip = GFS2_I(inode); | 926 | struct gfs2_inode *ip = GFS2_I(inode); |
| @@ -1141,7 +1150,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) | |||
| 1141 | } | 1150 | } |
| 1142 | 1151 | ||
| 1143 | const struct inode_operations gfs2_file_iops = { | 1152 | const struct inode_operations gfs2_file_iops = { |
| 1144 | .permission = gfs2_permission, | 1153 | .permission = gfs2_iop_permission, |
| 1145 | .setattr = gfs2_setattr, | 1154 | .setattr = gfs2_setattr, |
| 1146 | .getattr = gfs2_getattr, | 1155 | .getattr = gfs2_getattr, |
| 1147 | .setxattr = gfs2_setxattr, | 1156 | .setxattr = gfs2_setxattr, |
| @@ -1160,7 +1169,7 @@ const struct inode_operations gfs2_dir_iops = { | |||
| 1160 | .rmdir = gfs2_rmdir, | 1169 | .rmdir = gfs2_rmdir, |
| 1161 | .mknod = gfs2_mknod, | 1170 | .mknod = gfs2_mknod, |
| 1162 | .rename = gfs2_rename, | 1171 | .rename = gfs2_rename, |
| 1163 | .permission = gfs2_permission, | 1172 | .permission = gfs2_iop_permission, |
| 1164 | .setattr = gfs2_setattr, | 1173 | .setattr = gfs2_setattr, |
| 1165 | .getattr = gfs2_getattr, | 1174 | .getattr = gfs2_getattr, |
| 1166 | .setxattr = gfs2_setxattr, | 1175 | .setxattr = gfs2_setxattr, |
| @@ -1172,7 +1181,7 @@ const struct inode_operations gfs2_dir_iops = { | |||
| 1172 | const struct inode_operations gfs2_symlink_iops = { | 1181 | const struct inode_operations gfs2_symlink_iops = { |
| 1173 | .readlink = gfs2_readlink, | 1182 | .readlink = gfs2_readlink, |
| 1174 | .follow_link = gfs2_follow_link, | 1183 | .follow_link = gfs2_follow_link, |
| 1175 | .permission = gfs2_permission, | 1184 | .permission = gfs2_iop_permission, |
| 1176 | .setattr = gfs2_setattr, | 1185 | .setattr = gfs2_setattr, |
| 1177 | .getattr = gfs2_getattr, | 1186 | .getattr = gfs2_getattr, |
| 1178 | .setxattr = gfs2_setxattr, | 1187 | .setxattr = gfs2_setxattr, |
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 0b7cc920eb89..f66ea0f7a356 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
| @@ -126,7 +126,7 @@ static void gfs2_put_super(struct super_block *sb) | |||
| 126 | gfs2_clear_rgrpd(sdp); | 126 | gfs2_clear_rgrpd(sdp); |
| 127 | gfs2_jindex_free(sdp); | 127 | gfs2_jindex_free(sdp); |
| 128 | /* Take apart glock structures and buffer lists */ | 128 | /* Take apart glock structures and buffer lists */ |
| 129 | gfs2_gl_hash_clear(sdp, WAIT); | 129 | gfs2_gl_hash_clear(sdp); |
| 130 | /* Unmount the locking protocol */ | 130 | /* Unmount the locking protocol */ |
| 131 | gfs2_lm_unmount(sdp); | 131 | gfs2_lm_unmount(sdp); |
| 132 | 132 | ||
| @@ -155,7 +155,7 @@ static void gfs2_write_super(struct super_block *sb) | |||
| 155 | static int gfs2_sync_fs(struct super_block *sb, int wait) | 155 | static int gfs2_sync_fs(struct super_block *sb, int wait) |
| 156 | { | 156 | { |
| 157 | sb->s_dirt = 0; | 157 | sb->s_dirt = 0; |
| 158 | if (wait) | 158 | if (wait && sb->s_fs_info) |
| 159 | gfs2_log_flush(sb->s_fs_info, NULL); | 159 | gfs2_log_flush(sb->s_fs_info, NULL); |
| 160 | return 0; | 160 | return 0; |
| 161 | } | 161 | } |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 56aaf915c59a..3e073f5144fa 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
| @@ -904,7 +904,7 @@ static int need_sync(struct gfs2_quota_data *qd) | |||
| 904 | do_sync = 0; | 904 | do_sync = 0; |
| 905 | else { | 905 | else { |
| 906 | value *= gfs2_jindex_size(sdp) * num; | 906 | value *= gfs2_jindex_size(sdp) * num; |
| 907 | do_div(value, den); | 907 | value = div_s64(value, den); |
| 908 | value += (s64)be64_to_cpu(qd->qd_qb.qb_value); | 908 | value += (s64)be64_to_cpu(qd->qd_qb.qb_value); |
| 909 | if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit)) | 909 | if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit)) |
| 910 | do_sync = 0; | 910 | do_sync = 0; |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 2888e4b4b1c5..d5e91f4f6a0b 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
| @@ -428,6 +428,9 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea | |||
| 428 | static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | 428 | static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, |
| 429 | unsigned int message) | 429 | unsigned int message) |
| 430 | { | 430 | { |
| 431 | if (!sdp->sd_lockstruct.ls_ops->lm_recovery_done) | ||
| 432 | return; | ||
| 433 | |||
| 431 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 434 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 432 | sdp->sd_lockstruct.ls_ops->lm_recovery_done( | 435 | sdp->sd_lockstruct.ls_ops->lm_recovery_done( |
| 433 | sdp->sd_lockstruct.ls_lockspace, jid, message); | 436 | sdp->sd_lockstruct.ls_lockspace, jid, message); |
| @@ -505,7 +508,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) | |||
| 505 | 508 | ||
| 506 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, | 509 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, |
| 507 | LM_FLAG_NOEXP | LM_FLAG_PRIORITY | | 510 | LM_FLAG_NOEXP | LM_FLAG_PRIORITY | |
| 508 | GL_NOCANCEL | GL_NOCACHE, &t_gh); | 511 | GL_NOCACHE, &t_gh); |
| 509 | if (error) | 512 | if (error) |
| 510 | goto fail_gunlock_ji; | 513 | goto fail_gunlock_ji; |
| 511 | 514 | ||
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3401628d742b..2d90fb253505 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
| @@ -371,11 +371,6 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp) | |||
| 371 | 371 | ||
| 372 | spin_lock(&sdp->sd_rindex_spin); | 372 | spin_lock(&sdp->sd_rindex_spin); |
| 373 | sdp->sd_rindex_forward = NULL; | 373 | sdp->sd_rindex_forward = NULL; |
| 374 | head = &sdp->sd_rindex_recent_list; | ||
| 375 | while (!list_empty(head)) { | ||
| 376 | rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent); | ||
| 377 | list_del(&rgd->rd_recent); | ||
| 378 | } | ||
| 379 | spin_unlock(&sdp->sd_rindex_spin); | 374 | spin_unlock(&sdp->sd_rindex_spin); |
| 380 | 375 | ||
| 381 | head = &sdp->sd_rindex_list; | 376 | head = &sdp->sd_rindex_list; |
| @@ -945,107 +940,30 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) | |||
| 945 | } | 940 | } |
| 946 | 941 | ||
| 947 | /** | 942 | /** |
| 948 | * recent_rgrp_first - get first RG from "recent" list | ||
| 949 | * @sdp: The GFS2 superblock | ||
| 950 | * @rglast: address of the rgrp used last | ||
| 951 | * | ||
| 952 | * Returns: The first rgrp in the recent list | ||
| 953 | */ | ||
| 954 | |||
| 955 | static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp, | ||
| 956 | u64 rglast) | ||
| 957 | { | ||
| 958 | struct gfs2_rgrpd *rgd; | ||
| 959 | |||
| 960 | spin_lock(&sdp->sd_rindex_spin); | ||
| 961 | |||
| 962 | if (rglast) { | ||
| 963 | list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { | ||
| 964 | if (rgrp_contains_block(rgd, rglast)) | ||
| 965 | goto out; | ||
| 966 | } | ||
| 967 | } | ||
| 968 | rgd = NULL; | ||
| 969 | if (!list_empty(&sdp->sd_rindex_recent_list)) | ||
| 970 | rgd = list_entry(sdp->sd_rindex_recent_list.next, | ||
| 971 | struct gfs2_rgrpd, rd_recent); | ||
| 972 | out: | ||
| 973 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 974 | return rgd; | ||
| 975 | } | ||
| 976 | |||
| 977 | /** | ||
| 978 | * recent_rgrp_next - get next RG from "recent" list | 943 | * recent_rgrp_next - get next RG from "recent" list |
| 979 | * @cur_rgd: current rgrp | 944 | * @cur_rgd: current rgrp |
| 980 | * @remove: | ||
| 981 | * | 945 | * |
| 982 | * Returns: The next rgrp in the recent list | 946 | * Returns: The next rgrp in the recent list |
| 983 | */ | 947 | */ |
| 984 | 948 | ||
| 985 | static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd, | 949 | static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd) |
| 986 | int remove) | ||
| 987 | { | 950 | { |
| 988 | struct gfs2_sbd *sdp = cur_rgd->rd_sbd; | 951 | struct gfs2_sbd *sdp = cur_rgd->rd_sbd; |
| 989 | struct list_head *head; | 952 | struct list_head *head; |
| 990 | struct gfs2_rgrpd *rgd; | 953 | struct gfs2_rgrpd *rgd; |
| 991 | 954 | ||
| 992 | spin_lock(&sdp->sd_rindex_spin); | 955 | spin_lock(&sdp->sd_rindex_spin); |
| 993 | 956 | head = &sdp->sd_rindex_mru_list; | |
| 994 | head = &sdp->sd_rindex_recent_list; | 957 | if (unlikely(cur_rgd->rd_list_mru.next == head)) { |
| 995 | 958 | spin_unlock(&sdp->sd_rindex_spin); | |
| 996 | list_for_each_entry(rgd, head, rd_recent) { | 959 | return NULL; |
| 997 | if (rgd == cur_rgd) { | ||
| 998 | if (cur_rgd->rd_recent.next != head) | ||
| 999 | rgd = list_entry(cur_rgd->rd_recent.next, | ||
| 1000 | struct gfs2_rgrpd, rd_recent); | ||
| 1001 | else | ||
| 1002 | rgd = NULL; | ||
| 1003 | |||
| 1004 | if (remove) | ||
| 1005 | list_del(&cur_rgd->rd_recent); | ||
| 1006 | |||
| 1007 | goto out; | ||
| 1008 | } | ||
| 1009 | } | 960 | } |
| 1010 | 961 | rgd = list_entry(cur_rgd->rd_list_mru.next, struct gfs2_rgrpd, rd_list_mru); | |
| 1011 | rgd = NULL; | ||
| 1012 | if (!list_empty(head)) | ||
| 1013 | rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent); | ||
| 1014 | |||
| 1015 | out: | ||
| 1016 | spin_unlock(&sdp->sd_rindex_spin); | 962 | spin_unlock(&sdp->sd_rindex_spin); |
| 1017 | return rgd; | 963 | return rgd; |
| 1018 | } | 964 | } |
| 1019 | 965 | ||
| 1020 | /** | 966 | /** |
| 1021 | * recent_rgrp_add - add an RG to tail of "recent" list | ||
| 1022 | * @new_rgd: The rgrp to add | ||
| 1023 | * | ||
| 1024 | */ | ||
| 1025 | |||
| 1026 | static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd) | ||
| 1027 | { | ||
| 1028 | struct gfs2_sbd *sdp = new_rgd->rd_sbd; | ||
| 1029 | struct gfs2_rgrpd *rgd; | ||
| 1030 | unsigned int count = 0; | ||
| 1031 | unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp); | ||
| 1032 | |||
| 1033 | spin_lock(&sdp->sd_rindex_spin); | ||
| 1034 | |||
| 1035 | list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { | ||
| 1036 | if (rgd == new_rgd) | ||
| 1037 | goto out; | ||
| 1038 | |||
| 1039 | if (++count >= max) | ||
| 1040 | goto out; | ||
| 1041 | } | ||
| 1042 | list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list); | ||
| 1043 | |||
| 1044 | out: | ||
| 1045 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | /** | ||
| 1049 | * forward_rgrp_get - get an rgrp to try next from full list | 967 | * forward_rgrp_get - get an rgrp to try next from full list |
| 1050 | * @sdp: The GFS2 superblock | 968 | * @sdp: The GFS2 superblock |
| 1051 | * | 969 | * |
| @@ -1112,9 +1030,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
| 1112 | int loops = 0; | 1030 | int loops = 0; |
| 1113 | int error, rg_locked; | 1031 | int error, rg_locked; |
| 1114 | 1032 | ||
| 1115 | /* Try recently successful rgrps */ | 1033 | rgd = gfs2_blk2rgrpd(sdp, ip->i_goal); |
| 1116 | |||
| 1117 | rgd = recent_rgrp_first(sdp, ip->i_goal); | ||
| 1118 | 1034 | ||
| 1119 | while (rgd) { | 1035 | while (rgd) { |
| 1120 | rg_locked = 0; | 1036 | rg_locked = 0; |
| @@ -1136,11 +1052,9 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
| 1136 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1052 | gfs2_glock_dq_uninit(&al->al_rgd_gh); |
| 1137 | if (inode) | 1053 | if (inode) |
| 1138 | return inode; | 1054 | return inode; |
| 1139 | rgd = recent_rgrp_next(rgd, 1); | 1055 | /* fall through */ |
| 1140 | break; | ||
| 1141 | |||
| 1142 | case GLR_TRYFAILED: | 1056 | case GLR_TRYFAILED: |
| 1143 | rgd = recent_rgrp_next(rgd, 0); | 1057 | rgd = recent_rgrp_next(rgd); |
| 1144 | break; | 1058 | break; |
| 1145 | 1059 | ||
| 1146 | default: | 1060 | default: |
| @@ -1199,7 +1113,9 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
| 1199 | 1113 | ||
| 1200 | out: | 1114 | out: |
| 1201 | if (begin) { | 1115 | if (begin) { |
| 1202 | recent_rgrp_add(rgd); | 1116 | spin_lock(&sdp->sd_rindex_spin); |
| 1117 | list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); | ||
| 1118 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 1203 | rgd = gfs2_rgrpd_get_next(rgd); | 1119 | rgd = gfs2_rgrpd_get_next(rgd); |
| 1204 | if (!rgd) | 1120 | if (!rgd) |
| 1205 | rgd = gfs2_rgrpd_get_first(sdp); | 1121 | rgd = gfs2_rgrpd_get_first(sdp); |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 7aeacbc65f35..63a8a902d9db 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
| @@ -65,7 +65,6 @@ void gfs2_tune_init(struct gfs2_tune *gt) | |||
| 65 | gt->gt_quota_quantum = 60; | 65 | gt->gt_quota_quantum = 60; |
| 66 | gt->gt_atime_quantum = 3600; | 66 | gt->gt_atime_quantum = 3600; |
| 67 | gt->gt_new_files_jdata = 0; | 67 | gt->gt_new_files_jdata = 0; |
| 68 | gt->gt_new_files_directio = 0; | ||
| 69 | gt->gt_max_readahead = 1 << 18; | 68 | gt->gt_max_readahead = 1 << 18; |
| 70 | gt->gt_stall_secs = 600; | 69 | gt->gt_stall_secs = 600; |
| 71 | gt->gt_complain_secs = 10; | 70 | gt->gt_complain_secs = 10; |
| @@ -941,8 +940,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, | |||
| 941 | } | 940 | } |
| 942 | 941 | ||
| 943 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED, | 942 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED, |
| 944 | LM_FLAG_PRIORITY | GL_NOCACHE, | 943 | GL_NOCACHE, t_gh); |
| 945 | t_gh); | ||
| 946 | 944 | ||
| 947 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { | 945 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { |
| 948 | error = gfs2_jdesc_check(jd); | 946 | error = gfs2_jdesc_check(jd); |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 9ab9fc85ecd0..74846559fc3f 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
| @@ -110,18 +110,6 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf, | |||
| 110 | return len; | 110 | return len; |
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | static ssize_t shrink_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | ||
| 114 | { | ||
| 115 | if (!capable(CAP_SYS_ADMIN)) | ||
| 116 | return -EACCES; | ||
| 117 | |||
| 118 | if (simple_strtol(buf, NULL, 0) != 1) | ||
| 119 | return -EINVAL; | ||
| 120 | |||
| 121 | gfs2_gl_hash_clear(sdp, NO_WAIT); | ||
| 122 | return len; | ||
| 123 | } | ||
| 124 | |||
| 125 | static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, | 113 | static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, |
| 126 | size_t len) | 114 | size_t len) |
| 127 | { | 115 | { |
| @@ -175,7 +163,6 @@ static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store) | |||
| 175 | GFS2_ATTR(id, 0444, id_show, NULL); | 163 | GFS2_ATTR(id, 0444, id_show, NULL); |
| 176 | GFS2_ATTR(fsname, 0444, fsname_show, NULL); | 164 | GFS2_ATTR(fsname, 0444, fsname_show, NULL); |
| 177 | GFS2_ATTR(freeze, 0644, freeze_show, freeze_store); | 165 | GFS2_ATTR(freeze, 0644, freeze_show, freeze_store); |
| 178 | GFS2_ATTR(shrink, 0200, NULL, shrink_store); | ||
| 179 | GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store); | 166 | GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store); |
| 180 | GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store); | 167 | GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store); |
| 181 | GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store); | 168 | GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store); |
| @@ -186,7 +173,6 @@ static struct attribute *gfs2_attrs[] = { | |||
| 186 | &gfs2_attr_id.attr, | 173 | &gfs2_attr_id.attr, |
| 187 | &gfs2_attr_fsname.attr, | 174 | &gfs2_attr_fsname.attr, |
| 188 | &gfs2_attr_freeze.attr, | 175 | &gfs2_attr_freeze.attr, |
| 189 | &gfs2_attr_shrink.attr, | ||
| 190 | &gfs2_attr_withdraw.attr, | 176 | &gfs2_attr_withdraw.attr, |
| 191 | &gfs2_attr_statfs_sync.attr, | 177 | &gfs2_attr_statfs_sync.attr, |
| 192 | &gfs2_attr_quota_sync.attr, | 178 | &gfs2_attr_quota_sync.attr, |
| @@ -426,7 +412,6 @@ TUNE_ATTR(max_readahead, 0); | |||
| 426 | TUNE_ATTR(complain_secs, 0); | 412 | TUNE_ATTR(complain_secs, 0); |
| 427 | TUNE_ATTR(statfs_slow, 0); | 413 | TUNE_ATTR(statfs_slow, 0); |
| 428 | TUNE_ATTR(new_files_jdata, 0); | 414 | TUNE_ATTR(new_files_jdata, 0); |
| 429 | TUNE_ATTR(new_files_directio, 0); | ||
| 430 | TUNE_ATTR(quota_simul_sync, 1); | 415 | TUNE_ATTR(quota_simul_sync, 1); |
| 431 | TUNE_ATTR(quota_cache_secs, 1); | 416 | TUNE_ATTR(quota_cache_secs, 1); |
| 432 | TUNE_ATTR(stall_secs, 1); | 417 | TUNE_ATTR(stall_secs, 1); |
| @@ -455,7 +440,6 @@ static struct attribute *tune_attrs[] = { | |||
| 455 | &tune_attr_quotad_secs.attr, | 440 | &tune_attr_quotad_secs.attr, |
| 456 | &tune_attr_quota_scale.attr, | 441 | &tune_attr_quota_scale.attr, |
| 457 | &tune_attr_new_files_jdata.attr, | 442 | &tune_attr_new_files_jdata.attr, |
| 458 | &tune_attr_new_files_directio.attr, | ||
| 459 | NULL, | 443 | NULL, |
| 460 | }; | 444 | }; |
| 461 | 445 | ||
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 6914598022ce..91389c8aee8a 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
| @@ -688,7 +688,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact | |||
| 688 | 688 | ||
| 689 | J_ASSERT(transaction->t_state == T_FINISHED); | 689 | J_ASSERT(transaction->t_state == T_FINISHED); |
| 690 | J_ASSERT(transaction->t_buffers == NULL); | 690 | J_ASSERT(transaction->t_buffers == NULL); |
| 691 | J_ASSERT(transaction->t_sync_datalist == NULL); | ||
| 692 | J_ASSERT(transaction->t_forget == NULL); | 691 | J_ASSERT(transaction->t_forget == NULL); |
| 693 | J_ASSERT(transaction->t_iobuf_list == NULL); | 692 | J_ASSERT(transaction->t_iobuf_list == NULL); |
| 694 | J_ASSERT(transaction->t_shadow_list == NULL); | 693 | J_ASSERT(transaction->t_shadow_list == NULL); |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index a2ed72f7ceee..f8b3be873226 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -22,6 +22,8 @@ | |||
| 22 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
| 23 | #include <linux/jiffies.h> | 23 | #include <linux/jiffies.h> |
| 24 | #include <linux/crc32.h> | 24 | #include <linux/crc32.h> |
| 25 | #include <linux/writeback.h> | ||
| 26 | #include <linux/backing-dev.h> | ||
| 25 | 27 | ||
| 26 | /* | 28 | /* |
| 27 | * Default IO end handler for temporary BJ_IO buffer_heads. | 29 | * Default IO end handler for temporary BJ_IO buffer_heads. |
| @@ -37,8 +39,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) | |||
| 37 | } | 39 | } |
| 38 | 40 | ||
| 39 | /* | 41 | /* |
| 40 | * When an ext3-ordered file is truncated, it is possible that many pages are | 42 | * When an ext4 file is truncated, it is possible that some pages are not |
| 41 | * not sucessfully freed, because they are attached to a committing transaction. | 43 | * successfully freed, because they are attached to a committing transaction. |
| 42 | * After the transaction commits, these pages are left on the LRU, with no | 44 | * After the transaction commits, these pages are left on the LRU, with no |
| 43 | * ->mapping, and with attached buffers. These pages are trivially reclaimable | 45 | * ->mapping, and with attached buffers. These pages are trivially reclaimable |
| 44 | * by the VM, but their apparent absence upsets the VM accounting, and it makes | 46 | * by the VM, but their apparent absence upsets the VM accounting, and it makes |
| @@ -80,21 +82,6 @@ nope: | |||
| 80 | } | 82 | } |
| 81 | 83 | ||
| 82 | /* | 84 | /* |
| 83 | * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is | ||
| 84 | * held. For ranking reasons we must trylock. If we lose, schedule away and | ||
| 85 | * return 0. j_list_lock is dropped in this case. | ||
| 86 | */ | ||
| 87 | static int inverted_lock(journal_t *journal, struct buffer_head *bh) | ||
| 88 | { | ||
| 89 | if (!jbd_trylock_bh_state(bh)) { | ||
| 90 | spin_unlock(&journal->j_list_lock); | ||
| 91 | schedule(); | ||
| 92 | return 0; | ||
| 93 | } | ||
| 94 | return 1; | ||
| 95 | } | ||
| 96 | |||
| 97 | /* | ||
| 98 | * Done it all: now submit the commit record. We should have | 85 | * Done it all: now submit the commit record. We should have |
| 99 | * cleaned up our previous buffers by now, so if we are in abort | 86 | * cleaned up our previous buffers by now, so if we are in abort |
| 100 | * mode we can now just skip the rest of the journal write | 87 | * mode we can now just skip the rest of the journal write |
| @@ -112,6 +99,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 112 | struct buffer_head *bh; | 99 | struct buffer_head *bh; |
| 113 | int ret; | 100 | int ret; |
| 114 | int barrier_done = 0; | 101 | int barrier_done = 0; |
| 102 | struct timespec now = current_kernel_time(); | ||
| 115 | 103 | ||
| 116 | if (is_journal_aborted(journal)) | 104 | if (is_journal_aborted(journal)) |
| 117 | return 0; | 105 | return 0; |
| @@ -126,6 +114,8 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 126 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 114 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
| 127 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); | 115 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); |
| 128 | tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); | 116 | tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); |
| 117 | tmp->h_commit_sec = cpu_to_be64(now.tv_sec); | ||
| 118 | tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec); | ||
| 129 | 119 | ||
| 130 | if (JBD2_HAS_COMPAT_FEATURE(journal, | 120 | if (JBD2_HAS_COMPAT_FEATURE(journal, |
| 131 | JBD2_FEATURE_COMPAT_CHECKSUM)) { | 121 | JBD2_FEATURE_COMPAT_CHECKSUM)) { |
| @@ -197,159 +187,104 @@ static int journal_wait_on_commit_record(struct buffer_head *bh) | |||
| 197 | } | 187 | } |
| 198 | 188 | ||
| 199 | /* | 189 | /* |
| 200 | * Wait for all submitted IO to complete. | 190 | * write the filemap data using writepage() address_space_operations. |
| 191 | * We don't do block allocation here even for delalloc. We don't | ||
| 192 | * use writepages() because with dealyed allocation we may be doing | ||
| 193 | * block allocation in writepages(). | ||
| 201 | */ | 194 | */ |
| 202 | static int journal_wait_on_locked_list(journal_t *journal, | 195 | static int journal_submit_inode_data_buffers(struct address_space *mapping) |
| 203 | transaction_t *commit_transaction) | ||
| 204 | { | 196 | { |
| 205 | int ret = 0; | 197 | int ret; |
| 206 | struct journal_head *jh; | 198 | struct writeback_control wbc = { |
| 207 | 199 | .sync_mode = WB_SYNC_ALL, | |
| 208 | while (commit_transaction->t_locked_list) { | 200 | .nr_to_write = mapping->nrpages * 2, |
| 209 | struct buffer_head *bh; | 201 | .range_start = 0, |
| 210 | 202 | .range_end = i_size_read(mapping->host), | |
| 211 | jh = commit_transaction->t_locked_list->b_tprev; | 203 | .for_writepages = 1, |
| 212 | bh = jh2bh(jh); | 204 | }; |
| 213 | get_bh(bh); | 205 | |
| 214 | if (buffer_locked(bh)) { | 206 | ret = generic_writepages(mapping, &wbc); |
| 215 | spin_unlock(&journal->j_list_lock); | ||
| 216 | wait_on_buffer(bh); | ||
| 217 | if (unlikely(!buffer_uptodate(bh))) | ||
| 218 | ret = -EIO; | ||
| 219 | spin_lock(&journal->j_list_lock); | ||
| 220 | } | ||
| 221 | if (!inverted_lock(journal, bh)) { | ||
| 222 | put_bh(bh); | ||
| 223 | spin_lock(&journal->j_list_lock); | ||
| 224 | continue; | ||
| 225 | } | ||
| 226 | if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { | ||
| 227 | __jbd2_journal_unfile_buffer(jh); | ||
| 228 | jbd_unlock_bh_state(bh); | ||
| 229 | jbd2_journal_remove_journal_head(bh); | ||
| 230 | put_bh(bh); | ||
| 231 | } else { | ||
| 232 | jbd_unlock_bh_state(bh); | ||
| 233 | } | ||
| 234 | put_bh(bh); | ||
| 235 | cond_resched_lock(&journal->j_list_lock); | ||
| 236 | } | ||
| 237 | return ret; | 207 | return ret; |
| 238 | } | 208 | } |
| 239 | 209 | ||
| 240 | static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) | 210 | /* |
| 211 | * Submit all the data buffers of inode associated with the transaction to | ||
| 212 | * disk. | ||
| 213 | * | ||
| 214 | * We are in a committing transaction. Therefore no new inode can be added to | ||
| 215 | * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently | ||
| 216 | * operate on from being released while we write out pages. | ||
| 217 | */ | ||
| 218 | static int journal_submit_data_buffers(journal_t *journal, | ||
| 219 | transaction_t *commit_transaction) | ||
| 241 | { | 220 | { |
| 242 | int i; | 221 | struct jbd2_inode *jinode; |
| 222 | int err, ret = 0; | ||
| 223 | struct address_space *mapping; | ||
| 243 | 224 | ||
| 244 | for (i = 0; i < bufs; i++) { | 225 | spin_lock(&journal->j_list_lock); |
| 245 | wbuf[i]->b_end_io = end_buffer_write_sync; | 226 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { |
| 246 | /* We use-up our safety reference in submit_bh() */ | 227 | mapping = jinode->i_vfs_inode->i_mapping; |
| 247 | submit_bh(WRITE, wbuf[i]); | 228 | jinode->i_flags |= JI_COMMIT_RUNNING; |
| 229 | spin_unlock(&journal->j_list_lock); | ||
| 230 | /* | ||
| 231 | * submit the inode data buffers. We use writepage | ||
| 232 | * instead of writepages. Because writepages can do | ||
| 233 | * block allocation with delalloc. We need to write | ||
| 234 | * only allocated blocks here. | ||
| 235 | */ | ||
| 236 | err = journal_submit_inode_data_buffers(mapping); | ||
| 237 | if (!ret) | ||
| 238 | ret = err; | ||
| 239 | spin_lock(&journal->j_list_lock); | ||
| 240 | J_ASSERT(jinode->i_transaction == commit_transaction); | ||
| 241 | jinode->i_flags &= ~JI_COMMIT_RUNNING; | ||
| 242 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | ||
| 248 | } | 243 | } |
| 244 | spin_unlock(&journal->j_list_lock); | ||
| 245 | return ret; | ||
| 249 | } | 246 | } |
| 250 | 247 | ||
| 251 | /* | 248 | /* |
| 252 | * Submit all the data buffers to disk | 249 | * Wait for data submitted for writeout, refile inodes to proper |
| 250 | * transaction if needed. | ||
| 251 | * | ||
| 253 | */ | 252 | */ |
| 254 | static void journal_submit_data_buffers(journal_t *journal, | 253 | static int journal_finish_inode_data_buffers(journal_t *journal, |
| 255 | transaction_t *commit_transaction) | 254 | transaction_t *commit_transaction) |
| 256 | { | 255 | { |
| 257 | struct journal_head *jh; | 256 | struct jbd2_inode *jinode, *next_i; |
| 258 | struct buffer_head *bh; | 257 | int err, ret = 0; |
| 259 | int locked; | ||
| 260 | int bufs = 0; | ||
| 261 | struct buffer_head **wbuf = journal->j_wbuf; | ||
| 262 | 258 | ||
| 263 | /* | 259 | /* For locking, see the comment in journal_submit_data_buffers() */ |
| 264 | * Whenever we unlock the journal and sleep, things can get added | ||
| 265 | * onto ->t_sync_datalist, so we have to keep looping back to | ||
| 266 | * write_out_data until we *know* that the list is empty. | ||
| 267 | * | ||
| 268 | * Cleanup any flushed data buffers from the data list. Even in | ||
| 269 | * abort mode, we want to flush this out as soon as possible. | ||
| 270 | */ | ||
| 271 | write_out_data: | ||
| 272 | cond_resched(); | ||
| 273 | spin_lock(&journal->j_list_lock); | 260 | spin_lock(&journal->j_list_lock); |
| 261 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { | ||
| 262 | jinode->i_flags |= JI_COMMIT_RUNNING; | ||
| 263 | spin_unlock(&journal->j_list_lock); | ||
| 264 | err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); | ||
| 265 | if (!ret) | ||
| 266 | ret = err; | ||
| 267 | spin_lock(&journal->j_list_lock); | ||
| 268 | jinode->i_flags &= ~JI_COMMIT_RUNNING; | ||
| 269 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | ||
| 270 | } | ||
| 274 | 271 | ||
| 275 | while (commit_transaction->t_sync_datalist) { | 272 | /* Now refile inode to proper lists */ |
| 276 | jh = commit_transaction->t_sync_datalist; | 273 | list_for_each_entry_safe(jinode, next_i, |
| 277 | bh = jh2bh(jh); | 274 | &commit_transaction->t_inode_list, i_list) { |
| 278 | locked = 0; | 275 | list_del(&jinode->i_list); |
| 279 | 276 | if (jinode->i_next_transaction) { | |
| 280 | /* Get reference just to make sure buffer does not disappear | 277 | jinode->i_transaction = jinode->i_next_transaction; |
| 281 | * when we are forced to drop various locks */ | 278 | jinode->i_next_transaction = NULL; |
| 282 | get_bh(bh); | 279 | list_add(&jinode->i_list, |
| 283 | /* If the buffer is dirty, we need to submit IO and hence | 280 | &jinode->i_transaction->t_inode_list); |
| 284 | * we need the buffer lock. We try to lock the buffer without | ||
| 285 | * blocking. If we fail, we need to drop j_list_lock and do | ||
| 286 | * blocking lock_buffer(). | ||
| 287 | */ | ||
| 288 | if (buffer_dirty(bh)) { | ||
| 289 | if (test_set_buffer_locked(bh)) { | ||
| 290 | BUFFER_TRACE(bh, "needs blocking lock"); | ||
| 291 | spin_unlock(&journal->j_list_lock); | ||
| 292 | /* Write out all data to prevent deadlocks */ | ||
| 293 | journal_do_submit_data(wbuf, bufs); | ||
| 294 | bufs = 0; | ||
| 295 | lock_buffer(bh); | ||
| 296 | spin_lock(&journal->j_list_lock); | ||
| 297 | } | ||
| 298 | locked = 1; | ||
| 299 | } | ||
| 300 | /* We have to get bh_state lock. Again out of order, sigh. */ | ||
| 301 | if (!inverted_lock(journal, bh)) { | ||
| 302 | jbd_lock_bh_state(bh); | ||
| 303 | spin_lock(&journal->j_list_lock); | ||
| 304 | } | ||
| 305 | /* Someone already cleaned up the buffer? */ | ||
| 306 | if (!buffer_jbd(bh) | ||
| 307 | || jh->b_transaction != commit_transaction | ||
| 308 | || jh->b_jlist != BJ_SyncData) { | ||
| 309 | jbd_unlock_bh_state(bh); | ||
| 310 | if (locked) | ||
| 311 | unlock_buffer(bh); | ||
| 312 | BUFFER_TRACE(bh, "already cleaned up"); | ||
| 313 | put_bh(bh); | ||
| 314 | continue; | ||
| 315 | } | ||
| 316 | if (locked && test_clear_buffer_dirty(bh)) { | ||
| 317 | BUFFER_TRACE(bh, "needs writeout, adding to array"); | ||
| 318 | wbuf[bufs++] = bh; | ||
| 319 | __jbd2_journal_file_buffer(jh, commit_transaction, | ||
| 320 | BJ_Locked); | ||
| 321 | jbd_unlock_bh_state(bh); | ||
| 322 | if (bufs == journal->j_wbufsize) { | ||
| 323 | spin_unlock(&journal->j_list_lock); | ||
| 324 | journal_do_submit_data(wbuf, bufs); | ||
| 325 | bufs = 0; | ||
| 326 | goto write_out_data; | ||
| 327 | } | ||
| 328 | } else if (!locked && buffer_locked(bh)) { | ||
| 329 | __jbd2_journal_file_buffer(jh, commit_transaction, | ||
| 330 | BJ_Locked); | ||
| 331 | jbd_unlock_bh_state(bh); | ||
| 332 | put_bh(bh); | ||
| 333 | } else { | 281 | } else { |
| 334 | BUFFER_TRACE(bh, "writeout complete: unfile"); | 282 | jinode->i_transaction = NULL; |
| 335 | __jbd2_journal_unfile_buffer(jh); | ||
| 336 | jbd_unlock_bh_state(bh); | ||
| 337 | if (locked) | ||
| 338 | unlock_buffer(bh); | ||
| 339 | jbd2_journal_remove_journal_head(bh); | ||
| 340 | /* Once for our safety reference, once for | ||
| 341 | * jbd2_journal_remove_journal_head() */ | ||
| 342 | put_bh(bh); | ||
| 343 | put_bh(bh); | ||
| 344 | } | ||
| 345 | |||
| 346 | if (need_resched() || spin_needbreak(&journal->j_list_lock)) { | ||
| 347 | spin_unlock(&journal->j_list_lock); | ||
| 348 | goto write_out_data; | ||
| 349 | } | 283 | } |
| 350 | } | 284 | } |
| 351 | spin_unlock(&journal->j_list_lock); | 285 | spin_unlock(&journal->j_list_lock); |
| 352 | journal_do_submit_data(wbuf, bufs); | 286 | |
| 287 | return ret; | ||
| 353 | } | 288 | } |
| 354 | 289 | ||
| 355 | static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) | 290 | static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) |
| @@ -524,21 +459,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 524 | * Now start flushing things to disk, in the order they appear | 459 | * Now start flushing things to disk, in the order they appear |
| 525 | * on the transaction lists. Data blocks go first. | 460 | * on the transaction lists. Data blocks go first. |
| 526 | */ | 461 | */ |
| 527 | err = 0; | 462 | err = journal_submit_data_buffers(journal, commit_transaction); |
| 528 | journal_submit_data_buffers(journal, commit_transaction); | ||
| 529 | |||
| 530 | /* | ||
| 531 | * Wait for all previously submitted IO to complete if commit | ||
| 532 | * record is to be written synchronously. | ||
| 533 | */ | ||
| 534 | spin_lock(&journal->j_list_lock); | ||
| 535 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | ||
| 536 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) | ||
| 537 | err = journal_wait_on_locked_list(journal, | ||
| 538 | commit_transaction); | ||
| 539 | |||
| 540 | spin_unlock(&journal->j_list_lock); | ||
| 541 | |||
| 542 | if (err) | 463 | if (err) |
| 543 | jbd2_journal_abort(journal, err); | 464 | jbd2_journal_abort(journal, err); |
| 544 | 465 | ||
| @@ -547,16 +468,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 547 | jbd_debug(3, "JBD: commit phase 2\n"); | 468 | jbd_debug(3, "JBD: commit phase 2\n"); |
| 548 | 469 | ||
| 549 | /* | 470 | /* |
| 550 | * If we found any dirty or locked buffers, then we should have | ||
| 551 | * looped back up to the write_out_data label. If there weren't | ||
| 552 | * any then journal_clean_data_list should have wiped the list | ||
| 553 | * clean by now, so check that it is in fact empty. | ||
| 554 | */ | ||
| 555 | J_ASSERT (commit_transaction->t_sync_datalist == NULL); | ||
| 556 | |||
| 557 | jbd_debug (3, "JBD: commit phase 3\n"); | ||
| 558 | |||
| 559 | /* | ||
| 560 | * Way to go: we have now written out all of the data for a | 471 | * Way to go: we have now written out all of the data for a |
| 561 | * transaction! Now comes the tricky part: we need to write out | 472 | * transaction! Now comes the tricky part: we need to write out |
| 562 | * metadata. Loop over the transaction's entire buffer list: | 473 | * metadata. Loop over the transaction's entire buffer list: |
| @@ -574,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 574 | J_ASSERT(commit_transaction->t_nr_buffers <= | 485 | J_ASSERT(commit_transaction->t_nr_buffers <= |
| 575 | commit_transaction->t_outstanding_credits); | 486 | commit_transaction->t_outstanding_credits); |
| 576 | 487 | ||
| 488 | err = 0; | ||
| 577 | descriptor = NULL; | 489 | descriptor = NULL; |
| 578 | bufs = 0; | 490 | bufs = 0; |
| 579 | while (commit_transaction->t_buffers) { | 491 | while (commit_transaction->t_buffers) { |
| @@ -748,15 +660,19 @@ start_journal_io: | |||
| 748 | &cbh, crc32_sum); | 660 | &cbh, crc32_sum); |
| 749 | if (err) | 661 | if (err) |
| 750 | __jbd2_journal_abort_hard(journal); | 662 | __jbd2_journal_abort_hard(journal); |
| 751 | |||
| 752 | spin_lock(&journal->j_list_lock); | ||
| 753 | err = journal_wait_on_locked_list(journal, | ||
| 754 | commit_transaction); | ||
| 755 | spin_unlock(&journal->j_list_lock); | ||
| 756 | if (err) | ||
| 757 | __jbd2_journal_abort_hard(journal); | ||
| 758 | } | 663 | } |
| 759 | 664 | ||
| 665 | /* | ||
| 666 | * This is the right place to wait for data buffers both for ASYNC | ||
| 667 | * and !ASYNC commit. If commit is ASYNC, we need to wait only after | ||
| 668 | * the commit block went to disk (which happens above). If commit is | ||
| 669 | * SYNC, we need to wait for data buffers before we start writing | ||
| 670 | * commit block, which happens below in such setting. | ||
| 671 | */ | ||
| 672 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | ||
| 673 | if (err) | ||
| 674 | jbd2_journal_abort(journal, err); | ||
| 675 | |||
| 760 | /* Lo and behold: we have just managed to send a transaction to | 676 | /* Lo and behold: we have just managed to send a transaction to |
| 761 | the log. Before we can commit it, wait for the IO so far to | 677 | the log. Before we can commit it, wait for the IO so far to |
| 762 | complete. Control buffers being written are on the | 678 | complete. Control buffers being written are on the |
| @@ -768,7 +684,7 @@ start_journal_io: | |||
| 768 | so we incur less scheduling load. | 684 | so we incur less scheduling load. |
| 769 | */ | 685 | */ |
| 770 | 686 | ||
| 771 | jbd_debug(3, "JBD: commit phase 4\n"); | 687 | jbd_debug(3, "JBD: commit phase 3\n"); |
| 772 | 688 | ||
| 773 | /* | 689 | /* |
| 774 | * akpm: these are BJ_IO, and j_list_lock is not needed. | 690 | * akpm: these are BJ_IO, and j_list_lock is not needed. |
| @@ -827,7 +743,7 @@ wait_for_iobuf: | |||
| 827 | 743 | ||
| 828 | J_ASSERT (commit_transaction->t_shadow_list == NULL); | 744 | J_ASSERT (commit_transaction->t_shadow_list == NULL); |
| 829 | 745 | ||
| 830 | jbd_debug(3, "JBD: commit phase 5\n"); | 746 | jbd_debug(3, "JBD: commit phase 4\n"); |
| 831 | 747 | ||
| 832 | /* Here we wait for the revoke record and descriptor record buffers */ | 748 | /* Here we wait for the revoke record and descriptor record buffers */ |
| 833 | wait_for_ctlbuf: | 749 | wait_for_ctlbuf: |
| @@ -854,7 +770,7 @@ wait_for_iobuf: | |||
| 854 | /* AKPM: bforget here */ | 770 | /* AKPM: bforget here */ |
| 855 | } | 771 | } |
| 856 | 772 | ||
| 857 | jbd_debug(3, "JBD: commit phase 6\n"); | 773 | jbd_debug(3, "JBD: commit phase 5\n"); |
| 858 | 774 | ||
| 859 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 775 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
| 860 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 776 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
| @@ -874,9 +790,9 @@ wait_for_iobuf: | |||
| 874 | transaction can be removed from any checkpoint list it was on | 790 | transaction can be removed from any checkpoint list it was on |
| 875 | before. */ | 791 | before. */ |
| 876 | 792 | ||
| 877 | jbd_debug(3, "JBD: commit phase 7\n"); | 793 | jbd_debug(3, "JBD: commit phase 6\n"); |
| 878 | 794 | ||
| 879 | J_ASSERT(commit_transaction->t_sync_datalist == NULL); | 795 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); |
| 880 | J_ASSERT(commit_transaction->t_buffers == NULL); | 796 | J_ASSERT(commit_transaction->t_buffers == NULL); |
| 881 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); | 797 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); |
| 882 | J_ASSERT(commit_transaction->t_iobuf_list == NULL); | 798 | J_ASSERT(commit_transaction->t_iobuf_list == NULL); |
| @@ -997,7 +913,7 @@ restart_loop: | |||
| 997 | 913 | ||
| 998 | /* Done with this transaction! */ | 914 | /* Done with this transaction! */ |
| 999 | 915 | ||
| 1000 | jbd_debug(3, "JBD: commit phase 8\n"); | 916 | jbd_debug(3, "JBD: commit phase 7\n"); |
| 1001 | 917 | ||
| 1002 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | 918 | J_ASSERT(commit_transaction->t_state == T_COMMIT); |
| 1003 | 919 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 2e24567c4a79..b26c6d9fe6ae 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
| @@ -50,7 +50,6 @@ EXPORT_SYMBOL(jbd2_journal_unlock_updates); | |||
| 50 | EXPORT_SYMBOL(jbd2_journal_get_write_access); | 50 | EXPORT_SYMBOL(jbd2_journal_get_write_access); |
| 51 | EXPORT_SYMBOL(jbd2_journal_get_create_access); | 51 | EXPORT_SYMBOL(jbd2_journal_get_create_access); |
| 52 | EXPORT_SYMBOL(jbd2_journal_get_undo_access); | 52 | EXPORT_SYMBOL(jbd2_journal_get_undo_access); |
| 53 | EXPORT_SYMBOL(jbd2_journal_dirty_data); | ||
| 54 | EXPORT_SYMBOL(jbd2_journal_dirty_metadata); | 53 | EXPORT_SYMBOL(jbd2_journal_dirty_metadata); |
| 55 | EXPORT_SYMBOL(jbd2_journal_release_buffer); | 54 | EXPORT_SYMBOL(jbd2_journal_release_buffer); |
| 56 | EXPORT_SYMBOL(jbd2_journal_forget); | 55 | EXPORT_SYMBOL(jbd2_journal_forget); |
| @@ -82,6 +81,10 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page); | |||
| 82 | EXPORT_SYMBOL(jbd2_journal_invalidatepage); | 81 | EXPORT_SYMBOL(jbd2_journal_invalidatepage); |
| 83 | EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); | 82 | EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); |
| 84 | EXPORT_SYMBOL(jbd2_journal_force_commit); | 83 | EXPORT_SYMBOL(jbd2_journal_force_commit); |
| 84 | EXPORT_SYMBOL(jbd2_journal_file_inode); | ||
| 85 | EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); | ||
| 86 | EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); | ||
| 87 | EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); | ||
| 85 | 88 | ||
| 86 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); | 89 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); |
| 87 | static void __journal_abort_soft (journal_t *journal, int errno); | 90 | static void __journal_abort_soft (journal_t *journal, int errno); |
| @@ -2195,6 +2198,54 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) | |||
| 2195 | } | 2198 | } |
| 2196 | 2199 | ||
| 2197 | /* | 2200 | /* |
| 2201 | * Initialize jbd inode head | ||
| 2202 | */ | ||
| 2203 | void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) | ||
| 2204 | { | ||
| 2205 | jinode->i_transaction = NULL; | ||
| 2206 | jinode->i_next_transaction = NULL; | ||
| 2207 | jinode->i_vfs_inode = inode; | ||
| 2208 | jinode->i_flags = 0; | ||
| 2209 | INIT_LIST_HEAD(&jinode->i_list); | ||
| 2210 | } | ||
| 2211 | |||
| 2212 | /* | ||
| 2213 | * Function to be called before we start removing inode from memory (i.e., | ||
| 2214 | * clear_inode() is a fine place to be called from). It removes inode from | ||
| 2215 | * transaction's lists. | ||
| 2216 | */ | ||
| 2217 | void jbd2_journal_release_jbd_inode(journal_t *journal, | ||
| 2218 | struct jbd2_inode *jinode) | ||
| 2219 | { | ||
| 2220 | int writeout = 0; | ||
| 2221 | |||
| 2222 | if (!journal) | ||
| 2223 | return; | ||
| 2224 | restart: | ||
| 2225 | spin_lock(&journal->j_list_lock); | ||
| 2226 | /* Is commit writing out inode - we have to wait */ | ||
| 2227 | if (jinode->i_flags & JI_COMMIT_RUNNING) { | ||
| 2228 | wait_queue_head_t *wq; | ||
| 2229 | DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); | ||
| 2230 | wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); | ||
| 2231 | prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); | ||
| 2232 | spin_unlock(&journal->j_list_lock); | ||
| 2233 | schedule(); | ||
| 2234 | finish_wait(wq, &wait.wait); | ||
| 2235 | goto restart; | ||
| 2236 | } | ||
| 2237 | |||
| 2238 | /* Do we need to wait for data writeback? */ | ||
| 2239 | if (journal->j_committing_transaction == jinode->i_transaction) | ||
| 2240 | writeout = 1; | ||
| 2241 | if (jinode->i_transaction) { | ||
| 2242 | list_del(&jinode->i_list); | ||
| 2243 | jinode->i_transaction = NULL; | ||
| 2244 | } | ||
| 2245 | spin_unlock(&journal->j_list_lock); | ||
| 2246 | } | ||
| 2247 | |||
| 2248 | /* | ||
| 2198 | * debugfs tunables | 2249 | * debugfs tunables |
| 2199 | */ | 2250 | */ |
| 2200 | #ifdef CONFIG_JBD2_DEBUG | 2251 | #ifdef CONFIG_JBD2_DEBUG |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index d6e006e67804..4f7cadbb19fa 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
| @@ -41,7 +41,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | |||
| 41 | * new transaction and we can't block without protecting against other | 41 | * new transaction and we can't block without protecting against other |
| 42 | * processes trying to touch the journal while it is in transition. | 42 | * processes trying to touch the journal while it is in transition. |
| 43 | * | 43 | * |
| 44 | * Called under j_state_lock | ||
| 45 | */ | 44 | */ |
| 46 | 45 | ||
| 47 | static transaction_t * | 46 | static transaction_t * |
| @@ -52,6 +51,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
| 52 | transaction->t_tid = journal->j_transaction_sequence++; | 51 | transaction->t_tid = journal->j_transaction_sequence++; |
| 53 | transaction->t_expires = jiffies + journal->j_commit_interval; | 52 | transaction->t_expires = jiffies + journal->j_commit_interval; |
| 54 | spin_lock_init(&transaction->t_handle_lock); | 53 | spin_lock_init(&transaction->t_handle_lock); |
| 54 | INIT_LIST_HEAD(&transaction->t_inode_list); | ||
| 55 | 55 | ||
| 56 | /* Set up the commit timer for the new transaction. */ | 56 | /* Set up the commit timer for the new transaction. */ |
| 57 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); | 57 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); |
| @@ -943,183 +943,6 @@ out: | |||
| 943 | } | 943 | } |
| 944 | 944 | ||
| 945 | /** | 945 | /** |
| 946 | * int jbd2_journal_dirty_data() - mark a buffer as containing dirty data which | ||
| 947 | * needs to be flushed before we can commit the | ||
| 948 | * current transaction. | ||
| 949 | * @handle: transaction | ||
| 950 | * @bh: bufferhead to mark | ||
| 951 | * | ||
| 952 | * The buffer is placed on the transaction's data list and is marked as | ||
| 953 | * belonging to the transaction. | ||
| 954 | * | ||
| 955 | * Returns error number or 0 on success. | ||
| 956 | * | ||
| 957 | * jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage | ||
| 958 | * by kswapd. | ||
| 959 | */ | ||
| 960 | int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh) | ||
| 961 | { | ||
| 962 | journal_t *journal = handle->h_transaction->t_journal; | ||
| 963 | int need_brelse = 0; | ||
| 964 | struct journal_head *jh; | ||
| 965 | |||
| 966 | if (is_handle_aborted(handle)) | ||
| 967 | return 0; | ||
| 968 | |||
| 969 | jh = jbd2_journal_add_journal_head(bh); | ||
| 970 | JBUFFER_TRACE(jh, "entry"); | ||
| 971 | |||
| 972 | /* | ||
| 973 | * The buffer could *already* be dirty. Writeout can start | ||
| 974 | * at any time. | ||
| 975 | */ | ||
| 976 | jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid); | ||
| 977 | |||
| 978 | /* | ||
| 979 | * What if the buffer is already part of a running transaction? | ||
| 980 | * | ||
| 981 | * There are two cases: | ||
| 982 | * 1) It is part of the current running transaction. Refile it, | ||
| 983 | * just in case we have allocated it as metadata, deallocated | ||
| 984 | * it, then reallocated it as data. | ||
| 985 | * 2) It is part of the previous, still-committing transaction. | ||
| 986 | * If all we want to do is to guarantee that the buffer will be | ||
| 987 | * written to disk before this new transaction commits, then | ||
| 988 | * being sure that the *previous* transaction has this same | ||
| 989 | * property is sufficient for us! Just leave it on its old | ||
| 990 | * transaction. | ||
| 991 | * | ||
| 992 | * In case (2), the buffer must not already exist as metadata | ||
| 993 | * --- that would violate write ordering (a transaction is free | ||
| 994 | * to write its data at any point, even before the previous | ||
| 995 | * committing transaction has committed). The caller must | ||
| 996 | * never, ever allow this to happen: there's nothing we can do | ||
| 997 | * about it in this layer. | ||
| 998 | */ | ||
| 999 | jbd_lock_bh_state(bh); | ||
| 1000 | spin_lock(&journal->j_list_lock); | ||
| 1001 | |||
| 1002 | /* Now that we have bh_state locked, are we really still mapped? */ | ||
| 1003 | if (!buffer_mapped(bh)) { | ||
| 1004 | JBUFFER_TRACE(jh, "unmapped buffer, bailing out"); | ||
| 1005 | goto no_journal; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | if (jh->b_transaction) { | ||
| 1009 | JBUFFER_TRACE(jh, "has transaction"); | ||
| 1010 | if (jh->b_transaction != handle->h_transaction) { | ||
| 1011 | JBUFFER_TRACE(jh, "belongs to older transaction"); | ||
| 1012 | J_ASSERT_JH(jh, jh->b_transaction == | ||
| 1013 | journal->j_committing_transaction); | ||
| 1014 | |||
| 1015 | /* @@@ IS THIS TRUE ? */ | ||
| 1016 | /* | ||
| 1017 | * Not any more. Scenario: someone does a write() | ||
| 1018 | * in data=journal mode. The buffer's transaction has | ||
| 1019 | * moved into commit. Then someone does another | ||
| 1020 | * write() to the file. We do the frozen data copyout | ||
| 1021 | * and set b_next_transaction to point to j_running_t. | ||
| 1022 | * And while we're in that state, someone does a | ||
| 1023 | * writepage() in an attempt to pageout the same area | ||
| 1024 | * of the file via a shared mapping. At present that | ||
| 1025 | * calls jbd2_journal_dirty_data(), and we get right here. | ||
| 1026 | * It may be too late to journal the data. Simply | ||
| 1027 | * falling through to the next test will suffice: the | ||
| 1028 | * data will be dirty and wil be checkpointed. The | ||
| 1029 | * ordering comments in the next comment block still | ||
| 1030 | * apply. | ||
| 1031 | */ | ||
| 1032 | //J_ASSERT_JH(jh, jh->b_next_transaction == NULL); | ||
| 1033 | |||
| 1034 | /* | ||
| 1035 | * If we're journalling data, and this buffer was | ||
| 1036 | * subject to a write(), it could be metadata, forget | ||
| 1037 | * or shadow against the committing transaction. Now, | ||
| 1038 | * someone has dirtied the same darn page via a mapping | ||
| 1039 | * and it is being writepage()'d. | ||
| 1040 | * We *could* just steal the page from commit, with some | ||
| 1041 | * fancy locking there. Instead, we just skip it - | ||
| 1042 | * don't tie the page's buffers to the new transaction | ||
| 1043 | * at all. | ||
| 1044 | * Implication: if we crash before the writepage() data | ||
| 1045 | * is written into the filesystem, recovery will replay | ||
| 1046 | * the write() data. | ||
| 1047 | */ | ||
| 1048 | if (jh->b_jlist != BJ_None && | ||
| 1049 | jh->b_jlist != BJ_SyncData && | ||
| 1050 | jh->b_jlist != BJ_Locked) { | ||
| 1051 | JBUFFER_TRACE(jh, "Not stealing"); | ||
| 1052 | goto no_journal; | ||
| 1053 | } | ||
| 1054 | |||
| 1055 | /* | ||
| 1056 | * This buffer may be undergoing writeout in commit. We | ||
| 1057 | * can't return from here and let the caller dirty it | ||
| 1058 | * again because that can cause the write-out loop in | ||
| 1059 | * commit to never terminate. | ||
| 1060 | */ | ||
| 1061 | if (buffer_dirty(bh)) { | ||
| 1062 | get_bh(bh); | ||
| 1063 | spin_unlock(&journal->j_list_lock); | ||
| 1064 | jbd_unlock_bh_state(bh); | ||
| 1065 | need_brelse = 1; | ||
| 1066 | sync_dirty_buffer(bh); | ||
| 1067 | jbd_lock_bh_state(bh); | ||
| 1068 | spin_lock(&journal->j_list_lock); | ||
| 1069 | /* Since we dropped the lock... */ | ||
| 1070 | if (!buffer_mapped(bh)) { | ||
| 1071 | JBUFFER_TRACE(jh, "buffer got unmapped"); | ||
| 1072 | goto no_journal; | ||
| 1073 | } | ||
| 1074 | /* The buffer may become locked again at any | ||
| 1075 | time if it is redirtied */ | ||
| 1076 | } | ||
| 1077 | |||
| 1078 | /* journal_clean_data_list() may have got there first */ | ||
| 1079 | if (jh->b_transaction != NULL) { | ||
| 1080 | JBUFFER_TRACE(jh, "unfile from commit"); | ||
| 1081 | __jbd2_journal_temp_unlink_buffer(jh); | ||
| 1082 | /* It still points to the committing | ||
| 1083 | * transaction; move it to this one so | ||
| 1084 | * that the refile assert checks are | ||
| 1085 | * happy. */ | ||
| 1086 | jh->b_transaction = handle->h_transaction; | ||
| 1087 | } | ||
| 1088 | /* The buffer will be refiled below */ | ||
| 1089 | |||
| 1090 | } | ||
| 1091 | /* | ||
| 1092 | * Special case --- the buffer might actually have been | ||
| 1093 | * allocated and then immediately deallocated in the previous, | ||
| 1094 | * committing transaction, so might still be left on that | ||
| 1095 | * transaction's metadata lists. | ||
| 1096 | */ | ||
| 1097 | if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { | ||
| 1098 | JBUFFER_TRACE(jh, "not on correct data list: unfile"); | ||
| 1099 | J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); | ||
| 1100 | __jbd2_journal_temp_unlink_buffer(jh); | ||
| 1101 | jh->b_transaction = handle->h_transaction; | ||
| 1102 | JBUFFER_TRACE(jh, "file as data"); | ||
| 1103 | __jbd2_journal_file_buffer(jh, handle->h_transaction, | ||
| 1104 | BJ_SyncData); | ||
| 1105 | } | ||
| 1106 | } else { | ||
| 1107 | JBUFFER_TRACE(jh, "not on a transaction"); | ||
| 1108 | __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); | ||
| 1109 | } | ||
| 1110 | no_journal: | ||
| 1111 | spin_unlock(&journal->j_list_lock); | ||
| 1112 | jbd_unlock_bh_state(bh); | ||
| 1113 | if (need_brelse) { | ||
| 1114 | BUFFER_TRACE(bh, "brelse"); | ||
| 1115 | __brelse(bh); | ||
| 1116 | } | ||
| 1117 | JBUFFER_TRACE(jh, "exit"); | ||
| 1118 | jbd2_journal_put_journal_head(jh); | ||
| 1119 | return 0; | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | /** | ||
| 1123 | * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata | 946 | * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata |
| 1124 | * @handle: transaction to add buffer to. | 947 | * @handle: transaction to add buffer to. |
| 1125 | * @bh: buffer to mark | 948 | * @bh: buffer to mark |
| @@ -1541,10 +1364,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) | |||
| 1541 | * Remove a buffer from the appropriate transaction list. | 1364 | * Remove a buffer from the appropriate transaction list. |
| 1542 | * | 1365 | * |
| 1543 | * Note that this function can *change* the value of | 1366 | * Note that this function can *change* the value of |
| 1544 | * bh->b_transaction->t_sync_datalist, t_buffers, t_forget, | 1367 | * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list, |
| 1545 | * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list. If the caller | 1368 | * t_log_list or t_reserved_list. If the caller is holding onto a copy of one |
| 1546 | * is holding onto a copy of one of thee pointers, it could go bad. | 1369 | * of these pointers, it could go bad. Generally the caller needs to re-read |
| 1547 | * Generally the caller needs to re-read the pointer from the transaction_t. | 1370 | * the pointer from the transaction_t. |
| 1548 | * | 1371 | * |
| 1549 | * Called under j_list_lock. The journal may not be locked. | 1372 | * Called under j_list_lock. The journal may not be locked. |
| 1550 | */ | 1373 | */ |
| @@ -1566,9 +1389,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
| 1566 | switch (jh->b_jlist) { | 1389 | switch (jh->b_jlist) { |
| 1567 | case BJ_None: | 1390 | case BJ_None: |
| 1568 | return; | 1391 | return; |
| 1569 | case BJ_SyncData: | ||
| 1570 | list = &transaction->t_sync_datalist; | ||
| 1571 | break; | ||
| 1572 | case BJ_Metadata: | 1392 | case BJ_Metadata: |
| 1573 | transaction->t_nr_buffers--; | 1393 | transaction->t_nr_buffers--; |
| 1574 | J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); | 1394 | J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); |
| @@ -1589,9 +1409,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
| 1589 | case BJ_Reserved: | 1409 | case BJ_Reserved: |
| 1590 | list = &transaction->t_reserved_list; | 1410 | list = &transaction->t_reserved_list; |
| 1591 | break; | 1411 | break; |
| 1592 | case BJ_Locked: | ||
| 1593 | list = &transaction->t_locked_list; | ||
| 1594 | break; | ||
| 1595 | } | 1412 | } |
| 1596 | 1413 | ||
| 1597 | __blist_del_buffer(list, jh); | 1414 | __blist_del_buffer(list, jh); |
| @@ -1634,15 +1451,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
| 1634 | goto out; | 1451 | goto out; |
| 1635 | 1452 | ||
| 1636 | spin_lock(&journal->j_list_lock); | 1453 | spin_lock(&journal->j_list_lock); |
| 1637 | if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) { | 1454 | if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { |
| 1638 | if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { | ||
| 1639 | /* A written-back ordered data buffer */ | ||
| 1640 | JBUFFER_TRACE(jh, "release data"); | ||
| 1641 | __jbd2_journal_unfile_buffer(jh); | ||
| 1642 | jbd2_journal_remove_journal_head(bh); | ||
| 1643 | __brelse(bh); | ||
| 1644 | } | ||
| 1645 | } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { | ||
| 1646 | /* written-back checkpointed metadata buffer */ | 1455 | /* written-back checkpointed metadata buffer */ |
| 1647 | if (jh->b_jlist == BJ_None) { | 1456 | if (jh->b_jlist == BJ_None) { |
| 1648 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1457 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
| @@ -1656,12 +1465,43 @@ out: | |||
| 1656 | return; | 1465 | return; |
| 1657 | } | 1466 | } |
| 1658 | 1467 | ||
| 1468 | /* | ||
| 1469 | * jbd2_journal_try_to_free_buffers() could race with | ||
| 1470 | * jbd2_journal_commit_transaction(). The latter might still hold the | ||
| 1471 | * reference count to the buffers when inspecting them on | ||
| 1472 | * t_syncdata_list or t_locked_list. | ||
| 1473 | * | ||
| 1474 | * jbd2_journal_try_to_free_buffers() will call this function to | ||
| 1475 | * wait for the current transaction to finish syncing data buffers, before | ||
| 1476 | * try to free that buffer. | ||
| 1477 | * | ||
| 1478 | * Takes journal->j_state_lock itself, so must be called without it held. | ||
| 1479 | */ | ||
| 1480 | static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal) | ||
| 1481 | { | ||
| 1482 | transaction_t *transaction; | ||
| 1483 | tid_t tid; | ||
| 1484 | |||
| 1485 | spin_lock(&journal->j_state_lock); | ||
| 1486 | transaction = journal->j_committing_transaction; | ||
| 1487 | |||
| 1488 | if (!transaction) { | ||
| 1489 | spin_unlock(&journal->j_state_lock); | ||
| 1490 | return; | ||
| 1491 | } | ||
| 1492 | |||
| 1493 | tid = transaction->t_tid; | ||
| 1494 | spin_unlock(&journal->j_state_lock); | ||
| 1495 | jbd2_log_wait_commit(journal, tid); | ||
| 1496 | } | ||
| 1659 | 1497 | ||
| 1660 | /** | 1498 | /** |
| 1661 | * int jbd2_journal_try_to_free_buffers() - try to free page buffers. | 1499 | * int jbd2_journal_try_to_free_buffers() - try to free page buffers. |
| 1662 | * @journal: journal for operation | 1500 | * @journal: journal for operation |
| 1663 | * @page: to try and free | 1501 | * @page: to try and free |
| 1664 | * @unused_gfp_mask: unused | 1502 | * @gfp_mask: we use the mask to detect how hard we should try to release |
| 1503 | * buffers. If __GFP_WAIT and __GFP_FS are set, we wait for commit code to | ||
| 1504 | * release the buffers. | ||
| 1665 | * | 1505 | * |
| 1666 | * | 1506 | * |
| 1667 | * For all the buffers on this page, | 1507 | * For all the buffers on this page, |
| @@ -1690,9 +1530,11 @@ out: | |||
| 1690 | * journal_try_to_free_buffer() is changing its state. But that | 1530 | * journal_try_to_free_buffer() is changing its state. But that |
| 1691 | * cannot happen because we never reallocate freed data as metadata | 1531 | * cannot happen because we never reallocate freed data as metadata |
| 1692 | * while the data is part of a transaction. Yes? | 1532 | * while the data is part of a transaction. Yes? |
| 1533 | * | ||
| 1534 | * Return 0 on failure, 1 on success | ||
| 1693 | */ | 1535 | */ |
| 1694 | int jbd2_journal_try_to_free_buffers(journal_t *journal, | 1536 | int jbd2_journal_try_to_free_buffers(journal_t *journal, |
| 1695 | struct page *page, gfp_t unused_gfp_mask) | 1537 | struct page *page, gfp_t gfp_mask) |
| 1696 | { | 1538 | { |
| 1697 | struct buffer_head *head; | 1539 | struct buffer_head *head; |
| 1698 | struct buffer_head *bh; | 1540 | struct buffer_head *bh; |
| @@ -1708,7 +1550,8 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, | |||
| 1708 | /* | 1550 | /* |
| 1709 | * We take our own ref against the journal_head here to avoid | 1551 | * We take our own ref against the journal_head here to avoid |
| 1710 | * having to add tons of locking around each instance of | 1552 | * having to add tons of locking around each instance of |
| 1711 | * jbd2_journal_remove_journal_head() and jbd2_journal_put_journal_head(). | 1553 | * jbd2_journal_remove_journal_head() and |
| 1554 | * jbd2_journal_put_journal_head(). | ||
| 1712 | */ | 1555 | */ |
| 1713 | jh = jbd2_journal_grab_journal_head(bh); | 1556 | jh = jbd2_journal_grab_journal_head(bh); |
| 1714 | if (!jh) | 1557 | if (!jh) |
| @@ -1721,7 +1564,28 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, | |||
| 1721 | if (buffer_jbd(bh)) | 1564 | if (buffer_jbd(bh)) |
| 1722 | goto busy; | 1565 | goto busy; |
| 1723 | } while ((bh = bh->b_this_page) != head); | 1566 | } while ((bh = bh->b_this_page) != head); |
| 1567 | |||
| 1724 | ret = try_to_free_buffers(page); | 1568 | ret = try_to_free_buffers(page); |
| 1569 | |||
| 1570 | /* | ||
| 1571 | * There are a number of places where jbd2_journal_try_to_free_buffers() | ||
| 1572 | * could race with jbd2_journal_commit_transaction(); the latter still | ||
| 1573 | * holds a reference to the buffers to free while processing them, so | ||
| 1574 | * try_to_free_buffers() fails to free those buffers. Some callers of | ||
| 1575 | * releasepage() require the page buffers to be dropped and otherwise | ||
| 1576 | * treat the failure to free as an error (such as generic_file_direct_IO()). | ||
| 1577 | * | ||
| 1578 | * So, if the caller of try_to_release_page() wants the synchronous | ||
| 1579 | * behaviour(i.e make sure buffers are dropped upon return), | ||
| 1580 | * let's wait for the current transaction to finish flush of | ||
| 1581 | * dirty data buffers, then try to free those buffers again, | ||
| 1582 | * with the journal locked. | ||
| 1583 | */ | ||
| 1584 | if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) { | ||
| 1585 | jbd2_journal_wait_for_transaction_sync_data(journal); | ||
| 1586 | ret = try_to_free_buffers(page); | ||
| 1587 | } | ||
| 1588 | |||
| 1725 | busy: | 1589 | busy: |
| 1726 | return ret; | 1590 | return ret; |
| 1727 | } | 1591 | } |
| @@ -1823,6 +1687,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
| 1823 | if (!buffer_jbd(bh)) | 1687 | if (!buffer_jbd(bh)) |
| 1824 | goto zap_buffer_unlocked; | 1688 | goto zap_buffer_unlocked; |
| 1825 | 1689 | ||
| 1690 | /* OK, we have data buffer in journaled mode */ | ||
| 1826 | spin_lock(&journal->j_state_lock); | 1691 | spin_lock(&journal->j_state_lock); |
| 1827 | jbd_lock_bh_state(bh); | 1692 | jbd_lock_bh_state(bh); |
| 1828 | spin_lock(&journal->j_list_lock); | 1693 | spin_lock(&journal->j_list_lock); |
| @@ -1886,15 +1751,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
| 1886 | } | 1751 | } |
| 1887 | } else if (transaction == journal->j_committing_transaction) { | 1752 | } else if (transaction == journal->j_committing_transaction) { |
| 1888 | JBUFFER_TRACE(jh, "on committing transaction"); | 1753 | JBUFFER_TRACE(jh, "on committing transaction"); |
| 1889 | if (jh->b_jlist == BJ_Locked) { | ||
| 1890 | /* | ||
| 1891 | * The buffer is on the committing transaction's locked | ||
| 1892 | * list. We have the buffer locked, so I/O has | ||
| 1893 | * completed. So we can nail the buffer now. | ||
| 1894 | */ | ||
| 1895 | may_free = __dispose_buffer(jh, transaction); | ||
| 1896 | goto zap_buffer; | ||
| 1897 | } | ||
| 1898 | /* | 1754 | /* |
| 1899 | * If it is committing, we simply cannot touch it. We | 1755 | * If it is committing, we simply cannot touch it. We |
| 1900 | * can remove it's next_transaction pointer from the | 1756 | * can remove it's next_transaction pointer from the |
| @@ -2027,9 +1883,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
| 2027 | J_ASSERT_JH(jh, !jh->b_committed_data); | 1883 | J_ASSERT_JH(jh, !jh->b_committed_data); |
| 2028 | J_ASSERT_JH(jh, !jh->b_frozen_data); | 1884 | J_ASSERT_JH(jh, !jh->b_frozen_data); |
| 2029 | return; | 1885 | return; |
| 2030 | case BJ_SyncData: | ||
| 2031 | list = &transaction->t_sync_datalist; | ||
| 2032 | break; | ||
| 2033 | case BJ_Metadata: | 1886 | case BJ_Metadata: |
| 2034 | transaction->t_nr_buffers++; | 1887 | transaction->t_nr_buffers++; |
| 2035 | list = &transaction->t_buffers; | 1888 | list = &transaction->t_buffers; |
| @@ -2049,9 +1902,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
| 2049 | case BJ_Reserved: | 1902 | case BJ_Reserved: |
| 2050 | list = &transaction->t_reserved_list; | 1903 | list = &transaction->t_reserved_list; |
| 2051 | break; | 1904 | break; |
| 2052 | case BJ_Locked: | ||
| 2053 | list = &transaction->t_locked_list; | ||
| 2054 | break; | ||
| 2055 | } | 1905 | } |
| 2056 | 1906 | ||
| 2057 | __blist_add_buffer(list, jh); | 1907 | __blist_add_buffer(list, jh); |
| @@ -2141,3 +1991,88 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) | |||
| 2141 | spin_unlock(&journal->j_list_lock); | 1991 | spin_unlock(&journal->j_list_lock); |
| 2142 | __brelse(bh); | 1992 | __brelse(bh); |
| 2143 | } | 1993 | } |
| 1994 | |||
| 1995 | /* | ||
| 1996 | * File inode in the inode list of the handle's transaction | ||
| 1997 | */ | ||
| 1998 | int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) | ||
| 1999 | { | ||
| 2000 | transaction_t *transaction = handle->h_transaction; | ||
| 2001 | journal_t *journal = transaction->t_journal; | ||
| 2002 | |||
| 2003 | if (is_handle_aborted(handle)) | ||
| 2004 | return -EIO; | ||
| 2005 | |||
| 2006 | jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, | ||
| 2007 | transaction->t_tid); | ||
| 2008 | |||
| 2009 | /* | ||
| 2010 | * First check whether inode isn't already on the transaction's | ||
| 2011 | * lists without taking the lock. Note that this check is safe | ||
| 2012 | * without the lock as we cannot race with somebody removing inode | ||
| 2013 | * from the transaction. The reason is that we remove inode from the | ||
| 2014 | * transaction only in jbd2_journal_release_jbd_inode() and when we commit | ||
| 2015 | * the transaction. We are guarded from the first case by holding | ||
| 2016 | * a reference to the inode. We are safe against the second case | ||
| 2017 | * because if jinode->i_transaction == transaction, commit code | ||
| 2018 | * cannot touch the transaction because we hold reference to it, | ||
| 2019 | * and if jinode->i_next_transaction == transaction, commit code | ||
| 2020 | * will only file the inode where we want it. | ||
| 2021 | */ | ||
| 2022 | if (jinode->i_transaction == transaction || | ||
| 2023 | jinode->i_next_transaction == transaction) | ||
| 2024 | return 0; | ||
| 2025 | |||
| 2026 | spin_lock(&journal->j_list_lock); | ||
| 2027 | |||
| 2028 | if (jinode->i_transaction == transaction || | ||
| 2029 | jinode->i_next_transaction == transaction) | ||
| 2030 | goto done; | ||
| 2031 | |||
| 2032 | /* On some different transaction's list - should be | ||
| 2033 | * the committing one */ | ||
| 2034 | if (jinode->i_transaction) { | ||
| 2035 | J_ASSERT(jinode->i_next_transaction == NULL); | ||
| 2036 | J_ASSERT(jinode->i_transaction == | ||
| 2037 | journal->j_committing_transaction); | ||
| 2038 | jinode->i_next_transaction = transaction; | ||
| 2039 | goto done; | ||
| 2040 | } | ||
| 2041 | /* Not on any transaction list... */ | ||
| 2042 | J_ASSERT(!jinode->i_next_transaction); | ||
| 2043 | jinode->i_transaction = transaction; | ||
| 2044 | list_add(&jinode->i_list, &transaction->t_inode_list); | ||
| 2045 | done: | ||
| 2046 | spin_unlock(&journal->j_list_lock); | ||
| 2047 | |||
| 2048 | return 0; | ||
| 2049 | } | ||
| 2050 | |||
| 2051 | /* | ||
| 2052 | * This function must be called when inode is journaled in ordered mode | ||
| 2053 | * before truncation happens. It starts writeout of truncated part in | ||
| 2054 | * case it is in the committing transaction so that we uphold the ordered | ||
| 2055 | * mode consistency guarantees. | ||
| 2056 | */ | ||
| 2057 | int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, | ||
| 2058 | loff_t new_size) | ||
| 2059 | { | ||
| 2060 | journal_t *journal; | ||
| 2061 | transaction_t *commit_trans; | ||
| 2062 | int ret = 0; | ||
| 2063 | |||
| 2064 | if (!inode->i_transaction && !inode->i_next_transaction) | ||
| 2065 | goto out; | ||
| 2066 | journal = inode->i_transaction->t_journal; | ||
| 2067 | spin_lock(&journal->j_state_lock); | ||
| 2068 | commit_trans = journal->j_committing_transaction; | ||
| 2069 | spin_unlock(&journal->j_state_lock); | ||
| 2070 | if (inode->i_transaction == commit_trans) { | ||
| 2071 | ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, | ||
| 2072 | new_size, LLONG_MAX); | ||
| 2073 | if (ret) | ||
| 2074 | jbd2_journal_abort(journal, ret); | ||
| 2075 | } | ||
| 2076 | out: | ||
| 2077 | return ret; | ||
| 2078 | } | ||
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index bf6ab19b86ee..6a73de84bcef 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/ctype.h> | 21 | #include <linux/ctype.h> |
| 22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
| 23 | #include <linux/proc_fs.h> | 23 | #include <linux/proc_fs.h> |
| 24 | #include <linux/seq_file.h> | ||
| 24 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
| 25 | #include "jfs_incore.h" | 26 | #include "jfs_incore.h" |
| 26 | #include "jfs_filsys.h" | 27 | #include "jfs_filsys.h" |
| @@ -30,29 +31,19 @@ | |||
| 30 | 31 | ||
| 31 | static struct proc_dir_entry *base; | 32 | static struct proc_dir_entry *base; |
| 32 | #ifdef CONFIG_JFS_DEBUG | 33 | #ifdef CONFIG_JFS_DEBUG |
| 33 | static int loglevel_read(char *page, char **start, off_t off, | 34 | static int jfs_loglevel_proc_show(struct seq_file *m, void *v) |
| 34 | int count, int *eof, void *data) | ||
| 35 | { | 35 | { |
| 36 | int len; | 36 | seq_printf(m, "%d\n", jfsloglevel); |
| 37 | 37 | return 0; | |
| 38 | len = sprintf(page, "%d\n", jfsloglevel); | 38 | } |
| 39 | |||
| 40 | len -= off; | ||
| 41 | *start = page + off; | ||
| 42 | |||
| 43 | if (len > count) | ||
| 44 | len = count; | ||
| 45 | else | ||
| 46 | *eof = 1; | ||
| 47 | |||
| 48 | if (len < 0) | ||
| 49 | len = 0; | ||
| 50 | 39 | ||
| 51 | return len; | 40 | static int jfs_loglevel_proc_open(struct inode *inode, struct file *file) |
| 41 | { | ||
| 42 | return single_open(file, jfs_loglevel_proc_show, NULL); | ||
| 52 | } | 43 | } |
| 53 | 44 | ||
| 54 | static int loglevel_write(struct file *file, const char __user *buffer, | 45 | static ssize_t jfs_loglevel_proc_write(struct file *file, |
| 55 | unsigned long count, void *data) | 46 | const char __user *buffer, size_t count, loff_t *ppos) |
| 56 | { | 47 | { |
| 57 | char c; | 48 | char c; |
| 58 | 49 | ||
| @@ -65,22 +56,30 @@ static int loglevel_write(struct file *file, const char __user *buffer, | |||
| 65 | jfsloglevel = c - '0'; | 56 | jfsloglevel = c - '0'; |
| 66 | return count; | 57 | return count; |
| 67 | } | 58 | } |
| 59 | |||
| 60 | static const struct file_operations jfs_loglevel_proc_fops = { | ||
| 61 | .owner = THIS_MODULE, | ||
| 62 | .open = jfs_loglevel_proc_open, | ||
| 63 | .read = seq_read, | ||
| 64 | .llseek = seq_lseek, | ||
| 65 | .release = single_release, | ||
| 66 | .write = jfs_loglevel_proc_write, | ||
| 67 | }; | ||
| 68 | #endif | 68 | #endif |
| 69 | 69 | ||
| 70 | static struct { | 70 | static struct { |
| 71 | const char *name; | 71 | const char *name; |
| 72 | read_proc_t *read_fn; | 72 | const struct file_operations *proc_fops; |
| 73 | write_proc_t *write_fn; | ||
| 74 | } Entries[] = { | 73 | } Entries[] = { |
| 75 | #ifdef CONFIG_JFS_STATISTICS | 74 | #ifdef CONFIG_JFS_STATISTICS |
| 76 | { "lmstats", jfs_lmstats_read, }, | 75 | { "lmstats", &jfs_lmstats_proc_fops, }, |
| 77 | { "txstats", jfs_txstats_read, }, | 76 | { "txstats", &jfs_txstats_proc_fops, }, |
| 78 | { "xtstat", jfs_xtstat_read, }, | 77 | { "xtstat", &jfs_xtstat_proc_fops, }, |
| 79 | { "mpstat", jfs_mpstat_read, }, | 78 | { "mpstat", &jfs_mpstat_proc_fops, }, |
| 80 | #endif | 79 | #endif |
| 81 | #ifdef CONFIG_JFS_DEBUG | 80 | #ifdef CONFIG_JFS_DEBUG |
| 82 | { "TxAnchor", jfs_txanchor_read, }, | 81 | { "TxAnchor", &jfs_txanchor_proc_fops, }, |
| 83 | { "loglevel", loglevel_read, loglevel_write } | 82 | { "loglevel", &jfs_loglevel_proc_fops } |
| 84 | #endif | 83 | #endif |
| 85 | }; | 84 | }; |
| 86 | #define NPROCENT ARRAY_SIZE(Entries) | 85 | #define NPROCENT ARRAY_SIZE(Entries) |
| @@ -93,13 +92,8 @@ void jfs_proc_init(void) | |||
| 93 | return; | 92 | return; |
| 94 | base->owner = THIS_MODULE; | 93 | base->owner = THIS_MODULE; |
| 95 | 94 | ||
| 96 | for (i = 0; i < NPROCENT; i++) { | 95 | for (i = 0; i < NPROCENT; i++) |
| 97 | struct proc_dir_entry *p; | 96 | proc_create(Entries[i].name, 0, base, Entries[i].proc_fops); |
| 98 | if ((p = create_proc_entry(Entries[i].name, 0, base))) { | ||
| 99 | p->read_proc = Entries[i].read_fn; | ||
| 100 | p->write_proc = Entries[i].write_fn; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | } | 97 | } |
| 104 | 98 | ||
| 105 | void jfs_proc_clean(void) | 99 | void jfs_proc_clean(void) |
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h index 044c1e654cc0..eafd1300a00b 100644 --- a/fs/jfs/jfs_debug.h +++ b/fs/jfs/jfs_debug.h | |||
| @@ -62,7 +62,7 @@ extern void jfs_proc_clean(void); | |||
| 62 | 62 | ||
| 63 | extern int jfsloglevel; | 63 | extern int jfsloglevel; |
| 64 | 64 | ||
| 65 | extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); | 65 | extern const struct file_operations jfs_txanchor_proc_fops; |
| 66 | 66 | ||
| 67 | /* information message: e.g., configuration, major event */ | 67 | /* information message: e.g., configuration, major event */ |
| 68 | #define jfs_info(fmt, arg...) do { \ | 68 | #define jfs_info(fmt, arg...) do { \ |
| @@ -105,10 +105,10 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); | |||
| 105 | * ---------- | 105 | * ---------- |
| 106 | */ | 106 | */ |
| 107 | #ifdef CONFIG_JFS_STATISTICS | 107 | #ifdef CONFIG_JFS_STATISTICS |
| 108 | extern int jfs_lmstats_read(char *, char **, off_t, int, int *, void *); | 108 | extern const struct file_operations jfs_lmstats_proc_fops; |
| 109 | extern int jfs_txstats_read(char *, char **, off_t, int, int *, void *); | 109 | extern const struct file_operations jfs_txstats_proc_fops; |
| 110 | extern int jfs_mpstat_read(char *, char **, off_t, int, int *, void *); | 110 | extern const struct file_operations jfs_mpstat_proc_fops; |
| 111 | extern int jfs_xtstat_read(char *, char **, off_t, int, int *, void *); | 111 | extern const struct file_operations jfs_xtstat_proc_fops; |
| 112 | 112 | ||
| 113 | #define INCREMENT(x) ((x)++) | 113 | #define INCREMENT(x) ((x)++) |
| 114 | #define DECREMENT(x) ((x)--) | 114 | #define DECREMENT(x) ((x)--) |
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h index cdac2d5bafeb..2545bb317235 100644 --- a/fs/jfs/jfs_dtree.h +++ b/fs/jfs/jfs_dtree.h | |||
| @@ -243,9 +243,6 @@ typedef union { | |||
| 243 | #define JFS_REMOVE 3 | 243 | #define JFS_REMOVE 3 |
| 244 | #define JFS_RENAME 4 | 244 | #define JFS_RENAME 4 |
| 245 | 245 | ||
| 246 | #define DIRENTSIZ(namlen) \ | ||
| 247 | ( (sizeof(struct dirent) - 2*(JFS_NAME_MAX+1) + 2*((namlen)+1) + 3) &~ 3 ) | ||
| 248 | |||
| 249 | /* | 246 | /* |
| 250 | * Maximum file offset for directories. | 247 | * Maximum file offset for directories. |
| 251 | */ | 248 | */ |
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 734ec916beaf..d6363d8309d0 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
| @@ -1520,7 +1520,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
| 1520 | jfs_error(ip->i_sb, | 1520 | jfs_error(ip->i_sb, |
| 1521 | "diAlloc: can't find free bit " | 1521 | "diAlloc: can't find free bit " |
| 1522 | "in wmap"); | 1522 | "in wmap"); |
| 1523 | return EIO; | 1523 | return -EIO; |
| 1524 | } | 1524 | } |
| 1525 | 1525 | ||
| 1526 | /* determine the inode number within the | 1526 | /* determine the inode number within the |
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 325a9679b95a..cd2ec2988b59 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c | |||
| @@ -69,6 +69,7 @@ | |||
| 69 | #include <linux/freezer.h> | 69 | #include <linux/freezer.h> |
| 70 | #include <linux/delay.h> | 70 | #include <linux/delay.h> |
| 71 | #include <linux/mutex.h> | 71 | #include <linux/mutex.h> |
| 72 | #include <linux/seq_file.h> | ||
| 72 | #include "jfs_incore.h" | 73 | #include "jfs_incore.h" |
| 73 | #include "jfs_filsys.h" | 74 | #include "jfs_filsys.h" |
| 74 | #include "jfs_metapage.h" | 75 | #include "jfs_metapage.h" |
| @@ -2503,13 +2504,9 @@ exit: | |||
| 2503 | } | 2504 | } |
| 2504 | 2505 | ||
| 2505 | #ifdef CONFIG_JFS_STATISTICS | 2506 | #ifdef CONFIG_JFS_STATISTICS |
| 2506 | int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, | 2507 | static int jfs_lmstats_proc_show(struct seq_file *m, void *v) |
| 2507 | int *eof, void *data) | ||
| 2508 | { | 2508 | { |
| 2509 | int len = 0; | 2509 | seq_printf(m, |
| 2510 | off_t begin; | ||
| 2511 | |||
| 2512 | len += sprintf(buffer, | ||
| 2513 | "JFS Logmgr stats\n" | 2510 | "JFS Logmgr stats\n" |
| 2514 | "================\n" | 2511 | "================\n" |
| 2515 | "commits = %d\n" | 2512 | "commits = %d\n" |
| @@ -2522,19 +2519,19 @@ int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, | |||
| 2522 | lmStat.pagedone, | 2519 | lmStat.pagedone, |
| 2523 | lmStat.full_page, | 2520 | lmStat.full_page, |
| 2524 | lmStat.partial_page); | 2521 | lmStat.partial_page); |
| 2522 | return 0; | ||
| 2523 | } | ||
| 2525 | 2524 | ||
| 2526 | begin = offset; | 2525 | static int jfs_lmstats_proc_open(struct inode *inode, struct file *file) |
| 2527 | *start = buffer + begin; | 2526 | { |
| 2528 | len -= begin; | 2527 | return single_open(file, jfs_lmstats_proc_show, NULL); |
| 2529 | |||
| 2530 | if (len > length) | ||
| 2531 | len = length; | ||
| 2532 | else | ||
| 2533 | *eof = 1; | ||
| 2534 | |||
| 2535 | if (len < 0) | ||
| 2536 | len = 0; | ||
| 2537 | |||
| 2538 | return len; | ||
| 2539 | } | 2528 | } |
| 2529 | |||
| 2530 | const struct file_operations jfs_lmstats_proc_fops = { | ||
| 2531 | .owner = THIS_MODULE, | ||
| 2532 | .open = jfs_lmstats_proc_open, | ||
| 2533 | .read = seq_read, | ||
| 2534 | .llseek = seq_lseek, | ||
| 2535 | .release = single_release, | ||
| 2536 | }; | ||
| 2540 | #endif /* CONFIG_JFS_STATISTICS */ | 2537 | #endif /* CONFIG_JFS_STATISTICS */ |
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index d1e64f2f2fcd..854ff0ec574f 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c | |||
| @@ -19,10 +19,12 @@ | |||
| 19 | 19 | ||
| 20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
| 21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
| 22 | #include <linux/module.h> | ||
| 22 | #include <linux/bio.h> | 23 | #include <linux/bio.h> |
| 23 | #include <linux/init.h> | 24 | #include <linux/init.h> |
| 24 | #include <linux/buffer_head.h> | 25 | #include <linux/buffer_head.h> |
| 25 | #include <linux/mempool.h> | 26 | #include <linux/mempool.h> |
| 27 | #include <linux/seq_file.h> | ||
| 26 | #include "jfs_incore.h" | 28 | #include "jfs_incore.h" |
| 27 | #include "jfs_superblock.h" | 29 | #include "jfs_superblock.h" |
| 28 | #include "jfs_filsys.h" | 30 | #include "jfs_filsys.h" |
| @@ -804,13 +806,9 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len) | |||
| 804 | } | 806 | } |
| 805 | 807 | ||
| 806 | #ifdef CONFIG_JFS_STATISTICS | 808 | #ifdef CONFIG_JFS_STATISTICS |
| 807 | int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length, | 809 | static int jfs_mpstat_proc_show(struct seq_file *m, void *v) |
| 808 | int *eof, void *data) | ||
| 809 | { | 810 | { |
| 810 | int len = 0; | 811 | seq_printf(m, |
| 811 | off_t begin; | ||
| 812 | |||
| 813 | len += sprintf(buffer, | ||
| 814 | "JFS Metapage statistics\n" | 812 | "JFS Metapage statistics\n" |
| 815 | "=======================\n" | 813 | "=======================\n" |
| 816 | "page allocations = %d\n" | 814 | "page allocations = %d\n" |
| @@ -819,19 +817,19 @@ int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length, | |||
| 819 | mpStat.pagealloc, | 817 | mpStat.pagealloc, |
| 820 | mpStat.pagefree, | 818 | mpStat.pagefree, |
| 821 | mpStat.lockwait); | 819 | mpStat.lockwait); |
| 820 | return 0; | ||
| 821 | } | ||
| 822 | 822 | ||
| 823 | begin = offset; | 823 | static int jfs_mpstat_proc_open(struct inode *inode, struct file *file) |
| 824 | *start = buffer + begin; | 824 | { |
| 825 | len -= begin; | 825 | return single_open(file, jfs_mpstat_proc_show, NULL); |
| 826 | |||
| 827 | if (len > length) | ||
| 828 | len = length; | ||
| 829 | else | ||
| 830 | *eof = 1; | ||
| 831 | |||
| 832 | if (len < 0) | ||
| 833 | len = 0; | ||
| 834 | |||
| 835 | return len; | ||
| 836 | } | 826 | } |
| 827 | |||
| 828 | const struct file_operations jfs_mpstat_proc_fops = { | ||
| 829 | .owner = THIS_MODULE, | ||
| 830 | .open = jfs_mpstat_proc_open, | ||
| 831 | .read = seq_read, | ||
| 832 | .llseek = seq_lseek, | ||
| 833 | .release = single_release, | ||
| 834 | }; | ||
| 837 | #endif | 835 | #endif |
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index e7c60ae6b5b2..f26e4d03ada5 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c | |||
| @@ -49,6 +49,7 @@ | |||
| 49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
| 50 | #include <linux/moduleparam.h> | 50 | #include <linux/moduleparam.h> |
| 51 | #include <linux/kthread.h> | 51 | #include <linux/kthread.h> |
| 52 | #include <linux/seq_file.h> | ||
| 52 | #include "jfs_incore.h" | 53 | #include "jfs_incore.h" |
| 53 | #include "jfs_inode.h" | 54 | #include "jfs_inode.h" |
| 54 | #include "jfs_filsys.h" | 55 | #include "jfs_filsys.h" |
| @@ -3009,11 +3010,8 @@ int jfs_sync(void *arg) | |||
| 3009 | } | 3010 | } |
| 3010 | 3011 | ||
| 3011 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) | 3012 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) |
| 3012 | int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, | 3013 | static int jfs_txanchor_proc_show(struct seq_file *m, void *v) |
| 3013 | int *eof, void *data) | ||
| 3014 | { | 3014 | { |
| 3015 | int len = 0; | ||
| 3016 | off_t begin; | ||
| 3017 | char *freewait; | 3015 | char *freewait; |
| 3018 | char *freelockwait; | 3016 | char *freelockwait; |
| 3019 | char *lowlockwait; | 3017 | char *lowlockwait; |
| @@ -3025,7 +3023,7 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, | |||
| 3025 | lowlockwait = | 3023 | lowlockwait = |
| 3026 | waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; | 3024 | waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; |
| 3027 | 3025 | ||
| 3028 | len += sprintf(buffer, | 3026 | seq_printf(m, |
| 3029 | "JFS TxAnchor\n" | 3027 | "JFS TxAnchor\n" |
| 3030 | "============\n" | 3028 | "============\n" |
| 3031 | "freetid = %d\n" | 3029 | "freetid = %d\n" |
| @@ -3044,31 +3042,27 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, | |||
| 3044 | TxAnchor.tlocksInUse, | 3042 | TxAnchor.tlocksInUse, |
| 3045 | jfs_tlocks_low, | 3043 | jfs_tlocks_low, |
| 3046 | list_empty(&TxAnchor.unlock_queue) ? "" : "not "); | 3044 | list_empty(&TxAnchor.unlock_queue) ? "" : "not "); |
| 3045 | return 0; | ||
| 3046 | } | ||
| 3047 | 3047 | ||
| 3048 | begin = offset; | 3048 | static int jfs_txanchor_proc_open(struct inode *inode, struct file *file) |
| 3049 | *start = buffer + begin; | 3049 | { |
| 3050 | len -= begin; | 3050 | return single_open(file, jfs_txanchor_proc_show, NULL); |
| 3051 | |||
| 3052 | if (len > length) | ||
| 3053 | len = length; | ||
| 3054 | else | ||
| 3055 | *eof = 1; | ||
| 3056 | |||
| 3057 | if (len < 0) | ||
| 3058 | len = 0; | ||
| 3059 | |||
| 3060 | return len; | ||
| 3061 | } | 3051 | } |
| 3052 | |||
| 3053 | const struct file_operations jfs_txanchor_proc_fops = { | ||
| 3054 | .owner = THIS_MODULE, | ||
| 3055 | .open = jfs_txanchor_proc_open, | ||
| 3056 | .read = seq_read, | ||
| 3057 | .llseek = seq_lseek, | ||
| 3058 | .release = single_release, | ||
| 3059 | }; | ||
| 3062 | #endif | 3060 | #endif |
| 3063 | 3061 | ||
| 3064 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) | 3062 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) |
| 3065 | int jfs_txstats_read(char *buffer, char **start, off_t offset, int length, | 3063 | static int jfs_txstats_proc_show(struct seq_file *m, void *v) |
| 3066 | int *eof, void *data) | ||
| 3067 | { | 3064 | { |
| 3068 | int len = 0; | 3065 | seq_printf(m, |
| 3069 | off_t begin; | ||
| 3070 | |||
| 3071 | len += sprintf(buffer, | ||
| 3072 | "JFS TxStats\n" | 3066 | "JFS TxStats\n" |
| 3073 | "===========\n" | 3067 | "===========\n" |
| 3074 | "calls to txBegin = %d\n" | 3068 | "calls to txBegin = %d\n" |
| @@ -3089,19 +3083,19 @@ int jfs_txstats_read(char *buffer, char **start, off_t offset, int length, | |||
| 3089 | TxStat.txBeginAnon_lockslow, | 3083 | TxStat.txBeginAnon_lockslow, |
| 3090 | TxStat.txLockAlloc, | 3084 | TxStat.txLockAlloc, |
| 3091 | TxStat.txLockAlloc_freelock); | 3085 | TxStat.txLockAlloc_freelock); |
| 3086 | return 0; | ||
| 3087 | } | ||
| 3092 | 3088 | ||
| 3093 | begin = offset; | 3089 | static int jfs_txstats_proc_open(struct inode *inode, struct file *file) |
| 3094 | *start = buffer + begin; | 3090 | { |
| 3095 | len -= begin; | 3091 | return single_open(file, jfs_txstats_proc_show, NULL); |
| 3096 | |||
| 3097 | if (len > length) | ||
| 3098 | len = length; | ||
| 3099 | else | ||
| 3100 | *eof = 1; | ||
| 3101 | |||
| 3102 | if (len < 0) | ||
| 3103 | len = 0; | ||
| 3104 | |||
| 3105 | return len; | ||
| 3106 | } | 3092 | } |
| 3093 | |||
| 3094 | const struct file_operations jfs_txstats_proc_fops = { | ||
| 3095 | .owner = THIS_MODULE, | ||
| 3096 | .open = jfs_txstats_proc_open, | ||
| 3097 | .read = seq_read, | ||
| 3098 | .llseek = seq_lseek, | ||
| 3099 | .release = single_release, | ||
| 3100 | }; | ||
| 3107 | #endif | 3101 | #endif |
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 5a61ebf2cbcc..ae3acafb447b 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c | |||
| @@ -20,7 +20,9 @@ | |||
| 20 | */ | 20 | */ |
| 21 | 21 | ||
| 22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
| 23 | #include <linux/module.h> | ||
| 23 | #include <linux/quotaops.h> | 24 | #include <linux/quotaops.h> |
| 25 | #include <linux/seq_file.h> | ||
| 24 | #include "jfs_incore.h" | 26 | #include "jfs_incore.h" |
| 25 | #include "jfs_filsys.h" | 27 | #include "jfs_filsys.h" |
| 26 | #include "jfs_metapage.h" | 28 | #include "jfs_metapage.h" |
| @@ -4134,13 +4136,9 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) | |||
| 4134 | } | 4136 | } |
| 4135 | 4137 | ||
| 4136 | #ifdef CONFIG_JFS_STATISTICS | 4138 | #ifdef CONFIG_JFS_STATISTICS |
| 4137 | int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, | 4139 | static int jfs_xtstat_proc_show(struct seq_file *m, void *v) |
| 4138 | int *eof, void *data) | ||
| 4139 | { | 4140 | { |
| 4140 | int len = 0; | 4141 | seq_printf(m, |
| 4141 | off_t begin; | ||
| 4142 | |||
| 4143 | len += sprintf(buffer, | ||
| 4144 | "JFS Xtree statistics\n" | 4142 | "JFS Xtree statistics\n" |
| 4145 | "====================\n" | 4143 | "====================\n" |
| 4146 | "searches = %d\n" | 4144 | "searches = %d\n" |
| @@ -4149,19 +4147,19 @@ int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, | |||
| 4149 | xtStat.search, | 4147 | xtStat.search, |
| 4150 | xtStat.fastSearch, | 4148 | xtStat.fastSearch, |
| 4151 | xtStat.split); | 4149 | xtStat.split); |
| 4150 | return 0; | ||
| 4151 | } | ||
| 4152 | 4152 | ||
| 4153 | begin = offset; | 4153 | static int jfs_xtstat_proc_open(struct inode *inode, struct file *file) |
| 4154 | *start = buffer + begin; | 4154 | { |
| 4155 | len -= begin; | 4155 | return single_open(file, jfs_xtstat_proc_show, NULL); |
| 4156 | |||
| 4157 | if (len > length) | ||
| 4158 | len = length; | ||
| 4159 | else | ||
| 4160 | *eof = 1; | ||
| 4161 | |||
| 4162 | if (len < 0) | ||
| 4163 | len = 0; | ||
| 4164 | |||
| 4165 | return len; | ||
| 4166 | } | 4156 | } |
| 4157 | |||
| 4158 | const struct file_operations jfs_xtstat_proc_fops = { | ||
| 4159 | .owner = THIS_MODULE, | ||
| 4160 | .open = jfs_xtstat_proc_open, | ||
| 4161 | .read = seq_read, | ||
| 4162 | .llseek = seq_lseek, | ||
| 4163 | .release = single_release, | ||
| 4164 | }; | ||
| 4167 | #endif | 4165 | #endif |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 0ba6778edaa2..2aba82386810 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
| @@ -1455,7 +1455,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc | |||
| 1455 | free_UCSname(&key); | 1455 | free_UCSname(&key); |
| 1456 | if (rc == -ENOENT) { | 1456 | if (rc == -ENOENT) { |
| 1457 | d_add(dentry, NULL); | 1457 | d_add(dentry, NULL); |
| 1458 | return ERR_PTR(0); | 1458 | return NULL; |
| 1459 | } else if (rc) { | 1459 | } else if (rc) { |
| 1460 | jfs_err("jfs_lookup: dtSearch returned %d", rc); | 1460 | jfs_err("jfs_lookup: dtSearch returned %d", rc); |
| 1461 | return ERR_PTR(rc); | 1461 | return ERR_PTR(rc); |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 50ea65451732..0288e6d7936a 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
| @@ -499,7 +499,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 499 | inode = jfs_iget(sb, ROOT_I); | 499 | inode = jfs_iget(sb, ROOT_I); |
| 500 | if (IS_ERR(inode)) { | 500 | if (IS_ERR(inode)) { |
| 501 | ret = PTR_ERR(inode); | 501 | ret = PTR_ERR(inode); |
| 502 | goto out_no_root; | 502 | goto out_no_rw; |
| 503 | } | 503 | } |
| 504 | sb->s_root = d_alloc_root(inode); | 504 | sb->s_root = d_alloc_root(inode); |
| 505 | if (!sb->s_root) | 505 | if (!sb->s_root) |
| @@ -521,9 +521,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 521 | return 0; | 521 | return 0; |
| 522 | 522 | ||
| 523 | out_no_root: | 523 | out_no_root: |
| 524 | jfs_err("jfs_read_super: get root inode failed"); | 524 | jfs_err("jfs_read_super: get root dentry failed"); |
| 525 | if (inode) | 525 | iput(inode); |
| 526 | iput(inode); | ||
| 527 | 526 | ||
| 528 | out_no_rw: | 527 | out_no_rw: |
| 529 | rc = jfs_umount(sb); | 528 | rc = jfs_umount(sb); |
diff --git a/fs/mpage.c b/fs/mpage.c index 235e4d3873a8..dbcc7af76a15 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
| @@ -82,7 +82,7 @@ static void mpage_end_io_write(struct bio *bio, int err) | |||
| 82 | bio_put(bio); | 82 | bio_put(bio); |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | static struct bio *mpage_bio_submit(int rw, struct bio *bio) | 85 | struct bio *mpage_bio_submit(int rw, struct bio *bio) |
| 86 | { | 86 | { |
| 87 | bio->bi_end_io = mpage_end_io_read; | 87 | bio->bi_end_io = mpage_end_io_read; |
| 88 | if (rw == WRITE) | 88 | if (rw == WRITE) |
| @@ -90,6 +90,7 @@ static struct bio *mpage_bio_submit(int rw, struct bio *bio) | |||
| 90 | submit_bio(rw, bio); | 90 | submit_bio(rw, bio); |
| 91 | return NULL; | 91 | return NULL; |
| 92 | } | 92 | } |
| 93 | EXPORT_SYMBOL(mpage_bio_submit); | ||
| 93 | 94 | ||
| 94 | static struct bio * | 95 | static struct bio * |
| 95 | mpage_alloc(struct block_device *bdev, | 96 | mpage_alloc(struct block_device *bdev, |
| @@ -435,15 +436,9 @@ EXPORT_SYMBOL(mpage_readpage); | |||
| 435 | * written, so it can intelligently allocate a suitably-sized BIO. For now, | 436 | * written, so it can intelligently allocate a suitably-sized BIO. For now, |
| 436 | * just allocate full-size (16-page) BIOs. | 437 | * just allocate full-size (16-page) BIOs. |
| 437 | */ | 438 | */ |
| 438 | struct mpage_data { | ||
| 439 | struct bio *bio; | ||
| 440 | sector_t last_block_in_bio; | ||
| 441 | get_block_t *get_block; | ||
| 442 | unsigned use_writepage; | ||
| 443 | }; | ||
| 444 | 439 | ||
| 445 | static int __mpage_writepage(struct page *page, struct writeback_control *wbc, | 440 | int __mpage_writepage(struct page *page, struct writeback_control *wbc, |
| 446 | void *data) | 441 | void *data) |
| 447 | { | 442 | { |
| 448 | struct mpage_data *mpd = data; | 443 | struct mpage_data *mpd = data; |
| 449 | struct bio *bio = mpd->bio; | 444 | struct bio *bio = mpd->bio; |
| @@ -651,6 +646,7 @@ out: | |||
| 651 | mpd->bio = bio; | 646 | mpd->bio = bio; |
| 652 | return ret; | 647 | return ret; |
| 653 | } | 648 | } |
| 649 | EXPORT_SYMBOL(__mpage_writepage); | ||
| 654 | 650 | ||
| 655 | /** | 651 | /** |
| 656 | * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them | 652 | * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them |
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 05ff4f1d7026..1f7f2956412a 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c | |||
| @@ -214,7 +214,7 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, | |||
| 214 | 214 | ||
| 215 | dentry->d_op = &msdos_dentry_operations; | 215 | dentry->d_op = &msdos_dentry_operations; |
| 216 | 216 | ||
| 217 | lock_kernel(); | 217 | lock_super(sb); |
| 218 | res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); | 218 | res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); |
| 219 | if (res == -ENOENT) | 219 | if (res == -ENOENT) |
| 220 | goto add; | 220 | goto add; |
| @@ -232,7 +232,7 @@ add: | |||
| 232 | if (dentry) | 232 | if (dentry) |
| 233 | dentry->d_op = &msdos_dentry_operations; | 233 | dentry->d_op = &msdos_dentry_operations; |
| 234 | out: | 234 | out: |
| 235 | unlock_kernel(); | 235 | unlock_super(sb); |
| 236 | if (!res) | 236 | if (!res) |
| 237 | return dentry; | 237 | return dentry; |
| 238 | return ERR_PTR(res); | 238 | return ERR_PTR(res); |
| @@ -286,7 +286,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, | |||
| 286 | unsigned char msdos_name[MSDOS_NAME]; | 286 | unsigned char msdos_name[MSDOS_NAME]; |
| 287 | int err, is_hid; | 287 | int err, is_hid; |
| 288 | 288 | ||
| 289 | lock_kernel(); | 289 | lock_super(sb); |
| 290 | 290 | ||
| 291 | err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, | 291 | err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, |
| 292 | msdos_name, &MSDOS_SB(sb)->options); | 292 | msdos_name, &MSDOS_SB(sb)->options); |
| @@ -315,7 +315,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, | |||
| 315 | 315 | ||
| 316 | d_instantiate(dentry, inode); | 316 | d_instantiate(dentry, inode); |
| 317 | out: | 317 | out: |
| 318 | unlock_kernel(); | 318 | unlock_super(sb); |
| 319 | if (!err) | 319 | if (!err) |
| 320 | err = fat_flush_inodes(sb, dir, inode); | 320 | err = fat_flush_inodes(sb, dir, inode); |
| 321 | return err; | 321 | return err; |
| @@ -324,11 +324,12 @@ out: | |||
| 324 | /***** Remove a directory */ | 324 | /***** Remove a directory */ |
| 325 | static int msdos_rmdir(struct inode *dir, struct dentry *dentry) | 325 | static int msdos_rmdir(struct inode *dir, struct dentry *dentry) |
| 326 | { | 326 | { |
| 327 | struct super_block *sb = dir->i_sb; | ||
| 327 | struct inode *inode = dentry->d_inode; | 328 | struct inode *inode = dentry->d_inode; |
| 328 | struct fat_slot_info sinfo; | 329 | struct fat_slot_info sinfo; |
| 329 | int err; | 330 | int err; |
| 330 | 331 | ||
| 331 | lock_kernel(); | 332 | lock_super(sb); |
| 332 | /* | 333 | /* |
| 333 | * Check whether the directory is not in use, then check | 334 | * Check whether the directory is not in use, then check |
| 334 | * whether it is empty. | 335 | * whether it is empty. |
| @@ -349,9 +350,9 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 349 | inode->i_ctime = CURRENT_TIME_SEC; | 350 | inode->i_ctime = CURRENT_TIME_SEC; |
| 350 | fat_detach(inode); | 351 | fat_detach(inode); |
| 351 | out: | 352 | out: |
| 352 | unlock_kernel(); | 353 | unlock_super(sb); |
| 353 | if (!err) | 354 | if (!err) |
| 354 | err = fat_flush_inodes(inode->i_sb, dir, inode); | 355 | err = fat_flush_inodes(sb, dir, inode); |
| 355 | 356 | ||
| 356 | return err; | 357 | return err; |
| 357 | } | 358 | } |
| @@ -366,7 +367,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 366 | struct timespec ts; | 367 | struct timespec ts; |
| 367 | int err, is_hid, cluster; | 368 | int err, is_hid, cluster; |
| 368 | 369 | ||
| 369 | lock_kernel(); | 370 | lock_super(sb); |
| 370 | 371 | ||
| 371 | err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, | 372 | err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, |
| 372 | msdos_name, &MSDOS_SB(sb)->options); | 373 | msdos_name, &MSDOS_SB(sb)->options); |
| @@ -404,14 +405,14 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 404 | 405 | ||
| 405 | d_instantiate(dentry, inode); | 406 | d_instantiate(dentry, inode); |
| 406 | 407 | ||
| 407 | unlock_kernel(); | 408 | unlock_super(sb); |
| 408 | fat_flush_inodes(sb, dir, inode); | 409 | fat_flush_inodes(sb, dir, inode); |
| 409 | return 0; | 410 | return 0; |
| 410 | 411 | ||
| 411 | out_free: | 412 | out_free: |
| 412 | fat_free_clusters(dir, cluster); | 413 | fat_free_clusters(dir, cluster); |
| 413 | out: | 414 | out: |
| 414 | unlock_kernel(); | 415 | unlock_super(sb); |
| 415 | return err; | 416 | return err; |
| 416 | } | 417 | } |
| 417 | 418 | ||
| @@ -419,10 +420,11 @@ out: | |||
| 419 | static int msdos_unlink(struct inode *dir, struct dentry *dentry) | 420 | static int msdos_unlink(struct inode *dir, struct dentry *dentry) |
| 420 | { | 421 | { |
| 421 | struct inode *inode = dentry->d_inode; | 422 | struct inode *inode = dentry->d_inode; |
| 423 | struct super_block *sb= inode->i_sb; | ||
| 422 | struct fat_slot_info sinfo; | 424 | struct fat_slot_info sinfo; |
| 423 | int err; | 425 | int err; |
| 424 | 426 | ||
| 425 | lock_kernel(); | 427 | lock_super(sb); |
| 426 | err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); | 428 | err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); |
| 427 | if (err) | 429 | if (err) |
| 428 | goto out; | 430 | goto out; |
| @@ -434,9 +436,9 @@ static int msdos_unlink(struct inode *dir, struct dentry *dentry) | |||
| 434 | inode->i_ctime = CURRENT_TIME_SEC; | 436 | inode->i_ctime = CURRENT_TIME_SEC; |
| 435 | fat_detach(inode); | 437 | fat_detach(inode); |
| 436 | out: | 438 | out: |
| 437 | unlock_kernel(); | 439 | unlock_super(sb); |
| 438 | if (!err) | 440 | if (!err) |
| 439 | err = fat_flush_inodes(inode->i_sb, dir, inode); | 441 | err = fat_flush_inodes(sb, dir, inode); |
| 440 | 442 | ||
| 441 | return err; | 443 | return err; |
| 442 | } | 444 | } |
| @@ -618,10 +620,11 @@ error_inode: | |||
| 618 | static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, | 620 | static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, |
| 619 | struct inode *new_dir, struct dentry *new_dentry) | 621 | struct inode *new_dir, struct dentry *new_dentry) |
| 620 | { | 622 | { |
| 623 | struct super_block *sb = old_dir->i_sb; | ||
| 621 | unsigned char old_msdos_name[MSDOS_NAME], new_msdos_name[MSDOS_NAME]; | 624 | unsigned char old_msdos_name[MSDOS_NAME], new_msdos_name[MSDOS_NAME]; |
| 622 | int err, is_hid; | 625 | int err, is_hid; |
| 623 | 626 | ||
| 624 | lock_kernel(); | 627 | lock_super(sb); |
| 625 | 628 | ||
| 626 | err = msdos_format_name(old_dentry->d_name.name, | 629 | err = msdos_format_name(old_dentry->d_name.name, |
| 627 | old_dentry->d_name.len, old_msdos_name, | 630 | old_dentry->d_name.len, old_msdos_name, |
| @@ -640,9 +643,9 @@ static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 640 | err = do_msdos_rename(old_dir, old_msdos_name, old_dentry, | 643 | err = do_msdos_rename(old_dir, old_msdos_name, old_dentry, |
| 641 | new_dir, new_msdos_name, new_dentry, is_hid); | 644 | new_dir, new_msdos_name, new_dentry, is_hid); |
| 642 | out: | 645 | out: |
| 643 | unlock_kernel(); | 646 | unlock_super(sb); |
| 644 | if (!err) | 647 | if (!err) |
| 645 | err = fat_flush_inodes(old_dir->i_sb, old_dir, new_dir); | 648 | err = fat_flush_inodes(sb, old_dir, new_dir); |
| 646 | return err; | 649 | return err; |
| 647 | } | 650 | } |
| 648 | 651 | ||
diff --git a/fs/namespace.c b/fs/namespace.c index 4fc302c2a0e0..4f6f7635b59c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
| @@ -750,7 +750,7 @@ struct proc_fs_info { | |||
| 750 | const char *str; | 750 | const char *str; |
| 751 | }; | 751 | }; |
| 752 | 752 | ||
| 753 | static void show_sb_opts(struct seq_file *m, struct super_block *sb) | 753 | static int show_sb_opts(struct seq_file *m, struct super_block *sb) |
| 754 | { | 754 | { |
| 755 | static const struct proc_fs_info fs_info[] = { | 755 | static const struct proc_fs_info fs_info[] = { |
| 756 | { MS_SYNCHRONOUS, ",sync" }, | 756 | { MS_SYNCHRONOUS, ",sync" }, |
| @@ -764,6 +764,8 @@ static void show_sb_opts(struct seq_file *m, struct super_block *sb) | |||
| 764 | if (sb->s_flags & fs_infop->flag) | 764 | if (sb->s_flags & fs_infop->flag) |
| 765 | seq_puts(m, fs_infop->str); | 765 | seq_puts(m, fs_infop->str); |
| 766 | } | 766 | } |
| 767 | |||
| 768 | return security_sb_show_options(m, sb); | ||
| 767 | } | 769 | } |
| 768 | 770 | ||
| 769 | static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) | 771 | static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) |
| @@ -806,11 +808,14 @@ static int show_vfsmnt(struct seq_file *m, void *v) | |||
| 806 | seq_putc(m, ' '); | 808 | seq_putc(m, ' '); |
| 807 | show_type(m, mnt->mnt_sb); | 809 | show_type(m, mnt->mnt_sb); |
| 808 | seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); | 810 | seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); |
| 809 | show_sb_opts(m, mnt->mnt_sb); | 811 | err = show_sb_opts(m, mnt->mnt_sb); |
| 812 | if (err) | ||
| 813 | goto out; | ||
| 810 | show_mnt_opts(m, mnt); | 814 | show_mnt_opts(m, mnt); |
| 811 | if (mnt->mnt_sb->s_op->show_options) | 815 | if (mnt->mnt_sb->s_op->show_options) |
| 812 | err = mnt->mnt_sb->s_op->show_options(m, mnt); | 816 | err = mnt->mnt_sb->s_op->show_options(m, mnt); |
| 813 | seq_puts(m, " 0 0\n"); | 817 | seq_puts(m, " 0 0\n"); |
| 818 | out: | ||
| 814 | return err; | 819 | return err; |
| 815 | } | 820 | } |
| 816 | 821 | ||
| @@ -865,10 +870,13 @@ static int show_mountinfo(struct seq_file *m, void *v) | |||
| 865 | seq_putc(m, ' '); | 870 | seq_putc(m, ' '); |
| 866 | mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); | 871 | mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); |
| 867 | seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw"); | 872 | seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw"); |
| 868 | show_sb_opts(m, sb); | 873 | err = show_sb_opts(m, sb); |
| 874 | if (err) | ||
| 875 | goto out; | ||
| 869 | if (sb->s_op->show_options) | 876 | if (sb->s_op->show_options) |
| 870 | err = sb->s_op->show_options(m, mnt); | 877 | err = sb->s_op->show_options(m, mnt); |
| 871 | seq_putc(m, '\n'); | 878 | seq_putc(m, '\n'); |
| 879 | out: | ||
| 872 | return err; | 880 | return err; |
| 873 | } | 881 | } |
| 874 | 882 | ||
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 2b145de45b39..6a7d901f1936 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
| 19 | #include <linux/vmalloc.h> | 19 | #include <linux/vmalloc.h> |
| 20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
| 21 | #include <linux/smp_lock.h> | ||
| 21 | 22 | ||
| 22 | #include <linux/ncp_fs.h> | 23 | #include <linux/ncp_fs.h> |
| 23 | #include "ncplib_kernel.h" | 24 | #include "ncplib_kernel.h" |
| @@ -281,9 +282,18 @@ static int ncp_release(struct inode *inode, struct file *file) { | |||
| 281 | return 0; | 282 | return 0; |
| 282 | } | 283 | } |
| 283 | 284 | ||
| 285 | static loff_t ncp_remote_llseek(struct file *file, loff_t offset, int origin) | ||
| 286 | { | ||
| 287 | loff_t ret; | ||
| 288 | lock_kernel(); | ||
| 289 | ret = generic_file_llseek_unlocked(file, offset, origin); | ||
| 290 | unlock_kernel(); | ||
| 291 | return ret; | ||
| 292 | } | ||
| 293 | |||
| 284 | const struct file_operations ncp_file_operations = | 294 | const struct file_operations ncp_file_operations = |
| 285 | { | 295 | { |
| 286 | .llseek = remote_llseek, | 296 | .llseek = ncp_remote_llseek, |
| 287 | .read = ncp_file_read, | 297 | .read = ncp_file_read, |
| 288 | .write = ncp_file_write, | 298 | .write = ncp_file_write, |
| 289 | .ioctl = ncp_ioctl, | 299 | .ioctl = ncp_ioctl, |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 509dcb58959e..43164fe86069 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
| @@ -180,6 +180,8 @@ force_reval: | |||
| 180 | 180 | ||
| 181 | static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) | 181 | static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) |
| 182 | { | 182 | { |
| 183 | loff_t loff; | ||
| 184 | |||
| 183 | dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", | 185 | dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", |
| 184 | filp->f_path.dentry->d_parent->d_name.name, | 186 | filp->f_path.dentry->d_parent->d_name.name, |
| 185 | filp->f_path.dentry->d_name.name, | 187 | filp->f_path.dentry->d_name.name, |
| @@ -192,7 +194,10 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) | |||
| 192 | if (retval < 0) | 194 | if (retval < 0) |
| 193 | return (loff_t)retval; | 195 | return (loff_t)retval; |
| 194 | } | 196 | } |
| 195 | return remote_llseek(filp, offset, origin); | 197 | lock_kernel(); /* BKL needed? */ |
| 198 | loff = generic_file_llseek_unlocked(filp, offset, origin); | ||
| 199 | unlock_kernel(); | ||
| 200 | return loff; | ||
| 196 | } | 201 | } |
| 197 | 202 | ||
| 198 | /* | 203 | /* |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index efc015c6128a..44f87caf3683 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
| @@ -606,7 +606,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
| 606 | 606 | ||
| 607 | res->last_used = 0; | 607 | res->last_used = 0; |
| 608 | 608 | ||
| 609 | spin_lock(&dlm->spinlock); | ||
| 609 | list_add_tail(&res->tracking, &dlm->tracking_list); | 610 | list_add_tail(&res->tracking, &dlm->tracking_list); |
| 611 | spin_unlock(&dlm->spinlock); | ||
| 610 | 612 | ||
| 611 | memset(res->lvb, 0, DLM_LVB_LEN); | 613 | memset(res->lvb, 0, DLM_LVB_LEN); |
| 612 | memset(res->refmap, 0, sizeof(res->refmap)); | 614 | memset(res->refmap, 0, sizeof(res->refmap)); |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 394d25a131a5..80e20d9f2780 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -1554,8 +1554,8 @@ out: | |||
| 1554 | */ | 1554 | */ |
| 1555 | int ocfs2_file_lock(struct file *file, int ex, int trylock) | 1555 | int ocfs2_file_lock(struct file *file, int ex, int trylock) |
| 1556 | { | 1556 | { |
| 1557 | int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; | 1557 | int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 1558 | unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0; | 1558 | unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; |
| 1559 | unsigned long flags; | 1559 | unsigned long flags; |
| 1560 | struct ocfs2_file_private *fp = file->private_data; | 1560 | struct ocfs2_file_private *fp = file->private_data; |
| 1561 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | 1561 | struct ocfs2_lock_res *lockres = &fp->fp_flock; |
| @@ -1582,7 +1582,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
| 1582 | * Get the lock at NLMODE to start - that way we | 1582 | * Get the lock at NLMODE to start - that way we |
| 1583 | * can cancel the upconvert request if need be. | 1583 | * can cancel the upconvert request if need be. |
| 1584 | */ | 1584 | */ |
| 1585 | ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); | 1585 | ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); |
| 1586 | if (ret < 0) { | 1586 | if (ret < 0) { |
| 1587 | mlog_errno(ret); | 1587 | mlog_errno(ret); |
| 1588 | goto out; | 1588 | goto out; |
| @@ -1597,7 +1597,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
| 1597 | } | 1597 | } |
| 1598 | 1598 | ||
| 1599 | lockres->l_action = OCFS2_AST_CONVERT; | 1599 | lockres->l_action = OCFS2_AST_CONVERT; |
| 1600 | lkm_flags |= LKM_CONVERT; | 1600 | lkm_flags |= DLM_LKF_CONVERT; |
| 1601 | lockres->l_requested = level; | 1601 | lockres->l_requested = level; |
| 1602 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 1602 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
| 1603 | 1603 | ||
| @@ -1664,7 +1664,7 @@ void ocfs2_file_unlock(struct file *file) | |||
| 1664 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) | 1664 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) |
| 1665 | return; | 1665 | return; |
| 1666 | 1666 | ||
| 1667 | if (lockres->l_level == LKM_NLMODE) | 1667 | if (lockres->l_level == DLM_LOCK_NL) |
| 1668 | return; | 1668 | return; |
| 1669 | 1669 | ||
| 1670 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", | 1670 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", |
| @@ -1678,11 +1678,11 @@ void ocfs2_file_unlock(struct file *file) | |||
| 1678 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 1678 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); |
| 1679 | lockres->l_blocking = DLM_LOCK_EX; | 1679 | lockres->l_blocking = DLM_LOCK_EX; |
| 1680 | 1680 | ||
| 1681 | gen = ocfs2_prepare_downconvert(lockres, LKM_NLMODE); | 1681 | gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); |
| 1682 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 1682 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
| 1683 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1683 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 1684 | 1684 | ||
| 1685 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0, gen); | 1685 | ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); |
| 1686 | if (ret) { | 1686 | if (ret) { |
| 1687 | mlog_errno(ret); | 1687 | mlog_errno(ret); |
| 1688 | return; | 1688 | return; |
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index c021280dd462..bd7e0f3acfc7 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
| 22 | #include <linux/miscdevice.h> | 22 | #include <linux/miscdevice.h> |
| 23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
| 24 | #include <linux/smp_lock.h> | ||
| 24 | #include <linux/reboot.h> | 25 | #include <linux/reboot.h> |
| 25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
| 26 | 27 | ||
| @@ -619,10 +620,12 @@ static int ocfs2_control_open(struct inode *inode, struct file *file) | |||
| 619 | return -ENOMEM; | 620 | return -ENOMEM; |
| 620 | p->op_this_node = -1; | 621 | p->op_this_node = -1; |
| 621 | 622 | ||
| 623 | lock_kernel(); | ||
| 622 | mutex_lock(&ocfs2_control_lock); | 624 | mutex_lock(&ocfs2_control_lock); |
| 623 | file->private_data = p; | 625 | file->private_data = p; |
| 624 | list_add(&p->op_list, &ocfs2_control_private_list); | 626 | list_add(&p->op_list, &ocfs2_control_private_list); |
| 625 | mutex_unlock(&ocfs2_control_lock); | 627 | mutex_unlock(&ocfs2_control_lock); |
| 628 | unlock_kernel(); | ||
| 626 | 629 | ||
| 627 | return 0; | 630 | return 0; |
| 628 | } | 631 | } |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 3b455371e7ff..58c3e6a8e15e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -233,7 +233,7 @@ static int check_mem_permission(struct task_struct *task) | |||
| 233 | */ | 233 | */ |
| 234 | if (task->parent == current && (task->ptrace & PT_PTRACED) && | 234 | if (task->parent == current && (task->ptrace & PT_PTRACED) && |
| 235 | task_is_stopped_or_traced(task) && | 235 | task_is_stopped_or_traced(task) && |
| 236 | ptrace_may_attach(task)) | 236 | ptrace_may_access(task, PTRACE_MODE_ATTACH)) |
| 237 | return 0; | 237 | return 0; |
| 238 | 238 | ||
| 239 | /* | 239 | /* |
| @@ -251,7 +251,8 @@ struct mm_struct *mm_for_maps(struct task_struct *task) | |||
| 251 | task_lock(task); | 251 | task_lock(task); |
| 252 | if (task->mm != mm) | 252 | if (task->mm != mm) |
| 253 | goto out; | 253 | goto out; |
| 254 | if (task->mm != current->mm && __ptrace_may_attach(task) < 0) | 254 | if (task->mm != current->mm && |
| 255 | __ptrace_may_access(task, PTRACE_MODE_READ) < 0) | ||
| 255 | goto out; | 256 | goto out; |
| 256 | task_unlock(task); | 257 | task_unlock(task); |
| 257 | return mm; | 258 | return mm; |
| @@ -518,7 +519,7 @@ static int proc_fd_access_allowed(struct inode *inode) | |||
| 518 | */ | 519 | */ |
| 519 | task = get_proc_task(inode); | 520 | task = get_proc_task(inode); |
| 520 | if (task) { | 521 | if (task) { |
| 521 | allowed = ptrace_may_attach(task); | 522 | allowed = ptrace_may_access(task, PTRACE_MODE_READ); |
| 522 | put_task_struct(task); | 523 | put_task_struct(task); |
| 523 | } | 524 | } |
| 524 | return allowed; | 525 | return allowed; |
| @@ -904,7 +905,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, | |||
| 904 | if (!task) | 905 | if (!task) |
| 905 | goto out_no_task; | 906 | goto out_no_task; |
| 906 | 907 | ||
| 907 | if (!ptrace_may_attach(task)) | 908 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
| 908 | goto out; | 909 | goto out; |
| 909 | 910 | ||
| 910 | ret = -ENOMEM; | 911 | ret = -ENOMEM; |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 7e277f2ad466..c652d469dc08 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
| @@ -123,6 +123,11 @@ static int uptime_read_proc(char *page, char **start, off_t off, | |||
| 123 | return proc_calc_metrics(page, start, off, count, eof, len); | 123 | return proc_calc_metrics(page, start, off, count, eof, len); |
| 124 | } | 124 | } |
| 125 | 125 | ||
| 126 | int __attribute__((weak)) arch_report_meminfo(char *page) | ||
| 127 | { | ||
| 128 | return 0; | ||
| 129 | } | ||
| 130 | |||
| 126 | static int meminfo_read_proc(char *page, char **start, off_t off, | 131 | static int meminfo_read_proc(char *page, char **start, off_t off, |
| 127 | int count, int *eof, void *data) | 132 | int count, int *eof, void *data) |
| 128 | { | 133 | { |
| @@ -221,6 +226,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
| 221 | 226 | ||
| 222 | len += hugetlb_report_meminfo(page + len); | 227 | len += hugetlb_report_meminfo(page + len); |
| 223 | 228 | ||
| 229 | len += arch_report_meminfo(page + len); | ||
| 230 | |||
| 224 | return proc_calc_metrics(page, start, off, count, eof, len); | 231 | return proc_calc_metrics(page, start, off, count, eof, len); |
| 225 | #undef K | 232 | #undef K |
| 226 | } | 233 | } |
| @@ -472,6 +479,13 @@ static const struct file_operations proc_vmalloc_operations = { | |||
| 472 | }; | 479 | }; |
| 473 | #endif | 480 | #endif |
| 474 | 481 | ||
| 482 | #ifndef arch_irq_stat_cpu | ||
| 483 | #define arch_irq_stat_cpu(cpu) 0 | ||
| 484 | #endif | ||
| 485 | #ifndef arch_irq_stat | ||
| 486 | #define arch_irq_stat() 0 | ||
| 487 | #endif | ||
| 488 | |||
| 475 | static int show_stat(struct seq_file *p, void *v) | 489 | static int show_stat(struct seq_file *p, void *v) |
| 476 | { | 490 | { |
| 477 | int i; | 491 | int i; |
| @@ -509,7 +523,9 @@ static int show_stat(struct seq_file *p, void *v) | |||
| 509 | sum += temp; | 523 | sum += temp; |
| 510 | per_irq_sum[j] += temp; | 524 | per_irq_sum[j] += temp; |
| 511 | } | 525 | } |
| 526 | sum += arch_irq_stat_cpu(i); | ||
| 512 | } | 527 | } |
| 528 | sum += arch_irq_stat(); | ||
| 513 | 529 | ||
| 514 | seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", | 530 | seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", |
| 515 | (unsigned long long)cputime64_to_clock_t(user), | 531 | (unsigned long long)cputime64_to_clock_t(user), |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index c492449f3b45..164bd9f9ede3 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
| @@ -210,7 +210,7 @@ static int show_map(struct seq_file *m, void *v) | |||
| 210 | dev_t dev = 0; | 210 | dev_t dev = 0; |
| 211 | int len; | 211 | int len; |
| 212 | 212 | ||
| 213 | if (maps_protect && !ptrace_may_attach(task)) | 213 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) |
| 214 | return -EACCES; | 214 | return -EACCES; |
| 215 | 215 | ||
| 216 | if (file) { | 216 | if (file) { |
| @@ -646,7 +646,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
| 646 | goto out; | 646 | goto out; |
| 647 | 647 | ||
| 648 | ret = -EACCES; | 648 | ret = -EACCES; |
| 649 | if (!ptrace_may_attach(task)) | 649 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
| 650 | goto out_task; | 650 | goto out_task; |
| 651 | 651 | ||
| 652 | ret = -EINVAL; | 652 | ret = -EINVAL; |
| @@ -747,7 +747,7 @@ static int show_numa_map_checked(struct seq_file *m, void *v) | |||
| 747 | struct proc_maps_private *priv = m->private; | 747 | struct proc_maps_private *priv = m->private; |
| 748 | struct task_struct *task = priv->task; | 748 | struct task_struct *task = priv->task; |
| 749 | 749 | ||
| 750 | if (maps_protect && !ptrace_may_attach(task)) | 750 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) |
| 751 | return -EACCES; | 751 | return -EACCES; |
| 752 | 752 | ||
| 753 | return show_numa_map(m, v); | 753 | return show_numa_map(m, v); |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 4b4f9cc2f186..5d84e7121df8 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
| @@ -113,7 +113,7 @@ static int show_map(struct seq_file *m, void *_vml) | |||
| 113 | struct proc_maps_private *priv = m->private; | 113 | struct proc_maps_private *priv = m->private; |
| 114 | struct task_struct *task = priv->task; | 114 | struct task_struct *task = priv->task; |
| 115 | 115 | ||
| 116 | if (maps_protect && !ptrace_may_attach(task)) | 116 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) |
| 117 | return -EACCES; | 117 | return -EACCES; |
| 118 | 118 | ||
| 119 | return nommu_vma_show(m, vml->vma); | 119 | return nommu_vma_show(m, vml->vma); |
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 9590b9024300..78f613cb9c76 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c | |||
| @@ -45,6 +45,7 @@ const struct file_operations ramfs_file_operations = { | |||
| 45 | .mmap = generic_file_mmap, | 45 | .mmap = generic_file_mmap, |
| 46 | .fsync = simple_sync_file, | 46 | .fsync = simple_sync_file, |
| 47 | .splice_read = generic_file_splice_read, | 47 | .splice_read = generic_file_splice_read, |
| 48 | .splice_write = generic_file_splice_write, | ||
| 48 | .llseek = generic_file_llseek, | 49 | .llseek = generic_file_llseek, |
| 49 | }; | 50 | }; |
| 50 | 51 | ||
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 0989bc2c2f69..52312ec93ff4 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
| @@ -43,6 +43,7 @@ const struct file_operations ramfs_file_operations = { | |||
| 43 | .aio_write = generic_file_aio_write, | 43 | .aio_write = generic_file_aio_write, |
| 44 | .fsync = simple_sync_file, | 44 | .fsync = simple_sync_file, |
| 45 | .splice_read = generic_file_splice_read, | 45 | .splice_read = generic_file_splice_read, |
| 46 | .splice_write = generic_file_splice_write, | ||
| 46 | .llseek = generic_file_llseek, | 47 | .llseek = generic_file_llseek, |
| 47 | }; | 48 | }; |
| 48 | 49 | ||
diff --git a/fs/read_write.c b/fs/read_write.c index f0d1240a5c69..9ba495d5a29b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
| @@ -31,12 +31,12 @@ const struct file_operations generic_ro_fops = { | |||
| 31 | 31 | ||
| 32 | EXPORT_SYMBOL(generic_ro_fops); | 32 | EXPORT_SYMBOL(generic_ro_fops); |
| 33 | 33 | ||
| 34 | loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) | 34 | loff_t |
| 35 | generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) | ||
| 35 | { | 36 | { |
| 36 | loff_t retval; | 37 | loff_t retval; |
| 37 | struct inode *inode = file->f_mapping->host; | 38 | struct inode *inode = file->f_mapping->host; |
| 38 | 39 | ||
| 39 | mutex_lock(&inode->i_mutex); | ||
| 40 | switch (origin) { | 40 | switch (origin) { |
| 41 | case SEEK_END: | 41 | case SEEK_END: |
| 42 | offset += inode->i_size; | 42 | offset += inode->i_size; |
| @@ -46,42 +46,26 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) | |||
| 46 | } | 46 | } |
| 47 | retval = -EINVAL; | 47 | retval = -EINVAL; |
| 48 | if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { | 48 | if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { |
| 49 | /* Special lock needed here? */ | ||
| 49 | if (offset != file->f_pos) { | 50 | if (offset != file->f_pos) { |
| 50 | file->f_pos = offset; | 51 | file->f_pos = offset; |
| 51 | file->f_version = 0; | 52 | file->f_version = 0; |
| 52 | } | 53 | } |
| 53 | retval = offset; | 54 | retval = offset; |
| 54 | } | 55 | } |
| 55 | mutex_unlock(&inode->i_mutex); | ||
| 56 | return retval; | 56 | return retval; |
| 57 | } | 57 | } |
| 58 | EXPORT_SYMBOL(generic_file_llseek_unlocked); | ||
| 58 | 59 | ||
| 59 | EXPORT_SYMBOL(generic_file_llseek); | 60 | loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) |
| 60 | |||
| 61 | loff_t remote_llseek(struct file *file, loff_t offset, int origin) | ||
| 62 | { | 61 | { |
| 63 | loff_t retval; | 62 | loff_t n; |
| 64 | 63 | mutex_lock(&file->f_dentry->d_inode->i_mutex); | |
| 65 | lock_kernel(); | 64 | n = generic_file_llseek_unlocked(file, offset, origin); |
| 66 | switch (origin) { | 65 | mutex_unlock(&file->f_dentry->d_inode->i_mutex); |
| 67 | case SEEK_END: | 66 | return n; |
| 68 | offset += i_size_read(file->f_path.dentry->d_inode); | ||
| 69 | break; | ||
| 70 | case SEEK_CUR: | ||
| 71 | offset += file->f_pos; | ||
| 72 | } | ||
| 73 | retval = -EINVAL; | ||
| 74 | if (offset>=0 && offset<=file->f_path.dentry->d_inode->i_sb->s_maxbytes) { | ||
| 75 | if (offset != file->f_pos) { | ||
| 76 | file->f_pos = offset; | ||
| 77 | file->f_version = 0; | ||
| 78 | } | ||
| 79 | retval = offset; | ||
| 80 | } | ||
| 81 | unlock_kernel(); | ||
| 82 | return retval; | ||
| 83 | } | 67 | } |
| 84 | EXPORT_SYMBOL(remote_llseek); | 68 | EXPORT_SYMBOL(generic_file_llseek); |
| 85 | 69 | ||
| 86 | loff_t no_llseek(struct file *file, loff_t offset, int origin) | 70 | loff_t no_llseek(struct file *file, loff_t offset, int origin) |
| 87 | { | 71 | { |
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index efbe29af3d7a..2294783320cb 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c | |||
| @@ -422,9 +422,18 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
| 422 | return error; | 422 | return error; |
| 423 | } | 423 | } |
| 424 | 424 | ||
| 425 | static loff_t smb_remote_llseek(struct file *file, loff_t offset, int origin) | ||
| 426 | { | ||
| 427 | loff_t ret; | ||
| 428 | lock_kernel(); | ||
| 429 | ret = generic_file_llseek_unlocked(file, offset, origin); | ||
| 430 | unlock_kernel(); | ||
| 431 | return ret; | ||
| 432 | } | ||
| 433 | |||
| 425 | const struct file_operations smb_file_operations = | 434 | const struct file_operations smb_file_operations = |
| 426 | { | 435 | { |
| 427 | .llseek = remote_llseek, | 436 | .llseek = smb_remote_llseek, |
| 428 | .read = do_sync_read, | 437 | .read = do_sync_read, |
| 429 | .aio_read = smb_file_aio_read, | 438 | .aio_read = smb_file_aio_read, |
| 430 | .write = do_sync_write, | 439 | .write = do_sync_write, |
diff --git a/fs/splice.c b/fs/splice.c index aa5f6f60b305..399442179d89 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -379,13 +379,22 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
| 379 | lock_page(page); | 379 | lock_page(page); |
| 380 | 380 | ||
| 381 | /* | 381 | /* |
| 382 | * page was truncated, stop here. if this isn't the | 382 | * Page was truncated, or invalidated by the |
| 383 | * first page, we'll just complete what we already | 383 | * filesystem. Redo the find/create, but this time the |
| 384 | * added | 384 | * page is kept locked, so there's no chance of another |
| 385 | * race with truncate/invalidate. | ||
| 385 | */ | 386 | */ |
| 386 | if (!page->mapping) { | 387 | if (!page->mapping) { |
| 387 | unlock_page(page); | 388 | unlock_page(page); |
| 388 | break; | 389 | page = find_or_create_page(mapping, index, |
| 390 | mapping_gfp_mask(mapping)); | ||
| 391 | |||
| 392 | if (!page) { | ||
| 393 | error = -ENOMEM; | ||
| 394 | break; | ||
| 395 | } | ||
| 396 | page_cache_release(pages[page_nr]); | ||
| 397 | pages[page_nr] = page; | ||
| 389 | } | 398 | } |
| 390 | /* | 399 | /* |
| 391 | * page was already under io and is now done, great | 400 | * page was already under io and is now done, great |
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index a3522727ea5b..b546ba69be82 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c | |||
| @@ -645,7 +645,7 @@ static int vfat_add_entry(struct inode *dir, struct qstr *qname, int is_dir, | |||
| 645 | if (len == 0) | 645 | if (len == 0) |
| 646 | return -ENOENT; | 646 | return -ENOENT; |
| 647 | 647 | ||
| 648 | slots = kmalloc(sizeof(*slots) * MSDOS_SLOTS, GFP_KERNEL); | 648 | slots = kmalloc(sizeof(*slots) * MSDOS_SLOTS, GFP_NOFS); |
| 649 | if (slots == NULL) | 649 | if (slots == NULL) |
| 650 | return -ENOMEM; | 650 | return -ENOMEM; |
| 651 | 651 | ||
| @@ -687,7 +687,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, | |||
| 687 | struct dentry *alias; | 687 | struct dentry *alias; |
| 688 | int err, table; | 688 | int err, table; |
| 689 | 689 | ||
| 690 | lock_kernel(); | 690 | lock_super(sb); |
| 691 | table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0; | 691 | table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0; |
| 692 | dentry->d_op = &vfat_dentry_ops[table]; | 692 | dentry->d_op = &vfat_dentry_ops[table]; |
| 693 | 693 | ||
| @@ -699,7 +699,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, | |||
| 699 | inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); | 699 | inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); |
| 700 | brelse(sinfo.bh); | 700 | brelse(sinfo.bh); |
| 701 | if (IS_ERR(inode)) { | 701 | if (IS_ERR(inode)) { |
| 702 | unlock_kernel(); | 702 | unlock_super(sb); |
| 703 | return ERR_CAST(inode); | 703 | return ERR_CAST(inode); |
| 704 | } | 704 | } |
| 705 | alias = d_find_alias(inode); | 705 | alias = d_find_alias(inode); |
| @@ -708,13 +708,13 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, | |||
| 708 | dput(alias); | 708 | dput(alias); |
| 709 | else { | 709 | else { |
| 710 | iput(inode); | 710 | iput(inode); |
| 711 | unlock_kernel(); | 711 | unlock_super(sb); |
| 712 | return alias; | 712 | return alias; |
| 713 | } | 713 | } |
| 714 | 714 | ||
| 715 | } | 715 | } |
| 716 | error: | 716 | error: |
| 717 | unlock_kernel(); | 717 | unlock_super(sb); |
| 718 | dentry->d_op = &vfat_dentry_ops[table]; | 718 | dentry->d_op = &vfat_dentry_ops[table]; |
| 719 | dentry->d_time = dentry->d_parent->d_inode->i_version; | 719 | dentry->d_time = dentry->d_parent->d_inode->i_version; |
| 720 | dentry = d_splice_alias(inode, dentry); | 720 | dentry = d_splice_alias(inode, dentry); |
| @@ -734,7 +734,7 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, | |||
| 734 | struct timespec ts; | 734 | struct timespec ts; |
| 735 | int err; | 735 | int err; |
| 736 | 736 | ||
| 737 | lock_kernel(); | 737 | lock_super(sb); |
| 738 | 738 | ||
| 739 | ts = CURRENT_TIME_SEC; | 739 | ts = CURRENT_TIME_SEC; |
| 740 | err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo); | 740 | err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo); |
| @@ -755,17 +755,18 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, | |||
| 755 | dentry->d_time = dentry->d_parent->d_inode->i_version; | 755 | dentry->d_time = dentry->d_parent->d_inode->i_version; |
| 756 | d_instantiate(dentry, inode); | 756 | d_instantiate(dentry, inode); |
| 757 | out: | 757 | out: |
| 758 | unlock_kernel(); | 758 | unlock_super(sb); |
| 759 | return err; | 759 | return err; |
| 760 | } | 760 | } |
| 761 | 761 | ||
| 762 | static int vfat_rmdir(struct inode *dir, struct dentry *dentry) | 762 | static int vfat_rmdir(struct inode *dir, struct dentry *dentry) |
| 763 | { | 763 | { |
| 764 | struct inode *inode = dentry->d_inode; | 764 | struct inode *inode = dentry->d_inode; |
| 765 | struct super_block *sb = dir->i_sb; | ||
| 765 | struct fat_slot_info sinfo; | 766 | struct fat_slot_info sinfo; |
| 766 | int err; | 767 | int err; |
| 767 | 768 | ||
| 768 | lock_kernel(); | 769 | lock_super(sb); |
| 769 | 770 | ||
| 770 | err = fat_dir_empty(inode); | 771 | err = fat_dir_empty(inode); |
| 771 | if (err) | 772 | if (err) |
| @@ -783,7 +784,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 783 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; | 784 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; |
| 784 | fat_detach(inode); | 785 | fat_detach(inode); |
| 785 | out: | 786 | out: |
| 786 | unlock_kernel(); | 787 | unlock_super(sb); |
| 787 | 788 | ||
| 788 | return err; | 789 | return err; |
| 789 | } | 790 | } |
| @@ -791,10 +792,11 @@ out: | |||
| 791 | static int vfat_unlink(struct inode *dir, struct dentry *dentry) | 792 | static int vfat_unlink(struct inode *dir, struct dentry *dentry) |
| 792 | { | 793 | { |
| 793 | struct inode *inode = dentry->d_inode; | 794 | struct inode *inode = dentry->d_inode; |
| 795 | struct super_block *sb = dir->i_sb; | ||
| 794 | struct fat_slot_info sinfo; | 796 | struct fat_slot_info sinfo; |
| 795 | int err; | 797 | int err; |
| 796 | 798 | ||
| 797 | lock_kernel(); | 799 | lock_super(sb); |
| 798 | 800 | ||
| 799 | err = vfat_find(dir, &dentry->d_name, &sinfo); | 801 | err = vfat_find(dir, &dentry->d_name, &sinfo); |
| 800 | if (err) | 802 | if (err) |
| @@ -807,7 +809,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry) | |||
| 807 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; | 809 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; |
| 808 | fat_detach(inode); | 810 | fat_detach(inode); |
| 809 | out: | 811 | out: |
| 810 | unlock_kernel(); | 812 | unlock_super(sb); |
| 811 | 813 | ||
| 812 | return err; | 814 | return err; |
| 813 | } | 815 | } |
| @@ -820,7 +822,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 820 | struct timespec ts; | 822 | struct timespec ts; |
| 821 | int err, cluster; | 823 | int err, cluster; |
| 822 | 824 | ||
| 823 | lock_kernel(); | 825 | lock_super(sb); |
| 824 | 826 | ||
| 825 | ts = CURRENT_TIME_SEC; | 827 | ts = CURRENT_TIME_SEC; |
| 826 | cluster = fat_alloc_new_dir(dir, &ts); | 828 | cluster = fat_alloc_new_dir(dir, &ts); |
| @@ -849,13 +851,13 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 849 | dentry->d_time = dentry->d_parent->d_inode->i_version; | 851 | dentry->d_time = dentry->d_parent->d_inode->i_version; |
| 850 | d_instantiate(dentry, inode); | 852 | d_instantiate(dentry, inode); |
| 851 | 853 | ||
| 852 | unlock_kernel(); | 854 | unlock_super(sb); |
| 853 | return 0; | 855 | return 0; |
| 854 | 856 | ||
| 855 | out_free: | 857 | out_free: |
| 856 | fat_free_clusters(dir, cluster); | 858 | fat_free_clusters(dir, cluster); |
| 857 | out: | 859 | out: |
| 858 | unlock_kernel(); | 860 | unlock_super(sb); |
| 859 | return err; | 861 | return err; |
| 860 | } | 862 | } |
| 861 | 863 | ||
| @@ -869,11 +871,12 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 869 | struct timespec ts; | 871 | struct timespec ts; |
| 870 | loff_t dotdot_i_pos, new_i_pos; | 872 | loff_t dotdot_i_pos, new_i_pos; |
| 871 | int err, is_dir, update_dotdot, corrupt = 0; | 873 | int err, is_dir, update_dotdot, corrupt = 0; |
| 874 | struct super_block *sb = old_dir->i_sb; | ||
| 872 | 875 | ||
| 873 | old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; | 876 | old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; |
| 874 | old_inode = old_dentry->d_inode; | 877 | old_inode = old_dentry->d_inode; |
| 875 | new_inode = new_dentry->d_inode; | 878 | new_inode = new_dentry->d_inode; |
| 876 | lock_kernel(); | 879 | lock_super(sb); |
| 877 | err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo); | 880 | err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo); |
| 878 | if (err) | 881 | if (err) |
| 879 | goto out; | 882 | goto out; |
| @@ -951,7 +954,7 @@ out: | |||
| 951 | brelse(sinfo.bh); | 954 | brelse(sinfo.bh); |
| 952 | brelse(dotdot_bh); | 955 | brelse(dotdot_bh); |
| 953 | brelse(old_sinfo.bh); | 956 | brelse(old_sinfo.bh); |
| 954 | unlock_kernel(); | 957 | unlock_super(sb); |
| 955 | 958 | ||
| 956 | return err; | 959 | return err; |
| 957 | 960 | ||
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index afaee301b0ee..ad3d26ddfe31 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
| @@ -2427,13 +2427,20 @@ restart: | |||
| 2427 | if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) { | 2427 | if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) { |
| 2428 | xlog_state_switch_iclogs(log, iclog, iclog->ic_size); | 2428 | xlog_state_switch_iclogs(log, iclog, iclog->ic_size); |
| 2429 | 2429 | ||
| 2430 | /* If I'm the only one writing to this iclog, sync it to disk */ | 2430 | /* |
| 2431 | if (atomic_read(&iclog->ic_refcnt) == 1) { | 2431 | * If I'm the only one writing to this iclog, sync it to disk. |
| 2432 | * We need to do an atomic compare and decrement here to avoid | ||
| 2433 | * racing with concurrent atomic_dec_and_lock() calls in | ||
| 2434 | * xlog_state_release_iclog() when there is more than one | ||
| 2435 | * reference to the iclog. | ||
| 2436 | */ | ||
| 2437 | if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) { | ||
| 2438 | /* we are the only one */ | ||
| 2432 | spin_unlock(&log->l_icloglock); | 2439 | spin_unlock(&log->l_icloglock); |
| 2433 | if ((error = xlog_state_release_iclog(log, iclog))) | 2440 | error = xlog_state_release_iclog(log, iclog); |
| 2441 | if (error) | ||
| 2434 | return error; | 2442 | return error; |
| 2435 | } else { | 2443 | } else { |
| 2436 | atomic_dec(&iclog->ic_refcnt); | ||
| 2437 | spin_unlock(&log->l_icloglock); | 2444 | spin_unlock(&log->l_icloglock); |
| 2438 | } | 2445 | } |
| 2439 | goto restart; | 2446 | goto restart; |
