aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/netdevices.c3
-rw-r--r--fs/ext2/inode.c44
-rw-r--r--fs/ext4/extents.c2
-rw-r--r--fs/ext4/inode.c8
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfs/mdb.c1
-rw-r--r--fs/jbd/revoke.c24
-rw-r--r--fs/nilfs2/bmap.c5
-rw-r--r--fs/nilfs2/nilfs.h5
-rw-r--r--fs/nilfs2/recovery.c20
-rw-r--r--fs/nilfs2/sufile.c290
-rw-r--r--fs/nilfs2/sufile.h79
-rw-r--r--fs/nilfs2/super.c7
-rw-r--r--fs/nilfs2/the_nilfs.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c38
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c78
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h9
-rw-r--r--fs/xfs/xfs_iget.c23
-rw-r--r--fs/xfs/xfs_iomap.c61
-rw-r--r--fs/xfs/xfs_iomap.h3
-rw-r--r--fs/xfs/xfs_log.c78
-rw-r--r--fs/xfs/xfs_mount.h2
-rw-r--r--fs/xfs/xfs_vnodeops.c7
28 files changed, 449 insertions, 397 deletions
diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c
index 49f189423063..7ad36506c256 100644
--- a/fs/afs/netdevices.c
+++ b/fs/afs/netdevices.c
@@ -20,8 +20,7 @@ int afs_get_MAC_address(u8 *mac, size_t maclen)
20 struct net_device *dev; 20 struct net_device *dev;
21 int ret = -ENODEV; 21 int ret = -ENODEV;
22 22
23 if (maclen != ETH_ALEN) 23 BUG_ON(maclen != ETH_ALEN);
24 BUG();
25 24
26 rtnl_lock(); 25 rtnl_lock();
27 dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER); 26 dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index b43b95563663..acf678831103 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -590,9 +590,8 @@ static int ext2_get_blocks(struct inode *inode,
590 590
591 if (depth == 0) 591 if (depth == 0)
592 return (err); 592 return (err);
593reread:
594 partial = ext2_get_branch(inode, depth, offsets, chain, &err);
595 593
594 partial = ext2_get_branch(inode, depth, offsets, chain, &err);
596 /* Simplest case - block found, no allocation needed */ 595 /* Simplest case - block found, no allocation needed */
597 if (!partial) { 596 if (!partial) {
598 first_block = le32_to_cpu(chain[depth - 1].key); 597 first_block = le32_to_cpu(chain[depth - 1].key);
@@ -602,15 +601,16 @@ reread:
602 while (count < maxblocks && count <= blocks_to_boundary) { 601 while (count < maxblocks && count <= blocks_to_boundary) {
603 ext2_fsblk_t blk; 602 ext2_fsblk_t blk;
604 603
605 if (!verify_chain(chain, partial)) { 604 if (!verify_chain(chain, chain + depth - 1)) {
606 /* 605 /*
607 * Indirect block might be removed by 606 * Indirect block might be removed by
608 * truncate while we were reading it. 607 * truncate while we were reading it.
609 * Handling of that case: forget what we've 608 * Handling of that case: forget what we've
610 * got now, go to reread. 609 * got now, go to reread.
611 */ 610 */
611 err = -EAGAIN;
612 count = 0; 612 count = 0;
613 goto changed; 613 break;
614 } 614 }
615 blk = le32_to_cpu(*(chain[depth-1].p + count)); 615 blk = le32_to_cpu(*(chain[depth-1].p + count));
616 if (blk == first_block + count) 616 if (blk == first_block + count)
@@ -618,7 +618,8 @@ reread:
618 else 618 else
619 break; 619 break;
620 } 620 }
621 goto got_it; 621 if (err != -EAGAIN)
622 goto got_it;
622 } 623 }
623 624
624 /* Next simple case - plain lookup or failed read of indirect block */ 625 /* Next simple case - plain lookup or failed read of indirect block */
@@ -626,6 +627,33 @@ reread:
626 goto cleanup; 627 goto cleanup;
627 628
628 mutex_lock(&ei->truncate_mutex); 629 mutex_lock(&ei->truncate_mutex);
630 /*
631 * If the indirect block is missing while we are reading
632 * the chain(ext3_get_branch() returns -EAGAIN err), or
633 * if the chain has been changed after we grab the semaphore,
634 * (either because another process truncated this branch, or
635 * another get_block allocated this branch) re-grab the chain to see if
636 * the request block has been allocated or not.
637 *
638 * Since we already block the truncate/other get_block
639 * at this point, we will have the current copy of the chain when we
640 * splice the branch into the tree.
641 */
642 if (err == -EAGAIN || !verify_chain(chain, partial)) {
643 while (partial > chain) {
644 brelse(partial->bh);
645 partial--;
646 }
647 partial = ext2_get_branch(inode, depth, offsets, chain, &err);
648 if (!partial) {
649 count++;
650 mutex_unlock(&ei->truncate_mutex);
651 if (err)
652 goto cleanup;
653 clear_buffer_new(bh_result);
654 goto got_it;
655 }
656 }
629 657
630 /* 658 /*
631 * Okay, we need to do block allocation. Lazily initialize the block 659 * Okay, we need to do block allocation. Lazily initialize the block
@@ -683,12 +711,6 @@ cleanup:
683 partial--; 711 partial--;
684 } 712 }
685 return err; 713 return err;
686changed:
687 while (partial > chain) {
688 brelse(partial->bh);
689 partial--;
690 }
691 goto reread;
692} 714}
693 715
694int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) 716int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ac77d8b8251d..6132353dcf62 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -342,7 +342,7 @@ static int ext4_valid_extent_idx(struct inode *inode,
342 ext4_fsblk_t block = idx_pblock(ext_idx); 342 ext4_fsblk_t block = idx_pblock(ext_idx);
343 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 343 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
344 if (unlikely(block < le32_to_cpu(es->s_first_data_block) || 344 if (unlikely(block < le32_to_cpu(es->s_first_data_block) ||
345 (block > ext4_blocks_count(es)))) 345 (block >= ext4_blocks_count(es))))
346 return 0; 346 return 0;
347 else 347 else
348 return 1; 348 return 1;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a2e7952bc5f9..c6bd6ced3bb7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -372,16 +372,16 @@ static int ext4_block_to_path(struct inode *inode,
372} 372}
373 373
374static int __ext4_check_blockref(const char *function, struct inode *inode, 374static int __ext4_check_blockref(const char *function, struct inode *inode,
375 unsigned int *p, unsigned int max) { 375 __le32 *p, unsigned int max) {
376 376
377 unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es); 377 unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
378 unsigned int *bref = p; 378 __le32 *bref = p;
379 while (bref < p+max) { 379 while (bref < p+max) {
380 if (unlikely(*bref >= maxblocks)) { 380 if (unlikely(le32_to_cpu(*bref) >= maxblocks)) {
381 ext4_error(inode->i_sb, function, 381 ext4_error(inode->i_sb, function,
382 "block reference %u >= max (%u) " 382 "block reference %u >= max (%u) "
383 "in inode #%lu, offset=%d", 383 "in inode #%lu, offset=%d",
384 *bref, maxblocks, 384 le32_to_cpu(*bref), maxblocks,
385 inode->i_ino, (int)(bref-p)); 385 inode->i_ino, (int)(bref-p));
386 return -EIO; 386 return -EIO;
387 } 387 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9987bba99db3..2958f4e6f222 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2508,6 +2508,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2508 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 2508 if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
2509 goto cantfind_ext4; 2509 goto cantfind_ext4;
2510 2510
2511 /* check blocks count against device size */
2512 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2513 if (blocks_count && ext4_blocks_count(es) > blocks_count) {
2514 printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu "
2515 "exceeds size of device (%llu blocks)\n",
2516 ext4_blocks_count(es), blocks_count);
2517 goto failed_mount;
2518 }
2519
2511 /* 2520 /*
2512 * It makes no sense for the first data block to be beyond the end 2521 * It makes no sense for the first data block to be beyond the end
2513 * of the filesystem. 2522 * of the filesystem.
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9435dda8f1e0..a1cbff2b4d99 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -70,6 +70,10 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
70 BUG(); 70 BUG();
71 return 0; 71 return 0;
72 } 72 }
73
74 if (!tree)
75 return 0;
76
73 if (tree->node_size >= PAGE_CACHE_SIZE) { 77 if (tree->node_size >= PAGE_CACHE_SIZE) {
74 nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); 78 nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT);
75 spin_lock(&tree->hash_lock); 79 spin_lock(&tree->hash_lock);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 36ca2e1a4fa3..7b6165f25fbe 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -349,6 +349,7 @@ void hfs_mdb_put(struct super_block *sb)
349 if (HFS_SB(sb)->nls_disk) 349 if (HFS_SB(sb)->nls_disk)
350 unload_nls(HFS_SB(sb)->nls_disk); 350 unload_nls(HFS_SB(sb)->nls_disk);
351 351
352 free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
352 kfree(HFS_SB(sb)); 353 kfree(HFS_SB(sb));
353 sb->s_fs_info = NULL; 354 sb->s_fs_info = NULL;
354} 355}
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index c7bd649bbbdc..3e9afc2a91d2 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -55,6 +55,25 @@
55 * need do nothing. 55 * need do nothing.
56 * RevokeValid set, Revoked set: 56 * RevokeValid set, Revoked set:
57 * buffer has been revoked. 57 * buffer has been revoked.
58 *
59 * Locking rules:
60 * We keep two hash tables of revoke records. One hashtable belongs to the
61 * running transaction (is pointed to by journal->j_revoke), the other one
62 * belongs to the committing transaction. Accesses to the second hash table
63 * happen only from the kjournald and no other thread touches this table. Also
64 * journal_switch_revoke_table() which switches which hashtable belongs to the
65 * running and which to the committing transaction is called only from
66 * kjournald. Therefore we need no locks when accessing the hashtable belonging
67 * to the committing transaction.
68 *
69 * All users operating on the hash table belonging to the running transaction
70 * have a handle to the transaction. Therefore they are safe from kjournald
71 * switching hash tables under them. For operations on the lists of entries in
72 * the hash table j_revoke_lock is used.
73 *
74 * Finally, also replay code uses the hash tables but at this moment noone else
75 * can touch them (filesystem isn't mounted yet) and hence no locking is
76 * needed.
58 */ 77 */
59 78
60#ifndef __KERNEL__ 79#ifndef __KERNEL__
@@ -402,8 +421,6 @@ int journal_revoke(handle_t *handle, unsigned long blocknr,
402 * the second time we would still have a pending revoke to cancel. So, 421 * the second time we would still have a pending revoke to cancel. So,
403 * do not trust the Revoked bit on buffers unless RevokeValid is also 422 * do not trust the Revoked bit on buffers unless RevokeValid is also
404 * set. 423 * set.
405 *
406 * The caller must have the journal locked.
407 */ 424 */
408int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) 425int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
409{ 426{
@@ -481,10 +498,7 @@ void journal_switch_revoke_table(journal_t *journal)
481/* 498/*
482 * Write revoke records to the journal for all entries in the current 499 * Write revoke records to the journal for all entries in the current
483 * revoke hash, deleting the entries as we go. 500 * revoke hash, deleting the entries as we go.
484 *
485 * Called with the journal lock held.
486 */ 501 */
487
488void journal_write_revoke_records(journal_t *journal, 502void journal_write_revoke_records(journal_t *journal,
489 transaction_t *transaction) 503 transaction_t *transaction)
490{ 504{
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 24638e059bf3..064279e33bbb 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -688,6 +688,8 @@ static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = {
688 .bpop_translate = NULL, 688 .bpop_translate = NULL,
689}; 689};
690 690
691static struct lock_class_key nilfs_bmap_dat_lock_key;
692
691/** 693/**
692 * nilfs_bmap_read - read a bmap from an inode 694 * nilfs_bmap_read - read a bmap from an inode
693 * @bmap: bmap 695 * @bmap: bmap
@@ -715,6 +717,7 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
715 bmap->b_pops = &nilfs_bmap_ptr_ops_p; 717 bmap->b_pops = &nilfs_bmap_ptr_ops_p;
716 bmap->b_last_allocated_key = 0; /* XXX: use macro */ 718 bmap->b_last_allocated_key = 0; /* XXX: use macro */
717 bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; 719 bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;
720 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
718 break; 721 break;
719 case NILFS_CPFILE_INO: 722 case NILFS_CPFILE_INO:
720 case NILFS_SUFILE_INO: 723 case NILFS_SUFILE_INO:
@@ -772,6 +775,7 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
772{ 775{
773 memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union)); 776 memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union));
774 init_rwsem(&gcbmap->b_sem); 777 init_rwsem(&gcbmap->b_sem);
778 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
775 gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; 779 gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode;
776} 780}
777 781
@@ -779,5 +783,6 @@ void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
779{ 783{
780 memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union)); 784 memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union));
781 init_rwsem(&bmap->b_sem); 785 init_rwsem(&bmap->b_sem);
786 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
782 bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; 787 bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
783} 788}
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 7558c977db02..3d0c18a16db1 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -35,11 +35,6 @@
35#include "bmap_union.h" 35#include "bmap_union.h"
36 36
37/* 37/*
38 * NILFS filesystem version
39 */
40#define NILFS_VERSION "2.0.5"
41
42/*
43 * nilfs inode data in memory 38 * nilfs inode data in memory
44 */ 39 */
45struct nilfs_inode_info { 40struct nilfs_inode_info {
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 6ade0963fc1d..4fc081e47d70 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -413,7 +413,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
413 struct nilfs_segment_entry *ent, *n; 413 struct nilfs_segment_entry *ent, *n;
414 struct inode *sufile = nilfs->ns_sufile; 414 struct inode *sufile = nilfs->ns_sufile;
415 __u64 segnum[4]; 415 __u64 segnum[4];
416 time_t mtime;
417 int err; 416 int err;
418 int i; 417 int i;
419 418
@@ -442,24 +441,13 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
442 * Collecting segments written after the latest super root. 441 * Collecting segments written after the latest super root.
443 * These are marked dirty to avoid being reallocated in the next write. 442 * These are marked dirty to avoid being reallocated in the next write.
444 */ 443 */
445 mtime = get_seconds();
446 list_for_each_entry_safe(ent, n, head, list) { 444 list_for_each_entry_safe(ent, n, head, list) {
447 if (ent->segnum == segnum[0]) { 445 if (ent->segnum != segnum[0]) {
448 list_del(&ent->list); 446 err = nilfs_sufile_scrap(sufile, ent->segnum);
449 nilfs_free_segment_entry(ent); 447 if (unlikely(err))
450 continue; 448 goto failed;
451 }
452 err = nilfs_open_segment_entry(ent, sufile);
453 if (unlikely(err))
454 goto failed;
455 if (!nilfs_segment_usage_dirty(ent->raw_su)) {
456 /* make the segment garbage */
457 ent->raw_su->su_nblocks = cpu_to_le32(0);
458 ent->raw_su->su_lastmod = cpu_to_le32(mtime);
459 nilfs_segment_usage_set_dirty(ent->raw_su);
460 } 449 }
461 list_del(&ent->list); 450 list_del(&ent->list);
462 nilfs_close_segment_entry(ent, sufile);
463 nilfs_free_segment_entry(ent); 451 nilfs_free_segment_entry(ent);
464 } 452 }
465 453
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index c774cf397e2f..98e68677f045 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -93,6 +93,52 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
93 create, NULL, bhp); 93 create, NULL, bhp);
94} 94}
95 95
96static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
97 u64 ncleanadd, u64 ndirtyadd)
98{
99 struct nilfs_sufile_header *header;
100 void *kaddr;
101
102 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
103 header = kaddr + bh_offset(header_bh);
104 le64_add_cpu(&header->sh_ncleansegs, ncleanadd);
105 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
106 kunmap_atomic(kaddr, KM_USER0);
107
108 nilfs_mdt_mark_buffer_dirty(header_bh);
109}
110
111int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
112 void (*dofunc)(struct inode *, __u64,
113 struct buffer_head *,
114 struct buffer_head *))
115{
116 struct buffer_head *header_bh, *bh;
117 int ret;
118
119 if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) {
120 printk(KERN_WARNING "%s: invalid segment number: %llu\n",
121 __func__, (unsigned long long)segnum);
122 return -EINVAL;
123 }
124 down_write(&NILFS_MDT(sufile)->mi_sem);
125
126 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
127 if (ret < 0)
128 goto out_sem;
129
130 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, create, &bh);
131 if (!ret) {
132 dofunc(sufile, segnum, header_bh, bh);
133 brelse(bh);
134 }
135 brelse(header_bh);
136
137 out_sem:
138 up_write(&NILFS_MDT(sufile)->mi_sem);
139 return ret;
140}
141
96/** 142/**
97 * nilfs_sufile_alloc - allocate a segment 143 * nilfs_sufile_alloc - allocate a segment
98 * @sufile: inode of segment usage file 144 * @sufile: inode of segment usage file
@@ -113,7 +159,6 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
113int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) 159int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
114{ 160{
115 struct buffer_head *header_bh, *su_bh; 161 struct buffer_head *header_bh, *su_bh;
116 struct the_nilfs *nilfs;
117 struct nilfs_sufile_header *header; 162 struct nilfs_sufile_header *header;
118 struct nilfs_segment_usage *su; 163 struct nilfs_segment_usage *su;
119 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 164 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
@@ -124,8 +169,6 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
124 169
125 down_write(&NILFS_MDT(sufile)->mi_sem); 170 down_write(&NILFS_MDT(sufile)->mi_sem);
126 171
127 nilfs = NILFS_MDT(sufile)->mi_nilfs;
128
129 ret = nilfs_sufile_get_header_block(sufile, &header_bh); 172 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
130 if (ret < 0) 173 if (ret < 0)
131 goto out_sem; 174 goto out_sem;
@@ -192,165 +235,84 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
192 return ret; 235 return ret;
193} 236}
194 237
195/** 238void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
196 * nilfs_sufile_cancel_free - 239 struct buffer_head *header_bh,
197 * @sufile: inode of segment usage file 240 struct buffer_head *su_bh)
198 * @segnum: segment number
199 *
200 * Description:
201 *
202 * Return Value: On success, 0 is returned. On error, one of the following
203 * negative error codes is returned.
204 *
205 * %-EIO - I/O error.
206 *
207 * %-ENOMEM - Insufficient amount of memory available.
208 */
209int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum)
210{ 241{
211 struct buffer_head *header_bh, *su_bh;
212 struct the_nilfs *nilfs;
213 struct nilfs_sufile_header *header;
214 struct nilfs_segment_usage *su; 242 struct nilfs_segment_usage *su;
215 void *kaddr; 243 void *kaddr;
216 int ret;
217
218 down_write(&NILFS_MDT(sufile)->mi_sem);
219
220 nilfs = NILFS_MDT(sufile)->mi_nilfs;
221
222 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
223 if (ret < 0)
224 goto out_sem;
225
226 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh);
227 if (ret < 0)
228 goto out_header;
229 244
230 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 245 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
231 su = nilfs_sufile_block_get_segment_usage( 246 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
232 sufile, segnum, su_bh, kaddr);
233 if (unlikely(!nilfs_segment_usage_clean(su))) { 247 if (unlikely(!nilfs_segment_usage_clean(su))) {
234 printk(KERN_WARNING "%s: segment %llu must be clean\n", 248 printk(KERN_WARNING "%s: segment %llu must be clean\n",
235 __func__, (unsigned long long)segnum); 249 __func__, (unsigned long long)segnum);
236 kunmap_atomic(kaddr, KM_USER0); 250 kunmap_atomic(kaddr, KM_USER0);
237 goto out_su_bh; 251 return;
238 } 252 }
239 nilfs_segment_usage_set_dirty(su); 253 nilfs_segment_usage_set_dirty(su);
240 kunmap_atomic(kaddr, KM_USER0); 254 kunmap_atomic(kaddr, KM_USER0);
241 255
242 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 256 nilfs_sufile_mod_counter(header_bh, -1, 1);
243 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
244 le64_add_cpu(&header->sh_ncleansegs, -1);
245 le64_add_cpu(&header->sh_ndirtysegs, 1);
246 kunmap_atomic(kaddr, KM_USER0);
247
248 nilfs_mdt_mark_buffer_dirty(header_bh);
249 nilfs_mdt_mark_buffer_dirty(su_bh); 257 nilfs_mdt_mark_buffer_dirty(su_bh);
250 nilfs_mdt_mark_dirty(sufile); 258 nilfs_mdt_mark_dirty(sufile);
251
252 out_su_bh:
253 brelse(su_bh);
254 out_header:
255 brelse(header_bh);
256 out_sem:
257 up_write(&NILFS_MDT(sufile)->mi_sem);
258 return ret;
259} 259}
260 260
261/** 261void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
262 * nilfs_sufile_freev - free segments 262 struct buffer_head *header_bh,
263 * @sufile: inode of segment usage file 263 struct buffer_head *su_bh)
264 * @segnum: array of segment numbers
265 * @nsegs: number of segments
266 *
267 * Description: nilfs_sufile_freev() frees segments specified by @segnum and
268 * @nsegs, which must have been returned by a previous call to
269 * nilfs_sufile_alloc().
270 *
271 * Return Value: On success, 0 is returned. On error, one of the following
272 * negative error codes is returned.
273 *
274 * %-EIO - I/O error.
275 *
276 * %-ENOMEM - Insufficient amount of memory available.
277 */
278#define NILFS_SUFILE_FREEV_PREALLOC 16
279int nilfs_sufile_freev(struct inode *sufile, __u64 *segnum, size_t nsegs)
280{ 264{
281 struct buffer_head *header_bh, **su_bh,
282 *su_bh_prealloc[NILFS_SUFILE_FREEV_PREALLOC];
283 struct the_nilfs *nilfs;
284 struct nilfs_sufile_header *header;
285 struct nilfs_segment_usage *su; 265 struct nilfs_segment_usage *su;
286 void *kaddr; 266 void *kaddr;
287 int ret, i; 267 int clean, dirty;
288 268
289 down_write(&NILFS_MDT(sufile)->mi_sem); 269 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
290 270 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
291 nilfs = NILFS_MDT(sufile)->mi_nilfs; 271 if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) &&
292 272 su->su_nblocks == cpu_to_le32(0)) {
293 /* prepare resources */
294 if (nsegs <= NILFS_SUFILE_FREEV_PREALLOC)
295 su_bh = su_bh_prealloc;
296 else {
297 su_bh = kmalloc(sizeof(*su_bh) * nsegs, GFP_NOFS);
298 if (su_bh == NULL) {
299 ret = -ENOMEM;
300 goto out_sem;
301 }
302 }
303
304 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
305 if (ret < 0)
306 goto out_su_bh;
307 for (i = 0; i < nsegs; i++) {
308 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum[i],
309 0, &su_bh[i]);
310 if (ret < 0)
311 goto out_bh;
312 }
313
314 /* free segments */
315 for (i = 0; i < nsegs; i++) {
316 kaddr = kmap_atomic(su_bh[i]->b_page, KM_USER0);
317 su = nilfs_sufile_block_get_segment_usage(
318 sufile, segnum[i], su_bh[i], kaddr);
319 WARN_ON(nilfs_segment_usage_error(su));
320 nilfs_segment_usage_set_clean(su);
321 kunmap_atomic(kaddr, KM_USER0); 273 kunmap_atomic(kaddr, KM_USER0);
322 nilfs_mdt_mark_buffer_dirty(su_bh[i]); 274 return;
323 } 275 }
324 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 276 clean = nilfs_segment_usage_clean(su);
325 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); 277 dirty = nilfs_segment_usage_dirty(su);
326 le64_add_cpu(&header->sh_ncleansegs, nsegs); 278
327 le64_add_cpu(&header->sh_ndirtysegs, -(u64)nsegs); 279 /* make the segment garbage */
280 su->su_lastmod = cpu_to_le64(0);
281 su->su_nblocks = cpu_to_le32(0);
282 su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY);
328 kunmap_atomic(kaddr, KM_USER0); 283 kunmap_atomic(kaddr, KM_USER0);
329 nilfs_mdt_mark_buffer_dirty(header_bh); 284
285 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
286 nilfs_mdt_mark_buffer_dirty(su_bh);
330 nilfs_mdt_mark_dirty(sufile); 287 nilfs_mdt_mark_dirty(sufile);
288}
331 289
332 out_bh: 290void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
333 for (i--; i >= 0; i--) 291 struct buffer_head *header_bh,
334 brelse(su_bh[i]); 292 struct buffer_head *su_bh)
335 brelse(header_bh); 293{
294 struct nilfs_segment_usage *su;
295 void *kaddr;
296 int sudirty;
336 297
337 out_su_bh: 298 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
338 if (su_bh != su_bh_prealloc) 299 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
339 kfree(su_bh); 300 if (nilfs_segment_usage_clean(su)) {
301 printk(KERN_WARNING "%s: segment %llu is already clean\n",
302 __func__, (unsigned long long)segnum);
303 kunmap_atomic(kaddr, KM_USER0);
304 return;
305 }
306 WARN_ON(nilfs_segment_usage_error(su));
307 WARN_ON(!nilfs_segment_usage_dirty(su));
340 308
341 out_sem: 309 sudirty = nilfs_segment_usage_dirty(su);
342 up_write(&NILFS_MDT(sufile)->mi_sem); 310 nilfs_segment_usage_set_clean(su);
343 return ret; 311 kunmap_atomic(kaddr, KM_USER0);
344} 312 nilfs_mdt_mark_buffer_dirty(su_bh);
345 313
346/** 314 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
347 * nilfs_sufile_free - 315 nilfs_mdt_mark_dirty(sufile);
348 * @sufile:
349 * @segnum:
350 */
351int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
352{
353 return nilfs_sufile_freev(sufile, &segnum, 1);
354} 316}
355 317
356/** 318/**
@@ -500,72 +462,28 @@ int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp)
500 return ret; 462 return ret;
501} 463}
502 464
503/** 465void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
504 * nilfs_sufile_set_error - mark a segment as erroneous 466 struct buffer_head *header_bh,
505 * @sufile: inode of segment usage file 467 struct buffer_head *su_bh)
506 * @segnum: segment number
507 *
508 * Description: nilfs_sufile_set_error() marks the segment specified by
509 * @segnum as erroneous. The error segment will never be used again.
510 *
511 * Return Value: On success, 0 is returned. On error, one of the following
512 * negative error codes is returned.
513 *
514 * %-EIO - I/O error.
515 *
516 * %-ENOMEM - Insufficient amount of memory available.
517 *
518 * %-EINVAL - Invalid segment usage number.
519 */
520int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum)
521{ 468{
522 struct buffer_head *header_bh, *su_bh;
523 struct nilfs_segment_usage *su; 469 struct nilfs_segment_usage *su;
524 struct nilfs_sufile_header *header;
525 void *kaddr; 470 void *kaddr;
526 int ret; 471 int suclean;
527
528 if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) {
529 printk(KERN_WARNING "%s: invalid segment number: %llu\n",
530 __func__, (unsigned long long)segnum);
531 return -EINVAL;
532 }
533 down_write(&NILFS_MDT(sufile)->mi_sem);
534
535 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
536 if (ret < 0)
537 goto out_sem;
538 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh);
539 if (ret < 0)
540 goto out_header;
541 472
542 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 473 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
543 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 474 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
544 if (nilfs_segment_usage_error(su)) { 475 if (nilfs_segment_usage_error(su)) {
545 kunmap_atomic(kaddr, KM_USER0); 476 kunmap_atomic(kaddr, KM_USER0);
546 brelse(su_bh); 477 return;
547 goto out_header;
548 } 478 }
549 479 suclean = nilfs_segment_usage_clean(su);
550 nilfs_segment_usage_set_error(su); 480 nilfs_segment_usage_set_error(su);
551 kunmap_atomic(kaddr, KM_USER0); 481 kunmap_atomic(kaddr, KM_USER0);
552 brelse(su_bh);
553 482
554 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 483 if (suclean)
555 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); 484 nilfs_sufile_mod_counter(header_bh, -1, 0);
556 le64_add_cpu(&header->sh_ndirtysegs, -1);
557 kunmap_atomic(kaddr, KM_USER0);
558 nilfs_mdt_mark_buffer_dirty(header_bh);
559 nilfs_mdt_mark_buffer_dirty(su_bh); 485 nilfs_mdt_mark_buffer_dirty(su_bh);
560 nilfs_mdt_mark_dirty(sufile); 486 nilfs_mdt_mark_dirty(sufile);
561 brelse(su_bh);
562
563 out_header:
564 brelse(header_bh);
565
566 out_sem:
567 up_write(&NILFS_MDT(sufile)->mi_sem);
568 return ret;
569} 487}
570 488
571/** 489/**
@@ -625,7 +543,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum,
625 si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); 543 si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks);
626 si[i + j].sui_flags = le32_to_cpu(su->su_flags) & 544 si[i + j].sui_flags = le32_to_cpu(su->su_flags) &
627 ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); 545 ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE);
628 if (nilfs_segment_is_active(nilfs, segnum + i + j)) 546 if (nilfs_segment_is_active(nilfs, segnum + j))
629 si[i + j].sui_flags |= 547 si[i + j].sui_flags |=
630 (1UL << NILFS_SEGMENT_USAGE_ACTIVE); 548 (1UL << NILFS_SEGMENT_USAGE_ACTIVE);
631 } 549 }
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index d595f33a768d..a2e2efd4ade1 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -36,9 +36,6 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
36} 36}
37 37
38int nilfs_sufile_alloc(struct inode *, __u64 *); 38int nilfs_sufile_alloc(struct inode *, __u64 *);
39int nilfs_sufile_cancel_free(struct inode *, __u64);
40int nilfs_sufile_freev(struct inode *, __u64 *, size_t);
41int nilfs_sufile_free(struct inode *, __u64);
42int nilfs_sufile_get_segment_usage(struct inode *, __u64, 39int nilfs_sufile_get_segment_usage(struct inode *, __u64,
43 struct nilfs_segment_usage **, 40 struct nilfs_segment_usage **,
44 struct buffer_head **); 41 struct buffer_head **);
@@ -46,9 +43,83 @@ void nilfs_sufile_put_segment_usage(struct inode *, __u64,
46 struct buffer_head *); 43 struct buffer_head *);
47int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); 44int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *);
48int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); 45int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *);
49int nilfs_sufile_set_error(struct inode *, __u64);
50ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, 46ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *,
51 size_t); 47 size_t);
52 48
49int nilfs_sufile_update(struct inode *, __u64, int,
50 void (*dofunc)(struct inode *, __u64,
51 struct buffer_head *,
52 struct buffer_head *));
53void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
54 struct buffer_head *);
55void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *,
56 struct buffer_head *);
57void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *,
58 struct buffer_head *);
59void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
60 struct buffer_head *);
61
62/**
63 * nilfs_sufile_cancel_free -
64 * @sufile: inode of segment usage file
65 * @segnum: segment number
66 *
67 * Description:
68 *
69 * Return Value: On success, 0 is returned. On error, one of the following
70 * negative error codes is returned.
71 *
72 * %-EIO - I/O error.
73 *
74 * %-ENOMEM - Insufficient amount of memory available.
75 */
76static inline int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum)
77{
78 return nilfs_sufile_update(sufile, segnum, 0,
79 nilfs_sufile_do_cancel_free);
80}
81
82/**
83 * nilfs_sufile_scrap - make a segment garbage
84 * @sufile: inode of segment usage file
85 * @segnum: segment number to be freed
86 */
87static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum)
88{
89 return nilfs_sufile_update(sufile, segnum, 1, nilfs_sufile_do_scrap);
90}
91
92/**
93 * nilfs_sufile_free - free segment
94 * @sufile: inode of segment usage file
95 * @segnum: segment number to be freed
96 */
97static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
98{
99 return nilfs_sufile_update(sufile, segnum, 0, nilfs_sufile_do_free);
100}
101
102/**
103 * nilfs_sufile_set_error - mark a segment as erroneous
104 * @sufile: inode of segment usage file
105 * @segnum: segment number
106 *
107 * Description: nilfs_sufile_set_error() marks the segment specified by
108 * @segnum as erroneous. The error segment will never be used again.
109 *
110 * Return Value: On success, 0 is returned. On error, one of the following
111 * negative error codes is returned.
112 *
113 * %-EIO - I/O error.
114 *
115 * %-ENOMEM - Insufficient amount of memory available.
116 *
117 * %-EINVAL - Invalid segment usage number.
118 */
119static inline int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum)
120{
121 return nilfs_sufile_update(sufile, segnum, 0,
122 nilfs_sufile_do_set_error);
123}
53 124
54#endif /* _NILFS_SUFILE_H */ 125#endif /* _NILFS_SUFILE_H */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index e117e1ea9bff..6989b03e97ab 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -63,7 +63,6 @@
63MODULE_AUTHOR("NTT Corp."); 63MODULE_AUTHOR("NTT Corp.");
64MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " 64MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
65 "(NILFS)"); 65 "(NILFS)");
66MODULE_VERSION(NILFS_VERSION);
67MODULE_LICENSE("GPL"); 66MODULE_LICENSE("GPL");
68 67
69static int nilfs_remount(struct super_block *sb, int *flags, char *data); 68static int nilfs_remount(struct super_block *sb, int *flags, char *data);
@@ -476,11 +475,12 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
476{ 475{
477 struct super_block *sb = dentry->d_sb; 476 struct super_block *sb = dentry->d_sb;
478 struct nilfs_sb_info *sbi = NILFS_SB(sb); 477 struct nilfs_sb_info *sbi = NILFS_SB(sb);
478 struct the_nilfs *nilfs = sbi->s_nilfs;
479 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
479 unsigned long long blocks; 480 unsigned long long blocks;
480 unsigned long overhead; 481 unsigned long overhead;
481 unsigned long nrsvblocks; 482 unsigned long nrsvblocks;
482 sector_t nfreeblocks; 483 sector_t nfreeblocks;
483 struct the_nilfs *nilfs = sbi->s_nilfs;
484 int err; 484 int err;
485 485
486 /* 486 /*
@@ -514,6 +514,9 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
514 buf->f_files = atomic_read(&sbi->s_inodes_count); 514 buf->f_files = atomic_read(&sbi->s_inodes_count);
515 buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ 515 buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */
516 buf->f_namelen = NILFS_NAME_LEN; 516 buf->f_namelen = NILFS_NAME_LEN;
517 buf->f_fsid.val[0] = (u32)id;
518 buf->f_fsid.val[1] = (u32)(id >> 32);
519
517 return 0; 520 return 0;
518} 521}
519 522
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 33400cf0bbe2..7f65b3be4aa9 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -115,6 +115,7 @@ void put_nilfs(struct the_nilfs *nilfs)
115static int nilfs_load_super_root(struct the_nilfs *nilfs, 115static int nilfs_load_super_root(struct the_nilfs *nilfs,
116 struct nilfs_sb_info *sbi, sector_t sr_block) 116 struct nilfs_sb_info *sbi, sector_t sr_block)
117{ 117{
118 static struct lock_class_key dat_lock_key;
118 struct buffer_head *bh_sr; 119 struct buffer_head *bh_sr;
119 struct nilfs_super_root *raw_sr; 120 struct nilfs_super_root *raw_sr;
120 struct nilfs_super_block **sbp = nilfs->ns_sbp; 121 struct nilfs_super_block **sbp = nilfs->ns_sbp;
@@ -163,6 +164,9 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs,
163 if (unlikely(err)) 164 if (unlikely(err))
164 goto failed_sufile; 165 goto failed_sufile;
165 166
167 lockdep_set_class(&NILFS_MDT(nilfs->ns_dat)->mi_sem, &dat_lock_key);
168 lockdep_set_class(&NILFS_MDT(nilfs->ns_gc_dat)->mi_sem, &dat_lock_key);
169
166 nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); 170 nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat);
167 nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size, 171 nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size,
168 sizeof(struct nilfs_cpfile_header)); 172 sizeof(struct nilfs_cpfile_header));
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c13f67300fe7..7ec89fc05b2b 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -153,23 +153,6 @@ xfs_find_bdev_for_inode(
153} 153}
154 154
155/* 155/*
156 * Schedule IO completion handling on a xfsdatad if this was
157 * the final hold on this ioend. If we are asked to wait,
158 * flush the workqueue.
159 */
160STATIC void
161xfs_finish_ioend(
162 xfs_ioend_t *ioend,
163 int wait)
164{
165 if (atomic_dec_and_test(&ioend->io_remaining)) {
166 queue_work(xfsdatad_workqueue, &ioend->io_work);
167 if (wait)
168 flush_workqueue(xfsdatad_workqueue);
169 }
170}
171
172/*
173 * We're now finished for good with this ioend structure. 156 * We're now finished for good with this ioend structure.
174 * Update the page state via the associated buffer_heads, 157 * Update the page state via the associated buffer_heads,
175 * release holds on the inode and bio, and finally free 158 * release holds on the inode and bio, and finally free
@@ -310,6 +293,27 @@ xfs_end_bio_read(
310} 293}
311 294
312/* 295/*
296 * Schedule IO completion handling on a xfsdatad if this was
297 * the final hold on this ioend. If we are asked to wait,
298 * flush the workqueue.
299 */
300STATIC void
301xfs_finish_ioend(
302 xfs_ioend_t *ioend,
303 int wait)
304{
305 if (atomic_dec_and_test(&ioend->io_remaining)) {
306 struct workqueue_struct *wq = xfsdatad_workqueue;
307 if (ioend->io_work.func == xfs_end_bio_unwritten)
308 wq = xfsconvertd_workqueue;
309
310 queue_work(wq, &ioend->io_work);
311 if (wait)
312 flush_workqueue(wq);
313 }
314}
315
316/*
313 * Allocate and initialise an IO completion structure. 317 * Allocate and initialise an IO completion structure.
314 * We need to track unwritten extent write completion here initially. 318 * We need to track unwritten extent write completion here initially.
315 * We'll need to extend this for updating the ondisk inode size later 319 * We'll need to extend this for updating the ondisk inode size later
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 1dd528849755..221b3e66ceef 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -19,6 +19,7 @@
19#define __XFS_AOPS_H__ 19#define __XFS_AOPS_H__
20 20
21extern struct workqueue_struct *xfsdatad_workqueue; 21extern struct workqueue_struct *xfsdatad_workqueue;
22extern struct workqueue_struct *xfsconvertd_workqueue;
22extern mempool_t *xfs_ioend_pool; 23extern mempool_t *xfs_ioend_pool;
23 24
24/* 25/*
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index aa1016bb9134..e28800a9f2b5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -51,6 +51,7 @@ static struct shrinker xfs_buf_shake = {
51 51
52static struct workqueue_struct *xfslogd_workqueue; 52static struct workqueue_struct *xfslogd_workqueue;
53struct workqueue_struct *xfsdatad_workqueue; 53struct workqueue_struct *xfsdatad_workqueue;
54struct workqueue_struct *xfsconvertd_workqueue;
54 55
55#ifdef XFS_BUF_TRACE 56#ifdef XFS_BUF_TRACE
56void 57void
@@ -1775,6 +1776,7 @@ xfs_flush_buftarg(
1775 xfs_buf_t *bp, *n; 1776 xfs_buf_t *bp, *n;
1776 int pincount = 0; 1777 int pincount = 0;
1777 1778
1779 xfs_buf_runall_queues(xfsconvertd_workqueue);
1778 xfs_buf_runall_queues(xfsdatad_workqueue); 1780 xfs_buf_runall_queues(xfsdatad_workqueue);
1779 xfs_buf_runall_queues(xfslogd_workqueue); 1781 xfs_buf_runall_queues(xfslogd_workqueue);
1780 1782
@@ -1831,9 +1833,15 @@ xfs_buf_init(void)
1831 if (!xfsdatad_workqueue) 1833 if (!xfsdatad_workqueue)
1832 goto out_destroy_xfslogd_workqueue; 1834 goto out_destroy_xfslogd_workqueue;
1833 1835
1836 xfsconvertd_workqueue = create_workqueue("xfsconvertd");
1837 if (!xfsconvertd_workqueue)
1838 goto out_destroy_xfsdatad_workqueue;
1839
1834 register_shrinker(&xfs_buf_shake); 1840 register_shrinker(&xfs_buf_shake);
1835 return 0; 1841 return 0;
1836 1842
1843 out_destroy_xfsdatad_workqueue:
1844 destroy_workqueue(xfsdatad_workqueue);
1837 out_destroy_xfslogd_workqueue: 1845 out_destroy_xfslogd_workqueue:
1838 destroy_workqueue(xfslogd_workqueue); 1846 destroy_workqueue(xfslogd_workqueue);
1839 out_free_buf_zone: 1847 out_free_buf_zone:
@@ -1849,6 +1857,7 @@ void
1849xfs_buf_terminate(void) 1857xfs_buf_terminate(void)
1850{ 1858{
1851 unregister_shrinker(&xfs_buf_shake); 1859 unregister_shrinker(&xfs_buf_shake);
1860 destroy_workqueue(xfsconvertd_workqueue);
1852 destroy_workqueue(xfsdatad_workqueue); 1861 destroy_workqueue(xfsdatad_workqueue);
1853 destroy_workqueue(xfslogd_workqueue); 1862 destroy_workqueue(xfslogd_workqueue);
1854 kmem_zone_destroy(xfs_buf_zone); 1863 kmem_zone_destroy(xfs_buf_zone);
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 5aeb77776961..08be36d7326c 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -74,14 +74,14 @@ xfs_flush_pages(
74 74
75 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 75 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
76 xfs_iflags_clear(ip, XFS_ITRUNCATED); 76 xfs_iflags_clear(ip, XFS_ITRUNCATED);
77 ret = filemap_fdatawrite(mapping); 77 ret = -filemap_fdatawrite(mapping);
78 if (flags & XFS_B_ASYNC)
79 return -ret;
80 ret2 = filemap_fdatawait(mapping);
81 if (!ret)
82 ret = ret2;
83 } 78 }
84 return -ret; 79 if (flags & XFS_B_ASYNC)
80 return ret;
81 ret2 = xfs_wait_on_pages(ip, first, last);
82 if (!ret)
83 ret = ret2;
84 return ret;
85} 85}
86 86
87int 87int
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 7e90daa0d1d1..9142192ccbe6 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -751,10 +751,26 @@ start:
751 goto relock; 751 goto relock;
752 } 752 }
753 } else { 753 } else {
754 int enospc = 0;
755 ssize_t ret2 = 0;
756
757write_retry:
754 xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, 758 xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs,
755 *offset, ioflags); 759 *offset, ioflags);
756 ret = generic_file_buffered_write(iocb, iovp, segs, 760 ret2 = generic_file_buffered_write(iocb, iovp, segs,
757 pos, offset, count, ret); 761 pos, offset, count, ret);
762 /*
763 * if we just got an ENOSPC, flush the inode now we
764 * aren't holding any page locks and retry *once*
765 */
766 if (ret2 == -ENOSPC && !enospc) {
767 error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
768 if (error)
769 goto out_unlock_internal;
770 enospc = 1;
771 goto write_retry;
772 }
773 ret = ret2;
758 } 774 }
759 775
760 current->backing_dev_info = NULL; 776 current->backing_dev_info = NULL;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a608e72fa405..f7ba76633c29 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -62,12 +62,6 @@ xfs_sync_inodes_ag(
62 uint32_t first_index = 0; 62 uint32_t first_index = 0;
63 int error = 0; 63 int error = 0;
64 int last_error = 0; 64 int last_error = 0;
65 int fflag = XFS_B_ASYNC;
66
67 if (flags & SYNC_DELWRI)
68 fflag = XFS_B_DELWRI;
69 if (flags & SYNC_WAIT)
70 fflag = 0; /* synchronous overrides all */
71 65
72 do { 66 do {
73 struct inode *inode; 67 struct inode *inode;
@@ -128,11 +122,23 @@ xfs_sync_inodes_ag(
128 * If we have to flush data or wait for I/O completion 122 * If we have to flush data or wait for I/O completion
129 * we need to hold the iolock. 123 * we need to hold the iolock.
130 */ 124 */
131 if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { 125 if (flags & SYNC_DELWRI) {
132 xfs_ilock(ip, XFS_IOLOCK_SHARED); 126 if (VN_DIRTY(inode)) {
133 lock_flags |= XFS_IOLOCK_SHARED; 127 if (flags & SYNC_TRYLOCK) {
134 error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); 128 if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
135 if (flags & SYNC_IOWAIT) 129 lock_flags |= XFS_IOLOCK_SHARED;
130 } else {
131 xfs_ilock(ip, XFS_IOLOCK_SHARED);
132 lock_flags |= XFS_IOLOCK_SHARED;
133 }
134 if (lock_flags & XFS_IOLOCK_SHARED) {
135 error = xfs_flush_pages(ip, 0, -1,
136 (flags & SYNC_WAIT) ? 0
137 : XFS_B_ASYNC,
138 FI_NONE);
139 }
140 }
141 if (VN_CACHED(inode) && (flags & SYNC_IOWAIT))
136 xfs_ioend_wait(ip); 142 xfs_ioend_wait(ip);
137 } 143 }
138 xfs_ilock(ip, XFS_ILOCK_SHARED); 144 xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -398,15 +404,17 @@ STATIC void
398xfs_syncd_queue_work( 404xfs_syncd_queue_work(
399 struct xfs_mount *mp, 405 struct xfs_mount *mp,
400 void *data, 406 void *data,
401 void (*syncer)(struct xfs_mount *, void *)) 407 void (*syncer)(struct xfs_mount *, void *),
408 struct completion *completion)
402{ 409{
403 struct bhv_vfs_sync_work *work; 410 struct xfs_sync_work *work;
404 411
405 work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); 412 work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
406 INIT_LIST_HEAD(&work->w_list); 413 INIT_LIST_HEAD(&work->w_list);
407 work->w_syncer = syncer; 414 work->w_syncer = syncer;
408 work->w_data = data; 415 work->w_data = data;
409 work->w_mount = mp; 416 work->w_mount = mp;
417 work->w_completion = completion;
410 spin_lock(&mp->m_sync_lock); 418 spin_lock(&mp->m_sync_lock);
411 list_add_tail(&work->w_list, &mp->m_sync_list); 419 list_add_tail(&work->w_list, &mp->m_sync_list);
412 spin_unlock(&mp->m_sync_lock); 420 spin_unlock(&mp->m_sync_lock);
@@ -420,49 +428,26 @@ xfs_syncd_queue_work(
420 * heads, looking about for more room... 428 * heads, looking about for more room...
421 */ 429 */
422STATIC void 430STATIC void
423xfs_flush_inode_work( 431xfs_flush_inodes_work(
424 struct xfs_mount *mp,
425 void *arg)
426{
427 struct inode *inode = arg;
428 filemap_flush(inode->i_mapping);
429 iput(inode);
430}
431
432void
433xfs_flush_inode(
434 xfs_inode_t *ip)
435{
436 struct inode *inode = VFS_I(ip);
437
438 igrab(inode);
439 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
440 delay(msecs_to_jiffies(500));
441}
442
443/*
444 * This is the "bigger hammer" version of xfs_flush_inode_work...
445 * (IOW, "If at first you don't succeed, use a Bigger Hammer").
446 */
447STATIC void
448xfs_flush_device_work(
449 struct xfs_mount *mp, 432 struct xfs_mount *mp,
450 void *arg) 433 void *arg)
451{ 434{
452 struct inode *inode = arg; 435 struct inode *inode = arg;
453 sync_blockdev(mp->m_super->s_bdev); 436 xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK);
437 xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT);
454 iput(inode); 438 iput(inode);
455} 439}
456 440
457void 441void
458xfs_flush_device( 442xfs_flush_inodes(
459 xfs_inode_t *ip) 443 xfs_inode_t *ip)
460{ 444{
461 struct inode *inode = VFS_I(ip); 445 struct inode *inode = VFS_I(ip);
446 DECLARE_COMPLETION_ONSTACK(completion);
462 447
463 igrab(inode); 448 igrab(inode);
464 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); 449 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
465 delay(msecs_to_jiffies(500)); 450 wait_for_completion(&completion);
466 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); 451 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
467} 452}
468 453
@@ -497,7 +482,7 @@ xfssyncd(
497{ 482{
498 struct xfs_mount *mp = arg; 483 struct xfs_mount *mp = arg;
499 long timeleft; 484 long timeleft;
500 bhv_vfs_sync_work_t *work, *n; 485 xfs_sync_work_t *work, *n;
501 LIST_HEAD (tmp); 486 LIST_HEAD (tmp);
502 487
503 set_freezable(); 488 set_freezable();
@@ -532,6 +517,8 @@ xfssyncd(
532 list_del(&work->w_list); 517 list_del(&work->w_list);
533 if (work == &mp->m_sync_work) 518 if (work == &mp->m_sync_work)
534 continue; 519 continue;
520 if (work->w_completion)
521 complete(work->w_completion);
535 kmem_free(work); 522 kmem_free(work);
536 } 523 }
537 } 524 }
@@ -545,6 +532,7 @@ xfs_syncd_init(
545{ 532{
546 mp->m_sync_work.w_syncer = xfs_sync_worker; 533 mp->m_sync_work.w_syncer = xfs_sync_worker;
547 mp->m_sync_work.w_mount = mp; 534 mp->m_sync_work.w_mount = mp;
535 mp->m_sync_work.w_completion = NULL;
548 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); 536 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
549 if (IS_ERR(mp->m_sync_task)) 537 if (IS_ERR(mp->m_sync_task))
550 return -PTR_ERR(mp->m_sync_task); 538 return -PTR_ERR(mp->m_sync_task);
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 04f058c848ae..308d5bf6dfbd 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -21,18 +21,20 @@
21struct xfs_mount; 21struct xfs_mount;
22struct xfs_perag; 22struct xfs_perag;
23 23
24typedef struct bhv_vfs_sync_work { 24typedef struct xfs_sync_work {
25 struct list_head w_list; 25 struct list_head w_list;
26 struct xfs_mount *w_mount; 26 struct xfs_mount *w_mount;
27 void *w_data; /* syncer routine argument */ 27 void *w_data; /* syncer routine argument */
28 void (*w_syncer)(struct xfs_mount *, void *); 28 void (*w_syncer)(struct xfs_mount *, void *);
29} bhv_vfs_sync_work_t; 29 struct completion *w_completion;
30} xfs_sync_work_t;
30 31
31#define SYNC_ATTR 0x0001 /* sync attributes */ 32#define SYNC_ATTR 0x0001 /* sync attributes */
32#define SYNC_DELWRI 0x0002 /* look at delayed writes */ 33#define SYNC_DELWRI 0x0002 /* look at delayed writes */
33#define SYNC_WAIT 0x0004 /* wait for i/o to complete */ 34#define SYNC_WAIT 0x0004 /* wait for i/o to complete */
34#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ 35#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */
35#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ 36#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */
37#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */
36 38
37int xfs_syncd_init(struct xfs_mount *mp); 39int xfs_syncd_init(struct xfs_mount *mp);
38void xfs_syncd_stop(struct xfs_mount *mp); 40void xfs_syncd_stop(struct xfs_mount *mp);
@@ -43,8 +45,7 @@ int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
43int xfs_quiesce_data(struct xfs_mount *mp); 45int xfs_quiesce_data(struct xfs_mount *mp);
44void xfs_quiesce_attr(struct xfs_mount *mp); 46void xfs_quiesce_attr(struct xfs_mount *mp);
45 47
46void xfs_flush_inode(struct xfs_inode *ip); 48void xfs_flush_inodes(struct xfs_inode *ip);
47void xfs_flush_device(struct xfs_inode *ip);
48 49
49int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); 50int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
50int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); 51int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 478e587087fe..89b81eedce6a 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -69,15 +69,6 @@ xfs_inode_alloc(
69 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 69 ASSERT(!spin_is_locked(&ip->i_flags_lock));
70 ASSERT(completion_done(&ip->i_flush)); 70 ASSERT(completion_done(&ip->i_flush));
71 71
72 /*
73 * initialise the VFS inode here to get failures
74 * out of the way early.
75 */
76 if (!inode_init_always(mp->m_super, VFS_I(ip))) {
77 kmem_zone_free(xfs_inode_zone, ip);
78 return NULL;
79 }
80
81 /* initialise the xfs inode */ 72 /* initialise the xfs inode */
82 ip->i_ino = ino; 73 ip->i_ino = ino;
83 ip->i_mount = mp; 74 ip->i_mount = mp;
@@ -113,6 +104,20 @@ xfs_inode_alloc(
113#ifdef XFS_DIR2_TRACE 104#ifdef XFS_DIR2_TRACE
114 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); 105 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
115#endif 106#endif
107 /*
108 * Now initialise the VFS inode. We do this after the xfs_inode
109 * initialisation as internal failures will result in ->destroy_inode
110 * being called and that will pass down through the reclaim path and
111 * free the XFS inode. This path requires the XFS inode to already be
112 * initialised. Hence if this call fails, the xfs_inode has already
113 * been freed and we should not reference it at all in the error
114 * handling.
115 */
116 if (!inode_init_always(mp->m_super, VFS_I(ip)))
117 return NULL;
118
119 /* prevent anyone from using this yet */
120 VFS_I(ip)->i_state = I_NEW|I_LOCK;
116 121
117 return ip; 122 return ip;
118} 123}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 08ce72316bfe..5aaa2d7ec155 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -338,38 +338,6 @@ xfs_iomap_eof_align_last_fsb(
338} 338}
339 339
340STATIC int 340STATIC int
341xfs_flush_space(
342 xfs_inode_t *ip,
343 int *fsynced,
344 int *ioflags)
345{
346 switch (*fsynced) {
347 case 0:
348 if (ip->i_delayed_blks) {
349 xfs_iunlock(ip, XFS_ILOCK_EXCL);
350 xfs_flush_inode(ip);
351 xfs_ilock(ip, XFS_ILOCK_EXCL);
352 *fsynced = 1;
353 } else {
354 *ioflags |= BMAPI_SYNC;
355 *fsynced = 2;
356 }
357 return 0;
358 case 1:
359 *fsynced = 2;
360 *ioflags |= BMAPI_SYNC;
361 return 0;
362 case 2:
363 xfs_iunlock(ip, XFS_ILOCK_EXCL);
364 xfs_flush_device(ip);
365 xfs_ilock(ip, XFS_ILOCK_EXCL);
366 *fsynced = 3;
367 return 0;
368 }
369 return 1;
370}
371
372STATIC int
373xfs_cmn_err_fsblock_zero( 341xfs_cmn_err_fsblock_zero(
374 xfs_inode_t *ip, 342 xfs_inode_t *ip,
375 xfs_bmbt_irec_t *imap) 343 xfs_bmbt_irec_t *imap)
@@ -538,15 +506,9 @@ error_out:
538} 506}
539 507
540/* 508/*
541 * If the caller is doing a write at the end of the file, 509 * If the caller is doing a write at the end of the file, then extend the
542 * then extend the allocation out to the file system's write 510 * allocation out to the file system's write iosize. We clean up any extra
543 * iosize. We clean up any extra space left over when the 511 * space left over when the file is closed in xfs_inactive().
544 * file is closed in xfs_inactive().
545 *
546 * For sync writes, we are flushing delayed allocate space to
547 * try to make additional space available for allocation near
548 * the filesystem full boundary - preallocation hurts in that
549 * situation, of course.
550 */ 512 */
551STATIC int 513STATIC int
552xfs_iomap_eof_want_preallocate( 514xfs_iomap_eof_want_preallocate(
@@ -565,7 +527,7 @@ xfs_iomap_eof_want_preallocate(
565 int n, error, imaps; 527 int n, error, imaps;
566 528
567 *prealloc = 0; 529 *prealloc = 0;
568 if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size) 530 if ((offset + count) <= ip->i_size)
569 return 0; 531 return 0;
570 532
571 /* 533 /*
@@ -611,7 +573,7 @@ xfs_iomap_write_delay(
611 xfs_extlen_t extsz; 573 xfs_extlen_t extsz;
612 int nimaps; 574 int nimaps;
613 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; 575 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
614 int prealloc, fsynced = 0; 576 int prealloc, flushed = 0;
615 int error; 577 int error;
616 578
617 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 579 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -627,12 +589,12 @@ xfs_iomap_write_delay(
627 extsz = xfs_get_extsz_hint(ip); 589 extsz = xfs_get_extsz_hint(ip);
628 offset_fsb = XFS_B_TO_FSBT(mp, offset); 590 offset_fsb = XFS_B_TO_FSBT(mp, offset);
629 591
630retry:
631 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, 592 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
632 ioflag, imap, XFS_WRITE_IMAPS, &prealloc); 593 ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
633 if (error) 594 if (error)
634 return error; 595 return error;
635 596
597retry:
636 if (prealloc) { 598 if (prealloc) {
637 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); 599 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
638 ioalign = XFS_B_TO_FSBT(mp, aligned_offset); 600 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
@@ -659,15 +621,22 @@ retry:
659 621
660 /* 622 /*
661 * If bmapi returned us nothing, and if we didn't get back EDQUOT, 623 * If bmapi returned us nothing, and if we didn't get back EDQUOT,
662 * then we must have run out of space - flush delalloc, and retry.. 624 * then we must have run out of space - flush all other inodes with
625 * delalloc blocks and retry without EOF preallocation.
663 */ 626 */
664 if (nimaps == 0) { 627 if (nimaps == 0) {
665 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, 628 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
666 ip, offset, count); 629 ip, offset, count);
667 if (xfs_flush_space(ip, &fsynced, &ioflag)) 630 if (flushed)
668 return XFS_ERROR(ENOSPC); 631 return XFS_ERROR(ENOSPC);
669 632
633 xfs_iunlock(ip, XFS_ILOCK_EXCL);
634 xfs_flush_inodes(ip);
635 xfs_ilock(ip, XFS_ILOCK_EXCL);
636
637 flushed = 1;
670 error = 0; 638 error = 0;
639 prealloc = 0;
671 goto retry; 640 goto retry;
672 } 641 }
673 642
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index a1cc1322fc0f..fdcf7b82747f 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -40,8 +40,7 @@ typedef enum {
40 BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ 40 BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */
41 BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ 41 BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */
42 BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ 42 BMAPI_MMAP = (1 << 6), /* allocate for mmap write */
43 BMAPI_SYNC = (1 << 7), /* sync write to flush delalloc space */ 43 BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */
44 BMAPI_TRYLOCK = (1 << 8), /* non-blocking request */
45} bmapi_flags_t; 44} bmapi_flags_t;
46 45
47 46
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f76c6d7cea21..3750f04ede0b 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -562,9 +562,8 @@ xfs_log_mount(
562 } 562 }
563 563
564 mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); 564 mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
565 if (!mp->m_log) { 565 if (IS_ERR(mp->m_log)) {
566 cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!"); 566 error = -PTR_ERR(mp->m_log);
567 error = ENOMEM;
568 goto out; 567 goto out;
569 } 568 }
570 569
@@ -1180,10 +1179,13 @@ xlog_alloc_log(xfs_mount_t *mp,
1180 xfs_buf_t *bp; 1179 xfs_buf_t *bp;
1181 int i; 1180 int i;
1182 int iclogsize; 1181 int iclogsize;
1182 int error = ENOMEM;
1183 1183
1184 log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); 1184 log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
1185 if (!log) 1185 if (!log) {
1186 return NULL; 1186 xlog_warn("XFS: Log allocation failed: No memory!");
1187 goto out;
1188 }
1187 1189
1188 log->l_mp = mp; 1190 log->l_mp = mp;
1189 log->l_targ = log_target; 1191 log->l_targ = log_target;
@@ -1201,19 +1203,35 @@ xlog_alloc_log(xfs_mount_t *mp,
1201 log->l_grant_reserve_cycle = 1; 1203 log->l_grant_reserve_cycle = 1;
1202 log->l_grant_write_cycle = 1; 1204 log->l_grant_write_cycle = 1;
1203 1205
1206 error = EFSCORRUPTED;
1204 if (xfs_sb_version_hassector(&mp->m_sb)) { 1207 if (xfs_sb_version_hassector(&mp->m_sb)) {
1205 log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; 1208 log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
1206 ASSERT(log->l_sectbb_log <= mp->m_sectbb_log); 1209 if (log->l_sectbb_log < 0 ||
1210 log->l_sectbb_log > mp->m_sectbb_log) {
1211 xlog_warn("XFS: Log sector size (0x%x) out of range.",
1212 log->l_sectbb_log);
1213 goto out_free_log;
1214 }
1215
1207 /* for larger sector sizes, must have v2 or external log */ 1216 /* for larger sector sizes, must have v2 or external log */
1208 ASSERT(log->l_sectbb_log == 0 || 1217 if (log->l_sectbb_log != 0 &&
1209 log->l_logBBstart == 0 || 1218 (log->l_logBBstart != 0 &&
1210 xfs_sb_version_haslogv2(&mp->m_sb)); 1219 !xfs_sb_version_haslogv2(&mp->m_sb))) {
1211 ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT); 1220 xlog_warn("XFS: log sector size (0x%x) invalid "
1221 "for configuration.", log->l_sectbb_log);
1222 goto out_free_log;
1223 }
1224 if (mp->m_sb.sb_logsectlog < BBSHIFT) {
1225 xlog_warn("XFS: Log sector log (0x%x) too small.",
1226 mp->m_sb.sb_logsectlog);
1227 goto out_free_log;
1228 }
1212 } 1229 }
1213 log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; 1230 log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;
1214 1231
1215 xlog_get_iclog_buffer_size(mp, log); 1232 xlog_get_iclog_buffer_size(mp, log);
1216 1233
1234 error = ENOMEM;
1217 bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); 1235 bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
1218 if (!bp) 1236 if (!bp)
1219 goto out_free_log; 1237 goto out_free_log;
@@ -1313,7 +1331,8 @@ out_free_iclog:
1313 xfs_buf_free(log->l_xbuf); 1331 xfs_buf_free(log->l_xbuf);
1314out_free_log: 1332out_free_log:
1315 kmem_free(log); 1333 kmem_free(log);
1316 return NULL; 1334out:
1335 return ERR_PTR(-error);
1317} /* xlog_alloc_log */ 1336} /* xlog_alloc_log */
1318 1337
1319 1338
@@ -2541,18 +2560,19 @@ redo:
2541 xlog_ins_ticketq(&log->l_reserve_headq, tic); 2560 xlog_ins_ticketq(&log->l_reserve_headq, tic);
2542 xlog_trace_loggrant(log, tic, 2561 xlog_trace_loggrant(log, tic,
2543 "xlog_grant_log_space: sleep 2"); 2562 "xlog_grant_log_space: sleep 2");
2563 spin_unlock(&log->l_grant_lock);
2564 xlog_grant_push_ail(log->l_mp, need_bytes);
2565 spin_lock(&log->l_grant_lock);
2566
2544 XFS_STATS_INC(xs_sleep_logspace); 2567 XFS_STATS_INC(xs_sleep_logspace);
2545 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); 2568 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2546 2569
2547 if (XLOG_FORCED_SHUTDOWN(log)) { 2570 spin_lock(&log->l_grant_lock);
2548 spin_lock(&log->l_grant_lock); 2571 if (XLOG_FORCED_SHUTDOWN(log))
2549 goto error_return; 2572 goto error_return;
2550 }
2551 2573
2552 xlog_trace_loggrant(log, tic, 2574 xlog_trace_loggrant(log, tic,
2553 "xlog_grant_log_space: wake 2"); 2575 "xlog_grant_log_space: wake 2");
2554 xlog_grant_push_ail(log->l_mp, need_bytes);
2555 spin_lock(&log->l_grant_lock);
2556 goto redo; 2576 goto redo;
2557 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2577 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2558 xlog_del_ticketq(&log->l_reserve_headq, tic); 2578 xlog_del_ticketq(&log->l_reserve_headq, tic);
@@ -2631,7 +2651,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2631 * for more free space, otherwise try to get some space for 2651 * for more free space, otherwise try to get some space for
2632 * this transaction. 2652 * this transaction.
2633 */ 2653 */
2634 2654 need_bytes = tic->t_unit_res;
2635 if ((ntic = log->l_write_headq)) { 2655 if ((ntic = log->l_write_headq)) {
2636 free_bytes = xlog_space_left(log, log->l_grant_write_cycle, 2656 free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
2637 log->l_grant_write_bytes); 2657 log->l_grant_write_bytes);
@@ -2651,26 +2671,25 @@ xlog_regrant_write_log_space(xlog_t *log,
2651 2671
2652 xlog_trace_loggrant(log, tic, 2672 xlog_trace_loggrant(log, tic,
2653 "xlog_regrant_write_log_space: sleep 1"); 2673 "xlog_regrant_write_log_space: sleep 1");
2674 spin_unlock(&log->l_grant_lock);
2675 xlog_grant_push_ail(log->l_mp, need_bytes);
2676 spin_lock(&log->l_grant_lock);
2677
2654 XFS_STATS_INC(xs_sleep_logspace); 2678 XFS_STATS_INC(xs_sleep_logspace);
2655 sv_wait(&tic->t_wait, PINOD|PLTWAIT, 2679 sv_wait(&tic->t_wait, PINOD|PLTWAIT,
2656 &log->l_grant_lock, s); 2680 &log->l_grant_lock, s);
2657 2681
2658 /* If we're shutting down, this tic is already 2682 /* If we're shutting down, this tic is already
2659 * off the queue */ 2683 * off the queue */
2660 if (XLOG_FORCED_SHUTDOWN(log)) { 2684 spin_lock(&log->l_grant_lock);
2661 spin_lock(&log->l_grant_lock); 2685 if (XLOG_FORCED_SHUTDOWN(log))
2662 goto error_return; 2686 goto error_return;
2663 }
2664 2687
2665 xlog_trace_loggrant(log, tic, 2688 xlog_trace_loggrant(log, tic,
2666 "xlog_regrant_write_log_space: wake 1"); 2689 "xlog_regrant_write_log_space: wake 1");
2667 xlog_grant_push_ail(log->l_mp, tic->t_unit_res);
2668 spin_lock(&log->l_grant_lock);
2669 } 2690 }
2670 } 2691 }
2671 2692
2672 need_bytes = tic->t_unit_res;
2673
2674redo: 2693redo:
2675 if (XLOG_FORCED_SHUTDOWN(log)) 2694 if (XLOG_FORCED_SHUTDOWN(log))
2676 goto error_return; 2695 goto error_return;
@@ -2680,19 +2699,20 @@ redo:
2680 if (free_bytes < need_bytes) { 2699 if (free_bytes < need_bytes) {
2681 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2700 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2682 xlog_ins_ticketq(&log->l_write_headq, tic); 2701 xlog_ins_ticketq(&log->l_write_headq, tic);
2702 spin_unlock(&log->l_grant_lock);
2703 xlog_grant_push_ail(log->l_mp, need_bytes);
2704 spin_lock(&log->l_grant_lock);
2705
2683 XFS_STATS_INC(xs_sleep_logspace); 2706 XFS_STATS_INC(xs_sleep_logspace);
2684 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); 2707 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2685 2708
2686 /* If we're shutting down, this tic is already off the queue */ 2709 /* If we're shutting down, this tic is already off the queue */
2687 if (XLOG_FORCED_SHUTDOWN(log)) { 2710 spin_lock(&log->l_grant_lock);
2688 spin_lock(&log->l_grant_lock); 2711 if (XLOG_FORCED_SHUTDOWN(log))
2689 goto error_return; 2712 goto error_return;
2690 }
2691 2713
2692 xlog_trace_loggrant(log, tic, 2714 xlog_trace_loggrant(log, tic,
2693 "xlog_regrant_write_log_space: wake 2"); 2715 "xlog_regrant_write_log_space: wake 2");
2694 xlog_grant_push_ail(log->l_mp, need_bytes);
2695 spin_lock(&log->l_grant_lock);
2696 goto redo; 2716 goto redo;
2697 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2717 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2698 xlog_del_ticketq(&log->l_write_headq, tic); 2718 xlog_del_ticketq(&log->l_write_headq, tic);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7af44adffc8f..d6a64392f983 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -313,7 +313,7 @@ typedef struct xfs_mount {
313#endif 313#endif
314 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ 314 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
315 struct task_struct *m_sync_task; /* generalised sync thread */ 315 struct task_struct *m_sync_task; /* generalised sync thread */
316 bhv_vfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ 316 xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */
317 struct list_head m_sync_list; /* sync thread work item list */ 317 struct list_head m_sync_list; /* sync thread work item list */
318 spinlock_t m_sync_lock; /* work item list lock */ 318 spinlock_t m_sync_lock; /* work item list lock */
319 int m_sync_seq; /* sync thread generation no. */ 319 int m_sync_seq; /* sync thread generation no. */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 7394c7af5de5..19cf90a9c762 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1457,6 +1457,13 @@ xfs_create(
1457 error = xfs_trans_reserve(tp, resblks, log_res, 0, 1457 error = xfs_trans_reserve(tp, resblks, log_res, 0,
1458 XFS_TRANS_PERM_LOG_RES, log_count); 1458 XFS_TRANS_PERM_LOG_RES, log_count);
1459 if (error == ENOSPC) { 1459 if (error == ENOSPC) {
1460 /* flush outstanding delalloc blocks and retry */
1461 xfs_flush_inodes(dp);
1462 error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
1463 XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
1464 }
1465 if (error == ENOSPC) {
1466 /* No space at all so try a "no-allocation" reservation */
1460 resblks = 0; 1467 resblks = 0;
1461 error = xfs_trans_reserve(tp, 0, log_res, 0, 1468 error = xfs_trans_reserve(tp, 0, log_res, 0,
1462 XFS_TRANS_PERM_LOG_RES, log_count); 1469 XFS_TRANS_PERM_LOG_RES, log_count);