author    Ingo Molnar <mingo@elte.hu>  2009-09-19 05:27:32 -0400
committer Ingo Molnar <mingo@elte.hu>  2009-09-19 05:28:41 -0400
commit    929bf0d0156562ce631728b6fa53d68004d456d2 (patch)
tree      739063990a8077b29ef97e69d73bce94573daae4 /fs
parent    def0a9b2573e00ab0b486cb5382625203ab4c4a6 (diff)
parent    202c4675c55ddf6b443c7e057d2dff6b42ef71aa (diff)
Merge branch 'linus' into perfcounters/core
Merge reason: Bring in tracing changes we depend on.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig | 2
-rw-r--r--  fs/afs/write.c | 1
-rw-r--r--  fs/block_dev.c | 30
-rw-r--r--  fs/btrfs/disk-io.c | 1
-rw-r--r--  fs/btrfs/extent-tree.c | 3
-rw-r--r--  fs/btrfs/ordered-data.c | 1
-rw-r--r--  fs/btrfs/volumes.c | 4
-rw-r--r--  fs/cifs/CHANGES | 5
-rw-r--r--  fs/cifs/cifs_spnego.c | 2
-rw-r--r--  fs/cifs/cifsacl.c | 4
-rw-r--r--  fs/cifs/cifsencrypt.c | 1
-rw-r--r--  fs/cifs/cifsfs.c | 22
-rw-r--r--  fs/cifs/cifsfs.h | 2
-rw-r--r--  fs/cifs/cifsglob.h | 21
-rw-r--r--  fs/cifs/cifssmb.c | 316
-rw-r--r--  fs/cifs/connect.c | 49
-rw-r--r--  fs/cifs/dir.c | 2
-rw-r--r--  fs/cifs/file.c | 43
-rw-r--r--  fs/cifs/inode.c | 6
-rw-r--r--  fs/cifs/transport.c | 17
-rw-r--r--  fs/dlm/netlink.c | 2
-rw-r--r--  fs/ext2/inode.c | 2
-rw-r--r--  fs/ext3/file.c | 61
-rw-r--r--  fs/ext4/file.c | 53
-rw-r--r--  fs/fat/file.c | 22
-rw-r--r--  fs/fat/misc.c | 4
-rw-r--r--  fs/fs-writeback.c | 399
-rw-r--r--  fs/fuse/inode.c | 2
-rw-r--r--  fs/gfs2/Makefile | 2
-rw-r--r--  fs/gfs2/acl.c | 106
-rw-r--r--  fs/gfs2/dentry.c | 18
-rw-r--r--  fs/gfs2/eaops.c | 157
-rw-r--r--  fs/gfs2/eaops.h | 30
-rw-r--r--  fs/gfs2/export.c | 36
-rw-r--r--  fs/gfs2/file.c | 1
-rw-r--r--  fs/gfs2/incore.h | 15
-rw-r--r--  fs/gfs2/inode.c | 159
-rw-r--r--  fs/gfs2/ops_fstype.c | 66
-rw-r--r--  fs/gfs2/ops_inode.c | 82
-rw-r--r--  fs/gfs2/rgrp.c | 88
-rw-r--r--  fs/gfs2/rgrp.h | 6
-rw-r--r--  fs/gfs2/super.c | 46
-rw-r--r--  fs/gfs2/super.h | 5
-rw-r--r--  fs/gfs2/sys.c | 31
-rw-r--r--  fs/gfs2/util.c | 41
-rw-r--r--  fs/gfs2/xattr.c (renamed from fs/gfs2/eattr.c) | 425
-rw-r--r--  fs/gfs2/xattr.h (renamed from fs/gfs2/eattr.h) | 54
-rw-r--r--  fs/inode.c | 4
-rw-r--r--  fs/jbd2/commit.c | 1
-rw-r--r--  fs/nfs/super.c | 2
-rw-r--r--  fs/nfs/write.c | 1
-rw-r--r--  fs/nilfs2/Kconfig | 2
-rw-r--r--  fs/nilfs2/bmap.c | 151
-rw-r--r--  fs/nilfs2/bmap.h | 76
-rw-r--r--  fs/nilfs2/btree.c | 625
-rw-r--r--  fs/nilfs2/cpfile.c | 11
-rw-r--r--  fs/nilfs2/cpfile.h | 2
-rw-r--r--  fs/nilfs2/dat.c | 42
-rw-r--r--  fs/nilfs2/dat.h | 8
-rw-r--r--  fs/nilfs2/direct.c | 161
-rw-r--r--  fs/nilfs2/ifile.h | 1
-rw-r--r--  fs/nilfs2/inode.c | 3
-rw-r--r--  fs/nilfs2/ioctl.c | 26
-rw-r--r--  fs/nilfs2/mdt.c | 40
-rw-r--r--  fs/nilfs2/mdt.h | 3
-rw-r--r--  fs/nilfs2/recovery.c | 3
-rw-r--r--  fs/nilfs2/segbuf.c | 4
-rw-r--r--  fs/nilfs2/segment.c | 7
-rw-r--r--  fs/nilfs2/sufile.h | 1
-rw-r--r--  fs/nilfs2/super.c | 100
-rw-r--r--  fs/nilfs2/the_nilfs.c | 19
-rw-r--r--  fs/nilfs2/the_nilfs.h | 43
-rw-r--r--  fs/ntfs/file.c | 16
-rw-r--r--  fs/ntfs/mft.c | 13
-rw-r--r--  fs/ocfs2/file.c | 49
-rw-r--r--  fs/partitions/check.c | 14
-rw-r--r--  fs/splice.c | 30
-rw-r--r--  fs/super.c | 6
-rw-r--r--  fs/sync.c | 65
-rw-r--r--  fs/ubifs/budget.c | 20
-rw-r--r--  fs/ubifs/super.c | 1
-rw-r--r--  fs/udf/directory.c | 86
-rw-r--r--  fs/udf/file.c | 2
-rw-r--r--  fs/udf/inode.c | 19
-rw-r--r--  fs/udf/lowlevel.c | 4
-rw-r--r--  fs/udf/namei.c | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c | 19
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_lrw.c | 7
-rw-r--r--  fs/xfs/linux-2.6/xfs_stats.c | 51
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 24
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c | 15
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h | 1
-rw-r--r--  fs/xfs/quota/xfs_qm_stats.c | 78
-rw-r--r--  fs/xfs/xfs_ag.h | 9
-rw-r--r--  fs/xfs/xfs_bmap.c | 2
-rw-r--r--  fs/xfs/xfs_bmap.h | 11
-rw-r--r--  fs/xfs/xfs_bmap_btree.c | 20
-rw-r--r--  fs/xfs/xfs_bmap_btree.h | 1
-rw-r--r--  fs/xfs/xfs_btree.c | 42
-rw-r--r--  fs/xfs/xfs_btree.h | 15
-rw-r--r--  fs/xfs/xfs_ialloc.c | 805
-rw-r--r--  fs/xfs/xfs_ialloc.h | 18
-rw-r--r--  fs/xfs/xfs_iget.c | 27
-rw-r--r--  fs/xfs/xfs_inode.c | 8
-rw-r--r--  fs/xfs/xfs_inode.h | 8
-rw-r--r--  fs/xfs/xfs_inode_item.c | 10
-rw-r--r--  fs/xfs/xfs_inode_item.h | 2
-rw-r--r--  fs/xfs/xfs_inum.h | 1
-rw-r--r--  fs/xfs/xfs_itable.c | 98
-rw-r--r--  fs/xfs/xfs_itable.h | 5
-rw-r--r--  fs/xfs/xfs_log_priv.h | 2
-rw-r--r--  fs/xfs/xfs_log_recover.c | 2
-rw-r--r--  fs/xfs/xfs_mount.c | 2
-rw-r--r--  fs/xfs/xfs_mount.h | 3
-rw-r--r--  fs/xfs/xfs_mru_cache.c | 29
-rw-r--r--  fs/xfs/xfs_mru_cache.h | 1
-rw-r--r--  fs/xfs/xfs_rw.c | 84
-rw-r--r--  fs/xfs/xfs_rw.h | 7
-rw-r--r--  fs/xfs/xfs_trans.h | 2
-rw-r--r--  fs/xfs/xfs_trans_buf.c | 4
-rw-r--r--  fs/xfs/xfs_trans_inode.c | 86
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 17
124 files changed, 2295 insertions(+), 3330 deletions(-)
diff --git a/fs/Kconfig b/fs/Kconfig
index 0e7da7bb5d93..455aa207e67e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -43,6 +43,7 @@ source "fs/xfs/Kconfig"
 source "fs/gfs2/Kconfig"
 source "fs/ocfs2/Kconfig"
 source "fs/btrfs/Kconfig"
+source "fs/nilfs2/Kconfig"
 
 endif # BLOCK
 
@@ -186,7 +187,6 @@ source "fs/romfs/Kconfig"
 source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
 source "fs/exofs/Kconfig"
-source "fs/nilfs2/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c2e7a7ff0080..c63a3c8beb73 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -712,7 +712,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
 		.bdi = mapping->backing_dev_info,
 		.sync_mode = WB_SYNC_ALL,
 		.nr_to_write = LONG_MAX,
-		.for_writepages = 1,
 		.range_cyclic = 1,
 	};
 	int ret;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 94dfda24c06e..71e7e03ac343 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -420,7 +420,6 @@ static void bdev_destroy_inode(struct inode *inode)
 {
 	struct bdev_inode *bdi = BDEV_I(inode);
 
-	bdi->bdev.bd_inode_backing_dev_info = NULL;
 	kmem_cache_free(bdev_cachep, bdi);
 }
 
@@ -1405,6 +1404,33 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 }
 
 /*
+ * Write data to the block device. Only intended for the block device itself
+ * and the raw driver which basically is a fake block device.
+ *
+ * Does not take i_mutex for the write and thus is not for general purpose
+ * use.
+ */
+ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
+			 unsigned long nr_segs, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	ssize_t ret;
+
+	BUG_ON(iocb->ki_pos != pos);
+
+	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+	if (ret > 0 || ret == -EIOCBQUEUED) {
+		ssize_t err;
+
+		err = generic_write_sync(file, pos, ret);
+		if (err < 0 && ret > 0)
+			ret = err;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blkdev_aio_write);
+
+/*
  * Try to release a page associated with block device when the system
  * is under memory pressure.
  */
@@ -1436,7 +1462,7 @@ const struct file_operations def_blk_fops = {
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,
-	.aio_write	= generic_file_aio_write_nolock,
+	.aio_write	= blkdev_aio_write,
 	.mmap		= generic_file_mmap,
 	.fsync		= block_fsync,
 	.unlocked_ioctl	= block_ioctl,
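
The blkdev_aio_write() hunk above shows the pattern this merge applies across filesystems: perform the buffered write first, then do any O_SYNC flushing as a separate step (generic_write_sync() here), letting a flush failure override an otherwise successful write's return value. A minimal userspace sketch of that write-then-sync split follows; write_then_sync() is an illustrative name, not a kernel or libc API.

/*
 * Userspace sketch of the write-then-sync split used by
 * blkdev_aio_write(): write the data, and only if O_SYNC semantics
 * apply, flush afterwards; a flush error overrides a successful write.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static ssize_t write_then_sync(int fd, const void *buf, size_t len, int o_sync)
{
	ssize_t ret = write(fd, buf, len);

	if (ret > 0 && o_sync) {
		/* mirrors: err = generic_write_sync(); if (err < 0) ret = err; */
		if (fdatasync(fd) < 0)
			ret = -errno;
	}
	return ret;
}

int main(void)
{
	int fd = open("demo.bin", O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (fd < 0)
		return 1;
	printf("wrote %zd bytes\n", write_then_sync(fd, "data", 4, 1));
	close(fd);
	return 0;
}
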
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 15831d5c7367..8b8192790011 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1600,6 +1600,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	sb->s_blocksize = 4096;
 	sb->s_blocksize_bits = blksize_bits(4096);
+	sb->s_bdi = &fs_info->bdi;
 
 	/*
 	 * we set the i_size on the btree inode to the max possible int.
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 72a2b9c28e9f..535f85ba104f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1511,7 +1511,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 static void btrfs_issue_discard(struct block_device *bdev,
 				u64 start, u64 len)
 {
-	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
+	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
+			DISCARD_FL_BARRIER);
 }
 #endif
 
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index d6f0806c682f..7b2f401e604e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -740,7 +740,6 @@ int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
 		.nr_to_write = mapping->nrpages * 2,
 		.range_start = start,
 		.range_end = end,
-		.for_writepages = 1,
 	};
 	return btrfs_writepages(mapping, &wbc);
 }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5dbefd11b4af..5cf405b0828d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -260,7 +260,7 @@ loop_lock:
 		num_run++;
 		batch_run++;
 
-		if (bio_sync(cur))
+		if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
 			num_sync_run++;
 
 		if (need_resched()) {
@@ -2903,7 +2903,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
 	bio->bi_rw |= rw;
 
 	spin_lock(&device->io_lock);
-	if (bio_sync(bio))
+	if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
 		pending_bios = &device->pending_sync_bios;
 	else
 		pending_bios = &device->pending_bios;
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index e85b1e4389e0..145540a316ab 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -3,7 +3,10 @@ Version 1.60
 Fix memory leak in reconnect. Fix oops in DFS mount error path.
 Set s_maxbytes to smaller (the max that vfs can handle) so that
 sendfile will now work over cifs mounts again. Add noforcegid
-and noforceuid mount parameters.
+and noforceuid mount parameters. Fix small mem leak when using
+ntlmv2. Fix 2nd mount to same server but with different port to
+be allowed (rather than reusing the 1st port) - only when the
+user explicitly overrides the port on the 2nd mount.
 
 Version 1.59
 ------------
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 051caecf7d67..8ec7736ce954 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -125,7 +125,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 	if (server->addr.sockAddr.sin_family == AF_INET)
 		sprintf(dp, "ip4=%pI4", &server->addr.sockAddr.sin_addr);
 	else if (server->addr.sockAddr.sin_family == AF_INET6)
-		sprintf(dp, "ip6=%pi6", &server->addr.sockAddr6.sin6_addr);
+		sprintf(dp, "ip6=%pI6", &server->addr.sockAddr6.sin6_addr);
 	else
 		goto out;
 
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 6941c22398a6..7dfe0842a6f6 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -607,7 +607,7 @@ static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
 		return get_cifs_acl_by_path(cifs_sb, path, pacllen);
 
 	pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen);
-	atomic_dec(&open_file->wrtPending);
+	cifsFileInfo_put(open_file);
 	return pntsd;
 }
 
@@ -665,7 +665,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 		return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen);
 
 	rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen);
-	atomic_dec(&open_file->wrtPending);
+	cifsFileInfo_put(open_file);
 	return rc;
 }
 
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 7c9809523f42..7efe1745494d 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -373,6 +373,7 @@ calc_exit_2:
 	   compare with the NTLM example */
 	hmac_md5_final(ses->server->ntlmv2_hash, pctxt);
 
+	kfree(pctxt);
 	return rc;
 }
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 84b75253b05a..3610e9958b4c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -361,13 +361,10 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
 static int
 cifs_show_options(struct seq_file *s, struct vfsmount *m)
 {
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *tcon;
-
-	cifs_sb = CIFS_SB(m->mnt_sb);
-	tcon = cifs_sb->tcon;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb);
+	struct cifsTconInfo *tcon = cifs_sb->tcon;
 
-	seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName);
+	seq_printf(s, ",unc=%s", tcon->treeName);
 	if (tcon->ses->userName)
 		seq_printf(s, ",username=%s", tcon->ses->userName);
 	if (tcon->ses->domainName)
@@ -989,19 +986,19 @@ static int cifs_oplock_thread(void *dummyarg)
 		if (try_to_freeze())
 			continue;
 
-		spin_lock(&GlobalMid_Lock);
-		if (list_empty(&GlobalOplock_Q)) {
-			spin_unlock(&GlobalMid_Lock);
+		spin_lock(&cifs_oplock_lock);
+		if (list_empty(&cifs_oplock_list)) {
+			spin_unlock(&cifs_oplock_lock);
 			set_current_state(TASK_INTERRUPTIBLE);
 			schedule_timeout(39*HZ);
 		} else {
-			oplock_item = list_entry(GlobalOplock_Q.next,
+			oplock_item = list_entry(cifs_oplock_list.next,
 				struct oplock_q_entry, qhead);
 			cFYI(1, ("found oplock item to write out"));
 			pTcon = oplock_item->tcon;
 			inode = oplock_item->pinode;
 			netfid = oplock_item->netfid;
-			spin_unlock(&GlobalMid_Lock);
+			spin_unlock(&cifs_oplock_lock);
 			DeleteOplockQEntry(oplock_item);
 			/* can not grab inode sem here since it would
 				deadlock when oplock received on delete
@@ -1058,7 +1055,7 @@ init_cifs(void)
 	int rc = 0;
 	cifs_proc_init();
 	INIT_LIST_HEAD(&cifs_tcp_ses_list);
-	INIT_LIST_HEAD(&GlobalOplock_Q);
+	INIT_LIST_HEAD(&cifs_oplock_list);
 #ifdef CONFIG_CIFS_EXPERIMENTAL
 	INIT_LIST_HEAD(&GlobalDnotifyReqList);
 	INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
@@ -1087,6 +1084,7 @@ init_cifs(void)
 	rwlock_init(&GlobalSMBSeslock);
 	rwlock_init(&cifs_tcp_ses_lock);
 	spin_lock_init(&GlobalMid_Lock);
+	spin_lock_init(&cifs_oplock_lock);
 
 	if (cifs_max_pending < 2) {
 		cifs_max_pending = 2;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 6c170948300d..094325e3f714 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -113,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* EXPERIMENTAL */
 
-#define CIFS_VERSION   "1.60"
+#define CIFS_VERSION   "1.61"
 #endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 6084d6379c03..6cfc81a32703 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -351,11 +351,24 @@ struct cifsFileInfo {
 	bool closePend:1;	/* file is marked to close */
 	bool invalidHandle:1;	/* file closed via session abend */
 	bool messageMode:1;	/* for pipes: message vs byte mode */
-	atomic_t wrtPending;	/* handle in use - defer close */
+	atomic_t count;		/* reference count */
 	struct mutex fh_mutex;	/* prevents reopen race after dead ses*/
 	struct cifs_search_info srch_inf;
 };
 
+/* Take a reference on the file private data */
+static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file)
+{
+	atomic_inc(&cifs_file->count);
+}
+
+/* Release a reference on the file private data */
+static inline void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
+{
+	if (atomic_dec_and_test(&cifs_file->count))
+		kfree(cifs_file);
+}
+
 /*
  * One of these for each file inode
  */
@@ -656,7 +669,11 @@ GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock;
  */
 GLOBAL_EXTERN rwlock_t GlobalSMBSeslock;
 
-GLOBAL_EXTERN struct list_head GlobalOplock_Q;
+/* Global list of oplocks */
+GLOBAL_EXTERN struct list_head cifs_oplock_list;
+
+/* Protects the cifs_oplock_list */
+GLOBAL_EXTERN spinlock_t cifs_oplock_lock;
 
 /* Outstanding dir notify requests */
 GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
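
The cifsFileInfo_get()/cifsFileInfo_put() helpers added above replace the ad-hoc wrtPending counter with a plain reference count: the handle starts with one reference for the opener, each user of the handle takes and drops a reference, and the last put frees the structure. A standalone sketch of the same idiom, using C11 atomics and illustrative names rather than the CIFS types:

/* Standalone sketch of the get/put refcount idiom: the object starts
 * at one reference and is freed when the last reference is dropped. */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct file_info {
	atomic_int count;	/* reference count, starts at 1 for the opener */
	int netfid;
};

static struct file_info *file_info_alloc(int netfid)
{
	struct file_info *f = malloc(sizeof(*f));

	if (f) {
		atomic_init(&f->count, 1);
		f->netfid = netfid;
	}
	return f;
}

static void file_info_get(struct file_info *f)
{
	atomic_fetch_add(&f->count, 1);
}

static void file_info_put(struct file_info *f)
{
	/* fetch_sub returns the old value; 1 means we dropped the last ref */
	if (atomic_fetch_sub(&f->count, 1) == 1)
		free(f);
}

int main(void)
{
	struct file_info *f = file_info_alloc(42);

	file_info_get(f);	/* e.g. a writer pins the handle */
	file_info_put(f);	/* writer done */
	file_info_put(f);	/* opener's reference; frees here */
	return 0;
}
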
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 1866bc2927d4..301e307e1279 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -100,110 +100,138 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
 			to this tcon */
 }
 
-/* Allocate and return pointer to an SMB request buffer, and set basic
-   SMB information in the SMB header. If the return code is zero, this
-   function must have filled in request_buf pointer */
-static int
-small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
-		void **request_buf)
-{
-	int rc = 0;
-
-	/* SMBs NegProt, SessSetup, uLogoff do not have tcon yet so
-	   check for tcp and smb session status done differently
-	   for those three - in the calling routine */
-	if (tcon) {
-		if (tcon->tidStatus == CifsExiting) {
-			/* only tree disconnect, open, and write,
-			  (and ulogoff which does not have tcon)
-			  are allowed as we start force umount */
-			if ((smb_command != SMB_COM_WRITE_ANDX) &&
-			    (smb_command != SMB_COM_OPEN_ANDX) &&
-			    (smb_command != SMB_COM_TREE_DISCONNECT)) {
-				cFYI(1, ("can not send cmd %d while umounting",
-					smb_command));
-				return -ENODEV;
-			}
-		}
-		if ((tcon->ses) && (tcon->ses->status != CifsExiting) &&
-		    (tcon->ses->server)) {
-			struct nls_table *nls_codepage;
-			/* Give Demultiplex thread up to 10 seconds to
-			   reconnect, should be greater than cifs socket
-			   timeout which is 7 seconds */
-			while (tcon->ses->server->tcpStatus ==
-							CifsNeedReconnect) {
-				wait_event_interruptible_timeout(tcon->ses->server->response_q,
-					(tcon->ses->server->tcpStatus ==
-						CifsGood), 10 * HZ);
-				if (tcon->ses->server->tcpStatus ==
-						CifsNeedReconnect) {
-					/* on "soft" mounts we wait once */
-					if (!tcon->retry ||
-					   (tcon->ses->status == CifsExiting)) {
-						cFYI(1, ("gave up waiting on "
-							"reconnect in smb_init"));
-						return -EHOSTDOWN;
-					} /* else "hard" mount - keep retrying
-					     until process is killed or server
-					     comes back on-line */
-				} else /* TCP session is reestablished now */
-					break;
-			}
-
-			nls_codepage = load_nls_default();
-		/* need to prevent multiple threads trying to
-		   simultaneously reconnect the same SMB session */
-			down(&tcon->ses->sesSem);
-			if (tcon->ses->need_reconnect)
-				rc = cifs_setup_session(0, tcon->ses,
-							nls_codepage);
-			if (!rc && (tcon->need_reconnect)) {
-				mark_open_files_invalid(tcon);
-				rc = CIFSTCon(0, tcon->ses, tcon->treeName,
-					      tcon, nls_codepage);
-				up(&tcon->ses->sesSem);
-				/* BB FIXME add code to check if wsize needs
-				   update due to negotiated smb buffer size
-				   shrinking */
-				if (rc == 0) {
-					atomic_inc(&tconInfoReconnectCount);
-					/* tell server Unix caps we support */
-					if (tcon->ses->capabilities & CAP_UNIX)
-						reset_cifs_unix_caps(
-						0 /* no xid */,
-						tcon,
-						NULL /* we do not know sb */,
-						NULL /* no vol info */);
-				}
-
-				cFYI(1, ("reconnect tcon rc = %d", rc));
-				/* Removed call to reopen open files here.
-				   It is safer (and faster) to reopen files
-				   one at a time as needed in read and write */
-
-				/* Check if handle based operation so we
-				   know whether we can continue or not without
-				   returning to caller to reset file handle */
-				switch (smb_command) {
-				case SMB_COM_READ_ANDX:
-				case SMB_COM_WRITE_ANDX:
-				case SMB_COM_CLOSE:
-				case SMB_COM_FIND_CLOSE2:
-				case SMB_COM_LOCKING_ANDX: {
-					unload_nls(nls_codepage);
-					return -EAGAIN;
-				}
-				}
-			} else {
-				up(&tcon->ses->sesSem);
-			}
-			unload_nls(nls_codepage);
-
-		} else {
-			return -EIO;
-		}
-	}
+/* reconnect the socket, tcon, and smb session if needed */
+static int
+cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
+{
+	int rc = 0;
+	struct cifsSesInfo *ses;
+	struct TCP_Server_Info *server;
+	struct nls_table *nls_codepage;
+
+	/*
+	 * SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for
+	 * tcp and smb session status done differently for those three - in the
+	 * calling routine
+	 */
+	if (!tcon)
+		return 0;
+
+	ses = tcon->ses;
+	server = ses->server;
+
+	/*
+	 * only tree disconnect, open, and write, (and ulogoff which does not
+	 * have tcon) are allowed as we start force umount
+	 */
+	if (tcon->tidStatus == CifsExiting) {
+		if (smb_command != SMB_COM_WRITE_ANDX &&
+		    smb_command != SMB_COM_OPEN_ANDX &&
+		    smb_command != SMB_COM_TREE_DISCONNECT) {
+			cFYI(1, ("can not send cmd %d while umounting",
+				smb_command));
+			return -ENODEV;
+		}
+	}
+
+	if (ses->status == CifsExiting)
+		return -EIO;
+
+	/*
+	 * Give demultiplex thread up to 10 seconds to reconnect, should be
+	 * greater than cifs socket timeout which is 7 seconds
+	 */
+	while (server->tcpStatus == CifsNeedReconnect) {
+		wait_event_interruptible_timeout(server->response_q,
+			(server->tcpStatus == CifsGood), 10 * HZ);
+
+		/* is TCP session is reestablished now ?*/
+		if (server->tcpStatus != CifsNeedReconnect)
+			break;
+
+		/*
+		 * on "soft" mounts we wait once. Hard mounts keep
+		 * retrying until process is killed or server comes
+		 * back on-line
+		 */
+		if (!tcon->retry || ses->status == CifsExiting) {
+			cFYI(1, ("gave up waiting on reconnect in smb_init"));
+			return -EHOSTDOWN;
+		}
+	}
+
+	if (!ses->need_reconnect && !tcon->need_reconnect)
+		return 0;
+
+	nls_codepage = load_nls_default();
+
+	/*
+	 * need to prevent multiple threads trying to simultaneously
+	 * reconnect the same SMB session
+	 */
+	down(&ses->sesSem);
+	if (ses->need_reconnect)
+		rc = cifs_setup_session(0, ses, nls_codepage);
+
+	/* do we need to reconnect tcon? */
+	if (rc || !tcon->need_reconnect) {
+		up(&ses->sesSem);
+		goto out;
+	}
+
+	mark_open_files_invalid(tcon);
+	rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage);
+	up(&ses->sesSem);
+	cFYI(1, ("reconnect tcon rc = %d", rc));
+
+	if (rc)
+		goto out;
+
+	/*
+	 * FIXME: check if wsize needs updated due to negotiated smb buffer
+	 * size shrinking
+	 */
+	atomic_inc(&tconInfoReconnectCount);
+
+	/* tell server Unix caps we support */
+	if (ses->capabilities & CAP_UNIX)
+		reset_cifs_unix_caps(0, tcon, NULL, NULL);
+
+	/*
+	 * Removed call to reopen open files here. It is safer (and faster) to
+	 * reopen files one at a time as needed in read and write.
+	 *
+	 * FIXME: what about file locks? don't we need to reclaim them ASAP?
+	 */
+
+out:
+	/*
+	 * Check if handle based operation so we know whether we can continue
+	 * or not without returning to caller to reset file handle
+	 */
+	switch (smb_command) {
+	case SMB_COM_READ_ANDX:
+	case SMB_COM_WRITE_ANDX:
+	case SMB_COM_CLOSE:
+	case SMB_COM_FIND_CLOSE2:
+	case SMB_COM_LOCKING_ANDX:
+		rc = -EAGAIN;
+	}
+
+	unload_nls(nls_codepage);
+	return rc;
+}
+
+/* Allocate and return pointer to an SMB request buffer, and set basic
+   SMB information in the SMB header. If the return code is zero, this
+   function must have filled in request_buf pointer */
+static int
+small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
+		void **request_buf)
+{
+	int rc = 0;
+
+	rc = cifs_reconnect_tcon(tcon, smb_command);
 	if (rc)
 		return rc;
 
@@ -256,101 +284,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
 {
 	int rc = 0;
 
-	/* SMBs NegProt, SessSetup, uLogoff do not have tcon yet so
-	   check for tcp and smb session status done differently
-	   for those three - in the calling routine */
-	if (tcon) {
-		if (tcon->tidStatus == CifsExiting) {
-			/* only tree disconnect, open, and write,
-			  (and ulogoff which does not have tcon)
-			  are allowed as we start force umount */
-			if ((smb_command != SMB_COM_WRITE_ANDX) &&
-			    (smb_command != SMB_COM_OPEN_ANDX) &&
-			    (smb_command != SMB_COM_TREE_DISCONNECT)) {
-				cFYI(1, ("can not send cmd %d while umounting",
-					smb_command));
-				return -ENODEV;
-			}
-		}
-
-		if ((tcon->ses) && (tcon->ses->status != CifsExiting) &&
-		    (tcon->ses->server)) {
-			struct nls_table *nls_codepage;
-			/* Give Demultiplex thread up to 10 seconds to
-			   reconnect, should be greater than cifs socket
-			   timeout which is 7 seconds */
-			while (tcon->ses->server->tcpStatus ==
-						CifsNeedReconnect) {
-				wait_event_interruptible_timeout(tcon->ses->server->response_q,
-					(tcon->ses->server->tcpStatus ==
-						CifsGood), 10 * HZ);
-				if (tcon->ses->server->tcpStatus ==
-						CifsNeedReconnect) {
-					/* on "soft" mounts we wait once */
-					if (!tcon->retry ||
-					   (tcon->ses->status == CifsExiting)) {
-						cFYI(1, ("gave up waiting on "
-							"reconnect in smb_init"));
-						return -EHOSTDOWN;
-					} /* else "hard" mount - keep retrying
-					     until process is killed or server
-					     comes on-line */
-				} else /* TCP session is reestablished now */
-					break;
-			}
-			nls_codepage = load_nls_default();
-		/* need to prevent multiple threads trying to
-		   simultaneously reconnect the same SMB session */
-			down(&tcon->ses->sesSem);
-			if (tcon->ses->need_reconnect)
-				rc = cifs_setup_session(0, tcon->ses,
-							nls_codepage);
-			if (!rc && (tcon->need_reconnect)) {
-				mark_open_files_invalid(tcon);
-				rc = CIFSTCon(0, tcon->ses, tcon->treeName,
-					      tcon, nls_codepage);
-				up(&tcon->ses->sesSem);
-				/* BB FIXME add code to check if wsize needs
-				   update due to negotiated smb buffer size
-				   shrinking */
-				if (rc == 0) {
-					atomic_inc(&tconInfoReconnectCount);
-					/* tell server Unix caps we support */
-					if (tcon->ses->capabilities & CAP_UNIX)
-						reset_cifs_unix_caps(
-						0 /* no xid */,
-						tcon,
-						NULL /* do not know sb */,
-						NULL /* no vol info */);
-				}
-
-				cFYI(1, ("reconnect tcon rc = %d", rc));
-				/* Removed call to reopen open files here.
-				   It is safer (and faster) to reopen files
-				   one at a time as needed in read and write */
-
-				/* Check if handle based operation so we
-				   know whether we can continue or not without
-				   returning to caller to reset file handle */
-				switch (smb_command) {
-				case SMB_COM_READ_ANDX:
-				case SMB_COM_WRITE_ANDX:
-				case SMB_COM_CLOSE:
-				case SMB_COM_FIND_CLOSE2:
-				case SMB_COM_LOCKING_ANDX: {
-					unload_nls(nls_codepage);
-					return -EAGAIN;
-				}
-				}
-			} else {
-				up(&tcon->ses->sesSem);
-			}
-			unload_nls(nls_codepage);
-
-		} else {
-			return -EIO;
-		}
-	}
+	rc = cifs_reconnect_tcon(tcon, smb_command);
 	if (rc)
 		return rc;
 
@@ -3961,6 +3895,10 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
 	if (is_unicode) {
 		__le16 *tmp = kmalloc(strlen(searchName)*2 + 2,
 				GFP_KERNEL);
+		if (tmp == NULL) {
+			rc = -ENOMEM;
+			goto parse_DFS_referrals_exit;
+		}
 		cifsConvertToUCS((__le16 *) tmp, searchName,
 				PATH_MAX, nls_codepage, remap);
 		node->path_consumed = cifs_ucs2_bytes(tmp,
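
cifs_reconnect_tcon(), extracted above, centralizes the wait-for-reconnect loop that small_smb_init() and smb_init() previously duplicated: wait in bounded steps while the demultiplex thread reconnects, give up after one wait on "soft" mounts, and keep retrying on "hard" mounts. A compact, runnable sketch of just that control flow, with stand-in names (wait_for_status() plays the role of wait_event_interruptible_timeout(); none of these are kernel APIs):

/* Control-flow sketch of the reconnect wait loop factored out above. */
#include <stdbool.h>
#include <stdio.h>

enum status { GOOD, NEED_RECONNECT };

static enum status server_status = NEED_RECONNECT;
static int waits;

static void wait_for_status(void)
{
	/* pretend the demultiplex thread reconnects after two waits */
	if (++waits >= 2)
		server_status = GOOD;
}

static int reconnect(bool hard_mount)
{
	while (server_status == NEED_RECONNECT) {
		wait_for_status();	/* up to 10s per wait in the kernel */
		if (server_status != NEED_RECONNECT)
			break;
		if (!hard_mount)	/* "soft" mounts wait only once */
			return -1;	/* -EHOSTDOWN in the kernel */
	}
	return 0;			/* session usable again */
}

int main(void)
{
	printf("reconnect: %d after %d waits\n", reconnect(true), waits);
	return 0;
}
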
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 1f3345d7fa79..d49682433c20 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1377,7 +1377,7 @@ cifs_parse_mount_options(char *options, const char *devname,
 }
 
 static struct TCP_Server_Info *
-cifs_find_tcp_session(struct sockaddr_storage *addr)
+cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port)
 {
 	struct list_head *tmp;
 	struct TCP_Server_Info *server;
@@ -1397,16 +1397,37 @@ cifs_find_tcp_session(struct sockaddr_storage *addr)
 		if (server->tcpStatus == CifsNew)
 			continue;
 
-		if (addr->ss_family == AF_INET &&
-		    (addr4->sin_addr.s_addr !=
-		     server->addr.sockAddr.sin_addr.s_addr))
-			continue;
-		else if (addr->ss_family == AF_INET6 &&
-			 (!ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr,
-					   &addr6->sin6_addr) ||
-			  server->addr.sockAddr6.sin6_scope_id !=
-			  addr6->sin6_scope_id))
-			continue;
+		switch (addr->ss_family) {
+		case AF_INET:
+			if (addr4->sin_addr.s_addr ==
+			    server->addr.sockAddr.sin_addr.s_addr) {
+				addr4->sin_port = htons(port);
+				/* user overrode default port? */
+				if (addr4->sin_port) {
+					if (addr4->sin_port !=
+					    server->addr.sockAddr.sin_port)
+						continue;
+				}
+				break;
+			} else
+				continue;
+
+		case AF_INET6:
+			if (ipv6_addr_equal(&addr6->sin6_addr,
+			    &server->addr.sockAddr6.sin6_addr) &&
+			    (addr6->sin6_scope_id ==
+			    server->addr.sockAddr6.sin6_scope_id)) {
+				addr6->sin6_port = htons(port);
+				/* user overrode default port? */
+				if (addr6->sin6_port) {
+					if (addr6->sin6_port !=
+					    server->addr.sockAddr6.sin6_port)
+						continue;
+				}
+				break;
+			} else
+				continue;
+		}
 
 		++server->srv_count;
 		write_unlock(&cifs_tcp_ses_lock);
@@ -1475,7 +1496,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 	}
 
 	/* see if we already have a matching tcp_ses */
-	tcp_ses = cifs_find_tcp_session(&addr);
+	tcp_ses = cifs_find_tcp_session(&addr, volume_info->port);
 	if (tcp_ses)
 		return tcp_ses;
 
@@ -2636,9 +2657,9 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 		return -EIO;
 
 	smb_buffer = cifs_buf_get();
-	if (smb_buffer == NULL) {
+	if (smb_buffer == NULL)
 		return -ENOMEM;
-	}
+
 	smb_buffer_response = smb_buffer;
 
 	header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX,
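
Per the CHANGES entry earlier, cifs_find_tcp_session() now also matches on the port, so a second mount to the same address with an explicitly different port gets its own TCP session, while mounts that leave the port at its default (zero here) still reuse an existing session. The matching rule reduces to the sketch below (IPv4 only, simplified types, illustrative names, not the kernel code):

/* Sketch of the session-matching rule introduced above: reuse an
 * existing TCP session only if the address matches and, when the user
 * gave an explicit (non-zero) port, the port matches too. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct session { uint32_t addr; uint16_t port; };

static bool session_matches(const struct session *existing,
			    uint32_t want_addr, uint16_t want_port)
{
	if (existing->addr != want_addr)
		return false;
	/* want_port == 0 means "default": any port on this address is fine */
	if (want_port && want_port != existing->port)
		return false;
	return true;
}

int main(void)
{
	struct session s = { .addr = 0x0a000001, .port = 445 };

	printf("%d\n", session_matches(&s, 0x0a000001, 0));	/* 1: reuse */
	printf("%d\n", session_matches(&s, 0x0a000001, 139));	/* 0: new session */
	return 0;
}
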
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 4326ffd90fa9..a6424cfc0121 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -153,7 +153,7 @@ cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle,
 	mutex_init(&pCifsFile->fh_mutex);
 	mutex_init(&pCifsFile->lock_mutex);
 	INIT_LIST_HEAD(&pCifsFile->llist);
-	atomic_set(&pCifsFile->wrtPending, 0);
+	atomic_set(&pCifsFile->count, 1);
 
 	/* set the following in open now
 			pCifsFile->pfile = file; */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c34b7f8a217b..fa7beac8b80e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -53,11 +53,9 @@ static inline struct cifsFileInfo *cifs_init_private(
 	private_data->pInode = inode;
 	private_data->invalidHandle = false;
 	private_data->closePend = false;
-	/* we have to track num writers to the inode, since writepages
-	   does not tell us which handle the write is for so there can
-	   be a close (overlapping with write) of the filehandle that
-	   cifs_writepages chose to use */
-	atomic_set(&private_data->wrtPending, 0);
+	/* Initialize reference count to one. The private data is
+	freed on the release of the last reference */
+	atomic_set(&private_data->count, 1);
 
 	return private_data;
 }
@@ -643,7 +641,7 @@ int cifs_close(struct inode *inode, struct file *file)
 		if (!pTcon->need_reconnect) {
 			write_unlock(&GlobalSMBSeslock);
 			timeout = 2;
-			while ((atomic_read(&pSMBFile->wrtPending) != 0)
+			while ((atomic_read(&pSMBFile->count) != 1)
 				&& (timeout <= 2048)) {
 				/* Give write a better chance to get to
 				server ahead of the close. We do not
@@ -657,8 +655,6 @@ int cifs_close(struct inode *inode, struct file *file)
 				msleep(timeout);
 				timeout *= 4;
 			}
-			if (atomic_read(&pSMBFile->wrtPending))
-				cERROR(1, ("close with pending write"));
 			if (!pTcon->need_reconnect &&
 			    !pSMBFile->invalidHandle)
 				rc = CIFSSMBClose(xid, pTcon,
@@ -681,24 +677,7 @@ int cifs_close(struct inode *inode, struct file *file)
 		list_del(&pSMBFile->flist);
 		list_del(&pSMBFile->tlist);
 		write_unlock(&GlobalSMBSeslock);
-		timeout = 10;
-		/* We waited above to give the SMBWrite a chance to issue
-		   on the wire (so we do not get SMBWrite returning EBADF
-		   if writepages is racing with close. Note that writepages
-		   does not specify a file handle, so it is possible for a file
-		   to be opened twice, and the application close the "wrong"
-		   file handle - in these cases we delay long enough to allow
-		   the SMBWrite to get on the wire before the SMB Close.
-		   We allow total wait here over 45 seconds, more than
-		   oplock break time, and more than enough to allow any write
-		   to complete on the server, or to time out on the client */
-		while ((atomic_read(&pSMBFile->wrtPending) != 0)
-				&& (timeout <= 50000)) {
-			cERROR(1, ("writes pending, delay free of handle"));
-			msleep(timeout);
-			timeout *= 8;
-		}
-		kfree(file->private_data);
+		cifsFileInfo_put(file->private_data);
 		file->private_data = NULL;
 	} else
 		rc = -EBADF;
@@ -1236,7 +1215,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
 			if (!open_file->invalidHandle) {
 				/* found a good file */
 				/* lock it so it will not be closed on us */
-				atomic_inc(&open_file->wrtPending);
+				cifsFileInfo_get(open_file);
 				read_unlock(&GlobalSMBSeslock);
 				return open_file;
 			} /* else might as well continue, and look for
@@ -1276,7 +1255,7 @@ refind_writable:
 		if (open_file->pfile &&
 		    ((open_file->pfile->f_flags & O_RDWR) ||
 		     (open_file->pfile->f_flags & O_WRONLY))) {
-			atomic_inc(&open_file->wrtPending);
+			cifsFileInfo_get(open_file);
 
 			if (!open_file->invalidHandle) {
 				/* found a good writable file */
@@ -1293,7 +1272,7 @@ refind_writable:
 			else { /* start over in case this was deleted */
 				/* since the list could be modified */
 				read_lock(&GlobalSMBSeslock);
-				atomic_dec(&open_file->wrtPending);
+				cifsFileInfo_put(open_file);
 				goto refind_writable;
 			}
 		}
@@ -1309,7 +1288,7 @@ refind_writable:
 			read_lock(&GlobalSMBSeslock);
 			/* can not use this handle, no write
 			   pending on this one after all */
-			atomic_dec(&open_file->wrtPending);
+			cifsFileInfo_put(open_file);
 
 			if (open_file->closePend) /* list could have changed */
 				goto refind_writable;
@@ -1373,7 +1352,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 	if (open_file) {
 		bytes_written = cifs_write(open_file->pfile, write_data,
 					   to-from, &offset);
-		atomic_dec(&open_file->wrtPending);
+		cifsFileInfo_put(open_file);
 		/* Does mm or vfs already set times? */
 		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
 		if ((bytes_written > 0) && (offset))
@@ -1562,7 +1541,7 @@ retry:
 						   bytes_to_write, offset,
 						   &bytes_written, iov, n_iov,
 						   long_op);
-			atomic_dec(&open_file->wrtPending);
+			cifsFileInfo_put(open_file);
 			cifs_update_eof(cifsi, offset, bytes_written);
 
 			if (rc || bytes_written < bytes_to_write) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 82d83839655e..1f09c7619319 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -800,7 +800,7 @@ set_via_filehandle:
 	if (open_file == NULL)
 		CIFSSMBClose(xid, pTcon, netfid);
 	else
-		atomic_dec(&open_file->wrtPending);
+		cifsFileInfo_put(open_file);
 out:
 	return rc;
 }
@@ -1635,7 +1635,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
 		__u32 npid = open_file->pid;
 		rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid,
 					npid, false);
-		atomic_dec(&open_file->wrtPending);
+		cifsFileInfo_put(open_file);
 		cFYI(1, ("SetFSize for attrs rc = %d", rc));
 		if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
 			unsigned int bytes_written;
@@ -1790,7 +1790,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 		u16 nfid = open_file->netfid;
 		u32 npid = open_file->pid;
 		rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid);
-		atomic_dec(&open_file->wrtPending);
+		cifsFileInfo_put(open_file);
 	} else {
 		rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args,
 					    cifs_sb->local_nls,
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 0ad3e2d116a6..1da4ab250eae 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -119,20 +119,19 @@ AllocOplockQEntry(struct inode *pinode, __u16 fid, struct cifsTconInfo *tcon)
 		temp->pinode = pinode;
 		temp->tcon = tcon;
 		temp->netfid = fid;
-		spin_lock(&GlobalMid_Lock);
-		list_add_tail(&temp->qhead, &GlobalOplock_Q);
-		spin_unlock(&GlobalMid_Lock);
+		spin_lock(&cifs_oplock_lock);
+		list_add_tail(&temp->qhead, &cifs_oplock_list);
+		spin_unlock(&cifs_oplock_lock);
 	}
 	return temp;
-
 }
 
 void DeleteOplockQEntry(struct oplock_q_entry *oplockEntry)
 {
-	spin_lock(&GlobalMid_Lock);
+	spin_lock(&cifs_oplock_lock);
 	/* should we check if list empty first? */
 	list_del(&oplockEntry->qhead);
-	spin_unlock(&GlobalMid_Lock);
+	spin_unlock(&cifs_oplock_lock);
 	kmem_cache_free(cifs_oplock_cachep, oplockEntry);
 }
 
@@ -144,14 +143,14 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon)
 	if (tcon == NULL)
 		return;
 
-	spin_lock(&GlobalMid_Lock);
-	list_for_each_entry(temp, &GlobalOplock_Q, qhead) {
+	spin_lock(&cifs_oplock_lock);
+	list_for_each_entry(temp, &cifs_oplock_list, qhead) {
 		if ((temp->tcon) && (temp->tcon == tcon)) {
 			list_del(&temp->qhead);
 			kmem_cache_free(cifs_oplock_cachep, temp);
 		}
 	}
-	spin_unlock(&GlobalMid_Lock);
+	spin_unlock(&cifs_oplock_lock);
 }
 
 static int
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index ccc9d62c462d..55ea369f43a9 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -63,7 +63,7 @@ static int send_data(struct sk_buff *skb)
 		return rv;
 	}
 
-	return genlmsg_unicast(skb, listener_nlpid);
+	return genlmsg_unicast(&init_net, skb, listener_nlpid);
 }
 
 static int user_cmd(struct sk_buff *skb, struct genl_info *info)
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e27130341d4f..1c1638f873a4 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -482,7 +482,7 @@ static int ext2_alloc_branch(struct inode *inode,
 			unlock_buffer(bh);
 			mark_buffer_dirty_inode(bh, inode);
 			/* We used to sync bh here if IS_SYNC(inode).
-			 * But we now rely upon generic_osync_inode()
+			 * But we now rely upon generic_write_sync()
 			 * and b_inode_buffers. But not for directories.
 			 */
 			if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 299253214789..388bbdfa0b4e 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -51,71 +51,12 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
 	return 0;
 }
 
-static ssize_t
-ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_path.dentry->d_inode;
-	ssize_t ret;
-	int err;
-
-	ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
-
-	/*
-	 * Skip flushing if there was an error, or if nothing was written.
-	 */
-	if (ret <= 0)
-		return ret;
-
-	/*
-	 * If the inode is IS_SYNC, or is O_SYNC and we are doing data
-	 * journalling then we need to make sure that we force the transaction
-	 * to disk to keep all metadata uptodate synchronously.
-	 */
-	if (file->f_flags & O_SYNC) {
-		/*
-		 * If we are non-data-journaled, then the dirty data has
-		 * already been flushed to backing store by generic_osync_inode,
-		 * and the inode has been flushed too if there have been any
-		 * modifications other than mere timestamp updates.
-		 *
-		 * Open question --- do we care about flushing timestamps too
-		 * if the inode is IS_SYNC?
-		 */
-		if (!ext3_should_journal_data(inode))
-			return ret;
-
-		goto force_commit;
-	}
-
-	/*
-	 * So we know that there has been no forced data flush. If the inode
-	 * is marked IS_SYNC, we need to force one ourselves.
-	 */
-	if (!IS_SYNC(inode))
-		return ret;
-
-	/*
-	 * Open question #2 --- should we force data to disk here too? If we
-	 * don't, the only impact is that data=writeback filesystems won't
-	 * flush data to disk automatically on IS_SYNC, only metadata (but
-	 * historically, that is what ext2 has done.)
-	 */
-
-force_commit:
-	err = ext3_force_commit(inode->i_sb);
-	if (err)
-		return err;
-	return ret;
-}
-
 const struct file_operations ext3_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,
-	.aio_write	= ext3_file_write,
+	.aio_write	= generic_file_aio_write,
 	.unlocked_ioctl	= ext3_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext3_compat_ioctl,
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 27f3c5354c0e..5ca3eca70a1e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -58,10 +58,7 @@ static ssize_t
 ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_path.dentry->d_inode;
-	ssize_t ret;
-	int err;
+	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
 
 	/*
 	 * If we have encountered a bitmap-format file, the size limit
@@ -81,53 +78,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 		}
 	}
 
-	ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
-	/*
-	 * Skip flushing if there was an error, or if nothing was written.
-	 */
-	if (ret <= 0)
-		return ret;
-
-	/*
-	 * If the inode is IS_SYNC, or is O_SYNC and we are doing data
-	 * journalling then we need to make sure that we force the transaction
-	 * to disk to keep all metadata uptodate synchronously.
-	 */
-	if (file->f_flags & O_SYNC) {
-		/*
-		 * If we are non-data-journaled, then the dirty data has
-		 * already been flushed to backing store by generic_osync_inode,
-		 * and the inode has been flushed too if there have been any
-		 * modifications other than mere timestamp updates.
-		 *
-		 * Open question --- do we care about flushing timestamps too
-		 * if the inode is IS_SYNC?
-		 */
-		if (!ext4_should_journal_data(inode))
-			return ret;
-
-		goto force_commit;
-	}
-
-	/*
-	 * So we know that there has been no forced data flush. If the inode
-	 * is marked IS_SYNC, we need to force one ourselves.
-	 */
-	if (!IS_SYNC(inode))
-		return ret;
-
-	/*
-	 * Open question #2 --- should we force data to disk here too? If we
-	 * don't, the only impact is that data=writeback filesystems won't
-	 * flush data to disk automatically on IS_SYNC, only metadata (but
-	 * historically, that is what ext2 has done.)
-	 */
-
-force_commit:
-	err = ext4_force_commit(inode->i_sb);
-	if (err)
-		return err;
-	return ret;
+	return generic_file_aio_write(iocb, iov, nr_segs, pos);
 }
 
 static struct vm_operations_struct ext4_file_vm_ops = {
diff --git a/fs/fat/file.c b/fs/fat/file.c
index f042b965c95c..e8c159de236b 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -176,8 +176,26 @@ static int fat_cont_expand(struct inode *inode, loff_t size)
 
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
 	mark_inode_dirty(inode);
-	if (IS_SYNC(inode))
-		err = sync_page_range_nolock(inode, mapping, start, count);
+	if (IS_SYNC(inode)) {
+		int err2;
+
+		/*
+		 * Opencode syncing since we don't have a file open to use
+		 * standard fsync path.
+		 */
+		err = filemap_fdatawrite_range(mapping, start,
+					       start + count - 1);
+		err2 = sync_mapping_buffers(mapping);
+		if (!err)
+			err = err2;
+		err2 = write_inode_now(inode, 1);
+		if (!err)
+			err = err2;
+		if (!err) {
+			err = filemap_fdatawait_range(mapping, start,
+						      start + count - 1);
+		}
+	}
 out:
 	return err;
 }
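
The fat_cont_expand() hunk above open-codes what a file-based fsync would do, since there is no struct file at this point: write out the range, sync the mapping's buffers, write the inode, then wait on the range, keeping the first error encountered. A tiny sketch of that "first error wins" aggregation, with stand-in step functions rather than the real filemap calls:

/* Sketch of the error aggregation used above when composing sync
 * steps by hand: later steps run, but the first failure is returned. */
#include <stdio.h>

static int step_ok(void)   { return 0; }
static int step_fail(void) { return -5; /* stands in for -EIO */ }

int main(void)
{
	int err, err2;

	err = step_ok();		/* filemap_fdatawrite_range() */
	err2 = step_fail();		/* sync_mapping_buffers() */
	if (!err)
		err = err2;
	err2 = step_ok();		/* write_inode_now() */
	if (!err)
		err = err2;
	if (!err)
		err = step_ok();	/* filemap_fdatawait_range() */
	printf("result: %d\n", err);	/* -5: first failure is preserved */
	return 0;
}
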
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index a6c20473dfd7..4e35be873e09 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -119,8 +119,8 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
 		MSDOS_I(inode)->i_start = new_dclus;
 		MSDOS_I(inode)->i_logstart = new_dclus;
 		/*
-		 * Since generic_osync_inode() synchronize later if
-		 * this is not directory, we don't here.
+		 * Since generic_write_sync() synchronizes regular files later,
+		 * we sync here only directories.
 		 */
 		if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) {
 			ret = fat_sync_inode(inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index da86ef58e427..8e1e5e19d21e 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -35,21 +35,29 @@
 int nr_pdflush_threads;
 
 /*
+ * Passed into wb_writeback(), essentially a subset of writeback_control
+ */
+struct wb_writeback_args {
+	long nr_pages;
+	struct super_block *sb;
+	enum writeback_sync_modes sync_mode;
+	int for_kupdate;
+	int range_cyclic;
+};
+
+/*
  * Work items for the bdi_writeback threads
  */
 struct bdi_work {
-	struct list_head list;
-	struct list_head wait_list;
-	struct rcu_head rcu_head;
+	struct list_head list;		/* pending work list */
+	struct rcu_head rcu_head;	/* for RCU free/clear of work */
 
-	unsigned long seen;
-	atomic_t pending;
+	unsigned long seen;		/* threads that have seen this work */
+	atomic_t pending;		/* number of threads still to do work */
 
-	struct super_block *sb;
-	unsigned long nr_pages;
-	enum writeback_sync_modes sync_mode;
+	struct wb_writeback_args args;	/* writeback arguments */
 
-	unsigned long state;
+	unsigned long state;		/* flag bits, see WS_* */
 };
 
 enum {
@@ -66,22 +74,13 @@ static inline bool bdi_work_on_stack(struct bdi_work *work)
 }
 
 static inline void bdi_work_init(struct bdi_work *work,
-				 struct writeback_control *wbc)
+				 struct wb_writeback_args *args)
 {
 	INIT_RCU_HEAD(&work->rcu_head);
-	work->sb = wbc->sb;
-	work->nr_pages = wbc->nr_to_write;
-	work->sync_mode = wbc->sync_mode;
+	work->args = *args;
 	work->state = WS_USED;
 }
 
-static inline void bdi_work_init_on_stack(struct bdi_work *work,
-					  struct writeback_control *wbc)
-{
-	bdi_work_init(work, wbc);
-	work->state |= WS_ONSTACK;
-}
-
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -98,6 +97,11 @@ static void bdi_work_clear(struct bdi_work *work)
 {
 	clear_bit(WS_USED_B, &work->state);
 	smp_mb__after_clear_bit();
+	/*
+	 * work can have disappeared at this point. bit waitq functions
+	 * should be able to tolerate this, provided bdi_sched_wait does
+	 * not dereference it's pointer argument.
+	 */
 	wake_up_bit(&work->state, WS_USED_B);
 }
 
@@ -113,7 +117,8 @@ static void bdi_work_free(struct rcu_head *head)
 
 static void wb_work_complete(struct bdi_work *work)
 {
-	const enum writeback_sync_modes sync_mode = work->sync_mode;
+	const enum writeback_sync_modes sync_mode = work->args.sync_mode;
+	int onstack = bdi_work_on_stack(work);
 
 	/*
 	 * For allocated work, we can clear the done/seen bit right here.
@@ -121,9 +126,9 @@ static void wb_work_complete(struct bdi_work *work)
 	 * to after the RCU grace period, since the stack could be invalidated
 	 * as soon as bdi_work_clear() has done the wakeup.
 	 */
-	if (!bdi_work_on_stack(work))
+	if (!onstack)
 		bdi_work_clear(work);
-	if (sync_mode == WB_SYNC_NONE || bdi_work_on_stack(work))
+	if (sync_mode == WB_SYNC_NONE || onstack)
 		call_rcu(&work->rcu_head, bdi_work_free);
 }
 
@@ -146,21 +151,19 @@ static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
146 151
147static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) 152static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
148{ 153{
149 if (work) { 154 work->seen = bdi->wb_mask;
150 work->seen = bdi->wb_mask; 155 BUG_ON(!work->seen);
151 BUG_ON(!work->seen); 156 atomic_set(&work->pending, bdi->wb_cnt);
152 atomic_set(&work->pending, bdi->wb_cnt); 157 BUG_ON(!bdi->wb_cnt);
153 BUG_ON(!bdi->wb_cnt);
154
155 /*
156 * Make sure stores are seen before it appears on the list
157 */
158 smp_mb();
159 158
160 spin_lock(&bdi->wb_lock); 159 /*
161 list_add_tail_rcu(&work->list, &bdi->work_list); 160 * list_add_tail_rcu() contains the necessary barriers to
162 spin_unlock(&bdi->wb_lock); 161 * make sure the above stores are seen before the item is
 163 } 162 * noticed on the list.
163 */
164 spin_lock(&bdi->wb_lock);
165 list_add_tail_rcu(&work->list, &bdi->work_list);
166 spin_unlock(&bdi->wb_lock);
164 167
165 /* 168 /*
166 * If the default thread isn't there, make sure we add it. When 169 * If the default thread isn't there, make sure we add it. When
@@ -171,15 +174,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
171 else { 174 else {
172 struct bdi_writeback *wb = &bdi->wb; 175 struct bdi_writeback *wb = &bdi->wb;
173 176
174 /* 177 if (wb->task)
175 * If we failed allocating the bdi work item, wake up the wb
176 * thread always. As a safety precaution, it'll flush out
177 * everything
178 */
179 if (!wb_has_dirty_io(wb)) {
180 if (work)
181 wb_clear_pending(wb, work);
182 } else if (wb->task)
183 wake_up_process(wb->task); 178 wake_up_process(wb->task);
184 } 179 }
185} 180}
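The replacement comment states why the explicit smp_mb() could go away: list_add_tail_rcu() is a publish operation whose barriers order the seen/pending stores before the item becomes reachable. A rough C11 analogue of that publish/observe pairing, with hypothetical names and a release store standing in for the RCU list primitive:

    #include <stdatomic.h>
    #include <stddef.h>

    struct item {
        unsigned long seen;                 /* plain fields, set first */
        unsigned int pending;
        struct item *next;
    };

    static struct item *_Atomic list_head;

    /* Writer: initialize everything first, publish last. The release
     * store orders the plain stores above it before the pointer becomes
     * visible to readers. */
    static void publish(struct item *it)
    {
        it->seen = ~0UL;
        it->pending = 4;
        it->next = atomic_load_explicit(&list_head, memory_order_relaxed);
        atomic_store_explicit(&list_head, it, memory_order_release);
    }

    /* Reader: the acquire load pairs with the release store, so any item
     * reached from here is fully initialized, with no separate smp_mb(). */
    static struct item *first_item(void)
    {
        return atomic_load_explicit(&list_head, memory_order_acquire);
    }

    int main(void)
    {
        static struct item it;

        publish(&it);
        return first_item() == &it ? 0 : 1;
    }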
@@ -194,48 +189,75 @@ static void bdi_wait_on_work_clear(struct bdi_work *work)
194 TASK_UNINTERRUPTIBLE); 189 TASK_UNINTERRUPTIBLE);
195} 190}
196 191
197static struct bdi_work *bdi_alloc_work(struct writeback_control *wbc) 192static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
193 struct wb_writeback_args *args)
198{ 194{
199 struct bdi_work *work; 195 struct bdi_work *work;
200 196
197 /*
198 * This is WB_SYNC_NONE writeback, so if allocation fails just
199 * wakeup the thread for old dirty data writeback
200 */
201 work = kmalloc(sizeof(*work), GFP_ATOMIC); 201 work = kmalloc(sizeof(*work), GFP_ATOMIC);
202 if (work) 202 if (work) {
203 bdi_work_init(work, wbc); 203 bdi_work_init(work, args);
204 bdi_queue_work(bdi, work);
205 } else {
206 struct bdi_writeback *wb = &bdi->wb;
204 207
205 return work; 208 if (wb->task)
209 wake_up_process(wb->task);
210 }
206} 211}
207 212
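The fallback in bdi_alloc_queue_work() follows from the writeback mode: WB_SYNC_NONE work is best effort, so a failed GFP_ATOMIC allocation degrades to waking the flusher thread instead of returning an error. The shape of that policy, as a small self-contained sketch with hypothetical names:

    #include <stdlib.h>
    #include <string.h>

    struct req { long nr_pages; };

    struct pool {
        struct req *queued;                 /* toy single-slot queue */
        int kicked;
    };

    static void enqueue(struct pool *p, struct req *r) { p->queued = r; }
    static void wake_worker(struct pool *p) { p->kicked = 1; }

    /* Best-effort submission: if the allocation fails, degrade to a bare
     * wakeup (the worker flushes old dirty data on its own) rather than
     * report an error the caller can do nothing useful with. */
    static void submit_or_kick(struct pool *p, const struct req *r)
    {
        struct req *copy = malloc(sizeof(*copy));

        if (copy) {
            memcpy(copy, r, sizeof(*copy));
            enqueue(p, copy);               /* worker frees it when done */
        } else {
            wake_worker(p);
        }
    }

    int main(void)
    {
        struct pool pool = { 0 };
        struct req req = { .nr_pages = 1024 };

        submit_or_kick(&pool, &req);
        free(pool.queued);
        return 0;
    }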
208void bdi_start_writeback(struct writeback_control *wbc) 213/**
214 * bdi_sync_writeback - start and wait for writeback
215 * @bdi: the backing device to write from
216 * @sb: write inodes from this super_block
217 *
218 * Description:
219 * This does WB_SYNC_ALL data integrity writeback and waits for the
220 * IO to complete. Callers must hold the sb s_umount semaphore for
221 * reading, to avoid having the super disappear before we are done.
222 */
223static void bdi_sync_writeback(struct backing_dev_info *bdi,
224 struct super_block *sb)
209{ 225{
210 const bool must_wait = wbc->sync_mode == WB_SYNC_ALL; 226 struct wb_writeback_args args = {
211 struct bdi_work work_stack, *work = NULL; 227 .sb = sb,
228 .sync_mode = WB_SYNC_ALL,
229 .nr_pages = LONG_MAX,
230 .range_cyclic = 0,
231 };
232 struct bdi_work work;
212 233
213 if (!must_wait) 234 bdi_work_init(&work, &args);
214 work = bdi_alloc_work(wbc); 235 work.state |= WS_ONSTACK;
215 236
216 if (!work) { 237 bdi_queue_work(bdi, &work);
217 work = &work_stack; 238 bdi_wait_on_work_clear(&work);
218 bdi_work_init_on_stack(work, wbc); 239}
219 }
220 240
221 bdi_queue_work(wbc->bdi, work); 241/**
242 * bdi_start_writeback - start writeback
243 * @bdi: the backing device to write from
244 * @nr_pages: the number of pages to write
245 *
246 * Description:
247 * This does WB_SYNC_NONE opportunistic writeback. The IO is only
248 * started when this function returns; we make no guarantees about
249 * completion. The caller need not hold the sb s_umount semaphore.
250 *
251 */
252void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
253{
254 struct wb_writeback_args args = {
255 .sync_mode = WB_SYNC_NONE,
256 .nr_pages = nr_pages,
257 .range_cyclic = 1,
258 };
222 259
223 /* 260 bdi_alloc_queue_work(bdi, &args);
224 * If the sync mode is WB_SYNC_ALL, block waiting for the work to
225 * complete. If not, we only need to wait for the work to be started,
226 * if we allocated it on-stack. We use the same mechanism, if the
227 * wait bit is set in the bdi_work struct, then threads will not
228 * clear pending until after they are done.
229 *
230 * Note that work == &work_stack if must_wait is true, so we don't
231 * need to do call_rcu() here ever, since the completion path will
232 * have done that for us.
233 */
234 if (must_wait || work == &work_stack) {
235 bdi_wait_on_work_clear(work);
236 if (work != &work_stack)
237 call_rcu(&work->rcu_head, bdi_work_free);
238 }
239} 261}
240 262
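bdi_sync_writeback() and bdi_start_writeback() now embody the two submission modes explicitly: integrity work is placed on the caller's stack and waited for, while opportunistic work is heap-allocated and fire-and-forget. A compressed sketch of that split, assuming hypothetical names and a condition variable in place of the bit waitqueue:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdlib.h>

    struct work {
        bool onstack;                       /* analogue of WS_ONSTACK */
        bool done;
        pthread_mutex_t lock;
        pthread_cond_t cond;
    };

    /* Stand-in for handing work to a flusher thread: completes it inline. */
    static void queue_work(struct work *w)
    {
        if (w->onstack) {
            pthread_mutex_lock(&w->lock);
            w->done = true;
            pthread_cond_signal(&w->cond);
            pthread_mutex_unlock(&w->lock);
        } else {
            free(w);                        /* async work is freed by the worker */
        }
    }

    /* Integrity path: the work lives on our stack, so we must not return
     * until completion is signalled (cf. bdi_sync_writeback). */
    static void submit_sync(void)
    {
        struct work w = { .onstack = true };

        pthread_mutex_init(&w.lock, NULL);
        pthread_cond_init(&w.cond, NULL);
        queue_work(&w);

        pthread_mutex_lock(&w.lock);
        while (!w.done)
            pthread_cond_wait(&w.cond, &w.lock);
        pthread_mutex_unlock(&w.lock);      /* only now may 'w' go out of scope */
    }

    /* Opportunistic path: heap-allocate, hand off, return immediately
     * (cf. bdi_start_writeback). */
    static void submit_async(void)
    {
        struct work *w = calloc(1, sizeof(*w));

        if (w)
            queue_work(w);                  /* on failure: just kick the worker */
    }

    int main(void)
    {
        submit_sync();
        submit_async();
        return 0;
    }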
241/* 263/*
@@ -671,17 +693,16 @@ static inline bool over_bground_thresh(void)
671 * older_than_this takes precedence over nr_to_write. So we'll only write back 693 * older_than_this takes precedence over nr_to_write. So we'll only write back
672 * all dirty pages if they are all attached to "old" mappings. 694 * all dirty pages if they are all attached to "old" mappings.
673 */ 695 */
674static long wb_writeback(struct bdi_writeback *wb, long nr_pages, 696static long wb_writeback(struct bdi_writeback *wb,
675 struct super_block *sb, 697 struct wb_writeback_args *args)
676 enum writeback_sync_modes sync_mode, int for_kupdate)
677{ 698{
678 struct writeback_control wbc = { 699 struct writeback_control wbc = {
679 .bdi = wb->bdi, 700 .bdi = wb->bdi,
680 .sb = sb, 701 .sb = args->sb,
681 .sync_mode = sync_mode, 702 .sync_mode = args->sync_mode,
682 .older_than_this = NULL, 703 .older_than_this = NULL,
683 .for_kupdate = for_kupdate, 704 .for_kupdate = args->for_kupdate,
684 .range_cyclic = 1, 705 .range_cyclic = args->range_cyclic,
685 }; 706 };
686 unsigned long oldest_jif; 707 unsigned long oldest_jif;
687 long wrote = 0; 708 long wrote = 0;
@@ -691,13 +712,18 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
691 oldest_jif = jiffies - 712 oldest_jif = jiffies -
692 msecs_to_jiffies(dirty_expire_interval * 10); 713 msecs_to_jiffies(dirty_expire_interval * 10);
693 } 714 }
715 if (!wbc.range_cyclic) {
716 wbc.range_start = 0;
717 wbc.range_end = LLONG_MAX;
718 }
694 719
695 for (;;) { 720 for (;;) {
696 /* 721 /*
697 * Don't flush anything for non-integrity writeback where 722 * Don't flush anything for non-integrity writeback where
698 * no nr_pages was given 723 * no nr_pages was given
699 */ 724 */
700 if (!for_kupdate && nr_pages <= 0 && sync_mode == WB_SYNC_NONE) 725 if (!args->for_kupdate && args->nr_pages <= 0 &&
726 args->sync_mode == WB_SYNC_NONE)
701 break; 727 break;
702 728
703 /* 729 /*
@@ -705,7 +731,8 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
705 * periodic background writeout and we are below the 731 * periodic background writeout and we are below the
706 * background dirty threshold, don't do anything 732 * background dirty threshold, don't do anything
707 */ 733 */
708 if (for_kupdate && nr_pages <= 0 && !over_bground_thresh()) 734 if (args->for_kupdate && args->nr_pages <= 0 &&
735 !over_bground_thresh())
709 break; 736 break;
710 737
711 wbc.more_io = 0; 738 wbc.more_io = 0;
@@ -713,7 +740,7 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
713 wbc.nr_to_write = MAX_WRITEBACK_PAGES; 740 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
714 wbc.pages_skipped = 0; 741 wbc.pages_skipped = 0;
715 writeback_inodes_wb(wb, &wbc); 742 writeback_inodes_wb(wb, &wbc);
716 nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; 743 args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
717 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; 744 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
718 745
719 /* 746 /*
@@ -731,7 +758,11 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
731 758
732/* 759/*
733 * Return the next bdi_work struct that hasn't been processed by this 760 * Return the next bdi_work struct that hasn't been processed by this
734 * wb thread yet 761 * wb thread yet. ->seen is initially set for each thread that exists
 762 * for this device; when a thread first notices a piece of work, it
763 * clears its bit. Depending on writeback type, the thread will notify
764 * completion on either receiving the work (WB_SYNC_NONE) or after
765 * it is done (WB_SYNC_ALL).
735 */ 766 */
736static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, 767static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
737 struct bdi_writeback *wb) 768 struct bdi_writeback *wb)
@@ -741,8 +772,9 @@ static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
741 rcu_read_lock(); 772 rcu_read_lock();
742 773
743 list_for_each_entry_rcu(work, &bdi->work_list, list) { 774 list_for_each_entry_rcu(work, &bdi->work_list, list) {
744 if (!test_and_clear_bit(wb->nr, &work->seen)) 775 if (!test_bit(wb->nr, &work->seen))
745 continue; 776 continue;
777 clear_bit(wb->nr, &work->seen);
746 778
747 ret = work; 779 ret = work;
748 break; 780 break;
@@ -767,8 +799,16 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
767 global_page_state(NR_UNSTABLE_NFS) + 799 global_page_state(NR_UNSTABLE_NFS) +
768 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 800 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
769 801
770 if (nr_pages) 802 if (nr_pages) {
771 return wb_writeback(wb, nr_pages, NULL, WB_SYNC_NONE, 1); 803 struct wb_writeback_args args = {
804 .nr_pages = nr_pages,
805 .sync_mode = WB_SYNC_NONE,
806 .for_kupdate = 1,
807 .range_cyclic = 1,
808 };
809
810 return wb_writeback(wb, &args);
811 }
772 812
773 return 0; 813 return 0;
774} 814}
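wb_check_old_data_flush() now funnels the periodic kupdate-style flush through the same wb_writeback_args path as every other caller: estimate the backlog from the global counters and only do the work when it is nonzero. A toy version of that gating, with hypothetical counters standing in for global_page_state() and inodes_stat (the real interval check lives just above this hunk):

    #include <stdio.h>

    /* hypothetical stand-ins for global_page_state() and inodes_stat */
    static long nr_file_dirty = 128;
    static long nr_unstable_nfs = 16;
    static long nr_dirty_inodes = 7;

    struct writeback_args {
        long nr_pages;
        int for_kupdate;
        int range_cyclic;
    };

    static long check_old_data_flush(void)
    {
        long nr_pages = nr_file_dirty + nr_unstable_nfs + nr_dirty_inodes;

        if (nr_pages) {
            struct writeback_args args = {
                .nr_pages = nr_pages,
                .for_kupdate = 1,           /* aged data only */
                .range_cyclic = 1,          /* resume where we left off */
            };
            printf("flushing up to %ld pages\n", args.nr_pages);
            return args.nr_pages;
        }
        return 0;
    }

    int main(void)
    {
        return check_old_data_flush() ? 0 : 1;
    }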
@@ -780,35 +820,31 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
780{ 820{
781 struct backing_dev_info *bdi = wb->bdi; 821 struct backing_dev_info *bdi = wb->bdi;
782 struct bdi_work *work; 822 struct bdi_work *work;
783 long nr_pages, wrote = 0; 823 long wrote = 0;
784 824
785 while ((work = get_next_work_item(bdi, wb)) != NULL) { 825 while ((work = get_next_work_item(bdi, wb)) != NULL) {
786 enum writeback_sync_modes sync_mode; 826 struct wb_writeback_args args = work->args;
787
788 nr_pages = work->nr_pages;
789 827
790 /* 828 /*
791 * Override sync mode, in case we must wait for completion 829 * Override sync mode, in case we must wait for completion
792 */ 830 */
793 if (force_wait) 831 if (force_wait)
794 work->sync_mode = sync_mode = WB_SYNC_ALL; 832 work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
795 else
796 sync_mode = work->sync_mode;
797 833
798 /* 834 /*
799 * If this isn't a data integrity operation, just notify 835 * If this isn't a data integrity operation, just notify
800 * that we have seen this work and we are now starting it. 836 * that we have seen this work and we are now starting it.
801 */ 837 */
802 if (sync_mode == WB_SYNC_NONE) 838 if (args.sync_mode == WB_SYNC_NONE)
803 wb_clear_pending(wb, work); 839 wb_clear_pending(wb, work);
804 840
805 wrote += wb_writeback(wb, nr_pages, work->sb, sync_mode, 0); 841 wrote += wb_writeback(wb, &args);
806 842
807 /* 843 /*
808 * This is a data integrity writeback, so only do the 844 * This is a data integrity writeback, so only do the
809 * notification when we have completed the work. 845 * notification when we have completed the work.
810 */ 846 */
811 if (sync_mode == WB_SYNC_ALL) 847 if (args.sync_mode == WB_SYNC_ALL)
812 wb_clear_pending(wb, work); 848 wb_clear_pending(wb, work);
813 } 849 }
814 850
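The handshake that wb_do_writeback() relies on is spread across several hunks: 'seen' is a bitmask of threads that still have to notice the item, 'pending' counts threads that still have to finish it, and the notification point differs by sync mode. This is also why test_and_clear_bit() could be split into test_bit() plus clear_bit() above: each thread only ever clears its own bit. A compact C11 model of the pickup and completion counting, with hypothetical names:

    #include <stdatomic.h>
    #include <stdio.h>

    #define NR_THREADS 4

    struct work {
        atomic_ulong seen;       /* bit per thread: still to notice the work */
        atomic_uint pending;     /* threads still to finish the work */
    };

    /* Per-thread pickup: each thread takes the work at most once. */
    static int take_work(struct work *w, unsigned int thread_nr)
    {
        unsigned long bit = 1UL << thread_nr;
        unsigned long old = atomic_fetch_and(&w->seen, ~bit);

        return (old & bit) != 0;         /* true only for the first pickup */
    }

    /* Per-thread completion: the last finisher signals overall completion. */
    static void finish_work(struct work *w)
    {
        if (atomic_fetch_sub(&w->pending, 1) == 1)
            printf("work complete, safe to free/clear\n");
    }

    int main(void)
    {
        struct work w;

        atomic_init(&w.seen, (1UL << NR_THREADS) - 1);
        atomic_init(&w.pending, NR_THREADS);

        for (unsigned int t = 0; t < NR_THREADS; t++)
            if (take_work(&w, t))
                finish_work(&w);
        return 0;
    }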
@@ -849,8 +885,7 @@ int bdi_writeback_task(struct bdi_writeback *wb)
849 } 885 }
850 886
851 wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); 887 wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
852 set_current_state(TASK_INTERRUPTIBLE); 888 schedule_timeout_interruptible(wait_jiffies);
853 schedule_timeout(wait_jiffies);
854 try_to_freeze(); 889 try_to_freeze();
855 } 890 }
856 891
@@ -858,67 +893,28 @@ int bdi_writeback_task(struct bdi_writeback *wb)
858} 893}
859 894
860/* 895/*
861 * Schedule writeback for all backing devices. Expensive! If this is a data 896 * Schedule writeback for all backing devices. This does WB_SYNC_NONE
 862 * integrity operation, writeback will be complete when this returns. If 897 * writeback; for integrity writeback see bdi_sync_writeback().
863 * we are simply called for WB_SYNC_NONE, then writeback will merely be
864 * scheduled to run.
865 */ 898 */
866static void bdi_writeback_all(struct writeback_control *wbc) 899static void bdi_writeback_all(struct super_block *sb, long nr_pages)
867{ 900{
868 const bool must_wait = wbc->sync_mode == WB_SYNC_ALL; 901 struct wb_writeback_args args = {
902 .sb = sb,
903 .nr_pages = nr_pages,
904 .sync_mode = WB_SYNC_NONE,
905 };
869 struct backing_dev_info *bdi; 906 struct backing_dev_info *bdi;
870 struct bdi_work *work;
871 LIST_HEAD(list);
872 907
873restart: 908 rcu_read_lock();
874 spin_lock(&bdi_lock);
875
876 list_for_each_entry(bdi, &bdi_list, bdi_list) {
877 struct bdi_work *work;
878 909
910 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
879 if (!bdi_has_dirty_io(bdi)) 911 if (!bdi_has_dirty_io(bdi))
880 continue; 912 continue;
881 913
882 /* 914 bdi_alloc_queue_work(bdi, &args);
883 * If work allocation fails, do the writes inline. We drop
884 * the lock and restart the list writeout. This should be OK,
885 * since this happens rarely and because the writeout should
886 * eventually make more free memory available.
887 */
888 work = bdi_alloc_work(wbc);
889 if (!work) {
890 struct writeback_control __wbc;
891
892 /*
893 * Not a data integrity writeout, just continue
894 */
895 if (!must_wait)
896 continue;
897
898 spin_unlock(&bdi_lock);
899 __wbc = *wbc;
900 __wbc.bdi = bdi;
901 writeback_inodes_wbc(&__wbc);
902 goto restart;
903 }
904 if (must_wait)
905 list_add_tail(&work->wait_list, &list);
906
907 bdi_queue_work(bdi, work);
908 } 915 }
909 916
910 spin_unlock(&bdi_lock); 917 rcu_read_unlock();
911
912 /*
913 * If this is for WB_SYNC_ALL, wait for pending work to complete
914 * before returning.
915 */
916 while (!list_empty(&list)) {
917 work = list_entry(list.next, struct bdi_work, wait_list);
918 list_del(&work->wait_list);
919 bdi_wait_on_work_clear(work);
920 call_rcu(&work->rcu_head, bdi_work_free);
921 }
922} 918}
923 919
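bdi_writeback_all() can now traverse bdi_list under rcu_read_lock() because the loop body never blocks or restarts. The read side of the publish pattern sketched earlier looks roughly like this in C11, with acquire loads standing in for rcu_dereference() and with no attempt to model grace periods (hypothetical names):

    #include <stdatomic.h>
    #include <stdio.h>

    struct node {
        int dirty;
        struct node *_Atomic next;
    };

    static struct node *_Atomic list_head;

    /* Lock-free traversal: every pointer is read with acquire semantics,
     * so the fields of any node we reach are fully initialized. Writers
     * may only append/publish; freeing would still need a grace period. */
    static void flush_all(void)
    {
        struct node *n = atomic_load_explicit(&list_head, memory_order_acquire);

        for (; n; n = atomic_load_explicit(&n->next, memory_order_acquire)) {
            if (!n->dirty)
                continue;
            printf("queueing work for node %p\n", (void *)n);
        }
    }

    int main(void)
    {
        static struct node b = { .dirty = 1 };
        static struct node a = { .dirty = 0 };

        atomic_store_explicit(&a.next, &b, memory_order_release);
        atomic_store_explicit(&list_head, &a, memory_order_release);
        flush_all();
        return 0;
    }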
924/* 920/*
@@ -927,17 +923,10 @@ restart:
927 */ 923 */
928void wakeup_flusher_threads(long nr_pages) 924void wakeup_flusher_threads(long nr_pages)
929{ 925{
930 struct writeback_control wbc = {
931 .sync_mode = WB_SYNC_NONE,
932 .older_than_this = NULL,
933 .range_cyclic = 1,
934 };
935
936 if (nr_pages == 0) 926 if (nr_pages == 0)
937 nr_pages = global_page_state(NR_FILE_DIRTY) + 927 nr_pages = global_page_state(NR_FILE_DIRTY) +
938 global_page_state(NR_UNSTABLE_NFS); 928 global_page_state(NR_UNSTABLE_NFS);
939 wbc.nr_to_write = nr_pages; 929 bdi_writeback_all(NULL, nr_pages);
940 bdi_writeback_all(&wbc);
941} 930}
942 931
943static noinline void block_dump___mark_inode_dirty(struct inode *inode) 932static noinline void block_dump___mark_inode_dirty(struct inode *inode)
@@ -1084,7 +1073,7 @@ EXPORT_SYMBOL(__mark_inode_dirty);
1084 * on the writer throttling path, and we get decent balancing between many 1073 * on the writer throttling path, and we get decent balancing between many
1085 * throttled threads: we don't want them all piling up on inode_sync_wait. 1074 * throttled threads: we don't want them all piling up on inode_sync_wait.
1086 */ 1075 */
1087static void wait_sb_inodes(struct writeback_control *wbc) 1076static void wait_sb_inodes(struct super_block *sb)
1088{ 1077{
1089 struct inode *inode, *old_inode = NULL; 1078 struct inode *inode, *old_inode = NULL;
1090 1079
@@ -1092,7 +1081,7 @@ static void wait_sb_inodes(struct writeback_control *wbc)
1092 * We need to be protected against the filesystem going from 1081 * We need to be protected against the filesystem going from
1093 * r/o to r/w or vice versa. 1082 * r/o to r/w or vice versa.
1094 */ 1083 */
1095 WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount)); 1084 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1096 1085
1097 spin_lock(&inode_lock); 1086 spin_lock(&inode_lock);
1098 1087
@@ -1103,7 +1092,7 @@ static void wait_sb_inodes(struct writeback_control *wbc)
1103 * In which case, the inode may not be on the dirty list, but 1092 * In which case, the inode may not be on the dirty list, but
1104 * we still have to wait for that writeout. 1093 * we still have to wait for that writeout.
1105 */ 1094 */
1106 list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) { 1095 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
1107 struct address_space *mapping; 1096 struct address_space *mapping;
1108 1097
1109 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) 1098 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
@@ -1143,14 +1132,8 @@ static void wait_sb_inodes(struct writeback_control *wbc)
1143 * for IO completion of submitted IO. The number of pages submitted is 1132 * for IO completion of submitted IO.
1144 * returned. 1133 *
1145 */ 1134 */
1146long writeback_inodes_sb(struct super_block *sb) 1135void writeback_inodes_sb(struct super_block *sb)
1147{ 1136{
1148 struct writeback_control wbc = {
1149 .sb = sb,
1150 .sync_mode = WB_SYNC_NONE,
1151 .range_start = 0,
1152 .range_end = LLONG_MAX,
1153 };
1154 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); 1137 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1155 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); 1138 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1156 long nr_to_write; 1139 long nr_to_write;
@@ -1158,9 +1141,7 @@ long writeback_inodes_sb(struct super_block *sb)
1158 nr_to_write = nr_dirty + nr_unstable + 1141 nr_to_write = nr_dirty + nr_unstable +
1159 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 1142 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1160 1143
1161 wbc.nr_to_write = nr_to_write; 1144 bdi_writeback_all(sb, nr_to_write);
1162 bdi_writeback_all(&wbc);
1163 return nr_to_write - wbc.nr_to_write;
1164} 1145}
1165EXPORT_SYMBOL(writeback_inodes_sb); 1146EXPORT_SYMBOL(writeback_inodes_sb);
1166 1147
@@ -1171,20 +1152,10 @@ EXPORT_SYMBOL(writeback_inodes_sb);
1171 * This function writes and waits on any dirty inode belonging to this 1152 * This function writes and waits on any dirty inode belonging to this
1172 * super_block. The number of pages synced is returned. 1153 * super_block, including IO that was in flight when it was called.
1173 */ 1154 */
1174long sync_inodes_sb(struct super_block *sb) 1155void sync_inodes_sb(struct super_block *sb)
1175{ 1156{
1176 struct writeback_control wbc = { 1157 bdi_sync_writeback(sb->s_bdi, sb);
1177 .sb = sb, 1158 wait_sb_inodes(sb);
1178 .sync_mode = WB_SYNC_ALL,
1179 .range_start = 0,
1180 .range_end = LLONG_MAX,
1181 };
1182 long nr_to_write = LONG_MAX; /* doesn't actually matter */
1183
1184 wbc.nr_to_write = nr_to_write;
1185 bdi_writeback_all(&wbc);
1186 wait_sb_inodes(&wbc);
1187 return nr_to_write - wbc.nr_to_write;
1188} 1159}
1189EXPORT_SYMBOL(sync_inodes_sb); 1160EXPORT_SYMBOL(sync_inodes_sb);
1190 1161
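sync_inodes_sb() is now explicitly two-phase: bdi_sync_writeback() starts and waits for the writeback it initiates, and wait_sb_inodes() additionally waits for IO that was already in flight. The same start-everything-then-wait split can be seen from userspace with Linux's sync_file_range(); a sketch, with error handling elided and with the caveat that sync_file_range() does not touch metadata:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <unistd.h>

    /* Phase 1: start writeback on every file first, so requests can be
     * merged and serviced in parallel. */
    static void start_all(int *fds, int n)
    {
        for (int i = 0; i < n; i++)
            sync_file_range(fds[i], 0, 0, SYNC_FILE_RANGE_WRITE);
    }

    /* Phase 2: wait, covering both the IO from phase 1 and any writeback
     * that was already in flight before we started. */
    static void wait_all(int *fds, int n)
    {
        for (int i = 0; i < n; i++)
            sync_file_range(fds[i], 0, 0,
                            SYNC_FILE_RANGE_WAIT_BEFORE |
                            SYNC_FILE_RANGE_WRITE |
                            SYNC_FILE_RANGE_WAIT_AFTER);
    }

    int main(void)
    {
        int fd = open("testfile", O_RDWR | O_CREAT, 0644);

        if (fd < 0)
            return 1;
        write(fd, "x", 1);
        start_all(&fd, 1);
        wait_all(&fd, 1);
        close(fd);
        return 0;
    }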
@@ -1242,57 +1213,3 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
1242 return ret; 1213 return ret;
1243} 1214}
1244EXPORT_SYMBOL(sync_inode); 1215EXPORT_SYMBOL(sync_inode);
1245
1246/**
1247 * generic_osync_inode - flush all dirty data for a given inode to disk
1248 * @inode: inode to write
1249 * @mapping: the address_space that should be flushed
1250 * @what: what to write and wait upon
1251 *
1252 * This can be called by file_write functions for files which have the
1253 * O_SYNC flag set, to flush dirty writes to disk.
1254 *
1255 * @what is a bitmask, specifying which part of the inode's data should be
1256 * written and waited upon.
1257 *
1258 * OSYNC_DATA: i_mapping's dirty data
1259 * OSYNC_METADATA: the buffers at i_mapping->private_list
1260 * OSYNC_INODE: the inode itself
1261 */
1262
1263int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what)
1264{
1265 int err = 0;
1266 int need_write_inode_now = 0;
1267 int err2;
1268
1269 if (what & OSYNC_DATA)
1270 err = filemap_fdatawrite(mapping);
1271 if (what & (OSYNC_METADATA|OSYNC_DATA)) {
1272 err2 = sync_mapping_buffers(mapping);
1273 if (!err)
1274 err = err2;
1275 }
1276 if (what & OSYNC_DATA) {
1277 err2 = filemap_fdatawait(mapping);
1278 if (!err)
1279 err = err2;
1280 }
1281
1282 spin_lock(&inode_lock);
1283 if ((inode->i_state & I_DIRTY) &&
1284 ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC)))
1285 need_write_inode_now = 1;
1286 spin_unlock(&inode_lock);
1287
1288 if (need_write_inode_now) {
1289 err2 = write_inode_now(inode, 1);
1290 if (!err)
1291 err = err2;
1292 }
1293 else
1294 inode_sync_wait(inode);
1295
1296 return err;
1297}
1298EXPORT_SYMBOL(generic_osync_inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4567db6f9430..e5dbecd87b0f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -894,6 +894,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
894 if (err) 894 if (err)
895 goto err_put_conn; 895 goto err_put_conn;
896 896
897 sb->s_bdi = &fc->bdi;
898
897 /* Handle umasking inside the fuse code */ 899 /* Handle umasking inside the fuse code */
898 if (sb->s_flags & MS_POSIXACL) 900 if (sb->s_flags & MS_POSIXACL)
899 fc->dont_mask = 1; 901 fc->dont_mask = 1;
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 3da2f1f4f738..21f7e46da4c0 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,6 +1,6 @@
1EXTRA_CFLAGS := -I$(src) 1EXTRA_CFLAGS := -I$(src)
2obj-$(CONFIG_GFS2_FS) += gfs2.o 2obj-$(CONFIG_GFS2_FS) += gfs2.o
3gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ 3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
4 glops.o inode.o log.o lops.o main.o meta_io.o \ 4 glops.o inode.o log.o lops.o main.o meta_io.o \
5 aops.o dentry.o export.o file.o \ 5 aops.o dentry.o export.o file.o \
6 ops_fstype.o ops_inode.o quota.o \ 6 ops_fstype.o ops_inode.o quota.o \
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index fa881bdc3d85..3fc4e3ac7d84 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -19,8 +19,7 @@
19#include "gfs2.h" 19#include "gfs2.h"
20#include "incore.h" 20#include "incore.h"
21#include "acl.h" 21#include "acl.h"
22#include "eaops.h" 22#include "xattr.h"
23#include "eattr.h"
24#include "glock.h" 23#include "glock.h"
25#include "inode.h" 24#include "inode.h"
26#include "meta_io.h" 25#include "meta_io.h"
@@ -31,8 +30,7 @@
31#define ACL_DEFAULT 0 30#define ACL_DEFAULT 0
32 31
33int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, 32int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
34 struct gfs2_ea_request *er, 33 struct gfs2_ea_request *er, int *remove, mode_t *mode)
35 int *remove, mode_t *mode)
36{ 34{
37 struct posix_acl *acl; 35 struct posix_acl *acl;
38 int error; 36 int error;
@@ -83,30 +81,20 @@ int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
83 return 0; 81 return 0;
84} 82}
85 83
86static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl, 84static int acl_get(struct gfs2_inode *ip, const char *name,
87 struct gfs2_ea_location *el, char **data, unsigned int *len) 85 struct posix_acl **acl, struct gfs2_ea_location *el,
86 char **datap, unsigned int *lenp)
88{ 87{
89 struct gfs2_ea_request er; 88 char *data;
90 struct gfs2_ea_location el_this; 89 unsigned int len;
91 int error; 90 int error;
92 91
92 el->el_bh = NULL;
93
93 if (!ip->i_eattr) 94 if (!ip->i_eattr)
94 return 0; 95 return 0;
95 96
96 memset(&er, 0, sizeof(struct gfs2_ea_request)); 97 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, el);
97 if (access) {
98 er.er_name = GFS2_POSIX_ACL_ACCESS;
99 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
100 } else {
101 er.er_name = GFS2_POSIX_ACL_DEFAULT;
102 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
103 }
104 er.er_type = GFS2_EATYPE_SYS;
105
106 if (!el)
107 el = &el_this;
108
109 error = gfs2_ea_find(ip, &er, el);
110 if (error) 98 if (error)
111 return error; 99 return error;
112 if (!el->el_ea) 100 if (!el->el_ea)
@@ -114,32 +102,31 @@ static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
114 if (!GFS2_EA_DATA_LEN(el->el_ea)) 102 if (!GFS2_EA_DATA_LEN(el->el_ea))
115 goto out; 103 goto out;
116 104
117 er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea); 105 len = GFS2_EA_DATA_LEN(el->el_ea);
118 er.er_data = kmalloc(er.er_data_len, GFP_NOFS); 106 data = kmalloc(len, GFP_NOFS);
119 error = -ENOMEM; 107 error = -ENOMEM;
120 if (!er.er_data) 108 if (!data)
121 goto out; 109 goto out;
122 110
123 error = gfs2_ea_get_copy(ip, el, er.er_data); 111 error = gfs2_ea_get_copy(ip, el, data, len);
124 if (error) 112 if (error < 0)
125 goto out_kfree; 113 goto out_kfree;
114 error = 0;
126 115
127 if (acl) { 116 if (acl) {
128 *acl = posix_acl_from_xattr(er.er_data, er.er_data_len); 117 *acl = posix_acl_from_xattr(data, len);
129 if (IS_ERR(*acl)) 118 if (IS_ERR(*acl))
130 error = PTR_ERR(*acl); 119 error = PTR_ERR(*acl);
131 } 120 }
132 121
133out_kfree: 122out_kfree:
134 if (error || !data) 123 if (error || !datap) {
135 kfree(er.er_data); 124 kfree(data);
136 else { 125 } else {
137 *data = er.er_data; 126 *datap = data;
138 *len = er.er_data_len; 127 *lenp = len;
139 } 128 }
140out: 129out:
141 if (error || el == &el_this)
142 brelse(el->el_bh);
143 return error; 130 return error;
144} 131}
145 132
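The reworked acl_get() also nails down buffer ownership for its out parameters: every failure path (and the no-datap case) frees the allocation locally, and ownership transfers to the caller only on success. That convention in isolation, with hypothetical names:

    #include <stdlib.h>
    #include <string.h>

    /* Ownership rule: *datap/*lenp are written only on success; every
     * failure path frees the buffer locally, so callers never have to
     * guess who owns it. */
    static int read_blob(const char *src, char **datap, size_t *lenp)
    {
        size_t len = strlen(src);
        char *data = malloc(len);
        int error = data ? 0 : -1;

        if (!error)
            memcpy(data, src, len);

        if (error || !datap) {
            free(data);              /* free(NULL) is a safe no-op */
        } else {
            *datap = data;           /* ownership moves to the caller */
            *lenp = len;
        }
        return error;
    }

    int main(void)
    {
        char *buf;
        size_t n;
        int err = read_blob("acl-bytes", &buf, &n);

        if (!err)
            free(buf);
        return err ? 1 : 0;
    }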
@@ -153,10 +140,12 @@ out:
153 140
154int gfs2_check_acl(struct inode *inode, int mask) 141int gfs2_check_acl(struct inode *inode, int mask)
155{ 142{
143 struct gfs2_ea_location el;
156 struct posix_acl *acl = NULL; 144 struct posix_acl *acl = NULL;
157 int error; 145 int error;
158 146
159 error = acl_get(GFS2_I(inode), ACL_ACCESS, &acl, NULL, NULL, NULL); 147 error = acl_get(GFS2_I(inode), GFS2_POSIX_ACL_ACCESS, &acl, &el, NULL, NULL);
148 brelse(el.el_bh);
160 if (error) 149 if (error)
161 return error; 150 return error;
162 151
@@ -196,10 +185,12 @@ static int munge_mode(struct gfs2_inode *ip, mode_t mode)
196 185
197int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) 186int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
198{ 187{
188 struct gfs2_ea_location el;
199 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 189 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
200 struct posix_acl *acl = NULL, *clone; 190 struct posix_acl *acl = NULL, *clone;
201 struct gfs2_ea_request er;
202 mode_t mode = ip->i_inode.i_mode; 191 mode_t mode = ip->i_inode.i_mode;
192 char *data = NULL;
193 unsigned int len;
203 int error; 194 int error;
204 195
205 if (!sdp->sd_args.ar_posix_acl) 196 if (!sdp->sd_args.ar_posix_acl)
@@ -207,11 +198,8 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
207 if (S_ISLNK(ip->i_inode.i_mode)) 198 if (S_ISLNK(ip->i_inode.i_mode))
208 return 0; 199 return 0;
209 200
210 memset(&er, 0, sizeof(struct gfs2_ea_request)); 201 error = acl_get(dip, GFS2_POSIX_ACL_DEFAULT, &acl, &el, &data, &len);
211 er.er_type = GFS2_EATYPE_SYS; 202 brelse(el.el_bh);
212
213 error = acl_get(dip, ACL_DEFAULT, &acl, NULL,
214 &er.er_data, &er.er_data_len);
215 if (error) 203 if (error)
216 return error; 204 return error;
217 if (!acl) { 205 if (!acl) {
@@ -229,9 +217,8 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
229 acl = clone; 217 acl = clone;
230 218
231 if (S_ISDIR(ip->i_inode.i_mode)) { 219 if (S_ISDIR(ip->i_inode.i_mode)) {
232 er.er_name = GFS2_POSIX_ACL_DEFAULT; 220 error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS,
233 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN; 221 GFS2_POSIX_ACL_DEFAULT, data, len, 0);
234 error = gfs2_system_eaops.eo_set(ip, &er);
235 if (error) 222 if (error)
236 goto out; 223 goto out;
237 } 224 }
@@ -239,21 +226,19 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
239 error = posix_acl_create_masq(acl, &mode); 226 error = posix_acl_create_masq(acl, &mode);
240 if (error < 0) 227 if (error < 0)
241 goto out; 228 goto out;
242 if (error > 0) { 229 if (error == 0)
243 er.er_name = GFS2_POSIX_ACL_ACCESS; 230 goto munge;
244 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
245 posix_acl_to_xattr(acl, er.er_data, er.er_data_len);
246 er.er_mode = mode;
247 er.er_flags = GFS2_ERF_MODE;
248 error = gfs2_system_eaops.eo_set(ip, &er);
249 if (error)
250 goto out;
251 } else
252 munge_mode(ip, mode);
253 231
232 posix_acl_to_xattr(acl, data, len);
233 error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS,
234 GFS2_POSIX_ACL_ACCESS, data, len, 0);
235 if (error)
236 goto out;
237munge:
238 error = munge_mode(ip, mode);
254out: 239out:
255 posix_acl_release(acl); 240 posix_acl_release(acl);
256 kfree(er.er_data); 241 kfree(data);
257 return error; 242 return error;
258} 243}
259 244
@@ -265,9 +250,9 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
265 unsigned int len; 250 unsigned int len;
266 int error; 251 int error;
267 252
268 error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len); 253 error = acl_get(ip, GFS2_POSIX_ACL_ACCESS, &acl, &el, &data, &len);
269 if (error) 254 if (error)
270 return error; 255 goto out_brelse;
271 if (!acl) 256 if (!acl)
272 return gfs2_setattr_simple(ip, attr); 257 return gfs2_setattr_simple(ip, attr);
273 258
@@ -286,8 +271,9 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
286 271
287out: 272out:
288 posix_acl_release(acl); 273 posix_acl_release(acl);
289 brelse(el.el_bh);
290 kfree(data); 274 kfree(data);
275out_brelse:
276 brelse(el.el_bh);
291 return error; 277 return error;
292} 278}
293 279
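The gfs2_acl_chmod() hunk is a cleanup-ordering fix: the early-error path previously returned without releasing el.el_bh, and routing it through a dedicated label guarantees the buffer head is dropped on every exit. The idiom reduced to a skeleton, with hypothetical resource names:

    struct res { int held; };

    static int acquire(struct res *r) { r->held = 1; return 0; }
    static void release(struct res *r) { r->held = 0; }
    static int do_step(void) { return -1; /* pretend the step fails */ }

    static int op(void)
    {
        struct res bh;
        int error = acquire(&bh);

        if (error)
            return error;            /* nothing held yet: plain return */

        error = do_step();
        if (error)
            goto out_release;        /* NOT a bare return: bh is held */

        /* ... main work ... */

    out_release:
        release(&bh);                /* runs on success and failure alike */
        return error;
    }

    int main(void)
    {
        return op() == -1 ? 0 : 1;   /* the failing step still cleaned up */
    }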
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 022c66cd5606..91beddadd388 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -107,8 +107,26 @@ static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
107 return 0; 107 return 0;
108} 108}
109 109
110static int gfs2_dentry_delete(struct dentry *dentry)
111{
112 struct gfs2_inode *ginode;
113
114 if (!dentry->d_inode)
115 return 0;
116
117 ginode = GFS2_I(dentry->d_inode);
118 if (!ginode->i_iopen_gh.gh_gl)
119 return 0;
120
121 if (test_bit(GLF_DEMOTE, &ginode->i_iopen_gh.gh_gl->gl_flags))
122 return 1;
123
124 return 0;
125}
126
110const struct dentry_operations gfs2_dops = { 127const struct dentry_operations gfs2_dops = {
111 .d_revalidate = gfs2_drevalidate, 128 .d_revalidate = gfs2_drevalidate,
112 .d_hash = gfs2_dhash, 129 .d_hash = gfs2_dhash,
130 .d_delete = gfs2_dentry_delete,
113}; 131};
114 132
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
deleted file mode 100644
index dee9b03e5b37..000000000000
--- a/fs/gfs2/eaops.c
+++ /dev/null
@@ -1,157 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/slab.h>
11#include <linux/spinlock.h>
12#include <linux/completion.h>
13#include <linux/buffer_head.h>
14#include <linux/capability.h>
15#include <linux/xattr.h>
16#include <linux/gfs2_ondisk.h>
17#include <asm/uaccess.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "acl.h"
22#include "eaops.h"
23#include "eattr.h"
24#include "util.h"
25
26/**
27 * gfs2_ea_name2type - get the type of the ea, and truncate type from the name
28 * @name: ea name, possibly with type appended
29 *
30 * Returns: GFS2_EATYPE_XXX
31 */
32
33unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name)
34{
35 unsigned int type;
36
37 if (strncmp(name, "system.", 7) == 0) {
38 type = GFS2_EATYPE_SYS;
39 if (truncated_name)
40 *truncated_name = name + sizeof("system.") - 1;
41 } else if (strncmp(name, "user.", 5) == 0) {
42 type = GFS2_EATYPE_USR;
43 if (truncated_name)
44 *truncated_name = name + sizeof("user.") - 1;
45 } else if (strncmp(name, "security.", 9) == 0) {
46 type = GFS2_EATYPE_SECURITY;
47 if (truncated_name)
48 *truncated_name = name + sizeof("security.") - 1;
49 } else {
50 type = GFS2_EATYPE_UNUSED;
51 if (truncated_name)
52 *truncated_name = NULL;
53 }
54
55 return type;
56}
57
58static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
59{
60 if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) &&
61 !GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len) &&
62 !capable(CAP_SYS_ADMIN))
63 return -EPERM;
64
65 if (GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl == 0 &&
66 (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) ||
67 GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)))
68 return -EOPNOTSUPP;
69
70 return gfs2_ea_get_i(ip, er);
71}
72
73static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
74{
75 int remove = 0;
76 int error;
77
78 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
79 if (!(er->er_flags & GFS2_ERF_MODE)) {
80 er->er_mode = ip->i_inode.i_mode;
81 er->er_flags |= GFS2_ERF_MODE;
82 }
83 error = gfs2_acl_validate_set(ip, 1, er,
84 &remove, &er->er_mode);
85 if (error)
86 return error;
87 error = gfs2_ea_set_i(ip, er);
88 if (error)
89 return error;
90 if (remove)
91 gfs2_ea_remove_i(ip, er);
92 return 0;
93
94 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
95 error = gfs2_acl_validate_set(ip, 0, er,
96 &remove, NULL);
97 if (error)
98 return error;
99 if (!remove)
100 error = gfs2_ea_set_i(ip, er);
101 else {
102 error = gfs2_ea_remove_i(ip, er);
103 if (error == -ENODATA)
104 error = 0;
105 }
106 return error;
107 }
108
109 return -EPERM;
110}
111
112static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
113{
114 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
115 int error = gfs2_acl_validate_remove(ip, 1);
116 if (error)
117 return error;
118
119 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
120 int error = gfs2_acl_validate_remove(ip, 0);
121 if (error)
122 return error;
123
124 } else
125 return -EPERM;
126
127 return gfs2_ea_remove_i(ip, er);
128}
129
130static const struct gfs2_eattr_operations gfs2_user_eaops = {
131 .eo_get = gfs2_ea_get_i,
132 .eo_set = gfs2_ea_set_i,
133 .eo_remove = gfs2_ea_remove_i,
134 .eo_name = "user",
135};
136
137const struct gfs2_eattr_operations gfs2_system_eaops = {
138 .eo_get = system_eo_get,
139 .eo_set = system_eo_set,
140 .eo_remove = system_eo_remove,
141 .eo_name = "system",
142};
143
144static const struct gfs2_eattr_operations gfs2_security_eaops = {
145 .eo_get = gfs2_ea_get_i,
146 .eo_set = gfs2_ea_set_i,
147 .eo_remove = gfs2_ea_remove_i,
148 .eo_name = "security",
149};
150
151const struct gfs2_eattr_operations *gfs2_ea_ops[] = {
152 NULL,
153 &gfs2_user_eaops,
154 &gfs2_system_eaops,
155 &gfs2_security_eaops,
156};
157
diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h
deleted file mode 100644
index da2f7fbbb40d..000000000000
--- a/fs/gfs2/eaops.h
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __EAOPS_DOT_H__
11#define __EAOPS_DOT_H__
12
13struct gfs2_ea_request;
14struct gfs2_inode;
15
16struct gfs2_eattr_operations {
17 int (*eo_get) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
18 int (*eo_set) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
19 int (*eo_remove) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
20 char *eo_name;
21};
22
23unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name);
24
25extern const struct gfs2_eattr_operations gfs2_system_eaops;
26
27extern const struct gfs2_eattr_operations *gfs2_ea_ops[];
28
29#endif /* __EAOPS_DOT_H__ */
30
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9200ef221716..d15876e9aa26 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -143,17 +143,14 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
143} 143}
144 144
145static struct dentry *gfs2_get_dentry(struct super_block *sb, 145static struct dentry *gfs2_get_dentry(struct super_block *sb,
146 struct gfs2_inum_host *inum) 146 struct gfs2_inum_host *inum)
147{ 147{
148 struct gfs2_sbd *sdp = sb->s_fs_info; 148 struct gfs2_sbd *sdp = sb->s_fs_info;
149 struct gfs2_holder i_gh, ri_gh, rgd_gh; 149 struct gfs2_holder i_gh;
150 struct gfs2_rgrpd *rgd;
151 struct inode *inode; 150 struct inode *inode;
152 struct dentry *dentry; 151 struct dentry *dentry;
153 int error; 152 int error;
154 153
155 /* System files? */
156
157 inode = gfs2_ilookup(sb, inum->no_addr); 154 inode = gfs2_ilookup(sb, inum->no_addr);
158 if (inode) { 155 if (inode) {
159 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { 156 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
@@ -168,29 +165,11 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
168 if (error) 165 if (error)
169 return ERR_PTR(error); 166 return ERR_PTR(error);
170 167
171 error = gfs2_rindex_hold(sdp, &ri_gh); 168 error = gfs2_check_blk_type(sdp, inum->no_addr, GFS2_BLKST_DINODE);
172 if (error) 169 if (error)
173 goto fail; 170 goto fail;
174 171
175 error = -EINVAL; 172 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0, 0);
176 rgd = gfs2_blk2rgrpd(sdp, inum->no_addr);
177 if (!rgd)
178 goto fail_rindex;
179
180 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
181 if (error)
182 goto fail_rindex;
183
184 error = -ESTALE;
185 if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE)
186 goto fail_rgd;
187
188 gfs2_glock_dq_uninit(&rgd_gh);
189 gfs2_glock_dq_uninit(&ri_gh);
190
191 inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
192 inum->no_addr,
193 0, 0);
194 if (IS_ERR(inode)) { 173 if (IS_ERR(inode)) {
195 error = PTR_ERR(inode); 174 error = PTR_ERR(inode);
196 goto fail; 175 goto fail;
@@ -224,13 +203,6 @@ out_inode:
224 if (!IS_ERR(dentry)) 203 if (!IS_ERR(dentry))
225 dentry->d_op = &gfs2_dops; 204 dentry->d_op = &gfs2_dops;
226 return dentry; 205 return dentry;
227
228fail_rgd:
229 gfs2_glock_dq_uninit(&rgd_gh);
230
231fail_rindex:
232 gfs2_glock_dq_uninit(&ri_gh);
233
234fail: 206fail:
235 gfs2_glock_dq_uninit(&i_gh); 207 gfs2_glock_dq_uninit(&i_gh);
236 return ERR_PTR(error); 208 return ERR_PTR(error);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 73318a3ce6f1..166f38fbd246 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -38,7 +38,6 @@
38#include "rgrp.h" 38#include "rgrp.h"
39#include "trans.h" 39#include "trans.h"
40#include "util.h" 40#include "util.h"
41#include "eaops.h"
42 41
43/** 42/**
44 * gfs2_llseek - seek to a location in a file 43 * gfs2_llseek - seek to a location in a file
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 61801ada36f0..6edb423f90b3 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -406,6 +406,12 @@ struct gfs2_statfs_change_host {
406#define GFS2_DATA_WRITEBACK 1 406#define GFS2_DATA_WRITEBACK 1
407#define GFS2_DATA_ORDERED 2 407#define GFS2_DATA_ORDERED 2
408 408
409#define GFS2_ERRORS_DEFAULT GFS2_ERRORS_WITHDRAW
410#define GFS2_ERRORS_WITHDRAW 0
411#define GFS2_ERRORS_CONTINUE 1 /* place holder for future feature */
412#define GFS2_ERRORS_RO 2 /* place holder for future feature */
413#define GFS2_ERRORS_PANIC 3
414
409struct gfs2_args { 415struct gfs2_args {
410 char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ 416 char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
411 char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ 417 char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
@@ -422,6 +428,7 @@ struct gfs2_args {
422 unsigned int ar_data:2; /* ordered/writeback */ 428 unsigned int ar_data:2; /* ordered/writeback */
423 unsigned int ar_meta:1; /* mount metafs */ 429 unsigned int ar_meta:1; /* mount metafs */
424 unsigned int ar_discard:1; /* discard requests */ 430 unsigned int ar_discard:1; /* discard requests */
431 unsigned int ar_errors:2; /* errors=withdraw | panic */
425 int ar_commit; /* Commit interval */ 432 int ar_commit; /* Commit interval */
426}; 433};
427 434
@@ -489,7 +496,6 @@ struct gfs2_sb_host {
489 */ 496 */
490 497
491struct lm_lockstruct { 498struct lm_lockstruct {
492 u32 ls_id;
493 unsigned int ls_jid; 499 unsigned int ls_jid;
494 unsigned int ls_first; 500 unsigned int ls_first;
495 unsigned int ls_first_done; 501 unsigned int ls_first_done;
@@ -541,18 +547,12 @@ struct gfs2_sbd {
541 struct dentry *sd_root_dir; 547 struct dentry *sd_root_dir;
542 548
543 struct inode *sd_jindex; 549 struct inode *sd_jindex;
544 struct inode *sd_inum_inode;
545 struct inode *sd_statfs_inode; 550 struct inode *sd_statfs_inode;
546 struct inode *sd_ir_inode;
547 struct inode *sd_sc_inode; 551 struct inode *sd_sc_inode;
548 struct inode *sd_qc_inode; 552 struct inode *sd_qc_inode;
549 struct inode *sd_rindex; 553 struct inode *sd_rindex;
550 struct inode *sd_quota_inode; 554 struct inode *sd_quota_inode;
551 555
552 /* Inum stuff */
553
554 struct mutex sd_inum_mutex;
555
556 /* StatFS stuff */ 556 /* StatFS stuff */
557 557
558 spinlock_t sd_statfs_spin; 558 spinlock_t sd_statfs_spin;
@@ -580,7 +580,6 @@ struct gfs2_sbd {
580 struct gfs2_holder sd_journal_gh; 580 struct gfs2_holder sd_journal_gh;
581 struct gfs2_holder sd_jinode_gh; 581 struct gfs2_holder sd_jinode_gh;
582 582
583 struct gfs2_holder sd_ir_gh;
584 struct gfs2_holder sd_sc_gh; 583 struct gfs2_holder sd_sc_gh;
585 struct gfs2_holder sd_qc_gh; 584 struct gfs2_holder sd_qc_gh;
586 585
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2f94bd723698..fb15d3b1f409 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -24,7 +24,7 @@
24#include "acl.h" 24#include "acl.h"
25#include "bmap.h" 25#include "bmap.h"
26#include "dir.h" 26#include "dir.h"
27#include "eattr.h" 27#include "xattr.h"
28#include "glock.h" 28#include "glock.h"
29#include "glops.h" 29#include "glops.h"
30#include "inode.h" 30#include "inode.h"
@@ -519,139 +519,6 @@ out:
519 return inode ? inode : ERR_PTR(error); 519 return inode ? inode : ERR_PTR(error);
520} 520}
521 521
522static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
523{
524 const struct gfs2_inum_range *str = buf;
525
526 ir->ir_start = be64_to_cpu(str->ir_start);
527 ir->ir_length = be64_to_cpu(str->ir_length);
528}
529
530static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
531{
532 struct gfs2_inum_range *str = buf;
533
534 str->ir_start = cpu_to_be64(ir->ir_start);
535 str->ir_length = cpu_to_be64(ir->ir_length);
536}
537
538static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
539{
540 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
541 struct buffer_head *bh;
542 struct gfs2_inum_range_host ir;
543 int error;
544
545 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
546 if (error)
547 return error;
548 mutex_lock(&sdp->sd_inum_mutex);
549
550 error = gfs2_meta_inode_buffer(ip, &bh);
551 if (error) {
552 mutex_unlock(&sdp->sd_inum_mutex);
553 gfs2_trans_end(sdp);
554 return error;
555 }
556
557 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
558
559 if (ir.ir_length) {
560 *formal_ino = ir.ir_start++;
561 ir.ir_length--;
562 gfs2_trans_add_bh(ip->i_gl, bh, 1);
563 gfs2_inum_range_out(&ir,
564 bh->b_data + sizeof(struct gfs2_dinode));
565 brelse(bh);
566 mutex_unlock(&sdp->sd_inum_mutex);
567 gfs2_trans_end(sdp);
568 return 0;
569 }
570
571 brelse(bh);
572
573 mutex_unlock(&sdp->sd_inum_mutex);
574 gfs2_trans_end(sdp);
575
576 return 1;
577}
578
579static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino)
580{
581 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
582 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode);
583 struct gfs2_holder gh;
584 struct buffer_head *bh;
585 struct gfs2_inum_range_host ir;
586 int error;
587
588 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
589 if (error)
590 return error;
591
592 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
593 if (error)
594 goto out;
595 mutex_lock(&sdp->sd_inum_mutex);
596
597 error = gfs2_meta_inode_buffer(ip, &bh);
598 if (error)
599 goto out_end_trans;
600
601 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
602
603 if (!ir.ir_length) {
604 struct buffer_head *m_bh;
605 u64 x, y;
606 __be64 z;
607
608 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
609 if (error)
610 goto out_brelse;
611
612 z = *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode));
613 x = y = be64_to_cpu(z);
614 ir.ir_start = x;
615 ir.ir_length = GFS2_INUM_QUANTUM;
616 x += GFS2_INUM_QUANTUM;
617 if (x < y)
618 gfs2_consist_inode(m_ip);
619 z = cpu_to_be64(x);
620 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
621 *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = z;
622
623 brelse(m_bh);
624 }
625
626 *formal_ino = ir.ir_start++;
627 ir.ir_length--;
628
629 gfs2_trans_add_bh(ip->i_gl, bh, 1);
630 gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode));
631
632out_brelse:
633 brelse(bh);
634out_end_trans:
635 mutex_unlock(&sdp->sd_inum_mutex);
636 gfs2_trans_end(sdp);
637out:
638 gfs2_glock_dq_uninit(&gh);
639 return error;
640}
641
642static int pick_formal_ino(struct gfs2_sbd *sdp, u64 *inum)
643{
644 int error;
645
646 error = pick_formal_ino_1(sdp, inum);
647 if (error <= 0)
648 return error;
649
650 error = pick_formal_ino_2(sdp, inum);
651
652 return error;
653}
654
655/** 522/**
656 * create_ok - OK to create a new on-disk inode here? 523 * create_ok - OK to create a new on-disk inode here?
657 * @dip: Directory in which dinode is to be created 524 * @dip: Directory in which dinode is to be created
@@ -731,7 +598,7 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
731 if (error) 598 if (error)
732 goto out_ipreserv; 599 goto out_ipreserv;
733 600
734 *no_addr = gfs2_alloc_di(dip, generation); 601 error = gfs2_alloc_di(dip, no_addr, generation);
735 602
736 gfs2_trans_end(sdp); 603 gfs2_trans_end(sdp);
737 604
@@ -924,7 +791,6 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
924 size_t len; 791 size_t len;
925 void *value; 792 void *value;
926 char *name; 793 char *name;
927 struct gfs2_ea_request er;
928 794
929 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, 795 err = security_inode_init_security(&ip->i_inode, &dip->i_inode,
930 &name, &value, &len); 796 &name, &value, &len);
@@ -935,16 +801,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
935 return err; 801 return err;
936 } 802 }
937 803
938 memset(&er, 0, sizeof(struct gfs2_ea_request)); 804 err = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SECURITY, name, value, len, 0);
939
940 er.er_type = GFS2_EATYPE_SECURITY;
941 er.er_name = name;
942 er.er_data = value;
943 er.er_name_len = strlen(name);
944 er.er_data_len = len;
945
946 err = gfs2_ea_set_i(ip, &er);
947
948 kfree(value); 805 kfree(value);
949 kfree(name); 806 kfree(name);
950 807
@@ -991,13 +848,10 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
991 if (error) 848 if (error)
992 goto fail_gunlock; 849 goto fail_gunlock;
993 850
994 error = pick_formal_ino(sdp, &inum.no_formal_ino);
995 if (error)
996 goto fail_gunlock;
997
998 error = alloc_dinode(dip, &inum.no_addr, &generation); 851 error = alloc_dinode(dip, &inum.no_addr, &generation);
999 if (error) 852 if (error)
1000 goto fail_gunlock; 853 goto fail_gunlock;
854 inum.no_formal_ino = generation;
1001 855
1002 error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, 856 error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops,
1003 LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 857 LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
@@ -1008,9 +862,8 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
1008 if (error) 862 if (error)
1009 goto fail_gunlock2; 863 goto fail_gunlock2;
1010 864
1011 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), 865 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr,
1012 inum.no_addr, 866 inum.no_formal_ino, 0);
1013 inum.no_formal_ino, 0);
1014 if (IS_ERR(inode)) 867 if (IS_ERR(inode))
1015 goto fail_gunlock2; 868 goto fail_gunlock2;
1016 869
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 7bc3c45cd676..52fb6c048981 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -84,7 +84,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
84 84
85 gfs2_tune_init(&sdp->sd_tune); 85 gfs2_tune_init(&sdp->sd_tune);
86 86
87 mutex_init(&sdp->sd_inum_mutex);
88 spin_lock_init(&sdp->sd_statfs_spin); 87 spin_lock_init(&sdp->sd_statfs_spin);
89 88
90 spin_lock_init(&sdp->sd_rindex_spin); 89 spin_lock_init(&sdp->sd_rindex_spin);
@@ -833,21 +832,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
833 if (error) 832 if (error)
834 goto fail; 833 goto fail;
835 834
836 /* Read in the master inode number inode */
837 sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum");
838 if (IS_ERR(sdp->sd_inum_inode)) {
839 error = PTR_ERR(sdp->sd_inum_inode);
840 fs_err(sdp, "can't read in inum inode: %d\n", error);
841 goto fail_journal;
842 }
843
844
845 /* Read in the master statfs inode */ 835 /* Read in the master statfs inode */
846 sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); 836 sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
847 if (IS_ERR(sdp->sd_statfs_inode)) { 837 if (IS_ERR(sdp->sd_statfs_inode)) {
848 error = PTR_ERR(sdp->sd_statfs_inode); 838 error = PTR_ERR(sdp->sd_statfs_inode);
849 fs_err(sdp, "can't read in statfs inode: %d\n", error); 839 fs_err(sdp, "can't read in statfs inode: %d\n", error);
850 goto fail_inum; 840 goto fail_journal;
851 } 841 }
852 842
853 /* Read in the resource index inode */ 843 /* Read in the resource index inode */
@@ -876,8 +866,6 @@ fail_rindex:
876 iput(sdp->sd_rindex); 866 iput(sdp->sd_rindex);
877fail_statfs: 867fail_statfs:
878 iput(sdp->sd_statfs_inode); 868 iput(sdp->sd_statfs_inode);
879fail_inum:
880 iput(sdp->sd_inum_inode);
881fail_journal: 869fail_journal:
882 init_journal(sdp, UNDO); 870 init_journal(sdp, UNDO);
883fail: 871fail:
@@ -905,20 +893,12 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
905 return error; 893 return error;
906 } 894 }
907 895
908 sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid);
909 sdp->sd_ir_inode = gfs2_lookup_simple(pn, buf);
910 if (IS_ERR(sdp->sd_ir_inode)) {
911 error = PTR_ERR(sdp->sd_ir_inode);
912 fs_err(sdp, "can't find local \"ir\" file: %d\n", error);
913 goto fail;
914 }
915
916 sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid); 896 sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
917 sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf); 897 sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf);
918 if (IS_ERR(sdp->sd_sc_inode)) { 898 if (IS_ERR(sdp->sd_sc_inode)) {
919 error = PTR_ERR(sdp->sd_sc_inode); 899 error = PTR_ERR(sdp->sd_sc_inode);
920 fs_err(sdp, "can't find local \"sc\" file: %d\n", error); 900 fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
921 goto fail_ir_i; 901 goto fail;
922 } 902 }
923 903
924 sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid); 904 sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
@@ -932,27 +912,16 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
932 iput(pn); 912 iput(pn);
933 pn = NULL; 913 pn = NULL;
934 914
935 ip = GFS2_I(sdp->sd_ir_inode);
936 error = gfs2_glock_nq_init(ip->i_gl,
937 LM_ST_EXCLUSIVE, 0,
938 &sdp->sd_ir_gh);
939 if (error) {
940 fs_err(sdp, "can't lock local \"ir\" file: %d\n", error);
941 goto fail_qc_i;
942 }
943
944 ip = GFS2_I(sdp->sd_sc_inode); 915 ip = GFS2_I(sdp->sd_sc_inode);
945 error = gfs2_glock_nq_init(ip->i_gl, 916 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
946 LM_ST_EXCLUSIVE, 0,
947 &sdp->sd_sc_gh); 917 &sdp->sd_sc_gh);
948 if (error) { 918 if (error) {
949 fs_err(sdp, "can't lock local \"sc\" file: %d\n", error); 919 fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
950 goto fail_ir_gh; 920 goto fail_qc_i;
951 } 921 }
952 922
953 ip = GFS2_I(sdp->sd_qc_inode); 923 ip = GFS2_I(sdp->sd_qc_inode);
954 error = gfs2_glock_nq_init(ip->i_gl, 924 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
955 LM_ST_EXCLUSIVE, 0,
956 &sdp->sd_qc_gh); 925 &sdp->sd_qc_gh);
957 if (error) { 926 if (error) {
958 fs_err(sdp, "can't lock local \"qc\" file: %d\n", error); 927 fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
@@ -965,14 +934,10 @@ fail_qc_gh:
965 gfs2_glock_dq_uninit(&sdp->sd_qc_gh); 934 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
966fail_ut_gh: 935fail_ut_gh:
967 gfs2_glock_dq_uninit(&sdp->sd_sc_gh); 936 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
968fail_ir_gh:
969 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
970fail_qc_i: 937fail_qc_i:
971 iput(sdp->sd_qc_inode); 938 iput(sdp->sd_qc_inode);
972fail_ut_i: 939fail_ut_i:
973 iput(sdp->sd_sc_inode); 940 iput(sdp->sd_sc_inode);
974fail_ir_i:
975 iput(sdp->sd_ir_inode);
976fail: 941fail:
977 if (pn) 942 if (pn)
978 iput(pn); 943 iput(pn);
@@ -1063,7 +1028,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
1063 1028
1064 ls->ls_ops = lm; 1029 ls->ls_ops = lm;
1065 ls->ls_first = 1; 1030 ls->ls_first = 1;
1066 ls->ls_id = 0;
1067 1031
1068 for (options = args->ar_hostdata; (o = strsep(&options, ":")); ) { 1032 for (options = args->ar_hostdata; (o = strsep(&options, ":")); ) {
1069 substring_t tmp[MAX_OPT_ARGS]; 1033 substring_t tmp[MAX_OPT_ARGS];
@@ -1081,10 +1045,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
1081 ls->ls_jid = option; 1045 ls->ls_jid = option;
1082 break; 1046 break;
1083 case Opt_id: 1047 case Opt_id:
1084 ret = match_int(&tmp[0], &option); 1048 /* Obsolete, but left for backward compat purposes */
1085 if (ret)
1086 goto hostdata_error;
1087 ls->ls_id = option;
1088 break; 1049 break;
1089 case Opt_first: 1050 case Opt_first:
1090 ret = match_int(&tmp[0], &option); 1051 ret = match_int(&tmp[0], &option);
@@ -1133,6 +1094,17 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp)
1133 lm->lm_unmount(sdp); 1094 lm->lm_unmount(sdp);
1134} 1095}
1135 1096
1097void gfs2_online_uevent(struct gfs2_sbd *sdp)
1098{
1099 struct super_block *sb = sdp->sd_vfs;
1100 char ro[20];
1101 char spectator[20];
1102 char *envp[] = { ro, spectator, NULL };
1103 sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0);
1104 sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
1105 kobject_uevent_env(&sdp->sd_kobj, KOBJ_ONLINE, envp);
1106}
1107
1136/** 1108/**
1137 * fill_super - Read in superblock 1109 * fill_super - Read in superblock
1138 * @sb: The VFS superblock 1110 * @sb: The VFS superblock
@@ -1157,6 +1129,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1157 sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT; 1129 sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT;
1158 sdp->sd_args.ar_data = GFS2_DATA_DEFAULT; 1130 sdp->sd_args.ar_data = GFS2_DATA_DEFAULT;
1159 sdp->sd_args.ar_commit = 60; 1131 sdp->sd_args.ar_commit = 60;
1132 sdp->sd_args.ar_errors = GFS2_ERRORS_DEFAULT;
1160 1133
1161 error = gfs2_mount_args(sdp, &sdp->sd_args, data); 1134 error = gfs2_mount_args(sdp, &sdp->sd_args, data);
1162 if (error) { 1135 if (error) {
@@ -1174,6 +1147,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1174 sb->s_magic = GFS2_MAGIC; 1147 sb->s_magic = GFS2_MAGIC;
1175 sb->s_op = &gfs2_super_ops; 1148 sb->s_op = &gfs2_super_ops;
1176 sb->s_export_op = &gfs2_export_ops; 1149 sb->s_export_op = &gfs2_export_ops;
1150 sb->s_xattr = gfs2_xattr_handlers;
1177 sb->s_time_gran = 1; 1151 sb->s_time_gran = 1;
1178 sb->s_maxbytes = MAX_LFS_FILESIZE; 1152 sb->s_maxbytes = MAX_LFS_FILESIZE;
1179 1153
@@ -1236,7 +1210,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1236 } 1210 }
1237 1211
1238 gfs2_glock_dq_uninit(&mount_gh); 1212 gfs2_glock_dq_uninit(&mount_gh);
1239 1213 gfs2_online_uevent(sdp);
1240 return 0; 1214 return 0;
1241 1215
1242fail_threads: 1216fail_threads:
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index f8bd20baf99c..c3ac18054057 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -26,8 +26,7 @@
26#include "acl.h" 26#include "acl.h"
27#include "bmap.h" 27#include "bmap.h"
28#include "dir.h" 28#include "dir.h"
29#include "eaops.h" 29#include "xattr.h"
30#include "eattr.h"
31#include "glock.h" 30#include "glock.h"
32#include "inode.h" 31#include "inode.h"
33#include "meta_io.h" 32#include "meta_io.h"
@@ -349,7 +348,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
349 348
350 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); 349 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
351 if (error) 350 if (error)
352 goto out_rgrp; 351 goto out_gunlock;
353 352
354 error = gfs2_dir_del(dip, &dentry->d_name); 353 error = gfs2_dir_del(dip, &dentry->d_name);
355 if (error) 354 if (error)
@@ -1302,60 +1301,53 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name,
1302 const void *data, size_t size, int flags) 1301 const void *data, size_t size, int flags)
1303{ 1302{
1304 struct inode *inode = dentry->d_inode; 1303 struct inode *inode = dentry->d_inode;
1305 struct gfs2_ea_request er; 1304 struct gfs2_inode *ip = GFS2_I(inode);
1306 1305 struct gfs2_holder gh;
1307 memset(&er, 0, sizeof(struct gfs2_ea_request)); 1306 int ret;
1308 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1309 if (er.er_type == GFS2_EATYPE_UNUSED)
1310 return -EOPNOTSUPP;
1311 er.er_data = (char *)data;
1312 er.er_name_len = strlen(er.er_name);
1313 er.er_data_len = size;
1314 er.er_flags = flags;
1315
1316 gfs2_assert_warn(GFS2_SB(inode), !(er.er_flags & GFS2_ERF_MODE));
1317 1307
1318 return gfs2_ea_set(GFS2_I(inode), &er); 1308 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1309 ret = gfs2_glock_nq(&gh);
1310 if (ret == 0) {
1311 ret = generic_setxattr(dentry, name, data, size, flags);
1312 gfs2_glock_dq(&gh);
1313 }
1314 gfs2_holder_uninit(&gh);
1315 return ret;
1319} 1316}
1320 1317
1321static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, 1318static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1322 void *data, size_t size) 1319 void *data, size_t size)
1323{ 1320{
1324 struct gfs2_ea_request er; 1321 struct inode *inode = dentry->d_inode;
1325 1322 struct gfs2_inode *ip = GFS2_I(inode);
1326 memset(&er, 0, sizeof(struct gfs2_ea_request)); 1323 struct gfs2_holder gh;
1327 er.er_type = gfs2_ea_name2type(name, &er.er_name); 1324 int ret;
1328 if (er.er_type == GFS2_EATYPE_UNUSED)
1329 return -EOPNOTSUPP;
1330 er.er_data = data;
1331 er.er_name_len = strlen(er.er_name);
1332 er.er_data_len = size;
1333
1334 return gfs2_ea_get(GFS2_I(dentry->d_inode), &er);
1335}
1336
1337static ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
1338{
1339 struct gfs2_ea_request er;
1340
1341 memset(&er, 0, sizeof(struct gfs2_ea_request));
1342 er.er_data = (size) ? buffer : NULL;
1343 er.er_data_len = size;
1344 1325
1345 return gfs2_ea_list(GFS2_I(dentry->d_inode), &er); 1326 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1327 ret = gfs2_glock_nq(&gh);
1328 if (ret == 0) {
1329 ret = generic_getxattr(dentry, name, data, size);
1330 gfs2_glock_dq(&gh);
1331 }
1332 gfs2_holder_uninit(&gh);
1333 return ret;
1346} 1334}
1347 1335
1348static int gfs2_removexattr(struct dentry *dentry, const char *name) 1336static int gfs2_removexattr(struct dentry *dentry, const char *name)
1349{ 1337{
1350 struct gfs2_ea_request er; 1338 struct inode *inode = dentry->d_inode;
1351 1339 struct gfs2_inode *ip = GFS2_I(inode);
1352 memset(&er, 0, sizeof(struct gfs2_ea_request)); 1340 struct gfs2_holder gh;
1353 er.er_type = gfs2_ea_name2type(name, &er.er_name); 1341 int ret;
1354 if (er.er_type == GFS2_EATYPE_UNUSED)
1355 return -EOPNOTSUPP;
1356 er.er_name_len = strlen(er.er_name);
1357 1342
1358 return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er); 1343 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1344 ret = gfs2_glock_nq(&gh);
1345 if (ret == 0) {
1346 ret = generic_removexattr(dentry, name);
1347 gfs2_glock_dq(&gh);
1348 }
1349 gfs2_holder_uninit(&gh);
1350 return ret;
1359} 1351}
1360 1352
1361static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1353static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
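All three xattr operations above now share one shape: acquire the inode's glock (exclusive for set/remove, shared for get), delegate to the VFS generic_*xattr helper, which dispatches through the sb->s_xattr handler table installed in fill_super earlier in this series, then drop the lock. A sketch of that shared skeleton (an illustrative abstraction assuming GFS2-internal context, not code from the patch):

static int example_op_under_glock(struct gfs2_inode *ip, unsigned int state,
				  int flags, int (*op)(void *arg), void *arg)
{
	struct gfs2_holder gh;
	int ret;

	gfs2_holder_init(ip->i_gl, state, flags, &gh);
	ret = gfs2_glock_nq(&gh);	/* enqueue: take the cluster lock */
	if (ret == 0) {
		ret = op(arg);		/* e.g. wraps generic_setxattr() */
		gfs2_glock_dq(&gh);	/* dequeue: release it */
	}
	gfs2_holder_uninit(&gh);
	return ret;
}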
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index fba795798d3a..28c590b7c9da 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -857,7 +857,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
857 goto start_new_extent; 857 goto start_new_extent;
858 if ((start + nr_sects) != blk) { 858 if ((start + nr_sects) != blk) {
859 rv = blkdev_issue_discard(bdev, start, 859 rv = blkdev_issue_discard(bdev, start,
860 nr_sects, GFP_NOFS); 860 nr_sects, GFP_NOFS,
861 DISCARD_FL_BARRIER);
861 if (rv) 862 if (rv)
862 goto fail; 863 goto fail;
863 nr_sects = 0; 864 nr_sects = 0;
@@ -871,7 +872,8 @@ start_new_extent:
871 } 872 }
872 } 873 }
873 if (nr_sects) { 874 if (nr_sects) {
874 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS); 875 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
876 DISCARD_FL_BARRIER);
875 if (rv) 877 if (rv)
876 goto fail; 878 goto fail;
877 } 879 }
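The extra argument reflects a blkdev_issue_discard() signature change in this kernel generation: the helper now takes a flags word, and DISCARD_FL_BARRIER asks for the discard to be issued as a barrier so it is ordered against surrounding I/O. A one-line wrapper showing the new call shape (illustrative; the flag was dropped again in later kernels):

#include <linux/blkdev.h>

static int example_discard_extent(struct block_device *bdev,
				  sector_t start, sector_t nr_sects)
{
	/* GFP_NOFS: this may run with filesystem locks held */
	return blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
				    DISCARD_FL_BARRIER);
}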
@@ -1256,7 +1258,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
1256 * Returns: The block type (GFS2_BLKST_*) 1258 * Returns: The block type (GFS2_BLKST_*)
1257 */ 1259 */
1258 1260
1259unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) 1261static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1260{ 1262{
1261 struct gfs2_bitmap *bi = NULL; 1263 struct gfs2_bitmap *bi = NULL;
1262 u32 length, rgrp_block, buf_block; 1264 u32 length, rgrp_block, buf_block;
@@ -1459,6 +1461,16 @@ int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
1459 return 0; 1461 return 0;
1460} 1462}
1461 1463
1464static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
1465{
1466 struct gfs2_sbd *sdp = rgd->rd_sbd;
1467 fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
1468 (unsigned long long)rgd->rd_addr);
1469 fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
1470 gfs2_rgrp_dump(NULL, rgd->rd_gl);
1471 rgd->rd_flags |= GFS2_RDF_ERROR;
1472}
1473
1462/** 1474/**
1463 * gfs2_alloc_block - Allocate one or more blocks 1475 * gfs2_alloc_block - Allocate one or more blocks
1464 * @ip: the inode to allocate the block for 1476 * @ip: the inode to allocate the block for
@@ -1520,22 +1532,20 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
1520 return 0; 1532 return 0;
1521 1533
1522rgrp_error: 1534rgrp_error:
1523 fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", 1535 gfs2_rgrp_error(rgd);
1524 (unsigned long long)rgd->rd_addr);
1525 fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
1526 gfs2_rgrp_dump(NULL, rgd->rd_gl);
1527 rgd->rd_flags |= GFS2_RDF_ERROR;
1528 return -EIO; 1536 return -EIO;
1529} 1537}
1530 1538
1531/** 1539/**
1532 * gfs2_alloc_di - Allocate a dinode 1540 * gfs2_alloc_di - Allocate a dinode
1533 * @dip: the directory that the inode is going in 1541 * @dip: the directory that the inode is going in
1542 * @bn: the block number which is allocated
1543 * @generation: the generation number of the inode
1534 * 1544 *
1535 * Returns: the block allocated 1545 * Returns: 0 on success, or -errno on failure
1536 */ 1546 */
1537 1547
1538u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) 1548int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation)
1539{ 1549{
1540 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1550 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1541 struct gfs2_alloc *al = dip->i_alloc; 1551 struct gfs2_alloc *al = dip->i_alloc;
@@ -1546,16 +1556,21 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
1546 1556
1547 blk = rgblk_search(rgd, rgd->rd_last_alloc, 1557 blk = rgblk_search(rgd, rgd->rd_last_alloc,
1548 GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n); 1558 GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n);
1549 BUG_ON(blk == BFITNOENT);
1550 1559
1551 rgd->rd_last_alloc = blk; 1560 /* Since all blocks are reserved in advance, this shouldn't happen */
1561 if (blk == BFITNOENT)
1562 goto rgrp_error;
1552 1563
1564 rgd->rd_last_alloc = blk;
1553 block = rgd->rd_data0 + blk; 1565 block = rgd->rd_data0 + blk;
1566 if (rgd->rd_free == 0)
1567 goto rgrp_error;
1554 1568
1555 gfs2_assert_withdraw(sdp, rgd->rd_free);
1556 rgd->rd_free--; 1569 rgd->rd_free--;
1557 rgd->rd_dinodes++; 1570 rgd->rd_dinodes++;
1558 *generation = rgd->rd_igeneration++; 1571 *generation = rgd->rd_igeneration++;
1572 if (*generation == 0)
1573 *generation = rgd->rd_igeneration++;
1559 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1574 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1560 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1575 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1561 1576
@@ -1568,7 +1583,12 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
1568 rgd->rd_free_clone--; 1583 rgd->rd_free_clone--;
1569 spin_unlock(&sdp->sd_rindex_spin); 1584 spin_unlock(&sdp->sd_rindex_spin);
1570 trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); 1585 trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
1571 return block; 1586 *bn = block;
1587 return 0;
1588
1589rgrp_error:
1590 gfs2_rgrp_error(rgd);
1591 return -EIO;
1572} 1592}
1573 1593
1574/** 1594/**
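gfs2_alloc_di() now reports failure through its return value instead of returning the block number directly, and it skips a generation of zero so that value stays reserved as "invalid". A sketch of the new calling convention (illustrative, assuming GFS2-internal context; the real call site is in fs/gfs2/inode.c, outside this diff):

static int example_alloc_dinode(struct gfs2_inode *dip, u64 *no_addr)
{
	u64 generation;
	int error;

	error = gfs2_alloc_di(dip, no_addr, &generation);
	if (error)
		return error;	/* -EIO: the rgrp was marked in error */
	/* generation != 0 is guaranteed by the allocator above */
	return 0;
}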
@@ -1676,6 +1696,46 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1676} 1696}
1677 1697
1678/** 1698/**
1699 * gfs2_check_blk_type - Check the type of a block
1700 * @sdp: The superblock
1701 * @no_addr: The block number to check
1702 * @type: The block type we are looking for
1703 *
1704 * Returns: 0 if the block type matches the expected type
1705 * -ESTALE if it doesn't match
1706 * or a negative errno if something went wrong while checking
1707 */
1708
1709int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
1710{
1711 struct gfs2_rgrpd *rgd;
1712 struct gfs2_holder ri_gh, rgd_gh;
1713 int error;
1714
1715 error = gfs2_rindex_hold(sdp, &ri_gh);
1716 if (error)
1717 goto fail;
1718
1719 error = -EINVAL;
1720 rgd = gfs2_blk2rgrpd(sdp, no_addr);
1721 if (!rgd)
1722 goto fail_rindex;
1723
1724 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
1725 if (error)
1726 goto fail_rindex;
1727
1728 if (gfs2_get_block_type(rgd, no_addr) != type)
1729 error = -ESTALE;
1730
1731 gfs2_glock_dq_uninit(&rgd_gh);
1732fail_rindex:
1733 gfs2_glock_dq_uninit(&ri_gh);
1734fail:
1735 return error;
1736}
1737
1738/**
1679 * gfs2_rlist_add - add a RG to a list of RGs 1739 * gfs2_rlist_add - add a RG to a list of RGs
1680 * @sdp: the filesystem 1740 * @sdp: the filesystem
1681 * @rlist: the list of resource groups 1741 * @rlist: the list of resource groups
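gfs2_check_blk_type() gets its first user in the fs/gfs2/super.c hunk further down, where gfs2_delete_inode() verifies that the inode's block is still marked unlinked before deallocating it. Condensed, that usage looks like (illustrative excerpt):

	error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
	if (error)
		goto out_truncate;	/* e.g. -ESTALE: the block is no
					   longer unlinked, so skip the
					   deallocation path */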
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 1e76ff0f3e00..b4106ddaaa98 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -44,15 +44,15 @@ gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
44 44
45extern void gfs2_inplace_release(struct gfs2_inode *ip); 45extern void gfs2_inplace_release(struct gfs2_inode *ip);
46 46
47extern unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block);
48
49extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); 47extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
50extern u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); 48extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
51 49
52extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); 50extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
53extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 51extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
54extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); 52extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
55extern void gfs2_unlink_di(struct inode *inode); 53extern void gfs2_unlink_di(struct inode *inode);
54extern int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr,
55 unsigned int type);
56 56
57struct gfs2_rgrp_list { 57struct gfs2_rgrp_list {
58 unsigned int rl_rgrps; 58 unsigned int rl_rgrps;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index f522bb017973..0ec3ec672de1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -38,7 +38,7 @@
38#include "trans.h" 38#include "trans.h"
39#include "util.h" 39#include "util.h"
40#include "sys.h" 40#include "sys.h"
41#include "eattr.h" 41#include "xattr.h"
42 42
43#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) 43#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
44 44
@@ -68,6 +68,8 @@ enum {
68 Opt_discard, 68 Opt_discard,
69 Opt_nodiscard, 69 Opt_nodiscard,
70 Opt_commit, 70 Opt_commit,
71 Opt_err_withdraw,
72 Opt_err_panic,
71 Opt_error, 73 Opt_error,
72}; 74};
73 75
@@ -97,6 +99,8 @@ static const match_table_t tokens = {
97 {Opt_discard, "discard"}, 99 {Opt_discard, "discard"},
98 {Opt_nodiscard, "nodiscard"}, 100 {Opt_nodiscard, "nodiscard"},
99 {Opt_commit, "commit=%d"}, 101 {Opt_commit, "commit=%d"},
102 {Opt_err_withdraw, "errors=withdraw"},
103 {Opt_err_panic, "errors=panic"},
100 {Opt_error, NULL} 104 {Opt_error, NULL}
101}; 105};
102 106
@@ -152,6 +156,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
152 args->ar_localcaching = 1; 156 args->ar_localcaching = 1;
153 break; 157 break;
154 case Opt_debug: 158 case Opt_debug:
159 if (args->ar_errors == GFS2_ERRORS_PANIC) {
160 fs_info(sdp, "-o debug and -o errors=panic "
161 "are mutually exclusive.\n");
162 return -EINVAL;
163 }
155 args->ar_debug = 1; 164 args->ar_debug = 1;
156 break; 165 break;
157 case Opt_nodebug: 166 case Opt_nodebug:
@@ -205,6 +214,17 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
205 return rv ? rv : -EINVAL; 214 return rv ? rv : -EINVAL;
206 } 215 }
207 break; 216 break;
217 case Opt_err_withdraw:
218 args->ar_errors = GFS2_ERRORS_WITHDRAW;
219 break;
220 case Opt_err_panic:
221 if (args->ar_debug) {
222 fs_info(sdp, "-o debug and -o errors=panic "
223 "are mutually exclusive.\n");
224 return -EINVAL;
225 }
226 args->ar_errors = GFS2_ERRORS_PANIC;
227 break;
208 case Opt_error: 228 case Opt_error:
209 default: 229 default:
210 fs_info(sdp, "invalid mount option: %s\n", o); 230 fs_info(sdp, "invalid mount option: %s\n", o);
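The new errors= options slot into the standard <linux/parser.h> idiom used throughout gfs2_mount_args(): tokenize with strsep(), classify with match_token() against a match_table_t. A stripped-down sketch of that idiom (illustrative; the example_* and Opt_ex_* names are hypothetical):

#include <linux/parser.h>
#include <linux/string.h>
#include <linux/errno.h>

enum { Opt_ex_withdraw, Opt_ex_panic, Opt_ex_error };

static const match_table_t example_tokens = {
	{Opt_ex_withdraw, "errors=withdraw"},
	{Opt_ex_panic,    "errors=panic"},
	{Opt_ex_error,    NULL}
};

static int example_parse(char *options)
{
	substring_t tmp[MAX_OPT_ARGS];
	char *o;

	while ((o = strsep(&options, ",")) != NULL) {
		if (!*o)
			continue;
		switch (match_token(o, example_tokens, tmp)) {
		case Opt_ex_withdraw:
			/* record GFS2_ERRORS_WITHDRAW */
			break;
		case Opt_ex_panic:
			/* record GFS2_ERRORS_PANIC */
			break;
		default:
			return -EINVAL;	/* unknown option */
		}
	}
	return 0;
}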
@@ -768,7 +788,6 @@ restart:
768 /* Release stuff */ 788 /* Release stuff */
769 789
770 iput(sdp->sd_jindex); 790 iput(sdp->sd_jindex);
771 iput(sdp->sd_inum_inode);
772 iput(sdp->sd_statfs_inode); 791 iput(sdp->sd_statfs_inode);
773 iput(sdp->sd_rindex); 792 iput(sdp->sd_rindex);
774 iput(sdp->sd_quota_inode); 793 iput(sdp->sd_quota_inode);
@@ -779,10 +798,8 @@ restart:
779 if (!sdp->sd_args.ar_spectator) { 798 if (!sdp->sd_args.ar_spectator) {
780 gfs2_glock_dq_uninit(&sdp->sd_journal_gh); 799 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
781 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); 800 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
782 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
783 gfs2_glock_dq_uninit(&sdp->sd_sc_gh); 801 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
784 gfs2_glock_dq_uninit(&sdp->sd_qc_gh); 802 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
785 iput(sdp->sd_ir_inode);
786 iput(sdp->sd_sc_inode); 803 iput(sdp->sd_sc_inode);
787 iput(sdp->sd_qc_inode); 804 iput(sdp->sd_qc_inode);
788 } 805 }
@@ -1084,6 +1101,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1084 gt->gt_log_flush_secs = args.ar_commit; 1101 gt->gt_log_flush_secs = args.ar_commit;
1085 spin_unlock(&gt->gt_spin); 1102 spin_unlock(&gt->gt_spin);
1086 1103
1104 gfs2_online_uevent(sdp);
1087 return 0; 1105 return 0;
1088} 1106}
1089 1107
@@ -1225,6 +1243,22 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1225 lfsecs = sdp->sd_tune.gt_log_flush_secs; 1243 lfsecs = sdp->sd_tune.gt_log_flush_secs;
1226 if (lfsecs != 60) 1244 if (lfsecs != 60)
1227 seq_printf(s, ",commit=%d", lfsecs); 1245 seq_printf(s, ",commit=%d", lfsecs);
1246 if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
1247 const char *state;
1248
1249 switch (args->ar_errors) {
1250 case GFS2_ERRORS_WITHDRAW:
1251 state = "withdraw";
1252 break;
1253 case GFS2_ERRORS_PANIC:
1254 state = "panic";
1255 break;
1256 default:
1257 state = "unknown";
1258 break;
1259 }
1260 seq_printf(s, ",errors=%s", state);
1261 }
1228 return 0; 1262 return 0;
1229} 1263}
1230 1264
@@ -1252,6 +1286,10 @@ static void gfs2_delete_inode(struct inode *inode)
1252 goto out; 1286 goto out;
1253 } 1287 }
1254 1288
1289 error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
1290 if (error)
1291 goto out_truncate;
1292
1255 gfs2_glock_dq_wait(&ip->i_iopen_gh); 1293 gfs2_glock_dq_wait(&ip->i_iopen_gh);
1256 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); 1294 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
1257 error = gfs2_glock_nq(&ip->i_iopen_gh); 1295 error = gfs2_glock_nq(&ip->i_iopen_gh);
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 22e0417ed996..235db3682885 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -25,7 +25,7 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
25 return x; 25 return x;
26} 26}
27 27
28void gfs2_jindex_free(struct gfs2_sbd *sdp); 28extern void gfs2_jindex_free(struct gfs2_sbd *sdp);
29 29
30extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data); 30extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data);
31 31
@@ -36,7 +36,7 @@ extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
36 struct gfs2_inode **ipp); 36 struct gfs2_inode **ipp);
37 37
38extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp); 38extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
39 39extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
40extern int gfs2_statfs_init(struct gfs2_sbd *sdp); 40extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
41extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, 41extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
42 s64 dinodes); 42 s64 dinodes);
@@ -54,6 +54,7 @@ extern struct file_system_type gfs2meta_fs_type;
54extern const struct export_operations gfs2_export_ops; 54extern const struct export_operations gfs2_export_ops;
55extern const struct super_operations gfs2_super_ops; 55extern const struct super_operations gfs2_super_ops;
56extern const struct dentry_operations gfs2_dops; 56extern const struct dentry_operations gfs2_dops;
57extern struct xattr_handler *gfs2_xattr_handlers[];
57 58
58#endif /* __SUPER_DOT_H__ */ 59#endif /* __SUPER_DOT_H__ */
59 60
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index a7cbfbd340c7..446329728d52 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -16,6 +16,7 @@
16#include <linux/kobject.h> 16#include <linux/kobject.h>
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include <linux/gfs2_ondisk.h> 18#include <linux/gfs2_ondisk.h>
19#include <linux/genhd.h>
19 20
20#include "gfs2.h" 21#include "gfs2.h"
21#include "incore.h" 22#include "incore.h"
@@ -319,12 +320,6 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
319 return ret; 320 return ret;
320} 321}
321 322
322static ssize_t lkid_show(struct gfs2_sbd *sdp, char *buf)
323{
324 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
325 return sprintf(buf, "%u\n", ls->ls_id);
326}
327
328static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) 323static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
329{ 324{
330 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 325 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -389,7 +384,6 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
389GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); 384GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
390GDLM_ATTR(block, 0644, block_show, block_store); 385GDLM_ATTR(block, 0644, block_show, block_store);
391GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 386GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
392GDLM_ATTR(id, 0444, lkid_show, NULL);
393GDLM_ATTR(jid, 0444, jid_show, NULL); 387GDLM_ATTR(jid, 0444, jid_show, NULL);
394GDLM_ATTR(first, 0444, lkfirst_show, NULL); 388GDLM_ATTR(first, 0444, lkfirst_show, NULL);
395GDLM_ATTR(first_done, 0444, first_done_show, NULL); 389GDLM_ATTR(first_done, 0444, first_done_show, NULL);
@@ -401,7 +395,6 @@ static struct attribute *lock_module_attrs[] = {
401 &gdlm_attr_proto_name.attr, 395 &gdlm_attr_proto_name.attr,
402 &gdlm_attr_block.attr, 396 &gdlm_attr_block.attr,
403 &gdlm_attr_withdraw.attr, 397 &gdlm_attr_withdraw.attr,
404 &gdlm_attr_id.attr,
405 &gdlm_attr_jid.attr, 398 &gdlm_attr_jid.attr,
406 &gdlm_attr_first.attr, 399 &gdlm_attr_first.attr,
407 &gdlm_attr_first_done.attr, 400 &gdlm_attr_first_done.attr,
@@ -519,7 +512,14 @@ static struct attribute_group lock_module_group = {
519 512
520int gfs2_sys_fs_add(struct gfs2_sbd *sdp) 513int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
521{ 514{
515 struct super_block *sb = sdp->sd_vfs;
522 int error; 516 int error;
517 char ro[20];
518 char spectator[20];
519 char *envp[] = { ro, spectator, NULL };
520
521 sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0);
522 sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
523 523
524 sdp->sd_kobj.kset = gfs2_kset; 524 sdp->sd_kobj.kset = gfs2_kset;
525 error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL, 525 error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
@@ -535,9 +535,17 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
535 if (error) 535 if (error)
536 goto fail_tune; 536 goto fail_tune;
537 537
538 kobject_uevent(&sdp->sd_kobj, KOBJ_ADD); 538 error = sysfs_create_link(&sdp->sd_kobj,
539 &disk_to_dev(sb->s_bdev->bd_disk)->kobj,
540 "device");
541 if (error)
542 goto fail_lock_module;
543
544 kobject_uevent_env(&sdp->sd_kobj, KOBJ_ADD, envp);
539 return 0; 545 return 0;
540 546
547fail_lock_module:
548 sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
541fail_tune: 549fail_tune:
542 sysfs_remove_group(&sdp->sd_kobj, &tune_group); 550 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
543fail_reg: 551fail_reg:
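The new sysfs_create_link() call follows the usual goto-unwind idiom: each setup step that can fail jumps to a label that tears down, in reverse order, only what has already succeeded. A generic sketch of the idiom (illustrative; the parameters stand in for the concrete groups and kobjects above):

#include <linux/kobject.h>
#include <linux/sysfs.h>

static int example_setup(struct kobject *kobj,
			 const struct attribute_group *grp,
			 struct kobject *target)
{
	int error;

	error = sysfs_create_group(kobj, grp);
	if (error)
		goto fail;
	error = sysfs_create_link(kobj, target, "device");
	if (error)
		goto fail_group;	/* undo only the completed steps */
	return 0;

fail_group:
	sysfs_remove_group(kobj, grp);
fail:
	return error;
}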
@@ -549,12 +557,12 @@ fail:
549 557
550void gfs2_sys_fs_del(struct gfs2_sbd *sdp) 558void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
551{ 559{
560 sysfs_remove_link(&sdp->sd_kobj, "device");
552 sysfs_remove_group(&sdp->sd_kobj, &tune_group); 561 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
553 sysfs_remove_group(&sdp->sd_kobj, &lock_module_group); 562 sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
554 kobject_put(&sdp->sd_kobj); 563 kobject_put(&sdp->sd_kobj);
555} 564}
556 565
557
558static int gfs2_uevent(struct kset *kset, struct kobject *kobj, 566static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
559 struct kobj_uevent_env *env) 567 struct kobj_uevent_env *env)
560{ 568{
@@ -563,6 +571,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
563 571
564 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); 572 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
565 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); 573 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
574 if (!sdp->sd_args.ar_spectator)
575 add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid);
566 if (gfs2_uuid_valid(uuid)) { 576 if (gfs2_uuid_valid(uuid)) {
567 add_uevent_var(env, "UUID=%02X%02X%02X%02X-%02X%02X-%02X%02X-" 577 add_uevent_var(env, "UUID=%02X%02X%02X%02X-%02X%02X-%02X%02X-"
568 "%02X%02X-%02X%02X%02X%02X%02X%02X", 578 "%02X%02X-%02X%02X%02X%02X%02X%02X",
@@ -578,7 +588,6 @@ static struct kset_uevent_ops gfs2_uevent_ops = {
578 .uevent = gfs2_uevent, 588 .uevent = gfs2_uevent,
579}; 589};
580 590
581
582int gfs2_sys_init(void) 591int gfs2_sys_init(void)
583{ 592{
584 gfs2_kset = kset_create_and_add("gfs2", &gfs2_uevent_ops, fs_kobj); 593 gfs2_kset = kset_create_and_add("gfs2", &gfs2_uevent_ops, fs_kobj);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 9d12b1118ba0..f6a7efa34eb9 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -38,24 +38,30 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
38 const struct lm_lockops *lm = ls->ls_ops; 38 const struct lm_lockops *lm = ls->ls_ops;
39 va_list args; 39 va_list args;
40 40
41 if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) 41 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
42 test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
42 return 0; 43 return 0;
43 44
44 va_start(args, fmt); 45 va_start(args, fmt);
45 vprintk(fmt, args); 46 vprintk(fmt, args);
46 va_end(args); 47 va_end(args);
47 48
48 fs_err(sdp, "about to withdraw this file system\n"); 49 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
49 BUG_ON(sdp->sd_args.ar_debug); 50 fs_err(sdp, "about to withdraw this file system\n");
51 BUG_ON(sdp->sd_args.ar_debug);
50 52
51 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); 53 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
52 54
53 if (lm->lm_unmount) { 55 if (lm->lm_unmount) {
54 fs_err(sdp, "telling LM to unmount\n"); 56 fs_err(sdp, "telling LM to unmount\n");
55 lm->lm_unmount(sdp); 57 lm->lm_unmount(sdp);
58 }
59 fs_err(sdp, "withdrawn\n");
60 dump_stack();
56 } 61 }
57 fs_err(sdp, "withdrawn\n"); 62
58 dump_stack(); 63 if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
64 panic("GFS2: fsid=%s: panic requested.\n", sdp->sd_fsname);
59 65
60 return -1; 66 return -1;
61} 67}
@@ -93,17 +99,24 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
93 gfs2_tune_get(sdp, gt_complain_secs) * HZ)) 99 gfs2_tune_get(sdp, gt_complain_secs) * HZ))
94 return -2; 100 return -2;
95 101
96 printk(KERN_WARNING 102 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
97 "GFS2: fsid=%s: warning: assertion \"%s\" failed\n" 103 printk(KERN_WARNING
98 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", 104 "GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
99 sdp->sd_fsname, assertion, 105 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
100 sdp->sd_fsname, function, file, line); 106 sdp->sd_fsname, assertion,
107 sdp->sd_fsname, function, file, line);
101 108
102 if (sdp->sd_args.ar_debug) 109 if (sdp->sd_args.ar_debug)
103 BUG(); 110 BUG();
104 else 111 else
105 dump_stack(); 112 dump_stack();
106 113
114 if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
115 panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
116 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
117 sdp->sd_fsname, assertion,
118 sdp->sd_fsname, function, file, line);
119
107 sdp->sd_last_warning = jiffies; 120 sdp->sd_last_warning = jiffies;
108 121
109 return -1; 122 return -1;
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/xattr.c
index 07ea9529adda..8a0f8ef6ee27 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/xattr.c
@@ -18,8 +18,7 @@
18#include "gfs2.h" 18#include "gfs2.h"
19#include "incore.h" 19#include "incore.h"
20#include "acl.h" 20#include "acl.h"
21#include "eaops.h" 21#include "xattr.h"
22#include "eattr.h"
23#include "glock.h" 22#include "glock.h"
24#include "inode.h" 23#include "inode.h"
25#include "meta_io.h" 24#include "meta_io.h"
@@ -38,26 +37,32 @@
38 * Returns: 1 if the EA should be stuffed 37 * Returns: 1 if the EA should be stuffed
39 */ 38 */
40 39
41static int ea_calc_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er, 40static int ea_calc_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize,
42 unsigned int *size) 41 unsigned int *size)
43{ 42{
44 *size = GFS2_EAREQ_SIZE_STUFFED(er); 43 unsigned int jbsize = sdp->sd_jbsize;
45 if (*size <= sdp->sd_jbsize) 44
45 /* Stuffed */
46 *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize + dsize, 8);
47
48 if (*size <= jbsize)
46 return 1; 49 return 1;
47 50
48 *size = GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er); 51 /* Unstuffed */
52 *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize +
53 (sizeof(__be64) * DIV_ROUND_UP(dsize, jbsize)), 8);
49 54
50 return 0; 55 return 0;
51} 56}
52 57
53static int ea_check_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er) 58static int ea_check_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize)
54{ 59{
55 unsigned int size; 60 unsigned int size;
56 61
57 if (er->er_data_len > GFS2_EA_MAX_DATA_LEN) 62 if (dsize > GFS2_EA_MAX_DATA_LEN)
58 return -ERANGE; 63 return -ERANGE;
59 64
60 ea_calc_size(sdp, er, &size); 65 ea_calc_size(sdp, nsize, dsize, &size);
61 66
62 /* This can only happen with 512 byte blocks */ 67 /* This can only happen with 512 byte blocks */
63 if (size > sdp->sd_jbsize) 68 if (size > sdp->sd_jbsize)
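In the rewritten helper, a "stuffed" xattr keeps name and data inline in one EA record, while an "unstuffed" one keeps only block pointers to the data. A worked example of the two formulas (the numbers are illustrative assumptions: a 16-byte struct gfs2_ea_header and sd_jbsize == 4072, i.e. a 4KiB block minus the metadata header):

/*
 *   nsize = 5, dsize = 100:
 *     ALIGN(16 + 5 + 100, 8)  = 128   <= 4072  -> stuffed
 *
 *   nsize = 5, dsize = 8000:
 *     ALIGN(16 + 5 + 8000, 8) = 8024  >  4072  -> unstuffed:
 *     ALIGN(16 + 5 + sizeof(__be64) * DIV_ROUND_UP(8000, 4072), 8)
 *       = ALIGN(16 + 5 + 8 * 2, 8) = 40
 *     (the record holds two block pointers instead of the data itself)
 */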
@@ -151,7 +156,9 @@ out:
151} 156}
152 157
153struct ea_find { 158struct ea_find {
154 struct gfs2_ea_request *ef_er; 159 int type;
160 const char *name;
161 size_t namel;
155 struct gfs2_ea_location *ef_el; 162 struct gfs2_ea_location *ef_el;
156}; 163};
157 164
@@ -160,14 +167,13 @@ static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
160 void *private) 167 void *private)
161{ 168{
162 struct ea_find *ef = private; 169 struct ea_find *ef = private;
163 struct gfs2_ea_request *er = ef->ef_er;
164 170
165 if (ea->ea_type == GFS2_EATYPE_UNUSED) 171 if (ea->ea_type == GFS2_EATYPE_UNUSED)
166 return 0; 172 return 0;
167 173
168 if (ea->ea_type == er->er_type) { 174 if (ea->ea_type == ef->type) {
169 if (ea->ea_name_len == er->er_name_len && 175 if (ea->ea_name_len == ef->namel &&
170 !memcmp(GFS2_EA2NAME(ea), er->er_name, ea->ea_name_len)) { 176 !memcmp(GFS2_EA2NAME(ea), ef->name, ea->ea_name_len)) {
171 struct gfs2_ea_location *el = ef->ef_el; 177 struct gfs2_ea_location *el = ef->ef_el;
172 get_bh(bh); 178 get_bh(bh);
173 el->el_bh = bh; 179 el->el_bh = bh;
@@ -180,13 +186,15 @@ static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
180 return 0; 186 return 0;
181} 187}
182 188
183int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er, 189int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name,
184 struct gfs2_ea_location *el) 190 struct gfs2_ea_location *el)
185{ 191{
186 struct ea_find ef; 192 struct ea_find ef;
187 int error; 193 int error;
188 194
189 ef.ef_er = er; 195 ef.type = type;
196 ef.name = name;
197 ef.namel = strlen(name);
190 ef.ef_el = el; 198 ef.ef_el = el;
191 199
192 memset(el, 0, sizeof(struct gfs2_ea_location)); 200 memset(el, 0, sizeof(struct gfs2_ea_location));
@@ -344,6 +352,20 @@ struct ea_list {
344 unsigned int ei_size; 352 unsigned int ei_size;
345}; 353};
346 354
355static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
356{
357 switch (ea->ea_type) {
358 case GFS2_EATYPE_USR:
359 return 5 + ea->ea_name_len + 1;
360 case GFS2_EATYPE_SYS:
361 return 7 + ea->ea_name_len + 1;
362 case GFS2_EATYPE_SECURITY:
363 return 9 + ea->ea_name_len + 1;
364 default:
365 return 0;
366 }
367}
368
347static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh, 369static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
348 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, 370 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
349 void *private) 371 void *private)
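The constants in gfs2_ea_strlen() are the namespace prefixes that userspace sees, re-attached to the on-disk name, plus a trailing NUL. Spelled out (a restatement, not new logic):

/*
 *   GFS2_EATYPE_USR      -> strlen("user.")     == 5
 *   GFS2_EATYPE_SYS      -> strlen("system.")   == 7
 *   GFS2_EATYPE_SECURITY -> strlen("security.") == 9
 *
 * so each name reported by listxattr(2) occupies
 * prefix + ea_name_len + 1 bytes in the output buffer.
 */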
@@ -392,21 +414,25 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
392} 414}
393 415
394/** 416/**
395 * gfs2_ea_list - 417 * gfs2_listxattr - List gfs2 extended attributes
396 * @ip: 418 * @dentry: The dentry whose inode we are interested in
397 * @er: 419 * @buffer: The buffer to write the results
420 * @size: The size of the buffer
398 * 421 *
399 * Returns: actual size of data on success, -errno on error 422 * Returns: actual size of data on success, -errno on error
400 */ 423 */
401 424
402int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er) 425ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
403{ 426{
427 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
428 struct gfs2_ea_request er;
404 struct gfs2_holder i_gh; 429 struct gfs2_holder i_gh;
405 int error; 430 int error;
406 431
407 if (!er->er_data || !er->er_data_len) { 432 memset(&er, 0, sizeof(struct gfs2_ea_request));
408 er->er_data = NULL; 433 if (size) {
409 er->er_data_len = 0; 434 er.er_data = buffer;
435 er.er_data_len = size;
410 } 436 }
411 437
412 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 438 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
@@ -414,7 +440,7 @@ int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
414 return error; 440 return error;
415 441
416 if (ip->i_eattr) { 442 if (ip->i_eattr) {
417 struct ea_list ei = { .ei_er = er, .ei_size = 0 }; 443 struct ea_list ei = { .ei_er = &er, .ei_size = 0 };
418 444
419 error = ea_foreach(ip, ea_list_i, &ei); 445 error = ea_foreach(ip, ea_list_i, &ei);
420 if (!error) 446 if (!error)
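On the consuming side, the er_data == NULL / size == 0 handling above supports the usual listxattr(2) size-probe convention: call once with size 0 to learn the required length, then again with a real buffer. A self-contained userspace sketch (the path is hypothetical):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "/mnt/gfs2/file";	/* hypothetical file */
	ssize_t len = listxattr(path, NULL, 0);	/* probe required size */
	char *buf, *p;

	if (len <= 0)
		return 1;
	buf = malloc(len);
	if (!buf)
		return 1;
	len = listxattr(path, buf, len);
	if (len < 0)
		return 1;
	/* the result is a run of NUL-terminated names, e.g. "user.foo\0" */
	for (p = buf; p < buf + len; p += strlen(p) + 1)
		printf("%s\n", p);
	free(buf);
	return 0;
}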
@@ -491,84 +517,61 @@ out:
491} 517}
492 518
493int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, 519int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
494 char *data) 520 char *data, size_t size)
495{ 521{
522 int ret;
523 size_t len = GFS2_EA_DATA_LEN(el->el_ea);
524 if (len > size)
525 return -ERANGE;
526
496 if (GFS2_EA_IS_STUFFED(el->el_ea)) { 527 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
497 memcpy(data, GFS2_EA2DATA(el->el_ea), GFS2_EA_DATA_LEN(el->el_ea)); 528 memcpy(data, GFS2_EA2DATA(el->el_ea), len);
498 return 0; 529 return len;
499 } else 530 }
500 return ea_get_unstuffed(ip, el->el_ea, data); 531 ret = ea_get_unstuffed(ip, el->el_ea, data);
532 if (ret < 0)
533 return ret;
534 return len;
501} 535}
502 536
503/** 537/**
504 * gfs2_ea_get_i - 538 * gfs2_xattr_get - Get a GFS2 extended attribute
505 * @ip: The GFS2 inode 539 * @inode: The inode
506 * @er: The request structure 540 * @type: The type of extended attribute
541 * @name: The name of the extended attribute
542 * @buffer: The buffer to write the result into
543 * @size: The size of the buffer
507 * 544 *
508 * Returns: actual size of data on success, -errno on error 545 * Returns: actual size of data on success, -errno on error
509 */ 546 */
510 547
511int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er) 548int gfs2_xattr_get(struct inode *inode, int type, const char *name,
549 void *buffer, size_t size)
512{ 550{
551 struct gfs2_inode *ip = GFS2_I(inode);
513 struct gfs2_ea_location el; 552 struct gfs2_ea_location el;
514 int error; 553 int error;
515 554
516 if (!ip->i_eattr) 555 if (!ip->i_eattr)
517 return -ENODATA; 556 return -ENODATA;
557 if (strlen(name) > GFS2_EA_MAX_NAME_LEN)
558 return -EINVAL;
518 559
519 error = gfs2_ea_find(ip, er, &el); 560 error = gfs2_ea_find(ip, type, name, &el);
520 if (error) 561 if (error)
521 return error; 562 return error;
522 if (!el.el_ea) 563 if (!el.el_ea)
523 return -ENODATA; 564 return -ENODATA;
524 565 if (size)
525 if (er->er_data_len) { 566 error = gfs2_ea_get_copy(ip, &el, buffer, size);
526 if (GFS2_EA_DATA_LEN(el.el_ea) > er->er_data_len) 567 else
527 error = -ERANGE;
528 else
529 error = gfs2_ea_get_copy(ip, &el, er->er_data);
530 }
531 if (!error)
532 error = GFS2_EA_DATA_LEN(el.el_ea); 568 error = GFS2_EA_DATA_LEN(el.el_ea);
533
534 brelse(el.el_bh); 569 brelse(el.el_bh);
535 570
536 return error; 571 return error;
537} 572}
538 573
539/** 574/**
540 * gfs2_ea_get -
541 * @ip: The GFS2 inode
542 * @er: The request structure
543 *
544 * Returns: actual size of data on success, -errno on error
545 */
546
547int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
548{
549 struct gfs2_holder i_gh;
550 int error;
551
552 if (!er->er_name_len ||
553 er->er_name_len > GFS2_EA_MAX_NAME_LEN)
554 return -EINVAL;
555 if (!er->er_data || !er->er_data_len) {
556 er->er_data = NULL;
557 er->er_data_len = 0;
558 }
559
560 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
561 if (error)
562 return error;
563
564 error = gfs2_ea_ops[er->er_type]->eo_get(ip, er);
565
566 gfs2_glock_dq_uninit(&i_gh);
567
568 return error;
569}
570
571/**
572 * ea_alloc_blk - allocates a new block for extended attributes. 575 * ea_alloc_blk - allocates a new block for extended attributes.
573 * @ip: A pointer to the inode that's getting extended attributes 576 * @ip: A pointer to the inode that's getting extended attributes
574 * @bhp: Pointer to pointer to a struct buffer_head 577 * @bhp: Pointer to pointer to a struct buffer_head
@@ -713,12 +716,6 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
713 716
714 error = gfs2_meta_inode_buffer(ip, &dibh); 717 error = gfs2_meta_inode_buffer(ip, &dibh);
715 if (!error) { 718 if (!error) {
716 if (er->er_flags & GFS2_ERF_MODE) {
717 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
718 (ip->i_inode.i_mode & S_IFMT) ==
719 (er->er_mode & S_IFMT));
720 ip->i_inode.i_mode = er->er_mode;
721 }
722 ip->i_inode.i_ctime = CURRENT_TIME; 719 ip->i_inode.i_ctime = CURRENT_TIME;
723 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 720 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
724 gfs2_dinode_out(ip, dibh->b_data); 721 gfs2_dinode_out(ip, dibh->b_data);
@@ -762,15 +759,23 @@ static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
762 * Returns: errno 759 * Returns: errno
763 */ 760 */
764 761
765static int ea_init(struct gfs2_inode *ip, struct gfs2_ea_request *er) 762static int ea_init(struct gfs2_inode *ip, int type, const char *name,
763 const void *data, size_t size)
766{ 764{
765 struct gfs2_ea_request er;
767 unsigned int jbsize = GFS2_SB(&ip->i_inode)->sd_jbsize; 766 unsigned int jbsize = GFS2_SB(&ip->i_inode)->sd_jbsize;
768 unsigned int blks = 1; 767 unsigned int blks = 1;
769 768
770 if (GFS2_EAREQ_SIZE_STUFFED(er) > jbsize) 769 er.er_type = type;
771 blks += DIV_ROUND_UP(er->er_data_len, jbsize); 770 er.er_name = name;
771 er.er_name_len = strlen(name);
772 er.er_data = (void *)data;
773 er.er_data_len = size;
774
775 if (GFS2_EAREQ_SIZE_STUFFED(&er) > jbsize)
776 blks += DIV_ROUND_UP(er.er_data_len, jbsize);
772 777
773 return ea_alloc_skeleton(ip, er, blks, ea_init_i, NULL); 778 return ea_alloc_skeleton(ip, &er, blks, ea_init_i, NULL);
774} 779}
775 780
776static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea) 781static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea)
@@ -848,12 +853,6 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
848 error = gfs2_meta_inode_buffer(ip, &dibh); 853 error = gfs2_meta_inode_buffer(ip, &dibh);
849 if (error) 854 if (error)
850 goto out; 855 goto out;
851
852 if (er->er_flags & GFS2_ERF_MODE) {
853 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
854 (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
855 ip->i_inode.i_mode = er->er_mode;
856 }
857 ip->i_inode.i_ctime = CURRENT_TIME; 856 ip->i_inode.i_ctime = CURRENT_TIME;
858 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 857 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
859 gfs2_dinode_out(ip, dibh->b_data); 858 gfs2_dinode_out(ip, dibh->b_data);
@@ -894,7 +893,8 @@ static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh,
894 int stuffed; 893 int stuffed;
895 int error; 894 int error;
896 895
897 stuffed = ea_calc_size(GFS2_SB(&ip->i_inode), es->es_er, &size); 896 stuffed = ea_calc_size(GFS2_SB(&ip->i_inode), es->es_er->er_name_len,
897 es->es_er->er_data_len, &size);
898 898
899 if (ea->ea_type == GFS2_EATYPE_UNUSED) { 899 if (ea->ea_type == GFS2_EATYPE_UNUSED) {
900 if (GFS2_EA_REC_LEN(ea) < size) 900 if (GFS2_EA_REC_LEN(ea) < size)
@@ -1005,15 +1005,22 @@ out:
1005 return error; 1005 return error;
1006} 1006}
1007 1007
1008static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er, 1008static int ea_set_i(struct gfs2_inode *ip, int type, const char *name,
1009 struct gfs2_ea_location *el) 1009 const void *value, size_t size, struct gfs2_ea_location *el)
1010{ 1010{
1011 struct gfs2_ea_request er;
1011 struct ea_set es; 1012 struct ea_set es;
1012 unsigned int blks = 2; 1013 unsigned int blks = 2;
1013 int error; 1014 int error;
1014 1015
1016 er.er_type = type;
1017 er.er_name = name;
1018 er.er_data = (void *)value;
1019 er.er_name_len = strlen(name);
1020 er.er_data_len = size;
1021
1015 memset(&es, 0, sizeof(struct ea_set)); 1022 memset(&es, 0, sizeof(struct ea_set));
1016 es.es_er = er; 1023 es.es_er = &er;
1017 es.es_el = el; 1024 es.es_el = el;
1018 1025
1019 error = ea_foreach(ip, ea_set_simple, &es); 1026 error = ea_foreach(ip, ea_set_simple, &es);
@@ -1024,10 +1031,10 @@ static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
1024 1031
1025 if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT)) 1032 if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT))
1026 blks++; 1033 blks++;
1027 if (GFS2_EAREQ_SIZE_STUFFED(er) > GFS2_SB(&ip->i_inode)->sd_jbsize) 1034 if (GFS2_EAREQ_SIZE_STUFFED(&er) > GFS2_SB(&ip->i_inode)->sd_jbsize)
1028 blks += DIV_ROUND_UP(er->er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize); 1035 blks += DIV_ROUND_UP(er.er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize);
1029 1036
1030 return ea_alloc_skeleton(ip, er, blks, ea_set_block, el); 1037 return ea_alloc_skeleton(ip, &er, blks, ea_set_block, el);
1031} 1038}
1032 1039
1033static int ea_set_remove_unstuffed(struct gfs2_inode *ip, 1040static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
@@ -1039,75 +1046,7 @@ static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
1039 GFS2_EA2NEXT(el->el_prev) == el->el_ea); 1046 GFS2_EA2NEXT(el->el_prev) == el->el_ea);
1040 } 1047 }
1041 1048
1042 return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev,0); 1049 return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev, 0);
1043}
1044
1045int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1046{
1047 struct gfs2_ea_location el;
1048 int error;
1049
1050 if (!ip->i_eattr) {
1051 if (er->er_flags & XATTR_REPLACE)
1052 return -ENODATA;
1053 return ea_init(ip, er);
1054 }
1055
1056 error = gfs2_ea_find(ip, er, &el);
1057 if (error)
1058 return error;
1059
1060 if (el.el_ea) {
1061 if (ip->i_diskflags & GFS2_DIF_APPENDONLY) {
1062 brelse(el.el_bh);
1063 return -EPERM;
1064 }
1065
1066 error = -EEXIST;
1067 if (!(er->er_flags & XATTR_CREATE)) {
1068 int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
1069 error = ea_set_i(ip, er, &el);
1070 if (!error && unstuffed)
1071 ea_set_remove_unstuffed(ip, &el);
1072 }
1073
1074 brelse(el.el_bh);
1075 } else {
1076 error = -ENODATA;
1077 if (!(er->er_flags & XATTR_REPLACE))
1078 error = ea_set_i(ip, er, NULL);
1079 }
1080
1081 return error;
1082}
1083
1084int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1085{
1086 struct gfs2_holder i_gh;
1087 int error;
1088
1089 if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
1090 return -EINVAL;
1091 if (!er->er_data || !er->er_data_len) {
1092 er->er_data = NULL;
1093 er->er_data_len = 0;
1094 }
1095 error = ea_check_size(GFS2_SB(&ip->i_inode), er);
1096 if (error)
1097 return error;
1098
1099 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1100 if (error)
1101 return error;
1102
1103 if (IS_IMMUTABLE(&ip->i_inode))
1104 error = -EPERM;
1105 else
1106 error = gfs2_ea_ops[er->er_type]->eo_set(ip, er);
1107
1108 gfs2_glock_dq_uninit(&i_gh);
1109
1110 return error;
1111} 1050}
1112 1051
1113static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) 1052static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
@@ -1131,8 +1070,9 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1131 1070
1132 if (GFS2_EA_IS_LAST(ea)) 1071 if (GFS2_EA_IS_LAST(ea))
1133 prev->ea_flags |= GFS2_EAFLAG_LAST; 1072 prev->ea_flags |= GFS2_EAFLAG_LAST;
1134 } else 1073 } else {
1135 ea->ea_type = GFS2_EATYPE_UNUSED; 1074 ea->ea_type = GFS2_EATYPE_UNUSED;
1075 }
1136 1076
1137 error = gfs2_meta_inode_buffer(ip, &dibh); 1077 error = gfs2_meta_inode_buffer(ip, &dibh);
1138 if (!error) { 1078 if (!error) {
@@ -1147,15 +1087,29 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1147 return error; 1087 return error;
1148} 1088}
1149 1089
1150int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er) 1090/**
1091 * gfs2_xattr_remove - Remove a GFS2 extended attribute
1092 * @inode: The inode
1093 * @type: The type of the extended attribute
1094 * @name: The name of the extended attribute
1095 *
1096 * This is not called directly by the VFS since we use the (common)
1097 * scheme of making a "set with NULL data" mean a remove request. Note
1098 * that this is different from a set with zero length data.
1099 *
1100 * Returns: 0, or errno on failure
1101 */
1102
1103static int gfs2_xattr_remove(struct inode *inode, int type, const char *name)
1151{ 1104{
1105 struct gfs2_inode *ip = GFS2_I(inode);
1152 struct gfs2_ea_location el; 1106 struct gfs2_ea_location el;
1153 int error; 1107 int error;
1154 1108
1155 if (!ip->i_eattr) 1109 if (!ip->i_eattr)
1156 return -ENODATA; 1110 return -ENODATA;
1157 1111
1158 error = gfs2_ea_find(ip, er, &el); 1112 error = gfs2_ea_find(ip, type, name, &el);
1159 if (error) 1113 if (error)
1160 return error; 1114 return error;
1161 if (!el.el_ea) 1115 if (!el.el_ea)
@@ -1164,8 +1118,7 @@ int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1164 if (GFS2_EA_IS_STUFFED(el.el_ea)) 1118 if (GFS2_EA_IS_STUFFED(el.el_ea))
1165 error = ea_remove_stuffed(ip, &el); 1119 error = ea_remove_stuffed(ip, &el);
1166 else 1120 else
1167 error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev, 1121 error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev, 0);
1168 0);
1169 1122
1170 brelse(el.el_bh); 1123 brelse(el.el_bh);
1171 1124
@@ -1173,31 +1126,70 @@ int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1173} 1126}
1174 1127
1175/** 1128/**
1176 * gfs2_ea_remove - sets (or creates or replaces) an extended attribute 1129 * gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
1177 * @ip: pointer to the inode of the target file 1130 * @inode: The inode
1178 * @er: request information 1131 * @type: The type of the extended attribute
1132 * @name: The name of the extended attribute
1133 * @value: The value of the extended attribute (NULL for remove)
1134 * @size: The size of the @value argument
1135 * @flags: Create or Replace
1179 * 1136 *
1180 * Returns: errno 1137 * See gfs2_xattr_remove() for details of the removal of xattrs.
1138 *
1139 * Returns: 0 or errno on failure
1181 */ 1140 */
1182 1141
1183int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) 1142int gfs2_xattr_set(struct inode *inode, int type, const char *name,
1143 const void *value, size_t size, int flags)
1184{ 1144{
1185 struct gfs2_holder i_gh; 1145 struct gfs2_sbd *sdp = GFS2_SB(inode);
1146 struct gfs2_inode *ip = GFS2_I(inode);
1147 struct gfs2_ea_location el;
1148 unsigned int namel = strlen(name);
1186 int error; 1149 int error;
1187 1150
1188 if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN) 1151 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1189 return -EINVAL; 1152 return -EPERM;
1153 if (namel > GFS2_EA_MAX_NAME_LEN)
1154 return -ERANGE;
1190 1155
1191 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); 1156 if (value == NULL)
1157 return gfs2_xattr_remove(inode, type, name);
1158
1159 if (ea_check_size(sdp, namel, size))
1160 return -ERANGE;
1161
1162 if (!ip->i_eattr) {
1163 if (flags & XATTR_REPLACE)
1164 return -ENODATA;
1165 return ea_init(ip, type, name, value, size);
1166 }
1167
1168 error = gfs2_ea_find(ip, type, name, &el);
1192 if (error) 1169 if (error)
1193 return error; 1170 return error;
1194 1171
1195 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) 1172 if (el.el_ea) {
1196 error = -EPERM; 1173 if (ip->i_diskflags & GFS2_DIF_APPENDONLY) {
1197 else 1174 brelse(el.el_bh);
1198 error = gfs2_ea_ops[er->er_type]->eo_remove(ip, er); 1175 return -EPERM;
1176 }
1199 1177
1200 gfs2_glock_dq_uninit(&i_gh); 1178 error = -EEXIST;
1179 if (!(flags & XATTR_CREATE)) {
1180 int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
1181 error = ea_set_i(ip, type, name, value, size, &el);
1182 if (!error && unstuffed)
1183 ea_set_remove_unstuffed(ip, &el);
1184 }
1185
1186 brelse(el.el_bh);
1187 return error;
1188 }
1189
1190 error = -ENODATA;
1191 if (!(flags & XATTR_REPLACE))
1192 error = ea_set_i(ip, type, name, value, size, NULL);
1201 1193
1202 return error; 1194 return error;
1203} 1195}
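With the NULL-value convention, one entry point now serves both the set and remove paths. The precondition checks at the top of gfs2_xattr_set() condense to (a restatement of the code above, not new logic):

/*
 *   IS_IMMUTABLE(inode) || IS_APPEND(inode)  -> -EPERM
 *   strlen(name) > GFS2_EA_MAX_NAME_LEN      -> -ERANGE
 *   value == NULL                            -> gfs2_xattr_remove()
 *   oversized value (ea_check_size() fails)  -> -ERANGE
 *   no xattrs yet && XATTR_REPLACE           -> -ENODATA
 */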
@@ -1503,3 +1495,64 @@ out_alloc:
1503 return error; 1495 return error;
1504} 1496}
1505 1497
1498static int gfs2_xattr_user_get(struct inode *inode, const char *name,
1499 void *buffer, size_t size)
1500{
1501 return gfs2_xattr_get(inode, GFS2_EATYPE_USR, name, buffer, size);
1502}
1503
1504static int gfs2_xattr_user_set(struct inode *inode, const char *name,
1505 const void *value, size_t size, int flags)
1506{
1507 return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags);
1508}
1509
1510static int gfs2_xattr_system_get(struct inode *inode, const char *name,
1511 void *buffer, size_t size)
1512{
1513 return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size);
1514}
1515
1516static int gfs2_xattr_system_set(struct inode *inode, const char *name,
1517 const void *value, size_t size, int flags)
1518{
1519 return gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, flags);
1520}
1521
1522static int gfs2_xattr_security_get(struct inode *inode, const char *name,
1523 void *buffer, size_t size)
1524{
1525 return gfs2_xattr_get(inode, GFS2_EATYPE_SECURITY, name, buffer, size);
1526}
1527
1528static int gfs2_xattr_security_set(struct inode *inode, const char *name,
1529 const void *value, size_t size, int flags)
1530{
1531 return gfs2_xattr_set(inode, GFS2_EATYPE_SECURITY, name, value, size, flags);
1532}
1533
1534static struct xattr_handler gfs2_xattr_user_handler = {
1535 .prefix = XATTR_USER_PREFIX,
1536 .get = gfs2_xattr_user_get,
1537 .set = gfs2_xattr_user_set,
1538};
1539
1540static struct xattr_handler gfs2_xattr_security_handler = {
1541 .prefix = XATTR_SECURITY_PREFIX,
1542 .get = gfs2_xattr_security_get,
1543 .set = gfs2_xattr_security_set,
1544};
1545
1546static struct xattr_handler gfs2_xattr_system_handler = {
1547 .prefix = XATTR_SYSTEM_PREFIX,
1548 .get = gfs2_xattr_system_get,
1549 .set = gfs2_xattr_system_set,
1550};
1551
1552struct xattr_handler *gfs2_xattr_handlers[] = {
1553 &gfs2_xattr_user_handler,
1554 &gfs2_xattr_security_handler,
1555 &gfs2_xattr_system_handler,
1556 NULL,
1557};
1558
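The handler table works because the generic VFS helpers match an incoming attribute name against each handler's .prefix and invoke the matching .get/.set with the prefix stripped. A simplified sketch of that dispatch (illustrative; the real lookup lives in fs/xattr.c):

#include <linux/string.h>
#include <linux/xattr.h>

static struct xattr_handler *example_resolve(struct xattr_handler **handlers,
					     const char **name)
{
	struct xattr_handler **h;

	for (h = handlers; *h; h++) {
		size_t n = strlen((*h)->prefix);

		if (!strncmp(*name, (*h)->prefix, n)) {
			*name += n;	/* handler sees the unprefixed name */
			return *h;
		}
	}
	return NULL;	/* the real code returns -EOPNOTSUPP here */
}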
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/xattr.h
index c82dbe01d713..cbdfd7743733 100644
--- a/fs/gfs2/eattr.h
+++ b/fs/gfs2/xattr.h
@@ -19,7 +19,7 @@ struct iattr;
19#define GFS2_EA_SIZE(ea) \ 19#define GFS2_EA_SIZE(ea) \
20ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \ 20ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
21 ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \ 21 ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \
22 (sizeof(__be64) * (ea)->ea_num_ptrs)), 8) 22 (sizeof(__be64) * (ea)->ea_num_ptrs)), 8)
23 23
24#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs) 24#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
25#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST) 25#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)
@@ -27,10 +27,6 @@ ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
27#define GFS2_EAREQ_SIZE_STUFFED(er) \ 27#define GFS2_EAREQ_SIZE_STUFFED(er) \
28ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8) 28ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)
29 29
30#define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
31ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
32 sizeof(__be64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)
33
34#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1)) 30#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
35#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len) 31#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)
36 32
@@ -43,16 +39,12 @@ ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
43#define GFS2_EA_BH2FIRST(bh) \ 39#define GFS2_EA_BH2FIRST(bh) \
44((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header))) 40((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header)))
45 41
46#define GFS2_ERF_MODE 0x80000000
47
48struct gfs2_ea_request { 42struct gfs2_ea_request {
49 const char *er_name; 43 const char *er_name;
50 char *er_data; 44 char *er_data;
51 unsigned int er_name_len; 45 unsigned int er_name_len;
52 unsigned int er_data_len; 46 unsigned int er_data_len;
53 unsigned int er_type; /* GFS2_EATYPE_... */ 47 unsigned int er_type; /* GFS2_EATYPE_... */
54 int er_flags;
55 mode_t er_mode;
56}; 48};
57 49
58struct gfs2_ea_location { 50struct gfs2_ea_location {
@@ -61,40 +53,20 @@ struct gfs2_ea_location {
61 struct gfs2_ea_header *el_prev; 53 struct gfs2_ea_header *el_prev;
62}; 54};
63 55
64int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er); 56extern int gfs2_xattr_get(struct inode *inode, int type, const char *name,
65int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er); 57 void *buffer, size_t size);
66int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er); 58extern int gfs2_xattr_set(struct inode *inode, int type, const char *name,
67 59 const void *value, size_t size, int flags);
68int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er); 60extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
69int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er); 61extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
70int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er);
71int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er);
72
73int gfs2_ea_dealloc(struct gfs2_inode *ip);
74 62
75/* Exported to acl.c */ 63/* Exported to acl.c */
76 64
77int gfs2_ea_find(struct gfs2_inode *ip, 65extern int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name,
78 struct gfs2_ea_request *er, 66 struct gfs2_ea_location *el);
79 struct gfs2_ea_location *el); 67extern int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
80int gfs2_ea_get_copy(struct gfs2_inode *ip, 68 char *data, size_t size);
81 struct gfs2_ea_location *el, 69extern int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
82 char *data); 70 struct iattr *attr, char *data);
83int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
84 struct iattr *attr, char *data);
85
86static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
87{
88 switch (ea->ea_type) {
89 case GFS2_EATYPE_USR:
90 return 5 + ea->ea_name_len + 1;
91 case GFS2_EATYPE_SYS:
92 return 7 + ea->ea_name_len + 1;
93 case GFS2_EATYPE_SECURITY:
94 return 9 + ea->ea_name_len + 1;
95 default:
96 return 0;
97 }
98}
99 71
100#endif /* __EATTR_DOT_H__ */ 72#endif /* __EATTR_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index ae7b67e48661..b2ba83d2c4e1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -182,9 +182,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
182 if (sb->s_bdev) { 182 if (sb->s_bdev) {
183 struct backing_dev_info *bdi; 183 struct backing_dev_info *bdi;
184 184
185 bdi = sb->s_bdev->bd_inode_backing_dev_info; 185 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
186 if (!bdi)
187 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
188 mapping->backing_dev_info = bdi; 186 mapping->backing_dev_info = bdi;
189 } 187 }
190 inode->i_private = NULL; 188 inode->i_private = NULL;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7b4088b2364d..0df600e9162d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -220,7 +220,6 @@ static int journal_submit_inode_data_buffers(struct address_space *mapping)
220 .nr_to_write = mapping->nrpages * 2, 220 .nr_to_write = mapping->nrpages * 2,
221 .range_start = 0, 221 .range_start = 0,
222 .range_end = i_size_read(mapping->host), 222 .range_end = i_size_read(mapping->host),
223 .for_writepages = 1,
224 }; 223 };
225 224
226 ret = generic_writepages(mapping, &wbc); 225 ret = generic_writepages(mapping, &wbc);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 867f70504531..de935692d40d 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1918,6 +1918,8 @@ static inline void nfs_initialise_sb(struct super_block *sb)
1918 if (server->flags & NFS_MOUNT_NOAC) 1918 if (server->flags & NFS_MOUNT_NOAC)
1919 sb->s_flags |= MS_SYNCHRONOUS; 1919 sb->s_flags |= MS_SYNCHRONOUS;
1920 1920
1921 sb->s_bdi = &server->backing_dev_info;
1922
1921 nfs_super_set_maxbytes(sb, server->maxfilesize); 1923 nfs_super_set_maxbytes(sb, server->maxfilesize);
1922} 1924}
1923 1925
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 120acadc6a84..53eb26c16b50 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1490,7 +1490,6 @@ static int nfs_write_mapping(struct address_space *mapping, int how)
1490 .nr_to_write = LONG_MAX, 1490 .nr_to_write = LONG_MAX,
1491 .range_start = 0, 1491 .range_start = 0,
1492 .range_end = LLONG_MAX, 1492 .range_end = LLONG_MAX,
1493 .for_writepages = 1,
1494 }; 1493 };
1495 1494
1496 return __nfs_write_mapping(mapping, &wbc, how); 1495 return __nfs_write_mapping(mapping, &wbc, how);
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 72da095d4009..251da07b2a1d 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -1,6 +1,6 @@
 config NILFS2_FS
 	tristate "NILFS2 file system support (EXPERIMENTAL)"
-	depends on BLOCK && EXPERIMENTAL
+	depends on EXPERIMENTAL
 	select CRC32
 	help
 	  NILFS2 is a log-structured file system (LFS) supporting continuous
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 99d58a028b94..08834df6ec68 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -36,6 +36,26 @@ struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
 	return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode));
 }
 
+/**
+ * nilfs_bmap_lookup_at_level - find a data block or node block
+ * @bmap: bmap
+ * @key: key
+ * @level: level
+ * @ptrp: place to store the value associated to @key
+ *
+ * Description: nilfs_bmap_lookup_at_level() finds a record whose key
+ * matches @key in the block at @level of the bmap.
+ *
+ * Return Value: On success, 0 is returned and the record associated with @key
+ * is stored in the place pointed by @ptrp. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-ENOENT - A record associated with @key does not exist.
+ */
 int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
 			       __u64 *ptrp)
 {
@@ -69,39 +89,6 @@ int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp,
 	return ret;
 }
 
-/**
- * nilfs_bmap_lookup - find a record
- * @bmap: bmap
- * @key: key
- * @recp: pointer to record
- *
- * Description: nilfs_bmap_lookup() finds a record whose key matches @key in
- * @bmap.
- *
- * Return Value: On success, 0 is returned and the record associated with @key
- * is stored in the place pointed by @recp. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - A record associated with @key does not exist.
- */
-int nilfs_bmap_lookup(struct nilfs_bmap *bmap,
-		      unsigned long key,
-		      unsigned long *recp)
-{
-	__u64 ptr;
-	int ret;
-
-	/* XXX: use macro for level 1 */
-	ret = nilfs_bmap_lookup_at_level(bmap, key, 1, &ptr);
-	if (recp != NULL)
-		*recp = ptr;
-	return ret;
-}
-
 static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
 {
 	__u64 keys[NILFS_BMAP_SMALL_HIGH + 1];
@@ -469,104 +456,6 @@ __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
 		(entries_per_group / NILFS_BMAP_GROUP_DIV);
 }
 
-int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap,
-			       union nilfs_bmap_ptr_req *req)
-{
-	return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
-}
-
-void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap,
-			       union nilfs_bmap_ptr_req *req)
-{
-	nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
-}
-
-void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap,
-			      union nilfs_bmap_ptr_req *req)
-{
-	nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
-}
-
-int nilfs_bmap_start_v(struct nilfs_bmap *bmap, union nilfs_bmap_ptr_req *req,
-		       sector_t blocknr)
-{
-	struct inode *dat = nilfs_bmap_get_dat(bmap);
-	int ret;
-
-	ret = nilfs_dat_prepare_start(dat, &req->bpr_req);
-	if (likely(!ret))
-		nilfs_dat_commit_start(dat, &req->bpr_req, blocknr);
-	return ret;
-}
-
-int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap,
-			     union nilfs_bmap_ptr_req *req)
-{
-	return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
-}
-
-void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap,
-			     union nilfs_bmap_ptr_req *req)
-{
-	nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req,
-			     bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
-}
-
-void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap,
-			    union nilfs_bmap_ptr_req *req)
-{
-	nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
-}
-
-int nilfs_bmap_move_v(const struct nilfs_bmap *bmap, __u64 vblocknr,
-		      sector_t blocknr)
-{
-	return nilfs_dat_move(nilfs_bmap_get_dat(bmap), vblocknr, blocknr);
-}
-
-int nilfs_bmap_mark_dirty(const struct nilfs_bmap *bmap, __u64 vblocknr)
-{
-	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr);
-}
-
-int nilfs_bmap_prepare_update_v(struct nilfs_bmap *bmap,
-				union nilfs_bmap_ptr_req *oldreq,
-				union nilfs_bmap_ptr_req *newreq)
-{
-	struct inode *dat = nilfs_bmap_get_dat(bmap);
-	int ret;
-
-	ret = nilfs_dat_prepare_end(dat, &oldreq->bpr_req);
-	if (ret < 0)
-		return ret;
-	ret = nilfs_dat_prepare_alloc(dat, &newreq->bpr_req);
-	if (ret < 0)
-		nilfs_dat_abort_end(dat, &oldreq->bpr_req);
-
-	return ret;
-}
-
-void nilfs_bmap_commit_update_v(struct nilfs_bmap *bmap,
-				union nilfs_bmap_ptr_req *oldreq,
-				union nilfs_bmap_ptr_req *newreq)
-{
-	struct inode *dat = nilfs_bmap_get_dat(bmap);
-
-	nilfs_dat_commit_end(dat, &oldreq->bpr_req,
-			     bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
-	nilfs_dat_commit_alloc(dat, &newreq->bpr_req);
-}
-
-void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap,
-			       union nilfs_bmap_ptr_req *oldreq,
-			       union nilfs_bmap_ptr_req *newreq)
-{
-	struct inode *dat = nilfs_bmap_get_dat(bmap);
-
-	nilfs_dat_abort_end(dat, &oldreq->bpr_req);
-	nilfs_dat_abort_alloc(dat, &newreq->bpr_req);
-}
-
 static struct lock_class_key nilfs_bmap_dat_lock_key;
 static struct lock_class_key nilfs_bmap_mdt_lock_key;
 
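The kernel-doc added above fully specifies the lookup contract. A caller following it might look like this sketch — not part of the commit, and example_lookup_data_block is a hypothetical name; the level-1 call is exactly what the removed nilfs_bmap_lookup() did and what the new inline wrapper in bmap.h now does:

/* Sketch only: consume the documented return convention of
 * nilfs_bmap_lookup_at_level(). */
static int example_lookup_data_block(struct nilfs_bmap *bmap, __u64 key,
				     __u64 *blocknr)
{
	__u64 ptr;
	int ret;

	ret = nilfs_bmap_lookup_at_level(bmap, key, 1, &ptr);
	if (ret < 0)
		return ret;	/* -EIO, -ENOMEM or -ENOENT per the kernel-doc */
	*blocknr = ptr;
	return 0;
}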
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index b2890cdcef12..9980d7dbab91 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -28,6 +28,7 @@
 #include <linux/buffer_head.h>
 #include <linux/nilfs2_fs.h>
 #include "alloc.h"
+#include "dat.h"
 
 #define NILFS_BMAP_INVALID_PTR	0
 
@@ -141,7 +142,6 @@ struct nilfs_bmap {
 int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *);
 int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *);
 void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *);
-int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *);
 int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned);
 int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long);
 int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long);
@@ -160,90 +160,76 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *, struct nilfs_bmap *);
 void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *);
 
 
+static inline int nilfs_bmap_lookup(struct nilfs_bmap *bmap, __u64 key,
+				    __u64 *ptr)
+{
+	return nilfs_bmap_lookup_at_level(bmap, key, 1, ptr);
+}
+
 /*
  * Internal use only
  */
 struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *);
-int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *,
-			       union nilfs_bmap_ptr_req *);
-void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *,
-			       union nilfs_bmap_ptr_req *);
-void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *,
-			      union nilfs_bmap_ptr_req *);
 
 static inline int nilfs_bmap_prepare_alloc_ptr(struct nilfs_bmap *bmap,
-					       union nilfs_bmap_ptr_req *req)
+					       union nilfs_bmap_ptr_req *req,
+					       struct inode *dat)
 {
-	if (NILFS_BMAP_USE_VBN(bmap))
-		return nilfs_bmap_prepare_alloc_v(bmap, req);
+	if (dat)
+		return nilfs_dat_prepare_alloc(dat, &req->bpr_req);
 	/* ignore target ptr */
 	req->bpr_ptr = bmap->b_last_allocated_ptr++;
 	return 0;
 }
 
 static inline void nilfs_bmap_commit_alloc_ptr(struct nilfs_bmap *bmap,
-					       union nilfs_bmap_ptr_req *req)
+					       union nilfs_bmap_ptr_req *req,
+					       struct inode *dat)
 {
-	if (NILFS_BMAP_USE_VBN(bmap))
-		nilfs_bmap_commit_alloc_v(bmap, req);
+	if (dat)
+		nilfs_dat_commit_alloc(dat, &req->bpr_req);
 }
 
 static inline void nilfs_bmap_abort_alloc_ptr(struct nilfs_bmap *bmap,
-					      union nilfs_bmap_ptr_req *req)
+					      union nilfs_bmap_ptr_req *req,
+					      struct inode *dat)
 {
-	if (NILFS_BMAP_USE_VBN(bmap))
-		nilfs_bmap_abort_alloc_v(bmap, req);
+	if (dat)
+		nilfs_dat_abort_alloc(dat, &req->bpr_req);
 	else
 		bmap->b_last_allocated_ptr--;
 }
 
-int nilfs_bmap_prepare_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
-void nilfs_bmap_commit_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
-void nilfs_bmap_abort_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
-
 static inline int nilfs_bmap_prepare_end_ptr(struct nilfs_bmap *bmap,
-					     union nilfs_bmap_ptr_req *req)
+					     union nilfs_bmap_ptr_req *req,
+					     struct inode *dat)
 {
-	return NILFS_BMAP_USE_VBN(bmap) ?
-		nilfs_bmap_prepare_end_v(bmap, req) : 0;
+	return dat ? nilfs_dat_prepare_end(dat, &req->bpr_req) : 0;
 }
 
 static inline void nilfs_bmap_commit_end_ptr(struct nilfs_bmap *bmap,
-					     union nilfs_bmap_ptr_req *req)
+					     union nilfs_bmap_ptr_req *req,
+					     struct inode *dat)
 {
-	if (NILFS_BMAP_USE_VBN(bmap))
-		nilfs_bmap_commit_end_v(bmap, req);
+	if (dat)
+		nilfs_dat_commit_end(dat, &req->bpr_req,
+				     bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
 }
 
 static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap,
-					    union nilfs_bmap_ptr_req *req)
+					    union nilfs_bmap_ptr_req *req,
+					    struct inode *dat)
 {
-	if (NILFS_BMAP_USE_VBN(bmap))
-		nilfs_bmap_abort_end_v(bmap, req);
+	if (dat)
+		nilfs_dat_abort_end(dat, &req->bpr_req);
 }
 
-int nilfs_bmap_start_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *,
-		       sector_t);
-int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t);
-int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64);
-
-
 __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *,
 			      const struct buffer_head *);
 
 __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64);
 __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *);
 
-int nilfs_bmap_prepare_update_v(struct nilfs_bmap *,
-				union nilfs_bmap_ptr_req *,
-				union nilfs_bmap_ptr_req *);
-void nilfs_bmap_commit_update_v(struct nilfs_bmap *,
-				union nilfs_bmap_ptr_req *,
-				union nilfs_bmap_ptr_req *);
-void nilfs_bmap_abort_update_v(struct nilfs_bmap *,
-			       union nilfs_bmap_ptr_req *,
-			       union nilfs_bmap_ptr_req *);
-
 void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int);
 void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int);
 
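With the _v wrappers gone, a caller resolves the DAT inode once and threads it through the prepare/commit/abort helpers, passing NULL for bmaps that do not use virtual block numbers; the helpers then branch on dat instead of re-testing NILFS_BMAP_USE_VBN() on every call. A minimal sketch of the new convention — a hypothetical caller, not from the patch, mirroring what nilfs_btree_prepare_insert() and nilfs_btree_commit_insert() do in the btree.c hunks below:

/* Sketch only: the dat-argument calling convention. */
static int example_alloc_ptr(struct nilfs_bmap *bmap,
			     union nilfs_bmap_ptr_req *req)
{
	struct inode *dat = NULL;
	int ret;

	if (NILFS_BMAP_USE_VBN(bmap))
		dat = nilfs_bmap_get_dat(bmap);

	ret = nilfs_bmap_prepare_alloc_ptr(bmap, req, dat);
	if (ret < 0)
		return ret;
	/* work that could still fail would call
	 * nilfs_bmap_abort_alloc_ptr(bmap, req, dat) on error */
	nilfs_bmap_commit_alloc_ptr(bmap, req, dat);
	return 0;
}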
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index aa412724b64e..e25b507a474f 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -71,21 +71,17 @@ void nilfs_btree_path_cache_destroy(void)
 	kmem_cache_destroy(nilfs_btree_path_cache);
 }
 
-static inline struct nilfs_btree_path *
-nilfs_btree_alloc_path(const struct nilfs_btree *btree)
+static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void)
 {
-	return (struct nilfs_btree_path *)
-		kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
+	return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
 }
 
-static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
-					 struct nilfs_btree_path *path)
+static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
 {
 	kmem_cache_free(nilfs_btree_path_cache, path);
 }
 
-static void nilfs_btree_init_path(const struct nilfs_btree *btree,
-				  struct nilfs_btree_path *path)
+static void nilfs_btree_init_path(struct nilfs_btree_path *path)
 {
 	int level;
 
@@ -101,26 +97,13 @@ static void nilfs_btree_init_path(const struct nilfs_btree *btree,
 	}
 }
 
-static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
-				   struct nilfs_btree_path *path)
+static void nilfs_btree_release_path(struct nilfs_btree_path *path)
 {
 	int level;
 
-	for (level = NILFS_BTREE_LEVEL_DATA;
-	     level < NILFS_BTREE_LEVEL_MAX;
-	     level++) {
-		if (path[level].bp_bh != NULL) {
-			brelse(path[level].bp_bh);
-			path[level].bp_bh = NULL;
-		}
-		/* sib_bh is released or deleted by prepare or commit
-		 * operations. */
-		path[level].bp_sib_bh = NULL;
-		path[level].bp_index = 0;
-		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
-		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
-		path[level].bp_op = NULL;
-	}
+	for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX;
+	     level++)
+		brelse(path[level].bp_bh);
 }
 
 /*
@@ -148,129 +131,110 @@ static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
 }
 
 static inline int
-nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
-			   const struct nilfs_btree_node *node)
+nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
 {
 	return node->bn_flags;
 }
 
 static inline void
-nilfs_btree_node_set_flags(struct nilfs_btree *btree,
-			   struct nilfs_btree_node *node,
-			   int flags)
+nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
 {
 	node->bn_flags = flags;
 }
 
-static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
-					const struct nilfs_btree_node *node)
+static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
 {
-	return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
+	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
 }
 
 static inline int
-nilfs_btree_node_get_level(const struct nilfs_btree *btree,
-			   const struct nilfs_btree_node *node)
+nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
 {
 	return node->bn_level;
 }
 
 static inline void
-nilfs_btree_node_set_level(struct nilfs_btree *btree,
-			   struct nilfs_btree_node *node,
-			   int level)
+nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
 {
 	node->bn_level = level;
 }
 
 static inline int
-nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
-			       const struct nilfs_btree_node *node)
+nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
 {
 	return le16_to_cpu(node->bn_nchildren);
 }
 
 static inline void
-nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
-			       struct nilfs_btree_node *node,
-			       int nchildren)
+nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
 {
 	node->bn_nchildren = cpu_to_le16(nchildren);
 }
 
-static inline int
-nilfs_btree_node_size(const struct nilfs_btree *btree)
+static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
 {
 	return 1 << btree->bt_bmap.b_inode->i_blkbits;
 }
 
 static inline int
-nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
-			       const struct nilfs_btree_node *node)
+nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
+			       const struct nilfs_btree *btree)
 {
-	return nilfs_btree_node_root(btree, node) ?
+	return nilfs_btree_node_root(node) ?
 		NILFS_BTREE_ROOT_NCHILDREN_MIN :
 		NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
 }
 
 static inline int
-nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
-			       const struct nilfs_btree_node *node)
+nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
+			       const struct nilfs_btree *btree)
 {
-	return nilfs_btree_node_root(btree, node) ?
+	return nilfs_btree_node_root(node) ?
 		NILFS_BTREE_ROOT_NCHILDREN_MAX :
 		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
 }
 
 static inline __le64 *
-nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
-		       const struct nilfs_btree_node *node)
+nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
 {
 	return (__le64 *)((char *)(node + 1) +
-			  (nilfs_btree_node_root(btree, node) ?
+			  (nilfs_btree_node_root(node) ?
 			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
 }
 
 static inline __le64 *
-nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
-		       const struct nilfs_btree_node *node)
+nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
+		       const struct nilfs_btree *btree)
 {
-	return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
-			  nilfs_btree_node_nchildren_max(btree, node));
+	return (__le64 *)(nilfs_btree_node_dkeys(node) +
+			  nilfs_btree_node_nchildren_max(node, btree));
 }
 
 static inline __u64
-nilfs_btree_node_get_key(const struct nilfs_btree *btree,
-			 const struct nilfs_btree_node *node, int index)
+nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
 {
-	return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) +
-					index));
+	return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
 }
 
 static inline void
-nilfs_btree_node_set_key(struct nilfs_btree *btree,
-			 struct nilfs_btree_node *node, int index, __u64 key)
+nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
 {
-	*(nilfs_btree_node_dkeys(btree, node) + index) =
-		nilfs_bmap_key_to_dkey(key);
+	*(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
 }
 
 static inline __u64
 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
-			 const struct nilfs_btree_node *node,
-			 int index)
+			 const struct nilfs_btree_node *node, int index)
 {
-	return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) +
+	return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
 					index));
 }
 
 static inline void
 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
-			 struct nilfs_btree_node *node,
-			 int index,
-			 __u64 ptr)
+			 struct nilfs_btree_node *node, int index, __u64 ptr)
 {
-	*(nilfs_btree_node_dptrs(btree, node) + index) =
+	*(nilfs_btree_node_dptrs(node, btree) + index) =
 		nilfs_bmap_ptr_to_dptr(ptr);
 }
 
@@ -283,12 +247,12 @@ static void nilfs_btree_node_init(struct nilfs_btree *btree,
 	__le64 *dptrs;
 	int i;
 
-	nilfs_btree_node_set_flags(btree, node, flags);
-	nilfs_btree_node_set_level(btree, node, level);
-	nilfs_btree_node_set_nchildren(btree, node, nchildren);
+	nilfs_btree_node_set_flags(node, flags);
+	nilfs_btree_node_set_level(node, level);
+	nilfs_btree_node_set_nchildren(node, nchildren);
 
-	dkeys = nilfs_btree_node_dkeys(btree, node);
-	dptrs = nilfs_btree_node_dptrs(btree, node);
+	dkeys = nilfs_btree_node_dkeys(node);
+	dptrs = nilfs_btree_node_dptrs(node, btree);
 	for (i = 0; i < nchildren; i++) {
 		dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
 		dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
@@ -305,13 +269,13 @@ static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
 	__le64 *ldptrs, *rdptrs;
 	int lnchildren, rnchildren;
 
-	ldkeys = nilfs_btree_node_dkeys(btree, left);
-	ldptrs = nilfs_btree_node_dptrs(btree, left);
-	lnchildren = nilfs_btree_node_get_nchildren(btree, left);
+	ldkeys = nilfs_btree_node_dkeys(left);
+	ldptrs = nilfs_btree_node_dptrs(left, btree);
+	lnchildren = nilfs_btree_node_get_nchildren(left);
 
-	rdkeys = nilfs_btree_node_dkeys(btree, right);
-	rdptrs = nilfs_btree_node_dptrs(btree, right);
-	rnchildren = nilfs_btree_node_get_nchildren(btree, right);
+	rdkeys = nilfs_btree_node_dkeys(right);
+	rdptrs = nilfs_btree_node_dptrs(right, btree);
+	rnchildren = nilfs_btree_node_get_nchildren(right);
 
 	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
 	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
@@ -320,8 +284,8 @@ static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
 
 	lnchildren += n;
 	rnchildren -= n;
-	nilfs_btree_node_set_nchildren(btree, left, lnchildren);
-	nilfs_btree_node_set_nchildren(btree, right, rnchildren);
+	nilfs_btree_node_set_nchildren(left, lnchildren);
+	nilfs_btree_node_set_nchildren(right, rnchildren);
 }
 
 /* Assume that the buffer heads corresponding to left and right are locked. */
@@ -334,13 +298,13 @@ static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
 	__le64 *ldptrs, *rdptrs;
 	int lnchildren, rnchildren;
 
-	ldkeys = nilfs_btree_node_dkeys(btree, left);
-	ldptrs = nilfs_btree_node_dptrs(btree, left);
-	lnchildren = nilfs_btree_node_get_nchildren(btree, left);
+	ldkeys = nilfs_btree_node_dkeys(left);
+	ldptrs = nilfs_btree_node_dptrs(left, btree);
+	lnchildren = nilfs_btree_node_get_nchildren(left);
 
-	rdkeys = nilfs_btree_node_dkeys(btree, right);
-	rdptrs = nilfs_btree_node_dptrs(btree, right);
-	rnchildren = nilfs_btree_node_get_nchildren(btree, right);
+	rdkeys = nilfs_btree_node_dkeys(right);
+	rdptrs = nilfs_btree_node_dptrs(right, btree);
+	rnchildren = nilfs_btree_node_get_nchildren(right);
 
 	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
 	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
@@ -349,8 +313,8 @@ static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
 
 	lnchildren -= n;
 	rnchildren += n;
-	nilfs_btree_node_set_nchildren(btree, left, lnchildren);
-	nilfs_btree_node_set_nchildren(btree, right, rnchildren);
+	nilfs_btree_node_set_nchildren(left, lnchildren);
+	nilfs_btree_node_set_nchildren(right, rnchildren);
 }
 
 /* Assume that the buffer head corresponding to node is locked. */
@@ -362,9 +326,9 @@ static void nilfs_btree_node_insert(struct nilfs_btree *btree,
 	__le64 *dptrs;
 	int nchildren;
 
-	dkeys = nilfs_btree_node_dkeys(btree, node);
-	dptrs = nilfs_btree_node_dptrs(btree, node);
-	nchildren = nilfs_btree_node_get_nchildren(btree, node);
+	dkeys = nilfs_btree_node_dkeys(node);
+	dptrs = nilfs_btree_node_dptrs(node, btree);
+	nchildren = nilfs_btree_node_get_nchildren(node);
 	if (index < nchildren) {
 		memmove(dkeys + index + 1, dkeys + index,
 			(nchildren - index) * sizeof(*dkeys));
@@ -374,7 +338,7 @@ static void nilfs_btree_node_insert(struct nilfs_btree *btree,
 	dkeys[index] = nilfs_bmap_key_to_dkey(key);
 	dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
 	nchildren++;
-	nilfs_btree_node_set_nchildren(btree, node, nchildren);
+	nilfs_btree_node_set_nchildren(node, nchildren);
 }
 
 /* Assume that the buffer head corresponding to node is locked. */
@@ -388,11 +352,11 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree,
 	__le64 *dptrs;
 	int nchildren;
 
-	dkeys = nilfs_btree_node_dkeys(btree, node);
-	dptrs = nilfs_btree_node_dptrs(btree, node);
+	dkeys = nilfs_btree_node_dkeys(node);
+	dptrs = nilfs_btree_node_dptrs(node, btree);
 	key = nilfs_bmap_dkey_to_key(dkeys[index]);
 	ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
-	nchildren = nilfs_btree_node_get_nchildren(btree, node);
+	nchildren = nilfs_btree_node_get_nchildren(node);
 	if (keyp != NULL)
 		*keyp = key;
 	if (ptrp != NULL)
@@ -405,11 +369,10 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree,
 			(nchildren - index - 1) * sizeof(*dptrs));
 	}
 	nchildren--;
-	nilfs_btree_node_set_nchildren(btree, node, nchildren);
+	nilfs_btree_node_set_nchildren(node, nchildren);
 }
 
-static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
-				   const struct nilfs_btree_node *node,
+static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
 				   __u64 key, int *indexp)
 {
 	__u64 nkey;
@@ -417,12 +380,12 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
 
 	/* binary search */
 	low = 0;
-	high = nilfs_btree_node_get_nchildren(btree, node) - 1;
+	high = nilfs_btree_node_get_nchildren(node) - 1;
 	index = 0;
 	s = 0;
 	while (low <= high) {
 		index = (low + high) / 2;
-		nkey = nilfs_btree_node_get_key(btree, node, index);
+		nkey = nilfs_btree_node_get_key(node, index);
 		if (nkey == key) {
 			s = 0;
 			goto out;
@@ -436,9 +399,8 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
 	}
 
 	/* adjust index */
-	if (nilfs_btree_node_get_level(btree, node) >
-	    NILFS_BTREE_LEVEL_NODE_MIN) {
-		if ((s > 0) && (index > 0))
+	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
+		if (s > 0 && index > 0)
 			index--;
 	} else if (s < 0)
 		index++;
@@ -456,25 +418,20 @@ nilfs_btree_get_root(const struct nilfs_btree *btree)
 }
 
 static inline struct nilfs_btree_node *
-nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
-			     const struct nilfs_btree_path *path,
-			     int level)
+nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
 {
 	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
 }
 
 static inline struct nilfs_btree_node *
-nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
-			 const struct nilfs_btree_path *path,
-			 int level)
+nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
 {
 	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
 }
 
 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
 {
-	return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
-		+ 1;
+	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
 }
 
 static inline struct nilfs_btree_node *
@@ -484,7 +441,7 @@ nilfs_btree_get_node(const struct nilfs_btree *btree,
 {
 	return (level == nilfs_btree_height(btree) - 1) ?
 		nilfs_btree_get_root(btree) :
-		nilfs_btree_get_nonroot_node(btree, path, level);
+		nilfs_btree_get_nonroot_node(path, level);
 }
 
 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
@@ -496,12 +453,11 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
 	int level, index, found, ret;
 
 	node = nilfs_btree_get_root(btree);
-	level = nilfs_btree_node_get_level(btree, node);
-	if ((level < minlevel) ||
-	    (nilfs_btree_node_get_nchildren(btree, node) <= 0))
+	level = nilfs_btree_node_get_level(node);
+	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
 		return -ENOENT;
 
-	found = nilfs_btree_node_lookup(btree, node, key, &index);
+	found = nilfs_btree_node_lookup(node, key, &index);
 	ptr = nilfs_btree_node_get_ptr(btree, node, index);
 	path[level].bp_bh = NULL;
 	path[level].bp_index = index;
@@ -510,14 +466,13 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
 		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
 		if (ret < 0)
 			return ret;
-		node = nilfs_btree_get_nonroot_node(btree, path, level);
-		BUG_ON(level != nilfs_btree_node_get_level(btree, node));
+		node = nilfs_btree_get_nonroot_node(path, level);
+		BUG_ON(level != nilfs_btree_node_get_level(node));
 		if (!found)
-			found = nilfs_btree_node_lookup(btree, node, key,
-							&index);
+			found = nilfs_btree_node_lookup(node, key, &index);
 		else
 			index = 0;
-		if (index < nilfs_btree_node_nchildren_max(btree, node))
+		if (index < nilfs_btree_node_nchildren_max(node, btree))
 			ptr = nilfs_btree_node_get_ptr(btree, node, index);
 		else {
 			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
@@ -544,10 +499,10 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
 	int index, level, ret;
 
 	node = nilfs_btree_get_root(btree);
-	index = nilfs_btree_node_get_nchildren(btree, node) - 1;
+	index = nilfs_btree_node_get_nchildren(node) - 1;
 	if (index < 0)
 		return -ENOENT;
-	level = nilfs_btree_node_get_level(btree, node);
+	level = nilfs_btree_node_get_level(node);
 	ptr = nilfs_btree_node_get_ptr(btree, node, index);
 	path[level].bp_bh = NULL;
 	path[level].bp_index = index;
@@ -556,15 +511,15 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
 		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
 		if (ret < 0)
 			return ret;
-		node = nilfs_btree_get_nonroot_node(btree, path, level);
-		BUG_ON(level != nilfs_btree_node_get_level(btree, node));
-		index = nilfs_btree_node_get_nchildren(btree, node) - 1;
+		node = nilfs_btree_get_nonroot_node(path, level);
+		BUG_ON(level != nilfs_btree_node_get_level(node));
+		index = nilfs_btree_node_get_nchildren(node) - 1;
 		ptr = nilfs_btree_node_get_ptr(btree, node, index);
 		path[level].bp_index = index;
 	}
 
 	if (keyp != NULL)
-		*keyp = nilfs_btree_node_get_key(btree, node, index);
+		*keyp = nilfs_btree_node_get_key(node, index);
 	if (ptrp != NULL)
 		*ptrp = ptr;
 
@@ -580,18 +535,18 @@ static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
 	int ret;
 
 	btree = (struct nilfs_btree *)bmap;
-	path = nilfs_btree_alloc_path(btree);
+	path = nilfs_btree_alloc_path();
 	if (path == NULL)
 		return -ENOMEM;
-	nilfs_btree_init_path(btree, path);
+	nilfs_btree_init_path(path);
 
 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
 
 	if (ptrp != NULL)
 		*ptrp = ptr;
 
-	nilfs_btree_clear_path(btree, path);
-	nilfs_btree_free_path(btree, path);
+	nilfs_btree_release_path(path);
+	nilfs_btree_free_path(path);
 
 	return ret;
 }
@@ -608,10 +563,10 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
 	int level = NILFS_BTREE_LEVEL_NODE_MIN;
 	int ret, cnt, index, maxlevel;
 
-	path = nilfs_btree_alloc_path(btree);
+	path = nilfs_btree_alloc_path();
 	if (path == NULL)
 		return -ENOMEM;
-	nilfs_btree_init_path(btree, path);
+	nilfs_btree_init_path(path);
 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
 	if (ret < 0)
 		goto out;
@@ -631,8 +586,8 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
 	node = nilfs_btree_get_node(btree, path, level);
 	index = path[level].bp_index + 1;
 	for (;;) {
-		while (index < nilfs_btree_node_get_nchildren(btree, node)) {
-			if (nilfs_btree_node_get_key(btree, node, index) !=
+		while (index < nilfs_btree_node_get_nchildren(node)) {
+			if (nilfs_btree_node_get_key(node, index) !=
 			    key + cnt)
 				goto end;
 			ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
@@ -653,8 +608,8 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
 		/* look-up right sibling node */
 		node = nilfs_btree_get_node(btree, path, level + 1);
 		index = path[level + 1].bp_index + 1;
-		if (index >= nilfs_btree_node_get_nchildren(btree, node) ||
-		    nilfs_btree_node_get_key(btree, node, index) != key + cnt)
+		if (index >= nilfs_btree_node_get_nchildren(node) ||
+		    nilfs_btree_node_get_key(node, index) != key + cnt)
 			break;
 		ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
 		path[level + 1].bp_index = index;
@@ -664,7 +619,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
 		ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
 		if (ret < 0)
 			goto out;
-		node = nilfs_btree_get_nonroot_node(btree, path, level);
+		node = nilfs_btree_get_nonroot_node(path, level);
 		index = 0;
 		path[level].bp_index = index;
 	}
@@ -672,8 +627,8 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
 	*ptrp = ptr;
 	ret = cnt;
  out:
-	nilfs_btree_clear_path(btree, path);
-	nilfs_btree_free_path(btree, path);
+	nilfs_btree_release_path(path);
+	nilfs_btree_free_path(path);
 	return ret;
 }
 
@@ -685,9 +640,7 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree,
 	do {
 		lock_buffer(path[level].bp_bh);
 		nilfs_btree_node_set_key(
-			btree,
-			nilfs_btree_get_nonroot_node(
-				btree, path, level),
+			nilfs_btree_get_nonroot_node(path, level),
 			path[level].bp_index, key);
 		if (!buffer_dirty(path[level].bp_bh))
 			nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -698,8 +651,7 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree,
 
 	/* root */
 	if (level == nilfs_btree_height(btree) - 1) {
-		nilfs_btree_node_set_key(btree,
-					 nilfs_btree_get_root(btree),
+		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
 					 path[level].bp_index, key);
 	}
 }
@@ -712,7 +664,7 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree,
 
 	if (level < nilfs_btree_height(btree) - 1) {
 		lock_buffer(path[level].bp_bh);
-		node = nilfs_btree_get_nonroot_node(btree, path, level);
+		node = nilfs_btree_get_nonroot_node(path, level);
 		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
 					path[level].bp_index);
 		if (!buffer_dirty(path[level].bp_bh))
@@ -721,8 +673,8 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree,
 
 		if (path[level].bp_index == 0)
 			nilfs_btree_promote_key(btree, path, level + 1,
-						nilfs_btree_node_get_key(
-							btree, node, 0));
+						nilfs_btree_node_get_key(node,
+									 0));
 	} else {
 		node = nilfs_btree_get_root(btree);
 		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
@@ -740,10 +692,10 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
 	lock_buffer(path[level].bp_bh);
 	lock_buffer(path[level].bp_sib_bh);
 
-	node = nilfs_btree_get_nonroot_node(btree, path, level);
-	left = nilfs_btree_get_sib_node(btree, path, level);
-	nchildren = nilfs_btree_node_get_nchildren(btree, node);
-	lnchildren = nilfs_btree_node_get_nchildren(btree, left);
+	node = nilfs_btree_get_nonroot_node(path, level);
+	left = nilfs_btree_get_sib_node(path, level);
+	nchildren = nilfs_btree_node_get_nchildren(node);
+	lnchildren = nilfs_btree_node_get_nchildren(left);
 	move = 0;
 
 	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
@@ -764,7 +716,7 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
 	unlock_buffer(path[level].bp_sib_bh);
 
 	nilfs_btree_promote_key(btree, path, level + 1,
-				nilfs_btree_node_get_key(btree, node, 0));
+				nilfs_btree_node_get_key(node, 0));
 
 	if (move) {
 		brelse(path[level].bp_bh);
@@ -791,10 +743,10 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
 	lock_buffer(path[level].bp_bh);
 	lock_buffer(path[level].bp_sib_bh);
 
-	node = nilfs_btree_get_nonroot_node(btree, path, level);
-	right = nilfs_btree_get_sib_node(btree, path, level);
-	nchildren = nilfs_btree_node_get_nchildren(btree, node);
-	rnchildren = nilfs_btree_node_get_nchildren(btree, right);
+	node = nilfs_btree_get_nonroot_node(path, level);
+	right = nilfs_btree_get_sib_node(path, level);
+	nchildren = nilfs_btree_node_get_nchildren(node);
+	rnchildren = nilfs_btree_node_get_nchildren(right);
 	move = 0;
 
 	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
@@ -816,15 +768,14 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
 
 	path[level + 1].bp_index++;
 	nilfs_btree_promote_key(btree, path, level + 1,
-				nilfs_btree_node_get_key(btree, right, 0));
+				nilfs_btree_node_get_key(right, 0));
 	path[level + 1].bp_index--;
 
 	if (move) {
 		brelse(path[level].bp_bh);
 		path[level].bp_bh = path[level].bp_sib_bh;
 		path[level].bp_sib_bh = NULL;
-		path[level].bp_index -=
-			nilfs_btree_node_get_nchildren(btree, node);
+		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 		path[level + 1].bp_index++;
 	} else {
 		brelse(path[level].bp_sib_bh);
@@ -846,9 +797,9 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
 	lock_buffer(path[level].bp_bh);
 	lock_buffer(path[level].bp_sib_bh);
 
-	node = nilfs_btree_get_nonroot_node(btree, path, level);
-	right = nilfs_btree_get_sib_node(btree, path, level);
-	nchildren = nilfs_btree_node_get_nchildren(btree, node);
+	node = nilfs_btree_get_nonroot_node(path, level);
+	right = nilfs_btree_get_sib_node(path, level);
+	nchildren = nilfs_btree_node_get_nchildren(node);
 	move = 0;
 
 	n = (nchildren + 1) / 2;
@@ -867,16 +818,15 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
 	unlock_buffer(path[level].bp_bh);
 	unlock_buffer(path[level].bp_sib_bh);
 
-	newkey = nilfs_btree_node_get_key(btree, right, 0);
+	newkey = nilfs_btree_node_get_key(right, 0);
 	newptr = path[level].bp_newreq.bpr_ptr;
 
 	if (move) {
-		path[level].bp_index -=
-			nilfs_btree_node_get_nchildren(btree, node);
+		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 		nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
 					path[level].bp_index);
 
-		*keyp = nilfs_btree_node_get_key(btree, right, 0);
+		*keyp = nilfs_btree_node_get_key(right, 0);
 		*ptrp = path[level].bp_newreq.bpr_ptr;
 
 		brelse(path[level].bp_bh);
@@ -885,7 +835,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
 	} else {
 		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 
-		*keyp = nilfs_btree_node_get_key(btree, right, 0);
+		*keyp = nilfs_btree_node_get_key(right, 0);
 		*ptrp = path[level].bp_newreq.bpr_ptr;
 
 		brelse(path[level].bp_sib_bh);
@@ -905,12 +855,12 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
 	lock_buffer(path[level].bp_sib_bh);
 
 	root = nilfs_btree_get_root(btree);
-	child = nilfs_btree_get_sib_node(btree, path, level);
+	child = nilfs_btree_get_sib_node(path, level);
 
-	n = nilfs_btree_node_get_nchildren(btree, root);
+	n = nilfs_btree_node_get_nchildren(root);
 
 	nilfs_btree_node_move_right(btree, root, child, n);
-	nilfs_btree_node_set_level(btree, root, level + 1);
+	nilfs_btree_node_set_level(root, level + 1);
 
 	if (!buffer_dirty(path[level].bp_sib_bh))
 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
@@ -922,7 +872,7 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
 
 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 
-	*keyp = nilfs_btree_node_get_key(btree, child, 0);
+	*keyp = nilfs_btree_node_get_key(child, 0);
 	*ptrp = path[level].bp_newreq.bpr_ptr;
 }
 
@@ -990,26 +940,29 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
 	struct nilfs_btree_node *node, *parent, *sib;
 	__u64 sibptr;
 	int pindex, level, ret;
+	struct inode *dat = NULL;
 
 	stats->bs_nblocks = 0;
 	level = NILFS_BTREE_LEVEL_DATA;
 
 	/* allocate a new ptr for data block */
-	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
+	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
 		path[level].bp_newreq.bpr_ptr =
 			nilfs_btree_find_target_v(btree, path, key);
+		dat = nilfs_bmap_get_dat(&btree->bt_bmap);
+	}
 
 	ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
-					   &path[level].bp_newreq);
+					   &path[level].bp_newreq, dat);
 	if (ret < 0)
 		goto err_out_data;
 
 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
 	     level < nilfs_btree_height(btree) - 1;
 	     level++) {
-		node = nilfs_btree_get_nonroot_node(btree, path, level);
-		if (nilfs_btree_node_get_nchildren(btree, node) <
-		    nilfs_btree_node_nchildren_max(btree, node)) {
+		node = nilfs_btree_get_nonroot_node(path, level);
+		if (nilfs_btree_node_get_nchildren(node) <
+		    nilfs_btree_node_nchildren_max(node, btree)) {
 			path[level].bp_op = nilfs_btree_do_insert;
 			stats->bs_nblocks++;
 			goto out;
@@ -1026,8 +979,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
 			if (ret < 0)
 				goto err_out_child_node;
 			sib = (struct nilfs_btree_node *)bh->b_data;
-			if (nilfs_btree_node_get_nchildren(btree, sib) <
-			    nilfs_btree_node_nchildren_max(btree, sib)) {
+			if (nilfs_btree_node_get_nchildren(sib) <
+			    nilfs_btree_node_nchildren_max(sib, btree)) {
 				path[level].bp_sib_bh = bh;
 				path[level].bp_op = nilfs_btree_carry_left;
 				stats->bs_nblocks++;
@@ -1038,15 +991,15 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
 
 		/* right sibling */
 		if (pindex <
-		    nilfs_btree_node_get_nchildren(btree, parent) - 1) {
+		    nilfs_btree_node_get_nchildren(parent) - 1) {
 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
 							  pindex + 1);
 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
 			if (ret < 0)
 				goto err_out_child_node;
 			sib = (struct nilfs_btree_node *)bh->b_data;
-			if (nilfs_btree_node_get_nchildren(btree, sib) <
-			    nilfs_btree_node_nchildren_max(btree, sib)) {
+			if (nilfs_btree_node_get_nchildren(sib) <
+			    nilfs_btree_node_nchildren_max(sib, btree)) {
 				path[level].bp_sib_bh = bh;
 				path[level].bp_op = nilfs_btree_carry_right;
 				stats->bs_nblocks++;
@@ -1059,7 +1012,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
 		path[level].bp_newreq.bpr_ptr =
 			path[level - 1].bp_newreq.bpr_ptr + 1;
 		ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
-						   &path[level].bp_newreq);
+						   &path[level].bp_newreq, dat);
 		if (ret < 0)
 			goto err_out_child_node;
 		ret = nilfs_btree_get_new_block(btree,
@@ -1081,8 +1034,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
 
 	/* root */
 	node = nilfs_btree_get_root(btree);
-	if (nilfs_btree_node_get_nchildren(btree, node) <
-	    nilfs_btree_node_nchildren_max(btree, node)) {
+	if (nilfs_btree_node_get_nchildren(node) <
+	    nilfs_btree_node_nchildren_max(node, btree)) {
 		path[level].bp_op = nilfs_btree_do_insert;
 		stats->bs_nblocks++;
 		goto out;
@@ -1091,7 +1044,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
 	/* grow */
 	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
 	ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
-					   &path[level].bp_newreq);
+					   &path[level].bp_newreq, dat);
 	if (ret < 0)
 		goto err_out_child_node;
 	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
@@ -1119,16 +1072,18 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
 
 	/* error */
  err_out_curr_node:
-	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
+	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
+				   dat);
  err_out_child_node:
 	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
 		nilfs_btnode_delete(path[level].bp_sib_bh);
 		nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
-					   &path[level].bp_newreq);
+					   &path[level].bp_newreq, dat);
 
 	}
 
-	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
+	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
+				   dat);
  err_out_data:
 	*levelp = level;
 	stats->bs_nblocks = 0;
@@ -1139,16 +1094,19 @@ static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
 				      struct nilfs_btree_path *path,
 				      int maxlevel, __u64 key, __u64 ptr)
 {
+	struct inode *dat = NULL;
 	int level;
 
 	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
 	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
-	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
+	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
 		nilfs_btree_set_target_v(btree, key, ptr);
+		dat = nilfs_bmap_get_dat(&btree->bt_bmap);
+	}
 
 	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
 		nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
-					    &path[level - 1].bp_newreq);
+					    &path[level - 1].bp_newreq, dat);
 		path[level].bp_op(btree, path, level, &key, &ptr);
 	}
 
@@ -1164,10 +1122,10 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
 	int level, ret;
 
 	btree = (struct nilfs_btree *)bmap;
-	path = nilfs_btree_alloc_path(btree);
+	path = nilfs_btree_alloc_path();
 	if (path == NULL)
 		return -ENOMEM;
-	nilfs_btree_init_path(btree, path);
+	nilfs_btree_init_path(path);
 
 	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
 				    NILFS_BTREE_LEVEL_NODE_MIN);
@@ -1184,8 +1142,8 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1184 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); 1142 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1185 1143
1186 out: 1144 out:
1187 nilfs_btree_clear_path(btree, path); 1145 nilfs_btree_release_path(path);
1188 nilfs_btree_free_path(btree, path); 1146 nilfs_btree_free_path(path);
1189 return ret; 1147 return ret;
1190} 1148}
1191 1149
@@ -1197,7 +1155,7 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1197 1155
1198 if (level < nilfs_btree_height(btree) - 1) { 1156 if (level < nilfs_btree_height(btree) - 1) {
1199 lock_buffer(path[level].bp_bh); 1157 lock_buffer(path[level].bp_bh);
1200 node = nilfs_btree_get_nonroot_node(btree, path, level); 1158 node = nilfs_btree_get_nonroot_node(path, level);
1201 nilfs_btree_node_delete(btree, node, keyp, ptrp, 1159 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1202 path[level].bp_index); 1160 path[level].bp_index);
1203 if (!buffer_dirty(path[level].bp_bh)) 1161 if (!buffer_dirty(path[level].bp_bh))
@@ -1205,7 +1163,7 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1205 unlock_buffer(path[level].bp_bh); 1163 unlock_buffer(path[level].bp_bh);
1206 if (path[level].bp_index == 0) 1164 if (path[level].bp_index == 0)
1207 nilfs_btree_promote_key(btree, path, level + 1, 1165 nilfs_btree_promote_key(btree, path, level + 1,
1208 nilfs_btree_node_get_key(btree, node, 0)); 1166 nilfs_btree_node_get_key(node, 0));
1209 } else { 1167 } else {
1210 node = nilfs_btree_get_root(btree); 1168 node = nilfs_btree_get_root(btree);
1211 nilfs_btree_node_delete(btree, node, keyp, ptrp, 1169 nilfs_btree_node_delete(btree, node, keyp, ptrp,
@@ -1225,10 +1183,10 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1225 lock_buffer(path[level].bp_bh); 1183 lock_buffer(path[level].bp_bh);
1226 lock_buffer(path[level].bp_sib_bh); 1184 lock_buffer(path[level].bp_sib_bh);
1227 1185
1228 node = nilfs_btree_get_nonroot_node(btree, path, level); 1186 node = nilfs_btree_get_nonroot_node(path, level);
1229 left = nilfs_btree_get_sib_node(btree, path, level); 1187 left = nilfs_btree_get_sib_node(path, level);
1230 nchildren = nilfs_btree_node_get_nchildren(btree, node); 1188 nchildren = nilfs_btree_node_get_nchildren(node);
1231 lnchildren = nilfs_btree_node_get_nchildren(btree, left); 1189 lnchildren = nilfs_btree_node_get_nchildren(left);
1232 1190
1233 n = (nchildren + lnchildren) / 2 - nchildren; 1191 n = (nchildren + lnchildren) / 2 - nchildren;
1234 1192
@@ -1243,7 +1201,7 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1243 unlock_buffer(path[level].bp_sib_bh); 1201 unlock_buffer(path[level].bp_sib_bh);
1244 1202
1245 nilfs_btree_promote_key(btree, path, level + 1, 1203 nilfs_btree_promote_key(btree, path, level + 1,
1246 nilfs_btree_node_get_key(btree, node, 0)); 1204 nilfs_btree_node_get_key(node, 0));
1247 1205
1248 brelse(path[level].bp_sib_bh); 1206 brelse(path[level].bp_sib_bh);
1249 path[level].bp_sib_bh = NULL; 1207 path[level].bp_sib_bh = NULL;
@@ -1262,10 +1220,10 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1262 lock_buffer(path[level].bp_bh); 1220 lock_buffer(path[level].bp_bh);
1263 lock_buffer(path[level].bp_sib_bh); 1221 lock_buffer(path[level].bp_sib_bh);
1264 1222
1265 node = nilfs_btree_get_nonroot_node(btree, path, level); 1223 node = nilfs_btree_get_nonroot_node(path, level);
1266 right = nilfs_btree_get_sib_node(btree, path, level); 1224 right = nilfs_btree_get_sib_node(path, level);
1267 nchildren = nilfs_btree_node_get_nchildren(btree, node); 1225 nchildren = nilfs_btree_node_get_nchildren(node);
1268 rnchildren = nilfs_btree_node_get_nchildren(btree, right); 1226 rnchildren = nilfs_btree_node_get_nchildren(right);
1269 1227
1270 n = (nchildren + rnchildren) / 2 - nchildren; 1228 n = (nchildren + rnchildren) / 2 - nchildren;
1271 1229
@@ -1281,7 +1239,7 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1281 1239
1282 path[level + 1].bp_index++; 1240 path[level + 1].bp_index++;
1283 nilfs_btree_promote_key(btree, path, level + 1, 1241 nilfs_btree_promote_key(btree, path, level + 1,
1284 nilfs_btree_node_get_key(btree, right, 0)); 1242 nilfs_btree_node_get_key(right, 0));
1285 path[level + 1].bp_index--; 1243 path[level + 1].bp_index--;
1286 1244
1287 brelse(path[level].bp_sib_bh); 1245 brelse(path[level].bp_sib_bh);
@@ -1300,10 +1258,10 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1300 lock_buffer(path[level].bp_bh); 1258 lock_buffer(path[level].bp_bh);
1301 lock_buffer(path[level].bp_sib_bh); 1259 lock_buffer(path[level].bp_sib_bh);
1302 1260
1303 node = nilfs_btree_get_nonroot_node(btree, path, level); 1261 node = nilfs_btree_get_nonroot_node(path, level);
1304 left = nilfs_btree_get_sib_node(btree, path, level); 1262 left = nilfs_btree_get_sib_node(path, level);
1305 1263
1306 n = nilfs_btree_node_get_nchildren(btree, node); 1264 n = nilfs_btree_node_get_nchildren(node);
1307 1265
1308 nilfs_btree_node_move_left(btree, left, node, n); 1266 nilfs_btree_node_move_left(btree, left, node, n);
1309 1267
@@ -1316,7 +1274,7 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1316 nilfs_btnode_delete(path[level].bp_bh); 1274 nilfs_btnode_delete(path[level].bp_bh);
1317 path[level].bp_bh = path[level].bp_sib_bh; 1275 path[level].bp_bh = path[level].bp_sib_bh;
1318 path[level].bp_sib_bh = NULL; 1276 path[level].bp_sib_bh = NULL;
1319 path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left); 1277 path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1320} 1278}
1321 1279
1322static void nilfs_btree_concat_right(struct nilfs_btree *btree, 1280static void nilfs_btree_concat_right(struct nilfs_btree *btree,
@@ -1331,10 +1289,10 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1331 lock_buffer(path[level].bp_bh); 1289 lock_buffer(path[level].bp_bh);
1332 lock_buffer(path[level].bp_sib_bh); 1290 lock_buffer(path[level].bp_sib_bh);
1333 1291
1334 node = nilfs_btree_get_nonroot_node(btree, path, level); 1292 node = nilfs_btree_get_nonroot_node(path, level);
1335 right = nilfs_btree_get_sib_node(btree, path, level); 1293 right = nilfs_btree_get_sib_node(path, level);
1336 1294
1337 n = nilfs_btree_node_get_nchildren(btree, right); 1295 n = nilfs_btree_node_get_nchildren(right);
1338 1296
1339 nilfs_btree_node_move_left(btree, node, right, n); 1297 nilfs_btree_node_move_left(btree, node, right, n);
1340 1298
@@ -1360,11 +1318,11 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
1360 1318
1361 lock_buffer(path[level].bp_bh); 1319 lock_buffer(path[level].bp_bh);
1362 root = nilfs_btree_get_root(btree); 1320 root = nilfs_btree_get_root(btree);
1363 child = nilfs_btree_get_nonroot_node(btree, path, level); 1321 child = nilfs_btree_get_nonroot_node(path, level);
1364 1322
1365 nilfs_btree_node_delete(btree, root, NULL, NULL, 0); 1323 nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1366 nilfs_btree_node_set_level(btree, root, level); 1324 nilfs_btree_node_set_level(root, level);
1367 n = nilfs_btree_node_get_nchildren(btree, child); 1325 n = nilfs_btree_node_get_nchildren(child);
1368 nilfs_btree_node_move_left(btree, root, child, n); 1326 nilfs_btree_node_move_left(btree, root, child, n);
1369 unlock_buffer(path[level].bp_bh); 1327 unlock_buffer(path[level].bp_bh);
1370 1328
@@ -1376,7 +1334,8 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
1376static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, 1334static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1377 struct nilfs_btree_path *path, 1335 struct nilfs_btree_path *path,
1378 int *levelp, 1336 int *levelp,
1379 struct nilfs_bmap_stats *stats) 1337 struct nilfs_bmap_stats *stats,
1338 struct inode *dat)
1380{ 1339{
1381 struct buffer_head *bh; 1340 struct buffer_head *bh;
1382 struct nilfs_btree_node *node, *parent, *sib; 1341 struct nilfs_btree_node *node, *parent, *sib;
@@ -1388,17 +1347,17 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1388 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1347 for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1389 level < nilfs_btree_height(btree) - 1; 1348 level < nilfs_btree_height(btree) - 1;
1390 level++) { 1349 level++) {
1391 node = nilfs_btree_get_nonroot_node(btree, path, level); 1350 node = nilfs_btree_get_nonroot_node(path, level);
1392 path[level].bp_oldreq.bpr_ptr = 1351 path[level].bp_oldreq.bpr_ptr =
1393 nilfs_btree_node_get_ptr(btree, node, 1352 nilfs_btree_node_get_ptr(btree, node,
1394 path[level].bp_index); 1353 path[level].bp_index);
1395 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, 1354 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1396 &path[level].bp_oldreq); 1355 &path[level].bp_oldreq, dat);
1397 if (ret < 0) 1356 if (ret < 0)
1398 goto err_out_child_node; 1357 goto err_out_child_node;
1399 1358
1400 if (nilfs_btree_node_get_nchildren(btree, node) > 1359 if (nilfs_btree_node_get_nchildren(node) >
1401 nilfs_btree_node_nchildren_min(btree, node)) { 1360 nilfs_btree_node_nchildren_min(node, btree)) {
1402 path[level].bp_op = nilfs_btree_do_delete; 1361 path[level].bp_op = nilfs_btree_do_delete;
1403 stats->bs_nblocks++; 1362 stats->bs_nblocks++;
1404 goto out; 1363 goto out;
@@ -1415,8 +1374,8 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1415 if (ret < 0) 1374 if (ret < 0)
1416 goto err_out_curr_node; 1375 goto err_out_curr_node;
1417 sib = (struct nilfs_btree_node *)bh->b_data; 1376 sib = (struct nilfs_btree_node *)bh->b_data;
1418 if (nilfs_btree_node_get_nchildren(btree, sib) > 1377 if (nilfs_btree_node_get_nchildren(sib) >
1419 nilfs_btree_node_nchildren_min(btree, sib)) { 1378 nilfs_btree_node_nchildren_min(sib, btree)) {
1420 path[level].bp_sib_bh = bh; 1379 path[level].bp_sib_bh = bh;
1421 path[level].bp_op = nilfs_btree_borrow_left; 1380 path[level].bp_op = nilfs_btree_borrow_left;
1422 stats->bs_nblocks++; 1381 stats->bs_nblocks++;
@@ -1428,7 +1387,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1428 /* continue; */ 1387 /* continue; */
1429 } 1388 }
1430 } else if (pindex < 1389 } else if (pindex <
1431 nilfs_btree_node_get_nchildren(btree, parent) - 1) { 1390 nilfs_btree_node_get_nchildren(parent) - 1) {
1432 /* right sibling */ 1391 /* right sibling */
1433 sibptr = nilfs_btree_node_get_ptr(btree, parent, 1392 sibptr = nilfs_btree_node_get_ptr(btree, parent,
1434 pindex + 1); 1393 pindex + 1);
@@ -1436,8 +1395,8 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1436 if (ret < 0) 1395 if (ret < 0)
1437 goto err_out_curr_node; 1396 goto err_out_curr_node;
1438 sib = (struct nilfs_btree_node *)bh->b_data; 1397 sib = (struct nilfs_btree_node *)bh->b_data;
1439 if (nilfs_btree_node_get_nchildren(btree, sib) > 1398 if (nilfs_btree_node_get_nchildren(sib) >
1440 nilfs_btree_node_nchildren_min(btree, sib)) { 1399 nilfs_btree_node_nchildren_min(sib, btree)) {
1441 path[level].bp_sib_bh = bh; 1400 path[level].bp_sib_bh = bh;
1442 path[level].bp_op = nilfs_btree_borrow_right; 1401 path[level].bp_op = nilfs_btree_borrow_right;
1443 stats->bs_nblocks++; 1402 stats->bs_nblocks++;
@@ -1452,7 +1411,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1452 /* no siblings */ 1411 /* no siblings */
1453 /* the only child of the root node */ 1412 /* the only child of the root node */
1454 WARN_ON(level != nilfs_btree_height(btree) - 2); 1413 WARN_ON(level != nilfs_btree_height(btree) - 2);
1455 if (nilfs_btree_node_get_nchildren(btree, node) - 1 <= 1414 if (nilfs_btree_node_get_nchildren(node) - 1 <=
1456 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1415 NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1457 path[level].bp_op = nilfs_btree_shrink; 1416 path[level].bp_op = nilfs_btree_shrink;
1458 stats->bs_nblocks += 2; 1417 stats->bs_nblocks += 2;
@@ -1471,7 +1430,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1471 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); 1430 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1472 1431
1473 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, 1432 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1474 &path[level].bp_oldreq); 1433 &path[level].bp_oldreq, dat);
1475 if (ret < 0) 1434 if (ret < 0)
1476 goto err_out_child_node; 1435 goto err_out_child_node;
1477 1436
@@ -1486,12 +1445,12 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1486 1445
1487 /* error */ 1446 /* error */
1488 err_out_curr_node: 1447 err_out_curr_node:
1489 nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq); 1448 nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
1490 err_out_child_node: 1449 err_out_child_node:
1491 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1450 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1492 brelse(path[level].bp_sib_bh); 1451 brelse(path[level].bp_sib_bh);
1493 nilfs_bmap_abort_end_ptr(&btree->bt_bmap, 1452 nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1494 &path[level].bp_oldreq); 1453 &path[level].bp_oldreq, dat);
1495 } 1454 }
1496 *levelp = level; 1455 *levelp = level;
1497 stats->bs_nblocks = 0; 1456 stats->bs_nblocks = 0;
@@ -1500,13 +1459,13 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1500 1459
1501static void nilfs_btree_commit_delete(struct nilfs_btree *btree, 1460static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1502 struct nilfs_btree_path *path, 1461 struct nilfs_btree_path *path,
1503 int maxlevel) 1462 int maxlevel, struct inode *dat)
1504{ 1463{
1505 int level; 1464 int level;
1506 1465
1507 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1466 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1508 nilfs_bmap_commit_end_ptr(&btree->bt_bmap, 1467 nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1509 &path[level].bp_oldreq); 1468 &path[level].bp_oldreq, dat);
1510 path[level].bp_op(btree, path, level, NULL, NULL); 1469 path[level].bp_op(btree, path, level, NULL, NULL);
1511 } 1470 }
1512 1471
@@ -1520,27 +1479,32 @@ static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1520 struct nilfs_btree *btree; 1479 struct nilfs_btree *btree;
1521 struct nilfs_btree_path *path; 1480 struct nilfs_btree_path *path;
1522 struct nilfs_bmap_stats stats; 1481 struct nilfs_bmap_stats stats;
1482 struct inode *dat;
1523 int level, ret; 1483 int level, ret;
1524 1484
1525 btree = (struct nilfs_btree *)bmap; 1485 btree = (struct nilfs_btree *)bmap;
1526 path = nilfs_btree_alloc_path(btree); 1486 path = nilfs_btree_alloc_path();
1527 if (path == NULL) 1487 if (path == NULL)
1528 return -ENOMEM; 1488 return -ENOMEM;
1529 nilfs_btree_init_path(btree, path); 1489 nilfs_btree_init_path(path);
1530 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1490 ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1531 NILFS_BTREE_LEVEL_NODE_MIN); 1491 NILFS_BTREE_LEVEL_NODE_MIN);
1532 if (ret < 0) 1492 if (ret < 0)
1533 goto out; 1493 goto out;
1534 1494
1535 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats); 1495
1496 dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
1497 nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1498
1499 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1536 if (ret < 0) 1500 if (ret < 0)
1537 goto out; 1501 goto out;
1538 nilfs_btree_commit_delete(btree, path, level); 1502 nilfs_btree_commit_delete(btree, path, level, dat);
1539 nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); 1503 nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1540 1504
1541out: 1505out:
1542 nilfs_btree_clear_path(btree, path); 1506 nilfs_btree_release_path(path);
1543 nilfs_btree_free_path(btree, path); 1507 nilfs_btree_free_path(path);
1544 return ret; 1508 return ret;
1545} 1509}
1546 1510
@@ -1551,15 +1515,15 @@ static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1551 int ret; 1515 int ret;
1552 1516
1553 btree = (struct nilfs_btree *)bmap; 1517 btree = (struct nilfs_btree *)bmap;
1554 path = nilfs_btree_alloc_path(btree); 1518 path = nilfs_btree_alloc_path();
1555 if (path == NULL) 1519 if (path == NULL)
1556 return -ENOMEM; 1520 return -ENOMEM;
1557 nilfs_btree_init_path(btree, path); 1521 nilfs_btree_init_path(path);
1558 1522
1559 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1523 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1560 1524
1561 nilfs_btree_clear_path(btree, path); 1525 nilfs_btree_release_path(path);
1562 nilfs_btree_free_path(btree, path); 1526 nilfs_btree_free_path(path);
1563 1527
1564 return ret; 1528 return ret;
1565} 1529}
@@ -1581,7 +1545,7 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1581 node = root; 1545 node = root;
1582 break; 1546 break;
1583 case 3: 1547 case 3:
1584 nchildren = nilfs_btree_node_get_nchildren(btree, root); 1548 nchildren = nilfs_btree_node_get_nchildren(root);
1585 if (nchildren > 1) 1549 if (nchildren > 1)
1586 return 0; 1550 return 0;
1587 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); 1551 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
@@ -1594,10 +1558,10 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1594 return 0; 1558 return 0;
1595 } 1559 }
1596 1560
1597 nchildren = nilfs_btree_node_get_nchildren(btree, node); 1561 nchildren = nilfs_btree_node_get_nchildren(node);
1598 maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1); 1562 maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1599 nextmaxkey = (nchildren > 1) ? 1563 nextmaxkey = (nchildren > 1) ?
1600 nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0; 1564 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1601 if (bh != NULL) 1565 if (bh != NULL)
1602 brelse(bh); 1566 brelse(bh);
1603 1567
@@ -1623,7 +1587,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1623 node = root; 1587 node = root;
1624 break; 1588 break;
1625 case 3: 1589 case 3:
1626 nchildren = nilfs_btree_node_get_nchildren(btree, root); 1590 nchildren = nilfs_btree_node_get_nchildren(root);
1627 WARN_ON(nchildren > 1); 1591 WARN_ON(nchildren > 1);
1628 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); 1592 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1629 ret = nilfs_btree_get_block(btree, ptr, &bh); 1593 ret = nilfs_btree_get_block(btree, ptr, &bh);
@@ -1636,11 +1600,11 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1636 return -EINVAL; 1600 return -EINVAL;
1637 } 1601 }
1638 1602
1639 nchildren = nilfs_btree_node_get_nchildren(btree, node); 1603 nchildren = nilfs_btree_node_get_nchildren(node);
1640 if (nchildren < nitems) 1604 if (nchildren < nitems)
1641 nitems = nchildren; 1605 nitems = nchildren;
1642 dkeys = nilfs_btree_node_dkeys(btree, node); 1606 dkeys = nilfs_btree_node_dkeys(node);
1643 dptrs = nilfs_btree_node_dptrs(btree, node); 1607 dptrs = nilfs_btree_node_dptrs(node, btree);
1644 for (i = 0; i < nitems; i++) { 1608 for (i = 0; i < nitems; i++) {
1645 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]); 1609 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1646 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]); 1610 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
@@ -1660,18 +1624,20 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1660 struct nilfs_bmap_stats *stats) 1624 struct nilfs_bmap_stats *stats)
1661{ 1625{
1662 struct buffer_head *bh; 1626 struct buffer_head *bh;
1663 struct nilfs_btree *btree; 1627 struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1628 struct inode *dat = NULL;
1664 int ret; 1629 int ret;
1665 1630
1666 btree = (struct nilfs_btree *)bmap;
1667 stats->bs_nblocks = 0; 1631 stats->bs_nblocks = 0;
1668 1632
1669 /* for data */ 1633 /* for data */
1670 /* cannot find near ptr */ 1634 /* cannot find near ptr */
1671 if (NILFS_BMAP_USE_VBN(bmap)) 1635 if (NILFS_BMAP_USE_VBN(bmap)) {
1672 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); 1636 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1637 dat = nilfs_bmap_get_dat(bmap);
1638 }
1673 1639
1674 ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq); 1640 ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
1675 if (ret < 0) 1641 if (ret < 0)
1676 return ret; 1642 return ret;
1677 1643
@@ -1679,7 +1645,7 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1679 stats->bs_nblocks++; 1645 stats->bs_nblocks++;
1680 if (nreq != NULL) { 1646 if (nreq != NULL) {
1681 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1647 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1682 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq); 1648 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
1683 if (ret < 0) 1649 if (ret < 0)
1684 goto err_out_dreq; 1650 goto err_out_dreq;
1685 1651
@@ -1696,9 +1662,9 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1696 1662
1697 /* error */ 1663 /* error */
1698 err_out_nreq: 1664 err_out_nreq:
1699 nilfs_bmap_abort_alloc_ptr(bmap, nreq); 1665 nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
1700 err_out_dreq: 1666 err_out_dreq:
1701 nilfs_bmap_abort_alloc_ptr(bmap, dreq); 1667 nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
1702 stats->bs_nblocks = 0; 1668 stats->bs_nblocks = 0;
1703 return ret; 1669 return ret;
1704 1670
@@ -1713,8 +1679,9 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1713 union nilfs_bmap_ptr_req *nreq, 1679 union nilfs_bmap_ptr_req *nreq,
1714 struct buffer_head *bh) 1680 struct buffer_head *bh)
1715{ 1681{
1716 struct nilfs_btree *btree; 1682 struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1717 struct nilfs_btree_node *node; 1683 struct nilfs_btree_node *node;
1684 struct inode *dat;
1718 __u64 tmpptr; 1685 __u64 tmpptr;
1719 1686
1720 /* free resources */ 1687 /* free resources */
@@ -1725,11 +1692,11 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1725 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1692 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1726 1693
1727 /* convert and insert */ 1694 /* convert and insert */
1728 btree = (struct nilfs_btree *)bmap; 1695 dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
1729 nilfs_btree_init(bmap); 1696 nilfs_btree_init(bmap);
1730 if (nreq != NULL) { 1697 if (nreq != NULL) {
1731 nilfs_bmap_commit_alloc_ptr(bmap, dreq); 1698 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1732 nilfs_bmap_commit_alloc_ptr(bmap, nreq); 1699 nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1733 1700
1734 /* create child node at level 1 */ 1701 /* create child node at level 1 */
1735 lock_buffer(bh); 1702 lock_buffer(bh);
@@ -1751,7 +1718,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1751 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, 1718 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1752 2, 1, &keys[0], &tmpptr); 1719 2, 1, &keys[0], &tmpptr);
1753 } else { 1720 } else {
1754 nilfs_bmap_commit_alloc_ptr(bmap, dreq); 1721 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1755 1722
1756 /* create root node at level 1 */ 1723 /* create root node at level 1 */
1757 node = nilfs_btree_get_root(btree); 1724 node = nilfs_btree_get_root(btree);
@@ -1822,7 +1789,7 @@ static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1822 1789
1823static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, 1790static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1824 struct nilfs_btree_path *path, 1791 struct nilfs_btree_path *path,
1825 int level) 1792 int level, struct inode *dat)
1826{ 1793{
1827 struct nilfs_btree_node *parent; 1794 struct nilfs_btree_node *parent;
1828 int ret; 1795 int ret;
@@ -1832,9 +1799,8 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1832 nilfs_btree_node_get_ptr(btree, parent, 1799 nilfs_btree_node_get_ptr(btree, parent,
1833 path[level + 1].bp_index); 1800 path[level + 1].bp_index);
1834 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; 1801 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1835 ret = nilfs_bmap_prepare_update_v(&btree->bt_bmap, 1802 ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1836 &path[level].bp_oldreq, 1803 &path[level].bp_newreq.bpr_req);
1837 &path[level].bp_newreq);
1838 if (ret < 0) 1804 if (ret < 0)
1839 return ret; 1805 return ret;
1840 1806
@@ -1846,9 +1812,9 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1846 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, 1812 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1847 &path[level].bp_ctxt); 1813 &path[level].bp_ctxt);
1848 if (ret < 0) { 1814 if (ret < 0) {
1849 nilfs_bmap_abort_update_v(&btree->bt_bmap, 1815 nilfs_dat_abort_update(dat,
1850 &path[level].bp_oldreq, 1816 &path[level].bp_oldreq.bpr_req,
1851 &path[level].bp_newreq); 1817 &path[level].bp_newreq.bpr_req);
1852 return ret; 1818 return ret;
1853 } 1819 }
1854 } 1820 }
@@ -1858,13 +1824,13 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1858 1824
1859static void nilfs_btree_commit_update_v(struct nilfs_btree *btree, 1825static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1860 struct nilfs_btree_path *path, 1826 struct nilfs_btree_path *path,
1861 int level) 1827 int level, struct inode *dat)
1862{ 1828{
1863 struct nilfs_btree_node *parent; 1829 struct nilfs_btree_node *parent;
1864 1830
1865 nilfs_bmap_commit_update_v(&btree->bt_bmap, 1831 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1866 &path[level].bp_oldreq, 1832 &path[level].bp_newreq.bpr_req,
1867 &path[level].bp_newreq); 1833 btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
1868 1834
1869 if (buffer_nilfs_node(path[level].bp_bh)) { 1835 if (buffer_nilfs_node(path[level].bp_bh)) {
1870 nilfs_btnode_commit_change_key( 1836 nilfs_btnode_commit_change_key(
@@ -1881,11 +1847,10 @@ static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1881 1847
1882static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, 1848static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1883 struct nilfs_btree_path *path, 1849 struct nilfs_btree_path *path,
1884 int level) 1850 int level, struct inode *dat)
1885{ 1851{
1886 nilfs_bmap_abort_update_v(&btree->bt_bmap, 1852 nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1887 &path[level].bp_oldreq, 1853 &path[level].bp_newreq.bpr_req);
1888 &path[level].bp_newreq);
1889 if (buffer_nilfs_node(path[level].bp_bh)) 1854 if (buffer_nilfs_node(path[level].bp_bh))
1890 nilfs_btnode_abort_change_key( 1855 nilfs_btnode_abort_change_key(
1891 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, 1856 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
@@ -1894,14 +1859,14 @@ static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1894 1859
1895static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, 1860static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1896 struct nilfs_btree_path *path, 1861 struct nilfs_btree_path *path,
1897 int minlevel, 1862 int minlevel, int *maxlevelp,
1898 int *maxlevelp) 1863 struct inode *dat)
1899{ 1864{
1900 int level, ret; 1865 int level, ret;
1901 1866
1902 level = minlevel; 1867 level = minlevel;
1903 if (!buffer_nilfs_volatile(path[level].bp_bh)) { 1868 if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1904 ret = nilfs_btree_prepare_update_v(btree, path, level); 1869 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1905 if (ret < 0) 1870 if (ret < 0)
1906 return ret; 1871 return ret;
1907 } 1872 }
@@ -1909,7 +1874,7 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1909 !buffer_dirty(path[level].bp_bh)) { 1874 !buffer_dirty(path[level].bp_bh)) {
1910 1875
1911 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); 1876 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1912 ret = nilfs_btree_prepare_update_v(btree, path, level); 1877 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1913 if (ret < 0) 1878 if (ret < 0)
1914 goto out; 1879 goto out;
1915 } 1880 }
@@ -1921,39 +1886,40 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1921 /* error */ 1886 /* error */
1922 out: 1887 out:
1923 while (--level > minlevel) 1888 while (--level > minlevel)
1924 nilfs_btree_abort_update_v(btree, path, level); 1889 nilfs_btree_abort_update_v(btree, path, level, dat);
1925 if (!buffer_nilfs_volatile(path[level].bp_bh)) 1890 if (!buffer_nilfs_volatile(path[level].bp_bh))
1926 nilfs_btree_abort_update_v(btree, path, level); 1891 nilfs_btree_abort_update_v(btree, path, level, dat);
1927 return ret; 1892 return ret;
1928} 1893}
1929 1894
1930static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree, 1895static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1931 struct nilfs_btree_path *path, 1896 struct nilfs_btree_path *path,
1932 int minlevel, 1897 int minlevel, int maxlevel,
1933 int maxlevel, 1898 struct buffer_head *bh,
1934 struct buffer_head *bh) 1899 struct inode *dat)
1935{ 1900{
1936 int level; 1901 int level;
1937 1902
1938 if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) 1903 if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1939 nilfs_btree_commit_update_v(btree, path, minlevel); 1904 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1940 1905
1941 for (level = minlevel + 1; level <= maxlevel; level++) 1906 for (level = minlevel + 1; level <= maxlevel; level++)
1942 nilfs_btree_commit_update_v(btree, path, level); 1907 nilfs_btree_commit_update_v(btree, path, level, dat);
1943} 1908}
1944 1909
1945static int nilfs_btree_propagate_v(struct nilfs_btree *btree, 1910static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1946 struct nilfs_btree_path *path, 1911 struct nilfs_btree_path *path,
1947 int level, 1912 int level, struct buffer_head *bh)
1948 struct buffer_head *bh)
1949{ 1913{
1950 int maxlevel, ret; 1914 int maxlevel, ret;
1951 struct nilfs_btree_node *parent; 1915 struct nilfs_btree_node *parent;
1916 struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1952 __u64 ptr; 1917 __u64 ptr;
1953 1918
1954 get_bh(bh); 1919 get_bh(bh);
1955 path[level].bp_bh = bh; 1920 path[level].bp_bh = bh;
1956 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel); 1921 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1922 dat);
1957 if (ret < 0) 1923 if (ret < 0)
1958 goto out; 1924 goto out;
1959 1925
@@ -1961,12 +1927,12 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1961 parent = nilfs_btree_get_node(btree, path, level + 1); 1927 parent = nilfs_btree_get_node(btree, path, level + 1);
1962 ptr = nilfs_btree_node_get_ptr(btree, parent, 1928 ptr = nilfs_btree_node_get_ptr(btree, parent,
1963 path[level + 1].bp_index); 1929 path[level + 1].bp_index);
1964 ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr); 1930 ret = nilfs_dat_mark_dirty(dat, ptr);
1965 if (ret < 0) 1931 if (ret < 0)
1966 goto out; 1932 goto out;
1967 } 1933 }
1968 1934
1969 nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh); 1935 nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1970 1936
1971 out: 1937 out:
1972 brelse(path[level].bp_bh); 1938 brelse(path[level].bp_bh);
@@ -1986,15 +1952,15 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1986 WARN_ON(!buffer_dirty(bh)); 1952 WARN_ON(!buffer_dirty(bh));
1987 1953
1988 btree = (struct nilfs_btree *)bmap; 1954 btree = (struct nilfs_btree *)bmap;
1989 path = nilfs_btree_alloc_path(btree); 1955 path = nilfs_btree_alloc_path();
1990 if (path == NULL) 1956 if (path == NULL)
1991 return -ENOMEM; 1957 return -ENOMEM;
1992 nilfs_btree_init_path(btree, path); 1958 nilfs_btree_init_path(path);
1993 1959
1994 if (buffer_nilfs_node(bh)) { 1960 if (buffer_nilfs_node(bh)) {
1995 node = (struct nilfs_btree_node *)bh->b_data; 1961 node = (struct nilfs_btree_node *)bh->b_data;
1996 key = nilfs_btree_node_get_key(btree, node, 0); 1962 key = nilfs_btree_node_get_key(node, 0);
1997 level = nilfs_btree_node_get_level(btree, node); 1963 level = nilfs_btree_node_get_level(node);
1998 } else { 1964 } else {
1999 key = nilfs_bmap_data_get_key(bmap, bh); 1965 key = nilfs_bmap_data_get_key(bmap, bh);
2000 level = NILFS_BTREE_LEVEL_DATA; 1966 level = NILFS_BTREE_LEVEL_DATA;
@@ -2013,8 +1979,8 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
2013 nilfs_btree_propagate_p(btree, path, level, bh); 1979 nilfs_btree_propagate_p(btree, path, level, bh);
2014 1980
2015 out: 1981 out:
2016 nilfs_btree_clear_path(btree, path); 1982 nilfs_btree_release_path(path);
2017 nilfs_btree_free_path(btree, path); 1983 nilfs_btree_free_path(path);
2018 1984
2019 return ret; 1985 return ret;
2020} 1986}
@@ -2022,7 +1988,7 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
2022static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap, 1988static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
2023 struct buffer_head *bh) 1989 struct buffer_head *bh)
2024{ 1990{
2025 return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr); 1991 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
2026} 1992}
2027 1993
2028static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, 1994static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
@@ -2037,12 +2003,12 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
2037 2003
2038 get_bh(bh); 2004 get_bh(bh);
2039 node = (struct nilfs_btree_node *)bh->b_data; 2005 node = (struct nilfs_btree_node *)bh->b_data;
2040 key = nilfs_btree_node_get_key(btree, node, 0); 2006 key = nilfs_btree_node_get_key(node, 0);
2041 level = nilfs_btree_node_get_level(btree, node); 2007 level = nilfs_btree_node_get_level(node);
2042 list_for_each(head, &lists[level]) { 2008 list_for_each(head, &lists[level]) {
2043 cbh = list_entry(head, struct buffer_head, b_assoc_buffers); 2009 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2044 cnode = (struct nilfs_btree_node *)cbh->b_data; 2010 cnode = (struct nilfs_btree_node *)cbh->b_data;
2045 ckey = nilfs_btree_node_get_key(btree, cnode, 0); 2011 ckey = nilfs_btree_node_get_key(cnode, 0);
2046 if (key < ckey) 2012 if (key < ckey)
2047 break; 2013 break;
2048 } 2014 }
@@ -2120,8 +2086,7 @@ static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2120 nilfs_btree_node_set_ptr(btree, parent, 2086 nilfs_btree_node_set_ptr(btree, parent,
2121 path[level + 1].bp_index, blocknr); 2087 path[level + 1].bp_index, blocknr);
2122 2088
2123 key = nilfs_btree_node_get_key(btree, parent, 2089 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2124 path[level + 1].bp_index);
2125 /* on-disk format */ 2090 /* on-disk format */
2126 binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); 2091 binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2127 binfo->bi_dat.bi_level = level; 2092 binfo->bi_dat.bi_level = level;
@@ -2137,6 +2102,7 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2137 union nilfs_binfo *binfo) 2102 union nilfs_binfo *binfo)
2138{ 2103{
2139 struct nilfs_btree_node *parent; 2104 struct nilfs_btree_node *parent;
2105 struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
2140 __u64 key; 2106 __u64 key;
2141 __u64 ptr; 2107 __u64 ptr;
2142 union nilfs_bmap_ptr_req req; 2108 union nilfs_bmap_ptr_req req;
@@ -2146,12 +2112,12 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2146 ptr = nilfs_btree_node_get_ptr(btree, parent, 2112 ptr = nilfs_btree_node_get_ptr(btree, parent,
2147 path[level + 1].bp_index); 2113 path[level + 1].bp_index);
2148 req.bpr_ptr = ptr; 2114 req.bpr_ptr = ptr;
2149 ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr); 2115 ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2150 if (unlikely(ret < 0)) 2116 if (ret < 0)
2151 return ret; 2117 return ret;
2118 nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2152 2119
2153 key = nilfs_btree_node_get_key(btree, parent, 2120 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2154 path[level + 1].bp_index);
2155 /* on-disk format */ 2121 /* on-disk format */
2156 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); 2122 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2157 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); 2123 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
@@ -2171,15 +2137,15 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2171 int level, ret; 2137 int level, ret;
2172 2138
2173 btree = (struct nilfs_btree *)bmap; 2139 btree = (struct nilfs_btree *)bmap;
2174 path = nilfs_btree_alloc_path(btree); 2140 path = nilfs_btree_alloc_path();
2175 if (path == NULL) 2141 if (path == NULL)
2176 return -ENOMEM; 2142 return -ENOMEM;
2177 nilfs_btree_init_path(btree, path); 2143 nilfs_btree_init_path(path);
2178 2144
2179 if (buffer_nilfs_node(*bh)) { 2145 if (buffer_nilfs_node(*bh)) {
2180 node = (struct nilfs_btree_node *)(*bh)->b_data; 2146 node = (struct nilfs_btree_node *)(*bh)->b_data;
2181 key = nilfs_btree_node_get_key(btree, node, 0); 2147 key = nilfs_btree_node_get_key(node, 0);
2182 level = nilfs_btree_node_get_level(btree, node); 2148 level = nilfs_btree_node_get_level(node);
2183 } else { 2149 } else {
2184 key = nilfs_bmap_data_get_key(bmap, *bh); 2150 key = nilfs_bmap_data_get_key(bmap, *bh);
2185 level = NILFS_BTREE_LEVEL_DATA; 2151 level = NILFS_BTREE_LEVEL_DATA;
@@ -2196,8 +2162,8 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2196 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); 2162 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2197 2163
2198 out: 2164 out:
2199 nilfs_btree_clear_path(btree, path); 2165 nilfs_btree_release_path(path);
2200 nilfs_btree_free_path(btree, path); 2166 nilfs_btree_free_path(path);
2201 2167
2202 return ret; 2168 return ret;
2203} 2169}
@@ -2207,19 +2173,18 @@ static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2207 sector_t blocknr, 2173 sector_t blocknr,
2208 union nilfs_binfo *binfo) 2174 union nilfs_binfo *binfo)
2209{ 2175{
2210 struct nilfs_btree *btree;
2211 struct nilfs_btree_node *node; 2176 struct nilfs_btree_node *node;
2212 __u64 key; 2177 __u64 key;
2213 int ret; 2178 int ret;
2214 2179
2215 btree = (struct nilfs_btree *)bmap; 2180 ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
2216 ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr); 2181 blocknr);
2217 if (ret < 0) 2182 if (ret < 0)
2218 return ret; 2183 return ret;
2219 2184
2220 if (buffer_nilfs_node(*bh)) { 2185 if (buffer_nilfs_node(*bh)) {
2221 node = (struct nilfs_btree_node *)(*bh)->b_data; 2186 node = (struct nilfs_btree_node *)(*bh)->b_data;
2222 key = nilfs_btree_node_get_key(btree, node, 0); 2187 key = nilfs_btree_node_get_key(node, 0);
2223 } else 2188 } else
2224 key = nilfs_bmap_data_get_key(bmap, *bh); 2189 key = nilfs_bmap_data_get_key(bmap, *bh);
2225 2190
@@ -2239,10 +2204,10 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2239 int ret; 2204 int ret;
2240 2205
2241 btree = (struct nilfs_btree *)bmap; 2206 btree = (struct nilfs_btree *)bmap;
2242 path = nilfs_btree_alloc_path(btree); 2207 path = nilfs_btree_alloc_path();
2243 if (path == NULL) 2208 if (path == NULL)
2244 return -ENOMEM; 2209 return -ENOMEM;
2245 nilfs_btree_init_path(btree, path); 2210 nilfs_btree_init_path(path);
2246 2211
2247 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1); 2212 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2248 if (ret < 0) { 2213 if (ret < 0) {
@@ -2262,8 +2227,8 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2262 nilfs_bmap_set_dirty(&btree->bt_bmap); 2227 nilfs_bmap_set_dirty(&btree->bt_bmap);
2263 2228
2264 out: 2229 out:
2265 nilfs_btree_clear_path(btree, path); 2230 nilfs_btree_release_path(path);
2266 nilfs_btree_free_path(btree, path); 2231 nilfs_btree_free_path(path);
2267 return ret; 2232 return ret;
2268} 2233}
2269 2234
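
Note on the btree.c hunks above: they apply one refactoring throughout. The DAT inode is now resolved once per operation (and only when the bmap uses virtual block numbers), then passed into the prepare/commit/abort helpers, replacing the old wrappers that looked it up on every call. A minimal sketch of the resulting calling convention; the helper signatures are those in the diff, but the surrounding function and do_more_work() are invented for illustration:

    /* Illustrative only: how a btree operation threads the DAT inode
     * through a prepare/commit/abort triple after this change. */
    static int example_btree_op(struct nilfs_btree *btree,
                                struct nilfs_btree_path *path, int level)
    {
        struct inode *dat = NULL;
        int ret;

        /* Fetch the DAT inode once; stays NULL for physical blocks. */
        if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
            dat = nilfs_bmap_get_dat(&btree->bt_bmap);

        ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
                                           &path[level].bp_newreq, dat);
        if (ret < 0)
            return ret;                    /* nothing prepared yet */

        ret = do_more_work();              /* invented stand-in */
        if (ret < 0) {
            nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
                                       &path[level].bp_newreq, dat);
            return ret;
        }

        nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
                                    &path[level].bp_newreq, dat);
        return 0;
    }
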
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index aec942cf79e3..1c6cfb59128d 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -815,8 +815,10 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
815 void *kaddr; 815 void *kaddr;
816 int ret; 816 int ret;
817 817
818 if (cno == 0) 818 /* CP number is invalid if it's zero or larger than the
819 return -ENOENT; /* checkpoint number 0 is invalid */ 819 largest existing one. */
820 if (cno == 0 || cno >= nilfs_mdt_cno(cpfile))
821 return -ENOENT;
820 down_read(&NILFS_MDT(cpfile)->mi_sem); 822 down_read(&NILFS_MDT(cpfile)->mi_sem);
821 823
822 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); 824 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
@@ -824,7 +826,10 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
824 goto out; 826 goto out;
825 kaddr = kmap_atomic(bh->b_page, KM_USER0); 827 kaddr = kmap_atomic(bh->b_page, KM_USER0);
826 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); 828 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
827 ret = nilfs_checkpoint_snapshot(cp); 829 if (nilfs_checkpoint_invalid(cp))
830 ret = -ENOENT;
831 else
832 ret = nilfs_checkpoint_snapshot(cp);
828 kunmap_atomic(kaddr, KM_USER0); 833 kunmap_atomic(kaddr, KM_USER0);
829 brelse(bh); 834 brelse(bh);
830 835
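
Note on the cpfile.c hunks above: nilfs_cpfile_is_snapshot() now rejects a checkpoint number up front if it is zero or not below nilfs_mdt_cno(cpfile), and it also returns -ENOENT when the on-disk checkpoint entry is flagged invalid, rather than reading the snapshot bit of a dead entry. Callers should therefore treat -ENOENT as "no such checkpoint". A usage sketch; the wrapper function is invented:

    static int example_check_snapshot(struct inode *cpfile, __u64 cno)
    {
        int ret = nilfs_cpfile_is_snapshot(cpfile, cno);

        if (ret == -ENOENT)
            return 0;       /* cno == 0, cno beyond the newest one,
                             * or the entry is invalid on disk */
        if (ret < 0)
            return ret;     /* a real failure such as -EIO */
        return ret;         /* 1: snapshot, 0: ordinary checkpoint */
    }
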
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h
index 788a45950197..debea896e701 100644
--- a/fs/nilfs2/cpfile.h
+++ b/fs/nilfs2/cpfile.h
@@ -27,8 +27,6 @@
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/nilfs2_fs.h> 28#include <linux/nilfs2_fs.h>
29 29
30#define NILFS_CPFILE_GFP NILFS_MDT_GFP
31
32 30
33int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int, 31int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int,
34 struct nilfs_checkpoint **, 32 struct nilfs_checkpoint **,
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 8927ca27e6f7..1ff8e15bd36b 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -109,12 +109,6 @@ void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req)
109 nilfs_palloc_commit_free_entry(dat, req); 109 nilfs_palloc_commit_free_entry(dat, req);
110} 110}
111 111
112void nilfs_dat_abort_free(struct inode *dat, struct nilfs_palloc_req *req)
113{
114 nilfs_dat_abort_entry(dat, req);
115 nilfs_palloc_abort_free_entry(dat, req);
116}
117
118int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) 112int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
119{ 113{
120 int ret; 114 int ret;
@@ -140,11 +134,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
140 nilfs_dat_commit_entry(dat, req); 134 nilfs_dat_commit_entry(dat, req);
141} 135}
142 136
143void nilfs_dat_abort_start(struct inode *dat, struct nilfs_palloc_req *req)
144{
145 nilfs_dat_abort_entry(dat, req);
146}
147
148int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) 137int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
149{ 138{
150 struct nilfs_dat_entry *entry; 139 struct nilfs_dat_entry *entry;
@@ -222,6 +211,37 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
222 nilfs_dat_abort_entry(dat, req); 211 nilfs_dat_abort_entry(dat, req);
223} 212}
224 213
214int nilfs_dat_prepare_update(struct inode *dat,
215 struct nilfs_palloc_req *oldreq,
216 struct nilfs_palloc_req *newreq)
217{
218 int ret;
219
220 ret = nilfs_dat_prepare_end(dat, oldreq);
221 if (!ret) {
222 ret = nilfs_dat_prepare_alloc(dat, newreq);
223 if (ret < 0)
224 nilfs_dat_abort_end(dat, oldreq);
225 }
226 return ret;
227}
228
229void nilfs_dat_commit_update(struct inode *dat,
230 struct nilfs_palloc_req *oldreq,
231 struct nilfs_palloc_req *newreq, int dead)
232{
233 nilfs_dat_commit_end(dat, oldreq, dead);
234 nilfs_dat_commit_alloc(dat, newreq);
235}
236
237void nilfs_dat_abort_update(struct inode *dat,
238 struct nilfs_palloc_req *oldreq,
239 struct nilfs_palloc_req *newreq)
240{
241 nilfs_dat_abort_end(dat, oldreq);
242 nilfs_dat_abort_alloc(dat, newreq);
243}
244
225/** 245/**
226 * nilfs_dat_mark_dirty - 246 * nilfs_dat_mark_dirty -
227 * @dat: DAT file inode 247 * @dat: DAT file inode
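
Note on the dat.c hunks above: the three new functions bundle the existing end/alloc primitives into one two-phase update of a virtual block number, so a failure can never leave the DAT half-updated; nilfs_dat_prepare_update() itself unwinds the prepared end when the allocation fails. A minimal usage sketch with invented variable names; as in the diff's callers, pr_entry_nr of the new request is seeded as an allocation hint:

    struct nilfs_palloc_req oldreq, newreq;
    int ret;

    oldreq.pr_entry_nr = vblocknr;      /* virtual block being replaced */
    newreq.pr_entry_nr = vblocknr + 1;  /* hint only; btree.c uses old + 1 */

    ret = nilfs_dat_prepare_update(dat, &oldreq, &newreq);
    if (ret < 0)
        return ret;                     /* nothing left half-prepared */

    if (change_the_block() < 0) {       /* invented stand-in */
        nilfs_dat_abort_update(dat, &oldreq, &newreq);
        return -EIO;
    }
    /* 'dead' tells commit_end() whether the old entry dies immediately;
     * the btree code passes b_ptr_type == NILFS_BMAP_PTR_VS here. */
    nilfs_dat_commit_update(dat, &oldreq, &newreq, dead);
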
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
index d328b81eead4..406070d3ff49 100644
--- a/fs/nilfs2/dat.h
+++ b/fs/nilfs2/dat.h
@@ -27,7 +27,6 @@
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/fs.h> 28#include <linux/fs.h>
29 29
30#define NILFS_DAT_GFP NILFS_MDT_GFP
31 30
32struct nilfs_palloc_req; 31struct nilfs_palloc_req;
33 32
@@ -39,10 +38,15 @@ void nilfs_dat_abort_alloc(struct inode *, struct nilfs_palloc_req *);
39int nilfs_dat_prepare_start(struct inode *, struct nilfs_palloc_req *); 38int nilfs_dat_prepare_start(struct inode *, struct nilfs_palloc_req *);
40void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *, 39void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *,
41 sector_t); 40 sector_t);
42void nilfs_dat_abort_start(struct inode *, struct nilfs_palloc_req *);
43int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *); 41int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *);
44void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int); 42void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int);
45void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *); 43void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *);
44int nilfs_dat_prepare_update(struct inode *, struct nilfs_palloc_req *,
45 struct nilfs_palloc_req *);
46void nilfs_dat_commit_update(struct inode *, struct nilfs_palloc_req *,
47 struct nilfs_palloc_req *, int);
48void nilfs_dat_abort_update(struct inode *, struct nilfs_palloc_req *,
49 struct nilfs_palloc_req *);
46 50
47int nilfs_dat_mark_dirty(struct inode *, __u64); 51int nilfs_dat_mark_dirty(struct inode *, __u64);
48int nilfs_dat_freev(struct inode *, __u64 *, size_t); 52int nilfs_dat_freev(struct inode *, __u64 *, size_t);
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 342d9765df8d..d369ac718277 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -125,106 +125,64 @@ static void nilfs_direct_set_target_v(struct nilfs_direct *direct,
125 direct->d_bmap.b_last_allocated_ptr = ptr; 125 direct->d_bmap.b_last_allocated_ptr = ptr;
126} 126}
127 127
128static int nilfs_direct_prepare_insert(struct nilfs_direct *direct,
129 __u64 key,
130 union nilfs_bmap_ptr_req *req,
131 struct nilfs_bmap_stats *stats)
132{
133 int ret;
134
135 if (NILFS_BMAP_USE_VBN(&direct->d_bmap))
136 req->bpr_ptr = nilfs_direct_find_target_v(direct, key);
137 ret = nilfs_bmap_prepare_alloc_ptr(&direct->d_bmap, req);
138 if (ret < 0)
139 return ret;
140
141 stats->bs_nblocks = 1;
142 return 0;
143}
144
145static void nilfs_direct_commit_insert(struct nilfs_direct *direct,
146 union nilfs_bmap_ptr_req *req,
147 __u64 key, __u64 ptr)
148{
149 struct buffer_head *bh;
150
151 /* ptr must be a pointer to a buffer head. */
152 bh = (struct buffer_head *)((unsigned long)ptr);
153 set_buffer_nilfs_volatile(bh);
154
155 nilfs_bmap_commit_alloc_ptr(&direct->d_bmap, req);
156 nilfs_direct_set_ptr(direct, key, req->bpr_ptr);
157
158 if (!nilfs_bmap_dirty(&direct->d_bmap))
159 nilfs_bmap_set_dirty(&direct->d_bmap);
160
161 if (NILFS_BMAP_USE_VBN(&direct->d_bmap))
162 nilfs_direct_set_target_v(direct, key, req->bpr_ptr);
163}
164
165static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) 128static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
166{ 129{
167 struct nilfs_direct *direct; 130 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
168 union nilfs_bmap_ptr_req req; 131 union nilfs_bmap_ptr_req req;
169 struct nilfs_bmap_stats stats; 132 struct inode *dat = NULL;
133 struct buffer_head *bh;
170 int ret; 134 int ret;
171 135
172 direct = (struct nilfs_direct *)bmap;
173 if (key > NILFS_DIRECT_KEY_MAX) 136 if (key > NILFS_DIRECT_KEY_MAX)
174 return -ENOENT; 137 return -ENOENT;
175 if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR) 138 if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR)
176 return -EEXIST; 139 return -EEXIST;
177 140
178 ret = nilfs_direct_prepare_insert(direct, key, &req, &stats); 141 if (NILFS_BMAP_USE_VBN(bmap)) {
179 if (ret < 0) 142 req.bpr_ptr = nilfs_direct_find_target_v(direct, key);
180 return ret; 143 dat = nilfs_bmap_get_dat(bmap);
181 nilfs_direct_commit_insert(direct, &req, key, ptr); 144 }
182 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); 145 ret = nilfs_bmap_prepare_alloc_ptr(bmap, &req, dat);
146 if (!ret) {
147 /* ptr must be a pointer to a buffer head. */
148 bh = (struct buffer_head *)((unsigned long)ptr);
149 set_buffer_nilfs_volatile(bh);
183 150
184 return 0; 151 nilfs_bmap_commit_alloc_ptr(bmap, &req, dat);
185} 152 nilfs_direct_set_ptr(direct, key, req.bpr_ptr);
186 153
187static int nilfs_direct_prepare_delete(struct nilfs_direct *direct, 154 if (!nilfs_bmap_dirty(bmap))
188 union nilfs_bmap_ptr_req *req, 155 nilfs_bmap_set_dirty(bmap);
189 __u64 key,
190 struct nilfs_bmap_stats *stats)
191{
192 int ret;
193 156
194 req->bpr_ptr = nilfs_direct_get_ptr(direct, key); 157 if (NILFS_BMAP_USE_VBN(bmap))
195 ret = nilfs_bmap_prepare_end_ptr(&direct->d_bmap, req); 158 nilfs_direct_set_target_v(direct, key, req.bpr_ptr);
196 if (!ret)
197 stats->bs_nblocks = 1;
198 return ret;
199}
200 159
201static void nilfs_direct_commit_delete(struct nilfs_direct *direct, 160 nilfs_bmap_add_blocks(bmap, 1);
202 union nilfs_bmap_ptr_req *req, 161 }
203 __u64 key) 162 return ret;
204{
205 nilfs_bmap_commit_end_ptr(&direct->d_bmap, req);
206 nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR);
207} 163}
208 164
209static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) 165static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
210{ 166{
211 struct nilfs_direct *direct; 167 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
212 union nilfs_bmap_ptr_req req; 168 union nilfs_bmap_ptr_req req;
213 struct nilfs_bmap_stats stats; 169 struct inode *dat;
214 int ret; 170 int ret;
215 171
216 direct = (struct nilfs_direct *)bmap; 172 if (key > NILFS_DIRECT_KEY_MAX ||
217 if ((key > NILFS_DIRECT_KEY_MAX) ||
218 nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR) 173 nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR)
219 return -ENOENT; 174 return -ENOENT;
220 175
221 ret = nilfs_direct_prepare_delete(direct, &req, key, &stats); 176 dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
222 if (ret < 0) 177 req.bpr_ptr = nilfs_direct_get_ptr(direct, key);
223 return ret;
224 nilfs_direct_commit_delete(direct, &req, key);
225 nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
226 178
227 return 0; 179 ret = nilfs_bmap_prepare_end_ptr(bmap, &req, dat);
180 if (!ret) {
181 nilfs_bmap_commit_end_ptr(bmap, &req, dat);
182 nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR);
183 nilfs_bmap_sub_blocks(bmap, 1);
184 }
185 return ret;
228} 186}
229 187
230static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) 188static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
@@ -310,59 +268,56 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
310 return 0; 268 return 0;
311} 269}
312 270
313static int nilfs_direct_propagate_v(struct nilfs_direct *direct, 271static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
314 struct buffer_head *bh) 272 struct buffer_head *bh)
315{ 273{
316 union nilfs_bmap_ptr_req oldreq, newreq; 274 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
275 struct nilfs_palloc_req oldreq, newreq;
276 struct inode *dat;
317 __u64 key; 277 __u64 key;
318 __u64 ptr; 278 __u64 ptr;
319 int ret; 279 int ret;
320 280
321 key = nilfs_bmap_data_get_key(&direct->d_bmap, bh); 281 if (!NILFS_BMAP_USE_VBN(bmap))
282 return 0;
283
284 dat = nilfs_bmap_get_dat(bmap);
285 key = nilfs_bmap_data_get_key(bmap, bh);
322 ptr = nilfs_direct_get_ptr(direct, key); 286 ptr = nilfs_direct_get_ptr(direct, key);
323 if (!buffer_nilfs_volatile(bh)) { 287 if (!buffer_nilfs_volatile(bh)) {
324 oldreq.bpr_ptr = ptr; 288 oldreq.pr_entry_nr = ptr;
325 newreq.bpr_ptr = ptr; 289 newreq.pr_entry_nr = ptr;
326 ret = nilfs_bmap_prepare_update_v(&direct->d_bmap, &oldreq, 290 ret = nilfs_dat_prepare_update(dat, &oldreq, &newreq);
327 &newreq);
328 if (ret < 0) 291 if (ret < 0)
329 return ret; 292 return ret;
330 nilfs_bmap_commit_update_v(&direct->d_bmap, &oldreq, &newreq); 293 nilfs_dat_commit_update(dat, &oldreq, &newreq,
294 bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
331 set_buffer_nilfs_volatile(bh); 295 set_buffer_nilfs_volatile(bh);
332 nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr); 296 nilfs_direct_set_ptr(direct, key, newreq.pr_entry_nr);
333 } else 297 } else
334 ret = nilfs_bmap_mark_dirty(&direct->d_bmap, ptr); 298 ret = nilfs_dat_mark_dirty(dat, ptr);
335 299
336 return ret; 300 return ret;
337} 301}
338 302
339static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
340 struct buffer_head *bh)
341{
342 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
343
344 return NILFS_BMAP_USE_VBN(bmap) ?
345 nilfs_direct_propagate_v(direct, bh) : 0;
346}
347
348static int nilfs_direct_assign_v(struct nilfs_direct *direct, 303static int nilfs_direct_assign_v(struct nilfs_direct *direct,
349 __u64 key, __u64 ptr, 304 __u64 key, __u64 ptr,
350 struct buffer_head **bh, 305 struct buffer_head **bh,
351 sector_t blocknr, 306 sector_t blocknr,
352 union nilfs_binfo *binfo) 307 union nilfs_binfo *binfo)
353{ 308{
309 struct inode *dat = nilfs_bmap_get_dat(&direct->d_bmap);
354 union nilfs_bmap_ptr_req req; 310 union nilfs_bmap_ptr_req req;
355 int ret; 311 int ret;
356 312
357 req.bpr_ptr = ptr; 313 req.bpr_ptr = ptr;
358 ret = nilfs_bmap_start_v(&direct->d_bmap, &req, blocknr); 314 ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
359 if (unlikely(ret < 0)) 315 if (!ret) {
360 return ret; 316 nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
361 317 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
362 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); 318 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
363 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); 319 }
364 320 return ret;
365 return 0;
366} 321}
367 322
368static int nilfs_direct_assign_p(struct nilfs_direct *direct, 323static int nilfs_direct_assign_p(struct nilfs_direct *direct,
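
Note on the assign paths in direct.c and btree.c above: both set req.bpr_ptr and then hand &req.bpr_req straight to nilfs_dat_prepare_start(). That idiom relies on the layout of the request union, paraphrased here for reference (fs/nilfs2/alloc.h and bmap.h hold the authoritative definitions): bpr_ptr overlays the first member of the embedded palloc request.

    struct nilfs_palloc_req {
        __u64 pr_entry_nr;               /* entry (virtual block) number */
        struct buffer_head *pr_desc_bh;
        struct buffer_head *pr_bitmap_bh;
        struct buffer_head *pr_entry_bh;
    };

    union nilfs_bmap_ptr_req {
        __u64 bpr_ptr;                   /* aliases bpr_req.pr_entry_nr */
        struct nilfs_palloc_req bpr_req;
    };

    /* Hence the pattern used throughout this patch: */
    req.bpr_ptr = ptr;                   /* also sets bpr_req.pr_entry_nr */
    ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
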
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h
index 5d30a35679b5..ecc3ba76db47 100644
--- a/fs/nilfs2/ifile.h
+++ b/fs/nilfs2/ifile.h
@@ -31,7 +31,6 @@
31#include "mdt.h" 31#include "mdt.h"
32#include "alloc.h" 32#include "alloc.h"
33 33
34#define NILFS_IFILE_GFP NILFS_MDT_GFP
35 34
36static inline struct nilfs_inode * 35static inline struct nilfs_inode *
37nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh) 36nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh)
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index fe9d8f2a13f8..807e584b163d 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -430,7 +430,8 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino,
430 430
431 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); 431 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh);
432 432
433 if (nilfs_read_inode_common(inode, raw_inode)) 433 err = nilfs_read_inode_common(inode, raw_inode);
434 if (err)
434 goto failed_unmap; 435 goto failed_unmap;
435 436
436 if (S_ISREG(inode->i_mode)) { 437 if (S_ISREG(inode->i_mode)) {
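
Note on the inode.c hunk above: it is an error-propagation fix. The old code tested the helper's return value inline and discarded it, so the failed_unmap path returned whatever err already held from an earlier step, possibly zero. The pitfall in general form; setup() and helper() are invented:

    static int example_read(void)
    {
        int err = setup();      /* may succeed, leaving err == 0 */
        if (err)
            goto failed;

        /* Buggy shape: "if (helper()) goto failed;" discards the errno,
         * and 'failed' then returns the stale err from setup().
         * Fixed shape, as in the hunk above: capture before testing. */
        err = helper();
        if (err)
            goto failed;

        return 0;
    failed:
        return err;
    }
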
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 6ea5f872e2de..6572ea4bc4df 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -442,12 +442,6 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
442 const char *msg; 442 const char *msg;
443 int ret; 443 int ret;
444 444
445 ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]);
446 if (ret < 0) {
447 msg = "cannot read source blocks";
448 goto failed;
449 }
450
451 ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]); 445 ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]);
452 if (ret < 0) { 446 if (ret < 0) {
453 /* 447 /*
@@ -548,7 +542,25 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
548 } 542 }
549 } 543 }
550 544
551 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); 545 /*
546 * nilfs_ioctl_move_blocks() will call nilfs_gc_iget(),
547 * which operates on an inode list without blocking.
548 * To protect the list from concurrent operations,
549 * nilfs_ioctl_move_blocks() should be an atomic operation.
550 */
551 if (test_and_set_bit(THE_NILFS_GC_RUNNING, &nilfs->ns_flags)) {
552 ret = -EBUSY;
553 goto out_free;
554 }
555
556 ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]);
557 if (ret < 0)
558 printk(KERN_ERR "NILFS: GC failed during preparation: "
559 "cannot read source blocks: err=%d\n", ret);
560 else
561 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
562
563 clear_nilfs_gc_running(nilfs);
552 564
553 out_free: 565 out_free:
554 while (--n >= 0) 566 while (--n >= 0)
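
The bit-flag guard added here is the usual lock-free mutual-exclusion idiom:
test_and_set_bit() atomically claims the flag and reports whether it was
already held, so only one GC pass can walk the gc-inode list at a time. A
minimal sketch, with do_gc_preparation() as a hypothetical stand-in for the
move-blocks phase:

        if (test_and_set_bit(THE_NILFS_GC_RUNNING, &nilfs->ns_flags))
                return -EBUSY;                  /* another GC pass owns the list */
        ret = do_gc_preparation(nilfs);         /* hypothetical stand-in */
        clear_nilfs_gc_running(nilfs);          /* helper generated by THE_NILFS_FNS() */
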
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 2dfd47714ae5..156bf6091a96 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -103,15 +103,12 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
103 goto failed_unlock; 103 goto failed_unlock;
104 104
105 err = -EEXIST; 105 err = -EEXIST;
106 if (buffer_uptodate(bh) || buffer_mapped(bh)) 106 if (buffer_uptodate(bh))
107 goto failed_bh; 107 goto failed_bh;
108#if 0 108
109 /* The uptodate flag is not protected by the page lock, but
110 the mapped flag is. Thus, we don't have to wait the buffer. */
111 wait_on_buffer(bh); 109 wait_on_buffer(bh);
112 if (buffer_uptodate(bh)) 110 if (buffer_uptodate(bh))
113 goto failed_bh; 111 goto failed_bh;
114#endif
115 112
116 bh->b_bdev = nilfs->ns_bdev; 113 bh->b_bdev = nilfs->ns_bdev;
117 err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); 114 err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
@@ -139,7 +136,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
139 int mode, struct buffer_head **out_bh) 136 int mode, struct buffer_head **out_bh)
140{ 137{
141 struct buffer_head *bh; 138 struct buffer_head *bh;
142 unsigned long blknum = 0; 139 __u64 blknum = 0;
143 int ret = -ENOMEM; 140 int ret = -ENOMEM;
144 141
145 bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0); 142 bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
@@ -162,17 +159,15 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
162 unlock_buffer(bh); 159 unlock_buffer(bh);
163 goto out; 160 goto out;
164 } 161 }
165 if (!buffer_mapped(bh)) { /* unused buffer */ 162
166 ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, 163 ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum);
167 &blknum); 164 if (unlikely(ret)) {
168 if (unlikely(ret)) { 165 unlock_buffer(bh);
169 unlock_buffer(bh); 166 goto failed_bh;
170 goto failed_bh;
171 }
172 bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev;
173 bh->b_blocknr = blknum;
174 set_buffer_mapped(bh);
175 } 167 }
168 bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev;
169 bh->b_blocknr = (sector_t)blknum;
170 set_buffer_mapped(bh);
176 171
177 bh->b_end_io = end_buffer_read_sync; 172 bh->b_end_io = end_buffer_read_sync;
178 get_bh(bh); 173 get_bh(bh);
@@ -402,6 +397,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
402 struct inode *inode = container_of(page->mapping, 397 struct inode *inode = container_of(page->mapping,
403 struct inode, i_data); 398 struct inode, i_data);
404 struct super_block *sb = inode->i_sb; 399 struct super_block *sb = inode->i_sb;
400 struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs;
405 struct nilfs_sb_info *writer = NULL; 401 struct nilfs_sb_info *writer = NULL;
406 int err = 0; 402 int err = 0;
407 403
@@ -411,9 +407,10 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
411 if (page->mapping->assoc_mapping) 407 if (page->mapping->assoc_mapping)
412 return 0; /* Do not request flush for shadow page cache */ 408 return 0; /* Do not request flush for shadow page cache */
413 if (!sb) { 409 if (!sb) {
414 writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs); 410 down_read(&nilfs->ns_writer_sem);
411 writer = nilfs->ns_writer;
415 if (!writer) { 412 if (!writer) {
416 nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs); 413 up_read(&nilfs->ns_writer_sem);
417 return -EROFS; 414 return -EROFS;
418 } 415 }
419 sb = writer->s_super; 416 sb = writer->s_super;
@@ -425,7 +422,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
425 nilfs_flush_segment(sb, inode->i_ino); 422 nilfs_flush_segment(sb, inode->i_ino);
426 423
427 if (writer) 424 if (writer)
428 nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs); 425 up_read(&nilfs->ns_writer_sem);
429 return err; 426 return err;
430} 427}
431 428
@@ -516,9 +513,10 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
516} 513}
517 514
518struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, 515struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb,
519 ino_t ino, gfp_t gfp_mask) 516 ino_t ino)
520{ 517{
521 struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, gfp_mask); 518 struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino,
519 NILFS_MDT_GFP);
522 520
523 if (!inode) 521 if (!inode)
524 return NULL; 522 return NULL;
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index df683e0bca6a..431599733c9b 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -74,8 +74,7 @@ int nilfs_mdt_forget_block(struct inode *, unsigned long);
74int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); 74int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long);
75int nilfs_mdt_fetch_dirty(struct inode *); 75int nilfs_mdt_fetch_dirty(struct inode *);
76 76
77struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t, 77struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t);
78 gfp_t);
79struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, 78struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *,
80 ino_t, gfp_t); 79 ino_t, gfp_t);
81void nilfs_mdt_destroy(struct inode *); 80void nilfs_mdt_destroy(struct inode *);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index d80cc71be749..6dc83591d118 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -552,7 +552,8 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
552 printk(KERN_WARNING 552 printk(KERN_WARNING
553 "NILFS warning: error recovering data block " 553 "NILFS warning: error recovering data block "
554 "(err=%d, ino=%lu, block-offset=%llu)\n", 554 "(err=%d, ino=%lu, block-offset=%llu)\n",
555 err, rb->ino, (unsigned long long)rb->blkoff); 555 err, (unsigned long)rb->ino,
556 (unsigned long long)rb->blkoff);
556 if (!err2) 557 if (!err2)
557 err2 = err; 558 err2 = err;
558 next: 559 next:
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 9e3fe17bb96b..e6d9e37fa241 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -316,10 +316,10 @@ static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start,
316{ 316{
317 struct bio *bio; 317 struct bio *bio;
318 318
319 bio = bio_alloc(GFP_NOWAIT, nr_vecs); 319 bio = bio_alloc(GFP_NOIO, nr_vecs);
320 if (bio == NULL) { 320 if (bio == NULL) {
321 while (!bio && (nr_vecs >>= 1)) 321 while (!bio && (nr_vecs >>= 1))
322 bio = bio_alloc(GFP_NOWAIT, nr_vecs); 322 bio = bio_alloc(GFP_NOIO, nr_vecs);
323 } 323 }
324 if (likely(bio)) { 324 if (likely(bio)) {
325 bio->bi_bdev = sb->s_bdev; 325 bio->bi_bdev = sb->s_bdev;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 51ff3d0a4ee2..683df89dbae5 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2501,7 +2501,8 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci,
2501 if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && 2501 if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
2502 nilfs_discontinued(nilfs)) { 2502 nilfs_discontinued(nilfs)) {
2503 down_write(&nilfs->ns_sem); 2503 down_write(&nilfs->ns_sem);
2504 req->sb_err = nilfs_commit_super(sbi, 0); 2504 req->sb_err = nilfs_commit_super(sbi,
2505 nilfs_altsb_need_update(nilfs));
2505 up_write(&nilfs->ns_sem); 2506 up_write(&nilfs->ns_sem);
2506 } 2507 }
2507 } 2508 }
@@ -2689,6 +2690,7 @@ static int nilfs_segctor_thread(void *arg)
2689 } else { 2690 } else {
2690 DEFINE_WAIT(wait); 2691 DEFINE_WAIT(wait);
2691 int should_sleep = 1; 2692 int should_sleep = 1;
2693 struct the_nilfs *nilfs;
2692 2694
2693 prepare_to_wait(&sci->sc_wait_daemon, &wait, 2695 prepare_to_wait(&sci->sc_wait_daemon, &wait,
2694 TASK_INTERRUPTIBLE); 2696 TASK_INTERRUPTIBLE);
@@ -2709,6 +2711,9 @@ static int nilfs_segctor_thread(void *arg)
2709 finish_wait(&sci->sc_wait_daemon, &wait); 2711 finish_wait(&sci->sc_wait_daemon, &wait);
2710 timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && 2712 timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2711 time_after_eq(jiffies, sci->sc_timer->expires)); 2713 time_after_eq(jiffies, sci->sc_timer->expires));
2714 nilfs = sci->sc_sbi->s_nilfs;
2715 if (sci->sc_super->s_dirt && nilfs_sb_need_update(nilfs))
2716 set_nilfs_discontinued(nilfs);
2712 } 2717 }
2713 goto loop; 2718 goto loop;
2714 2719
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index a2c4d76c3366..0e99e5c0bd0f 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -28,7 +28,6 @@
28#include <linux/nilfs2_fs.h> 28#include <linux/nilfs2_fs.h>
29#include "mdt.h" 29#include "mdt.h"
30 30
31#define NILFS_SUFILE_GFP NILFS_MDT_GFP
32 31
33static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) 32static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
34{ 33{
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 151964f0de4c..55f3d6b60732 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -50,6 +50,8 @@
50#include <linux/writeback.h> 50#include <linux/writeback.h>
51#include <linux/kobject.h> 51#include <linux/kobject.h>
52#include <linux/exportfs.h> 52#include <linux/exportfs.h>
53#include <linux/seq_file.h>
54#include <linux/mount.h>
53#include "nilfs.h" 55#include "nilfs.h"
54#include "mdt.h" 56#include "mdt.h"
55#include "alloc.h" 57#include "alloc.h"
@@ -65,7 +67,6 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
65 "(NILFS)"); 67 "(NILFS)");
66MODULE_LICENSE("GPL"); 68MODULE_LICENSE("GPL");
67 69
68static void nilfs_write_super(struct super_block *sb);
69static int nilfs_remount(struct super_block *sb, int *flags, char *data); 70static int nilfs_remount(struct super_block *sb, int *flags, char *data);
70 71
71/** 72/**
@@ -311,9 +312,6 @@ static void nilfs_put_super(struct super_block *sb)
311 312
312 lock_kernel(); 313 lock_kernel();
313 314
314 if (sb->s_dirt)
315 nilfs_write_super(sb);
316
317 nilfs_detach_segment_constructor(sbi); 315 nilfs_detach_segment_constructor(sbi);
318 316
319 if (!(sb->s_flags & MS_RDONLY)) { 317 if (!(sb->s_flags & MS_RDONLY)) {
@@ -336,63 +334,21 @@ static void nilfs_put_super(struct super_block *sb)
336 unlock_kernel(); 334 unlock_kernel();
337} 335}
338 336
339/** 337static int nilfs_sync_fs(struct super_block *sb, int wait)
340 * nilfs_write_super - write super block(s) of NILFS
341 * @sb: super_block
342 *
343 * nilfs_write_super() gets a fs-dependent lock, writes super block(s), and
344 * clears s_dirt. This function is called in the section protected by
345 * lock_super().
346 *
347 * The s_dirt flag is managed by each filesystem and we protect it by ns_sem
348 * of the struct the_nilfs. Lock order must be as follows:
349 *
350 * 1. lock_super()
351 * 2. down_write(&nilfs->ns_sem)
352 *
353 * Inside NILFS, locking ns_sem is enough to protect s_dirt and the buffer
354 * of the super block (nilfs->ns_sbp[]).
355 *
356 * In most cases, VFS functions call lock_super() before calling these
357 * methods. So we must be careful not to bring on deadlocks when using
358 * lock_super(); see generic_shutdown_super(), write_super(), and so on.
359 *
360 * Note that order of lock_kernel() and lock_super() depends on contexts
361 * of VFS. We should also note that lock_kernel() can be used in its
362 * protective section and only the outermost one has an effect.
363 */
364static void nilfs_write_super(struct super_block *sb)
365{ 338{
366 struct nilfs_sb_info *sbi = NILFS_SB(sb); 339 struct nilfs_sb_info *sbi = NILFS_SB(sb);
367 struct the_nilfs *nilfs = sbi->s_nilfs; 340 struct the_nilfs *nilfs = sbi->s_nilfs;
368
369 down_write(&nilfs->ns_sem);
370 if (!(sb->s_flags & MS_RDONLY)) {
371 struct nilfs_super_block **sbp = nilfs->ns_sbp;
372 u64 t = get_seconds();
373 int dupsb;
374
375 if (!nilfs_discontinued(nilfs) && t >= nilfs->ns_sbwtime[0] &&
376 t < nilfs->ns_sbwtime[0] + NILFS_SB_FREQ) {
377 up_write(&nilfs->ns_sem);
378 return;
379 }
380 dupsb = sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ;
381 nilfs_commit_super(sbi, dupsb);
382 }
383 sb->s_dirt = 0;
384 up_write(&nilfs->ns_sem);
385}
386
387static int nilfs_sync_fs(struct super_block *sb, int wait)
388{
389 int err = 0; 341 int err = 0;
390 342
391 nilfs_write_super(sb);
392
393 /* This function is called when the super block should be written back */ 343 /* This function is called when the super block should be written back */
394 if (wait) 344 if (wait)
395 err = nilfs_construct_segment(sb); 345 err = nilfs_construct_segment(sb);
346
347 down_write(&nilfs->ns_sem);
348 if (sb->s_dirt)
349 nilfs_commit_super(sbi, 1);
350 up_write(&nilfs->ns_sem);
351
396 return err; 352 return err;
397} 353}
398 354
@@ -407,8 +363,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
407 list_add(&sbi->s_list, &nilfs->ns_supers); 363 list_add(&sbi->s_list, &nilfs->ns_supers);
408 up_write(&nilfs->ns_super_sem); 364 up_write(&nilfs->ns_super_sem);
409 365
410 sbi->s_ifile = nilfs_mdt_new( 366 sbi->s_ifile = nilfs_mdt_new(nilfs, sbi->s_super, NILFS_IFILE_INO);
411 nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP);
412 if (!sbi->s_ifile) 367 if (!sbi->s_ifile)
413 return -ENOMEM; 368 return -ENOMEM;
414 369
@@ -529,6 +484,26 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
529 return 0; 484 return 0;
530} 485}
531 486
487static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
488{
489 struct super_block *sb = vfs->mnt_sb;
490 struct nilfs_sb_info *sbi = NILFS_SB(sb);
491
492 if (!nilfs_test_opt(sbi, BARRIER))
493 seq_printf(seq, ",barrier=off");
494 if (nilfs_test_opt(sbi, SNAPSHOT))
495 seq_printf(seq, ",cp=%llu",
496 (unsigned long long int)sbi->s_snapshot_cno);
497 if (nilfs_test_opt(sbi, ERRORS_RO))
498 seq_printf(seq, ",errors=remount-ro");
499 if (nilfs_test_opt(sbi, ERRORS_PANIC))
500 seq_printf(seq, ",errors=panic");
501 if (nilfs_test_opt(sbi, STRICT_ORDER))
502 seq_printf(seq, ",order=strict");
503
504 return 0;
505}
506
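
With ->show_options wired up, a read-only snapshot mount would be reported in
/proc/mounts roughly as follows (device and checkpoint number illustrative):

        /dev/sdb1 /mnt nilfs2 ro,cp=5 0 0
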
532static struct super_operations nilfs_sops = { 507static struct super_operations nilfs_sops = {
533 .alloc_inode = nilfs_alloc_inode, 508 .alloc_inode = nilfs_alloc_inode,
534 .destroy_inode = nilfs_destroy_inode, 509 .destroy_inode = nilfs_destroy_inode,
@@ -538,7 +513,7 @@ static struct super_operations nilfs_sops = {
538 /* .drop_inode = nilfs_drop_inode, */ 513 /* .drop_inode = nilfs_drop_inode, */
539 .delete_inode = nilfs_delete_inode, 514 .delete_inode = nilfs_delete_inode,
540 .put_super = nilfs_put_super, 515 .put_super = nilfs_put_super,
541 .write_super = nilfs_write_super, 516 /* .write_super = nilfs_write_super, */
542 .sync_fs = nilfs_sync_fs, 517 .sync_fs = nilfs_sync_fs,
543 /* .write_super_lockfs */ 518 /* .write_super_lockfs */
544 /* .unlockfs */ 519 /* .unlockfs */
@@ -546,7 +521,7 @@ static struct super_operations nilfs_sops = {
546 .remount_fs = nilfs_remount, 521 .remount_fs = nilfs_remount,
547 .clear_inode = nilfs_clear_inode, 522 .clear_inode = nilfs_clear_inode,
548 /* .umount_begin */ 523 /* .umount_begin */
549 /* .show_options */ 524 .show_options = nilfs_show_options
550}; 525};
551 526
552static struct inode * 527static struct inode *
@@ -816,10 +791,15 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
816 791
817 if (sb->s_flags & MS_RDONLY) { 792 if (sb->s_flags & MS_RDONLY) {
818 if (nilfs_test_opt(sbi, SNAPSHOT)) { 793 if (nilfs_test_opt(sbi, SNAPSHOT)) {
794 down_read(&nilfs->ns_segctor_sem);
819 err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, 795 err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile,
820 sbi->s_snapshot_cno); 796 sbi->s_snapshot_cno);
821 if (err < 0) 797 up_read(&nilfs->ns_segctor_sem);
798 if (err < 0) {
799 if (err == -ENOENT)
800 err = -EINVAL;
822 goto failed_sbi; 801 goto failed_sbi;
802 }
823 if (!err) { 803 if (!err) {
824 printk(KERN_ERR 804 printk(KERN_ERR
825 "NILFS: The specified checkpoint is " 805 "NILFS: The specified checkpoint is "
@@ -1127,10 +1107,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1127 */ 1107 */
1128 sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno); 1108 sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno);
1129 1109
1130 if (!sd.cno)
1131 /* trying to get the latest checkpoint. */
1132 sd.cno = nilfs_last_cno(nilfs);
1133
1134 /* 1110 /*
1135 * Get super block instance holding the nilfs_sb_info struct. 1111 * Get super block instance holding the nilfs_sb_info struct.
1136 * A new instance is allocated if no existing mount is present or 1112 * A new instance is allocated if no existing mount is present or
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 8b8889825716..ad391a8c3e7e 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -68,12 +68,11 @@ static struct the_nilfs *alloc_nilfs(struct block_device *bdev)
68 68
69 nilfs->ns_bdev = bdev; 69 nilfs->ns_bdev = bdev;
70 atomic_set(&nilfs->ns_count, 1); 70 atomic_set(&nilfs->ns_count, 1);
71 atomic_set(&nilfs->ns_writer_refcount, -1);
72 atomic_set(&nilfs->ns_ndirtyblks, 0); 71 atomic_set(&nilfs->ns_ndirtyblks, 0);
73 init_rwsem(&nilfs->ns_sem); 72 init_rwsem(&nilfs->ns_sem);
74 init_rwsem(&nilfs->ns_super_sem); 73 init_rwsem(&nilfs->ns_super_sem);
75 mutex_init(&nilfs->ns_mount_mutex); 74 mutex_init(&nilfs->ns_mount_mutex);
76 mutex_init(&nilfs->ns_writer_mutex); 75 init_rwsem(&nilfs->ns_writer_sem);
77 INIT_LIST_HEAD(&nilfs->ns_list); 76 INIT_LIST_HEAD(&nilfs->ns_list);
78 INIT_LIST_HEAD(&nilfs->ns_supers); 77 INIT_LIST_HEAD(&nilfs->ns_supers);
79 spin_lock_init(&nilfs->ns_last_segment_lock); 78 spin_lock_init(&nilfs->ns_last_segment_lock);
@@ -188,23 +187,19 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs,
188 inode_size = nilfs->ns_inode_size; 187 inode_size = nilfs->ns_inode_size;
189 188
190 err = -ENOMEM; 189 err = -ENOMEM;
191 nilfs->ns_dat = nilfs_mdt_new( 190 nilfs->ns_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO);
192 nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP);
193 if (unlikely(!nilfs->ns_dat)) 191 if (unlikely(!nilfs->ns_dat))
194 goto failed; 192 goto failed;
195 193
196 nilfs->ns_gc_dat = nilfs_mdt_new( 194 nilfs->ns_gc_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO);
197 nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP);
198 if (unlikely(!nilfs->ns_gc_dat)) 195 if (unlikely(!nilfs->ns_gc_dat))
199 goto failed_dat; 196 goto failed_dat;
200 197
201 nilfs->ns_cpfile = nilfs_mdt_new( 198 nilfs->ns_cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO);
202 nilfs, NULL, NILFS_CPFILE_INO, NILFS_CPFILE_GFP);
203 if (unlikely(!nilfs->ns_cpfile)) 199 if (unlikely(!nilfs->ns_cpfile))
204 goto failed_gc_dat; 200 goto failed_gc_dat;
205 201
206 nilfs->ns_sufile = nilfs_mdt_new( 202 nilfs->ns_sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO);
207 nilfs, NULL, NILFS_SUFILE_INO, NILFS_SUFILE_GFP);
208 if (unlikely(!nilfs->ns_sufile)) 203 if (unlikely(!nilfs->ns_sufile))
209 goto failed_cpfile; 204 goto failed_cpfile;
210 205
@@ -596,9 +591,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
596 591
597 nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); 592 nilfs->ns_mount_state = le16_to_cpu(sbp->s_state);
598 593
599 bdi = nilfs->ns_bdev->bd_inode_backing_dev_info; 594 bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info;
600 if (!bdi)
601 bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info;
602 nilfs->ns_bdi = bdi ? : &default_backing_dev_info; 595 nilfs->ns_bdi = bdi ? : &default_backing_dev_info;
603 596
604 /* Finding last segment */ 597 /* Finding last segment */
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 1b9caafb8662..20abd55881e0 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -37,6 +37,7 @@ enum {
37 THE_NILFS_LOADED, /* Roll-back/roll-forward has done and 37 THE_NILFS_LOADED, /* Roll-back/roll-forward has done and
38 the latest checkpoint was loaded */ 38 the latest checkpoint was loaded */
39 THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ 39 THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
40 THE_NILFS_GC_RUNNING, /* gc process is running */
40}; 41};
41 42
42/** 43/**
@@ -50,8 +51,7 @@ enum {
50 * @ns_sem: semaphore for shared states 51 * @ns_sem: semaphore for shared states
51 * @ns_super_sem: semaphore for global operations across super block instances 52 * @ns_super_sem: semaphore for global operations across super block instances
52 * @ns_mount_mutex: mutex protecting mount process of nilfs 53 * @ns_mount_mutex: mutex protecting mount process of nilfs
53 * @ns_writer_mutex: mutex protecting ns_writer attach/detach 54 * @ns_writer_sem: semaphore protecting ns_writer attach/detach
54 * @ns_writer_refcount: number of referrers on ns_writer
55 * @ns_current: back pointer to current mount 55 * @ns_current: back pointer to current mount
56 * @ns_sbh: buffer heads of on-disk super blocks 56 * @ns_sbh: buffer heads of on-disk super blocks
57 * @ns_sbp: pointers to super block data 57 * @ns_sbp: pointers to super block data
@@ -100,8 +100,7 @@ struct the_nilfs {
100 struct rw_semaphore ns_sem; 100 struct rw_semaphore ns_sem;
101 struct rw_semaphore ns_super_sem; 101 struct rw_semaphore ns_super_sem;
102 struct mutex ns_mount_mutex; 102 struct mutex ns_mount_mutex;
103 struct mutex ns_writer_mutex; 103 struct rw_semaphore ns_writer_sem;
104 atomic_t ns_writer_refcount;
105 104
106 /* 105 /*
107 * components protected by ns_super_sem 106 * components protected by ns_super_sem
@@ -197,11 +196,26 @@ static inline int nilfs_##name(struct the_nilfs *nilfs) \
197THE_NILFS_FNS(INIT, init) 196THE_NILFS_FNS(INIT, init)
198THE_NILFS_FNS(LOADED, loaded) 197THE_NILFS_FNS(LOADED, loaded)
199THE_NILFS_FNS(DISCONTINUED, discontinued) 198THE_NILFS_FNS(DISCONTINUED, discontinued)
199THE_NILFS_FNS(GC_RUNNING, gc_running)
200 200
201/* Minimum interval of periodical update of superblocks (in seconds) */ 201/* Minimum interval of periodical update of superblocks (in seconds) */
202#define NILFS_SB_FREQ 10 202#define NILFS_SB_FREQ 10
203#define NILFS_ALTSB_FREQ 60 /* spare superblock */ 203#define NILFS_ALTSB_FREQ 60 /* spare superblock */
204 204
205static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
206{
207 u64 t = get_seconds();
208 return t < nilfs->ns_sbwtime[0] ||
209 t > nilfs->ns_sbwtime[0] + NILFS_SB_FREQ;
210}
211
212static inline int nilfs_altsb_need_update(struct the_nilfs *nilfs)
213{
214 u64 t = get_seconds();
215 struct nilfs_super_block **sbp = nilfs->ns_sbp;
216 return sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ;
217}
218
205void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); 219void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
206struct the_nilfs *find_or_create_nilfs(struct block_device *); 220struct the_nilfs *find_or_create_nilfs(struct block_device *);
207void put_nilfs(struct the_nilfs *); 221void put_nilfs(struct the_nilfs *);
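
These two helpers centralize the superblock-age checks that the removed
nilfs_write_super() open-coded against NILFS_SB_FREQ and NILFS_ALTSB_FREQ. An
illustrative caller, in the spirit of the segment.c hunk above (which guards
on the discontinued flag instead):

        down_write(&nilfs->ns_sem);
        if (nilfs_sb_need_update(nilfs))
                nilfs_commit_super(sbi, nilfs_altsb_need_update(nilfs));
        up_write(&nilfs->ns_sem);
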
@@ -221,34 +235,21 @@ static inline void get_nilfs(struct the_nilfs *nilfs)
221 atomic_inc(&nilfs->ns_count); 235 atomic_inc(&nilfs->ns_count);
222} 236}
223 237
224static inline struct nilfs_sb_info *nilfs_get_writer(struct the_nilfs *nilfs)
225{
226 if (atomic_inc_and_test(&nilfs->ns_writer_refcount))
227 mutex_lock(&nilfs->ns_writer_mutex);
228 return nilfs->ns_writer;
229}
230
231static inline void nilfs_put_writer(struct the_nilfs *nilfs)
232{
233 if (atomic_add_negative(-1, &nilfs->ns_writer_refcount))
234 mutex_unlock(&nilfs->ns_writer_mutex);
235}
236
237static inline void 238static inline void
238nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) 239nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
239{ 240{
240 mutex_lock(&nilfs->ns_writer_mutex); 241 down_write(&nilfs->ns_writer_sem);
241 nilfs->ns_writer = sbi; 242 nilfs->ns_writer = sbi;
242 mutex_unlock(&nilfs->ns_writer_mutex); 243 up_write(&nilfs->ns_writer_sem);
243} 244}
244 245
245static inline void 246static inline void
246nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) 247nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
247{ 248{
248 mutex_lock(&nilfs->ns_writer_mutex); 249 down_write(&nilfs->ns_writer_sem);
249 if (sbi == nilfs->ns_writer) 250 if (sbi == nilfs->ns_writer)
250 nilfs->ns_writer = NULL; 251 nilfs->ns_writer = NULL;
251 mutex_unlock(&nilfs->ns_writer_mutex); 252 up_write(&nilfs->ns_writer_sem);
252} 253}
253 254
254static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) 255static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
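
The refcounted mutex scheme (nilfs_get_writer()/nilfs_put_writer()) is gone;
attach/detach now take ns_writer_sem for writing, and transient readers of
ns_writer, such as nilfs_mdt_write_page() above, take it shared. A hedged
reader-side sketch:

        down_read(&nilfs->ns_writer_sem);
        writer = nilfs->ns_writer;
        if (writer)
                sb = writer->s_super;   /* safe: detach is excluded while we hold the sem */
        up_read(&nilfs->ns_writer_sem);
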
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 3140a4429af1..4350d4993b18 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2076,14 +2076,6 @@ err_out:
2076 *ppos = pos; 2076 *ppos = pos;
2077 if (cached_page) 2077 if (cached_page)
2078 page_cache_release(cached_page); 2078 page_cache_release(cached_page);
2079 /* For now, when the user asks for O_SYNC, we actually give O_DSYNC. */
2080 if (likely(!status)) {
2081 if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(vi))) {
2082 if (!mapping->a_ops->writepage || !is_sync_kiocb(iocb))
2083 status = generic_osync_inode(vi, mapping,
2084 OSYNC_METADATA|OSYNC_DATA);
2085 }
2086 }
2087 pagevec_lru_add_file(&lru_pvec); 2079 pagevec_lru_add_file(&lru_pvec);
2088 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 2080 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
2089 written ? "written" : "status", (unsigned long)written, 2081 written ? "written" : "status", (unsigned long)written,
@@ -2145,8 +2137,8 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2145 mutex_lock(&inode->i_mutex); 2137 mutex_lock(&inode->i_mutex);
2146 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); 2138 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
2147 mutex_unlock(&inode->i_mutex); 2139 mutex_unlock(&inode->i_mutex);
2148 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2140 if (ret > 0) {
2149 int err = sync_page_range(inode, mapping, pos, ret); 2141 int err = generic_write_sync(file, pos, ret);
2150 if (err < 0) 2142 if (err < 0)
2151 ret = err; 2143 ret = err;
2152 } 2144 }
@@ -2173,8 +2165,8 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
2173 if (ret == -EIOCBQUEUED) 2165 if (ret == -EIOCBQUEUED)
2174 ret = wait_on_sync_kiocb(&kiocb); 2166 ret = wait_on_sync_kiocb(&kiocb);
2175 mutex_unlock(&inode->i_mutex); 2167 mutex_unlock(&inode->i_mutex);
2176 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2168 if (ret > 0) {
2177 int err = sync_page_range(inode, mapping, *ppos - ret, ret); 2169 int err = generic_write_sync(file, *ppos - ret, ret);
2178 if (err < 0) 2170 if (err < 0)
2179 ret = err; 2171 ret = err;
2180 } 2172 }
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 23bf68453d7d..1caa0ef0b2bb 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -384,13 +384,12 @@ unm_err_out:
384 * it is dirty in the inode meta data rather than the data page cache of the 384 * it is dirty in the inode meta data rather than the data page cache of the
385 * inode, and thus there are no data pages that need writing out. Therefore, a 385 * inode, and thus there are no data pages that need writing out. Therefore, a
386 * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the 386 * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the
387 * other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to 387 * other hand, is not sufficient, because ->write_inode needs to be called even
388 * ensure ->write_inode is called from generic_osync_inode() and this needs to 388 * in case of fdatasync. This needs to happen or the file data would not
389 * happen or the file data would not necessarily hit the device synchronously, 389 * necessarily hit the device synchronously, even though the vfs inode has the
390 * even though the vfs inode has the O_SYNC flag set. Also, I_DIRTY_DATASYNC 390 * O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just
391 * simply "feels" better than just I_DIRTY_SYNC, since the file data has not 391 * I_DIRTY_SYNC, since the file data has not actually hit the block device yet,
392 * actually hit the block device yet, which is not what I_DIRTY_SYNC on its own 392 * which is not what I_DIRTY_SYNC on its own would suggest.
393 * would suggest.
394 */ 393 */
395void __mark_mft_record_dirty(ntfs_inode *ni) 394void __mark_mft_record_dirty(ntfs_inode *ni)
396{ 395{
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index aa501d3f93f1..221c5e98957b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1871,8 +1871,7 @@ relock:
1871 goto out_dio; 1871 goto out_dio;
1872 } 1872 }
1873 } else { 1873 } else {
1874 written = generic_file_aio_write_nolock(iocb, iov, nr_segs, 1874 written = __generic_file_aio_write(iocb, iov, nr_segs, ppos);
1875 *ppos);
1876 } 1875 }
1877 1876
1878out_dio: 1877out_dio:
@@ -1880,18 +1879,21 @@ out_dio:
1880 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 1879 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
1881 1880
1882 if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { 1881 if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) {
1883 /* 1882 ret = filemap_fdatawrite_range(file->f_mapping, pos,
1884 * The generic write paths have handled getting data 1883 pos + count - 1);
1885 * to disk, but since we don't make use of the dirty 1884 if (ret < 0)
1886 * inode list, a manual journal commit is necessary 1885 written = ret;
1887 * here. 1886
1888 */ 1887 if (!ret && (old_size != i_size_read(inode) ||
1889 if (old_size != i_size_read(inode) || 1888 old_clusters != OCFS2_I(inode)->ip_clusters)) {
1890 old_clusters != OCFS2_I(inode)->ip_clusters) {
1891 ret = jbd2_journal_force_commit(osb->journal->j_journal); 1889 ret = jbd2_journal_force_commit(osb->journal->j_journal);
1892 if (ret < 0) 1890 if (ret < 0)
1893 written = ret; 1891 written = ret;
1894 } 1892 }
1893
1894 if (!ret)
1895 ret = filemap_fdatawait_range(file->f_mapping, pos,
1896 pos + count - 1);
1895 } 1897 }
1896 1898
1897 /* 1899 /*
@@ -1991,31 +1993,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1991 1993
1992 if (ret > 0) { 1994 if (ret > 0) {
1993 unsigned long nr_pages; 1995 unsigned long nr_pages;
1996 int err;
1994 1997
1995 *ppos += ret;
1996 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1998 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1997 1999
1998 /* 2000 err = generic_write_sync(out, *ppos, ret);
1999 * If file or inode is SYNC and we actually wrote some data, 2001 if (err)
2000 * sync it. 2002 ret = err;
2001 */ 2003 else
2002 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { 2004 *ppos += ret;
2003 int err;
2004
2005 mutex_lock(&inode->i_mutex);
2006 err = ocfs2_rw_lock(inode, 1);
2007 if (err < 0) {
2008 mlog_errno(err);
2009 } else {
2010 err = generic_osync_inode(inode, mapping,
2011 OSYNC_METADATA|OSYNC_DATA);
2012 ocfs2_rw_unlock(inode, 1);
2013 }
2014 mutex_unlock(&inode->i_mutex);
2015 2005
2016 if (err)
2017 ret = err;
2018 }
2019 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 2006 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
2020 } 2007 }
2021 2008
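
The reworked O_SYNC paths above settle on a three-step ordering: start data
writeback, force a journal commit if the write changed inode metadata, then
wait for the data I/O. A hedged sketch of that shape (metadata_changed is an
illustrative condition standing in for the i_size/ip_clusters comparison):

        ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
        if (!ret && metadata_changed)
                ret = jbd2_journal_force_commit(journal);
        if (!ret)
                ret = filemap_fdatawait_range(mapping, pos, pos + count - 1);
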
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index ea4e6cb29e13..fbeaddf595d3 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -248,11 +248,19 @@ ssize_t part_stat_show(struct device *dev,
248 part_stat_read(p, merges[WRITE]), 248 part_stat_read(p, merges[WRITE]),
249 (unsigned long long)part_stat_read(p, sectors[WRITE]), 249 (unsigned long long)part_stat_read(p, sectors[WRITE]),
250 jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), 250 jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
251 p->in_flight, 251 part_in_flight(p),
252 jiffies_to_msecs(part_stat_read(p, io_ticks)), 252 jiffies_to_msecs(part_stat_read(p, io_ticks)),
253 jiffies_to_msecs(part_stat_read(p, time_in_queue))); 253 jiffies_to_msecs(part_stat_read(p, time_in_queue)));
254} 254}
255 255
256ssize_t part_inflight_show(struct device *dev,
257 struct device_attribute *attr, char *buf)
258{
259 struct hd_struct *p = dev_to_part(dev);
260
261 return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]);
262}
263
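
Userspace can read the split counters directly: e.g. cat
/sys/block/sda/sda1/inflight (path illustrative) prints the number of reads in
flight followed by writes in flight, along the lines of "0 2".
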
256#ifdef CONFIG_FAIL_MAKE_REQUEST 264#ifdef CONFIG_FAIL_MAKE_REQUEST
257ssize_t part_fail_show(struct device *dev, 265ssize_t part_fail_show(struct device *dev,
258 struct device_attribute *attr, char *buf) 266 struct device_attribute *attr, char *buf)
@@ -281,6 +289,7 @@ static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
281static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 289static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
282static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); 290static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
283static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 291static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
292static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
284#ifdef CONFIG_FAIL_MAKE_REQUEST 293#ifdef CONFIG_FAIL_MAKE_REQUEST
285static struct device_attribute dev_attr_fail = 294static struct device_attribute dev_attr_fail =
286 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); 295 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -292,6 +301,7 @@ static struct attribute *part_attrs[] = {
292 &dev_attr_size.attr, 301 &dev_attr_size.attr,
293 &dev_attr_alignment_offset.attr, 302 &dev_attr_alignment_offset.attr,
294 &dev_attr_stat.attr, 303 &dev_attr_stat.attr,
304 &dev_attr_inflight.attr,
295#ifdef CONFIG_FAIL_MAKE_REQUEST 305#ifdef CONFIG_FAIL_MAKE_REQUEST
296 &dev_attr_fail.attr, 306 &dev_attr_fail.attr,
297#endif 307#endif
@@ -302,7 +312,7 @@ static struct attribute_group part_attr_group = {
302 .attrs = part_attrs, 312 .attrs = part_attrs,
303}; 313};
304 314
305static struct attribute_group *part_attr_groups[] = { 315static const struct attribute_group *part_attr_groups[] = {
306 &part_attr_group, 316 &part_attr_group,
307#ifdef CONFIG_BLK_DEV_IO_TRACE 317#ifdef CONFIG_BLK_DEV_IO_TRACE
308 &blk_trace_attr_group, 318 &blk_trace_attr_group,
diff --git a/fs/splice.c b/fs/splice.c
index 73766d24f97b..7394e9e17534 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -502,8 +502,10 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
502 len = left; 502 len = left;
503 503
504 ret = __generic_file_splice_read(in, ppos, pipe, len, flags); 504 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
505 if (ret > 0) 505 if (ret > 0) {
506 *ppos += ret; 506 *ppos += ret;
507 file_accessed(in);
508 }
507 509
508 return ret; 510 return ret;
509} 511}
@@ -963,8 +965,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
963 965
964 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); 966 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
965 ret = file_remove_suid(out); 967 ret = file_remove_suid(out);
966 if (!ret) 968 if (!ret) {
969 file_update_time(out);
967 ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); 970 ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
971 }
968 mutex_unlock(&inode->i_mutex); 972 mutex_unlock(&inode->i_mutex);
969 } while (ret > 0); 973 } while (ret > 0);
970 splice_from_pipe_end(pipe, &sd); 974 splice_from_pipe_end(pipe, &sd);
@@ -976,25 +980,15 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
976 980
977 if (ret > 0) { 981 if (ret > 0) {
978 unsigned long nr_pages; 982 unsigned long nr_pages;
983 int err;
979 984
980 *ppos += ret;
981 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 985 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
982 986
983 /* 987 err = generic_write_sync(out, *ppos, ret);
984 * If file or inode is SYNC and we actually wrote some data, 988 if (err)
985 * sync it. 989 ret = err;
986 */ 990 else
987 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { 991 *ppos += ret;
988 int err;
989
990 mutex_lock(&inode->i_mutex);
991 err = generic_osync_inode(inode, mapping,
992 OSYNC_METADATA|OSYNC_DATA);
993 mutex_unlock(&inode->i_mutex);
994
995 if (err)
996 ret = err;
997 }
998 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 992 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
999 } 993 }
1000 994
diff --git a/fs/super.c b/fs/super.c
index 9cda337ddae2..b03fea8fbfb6 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -707,6 +707,12 @@ static int set_bdev_super(struct super_block *s, void *data)
707{ 707{
708 s->s_bdev = data; 708 s->s_bdev = data;
709 s->s_dev = s->s_bdev->bd_dev; 709 s->s_dev = s->s_bdev->bd_dev;
710
711 /*
712 * We set the bdi here to the queue backing, file systems can
713 * overwrite this in ->fill_super()
714 */
715 s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
710 return 0; 716 return 0;
711} 717}
712 718
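
A filesystem that manages its own backing_dev_info overrides this default from
->fill_super(); the fs/ubifs/super.c hunk below does exactly that with
sb->s_bdi = &c->bdi. Either way s_bdi ends up set, which is what the
!sb->s_bdi early return added to __sync_filesystem() in fs/sync.c relies on.
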
diff --git a/fs/sync.c b/fs/sync.c
index 103cc7fdd3df..c08467a5d7cb 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -27,6 +27,13 @@
27 */ 27 */
28static int __sync_filesystem(struct super_block *sb, int wait) 28static int __sync_filesystem(struct super_block *sb, int wait)
29{ 29{
30 /*
31 * This should be safe, as we require bdi backing to actually
32 * write out data in the first place
33 */
34 if (!sb->s_bdi)
35 return 0;
36
30 /* Avoid doing twice syncing and cache pruning for quota sync */ 37 /* Avoid doing twice syncing and cache pruning for quota sync */
31 if (!wait) { 38 if (!wait) {
32 writeout_quota_sb(sb, -1); 39 writeout_quota_sb(sb, -1);
@@ -101,7 +108,7 @@ restart:
101 spin_unlock(&sb_lock); 108 spin_unlock(&sb_lock);
102 109
103 down_read(&sb->s_umount); 110 down_read(&sb->s_umount);
104 if (!(sb->s_flags & MS_RDONLY) && sb->s_root) 111 if (!(sb->s_flags & MS_RDONLY) && sb->s_root && sb->s_bdi)
105 __sync_filesystem(sb, wait); 112 __sync_filesystem(sb, wait);
106 up_read(&sb->s_umount); 113 up_read(&sb->s_umount);
107 114
@@ -178,19 +185,23 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
178} 185}
179 186
180/** 187/**
181 * vfs_fsync - perform a fsync or fdatasync on a file 188 * vfs_fsync_range - helper to sync a range of data & metadata to disk
182 * @file: file to sync 189 * @file: file to sync
183 * @dentry: dentry of @file 190 * @dentry: dentry of @file
184 * @data: only perform a fdatasync operation 191 * @start: offset in bytes of the beginning of the data range to sync
192 * @end: offset in bytes of the end of the data range (inclusive)
193 * @datasync: perform only datasync
185 * 194 *
186 * Write back data and metadata for @file to disk. If @datasync is 195 * Write back data in range @start..@end and metadata for @file to disk. If
187 * set only metadata needed to access modified file data is written. 196 * @datasync is set only metadata needed to access modified file data is
197 * written.
188 * 198 *
189 * In case this function is called from nfsd @file may be %NULL and 199 * In case this function is called from nfsd @file may be %NULL and
190 * only @dentry is set. This can only happen when the filesystem 200 * only @dentry is set. This can only happen when the filesystem
191 * implements the export_operations API. 201 * implements the export_operations API.
192 */ 202 */
193int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) 203int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start,
204 loff_t end, int datasync)
194{ 205{
195 const struct file_operations *fop; 206 const struct file_operations *fop;
196 struct address_space *mapping; 207 struct address_space *mapping;
@@ -214,7 +225,7 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
214 goto out; 225 goto out;
215 } 226 }
216 227
217 ret = filemap_fdatawrite(mapping); 228 ret = filemap_write_and_wait_range(mapping, start, end);
218 229
219 /* 230 /*
220 * We need to protect against concurrent writers, which could cause 231 * We need to protect against concurrent writers, which could cause
@@ -225,12 +236,29 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
225 if (!ret) 236 if (!ret)
226 ret = err; 237 ret = err;
227 mutex_unlock(&mapping->host->i_mutex); 238 mutex_unlock(&mapping->host->i_mutex);
228 err = filemap_fdatawait(mapping); 239
229 if (!ret)
230 ret = err;
231out: 240out:
232 return ret; 241 return ret;
233} 242}
243EXPORT_SYMBOL(vfs_fsync_range);
244
245/**
246 * vfs_fsync - perform a fsync or fdatasync on a file
247 * @file: file to sync
248 * @dentry: dentry of @file
249 * @datasync: only perform a fdatasync operation
250 *
251 * Write back data and metadata for @file to disk. If @datasync is
252 * set only metadata needed to access modified file data is written.
253 *
254 * In case this function is called from nfsd @file may be %NULL and
255 * only @dentry is set. This can only happen when the filesystem
256 * implements the export_operations API.
257 */
258int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
259{
260 return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync);
261}
234EXPORT_SYMBOL(vfs_fsync); 262EXPORT_SYMBOL(vfs_fsync);
235 263
236static int do_fsync(unsigned int fd, int datasync) 264static int do_fsync(unsigned int fd, int datasync)
@@ -256,6 +284,23 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
256 return do_fsync(fd, 1); 284 return do_fsync(fd, 1);
257} 285}
258 286
287/**
288 * generic_write_sync - perform syncing after a write if file / inode is sync
289 * @file: file to which the write happened
290 * @pos: offset where the write started
291 * @count: length of the write
292 *
293 * This is just a simple wrapper around our general syncing function.
294 */
295int generic_write_sync(struct file *file, loff_t pos, loff_t count)
296{
297 if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host))
298 return 0;
299 return vfs_fsync_range(file, file->f_path.dentry, pos,
300 pos + count - 1, 1);
301}
302EXPORT_SYMBOL(generic_write_sync);
303
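
generic_write_sync() gives every write path the same tail, replacing the
open-coded sync_page_range()/generic_osync_inode() calls removed in the ntfs,
ocfs2 and splice hunks. A hedged sketch of the calling convention:

        ssize_t ret = __generic_file_aio_write(iocb, iov, nr_segs, ppos);

        if (ret > 0) {
                int err = generic_write_sync(file, pos, ret);
                if (err < 0)
                        ret = err;      /* data was written; report the sync failure */
        }
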
259/* 304/*
260 * sys_sync_file_range() permits finely controlled syncing over a segment of 305 * sys_sync_file_range() permits finely controlled syncing over a segment of
261 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is 306 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 1c8991b0db13..ee1ce68fd98b 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -54,29 +54,15 @@
54 * @nr_to_write: how many dirty pages to write-back 54 * @nr_to_write: how many dirty pages to write-back
55 * 55 *
56 * This function shrinks UBIFS liability by means of writing back some amount 56 * This function shrinks UBIFS liability by means of writing back some amount
57 * of dirty inodes and their pages. Returns the amount of pages which were 57 * of dirty inodes and their pages.
58 * written back. The returned value does not include dirty inodes which were
59 * synchronized.
60 * 58 *
61 * Note, this function synchronizes even VFS inodes which are locked 59 * Note, this function synchronizes even VFS inodes which are locked
62 * (@i_mutex) by the caller of the budgeting function, because write-back does 60 * (@i_mutex) by the caller of the budgeting function, because write-back does
63 * not touch @i_mutex. 61 * not touch @i_mutex.
64 */ 62 */
65static int shrink_liability(struct ubifs_info *c, int nr_to_write) 63static void shrink_liability(struct ubifs_info *c, int nr_to_write)
66{ 64{
67 int nr_written; 65 writeback_inodes_sb(c->vfs_sb);
68
69 nr_written = writeback_inodes_sb(c->vfs_sb);
70 if (!nr_written) {
71 /*
72 * Re-try again but wait on pages/inodes which are being
73 * written-back concurrently (e.g., by pdflush).
74 */
75 nr_written = sync_inodes_sb(c->vfs_sb);
76 }
77
78 dbg_budg("%d pages were written back", nr_written);
79 return nr_written;
80} 66}
81 67
82/** 68/**
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 51763aa8f4de..c4af069df1ad 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1980,6 +1980,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1980 if (err) 1980 if (err)
1981 goto out_bdi; 1981 goto out_bdi;
1982 1982
1983 sb->s_bdi = &c->bdi;
1983 sb->s_fs_info = c; 1984 sb->s_fs_info = c;
1984 sb->s_magic = UBIFS_SUPER_MAGIC; 1985 sb->s_magic = UBIFS_SUPER_MAGIC;
1985 sb->s_blocksize = UBIFS_BLOCK_SIZE; 1986 sb->s_blocksize = UBIFS_BLOCK_SIZE;
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 1d2c570704c8..2ffdb6733af1 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -18,59 +18,6 @@
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
20 20
21#if 0
22static uint8_t *udf_filead_read(struct inode *dir, uint8_t *tmpad,
23 uint8_t ad_size, struct kernel_lb_addr fe_loc,
24 int *pos, int *offset, struct buffer_head **bh,
25 int *error)
26{
27 int loffset = *offset;
28 int block;
29 uint8_t *ad;
30 int remainder;
31
32 *error = 0;
33
34 ad = (uint8_t *)(*bh)->b_data + *offset;
35 *offset += ad_size;
36
37 if (!ad) {
38 brelse(*bh);
39 *error = 1;
40 return NULL;
41 }
42
43 if (*offset == dir->i_sb->s_blocksize) {
44 brelse(*bh);
45 block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos);
46 if (!block)
47 return NULL;
48 *bh = udf_tread(dir->i_sb, block);
49 if (!*bh)
50 return NULL;
51 } else if (*offset > dir->i_sb->s_blocksize) {
52 ad = tmpad;
53
54 remainder = dir->i_sb->s_blocksize - loffset;
55 memcpy((uint8_t *)ad, (*bh)->b_data + loffset, remainder);
56
57 brelse(*bh);
58 block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos);
59 if (!block)
60 return NULL;
61 (*bh) = udf_tread(dir->i_sb, block);
62 if (!*bh)
63 return NULL;
64
65 memcpy((uint8_t *)ad + remainder, (*bh)->b_data,
66 ad_size - remainder);
67 *offset = ad_size - remainder;
68 }
69
70 return ad;
71}
72#endif
73
74struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, 21struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
75 struct udf_fileident_bh *fibh, 22 struct udf_fileident_bh *fibh,
76 struct fileIdentDesc *cfi, 23 struct fileIdentDesc *cfi,
@@ -248,39 +195,6 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset)
248 return fi; 195 return fi;
249} 196}
250 197
251#if 0
252static struct extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset)
253{
254 struct extent_ad *ext;
255 struct fileEntry *fe;
256 uint8_t *ptr;
257
258 if ((!buffer) || (!offset)) {
259 printk(KERN_ERR "udf: udf_get_fileextent() invalidparms\n");
260 return NULL;
261 }
262
263 fe = (struct fileEntry *)buffer;
264
265 if (fe->descTag.tagIdent != cpu_to_le16(TAG_IDENT_FE)) {
266 udf_debug("0x%x != TAG_IDENT_FE\n",
267 le16_to_cpu(fe->descTag.tagIdent));
268 return NULL;
269 }
270
271 ptr = (uint8_t *)(fe->extendedAttr) +
272 le32_to_cpu(fe->lengthExtendedAttr);
273
274 if ((*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs)))
275 ptr += *offset;
276
277 ext = (struct extent_ad *)ptr;
278
279 *offset = *offset + sizeof(struct extent_ad);
280 return ext;
281}
282#endif
283
284struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset, 198struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset,
285 int inc) 199 int inc)
286{ 200{
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7464305382b5..b80cbd78833c 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -193,9 +193,11 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
193static int udf_release_file(struct inode *inode, struct file *filp) 193static int udf_release_file(struct inode *inode, struct file *filp)
194{ 194{
195 if (filp->f_mode & FMODE_WRITE) { 195 if (filp->f_mode & FMODE_WRITE) {
196 mutex_lock(&inode->i_mutex);
196 lock_kernel(); 197 lock_kernel();
197 udf_discard_prealloc(inode); 198 udf_discard_prealloc(inode);
198 unlock_kernel(); 199 unlock_kernel();
200 mutex_unlock(&inode->i_mutex);
199 } 201 }
200 return 0; 202 return 0;
201} 203}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index e7533f785636..6d24c2c63f93 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -90,19 +90,16 @@ no_delete:
90} 90}
91 91
92/* 92/*
93 * If we are going to release inode from memory, we discard preallocation and 93 * If we are going to release inode from memory, we truncate last inode extent
94 * truncate last inode extent to proper length. We could use drop_inode() but 94 * to proper length. We could use drop_inode() but it's called under inode_lock
95 * it's called under inode_lock and thus we cannot mark inode dirty there. We 95 * and thus we cannot mark inode dirty there. We use clear_inode() but we have
96 * use clear_inode() but we have to make sure to write inode as it's not written 96 * to make sure to write inode as it's not written automatically.
97 * automatically.
98 */ 97 */
99void udf_clear_inode(struct inode *inode) 98void udf_clear_inode(struct inode *inode)
100{ 99{
101 struct udf_inode_info *iinfo; 100 struct udf_inode_info *iinfo;
102 if (!(inode->i_sb->s_flags & MS_RDONLY)) { 101 if (!(inode->i_sb->s_flags & MS_RDONLY)) {
103 lock_kernel(); 102 lock_kernel();
104 /* Discard preallocation for directories, symlinks, etc. */
105 udf_discard_prealloc(inode);
106 udf_truncate_tail_extent(inode); 103 udf_truncate_tail_extent(inode);
107 unlock_kernel(); 104 unlock_kernel();
108 write_inode_now(inode, 0); 105 write_inode_now(inode, 0);
@@ -664,8 +661,12 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
664 udf_split_extents(inode, &c, offset, newblocknum, laarr, &endnum); 661 udf_split_extents(inode, &c, offset, newblocknum, laarr, &endnum);
665 662
666#ifdef UDF_PREALLOCATE 663#ifdef UDF_PREALLOCATE
667 /* preallocate blocks */ 664 /* We preallocate blocks only for regular files. It also makes sense
668 udf_prealloc_extents(inode, c, lastblock, laarr, &endnum); 665 * for directories but there's a problem of when to drop the
666 * preallocation. We might use some delayed work for that but I feel
667 * it's overengineering for a filesystem like UDF. */
668 if (S_ISREG(inode->i_mode))
669 udf_prealloc_extents(inode, c, lastblock, laarr, &endnum);
669#endif 670#endif
670 671
671 /* merge any continuous blocks in laarr */ 672 /* merge any continuous blocks in laarr */
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 1b88fd5df05d..43e24a3b8e10 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -36,14 +36,10 @@ unsigned int udf_get_last_session(struct super_block *sb)
36 ms_info.addr_format = CDROM_LBA; 36 ms_info.addr_format = CDROM_LBA;
37 i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long)&ms_info); 37 i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long)&ms_info);
38 38
39#define WE_OBEY_THE_WRITTEN_STANDARDS 1
40
41 if (i == 0) { 39 if (i == 0) {
42 udf_debug("XA disk: %s, vol_desc_start=%d\n", 40 udf_debug("XA disk: %s, vol_desc_start=%d\n",
43 (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba); 41 (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba);
44#if WE_OBEY_THE_WRITTEN_STANDARDS
45 if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ 42 if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
46#endif
47 vol_desc_start = ms_info.addr.lba; 43 vol_desc_start = ms_info.addr.lba;
48 } else { 44 } else {
49 udf_debug("CDROMMULTISESSION not supported: rc=%d\n", i); 45 udf_debug("CDROMMULTISESSION not supported: rc=%d\n", i);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 6a29fa34c478..21dad8c608f9 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -943,7 +943,6 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
943 pc->componentType = 1; 943 pc->componentType = 1;
944 pc->lengthComponentIdent = 0; 944 pc->lengthComponentIdent = 0;
945 pc->componentFileVersionNum = 0; 945 pc->componentFileVersionNum = 0;
946 pc += sizeof(struct pathComponent);
947 elen += sizeof(struct pathComponent); 946 elen += sizeof(struct pathComponent);
948 } 947 }
949 948
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index aecf2519db76..d5e5559e31db 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -216,7 +216,6 @@ xfs_setfilesize(
216 if (ip->i_d.di_size < isize) { 216 if (ip->i_d.di_size < isize) {
217 ip->i_d.di_size = isize; 217 ip->i_d.di_size = isize;
218 ip->i_update_core = 1; 218 ip->i_update_core = 1;
219 ip->i_update_size = 1;
220 xfs_mark_inode_dirty_sync(ip); 219 xfs_mark_inode_dirty_sync(ip);
221 } 220 }
222 221
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 0542fd507649..988d8f87bc0f 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -172,12 +172,21 @@ xfs_file_release(
172 */ 172 */
173STATIC int 173STATIC int
174xfs_file_fsync( 174xfs_file_fsync(
175 struct file *filp, 175 struct file *file,
176 struct dentry *dentry, 176 struct dentry *dentry,
177 int datasync) 177 int datasync)
178{ 178{
179 xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); 179 struct inode *inode = dentry->d_inode;
180 return -xfs_fsync(XFS_I(dentry->d_inode)); 180 struct xfs_inode *ip = XFS_I(inode);
181 int error;
182
183 /* capture size updates in I/O completion before writing the inode. */
184 error = filemap_fdatawait(inode->i_mapping);
185 if (error)
186 return error;
187
188 xfs_iflags_clear(ip, XFS_ITRUNCATED);
189 return -xfs_fsync(ip);
181} 190}
182 191
183STATIC int 192STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 6c32f1d63d8c..da0159d99f82 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -43,7 +43,6 @@
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_itable.h" 44#include "xfs_itable.h"
45#include "xfs_rw.h" 45#include "xfs_rw.h"
46#include "xfs_acl.h"
47#include "xfs_attr.h" 46#include "xfs_attr.h"
48#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
49#include "xfs_utils.h" 48#include "xfs_utils.h"
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 7078974a6eee..49e4a6aea73c 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -812,18 +812,21 @@ write_retry:
812 812
813 /* Handle various SYNC-type writes */ 813 /* Handle various SYNC-type writes */
814 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 814 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
815 loff_t end = pos + ret - 1;
815 int error2; 816 int error2;
816 817
817 xfs_iunlock(xip, iolock); 818 xfs_iunlock(xip, iolock);
818 if (need_i_mutex) 819 if (need_i_mutex)
819 mutex_unlock(&inode->i_mutex); 820 mutex_unlock(&inode->i_mutex);
820 error2 = sync_page_range(inode, mapping, pos, ret); 821
822 error2 = filemap_write_and_wait_range(mapping, pos, end);
821 if (!error) 823 if (!error)
822 error = error2; 824 error = error2;
823 if (need_i_mutex) 825 if (need_i_mutex)
824 mutex_lock(&inode->i_mutex); 826 mutex_lock(&inode->i_mutex);
825 xfs_ilock(xip, iolock); 827 xfs_ilock(xip, iolock);
826 error2 = xfs_write_sync_logforce(mp, xip); 828
829 error2 = xfs_fsync(xip);
827 if (!error) 830 if (!error)
828 error = error2; 831 error = error2;
829 } 832 }
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index c3526d445f6a..76fdc5861932 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -20,16 +20,9 @@
20 20
21DEFINE_PER_CPU(struct xfsstats, xfsstats); 21DEFINE_PER_CPU(struct xfsstats, xfsstats);
22 22
23STATIC int 23static int xfs_stat_proc_show(struct seq_file *m, void *v)
24xfs_read_xfsstats(
25 char *buffer,
26 char **start,
27 off_t offset,
28 int count,
29 int *eof,
30 void *data)
31{ 24{
32 int c, i, j, len, val; 25 int c, i, j, val;
33 __uint64_t xs_xstrat_bytes = 0; 26 __uint64_t xs_xstrat_bytes = 0;
34 __uint64_t xs_write_bytes = 0; 27 __uint64_t xs_write_bytes = 0;
35 __uint64_t xs_read_bytes = 0; 28 __uint64_t xs_read_bytes = 0;
@@ -60,18 +53,18 @@ xfs_read_xfsstats(
60 }; 53 };
61 54
62 /* Loop over all stats groups */ 55 /* Loop over all stats groups */
63 for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) { 56 for (i=j = 0; i < ARRAY_SIZE(xstats); i++) {
64 len += sprintf(buffer + len, "%s", xstats[i].desc); 57 seq_printf(m, "%s", xstats[i].desc);
65 /* inner loop does each group */ 58 /* inner loop does each group */
66 while (j < xstats[i].endpoint) { 59 while (j < xstats[i].endpoint) {
67 val = 0; 60 val = 0;
68 /* sum over all cpus */ 61 /* sum over all cpus */
69 for_each_possible_cpu(c) 62 for_each_possible_cpu(c)
70 val += *(((__u32*)&per_cpu(xfsstats, c) + j)); 63 val += *(((__u32*)&per_cpu(xfsstats, c) + j));
71 len += sprintf(buffer + len, " %u", val); 64 seq_printf(m, " %u", val);
72 j++; 65 j++;
73 } 66 }
74 buffer[len++] = '\n'; 67 seq_putc(m, '\n');
75 } 68 }
76 /* extra precision counters */ 69 /* extra precision counters */
77 for_each_possible_cpu(i) { 70 for_each_possible_cpu(i) {
@@ -80,36 +73,38 @@ xfs_read_xfsstats(
80 xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; 73 xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
81 } 74 }
82 75
83 len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n", 76 seq_printf(m, "xpc %Lu %Lu %Lu\n",
84 xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); 77 xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
85 len += sprintf(buffer + len, "debug %u\n", 78 seq_printf(m, "debug %u\n",
86#if defined(DEBUG) 79#if defined(DEBUG)
87 1); 80 1);
88#else 81#else
89 0); 82 0);
90#endif 83#endif
84 return 0;
85}
91 86
92 if (offset >= len) { 87static int xfs_stat_proc_open(struct inode *inode, struct file *file)
93 *start = buffer; 88{
94 *eof = 1; 89 return single_open(file, xfs_stat_proc_show, NULL);
95 return 0;
96 }
97 *start = buffer + offset;
98 if ((len -= offset) > count)
99 return count;
100 *eof = 1;
101
102 return len;
103} 90}
104 91
92static const struct file_operations xfs_stat_proc_fops = {
93 .owner = THIS_MODULE,
94 .open = xfs_stat_proc_open,
95 .read = seq_read,
96 .llseek = seq_lseek,
97 .release = single_release,
98};
99
105int 100int
106xfs_init_procfs(void) 101xfs_init_procfs(void)
107{ 102{
108 if (!proc_mkdir("fs/xfs", NULL)) 103 if (!proc_mkdir("fs/xfs", NULL))
109 goto out; 104 goto out;
110 105
111 if (!create_proc_read_entry("fs/xfs/stat", 0, NULL, 106 if (!proc_create("fs/xfs/stat", 0, NULL,
112 xfs_read_xfsstats, NULL)) 107 &xfs_stat_proc_fops))
113 goto out_remove_entry; 108 goto out_remove_entry;
114 return 0; 109 return 0;
115 110
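
The hunk above converts the old create_proc_read_entry() callback, with its manual buffer/offset/eof bookkeeping, to the seq_file single_open pattern. A generic, self-contained sketch of that pattern as of this kernel era (the example_* names are placeholders):

	#include <linux/module.h>
	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int example_proc_show(struct seq_file *m, void *v)
	{
		/* seq_file handles lengths, offsets and EOF for us. */
		seq_printf(m, "value %u\n", 42);
		return 0;
	}

	static int example_proc_open(struct inode *inode, struct file *file)
	{
		return single_open(file, example_proc_show, NULL);
	}

	static const struct file_operations example_proc_fops = {
		.owner	 = THIS_MODULE,
		.open	 = example_proc_open,
		.read	 = seq_read,
		.llseek	 = seq_lseek,
		.release = single_release,
	};

	/* registered with: proc_create("fs/example/stat", 0, NULL,
	 *			       &example_proc_fops); */
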
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a220d36f789b..5d7c60ac77b4 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -579,15 +579,19 @@ xfs_showargs(
579 else if (mp->m_qflags & XFS_UQUOTA_ACCT) 579 else if (mp->m_qflags & XFS_UQUOTA_ACCT)
580 seq_puts(m, "," MNTOPT_UQUOTANOENF); 580 seq_puts(m, "," MNTOPT_UQUOTANOENF);
581 581
582 if (mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) 582 /* Either project or group quotas can be active, not both */
583 seq_puts(m, "," MNTOPT_PRJQUOTA); 583
584 else if (mp->m_qflags & XFS_PQUOTA_ACCT) 584 if (mp->m_qflags & XFS_PQUOTA_ACCT) {
585 seq_puts(m, "," MNTOPT_PQUOTANOENF); 585 if (mp->m_qflags & XFS_OQUOTA_ENFD)
586 586 seq_puts(m, "," MNTOPT_PRJQUOTA);
587 if (mp->m_qflags & (XFS_GQUOTA_ACCT|XFS_OQUOTA_ENFD)) 587 else
588 seq_puts(m, "," MNTOPT_GRPQUOTA); 588 seq_puts(m, "," MNTOPT_PQUOTANOENF);
589 else if (mp->m_qflags & XFS_GQUOTA_ACCT) 589 } else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
590 seq_puts(m, "," MNTOPT_GQUOTANOENF); 590 if (mp->m_qflags & XFS_OQUOTA_ENFD)
591 seq_puts(m, "," MNTOPT_GRPQUOTA);
592 else
593 seq_puts(m, "," MNTOPT_GQUOTANOENF);
594 }
591 595
592 if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) 596 if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
593 seq_puts(m, "," MNTOPT_NOQUOTA); 597 seq_puts(m, "," MNTOPT_NOQUOTA);
@@ -687,7 +691,7 @@ xfs_barrier_test(
687 return error; 691 return error;
688} 692}
689 693
690void 694STATIC void
691xfs_mountfs_check_barriers(xfs_mount_t *mp) 695xfs_mountfs_check_barriers(xfs_mount_t *mp)
692{ 696{
693 int error; 697 int error;
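
The xfs_showargs() restructuring makes the flag relationships explicit: project and group quota accounting are mutually exclusive, and XFS_OQUOTA_ENFD qualifies whichever of the two is active. A hypothetical helper distilling the same decision (flag macros from the XFS quota headers; option strings assumed to match the MNTOPT_* definitions):

	static const char *example_quota_opt(unsigned int qflags)
	{
		if (qflags & XFS_PQUOTA_ACCT)
			return (qflags & XFS_OQUOTA_ENFD) ?
				"prjquota" : "pqnoenforce";
		if (qflags & XFS_GQUOTA_ACCT)
			return (qflags & XFS_OQUOTA_ENFD) ?
				"grpquota" : "gqnoenforce";
		return NULL;	/* no project/group accounting active */
	}
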
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 98ef624d9baf..320be6aea492 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -749,21 +749,6 @@ __xfs_inode_clear_reclaim_tag(
749 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 749 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
750} 750}
751 751
752void
753xfs_inode_clear_reclaim_tag(
754 xfs_inode_t *ip)
755{
756 xfs_mount_t *mp = ip->i_mount;
757 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
758
759 read_lock(&pag->pag_ici_lock);
760 spin_lock(&ip->i_flags_lock);
761 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
762 spin_unlock(&ip->i_flags_lock);
763 read_unlock(&pag->pag_ici_lock);
764 xfs_put_perag(mp, pag);
765}
766
767STATIC int 752STATIC int
768xfs_reclaim_inode_now( 753xfs_reclaim_inode_now(
769 struct xfs_inode *ip, 754 struct xfs_inode *ip,
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 59120602588a..27920eb7a820 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -49,7 +49,6 @@ int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
49 49
50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
51void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); 51void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
52void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
53void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, 52void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
54 struct xfs_inode *ip); 53 struct xfs_inode *ip);
55 54
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 21b08c0396a1..83e7ea3e25fa 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -48,50 +48,34 @@
48 48
49struct xqmstats xqmstats; 49struct xqmstats xqmstats;
50 50
51STATIC int 51static int xqm_proc_show(struct seq_file *m, void *v)
52xfs_qm_read_xfsquota(
53 char *buffer,
54 char **start,
55 off_t offset,
56 int count,
57 int *eof,
58 void *data)
59{ 52{
60 int len;
61
62 /* maximum; incore; ratio free to inuse; freelist */ 53 /* maximum; incore; ratio free to inuse; freelist */
63 len = sprintf(buffer, "%d\t%d\t%d\t%u\n", 54 seq_printf(m, "%d\t%d\t%d\t%u\n",
64 ndquot, 55 ndquot,
65 xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, 56 xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
66 xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, 57 xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
67 xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0); 58 xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0);
68 59 return 0;
69 if (offset >= len) {
70 *start = buffer;
71 *eof = 1;
72 return 0;
73 }
74 *start = buffer + offset;
75 if ((len -= offset) > count)
76 return count;
77 *eof = 1;
78
79 return len;
80} 60}
81 61
82STATIC int 62static int xqm_proc_open(struct inode *inode, struct file *file)
83xfs_qm_read_stats(
84 char *buffer,
85 char **start,
86 off_t offset,
87 int count,
88 int *eof,
89 void *data)
90{ 63{
91 int len; 64 return single_open(file, xqm_proc_show, NULL);
65}
66
67static const struct file_operations xqm_proc_fops = {
68 .owner = THIS_MODULE,
69 .open = xqm_proc_open,
70 .read = seq_read,
71 .llseek = seq_lseek,
72 .release = single_release,
73};
92 74
75static int xqmstat_proc_show(struct seq_file *m, void *v)
76{
93 /* quota performance statistics */ 77 /* quota performance statistics */
94 len = sprintf(buffer, "qm %u %u %u %u %u %u %u %u\n", 78 seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
95 xqmstats.xs_qm_dqreclaims, 79 xqmstats.xs_qm_dqreclaims,
96 xqmstats.xs_qm_dqreclaim_misses, 80 xqmstats.xs_qm_dqreclaim_misses,
97 xqmstats.xs_qm_dquot_dups, 81 xqmstats.xs_qm_dquot_dups,
@@ -100,25 +84,27 @@ xfs_qm_read_stats(
100 xqmstats.xs_qm_dqwants, 84 xqmstats.xs_qm_dqwants,
101 xqmstats.xs_qm_dqshake_reclaims, 85 xqmstats.xs_qm_dqshake_reclaims,
102 xqmstats.xs_qm_dqinact_reclaims); 86 xqmstats.xs_qm_dqinact_reclaims);
87 return 0;
88}
103 89
104 if (offset >= len) { 90static int xqmstat_proc_open(struct inode *inode, struct file *file)
105 *start = buffer; 91{
106 *eof = 1; 92 return single_open(file, xqmstat_proc_show, NULL);
107 return 0;
108 }
109 *start = buffer + offset;
110 if ((len -= offset) > count)
111 return count;
112 *eof = 1;
113
114 return len;
115} 93}
116 94
95static const struct file_operations xqmstat_proc_fops = {
96 .owner = THIS_MODULE,
97 .open = xqmstat_proc_open,
98 .read = seq_read,
99 .llseek = seq_lseek,
100 .release = single_release,
101};
102
117void 103void
118xfs_qm_init_procfs(void) 104xfs_qm_init_procfs(void)
119{ 105{
120 create_proc_read_entry("fs/xfs/xqmstat", 0, NULL, xfs_qm_read_stats, NULL); 106 proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
121 create_proc_read_entry("fs/xfs/xqm", 0, NULL, xfs_qm_read_xfsquota, NULL); 107 proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
122} 108}
123 109
124void 110void
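
This is the same single_open conversion as in fs/xfs/linux-2.6/xfs_stats.c earlier in the diff; note that xfs_qm_init_procfs() ignores proc_create()'s return value, matching the old create_proc_read_entry() behaviour. A sketch of a checked variant, for illustration only:

	void example_qm_init_procfs(void)
	{
		if (!proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops))
			printk(KERN_WARNING "xfs: cannot create fs/xfs/xqmstat\n");
		if (!proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops))
			printk(KERN_WARNING "xfs: cannot create fs/xfs/xqm\n");
	}
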
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index f24b50b68d03..a5d54bf4931b 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -198,6 +198,15 @@ typedef struct xfs_perag
198 xfs_agino_t pagi_count; /* number of allocated inodes */ 198 xfs_agino_t pagi_count; /* number of allocated inodes */
199 int pagb_count; /* pagb slots in use */ 199 int pagb_count; /* pagb slots in use */
200 xfs_perag_busy_t *pagb_list; /* unstable blocks */ 200 xfs_perag_busy_t *pagb_list; /* unstable blocks */
201
202 /*
203 * Inode allocation search lookup optimisation.
204 * If the pagino matches, the search for new inodes
 205	 * doesn't need to revisit the nearby chunks straight away.
206 */
207 xfs_agino_t pagl_pagino;
208 xfs_agino_t pagl_leftrec;
209 xfs_agino_t pagl_rightrec;
201#ifdef __KERNEL__ 210#ifdef __KERNEL__
202 spinlock_t pagb_lock; /* lock for pagb_list */ 211 spinlock_t pagb_lock; /* lock for pagb_list */
203 212
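
The three new pagl_* fields cache where the last inode-allocation search for a given parent left off, so the next allocation can resume from that frontier instead of rescanning; NULLAGINO marks the cache invalid. A small sketch of the resume test used later in this diff (fs/xfs/xfs_ialloc.c):

	/* Illustrative predicate; mirrors the check in xfs_dialloc(). */
	static bool example_can_resume_search(const struct xfs_perag *pag,
					      xfs_agino_t pagino)
	{
		return pagino != NULLAGINO &&
		       pag->pagl_pagino == pagino &&
		       pag->pagl_leftrec != NULLAGINO &&
		       pag->pagl_rightrec != NULLAGINO;
	}
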
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 8ee5b5a76a2a..8971fb09d387 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3713,7 +3713,7 @@ done:
3713 * entry (null if none). Else, *lastxp will be set to the index 3713 * entry (null if none). Else, *lastxp will be set to the index
3714 * of the found entry; *gotp will contain the entry. 3714 * of the found entry; *gotp will contain the entry.
3715 */ 3715 */
3716xfs_bmbt_rec_host_t * /* pointer to found extent entry */ 3716STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */
3717xfs_bmap_search_multi_extents( 3717xfs_bmap_search_multi_extents(
3718 xfs_ifork_t *ifp, /* inode fork pointer */ 3718 xfs_ifork_t *ifp, /* inode fork pointer */
3719 xfs_fileoff_t bno, /* block number searched for */ 3719 xfs_fileoff_t bno, /* block number searched for */
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 1b8ff9256bd0..56f62d2edc35 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -392,17 +392,6 @@ xfs_bmap_count_blocks(
392 int whichfork, 392 int whichfork,
393 int *count); 393 int *count);
394 394
395/*
396 * Search the extent records for the entry containing block bno.
397 * If bno lies in a hole, point to the next entry. If bno lies
398 * past eof, *eofp will be set, and *prevp will contain the last
399 * entry (null if none). Else, *lastxp will be set to the index
400 * of the found entry; *gotp will contain the entry.
401 */
402xfs_bmbt_rec_host_t *
403xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *,
404 xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
405
406#endif /* __KERNEL__ */ 395#endif /* __KERNEL__ */
407 396
408#endif /* __XFS_BMAP_H__ */ 397#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 5c1ade06578e..eb7b702d0690 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -202,16 +202,6 @@ xfs_bmbt_get_state(
202 ext_flag); 202 ext_flag);
203} 203}
204 204
205/* Endian flipping versions of the bmbt extraction functions */
206void
207xfs_bmbt_disk_get_all(
208 xfs_bmbt_rec_t *r,
209 xfs_bmbt_irec_t *s)
210{
211 __xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
212 get_unaligned_be64(&r->l1), s);
213}
214
215/* 205/*
216 * Extract the blockcount field from an on disk bmap extent record. 206 * Extract the blockcount field from an on disk bmap extent record.
217 */ 207 */
@@ -816,6 +806,16 @@ xfs_bmbt_trace_key(
816 *l1 = 0; 806 *l1 = 0;
817} 807}
818 808
809/* Endian flipping versions of the bmbt extraction functions */
810STATIC void
811xfs_bmbt_disk_get_all(
812 xfs_bmbt_rec_t *r,
813 xfs_bmbt_irec_t *s)
814{
815 __xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
816 get_unaligned_be64(&r->l1), s);
817}
818
819STATIC void 819STATIC void
820xfs_bmbt_trace_record( 820xfs_bmbt_trace_record(
821 struct xfs_btree_cur *cur, 821 struct xfs_btree_cur *cur,
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 0e8df007615e..5549d495947f 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -220,7 +220,6 @@ extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r);
220extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); 220extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r);
221extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r); 221extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r);
222 222
223extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
224extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); 223extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
225extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); 224extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
226 225
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 26717388acf5..52b5f14d0c32 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -646,46 +646,6 @@ xfs_btree_read_bufl(
646} 646}
647 647
648/* 648/*
649 * Get a buffer for the block, return it read in.
650 * Short-form addressing.
651 */
652int /* error */
653xfs_btree_read_bufs(
654 xfs_mount_t *mp, /* file system mount point */
655 xfs_trans_t *tp, /* transaction pointer */
656 xfs_agnumber_t agno, /* allocation group number */
657 xfs_agblock_t agbno, /* allocation group block number */
658 uint lock, /* lock flags for read_buf */
659 xfs_buf_t **bpp, /* buffer for agno/agbno */
660 int refval) /* ref count value for buffer */
661{
662 xfs_buf_t *bp; /* return value */
663 xfs_daddr_t d; /* real disk block address */
664 int error;
665
666 ASSERT(agno != NULLAGNUMBER);
667 ASSERT(agbno != NULLAGBLOCK);
668 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
669 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
670 mp->m_bsize, lock, &bp))) {
671 return error;
672 }
673 ASSERT(!bp || !XFS_BUF_GETERROR(bp));
674 if (bp != NULL) {
675 switch (refval) {
676 case XFS_ALLOC_BTREE_REF:
677 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
678 break;
679 case XFS_INO_BTREE_REF:
680 XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
681 break;
682 }
683 }
684 *bpp = bp;
685 return 0;
686}
687
688/*
689 * Read-ahead the block, don't wait for it, don't return a buffer. 649 * Read-ahead the block, don't wait for it, don't return a buffer.
690 * Long-form addressing. 650 * Long-form addressing.
691 */ 651 */
@@ -2951,7 +2911,7 @@ error0:
2951 * inode we have to copy the single block it was pointing to into the 2911 * inode we have to copy the single block it was pointing to into the
2952 * inode. 2912 * inode.
2953 */ 2913 */
2954int 2914STATIC int
2955xfs_btree_kill_iroot( 2915xfs_btree_kill_iroot(
2956 struct xfs_btree_cur *cur) 2916 struct xfs_btree_cur *cur)
2957{ 2917{
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 4f852b735b96..7fa07062bdda 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -379,20 +379,6 @@ xfs_btree_read_bufl(
379 int refval);/* ref count value for buffer */ 379 int refval);/* ref count value for buffer */
380 380
381/* 381/*
382 * Get a buffer for the block, return it read in.
383 * Short-form addressing.
384 */
385int /* error */
386xfs_btree_read_bufs(
387 struct xfs_mount *mp, /* file system mount point */
388 struct xfs_trans *tp, /* transaction pointer */
389 xfs_agnumber_t agno, /* allocation group number */
390 xfs_agblock_t agbno, /* allocation group block number */
391 uint lock, /* lock flags for read_buf */
392 struct xfs_buf **bpp, /* buffer for agno/agbno */
393 int refval);/* ref count value for buffer */
394
395/*
396 * Read-ahead the block, don't wait for it, don't return a buffer. 382 * Read-ahead the block, don't wait for it, don't return a buffer.
397 * Long-form addressing. 383 * Long-form addressing.
398 */ 384 */
@@ -432,7 +418,6 @@ int xfs_btree_decrement(struct xfs_btree_cur *, int, int *);
432int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); 418int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *);
433int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); 419int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *);
434int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); 420int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
435int xfs_btree_kill_iroot(struct xfs_btree_cur *);
436int xfs_btree_insert(struct xfs_btree_cur *, int *); 421int xfs_btree_insert(struct xfs_btree_cur *, int *);
437int xfs_btree_delete(struct xfs_btree_cur *, int *); 422int xfs_btree_delete(struct xfs_btree_cur *, int *);
438int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); 423int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 3120a3a5e20f..ab64f3efb43b 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -57,75 +57,35 @@ xfs_ialloc_cluster_alignment(
57} 57}
58 58
59/* 59/*
60 * Lookup the record equal to ino in the btree given by cur. 60 * Lookup a record by ino in the btree given by cur.
61 */
62STATIC int /* error */
63xfs_inobt_lookup_eq(
64 struct xfs_btree_cur *cur, /* btree cursor */
65 xfs_agino_t ino, /* starting inode of chunk */
66 __int32_t fcnt, /* free inode count */
67 xfs_inofree_t free, /* free inode mask */
68 int *stat) /* success/failure */
69{
70 cur->bc_rec.i.ir_startino = ino;
71 cur->bc_rec.i.ir_freecount = fcnt;
72 cur->bc_rec.i.ir_free = free;
73 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
74}
75
76/*
77 * Lookup the first record greater than or equal to ino
78 * in the btree given by cur.
79 */ 61 */
80int /* error */ 62int /* error */
81xfs_inobt_lookup_ge( 63xfs_inobt_lookup(
82 struct xfs_btree_cur *cur, /* btree cursor */ 64 struct xfs_btree_cur *cur, /* btree cursor */
83 xfs_agino_t ino, /* starting inode of chunk */ 65 xfs_agino_t ino, /* starting inode of chunk */
84 __int32_t fcnt, /* free inode count */ 66 xfs_lookup_t dir, /* <=, >=, == */
85 xfs_inofree_t free, /* free inode mask */
86 int *stat) /* success/failure */ 67 int *stat) /* success/failure */
87{ 68{
88 cur->bc_rec.i.ir_startino = ino; 69 cur->bc_rec.i.ir_startino = ino;
89 cur->bc_rec.i.ir_freecount = fcnt; 70 cur->bc_rec.i.ir_freecount = 0;
90 cur->bc_rec.i.ir_free = free; 71 cur->bc_rec.i.ir_free = 0;
91 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 72 return xfs_btree_lookup(cur, dir, stat);
92} 73}
93 74
94/* 75/*
95 * Lookup the first record less than or equal to ino 76 * Update the record referred to by cur to the value given.
96 * in the btree given by cur.
97 */
98int /* error */
99xfs_inobt_lookup_le(
100 struct xfs_btree_cur *cur, /* btree cursor */
101 xfs_agino_t ino, /* starting inode of chunk */
102 __int32_t fcnt, /* free inode count */
103 xfs_inofree_t free, /* free inode mask */
104 int *stat) /* success/failure */
105{
106 cur->bc_rec.i.ir_startino = ino;
107 cur->bc_rec.i.ir_freecount = fcnt;
108 cur->bc_rec.i.ir_free = free;
109 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
110}
111
112/*
113 * Update the record referred to by cur to the value given
114 * by [ino, fcnt, free].
115 * This either works (return 0) or gets an EFSCORRUPTED error. 77 * This either works (return 0) or gets an EFSCORRUPTED error.
116 */ 78 */
117STATIC int /* error */ 79STATIC int /* error */
118xfs_inobt_update( 80xfs_inobt_update(
119 struct xfs_btree_cur *cur, /* btree cursor */ 81 struct xfs_btree_cur *cur, /* btree cursor */
120 xfs_agino_t ino, /* starting inode of chunk */ 82 xfs_inobt_rec_incore_t *irec) /* btree record */
121 __int32_t fcnt, /* free inode count */
122 xfs_inofree_t free) /* free inode mask */
123{ 83{
124 union xfs_btree_rec rec; 84 union xfs_btree_rec rec;
125 85
126 rec.inobt.ir_startino = cpu_to_be32(ino); 86 rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
127 rec.inobt.ir_freecount = cpu_to_be32(fcnt); 87 rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);
128 rec.inobt.ir_free = cpu_to_be64(free); 88 rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
129 return xfs_btree_update(cur, &rec); 89 return xfs_btree_update(cur, &rec);
130} 90}
131 91
@@ -135,9 +95,7 @@ xfs_inobt_update(
135int /* error */ 95int /* error */
136xfs_inobt_get_rec( 96xfs_inobt_get_rec(
137 struct xfs_btree_cur *cur, /* btree cursor */ 97 struct xfs_btree_cur *cur, /* btree cursor */
138 xfs_agino_t *ino, /* output: starting inode of chunk */ 98 xfs_inobt_rec_incore_t *irec, /* btree record */
139 __int32_t *fcnt, /* output: number of free inodes */
140 xfs_inofree_t *free, /* output: free inode mask */
141 int *stat) /* output: success/failure */ 99 int *stat) /* output: success/failure */
142{ 100{
143 union xfs_btree_rec *rec; 101 union xfs_btree_rec *rec;
@@ -145,14 +103,136 @@ xfs_inobt_get_rec(
145 103
146 error = xfs_btree_get_rec(cur, &rec, stat); 104 error = xfs_btree_get_rec(cur, &rec, stat);
147 if (!error && *stat == 1) { 105 if (!error && *stat == 1) {
148 *ino = be32_to_cpu(rec->inobt.ir_startino); 106 irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
149 *fcnt = be32_to_cpu(rec->inobt.ir_freecount); 107 irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
150 *free = be64_to_cpu(rec->inobt.ir_free); 108 irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
151 } 109 }
152 return error; 110 return error;
153} 111}
154 112
155/* 113/*
114 * Verify that the number of free inodes in the AGI is correct.
115 */
116#ifdef DEBUG
117STATIC int
118xfs_check_agi_freecount(
119 struct xfs_btree_cur *cur,
120 struct xfs_agi *agi)
121{
122 if (cur->bc_nlevels == 1) {
123 xfs_inobt_rec_incore_t rec;
124 int freecount = 0;
125 int error;
126 int i;
127
128 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
129 if (error)
130 return error;
131
132 do {
133 error = xfs_inobt_get_rec(cur, &rec, &i);
134 if (error)
135 return error;
136
137 if (i) {
138 freecount += rec.ir_freecount;
139 error = xfs_btree_increment(cur, 0, &i);
140 if (error)
141 return error;
142 }
143 } while (i == 1);
144
145 if (!XFS_FORCED_SHUTDOWN(cur->bc_mp))
146 ASSERT(freecount == be32_to_cpu(agi->agi_freecount));
147 }
148 return 0;
149}
150#else
151#define xfs_check_agi_freecount(cur, agi) 0
152#endif
153
154/*
155 * Initialise a new set of inodes.
156 */
157STATIC void
158xfs_ialloc_inode_init(
159 struct xfs_mount *mp,
160 struct xfs_trans *tp,
161 xfs_agnumber_t agno,
162 xfs_agblock_t agbno,
163 xfs_agblock_t length,
164 unsigned int gen)
165{
166 struct xfs_buf *fbuf;
167 struct xfs_dinode *free;
168 int blks_per_cluster, nbufs, ninodes;
169 int version;
170 int i, j;
171 xfs_daddr_t d;
172
173 /*
174 * Loop over the new block(s), filling in the inodes.
175 * For small block sizes, manipulate the inodes in buffers
 176	 * which are multiples of the block size.
177 */
178 if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
179 blks_per_cluster = 1;
180 nbufs = length;
181 ninodes = mp->m_sb.sb_inopblock;
182 } else {
183 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
184 mp->m_sb.sb_blocksize;
185 nbufs = length / blks_per_cluster;
186 ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
187 }
188
189 /*
190 * Figure out what version number to use in the inodes we create.
191 * If the superblock version has caught up to the one that supports
192 * the new inode format, then use the new inode version. Otherwise
193 * use the old version so that old kernels will continue to be
194 * able to use the file system.
195 */
196 if (xfs_sb_version_hasnlink(&mp->m_sb))
197 version = 2;
198 else
199 version = 1;
200
201 for (j = 0; j < nbufs; j++) {
202 /*
203 * Get the block.
204 */
205 d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
206 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
207 mp->m_bsize * blks_per_cluster,
208 XFS_BUF_LOCK);
209 ASSERT(fbuf);
210 ASSERT(!XFS_BUF_GETERROR(fbuf));
211
212 /*
213 * Initialize all inodes in this buffer and then log them.
214 *
215 * XXX: It would be much better if we had just one transaction
216 * to log a whole cluster of inodes instead of all the
217 * individual transactions causing a lot of log traffic.
218 */
219 xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
220 for (i = 0; i < ninodes; i++) {
221 int ioffset = i << mp->m_sb.sb_inodelog;
222 uint isize = sizeof(struct xfs_dinode);
223
224 free = xfs_make_iptr(mp, fbuf, i);
225 free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
226 free->di_version = version;
227 free->di_gen = cpu_to_be32(gen);
228 free->di_next_unlinked = cpu_to_be32(NULLAGINO);
229 xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
230 }
231 xfs_trans_inode_alloc_buf(tp, fbuf);
232 }
233}
234
235/*
156 * Allocate new inodes in the allocation group specified by agbp. 236 * Allocate new inodes in the allocation group specified by agbp.
157 * Return 0 for success, else error code. 237 * Return 0 for success, else error code.
158 */ 238 */
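
Two factorings land above: xfs_check_agi_freecount() replaces the repeated #ifdef DEBUG freecount walks, compiling away to 0 in non-debug builds, and xfs_ialloc_inode_init() pulls inode-buffer initialisation out of xfs_ialloc_ag_alloc(). The debug-helper idiom, in generic form:

	/* Real function under DEBUG, constant 0 otherwise, so callers can
	 * write plain "error = example_check(x); if (error) ..." without
	 * any #ifdef at the call site. (Generic sketch, not XFS code.) */
	#ifdef DEBUG
	STATIC int example_check(struct example_ctx *x)
	{
		/* walk the structure, ASSERT() its invariants */
		return 0;
	}
	#else
	#define example_check(x)	0
	#endif
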
@@ -164,24 +244,15 @@ xfs_ialloc_ag_alloc(
164{ 244{
165 xfs_agi_t *agi; /* allocation group header */ 245 xfs_agi_t *agi; /* allocation group header */
166 xfs_alloc_arg_t args; /* allocation argument structure */ 246 xfs_alloc_arg_t args; /* allocation argument structure */
167 int blks_per_cluster; /* fs blocks per inode cluster */
168 xfs_btree_cur_t *cur; /* inode btree cursor */ 247 xfs_btree_cur_t *cur; /* inode btree cursor */
169 xfs_daddr_t d; /* disk addr of buffer */
170 xfs_agnumber_t agno; 248 xfs_agnumber_t agno;
171 int error; 249 int error;
172 xfs_buf_t *fbuf; /* new free inodes' buffer */ 250 int i;
173 xfs_dinode_t *free; /* new free inode structure */
174 int i; /* inode counter */
175 int j; /* block counter */
176 int nbufs; /* num bufs of new inodes */
177 xfs_agino_t newino; /* new first inode's number */ 251 xfs_agino_t newino; /* new first inode's number */
178 xfs_agino_t newlen; /* new number of inodes */ 252 xfs_agino_t newlen; /* new number of inodes */
179 int ninodes; /* num inodes per buf */
180 xfs_agino_t thisino; /* current inode number, for loop */ 253 xfs_agino_t thisino; /* current inode number, for loop */
181 int version; /* inode version number to use */
182 int isaligned = 0; /* inode allocation at stripe unit */ 254 int isaligned = 0; /* inode allocation at stripe unit */
183 /* boundary */ 255 /* boundary */
184 unsigned int gen;
185 256
186 args.tp = tp; 257 args.tp = tp;
187 args.mp = tp->t_mountp; 258 args.mp = tp->t_mountp;
@@ -202,12 +273,12 @@ xfs_ialloc_ag_alloc(
202 */ 273 */
203 agi = XFS_BUF_TO_AGI(agbp); 274 agi = XFS_BUF_TO_AGI(agbp);
204 newino = be32_to_cpu(agi->agi_newino); 275 newino = be32_to_cpu(agi->agi_newino);
276 agno = be32_to_cpu(agi->agi_seqno);
205 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 277 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
206 XFS_IALLOC_BLOCKS(args.mp); 278 XFS_IALLOC_BLOCKS(args.mp);
207 if (likely(newino != NULLAGINO && 279 if (likely(newino != NULLAGINO &&
208 (args.agbno < be32_to_cpu(agi->agi_length)))) { 280 (args.agbno < be32_to_cpu(agi->agi_length)))) {
209 args.fsbno = XFS_AGB_TO_FSB(args.mp, 281 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
210 be32_to_cpu(agi->agi_seqno), args.agbno);
211 args.type = XFS_ALLOCTYPE_THIS_BNO; 282 args.type = XFS_ALLOCTYPE_THIS_BNO;
212 args.mod = args.total = args.wasdel = args.isfl = 283 args.mod = args.total = args.wasdel = args.isfl =
213 args.userdata = args.minalignslop = 0; 284 args.userdata = args.minalignslop = 0;
@@ -258,8 +329,7 @@ xfs_ialloc_ag_alloc(
258 * For now, just allocate blocks up front. 329 * For now, just allocate blocks up front.
259 */ 330 */
260 args.agbno = be32_to_cpu(agi->agi_root); 331 args.agbno = be32_to_cpu(agi->agi_root);
261 args.fsbno = XFS_AGB_TO_FSB(args.mp, 332 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
262 be32_to_cpu(agi->agi_seqno), args.agbno);
263 /* 333 /*
264 * Allocate a fixed-size extent of inodes. 334 * Allocate a fixed-size extent of inodes.
265 */ 335 */
@@ -282,8 +352,7 @@ xfs_ialloc_ag_alloc(
282 if (isaligned && args.fsbno == NULLFSBLOCK) { 352 if (isaligned && args.fsbno == NULLFSBLOCK) {
283 args.type = XFS_ALLOCTYPE_NEAR_BNO; 353 args.type = XFS_ALLOCTYPE_NEAR_BNO;
284 args.agbno = be32_to_cpu(agi->agi_root); 354 args.agbno = be32_to_cpu(agi->agi_root);
285 args.fsbno = XFS_AGB_TO_FSB(args.mp, 355 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
286 be32_to_cpu(agi->agi_seqno), args.agbno);
287 args.alignment = xfs_ialloc_cluster_alignment(&args); 356 args.alignment = xfs_ialloc_cluster_alignment(&args);
288 if ((error = xfs_alloc_vextent(&args))) 357 if ((error = xfs_alloc_vextent(&args)))
289 return error; 358 return error;
@@ -294,85 +363,30 @@ xfs_ialloc_ag_alloc(
294 return 0; 363 return 0;
295 } 364 }
296 ASSERT(args.len == args.minlen); 365 ASSERT(args.len == args.minlen);
297 /*
298 * Convert the results.
299 */
300 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
301 /*
302 * Loop over the new block(s), filling in the inodes.
303 * For small block sizes, manipulate the inodes in buffers
304 * which are multiples of the blocks size.
305 */
306 if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
307 blks_per_cluster = 1;
308 nbufs = (int)args.len;
309 ninodes = args.mp->m_sb.sb_inopblock;
310 } else {
311 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
312 args.mp->m_sb.sb_blocksize;
313 nbufs = (int)args.len / blks_per_cluster;
314 ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
315 }
316 /*
317 * Figure out what version number to use in the inodes we create.
318 * If the superblock version has caught up to the one that supports
319 * the new inode format, then use the new inode version. Otherwise
320 * use the old version so that old kernels will continue to be
321 * able to use the file system.
322 */
323 if (xfs_sb_version_hasnlink(&args.mp->m_sb))
324 version = 2;
325 else
326 version = 1;
327 366
328 /* 367 /*
368 * Stamp and write the inode buffers.
369 *
329 * Seed the new inode cluster with a random generation number. This 370 * Seed the new inode cluster with a random generation number. This
330 * prevents short-term reuse of generation numbers if a chunk is 371 * prevents short-term reuse of generation numbers if a chunk is
331 * freed and then immediately reallocated. We use random numbers 372 * freed and then immediately reallocated. We use random numbers
332 * rather than a linear progression to prevent the next generation 373 * rather than a linear progression to prevent the next generation
333 * number from being easily guessable. 374 * number from being easily guessable.
334 */ 375 */
335 gen = random32(); 376 xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, args.len,
336 for (j = 0; j < nbufs; j++) { 377 random32());
337 /*
338 * Get the block.
339 */
340 d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno),
341 args.agbno + (j * blks_per_cluster));
342 fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
343 args.mp->m_bsize * blks_per_cluster,
344 XFS_BUF_LOCK);
345 ASSERT(fbuf);
346 ASSERT(!XFS_BUF_GETERROR(fbuf));
347 378
348 /* 379 /*
349 * Initialize all inodes in this buffer and then log them. 380 * Convert the results.
350 * 381 */
351 * XXX: It would be much better if we had just one transaction to 382 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
352 * log a whole cluster of inodes instead of all the individual
353 * transactions causing a lot of log traffic.
354 */
355 xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
356 for (i = 0; i < ninodes; i++) {
357 int ioffset = i << args.mp->m_sb.sb_inodelog;
358 uint isize = sizeof(struct xfs_dinode);
359
360 free = xfs_make_iptr(args.mp, fbuf, i);
361 free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
362 free->di_version = version;
363 free->di_gen = cpu_to_be32(gen);
364 free->di_next_unlinked = cpu_to_be32(NULLAGINO);
365 xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
366 }
367 xfs_trans_inode_alloc_buf(tp, fbuf);
368 }
369 be32_add_cpu(&agi->agi_count, newlen); 383 be32_add_cpu(&agi->agi_count, newlen);
370 be32_add_cpu(&agi->agi_freecount, newlen); 384 be32_add_cpu(&agi->agi_freecount, newlen);
371 agno = be32_to_cpu(agi->agi_seqno);
372 down_read(&args.mp->m_peraglock); 385 down_read(&args.mp->m_peraglock);
373 args.mp->m_perag[agno].pagi_freecount += newlen; 386 args.mp->m_perag[agno].pagi_freecount += newlen;
374 up_read(&args.mp->m_peraglock); 387 up_read(&args.mp->m_peraglock);
375 agi->agi_newino = cpu_to_be32(newino); 388 agi->agi_newino = cpu_to_be32(newino);
389
376 /* 390 /*
377 * Insert records describing the new inode chunk into the btree. 391 * Insert records describing the new inode chunk into the btree.
378 */ 392 */
@@ -380,13 +394,17 @@ xfs_ialloc_ag_alloc(
380 for (thisino = newino; 394 for (thisino = newino;
381 thisino < newino + newlen; 395 thisino < newino + newlen;
382 thisino += XFS_INODES_PER_CHUNK) { 396 thisino += XFS_INODES_PER_CHUNK) {
383 if ((error = xfs_inobt_lookup_eq(cur, thisino, 397 cur->bc_rec.i.ir_startino = thisino;
384 XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) { 398 cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK;
399 cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE;
400 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i);
401 if (error) {
385 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 402 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
386 return error; 403 return error;
387 } 404 }
388 ASSERT(i == 0); 405 ASSERT(i == 0);
389 if ((error = xfs_btree_insert(cur, &i))) { 406 error = xfs_btree_insert(cur, &i);
407 if (error) {
390 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 408 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
391 return error; 409 return error;
392 } 410 }
@@ -539,6 +557,62 @@ nextag:
539} 557}
540 558
541/* 559/*
560 * Try to retrieve the next record to the left/right from the current one.
561 */
562STATIC int
563xfs_ialloc_next_rec(
564 struct xfs_btree_cur *cur,
565 xfs_inobt_rec_incore_t *rec,
566 int *done,
567 int left)
568{
569 int error;
570 int i;
571
572 if (left)
573 error = xfs_btree_decrement(cur, 0, &i);
574 else
575 error = xfs_btree_increment(cur, 0, &i);
576
577 if (error)
578 return error;
579 *done = !i;
580 if (i) {
581 error = xfs_inobt_get_rec(cur, rec, &i);
582 if (error)
583 return error;
584 XFS_WANT_CORRUPTED_RETURN(i == 1);
585 }
586
587 return 0;
588}
589
590STATIC int
591xfs_ialloc_get_rec(
592 struct xfs_btree_cur *cur,
593 xfs_agino_t agino,
594 xfs_inobt_rec_incore_t *rec,
595 int *done,
596 int left)
597{
598 int error;
599 int i;
600
601 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i);
602 if (error)
603 return error;
604 *done = !i;
605 if (i) {
606 error = xfs_inobt_get_rec(cur, rec, &i);
607 if (error)
608 return error;
609 XFS_WANT_CORRUPTED_RETURN(i == 1);
610 }
611
612 return 0;
613}
614
615/*
542 * Visible inode allocation functions. 616 * Visible inode allocation functions.
543 */ 617 */
544 618
@@ -592,8 +666,8 @@ xfs_dialloc(
592 int j; /* result code */ 666 int j; /* result code */
593 xfs_mount_t *mp; /* file system mount structure */ 667 xfs_mount_t *mp; /* file system mount structure */
594 int offset; /* index of inode in chunk */ 668 int offset; /* index of inode in chunk */
595 xfs_agino_t pagino; /* parent's a.g. relative inode # */ 669 xfs_agino_t pagino; /* parent's AG relative inode # */
596 xfs_agnumber_t pagno; /* parent's allocation group number */ 670 xfs_agnumber_t pagno; /* parent's AG number */
597 xfs_inobt_rec_incore_t rec; /* inode allocation record */ 671 xfs_inobt_rec_incore_t rec; /* inode allocation record */
598 xfs_agnumber_t tagno; /* testing allocation group number */ 672 xfs_agnumber_t tagno; /* testing allocation group number */
599 xfs_btree_cur_t *tcur; /* temp cursor */ 673 xfs_btree_cur_t *tcur; /* temp cursor */
@@ -716,6 +790,8 @@ nextag:
716 */ 790 */
717 agno = tagno; 791 agno = tagno;
718 *IO_agbp = NULL; 792 *IO_agbp = NULL;
793
794 restart_pagno:
719 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); 795 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
720 /* 796 /*
721 * If pagino is 0 (this is the root inode allocation) use newino. 797 * If pagino is 0 (this is the root inode allocation) use newino.
@@ -723,220 +799,199 @@ nextag:
723 */ 799 */
724 if (!pagino) 800 if (!pagino)
725 pagino = be32_to_cpu(agi->agi_newino); 801 pagino = be32_to_cpu(agi->agi_newino);
726#ifdef DEBUG
727 if (cur->bc_nlevels == 1) {
728 int freecount = 0;
729 802
730 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) 803 error = xfs_check_agi_freecount(cur, agi);
731 goto error0; 804 if (error)
732 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 805 goto error0;
733 do {
734 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
735 &rec.ir_freecount, &rec.ir_free, &i)))
736 goto error0;
737 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
738 freecount += rec.ir_freecount;
739 if ((error = xfs_btree_increment(cur, 0, &i)))
740 goto error0;
741 } while (i == 1);
742 806
743 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
744 XFS_FORCED_SHUTDOWN(mp));
745 }
746#endif
747 /* 807 /*
748 * If in the same a.g. as the parent, try to get near the parent. 808 * If in the same AG as the parent, try to get near the parent.
749 */ 809 */
750 if (pagno == agno) { 810 if (pagno == agno) {
751 if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i))) 811 xfs_perag_t *pag = &mp->m_perag[agno];
812 int doneleft; /* done, to the left */
813 int doneright; /* done, to the right */
814 int searchdistance = 10;
815
816 error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
817 if (error)
818 goto error0;
819 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
820
821 error = xfs_inobt_get_rec(cur, &rec, &j);
822 if (error)
752 goto error0; 823 goto error0;
753 if (i != 0 && 824 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
754 (error = xfs_inobt_get_rec(cur, &rec.ir_startino, 825
755 &rec.ir_freecount, &rec.ir_free, &j)) == 0 && 826 if (rec.ir_freecount > 0) {
756 j == 1 &&
757 rec.ir_freecount > 0) {
758 /* 827 /*
759 * Found a free inode in the same chunk 828 * Found a free inode in the same chunk
760 * as parent, done. 829 * as the parent, done.
761 */ 830 */
831 goto alloc_inode;
762 } 832 }
833
834
835 /*
836 * In the same AG as parent, but parent's chunk is full.
837 */
838
839 /* duplicate the cursor, search left & right simultaneously */
840 error = xfs_btree_dup_cursor(cur, &tcur);
841 if (error)
842 goto error0;
843
763 /* 844 /*
764 * In the same a.g. as parent, but parent's chunk is full. 845 * Skip to last blocks looked up if same parent inode.
765 */ 846 */
766 else { 847 if (pagino != NULLAGINO &&
767 int doneleft; /* done, to the left */ 848 pag->pagl_pagino == pagino &&
768 int doneright; /* done, to the right */ 849 pag->pagl_leftrec != NULLAGINO &&
850 pag->pagl_rightrec != NULLAGINO) {
851 error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec,
852 &trec, &doneleft, 1);
853 if (error)
854 goto error1;
769 855
856 error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec,
857 &rec, &doneright, 0);
770 if (error) 858 if (error)
771 goto error0;
772 ASSERT(i == 1);
773 ASSERT(j == 1);
774 /*
775 * Duplicate the cursor, search left & right
776 * simultaneously.
777 */
778 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
779 goto error0;
780 /*
781 * Search left with tcur, back up 1 record.
782 */
783 if ((error = xfs_btree_decrement(tcur, 0, &i)))
784 goto error1; 859 goto error1;
785 doneleft = !i; 860 } else {
786 if (!doneleft) { 861 /* search left with tcur, back up 1 record */
787 if ((error = xfs_inobt_get_rec(tcur, 862 error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1);
788 &trec.ir_startino, 863 if (error)
789 &trec.ir_freecount,
790 &trec.ir_free, &i)))
791 goto error1;
792 XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
793 }
794 /*
795 * Search right with cur, go forward 1 record.
796 */
797 if ((error = xfs_btree_increment(cur, 0, &i)))
798 goto error1; 864 goto error1;
799 doneright = !i;
800 if (!doneright) {
801 if ((error = xfs_inobt_get_rec(cur,
802 &rec.ir_startino,
803 &rec.ir_freecount,
804 &rec.ir_free, &i)))
805 goto error1;
806 XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
807 }
808 /*
809 * Loop until we find the closest inode chunk
810 * with a free one.
811 */
812 while (!doneleft || !doneright) {
813 int useleft; /* using left inode
814 chunk this time */
815 865
866 /* search right with cur, go forward 1 record. */
867 error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0);
868 if (error)
869 goto error1;
870 }
871
872 /*
873 * Loop until we find an inode chunk with a free inode.
874 */
875 while (!doneleft || !doneright) {
876 int useleft; /* using left inode chunk this time */
877
878 if (!--searchdistance) {
816 /* 879 /*
817 * Figure out which block is closer, 880 * Not in range - save last search
818 * if both are valid. 881 * location and allocate a new inode
819 */
820 if (!doneleft && !doneright)
821 useleft =
822 pagino -
823 (trec.ir_startino +
824 XFS_INODES_PER_CHUNK - 1) <
825 rec.ir_startino - pagino;
826 else
827 useleft = !doneleft;
828 /*
829 * If checking the left, does it have
830 * free inodes?
831 */
832 if (useleft && trec.ir_freecount) {
833 /*
834 * Yes, set it up as the chunk to use.
835 */
836 rec = trec;
837 xfs_btree_del_cursor(cur,
838 XFS_BTREE_NOERROR);
839 cur = tcur;
840 break;
841 }
842 /*
843 * If checking the right, does it have
844 * free inodes?
845 */
846 if (!useleft && rec.ir_freecount) {
847 /*
848 * Yes, it's already set up.
849 */
850 xfs_btree_del_cursor(tcur,
851 XFS_BTREE_NOERROR);
852 break;
853 }
854 /*
855 * If used the left, get another one
856 * further left.
857 */
858 if (useleft) {
859 if ((error = xfs_btree_decrement(tcur, 0,
860 &i)))
861 goto error1;
862 doneleft = !i;
863 if (!doneleft) {
864 if ((error = xfs_inobt_get_rec(
865 tcur,
866 &trec.ir_startino,
867 &trec.ir_freecount,
868 &trec.ir_free, &i)))
869 goto error1;
870 XFS_WANT_CORRUPTED_GOTO(i == 1,
871 error1);
872 }
873 }
874 /*
875 * If used the right, get another one
876 * further right.
877 */ 882 */
878 else { 883 pag->pagl_leftrec = trec.ir_startino;
879 if ((error = xfs_btree_increment(cur, 0, 884 pag->pagl_rightrec = rec.ir_startino;
880 &i))) 885 pag->pagl_pagino = pagino;
881 goto error1; 886 goto newino;
882 doneright = !i; 887 }
883 if (!doneright) { 888
884 if ((error = xfs_inobt_get_rec( 889 /* figure out the closer block if both are valid. */
885 cur, 890 if (!doneleft && !doneright) {
886 &rec.ir_startino, 891 useleft = pagino -
887 &rec.ir_freecount, 892 (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) <
888 &rec.ir_free, &i))) 893 rec.ir_startino - pagino;
889 goto error1; 894 } else {
890 XFS_WANT_CORRUPTED_GOTO(i == 1, 895 useleft = !doneleft;
891 error1);
892 }
893 }
894 } 896 }
895 ASSERT(!doneleft || !doneright); 897
898 /* free inodes to the left? */
899 if (useleft && trec.ir_freecount) {
900 rec = trec;
901 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
902 cur = tcur;
903
904 pag->pagl_leftrec = trec.ir_startino;
905 pag->pagl_rightrec = rec.ir_startino;
906 pag->pagl_pagino = pagino;
907 goto alloc_inode;
908 }
909
910 /* free inodes to the right? */
911 if (!useleft && rec.ir_freecount) {
912 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
913
914 pag->pagl_leftrec = trec.ir_startino;
915 pag->pagl_rightrec = rec.ir_startino;
916 pag->pagl_pagino = pagino;
917 goto alloc_inode;
918 }
919
920 /* get next record to check */
921 if (useleft) {
922 error = xfs_ialloc_next_rec(tcur, &trec,
923 &doneleft, 1);
924 } else {
925 error = xfs_ialloc_next_rec(cur, &rec,
926 &doneright, 0);
927 }
928 if (error)
929 goto error1;
896 } 930 }
931
932 /*
933 * We've reached the end of the btree. because
934 * we are only searching a small chunk of the
935 * btree each search, there is obviously free
936 * inodes closer to the parent inode than we
937 * are now. restart the search again.
938 */
939 pag->pagl_pagino = NULLAGINO;
940 pag->pagl_leftrec = NULLAGINO;
941 pag->pagl_rightrec = NULLAGINO;
942 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
943 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
944 goto restart_pagno;
897 } 945 }
946
898 /* 947 /*
899 * In a different a.g. from the parent. 948 * In a different AG from the parent.
900 * See if the most recently allocated block has any free. 949 * See if the most recently allocated block has any free.
901 */ 950 */
902 else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { 951newino:
903 if ((error = xfs_inobt_lookup_eq(cur, 952 if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
904 be32_to_cpu(agi->agi_newino), 0, 0, &i))) 953 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
954 XFS_LOOKUP_EQ, &i);
955 if (error)
905 goto error0; 956 goto error0;
906 if (i == 1 && 957
907 (error = xfs_inobt_get_rec(cur, &rec.ir_startino, 958 if (i == 1) {
908 &rec.ir_freecount, &rec.ir_free, &j)) == 0 && 959 error = xfs_inobt_get_rec(cur, &rec, &j);
909 j == 1 &&
910 rec.ir_freecount > 0) {
911 /*
912 * The last chunk allocated in the group still has
913 * a free inode.
914 */
915 }
916 /*
917 * None left in the last group, search the whole a.g.
918 */
919 else {
920 if (error) 960 if (error)
921 goto error0; 961 goto error0;
922 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) 962
923 goto error0; 963 if (j == 1 && rec.ir_freecount > 0) {
924 ASSERT(i == 1); 964 /*
925 for (;;) { 965 * The last chunk allocated in the group
926 if ((error = xfs_inobt_get_rec(cur, 966 * still has a free inode.
927 &rec.ir_startino, 967 */
928 &rec.ir_freecount, &rec.ir_free, 968 goto alloc_inode;
929 &i)))
930 goto error0;
931 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
932 if (rec.ir_freecount > 0)
933 break;
934 if ((error = xfs_btree_increment(cur, 0, &i)))
935 goto error0;
936 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
937 } 969 }
938 } 970 }
939 } 971 }
972
973 /*
974 * None left in the last group, search the whole AG
975 */
976 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
977 if (error)
978 goto error0;
979 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
980
981 for (;;) {
982 error = xfs_inobt_get_rec(cur, &rec, &i);
983 if (error)
984 goto error0;
985 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
986 if (rec.ir_freecount > 0)
987 break;
988 error = xfs_btree_increment(cur, 0, &i);
989 if (error)
990 goto error0;
991 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
992 }
993
994alloc_inode:
940 offset = xfs_ialloc_find_free(&rec.ir_free); 995 offset = xfs_ialloc_find_free(&rec.ir_free);
941 ASSERT(offset >= 0); 996 ASSERT(offset >= 0);
942 ASSERT(offset < XFS_INODES_PER_CHUNK); 997 ASSERT(offset < XFS_INODES_PER_CHUNK);
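
The rewritten parent-relative search above walks left and right with two cursors but caps the walk at searchdistance (10) records; on exhaustion it stores the frontier in pag->pagl_* and jumps to the newino/whole-AG fallback, and running off both ends of the btree invalidates the cache and restarts. A distilled, self-contained model of the bounded two-sided scan, with plain arrays standing in for btree cursors:

	/* Walk outward from 'parent' over per-chunk free counts, at most
	 * 'budget' steps; return the first chunk found with free inodes,
	 * or -1 when the budget is spent or both ends are reached. */
	static int example_bounded_scan(const int *freecount, int nchunks,
					int parent, int budget)
	{
		int left = parent, right = parent + 1;

		while (left >= 0 || right < nchunks) {
			if (budget-- <= 0)
				return -1;	/* save frontier, fall back */
			if (left >= 0 && freecount[left] > 0)
				return left;
			if (right < nchunks && freecount[right] > 0)
				return right;
			left--;
			right++;
		}
		return -1;			/* ran off both ends */
	}
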
@@ -945,33 +1000,19 @@ nextag:
945 ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); 1000 ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
946 rec.ir_free &= ~XFS_INOBT_MASK(offset); 1001 rec.ir_free &= ~XFS_INOBT_MASK(offset);
947 rec.ir_freecount--; 1002 rec.ir_freecount--;
948 if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, 1003 error = xfs_inobt_update(cur, &rec);
949 rec.ir_free))) 1004 if (error)
950 goto error0; 1005 goto error0;
951 be32_add_cpu(&agi->agi_freecount, -1); 1006 be32_add_cpu(&agi->agi_freecount, -1);
952 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1007 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
953 down_read(&mp->m_peraglock); 1008 down_read(&mp->m_peraglock);
954 mp->m_perag[tagno].pagi_freecount--; 1009 mp->m_perag[tagno].pagi_freecount--;
955 up_read(&mp->m_peraglock); 1010 up_read(&mp->m_peraglock);
956#ifdef DEBUG
957 if (cur->bc_nlevels == 1) {
958 int freecount = 0;
959 1011
960 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) 1012 error = xfs_check_agi_freecount(cur, agi);
961 goto error0; 1013 if (error)
962 do { 1014 goto error0;
963 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, 1015
964 &rec.ir_freecount, &rec.ir_free, &i)))
965 goto error0;
966 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
967 freecount += rec.ir_freecount;
968 if ((error = xfs_btree_increment(cur, 0, &i)))
969 goto error0;
970 } while (i == 1);
971 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
972 XFS_FORCED_SHUTDOWN(mp));
973 }
974#endif
975 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1016 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
976 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); 1017 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
977 *inop = ino; 1018 *inop = ino;
@@ -1062,38 +1103,23 @@ xfs_difree(
1062 * Initialize the cursor. 1103 * Initialize the cursor.
1063 */ 1104 */
1064 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1105 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1065#ifdef DEBUG
1066 if (cur->bc_nlevels == 1) {
1067 int freecount = 0;
1068 1106
1069 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) 1107 error = xfs_check_agi_freecount(cur, agi);
1070 goto error0; 1108 if (error)
1071 do { 1109 goto error0;
1072 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, 1110
1073 &rec.ir_freecount, &rec.ir_free, &i)))
1074 goto error0;
1075 if (i) {
1076 freecount += rec.ir_freecount;
1077 if ((error = xfs_btree_increment(cur, 0, &i)))
1078 goto error0;
1079 }
1080 } while (i == 1);
1081 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
1082 XFS_FORCED_SHUTDOWN(mp));
1083 }
1084#endif
1085 /* 1111 /*
1086 * Look for the entry describing this inode. 1112 * Look for the entry describing this inode.
1087 */ 1113 */
1088 if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { 1114 if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
1089 cmn_err(CE_WARN, 1115 cmn_err(CE_WARN,
1090 "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.", 1116 "xfs_difree: xfs_inobt_lookup returned() an error %d on %s. Returning error.",
1091 error, mp->m_fsname); 1117 error, mp->m_fsname);
1092 goto error0; 1118 goto error0;
1093 } 1119 }
1094 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1120 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1095 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount, 1121 error = xfs_inobt_get_rec(cur, &rec, &i);
1096 &rec.ir_free, &i))) { 1122 if (error) {
1097 cmn_err(CE_WARN, 1123 cmn_err(CE_WARN,
1098 "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", 1124 "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.",
1099 error, mp->m_fsname); 1125 error, mp->m_fsname);
@@ -1148,12 +1174,14 @@ xfs_difree(
1148 } else { 1174 } else {
1149 *delete = 0; 1175 *delete = 0;
1150 1176
1151 if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) { 1177 error = xfs_inobt_update(cur, &rec);
1178 if (error) {
1152 cmn_err(CE_WARN, 1179 cmn_err(CE_WARN,
1153 "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.", 1180 "xfs_difree: xfs_inobt_update returned an error %d on %s.",
1154 error, mp->m_fsname); 1181 error, mp->m_fsname);
1155 goto error0; 1182 goto error0;
1156 } 1183 }
1184
1157 /* 1185 /*
1158 * Change the inode free counts and log the ag/sb changes. 1186 * Change the inode free counts and log the ag/sb changes.
1159 */ 1187 */
@@ -1165,28 +1193,10 @@ xfs_difree(
1165 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); 1193 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
1166 } 1194 }
1167 1195
1168#ifdef DEBUG 1196 error = xfs_check_agi_freecount(cur, agi);
1169 if (cur->bc_nlevels == 1) { 1197 if (error)
1170 int freecount = 0; 1198 goto error0;
1171 1199
1172 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
1173 goto error0;
1174 do {
1175 if ((error = xfs_inobt_get_rec(cur,
1176 &rec.ir_startino,
1177 &rec.ir_freecount,
1178 &rec.ir_free, &i)))
1179 goto error0;
1180 if (i) {
1181 freecount += rec.ir_freecount;
1182 if ((error = xfs_btree_increment(cur, 0, &i)))
1183 goto error0;
1184 }
1185 } while (i == 1);
1186 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
1187 XFS_FORCED_SHUTDOWN(mp));
1188 }
1189#endif
1190 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1200 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1191 return 0; 1201 return 0;
1192 1202
@@ -1297,9 +1307,7 @@ xfs_imap(
1297 chunk_agbno = agbno - offset_agbno; 1307 chunk_agbno = agbno - offset_agbno;
1298 } else { 1308 } else {
1299 xfs_btree_cur_t *cur; /* inode btree cursor */ 1309 xfs_btree_cur_t *cur; /* inode btree cursor */
1300 xfs_agino_t chunk_agino; /* first agino in inode chunk */ 1310 xfs_inobt_rec_incore_t chunk_rec;
1301 __int32_t chunk_cnt; /* count of free inodes in chunk */
1302 xfs_inofree_t chunk_free; /* mask of free inodes in chunk */
1303 xfs_buf_t *agbp; /* agi buffer */ 1311 xfs_buf_t *agbp; /* agi buffer */
1304 int i; /* temp state */ 1312 int i; /* temp state */
1305 1313
@@ -1315,15 +1323,14 @@ xfs_imap(
1315 } 1323 }
1316 1324
1317 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1325 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1318 error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i); 1326 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
1319 if (error) { 1327 if (error) {
1320 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1328 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1321 "xfs_inobt_lookup_le() failed"); 1329 "xfs_inobt_lookup() failed");
1322 goto error0; 1330 goto error0;
1323 } 1331 }
1324 1332
1325 error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, 1333 error = xfs_inobt_get_rec(cur, &chunk_rec, &i);
1326 &chunk_free, &i);
1327 if (error) { 1334 if (error) {
1328 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1335 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1329 "xfs_inobt_get_rec() failed"); 1336 "xfs_inobt_get_rec() failed");
@@ -1341,7 +1348,7 @@ xfs_imap(
1341 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1348 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1342 if (error) 1349 if (error)
1343 return error; 1350 return error;
1344 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); 1351 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino);
1345 offset_agbno = agbno - chunk_agbno; 1352 offset_agbno = agbno - chunk_agbno;
1346 } 1353 }
1347 1354
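
Throughout this file the record accessors now traffic in a single xfs_inobt_rec_incore_t instead of three loose ino/fcnt/free parameters, keeping the endian conversion inside xfs_inobt_get_rec() and xfs_inobt_update(). Typical caller shape after the change (sketch):

	static int example_read_freecount(struct xfs_btree_cur *cur, int *out)
	{
		xfs_inobt_rec_incore_t rec;
		int stat, error;

		error = xfs_inobt_get_rec(cur, &rec, &stat);
		if (error || stat != 1)
			return error;
		*out = rec.ir_freecount;
		return 0;
	}
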
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index aeee8278f92c..bb5385475e1f 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -150,23 +150,15 @@ xfs_ialloc_pagi_init(
150 xfs_agnumber_t agno); /* allocation group number */ 150 xfs_agnumber_t agno); /* allocation group number */
151 151
152/* 152/*
153 * Lookup the first record greater than or equal to ino 153 * Lookup a record by ino in the btree given by cur.
154 * in the btree given by cur.
155 */ 154 */
156int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino, 155int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino,
157 __int32_t fcnt, xfs_inofree_t free, int *stat); 156 xfs_lookup_t dir, int *stat);
158
159/*
160 * Lookup the first record less than or equal to ino
161 * in the btree given by cur.
162 */
163int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino,
164 __int32_t fcnt, xfs_inofree_t free, int *stat);
165 157
166/* 158/*
167 * Get the data from the pointed-to record. 159 * Get the data from the pointed-to record.
168 */ 160 */
169extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, 161extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
170 __int32_t *fcnt, xfs_inofree_t *free, int *stat); 162 xfs_inobt_rec_incore_t *rec, int *stat);
171 163
172#endif /* __XFS_IALLOC_H__ */ 164#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index ecbf8b4d2e2e..80e526489be5 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -82,7 +82,6 @@ xfs_inode_alloc(
82 memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); 82 memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
83 ip->i_flags = 0; 83 ip->i_flags = 0;
84 ip->i_update_core = 0; 84 ip->i_update_core = 0;
85 ip->i_update_size = 0;
86 ip->i_delayed_blks = 0; 85 ip->i_delayed_blks = 0;
87 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); 86 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
88 ip->i_size = 0; 87 ip->i_size = 0;
@@ -456,32 +455,6 @@ out_error_or_again:
 	return error;
 }

-
-/*
- * Look for the inode corresponding to the given ino in the hash table.
- * If it is there and its i_transp pointer matches tp, return it.
- * Otherwise, return NULL.
- */
-xfs_inode_t *
-xfs_inode_incore(xfs_mount_t	*mp,
-		 xfs_ino_t	ino,
-		 xfs_trans_t	*tp)
-{
-	xfs_inode_t	*ip;
-	xfs_perag_t	*pag;
-
-	pag = xfs_get_perag(mp, ino);
-	read_lock(&pag->pag_ici_lock);
-	ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino));
-	read_unlock(&pag->pag_ici_lock);
-	xfs_put_perag(mp, pag);
-
-	/* the returned inode must match the transaction */
-	if (ip && (ip->i_transp != tp))
-		return NULL;
-	return ip;
-}
-
 /*
  * Decrement reference count of an inode structure and unlock it.
  *
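
The deleted xfs_inode_incore() paired a per-AG radix-tree lookup with a check that the inode's i_transp matches the caller's transaction; with its last caller gone (see the xfs_trans_iget() hunk further down), the whole function can go. A minimal sketch of that look-up-then-validate-the-owner shape, using a flat array in place of the radix tree (hypothetical names):

#include <stddef.h>
#include <stdio.h>

struct txn { int id; };
struct inode { unsigned long ino; struct txn *owner; };

/* Return the cached inode for ino only if it is owned by tp,
 * mirroring the i_transp check in the removed function. */
static struct inode *incore(struct inode **cache, size_t n,
			    unsigned long ino, struct txn *tp)
{
	size_t i;

	for (i = 0; i < n; i++) {
		struct inode *ip = cache[i];
		if (ip && ip->ino == ino)
			return (ip->owner == tp) ? ip : NULL;
	}
	return NULL;
}

int main(void)
{
	struct txn t1 = { 1 }, t2 = { 2 };
	struct inode a = { 42, &t1 };
	struct inode *cache[] = { &a };

	printf("%p %p\n", (void *)incore(cache, 1, 42, &t1),
			  (void *)incore(cache, 1, 42, &t2));
	return 0;
}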
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index da428b3fe0f5..c1dc7ef5a1d8 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -651,7 +651,7 @@ xfs_iformat_btree(
 	return 0;
 }

-void
+STATIC void
 xfs_dinode_from_disk(
 	xfs_icdinode_t	*to,
 	xfs_dinode_t	*from)
@@ -1247,7 +1247,7 @@ xfs_isize_check(
  * In that case the pages will still be in memory, but the inode size
  * will never have been updated.
  */
-xfs_fsize_t
+STATIC xfs_fsize_t
 xfs_file_last_byte(
 	xfs_inode_t	*ip)
 {
@@ -3837,7 +3837,7 @@ xfs_iext_inline_to_direct(
 /*
  * Resize an extent indirection array to new_size bytes.
  */
-void
+STATIC void
 xfs_iext_realloc_indirect(
 	xfs_ifork_t	*ifp,		/* inode fork pointer */
 	int		new_size)	/* new indirection array size */
@@ -3862,7 +3862,7 @@ xfs_iext_realloc_indirect(
 /*
  * Switch from indirection array to linear (direct) extent allocations.
  */
-void
+STATIC void
 xfs_iext_indirect_to_direct(
 	xfs_ifork_t	*ifp)		/* inode fork pointer */
 {
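
The four hunks above demote functions from global to STATIC, XFS's wrapper around plain static (its exact expansion varies between debug and release builds). Once the last external caller is gone, internal linkage lets the prototype disappear from the shared header, which is what the xfs_inode.h hunks below do. The pattern in miniature:

#include <stdio.h>

/* With internal linkage this helper is invisible to other translation
 * units, so its prototype can be dropped from the public header, as
 * happens above for xfs_dinode_from_disk() and friends. */
static int helper(int x)
{
	return x * 2;
}

int main(void)
{
	printf("%d\n", helper(21));
	return 0;
}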
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 65f24a3cc992..0b38b9a869ec 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -261,7 +261,6 @@ typedef struct xfs_inode {
 	/* Miscellaneous state. */
 	unsigned short		i_flags;	/* see defined flags below */
 	unsigned char		i_update_core;	/* timestamps/size is dirty */
-	unsigned char		i_update_size;	/* di_size field is dirty */
 	unsigned int		i_delayed_blks;	/* count of delay alloc blks */

 	xfs_icdinode_t		i_d;		/* most of ondisk inode */
@@ -468,8 +467,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
 /*
  * xfs_iget.c prototypes.
  */
-xfs_inode_t	*xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
-			struct xfs_trans *);
 int		xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
 			uint, uint, xfs_inode_t **, xfs_daddr_t);
 void		xfs_iput(xfs_inode_t *, uint);
@@ -504,7 +501,6 @@ void xfs_ipin(xfs_inode_t *);
 void		xfs_iunpin(xfs_inode_t *);
 int		xfs_iflush(xfs_inode_t *, uint);
 void		xfs_ichgtime(xfs_inode_t *, int);
-xfs_fsize_t	xfs_file_last_byte(xfs_inode_t *);
 void		xfs_lock_inodes(xfs_inode_t **, int, uint);
 void		xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);

@@ -572,8 +568,6 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
 			struct xfs_buf **, uint);
 int		xfs_iread(struct xfs_mount *, struct xfs_trans *,
 			struct xfs_inode *, xfs_daddr_t, uint);
-void		xfs_dinode_from_disk(struct xfs_icdinode *,
-			struct xfs_dinode *);
 void		xfs_dinode_to_disk(struct xfs_dinode *,
 			struct xfs_icdinode *);
 void		xfs_idestroy_fork(struct xfs_inode *, int);
@@ -592,8 +586,6 @@ void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int);
 void		xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int);
 void		xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int);
 void		xfs_iext_realloc_direct(xfs_ifork_t *, int);
-void		xfs_iext_realloc_indirect(xfs_ifork_t *, int);
-void		xfs_iext_indirect_to_direct(xfs_ifork_t *);
 void		xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t);
 void		xfs_iext_inline_to_direct(xfs_ifork_t *, int);
 void		xfs_iext_destroy(xfs_ifork_t *);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 977c4aec587e..47d5b663c37e 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -263,14 +263,6 @@ xfs_inode_item_format(
 	}

 	/*
-	 * We don't have to worry about re-ordering here because
-	 * the update_size field is protected by the inode lock
-	 * and we have that held in exclusive mode.
-	 */
-	if (ip->i_update_size)
-		ip->i_update_size = 0;
-
-	/*
 	 * Make sure to get the latest atime from the Linux inode.
 	 */
 	xfs_synchronize_atime(ip);
@@ -712,8 +704,6 @@ xfs_inode_item_unlock(
 	 * Clear out the fields of the inode log item particular
 	 * to the current transaction.
 	 */
-	iip->ili_ilock_recur = 0;
-	iip->ili_iolock_recur = 0;
 	iip->ili_flags = 0;

 	/*
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index a52ac125f055..65bae4c9b8bf 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -137,8 +137,6 @@ typedef struct xfs_inode_log_item {
 	struct xfs_inode	*ili_inode;	   /* inode ptr */
 	xfs_lsn_t		ili_flush_lsn;	   /* lsn at last flush */
 	xfs_lsn_t		ili_last_lsn;	   /* lsn at last transaction */
-	unsigned short		ili_ilock_recur;   /* lock recursion count */
-	unsigned short		ili_iolock_recur;  /* lock recursion count */
 	unsigned short		ili_flags;	   /* misc flags */
 	unsigned short		ili_logged;	   /* flushed logged data */
 	unsigned int		ili_last_fields;   /* fields when flushed */
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h
index 7a28191cb0de..b8e4ee4e89a4 100644
--- a/fs/xfs/xfs_inum.h
+++ b/fs/xfs/xfs_inum.h
@@ -72,7 +72,6 @@ struct xfs_mount;

 #if XFS_BIG_INUMS
 #define	XFS_MAXINUMBER		((xfs_ino_t)((1ULL << 56) - 1ULL))
-#define	XFS_INO64_OFFSET	((xfs_ino_t)(1ULL << 32))
 #else
 #define	XFS_MAXINUMBER		((xfs_ino_t)((1ULL << 32) - 1ULL))
 #endif
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index aeb2d2221c7d..b68f9107e26c 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -39,7 +39,7 @@
 #include "xfs_error.h"
 #include "xfs_btree.h"

-int
+STATIC int
 xfs_internal_inum(
 	xfs_mount_t	*mp,
 	xfs_ino_t	ino)
@@ -353,9 +353,6 @@ xfs_bulkstat(
 	int			end_of_ag; /* set if we've seen the ag end */
 	int			error;	/* error code */
 	int			fmterror; /* bulkstat formatter result */
-	__int32_t		gcnt;	/* current btree rec's count */
-	xfs_inofree_t		gfree;	/* current btree rec's free mask */
-	xfs_agino_t		gino;	/* current btree rec's start inode */
 	int			i;	/* loop index */
 	int			icount;	/* count of inodes good in irbuf */
 	size_t			irbsize; /* size of irec buffer in bytes */
@@ -442,40 +439,43 @@ xfs_bulkstat(
 		 * we need to get the remainder of the chunk we're in.
 		 */
 		if (agino > 0) {
+			xfs_inobt_rec_incore_t r;
+
 			/*
 			 * Lookup the inode chunk that this inode lives in.
 			 */
-			error = xfs_inobt_lookup_le(cur, agino, 0, 0, &tmp);
+			error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE,
+						 &tmp);
 			if (!error &&	/* no I/O error */
 			    tmp &&	/* lookup succeeded */
 					/* got the record, should always work */
-			    !(error = xfs_inobt_get_rec(cur, &gino, &gcnt,
-				    &gfree, &i)) &&
+			    !(error = xfs_inobt_get_rec(cur, &r, &i)) &&
 			    i == 1 &&
 					/* this is the right chunk */
-			    agino < gino + XFS_INODES_PER_CHUNK &&
+			    agino < r.ir_startino + XFS_INODES_PER_CHUNK &&
 					/* lastino was not last in chunk */
-			    (chunkidx = agino - gino + 1) <
+			    (chunkidx = agino - r.ir_startino + 1) <
 				    XFS_INODES_PER_CHUNK &&
 					/* there are some left allocated */
 			    xfs_inobt_maskn(chunkidx,
-				    XFS_INODES_PER_CHUNK - chunkidx) & ~gfree) {
+					    XFS_INODES_PER_CHUNK - chunkidx) &
+					    ~r.ir_free) {
 				/*
 				 * Grab the chunk record.  Mark all the
 				 * uninteresting inodes (because they're
 				 * before our start point) free.
 				 */
 				for (i = 0; i < chunkidx; i++) {
-					if (XFS_INOBT_MASK(i) & ~gfree)
-						gcnt++;
+					if (XFS_INOBT_MASK(i) & ~r.ir_free)
+						r.ir_freecount++;
 				}
-				gfree |= xfs_inobt_maskn(0, chunkidx);
-				irbp->ir_startino = gino;
-				irbp->ir_freecount = gcnt;
-				irbp->ir_free = gfree;
+				r.ir_free |= xfs_inobt_maskn(0, chunkidx);
+				irbp->ir_startino = r.ir_startino;
+				irbp->ir_freecount = r.ir_freecount;
+				irbp->ir_free = r.ir_free;
 				irbp++;
-				agino = gino + XFS_INODES_PER_CHUNK;
-				icount = XFS_INODES_PER_CHUNK - gcnt;
+				agino = r.ir_startino + XFS_INODES_PER_CHUNK;
+				icount = XFS_INODES_PER_CHUNK - r.ir_freecount;
 			} else {
 				/*
 				 * If any of those tests failed, bump the
@@ -493,7 +493,7 @@ xfs_bulkstat(
 			/*
 			 * Start of ag.  Lookup the first inode chunk.
 			 */
-			error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &tmp);
+			error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp);
 			icount = 0;
 		}
 		/*
@@ -501,6 +501,8 @@ xfs_bulkstat(
 		 * until we run out of inodes or space in the buffer.
 		 */
 		while (irbp < irbufend && icount < ubcount) {
+			xfs_inobt_rec_incore_t r;
+
 			/*
 			 * Loop as long as we're unable to read the
 			 * inode btree.
@@ -510,51 +512,55 @@ xfs_bulkstat(
 				if (XFS_AGINO_TO_AGBNO(mp, agino) >=
 						be32_to_cpu(agi->agi_length))
 					break;
-				error = xfs_inobt_lookup_ge(cur, agino, 0, 0,
-						&tmp);
+				error = xfs_inobt_lookup(cur, agino,
+							 XFS_LOOKUP_GE, &tmp);
 				cond_resched();
 			}
 			/*
 			 * If ran off the end of the ag either with an error,
 			 * or the normal way, set end and stop collecting.
 			 */
-			if (error ||
-			    (error = xfs_inobt_get_rec(cur, &gino, &gcnt,
-				    &gfree, &i)) ||
-			    i == 0) {
+			if (error) {
 				end_of_ag = 1;
 				break;
 			}
+
+			error = xfs_inobt_get_rec(cur, &r, &i);
+			if (error || i == 0) {
+				end_of_ag = 1;
+				break;
+			}
+
 			/*
 			 * If this chunk has any allocated inodes, save it.
 			 * Also start read-ahead now for this chunk.
 			 */
-			if (gcnt < XFS_INODES_PER_CHUNK) {
+			if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
 				/*
 				 * Loop over all clusters in the next chunk.
 				 * Do a readahead if there are any allocated
 				 * inodes in that cluster.
 				 */
-				for (agbno = XFS_AGINO_TO_AGBNO(mp, gino),
-				     chunkidx = 0;
+				agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino);
+				for (chunkidx = 0;
 				     chunkidx < XFS_INODES_PER_CHUNK;
 				     chunkidx += nicluster,
 				     agbno += nbcluster) {
-					if (xfs_inobt_maskn(chunkidx,
-					    nicluster) & ~gfree)
+					if (xfs_inobt_maskn(chunkidx, nicluster)
+							& ~r.ir_free)
 						xfs_btree_reada_bufs(mp, agno,
 							agbno, nbcluster);
 				}
-				irbp->ir_startino = gino;
-				irbp->ir_freecount = gcnt;
-				irbp->ir_free = gfree;
+				irbp->ir_startino = r.ir_startino;
+				irbp->ir_freecount = r.ir_freecount;
+				irbp->ir_free = r.ir_free;
 				irbp++;
-				icount += XFS_INODES_PER_CHUNK - gcnt;
+				icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
 			}
 			/*
 			 * Set agino to after this chunk and bump the cursor.
 			 */
-			agino = gino + XFS_INODES_PER_CHUNK;
+			agino = r.ir_startino + XFS_INODES_PER_CHUNK;
 			error = xfs_btree_increment(cur, 0, &tmp);
 			cond_resched();
 		}
@@ -820,9 +826,7 @@ xfs_inumbers(
 	int		bufidx;
 	xfs_btree_cur_t	*cur;
 	int		error;
-	__int32_t	gcnt;
-	xfs_inofree_t	gfree;
-	xfs_agino_t	gino;
+	xfs_inobt_rec_incore_t r;
 	int		i;
 	xfs_ino_t	ino;
 	int		left;
@@ -855,7 +859,8 @@ xfs_inumbers(
 			continue;
 		}
 		cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
-		error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp);
+		error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
+					 &tmp);
 		if (error) {
 			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 			cur = NULL;
@@ -870,9 +875,8 @@ xfs_inumbers(
 				continue;
 			}
 		}
-		if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree,
-			&i)) ||
-		    i == 0) {
+		error = xfs_inobt_get_rec(cur, &r, &i);
+		if (error || i == 0) {
 			xfs_buf_relse(agbp);
 			agbp = NULL;
 			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
@@ -881,10 +885,12 @@ xfs_inumbers(
 			agino = 0;
 			continue;
 		}
-		agino = gino + XFS_INODES_PER_CHUNK - 1;
-		buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino);
-		buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt;
-		buffer[bufidx].xi_allocmask = ~gfree;
+		agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
+		buffer[bufidx].xi_startino =
+			XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
+		buffer[bufidx].xi_alloccount =
+			XFS_INODES_PER_CHUNK - r.ir_freecount;
+		buffer[bufidx].xi_allocmask = ~r.ir_free;
 		bufidx++;
 		left--;
 		if (bufidx == bcount) {
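
Throughout this bulkstat/inumbers rework the three loose variables gino/gcnt/gfree become one xfs_inobt_rec_incore_t, but the bitmask arithmetic is unchanged: a record covers a 64-inode chunk, ir_free carries one bit per free inode, so "allocated inodes at or after index i" is maskn(i, 64 - i) & ~ir_free. A self-contained model of that computation (names are illustrative; XFS_INODES_PER_CHUNK really is 64):

#include <inttypes.h>
#include <stdio.h>

#define INODES_PER_CHUNK 64

/* n consecutive 1-bits starting at bit i, like xfs_inobt_maskn(). */
static uint64_t maskn(int i, int n)
{
	uint64_t m = (n >= 64) ? ~0ULL : ((1ULL << n) - 1);
	return m << i;
}

int main(void)
{
	uint64_t ir_free = ~0ULL << 8;	/* only inodes 0..7 allocated */
	int chunkidx = 4;		/* resume after inode 3 */

	/* Any allocated inodes at or after chunkidx? (inodes 4..7 here) */
	uint64_t rest = maskn(chunkidx, INODES_PER_CHUNK - chunkidx) & ~ir_free;
	printf("allocated-from-%d mask: %#" PRIx64 "\n", chunkidx, rest);
	return 0;
}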
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 1fb04e7deb61..20792bf45946 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -99,11 +99,6 @@ xfs_bulkstat_one(
 	void		*dibuff,
 	int		*stat);

-int
-xfs_internal_inum(
-	xfs_mount_t	*mp,
-	xfs_ino_t	ino);
-
 typedef int (*inumbers_fmt_pf)(
 	void			__user *ubuffer, /* buffer to write to */
 	const xfs_inogrp_t	*buffer,	/* buffer to read from */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index bcad5f4c1fd1..679c7c4926a2 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -451,8 +451,6 @@ extern int xlog_find_tail(xlog_t *log,
 extern int	 xlog_recover(xlog_t *log);
 extern int	 xlog_recover_finish(xlog_t *log);
 extern void	 xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
-extern void	 xlog_recover_process_iunlinks(xlog_t *log);
-
 extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
 extern void	 xlog_put_bp(struct xfs_buf *);

diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 47da2fb45377..1099395d7d6c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3263,7 +3263,7 @@ xlog_recover_process_one_iunlink(
  * freeing of the inode and its removal from the list must be
  * atomic.
  */
-void
+STATIC void
 xlog_recover_process_iunlinks(
 	xlog_t	*log)
 {
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 5c6f092659c1..8b6c9e807efb 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1568,7 +1568,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
  *
  * The m_sb_lock must be held when this routine is called.
  */
-int
+STATIC int
 xfs_mod_incore_sb_unlocked(
 	xfs_mount_t	*mp,
 	xfs_sb_field_t	field,
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a5122382afde..a6c023bc0fb2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -414,13 +414,10 @@ typedef struct xfs_mod_sb {

 extern int	xfs_log_sbcount(xfs_mount_t *, uint);
 extern int	xfs_mountfs(xfs_mount_t *mp);
-extern void	xfs_mountfs_check_barriers(xfs_mount_t *mp);

 extern void	xfs_unmountfs(xfs_mount_t *);
 extern int	xfs_unmountfs_writesb(xfs_mount_t *);
 extern int	xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
-extern int	xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t,
-			int64_t, int);
 extern int	xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
 			uint, int);
 extern int	xfs_mount_log_sb(xfs_mount_t *, __int64_t);
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index afee7eb24323..4b0613d99faa 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -564,35 +564,6 @@ xfs_mru_cache_lookup(
 }

 /*
- * To look up an element using its key, but leave its location in the internal
- * lists alone, call xfs_mru_cache_peek().  If the element isn't found, this
- * function returns NULL.
- *
- * See the comments above the declaration of the xfs_mru_cache_lookup() function
- * for important locking information pertaining to this call.
- */
-void *
-xfs_mru_cache_peek(
-	xfs_mru_cache_t	*mru,
-	unsigned long	key)
-{
-	xfs_mru_cache_elem_t *elem;
-
-	ASSERT(mru && mru->lists);
-	if (!mru || !mru->lists)
-		return NULL;
-
-	spin_lock(&mru->lock);
-	elem = radix_tree_lookup(&mru->store, key);
-	if (!elem)
-		spin_unlock(&mru->lock);
-	else
-		__release(mru_lock); /* help sparse not be stupid */
-
-	return elem ? elem->value : NULL;
-}
-
-/*
  * To release the internal data structure spinlock after having performed an
  * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done()
  * with the data store pointer.
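
xfs_mru_cache_peek() goes away because nothing calls it any more; the deleted comment documents the semantics it offered: like xfs_mru_cache_lookup(), but without touching the element's position in the recency lists. A toy MRU array showing lookup-promotes versus peek-does-not (not the radix-tree-backed kernel structure):

#include <stdio.h>

#define N 4

/* order[0] is most recently used; values double as keys. */
static int order[N] = { 1, 2, 3, 4 };

/* peek: find the key but leave the recency order alone. */
static int peek(int key)
{
	for (int i = 0; i < N; i++)
		if (order[i] == key)
			return i;
	return -1;
}

/* lookup: find the key and promote it to the front. */
static int lookup(int key)
{
	int i = peek(key);
	if (i > 0) {
		int v = order[i];
		for (; i > 0; i--)
			order[i] = order[i - 1];
		order[0] = v;
		i = 0;
	}
	return i;
}

int main(void)
{
	peek(3);	/* order unchanged: 1 2 3 4 */
	lookup(3);	/* promoted:       3 1 2 4 */
	for (int i = 0; i < N; i++)
		printf("%d ", order[i]);
	printf("\n");
	return 0;
}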
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index dd58ea1bbebe..5d439f34b0c9 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -49,7 +49,6 @@ int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
 void *	xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
 void	xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key);
 void	*xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
-void	*xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key);
 void	xfs_mru_cache_done(struct xfs_mru_cache *mru);

 #endif /* __XFS_MRU_CACHE_H__ */
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index fea68615ed23..3f816ad7ff19 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -88,90 +88,6 @@ xfs_write_clear_setuid(
 }

 /*
- * Handle logging requirements of various synchronous types of write.
- */
-int
-xfs_write_sync_logforce(
-	xfs_mount_t	*mp,
-	xfs_inode_t	*ip)
-{
-	int		error = 0;
-
-	/*
-	 * If we're treating this as O_DSYNC and we have not updated the
-	 * size, force the log.
-	 */
-	if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
-	    !(ip->i_update_size)) {
-		xfs_inode_log_item_t	*iip = ip->i_itemp;
-
-		/*
-		 * If an allocation transaction occurred
-		 * without extending the size, then we have to force
-		 * the log up the proper point to ensure that the
-		 * allocation is permanent.  We can't count on
-		 * the fact that buffered writes lock out direct I/O
-		 * writes - the direct I/O write could have extended
-		 * the size nontransactionally, then finished before
-		 * we started.  xfs_write_file will think that the file
-		 * didn't grow but the update isn't safe unless the
-		 * size change is logged.
-		 *
-		 * Force the log if we've committed a transaction
-		 * against the inode or if someone else has and
-		 * the commit record hasn't gone to disk (e.g.
-		 * the inode is pinned).  This guarantees that
-		 * all changes affecting the inode are permanent
-		 * when we return.
-		 */
-		if (iip && iip->ili_last_lsn) {
-			error = _xfs_log_force(mp, iip->ili_last_lsn,
-					XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
-		} else if (xfs_ipincount(ip) > 0) {
-			error = _xfs_log_force(mp, (xfs_lsn_t)0,
-					XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
-		}
-
-	} else {
-		xfs_trans_t	*tp;
-
-		/*
-		 * O_SYNC or O_DSYNC _with_ a size update are handled
-		 * the same way.
-		 *
-		 * If the write was synchronous then we need to make
-		 * sure that the inode modification time is permanent.
-		 * We'll have updated the timestamp above, so here
-		 * we use a synchronous transaction to log the inode.
-		 * It's not fast, but it's necessary.
-		 *
-		 * If this a dsync write and the size got changed
-		 * non-transactionally, then we need to ensure that
-		 * the size change gets logged in a synchronous
-		 * transaction.
-		 */
-		tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
-		if ((error = xfs_trans_reserve(tp, 0,
-					XFS_SWRITE_LOG_RES(mp),
-					0, 0, 0))) {
-			/* Transaction reserve failed */
-			xfs_trans_cancel(tp, 0);
-		} else {
-			/* Transaction reserve successful */
-			xfs_ilock(ip, XFS_ILOCK_EXCL);
-			xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-			xfs_trans_ihold(tp, ip);
-			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-			xfs_trans_set_sync(tp);
-			error = xfs_trans_commit(tp, 0);
-			xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		}
-	}
-
-	return error;
-}
-
-/*
  * Force a shutdown of the filesystem instantly while keeping
  * the filesystem consistent. We don't do an unmount here; just shutdown
  * the shop, make sure that absolutely nothing persistent happens to
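
The removed comment spells out the old sync-write policy: for a dsync-style write with no size update, force the log to the inode's last commit LSN, or force everything if the inode is merely pinned; otherwise commit a synchronous dummy transaction so the size change is logged. A condensed model of just that decision tree (stubbed actions, hypothetical field names):

#include <stdio.h>

struct ino_state {
	int	size_dirty;	/* i_update_size analogue */
	long	last_lsn;	/* lsn of last committed txn, 0 if none */
	int	pincount;	/* unflushed commits referencing the inode */
};

/* Decide how a synchronous write must be made durable. */
static const char *sync_action(const struct ino_state *ip, int osync_is_osync)
{
	if (!osync_is_osync && !ip->size_dirty) {
		if (ip->last_lsn)
			return "force log to last_lsn";
		if (ip->pincount > 0)
			return "force whole log";
		return "nothing to do";
	}
	/* size changed (or full O_SYNC): log the inode synchronously */
	return "commit synchronous inode transaction";
}

int main(void)
{
	struct ino_state a = { 0, 1234, 0 }, b = { 1, 0, 0 };

	printf("%s\n%s\n", sync_action(&a, 0), sync_action(&b, 0));
	return 0;
}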
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index f76c003ec55d..f5e4874c37d8 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -68,7 +68,6 @@ xfs_get_extsz_hint(
  * Prototypes for functions in xfs_rw.c.
  */
 extern int xfs_write_clear_setuid(struct xfs_inode *ip);
-extern int xfs_write_sync_logforce(struct xfs_mount *mp, struct xfs_inode *ip);
 extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
 extern int xfs_bioerror(struct xfs_buf *bp);
 extern int xfs_bioerror_relse(struct xfs_buf *bp);
@@ -78,10 +77,4 @@ extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp,
 extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
 				xfs_buf_t *bp, xfs_daddr_t blkno);

-/*
- * Prototypes for functions in xfs_vnodeops.c.
- */
-extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
-			int flags);
-
 #endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 775249a54f6f..ed47fc77759c 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -68,7 +68,7 @@ typedef struct xfs_trans_header {
 #define XFS_TRANS_GROWFS		14
 #define XFS_TRANS_STRAT_WRITE		15
 #define XFS_TRANS_DIOSTRAT		16
-#define	XFS_TRANS_WRITE_SYNC		17
+/* 17 was XFS_TRANS_WRITE_SYNC */
 #define	XFS_TRANS_WRITEID		18
 #define	XFS_TRANS_ADDAFORK		19
 #define	XFS_TRANS_ATTRINVAL		20
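
Transaction type numbers are written into on-disk log records, so slot 17 is retired with a comment rather than renumbering the constants that follow, which would change how existing logs are interpreted. The same defensive pattern in miniature (illustrative values):

#include <stdio.h>

/* Values are persisted, so deleted entries keep their number reserved. */
enum log_rec_type {
	REC_GROWFS	= 14,
	REC_STRAT_WRITE	= 15,
	REC_DIOSTRAT	= 16,
	/* 17 retired: never reuse, old logs may still contain it */
	REC_WRITEID	= 18,
};

int main(void)
{
	printf("%d %d\n", REC_DIOSTRAT, REC_WRITEID);	/* 16 18 */
	return 0;
}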
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 8ee2f8c8b0a6..218829e6a152 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -307,7 +307,7 @@ xfs_trans_read_buf(
 			return (flags & XFS_BUF_TRYLOCK) ?
 					EAGAIN : XFS_ERROR(ENOMEM);

-		if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) {
+		if (XFS_BUF_GETERROR(bp) != 0) {
 			xfs_ioerror_alert("xfs_trans_read_buf", mp,
 					  bp, blkno);
 			error = XFS_BUF_GETERROR(bp);
@@ -315,7 +315,7 @@ xfs_trans_read_buf(
 			return error;
 		}
 #ifdef DEBUG
-		if (xfs_do_error && (bp != NULL)) {
+		if (xfs_do_error) {
 			if (xfs_error_target == target) {
 				if (((xfs_req_num++) % xfs_error_mod) == 0) {
 					xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 23d276af2e0c..785ff101da0a 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -49,30 +49,7 @@ xfs_trans_inode_broot_debug(


 /*
- * Get and lock the inode for the caller if it is not already
- * locked within the given transaction.  If it is already locked
- * within the transaction, just increment its lock recursion count
- * and return a pointer to it.
- *
- * For an inode to be locked in a transaction, the inode lock, as
- * opposed to the io lock, must be taken exclusively.  This ensures
- * that the inode can be involved in only 1 transaction at a time.
- * Lock recursion is handled on the io lock, but only for lock modes
- * of equal or lesser strength.  That is, you can recur on the io lock
- * held EXCL with a SHARED request but not vice versa.  Also, if
- * the inode is already a part of the transaction then you cannot
- * go from not holding the io lock to having it EXCL or SHARED.
- *
- * Use the inode cache routine xfs_inode_incore() to find the inode
- * if it is already owned by this transaction.
- *
- * If we don't already own the inode, use xfs_iget() to get it.
- * Since the inode log item structure is embedded in the incore
- * inode structure and is initialized when the inode is brought
- * into memory, there is nothing to do with it here.
- *
- * If the given transaction pointer is NULL, just call xfs_iget().
- * This simplifies code which must handle both cases.
+ * Get an inode and join it to the transaction.
  */
 int
 xfs_trans_iget(
@@ -84,62 +61,11 @@ xfs_trans_iget(
 	xfs_inode_t	**ipp)
 {
 	int			error;
-	xfs_inode_t		*ip;
-
-	/*
-	 * If the transaction pointer is NULL, just call the normal
-	 * xfs_iget().
-	 */
-	if (tp == NULL)
-		return xfs_iget(mp, NULL, ino, flags, lock_flags, ipp, 0);
-
-	/*
-	 * If we find the inode in core with this transaction
-	 * pointer in its i_transp field, then we know we already
-	 * have it locked.  In this case we just increment the lock
-	 * recursion count and return the inode to the caller.
-	 * Assert that the inode is already locked in the mode requested
-	 * by the caller.  We cannot do lock promotions yet, so
-	 * die if someone gets this wrong.
-	 */
-	if ((ip = xfs_inode_incore(tp->t_mountp, ino, tp)) != NULL) {
-		/*
-		 * Make sure that the inode lock is held EXCL and
-		 * that the io lock is never upgraded when the inode
-		 * is already a part of the transaction.
-		 */
-		ASSERT(ip->i_itemp != NULL);
-		ASSERT(lock_flags & XFS_ILOCK_EXCL);
-		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-		ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
-		       xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-		ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
-		       (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL));
-		ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
-		       xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
-		ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
-		       (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY));
-
-		if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) {
-			ip->i_itemp->ili_iolock_recur++;
-		}
-		if (lock_flags & XFS_ILOCK_EXCL) {
-			ip->i_itemp->ili_ilock_recur++;
-		}
-		*ipp = ip;
-		return 0;
-	}
-
-	ASSERT(lock_flags & XFS_ILOCK_EXCL);
-	error = xfs_iget(tp->t_mountp, tp, ino, flags, lock_flags, &ip, 0);
-	if (error) {
-		return error;
-	}
-	ASSERT(ip != NULL);

-	xfs_trans_ijoin(tp, ip, lock_flags);
-	*ipp = ip;
-	return 0;
+	error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0);
+	if (!error && tp)
+		xfs_trans_ijoin(tp, *ipp, lock_flags);
+	return error;
 }

 /*
@@ -163,8 +89,6 @@ xfs_trans_ijoin(
 		xfs_inode_item_init(ip, ip->i_mount);
 	iip = ip->i_itemp;
 	ASSERT(iip->ili_flags == 0);
-	ASSERT(iip->ili_ilock_recur == 0);
-	ASSERT(iip->ili_iolock_recur == 0);

 	/*
 	 * Get a log_item_desc to point at the new item.
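
With the recursion counters gone, xfs_trans_iget() collapses to "get the inode, then join it to the transaction if one was supplied". The wrapper shape, reduced to a standalone sketch (all names hypothetical):

#include <stdio.h>

struct txn { int id; };
struct inode { unsigned long ino; };

static struct inode cached = { 42 };

static int iget(unsigned long ino, struct inode **ipp)
{
	if (ino != cached.ino)
		return -1;		/* ENOENT stand-in */
	*ipp = &cached;
	return 0;
}

static void trans_ijoin(struct txn *tp, struct inode *ip)
{
	printf("joined ino %lu to txn %d\n", ip->ino, tp->id);
}

/* The simplified pattern: one lookup path, optional transaction join. */
static int trans_iget(struct txn *tp, unsigned long ino, struct inode **ipp)
{
	int error = iget(ino, ipp);

	if (!error && tp)
		trans_ijoin(tp, *ipp);
	return error;
}

int main(void)
{
	struct txn t = { 7 };
	struct inode *ip;

	return trans_iget(&t, 42, &ip);
}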
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 492d75bae2bf..a434f287962d 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -611,7 +611,7 @@ xfs_fsync(
 	xfs_inode_t	*ip)
 {
 	xfs_trans_t	*tp;
-	int		error;
+	int		error = 0;
 	int		log_flushed = 0, changed = 1;

 	xfs_itrace_entry(ip);
@@ -619,14 +619,9 @@ xfs_fsync(
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return XFS_ERROR(EIO);

-	/* capture size updates in I/O completion before writing the inode. */
-	error = xfs_wait_on_pages(ip, 0, -1);
-	if (error)
-		return XFS_ERROR(error);
-
 	/*
 	 * We always need to make sure that the required inode state is safe on
-	 * disk.  The vnode might be clean but we still might need to force the
+	 * disk.  The inode might be clean but we still might need to force the
 	 * log because of committed transactions that haven't hit the disk yet.
 	 * Likewise, there could be unflushed non-transactional changes to the
 	 * inode core that have to go to disk and this requires us to issue
@@ -638,7 +633,7 @@ xfs_fsync(
 	 */
 	xfs_ilock(ip, XFS_ILOCK_SHARED);

-	if (!(ip->i_update_size || ip->i_update_core)) {
+	if (!ip->i_update_core) {
 		/*
 		 * Timestamps/size haven't changed since last inode flush or
 		 * inode transaction commit.  That means either nothing got
@@ -718,7 +713,7 @@ xfs_fsync(
  * when the link count isn't zero and by xfs_dm_punch_hole() when
  * punching a hole to EOF.
  */
-int
+STATIC int
 xfs_free_eofblocks(
 	xfs_mount_t	*mp,
 	xfs_inode_t	*ip,
@@ -1476,8 +1471,8 @@ xfs_create(
 	if (error == ENOSPC) {
 		/* flush outstanding delalloc blocks and retry */
 		xfs_flush_inodes(dp);
-		error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
-			XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
+		error = xfs_trans_reserve(tp, resblks, log_res, 0,
+			XFS_TRANS_PERM_LOG_RES, log_count);
 	}
 	if (error == ENOSPC) {
 		/* No space at all so try a "no-allocation" reservation */
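
The xfs_create() hunk keeps the existing ENOSPC ladder, now parameterised by log_res/log_count: flush outstanding delalloc blocks and retry the full reservation, then fall back to a "no-allocation" reservation. That retry-with-fallback shape as a standalone sketch (hypothetical names; the fake reservation succeeds only for the no-allocation case):

#include <stdio.h>

#define ENOSPC 28

static int attempts;

/* Pretend reservation that succeeds only with resblks == 0 after the
 * first flush, mimicking a nearly full filesystem. */
static int trans_reserve(int resblks)
{
	attempts++;
	return (resblks == 0 && attempts > 1) ? 0 : ENOSPC;
}

static void flush_inodes(void) { printf("flushing delalloc blocks\n"); }

int main(void)
{
	int resblks = 16;
	int error = trans_reserve(resblks);

	if (error == ENOSPC) {
		/* flush outstanding delalloc blocks and retry */
		flush_inodes();
		error = trans_reserve(resblks);
	}
	if (error == ENOSPC) {
		/* no space at all: try a "no-allocation" reservation */
		resblks = 0;
		error = trans_reserve(resblks);
	}
	printf("error=%d resblks=%d attempts=%d\n", error, resblks, attempts);
	return error;
}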