aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/aio.c16
-rw-r--r--fs/attr.c26
-rw-r--r--fs/autofs/waitq.c2
-rw-r--r--fs/autofs4/waitq.c2
-rw-r--r--fs/cifs/inode.c5
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/cramfs/inode.c9
-rw-r--r--fs/ecryptfs/inode.c8
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/ext3/fsync.c2
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/resize.c6
-rw-r--r--fs/ext3/super.c6
-rw-r--r--fs/ext4/balloc.c112
-rw-r--r--fs/ext4/dir.c7
-rw-r--r--fs/ext4/extents.c14
-rw-r--r--fs/ext4/fsync.c2
-rw-r--r--fs/ext4/group.h27
-rw-r--r--fs/ext4/ialloc.c151
-rw-r--r--fs/ext4/inode.c18
-rw-r--r--fs/ext4/namei.c20
-rw-r--r--fs/ext4/resize.c59
-rw-r--r--fs/ext4/super.c97
-rw-r--r--fs/ext4/xattr.c7
-rw-r--r--fs/fuse/dir.c241
-rw-r--r--fs/fuse/file.c111
-rw-r--r--fs/fuse/fuse_i.h32
-rw-r--r--fs/fuse/inode.c39
-rw-r--r--fs/jbd/commit.c6
-rw-r--r--fs/jbd/journal.c108
-rw-r--r--fs/jbd/transaction.c24
-rw-r--r--fs/jbd2/commit.c16
-rw-r--r--fs/jbd2/journal.c128
-rw-r--r--fs/jbd2/recovery.c2
-rw-r--r--fs/jbd2/revoke.c4
-rw-r--r--fs/jbd2/transaction.c19
-rw-r--r--fs/namei.c6
-rw-r--r--fs/nfs/inode.c4
-rw-r--r--fs/nfsd/vfs.c21
-rw-r--r--fs/nls/nls_base.c2
-rw-r--r--fs/reiserfs/inode.c6
42 files changed, 864 insertions, 508 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index d8062745716a..e31f3691b151 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -140,6 +140,7 @@ config EXT4DEV_FS
140 tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" 140 tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)"
141 depends on EXPERIMENTAL 141 depends on EXPERIMENTAL
142 select JBD2 142 select JBD2
143 select CRC16
143 help 144 help
144 Ext4dev is a predecessor filesystem of the next generation 145 Ext4dev is a predecessor filesystem of the next generation
145 extended fs ext4, based on ext3 filesystem code. It will be 146 extended fs ext4, based on ext3 filesystem code. It will be
diff --git a/fs/aio.c b/fs/aio.c
index d02f43b50a3d..f12db415c0f6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -710,18 +710,9 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
710 710
711 /* 711 /*
712 * Now we are all set to call the retry method in async 712 * Now we are all set to call the retry method in async
713 * context. By setting this thread's io_wait context 713 * context.
714 * to point to the wait queue entry inside the currently
715 * running iocb for the duration of the retry, we ensure
716 * that async notification wakeups are queued by the
717 * operation instead of blocking waits, and when notified,
718 * cause the iocb to be kicked for continuation (through
719 * the aio_wake_function callback).
720 */ 714 */
721 BUG_ON(current->io_wait != NULL);
722 current->io_wait = &iocb->ki_wait;
723 ret = retry(iocb); 715 ret = retry(iocb);
724 current->io_wait = NULL;
725 716
726 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { 717 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
727 BUG_ON(!list_empty(&iocb->ki_wait.task_list)); 718 BUG_ON(!list_empty(&iocb->ki_wait.task_list));
@@ -1508,10 +1499,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
1508 * Simply triggers a retry of the operation via kick_iocb. 1499 * Simply triggers a retry of the operation via kick_iocb.
1509 * 1500 *
1510 * This callback is specified in the wait queue entry in 1501 * This callback is specified in the wait queue entry in
1511 * a kiocb (current->io_wait points to this wait queue 1502 * a kiocb.
1512 * entry when an aio operation executes; it is used
1513 * instead of a synchronous wait when an i/o blocking
1514 * condition is encountered during aio).
1515 * 1503 *
1516 * Note: 1504 * Note:
1517 * This routine is executed with the wait queue lock held. 1505 * This routine is executed with the wait queue lock held.
diff --git a/fs/attr.c b/fs/attr.c
index ae58bd3f875f..966b73e25f82 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -103,12 +103,11 @@ EXPORT_SYMBOL(inode_setattr);
103int notify_change(struct dentry * dentry, struct iattr * attr) 103int notify_change(struct dentry * dentry, struct iattr * attr)
104{ 104{
105 struct inode *inode = dentry->d_inode; 105 struct inode *inode = dentry->d_inode;
106 mode_t mode; 106 mode_t mode = inode->i_mode;
107 int error; 107 int error;
108 struct timespec now; 108 struct timespec now;
109 unsigned int ia_valid = attr->ia_valid; 109 unsigned int ia_valid = attr->ia_valid;
110 110
111 mode = inode->i_mode;
112 now = current_fs_time(inode->i_sb); 111 now = current_fs_time(inode->i_sb);
113 112
114 attr->ia_ctime = now; 113 attr->ia_ctime = now;
@@ -125,18 +124,25 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
125 if (error) 124 if (error)
126 return error; 125 return error;
127 } 126 }
127
128 /*
129 * We now pass ATTR_KILL_S*ID to the lower level setattr function so
130 * that the function has the ability to reinterpret a mode change
131 * that's due to these bits. This adds an implicit restriction that
132 * no function will ever call notify_change with both ATTR_MODE and
133 * ATTR_KILL_S*ID set.
134 */
135 if ((ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) &&
136 (ia_valid & ATTR_MODE))
137 BUG();
138
128 if (ia_valid & ATTR_KILL_SUID) { 139 if (ia_valid & ATTR_KILL_SUID) {
129 attr->ia_valid &= ~ATTR_KILL_SUID;
130 if (mode & S_ISUID) { 140 if (mode & S_ISUID) {
131 if (!(ia_valid & ATTR_MODE)) { 141 ia_valid = attr->ia_valid |= ATTR_MODE;
132 ia_valid = attr->ia_valid |= ATTR_MODE; 142 attr->ia_mode = (inode->i_mode & ~S_ISUID);
133 attr->ia_mode = inode->i_mode;
134 }
135 attr->ia_mode &= ~S_ISUID;
136 } 143 }
137 } 144 }
138 if (ia_valid & ATTR_KILL_SGID) { 145 if (ia_valid & ATTR_KILL_SGID) {
139 attr->ia_valid &= ~ ATTR_KILL_SGID;
140 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { 146 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
141 if (!(ia_valid & ATTR_MODE)) { 147 if (!(ia_valid & ATTR_MODE)) {
142 ia_valid = attr->ia_valid |= ATTR_MODE; 148 ia_valid = attr->ia_valid |= ATTR_MODE;
@@ -145,7 +151,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
145 attr->ia_mode &= ~S_ISGID; 151 attr->ia_mode &= ~S_ISGID;
146 } 152 }
147 } 153 }
148 if (!attr->ia_valid) 154 if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID)))
149 return 0; 155 return 0;
150 156
151 if (ia_valid & ATTR_SIZE) 157 if (ia_valid & ATTR_SIZE)
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index 19a9cafb5ddf..be46805972f0 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -182,7 +182,7 @@ int autofs_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_toke
182{ 182{
183 struct autofs_wait_queue *wq, **wql; 183 struct autofs_wait_queue *wq, **wql;
184 184
185 for ( wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next ) { 185 for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
186 if ( wq->wait_queue_token == wait_queue_token ) 186 if ( wq->wait_queue_token == wait_queue_token )
187 break; 187 break;
188 } 188 }
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 0d041a9cb348..1fe28e4754c2 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -376,7 +376,7 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
376 struct autofs_wait_queue *wq, **wql; 376 struct autofs_wait_queue *wq, **wql;
377 377
378 mutex_lock(&sbi->wq_mutex); 378 mutex_lock(&sbi->wq_mutex);
379 for (wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next) { 379 for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
380 if (wq->wait_queue_token == wait_queue_token) 380 if (wq->wait_queue_token == wait_queue_token)
381 break; 381 break;
382 } 382 }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index dd4167762a8e..279f3c5e0ce3 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1538,6 +1538,11 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1538 } 1538 }
1539 1539
1540 time_buf.Attributes = 0; 1540 time_buf.Attributes = 0;
1541
1542 /* skip mode change if it's just for clearing setuid/setgid */
1543 if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
1544 attrs->ia_valid &= ~ATTR_MODE;
1545
1541 if (attrs->ia_valid & ATTR_MODE) { 1546 if (attrs->ia_valid & ATTR_MODE) {
1542 cFYI(1, ("Mode changed to 0x%x", attrs->ia_mode)); 1547 cFYI(1, ("Mode changed to 0x%x", attrs->ia_mode));
1543 mode = attrs->ia_mode; 1548 mode = attrs->ia_mode;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6dacd39bf048..a4284ccac1f9 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -3001,7 +3001,7 @@ static int __init init_sys32_ioctl(void)
3001 int i; 3001 int i;
3002 3002
3003 for (i = 0; i < ARRAY_SIZE(ioctl_start); i++) { 3003 for (i = 0; i < ARRAY_SIZE(ioctl_start); i++) {
3004 if (ioctl_start[i].next != 0) { 3004 if (ioctl_start[i].next) {
3005 printk("ioctl translation %d bad\n",i); 3005 printk("ioctl translation %d bad\n",i);
3006 return -1; 3006 return -1;
3007 } 3007 }
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 5c817bd08389..350680fd7da7 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -148,7 +148,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
148{ 148{
149 struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; 149 struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
150 struct page *pages[BLKS_PER_BUF]; 150 struct page *pages[BLKS_PER_BUF];
151 unsigned i, blocknr, buffer, unread; 151 unsigned i, blocknr, buffer;
152 unsigned long devsize; 152 unsigned long devsize;
153 char *data; 153 char *data;
154 154
@@ -175,7 +175,6 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
175 devsize = mapping->host->i_size >> PAGE_CACHE_SHIFT; 175 devsize = mapping->host->i_size >> PAGE_CACHE_SHIFT;
176 176
177 /* Ok, read in BLKS_PER_BUF pages completely first. */ 177 /* Ok, read in BLKS_PER_BUF pages completely first. */
178 unread = 0;
179 for (i = 0; i < BLKS_PER_BUF; i++) { 178 for (i = 0; i < BLKS_PER_BUF; i++) {
180 struct page *page = NULL; 179 struct page *page = NULL;
181 180
@@ -362,7 +361,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
362 if (offset & 3) 361 if (offset & 3)
363 return -EINVAL; 362 return -EINVAL;
364 363
365 buf = kmalloc(256, GFP_KERNEL); 364 buf = kmalloc(CRAMFS_MAXPATHLEN, GFP_KERNEL);
366 if (!buf) 365 if (!buf)
367 return -ENOMEM; 366 return -ENOMEM;
368 367
@@ -376,7 +375,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
376 int namelen, error; 375 int namelen, error;
377 376
378 mutex_lock(&read_mutex); 377 mutex_lock(&read_mutex);
379 de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+256); 378 de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN);
380 name = (char *)(de+1); 379 name = (char *)(de+1);
381 380
382 /* 381 /*
@@ -426,7 +425,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s
426 char *name; 425 char *name;
427 int namelen, retval; 426 int namelen, retval;
428 427
429 de = cramfs_read(dir->i_sb, OFFSET(dir) + offset, sizeof(*de)+256); 428 de = cramfs_read(dir->i_sb, OFFSET(dir) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN);
430 name = (char *)(de+1); 429 name = (char *)(de+1);
431 430
432 /* Try to take advantage of sorted directories */ 431 /* Try to take advantage of sorted directories */
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5701f816faf4..0b1ab016fa2e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -914,6 +914,14 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
914 if (rc < 0) 914 if (rc < 0)
915 goto out; 915 goto out;
916 } 916 }
917
918 /*
919 * mode change is for clearing setuid/setgid bits. Allow lower fs
920 * to interpret this in its own way.
921 */
922 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
923 ia->ia_valid &= ~ATTR_MODE;
924
917 rc = notify_change(lower_dentry, ia); 925 rc = notify_change(lower_dentry, ia);
918out: 926out:
919 fsstack_copy_attr_all(inode, lower_inode, NULL); 927 fsstack_copy_attr_all(inode, lower_inode, NULL);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 77b9953624f4..de6189291954 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -463,7 +463,7 @@ static void ep_free(struct eventpoll *ep)
463 * holding "epmutex" we can be sure that no file cleanup code will hit 463 * holding "epmutex" we can be sure that no file cleanup code will hit
464 * us during this operation. So we can avoid the lock on "ep->lock". 464 * us during this operation. So we can avoid the lock on "ep->lock".
465 */ 465 */
466 while ((rbp = rb_first(&ep->rbr)) != 0) { 466 while ((rbp = rb_first(&ep->rbr)) != NULL) {
467 epi = rb_entry(rbp, struct epitem, rbn); 467 epi = rb_entry(rbp, struct epitem, rbn);
468 ep_remove(ep, epi); 468 ep_remove(ep, epi);
469 } 469 }
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index dd1fd3c0fc05..a588e23841d4 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -47,7 +47,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
47 struct inode *inode = dentry->d_inode; 47 struct inode *inode = dentry->d_inode;
48 int ret = 0; 48 int ret = 0;
49 49
50 J_ASSERT(ext3_journal_current_handle() == 0); 50 J_ASSERT(ext3_journal_current_handle() == NULL);
51 51
52 /* 52 /*
53 * data=writeback: 53 * data=writeback:
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2f2b6864db10..3dec003b773e 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1028,7 +1028,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
1028 } 1028 }
1029 if (buffer_new(&dummy)) { 1029 if (buffer_new(&dummy)) {
1030 J_ASSERT(create != 0); 1030 J_ASSERT(create != 0);
1031 J_ASSERT(handle != 0); 1031 J_ASSERT(handle != NULL);
1032 1032
1033 /* 1033 /*
1034 * Now that we do not always journal data, we should 1034 * Now that we do not always journal data, we should
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 771f7ada15d9..44de1453c301 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -245,10 +245,10 @@ static int setup_new_group_blocks(struct super_block *sb,
245 brelse(gdb); 245 brelse(gdb);
246 goto exit_bh; 246 goto exit_bh;
247 } 247 }
248 lock_buffer(bh); 248 lock_buffer(gdb);
249 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size); 249 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
250 set_buffer_uptodate(gdb); 250 set_buffer_uptodate(gdb);
251 unlock_buffer(bh); 251 unlock_buffer(gdb);
252 ext3_journal_dirty_metadata(handle, gdb); 252 ext3_journal_dirty_metadata(handle, gdb);
253 ext3_set_bit(bit, bh->b_data); 253 ext3_set_bit(bit, bh->b_data);
254 brelse(gdb); 254 brelse(gdb);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 141573de7a9a..81868c0bc40e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1620,7 +1620,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1620 } 1620 }
1621 1621
1622 brelse (bh); 1622 brelse (bh);
1623 sb_set_blocksize(sb, blocksize); 1623 if (!sb_set_blocksize(sb, blocksize)) {
1624 printk(KERN_ERR "EXT3-fs: bad blocksize %d.\n",
1625 blocksize);
1626 goto out_fail;
1627 }
1624 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1628 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1625 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1629 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1626 bh = sb_bread(sb, logic_sb_block); 1630 bh = sb_bread(sb, logic_sb_block);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b74bf4368441..e906b65448e2 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -20,6 +20,7 @@
20#include <linux/quotaops.h> 20#include <linux/quotaops.h>
21#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
22 22
23#include "group.h"
23/* 24/*
24 * balloc.c contains the blocks allocation and deallocation routines 25 * balloc.c contains the blocks allocation and deallocation routines
25 */ 26 */
@@ -42,6 +43,94 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
42 43
43} 44}
44 45
46/* Initializes an uninitialized block bitmap if given, and returns the
47 * number of blocks free in the group. */
48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
49 int block_group, struct ext4_group_desc *gdp)
50{
51 unsigned long start;
52 int bit, bit_max;
53 unsigned free_blocks, group_blocks;
54 struct ext4_sb_info *sbi = EXT4_SB(sb);
55
56 if (bh) {
57 J_ASSERT_BH(bh, buffer_locked(bh));
58
59 /* If checksum is bad mark all blocks used to prevent allocation
60 * essentially implementing a per-group read-only flag. */
61 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
62 ext4_error(sb, __FUNCTION__,
63 "Checksum bad for group %u\n", block_group);
64 gdp->bg_free_blocks_count = 0;
65 gdp->bg_free_inodes_count = 0;
66 gdp->bg_itable_unused = 0;
67 memset(bh->b_data, 0xff, sb->s_blocksize);
68 return 0;
69 }
70 memset(bh->b_data, 0, sb->s_blocksize);
71 }
72
73 /* Check for superblock and gdt backups in this group */
74 bit_max = ext4_bg_has_super(sb, block_group);
75
76 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
77 block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
78 sbi->s_desc_per_block) {
79 if (bit_max) {
80 bit_max += ext4_bg_num_gdb(sb, block_group);
81 bit_max +=
82 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
83 }
84 } else { /* For META_BG_BLOCK_GROUPS */
85 int group_rel = (block_group -
86 le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
87 EXT4_DESC_PER_BLOCK(sb);
88 if (group_rel == 0 || group_rel == 1 ||
89 (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1))
90 bit_max += 1;
91 }
92
93 if (block_group == sbi->s_groups_count - 1) {
94 /*
95 * Even though mke2fs always initialize first and last group
96 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
97 * to make sure we calculate the right free blocks
98 */
99 group_blocks = ext4_blocks_count(sbi->s_es) -
100 le32_to_cpu(sbi->s_es->s_first_data_block) -
101 (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1));
102 } else {
103 group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
104 }
105
106 free_blocks = group_blocks - bit_max;
107
108 if (bh) {
109 for (bit = 0; bit < bit_max; bit++)
110 ext4_set_bit(bit, bh->b_data);
111
112 start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
113 le32_to_cpu(sbi->s_es->s_first_data_block);
114
115 /* Set bits for block and inode bitmaps, and inode table */
116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
117 ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data);
118 for (bit = (ext4_inode_table(sb, gdp) - start),
119 bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
120 ext4_set_bit(bit, bh->b_data);
121
122 /*
123 * Also if the number of blocks within the group is
124 * less than the blocksize * 8 ( which is the size
125 * of bitmap ), set rest of the block bitmap to 1
126 */
127 mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
128 }
129
130 return free_blocks - sbi->s_itb_per_group - 2;
131}
132
133
45/* 134/*
46 * The free blocks are managed by bitmaps. A file system contains several 135 * The free blocks are managed by bitmaps. A file system contains several
47 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap 136 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
@@ -119,7 +208,7 @@ block_in_use(ext4_fsblk_t block, struct super_block *sb, unsigned char *map)
119 * 208 *
120 * Return buffer_head on success or NULL in case of failure. 209 * Return buffer_head on success or NULL in case of failure.
121 */ 210 */
122static struct buffer_head * 211struct buffer_head *
123read_block_bitmap(struct super_block *sb, unsigned int block_group) 212read_block_bitmap(struct super_block *sb, unsigned int block_group)
124{ 213{
125 int i; 214 int i;
@@ -127,11 +216,24 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
127 struct buffer_head * bh = NULL; 216 struct buffer_head * bh = NULL;
128 ext4_fsblk_t bitmap_blk; 217 ext4_fsblk_t bitmap_blk;
129 218
130 desc = ext4_get_group_desc (sb, block_group, NULL); 219 desc = ext4_get_group_desc(sb, block_group, NULL);
131 if (!desc) 220 if (!desc)
132 return NULL; 221 return NULL;
133 bitmap_blk = ext4_block_bitmap(sb, desc); 222 bitmap_blk = ext4_block_bitmap(sb, desc);
134 bh = sb_bread(sb, bitmap_blk); 223 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
224 bh = sb_getblk(sb, bitmap_blk);
225 if (!buffer_uptodate(bh)) {
226 lock_buffer(bh);
227 if (!buffer_uptodate(bh)) {
228 ext4_init_block_bitmap(sb, bh, block_group,
229 desc);
230 set_buffer_uptodate(bh);
231 }
232 unlock_buffer(bh);
233 }
234 } else {
235 bh = sb_bread(sb, bitmap_blk);
236 }
135 if (!bh) 237 if (!bh)
136 ext4_error (sb, __FUNCTION__, 238 ext4_error (sb, __FUNCTION__,
137 "Cannot read block bitmap - " 239 "Cannot read block bitmap - "
@@ -627,6 +729,7 @@ do_more:
627 desc->bg_free_blocks_count = 729 desc->bg_free_blocks_count =
628 cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) + 730 cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
629 group_freed); 731 group_freed);
732 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
630 spin_unlock(sb_bgl_lock(sbi, block_group)); 733 spin_unlock(sb_bgl_lock(sbi, block_group));
631 percpu_counter_add(&sbi->s_freeblocks_counter, count); 734 percpu_counter_add(&sbi->s_freeblocks_counter, count);
632 735
@@ -1685,8 +1788,11 @@ allocated:
1685 ret_block, goal_hits, goal_attempts); 1788 ret_block, goal_hits, goal_attempts);
1686 1789
1687 spin_lock(sb_bgl_lock(sbi, group_no)); 1790 spin_lock(sb_bgl_lock(sbi, group_no));
1791 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1792 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
1688 gdp->bg_free_blocks_count = 1793 gdp->bg_free_blocks_count =
1689 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num); 1794 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
1795 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
1690 spin_unlock(sb_bgl_lock(sbi, group_no)); 1796 spin_unlock(sb_bgl_lock(sbi, group_no));
1691 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1797 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
1692 1798
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 0fb1e62b20d0..f612bef98315 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -47,9 +47,7 @@ const struct file_operations ext4_dir_operations = {
47 .compat_ioctl = ext4_compat_ioctl, 47 .compat_ioctl = ext4_compat_ioctl,
48#endif 48#endif
49 .fsync = ext4_sync_file, /* BKL held */ 49 .fsync = ext4_sync_file, /* BKL held */
50#ifdef CONFIG_EXT4_INDEX
51 .release = ext4_release_dir, 50 .release = ext4_release_dir,
52#endif
53}; 51};
54 52
55 53
@@ -107,7 +105,6 @@ static int ext4_readdir(struct file * filp,
107 105
108 sb = inode->i_sb; 106 sb = inode->i_sb;
109 107
110#ifdef CONFIG_EXT4_INDEX
111 if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, 108 if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
112 EXT4_FEATURE_COMPAT_DIR_INDEX) && 109 EXT4_FEATURE_COMPAT_DIR_INDEX) &&
113 ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) || 110 ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) ||
@@ -123,7 +120,6 @@ static int ext4_readdir(struct file * filp,
123 */ 120 */
124 EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL; 121 EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
125 } 122 }
126#endif
127 stored = 0; 123 stored = 0;
128 offset = filp->f_pos & (sb->s_blocksize - 1); 124 offset = filp->f_pos & (sb->s_blocksize - 1);
129 125
@@ -232,7 +228,6 @@ out:
232 return ret; 228 return ret;
233} 229}
234 230
235#ifdef CONFIG_EXT4_INDEX
236/* 231/*
237 * These functions convert from the major/minor hash to an f_pos 232 * These functions convert from the major/minor hash to an f_pos
238 * value. 233 * value.
@@ -518,5 +513,3 @@ static int ext4_release_dir (struct inode * inode, struct file * filp)
518 513
519 return 0; 514 return 0;
520} 515}
521
522#endif
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 78beb096f57d..85287742f2ae 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -33,7 +33,7 @@
33#include <linux/fs.h> 33#include <linux/fs.h>
34#include <linux/time.h> 34#include <linux/time.h>
35#include <linux/ext4_jbd2.h> 35#include <linux/ext4_jbd2.h>
36#include <linux/jbd.h> 36#include <linux/jbd2.h>
37#include <linux/highuid.h> 37#include <linux/highuid.h>
38#include <linux/pagemap.h> 38#include <linux/pagemap.h>
39#include <linux/quotaops.h> 39#include <linux/quotaops.h>
@@ -52,7 +52,7 @@ static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
52{ 52{
53 ext4_fsblk_t block; 53 ext4_fsblk_t block;
54 54
55 block = le32_to_cpu(ex->ee_start); 55 block = le32_to_cpu(ex->ee_start_lo);
56 block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; 56 block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
57 return block; 57 return block;
58} 58}
@@ -65,7 +65,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
65{ 65{
66 ext4_fsblk_t block; 66 ext4_fsblk_t block;
67 67
68 block = le32_to_cpu(ix->ei_leaf); 68 block = le32_to_cpu(ix->ei_leaf_lo);
69 block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; 69 block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
70 return block; 70 return block;
71} 71}
@@ -77,7 +77,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
77 */ 77 */
78static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) 78static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
79{ 79{
80 ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff)); 80 ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
81 ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); 81 ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
82} 82}
83 83
@@ -88,7 +88,7 @@ static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
88 */ 88 */
89static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) 89static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
90{ 90{
91 ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff)); 91 ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
92 ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); 92 ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
93} 93}
94 94
@@ -1409,8 +1409,7 @@ has_space:
1409 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); 1409 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1);
1410 nearex = path[depth].p_ext; 1410 nearex = path[depth].p_ext;
1411 nearex->ee_block = newext->ee_block; 1411 nearex->ee_block = newext->ee_block;
1412 nearex->ee_start = newext->ee_start; 1412 ext4_ext_store_pblock(nearex, ext_pblock(newext));
1413 nearex->ee_start_hi = newext->ee_start_hi;
1414 nearex->ee_len = newext->ee_len; 1413 nearex->ee_len = newext->ee_len;
1415 1414
1416merge: 1415merge:
@@ -2177,7 +2176,6 @@ int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode,
2177 } 2176 }
2178 /* ex2: iblock to iblock + maxblocks-1 : initialised */ 2177 /* ex2: iblock to iblock + maxblocks-1 : initialised */
2179 ex2->ee_block = cpu_to_le32(iblock); 2178 ex2->ee_block = cpu_to_le32(iblock);
2180 ex2->ee_start = cpu_to_le32(newblock);
2181 ext4_ext_store_pblock(ex2, newblock); 2179 ext4_ext_store_pblock(ex2, newblock);
2182 ex2->ee_len = cpu_to_le16(allocated); 2180 ex2->ee_len = cpu_to_le16(allocated);
2183 if (ex2 != ex) 2181 if (ex2 != ex)
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 2a167d7131fa..8d50879d1c2c 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -47,7 +47,7 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
47 struct inode *inode = dentry->d_inode; 47 struct inode *inode = dentry->d_inode;
48 int ret = 0; 48 int ret = 0;
49 49
50 J_ASSERT(ext4_journal_current_handle() == 0); 50 J_ASSERT(ext4_journal_current_handle() == NULL);
51 51
52 /* 52 /*
53 * data=writeback: 53 * data=writeback:
diff --git a/fs/ext4/group.h b/fs/ext4/group.h
new file mode 100644
index 000000000000..1577910bb58b
--- /dev/null
+++ b/fs/ext4/group.h
@@ -0,0 +1,27 @@
1/*
2 * linux/fs/ext4/group.h
3 *
4 * Copyright (C) 2007 Cluster File Systems, Inc
5 *
6 * Author: Andreas Dilger <adilger@clusterfs.com>
7 */
8
9#ifndef _LINUX_EXT4_GROUP_H
10#define _LINUX_EXT4_GROUP_H
11
12extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
13 struct ext4_group_desc *gdp);
14extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
15 struct ext4_group_desc *gdp);
16struct buffer_head *read_block_bitmap(struct super_block *sb,
17 unsigned int block_group);
18extern unsigned ext4_init_block_bitmap(struct super_block *sb,
19 struct buffer_head *bh, int group,
20 struct ext4_group_desc *desc);
21#define ext4_free_blocks_after_init(sb, group, desc) \
22 ext4_init_block_bitmap(sb, NULL, group, desc)
23extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
24 struct buffer_head *bh, int group,
25 struct ext4_group_desc *desc);
26extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
27#endif /* _LINUX_EXT4_GROUP_H */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index d0c7793d9393..c61f37fd3f05 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -28,6 +28,7 @@
28 28
29#include "xattr.h" 29#include "xattr.h"
30#include "acl.h" 30#include "acl.h"
31#include "group.h"
31 32
32/* 33/*
33 * ialloc.c contains the inodes allocation and deallocation routines 34 * ialloc.c contains the inodes allocation and deallocation routines
@@ -43,6 +44,52 @@
43 * the free blocks count in the block. 44 * the free blocks count in the block.
44 */ 45 */
45 46
47/*
48 * To avoid calling the atomic setbit hundreds or thousands of times, we only
49 * need to use it within a single byte (to ensure we get endianness right).
50 * We can use memset for the rest of the bitmap as there are no other users.
51 */
52void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
53{
54 int i;
55
56 if (start_bit >= end_bit)
57 return;
58
59 ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
60 for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
61 ext4_set_bit(i, bitmap);
62 if (i < end_bit)
63 memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
64}
65
66/* Initializes an uninitialized inode bitmap */
67unsigned ext4_init_inode_bitmap(struct super_block *sb,
68 struct buffer_head *bh, int block_group,
69 struct ext4_group_desc *gdp)
70{
71 struct ext4_sb_info *sbi = EXT4_SB(sb);
72
73 J_ASSERT_BH(bh, buffer_locked(bh));
74
75 /* If checksum is bad mark all blocks and inodes use to prevent
76 * allocation, essentially implementing a per-group read-only flag. */
77 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
78 ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
79 block_group);
80 gdp->bg_free_blocks_count = 0;
81 gdp->bg_free_inodes_count = 0;
82 gdp->bg_itable_unused = 0;
83 memset(bh->b_data, 0xff, sb->s_blocksize);
84 return 0;
85 }
86
87 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
88 mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
89 bh->b_data);
90
91 return EXT4_INODES_PER_GROUP(sb);
92}
46 93
47/* 94/*
48 * Read the inode allocation bitmap for a given block_group, reading 95 * Read the inode allocation bitmap for a given block_group, reading
@@ -59,8 +106,20 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group)
59 desc = ext4_get_group_desc(sb, block_group, NULL); 106 desc = ext4_get_group_desc(sb, block_group, NULL);
60 if (!desc) 107 if (!desc)
61 goto error_out; 108 goto error_out;
62 109 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
63 bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); 110 bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc));
111 if (!buffer_uptodate(bh)) {
112 lock_buffer(bh);
113 if (!buffer_uptodate(bh)) {
114 ext4_init_inode_bitmap(sb, bh, block_group,
115 desc);
116 set_buffer_uptodate(bh);
117 }
118 unlock_buffer(bh);
119 }
120 } else {
121 bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
122 }
64 if (!bh) 123 if (!bh)
65 ext4_error(sb, "read_inode_bitmap", 124 ext4_error(sb, "read_inode_bitmap",
66 "Cannot read inode bitmap - " 125 "Cannot read inode bitmap - "
@@ -169,6 +228,8 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
169 if (is_directory) 228 if (is_directory)
170 gdp->bg_used_dirs_count = cpu_to_le16( 229 gdp->bg_used_dirs_count = cpu_to_le16(
171 le16_to_cpu(gdp->bg_used_dirs_count) - 1); 230 le16_to_cpu(gdp->bg_used_dirs_count) - 1);
231 gdp->bg_checksum = ext4_group_desc_csum(sbi,
232 block_group, gdp);
172 spin_unlock(sb_bgl_lock(sbi, block_group)); 233 spin_unlock(sb_bgl_lock(sbi, block_group));
173 percpu_counter_inc(&sbi->s_freeinodes_counter); 234 percpu_counter_inc(&sbi->s_freeinodes_counter);
174 if (is_directory) 235 if (is_directory)
@@ -435,7 +496,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
435 struct ext4_sb_info *sbi; 496 struct ext4_sb_info *sbi;
436 int err = 0; 497 int err = 0;
437 struct inode *ret; 498 struct inode *ret;
438 int i; 499 int i, free = 0;
439 500
440 /* Cannot create files in a deleted directory */ 501 /* Cannot create files in a deleted directory */
441 if (!dir || !dir->i_nlink) 502 if (!dir || !dir->i_nlink)
@@ -517,11 +578,13 @@ repeat_in_this_group:
517 goto out; 578 goto out;
518 579
519got: 580got:
520 ino += group * EXT4_INODES_PER_GROUP(sb) + 1; 581 ino++;
521 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { 582 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
522 ext4_error (sb, "ext4_new_inode", 583 ino > EXT4_INODES_PER_GROUP(sb)) {
523 "reserved inode or inode > inodes count - " 584 ext4_error(sb, __FUNCTION__,
524 "block_group = %d, inode=%lu", group, ino); 585 "reserved inode or inode > inodes count - "
586 "block_group = %d, inode=%lu", group,
587 ino + group * EXT4_INODES_PER_GROUP(sb));
525 err = -EIO; 588 err = -EIO;
526 goto fail; 589 goto fail;
527 } 590 }
@@ -529,13 +592,78 @@ got:
529 BUFFER_TRACE(bh2, "get_write_access"); 592 BUFFER_TRACE(bh2, "get_write_access");
530 err = ext4_journal_get_write_access(handle, bh2); 593 err = ext4_journal_get_write_access(handle, bh2);
531 if (err) goto fail; 594 if (err) goto fail;
595
596 /* We may have to initialize the block bitmap if it isn't already */
597 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
598 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
599 struct buffer_head *block_bh = read_block_bitmap(sb, group);
600
601 BUFFER_TRACE(block_bh, "get block bitmap access");
602 err = ext4_journal_get_write_access(handle, block_bh);
603 if (err) {
604 brelse(block_bh);
605 goto fail;
606 }
607
608 free = 0;
609 spin_lock(sb_bgl_lock(sbi, group));
610 /* recheck and clear flag under lock if we still need to */
611 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
612 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
613 free = ext4_free_blocks_after_init(sb, group, gdp);
614 gdp->bg_free_blocks_count = cpu_to_le16(free);
615 }
616 spin_unlock(sb_bgl_lock(sbi, group));
617
618 /* Don't need to dirty bitmap block if we didn't change it */
619 if (free) {
620 BUFFER_TRACE(block_bh, "dirty block bitmap");
621 err = ext4_journal_dirty_metadata(handle, block_bh);
622 }
623
624 brelse(block_bh);
625 if (err)
626 goto fail;
627 }
628
532 spin_lock(sb_bgl_lock(sbi, group)); 629 spin_lock(sb_bgl_lock(sbi, group));
630 /* If we didn't allocate from within the initialized part of the inode
631 * table then we need to initialize up to this inode. */
632 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
633 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
634 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
635
636 /* When marking the block group with
637 * ~EXT4_BG_INODE_UNINIT we don't want to depend
638 * on the value of bg_itable_unused even though
639 * mke2fs could have initialized the same for us.
640 * Instead we calculate the value below
641 */
642
643 free = 0;
644 } else {
645 free = EXT4_INODES_PER_GROUP(sb) -
646 le16_to_cpu(gdp->bg_itable_unused);
647 }
648
649 /*
650 * Check the relative inode number against the last used
651 * relative inode number in this group. if it is greater
652 * we need to update the bg_itable_unused count
653 *
654 */
655 if (ino > free)
656 gdp->bg_itable_unused =
657 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
658 }
659
533 gdp->bg_free_inodes_count = 660 gdp->bg_free_inodes_count =
534 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); 661 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
535 if (S_ISDIR(mode)) { 662 if (S_ISDIR(mode)) {
536 gdp->bg_used_dirs_count = 663 gdp->bg_used_dirs_count =
537 cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); 664 cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
538 } 665 }
666 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
539 spin_unlock(sb_bgl_lock(sbi, group)); 667 spin_unlock(sb_bgl_lock(sbi, group));
540 BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); 668 BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
541 err = ext4_journal_dirty_metadata(handle, bh2); 669 err = ext4_journal_dirty_metadata(handle, bh2);
@@ -557,7 +685,7 @@ got:
557 inode->i_gid = current->fsgid; 685 inode->i_gid = current->fsgid;
558 inode->i_mode = mode; 686 inode->i_mode = mode;
559 687
560 inode->i_ino = ino; 688 inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
561 /* This is the optimal IO size (for stat), not the fs block size */ 689 /* This is the optimal IO size (for stat), not the fs block size */
562 inode->i_blocks = 0; 690 inode->i_blocks = 0;
563 inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = 691 inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
@@ -573,11 +701,6 @@ got:
573 /* dirsync only applies to directories */ 701 /* dirsync only applies to directories */
574 if (!S_ISDIR(mode)) 702 if (!S_ISDIR(mode))
575 ei->i_flags &= ~EXT4_DIRSYNC_FL; 703 ei->i_flags &= ~EXT4_DIRSYNC_FL;
576#ifdef EXT4_FRAGMENTS
577 ei->i_faddr = 0;
578 ei->i_frag_no = 0;
579 ei->i_frag_size = 0;
580#endif
581 ei->i_file_acl = 0; 704 ei->i_file_acl = 0;
582 ei->i_dir_acl = 0; 705 ei->i_dir_acl = 0;
583 ei->i_dtime = 0; 706 ei->i_dtime = 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0df2b1e06d0b..5489703d9573 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1027,7 +1027,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
1027 } 1027 }
1028 if (buffer_new(&dummy)) { 1028 if (buffer_new(&dummy)) {
1029 J_ASSERT(create != 0); 1029 J_ASSERT(create != 0);
1030 J_ASSERT(handle != 0); 1030 J_ASSERT(handle != NULL);
1031 1031
1032 /* 1032 /*
1033 * Now that we do not always journal data, we should 1033 * Now that we do not always journal data, we should
@@ -2711,11 +2711,6 @@ void ext4_read_inode(struct inode * inode)
2711 } 2711 }
2712 inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); 2712 inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
2713 ei->i_flags = le32_to_cpu(raw_inode->i_flags); 2713 ei->i_flags = le32_to_cpu(raw_inode->i_flags);
2714#ifdef EXT4_FRAGMENTS
2715 ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
2716 ei->i_frag_no = raw_inode->i_frag;
2717 ei->i_frag_size = raw_inode->i_fsize;
2718#endif
2719 ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); 2714 ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
2720 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 2715 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
2721 cpu_to_le32(EXT4_OS_HURD)) 2716 cpu_to_le32(EXT4_OS_HURD))
@@ -2860,11 +2855,6 @@ static int ext4_do_update_inode(handle_t *handle,
2860 raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); 2855 raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
2861 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 2856 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
2862 raw_inode->i_flags = cpu_to_le32(ei->i_flags); 2857 raw_inode->i_flags = cpu_to_le32(ei->i_flags);
2863#ifdef EXT4_FRAGMENTS
2864 raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
2865 raw_inode->i_frag = ei->i_frag_no;
2866 raw_inode->i_fsize = ei->i_frag_size;
2867#endif
2868 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 2858 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
2869 cpu_to_le32(EXT4_OS_HURD)) 2859 cpu_to_le32(EXT4_OS_HURD))
2870 raw_inode->i_file_acl_high = 2860 raw_inode->i_file_acl_high =
@@ -3243,12 +3233,14 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
3243 iloc, handle); 3233 iloc, handle);
3244 if (ret) { 3234 if (ret) {
3245 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; 3235 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
3246 if (mnt_count != sbi->s_es->s_mnt_count) { 3236 if (mnt_count !=
3237 le16_to_cpu(sbi->s_es->s_mnt_count)) {
3247 ext4_warning(inode->i_sb, __FUNCTION__, 3238 ext4_warning(inode->i_sb, __FUNCTION__,
3248 "Unable to expand inode %lu. Delete" 3239 "Unable to expand inode %lu. Delete"
3249 " some EAs or run e2fsck.", 3240 " some EAs or run e2fsck.",
3250 inode->i_ino); 3241 inode->i_ino);
3251 mnt_count = sbi->s_es->s_mnt_count; 3242 mnt_count =
3243 le16_to_cpu(sbi->s_es->s_mnt_count);
3252 } 3244 }
3253 } 3245 }
3254 } 3246 }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5fdb862e71c4..94ee6f315dc1 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -144,7 +144,6 @@ struct dx_map_entry
144 u16 size; 144 u16 size;
145}; 145};
146 146
147#ifdef CONFIG_EXT4_INDEX
148static inline unsigned dx_get_block (struct dx_entry *entry); 147static inline unsigned dx_get_block (struct dx_entry *entry);
149static void dx_set_block (struct dx_entry *entry, unsigned value); 148static void dx_set_block (struct dx_entry *entry, unsigned value);
150static inline unsigned dx_get_hash (struct dx_entry *entry); 149static inline unsigned dx_get_hash (struct dx_entry *entry);
@@ -766,8 +765,6 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
766 dx_set_block(new, block); 765 dx_set_block(new, block);
767 dx_set_count(entries, count + 1); 766 dx_set_count(entries, count + 1);
768} 767}
769#endif
770
771 768
772static void ext4_update_dx_flag(struct inode *inode) 769static void ext4_update_dx_flag(struct inode *inode)
773{ 770{
@@ -869,7 +866,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
869 name = dentry->d_name.name; 866 name = dentry->d_name.name;
870 if (namelen > EXT4_NAME_LEN) 867 if (namelen > EXT4_NAME_LEN)
871 return NULL; 868 return NULL;
872#ifdef CONFIG_EXT4_INDEX
873 if (is_dx(dir)) { 869 if (is_dx(dir)) {
874 bh = ext4_dx_find_entry(dentry, res_dir, &err); 870 bh = ext4_dx_find_entry(dentry, res_dir, &err);
875 /* 871 /*
@@ -881,7 +877,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
881 return bh; 877 return bh;
882 dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); 878 dxtrace(printk("ext4_find_entry: dx failed, falling back\n"));
883 } 879 }
884#endif
885 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); 880 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
886 start = EXT4_I(dir)->i_dir_start_lookup; 881 start = EXT4_I(dir)->i_dir_start_lookup;
887 if (start >= nblocks) 882 if (start >= nblocks)
@@ -957,7 +952,6 @@ cleanup_and_exit:
957 return ret; 952 return ret;
958} 953}
959 954
960#ifdef CONFIG_EXT4_INDEX
961static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, 955static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
962 struct ext4_dir_entry_2 **res_dir, int *err) 956 struct ext4_dir_entry_2 **res_dir, int *err)
963{ 957{
@@ -1025,7 +1019,6 @@ errout:
1025 dx_release (frames); 1019 dx_release (frames);
1026 return NULL; 1020 return NULL;
1027} 1021}
1028#endif
1029 1022
1030static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 1023static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
1031{ 1024{
@@ -1121,7 +1114,6 @@ static inline void ext4_set_de_type(struct super_block *sb,
1121 de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; 1114 de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
1122} 1115}
1123 1116
1124#ifdef CONFIG_EXT4_INDEX
1125/* 1117/*
1126 * Move count entries from end of map between two memory locations. 1118 * Move count entries from end of map between two memory locations.
1127 * Returns pointer to last entry moved. 1119 * Returns pointer to last entry moved.
@@ -1266,8 +1258,6 @@ errout:
1266 *error = err; 1258 *error = err;
1267 return NULL; 1259 return NULL;
1268} 1260}
1269#endif
1270
1271 1261
1272/* 1262/*
1273 * Add a new entry into a directory (leaf) block. If de is non-NULL, 1263 * Add a new entry into a directory (leaf) block. If de is non-NULL,
@@ -1364,7 +1354,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1364 return 0; 1354 return 0;
1365} 1355}
1366 1356
1367#ifdef CONFIG_EXT4_INDEX
1368/* 1357/*
1369 * This converts a one block unindexed directory to a 3 block indexed 1358 * This converts a one block unindexed directory to a 3 block indexed
1370 * directory, and adds the dentry to the indexed directory. 1359 * directory, and adds the dentry to the indexed directory.
@@ -1443,7 +1432,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1443 1432
1444 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1433 return add_dirent_to_buf(handle, dentry, inode, de, bh);
1445} 1434}
1446#endif
1447 1435
1448/* 1436/*
1449 * ext4_add_entry() 1437 * ext4_add_entry()
@@ -1464,9 +1452,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
1464 struct ext4_dir_entry_2 *de; 1452 struct ext4_dir_entry_2 *de;
1465 struct super_block * sb; 1453 struct super_block * sb;
1466 int retval; 1454 int retval;
1467#ifdef CONFIG_EXT4_INDEX
1468 int dx_fallback=0; 1455 int dx_fallback=0;
1469#endif
1470 unsigned blocksize; 1456 unsigned blocksize;
1471 u32 block, blocks; 1457 u32 block, blocks;
1472 1458
@@ -1474,7 +1460,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
1474 blocksize = sb->s_blocksize; 1460 blocksize = sb->s_blocksize;
1475 if (!dentry->d_name.len) 1461 if (!dentry->d_name.len)
1476 return -EINVAL; 1462 return -EINVAL;
1477#ifdef CONFIG_EXT4_INDEX
1478 if (is_dx(dir)) { 1463 if (is_dx(dir)) {
1479 retval = ext4_dx_add_entry(handle, dentry, inode); 1464 retval = ext4_dx_add_entry(handle, dentry, inode);
1480 if (!retval || (retval != ERR_BAD_DX_DIR)) 1465 if (!retval || (retval != ERR_BAD_DX_DIR))
@@ -1483,7 +1468,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
1483 dx_fallback++; 1468 dx_fallback++;
1484 ext4_mark_inode_dirty(handle, dir); 1469 ext4_mark_inode_dirty(handle, dir);
1485 } 1470 }
1486#endif
1487 blocks = dir->i_size >> sb->s_blocksize_bits; 1471 blocks = dir->i_size >> sb->s_blocksize_bits;
1488 for (block = 0, offset = 0; block < blocks; block++) { 1472 for (block = 0, offset = 0; block < blocks; block++) {
1489 bh = ext4_bread(handle, dir, block, 0, &retval); 1473 bh = ext4_bread(handle, dir, block, 0, &retval);
@@ -1493,11 +1477,9 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
1493 if (retval != -ENOSPC) 1477 if (retval != -ENOSPC)
1494 return retval; 1478 return retval;
1495 1479
1496#ifdef CONFIG_EXT4_INDEX
1497 if (blocks == 1 && !dx_fallback && 1480 if (blocks == 1 && !dx_fallback &&
1498 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) 1481 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
1499 return make_indexed_dir(handle, dentry, inode, bh); 1482 return make_indexed_dir(handle, dentry, inode, bh);
1500#endif
1501 brelse(bh); 1483 brelse(bh);
1502 } 1484 }
1503 bh = ext4_append(handle, dir, &block, &retval); 1485 bh = ext4_append(handle, dir, &block, &retval);
@@ -1509,7 +1491,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
1509 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1491 return add_dirent_to_buf(handle, dentry, inode, de, bh);
1510} 1492}
1511 1493
1512#ifdef CONFIG_EXT4_INDEX
1513/* 1494/*
1514 * Returns 0 for success, or a negative error value 1495 * Returns 0 for success, or a negative error value
1515 */ 1496 */
@@ -1644,7 +1625,6 @@ cleanup:
1644 dx_release(frames); 1625 dx_release(frames);
1645 return err; 1626 return err;
1646} 1627}
1647#endif
1648 1628
1649/* 1629/*
1650 * ext4_delete_entry deletes a directory entry by merging it with the 1630 * ext4_delete_entry deletes a directory entry by merging it with the
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 472fc0d3e1c0..bd8a52bb3999 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -16,6 +16,7 @@
16#include <linux/errno.h> 16#include <linux/errno.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18 18
19#include "group.h"
19 20
20#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 21#define outside(b, first, last) ((b) < (first) || (b) >= (last))
21#define inside(b, first, last) ((b) >= (first) && (b) < (last)) 22#define inside(b, first, last) ((b) >= (first) && (b) < (last))
@@ -140,22 +141,29 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
140} 141}
141 142
142/* 143/*
143 * To avoid calling the atomic setbit hundreds or thousands of times, we only 144 * If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA.
144 * need to use it within a single byte (to ensure we get endianness right). 145 * If that fails, restart the transaction & regain write access for the
145 * We can use memset for the rest of the bitmap as there are no other users. 146 * buffer head which is used for block_bitmap modifications.
146 */ 147 */
147static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 148static int extend_or_restart_transaction(handle_t *handle, int thresh,
149 struct buffer_head *bh)
148{ 150{
149 int i; 151 int err;
152
153 if (handle->h_buffer_credits >= thresh)
154 return 0;
150 155
151 if (start_bit >= end_bit) 156 err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
152 return; 157 if (err < 0)
158 return err;
159 if (err) {
160 if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
161 return err;
162 if ((err = ext4_journal_get_write_access(handle, bh)))
163 return err;
164 }
153 165
154 ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); 166 return 0;
155 for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
156 ext4_set_bit(i, bitmap);
157 if (i < end_bit)
158 memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
159} 167}
160 168
161/* 169/*
@@ -180,8 +188,9 @@ static int setup_new_group_blocks(struct super_block *sb,
180 int i; 188 int i;
181 int err = 0, err2; 189 int err = 0, err2;
182 190
183 handle = ext4_journal_start_sb(sb, reserved_gdb + gdblocks + 191 /* This transaction may be extended/restarted along the way */
184 2 + sbi->s_itb_per_group); 192 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
193
185 if (IS_ERR(handle)) 194 if (IS_ERR(handle))
186 return PTR_ERR(handle); 195 return PTR_ERR(handle);
187 196
@@ -208,6 +217,9 @@ static int setup_new_group_blocks(struct super_block *sb,
208 217
209 ext4_debug("update backup group %#04lx (+%d)\n", block, bit); 218 ext4_debug("update backup group %#04lx (+%d)\n", block, bit);
210 219
220 if ((err = extend_or_restart_transaction(handle, 1, bh)))
221 goto exit_bh;
222
211 gdb = sb_getblk(sb, block); 223 gdb = sb_getblk(sb, block);
212 if (!gdb) { 224 if (!gdb) {
213 err = -EIO; 225 err = -EIO;
@@ -217,10 +229,10 @@ static int setup_new_group_blocks(struct super_block *sb,
217 brelse(gdb); 229 brelse(gdb);
218 goto exit_bh; 230 goto exit_bh;
219 } 231 }
220 lock_buffer(bh); 232 lock_buffer(gdb);
221 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size); 233 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
222 set_buffer_uptodate(gdb); 234 set_buffer_uptodate(gdb);
223 unlock_buffer(bh); 235 unlock_buffer(gdb);
224 ext4_journal_dirty_metadata(handle, gdb); 236 ext4_journal_dirty_metadata(handle, gdb);
225 ext4_set_bit(bit, bh->b_data); 237 ext4_set_bit(bit, bh->b_data);
226 brelse(gdb); 238 brelse(gdb);
@@ -233,6 +245,9 @@ static int setup_new_group_blocks(struct super_block *sb,
233 245
234 ext4_debug("clear reserved block %#04lx (+%d)\n", block, bit); 246 ext4_debug("clear reserved block %#04lx (+%d)\n", block, bit);
235 247
248 if ((err = extend_or_restart_transaction(handle, 1, bh)))
249 goto exit_bh;
250
236 if (IS_ERR(gdb = bclean(handle, sb, block))) { 251 if (IS_ERR(gdb = bclean(handle, sb, block))) {
237 err = PTR_ERR(bh); 252 err = PTR_ERR(bh);
238 goto exit_bh; 253 goto exit_bh;
@@ -254,6 +269,10 @@ static int setup_new_group_blocks(struct super_block *sb,
254 struct buffer_head *it; 269 struct buffer_head *it;
255 270
256 ext4_debug("clear inode block %#04lx (+%d)\n", block, bit); 271 ext4_debug("clear inode block %#04lx (+%d)\n", block, bit);
272
273 if ((err = extend_or_restart_transaction(handle, 1, bh)))
274 goto exit_bh;
275
257 if (IS_ERR(it = bclean(handle, sb, block))) { 276 if (IS_ERR(it = bclean(handle, sb, block))) {
258 err = PTR_ERR(it); 277 err = PTR_ERR(it);
259 goto exit_bh; 278 goto exit_bh;
@@ -262,6 +281,10 @@ static int setup_new_group_blocks(struct super_block *sb,
262 brelse(it); 281 brelse(it);
263 ext4_set_bit(bit, bh->b_data); 282 ext4_set_bit(bit, bh->b_data);
264 } 283 }
284
285 if ((err = extend_or_restart_transaction(handle, 2, bh)))
286 goto exit_bh;
287
265 mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb), 288 mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb),
266 bh->b_data); 289 bh->b_data);
267 ext4_journal_dirty_metadata(handle, bh); 290 ext4_journal_dirty_metadata(handle, bh);
@@ -289,7 +312,6 @@ exit_journal:
289 return err; 312 return err;
290} 313}
291 314
292
293/* 315/*
294 * Iterate through the groups which hold BACKUP superblock/GDT copies in an 316 * Iterate through the groups which hold BACKUP superblock/GDT copies in an
295 * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before 317 * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before
@@ -842,6 +864,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
842 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ 864 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
843 gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); 865 gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
844 gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); 866 gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
867 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
845 868
846 /* 869 /*
847 * Make the new blocks and inodes valid next. We do this before 870 * Make the new blocks and inodes valid next. We do this before
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4c8d31c61454..b11e9e2bcd01 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,12 +37,14 @@
37#include <linux/quotaops.h> 37#include <linux/quotaops.h>
38#include <linux/seq_file.h> 38#include <linux/seq_file.h>
39#include <linux/log2.h> 39#include <linux/log2.h>
40#include <linux/crc16.h>
40 41
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42 43
43#include "xattr.h" 44#include "xattr.h"
44#include "acl.h" 45#include "acl.h"
45#include "namei.h" 46#include "namei.h"
47#include "group.h"
46 48
47static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 49static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
48 unsigned long journal_devnum); 50 unsigned long journal_devnum);
@@ -68,31 +70,31 @@ static void ext4_write_super_lockfs(struct super_block *sb);
68ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 70ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
69 struct ext4_group_desc *bg) 71 struct ext4_group_desc *bg)
70{ 72{
71 return le32_to_cpu(bg->bg_block_bitmap) | 73 return le32_to_cpu(bg->bg_block_bitmap_lo) |
72 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 74 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
73 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 75 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
74} 76}
75 77
76ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 78ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
77 struct ext4_group_desc *bg) 79 struct ext4_group_desc *bg)
78{ 80{
79 return le32_to_cpu(bg->bg_inode_bitmap) | 81 return le32_to_cpu(bg->bg_inode_bitmap_lo) |
80 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 82 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
81 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 83 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
82} 84}
83 85
84ext4_fsblk_t ext4_inode_table(struct super_block *sb, 86ext4_fsblk_t ext4_inode_table(struct super_block *sb,
85 struct ext4_group_desc *bg) 87 struct ext4_group_desc *bg)
86{ 88{
87 return le32_to_cpu(bg->bg_inode_table) | 89 return le32_to_cpu(bg->bg_inode_table_lo) |
88 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 90 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
89 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 91 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
90} 92}
91 93
92void ext4_block_bitmap_set(struct super_block *sb, 94void ext4_block_bitmap_set(struct super_block *sb,
93 struct ext4_group_desc *bg, ext4_fsblk_t blk) 95 struct ext4_group_desc *bg, ext4_fsblk_t blk)
94{ 96{
95 bg->bg_block_bitmap = cpu_to_le32((u32)blk); 97 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
96 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 98 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
97 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 99 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
98} 100}
@@ -100,7 +102,7 @@ void ext4_block_bitmap_set(struct super_block *sb,
100void ext4_inode_bitmap_set(struct super_block *sb, 102void ext4_inode_bitmap_set(struct super_block *sb,
101 struct ext4_group_desc *bg, ext4_fsblk_t blk) 103 struct ext4_group_desc *bg, ext4_fsblk_t blk)
102{ 104{
103 bg->bg_inode_bitmap = cpu_to_le32((u32)blk); 105 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
104 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 106 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
105 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 107 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
106} 108}
@@ -108,7 +110,7 @@ void ext4_inode_bitmap_set(struct super_block *sb,
108void ext4_inode_table_set(struct super_block *sb, 110void ext4_inode_table_set(struct super_block *sb,
109 struct ext4_group_desc *bg, ext4_fsblk_t blk) 111 struct ext4_group_desc *bg, ext4_fsblk_t blk)
110{ 112{
111 bg->bg_inode_table = cpu_to_le32((u32)blk); 113 bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
112 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 114 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
113 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 115 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
114} 116}
@@ -1037,7 +1039,7 @@ static int parse_options (char *options, struct super_block *sb,
1037 if (option < 0) 1039 if (option < 0)
1038 return 0; 1040 return 0;
1039 if (option == 0) 1041 if (option == 0)
1040 option = JBD_DEFAULT_MAX_COMMIT_AGE; 1042 option = JBD2_DEFAULT_MAX_COMMIT_AGE;
1041 sbi->s_commit_interval = HZ * option; 1043 sbi->s_commit_interval = HZ * option;
1042 break; 1044 break;
1043 case Opt_data_journal: 1045 case Opt_data_journal:
@@ -1308,6 +1310,43 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1308 return res; 1310 return res;
1309} 1311}
1310 1312
1313__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
1314 struct ext4_group_desc *gdp)
1315{
1316 __u16 crc = 0;
1317
1318 if (sbi->s_es->s_feature_ro_compat &
1319 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
1320 int offset = offsetof(struct ext4_group_desc, bg_checksum);
1321 __le32 le_group = cpu_to_le32(block_group);
1322
1323 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
1324 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
1325 crc = crc16(crc, (__u8 *)gdp, offset);
1326 offset += sizeof(gdp->bg_checksum); /* skip checksum */
1327 /* for checksum of struct ext4_group_desc do the rest...*/
1328 if ((sbi->s_es->s_feature_incompat &
1329 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
1330 offset < le16_to_cpu(sbi->s_es->s_desc_size))
1331 crc = crc16(crc, (__u8 *)gdp + offset,
1332 le16_to_cpu(sbi->s_es->s_desc_size) -
1333 offset);
1334 }
1335
1336 return cpu_to_le16(crc);
1337}
1338
1339int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
1340 struct ext4_group_desc *gdp)
1341{
1342 if ((sbi->s_es->s_feature_ro_compat &
1343 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
1344 (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
1345 return 0;
1346
1347 return 1;
1348}
1349
1311/* Called at mount-time, super-block is locked */ 1350/* Called at mount-time, super-block is locked */
1312static int ext4_check_descriptors (struct super_block * sb) 1351static int ext4_check_descriptors (struct super_block * sb)
1313{ 1352{
@@ -1319,13 +1358,17 @@ static int ext4_check_descriptors (struct super_block * sb)
1319 ext4_fsblk_t inode_table; 1358 ext4_fsblk_t inode_table;
1320 struct ext4_group_desc * gdp = NULL; 1359 struct ext4_group_desc * gdp = NULL;
1321 int desc_block = 0; 1360 int desc_block = 0;
1361 int flexbg_flag = 0;
1322 int i; 1362 int i;
1323 1363
1364 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1365 flexbg_flag = 1;
1366
1324 ext4_debug ("Checking group descriptors"); 1367 ext4_debug ("Checking group descriptors");
1325 1368
1326 for (i = 0; i < sbi->s_groups_count; i++) 1369 for (i = 0; i < sbi->s_groups_count; i++)
1327 { 1370 {
1328 if (i == sbi->s_groups_count - 1) 1371 if (i == sbi->s_groups_count - 1 || flexbg_flag)
1329 last_block = ext4_blocks_count(sbi->s_es) - 1; 1372 last_block = ext4_blocks_count(sbi->s_es) - 1;
1330 else 1373 else
1331 last_block = first_block + 1374 last_block = first_block +
@@ -1362,7 +1405,16 @@ static int ext4_check_descriptors (struct super_block * sb)
1362 i, inode_table); 1405 i, inode_table);
1363 return 0; 1406 return 0;
1364 } 1407 }
1365 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1408 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1409 ext4_error(sb, __FUNCTION__,
1410 "Checksum for group %d failed (%u!=%u)\n", i,
1411 le16_to_cpu(ext4_group_desc_csum(sbi, i,
1412 gdp)),
1413 le16_to_cpu(gdp->bg_checksum));
1414 return 0;
1415 }
1416 if (!flexbg_flag)
1417 first_block += EXT4_BLOCKS_PER_GROUP(sb);
1366 gdp = (struct ext4_group_desc *) 1418 gdp = (struct ext4_group_desc *)
1367 ((__u8 *)gdp + EXT4_DESC_SIZE(sb)); 1419 ((__u8 *)gdp + EXT4_DESC_SIZE(sb));
1368 } 1420 }
@@ -1726,14 +1778,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1726 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 1778 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
1727 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 1779 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
1728 } 1780 }
1729 sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
1730 le32_to_cpu(es->s_log_frag_size);
1731 if (blocksize != sbi->s_frag_size) {
1732 printk(KERN_ERR
1733 "EXT4-fs: fragsize %lu != blocksize %u (unsupported)\n",
1734 sbi->s_frag_size, blocksize);
1735 goto failed_mount;
1736 }
1737 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 1781 sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
1738 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 1782 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
1739 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 1783 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
@@ -1747,7 +1791,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1747 } else 1791 } else
1748 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 1792 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
1749 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 1793 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
1750 sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
1751 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 1794 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
1752 if (EXT4_INODE_SIZE(sb) == 0) 1795 if (EXT4_INODE_SIZE(sb) == 0)
1753 goto cantfind_ext4; 1796 goto cantfind_ext4;
@@ -1771,12 +1814,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1771 sbi->s_blocks_per_group); 1814 sbi->s_blocks_per_group);
1772 goto failed_mount; 1815 goto failed_mount;
1773 } 1816 }
1774 if (sbi->s_frags_per_group > blocksize * 8) {
1775 printk (KERN_ERR
1776 "EXT4-fs: #fragments per group too big: %lu\n",
1777 sbi->s_frags_per_group);
1778 goto failed_mount;
1779 }
1780 if (sbi->s_inodes_per_group > blocksize * 8) { 1817 if (sbi->s_inodes_per_group > blocksize * 8) {
1781 printk (KERN_ERR 1818 printk (KERN_ERR
1782 "EXT4-fs: #inodes per group too big: %lu\n", 1819 "EXT4-fs: #inodes per group too big: %lu\n",
@@ -2630,7 +2667,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
2630 2667
2631 if (test_opt(sb, MINIX_DF)) { 2668 if (test_opt(sb, MINIX_DF)) {
2632 sbi->s_overhead_last = 0; 2669 sbi->s_overhead_last = 0;
2633 } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { 2670 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
2634 unsigned long ngroups = sbi->s_groups_count, i; 2671 unsigned long ngroups = sbi->s_groups_count, i;
2635 ext4_fsblk_t overhead = 0; 2672 ext4_fsblk_t overhead = 0;
2636 smp_rmb(); 2673 smp_rmb();
@@ -2665,14 +2702,14 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
2665 overhead += ngroups * (2 + sbi->s_itb_per_group); 2702 overhead += ngroups * (2 + sbi->s_itb_per_group);
2666 sbi->s_overhead_last = overhead; 2703 sbi->s_overhead_last = overhead;
2667 smp_wmb(); 2704 smp_wmb();
2668 sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); 2705 sbi->s_blocks_last = ext4_blocks_count(es);
2669 } 2706 }
2670 2707
2671 buf->f_type = EXT4_SUPER_MAGIC; 2708 buf->f_type = EXT4_SUPER_MAGIC;
2672 buf->f_bsize = sb->s_blocksize; 2709 buf->f_bsize = sb->s_blocksize;
2673 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 2710 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
2674 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); 2711 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
2675 es->s_free_blocks_count = cpu_to_le32(buf->f_bfree); 2712 ext4_free_blocks_count_set(es, buf->f_bfree);
2676 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 2713 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
2677 if (buf->f_bfree < ext4_r_blocks_count(es)) 2714 if (buf->f_bfree < ext4_r_blocks_count(es))
2678 buf->f_bavail = 0; 2715 buf->f_bavail = 0;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index b10d68fffb55..86387302c2a9 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -750,12 +750,11 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
750 } 750 }
751 } else { 751 } else {
752 /* Allocate a buffer where we construct the new block. */ 752 /* Allocate a buffer where we construct the new block. */
753 s->base = kmalloc(sb->s_blocksize, GFP_KERNEL); 753 s->base = kzalloc(sb->s_blocksize, GFP_KERNEL);
754 /* assert(header == s->base) */ 754 /* assert(header == s->base) */
755 error = -ENOMEM; 755 error = -ENOMEM;
756 if (s->base == NULL) 756 if (s->base == NULL)
757 goto cleanup; 757 goto cleanup;
758 memset(s->base, 0, sb->s_blocksize);
759 header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); 758 header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
760 header(s->base)->h_blocks = cpu_to_le32(1); 759 header(s->base)->h_blocks = cpu_to_le32(1);
761 header(s->base)->h_refcount = cpu_to_le32(1); 760 header(s->base)->h_refcount = cpu_to_le32(1);
@@ -1121,7 +1120,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
1121 int total_ino, total_blk; 1120 int total_ino, total_blk;
1122 void *base, *start, *end; 1121 void *base, *start, *end;
1123 int extra_isize = 0, error = 0, tried_min_extra_isize = 0; 1122 int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
1124 int s_min_extra_isize = EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize; 1123 int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
1125 1124
1126 down_write(&EXT4_I(inode)->xattr_sem); 1125 down_write(&EXT4_I(inode)->xattr_sem);
1127retry: 1126retry:
@@ -1293,7 +1292,7 @@ retry:
1293 1292
1294 i.name = b_entry_name; 1293 i.name = b_entry_name;
1295 i.value = buffer; 1294 i.value = buffer;
1296 i.value_len = cpu_to_le32(size); 1295 i.value_len = size;
1297 error = ext4_xattr_block_find(inode, &i, bs); 1296 error = ext4_xattr_block_find(inode, &i, bs);
1298 if (error) 1297 if (error)
1299 goto cleanup; 1298 goto cleanup;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index d1acab931330..3763757f9fe7 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -63,13 +63,21 @@ static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
63 * Set dentry and possibly attribute timeouts from the lookup/mk* 63 * Set dentry and possibly attribute timeouts from the lookup/mk*
64 * replies 64 * replies
65 */ 65 */
66static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) 66static void fuse_change_entry_timeout(struct dentry *entry,
67 struct fuse_entry_out *o)
67{ 68{
68 fuse_dentry_settime(entry, 69 fuse_dentry_settime(entry,
69 time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); 70 time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
70 if (entry->d_inode) 71}
71 get_fuse_inode(entry->d_inode)->i_time = 72
72 time_to_jiffies(o->attr_valid, o->attr_valid_nsec); 73static u64 attr_timeout(struct fuse_attr_out *o)
74{
75 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
76}
77
78static u64 entry_attr_timeout(struct fuse_entry_out *o)
79{
80 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
73} 81}
74 82
75/* 83/*
@@ -108,13 +116,19 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
108 struct dentry *entry, 116 struct dentry *entry,
109 struct fuse_entry_out *outarg) 117 struct fuse_entry_out *outarg)
110{ 118{
119 struct fuse_conn *fc = get_fuse_conn(dir);
120
121 memset(outarg, 0, sizeof(struct fuse_entry_out));
111 req->in.h.opcode = FUSE_LOOKUP; 122 req->in.h.opcode = FUSE_LOOKUP;
112 req->in.h.nodeid = get_node_id(dir); 123 req->in.h.nodeid = get_node_id(dir);
113 req->in.numargs = 1; 124 req->in.numargs = 1;
114 req->in.args[0].size = entry->d_name.len + 1; 125 req->in.args[0].size = entry->d_name.len + 1;
115 req->in.args[0].value = entry->d_name.name; 126 req->in.args[0].value = entry->d_name.name;
116 req->out.numargs = 1; 127 req->out.numargs = 1;
117 req->out.args[0].size = sizeof(struct fuse_entry_out); 128 if (fc->minor < 9)
129 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
130 else
131 req->out.args[0].size = sizeof(struct fuse_entry_out);
118 req->out.args[0].value = outarg; 132 req->out.args[0].value = outarg;
119} 133}
120 134
@@ -140,6 +154,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
140 struct fuse_req *req; 154 struct fuse_req *req;
141 struct fuse_req *forget_req; 155 struct fuse_req *forget_req;
142 struct dentry *parent; 156 struct dentry *parent;
157 u64 attr_version;
143 158
144 /* For negative dentries, always do a fresh lookup */ 159 /* For negative dentries, always do a fresh lookup */
145 if (!inode) 160 if (!inode)
@@ -156,6 +171,10 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
156 return 0; 171 return 0;
157 } 172 }
158 173
174 spin_lock(&fc->lock);
175 attr_version = fc->attr_version;
176 spin_unlock(&fc->lock);
177
159 parent = dget_parent(entry); 178 parent = dget_parent(entry);
160 fuse_lookup_init(req, parent->d_inode, entry, &outarg); 179 fuse_lookup_init(req, parent->d_inode, entry, &outarg);
161 request_send(fc, req); 180 request_send(fc, req);
@@ -180,8 +199,10 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
180 if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) 199 if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
181 return 0; 200 return 0;
182 201
183 fuse_change_attributes(inode, &outarg.attr); 202 fuse_change_attributes(inode, &outarg.attr,
184 fuse_change_timeout(entry, &outarg); 203 entry_attr_timeout(&outarg),
204 attr_version);
205 fuse_change_entry_timeout(entry, &outarg);
185 } 206 }
186 return 1; 207 return 1;
187} 208}
@@ -228,6 +249,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
228 struct fuse_conn *fc = get_fuse_conn(dir); 249 struct fuse_conn *fc = get_fuse_conn(dir);
229 struct fuse_req *req; 250 struct fuse_req *req;
230 struct fuse_req *forget_req; 251 struct fuse_req *forget_req;
252 u64 attr_version;
231 253
232 if (entry->d_name.len > FUSE_NAME_MAX) 254 if (entry->d_name.len > FUSE_NAME_MAX)
233 return ERR_PTR(-ENAMETOOLONG); 255 return ERR_PTR(-ENAMETOOLONG);
@@ -242,6 +264,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
242 return ERR_PTR(PTR_ERR(forget_req)); 264 return ERR_PTR(PTR_ERR(forget_req));
243 } 265 }
244 266
267 spin_lock(&fc->lock);
268 attr_version = fc->attr_version;
269 spin_unlock(&fc->lock);
270
245 fuse_lookup_init(req, dir, entry, &outarg); 271 fuse_lookup_init(req, dir, entry, &outarg);
246 request_send(fc, req); 272 request_send(fc, req);
247 err = req->out.h.error; 273 err = req->out.h.error;
@@ -253,7 +279,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
253 err = -EIO; 279 err = -EIO;
254 if (!err && outarg.nodeid) { 280 if (!err && outarg.nodeid) {
255 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 281 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
256 &outarg.attr); 282 &outarg.attr, entry_attr_timeout(&outarg),
283 attr_version);
257 if (!inode) { 284 if (!inode) {
258 fuse_send_forget(fc, forget_req, outarg.nodeid, 1); 285 fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
259 return ERR_PTR(-ENOMEM); 286 return ERR_PTR(-ENOMEM);
@@ -276,7 +303,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
276 303
277 entry->d_op = &fuse_dentry_operations; 304 entry->d_op = &fuse_dentry_operations;
278 if (!err) 305 if (!err)
279 fuse_change_timeout(entry, &outarg); 306 fuse_change_entry_timeout(entry, &outarg);
280 else 307 else
281 fuse_invalidate_entry_cache(entry); 308 fuse_invalidate_entry_cache(entry);
282 return NULL; 309 return NULL;
@@ -335,6 +362,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
335 362
336 flags &= ~O_NOCTTY; 363 flags &= ~O_NOCTTY;
337 memset(&inarg, 0, sizeof(inarg)); 364 memset(&inarg, 0, sizeof(inarg));
365 memset(&outentry, 0, sizeof(outentry));
338 inarg.flags = flags; 366 inarg.flags = flags;
339 inarg.mode = mode; 367 inarg.mode = mode;
340 req->in.h.opcode = FUSE_CREATE; 368 req->in.h.opcode = FUSE_CREATE;
@@ -345,7 +373,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
345 req->in.args[1].size = entry->d_name.len + 1; 373 req->in.args[1].size = entry->d_name.len + 1;
346 req->in.args[1].value = entry->d_name.name; 374 req->in.args[1].value = entry->d_name.name;
347 req->out.numargs = 2; 375 req->out.numargs = 2;
348 req->out.args[0].size = sizeof(outentry); 376 if (fc->minor < 9)
377 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
378 else
379 req->out.args[0].size = sizeof(outentry);
349 req->out.args[0].value = &outentry; 380 req->out.args[0].value = &outentry;
350 req->out.args[1].size = sizeof(outopen); 381 req->out.args[1].size = sizeof(outopen);
351 req->out.args[1].value = &outopen; 382 req->out.args[1].value = &outopen;
@@ -363,7 +394,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
363 394
364 fuse_put_request(fc, req); 395 fuse_put_request(fc, req);
365 inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, 396 inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
366 &outentry.attr); 397 &outentry.attr, entry_attr_timeout(&outentry), 0);
367 if (!inode) { 398 if (!inode) {
368 flags &= ~(O_CREAT | O_EXCL | O_TRUNC); 399 flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
369 ff->fh = outopen.fh; 400 ff->fh = outopen.fh;
@@ -373,7 +404,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
373 } 404 }
374 fuse_put_request(fc, forget_req); 405 fuse_put_request(fc, forget_req);
375 d_instantiate(entry, inode); 406 d_instantiate(entry, inode);
376 fuse_change_timeout(entry, &outentry); 407 fuse_change_entry_timeout(entry, &outentry);
377 file = lookup_instantiate_filp(nd, entry, generic_file_open); 408 file = lookup_instantiate_filp(nd, entry, generic_file_open);
378 if (IS_ERR(file)) { 409 if (IS_ERR(file)) {
379 ff->fh = outopen.fh; 410 ff->fh = outopen.fh;
@@ -410,9 +441,13 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
410 return PTR_ERR(forget_req); 441 return PTR_ERR(forget_req);
411 } 442 }
412 443
444 memset(&outarg, 0, sizeof(outarg));
413 req->in.h.nodeid = get_node_id(dir); 445 req->in.h.nodeid = get_node_id(dir);
414 req->out.numargs = 1; 446 req->out.numargs = 1;
415 req->out.args[0].size = sizeof(outarg); 447 if (fc->minor < 9)
448 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
449 else
450 req->out.args[0].size = sizeof(outarg);
416 req->out.args[0].value = &outarg; 451 req->out.args[0].value = &outarg;
417 request_send(fc, req); 452 request_send(fc, req);
418 err = req->out.h.error; 453 err = req->out.h.error;
@@ -428,7 +463,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
428 goto out_put_forget_req; 463 goto out_put_forget_req;
429 464
430 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 465 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
431 &outarg.attr); 466 &outarg.attr, entry_attr_timeout(&outarg), 0);
432 if (!inode) { 467 if (!inode) {
433 fuse_send_forget(fc, forget_req, outarg.nodeid, 1); 468 fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
434 return -ENOMEM; 469 return -ENOMEM;
@@ -451,7 +486,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
451 } else 486 } else
452 d_instantiate(entry, inode); 487 d_instantiate(entry, inode);
453 488
454 fuse_change_timeout(entry, &outarg); 489 fuse_change_entry_timeout(entry, &outarg);
455 fuse_invalidate_attr(dir); 490 fuse_invalidate_attr(dir);
456 return 0; 491 return 0;
457 492
@@ -663,52 +698,84 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
663 return err; 698 return err;
664} 699}
665 700
666static int fuse_do_getattr(struct inode *inode) 701static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
702 struct kstat *stat)
703{
704 stat->dev = inode->i_sb->s_dev;
705 stat->ino = attr->ino;
706 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
707 stat->nlink = attr->nlink;
708 stat->uid = attr->uid;
709 stat->gid = attr->gid;
710 stat->rdev = inode->i_rdev;
711 stat->atime.tv_sec = attr->atime;
712 stat->atime.tv_nsec = attr->atimensec;
713 stat->mtime.tv_sec = attr->mtime;
714 stat->mtime.tv_nsec = attr->mtimensec;
715 stat->ctime.tv_sec = attr->ctime;
716 stat->ctime.tv_nsec = attr->ctimensec;
717 stat->size = attr->size;
718 stat->blocks = attr->blocks;
719 stat->blksize = (1 << inode->i_blkbits);
720}
721
722static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
723 struct file *file)
667{ 724{
668 int err; 725 int err;
669 struct fuse_attr_out arg; 726 struct fuse_getattr_in inarg;
727 struct fuse_attr_out outarg;
670 struct fuse_conn *fc = get_fuse_conn(inode); 728 struct fuse_conn *fc = get_fuse_conn(inode);
671 struct fuse_req *req = fuse_get_req(fc); 729 struct fuse_req *req;
730 u64 attr_version;
731
732 req = fuse_get_req(fc);
672 if (IS_ERR(req)) 733 if (IS_ERR(req))
673 return PTR_ERR(req); 734 return PTR_ERR(req);
674 735
736 spin_lock(&fc->lock);
737 attr_version = fc->attr_version;
738 spin_unlock(&fc->lock);
739
740 memset(&inarg, 0, sizeof(inarg));
741 memset(&outarg, 0, sizeof(outarg));
742 /* Directories have separate file-handle space */
743 if (file && S_ISREG(inode->i_mode)) {
744 struct fuse_file *ff = file->private_data;
745
746 inarg.getattr_flags |= FUSE_GETATTR_FH;
747 inarg.fh = ff->fh;
748 }
675 req->in.h.opcode = FUSE_GETATTR; 749 req->in.h.opcode = FUSE_GETATTR;
676 req->in.h.nodeid = get_node_id(inode); 750 req->in.h.nodeid = get_node_id(inode);
751 req->in.numargs = 1;
752 req->in.args[0].size = sizeof(inarg);
753 req->in.args[0].value = &inarg;
677 req->out.numargs = 1; 754 req->out.numargs = 1;
678 req->out.args[0].size = sizeof(arg); 755 if (fc->minor < 9)
679 req->out.args[0].value = &arg; 756 req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
757 else
758 req->out.args[0].size = sizeof(outarg);
759 req->out.args[0].value = &outarg;
680 request_send(fc, req); 760 request_send(fc, req);
681 err = req->out.h.error; 761 err = req->out.h.error;
682 fuse_put_request(fc, req); 762 fuse_put_request(fc, req);
683 if (!err) { 763 if (!err) {
684 if ((inode->i_mode ^ arg.attr.mode) & S_IFMT) { 764 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
685 make_bad_inode(inode); 765 make_bad_inode(inode);
686 err = -EIO; 766 err = -EIO;
687 } else { 767 } else {
688 struct fuse_inode *fi = get_fuse_inode(inode); 768 fuse_change_attributes(inode, &outarg.attr,
689 fuse_change_attributes(inode, &arg.attr); 769 attr_timeout(&outarg),
690 fi->i_time = time_to_jiffies(arg.attr_valid, 770 attr_version);
691 arg.attr_valid_nsec); 771 if (stat)
772 fuse_fillattr(inode, &outarg.attr, stat);
692 } 773 }
693 } 774 }
694 return err; 775 return err;
695} 776}
696 777
697/* 778/*
698 * Check if attributes are still valid, and if not send a GETATTR
699 * request to refresh them.
700 */
701static int fuse_refresh_attributes(struct inode *inode)
702{
703 struct fuse_inode *fi = get_fuse_inode(inode);
704
705 if (fi->i_time < get_jiffies_64())
706 return fuse_do_getattr(inode);
707 else
708 return 0;
709}
710
711/*
712 * Calling into a user-controlled filesystem gives the filesystem 779 * Calling into a user-controlled filesystem gives the filesystem
713 * daemon ptrace-like capabilities over the requester process. This 780 * daemon ptrace-like capabilities over the requester process. This
714 * means, that the filesystem daemon is able to record the exact 781 * means, that the filesystem daemon is able to record the exact
@@ -721,7 +788,7 @@ static int fuse_refresh_attributes(struct inode *inode)
721 * for which the owner of the mount has ptrace privilege. This 788 * for which the owner of the mount has ptrace privilege. This
722 * excludes processes started by other users, suid or sgid processes. 789 * excludes processes started by other users, suid or sgid processes.
723 */ 790 */
724static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) 791int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
725{ 792{
726 if (fc->flags & FUSE_ALLOW_OTHER) 793 if (fc->flags & FUSE_ALLOW_OTHER)
727 return 1; 794 return 1;
@@ -795,11 +862,14 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
795 */ 862 */
796 if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) || 863 if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
797 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { 864 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
798 err = fuse_refresh_attributes(inode); 865 struct fuse_inode *fi = get_fuse_inode(inode);
799 if (err) 866 if (fi->i_time < get_jiffies_64()) {
800 return err; 867 err = fuse_do_getattr(inode, NULL, NULL);
868 if (err)
869 return err;
801 870
802 refreshed = true; 871 refreshed = true;
872 }
803 } 873 }
804 874
805 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { 875 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
@@ -809,7 +879,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
809 attributes. This is also needed, because the root 879 attributes. This is also needed, because the root
810 node will at first have no permissions */ 880 node will at first have no permissions */
811 if (err == -EACCES && !refreshed) { 881 if (err == -EACCES && !refreshed) {
812 err = fuse_do_getattr(inode); 882 err = fuse_do_getattr(inode, NULL, NULL);
813 if (!err) 883 if (!err)
814 err = generic_permission(inode, mask, NULL); 884 err = generic_permission(inode, mask, NULL);
815 } 885 }
@@ -825,7 +895,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
825 if (refreshed) 895 if (refreshed)
826 return -EACCES; 896 return -EACCES;
827 897
828 err = fuse_do_getattr(inode); 898 err = fuse_do_getattr(inode, NULL, NULL);
829 if (!err && !(inode->i_mode & S_IXUGO)) 899 if (!err && !(inode->i_mode & S_IXUGO))
830 return -EACCES; 900 return -EACCES;
831 } 901 }
@@ -962,6 +1032,20 @@ static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
962 return file ? fuse_fsync_common(file, de, datasync, 1) : 0; 1032 return file ? fuse_fsync_common(file, de, datasync, 1) : 0;
963} 1033}
964 1034
1035static bool update_mtime(unsigned ivalid)
1036{
1037 /* Always update if mtime is explicitly set */
1038 if (ivalid & ATTR_MTIME_SET)
1039 return true;
1040
1041 /* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1042 if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1043 return false;
1044
1045 /* In all other cases update */
1046 return true;
1047}
1048
965static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) 1049static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
966{ 1050{
967 unsigned ivalid = iattr->ia_valid; 1051 unsigned ivalid = iattr->ia_valid;
@@ -974,16 +1058,19 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
974 arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid; 1058 arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid;
975 if (ivalid & ATTR_SIZE) 1059 if (ivalid & ATTR_SIZE)
976 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; 1060 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
977 /* You can only _set_ these together (they may change by themselves) */ 1061 if (ivalid & ATTR_ATIME) {
978 if ((ivalid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) { 1062 arg->valid |= FATTR_ATIME;
979 arg->valid |= FATTR_ATIME | FATTR_MTIME;
980 arg->atime = iattr->ia_atime.tv_sec; 1063 arg->atime = iattr->ia_atime.tv_sec;
981 arg->mtime = iattr->ia_mtime.tv_sec; 1064 arg->atimensec = iattr->ia_atime.tv_nsec;
1065 if (!(ivalid & ATTR_ATIME_SET))
1066 arg->valid |= FATTR_ATIME_NOW;
982 } 1067 }
983 if (ivalid & ATTR_FILE) { 1068 if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) {
984 struct fuse_file *ff = iattr->ia_file->private_data; 1069 arg->valid |= FATTR_MTIME;
985 arg->valid |= FATTR_FH; 1070 arg->mtime = iattr->ia_mtime.tv_sec;
986 arg->fh = ff->fh; 1071 arg->mtimensec = iattr->ia_mtime.tv_nsec;
1072 if (!(ivalid & ATTR_MTIME_SET))
1073 arg->valid |= FATTR_MTIME_NOW;
987 } 1074 }
988} 1075}
989 1076
@@ -995,22 +1082,28 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
995 * vmtruncate() doesn't allow for this case, so do the rlimit checking 1082 * vmtruncate() doesn't allow for this case, so do the rlimit checking
996 * and the actual truncation by hand. 1083 * and the actual truncation by hand.
997 */ 1084 */
998static int fuse_setattr(struct dentry *entry, struct iattr *attr) 1085static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1086 struct file *file)
999{ 1087{
1000 struct inode *inode = entry->d_inode; 1088 struct inode *inode = entry->d_inode;
1001 struct fuse_conn *fc = get_fuse_conn(inode); 1089 struct fuse_conn *fc = get_fuse_conn(inode);
1002 struct fuse_inode *fi = get_fuse_inode(inode);
1003 struct fuse_req *req; 1090 struct fuse_req *req;
1004 struct fuse_setattr_in inarg; 1091 struct fuse_setattr_in inarg;
1005 struct fuse_attr_out outarg; 1092 struct fuse_attr_out outarg;
1006 int err; 1093 int err;
1007 1094
1095 if (!fuse_allow_task(fc, current))
1096 return -EACCES;
1097
1008 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { 1098 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
1009 err = inode_change_ok(inode, attr); 1099 err = inode_change_ok(inode, attr);
1010 if (err) 1100 if (err)
1011 return err; 1101 return err;
1012 } 1102 }
1013 1103
1104 if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
1105 return 0;
1106
1014 if (attr->ia_valid & ATTR_SIZE) { 1107 if (attr->ia_valid & ATTR_SIZE) {
1015 unsigned long limit; 1108 unsigned long limit;
1016 if (IS_SWAPFILE(inode)) 1109 if (IS_SWAPFILE(inode))
@@ -1027,14 +1120,28 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1027 return PTR_ERR(req); 1120 return PTR_ERR(req);
1028 1121
1029 memset(&inarg, 0, sizeof(inarg)); 1122 memset(&inarg, 0, sizeof(inarg));
1123 memset(&outarg, 0, sizeof(outarg));
1030 iattr_to_fattr(attr, &inarg); 1124 iattr_to_fattr(attr, &inarg);
1125 if (file) {
1126 struct fuse_file *ff = file->private_data;
1127 inarg.valid |= FATTR_FH;
1128 inarg.fh = ff->fh;
1129 }
1130 if (attr->ia_valid & ATTR_SIZE) {
1131 /* For mandatory locking in truncate */
1132 inarg.valid |= FATTR_LOCKOWNER;
1133 inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1134 }
1031 req->in.h.opcode = FUSE_SETATTR; 1135 req->in.h.opcode = FUSE_SETATTR;
1032 req->in.h.nodeid = get_node_id(inode); 1136 req->in.h.nodeid = get_node_id(inode);
1033 req->in.numargs = 1; 1137 req->in.numargs = 1;
1034 req->in.args[0].size = sizeof(inarg); 1138 req->in.args[0].size = sizeof(inarg);
1035 req->in.args[0].value = &inarg; 1139 req->in.args[0].value = &inarg;
1036 req->out.numargs = 1; 1140 req->out.numargs = 1;
1037 req->out.args[0].size = sizeof(outarg); 1141 if (fc->minor < 9)
1142 req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
1143 else
1144 req->out.args[0].size = sizeof(outarg);
1038 req->out.args[0].value = &outarg; 1145 req->out.args[0].value = &outarg;
1039 request_send(fc, req); 1146 request_send(fc, req);
1040 err = req->out.h.error; 1147 err = req->out.h.error;
@@ -1050,11 +1157,18 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1050 return -EIO; 1157 return -EIO;
1051 } 1158 }
1052 1159
1053 fuse_change_attributes(inode, &outarg.attr); 1160 fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), 0);
1054 fi->i_time = time_to_jiffies(outarg.attr_valid, outarg.attr_valid_nsec);
1055 return 0; 1161 return 0;
1056} 1162}
1057 1163
1164static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1165{
1166 if (attr->ia_valid & ATTR_FILE)
1167 return fuse_do_setattr(entry, attr, attr->ia_file);
1168 else
1169 return fuse_do_setattr(entry, attr, NULL);
1170}
1171
1058static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, 1172static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1059 struct kstat *stat) 1173 struct kstat *stat)
1060{ 1174{
@@ -1066,8 +1180,10 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1066 if (!fuse_allow_task(fc, current)) 1180 if (!fuse_allow_task(fc, current))
1067 return -EACCES; 1181 return -EACCES;
1068 1182
1069 err = fuse_refresh_attributes(inode); 1183 if (fi->i_time < get_jiffies_64())
1070 if (!err) { 1184 err = fuse_do_getattr(inode, stat, NULL);
1185 else {
1186 err = 0;
1071 generic_fillattr(inode, stat); 1187 generic_fillattr(inode, stat);
1072 stat->mode = fi->orig_i_mode; 1188 stat->mode = fi->orig_i_mode;
1073 } 1189 }
@@ -1172,6 +1288,9 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1172 struct fuse_getxattr_out outarg; 1288 struct fuse_getxattr_out outarg;
1173 ssize_t ret; 1289 ssize_t ret;
1174 1290
1291 if (!fuse_allow_task(fc, current))
1292 return -EACCES;
1293
1175 if (fc->no_listxattr) 1294 if (fc->no_listxattr)
1176 return -EOPNOTSUPP; 1295 return -EOPNOTSUPP;
1177 1296
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c4b98c03a46e..0fcdba9d47c0 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -28,7 +28,9 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
28 return PTR_ERR(req); 28 return PTR_ERR(req);
29 29
30 memset(&inarg, 0, sizeof(inarg)); 30 memset(&inarg, 0, sizeof(inarg));
31 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 31 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
32 if (!fc->atomic_o_trunc)
33 inarg.flags &= ~O_TRUNC;
32 req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; 34 req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
33 req->in.h.nodeid = get_node_id(inode); 35 req->in.h.nodeid = get_node_id(inode);
34 req->in.numargs = 1; 36 req->in.numargs = 1;
@@ -54,6 +56,7 @@ struct fuse_file *fuse_file_alloc(void)
54 kfree(ff); 56 kfree(ff);
55 ff = NULL; 57 ff = NULL;
56 } 58 }
59 INIT_LIST_HEAD(&ff->write_entry);
57 atomic_set(&ff->count, 0); 60 atomic_set(&ff->count, 0);
58 } 61 }
59 return ff; 62 return ff;
@@ -148,12 +151,18 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir)
148{ 151{
149 struct fuse_file *ff = file->private_data; 152 struct fuse_file *ff = file->private_data;
150 if (ff) { 153 if (ff) {
154 struct fuse_conn *fc = get_fuse_conn(inode);
155
151 fuse_release_fill(ff, get_node_id(inode), file->f_flags, 156 fuse_release_fill(ff, get_node_id(inode), file->f_flags,
152 isdir ? FUSE_RELEASEDIR : FUSE_RELEASE); 157 isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
153 158
154 /* Hold vfsmount and dentry until release is finished */ 159 /* Hold vfsmount and dentry until release is finished */
155 ff->reserved_req->vfsmount = mntget(file->f_path.mnt); 160 ff->reserved_req->vfsmount = mntget(file->f_path.mnt);
156 ff->reserved_req->dentry = dget(file->f_path.dentry); 161 ff->reserved_req->dentry = dget(file->f_path.dentry);
162
163 spin_lock(&fc->lock);
164 list_del(&ff->write_entry);
165 spin_unlock(&fc->lock);
157 /* 166 /*
158 * Normally this will send the RELEASE request, 167 * Normally this will send the RELEASE request,
159 * however if some asynchronous READ or WRITE requests 168 * however if some asynchronous READ or WRITE requests
@@ -180,7 +189,7 @@ static int fuse_release(struct inode *inode, struct file *file)
180 * Scramble the ID space with XTEA, so that the value of the files_struct 189 * Scramble the ID space with XTEA, so that the value of the files_struct
181 * pointer is not exposed to userspace. 190 * pointer is not exposed to userspace.
182 */ 191 */
183static u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) 192u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
184{ 193{
185 u32 *k = fc->scramble_key; 194 u32 *k = fc->scramble_key;
186 u64 v = (unsigned long) id; 195 u64 v = (unsigned long) id;
@@ -299,11 +308,19 @@ void fuse_read_fill(struct fuse_req *req, struct fuse_file *ff,
299} 308}
300 309
301static size_t fuse_send_read(struct fuse_req *req, struct file *file, 310static size_t fuse_send_read(struct fuse_req *req, struct file *file,
302 struct inode *inode, loff_t pos, size_t count) 311 struct inode *inode, loff_t pos, size_t count,
312 fl_owner_t owner)
303{ 313{
304 struct fuse_conn *fc = get_fuse_conn(inode); 314 struct fuse_conn *fc = get_fuse_conn(inode);
305 struct fuse_file *ff = file->private_data; 315 struct fuse_file *ff = file->private_data;
316
306 fuse_read_fill(req, ff, inode, pos, count, FUSE_READ); 317 fuse_read_fill(req, ff, inode, pos, count, FUSE_READ);
318 if (owner != NULL) {
319 struct fuse_read_in *inarg = &req->misc.read_in;
320
321 inarg->read_flags |= FUSE_READ_LOCKOWNER;
322 inarg->lock_owner = fuse_lock_owner_id(fc, owner);
323 }
307 request_send(fc, req); 324 request_send(fc, req);
308 return req->out.args[0].size; 325 return req->out.args[0].size;
309} 326}
@@ -327,7 +344,8 @@ static int fuse_readpage(struct file *file, struct page *page)
327 req->out.page_zeroing = 1; 344 req->out.page_zeroing = 1;
328 req->num_pages = 1; 345 req->num_pages = 1;
329 req->pages[0] = page; 346 req->pages[0] = page;
330 fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE); 347 fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE,
348 NULL);
331 err = req->out.h.error; 349 err = req->out.h.error;
332 fuse_put_request(fc, req); 350 fuse_put_request(fc, req);
333 if (!err) 351 if (!err)
@@ -434,30 +452,47 @@ out:
434 return err; 452 return err;
435} 453}
436 454
437static size_t fuse_send_write(struct fuse_req *req, struct file *file, 455static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
438 struct inode *inode, loff_t pos, size_t count) 456 struct inode *inode, loff_t pos, size_t count,
457 int writepage)
439{ 458{
440 struct fuse_conn *fc = get_fuse_conn(inode); 459 struct fuse_conn *fc = get_fuse_conn(inode);
441 struct fuse_file *ff = file->private_data; 460 struct fuse_write_in *inarg = &req->misc.write.in;
442 struct fuse_write_in inarg; 461 struct fuse_write_out *outarg = &req->misc.write.out;
443 struct fuse_write_out outarg;
444 462
445 memset(&inarg, 0, sizeof(struct fuse_write_in)); 463 memset(inarg, 0, sizeof(struct fuse_write_in));
446 inarg.fh = ff->fh; 464 inarg->fh = ff->fh;
447 inarg.offset = pos; 465 inarg->offset = pos;
448 inarg.size = count; 466 inarg->size = count;
467 inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
449 req->in.h.opcode = FUSE_WRITE; 468 req->in.h.opcode = FUSE_WRITE;
450 req->in.h.nodeid = get_node_id(inode); 469 req->in.h.nodeid = get_node_id(inode);
451 req->in.argpages = 1; 470 req->in.argpages = 1;
452 req->in.numargs = 2; 471 req->in.numargs = 2;
453 req->in.args[0].size = sizeof(struct fuse_write_in); 472 if (fc->minor < 9)
454 req->in.args[0].value = &inarg; 473 req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
474 else
475 req->in.args[0].size = sizeof(struct fuse_write_in);
476 req->in.args[0].value = inarg;
455 req->in.args[1].size = count; 477 req->in.args[1].size = count;
456 req->out.numargs = 1; 478 req->out.numargs = 1;
457 req->out.args[0].size = sizeof(struct fuse_write_out); 479 req->out.args[0].size = sizeof(struct fuse_write_out);
458 req->out.args[0].value = &outarg; 480 req->out.args[0].value = outarg;
481}
482
483static size_t fuse_send_write(struct fuse_req *req, struct file *file,
484 struct inode *inode, loff_t pos, size_t count,
485 fl_owner_t owner)
486{
487 struct fuse_conn *fc = get_fuse_conn(inode);
488 fuse_write_fill(req, file->private_data, inode, pos, count, 0);
489 if (owner != NULL) {
490 struct fuse_write_in *inarg = &req->misc.write.in;
491 inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
492 inarg->lock_owner = fuse_lock_owner_id(fc, owner);
493 }
459 request_send(fc, req); 494 request_send(fc, req);
460 return outarg.size; 495 return req->misc.write.out.size;
461} 496}
462 497
463static int fuse_write_begin(struct file *file, struct address_space *mapping, 498static int fuse_write_begin(struct file *file, struct address_space *mapping,
@@ -478,6 +513,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
478 int err; 513 int err;
479 size_t nres; 514 size_t nres;
480 struct fuse_conn *fc = get_fuse_conn(inode); 515 struct fuse_conn *fc = get_fuse_conn(inode);
516 struct fuse_inode *fi = get_fuse_inode(inode);
481 unsigned offset = pos & (PAGE_CACHE_SIZE - 1); 517 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
482 struct fuse_req *req; 518 struct fuse_req *req;
483 519
@@ -491,7 +527,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
491 req->num_pages = 1; 527 req->num_pages = 1;
492 req->pages[0] = page; 528 req->pages[0] = page;
493 req->page_offset = offset; 529 req->page_offset = offset;
494 nres = fuse_send_write(req, file, inode, pos, count); 530 nres = fuse_send_write(req, file, inode, pos, count, NULL);
495 err = req->out.h.error; 531 err = req->out.h.error;
496 fuse_put_request(fc, req); 532 fuse_put_request(fc, req);
497 if (!err && !nres) 533 if (!err && !nres)
@@ -499,6 +535,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
499 if (!err) { 535 if (!err) {
500 pos += nres; 536 pos += nres;
501 spin_lock(&fc->lock); 537 spin_lock(&fc->lock);
538 fi->attr_version = ++fc->attr_version;
502 if (pos > inode->i_size) 539 if (pos > inode->i_size)
503 i_size_write(inode, pos); 540 i_size_write(inode, pos);
504 spin_unlock(&fc->lock); 541 spin_unlock(&fc->lock);
@@ -591,9 +628,11 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
591 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; 628 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
592 nbytes = min(count, nbytes); 629 nbytes = min(count, nbytes);
593 if (write) 630 if (write)
594 nres = fuse_send_write(req, file, inode, pos, nbytes); 631 nres = fuse_send_write(req, file, inode, pos, nbytes,
632 current->files);
595 else 633 else
596 nres = fuse_send_read(req, file, inode, pos, nbytes); 634 nres = fuse_send_read(req, file, inode, pos, nbytes,
635 current->files);
597 fuse_release_user_pages(req, !write); 636 fuse_release_user_pages(req, !write);
598 if (req->out.h.error) { 637 if (req->out.h.error) {
599 if (!res) 638 if (!res)
@@ -695,7 +734,8 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
695} 734}
696 735
697static void fuse_lk_fill(struct fuse_req *req, struct file *file, 736static void fuse_lk_fill(struct fuse_req *req, struct file *file,
698 const struct file_lock *fl, int opcode, pid_t pid) 737 const struct file_lock *fl, int opcode, pid_t pid,
738 int flock)
699{ 739{
700 struct inode *inode = file->f_path.dentry->d_inode; 740 struct inode *inode = file->f_path.dentry->d_inode;
701 struct fuse_conn *fc = get_fuse_conn(inode); 741 struct fuse_conn *fc = get_fuse_conn(inode);
@@ -708,6 +748,8 @@ static void fuse_lk_fill(struct fuse_req *req, struct file *file,
708 arg->lk.end = fl->fl_end; 748 arg->lk.end = fl->fl_end;
709 arg->lk.type = fl->fl_type; 749 arg->lk.type = fl->fl_type;
710 arg->lk.pid = pid; 750 arg->lk.pid = pid;
751 if (flock)
752 arg->lk_flags |= FUSE_LK_FLOCK;
711 req->in.h.opcode = opcode; 753 req->in.h.opcode = opcode;
712 req->in.h.nodeid = get_node_id(inode); 754 req->in.h.nodeid = get_node_id(inode);
713 req->in.numargs = 1; 755 req->in.numargs = 1;
@@ -727,7 +769,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
727 if (IS_ERR(req)) 769 if (IS_ERR(req))
728 return PTR_ERR(req); 770 return PTR_ERR(req);
729 771
730 fuse_lk_fill(req, file, fl, FUSE_GETLK, 0); 772 fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0);
731 req->out.numargs = 1; 773 req->out.numargs = 1;
732 req->out.args[0].size = sizeof(outarg); 774 req->out.args[0].size = sizeof(outarg);
733 req->out.args[0].value = &outarg; 775 req->out.args[0].value = &outarg;
@@ -740,7 +782,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
740 return err; 782 return err;
741} 783}
742 784
743static int fuse_setlk(struct file *file, struct file_lock *fl) 785static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
744{ 786{
745 struct inode *inode = file->f_path.dentry->d_inode; 787 struct inode *inode = file->f_path.dentry->d_inode;
746 struct fuse_conn *fc = get_fuse_conn(inode); 788 struct fuse_conn *fc = get_fuse_conn(inode);
@@ -757,7 +799,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl)
757 if (IS_ERR(req)) 799 if (IS_ERR(req))
758 return PTR_ERR(req); 800 return PTR_ERR(req);
759 801
760 fuse_lk_fill(req, file, fl, opcode, pid); 802 fuse_lk_fill(req, file, fl, opcode, pid, flock);
761 request_send(fc, req); 803 request_send(fc, req);
762 err = req->out.h.error; 804 err = req->out.h.error;
763 /* locking is restartable */ 805 /* locking is restartable */
@@ -783,8 +825,25 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
783 if (fc->no_lock) 825 if (fc->no_lock)
784 err = posix_lock_file_wait(file, fl); 826 err = posix_lock_file_wait(file, fl);
785 else 827 else
786 err = fuse_setlk(file, fl); 828 err = fuse_setlk(file, fl, 0);
829 }
830 return err;
831}
832
833static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
834{
835 struct inode *inode = file->f_path.dentry->d_inode;
836 struct fuse_conn *fc = get_fuse_conn(inode);
837 int err;
838
839 if (fc->no_lock) {
840 err = flock_lock_file_wait(file, fl);
841 } else {
842 /* emulate flock with POSIX locks */
843 fl->fl_owner = (fl_owner_t) file;
844 err = fuse_setlk(file, fl, 1);
787 } 845 }
846
788 return err; 847 return err;
789} 848}
790 849
@@ -836,6 +895,7 @@ static const struct file_operations fuse_file_operations = {
836 .release = fuse_release, 895 .release = fuse_release,
837 .fsync = fuse_fsync, 896 .fsync = fuse_fsync,
838 .lock = fuse_file_lock, 897 .lock = fuse_file_lock,
898 .flock = fuse_file_flock,
839 .splice_read = generic_file_splice_read, 899 .splice_read = generic_file_splice_read,
840}; 900};
841 901
@@ -848,6 +908,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
848 .release = fuse_release, 908 .release = fuse_release,
849 .fsync = fuse_fsync, 909 .fsync = fuse_fsync,
850 .lock = fuse_file_lock, 910 .lock = fuse_file_lock,
911 .flock = fuse_file_flock,
851 /* no mmap and splice_read */ 912 /* no mmap and splice_read */
852}; 913};
853 914
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1764506fdd11..6c5461de1a5f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -67,6 +67,12 @@ struct fuse_inode {
67 /** The sticky bit in inode->i_mode may have been removed, so 67 /** The sticky bit in inode->i_mode may have been removed, so
68 preserve the original mode */ 68 preserve the original mode */
69 mode_t orig_i_mode; 69 mode_t orig_i_mode;
70
71 /** Version of last attribute change */
72 u64 attr_version;
73
74 /** Files usable in writepage. Protected by fc->lock */
75 struct list_head write_files;
70}; 76};
71 77
72/** FUSE specific file data */ 78/** FUSE specific file data */
@@ -79,6 +85,9 @@ struct fuse_file {
79 85
80 /** Refcount */ 86 /** Refcount */
81 atomic_t count; 87 atomic_t count;
88
89 /** Entry on inode's write_files list */
90 struct list_head write_entry;
82}; 91};
83 92
84/** One input argument of a request */ 93/** One input argument of a request */
@@ -210,6 +219,10 @@ struct fuse_req {
210 struct fuse_init_in init_in; 219 struct fuse_init_in init_in;
211 struct fuse_init_out init_out; 220 struct fuse_init_out init_out;
212 struct fuse_read_in read_in; 221 struct fuse_read_in read_in;
222 struct {
223 struct fuse_write_in in;
224 struct fuse_write_out out;
225 } write;
213 struct fuse_lk_in lk_in; 226 struct fuse_lk_in lk_in;
214 } misc; 227 } misc;
215 228
@@ -317,6 +330,9 @@ struct fuse_conn {
317 /** Do readpages asynchronously? Only set in INIT */ 330 /** Do readpages asynchronously? Only set in INIT */
318 unsigned async_read : 1; 331 unsigned async_read : 1;
319 332
333 /** Do not send separate SETATTR request before open(O_TRUNC) */
334 unsigned atomic_o_trunc : 1;
335
320 /* 336 /*
321 * The following bitfields are only for optimization purposes 337 * The following bitfields are only for optimization purposes
322 * and hence races in setting them will not cause malfunction 338 * and hence races in setting them will not cause malfunction
@@ -387,6 +403,9 @@ struct fuse_conn {
387 403
388 /** Reserved request for the DESTROY message */ 404 /** Reserved request for the DESTROY message */
389 struct fuse_req *destroy_req; 405 struct fuse_req *destroy_req;
406
407 /** Version counter for attribute changes */
408 u64 attr_version;
390}; 409};
391 410
392static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 411static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -416,7 +435,8 @@ extern const struct file_operations fuse_dev_operations;
416 * Get a filled in inode 435 * Get a filled in inode
417 */ 436 */
418struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 437struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
419 int generation, struct fuse_attr *attr); 438 int generation, struct fuse_attr *attr,
439 u64 attr_valid, u64 attr_version);
420 440
421/** 441/**
422 * Send FORGET command 442 * Send FORGET command
@@ -477,7 +497,8 @@ void fuse_init_symlink(struct inode *inode);
477/** 497/**
478 * Change attributes of an inode 498 * Change attributes of an inode
479 */ 499 */
480void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr); 500void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
501 u64 attr_valid, u64 attr_version);
481 502
482/** 503/**
483 * Initialize the client device 504 * Initialize the client device
@@ -565,3 +586,10 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc);
565 * Is file type valid? 586 * Is file type valid?
566 */ 587 */
567int fuse_valid_type(int m); 588int fuse_valid_type(int m);
589
590/**
591 * Is task allowed to perform filesystem operation?
592 */
593int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task);
594
595u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fd0735715c14..9a68d6970845 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -56,6 +56,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
56 fi->i_time = 0; 56 fi->i_time = 0;
57 fi->nodeid = 0; 57 fi->nodeid = 0;
58 fi->nlookup = 0; 58 fi->nlookup = 0;
59 INIT_LIST_HEAD(&fi->write_files);
59 fi->forget_req = fuse_request_alloc(); 60 fi->forget_req = fuse_request_alloc();
60 if (!fi->forget_req) { 61 if (!fi->forget_req) {
61 kmem_cache_free(fuse_inode_cachep, inode); 62 kmem_cache_free(fuse_inode_cachep, inode);
@@ -68,6 +69,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
68static void fuse_destroy_inode(struct inode *inode) 69static void fuse_destroy_inode(struct inode *inode)
69{ 70{
70 struct fuse_inode *fi = get_fuse_inode(inode); 71 struct fuse_inode *fi = get_fuse_inode(inode);
72 BUG_ON(!list_empty(&fi->write_files));
71 if (fi->forget_req) 73 if (fi->forget_req)
72 fuse_request_free(fi->forget_req); 74 fuse_request_free(fi->forget_req);
73 kmem_cache_free(fuse_inode_cachep, inode); 75 kmem_cache_free(fuse_inode_cachep, inode);
@@ -117,12 +119,22 @@ static void fuse_truncate(struct address_space *mapping, loff_t offset)
117 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 119 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
118} 120}
119 121
120void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr) 122
123void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
124 u64 attr_valid, u64 attr_version)
121{ 125{
122 struct fuse_conn *fc = get_fuse_conn(inode); 126 struct fuse_conn *fc = get_fuse_conn(inode);
123 struct fuse_inode *fi = get_fuse_inode(inode); 127 struct fuse_inode *fi = get_fuse_inode(inode);
124 loff_t oldsize; 128 loff_t oldsize;
125 129
130 spin_lock(&fc->lock);
131 if (attr_version != 0 && fi->attr_version > attr_version) {
132 spin_unlock(&fc->lock);
133 return;
134 }
135 fi->attr_version = ++fc->attr_version;
136 fi->i_time = attr_valid;
137
126 inode->i_ino = attr->ino; 138 inode->i_ino = attr->ino;
127 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 139 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
128 inode->i_nlink = attr->nlink; 140 inode->i_nlink = attr->nlink;
@@ -136,6 +148,11 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
136 inode->i_ctime.tv_sec = attr->ctime; 148 inode->i_ctime.tv_sec = attr->ctime;
137 inode->i_ctime.tv_nsec = attr->ctimensec; 149 inode->i_ctime.tv_nsec = attr->ctimensec;
138 150
151 if (attr->blksize != 0)
152 inode->i_blkbits = ilog2(attr->blksize);
153 else
154 inode->i_blkbits = inode->i_sb->s_blocksize_bits;
155
139 /* 156 /*
140 * Don't set the sticky bit in i_mode, unless we want the VFS 157 * Don't set the sticky bit in i_mode, unless we want the VFS
141 * to check permissions. This prevents failures due to the 158 * to check permissions. This prevents failures due to the
@@ -145,7 +162,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
145 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) 162 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
146 inode->i_mode &= ~S_ISVTX; 163 inode->i_mode &= ~S_ISVTX;
147 164
148 spin_lock(&fc->lock);
149 oldsize = inode->i_size; 165 oldsize = inode->i_size;
150 i_size_write(inode, attr->size); 166 i_size_write(inode, attr->size);
151 spin_unlock(&fc->lock); 167 spin_unlock(&fc->lock);
@@ -194,7 +210,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp)
194} 210}
195 211
196struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 212struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
197 int generation, struct fuse_attr *attr) 213 int generation, struct fuse_attr *attr,
214 u64 attr_valid, u64 attr_version)
198{ 215{
199 struct inode *inode; 216 struct inode *inode;
200 struct fuse_inode *fi; 217 struct fuse_inode *fi;
@@ -222,7 +239,8 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
222 spin_lock(&fc->lock); 239 spin_lock(&fc->lock);
223 fi->nlookup ++; 240 fi->nlookup ++;
224 spin_unlock(&fc->lock); 241 spin_unlock(&fc->lock);
225 fuse_change_attributes(inode, attr); 242 fuse_change_attributes(inode, attr, attr_valid, attr_version);
243
226 return inode; 244 return inode;
227} 245}
228 246
@@ -287,6 +305,11 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
287 struct fuse_statfs_out outarg; 305 struct fuse_statfs_out outarg;
288 int err; 306 int err;
289 307
308 if (!fuse_allow_task(fc, current)) {
309 buf->f_type = FUSE_SUPER_MAGIC;
310 return 0;
311 }
312
290 req = fuse_get_req(fc); 313 req = fuse_get_req(fc);
291 if (IS_ERR(req)) 314 if (IS_ERR(req))
292 return PTR_ERR(req); 315 return PTR_ERR(req);
@@ -452,6 +475,7 @@ static struct fuse_conn *new_conn(void)
452 } 475 }
453 fc->reqctr = 0; 476 fc->reqctr = 0;
454 fc->blocked = 1; 477 fc->blocked = 1;
478 fc->attr_version = 1;
455 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); 479 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
456 } 480 }
457out: 481out:
@@ -483,7 +507,7 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
483 attr.mode = mode; 507 attr.mode = mode;
484 attr.ino = FUSE_ROOT_ID; 508 attr.ino = FUSE_ROOT_ID;
485 attr.nlink = 1; 509 attr.nlink = 1;
486 return fuse_iget(sb, 1, 0, &attr); 510 return fuse_iget(sb, 1, 0, &attr, 0, 0);
487} 511}
488 512
489static const struct super_operations fuse_super_operations = { 513static const struct super_operations fuse_super_operations = {
@@ -514,6 +538,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
514 fc->async_read = 1; 538 fc->async_read = 1;
515 if (!(arg->flags & FUSE_POSIX_LOCKS)) 539 if (!(arg->flags & FUSE_POSIX_LOCKS))
516 fc->no_lock = 1; 540 fc->no_lock = 1;
541 if (arg->flags & FUSE_ATOMIC_O_TRUNC)
542 fc->atomic_o_trunc = 1;
517 } else { 543 } else {
518 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 544 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
519 fc->no_lock = 1; 545 fc->no_lock = 1;
@@ -536,7 +562,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
536 arg->major = FUSE_KERNEL_VERSION; 562 arg->major = FUSE_KERNEL_VERSION;
537 arg->minor = FUSE_KERNEL_MINOR_VERSION; 563 arg->minor = FUSE_KERNEL_MINOR_VERSION;
538 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; 564 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
539 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS; 565 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_FILE_OPS |
566 FUSE_ATOMIC_O_TRUNC;
540 req->in.h.opcode = FUSE_INIT; 567 req->in.h.opcode = FUSE_INIT;
541 req->in.numargs = 1; 568 req->in.numargs = 1;
542 req->in.args[0].size = sizeof(*arg); 569 req->in.args[0].size = sizeof(*arg);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a003d50edcdb..a263d82761df 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -375,7 +375,7 @@ void journal_commit_transaction(journal_t *journal)
375 struct buffer_head *bh = jh2bh(jh); 375 struct buffer_head *bh = jh2bh(jh);
376 376
377 jbd_lock_bh_state(bh); 377 jbd_lock_bh_state(bh);
378 jbd_slab_free(jh->b_committed_data, bh->b_size); 378 jbd_free(jh->b_committed_data, bh->b_size);
379 jh->b_committed_data = NULL; 379 jh->b_committed_data = NULL;
380 jbd_unlock_bh_state(bh); 380 jbd_unlock_bh_state(bh);
381 } 381 }
@@ -792,14 +792,14 @@ restart_loop:
792 * Otherwise, we can just throw away the frozen data now. 792 * Otherwise, we can just throw away the frozen data now.
793 */ 793 */
794 if (jh->b_committed_data) { 794 if (jh->b_committed_data) {
795 jbd_slab_free(jh->b_committed_data, bh->b_size); 795 jbd_free(jh->b_committed_data, bh->b_size);
796 jh->b_committed_data = NULL; 796 jh->b_committed_data = NULL;
797 if (jh->b_frozen_data) { 797 if (jh->b_frozen_data) {
798 jh->b_committed_data = jh->b_frozen_data; 798 jh->b_committed_data = jh->b_frozen_data;
799 jh->b_frozen_data = NULL; 799 jh->b_frozen_data = NULL;
800 } 800 }
801 } else if (jh->b_frozen_data) { 801 } else if (jh->b_frozen_data) {
802 jbd_slab_free(jh->b_frozen_data, bh->b_size); 802 jbd_free(jh->b_frozen_data, bh->b_size);
803 jh->b_frozen_data = NULL; 803 jh->b_frozen_data = NULL;
804 } 804 }
805 805
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index a6be78c05dce..5d9fec0b7ebd 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -83,7 +83,6 @@ EXPORT_SYMBOL(journal_force_commit);
83 83
84static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 84static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
85static void __journal_abort_soft (journal_t *journal, int errno); 85static void __journal_abort_soft (journal_t *journal, int errno);
86static int journal_create_jbd_slab(size_t slab_size);
87 86
88/* 87/*
89 * Helper function used to manage commit timeouts 88 * Helper function used to manage commit timeouts
@@ -218,7 +217,7 @@ static int journal_start_thread(journal_t *journal)
218 if (IS_ERR(t)) 217 if (IS_ERR(t))
219 return PTR_ERR(t); 218 return PTR_ERR(t);
220 219
221 wait_event(journal->j_wait_done_commit, journal->j_task != 0); 220 wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
222 return 0; 221 return 0;
223} 222}
224 223
@@ -230,7 +229,8 @@ static void journal_kill_thread(journal_t *journal)
230 while (journal->j_task) { 229 while (journal->j_task) {
231 wake_up(&journal->j_wait_commit); 230 wake_up(&journal->j_wait_commit);
232 spin_unlock(&journal->j_state_lock); 231 spin_unlock(&journal->j_state_lock);
233 wait_event(journal->j_wait_done_commit, journal->j_task == 0); 232 wait_event(journal->j_wait_done_commit,
233 journal->j_task == NULL);
234 spin_lock(&journal->j_state_lock); 234 spin_lock(&journal->j_state_lock);
235 } 235 }
236 spin_unlock(&journal->j_state_lock); 236 spin_unlock(&journal->j_state_lock);
@@ -334,10 +334,10 @@ repeat:
334 char *tmp; 334 char *tmp;
335 335
336 jbd_unlock_bh_state(bh_in); 336 jbd_unlock_bh_state(bh_in);
337 tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); 337 tmp = jbd_alloc(bh_in->b_size, GFP_NOFS);
338 jbd_lock_bh_state(bh_in); 338 jbd_lock_bh_state(bh_in);
339 if (jh_in->b_frozen_data) { 339 if (jh_in->b_frozen_data) {
340 jbd_slab_free(tmp, bh_in->b_size); 340 jbd_free(tmp, bh_in->b_size);
341 goto repeat; 341 goto repeat;
342 } 342 }
343 343
@@ -654,7 +654,7 @@ static journal_t * journal_init_common (void)
654 journal_t *journal; 654 journal_t *journal;
655 int err; 655 int err;
656 656
657 journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL); 657 journal = kmalloc(sizeof(*journal), GFP_KERNEL);
658 if (!journal) 658 if (!journal)
659 goto fail; 659 goto fail;
660 memset(journal, 0, sizeof(*journal)); 660 memset(journal, 0, sizeof(*journal));
@@ -1095,13 +1095,6 @@ int journal_load(journal_t *journal)
1095 } 1095 }
1096 } 1096 }
1097 1097
1098 /*
1099 * Create a slab for this blocksize
1100 */
1101 err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
1102 if (err)
1103 return err;
1104
1105 /* Let the recovery code check whether it needs to recover any 1098 /* Let the recovery code check whether it needs to recover any
1106 * data from the journal. */ 1099 * data from the journal. */
1107 if (journal_recover(journal)) 1100 if (journal_recover(journal))
@@ -1615,86 +1608,6 @@ int journal_blocks_per_page(struct inode *inode)
1615} 1608}
1616 1609
1617/* 1610/*
1618 * Simple support for retrying memory allocations. Introduced to help to
1619 * debug different VM deadlock avoidance strategies.
1620 */
1621void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
1622{
1623 return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
1624}
1625
1626/*
1627 * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
1628 * and allocate frozen and commit buffers from these slabs.
1629 *
1630 * Reason for doing this is to avoid, SLAB_DEBUG - since it could
1631 * cause bh to cross page boundary.
1632 */
1633
1634#define JBD_MAX_SLABS 5
1635#define JBD_SLAB_INDEX(size) (size >> 11)
1636
1637static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
1638static const char *jbd_slab_names[JBD_MAX_SLABS] = {
1639 "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
1640};
1641
1642static void journal_destroy_jbd_slabs(void)
1643{
1644 int i;
1645
1646 for (i = 0; i < JBD_MAX_SLABS; i++) {
1647 if (jbd_slab[i])
1648 kmem_cache_destroy(jbd_slab[i]);
1649 jbd_slab[i] = NULL;
1650 }
1651}
1652
1653static int journal_create_jbd_slab(size_t slab_size)
1654{
1655 int i = JBD_SLAB_INDEX(slab_size);
1656
1657 BUG_ON(i >= JBD_MAX_SLABS);
1658
1659 /*
1660 * Check if we already have a slab created for this size
1661 */
1662 if (jbd_slab[i])
1663 return 0;
1664
1665 /*
1666 * Create a slab and force alignment to be same as slabsize -
1667 * this will make sure that allocations won't cross the page
1668 * boundary.
1669 */
1670 jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
1671 slab_size, slab_size, 0, NULL);
1672 if (!jbd_slab[i]) {
1673 printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
1674 return -ENOMEM;
1675 }
1676 return 0;
1677}
1678
1679void * jbd_slab_alloc(size_t size, gfp_t flags)
1680{
1681 int idx;
1682
1683 idx = JBD_SLAB_INDEX(size);
1684 BUG_ON(jbd_slab[idx] == NULL);
1685 return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
1686}
1687
1688void jbd_slab_free(void *ptr, size_t size)
1689{
1690 int idx;
1691
1692 idx = JBD_SLAB_INDEX(size);
1693 BUG_ON(jbd_slab[idx] == NULL);
1694 kmem_cache_free(jbd_slab[idx], ptr);
1695}
1696
1697/*
1698 * Journal_head storage management 1611 * Journal_head storage management
1699 */ 1612 */
1700static struct kmem_cache *journal_head_cache; 1613static struct kmem_cache *journal_head_cache;
@@ -1739,14 +1652,14 @@ static struct journal_head *journal_alloc_journal_head(void)
1739 atomic_inc(&nr_journal_heads); 1652 atomic_inc(&nr_journal_heads);
1740#endif 1653#endif
1741 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); 1654 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
1742 if (ret == 0) { 1655 if (ret == NULL) {
1743 jbd_debug(1, "out of memory for journal_head\n"); 1656 jbd_debug(1, "out of memory for journal_head\n");
1744 if (time_after(jiffies, last_warning + 5*HZ)) { 1657 if (time_after(jiffies, last_warning + 5*HZ)) {
1745 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 1658 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
1746 __FUNCTION__); 1659 __FUNCTION__);
1747 last_warning = jiffies; 1660 last_warning = jiffies;
1748 } 1661 }
1749 while (ret == 0) { 1662 while (ret == NULL) {
1750 yield(); 1663 yield();
1751 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); 1664 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
1752 } 1665 }
@@ -1881,13 +1794,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
1881 printk(KERN_WARNING "%s: freeing " 1794 printk(KERN_WARNING "%s: freeing "
1882 "b_frozen_data\n", 1795 "b_frozen_data\n",
1883 __FUNCTION__); 1796 __FUNCTION__);
1884 jbd_slab_free(jh->b_frozen_data, bh->b_size); 1797 jbd_free(jh->b_frozen_data, bh->b_size);
1885 } 1798 }
1886 if (jh->b_committed_data) { 1799 if (jh->b_committed_data) {
1887 printk(KERN_WARNING "%s: freeing " 1800 printk(KERN_WARNING "%s: freeing "
1888 "b_committed_data\n", 1801 "b_committed_data\n",
1889 __FUNCTION__); 1802 __FUNCTION__);
1890 jbd_slab_free(jh->b_committed_data, bh->b_size); 1803 jbd_free(jh->b_committed_data, bh->b_size);
1891 } 1804 }
1892 bh->b_private = NULL; 1805 bh->b_private = NULL;
1893 jh->b_bh = NULL; /* debug, really */ 1806 jh->b_bh = NULL; /* debug, really */
@@ -2042,7 +1955,6 @@ static void journal_destroy_caches(void)
2042 journal_destroy_revoke_caches(); 1955 journal_destroy_revoke_caches();
2043 journal_destroy_journal_head_cache(); 1956 journal_destroy_journal_head_cache();
2044 journal_destroy_handle_cache(); 1957 journal_destroy_handle_cache();
2045 journal_destroy_jbd_slabs();
2046} 1958}
2047 1959
2048static int __init journal_init(void) 1960static int __init journal_init(void)
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 8df5bac0b7a5..9841b1e5af03 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -96,8 +96,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
96 96
97alloc_transaction: 97alloc_transaction:
98 if (!journal->j_running_transaction) { 98 if (!journal->j_running_transaction) {
99 new_transaction = jbd_kmalloc(sizeof(*new_transaction), 99 new_transaction = kmalloc(sizeof(*new_transaction),
100 GFP_NOFS); 100 GFP_NOFS|__GFP_NOFAIL);
101 if (!new_transaction) { 101 if (!new_transaction) {
102 ret = -ENOMEM; 102 ret = -ENOMEM;
103 goto out; 103 goto out;
@@ -675,7 +675,7 @@ repeat:
675 JBUFFER_TRACE(jh, "allocate memory for buffer"); 675 JBUFFER_TRACE(jh, "allocate memory for buffer");
676 jbd_unlock_bh_state(bh); 676 jbd_unlock_bh_state(bh);
677 frozen_buffer = 677 frozen_buffer =
678 jbd_slab_alloc(jh2bh(jh)->b_size, 678 jbd_alloc(jh2bh(jh)->b_size,
679 GFP_NOFS); 679 GFP_NOFS);
680 if (!frozen_buffer) { 680 if (!frozen_buffer) {
681 printk(KERN_EMERG 681 printk(KERN_EMERG
@@ -735,7 +735,7 @@ done:
735 735
736out: 736out:
737 if (unlikely(frozen_buffer)) /* It's usually NULL */ 737 if (unlikely(frozen_buffer)) /* It's usually NULL */
738 jbd_slab_free(frozen_buffer, bh->b_size); 738 jbd_free(frozen_buffer, bh->b_size);
739 739
740 JBUFFER_TRACE(jh, "exit"); 740 JBUFFER_TRACE(jh, "exit");
741 return error; 741 return error;
@@ -888,7 +888,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
888 888
889repeat: 889repeat:
890 if (!jh->b_committed_data) { 890 if (!jh->b_committed_data) {
891 committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); 891 committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS);
892 if (!committed_data) { 892 if (!committed_data) {
893 printk(KERN_EMERG "%s: No memory for committed data\n", 893 printk(KERN_EMERG "%s: No memory for committed data\n",
894 __FUNCTION__); 894 __FUNCTION__);
@@ -915,7 +915,7 @@ repeat:
915out: 915out:
916 journal_put_journal_head(jh); 916 journal_put_journal_head(jh);
917 if (unlikely(committed_data)) 917 if (unlikely(committed_data))
918 jbd_slab_free(committed_data, bh->b_size); 918 jbd_free(committed_data, bh->b_size);
919 return err; 919 return err;
920} 920}
921 921
@@ -1172,7 +1172,7 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1172 } 1172 }
1173 1173
1174 /* That test should have eliminated the following case: */ 1174 /* That test should have eliminated the following case: */
1175 J_ASSERT_JH(jh, jh->b_frozen_data == 0); 1175 J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
1176 1176
1177 JBUFFER_TRACE(jh, "file as BJ_Metadata"); 1177 JBUFFER_TRACE(jh, "file as BJ_Metadata");
1178 spin_lock(&journal->j_list_lock); 1178 spin_lock(&journal->j_list_lock);
@@ -1522,7 +1522,7 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh)
1522 1522
1523 J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); 1523 J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
1524 if (jh->b_jlist != BJ_None) 1524 if (jh->b_jlist != BJ_None)
1525 J_ASSERT_JH(jh, transaction != 0); 1525 J_ASSERT_JH(jh, transaction != NULL);
1526 1526
1527 switch (jh->b_jlist) { 1527 switch (jh->b_jlist) {
1528 case BJ_None: 1528 case BJ_None:
@@ -1591,11 +1591,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1591 if (buffer_locked(bh) || buffer_dirty(bh)) 1591 if (buffer_locked(bh) || buffer_dirty(bh))
1592 goto out; 1592 goto out;
1593 1593
1594 if (jh->b_next_transaction != 0) 1594 if (jh->b_next_transaction != NULL)
1595 goto out; 1595 goto out;
1596 1596
1597 spin_lock(&journal->j_list_lock); 1597 spin_lock(&journal->j_list_lock);
1598 if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) { 1598 if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) {
1599 if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { 1599 if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
1600 /* A written-back ordered data buffer */ 1600 /* A written-back ordered data buffer */
1601 JBUFFER_TRACE(jh, "release data"); 1601 JBUFFER_TRACE(jh, "release data");
@@ -1603,7 +1603,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1603 journal_remove_journal_head(bh); 1603 journal_remove_journal_head(bh);
1604 __brelse(bh); 1604 __brelse(bh);
1605 } 1605 }
1606 } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) { 1606 } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1607 /* written-back checkpointed metadata buffer */ 1607 /* written-back checkpointed metadata buffer */
1608 if (jh->b_jlist == BJ_None) { 1608 if (jh->b_jlist == BJ_None) {
1609 JBUFFER_TRACE(jh, "remove from checkpoint list"); 1609 JBUFFER_TRACE(jh, "remove from checkpoint list");
@@ -1963,7 +1963,7 @@ void __journal_file_buffer(struct journal_head *jh,
1963 1963
1964 J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); 1964 J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
1965 J_ASSERT_JH(jh, jh->b_transaction == transaction || 1965 J_ASSERT_JH(jh, jh->b_transaction == transaction ||
1966 jh->b_transaction == 0); 1966 jh->b_transaction == NULL);
1967 1967
1968 if (jh->b_transaction && jh->b_jlist == jlist) 1968 if (jh->b_transaction && jh->b_jlist == jlist)
1969 return; 1969 return;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index c0f59d1b13dc..6986f334c643 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -278,7 +278,7 @@ static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
278 unsigned long long block) 278 unsigned long long block)
279{ 279{
280 tag->t_blocknr = cpu_to_be32(block & (u32)~0); 280 tag->t_blocknr = cpu_to_be32(block & (u32)~0);
281 if (tag_bytes > JBD_TAG_SIZE32) 281 if (tag_bytes > JBD2_TAG_SIZE32)
282 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); 282 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
283} 283}
284 284
@@ -384,7 +384,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
384 struct buffer_head *bh = jh2bh(jh); 384 struct buffer_head *bh = jh2bh(jh);
385 385
386 jbd_lock_bh_state(bh); 386 jbd_lock_bh_state(bh);
387 jbd2_slab_free(jh->b_committed_data, bh->b_size); 387 jbd2_free(jh->b_committed_data, bh->b_size);
388 jh->b_committed_data = NULL; 388 jh->b_committed_data = NULL;
389 jbd_unlock_bh_state(bh); 389 jbd_unlock_bh_state(bh);
390 } 390 }
@@ -475,7 +475,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
475 spin_unlock(&journal->j_list_lock); 475 spin_unlock(&journal->j_list_lock);
476 476
477 if (err) 477 if (err)
478 __jbd2_journal_abort_hard(journal); 478 jbd2_journal_abort(journal, err);
479 479
480 jbd2_journal_write_revoke_records(journal, commit_transaction); 480 jbd2_journal_write_revoke_records(journal, commit_transaction);
481 481
@@ -533,7 +533,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
533 533
534 descriptor = jbd2_journal_get_descriptor_buffer(journal); 534 descriptor = jbd2_journal_get_descriptor_buffer(journal);
535 if (!descriptor) { 535 if (!descriptor) {
536 __jbd2_journal_abort_hard(journal); 536 jbd2_journal_abort(journal, -EIO);
537 continue; 537 continue;
538 } 538 }
539 539
@@ -566,7 +566,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
566 and repeat this loop: we'll fall into the 566 and repeat this loop: we'll fall into the
567 refile-on-abort condition above. */ 567 refile-on-abort condition above. */
568 if (err) { 568 if (err) {
569 __jbd2_journal_abort_hard(journal); 569 jbd2_journal_abort(journal, err);
570 continue; 570 continue;
571 } 571 }
572 572
@@ -757,7 +757,7 @@ wait_for_iobuf:
757 err = -EIO; 757 err = -EIO;
758 758
759 if (err) 759 if (err)
760 __jbd2_journal_abort_hard(journal); 760 jbd2_journal_abort(journal, err);
761 761
762 /* End of a transaction! Finally, we can do checkpoint 762 /* End of a transaction! Finally, we can do checkpoint
763 processing: any buffers committed as a result of this 763 processing: any buffers committed as a result of this
@@ -801,14 +801,14 @@ restart_loop:
801 * Otherwise, we can just throw away the frozen data now. 801 * Otherwise, we can just throw away the frozen data now.
802 */ 802 */
803 if (jh->b_committed_data) { 803 if (jh->b_committed_data) {
804 jbd2_slab_free(jh->b_committed_data, bh->b_size); 804 jbd2_free(jh->b_committed_data, bh->b_size);
805 jh->b_committed_data = NULL; 805 jh->b_committed_data = NULL;
806 if (jh->b_frozen_data) { 806 if (jh->b_frozen_data) {
807 jh->b_committed_data = jh->b_frozen_data; 807 jh->b_committed_data = jh->b_frozen_data;
808 jh->b_frozen_data = NULL; 808 jh->b_frozen_data = NULL;
809 } 809 }
810 } else if (jh->b_frozen_data) { 810 } else if (jh->b_frozen_data) {
811 jbd2_slab_free(jh->b_frozen_data, bh->b_size); 811 jbd2_free(jh->b_frozen_data, bh->b_size);
812 jh->b_frozen_data = NULL; 812 jh->b_frozen_data = NULL;
813 } 813 }
814 814
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f37324aee817..6ddc5531587c 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -84,7 +84,6 @@ EXPORT_SYMBOL(jbd2_journal_force_commit);
84 84
85static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 85static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
86static void __journal_abort_soft (journal_t *journal, int errno); 86static void __journal_abort_soft (journal_t *journal, int errno);
87static int jbd2_journal_create_jbd_slab(size_t slab_size);
88 87
89/* 88/*
90 * Helper function used to manage commit timeouts 89 * Helper function used to manage commit timeouts
@@ -335,10 +334,10 @@ repeat:
335 char *tmp; 334 char *tmp;
336 335
337 jbd_unlock_bh_state(bh_in); 336 jbd_unlock_bh_state(bh_in);
338 tmp = jbd2_slab_alloc(bh_in->b_size, GFP_NOFS); 337 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
339 jbd_lock_bh_state(bh_in); 338 jbd_lock_bh_state(bh_in);
340 if (jh_in->b_frozen_data) { 339 if (jh_in->b_frozen_data) {
341 jbd2_slab_free(tmp, bh_in->b_size); 340 jbd2_free(tmp, bh_in->b_size);
342 goto repeat; 341 goto repeat;
343 } 342 }
344 343
@@ -655,10 +654,9 @@ static journal_t * journal_init_common (void)
655 journal_t *journal; 654 journal_t *journal;
656 int err; 655 int err;
657 656
658 journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL); 657 journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
659 if (!journal) 658 if (!journal)
660 goto fail; 659 goto fail;
661 memset(journal, 0, sizeof(*journal));
662 660
663 init_waitqueue_head(&journal->j_wait_transaction_locked); 661 init_waitqueue_head(&journal->j_wait_transaction_locked);
664 init_waitqueue_head(&journal->j_wait_logspace); 662 init_waitqueue_head(&journal->j_wait_logspace);
@@ -672,7 +670,7 @@ static journal_t * journal_init_common (void)
672 spin_lock_init(&journal->j_list_lock); 670 spin_lock_init(&journal->j_list_lock);
673 spin_lock_init(&journal->j_state_lock); 671 spin_lock_init(&journal->j_state_lock);
674 672
675 journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE); 673 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
676 674
677 /* The journal is marked for error until we succeed with recovery! */ 675 /* The journal is marked for error until we succeed with recovery! */
678 journal->j_flags = JBD2_ABORT; 676 journal->j_flags = JBD2_ABORT;
@@ -1096,13 +1094,6 @@ int jbd2_journal_load(journal_t *journal)
1096 } 1094 }
1097 } 1095 }
1098 1096
1099 /*
1100 * Create a slab for this blocksize
1101 */
1102 err = jbd2_journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
1103 if (err)
1104 return err;
1105
1106 /* Let the recovery code check whether it needs to recover any 1097 /* Let the recovery code check whether it needs to recover any
1107 * data from the journal. */ 1098 * data from the journal. */
1108 if (jbd2_journal_recover(journal)) 1099 if (jbd2_journal_recover(journal))
@@ -1621,89 +1612,9 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
1621size_t journal_tag_bytes(journal_t *journal) 1612size_t journal_tag_bytes(journal_t *journal)
1622{ 1613{
1623 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 1614 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
1624 return JBD_TAG_SIZE64; 1615 return JBD2_TAG_SIZE64;
1625 else 1616 else
1626 return JBD_TAG_SIZE32; 1617 return JBD2_TAG_SIZE32;
1627}
1628
1629/*
1630 * Simple support for retrying memory allocations. Introduced to help to
1631 * debug different VM deadlock avoidance strategies.
1632 */
1633void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
1634{
1635 return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
1636}
1637
1638/*
1639 * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
1640 * and allocate frozen and commit buffers from these slabs.
1641 *
1642 * Reason for doing this is to avoid, SLAB_DEBUG - since it could
1643 * cause bh to cross page boundary.
1644 */
1645
1646#define JBD_MAX_SLABS 5
1647#define JBD_SLAB_INDEX(size) (size >> 11)
1648
1649static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
1650static const char *jbd_slab_names[JBD_MAX_SLABS] = {
1651 "jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k"
1652};
1653
1654static void jbd2_journal_destroy_jbd_slabs(void)
1655{
1656 int i;
1657
1658 for (i = 0; i < JBD_MAX_SLABS; i++) {
1659 if (jbd_slab[i])
1660 kmem_cache_destroy(jbd_slab[i]);
1661 jbd_slab[i] = NULL;
1662 }
1663}
1664
1665static int jbd2_journal_create_jbd_slab(size_t slab_size)
1666{
1667 int i = JBD_SLAB_INDEX(slab_size);
1668
1669 BUG_ON(i >= JBD_MAX_SLABS);
1670
1671 /*
1672 * Check if we already have a slab created for this size
1673 */
1674 if (jbd_slab[i])
1675 return 0;
1676
1677 /*
1678 * Create a slab and force alignment to be same as slabsize -
1679 * this will make sure that allocations won't cross the page
1680 * boundary.
1681 */
1682 jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
1683 slab_size, slab_size, 0, NULL);
1684 if (!jbd_slab[i]) {
1685 printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
1686 return -ENOMEM;
1687 }
1688 return 0;
1689}
1690
1691void * jbd2_slab_alloc(size_t size, gfp_t flags)
1692{
1693 int idx;
1694
1695 idx = JBD_SLAB_INDEX(size);
1696 BUG_ON(jbd_slab[idx] == NULL);
1697 return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
1698}
1699
1700void jbd2_slab_free(void *ptr, size_t size)
1701{
1702 int idx;
1703
1704 idx = JBD_SLAB_INDEX(size);
1705 BUG_ON(jbd_slab[idx] == NULL);
1706 kmem_cache_free(jbd_slab[idx], ptr);
1707} 1618}
1708 1619
1709/* 1620/*
@@ -1770,7 +1681,7 @@ static void journal_free_journal_head(struct journal_head *jh)
1770{ 1681{
1771#ifdef CONFIG_JBD2_DEBUG 1682#ifdef CONFIG_JBD2_DEBUG
1772 atomic_dec(&nr_journal_heads); 1683 atomic_dec(&nr_journal_heads);
1773 memset(jh, JBD_POISON_FREE, sizeof(*jh)); 1684 memset(jh, JBD2_POISON_FREE, sizeof(*jh));
1774#endif 1685#endif
1775 kmem_cache_free(jbd2_journal_head_cache, jh); 1686 kmem_cache_free(jbd2_journal_head_cache, jh);
1776} 1687}
@@ -1893,13 +1804,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
1893 printk(KERN_WARNING "%s: freeing " 1804 printk(KERN_WARNING "%s: freeing "
1894 "b_frozen_data\n", 1805 "b_frozen_data\n",
1895 __FUNCTION__); 1806 __FUNCTION__);
1896 jbd2_slab_free(jh->b_frozen_data, bh->b_size); 1807 jbd2_free(jh->b_frozen_data, bh->b_size);
1897 } 1808 }
1898 if (jh->b_committed_data) { 1809 if (jh->b_committed_data) {
1899 printk(KERN_WARNING "%s: freeing " 1810 printk(KERN_WARNING "%s: freeing "
1900 "b_committed_data\n", 1811 "b_committed_data\n",
1901 __FUNCTION__); 1812 __FUNCTION__);
1902 jbd2_slab_free(jh->b_committed_data, bh->b_size); 1813 jbd2_free(jh->b_committed_data, bh->b_size);
1903 } 1814 }
1904 bh->b_private = NULL; 1815 bh->b_private = NULL;
1905 jh->b_bh = NULL; /* debug, really */ 1816 jh->b_bh = NULL; /* debug, really */
@@ -1953,16 +1864,14 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
1953/* 1864/*
1954 * debugfs tunables 1865 * debugfs tunables
1955 */ 1866 */
1956#if defined(CONFIG_JBD2_DEBUG) 1867#ifdef CONFIG_JBD2_DEBUG
1957u8 jbd2_journal_enable_debug; 1868u8 jbd2_journal_enable_debug __read_mostly;
1958EXPORT_SYMBOL(jbd2_journal_enable_debug); 1869EXPORT_SYMBOL(jbd2_journal_enable_debug);
1959#endif
1960
1961#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_DEBUG_FS)
1962 1870
1963#define JBD2_DEBUG_NAME "jbd2-debug" 1871#define JBD2_DEBUG_NAME "jbd2-debug"
1964 1872
1965struct dentry *jbd2_debugfs_dir, *jbd2_debug; 1873static struct dentry *jbd2_debugfs_dir;
1874static struct dentry *jbd2_debug;
1966 1875
1967static void __init jbd2_create_debugfs_entry(void) 1876static void __init jbd2_create_debugfs_entry(void)
1968{ 1877{
@@ -1975,24 +1884,18 @@ static void __init jbd2_create_debugfs_entry(void)
1975 1884
1976static void __exit jbd2_remove_debugfs_entry(void) 1885static void __exit jbd2_remove_debugfs_entry(void)
1977{ 1886{
1978 if (jbd2_debug) 1887 debugfs_remove(jbd2_debug);
1979 debugfs_remove(jbd2_debug); 1888 debugfs_remove(jbd2_debugfs_dir);
1980 if (jbd2_debugfs_dir)
1981 debugfs_remove(jbd2_debugfs_dir);
1982} 1889}
1983 1890
1984#else 1891#else
1985 1892
1986static void __init jbd2_create_debugfs_entry(void) 1893static void __init jbd2_create_debugfs_entry(void)
1987{ 1894{
1988 do {
1989 } while (0);
1990} 1895}
1991 1896
1992static void __exit jbd2_remove_debugfs_entry(void) 1897static void __exit jbd2_remove_debugfs_entry(void)
1993{ 1898{
1994 do {
1995 } while (0);
1996} 1899}
1997 1900
1998#endif 1901#endif
@@ -2040,7 +1943,6 @@ static void jbd2_journal_destroy_caches(void)
2040 jbd2_journal_destroy_revoke_caches(); 1943 jbd2_journal_destroy_revoke_caches();
2041 jbd2_journal_destroy_jbd2_journal_head_cache(); 1944 jbd2_journal_destroy_jbd2_journal_head_cache();
2042 jbd2_journal_destroy_handle_cache(); 1945 jbd2_journal_destroy_handle_cache();
2043 jbd2_journal_destroy_jbd_slabs();
2044} 1946}
2045 1947
2046static int __init journal_init(void) 1948static int __init journal_init(void)
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index b50be8a044eb..d0ce627539ef 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -311,7 +311,7 @@ int jbd2_journal_skip_recovery(journal_t *journal)
311static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) 311static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
312{ 312{
313 unsigned long long block = be32_to_cpu(tag->t_blocknr); 313 unsigned long long block = be32_to_cpu(tag->t_blocknr);
314 if (tag_bytes > JBD_TAG_SIZE32) 314 if (tag_bytes > JBD2_TAG_SIZE32)
315 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 315 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
316 return block; 316 return block;
317} 317}
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 01d88975e0c5..3595fd432d5b 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -352,7 +352,7 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
352 if (bh) 352 if (bh)
353 BUFFER_TRACE(bh, "found on hash"); 353 BUFFER_TRACE(bh, "found on hash");
354 } 354 }
355#ifdef JBD_EXPENSIVE_CHECKING 355#ifdef JBD2_EXPENSIVE_CHECKING
356 else { 356 else {
357 struct buffer_head *bh2; 357 struct buffer_head *bh2;
358 358
@@ -453,7 +453,7 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
453 } 453 }
454 } 454 }
455 455
456#ifdef JBD_EXPENSIVE_CHECKING 456#ifdef JBD2_EXPENSIVE_CHECKING
457 /* There better not be one left behind by now! */ 457 /* There better not be one left behind by now! */
458 record = find_revoke_record(journal, bh->b_blocknr); 458 record = find_revoke_record(journal, bh->b_blocknr);
459 J_ASSERT_JH(jh, record == NULL); 459 J_ASSERT_JH(jh, record == NULL);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 7946ff43fc40..b1fcf2b3dca3 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -96,13 +96,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
96 96
97alloc_transaction: 97alloc_transaction:
98 if (!journal->j_running_transaction) { 98 if (!journal->j_running_transaction) {
99 new_transaction = jbd_kmalloc(sizeof(*new_transaction), 99 new_transaction = kzalloc(sizeof(*new_transaction),
100 GFP_NOFS); 100 GFP_NOFS|__GFP_NOFAIL);
101 if (!new_transaction) { 101 if (!new_transaction) {
102 ret = -ENOMEM; 102 ret = -ENOMEM;
103 goto out; 103 goto out;
104 } 104 }
105 memset(new_transaction, 0, sizeof(*new_transaction));
106 } 105 }
107 106
108 jbd_debug(3, "New handle %p going live.\n", handle); 107 jbd_debug(3, "New handle %p going live.\n", handle);
@@ -236,7 +235,7 @@ out:
236/* Allocate a new handle. This should probably be in a slab... */ 235/* Allocate a new handle. This should probably be in a slab... */
237static handle_t *new_handle(int nblocks) 236static handle_t *new_handle(int nblocks)
238{ 237{
239 handle_t *handle = jbd_alloc_handle(GFP_NOFS); 238 handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
240 if (!handle) 239 if (!handle)
241 return NULL; 240 return NULL;
242 memset(handle, 0, sizeof(*handle)); 241 memset(handle, 0, sizeof(*handle));
@@ -282,7 +281,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
282 281
283 err = start_this_handle(journal, handle); 282 err = start_this_handle(journal, handle);
284 if (err < 0) { 283 if (err < 0) {
285 jbd_free_handle(handle); 284 jbd2_free_handle(handle);
286 current->journal_info = NULL; 285 current->journal_info = NULL;
287 handle = ERR_PTR(err); 286 handle = ERR_PTR(err);
288 } 287 }
@@ -668,7 +667,7 @@ repeat:
668 JBUFFER_TRACE(jh, "allocate memory for buffer"); 667 JBUFFER_TRACE(jh, "allocate memory for buffer");
669 jbd_unlock_bh_state(bh); 668 jbd_unlock_bh_state(bh);
670 frozen_buffer = 669 frozen_buffer =
671 jbd2_slab_alloc(jh2bh(jh)->b_size, 670 jbd2_alloc(jh2bh(jh)->b_size,
672 GFP_NOFS); 671 GFP_NOFS);
673 if (!frozen_buffer) { 672 if (!frozen_buffer) {
674 printk(KERN_EMERG 673 printk(KERN_EMERG
@@ -728,7 +727,7 @@ done:
728 727
729out: 728out:
730 if (unlikely(frozen_buffer)) /* It's usually NULL */ 729 if (unlikely(frozen_buffer)) /* It's usually NULL */
731 jbd2_slab_free(frozen_buffer, bh->b_size); 730 jbd2_free(frozen_buffer, bh->b_size);
732 731
733 JBUFFER_TRACE(jh, "exit"); 732 JBUFFER_TRACE(jh, "exit");
734 return error; 733 return error;
@@ -881,7 +880,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
881 880
882repeat: 881repeat:
883 if (!jh->b_committed_data) { 882 if (!jh->b_committed_data) {
884 committed_data = jbd2_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); 883 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
885 if (!committed_data) { 884 if (!committed_data) {
886 printk(KERN_EMERG "%s: No memory for committed data\n", 885 printk(KERN_EMERG "%s: No memory for committed data\n",
887 __FUNCTION__); 886 __FUNCTION__);
@@ -908,7 +907,7 @@ repeat:
908out: 907out:
909 jbd2_journal_put_journal_head(jh); 908 jbd2_journal_put_journal_head(jh);
910 if (unlikely(committed_data)) 909 if (unlikely(committed_data))
911 jbd2_slab_free(committed_data, bh->b_size); 910 jbd2_free(committed_data, bh->b_size);
912 return err; 911 return err;
913} 912}
914 913
@@ -1411,7 +1410,7 @@ int jbd2_journal_stop(handle_t *handle)
1411 spin_unlock(&journal->j_state_lock); 1410 spin_unlock(&journal->j_state_lock);
1412 } 1411 }
1413 1412
1414 jbd_free_handle(handle); 1413 jbd2_free_handle(handle);
1415 return err; 1414 return err;
1416} 1415}
1417 1416
diff --git a/fs/namei.c b/fs/namei.c
index 464eeccb675b..1e5c71669164 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1659,8 +1659,10 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1659 error = locks_verify_locked(inode); 1659 error = locks_verify_locked(inode);
1660 if (!error) { 1660 if (!error) {
1661 DQUOT_INIT(inode); 1661 DQUOT_INIT(inode);
1662 1662
1663 error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL); 1663 error = do_truncate(dentry, 0,
1664 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
1665 NULL);
1664 } 1666 }
1665 put_write_access(inode); 1667 put_write_access(inode);
1666 if (error) 1668 if (error)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6c22453d77ae..6d2f2a3eccf8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -357,6 +357,10 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
357 357
358 nfs_inc_stats(inode, NFSIOS_VFSSETATTR); 358 nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
359 359
360 /* skip mode change if it's just for clearing setuid/setgid */
361 if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
362 attr->ia_valid &= ~ATTR_MODE;
363
360 if (attr->ia_valid & ATTR_SIZE) { 364 if (attr->ia_valid & ATTR_SIZE) {
361 if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) 365 if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
362 attr->ia_valid &= ~ATTR_SIZE; 366 attr->ia_valid &= ~ATTR_SIZE;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 819545d21670..46934c97f8f7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -364,14 +364,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
364 if (iap->ia_valid & ATTR_MODE) { 364 if (iap->ia_valid & ATTR_MODE) {
365 iap->ia_mode &= S_IALLUGO; 365 iap->ia_mode &= S_IALLUGO;
366 imode = iap->ia_mode |= (imode & ~S_IALLUGO); 366 imode = iap->ia_mode |= (imode & ~S_IALLUGO);
367 /* if changing uid/gid revoke setuid/setgid in mode */
368 if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) {
369 iap->ia_valid |= ATTR_KILL_PRIV;
370 iap->ia_mode &= ~S_ISUID;
371 }
372 if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
373 iap->ia_mode &= ~S_ISGID;
374 } else {
375 /*
376 * Revoke setuid/setgid bit on chown/chgrp
377 */
378 if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid)
379 iap->ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV;
380 if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
381 iap->ia_valid |= ATTR_KILL_SGID;
367 } 382 }
368 383
369 /* Revoke setuid/setgid bit on chown/chgrp */
370 if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid)
371 iap->ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV;
372 if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
373 iap->ia_valid |= ATTR_KILL_SGID;
374
375 /* Change the attributes. */ 384 /* Change the attributes. */
376 385
377 iap->ia_valid |= ATTR_CTIME; 386 iap->ia_valid |= ATTR_CTIME;
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index e7905816c4ca..64965e1c21c4 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -111,7 +111,7 @@ utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
111 int c, nc; 111 int c, nc;
112 const struct utf8_table *t; 112 const struct utf8_table *t;
113 113
114 if (s == 0) 114 if (!s)
115 return 0; 115 return 0;
116 116
117 l = wc; 117 l = wc;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9ea12004fa57..0804289d355d 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3061,7 +3061,11 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3061{ 3061{
3062 struct inode *inode = dentry->d_inode; 3062 struct inode *inode = dentry->d_inode;
3063 int error; 3063 int error;
3064 unsigned int ia_valid = attr->ia_valid; 3064 unsigned int ia_valid;
3065
3066 /* must be turned off for recursive notify_change calls */
3067 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3068
3065 reiserfs_write_lock(inode->i_sb); 3069 reiserfs_write_lock(inode->i_sb);
3066 if (attr->ia_valid & ATTR_SIZE) { 3070 if (attr->ia_valid & ATTR_SIZE) {
3067 /* version 2 items will be caught by the s_maxbytes check 3071 /* version 2 items will be caught by the s_maxbytes check