aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJeff Garzik <jgarzik@pobox.com>2005-07-30 18:14:50 -0400
committerJeff Garzik <jgarzik@pobox.com>2005-07-30 18:14:50 -0400
commitde745fb27983770ebfdeaa70f8a36f791fb33786 (patch)
tree701555a1a7a2a5ff9a6c67896cf1ea089597750e /fs
parent08cd84c81f27d5bd22ba958b7cae6d566c509280 (diff)
parenta670fcb43f01a67ef56176afc76e5d43d128b25c (diff)
/spare/repo/netdev-2.6 branch 'ieee80211'
Diffstat (limited to 'fs')
-rw-r--r--fs/autofs4/autofs_i.h1
-rw-r--r--fs/autofs4/inode.c73
-rw-r--r--fs/bio.c1
-rw-r--r--fs/ext2/ialloc.c1
-rw-r--r--fs/ext2/xattr.c2
-rw-r--r--fs/ext2/xip.c81
-rw-r--r--fs/ext3/ialloc.c2
-rw-r--r--fs/ext3/xattr.c2
-rw-r--r--fs/fcntl.c5
-rw-r--r--fs/hostfs/hostfs.h1
-rw-r--r--fs/hostfs/hostfs_kern.c11
-rw-r--r--fs/hostfs/hostfs_user.c16
-rw-r--r--fs/hppfs/hppfs_kern.c6
-rw-r--r--fs/inode.c45
-rw-r--r--fs/inotify.c177
-rw-r--r--fs/jffs/intrep.c3
-rw-r--r--fs/jffs2/build.c9
-rw-r--r--fs/jffs2/erase.c174
-rw-r--r--fs/jffs2/nodelist.c14
-rw-r--r--fs/jffs2/os-linux.h4
-rw-r--r--fs/jffs2/readinode.c11
-rw-r--r--fs/jffs2/super.c11
-rw-r--r--fs/jfs/jfs_dmap.c344
-rw-r--r--fs/jfs/jfs_dtree.c212
-rw-r--r--fs/jfs/jfs_dtree.h7
-rw-r--r--fs/jfs/jfs_imap.c105
-rw-r--r--fs/jfs/jfs_logmgr.c3
-rw-r--r--fs/jfs/jfs_metapage.c11
-rw-r--r--fs/jfs/jfs_unicode.c7
-rw-r--r--fs/jfs/jfs_xtree.c340
-rw-r--r--fs/jfs/jfs_xtree.h6
-rw-r--r--fs/jfs/xattr.c6
-rw-r--r--fs/lockd/svc.c4
-rw-r--r--fs/locks.c81
-rw-r--r--fs/mbcache.c3
-rw-r--r--fs/ntfs/ChangeLog179
-rw-r--r--fs/ntfs/Makefile4
-rw-r--r--fs/ntfs/aops.c166
-rw-r--r--fs/ntfs/attrib.c630
-rw-r--r--fs/ntfs/attrib.h16
-rw-r--r--fs/ntfs/compress.c46
-rw-r--r--fs/ntfs/debug.c15
-rw-r--r--fs/ntfs/dir.c32
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ntfs/index.c16
-rw-r--r--fs/ntfs/inode.c530
-rw-r--r--fs/ntfs/inode.h7
-rw-r--r--fs/ntfs/layout.h83
-rw-r--r--fs/ntfs/lcnalloc.c72
-rw-r--r--fs/ntfs/logfile.c11
-rw-r--r--fs/ntfs/mft.c227
-rw-r--r--fs/ntfs/namei.c34
-rw-r--r--fs/ntfs/ntfs.h8
-rw-r--r--fs/ntfs/runlist.c278
-rw-r--r--fs/ntfs/runlist.h16
-rw-r--r--fs/ntfs/super.c692
-rw-r--r--fs/ntfs/sysctl.c4
-rw-r--r--fs/ntfs/sysctl.h2
-rw-r--r--fs/ntfs/time.h4
-rw-r--r--fs/ntfs/types.h10
-rw-r--r--fs/ntfs/unistr.c2
-rw-r--r--fs/ntfs/usnjrnl.c84
-rw-r--r--fs/ntfs/usnjrnl.h205
-rw-r--r--fs/ntfs/volume.h12
-rw-r--r--fs/reiserfs/inode.c12
-rw-r--r--fs/reiserfs/journal.c4
-rw-r--r--fs/reiserfs/xattr.c1
-rw-r--r--fs/sysfs/file.c18
-rw-r--r--fs/sysfs/inode.c2
69 files changed, 3051 insertions, 2152 deletions
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 9c09641ce907..fca83e28edcf 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -92,6 +92,7 @@ struct autofs_wait_queue {
92 92
93struct autofs_sb_info { 93struct autofs_sb_info {
94 u32 magic; 94 u32 magic;
95 struct dentry *root;
95 struct file *pipe; 96 struct file *pipe;
96 pid_t oz_pgrp; 97 pid_t oz_pgrp;
97 int catatonic; 98 int catatonic;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 4bb14cc68040..0a3c05d10167 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -16,6 +16,7 @@
16#include <linux/pagemap.h> 16#include <linux/pagemap.h>
17#include <linux/parser.h> 17#include <linux/parser.h>
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/smp_lock.h>
19#include "autofs_i.h" 20#include "autofs_i.h"
20#include <linux/module.h> 21#include <linux/module.h>
21 22
@@ -76,6 +77,66 @@ void autofs4_free_ino(struct autofs_info *ino)
76 kfree(ino); 77 kfree(ino);
77} 78}
78 79
80/*
81 * Deal with the infamous "Busy inodes after umount ..." message.
82 *
83 * Clean up the dentry tree. This happens with autofs if the user
84 * space program goes away due to a SIGKILL, SIGSEGV etc.
85 */
86static void autofs4_force_release(struct autofs_sb_info *sbi)
87{
88 struct dentry *this_parent = sbi->root;
89 struct list_head *next;
90
91 spin_lock(&dcache_lock);
92repeat:
93 next = this_parent->d_subdirs.next;
94resume:
95 while (next != &this_parent->d_subdirs) {
96 struct dentry *dentry = list_entry(next, struct dentry, d_child);
97
98 /* Negative dentry - don`t care */
99 if (!simple_positive(dentry)) {
100 next = next->next;
101 continue;
102 }
103
104 if (!list_empty(&dentry->d_subdirs)) {
105 this_parent = dentry;
106 goto repeat;
107 }
108
109 next = next->next;
110 spin_unlock(&dcache_lock);
111
112 DPRINTK("dentry %p %.*s",
113 dentry, (int)dentry->d_name.len, dentry->d_name.name);
114
115 dput(dentry);
116 spin_lock(&dcache_lock);
117 }
118
119 if (this_parent != sbi->root) {
120 struct dentry *dentry = this_parent;
121
122 next = this_parent->d_child.next;
123 this_parent = this_parent->d_parent;
124 spin_unlock(&dcache_lock);
125 DPRINTK("parent dentry %p %.*s",
126 dentry, (int)dentry->d_name.len, dentry->d_name.name);
127 dput(dentry);
128 spin_lock(&dcache_lock);
129 goto resume;
130 }
131 spin_unlock(&dcache_lock);
132
133 dput(sbi->root);
134 sbi->root = NULL;
135 shrink_dcache_sb(sbi->sb);
136
137 return;
138}
139
79static void autofs4_put_super(struct super_block *sb) 140static void autofs4_put_super(struct super_block *sb)
80{ 141{
81 struct autofs_sb_info *sbi = autofs4_sbi(sb); 142 struct autofs_sb_info *sbi = autofs4_sbi(sb);
@@ -85,6 +146,10 @@ static void autofs4_put_super(struct super_block *sb)
85 if ( !sbi->catatonic ) 146 if ( !sbi->catatonic )
86 autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */ 147 autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */
87 148
149 /* Clean up and release dangling references */
150 if (sbi)
151 autofs4_force_release(sbi);
152
88 kfree(sbi); 153 kfree(sbi);
89 154
90 DPRINTK("shutting down"); 155 DPRINTK("shutting down");
@@ -199,6 +264,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
199 264
200 s->s_fs_info = sbi; 265 s->s_fs_info = sbi;
201 sbi->magic = AUTOFS_SBI_MAGIC; 266 sbi->magic = AUTOFS_SBI_MAGIC;
267 sbi->root = NULL;
202 sbi->catatonic = 0; 268 sbi->catatonic = 0;
203 sbi->exp_timeout = 0; 269 sbi->exp_timeout = 0;
204 sbi->oz_pgrp = process_group(current); 270 sbi->oz_pgrp = process_group(current);
@@ -267,6 +333,13 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
267 sbi->pipe = pipe; 333 sbi->pipe = pipe;
268 334
269 /* 335 /*
336 * Take a reference to the root dentry so we get a chance to
337 * clean up the dentry tree on umount.
338 * See autofs4_force_release.
339 */
340 sbi->root = dget(root);
341
342 /*
270 * Success! Install the root dentry now to indicate completion. 343 * Success! Install the root dentry now to indicate completion.
271 */ 344 */
272 s->s_root = root; 345 s->s_root = root;
diff --git a/fs/bio.c b/fs/bio.c
index ca8f7a850fe3..249dd6bb66c8 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -261,6 +261,7 @@ inline void __bio_clone(struct bio *bio, struct bio *bio_src)
261 */ 261 */
262 bio->bi_vcnt = bio_src->bi_vcnt; 262 bio->bi_vcnt = bio_src->bi_vcnt;
263 bio->bi_size = bio_src->bi_size; 263 bio->bi_size = bio_src->bi_size;
264 bio->bi_idx = bio_src->bi_idx;
264 bio_phys_segments(q, bio); 265 bio_phys_segments(q, bio);
265 bio_hw_segments(q, bio); 266 bio_hw_segments(q, bio);
266} 267}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 77e059149212..161f156d98c8 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -612,6 +612,7 @@ got:
612 err = ext2_init_acl(inode, dir); 612 err = ext2_init_acl(inode, dir);
613 if (err) { 613 if (err) {
614 DQUOT_FREE_INODE(inode); 614 DQUOT_FREE_INODE(inode);
615 DQUOT_DROP(inode);
615 goto fail2; 616 goto fail2;
616 } 617 }
617 mark_inode_dirty(inode); 618 mark_inode_dirty(inode);
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 27982b500e84..0099462d4271 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -823,7 +823,7 @@ cleanup:
823void 823void
824ext2_xattr_put_super(struct super_block *sb) 824ext2_xattr_put_super(struct super_block *sb)
825{ 825{
826 mb_cache_shrink(ext2_xattr_cache, sb->s_bdev); 826 mb_cache_shrink(sb->s_bdev);
827} 827}
828 828
829 829
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index d44431d1a338..ca7f00312388 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -15,66 +15,79 @@
15#include "xip.h" 15#include "xip.h"
16 16
17static inline int 17static inline int
18__inode_direct_access(struct inode *inode, sector_t sector, unsigned long *data) { 18__inode_direct_access(struct inode *inode, sector_t sector,
19 unsigned long *data)
20{
19 BUG_ON(!inode->i_sb->s_bdev->bd_disk->fops->direct_access); 21 BUG_ON(!inode->i_sb->s_bdev->bd_disk->fops->direct_access);
20 return inode->i_sb->s_bdev->bd_disk->fops 22 return inode->i_sb->s_bdev->bd_disk->fops
21 ->direct_access(inode->i_sb->s_bdev,sector,data); 23 ->direct_access(inode->i_sb->s_bdev,sector,data);
22} 24}
23 25
26static inline int
27__ext2_get_sector(struct inode *inode, sector_t offset, int create,
28 sector_t *result)
29{
30 struct buffer_head tmp;
31 int rc;
32
33 memset(&tmp, 0, sizeof(struct buffer_head));
34 rc = ext2_get_block(inode, offset/ (PAGE_SIZE/512), &tmp,
35 create);
36 *result = tmp.b_blocknr;
37
38 /* did we get a sparse block (hole in the file)? */
39 if (!tmp.b_blocknr && !rc) {
40 BUG_ON(create);
41 rc = -ENODATA;
42 }
43
44 return rc;
45}
46
24int 47int
25ext2_clear_xip_target(struct inode *inode, int block) { 48ext2_clear_xip_target(struct inode *inode, int block)
26 sector_t sector = block*(PAGE_SIZE/512); 49{
50 sector_t sector = block * (PAGE_SIZE/512);
27 unsigned long data; 51 unsigned long data;
28 int rc; 52 int rc;
29 53
30 rc = __inode_direct_access(inode, sector, &data); 54 rc = __inode_direct_access(inode, sector, &data);
31 if (rc) 55 if (!rc)
32 return rc; 56 clear_page((void*)data);
33 clear_page((void*)data); 57 return rc;
34 return 0;
35} 58}
36 59
37void ext2_xip_verify_sb(struct super_block *sb) 60void ext2_xip_verify_sb(struct super_block *sb)
38{ 61{
39 struct ext2_sb_info *sbi = EXT2_SB(sb); 62 struct ext2_sb_info *sbi = EXT2_SB(sb);
40 63
41 if ((sbi->s_mount_opt & EXT2_MOUNT_XIP)) { 64 if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) &&
42 if ((sb->s_bdev == NULL) || 65 !sb->s_bdev->bd_disk->fops->direct_access) {
43 sb->s_bdev->bd_disk == NULL || 66 sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
44 sb->s_bdev->bd_disk->fops == NULL || 67 ext2_warning(sb, __FUNCTION__,
45 sb->s_bdev->bd_disk->fops->direct_access == NULL) { 68 "ignoring xip option - not supported by bdev");
46 sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
47 ext2_warning(sb, __FUNCTION__,
48 "ignoring xip option - not supported by bdev");
49 }
50 } 69 }
51} 70}
52 71
53struct page* 72struct page *
54ext2_get_xip_page(struct address_space *mapping, sector_t blockno, 73ext2_get_xip_page(struct address_space *mapping, sector_t offset,
55 int create) 74 int create)
56{ 75{
57 int rc; 76 int rc;
58 unsigned long data; 77 unsigned long data;
59 struct buffer_head tmp; 78 sector_t sector;
60 79
61 tmp.b_state = 0; 80 /* first, retrieve the sector number */
62 tmp.b_blocknr = 0; 81 rc = __ext2_get_sector(mapping->host, offset, create, &sector);
63 rc = ext2_get_block(mapping->host, blockno/(PAGE_SIZE/512) , &tmp,
64 create);
65 if (rc) 82 if (rc)
66 return ERR_PTR(rc); 83 goto error;
67 if (tmp.b_blocknr == 0) {
68 /* SPARSE block */
69 BUG_ON(create);
70 return ERR_PTR(-ENODATA);
71 }
72 84
85 /* retrieve address of the target data */
73 rc = __inode_direct_access 86 rc = __inode_direct_access
74 (mapping->host,tmp.b_blocknr*(PAGE_SIZE/512) ,&data); 87 (mapping->host, sector * (PAGE_SIZE/512), &data);
75 if (rc) 88 if (!rc)
76 return ERR_PTR(rc); 89 return virt_to_page(data);
77 90
78 SetPageUptodate(virt_to_page(data)); 91 error:
79 return virt_to_page(data); 92 return ERR_PTR(rc);
80} 93}
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 1e6f3ea28713..6981bd014ede 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -604,12 +604,14 @@ got:
604 err = ext3_init_acl(handle, inode, dir); 604 err = ext3_init_acl(handle, inode, dir);
605 if (err) { 605 if (err) {
606 DQUOT_FREE_INODE(inode); 606 DQUOT_FREE_INODE(inode);
607 DQUOT_DROP(inode);
607 goto fail2; 608 goto fail2;
608 } 609 }
609 err = ext3_mark_inode_dirty(handle, inode); 610 err = ext3_mark_inode_dirty(handle, inode);
610 if (err) { 611 if (err) {
611 ext3_std_error(sb, err); 612 ext3_std_error(sb, err);
612 DQUOT_FREE_INODE(inode); 613 DQUOT_FREE_INODE(inode);
614 DQUOT_DROP(inode);
613 goto fail2; 615 goto fail2;
614 } 616 }
615 617
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 3f9dfa643b19..269c7b92db9a 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -1106,7 +1106,7 @@ cleanup:
1106void 1106void
1107ext3_xattr_put_super(struct super_block *sb) 1107ext3_xattr_put_super(struct super_block *sb)
1108{ 1108{
1109 mb_cache_shrink(ext3_xattr_cache, sb->s_bdev); 1109 mb_cache_shrink(sb->s_bdev);
1110} 1110}
1111 1111
1112/* 1112/*
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 286a9f8f3d49..6fbc9d8fcc36 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -288,7 +288,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
288 break; 288 break;
289 case F_SETLK: 289 case F_SETLK:
290 case F_SETLKW: 290 case F_SETLKW:
291 err = fcntl_setlk(filp, cmd, (struct flock __user *) arg); 291 err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
292 break; 292 break;
293 case F_GETOWN: 293 case F_GETOWN:
294 /* 294 /*
@@ -376,7 +376,8 @@ asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg
376 break; 376 break;
377 case F_SETLK64: 377 case F_SETLK64:
378 case F_SETLKW64: 378 case F_SETLKW64:
379 err = fcntl_setlk64(filp, cmd, (struct flock64 __user *) arg); 379 err = fcntl_setlk64(fd, filp, cmd,
380 (struct flock64 __user *) arg);
380 break; 381 break;
381 default: 382 default:
382 err = do_fcntl(fd, cmd, arg, filp); 383 err = do_fcntl(fd, cmd, arg, filp);
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index c1516d013bf6..67bca0d4a33b 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -69,6 +69,7 @@ extern int read_file(int fd, unsigned long long *offset, char *buf, int len);
69extern int write_file(int fd, unsigned long long *offset, const char *buf, 69extern int write_file(int fd, unsigned long long *offset, const char *buf,
70 int len); 70 int len);
71extern int lseek_file(int fd, long long offset, int whence); 71extern int lseek_file(int fd, long long offset, int whence);
72extern int fsync_file(int fd, int datasync);
72extern int file_create(char *name, int ur, int uw, int ux, int gr, 73extern int file_create(char *name, int ur, int uw, int ux, int gr,
73 int gw, int gx, int or, int ow, int ox); 74 int gw, int gx, int or, int ow, int ox);
74extern int set_attr(const char *file, struct hostfs_iattr *attrs); 75extern int set_attr(const char *file, struct hostfs_iattr *attrs);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 4bf43ea87c46..b2d18200a003 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -15,7 +15,6 @@
15#include <linux/pagemap.h> 15#include <linux/pagemap.h>
16#include <linux/blkdev.h> 16#include <linux/blkdev.h>
17#include <linux/list.h> 17#include <linux/list.h>
18#include <linux/root_dev.h>
19#include <linux/statfs.h> 18#include <linux/statfs.h>
20#include <linux/kdev_t.h> 19#include <linux/kdev_t.h>
21#include <asm/uaccess.h> 20#include <asm/uaccess.h>
@@ -160,8 +159,6 @@ static int read_name(struct inode *ino, char *name)
160 ino->i_size = i_size; 159 ino->i_size = i_size;
161 ino->i_blksize = i_blksize; 160 ino->i_blksize = i_blksize;
162 ino->i_blocks = i_blocks; 161 ino->i_blocks = i_blocks;
163 if((ino->i_sb->s_dev == ROOT_DEV) && (ino->i_uid == getuid()))
164 ino->i_uid = 0;
165 return(0); 162 return(0);
166} 163}
167 164
@@ -385,7 +382,7 @@ int hostfs_file_open(struct inode *ino, struct file *file)
385 382
386int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) 383int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
387{ 384{
388 return(0); 385 return fsync_file(HOSTFS_I(dentry->d_inode)->fd, datasync);
389} 386}
390 387
391static struct file_operations hostfs_file_fops = { 388static struct file_operations hostfs_file_fops = {
@@ -841,16 +838,10 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
841 attrs.ia_mode = attr->ia_mode; 838 attrs.ia_mode = attr->ia_mode;
842 } 839 }
843 if(attr->ia_valid & ATTR_UID){ 840 if(attr->ia_valid & ATTR_UID){
844 if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) &&
845 (attr->ia_uid == 0))
846 attr->ia_uid = getuid();
847 attrs.ia_valid |= HOSTFS_ATTR_UID; 841 attrs.ia_valid |= HOSTFS_ATTR_UID;
848 attrs.ia_uid = attr->ia_uid; 842 attrs.ia_uid = attr->ia_uid;
849 } 843 }
850 if(attr->ia_valid & ATTR_GID){ 844 if(attr->ia_valid & ATTR_GID){
851 if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) &&
852 (attr->ia_gid == 0))
853 attr->ia_gid = getgid();
854 attrs.ia_valid |= HOSTFS_ATTR_GID; 845 attrs.ia_valid |= HOSTFS_ATTR_GID;
855 attrs.ia_gid = attr->ia_gid; 846 attrs.ia_gid = attr->ia_gid;
856 } 847 }
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 4796e8490f7d..b97809deba66 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -153,10 +153,24 @@ int lseek_file(int fd, long long offset, int whence)
153 int ret; 153 int ret;
154 154
155 ret = lseek64(fd, offset, whence); 155 ret = lseek64(fd, offset, whence);
156 if(ret < 0) return(-errno); 156 if(ret < 0)
157 return(-errno);
157 return(0); 158 return(0);
158} 159}
159 160
161int fsync_file(int fd, int datasync)
162{
163 int ret;
164 if (datasync)
165 ret = fdatasync(fd);
166 else
167 ret = fsync(fd);
168
169 if (ret < 0)
170 return -errno;
171 return 0;
172}
173
160void close_file(void *stream) 174void close_file(void *stream)
161{ 175{
162 close(*((int *) stream)); 176 close(*((int *) stream));
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index 6f553e17c375..ff150fedb981 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -233,7 +233,7 @@ static ssize_t read_proc(struct file *file, char *buf, ssize_t count,
233 set_fs(USER_DS); 233 set_fs(USER_DS);
234 234
235 if(ppos) *ppos = file->f_pos; 235 if(ppos) *ppos = file->f_pos;
236 return(n); 236 return n;
237} 237}
238 238
239static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count) 239static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
@@ -254,7 +254,7 @@ static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
254 err = os_read_file(fd, new_buf, cur); 254 err = os_read_file(fd, new_buf, cur);
255 if(err < 0){ 255 if(err < 0){
256 printk("hppfs_read : read failed, errno = %d\n", 256 printk("hppfs_read : read failed, errno = %d\n",
257 count); 257 err);
258 n = err; 258 n = err;
259 goto out_free; 259 goto out_free;
260 } 260 }
@@ -271,7 +271,7 @@ static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
271 out_free: 271 out_free:
272 kfree(new_buf); 272 kfree(new_buf);
273 out: 273 out:
274 return(n); 274 return n;
275} 275}
276 276
277static ssize_t hppfs_read(struct file *file, char *buf, size_t count, 277static ssize_t hppfs_read(struct file *file, char *buf, size_t count,
diff --git a/fs/inode.c b/fs/inode.c
index 96364fae0844..e57f1724db3e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -757,6 +757,7 @@ EXPORT_SYMBOL(igrab);
757 * @head: the head of the list to search 757 * @head: the head of the list to search
758 * @test: callback used for comparisons between inodes 758 * @test: callback used for comparisons between inodes
759 * @data: opaque data pointer to pass to @test 759 * @data: opaque data pointer to pass to @test
760 * @wait: if true wait for the inode to be unlocked, if false do not
760 * 761 *
761 * ifind() searches for the inode specified by @data in the inode 762 * ifind() searches for the inode specified by @data in the inode
762 * cache. This is a generalized version of ifind_fast() for file systems where 763 * cache. This is a generalized version of ifind_fast() for file systems where
@@ -771,7 +772,7 @@ EXPORT_SYMBOL(igrab);
771 */ 772 */
772static inline struct inode *ifind(struct super_block *sb, 773static inline struct inode *ifind(struct super_block *sb,
773 struct hlist_head *head, int (*test)(struct inode *, void *), 774 struct hlist_head *head, int (*test)(struct inode *, void *),
774 void *data) 775 void *data, const int wait)
775{ 776{
776 struct inode *inode; 777 struct inode *inode;
777 778
@@ -780,7 +781,8 @@ static inline struct inode *ifind(struct super_block *sb,
780 if (inode) { 781 if (inode) {
781 __iget(inode); 782 __iget(inode);
782 spin_unlock(&inode_lock); 783 spin_unlock(&inode_lock);
783 wait_on_inode(inode); 784 if (likely(wait))
785 wait_on_inode(inode);
784 return inode; 786 return inode;
785 } 787 }
786 spin_unlock(&inode_lock); 788 spin_unlock(&inode_lock);
@@ -820,7 +822,7 @@ static inline struct inode *ifind_fast(struct super_block *sb,
820} 822}
821 823
822/** 824/**
823 * ilookup5 - search for an inode in the inode cache 825 * ilookup5_nowait - search for an inode in the inode cache
824 * @sb: super block of file system to search 826 * @sb: super block of file system to search
825 * @hashval: hash value (usually inode number) to search for 827 * @hashval: hash value (usually inode number) to search for
826 * @test: callback used for comparisons between inodes 828 * @test: callback used for comparisons between inodes
@@ -832,7 +834,38 @@ static inline struct inode *ifind_fast(struct super_block *sb,
832 * identification of an inode. 834 * identification of an inode.
833 * 835 *
834 * If the inode is in the cache, the inode is returned with an incremented 836 * If the inode is in the cache, the inode is returned with an incremented
835 * reference count. 837 * reference count. Note, the inode lock is not waited upon so you have to be
838 * very careful what you do with the returned inode. You probably should be
839 * using ilookup5() instead.
840 *
841 * Otherwise NULL is returned.
842 *
843 * Note, @test is called with the inode_lock held, so can't sleep.
844 */
845struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
846 int (*test)(struct inode *, void *), void *data)
847{
848 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
849
850 return ifind(sb, head, test, data, 0);
851}
852
853EXPORT_SYMBOL(ilookup5_nowait);
854
855/**
856 * ilookup5 - search for an inode in the inode cache
857 * @sb: super block of file system to search
858 * @hashval: hash value (usually inode number) to search for
859 * @test: callback used for comparisons between inodes
860 * @data: opaque data pointer to pass to @test
861 *
862 * ilookup5() uses ifind() to search for the inode specified by @hashval and
863 * @data in the inode cache. This is a generalized version of ilookup() for
864 * file systems where the inode number is not sufficient for unique
865 * identification of an inode.
866 *
867 * If the inode is in the cache, the inode lock is waited upon and the inode is
868 * returned with an incremented reference count.
836 * 869 *
837 * Otherwise NULL is returned. 870 * Otherwise NULL is returned.
838 * 871 *
@@ -843,7 +876,7 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
843{ 876{
844 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 877 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
845 878
846 return ifind(sb, head, test, data); 879 return ifind(sb, head, test, data, 1);
847} 880}
848 881
849EXPORT_SYMBOL(ilookup5); 882EXPORT_SYMBOL(ilookup5);
@@ -900,7 +933,7 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
900 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 933 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
901 struct inode *inode; 934 struct inode *inode;
902 935
903 inode = ifind(sb, head, test, data); 936 inode = ifind(sb, head, test, data, 1);
904 if (inode) 937 if (inode)
905 return inode; 938 return inode;
906 /* 939 /*
diff --git a/fs/inotify.c b/fs/inotify.c
index e423bfe0c86f..a8a714e48140 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -29,8 +29,6 @@
29#include <linux/mount.h> 29#include <linux/mount.h>
30#include <linux/namei.h> 30#include <linux/namei.h>
31#include <linux/poll.h> 31#include <linux/poll.h>
32#include <linux/device.h>
33#include <linux/miscdevice.h>
34#include <linux/init.h> 32#include <linux/init.h>
35#include <linux/list.h> 33#include <linux/list.h>
36#include <linux/writeback.h> 34#include <linux/writeback.h>
@@ -45,8 +43,8 @@ static kmem_cache_t *event_cachep;
45 43
46static struct vfsmount *inotify_mnt; 44static struct vfsmount *inotify_mnt;
47 45
48/* These are configurable via /proc/sys/inotify */ 46/* these are configurable via /proc/sys/fs/inotify/ */
49int inotify_max_user_devices; 47int inotify_max_user_instances;
50int inotify_max_user_watches; 48int inotify_max_user_watches;
51int inotify_max_queued_events; 49int inotify_max_queued_events;
52 50
@@ -64,8 +62,8 @@ int inotify_max_queued_events;
64 * Lifetimes of the three main data structures--inotify_device, inode, and 62 * Lifetimes of the three main data structures--inotify_device, inode, and
65 * inotify_watch--are managed by reference count. 63 * inotify_watch--are managed by reference count.
66 * 64 *
67 * inotify_device: Lifetime is from open until release. Additional references 65 * inotify_device: Lifetime is from inotify_init() until release. Additional
68 * can bump the count via get_inotify_dev() and drop the count via 66 * references can bump the count via get_inotify_dev() and drop the count via
69 * put_inotify_dev(). 67 * put_inotify_dev().
70 * 68 *
71 * inotify_watch: Lifetime is from create_watch() to destory_watch(). 69 * inotify_watch: Lifetime is from create_watch() to destory_watch().
@@ -77,7 +75,7 @@ int inotify_max_queued_events;
77 */ 75 */
78 76
79/* 77/*
80 * struct inotify_device - represents an open instance of an inotify device 78 * struct inotify_device - represents an inotify instance
81 * 79 *
82 * This structure is protected by the semaphore 'sem'. 80 * This structure is protected by the semaphore 'sem'.
83 */ 81 */
@@ -125,6 +123,47 @@ struct inotify_watch {
125 u32 mask; /* event mask for this watch */ 123 u32 mask; /* event mask for this watch */
126}; 124};
127 125
126#ifdef CONFIG_SYSCTL
127
128#include <linux/sysctl.h>
129
130static int zero;
131
132ctl_table inotify_table[] = {
133 {
134 .ctl_name = INOTIFY_MAX_USER_INSTANCES,
135 .procname = "max_user_instances",
136 .data = &inotify_max_user_instances,
137 .maxlen = sizeof(int),
138 .mode = 0644,
139 .proc_handler = &proc_dointvec_minmax,
140 .strategy = &sysctl_intvec,
141 .extra1 = &zero,
142 },
143 {
144 .ctl_name = INOTIFY_MAX_USER_WATCHES,
145 .procname = "max_user_watches",
146 .data = &inotify_max_user_watches,
147 .maxlen = sizeof(int),
148 .mode = 0644,
149 .proc_handler = &proc_dointvec_minmax,
150 .strategy = &sysctl_intvec,
151 .extra1 = &zero,
152 },
153 {
154 .ctl_name = INOTIFY_MAX_QUEUED_EVENTS,
155 .procname = "max_queued_events",
156 .data = &inotify_max_queued_events,
157 .maxlen = sizeof(int),
158 .mode = 0644,
159 .proc_handler = &proc_dointvec_minmax,
160 .strategy = &sysctl_intvec,
161 .extra1 = &zero
162 },
163 { .ctl_name = 0 }
164};
165#endif /* CONFIG_SYSCTL */
166
128static inline void get_inotify_dev(struct inotify_device *dev) 167static inline void get_inotify_dev(struct inotify_device *dev)
129{ 168{
130 atomic_inc(&dev->count); 169 atomic_inc(&dev->count);
@@ -332,7 +371,7 @@ static int find_inode(const char __user *dirname, struct nameidata *nd)
332 /* you can only watch an inode if you have read permissions on it */ 371 /* you can only watch an inode if you have read permissions on it */
333 error = permission(nd->dentry->d_inode, MAY_READ, NULL); 372 error = permission(nd->dentry->d_inode, MAY_READ, NULL);
334 if (error) 373 if (error)
335 path_release (nd); 374 path_release(nd);
336 return error; 375 return error;
337} 376}
338 377
@@ -348,7 +387,8 @@ static struct inotify_watch *create_watch(struct inotify_device *dev,
348 struct inotify_watch *watch; 387 struct inotify_watch *watch;
349 int ret; 388 int ret;
350 389
351 if (atomic_read(&dev->user->inotify_watches) >= inotify_max_user_watches) 390 if (atomic_read(&dev->user->inotify_watches) >=
391 inotify_max_user_watches)
352 return ERR_PTR(-ENOSPC); 392 return ERR_PTR(-ENOSPC);
353 393
354 watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL); 394 watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
@@ -744,15 +784,14 @@ static int inotify_release(struct inode *ignored, struct file *file)
744 inotify_dev_event_dequeue(dev); 784 inotify_dev_event_dequeue(dev);
745 up(&dev->sem); 785 up(&dev->sem);
746 786
747 /* free this device: the put matching the get in inotify_open() */ 787 /* free this device: the put matching the get in inotify_init() */
748 put_inotify_dev(dev); 788 put_inotify_dev(dev);
749 789
750 return 0; 790 return 0;
751} 791}
752 792
753/* 793/*
754 * inotify_ignore - handle the INOTIFY_IGNORE ioctl, asking that a given wd be 794 * inotify_ignore - remove a given wd from this inotify instance.
755 * removed from the device.
756 * 795 *
757 * Can sleep. 796 * Can sleep.
758 */ 797 */
@@ -817,42 +856,40 @@ asmlinkage long sys_inotify_init(void)
817{ 856{
818 struct inotify_device *dev; 857 struct inotify_device *dev;
819 struct user_struct *user; 858 struct user_struct *user;
820 int ret = -ENOTTY; 859 struct file *filp;
821 int fd; 860 int fd, ret;
822 struct file *filp;
823 861
824 fd = get_unused_fd(); 862 fd = get_unused_fd();
825 if (fd < 0) { 863 if (fd < 0)
826 ret = fd; 864 return fd;
827 goto out;
828 }
829 865
830 filp = get_empty_filp(); 866 filp = get_empty_filp();
831 if (!filp) { 867 if (!filp) {
832 put_unused_fd(fd);
833 ret = -ENFILE; 868 ret = -ENFILE;
834 goto out; 869 goto out_put_fd;
835 } 870 }
836 filp->f_op = &inotify_fops;
837 filp->f_vfsmnt = mntget(inotify_mnt);
838 filp->f_dentry = dget(inotify_mnt->mnt_root);
839 filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
840 filp->f_mode = FMODE_READ;
841 filp->f_flags = O_RDONLY;
842 871
843 user = get_uid(current->user); 872 user = get_uid(current->user);
844 873 if (unlikely(atomic_read(&user->inotify_devs) >=
845 if (unlikely(atomic_read(&user->inotify_devs) >= inotify_max_user_devices)) { 874 inotify_max_user_instances)) {
846 ret = -EMFILE; 875 ret = -EMFILE;
847 goto out_err; 876 goto out_free_uid;
848 } 877 }
849 878
850 dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL); 879 dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
851 if (unlikely(!dev)) { 880 if (unlikely(!dev)) {
852 ret = -ENOMEM; 881 ret = -ENOMEM;
853 goto out_err; 882 goto out_free_uid;
854 } 883 }
855 884
885 filp->f_op = &inotify_fops;
886 filp->f_vfsmnt = mntget(inotify_mnt);
887 filp->f_dentry = dget(inotify_mnt->mnt_root);
888 filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
889 filp->f_mode = FMODE_READ;
890 filp->f_flags = O_RDONLY;
891 filp->private_data = dev;
892
856 idr_init(&dev->idr); 893 idr_init(&dev->idr);
857 INIT_LIST_HEAD(&dev->events); 894 INIT_LIST_HEAD(&dev->events);
858 INIT_LIST_HEAD(&dev->watches); 895 INIT_LIST_HEAD(&dev->watches);
@@ -866,46 +903,50 @@ asmlinkage long sys_inotify_init(void)
866 903
867 get_inotify_dev(dev); 904 get_inotify_dev(dev);
868 atomic_inc(&user->inotify_devs); 905 atomic_inc(&user->inotify_devs);
906 fd_install(fd, filp);
869 907
870 filp->private_data = dev;
871 fd_install (fd, filp);
872 return fd; 908 return fd;
873out_err: 909out_free_uid:
874 put_unused_fd (fd);
875 put_filp (filp);
876 free_uid(user); 910 free_uid(user);
877out: 911 put_filp(filp);
912out_put_fd:
913 put_unused_fd(fd);
878 return ret; 914 return ret;
879} 915}
880 916
881asmlinkage long sys_inotify_add_watch(int fd, const char *path, u32 mask) 917asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
882{ 918{
883 struct inotify_watch *watch, *old; 919 struct inotify_watch *watch, *old;
884 struct inode *inode; 920 struct inode *inode;
885 struct inotify_device *dev; 921 struct inotify_device *dev;
886 struct nameidata nd; 922 struct nameidata nd;
887 struct file *filp; 923 struct file *filp;
888 int ret; 924 int ret, fput_needed;
889 925
890 filp = fget(fd); 926 filp = fget_light(fd, &fput_needed);
891 if (!filp) 927 if (unlikely(!filp))
892 return -EBADF; 928 return -EBADF;
893 929
894 dev = filp->private_data; 930 /* verify that this is indeed an inotify instance */
931 if (unlikely(filp->f_op != &inotify_fops)) {
932 ret = -EINVAL;
933 goto fput_and_out;
934 }
895 935
896 ret = find_inode ((const char __user*)path, &nd); 936 ret = find_inode(path, &nd);
897 if (ret) 937 if (unlikely(ret))
898 goto fput_and_out; 938 goto fput_and_out;
899 939
900 /* Held in place by reference in nd */ 940 /* inode held in place by reference to nd; dev by fget on fd */
901 inode = nd.dentry->d_inode; 941 inode = nd.dentry->d_inode;
942 dev = filp->private_data;
902 943
903 down(&inode->inotify_sem); 944 down(&inode->inotify_sem);
904 down(&dev->sem); 945 down(&dev->sem);
905 946
906 /* don't let user-space set invalid bits: we don't want flags set */ 947 /* don't let user-space set invalid bits: we don't want flags set */
907 mask &= IN_ALL_EVENTS; 948 mask &= IN_ALL_EVENTS;
908 if (!mask) { 949 if (unlikely(!mask)) {
909 ret = -EINVAL; 950 ret = -EINVAL;
910 goto out; 951 goto out;
911 } 952 }
@@ -932,11 +973,11 @@ asmlinkage long sys_inotify_add_watch(int fd, const char *path, u32 mask)
932 list_add(&watch->i_list, &inode->inotify_watches); 973 list_add(&watch->i_list, &inode->inotify_watches);
933 ret = watch->wd; 974 ret = watch->wd;
934out: 975out:
935 path_release (&nd);
936 up(&dev->sem); 976 up(&dev->sem);
937 up(&inode->inotify_sem); 977 up(&inode->inotify_sem);
978 path_release(&nd);
938fput_and_out: 979fput_and_out:
939 fput(filp); 980 fput_light(filp, fput_needed);
940 return ret; 981 return ret;
941} 982}
942 983
@@ -944,14 +985,23 @@ asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
944{ 985{
945 struct file *filp; 986 struct file *filp;
946 struct inotify_device *dev; 987 struct inotify_device *dev;
947 int ret; 988 int ret, fput_needed;
948 989
949 filp = fget(fd); 990 filp = fget_light(fd, &fput_needed);
950 if (!filp) 991 if (unlikely(!filp))
951 return -EBADF; 992 return -EBADF;
993
994 /* verify that this is indeed an inotify instance */
995 if (unlikely(filp->f_op != &inotify_fops)) {
996 ret = -EINVAL;
997 goto out;
998 }
999
952 dev = filp->private_data; 1000 dev = filp->private_data;
953 ret = inotify_ignore (dev, wd); 1001 ret = inotify_ignore(dev, wd);
954 fput(filp); 1002
1003out:
1004 fput_light(filp, fput_needed);
955 return ret; 1005 return ret;
956} 1006}
957 1007
@@ -969,17 +1019,24 @@ static struct file_system_type inotify_fs_type = {
969}; 1019};
970 1020
971/* 1021/*
972 * inotify_init - Our initialization function. Note that we cannnot return 1022 * inotify_setup - Our initialization function. Note that we cannnot return
973 * error because we have compiled-in VFS hooks. So an (unlikely) failure here 1023 * error because we have compiled-in VFS hooks. So an (unlikely) failure here
974 * must result in panic(). 1024 * must result in panic().
975 */ 1025 */
976static int __init inotify_init(void) 1026static int __init inotify_setup(void)
977{ 1027{
978 register_filesystem(&inotify_fs_type); 1028 int ret;
1029
1030 ret = register_filesystem(&inotify_fs_type);
1031 if (unlikely(ret))
1032 panic("inotify: register_filesystem returned %d!\n", ret);
1033
979 inotify_mnt = kern_mount(&inotify_fs_type); 1034 inotify_mnt = kern_mount(&inotify_fs_type);
1035 if (IS_ERR(inotify_mnt))
1036 panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
980 1037
981 inotify_max_queued_events = 8192; 1038 inotify_max_queued_events = 16384;
982 inotify_max_user_devices = 128; 1039 inotify_max_user_instances = 128;
983 inotify_max_user_watches = 8192; 1040 inotify_max_user_watches = 8192;
984 1041
985 atomic_set(&inotify_cookie, 0); 1042 atomic_set(&inotify_cookie, 0);
@@ -991,9 +1048,7 @@ static int __init inotify_init(void)
991 sizeof(struct inotify_kernel_event), 1048 sizeof(struct inotify_kernel_event),
992 0, SLAB_PANIC, NULL, NULL); 1049 0, SLAB_PANIC, NULL, NULL);
993 1050
994 printk(KERN_INFO "inotify syscall\n");
995
996 return 0; 1051 return 0;
997} 1052}
998 1053
999module_init(inotify_init); 1054module_init(inotify_setup);
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index fc589ddd0762..456d7e6e29c2 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -3397,6 +3397,9 @@ jffs_garbage_collect_thread(void *ptr)
3397 siginfo_t info; 3397 siginfo_t info;
3398 unsigned long signr = 0; 3398 unsigned long signr = 0;
3399 3399
3400 if (try_to_freeze())
3401 continue;
3402
3400 spin_lock_irq(&current->sighand->siglock); 3403 spin_lock_irq(&current->sighand->siglock);
3401 signr = dequeue_signal(current, &current->blocked, &info); 3404 signr = dequeue_signal(current, &current->blocked, &info);
3402 spin_unlock_irq(&current->sighand->siglock); 3405 spin_unlock_irq(&current->sighand->siglock);
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 3dd5394921c9..97dc39796e2c 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: build.c,v 1.70 2005/02/28 08:21:05 dedekind Exp $ 10 * $Id: build.c,v 1.71 2005/07/12 16:37:08 dedekind Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -336,13 +336,6 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
336 c->blocks[i].bad_count = 0; 336 c->blocks[i].bad_count = 0;
337 } 337 }
338 338
339 init_MUTEX(&c->alloc_sem);
340 init_MUTEX(&c->erase_free_sem);
341 init_waitqueue_head(&c->erase_wait);
342 init_waitqueue_head(&c->inocache_wq);
343 spin_lock_init(&c->erase_completion_lock);
344 spin_lock_init(&c->inocache_lock);
345
346 INIT_LIST_HEAD(&c->clean_list); 339 INIT_LIST_HEAD(&c->clean_list);
347 INIT_LIST_HEAD(&c->very_dirty_list); 340 INIT_LIST_HEAD(&c->very_dirty_list);
348 INIT_LIST_HEAD(&c->dirty_list); 341 INIT_LIST_HEAD(&c->dirty_list);
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 6a4c0a3685da..787d84ac2bcd 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: erase.c,v 1.76 2005/05/03 15:11:40 dedekind Exp $ 10 * $Id: erase.c,v 1.80 2005/07/14 19:46:24 joern Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -300,100 +300,86 @@ static void jffs2_free_all_node_refs(struct jffs2_sb_info *c, struct jffs2_erase
300 jeb->last_node = NULL; 300 jeb->last_node = NULL;
301} 301}
302 302
303static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) 303static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t *bad_offset)
304{ 304{
305 struct jffs2_raw_node_ref *marker_ref = NULL; 305 void *ebuf;
306 unsigned char *ebuf; 306 uint32_t ofs;
307 size_t retlen; 307 size_t retlen;
308 int ret; 308 int ret = -EIO;
309 uint32_t bad_offset; 309
310
311 if ((!jffs2_cleanmarker_oob(c)) && (c->cleanmarker_size > 0)) {
312 marker_ref = jffs2_alloc_raw_node_ref();
313 if (!marker_ref) {
314 printk(KERN_WARNING "Failed to allocate raw node ref for clean marker\n");
315 /* Stick it back on the list from whence it came and come back later */
316 jffs2_erase_pending_trigger(c);
317 spin_lock(&c->erase_completion_lock);
318 list_add(&jeb->list, &c->erase_complete_list);
319 spin_unlock(&c->erase_completion_lock);
320 return;
321 }
322 }
323 ebuf = kmalloc(PAGE_SIZE, GFP_KERNEL); 310 ebuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
324 if (!ebuf) { 311 if (!ebuf) {
325 printk(KERN_WARNING "Failed to allocate page buffer for verifying erase at 0x%08x. Assuming it worked\n", jeb->offset); 312 printk(KERN_WARNING "Failed to allocate page buffer for verifying erase at 0x%08x. Refiling\n", jeb->offset);
326 } else { 313 return -EAGAIN;
327 uint32_t ofs = jeb->offset; 314 }
328 315
329 D1(printk(KERN_DEBUG "Verifying erase at 0x%08x\n", jeb->offset)); 316 D1(printk(KERN_DEBUG "Verifying erase at 0x%08x\n", jeb->offset));
330 while(ofs < jeb->offset + c->sector_size) {
331 uint32_t readlen = min((uint32_t)PAGE_SIZE, jeb->offset + c->sector_size - ofs);
332 int i;
333 317
334 bad_offset = ofs; 318 for (ofs = jeb->offset; ofs < jeb->offset + c->sector_size; ) {
319 uint32_t readlen = min((uint32_t)PAGE_SIZE, jeb->offset + c->sector_size - ofs);
320 int i;
335 321
336 ret = c->mtd->read(c->mtd, ofs, readlen, &retlen, ebuf); 322 *bad_offset = ofs;
337 323
338 if (ret) { 324 ret = jffs2_flash_read(c, ofs, readlen, &retlen, ebuf);
339 printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret); 325 if (ret) {
340 goto bad; 326 printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret);
341 } 327 goto fail;
342 if (retlen != readlen) { 328 }
343 printk(KERN_WARNING "Short read from newly-erased block at 0x%08x. Wanted %d, got %zd\n", ofs, readlen, retlen); 329 if (retlen != readlen) {
344 goto bad; 330 printk(KERN_WARNING "Short read from newly-erased block at 0x%08x. Wanted %d, got %zd\n", ofs, readlen, retlen);
345 } 331 goto fail;
346 for (i=0; i<readlen; i += sizeof(unsigned long)) { 332 }
347 /* It's OK. We know it's properly aligned */ 333 for (i=0; i<readlen; i += sizeof(unsigned long)) {
348 unsigned long datum = *(unsigned long *)(&ebuf[i]); 334 /* It's OK. We know it's properly aligned */
349 if (datum + 1) { 335 unsigned long *datum = ebuf + i;
350 bad_offset += i; 336 if (*datum + 1) {
351 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08x\n", datum, bad_offset); 337 *bad_offset += i;
352 bad: 338 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08x\n", *datum, *bad_offset);
353 if ((!jffs2_cleanmarker_oob(c)) && (c->cleanmarker_size > 0)) 339 goto fail;
354 jffs2_free_raw_node_ref(marker_ref);
355 kfree(ebuf);
356 bad2:
357 spin_lock(&c->erase_completion_lock);
358 /* Stick it on a list (any list) so
359 erase_failed can take it right off
360 again. Silly, but shouldn't happen
361 often. */
362 list_add(&jeb->list, &c->erasing_list);
363 spin_unlock(&c->erase_completion_lock);
364 jffs2_erase_failed(c, jeb, bad_offset);
365 return;
366 }
367 } 340 }
368 ofs += readlen;
369 cond_resched();
370 } 341 }
371 kfree(ebuf); 342 ofs += readlen;
343 cond_resched();
372 } 344 }
345 ret = 0;
346fail:
347 kfree(ebuf);
348 return ret;
349}
373 350
374 bad_offset = jeb->offset; 351static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
352{
353 struct jffs2_raw_node_ref *marker_ref = NULL;
354 size_t retlen;
355 int ret;
356 uint32_t bad_offset;
357
358 switch (jffs2_block_check_erase(c, jeb, &bad_offset)) {
359 case -EAGAIN: goto refile;
360 case -EIO: goto filebad;
361 }
375 362
376 /* Write the erase complete marker */ 363 /* Write the erase complete marker */
377 D1(printk(KERN_DEBUG "Writing erased marker to block at 0x%08x\n", jeb->offset)); 364 D1(printk(KERN_DEBUG "Writing erased marker to block at 0x%08x\n", jeb->offset));
378 if (jffs2_cleanmarker_oob(c)) { 365 bad_offset = jeb->offset;
379 366
380 if (jffs2_write_nand_cleanmarker(c, jeb)) 367 /* Cleanmarker in oob area or no cleanmarker at all ? */
381 goto bad2; 368 if (jffs2_cleanmarker_oob(c) || c->cleanmarker_size == 0) {
382
383 jeb->first_node = jeb->last_node = NULL;
384 369
385 jeb->free_size = c->sector_size; 370 if (jffs2_cleanmarker_oob(c)) {
386 jeb->used_size = 0; 371 if (jffs2_write_nand_cleanmarker(c, jeb))
387 jeb->dirty_size = 0; 372 goto filebad;
388 jeb->wasted_size = 0; 373 }
389 } else if (c->cleanmarker_size == 0) {
390 jeb->first_node = jeb->last_node = NULL;
391 374
375 jeb->first_node = jeb->last_node = NULL;
392 jeb->free_size = c->sector_size; 376 jeb->free_size = c->sector_size;
393 jeb->used_size = 0; 377 jeb->used_size = 0;
394 jeb->dirty_size = 0; 378 jeb->dirty_size = 0;
395 jeb->wasted_size = 0; 379 jeb->wasted_size = 0;
380
396 } else { 381 } else {
382
397 struct kvec vecs[1]; 383 struct kvec vecs[1];
398 struct jffs2_unknown_node marker = { 384 struct jffs2_unknown_node marker = {
399 .magic = cpu_to_je16(JFFS2_MAGIC_BITMASK), 385 .magic = cpu_to_je16(JFFS2_MAGIC_BITMASK),
@@ -401,21 +387,28 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
401 .totlen = cpu_to_je32(c->cleanmarker_size) 387 .totlen = cpu_to_je32(c->cleanmarker_size)
402 }; 388 };
403 389
390 marker_ref = jffs2_alloc_raw_node_ref();
391 if (!marker_ref) {
392 printk(KERN_WARNING "Failed to allocate raw node ref for clean marker. Refiling\n");
393 goto refile;
394 }
395
404 marker.hdr_crc = cpu_to_je32(crc32(0, &marker, sizeof(struct jffs2_unknown_node)-4)); 396 marker.hdr_crc = cpu_to_je32(crc32(0, &marker, sizeof(struct jffs2_unknown_node)-4));
405 397
406 vecs[0].iov_base = (unsigned char *) &marker; 398 vecs[0].iov_base = (unsigned char *) &marker;
407 vecs[0].iov_len = sizeof(marker); 399 vecs[0].iov_len = sizeof(marker);
408 ret = jffs2_flash_direct_writev(c, vecs, 1, jeb->offset, &retlen); 400 ret = jffs2_flash_direct_writev(c, vecs, 1, jeb->offset, &retlen);
409 401
410 if (ret) { 402 if (ret || retlen != sizeof(marker)) {
411 printk(KERN_WARNING "Write clean marker to block at 0x%08x failed: %d\n", 403 if (ret)
412 jeb->offset, ret); 404 printk(KERN_WARNING "Write clean marker to block at 0x%08x failed: %d\n",
413 goto bad2; 405 jeb->offset, ret);
414 } 406 else
415 if (retlen != sizeof(marker)) { 407 printk(KERN_WARNING "Short write to newly-erased block at 0x%08x: Wanted %zd, got %zd\n",
416 printk(KERN_WARNING "Short write to newly-erased block at 0x%08x: Wanted %zd, got %zd\n", 408 jeb->offset, sizeof(marker), retlen);
417 jeb->offset, sizeof(marker), retlen); 409
418 goto bad2; 410 jffs2_free_raw_node_ref(marker_ref);
411 goto filebad;
419 } 412 }
420 413
421 marker_ref->next_in_ino = NULL; 414 marker_ref->next_in_ino = NULL;
@@ -444,5 +437,22 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
444 c->nr_free_blocks++; 437 c->nr_free_blocks++;
445 spin_unlock(&c->erase_completion_lock); 438 spin_unlock(&c->erase_completion_lock);
446 wake_up(&c->erase_wait); 439 wake_up(&c->erase_wait);
447} 440 return;
441
442filebad:
443 spin_lock(&c->erase_completion_lock);
444 /* Stick it on a list (any list) so erase_failed can take it
445 right off again. Silly, but shouldn't happen often. */
446 list_add(&jeb->list, &c->erasing_list);
447 spin_unlock(&c->erase_completion_lock);
448 jffs2_erase_failed(c, jeb, bad_offset);
449 return;
448 450
451refile:
452 /* Stick it back on the list from whence it came and come back later */
453 jffs2_erase_pending_trigger(c);
454 spin_lock(&c->erase_completion_lock);
455 list_add(&jeb->list, &c->erase_complete_list);
456 spin_unlock(&c->erase_completion_lock);
457 return;
458}
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index c7bbdeec93a6..4991c348f6ec 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: nodelist.c,v 1.97 2005/07/06 15:18:41 dwmw2 Exp $ 10 * $Id: nodelist.c,v 1.98 2005/07/10 15:15:32 dedekind Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -55,11 +55,11 @@ void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new
55 }); 55 });
56} 56}
57 57
58/* Put a new tmp_dnode_info into the list, keeping the list in 58/*
59 order of increasing version 59 * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in
60*/ 60 * order of increasing version.
61 61 */
62static void jffs2_add_tn_to_list(struct jffs2_tmp_dnode_info *tn, struct rb_root *list) 62static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list)
63{ 63{
64 struct rb_node **p = &list->rb_node; 64 struct rb_node **p = &list->rb_node;
65 struct rb_node * parent = NULL; 65 struct rb_node * parent = NULL;
@@ -420,7 +420,7 @@ int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
420 D1(printk(KERN_DEBUG "dnode @%08x: ver %u, offset %04x, dsize %04x\n", 420 D1(printk(KERN_DEBUG "dnode @%08x: ver %u, offset %04x, dsize %04x\n",
421 ref_offset(ref), je32_to_cpu(node.i.version), 421 ref_offset(ref), je32_to_cpu(node.i.version),
422 je32_to_cpu(node.i.offset), je32_to_cpu(node.i.dsize))); 422 je32_to_cpu(node.i.offset), je32_to_cpu(node.i.dsize)));
423 jffs2_add_tn_to_list(tn, &ret_tn); 423 jffs2_add_tn_to_tree(tn, &ret_tn);
424 break; 424 break;
425 425
426 default: 426 default:
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 7bf72e012c94..d900c8929b09 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: os-linux.h,v 1.57 2005/07/06 12:13:09 dwmw2 Exp $ 10 * $Id: os-linux.h,v 1.58 2005/07/12 02:34:35 tpoynor Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -86,6 +86,8 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
86#define jffs2_dataflash(c) (0) 86#define jffs2_dataflash(c) (0)
87#define jffs2_nor_ecc_flash_setup(c) (0) 87#define jffs2_nor_ecc_flash_setup(c) (0)
88#define jffs2_nor_ecc_flash_cleanup(c) do {} while (0) 88#define jffs2_nor_ecc_flash_cleanup(c) do {} while (0)
89#define jffs2_dataflash_setup(c) (0)
90#define jffs2_dataflash_cleanup(c) do {} while (0)
89 91
90#else /* NAND and/or ECC'd NOR support present */ 92#else /* NAND and/or ECC'd NOR support present */
91 93
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 081656c1d49e..5b2a83599d73 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: readinode.c,v 1.120 2005/07/05 21:03:07 dwmw2 Exp $ 10 * $Id: readinode.c,v 1.125 2005/07/10 13:13:55 dedekind Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -151,6 +151,9 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in
151 151
152 D1(printk(KERN_DEBUG "jffs2_add_full_dnode_to_inode(ino #%u, f %p, fn %p)\n", f->inocache->ino, f, fn)); 152 D1(printk(KERN_DEBUG "jffs2_add_full_dnode_to_inode(ino #%u, f %p, fn %p)\n", f->inocache->ino, f, fn));
153 153
154 if (unlikely(!fn->size))
155 return 0;
156
154 newfrag = jffs2_alloc_node_frag(); 157 newfrag = jffs2_alloc_node_frag();
155 if (unlikely(!newfrag)) 158 if (unlikely(!newfrag))
156 return -ENOMEM; 159 return -ENOMEM;
@@ -158,11 +161,6 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in
158 D2(printk(KERN_DEBUG "adding node %04x-%04x @0x%08x on flash, newfrag *%p\n", 161 D2(printk(KERN_DEBUG "adding node %04x-%04x @0x%08x on flash, newfrag *%p\n",
159 fn->ofs, fn->ofs+fn->size, ref_offset(fn->raw), newfrag)); 162 fn->ofs, fn->ofs+fn->size, ref_offset(fn->raw), newfrag));
160 163
161 if (unlikely(!fn->size)) {
162 jffs2_free_node_frag(newfrag);
163 return 0;
164 }
165
166 newfrag->ofs = fn->ofs; 164 newfrag->ofs = fn->ofs;
167 newfrag->size = fn->size; 165 newfrag->size = fn->size;
168 newfrag->node = fn; 166 newfrag->node = fn;
@@ -560,7 +558,6 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
560 } 558 }
561 next_tn: 559 next_tn:
562 BUG_ON(rb->rb_left); 560 BUG_ON(rb->rb_left);
563 repl_rb = NULL;
564 if (rb->rb_parent && rb->rb_parent->rb_left == rb) { 561 if (rb->rb_parent && rb->rb_parent->rb_left == rb) {
565 /* We were then left-hand child of our parent. We need 562 /* We were then left-hand child of our parent. We need
566 to move our own right-hand child into our place. */ 563 to move our own right-hand child into our place. */
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 2cf14cf8b35a..aaf9475cfb6a 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: super.c,v 1.106 2005/05/18 11:37:25 dedekind Exp $ 10 * $Id: super.c,v 1.107 2005/07/12 16:37:08 dedekind Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -140,6 +140,15 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type,
140 D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): New superblock for device %d (\"%s\")\n", 140 D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): New superblock for device %d (\"%s\")\n",
141 mtd->index, mtd->name)); 141 mtd->index, mtd->name));
142 142
143 /* Initialize JFFS2 superblock locks, the further initialization will be
144 * done later */
145 init_MUTEX(&c->alloc_sem);
146 init_MUTEX(&c->erase_free_sem);
147 init_waitqueue_head(&c->erase_wait);
148 init_waitqueue_head(&c->inocache_wq);
149 spin_lock_init(&c->erase_completion_lock);
150 spin_lock_init(&c->inocache_lock);
151
143 sb->s_op = &jffs2_super_operations; 152 sb->s_op = &jffs2_super_operations;
144 sb->s_flags = flags | MS_NOATIME; 153 sb->s_flags = flags | MS_NOATIME;
145 154
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index cced2fed9d0f..c739626f5bf1 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -26,36 +26,6 @@
26#include "jfs_debug.h" 26#include "jfs_debug.h"
27 27
28/* 28/*
29 * Debug code for double-checking block map
30 */
31/* #define _JFS_DEBUG_DMAP 1 */
32
33#ifdef _JFS_DEBUG_DMAP
34#define DBINITMAP(size,ipbmap,results) \
35 DBinitmap(size,ipbmap,results)
36#define DBALLOC(dbmap,mapsize,blkno,nblocks) \
37 DBAlloc(dbmap,mapsize,blkno,nblocks)
38#define DBFREE(dbmap,mapsize,blkno,nblocks) \
39 DBFree(dbmap,mapsize,blkno,nblocks)
40#define DBALLOCCK(dbmap,mapsize,blkno,nblocks) \
41 DBAllocCK(dbmap,mapsize,blkno,nblocks)
42#define DBFREECK(dbmap,mapsize,blkno,nblocks) \
43 DBFreeCK(dbmap,mapsize,blkno,nblocks)
44
45static void DBinitmap(s64, struct inode *, u32 **);
46static void DBAlloc(uint *, s64, s64, s64);
47static void DBFree(uint *, s64, s64, s64);
48static void DBAllocCK(uint *, s64, s64, s64);
49static void DBFreeCK(uint *, s64, s64, s64);
50#else
51#define DBINITMAP(size,ipbmap,results)
52#define DBALLOC(dbmap, mapsize, blkno, nblocks)
53#define DBFREE(dbmap, mapsize, blkno, nblocks)
54#define DBALLOCCK(dbmap, mapsize, blkno, nblocks)
55#define DBFREECK(dbmap, mapsize, blkno, nblocks)
56#endif /* _JFS_DEBUG_DMAP */
57
58/*
59 * SERIALIZATION of the Block Allocation Map. 29 * SERIALIZATION of the Block Allocation Map.
60 * 30 *
61 * the working state of the block allocation map is accessed in 31 * the working state of the block allocation map is accessed in
@@ -105,7 +75,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
105 int nblocks); 75 int nblocks);
106static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval); 76static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval);
107static void dbBackSplit(dmtree_t * tp, int leafno); 77static void dbBackSplit(dmtree_t * tp, int leafno);
108static void dbJoin(dmtree_t * tp, int leafno, int newval); 78static int dbJoin(dmtree_t * tp, int leafno, int newval);
109static void dbAdjTree(dmtree_t * tp, int leafno, int newval); 79static void dbAdjTree(dmtree_t * tp, int leafno, int newval);
110static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, 80static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc,
111 int level); 81 int level);
@@ -128,8 +98,8 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks);
128static int dbFindBits(u32 word, int l2nb); 98static int dbFindBits(u32 word, int l2nb);
129static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno); 99static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno);
130static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx); 100static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx);
131static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, 101static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
132 int nblocks); 102 int nblocks);
133static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, 103static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
134 int nblocks); 104 int nblocks);
135static int dbMaxBud(u8 * cp); 105static int dbMaxBud(u8 * cp);
@@ -242,7 +212,6 @@ int dbMount(struct inode *ipbmap)
242 JFS_SBI(ipbmap->i_sb)->bmap = bmp; 212 JFS_SBI(ipbmap->i_sb)->bmap = bmp;
243 213
244 memset(bmp->db_active, 0, sizeof(bmp->db_active)); 214 memset(bmp->db_active, 0, sizeof(bmp->db_active));
245 DBINITMAP(bmp->db_mapsize, ipbmap, &bmp->db_DBmap);
246 215
247 /* 216 /*
248 * allocate/initialize the bmap lock 217 * allocate/initialize the bmap lock
@@ -407,16 +376,13 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
407 */ 376 */
408 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); 377 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
409 378
410 DBALLOCCK(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
411
412 /* free the blocks. */ 379 /* free the blocks. */
413 if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { 380 if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) {
381 jfs_error(ip->i_sb, "dbFree: error in block map\n");
414 release_metapage(mp); 382 release_metapage(mp);
415 IREAD_UNLOCK(ipbmap); 383 IREAD_UNLOCK(ipbmap);
416 return (rc); 384 return (rc);
417 } 385 }
418
419 DBFREE(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
420 } 386 }
421 387
422 /* write the last buffer. */ 388 /* write the last buffer. */
@@ -775,10 +741,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
775 IWRITE_LOCK(ipbmap); 741 IWRITE_LOCK(ipbmap);
776 742
777 rc = dbAllocAny(bmp, nblocks, l2nb, results); 743 rc = dbAllocAny(bmp, nblocks, l2nb, results);
778 if (rc == 0) {
779 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, *results,
780 nblocks);
781 }
782 744
783 goto write_unlock; 745 goto write_unlock;
784 } 746 }
@@ -836,8 +798,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
836 != -ENOSPC) { 798 != -ENOSPC) {
837 if (rc == 0) { 799 if (rc == 0) {
838 *results = blkno; 800 *results = blkno;
839 DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
840 *results, nblocks);
841 mark_metapage_dirty(mp); 801 mark_metapage_dirty(mp);
842 } 802 }
843 803
@@ -863,11 +823,8 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
863 if ((rc = 823 if ((rc =
864 dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results)) 824 dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results))
865 != -ENOSPC) { 825 != -ENOSPC) {
866 if (rc == 0) { 826 if (rc == 0)
867 DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
868 *results, nblocks);
869 mark_metapage_dirty(mp); 827 mark_metapage_dirty(mp);
870 }
871 828
872 release_metapage(mp); 829 release_metapage(mp);
873 goto read_unlock; 830 goto read_unlock;
@@ -878,11 +835,8 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
878 */ 835 */
879 if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results)) 836 if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
880 != -ENOSPC) { 837 != -ENOSPC) {
881 if (rc == 0) { 838 if (rc == 0)
882 DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
883 *results, nblocks);
884 mark_metapage_dirty(mp); 839 mark_metapage_dirty(mp);
885 }
886 840
887 release_metapage(mp); 841 release_metapage(mp);
888 goto read_unlock; 842 goto read_unlock;
@@ -896,13 +850,9 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
896 * the same allocation group as the hint. 850 * the same allocation group as the hint.
897 */ 851 */
898 IWRITE_LOCK(ipbmap); 852 IWRITE_LOCK(ipbmap);
899 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) 853 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC)
900 != -ENOSPC) {
901 if (rc == 0)
902 DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
903 *results, nblocks);
904 goto write_unlock; 854 goto write_unlock;
905 } 855
906 IWRITE_UNLOCK(ipbmap); 856 IWRITE_UNLOCK(ipbmap);
907 857
908 858
@@ -918,9 +868,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
918 */ 868 */
919 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC) 869 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC)
920 rc = dbAllocAny(bmp, nblocks, l2nb, results); 870 rc = dbAllocAny(bmp, nblocks, l2nb, results);
921 if (rc == 0) {
922 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, *results, nblocks);
923 }
924 871
925 write_unlock: 872 write_unlock:
926 IWRITE_UNLOCK(ipbmap); 873 IWRITE_UNLOCK(ipbmap);
@@ -992,10 +939,9 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
992 939
993 IREAD_UNLOCK(ipbmap); 940 IREAD_UNLOCK(ipbmap);
994 941
995 if (rc == 0) { 942 if (rc == 0)
996 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, blkno, nblocks);
997 mark_metapage_dirty(mp); 943 mark_metapage_dirty(mp);
998 } 944
999 release_metapage(mp); 945 release_metapage(mp);
1000 946
1001 return (rc); 947 return (rc);
@@ -1144,7 +1090,6 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1144 return -EIO; 1090 return -EIO;
1145 } 1091 }
1146 1092
1147 DBALLOCCK(bmp->db_DBmap, bmp->db_mapsize, blkno, nblocks);
1148 dp = (struct dmap *) mp->data; 1093 dp = (struct dmap *) mp->data;
1149 1094
1150 /* try to allocate the blocks immediately following the 1095 /* try to allocate the blocks immediately following the
@@ -1155,11 +1100,9 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1155 IREAD_UNLOCK(ipbmap); 1100 IREAD_UNLOCK(ipbmap);
1156 1101
1157 /* were we successful ? */ 1102 /* were we successful ? */
1158 if (rc == 0) { 1103 if (rc == 0)
1159 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, extblkno,
1160 addnblocks);
1161 write_metapage(mp); 1104 write_metapage(mp);
1162 } else 1105 else
1163 /* we were not successful */ 1106 /* we were not successful */
1164 release_metapage(mp); 1107 release_metapage(mp);
1165 1108
@@ -2078,7 +2021,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2078 int nblocks) 2021 int nblocks)
2079{ 2022{
2080 s8 oldroot; 2023 s8 oldroot;
2081 int rc, word; 2024 int rc = 0, word;
2082 2025
2083 /* save the current value of the root (i.e. maximum free string) 2026 /* save the current value of the root (i.e. maximum free string)
2084 * of the dmap tree. 2027 * of the dmap tree.
@@ -2086,11 +2029,11 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2086 oldroot = dp->tree.stree[ROOT]; 2029 oldroot = dp->tree.stree[ROOT];
2087 2030
2088 /* free the specified (blocks) bits */ 2031 /* free the specified (blocks) bits */
2089 dbFreeBits(bmp, dp, blkno, nblocks); 2032 rc = dbFreeBits(bmp, dp, blkno, nblocks);
2090 2033
2091 /* if the root has not changed, done. */ 2034 /* if error or the root has not changed, done. */
2092 if (dp->tree.stree[ROOT] == oldroot) 2035 if (rc || (dp->tree.stree[ROOT] == oldroot))
2093 return (0); 2036 return (rc);
2094 2037
2095 /* root changed. bubble the change up to the dmap control pages. 2038 /* root changed. bubble the change up to the dmap control pages.
2096 * if the adjustment of the upper level control pages fails, 2039 * if the adjustment of the upper level control pages fails,
@@ -2279,15 +2222,16 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2279 * blkno - starting block number of the bits to be freed. 2222 * blkno - starting block number of the bits to be freed.
2280 * nblocks - number of bits to be freed. 2223 * nblocks - number of bits to be freed.
2281 * 2224 *
2282 * RETURN VALUES: none 2225 * RETURN VALUES: 0 for success
2283 * 2226 *
2284 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 2227 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2285 */ 2228 */
2286static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, 2229static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2287 int nblocks) 2230 int nblocks)
2288{ 2231{
2289 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; 2232 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
2290 dmtree_t *tp = (dmtree_t *) & dp->tree; 2233 dmtree_t *tp = (dmtree_t *) & dp->tree;
2234 int rc = 0;
2291 int size; 2235 int size;
2292 2236
2293 /* determine the bit number and word within the dmap of the 2237 /* determine the bit number and word within the dmap of the
@@ -2336,8 +2280,10 @@ static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2336 2280
2337 /* update the leaf for this dmap word. 2281 /* update the leaf for this dmap word.
2338 */ 2282 */
2339 dbJoin(tp, word, 2283 rc = dbJoin(tp, word,
2340 dbMaxBud((u8 *) & dp->wmap[word])); 2284 dbMaxBud((u8 *) & dp->wmap[word]));
2285 if (rc)
2286 return rc;
2341 2287
2342 word += 1; 2288 word += 1;
2343 } else { 2289 } else {
@@ -2368,7 +2314,9 @@ static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2368 2314
2369 /* update the leaf. 2315 /* update the leaf.
2370 */ 2316 */
2371 dbJoin(tp, word, size); 2317 rc = dbJoin(tp, word, size);
2318 if (rc)
2319 return rc;
2372 2320
2373 /* get the number of dmap words handled. 2321 /* get the number of dmap words handled.
2374 */ 2322 */
@@ -2415,6 +2363,8 @@ static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2415 } 2363 }
2416 2364
2417 BMAP_UNLOCK(bmp); 2365 BMAP_UNLOCK(bmp);
2366
2367 return 0;
2418} 2368}
2419 2369
2420 2370
@@ -2522,7 +2472,9 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
2522 } 2472 }
2523 dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval); 2473 dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval);
2524 } else { 2474 } else {
2525 dbJoin((dmtree_t *) dcp, leafno, newval); 2475 rc = dbJoin((dmtree_t *) dcp, leafno, newval);
2476 if (rc)
2477 return rc;
2526 } 2478 }
2527 2479
2528 /* check if the root of the current dmap control page changed due 2480 /* check if the root of the current dmap control page changed due
@@ -2747,7 +2699,7 @@ static void dbBackSplit(dmtree_t * tp, int leafno)
2747 * 2699 *
2748 * RETURN VALUES: none 2700 * RETURN VALUES: none
2749 */ 2701 */
2750static void dbJoin(dmtree_t * tp, int leafno, int newval) 2702static int dbJoin(dmtree_t * tp, int leafno, int newval)
2751{ 2703{
2752 int budsz, buddy; 2704 int budsz, buddy;
2753 s8 *leaf; 2705 s8 *leaf;
@@ -2787,7 +2739,9 @@ static void dbJoin(dmtree_t * tp, int leafno, int newval)
2787 if (newval > leaf[buddy]) 2739 if (newval > leaf[buddy])
2788 break; 2740 break;
2789 2741
2790 assert(newval == leaf[buddy]); 2742 /* It shouldn't be less */
2743 if (newval < leaf[buddy])
2744 return -EIO;
2791 2745
2792 /* check which (leafno or buddy) is the left buddy. 2746 /* check which (leafno or buddy) is the left buddy.
2793 * the left buddy gets to claim the blocks resulting 2747 * the left buddy gets to claim the blocks resulting
@@ -2819,6 +2773,8 @@ static void dbJoin(dmtree_t * tp, int leafno, int newval)
2819 /* update the leaf value. 2773 /* update the leaf value.
2820 */ 2774 */
2821 dbAdjTree(tp, leafno, newval); 2775 dbAdjTree(tp, leafno, newval);
2776
2777 return 0;
2822} 2778}
2823 2779
2824 2780
@@ -3185,16 +3141,12 @@ int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
3185 */ 3141 */
3186 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); 3142 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
3187 3143
3188 DBFREECK(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
3189
3190 /* allocate the blocks. */ 3144 /* allocate the blocks. */
3191 if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) { 3145 if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) {
3192 release_metapage(mp); 3146 release_metapage(mp);
3193 IREAD_UNLOCK(ipbmap); 3147 IREAD_UNLOCK(ipbmap);
3194 return (rc); 3148 return (rc);
3195 } 3149 }
3196
3197 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
3198 } 3150 }
3199 3151
3200 /* write the last buffer. */ 3152 /* write the last buffer. */
@@ -4041,223 +3993,3 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
4041 3993
4042 return (nblocks); 3994 return (nblocks);
4043} 3995}
4044
4045
4046#ifdef _JFS_DEBUG_DMAP
4047/*
4048 * DBinitmap()
4049 */
4050static void DBinitmap(s64 size, struct inode *ipbmap, u32 ** results)
4051{
4052 int npages;
4053 u32 *dbmap, *d;
4054 int n;
4055 s64 lblkno, cur_block;
4056 struct dmap *dp;
4057 struct metapage *mp;
4058
4059 npages = size / 32768;
4060 npages += (size % 32768) ? 1 : 0;
4061
4062 dbmap = (u32 *) xmalloc(npages * 4096, L2PSIZE, kernel_heap);
4063 if (dbmap == NULL)
4064 BUG(); /* Not robust since this is only unused debug code */
4065
4066 for (n = 0, d = dbmap; n < npages; n++, d += 1024)
4067 bzero(d, 4096);
4068
4069 /* Need to initialize from disk map pages
4070 */
4071 for (d = dbmap, cur_block = 0; cur_block < size;
4072 cur_block += BPERDMAP, d += LPERDMAP) {
4073 lblkno = BLKTODMAP(cur_block,
4074 JFS_SBI(ipbmap->i_sb)->bmap->
4075 db_l2nbperpage);
4076 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
4077 if (mp == NULL) {
4078 jfs_error(ipbmap->i_sb,
4079 "DBinitmap: could not read disk map page");
4080 continue;
4081 }
4082 dp = (struct dmap *) mp->data;
4083
4084 for (n = 0; n < LPERDMAP; n++)
4085 d[n] = le32_to_cpu(dp->wmap[n]);
4086
4087 release_metapage(mp);
4088 }
4089
4090 *results = dbmap;
4091}
4092
4093
4094/*
4095 * DBAlloc()
4096 */
4097void DBAlloc(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
4098{
4099 int word, nb, bitno;
4100 u32 mask;
4101
4102 assert(blkno > 0 && blkno < mapsize);
4103 assert(nblocks > 0 && nblocks <= mapsize);
4104
4105 assert(blkno + nblocks <= mapsize);
4106
4107 dbmap += (blkno / 32);
4108 while (nblocks > 0) {
4109 bitno = blkno & (32 - 1);
4110 nb = min(nblocks, 32 - bitno);
4111
4112 mask = (0xffffffff << (32 - nb) >> bitno);
4113 assert((mask & *dbmap) == 0);
4114 *dbmap |= mask;
4115
4116 dbmap++;
4117 blkno += nb;
4118 nblocks -= nb;
4119 }
4120}
4121
4122
4123/*
4124 * DBFree()
4125 */
4126static void DBFree(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
4127{
4128 int word, nb, bitno;
4129 u32 mask;
4130
4131 assert(blkno > 0 && blkno < mapsize);
4132 assert(nblocks > 0 && nblocks <= mapsize);
4133
4134 assert(blkno + nblocks <= mapsize);
4135
4136 dbmap += (blkno / 32);
4137 while (nblocks > 0) {
4138 bitno = blkno & (32 - 1);
4139 nb = min(nblocks, 32 - bitno);
4140
4141 mask = (0xffffffff << (32 - nb) >> bitno);
4142 assert((mask & *dbmap) == mask);
4143 *dbmap &= ~mask;
4144
4145 dbmap++;
4146 blkno += nb;
4147 nblocks -= nb;
4148 }
4149}
4150
4151
4152/*
4153 * DBAllocCK()
4154 */
4155static void DBAllocCK(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
4156{
4157 int word, nb, bitno;
4158 u32 mask;
4159
4160 assert(blkno > 0 && blkno < mapsize);
4161 assert(nblocks > 0 && nblocks <= mapsize);
4162
4163 assert(blkno + nblocks <= mapsize);
4164
4165 dbmap += (blkno / 32);
4166 while (nblocks > 0) {
4167 bitno = blkno & (32 - 1);
4168 nb = min(nblocks, 32 - bitno);
4169
4170 mask = (0xffffffff << (32 - nb) >> bitno);
4171 assert((mask & *dbmap) == mask);
4172
4173 dbmap++;
4174 blkno += nb;
4175 nblocks -= nb;
4176 }
4177}
4178
4179
4180/*
4181 * DBFreeCK()
4182 */
4183static void DBFreeCK(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
4184{
4185 int word, nb, bitno;
4186 u32 mask;
4187
4188 assert(blkno > 0 && blkno < mapsize);
4189 assert(nblocks > 0 && nblocks <= mapsize);
4190
4191 assert(blkno + nblocks <= mapsize);
4192
4193 dbmap += (blkno / 32);
4194 while (nblocks > 0) {
4195 bitno = blkno & (32 - 1);
4196 nb = min(nblocks, 32 - bitno);
4197
4198 mask = (0xffffffff << (32 - nb) >> bitno);
4199 assert((mask & *dbmap) == 0);
4200
4201 dbmap++;
4202 blkno += nb;
4203 nblocks -= nb;
4204 }
4205}
4206
4207
4208/*
4209 * dbPrtMap()
4210 */
4211static void dbPrtMap(struct bmap * bmp)
4212{
4213 printk(" mapsize: %d%d\n", bmp->db_mapsize);
4214 printk(" nfree: %d%d\n", bmp->db_nfree);
4215 printk(" numag: %d\n", bmp->db_numag);
4216 printk(" agsize: %d%d\n", bmp->db_agsize);
4217 printk(" agl2size: %d\n", bmp->db_agl2size);
4218 printk(" agwidth: %d\n", bmp->db_agwidth);
4219 printk(" agstart: %d\n", bmp->db_agstart);
4220 printk(" agheigth: %d\n", bmp->db_agheigth);
4221 printk(" aglevel: %d\n", bmp->db_aglevel);
4222 printk(" maxlevel: %d\n", bmp->db_maxlevel);
4223 printk(" maxag: %d\n", bmp->db_maxag);
4224 printk(" agpref: %d\n", bmp->db_agpref);
4225 printk(" l2nbppg: %d\n", bmp->db_l2nbperpage);
4226}
4227
4228
4229/*
4230 * dbPrtCtl()
4231 */
4232static void dbPrtCtl(struct dmapctl * dcp)
4233{
4234 int i, j, n;
4235
4236 printk(" height: %08x\n", le32_to_cpu(dcp->height));
4237 printk(" leafidx: %08x\n", le32_to_cpu(dcp->leafidx));
4238 printk(" budmin: %08x\n", dcp->budmin);
4239 printk(" nleafs: %08x\n", le32_to_cpu(dcp->nleafs));
4240 printk(" l2nleafs: %08x\n", le32_to_cpu(dcp->l2nleafs));
4241
4242 printk("\n Tree:\n");
4243 for (i = 0; i < CTLLEAFIND; i += 8) {
4244 n = min(8, CTLLEAFIND - i);
4245
4246 for (j = 0; j < n; j++)
4247 printf(" [%03x]: %02x", i + j,
4248 (char) dcp->stree[i + j]);
4249 printf("\n");
4250 }
4251
4252 printk("\n Tree Leaves:\n");
4253 for (i = 0; i < LPERCTL; i += 8) {
4254 n = min(8, LPERCTL - i);
4255
4256 for (j = 0; j < n; j++)
4257 printf(" [%03x]: %02x",
4258 i + j,
4259 (char) dcp->stree[i + j + CTLLEAFIND]);
4260 printf("\n");
4261 }
4262}
4263#endif /* _JFS_DEBUG_DMAP */
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 8676aee3ae48..404f33eae507 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -381,9 +381,12 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
381 * It's time to move the inline table to an external 381 * It's time to move the inline table to an external
382 * page and begin to build the xtree 382 * page and begin to build the xtree
383 */ 383 */
384 if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage) || 384 if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage))
385 dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) 385 goto clean_up;
386 goto clean_up; /* No space */ 386 if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
387 DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
388 goto clean_up;
389 }
387 390
388 /* 391 /*
389 * Save the table, we're going to overwrite it with the 392 * Save the table, we're going to overwrite it with the
@@ -397,13 +400,15 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
397 xtInitRoot(tid, ip); 400 xtInitRoot(tid, ip);
398 401
399 /* 402 /*
400 * Allocate the first block & add it to the xtree 403 * Add the first block to the xtree
401 */ 404 */
402 if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) { 405 if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) {
403 /* This really shouldn't fail */ 406 /* This really shouldn't fail */
404 jfs_warn("add_index: xtInsert failed!"); 407 jfs_warn("add_index: xtInsert failed!");
405 memcpy(&jfs_ip->i_dirtable, temp_table, 408 memcpy(&jfs_ip->i_dirtable, temp_table,
406 sizeof (temp_table)); 409 sizeof (temp_table));
410 dbFree(ip, xaddr, sbi->nbperpage);
411 DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
407 goto clean_up; 412 goto clean_up;
408 } 413 }
409 ip->i_size = PSIZE; 414 ip->i_size = PSIZE;
@@ -4554,202 +4559,3 @@ int dtModify(tid_t tid, struct inode *ip,
4554 4559
4555 return 0; 4560 return 0;
4556} 4561}
4557
4558#ifdef _JFS_DEBUG_DTREE
4559/*
4560 * dtDisplayTree()
4561 *
4562 * function: traverse forward
4563 */
4564int dtDisplayTree(struct inode *ip)
4565{
4566 int rc;
4567 struct metapage *mp;
4568 dtpage_t *p;
4569 s64 bn, pbn;
4570 int index, lastindex, v, h;
4571 pxd_t *xd;
4572 struct btstack btstack;
4573 struct btframe *btsp;
4574 struct btframe *parent;
4575 u8 *stbl;
4576 int psize = 256;
4577
4578 printk("display B+-tree.\n");
4579
4580 /* clear stack */
4581 btsp = btstack.stack;
4582
4583 /*
4584 * start with root
4585 *
4586 * root resides in the inode
4587 */
4588 bn = 0;
4589 v = h = 0;
4590
4591 /*
4592 * first access of each page:
4593 */
4594 newPage:
4595 DT_GETPAGE(ip, bn, mp, psize, p, rc);
4596 if (rc)
4597 return rc;
4598
4599 /* process entries forward from first index */
4600 index = 0;
4601 lastindex = p->header.nextindex - 1;
4602
4603 if (p->header.flag & BT_INTERNAL) {
4604 /*
4605 * first access of each internal page
4606 */
4607 printf("internal page ");
4608 dtDisplayPage(ip, bn, p);
4609
4610 goto getChild;
4611 } else { /* (p->header.flag & BT_LEAF) */
4612
4613 /*
4614 * first access of each leaf page
4615 */
4616 printf("leaf page ");
4617 dtDisplayPage(ip, bn, p);
4618
4619 /*
4620 * process leaf page entries
4621 *
4622 for ( ; index <= lastindex; index++)
4623 {
4624 }
4625 */
4626
4627 /* unpin the leaf page */
4628 DT_PUTPAGE(mp);
4629 }
4630
4631 /*
4632 * go back up to the parent page
4633 */
4634 getParent:
4635 /* pop/restore parent entry for the current child page */
4636 if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL)
4637 /* current page must have been root */
4638 return;
4639
4640 /*
4641 * parent page scan completed
4642 */
4643 if ((index = parent->index) == (lastindex = parent->lastindex)) {
4644 /* go back up to the parent page */
4645 goto getParent;
4646 }
4647
4648 /*
4649 * parent page has entries remaining
4650 */
4651 /* get back the parent page */
4652 bn = parent->bn;
4653 /* v = parent->level; */
4654 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4655 if (rc)
4656 return rc;
4657
4658 /* get next parent entry */
4659 index++;
4660
4661 /*
4662 * internal page: go down to child page of current entry
4663 */
4664 getChild:
4665 /* push/save current parent entry for the child page */
4666 btsp->bn = pbn = bn;
4667 btsp->index = index;
4668 btsp->lastindex = lastindex;
4669 /* btsp->level = v; */
4670 /* btsp->node = h; */
4671 ++btsp;
4672
4673 /* get current entry for the child page */
4674 stbl = DT_GETSTBL(p);
4675 xd = (pxd_t *) & p->slot[stbl[index]];
4676
4677 /*
4678 * first access of each internal entry:
4679 */
4680
4681 /* get child page */
4682 bn = addressPXD(xd);
4683 psize = lengthPXD(xd) << ip->i_ipmnt->i_l2bsize;
4684
4685 printk("traverse down 0x%Lx[%d]->0x%Lx\n", pbn, index, bn);
4686 v++;
4687 h = index;
4688
4689 /* release parent page */
4690 DT_PUTPAGE(mp);
4691
4692 /* process the child page */
4693 goto newPage;
4694}
4695
4696
4697/*
4698 * dtDisplayPage()
4699 *
4700 * function: display page
4701 */
4702int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p)
4703{
4704 int rc;
4705 struct metapage *mp;
4706 struct ldtentry *lh;
4707 struct idtentry *ih;
4708 pxd_t *xd;
4709 int i, j;
4710 u8 *stbl;
4711 wchar_t name[JFS_NAME_MAX + 1];
4712 struct component_name key = { 0, name };
4713 int freepage = 0;
4714
4715 if (p == NULL) {
4716 freepage = 1;
4717 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4718 if (rc)
4719 return rc;
4720 }
4721
4722 /* display page control */
4723 printk("bn:0x%Lx flag:0x%08x nextindex:%d\n",
4724 bn, p->header.flag, p->header.nextindex);
4725
4726 /* display entries */
4727 stbl = DT_GETSTBL(p);
4728 for (i = 0, j = 1; i < p->header.nextindex; i++, j++) {
4729 dtGetKey(p, i, &key, JFS_SBI(ip->i_sb)->mntflag);
4730 key.name[key.namlen] = '\0';
4731 if (p->header.flag & BT_LEAF) {
4732 lh = (struct ldtentry *) & p->slot[stbl[i]];
4733 printf("\t[%d] %s:%d", i, key.name,
4734 le32_to_cpu(lh->inumber));
4735 } else {
4736 ih = (struct idtentry *) & p->slot[stbl[i]];
4737 xd = (pxd_t *) ih;
4738 bn = addressPXD(xd);
4739 printf("\t[%d] %s:0x%Lx", i, key.name, bn);
4740 }
4741
4742 if (j == 4) {
4743 printf("\n");
4744 j = 0;
4745 }
4746 }
4747
4748 printf("\n");
4749
4750 if (freepage)
4751 DT_PUTPAGE(mp);
4752
4753 return 0;
4754}
4755#endif /* _JFS_DEBUG_DTREE */
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h
index 273a80130c9d..13e4fdf07724 100644
--- a/fs/jfs/jfs_dtree.h
+++ b/fs/jfs/jfs_dtree.h
@@ -269,11 +269,4 @@ extern int dtModify(tid_t tid, struct inode *ip, struct component_name * key,
269 ino_t * orig_ino, ino_t new_ino, int flag); 269 ino_t * orig_ino, ino_t new_ino, int flag);
270 270
271extern int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir); 271extern int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir);
272
273#ifdef _JFS_DEBUG_DTREE
274extern int dtDisplayTree(struct inode *ip);
275
276extern int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p);
277#endif /* _JFS_DEBUG_DTREE */
278
279#endif /* !_H_JFS_DTREE */ 272#endif /* !_H_JFS_DTREE */
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 971af2977eff..4021d46da7e3 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -87,25 +87,6 @@ static int copy_from_dinode(struct dinode *, struct inode *);
87static void copy_to_dinode(struct dinode *, struct inode *); 87static void copy_to_dinode(struct dinode *, struct inode *);
88 88
89/* 89/*
90 * debug code for double-checking inode map
91 */
92/* #define _JFS_DEBUG_IMAP 1 */
93
94#ifdef _JFS_DEBUG_IMAP
95#define DBG_DIINIT(imap) DBGdiInit(imap)
96#define DBG_DIALLOC(imap, ino) DBGdiAlloc(imap, ino)
97#define DBG_DIFREE(imap, ino) DBGdiFree(imap, ino)
98
99static void *DBGdiInit(struct inomap * imap);
100static void DBGdiAlloc(struct inomap * imap, ino_t ino);
101static void DBGdiFree(struct inomap * imap, ino_t ino);
102#else
103#define DBG_DIINIT(imap)
104#define DBG_DIALLOC(imap, ino)
105#define DBG_DIFREE(imap, ino)
106#endif /* _JFS_DEBUG_IMAP */
107
108/*
109 * NAME: diMount() 90 * NAME: diMount()
110 * 91 *
111 * FUNCTION: initialize the incore inode map control structures for 92 * FUNCTION: initialize the incore inode map control structures for
@@ -188,8 +169,6 @@ int diMount(struct inode *ipimap)
188 imap->im_ipimap = ipimap; 169 imap->im_ipimap = ipimap;
189 JFS_IP(ipimap)->i_imap = imap; 170 JFS_IP(ipimap)->i_imap = imap;
190 171
191// DBG_DIINIT(imap);
192
193 return (0); 172 return (0);
194} 173}
195 174
@@ -1043,7 +1022,6 @@ int diFree(struct inode *ip)
1043 /* update the bitmap. 1022 /* update the bitmap.
1044 */ 1023 */
1045 iagp->wmap[extno] = cpu_to_le32(bitmap); 1024 iagp->wmap[extno] = cpu_to_le32(bitmap);
1046 DBG_DIFREE(imap, inum);
1047 1025
1048 /* update the free inode counts at the iag, ag and 1026 /* update the free inode counts at the iag, ag and
1049 * map level. 1027 * map level.
@@ -1231,7 +1209,6 @@ int diFree(struct inode *ip)
1231 jfs_error(ip->i_sb, "diFree: the pmap does not show inode free"); 1209 jfs_error(ip->i_sb, "diFree: the pmap does not show inode free");
1232 } 1210 }
1233 iagp->wmap[extno] = 0; 1211 iagp->wmap[extno] = 0;
1234 DBG_DIFREE(imap, inum);
1235 PXDlength(&iagp->inoext[extno], 0); 1212 PXDlength(&iagp->inoext[extno], 0);
1236 PXDaddress(&iagp->inoext[extno], 0); 1213 PXDaddress(&iagp->inoext[extno], 0);
1237 1214
@@ -1350,7 +1327,6 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
1350 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 1327 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
1351 1328
1352 ip->i_ino = (iagno << L2INOSPERIAG) + ino; 1329 ip->i_ino = (iagno << L2INOSPERIAG) + ino;
1353 DBG_DIALLOC(JFS_IP(ipimap)->i_imap, ip->i_ino);
1354 jfs_ip->ixpxd = iagp->inoext[extno]; 1330 jfs_ip->ixpxd = iagp->inoext[extno];
1355 jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); 1331 jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
1356 jfs_ip->active_ag = -1; 1332 jfs_ip->active_ag = -1;
@@ -3185,84 +3161,3 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip)
3185 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) 3161 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode))
3186 dip->di_rdev = cpu_to_le32(jfs_ip->dev); 3162 dip->di_rdev = cpu_to_le32(jfs_ip->dev);
3187} 3163}
3188
3189#ifdef _JFS_DEBUG_IMAP
3190/*
3191 * DBGdiInit()
3192 */
3193static void *DBGdiInit(struct inomap * imap)
3194{
3195 u32 *dimap;
3196 int size;
3197 size = 64 * 1024;
3198 if ((dimap = (u32 *) xmalloc(size, L2PSIZE, kernel_heap)) == NULL)
3199 assert(0);
3200 bzero((void *) dimap, size);
3201 imap->im_DBGdimap = dimap;
3202}
3203
3204/*
3205 * DBGdiAlloc()
3206 */
3207static void DBGdiAlloc(struct inomap * imap, ino_t ino)
3208{
3209 u32 *dimap = imap->im_DBGdimap;
3210 int w, b;
3211 u32 m;
3212 w = ino >> 5;
3213 b = ino & 31;
3214 m = 0x80000000 >> b;
3215 assert(w < 64 * 256);
3216 if (dimap[w] & m) {
3217 printk("DEBUG diAlloc: duplicate alloc ino:0x%x\n", ino);
3218 }
3219 dimap[w] |= m;
3220}
3221
3222/*
3223 * DBGdiFree()
3224 */
3225static void DBGdiFree(struct inomap * imap, ino_t ino)
3226{
3227 u32 *dimap = imap->im_DBGdimap;
3228 int w, b;
3229 u32 m;
3230 w = ino >> 5;
3231 b = ino & 31;
3232 m = 0x80000000 >> b;
3233 assert(w < 64 * 256);
3234 if ((dimap[w] & m) == 0) {
3235 printk("DEBUG diFree: duplicate free ino:0x%x\n", ino);
3236 }
3237 dimap[w] &= ~m;
3238}
3239
3240static void dump_cp(struct inomap * ipimap, char *function, int line)
3241{
3242 printk("\n* ********* *\nControl Page %s %d\n", function, line);
3243 printk("FreeIAG %d\tNextIAG %d\n", ipimap->im_freeiag,
3244 ipimap->im_nextiag);
3245 printk("NumInos %d\tNumFree %d\n",
3246 atomic_read(&ipimap->im_numinos),
3247 atomic_read(&ipimap->im_numfree));
3248 printk("AG InoFree %d\tAG ExtFree %d\n",
3249 ipimap->im_agctl[0].inofree, ipimap->im_agctl[0].extfree);
3250 printk("AG NumInos %d\tAG NumFree %d\n",
3251 ipimap->im_agctl[0].numinos, ipimap->im_agctl[0].numfree);
3252}
3253
3254static void dump_iag(struct iag * iag, char *function, int line)
3255{
3256 printk("\n* ********* *\nIAG %s %d\n", function, line);
3257 printk("IagNum %d\tIAG Free %d\n", le32_to_cpu(iag->iagnum),
3258 le32_to_cpu(iag->iagfree));
3259 printk("InoFreeFwd %d\tInoFreeBack %d\n",
3260 le32_to_cpu(iag->inofreefwd),
3261 le32_to_cpu(iag->inofreeback));
3262 printk("ExtFreeFwd %d\tExtFreeBack %d\n",
3263 le32_to_cpu(iag->extfreefwd),
3264 le32_to_cpu(iag->extfreeback));
3265 printk("NFreeInos %d\tNFreeExts %d\n", le32_to_cpu(iag->nfreeinos),
3266 le32_to_cpu(iag->nfreeexts));
3267}
3268#endif /* _JFS_DEBUG_IMAP */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 79d07624bfe1..22815e88e7cc 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1030,7 +1030,8 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
1030 * starting until all current transactions are completed 1030 * starting until all current transactions are completed
1031 * by setting syncbarrier flag. 1031 * by setting syncbarrier flag.
1032 */ 1032 */
1033 if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) { 1033 if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1034 (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1034 set_bit(log_SYNCBARRIER, &log->flag); 1035 set_bit(log_SYNCBARRIER, &log->flag);
1035 jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn, 1036 jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1036 log->syncpt); 1037 log->syncpt);
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 6c5485d16c39..13d7e3f1feb4 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -561,7 +561,6 @@ static int metapage_releasepage(struct page *page, int gfp_mask)
561 dump_mem("page", page, sizeof(struct page)); 561 dump_mem("page", page, sizeof(struct page));
562 dump_stack(); 562 dump_stack();
563 } 563 }
564 WARN_ON(mp->lsn);
565 if (mp->lsn) 564 if (mp->lsn)
566 remove_from_logsync(mp); 565 remove_from_logsync(mp);
567 remove_metapage(page, mp); 566 remove_metapage(page, mp);
@@ -641,7 +640,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
641 } else { 640 } else {
642 page = read_cache_page(mapping, page_index, 641 page = read_cache_page(mapping, page_index,
643 (filler_t *)mapping->a_ops->readpage, NULL); 642 (filler_t *)mapping->a_ops->readpage, NULL);
644 if (IS_ERR(page)) { 643 if (IS_ERR(page) || !PageUptodate(page)) {
645 jfs_err("read_cache_page failed!"); 644 jfs_err("read_cache_page failed!");
646 return NULL; 645 return NULL;
647 } 646 }
@@ -783,14 +782,6 @@ void release_metapage(struct metapage * mp)
783 if (test_bit(META_discard, &mp->flag) && !mp->count) { 782 if (test_bit(META_discard, &mp->flag) && !mp->count) {
784 clear_page_dirty(page); 783 clear_page_dirty(page);
785 ClearPageUptodate(page); 784 ClearPageUptodate(page);
786#ifdef _NOT_YET
787 if (page->mapping) {
788 /* Remove from page cache and page cache reference */
789 remove_from_page_cache(page);
790 page_cache_release(page);
791 metapage_releasepage(page, 0);
792 }
793#endif
794 } 785 }
795#else 786#else
796 /* Try to keep metapages from using up too much memory */ 787 /* Try to keep metapages from using up too much memory */
diff --git a/fs/jfs/jfs_unicode.c b/fs/jfs/jfs_unicode.c
index b32208aad550..f327decfb155 100644
--- a/fs/jfs/jfs_unicode.c
+++ b/fs/jfs/jfs_unicode.c
@@ -51,8 +51,9 @@ int jfs_strfromUCS_le(char *to, const __le16 * from,
51 } 51 }
52 } else { 52 } else {
53 for (i = 0; (i < len) && from[i]; i++) { 53 for (i = 0; (i < len) && from[i]; i++) {
54 if (le16_to_cpu(from[i]) & 0xff00) { 54 if (unlikely(le16_to_cpu(from[i]) & 0xff00)) {
55 if (warn) { 55 to[i] = '?';
56 if (unlikely(warn)) {
56 warn--; 57 warn--;
57 warn_again--; 58 warn_again--;
58 printk(KERN_ERR 59 printk(KERN_ERR
@@ -61,7 +62,7 @@ int jfs_strfromUCS_le(char *to, const __le16 * from,
61 printk(KERN_ERR 62 printk(KERN_ERR
62 "mount with iocharset=utf8 to access\n"); 63 "mount with iocharset=utf8 to access\n");
63 } 64 }
64 to[i] = '?'; 65
65 } 66 }
66 else 67 else
67 to[i] = (char) (le16_to_cpu(from[i])); 68 to[i] = (char) (le16_to_cpu(from[i]));
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index 31b34db4519e..a7fe2f2b969f 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -135,14 +135,6 @@ static int xtSearchNode(struct inode *ip,
135static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); 135static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
136#endif /* _STILL_TO_PORT */ 136#endif /* _STILL_TO_PORT */
137 137
138/* External references */
139
140/*
141 * debug control
142 */
143/* #define _JFS_DEBUG_XTREE 1 */
144
145
146/* 138/*
147 * xtLookup() 139 * xtLookup()
148 * 140 *
@@ -4140,338 +4132,6 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
4140 return 0; 4132 return 0;
4141} 4133}
4142 4134
4143
4144#ifdef _JFS_DEBUG_XTREE
4145/*
4146 * xtDisplayTree()
4147 *
4148 * function: traverse forward
4149 */
4150int xtDisplayTree(struct inode *ip)
4151{
4152 int rc = 0;
4153 struct metapage *mp;
4154 xtpage_t *p;
4155 s64 bn, pbn;
4156 int index, lastindex, v, h;
4157 xad_t *xad;
4158 struct btstack btstack;
4159 struct btframe *btsp;
4160 struct btframe *parent;
4161
4162 printk("display B+-tree.\n");
4163
4164 /* clear stack */
4165 btsp = btstack.stack;
4166
4167 /*
4168 * start with root
4169 *
4170 * root resides in the inode
4171 */
4172 bn = 0;
4173 v = h = 0;
4174
4175 /*
4176 * first access of each page:
4177 */
4178 getPage:
4179 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4180 if (rc)
4181 return rc;
4182
4183 /* process entries forward from first index */
4184 index = XTENTRYSTART;
4185 lastindex = le16_to_cpu(p->header.nextindex) - 1;
4186
4187 if (p->header.flag & BT_INTERNAL) {
4188 /*
4189 * first access of each internal page
4190 */
4191 goto getChild;
4192 } else { /* (p->header.flag & BT_LEAF) */
4193
4194 /*
4195 * first access of each leaf page
4196 */
4197 printf("leaf page ");
4198 xtDisplayPage(ip, bn, p);
4199
4200 /* unpin the leaf page */
4201 XT_PUTPAGE(mp);
4202 }
4203
4204 /*
4205 * go back up to the parent page
4206 */
4207 getParent:
4208 /* pop/restore parent entry for the current child page */
4209 if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL)
4210 /* current page must have been root */
4211 return;
4212
4213 /*
4214 * parent page scan completed
4215 */
4216 if ((index = parent->index) == (lastindex = parent->lastindex)) {
4217 /* go back up to the parent page */
4218 goto getParent;
4219 }
4220
4221 /*
4222 * parent page has entries remaining
4223 */
4224 /* get back the parent page */
4225 bn = parent->bn;
4226 /* v = parent->level; */
4227 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4228 if (rc)
4229 return rc;
4230
4231 /* get next parent entry */
4232 index++;
4233
4234 /*
4235 * internal page: go down to child page of current entry
4236 */
4237 getChild:
4238 /* push/save current parent entry for the child page */
4239 btsp->bn = pbn = bn;
4240 btsp->index = index;
4241 btsp->lastindex = lastindex;
4242 /* btsp->level = v; */
4243 /* btsp->node = h; */
4244 ++btsp;
4245
4246 /* get child page */
4247 xad = &p->xad[index];
4248 bn = addressXAD(xad);
4249
4250 /*
4251 * first access of each internal entry:
4252 */
4253 /* release parent page */
4254 XT_PUTPAGE(mp);
4255
4256 printk("traverse down 0x%lx[%d]->0x%lx\n", (ulong) pbn, index,
4257 (ulong) bn);
4258 v++;
4259 h = index;
4260
4261 /* process the child page */
4262 goto getPage;
4263}
4264
4265
4266/*
4267 * xtDisplayPage()
4268 *
4269 * function: display page
4270 */
4271int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p)
4272{
4273 int rc = 0;
4274 xad_t *xad;
4275 s64 xaddr, xoff;
4276 int xlen, i, j;
4277
4278 /* display page control */
4279 printf("bn:0x%lx flag:0x%x nextindex:%d\n",
4280 (ulong) bn, p->header.flag,
4281 le16_to_cpu(p->header.nextindex));
4282
4283 /* display entries */
4284 xad = &p->xad[XTENTRYSTART];
4285 for (i = XTENTRYSTART, j = 1; i < le16_to_cpu(p->header.nextindex);
4286 i++, xad++, j++) {
4287 xoff = offsetXAD(xad);
4288 xaddr = addressXAD(xad);
4289 xlen = lengthXAD(xad);
4290 printf("\t[%d] 0x%lx:0x%lx(0x%x)", i, (ulong) xoff,
4291 (ulong) xaddr, xlen);
4292
4293 if (j == 4) {
4294 printf("\n");
4295 j = 0;
4296 }
4297 }
4298
4299 printf("\n");
4300}
4301#endif /* _JFS_DEBUG_XTREE */
4302
4303
4304#ifdef _JFS_WIP
4305/*
4306 * xtGather()
4307 *
4308 * function:
4309 * traverse for allocation acquiring tlock at commit time
4310 * (vs at the time of update) logging backward top down
4311 *
4312 * note:
4313 * problem - establishing that all new allocation have been
4314 * processed both for append and random write in sparse file
4315 * at the current entry at the current subtree root page
4316 *
4317 */
4318int xtGather(btree_t *t)
4319{
4320 int rc = 0;
4321 xtpage_t *p;
4322 u64 bn;
4323 int index;
4324 btentry_t *e;
4325 struct btstack btstack;
4326 struct btsf *parent;
4327
4328 /* clear stack */
4329 BT_CLR(&btstack);
4330
4331 /*
4332 * start with root
4333 *
4334 * root resides in the inode
4335 */
4336 bn = 0;
4337 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4338 if (rc)
4339 return rc;
4340
4341 /* new root is NOT pointed by a new entry
4342 if (p->header.flag & NEW)
4343 allocate new page lock;
4344 write a NEWPAGE log;
4345 */
4346
4347 dopage:
4348 /*
4349 * first access of each page:
4350 */
4351 /* process entries backward from last index */
4352 index = le16_to_cpu(p->header.nextindex) - 1;
4353
4354 if (p->header.flag & BT_LEAF) {
4355 /*
4356 * first access of each leaf page
4357 */
4358 /* process leaf page entries backward */
4359 for (; index >= XTENTRYSTART; index--) {
4360 e = &p->xad[index];
4361 /*
4362 * if newpage, log NEWPAGE.
4363 *
4364 if (e->flag & XAD_NEW) {
4365 nfound =+ entry->length;
4366 update current page lock for the entry;
4367 newpage(entry);
4368 *
4369 * if moved, log move.
4370 *
4371 } else if (e->flag & XAD_MOVED) {
4372 reset flag;
4373 update current page lock for the entry;
4374 }
4375 */
4376 }
4377
4378 /* unpin the leaf page */
4379 XT_PUTPAGE(mp);
4380
4381 /*
4382 * go back up to the parent page
4383 */
4384 getParent:
4385 /* restore parent entry for the current child page */
4386 if ((parent = BT_POP(&btstack)) == NULL)
4387 /* current page must have been root */
4388 return 0;
4389
4390 if ((index = parent->index) == XTENTRYSTART) {
4391 /*
4392 * parent page scan completed
4393 */
4394 /* go back up to the parent page */
4395 goto getParent;
4396 } else {
4397 /*
4398 * parent page has entries remaining
4399 */
4400 /* get back the parent page */
4401 bn = parent->bn;
4402 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4403 if (rc)
4404 return -EIO;
4405
4406 /* first subroot page which
4407 * covers all new allocated blocks
4408 * itself not new/modified.
4409 * (if modified from split of descendent,
4410 * go down path of split page)
4411
4412 if (nfound == nnew &&
4413 !(p->header.flag & (NEW | MOD)))
4414 exit scan;
4415 */
4416
4417 /* process parent page entries backward */
4418 index--;
4419 }
4420 } else {
4421 /*
4422 * first access of each internal page
4423 */
4424 }
4425
4426 /*
4427 * internal page: go down to child page of current entry
4428 */
4429
4430 /* save current parent entry for the child page */
4431 BT_PUSH(&btstack, bn, index);
4432
4433 /* get current entry for the child page */
4434 e = &p->xad[index];
4435
4436 /*
4437 * first access of each internal entry:
4438 */
4439 /*
4440 * if new entry, log btree_tnewentry.
4441 *
4442 if (e->flag & XAD_NEW)
4443 update parent page lock for the entry;
4444 */
4445
4446 /* release parent page */
4447 XT_PUTPAGE(mp);
4448
4449 /* get child page */
4450 bn = e->bn;
4451 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4452 if (rc)
4453 return rc;
4454
4455 /*
4456 * first access of each non-root page:
4457 */
4458 /*
4459 * if new, log btree_newpage.
4460 *
4461 if (p->header.flag & NEW)
4462 allocate new page lock;
4463 write a NEWPAGE log (next, prev);
4464 */
4465
4466 /* process the child page */
4467 goto dopage;
4468
4469 out:
4470 return 0;
4471}
4472#endif /* _JFS_WIP */
4473
4474
4475#ifdef CONFIG_JFS_STATISTICS 4135#ifdef CONFIG_JFS_STATISTICS
4476int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, 4136int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length,
4477 int *eof, void *data) 4137 int *eof, void *data)
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index a69784254fe7..af668a80b40f 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -131,10 +131,4 @@ extern int xtRelocate(tid_t tid, struct inode *ip,
131extern int xtAppend(tid_t tid, 131extern int xtAppend(tid_t tid,
132 struct inode *ip, int xflag, s64 xoff, int maxblocks, 132 struct inode *ip, int xflag, s64 xoff, int maxblocks,
133 int *xlenp, s64 * xaddrp, int flag); 133 int *xlenp, s64 * xaddrp, int flag);
134
135#ifdef _JFS_DEBUG_XTREE
136extern int xtDisplayTree(struct inode *ip);
137extern int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p);
138#endif /* _JFS_DEBUG_XTREE */
139
140#endif /* !_H_JFS_XTREE */ 134#endif /* !_H_JFS_XTREE */
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index ee438d429d45..554ec739e49b 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -781,7 +781,7 @@ static int can_set_xattr(struct inode *inode, const char *name,
781 if (IS_RDONLY(inode)) 781 if (IS_RDONLY(inode))
782 return -EROFS; 782 return -EROFS;
783 783
784 if (IS_IMMUTABLE(inode) || IS_APPEND(inode) || S_ISLNK(inode->i_mode)) 784 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
785 return -EPERM; 785 return -EPERM;
786 786
787 if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0) 787 if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0)
@@ -790,12 +790,12 @@ static int can_set_xattr(struct inode *inode, const char *name,
790 */ 790 */
791 return can_set_system_xattr(inode, name, value, value_len); 791 return can_set_system_xattr(inode, name, value, value_len);
792 792
793 if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) 793 if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0)
794 return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); 794 return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
795 795
796#ifdef CONFIG_JFS_SECURITY 796#ifdef CONFIG_JFS_SECURITY
797 if (strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) 797 if (strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)
798 != 0) 798 == 0)
799 return 0; /* Leave it to the security module */ 799 return 0; /* Leave it to the security module */
800#endif 800#endif
801 801
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 6e242556b903..12a857c29e25 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -331,7 +331,7 @@ static ctl_table nlm_sysctls[] = {
331 .ctl_name = CTL_UNNUMBERED, 331 .ctl_name = CTL_UNNUMBERED,
332 .procname = "nlm_grace_period", 332 .procname = "nlm_grace_period",
333 .data = &nlm_grace_period, 333 .data = &nlm_grace_period,
334 .maxlen = sizeof(int), 334 .maxlen = sizeof(unsigned long),
335 .mode = 0644, 335 .mode = 0644,
336 .proc_handler = &proc_doulongvec_minmax, 336 .proc_handler = &proc_doulongvec_minmax,
337 .extra1 = (unsigned long *) &nlm_grace_period_min, 337 .extra1 = (unsigned long *) &nlm_grace_period_min,
@@ -341,7 +341,7 @@ static ctl_table nlm_sysctls[] = {
341 .ctl_name = CTL_UNNUMBERED, 341 .ctl_name = CTL_UNNUMBERED,
342 .procname = "nlm_timeout", 342 .procname = "nlm_timeout",
343 .data = &nlm_timeout, 343 .data = &nlm_timeout,
344 .maxlen = sizeof(int), 344 .maxlen = sizeof(unsigned long),
345 .mode = 0644, 345 .mode = 0644,
346 .proc_handler = &proc_doulongvec_minmax, 346 .proc_handler = &proc_doulongvec_minmax,
347 .extra1 = (unsigned long *) &nlm_timeout_min, 347 .extra1 = (unsigned long *) &nlm_timeout_min,
diff --git a/fs/locks.c b/fs/locks.c
index 29fa5da6c117..11956b6179ff 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1591,7 +1591,8 @@ out:
1591/* Apply the lock described by l to an open file descriptor. 1591/* Apply the lock described by l to an open file descriptor.
1592 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 1592 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1593 */ 1593 */
1594int fcntl_setlk(struct file *filp, unsigned int cmd, struct flock __user *l) 1594int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
1595 struct flock __user *l)
1595{ 1596{
1596 struct file_lock *file_lock = locks_alloc_lock(); 1597 struct file_lock *file_lock = locks_alloc_lock();
1597 struct flock flock; 1598 struct flock flock;
@@ -1620,6 +1621,7 @@ int fcntl_setlk(struct file *filp, unsigned int cmd, struct flock __user *l)
1620 goto out; 1621 goto out;
1621 } 1622 }
1622 1623
1624again:
1623 error = flock_to_posix_lock(filp, file_lock, &flock); 1625 error = flock_to_posix_lock(filp, file_lock, &flock);
1624 if (error) 1626 if (error)
1625 goto out; 1627 goto out;
@@ -1648,25 +1650,33 @@ int fcntl_setlk(struct file *filp, unsigned int cmd, struct flock __user *l)
1648 if (error) 1650 if (error)
1649 goto out; 1651 goto out;
1650 1652
1651 if (filp->f_op && filp->f_op->lock != NULL) { 1653 if (filp->f_op && filp->f_op->lock != NULL)
1652 error = filp->f_op->lock(filp, cmd, file_lock); 1654 error = filp->f_op->lock(filp, cmd, file_lock);
1653 goto out; 1655 else {
1654 } 1656 for (;;) {
1657 error = __posix_lock_file(inode, file_lock);
1658 if ((error != -EAGAIN) || (cmd == F_SETLK))
1659 break;
1660 error = wait_event_interruptible(file_lock->fl_wait,
1661 !file_lock->fl_next);
1662 if (!error)
1663 continue;
1655 1664
1656 for (;;) { 1665 locks_delete_block(file_lock);
1657 error = __posix_lock_file(inode, file_lock);
1658 if ((error != -EAGAIN) || (cmd == F_SETLK))
1659 break; 1666 break;
1660 error = wait_event_interruptible(file_lock->fl_wait, 1667 }
1661 !file_lock->fl_next); 1668 }
1662 if (!error)
1663 continue;
1664 1669
1665 locks_delete_block(file_lock); 1670 /*
1666 break; 1671 * Attempt to detect a close/fcntl race and recover by
1672 * releasing the lock that was just acquired.
1673 */
1674 if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
1675 flock.l_type = F_UNLCK;
1676 goto again;
1667 } 1677 }
1668 1678
1669 out: 1679out:
1670 locks_free_lock(file_lock); 1680 locks_free_lock(file_lock);
1671 return error; 1681 return error;
1672} 1682}
@@ -1724,7 +1734,8 @@ out:
1724/* Apply the lock described by l to an open file descriptor. 1734/* Apply the lock described by l to an open file descriptor.
1725 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 1735 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1726 */ 1736 */
1727int fcntl_setlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) 1737int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
1738 struct flock64 __user *l)
1728{ 1739{
1729 struct file_lock *file_lock = locks_alloc_lock(); 1740 struct file_lock *file_lock = locks_alloc_lock();
1730 struct flock64 flock; 1741 struct flock64 flock;
@@ -1753,6 +1764,7 @@ int fcntl_setlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
1753 goto out; 1764 goto out;
1754 } 1765 }
1755 1766
1767again:
1756 error = flock64_to_posix_lock(filp, file_lock, &flock); 1768 error = flock64_to_posix_lock(filp, file_lock, &flock);
1757 if (error) 1769 if (error)
1758 goto out; 1770 goto out;
@@ -1781,22 +1793,30 @@ int fcntl_setlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
1781 if (error) 1793 if (error)
1782 goto out; 1794 goto out;
1783 1795
1784 if (filp->f_op && filp->f_op->lock != NULL) { 1796 if (filp->f_op && filp->f_op->lock != NULL)
1785 error = filp->f_op->lock(filp, cmd, file_lock); 1797 error = filp->f_op->lock(filp, cmd, file_lock);
1786 goto out; 1798 else {
1787 } 1799 for (;;) {
1800 error = __posix_lock_file(inode, file_lock);
1801 if ((error != -EAGAIN) || (cmd == F_SETLK64))
1802 break;
1803 error = wait_event_interruptible(file_lock->fl_wait,
1804 !file_lock->fl_next);
1805 if (!error)
1806 continue;
1788 1807
1789 for (;;) { 1808 locks_delete_block(file_lock);
1790 error = __posix_lock_file(inode, file_lock);
1791 if ((error != -EAGAIN) || (cmd == F_SETLK64))
1792 break; 1809 break;
1793 error = wait_event_interruptible(file_lock->fl_wait, 1810 }
1794 !file_lock->fl_next); 1811 }
1795 if (!error)
1796 continue;
1797 1812
1798 locks_delete_block(file_lock); 1813 /*
1799 break; 1814 * Attempt to detect a close/fcntl race and recover by
1815 * releasing the lock that was just acquired.
1816 */
1817 if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
1818 flock.l_type = F_UNLCK;
1819 goto again;
1800 } 1820 }
1801 1821
1802out: 1822out:
@@ -1888,12 +1908,7 @@ void locks_remove_flock(struct file *filp)
1888 1908
1889 while ((fl = *before) != NULL) { 1909 while ((fl = *before) != NULL) {
1890 if (fl->fl_file == filp) { 1910 if (fl->fl_file == filp) {
1891 /* 1911 if (IS_FLOCK(fl)) {
1892 * We might have a POSIX lock that was created at the same time
1893 * the filp was closed for the last time. Just remove that too,
1894 * regardless of ownership, since nobody can own it.
1895 */
1896 if (IS_FLOCK(fl) || IS_POSIX(fl)) {
1897 locks_delete_lock(before); 1912 locks_delete_lock(before);
1898 continue; 1913 continue;
1899 } 1914 }
diff --git a/fs/mbcache.c b/fs/mbcache.c
index c7170b9221a3..b002a088857d 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -316,11 +316,10 @@ fail:
316 * currently in use cannot be freed, and thus remain in the cache. All others 316 * currently in use cannot be freed, and thus remain in the cache. All others
317 * are freed. 317 * are freed.
318 * 318 *
319 * @cache: which cache to shrink
320 * @bdev: which device's cache entries to shrink 319 * @bdev: which device's cache entries to shrink
321 */ 320 */
322void 321void
323mb_cache_shrink(struct mb_cache *cache, struct block_device *bdev) 322mb_cache_shrink(struct block_device *bdev)
324{ 323{
325 LIST_HEAD(free_list); 324 LIST_HEAD(free_list);
326 struct list_head *l, *ltmp; 325 struct list_head *l, *ltmp;
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 1d2ad15f1533..9709fac6531d 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -1,21 +1,18 @@
1ToDo/Notes: 1ToDo/Notes:
2 - Find and fix bugs. 2 - Find and fix bugs.
3 - Checkpoint or disable the user space journal ($UsnJrnl).
4 - In between ntfs_prepare/commit_write, need exclusion between 3 - In between ntfs_prepare/commit_write, need exclusion between
5 simultaneous file extensions. Need perhaps an NInoResizeUnderway() 4 simultaneous file extensions. This is given to us by holding i_sem
6 flag which we can set in ntfs_prepare_write() and clear again in 5 on the inode. The only places in the kernel when a file is resized
7 ntfs_commit_write(). Just have to be careful in readpage/writepage, 6 are prepare/commit write and truncate for both of which i_sem is
8 as well as in truncate, that we play nice... We might need to have 7 held. Just have to be careful in readpage/writepage and all other
9 a data_size field in the ntfs_inode to store the real attribute 8 helpers not running under i_sem that we play nice...
10 length. Also need to be careful with initialized_size extension in 9 Also need to be careful with initialized_size extension in
11 ntfs_prepare_write. Basically, just be _very_ careful in this code... 10 ntfs_prepare_write. Basically, just be _very_ careful in this code...
12 OTOH, perhaps i_sem, which is held across generic_file_write is 11 UPDATE: The only things that need to be checked are read/writepage
13 sufficient for synchronisation here. We then just need to make sure 12 which do not hold i_sem. Note writepage cannot change i_size but it
14 ntfs_readpage/writepage/truncate interoperate properly with us. 13 needs to cope with a concurrent i_size change, just like readpage.
15 UPDATE: The above is all ok as it is due to i_sem held. The only 14 Also both need to cope with concurrent changes to the other sizes,
16 thing that needs to be checked is ntfs_writepage() which does not 15 i.e. initialized/allocated/compressed size, as well.
17 hold i_sem. It cannot change i_size but it needs to cope with a
18 concurrent i_size change.
19 - Implement mft.c::sync_mft_mirror_umount(). We currently will just 16 - Implement mft.c::sync_mft_mirror_umount(). We currently will just
20 leave the volume dirty on umount if the final iput(vol->mft_ino) 17 leave the volume dirty on umount if the final iput(vol->mft_ino)
21 causes a write of any mirrored mft records due to the mft mirror 18 causes a write of any mirrored mft records due to the mft mirror
@@ -25,12 +22,158 @@ ToDo/Notes:
25 - Enable the code for setting the NT4 compatibility flag when we start 22 - Enable the code for setting the NT4 compatibility flag when we start
26 making NTFS 1.2 specific modifications. 23 making NTFS 1.2 specific modifications.
27 24
282.1.23-WIP 252.1.23 - Implement extension of resident files and make writing safe as well as
26 many bug fixes, cleanups, and enhancements...
29 27
30 - Add printk rate limiting for ntfs_warning() and ntfs_error() when 28 - Add printk rate limiting for ntfs_warning() and ntfs_error() when
31 compiled without debug. This avoids a possible denial of service 29 compiled without debug. This avoids a possible denial of service
32 attack. Thanks to Carl-Daniel Hailfinger from SuSE for pointing this 30 attack. Thanks to Carl-Daniel Hailfinger from SuSE for pointing this
33 out. 31 out.
32 - Fix compilation warnings on ia64. (Randy Dunlap)
33 - Use i_size_{read,write}() instead of reading i_size by hand and cache
34 the value where appropriate.
35 - Add size_lock to the ntfs_inode structure. This is an rw spinlock
36 and it locks against access to the inode sizes. Note, ->size_lock
37 is also accessed from irq context so you must use the _irqsave and
38 _irqrestore lock and unlock functions, respectively. Protect all
39 accesses to allocated_size, initialized_size, and compressed_size.
40 - Minor optimization to fs/ntfs/super.c::ntfs_statfs() and its helpers.
41 - Implement extension of resident files in the regular file write code
42 paths (fs/ntfs/aops.c::ntfs_{prepare,commit}_write()). At present
43 this only works until the data attribute becomes too big for the mft
44 record after which we abort the write returning -EOPNOTSUPP from
45 ntfs_prepare_write().
46 - Add disable_sparse mount option together with a per volume sparse
47 enable bit which is set appropriately and a per inode sparse disable
48 bit which is preset on some system file inodes as appropriate.
49 - Enforce that sparse support is disabled on NTFS volumes pre 3.0.
50 - Fix a bug in fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress() in
51 the creation of the unmapped runlist element for the base attribute
52 extent.
53 - Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking
54 helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist().
55 This allows us to map runlist fragments with the runlist lock already
56 held without having to drop and reacquire it around the call. Adapt
57 all callers.
58 - Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked
59 runlist. This allows us to find runlist elements with the runlist
60 lock already held without having to drop and reacquire it around the
61 call. Adapt all callers.
62 - Change time to u64 in time.h::ntfs2utc() as it otherwise generates a
63 warning in the do_div() call on sparc32. Thanks to Meelis Roos for
64 the report and analysis of the warning.
65 - Fix a nasty runlist merge bug when merging two holes.
66 - Set the ntfs_inode->allocated_size to the real allocated size in the
67 mft record for resident attributes (fs/ntfs/inode.c).
68 - Small readability cleanup to use "a" instead of "ctx->attr"
69 everywhere (fs/ntfs/inode.c).
70 - Make fs/ntfs/namei.c::ntfs_get_{parent,dentry} static and move the
71 definition of ntfs_export_ops from fs/ntfs/super.c to namei.c. Also,
72 declare ntfs_export_ops in fs/ntfs/ntfs.h.
73 - Correct sparse file handling. The compressed values need to be
74 checked and set in the ntfs inode as done for compressed files and
75 the compressed size needs to be used for vfs inode->i_blocks instead
76 of the allocated size, again, as done for compressed files.
77 - Add AT_EA in addition to AT_DATA to whitelist for being allowed to be
78 non-resident in fs/ntfs/attrib.c::ntfs_attr_can_be_non_resident().
79 - Add fs/ntfs/attrib.c::ntfs_attr_vcn_to_lcn_nolock() used by the new
80 write code.
81 - Fix bug in fs/ntfs/attrib.c::ntfs_find_vcn_nolock() where after
82 dropping the read lock and taking the write lock we were not checking
83 whether someone else did not already do the work we wanted to do.
84 - Rename fs/ntfs/attrib.c::ntfs_find_vcn_nolock() to
85 ntfs_attr_find_vcn_nolock() and update all callers.
86 - Add fs/ntfs/attrib.[hc]::ntfs_attr_make_non_resident().
87 - Fix sign of various error return values to be negative in
88 fs/ntfs/lcnalloc.c.
89 - Modify ->readpage and ->writepage (fs/ntfs/aops.c) so they detect and
90 handle the case where an attribute is converted from resident to
91 non-resident by a concurrent file write.
92 - Remove checks for NULL before calling kfree() since kfree() does the
93 checking itself. (Jesper Juhl)
94 - Some utilities modify the boot sector but do not update the checksum.
95 Thus, relax the checking in fs/ntfs/super.c::is_boot_sector_ntfs() to
96 only emit a warning when the checksum is incorrect rather than
97 refusing the mount. Thanks to Bernd Casimir for pointing this
98 problem out.
99 - Update attribute definition handling.
100 - Add NTFS_MAX_CLUSTER_SIZE and NTFS_MAX_PAGES_PER_CLUSTER constants.
101 - Use NTFS_MAX_CLUSTER_SIZE in super.c instead of hard coding 0x10000.
102 - Use MAX_BUF_PER_PAGE instead of variable sized array allocation for
103 better code generation and one less sparse warning in fs/ntfs/aops.c.
104 - Remove spurious void pointer casts from fs/ntfs/. (Pekka Enberg)
105 - Use C99 style structure initialization after memory allocation where
106 possible (fs/ntfs/{attrib.c,index.c,super.c}). Thanks to Al Viro and
107 Pekka Enberg.
108 - Stamp the transaction log ($UsnJrnl), aka user space journal, if it
109 is active on the volume and we are mounting read-write or remounting
110 from read-only to read-write.
111 - Fix a bug in address space operations error recovery code paths where
112 if the runlist was not mapped at all and a mapping error occurred we
113 would leave the runlist locked on exit to the function so that the
114 next access to the same file would try to take the lock and deadlock.
115 - Detect the case when Windows has been suspended to disk on the volume
116 to be mounted and if this is the case do not allow (re)mounting
117 read-write. This is done by parsing hiberfil.sys if present.
118 - Fix several occurrences of a bug where we would perform 'var & ~const'
119 with a 64-bit variable and a int, i.e. 32-bit, constant. This causes
120 the higher order 32-bits of the 64-bit variable to be zeroed. To fix
121 this cast the 'const' to the same 64-bit type as 'var'.
122 - Change the runlist terminator of the newly allocated cluster(s) to
123 LCN_ENOENT in ntfs_attr_make_non_resident(). Otherwise the runlist
124 code gets confused.
125 - Add an extra parameter @last_vcn to ntfs_get_size_for_mapping_pairs()
126 and ntfs_mapping_pairs_build() to allow the runlist encoding to be
127 partial which is desirable when filling holes in sparse attributes.
128 Update all callers.
129 - Change ntfs_map_runlist_nolock() to only decompress the mapping pairs
130 if the requested vcn is inside it. Otherwise we get into problems
131 when we try to map an out of bounds vcn because we then try to map
132 the already mapped runlist fragment which causes
133 ntfs_mapping_pairs_decompress() to fail and return error. Update
134 ntfs_attr_find_vcn_nolock() accordingly.
135 - Fix a nasty deadlock that appeared in recent kernels.
136 The situation: VFS inode X on a mounted ntfs volume is dirty. For
137 same inode X, the ntfs_inode is dirty and thus corresponding on-disk
138 inode, i.e. mft record, which is in a dirty PAGE_CACHE_PAGE belonging
139 to the table of inodes, i.e. $MFT, inode 0.
140 What happens:
141 Process 1: sys_sync()/umount()/whatever... calls
142 __sync_single_inode() for $MFT -> do_writepages() -> write_page for
143 the dirty page containing the on-disk inode X, the page is now locked
144 -> ntfs_write_mst_block() which clears PageUptodate() on the page to
145 prevent anyone else getting hold of it whilst it does the write out.
146 This is necessary as the on-disk inode needs "fixups" applied before
147 the write to disk which are removed again after the write and
148 PageUptodate is then set again. It then analyses the page looking
149 for dirty on-disk inodes and when it finds one it calls
150 ntfs_may_write_mft_record() to see if it is safe to write this
151 on-disk inode. This then calls ilookup5() to check if the
152 corresponding VFS inode is in icache(). This in turn calls ifind()
153 which waits on the inode lock via wait_on_inode whilst holding the
154 global inode_lock.
155 Process 2: pdflush results in a call to __sync_single_inode for the
156 same VFS inode X on the ntfs volume. This locks the inode (I_LOCK)
157 then calls write-inode -> ntfs_write_inode -> map_mft_record() ->
158 read_cache_page() for the page (in page cache of table of inodes
159 $MFT, inode 0) containing the on-disk inode. This page has
160 PageUptodate() clear because of Process 1 (see above) so
161 read_cache_page() blocks when it tries to take the page lock for the
162 page so it can call ntfs_read_page().
163 Thus Process 1 is holding the page lock on the page containing the
164 on-disk inode X and it is waiting on the inode X to be unlocked in
165 ifind() so it can write the page out and then unlock the page.
166 And Process 2 is holding the inode lock on inode X and is waiting for
167 the page to be unlocked so it can call ntfs_readpage() or discover
168 that Process 1 set PageUptodate() again and use the page.
169 Thus we have a deadlock due to ifind() waiting on the inode lock.
170 The solution: The fix is to use the newly introduced
171 ilookup5_nowait() which does not wait on the inode's lock and hence
172 avoids the deadlock. This is safe as we do not care about the VFS
173 inode and only use the fact that it is in the VFS inode cache and the
174 fact that the vfs and ntfs inodes are one struct in memory to find
175 the ntfs inode in memory if present. Also, the ntfs inode has its
176 own locking so it does not matter if the vfs inode is locked.
34 177
352.1.22 - Many bug and race fixes and error handling improvements. 1782.1.22 - Many bug and race fixes and error handling improvements.
36 179
@@ -1037,7 +1180,7 @@ tng-0.0.8 - 08/03/2002 - Now using BitKeeper, http://linux-ntfs.bkbits.net/
1037 - Further runlist merging work. (Richard Russon) 1180 - Further runlist merging work. (Richard Russon)
1038 - Backwards compatibility for gcc-2.95. (Richard Russon) 1181 - Backwards compatibility for gcc-2.95. (Richard Russon)
1039 - Update to kernel 2.5.5-pre1 and rediff the now tiny patch. 1182 - Update to kernel 2.5.5-pre1 and rediff the now tiny patch.
1040 - Convert to new file system declaration using ->ntfs_get_sb() and 1183 - Convert to new filesystem declaration using ->ntfs_get_sb() and
1041 replacing ntfs_read_super() with ntfs_fill_super(). 1184 replacing ntfs_read_super() with ntfs_fill_super().
1042 - Set s_maxbytes to MAX_LFS_FILESIZE to avoid page cache page index 1185 - Set s_maxbytes to MAX_LFS_FILESIZE to avoid page cache page index
1043 overflow on 32-bit architectures. 1186 overflow on 32-bit architectures.
@@ -1333,7 +1476,7 @@ tng-0.0.1 - The first useful version.
1333 The driver is now actually useful! Yey. (-: It undoubtedly has got bugs 1476 The driver is now actually useful! Yey. (-: It undoubtedly has got bugs
1334 though and it doesn't implement accessing compressed files yet. Also, 1477 though and it doesn't implement accessing compressed files yet. Also,
1335 accessing files with attribute list attributes is not implemented yet 1478 accessing files with attribute list attributes is not implemented yet
1336 either. But for small or simple file systems it should work and allow 1479 either. But for small or simple filesystems it should work and allow
1337 you to list directories, use stat on directory entries and the file 1480 you to list directories, use stat on directory entries and the file
1338 system, open, read, mmap and llseek around in files. A big mile stone 1481 system, open, read, mmap and llseek around in files. A big mile stone
1339 has been reached! 1482 has been reached!
@@ -1341,7 +1484,7 @@ tng-0.0.1 - The first useful version.
1341tng-0.0.0 - Initial version tag. 1484tng-0.0.0 - Initial version tag.
1342 1485
1343 Initial driver implementation. The driver can mount and umount simple 1486 Initial driver implementation. The driver can mount and umount simple
1344 NTFS file systems (i.e. ones without attribute lists in the system 1487 NTFS filesystems (i.e. ones without attribute lists in the system
1345 files). If the mount fails there might be problems in the error handling 1488 files). If the mount fails there might be problems in the error handling
1346 code paths, so be warned. Otherwise it seems to be loading the system 1489 code paths, so be warned. Otherwise it seems to be loading the system
1347 files nicely and the mft record read mapping/unmapping seems to be 1490 files nicely and the mft record read mapping/unmapping seems to be
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 7b66381a0b0f..f083f27d8b69 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ 6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
7 unistr.o upcase.o 7 unistr.o upcase.o
8 8
9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.22\" 9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23\"
10 10
11ifeq ($(CONFIG_NTFS_DEBUG),y) 11ifeq ($(CONFIG_NTFS_DEBUG),y)
12EXTRA_CFLAGS += -DDEBUG 12EXTRA_CFLAGS += -DDEBUG
@@ -15,5 +15,5 @@ endif
15ifeq ($(CONFIG_NTFS_RW),y) 15ifeq ($(CONFIG_NTFS_RW),y)
16EXTRA_CFLAGS += -DNTFS_RW 16EXTRA_CFLAGS += -DNTFS_RW
17 17
18ntfs-objs += bitmap.o lcnalloc.o logfile.o quota.o 18ntfs-objs += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
19endif 19endif
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 45d56e41ed98..3f43bfe6184e 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -2,7 +2,7 @@
2 * aops.c - NTFS kernel address space operations and page cache handling. 2 * aops.c - NTFS kernel address space operations and page cache handling.
3 * Part of the Linux-NTFS project. 3 * Part of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -66,19 +66,22 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
66 ni = NTFS_I(page->mapping->host); 66 ni = NTFS_I(page->mapping->host);
67 67
68 if (likely(uptodate)) { 68 if (likely(uptodate)) {
69 s64 file_ofs; 69 s64 file_ofs, initialized_size;
70 70
71 set_buffer_uptodate(bh); 71 set_buffer_uptodate(bh);
72 72
73 file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + 73 file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
74 bh_offset(bh); 74 bh_offset(bh);
75 read_lock_irqsave(&ni->size_lock, flags);
76 initialized_size = ni->initialized_size;
77 read_unlock_irqrestore(&ni->size_lock, flags);
75 /* Check for the current buffer head overflowing. */ 78 /* Check for the current buffer head overflowing. */
76 if (file_ofs + bh->b_size > ni->initialized_size) { 79 if (file_ofs + bh->b_size > initialized_size) {
77 char *addr; 80 char *addr;
78 int ofs = 0; 81 int ofs = 0;
79 82
80 if (file_ofs < ni->initialized_size) 83 if (file_ofs < initialized_size)
81 ofs = ni->initialized_size - file_ofs; 84 ofs = initialized_size - file_ofs;
82 addr = kmap_atomic(page, KM_BIO_SRC_IRQ); 85 addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
83 memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); 86 memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
84 flush_dcache_page(page); 87 flush_dcache_page(page);
@@ -132,7 +135,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
132 i * rec_size), rec_size); 135 i * rec_size), rec_size);
133 flush_dcache_page(page); 136 flush_dcache_page(page);
134 kunmap_atomic(addr, KM_BIO_SRC_IRQ); 137 kunmap_atomic(addr, KM_BIO_SRC_IRQ);
135 if (likely(!PageError(page) && page_uptodate)) 138 if (likely(page_uptodate && !PageError(page)))
136 SetPageUptodate(page); 139 SetPageUptodate(page);
137 } 140 }
138 unlock_page(page); 141 unlock_page(page);
@@ -168,6 +171,7 @@ static int ntfs_read_block(struct page *page)
168 runlist_element *rl; 171 runlist_element *rl;
169 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; 172 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
170 sector_t iblock, lblock, zblock; 173 sector_t iblock, lblock, zblock;
174 unsigned long flags;
171 unsigned int blocksize, vcn_ofs; 175 unsigned int blocksize, vcn_ofs;
172 int i, nr; 176 int i, nr;
173 unsigned char blocksize_bits; 177 unsigned char blocksize_bits;
@@ -190,8 +194,10 @@ static int ntfs_read_block(struct page *page)
190 } 194 }
191 195
192 iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); 196 iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
197 read_lock_irqsave(&ni->size_lock, flags);
193 lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits; 198 lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
194 zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits; 199 zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
200 read_unlock_irqrestore(&ni->size_lock, flags);
195 201
196 /* Loop through all the buffers in the page. */ 202 /* Loop through all the buffers in the page. */
197 rl = NULL; 203 rl = NULL;
@@ -258,7 +264,8 @@ lock_retry_remap:
258 goto lock_retry_remap; 264 goto lock_retry_remap;
259 rl = NULL; 265 rl = NULL;
260 lcn = err; 266 lcn = err;
261 } 267 } else if (!rl)
268 up_read(&ni->runlist.lock);
262 /* Hard error, zero out region. */ 269 /* Hard error, zero out region. */
263 bh->b_blocknr = -1; 270 bh->b_blocknr = -1;
264 SetPageError(page); 271 SetPageError(page);
@@ -341,14 +348,15 @@ handle_zblock:
341 */ 348 */
342static int ntfs_readpage(struct file *file, struct page *page) 349static int ntfs_readpage(struct file *file, struct page *page)
343{ 350{
344 loff_t i_size;
345 ntfs_inode *ni, *base_ni; 351 ntfs_inode *ni, *base_ni;
346 u8 *kaddr; 352 u8 *kaddr;
347 ntfs_attr_search_ctx *ctx; 353 ntfs_attr_search_ctx *ctx;
348 MFT_RECORD *mrec; 354 MFT_RECORD *mrec;
355 unsigned long flags;
349 u32 attr_len; 356 u32 attr_len;
350 int err = 0; 357 int err = 0;
351 358
359retry_readpage:
352 BUG_ON(!PageLocked(page)); 360 BUG_ON(!PageLocked(page));
353 /* 361 /*
354 * This can potentially happen because we clear PageUptodate() during 362 * This can potentially happen because we clear PageUptodate() during
@@ -383,9 +391,9 @@ static int ntfs_readpage(struct file *file, struct page *page)
383 * Attribute is resident, implying it is not compressed or encrypted. 391 * Attribute is resident, implying it is not compressed or encrypted.
384 * This also means the attribute is smaller than an mft record and 392 * This also means the attribute is smaller than an mft record and
385 * hence smaller than a page, so can simply zero out any pages with 393 * hence smaller than a page, so can simply zero out any pages with
386 * index above 0. We can also do this if the file size is 0. 394 * index above 0.
387 */ 395 */
388 if (unlikely(page->index > 0 || !i_size_read(VFS_I(ni)))) { 396 if (unlikely(page->index > 0)) {
389 kaddr = kmap_atomic(page, KM_USER0); 397 kaddr = kmap_atomic(page, KM_USER0);
390 memset(kaddr, 0, PAGE_CACHE_SIZE); 398 memset(kaddr, 0, PAGE_CACHE_SIZE);
391 flush_dcache_page(page); 399 flush_dcache_page(page);
@@ -402,6 +410,14 @@ static int ntfs_readpage(struct file *file, struct page *page)
402 err = PTR_ERR(mrec); 410 err = PTR_ERR(mrec);
403 goto err_out; 411 goto err_out;
404 } 412 }
413 /*
414 * If a parallel write made the attribute non-resident, drop the mft
415 * record and retry the readpage.
416 */
417 if (unlikely(NInoNonResident(ni))) {
418 unmap_mft_record(base_ni);
419 goto retry_readpage;
420 }
405 ctx = ntfs_attr_get_search_ctx(base_ni, mrec); 421 ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
406 if (unlikely(!ctx)) { 422 if (unlikely(!ctx)) {
407 err = -ENOMEM; 423 err = -ENOMEM;
@@ -412,9 +428,10 @@ static int ntfs_readpage(struct file *file, struct page *page)
412 if (unlikely(err)) 428 if (unlikely(err))
413 goto put_unm_err_out; 429 goto put_unm_err_out;
414 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); 430 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
415 i_size = i_size_read(VFS_I(ni)); 431 read_lock_irqsave(&ni->size_lock, flags);
416 if (unlikely(attr_len > i_size)) 432 if (unlikely(attr_len > ni->initialized_size))
417 attr_len = i_size; 433 attr_len = ni->initialized_size;
434 read_unlock_irqrestore(&ni->size_lock, flags);
418 kaddr = kmap_atomic(page, KM_USER0); 435 kaddr = kmap_atomic(page, KM_USER0);
419 /* Copy the data to the page. */ 436 /* Copy the data to the page. */
420 memcpy(kaddr, (u8*)ctx->attr + 437 memcpy(kaddr, (u8*)ctx->attr +
@@ -463,12 +480,15 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
463{ 480{
464 VCN vcn; 481 VCN vcn;
465 LCN lcn; 482 LCN lcn;
483 s64 initialized_size;
484 loff_t i_size;
466 sector_t block, dblock, iblock; 485 sector_t block, dblock, iblock;
467 struct inode *vi; 486 struct inode *vi;
468 ntfs_inode *ni; 487 ntfs_inode *ni;
469 ntfs_volume *vol; 488 ntfs_volume *vol;
470 runlist_element *rl; 489 runlist_element *rl;
471 struct buffer_head *bh, *head; 490 struct buffer_head *bh, *head;
491 unsigned long flags;
472 unsigned int blocksize, vcn_ofs; 492 unsigned int blocksize, vcn_ofs;
473 int err; 493 int err;
474 BOOL need_end_writeback; 494 BOOL need_end_writeback;
@@ -510,11 +530,16 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
510 /* The first block in the page. */ 530 /* The first block in the page. */
511 block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); 531 block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
512 532
533 read_lock_irqsave(&ni->size_lock, flags);
534 i_size = i_size_read(vi);
535 initialized_size = ni->initialized_size;
536 read_unlock_irqrestore(&ni->size_lock, flags);
537
513 /* The first out of bounds block for the data size. */ 538 /* The first out of bounds block for the data size. */
514 dblock = (vi->i_size + blocksize - 1) >> blocksize_bits; 539 dblock = (i_size + blocksize - 1) >> blocksize_bits;
515 540
516 /* The last (fully or partially) initialized block. */ 541 /* The last (fully or partially) initialized block. */
517 iblock = ni->initialized_size >> blocksize_bits; 542 iblock = initialized_size >> blocksize_bits;
518 543
519 /* 544 /*
520 * Be very careful. We have no exclusion from __set_page_dirty_buffers 545 * Be very careful. We have no exclusion from __set_page_dirty_buffers
@@ -559,7 +584,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
559 584
560 /* Make sure we have enough initialized size. */ 585 /* Make sure we have enough initialized size. */
561 if (unlikely((block >= iblock) && 586 if (unlikely((block >= iblock) &&
562 (ni->initialized_size < vi->i_size))) { 587 (initialized_size < i_size))) {
563 /* 588 /*
564 * If this page is fully outside initialized size, zero 589 * If this page is fully outside initialized size, zero
565 * out all pages between the current initialized size 590 * out all pages between the current initialized size
@@ -666,7 +691,8 @@ lock_retry_remap:
666 goto lock_retry_remap; 691 goto lock_retry_remap;
667 rl = NULL; 692 rl = NULL;
668 lcn = err; 693 lcn = err;
669 } 694 } else if (!rl)
695 up_read(&ni->runlist.lock);
670 /* Failed to map the buffer, even after retrying. */ 696 /* Failed to map the buffer, even after retrying. */
671 bh->b_blocknr = -1; 697 bh->b_blocknr = -1;
672 ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " 698 ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
@@ -801,17 +827,15 @@ static int ntfs_write_mst_block(struct page *page,
801 ntfs_inode *ni = NTFS_I(vi); 827 ntfs_inode *ni = NTFS_I(vi);
802 ntfs_volume *vol = ni->vol; 828 ntfs_volume *vol = ni->vol;
803 u8 *kaddr; 829 u8 *kaddr;
804 unsigned char bh_size_bits = vi->i_blkbits;
805 unsigned int bh_size = 1 << bh_size_bits;
806 unsigned int rec_size = ni->itype.index.block_size; 830 unsigned int rec_size = ni->itype.index.block_size;
807 ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size]; 831 ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
808 struct buffer_head *bh, *head, *tbh, *rec_start_bh; 832 struct buffer_head *bh, *head, *tbh, *rec_start_bh;
809 int max_bhs = PAGE_CACHE_SIZE / bh_size; 833 struct buffer_head *bhs[MAX_BUF_PER_PAGE];
810 struct buffer_head *bhs[max_bhs];
811 runlist_element *rl; 834 runlist_element *rl;
812 int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err, err2; 835 int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
813 unsigned rec_size_bits; 836 unsigned bh_size, rec_size_bits;
814 BOOL sync, is_mft, page_is_dirty, rec_is_dirty; 837 BOOL sync, is_mft, page_is_dirty, rec_is_dirty;
838 unsigned char bh_size_bits;
815 839
816 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " 840 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
817 "0x%lx.", vi->i_ino, ni->type, page->index); 841 "0x%lx.", vi->i_ino, ni->type, page->index);
@@ -826,7 +850,11 @@ static int ntfs_write_mst_block(struct page *page,
826 */ 850 */
827 BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) || 851 BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
828 (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION))); 852 (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
853 bh_size_bits = vi->i_blkbits;
854 bh_size = 1 << bh_size_bits;
855 max_bhs = PAGE_CACHE_SIZE / bh_size;
829 BUG_ON(!max_bhs); 856 BUG_ON(!max_bhs);
857 BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
830 858
831 /* Were we called for sync purposes? */ 859 /* Were we called for sync purposes? */
832 sync = (wbc->sync_mode == WB_SYNC_ALL); 860 sync = (wbc->sync_mode == WB_SYNC_ALL);
@@ -846,7 +874,7 @@ static int ntfs_write_mst_block(struct page *page,
846 (PAGE_CACHE_SHIFT - bh_size_bits); 874 (PAGE_CACHE_SHIFT - bh_size_bits);
847 875
848 /* The first out of bounds block for the data size. */ 876 /* The first out of bounds block for the data size. */
849 dblock = (vi->i_size + bh_size - 1) >> bh_size_bits; 877 dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
850 878
851 rl = NULL; 879 rl = NULL;
852 err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0; 880 err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
@@ -858,6 +886,7 @@ static int ntfs_write_mst_block(struct page *page,
858 if (likely(block < rec_block)) { 886 if (likely(block < rec_block)) {
859 if (unlikely(block >= dblock)) { 887 if (unlikely(block >= dblock)) {
860 clear_buffer_dirty(bh); 888 clear_buffer_dirty(bh);
889 set_buffer_uptodate(bh);
861 continue; 890 continue;
862 } 891 }
863 /* 892 /*
@@ -938,8 +967,11 @@ lock_retry_remap:
938 if (err2 == -ENOMEM) 967 if (err2 == -ENOMEM)
939 page_is_dirty = TRUE; 968 page_is_dirty = TRUE;
940 lcn = err2; 969 lcn = err2;
941 } else 970 } else {
942 err2 = -EIO; 971 err2 = -EIO;
972 if (!rl)
973 up_read(&ni->runlist.lock);
974 }
943 /* Hard error. Abort writing this record. */ 975 /* Hard error. Abort writing this record. */
944 if (!err || err == -ENOMEM) 976 if (!err || err == -ENOMEM)
945 err = err2; 977 err = err2;
@@ -949,7 +981,8 @@ lock_retry_remap:
949 "attribute type 0x%x) because " 981 "attribute type 0x%x) because "
950 "its location on disk could " 982 "its location on disk could "
951 "not be determined (error " 983 "not be determined (error "
952 "code %lli).", (s64)block << 984 "code %lli).",
985 (long long)block <<
953 bh_size_bits >> 986 bh_size_bits >>
954 vol->mft_record_size_bits, 987 vol->mft_record_size_bits,
955 ni->mft_no, ni->type, 988 ni->mft_no, ni->type,
@@ -1223,19 +1256,17 @@ done:
1223static int ntfs_writepage(struct page *page, struct writeback_control *wbc) 1256static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
1224{ 1257{
1225 loff_t i_size; 1258 loff_t i_size;
1226 struct inode *vi; 1259 struct inode *vi = page->mapping->host;
1227 ntfs_inode *ni, *base_ni; 1260 ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
1228 char *kaddr; 1261 char *kaddr;
1229 ntfs_attr_search_ctx *ctx; 1262 ntfs_attr_search_ctx *ctx = NULL;
1230 MFT_RECORD *m; 1263 MFT_RECORD *m = NULL;
1231 u32 attr_len; 1264 u32 attr_len;
1232 int err; 1265 int err;
1233 1266
1267retry_writepage:
1234 BUG_ON(!PageLocked(page)); 1268 BUG_ON(!PageLocked(page));
1235
1236 vi = page->mapping->host;
1237 i_size = i_size_read(vi); 1269 i_size = i_size_read(vi);
1238
1239 /* Is the page fully outside i_size? (truncate in progress) */ 1270 /* Is the page fully outside i_size? (truncate in progress) */
1240 if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >> 1271 if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
1241 PAGE_CACHE_SHIFT)) { 1272 PAGE_CACHE_SHIFT)) {
@@ -1248,8 +1279,6 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
1248 ntfs_debug("Write outside i_size - truncated?"); 1279 ntfs_debug("Write outside i_size - truncated?");
1249 return 0; 1280 return 0;
1250 } 1281 }
1251 ni = NTFS_I(vi);
1252
1253 /* NInoNonResident() == NInoIndexAllocPresent() */ 1282 /* NInoNonResident() == NInoIndexAllocPresent() */
1254 if (NInoNonResident(ni)) { 1283 if (NInoNonResident(ni)) {
1255 /* 1284 /*
@@ -1326,6 +1355,14 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
1326 ctx = NULL; 1355 ctx = NULL;
1327 goto err_out; 1356 goto err_out;
1328 } 1357 }
1358 /*
1359 * If a parallel write made the attribute non-resident, drop the mft
1360 * record and retry the writepage.
1361 */
1362 if (unlikely(NInoNonResident(ni))) {
1363 unmap_mft_record(base_ni);
1364 goto retry_writepage;
1365 }
1329 ctx = ntfs_attr_get_search_ctx(base_ni, m); 1366 ctx = ntfs_attr_get_search_ctx(base_ni, m);
1330 if (unlikely(!ctx)) { 1367 if (unlikely(!ctx)) {
1331 err = -ENOMEM; 1368 err = -ENOMEM;
@@ -1367,15 +1404,12 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
1367 */ 1404 */
1368 1405
1369 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); 1406 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
1370 i_size = i_size_read(VFS_I(ni)); 1407 i_size = i_size_read(vi);
1371 kaddr = kmap_atomic(page, KM_USER0);
1372 if (unlikely(attr_len > i_size)) { 1408 if (unlikely(attr_len > i_size)) {
1373 /* Zero out of bounds area in the mft record. */
1374 memset((u8*)ctx->attr + le16_to_cpu(
1375 ctx->attr->data.resident.value_offset) +
1376 i_size, 0, attr_len - i_size);
1377 attr_len = i_size; 1409 attr_len = i_size;
1410 ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
1378 } 1411 }
1412 kaddr = kmap_atomic(page, KM_USER0);
1379 /* Copy the data from the page to the mft record. */ 1413 /* Copy the data from the page to the mft record. */
1380 memcpy((u8*)ctx->attr + 1414 memcpy((u8*)ctx->attr +
1381 le16_to_cpu(ctx->attr->data.resident.value_offset), 1415 le16_to_cpu(ctx->attr->data.resident.value_offset),
@@ -1405,8 +1439,10 @@ err_out:
1405 err = 0; 1439 err = 0;
1406 } else { 1440 } else {
1407 ntfs_error(vi->i_sb, "Resident attribute write failed with " 1441 ntfs_error(vi->i_sb, "Resident attribute write failed with "
1408 "error %i. Setting page error flag.", err); 1442 "error %i.", err);
1409 SetPageError(page); 1443 SetPageError(page);
1444 NVolSetErrors(ni->vol);
1445 make_bad_inode(vi);
1410 } 1446 }
1411 unlock_page(page); 1447 unlock_page(page);
1412 if (ctx) 1448 if (ctx)
@@ -1425,12 +1461,15 @@ static int ntfs_prepare_nonresident_write(struct page *page,
1425{ 1461{
1426 VCN vcn; 1462 VCN vcn;
1427 LCN lcn; 1463 LCN lcn;
1464 s64 initialized_size;
1465 loff_t i_size;
1428 sector_t block, ablock, iblock; 1466 sector_t block, ablock, iblock;
1429 struct inode *vi; 1467 struct inode *vi;
1430 ntfs_inode *ni; 1468 ntfs_inode *ni;
1431 ntfs_volume *vol; 1469 ntfs_volume *vol;
1432 runlist_element *rl; 1470 runlist_element *rl;
1433 struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; 1471 struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
1472 unsigned long flags;
1434 unsigned int vcn_ofs, block_start, block_end, blocksize; 1473 unsigned int vcn_ofs, block_start, block_end, blocksize;
1435 int err; 1474 int err;
1436 BOOL is_retry; 1475 BOOL is_retry;
@@ -1462,16 +1501,20 @@ static int ntfs_prepare_nonresident_write(struct page *page,
1462 /* The first block in the page. */ 1501 /* The first block in the page. */
1463 block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); 1502 block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
1464 1503
1504 read_lock_irqsave(&ni->size_lock, flags);
1465 /* 1505 /*
1466 * The first out of bounds block for the allocated size. No need to 1506 * The first out of bounds block for the allocated size. No need to
1467 * round up as allocated_size is in multiples of cluster size and the 1507 * round up as allocated_size is in multiples of cluster size and the
1468 * minimum cluster size is 512 bytes, which is equal to the smallest 1508 * minimum cluster size is 512 bytes, which is equal to the smallest
1469 * blocksize. 1509 * blocksize.
1470 */ 1510 */
1471 ablock = ni->allocated_size >> blocksize_bits; 1511 ablock = ni->allocated_size >> blocksize_bits;
1512 i_size = i_size_read(vi);
1513 initialized_size = ni->initialized_size;
1514 read_unlock_irqrestore(&ni->size_lock, flags);
1472 1515
1473 /* The last (fully or partially) initialized block. */ 1516 /* The last (fully or partially) initialized block. */
1474 iblock = ni->initialized_size >> blocksize_bits; 1517 iblock = initialized_size >> blocksize_bits;
1475 1518
1476 /* Loop through all the buffers in the page. */ 1519 /* Loop through all the buffers in the page. */
1477 block_start = 0; 1520 block_start = 0;
@@ -1518,7 +1561,7 @@ static int ntfs_prepare_nonresident_write(struct page *page,
1518 * request, i.e. block < ablock is true. 1561 * request, i.e. block < ablock is true.
1519 */ 1562 */
1520 if (unlikely((block >= iblock) && 1563 if (unlikely((block >= iblock) &&
1521 (ni->initialized_size < vi->i_size))) { 1564 (initialized_size < i_size))) {
1522 /* 1565 /*
1523 * If this page is fully outside initialized size, zero 1566 * If this page is fully outside initialized size, zero
1524 * out all pages between the current initialized size 1567 * out all pages between the current initialized size
@@ -1622,6 +1665,8 @@ lock_retry_remap:
1622 "not supported yet. " 1665 "not supported yet. "
1623 "Sorry."); 1666 "Sorry.");
1624 err = -EOPNOTSUPP; 1667 err = -EOPNOTSUPP;
1668 if (!rl)
1669 up_read(&ni->runlist.lock);
1625 goto err_out; 1670 goto err_out;
1626 } else if (!is_retry && 1671 } else if (!is_retry &&
1627 lcn == LCN_RL_NOT_MAPPED) { 1672 lcn == LCN_RL_NOT_MAPPED) {
@@ -1636,7 +1681,8 @@ lock_retry_remap:
1636 goto lock_retry_remap; 1681 goto lock_retry_remap;
1637 rl = NULL; 1682 rl = NULL;
1638 lcn = err; 1683 lcn = err;
1639 } 1684 } else if (!rl)
1685 up_read(&ni->runlist.lock);
1640 /* 1686 /*
1641 * Failed to map the buffer, even after 1687 * Failed to map the buffer, even after
1642 * retrying. 1688 * retrying.
@@ -1797,6 +1843,7 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
1797 unsigned from, unsigned to) 1843 unsigned from, unsigned to)
1798{ 1844{
1799 s64 new_size; 1845 s64 new_size;
1846 loff_t i_size;
1800 struct inode *vi = page->mapping->host; 1847 struct inode *vi = page->mapping->host;
1801 ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi); 1848 ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
1802 ntfs_volume *vol = ni->vol; 1849 ntfs_volume *vol = ni->vol;
@@ -1868,14 +1915,8 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
1868 BUG_ON(page_has_buffers(page)); 1915 BUG_ON(page_has_buffers(page));
1869 new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to; 1916 new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
1870 /* If we do not need to resize the attribute allocation we are done. */ 1917 /* If we do not need to resize the attribute allocation we are done. */
1871 if (new_size <= vi->i_size) 1918 if (new_size <= i_size_read(vi))
1872 goto done; 1919 goto done;
1873
1874 // FIXME: We abort for now as this code is not safe.
1875 ntfs_error(vi->i_sb, "Changing the file size is not supported yet. "
1876 "Sorry.");
1877 return -EOPNOTSUPP;
1878
1879 /* Map, pin, and lock the (base) mft record. */ 1920 /* Map, pin, and lock the (base) mft record. */
1880 if (!NInoAttr(ni)) 1921 if (!NInoAttr(ni))
1881 base_ni = ni; 1922 base_ni = ni;
@@ -1904,7 +1945,15 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
1904 a = ctx->attr; 1945 a = ctx->attr;
1905 /* The total length of the attribute value. */ 1946 /* The total length of the attribute value. */
1906 attr_len = le32_to_cpu(a->data.resident.value_length); 1947 attr_len = le32_to_cpu(a->data.resident.value_length);
1907 BUG_ON(vi->i_size != attr_len); 1948 /* Fix an eventual previous failure of ntfs_commit_write(). */
1949 i_size = i_size_read(vi);
1950 if (unlikely(attr_len > i_size)) {
1951 attr_len = i_size;
1952 a->data.resident.value_length = cpu_to_le32(attr_len);
1953 }
1954 /* If we do not need to resize the attribute allocation we are done. */
1955 if (new_size <= attr_len)
1956 goto done_unm;
1908 /* Check if new size is allowed in $AttrDef. */ 1957 /* Check if new size is allowed in $AttrDef. */
1909 err = ntfs_attr_size_bounds_check(vol, ni->type, new_size); 1958 err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
1910 if (unlikely(err)) { 1959 if (unlikely(err)) {
@@ -1962,6 +2011,7 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
1962 } 2011 }
1963 flush_dcache_mft_record_page(ctx->ntfs_ino); 2012 flush_dcache_mft_record_page(ctx->ntfs_ino);
1964 mark_mft_record_dirty(ctx->ntfs_ino); 2013 mark_mft_record_dirty(ctx->ntfs_ino);
2014done_unm:
1965 ntfs_attr_put_search_ctx(ctx); 2015 ntfs_attr_put_search_ctx(ctx);
1966 unmap_mft_record(base_ni); 2016 unmap_mft_record(base_ni);
1967 /* 2017 /*
@@ -2047,7 +2097,7 @@ static int ntfs_commit_nonresident_write(struct page *page,
2047 * now we know ntfs_prepare_write() would have failed in the write 2097 * now we know ntfs_prepare_write() would have failed in the write
2048 * exceeds i_size case, so this will never trigger which is fine. 2098 * exceeds i_size case, so this will never trigger which is fine.
2049 */ 2099 */
2050 if (pos > vi->i_size) { 2100 if (pos > i_size_read(vi)) {
2051 ntfs_error(vi->i_sb, "Writing beyond the existing file size is " 2101 ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
2052 "not supported yet. Sorry."); 2102 "not supported yet. Sorry.");
2053 return -EOPNOTSUPP; 2103 return -EOPNOTSUPP;
@@ -2183,9 +2233,13 @@ static int ntfs_commit_write(struct file *file, struct page *page,
2183 } 2233 }
2184 kunmap_atomic(kaddr, KM_USER0); 2234 kunmap_atomic(kaddr, KM_USER0);
2185 /* Update i_size if necessary. */ 2235 /* Update i_size if necessary. */
2186 if (vi->i_size < attr_len) { 2236 if (i_size_read(vi) < attr_len) {
2237 unsigned long flags;
2238
2239 write_lock_irqsave(&ni->size_lock, flags);
2187 ni->allocated_size = ni->initialized_size = attr_len; 2240 ni->allocated_size = ni->initialized_size = attr_len;
2188 i_size_write(vi, attr_len); 2241 i_size_write(vi, attr_len);
2242 write_unlock_irqrestore(&ni->size_lock, flags);
2189 } 2243 }
2190 /* Mark the mft record dirty, so it gets written back. */ 2244 /* Mark the mft record dirty, so it gets written back. */
2191 flush_dcache_mft_record_page(ctx->ntfs_ino); 2245 flush_dcache_mft_record_page(ctx->ntfs_ino);
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 1ff7f90a18b0..cd0f9e740b14 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. 2 * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -21,88 +21,217 @@
21 */ 21 */
22 22
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/swap.h>
24 25
25#include "attrib.h" 26#include "attrib.h"
26#include "debug.h" 27#include "debug.h"
27#include "layout.h" 28#include "layout.h"
29#include "lcnalloc.h"
30#include "malloc.h"
28#include "mft.h" 31#include "mft.h"
29#include "ntfs.h" 32#include "ntfs.h"
30#include "types.h" 33#include "types.h"
31 34
32/** 35/**
33 * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode 36 * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
34 * @ni: ntfs inode for which to map (part of) a runlist 37 * @ni: ntfs inode for which to map (part of) a runlist
35 * @vcn: map runlist part containing this vcn 38 * @vcn: map runlist part containing this vcn
36 * 39 *
37 * Map the part of a runlist containing the @vcn of the ntfs inode @ni. 40 * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
38 * 41 *
39 * Return 0 on success and -errno on error. 42 * Return 0 on success and -errno on error. There is one special error code
43 * which is not an error as such. This is -ENOENT. It means that @vcn is out
44 * of bounds of the runlist.
40 * 45 *
41 * Locking: - The runlist must be unlocked on entry and is unlocked on return. 46 * Locking: - The runlist must be locked for writing.
42 * - This function takes the lock for writing and modifies the runlist. 47 * - This function modifies the runlist.
43 */ 48 */
44int ntfs_map_runlist(ntfs_inode *ni, VCN vcn) 49int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
45{ 50{
51 VCN end_vcn;
46 ntfs_inode *base_ni; 52 ntfs_inode *base_ni;
53 MFT_RECORD *m;
54 ATTR_RECORD *a;
47 ntfs_attr_search_ctx *ctx; 55 ntfs_attr_search_ctx *ctx;
48 MFT_RECORD *mrec; 56 runlist_element *rl;
49 int err = 0; 57 int err = 0;
50 58
51 ntfs_debug("Mapping runlist part containing vcn 0x%llx.", 59 ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
52 (unsigned long long)vcn); 60 (unsigned long long)vcn);
53
54 if (!NInoAttr(ni)) 61 if (!NInoAttr(ni))
55 base_ni = ni; 62 base_ni = ni;
56 else 63 else
57 base_ni = ni->ext.base_ntfs_ino; 64 base_ni = ni->ext.base_ntfs_ino;
58 65 m = map_mft_record(base_ni);
59 mrec = map_mft_record(base_ni); 66 if (IS_ERR(m))
60 if (IS_ERR(mrec)) 67 return PTR_ERR(m);
61 return PTR_ERR(mrec); 68 ctx = ntfs_attr_get_search_ctx(base_ni, m);
62 ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
63 if (unlikely(!ctx)) { 69 if (unlikely(!ctx)) {
64 err = -ENOMEM; 70 err = -ENOMEM;
65 goto err_out; 71 goto err_out;
66 } 72 }
67 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 73 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
68 CASE_SENSITIVE, vcn, NULL, 0, ctx); 74 CASE_SENSITIVE, vcn, NULL, 0, ctx);
69 if (unlikely(err)) 75 if (unlikely(err)) {
70 goto put_err_out; 76 if (err == -ENOENT)
77 err = -EIO;
78 goto err_out;
79 }
80 a = ctx->attr;
81 /*
82 * Only decompress the mapping pairs if @vcn is inside it. Otherwise
83 * we get into problems when we try to map an out of bounds vcn because
84 * we then try to map the already mapped runlist fragment and
85 * ntfs_mapping_pairs_decompress() fails.
86 */
87 end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1;
88 if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1))
89 end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits;
90 if (unlikely(vcn >= end_vcn)) {
91 err = -ENOENT;
92 goto err_out;
93 }
94 rl = ntfs_mapping_pairs_decompress(ni->vol, a, ni->runlist.rl);
95 if (IS_ERR(rl))
96 err = PTR_ERR(rl);
97 else
98 ni->runlist.rl = rl;
99err_out:
100 if (likely(ctx))
101 ntfs_attr_put_search_ctx(ctx);
102 unmap_mft_record(base_ni);
103 return err;
104}
105
106/**
107 * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode
108 * @ni: ntfs inode for which to map (part of) a runlist
109 * @vcn: map runlist part containing this vcn
110 *
111 * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
112 *
113 * Return 0 on success and -errno on error. There is one special error code
114 * which is not an error as such. This is -ENOENT. It means that @vcn is out
115 * of bounds of the runlist.
116 *
117 * Locking: - The runlist must be unlocked on entry and is unlocked on return.
118 * - This function takes the runlist lock for writing and modifies the
119 * runlist.
120 */
121int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
122{
123 int err = 0;
71 124
72 down_write(&ni->runlist.lock); 125 down_write(&ni->runlist.lock);
73 /* Make sure someone else didn't do the work while we were sleeping. */ 126 /* Make sure someone else didn't do the work while we were sleeping. */
74 if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <= 127 if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
75 LCN_RL_NOT_MAPPED)) { 128 LCN_RL_NOT_MAPPED))
76 runlist_element *rl; 129 err = ntfs_map_runlist_nolock(ni, vcn);
130 up_write(&ni->runlist.lock);
131 return err;
132}
77 133
78 rl = ntfs_mapping_pairs_decompress(ni->vol, ctx->attr, 134/**
79 ni->runlist.rl); 135 * ntfs_attr_vcn_to_lcn_nolock - convert a vcn into a lcn given an ntfs inode
80 if (IS_ERR(rl)) 136 * @ni: ntfs inode of the attribute whose runlist to search
81 err = PTR_ERR(rl); 137 * @vcn: vcn to convert
82 else 138 * @write_locked: true if the runlist is locked for writing
83 ni->runlist.rl = rl; 139 *
140 * Find the virtual cluster number @vcn in the runlist of the ntfs attribute
141 * described by the ntfs inode @ni and return the corresponding logical cluster
142 * number (lcn).
143 *
144 * If the @vcn is not mapped yet, the attempt is made to map the attribute
145 * extent containing the @vcn and the vcn to lcn conversion is retried.
146 *
147 * If @write_locked is true the caller has locked the runlist for writing and
148 * if false for reading.
149 *
150 * Since lcns must be >= 0, we use negative return codes with special meaning:
151 *
152 * Return code Meaning / Description
153 * ==========================================
154 * LCN_HOLE Hole / not allocated on disk.
155 * LCN_ENOENT There is no such vcn in the runlist, i.e. @vcn is out of bounds.
156 * LCN_ENOMEM Not enough memory to map runlist.
157 * LCN_EIO Critical error (runlist/file is corrupt, i/o error, etc).
158 *
159 * Locking: - The runlist must be locked on entry and is left locked on return.
160 * - If @write_locked is FALSE, i.e. the runlist is locked for reading,
161 * the lock may be dropped inside the function so you cannot rely on
162 * the runlist still being the same when this function returns.
163 */
164LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
165 const BOOL write_locked)
166{
167 LCN lcn;
168 BOOL is_retry = FALSE;
169
170 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
171 ni->mft_no, (unsigned long long)vcn,
172 write_locked ? "write" : "read");
173 BUG_ON(!ni);
174 BUG_ON(!NInoNonResident(ni));
175 BUG_ON(vcn < 0);
176retry_remap:
177 /* Convert vcn to lcn. If that fails map the runlist and retry once. */
178 lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn);
179 if (likely(lcn >= LCN_HOLE)) {
180 ntfs_debug("Done, lcn 0x%llx.", (long long)lcn);
181 return lcn;
84 } 182 }
85 up_write(&ni->runlist.lock); 183 if (lcn != LCN_RL_NOT_MAPPED) {
184 if (lcn != LCN_ENOENT)
185 lcn = LCN_EIO;
186 } else if (!is_retry) {
187 int err;
86 188
87put_err_out: 189 if (!write_locked) {
88 ntfs_attr_put_search_ctx(ctx); 190 up_read(&ni->runlist.lock);
89err_out: 191 down_write(&ni->runlist.lock);
90 unmap_mft_record(base_ni); 192 if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) !=
91 return err; 193 LCN_RL_NOT_MAPPED)) {
194 up_write(&ni->runlist.lock);
195 down_read(&ni->runlist.lock);
196 goto retry_remap;
197 }
198 }
199 err = ntfs_map_runlist_nolock(ni, vcn);
200 if (!write_locked) {
201 up_write(&ni->runlist.lock);
202 down_read(&ni->runlist.lock);
203 }
204 if (likely(!err)) {
205 is_retry = TRUE;
206 goto retry_remap;
207 }
208 if (err == -ENOENT)
209 lcn = LCN_ENOENT;
210 else if (err == -ENOMEM)
211 lcn = LCN_ENOMEM;
212 else
213 lcn = LCN_EIO;
214 }
215 if (lcn != LCN_ENOENT)
216 ntfs_error(ni->vol->sb, "Failed with error code %lli.",
217 (long long)lcn);
218 return lcn;
92} 219}
93 220
94/** 221/**
95 * ntfs_find_vcn - find a vcn in the runlist described by an ntfs inode 222 * ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode
96 * @ni: ntfs inode describing the runlist to search 223 * @ni: ntfs inode describing the runlist to search
97 * @vcn: vcn to find 224 * @vcn: vcn to find
98 * @need_write: if false, lock for reading and if true, lock for writing 225 * @write_locked: true if the runlist is locked for writing
99 * 226 *
100 * Find the virtual cluster number @vcn in the runlist described by the ntfs 227 * Find the virtual cluster number @vcn in the runlist described by the ntfs
101 * inode @ni and return the address of the runlist element containing the @vcn. 228 * inode @ni and return the address of the runlist element containing the @vcn.
102 * The runlist is left locked and the caller has to unlock it. If @need_write 229 *
103 * is true, the runlist is locked for writing and if @need_write is false, the 230 * If the @vcn is not mapped yet, the attempt is made to map the attribute
104 * runlist is locked for reading. In the error case, the runlist is not left 231 * extent containing the @vcn and the vcn to lcn conversion is retried.
105 * locked. 232 *
233 * If @write_locked is true the caller has locked the runlist for writing and
234 * if false for reading.
106 * 235 *
107 * Note you need to distinguish between the lcn of the returned runlist element 236 * Note you need to distinguish between the lcn of the returned runlist element
108 * being >= 0 and LCN_HOLE. In the later case you have to return zeroes on 237 * being >= 0 and LCN_HOLE. In the later case you have to return zeroes on
@@ -118,34 +247,29 @@ err_out:
118 * -ENOMEM - Not enough memory to map runlist. 247 * -ENOMEM - Not enough memory to map runlist.
119 * -EIO - Critical error (runlist/file is corrupt, i/o error, etc). 248 * -EIO - Critical error (runlist/file is corrupt, i/o error, etc).
120 * 249 *
121 * Locking: - The runlist must be unlocked on entry. 250 * Locking: - The runlist must be locked on entry and is left locked on return.
122 * - On failing return, the runlist is unlocked. 251 * - If @write_locked is FALSE, i.e. the runlist is locked for reading,
123 * - On successful return, the runlist is locked. If @need_write us 252 * the lock may be dropped inside the function so you cannot rely on
124 * true, it is locked for writing. Otherwise is is locked for 253 * the runlist still being the same when this function returns.
125 * reading.
126 */ 254 */
127runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn, 255runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
128 const BOOL need_write) 256 const BOOL write_locked)
129{ 257{
130 runlist_element *rl; 258 runlist_element *rl;
131 int err = 0; 259 int err = 0;
132 BOOL is_retry = FALSE; 260 BOOL is_retry = FALSE;
133 261
134 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, lock for %sing.", 262 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
135 ni->mft_no, (unsigned long long)vcn, 263 ni->mft_no, (unsigned long long)vcn,
136 !need_write ? "read" : "writ"); 264 write_locked ? "write" : "read");
137 BUG_ON(!ni); 265 BUG_ON(!ni);
138 BUG_ON(!NInoNonResident(ni)); 266 BUG_ON(!NInoNonResident(ni));
139 BUG_ON(vcn < 0); 267 BUG_ON(vcn < 0);
140lock_retry_remap: 268retry_remap:
141 if (!need_write)
142 down_read(&ni->runlist.lock);
143 else
144 down_write(&ni->runlist.lock);
145 rl = ni->runlist.rl; 269 rl = ni->runlist.rl;
146 if (likely(rl && vcn >= rl[0].vcn)) { 270 if (likely(rl && vcn >= rl[0].vcn)) {
147 while (likely(rl->length)) { 271 while (likely(rl->length)) {
148 if (likely(vcn < rl[1].vcn)) { 272 if (unlikely(vcn < rl[1].vcn)) {
149 if (likely(rl->lcn >= LCN_HOLE)) { 273 if (likely(rl->lcn >= LCN_HOLE)) {
150 ntfs_debug("Done."); 274 ntfs_debug("Done.");
151 return rl; 275 return rl;
@@ -161,30 +285,41 @@ lock_retry_remap:
161 err = -EIO; 285 err = -EIO;
162 } 286 }
163 } 287 }
164 if (!need_write)
165 up_read(&ni->runlist.lock);
166 else
167 up_write(&ni->runlist.lock);
168 if (!err && !is_retry) { 288 if (!err && !is_retry) {
169 /* 289 /*
170 * The @vcn is in an unmapped region, map the runlist and 290 * The @vcn is in an unmapped region, map the runlist and
171 * retry. 291 * retry.
172 */ 292 */
173 err = ntfs_map_runlist(ni, vcn); 293 if (!write_locked) {
294 up_read(&ni->runlist.lock);
295 down_write(&ni->runlist.lock);
296 if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) !=
297 LCN_RL_NOT_MAPPED)) {
298 up_write(&ni->runlist.lock);
299 down_read(&ni->runlist.lock);
300 goto retry_remap;
301 }
302 }
303 err = ntfs_map_runlist_nolock(ni, vcn);
304 if (!write_locked) {
305 up_write(&ni->runlist.lock);
306 down_read(&ni->runlist.lock);
307 }
174 if (likely(!err)) { 308 if (likely(!err)) {
175 is_retry = TRUE; 309 is_retry = TRUE;
176 goto lock_retry_remap; 310 goto retry_remap;
177 } 311 }
178 /* 312 /*
179 * -EINVAL and -ENOENT coming from a failed mapping attempt are 313 * -EINVAL coming from a failed mapping attempt is equivalent
180 * equivalent to i/o errors for us as they should not happen in 314 * to i/o error for us as it should not happen in our code
181 * our code paths. 315 * paths.
182 */ 316 */
183 if (err == -EINVAL || err == -ENOENT) 317 if (err == -EINVAL)
184 err = -EIO; 318 err = -EIO;
185 } else if (!err) 319 } else if (!err)
186 err = -EIO; 320 err = -EIO;
187 ntfs_error(ni->vol->sb, "Failed with error code %i.", err); 321 if (err != -ENOENT)
322 ntfs_error(ni->vol->sb, "Failed with error code %i.", err);
188 return ERR_PTR(err); 323 return ERR_PTR(err);
189} 324}
190 325
@@ -870,15 +1005,14 @@ int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
870static inline void ntfs_attr_init_search_ctx(ntfs_attr_search_ctx *ctx, 1005static inline void ntfs_attr_init_search_ctx(ntfs_attr_search_ctx *ctx,
871 ntfs_inode *ni, MFT_RECORD *mrec) 1006 ntfs_inode *ni, MFT_RECORD *mrec)
872{ 1007{
873 ctx->mrec = mrec; 1008 *ctx = (ntfs_attr_search_ctx) {
874 /* Sanity checks are performed elsewhere. */ 1009 .mrec = mrec,
875 ctx->attr = (ATTR_RECORD*)((u8*)mrec + le16_to_cpu(mrec->attrs_offset)); 1010 /* Sanity checks are performed elsewhere. */
876 ctx->is_first = TRUE; 1011 .attr = (ATTR_RECORD*)((u8*)mrec +
877 ctx->ntfs_ino = ni; 1012 le16_to_cpu(mrec->attrs_offset)),
878 ctx->al_entry = NULL; 1013 .is_first = TRUE,
879 ctx->base_ntfs_ino = NULL; 1014 .ntfs_ino = ni,
880 ctx->base_mrec = NULL; 1015 };
881 ctx->base_attr = NULL;
882} 1016}
883 1017
884/** 1018/**
@@ -945,6 +1079,8 @@ void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx)
945 return; 1079 return;
946} 1080}
947 1081
1082#ifdef NTFS_RW
1083
948/** 1084/**
949 * ntfs_attr_find_in_attrdef - find an attribute in the $AttrDef system file 1085 * ntfs_attr_find_in_attrdef - find an attribute in the $AttrDef system file
950 * @vol: ntfs volume to which the attribute belongs 1086 * @vol: ntfs volume to which the attribute belongs
@@ -1024,27 +1160,21 @@ int ntfs_attr_size_bounds_check(const ntfs_volume *vol, const ATTR_TYPE type,
1024 * Check whether the attribute of @type on the ntfs volume @vol is allowed to 1160 * Check whether the attribute of @type on the ntfs volume @vol is allowed to
1025 * be non-resident. This information is obtained from $AttrDef system file. 1161 * be non-resident. This information is obtained from $AttrDef system file.
1026 * 1162 *
1027 * Return 0 if the attribute is allowed to be non-resident, -EPERM if not, or 1163 * Return 0 if the attribute is allowed to be non-resident, -EPERM if not, and
1028 * -ENOENT if the attribute is not listed in $AttrDef. 1164 * -ENOENT if the attribute is not listed in $AttrDef.
1029 */ 1165 */
1030int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type) 1166int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type)
1031{ 1167{
1032 ATTR_DEF *ad; 1168 ATTR_DEF *ad;
1033 1169
1034 /*
1035 * $DATA is always allowed to be non-resident even if $AttrDef does not
1036 * specify this in the flags of the $DATA attribute definition record.
1037 */
1038 if (type == AT_DATA)
1039 return 0;
1040 /* Find the attribute definition record in $AttrDef. */ 1170 /* Find the attribute definition record in $AttrDef. */
1041 ad = ntfs_attr_find_in_attrdef(vol, type); 1171 ad = ntfs_attr_find_in_attrdef(vol, type);
1042 if (unlikely(!ad)) 1172 if (unlikely(!ad))
1043 return -ENOENT; 1173 return -ENOENT;
1044 /* Check the flags and return the result. */ 1174 /* Check the flags and return the result. */
1045 if (ad->flags & CAN_BE_NON_RESIDENT) 1175 if (ad->flags & ATTR_DEF_RESIDENT)
1046 return 0; 1176 return -EPERM;
1047 return -EPERM; 1177 return 0;
1048} 1178}
1049 1179
1050/** 1180/**
@@ -1067,9 +1197,9 @@ int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type)
1067 */ 1197 */
1068int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type) 1198int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type)
1069{ 1199{
1070 if (type != AT_INDEX_ALLOCATION && type != AT_EA) 1200 if (type == AT_INDEX_ALLOCATION || type == AT_EA)
1071 return 0; 1201 return -EPERM;
1072 return -EPERM; 1202 return 0;
1073} 1203}
1074 1204
1075/** 1205/**
@@ -1117,6 +1247,328 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
1117} 1247}
1118 1248
1119/** 1249/**
1250 * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute
1251 * @ni: ntfs inode describing the attribute to convert
1252 *
1253 * Convert the resident ntfs attribute described by the ntfs inode @ni to a
1254 * non-resident one.
1255 *
1256 * Return 0 on success and -errno on error. The following error return codes
1257 * are defined:
1258 * -EPERM - The attribute is not allowed to be non-resident.
1259 * -ENOMEM - Not enough memory.
1260 * -ENOSPC - Not enough disk space.
1261 * -EINVAL - Attribute not defined on the volume.
1262 * -EIO - I/o error or other error.
1263 * Note that -ENOSPC is also returned in the case that there is not enough
1264 * space in the mft record to do the conversion. This can happen when the mft
1265 * record is already very full. The caller is responsible for trying to make
1266 * space in the mft record and trying again. FIXME: Do we need a separate
1267 * error return code for this kind of -ENOSPC or is it always worth trying
1268 * again in case the attribute may then fit in a resident state so no need to
1269 * make it non-resident at all? Ho-hum... (AIA)
1270 *
1271 * NOTE to self: No changes in the attribute list are required to move from
1272 * a resident to a non-resident attribute.
1273 *
1274 * Locking: - The caller must hold i_sem on the inode.
1275 */
1276int ntfs_attr_make_non_resident(ntfs_inode *ni)
1277{
1278 s64 new_size;
1279 struct inode *vi = VFS_I(ni);
1280 ntfs_volume *vol = ni->vol;
1281 ntfs_inode *base_ni;
1282 MFT_RECORD *m;
1283 ATTR_RECORD *a;
1284 ntfs_attr_search_ctx *ctx;
1285 struct page *page;
1286 runlist_element *rl;
1287 u8 *kaddr;
1288 unsigned long flags;
1289 int mp_size, mp_ofs, name_ofs, arec_size, err, err2;
1290 u32 attr_size;
1291 u8 old_res_attr_flags;
1292
1293 /* Check that the attribute is allowed to be non-resident. */
1294 err = ntfs_attr_can_be_non_resident(vol, ni->type);
1295 if (unlikely(err)) {
1296 if (err == -EPERM)
1297 ntfs_debug("Attribute is not allowed to be "
1298 "non-resident.");
1299 else
1300 ntfs_debug("Attribute not defined on the NTFS "
1301 "volume!");
1302 return err;
1303 }
1304 /*
1305 * The size needs to be aligned to a cluster boundary for allocation
1306 * purposes.
1307 */
1308 new_size = (i_size_read(vi) + vol->cluster_size - 1) &
1309 ~(vol->cluster_size - 1);
1310 if (new_size > 0) {
1311 runlist_element *rl2;
1312
1313 /*
1314 * Will need the page later and since the page lock nests
1315 * outside all ntfs locks, we need to get the page now.
1316 */
1317 page = find_or_create_page(vi->i_mapping, 0,
1318 mapping_gfp_mask(vi->i_mapping));
1319 if (unlikely(!page))
1320 return -ENOMEM;
1321 /* Start by allocating clusters to hold the attribute value. */
1322 rl = ntfs_cluster_alloc(vol, 0, new_size >>
1323 vol->cluster_size_bits, -1, DATA_ZONE);
1324 if (IS_ERR(rl)) {
1325 err = PTR_ERR(rl);
1326 ntfs_debug("Failed to allocate cluster%s, error code "
1327 "%i.", (new_size >>
1328 vol->cluster_size_bits) > 1 ? "s" : "",
1329 err);
1330 goto page_err_out;
1331 }
1332 /* Change the runlist terminator to LCN_ENOENT. */
1333 rl2 = rl;
1334 while (rl2->length)
1335 rl2++;
1336 BUG_ON(rl2->lcn != LCN_RL_NOT_MAPPED);
1337 rl2->lcn = LCN_ENOENT;
1338 } else {
1339 rl = NULL;
1340 page = NULL;
1341 }
1342 /* Determine the size of the mapping pairs array. */
1343 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl, 0, -1);
1344 if (unlikely(mp_size < 0)) {
1345 err = mp_size;
1346 ntfs_debug("Failed to get size for mapping pairs array, error "
1347 "code %i.", err);
1348 goto rl_err_out;
1349 }
1350 down_write(&ni->runlist.lock);
1351 if (!NInoAttr(ni))
1352 base_ni = ni;
1353 else
1354 base_ni = ni->ext.base_ntfs_ino;
1355 m = map_mft_record(base_ni);
1356 if (IS_ERR(m)) {
1357 err = PTR_ERR(m);
1358 m = NULL;
1359 ctx = NULL;
1360 goto err_out;
1361 }
1362 ctx = ntfs_attr_get_search_ctx(base_ni, m);
1363 if (unlikely(!ctx)) {
1364 err = -ENOMEM;
1365 goto err_out;
1366 }
1367 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1368 CASE_SENSITIVE, 0, NULL, 0, ctx);
1369 if (unlikely(err)) {
1370 if (err == -ENOENT)
1371 err = -EIO;
1372 goto err_out;
1373 }
1374 m = ctx->mrec;
1375 a = ctx->attr;
1376 BUG_ON(NInoNonResident(ni));
1377 BUG_ON(a->non_resident);
1378 /*
1379 * Calculate new offsets for the name and the mapping pairs array.
1380 * We assume the attribute is not compressed or sparse.
1381 */
1382 name_ofs = (offsetof(ATTR_REC,
1383 data.non_resident.compressed_size) + 7) & ~7;
1384 mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
1385 /*
1386 * Determine the size of the resident part of the now non-resident
1387 * attribute record.
1388 */
1389 arec_size = (mp_ofs + mp_size + 7) & ~7;
1390 /*
1391 * If the page is not uptodate bring it uptodate by copying from the
1392 * attribute value.
1393 */
1394 attr_size = le32_to_cpu(a->data.resident.value_length);
1395 BUG_ON(attr_size != i_size_read(vi));
1396 if (page && !PageUptodate(page)) {
1397 kaddr = kmap_atomic(page, KM_USER0);
1398 memcpy(kaddr, (u8*)a +
1399 le16_to_cpu(a->data.resident.value_offset),
1400 attr_size);
1401 memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size);
1402 kunmap_atomic(kaddr, KM_USER0);
1403 flush_dcache_page(page);
1404 SetPageUptodate(page);
1405 }
1406 /* Backup the attribute flag. */
1407 old_res_attr_flags = a->data.resident.flags;
1408 /* Resize the resident part of the attribute record. */
1409 err = ntfs_attr_record_resize(m, a, arec_size);
1410 if (unlikely(err))
1411 goto err_out;
1412 /*
1413 * Convert the resident part of the attribute record to describe a
1414 * non-resident attribute.
1415 */
1416 a->non_resident = 1;
1417 /* Move the attribute name if it exists and update the offset. */
1418 if (a->name_length)
1419 memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
1420 a->name_length * sizeof(ntfschar));
1421 a->name_offset = cpu_to_le16(name_ofs);
1422 /*
1423 * FIXME: For now just clear all of these as we do not support them
1424 * when writing.
1425 */
1426 a->flags &= cpu_to_le16(0xffff & ~le16_to_cpu(ATTR_IS_SPARSE |
1427 ATTR_IS_ENCRYPTED | ATTR_COMPRESSION_MASK));
1428 /* Setup the fields specific to non-resident attributes. */
1429 a->data.non_resident.lowest_vcn = 0;
1430 a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >>
1431 vol->cluster_size_bits);
1432 a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs);
1433 a->data.non_resident.compression_unit = 0;
1434 memset(&a->data.non_resident.reserved, 0,
1435 sizeof(a->data.non_resident.reserved));
1436 a->data.non_resident.allocated_size = cpu_to_sle64(new_size);
1437 a->data.non_resident.data_size =
1438 a->data.non_resident.initialized_size =
1439 cpu_to_sle64(attr_size);
1440 /* Generate the mapping pairs array into the attribute record. */
1441 err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs,
1442 arec_size - mp_ofs, rl, 0, -1, NULL);
1443 if (unlikely(err)) {
1444 ntfs_debug("Failed to build mapping pairs, error code %i.",
1445 err);
1446 goto undo_err_out;
1447 }
1448 /* Setup the in-memory attribute structure to be non-resident. */
1449 /*
1450 * FIXME: For now just clear all of these as we do not support them
1451 * when writing.
1452 */
1453 NInoClearSparse(ni);
1454 NInoClearEncrypted(ni);
1455 NInoClearCompressed(ni);
1456 ni->runlist.rl = rl;
1457 write_lock_irqsave(&ni->size_lock, flags);
1458 ni->allocated_size = new_size;
1459 write_unlock_irqrestore(&ni->size_lock, flags);
1460 /*
1461 * This needs to be last since the address space operations ->readpage
1462 * and ->writepage can run concurrently with us as they are not
1463 * serialized on i_sem. Note, we are not allowed to fail once we flip
1464 * this switch, which is another reason to do this last.
1465 */
1466 NInoSetNonResident(ni);
1467 /* Mark the mft record dirty, so it gets written back. */
1468 flush_dcache_mft_record_page(ctx->ntfs_ino);
1469 mark_mft_record_dirty(ctx->ntfs_ino);
1470 ntfs_attr_put_search_ctx(ctx);
1471 unmap_mft_record(base_ni);
1472 up_write(&ni->runlist.lock);
1473 if (page) {
1474 set_page_dirty(page);
1475 unlock_page(page);
1476 mark_page_accessed(page);
1477 page_cache_release(page);
1478 }
1479 ntfs_debug("Done.");
1480 return 0;
1481undo_err_out:
1482 /* Convert the attribute back into a resident attribute. */
1483 a->non_resident = 0;
1484 /* Move the attribute name if it exists and update the offset. */
1485 name_ofs = (offsetof(ATTR_RECORD, data.resident.reserved) +
1486 sizeof(a->data.resident.reserved) + 7) & ~7;
1487 if (a->name_length)
1488 memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
1489 a->name_length * sizeof(ntfschar));
1490 mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
1491 a->name_offset = cpu_to_le16(name_ofs);
1492 arec_size = (mp_ofs + attr_size + 7) & ~7;
1493 /* Resize the resident part of the attribute record. */
1494 err2 = ntfs_attr_record_resize(m, a, arec_size);
1495 if (unlikely(err2)) {
1496 /*
1497 * This cannot happen (well if memory corruption is at work it
1498 * could happen in theory), but deal with it as well as we can.
1499 * If the old size is too small, truncate the attribute,
1500 * otherwise simply give it a larger allocated size.
1501 * FIXME: Should check whether chkdsk complains when the
1502 * allocated size is much bigger than the resident value size.
1503 */
1504 arec_size = le32_to_cpu(a->length);
1505 if ((mp_ofs + attr_size) > arec_size) {
1506 err2 = attr_size;
1507 attr_size = arec_size - mp_ofs;
1508 ntfs_error(vol->sb, "Failed to undo partial resident "
1509 "to non-resident attribute "
1510 "conversion. Truncating inode 0x%lx, "
1511 "attribute type 0x%x from %i bytes to "
1512 "%i bytes to maintain metadata "
1513 "consistency. THIS MEANS YOU ARE "
1514 "LOSING %i BYTES DATA FROM THIS %s.",
1515 vi->i_ino,
1516 (unsigned)le32_to_cpu(ni->type),
1517 err2, attr_size, err2 - attr_size,
1518 ((ni->type == AT_DATA) &&
1519 !ni->name_len) ? "FILE": "ATTRIBUTE");
1520 write_lock_irqsave(&ni->size_lock, flags);
1521 ni->initialized_size = attr_size;
1522 i_size_write(vi, attr_size);
1523 write_unlock_irqrestore(&ni->size_lock, flags);
1524 }
1525 }
1526 /* Setup the fields specific to resident attributes. */
1527 a->data.resident.value_length = cpu_to_le32(attr_size);
1528 a->data.resident.value_offset = cpu_to_le16(mp_ofs);
1529 a->data.resident.flags = old_res_attr_flags;
1530 memset(&a->data.resident.reserved, 0,
1531 sizeof(a->data.resident.reserved));
1532 /* Copy the data from the page back to the attribute value. */
1533 if (page) {
1534 kaddr = kmap_atomic(page, KM_USER0);
1535 memcpy((u8*)a + mp_ofs, kaddr, attr_size);
1536 kunmap_atomic(kaddr, KM_USER0);
1537 }
1538 /* Setup the allocated size in the ntfs inode in case it changed. */
1539 write_lock_irqsave(&ni->size_lock, flags);
1540 ni->allocated_size = arec_size - mp_ofs;
1541 write_unlock_irqrestore(&ni->size_lock, flags);
1542 /* Mark the mft record dirty, so it gets written back. */
1543 flush_dcache_mft_record_page(ctx->ntfs_ino);
1544 mark_mft_record_dirty(ctx->ntfs_ino);
1545err_out:
1546 if (ctx)
1547 ntfs_attr_put_search_ctx(ctx);
1548 if (m)
1549 unmap_mft_record(base_ni);
1550 ni->runlist.rl = NULL;
1551 up_write(&ni->runlist.lock);
1552rl_err_out:
1553 if (rl) {
1554 if (ntfs_cluster_free_from_rl(vol, rl) < 0) {
1555 ntfs_error(vol->sb, "Failed to release allocated "
1556 "cluster(s) in error code path. Run "
1557 "chkdsk to recover the lost "
1558 "cluster(s).");
1559 NVolSetErrors(vol);
1560 }
1561 ntfs_free(rl);
1562page_err_out:
1563 unlock_page(page);
1564 page_cache_release(page);
1565 }
1566 if (err == -EINVAL)
1567 err = -EIO;
1568 return err;
1569}
1570
1571/**
1120 * ntfs_attr_set - fill (a part of) an attribute with a byte 1572 * ntfs_attr_set - fill (a part of) an attribute with a byte
1121 * @ni: ntfs inode describing the attribute to fill 1573 * @ni: ntfs inode describing the attribute to fill
1122 * @ofs: offset inside the attribute at which to start to fill 1574 * @ofs: offset inside the attribute at which to start to fill
@@ -1127,6 +1579,10 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
1127 * byte offset @ofs inside the attribute with the constant byte @val. 1579 * byte offset @ofs inside the attribute with the constant byte @val.
1128 * 1580 *
1129 * This function is effectively like memset() applied to an ntfs attribute. 1581 * This function is effectively like memset() applied to an ntfs attribute.
1582 * Note thie function actually only operates on the page cache pages belonging
1583 * to the ntfs attribute and it marks them dirty after doing the memset().
1584 * Thus it relies on the vm dirty page write code paths to cause the modified
1585 * pages to be written to the mft record/disk.
1130 * 1586 *
1131 * Return 0 on success and -errno on error. An error code of -ESPIPE means 1587 * Return 0 on success and -errno on error. An error code of -ESPIPE means
1132 * that @ofs + @cnt were outside the end of the attribute and no write was 1588 * that @ofs + @cnt were outside the end of the attribute and no write was
@@ -1155,7 +1611,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
1155 end = ofs + cnt; 1611 end = ofs + cnt;
1156 end_ofs = end & ~PAGE_CACHE_MASK; 1612 end_ofs = end & ~PAGE_CACHE_MASK;
1157 /* If the end is outside the inode size return -ESPIPE. */ 1613 /* If the end is outside the inode size return -ESPIPE. */
1158 if (unlikely(end > VFS_I(ni)->i_size)) { 1614 if (unlikely(end > i_size_read(VFS_I(ni)))) {
1159 ntfs_error(vol->sb, "Request exceeds end of attribute."); 1615 ntfs_error(vol->sb, "Request exceeds end of attribute.");
1160 return -ESPIPE; 1616 return -ESPIPE;
1161 } 1617 }
@@ -1256,3 +1712,5 @@ done:
1256 ntfs_debug("Done."); 1712 ntfs_debug("Done.");
1257 return 0; 1713 return 0;
1258} 1714}
1715
1716#endif /* NTFS_RW */
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index e0c2c6c81bc0..0e4ac6d3c0e7 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -2,7 +2,7 @@
2 * attrib.h - Defines for attribute handling in NTFS Linux kernel driver. 2 * attrib.h - Defines for attribute handling in NTFS Linux kernel driver.
3 * Part of the Linux-NTFS project. 3 * Part of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -60,10 +60,14 @@ typedef struct {
60 ATTR_RECORD *base_attr; 60 ATTR_RECORD *base_attr;
61} ntfs_attr_search_ctx; 61} ntfs_attr_search_ctx;
62 62
63extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn);
63extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn); 64extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn);
64 65
65extern runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn, 66extern LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
66 const BOOL need_write); 67 const BOOL write_locked);
68
69extern runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni,
70 const VCN vcn, const BOOL write_locked);
67 71
68int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, 72int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
69 const u32 name_len, const IGNORE_CASE_BOOL ic, 73 const u32 name_len, const IGNORE_CASE_BOOL ic,
@@ -85,6 +89,8 @@ extern ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni,
85 MFT_RECORD *mrec); 89 MFT_RECORD *mrec);
86extern void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx); 90extern void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx);
87 91
92#ifdef NTFS_RW
93
88extern int ntfs_attr_size_bounds_check(const ntfs_volume *vol, 94extern int ntfs_attr_size_bounds_check(const ntfs_volume *vol,
89 const ATTR_TYPE type, const s64 size); 95 const ATTR_TYPE type, const s64 size);
90extern int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, 96extern int ntfs_attr_can_be_non_resident(const ntfs_volume *vol,
@@ -94,7 +100,11 @@ extern int ntfs_attr_can_be_resident(const ntfs_volume *vol,
94 100
95extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size); 101extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size);
96 102
103extern int ntfs_attr_make_non_resident(ntfs_inode *ni);
104
97extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, 105extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt,
98 const u8 val); 106 const u8 val);
99 107
108#endif /* NTFS_RW */
109
100#endif /* _LINUX_NTFS_ATTRIB_H */ 110#endif /* _LINUX_NTFS_ATTRIB_H */
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index ee5ae706f861..6d265cfd49aa 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -96,13 +96,14 @@ void free_compression_buffers(void)
96/** 96/**
97 * zero_partial_compressed_page - zero out of bounds compressed page region 97 * zero_partial_compressed_page - zero out of bounds compressed page region
98 */ 98 */
99static void zero_partial_compressed_page(ntfs_inode *ni, struct page *page) 99static void zero_partial_compressed_page(struct page *page,
100 const s64 initialized_size)
100{ 101{
101 u8 *kp = page_address(page); 102 u8 *kp = page_address(page);
102 unsigned int kp_ofs; 103 unsigned int kp_ofs;
103 104
104 ntfs_debug("Zeroing page region outside initialized size."); 105 ntfs_debug("Zeroing page region outside initialized size.");
105 if (((s64)page->index << PAGE_CACHE_SHIFT) >= ni->initialized_size) { 106 if (((s64)page->index << PAGE_CACHE_SHIFT) >= initialized_size) {
106 /* 107 /*
107 * FIXME: Using clear_page() will become wrong when we get 108 * FIXME: Using clear_page() will become wrong when we get
108 * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem. 109 * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem.
@@ -110,7 +111,7 @@ static void zero_partial_compressed_page(ntfs_inode *ni, struct page *page)
110 clear_page(kp); 111 clear_page(kp);
111 return; 112 return;
112 } 113 }
113 kp_ofs = ni->initialized_size & ~PAGE_CACHE_MASK; 114 kp_ofs = initialized_size & ~PAGE_CACHE_MASK;
114 memset(kp + kp_ofs, 0, PAGE_CACHE_SIZE - kp_ofs); 115 memset(kp + kp_ofs, 0, PAGE_CACHE_SIZE - kp_ofs);
115 return; 116 return;
116} 117}
@@ -118,12 +119,12 @@ static void zero_partial_compressed_page(ntfs_inode *ni, struct page *page)
118/** 119/**
119 * handle_bounds_compressed_page - test for&handle out of bounds compressed page 120 * handle_bounds_compressed_page - test for&handle out of bounds compressed page
120 */ 121 */
121static inline void handle_bounds_compressed_page(ntfs_inode *ni, 122static inline void handle_bounds_compressed_page(struct page *page,
122 struct page *page) 123 const loff_t i_size, const s64 initialized_size)
123{ 124{
124 if ((page->index >= (ni->initialized_size >> PAGE_CACHE_SHIFT)) && 125 if ((page->index >= (initialized_size >> PAGE_CACHE_SHIFT)) &&
125 (ni->initialized_size < VFS_I(ni)->i_size)) 126 (initialized_size < i_size))
126 zero_partial_compressed_page(ni, page); 127 zero_partial_compressed_page(page, initialized_size);
127 return; 128 return;
128} 129}
129 130
@@ -138,6 +139,8 @@ static inline void handle_bounds_compressed_page(ntfs_inode *ni,
138 * @xpage_done: set to 1 if xpage was completed successfully (IN/OUT) 139 * @xpage_done: set to 1 if xpage was completed successfully (IN/OUT)
139 * @cb_start: compression block to decompress (IN) 140 * @cb_start: compression block to decompress (IN)
140 * @cb_size: size of compression block @cb_start in bytes (IN) 141 * @cb_size: size of compression block @cb_start in bytes (IN)
142 * @i_size: file size when we started the read (IN)
143 * @initialized_size: initialized file size when we started the read (IN)
141 * 144 *
142 * The caller must have disabled preemption. ntfs_decompress() reenables it when 145 * The caller must have disabled preemption. ntfs_decompress() reenables it when
143 * the critical section is finished. 146 * the critical section is finished.
@@ -165,7 +168,8 @@ static inline void handle_bounds_compressed_page(ntfs_inode *ni,
165static int ntfs_decompress(struct page *dest_pages[], int *dest_index, 168static int ntfs_decompress(struct page *dest_pages[], int *dest_index,
166 int *dest_ofs, const int dest_max_index, const int dest_max_ofs, 169 int *dest_ofs, const int dest_max_index, const int dest_max_ofs,
167 const int xpage, char *xpage_done, u8 *const cb_start, 170 const int xpage, char *xpage_done, u8 *const cb_start,
168 const u32 cb_size) 171 const u32 cb_size, const loff_t i_size,
172 const s64 initialized_size)
169{ 173{
170 /* 174 /*
171 * Pointers into the compressed data, i.e. the compression block (cb), 175 * Pointers into the compressed data, i.e. the compression block (cb),
@@ -219,9 +223,6 @@ return_error:
219 spin_unlock(&ntfs_cb_lock); 223 spin_unlock(&ntfs_cb_lock);
220 /* Second stage: finalize completed pages. */ 224 /* Second stage: finalize completed pages. */
221 if (nr_completed_pages > 0) { 225 if (nr_completed_pages > 0) {
222 struct page *page = dest_pages[completed_pages[0]];
223 ntfs_inode *ni = NTFS_I(page->mapping->host);
224
225 for (i = 0; i < nr_completed_pages; i++) { 226 for (i = 0; i < nr_completed_pages; i++) {
226 int di = completed_pages[i]; 227 int di = completed_pages[i];
227 228
@@ -230,7 +231,8 @@ return_error:
230 * If we are outside the initialized size, zero 231 * If we are outside the initialized size, zero
231 * the out of bounds page range. 232 * the out of bounds page range.
232 */ 233 */
233 handle_bounds_compressed_page(ni, dp); 234 handle_bounds_compressed_page(dp, i_size,
235 initialized_size);
234 flush_dcache_page(dp); 236 flush_dcache_page(dp);
235 kunmap(dp); 237 kunmap(dp);
236 SetPageUptodate(dp); 238 SetPageUptodate(dp);
@@ -478,12 +480,14 @@ return_overflow:
478 */ 480 */
479int ntfs_read_compressed_block(struct page *page) 481int ntfs_read_compressed_block(struct page *page)
480{ 482{
483 loff_t i_size;
484 s64 initialized_size;
481 struct address_space *mapping = page->mapping; 485 struct address_space *mapping = page->mapping;
482 ntfs_inode *ni = NTFS_I(mapping->host); 486 ntfs_inode *ni = NTFS_I(mapping->host);
483 ntfs_volume *vol = ni->vol; 487 ntfs_volume *vol = ni->vol;
484 struct super_block *sb = vol->sb; 488 struct super_block *sb = vol->sb;
485 runlist_element *rl; 489 runlist_element *rl;
486 unsigned long block_size = sb->s_blocksize; 490 unsigned long flags, block_size = sb->s_blocksize;
487 unsigned char block_size_bits = sb->s_blocksize_bits; 491 unsigned char block_size_bits = sb->s_blocksize_bits;
488 u8 *cb, *cb_pos, *cb_end; 492 u8 *cb, *cb_pos, *cb_end;
489 struct buffer_head **bhs; 493 struct buffer_head **bhs;
@@ -552,8 +556,12 @@ int ntfs_read_compressed_block(struct page *page)
552 * The remaining pages need to be allocated and inserted into the page 556 * The remaining pages need to be allocated and inserted into the page
553 * cache, alignment guarantees keep all the below much simpler. (-8 557 * cache, alignment guarantees keep all the below much simpler. (-8
554 */ 558 */
555 max_page = ((VFS_I(ni)->i_size + PAGE_CACHE_SIZE - 1) >> 559 read_lock_irqsave(&ni->size_lock, flags);
556 PAGE_CACHE_SHIFT) - offset; 560 i_size = i_size_read(VFS_I(ni));
561 initialized_size = ni->initialized_size;
562 read_unlock_irqrestore(&ni->size_lock, flags);
563 max_page = ((i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
564 offset;
557 if (nr_pages < max_page) 565 if (nr_pages < max_page)
558 max_page = nr_pages; 566 max_page = nr_pages;
559 for (i = 0; i < max_page; i++, offset++) { 567 for (i = 0; i < max_page; i++, offset++) {
@@ -824,7 +832,8 @@ lock_retry_remap:
824 * If we are outside the initialized size, zero 832 * If we are outside the initialized size, zero
825 * the out of bounds page range. 833 * the out of bounds page range.
826 */ 834 */
827 handle_bounds_compressed_page(ni, page); 835 handle_bounds_compressed_page(page, i_size,
836 initialized_size);
828 flush_dcache_page(page); 837 flush_dcache_page(page);
829 kunmap(page); 838 kunmap(page);
830 SetPageUptodate(page); 839 SetPageUptodate(page);
@@ -847,7 +856,8 @@ lock_retry_remap:
847 ntfs_debug("Found compressed compression block."); 856 ntfs_debug("Found compressed compression block.");
848 err = ntfs_decompress(pages, &cur_page, &cur_ofs, 857 err = ntfs_decompress(pages, &cur_page, &cur_ofs,
849 cb_max_page, cb_max_ofs, xpage, &xpage_done, 858 cb_max_page, cb_max_ofs, xpage, &xpage_done,
850 cb_pos, cb_size - (cb_pos - cb)); 859 cb_pos, cb_size - (cb_pos - cb), i_size,
860 initialized_size);
851 /* 861 /*
852 * We can sleep from now on, lock already dropped by 862 * We can sleep from now on, lock already dropped by
853 * ntfs_decompress(). 863 * ntfs_decompress().
diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c
index 6fb6bb5e3723..807150e2c2b9 100644
--- a/fs/ntfs/debug.c
+++ b/fs/ntfs/debug.c
@@ -164,14 +164,17 @@ void ntfs_debug_dump_runlist(const runlist_element *rl)
164 if (index > -LCN_ENOENT - 1) 164 if (index > -LCN_ENOENT - 1)
165 index = 3; 165 index = 3;
166 printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n", 166 printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n",
167 (rl + i)->vcn, lcn_str[index], 167 (long long)(rl + i)->vcn, lcn_str[index],
168 (rl + i)->length, (rl + i)->length ? 168 (long long)(rl + i)->length,
169 "" : " (runlist end)"); 169 (rl + i)->length ? "" :
170 " (runlist end)");
170 } else 171 } else
171 printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n", 172 printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n",
172 (rl + i)->vcn, (rl + i)->lcn, 173 (long long)(rl + i)->vcn,
173 (rl + i)->length, (rl + i)->length ? 174 (long long)(rl + i)->lcn,
174 "" : " (runlist end)"); 175 (long long)(rl + i)->length,
176 (rl + i)->length ? "" :
177 " (runlist end)");
175 if (!(rl + i)->length) 178 if (!(rl + i)->length)
176 break; 179 break;
177 } 180 }
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 93577561cdbe..46779471c542 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project. 2 * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -183,8 +183,7 @@ found_it:
183 name->len = 0; 183 name->len = 0;
184 *res = name; 184 *res = name;
185 } else { 185 } else {
186 if (name) 186 kfree(name);
187 kfree(name);
188 *res = NULL; 187 *res = NULL;
189 } 188 }
190 mref = le64_to_cpu(ie->data.dir.indexed_file); 189 mref = le64_to_cpu(ie->data.dir.indexed_file);
@@ -444,8 +443,7 @@ found_it2:
444 name->len = 0; 443 name->len = 0;
445 *res = name; 444 *res = name;
446 } else { 445 } else {
447 if (name) 446 kfree(name);
448 kfree(name);
449 *res = NULL; 447 *res = NULL;
450 } 448 }
451 mref = le64_to_cpu(ie->data.dir.indexed_file); 449 mref = le64_to_cpu(ie->data.dir.indexed_file);
@@ -610,7 +608,7 @@ dir_err_out:
610// TODO: (AIA) 608// TODO: (AIA)
611// The algorithm embedded in this code will be required for the time when we 609// The algorithm embedded in this code will be required for the time when we
612// want to support adding of entries to directories, where we require correct 610// want to support adding of entries to directories, where we require correct
613// collation of file names in order not to cause corruption of the file system. 611// collation of file names in order not to cause corruption of the filesystem.
614 612
615/** 613/**
616 * ntfs_lookup_inode_by_name - find an inode in a directory given its name 614 * ntfs_lookup_inode_by_name - find an inode in a directory given its name
@@ -1101,7 +1099,7 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
1101static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 1099static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1102{ 1100{
1103 s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; 1101 s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
1104 loff_t fpos; 1102 loff_t fpos, i_size;
1105 struct inode *bmp_vi, *vdir = filp->f_dentry->d_inode; 1103 struct inode *bmp_vi, *vdir = filp->f_dentry->d_inode;
1106 struct super_block *sb = vdir->i_sb; 1104 struct super_block *sb = vdir->i_sb;
1107 ntfs_inode *ndir = NTFS_I(vdir); 1105 ntfs_inode *ndir = NTFS_I(vdir);
@@ -1122,7 +1120,8 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1122 vdir->i_ino, fpos); 1120 vdir->i_ino, fpos);
1123 rc = err = 0; 1121 rc = err = 0;
1124 /* Are we at end of dir yet? */ 1122 /* Are we at end of dir yet? */
1125 if (fpos >= vdir->i_size + vol->mft_record_size) 1123 i_size = i_size_read(vdir);
1124 if (fpos >= i_size + vol->mft_record_size)
1126 goto done; 1125 goto done;
1127 /* Emulate . and .. for all directories. */ 1126 /* Emulate . and .. for all directories. */
1128 if (!fpos) { 1127 if (!fpos) {
@@ -1264,7 +1263,7 @@ skip_index_root:
1264 bmp_mapping = bmp_vi->i_mapping; 1263 bmp_mapping = bmp_vi->i_mapping;
1265 /* Get the starting bitmap bit position and sanity check it. */ 1264 /* Get the starting bitmap bit position and sanity check it. */
1266 bmp_pos = ia_pos >> ndir->itype.index.block_size_bits; 1265 bmp_pos = ia_pos >> ndir->itype.index.block_size_bits;
1267 if (unlikely(bmp_pos >> 3 >= bmp_vi->i_size)) { 1266 if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) {
1268 ntfs_error(sb, "Current index allocation position exceeds " 1267 ntfs_error(sb, "Current index allocation position exceeds "
1269 "index bitmap size."); 1268 "index bitmap size.");
1270 goto err_out; 1269 goto err_out;
@@ -1301,7 +1300,7 @@ find_next_index_buffer:
1301 goto get_next_bmp_page; 1300 goto get_next_bmp_page;
1302 } 1301 }
1303 /* If we have reached the end of the bitmap, we are done. */ 1302 /* If we have reached the end of the bitmap, we are done. */
1304 if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= vdir->i_size)) 1303 if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= i_size))
1305 goto unm_EOD; 1304 goto unm_EOD;
1306 ia_pos = (bmp_pos + cur_bmp_pos) << 1305 ia_pos = (bmp_pos + cur_bmp_pos) <<
1307 ndir->itype.index.block_size_bits; 1306 ndir->itype.index.block_size_bits;
@@ -1309,7 +1308,8 @@ find_next_index_buffer:
1309 ntfs_debug("Handling index buffer 0x%llx.", 1308 ntfs_debug("Handling index buffer 0x%llx.",
1310 (unsigned long long)bmp_pos + cur_bmp_pos); 1309 (unsigned long long)bmp_pos + cur_bmp_pos);
1311 /* If the current index buffer is in the same page we reuse the page. */ 1310 /* If the current index buffer is in the same page we reuse the page. */
1312 if ((prev_ia_pos & PAGE_CACHE_MASK) != (ia_pos & PAGE_CACHE_MASK)) { 1311 if ((prev_ia_pos & (s64)PAGE_CACHE_MASK) !=
1312 (ia_pos & (s64)PAGE_CACHE_MASK)) {
1313 prev_ia_pos = ia_pos; 1313 prev_ia_pos = ia_pos;
1314 if (likely(ia_page != NULL)) { 1314 if (likely(ia_page != NULL)) {
1315 unlock_page(ia_page); 1315 unlock_page(ia_page);
@@ -1441,7 +1441,7 @@ unm_EOD:
1441 ntfs_unmap_page(bmp_page); 1441 ntfs_unmap_page(bmp_page);
1442EOD: 1442EOD:
1443 /* We are finished, set fpos to EOD. */ 1443 /* We are finished, set fpos to EOD. */
1444 fpos = vdir->i_size + vol->mft_record_size; 1444 fpos = i_size + vol->mft_record_size;
1445abort: 1445abort:
1446 kfree(name); 1446 kfree(name);
1447done: 1447done:
@@ -1461,10 +1461,8 @@ err_out:
1461 unlock_page(ia_page); 1461 unlock_page(ia_page);
1462 ntfs_unmap_page(ia_page); 1462 ntfs_unmap_page(ia_page);
1463 } 1463 }
1464 if (ir) 1464 kfree(ir);
1465 kfree(ir); 1465 kfree(name);
1466 if (name)
1467 kfree(name);
1468 if (ctx) 1466 if (ctx)
1469 ntfs_attr_put_search_ctx(ctx); 1467 ntfs_attr_put_search_ctx(ctx);
1470 if (m) 1468 if (m)
@@ -1495,7 +1493,7 @@ err_out:
1495static int ntfs_dir_open(struct inode *vi, struct file *filp) 1493static int ntfs_dir_open(struct inode *vi, struct file *filp)
1496{ 1494{
1497 if (sizeof(unsigned long) < 8) { 1495 if (sizeof(unsigned long) < 8) {
1498 if (vi->i_size > MAX_LFS_FILESIZE) 1496 if (i_size_read(vi) > MAX_LFS_FILESIZE)
1499 return -EFBIG; 1497 return -EFBIG;
1500 } 1498 }
1501 return 0; 1499 return 0;
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index db8713ea0d27..e0f530ce6b99 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -47,7 +47,7 @@
47static int ntfs_file_open(struct inode *vi, struct file *filp) 47static int ntfs_file_open(struct inode *vi, struct file *filp)
48{ 48{
49 if (sizeof(unsigned long) < 8) { 49 if (sizeof(unsigned long) < 8) {
50 if (vi->i_size > MAX_LFS_FILESIZE) 50 if (i_size_read(vi) > MAX_LFS_FILESIZE)
51 return -EFBIG; 51 return -EFBIG;
52 } 52 }
53 return generic_file_open(vi, filp); 53 return generic_file_open(vi, filp);
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 71bd2cd7a4d9..11fd5307d780 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * index.c - NTFS kernel index handling. Part of the Linux-NTFS project. 2 * index.c - NTFS kernel index handling. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2004 Anton Altaparmakov 4 * Copyright (c) 2004-2005 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -39,18 +39,8 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
39 ntfs_index_context *ictx; 39 ntfs_index_context *ictx;
40 40
41 ictx = kmem_cache_alloc(ntfs_index_ctx_cache, SLAB_NOFS); 41 ictx = kmem_cache_alloc(ntfs_index_ctx_cache, SLAB_NOFS);
42 if (ictx) { 42 if (ictx)
43 ictx->idx_ni = idx_ni; 43 *ictx = (ntfs_index_context){ .idx_ni = idx_ni };
44 ictx->entry = NULL;
45 ictx->data = NULL;
46 ictx->data_len = 0;
47 ictx->is_in_root = 0;
48 ictx->ir = NULL;
49 ictx->actx = NULL;
50 ictx->base_ni = NULL;
51 ictx->ia = NULL;
52 ictx->page = NULL;
53 }
54 return ictx; 44 return ictx;
55} 45}
56 46
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 31840ba0b38c..886214a77f90 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project. 2 * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -174,7 +174,7 @@ struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no)
174 174
175 vi = iget5_locked(sb, mft_no, (test_t)ntfs_test_inode, 175 vi = iget5_locked(sb, mft_no, (test_t)ntfs_test_inode,
176 (set_t)ntfs_init_locked_inode, &na); 176 (set_t)ntfs_init_locked_inode, &na);
177 if (!vi) 177 if (unlikely(!vi))
178 return ERR_PTR(-ENOMEM); 178 return ERR_PTR(-ENOMEM);
179 179
180 err = 0; 180 err = 0;
@@ -188,7 +188,7 @@ struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no)
188 * There is no point in keeping bad inodes around if the failure was 188 * There is no point in keeping bad inodes around if the failure was
189 * due to ENOMEM. We want to be able to retry again later. 189 * due to ENOMEM. We want to be able to retry again later.
190 */ 190 */
191 if (err == -ENOMEM) { 191 if (unlikely(err == -ENOMEM)) {
192 iput(vi); 192 iput(vi);
193 vi = ERR_PTR(err); 193 vi = ERR_PTR(err);
194 } 194 }
@@ -235,7 +235,7 @@ struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type,
235 235
236 vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode, 236 vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode,
237 (set_t)ntfs_init_locked_inode, &na); 237 (set_t)ntfs_init_locked_inode, &na);
238 if (!vi) 238 if (unlikely(!vi))
239 return ERR_PTR(-ENOMEM); 239 return ERR_PTR(-ENOMEM);
240 240
241 err = 0; 241 err = 0;
@@ -250,7 +250,7 @@ struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type,
250 * simplifies things in that we never need to check for bad attribute 250 * simplifies things in that we never need to check for bad attribute
251 * inodes elsewhere. 251 * inodes elsewhere.
252 */ 252 */
253 if (err) { 253 if (unlikely(err)) {
254 iput(vi); 254 iput(vi);
255 vi = ERR_PTR(err); 255 vi = ERR_PTR(err);
256 } 256 }
@@ -290,7 +290,7 @@ struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
290 290
291 vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode, 291 vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode,
292 (set_t)ntfs_init_locked_inode, &na); 292 (set_t)ntfs_init_locked_inode, &na);
293 if (!vi) 293 if (unlikely(!vi))
294 return ERR_PTR(-ENOMEM); 294 return ERR_PTR(-ENOMEM);
295 295
296 err = 0; 296 err = 0;
@@ -305,7 +305,7 @@ struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
305 * simplifies things in that we never need to check for bad index 305 * simplifies things in that we never need to check for bad index
306 * inodes elsewhere. 306 * inodes elsewhere.
307 */ 307 */
308 if (err) { 308 if (unlikely(err)) {
309 iput(vi); 309 iput(vi);
310 vi = ERR_PTR(err); 310 vi = ERR_PTR(err);
311 } 311 }
@@ -317,8 +317,7 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
317 ntfs_inode *ni; 317 ntfs_inode *ni;
318 318
319 ntfs_debug("Entering."); 319 ntfs_debug("Entering.");
320 ni = (ntfs_inode *)kmem_cache_alloc(ntfs_big_inode_cache, 320 ni = kmem_cache_alloc(ntfs_big_inode_cache, SLAB_NOFS);
321 SLAB_NOFS);
322 if (likely(ni != NULL)) { 321 if (likely(ni != NULL)) {
323 ni->state = 0; 322 ni->state = 0;
324 return VFS_I(ni); 323 return VFS_I(ni);
@@ -343,7 +342,7 @@ static inline ntfs_inode *ntfs_alloc_extent_inode(void)
343 ntfs_inode *ni; 342 ntfs_inode *ni;
344 343
345 ntfs_debug("Entering."); 344 ntfs_debug("Entering.");
346 ni = (ntfs_inode *)kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS); 345 ni = kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS);
347 if (likely(ni != NULL)) { 346 if (likely(ni != NULL)) {
348 ni->state = 0; 347 ni->state = 0;
349 return ni; 348 return ni;
@@ -376,6 +375,7 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni)
376void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) 375void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
377{ 376{
378 ntfs_debug("Entering."); 377 ntfs_debug("Entering.");
378 rwlock_init(&ni->size_lock);
379 ni->initialized_size = ni->allocated_size = 0; 379 ni->initialized_size = ni->allocated_size = 0;
380 ni->seq_no = 0; 380 ni->seq_no = 0;
381 atomic_set(&ni->count, 1); 381 atomic_set(&ni->count, 1);
@@ -524,6 +524,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
524 ntfs_volume *vol = NTFS_SB(vi->i_sb); 524 ntfs_volume *vol = NTFS_SB(vi->i_sb);
525 ntfs_inode *ni; 525 ntfs_inode *ni;
526 MFT_RECORD *m; 526 MFT_RECORD *m;
527 ATTR_RECORD *a;
527 STANDARD_INFORMATION *si; 528 STANDARD_INFORMATION *si;
528 ntfs_attr_search_ctx *ctx; 529 ntfs_attr_search_ctx *ctx;
529 int err = 0; 530 int err = 0;
@@ -632,9 +633,10 @@ static int ntfs_read_locked_inode(struct inode *vi)
632 } 633 }
633 goto unm_err_out; 634 goto unm_err_out;
634 } 635 }
636 a = ctx->attr;
635 /* Get the standard information attribute value. */ 637 /* Get the standard information attribute value. */
636 si = (STANDARD_INFORMATION*)((char*)ctx->attr + 638 si = (STANDARD_INFORMATION*)((u8*)a +
637 le16_to_cpu(ctx->attr->data.resident.value_offset)); 639 le16_to_cpu(a->data.resident.value_offset));
638 640
639 /* Transfer information from the standard information into vi. */ 641 /* Transfer information from the standard information into vi. */
640 /* 642 /*
@@ -673,15 +675,16 @@ static int ntfs_read_locked_inode(struct inode *vi)
673 goto skip_attr_list_load; 675 goto skip_attr_list_load;
674 ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino); 676 ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino);
675 NInoSetAttrList(ni); 677 NInoSetAttrList(ni);
676 if (ctx->attr->flags & ATTR_IS_ENCRYPTED || 678 a = ctx->attr;
677 ctx->attr->flags & ATTR_COMPRESSION_MASK || 679 if (a->flags & ATTR_IS_ENCRYPTED ||
678 ctx->attr->flags & ATTR_IS_SPARSE) { 680 a->flags & ATTR_COMPRESSION_MASK ||
681 a->flags & ATTR_IS_SPARSE) {
679 ntfs_error(vi->i_sb, "Attribute list attribute is " 682 ntfs_error(vi->i_sb, "Attribute list attribute is "
680 "compressed/encrypted/sparse."); 683 "compressed/encrypted/sparse.");
681 goto unm_err_out; 684 goto unm_err_out;
682 } 685 }
683 /* Now allocate memory for the attribute list. */ 686 /* Now allocate memory for the attribute list. */
684 ni->attr_list_size = (u32)ntfs_attr_size(ctx->attr); 687 ni->attr_list_size = (u32)ntfs_attr_size(a);
685 ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); 688 ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
686 if (!ni->attr_list) { 689 if (!ni->attr_list) {
687 ntfs_error(vi->i_sb, "Not enough memory to allocate " 690 ntfs_error(vi->i_sb, "Not enough memory to allocate "
@@ -689,9 +692,9 @@ static int ntfs_read_locked_inode(struct inode *vi)
689 err = -ENOMEM; 692 err = -ENOMEM;
690 goto unm_err_out; 693 goto unm_err_out;
691 } 694 }
692 if (ctx->attr->non_resident) { 695 if (a->non_resident) {
693 NInoSetAttrListNonResident(ni); 696 NInoSetAttrListNonResident(ni);
694 if (ctx->attr->data.non_resident.lowest_vcn) { 697 if (a->data.non_resident.lowest_vcn) {
695 ntfs_error(vi->i_sb, "Attribute list has non " 698 ntfs_error(vi->i_sb, "Attribute list has non "
696 "zero lowest_vcn."); 699 "zero lowest_vcn.");
697 goto unm_err_out; 700 goto unm_err_out;
@@ -701,7 +704,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
701 * exclusive access to the inode at this time. 704 * exclusive access to the inode at this time.
702 */ 705 */
703 ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol, 706 ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
704 ctx->attr, NULL); 707 a, NULL);
705 if (IS_ERR(ni->attr_list_rl.rl)) { 708 if (IS_ERR(ni->attr_list_rl.rl)) {
706 err = PTR_ERR(ni->attr_list_rl.rl); 709 err = PTR_ERR(ni->attr_list_rl.rl);
707 ni->attr_list_rl.rl = NULL; 710 ni->attr_list_rl.rl = NULL;
@@ -712,27 +715,26 @@ static int ntfs_read_locked_inode(struct inode *vi)
712 /* Now load the attribute list. */ 715 /* Now load the attribute list. */
713 if ((err = load_attribute_list(vol, &ni->attr_list_rl, 716 if ((err = load_attribute_list(vol, &ni->attr_list_rl,
714 ni->attr_list, ni->attr_list_size, 717 ni->attr_list, ni->attr_list_size,
715 sle64_to_cpu(ctx->attr->data. 718 sle64_to_cpu(a->data.non_resident.
716 non_resident.initialized_size)))) { 719 initialized_size)))) {
717 ntfs_error(vi->i_sb, "Failed to load " 720 ntfs_error(vi->i_sb, "Failed to load "
718 "attribute list attribute."); 721 "attribute list attribute.");
719 goto unm_err_out; 722 goto unm_err_out;
720 } 723 }
721 } else /* if (!ctx.attr->non_resident) */ { 724 } else /* if (!a->non_resident) */ {
722 if ((u8*)ctx->attr + le16_to_cpu( 725 if ((u8*)a + le16_to_cpu(a->data.resident.value_offset)
723 ctx->attr->data.resident.value_offset) + 726 + le32_to_cpu(
724 le32_to_cpu( 727 a->data.resident.value_length) >
725 ctx->attr->data.resident.value_length) >
726 (u8*)ctx->mrec + vol->mft_record_size) { 728 (u8*)ctx->mrec + vol->mft_record_size) {
727 ntfs_error(vi->i_sb, "Corrupt attribute list " 729 ntfs_error(vi->i_sb, "Corrupt attribute list "
728 "in inode."); 730 "in inode.");
729 goto unm_err_out; 731 goto unm_err_out;
730 } 732 }
731 /* Now copy the attribute list. */ 733 /* Now copy the attribute list. */
732 memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu( 734 memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
733 ctx->attr->data.resident.value_offset), 735 a->data.resident.value_offset),
734 le32_to_cpu( 736 le32_to_cpu(
735 ctx->attr->data.resident.value_length)); 737 a->data.resident.value_length));
736 } 738 }
737 } 739 }
738skip_attr_list_load: 740skip_attr_list_load:
@@ -741,10 +743,11 @@ skip_attr_list_load:
741 * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes. 743 * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes.
742 */ 744 */
743 if (S_ISDIR(vi->i_mode)) { 745 if (S_ISDIR(vi->i_mode)) {
746 loff_t bvi_size;
744 struct inode *bvi; 747 struct inode *bvi;
745 ntfs_inode *bni; 748 ntfs_inode *bni;
746 INDEX_ROOT *ir; 749 INDEX_ROOT *ir;
747 char *ir_end, *index_end; 750 u8 *ir_end, *index_end;
748 751
749 /* It is a directory, find index root attribute. */ 752 /* It is a directory, find index root attribute. */
750 ntfs_attr_reinit_search_ctx(ctx); 753 ntfs_attr_reinit_search_ctx(ctx);
@@ -760,17 +763,16 @@ skip_attr_list_load:
760 } 763 }
761 goto unm_err_out; 764 goto unm_err_out;
762 } 765 }
766 a = ctx->attr;
763 /* Set up the state. */ 767 /* Set up the state. */
764 if (unlikely(ctx->attr->non_resident)) { 768 if (unlikely(a->non_resident)) {
765 ntfs_error(vol->sb, "$INDEX_ROOT attribute is not " 769 ntfs_error(vol->sb, "$INDEX_ROOT attribute is not "
766 "resident."); 770 "resident.");
767 goto unm_err_out; 771 goto unm_err_out;
768 } 772 }
769 /* Ensure the attribute name is placed before the value. */ 773 /* Ensure the attribute name is placed before the value. */
770 if (unlikely(ctx->attr->name_length && 774 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
771 (le16_to_cpu(ctx->attr->name_offset) >= 775 le16_to_cpu(a->data.resident.value_offset)))) {
772 le16_to_cpu(ctx->attr->data.resident.
773 value_offset)))) {
774 ntfs_error(vol->sb, "$INDEX_ROOT attribute name is " 776 ntfs_error(vol->sb, "$INDEX_ROOT attribute name is "
775 "placed after the attribute value."); 777 "placed after the attribute value.");
776 goto unm_err_out; 778 goto unm_err_out;
@@ -781,28 +783,27 @@ skip_attr_list_load:
781 * encrypted. However index root cannot be both compressed and 783 * encrypted. However index root cannot be both compressed and
782 * encrypted. 784 * encrypted.
783 */ 785 */
784 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) 786 if (a->flags & ATTR_COMPRESSION_MASK)
785 NInoSetCompressed(ni); 787 NInoSetCompressed(ni);
786 if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { 788 if (a->flags & ATTR_IS_ENCRYPTED) {
787 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 789 if (a->flags & ATTR_COMPRESSION_MASK) {
788 ntfs_error(vi->i_sb, "Found encrypted and " 790 ntfs_error(vi->i_sb, "Found encrypted and "
789 "compressed attribute."); 791 "compressed attribute.");
790 goto unm_err_out; 792 goto unm_err_out;
791 } 793 }
792 NInoSetEncrypted(ni); 794 NInoSetEncrypted(ni);
793 } 795 }
794 if (ctx->attr->flags & ATTR_IS_SPARSE) 796 if (a->flags & ATTR_IS_SPARSE)
795 NInoSetSparse(ni); 797 NInoSetSparse(ni);
796 ir = (INDEX_ROOT*)((char*)ctx->attr + le16_to_cpu( 798 ir = (INDEX_ROOT*)((u8*)a +
797 ctx->attr->data.resident.value_offset)); 799 le16_to_cpu(a->data.resident.value_offset));
798 ir_end = (char*)ir + le32_to_cpu( 800 ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
799 ctx->attr->data.resident.value_length); 801 if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
800 if (ir_end > (char*)ctx->mrec + vol->mft_record_size) {
801 ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is " 802 ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
802 "corrupt."); 803 "corrupt.");
803 goto unm_err_out; 804 goto unm_err_out;
804 } 805 }
805 index_end = (char*)&ir->index + 806 index_end = (u8*)&ir->index +
806 le32_to_cpu(ir->index.index_length); 807 le32_to_cpu(ir->index.index_length);
807 if (index_end > ir_end) { 808 if (index_end > ir_end) {
808 ntfs_error(vi->i_sb, "Directory index is corrupt."); 809 ntfs_error(vi->i_sb, "Directory index is corrupt.");
@@ -889,7 +890,8 @@ skip_attr_list_load:
889 "attribute."); 890 "attribute.");
890 goto unm_err_out; 891 goto unm_err_out;
891 } 892 }
892 if (!ctx->attr->non_resident) { 893 a = ctx->attr;
894 if (!a->non_resident) {
893 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " 895 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
894 "is resident."); 896 "is resident.");
895 goto unm_err_out; 897 goto unm_err_out;
@@ -898,42 +900,40 @@ skip_attr_list_load:
898 * Ensure the attribute name is placed before the mapping pairs 900 * Ensure the attribute name is placed before the mapping pairs
899 * array. 901 * array.
900 */ 902 */
901 if (unlikely(ctx->attr->name_length && 903 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
902 (le16_to_cpu(ctx->attr->name_offset) >= 904 le16_to_cpu(
903 le16_to_cpu(ctx->attr->data.non_resident. 905 a->data.non_resident.mapping_pairs_offset)))) {
904 mapping_pairs_offset)))) {
905 ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name " 906 ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name "
906 "is placed after the mapping pairs " 907 "is placed after the mapping pairs "
907 "array."); 908 "array.");
908 goto unm_err_out; 909 goto unm_err_out;
909 } 910 }
910 if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { 911 if (a->flags & ATTR_IS_ENCRYPTED) {
911 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " 912 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
912 "is encrypted."); 913 "is encrypted.");
913 goto unm_err_out; 914 goto unm_err_out;
914 } 915 }
915 if (ctx->attr->flags & ATTR_IS_SPARSE) { 916 if (a->flags & ATTR_IS_SPARSE) {
916 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " 917 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
917 "is sparse."); 918 "is sparse.");
918 goto unm_err_out; 919 goto unm_err_out;
919 } 920 }
920 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 921 if (a->flags & ATTR_COMPRESSION_MASK) {
921 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " 922 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
922 "is compressed."); 923 "is compressed.");
923 goto unm_err_out; 924 goto unm_err_out;
924 } 925 }
925 if (ctx->attr->data.non_resident.lowest_vcn) { 926 if (a->data.non_resident.lowest_vcn) {
926 ntfs_error(vi->i_sb, "First extent of " 927 ntfs_error(vi->i_sb, "First extent of "
927 "$INDEX_ALLOCATION attribute has non " 928 "$INDEX_ALLOCATION attribute has non "
928 "zero lowest_vcn."); 929 "zero lowest_vcn.");
929 goto unm_err_out; 930 goto unm_err_out;
930 } 931 }
931 vi->i_size = sle64_to_cpu( 932 vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
932 ctx->attr->data.non_resident.data_size);
933 ni->initialized_size = sle64_to_cpu( 933 ni->initialized_size = sle64_to_cpu(
934 ctx->attr->data.non_resident.initialized_size); 934 a->data.non_resident.initialized_size);
935 ni->allocated_size = sle64_to_cpu( 935 ni->allocated_size = sle64_to_cpu(
936 ctx->attr->data.non_resident.allocated_size); 936 a->data.non_resident.allocated_size);
937 /* 937 /*
938 * We are done with the mft record, so we release it. Otherwise 938 * We are done with the mft record, so we release it. Otherwise
939 * we would deadlock in ntfs_attr_iget(). 939 * we would deadlock in ntfs_attr_iget().
@@ -958,11 +958,12 @@ skip_attr_list_load:
958 goto unm_err_out; 958 goto unm_err_out;
959 } 959 }
960 /* Consistency check bitmap size vs. index allocation size. */ 960 /* Consistency check bitmap size vs. index allocation size. */
961 if ((bvi->i_size << 3) < (vi->i_size >> 961 bvi_size = i_size_read(bvi);
962 if ((bvi_size << 3) < (vi->i_size >>
962 ni->itype.index.block_size_bits)) { 963 ni->itype.index.block_size_bits)) {
963 ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) " 964 ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) "
964 "for index allocation (0x%llx).", 965 "for index allocation (0x%llx).",
965 bvi->i_size << 3, vi->i_size); 966 bvi_size << 3, vi->i_size);
966 goto unm_err_out; 967 goto unm_err_out;
967 } 968 }
968skip_large_dir_stuff: 969skip_large_dir_stuff:
@@ -1010,87 +1011,92 @@ skip_large_dir_stuff:
1010 ntfs_error(vi->i_sb, "$DATA attribute is missing."); 1011 ntfs_error(vi->i_sb, "$DATA attribute is missing.");
1011 goto unm_err_out; 1012 goto unm_err_out;
1012 } 1013 }
1014 a = ctx->attr;
1013 /* Setup the state. */ 1015 /* Setup the state. */
1014 if (ctx->attr->non_resident) { 1016 if (a->non_resident) {
1015 NInoSetNonResident(ni); 1017 NInoSetNonResident(ni);
1016 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 1018 if (a->flags & (ATTR_COMPRESSION_MASK |
1017 NInoSetCompressed(ni); 1019 ATTR_IS_SPARSE)) {
1018 if (vol->cluster_size > 4096) { 1020 if (a->flags & ATTR_COMPRESSION_MASK) {
1019 ntfs_error(vi->i_sb, "Found " 1021 NInoSetCompressed(ni);
1020 "compressed data but " 1022 if (vol->cluster_size > 4096) {
1021 "compression is disabled due " 1023 ntfs_error(vi->i_sb, "Found "
1022 "to cluster size (%i) > 4kiB.", 1024 "compressed data but "
1023 vol->cluster_size); 1025 "compression is "
1024 goto unm_err_out; 1026 "disabled due to "
1025 } 1027 "cluster size (%i) > "
1026 if ((ctx->attr->flags & ATTR_COMPRESSION_MASK) 1028 "4kiB.",
1027 != ATTR_IS_COMPRESSED) { 1029 vol->cluster_size);
1028 ntfs_error(vi->i_sb, "Found " 1030 goto unm_err_out;
1029 "unknown compression method or " 1031 }
1030 "corrupt file."); 1032 if ((a->flags & ATTR_COMPRESSION_MASK)
1031 goto unm_err_out; 1033 != ATTR_IS_COMPRESSED) {
1034 ntfs_error(vi->i_sb, "Found "
1035 "unknown compression "
1036 "method or corrupt "
1037 "file.");
1038 goto unm_err_out;
1039 }
1032 } 1040 }
1033 ni->itype.compressed.block_clusters = 1U << 1041 if (a->flags & ATTR_IS_SPARSE)
1034 ctx->attr->data.non_resident. 1042 NInoSetSparse(ni);
1035 compression_unit; 1043 if (a->data.non_resident.compression_unit !=
1036 if (ctx->attr->data.non_resident. 1044 4) {
1037 compression_unit != 4) {
1038 ntfs_error(vi->i_sb, "Found " 1045 ntfs_error(vi->i_sb, "Found "
1039 "nonstandard compression unit " 1046 "nonstandard compression unit "
1040 "(%u instead of 4). Cannot " 1047 "(%u instead of 4). Cannot "
1041 "handle this.", 1048 "handle this.",
1042 ctx->attr->data.non_resident. 1049 a->data.non_resident.
1043 compression_unit); 1050 compression_unit);
1044 err = -EOPNOTSUPP; 1051 err = -EOPNOTSUPP;
1045 goto unm_err_out; 1052 goto unm_err_out;
1046 } 1053 }
1054 ni->itype.compressed.block_clusters = 1U <<
1055 a->data.non_resident.
1056 compression_unit;
1047 ni->itype.compressed.block_size = 1U << ( 1057 ni->itype.compressed.block_size = 1U << (
1048 ctx->attr->data.non_resident. 1058 a->data.non_resident.
1049 compression_unit + 1059 compression_unit +
1050 vol->cluster_size_bits); 1060 vol->cluster_size_bits);
1051 ni->itype.compressed.block_size_bits = ffs( 1061 ni->itype.compressed.block_size_bits = ffs(
1052 ni->itype.compressed.block_size) - 1; 1062 ni->itype.compressed.
1063 block_size) - 1;
1064 ni->itype.compressed.size = sle64_to_cpu(
1065 a->data.non_resident.
1066 compressed_size);
1053 } 1067 }
1054 if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { 1068 if (a->flags & ATTR_IS_ENCRYPTED) {
1055 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 1069 if (a->flags & ATTR_COMPRESSION_MASK) {
1056 ntfs_error(vi->i_sb, "Found encrypted " 1070 ntfs_error(vi->i_sb, "Found encrypted "
1057 "and compressed data."); 1071 "and compressed data.");
1058 goto unm_err_out; 1072 goto unm_err_out;
1059 } 1073 }
1060 NInoSetEncrypted(ni); 1074 NInoSetEncrypted(ni);
1061 } 1075 }
1062 if (ctx->attr->flags & ATTR_IS_SPARSE) 1076 if (a->data.non_resident.lowest_vcn) {
1063 NInoSetSparse(ni);
1064 if (ctx->attr->data.non_resident.lowest_vcn) {
1065 ntfs_error(vi->i_sb, "First extent of $DATA " 1077 ntfs_error(vi->i_sb, "First extent of $DATA "
1066 "attribute has non zero " 1078 "attribute has non zero "
1067 "lowest_vcn."); 1079 "lowest_vcn.");
1068 goto unm_err_out; 1080 goto unm_err_out;
1069 } 1081 }
1070 /* Setup all the sizes. */
1071 vi->i_size = sle64_to_cpu( 1082 vi->i_size = sle64_to_cpu(
1072 ctx->attr->data.non_resident.data_size); 1083 a->data.non_resident.data_size);
1073 ni->initialized_size = sle64_to_cpu( 1084 ni->initialized_size = sle64_to_cpu(
1074 ctx->attr->data.non_resident. 1085 a->data.non_resident.initialized_size);
1075 initialized_size);
1076 ni->allocated_size = sle64_to_cpu( 1086 ni->allocated_size = sle64_to_cpu(
1077 ctx->attr->data.non_resident. 1087 a->data.non_resident.allocated_size);
1078 allocated_size);
1079 if (NInoCompressed(ni)) {
1080 ni->itype.compressed.size = sle64_to_cpu(
1081 ctx->attr->data.non_resident.
1082 compressed_size);
1083 }
1084 } else { /* Resident attribute. */ 1088 } else { /* Resident attribute. */
1085 /* 1089 vi->i_size = ni->initialized_size = le32_to_cpu(
1086 * Make all sizes equal for simplicity in read code 1090 a->data.resident.value_length);
1087 * paths. FIXME: Need to keep this in mind when 1091 ni->allocated_size = le32_to_cpu(a->length) -
1088 * converting to non-resident attribute in write code 1092 le16_to_cpu(
1089 * path. (Probably only affects truncate().) 1093 a->data.resident.value_offset);
1090 */ 1094 if (vi->i_size > ni->allocated_size) {
1091 vi->i_size = ni->initialized_size = ni->allocated_size = 1095 ntfs_error(vi->i_sb, "Resident data attribute "
1092 le32_to_cpu( 1096 "is corrupt (size exceeds "
1093 ctx->attr->data.resident.value_length); 1097 "allocation).");
1098 goto unm_err_out;
1099 }
1094 } 1100 }
1095no_data_attr_special_case: 1101no_data_attr_special_case:
1096 /* We are done with the mft record, so we release it. */ 1102 /* We are done with the mft record, so we release it. */
@@ -1117,11 +1123,10 @@ no_data_attr_special_case:
1117 * sizes of all non-resident attributes present to give us the Linux 1123 * sizes of all non-resident attributes present to give us the Linux
1118 * correct size that should go into i_blocks (after division by 512). 1124 * correct size that should go into i_blocks (after division by 512).
1119 */ 1125 */
1120 if (S_ISDIR(vi->i_mode) || !NInoCompressed(ni)) 1126 if (S_ISREG(vi->i_mode) && (NInoCompressed(ni) || NInoSparse(ni)))
1121 vi->i_blocks = ni->allocated_size >> 9;
1122 else
1123 vi->i_blocks = ni->itype.compressed.size >> 9; 1127 vi->i_blocks = ni->itype.compressed.size >> 9;
1124 1128 else
1129 vi->i_blocks = ni->allocated_size >> 9;
1125 ntfs_debug("Done."); 1130 ntfs_debug("Done.");
1126 return 0; 1131 return 0;
1127 1132
@@ -1166,6 +1171,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1166 ntfs_volume *vol = NTFS_SB(vi->i_sb); 1171 ntfs_volume *vol = NTFS_SB(vi->i_sb);
1167 ntfs_inode *ni, *base_ni; 1172 ntfs_inode *ni, *base_ni;
1168 MFT_RECORD *m; 1173 MFT_RECORD *m;
1174 ATTR_RECORD *a;
1169 ntfs_attr_search_ctx *ctx; 1175 ntfs_attr_search_ctx *ctx;
1170 int err = 0; 1176 int err = 0;
1171 1177
@@ -1200,24 +1206,21 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1200 err = -ENOMEM; 1206 err = -ENOMEM;
1201 goto unm_err_out; 1207 goto unm_err_out;
1202 } 1208 }
1203
1204 /* Find the attribute. */ 1209 /* Find the attribute. */
1205 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1210 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1206 CASE_SENSITIVE, 0, NULL, 0, ctx); 1211 CASE_SENSITIVE, 0, NULL, 0, ctx);
1207 if (unlikely(err)) 1212 if (unlikely(err))
1208 goto unm_err_out; 1213 goto unm_err_out;
1209 1214 a = ctx->attr;
1210 if (!ctx->attr->non_resident) { 1215 if (!a->non_resident) {
1211 /* Ensure the attribute name is placed before the value. */ 1216 /* Ensure the attribute name is placed before the value. */
1212 if (unlikely(ctx->attr->name_length && 1217 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1213 (le16_to_cpu(ctx->attr->name_offset) >= 1218 le16_to_cpu(a->data.resident.value_offset)))) {
1214 le16_to_cpu(ctx->attr->data.resident.
1215 value_offset)))) {
1216 ntfs_error(vol->sb, "Attribute name is placed after " 1219 ntfs_error(vol->sb, "Attribute name is placed after "
1217 "the attribute value."); 1220 "the attribute value.");
1218 goto unm_err_out; 1221 goto unm_err_out;
1219 } 1222 }
1220 if (NInoMstProtected(ni) || ctx->attr->flags) { 1223 if (NInoMstProtected(ni) || a->flags) {
1221 ntfs_error(vi->i_sb, "Found mst protected attribute " 1224 ntfs_error(vi->i_sb, "Found mst protected attribute "
1222 "or attribute with non-zero flags but " 1225 "or attribute with non-zero flags but "
1223 "the attribute is resident. Please " 1226 "the attribute is resident. Please "
@@ -1225,85 +1228,95 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1225 "linux-ntfs-dev@lists.sourceforge.net"); 1228 "linux-ntfs-dev@lists.sourceforge.net");
1226 goto unm_err_out; 1229 goto unm_err_out;
1227 } 1230 }
1228 /* 1231 vi->i_size = ni->initialized_size = le32_to_cpu(
1229 * Resident attribute. Make all sizes equal for simplicity in 1232 a->data.resident.value_length);
1230 * read code paths. 1233 ni->allocated_size = le32_to_cpu(a->length) -
1231 */ 1234 le16_to_cpu(a->data.resident.value_offset);
1232 vi->i_size = ni->initialized_size = ni->allocated_size = 1235 if (vi->i_size > ni->allocated_size) {
1233 le32_to_cpu(ctx->attr->data.resident.value_length); 1236 ntfs_error(vi->i_sb, "Resident attribute is corrupt "
1237 "(size exceeds allocation).");
1238 goto unm_err_out;
1239 }
1234 } else { 1240 } else {
1235 NInoSetNonResident(ni); 1241 NInoSetNonResident(ni);
1236 /* 1242 /*
1237 * Ensure the attribute name is placed before the mapping pairs 1243 * Ensure the attribute name is placed before the mapping pairs
1238 * array. 1244 * array.
1239 */ 1245 */
1240 if (unlikely(ctx->attr->name_length && 1246 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1241 (le16_to_cpu(ctx->attr->name_offset) >= 1247 le16_to_cpu(
1242 le16_to_cpu(ctx->attr->data.non_resident. 1248 a->data.non_resident.mapping_pairs_offset)))) {
1243 mapping_pairs_offset)))) {
1244 ntfs_error(vol->sb, "Attribute name is placed after " 1249 ntfs_error(vol->sb, "Attribute name is placed after "
1245 "the mapping pairs array."); 1250 "the mapping pairs array.");
1246 goto unm_err_out; 1251 goto unm_err_out;
1247 } 1252 }
1248 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 1253 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1254 if (a->flags & ATTR_COMPRESSION_MASK) {
1255 NInoSetCompressed(ni);
1256 if ((ni->type != AT_DATA) || (ni->type ==
1257 AT_DATA && ni->name_len)) {
1258 ntfs_error(vi->i_sb, "Found compressed "
1259 "non-data or named "
1260 "data attribute. "
1261 "Please report you "
1262 "saw this message to "
1263 "linux-ntfs-dev@lists."
1264 "sourceforge.net");
1265 goto unm_err_out;
1266 }
1267 if (vol->cluster_size > 4096) {
1268 ntfs_error(vi->i_sb, "Found compressed "
1269 "attribute but "
1270 "compression is "
1271 "disabled due to "
1272 "cluster size (%i) > "
1273 "4kiB.",
1274 vol->cluster_size);
1275 goto unm_err_out;
1276 }
1277 if ((a->flags & ATTR_COMPRESSION_MASK) !=
1278 ATTR_IS_COMPRESSED) {
1279 ntfs_error(vi->i_sb, "Found unknown "
1280 "compression method.");
1281 goto unm_err_out;
1282 }
1283 }
1249 if (NInoMstProtected(ni)) { 1284 if (NInoMstProtected(ni)) {
1250 ntfs_error(vi->i_sb, "Found mst protected " 1285 ntfs_error(vi->i_sb, "Found mst protected "
1251 "attribute but the attribute " 1286 "attribute but the attribute "
1252 "is compressed. Please report " 1287 "is %s. Please report you "
1253 "you saw this message to " 1288 "saw this message to "
1254 "linux-ntfs-dev@lists."
1255 "sourceforge.net");
1256 goto unm_err_out;
1257 }
1258 NInoSetCompressed(ni);
1259 if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
1260 ni->name_len)) {
1261 ntfs_error(vi->i_sb, "Found compressed "
1262 "non-data or named data "
1263 "attribute. Please report "
1264 "you saw this message to "
1265 "linux-ntfs-dev@lists." 1289 "linux-ntfs-dev@lists."
1266 "sourceforge.net"); 1290 "sourceforge.net",
1267 goto unm_err_out; 1291 NInoCompressed(ni) ?
1268 } 1292 "compressed" : "sparse");
1269 if (vol->cluster_size > 4096) {
1270 ntfs_error(vi->i_sb, "Found compressed "
1271 "attribute but compression is "
1272 "disabled due to cluster size "
1273 "(%i) > 4kiB.",
1274 vol->cluster_size);
1275 goto unm_err_out; 1293 goto unm_err_out;
1276 } 1294 }
1277 if ((ctx->attr->flags & ATTR_COMPRESSION_MASK) 1295 if (a->flags & ATTR_IS_SPARSE)
1278 != ATTR_IS_COMPRESSED) { 1296 NInoSetSparse(ni);
1279 ntfs_error(vi->i_sb, "Found unknown " 1297 if (a->data.non_resident.compression_unit != 4) {
1280 "compression method.");
1281 goto unm_err_out;
1282 }
1283 ni->itype.compressed.block_clusters = 1U <<
1284 ctx->attr->data.non_resident.
1285 compression_unit;
1286 if (ctx->attr->data.non_resident.compression_unit !=
1287 4) {
1288 ntfs_error(vi->i_sb, "Found nonstandard " 1298 ntfs_error(vi->i_sb, "Found nonstandard "
1289 "compression unit (%u instead " 1299 "compression unit (%u instead "
1290 "of 4). Cannot handle this.", 1300 "of 4). Cannot handle this.",
1291 ctx->attr->data.non_resident. 1301 a->data.non_resident.
1292 compression_unit); 1302 compression_unit);
1293 err = -EOPNOTSUPP; 1303 err = -EOPNOTSUPP;
1294 goto unm_err_out; 1304 goto unm_err_out;
1295 } 1305 }
1306 ni->itype.compressed.block_clusters = 1U <<
1307 a->data.non_resident.compression_unit;
1296 ni->itype.compressed.block_size = 1U << ( 1308 ni->itype.compressed.block_size = 1U << (
1297 ctx->attr->data.non_resident. 1309 a->data.non_resident.compression_unit +
1298 compression_unit +
1299 vol->cluster_size_bits); 1310 vol->cluster_size_bits);
1300 ni->itype.compressed.block_size_bits = ffs( 1311 ni->itype.compressed.block_size_bits = ffs(
1301 ni->itype.compressed.block_size) - 1; 1312 ni->itype.compressed.block_size) - 1;
1313 ni->itype.compressed.size = sle64_to_cpu(
1314 a->data.non_resident.compressed_size);
1302 } 1315 }
1303 if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { 1316 if (a->flags & ATTR_IS_ENCRYPTED) {
1304 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 1317 if (a->flags & ATTR_COMPRESSION_MASK) {
1305 ntfs_error(vi->i_sb, "Found encrypted " 1318 ntfs_error(vi->i_sb, "Found encrypted and "
1306 "and compressed data."); 1319 "compressed data.");
1307 goto unm_err_out; 1320 goto unm_err_out;
1308 } 1321 }
1309 if (NInoMstProtected(ni)) { 1322 if (NInoMstProtected(ni)) {
@@ -1317,37 +1330,17 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1317 } 1330 }
1318 NInoSetEncrypted(ni); 1331 NInoSetEncrypted(ni);
1319 } 1332 }
1320 if (ctx->attr->flags & ATTR_IS_SPARSE) { 1333 if (a->data.non_resident.lowest_vcn) {
1321 if (NInoMstProtected(ni)) {
1322 ntfs_error(vi->i_sb, "Found mst protected "
1323 "attribute but the attribute "
1324 "is sparse. Please report "
1325 "you saw this message to "
1326 "linux-ntfs-dev@lists."
1327 "sourceforge.net");
1328 goto unm_err_out;
1329 }
1330 NInoSetSparse(ni);
1331 }
1332 if (ctx->attr->data.non_resident.lowest_vcn) {
1333 ntfs_error(vi->i_sb, "First extent of attribute has " 1334 ntfs_error(vi->i_sb, "First extent of attribute has "
1334 "non-zero lowest_vcn."); 1335 "non-zero lowest_vcn.");
1335 goto unm_err_out; 1336 goto unm_err_out;
1336 } 1337 }
1337 /* Setup all the sizes. */ 1338 vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
1338 vi->i_size = sle64_to_cpu(
1339 ctx->attr->data.non_resident.data_size);
1340 ni->initialized_size = sle64_to_cpu( 1339 ni->initialized_size = sle64_to_cpu(
1341 ctx->attr->data.non_resident.initialized_size); 1340 a->data.non_resident.initialized_size);
1342 ni->allocated_size = sle64_to_cpu( 1341 ni->allocated_size = sle64_to_cpu(
1343 ctx->attr->data.non_resident.allocated_size); 1342 a->data.non_resident.allocated_size);
1344 if (NInoCompressed(ni)) {
1345 ni->itype.compressed.size = sle64_to_cpu(
1346 ctx->attr->data.non_resident.
1347 compressed_size);
1348 }
1349 } 1343 }
1350
1351 /* Setup the operations for this attribute inode. */ 1344 /* Setup the operations for this attribute inode. */
1352 vi->i_op = NULL; 1345 vi->i_op = NULL;
1353 vi->i_fop = NULL; 1346 vi->i_fop = NULL;
@@ -1355,12 +1348,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1355 vi->i_mapping->a_ops = &ntfs_mst_aops; 1348 vi->i_mapping->a_ops = &ntfs_mst_aops;
1356 else 1349 else
1357 vi->i_mapping->a_ops = &ntfs_aops; 1350 vi->i_mapping->a_ops = &ntfs_aops;
1358 1351 if (NInoCompressed(ni) || NInoSparse(ni))
1359 if (!NInoCompressed(ni))
1360 vi->i_blocks = ni->allocated_size >> 9;
1361 else
1362 vi->i_blocks = ni->itype.compressed.size >> 9; 1352 vi->i_blocks = ni->itype.compressed.size >> 9;
1363 1353 else
1354 vi->i_blocks = ni->allocated_size >> 9;
1364 /* 1355 /*
1365 * Make sure the base inode doesn't go away and attach it to the 1356 * Make sure the base inode doesn't go away and attach it to the
1366 * attribute inode. 1357 * attribute inode.
@@ -1429,10 +1420,12 @@ err_out:
1429 */ 1420 */
1430static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) 1421static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1431{ 1422{
1423 loff_t bvi_size;
1432 ntfs_volume *vol = NTFS_SB(vi->i_sb); 1424 ntfs_volume *vol = NTFS_SB(vi->i_sb);
1433 ntfs_inode *ni, *base_ni, *bni; 1425 ntfs_inode *ni, *base_ni, *bni;
1434 struct inode *bvi; 1426 struct inode *bvi;
1435 MFT_RECORD *m; 1427 MFT_RECORD *m;
1428 ATTR_RECORD *a;
1436 ntfs_attr_search_ctx *ctx; 1429 ntfs_attr_search_ctx *ctx;
1437 INDEX_ROOT *ir; 1430 INDEX_ROOT *ir;
1438 u8 *ir_end, *index_end; 1431 u8 *ir_end, *index_end;
@@ -1474,30 +1467,28 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1474 "missing."); 1467 "missing.");
1475 goto unm_err_out; 1468 goto unm_err_out;
1476 } 1469 }
1470 a = ctx->attr;
1477 /* Set up the state. */ 1471 /* Set up the state. */
1478 if (unlikely(ctx->attr->non_resident)) { 1472 if (unlikely(a->non_resident)) {
1479 ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident."); 1473 ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident.");
1480 goto unm_err_out; 1474 goto unm_err_out;
1481 } 1475 }
1482 /* Ensure the attribute name is placed before the value. */ 1476 /* Ensure the attribute name is placed before the value. */
1483 if (unlikely(ctx->attr->name_length && 1477 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1484 (le16_to_cpu(ctx->attr->name_offset) >= 1478 le16_to_cpu(a->data.resident.value_offset)))) {
1485 le16_to_cpu(ctx->attr->data.resident.
1486 value_offset)))) {
1487 ntfs_error(vol->sb, "$INDEX_ROOT attribute name is placed " 1479 ntfs_error(vol->sb, "$INDEX_ROOT attribute name is placed "
1488 "after the attribute value."); 1480 "after the attribute value.");
1489 goto unm_err_out; 1481 goto unm_err_out;
1490 } 1482 }
1491 /* Compressed/encrypted/sparse index root is not allowed. */ 1483 /* Compressed/encrypted/sparse index root is not allowed. */
1492 if (ctx->attr->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED | 1484 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED |
1493 ATTR_IS_SPARSE)) { 1485 ATTR_IS_SPARSE)) {
1494 ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index " 1486 ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index "
1495 "root attribute."); 1487 "root attribute.");
1496 goto unm_err_out; 1488 goto unm_err_out;
1497 } 1489 }
1498 ir = (INDEX_ROOT*)((u8*)ctx->attr + 1490 ir = (INDEX_ROOT*)((u8*)a + le16_to_cpu(a->data.resident.value_offset));
1499 le16_to_cpu(ctx->attr->data.resident.value_offset)); 1491 ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
1500 ir_end = (u8*)ir + le32_to_cpu(ctx->attr->data.resident.value_length);
1501 if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) { 1492 if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
1502 ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt."); 1493 ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt.");
1503 goto unm_err_out; 1494 goto unm_err_out;
@@ -1570,7 +1561,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1570 "$INDEX_ALLOCATION attribute."); 1561 "$INDEX_ALLOCATION attribute.");
1571 goto unm_err_out; 1562 goto unm_err_out;
1572 } 1563 }
1573 if (!ctx->attr->non_resident) { 1564 if (!a->non_resident) {
1574 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " 1565 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1575 "resident."); 1566 "resident.");
1576 goto unm_err_out; 1567 goto unm_err_out;
@@ -1578,37 +1569,36 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1578 /* 1569 /*
1579 * Ensure the attribute name is placed before the mapping pairs array. 1570 * Ensure the attribute name is placed before the mapping pairs array.
1580 */ 1571 */
1581 if (unlikely(ctx->attr->name_length && (le16_to_cpu( 1572 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1582 ctx->attr->name_offset) >= le16_to_cpu( 1573 le16_to_cpu(
1583 ctx->attr->data.non_resident.mapping_pairs_offset)))) { 1574 a->data.non_resident.mapping_pairs_offset)))) {
1584 ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name is " 1575 ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name is "
1585 "placed after the mapping pairs array."); 1576 "placed after the mapping pairs array.");
1586 goto unm_err_out; 1577 goto unm_err_out;
1587 } 1578 }
1588 if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { 1579 if (a->flags & ATTR_IS_ENCRYPTED) {
1589 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " 1580 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1590 "encrypted."); 1581 "encrypted.");
1591 goto unm_err_out; 1582 goto unm_err_out;
1592 } 1583 }
1593 if (ctx->attr->flags & ATTR_IS_SPARSE) { 1584 if (a->flags & ATTR_IS_SPARSE) {
1594 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse."); 1585 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse.");
1595 goto unm_err_out; 1586 goto unm_err_out;
1596 } 1587 }
1597 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 1588 if (a->flags & ATTR_COMPRESSION_MASK) {
1598 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " 1589 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1599 "compressed."); 1590 "compressed.");
1600 goto unm_err_out; 1591 goto unm_err_out;
1601 } 1592 }
1602 if (ctx->attr->data.non_resident.lowest_vcn) { 1593 if (a->data.non_resident.lowest_vcn) {
1603 ntfs_error(vi->i_sb, "First extent of $INDEX_ALLOCATION " 1594 ntfs_error(vi->i_sb, "First extent of $INDEX_ALLOCATION "
1604 "attribute has non zero lowest_vcn."); 1595 "attribute has non zero lowest_vcn.");
1605 goto unm_err_out; 1596 goto unm_err_out;
1606 } 1597 }
1607 vi->i_size = sle64_to_cpu(ctx->attr->data.non_resident.data_size); 1598 vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
1608 ni->initialized_size = sle64_to_cpu( 1599 ni->initialized_size = sle64_to_cpu(
1609 ctx->attr->data.non_resident.initialized_size); 1600 a->data.non_resident.initialized_size);
1610 ni->allocated_size = sle64_to_cpu( 1601 ni->allocated_size = sle64_to_cpu(a->data.non_resident.allocated_size);
1611 ctx->attr->data.non_resident.allocated_size);
1612 /* 1602 /*
1613 * We are done with the mft record, so we release it. Otherwise 1603 * We are done with the mft record, so we release it. Otherwise
1614 * we would deadlock in ntfs_attr_iget(). 1604 * we would deadlock in ntfs_attr_iget().
@@ -1632,10 +1622,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1632 goto iput_unm_err_out; 1622 goto iput_unm_err_out;
1633 } 1623 }
1634 /* Consistency check bitmap size vs. index allocation size. */ 1624 /* Consistency check bitmap size vs. index allocation size. */
1635 if ((bvi->i_size << 3) < (vi->i_size >> 1625 bvi_size = i_size_read(bvi);
1636 ni->itype.index.block_size_bits)) { 1626 if ((bvi_size << 3) < (vi->i_size >> ni->itype.index.block_size_bits)) {
1637 ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) for " 1627 ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) for "
1638 "index allocation (0x%llx).", bvi->i_size << 3, 1628 "index allocation (0x%llx).", bvi_size << 3,
1639 vi->i_size); 1629 vi->i_size);
1640 goto iput_unm_err_out; 1630 goto iput_unm_err_out;
1641 } 1631 }
@@ -1646,7 +1636,6 @@ skip_large_index_stuff:
1646 vi->i_fop = NULL; 1636 vi->i_fop = NULL;
1647 vi->i_mapping->a_ops = &ntfs_mst_aops; 1637 vi->i_mapping->a_ops = &ntfs_mst_aops;
1648 vi->i_blocks = ni->allocated_size >> 9; 1638 vi->i_blocks = ni->allocated_size >> 9;
1649
1650 /* 1639 /*
1651 * Make sure the base inode doesn't go away and attach it to the 1640 * Make sure the base inode doesn't go away and attach it to the
1652 * index inode. 1641 * index inode.
@@ -1712,7 +1701,7 @@ int ntfs_read_inode_mount(struct inode *vi)
1712 struct buffer_head *bh; 1701 struct buffer_head *bh;
1713 ntfs_inode *ni; 1702 ntfs_inode *ni;
1714 MFT_RECORD *m = NULL; 1703 MFT_RECORD *m = NULL;
1715 ATTR_RECORD *attr; 1704 ATTR_RECORD *a;
1716 ntfs_attr_search_ctx *ctx; 1705 ntfs_attr_search_ctx *ctx;
1717 unsigned int i, nr_blocks; 1706 unsigned int i, nr_blocks;
1718 int err; 1707 int err;
@@ -1727,10 +1716,10 @@ int ntfs_read_inode_mount(struct inode *vi)
1727 /* Setup the data attribute. It is special as it is mst protected. */ 1716 /* Setup the data attribute. It is special as it is mst protected. */
1728 NInoSetNonResident(ni); 1717 NInoSetNonResident(ni);
1729 NInoSetMstProtected(ni); 1718 NInoSetMstProtected(ni);
1719 NInoSetSparseDisabled(ni);
1730 ni->type = AT_DATA; 1720 ni->type = AT_DATA;
1731 ni->name = NULL; 1721 ni->name = NULL;
1732 ni->name_len = 0; 1722 ni->name_len = 0;
1733
1734 /* 1723 /*
1735 * This sets up our little cheat allowing us to reuse the async read io 1724 * This sets up our little cheat allowing us to reuse the async read io
1736 * completion handler for directories. 1725 * completion handler for directories.
@@ -1808,9 +1797,10 @@ int ntfs_read_inode_mount(struct inode *vi)
1808 1797
1809 ntfs_debug("Attribute list attribute found in $MFT."); 1798 ntfs_debug("Attribute list attribute found in $MFT.");
1810 NInoSetAttrList(ni); 1799 NInoSetAttrList(ni);
1811 if (ctx->attr->flags & ATTR_IS_ENCRYPTED || 1800 a = ctx->attr;
1812 ctx->attr->flags & ATTR_COMPRESSION_MASK || 1801 if (a->flags & ATTR_IS_ENCRYPTED ||
1813 ctx->attr->flags & ATTR_IS_SPARSE) { 1802 a->flags & ATTR_COMPRESSION_MASK ||
1803 a->flags & ATTR_IS_SPARSE) {
1814 ntfs_error(sb, "Attribute list attribute is " 1804 ntfs_error(sb, "Attribute list attribute is "
1815 "compressed/encrypted/sparse. Not " 1805 "compressed/encrypted/sparse. Not "
1816 "allowed. $MFT is corrupt. You should " 1806 "allowed. $MFT is corrupt. You should "
@@ -1818,16 +1808,16 @@ int ntfs_read_inode_mount(struct inode *vi)
1818 goto put_err_out; 1808 goto put_err_out;
1819 } 1809 }
1820 /* Now allocate memory for the attribute list. */ 1810 /* Now allocate memory for the attribute list. */
1821 ni->attr_list_size = (u32)ntfs_attr_size(ctx->attr); 1811 ni->attr_list_size = (u32)ntfs_attr_size(a);
1822 ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); 1812 ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
1823 if (!ni->attr_list) { 1813 if (!ni->attr_list) {
1824 ntfs_error(sb, "Not enough memory to allocate buffer " 1814 ntfs_error(sb, "Not enough memory to allocate buffer "
1825 "for attribute list."); 1815 "for attribute list.");
1826 goto put_err_out; 1816 goto put_err_out;
1827 } 1817 }
1828 if (ctx->attr->non_resident) { 1818 if (a->non_resident) {
1829 NInoSetAttrListNonResident(ni); 1819 NInoSetAttrListNonResident(ni);
1830 if (ctx->attr->data.non_resident.lowest_vcn) { 1820 if (a->data.non_resident.lowest_vcn) {
1831 ntfs_error(sb, "Attribute list has non zero " 1821 ntfs_error(sb, "Attribute list has non zero "
1832 "lowest_vcn. $MFT is corrupt. " 1822 "lowest_vcn. $MFT is corrupt. "
1833 "You should run chkdsk."); 1823 "You should run chkdsk.");
@@ -1835,7 +1825,7 @@ int ntfs_read_inode_mount(struct inode *vi)
1835 } 1825 }
1836 /* Setup the runlist. */ 1826 /* Setup the runlist. */
1837 ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol, 1827 ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
1838 ctx->attr, NULL); 1828 a, NULL);
1839 if (IS_ERR(ni->attr_list_rl.rl)) { 1829 if (IS_ERR(ni->attr_list_rl.rl)) {
1840 err = PTR_ERR(ni->attr_list_rl.rl); 1830 err = PTR_ERR(ni->attr_list_rl.rl);
1841 ni->attr_list_rl.rl = NULL; 1831 ni->attr_list_rl.rl = NULL;
@@ -1847,7 +1837,7 @@ int ntfs_read_inode_mount(struct inode *vi)
1847 /* Now load the attribute list. */ 1837 /* Now load the attribute list. */
1848 if ((err = load_attribute_list(vol, &ni->attr_list_rl, 1838 if ((err = load_attribute_list(vol, &ni->attr_list_rl,
1849 ni->attr_list, ni->attr_list_size, 1839 ni->attr_list, ni->attr_list_size,
1850 sle64_to_cpu(ctx->attr->data. 1840 sle64_to_cpu(a->data.
1851 non_resident.initialized_size)))) { 1841 non_resident.initialized_size)))) {
1852 ntfs_error(sb, "Failed to load attribute list " 1842 ntfs_error(sb, "Failed to load attribute list "
1853 "attribute with error code %i.", 1843 "attribute with error code %i.",
@@ -1855,20 +1845,20 @@ int ntfs_read_inode_mount(struct inode *vi)
1855 goto put_err_out; 1845 goto put_err_out;
1856 } 1846 }
1857 } else /* if (!ctx.attr->non_resident) */ { 1847 } else /* if (!ctx.attr->non_resident) */ {
1858 if ((u8*)ctx->attr + le16_to_cpu( 1848 if ((u8*)a + le16_to_cpu(
1859 ctx->attr->data.resident.value_offset) + 1849 a->data.resident.value_offset) +
1860 le32_to_cpu( 1850 le32_to_cpu(
1861 ctx->attr->data.resident.value_length) > 1851 a->data.resident.value_length) >
1862 (u8*)ctx->mrec + vol->mft_record_size) { 1852 (u8*)ctx->mrec + vol->mft_record_size) {
1863 ntfs_error(sb, "Corrupt attribute list " 1853 ntfs_error(sb, "Corrupt attribute list "
1864 "attribute."); 1854 "attribute.");
1865 goto put_err_out; 1855 goto put_err_out;
1866 } 1856 }
1867 /* Now copy the attribute list. */ 1857 /* Now copy the attribute list. */
1868 memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu( 1858 memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
1869 ctx->attr->data.resident.value_offset), 1859 a->data.resident.value_offset),
1870 le32_to_cpu( 1860 le32_to_cpu(
1871 ctx->attr->data.resident.value_length)); 1861 a->data.resident.value_length));
1872 } 1862 }
1873 /* The attribute list is now setup in memory. */ 1863 /* The attribute list is now setup in memory. */
1874 /* 1864 /*
@@ -1934,25 +1924,25 @@ int ntfs_read_inode_mount(struct inode *vi)
1934 ntfs_attr_reinit_search_ctx(ctx); 1924 ntfs_attr_reinit_search_ctx(ctx);
1935 1925
1936 /* Now load all attribute extents. */ 1926 /* Now load all attribute extents. */
1937 attr = NULL; 1927 a = NULL;
1938 next_vcn = last_vcn = highest_vcn = 0; 1928 next_vcn = last_vcn = highest_vcn = 0;
1939 while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0, 1929 while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0,
1940 ctx))) { 1930 ctx))) {
1941 runlist_element *nrl; 1931 runlist_element *nrl;
1942 1932
1943 /* Cache the current attribute. */ 1933 /* Cache the current attribute. */
1944 attr = ctx->attr; 1934 a = ctx->attr;
1945 /* $MFT must be non-resident. */ 1935 /* $MFT must be non-resident. */
1946 if (!attr->non_resident) { 1936 if (!a->non_resident) {
1947 ntfs_error(sb, "$MFT must be non-resident but a " 1937 ntfs_error(sb, "$MFT must be non-resident but a "
1948 "resident extent was found. $MFT is " 1938 "resident extent was found. $MFT is "
1949 "corrupt. Run chkdsk."); 1939 "corrupt. Run chkdsk.");
1950 goto put_err_out; 1940 goto put_err_out;
1951 } 1941 }
1952 /* $MFT must be uncompressed and unencrypted. */ 1942 /* $MFT must be uncompressed and unencrypted. */
1953 if (attr->flags & ATTR_COMPRESSION_MASK || 1943 if (a->flags & ATTR_COMPRESSION_MASK ||
1954 attr->flags & ATTR_IS_ENCRYPTED || 1944 a->flags & ATTR_IS_ENCRYPTED ||
1955 attr->flags & ATTR_IS_SPARSE) { 1945 a->flags & ATTR_IS_SPARSE) {
1956 ntfs_error(sb, "$MFT must be uncompressed, " 1946 ntfs_error(sb, "$MFT must be uncompressed, "
1957 "non-sparse, and unencrypted but a " 1947 "non-sparse, and unencrypted but a "
1958 "compressed/sparse/encrypted extent " 1948 "compressed/sparse/encrypted extent "
@@ -1966,7 +1956,7 @@ int ntfs_read_inode_mount(struct inode *vi)
1966 * as we have exclusive access to the inode at this time and we 1956 * as we have exclusive access to the inode at this time and we
1967 * are a mount in progress task, too. 1957 * are a mount in progress task, too.
1968 */ 1958 */
1969 nrl = ntfs_mapping_pairs_decompress(vol, attr, ni->runlist.rl); 1959 nrl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl);
1970 if (IS_ERR(nrl)) { 1960 if (IS_ERR(nrl)) {
1971 ntfs_error(sb, "ntfs_mapping_pairs_decompress() " 1961 ntfs_error(sb, "ntfs_mapping_pairs_decompress() "
1972 "failed with error code %ld. $MFT is " 1962 "failed with error code %ld. $MFT is "
@@ -1977,7 +1967,7 @@ int ntfs_read_inode_mount(struct inode *vi)
1977 1967
1978 /* Are we in the first extent? */ 1968 /* Are we in the first extent? */
1979 if (!next_vcn) { 1969 if (!next_vcn) {
1980 if (attr->data.non_resident.lowest_vcn) { 1970 if (a->data.non_resident.lowest_vcn) {
1981 ntfs_error(sb, "First extent of $DATA " 1971 ntfs_error(sb, "First extent of $DATA "
1982 "attribute has non zero " 1972 "attribute has non zero "
1983 "lowest_vcn. $MFT is corrupt. " 1973 "lowest_vcn. $MFT is corrupt. "
@@ -1986,15 +1976,15 @@ int ntfs_read_inode_mount(struct inode *vi)
1986 } 1976 }
1987 /* Get the last vcn in the $DATA attribute. */ 1977 /* Get the last vcn in the $DATA attribute. */
1988 last_vcn = sle64_to_cpu( 1978 last_vcn = sle64_to_cpu(
1989 attr->data.non_resident.allocated_size) 1979 a->data.non_resident.allocated_size)
1990 >> vol->cluster_size_bits; 1980 >> vol->cluster_size_bits;
1991 /* Fill in the inode size. */ 1981 /* Fill in the inode size. */
1992 vi->i_size = sle64_to_cpu( 1982 vi->i_size = sle64_to_cpu(
1993 attr->data.non_resident.data_size); 1983 a->data.non_resident.data_size);
1994 ni->initialized_size = sle64_to_cpu(attr->data. 1984 ni->initialized_size = sle64_to_cpu(
1995 non_resident.initialized_size); 1985 a->data.non_resident.initialized_size);
1996 ni->allocated_size = sle64_to_cpu( 1986 ni->allocated_size = sle64_to_cpu(
1997 attr->data.non_resident.allocated_size); 1987 a->data.non_resident.allocated_size);
1998 /* 1988 /*
1999 * Verify the number of mft records does not exceed 1989 * Verify the number of mft records does not exceed
2000 * 2^32 - 1. 1990 * 2^32 - 1.
@@ -2051,7 +2041,7 @@ int ntfs_read_inode_mount(struct inode *vi)
2051 } 2041 }
2052 2042
2053 /* Get the lowest vcn for the next extent. */ 2043 /* Get the lowest vcn for the next extent. */
2054 highest_vcn = sle64_to_cpu(attr->data.non_resident.highest_vcn); 2044 highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
2055 next_vcn = highest_vcn + 1; 2045 next_vcn = highest_vcn + 1;
2056 2046
2057 /* Only one extent or error, which we catch below. */ 2047 /* Only one extent or error, which we catch below. */
@@ -2060,7 +2050,7 @@ int ntfs_read_inode_mount(struct inode *vi)
2060 2050
2061 /* Avoid endless loops due to corruption. */ 2051 /* Avoid endless loops due to corruption. */
2062 if (next_vcn < sle64_to_cpu( 2052 if (next_vcn < sle64_to_cpu(
2063 attr->data.non_resident.lowest_vcn)) { 2053 a->data.non_resident.lowest_vcn)) {
2064 ntfs_error(sb, "$MFT has corrupt attribute list " 2054 ntfs_error(sb, "$MFT has corrupt attribute list "
2065 "attribute. Run chkdsk."); 2055 "attribute. Run chkdsk.");
2066 goto put_err_out; 2056 goto put_err_out;
@@ -2071,7 +2061,7 @@ int ntfs_read_inode_mount(struct inode *vi)
2071 "$MFT is corrupt. Run chkdsk."); 2061 "$MFT is corrupt. Run chkdsk.");
2072 goto put_err_out; 2062 goto put_err_out;
2073 } 2063 }
2074 if (!attr) { 2064 if (!a) {
2075 ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is " 2065 ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is "
2076 "corrupt. Run chkdsk."); 2066 "corrupt. Run chkdsk.");
2077 goto put_err_out; 2067 goto put_err_out;
@@ -2275,6 +2265,8 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
2275 seq_printf(sf, ",case_sensitive"); 2265 seq_printf(sf, ",case_sensitive");
2276 if (NVolShowSystemFiles(vol)) 2266 if (NVolShowSystemFiles(vol))
2277 seq_printf(sf, ",show_sys_files"); 2267 seq_printf(sf, ",show_sys_files");
2268 if (!NVolSparseEnabled(vol))
2269 seq_printf(sf, ",disable_sparse");
2278 for (i = 0; on_errors_arr[i].val; i++) { 2270 for (i = 0; on_errors_arr[i].val; i++) {
2279 if (on_errors_arr[i].val & vol->on_errors) 2271 if (on_errors_arr[i].val & vol->on_errors)
2280 seq_printf(sf, ",errors=%s", on_errors_arr[i].str); 2272 seq_printf(sf, ",errors=%s", on_errors_arr[i].str);
@@ -2311,6 +2303,7 @@ int ntfs_truncate(struct inode *vi)
2311 ntfs_volume *vol = ni->vol; 2303 ntfs_volume *vol = ni->vol;
2312 ntfs_attr_search_ctx *ctx; 2304 ntfs_attr_search_ctx *ctx;
2313 MFT_RECORD *m; 2305 MFT_RECORD *m;
2306 ATTR_RECORD *a;
2314 const char *te = " Leaving file length out of sync with i_size."; 2307 const char *te = " Leaving file length out of sync with i_size.";
2315 int err; 2308 int err;
2316 2309
@@ -2347,14 +2340,15 @@ int ntfs_truncate(struct inode *vi)
2347 vi->i_ino, err); 2340 vi->i_ino, err);
2348 goto err_out; 2341 goto err_out;
2349 } 2342 }
2343 a = ctx->attr;
2350 /* If the size has not changed there is nothing to do. */ 2344 /* If the size has not changed there is nothing to do. */
2351 if (ntfs_attr_size(ctx->attr) == i_size_read(vi)) 2345 if (ntfs_attr_size(a) == i_size_read(vi))
2352 goto done; 2346 goto done;
2353 // TODO: Implement the truncate... 2347 // TODO: Implement the truncate...
2354 ntfs_error(vi->i_sb, "Inode size has changed but this is not " 2348 ntfs_error(vi->i_sb, "Inode size has changed but this is not "
2355 "implemented yet. Resetting inode size to old value. " 2349 "implemented yet. Resetting inode size to old value. "
2356 " This is most likely a bug in the ntfs driver!"); 2350 " This is most likely a bug in the ntfs driver!");
2357 i_size_write(vi, ntfs_attr_size(ctx->attr)); 2351 i_size_write(vi, ntfs_attr_size(a));
2358done: 2352done:
2359 ntfs_attr_put_search_ctx(ctx); 2353 ntfs_attr_put_search_ctx(ctx);
2360 unmap_mft_record(ni); 2354 unmap_mft_record(ni);
@@ -2515,18 +2509,18 @@ int ntfs_write_inode(struct inode *vi, int sync)
2515 nt = utc2ntfs(vi->i_mtime); 2509 nt = utc2ntfs(vi->i_mtime);
2516 if (si->last_data_change_time != nt) { 2510 if (si->last_data_change_time != nt) {
2517 ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, " 2511 ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, "
2518 "new = 0x%llx", vi->i_ino, 2512 "new = 0x%llx", vi->i_ino, (long long)
2519 sle64_to_cpu(si->last_data_change_time), 2513 sle64_to_cpu(si->last_data_change_time),
2520 sle64_to_cpu(nt)); 2514 (long long)sle64_to_cpu(nt));
2521 si->last_data_change_time = nt; 2515 si->last_data_change_time = nt;
2522 modified = TRUE; 2516 modified = TRUE;
2523 } 2517 }
2524 nt = utc2ntfs(vi->i_ctime); 2518 nt = utc2ntfs(vi->i_ctime);
2525 if (si->last_mft_change_time != nt) { 2519 if (si->last_mft_change_time != nt) {
2526 ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, " 2520 ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, "
2527 "new = 0x%llx", vi->i_ino, 2521 "new = 0x%llx", vi->i_ino, (long long)
2528 sle64_to_cpu(si->last_mft_change_time), 2522 sle64_to_cpu(si->last_mft_change_time),
2529 sle64_to_cpu(nt)); 2523 (long long)sle64_to_cpu(nt));
2530 si->last_mft_change_time = nt; 2524 si->last_mft_change_time = nt;
2531 modified = TRUE; 2525 modified = TRUE;
2532 } 2526 }
@@ -2534,8 +2528,8 @@ int ntfs_write_inode(struct inode *vi, int sync)
2534 if (si->last_access_time != nt) { 2528 if (si->last_access_time != nt) {
2535 ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, " 2529 ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, "
2536 "new = 0x%llx", vi->i_ino, 2530 "new = 0x%llx", vi->i_ino,
2537 sle64_to_cpu(si->last_access_time), 2531 (long long)sle64_to_cpu(si->last_access_time),
2538 sle64_to_cpu(nt)); 2532 (long long)sle64_to_cpu(nt));
2539 si->last_access_time = nt; 2533 si->last_access_time = nt;
2540 modified = TRUE; 2534 modified = TRUE;
2541 } 2535 }
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 99580455f2ed..3de5c0231966 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -2,7 +2,7 @@
2 * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of 2 * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of
3 * the Linux-NTFS project. 3 * the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -44,6 +44,7 @@ typedef struct _ntfs_inode ntfs_inode;
44 * fields already provided in the VFS inode. 44 * fields already provided in the VFS inode.
45 */ 45 */
46struct _ntfs_inode { 46struct _ntfs_inode {
47 rwlock_t size_lock; /* Lock serializing access to inode sizes. */
47 s64 initialized_size; /* Copy from the attribute record. */ 48 s64 initialized_size; /* Copy from the attribute record. */
48 s64 allocated_size; /* Copy from the attribute record. */ 49 s64 allocated_size; /* Copy from the attribute record. */
49 unsigned long state; /* NTFS specific flags describing this inode. 50 unsigned long state; /* NTFS specific flags describing this inode.
@@ -109,7 +110,7 @@ struct _ntfs_inode {
109 u8 block_size_bits; /* Log2 of the above. */ 110 u8 block_size_bits; /* Log2 of the above. */
110 u8 vcn_size_bits; /* Log2 of the above. */ 111 u8 vcn_size_bits; /* Log2 of the above. */
111 } index; 112 } index;
112 struct { /* It is a compressed file or an attribute inode. */ 113 struct { /* It is a compressed/sparse file/attribute inode. */
113 s64 size; /* Copy of compressed_size from 114 s64 size; /* Copy of compressed_size from
114 $DATA. */ 115 $DATA. */
115 u32 block_size; /* Size of a compression block 116 u32 block_size; /* Size of a compression block
@@ -165,6 +166,7 @@ typedef enum {
165 NI_Sparse, /* 1: Unnamed data attr is sparse (f). 166 NI_Sparse, /* 1: Unnamed data attr is sparse (f).
166 1: Create sparse files by default (d). 167 1: Create sparse files by default (d).
167 1: Attribute is sparse (a). */ 168 1: Attribute is sparse (a). */
169 NI_SparseDisabled, /* 1: May not create sparse regions. */
168 NI_TruncateFailed, /* 1: Last ntfs_truncate() call failed. */ 170 NI_TruncateFailed, /* 1: Last ntfs_truncate() call failed. */
169} ntfs_inode_state_bits; 171} ntfs_inode_state_bits;
170 172
@@ -217,6 +219,7 @@ NINO_FNS(IndexAllocPresent)
217NINO_FNS(Compressed) 219NINO_FNS(Compressed)
218NINO_FNS(Encrypted) 220NINO_FNS(Encrypted)
219NINO_FNS(Sparse) 221NINO_FNS(Sparse)
222NINO_FNS(SparseDisabled)
220NINO_FNS(TruncateFailed) 223NINO_FNS(TruncateFailed)
221 224
222/* 225/*
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 47b338999921..609ad1728ce4 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -2,7 +2,7 @@
2 * layout.h - All NTFS associated on-disk structures. Part of the Linux-NTFS 2 * layout.h - All NTFS associated on-disk structures. Part of the Linux-NTFS
3 * project. 3 * project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -547,26 +547,44 @@ enum {
547 COLLATION_NTOFS_ULONG = const_cpu_to_le32(0x10), 547 COLLATION_NTOFS_ULONG = const_cpu_to_le32(0x10),
548 COLLATION_NTOFS_SID = const_cpu_to_le32(0x11), 548 COLLATION_NTOFS_SID = const_cpu_to_le32(0x11),
549 COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(0x12), 549 COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(0x12),
550 COLLATION_NTOFS_ULONGS = const_cpu_to_le32(0x13) 550 COLLATION_NTOFS_ULONGS = const_cpu_to_le32(0x13),
551}; 551};
552 552
553typedef le32 COLLATION_RULE; 553typedef le32 COLLATION_RULE;
554 554
555/* 555/*
556 * The flags (32-bit) describing attribute properties in the attribute 556 * The flags (32-bit) describing attribute properties in the attribute
557 * definition structure. FIXME: This information is from Regis's information 557 * definition structure. FIXME: This information is based on Regis's
558 * and, according to him, it is not certain and probably incomplete. 558 * information and, according to him, it is not certain and probably
559 * The INDEXABLE flag is fairly certainly correct as only the file name 559 * incomplete. The INDEXABLE flag is fairly certainly correct as only the file
560 * attribute has this flag set and this is the only attribute indexed in NT4. 560 * name attribute has this flag set and this is the only attribute indexed in
561 * NT4.
561 */ 562 */
562enum { 563enum {
563 INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be 564 ATTR_DEF_INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be
564 indexed. */ 565 indexed. */
565 NEED_TO_REGENERATE = const_cpu_to_le32(0x40), /* Need to regenerate 566 ATTR_DEF_MULTIPLE = const_cpu_to_le32(0x04), /* Attribute type
566 during regeneration 567 can be present multiple times in the
567 phase. */ 568 mft records of an inode. */
568 CAN_BE_NON_RESIDENT = const_cpu_to_le32(0x80), /* Attribute can be 569 ATTR_DEF_NOT_ZERO = const_cpu_to_le32(0x08), /* Attribute value
569 non-resident. */ 570 must contain at least one non-zero
571 byte. */
572 ATTR_DEF_INDEXED_UNIQUE = const_cpu_to_le32(0x10), /* Attribute must be
573 indexed and the attribute value must be
574 unique for the attribute type in all of
575 the mft records of an inode. */
576 ATTR_DEF_NAMED_UNIQUE = const_cpu_to_le32(0x20), /* Attribute must be
577 named and the name must be unique for
578 the attribute type in all of the mft
579 records of an inode. */
580 ATTR_DEF_RESIDENT = const_cpu_to_le32(0x40), /* Attribute must be
581 resident. */
582 ATTR_DEF_ALWAYS_LOG = const_cpu_to_le32(0x80), /* Always log
583 modifications to this attribute,
584 regardless of whether it is resident or
585 non-resident. Without this, only log
586 modifications if the attribute is
587 resident. */
570}; 588};
571 589
572typedef le32 ATTR_DEF_FLAGS; 590typedef le32 ATTR_DEF_FLAGS;
@@ -749,10 +767,11 @@ typedef struct {
749 record header aligned to 8-byte boundary. */ 767 record header aligned to 8-byte boundary. */
750/* 34*/ u8 compression_unit; /* The compression unit expressed 768/* 34*/ u8 compression_unit; /* The compression unit expressed
751 as the log to the base 2 of the number of 769 as the log to the base 2 of the number of
752 clusters in a compression unit. 0 means not 770 clusters in a compression unit. 0 means not
753 compressed. (This effectively limits the 771 compressed. (This effectively limits the
754 compression unit size to be a power of two 772 compression unit size to be a power of two
755 clusters.) WinNT4 only uses a value of 4. */ 773 clusters.) WinNT4 only uses a value of 4.
774 Sparse files also have this set to 4. */
756/* 35*/ u8 reserved[5]; /* Align to 8-byte boundary. */ 775/* 35*/ u8 reserved[5]; /* Align to 8-byte boundary. */
757/* The sizes below are only used when lowest_vcn is zero, as otherwise it would 776/* The sizes below are only used when lowest_vcn is zero, as otherwise it would
758 be difficult to keep them up-to-date.*/ 777 be difficult to keep them up-to-date.*/
@@ -772,10 +791,10 @@ typedef struct {
772 data_size. */ 791 data_size. */
773/* sizeof(uncompressed attr) = 64*/ 792/* sizeof(uncompressed attr) = 64*/
774/* 64*/ sle64 compressed_size; /* Byte size of the attribute 793/* 64*/ sle64 compressed_size; /* Byte size of the attribute
775 value after compression. Only present when 794 value after compression. Only present when
776 compressed. Always is a multiple of the 795 compressed or sparse. Always is a multiple of
777 cluster size. Represents the actual amount of 796 the cluster size. Represents the actual amount
778 disk space being used on the disk. */ 797 of disk space being used on the disk. */
779/* sizeof(compressed attr) = 72*/ 798/* sizeof(compressed attr) = 72*/
780 } __attribute__ ((__packed__)) non_resident; 799 } __attribute__ ((__packed__)) non_resident;
781 } __attribute__ ((__packed__)) data; 800 } __attribute__ ((__packed__)) data;
@@ -834,7 +853,7 @@ enum {
834 /* Note, this is a copy of the corresponding bit from the mft record, 853 /* Note, this is a copy of the corresponding bit from the mft record,
835 telling us whether this file has a view index present (eg. object id 854 telling us whether this file has a view index present (eg. object id
836 index, quota index, one of the security indexes or the encrypting 855 index, quota index, one of the security indexes or the encrypting
837 file system related indexes). */ 856 filesystem related indexes). */
838}; 857};
839 858
840typedef le32 FILE_ATTR_FLAGS; 859typedef le32 FILE_ATTR_FLAGS;
@@ -917,20 +936,12 @@ typedef struct {
917 /* 56*/ le64 quota_charged; /* Byte size of the charge to 936 /* 56*/ le64 quota_charged; /* Byte size of the charge to
918 the quota for all streams of the file. Note: Is 937 the quota for all streams of the file. Note: Is
919 zero if quotas are disabled. */ 938 zero if quotas are disabled. */
920 /* 64*/ le64 usn; /* Last update sequence number 939 /* 64*/ leUSN usn; /* Last update sequence number
921 of the file. This is a direct index into the 940 of the file. This is a direct index into the
922 change (aka usn) journal file. It is zero if 941 transaction log file ($UsnJrnl). It is zero if
923 the usn journal is disabled. 942 the usn journal is disabled or this file has
924 NOTE: To disable the journal need to delete 943 not been subject to logging yet. See usnjrnl.h
925 the journal file itself and to then walk the 944 for details. */
926 whole mft and set all Usn entries in all mft
927 records to zero! (This can take a while!)
928 The journal is FILE_Extend/$UsnJrnl. Win2k
929 will recreate the journal and initiate
930 logging if necessary when mounting the
931 partition. This, in contrast to disabling the
932 journal is a very fast process, so the user
933 won't even notice it. */
934 } __attribute__ ((__packed__)) v3; 945 } __attribute__ ((__packed__)) v3;
935 /* sizeof() = 72 bytes (NTFS 3.x) */ 946 /* sizeof() = 72 bytes (NTFS 3.x) */
936 } __attribute__ ((__packed__)) ver; 947 } __attribute__ ((__packed__)) ver;
@@ -1893,7 +1904,7 @@ enum {
1893 VOLUME_FLAGS_MASK = const_cpu_to_le16(0x803f), 1904 VOLUME_FLAGS_MASK = const_cpu_to_le16(0x803f),
1894 1905
1895 /* To make our life easier when checking if we must mount read-only. */ 1906 /* To make our life easier when checking if we must mount read-only. */
1896 VOLUME_MUST_MOUNT_RO_MASK = const_cpu_to_le16(0x8037), 1907 VOLUME_MUST_MOUNT_RO_MASK = const_cpu_to_le16(0x8027),
1897} __attribute__ ((__packed__)); 1908} __attribute__ ((__packed__));
1898 1909
1899typedef le16 VOLUME_FLAGS; 1910typedef le16 VOLUME_FLAGS;
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 23fd911078b1..a4bc07616e5d 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * lcnalloc.c - Cluster (de)allocation code. Part of the Linux-NTFS project. 2 * lcnalloc.c - Cluster (de)allocation code. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2004 Anton Altaparmakov 4 * Copyright (c) 2004-2005 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -60,7 +60,7 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
60 if (rl->lcn < 0) 60 if (rl->lcn < 0)
61 continue; 61 continue;
62 err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length); 62 err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length);
63 if (unlikely(err && (!ret || ret == ENOMEM) && ret != err)) 63 if (unlikely(err && (!ret || ret == -ENOMEM) && ret != err))
64 ret = err; 64 ret = err;
65 } 65 }
66 ntfs_debug("Done."); 66 ntfs_debug("Done.");
@@ -140,6 +140,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
140 LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn; 140 LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
141 LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size; 141 LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size;
142 s64 clusters; 142 s64 clusters;
143 loff_t i_size;
143 struct inode *lcnbmp_vi; 144 struct inode *lcnbmp_vi;
144 runlist_element *rl = NULL; 145 runlist_element *rl = NULL;
145 struct address_space *mapping; 146 struct address_space *mapping;
@@ -249,6 +250,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
249 clusters = count; 250 clusters = count;
250 rlpos = rlsize = 0; 251 rlpos = rlsize = 0;
251 mapping = lcnbmp_vi->i_mapping; 252 mapping = lcnbmp_vi->i_mapping;
253 i_size = i_size_read(lcnbmp_vi);
252 while (1) { 254 while (1) {
253 ntfs_debug("Start of outer while loop: done_zones 0x%x, " 255 ntfs_debug("Start of outer while loop: done_zones 0x%x, "
254 "search_zone %i, pass %i, zone_start 0x%llx, " 256 "search_zone %i, pass %i, zone_start 0x%llx, "
@@ -263,7 +265,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
263 last_read_pos = bmp_pos >> 3; 265 last_read_pos = bmp_pos >> 3;
264 ntfs_debug("last_read_pos 0x%llx.", 266 ntfs_debug("last_read_pos 0x%llx.",
265 (unsigned long long)last_read_pos); 267 (unsigned long long)last_read_pos);
266 if (last_read_pos > lcnbmp_vi->i_size) { 268 if (last_read_pos > i_size) {
267 ntfs_debug("End of attribute reached. " 269 ntfs_debug("End of attribute reached. "
268 "Skipping to zone_pass_done."); 270 "Skipping to zone_pass_done.");
269 goto zone_pass_done; 271 goto zone_pass_done;
@@ -287,11 +289,11 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
287 buf_size = last_read_pos & ~PAGE_CACHE_MASK; 289 buf_size = last_read_pos & ~PAGE_CACHE_MASK;
288 buf = page_address(page) + buf_size; 290 buf = page_address(page) + buf_size;
289 buf_size = PAGE_CACHE_SIZE - buf_size; 291 buf_size = PAGE_CACHE_SIZE - buf_size;
290 if (unlikely(last_read_pos + buf_size > lcnbmp_vi->i_size)) 292 if (unlikely(last_read_pos + buf_size > i_size))
291 buf_size = lcnbmp_vi->i_size - last_read_pos; 293 buf_size = i_size - last_read_pos;
292 buf_size <<= 3; 294 buf_size <<= 3;
293 lcn = bmp_pos & 7; 295 lcn = bmp_pos & 7;
294 bmp_pos &= ~7; 296 bmp_pos &= ~(LCN)7;
295 ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, " 297 ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, "
296 "bmp_pos 0x%llx, need_writeback %i.", buf_size, 298 "bmp_pos 0x%llx, need_writeback %i.", buf_size,
297 (unsigned long long)lcn, 299 (unsigned long long)lcn,
@@ -309,7 +311,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
309 (unsigned int)*byte); 311 (unsigned int)*byte);
310 /* Skip full bytes. */ 312 /* Skip full bytes. */
311 if (*byte == 0xff) { 313 if (*byte == 0xff) {
312 lcn = (lcn + 8) & ~7; 314 lcn = (lcn + 8) & ~(LCN)7;
313 ntfs_debug("Continuing while loop 1."); 315 ntfs_debug("Continuing while loop 1.");
314 continue; 316 continue;
315 } 317 }
@@ -691,7 +693,7 @@ switch_to_data1_zone: search_zone = 2;
691 if (zone == MFT_ZONE || mft_zone_size <= 0) { 693 if (zone == MFT_ZONE || mft_zone_size <= 0) {
692 ntfs_debug("No free clusters left, going to out."); 694 ntfs_debug("No free clusters left, going to out.");
693 /* Really no more space left on device. */ 695 /* Really no more space left on device. */
694 err = ENOSPC; 696 err = -ENOSPC;
695 goto out; 697 goto out;
696 } /* zone == DATA_ZONE && mft_zone_size > 0 */ 698 } /* zone == DATA_ZONE && mft_zone_size > 0 */
697 ntfs_debug("Shrinking mft zone."); 699 ntfs_debug("Shrinking mft zone.");
@@ -755,13 +757,13 @@ out:
755 if (rl) { 757 if (rl) {
756 int err2; 758 int err2;
757 759
758 if (err == ENOSPC) 760 if (err == -ENOSPC)
759 ntfs_debug("Not enough space to complete allocation, " 761 ntfs_debug("Not enough space to complete allocation, "
760 "err ENOSPC, first free lcn 0x%llx, " 762 "err -ENOSPC, first free lcn 0x%llx, "
761 "could allocate up to 0x%llx " 763 "could allocate up to 0x%llx "
762 "clusters.", 764 "clusters.",
763 (unsigned long long)rl[0].lcn, 765 (unsigned long long)rl[0].lcn,
764 (unsigned long long)count - clusters); 766 (unsigned long long)(count - clusters));
765 /* Deallocate all allocated clusters. */ 767 /* Deallocate all allocated clusters. */
766 ntfs_debug("Attempting rollback..."); 768 ntfs_debug("Attempting rollback...");
767 err2 = ntfs_cluster_free_from_rl_nolock(vol, rl); 769 err2 = ntfs_cluster_free_from_rl_nolock(vol, rl);
@@ -773,10 +775,10 @@ out:
773 } 775 }
774 /* Free the runlist. */ 776 /* Free the runlist. */
775 ntfs_free(rl); 777 ntfs_free(rl);
776 } else if (err == ENOSPC) 778 } else if (err == -ENOSPC)
777 ntfs_debug("No space left at all, err = ENOSPC, " 779 ntfs_debug("No space left at all, err = -ENOSPC, first free "
778 "first free lcn = 0x%llx.", 780 "lcn = 0x%llx.",
779 (unsigned long long)vol->data1_zone_pos); 781 (long long)vol->data1_zone_pos);
780 up_write(&vol->lcnbmp_lock); 782 up_write(&vol->lcnbmp_lock);
781 return ERR_PTR(err); 783 return ERR_PTR(err);
782} 784}
@@ -846,8 +848,8 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
846 848
847 total_freed = real_freed = 0; 849 total_freed = real_freed = 0;
848 850
849 /* This returns with ni->runlist locked for reading on success. */ 851 down_read(&ni->runlist.lock);
850 rl = ntfs_find_vcn(ni, start_vcn, FALSE); 852 rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE);
851 if (IS_ERR(rl)) { 853 if (IS_ERR(rl)) {
852 if (!is_rollback) 854 if (!is_rollback)
853 ntfs_error(vol->sb, "Failed to find first runlist " 855 ntfs_error(vol->sb, "Failed to find first runlist "
@@ -861,7 +863,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
861 ntfs_error(vol->sb, "First runlist element has " 863 ntfs_error(vol->sb, "First runlist element has "
862 "invalid lcn, aborting."); 864 "invalid lcn, aborting.");
863 err = -EIO; 865 err = -EIO;
864 goto unl_err_out; 866 goto err_out;
865 } 867 }
866 /* Find the starting cluster inside the run that needs freeing. */ 868 /* Find the starting cluster inside the run that needs freeing. */
867 delta = start_vcn - rl->vcn; 869 delta = start_vcn - rl->vcn;
@@ -879,7 +881,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
879 if (!is_rollback) 881 if (!is_rollback)
880 ntfs_error(vol->sb, "Failed to clear first run " 882 ntfs_error(vol->sb, "Failed to clear first run "
881 "(error %i), aborting.", err); 883 "(error %i), aborting.", err);
882 goto unl_err_out; 884 goto err_out;
883 } 885 }
884 /* We have freed @to_free real clusters. */ 886 /* We have freed @to_free real clusters. */
885 real_freed = to_free; 887 real_freed = to_free;
@@ -899,30 +901,15 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
899 if (unlikely(rl->lcn < LCN_HOLE)) { 901 if (unlikely(rl->lcn < LCN_HOLE)) {
900 VCN vcn; 902 VCN vcn;
901 903
902 /* 904 /* Attempt to map runlist. */
903 * Attempt to map runlist, dropping runlist lock for
904 * the duration.
905 */
906 vcn = rl->vcn; 905 vcn = rl->vcn;
907 up_read(&ni->runlist.lock); 906 rl = ntfs_attr_find_vcn_nolock(ni, vcn, FALSE);
908 err = ntfs_map_runlist(ni, vcn);
909 if (err) {
910 if (!is_rollback)
911 ntfs_error(vol->sb, "Failed to map "
912 "runlist fragment.");
913 if (err == -EINVAL || err == -ENOENT)
914 err = -EIO;
915 goto err_out;
916 }
917 /*
918 * This returns with ni->runlist locked for reading on
919 * success.
920 */
921 rl = ntfs_find_vcn(ni, vcn, FALSE);
922 if (IS_ERR(rl)) { 907 if (IS_ERR(rl)) {
923 err = PTR_ERR(rl); 908 err = PTR_ERR(rl);
924 if (!is_rollback) 909 if (!is_rollback)
925 ntfs_error(vol->sb, "Failed to find " 910 ntfs_error(vol->sb, "Failed to map "
911 "runlist fragment or "
912 "failed to find "
926 "subsequent runlist " 913 "subsequent runlist "
927 "element."); 914 "element.");
928 goto err_out; 915 goto err_out;
@@ -935,7 +922,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
935 (unsigned long long) 922 (unsigned long long)
936 rl->lcn); 923 rl->lcn);
937 err = -EIO; 924 err = -EIO;
938 goto unl_err_out; 925 goto err_out;
939 } 926 }
940 } 927 }
941 /* The number of clusters in this run that need freeing. */ 928 /* The number of clusters in this run that need freeing. */
@@ -951,7 +938,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
951 if (!is_rollback) 938 if (!is_rollback)
952 ntfs_error(vol->sb, "Failed to clear " 939 ntfs_error(vol->sb, "Failed to clear "
953 "subsequent run."); 940 "subsequent run.");
954 goto unl_err_out; 941 goto err_out;
955 } 942 }
956 /* We have freed @to_free real clusters. */ 943 /* We have freed @to_free real clusters. */
957 real_freed += to_free; 944 real_freed += to_free;
@@ -972,9 +959,8 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
972 /* We are done. Return the number of actually freed clusters. */ 959 /* We are done. Return the number of actually freed clusters. */
973 ntfs_debug("Done."); 960 ntfs_debug("Done.");
974 return real_freed; 961 return real_freed;
975unl_err_out:
976 up_read(&ni->runlist.lock);
977err_out: 962err_out:
963 up_read(&ni->runlist.lock);
978 if (is_rollback) 964 if (is_rollback)
979 return err; 965 return err;
980 /* If no real clusters were freed, no need to rollback. */ 966 /* If no real clusters were freed, no need to rollback. */
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 5e280abafab3..8edb8e20fb08 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project. 2 * logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2002-2004 Anton Altaparmakov 4 * Copyright (c) 2002-2005 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -410,7 +410,7 @@ err_out:
410} 410}
411 411
412/** 412/**
413 * ntfs_ckeck_logfile - check in the journal if the volume is consistent 413 * ntfs_check_logfile - check the journal for consistency
414 * @log_vi: struct inode of loaded journal $LogFile to check 414 * @log_vi: struct inode of loaded journal $LogFile to check
415 * 415 *
416 * Check the $LogFile journal for consistency and return TRUE if it is 416 * Check the $LogFile journal for consistency and return TRUE if it is
@@ -443,7 +443,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
443 /* An empty $LogFile must have been clean before it got emptied. */ 443 /* An empty $LogFile must have been clean before it got emptied. */
444 if (NVolLogFileEmpty(vol)) 444 if (NVolLogFileEmpty(vol))
445 goto is_empty; 445 goto is_empty;
446 size = log_vi->i_size; 446 size = i_size_read(log_vi);
447 /* Make sure the file doesn't exceed the maximum allowed size. */ 447 /* Make sure the file doesn't exceed the maximum allowed size. */
448 if (size > MaxLogFileSize) 448 if (size > MaxLogFileSize)
449 size = MaxLogFileSize; 449 size = MaxLogFileSize;
@@ -464,7 +464,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
464 * optimize log_page_size and log_page_bits into constants. 464 * optimize log_page_size and log_page_bits into constants.
465 */ 465 */
466 log_page_bits = generic_ffs(log_page_size) - 1; 466 log_page_bits = generic_ffs(log_page_size) - 1;
467 size &= ~(log_page_size - 1); 467 size &= ~(s64)(log_page_size - 1);
468 /* 468 /*
469 * Ensure the log file is big enough to store at least the two restart 469 * Ensure the log file is big enough to store at least the two restart
470 * pages and the minimum number of log record pages. 470 * pages and the minimum number of log record pages.
@@ -689,7 +689,8 @@ BOOL ntfs_empty_logfile(struct inode *log_vi)
689 if (!NVolLogFileEmpty(vol)) { 689 if (!NVolLogFileEmpty(vol)) {
690 int err; 690 int err;
691 691
692 err = ntfs_attr_set(NTFS_I(log_vi), 0, log_vi->i_size, 0xff); 692 err = ntfs_attr_set(NTFS_I(log_vi), 0, i_size_read(log_vi),
693 0xff);
693 if (unlikely(err)) { 694 if (unlikely(err)) {
694 ntfs_error(vol->sb, "Failed to fill $LogFile with " 695 ntfs_error(vol->sb, "Failed to fill $LogFile with "
695 "0xff bytes (error code %i).", err); 696 "0xff bytes (error code %i).", err);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index dfa85ac2f8ba..ac9ff39aa834 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. 2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -45,6 +45,7 @@
45 */ 45 */
46static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) 46static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
47{ 47{
48 loff_t i_size;
48 ntfs_volume *vol = ni->vol; 49 ntfs_volume *vol = ni->vol;
49 struct inode *mft_vi = vol->mft_ino; 50 struct inode *mft_vi = vol->mft_ino;
50 struct page *page; 51 struct page *page;
@@ -60,13 +61,14 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
60 index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; 61 index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
61 ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; 62 ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
62 63
64 i_size = i_size_read(mft_vi);
63 /* The maximum valid index into the page cache for $MFT's data. */ 65 /* The maximum valid index into the page cache for $MFT's data. */
64 end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT; 66 end_index = i_size >> PAGE_CACHE_SHIFT;
65 67
66 /* If the wanted index is out of bounds the mft record doesn't exist. */ 68 /* If the wanted index is out of bounds the mft record doesn't exist. */
67 if (unlikely(index >= end_index)) { 69 if (unlikely(index >= end_index)) {
68 if (index > end_index || (mft_vi->i_size & ~PAGE_CACHE_MASK) < 70 if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs +
69 ofs + vol->mft_record_size) { 71 vol->mft_record_size) {
70 page = ERR_PTR(-ENOENT); 72 page = ERR_PTR(-ENOENT);
71 ntfs_error(vol->sb, "Attemt to read mft record 0x%lx, " 73 ntfs_error(vol->sb, "Attemt to read mft record 0x%lx, "
72 "which is beyond the end of the mft. " 74 "which is beyond the end of the mft. "
@@ -285,7 +287,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
285 } 287 }
286 unmap_mft_record(ni); 288 unmap_mft_record(ni);
287 ntfs_error(base_ni->vol->sb, "Found stale extent mft " 289 ntfs_error(base_ni->vol->sb, "Found stale extent mft "
288 "reference! Corrupt file system. " 290 "reference! Corrupt filesystem. "
289 "Run chkdsk."); 291 "Run chkdsk.");
290 return ERR_PTR(-EIO); 292 return ERR_PTR(-EIO);
291 } 293 }
@@ -316,7 +318,7 @@ map_err_out:
316 /* Verify the sequence number if it is present. */ 318 /* Verify the sequence number if it is present. */
317 if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) { 319 if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) {
318 ntfs_error(base_ni->vol->sb, "Found stale extent mft " 320 ntfs_error(base_ni->vol->sb, "Found stale extent mft "
319 "reference! Corrupt file system. Run chkdsk."); 321 "reference! Corrupt filesystem. Run chkdsk.");
320 destroy_ni = TRUE; 322 destroy_ni = TRUE;
321 m = ERR_PTR(-EIO); 323 m = ERR_PTR(-EIO);
322 goto unm_err_out; 324 goto unm_err_out;
@@ -946,20 +948,23 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
946 na.name_len = 0; 948 na.name_len = 0;
947 na.type = AT_UNUSED; 949 na.type = AT_UNUSED;
948 /* 950 /*
949 * For inode 0, i.e. $MFT itself, we cannot use ilookup5() from here or 951 * Optimize inode 0, i.e. $MFT itself, since we have it in memory and
950 * we deadlock because the inode is already locked by the kernel 952 * we get here for it rather often.
951 * (fs/fs-writeback.c::__sync_single_inode()) and ilookup5() waits
952 * until the inode is unlocked before returning it and it never gets
953 * unlocked because ntfs_should_write_mft_record() never returns. )-:
954 * Fortunately, we have inode 0 pinned in icache for the duration of
955 * the mount so we can access it directly.
956 */ 953 */
957 if (!mft_no) { 954 if (!mft_no) {
958 /* Balance the below iput(). */ 955 /* Balance the below iput(). */
959 vi = igrab(mft_vi); 956 vi = igrab(mft_vi);
960 BUG_ON(vi != mft_vi); 957 BUG_ON(vi != mft_vi);
961 } else 958 } else {
962 vi = ilookup5(sb, mft_no, (test_t)ntfs_test_inode, &na); 959 /*
960 * Have to use ilookup5_nowait() since ilookup5() waits for the
961 * inode lock which causes ntfs to deadlock when a concurrent
962 * inode write via the inode dirty code paths and the page
963 * dirty code path of the inode dirty code path when writing
964 * $MFT occurs.
965 */
966 vi = ilookup5_nowait(sb, mft_no, (test_t)ntfs_test_inode, &na);
967 }
963 if (vi) { 968 if (vi) {
964 ntfs_debug("Base inode 0x%lx is in icache.", mft_no); 969 ntfs_debug("Base inode 0x%lx is in icache.", mft_no);
965 /* The inode is in icache. */ 970 /* The inode is in icache. */
@@ -1014,7 +1019,13 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
1014 na.mft_no = MREF_LE(m->base_mft_record); 1019 na.mft_no = MREF_LE(m->base_mft_record);
1015 ntfs_debug("Mft record 0x%lx is an extent record. Looking for base " 1020 ntfs_debug("Mft record 0x%lx is an extent record. Looking for base "
1016 "inode 0x%lx in icache.", mft_no, na.mft_no); 1021 "inode 0x%lx in icache.", mft_no, na.mft_no);
1017 vi = ilookup5(sb, na.mft_no, (test_t)ntfs_test_inode, &na); 1022 if (!na.mft_no) {
1023 /* Balance the below iput(). */
1024 vi = igrab(mft_vi);
1025 BUG_ON(vi != mft_vi);
1026 } else
1027 vi = ilookup5_nowait(sb, na.mft_no, (test_t)ntfs_test_inode,
1028 &na);
1018 if (!vi) { 1029 if (!vi) {
1019 /* 1030 /*
1020 * The base inode is not in icache, write this extent mft 1031 * The base inode is not in icache, write this extent mft
@@ -1121,6 +1132,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
1121 ntfs_inode *base_ni) 1132 ntfs_inode *base_ni)
1122{ 1133{
1123 s64 pass_end, ll, data_pos, pass_start, ofs, bit; 1134 s64 pass_end, ll, data_pos, pass_start, ofs, bit;
1135 unsigned long flags;
1124 struct address_space *mftbmp_mapping; 1136 struct address_space *mftbmp_mapping;
1125 u8 *buf, *byte; 1137 u8 *buf, *byte;
1126 struct page *page; 1138 struct page *page;
@@ -1134,9 +1146,13 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
1134 * Set the end of the pass making sure we do not overflow the mft 1146 * Set the end of the pass making sure we do not overflow the mft
1135 * bitmap. 1147 * bitmap.
1136 */ 1148 */
1149 read_lock_irqsave(&NTFS_I(vol->mft_ino)->size_lock, flags);
1137 pass_end = NTFS_I(vol->mft_ino)->allocated_size >> 1150 pass_end = NTFS_I(vol->mft_ino)->allocated_size >>
1138 vol->mft_record_size_bits; 1151 vol->mft_record_size_bits;
1152 read_unlock_irqrestore(&NTFS_I(vol->mft_ino)->size_lock, flags);
1153 read_lock_irqsave(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
1139 ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3; 1154 ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3;
1155 read_unlock_irqrestore(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
1140 if (pass_end > ll) 1156 if (pass_end > ll)
1141 pass_end = ll; 1157 pass_end = ll;
1142 pass = 1; 1158 pass = 1;
@@ -1263,6 +1279,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1263{ 1279{
1264 LCN lcn; 1280 LCN lcn;
1265 s64 ll; 1281 s64 ll;
1282 unsigned long flags;
1266 struct page *page; 1283 struct page *page;
1267 ntfs_inode *mft_ni, *mftbmp_ni; 1284 ntfs_inode *mft_ni, *mftbmp_ni;
1268 runlist_element *rl, *rl2 = NULL; 1285 runlist_element *rl, *rl2 = NULL;
@@ -1284,17 +1301,20 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1284 /* 1301 /*
1285 * Determine the last lcn of the mft bitmap. The allocated size of the 1302 * Determine the last lcn of the mft bitmap. The allocated size of the
1286 * mft bitmap cannot be zero so we are ok to do this. 1303 * mft bitmap cannot be zero so we are ok to do this.
1287 * ntfs_find_vcn() returns the runlist locked on success.
1288 */ 1304 */
1289 rl = ntfs_find_vcn(mftbmp_ni, (mftbmp_ni->allocated_size - 1) >> 1305 down_write(&mftbmp_ni->runlist.lock);
1290 vol->cluster_size_bits, TRUE); 1306 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
1307 ll = mftbmp_ni->allocated_size;
1308 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
1309 rl = ntfs_attr_find_vcn_nolock(mftbmp_ni,
1310 (ll - 1) >> vol->cluster_size_bits, TRUE);
1291 if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { 1311 if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
1312 up_write(&mftbmp_ni->runlist.lock);
1292 ntfs_error(vol->sb, "Failed to determine last allocated " 1313 ntfs_error(vol->sb, "Failed to determine last allocated "
1293 "cluster of mft bitmap attribute."); 1314 "cluster of mft bitmap attribute.");
1294 if (!IS_ERR(rl)) { 1315 if (!IS_ERR(rl))
1295 up_write(&mftbmp_ni->runlist.lock);
1296 ret = -EIO; 1316 ret = -EIO;
1297 } else 1317 else
1298 ret = PTR_ERR(rl); 1318 ret = PTR_ERR(rl);
1299 return ret; 1319 return ret;
1300 } 1320 }
@@ -1396,7 +1416,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1396 BUG_ON(ll < rl2->vcn); 1416 BUG_ON(ll < rl2->vcn);
1397 BUG_ON(ll >= rl2->vcn + rl2->length); 1417 BUG_ON(ll >= rl2->vcn + rl2->length);
1398 /* Get the size for the new mapping pairs array for this extent. */ 1418 /* Get the size for the new mapping pairs array for this extent. */
1399 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll); 1419 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
1400 if (unlikely(mp_size <= 0)) { 1420 if (unlikely(mp_size <= 0)) {
1401 ntfs_error(vol->sb, "Get size for mapping pairs failed for " 1421 ntfs_error(vol->sb, "Get size for mapping pairs failed for "
1402 "mft bitmap attribute extent."); 1422 "mft bitmap attribute extent.");
@@ -1418,6 +1438,8 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1418 // TODO: Deal with this by moving this extent to a new mft 1438 // TODO: Deal with this by moving this extent to a new mft
1419 // record or by starting a new extent in a new mft record or by 1439 // record or by starting a new extent in a new mft record or by
1420 // moving other attributes out of this mft record. 1440 // moving other attributes out of this mft record.
1441 // Note: It will need to be a special mft record and if none of
1442 // those are available it gets rather complicated...
1421 ntfs_error(vol->sb, "Not enough space in this mft record to " 1443 ntfs_error(vol->sb, "Not enough space in this mft record to "
1422 "accomodate extended mft bitmap attribute " 1444 "accomodate extended mft bitmap attribute "
1423 "extent. Cannot handle this yet."); 1445 "extent. Cannot handle this yet.");
@@ -1428,7 +1450,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1428 /* Generate the mapping pairs array directly into the attr record. */ 1450 /* Generate the mapping pairs array directly into the attr record. */
1429 ret = ntfs_mapping_pairs_build(vol, (u8*)a + 1451 ret = ntfs_mapping_pairs_build(vol, (u8*)a +
1430 le16_to_cpu(a->data.non_resident.mapping_pairs_offset), 1452 le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
1431 mp_size, rl2, ll, NULL); 1453 mp_size, rl2, ll, -1, NULL);
1432 if (unlikely(ret)) { 1454 if (unlikely(ret)) {
1433 ntfs_error(vol->sb, "Failed to build mapping pairs array for " 1455 ntfs_error(vol->sb, "Failed to build mapping pairs array for "
1434 "mft bitmap attribute."); 1456 "mft bitmap attribute.");
@@ -1458,9 +1480,11 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1458 } 1480 }
1459 a = ctx->attr; 1481 a = ctx->attr;
1460 } 1482 }
1483 write_lock_irqsave(&mftbmp_ni->size_lock, flags);
1461 mftbmp_ni->allocated_size += vol->cluster_size; 1484 mftbmp_ni->allocated_size += vol->cluster_size;
1462 a->data.non_resident.allocated_size = 1485 a->data.non_resident.allocated_size =
1463 cpu_to_sle64(mftbmp_ni->allocated_size); 1486 cpu_to_sle64(mftbmp_ni->allocated_size);
1487 write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
1464 /* Ensure the changes make it to disk. */ 1488 /* Ensure the changes make it to disk. */
1465 flush_dcache_mft_record_page(ctx->ntfs_ino); 1489 flush_dcache_mft_record_page(ctx->ntfs_ino);
1466 mark_mft_record_dirty(ctx->ntfs_ino); 1490 mark_mft_record_dirty(ctx->ntfs_ino);
@@ -1476,7 +1500,9 @@ restore_undo_alloc:
1476 0, ctx)) { 1500 0, ctx)) {
1477 ntfs_error(vol->sb, "Failed to find last attribute extent of " 1501 ntfs_error(vol->sb, "Failed to find last attribute extent of "
1478 "mft bitmap attribute.%s", es); 1502 "mft bitmap attribute.%s", es);
1503 write_lock_irqsave(&mftbmp_ni->size_lock, flags);
1479 mftbmp_ni->allocated_size += vol->cluster_size; 1504 mftbmp_ni->allocated_size += vol->cluster_size;
1505 write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
1480 ntfs_attr_put_search_ctx(ctx); 1506 ntfs_attr_put_search_ctx(ctx);
1481 unmap_mft_record(mft_ni); 1507 unmap_mft_record(mft_ni);
1482 up_write(&mftbmp_ni->runlist.lock); 1508 up_write(&mftbmp_ni->runlist.lock);
@@ -1512,7 +1538,7 @@ undo_alloc:
1512 a->data.non_resident.mapping_pairs_offset), 1538 a->data.non_resident.mapping_pairs_offset),
1513 old_alen - le16_to_cpu( 1539 old_alen - le16_to_cpu(
1514 a->data.non_resident.mapping_pairs_offset), 1540 a->data.non_resident.mapping_pairs_offset),
1515 rl2, ll, NULL)) { 1541 rl2, ll, -1, NULL)) {
1516 ntfs_error(vol->sb, "Failed to restore mapping pairs " 1542 ntfs_error(vol->sb, "Failed to restore mapping pairs "
1517 "array.%s", es); 1543 "array.%s", es);
1518 NVolSetErrors(vol); 1544 NVolSetErrors(vol);
@@ -1550,6 +1576,7 @@ undo_alloc:
1550static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol) 1576static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol)
1551{ 1577{
1552 s64 old_data_size, old_initialized_size; 1578 s64 old_data_size, old_initialized_size;
1579 unsigned long flags;
1553 struct inode *mftbmp_vi; 1580 struct inode *mftbmp_vi;
1554 ntfs_inode *mft_ni, *mftbmp_ni; 1581 ntfs_inode *mft_ni, *mftbmp_ni;
1555 ntfs_attr_search_ctx *ctx; 1582 ntfs_attr_search_ctx *ctx;
@@ -1583,7 +1610,8 @@ static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol)
1583 goto put_err_out; 1610 goto put_err_out;
1584 } 1611 }
1585 a = ctx->attr; 1612 a = ctx->attr;
1586 old_data_size = mftbmp_vi->i_size; 1613 write_lock_irqsave(&mftbmp_ni->size_lock, flags);
1614 old_data_size = i_size_read(mftbmp_vi);
1587 old_initialized_size = mftbmp_ni->initialized_size; 1615 old_initialized_size = mftbmp_ni->initialized_size;
1588 /* 1616 /*
1589 * We can simply update the initialized_size before filling the space 1617 * We can simply update the initialized_size before filling the space
@@ -1593,11 +1621,12 @@ static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol)
1593 mftbmp_ni->initialized_size += 8; 1621 mftbmp_ni->initialized_size += 8;
1594 a->data.non_resident.initialized_size = 1622 a->data.non_resident.initialized_size =
1595 cpu_to_sle64(mftbmp_ni->initialized_size); 1623 cpu_to_sle64(mftbmp_ni->initialized_size);
1596 if (mftbmp_ni->initialized_size > mftbmp_vi->i_size) { 1624 if (mftbmp_ni->initialized_size > old_data_size) {
1597 mftbmp_vi->i_size = mftbmp_ni->initialized_size; 1625 i_size_write(mftbmp_vi, mftbmp_ni->initialized_size);
1598 a->data.non_resident.data_size = 1626 a->data.non_resident.data_size =
1599 cpu_to_sle64(mftbmp_vi->i_size); 1627 cpu_to_sle64(mftbmp_ni->initialized_size);
1600 } 1628 }
1629 write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
1601 /* Ensure the changes make it to disk. */ 1630 /* Ensure the changes make it to disk. */
1602 flush_dcache_mft_record_page(ctx->ntfs_ino); 1631 flush_dcache_mft_record_page(ctx->ntfs_ino);
1603 mark_mft_record_dirty(ctx->ntfs_ino); 1632 mark_mft_record_dirty(ctx->ntfs_ino);
@@ -1636,22 +1665,28 @@ unm_err_out:
1636 goto err_out; 1665 goto err_out;
1637 } 1666 }
1638 a = ctx->attr; 1667 a = ctx->attr;
1668 write_lock_irqsave(&mftbmp_ni->size_lock, flags);
1639 mftbmp_ni->initialized_size = old_initialized_size; 1669 mftbmp_ni->initialized_size = old_initialized_size;
1640 a->data.non_resident.initialized_size = 1670 a->data.non_resident.initialized_size =
1641 cpu_to_sle64(old_initialized_size); 1671 cpu_to_sle64(old_initialized_size);
1642 if (mftbmp_vi->i_size != old_data_size) { 1672 if (i_size_read(mftbmp_vi) != old_data_size) {
1643 mftbmp_vi->i_size = old_data_size; 1673 i_size_write(mftbmp_vi, old_data_size);
1644 a->data.non_resident.data_size = cpu_to_sle64(old_data_size); 1674 a->data.non_resident.data_size = cpu_to_sle64(old_data_size);
1645 } 1675 }
1676 write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
1646 flush_dcache_mft_record_page(ctx->ntfs_ino); 1677 flush_dcache_mft_record_page(ctx->ntfs_ino);
1647 mark_mft_record_dirty(ctx->ntfs_ino); 1678 mark_mft_record_dirty(ctx->ntfs_ino);
1648 ntfs_attr_put_search_ctx(ctx); 1679 ntfs_attr_put_search_ctx(ctx);
1649 unmap_mft_record(mft_ni); 1680 unmap_mft_record(mft_ni);
1681#ifdef DEBUG
1682 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
1650 ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, " 1683 ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, "
1651 "data_size 0x%llx, initialized_size 0x%llx.", 1684 "data_size 0x%llx, initialized_size 0x%llx.",
1652 (long long)mftbmp_ni->allocated_size, 1685 (long long)mftbmp_ni->allocated_size,
1653 (long long)mftbmp_vi->i_size, 1686 (long long)i_size_read(mftbmp_vi),
1654 (long long)mftbmp_ni->initialized_size); 1687 (long long)mftbmp_ni->initialized_size);
1688 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
1689#endif /* DEBUG */
1655err_out: 1690err_out:
1656 return ret; 1691 return ret;
1657} 1692}
@@ -1679,7 +1714,8 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1679{ 1714{
1680 LCN lcn; 1715 LCN lcn;
1681 VCN old_last_vcn; 1716 VCN old_last_vcn;
1682 s64 min_nr, nr, ll = 0; 1717 s64 min_nr, nr, ll;
1718 unsigned long flags;
1683 ntfs_inode *mft_ni; 1719 ntfs_inode *mft_ni;
1684 runlist_element *rl, *rl2; 1720 runlist_element *rl, *rl2;
1685 ntfs_attr_search_ctx *ctx = NULL; 1721 ntfs_attr_search_ctx *ctx = NULL;
@@ -1695,23 +1731,25 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1695 * Determine the preferred allocation location, i.e. the last lcn of 1731 * Determine the preferred allocation location, i.e. the last lcn of
1696 * the mft data attribute. The allocated size of the mft data 1732 * the mft data attribute. The allocated size of the mft data
1697 * attribute cannot be zero so we are ok to do this. 1733 * attribute cannot be zero so we are ok to do this.
1698 * ntfs_find_vcn() returns the runlist locked on success.
1699 */ 1734 */
1700 rl = ntfs_find_vcn(mft_ni, (mft_ni->allocated_size - 1) >> 1735 down_write(&mft_ni->runlist.lock);
1701 vol->cluster_size_bits, TRUE); 1736 read_lock_irqsave(&mft_ni->size_lock, flags);
1737 ll = mft_ni->allocated_size;
1738 read_unlock_irqrestore(&mft_ni->size_lock, flags);
1739 rl = ntfs_attr_find_vcn_nolock(mft_ni,
1740 (ll - 1) >> vol->cluster_size_bits, TRUE);
1702 if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { 1741 if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
1742 up_write(&mft_ni->runlist.lock);
1703 ntfs_error(vol->sb, "Failed to determine last allocated " 1743 ntfs_error(vol->sb, "Failed to determine last allocated "
1704 "cluster of mft data attribute."); 1744 "cluster of mft data attribute.");
1705 if (!IS_ERR(rl)) { 1745 if (!IS_ERR(rl))
1706 up_write(&mft_ni->runlist.lock);
1707 ret = -EIO; 1746 ret = -EIO;
1708 } else 1747 else
1709 ret = PTR_ERR(rl); 1748 ret = PTR_ERR(rl);
1710 return ret; 1749 return ret;
1711 } 1750 }
1712 lcn = rl->lcn + rl->length; 1751 lcn = rl->lcn + rl->length;
1713 ntfs_debug("Last lcn of mft data attribute is 0x%llx.", 1752 ntfs_debug("Last lcn of mft data attribute is 0x%llx.", (long long)lcn);
1714 (long long)lcn);
1715 /* Minimum allocation is one mft record worth of clusters. */ 1753 /* Minimum allocation is one mft record worth of clusters. */
1716 min_nr = vol->mft_record_size >> vol->cluster_size_bits; 1754 min_nr = vol->mft_record_size >> vol->cluster_size_bits;
1717 if (!min_nr) 1755 if (!min_nr)
@@ -1721,12 +1759,13 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1721 if (!nr) 1759 if (!nr)
1722 nr = min_nr; 1760 nr = min_nr;
1723 /* Ensure we do not go above 2^32-1 mft records. */ 1761 /* Ensure we do not go above 2^32-1 mft records. */
1724 if (unlikely((mft_ni->allocated_size + 1762 read_lock_irqsave(&mft_ni->size_lock, flags);
1725 (nr << vol->cluster_size_bits)) >> 1763 ll = mft_ni->allocated_size;
1764 read_unlock_irqrestore(&mft_ni->size_lock, flags);
1765 if (unlikely((ll + (nr << vol->cluster_size_bits)) >>
1726 vol->mft_record_size_bits >= (1ll << 32))) { 1766 vol->mft_record_size_bits >= (1ll << 32))) {
1727 nr = min_nr; 1767 nr = min_nr;
1728 if (unlikely((mft_ni->allocated_size + 1768 if (unlikely((ll + (nr << vol->cluster_size_bits)) >>
1729 (nr << vol->cluster_size_bits)) >>
1730 vol->mft_record_size_bits >= (1ll << 32))) { 1769 vol->mft_record_size_bits >= (1ll << 32))) {
1731 ntfs_warning(vol->sb, "Cannot allocate mft record " 1770 ntfs_warning(vol->sb, "Cannot allocate mft record "
1732 "because the maximum number of inodes " 1771 "because the maximum number of inodes "
@@ -1772,7 +1811,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1772 return PTR_ERR(rl); 1811 return PTR_ERR(rl);
1773 } 1812 }
1774 mft_ni->runlist.rl = rl; 1813 mft_ni->runlist.rl = rl;
1775 ntfs_debug("Allocated %lli clusters.", nr); 1814 ntfs_debug("Allocated %lli clusters.", (long long)nr);
1776 /* Find the last run in the new runlist. */ 1815 /* Find the last run in the new runlist. */
1777 for (; rl[1].length; rl++) 1816 for (; rl[1].length; rl++)
1778 ; 1817 ;
@@ -1808,7 +1847,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1808 BUG_ON(ll < rl2->vcn); 1847 BUG_ON(ll < rl2->vcn);
1809 BUG_ON(ll >= rl2->vcn + rl2->length); 1848 BUG_ON(ll >= rl2->vcn + rl2->length);
1810 /* Get the size for the new mapping pairs array for this extent. */ 1849 /* Get the size for the new mapping pairs array for this extent. */
1811 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll); 1850 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
1812 if (unlikely(mp_size <= 0)) { 1851 if (unlikely(mp_size <= 0)) {
1813 ntfs_error(vol->sb, "Get size for mapping pairs failed for " 1852 ntfs_error(vol->sb, "Get size for mapping pairs failed for "
1814 "mft data attribute extent."); 1853 "mft data attribute extent.");
@@ -1832,7 +1871,11 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1832 // moving other attributes out of this mft record. 1871 // moving other attributes out of this mft record.
1833 // Note: Use the special reserved mft records and ensure that 1872 // Note: Use the special reserved mft records and ensure that
1834 // this extent is not required to find the mft record in 1873 // this extent is not required to find the mft record in
1835 // question. 1874 // question. If no free special records left we would need to
1875 // move an existing record away, insert ours in its place, and
1876 // then place the moved record into the newly allocated space
1877 // and we would then need to update all references to this mft
1878 // record appropriately. This is rather complicated...
1836 ntfs_error(vol->sb, "Not enough space in this mft record to " 1879 ntfs_error(vol->sb, "Not enough space in this mft record to "
1837 "accomodate extended mft data attribute " 1880 "accomodate extended mft data attribute "
1838 "extent. Cannot handle this yet."); 1881 "extent. Cannot handle this yet.");
@@ -1843,7 +1886,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1843 /* Generate the mapping pairs array directly into the attr record. */ 1886 /* Generate the mapping pairs array directly into the attr record. */
1844 ret = ntfs_mapping_pairs_build(vol, (u8*)a + 1887 ret = ntfs_mapping_pairs_build(vol, (u8*)a +
1845 le16_to_cpu(a->data.non_resident.mapping_pairs_offset), 1888 le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
1846 mp_size, rl2, ll, NULL); 1889 mp_size, rl2, ll, -1, NULL);
1847 if (unlikely(ret)) { 1890 if (unlikely(ret)) {
1848 ntfs_error(vol->sb, "Failed to build mapping pairs array of " 1891 ntfs_error(vol->sb, "Failed to build mapping pairs array of "
1849 "mft data attribute."); 1892 "mft data attribute.");
@@ -1875,9 +1918,11 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1875 } 1918 }
1876 a = ctx->attr; 1919 a = ctx->attr;
1877 } 1920 }
1921 write_lock_irqsave(&mft_ni->size_lock, flags);
1878 mft_ni->allocated_size += nr << vol->cluster_size_bits; 1922 mft_ni->allocated_size += nr << vol->cluster_size_bits;
1879 a->data.non_resident.allocated_size = 1923 a->data.non_resident.allocated_size =
1880 cpu_to_sle64(mft_ni->allocated_size); 1924 cpu_to_sle64(mft_ni->allocated_size);
1925 write_unlock_irqrestore(&mft_ni->size_lock, flags);
1881 /* Ensure the changes make it to disk. */ 1926 /* Ensure the changes make it to disk. */
1882 flush_dcache_mft_record_page(ctx->ntfs_ino); 1927 flush_dcache_mft_record_page(ctx->ntfs_ino);
1883 mark_mft_record_dirty(ctx->ntfs_ino); 1928 mark_mft_record_dirty(ctx->ntfs_ino);
@@ -1892,7 +1937,9 @@ restore_undo_alloc:
1892 CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) { 1937 CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) {
1893 ntfs_error(vol->sb, "Failed to find last attribute extent of " 1938 ntfs_error(vol->sb, "Failed to find last attribute extent of "
1894 "mft data attribute.%s", es); 1939 "mft data attribute.%s", es);
1940 write_lock_irqsave(&mft_ni->size_lock, flags);
1895 mft_ni->allocated_size += nr << vol->cluster_size_bits; 1941 mft_ni->allocated_size += nr << vol->cluster_size_bits;
1942 write_unlock_irqrestore(&mft_ni->size_lock, flags);
1896 ntfs_attr_put_search_ctx(ctx); 1943 ntfs_attr_put_search_ctx(ctx);
1897 unmap_mft_record(mft_ni); 1944 unmap_mft_record(mft_ni);
1898 up_write(&mft_ni->runlist.lock); 1945 up_write(&mft_ni->runlist.lock);
@@ -1921,7 +1968,7 @@ undo_alloc:
1921 a->data.non_resident.mapping_pairs_offset), 1968 a->data.non_resident.mapping_pairs_offset),
1922 old_alen - le16_to_cpu( 1969 old_alen - le16_to_cpu(
1923 a->data.non_resident.mapping_pairs_offset), 1970 a->data.non_resident.mapping_pairs_offset),
1924 rl2, ll, NULL)) { 1971 rl2, ll, -1, NULL)) {
1925 ntfs_error(vol->sb, "Failed to restore mapping pairs " 1972 ntfs_error(vol->sb, "Failed to restore mapping pairs "
1926 "array.%s", es); 1973 "array.%s", es);
1927 NVolSetErrors(vol); 1974 NVolSetErrors(vol);
@@ -1991,7 +2038,7 @@ static int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no,
1991 "reports this as corruption, please email " 2038 "reports this as corruption, please email "
1992 "linux-ntfs-dev@lists.sourceforge.net stating " 2039 "linux-ntfs-dev@lists.sourceforge.net stating "
1993 "that you saw this message and that the " 2040 "that you saw this message and that the "
1994 "modified file system created was corrupt. " 2041 "modified filesystem created was corrupt. "
1995 "Thank you."); 2042 "Thank you.");
1996 } 2043 }
1997 /* Set the update sequence number to 1. */ 2044 /* Set the update sequence number to 1. */
@@ -2036,6 +2083,7 @@ static int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no,
2036 */ 2083 */
2037static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no) 2084static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
2038{ 2085{
2086 loff_t i_size;
2039 struct inode *mft_vi = vol->mft_ino; 2087 struct inode *mft_vi = vol->mft_ino;
2040 struct page *page; 2088 struct page *page;
2041 MFT_RECORD *m; 2089 MFT_RECORD *m;
@@ -2051,10 +2099,11 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
2051 index = mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; 2099 index = mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
2052 ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; 2100 ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
2053 /* The maximum valid index into the page cache for $MFT's data. */ 2101 /* The maximum valid index into the page cache for $MFT's data. */
2054 end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT; 2102 i_size = i_size_read(mft_vi);
2103 end_index = i_size >> PAGE_CACHE_SHIFT;
2055 if (unlikely(index >= end_index)) { 2104 if (unlikely(index >= end_index)) {
2056 if (unlikely(index > end_index || ofs + vol->mft_record_size >= 2105 if (unlikely(index > end_index || ofs + vol->mft_record_size >=
2057 (mft_vi->i_size & ~PAGE_CACHE_MASK))) { 2106 (i_size & ~PAGE_CACHE_MASK))) {
2058 ntfs_error(vol->sb, "Tried to format non-existing mft " 2107 ntfs_error(vol->sb, "Tried to format non-existing mft "
2059 "record 0x%llx.", (long long)mft_no); 2108 "record 0x%llx.", (long long)mft_no);
2060 return -ENOENT; 2109 return -ENOENT;
@@ -2188,6 +2237,7 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
2188 ntfs_inode *base_ni, MFT_RECORD **mrec) 2237 ntfs_inode *base_ni, MFT_RECORD **mrec)
2189{ 2238{
2190 s64 ll, bit, old_data_initialized, old_data_size; 2239 s64 ll, bit, old_data_initialized, old_data_size;
2240 unsigned long flags;
2191 struct inode *vi; 2241 struct inode *vi;
2192 struct page *page; 2242 struct page *page;
2193 ntfs_inode *mft_ni, *mftbmp_ni, *ni; 2243 ntfs_inode *mft_ni, *mftbmp_ni, *ni;
@@ -2237,9 +2287,13 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
2237 * the first 24 mft records as they are special and whilst they may not 2287 * the first 24 mft records as they are special and whilst they may not
2238 * be in use, we do not allocate from them. 2288 * be in use, we do not allocate from them.
2239 */ 2289 */
2290 read_lock_irqsave(&mft_ni->size_lock, flags);
2240 ll = mft_ni->initialized_size >> vol->mft_record_size_bits; 2291 ll = mft_ni->initialized_size >> vol->mft_record_size_bits;
2241 if (mftbmp_ni->initialized_size << 3 > ll && 2292 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2242 mftbmp_ni->initialized_size > 3) { 2293 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
2294 old_data_initialized = mftbmp_ni->initialized_size;
2295 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
2296 if (old_data_initialized << 3 > ll && old_data_initialized > 3) {
2243 bit = ll; 2297 bit = ll;
2244 if (bit < 24) 2298 if (bit < 24)
2245 bit = 24; 2299 bit = 24;
@@ -2254,15 +2308,18 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
2254 * mft record that we can allocate. 2308 * mft record that we can allocate.
2255 * Note: The smallest mft record we allocate is mft record 24. 2309 * Note: The smallest mft record we allocate is mft record 24.
2256 */ 2310 */
2257 bit = mftbmp_ni->initialized_size << 3; 2311 bit = old_data_initialized << 3;
2258 if (unlikely(bit >= (1ll << 32))) 2312 if (unlikely(bit >= (1ll << 32)))
2259 goto max_err_out; 2313 goto max_err_out;
2314 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
2315 old_data_size = mftbmp_ni->allocated_size;
2260 ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, " 2316 ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, "
2261 "data_size 0x%llx, initialized_size 0x%llx.", 2317 "data_size 0x%llx, initialized_size 0x%llx.",
2262 (long long)mftbmp_ni->allocated_size, 2318 (long long)old_data_size,
2263 (long long)vol->mftbmp_ino->i_size, 2319 (long long)i_size_read(vol->mftbmp_ino),
2264 (long long)mftbmp_ni->initialized_size); 2320 (long long)old_data_initialized);
2265 if (mftbmp_ni->initialized_size + 8 > mftbmp_ni->allocated_size) { 2321 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
2322 if (old_data_initialized + 8 > old_data_size) {
2266 /* Need to extend bitmap by one more cluster. */ 2323 /* Need to extend bitmap by one more cluster. */
2267 ntfs_debug("mftbmp: initialized_size + 8 > allocated_size."); 2324 ntfs_debug("mftbmp: initialized_size + 8 > allocated_size.");
2268 err = ntfs_mft_bitmap_extend_allocation_nolock(vol); 2325 err = ntfs_mft_bitmap_extend_allocation_nolock(vol);
@@ -2270,12 +2327,16 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
2270 up_write(&vol->mftbmp_lock); 2327 up_write(&vol->mftbmp_lock);
2271 goto err_out; 2328 goto err_out;
2272 } 2329 }
2330#ifdef DEBUG
2331 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
2273 ntfs_debug("Status of mftbmp after allocation extension: " 2332 ntfs_debug("Status of mftbmp after allocation extension: "
2274 "allocated_size 0x%llx, data_size 0x%llx, " 2333 "allocated_size 0x%llx, data_size 0x%llx, "
2275 "initialized_size 0x%llx.", 2334 "initialized_size 0x%llx.",
2276 (long long)mftbmp_ni->allocated_size, 2335 (long long)mftbmp_ni->allocated_size,
2277 (long long)vol->mftbmp_ino->i_size, 2336 (long long)i_size_read(vol->mftbmp_ino),
2278 (long long)mftbmp_ni->initialized_size); 2337 (long long)mftbmp_ni->initialized_size);
2338 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
2339#endif /* DEBUG */
2279 } 2340 }
2280 /* 2341 /*
2281 * We now have sufficient allocated space, extend the initialized_size 2342 * We now have sufficient allocated space, extend the initialized_size
@@ -2287,12 +2348,16 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
2287 up_write(&vol->mftbmp_lock); 2348 up_write(&vol->mftbmp_lock);
2288 goto err_out; 2349 goto err_out;
2289 } 2350 }
2351#ifdef DEBUG
2352 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
2290 ntfs_debug("Status of mftbmp after initialized extention: " 2353 ntfs_debug("Status of mftbmp after initialized extention: "
2291 "allocated_size 0x%llx, data_size 0x%llx, " 2354 "allocated_size 0x%llx, data_size 0x%llx, "
2292 "initialized_size 0x%llx.", 2355 "initialized_size 0x%llx.",
2293 (long long)mftbmp_ni->allocated_size, 2356 (long long)mftbmp_ni->allocated_size,
2294 (long long)vol->mftbmp_ino->i_size, 2357 (long long)i_size_read(vol->mftbmp_ino),
2295 (long long)mftbmp_ni->initialized_size); 2358 (long long)mftbmp_ni->initialized_size);
2359 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
2360#endif /* DEBUG */
2296 ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit); 2361 ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit);
2297found_free_rec: 2362found_free_rec:
2298 /* @bit is the found free mft record, allocate it in the mft bitmap. */ 2363 /* @bit is the found free mft record, allocate it in the mft bitmap. */
@@ -2314,7 +2379,10 @@ have_alloc_rec:
2314 * parallel allocation could allocate the same mft record as this one. 2379 * parallel allocation could allocate the same mft record as this one.
2315 */ 2380 */
2316 ll = (bit + 1) << vol->mft_record_size_bits; 2381 ll = (bit + 1) << vol->mft_record_size_bits;
2317 if (ll <= mft_ni->initialized_size) { 2382 read_lock_irqsave(&mft_ni->size_lock, flags);
2383 old_data_initialized = mft_ni->initialized_size;
2384 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2385 if (ll <= old_data_initialized) {
2318 ntfs_debug("Allocated mft record already initialized."); 2386 ntfs_debug("Allocated mft record already initialized.");
2319 goto mft_rec_already_initialized; 2387 goto mft_rec_already_initialized;
2320 } 2388 }
@@ -2325,26 +2393,30 @@ have_alloc_rec:
2325 * actually traversed more than once when a freshly formatted volume is 2393 * actually traversed more than once when a freshly formatted volume is
2326 * first written to so it optimizes away nicely in the common case. 2394 * first written to so it optimizes away nicely in the common case.
2327 */ 2395 */
2396 read_lock_irqsave(&mft_ni->size_lock, flags);
2328 ntfs_debug("Status of mft data before extension: " 2397 ntfs_debug("Status of mft data before extension: "
2329 "allocated_size 0x%llx, data_size 0x%llx, " 2398 "allocated_size 0x%llx, data_size 0x%llx, "
2330 "initialized_size 0x%llx.", 2399 "initialized_size 0x%llx.",
2331 (long long)mft_ni->allocated_size, 2400 (long long)mft_ni->allocated_size,
2332 (long long)vol->mft_ino->i_size, 2401 (long long)i_size_read(vol->mft_ino),
2333 (long long)mft_ni->initialized_size); 2402 (long long)mft_ni->initialized_size);
2334 while (ll > mft_ni->allocated_size) { 2403 while (ll > mft_ni->allocated_size) {
2404 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2335 err = ntfs_mft_data_extend_allocation_nolock(vol); 2405 err = ntfs_mft_data_extend_allocation_nolock(vol);
2336 if (unlikely(err)) { 2406 if (unlikely(err)) {
2337 ntfs_error(vol->sb, "Failed to extend mft data " 2407 ntfs_error(vol->sb, "Failed to extend mft data "
2338 "allocation."); 2408 "allocation.");
2339 goto undo_mftbmp_alloc_nolock; 2409 goto undo_mftbmp_alloc_nolock;
2340 } 2410 }
2411 read_lock_irqsave(&mft_ni->size_lock, flags);
2341 ntfs_debug("Status of mft data after allocation extension: " 2412 ntfs_debug("Status of mft data after allocation extension: "
2342 "allocated_size 0x%llx, data_size 0x%llx, " 2413 "allocated_size 0x%llx, data_size 0x%llx, "
2343 "initialized_size 0x%llx.", 2414 "initialized_size 0x%llx.",
2344 (long long)mft_ni->allocated_size, 2415 (long long)mft_ni->allocated_size,
2345 (long long)vol->mft_ino->i_size, 2416 (long long)i_size_read(vol->mft_ino),
2346 (long long)mft_ni->initialized_size); 2417 (long long)mft_ni->initialized_size);
2347 } 2418 }
2419 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2348 /* 2420 /*
2349 * Extend mft data initialized size (and data size of course) to reach 2421 * Extend mft data initialized size (and data size of course) to reach
2350 * the allocated mft record, formatting the mft records allong the way. 2422 * the allocated mft record, formatting the mft records allong the way.
@@ -2352,6 +2424,7 @@ have_alloc_rec:
2352 * needed by ntfs_mft_record_format(). We will update the attribute 2424 * needed by ntfs_mft_record_format(). We will update the attribute
2353 * record itself in one fell swoop later on. 2425 * record itself in one fell swoop later on.
2354 */ 2426 */
2427 write_lock_irqsave(&mft_ni->size_lock, flags);
2355 old_data_initialized = mft_ni->initialized_size; 2428 old_data_initialized = mft_ni->initialized_size;
2356 old_data_size = vol->mft_ino->i_size; 2429 old_data_size = vol->mft_ino->i_size;
2357 while (ll > mft_ni->initialized_size) { 2430 while (ll > mft_ni->initialized_size) {
@@ -2360,8 +2433,9 @@ have_alloc_rec:
2360 new_initialized_size = mft_ni->initialized_size + 2433 new_initialized_size = mft_ni->initialized_size +
2361 vol->mft_record_size; 2434 vol->mft_record_size;
2362 mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits; 2435 mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits;
2363 if (new_initialized_size > vol->mft_ino->i_size) 2436 if (new_initialized_size > i_size_read(vol->mft_ino))
2364 vol->mft_ino->i_size = new_initialized_size; 2437 i_size_write(vol->mft_ino, new_initialized_size);
2438 write_unlock_irqrestore(&mft_ni->size_lock, flags);
2365 ntfs_debug("Initializing mft record 0x%llx.", 2439 ntfs_debug("Initializing mft record 0x%llx.",
2366 (long long)mft_no); 2440 (long long)mft_no);
2367 err = ntfs_mft_record_format(vol, mft_no); 2441 err = ntfs_mft_record_format(vol, mft_no);
@@ -2369,8 +2443,10 @@ have_alloc_rec:
2369 ntfs_error(vol->sb, "Failed to format mft record."); 2443 ntfs_error(vol->sb, "Failed to format mft record.");
2370 goto undo_data_init; 2444 goto undo_data_init;
2371 } 2445 }
2446 write_lock_irqsave(&mft_ni->size_lock, flags);
2372 mft_ni->initialized_size = new_initialized_size; 2447 mft_ni->initialized_size = new_initialized_size;
2373 } 2448 }
2449 write_unlock_irqrestore(&mft_ni->size_lock, flags);
2374 record_formatted = TRUE; 2450 record_formatted = TRUE;
2375 /* Update the mft data attribute record to reflect the new sizes. */ 2451 /* Update the mft data attribute record to reflect the new sizes. */
2376 m = map_mft_record(mft_ni); 2452 m = map_mft_record(mft_ni);
@@ -2396,22 +2472,27 @@ have_alloc_rec:
2396 goto undo_data_init; 2472 goto undo_data_init;
2397 } 2473 }
2398 a = ctx->attr; 2474 a = ctx->attr;
2475 read_lock_irqsave(&mft_ni->size_lock, flags);
2399 a->data.non_resident.initialized_size = 2476 a->data.non_resident.initialized_size =
2400 cpu_to_sle64(mft_ni->initialized_size); 2477 cpu_to_sle64(mft_ni->initialized_size);
2401 a->data.non_resident.data_size = cpu_to_sle64(vol->mft_ino->i_size); 2478 a->data.non_resident.data_size =
2479 cpu_to_sle64(i_size_read(vol->mft_ino));
2480 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2402 /* Ensure the changes make it to disk. */ 2481 /* Ensure the changes make it to disk. */
2403 flush_dcache_mft_record_page(ctx->ntfs_ino); 2482 flush_dcache_mft_record_page(ctx->ntfs_ino);
2404 mark_mft_record_dirty(ctx->ntfs_ino); 2483 mark_mft_record_dirty(ctx->ntfs_ino);
2405 ntfs_attr_put_search_ctx(ctx); 2484 ntfs_attr_put_search_ctx(ctx);
2406 unmap_mft_record(mft_ni); 2485 unmap_mft_record(mft_ni);
2486 read_lock_irqsave(&mft_ni->size_lock, flags);
2407 ntfs_debug("Status of mft data after mft record initialization: " 2487 ntfs_debug("Status of mft data after mft record initialization: "
2408 "allocated_size 0x%llx, data_size 0x%llx, " 2488 "allocated_size 0x%llx, data_size 0x%llx, "
2409 "initialized_size 0x%llx.", 2489 "initialized_size 0x%llx.",
2410 (long long)mft_ni->allocated_size, 2490 (long long)mft_ni->allocated_size,
2411 (long long)vol->mft_ino->i_size, 2491 (long long)i_size_read(vol->mft_ino),
2412 (long long)mft_ni->initialized_size); 2492 (long long)mft_ni->initialized_size);
2413 BUG_ON(vol->mft_ino->i_size > mft_ni->allocated_size); 2493 BUG_ON(i_size_read(vol->mft_ino) > mft_ni->allocated_size);
2414 BUG_ON(mft_ni->initialized_size > vol->mft_ino->i_size); 2494 BUG_ON(mft_ni->initialized_size > i_size_read(vol->mft_ino));
2495 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2415mft_rec_already_initialized: 2496mft_rec_already_initialized:
2416 /* 2497 /*
2417 * We can finally drop the mft bitmap lock as the mft data attribute 2498 * We can finally drop the mft bitmap lock as the mft data attribute
@@ -2652,8 +2733,10 @@ mft_rec_already_initialized:
2652 *mrec = m; 2733 *mrec = m;
2653 return ni; 2734 return ni;
2654undo_data_init: 2735undo_data_init:
2736 write_lock_irqsave(&mft_ni->size_lock, flags);
2655 mft_ni->initialized_size = old_data_initialized; 2737 mft_ni->initialized_size = old_data_initialized;
2656 vol->mft_ino->i_size = old_data_size; 2738 i_size_write(vol->mft_ino, old_data_size);
2739 write_unlock_irqrestore(&mft_ni->size_lock, flags);
2657 goto undo_mftbmp_alloc_nolock; 2740 goto undo_mftbmp_alloc_nolock;
2658undo_mftbmp_alloc: 2741undo_mftbmp_alloc:
2659 down_write(&vol->mftbmp_lock); 2742 down_write(&vol->mftbmp_lock);
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 7c7e13b43b2e..351dbc3b6e40 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -153,8 +153,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
153 ntfs_error(vol->sb, "ntfs_iget(0x%lx) failed with " 153 ntfs_error(vol->sb, "ntfs_iget(0x%lx) failed with "
154 "error code %li.", dent_ino, 154 "error code %li.", dent_ino,
155 PTR_ERR(dent_inode)); 155 PTR_ERR(dent_inode));
156 if (name) 156 kfree(name);
157 kfree(name);
158 /* Return the error code. */ 157 /* Return the error code. */
159 return (struct dentry *)dent_inode; 158 return (struct dentry *)dent_inode;
160 } 159 }
@@ -380,7 +379,7 @@ struct inode_operations ntfs_dir_inode_ops = {
380 * Return the dentry of the parent directory on success or the error code on 379 * Return the dentry of the parent directory on success or the error code on
381 * error (IS_ERR() is true). 380 * error (IS_ERR() is true).
382 */ 381 */
383struct dentry *ntfs_get_parent(struct dentry *child_dent) 382static struct dentry *ntfs_get_parent(struct dentry *child_dent)
384{ 383{
385 struct inode *vi = child_dent->d_inode; 384 struct inode *vi = child_dent->d_inode;
386 ntfs_inode *ni = NTFS_I(vi); 385 ntfs_inode *ni = NTFS_I(vi);
@@ -465,7 +464,7 @@ try_next:
465 * 464 *
466 * Return the dentry on success or the error code on error (IS_ERR() is true). 465 * Return the dentry on success or the error code on error (IS_ERR() is true).
467 */ 466 */
468struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh) 467static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh)
469{ 468{
470 struct inode *vi; 469 struct inode *vi;
471 struct dentry *dent; 470 struct dentry *dent;
@@ -496,3 +495,30 @@ struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh)
496 ntfs_debug("Done for inode 0x%lx, generation 0x%x.", ino, gen); 495 ntfs_debug("Done for inode 0x%lx, generation 0x%x.", ino, gen);
497 return dent; 496 return dent;
498} 497}
498
499/**
500 * Export operations allowing NFS exporting of mounted NTFS partitions.
501 *
502 * We use the default ->decode_fh() and ->encode_fh() for now. Note that they
503 * use 32 bits to store the inode number which is an unsigned long so on 64-bit
504 * architectures is usually 64 bits so it would all fail horribly on huge
505 * volumes. I guess we need to define our own encode and decode fh functions
506 * that store 64-bit inode numbers at some point but for now we will ignore the
507 * problem...
508 *
509 * We also use the default ->get_name() helper (used by ->decode_fh() via
510 * fs/exportfs/expfs.c::find_exported_dentry()) as that is completely fs
511 * independent.
512 *
513 * The default ->get_parent() just returns -EACCES so we have to provide our
514 * own and the default ->get_dentry() is incompatible with NTFS due to not
515 * allowing the inode number 0 which is used in NTFS for the system file $MFT
516 * and due to using iget() whereas NTFS needs ntfs_iget().
517 */
518struct export_operations ntfs_export_ops = {
519 .get_parent = ntfs_get_parent, /* Find the parent of a given
520 directory. */
521 .get_dentry = ntfs_get_dentry, /* Find a dentry for the inode
522 given a file handle
523 sub-fragment. */
524};
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index 720ffb71bab8..446b5014115c 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -2,7 +2,7 @@
2 * ntfs.h - Defines for NTFS Linux kernel driver. Part of the Linux-NTFS 2 * ntfs.h - Defines for NTFS Linux kernel driver. Part of the Linux-NTFS
3 * project. 3 * project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (C) 2002 Richard Russon 6 * Copyright (C) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -31,6 +31,7 @@
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/nls.h> 32#include <linux/nls.h>
33#include <linux/smp.h> 33#include <linux/smp.h>
34#include <linux/pagemap.h>
34 35
35#include "types.h" 36#include "types.h"
36#include "volume.h" 37#include "volume.h"
@@ -41,6 +42,9 @@ typedef enum {
41 NTFS_BLOCK_SIZE_BITS = 9, 42 NTFS_BLOCK_SIZE_BITS = 9,
42 NTFS_SB_MAGIC = 0x5346544e, /* 'NTFS' */ 43 NTFS_SB_MAGIC = 0x5346544e, /* 'NTFS' */
43 NTFS_MAX_NAME_LEN = 255, 44 NTFS_MAX_NAME_LEN = 255,
45 NTFS_MAX_ATTR_NAME_LEN = 255,
46 NTFS_MAX_CLUSTER_SIZE = 64 * 1024, /* 64kiB */
47 NTFS_MAX_PAGES_PER_CLUSTER = NTFS_MAX_CLUSTER_SIZE / PAGE_CACHE_SIZE,
44} NTFS_CONSTANTS; 48} NTFS_CONSTANTS;
45 49
46/* Global variables. */ 50/* Global variables. */
@@ -65,6 +69,8 @@ extern struct inode_operations ntfs_dir_inode_ops;
65extern struct file_operations ntfs_empty_file_ops; 69extern struct file_operations ntfs_empty_file_ops;
66extern struct inode_operations ntfs_empty_inode_ops; 70extern struct inode_operations ntfs_empty_inode_ops;
67 71
72extern struct export_operations ntfs_export_ops;
73
68/** 74/**
69 * NTFS_SB - return the ntfs volume given a vfs super block 75 * NTFS_SB - return the ntfs volume given a vfs super block
70 * @sb: VFS super block 76 * @sb: VFS super block
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 8438fb1da219..758855b0414e 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project. 2 * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -59,7 +59,7 @@ static inline void ntfs_rl_mc(runlist_element *dstbase, int dst,
59 * 59 *
60 * As the runlists grow, more memory will be required. To prevent the 60 * As the runlists grow, more memory will be required. To prevent the
61 * kernel having to allocate and reallocate large numbers of small bits of 61 * kernel having to allocate and reallocate large numbers of small bits of
62 * memory, this function returns and entire page of memory. 62 * memory, this function returns an entire page of memory.
63 * 63 *
64 * It is up to the caller to serialize access to the runlist @rl. 64 * It is up to the caller to serialize access to the runlist @rl.
65 * 65 *
@@ -113,8 +113,11 @@ static inline BOOL ntfs_are_rl_mergeable(runlist_element *dst,
113 BUG_ON(!dst); 113 BUG_ON(!dst);
114 BUG_ON(!src); 114 BUG_ON(!src);
115 115
116 if ((dst->lcn < 0) || (src->lcn < 0)) /* Are we merging holes? */ 116 if ((dst->lcn < 0) || (src->lcn < 0)) { /* Are we merging holes? */
117 if (dst->lcn == LCN_HOLE && src->lcn == LCN_HOLE)
118 return TRUE;
117 return FALSE; 119 return FALSE;
120 }
118 if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */ 121 if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */
119 return FALSE; 122 return FALSE;
120 if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */ 123 if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */
@@ -855,30 +858,42 @@ mpa_err:
855 if (!attr->data.non_resident.lowest_vcn) { 858 if (!attr->data.non_resident.lowest_vcn) {
856 VCN max_cluster; 859 VCN max_cluster;
857 860
858 max_cluster = (sle64_to_cpu( 861 max_cluster = ((sle64_to_cpu(
859 attr->data.non_resident.allocated_size) + 862 attr->data.non_resident.allocated_size) +
860 vol->cluster_size - 1) >> 863 vol->cluster_size - 1) >>
861 vol->cluster_size_bits; 864 vol->cluster_size_bits) - 1;
862 /* 865 /*
863 * If there is a difference between the highest_vcn and the 866 * A highest_vcn of zero means this is a single extent
864 * highest cluster, the runlist is either corrupt or, more 867 * attribute so simply terminate the runlist with LCN_ENOENT).
865 * likely, there are more extents following this one.
866 */ 868 */
867 if (deltaxcn < --max_cluster) { 869 if (deltaxcn) {
868 ntfs_debug("More extents to follow; deltaxcn = 0x%llx, " 870 /*
869 "max_cluster = 0x%llx", 871 * If there is a difference between the highest_vcn and
870 (unsigned long long)deltaxcn, 872 * the highest cluster, the runlist is either corrupt
871 (unsigned long long)max_cluster); 873 * or, more likely, there are more extents following
872 rl[rlpos].vcn = vcn; 874 * this one.
873 vcn += rl[rlpos].length = max_cluster - deltaxcn; 875 */
874 rl[rlpos].lcn = LCN_RL_NOT_MAPPED; 876 if (deltaxcn < max_cluster) {
875 rlpos++; 877 ntfs_debug("More extents to follow; deltaxcn "
876 } else if (unlikely(deltaxcn > max_cluster)) { 878 "= 0x%llx, max_cluster = "
877 ntfs_error(vol->sb, "Corrupt attribute. deltaxcn = " 879 "0x%llx",
878 "0x%llx, max_cluster = 0x%llx", 880 (unsigned long long)deltaxcn,
879 (unsigned long long)deltaxcn, 881 (unsigned long long)
880 (unsigned long long)max_cluster); 882 max_cluster);
881 goto mpa_err; 883 rl[rlpos].vcn = vcn;
884 vcn += rl[rlpos].length = max_cluster -
885 deltaxcn;
886 rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
887 rlpos++;
888 } else if (unlikely(deltaxcn > max_cluster)) {
889 ntfs_error(vol->sb, "Corrupt attribute. "
890 "deltaxcn = 0x%llx, "
891 "max_cluster = 0x%llx",
892 (unsigned long long)deltaxcn,
893 (unsigned long long)
894 max_cluster);
895 goto mpa_err;
896 }
882 } 897 }
883 rl[rlpos].lcn = LCN_ENOENT; 898 rl[rlpos].lcn = LCN_ENOENT;
884 } else /* Not the base extent. There may be more extents to follow. */ 899 } else /* Not the base extent. There may be more extents to follow. */
@@ -918,17 +933,18 @@ err_out:
918 * 933 *
919 * It is up to the caller to serialize access to the runlist @rl. 934 * It is up to the caller to serialize access to the runlist @rl.
920 * 935 *
921 * Since lcns must be >= 0, we use negative return values with special meaning: 936 * Since lcns must be >= 0, we use negative return codes with special meaning:
922 * 937 *
923 * Return value Meaning / Description 938 * Return code Meaning / Description
924 * ================================================== 939 * ==================================================
925 * -1 = LCN_HOLE Hole / not allocated on disk. 940 * LCN_HOLE Hole / not allocated on disk.
926 * -2 = LCN_RL_NOT_MAPPED This is part of the runlist which has not been 941 * LCN_RL_NOT_MAPPED This is part of the runlist which has not been
927 * inserted into the runlist yet. 942 * inserted into the runlist yet.
928 * -3 = LCN_ENOENT There is no such vcn in the attribute. 943 * LCN_ENOENT There is no such vcn in the attribute.
929 * 944 *
930 * Locking: - The caller must have locked the runlist (for reading or writing). 945 * Locking: - The caller must have locked the runlist (for reading or writing).
931 * - This function does not touch the lock. 946 * - This function does not touch the lock, nor does it modify the
947 * runlist.
932 */ 948 */
933LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn) 949LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn)
934{ 950{
@@ -964,6 +980,39 @@ LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn)
964 return LCN_ENOENT; 980 return LCN_ENOENT;
965} 981}
966 982
983#ifdef NTFS_RW
984
985/**
986 * ntfs_rl_find_vcn_nolock - find a vcn in a runlist
987 * @rl: runlist to search
988 * @vcn: vcn to find
989 *
990 * Find the virtual cluster number @vcn in the runlist @rl and return the
991 * address of the runlist element containing the @vcn on success.
992 *
993 * Return NULL if @rl is NULL or @vcn is in an unmapped part/out of bounds of
994 * the runlist.
995 *
996 * Locking: The runlist must be locked on entry.
997 */
998runlist_element *ntfs_rl_find_vcn_nolock(runlist_element *rl, const VCN vcn)
999{
1000 BUG_ON(vcn < 0);
1001 if (unlikely(!rl || vcn < rl[0].vcn))
1002 return NULL;
1003 while (likely(rl->length)) {
1004 if (unlikely(vcn < rl[1].vcn)) {
1005 if (likely(rl->lcn >= LCN_HOLE))
1006 return rl;
1007 return NULL;
1008 }
1009 rl++;
1010 }
1011 if (likely(rl->lcn == LCN_ENOENT))
1012 return rl;
1013 return NULL;
1014}
1015
967/** 1016/**
968 * ntfs_get_nr_significant_bytes - get number of bytes needed to store a number 1017 * ntfs_get_nr_significant_bytes - get number of bytes needed to store a number
969 * @n: number for which to get the number of bytes for 1018 * @n: number for which to get the number of bytes for
@@ -999,10 +1048,17 @@ static inline int ntfs_get_nr_significant_bytes(const s64 n)
999 * ntfs_get_size_for_mapping_pairs - get bytes needed for mapping pairs array 1048 * ntfs_get_size_for_mapping_pairs - get bytes needed for mapping pairs array
1000 * @vol: ntfs volume (needed for the ntfs version) 1049 * @vol: ntfs volume (needed for the ntfs version)
1001 * @rl: locked runlist to determine the size of the mapping pairs of 1050 * @rl: locked runlist to determine the size of the mapping pairs of
1002 * @start_vcn: vcn at which to start the mapping pairs array 1051 * @first_vcn: first vcn which to include in the mapping pairs array
1052 * @last_vcn: last vcn which to include in the mapping pairs array
1003 * 1053 *
1004 * Walk the locked runlist @rl and calculate the size in bytes of the mapping 1054 * Walk the locked runlist @rl and calculate the size in bytes of the mapping
1005 * pairs array corresponding to the runlist @rl, starting at vcn @start_vcn. 1055 * pairs array corresponding to the runlist @rl, starting at vcn @first_vcn and
1056 * finishing with vcn @last_vcn.
1057 *
1058 * A @last_vcn of -1 means end of runlist and in that case the size of the
1059 * mapping pairs array corresponding to the runlist starting at vcn @first_vcn
1060 * and finishing at the end of the runlist is determined.
1061 *
1006 * This for example allows us to allocate a buffer of the right size when 1062 * This for example allows us to allocate a buffer of the right size when
1007 * building the mapping pairs array. 1063 * building the mapping pairs array.
1008 * 1064 *
@@ -1018,34 +1074,50 @@ static inline int ntfs_get_nr_significant_bytes(const s64 n)
1018 * remains locked throughout, and is left locked upon return. 1074 * remains locked throughout, and is left locked upon return.
1019 */ 1075 */
1020int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol, 1076int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
1021 const runlist_element *rl, const VCN start_vcn) 1077 const runlist_element *rl, const VCN first_vcn,
1078 const VCN last_vcn)
1022{ 1079{
1023 LCN prev_lcn; 1080 LCN prev_lcn;
1024 int rls; 1081 int rls;
1082 BOOL the_end = FALSE;
1025 1083
1026 BUG_ON(start_vcn < 0); 1084 BUG_ON(first_vcn < 0);
1085 BUG_ON(last_vcn < -1);
1086 BUG_ON(last_vcn >= 0 && first_vcn > last_vcn);
1027 if (!rl) { 1087 if (!rl) {
1028 BUG_ON(start_vcn); 1088 BUG_ON(first_vcn);
1089 BUG_ON(last_vcn > 0);
1029 return 1; 1090 return 1;
1030 } 1091 }
1031 /* Skip to runlist element containing @start_vcn. */ 1092 /* Skip to runlist element containing @first_vcn. */
1032 while (rl->length && start_vcn >= rl[1].vcn) 1093 while (rl->length && first_vcn >= rl[1].vcn)
1033 rl++; 1094 rl++;
1034 if ((!rl->length && start_vcn > rl->vcn) || start_vcn < rl->vcn) 1095 if (unlikely((!rl->length && first_vcn > rl->vcn) ||
1096 first_vcn < rl->vcn))
1035 return -EINVAL; 1097 return -EINVAL;
1036 prev_lcn = 0; 1098 prev_lcn = 0;
1037 /* Always need the termining zero byte. */ 1099 /* Always need the termining zero byte. */
1038 rls = 1; 1100 rls = 1;
1039 /* Do the first partial run if present. */ 1101 /* Do the first partial run if present. */
1040 if (start_vcn > rl->vcn) { 1102 if (first_vcn > rl->vcn) {
1041 s64 delta; 1103 s64 delta, length = rl->length;
1042 1104
1043 /* We know rl->length != 0 already. */ 1105 /* We know rl->length != 0 already. */
1044 if (rl->length < 0 || rl->lcn < LCN_HOLE) 1106 if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
1045 goto err_out; 1107 goto err_out;
1046 delta = start_vcn - rl->vcn; 1108 /*
1109 * If @stop_vcn is given and finishes inside this run, cap the
1110 * run length.
1111 */
1112 if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
1113 s64 s1 = last_vcn + 1;
1114 if (unlikely(rl[1].vcn > s1))
1115 length = s1 - rl->vcn;
1116 the_end = TRUE;
1117 }
1118 delta = first_vcn - rl->vcn;
1047 /* Header byte + length. */ 1119 /* Header byte + length. */
1048 rls += 1 + ntfs_get_nr_significant_bytes(rl->length - delta); 1120 rls += 1 + ntfs_get_nr_significant_bytes(length - delta);
1049 /* 1121 /*
1050 * If the logical cluster number (lcn) denotes a hole and we 1122 * If the logical cluster number (lcn) denotes a hole and we
1051 * are on NTFS 3.0+, we don't store it at all, i.e. we need 1123 * are on NTFS 3.0+, we don't store it at all, i.e. we need
@@ -1053,9 +1125,9 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
1053 * Note: this assumes that on NTFS 1.2-, holes are stored with 1125 * Note: this assumes that on NTFS 1.2-, holes are stored with
1054 * an lcn of -1 and not a delta_lcn of -1 (unless both are -1). 1126 * an lcn of -1 and not a delta_lcn of -1 (unless both are -1).
1055 */ 1127 */
1056 if (rl->lcn >= 0 || vol->major_ver < 3) { 1128 if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
1057 prev_lcn = rl->lcn; 1129 prev_lcn = rl->lcn;
1058 if (rl->lcn >= 0) 1130 if (likely(rl->lcn >= 0))
1059 prev_lcn += delta; 1131 prev_lcn += delta;
1060 /* Change in lcn. */ 1132 /* Change in lcn. */
1061 rls += ntfs_get_nr_significant_bytes(prev_lcn); 1133 rls += ntfs_get_nr_significant_bytes(prev_lcn);
@@ -1064,11 +1136,23 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
1064 rl++; 1136 rl++;
1065 } 1137 }
1066 /* Do the full runs. */ 1138 /* Do the full runs. */
1067 for (; rl->length; rl++) { 1139 for (; rl->length && !the_end; rl++) {
1068 if (rl->length < 0 || rl->lcn < LCN_HOLE) 1140 s64 length = rl->length;
1141
1142 if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
1069 goto err_out; 1143 goto err_out;
1144 /*
1145 * If @stop_vcn is given and finishes inside this run, cap the
1146 * run length.
1147 */
1148 if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
1149 s64 s1 = last_vcn + 1;
1150 if (unlikely(rl[1].vcn > s1))
1151 length = s1 - rl->vcn;
1152 the_end = TRUE;
1153 }
1070 /* Header byte + length. */ 1154 /* Header byte + length. */
1071 rls += 1 + ntfs_get_nr_significant_bytes(rl->length); 1155 rls += 1 + ntfs_get_nr_significant_bytes(length);
1072 /* 1156 /*
1073 * If the logical cluster number (lcn) denotes a hole and we 1157 * If the logical cluster number (lcn) denotes a hole and we
1074 * are on NTFS 3.0+, we don't store it at all, i.e. we need 1158 * are on NTFS 3.0+, we don't store it at all, i.e. we need
@@ -1076,7 +1160,7 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
1076 * Note: this assumes that on NTFS 1.2-, holes are stored with 1160 * Note: this assumes that on NTFS 1.2-, holes are stored with
1077 * an lcn of -1 and not a delta_lcn of -1 (unless both are -1). 1161 * an lcn of -1 and not a delta_lcn of -1 (unless both are -1).
1078 */ 1162 */
1079 if (rl->lcn >= 0 || vol->major_ver < 3) { 1163 if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
1080 /* Change in lcn. */ 1164 /* Change in lcn. */
1081 rls += ntfs_get_nr_significant_bytes(rl->lcn - 1165 rls += ntfs_get_nr_significant_bytes(rl->lcn -
1082 prev_lcn); 1166 prev_lcn);
@@ -1119,7 +1203,7 @@ static inline int ntfs_write_significant_bytes(s8 *dst, const s8 *dst_max,
1119 1203
1120 i = 0; 1204 i = 0;
1121 do { 1205 do {
1122 if (dst > dst_max) 1206 if (unlikely(dst > dst_max))
1123 goto err_out; 1207 goto err_out;
1124 *dst++ = l & 0xffll; 1208 *dst++ = l & 0xffll;
1125 l >>= 8; 1209 l >>= 8;
@@ -1128,12 +1212,12 @@ static inline int ntfs_write_significant_bytes(s8 *dst, const s8 *dst_max,
1128 j = (n >> 8 * (i - 1)) & 0xff; 1212 j = (n >> 8 * (i - 1)) & 0xff;
1129 /* If the sign bit is wrong, we need an extra byte. */ 1213 /* If the sign bit is wrong, we need an extra byte. */
1130 if (n < 0 && j >= 0) { 1214 if (n < 0 && j >= 0) {
1131 if (dst > dst_max) 1215 if (unlikely(dst > dst_max))
1132 goto err_out; 1216 goto err_out;
1133 i++; 1217 i++;
1134 *dst = (s8)-1; 1218 *dst = (s8)-1;
1135 } else if (n > 0 && j < 0) { 1219 } else if (n > 0 && j < 0) {
1136 if (dst > dst_max) 1220 if (unlikely(dst > dst_max))
1137 goto err_out; 1221 goto err_out;
1138 i++; 1222 i++;
1139 *dst = (s8)0; 1223 *dst = (s8)0;
@@ -1149,13 +1233,18 @@ err_out:
1149 * @dst: destination buffer to which to write the mapping pairs array 1233 * @dst: destination buffer to which to write the mapping pairs array
1150 * @dst_len: size of destination buffer @dst in bytes 1234 * @dst_len: size of destination buffer @dst in bytes
1151 * @rl: locked runlist for which to build the mapping pairs array 1235 * @rl: locked runlist for which to build the mapping pairs array
1152 * @start_vcn: vcn at which to start the mapping pairs array 1236 * @first_vcn: first vcn which to include in the mapping pairs array
1237 * @last_vcn: last vcn which to include in the mapping pairs array
1153 * @stop_vcn: first vcn outside destination buffer on success or -ENOSPC 1238 * @stop_vcn: first vcn outside destination buffer on success or -ENOSPC
1154 * 1239 *
1155 * Create the mapping pairs array from the locked runlist @rl, starting at vcn 1240 * Create the mapping pairs array from the locked runlist @rl, starting at vcn
1156 * @start_vcn and save the array in @dst. @dst_len is the size of @dst in 1241 * @first_vcn and finishing with vcn @last_vcn and save the array in @dst.
1157 * bytes and it should be at least equal to the value obtained by calling 1242 * @dst_len is the size of @dst in bytes and it should be at least equal to the
1158 * ntfs_get_size_for_mapping_pairs(). 1243 * value obtained by calling ntfs_get_size_for_mapping_pairs().
1244 *
1245 * A @last_vcn of -1 means end of runlist and in that case the mapping pairs
1246 * array corresponding to the runlist starting at vcn @first_vcn and finishing
1247 * at the end of the runlist is created.
1159 * 1248 *
1160 * If @rl is NULL, just write a single terminator byte to @dst. 1249 * If @rl is NULL, just write a single terminator byte to @dst.
1161 * 1250 *
@@ -1164,7 +1253,7 @@ err_out:
1164 * been filled with all the mapping pairs that will fit, thus it can be treated 1253 * been filled with all the mapping pairs that will fit, thus it can be treated
1165 * as partial success, in that a new attribute extent needs to be created or 1254 * as partial success, in that a new attribute extent needs to be created or
1166 * the next extent has to be used and the mapping pairs build has to be 1255 * the next extent has to be used and the mapping pairs build has to be
1167 * continued with @start_vcn set to *@stop_vcn. 1256 * continued with @first_vcn set to *@stop_vcn.
1168 * 1257 *
1169 * Return 0 on success and -errno on error. The following error codes are 1258 * Return 0 on success and -errno on error. The following error codes are
1170 * defined: 1259 * defined:
@@ -1178,27 +1267,32 @@ err_out:
1178 */ 1267 */
1179int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst, 1268int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
1180 const int dst_len, const runlist_element *rl, 1269 const int dst_len, const runlist_element *rl,
1181 const VCN start_vcn, VCN *const stop_vcn) 1270 const VCN first_vcn, const VCN last_vcn, VCN *const stop_vcn)
1182{ 1271{
1183 LCN prev_lcn; 1272 LCN prev_lcn;
1184 s8 *dst_max, *dst_next; 1273 s8 *dst_max, *dst_next;
1185 int err = -ENOSPC; 1274 int err = -ENOSPC;
1275 BOOL the_end = FALSE;
1186 s8 len_len, lcn_len; 1276 s8 len_len, lcn_len;
1187 1277
1188 BUG_ON(start_vcn < 0); 1278 BUG_ON(first_vcn < 0);
1279 BUG_ON(last_vcn < -1);
1280 BUG_ON(last_vcn >= 0 && first_vcn > last_vcn);
1189 BUG_ON(dst_len < 1); 1281 BUG_ON(dst_len < 1);
1190 if (!rl) { 1282 if (!rl) {
1191 BUG_ON(start_vcn); 1283 BUG_ON(first_vcn);
1284 BUG_ON(last_vcn > 0);
1192 if (stop_vcn) 1285 if (stop_vcn)
1193 *stop_vcn = 0; 1286 *stop_vcn = 0;
1194 /* Terminator byte. */ 1287 /* Terminator byte. */
1195 *dst = 0; 1288 *dst = 0;
1196 return 0; 1289 return 0;
1197 } 1290 }
1198 /* Skip to runlist element containing @start_vcn. */ 1291 /* Skip to runlist element containing @first_vcn. */
1199 while (rl->length && start_vcn >= rl[1].vcn) 1292 while (rl->length && first_vcn >= rl[1].vcn)
1200 rl++; 1293 rl++;
1201 if ((!rl->length && start_vcn > rl->vcn) || start_vcn < rl->vcn) 1294 if (unlikely((!rl->length && first_vcn > rl->vcn) ||
1295 first_vcn < rl->vcn))
1202 return -EINVAL; 1296 return -EINVAL;
1203 /* 1297 /*
1204 * @dst_max is used for bounds checking in 1298 * @dst_max is used for bounds checking in
@@ -1207,17 +1301,27 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
1207 dst_max = dst + dst_len - 1; 1301 dst_max = dst + dst_len - 1;
1208 prev_lcn = 0; 1302 prev_lcn = 0;
1209 /* Do the first partial run if present. */ 1303 /* Do the first partial run if present. */
1210 if (start_vcn > rl->vcn) { 1304 if (first_vcn > rl->vcn) {
1211 s64 delta; 1305 s64 delta, length = rl->length;
1212 1306
1213 /* We know rl->length != 0 already. */ 1307 /* We know rl->length != 0 already. */
1214 if (rl->length < 0 || rl->lcn < LCN_HOLE) 1308 if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
1215 goto err_out; 1309 goto err_out;
1216 delta = start_vcn - rl->vcn; 1310 /*
1311 * If @stop_vcn is given and finishes inside this run, cap the
1312 * run length.
1313 */
1314 if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
1315 s64 s1 = last_vcn + 1;
1316 if (unlikely(rl[1].vcn > s1))
1317 length = s1 - rl->vcn;
1318 the_end = TRUE;
1319 }
1320 delta = first_vcn - rl->vcn;
1217 /* Write length. */ 1321 /* Write length. */
1218 len_len = ntfs_write_significant_bytes(dst + 1, dst_max, 1322 len_len = ntfs_write_significant_bytes(dst + 1, dst_max,
1219 rl->length - delta); 1323 length - delta);
1220 if (len_len < 0) 1324 if (unlikely(len_len < 0))
1221 goto size_err; 1325 goto size_err;
1222 /* 1326 /*
1223 * If the logical cluster number (lcn) denotes a hole and we 1327 * If the logical cluster number (lcn) denotes a hole and we
@@ -1228,19 +1332,19 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
1228 * case on NT4. - We assume that we just need to write the lcn 1332 * case on NT4. - We assume that we just need to write the lcn
1229 * change until someone tells us otherwise... (AIA) 1333 * change until someone tells us otherwise... (AIA)
1230 */ 1334 */
1231 if (rl->lcn >= 0 || vol->major_ver < 3) { 1335 if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
1232 prev_lcn = rl->lcn; 1336 prev_lcn = rl->lcn;
1233 if (rl->lcn >= 0) 1337 if (likely(rl->lcn >= 0))
1234 prev_lcn += delta; 1338 prev_lcn += delta;
1235 /* Write change in lcn. */ 1339 /* Write change in lcn. */
1236 lcn_len = ntfs_write_significant_bytes(dst + 1 + 1340 lcn_len = ntfs_write_significant_bytes(dst + 1 +
1237 len_len, dst_max, prev_lcn); 1341 len_len, dst_max, prev_lcn);
1238 if (lcn_len < 0) 1342 if (unlikely(lcn_len < 0))
1239 goto size_err; 1343 goto size_err;
1240 } else 1344 } else
1241 lcn_len = 0; 1345 lcn_len = 0;
1242 dst_next = dst + len_len + lcn_len + 1; 1346 dst_next = dst + len_len + lcn_len + 1;
1243 if (dst_next > dst_max) 1347 if (unlikely(dst_next > dst_max))
1244 goto size_err; 1348 goto size_err;
1245 /* Update header byte. */ 1349 /* Update header byte. */
1246 *dst = lcn_len << 4 | len_len; 1350 *dst = lcn_len << 4 | len_len;
@@ -1250,13 +1354,25 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
1250 rl++; 1354 rl++;
1251 } 1355 }
1252 /* Do the full runs. */ 1356 /* Do the full runs. */
1253 for (; rl->length; rl++) { 1357 for (; rl->length && !the_end; rl++) {
1254 if (rl->length < 0 || rl->lcn < LCN_HOLE) 1358 s64 length = rl->length;
1359
1360 if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
1255 goto err_out; 1361 goto err_out;
1362 /*
1363 * If @stop_vcn is given and finishes inside this run, cap the
1364 * run length.
1365 */
1366 if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
1367 s64 s1 = last_vcn + 1;
1368 if (unlikely(rl[1].vcn > s1))
1369 length = s1 - rl->vcn;
1370 the_end = TRUE;
1371 }
1256 /* Write length. */ 1372 /* Write length. */
1257 len_len = ntfs_write_significant_bytes(dst + 1, dst_max, 1373 len_len = ntfs_write_significant_bytes(dst + 1, dst_max,
1258 rl->length); 1374 length);
1259 if (len_len < 0) 1375 if (unlikely(len_len < 0))
1260 goto size_err; 1376 goto size_err;
1261 /* 1377 /*
1262 * If the logical cluster number (lcn) denotes a hole and we 1378 * If the logical cluster number (lcn) denotes a hole and we
@@ -1267,17 +1383,17 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
1267 * case on NT4. - We assume that we just need to write the lcn 1383 * case on NT4. - We assume that we just need to write the lcn
1268 * change until someone tells us otherwise... (AIA) 1384 * change until someone tells us otherwise... (AIA)
1269 */ 1385 */
1270 if (rl->lcn >= 0 || vol->major_ver < 3) { 1386 if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
1271 /* Write change in lcn. */ 1387 /* Write change in lcn. */
1272 lcn_len = ntfs_write_significant_bytes(dst + 1 + 1388 lcn_len = ntfs_write_significant_bytes(dst + 1 +
1273 len_len, dst_max, rl->lcn - prev_lcn); 1389 len_len, dst_max, rl->lcn - prev_lcn);
1274 if (lcn_len < 0) 1390 if (unlikely(lcn_len < 0))
1275 goto size_err; 1391 goto size_err;
1276 prev_lcn = rl->lcn; 1392 prev_lcn = rl->lcn;
1277 } else 1393 } else
1278 lcn_len = 0; 1394 lcn_len = 0;
1279 dst_next = dst + len_len + lcn_len + 1; 1395 dst_next = dst + len_len + lcn_len + 1;
1280 if (dst_next > dst_max) 1396 if (unlikely(dst_next > dst_max))
1281 goto size_err; 1397 goto size_err;
1282 /* Update header byte. */ 1398 /* Update header byte. */
1283 *dst = lcn_len << 4 | len_len; 1399 *dst = lcn_len << 4 | len_len;
@@ -1436,3 +1552,5 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
1436 ntfs_debug("Done."); 1552 ntfs_debug("Done.");
1437 return 0; 1553 return 0;
1438} 1554}
1555
1556#endif /* NTFS_RW */
diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h
index 7107fde59df9..aa0ee6540e7c 100644
--- a/fs/ntfs/runlist.h
+++ b/fs/ntfs/runlist.h
@@ -2,7 +2,7 @@
2 * runlist.h - Defines for runlist handling in NTFS Linux kernel driver. 2 * runlist.h - Defines for runlist handling in NTFS Linux kernel driver.
3 * Part of the Linux-NTFS project. 3 * Part of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -66,6 +66,8 @@ typedef enum {
66 LCN_HOLE = -1, /* Keep this as highest value or die! */ 66 LCN_HOLE = -1, /* Keep this as highest value or die! */
67 LCN_RL_NOT_MAPPED = -2, 67 LCN_RL_NOT_MAPPED = -2,
68 LCN_ENOENT = -3, 68 LCN_ENOENT = -3,
69 LCN_ENOMEM = -4,
70 LCN_EIO = -5,
69} LCN_SPECIAL_VALUES; 71} LCN_SPECIAL_VALUES;
70 72
71extern runlist_element *ntfs_runlists_merge(runlist_element *drl, 73extern runlist_element *ntfs_runlists_merge(runlist_element *drl,
@@ -76,14 +78,22 @@ extern runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol,
76 78
77extern LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn); 79extern LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn);
78 80
81#ifdef NTFS_RW
82
83extern runlist_element *ntfs_rl_find_vcn_nolock(runlist_element *rl,
84 const VCN vcn);
85
79extern int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol, 86extern int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
80 const runlist_element *rl, const VCN start_vcn); 87 const runlist_element *rl, const VCN first_vcn,
88 const VCN last_vcn);
81 89
82extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst, 90extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
83 const int dst_len, const runlist_element *rl, 91 const int dst_len, const runlist_element *rl,
84 const VCN start_vcn, VCN *const stop_vcn); 92 const VCN first_vcn, const VCN last_vcn, VCN *const stop_vcn);
85 93
86extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol, 94extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol,
87 runlist *const runlist, const s64 new_length); 95 runlist *const runlist, const s64 new_length);
88 96
97#endif /* NTFS_RW */
98
89#endif /* _LINUX_NTFS_RUNLIST_H */ 99#endif /* _LINUX_NTFS_RUNLIST_H */
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 212a3d0f2073..41aa8eb6755b 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. 2 * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * Copyright (c) 2001,2002 Richard Russon 5 * Copyright (c) 2001,2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -34,14 +34,16 @@
34#include "sysctl.h" 34#include "sysctl.h"
35#include "logfile.h" 35#include "logfile.h"
36#include "quota.h" 36#include "quota.h"
37#include "usnjrnl.h"
37#include "dir.h" 38#include "dir.h"
38#include "debug.h" 39#include "debug.h"
39#include "index.h" 40#include "index.h"
40#include "aops.h" 41#include "aops.h"
42#include "layout.h"
41#include "malloc.h" 43#include "malloc.h"
42#include "ntfs.h" 44#include "ntfs.h"
43 45
44/* Number of mounted file systems which have compression enabled. */ 46/* Number of mounted filesystems which have compression enabled. */
45static unsigned long ntfs_nr_compression_users; 47static unsigned long ntfs_nr_compression_users;
46 48
47/* A global default upcase table and a corresponding reference count. */ 49/* A global default upcase table and a corresponding reference count. */
@@ -102,7 +104,7 @@ static BOOL parse_options(ntfs_volume *vol, char *opt)
102 gid_t gid = (gid_t)-1; 104 gid_t gid = (gid_t)-1;
103 mode_t fmask = (mode_t)-1, dmask = (mode_t)-1; 105 mode_t fmask = (mode_t)-1, dmask = (mode_t)-1;
104 int mft_zone_multiplier = -1, on_errors = -1; 106 int mft_zone_multiplier = -1, on_errors = -1;
105 int show_sys_files = -1, case_sensitive = -1; 107 int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1;
106 struct nls_table *nls_map = NULL, *old_nls; 108 struct nls_table *nls_map = NULL, *old_nls;
107 109
108 /* I am lazy... (-8 */ 110 /* I am lazy... (-8 */
@@ -162,6 +164,7 @@ static BOOL parse_options(ntfs_volume *vol, char *opt)
162 else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE) 164 else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE)
163 else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files) 165 else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files)
164 else NTFS_GETOPT_BOOL("case_sensitive", case_sensitive) 166 else NTFS_GETOPT_BOOL("case_sensitive", case_sensitive)
167 else NTFS_GETOPT_BOOL("disable_sparse", disable_sparse)
165 else NTFS_GETOPT_OPTIONS_ARRAY("errors", on_errors, 168 else NTFS_GETOPT_OPTIONS_ARRAY("errors", on_errors,
166 on_errors_arr) 169 on_errors_arr)
167 else if (!strcmp(p, "posix") || !strcmp(p, "show_inodes")) 170 else if (!strcmp(p, "posix") || !strcmp(p, "show_inodes"))
@@ -291,6 +294,21 @@ no_mount_options:
291 else 294 else
292 NVolClearCaseSensitive(vol); 295 NVolClearCaseSensitive(vol);
293 } 296 }
297 if (disable_sparse != -1) {
298 if (disable_sparse)
299 NVolClearSparseEnabled(vol);
300 else {
301 if (!NVolSparseEnabled(vol) &&
302 vol->major_ver && vol->major_ver < 3)
303 ntfs_warning(vol->sb, "Not enabling sparse "
304 "support due to NTFS volume "
305 "version %i.%i (need at least "
306 "version 3.0).", vol->major_ver,
307 vol->minor_ver);
308 else
309 NVolSetSparseEnabled(vol);
310 }
311 }
294 return TRUE; 312 return TRUE;
295needs_arg: 313needs_arg:
296 ntfs_error(vol->sb, "The %s option requires an argument.", p); 314 ntfs_error(vol->sb, "The %s option requires an argument.", p);
@@ -480,6 +498,12 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
480 NVolSetErrors(vol); 498 NVolSetErrors(vol);
481 return -EROFS; 499 return -EROFS;
482 } 500 }
501 if (!ntfs_stamp_usnjrnl(vol)) {
502 ntfs_error(sb, "Failed to stamp transation log "
503 "($UsnJrnl)%s", es);
504 NVolSetErrors(vol);
505 return -EROFS;
506 }
483 } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { 507 } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
484 /* Remounting read-only. */ 508 /* Remounting read-only. */
485 if (!NVolErrors(vol)) { 509 if (!NVolErrors(vol)) {
@@ -516,16 +540,19 @@ static BOOL is_boot_sector_ntfs(const struct super_block *sb,
516{ 540{
517 /* 541 /*
518 * Check that checksum == sum of u32 values from b to the checksum 542 * Check that checksum == sum of u32 values from b to the checksum
519 * field. If checksum is zero, no checking is done. 543 * field. If checksum is zero, no checking is done. We will work when
544 * the checksum test fails, since some utilities update the boot sector
545 * ignoring the checksum which leaves the checksum out-of-date. We
546 * report a warning if this is the case.
520 */ 547 */
521 if ((void*)b < (void*)&b->checksum && b->checksum) { 548 if ((void*)b < (void*)&b->checksum && b->checksum && !silent) {
522 le32 *u; 549 le32 *u;
523 u32 i; 550 u32 i;
524 551
525 for (i = 0, u = (le32*)b; u < (le32*)(&b->checksum); ++u) 552 for (i = 0, u = (le32*)b; u < (le32*)(&b->checksum); ++u)
526 i += le32_to_cpup(u); 553 i += le32_to_cpup(u);
527 if (le32_to_cpu(b->checksum) != i) 554 if (le32_to_cpu(b->checksum) != i)
528 goto not_ntfs; 555 ntfs_warning(sb, "Invalid boot sector checksum.");
529 } 556 }
530 /* Check OEMidentifier is "NTFS " */ 557 /* Check OEMidentifier is "NTFS " */
531 if (b->oem_id != magicNTFS) 558 if (b->oem_id != magicNTFS)
@@ -541,9 +568,9 @@ static BOOL is_boot_sector_ntfs(const struct super_block *sb,
541 default: 568 default:
542 goto not_ntfs; 569 goto not_ntfs;
543 } 570 }
544 /* Check the cluster size is not above 65536 bytes. */ 571 /* Check the cluster size is not above the maximum (64kiB). */
545 if ((u32)le16_to_cpu(b->bpb.bytes_per_sector) * 572 if ((u32)le16_to_cpu(b->bpb.bytes_per_sector) *
546 b->bpb.sectors_per_cluster > 0x10000) 573 b->bpb.sectors_per_cluster > NTFS_MAX_CLUSTER_SIZE)
547 goto not_ntfs; 574 goto not_ntfs;
548 /* Check reserved/unused fields are really zero. */ 575 /* Check reserved/unused fields are really zero. */
549 if (le16_to_cpu(b->bpb.reserved_sectors) || 576 if (le16_to_cpu(b->bpb.reserved_sectors) ||
@@ -575,7 +602,7 @@ static BOOL is_boot_sector_ntfs(const struct super_block *sb,
575 * many BIOSes will refuse to boot from a bootsector if the magic is 602 * many BIOSes will refuse to boot from a bootsector if the magic is
576 * incorrect, so we emit a warning. 603 * incorrect, so we emit a warning.
577 */ 604 */
578 if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55)) 605 if (!silent && b->end_of_sector_marker != const_cpu_to_le16(0xaa55))
579 ntfs_warning(sb, "Invalid end of sector marker."); 606 ntfs_warning(sb, "Invalid end of sector marker.");
580 return TRUE; 607 return TRUE;
581not_ntfs: 608not_ntfs:
@@ -967,6 +994,7 @@ static BOOL load_and_init_mft_mirror(ntfs_volume *vol)
967 tmp_ni = NTFS_I(tmp_ino); 994 tmp_ni = NTFS_I(tmp_ino);
968 /* The $MFTMirr, like the $MFT is multi sector transfer protected. */ 995 /* The $MFTMirr, like the $MFT is multi sector transfer protected. */
969 NInoSetMstProtected(tmp_ni); 996 NInoSetMstProtected(tmp_ni);
997 NInoSetSparseDisabled(tmp_ni);
970 /* 998 /*
971 * Set up our little cheat allowing us to reuse the async read io 999 * Set up our little cheat allowing us to reuse the async read io
972 * completion handler for directories. 1000 * completion handler for directories.
@@ -990,12 +1018,12 @@ static BOOL load_and_init_mft_mirror(ntfs_volume *vol)
990 */ 1018 */
991static BOOL check_mft_mirror(ntfs_volume *vol) 1019static BOOL check_mft_mirror(ntfs_volume *vol)
992{ 1020{
993 unsigned long index;
994 struct super_block *sb = vol->sb; 1021 struct super_block *sb = vol->sb;
995 ntfs_inode *mirr_ni; 1022 ntfs_inode *mirr_ni;
996 struct page *mft_page, *mirr_page; 1023 struct page *mft_page, *mirr_page;
997 u8 *kmft, *kmirr; 1024 u8 *kmft, *kmirr;
998 runlist_element *rl, rl2[2]; 1025 runlist_element *rl, rl2[2];
1026 pgoff_t index;
999 int mrecs_per_page, i; 1027 int mrecs_per_page, i;
1000 1028
1001 ntfs_debug("Entering."); 1029 ntfs_debug("Entering.");
@@ -1122,11 +1150,130 @@ static BOOL load_and_check_logfile(ntfs_volume *vol)
1122 /* ntfs_check_logfile() will have displayed error output. */ 1150 /* ntfs_check_logfile() will have displayed error output. */
1123 return FALSE; 1151 return FALSE;
1124 } 1152 }
1153 NInoSetSparseDisabled(NTFS_I(tmp_ino));
1125 vol->logfile_ino = tmp_ino; 1154 vol->logfile_ino = tmp_ino;
1126 ntfs_debug("Done."); 1155 ntfs_debug("Done.");
1127 return TRUE; 1156 return TRUE;
1128} 1157}
1129 1158
1159#define NTFS_HIBERFIL_HEADER_SIZE 4096
1160
1161/**
1162 * check_windows_hibernation_status - check if Windows is suspended on a volume
1163 * @vol: ntfs super block of device to check
1164 *
1165 * Check if Windows is hibernated on the ntfs volume @vol. This is done by
1166 * looking for the file hiberfil.sys in the root directory of the volume. If
1167 * the file is not present Windows is definitely not suspended.
1168 *
1169 * If hiberfil.sys exists and is less than 4kiB in size it means Windows is
1170 * definitely suspended (this volume is not the system volume). Caveat: on a
1171 * system with many volumes it is possible that the < 4kiB check is bogus but
1172 * for now this should do fine.
1173 *
1174 * If hiberfil.sys exists and is larger than 4kiB in size, we need to read the
1175 * hiberfil header (which is the first 4kiB). If this begins with "hibr",
1176 * Windows is definitely suspended. If it is completely full of zeroes,
1177 * Windows is definitely not hibernated. Any other case is treated as if
1178 * Windows is suspended. This caters for the above mentioned caveat of a
1179 * system with many volumes where no "hibr" magic would be present and there is
1180 * no zero header.
1181 *
1182 * Return 0 if Windows is not hibernated on the volume, >0 if Windows is
1183 * hibernated on the volume, and -errno on error.
1184 */
1185static int check_windows_hibernation_status(ntfs_volume *vol)
1186{
1187 MFT_REF mref;
1188 struct inode *vi;
1189 ntfs_inode *ni;
1190 struct page *page;
1191 u32 *kaddr, *kend;
1192 ntfs_name *name = NULL;
1193 int ret = 1;
1194 static const ntfschar hiberfil[13] = { const_cpu_to_le16('h'),
1195 const_cpu_to_le16('i'), const_cpu_to_le16('b'),
1196 const_cpu_to_le16('e'), const_cpu_to_le16('r'),
1197 const_cpu_to_le16('f'), const_cpu_to_le16('i'),
1198 const_cpu_to_le16('l'), const_cpu_to_le16('.'),
1199 const_cpu_to_le16('s'), const_cpu_to_le16('y'),
1200 const_cpu_to_le16('s'), 0 };
1201
1202 ntfs_debug("Entering.");
1203 /*
1204 * Find the inode number for the hibernation file by looking up the
1205 * filename hiberfil.sys in the root directory.
1206 */
1207 down(&vol->root_ino->i_sem);
1208 mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
1209 &name);
1210 up(&vol->root_ino->i_sem);
1211 if (IS_ERR_MREF(mref)) {
1212 ret = MREF_ERR(mref);
1213 /* If the file does not exist, Windows is not hibernated. */
1214 if (ret == -ENOENT) {
1215 ntfs_debug("hiberfil.sys not present. Windows is not "
1216 "hibernated on the volume.");
1217 return 0;
1218 }
1219 /* A real error occured. */
1220 ntfs_error(vol->sb, "Failed to find inode number for "
1221 "hiberfil.sys.");
1222 return ret;
1223 }
1224 /* We do not care for the type of match that was found. */
1225 kfree(name);
1226 /* Get the inode. */
1227 vi = ntfs_iget(vol->sb, MREF(mref));
1228 if (IS_ERR(vi) || is_bad_inode(vi)) {
1229 if (!IS_ERR(vi))
1230 iput(vi);
1231 ntfs_error(vol->sb, "Failed to load hiberfil.sys.");
1232 return IS_ERR(vi) ? PTR_ERR(vi) : -EIO;
1233 }
1234 if (unlikely(i_size_read(vi) < NTFS_HIBERFIL_HEADER_SIZE)) {
1235 ntfs_debug("hiberfil.sys is smaller than 4kiB (0x%llx). "
1236 "Windows is hibernated on the volume. This "
1237 "is not the system volume.", i_size_read(vi));
1238 goto iput_out;
1239 }
1240 ni = NTFS_I(vi);
1241 page = ntfs_map_page(vi->i_mapping, 0);
1242 if (IS_ERR(page)) {
1243 ntfs_error(vol->sb, "Failed to read from hiberfil.sys.");
1244 ret = PTR_ERR(page);
1245 goto iput_out;
1246 }
1247 kaddr = (u32*)page_address(page);
1248 if (*(le32*)kaddr == const_cpu_to_le32(0x72626968)/*'hibr'*/) {
1249 ntfs_debug("Magic \"hibr\" found in hiberfil.sys. Windows is "
1250 "hibernated on the volume. This is the "
1251 "system volume.");
1252 goto unm_iput_out;
1253 }
1254 kend = kaddr + NTFS_HIBERFIL_HEADER_SIZE/sizeof(*kaddr);
1255 do {
1256 if (unlikely(*kaddr)) {
1257 ntfs_debug("hiberfil.sys is larger than 4kiB "
1258 "(0x%llx), does not contain the "
1259 "\"hibr\" magic, and does not have a "
1260 "zero header. Windows is hibernated "
1261 "on the volume. This is not the "
1262 "system volume.", i_size_read(vi));
1263 goto unm_iput_out;
1264 }
1265 } while (++kaddr < kend);
1266 ntfs_debug("hiberfil.sys contains a zero header. Windows is not "
1267 "hibernated on the volume. This is the system "
1268 "volume.");
1269 ret = 0;
1270unm_iput_out:
1271 ntfs_unmap_page(page);
1272iput_out:
1273 iput(vi);
1274 return ret;
1275}
1276
1130/** 1277/**
1131 * load_and_init_quota - load and setup the quota file for a volume if present 1278 * load_and_init_quota - load and setup the quota file for a volume if present
1132 * @vol: ntfs super block describing device whose quota file to load 1279 * @vol: ntfs super block describing device whose quota file to load
@@ -1175,8 +1322,7 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
1175 return FALSE; 1322 return FALSE;
1176 } 1323 }
1177 /* We do not care for the type of match that was found. */ 1324 /* We do not care for the type of match that was found. */
1178 if (name) 1325 kfree(name);
1179 kfree(name);
1180 /* Get the inode. */ 1326 /* Get the inode. */
1181 tmp_ino = ntfs_iget(vol->sb, MREF(mref)); 1327 tmp_ino = ntfs_iget(vol->sb, MREF(mref));
1182 if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) { 1328 if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) {
@@ -1198,6 +1344,167 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
1198} 1344}
1199 1345
1200/** 1346/**
1347 * load_and_init_usnjrnl - load and setup the transaction log if present
1348 * @vol: ntfs super block describing device whose usnjrnl file to load
1349 *
1350 * Return TRUE on success or FALSE on error.
1351 *
1352 * If $UsnJrnl is not present or in the process of being disabled, we set
1353 * NVolUsnJrnlStamped() and return success.
1354 *
1355 * If the $UsnJrnl $DATA/$J attribute has a size equal to the lowest valid usn,
1356 * i.e. transaction logging has only just been enabled or the journal has been
1357 * stamped and nothing has been logged since, we also set NVolUsnJrnlStamped()
1358 * and return success.
1359 */
1360static BOOL load_and_init_usnjrnl(ntfs_volume *vol)
1361{
1362 MFT_REF mref;
1363 struct inode *tmp_ino;
1364 ntfs_inode *tmp_ni;
1365 struct page *page;
1366 ntfs_name *name = NULL;
1367 USN_HEADER *uh;
1368 static const ntfschar UsnJrnl[9] = { const_cpu_to_le16('$'),
1369 const_cpu_to_le16('U'), const_cpu_to_le16('s'),
1370 const_cpu_to_le16('n'), const_cpu_to_le16('J'),
1371 const_cpu_to_le16('r'), const_cpu_to_le16('n'),
1372 const_cpu_to_le16('l'), 0 };
1373 static ntfschar Max[5] = { const_cpu_to_le16('$'),
1374 const_cpu_to_le16('M'), const_cpu_to_le16('a'),
1375 const_cpu_to_le16('x'), 0 };
1376 static ntfschar J[3] = { const_cpu_to_le16('$'),
1377 const_cpu_to_le16('J'), 0 };
1378
1379 ntfs_debug("Entering.");
1380 /*
1381 * Find the inode number for the transaction log file by looking up the
1382 * filename $UsnJrnl in the extended system files directory $Extend.
1383 */
1384 down(&vol->extend_ino->i_sem);
1385 mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
1386 &name);
1387 up(&vol->extend_ino->i_sem);
1388 if (IS_ERR_MREF(mref)) {
1389 /*
1390 * If the file does not exist, transaction logging is disabled,
1391 * just return success.
1392 */
1393 if (MREF_ERR(mref) == -ENOENT) {
1394 ntfs_debug("$UsnJrnl not present. Volume does not "
1395 "have transaction logging enabled.");
1396not_enabled:
1397 /*
1398 * No need to try to stamp the transaction log if
1399 * transaction logging is not enabled.
1400 */
1401 NVolSetUsnJrnlStamped(vol);
1402 return TRUE;
1403 }
1404 /* A real error occured. */
1405 ntfs_error(vol->sb, "Failed to find inode number for "
1406 "$UsnJrnl.");
1407 return FALSE;
1408 }
1409 /* We do not care for the type of match that was found. */
1410 kfree(name);
1411 /* Get the inode. */
1412 tmp_ino = ntfs_iget(vol->sb, MREF(mref));
1413 if (unlikely(IS_ERR(tmp_ino) || is_bad_inode(tmp_ino))) {
1414 if (!IS_ERR(tmp_ino))
1415 iput(tmp_ino);
1416 ntfs_error(vol->sb, "Failed to load $UsnJrnl.");
1417 return FALSE;
1418 }
1419 vol->usnjrnl_ino = tmp_ino;
1420 /*
1421 * If the transaction log is in the process of being deleted, we can
1422 * ignore it.
1423 */
1424 if (unlikely(vol->vol_flags & VOLUME_DELETE_USN_UNDERWAY)) {
1425 ntfs_debug("$UsnJrnl in the process of being disabled. "
1426 "Volume does not have transaction logging "
1427 "enabled.");
1428 goto not_enabled;
1429 }
1430 /* Get the $DATA/$Max attribute. */
1431 tmp_ino = ntfs_attr_iget(vol->usnjrnl_ino, AT_DATA, Max, 4);
1432 if (IS_ERR(tmp_ino)) {
1433 ntfs_error(vol->sb, "Failed to load $UsnJrnl/$DATA/$Max "
1434 "attribute.");
1435 return FALSE;
1436 }
1437 vol->usnjrnl_max_ino = tmp_ino;
1438 if (unlikely(i_size_read(tmp_ino) < sizeof(USN_HEADER))) {
1439 ntfs_error(vol->sb, "Found corrupt $UsnJrnl/$DATA/$Max "
1440 "attribute (size is 0x%llx but should be at "
1441 "least 0x%x bytes).", i_size_read(tmp_ino),
1442 sizeof(USN_HEADER));
1443 return FALSE;
1444 }
1445 /* Get the $DATA/$J attribute. */
1446 tmp_ino = ntfs_attr_iget(vol->usnjrnl_ino, AT_DATA, J, 2);
1447 if (IS_ERR(tmp_ino)) {
1448 ntfs_error(vol->sb, "Failed to load $UsnJrnl/$DATA/$J "
1449 "attribute.");
1450 return FALSE;
1451 }
1452 vol->usnjrnl_j_ino = tmp_ino;
1453 /* Verify $J is non-resident and sparse. */
1454 tmp_ni = NTFS_I(vol->usnjrnl_j_ino);
1455 if (unlikely(!NInoNonResident(tmp_ni) || !NInoSparse(tmp_ni))) {
1456 ntfs_error(vol->sb, "$UsnJrnl/$DATA/$J attribute is resident "
1457 "and/or not sparse.");
1458 return FALSE;
1459 }
1460 /* Read the USN_HEADER from $DATA/$Max. */
1461 page = ntfs_map_page(vol->usnjrnl_max_ino->i_mapping, 0);
1462 if (IS_ERR(page)) {
1463 ntfs_error(vol->sb, "Failed to read from $UsnJrnl/$DATA/$Max "
1464 "attribute.");
1465 return FALSE;
1466 }
1467 uh = (USN_HEADER*)page_address(page);
1468 /* Sanity check the $Max. */
1469 if (unlikely(sle64_to_cpu(uh->allocation_delta) >
1470 sle64_to_cpu(uh->maximum_size))) {
1471 ntfs_error(vol->sb, "Allocation delta (0x%llx) exceeds "
1472 "maximum size (0x%llx). $UsnJrnl is corrupt.",
1473 (long long)sle64_to_cpu(uh->allocation_delta),
1474 (long long)sle64_to_cpu(uh->maximum_size));
1475 ntfs_unmap_page(page);
1476 return FALSE;
1477 }
1478 /*
1479 * If the transaction log has been stamped and nothing has been written
1480 * to it since, we do not need to stamp it.
1481 */
1482 if (unlikely(sle64_to_cpu(uh->lowest_valid_usn) >=
1483 i_size_read(vol->usnjrnl_j_ino))) {
1484 if (likely(sle64_to_cpu(uh->lowest_valid_usn) ==
1485 i_size_read(vol->usnjrnl_j_ino))) {
1486 ntfs_unmap_page(page);
1487 ntfs_debug("$UsnJrnl is enabled but nothing has been "
1488 "logged since it was last stamped. "
1489 "Treating this as if the volume does "
1490 "not have transaction logging "
1491 "enabled.");
1492 goto not_enabled;
1493 }
1494 ntfs_error(vol->sb, "$UsnJrnl has lowest valid usn (0x%llx) "
1495 "which is out of bounds (0x%llx). $UsnJrnl "
1496 "is corrupt.",
1497 (long long)sle64_to_cpu(uh->lowest_valid_usn),
1498 i_size_read(vol->usnjrnl_j_ino));
1499 ntfs_unmap_page(page);
1500 return FALSE;
1501 }
1502 ntfs_unmap_page(page);
1503 ntfs_debug("Done.");
1504 return TRUE;
1505}
1506
1507/**
1201 * load_and_init_attrdef - load the attribute definitions table for a volume 1508 * load_and_init_attrdef - load the attribute definitions table for a volume
1202 * @vol: ntfs super block describing device whose attrdef to load 1509 * @vol: ntfs super block describing device whose attrdef to load
1203 * 1510 *
@@ -1205,10 +1512,11 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
1205 */ 1512 */
1206static BOOL load_and_init_attrdef(ntfs_volume *vol) 1513static BOOL load_and_init_attrdef(ntfs_volume *vol)
1207{ 1514{
1515 loff_t i_size;
1208 struct super_block *sb = vol->sb; 1516 struct super_block *sb = vol->sb;
1209 struct inode *ino; 1517 struct inode *ino;
1210 struct page *page; 1518 struct page *page;
1211 unsigned long index, max_index; 1519 pgoff_t index, max_index;
1212 unsigned int size; 1520 unsigned int size;
1213 1521
1214 ntfs_debug("Entering."); 1522 ntfs_debug("Entering.");
@@ -1219,14 +1527,16 @@ static BOOL load_and_init_attrdef(ntfs_volume *vol)
1219 iput(ino); 1527 iput(ino);
1220 goto failed; 1528 goto failed;
1221 } 1529 }
1530 NInoSetSparseDisabled(NTFS_I(ino));
1222 /* The size of FILE_AttrDef must be above 0 and fit inside 31 bits. */ 1531 /* The size of FILE_AttrDef must be above 0 and fit inside 31 bits. */
1223 if (!ino->i_size || ino->i_size > 0x7fffffff) 1532 i_size = i_size_read(ino);
1533 if (i_size <= 0 || i_size > 0x7fffffff)
1224 goto iput_failed; 1534 goto iput_failed;
1225 vol->attrdef = (ATTR_DEF*)ntfs_malloc_nofs(ino->i_size); 1535 vol->attrdef = (ATTR_DEF*)ntfs_malloc_nofs(i_size);
1226 if (!vol->attrdef) 1536 if (!vol->attrdef)
1227 goto iput_failed; 1537 goto iput_failed;
1228 index = 0; 1538 index = 0;
1229 max_index = ino->i_size >> PAGE_CACHE_SHIFT; 1539 max_index = i_size >> PAGE_CACHE_SHIFT;
1230 size = PAGE_CACHE_SIZE; 1540 size = PAGE_CACHE_SIZE;
1231 while (index < max_index) { 1541 while (index < max_index) {
1232 /* Read the attrdef table and copy it into the linear buffer. */ 1542 /* Read the attrdef table and copy it into the linear buffer. */
@@ -1239,12 +1549,12 @@ read_partial_attrdef_page:
1239 ntfs_unmap_page(page); 1549 ntfs_unmap_page(page);
1240 }; 1550 };
1241 if (size == PAGE_CACHE_SIZE) { 1551 if (size == PAGE_CACHE_SIZE) {
1242 size = ino->i_size & ~PAGE_CACHE_MASK; 1552 size = i_size & ~PAGE_CACHE_MASK;
1243 if (size) 1553 if (size)
1244 goto read_partial_attrdef_page; 1554 goto read_partial_attrdef_page;
1245 } 1555 }
1246 vol->attrdef_size = ino->i_size; 1556 vol->attrdef_size = i_size;
1247 ntfs_debug("Read %llu bytes from $AttrDef.", ino->i_size); 1557 ntfs_debug("Read %llu bytes from $AttrDef.", i_size);
1248 iput(ino); 1558 iput(ino);
1249 return TRUE; 1559 return TRUE;
1250free_iput_failed: 1560free_iput_failed:
@@ -1267,10 +1577,11 @@ failed:
1267 */ 1577 */
1268static BOOL load_and_init_upcase(ntfs_volume *vol) 1578static BOOL load_and_init_upcase(ntfs_volume *vol)
1269{ 1579{
1580 loff_t i_size;
1270 struct super_block *sb = vol->sb; 1581 struct super_block *sb = vol->sb;
1271 struct inode *ino; 1582 struct inode *ino;
1272 struct page *page; 1583 struct page *page;
1273 unsigned long index, max_index; 1584 pgoff_t index, max_index;
1274 unsigned int size; 1585 unsigned int size;
1275 int i, max; 1586 int i, max;
1276 1587
@@ -1286,14 +1597,15 @@ static BOOL load_and_init_upcase(ntfs_volume *vol)
1286 * The upcase size must not be above 64k Unicode characters, must not 1597 * The upcase size must not be above 64k Unicode characters, must not
1287 * be zero and must be a multiple of sizeof(ntfschar). 1598 * be zero and must be a multiple of sizeof(ntfschar).
1288 */ 1599 */
1289 if (!ino->i_size || ino->i_size & (sizeof(ntfschar) - 1) || 1600 i_size = i_size_read(ino);
1290 ino->i_size > 64ULL * 1024 * sizeof(ntfschar)) 1601 if (!i_size || i_size & (sizeof(ntfschar) - 1) ||
1602 i_size > 64ULL * 1024 * sizeof(ntfschar))
1291 goto iput_upcase_failed; 1603 goto iput_upcase_failed;
1292 vol->upcase = (ntfschar*)ntfs_malloc_nofs(ino->i_size); 1604 vol->upcase = (ntfschar*)ntfs_malloc_nofs(i_size);
1293 if (!vol->upcase) 1605 if (!vol->upcase)
1294 goto iput_upcase_failed; 1606 goto iput_upcase_failed;
1295 index = 0; 1607 index = 0;
1296 max_index = ino->i_size >> PAGE_CACHE_SHIFT; 1608 max_index = i_size >> PAGE_CACHE_SHIFT;
1297 size = PAGE_CACHE_SIZE; 1609 size = PAGE_CACHE_SIZE;
1298 while (index < max_index) { 1610 while (index < max_index) {
1299 /* Read the upcase table and copy it into the linear buffer. */ 1611 /* Read the upcase table and copy it into the linear buffer. */
@@ -1306,13 +1618,13 @@ read_partial_upcase_page:
1306 ntfs_unmap_page(page); 1618 ntfs_unmap_page(page);
1307 }; 1619 };
1308 if (size == PAGE_CACHE_SIZE) { 1620 if (size == PAGE_CACHE_SIZE) {
1309 size = ino->i_size & ~PAGE_CACHE_MASK; 1621 size = i_size & ~PAGE_CACHE_MASK;
1310 if (size) 1622 if (size)
1311 goto read_partial_upcase_page; 1623 goto read_partial_upcase_page;
1312 } 1624 }
1313 vol->upcase_len = ino->i_size >> UCHAR_T_SIZE_BITS; 1625 vol->upcase_len = i_size >> UCHAR_T_SIZE_BITS;
1314 ntfs_debug("Read %llu bytes from $UpCase (expected %zu bytes).", 1626 ntfs_debug("Read %llu bytes from $UpCase (expected %zu bytes).",
1315 ino->i_size, 64 * 1024 * sizeof(ntfschar)); 1627 i_size, 64 * 1024 * sizeof(ntfschar));
1316 iput(ino); 1628 iput(ino);
1317 down(&ntfs_lock); 1629 down(&ntfs_lock);
1318 if (!default_upcase) { 1630 if (!default_upcase) {
@@ -1376,6 +1688,9 @@ static BOOL load_system_files(ntfs_volume *vol)
1376 MFT_RECORD *m; 1688 MFT_RECORD *m;
1377 VOLUME_INFORMATION *vi; 1689 VOLUME_INFORMATION *vi;
1378 ntfs_attr_search_ctx *ctx; 1690 ntfs_attr_search_ctx *ctx;
1691#ifdef NTFS_RW
1692 int err;
1693#endif /* NTFS_RW */
1379 1694
1380 ntfs_debug("Entering."); 1695 ntfs_debug("Entering.");
1381#ifdef NTFS_RW 1696#ifdef NTFS_RW
@@ -1435,7 +1750,8 @@ static BOOL load_system_files(ntfs_volume *vol)
1435 iput(vol->lcnbmp_ino); 1750 iput(vol->lcnbmp_ino);
1436 goto bitmap_failed; 1751 goto bitmap_failed;
1437 } 1752 }
1438 if ((vol->nr_clusters + 7) >> 3 > vol->lcnbmp_ino->i_size) { 1753 NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino));
1754 if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) {
1439 iput(vol->lcnbmp_ino); 1755 iput(vol->lcnbmp_ino);
1440bitmap_failed: 1756bitmap_failed:
1441 ntfs_error(sb, "Failed to load $Bitmap."); 1757 ntfs_error(sb, "Failed to load $Bitmap.");
@@ -1486,6 +1802,12 @@ get_ctx_vol_failed:
1486 unmap_mft_record(NTFS_I(vol->vol_ino)); 1802 unmap_mft_record(NTFS_I(vol->vol_ino));
1487 printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver, 1803 printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver,
1488 vol->minor_ver); 1804 vol->minor_ver);
1805 if (vol->major_ver < 3 && NVolSparseEnabled(vol)) {
1806 ntfs_warning(vol->sb, "Disabling sparse support due to NTFS "
1807 "volume version %i.%i (need at least version "
1808 "3.0).", vol->major_ver, vol->minor_ver);
1809 NVolClearSparseEnabled(vol);
1810 }
1489#ifdef NTFS_RW 1811#ifdef NTFS_RW
1490 /* Make sure that no unsupported volume flags are set. */ 1812 /* Make sure that no unsupported volume flags are set. */
1491 if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { 1813 if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
@@ -1545,6 +1867,50 @@ get_ctx_vol_failed:
1545 /* This will prevent a read-write remount. */ 1867 /* This will prevent a read-write remount. */
1546 NVolSetErrors(vol); 1868 NVolSetErrors(vol);
1547 } 1869 }
1870#endif /* NTFS_RW */
1871 /* Get the root directory inode so we can do path lookups. */
1872 vol->root_ino = ntfs_iget(sb, FILE_root);
1873 if (IS_ERR(vol->root_ino) || is_bad_inode(vol->root_ino)) {
1874 if (!IS_ERR(vol->root_ino))
1875 iput(vol->root_ino);
1876 ntfs_error(sb, "Failed to load root directory.");
1877 goto iput_logfile_err_out;
1878 }
1879#ifdef NTFS_RW
1880 /*
1881 * Check if Windows is suspended to disk on the target volume. If it
1882 * is hibernated, we must not write *anything* to the disk so set
1883 * NVolErrors() without setting the dirty volume flag and mount
1884 * read-only. This will prevent read-write remounting and it will also
1885 * prevent all writes.
1886 */
1887 err = check_windows_hibernation_status(vol);
1888 if (unlikely(err)) {
1889 static const char *es1a = "Failed to determine if Windows is "
1890 "hibernated";
1891 static const char *es1b = "Windows is hibernated";
1892 static const char *es2 = ". Run chkdsk.";
1893 const char *es1;
1894
1895 es1 = err < 0 ? es1a : es1b;
1896 /* If a read-write mount, convert it to a read-only mount. */
1897 if (!(sb->s_flags & MS_RDONLY)) {
1898 if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
1899 ON_ERRORS_CONTINUE))) {
1900 ntfs_error(sb, "%s and neither on_errors="
1901 "continue nor on_errors="
1902 "remount-ro was specified%s",
1903 es1, es2);
1904 goto iput_root_err_out;
1905 }
1906 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
1907 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1908 } else
1909 ntfs_warning(sb, "%s. Will not be able to remount "
1910 "read-write%s", es1, es2);
1911 /* This will prevent a read-write remount. */
1912 NVolSetErrors(vol);
1913 }
1548 /* If (still) a read-write mount, mark the volume dirty. */ 1914 /* If (still) a read-write mount, mark the volume dirty. */
1549 if (!(sb->s_flags & MS_RDONLY) && 1915 if (!(sb->s_flags & MS_RDONLY) &&
1550 ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { 1916 ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) {
@@ -1558,7 +1924,7 @@ get_ctx_vol_failed:
1558 ntfs_error(sb, "%s and neither on_errors=continue nor " 1924 ntfs_error(sb, "%s and neither on_errors=continue nor "
1559 "on_errors=remount-ro was specified%s", 1925 "on_errors=remount-ro was specified%s",
1560 es1, es2); 1926 es1, es2);
1561 goto iput_logfile_err_out; 1927 goto iput_root_err_out;
1562 } 1928 }
1563 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 1929 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1564 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1930 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
@@ -1585,7 +1951,7 @@ get_ctx_vol_failed:
1585 ntfs_error(sb, "%s and neither on_errors=continue nor " 1951 ntfs_error(sb, "%s and neither on_errors=continue nor "
1586 "on_errors=remount-ro was specified%s", 1952 "on_errors=remount-ro was specified%s",
1587 es1, es2); 1953 es1, es2);
1588 goto iput_logfile_err_out; 1954 goto iput_root_err_out;
1589 } 1955 }
1590 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 1956 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1591 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1957 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
@@ -1604,23 +1970,15 @@ get_ctx_vol_failed:
1604 ntfs_error(sb, "%s and neither on_errors=continue nor " 1970 ntfs_error(sb, "%s and neither on_errors=continue nor "
1605 "on_errors=remount-ro was specified%s", 1971 "on_errors=remount-ro was specified%s",
1606 es1, es2); 1972 es1, es2);
1607 goto iput_logfile_err_out; 1973 goto iput_root_err_out;
1608 } 1974 }
1609 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 1975 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1610 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1976 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
1611 NVolSetErrors(vol); 1977 NVolSetErrors(vol);
1612 } 1978 }
1613#endif /* NTFS_RW */ 1979#endif /* NTFS_RW */
1614 /* Get the root directory inode. */
1615 vol->root_ino = ntfs_iget(sb, FILE_root);
1616 if (IS_ERR(vol->root_ino) || is_bad_inode(vol->root_ino)) {
1617 if (!IS_ERR(vol->root_ino))
1618 iput(vol->root_ino);
1619 ntfs_error(sb, "Failed to load root directory.");
1620 goto iput_logfile_err_out;
1621 }
1622 /* If on NTFS versions before 3.0, we are done. */ 1980 /* If on NTFS versions before 3.0, we are done. */
1623 if (vol->major_ver < 3) 1981 if (unlikely(vol->major_ver < 3))
1624 return TRUE; 1982 return TRUE;
1625 /* NTFS 3.0+ specific initialization. */ 1983 /* NTFS 3.0+ specific initialization. */
1626 /* Get the security descriptors inode. */ 1984 /* Get the security descriptors inode. */
@@ -1631,7 +1989,7 @@ get_ctx_vol_failed:
1631 ntfs_error(sb, "Failed to load $Secure."); 1989 ntfs_error(sb, "Failed to load $Secure.");
1632 goto iput_root_err_out; 1990 goto iput_root_err_out;
1633 } 1991 }
1634 // FIXME: Initialize security. 1992 // TODO: Initialize security.
1635 /* Get the extended system files' directory inode. */ 1993 /* Get the extended system files' directory inode. */
1636 vol->extend_ino = ntfs_iget(sb, FILE_Extend); 1994 vol->extend_ino = ntfs_iget(sb, FILE_Extend);
1637 if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino)) { 1995 if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino)) {
@@ -1682,10 +2040,60 @@ get_ctx_vol_failed:
1682 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 2040 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
1683 NVolSetErrors(vol); 2041 NVolSetErrors(vol);
1684 } 2042 }
1685 // TODO: Delete or checkpoint the $UsnJrnl if it exists. 2043 /*
2044 * Find the transaction log file ($UsnJrnl), load it if present, check
2045 * it, and set it up.
2046 */
2047 if (!load_and_init_usnjrnl(vol)) {
2048 static const char *es1 = "Failed to load $UsnJrnl";
2049 static const char *es2 = ". Run chkdsk.";
2050
2051 /* If a read-write mount, convert it to a read-only mount. */
2052 if (!(sb->s_flags & MS_RDONLY)) {
2053 if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
2054 ON_ERRORS_CONTINUE))) {
2055 ntfs_error(sb, "%s and neither on_errors="
2056 "continue nor on_errors="
2057 "remount-ro was specified%s",
2058 es1, es2);
2059 goto iput_usnjrnl_err_out;
2060 }
2061 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
2062 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
2063 } else
2064 ntfs_warning(sb, "%s. Will not be able to remount "
2065 "read-write%s", es1, es2);
2066 /* This will prevent a read-write remount. */
2067 NVolSetErrors(vol);
2068 }
2069 /* If (still) a read-write mount, stamp the transaction log. */
2070 if (!(sb->s_flags & MS_RDONLY) && !ntfs_stamp_usnjrnl(vol)) {
2071 static const char *es1 = "Failed to stamp transaction log "
2072 "($UsnJrnl)";
2073 static const char *es2 = ". Run chkdsk.";
2074
2075 /* Convert to a read-only mount. */
2076 if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
2077 ON_ERRORS_CONTINUE))) {
2078 ntfs_error(sb, "%s and neither on_errors=continue nor "
2079 "on_errors=remount-ro was specified%s",
2080 es1, es2);
2081 goto iput_usnjrnl_err_out;
2082 }
2083 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
2084 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
2085 NVolSetErrors(vol);
2086 }
1686#endif /* NTFS_RW */ 2087#endif /* NTFS_RW */
1687 return TRUE; 2088 return TRUE;
1688#ifdef NTFS_RW 2089#ifdef NTFS_RW
2090iput_usnjrnl_err_out:
2091 if (vol->usnjrnl_j_ino)
2092 iput(vol->usnjrnl_j_ino);
2093 if (vol->usnjrnl_max_ino)
2094 iput(vol->usnjrnl_max_ino);
2095 if (vol->usnjrnl_ino)
2096 iput(vol->usnjrnl_ino);
1689iput_quota_err_out: 2097iput_quota_err_out:
1690 if (vol->quota_q_ino) 2098 if (vol->quota_q_ino)
1691 iput(vol->quota_q_ino); 2099 iput(vol->quota_q_ino);
@@ -1759,6 +2167,12 @@ static void ntfs_put_super(struct super_block *sb)
1759 2167
1760 /* NTFS 3.0+ specific. */ 2168 /* NTFS 3.0+ specific. */
1761 if (vol->major_ver >= 3) { 2169 if (vol->major_ver >= 3) {
2170 if (vol->usnjrnl_j_ino)
2171 ntfs_commit_inode(vol->usnjrnl_j_ino);
2172 if (vol->usnjrnl_max_ino)
2173 ntfs_commit_inode(vol->usnjrnl_max_ino);
2174 if (vol->usnjrnl_ino)
2175 ntfs_commit_inode(vol->usnjrnl_ino);
1762 if (vol->quota_q_ino) 2176 if (vol->quota_q_ino)
1763 ntfs_commit_inode(vol->quota_q_ino); 2177 ntfs_commit_inode(vol->quota_q_ino);
1764 if (vol->quota_ino) 2178 if (vol->quota_ino)
@@ -1814,6 +2228,18 @@ static void ntfs_put_super(struct super_block *sb)
1814 /* NTFS 3.0+ specific clean up. */ 2228 /* NTFS 3.0+ specific clean up. */
1815 if (vol->major_ver >= 3) { 2229 if (vol->major_ver >= 3) {
1816#ifdef NTFS_RW 2230#ifdef NTFS_RW
2231 if (vol->usnjrnl_j_ino) {
2232 iput(vol->usnjrnl_j_ino);
2233 vol->usnjrnl_j_ino = NULL;
2234 }
2235 if (vol->usnjrnl_max_ino) {
2236 iput(vol->usnjrnl_max_ino);
2237 vol->usnjrnl_max_ino = NULL;
2238 }
2239 if (vol->usnjrnl_ino) {
2240 iput(vol->usnjrnl_ino);
2241 vol->usnjrnl_ino = NULL;
2242 }
1817 if (vol->quota_q_ino) { 2243 if (vol->quota_q_ino) {
1818 iput(vol->quota_q_ino); 2244 iput(vol->quota_q_ino);
1819 vol->quota_q_ino = NULL; 2245 vol->quota_q_ino = NULL;
@@ -1959,8 +2385,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
1959 struct address_space *mapping = vol->lcnbmp_ino->i_mapping; 2385 struct address_space *mapping = vol->lcnbmp_ino->i_mapping;
1960 filler_t *readpage = (filler_t*)mapping->a_ops->readpage; 2386 filler_t *readpage = (filler_t*)mapping->a_ops->readpage;
1961 struct page *page; 2387 struct page *page;
1962 unsigned long index, max_index; 2388 pgoff_t index, max_index;
1963 unsigned int max_size;
1964 2389
1965 ntfs_debug("Entering."); 2390 ntfs_debug("Entering.");
1966 /* Serialize accesses to the cluster bitmap. */ 2391 /* Serialize accesses to the cluster bitmap. */
@@ -1972,11 +2397,10 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
1972 */ 2397 */
1973 max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> 2398 max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_CACHE_SIZE - 1) >>
1974 PAGE_CACHE_SHIFT; 2399 PAGE_CACHE_SHIFT;
1975 /* Use multiples of 4 bytes. */ 2400 /* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */
1976 max_size = PAGE_CACHE_SIZE >> 2; 2401 ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%lx.",
1977 ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%x.", 2402 max_index, PAGE_CACHE_SIZE / 4);
1978 max_index, max_size); 2403 for (index = 0; index < max_index; index++) {
1979 for (index = 0UL; index < max_index; index++) {
1980 unsigned int i; 2404 unsigned int i;
1981 /* 2405 /*
1982 * Read the page from page cache, getting it from backing store 2406 * Read the page from page cache, getting it from backing store
@@ -2008,7 +2432,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2008 * the result as all out of range bytes are set to zero by 2432 * the result as all out of range bytes are set to zero by
2009 * ntfs_readpage(). 2433 * ntfs_readpage().
2010 */ 2434 */
2011 for (i = 0; i < max_size; i++) 2435 for (i = 0; i < PAGE_CACHE_SIZE / 4; i++)
2012 nr_free -= (s64)hweight32(kaddr[i]); 2436 nr_free -= (s64)hweight32(kaddr[i]);
2013 kunmap_atomic(kaddr, KM_USER0); 2437 kunmap_atomic(kaddr, KM_USER0);
2014 page_cache_release(page); 2438 page_cache_release(page);
@@ -2031,6 +2455,8 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2031/** 2455/**
2032 * __get_nr_free_mft_records - return the number of free inodes on a volume 2456 * __get_nr_free_mft_records - return the number of free inodes on a volume
2033 * @vol: ntfs volume for which to obtain free inode count 2457 * @vol: ntfs volume for which to obtain free inode count
2458 * @nr_free: number of mft records in filesystem
2459 * @max_index: maximum number of pages containing set bits
2034 * 2460 *
2035 * Calculate the number of free mft records (inodes) on the mounted NTFS 2461 * Calculate the number of free mft records (inodes) on the mounted NTFS
2036 * volume @vol. We actually calculate the number of mft records in use instead 2462 * volume @vol. We actually calculate the number of mft records in use instead
@@ -2043,32 +2469,20 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2043 * 2469 *
2044 * NOTE: Caller must hold mftbmp_lock rw_semaphore for reading or writing. 2470 * NOTE: Caller must hold mftbmp_lock rw_semaphore for reading or writing.
2045 */ 2471 */
2046static unsigned long __get_nr_free_mft_records(ntfs_volume *vol) 2472static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2473 s64 nr_free, const pgoff_t max_index)
2047{ 2474{
2048 s64 nr_free;
2049 u32 *kaddr; 2475 u32 *kaddr;
2050 struct address_space *mapping = vol->mftbmp_ino->i_mapping; 2476 struct address_space *mapping = vol->mftbmp_ino->i_mapping;
2051 filler_t *readpage = (filler_t*)mapping->a_ops->readpage; 2477 filler_t *readpage = (filler_t*)mapping->a_ops->readpage;
2052 struct page *page; 2478 struct page *page;
2053 unsigned long index, max_index; 2479 pgoff_t index;
2054 unsigned int max_size;
2055 2480
2056 ntfs_debug("Entering."); 2481 ntfs_debug("Entering.");
2057 /* Number of mft records in file system (at this point in time). */ 2482 /* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */
2058 nr_free = vol->mft_ino->i_size >> vol->mft_record_size_bits;
2059 /*
2060 * Convert the maximum number of set bits into bytes rounded up, then
2061 * convert into multiples of PAGE_CACHE_SIZE, rounding up so that if we
2062 * have one full and one partial page max_index = 2.
2063 */
2064 max_index = ((((NTFS_I(vol->mft_ino)->initialized_size >>
2065 vol->mft_record_size_bits) + 7) >> 3) +
2066 PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
2067 /* Use multiples of 4 bytes. */
2068 max_size = PAGE_CACHE_SIZE >> 2;
2069 ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = " 2483 ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = "
2070 "0x%x.", max_index, max_size); 2484 "0x%lx.", max_index, PAGE_CACHE_SIZE / 4);
2071 for (index = 0UL; index < max_index; index++) { 2485 for (index = 0; index < max_index; index++) {
2072 unsigned int i; 2486 unsigned int i;
2073 /* 2487 /*
2074 * Read the page from page cache, getting it from backing store 2488 * Read the page from page cache, getting it from backing store
@@ -2100,7 +2514,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol)
2100 * the result as all out of range bytes are set to zero by 2514 * the result as all out of range bytes are set to zero by
2101 * ntfs_readpage(). 2515 * ntfs_readpage().
2102 */ 2516 */
2103 for (i = 0; i < max_size; i++) 2517 for (i = 0; i < PAGE_CACHE_SIZE / 4; i++)
2104 nr_free -= (s64)hweight32(kaddr[i]); 2518 nr_free -= (s64)hweight32(kaddr[i]);
2105 kunmap_atomic(kaddr, KM_USER0); 2519 kunmap_atomic(kaddr, KM_USER0);
2106 page_cache_release(page); 2520 page_cache_release(page);
@@ -2134,8 +2548,11 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol)
2134 */ 2548 */
2135static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs) 2549static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
2136{ 2550{
2137 ntfs_volume *vol = NTFS_SB(sb);
2138 s64 size; 2551 s64 size;
2552 ntfs_volume *vol = NTFS_SB(sb);
2553 ntfs_inode *mft_ni = NTFS_I(vol->mft_ino);
2554 pgoff_t max_index;
2555 unsigned long flags;
2139 2556
2140 ntfs_debug("Entering."); 2557 ntfs_debug("Entering.");
2141 /* Type of filesystem. */ 2558 /* Type of filesystem. */
@@ -2143,13 +2560,13 @@ static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
2143 /* Optimal transfer block size. */ 2560 /* Optimal transfer block size. */
2144 sfs->f_bsize = PAGE_CACHE_SIZE; 2561 sfs->f_bsize = PAGE_CACHE_SIZE;
2145 /* 2562 /*
2146 * Total data blocks in file system in units of f_bsize and since 2563 * Total data blocks in filesystem in units of f_bsize and since
2147 * inodes are also stored in data blocs ($MFT is a file) this is just 2564 * inodes are also stored in data blocs ($MFT is a file) this is just
2148 * the total clusters. 2565 * the total clusters.
2149 */ 2566 */
2150 sfs->f_blocks = vol->nr_clusters << vol->cluster_size_bits >> 2567 sfs->f_blocks = vol->nr_clusters << vol->cluster_size_bits >>
2151 PAGE_CACHE_SHIFT; 2568 PAGE_CACHE_SHIFT;
2152 /* Free data blocks in file system in units of f_bsize. */ 2569 /* Free data blocks in filesystem in units of f_bsize. */
2153 size = get_nr_free_clusters(vol) << vol->cluster_size_bits >> 2570 size = get_nr_free_clusters(vol) << vol->cluster_size_bits >>
2154 PAGE_CACHE_SHIFT; 2571 PAGE_CACHE_SHIFT;
2155 if (size < 0LL) 2572 if (size < 0LL)
@@ -2158,17 +2575,27 @@ static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
2158 sfs->f_bavail = sfs->f_bfree = size; 2575 sfs->f_bavail = sfs->f_bfree = size;
2159 /* Serialize accesses to the inode bitmap. */ 2576 /* Serialize accesses to the inode bitmap. */
2160 down_read(&vol->mftbmp_lock); 2577 down_read(&vol->mftbmp_lock);
2161 /* Number of inodes in file system (at this point in time). */ 2578 read_lock_irqsave(&mft_ni->size_lock, flags);
2162 sfs->f_files = vol->mft_ino->i_size >> vol->mft_record_size_bits; 2579 size = i_size_read(vol->mft_ino) >> vol->mft_record_size_bits;
2580 /*
2581 * Convert the maximum number of set bits into bytes rounded up, then
2582 * convert into multiples of PAGE_CACHE_SIZE, rounding up so that if we
2583 * have one full and one partial page max_index = 2.
2584 */
2585 max_index = ((((mft_ni->initialized_size >> vol->mft_record_size_bits)
2586 + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
2587 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2588 /* Number of inodes in filesystem (at this point in time). */
2589 sfs->f_files = size;
2163 /* Free inodes in fs (based on current total count). */ 2590 /* Free inodes in fs (based on current total count). */
2164 sfs->f_ffree = __get_nr_free_mft_records(vol); 2591 sfs->f_ffree = __get_nr_free_mft_records(vol, size, max_index);
2165 up_read(&vol->mftbmp_lock); 2592 up_read(&vol->mftbmp_lock);
2166 /* 2593 /*
2167 * File system id. This is extremely *nix flavour dependent and even 2594 * File system id. This is extremely *nix flavour dependent and even
2168 * within Linux itself all fs do their own thing. I interpret this to 2595 * within Linux itself all fs do their own thing. I interpret this to
2169 * mean a unique id associated with the mounted fs and not the id 2596 * mean a unique id associated with the mounted fs and not the id
2170 * associated with the file system driver, the latter is already given 2597 * associated with the filesystem driver, the latter is already given
2171 * by the file system type in sfs->f_type. Thus we use the 64-bit 2598 * by the filesystem type in sfs->f_type. Thus we use the 64-bit
2172 * volume serial number splitting it into two 32-bit parts. We enter 2599 * volume serial number splitting it into two 32-bit parts. We enter
2173 * the least significant 32-bits in f_fsid[0] and the most significant 2600 * the least significant 32-bits in f_fsid[0] and the most significant
2174 * 32-bits in f_fsid[1]. 2601 * 32-bits in f_fsid[1].
@@ -2219,53 +2646,19 @@ static struct super_operations ntfs_sops = {
2219 proc. */ 2646 proc. */
2220}; 2647};
2221 2648
2222
2223/** 2649/**
2224 * Declarations for NTFS specific export operations (fs/ntfs/namei.c). 2650 * ntfs_fill_super - mount an ntfs filesystem
2225 */ 2651 * @sb: super block of ntfs filesystem to mount
2226extern struct dentry *ntfs_get_parent(struct dentry *child_dent);
2227extern struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh);
2228
2229/**
2230 * Export operations allowing NFS exporting of mounted NTFS partitions.
2231 *
2232 * We use the default ->decode_fh() and ->encode_fh() for now. Note that they
2233 * use 32 bits to store the inode number which is an unsigned long so on 64-bit
2234 * architectures is usually 64 bits so it would all fail horribly on huge
2235 * volumes. I guess we need to define our own encode and decode fh functions
2236 * that store 64-bit inode numbers at some point but for now we will ignore the
2237 * problem...
2238 *
2239 * We also use the default ->get_name() helper (used by ->decode_fh() via
2240 * fs/exportfs/expfs.c::find_exported_dentry()) as that is completely fs
2241 * independent.
2242 *
2243 * The default ->get_parent() just returns -EACCES so we have to provide our
2244 * own and the default ->get_dentry() is incompatible with NTFS due to not
2245 * allowing the inode number 0 which is used in NTFS for the system file $MFT
2246 * and due to using iget() whereas NTFS needs ntfs_iget().
2247 */
2248static struct export_operations ntfs_export_ops = {
2249 .get_parent = ntfs_get_parent, /* Find the parent of a given
2250 directory. */
2251 .get_dentry = ntfs_get_dentry, /* Find a dentry for the inode
2252 given a file handle
2253 sub-fragment. */
2254};
2255
2256/**
2257 * ntfs_fill_super - mount an ntfs files system
2258 * @sb: super block of ntfs file system to mount
2259 * @opt: string containing the mount options 2652 * @opt: string containing the mount options
2260 * @silent: silence error output 2653 * @silent: silence error output
2261 * 2654 *
2262 * ntfs_fill_super() is called by the VFS to mount the device described by @sb 2655 * ntfs_fill_super() is called by the VFS to mount the device described by @sb
2263 * with the mount otions in @data with the NTFS file system. 2656 * with the mount otions in @data with the NTFS filesystem.
2264 * 2657 *
2265 * If @silent is true, remain silent even if errors are detected. This is used 2658 * If @silent is true, remain silent even if errors are detected. This is used
2266 * during bootup, when the kernel tries to mount the root file system with all 2659 * during bootup, when the kernel tries to mount the root filesystem with all
2267 * registered file systems one after the other until one succeeds. This implies 2660 * registered filesystems one after the other until one succeeds. This implies
2268 * that all file systems except the correct one will quite correctly and 2661 * that all filesystems except the correct one will quite correctly and
2269 * expectedly return an error, but nobody wants to see error messages when in 2662 * expectedly return an error, but nobody wants to see error messages when in
2270 * fact this is what is supposed to happen. 2663 * fact this is what is supposed to happen.
2271 * 2664 *
@@ -2292,39 +2685,25 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2292 return -ENOMEM; 2685 return -ENOMEM;
2293 } 2686 }
2294 /* Initialize ntfs_volume structure. */ 2687 /* Initialize ntfs_volume structure. */
2295 memset(vol, 0, sizeof(ntfs_volume)); 2688 *vol = (ntfs_volume) {
2296 vol->sb = sb; 2689 .sb = sb,
2297 vol->upcase = NULL; 2690 /*
2298 vol->attrdef = NULL; 2691 * Default is group and other don't have any access to files or
2299 vol->mft_ino = NULL; 2692 * directories while owner has full access. Further, files by
2300 vol->mftbmp_ino = NULL; 2693 * default are not executable but directories are of course
2694 * browseable.
2695 */
2696 .fmask = 0177,
2697 .dmask = 0077,
2698 };
2301 init_rwsem(&vol->mftbmp_lock); 2699 init_rwsem(&vol->mftbmp_lock);
2302#ifdef NTFS_RW
2303 vol->mftmirr_ino = NULL;
2304 vol->logfile_ino = NULL;
2305#endif /* NTFS_RW */
2306 vol->lcnbmp_ino = NULL;
2307 init_rwsem(&vol->lcnbmp_lock); 2700 init_rwsem(&vol->lcnbmp_lock);
2308 vol->vol_ino = NULL;
2309 vol->root_ino = NULL;
2310 vol->secure_ino = NULL;
2311 vol->extend_ino = NULL;
2312#ifdef NTFS_RW
2313 vol->quota_ino = NULL;
2314 vol->quota_q_ino = NULL;
2315#endif /* NTFS_RW */
2316 vol->nls_map = NULL;
2317
2318 /*
2319 * Default is group and other don't have any access to files or
2320 * directories while owner has full access. Further, files by default
2321 * are not executable but directories are of course browseable.
2322 */
2323 vol->fmask = 0177;
2324 vol->dmask = 0077;
2325 2701
2326 unlock_kernel(); 2702 unlock_kernel();
2327 2703
2704 /* By default, enable sparse support. */
2705 NVolSetSparseEnabled(vol);
2706
2328 /* Important to get the mount options dealt with now. */ 2707 /* Important to get the mount options dealt with now. */
2329 if (!parse_options(vol, (char*)opt)) 2708 if (!parse_options(vol, (char*)opt))
2330 goto err_out_now; 2709 goto err_out_now;
@@ -2347,7 +2726,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2347 } 2726 }
2348 2727
2349 /* Get the size of the device in units of NTFS_BLOCK_SIZE bytes. */ 2728 /* Get the size of the device in units of NTFS_BLOCK_SIZE bytes. */
2350 vol->nr_blocks = sb->s_bdev->bd_inode->i_size >> NTFS_BLOCK_SIZE_BITS; 2729 vol->nr_blocks = i_size_read(sb->s_bdev->bd_inode) >>
2730 NTFS_BLOCK_SIZE_BITS;
2351 2731
2352 /* Read the boot sector and return unlocked buffer head to it. */ 2732 /* Read the boot sector and return unlocked buffer head to it. */
2353 if (!(bh = read_ntfs_boot_sector(sb, silent))) { 2733 if (!(bh = read_ntfs_boot_sector(sb, silent))) {
@@ -2476,6 +2856,18 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2476 /* NTFS 3.0+ specific clean up. */ 2856 /* NTFS 3.0+ specific clean up. */
2477 if (vol->major_ver >= 3) { 2857 if (vol->major_ver >= 3) {
2478#ifdef NTFS_RW 2858#ifdef NTFS_RW
2859 if (vol->usnjrnl_j_ino) {
2860 iput(vol->usnjrnl_j_ino);
2861 vol->usnjrnl_j_ino = NULL;
2862 }
2863 if (vol->usnjrnl_max_ino) {
2864 iput(vol->usnjrnl_max_ino);
2865 vol->usnjrnl_max_ino = NULL;
2866 }
2867 if (vol->usnjrnl_ino) {
2868 iput(vol->usnjrnl_ino);
2869 vol->usnjrnl_ino = NULL;
2870 }
2479 if (vol->quota_q_ino) { 2871 if (vol->quota_q_ino) {
2480 iput(vol->quota_q_ino); 2872 iput(vol->quota_q_ino);
2481 vol->quota_q_ino = NULL; 2873 vol->quota_q_ino = NULL;
@@ -2581,7 +2973,7 @@ err_out_now:
2581 */ 2973 */
2582kmem_cache_t *ntfs_name_cache; 2974kmem_cache_t *ntfs_name_cache;
2583 2975
2584/* Slab caches for efficient allocation/deallocation of of inodes. */ 2976/* Slab caches for efficient allocation/deallocation of inodes. */
2585kmem_cache_t *ntfs_inode_cache; 2977kmem_cache_t *ntfs_inode_cache;
2586kmem_cache_t *ntfs_big_inode_cache; 2978kmem_cache_t *ntfs_big_inode_cache;
2587 2979
@@ -2705,7 +3097,7 @@ static int __init init_ntfs_fs(void)
2705 ntfs_debug("NTFS driver registered successfully."); 3097 ntfs_debug("NTFS driver registered successfully.");
2706 return 0; /* Success! */ 3098 return 0; /* Success! */
2707 } 3099 }
2708 printk(KERN_CRIT "NTFS: Failed to register NTFS file system driver!\n"); 3100 printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n");
2709 3101
2710sysctl_err_out: 3102sysctl_err_out:
2711 kmem_cache_destroy(ntfs_big_inode_cache); 3103 kmem_cache_destroy(ntfs_big_inode_cache);
@@ -2719,7 +3111,7 @@ actx_err_out:
2719 kmem_cache_destroy(ntfs_index_ctx_cache); 3111 kmem_cache_destroy(ntfs_index_ctx_cache);
2720ictx_err_out: 3112ictx_err_out:
2721 if (!err) { 3113 if (!err) {
2722 printk(KERN_CRIT "NTFS: Aborting NTFS file system driver " 3114 printk(KERN_CRIT "NTFS: Aborting NTFS filesystem driver "
2723 "registration...\n"); 3115 "registration...\n");
2724 err = -ENOMEM; 3116 err = -ENOMEM;
2725 } 3117 }
@@ -2759,7 +3151,7 @@ static void __exit exit_ntfs_fs(void)
2759} 3151}
2760 3152
2761MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>"); 3153MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>");
2762MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2004 Anton Altaparmakov"); 3154MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2005 Anton Altaparmakov");
2763MODULE_VERSION(NTFS_VERSION); 3155MODULE_VERSION(NTFS_VERSION);
2764MODULE_LICENSE("GPL"); 3156MODULE_LICENSE("GPL");
2765#ifdef DEBUG 3157#ifdef DEBUG
diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c
index 75067e4f3036..1c23138d00b3 100644
--- a/fs/ntfs/sysctl.c
+++ b/fs/ntfs/sysctl.c
@@ -3,7 +3,7 @@
3 * the Linux-NTFS project. Adapted from the old NTFS driver, 3 * the Linux-NTFS project. Adapted from the old NTFS driver,
4 * Copyright (C) 1997 Martin von Löwis, Régis Duchesne 4 * Copyright (C) 1997 Martin von Löwis, Régis Duchesne
5 * 5 *
6 * Copyright (c) 2002-2004 Anton Altaparmakov 6 * Copyright (c) 2002-2005 Anton Altaparmakov
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as published 9 * modify it under the terms of the GNU General Public License as published
@@ -67,7 +67,7 @@ int ntfs_sysctl(int add)
67 return -ENOMEM; 67 return -ENOMEM;
68#ifdef CONFIG_PROC_FS 68#ifdef CONFIG_PROC_FS
69 /* 69 /*
70 * If the proc file system is in use and we are a module, need 70 * If the proc filesystem is in use and we are a module, need
71 * to set the owner of our proc entry to our module. In the 71 * to set the owner of our proc entry to our module. In the
72 * non-modular case, THIS_MODULE is NULL, so this is ok. 72 * non-modular case, THIS_MODULE is NULL, so this is ok.
73 */ 73 */
diff --git a/fs/ntfs/sysctl.h b/fs/ntfs/sysctl.h
index df749cc0aac8..c8064cae8f17 100644
--- a/fs/ntfs/sysctl.h
+++ b/fs/ntfs/sysctl.h
@@ -26,7 +26,7 @@
26 26
27#include <linux/config.h> 27#include <linux/config.h>
28 28
29#if (DEBUG && CONFIG_SYSCTL) 29#if defined(DEBUG) && defined(CONFIG_SYSCTL)
30 30
31extern int ntfs_sysctl(int add); 31extern int ntfs_sysctl(int add);
32 32
diff --git a/fs/ntfs/time.h b/fs/ntfs/time.h
index a09a51dabe4e..01233989d5d1 100644
--- a/fs/ntfs/time.h
+++ b/fs/ntfs/time.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * time.h - NTFS time conversion functions. Part of the Linux-NTFS project. 2 * time.h - NTFS time conversion functions. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -87,7 +87,7 @@ static inline struct timespec ntfs2utc(const sle64 time)
87 struct timespec ts; 87 struct timespec ts;
88 88
89 /* Subtract the NTFS time offset. */ 89 /* Subtract the NTFS time offset. */
90 s64 t = sle64_to_cpu(time) - NTFS_TIME_OFFSET; 90 u64 t = (u64)(sle64_to_cpu(time) - NTFS_TIME_OFFSET);
91 /* 91 /*
92 * Convert the time to 1-second intervals and the remainder to 92 * Convert the time to 1-second intervals and the remainder to
93 * 1-nano-second intervals. 93 * 1-nano-second intervals.
diff --git a/fs/ntfs/types.h b/fs/ntfs/types.h
index 08a55aa53d4e..6e4a7e3343f2 100644
--- a/fs/ntfs/types.h
+++ b/fs/ntfs/types.h
@@ -2,7 +2,7 @@
2 * types.h - Defines for NTFS Linux kernel driver specific types. 2 * types.h - Defines for NTFS Linux kernel driver specific types.
3 * Part of the Linux-NTFS project. 3 * Part of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as published 8 * modify it under the terms of the GNU General Public License as published
@@ -53,6 +53,14 @@ typedef sle64 leLCN;
53typedef s64 LSN; 53typedef s64 LSN;
54typedef sle64 leLSN; 54typedef sle64 leLSN;
55 55
56/*
57 * The NTFS transaction log $UsnJrnl uses usn which are signed 64-bit values.
58 * We define our own type USN, to allow for type checking and better code
59 * readability.
60 */
61typedef s64 USN;
62typedef sle64 leUSN;
63
56typedef enum { 64typedef enum {
57 FALSE = 0, 65 FALSE = 0,
58 TRUE = 1 66 TRUE = 1
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index 560b0ea255b0..19c42e231b44 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -264,7 +264,7 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
264 264
265 /* We don't trust outside sources. */ 265 /* We don't trust outside sources. */
266 if (ins) { 266 if (ins) {
267 ucs = (ntfschar*)kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS); 267 ucs = kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS);
268 if (ucs) { 268 if (ucs) {
269 for (i = o = 0; i < ins_len; i += wc_len) { 269 for (i = o = 0; i < ins_len; i += wc_len) {
270 wc_len = nls->char2uni(ins + i, ins_len - i, 270 wc_len = nls->char2uni(ins + i, ins_len - i,
diff --git a/fs/ntfs/usnjrnl.c b/fs/ntfs/usnjrnl.c
new file mode 100644
index 000000000000..77773240d139
--- /dev/null
+++ b/fs/ntfs/usnjrnl.c
@@ -0,0 +1,84 @@
1/*
2 * usnjrnl.h - NTFS kernel transaction log ($UsnJrnl) handling. Part of the
3 * Linux-NTFS project.
4 *
5 * Copyright (c) 2005 Anton Altaparmakov
6 *
7 * This program/include file is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as published
9 * by the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program/include file is distributed in the hope that it will be
13 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program (in the main directory of the Linux-NTFS
19 * distribution in the file COPYING); if not, write to the Free Software
20 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23#ifdef NTFS_RW
24
25#include <linux/fs.h>
26#include <linux/highmem.h>
27#include <linux/mm.h>
28
29#include "aops.h"
30#include "debug.h"
31#include "endian.h"
32#include "time.h"
33#include "types.h"
34#include "usnjrnl.h"
35#include "volume.h"
36
37/**
38 * ntfs_stamp_usnjrnl - stamp the transaction log ($UsnJrnl) on an ntfs volume
39 * @vol: ntfs volume on which to stamp the transaction log
40 *
41 * Stamp the transaction log ($UsnJrnl) on the ntfs volume @vol and return
42 * TRUE on success and FALSE on error.
43 *
44 * This function assumes that the transaction log has already been loaded and
45 * consistency checked by a call to fs/ntfs/super.c::load_and_init_usnjrnl().
46 */
47BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol)
48{
49 ntfs_debug("Entering.");
50 if (likely(!NVolUsnJrnlStamped(vol))) {
51 sle64 stamp;
52 struct page *page;
53 USN_HEADER *uh;
54
55 page = ntfs_map_page(vol->usnjrnl_max_ino->i_mapping, 0);
56 if (IS_ERR(page)) {
57 ntfs_error(vol->sb, "Failed to read from "
58 "$UsnJrnl/$DATA/$Max attribute.");
59 return FALSE;
60 }
61 uh = (USN_HEADER*)page_address(page);
62 stamp = get_current_ntfs_time();
63 ntfs_debug("Stamping transaction log ($UsnJrnl): old "
64 "journal_id 0x%llx, old lowest_valid_usn "
65 "0x%llx, new journal_id 0x%llx, new "
66 "lowest_valid_usn 0x%llx.",
67 (long long)sle64_to_cpu(uh->journal_id),
68 (long long)sle64_to_cpu(uh->lowest_valid_usn),
69 (long long)sle64_to_cpu(stamp),
70 i_size_read(vol->usnjrnl_j_ino));
71 uh->lowest_valid_usn =
72 cpu_to_sle64(i_size_read(vol->usnjrnl_j_ino));
73 uh->journal_id = stamp;
74 flush_dcache_page(page);
75 set_page_dirty(page);
76 ntfs_unmap_page(page);
77 /* Set the flag so we do not have to do it again on remount. */
78 NVolSetUsnJrnlStamped(vol);
79 }
80 ntfs_debug("Done.");
81 return TRUE;
82}
83
84#endif /* NTFS_RW */
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h
new file mode 100644
index 000000000000..ff988b0deb45
--- /dev/null
+++ b/fs/ntfs/usnjrnl.h
@@ -0,0 +1,205 @@
1/*
2 * usnjrnl.h - Defines for NTFS kernel transaction log ($UsnJrnl) handling.
3 * Part of the Linux-NTFS project.
4 *
5 * Copyright (c) 2005 Anton Altaparmakov
6 *
7 * This program/include file is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as published
9 * by the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program/include file is distributed in the hope that it will be
13 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program (in the main directory of the Linux-NTFS
19 * distribution in the file COPYING); if not, write to the Free Software
20 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23#ifndef _LINUX_NTFS_USNJRNL_H
24#define _LINUX_NTFS_USNJRNL_H
25
26#ifdef NTFS_RW
27
28#include "types.h"
29#include "endian.h"
30#include "layout.h"
31#include "volume.h"
32
33/*
34 * Transaction log ($UsnJrnl) organization:
35 *
36 * The transaction log records whenever a file is modified in any way. So for
37 * example it will record that file "blah" was written to at a particular time
38 * but not what was written. If will record that a file was deleted or
39 * created, that a file was truncated, etc. See below for all the reason
40 * codes used.
41 *
42 * The transaction log is in the $Extend directory which is in the root
43 * directory of each volume. If it is not present it means transaction
44 * logging is disabled. If it is present it means transaction logging is
45 * either enabled or in the process of being disabled in which case we can
46 * ignore it as it will go away as soon as Windows gets its hands on it.
47 *
48 * To determine whether the transaction logging is enabled or in the process
49 * of being disabled, need to check the volume flags in the
50 * $VOLUME_INFORMATION attribute in the $Volume system file (which is present
51 * in the root directory and has a fixed mft record number, see layout.h).
52 * If the flag VOLUME_DELETE_USN_UNDERWAY is set it means the transaction log
53 * is in the process of being disabled and if this flag is clear it means the
54 * transaction log is enabled.
55 *
56 * The transaction log consists of two parts; the $DATA/$Max attribute as well
57 * as the $DATA/$J attribute. $Max is a header describing the transaction
58 * log whilst $J is the transaction log data itself as a sequence of variable
59 * sized USN_RECORDs (see below for all the structures).
60 *
61 * We do not care about transaction logging at this point in time but we still
62 * need to let windows know that the transaction log is out of date. To do
63 * this we need to stamp the transaction log. This involves setting the
64 * lowest_valid_usn field in the $DATA/$Max attribute to the usn to be used
65 * for the next added USN_RECORD to the $DATA/$J attribute as well as
66 * generating a new journal_id in $DATA/$Max.
67 *
68 * The journal_id is as of the current version (2.0) of the transaction log
69 * simply the 64-bit timestamp of when the journal was either created or last
70 * stamped.
71 *
72 * To determine the next usn there are two ways. The first is to parse
73 * $DATA/$J and to find the last USN_RECORD in it and to add its record_length
74 * to its usn (which is the byte offset in the $DATA/$J attribute). The
75 * second is simply to take the data size of the attribute. Since the usns
76 * are simply byte offsets into $DATA/$J, this is exactly the next usn. For
77 * obvious reasons we use the second method as it is much simpler and faster.
78 *
79 * As an aside, note that to actually disable the transaction log, one would
80 * need to set the VOLUME_DELETE_USN_UNDERWAY flag (see above), then go
81 * through all the mft records on the volume and set the usn field in their
82 * $STANDARD_INFORMATION attribute to zero. Once that is done, one would need
83 * to delete the transaction log file, i.e. \$Extent\$UsnJrnl, and finally,
84 * one would need to clear the VOLUME_DELETE_USN_UNDERWAY flag.
85 *
86 * Note that if a volume is unmounted whilst the transaction log is being
87 * disabled, the process will continue the next time the volume is mounted.
88 * This is why we can safely mount read-write when we see a transaction log
89 * in the process of being deleted.
90 */
91
92/* Some $UsnJrnl related constants. */
93#define UsnJrnlMajorVer 2
94#define UsnJrnlMinorVer 0
95
96/*
97 * $DATA/$Max attribute. This is (always?) resident and has a fixed size of
98 * 32 bytes. It contains the header describing the transaction log.
99 */
100typedef struct {
101/*Ofs*/
102/* 0*/sle64 maximum_size; /* The maximum on-disk size of the $DATA/$J
103 attribute. */
104/* 8*/sle64 allocation_delta; /* Number of bytes by which to increase the
105 size of the $DATA/$J attribute. */
106/*0x10*/sle64 journal_id; /* Current id of the transaction log. */
107/*0x18*/leUSN lowest_valid_usn; /* Lowest valid usn in $DATA/$J for the
108 current journal_id. */
109/* sizeof() = 32 (0x20) bytes */
110} __attribute__ ((__packed__)) USN_HEADER;
111
112/*
113 * Reason flags (32-bit). Cumulative flags describing the change(s) to the
114 * file since it was last opened. I think the names speak for themselves but
115 * if you disagree check out the descriptions in the Linux NTFS project NTFS
116 * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html
117 */
118enum {
119 USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001),
120 USN_REASON_DATA_EXTEND = const_cpu_to_le32(0x00000002),
121 USN_REASON_DATA_TRUNCATION = const_cpu_to_le32(0x00000004),
122 USN_REASON_NAMED_DATA_OVERWRITE = const_cpu_to_le32(0x00000010),
123 USN_REASON_NAMED_DATA_EXTEND = const_cpu_to_le32(0x00000020),
124 USN_REASON_NAMED_DATA_TRUNCATION= const_cpu_to_le32(0x00000040),
125 USN_REASON_FILE_CREATE = const_cpu_to_le32(0x00000100),
126 USN_REASON_FILE_DELETE = const_cpu_to_le32(0x00000200),
127 USN_REASON_EA_CHANGE = const_cpu_to_le32(0x00000400),
128 USN_REASON_SECURITY_CHANGE = const_cpu_to_le32(0x00000800),
129 USN_REASON_RENAME_OLD_NAME = const_cpu_to_le32(0x00001000),
130 USN_REASON_RENAME_NEW_NAME = const_cpu_to_le32(0x00002000),
131 USN_REASON_INDEXABLE_CHANGE = const_cpu_to_le32(0x00004000),
132 USN_REASON_BASIC_INFO_CHANGE = const_cpu_to_le32(0x00008000),
133 USN_REASON_HARD_LINK_CHANGE = const_cpu_to_le32(0x00010000),
134 USN_REASON_COMPRESSION_CHANGE = const_cpu_to_le32(0x00020000),
135 USN_REASON_ENCRYPTION_CHANGE = const_cpu_to_le32(0x00040000),
136 USN_REASON_OBJECT_ID_CHANGE = const_cpu_to_le32(0x00080000),
137 USN_REASON_REPARSE_POINT_CHANGE = const_cpu_to_le32(0x00100000),
138 USN_REASON_STREAM_CHANGE = const_cpu_to_le32(0x00200000),
139 USN_REASON_CLOSE = const_cpu_to_le32(0x80000000),
140};
141
142typedef le32 USN_REASON_FLAGS;
143
144/*
145 * Source info flags (32-bit). Information about the source of the change(s)
146 * to the file. For detailed descriptions of what these mean, see the Linux
147 * NTFS project NTFS documentation:
148 * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html
149 */
150enum {
151 USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001),
152 USN_SOURCE_AUXILIARY_DATA = const_cpu_to_le32(0x00000002),
153 USN_SOURCE_REPLICATION_MANAGEMENT = const_cpu_to_le32(0x00000004),
154};
155
156typedef le32 USN_SOURCE_INFO_FLAGS;
157
158/*
159 * $DATA/$J attribute. This is always non-resident, is marked as sparse, and
160 * is of variabled size. It consists of a sequence of variable size
161 * USN_RECORDS. The minimum allocated_size is allocation_delta as
162 * specified in $DATA/$Max. When the maximum_size specified in $DATA/$Max is
163 * exceeded by more than allocation_delta bytes, allocation_delta bytes are
164 * allocated and appended to the $DATA/$J attribute and an equal number of
165 * bytes at the beginning of the attribute are freed and made sparse. Note the
166 * making sparse only happens at volume checkpoints and hence the actual
167 * $DATA/$J size can exceed maximum_size + allocation_delta temporarily.
168 */
169typedef struct {
170/*Ofs*/
171/* 0*/le32 length; /* Byte size of this record (8-byte
172 aligned). */
173/* 4*/le16 major_ver; /* Major version of the transaction log used
174 for this record. */
175/* 6*/le16 minor_ver; /* Minor version of the transaction log used
176 for this record. */
177/* 8*/leMFT_REF mft_reference;/* The mft reference of the file (or
178 directory) described by this record. */
179/*0x10*/leMFT_REF parent_directory;/* The mft reference of the parent
180 directory of the file described by this
181 record. */
182/*0x18*/leUSN usn; /* The usn of this record. Equals the offset
183 within the $DATA/$J attribute. */
184/*0x20*/sle64 time; /* Time when this record was created. */
185/*0x28*/USN_REASON_FLAGS reason;/* Reason flags (see above). */
186/*0x2c*/USN_SOURCE_INFO_FLAGS source_info;/* Source info flags (see above). */
187/*0x30*/le32 security_id; /* File security_id copied from
188 $STANDARD_INFORMATION. */
189/*0x34*/FILE_ATTR_FLAGS file_attributes; /* File attributes copied from
190 $STANDARD_INFORMATION or $FILE_NAME (not
191 sure which). */
192/*0x38*/le16 file_name_size; /* Size of the file name in bytes. */
193/*0x3a*/le16 file_name_offset; /* Offset to the file name in bytes from the
194 start of this record. */
195/*0x3c*/ntfschar file_name[0]; /* Use when creating only. When reading use
196 file_name_offset to determine the location
197 of the name. */
198/* sizeof() = 60 (0x3c) bytes */
199} __attribute__ ((__packed__)) USN_RECORD;
200
201extern BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol);
202
203#endif /* NTFS_RW */
204
205#endif /* _LINUX_NTFS_USNJRNL_H */
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h
index 4b97fa8635a8..375cd20a9f61 100644
--- a/fs/ntfs/volume.h
+++ b/fs/ntfs/volume.h
@@ -2,7 +2,7 @@
2 * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part 2 * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part
3 * of the Linux-NTFS project. 3 * of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -54,7 +54,7 @@ typedef struct {
54 mode_t dmask; /* The mask for directory 54 mode_t dmask; /* The mask for directory
55 permissions. */ 55 permissions. */
56 u8 mft_zone_multiplier; /* Initial mft zone multiplier. */ 56 u8 mft_zone_multiplier; /* Initial mft zone multiplier. */
57 u8 on_errors; /* What to do on file system errors. */ 57 u8 on_errors; /* What to do on filesystem errors. */
58 /* NTFS bootsector provided information. */ 58 /* NTFS bootsector provided information. */
59 u16 sector_size; /* in bytes */ 59 u16 sector_size; /* in bytes */
60 u8 sector_size_bits; /* log2(sector_size) */ 60 u8 sector_size_bits; /* log2(sector_size) */
@@ -125,6 +125,10 @@ typedef struct {
125 /* $Quota stuff is NTFS3.0+ specific. Unused/NULL otherwise. */ 125 /* $Quota stuff is NTFS3.0+ specific. Unused/NULL otherwise. */
126 struct inode *quota_ino; /* The VFS inode of $Quota. */ 126 struct inode *quota_ino; /* The VFS inode of $Quota. */
127 struct inode *quota_q_ino; /* Attribute inode for $Quota/$Q. */ 127 struct inode *quota_q_ino; /* Attribute inode for $Quota/$Q. */
128 /* $UsnJrnl stuff is NTFS3.0+ specific. Unused/NULL otherwise. */
129 struct inode *usnjrnl_ino; /* The VFS inode of $UsnJrnl. */
130 struct inode *usnjrnl_max_ino; /* Attribute inode for $UsnJrnl/$Max. */
131 struct inode *usnjrnl_j_ino; /* Attribute inode for $UsnJrnl/$J. */
128#endif /* NTFS_RW */ 132#endif /* NTFS_RW */
129 struct nls_table *nls_map; 133 struct nls_table *nls_map;
130} ntfs_volume; 134} ntfs_volume;
@@ -141,6 +145,8 @@ typedef enum {
141 file names in WIN32 namespace. */ 145 file names in WIN32 namespace. */
142 NV_LogFileEmpty, /* 1: $LogFile journal is empty. */ 146 NV_LogFileEmpty, /* 1: $LogFile journal is empty. */
143 NV_QuotaOutOfDate, /* 1: $Quota is out of date. */ 147 NV_QuotaOutOfDate, /* 1: $Quota is out of date. */
148 NV_UsnJrnlStamped, /* 1: $UsnJrnl has been stamped. */
149 NV_SparseEnabled, /* 1: May create sparse files. */
144} ntfs_volume_flags; 150} ntfs_volume_flags;
145 151
146/* 152/*
@@ -167,5 +173,7 @@ NVOL_FNS(ShowSystemFiles)
167NVOL_FNS(CaseSensitive) 173NVOL_FNS(CaseSensitive)
168NVOL_FNS(LogFileEmpty) 174NVOL_FNS(LogFileEmpty)
169NVOL_FNS(QuotaOutOfDate) 175NVOL_FNS(QuotaOutOfDate)
176NVOL_FNS(UsnJrnlStamped)
177NVOL_FNS(SparseEnabled)
170 178
171#endif /* _LINUX_NTFS_VOLUME_H */ 179#endif /* _LINUX_NTFS_VOLUME_H */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 1aaf2c7d44e6..d9f614a57731 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1980,7 +1980,17 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1980 out_inserted_sd: 1980 out_inserted_sd:
1981 inode->i_nlink = 0; 1981 inode->i_nlink = 0;
1982 th->t_trans_id = 0; /* so the caller can't use this handle later */ 1982 th->t_trans_id = 0; /* so the caller can't use this handle later */
1983 iput(inode); 1983
1984 /* If we were inheriting an ACL, we need to release the lock so that
1985 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
1986 * code really needs to be reworked, but this will take care of it
1987 * for now. -jeffm */
1988 if (REISERFS_I(dir)->i_acl_default) {
1989 reiserfs_write_unlock_xattrs(dir->i_sb);
1990 iput(inode);
1991 reiserfs_write_lock_xattrs(dir->i_sb);
1992 } else
1993 iput(inode);
1984 return err; 1994 return err;
1985} 1995}
1986 1996
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c66c27ec4100..ca7989b04be3 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -556,14 +556,14 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
556} 556}
557 557
558/* lock the current transaction */ 558/* lock the current transaction */
559inline static void lock_journal(struct super_block *p_s_sb) 559static inline void lock_journal(struct super_block *p_s_sb)
560{ 560{
561 PROC_INFO_INC(p_s_sb, journal.lock_journal); 561 PROC_INFO_INC(p_s_sb, journal.lock_journal);
562 down(&SB_JOURNAL(p_s_sb)->j_lock); 562 down(&SB_JOURNAL(p_s_sb)->j_lock);
563} 563}
564 564
565/* unlock the current transaction */ 565/* unlock the current transaction */
566inline static void unlock_journal(struct super_block *p_s_sb) 566static inline void unlock_journal(struct super_block *p_s_sb)
567{ 567{
568 up(&SB_JOURNAL(p_s_sb)->j_lock); 568 up(&SB_JOURNAL(p_s_sb)->j_lock);
569} 569}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index e386d3db3051..87ac9dc8b381 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -39,7 +39,6 @@
39#include <linux/xattr.h> 39#include <linux/xattr.h>
40#include <linux/reiserfs_xattr.h> 40#include <linux/reiserfs_xattr.h>
41#include <linux/reiserfs_acl.h> 41#include <linux/reiserfs_acl.h>
42#include <linux/mbcache.h>
43#include <asm/uaccess.h> 42#include <asm/uaccess.h>
44#include <asm/checksum.h> 43#include <asm/checksum.h>
45#include <linux/smp_lock.h> 44#include <linux/smp_lock.h>
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 335288b9be0f..4013d7905e84 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -437,8 +437,8 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
437{ 437{
438 struct dentry *dir = kobj->dentry; 438 struct dentry *dir = kobj->dentry;
439 struct dentry *victim; 439 struct dentry *victim;
440 struct sysfs_dirent *sd; 440 struct inode * inode;
441 umode_t umode = (mode & S_IALLUGO) | S_IFREG; 441 struct iattr newattrs;
442 int res = -ENOENT; 442 int res = -ENOENT;
443 443
444 down(&dir->d_inode->i_sem); 444 down(&dir->d_inode->i_sem);
@@ -446,13 +446,15 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
446 if (!IS_ERR(victim)) { 446 if (!IS_ERR(victim)) {
447 if (victim->d_inode && 447 if (victim->d_inode &&
448 (victim->d_parent->d_inode == dir->d_inode)) { 448 (victim->d_parent->d_inode == dir->d_inode)) {
449 sd = victim->d_fsdata; 449 inode = victim->d_inode;
450 attr->mode = mode; 450 down(&inode->i_sem);
451 sd->s_mode = umode; 451 newattrs.ia_mode = (mode & S_IALLUGO) |
452 victim->d_inode->i_mode = umode; 452 (inode->i_mode & ~S_IALLUGO);
453 dput(victim); 453 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
454 res = 0; 454 res = notify_change(victim, &newattrs);
455 up(&inode->i_sem);
455 } 456 }
457 dput(victim);
456 } 458 }
457 up(&dir->d_inode->i_sem); 459 up(&dir->d_inode->i_sem);
458 460
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 8de13bafaa76..d727dc960634 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -85,7 +85,7 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
85 85
86 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) 86 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
87 mode &= ~S_ISGID; 87 mode &= ~S_ISGID;
88 sd_iattr->ia_mode = mode; 88 sd_iattr->ia_mode = sd->s_mode = mode;
89 } 89 }
90 90
91 return error; 91 return error;