aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig39
-rw-r--r--fs/Makefile5
-rw-r--r--fs/anon_inodes.c7
-rw-r--r--fs/bad_inode.c6
-rw-r--r--fs/befs/linuxvfs.c5
-rw-r--r--fs/binfmt_aout.c81
-rw-r--r--fs/block_dev.c9
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/cifsfs.c7
-rw-r--r--fs/cifs/cifsfs.h1
-rw-r--r--fs/cifs/fcntl.c118
-rw-r--r--fs/dcache.c25
-rw-r--r--fs/dcookies.c28
-rw-r--r--fs/devpts/inode.c472
-rw-r--r--fs/ecryptfs/inode.c3
-rw-r--r--fs/exec.c44
-rw-r--r--fs/ext2/ialloc.c6
-rw-r--r--fs/ext2/inode.c7
-rw-r--r--fs/ext2/namei.c15
-rw-r--r--fs/ext3/ialloc.c6
-rw-r--r--fs/ext3/inode.c7
-rw-r--r--fs/ext3/namei.c15
-rw-r--r--fs/ext4/ialloc.c6
-rw-r--r--fs/ext4/inode.c7
-rw-r--r--fs/ext4/namei.c14
-rw-r--r--fs/fat/dir.c1
-rw-r--r--fs/fat/inode.c2
-rw-r--r--fs/fat/namei_vfat.c2
-rw-r--r--fs/file_table.c10
-rw-r--r--fs/freevxfs/vxfs_inode.c4
-rw-r--r--fs/inode.c59
-rw-r--r--fs/jfs/jfs_inode.c29
-rw-r--r--fs/jfs/namei.c24
-rw-r--r--fs/namei.c115
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfsctl.c5
-rw-r--r--fs/notify/Kconfig2
-rw-r--r--fs/notify/Makefile2
-rw-r--r--fs/notify/dnotify/Kconfig10
-rw-r--r--fs/notify/dnotify/Makefile1
-rw-r--r--fs/notify/dnotify/dnotify.c (renamed from fs/dnotify.c)3
-rw-r--r--fs/notify/inotify/Kconfig27
-rw-r--r--fs/notify/inotify/Makefile2
-rw-r--r--fs/notify/inotify/inotify.c (renamed from fs/inotify.c)0
-rw-r--r--fs/notify/inotify/inotify_user.c (renamed from fs/inotify_user.c)2
-rw-r--r--fs/open.c5
-rw-r--r--fs/proc/stat.c11
-rw-r--r--fs/reiserfs/inode.c15
-rw-r--r--fs/reiserfs/namei.c8
-rw-r--r--fs/seq_file.c10
-rw-r--r--fs/sysv/inode.c6
-rw-r--r--fs/ubifs/budget.c208
-rw-r--r--fs/ubifs/commit.c25
-rw-r--r--fs/ubifs/compress.c18
-rw-r--r--fs/ubifs/debug.c265
-rw-r--r--fs/ubifs/debug.h117
-rw-r--r--fs/ubifs/file.c8
-rw-r--r--fs/ubifs/ioctl.c2
-rw-r--r--fs/ubifs/journal.c4
-rw-r--r--fs/ubifs/key.h32
-rw-r--r--fs/ubifs/lprops.c14
-rw-r--r--fs/ubifs/lpt.c45
-rw-r--r--fs/ubifs/lpt_commit.c210
-rw-r--r--fs/ubifs/orphan.c2
-rw-r--r--fs/ubifs/replay.c15
-rw-r--r--fs/ubifs/sb.c20
-rw-r--r--fs/ubifs/super.c255
-rw-r--r--fs/ubifs/tnc.c31
-rw-r--r--fs/ubifs/tnc_commit.c9
-rw-r--r--fs/ubifs/ubifs-media.h7
-rw-r--r--fs/ubifs/ubifs.h111
71 files changed, 1753 insertions, 937 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 522469a7eca3..ff0e81980207 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -270,44 +270,7 @@ config OCFS2_COMPAT_JBD
270 270
271endif # BLOCK 271endif # BLOCK
272 272
273config DNOTIFY 273source "fs/notify/Kconfig"
274 bool "Dnotify support"
275 default y
276 help
277 Dnotify is a directory-based per-fd file change notification system
278 that uses signals to communicate events to user-space. There exist
279 superior alternatives, but some applications may still rely on
280 dnotify.
281
282 If unsure, say Y.
283
284config INOTIFY
285 bool "Inotify file change notification support"
286 default y
287 ---help---
288 Say Y here to enable inotify support. Inotify is a file change
289 notification system and a replacement for dnotify. Inotify fixes
290 numerous shortcomings in dnotify and introduces several new features
291 including multiple file events, one-shot support, and unmount
292 notification.
293
294 For more information, see <file:Documentation/filesystems/inotify.txt>
295
296 If unsure, say Y.
297
298config INOTIFY_USER
299 bool "Inotify support for userspace"
300 depends on INOTIFY
301 default y
302 ---help---
303 Say Y here to enable inotify support for userspace, including the
304 associated system calls. Inotify allows monitoring of both files and
305 directories via a single open fd. Events are read from the file
306 descriptor, which is also select()- and poll()-able.
307
308 For more information, see <file:Documentation/filesystems/inotify.txt>
309
310 If unsure, say Y.
311 274
312config QUOTA 275config QUOTA
313 bool "Quota support" 276 bool "Quota support"
diff --git a/fs/Makefile b/fs/Makefile
index d9f8afe6f0c4..e6f423d1d228 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -20,8 +20,7 @@ obj-y += no-block.o
20endif 20endif
21 21
22obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o 22obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
23obj-$(CONFIG_INOTIFY) += inotify.o 23obj-y += notify/
24obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
25obj-$(CONFIG_EPOLL) += eventpoll.o 24obj-$(CONFIG_EPOLL) += eventpoll.o
26obj-$(CONFIG_ANON_INODES) += anon_inodes.o 25obj-$(CONFIG_ANON_INODES) += anon_inodes.o
27obj-$(CONFIG_SIGNALFD) += signalfd.o 26obj-$(CONFIG_SIGNALFD) += signalfd.o
@@ -57,8 +56,6 @@ obj-$(CONFIG_QFMT_V1) += quota_v1.o
57obj-$(CONFIG_QFMT_V2) += quota_v2.o 56obj-$(CONFIG_QFMT_V2) += quota_v2.o
58obj-$(CONFIG_QUOTACTL) += quota.o 57obj-$(CONFIG_QUOTACTL) += quota.o
59 58
60obj-$(CONFIG_DNOTIFY) += dnotify.o
61
62obj-$(CONFIG_PROC_FS) += proc/ 59obj-$(CONFIG_PROC_FS) += proc/
63obj-y += partitions/ 60obj-y += partitions/
64obj-$(CONFIG_SYSFS) += sysfs/ 61obj-$(CONFIG_SYSFS) += sysfs/
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index c16d9be1b017..3bbdb9d02376 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -79,9 +79,12 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
79 if (IS_ERR(anon_inode_inode)) 79 if (IS_ERR(anon_inode_inode))
80 return -ENODEV; 80 return -ENODEV;
81 81
82 if (fops->owner && !try_module_get(fops->owner))
83 return -ENOENT;
84
82 error = get_unused_fd_flags(flags); 85 error = get_unused_fd_flags(flags);
83 if (error < 0) 86 if (error < 0)
84 return error; 87 goto err_module;
85 fd = error; 88 fd = error;
86 89
87 /* 90 /*
@@ -128,6 +131,8 @@ err_dput:
128 dput(dentry); 131 dput(dentry);
129err_put_unused_fd: 132err_put_unused_fd:
130 put_unused_fd(fd); 133 put_unused_fd(fd);
134err_module:
135 module_put(fops->owner);
131 return error; 136 return error;
132} 137}
133EXPORT_SYMBOL_GPL(anon_inode_getfd); 138EXPORT_SYMBOL_GPL(anon_inode_getfd);
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 5f1538c03b1b..a05287a23f62 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -132,11 +132,6 @@ static int bad_file_check_flags(int flags)
132 return -EIO; 132 return -EIO;
133} 133}
134 134
135static int bad_file_dir_notify(struct file *file, unsigned long arg)
136{
137 return -EIO;
138}
139
140static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl) 135static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl)
141{ 136{
142 return -EIO; 137 return -EIO;
@@ -179,7 +174,6 @@ static const struct file_operations bad_file_ops =
179 .sendpage = bad_file_sendpage, 174 .sendpage = bad_file_sendpage,
180 .get_unmapped_area = bad_file_get_unmapped_area, 175 .get_unmapped_area = bad_file_get_unmapped_area,
181 .check_flags = bad_file_check_flags, 176 .check_flags = bad_file_check_flags,
182 .dir_notify = bad_file_dir_notify,
183 .flock = bad_file_flock, 177 .flock = bad_file_flock,
184 .splice_write = bad_file_splice_write, 178 .splice_write = bad_file_splice_write,
185 .splice_read = bad_file_splice_read, 179 .splice_read = bad_file_splice_read,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index b6dfee37c7b7..d06cb023ad02 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -378,7 +378,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
378 inode->i_size = 0; 378 inode->i_size = 0;
379 inode->i_blocks = befs_sb->block_size / VFS_BLOCK_SIZE; 379 inode->i_blocks = befs_sb->block_size / VFS_BLOCK_SIZE;
380 strncpy(befs_ino->i_data.symlink, raw_inode->data.symlink, 380 strncpy(befs_ino->i_data.symlink, raw_inode->data.symlink,
381 BEFS_SYMLINK_LEN); 381 BEFS_SYMLINK_LEN - 1);
382 befs_ino->i_data.symlink[BEFS_SYMLINK_LEN - 1] = '\0';
382 } else { 383 } else {
383 int num_blks; 384 int num_blks;
384 385
@@ -477,6 +478,8 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
477 kfree(link); 478 kfree(link);
478 befs_error(sb, "Failed to read entire long symlink"); 479 befs_error(sb, "Failed to read entire long symlink");
479 link = ERR_PTR(-EIO); 480 link = ERR_PTR(-EIO);
481 } else {
482 link[len - 1] = '\0';
480 } 483 }
481 } else { 484 } else {
482 link = befs_ino->i_data.symlink; 485 link = befs_ino->i_data.symlink;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index f1f3f4192a60..b639dcf7c778 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -95,92 +95,55 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
95 int has_dumped = 0; 95 int has_dumped = 0;
96 unsigned long dump_start, dump_size; 96 unsigned long dump_start, dump_size;
97 struct user dump; 97 struct user dump;
98#if defined(__alpha__) 98#ifdef __alpha__
99# define START_DATA(u) (u.start_data) 99# define START_DATA(u) (u.start_data)
100#elif defined(__arm__) 100#else
101# define START_DATA(u) ((u.u_tsize << PAGE_SHIFT) + u.start_code) 101# define START_DATA(u) ((u.u_tsize << PAGE_SHIFT) + u.start_code)
102#elif defined(__sparc__)
103# define START_DATA(u) (u.u_tsize)
104#elif defined(__i386__) || defined(__mc68000__) || defined(__arch_um__)
105# define START_DATA(u) (u.u_tsize << PAGE_SHIFT)
106#endif 102#endif
107#ifdef __sparc__
108# define START_STACK(u) ((regs->u_regs[UREG_FP]) & ~(PAGE_SIZE - 1))
109#else
110# define START_STACK(u) (u.start_stack) 103# define START_STACK(u) (u.start_stack)
111#endif
112 104
113 fs = get_fs(); 105 fs = get_fs();
114 set_fs(KERNEL_DS); 106 set_fs(KERNEL_DS);
115 has_dumped = 1; 107 has_dumped = 1;
116 current->flags |= PF_DUMPCORE; 108 current->flags |= PF_DUMPCORE;
117 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); 109 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
118#ifndef __sparc__
119 dump.u_ar0 = offsetof(struct user, regs); 110 dump.u_ar0 = offsetof(struct user, regs);
120#endif
121 dump.signal = signr; 111 dump.signal = signr;
122 aout_dump_thread(regs, &dump); 112 aout_dump_thread(regs, &dump);
123 113
124/* If the size of the dump file exceeds the rlimit, then see what would happen 114/* If the size of the dump file exceeds the rlimit, then see what would happen
125 if we wrote the stack, but not the data area. */ 115 if we wrote the stack, but not the data area. */
126#ifdef __sparc__
127 if ((dump.u_dsize + dump.u_ssize) > limit)
128 dump.u_dsize = 0;
129#else
130 if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit) 116 if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit)
131 dump.u_dsize = 0; 117 dump.u_dsize = 0;
132#endif
133 118
134/* Make sure we have enough room to write the stack and data areas. */ 119/* Make sure we have enough room to write the stack and data areas. */
135#ifdef __sparc__
136 if (dump.u_ssize > limit)
137 dump.u_ssize = 0;
138#else
139 if ((dump.u_ssize + 1) * PAGE_SIZE > limit) 120 if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
140 dump.u_ssize = 0; 121 dump.u_ssize = 0;
141#endif
142 122
143/* make sure we actually have a data and stack area to dump */ 123/* make sure we actually have a data and stack area to dump */
144 set_fs(USER_DS); 124 set_fs(USER_DS);
145#ifdef __sparc__
146 if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize))
147 dump.u_dsize = 0;
148 if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize))
149 dump.u_ssize = 0;
150#else
151 if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize << PAGE_SHIFT)) 125 if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
152 dump.u_dsize = 0; 126 dump.u_dsize = 0;
153 if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize << PAGE_SHIFT)) 127 if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
154 dump.u_ssize = 0; 128 dump.u_ssize = 0;
155#endif
156 129
157 set_fs(KERNEL_DS); 130 set_fs(KERNEL_DS);
158/* struct user */ 131/* struct user */
159 DUMP_WRITE(&dump,sizeof(dump)); 132 DUMP_WRITE(&dump,sizeof(dump));
160/* Now dump all of the user data. Include malloced stuff as well */ 133/* Now dump all of the user data. Include malloced stuff as well */
161#ifndef __sparc__
162 DUMP_SEEK(PAGE_SIZE); 134 DUMP_SEEK(PAGE_SIZE);
163#endif
164/* now we start writing out the user space info */ 135/* now we start writing out the user space info */
165 set_fs(USER_DS); 136 set_fs(USER_DS);
166/* Dump the data area */ 137/* Dump the data area */
167 if (dump.u_dsize != 0) { 138 if (dump.u_dsize != 0) {
168 dump_start = START_DATA(dump); 139 dump_start = START_DATA(dump);
169#ifdef __sparc__
170 dump_size = dump.u_dsize;
171#else
172 dump_size = dump.u_dsize << PAGE_SHIFT; 140 dump_size = dump.u_dsize << PAGE_SHIFT;
173#endif
174 DUMP_WRITE(dump_start,dump_size); 141 DUMP_WRITE(dump_start,dump_size);
175 } 142 }
176/* Now prepare to dump the stack area */ 143/* Now prepare to dump the stack area */
177 if (dump.u_ssize != 0) { 144 if (dump.u_ssize != 0) {
178 dump_start = START_STACK(dump); 145 dump_start = START_STACK(dump);
179#ifdef __sparc__
180 dump_size = dump.u_ssize;
181#else
182 dump_size = dump.u_ssize << PAGE_SHIFT; 146 dump_size = dump.u_ssize << PAGE_SHIFT;
183#endif
184 DUMP_WRITE(dump_start,dump_size); 147 DUMP_WRITE(dump_start,dump_size);
185 } 148 }
186/* Finally dump the task struct. Not be used by gdb, but could be useful */ 149/* Finally dump the task struct. Not be used by gdb, but could be useful */
@@ -205,29 +168,24 @@ static unsigned long __user *create_aout_tables(char __user *p, struct linux_bin
205 int envc = bprm->envc; 168 int envc = bprm->envc;
206 169
207 sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p); 170 sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p);
208#ifdef __sparc__
209 /* This imposes the proper stack alignment for a new process. */
210 sp = (void __user *) (((unsigned long) sp) & ~7);
211 if ((envc+argc+3)&1) --sp;
212#endif
213#ifdef __alpha__ 171#ifdef __alpha__
214/* whee.. test-programs are so much fun. */ 172/* whee.. test-programs are so much fun. */
215 put_user(0, --sp); 173 put_user(0, --sp);
216 put_user(0, --sp); 174 put_user(0, --sp);
217 if (bprm->loader) { 175 if (bprm->loader) {
218 put_user(0, --sp); 176 put_user(0, --sp);
219 put_user(0x3eb, --sp); 177 put_user(1003, --sp);
220 put_user(bprm->loader, --sp); 178 put_user(bprm->loader, --sp);
221 put_user(0x3ea, --sp); 179 put_user(1002, --sp);
222 } 180 }
223 put_user(bprm->exec, --sp); 181 put_user(bprm->exec, --sp);
224 put_user(0x3e9, --sp); 182 put_user(1001, --sp);
225#endif 183#endif
226 sp -= envc+1; 184 sp -= envc+1;
227 envp = (char __user * __user *) sp; 185 envp = (char __user * __user *) sp;
228 sp -= argc+1; 186 sp -= argc+1;
229 argv = (char __user * __user *) sp; 187 argv = (char __user * __user *) sp;
230#if defined(__i386__) || defined(__mc68000__) || defined(__arm__) || defined(__arch_um__) 188#ifndef __alpha__
231 put_user((unsigned long) envp,--sp); 189 put_user((unsigned long) envp,--sp);
232 put_user((unsigned long) argv,--sp); 190 put_user((unsigned long) argv,--sp);
233#endif 191#endif
@@ -300,13 +258,8 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
300 return retval; 258 return retval;
301 259
302 /* OK, This is the point of no return */ 260 /* OK, This is the point of no return */
303#if defined(__alpha__) 261#ifdef __alpha__
304 SET_AOUT_PERSONALITY(bprm, ex); 262 SET_AOUT_PERSONALITY(bprm, ex);
305#elif defined(__sparc__)
306 set_personality(PER_SUNOS);
307#if !defined(__sparc_v9__)
308 memcpy(&current->thread.core_exec, &ex, sizeof(struct exec));
309#endif
310#else 263#else
311 set_personality(PER_LINUX); 264 set_personality(PER_LINUX);
312#endif 265#endif
@@ -322,24 +275,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
322 275
323 install_exec_creds(bprm); 276 install_exec_creds(bprm);
324 current->flags &= ~PF_FORKNOEXEC; 277 current->flags &= ~PF_FORKNOEXEC;
325#ifdef __sparc__
326 if (N_MAGIC(ex) == NMAGIC) {
327 loff_t pos = fd_offset;
328 /* Fuck me plenty... */
329 /* <AOL></AOL> */
330 down_write(&current->mm->mmap_sem);
331 error = do_brk(N_TXTADDR(ex), ex.a_text);
332 up_write(&current->mm->mmap_sem);
333 bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
334 ex.a_text, &pos);
335 down_write(&current->mm->mmap_sem);
336 error = do_brk(N_DATADDR(ex), ex.a_data);
337 up_write(&current->mm->mmap_sem);
338 bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex),
339 ex.a_data, &pos);
340 goto beyond_if;
341 }
342#endif
343 278
344 if (N_MAGIC(ex) == OMAGIC) { 279 if (N_MAGIC(ex) == OMAGIC) {
345 unsigned long text_addr, map_size; 280 unsigned long text_addr, map_size;
@@ -347,7 +282,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
347 282
348 text_addr = N_TXTADDR(ex); 283 text_addr = N_TXTADDR(ex);
349 284
350#if defined(__alpha__) || defined(__sparc__) 285#ifdef __alpha__
351 pos = fd_offset; 286 pos = fd_offset;
352 map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1; 287 map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1;
353#else 288#else
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 99e0ae1a4c78..349a26c10001 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -326,12 +326,13 @@ static struct file_system_type bd_type = {
326 .kill_sb = kill_anon_super, 326 .kill_sb = kill_anon_super,
327}; 327};
328 328
329static struct vfsmount *bd_mnt __read_mostly; 329struct super_block *blockdev_superblock __read_mostly;
330struct super_block *blockdev_superblock;
331 330
332void __init bdev_cache_init(void) 331void __init bdev_cache_init(void)
333{ 332{
334 int err; 333 int err;
334 struct vfsmount *bd_mnt;
335
335 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 336 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
336 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 337 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
337 SLAB_MEM_SPREAD|SLAB_PANIC), 338 SLAB_MEM_SPREAD|SLAB_PANIC),
@@ -373,7 +374,7 @@ struct block_device *bdget(dev_t dev)
373 struct block_device *bdev; 374 struct block_device *bdev;
374 struct inode *inode; 375 struct inode *inode;
375 376
376 inode = iget5_locked(bd_mnt->mnt_sb, hash(dev), 377 inode = iget5_locked(blockdev_superblock, hash(dev),
377 bdev_test, bdev_set, &dev); 378 bdev_test, bdev_set, &dev);
378 379
379 if (!inode) 380 if (!inode)
@@ -463,7 +464,7 @@ void bd_forget(struct inode *inode)
463 464
464 spin_lock(&bdev_lock); 465 spin_lock(&bdev_lock);
465 if (inode->i_bdev) { 466 if (inode->i_bdev) {
466 if (inode->i_sb != blockdev_superblock) 467 if (!sb_is_blkdev_sb(inode->i_sb))
467 bdev = inode->i_bdev; 468 bdev = inode->i_bdev;
468 __bd_forget(inode); 469 __bd_forget(inode);
469 } 470 }
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 6ba43fb346fb..9948c0030e86 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o
5 5
6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ 6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ 7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
8 md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o \ 8 md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
9 readdir.o ioctl.o sess.o export.o cifsacl.o 9 readdir.o ioctl.o sess.o export.o cifsacl.o
10 10
11cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o 11cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 0005a194a75c..13ea53251dcf 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -747,7 +747,6 @@ const struct file_operations cifs_file_ops = {
747#endif /* CONFIG_CIFS_POSIX */ 747#endif /* CONFIG_CIFS_POSIX */
748 748
749#ifdef CONFIG_CIFS_EXPERIMENTAL 749#ifdef CONFIG_CIFS_EXPERIMENTAL
750 .dir_notify = cifs_dir_notify,
751 .setlease = cifs_setlease, 750 .setlease = cifs_setlease,
752#endif /* CONFIG_CIFS_EXPERIMENTAL */ 751#endif /* CONFIG_CIFS_EXPERIMENTAL */
753}; 752};
@@ -768,7 +767,6 @@ const struct file_operations cifs_file_direct_ops = {
768#endif /* CONFIG_CIFS_POSIX */ 767#endif /* CONFIG_CIFS_POSIX */
769 .llseek = cifs_llseek, 768 .llseek = cifs_llseek,
770#ifdef CONFIG_CIFS_EXPERIMENTAL 769#ifdef CONFIG_CIFS_EXPERIMENTAL
771 .dir_notify = cifs_dir_notify,
772 .setlease = cifs_setlease, 770 .setlease = cifs_setlease,
773#endif /* CONFIG_CIFS_EXPERIMENTAL */ 771#endif /* CONFIG_CIFS_EXPERIMENTAL */
774}; 772};
@@ -789,7 +787,6 @@ const struct file_operations cifs_file_nobrl_ops = {
789#endif /* CONFIG_CIFS_POSIX */ 787#endif /* CONFIG_CIFS_POSIX */
790 788
791#ifdef CONFIG_CIFS_EXPERIMENTAL 789#ifdef CONFIG_CIFS_EXPERIMENTAL
792 .dir_notify = cifs_dir_notify,
793 .setlease = cifs_setlease, 790 .setlease = cifs_setlease,
794#endif /* CONFIG_CIFS_EXPERIMENTAL */ 791#endif /* CONFIG_CIFS_EXPERIMENTAL */
795}; 792};
@@ -809,7 +806,6 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
809#endif /* CONFIG_CIFS_POSIX */ 806#endif /* CONFIG_CIFS_POSIX */
810 .llseek = cifs_llseek, 807 .llseek = cifs_llseek,
811#ifdef CONFIG_CIFS_EXPERIMENTAL 808#ifdef CONFIG_CIFS_EXPERIMENTAL
812 .dir_notify = cifs_dir_notify,
813 .setlease = cifs_setlease, 809 .setlease = cifs_setlease,
814#endif /* CONFIG_CIFS_EXPERIMENTAL */ 810#endif /* CONFIG_CIFS_EXPERIMENTAL */
815}; 811};
@@ -818,9 +814,6 @@ const struct file_operations cifs_dir_ops = {
818 .readdir = cifs_readdir, 814 .readdir = cifs_readdir,
819 .release = cifs_closedir, 815 .release = cifs_closedir,
820 .read = generic_read_dir, 816 .read = generic_read_dir,
821#ifdef CONFIG_CIFS_EXPERIMENTAL
822 .dir_notify = cifs_dir_notify,
823#endif /* CONFIG_CIFS_EXPERIMENTAL */
824 .unlocked_ioctl = cifs_ioctl, 817 .unlocked_ioctl = cifs_ioctl,
825 .llseek = generic_file_llseek, 818 .llseek = generic_file_llseek,
826}; 819};
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 2ce04c73d74e..7ac481841f87 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -76,7 +76,6 @@ extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
76extern const struct file_operations cifs_dir_ops; 76extern const struct file_operations cifs_dir_ops;
77extern int cifs_dir_open(struct inode *inode, struct file *file); 77extern int cifs_dir_open(struct inode *inode, struct file *file);
78extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); 78extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir);
79extern int cifs_dir_notify(struct file *, unsigned long arg);
80 79
81/* Functions related to dir entries */ 80/* Functions related to dir entries */
82extern struct dentry_operations cifs_dentry_ops; 81extern struct dentry_operations cifs_dentry_ops;
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
deleted file mode 100644
index 5a57581eb4b2..000000000000
--- a/fs/cifs/fcntl.c
+++ /dev/null
@@ -1,118 +0,0 @@
1/*
2 * fs/cifs/fcntl.c
3 *
4 * vfs operations that deal with the file control API
5 *
6 * Copyright (C) International Business Machines Corp., 2003,2004
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 *
9 * This library is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License as published
11 * by the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
17 * the GNU Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23#include <linux/fs.h>
24#include <linux/stat.h>
25#include <linux/fcntl.h>
26#include "cifsglob.h"
27#include "cifsproto.h"
28#include "cifs_unicode.h"
29#include "cifs_debug.h"
30#include "cifsfs.h"
31
32static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags)
33{
34 __u32 cifs_ntfy_flags = 0;
35
36 /* No way on Linux VFS to ask to monitor xattr
37 changes (and no stream support either */
38 if (fcntl_notify_flags & DN_ACCESS)
39 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
40 if (fcntl_notify_flags & DN_MODIFY) {
41 /* What does this mean on directories? */
42 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE |
43 FILE_NOTIFY_CHANGE_SIZE;
44 }
45 if (fcntl_notify_flags & DN_CREATE) {
46 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION |
47 FILE_NOTIFY_CHANGE_LAST_WRITE;
48 }
49 if (fcntl_notify_flags & DN_DELETE)
50 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE;
51 if (fcntl_notify_flags & DN_RENAME) {
52 /* BB review this - checking various server behaviors */
53 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME |
54 FILE_NOTIFY_CHANGE_FILE_NAME;
55 }
56 if (fcntl_notify_flags & DN_ATTRIB) {
57 cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY |
58 FILE_NOTIFY_CHANGE_ATTRIBUTES;
59 }
60/* if (fcntl_notify_flags & DN_MULTISHOT) {
61 cifs_ntfy_flags |= ;
62 } */ /* BB fixme - not sure how to handle this with CIFS yet */
63
64 return cifs_ntfy_flags;
65}
66
67int cifs_dir_notify(struct file *file, unsigned long arg)
68{
69 int xid;
70 int rc = -EINVAL;
71 int oplock = 0;
72 struct cifs_sb_info *cifs_sb;
73 struct cifsTconInfo *pTcon;
74 char *full_path = NULL;
75 __u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES;
76 __u16 netfid;
77
78 if (experimEnabled == 0)
79 return 0;
80
81 xid = GetXid();
82 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
83 pTcon = cifs_sb->tcon;
84
85 full_path = build_path_from_dentry(file->f_path.dentry);
86
87 if (full_path == NULL) {
88 rc = -ENOMEM;
89 } else {
90 cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg));
91 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
92 GENERIC_READ | SYNCHRONIZE, 0 /* create options */,
93 &netfid, &oplock, NULL, cifs_sb->local_nls,
94 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
95 /* BB fixme - add this handle to a notify handle list */
96 if (rc) {
97 cFYI(1, ("Could not open directory for notify"));
98 } else {
99 filter = convert_to_cifs_notify_flags(arg);
100 if (filter != 0) {
101 rc = CIFSSMBNotify(xid, pTcon,
102 0 /* no subdirs */, netfid,
103 filter, file, arg & DN_MULTISHOT,
104 cifs_sb->local_nls);
105 } else {
106 rc = -EINVAL;
107 }
108 /* BB add code to close file eventually (at unmount
109 it would close automatically but may be a way
110 to do it easily when inode freed or when
111 notify info is cleared/changed */
112 cFYI(1, ("notify rc %d", rc));
113 }
114 }
115
116 FreeXid(xid);
117 return rc;
118}
diff --git a/fs/dcache.c b/fs/dcache.c
index a1d86c7f3e66..e88c23b85a32 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -34,7 +34,6 @@
34#include <linux/bootmem.h> 34#include <linux/bootmem.h>
35#include "internal.h" 35#include "internal.h"
36 36
37
38int sysctl_vfs_cache_pressure __read_mostly = 100; 37int sysctl_vfs_cache_pressure __read_mostly = 100;
39EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 38EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
40 39
@@ -948,9 +947,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
948 dentry->d_op = NULL; 947 dentry->d_op = NULL;
949 dentry->d_fsdata = NULL; 948 dentry->d_fsdata = NULL;
950 dentry->d_mounted = 0; 949 dentry->d_mounted = 0;
951#ifdef CONFIG_PROFILING
952 dentry->d_cookie = NULL;
953#endif
954 INIT_HLIST_NODE(&dentry->d_hash); 950 INIT_HLIST_NODE(&dentry->d_hash);
955 INIT_LIST_HEAD(&dentry->d_lru); 951 INIT_LIST_HEAD(&dentry->d_lru);
956 INIT_LIST_HEAD(&dentry->d_subdirs); 952 INIT_LIST_HEAD(&dentry->d_subdirs);
@@ -1336,7 +1332,7 @@ err_out:
1336 * 1332 *
1337 * Searches the children of the parent dentry for the name in question. If 1333 * Searches the children of the parent dentry for the name in question. If
1338 * the dentry is found its reference count is incremented and the dentry 1334 * the dentry is found its reference count is incremented and the dentry
1339 * is returned. The caller must use d_put to free the entry when it has 1335 * is returned. The caller must use dput to free the entry when it has
1340 * finished using it. %NULL is returned on failure. 1336 * finished using it. %NULL is returned on failure.
1341 * 1337 *
1342 * __d_lookup is dcache_lock free. The hash list is protected using RCU. 1338 * __d_lookup is dcache_lock free. The hash list is protected using RCU.
@@ -1620,8 +1616,11 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
1620 */ 1616 */
1621 memcpy(dentry->d_iname, target->d_name.name, 1617 memcpy(dentry->d_iname, target->d_name.name,
1622 target->d_name.len + 1); 1618 target->d_name.len + 1);
1619 dentry->d_name.len = target->d_name.len;
1620 return;
1623 } 1621 }
1624 } 1622 }
1623 do_switch(dentry->d_name.len, target->d_name.len);
1625} 1624}
1626 1625
1627/* 1626/*
@@ -1681,7 +1680,6 @@ already_unhashed:
1681 1680
1682 /* Switch the names.. */ 1681 /* Switch the names.. */
1683 switch_names(dentry, target); 1682 switch_names(dentry, target);
1684 do_switch(dentry->d_name.len, target->d_name.len);
1685 do_switch(dentry->d_name.hash, target->d_name.hash); 1683 do_switch(dentry->d_name.hash, target->d_name.hash);
1686 1684
1687 /* ... and switch the parents */ 1685 /* ... and switch the parents */
@@ -1791,7 +1789,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1791 struct dentry *dparent, *aparent; 1789 struct dentry *dparent, *aparent;
1792 1790
1793 switch_names(dentry, anon); 1791 switch_names(dentry, anon);
1794 do_switch(dentry->d_name.len, anon->d_name.len);
1795 do_switch(dentry->d_name.hash, anon->d_name.hash); 1792 do_switch(dentry->d_name.hash, anon->d_name.hash);
1796 1793
1797 dparent = dentry->d_parent; 1794 dparent = dentry->d_parent;
@@ -1911,7 +1908,8 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
1911 * Convert a dentry into an ASCII path name. If the entry has been deleted 1908 * Convert a dentry into an ASCII path name. If the entry has been deleted
1912 * the string " (deleted)" is appended. Note that this is ambiguous. 1909 * the string " (deleted)" is appended. Note that this is ambiguous.
1913 * 1910 *
1914 * Returns the buffer or an error code if the path was too long. 1911 * Returns a pointer into the buffer or an error code if the
1912 * path was too long.
1915 * 1913 *
1916 * "buflen" should be positive. Caller holds the dcache_lock. 1914 * "buflen" should be positive. Caller holds the dcache_lock.
1917 * 1915 *
@@ -1987,7 +1985,10 @@ Elong:
1987 * Convert a dentry into an ASCII path name. If the entry has been deleted 1985 * Convert a dentry into an ASCII path name. If the entry has been deleted
1988 * the string " (deleted)" is appended. Note that this is ambiguous. 1986 * the string " (deleted)" is appended. Note that this is ambiguous.
1989 * 1987 *
1990 * Returns the buffer or an error code if the path was too long. 1988 * Returns a pointer into the buffer or an error code if the path was
1989 * too long. Note: Callers should use the returned pointer, not the passed
1990 * in buffer, to use the name! The implementation often starts at an offset
1991 * into the buffer, and may leave 0 bytes at the start.
1991 * 1992 *
1992 * "buflen" should be positive. 1993 * "buflen" should be positive.
1993 */ 1994 */
@@ -2313,9 +2314,6 @@ static void __init dcache_init(void)
2313/* SLAB cache for __getname() consumers */ 2314/* SLAB cache for __getname() consumers */
2314struct kmem_cache *names_cachep __read_mostly; 2315struct kmem_cache *names_cachep __read_mostly;
2315 2316
2316/* SLAB cache for file structures */
2317struct kmem_cache *filp_cachep __read_mostly;
2318
2319EXPORT_SYMBOL(d_genocide); 2317EXPORT_SYMBOL(d_genocide);
2320 2318
2321void __init vfs_caches_init_early(void) 2319void __init vfs_caches_init_early(void)
@@ -2337,9 +2335,6 @@ void __init vfs_caches_init(unsigned long mempages)
2337 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, 2335 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
2338 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 2336 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2339 2337
2340 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
2341 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2342
2343 dcache_init(); 2338 dcache_init();
2344 inode_init(); 2339 inode_init();
2345 files_init(mempages); 2340 files_init(mempages);
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 855d4b1d619a..180e9fec4ad8 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -93,10 +93,15 @@ static struct dcookie_struct *alloc_dcookie(struct path *path)
93{ 93{
94 struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache, 94 struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache,
95 GFP_KERNEL); 95 GFP_KERNEL);
96 struct dentry *d;
96 if (!dcs) 97 if (!dcs)
97 return NULL; 98 return NULL;
98 99
99 path->dentry->d_cookie = dcs; 100 d = path->dentry;
101 spin_lock(&d->d_lock);
102 d->d_flags |= DCACHE_COOKIE;
103 spin_unlock(&d->d_lock);
104
100 dcs->path = *path; 105 dcs->path = *path;
101 path_get(path); 106 path_get(path);
102 hash_dcookie(dcs); 107 hash_dcookie(dcs);
@@ -119,14 +124,14 @@ int get_dcookie(struct path *path, unsigned long *cookie)
119 goto out; 124 goto out;
120 } 125 }
121 126
122 dcs = path->dentry->d_cookie; 127 if (path->dentry->d_flags & DCACHE_COOKIE) {
123 128 dcs = find_dcookie((unsigned long)path->dentry);
124 if (!dcs) 129 } else {
125 dcs = alloc_dcookie(path); 130 dcs = alloc_dcookie(path);
126 131 if (!dcs) {
127 if (!dcs) { 132 err = -ENOMEM;
128 err = -ENOMEM; 133 goto out;
129 goto out; 134 }
130 } 135 }
131 136
132 *cookie = dcookie_value(dcs); 137 *cookie = dcookie_value(dcs);
@@ -251,7 +256,12 @@ out_kmem:
251 256
252static void free_dcookie(struct dcookie_struct * dcs) 257static void free_dcookie(struct dcookie_struct * dcs)
253{ 258{
254 dcs->path.dentry->d_cookie = NULL; 259 struct dentry *d = dcs->path.dentry;
260
261 spin_lock(&d->d_lock);
262 d->d_flags &= ~DCACHE_COOKIE;
263 spin_unlock(&d->d_lock);
264
255 path_put(&dcs->path); 265 path_put(&dcs->path);
256 kmem_cache_free(dcookie_cache, dcs); 266 kmem_cache_free(dcookie_cache, dcs);
257} 267}
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 5d61b7c06e13..fff96e152c0c 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -27,25 +27,32 @@
27#define DEVPTS_SUPER_MAGIC 0x1cd1 27#define DEVPTS_SUPER_MAGIC 0x1cd1
28 28
29#define DEVPTS_DEFAULT_MODE 0600 29#define DEVPTS_DEFAULT_MODE 0600
30/*
31 * ptmx is a new node in /dev/pts and will be unused in legacy (single-
32 * instance) mode. To prevent surprises in user space, set permissions of
33 * ptmx to 0. Use 'chmod' or remount with '-o ptmxmode' to set meaningful
34 * permissions.
35 */
36#define DEVPTS_DEFAULT_PTMX_MODE 0000
30#define PTMX_MINOR 2 37#define PTMX_MINOR 2
31 38
32extern int pty_limit; /* Config limit on Unix98 ptys */ 39extern int pty_limit; /* Config limit on Unix98 ptys */
33static DEFINE_IDA(allocated_ptys);
34static DEFINE_MUTEX(allocated_ptys_lock); 40static DEFINE_MUTEX(allocated_ptys_lock);
35 41
36static struct vfsmount *devpts_mnt; 42static struct vfsmount *devpts_mnt;
37static struct dentry *devpts_root;
38 43
39static struct { 44struct pts_mount_opts {
40 int setuid; 45 int setuid;
41 int setgid; 46 int setgid;
42 uid_t uid; 47 uid_t uid;
43 gid_t gid; 48 gid_t gid;
44 umode_t mode; 49 umode_t mode;
45} config = {.mode = DEVPTS_DEFAULT_MODE}; 50 umode_t ptmxmode;
51 int newinstance;
52};
46 53
47enum { 54enum {
48 Opt_uid, Opt_gid, Opt_mode, 55 Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance,
49 Opt_err 56 Opt_err
50}; 57};
51 58
@@ -53,18 +60,50 @@ static const match_table_t tokens = {
53 {Opt_uid, "uid=%u"}, 60 {Opt_uid, "uid=%u"},
54 {Opt_gid, "gid=%u"}, 61 {Opt_gid, "gid=%u"},
55 {Opt_mode, "mode=%o"}, 62 {Opt_mode, "mode=%o"},
63#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
64 {Opt_ptmxmode, "ptmxmode=%o"},
65 {Opt_newinstance, "newinstance"},
66#endif
56 {Opt_err, NULL} 67 {Opt_err, NULL}
57}; 68};
58 69
59static int devpts_remount(struct super_block *sb, int *flags, char *data) 70struct pts_fs_info {
71 struct ida allocated_ptys;
72 struct pts_mount_opts mount_opts;
73 struct dentry *ptmx_dentry;
74};
75
76static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
77{
78 return sb->s_fs_info;
79}
80
81static inline struct super_block *pts_sb_from_inode(struct inode *inode)
82{
83#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
84 if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
85 return inode->i_sb;
86#endif
87 return devpts_mnt->mnt_sb;
88}
89
90#define PARSE_MOUNT 0
91#define PARSE_REMOUNT 1
92
93static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
60{ 94{
61 char *p; 95 char *p;
62 96
63 config.setuid = 0; 97 opts->setuid = 0;
64 config.setgid = 0; 98 opts->setgid = 0;
65 config.uid = 0; 99 opts->uid = 0;
66 config.gid = 0; 100 opts->gid = 0;
67 config.mode = DEVPTS_DEFAULT_MODE; 101 opts->mode = DEVPTS_DEFAULT_MODE;
102 opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
103
104 /* newinstance makes sense only on initial mount */
105 if (op == PARSE_MOUNT)
106 opts->newinstance = 0;
68 107
69 while ((p = strsep(&data, ",")) != NULL) { 108 while ((p = strsep(&data, ",")) != NULL) {
70 substring_t args[MAX_OPT_ARGS]; 109 substring_t args[MAX_OPT_ARGS];
@@ -79,20 +118,32 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
79 case Opt_uid: 118 case Opt_uid:
80 if (match_int(&args[0], &option)) 119 if (match_int(&args[0], &option))
81 return -EINVAL; 120 return -EINVAL;
82 config.uid = option; 121 opts->uid = option;
83 config.setuid = 1; 122 opts->setuid = 1;
84 break; 123 break;
85 case Opt_gid: 124 case Opt_gid:
86 if (match_int(&args[0], &option)) 125 if (match_int(&args[0], &option))
87 return -EINVAL; 126 return -EINVAL;
88 config.gid = option; 127 opts->gid = option;
89 config.setgid = 1; 128 opts->setgid = 1;
90 break; 129 break;
91 case Opt_mode: 130 case Opt_mode:
92 if (match_octal(&args[0], &option)) 131 if (match_octal(&args[0], &option))
93 return -EINVAL; 132 return -EINVAL;
94 config.mode = option & S_IALLUGO; 133 opts->mode = option & S_IALLUGO;
134 break;
135#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
136 case Opt_ptmxmode:
137 if (match_octal(&args[0], &option))
138 return -EINVAL;
139 opts->ptmxmode = option & S_IALLUGO;
140 break;
141 case Opt_newinstance:
142 /* newinstance makes sense only on initial mount */
143 if (op == PARSE_MOUNT)
144 opts->newinstance = 1;
95 break; 145 break;
146#endif
96 default: 147 default:
97 printk(KERN_ERR "devpts: called with bogus options\n"); 148 printk(KERN_ERR "devpts: called with bogus options\n");
98 return -EINVAL; 149 return -EINVAL;
@@ -102,13 +153,108 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
102 return 0; 153 return 0;
103} 154}
104 155
156#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
157static int mknod_ptmx(struct super_block *sb)
158{
159 int mode;
160 int rc = -ENOMEM;
161 struct dentry *dentry;
162 struct inode *inode;
163 struct dentry *root = sb->s_root;
164 struct pts_fs_info *fsi = DEVPTS_SB(sb);
165 struct pts_mount_opts *opts = &fsi->mount_opts;
166
167 mutex_lock(&root->d_inode->i_mutex);
168
169 /* If we have already created ptmx node, return */
170 if (fsi->ptmx_dentry) {
171 rc = 0;
172 goto out;
173 }
174
175 dentry = d_alloc_name(root, "ptmx");
176 if (!dentry) {
177 printk(KERN_NOTICE "Unable to alloc dentry for ptmx node\n");
178 goto out;
179 }
180
181 /*
182 * Create a new 'ptmx' node in this mount of devpts.
183 */
184 inode = new_inode(sb);
185 if (!inode) {
186 printk(KERN_ERR "Unable to alloc inode for ptmx node\n");
187 dput(dentry);
188 goto out;
189 }
190
191 inode->i_ino = 2;
192 inode->i_uid = inode->i_gid = 0;
193 inode->i_blocks = 0;
194 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
195
196 mode = S_IFCHR|opts->ptmxmode;
197 init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
198
199 d_add(dentry, inode);
200
201 fsi->ptmx_dentry = dentry;
202 rc = 0;
203
204 printk(KERN_DEBUG "Created ptmx node in devpts ino %lu\n",
205 inode->i_ino);
206out:
207 mutex_unlock(&root->d_inode->i_mutex);
208 return rc;
209}
210
211static void update_ptmx_mode(struct pts_fs_info *fsi)
212{
213 struct inode *inode;
214 if (fsi->ptmx_dentry) {
215 inode = fsi->ptmx_dentry->d_inode;
216 inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode;
217 }
218}
219#else
220static inline void update_ptmx_mode(struct pts_fs_info *fsi)
221{
222 return;
223}
224#endif
225
226static int devpts_remount(struct super_block *sb, int *flags, char *data)
227{
228 int err;
229 struct pts_fs_info *fsi = DEVPTS_SB(sb);
230 struct pts_mount_opts *opts = &fsi->mount_opts;
231
232 err = parse_mount_options(data, PARSE_REMOUNT, opts);
233
234 /*
235 * parse_mount_options() restores options to default values
236 * before parsing and may have changed ptmxmode. So, update the
237 * mode in the inode too. Bogus options don't fail the remount,
238 * so do this even on error return.
239 */
240 update_ptmx_mode(fsi);
241
242 return err;
243}
244
105static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs) 245static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs)
106{ 246{
107 if (config.setuid) 247 struct pts_fs_info *fsi = DEVPTS_SB(vfs->mnt_sb);
108 seq_printf(seq, ",uid=%u", config.uid); 248 struct pts_mount_opts *opts = &fsi->mount_opts;
109 if (config.setgid) 249
110 seq_printf(seq, ",gid=%u", config.gid); 250 if (opts->setuid)
111 seq_printf(seq, ",mode=%03o", config.mode); 251 seq_printf(seq, ",uid=%u", opts->uid);
252 if (opts->setgid)
253 seq_printf(seq, ",gid=%u", opts->gid);
254 seq_printf(seq, ",mode=%03o", opts->mode);
255#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
256 seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
257#endif
112 258
113 return 0; 259 return 0;
114} 260}
@@ -119,10 +265,25 @@ static const struct super_operations devpts_sops = {
119 .show_options = devpts_show_options, 265 .show_options = devpts_show_options,
120}; 266};
121 267
268static void *new_pts_fs_info(void)
269{
270 struct pts_fs_info *fsi;
271
272 fsi = kzalloc(sizeof(struct pts_fs_info), GFP_KERNEL);
273 if (!fsi)
274 return NULL;
275
276 ida_init(&fsi->allocated_ptys);
277 fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE;
278 fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
279
280 return fsi;
281}
282
122static int 283static int
123devpts_fill_super(struct super_block *s, void *data, int silent) 284devpts_fill_super(struct super_block *s, void *data, int silent)
124{ 285{
125 struct inode * inode; 286 struct inode *inode;
126 287
127 s->s_blocksize = 1024; 288 s->s_blocksize = 1024;
128 s->s_blocksize_bits = 10; 289 s->s_blocksize_bits = 10;
@@ -130,9 +291,13 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
130 s->s_op = &devpts_sops; 291 s->s_op = &devpts_sops;
131 s->s_time_gran = 1; 292 s->s_time_gran = 1;
132 293
294 s->s_fs_info = new_pts_fs_info();
295 if (!s->s_fs_info)
296 goto fail;
297
133 inode = new_inode(s); 298 inode = new_inode(s);
134 if (!inode) 299 if (!inode)
135 goto fail; 300 goto free_fsi;
136 inode->i_ino = 1; 301 inode->i_ino = 1;
137 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 302 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
138 inode->i_blocks = 0; 303 inode->i_blocks = 0;
@@ -142,27 +307,226 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
142 inode->i_fop = &simple_dir_operations; 307 inode->i_fop = &simple_dir_operations;
143 inode->i_nlink = 2; 308 inode->i_nlink = 2;
144 309
145 devpts_root = s->s_root = d_alloc_root(inode); 310 s->s_root = d_alloc_root(inode);
146 if (s->s_root) 311 if (s->s_root)
147 return 0; 312 return 0;
148 313
149 printk("devpts: get root dentry failed\n"); 314 printk(KERN_ERR "devpts: get root dentry failed\n");
150 iput(inode); 315 iput(inode);
316
317free_fsi:
318 kfree(s->s_fs_info);
151fail: 319fail:
152 return -ENOMEM; 320 return -ENOMEM;
153} 321}
154 322
323#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
324static int compare_init_pts_sb(struct super_block *s, void *p)
325{
326 if (devpts_mnt)
327 return devpts_mnt->mnt_sb == s;
328 return 0;
329}
330
331/*
332 * Safely parse the mount options in @data and update @opts.
333 *
334 * devpts ends up parsing options two times during mount, due to the
335 * two modes of operation it supports. The first parse occurs in
336 * devpts_get_sb() when determining the mode (single-instance or
337 * multi-instance mode). The second parse happens in devpts_remount()
338 * or new_pts_mount() depending on the mode.
339 *
340 * Parsing of options modifies the @data making subsequent parsing
341 * incorrect. So make a local copy of @data and parse it.
342 *
343 * Return: 0 On success, -errno on error
344 */
345static int safe_parse_mount_options(void *data, struct pts_mount_opts *opts)
346{
347 int rc;
348 void *datacp;
349
350 if (!data)
351 return 0;
352
353 /* Use kstrdup() ? */
354 datacp = kmalloc(PAGE_SIZE, GFP_KERNEL);
355 if (!datacp)
356 return -ENOMEM;
357
358 memcpy(datacp, data, PAGE_SIZE);
359 rc = parse_mount_options((char *)datacp, PARSE_MOUNT, opts);
360 kfree(datacp);
361
362 return rc;
363}
364
365/*
366 * Mount a new (private) instance of devpts. PTYs created in this
367 * instance are independent of the PTYs in other devpts instances.
368 */
369static int new_pts_mount(struct file_system_type *fs_type, int flags,
370 void *data, struct vfsmount *mnt)
371{
372 int err;
373 struct pts_fs_info *fsi;
374 struct pts_mount_opts *opts;
375
376 printk(KERN_NOTICE "devpts: newinstance mount\n");
377
378 err = get_sb_nodev(fs_type, flags, data, devpts_fill_super, mnt);
379 if (err)
380 return err;
381
382 fsi = DEVPTS_SB(mnt->mnt_sb);
383 opts = &fsi->mount_opts;
384
385 err = parse_mount_options(data, PARSE_MOUNT, opts);
386 if (err)
387 goto fail;
388
389 err = mknod_ptmx(mnt->mnt_sb);
390 if (err)
391 goto fail;
392
393 return 0;
394
395fail:
396 dput(mnt->mnt_sb->s_root);
397 deactivate_super(mnt->mnt_sb);
398 return err;
399}
400
401/*
402 * Check if 'newinstance' mount option was specified in @data.
403 *
404 * Return: -errno on error (eg: invalid mount options specified)
405 * : 1 if 'newinstance' mount option was specified
406 * : 0 if 'newinstance' mount option was NOT specified
407 */
408static int is_new_instance_mount(void *data)
409{
410 int rc;
411 struct pts_mount_opts opts;
412
413 if (!data)
414 return 0;
415
416 rc = safe_parse_mount_options(data, &opts);
417 if (!rc)
418 rc = opts.newinstance;
419
420 return rc;
421}
422
423/*
424 * get_init_pts_sb()
425 *
426 * This interface is needed to support multiple namespace semantics in
427 * devpts while preserving backward compatibility of the current 'single-
428 * namespace' semantics. i.e all mounts of devpts without the 'newinstance'
429 * mount option should bind to the initial kernel mount, like
430 * get_sb_single().
431 *
432 * Mounts with 'newinstance' option create a new private namespace.
433 *
434 * But for single-mount semantics, devpts cannot use get_sb_single(),
435 * because get_sb_single()/sget() find and use the super-block from
436 * the most recent mount of devpts. But that recent mount may be a
437 * 'newinstance' mount and get_sb_single() would pick the newinstance
438 * super-block instead of the initial super-block.
439 *
440 * This interface is identical to get_sb_single() except that it
441 * consistently selects the 'single-namespace' superblock even in the
442 * presence of the private namespace (i.e 'newinstance') super-blocks.
443 */
444static int get_init_pts_sb(struct file_system_type *fs_type, int flags,
445 void *data, struct vfsmount *mnt)
446{
447 struct super_block *s;
448 int error;
449
450 s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
451 if (IS_ERR(s))
452 return PTR_ERR(s);
453
454 if (!s->s_root) {
455 s->s_flags = flags;
456 error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
457 if (error) {
458 up_write(&s->s_umount);
459 deactivate_super(s);
460 return error;
461 }
462 s->s_flags |= MS_ACTIVE;
463 }
464 do_remount_sb(s, flags, data, 0);
465 return simple_set_mnt(mnt, s);
466}
467
468/*
469 * Mount or remount the initial kernel mount of devpts. This type of
470 * mount maintains the legacy, single-instance semantics, while the
471 * kernel still allows multiple-instances.
472 */
473static int init_pts_mount(struct file_system_type *fs_type, int flags,
474 void *data, struct vfsmount *mnt)
475{
476 int err;
477
478 err = get_init_pts_sb(fs_type, flags, data, mnt);
479 if (err)
480 return err;
481
482 err = mknod_ptmx(mnt->mnt_sb);
483 if (err) {
484 dput(mnt->mnt_sb->s_root);
485 deactivate_super(mnt->mnt_sb);
486 }
487
488 return err;
489}
490
155static int devpts_get_sb(struct file_system_type *fs_type, 491static int devpts_get_sb(struct file_system_type *fs_type,
156 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 492 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
157{ 493{
494 int new;
495
496 new = is_new_instance_mount(data);
497 if (new < 0)
498 return new;
499
500 if (new)
501 return new_pts_mount(fs_type, flags, data, mnt);
502
503 return init_pts_mount(fs_type, flags, data, mnt);
504}
505#else
506/*
507 * This supports only the legacy single-instance semantics (no
508 * multiple-instance semantics)
509 */
510static int devpts_get_sb(struct file_system_type *fs_type, int flags,
511 const char *dev_name, void *data, struct vfsmount *mnt)
512{
158 return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt); 513 return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
159} 514}
515#endif
516
517static void devpts_kill_sb(struct super_block *sb)
518{
519 struct pts_fs_info *fsi = DEVPTS_SB(sb);
520
521 kfree(fsi);
522 kill_litter_super(sb);
523}
160 524
161static struct file_system_type devpts_fs_type = { 525static struct file_system_type devpts_fs_type = {
162 .owner = THIS_MODULE, 526 .owner = THIS_MODULE,
163 .name = "devpts", 527 .name = "devpts",
164 .get_sb = devpts_get_sb, 528 .get_sb = devpts_get_sb,
165 .kill_sb = kill_anon_super, 529 .kill_sb = devpts_kill_sb,
166}; 530};
167 531
168/* 532/*
@@ -172,16 +536,17 @@ static struct file_system_type devpts_fs_type = {
172 536
173int devpts_new_index(struct inode *ptmx_inode) 537int devpts_new_index(struct inode *ptmx_inode)
174{ 538{
539 struct super_block *sb = pts_sb_from_inode(ptmx_inode);
540 struct pts_fs_info *fsi = DEVPTS_SB(sb);
175 int index; 541 int index;
176 int ida_ret; 542 int ida_ret;
177 543
178retry: 544retry:
179 if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) { 545 if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
180 return -ENOMEM; 546 return -ENOMEM;
181 }
182 547
183 mutex_lock(&allocated_ptys_lock); 548 mutex_lock(&allocated_ptys_lock);
184 ida_ret = ida_get_new(&allocated_ptys, &index); 549 ida_ret = ida_get_new(&fsi->allocated_ptys, &index);
185 if (ida_ret < 0) { 550 if (ida_ret < 0) {
186 mutex_unlock(&allocated_ptys_lock); 551 mutex_unlock(&allocated_ptys_lock);
187 if (ida_ret == -EAGAIN) 552 if (ida_ret == -EAGAIN)
@@ -190,7 +555,7 @@ retry:
190 } 555 }
191 556
192 if (index >= pty_limit) { 557 if (index >= pty_limit) {
193 ida_remove(&allocated_ptys, index); 558 ida_remove(&fsi->allocated_ptys, index);
194 mutex_unlock(&allocated_ptys_lock); 559 mutex_unlock(&allocated_ptys_lock);
195 return -EIO; 560 return -EIO;
196 } 561 }
@@ -200,18 +565,26 @@ retry:
200 565
201void devpts_kill_index(struct inode *ptmx_inode, int idx) 566void devpts_kill_index(struct inode *ptmx_inode, int idx)
202{ 567{
568 struct super_block *sb = pts_sb_from_inode(ptmx_inode);
569 struct pts_fs_info *fsi = DEVPTS_SB(sb);
570
203 mutex_lock(&allocated_ptys_lock); 571 mutex_lock(&allocated_ptys_lock);
204 ida_remove(&allocated_ptys, idx); 572 ida_remove(&fsi->allocated_ptys, idx);
205 mutex_unlock(&allocated_ptys_lock); 573 mutex_unlock(&allocated_ptys_lock);
206} 574}
207 575
208int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) 576int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
209{ 577{
210 int number = tty->index; /* tty layer puts index from devpts_new_index() in here */ 578 /* tty layer puts index from devpts_new_index() in here */
579 int number = tty->index;
211 struct tty_driver *driver = tty->driver; 580 struct tty_driver *driver = tty->driver;
212 dev_t device = MKDEV(driver->major, driver->minor_start+number); 581 dev_t device = MKDEV(driver->major, driver->minor_start+number);
213 struct dentry *dentry; 582 struct dentry *dentry;
214 struct inode *inode = new_inode(devpts_mnt->mnt_sb); 583 struct super_block *sb = pts_sb_from_inode(ptmx_inode);
584 struct inode *inode = new_inode(sb);
585 struct dentry *root = sb->s_root;
586 struct pts_fs_info *fsi = DEVPTS_SB(sb);
587 struct pts_mount_opts *opts = &fsi->mount_opts;
215 char s[12]; 588 char s[12];
216 589
217 /* We're supposed to be given the slave end of a pty */ 590 /* We're supposed to be given the slave end of a pty */
@@ -221,25 +594,25 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
221 if (!inode) 594 if (!inode)
222 return -ENOMEM; 595 return -ENOMEM;
223 596
224 inode->i_ino = number+2; 597 inode->i_ino = number + 3;
225 inode->i_uid = config.setuid ? config.uid : current_fsuid(); 598 inode->i_uid = opts->setuid ? opts->uid : current_fsuid();
226 inode->i_gid = config.setgid ? config.gid : current_fsgid(); 599 inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
227 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 600 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
228 init_special_inode(inode, S_IFCHR|config.mode, device); 601 init_special_inode(inode, S_IFCHR|opts->mode, device);
229 inode->i_private = tty; 602 inode->i_private = tty;
230 tty->driver_data = inode; 603 tty->driver_data = inode;
231 604
232 sprintf(s, "%d", number); 605 sprintf(s, "%d", number);
233 606
234 mutex_lock(&devpts_root->d_inode->i_mutex); 607 mutex_lock(&root->d_inode->i_mutex);
235 608
236 dentry = d_alloc_name(devpts_root, s); 609 dentry = d_alloc_name(root, s);
237 if (!IS_ERR(dentry)) { 610 if (!IS_ERR(dentry)) {
238 d_add(dentry, inode); 611 d_add(dentry, inode);
239 fsnotify_create(devpts_root->d_inode, dentry); 612 fsnotify_create(root->d_inode, dentry);
240 } 613 }
241 614
242 mutex_unlock(&devpts_root->d_inode->i_mutex); 615 mutex_unlock(&root->d_inode->i_mutex);
243 616
244 return 0; 617 return 0;
245} 618}
@@ -256,20 +629,27 @@ struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number)
256void devpts_pty_kill(struct tty_struct *tty) 629void devpts_pty_kill(struct tty_struct *tty)
257{ 630{
258 struct inode *inode = tty->driver_data; 631 struct inode *inode = tty->driver_data;
632 struct super_block *sb = pts_sb_from_inode(inode);
633 struct dentry *root = sb->s_root;
259 struct dentry *dentry; 634 struct dentry *dentry;
260 635
261 BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); 636 BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
262 637
263 mutex_lock(&devpts_root->d_inode->i_mutex); 638 mutex_lock(&root->d_inode->i_mutex);
264 639
265 dentry = d_find_alias(inode); 640 dentry = d_find_alias(inode);
266 if (dentry && !IS_ERR(dentry)) { 641 if (IS_ERR(dentry))
642 goto out;
643
644 if (dentry) {
267 inode->i_nlink--; 645 inode->i_nlink--;
268 d_delete(dentry); 646 d_delete(dentry);
269 dput(dentry); 647 dput(dentry); /* d_alloc_name() in devpts_pty_new() */
270 } 648 }
271 649
272 mutex_unlock(&devpts_root->d_inode->i_mutex); 650 dput(dentry); /* d_find_alias above */
651out:
652 mutex_unlock(&root->d_inode->i_mutex);
273} 653}
274 654
275static int __init init_devpts_fs(void) 655static int __init init_devpts_fs(void)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 89209f00f9c7..5e78fc179886 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -673,10 +673,11 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
673 ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ " 673 ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ "
674 "dentry->d_name.name = [%s]\n", dentry->d_name.name); 674 "dentry->d_name.name = [%s]\n", dentry->d_name.name);
675 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); 675 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
676 buf[rc] = '\0';
677 set_fs(old_fs); 676 set_fs(old_fs);
678 if (rc < 0) 677 if (rc < 0)
679 goto out_free; 678 goto out_free;
679 else
680 buf[rc] = '\0';
680 rc = 0; 681 rc = 0;
681 nd_set_link(nd, buf); 682 nd_set_link(nd, buf);
682 goto out; 683 goto out;
diff --git a/fs/exec.c b/fs/exec.c
index 02d2e120542d..3ef9cf9b1871 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -57,11 +57,6 @@
57#include <asm/tlb.h> 57#include <asm/tlb.h>
58#include "internal.h" 58#include "internal.h"
59 59
60#ifdef __alpha__
61/* for /sbin/loader handling in search_binary_handler() */
62#include <linux/a.out.h>
63#endif
64
65int core_uses_pid; 60int core_uses_pid;
66char core_pattern[CORENAME_MAX_SIZE] = "core"; 61char core_pattern[CORENAME_MAX_SIZE] = "core";
67int suid_dumpable = 0; 62int suid_dumpable = 0;
@@ -127,7 +122,8 @@ asmlinkage long sys_uselib(const char __user * library)
127 if (nd.path.mnt->mnt_flags & MNT_NOEXEC) 122 if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
128 goto exit; 123 goto exit;
129 124
130 error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN); 125 error = inode_permission(nd.path.dentry->d_inode,
126 MAY_READ | MAY_EXEC | MAY_OPEN);
131 if (error) 127 if (error)
132 goto exit; 128 goto exit;
133 129
@@ -680,7 +676,7 @@ struct file *open_exec(const char *name)
680 if (nd.path.mnt->mnt_flags & MNT_NOEXEC) 676 if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
681 goto out_path_put; 677 goto out_path_put;
682 678
683 err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN); 679 err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN);
684 if (err) 680 if (err)
685 goto out_path_put; 681 goto out_path_put;
686 682
@@ -1171,41 +1167,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1171 unsigned int depth = bprm->recursion_depth; 1167 unsigned int depth = bprm->recursion_depth;
1172 int try,retval; 1168 int try,retval;
1173 struct linux_binfmt *fmt; 1169 struct linux_binfmt *fmt;
1174#ifdef __alpha__
1175 /* handle /sbin/loader.. */
1176 {
1177 struct exec * eh = (struct exec *) bprm->buf;
1178 1170
1179 if (!bprm->loader && eh->fh.f_magic == 0x183 &&
1180 (eh->fh.f_flags & 0x3000) == 0x3000)
1181 {
1182 struct file * file;
1183 unsigned long loader;
1184
1185 allow_write_access(bprm->file);
1186 fput(bprm->file);
1187 bprm->file = NULL;
1188
1189 loader = bprm->vma->vm_end - sizeof(void *);
1190
1191 file = open_exec("/sbin/loader");
1192 retval = PTR_ERR(file);
1193 if (IS_ERR(file))
1194 return retval;
1195
1196 /* Remember if the application is TASO. */
1197 bprm->taso = eh->ah.entry < 0x100000000UL;
1198
1199 bprm->file = file;
1200 bprm->loader = loader;
1201 retval = prepare_binprm(bprm);
1202 if (retval<0)
1203 return retval;
1204 /* should call search_binary_handler recursively here,
1205 but it does not matter */
1206 }
1207 }
1208#endif
1209 retval = security_bprm_check(bprm); 1171 retval = security_bprm_check(bprm);
1210 if (retval) 1172 if (retval)
1211 return retval; 1173 return retval;
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 8d0add625870..c454d5db28a5 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -585,7 +585,10 @@ got:
585 spin_lock(&sbi->s_next_gen_lock); 585 spin_lock(&sbi->s_next_gen_lock);
586 inode->i_generation = sbi->s_next_generation++; 586 inode->i_generation = sbi->s_next_generation++;
587 spin_unlock(&sbi->s_next_gen_lock); 587 spin_unlock(&sbi->s_next_gen_lock);
588 insert_inode_hash(inode); 588 if (insert_inode_locked(inode) < 0) {
589 err = -EINVAL;
590 goto fail_drop;
591 }
589 592
590 if (DQUOT_ALLOC_INODE(inode)) { 593 if (DQUOT_ALLOC_INODE(inode)) {
591 err = -EDQUOT; 594 err = -EDQUOT;
@@ -612,6 +615,7 @@ fail_drop:
612 DQUOT_DROP(inode); 615 DQUOT_DROP(inode);
613 inode->i_flags |= S_NOQUOTA; 616 inode->i_flags |= S_NOQUOTA;
614 inode->i_nlink = 0; 617 inode->i_nlink = 0;
618 unlock_new_inode(inode);
615 iput(inode); 619 iput(inode);
616 return ERR_PTR(err); 620 return ERR_PTR(err);
617 621
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 7658b33e2653..02b39a5deb74 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -32,6 +32,7 @@
32#include <linux/buffer_head.h> 32#include <linux/buffer_head.h>
33#include <linux/mpage.h> 33#include <linux/mpage.h>
34#include <linux/fiemap.h> 34#include <linux/fiemap.h>
35#include <linux/namei.h>
35#include "ext2.h" 36#include "ext2.h"
36#include "acl.h" 37#include "acl.h"
37#include "xip.h" 38#include "xip.h"
@@ -1286,9 +1287,11 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
1286 else 1287 else
1287 inode->i_mapping->a_ops = &ext2_aops; 1288 inode->i_mapping->a_ops = &ext2_aops;
1288 } else if (S_ISLNK(inode->i_mode)) { 1289 } else if (S_ISLNK(inode->i_mode)) {
1289 if (ext2_inode_is_fast_symlink(inode)) 1290 if (ext2_inode_is_fast_symlink(inode)) {
1290 inode->i_op = &ext2_fast_symlink_inode_operations; 1291 inode->i_op = &ext2_fast_symlink_inode_operations;
1291 else { 1292 nd_terminate_link(ei->i_data, inode->i_size,
1293 sizeof(ei->i_data) - 1);
1294 } else {
1292 inode->i_op = &ext2_symlink_inode_operations; 1295 inode->i_op = &ext2_symlink_inode_operations;
1293 if (test_opt(inode->i_sb, NOBH)) 1296 if (test_opt(inode->i_sb, NOBH))
1294 inode->i_mapping->a_ops = &ext2_nobh_aops; 1297 inode->i_mapping->a_ops = &ext2_nobh_aops;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2a747252ec12..90ea17998a73 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -41,9 +41,11 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
41 int err = ext2_add_link(dentry, inode); 41 int err = ext2_add_link(dentry, inode);
42 if (!err) { 42 if (!err) {
43 d_instantiate(dentry, inode); 43 d_instantiate(dentry, inode);
44 unlock_new_inode(inode);
44 return 0; 45 return 0;
45 } 46 }
46 inode_dec_link_count(inode); 47 inode_dec_link_count(inode);
48 unlock_new_inode(inode);
47 iput(inode); 49 iput(inode);
48 return err; 50 return err;
49} 51}
@@ -170,6 +172,7 @@ out:
170 172
171out_fail: 173out_fail:
172 inode_dec_link_count(inode); 174 inode_dec_link_count(inode);
175 unlock_new_inode(inode);
173 iput (inode); 176 iput (inode);
174 goto out; 177 goto out;
175} 178}
@@ -178,6 +181,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
178 struct dentry *dentry) 181 struct dentry *dentry)
179{ 182{
180 struct inode *inode = old_dentry->d_inode; 183 struct inode *inode = old_dentry->d_inode;
184 int err;
181 185
182 if (inode->i_nlink >= EXT2_LINK_MAX) 186 if (inode->i_nlink >= EXT2_LINK_MAX)
183 return -EMLINK; 187 return -EMLINK;
@@ -186,7 +190,14 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
186 inode_inc_link_count(inode); 190 inode_inc_link_count(inode);
187 atomic_inc(&inode->i_count); 191 atomic_inc(&inode->i_count);
188 192
189 return ext2_add_nondir(dentry, inode); 193 err = ext2_add_link(dentry, inode);
194 if (!err) {
195 d_instantiate(dentry, inode);
196 return 0;
197 }
198 inode_dec_link_count(inode);
199 iput(inode);
200 return err;
190} 201}
191 202
192static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) 203static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
@@ -222,12 +233,14 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
222 goto out_fail; 233 goto out_fail;
223 234
224 d_instantiate(dentry, inode); 235 d_instantiate(dentry, inode);
236 unlock_new_inode(inode);
225out: 237out:
226 return err; 238 return err;
227 239
228out_fail: 240out_fail:
229 inode_dec_link_count(inode); 241 inode_dec_link_count(inode);
230 inode_dec_link_count(inode); 242 inode_dec_link_count(inode);
243 unlock_new_inode(inode);
231 iput(inode); 244 iput(inode);
232out_dir: 245out_dir:
233 inode_dec_link_count(dir); 246 inode_dec_link_count(dir);
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 490bd0ed7896..5655fbcbd11f 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -579,7 +579,10 @@ got:
579 ext3_set_inode_flags(inode); 579 ext3_set_inode_flags(inode);
580 if (IS_DIRSYNC(inode)) 580 if (IS_DIRSYNC(inode))
581 handle->h_sync = 1; 581 handle->h_sync = 1;
582 insert_inode_hash(inode); 582 if (insert_inode_locked(inode) < 0) {
583 err = -EINVAL;
584 goto fail_drop;
585 }
583 spin_lock(&sbi->s_next_gen_lock); 586 spin_lock(&sbi->s_next_gen_lock);
584 inode->i_generation = sbi->s_next_generation++; 587 inode->i_generation = sbi->s_next_generation++;
585 spin_unlock(&sbi->s_next_gen_lock); 588 spin_unlock(&sbi->s_next_gen_lock);
@@ -627,6 +630,7 @@ fail_drop:
627 DQUOT_DROP(inode); 630 DQUOT_DROP(inode);
628 inode->i_flags |= S_NOQUOTA; 631 inode->i_flags |= S_NOQUOTA;
629 inode->i_nlink = 0; 632 inode->i_nlink = 0;
633 unlock_new_inode(inode);
630 iput(inode); 634 iput(inode);
631 brelse(bitmap_bh); 635 brelse(bitmap_bh);
632 return ERR_PTR(err); 636 return ERR_PTR(err);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f8424ad89971..c4bdccf976b5 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -37,6 +37,7 @@
37#include <linux/uio.h> 37#include <linux/uio.h>
38#include <linux/bio.h> 38#include <linux/bio.h>
39#include <linux/fiemap.h> 39#include <linux/fiemap.h>
40#include <linux/namei.h>
40#include "xattr.h" 41#include "xattr.h"
41#include "acl.h" 42#include "acl.h"
42 43
@@ -2817,9 +2818,11 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2817 inode->i_op = &ext3_dir_inode_operations; 2818 inode->i_op = &ext3_dir_inode_operations;
2818 inode->i_fop = &ext3_dir_operations; 2819 inode->i_fop = &ext3_dir_operations;
2819 } else if (S_ISLNK(inode->i_mode)) { 2820 } else if (S_ISLNK(inode->i_mode)) {
2820 if (ext3_inode_is_fast_symlink(inode)) 2821 if (ext3_inode_is_fast_symlink(inode)) {
2821 inode->i_op = &ext3_fast_symlink_inode_operations; 2822 inode->i_op = &ext3_fast_symlink_inode_operations;
2822 else { 2823 nd_terminate_link(ei->i_data, inode->i_size,
2824 sizeof(ei->i_data) - 1);
2825 } else {
2823 inode->i_op = &ext3_symlink_inode_operations; 2826 inode->i_op = &ext3_symlink_inode_operations;
2824 ext3_set_aops(inode); 2827 ext3_set_aops(inode);
2825 } 2828 }
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 3e5edc92aa0b..297ea8dfac7c 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1652,9 +1652,11 @@ static int ext3_add_nondir(handle_t *handle,
1652 if (!err) { 1652 if (!err) {
1653 ext3_mark_inode_dirty(handle, inode); 1653 ext3_mark_inode_dirty(handle, inode);
1654 d_instantiate(dentry, inode); 1654 d_instantiate(dentry, inode);
1655 unlock_new_inode(inode);
1655 return 0; 1656 return 0;
1656 } 1657 }
1657 drop_nlink(inode); 1658 drop_nlink(inode);
1659 unlock_new_inode(inode);
1658 iput(inode); 1660 iput(inode);
1659 return err; 1661 return err;
1660} 1662}
@@ -1765,6 +1767,7 @@ retry:
1765 dir_block = ext3_bread (handle, inode, 0, 1, &err); 1767 dir_block = ext3_bread (handle, inode, 0, 1, &err);
1766 if (!dir_block) { 1768 if (!dir_block) {
1767 drop_nlink(inode); /* is this nlink == 0? */ 1769 drop_nlink(inode); /* is this nlink == 0? */
1770 unlock_new_inode(inode);
1768 ext3_mark_inode_dirty(handle, inode); 1771 ext3_mark_inode_dirty(handle, inode);
1769 iput (inode); 1772 iput (inode);
1770 goto out_stop; 1773 goto out_stop;
@@ -1792,6 +1795,7 @@ retry:
1792 err = ext3_add_entry (handle, dentry, inode); 1795 err = ext3_add_entry (handle, dentry, inode);
1793 if (err) { 1796 if (err) {
1794 inode->i_nlink = 0; 1797 inode->i_nlink = 0;
1798 unlock_new_inode(inode);
1795 ext3_mark_inode_dirty(handle, inode); 1799 ext3_mark_inode_dirty(handle, inode);
1796 iput (inode); 1800 iput (inode);
1797 goto out_stop; 1801 goto out_stop;
@@ -1800,6 +1804,7 @@ retry:
1800 ext3_update_dx_flag(dir); 1804 ext3_update_dx_flag(dir);
1801 ext3_mark_inode_dirty(handle, dir); 1805 ext3_mark_inode_dirty(handle, dir);
1802 d_instantiate(dentry, inode); 1806 d_instantiate(dentry, inode);
1807 unlock_new_inode(inode);
1803out_stop: 1808out_stop:
1804 ext3_journal_stop(handle); 1809 ext3_journal_stop(handle);
1805 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) 1810 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
@@ -2174,6 +2179,7 @@ retry:
2174 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 2179 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
2175 if (err) { 2180 if (err) {
2176 drop_nlink(inode); 2181 drop_nlink(inode);
2182 unlock_new_inode(inode);
2177 ext3_mark_inode_dirty(handle, inode); 2183 ext3_mark_inode_dirty(handle, inode);
2178 iput (inode); 2184 iput (inode);
2179 goto out_stop; 2185 goto out_stop;
@@ -2221,7 +2227,14 @@ retry:
2221 inc_nlink(inode); 2227 inc_nlink(inode);
2222 atomic_inc(&inode->i_count); 2228 atomic_inc(&inode->i_count);
2223 2229
2224 err = ext3_add_nondir(handle, dentry, inode); 2230 err = ext3_add_entry(handle, dentry, inode);
2231 if (!err) {
2232 ext3_mark_inode_dirty(handle, inode);
2233 d_instantiate(dentry, inode);
2234 } else {
2235 drop_nlink(inode);
2236 iput(inode);
2237 }
2225 ext3_journal_stop(handle); 2238 ext3_journal_stop(handle);
2226 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) 2239 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
2227 goto retry; 2240 goto retry;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 08cac9fcace2..6e6052879aa2 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -826,7 +826,10 @@ got:
826 ext4_set_inode_flags(inode); 826 ext4_set_inode_flags(inode);
827 if (IS_DIRSYNC(inode)) 827 if (IS_DIRSYNC(inode))
828 handle->h_sync = 1; 828 handle->h_sync = 1;
829 insert_inode_hash(inode); 829 if (insert_inode_locked(inode) < 0) {
830 err = -EINVAL;
831 goto fail_drop;
832 }
830 spin_lock(&sbi->s_next_gen_lock); 833 spin_lock(&sbi->s_next_gen_lock);
831 inode->i_generation = sbi->s_next_generation++; 834 inode->i_generation = sbi->s_next_generation++;
832 spin_unlock(&sbi->s_next_gen_lock); 835 spin_unlock(&sbi->s_next_gen_lock);
@@ -881,6 +884,7 @@ fail_drop:
881 DQUOT_DROP(inode); 884 DQUOT_DROP(inode);
882 inode->i_flags |= S_NOQUOTA; 885 inode->i_flags |= S_NOQUOTA;
883 inode->i_nlink = 0; 886 inode->i_nlink = 0;
887 unlock_new_inode(inode);
884 iput(inode); 888 iput(inode);
885 brelse(bitmap_bh); 889 brelse(bitmap_bh);
886 return ERR_PTR(err); 890 return ERR_PTR(err);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be21a5ae33cb..7c3325e0b005 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -34,6 +34,7 @@
34#include <linux/writeback.h> 34#include <linux/writeback.h>
35#include <linux/pagevec.h> 35#include <linux/pagevec.h>
36#include <linux/mpage.h> 36#include <linux/mpage.h>
37#include <linux/namei.h>
37#include <linux/uio.h> 38#include <linux/uio.h>
38#include <linux/bio.h> 39#include <linux/bio.h>
39#include "ext4_jbd2.h" 40#include "ext4_jbd2.h"
@@ -4164,9 +4165,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4164 inode->i_op = &ext4_dir_inode_operations; 4165 inode->i_op = &ext4_dir_inode_operations;
4165 inode->i_fop = &ext4_dir_operations; 4166 inode->i_fop = &ext4_dir_operations;
4166 } else if (S_ISLNK(inode->i_mode)) { 4167 } else if (S_ISLNK(inode->i_mode)) {
4167 if (ext4_inode_is_fast_symlink(inode)) 4168 if (ext4_inode_is_fast_symlink(inode)) {
4168 inode->i_op = &ext4_fast_symlink_inode_operations; 4169 inode->i_op = &ext4_fast_symlink_inode_operations;
4169 else { 4170 nd_terminate_link(ei->i_data, inode->i_size,
4171 sizeof(ei->i_data) - 1);
4172 } else {
4170 inode->i_op = &ext4_symlink_inode_operations; 4173 inode->i_op = &ext4_symlink_inode_operations;
4171 ext4_set_aops(inode); 4174 ext4_set_aops(inode);
4172 } 4175 }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 63adcb792988..da98a9012fa5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1693,9 +1693,11 @@ static int ext4_add_nondir(handle_t *handle,
1693 if (!err) { 1693 if (!err) {
1694 ext4_mark_inode_dirty(handle, inode); 1694 ext4_mark_inode_dirty(handle, inode);
1695 d_instantiate(dentry, inode); 1695 d_instantiate(dentry, inode);
1696 unlock_new_inode(inode);
1696 return 0; 1697 return 0;
1697 } 1698 }
1698 drop_nlink(inode); 1699 drop_nlink(inode);
1700 unlock_new_inode(inode);
1699 iput(inode); 1701 iput(inode);
1700 return err; 1702 return err;
1701} 1703}
@@ -1830,6 +1832,7 @@ retry:
1830 if (err) { 1832 if (err) {
1831out_clear_inode: 1833out_clear_inode:
1832 clear_nlink(inode); 1834 clear_nlink(inode);
1835 unlock_new_inode(inode);
1833 ext4_mark_inode_dirty(handle, inode); 1836 ext4_mark_inode_dirty(handle, inode);
1834 iput(inode); 1837 iput(inode);
1835 goto out_stop; 1838 goto out_stop;
@@ -1838,6 +1841,7 @@ out_clear_inode:
1838 ext4_update_dx_flag(dir); 1841 ext4_update_dx_flag(dir);
1839 ext4_mark_inode_dirty(handle, dir); 1842 ext4_mark_inode_dirty(handle, dir);
1840 d_instantiate(dentry, inode); 1843 d_instantiate(dentry, inode);
1844 unlock_new_inode(inode);
1841out_stop: 1845out_stop:
1842 ext4_journal_stop(handle); 1846 ext4_journal_stop(handle);
1843 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 1847 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
@@ -2212,6 +2216,7 @@ retry:
2212 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 2216 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
2213 if (err) { 2217 if (err) {
2214 clear_nlink(inode); 2218 clear_nlink(inode);
2219 unlock_new_inode(inode);
2215 ext4_mark_inode_dirty(handle, inode); 2220 ext4_mark_inode_dirty(handle, inode);
2216 iput(inode); 2221 iput(inode);
2217 goto out_stop; 2222 goto out_stop;
@@ -2262,7 +2267,14 @@ retry:
2262 ext4_inc_count(handle, inode); 2267 ext4_inc_count(handle, inode);
2263 atomic_inc(&inode->i_count); 2268 atomic_inc(&inode->i_count);
2264 2269
2265 err = ext4_add_nondir(handle, dentry, inode); 2270 err = ext4_add_entry(handle, dentry, inode);
2271 if (!err) {
2272 ext4_mark_inode_dirty(handle, inode);
2273 d_instantiate(dentry, inode);
2274 } else {
2275 drop_nlink(inode);
2276 iput(inode);
2277 }
2266 ext4_journal_stop(handle); 2278 ext4_journal_stop(handle);
2267 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2279 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2268 goto retry; 2280 goto retry;
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 67e058357098..3a7f603b6982 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -841,7 +841,6 @@ const struct file_operations fat_dir_operations = {
841 .compat_ioctl = fat_compat_dir_ioctl, 841 .compat_ioctl = fat_compat_dir_ioctl,
842#endif 842#endif
843 .fsync = file_fsync, 843 .fsync = file_fsync,
844 .llseek = generic_file_llseek,
845}; 844};
846 845
847static int fat_get_short_entry(struct inode *dir, loff_t *pos, 846static int fat_get_short_entry(struct inode *dir, loff_t *pos,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index d937aaf77374..6b74d09adbe5 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -749,6 +749,8 @@ static struct dentry *fat_get_parent(struct dentry *child)
749 brelse(bh); 749 brelse(bh);
750 750
751 parent = d_obtain_alias(inode); 751 parent = d_obtain_alias(inode);
752 if (!IS_ERR(parent))
753 parent->d_op = sb->s_root->d_op;
752out: 754out:
753 unlock_super(sb); 755 unlock_super(sb);
754 756
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index bf326d4356a3..8ae32e37673c 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -78,7 +78,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
78 * for creation. 78 * for creation.
79 */ 79 */
80 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { 80 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
81 if (nd->flags & LOOKUP_CREATE) 81 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
82 return 0; 82 return 0;
83 } 83 }
84 84
diff --git a/fs/file_table.c b/fs/file_table.c
index 0fbcacc3ea75..bbeeac6efa1a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -32,6 +32,9 @@ struct files_stat_struct files_stat = {
32/* public. Not pretty! */ 32/* public. Not pretty! */
33__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); 33__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
34 34
35/* SLAB cache for file structures */
36static struct kmem_cache *filp_cachep __read_mostly;
37
35static struct percpu_counter nr_files __cacheline_aligned_in_smp; 38static struct percpu_counter nr_files __cacheline_aligned_in_smp;
36 39
37static inline void file_free_rcu(struct rcu_head *head) 40static inline void file_free_rcu(struct rcu_head *head)
@@ -397,7 +400,12 @@ too_bad:
397void __init files_init(unsigned long mempages) 400void __init files_init(unsigned long mempages)
398{ 401{
399 int n; 402 int n;
400 /* One file with associated inode and dcache is very roughly 1K. 403
404 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
405 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
406
407 /*
408 * One file with associated inode and dcache is very roughly 1K.
401 * Per default don't use more than 10% of our memory for files. 409 * Per default don't use more than 10% of our memory for files.
402 */ 410 */
403 411
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 9f3f2ceb73f0..03a6ea5e99f7 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -325,8 +325,10 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
325 if (!VXFS_ISIMMED(vip)) { 325 if (!VXFS_ISIMMED(vip)) {
326 ip->i_op = &page_symlink_inode_operations; 326 ip->i_op = &page_symlink_inode_operations;
327 ip->i_mapping->a_ops = &vxfs_aops; 327 ip->i_mapping->a_ops = &vxfs_aops;
328 } else 328 } else {
329 ip->i_op = &vxfs_immed_symlink_iops; 329 ip->i_op = &vxfs_immed_symlink_iops;
330 vip->vii_immed.vi_immed[ip->i_size] = '\0';
331 }
330 } else 332 } else
331 init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev)); 333 init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev));
332 334
diff --git a/fs/inode.c b/fs/inode.c
index 098a2443196f..7de1cda92489 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1032,6 +1032,65 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
1032 1032
1033EXPORT_SYMBOL(iget_locked); 1033EXPORT_SYMBOL(iget_locked);
1034 1034
1035int insert_inode_locked(struct inode *inode)
1036{
1037 struct super_block *sb = inode->i_sb;
1038 ino_t ino = inode->i_ino;
1039 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1040 struct inode *old;
1041
1042 inode->i_state |= I_LOCK|I_NEW;
1043 while (1) {
1044 spin_lock(&inode_lock);
1045 old = find_inode_fast(sb, head, ino);
1046 if (likely(!old)) {
1047 hlist_add_head(&inode->i_hash, head);
1048 spin_unlock(&inode_lock);
1049 return 0;
1050 }
1051 __iget(old);
1052 spin_unlock(&inode_lock);
1053 wait_on_inode(old);
1054 if (unlikely(!hlist_unhashed(&old->i_hash))) {
1055 iput(old);
1056 return -EBUSY;
1057 }
1058 iput(old);
1059 }
1060}
1061
1062EXPORT_SYMBOL(insert_inode_locked);
1063
1064int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1065 int (*test)(struct inode *, void *), void *data)
1066{
1067 struct super_block *sb = inode->i_sb;
1068 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1069 struct inode *old;
1070
1071 inode->i_state |= I_LOCK|I_NEW;
1072
1073 while (1) {
1074 spin_lock(&inode_lock);
1075 old = find_inode(sb, head, test, data);
1076 if (likely(!old)) {
1077 hlist_add_head(&inode->i_hash, head);
1078 spin_unlock(&inode_lock);
1079 return 0;
1080 }
1081 __iget(old);
1082 spin_unlock(&inode_lock);
1083 wait_on_inode(old);
1084 if (unlikely(!hlist_unhashed(&old->i_hash))) {
1085 iput(old);
1086 return -EBUSY;
1087 }
1088 iput(old);
1089 }
1090}
1091
1092EXPORT_SYMBOL(insert_inode_locked4);
1093
1035/** 1094/**
1036 * __insert_inode_hash - hash an inode 1095 * __insert_inode_hash - hash an inode
1037 * @inode: unhashed inode 1096 * @inode: unhashed inode
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 70022fd1c539..d4d142c2edd4 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -79,7 +79,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
79 inode = new_inode(sb); 79 inode = new_inode(sb);
80 if (!inode) { 80 if (!inode) {
81 jfs_warn("ialloc: new_inode returned NULL!"); 81 jfs_warn("ialloc: new_inode returned NULL!");
82 return ERR_PTR(-ENOMEM); 82 rc = -ENOMEM;
83 goto fail;
83 } 84 }
84 85
85 jfs_inode = JFS_IP(inode); 86 jfs_inode = JFS_IP(inode);
@@ -89,8 +90,12 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
89 jfs_warn("ialloc: diAlloc returned %d!", rc); 90 jfs_warn("ialloc: diAlloc returned %d!", rc);
90 if (rc == -EIO) 91 if (rc == -EIO)
91 make_bad_inode(inode); 92 make_bad_inode(inode);
92 iput(inode); 93 goto fail_put;
93 return ERR_PTR(rc); 94 }
95
96 if (insert_inode_locked(inode) < 0) {
97 rc = -EINVAL;
98 goto fail_unlock;
94 } 99 }
95 100
96 inode->i_uid = current_fsuid(); 101 inode->i_uid = current_fsuid();
@@ -112,11 +117,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
112 * Allocate inode to quota. 117 * Allocate inode to quota.
113 */ 118 */
114 if (DQUOT_ALLOC_INODE(inode)) { 119 if (DQUOT_ALLOC_INODE(inode)) {
115 DQUOT_DROP(inode); 120 rc = -EDQUOT;
116 inode->i_flags |= S_NOQUOTA; 121 goto fail_drop;
117 inode->i_nlink = 0;
118 iput(inode);
119 return ERR_PTR(-EDQUOT);
120 } 122 }
121 123
122 inode->i_mode = mode; 124 inode->i_mode = mode;
@@ -158,4 +160,15 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
158 jfs_info("ialloc returns inode = 0x%p\n", inode); 160 jfs_info("ialloc returns inode = 0x%p\n", inode);
159 161
160 return inode; 162 return inode;
163
164fail_drop:
165 DQUOT_DROP(inode);
166 inode->i_flags |= S_NOQUOTA;
167fail_unlock:
168 inode->i_nlink = 0;
169 unlock_new_inode(inode);
170fail_put:
171 iput(inode);
172fail:
173 return ERR_PTR(rc);
161} 174}
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index cc3cedffbfa1..b4de56b851e4 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -155,7 +155,6 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
155 ip->i_fop = &jfs_file_operations; 155 ip->i_fop = &jfs_file_operations;
156 ip->i_mapping->a_ops = &jfs_aops; 156 ip->i_mapping->a_ops = &jfs_aops;
157 157
158 insert_inode_hash(ip);
159 mark_inode_dirty(ip); 158 mark_inode_dirty(ip);
160 159
161 dip->i_ctime = dip->i_mtime = CURRENT_TIME; 160 dip->i_ctime = dip->i_mtime = CURRENT_TIME;
@@ -171,9 +170,12 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
171 if (rc) { 170 if (rc) {
172 free_ea_wmap(ip); 171 free_ea_wmap(ip);
173 ip->i_nlink = 0; 172 ip->i_nlink = 0;
173 unlock_new_inode(ip);
174 iput(ip); 174 iput(ip);
175 } else 175 } else {
176 d_instantiate(dentry, ip); 176 d_instantiate(dentry, ip);
177 unlock_new_inode(ip);
178 }
177 179
178 out2: 180 out2:
179 free_UCSname(&dname); 181 free_UCSname(&dname);
@@ -289,7 +291,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
289 ip->i_op = &jfs_dir_inode_operations; 291 ip->i_op = &jfs_dir_inode_operations;
290 ip->i_fop = &jfs_dir_operations; 292 ip->i_fop = &jfs_dir_operations;
291 293
292 insert_inode_hash(ip);
293 mark_inode_dirty(ip); 294 mark_inode_dirty(ip);
294 295
295 /* update parent directory inode */ 296 /* update parent directory inode */
@@ -306,9 +307,12 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
306 if (rc) { 307 if (rc) {
307 free_ea_wmap(ip); 308 free_ea_wmap(ip);
308 ip->i_nlink = 0; 309 ip->i_nlink = 0;
310 unlock_new_inode(ip);
309 iput(ip); 311 iput(ip);
310 } else 312 } else {
311 d_instantiate(dentry, ip); 313 d_instantiate(dentry, ip);
314 unlock_new_inode(ip);
315 }
312 316
313 out2: 317 out2:
314 free_UCSname(&dname); 318 free_UCSname(&dname);
@@ -1019,7 +1023,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
1019 goto out3; 1023 goto out3;
1020 } 1024 }
1021 1025
1022 insert_inode_hash(ip);
1023 mark_inode_dirty(ip); 1026 mark_inode_dirty(ip);
1024 1027
1025 dip->i_ctime = dip->i_mtime = CURRENT_TIME; 1028 dip->i_ctime = dip->i_mtime = CURRENT_TIME;
@@ -1039,9 +1042,12 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
1039 if (rc) { 1042 if (rc) {
1040 free_ea_wmap(ip); 1043 free_ea_wmap(ip);
1041 ip->i_nlink = 0; 1044 ip->i_nlink = 0;
1045 unlock_new_inode(ip);
1042 iput(ip); 1046 iput(ip);
1043 } else 1047 } else {
1044 d_instantiate(dentry, ip); 1048 d_instantiate(dentry, ip);
1049 unlock_new_inode(ip);
1050 }
1045 1051
1046 out2: 1052 out2:
1047 free_UCSname(&dname); 1053 free_UCSname(&dname);
@@ -1399,7 +1405,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1399 jfs_ip->dev = new_encode_dev(rdev); 1405 jfs_ip->dev = new_encode_dev(rdev);
1400 init_special_inode(ip, ip->i_mode, rdev); 1406 init_special_inode(ip, ip->i_mode, rdev);
1401 1407
1402 insert_inode_hash(ip);
1403 mark_inode_dirty(ip); 1408 mark_inode_dirty(ip);
1404 1409
1405 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 1410 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
@@ -1417,9 +1422,12 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1417 if (rc) { 1422 if (rc) {
1418 free_ea_wmap(ip); 1423 free_ea_wmap(ip);
1419 ip->i_nlink = 0; 1424 ip->i_nlink = 0;
1425 unlock_new_inode(ip);
1420 iput(ip); 1426 iput(ip);
1421 } else 1427 } else {
1422 d_instantiate(dentry, ip); 1428 d_instantiate(dentry, ip);
1429 unlock_new_inode(ip);
1430 }
1423 1431
1424 out1: 1432 out1:
1425 free_UCSname(&dname); 1433 free_UCSname(&dname);
diff --git a/fs/namei.c b/fs/namei.c
index af3783fff1de..dd5c9f0bf829 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -226,6 +226,16 @@ int generic_permission(struct inode *inode, int mask,
226 return -EACCES; 226 return -EACCES;
227} 227}
228 228
229/**
230 * inode_permission - check for access rights to a given inode
231 * @inode: inode to check permission on
232 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
233 *
234 * Used to check for read/write/execute permissions on an inode.
235 * We use "fsuid" for this, letting us set arbitrary permissions
236 * for filesystem access without changing the "normal" uids which
237 * are used for other things.
238 */
229int inode_permission(struct inode *inode, int mask) 239int inode_permission(struct inode *inode, int mask)
230{ 240{
231 int retval; 241 int retval;
@@ -247,7 +257,6 @@ int inode_permission(struct inode *inode, int mask)
247 return -EACCES; 257 return -EACCES;
248 } 258 }
249 259
250 /* Ordinary permission routines do not understand MAY_APPEND. */
251 if (inode->i_op && inode->i_op->permission) 260 if (inode->i_op && inode->i_op->permission)
252 retval = inode->i_op->permission(inode, mask); 261 retval = inode->i_op->permission(inode, mask);
253 else 262 else
@@ -265,21 +274,6 @@ int inode_permission(struct inode *inode, int mask)
265} 274}
266 275
267/** 276/**
268 * vfs_permission - check for access rights to a given path
269 * @nd: lookup result that describes the path
270 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
271 *
272 * Used to check for read/write/execute permissions on a path.
273 * We use "fsuid" for this, letting us set arbitrary permissions
274 * for filesystem access without changing the "normal" uids which
275 * are used for other things.
276 */
277int vfs_permission(struct nameidata *nd, int mask)
278{
279 return inode_permission(nd->path.dentry->d_inode, mask);
280}
281
282/**
283 * file_permission - check for additional access rights to a given file 277 * file_permission - check for additional access rights to a given file
284 * @file: file to check access rights for 278 * @file: file to check access rights for
285 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 279 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
@@ -289,7 +283,7 @@ int vfs_permission(struct nameidata *nd, int mask)
289 * 283 *
290 * Note: 284 * Note:
291 * Do not use this function in new code. All access checks should 285 * Do not use this function in new code. All access checks should
292 * be done using vfs_permission(). 286 * be done using inode_permission().
293 */ 287 */
294int file_permission(struct file *file, int mask) 288int file_permission(struct file *file, int mask)
295{ 289{
@@ -527,18 +521,6 @@ out_unlock:
527 return result; 521 return result;
528} 522}
529 523
530/* SMP-safe */
531static __always_inline void
532walk_init_root(const char *name, struct nameidata *nd)
533{
534 struct fs_struct *fs = current->fs;
535
536 read_lock(&fs->lock);
537 nd->path = fs->root;
538 path_get(&fs->root);
539 read_unlock(&fs->lock);
540}
541
542/* 524/*
543 * Wrapper to retry pathname resolution whenever the underlying 525 * Wrapper to retry pathname resolution whenever the underlying
544 * file system returns an ESTALE. 526 * file system returns an ESTALE.
@@ -576,9 +558,16 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
576 goto fail; 558 goto fail;
577 559
578 if (*link == '/') { 560 if (*link == '/') {
561 struct fs_struct *fs = current->fs;
562
579 path_put(&nd->path); 563 path_put(&nd->path);
580 walk_init_root(link, nd); 564
565 read_lock(&fs->lock);
566 nd->path = fs->root;
567 path_get(&fs->root);
568 read_unlock(&fs->lock);
581 } 569 }
570
582 res = link_path_walk(link, nd); 571 res = link_path_walk(link, nd);
583 if (nd->depth || res || nd->last_type!=LAST_NORM) 572 if (nd->depth || res || nd->last_type!=LAST_NORM)
584 return res; 573 return res;
@@ -859,7 +848,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
859 nd->flags |= LOOKUP_CONTINUE; 848 nd->flags |= LOOKUP_CONTINUE;
860 err = exec_permission_lite(inode); 849 err = exec_permission_lite(inode);
861 if (err == -EAGAIN) 850 if (err == -EAGAIN)
862 err = vfs_permission(nd, MAY_EXEC); 851 err = inode_permission(nd->path.dentry->d_inode,
852 MAY_EXEC);
863 if (err) 853 if (err)
864 break; 854 break;
865 855
@@ -1493,9 +1483,9 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1493 return error; 1483 return error;
1494} 1484}
1495 1485
1496int may_open(struct nameidata *nd, int acc_mode, int flag) 1486int may_open(struct path *path, int acc_mode, int flag)
1497{ 1487{
1498 struct dentry *dentry = nd->path.dentry; 1488 struct dentry *dentry = path->dentry;
1499 struct inode *inode = dentry->d_inode; 1489 struct inode *inode = dentry->d_inode;
1500 int error; 1490 int error;
1501 1491
@@ -1516,13 +1506,13 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1516 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 1506 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
1517 flag &= ~O_TRUNC; 1507 flag &= ~O_TRUNC;
1518 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { 1508 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
1519 if (nd->path.mnt->mnt_flags & MNT_NODEV) 1509 if (path->mnt->mnt_flags & MNT_NODEV)
1520 return -EACCES; 1510 return -EACCES;
1521 1511
1522 flag &= ~O_TRUNC; 1512 flag &= ~O_TRUNC;
1523 } 1513 }
1524 1514
1525 error = vfs_permission(nd, acc_mode); 1515 error = inode_permission(inode, acc_mode);
1526 if (error) 1516 if (error)
1527 return error; 1517 return error;
1528 /* 1518 /*
@@ -1556,6 +1546,9 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1556 * Refuse to truncate files with mandatory locks held on them. 1546 * Refuse to truncate files with mandatory locks held on them.
1557 */ 1547 */
1558 error = locks_verify_locked(inode); 1548 error = locks_verify_locked(inode);
1549 if (!error)
1550 error = security_path_truncate(path, 0,
1551 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
1559 if (!error) { 1552 if (!error) {
1560 DQUOT_INIT(inode); 1553 DQUOT_INIT(inode);
1561 1554
@@ -1586,14 +1579,18 @@ static int __open_namei_create(struct nameidata *nd, struct path *path,
1586 1579
1587 if (!IS_POSIXACL(dir->d_inode)) 1580 if (!IS_POSIXACL(dir->d_inode))
1588 mode &= ~current->fs->umask; 1581 mode &= ~current->fs->umask;
1582 error = security_path_mknod(&nd->path, path->dentry, mode, 0);
1583 if (error)
1584 goto out_unlock;
1589 error = vfs_create(dir->d_inode, path->dentry, mode, nd); 1585 error = vfs_create(dir->d_inode, path->dentry, mode, nd);
1586out_unlock:
1590 mutex_unlock(&dir->d_inode->i_mutex); 1587 mutex_unlock(&dir->d_inode->i_mutex);
1591 dput(nd->path.dentry); 1588 dput(nd->path.dentry);
1592 nd->path.dentry = path->dentry; 1589 nd->path.dentry = path->dentry;
1593 if (error) 1590 if (error)
1594 return error; 1591 return error;
1595 /* Don't check for write permission, don't truncate */ 1592 /* Don't check for write permission, don't truncate */
1596 return may_open(nd, 0, flag & ~O_TRUNC); 1593 return may_open(&nd->path, 0, flag & ~O_TRUNC);
1597} 1594}
1598 1595
1599/* 1596/*
@@ -1779,7 +1776,7 @@ ok:
1779 if (error) 1776 if (error)
1780 goto exit; 1777 goto exit;
1781 } 1778 }
1782 error = may_open(&nd, acc_mode, flag); 1779 error = may_open(&nd.path, acc_mode, flag);
1783 if (error) { 1780 if (error) {
1784 if (will_write) 1781 if (will_write)
1785 mnt_drop_write(nd.path.mnt); 1782 mnt_drop_write(nd.path.mnt);
@@ -1999,6 +1996,9 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
1999 error = mnt_want_write(nd.path.mnt); 1996 error = mnt_want_write(nd.path.mnt);
2000 if (error) 1997 if (error)
2001 goto out_dput; 1998 goto out_dput;
1999 error = security_path_mknod(&nd.path, dentry, mode, dev);
2000 if (error)
2001 goto out_drop_write;
2002 switch (mode & S_IFMT) { 2002 switch (mode & S_IFMT) {
2003 case 0: case S_IFREG: 2003 case 0: case S_IFREG:
2004 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); 2004 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
@@ -2011,6 +2011,7 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
2011 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); 2011 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
2012 break; 2012 break;
2013 } 2013 }
2014out_drop_write:
2014 mnt_drop_write(nd.path.mnt); 2015 mnt_drop_write(nd.path.mnt);
2015out_dput: 2016out_dput:
2016 dput(dentry); 2017 dput(dentry);
@@ -2070,7 +2071,11 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
2070 error = mnt_want_write(nd.path.mnt); 2071 error = mnt_want_write(nd.path.mnt);
2071 if (error) 2072 if (error)
2072 goto out_dput; 2073 goto out_dput;
2074 error = security_path_mkdir(&nd.path, dentry, mode);
2075 if (error)
2076 goto out_drop_write;
2073 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); 2077 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
2078out_drop_write:
2074 mnt_drop_write(nd.path.mnt); 2079 mnt_drop_write(nd.path.mnt);
2075out_dput: 2080out_dput:
2076 dput(dentry); 2081 dput(dentry);
@@ -2180,7 +2185,11 @@ static long do_rmdir(int dfd, const char __user *pathname)
2180 error = mnt_want_write(nd.path.mnt); 2185 error = mnt_want_write(nd.path.mnt);
2181 if (error) 2186 if (error)
2182 goto exit3; 2187 goto exit3;
2188 error = security_path_rmdir(&nd.path, dentry);
2189 if (error)
2190 goto exit4;
2183 error = vfs_rmdir(nd.path.dentry->d_inode, dentry); 2191 error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
2192exit4:
2184 mnt_drop_write(nd.path.mnt); 2193 mnt_drop_write(nd.path.mnt);
2185exit3: 2194exit3:
2186 dput(dentry); 2195 dput(dentry);
@@ -2265,7 +2274,11 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2265 error = mnt_want_write(nd.path.mnt); 2274 error = mnt_want_write(nd.path.mnt);
2266 if (error) 2275 if (error)
2267 goto exit2; 2276 goto exit2;
2277 error = security_path_unlink(&nd.path, dentry);
2278 if (error)
2279 goto exit3;
2268 error = vfs_unlink(nd.path.dentry->d_inode, dentry); 2280 error = vfs_unlink(nd.path.dentry->d_inode, dentry);
2281exit3:
2269 mnt_drop_write(nd.path.mnt); 2282 mnt_drop_write(nd.path.mnt);
2270 exit2: 2283 exit2:
2271 dput(dentry); 2284 dput(dentry);
@@ -2346,7 +2359,11 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
2346 error = mnt_want_write(nd.path.mnt); 2359 error = mnt_want_write(nd.path.mnt);
2347 if (error) 2360 if (error)
2348 goto out_dput; 2361 goto out_dput;
2362 error = security_path_symlink(&nd.path, dentry, from);
2363 if (error)
2364 goto out_drop_write;
2349 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from); 2365 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
2366out_drop_write:
2350 mnt_drop_write(nd.path.mnt); 2367 mnt_drop_write(nd.path.mnt);
2351out_dput: 2368out_dput:
2352 dput(dentry); 2369 dput(dentry);
@@ -2443,7 +2460,11 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
2443 error = mnt_want_write(nd.path.mnt); 2460 error = mnt_want_write(nd.path.mnt);
2444 if (error) 2461 if (error)
2445 goto out_dput; 2462 goto out_dput;
2463 error = security_path_link(old_path.dentry, &nd.path, new_dentry);
2464 if (error)
2465 goto out_drop_write;
2446 error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry); 2466 error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
2467out_drop_write:
2447 mnt_drop_write(nd.path.mnt); 2468 mnt_drop_write(nd.path.mnt);
2448out_dput: 2469out_dput:
2449 dput(new_dentry); 2470 dput(new_dentry);
@@ -2679,8 +2700,13 @@ asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
2679 error = mnt_want_write(oldnd.path.mnt); 2700 error = mnt_want_write(oldnd.path.mnt);
2680 if (error) 2701 if (error)
2681 goto exit5; 2702 goto exit5;
2703 error = security_path_rename(&oldnd.path, old_dentry,
2704 &newnd.path, new_dentry);
2705 if (error)
2706 goto exit6;
2682 error = vfs_rename(old_dir->d_inode, old_dentry, 2707 error = vfs_rename(old_dir->d_inode, old_dentry,
2683 new_dir->d_inode, new_dentry); 2708 new_dir->d_inode, new_dentry);
2709exit6:
2684 mnt_drop_write(oldnd.path.mnt); 2710 mnt_drop_write(oldnd.path.mnt);
2685exit5: 2711exit5:
2686 dput(new_dentry); 2712 dput(new_dentry);
@@ -2750,13 +2776,16 @@ int vfs_follow_link(struct nameidata *nd, const char *link)
2750/* get the link contents into pagecache */ 2776/* get the link contents into pagecache */
2751static char *page_getlink(struct dentry * dentry, struct page **ppage) 2777static char *page_getlink(struct dentry * dentry, struct page **ppage)
2752{ 2778{
2753 struct page * page; 2779 char *kaddr;
2780 struct page *page;
2754 struct address_space *mapping = dentry->d_inode->i_mapping; 2781 struct address_space *mapping = dentry->d_inode->i_mapping;
2755 page = read_mapping_page(mapping, 0, NULL); 2782 page = read_mapping_page(mapping, 0, NULL);
2756 if (IS_ERR(page)) 2783 if (IS_ERR(page))
2757 return (char*)page; 2784 return (char*)page;
2758 *ppage = page; 2785 *ppage = page;
2759 return kmap(page); 2786 kaddr = kmap(page);
2787 nd_terminate_link(kaddr, dentry->d_inode->i_size, PAGE_SIZE - 1);
2788 return kaddr;
2760} 2789}
2761 2790
2762int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2791int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
@@ -2849,7 +2878,6 @@ EXPORT_SYMBOL(path_lookup);
2849EXPORT_SYMBOL(kern_path); 2878EXPORT_SYMBOL(kern_path);
2850EXPORT_SYMBOL(vfs_path_lookup); 2879EXPORT_SYMBOL(vfs_path_lookup);
2851EXPORT_SYMBOL(inode_permission); 2880EXPORT_SYMBOL(inode_permission);
2852EXPORT_SYMBOL(vfs_permission);
2853EXPORT_SYMBOL(file_permission); 2881EXPORT_SYMBOL(file_permission);
2854EXPORT_SYMBOL(unlock_rename); 2882EXPORT_SYMBOL(unlock_rename);
2855EXPORT_SYMBOL(vfs_create); 2883EXPORT_SYMBOL(vfs_create);
@@ -2865,3 +2893,10 @@ EXPORT_SYMBOL(vfs_symlink);
2865EXPORT_SYMBOL(vfs_unlink); 2893EXPORT_SYMBOL(vfs_unlink);
2866EXPORT_SYMBOL(dentry_unhash); 2894EXPORT_SYMBOL(dentry_unhash);
2867EXPORT_SYMBOL(generic_readlink); 2895EXPORT_SYMBOL(generic_readlink);
2896
2897/* to be mentioned only in INIT_TASK */
2898struct fs_struct init_fs = {
2899 .count = ATOMIC_INIT(1),
2900 .lock = __RW_LOCK_UNLOCKED(init_fs.lock),
2901 .umask = 0022,
2902};
diff --git a/fs/namespace.c b/fs/namespace.c
index 1c09cab8f7cf..a40685d800a8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1990,7 +1990,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
1990 if (!new_ns->root) { 1990 if (!new_ns->root) {
1991 up_write(&namespace_sem); 1991 up_write(&namespace_sem);
1992 kfree(new_ns); 1992 kfree(new_ns);
1993 return ERR_PTR(-ENOMEM);; 1993 return ERR_PTR(-ENOMEM);
1994 } 1994 }
1995 spin_lock(&vfsmount_lock); 1995 spin_lock(&vfsmount_lock);
1996 list_add_tail(&new_ns->list, &new_ns->root->mnt_list); 1996 list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index b1acbd6ab6fb..b27451909dff 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -38,9 +38,10 @@ static struct file *do_open(char *name, int flags)
38 return ERR_PTR(error); 38 return ERR_PTR(error);
39 39
40 if (flags == O_RDWR) 40 if (flags == O_RDWR)
41 error = may_open(&nd,MAY_READ|MAY_WRITE,FMODE_READ|FMODE_WRITE); 41 error = may_open(&nd.path, MAY_READ|MAY_WRITE,
42 FMODE_READ|FMODE_WRITE);
42 else 43 else
43 error = may_open(&nd, MAY_WRITE, FMODE_WRITE); 44 error = may_open(&nd.path, MAY_WRITE, FMODE_WRITE);
44 45
45 if (!error) 46 if (!error)
46 return dentry_open(nd.path.dentry, nd.path.mnt, flags, 47 return dentry_open(nd.path.dentry, nd.path.mnt, flags,
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
new file mode 100644
index 000000000000..50914d7303c6
--- /dev/null
+++ b/fs/notify/Kconfig
@@ -0,0 +1,2 @@
1source "fs/notify/dnotify/Kconfig"
2source "fs/notify/inotify/Kconfig"
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
new file mode 100644
index 000000000000..5a95b6010ce7
--- /dev/null
+++ b/fs/notify/Makefile
@@ -0,0 +1,2 @@
1obj-y += dnotify/
2obj-y += inotify/
diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig
new file mode 100644
index 000000000000..26adf5dfa646
--- /dev/null
+++ b/fs/notify/dnotify/Kconfig
@@ -0,0 +1,10 @@
1config DNOTIFY
2 bool "Dnotify support"
3 default y
4 help
5 Dnotify is a directory-based per-fd file change notification system
6 that uses signals to communicate events to user-space. There exist
7 superior alternatives, but some applications may still rely on
8 dnotify.
9
10 If unsure, say Y.
diff --git a/fs/notify/dnotify/Makefile b/fs/notify/dnotify/Makefile
new file mode 100644
index 000000000000..f145251dcadb
--- /dev/null
+++ b/fs/notify/dnotify/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_DNOTIFY) += dnotify.o
diff --git a/fs/dnotify.c b/fs/notify/dnotify/dnotify.c
index 676073b8dda5..b0aa2cde80bd 100644
--- a/fs/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -115,9 +115,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
115 dn->dn_next = inode->i_dnotify; 115 dn->dn_next = inode->i_dnotify;
116 inode->i_dnotify = dn; 116 inode->i_dnotify = dn;
117 spin_unlock(&inode->i_lock); 117 spin_unlock(&inode->i_lock);
118
119 if (filp->f_op && filp->f_op->dir_notify)
120 return filp->f_op->dir_notify(filp, arg);
121 return 0; 118 return 0;
122 119
123out_free: 120out_free:
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
new file mode 100644
index 000000000000..446792841023
--- /dev/null
+++ b/fs/notify/inotify/Kconfig
@@ -0,0 +1,27 @@
1config INOTIFY
2 bool "Inotify file change notification support"
3 default y
4 ---help---
5 Say Y here to enable inotify support. Inotify is a file change
6 notification system and a replacement for dnotify. Inotify fixes
7 numerous shortcomings in dnotify and introduces several new features
8 including multiple file events, one-shot support, and unmount
9 notification.
10
11 For more information, see <file:Documentation/filesystems/inotify.txt>
12
13 If unsure, say Y.
14
15config INOTIFY_USER
16 bool "Inotify support for userspace"
17 depends on INOTIFY
18 default y
19 ---help---
20 Say Y here to enable inotify support for userspace, including the
21 associated system calls. Inotify allows monitoring of both files and
22 directories via a single open fd. Events are read from the file
23 descriptor, which is also select()- and poll()-able.
24
25 For more information, see <file:Documentation/filesystems/inotify.txt>
26
27 If unsure, say Y.
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile
new file mode 100644
index 000000000000..e290f3bb9d8d
--- /dev/null
+++ b/fs/notify/inotify/Makefile
@@ -0,0 +1,2 @@
1obj-$(CONFIG_INOTIFY) += inotify.o
2obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
diff --git a/fs/inotify.c b/fs/notify/inotify/inotify.c
index dae3f28f30d4..dae3f28f30d4 100644
--- a/fs/inotify.c
+++ b/fs/notify/inotify/inotify.c
diff --git a/fs/inotify_user.c b/fs/notify/inotify/inotify_user.c
index e2425bbd871f..400f8064a548 100644
--- a/fs/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -76,10 +76,10 @@ struct inotify_device {
76 struct mutex ev_mutex; /* protects event queue */ 76 struct mutex ev_mutex; /* protects event queue */
77 struct mutex up_mutex; /* synchronizes watch updates */ 77 struct mutex up_mutex; /* synchronizes watch updates */
78 struct list_head events; /* list of queued events */ 78 struct list_head events; /* list of queued events */
79 atomic_t count; /* reference count */
80 struct user_struct *user; /* user who opened this dev */ 79 struct user_struct *user; /* user who opened this dev */
81 struct inotify_handle *ih; /* inotify handle */ 80 struct inotify_handle *ih; /* inotify handle */
82 struct fasync_struct *fa; /* async notification */ 81 struct fasync_struct *fa; /* async notification */
82 atomic_t count; /* reference count */
83 unsigned int queue_size; /* size of the queue (bytes) */ 83 unsigned int queue_size; /* size of the queue (bytes) */
84 unsigned int event_count; /* number of pending events */ 84 unsigned int event_count; /* number of pending events */
85 unsigned int max_events; /* maximum number of events */ 85 unsigned int max_events; /* maximum number of events */
diff --git a/fs/open.c b/fs/open.c
index c0a426d5766c..1cd7d40e9991 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -272,6 +272,8 @@ static long do_sys_truncate(const char __user *pathname, loff_t length)
272 goto put_write_and_out; 272 goto put_write_and_out;
273 273
274 error = locks_verify_truncate(inode, NULL, length); 274 error = locks_verify_truncate(inode, NULL, length);
275 if (!error)
276 error = security_path_truncate(&path, length, 0);
275 if (!error) { 277 if (!error) {
276 DQUOT_INIT(inode); 278 DQUOT_INIT(inode);
277 error = do_truncate(path.dentry, length, 0, NULL); 279 error = do_truncate(path.dentry, length, 0, NULL);
@@ -329,6 +331,9 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
329 331
330 error = locks_verify_truncate(inode, file, length); 332 error = locks_verify_truncate(inode, file, length);
331 if (!error) 333 if (!error)
334 error = security_path_truncate(&file->f_path, length,
335 ATTR_MTIME|ATTR_CTIME);
336 if (!error)
332 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); 337 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
333out_putf: 338out_putf:
334 fput(file); 339 fput(file);
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 3bb1cf1e7425..f75efa22df5e 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -9,6 +9,7 @@
9#include <linux/seq_file.h> 9#include <linux/seq_file.h>
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/irqnr.h>
12#include <asm/cputime.h> 13#include <asm/cputime.h>
13 14
14#ifndef arch_irq_stat_cpu 15#ifndef arch_irq_stat_cpu
@@ -45,10 +46,6 @@ static int show_stat(struct seq_file *p, void *v)
45 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); 46 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
46 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); 47 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
47 for_each_irq_nr(j) { 48 for_each_irq_nr(j) {
48#ifdef CONFIG_SPARSE_IRQ
49 if (!irq_to_desc(j))
50 continue;
51#endif
52 sum += kstat_irqs_cpu(j, i); 49 sum += kstat_irqs_cpu(j, i);
53 } 50 }
54 sum += arch_irq_stat_cpu(i); 51 sum += arch_irq_stat_cpu(i);
@@ -95,12 +92,6 @@ static int show_stat(struct seq_file *p, void *v)
95 /* sum again ? it could be updated? */ 92 /* sum again ? it could be updated? */
96 for_each_irq_nr(j) { 93 for_each_irq_nr(j) {
97 per_irq_sum = 0; 94 per_irq_sum = 0;
98#ifdef CONFIG_SPARSE_IRQ
99 if (!irq_to_desc(j)) {
100 seq_printf(p, " %u", per_irq_sum);
101 continue;
102 }
103#endif
104 for_each_possible_cpu(i) 95 for_each_possible_cpu(i)
105 per_irq_sum += kstat_irqs_cpu(j, i); 96 per_irq_sum += kstat_irqs_cpu(j, i);
106 97
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 6c4c2c69449f..145c2d3e5e01 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1753,6 +1753,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1753 struct inode *inode) 1753 struct inode *inode)
1754{ 1754{
1755 struct super_block *sb; 1755 struct super_block *sb;
1756 struct reiserfs_iget_args args;
1756 INITIALIZE_PATH(path_to_key); 1757 INITIALIZE_PATH(path_to_key);
1757 struct cpu_key key; 1758 struct cpu_key key;
1758 struct item_head ih; 1759 struct item_head ih;
@@ -1780,6 +1781,14 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1780 err = -ENOMEM; 1781 err = -ENOMEM;
1781 goto out_bad_inode; 1782 goto out_bad_inode;
1782 } 1783 }
1784 args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
1785 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
1786 args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
1787 if (insert_inode_locked4(inode, args.objectid,
1788 reiserfs_find_actor, &args) < 0) {
1789 err = -EINVAL;
1790 goto out_bad_inode;
1791 }
1783 if (old_format_only(sb)) 1792 if (old_format_only(sb))
1784 /* not a perfect generation count, as object ids can be reused, but 1793 /* not a perfect generation count, as object ids can be reused, but
1785 ** this is as good as reiserfs can do right now. 1794 ** this is as good as reiserfs can do right now.
@@ -1859,13 +1868,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1859 } else { 1868 } else {
1860 inode2sd(&sd, inode, inode->i_size); 1869 inode2sd(&sd, inode, inode->i_size);
1861 } 1870 }
1862 // these do not go to on-disk stat data
1863 inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
1864
1865 // store in in-core inode the key of stat data and version all 1871 // store in in-core inode the key of stat data and version all
1866 // object items will have (directory items will have old offset 1872 // object items will have (directory items will have old offset
1867 // format, other new objects will consist of new items) 1873 // format, other new objects will consist of new items)
1868 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
1869 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) 1874 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
1870 set_inode_item_key_version(inode, KEY_FORMAT_3_5); 1875 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1871 else 1876 else
@@ -1929,7 +1934,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1929 reiserfs_mark_inode_private(inode); 1934 reiserfs_mark_inode_private(inode);
1930 } 1935 }
1931 1936
1932 insert_inode_hash(inode);
1933 reiserfs_update_sd(th, inode); 1937 reiserfs_update_sd(th, inode);
1934 reiserfs_check_path(&path_to_key); 1938 reiserfs_check_path(&path_to_key);
1935 1939
@@ -1956,6 +1960,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1956 out_inserted_sd: 1960 out_inserted_sd:
1957 inode->i_nlink = 0; 1961 inode->i_nlink = 0;
1958 th->t_trans_id = 0; /* so the caller can't use this handle later */ 1962 th->t_trans_id = 0; /* so the caller can't use this handle later */
1963 unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
1959 1964
1960 /* If we were inheriting an ACL, we need to release the lock so that 1965 /* If we were inheriting an ACL, we need to release the lock so that
1961 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking 1966 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 4f322e5ed840..738967f6c8ee 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -646,6 +646,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
646 err = journal_end(&th, dir->i_sb, jbegin_count); 646 err = journal_end(&th, dir->i_sb, jbegin_count);
647 if (err) 647 if (err)
648 retval = err; 648 retval = err;
649 unlock_new_inode(inode);
649 iput(inode); 650 iput(inode);
650 goto out_failed; 651 goto out_failed;
651 } 652 }
@@ -653,6 +654,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
653 reiserfs_update_inode_transaction(dir); 654 reiserfs_update_inode_transaction(dir);
654 655
655 d_instantiate(dentry, inode); 656 d_instantiate(dentry, inode);
657 unlock_new_inode(inode);
656 retval = journal_end(&th, dir->i_sb, jbegin_count); 658 retval = journal_end(&th, dir->i_sb, jbegin_count);
657 659
658 out_failed: 660 out_failed:
@@ -727,11 +729,13 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
727 err = journal_end(&th, dir->i_sb, jbegin_count); 729 err = journal_end(&th, dir->i_sb, jbegin_count);
728 if (err) 730 if (err)
729 retval = err; 731 retval = err;
732 unlock_new_inode(inode);
730 iput(inode); 733 iput(inode);
731 goto out_failed; 734 goto out_failed;
732 } 735 }
733 736
734 d_instantiate(dentry, inode); 737 d_instantiate(dentry, inode);
738 unlock_new_inode(inode);
735 retval = journal_end(&th, dir->i_sb, jbegin_count); 739 retval = journal_end(&th, dir->i_sb, jbegin_count);
736 740
737 out_failed: 741 out_failed:
@@ -812,6 +816,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
812 err = journal_end(&th, dir->i_sb, jbegin_count); 816 err = journal_end(&th, dir->i_sb, jbegin_count);
813 if (err) 817 if (err)
814 retval = err; 818 retval = err;
819 unlock_new_inode(inode);
815 iput(inode); 820 iput(inode);
816 goto out_failed; 821 goto out_failed;
817 } 822 }
@@ -819,6 +824,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
819 reiserfs_update_sd(&th, dir); 824 reiserfs_update_sd(&th, dir);
820 825
821 d_instantiate(dentry, inode); 826 d_instantiate(dentry, inode);
827 unlock_new_inode(inode);
822 retval = journal_end(&th, dir->i_sb, jbegin_count); 828 retval = journal_end(&th, dir->i_sb, jbegin_count);
823 out_failed: 829 out_failed:
824 if (locked) 830 if (locked)
@@ -1096,11 +1102,13 @@ static int reiserfs_symlink(struct inode *parent_dir,
1096 err = journal_end(&th, parent_dir->i_sb, jbegin_count); 1102 err = journal_end(&th, parent_dir->i_sb, jbegin_count);
1097 if (err) 1103 if (err)
1098 retval = err; 1104 retval = err;
1105 unlock_new_inode(inode);
1099 iput(inode); 1106 iput(inode);
1100 goto out_failed; 1107 goto out_failed;
1101 } 1108 }
1102 1109
1103 d_instantiate(dentry, inode); 1110 d_instantiate(dentry, inode);
1111 unlock_new_inode(inode);
1104 retval = journal_end(&th, parent_dir->i_sb, jbegin_count); 1112 retval = journal_end(&th, parent_dir->i_sb, jbegin_count);
1105 out_failed: 1113 out_failed:
1106 reiserfs_write_unlock(parent_dir->i_sb); 1114 reiserfs_write_unlock(parent_dir->i_sb);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index c99358a52176..b569ff1c4dc8 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -389,8 +389,14 @@ char *mangle_path(char *s, char *p, char *esc)
389} 389}
390EXPORT_SYMBOL(mangle_path); 390EXPORT_SYMBOL(mangle_path);
391 391
392/* 392/**
393 * return the absolute path of 'dentry' residing in mount 'mnt'. 393 * seq_path - seq_file interface to print a pathname
394 * @m: the seq_file handle
395 * @path: the struct path to print
396 * @esc: set of characters to escape in the output
397 *
398 * return the absolute path of 'path', as represented by the
399 * dentry / mnt pair in the path parameter.
394 */ 400 */
395int seq_path(struct seq_file *m, struct path *path, char *esc) 401int seq_path(struct seq_file *m, struct path *path, char *esc)
396{ 402{
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index df0d435baa48..3d81bf58dae2 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -27,6 +27,7 @@
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
29#include <linux/vfs.h> 29#include <linux/vfs.h>
30#include <linux/namei.h>
30#include <asm/byteorder.h> 31#include <asm/byteorder.h>
31#include "sysv.h" 32#include "sysv.h"
32 33
@@ -163,8 +164,11 @@ void sysv_set_inode(struct inode *inode, dev_t rdev)
163 if (inode->i_blocks) { 164 if (inode->i_blocks) {
164 inode->i_op = &sysv_symlink_inode_operations; 165 inode->i_op = &sysv_symlink_inode_operations;
165 inode->i_mapping->a_ops = &sysv_aops; 166 inode->i_mapping->a_ops = &sysv_aops;
166 } else 167 } else {
167 inode->i_op = &sysv_fast_symlink_inode_operations; 168 inode->i_op = &sysv_fast_symlink_inode_operations;
169 nd_terminate_link(SYSV_I(inode)->i_data, inode->i_size,
170 sizeof(SYSV_I(inode)->i_data) - 1);
171 }
168 } else 172 } else
169 init_special_inode(inode, inode->i_mode, rdev); 173 init_special_inode(inode, inode->i_mode, rdev);
170} 174}
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 4a18f084cc42..0e5e54d82924 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -32,18 +32,15 @@
32 32
33#include "ubifs.h" 33#include "ubifs.h"
34#include <linux/writeback.h> 34#include <linux/writeback.h>
35#include <asm/div64.h> 35#include <linux/math64.h>
36 36
37/* 37/*
38 * When pessimistic budget calculations say that there is no enough space, 38 * When pessimistic budget calculations say that there is no enough space,
39 * UBIFS starts writing back dirty inodes and pages, doing garbage collection, 39 * UBIFS starts writing back dirty inodes and pages, doing garbage collection,
40 * or committing. The below constants define maximum number of times UBIFS 40 * or committing. The below constant defines maximum number of times UBIFS
41 * repeats the operations. 41 * repeats the operations.
42 */ 42 */
43#define MAX_SHRINK_RETRIES 8 43#define MAX_MKSPC_RETRIES 3
44#define MAX_GC_RETRIES 4
45#define MAX_CMT_RETRIES 2
46#define MAX_NOSPC_RETRIES 1
47 44
48/* 45/*
49 * The below constant defines amount of dirty pages which should be written 46 * The below constant defines amount of dirty pages which should be written
@@ -52,30 +49,6 @@
52#define NR_TO_WRITE 16 49#define NR_TO_WRITE 16
53 50
54/** 51/**
55 * struct retries_info - information about re-tries while making free space.
56 * @prev_liability: previous liability
57 * @shrink_cnt: how many times the liability was shrinked
58 * @shrink_retries: count of liability shrink re-tries (increased when
59 * liability does not shrink)
60 * @try_gc: GC should be tried first
61 * @gc_retries: how many times GC was run
62 * @cmt_retries: how many times commit has been done
63 * @nospc_retries: how many times GC returned %-ENOSPC
64 *
65 * Since we consider budgeting to be the fast-path, and this structure has to
66 * be allocated on stack and zeroed out, we make it smaller using bit-fields.
67 */
68struct retries_info {
69 long long prev_liability;
70 unsigned int shrink_cnt;
71 unsigned int shrink_retries:5;
72 unsigned int try_gc:1;
73 unsigned int gc_retries:4;
74 unsigned int cmt_retries:3;
75 unsigned int nospc_retries:1;
76};
77
78/**
79 * shrink_liability - write-back some dirty pages/inodes. 52 * shrink_liability - write-back some dirty pages/inodes.
80 * @c: UBIFS file-system description object 53 * @c: UBIFS file-system description object
81 * @nr_to_write: how many dirty pages to write-back 54 * @nr_to_write: how many dirty pages to write-back
@@ -147,9 +120,25 @@ static int run_gc(struct ubifs_info *c)
147} 120}
148 121
149/** 122/**
123 * get_liability - calculate current liability.
124 * @c: UBIFS file-system description object
125 *
126 * This function calculates and returns current UBIFS liability, i.e. the
127 * amount of bytes UBIFS has "promised" to write to the media.
128 */
129static long long get_liability(struct ubifs_info *c)
130{
131 long long liab;
132
133 spin_lock(&c->space_lock);
134 liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth;
135 spin_unlock(&c->space_lock);
136 return liab;
137}
138
139/**
150 * make_free_space - make more free space on the file-system. 140 * make_free_space - make more free space on the file-system.
151 * @c: UBIFS file-system description object 141 * @c: UBIFS file-system description object
152 * @ri: information about previous invocations of this function
153 * 142 *
154 * This function is called when an operation cannot be budgeted because there 143 * This function is called when an operation cannot be budgeted because there
155 * is supposedly no free space. But in most cases there is some free space: 144 * is supposedly no free space. But in most cases there is some free space:
@@ -165,87 +154,42 @@ static int run_gc(struct ubifs_info *c)
165 * Returns %-ENOSPC if it couldn't do more free space, and other negative error 154 * Returns %-ENOSPC if it couldn't do more free space, and other negative error
166 * codes on failures. 155 * codes on failures.
167 */ 156 */
168static int make_free_space(struct ubifs_info *c, struct retries_info *ri) 157static int make_free_space(struct ubifs_info *c)
169{ 158{
170 int err; 159 int err, retries = 0;
171 160 long long liab1, liab2;
172 /*
173 * If we have some dirty pages and inodes (liability), try to write
174 * them back unless this was tried too many times without effect
175 * already.
176 */
177 if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) {
178 long long liability;
179
180 spin_lock(&c->space_lock);
181 liability = c->budg_idx_growth + c->budg_data_growth +
182 c->budg_dd_growth;
183 spin_unlock(&c->space_lock);
184 161
185 if (ri->prev_liability >= liability) { 162 do {
186 /* Liability does not shrink, next time try GC then */ 163 liab1 = get_liability(c);
187 ri->shrink_retries += 1; 164 /*
188 if (ri->gc_retries < MAX_GC_RETRIES) 165 * We probably have some dirty pages or inodes (liability), try
189 ri->try_gc = 1; 166 * to write them back.
190 dbg_budg("liability did not shrink: retries %d of %d", 167 */
191 ri->shrink_retries, MAX_SHRINK_RETRIES); 168 dbg_budg("liability %lld, run write-back", liab1);
192 } 169 shrink_liability(c, NR_TO_WRITE);
193 170
194 dbg_budg("force write-back (count %d)", ri->shrink_cnt); 171 liab2 = get_liability(c);
195 shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt); 172 if (liab2 < liab1)
173 return -EAGAIN;
196 174
197 ri->prev_liability = liability; 175 dbg_budg("new liability %lld (not shrinked)", liab2);
198 ri->shrink_cnt += 1;
199 return -EAGAIN;
200 }
201 176
202 /* 177 /* Liability did not shrink again, try GC */
203 * Try to run garbage collector unless it was already tried too many 178 dbg_budg("Run GC");
204 * times.
205 */
206 if (ri->gc_retries < MAX_GC_RETRIES) {
207 ri->gc_retries += 1;
208 dbg_budg("run GC, retries %d of %d",
209 ri->gc_retries, MAX_GC_RETRIES);
210
211 ri->try_gc = 0;
212 err = run_gc(c); 179 err = run_gc(c);
213 if (!err) 180 if (!err)
214 return -EAGAIN; 181 return -EAGAIN;
215 182
216 if (err == -EAGAIN) { 183 if (err != -EAGAIN && err != -ENOSPC)
217 dbg_budg("GC asked to commit"); 184 /* Some real error happened */
218 err = ubifs_run_commit(c);
219 if (err)
220 return err;
221 return -EAGAIN;
222 }
223
224 if (err != -ENOSPC)
225 return err;
226
227 /*
228 * GC could not make any progress. If this is the first time,
229 * then it makes sense to try to commit, because it might make
230 * some dirty space.
231 */
232 dbg_budg("GC returned -ENOSPC, retries %d",
233 ri->nospc_retries);
234 if (ri->nospc_retries >= MAX_NOSPC_RETRIES)
235 return err; 185 return err;
236 ri->nospc_retries += 1;
237 }
238 186
239 /* Neither GC nor write-back helped, try to commit */ 187 dbg_budg("Run commit (retries %d)", retries);
240 if (ri->cmt_retries < MAX_CMT_RETRIES) {
241 ri->cmt_retries += 1;
242 dbg_budg("run commit, retries %d of %d",
243 ri->cmt_retries, MAX_CMT_RETRIES);
244 err = ubifs_run_commit(c); 188 err = ubifs_run_commit(c);
245 if (err) 189 if (err)
246 return err; 190 return err;
247 return -EAGAIN; 191 } while (retries++ < MAX_MKSPC_RETRIES);
248 } 192
249 return -ENOSPC; 193 return -ENOSPC;
250} 194}
251 195
@@ -258,8 +202,8 @@ static int make_free_space(struct ubifs_info *c, struct retries_info *ri)
258 */ 202 */
259int ubifs_calc_min_idx_lebs(struct ubifs_info *c) 203int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
260{ 204{
261 int ret; 205 int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz;
262 uint64_t idx_size; 206 long long idx_size;
263 207
264 idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; 208 idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
265 209
@@ -271,23 +215,16 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
271 * pair, nor similarly the two variables for the new index size, so we 215 * pair, nor similarly the two variables for the new index size, so we
272 * have to do this costly 64-bit division on fast-path. 216 * have to do this costly 64-bit division on fast-path.
273 */ 217 */
274 if (do_div(idx_size, c->leb_size - c->max_idx_node_sz)) 218 idx_size += eff_leb_size - 1;
275 ret = idx_size + 1; 219 idx_lebs = div_u64(idx_size, eff_leb_size);
276 else
277 ret = idx_size;
278 /* 220 /*
279 * The index head is not available for the in-the-gaps method, so add an 221 * The index head is not available for the in-the-gaps method, so add an
280 * extra LEB to compensate. 222 * extra LEB to compensate.
281 */ 223 */
282 ret += 1; 224 idx_lebs += 1;
283 /* 225 if (idx_lebs < MIN_INDEX_LEBS)
284 * At present the index needs at least 2 LEBs: one for the index head 226 idx_lebs = MIN_INDEX_LEBS;
285 * and one for in-the-gaps method (which currently does not cater for 227 return idx_lebs;
286 * the index head and so excludes it from consideration).
287 */
288 if (ret < 2)
289 ret = 2;
290 return ret;
291} 228}
292 229
293/** 230/**
@@ -530,8 +467,7 @@ static int calc_dd_growth(const struct ubifs_info *c,
530int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) 467int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
531{ 468{
532 int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); 469 int uninitialized_var(cmt_retries), uninitialized_var(wb_retries);
533 int err, idx_growth, data_growth, dd_growth; 470 int err, idx_growth, data_growth, dd_growth, retried = 0;
534 struct retries_info ri;
535 471
536 ubifs_assert(req->new_page <= 1); 472 ubifs_assert(req->new_page <= 1);
537 ubifs_assert(req->dirtied_page <= 1); 473 ubifs_assert(req->dirtied_page <= 1);
@@ -549,7 +485,6 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
549 if (!data_growth && !dd_growth) 485 if (!data_growth && !dd_growth)
550 return 0; 486 return 0;
551 idx_growth = calc_idx_growth(c, req); 487 idx_growth = calc_idx_growth(c, req);
552 memset(&ri, 0, sizeof(struct retries_info));
553 488
554again: 489again:
555 spin_lock(&c->space_lock); 490 spin_lock(&c->space_lock);
@@ -587,12 +522,17 @@ again:
587 return err; 522 return err;
588 } 523 }
589 524
590 err = make_free_space(c, &ri); 525 err = make_free_space(c);
526 cond_resched();
591 if (err == -EAGAIN) { 527 if (err == -EAGAIN) {
592 dbg_budg("try again"); 528 dbg_budg("try again");
593 cond_resched();
594 goto again; 529 goto again;
595 } else if (err == -ENOSPC) { 530 } else if (err == -ENOSPC) {
531 if (!retried) {
532 retried = 1;
533 dbg_budg("-ENOSPC, but anyway try once again");
534 goto again;
535 }
596 dbg_budg("FS is full, -ENOSPC"); 536 dbg_budg("FS is full, -ENOSPC");
597 c->nospace = 1; 537 c->nospace = 1;
598 if (can_use_rp(c) || c->rp_size == 0) 538 if (can_use_rp(c) || c->rp_size == 0)
@@ -712,9 +652,9 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
712 * user-space. User-space application tend to expect that if the file-system 652 * user-space. User-space application tend to expect that if the file-system
713 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they 653 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
714 * are able to write a file of size N. UBIFS attaches node headers to each data 654 * are able to write a file of size N. UBIFS attaches node headers to each data
715 * node and it has to write indexind nodes as well. This introduces additional 655 * node and it has to write indexing nodes as well. This introduces additional
716 * overhead, and UBIFS it has to report sligtly less free space to meet the 656 * overhead, and UBIFS has to report slightly less free space to meet the above
717 * above expectetion. 657 * expectations.
718 * 658 *
719 * This function assumes free space is made up of uncompressed data nodes and 659 * This function assumes free space is made up of uncompressed data nodes and
720 * full index nodes (one per data node, tripled because we always allow enough 660 * full index nodes (one per data node, tripled because we always allow enough
@@ -723,7 +663,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
723 * Note, the calculation is pessimistic, which means that most of the time 663 * Note, the calculation is pessimistic, which means that most of the time
724 * UBIFS reports less space than it actually has. 664 * UBIFS reports less space than it actually has.
725 */ 665 */
726long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) 666long long ubifs_reported_space(const struct ubifs_info *c, long long free)
727{ 667{
728 int divisor, factor, f; 668 int divisor, factor, f;
729 669
@@ -737,7 +677,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
737 * of data nodes, f - fanout. Because effective UBIFS fanout is twice 677 * of data nodes, f - fanout. Because effective UBIFS fanout is twice
738 * as less than maximum fanout, we assume that each data node 678 * as less than maximum fanout, we assume that each data node
739 * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. 679 * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes.
740 * Note, the multiplier 3 is because UBIFS reseves thrice as more space 680 * Note, the multiplier 3 is because UBIFS reserves thrice as more space
741 * for the index. 681 * for the index.
742 */ 682 */
743 f = c->fanout > 3 ? c->fanout >> 1 : 2; 683 f = c->fanout > 3 ? c->fanout >> 1 : 2;
@@ -745,8 +685,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
745 divisor = UBIFS_MAX_DATA_NODE_SZ; 685 divisor = UBIFS_MAX_DATA_NODE_SZ;
746 divisor += (c->max_idx_node_sz * 3) / (f - 1); 686 divisor += (c->max_idx_node_sz * 3) / (f - 1);
747 free *= factor; 687 free *= factor;
748 do_div(free, divisor); 688 return div_u64(free, divisor);
749 return free;
750} 689}
751 690
752/** 691/**
@@ -756,10 +695,10 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
756 * This function calculates amount of free space to report to user-space. 695 * This function calculates amount of free space to report to user-space.
757 * 696 *
758 * Because UBIFS may introduce substantial overhead (the index, node headers, 697 * Because UBIFS may introduce substantial overhead (the index, node headers,
759 * alighment, wastage at the end of eraseblocks, etc), it cannot report real 698 * alignment, wastage at the end of eraseblocks, etc), it cannot report real
760 * amount of free flash space it has (well, because not all dirty space is 699 * amount of free flash space it has (well, because not all dirty space is
761 * reclamable, UBIFS does not actually know the real amount). If UBIFS did so, 700 * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so,
762 * it would bread user expectetion about what free space is. Users seem to 701 * it would bread user expectations about what free space is. Users seem to
763 * accustomed to assume that if the file-system reports N bytes of free space, 702 * accustomed to assume that if the file-system reports N bytes of free space,
764 * they would be able to fit a file of N bytes to the FS. This almost works for 703 * they would be able to fit a file of N bytes to the FS. This almost works for
765 * traditional file-systems, because they have way less overhead than UBIFS. 704 * traditional file-systems, because they have way less overhead than UBIFS.
@@ -771,18 +710,9 @@ long long ubifs_get_free_space(struct ubifs_info *c)
771 long long available, outstanding, free; 710 long long available, outstanding, free;
772 711
773 spin_lock(&c->space_lock); 712 spin_lock(&c->space_lock);
774 min_idx_lebs = ubifs_calc_min_idx_lebs(c); 713 min_idx_lebs = c->min_idx_lebs;
714 ubifs_assert(min_idx_lebs == ubifs_calc_min_idx_lebs(c));
775 outstanding = c->budg_data_growth + c->budg_dd_growth; 715 outstanding = c->budg_data_growth + c->budg_dd_growth;
776
777 /*
778 * Force the amount available to the total size reported if the used
779 * space is zero.
780 */
781 if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) {
782 spin_unlock(&c->space_lock);
783 return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT;
784 }
785
786 available = ubifs_calc_available(c, min_idx_lebs); 716 available = ubifs_calc_available(c, min_idx_lebs);
787 717
788 /* 718 /*
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index b49884c8c10e..f3a7945527fb 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -470,12 +470,12 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
470{ 470{
471 struct ubifs_idx_node *idx; 471 struct ubifs_idx_node *idx;
472 int lnum, offs, len, err = 0; 472 int lnum, offs, len, err = 0;
473 struct ubifs_debug_info *d = c->dbg;
473 474
474 c->old_zroot = *zroot; 475 d->old_zroot = *zroot;
475 476 lnum = d->old_zroot.lnum;
476 lnum = c->old_zroot.lnum; 477 offs = d->old_zroot.offs;
477 offs = c->old_zroot.offs; 478 len = d->old_zroot.len;
478 len = c->old_zroot.len;
479 479
480 idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); 480 idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
481 if (!idx) 481 if (!idx)
@@ -485,8 +485,8 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
485 if (err) 485 if (err)
486 goto out; 486 goto out;
487 487
488 c->old_zroot_level = le16_to_cpu(idx->level); 488 d->old_zroot_level = le16_to_cpu(idx->level);
489 c->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum); 489 d->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum);
490out: 490out:
491 kfree(idx); 491 kfree(idx);
492 return err; 492 return err;
@@ -509,6 +509,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
509{ 509{
510 int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; 510 int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
511 int first = 1, iip; 511 int first = 1, iip;
512 struct ubifs_debug_info *d = c->dbg;
512 union ubifs_key lower_key, upper_key, l_key, u_key; 513 union ubifs_key lower_key, upper_key, l_key, u_key;
513 unsigned long long uninitialized_var(last_sqnum); 514 unsigned long long uninitialized_var(last_sqnum);
514 struct ubifs_idx_node *idx; 515 struct ubifs_idx_node *idx;
@@ -525,9 +526,9 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
525 UBIFS_IDX_NODE_SZ; 526 UBIFS_IDX_NODE_SZ;
526 527
527 /* Start at the old zroot */ 528 /* Start at the old zroot */
528 lnum = c->old_zroot.lnum; 529 lnum = d->old_zroot.lnum;
529 offs = c->old_zroot.offs; 530 offs = d->old_zroot.offs;
530 len = c->old_zroot.len; 531 len = d->old_zroot.len;
531 iip = 0; 532 iip = 0;
532 533
533 /* 534 /*
@@ -560,11 +561,11 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
560 if (first) { 561 if (first) {
561 first = 0; 562 first = 0;
562 /* Check root level and sqnum */ 563 /* Check root level and sqnum */
563 if (le16_to_cpu(idx->level) != c->old_zroot_level) { 564 if (le16_to_cpu(idx->level) != d->old_zroot_level) {
564 err = 2; 565 err = 2;
565 goto out_dump; 566 goto out_dump;
566 } 567 }
567 if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) { 568 if (le64_to_cpu(idx->ch.sqnum) != d->old_zroot_sqnum) {
568 err = 3; 569 err = 3;
569 goto out_dump; 570 goto out_dump;
570 } 571 }
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index a0ada596b17c..11e4132f314a 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -33,7 +33,7 @@
33/* Fake description object for the "none" compressor */ 33/* Fake description object for the "none" compressor */
34static struct ubifs_compressor none_compr = { 34static struct ubifs_compressor none_compr = {
35 .compr_type = UBIFS_COMPR_NONE, 35 .compr_type = UBIFS_COMPR_NONE,
36 .name = "no compression", 36 .name = "none",
37 .capi_name = "", 37 .capi_name = "",
38}; 38};
39 39
@@ -43,13 +43,13 @@ static DEFINE_MUTEX(lzo_mutex);
43static struct ubifs_compressor lzo_compr = { 43static struct ubifs_compressor lzo_compr = {
44 .compr_type = UBIFS_COMPR_LZO, 44 .compr_type = UBIFS_COMPR_LZO,
45 .comp_mutex = &lzo_mutex, 45 .comp_mutex = &lzo_mutex,
46 .name = "LZO", 46 .name = "lzo",
47 .capi_name = "lzo", 47 .capi_name = "lzo",
48}; 48};
49#else 49#else
50static struct ubifs_compressor lzo_compr = { 50static struct ubifs_compressor lzo_compr = {
51 .compr_type = UBIFS_COMPR_LZO, 51 .compr_type = UBIFS_COMPR_LZO,
52 .name = "LZO", 52 .name = "lzo",
53}; 53};
54#endif 54#endif
55 55
@@ -108,7 +108,7 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
108 if (compr->comp_mutex) 108 if (compr->comp_mutex)
109 mutex_lock(compr->comp_mutex); 109 mutex_lock(compr->comp_mutex);
110 err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf, 110 err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf,
111 out_len); 111 (unsigned int *)out_len);
112 if (compr->comp_mutex) 112 if (compr->comp_mutex)
113 mutex_unlock(compr->comp_mutex); 113 mutex_unlock(compr->comp_mutex);
114 if (unlikely(err)) { 114 if (unlikely(err)) {
@@ -119,10 +119,10 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
119 } 119 }
120 120
121 /* 121 /*
122 * Presently, we just require that compression results in less data, 122 * If the data compressed only slightly, it is better to leave it
123 * rather than any defined minimum compression ratio or amount. 123 * uncompressed to improve read speed.
124 */ 124 */
125 if (ALIGN(*out_len, 8) >= ALIGN(in_len, 8)) 125 if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF)
126 goto no_compr; 126 goto no_compr;
127 127
128 return; 128 return;
@@ -172,7 +172,7 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
172 if (compr->decomp_mutex) 172 if (compr->decomp_mutex)
173 mutex_lock(compr->decomp_mutex); 173 mutex_lock(compr->decomp_mutex);
174 err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf, 174 err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf,
175 out_len); 175 (unsigned int *)out_len);
176 if (compr->decomp_mutex) 176 if (compr->decomp_mutex)
177 mutex_unlock(compr->decomp_mutex); 177 mutex_unlock(compr->decomp_mutex);
178 if (err) 178 if (err)
@@ -244,7 +244,7 @@ out_lzo:
244/** 244/**
245 * ubifs_compressors_exit - de-initialize UBIFS compressors. 245 * ubifs_compressors_exit - de-initialize UBIFS compressors.
246 */ 246 */
247void __exit ubifs_compressors_exit(void) 247void ubifs_compressors_exit(void)
248{ 248{
249 compr_exit(&lzo_compr); 249 compr_exit(&lzo_compr);
250 compr_exit(&zlib_compr); 250 compr_exit(&zlib_compr);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 510ffa0bbda4..792c5a16c182 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -32,6 +32,8 @@
32#include "ubifs.h" 32#include "ubifs.h"
33#include <linux/module.h> 33#include <linux/module.h>
34#include <linux/moduleparam.h> 34#include <linux/moduleparam.h>
35#include <linux/debugfs.h>
36#include <linux/math64.h>
35 37
36#ifdef CONFIG_UBIFS_FS_DEBUG 38#ifdef CONFIG_UBIFS_FS_DEBUG
37 39
@@ -596,7 +598,9 @@ void dbg_dump_budg(struct ubifs_info *c)
596 struct rb_node *rb; 598 struct rb_node *rb;
597 struct ubifs_bud *bud; 599 struct ubifs_bud *bud;
598 struct ubifs_gced_idx_leb *idx_gc; 600 struct ubifs_gced_idx_leb *idx_gc;
601 long long available, outstanding, free;
599 602
603 ubifs_assert(spin_is_locked(&c->space_lock));
600 spin_lock(&dbg_lock); 604 spin_lock(&dbg_lock);
601 printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " 605 printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
602 "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, 606 "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
@@ -629,6 +633,17 @@ void dbg_dump_budg(struct ubifs_info *c)
629 printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", 633 printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n",
630 idx_gc->lnum, idx_gc->unmap); 634 idx_gc->lnum, idx_gc->unmap);
631 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); 635 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
636
637 /* Print budgeting predictions */
638 available = ubifs_calc_available(c, c->min_idx_lebs);
639 outstanding = c->budg_data_growth + c->budg_dd_growth;
640 if (available > outstanding)
641 free = ubifs_reported_space(c, available - outstanding);
642 else
643 free = 0;
644 printk(KERN_DEBUG "Budgeting predictions:\n");
645 printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
646 available, outstanding, free);
632 spin_unlock(&dbg_lock); 647 spin_unlock(&dbg_lock);
633} 648}
634 649
@@ -645,7 +660,8 @@ void dbg_dump_lprops(struct ubifs_info *c)
645 struct ubifs_lprops lp; 660 struct ubifs_lprops lp;
646 struct ubifs_lp_stats lst; 661 struct ubifs_lp_stats lst;
647 662
648 printk(KERN_DEBUG "(pid %d) Dumping LEB properties\n", current->pid); 663 printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n",
664 current->pid);
649 ubifs_get_lp_stats(c, &lst); 665 ubifs_get_lp_stats(c, &lst);
650 dbg_dump_lstats(&lst); 666 dbg_dump_lstats(&lst);
651 667
@@ -656,6 +672,8 @@ void dbg_dump_lprops(struct ubifs_info *c)
656 672
657 dbg_dump_lprop(c, &lp); 673 dbg_dump_lprop(c, &lp);
658 } 674 }
675 printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n",
676 current->pid);
659} 677}
660 678
661void dbg_dump_lpt_info(struct ubifs_info *c) 679void dbg_dump_lpt_info(struct ubifs_info *c)
@@ -663,6 +681,7 @@ void dbg_dump_lpt_info(struct ubifs_info *c)
663 int i; 681 int i;
664 682
665 spin_lock(&dbg_lock); 683 spin_lock(&dbg_lock);
684 printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid);
666 printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); 685 printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz);
667 printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); 686 printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz);
668 printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); 687 printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz);
@@ -684,7 +703,8 @@ void dbg_dump_lpt_info(struct ubifs_info *c)
684 printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); 703 printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
685 printk(KERN_DEBUG "\tLPT head is at %d:%d\n", 704 printk(KERN_DEBUG "\tLPT head is at %d:%d\n",
686 c->nhead_lnum, c->nhead_offs); 705 c->nhead_lnum, c->nhead_offs);
687 printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs); 706 printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n",
707 c->ltab_lnum, c->ltab_offs);
688 if (c->big_lpt) 708 if (c->big_lpt)
689 printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", 709 printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n",
690 c->lsave_lnum, c->lsave_offs); 710 c->lsave_lnum, c->lsave_offs);
@@ -703,9 +723,9 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
703 if (dbg_failure_mode) 723 if (dbg_failure_mode)
704 return; 724 return;
705 725
706 printk(KERN_DEBUG "(pid %d) Dumping LEB %d\n", current->pid, lnum); 726 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
707 727 current->pid, lnum);
708 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); 728 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
709 if (IS_ERR(sleb)) { 729 if (IS_ERR(sleb)) {
710 ubifs_err("scan error %d", (int)PTR_ERR(sleb)); 730 ubifs_err("scan error %d", (int)PTR_ERR(sleb));
711 return; 731 return;
@@ -721,6 +741,8 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
721 dbg_dump_node(c, snod->node); 741 dbg_dump_node(c, snod->node);
722 } 742 }
723 743
744 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
745 current->pid, lnum);
724 ubifs_scan_destroy(sleb); 746 ubifs_scan_destroy(sleb);
725 return; 747 return;
726} 748}
@@ -768,7 +790,7 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
768{ 790{
769 int i; 791 int i;
770 792
771 printk(KERN_DEBUG "(pid %d) Dumping heap cat %d (%d elements)\n", 793 printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n",
772 current->pid, cat, heap->cnt); 794 current->pid, cat, heap->cnt);
773 for (i = 0; i < heap->cnt; i++) { 795 for (i = 0; i < heap->cnt; i++) {
774 struct ubifs_lprops *lprops = heap->arr[i]; 796 struct ubifs_lprops *lprops = heap->arr[i];
@@ -777,6 +799,7 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
777 "flags %d\n", i, lprops->lnum, lprops->hpos, 799 "flags %d\n", i, lprops->lnum, lprops->hpos,
778 lprops->free, lprops->dirty, lprops->flags); 800 lprops->free, lprops->dirty, lprops->flags);
779 } 801 }
802 printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid);
780} 803}
781 804
782void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, 805void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
@@ -784,7 +807,7 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
784{ 807{
785 int i; 808 int i;
786 809
787 printk(KERN_DEBUG "(pid %d) Dumping pnode:\n", current->pid); 810 printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid);
788 printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", 811 printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
789 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); 812 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
790 printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", 813 printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
@@ -803,7 +826,7 @@ void dbg_dump_tnc(struct ubifs_info *c)
803 int level; 826 int level;
804 827
805 printk(KERN_DEBUG "\n"); 828 printk(KERN_DEBUG "\n");
806 printk(KERN_DEBUG "(pid %d) Dumping the TNC tree\n", current->pid); 829 printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid);
807 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); 830 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
808 level = znode->level; 831 level = znode->level;
809 printk(KERN_DEBUG "== Level %d ==\n", level); 832 printk(KERN_DEBUG "== Level %d ==\n", level);
@@ -815,8 +838,7 @@ void dbg_dump_tnc(struct ubifs_info *c)
815 dbg_dump_znode(c, znode); 838 dbg_dump_znode(c, znode);
816 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); 839 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
817 } 840 }
818 841 printk(KERN_DEBUG "(pid %d) finish dumping TNC tree\n", current->pid);
819 printk(KERN_DEBUG "\n");
820} 842}
821 843
822static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, 844static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
@@ -992,8 +1014,8 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
992 zbr1->offs, DBGKEY(&key)); 1014 zbr1->offs, DBGKEY(&key));
993 dbg_err("but it should have key %s according to tnc", 1015 dbg_err("but it should have key %s according to tnc",
994 DBGKEY(&zbr1->key)); 1016 DBGKEY(&zbr1->key));
995 dbg_dump_node(c, dent1); 1017 dbg_dump_node(c, dent1);
996 goto out_free; 1018 goto out_free;
997 } 1019 }
998 1020
999 key_read(c, &dent2->key, &key); 1021 key_read(c, &dent2->key, &key);
@@ -1002,8 +1024,8 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1002 zbr1->offs, DBGKEY(&key)); 1024 zbr1->offs, DBGKEY(&key));
1003 dbg_err("but it should have key %s according to tnc", 1025 dbg_err("but it should have key %s according to tnc",
1004 DBGKEY(&zbr2->key)); 1026 DBGKEY(&zbr2->key));
1005 dbg_dump_node(c, dent2); 1027 dbg_dump_node(c, dent2);
1006 goto out_free; 1028 goto out_free;
1007 } 1029 }
1008 1030
1009 nlen1 = le16_to_cpu(dent1->nlen); 1031 nlen1 = le16_to_cpu(dent1->nlen);
@@ -1020,9 +1042,9 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1020 dbg_err("bad order of colliding key %s", 1042 dbg_err("bad order of colliding key %s",
1021 DBGKEY(&key)); 1043 DBGKEY(&key));
1022 1044
1023 dbg_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); 1045 ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs);
1024 dbg_dump_node(c, dent1); 1046 dbg_dump_node(c, dent1);
1025 dbg_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); 1047 ubifs_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs);
1026 dbg_dump_node(c, dent2); 1048 dbg_dump_node(c, dent2);
1027 1049
1028out_free: 1050out_free:
@@ -2097,13 +2119,13 @@ static int simple_rand(void)
2097 return (next >> 16) & 32767; 2119 return (next >> 16) & 32767;
2098} 2120}
2099 2121
2100void dbg_failure_mode_registration(struct ubifs_info *c) 2122static void failure_mode_init(struct ubifs_info *c)
2101{ 2123{
2102 struct failure_mode_info *fmi; 2124 struct failure_mode_info *fmi;
2103 2125
2104 fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); 2126 fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS);
2105 if (!fmi) { 2127 if (!fmi) {
2106 dbg_err("Failed to register failure mode - no memory"); 2128 ubifs_err("Failed to register failure mode - no memory");
2107 return; 2129 return;
2108 } 2130 }
2109 fmi->c = c; 2131 fmi->c = c;
@@ -2112,7 +2134,7 @@ void dbg_failure_mode_registration(struct ubifs_info *c)
2112 spin_unlock(&fmi_lock); 2134 spin_unlock(&fmi_lock);
2113} 2135}
2114 2136
2115void dbg_failure_mode_deregistration(struct ubifs_info *c) 2137static void failure_mode_exit(struct ubifs_info *c)
2116{ 2138{
2117 struct failure_mode_info *fmi, *tmp; 2139 struct failure_mode_info *fmi, *tmp;
2118 2140
@@ -2146,42 +2168,44 @@ static int in_failure_mode(struct ubi_volume_desc *desc)
2146 struct ubifs_info *c = dbg_find_info(desc); 2168 struct ubifs_info *c = dbg_find_info(desc);
2147 2169
2148 if (c && dbg_failure_mode) 2170 if (c && dbg_failure_mode)
2149 return c->failure_mode; 2171 return c->dbg->failure_mode;
2150 return 0; 2172 return 0;
2151} 2173}
2152 2174
2153static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) 2175static int do_fail(struct ubi_volume_desc *desc, int lnum, int write)
2154{ 2176{
2155 struct ubifs_info *c = dbg_find_info(desc); 2177 struct ubifs_info *c = dbg_find_info(desc);
2178 struct ubifs_debug_info *d;
2156 2179
2157 if (!c || !dbg_failure_mode) 2180 if (!c || !dbg_failure_mode)
2158 return 0; 2181 return 0;
2159 if (c->failure_mode) 2182 d = c->dbg;
2183 if (d->failure_mode)
2160 return 1; 2184 return 1;
2161 if (!c->fail_cnt) { 2185 if (!d->fail_cnt) {
2162 /* First call - decide delay to failure */ 2186 /* First call - decide delay to failure */
2163 if (chance(1, 2)) { 2187 if (chance(1, 2)) {
2164 unsigned int delay = 1 << (simple_rand() >> 11); 2188 unsigned int delay = 1 << (simple_rand() >> 11);
2165 2189
2166 if (chance(1, 2)) { 2190 if (chance(1, 2)) {
2167 c->fail_delay = 1; 2191 d->fail_delay = 1;
2168 c->fail_timeout = jiffies + 2192 d->fail_timeout = jiffies +
2169 msecs_to_jiffies(delay); 2193 msecs_to_jiffies(delay);
2170 dbg_rcvry("failing after %ums", delay); 2194 dbg_rcvry("failing after %ums", delay);
2171 } else { 2195 } else {
2172 c->fail_delay = 2; 2196 d->fail_delay = 2;
2173 c->fail_cnt_max = delay; 2197 d->fail_cnt_max = delay;
2174 dbg_rcvry("failing after %u calls", delay); 2198 dbg_rcvry("failing after %u calls", delay);
2175 } 2199 }
2176 } 2200 }
2177 c->fail_cnt += 1; 2201 d->fail_cnt += 1;
2178 } 2202 }
2179 /* Determine if failure delay has expired */ 2203 /* Determine if failure delay has expired */
2180 if (c->fail_delay == 1) { 2204 if (d->fail_delay == 1) {
2181 if (time_before(jiffies, c->fail_timeout)) 2205 if (time_before(jiffies, d->fail_timeout))
2182 return 0; 2206 return 0;
2183 } else if (c->fail_delay == 2) 2207 } else if (d->fail_delay == 2)
2184 if (c->fail_cnt++ < c->fail_cnt_max) 2208 if (d->fail_cnt++ < d->fail_cnt_max)
2185 return 0; 2209 return 0;
2186 if (lnum == UBIFS_SB_LNUM) { 2210 if (lnum == UBIFS_SB_LNUM) {
2187 if (write) { 2211 if (write) {
@@ -2239,7 +2263,7 @@ static int do_fail(struct ubi_volume_desc *desc, int lnum, int write)
2239 dbg_rcvry("failing in bud LEB %d commit not running", lnum); 2263 dbg_rcvry("failing in bud LEB %d commit not running", lnum);
2240 } 2264 }
2241 ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); 2265 ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum);
2242 c->failure_mode = 1; 2266 d->failure_mode = 1;
2243 dump_stack(); 2267 dump_stack();
2244 return 1; 2268 return 1;
2245} 2269}
@@ -2344,4 +2368,181 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
2344 return 0; 2368 return 0;
2345} 2369}
2346 2370
2371/**
2372 * ubifs_debugging_init - initialize UBIFS debugging.
2373 * @c: UBIFS file-system description object
2374 *
2375 * This function initializes debugging-related data for the file system.
2376 * Returns zero in case of success and a negative error code in case of
2377 * failure.
2378 */
2379int ubifs_debugging_init(struct ubifs_info *c)
2380{
2381 c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL);
2382 if (!c->dbg)
2383 return -ENOMEM;
2384
2385 c->dbg->buf = vmalloc(c->leb_size);
2386 if (!c->dbg->buf)
2387 goto out;
2388
2389 failure_mode_init(c);
2390 return 0;
2391
2392out:
2393 kfree(c->dbg);
2394 return -ENOMEM;
2395}
2396
2397/**
2398 * ubifs_debugging_exit - free debugging data.
2399 * @c: UBIFS file-system description object
2400 */
2401void ubifs_debugging_exit(struct ubifs_info *c)
2402{
2403 failure_mode_exit(c);
2404 vfree(c->dbg->buf);
2405 kfree(c->dbg);
2406}
2407
2408/*
2409 * Root directory for UBIFS stuff in debugfs. Contains sub-directories which
2410 * contain the stuff specific to particular file-system mounts.
2411 */
2412static struct dentry *debugfs_rootdir;
2413
2414/**
2415 * dbg_debugfs_init - initialize debugfs file-system.
2416 *
2417 * UBIFS uses debugfs file-system to expose various debugging knobs to
2418 * user-space. This function creates "ubifs" directory in the debugfs
2419 * file-system. Returns zero in case of success and a negative error code in
2420 * case of failure.
2421 */
2422int dbg_debugfs_init(void)
2423{
2424 debugfs_rootdir = debugfs_create_dir("ubifs", NULL);
2425 if (IS_ERR(debugfs_rootdir)) {
2426 int err = PTR_ERR(debugfs_rootdir);
2427 ubifs_err("cannot create \"ubifs\" debugfs directory, "
2428 "error %d\n", err);
2429 return err;
2430 }
2431
2432 return 0;
2433}
2434
2435/**
2436 * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system.
2437 */
2438void dbg_debugfs_exit(void)
2439{
2440 debugfs_remove(debugfs_rootdir);
2441}
2442
2443static int open_debugfs_file(struct inode *inode, struct file *file)
2444{
2445 file->private_data = inode->i_private;
2446 return 0;
2447}
2448
2449static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
2450 size_t count, loff_t *ppos)
2451{
2452 struct ubifs_info *c = file->private_data;
2453 struct ubifs_debug_info *d = c->dbg;
2454
2455 if (file->f_path.dentry == d->dump_lprops)
2456 dbg_dump_lprops(c);
2457 else if (file->f_path.dentry == d->dump_budg) {
2458 spin_lock(&c->space_lock);
2459 dbg_dump_budg(c);
2460 spin_unlock(&c->space_lock);
2461 } else if (file->f_path.dentry == d->dump_tnc) {
2462 mutex_lock(&c->tnc_mutex);
2463 dbg_dump_tnc(c);
2464 mutex_unlock(&c->tnc_mutex);
2465 } else
2466 return -EINVAL;
2467
2468 *ppos += count;
2469 return count;
2470}
2471
2472static const struct file_operations debugfs_fops = {
2473 .open = open_debugfs_file,
2474 .write = write_debugfs_file,
2475 .owner = THIS_MODULE,
2476};
2477
2478/**
2479 * dbg_debugfs_init_fs - initialize debugfs for UBIFS instance.
2480 * @c: UBIFS file-system description object
2481 *
2482 * This function creates all debugfs files for this instance of UBIFS. Returns
2483 * zero in case of success and a negative error code in case of failure.
2484 *
2485 * Note, the only reason we have not merged this function with the
2486 * 'ubifs_debugging_init()' function is because it is better to initialize
2487 * debugfs interfaces at the very end of the mount process, and remove them at
2488 * the very beginning of the mount process.
2489 */
2490int dbg_debugfs_init_fs(struct ubifs_info *c)
2491{
2492 int err;
2493 const char *fname;
2494 struct dentry *dent;
2495 struct ubifs_debug_info *d = c->dbg;
2496
2497 sprintf(d->debugfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
2498 d->debugfs_dir = debugfs_create_dir(d->debugfs_dir_name,
2499 debugfs_rootdir);
2500 if (IS_ERR(d->debugfs_dir)) {
2501 err = PTR_ERR(d->debugfs_dir);
2502 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
2503 d->debugfs_dir_name, err);
2504 goto out;
2505 }
2506
2507 fname = "dump_lprops";
2508 dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c,
2509 &debugfs_fops);
2510 if (IS_ERR(dent))
2511 goto out_remove;
2512 d->dump_lprops = dent;
2513
2514 fname = "dump_budg";
2515 dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c,
2516 &debugfs_fops);
2517 if (IS_ERR(dent))
2518 goto out_remove;
2519 d->dump_budg = dent;
2520
2521 fname = "dump_tnc";
2522 dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c,
2523 &debugfs_fops);
2524 if (IS_ERR(dent))
2525 goto out_remove;
2526 d->dump_tnc = dent;
2527
2528 return 0;
2529
2530out_remove:
2531 err = PTR_ERR(dent);
2532 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
2533 fname, err);
2534 debugfs_remove_recursive(d->debugfs_dir);
2535out:
2536 return err;
2537}
2538
2539/**
2540 * dbg_debugfs_exit_fs - remove all debugfs files.
2541 * @c: UBIFS file-system description object
2542 */
2543void dbg_debugfs_exit_fs(struct ubifs_info *c)
2544{
2545 debugfs_remove_recursive(c->dbg->debugfs_dir);
2546}
2547
2347#endif /* CONFIG_UBIFS_FS_DEBUG */ 2548#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 33d6b95071e4..9820d6999f7e 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -25,7 +25,56 @@
25 25
26#ifdef CONFIG_UBIFS_FS_DEBUG 26#ifdef CONFIG_UBIFS_FS_DEBUG
27 27
28#define UBIFS_DBG(op) op 28/**
29 * struct ubifs_debug_info - per-FS debugging information.
30 * @buf: a buffer of LEB size, used for various purposes
31 * @old_zroot: old index root - used by 'dbg_check_old_index()'
32 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
33 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
34 * @failure_mode: failure mode for recovery testing
35 * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
36 * @fail_timeout: time in jiffies when delay of failure mode expires
37 * @fail_cnt: current number of calls to failure mode I/O functions
38 * @fail_cnt_max: number of calls by which to delay failure mode
39 * @chk_lpt_sz: used by LPT tree size checker
40 * @chk_lpt_sz2: used by LPT tree size checker
41 * @chk_lpt_wastage: used by LPT tree size checker
42 * @chk_lpt_lebs: used by LPT tree size checker
43 * @new_nhead_offs: used by LPT tree size checker
44 * @new_ihead_lnum: used by debugging to check ihead_lnum
45 * @new_ihead_offs: used by debugging to check ihead_offs
46 *
47 * @debugfs_dir_name: name of debugfs directory containing this file-system's
48 * files
49 * @debugfs_dir: direntry object of the file-system debugfs directory
50 * @dump_lprops: "dump lprops" debugfs knob
51 * @dump_budg: "dump budgeting information" debugfs knob
52 * @dump_tnc: "dump TNC" debugfs knob
53 */
54struct ubifs_debug_info {
55 void *buf;
56 struct ubifs_zbranch old_zroot;
57 int old_zroot_level;
58 unsigned long long old_zroot_sqnum;
59 int failure_mode;
60 int fail_delay;
61 unsigned long fail_timeout;
62 unsigned int fail_cnt;
63 unsigned int fail_cnt_max;
64 long long chk_lpt_sz;
65 long long chk_lpt_sz2;
66 long long chk_lpt_wastage;
67 int chk_lpt_lebs;
68 int new_nhead_offs;
69 int new_ihead_lnum;
70 int new_ihead_offs;
71
72 char debugfs_dir_name[100];
73 struct dentry *debugfs_dir;
74 struct dentry *dump_lprops;
75 struct dentry *dump_budg;
76 struct dentry *dump_tnc;
77};
29 78
30#define ubifs_assert(expr) do { \ 79#define ubifs_assert(expr) do { \
31 if (unlikely(!(expr))) { \ 80 if (unlikely(!(expr))) { \
@@ -211,14 +260,18 @@ extern unsigned int ubifs_msg_flags;
211extern unsigned int ubifs_chk_flags; 260extern unsigned int ubifs_chk_flags;
212extern unsigned int ubifs_tst_flags; 261extern unsigned int ubifs_tst_flags;
213 262
214/* Dump functions */ 263int ubifs_debugging_init(struct ubifs_info *c);
264void ubifs_debugging_exit(struct ubifs_info *c);
215 265
266/* Dump functions */
216const char *dbg_ntype(int type); 267const char *dbg_ntype(int type);
217const char *dbg_cstate(int cmt_state); 268const char *dbg_cstate(int cmt_state);
218const char *dbg_get_key_dump(const struct ubifs_info *c, 269const char *dbg_get_key_dump(const struct ubifs_info *c,
219 const union ubifs_key *key); 270 const union ubifs_key *key);
220void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); 271void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode);
221void dbg_dump_node(const struct ubifs_info *c, const void *node); 272void dbg_dump_node(const struct ubifs_info *c, const void *node);
273void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
274 int offs);
222void dbg_dump_budget_req(const struct ubifs_budget_req *req); 275void dbg_dump_budget_req(const struct ubifs_budget_req *req);
223void dbg_dump_lstats(const struct ubifs_lp_stats *lst); 276void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
224void dbg_dump_budg(struct ubifs_info *c); 277void dbg_dump_budg(struct ubifs_info *c);
@@ -233,9 +286,9 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
233 struct ubifs_nnode *parent, int iip); 286 struct ubifs_nnode *parent, int iip);
234void dbg_dump_tnc(struct ubifs_info *c); 287void dbg_dump_tnc(struct ubifs_info *c);
235void dbg_dump_index(struct ubifs_info *c); 288void dbg_dump_index(struct ubifs_info *c);
289void dbg_dump_lpt_lebs(const struct ubifs_info *c);
236 290
237/* Checking helper functions */ 291/* Checking helper functions */
238
239typedef int (*dbg_leaf_callback)(struct ubifs_info *c, 292typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
240 struct ubifs_zbranch *zbr, void *priv); 293 struct ubifs_zbranch *zbr, void *priv);
241typedef int (*dbg_znode_callback)(struct ubifs_info *c, 294typedef int (*dbg_znode_callback)(struct ubifs_info *c,
@@ -274,9 +327,6 @@ int dbg_force_in_the_gaps(void);
274 327
275#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) 328#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
276 329
277void dbg_failure_mode_registration(struct ubifs_info *c);
278void dbg_failure_mode_deregistration(struct ubifs_info *c);
279
280#ifndef UBIFS_DBG_PRESERVE_UBI 330#ifndef UBIFS_DBG_PRESERVE_UBI
281 331
282#define ubi_leb_read dbg_leb_read 332#define ubi_leb_read dbg_leb_read
@@ -318,9 +368,13 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
318 return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); 368 return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN);
319} 369}
320 370
321#else /* !CONFIG_UBIFS_FS_DEBUG */ 371/* Debugfs-related stuff */
372int dbg_debugfs_init(void);
373void dbg_debugfs_exit(void);
374int dbg_debugfs_init_fs(struct ubifs_info *c);
375void dbg_debugfs_exit_fs(struct ubifs_info *c);
322 376
323#define UBIFS_DBG(op) 377#else /* !CONFIG_UBIFS_FS_DEBUG */
324 378
325/* Use "if (0)" to make compiler check arguments even if debugging is off */ 379/* Use "if (0)" to make compiler check arguments even if debugging is off */
326#define ubifs_assert(expr) do { \ 380#define ubifs_assert(expr) do { \
@@ -360,23 +414,28 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
360#define DBGKEY(key) ((char *)(key)) 414#define DBGKEY(key) ((char *)(key))
361#define DBGKEY1(key) ((char *)(key)) 415#define DBGKEY1(key) ((char *)(key))
362 416
363#define dbg_ntype(type) "" 417#define ubifs_debugging_init(c) 0
364#define dbg_cstate(cmt_state) "" 418#define ubifs_debugging_exit(c) ({})
365#define dbg_get_key_dump(c, key) ({}) 419
366#define dbg_dump_inode(c, inode) ({}) 420#define dbg_ntype(type) ""
367#define dbg_dump_node(c, node) ({}) 421#define dbg_cstate(cmt_state) ""
368#define dbg_dump_budget_req(req) ({}) 422#define dbg_get_key_dump(c, key) ({})
369#define dbg_dump_lstats(lst) ({}) 423#define dbg_dump_inode(c, inode) ({})
370#define dbg_dump_budg(c) ({}) 424#define dbg_dump_node(c, node) ({})
371#define dbg_dump_lprop(c, lp) ({}) 425#define dbg_dump_lpt_node(c, node, lnum, offs) ({})
372#define dbg_dump_lprops(c) ({}) 426#define dbg_dump_budget_req(req) ({})
373#define dbg_dump_lpt_info(c) ({}) 427#define dbg_dump_lstats(lst) ({})
374#define dbg_dump_leb(c, lnum) ({}) 428#define dbg_dump_budg(c) ({})
375#define dbg_dump_znode(c, znode) ({}) 429#define dbg_dump_lprop(c, lp) ({})
376#define dbg_dump_heap(c, heap, cat) ({}) 430#define dbg_dump_lprops(c) ({})
377#define dbg_dump_pnode(c, pnode, parent, iip) ({}) 431#define dbg_dump_lpt_info(c) ({})
378#define dbg_dump_tnc(c) ({}) 432#define dbg_dump_leb(c, lnum) ({})
379#define dbg_dump_index(c) ({}) 433#define dbg_dump_znode(c, znode) ({})
434#define dbg_dump_heap(c, heap, cat) ({})
435#define dbg_dump_pnode(c, pnode, parent, iip) ({})
436#define dbg_dump_tnc(c) ({})
437#define dbg_dump_index(c) ({})
438#define dbg_dump_lpt_lebs(c) ({})
380 439
381#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 440#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
382#define dbg_old_index_check_init(c, zroot) 0 441#define dbg_old_index_check_init(c, zroot) 0
@@ -396,9 +455,11 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
396#define dbg_force_in_the_gaps_enabled 0 455#define dbg_force_in_the_gaps_enabled 0
397#define dbg_force_in_the_gaps() 0 456#define dbg_force_in_the_gaps() 0
398#define dbg_failure_mode 0 457#define dbg_failure_mode 0
399#define dbg_failure_mode_registration(c) ({})
400#define dbg_failure_mode_deregistration(c) ({})
401 458
402#endif /* !CONFIG_UBIFS_FS_DEBUG */ 459#define dbg_debugfs_init() 0
460#define dbg_debugfs_exit()
461#define dbg_debugfs_init_fs(c) 0
462#define dbg_debugfs_exit_fs(c) 0
403 463
464#endif /* !CONFIG_UBIFS_FS_DEBUG */
404#endif /* !__UBIFS_DEBUG_H__ */ 465#endif /* !__UBIFS_DEBUG_H__ */
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 2624411d9758..fe82d2464d46 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -72,8 +72,8 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
72 return err; 72 return err;
73 } 73 }
74 74
75 ubifs_assert(le64_to_cpu(dn->ch.sqnum) > ubifs_inode(inode)->creat_sqnum); 75 ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
76 76 ubifs_inode(inode)->creat_sqnum);
77 len = le32_to_cpu(dn->size); 77 len = le32_to_cpu(dn->size);
78 if (len <= 0 || len > UBIFS_BLOCK_SIZE) 78 if (len <= 0 || len > UBIFS_BLOCK_SIZE)
79 goto dump; 79 goto dump;
@@ -254,7 +254,7 @@ static int write_begin_slow(struct address_space *mapping,
254 } 254 }
255 255
256 if (!PageUptodate(page)) { 256 if (!PageUptodate(page)) {
257 if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) 257 if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
258 SetPageChecked(page); 258 SetPageChecked(page);
259 else { 259 else {
260 err = do_readpage(page); 260 err = do_readpage(page);
@@ -444,7 +444,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
444 444
445 if (!PageUptodate(page)) { 445 if (!PageUptodate(page)) {
446 /* The page is not loaded from the flash */ 446 /* The page is not loaded from the flash */
447 if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) 447 if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
448 /* 448 /*
449 * We change whole page so no need to load it. But we 449 * We change whole page so no need to load it. But we
450 * have to set the @PG_checked flag to make the further 450 * have to set the @PG_checked flag to make the further
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 5e82cffe9695..6db7a6be6c97 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -154,6 +154,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
154 case FS_IOC_GETFLAGS: 154 case FS_IOC_GETFLAGS:
155 flags = ubifs2ioctl(ubifs_inode(inode)->flags); 155 flags = ubifs2ioctl(ubifs_inode(inode)->flags);
156 156
157 dbg_gen("get flags: %#x, i_flags %#x", flags, inode->i_flags);
157 return put_user(flags, (int __user *) arg); 158 return put_user(flags, (int __user *) arg);
158 159
159 case FS_IOC_SETFLAGS: { 160 case FS_IOC_SETFLAGS: {
@@ -176,6 +177,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
176 err = mnt_want_write(file->f_path.mnt); 177 err = mnt_want_write(file->f_path.mnt);
177 if (err) 178 if (err)
178 return err; 179 return err;
180 dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags);
179 err = setflags(inode, flags); 181 err = setflags(inode, flags);
180 mnt_drop_write(file->f_path.mnt); 182 mnt_drop_write(file->f_path.mnt);
181 return err; 183 return err;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index f91b745908ea..10ae25b7d1db 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -704,7 +704,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
704 data->size = cpu_to_le32(len); 704 data->size = cpu_to_le32(len);
705 zero_data_node_unused(data); 705 zero_data_node_unused(data);
706 706
707 if (!(ui->flags && UBIFS_COMPR_FL)) 707 if (!(ui->flags & UBIFS_COMPR_FL))
708 /* Compression is disabled for this inode */ 708 /* Compression is disabled for this inode */
709 compr_type = UBIFS_COMPR_NONE; 709 compr_type = UBIFS_COMPR_NONE;
710 else 710 else
@@ -1220,7 +1220,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
1220 data_key_init(c, &key, inum, blk); 1220 data_key_init(c, &key, inum, blk);
1221 1221
1222 bit = old_size & (UBIFS_BLOCK_SIZE - 1); 1222 bit = old_size & (UBIFS_BLOCK_SIZE - 1);
1223 blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0: 1); 1223 blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0 : 1);
1224 data_key_init(c, &to_key, inum, blk); 1224 data_key_init(c, &to_key, inum, blk);
1225 1225
1226 err = ubifs_tnc_remove_range(c, &key, &to_key); 1226 err = ubifs_tnc_remove_range(c, &key, &to_key);
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 3f1f16bc25c9..efb3430a2581 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -38,6 +38,22 @@
38#define __UBIFS_KEY_H__ 38#define __UBIFS_KEY_H__
39 39
40/** 40/**
41 * key_mask_hash - mask a valid hash value.
43 * @hash: hash value to be masked
43 *
44 * We use hash values as offset in directories, so values %0 and %1 are
45 * reserved for "." and "..". %2 is reserved for "end of readdir" marker. This
46 * function makes sure the reserved values are not used.
47 */
48static inline uint32_t key_mask_hash(uint32_t hash)
49{
50 hash &= UBIFS_S_KEY_HASH_MASK;
51 if (unlikely(hash <= 2))
52 hash += 3;
53 return hash;
54}
55
56/**
41 * key_r5_hash - R5 hash function (borrowed from reiserfs). 57 * key_r5_hash - R5 hash function (borrowed from reiserfs).
42 * @s: direntry name 58 * @s: direntry name
43 * @len: name length 59 * @len: name length
@@ -54,16 +70,7 @@ static inline uint32_t key_r5_hash(const char *s, int len)
54 str++; 70 str++;
55 } 71 }
56 72
57 a &= UBIFS_S_KEY_HASH_MASK; 73 return key_mask_hash(a);
58
59 /*
60 * We use hash values as offset in directories, so values %0 and %1 are
61 * reserved for "." and "..". %2 is reserved for "end of readdir"
62 * marker.
63 */
64 if (unlikely(a >= 0 && a <= 2))
65 a += 3;
66 return a;
67} 74}
68 75
69/** 76/**
@@ -77,10 +84,7 @@ static inline uint32_t key_test_hash(const char *str, int len)
77 84
78 len = min_t(uint32_t, len, 4); 85 len = min_t(uint32_t, len, 4);
79 memcpy(&a, str, len); 86 memcpy(&a, str, len);
80 a &= UBIFS_S_KEY_HASH_MASK; 87 return key_mask_hash(a);
81 if (unlikely(a >= 0 && a <= 2))
82 a += 3;
83 return a;
84} 88}
85 89
86/** 90/**
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index f27176e9b70d..dfd2bcece27a 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -520,13 +520,13 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
520 * @flags: new flags 520 * @flags: new flags
521 * @idx_gc_cnt: change to the count of idx_gc list 521 * @idx_gc_cnt: change to the count of idx_gc list
522 * 522 *
523 * This function changes LEB properties. This function does not change a LEB 523 * This function changes LEB properties (@free, @dirty or @flag). However, the
524 * property (@free, @dirty or @flag) if the value passed is %LPROPS_NC. 524 * property which has the %LPROPS_NC value is not changed. Returns a pointer to
525 * the updated LEB properties on success and a negative error code on failure.
525 * 526 *
526 * This function returns a pointer to the updated LEB properties on success 527 * Note, the LEB properties may have had to be copied (due to COW) and
527 * and a negative error code on failure. N.B. the LEB properties may have had to 528 * consequently the pointer returned may not be the same as the pointer
528 * be copied (due to COW) and consequently the pointer returned may not be the 529 * passed.
529 * same as the pointer passed.
530 */ 530 */
531const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, 531const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
532 const struct ubifs_lprops *lp, 532 const struct ubifs_lprops *lp,
@@ -1088,7 +1088,7 @@ static int scan_check_cb(struct ubifs_info *c,
1088 } 1088 }
1089 } 1089 }
1090 1090
1091 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); 1091 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
1092 if (IS_ERR(sleb)) { 1092 if (IS_ERR(sleb)) {
1093 /* 1093 /*
1094 * After an unclean unmount, empty and freeable LEBs 1094 * After an unclean unmount, empty and freeable LEBs
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index db8bd0e518b2..b2792e84d245 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -36,15 +36,16 @@
36 * can be written into a single eraseblock. In that case, garbage collection 36 * can be written into a single eraseblock. In that case, garbage collection
37 * consists of just writing the whole table, which therefore makes all other 37 * consists of just writing the whole table, which therefore makes all other
38 * eraseblocks reusable. In the case of the big model, dirty eraseblocks are 38 * eraseblocks reusable. In the case of the big model, dirty eraseblocks are
39 * selected for garbage collection, which consists are marking the nodes in 39 * selected for garbage collection, which consists of marking the clean nodes in
40 * that LEB as dirty, and then only the dirty nodes are written out. Also, in 40 * that LEB as dirty, and then only the dirty nodes are written out. Also, in
41 * the case of the big model, a table of LEB numbers is saved so that the entire 41 * the case of the big model, a table of LEB numbers is saved so that the entire
42 * LPT does not need to be scanned looking for empty eraseblocks when UBIFS is first 42 * LPT does not need to be scanned looking for empty eraseblocks when UBIFS is first
43 * mounted. 43 * mounted.
44 */ 44 */
45 45
46#include <linux/crc16.h>
47#include "ubifs.h" 46#include "ubifs.h"
47#include <linux/crc16.h>
48#include <linux/math64.h>
48 49
49/** 50/**
50 * do_calc_lpt_geom - calculate sizes for the LPT area. 51 * do_calc_lpt_geom - calculate sizes for the LPT area.
@@ -135,15 +136,13 @@ static void do_calc_lpt_geom(struct ubifs_info *c)
135int ubifs_calc_lpt_geom(struct ubifs_info *c) 136int ubifs_calc_lpt_geom(struct ubifs_info *c)
136{ 137{
137 int lebs_needed; 138 int lebs_needed;
138 uint64_t sz; 139 long long sz;
139 140
140 do_calc_lpt_geom(c); 141 do_calc_lpt_geom(c);
141 142
142 /* Verify that lpt_lebs is big enough */ 143 /* Verify that lpt_lebs is big enough */
143 sz = c->lpt_sz * 2; /* Must have at least 2 times the size */ 144 sz = c->lpt_sz * 2; /* Must have at least 2 times the size */
144 sz += c->leb_size - 1; 145 lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size);
145 do_div(sz, c->leb_size);
146 lebs_needed = sz;
147 if (lebs_needed > c->lpt_lebs) { 146 if (lebs_needed > c->lpt_lebs) {
148 ubifs_err("too few LPT LEBs"); 147 ubifs_err("too few LPT LEBs");
149 return -EINVAL; 148 return -EINVAL;
@@ -156,7 +155,6 @@ int ubifs_calc_lpt_geom(struct ubifs_info *c)
156 } 155 }
157 156
158 c->check_lpt_free = c->big_lpt; 157 c->check_lpt_free = c->big_lpt;
159
160 return 0; 158 return 0;
161} 159}
162 160
@@ -176,7 +174,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
176 int *big_lpt) 174 int *big_lpt)
177{ 175{
178 int i, lebs_needed; 176 int i, lebs_needed;
179 uint64_t sz; 177 long long sz;
180 178
181 /* Start by assuming the minimum number of LPT LEBs */ 179 /* Start by assuming the minimum number of LPT LEBs */
182 c->lpt_lebs = UBIFS_MIN_LPT_LEBS; 180 c->lpt_lebs = UBIFS_MIN_LPT_LEBS;
@@ -203,9 +201,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
203 /* Now check there are enough LPT LEBs */ 201 /* Now check there are enough LPT LEBs */
204 for (i = 0; i < 64 ; i++) { 202 for (i = 0; i < 64 ; i++) {
205 sz = c->lpt_sz * 4; /* Allow 4 times the size */ 203 sz = c->lpt_sz * 4; /* Allow 4 times the size */
206 sz += c->leb_size - 1; 204 lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size);
207 do_div(sz, c->leb_size);
208 lebs_needed = sz;
209 if (lebs_needed > c->lpt_lebs) { 205 if (lebs_needed > c->lpt_lebs) {
210 /* Not enough LPT LEBs so try again with more */ 206 /* Not enough LPT LEBs so try again with more */
211 c->lpt_lebs = lebs_needed; 207 c->lpt_lebs = lebs_needed;
@@ -558,7 +554,7 @@ static int calc_nnode_num(int row, int col)
558 * This function calculates and returns the nnode number based on the parent's 554 * This function calculates and returns the nnode number based on the parent's
559 * nnode number and the index in parent. 555 * nnode number and the index in parent.
560 */ 556 */
561static int calc_nnode_num_from_parent(struct ubifs_info *c, 557static int calc_nnode_num_from_parent(const struct ubifs_info *c,
562 struct ubifs_nnode *parent, int iip) 558 struct ubifs_nnode *parent, int iip)
563{ 559{
564 int num, shft; 560 int num, shft;
@@ -583,7 +579,7 @@ static int calc_nnode_num_from_parent(struct ubifs_info *c,
583 * This function calculates and returns the pnode number based on the parent's 579 * This function calculates and returns the pnode number based on the parent's
584 * nnode number and the index in parent. 580 * nnode number and the index in parent.
585 */ 581 */
586static int calc_pnode_num_from_parent(struct ubifs_info *c, 582static int calc_pnode_num_from_parent(const struct ubifs_info *c,
587 struct ubifs_nnode *parent, int iip) 583 struct ubifs_nnode *parent, int iip)
588{ 584{
589 int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0; 585 int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0;
@@ -966,7 +962,7 @@ static int check_lpt_type(uint8_t **addr, int *pos, int type)
966 * 962 *
967 * This function returns %0 on success and a negative error code on failure. 963 * This function returns %0 on success and a negative error code on failure.
968 */ 964 */
969static int unpack_pnode(struct ubifs_info *c, void *buf, 965static int unpack_pnode(const struct ubifs_info *c, void *buf,
970 struct ubifs_pnode *pnode) 966 struct ubifs_pnode *pnode)
971{ 967{
972 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 968 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
@@ -996,15 +992,15 @@ static int unpack_pnode(struct ubifs_info *c, void *buf,
996} 992}
997 993
998/** 994/**
999 * unpack_nnode - unpack a nnode. 995 * ubifs_unpack_nnode - unpack a nnode.
1000 * @c: UBIFS file-system description object 996 * @c: UBIFS file-system description object
1001 * @buf: buffer containing packed nnode to unpack 997 * @buf: buffer containing packed nnode to unpack
1002 * @nnode: nnode structure to fill 998 * @nnode: nnode structure to fill
1003 * 999 *
1004 * This function returns %0 on success and a negative error code on failure. 1000 * This function returns %0 on success and a negative error code on failure.
1005 */ 1001 */
1006static int unpack_nnode(struct ubifs_info *c, void *buf, 1002int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
1007 struct ubifs_nnode *nnode) 1003 struct ubifs_nnode *nnode)
1008{ 1004{
1009 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1005 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1010 int i, pos = 0, err; 1006 int i, pos = 0, err;
@@ -1036,7 +1032,7 @@ static int unpack_nnode(struct ubifs_info *c, void *buf,
1036 * 1032 *
1037 * This function returns %0 on success and a negative error code on failure. 1033 * This function returns %0 on success and a negative error code on failure.
1038 */ 1034 */
1039static int unpack_ltab(struct ubifs_info *c, void *buf) 1035static int unpack_ltab(const struct ubifs_info *c, void *buf)
1040{ 1036{
1041 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1037 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1042 int i, pos = 0, err; 1038 int i, pos = 0, err;
@@ -1068,7 +1064,7 @@ static int unpack_ltab(struct ubifs_info *c, void *buf)
1068 * 1064 *
1069 * This function returns %0 on success and a negative error code on failure. 1065 * This function returns %0 on success and a negative error code on failure.
1070 */ 1066 */
1071static int unpack_lsave(struct ubifs_info *c, void *buf) 1067static int unpack_lsave(const struct ubifs_info *c, void *buf)
1072{ 1068{
1073 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1069 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1074 int i, pos = 0, err; 1070 int i, pos = 0, err;
@@ -1096,7 +1092,7 @@ static int unpack_lsave(struct ubifs_info *c, void *buf)
1096 * 1092 *
1097 * This function returns %0 on success and a negative error code on failure. 1093 * This function returns %0 on success and a negative error code on failure.
1098 */ 1094 */
1099static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode, 1095static int validate_nnode(const struct ubifs_info *c, struct ubifs_nnode *nnode,
1100 struct ubifs_nnode *parent, int iip) 1096 struct ubifs_nnode *parent, int iip)
1101{ 1097{
1102 int i, lvl, max_offs; 1098 int i, lvl, max_offs;
@@ -1140,7 +1136,7 @@ static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode,
1140 * 1136 *
1141 * This function returns %0 on success and a negative error code on failure. 1137 * This function returns %0 on success and a negative error code on failure.
1142 */ 1138 */
1143static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, 1139static int validate_pnode(const struct ubifs_info *c, struct ubifs_pnode *pnode,
1144 struct ubifs_nnode *parent, int iip) 1140 struct ubifs_nnode *parent, int iip)
1145{ 1141{
1146 int i; 1142 int i;
@@ -1174,7 +1170,8 @@ static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
1174 * This function calculates the LEB numbers for the LEB properties it contains 1170 * This function calculates the LEB numbers for the LEB properties it contains
1175 * based on the pnode number. 1171 * based on the pnode number.
1176 */ 1172 */
1177static void set_pnode_lnum(struct ubifs_info *c, struct ubifs_pnode *pnode) 1173static void set_pnode_lnum(const struct ubifs_info *c,
1174 struct ubifs_pnode *pnode)
1178{ 1175{
1179 int i, lnum; 1176 int i, lnum;
1180 1177
@@ -1227,7 +1224,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1227 err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); 1224 err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz);
1228 if (err) 1225 if (err)
1229 goto out; 1226 goto out;
1230 err = unpack_nnode(c, buf, nnode); 1227 err = ubifs_unpack_nnode(c, buf, nnode);
1231 if (err) 1228 if (err)
1232 goto out; 1229 goto out;
1233 } 1230 }
@@ -1816,7 +1813,7 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c,
1816 c->nnode_sz); 1813 c->nnode_sz);
1817 if (err) 1814 if (err)
1818 return ERR_PTR(err); 1815 return ERR_PTR(err);
1819 err = unpack_nnode(c, buf, nnode); 1816 err = ubifs_unpack_nnode(c, buf, nnode);
1820 if (err) 1817 if (err)
1821 return ERR_PTR(err); 1818 return ERR_PTR(err);
1822 } 1819 }
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index a41434b42785..96ca95707175 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -320,6 +320,8 @@ no_space:
320 dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, " 320 dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, "
321 "done_lsave %d", lnum, offs, len, done_ltab, done_lsave); 321 "done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
322 dbg_dump_lpt_info(c); 322 dbg_dump_lpt_info(c);
323 dbg_dump_lpt_lebs(c);
324 dump_stack();
323 return err; 325 return err;
324} 326}
325 327
@@ -546,8 +548,10 @@ static int write_cnodes(struct ubifs_info *c)
546no_space: 548no_space:
547 ubifs_err("LPT out of space mismatch"); 549 ubifs_err("LPT out of space mismatch");
548 dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab " 550 dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab "
549 "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); 551 "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
550 dbg_dump_lpt_info(c); 552 dbg_dump_lpt_info(c);
553 dbg_dump_lpt_lebs(c);
554 dump_stack();
551 return err; 555 return err;
552} 556}
553 557
@@ -749,7 +753,7 @@ static void lpt_tgc_start(struct ubifs_info *c)
749 * LPT trivial garbage collection is where a LPT LEB contains only dirty and 753 * LPT trivial garbage collection is where a LPT LEB contains only dirty and
750 * free space and so may be reused as soon as the next commit is completed. 754 * free space and so may be reused as soon as the next commit is completed.
751 * This function is called after the commit is completed (master node has been 755 * This function is called after the commit is completed (master node has been
752 * written) and unmaps LPT LEBs that were marked for trivial GC. 756 * written) and un-maps LPT LEBs that were marked for trivial GC.
753 */ 757 */
754static int lpt_tgc_end(struct ubifs_info *c) 758static int lpt_tgc_end(struct ubifs_info *c)
755{ 759{
@@ -1025,7 +1029,7 @@ static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num,
1025 * @c: UBIFS file-system description object 1029 * @c: UBIFS file-system description object
1026 * @node_type: LPT node type 1030 * @node_type: LPT node type
1027 */ 1031 */
1028static int get_lpt_node_len(struct ubifs_info *c, int node_type) 1032static int get_lpt_node_len(const struct ubifs_info *c, int node_type)
1029{ 1033{
1030 switch (node_type) { 1034 switch (node_type) {
1031 case UBIFS_LPT_NNODE: 1035 case UBIFS_LPT_NNODE:
@@ -1046,7 +1050,7 @@ static int get_lpt_node_len(struct ubifs_info *c, int node_type)
1046 * @buf: buffer 1050 * @buf: buffer
1047 * @len: length of buffer 1051 * @len: length of buffer
1048 */ 1052 */
1049static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len) 1053static int get_pad_len(const struct ubifs_info *c, uint8_t *buf, int len)
1050{ 1054{
1051 int offs, pad_len; 1055 int offs, pad_len;
1052 1056
@@ -1063,7 +1067,8 @@ static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len)
1063 * @buf: buffer 1067 * @buf: buffer
1064 * @node_num: node number is returned here 1068 * @node_num: node number is returned here
1065 */ 1069 */
1066static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num) 1070static int get_lpt_node_type(const struct ubifs_info *c, uint8_t *buf,
1071 int *node_num)
1067{ 1072{
1068 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1073 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1069 int pos = 0, node_type; 1074 int pos = 0, node_type;
@@ -1081,7 +1086,7 @@ static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num)
1081 * 1086 *
1082 * This function returns %1 if the buffer contains a node or %0 if it does not. 1087 * This function returns %1 if the buffer contains a node or %0 if it does not.
1083 */ 1088 */
1084static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len) 1089static int is_a_node(const struct ubifs_info *c, uint8_t *buf, int len)
1085{ 1090{
1086 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1091 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1087 int pos = 0, node_type, node_len; 1092 int pos = 0, node_type, node_len;
@@ -1105,7 +1110,6 @@ static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len)
1105 return 1; 1110 return 1;
1106} 1111}
1107 1112
1108
1109/** 1113/**
1110 * lpt_gc_lnum - garbage collect a LPT LEB. 1114 * lpt_gc_lnum - garbage collect a LPT LEB.
1111 * @c: UBIFS file-system description object 1115 * @c: UBIFS file-system description object
@@ -1463,7 +1467,7 @@ void ubifs_lpt_free(struct ubifs_info *c, int wr_only)
1463#ifdef CONFIG_UBIFS_FS_DEBUG 1467#ifdef CONFIG_UBIFS_FS_DEBUG
1464 1468
1465/** 1469/**
1466 * dbg_is_all_ff - determine if a buffer contains only 0xff bytes. 1470 * dbg_is_all_ff - determine if a buffer contains only 0xFF bytes.
1467 * @buf: buffer 1471 * @buf: buffer
1468 * @len: buffer length 1472 * @len: buffer length
1469 */ 1473 */
@@ -1488,7 +1492,7 @@ static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs)
1488 struct ubifs_nnode *nnode; 1492 struct ubifs_nnode *nnode;
1489 int hght; 1493 int hght;
1490 1494
1491 /* Entire tree is in memory so first_nnode / next_nnode are ok */ 1495 /* Entire tree is in memory so first_nnode / next_nnode are OK */
1492 nnode = first_nnode(c, &hght); 1496 nnode = first_nnode(c, &hght);
1493 for (; nnode; nnode = next_nnode(c, nnode, &hght)) { 1497 for (; nnode; nnode = next_nnode(c, nnode, &hght)) {
1494 struct ubifs_nbranch *branch; 1498 struct ubifs_nbranch *branch;
@@ -1602,7 +1606,10 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1602{ 1606{
1603 int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; 1607 int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
1604 int ret; 1608 int ret;
1605 void *buf = c->dbg_buf; 1609 void *buf = c->dbg->buf;
1610
1611 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1612 return 0;
1606 1613
1607 dbg_lp("LEB %d", lnum); 1614 dbg_lp("LEB %d", lnum);
1608 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1615 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
@@ -1704,6 +1711,9 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
1704 long long free = 0; 1711 long long free = 0;
1705 int i; 1712 int i;
1706 1713
1714 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1715 return 0;
1716
1707 for (i = 0; i < c->lpt_lebs; i++) { 1717 for (i = 0; i < c->lpt_lebs; i++) {
1708 if (c->ltab[i].tgc || c->ltab[i].cmt) 1718 if (c->ltab[i].tgc || c->ltab[i].cmt)
1709 continue; 1719 continue;
@@ -1716,6 +1726,8 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
1716 dbg_err("LPT space error: free %lld lpt_sz %lld", 1726 dbg_err("LPT space error: free %lld lpt_sz %lld",
1717 free, c->lpt_sz); 1727 free, c->lpt_sz);
1718 dbg_dump_lpt_info(c); 1728 dbg_dump_lpt_info(c);
1729 dbg_dump_lpt_lebs(c);
1730 dump_stack();
1719 return -EINVAL; 1731 return -EINVAL;
1720 } 1732 }
1721 return 0; 1733 return 0;
@@ -1731,15 +1743,19 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
1731 */ 1743 */
1732int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) 1744int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1733{ 1745{
1746 struct ubifs_debug_info *d = c->dbg;
1734 long long chk_lpt_sz, lpt_sz; 1747 long long chk_lpt_sz, lpt_sz;
1735 int err = 0; 1748 int err = 0;
1736 1749
1750 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1751 return 0;
1752
1737 switch (action) { 1753 switch (action) {
1738 case 0: 1754 case 0:
1739 c->chk_lpt_sz = 0; 1755 d->chk_lpt_sz = 0;
1740 c->chk_lpt_sz2 = 0; 1756 d->chk_lpt_sz2 = 0;
1741 c->chk_lpt_lebs = 0; 1757 d->chk_lpt_lebs = 0;
1742 c->chk_lpt_wastage = 0; 1758 d->chk_lpt_wastage = 0;
1743 if (c->dirty_pn_cnt > c->pnode_cnt) { 1759 if (c->dirty_pn_cnt > c->pnode_cnt) {
1744 dbg_err("dirty pnodes %d exceed max %d", 1760 dbg_err("dirty pnodes %d exceed max %d",
1745 c->dirty_pn_cnt, c->pnode_cnt); 1761 c->dirty_pn_cnt, c->pnode_cnt);
@@ -1752,35 +1768,35 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1752 } 1768 }
1753 return err; 1769 return err;
1754 case 1: 1770 case 1:
1755 c->chk_lpt_sz += len; 1771 d->chk_lpt_sz += len;
1756 return 0; 1772 return 0;
1757 case 2: 1773 case 2:
1758 c->chk_lpt_sz += len; 1774 d->chk_lpt_sz += len;
1759 c->chk_lpt_wastage += len; 1775 d->chk_lpt_wastage += len;
1760 c->chk_lpt_lebs += 1; 1776 d->chk_lpt_lebs += 1;
1761 return 0; 1777 return 0;
1762 case 3: 1778 case 3:
1763 chk_lpt_sz = c->leb_size; 1779 chk_lpt_sz = c->leb_size;
1764 chk_lpt_sz *= c->chk_lpt_lebs; 1780 chk_lpt_sz *= d->chk_lpt_lebs;
1765 chk_lpt_sz += len - c->nhead_offs; 1781 chk_lpt_sz += len - c->nhead_offs;
1766 if (c->chk_lpt_sz != chk_lpt_sz) { 1782 if (d->chk_lpt_sz != chk_lpt_sz) {
1767 dbg_err("LPT wrote %lld but space used was %lld", 1783 dbg_err("LPT wrote %lld but space used was %lld",
1768 c->chk_lpt_sz, chk_lpt_sz); 1784 d->chk_lpt_sz, chk_lpt_sz);
1769 err = -EINVAL; 1785 err = -EINVAL;
1770 } 1786 }
1771 if (c->chk_lpt_sz > c->lpt_sz) { 1787 if (d->chk_lpt_sz > c->lpt_sz) {
1772 dbg_err("LPT wrote %lld but lpt_sz is %lld", 1788 dbg_err("LPT wrote %lld but lpt_sz is %lld",
1773 c->chk_lpt_sz, c->lpt_sz); 1789 d->chk_lpt_sz, c->lpt_sz);
1774 err = -EINVAL; 1790 err = -EINVAL;
1775 } 1791 }
1776 if (c->chk_lpt_sz2 && c->chk_lpt_sz != c->chk_lpt_sz2) { 1792 if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) {
1777 dbg_err("LPT layout size %lld but wrote %lld", 1793 dbg_err("LPT layout size %lld but wrote %lld",
1778 c->chk_lpt_sz, c->chk_lpt_sz2); 1794 d->chk_lpt_sz, d->chk_lpt_sz2);
1779 err = -EINVAL; 1795 err = -EINVAL;
1780 } 1796 }
1781 if (c->chk_lpt_sz2 && c->new_nhead_offs != len) { 1797 if (d->chk_lpt_sz2 && d->new_nhead_offs != len) {
1782 dbg_err("LPT new nhead offs: expected %d was %d", 1798 dbg_err("LPT new nhead offs: expected %d was %d",
1783 c->new_nhead_offs, len); 1799 d->new_nhead_offs, len);
1784 err = -EINVAL; 1800 err = -EINVAL;
1785 } 1801 }
1786 lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; 1802 lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
@@ -1788,26 +1804,146 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1788 lpt_sz += c->ltab_sz; 1804 lpt_sz += c->ltab_sz;
1789 if (c->big_lpt) 1805 if (c->big_lpt)
1790 lpt_sz += c->lsave_sz; 1806 lpt_sz += c->lsave_sz;
1791 if (c->chk_lpt_sz - c->chk_lpt_wastage > lpt_sz) { 1807 if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) {
1792 dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld", 1808 dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
1793 c->chk_lpt_sz, c->chk_lpt_wastage, lpt_sz); 1809 d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz);
1794 err = -EINVAL; 1810 err = -EINVAL;
1795 } 1811 }
1796 if (err) 1812 if (err) {
1797 dbg_dump_lpt_info(c); 1813 dbg_dump_lpt_info(c);
1798 c->chk_lpt_sz2 = c->chk_lpt_sz; 1814 dbg_dump_lpt_lebs(c);
1799 c->chk_lpt_sz = 0; 1815 dump_stack();
1800 c->chk_lpt_wastage = 0; 1816 }
1801 c->chk_lpt_lebs = 0; 1817 d->chk_lpt_sz2 = d->chk_lpt_sz;
1802 c->new_nhead_offs = len; 1818 d->chk_lpt_sz = 0;
1819 d->chk_lpt_wastage = 0;
1820 d->chk_lpt_lebs = 0;
1821 d->new_nhead_offs = len;
1803 return err; 1822 return err;
1804 case 4: 1823 case 4:
1805 c->chk_lpt_sz += len; 1824 d->chk_lpt_sz += len;
1806 c->chk_lpt_wastage += len; 1825 d->chk_lpt_wastage += len;
1807 return 0; 1826 return 0;
1808 default: 1827 default:
1809 return -EINVAL; 1828 return -EINVAL;
1810 } 1829 }
1811} 1830}
1812 1831
1832/**
1833 * dbg_dump_lpt_leb - dump an LPT LEB.
1834 * @c: UBIFS file-system description object
1835 * @lnum: LEB number to dump
1836 *
1837 * This function dumps an LEB from LPT area. Nodes in this area are very
1838 * different to nodes in the main area (e.g., they do not have common headers,
1839 * they do not have 8-byte alignments, etc), so we have a separate function to
1840 * dump LPT area LEBs. Note, LPT has to be locked by the caller.
1841 */
1842static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1843{
1844 int err, len = c->leb_size, node_type, node_num, node_len, offs;
1845 void *buf = c->dbg->buf;
1846
1847 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
1848 current->pid, lnum);
1849 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
1850 if (err) {
1851 ubifs_err("cannot read LEB %d, error %d", lnum, err);
1852 return;
1853 }
1854 while (1) {
1855 offs = c->leb_size - len;
1856 if (!is_a_node(c, buf, len)) {
1857 int pad_len;
1858
1859 pad_len = get_pad_len(c, buf, len);
1860 if (pad_len) {
1861 printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n",
1862 lnum, offs, pad_len);
1863 buf += pad_len;
1864 len -= pad_len;
1865 continue;
1866 }
1867 if (len)
1868 printk(KERN_DEBUG "LEB %d:%d, free %d bytes\n",
1869 lnum, offs, len);
1870 break;
1871 }
1872
1873 node_type = get_lpt_node_type(c, buf, &node_num);
1874 switch (node_type) {
1875 case UBIFS_LPT_PNODE:
1876 {
1877 node_len = c->pnode_sz;
1878 if (c->big_lpt)
1879 printk(KERN_DEBUG "LEB %d:%d, pnode num %d\n",
1880 lnum, offs, node_num);
1881 else
1882 printk(KERN_DEBUG "LEB %d:%d, pnode\n",
1883 lnum, offs);
1884 break;
1885 }
1886 case UBIFS_LPT_NNODE:
1887 {
1888 int i;
1889 struct ubifs_nnode nnode;
1890
1891 node_len = c->nnode_sz;
1892 if (c->big_lpt)
1893 printk(KERN_DEBUG "LEB %d:%d, nnode num %d, ",
1894 lnum, offs, node_num);
1895 else
1896 printk(KERN_DEBUG "LEB %d:%d, nnode, ",
1897 lnum, offs);
1898 err = ubifs_unpack_nnode(c, buf, &nnode);
1899 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1900 printk("%d:%d", nnode.nbranch[i].lnum,
1901 nnode.nbranch[i].offs);
1902 if (i != UBIFS_LPT_FANOUT - 1)
1903 printk(", ");
1904 }
1905 printk("\n");
1906 break;
1907 }
1908 case UBIFS_LPT_LTAB:
1909 node_len = c->ltab_sz;
1910 printk(KERN_DEBUG "LEB %d:%d, ltab\n",
1911 lnum, offs);
1912 break;
1913 case UBIFS_LPT_LSAVE:
1914 node_len = c->lsave_sz;
1915 printk(KERN_DEBUG "LEB %d:%d, lsave len\n", lnum, offs);
1916 break;
1917 default:
1918 ubifs_err("LPT node type %d not recognized", node_type);
1919 return;
1920 }
1921
1922 buf += node_len;
1923 len -= node_len;
1924 }
1925
1926 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
1927 current->pid, lnum);
1928}
1929
1930/**
1931 * dbg_dump_lpt_lebs - dump LPT lebs.
1932 * @c: UBIFS file-system description object
1933 *
1934 * This function dumps all LPT LEBs. The caller has to make sure the LPT is
1935 * locked.
1936 */
1937void dbg_dump_lpt_lebs(const struct ubifs_info *c)
1938{
1939 int i;
1940
1941 printk(KERN_DEBUG "(pid %d) start dumping all LPT LEBs\n",
1942 current->pid);
1943 for (i = 0; i < c->lpt_lebs; i++)
1944 dump_lpt_leb(c, i + c->lpt_first);
1945 printk(KERN_DEBUG "(pid %d) finish dumping all LPT LEBs\n",
1946 current->pid);
1947}
1948
1813#endif /* CONFIG_UBIFS_FS_DEBUG */ 1949#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 9bd5a43d4526..9e6f403f170e 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -899,7 +899,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
899 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { 899 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
900 struct ubifs_scan_leb *sleb; 900 struct ubifs_scan_leb *sleb;
901 901
902 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); 902 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
903 if (IS_ERR(sleb)) { 903 if (IS_ERR(sleb)) {
904 err = PTR_ERR(sleb); 904 err = PTR_ERR(sleb);
905 break; 905 break;
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 21f7d047c306..ce42a7b0ca5a 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -144,7 +144,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
144 /* 144 /*
145 * If the replay order was perfect the dirty space would now be 145 * If the replay order was perfect the dirty space would now be
146 * zero. The order is not perfect because the journal heads 146 * zero. The order is not perfect because the journal heads
147 * race with eachother. This is not a problem but it does mean 147 * race with each other. This is not a problem but it does mean
148 * that the dirty space may temporarily exceed c->leb_size 148 * that the dirty space may temporarily exceed c->leb_size
149 * during the replay. 149 * during the replay.
150 */ 150 */
@@ -656,7 +656,7 @@ out_dump:
656 * @dirty: amount of dirty space from padding and deletion nodes 656 * @dirty: amount of dirty space from padding and deletion nodes
657 * 657 *
658 * This function inserts a reference node to the replay tree and returns zero 658 * This function inserts a reference node to the replay tree and returns zero
659 * in case of success ort a negative error code in case of failure. 659 * in case of success or a negative error code in case of failure.
660 */ 660 */
661static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, 661static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
662 unsigned long long sqnum, int free, int dirty) 662 unsigned long long sqnum, int free, int dirty)
@@ -883,7 +883,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
883 * This means that we reached end of log and now 883 * This means that we reached end of log and now
884 * look to the older log data, which was already 884 * look to the older log data, which was already
885 * committed but the eraseblock was not erased (UBIFS 885 * committed but the eraseblock was not erased (UBIFS
886 * only unmaps it). So this basically means we have to 886 * only un-maps it). So this basically means we have to
887 * exit with "end of log" code. 887 * exit with "end of log" code.
888 */ 888 */
889 err = 1; 889 err = 1;
@@ -1062,6 +1062,15 @@ int ubifs_replay_journal(struct ubifs_info *c)
1062 if (err) 1062 if (err)
1063 goto out; 1063 goto out;
1064 1064
1065 /*
1066 * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable
1067 * to roughly estimate index growth. Things like @c->min_idx_lebs
1068 * depend on it. This means we have to initialize it to make sure
1069 * budgeting works properly.
1070 */
1071 c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1072 c->budg_uncommitted_idx *= c->max_idx_node_sz;
1073
1065 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1074 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1066 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " 1075 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
1067 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1076 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 0f392351dc5a..e070c643d1bb 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -28,6 +28,7 @@
28 28
29#include "ubifs.h" 29#include "ubifs.h"
30#include <linux/random.h> 30#include <linux/random.h>
31#include <linux/math64.h>
31 32
32/* 33/*
33 * Default journal size in logical eraseblocks as a percent of total 34 * Default journal size in logical eraseblocks as a percent of total
@@ -80,7 +81,7 @@ static int create_default_filesystem(struct ubifs_info *c)
80 int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first; 81 int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first;
81 int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0; 82 int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;
82 int min_leb_cnt = UBIFS_MIN_LEB_CNT; 83 int min_leb_cnt = UBIFS_MIN_LEB_CNT;
83 uint64_t tmp64, main_bytes; 84 long long tmp64, main_bytes;
84 __le64 tmp_le64; 85 __le64 tmp_le64;
85 86
86 /* Some functions called from here depend on the @c->key_len field */ 87 /* Some functions called from here depend on the @c->key_len field */
@@ -160,7 +161,7 @@ static int create_default_filesystem(struct ubifs_info *c)
160 if (!sup) 161 if (!sup)
161 return -ENOMEM; 162 return -ENOMEM;
162 163
163 tmp64 = (uint64_t)max_buds * c->leb_size; 164 tmp64 = (long long)max_buds * c->leb_size;
164 if (big_lpt) 165 if (big_lpt)
165 sup_flags |= UBIFS_FLG_BIGLPT; 166 sup_flags |= UBIFS_FLG_BIGLPT;
166 167
@@ -179,14 +180,16 @@ static int create_default_filesystem(struct ubifs_info *c)
179 sup->fanout = cpu_to_le32(DEFAULT_FANOUT); 180 sup->fanout = cpu_to_le32(DEFAULT_FANOUT);
180 sup->lsave_cnt = cpu_to_le32(c->lsave_cnt); 181 sup->lsave_cnt = cpu_to_le32(c->lsave_cnt);
181 sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION); 182 sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION);
182 sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
183 sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN); 183 sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN);
184 if (c->mount_opts.override_compr)
185 sup->default_compr = cpu_to_le16(c->mount_opts.compr_type);
186 else
187 sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
184 188
185 generate_random_uuid(sup->uuid); 189 generate_random_uuid(sup->uuid);
186 190
187 main_bytes = (uint64_t)main_lebs * c->leb_size; 191 main_bytes = (long long)main_lebs * c->leb_size;
188 tmp64 = main_bytes * DEFAULT_RP_PERCENT; 192 tmp64 = div_u64(main_bytes * DEFAULT_RP_PERCENT, 100);
189 do_div(tmp64, 100);
190 if (tmp64 > DEFAULT_MAX_RP_SIZE) 193 if (tmp64 > DEFAULT_MAX_RP_SIZE)
191 tmp64 = DEFAULT_MAX_RP_SIZE; 194 tmp64 = DEFAULT_MAX_RP_SIZE;
192 sup->rp_size = cpu_to_le64(tmp64); 195 sup->rp_size = cpu_to_le64(tmp64);
@@ -582,16 +585,15 @@ int ubifs_read_superblock(struct ubifs_info *c)
582 c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT; 585 c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT;
583 c->fanout = le32_to_cpu(sup->fanout); 586 c->fanout = le32_to_cpu(sup->fanout);
584 c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); 587 c->lsave_cnt = le32_to_cpu(sup->lsave_cnt);
585 c->default_compr = le16_to_cpu(sup->default_compr);
586 c->rp_size = le64_to_cpu(sup->rp_size); 588 c->rp_size = le64_to_cpu(sup->rp_size);
587 c->rp_uid = le32_to_cpu(sup->rp_uid); 589 c->rp_uid = le32_to_cpu(sup->rp_uid);
588 c->rp_gid = le32_to_cpu(sup->rp_gid); 590 c->rp_gid = le32_to_cpu(sup->rp_gid);
589 sup_flags = le32_to_cpu(sup->flags); 591 sup_flags = le32_to_cpu(sup->flags);
592 if (!c->mount_opts.override_compr)
593 c->default_compr = le16_to_cpu(sup->default_compr);
590 594
591 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); 595 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
592
593 memcpy(&c->uuid, &sup->uuid, 16); 596 memcpy(&c->uuid, &sup->uuid, 16);
594
595 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); 597 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
596 598
597 /* Automatically increase file system size to the maximum size */ 599 /* Automatically increase file system size to the maximum size */
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index d80b2aef42b6..0d7564b95f8e 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -34,6 +34,8 @@
34#include <linux/parser.h> 34#include <linux/parser.h>
35#include <linux/seq_file.h> 35#include <linux/seq_file.h>
36#include <linux/mount.h> 36#include <linux/mount.h>
37#include <linux/math64.h>
38#include <linux/writeback.h>
37#include "ubifs.h" 39#include "ubifs.h"
38 40
39/* 41/*
@@ -417,39 +419,54 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
417 else if (c->mount_opts.chk_data_crc == 1) 419 else if (c->mount_opts.chk_data_crc == 1)
418 seq_printf(s, ",no_chk_data_crc"); 420 seq_printf(s, ",no_chk_data_crc");
419 421
422 if (c->mount_opts.override_compr) {
423 seq_printf(s, ",compr=");
424 seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type));
425 }
426
420 return 0; 427 return 0;
421} 428}
422 429
423static int ubifs_sync_fs(struct super_block *sb, int wait) 430static int ubifs_sync_fs(struct super_block *sb, int wait)
424{ 431{
432 int i, err;
425 struct ubifs_info *c = sb->s_fs_info; 433 struct ubifs_info *c = sb->s_fs_info;
426 int i, ret = 0, err; 434 struct writeback_control wbc = {
427 long long bud_bytes; 435 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
428 436 .range_start = 0,
429 if (c->jheads) { 437 .range_end = LLONG_MAX,
430 for (i = 0; i < c->jhead_cnt; i++) { 438 .nr_to_write = LONG_MAX,
431 err = ubifs_wbuf_sync(&c->jheads[i].wbuf); 439 };
432 if (err && !ret) 440
433 ret = err; 441 if (sb->s_flags & MS_RDONLY)
434 } 442 return 0;
435 443
436 /* Commit the journal unless it has too little data */ 444 /*
437 spin_lock(&c->buds_lock); 445 * Synchronize write buffers, because 'ubifs_run_commit()' does not
438 bud_bytes = c->bud_bytes; 446 * do this if it waits for an already running commit.
439 spin_unlock(&c->buds_lock); 447 */
440 if (bud_bytes > c->leb_size) { 448 for (i = 0; i < c->jhead_cnt; i++) {
441 err = ubifs_run_commit(c); 449 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
442 if (err) 450 if (err)
443 return err; 451 return err;
444 }
445 } 452 }
446 453
447 /* 454 /*
448 * We ought to call sync for c->ubi but it does not have one. If it had 455 * VFS calls '->sync_fs()' before synchronizing all dirty inodes and
449 * it would in turn call mtd->sync, however mtd operations are 456 * pages, so synchronize them first, then commit the journal. Strictly
450 * synchronous anyway, so we don't lose any sleep here. 457 * speaking, it is not necessary to commit the journal here,
458 * synchronizing write-buffers would be enough. But committing makes
459 * UBIFS free space predictions much more accurate, so we want to let
460 * the user be able to get more accurate results of 'statfs()' after
461 * they synchronize the file system.
451 */ 462 */
452 return ret; 463 generic_sync_sb_inodes(sb, &wbc);
464
465 err = ubifs_run_commit(c);
466 if (err)
467 return err;
468
469 return ubi_sync(c->vi.ubi_num);
453} 470}
454 471
455/** 472/**
@@ -596,7 +613,7 @@ static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad)
596} 613}
597 614
598/* 615/*
599 * init_constants_late - initialize UBIFS constants. 616 * init_constants_sb - initialize UBIFS constants.
600 * @c: UBIFS file-system description object 617 * @c: UBIFS file-system description object
601 * 618 *
602 * This is a helper function which initializes various UBIFS constants after 619 * This is a helper function which initializes various UBIFS constants after
@@ -604,10 +621,10 @@ static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad)
604 * makes sure they are all right. Returns zero in case of success and a 621 * makes sure they are all right. Returns zero in case of success and a
605 * negative error code in case of failure. 622 * negative error code in case of failure.
606 */ 623 */
607static int init_constants_late(struct ubifs_info *c) 624static int init_constants_sb(struct ubifs_info *c)
608{ 625{
609 int tmp, err; 626 int tmp, err;
610 uint64_t tmp64; 627 long long tmp64;
611 628
612 c->main_bytes = (long long)c->main_lebs * c->leb_size; 629 c->main_bytes = (long long)c->main_lebs * c->leb_size;
613 c->max_znode_sz = sizeof(struct ubifs_znode) + 630 c->max_znode_sz = sizeof(struct ubifs_znode) +
@@ -634,9 +651,8 @@ static int init_constants_late(struct ubifs_info *c)
634 * Make sure that the log is large enough to fit reference nodes for 651 * Make sure that the log is large enough to fit reference nodes for
635 * all buds plus one reserved LEB. 652 * all buds plus one reserved LEB.
636 */ 653 */
637 tmp64 = c->max_bud_bytes; 654 tmp64 = c->max_bud_bytes + c->leb_size - 1;
638 tmp = do_div(tmp64, c->leb_size); 655 c->max_bud_cnt = div_u64(tmp64, c->leb_size);
639 c->max_bud_cnt = tmp64 + !!tmp;
640 tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1); 656 tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1);
641 tmp /= c->leb_size; 657 tmp /= c->leb_size;
642 tmp += 1; 658 tmp += 1;
@@ -672,7 +688,7 @@ static int init_constants_late(struct ubifs_info *c)
672 * Consequently, if the journal is too small, UBIFS will treat it as 688 * Consequently, if the journal is too small, UBIFS will treat it as
673 * always full. 689 * always full.
674 */ 690 */
675 tmp64 = (uint64_t)(c->jhead_cnt + 1) * c->leb_size + 1; 691 tmp64 = (long long)(c->jhead_cnt + 1) * c->leb_size + 1;
676 if (c->bg_bud_bytes < tmp64) 692 if (c->bg_bud_bytes < tmp64)
677 c->bg_bud_bytes = tmp64; 693 c->bg_bud_bytes = tmp64;
678 if (c->max_bud_bytes < tmp64 + c->leb_size) 694 if (c->max_bud_bytes < tmp64 + c->leb_size)
@@ -682,6 +698,21 @@ static int init_constants_late(struct ubifs_info *c)
682 if (err) 698 if (err)
683 return err; 699 return err;
684 700
701 return 0;
702}
703
704/*
705 * init_constants_master - initialize UBIFS constants.
706 * @c: UBIFS file-system description object
707 *
708 * This is a helper function which initializes various UBIFS constants after
709 * the master node has been read. It also checks various UBIFS parameters and
710 * makes sure they are all right.
711 */
712static void init_constants_master(struct ubifs_info *c)
713{
714 long long tmp64;
715
685 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 716 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
686 717
687 /* 718 /*
@@ -690,14 +721,13 @@ static int init_constants_late(struct ubifs_info *c)
690 * necessary to report something for the 'statfs()' call. 721 * necessary to report something for the 'statfs()' call.
691 * 722 *
692 * Subtract the LEB reserved for GC, the LEB which is reserved for 723 * Subtract the LEB reserved for GC, the LEB which is reserved for
693 * deletions, and assume only one journal head is available. 724 * deletions, minimum LEBs for the index, and assume only one journal
725 * head is available.
694 */ 726 */
695 tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1; 727 tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1;
696 tmp64 *= (uint64_t)c->leb_size - c->leb_overhead; 728 tmp64 *= (long long)c->leb_size - c->leb_overhead;
697 tmp64 = ubifs_reported_space(c, tmp64); 729 tmp64 = ubifs_reported_space(c, tmp64);
698 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; 730 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
699
700 return 0;
701} 731}
702 732
703/** 733/**
@@ -878,6 +908,7 @@ static int check_volume_empty(struct ubifs_info *c)
878 * Opt_no_bulk_read: disable bulk-reads 908 * Opt_no_bulk_read: disable bulk-reads
879 * Opt_chk_data_crc: check CRCs when reading data nodes 909 * Opt_chk_data_crc: check CRCs when reading data nodes
880 * Opt_no_chk_data_crc: do not check CRCs when reading data nodes 910 * Opt_no_chk_data_crc: do not check CRCs when reading data nodes
911 * Opt_override_compr: override default compressor
881 * Opt_err: just end of array marker 912 * Opt_err: just end of array marker
882 */ 913 */
883enum { 914enum {
@@ -887,6 +918,7 @@ enum {
887 Opt_no_bulk_read, 918 Opt_no_bulk_read,
888 Opt_chk_data_crc, 919 Opt_chk_data_crc,
889 Opt_no_chk_data_crc, 920 Opt_no_chk_data_crc,
921 Opt_override_compr,
890 Opt_err, 922 Opt_err,
891}; 923};
892 924
@@ -897,6 +929,7 @@ static const match_table_t tokens = {
897 {Opt_no_bulk_read, "no_bulk_read"}, 929 {Opt_no_bulk_read, "no_bulk_read"},
898 {Opt_chk_data_crc, "chk_data_crc"}, 930 {Opt_chk_data_crc, "chk_data_crc"},
899 {Opt_no_chk_data_crc, "no_chk_data_crc"}, 931 {Opt_no_chk_data_crc, "no_chk_data_crc"},
932 {Opt_override_compr, "compr=%s"},
900 {Opt_err, NULL}, 933 {Opt_err, NULL},
901}; 934};
902 935
@@ -950,6 +983,28 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
950 c->mount_opts.chk_data_crc = 1; 983 c->mount_opts.chk_data_crc = 1;
951 c->no_chk_data_crc = 1; 984 c->no_chk_data_crc = 1;
952 break; 985 break;
986 case Opt_override_compr:
987 {
988 char *name = match_strdup(&args[0]);
989
990 if (!name)
991 return -ENOMEM;
992 if (!strcmp(name, "none"))
993 c->mount_opts.compr_type = UBIFS_COMPR_NONE;
994 else if (!strcmp(name, "lzo"))
995 c->mount_opts.compr_type = UBIFS_COMPR_LZO;
996 else if (!strcmp(name, "zlib"))
997 c->mount_opts.compr_type = UBIFS_COMPR_ZLIB;
998 else {
999 ubifs_err("unknown compressor \"%s\"", name);
1000 kfree(name);
1001 return -EINVAL;
1002 }
1003 kfree(name);
1004 c->mount_opts.override_compr = 1;
1005 c->default_compr = c->mount_opts.compr_type;
1006 break;
1007 }
953 default: 1008 default:
954 ubifs_err("unrecognized mount option \"%s\" " 1009 ubifs_err("unrecognized mount option \"%s\" "
955 "or missing value", p); 1010 "or missing value", p);
@@ -1019,6 +1074,30 @@ again:
1019} 1074}
1020 1075
1021/** 1076/**
1077 * check_free_space - check if there is enough free space to mount.
1078 * @c: UBIFS file-system description object
1079 *
1080 * This function makes sure UBIFS has enough free space to be mounted in
1081 * read/write mode. UBIFS must always have some free space to allow deletions.
1082 */
1083static int check_free_space(struct ubifs_info *c)
1084{
1085 ubifs_assert(c->dark_wm > 0);
1086 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
1087 ubifs_err("insufficient free space to mount in read/write mode");
1088 dbg_dump_budg(c);
1089 dbg_dump_lprops(c);
1090 /*
1091 * We return %-EINVAL instead of %-ENOSPC because it seems to
1092 * be the closest error code mentioned in the mount function
1093 * documentation.
1094 */
1095 return -EINVAL;
1096 }
1097 return 0;
1098}
1099
1100/**
1022 * mount_ubifs - mount UBIFS file-system. 1101 * mount_ubifs - mount UBIFS file-system.
1023 * @c: UBIFS file-system description object 1102 * @c: UBIFS file-system description object
1024 * 1103 *
@@ -1039,11 +1118,9 @@ static int mount_ubifs(struct ubifs_info *c)
1039 if (err) 1118 if (err)
1040 return err; 1119 return err;
1041 1120
1042#ifdef CONFIG_UBIFS_FS_DEBUG 1121 err = ubifs_debugging_init(c);
1043 c->dbg_buf = vmalloc(c->leb_size); 1122 if (err)
1044 if (!c->dbg_buf) 1123 return err;
1045 return -ENOMEM;
1046#endif
1047 1124
1048 err = check_volume_empty(c); 1125 err = check_volume_empty(c);
1049 if (err) 1126 if (err)
@@ -1100,27 +1177,25 @@ static int mount_ubifs(struct ubifs_info *c)
1100 goto out_free; 1177 goto out_free;
1101 1178
1102 /* 1179 /*
1103 * Make sure the compressor which is set as the default on in the 1180 * Make sure the compressor which is set as default in the superblock
1104 * superblock was actually compiled in. 1181 * or overridden by mount options is actually compiled in.
1105 */ 1182 */
1106 if (!ubifs_compr_present(c->default_compr)) { 1183 if (!ubifs_compr_present(c->default_compr)) {
1107 ubifs_warn("'%s' compressor is set by superblock, but not " 1184 ubifs_err("'compressor \"%s\" is not compiled in",
1108 "compiled in", ubifs_compr_name(c->default_compr)); 1185 ubifs_compr_name(c->default_compr));
1109 c->default_compr = UBIFS_COMPR_NONE; 1186 goto out_free;
1110 } 1187 }
1111 1188
1112 dbg_failure_mode_registration(c); 1189 err = init_constants_sb(c);
1113
1114 err = init_constants_late(c);
1115 if (err) 1190 if (err)
1116 goto out_dereg; 1191 goto out_free;
1117 1192
1118 sz = ALIGN(c->max_idx_node_sz, c->min_io_size); 1193 sz = ALIGN(c->max_idx_node_sz, c->min_io_size);
1119 sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size); 1194 sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size);
1120 c->cbuf = kmalloc(sz, GFP_NOFS); 1195 c->cbuf = kmalloc(sz, GFP_NOFS);
1121 if (!c->cbuf) { 1196 if (!c->cbuf) {
1122 err = -ENOMEM; 1197 err = -ENOMEM;
1123 goto out_dereg; 1198 goto out_free;
1124 } 1199 }
1125 1200
1126 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); 1201 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
@@ -1145,6 +1220,8 @@ static int mount_ubifs(struct ubifs_info *c)
1145 if (err) 1220 if (err)
1146 goto out_master; 1221 goto out_master;
1147 1222
1223 init_constants_master(c);
1224
1148 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { 1225 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
1149 ubifs_msg("recovery needed"); 1226 ubifs_msg("recovery needed");
1150 c->need_recovery = 1; 1227 c->need_recovery = 1;
@@ -1183,12 +1260,9 @@ static int mount_ubifs(struct ubifs_info *c)
1183 if (!mounted_read_only) { 1260 if (!mounted_read_only) {
1184 int lnum; 1261 int lnum;
1185 1262
1186 /* Check for enough free space */ 1263 err = check_free_space(c);
1187 if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { 1264 if (err)
1188 ubifs_err("insufficient available space");
1189 err = -EINVAL;
1190 goto out_orphans; 1265 goto out_orphans;
1191 }
1192 1266
1193 /* Check for enough log space */ 1267 /* Check for enough log space */
1194 lnum = c->lhead_lnum + 1; 1268 lnum = c->lhead_lnum + 1;
@@ -1232,6 +1306,10 @@ static int mount_ubifs(struct ubifs_info *c)
1232 } 1306 }
1233 } 1307 }
1234 1308
1309 err = dbg_debugfs_init_fs(c);
1310 if (err)
1311 goto out_infos;
1312
1235 err = dbg_check_filesystem(c); 1313 err = dbg_check_filesystem(c);
1236 if (err) 1314 if (err)
1237 goto out_infos; 1315 goto out_infos;
@@ -1283,8 +1361,20 @@ static int mount_ubifs(struct ubifs_info *c)
1283 dbg_msg("tree fanout: %d", c->fanout); 1361 dbg_msg("tree fanout: %d", c->fanout);
1284 dbg_msg("reserved GC LEB: %d", c->gc_lnum); 1362 dbg_msg("reserved GC LEB: %d", c->gc_lnum);
1285 dbg_msg("first main LEB: %d", c->main_first); 1363 dbg_msg("first main LEB: %d", c->main_first);
1364 dbg_msg("max. znode size %d", c->max_znode_sz);
1365 dbg_msg("max. index node size %d", c->max_idx_node_sz);
1366 dbg_msg("node sizes: data %zu, inode %zu, dentry %zu",
1367 UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ);
1368 dbg_msg("node sizes: trun %zu, sb %zu, master %zu",
1369 UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);
1370 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu",
1371 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
1372 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu",
1373 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
1374 UBIFS_MAX_DENT_NODE_SZ);
1286 dbg_msg("dead watermark: %d", c->dead_wm); 1375 dbg_msg("dead watermark: %d", c->dead_wm);
1287 dbg_msg("dark watermark: %d", c->dark_wm); 1376 dbg_msg("dark watermark: %d", c->dark_wm);
1377 dbg_msg("LEB overhead: %d", c->leb_overhead);
1288 x = (long long)c->main_lebs * c->dark_wm; 1378 x = (long long)c->main_lebs * c->dark_wm;
1289 dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", 1379 dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)",
1290 x, x >> 10, x >> 20); 1380 x, x >> 10, x >> 20);
@@ -1320,14 +1410,12 @@ out_wbufs:
1320 free_wbufs(c); 1410 free_wbufs(c);
1321out_cbuf: 1411out_cbuf:
1322 kfree(c->cbuf); 1412 kfree(c->cbuf);
1323out_dereg:
1324 dbg_failure_mode_deregistration(c);
1325out_free: 1413out_free:
1326 kfree(c->bu.buf); 1414 kfree(c->bu.buf);
1327 vfree(c->ileb_buf); 1415 vfree(c->ileb_buf);
1328 vfree(c->sbuf); 1416 vfree(c->sbuf);
1329 kfree(c->bottom_up_buf); 1417 kfree(c->bottom_up_buf);
1330 UBIFS_DBG(vfree(c->dbg_buf)); 1418 ubifs_debugging_exit(c);
1331 return err; 1419 return err;
1332} 1420}
1333 1421
@@ -1345,6 +1433,7 @@ static void ubifs_umount(struct ubifs_info *c)
1345 dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num, 1433 dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num,
1346 c->vi.vol_id); 1434 c->vi.vol_id);
1347 1435
1436 dbg_debugfs_exit_fs(c);
1348 spin_lock(&ubifs_infos_lock); 1437 spin_lock(&ubifs_infos_lock);
1349 list_del(&c->infos_list); 1438 list_del(&c->infos_list);
1350 spin_unlock(&ubifs_infos_lock); 1439 spin_unlock(&ubifs_infos_lock);
@@ -1364,8 +1453,7 @@ static void ubifs_umount(struct ubifs_info *c)
1364 vfree(c->ileb_buf); 1453 vfree(c->ileb_buf);
1365 vfree(c->sbuf); 1454 vfree(c->sbuf);
1366 kfree(c->bottom_up_buf); 1455 kfree(c->bottom_up_buf);
1367 UBIFS_DBG(vfree(c->dbg_buf)); 1456 ubifs_debugging_exit(c);
1368 dbg_failure_mode_deregistration(c);
1369} 1457}
1370 1458
1371/** 1459/**
@@ -1387,12 +1475,9 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1387 c->remounting_rw = 1; 1475 c->remounting_rw = 1;
1388 c->always_chk_crc = 1; 1476 c->always_chk_crc = 1;
1389 1477
1390 /* Check for enough free space */ 1478 err = check_free_space(c);
1391 if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { 1479 if (err)
1392 ubifs_err("insufficient available space");
1393 err = -EINVAL;
1394 goto out; 1480 goto out;
1395 }
1396 1481
1397 if (c->old_leb_cnt != c->leb_cnt) { 1482 if (c->old_leb_cnt != c->leb_cnt) {
1398 struct ubifs_sb_node *sup; 1483 struct ubifs_sb_node *sup;
@@ -1515,20 +1600,24 @@ out:
1515 * @c: UBIFS file-system description object 1600 * @c: UBIFS file-system description object
1516 * 1601 *
1517 * This function is called during un-mounting and re-mounting, and it commits 1602 * This function is called during un-mounting and re-mounting, and it commits
1518 * the journal unless the "fast unmount" mode is enabled. It also avoids 1603 * the journal unless the "fast unmount" mode is enabled.
1519 * committing the journal if it contains too few data.
1520 */ 1604 */
1521static void commit_on_unmount(struct ubifs_info *c) 1605static void commit_on_unmount(struct ubifs_info *c)
1522{ 1606{
1523 if (!c->fast_unmount) { 1607 struct super_block *sb = c->vfs_sb;
1524 long long bud_bytes; 1608 long long bud_bytes;
1525 1609
1526 spin_lock(&c->buds_lock); 1610 /*
1527 bud_bytes = c->bud_bytes; 1611 * This function is called before the background thread is stopped, so
1528 spin_unlock(&c->buds_lock); 1612 * we may race with ongoing commit, which means we have to take
1529 if (bud_bytes > c->leb_size) 1613 * @c->buds_lock to access @c->bud_bytes.
1530 ubifs_run_commit(c); 1614 */
1531 } 1615 spin_lock(&c->buds_lock);
1616 bud_bytes = c->bud_bytes;
1617 spin_unlock(&c->buds_lock);
1618
1619 if (!c->fast_unmount && !(sb->s_flags & MS_RDONLY) && bud_bytes)
1620 ubifs_run_commit(c);
1532} 1621}
1533 1622
1534/** 1623/**
@@ -1849,7 +1938,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1849 goto out_iput; 1938 goto out_iput;
1850 1939
1851 mutex_unlock(&c->umount_mutex); 1940 mutex_unlock(&c->umount_mutex);
1852
1853 return 0; 1941 return 0;
1854 1942
1855out_iput: 1943out_iput:
@@ -1955,7 +2043,7 @@ static void ubifs_kill_sb(struct super_block *sb)
1955 * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()' 2043 * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()'
1956 * in order to be outside BKL. 2044 * in order to be outside BKL.
1957 */ 2045 */
1958 if (sb->s_root && !(sb->s_flags & MS_RDONLY)) 2046 if (sb->s_root)
1959 commit_on_unmount(c); 2047 commit_on_unmount(c);
1960 /* The un-mount routine is actually done in put_super() */ 2048 /* The un-mount routine is actually done in put_super() */
1961 generic_shutdown_super(sb); 2049 generic_shutdown_super(sb);
@@ -2021,6 +2109,14 @@ static int __init ubifs_init(void)
2021 BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64); 2109 BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64);
2022 2110
2023 /* 2111 /*
2112 * We use 2 bit wide bit-fields to store compression type, which should
2113 * be amended if more compressors are added. The bit-fields are:
2114 * @compr_type in 'struct ubifs_inode', @default_compr in
2115 * 'struct ubifs_info' and @compr_type in 'struct ubifs_mount_opts'.
2116 */
2117 BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4);
2118
2119 /*
2024 * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to 2120 * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to
2025 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. 2121 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
2026 */ 2122 */
@@ -2049,11 +2145,17 @@ static int __init ubifs_init(void)
2049 2145
2050 err = ubifs_compressors_init(); 2146 err = ubifs_compressors_init();
2051 if (err) 2147 if (err)
2148 goto out_shrinker;
2149
2150 err = dbg_debugfs_init();
2151 if (err)
2052 goto out_compr; 2152 goto out_compr;
2053 2153
2054 return 0; 2154 return 0;
2055 2155
2056out_compr: 2156out_compr:
2157 ubifs_compressors_exit();
2158out_shrinker:
2057 unregister_shrinker(&ubifs_shrinker_info); 2159 unregister_shrinker(&ubifs_shrinker_info);
2058 kmem_cache_destroy(ubifs_inode_slab); 2160 kmem_cache_destroy(ubifs_inode_slab);
2059out_reg: 2161out_reg:
@@ -2068,6 +2170,7 @@ static void __exit ubifs_exit(void)
2068 ubifs_assert(list_empty(&ubifs_infos)); 2170 ubifs_assert(list_empty(&ubifs_infos));
2069 ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0); 2171 ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0);
2070 2172
2173 dbg_debugfs_exit();
2071 ubifs_compressors_exit(); 2174 ubifs_compressors_exit();
2072 unregister_shrinker(&ubifs_shrinker_info); 2175 unregister_shrinker(&ubifs_shrinker_info);
2073 kmem_cache_destroy(ubifs_inode_slab); 2176 kmem_cache_destroy(ubifs_inode_slab);
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 6eef5344a145..f7e36f545527 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2245,12 +2245,11 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
2245 if (found) { 2245 if (found) {
2246 /* Ensure the znode is dirtied */ 2246 /* Ensure the znode is dirtied */
2247 if (znode->cnext || !ubifs_zn_dirty(znode)) { 2247 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2248 znode = dirty_cow_bottom_up(c, 2248 znode = dirty_cow_bottom_up(c, znode);
2249 znode); 2249 if (IS_ERR(znode)) {
2250 if (IS_ERR(znode)) { 2250 err = PTR_ERR(znode);
2251 err = PTR_ERR(znode); 2251 goto out_unlock;
2252 goto out_unlock; 2252 }
2253 }
2254 } 2253 }
2255 zbr = &znode->zbranch[n]; 2254 zbr = &znode->zbranch[n];
2256 lnc_free(zbr); 2255 lnc_free(zbr);
@@ -2317,11 +2316,11 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
2317 2316
2318 /* Ensure the znode is dirtied */ 2317 /* Ensure the znode is dirtied */
2319 if (znode->cnext || !ubifs_zn_dirty(znode)) { 2318 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2320 znode = dirty_cow_bottom_up(c, znode); 2319 znode = dirty_cow_bottom_up(c, znode);
2321 if (IS_ERR(znode)) { 2320 if (IS_ERR(znode)) {
2322 err = PTR_ERR(znode); 2321 err = PTR_ERR(znode);
2323 goto out_unlock; 2322 goto out_unlock;
2324 } 2323 }
2325 } 2324 }
2326 2325
2327 if (found == 1) { 2326 if (found == 1) {
@@ -2627,11 +2626,11 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
2627 2626
2628 /* Ensure the znode is dirtied */ 2627 /* Ensure the znode is dirtied */
2629 if (znode->cnext || !ubifs_zn_dirty(znode)) { 2628 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2630 znode = dirty_cow_bottom_up(c, znode); 2629 znode = dirty_cow_bottom_up(c, znode);
2631 if (IS_ERR(znode)) { 2630 if (IS_ERR(znode)) {
2632 err = PTR_ERR(znode); 2631 err = PTR_ERR(znode);
2633 goto out_unlock; 2632 goto out_unlock;
2634 } 2633 }
2635 } 2634 }
2636 2635
2637 /* Remove all keys in range except the first */ 2636 /* Remove all keys in range except the first */
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 8ac76b1c2d55..fde8d127c768 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -553,8 +553,8 @@ static int layout_in_empty_space(struct ubifs_info *c)
553 } 553 }
554 554
555#ifdef CONFIG_UBIFS_FS_DEBUG 555#ifdef CONFIG_UBIFS_FS_DEBUG
556 c->new_ihead_lnum = lnum; 556 c->dbg->new_ihead_lnum = lnum;
557 c->new_ihead_offs = buf_offs; 557 c->dbg->new_ihead_offs = buf_offs;
558#endif 558#endif
559 559
560 return 0; 560 return 0;
@@ -802,8 +802,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
802 * budgeting subsystem to assume the index is already committed, 802 * budgeting subsystem to assume the index is already committed,
803 * even though it is not. 803 * even though it is not.
804 */ 804 */
805 ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
805 c->old_idx_sz = c->calc_idx_sz; 806 c->old_idx_sz = c->calc_idx_sz;
806 c->budg_uncommitted_idx = 0; 807 c->budg_uncommitted_idx = 0;
808 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
807 spin_unlock(&c->space_lock); 809 spin_unlock(&c->space_lock);
808 mutex_unlock(&c->tnc_mutex); 810 mutex_unlock(&c->tnc_mutex);
809 811
@@ -1002,7 +1004,8 @@ static int write_index(struct ubifs_info *c)
1002 } 1004 }
1003 1005
1004#ifdef CONFIG_UBIFS_FS_DEBUG 1006#ifdef CONFIG_UBIFS_FS_DEBUG
1005 if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) { 1007 if (lnum != c->dbg->new_ihead_lnum ||
1008 buf_offs != c->dbg->new_ihead_offs) {
1006 ubifs_err("inconsistent ihead"); 1009 ubifs_err("inconsistent ihead");
1007 return -EINVAL; 1010 return -EINVAL;
1008 } 1011 }
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 0b378042a3a2..b25fc36cf72f 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -51,6 +51,13 @@
51 */ 51 */
52#define UBIFS_MIN_COMPR_LEN 128 52#define UBIFS_MIN_COMPR_LEN 128
53 53
54/*
55 * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes
56 * shorter than uncompressed data length, UBIFS prefers to leave this data
57 * node uncompressed, because it'll be read faster.
58 */
59#define UBIFS_MIN_COMPRESS_DIFF 64
60
54/* Root inode number */ 61/* Root inode number */
55#define UBIFS_ROOT_INO 1 62#define UBIFS_ROOT_INO 1
56 63
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 46b172560a06..fc2a4cc66d03 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -63,6 +63,14 @@
63#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL 63#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL
64#define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL 64#define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL
65 65
66/*
67 * Minimum amount of LEBs reserved for the index. At present the index needs at
68 * least 2 LEBs: one for the index head and one for in-the-gaps method (which
69 * currently does not cater for the index head and so excludes it from
70 * consideration).
71 */
72#define MIN_INDEX_LEBS 2
73
66/* Minimum amount of data UBIFS writes to the flash */ 74/* Minimum amount of data UBIFS writes to the flash */
67#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8) 75#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8)
68 76
@@ -386,12 +394,12 @@ struct ubifs_inode {
386 unsigned int dirty:1; 394 unsigned int dirty:1;
387 unsigned int xattr:1; 395 unsigned int xattr:1;
388 unsigned int bulk_read:1; 396 unsigned int bulk_read:1;
397 unsigned int compr_type:2;
389 struct mutex ui_mutex; 398 struct mutex ui_mutex;
390 spinlock_t ui_lock; 399 spinlock_t ui_lock;
391 loff_t synced_i_size; 400 loff_t synced_i_size;
392 loff_t ui_size; 401 loff_t ui_size;
393 int flags; 402 int flags;
394 int compr_type;
395 pgoff_t last_page_read; 403 pgoff_t last_page_read;
396 pgoff_t read_in_a_row; 404 pgoff_t read_in_a_row;
397 int data_len; 405 int data_len;
@@ -419,7 +427,7 @@ struct ubifs_unclean_leb {
419 * 427 *
420 * LPROPS_UNCAT: not categorized 428 * LPROPS_UNCAT: not categorized
421 * LPROPS_DIRTY: dirty > 0, not index 429 * LPROPS_DIRTY: dirty > 0, not index
422 * LPROPS_DIRTY_IDX: dirty + free > UBIFS_CH_SZ and index 430 * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sz and index
423 * LPROPS_FREE: free > 0, not empty, not index 431 * LPROPS_FREE: free > 0, not empty, not index
424 * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs 432 * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs
425 * LPROPS_EMPTY: LEB is empty, not taken 433 * LPROPS_EMPTY: LEB is empty, not taken
@@ -473,8 +481,8 @@ struct ubifs_lprops {
473struct ubifs_lpt_lprops { 481struct ubifs_lpt_lprops {
474 int free; 482 int free;
475 int dirty; 483 int dirty;
476 unsigned tgc : 1; 484 unsigned tgc:1;
477 unsigned cmt : 1; 485 unsigned cmt:1;
478}; 486};
479 487
480/** 488/**
@@ -482,24 +490,26 @@ struct ubifs_lpt_lprops {
482 * @empty_lebs: number of empty LEBs 490 * @empty_lebs: number of empty LEBs
483 * @taken_empty_lebs: number of taken LEBs 491 * @taken_empty_lebs: number of taken LEBs
484 * @idx_lebs: number of indexing LEBs 492 * @idx_lebs: number of indexing LEBs
485 * @total_free: total free space in bytes 493 * @total_free: total free space in bytes (includes all LEBs)
486 * @total_dirty: total dirty space in bytes 494 * @total_dirty: total dirty space in bytes (includes all LEBs)
487 * @total_used: total used space in bytes (includes only data LEBs) 495 * @total_used: total used space in bytes (does not include index LEBs)
488 * @total_dead: total dead space in bytes (includes only data LEBs) 496 * @total_dead: total dead space in bytes (does not include index LEBs)
489 * @total_dark: total dark space in bytes (includes only data LEBs) 497 * @total_dark: total dark space in bytes (does not include index LEBs)
498 *
499 * The @taken_empty_lebs field counts the LEBs that are in the transient state
500 * of having been "taken" for use but not yet written to. @taken_empty_lebs is
501 * needed to account correctly for @gc_lnum, otherwise @empty_lebs could be
502 * used by itself (in which case 'unused_lebs' would be a better name). In the
503 * case of @gc_lnum, it is "taken" at mount time or whenever a LEB is retained
504 * by GC, but unlike other empty LEBs that are "taken", it may not be written
505 * straight away (i.e. before the next commit start or unmount), so either
506 * @gc_lnum must be specially accounted for, or the current approach followed
507 * i.e. count it under @taken_empty_lebs.
490 * 508 *
491 * N.B. total_dirty and total_used are different to other total_* fields, 509 * @empty_lebs includes @taken_empty_lebs.
492 * because they account _all_ LEBs, not just data LEBs.
493 * 510 *
494 * 'taken_empty_lebs' counts the LEBs that are in the transient state of having 511 * @total_used, @total_dead and @total_dark fields do not account indexing
495 * been 'taken' for use but not yet written to. 'taken_empty_lebs' is needed 512 * LEBs.
496 * to account correctly for gc_lnum, otherwise 'empty_lebs' could be used
497 * by itself (in which case 'unused_lebs' would be a better name). In the case
498 * of gc_lnum, it is 'taken' at mount time or whenever a LEB is retained by GC,
499 * but unlike other empty LEBs that are 'taken', it may not be written straight
500 * away (i.e. before the next commit start or unmount), so either gc_lnum must
501 * be specially accounted for, or the current approach followed i.e. count it
502 * under 'taken_empty_lebs'.
503 */ 513 */
504struct ubifs_lp_stats { 514struct ubifs_lp_stats {
505 int empty_lebs; 515 int empty_lebs;
@@ -893,15 +903,25 @@ struct ubifs_orphan {
893/** 903/**
894 * struct ubifs_mount_opts - UBIFS-specific mount options information. 904 * struct ubifs_mount_opts - UBIFS-specific mount options information.
895 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) 905 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
896 * @bulk_read: enable bulk-reads 906 * @bulk_read: enable/disable bulk-reads (%0 default, %1 disable, %2 enable)
897 * @chk_data_crc: check CRCs when reading data nodes 907 * @chk_data_crc: enable/disable CRC data checking when reading data nodes
908 * (%0 default, %1 disable, %2 enable)
909 * @override_compr: override default compressor (%0 - do not override and use
910 * superblock compressor, %1 - override and use compressor
911 * specified in @compr_type)
912 * @compr_type: compressor type to override the superblock compressor with
913 * (%UBIFS_COMPR_NONE, etc)
898 */ 914 */
899struct ubifs_mount_opts { 915struct ubifs_mount_opts {
900 unsigned int unmount_mode:2; 916 unsigned int unmount_mode:2;
901 unsigned int bulk_read:2; 917 unsigned int bulk_read:2;
902 unsigned int chk_data_crc:2; 918 unsigned int chk_data_crc:2;
919 unsigned int override_compr:1;
920 unsigned int compr_type:2;
903}; 921};
904 922
923struct ubifs_debug_info;
924
905/** 925/**
906 * struct ubifs_info - UBIFS file-system description data structure 926 * struct ubifs_info - UBIFS file-system description data structure
907 * (per-superblock). 927 * (per-superblock).
@@ -946,6 +966,7 @@ struct ubifs_mount_opts {
946 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during 966 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
947 * recovery) 967 * recovery)
948 * @bulk_read: enable bulk-reads 968 * @bulk_read: enable bulk-reads
969 * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
949 * 970 *
950 * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and 971 * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
951 * @calc_idx_sz 972 * @calc_idx_sz
@@ -963,8 +984,6 @@ struct ubifs_mount_opts {
963 * @ileb_nxt: next pre-allocated index LEBs 984 * @ileb_nxt: next pre-allocated index LEBs
964 * @old_idx: tree of index nodes obsoleted since the last commit start 985 * @old_idx: tree of index nodes obsoleted since the last commit start
965 * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c 986 * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c
966 * @new_ihead_lnum: used by debugging to check ihead_lnum
967 * @new_ihead_offs: used by debugging to check ihead_offs
968 * 987 *
969 * @mst_node: master node 988 * @mst_node: master node
970 * @mst_offs: offset of valid master node 989 * @mst_offs: offset of valid master node
@@ -986,7 +1005,6 @@ struct ubifs_mount_opts {
986 * @main_lebs: count of LEBs in the main area 1005 * @main_lebs: count of LEBs in the main area
987 * @main_first: first LEB of the main area 1006 * @main_first: first LEB of the main area
988 * @main_bytes: main area size in bytes 1007 * @main_bytes: main area size in bytes
989 * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
990 * 1008 *
991 * @key_hash_type: type of the key hash 1009 * @key_hash_type: type of the key hash
992 * @key_hash: direntry key hash function 1010 * @key_hash: direntry key hash function
@@ -1149,15 +1167,7 @@ struct ubifs_mount_opts {
1149 * @always_chk_crc: always check CRCs (while mounting and remounting rw) 1167 * @always_chk_crc: always check CRCs (while mounting and remounting rw)
1150 * @mount_opts: UBIFS-specific mount options 1168 * @mount_opts: UBIFS-specific mount options
1151 * 1169 *
1152 * @dbg_buf: a buffer of LEB size used for debugging purposes 1170 * @dbg: debugging-related information
1153 * @old_zroot: old index root - used by 'dbg_check_old_index()'
1154 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
1155 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
1156 * @failure_mode: failure mode for recovery testing
1157 * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
1158 * @fail_timeout: time in jiffies when delay of failure mode expires
1159 * @fail_cnt: current number of calls to failure mode I/O functions
1160 * @fail_cnt_max: number of calls by which to delay failure mode
1161 */ 1171 */
1162struct ubifs_info { 1172struct ubifs_info {
1163 struct super_block *vfs_sb; 1173 struct super_block *vfs_sb;
@@ -1196,6 +1206,7 @@ struct ubifs_info {
1196 unsigned int big_lpt:1; 1206 unsigned int big_lpt:1;
1197 unsigned int no_chk_data_crc:1; 1207 unsigned int no_chk_data_crc:1;
1198 unsigned int bulk_read:1; 1208 unsigned int bulk_read:1;
1209 unsigned int default_compr:2;
1199 1210
1200 struct mutex tnc_mutex; 1211 struct mutex tnc_mutex;
1201 struct ubifs_zbranch zroot; 1212 struct ubifs_zbranch zroot;
@@ -1212,10 +1223,6 @@ struct ubifs_info {
1212 int ileb_nxt; 1223 int ileb_nxt;
1213 struct rb_root old_idx; 1224 struct rb_root old_idx;
1214 int *bottom_up_buf; 1225 int *bottom_up_buf;
1215#ifdef CONFIG_UBIFS_FS_DEBUG
1216 int new_ihead_lnum;
1217 int new_ihead_offs;
1218#endif
1219 1226
1220 struct ubifs_mst_node *mst_node; 1227 struct ubifs_mst_node *mst_node;
1221 int mst_offs; 1228 int mst_offs;
@@ -1237,7 +1244,6 @@ struct ubifs_info {
1237 int main_lebs; 1244 int main_lebs;
1238 int main_first; 1245 int main_first;
1239 long long main_bytes; 1246 long long main_bytes;
1240 int default_compr;
1241 1247
1242 uint8_t key_hash_type; 1248 uint8_t key_hash_type;
1243 uint32_t (*key_hash)(const char *str, int len); 1249 uint32_t (*key_hash)(const char *str, int len);
@@ -1315,8 +1321,8 @@ struct ubifs_info {
1315 void *sbuf; 1321 void *sbuf;
1316 struct list_head idx_gc; 1322 struct list_head idx_gc;
1317 int idx_gc_cnt; 1323 int idx_gc_cnt;
1318 volatile int gc_seq; 1324 int gc_seq;
1319 volatile int gced_lnum; 1325 int gced_lnum;
1320 1326
1321 struct list_head infos_list; 1327 struct list_head infos_list;
1322 struct mutex umount_mutex; 1328 struct mutex umount_mutex;
@@ -1391,21 +1397,7 @@ struct ubifs_info {
1391 struct ubifs_mount_opts mount_opts; 1397 struct ubifs_mount_opts mount_opts;
1392 1398
1393#ifdef CONFIG_UBIFS_FS_DEBUG 1399#ifdef CONFIG_UBIFS_FS_DEBUG
1394 void *dbg_buf; 1400 struct ubifs_debug_info *dbg;
1395 struct ubifs_zbranch old_zroot;
1396 int old_zroot_level;
1397 unsigned long long old_zroot_sqnum;
1398 int failure_mode;
1399 int fail_delay;
1400 unsigned long fail_timeout;
1401 unsigned int fail_cnt;
1402 unsigned int fail_cnt_max;
1403 long long chk_lpt_sz;
1404 long long chk_lpt_sz2;
1405 long long chk_lpt_wastage;
1406 int chk_lpt_lebs;
1407 int new_nhead_lnum;
1408 int new_nhead_offs;
1409#endif 1401#endif
1410}; 1402};
1411 1403
@@ -1505,7 +1497,7 @@ void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
1505long long ubifs_get_free_space(struct ubifs_info *c); 1497long long ubifs_get_free_space(struct ubifs_info *c);
1506int ubifs_calc_min_idx_lebs(struct ubifs_info *c); 1498int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
1507void ubifs_convert_page_budget(struct ubifs_info *c); 1499void ubifs_convert_page_budget(struct ubifs_info *c);
1508long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); 1500long long ubifs_reported_space(const struct ubifs_info *c, long long free);
1509long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); 1501long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
1510 1502
1511/* find.c */ 1503/* find.c */
@@ -1639,6 +1631,9 @@ void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty);
1639void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode); 1631void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode);
1640uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits); 1632uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits);
1641struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght); 1633struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght);
1634/* Needed only in debugging code in lpt_commit.c */
1635int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
1636 struct ubifs_nnode *nnode);
1642 1637
1643/* lpt_commit.c */ 1638/* lpt_commit.c */
1644int ubifs_lpt_start_commit(struct ubifs_info *c); 1639int ubifs_lpt_start_commit(struct ubifs_info *c);
@@ -1714,7 +1709,7 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1714 1709
1715/* compressor.c */ 1710/* compressor.c */
1716int __init ubifs_compressors_init(void); 1711int __init ubifs_compressors_init(void);
1717void __exit ubifs_compressors_exit(void); 1712void ubifs_compressors_exit(void);
1718void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, 1713void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
1719 int *compr_type); 1714 int *compr_type);
1720int ubifs_decompress(const void *buf, int len, void *out, int *out_len, 1715int ubifs_decompress(const void *buf, int len, void *out, int *out_len,