Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig.binfmt | 2
-rw-r--r--  fs/afs/mntpt.c | 2
-rw-r--r--  fs/binfmt_elf_fdpic.c | 45
-rw-r--r--  fs/bio-integrity.c | 1
-rw-r--r--  fs/block_dev.c | 5
-rw-r--r--  fs/buffer.c | 48
-rw-r--r--  fs/cifs/cifs_dfs_ref.c | 2
-rw-r--r--  fs/configfs/configfs_internal.h | 3
-rw-r--r--  fs/configfs/dir.c | 210
-rw-r--r--  fs/configfs/symlink.c | 26
-rw-r--r--  fs/devpts/inode.c | 16
-rw-r--r--  fs/dlm/lock.c | 4
-rw-r--r--  fs/dlm/lowcomms.c | 4
-rw-r--r--  fs/dlm/user.c | 2
-rw-r--r--  fs/dquot.c | 33
-rw-r--r--  fs/ecryptfs/crypto.c | 30
-rw-r--r--  fs/exec.c | 1
-rw-r--r--  fs/ext2/inode.c | 1
-rw-r--r--  fs/ext3/inode.c | 67
-rw-r--r--  fs/ext3/super.c | 3
-rw-r--r--  fs/ext4/inode.c | 92
-rw-r--r--  fs/ext4/super.c | 3
-rw-r--r--  fs/fcntl.c | 140
-rw-r--r--  fs/file.c | 61
-rw-r--r--  fs/jffs2/summary.c | 40
-rw-r--r--  fs/jffs2/summary.h | 6
-rw-r--r--  fs/libfs.c | 4
-rw-r--r--  fs/namei.c | 17
-rw-r--r--  fs/namespace.c | 16
-rw-r--r--  fs/nfs/namespace.c | 2
-rw-r--r--  fs/ocfs2/aops.c | 29
-rw-r--r--  fs/ocfs2/file.c | 2
-rw-r--r--  fs/ocfs2/journal.c | 173
-rw-r--r--  fs/ocfs2/journal.h | 3
-rw-r--r--  fs/ocfs2/ocfs2.h | 2
-rw-r--r--  fs/ocfs2/ocfs2_fs.h | 5
-rw-r--r--  fs/ocfs2/super.c | 12
-rw-r--r--  fs/omfs/bitmap.c | 6
-rw-r--r--  fs/omfs/dir.c | 2
-rw-r--r--  fs/omfs/file.c | 6
-rw-r--r--  fs/open.c | 56
-rw-r--r--  fs/proc/generic.c | 27
-rw-r--r--  fs/reiserfs/super.c | 16
-rw-r--r--  fs/romfs/inode.c | 37
44 files changed, 792 insertions(+), 470 deletions(-)
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 3263084eef9..4a551af6f3f 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -30,7 +30,7 @@ config COMPAT_BINFMT_ELF
30config BINFMT_ELF_FDPIC 30config BINFMT_ELF_FDPIC
31 bool "Kernel support for FDPIC ELF binaries" 31 bool "Kernel support for FDPIC ELF binaries"
32 default y 32 default y
33 depends on (FRV || BLACKFIN) 33 depends on (FRV || BLACKFIN || (SUPERH32 && !MMU))
34 help 34 help
35 ELF FDPIC binaries are based on ELF, but allow the individual load 35 ELF FDPIC binaries are based on ELF, but allow the individual load
36 segments of a binary to be located in memory independently of each 36 segments of a binary to be located in memory independently of each
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2f5503902c3..78db4953a80 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -232,7 +232,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
232 } 232 }
233 233
234 mntget(newmnt); 234 mntget(newmnt);
235 err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts); 235 err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts);
236 switch (err) { 236 switch (err) {
237 case 0: 237 case 0:
238 path_put(&nd->path); 238 path_put(&nd->path);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index fdeadab2f18..80c1f952ef7 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -470,6 +470,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
470 char __user *u_platform, *p; 470 char __user *u_platform, *p;
471 long hwcap; 471 long hwcap;
472 int loop; 472 int loop;
473 int nr; /* reset for each csp adjustment */
473 474
474 /* we're going to shovel a whole load of stuff onto the stack */ 475 /* we're going to shovel a whole load of stuff onto the stack */
475#ifdef CONFIG_MMU 476#ifdef CONFIG_MMU
@@ -542,10 +543,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
542 /* force 16 byte _final_ alignment here for generality */ 543 /* force 16 byte _final_ alignment here for generality */
543#define DLINFO_ITEMS 13 544#define DLINFO_ITEMS 13
544 545
545 nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0); 546 nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH;
546#ifdef DLINFO_ARCH_ITEMS
547 nitems += DLINFO_ARCH_ITEMS;
548#endif
549 547
550 csp = sp; 548 csp = sp;
551 sp -= nitems * 2 * sizeof(unsigned long); 549 sp -= nitems * 2 * sizeof(unsigned long);
@@ -557,39 +555,46 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
557 sp -= sp & 15UL; 555 sp -= sp & 15UL;
558 556
559 /* put the ELF interpreter info on the stack */ 557 /* put the ELF interpreter info on the stack */
560#define NEW_AUX_ENT(nr, id, val) \ 558#define NEW_AUX_ENT(id, val) \
561 do { \ 559 do { \
562 struct { unsigned long _id, _val; } __user *ent; \ 560 struct { unsigned long _id, _val; } __user *ent; \
563 \ 561 \
564 ent = (void __user *) csp; \ 562 ent = (void __user *) csp; \
565 __put_user((id), &ent[nr]._id); \ 563 __put_user((id), &ent[nr]._id); \
566 __put_user((val), &ent[nr]._val); \ 564 __put_user((val), &ent[nr]._val); \
565 nr++; \
567 } while (0) 566 } while (0)
568 567
568 nr = 0;
569 csp -= 2 * sizeof(unsigned long); 569 csp -= 2 * sizeof(unsigned long);
570 NEW_AUX_ENT(0, AT_NULL, 0); 570 NEW_AUX_ENT(AT_NULL, 0);
571 if (k_platform) { 571 if (k_platform) {
572 nr = 0;
572 csp -= 2 * sizeof(unsigned long); 573 csp -= 2 * sizeof(unsigned long);
573 NEW_AUX_ENT(0, AT_PLATFORM, 574 NEW_AUX_ENT(AT_PLATFORM,
574 (elf_addr_t) (unsigned long) u_platform); 575 (elf_addr_t) (unsigned long) u_platform);
575 } 576 }
576 577
578 nr = 0;
577 csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); 579 csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long);
578 NEW_AUX_ENT( 0, AT_HWCAP, hwcap); 580 NEW_AUX_ENT(AT_HWCAP, hwcap);
579 NEW_AUX_ENT( 1, AT_PAGESZ, PAGE_SIZE); 581 NEW_AUX_ENT(AT_PAGESZ, PAGE_SIZE);
580 NEW_AUX_ENT( 2, AT_CLKTCK, CLOCKS_PER_SEC); 582 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
581 NEW_AUX_ENT( 3, AT_PHDR, exec_params->ph_addr); 583 NEW_AUX_ENT(AT_PHDR, exec_params->ph_addr);
582 NEW_AUX_ENT( 4, AT_PHENT, sizeof(struct elf_phdr)); 584 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
583 NEW_AUX_ENT( 5, AT_PHNUM, exec_params->hdr.e_phnum); 585 NEW_AUX_ENT(AT_PHNUM, exec_params->hdr.e_phnum);
584 NEW_AUX_ENT( 6, AT_BASE, interp_params->elfhdr_addr); 586 NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr);
585 NEW_AUX_ENT( 7, AT_FLAGS, 0); 587 NEW_AUX_ENT(AT_FLAGS, 0);
586 NEW_AUX_ENT( 8, AT_ENTRY, exec_params->entry_addr); 588 NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr);
587 NEW_AUX_ENT( 9, AT_UID, (elf_addr_t) current->uid); 589 NEW_AUX_ENT(AT_UID, (elf_addr_t) current->uid);
588 NEW_AUX_ENT(10, AT_EUID, (elf_addr_t) current->euid); 590 NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid);
589 NEW_AUX_ENT(11, AT_GID, (elf_addr_t) current->gid); 591 NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid);
590 NEW_AUX_ENT(12, AT_EGID, (elf_addr_t) current->egid); 592 NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid);
591 593
592#ifdef ARCH_DLINFO 594#ifdef ARCH_DLINFO
595 nr = 0;
596 csp -= AT_VECTOR_SIZE_ARCH * 2 * sizeof(unsigned long);
597
593 /* ARCH_DLINFO must come last so platform specific code can enforce 598 /* ARCH_DLINFO must come last so platform specific code can enforce
594 * special alignment requirements on the AUXV if necessary (eg. PPC). 599 * special alignment requirements on the AUXV if necessary (eg. PPC).
595 */ 600 */
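The hunk above replaces the hand-numbered NEW_AUX_ENT(nr, id, val) calls with a macro that keeps its own running index: `nr` is reset before each block of entries (each time `csp` is lowered) and incremented inside the macro, and the arch-specific entry count now comes from AT_VECTOR_SIZE_ARCH instead of an optional DLINFO_ARCH_ITEMS define. Below is a minimal userspace sketch of the self-incrementing pattern only, with stand-in AT_* constants and a flat array in place of the new process's user stack.

```c
#include <stdio.h>

/*
 * Simplified illustration of the NEW_AUX_ENT() change above: each
 * auxiliary-vector entry is an (id, value) pair, and the running index
 * 'nr' is advanced by the macro itself instead of being passed in by
 * hand.  The constants and the 'ent' array are stand-ins, not the
 * kernel's real stack layout.
 */
#define AT_NULL   0
#define AT_PAGESZ 6
#define AT_ENTRY  9

struct aux_ent { unsigned long id, val; };

#define NEW_AUX_ENT(id_, val_)          \
	do {                            \
		ent[nr].id  = (id_);    \
		ent[nr].val = (val_);   \
		nr++;                   \
	} while (0)

int main(void)
{
	struct aux_ent ent[8];
	int nr = 0;                     /* reset before each group of entries */

	NEW_AUX_ENT(AT_PAGESZ, 4096);
	NEW_AUX_ENT(AT_ENTRY, 0x400000);
	NEW_AUX_ENT(AT_NULL, 0);        /* terminator */

	for (int i = 0; i < nr; i++)
		printf("id=%lu val=%#lx\n", ent[i].id, ent[i].val);
	return 0;
}
```

Letting the macro advance the index removes the risk of two entries landing in the same slot when the list is reordered or an entry is added.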
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 63e2ee63058..c3e174b35fe 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -705,7 +705,6 @@ void __init bio_integrity_init_slab(void)
705 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, 705 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
706 SLAB_HWCACHE_ALIGN|SLAB_PANIC); 706 SLAB_HWCACHE_ALIGN|SLAB_PANIC);
707} 707}
708EXPORT_SYMBOL(bio_integrity_init_slab);
709 708
710static int __init integrity_init(void) 709static int __init integrity_init(void)
711{ 710{
diff --git a/fs/block_dev.c b/fs/block_dev.c
index dcf37cada36..aff54219e04 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -941,8 +941,10 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
941 * hooks: /n/, see "layering violations". 941 * hooks: /n/, see "layering violations".
942 */ 942 */
943 ret = devcgroup_inode_permission(bdev->bd_inode, perm); 943 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
944 if (ret != 0) 944 if (ret != 0) {
945 bdput(bdev);
945 return ret; 946 return ret;
947 }
946 948
947 ret = -ENXIO; 949 ret = -ENXIO;
948 file->f_mapping = bdev->bd_inode->i_mapping; 950 file->f_mapping = bdev->bd_inode->i_mapping;
@@ -1234,6 +1236,7 @@ fail:
1234 bdev = ERR_PTR(error); 1236 bdev = ERR_PTR(error);
1235 goto out; 1237 goto out;
1236} 1238}
1239EXPORT_SYMBOL(lookup_bdev);
1237 1240
1238/** 1241/**
1239 * open_bdev_excl - open a block device by name and set it up for use 1242 * open_bdev_excl - open a block device by name and set it up for use
diff --git a/fs/buffer.c b/fs/buffer.c
index f9580501963..4dbe52948e8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -580,7 +580,7 @@ EXPORT_SYMBOL(mark_buffer_async_write);
580/* 580/*
581 * The buffer's backing address_space's private_lock must be held 581 * The buffer's backing address_space's private_lock must be held
582 */ 582 */
583static inline void __remove_assoc_queue(struct buffer_head *bh) 583static void __remove_assoc_queue(struct buffer_head *bh)
584{ 584{
585 list_del_init(&bh->b_assoc_buffers); 585 list_del_init(&bh->b_assoc_buffers);
586 WARN_ON(!bh->b_assoc_map); 586 WARN_ON(!bh->b_assoc_map);
@@ -2096,6 +2096,52 @@ int generic_write_end(struct file *file, struct address_space *mapping,
2096EXPORT_SYMBOL(generic_write_end); 2096EXPORT_SYMBOL(generic_write_end);
2097 2097
2098/* 2098/*
2099 * block_is_partially_uptodate checks whether buffers within a page are
2100 * uptodate or not.
2101 *
2102 * Returns true if all buffers which correspond to a file portion
2103 * we want to read are uptodate.
2104 */
2105int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
2106 unsigned long from)
2107{
2108 struct inode *inode = page->mapping->host;
2109 unsigned block_start, block_end, blocksize;
2110 unsigned to;
2111 struct buffer_head *bh, *head;
2112 int ret = 1;
2113
2114 if (!page_has_buffers(page))
2115 return 0;
2116
2117 blocksize = 1 << inode->i_blkbits;
2118 to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
2119 to = from + to;
2120 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2121 return 0;
2122
2123 head = page_buffers(page);
2124 bh = head;
2125 block_start = 0;
2126 do {
2127 block_end = block_start + blocksize;
2128 if (block_end > from && block_start < to) {
2129 if (!buffer_uptodate(bh)) {
2130 ret = 0;
2131 break;
2132 }
2133 if (block_end >= to)
2134 break;
2135 }
2136 block_start = block_end;
2137 bh = bh->b_this_page;
2138 } while (bh != head);
2139
2140 return ret;
2141}
2142EXPORT_SYMBOL(block_is_partially_uptodate);
2143
2144/*
2099 * Generic "read page" function for block devices that have the normal 2145 * Generic "read page" function for block devices that have the normal
2100 * get_block functionality. This is most of the block device filesystems. 2146 * get_block functionality. This is most of the block device filesystems.
2101 * Reads the page asynchronously --- the unlock_buffer() and 2147 * Reads the page asynchronously --- the unlock_buffer() and
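The new block_is_partially_uptodate() above walks the page's buffer_heads and reports whether every buffer overlapping the requested byte range is uptodate, so the read path can serve a partially cached page without forcing a full readpage. The following is a simplified userspace-only sketch of the same range check, assuming a 4 KiB page, 1 KiB blocks, and a plain flag array in place of the buffer ring.

```c
#include <stdbool.h>

/*
 * Sketch of the check block_is_partially_uptodate() performs above:
 * given per-block "uptodate" flags for one page, report whether every
 * block overlapping the byte range [from, from + count) is uptodate.
 * PAGE_SIZE, BLOCK_SIZE and the flag array are assumptions made for
 * illustration.
 */
#define PAGE_SIZE  4096u
#define BLOCK_SIZE 1024u
#define NBLOCKS    (PAGE_SIZE / BLOCK_SIZE)

static bool range_is_uptodate(const bool uptodate[NBLOCKS],
			      unsigned from, unsigned count)
{
	unsigned to = from + count;

	if (to > PAGE_SIZE)
		to = PAGE_SIZE;

	for (unsigned block_start = 0; block_start < PAGE_SIZE;
	     block_start += BLOCK_SIZE) {
		unsigned block_end = block_start + BLOCK_SIZE;

		/* only blocks overlapping [from, to) matter */
		if (block_end > from && block_start < to) {
			if (!uptodate[block_start / BLOCK_SIZE])
				return false;
			if (block_end >= to)
				break;
		}
	}
	return true;
}

int main(void)
{
	bool up[NBLOCKS] = { true, true, false, true };

	/* bytes 0..2047 live in blocks 0 and 1, which are uptodate */
	return range_is_uptodate(up, 0, 2048) ? 0 : 1;
}
```

Filesystems opt in by pointing .is_partially_uptodate at this helper in their address_space_operations, as the ext2/ext3/ext4 hunks later in this diff do.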
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index d82374c9e32..d2c8eef84f3 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,7 +226,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
226 int err; 226 int err;
227 227
228 mntget(newmnt); 228 mntget(newmnt);
229 err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist); 229 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist);
230 switch (err) { 230 switch (err) {
231 case 0: 231 case 0:
232 path_put(&nd->path); 232 path_put(&nd->path);
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da015c12e3e..762d287123c 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -49,8 +49,10 @@ struct configfs_dirent {
49#define CONFIGFS_USET_DEFAULT 0x0080 49#define CONFIGFS_USET_DEFAULT 0x0080
50#define CONFIGFS_USET_DROPPING 0x0100 50#define CONFIGFS_USET_DROPPING 0x0100
51#define CONFIGFS_USET_IN_MKDIR 0x0200 51#define CONFIGFS_USET_IN_MKDIR 0x0200
52#define CONFIGFS_USET_CREATING 0x0400
52#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) 53#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR)
53 54
55extern struct mutex configfs_symlink_mutex;
54extern spinlock_t configfs_dirent_lock; 56extern spinlock_t configfs_dirent_lock;
55 57
56extern struct vfsmount * configfs_mount; 58extern struct vfsmount * configfs_mount;
@@ -66,6 +68,7 @@ extern void configfs_inode_exit(void);
66extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); 68extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
67extern int configfs_make_dirent(struct configfs_dirent *, 69extern int configfs_make_dirent(struct configfs_dirent *,
68 struct dentry *, void *, umode_t, int); 70 struct dentry *, void *, umode_t, int);
71extern int configfs_dirent_is_ready(struct configfs_dirent *);
69 72
70extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int); 73extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int);
71extern void configfs_hash_and_remove(struct dentry * dir, const char * name); 74extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 179589be063..7a8db78a91d 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -185,7 +185,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
185 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); 185 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
186 if (!error) 186 if (!error)
187 error = configfs_make_dirent(p->d_fsdata, d, k, mode, 187 error = configfs_make_dirent(p->d_fsdata, d, k, mode,
188 CONFIGFS_DIR); 188 CONFIGFS_DIR | CONFIGFS_USET_CREATING);
189 if (!error) { 189 if (!error) {
190 error = configfs_create(d, mode, init_dir); 190 error = configfs_create(d, mode, init_dir);
191 if (!error) { 191 if (!error) {
@@ -209,6 +209,9 @@ static int create_dir(struct config_item * k, struct dentry * p,
209 * configfs_create_dir - create a directory for a config_item. 209 * configfs_create_dir - create a directory for a config_item.
210 * @item: config_item we're creating directory for. 210 * @item: config_item we're creating directory for.
211 * @dentry: config_item's dentry. 211 * @dentry: config_item's dentry.
212 *
213 * Note: user-created entries won't be allowed under this new directory
214 * until it is validated by configfs_dir_set_ready()
212 */ 215 */
213 216
214static int configfs_create_dir(struct config_item * item, struct dentry *dentry) 217static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
@@ -231,6 +234,44 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
231 return error; 234 return error;
232} 235}
233 236
237/*
238 * Allow userspace to create new entries under a new directory created with
239 * configfs_create_dir(), and under all of its children directories recursively.
240 * @sd configfs_dirent of the new directory to validate
241 *
242 * Caller must hold configfs_dirent_lock.
243 */
244static void configfs_dir_set_ready(struct configfs_dirent *sd)
245{
246 struct configfs_dirent *child_sd;
247
248 sd->s_type &= ~CONFIGFS_USET_CREATING;
249 list_for_each_entry(child_sd, &sd->s_children, s_sibling)
250 if (child_sd->s_type & CONFIGFS_USET_CREATING)
251 configfs_dir_set_ready(child_sd);
252}
253
254/*
255 * Check that a directory does not belong to a directory hierarchy being
256 * attached and not validated yet.
257 * @sd configfs_dirent of the directory to check
258 *
259 * @return non-zero iff the directory was validated
260 *
261 * Note: takes configfs_dirent_lock, so the result may change from false to true
262 * in two consecutive calls, but never from true to false.
263 */
264int configfs_dirent_is_ready(struct configfs_dirent *sd)
265{
266 int ret;
267
268 spin_lock(&configfs_dirent_lock);
269 ret = !(sd->s_type & CONFIGFS_USET_CREATING);
270 spin_unlock(&configfs_dirent_lock);
271
272 return ret;
273}
274
234int configfs_create_link(struct configfs_symlink *sl, 275int configfs_create_link(struct configfs_symlink *sl,
235 struct dentry *parent, 276 struct dentry *parent,
236 struct dentry *dentry) 277 struct dentry *dentry)
@@ -283,6 +324,8 @@ static void remove_dir(struct dentry * d)
283 * The only thing special about this is that we remove any files in 324 * The only thing special about this is that we remove any files in
284 * the directory before we remove the directory, and we've inlined 325 * the directory before we remove the directory, and we've inlined
285 * what used to be configfs_rmdir() below, instead of calling separately. 326 * what used to be configfs_rmdir() below, instead of calling separately.
327 *
328 * Caller holds the mutex of the item's inode
286 */ 329 */
287 330
288static void configfs_remove_dir(struct config_item * item) 331static void configfs_remove_dir(struct config_item * item)
@@ -330,7 +373,19 @@ static struct dentry * configfs_lookup(struct inode *dir,
330 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; 373 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
331 struct configfs_dirent * sd; 374 struct configfs_dirent * sd;
332 int found = 0; 375 int found = 0;
333 int err = 0; 376 int err;
377
378 /*
379 * Fake invisibility if dir belongs to a group/default groups hierarchy
380 * being attached
381 *
382 * This forbids userspace to read/write attributes of items which may
383 * not complete their initialization, since the dentries of the
384 * attributes won't be instantiated.
385 */
386 err = -ENOENT;
387 if (!configfs_dirent_is_ready(parent_sd))
388 goto out;
334 389
335 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 390 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
336 if (sd->s_type & CONFIGFS_NOT_PINNED) { 391 if (sd->s_type & CONFIGFS_NOT_PINNED) {
@@ -353,6 +408,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
353 return simple_lookup(dir, dentry, nd); 408 return simple_lookup(dir, dentry, nd);
354 } 409 }
355 410
411out:
356 return ERR_PTR(err); 412 return ERR_PTR(err);
357} 413}
358 414
@@ -370,13 +426,17 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
370 struct configfs_dirent *sd; 426 struct configfs_dirent *sd;
371 int ret; 427 int ret;
372 428
429 /* Mark that we're trying to drop the group */
430 parent_sd->s_type |= CONFIGFS_USET_DROPPING;
431
373 ret = -EBUSY; 432 ret = -EBUSY;
374 if (!list_empty(&parent_sd->s_links)) 433 if (!list_empty(&parent_sd->s_links))
375 goto out; 434 goto out;
376 435
377 ret = 0; 436 ret = 0;
378 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 437 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
379 if (sd->s_type & CONFIGFS_NOT_PINNED) 438 if (!sd->s_element ||
439 (sd->s_type & CONFIGFS_NOT_PINNED))
380 continue; 440 continue;
381 if (sd->s_type & CONFIGFS_USET_DEFAULT) { 441 if (sd->s_type & CONFIGFS_USET_DEFAULT) {
382 /* Abort if racing with mkdir() */ 442 /* Abort if racing with mkdir() */
@@ -385,8 +445,6 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
385 *wait_mutex = &sd->s_dentry->d_inode->i_mutex; 445 *wait_mutex = &sd->s_dentry->d_inode->i_mutex;
386 return -EAGAIN; 446 return -EAGAIN;
387 } 447 }
388 /* Mark that we're trying to drop the group */
389 sd->s_type |= CONFIGFS_USET_DROPPING;
390 448
391 /* 449 /*
392 * Yup, recursive. If there's a problem, blame 450 * Yup, recursive. If there's a problem, blame
@@ -414,12 +472,11 @@ static void configfs_detach_rollback(struct dentry *dentry)
414 struct configfs_dirent *parent_sd = dentry->d_fsdata; 472 struct configfs_dirent *parent_sd = dentry->d_fsdata;
415 struct configfs_dirent *sd; 473 struct configfs_dirent *sd;
416 474
417 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 475 parent_sd->s_type &= ~CONFIGFS_USET_DROPPING;
418 if (sd->s_type & CONFIGFS_USET_DEFAULT) { 476
477 list_for_each_entry(sd, &parent_sd->s_children, s_sibling)
478 if (sd->s_type & CONFIGFS_USET_DEFAULT)
419 configfs_detach_rollback(sd->s_dentry); 479 configfs_detach_rollback(sd->s_dentry);
420 sd->s_type &= ~CONFIGFS_USET_DROPPING;
421 }
422 }
423} 480}
424 481
425static void detach_attrs(struct config_item * item) 482static void detach_attrs(struct config_item * item)
@@ -558,36 +615,21 @@ static int create_default_group(struct config_group *parent_group,
558static int populate_groups(struct config_group *group) 615static int populate_groups(struct config_group *group)
559{ 616{
560 struct config_group *new_group; 617 struct config_group *new_group;
561 struct dentry *dentry = group->cg_item.ci_dentry;
562 int ret = 0; 618 int ret = 0;
563 int i; 619 int i;
564 620
565 if (group->default_groups) { 621 if (group->default_groups) {
566 /*
567 * FYI, we're faking mkdir here
568 * I'm not sure we need this semaphore, as we're called
569 * from our parent's mkdir. That holds our parent's
570 * i_mutex, so afaik lookup cannot continue through our
571 * parent to find us, let alone mess with our tree.
572 * That said, taking our i_mutex is closer to mkdir
573 * emulation, and shouldn't hurt.
574 */
575 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
576
577 for (i = 0; group->default_groups[i]; i++) { 622 for (i = 0; group->default_groups[i]; i++) {
578 new_group = group->default_groups[i]; 623 new_group = group->default_groups[i];
579 624
580 ret = create_default_group(group, new_group); 625 ret = create_default_group(group, new_group);
581 if (ret) 626 if (ret) {
627 detach_groups(group);
582 break; 628 break;
629 }
583 } 630 }
584
585 mutex_unlock(&dentry->d_inode->i_mutex);
586 } 631 }
587 632
588 if (ret)
589 detach_groups(group);
590
591 return ret; 633 return ret;
592} 634}
593 635
@@ -702,7 +744,15 @@ static int configfs_attach_item(struct config_item *parent_item,
702 if (!ret) { 744 if (!ret) {
703 ret = populate_attrs(item); 745 ret = populate_attrs(item);
704 if (ret) { 746 if (ret) {
747 /*
748 * We are going to remove an inode and its dentry but
749 * the VFS may already have hit and used them. Thus,
750 * we must lock them as rmdir() would.
751 */
752 mutex_lock(&dentry->d_inode->i_mutex);
705 configfs_remove_dir(item); 753 configfs_remove_dir(item);
754 dentry->d_inode->i_flags |= S_DEAD;
755 mutex_unlock(&dentry->d_inode->i_mutex);
706 d_delete(dentry); 756 d_delete(dentry);
707 } 757 }
708 } 758 }
@@ -710,6 +760,7 @@ static int configfs_attach_item(struct config_item *parent_item,
710 return ret; 760 return ret;
711} 761}
712 762
763/* Caller holds the mutex of the item's inode */
713static void configfs_detach_item(struct config_item *item) 764static void configfs_detach_item(struct config_item *item)
714{ 765{
715 detach_attrs(item); 766 detach_attrs(item);
@@ -728,16 +779,30 @@ static int configfs_attach_group(struct config_item *parent_item,
728 sd = dentry->d_fsdata; 779 sd = dentry->d_fsdata;
729 sd->s_type |= CONFIGFS_USET_DIR; 780 sd->s_type |= CONFIGFS_USET_DIR;
730 781
782 /*
783 * FYI, we're faking mkdir in populate_groups()
784 * We must lock the group's inode to avoid races with the VFS
785 * which can already hit the inode and try to add/remove entries
786 * under it.
787 *
788 * We must also lock the inode to remove it safely in case of
789 * error, as rmdir() would.
790 */
791 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
731 ret = populate_groups(to_config_group(item)); 792 ret = populate_groups(to_config_group(item));
732 if (ret) { 793 if (ret) {
733 configfs_detach_item(item); 794 configfs_detach_item(item);
734 d_delete(dentry); 795 dentry->d_inode->i_flags |= S_DEAD;
735 } 796 }
797 mutex_unlock(&dentry->d_inode->i_mutex);
798 if (ret)
799 d_delete(dentry);
736 } 800 }
737 801
738 return ret; 802 return ret;
739} 803}
740 804
805/* Caller holds the mutex of the group's inode */
741static void configfs_detach_group(struct config_item *item) 806static void configfs_detach_group(struct config_item *item)
742{ 807{
743 detach_groups(to_config_group(item)); 808 detach_groups(to_config_group(item));
@@ -1035,7 +1100,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1035 struct configfs_subsystem *subsys; 1100 struct configfs_subsystem *subsys;
1036 struct configfs_dirent *sd; 1101 struct configfs_dirent *sd;
1037 struct config_item_type *type; 1102 struct config_item_type *type;
1038 struct module *owner = NULL; 1103 struct module *subsys_owner = NULL, *new_item_owner = NULL;
1039 char *name; 1104 char *name;
1040 1105
1041 if (dentry->d_parent == configfs_sb->s_root) { 1106 if (dentry->d_parent == configfs_sb->s_root) {
@@ -1044,6 +1109,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1044 } 1109 }
1045 1110
1046 sd = dentry->d_parent->d_fsdata; 1111 sd = dentry->d_parent->d_fsdata;
1112
1113 /*
1114 * Fake invisibility if dir belongs to a group/default groups hierarchy
1115 * being attached
1116 */
1117 if (!configfs_dirent_is_ready(sd)) {
1118 ret = -ENOENT;
1119 goto out;
1120 }
1121
1047 if (!(sd->s_type & CONFIGFS_USET_DIR)) { 1122 if (!(sd->s_type & CONFIGFS_USET_DIR)) {
1048 ret = -EPERM; 1123 ret = -EPERM;
1049 goto out; 1124 goto out;
@@ -1062,10 +1137,25 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1062 goto out_put; 1137 goto out_put;
1063 } 1138 }
1064 1139
1140 /*
1141 * The subsystem may belong to a different module than the item
1142 * being created. We don't want to safely pin the new item but
1143 * fail to pin the subsystem it sits under.
1144 */
1145 if (!subsys->su_group.cg_item.ci_type) {
1146 ret = -EINVAL;
1147 goto out_put;
1148 }
1149 subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
1150 if (!try_module_get(subsys_owner)) {
1151 ret = -EINVAL;
1152 goto out_put;
1153 }
1154
1065 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); 1155 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
1066 if (!name) { 1156 if (!name) {
1067 ret = -ENOMEM; 1157 ret = -ENOMEM;
1068 goto out_put; 1158 goto out_subsys_put;
1069 } 1159 }
1070 1160
1071 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); 1161 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
@@ -1094,10 +1184,10 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1094 kfree(name); 1184 kfree(name);
1095 if (ret) { 1185 if (ret) {
1096 /* 1186 /*
1097 * If item == NULL, then link_obj() was never called. 1187 * If ret != 0, then link_obj() was never called.
1098 * There are no extra references to clean up. 1188 * There are no extra references to clean up.
1099 */ 1189 */
1100 goto out_put; 1190 goto out_subsys_put;
1101 } 1191 }
1102 1192
1103 /* 1193 /*
@@ -1111,8 +1201,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1111 goto out_unlink; 1201 goto out_unlink;
1112 } 1202 }
1113 1203
1114 owner = type->ct_owner; 1204 new_item_owner = type->ct_owner;
1115 if (!try_module_get(owner)) { 1205 if (!try_module_get(new_item_owner)) {
1116 ret = -EINVAL; 1206 ret = -EINVAL;
1117 goto out_unlink; 1207 goto out_unlink;
1118 } 1208 }
@@ -1142,6 +1232,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1142 1232
1143 spin_lock(&configfs_dirent_lock); 1233 spin_lock(&configfs_dirent_lock);
1144 sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; 1234 sd->s_type &= ~CONFIGFS_USET_IN_MKDIR;
1235 if (!ret)
1236 configfs_dir_set_ready(dentry->d_fsdata);
1145 spin_unlock(&configfs_dirent_lock); 1237 spin_unlock(&configfs_dirent_lock);
1146 1238
1147out_unlink: 1239out_unlink:
@@ -1159,9 +1251,13 @@ out_unlink:
1159 mutex_unlock(&subsys->su_mutex); 1251 mutex_unlock(&subsys->su_mutex);
1160 1252
1161 if (module_got) 1253 if (module_got)
1162 module_put(owner); 1254 module_put(new_item_owner);
1163 } 1255 }
1164 1256
1257out_subsys_put:
1258 if (ret)
1259 module_put(subsys_owner);
1260
1165out_put: 1261out_put:
1166 /* 1262 /*
1167 * link_obj()/link_group() took a reference from child->parent, 1263 * link_obj()/link_group() took a reference from child->parent,
@@ -1180,7 +1276,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1180 struct config_item *item; 1276 struct config_item *item;
1181 struct configfs_subsystem *subsys; 1277 struct configfs_subsystem *subsys;
1182 struct configfs_dirent *sd; 1278 struct configfs_dirent *sd;
1183 struct module *owner = NULL; 1279 struct module *subsys_owner = NULL, *dead_item_owner = NULL;
1184 int ret; 1280 int ret;
1185 1281
1186 if (dentry->d_parent == configfs_sb->s_root) 1282 if (dentry->d_parent == configfs_sb->s_root)
@@ -1207,6 +1303,15 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1207 return -EINVAL; 1303 return -EINVAL;
1208 } 1304 }
1209 1305
1306 /* configfs_mkdir() shouldn't have allowed this */
1307 BUG_ON(!subsys->su_group.cg_item.ci_type);
1308 subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
1309
1310 /*
1311 * Ensure that no racing symlink() will make detach_prep() fail while
1312 * the new link is temporarily attached
1313 */
1314 mutex_lock(&configfs_symlink_mutex);
1210 spin_lock(&configfs_dirent_lock); 1315 spin_lock(&configfs_dirent_lock);
1211 do { 1316 do {
1212 struct mutex *wait_mutex; 1317 struct mutex *wait_mutex;
@@ -1215,6 +1320,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1215 if (ret) { 1320 if (ret) {
1216 configfs_detach_rollback(dentry); 1321 configfs_detach_rollback(dentry);
1217 spin_unlock(&configfs_dirent_lock); 1322 spin_unlock(&configfs_dirent_lock);
1323 mutex_unlock(&configfs_symlink_mutex);
1218 if (ret != -EAGAIN) { 1324 if (ret != -EAGAIN) {
1219 config_item_put(parent_item); 1325 config_item_put(parent_item);
1220 return ret; 1326 return ret;
@@ -1224,10 +1330,12 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1224 mutex_lock(wait_mutex); 1330 mutex_lock(wait_mutex);
1225 mutex_unlock(wait_mutex); 1331 mutex_unlock(wait_mutex);
1226 1332
1333 mutex_lock(&configfs_symlink_mutex);
1227 spin_lock(&configfs_dirent_lock); 1334 spin_lock(&configfs_dirent_lock);
1228 } 1335 }
1229 } while (ret == -EAGAIN); 1336 } while (ret == -EAGAIN);
1230 spin_unlock(&configfs_dirent_lock); 1337 spin_unlock(&configfs_dirent_lock);
1338 mutex_unlock(&configfs_symlink_mutex);
1231 1339
1232 /* Get a working ref for the duration of this function */ 1340 /* Get a working ref for the duration of this function */
1233 item = configfs_get_config_item(dentry); 1341 item = configfs_get_config_item(dentry);
@@ -1236,7 +1344,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1236 config_item_put(parent_item); 1344 config_item_put(parent_item);
1237 1345
1238 if (item->ci_type) 1346 if (item->ci_type)
1239 owner = item->ci_type->ct_owner; 1347 dead_item_owner = item->ci_type->ct_owner;
1240 1348
1241 if (sd->s_type & CONFIGFS_USET_DIR) { 1349 if (sd->s_type & CONFIGFS_USET_DIR) {
1242 configfs_detach_group(item); 1350 configfs_detach_group(item);
@@ -1258,7 +1366,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1258 /* Drop our reference from above */ 1366 /* Drop our reference from above */
1259 config_item_put(item); 1367 config_item_put(item);
1260 1368
1261 module_put(owner); 1369 module_put(dead_item_owner);
1370 module_put(subsys_owner);
1262 1371
1263 return 0; 1372 return 0;
1264} 1373}
@@ -1314,13 +1423,24 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
1314{ 1423{
1315 struct dentry * dentry = file->f_path.dentry; 1424 struct dentry * dentry = file->f_path.dentry;
1316 struct configfs_dirent * parent_sd = dentry->d_fsdata; 1425 struct configfs_dirent * parent_sd = dentry->d_fsdata;
1426 int err;
1317 1427
1318 mutex_lock(&dentry->d_inode->i_mutex); 1428 mutex_lock(&dentry->d_inode->i_mutex);
1319 file->private_data = configfs_new_dirent(parent_sd, NULL); 1429 /*
1430 * Fake invisibility if dir belongs to a group/default groups hierarchy
1431 * being attached
1432 */
1433 err = -ENOENT;
1434 if (configfs_dirent_is_ready(parent_sd)) {
1435 file->private_data = configfs_new_dirent(parent_sd, NULL);
1436 if (IS_ERR(file->private_data))
1437 err = PTR_ERR(file->private_data);
1438 else
1439 err = 0;
1440 }
1320 mutex_unlock(&dentry->d_inode->i_mutex); 1441 mutex_unlock(&dentry->d_inode->i_mutex);
1321 1442
1322 return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0; 1443 return err;
1323
1324} 1444}
1325 1445
1326static int configfs_dir_close(struct inode *inode, struct file *file) 1446static int configfs_dir_close(struct inode *inode, struct file *file)
@@ -1491,6 +1611,10 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1491 if (err) { 1611 if (err) {
1492 d_delete(dentry); 1612 d_delete(dentry);
1493 dput(dentry); 1613 dput(dentry);
1614 } else {
1615 spin_lock(&configfs_dirent_lock);
1616 configfs_dir_set_ready(dentry->d_fsdata);
1617 spin_unlock(&configfs_dirent_lock);
1494 } 1618 }
1495 } 1619 }
1496 1620
@@ -1517,11 +1641,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1517 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, 1641 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
1518 I_MUTEX_PARENT); 1642 I_MUTEX_PARENT);
1519 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 1643 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
1644 mutex_lock(&configfs_symlink_mutex);
1520 spin_lock(&configfs_dirent_lock); 1645 spin_lock(&configfs_dirent_lock);
1521 if (configfs_detach_prep(dentry, NULL)) { 1646 if (configfs_detach_prep(dentry, NULL)) {
1522 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); 1647 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
1523 } 1648 }
1524 spin_unlock(&configfs_dirent_lock); 1649 spin_unlock(&configfs_dirent_lock);
1650 mutex_unlock(&configfs_symlink_mutex);
1525 configfs_detach_group(&group->cg_item); 1651 configfs_detach_group(&group->cg_item);
1526 dentry->d_inode->i_flags |= S_DEAD; 1652 dentry->d_inode->i_flags |= S_DEAD;
1527 mutex_unlock(&dentry->d_inode->i_mutex); 1653 mutex_unlock(&dentry->d_inode->i_mutex);
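The dir.c changes above add a two-phase attach: new directories carry CONFIGFS_USET_CREATING, configfs_dirent_is_ready() makes lookup, mkdir, readdir and symlink treat them as absent, and configfs_dir_set_ready() publishes the whole hierarchy in one pass under configfs_dirent_lock once attachment succeeds. Below is a minimal sketch of that publish-under-a-lock pattern, with a hypothetical node type and a pthread mutex standing in for the kernel spinlock.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

/*
 * Sketch (not kernel code) of the CONFIGFS_USET_CREATING scheme: a node
 * starts life with 'creating' set and is invisible to readers until the
 * whole subtree is attached, at which point it is published recursively
 * while the tree lock is held.
 */
struct node {
	bool creating;                  /* still being attached */
	struct node *child;             /* first child */
	struct node *sibling;           /* next sibling */
};

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;

/* Caller holds tree_lock, mirroring configfs_dir_set_ready(). */
static void node_set_ready(struct node *n)
{
	n->creating = false;
	for (struct node *c = n->child; c; c = c->sibling)
		if (c->creating)
			node_set_ready(c);
}

/* Mirrors configfs_dirent_is_ready(): may flip false->true, never back. */
static bool node_is_ready(struct node *n)
{
	pthread_mutex_lock(&tree_lock);
	bool ready = !n->creating;
	pthread_mutex_unlock(&tree_lock);
	return ready;
}

int main(void)
{
	struct node child = { .creating = true };
	struct node root  = { .creating = true, .child = &child };

	/* invisible while attaching; publish the whole subtree at once */
	pthread_mutex_lock(&tree_lock);
	node_set_ready(&root);
	pthread_mutex_unlock(&tree_lock);

	return node_is_ready(&child) ? 0 : 1;
}
```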
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0004d18c40a..bf74973b049 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -31,6 +31,9 @@
31#include <linux/configfs.h> 31#include <linux/configfs.h>
32#include "configfs_internal.h" 32#include "configfs_internal.h"
33 33
34/* Protects attachments of new symlinks */
35DEFINE_MUTEX(configfs_symlink_mutex);
36
34static int item_depth(struct config_item * item) 37static int item_depth(struct config_item * item)
35{ 38{
36 struct config_item * p = item; 39 struct config_item * p = item;
@@ -73,11 +76,20 @@ static int create_link(struct config_item *parent_item,
73 struct configfs_symlink *sl; 76 struct configfs_symlink *sl;
74 int ret; 77 int ret;
75 78
79 ret = -ENOENT;
80 if (!configfs_dirent_is_ready(target_sd))
81 goto out;
76 ret = -ENOMEM; 82 ret = -ENOMEM;
77 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); 83 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
78 if (sl) { 84 if (sl) {
79 sl->sl_target = config_item_get(item); 85 sl->sl_target = config_item_get(item);
80 spin_lock(&configfs_dirent_lock); 86 spin_lock(&configfs_dirent_lock);
87 if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
88 spin_unlock(&configfs_dirent_lock);
89 config_item_put(item);
90 kfree(sl);
91 return -ENOENT;
92 }
81 list_add(&sl->sl_list, &target_sd->s_links); 93 list_add(&sl->sl_list, &target_sd->s_links);
82 spin_unlock(&configfs_dirent_lock); 94 spin_unlock(&configfs_dirent_lock);
83 ret = configfs_create_link(sl, parent_item->ci_dentry, 95 ret = configfs_create_link(sl, parent_item->ci_dentry,
@@ -91,6 +103,7 @@ static int create_link(struct config_item *parent_item,
91 } 103 }
92 } 104 }
93 105
106out:
94 return ret; 107 return ret;
95} 108}
96 109
@@ -120,6 +133,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
120{ 133{
121 int ret; 134 int ret;
122 struct nameidata nd; 135 struct nameidata nd;
136 struct configfs_dirent *sd;
123 struct config_item *parent_item; 137 struct config_item *parent_item;
124 struct config_item *target_item; 138 struct config_item *target_item;
125 struct config_item_type *type; 139 struct config_item_type *type;
@@ -128,9 +142,19 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
128 if (dentry->d_parent == configfs_sb->s_root) 142 if (dentry->d_parent == configfs_sb->s_root)
129 goto out; 143 goto out;
130 144
145 sd = dentry->d_parent->d_fsdata;
146 /*
147 * Fake invisibility if dir belongs to a group/default groups hierarchy
148 * being attached
149 */
150 ret = -ENOENT;
151 if (!configfs_dirent_is_ready(sd))
152 goto out;
153
131 parent_item = configfs_get_config_item(dentry->d_parent); 154 parent_item = configfs_get_config_item(dentry->d_parent);
132 type = parent_item->ci_type; 155 type = parent_item->ci_type;
133 156
157 ret = -EPERM;
134 if (!type || !type->ct_item_ops || 158 if (!type || !type->ct_item_ops ||
135 !type->ct_item_ops->allow_link) 159 !type->ct_item_ops->allow_link)
136 goto out_put; 160 goto out_put;
@@ -141,7 +165,9 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
141 165
142 ret = type->ct_item_ops->allow_link(parent_item, target_item); 166 ret = type->ct_item_ops->allow_link(parent_item, target_item);
143 if (!ret) { 167 if (!ret) {
168 mutex_lock(&configfs_symlink_mutex);
144 ret = create_link(parent_item, target_item, dentry); 169 ret = create_link(parent_item, target_item, dentry);
170 mutex_unlock(&configfs_symlink_mutex);
145 if (ret && type->ct_item_ops->drop_link) 171 if (ret && type->ct_item_ops->drop_link)
146 type->ct_item_ops->drop_link(parent_item, 172 type->ct_item_ops->drop_link(parent_item,
147 target_item); 173 target_item);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 285b64a8b06..488eb424f66 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -29,7 +29,7 @@
29#define DEVPTS_DEFAULT_MODE 0600 29#define DEVPTS_DEFAULT_MODE 0600
30 30
31extern int pty_limit; /* Config limit on Unix98 ptys */ 31extern int pty_limit; /* Config limit on Unix98 ptys */
32static DEFINE_IDR(allocated_ptys); 32static DEFINE_IDA(allocated_ptys);
33static DEFINE_MUTEX(allocated_ptys_lock); 33static DEFINE_MUTEX(allocated_ptys_lock);
34 34
35static struct vfsmount *devpts_mnt; 35static struct vfsmount *devpts_mnt;
@@ -180,24 +180,24 @@ static struct dentry *get_node(int num)
180int devpts_new_index(void) 180int devpts_new_index(void)
181{ 181{
182 int index; 182 int index;
183 int idr_ret; 183 int ida_ret;
184 184
185retry: 185retry:
186 if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) { 186 if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) {
187 return -ENOMEM; 187 return -ENOMEM;
188 } 188 }
189 189
190 mutex_lock(&allocated_ptys_lock); 190 mutex_lock(&allocated_ptys_lock);
191 idr_ret = idr_get_new(&allocated_ptys, NULL, &index); 191 ida_ret = ida_get_new(&allocated_ptys, &index);
192 if (idr_ret < 0) { 192 if (ida_ret < 0) {
193 mutex_unlock(&allocated_ptys_lock); 193 mutex_unlock(&allocated_ptys_lock);
194 if (idr_ret == -EAGAIN) 194 if (ida_ret == -EAGAIN)
195 goto retry; 195 goto retry;
196 return -EIO; 196 return -EIO;
197 } 197 }
198 198
199 if (index >= pty_limit) { 199 if (index >= pty_limit) {
200 idr_remove(&allocated_ptys, index); 200 ida_remove(&allocated_ptys, index);
201 mutex_unlock(&allocated_ptys_lock); 201 mutex_unlock(&allocated_ptys_lock);
202 return -EIO; 202 return -EIO;
203 } 203 }
@@ -208,7 +208,7 @@ retry:
208void devpts_kill_index(int idx) 208void devpts_kill_index(int idx)
209{ 209{
210 mutex_lock(&allocated_ptys_lock); 210 mutex_lock(&allocated_ptys_lock);
211 idr_remove(&allocated_ptys, idx); 211 ida_remove(&allocated_ptys, idx);
212 mutex_unlock(&allocated_ptys_lock); 212 mutex_unlock(&allocated_ptys_lock);
213} 213}
214 214
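The devpts hunk above switches the pty index allocator from an IDR to an IDA, since only a small integer is needed rather than an id-to-pointer mapping; the pre-get/retry-on-EAGAIN flow and the pty_limit check are unchanged. A rough userspace sketch of what the code asks of the IDA follows, using a fixed bitmap and an assumed PTY_LIMIT; locking is omitted, whereas the kernel serializes with allocated_ptys_lock.

```c
/*
 * Userspace sketch of devpts_new_index()/devpts_kill_index() above:
 * hand out the lowest free small integer and give it back later.  A
 * fixed bitmap stands in for the kernel's ida; PTY_LIMIT is an assumed
 * stand-in for the pty_limit sysctl.
 */
#define PTY_LIMIT 256
#define BITS_PER_WORD (8 * sizeof(unsigned long))

static unsigned long used[PTY_LIMIT / BITS_PER_WORD + 1];

static int pty_new_index(void)
{
	for (int idx = 0; idx < PTY_LIMIT; idx++) {
		unsigned long bit = 1UL << (idx % BITS_PER_WORD);
		unsigned long *word = &used[idx / BITS_PER_WORD];

		if (!(*word & bit)) {
			*word |= bit;   /* lowest free index, like ida_get_new() */
			return idx;
		}
	}
	return -1;                      /* all indices in use (kernel returns -EIO) */
}

static void pty_kill_index(int idx)
{
	used[idx / BITS_PER_WORD] &= ~(1UL << (idx % BITS_PER_WORD));
}

int main(void)
{
	int a = pty_new_index();        /* 0 */
	int b = pty_new_index();        /* 1 */

	pty_kill_index(a);
	return (pty_new_index() == a && b == 1) ? 0 : 1;
}
```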
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 2d3d1027ce2..724ddac9153 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -363,6 +363,7 @@ static int search_rsb_list(struct list_head *head, char *name, int len,
363 if (len == r->res_length && !memcmp(name, r->res_name, len)) 363 if (len == r->res_length && !memcmp(name, r->res_name, len))
364 goto found; 364 goto found;
365 } 365 }
366 *r_ret = NULL;
366 return -EBADR; 367 return -EBADR;
367 368
368 found: 369 found:
@@ -1782,7 +1783,8 @@ static void grant_pending_locks(struct dlm_rsb *r)
1782 1783
1783 list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) { 1784 list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) {
1784 if (lkb->lkb_bastfn && lock_requires_bast(lkb, high, cw)) { 1785 if (lkb->lkb_bastfn && lock_requires_bast(lkb, high, cw)) {
1785 if (cw && high == DLM_LOCK_PR) 1786 if (cw && high == DLM_LOCK_PR &&
1787 lkb->lkb_grmode == DLM_LOCK_PR)
1786 queue_bast(r, lkb, DLM_LOCK_CW); 1788 queue_bast(r, lkb, DLM_LOCK_CW);
1787 else 1789 else
1788 queue_bast(r, lkb, high); 1790 queue_bast(r, lkb, high);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 637018c891e..3962262f991 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -891,8 +891,10 @@ static void tcp_connect_to_sock(struct connection *con)
891 goto out_err; 891 goto out_err;
892 892
893 memset(&saddr, 0, sizeof(saddr)); 893 memset(&saddr, 0, sizeof(saddr));
894 if (dlm_nodeid_to_addr(con->nodeid, &saddr)) 894 if (dlm_nodeid_to_addr(con->nodeid, &saddr)) {
895 sock_release(sock);
895 goto out_err; 896 goto out_err;
897 }
896 898
897 sock->sk->sk_user_data = con; 899 sock->sk->sk_user_data = con;
898 con->rx_action = receive_from_sock; 900 con->rx_action = receive_from_sock;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index f976f303c19..929e48ae759 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -539,7 +539,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
539 539
540 /* do we really need this? can a write happen after a close? */ 540 /* do we really need this? can a write happen after a close? */
541 if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) && 541 if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) &&
542 test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)) 542 (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)))
543 return -EINVAL; 543 return -EINVAL;
544 544
545 sigfillset(&allsigs); 545 sigfillset(&allsigs);
diff --git a/fs/dquot.c b/fs/dquot.c
index 1346eebe74c..8ec4d6cc763 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1793,6 +1793,21 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
1793 return ret; 1793 return ret;
1794} 1794}
1795 1795
1796int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
1797 struct path *path)
1798{
1799 int error = security_quota_on(path->dentry);
1800 if (error)
1801 return error;
1802 /* Quota file not on the same filesystem? */
1803 if (path->mnt->mnt_sb != sb)
1804 error = -EXDEV;
1805 else
1806 error = vfs_quota_on_inode(path->dentry->d_inode, type,
1807 format_id);
1808 return error;
1809}
1810
1796/* Actual function called from quotactl() */ 1811/* Actual function called from quotactl() */
1797int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, 1812int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
1798 int remount) 1813 int remount)
@@ -1804,19 +1819,10 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
1804 return vfs_quota_on_remount(sb, type); 1819 return vfs_quota_on_remount(sb, type);
1805 1820
1806 error = path_lookup(path, LOOKUP_FOLLOW, &nd); 1821 error = path_lookup(path, LOOKUP_FOLLOW, &nd);
1807 if (error < 0) 1822 if (!error) {
1808 return error; 1823 error = vfs_quota_on_path(sb, type, format_id, &nd.path);
1809 error = security_quota_on(nd.path.dentry); 1824 path_put(&nd.path);
1810 if (error) 1825 }
1811 goto out_path;
1812 /* Quota file not on the same filesystem? */
1813 if (nd.path.mnt->mnt_sb != sb)
1814 error = -EXDEV;
1815 else
1816 error = vfs_quota_on_inode(nd.path.dentry->d_inode, type,
1817 format_id);
1818out_path:
1819 path_put(&nd.path);
1820 return error; 1826 return error;
1821} 1827}
1822 1828
@@ -2185,6 +2191,7 @@ EXPORT_SYMBOL(unregister_quota_format);
2185EXPORT_SYMBOL(dqstats); 2191EXPORT_SYMBOL(dqstats);
2186EXPORT_SYMBOL(dq_data_lock); 2192EXPORT_SYMBOL(dq_data_lock);
2187EXPORT_SYMBOL(vfs_quota_on); 2193EXPORT_SYMBOL(vfs_quota_on);
2194EXPORT_SYMBOL(vfs_quota_on_path);
2188EXPORT_SYMBOL(vfs_quota_on_mount); 2195EXPORT_SYMBOL(vfs_quota_on_mount);
2189EXPORT_SYMBOL(vfs_quota_off); 2196EXPORT_SYMBOL(vfs_quota_off);
2190EXPORT_SYMBOL(vfs_quota_sync); 2197EXPORT_SYMBOL(vfs_quota_sync);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 7b99917ffad..06db79d05c1 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -475,8 +475,8 @@ int ecryptfs_encrypt_page(struct page *page)
475{ 475{
476 struct inode *ecryptfs_inode; 476 struct inode *ecryptfs_inode;
477 struct ecryptfs_crypt_stat *crypt_stat; 477 struct ecryptfs_crypt_stat *crypt_stat;
478 char *enc_extent_virt = NULL; 478 char *enc_extent_virt;
479 struct page *enc_extent_page; 479 struct page *enc_extent_page = NULL;
480 loff_t extent_offset; 480 loff_t extent_offset;
481 int rc = 0; 481 int rc = 0;
482 482
@@ -492,14 +492,14 @@ int ecryptfs_encrypt_page(struct page *page)
492 page->index); 492 page->index);
493 goto out; 493 goto out;
494 } 494 }
495 enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); 495 enc_extent_page = alloc_page(GFP_USER);
496 if (!enc_extent_virt) { 496 if (!enc_extent_page) {
497 rc = -ENOMEM; 497 rc = -ENOMEM;
498 ecryptfs_printk(KERN_ERR, "Error allocating memory for " 498 ecryptfs_printk(KERN_ERR, "Error allocating memory for "
499 "encrypted extent\n"); 499 "encrypted extent\n");
500 goto out; 500 goto out;
501 } 501 }
502 enc_extent_page = virt_to_page(enc_extent_virt); 502 enc_extent_virt = kmap(enc_extent_page);
503 for (extent_offset = 0; 503 for (extent_offset = 0;
504 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); 504 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
505 extent_offset++) { 505 extent_offset++) {
@@ -527,7 +527,10 @@ int ecryptfs_encrypt_page(struct page *page)
527 } 527 }
528 } 528 }
529out: 529out:
530 kfree(enc_extent_virt); 530 if (enc_extent_page) {
531 kunmap(enc_extent_page);
532 __free_page(enc_extent_page);
533 }
531 return rc; 534 return rc;
532} 535}
533 536
@@ -609,8 +612,8 @@ int ecryptfs_decrypt_page(struct page *page)
609{ 612{
610 struct inode *ecryptfs_inode; 613 struct inode *ecryptfs_inode;
611 struct ecryptfs_crypt_stat *crypt_stat; 614 struct ecryptfs_crypt_stat *crypt_stat;
612 char *enc_extent_virt = NULL; 615 char *enc_extent_virt;
613 struct page *enc_extent_page; 616 struct page *enc_extent_page = NULL;
614 unsigned long extent_offset; 617 unsigned long extent_offset;
615 int rc = 0; 618 int rc = 0;
616 619
@@ -627,14 +630,14 @@ int ecryptfs_decrypt_page(struct page *page)
627 page->index); 630 page->index);
628 goto out; 631 goto out;
629 } 632 }
630 enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); 633 enc_extent_page = alloc_page(GFP_USER);
631 if (!enc_extent_virt) { 634 if (!enc_extent_page) {
632 rc = -ENOMEM; 635 rc = -ENOMEM;
633 ecryptfs_printk(KERN_ERR, "Error allocating memory for " 636 ecryptfs_printk(KERN_ERR, "Error allocating memory for "
634 "encrypted extent\n"); 637 "encrypted extent\n");
635 goto out; 638 goto out;
636 } 639 }
637 enc_extent_page = virt_to_page(enc_extent_virt); 640 enc_extent_virt = kmap(enc_extent_page);
638 for (extent_offset = 0; 641 for (extent_offset = 0;
639 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); 642 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
640 extent_offset++) { 643 extent_offset++) {
@@ -662,7 +665,10 @@ int ecryptfs_decrypt_page(struct page *page)
662 } 665 }
663 } 666 }
664out: 667out:
665 kfree(enc_extent_virt); 668 if (enc_extent_page) {
669 kunmap(enc_extent_page);
670 __free_page(enc_extent_page);
671 }
666 return rc; 672 return rc;
667} 673}
668 674
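The ecryptfs change above stops kmalloc()ing a PAGE_CACHE_SIZE buffer and deriving its struct page with virt_to_page(); it allocates a real page with alloc_page(), maps it with kmap(), and releases it with kunmap()/__free_page() on both the success and the error path. The snippet below is only a loose userspace analogue of that "own the whole page explicitly" pattern, with mmap()/munmap() standing in for the kernel page APIs.

```c
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

/*
 * Loose analogue of the ecryptfs change: instead of taking a heap
 * allocation and assuming it maps cleanly onto one page, ask the
 * system for a whole page explicitly and release that same page when
 * done.  This illustrates the ownership pattern only, not kernel API
 * usage.
 */
int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	void *page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (page == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	memset(page, 0, page_size);     /* use the page as a scratch extent */
	printf("got a %ld-byte page at %p\n", page_size, page);

	munmap(page, page_size);        /* freed on error and success alike */
	return 0;
}
```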
diff --git a/fs/exec.c b/fs/exec.c
index 9696bbf0f0b..32993beecbe 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -32,6 +32,7 @@
32#include <linux/swap.h> 32#include <linux/swap.h>
33#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/init.h> 34#include <linux/init.h>
35#include <linux/pagemap.h>
35#include <linux/highmem.h> 36#include <linux/highmem.h>
36#include <linux/spinlock.h> 37#include <linux/spinlock.h>
37#include <linux/key.h> 38#include <linux/key.h>
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 384fc0d1dd7..991d6dfeb51 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -791,6 +791,7 @@ const struct address_space_operations ext2_aops = {
791 .direct_IO = ext2_direct_IO, 791 .direct_IO = ext2_direct_IO,
792 .writepages = ext2_writepages, 792 .writepages = ext2_writepages,
793 .migratepage = buffer_migrate_page, 793 .migratepage = buffer_migrate_page,
794 .is_partially_uptodate = block_is_partially_uptodate,
794}; 795};
795 796
796const struct address_space_operations ext2_aops_xip = { 797const struct address_space_operations ext2_aops_xip = {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3bf07d70b91..507d8689b11 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1767,44 +1767,47 @@ static int ext3_journalled_set_page_dirty(struct page *page)
1767} 1767}
1768 1768
1769static const struct address_space_operations ext3_ordered_aops = { 1769static const struct address_space_operations ext3_ordered_aops = {
1770 .readpage = ext3_readpage, 1770 .readpage = ext3_readpage,
1771 .readpages = ext3_readpages, 1771 .readpages = ext3_readpages,
1772 .writepage = ext3_ordered_writepage, 1772 .writepage = ext3_ordered_writepage,
1773 .sync_page = block_sync_page, 1773 .sync_page = block_sync_page,
1774 .write_begin = ext3_write_begin, 1774 .write_begin = ext3_write_begin,
1775 .write_end = ext3_ordered_write_end, 1775 .write_end = ext3_ordered_write_end,
1776 .bmap = ext3_bmap, 1776 .bmap = ext3_bmap,
1777 .invalidatepage = ext3_invalidatepage, 1777 .invalidatepage = ext3_invalidatepage,
1778 .releasepage = ext3_releasepage, 1778 .releasepage = ext3_releasepage,
1779 .direct_IO = ext3_direct_IO, 1779 .direct_IO = ext3_direct_IO,
1780 .migratepage = buffer_migrate_page, 1780 .migratepage = buffer_migrate_page,
1781 .is_partially_uptodate = block_is_partially_uptodate,
1781}; 1782};
1782 1783
1783static const struct address_space_operations ext3_writeback_aops = { 1784static const struct address_space_operations ext3_writeback_aops = {
1784 .readpage = ext3_readpage, 1785 .readpage = ext3_readpage,
1785 .readpages = ext3_readpages, 1786 .readpages = ext3_readpages,
1786 .writepage = ext3_writeback_writepage, 1787 .writepage = ext3_writeback_writepage,
1787 .sync_page = block_sync_page, 1788 .sync_page = block_sync_page,
1788 .write_begin = ext3_write_begin, 1789 .write_begin = ext3_write_begin,
1789 .write_end = ext3_writeback_write_end, 1790 .write_end = ext3_writeback_write_end,
1790 .bmap = ext3_bmap, 1791 .bmap = ext3_bmap,
1791 .invalidatepage = ext3_invalidatepage, 1792 .invalidatepage = ext3_invalidatepage,
1792 .releasepage = ext3_releasepage, 1793 .releasepage = ext3_releasepage,
1793 .direct_IO = ext3_direct_IO, 1794 .direct_IO = ext3_direct_IO,
1794 .migratepage = buffer_migrate_page, 1795 .migratepage = buffer_migrate_page,
1796 .is_partially_uptodate = block_is_partially_uptodate,
1795}; 1797};
1796 1798
1797static const struct address_space_operations ext3_journalled_aops = { 1799static const struct address_space_operations ext3_journalled_aops = {
1798 .readpage = ext3_readpage, 1800 .readpage = ext3_readpage,
1799 .readpages = ext3_readpages, 1801 .readpages = ext3_readpages,
1800 .writepage = ext3_journalled_writepage, 1802 .writepage = ext3_journalled_writepage,
1801 .sync_page = block_sync_page, 1803 .sync_page = block_sync_page,
1802 .write_begin = ext3_write_begin, 1804 .write_begin = ext3_write_begin,
1803 .write_end = ext3_journalled_write_end, 1805 .write_end = ext3_journalled_write_end,
1804 .set_page_dirty = ext3_journalled_set_page_dirty, 1806 .set_page_dirty = ext3_journalled_set_page_dirty,
1805 .bmap = ext3_bmap, 1807 .bmap = ext3_bmap,
1806 .invalidatepage = ext3_invalidatepage, 1808 .invalidatepage = ext3_invalidatepage,
1807 .releasepage = ext3_releasepage, 1809 .releasepage = ext3_releasepage,
1810 .is_partially_uptodate = block_is_partially_uptodate,
1808}; 1811};
1809 1812
1810void ext3_set_aops(struct inode *inode) 1813void ext3_set_aops(struct inode *inode)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8ddced38467..f38a5afc39a 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2810,8 +2810,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2810 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2810 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2811 } 2811 }
2812 2812
2813 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
2813 path_put(&nd.path); 2814 path_put(&nd.path);
2814 return vfs_quota_on(sb, type, format_id, path, remount); 2815 return err;
2815} 2816}
2816 2817
2817/* Read data from quotafile - avoid pagecache and such because we cannot afford 2818/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8ca2763df09..9843b046c23 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2806,59 +2806,63 @@ static int ext4_journalled_set_page_dirty(struct page *page)
2806} 2806}
2807 2807
2808static const struct address_space_operations ext4_ordered_aops = { 2808static const struct address_space_operations ext4_ordered_aops = {
2809 .readpage = ext4_readpage, 2809 .readpage = ext4_readpage,
2810 .readpages = ext4_readpages, 2810 .readpages = ext4_readpages,
2811 .writepage = ext4_normal_writepage, 2811 .writepage = ext4_normal_writepage,
2812 .sync_page = block_sync_page, 2812 .sync_page = block_sync_page,
2813 .write_begin = ext4_write_begin, 2813 .write_begin = ext4_write_begin,
2814 .write_end = ext4_ordered_write_end, 2814 .write_end = ext4_ordered_write_end,
2815 .bmap = ext4_bmap, 2815 .bmap = ext4_bmap,
2816 .invalidatepage = ext4_invalidatepage, 2816 .invalidatepage = ext4_invalidatepage,
2817 .releasepage = ext4_releasepage, 2817 .releasepage = ext4_releasepage,
2818 .direct_IO = ext4_direct_IO, 2818 .direct_IO = ext4_direct_IO,
2819 .migratepage = buffer_migrate_page, 2819 .migratepage = buffer_migrate_page,
2820 .is_partially_uptodate = block_is_partially_uptodate,
2820}; 2821};
2821 2822
2822static const struct address_space_operations ext4_writeback_aops = { 2823static const struct address_space_operations ext4_writeback_aops = {
2823 .readpage = ext4_readpage, 2824 .readpage = ext4_readpage,
2824 .readpages = ext4_readpages, 2825 .readpages = ext4_readpages,
2825 .writepage = ext4_normal_writepage, 2826 .writepage = ext4_normal_writepage,
2826 .sync_page = block_sync_page, 2827 .sync_page = block_sync_page,
2827 .write_begin = ext4_write_begin, 2828 .write_begin = ext4_write_begin,
2828 .write_end = ext4_writeback_write_end, 2829 .write_end = ext4_writeback_write_end,
2829 .bmap = ext4_bmap, 2830 .bmap = ext4_bmap,
2830 .invalidatepage = ext4_invalidatepage, 2831 .invalidatepage = ext4_invalidatepage,
2831 .releasepage = ext4_releasepage, 2832 .releasepage = ext4_releasepage,
2832 .direct_IO = ext4_direct_IO, 2833 .direct_IO = ext4_direct_IO,
2833 .migratepage = buffer_migrate_page, 2834 .migratepage = buffer_migrate_page,
2835 .is_partially_uptodate = block_is_partially_uptodate,
2834}; 2836};
2835 2837
2836static const struct address_space_operations ext4_journalled_aops = { 2838static const struct address_space_operations ext4_journalled_aops = {
2837 .readpage = ext4_readpage, 2839 .readpage = ext4_readpage,
2838 .readpages = ext4_readpages, 2840 .readpages = ext4_readpages,
2839 .writepage = ext4_journalled_writepage, 2841 .writepage = ext4_journalled_writepage,
2840 .sync_page = block_sync_page, 2842 .sync_page = block_sync_page,
2841 .write_begin = ext4_write_begin, 2843 .write_begin = ext4_write_begin,
2842 .write_end = ext4_journalled_write_end, 2844 .write_end = ext4_journalled_write_end,
2843 .set_page_dirty = ext4_journalled_set_page_dirty, 2845 .set_page_dirty = ext4_journalled_set_page_dirty,
2844 .bmap = ext4_bmap, 2846 .bmap = ext4_bmap,
2845 .invalidatepage = ext4_invalidatepage, 2847 .invalidatepage = ext4_invalidatepage,
2846 .releasepage = ext4_releasepage, 2848 .releasepage = ext4_releasepage,
2849 .is_partially_uptodate = block_is_partially_uptodate,
2847}; 2850};
2848 2851
2849static const struct address_space_operations ext4_da_aops = { 2852static const struct address_space_operations ext4_da_aops = {
2850 .readpage = ext4_readpage, 2853 .readpage = ext4_readpage,
2851 .readpages = ext4_readpages, 2854 .readpages = ext4_readpages,
2852 .writepage = ext4_da_writepage, 2855 .writepage = ext4_da_writepage,
2853 .writepages = ext4_da_writepages, 2856 .writepages = ext4_da_writepages,
2854 .sync_page = block_sync_page, 2857 .sync_page = block_sync_page,
2855 .write_begin = ext4_da_write_begin, 2858 .write_begin = ext4_da_write_begin,
2856 .write_end = ext4_da_write_end, 2859 .write_end = ext4_da_write_end,
2857 .bmap = ext4_bmap, 2860 .bmap = ext4_bmap,
2858 .invalidatepage = ext4_da_invalidatepage, 2861 .invalidatepage = ext4_da_invalidatepage,
2859 .releasepage = ext4_releasepage, 2862 .releasepage = ext4_releasepage,
2860 .direct_IO = ext4_direct_IO, 2863 .direct_IO = ext4_direct_IO,
2861 .migratepage = buffer_migrate_page, 2864 .migratepage = buffer_migrate_page,
2865 .is_partially_uptodate = block_is_partially_uptodate,
2862}; 2866};
2863 2867
2864void ext4_set_aops(struct inode *inode) 2868void ext4_set_aops(struct inode *inode)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b5479b1dff1..1e69f29a8c5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3352,8 +3352,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3352 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3352 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3353 } 3353 }
3354 3354
3355 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
3355 path_put(&nd.path); 3356 path_put(&nd.path);
3356 return vfs_quota_on(sb, type, format_id, path, remount); 3357 return err;
3357} 3358}
3358 3359
3359/* Read data from quotafile - avoid pagecache and such because we cannot afford 3360/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 61d62513681..ac4f7db9f13 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -49,73 +49,6 @@ static int get_close_on_exec(unsigned int fd)
49 return res; 49 return res;
50} 50}
51 51
52/*
53 * locate_fd finds a free file descriptor in the open_fds fdset,
54 * expanding the fd arrays if necessary. Must be called with the
55 * file_lock held for write.
56 */
57
58static int locate_fd(unsigned int orig_start, int cloexec)
59{
60 struct files_struct *files = current->files;
61 unsigned int newfd;
62 unsigned int start;
63 int error;
64 struct fdtable *fdt;
65
66 spin_lock(&files->file_lock);
67repeat:
68 fdt = files_fdtable(files);
69 /*
70 * Someone might have closed fd's in the range
71 * orig_start..fdt->next_fd
72 */
73 start = orig_start;
74 if (start < files->next_fd)
75 start = files->next_fd;
76
77 newfd = start;
78 if (start < fdt->max_fds)
79 newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
80 fdt->max_fds, start);
81
82 error = expand_files(files, newfd);
83 if (error < 0)
84 goto out;
85
86 /*
87 * If we needed to expand the fs array we
88 * might have blocked - try again.
89 */
90 if (error)
91 goto repeat;
92
93 if (start <= files->next_fd)
94 files->next_fd = newfd + 1;
95
96 FD_SET(newfd, fdt->open_fds);
97 if (cloexec)
98 FD_SET(newfd, fdt->close_on_exec);
99 else
100 FD_CLR(newfd, fdt->close_on_exec);
101 error = newfd;
102
103out:
104 spin_unlock(&files->file_lock);
105 return error;
106}
107
108static int dupfd(struct file *file, unsigned int start, int cloexec)
109{
110 int fd = locate_fd(start, cloexec);
111 if (fd >= 0)
112 fd_install(fd, file);
113 else
114 fput(file);
115
116 return fd;
117}
118
119asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) 52asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
120{ 53{
121 int err = -EBADF; 54 int err = -EBADF;
@@ -130,31 +63,35 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
130 return -EINVAL; 63 return -EINVAL;
131 64
132 spin_lock(&files->file_lock); 65 spin_lock(&files->file_lock);
133 if (!(file = fcheck(oldfd)))
134 goto out_unlock;
135 get_file(file); /* We are now finished with oldfd */
136
137 err = expand_files(files, newfd); 66 err = expand_files(files, newfd);
67 file = fcheck(oldfd);
68 if (unlikely(!file))
69 goto Ebadf;
138 if (unlikely(err < 0)) { 70 if (unlikely(err < 0)) {
139 if (err == -EMFILE) 71 if (err == -EMFILE)
140 err = -EBADF; 72 goto Ebadf;
141 goto out_fput; 73 goto out_unlock;
142 } 74 }
143 75 /*
144 /* To avoid races with open() and dup(), we will mark the fd as 76 * We need to detect attempts to do dup2() over allocated but still
145 * in-use in the open-file bitmap throughout the entire dup2() 77 * not finished descriptor. NB: OpenBSD avoids that at the price of
146 * process. This is quite safe: do_close() uses the fd array 78 * extra work in their equivalent of fget() - they insert struct
147 * entry, not the bitmap, to decide what work needs to be 79 * file immediately after grabbing descriptor, mark it larval if
148 * done. --sct */ 80 * more work (e.g. actual opening) is needed and make sure that
149 /* Doesn't work. open() might be there first. --AV */ 81 * fget() treats larval files as absent. Potentially interesting,
150 82 * but while extra work in fget() is trivial, locking implications
151 /* Yes. It's a race. In user space. Nothing sane to do */ 83 * and amount of surgery on open()-related paths in VFS are not.
84 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
85 * deadlocks in rather amusing ways, AFAICS. All of that is out of
86 * scope of POSIX or SUS, since neither considers shared descriptor
87 * tables and this condition does not arise without those.
88 */
152 err = -EBUSY; 89 err = -EBUSY;
153 fdt = files_fdtable(files); 90 fdt = files_fdtable(files);
154 tofree = fdt->fd[newfd]; 91 tofree = fdt->fd[newfd];
155 if (!tofree && FD_ISSET(newfd, fdt->open_fds)) 92 if (!tofree && FD_ISSET(newfd, fdt->open_fds))
156 goto out_fput; 93 goto out_unlock;
157 94 get_file(file);
158 rcu_assign_pointer(fdt->fd[newfd], file); 95 rcu_assign_pointer(fdt->fd[newfd], file);
159 FD_SET(newfd, fdt->open_fds); 96 FD_SET(newfd, fdt->open_fds);
160 if (flags & O_CLOEXEC) 97 if (flags & O_CLOEXEC)
@@ -165,17 +102,14 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
165 102
166 if (tofree) 103 if (tofree)
167 filp_close(tofree, files); 104 filp_close(tofree, files);
168 err = newfd;
169out:
170 return err;
171out_unlock:
172 spin_unlock(&files->file_lock);
173 goto out;
174 105
175out_fput: 106 return newfd;
107
108Ebadf:
109 err = -EBADF;
110out_unlock:
176 spin_unlock(&files->file_lock); 111 spin_unlock(&files->file_lock);
177 fput(file); 112 return err;
178 goto out;
179} 113}
180 114
181asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) 115asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
@@ -194,10 +128,15 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
194asmlinkage long sys_dup(unsigned int fildes) 128asmlinkage long sys_dup(unsigned int fildes)
195{ 129{
196 int ret = -EBADF; 130 int ret = -EBADF;
197 struct file * file = fget(fildes); 131 struct file *file = fget(fildes);
198 132
199 if (file) 133 if (file) {
200 ret = dupfd(file, 0, 0); 134 ret = get_unused_fd();
135 if (ret >= 0)
136 fd_install(ret, file);
137 else
138 fput(file);
139 }
201 return ret; 140 return ret;
202} 141}
203 142
@@ -322,8 +261,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
322 case F_DUPFD_CLOEXEC: 261 case F_DUPFD_CLOEXEC:
323 if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 262 if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
324 break; 263 break;
325 get_file(filp); 264 err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
326 err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC); 265 if (err >= 0) {
266 get_file(filp);
267 fd_install(err, filp);
268 }
327 break; 269 break;
328 case F_GETFD: 270 case F_GETFD:
329 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; 271 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
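
The F_DUPFD_CLOEXEC and dup3() paths reworked above are visible from user space, so their behaviour can be checked without touching kernel internals. A minimal sketch, assuming a Linux/glibc toolchain that exposes dup3() under _GNU_SOURCE; the descriptor numbers 10 and 20 are arbitrary:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int copy, lowest;

        /* dup3() duplicates stdout onto fd 10 and sets O_CLOEXEC atomically. */
        copy = dup3(STDOUT_FILENO, 10, O_CLOEXEC);
        if (copy < 0) {
                perror("dup3");
                return 1;
        }
        printf("dup3 gave fd %d, FD_CLOEXEC=%d\n",
               copy, (fcntl(copy, F_GETFD) & FD_CLOEXEC) != 0);

        /* F_DUPFD_CLOEXEC picks the lowest free descriptor >= 20. */
        lowest = fcntl(STDOUT_FILENO, F_DUPFD_CLOEXEC, 20);
        if (lowest < 0) {
                perror("fcntl(F_DUPFD_CLOEXEC)");
                return 1;
        }
        printf("F_DUPFD_CLOEXEC gave fd %d, FD_CLOEXEC=%d\n",
               lowest, (fcntl(lowest, F_GETFD) & FD_CLOEXEC) != 0);

        close(copy);
        close(lowest);
        return 0;
}

Both calls should report FD_CLOEXEC=1, since the close-on-exec flag is applied together with the duplication rather than by a second fcntl().
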
diff --git a/fs/file.c b/fs/file.c
index d8773b19fe4..f313314f996 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
6 * Manage the dynamic fd arrays in the process files_struct. 6 * Manage the dynamic fd arrays in the process files_struct.
7 */ 7 */
8 8
9#include <linux/module.h>
9#include <linux/fs.h> 10#include <linux/fs.h>
10#include <linux/mm.h> 11#include <linux/mm.h>
11#include <linux/time.h> 12#include <linux/time.h>
@@ -432,3 +433,63 @@ struct files_struct init_files = {
432 }, 433 },
433 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 434 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
434}; 435};
436
437/*
438 * allocate a file descriptor, mark it busy.
439 */
440int alloc_fd(unsigned start, unsigned flags)
441{
442 struct files_struct *files = current->files;
443 unsigned int fd;
444 int error;
445 struct fdtable *fdt;
446
447 spin_lock(&files->file_lock);
448repeat:
449 fdt = files_fdtable(files);
450 fd = start;
451 if (fd < files->next_fd)
452 fd = files->next_fd;
453
454 if (fd < fdt->max_fds)
455 fd = find_next_zero_bit(fdt->open_fds->fds_bits,
456 fdt->max_fds, fd);
457
458 error = expand_files(files, fd);
459 if (error < 0)
460 goto out;
461
462 /*
463 * If we needed to expand the fs array we
464 * might have blocked - try again.
465 */
466 if (error)
467 goto repeat;
468
469 if (start <= files->next_fd)
470 files->next_fd = fd + 1;
471
472 FD_SET(fd, fdt->open_fds);
473 if (flags & O_CLOEXEC)
474 FD_SET(fd, fdt->close_on_exec);
475 else
476 FD_CLR(fd, fdt->close_on_exec);
477 error = fd;
478#if 1
479 /* Sanity check */
480 if (rcu_dereference(fdt->fd[fd]) != NULL) {
481 printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
482 rcu_assign_pointer(fdt->fd[fd], NULL);
483 }
484#endif
485
486out:
487 spin_unlock(&files->file_lock);
488 return error;
489}
490
491int get_unused_fd(void)
492{
493 return alloc_fd(0, 0);
494}
495EXPORT_SYMBOL(get_unused_fd);
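
alloc_fd() above is kernel-internal, but its search strategy (start from the larger of the caller's minimum and the next_fd hint, take the first clear bit, mark it busy, refresh the hint) can be modelled in a few lines of user-space C. This is a simplified model only: a fixed 64-slot bitmap stands in for open_fds, and there is no expand_files(), locking, or close-on-exec handling.

#include <stdint.h>
#include <stdio.h>

#define MAX_FDS 64

static uint64_t open_fds;      /* bit n set => slot n in use */
static unsigned int next_fd;   /* hint: lowest slot that may be free */

/* Model of alloc_fd(start, ...): returns a slot >= start, or -1 if full. */
static int model_alloc_fd(unsigned int start)
{
        unsigned int fd = start < next_fd ? next_fd : start;

        while (fd < MAX_FDS && (open_fds & (1ULL << fd)))
                fd++;                   /* find_next_zero_bit() stand-in */
        if (fd >= MAX_FDS)
                return -1;              /* the real code would expand_files() */

        open_fds |= 1ULL << fd;
        if (start <= next_fd)
                next_fd = fd + 1;       /* keep the allocation hint fresh */
        return fd;
}

int main(void)
{
        int a = model_alloc_fd(0);
        int b = model_alloc_fd(0);
        int c = model_alloc_fd(10);

        printf("%d %d %d\n", a, b, c);  /* prints: 0 1 10 */
        return 0;
}
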
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 629af01e5ad..6caf1e1ee26 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -23,6 +23,8 @@
23 23
24int jffs2_sum_init(struct jffs2_sb_info *c) 24int jffs2_sum_init(struct jffs2_sb_info *c)
25{ 25{
 26	uint32_t sum_size = min_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE);
27
26 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); 28 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
27 29
28 if (!c->summary) { 30 if (!c->summary) {
@@ -30,7 +32,7 @@ int jffs2_sum_init(struct jffs2_sb_info *c)
30 return -ENOMEM; 32 return -ENOMEM;
31 } 33 }
32 34
33 c->summary->sum_buf = vmalloc(c->sector_size); 35 c->summary->sum_buf = kmalloc(sum_size, GFP_KERNEL);
34 36
35 if (!c->summary->sum_buf) { 37 if (!c->summary->sum_buf) {
36 JFFS2_WARNING("Can't allocate buffer for writing out summary information!\n"); 38 JFFS2_WARNING("Can't allocate buffer for writing out summary information!\n");
@@ -49,7 +51,7 @@ void jffs2_sum_exit(struct jffs2_sb_info *c)
49 51
50 jffs2_sum_disable_collecting(c->summary); 52 jffs2_sum_disable_collecting(c->summary);
51 53
52 vfree(c->summary->sum_buf); 54 kfree(c->summary->sum_buf);
53 c->summary->sum_buf = NULL; 55 c->summary->sum_buf = NULL;
54 56
55 kfree(c->summary); 57 kfree(c->summary);
@@ -665,7 +667,7 @@ crc_err:
665/* Write summary data to flash - helper function for jffs2_sum_write_sumnode() */ 667/* Write summary data to flash - helper function for jffs2_sum_write_sumnode() */
666 668
667static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 669static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
668 uint32_t infosize, uint32_t datasize, int padsize) 670 uint32_t infosize, uint32_t datasize, int padsize)
669{ 671{
670 struct jffs2_raw_summary isum; 672 struct jffs2_raw_summary isum;
671 union jffs2_sum_mem *temp; 673 union jffs2_sum_mem *temp;
@@ -676,6 +678,26 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
676 int ret; 678 int ret;
677 size_t retlen; 679 size_t retlen;
678 680
681 if (padsize + datasize > MAX_SUMMARY_SIZE) {
682 /* It won't fit in the buffer. Abort summary for this jeb */
683 jffs2_sum_disable_collecting(c->summary);
684
685 JFFS2_WARNING("Summary too big (%d data, %d pad) in eraseblock at %08x\n",
686 datasize, padsize, jeb->offset);
687 /* Non-fatal */
688 return 0;
689 }
690 /* Is there enough space for summary? */
691 if (padsize < 0) {
692 /* don't try to write out summary for this jeb */
693 jffs2_sum_disable_collecting(c->summary);
694
695 JFFS2_WARNING("Not enough space for summary, padsize = %d\n",
696 padsize);
697 /* Non-fatal */
698 return 0;
699 }
700
679 memset(c->summary->sum_buf, 0xff, datasize); 701 memset(c->summary->sum_buf, 0xff, datasize);
680 memset(&isum, 0, sizeof(isum)); 702 memset(&isum, 0, sizeof(isum));
681 703
@@ -821,7 +843,7 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
821{ 843{
822 int datasize, infosize, padsize; 844 int datasize, infosize, padsize;
823 struct jffs2_eraseblock *jeb; 845 struct jffs2_eraseblock *jeb;
824 int ret; 846 int ret = 0;
825 847
826 dbg_summary("called\n"); 848 dbg_summary("called\n");
827 849
@@ -841,16 +863,6 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
841 infosize += padsize; 863 infosize += padsize;
842 datasize += padsize; 864 datasize += padsize;
843 865
844 /* Is there enough space for summary? */
845 if (padsize < 0) {
846 /* don't try to write out summary for this jeb */
847 jffs2_sum_disable_collecting(c->summary);
848
849 JFFS2_WARNING("Not enough space for summary, padsize = %d\n", padsize);
850 spin_lock(&c->erase_completion_lock);
851 return 0;
852 }
853
854 ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize); 866 ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize);
855 spin_lock(&c->erase_completion_lock); 867 spin_lock(&c->erase_completion_lock);
856 return ret; 868 return ret;
diff --git a/fs/jffs2/summary.h b/fs/jffs2/summary.h
index 8bf34f2fa5c..60207a2ae95 100644
--- a/fs/jffs2/summary.h
+++ b/fs/jffs2/summary.h
@@ -13,6 +13,12 @@
13#ifndef JFFS2_SUMMARY_H 13#ifndef JFFS2_SUMMARY_H
14#define JFFS2_SUMMARY_H 14#define JFFS2_SUMMARY_H
15 15
16/* Limit summary size to 64KiB so that we can kmalloc it. If the summary
17 is larger than that, we have to just ditch it and avoid using summary
18 for the eraseblock in question... and it probably doesn't hurt us much
19 anyway. */
20#define MAX_SUMMARY_SIZE 65536
21
16#include <linux/uio.h> 22#include <linux/uio.h>
17#include <linux/jffs2.h> 23#include <linux/jffs2.h>
18 24
diff --git a/fs/libfs.c b/fs/libfs.c
index baeb71ee1cd..1add676a19d 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -216,8 +216,8 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
216 216
217 s->s_flags = MS_NOUSER; 217 s->s_flags = MS_NOUSER;
218 s->s_maxbytes = ~0ULL; 218 s->s_maxbytes = ~0ULL;
219 s->s_blocksize = 1024; 219 s->s_blocksize = PAGE_SIZE;
220 s->s_blocksize_bits = 10; 220 s->s_blocksize_bits = PAGE_SHIFT;
221 s->s_magic = magic; 221 s->s_magic = magic;
222 s->s_op = ops ? ops : &simple_super_operations; 222 s->s_op = ops ? ops : &simple_super_operations;
223 s->s_time_gran = 1; 223 s->s_time_gran = 1;
diff --git a/fs/namei.c b/fs/namei.c
index a7b0a0b8012..4ea63ed5e79 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -274,7 +274,7 @@ int inode_permission(struct inode *inode, int mask)
274 return retval; 274 return retval;
275 275
276 return security_inode_permission(inode, 276 return security_inode_permission(inode,
277 mask & (MAY_READ|MAY_WRITE|MAY_EXEC)); 277 mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
278} 278}
279 279
280/** 280/**
@@ -1431,8 +1431,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1431 * 3. We should have write and exec permissions on dir 1431 * 3. We should have write and exec permissions on dir
1432 * 4. We can't do it if dir is immutable (done in permission()) 1432 * 4. We can't do it if dir is immutable (done in permission())
1433 */ 1433 */
1434static inline int may_create(struct inode *dir, struct dentry *child, 1434static inline int may_create(struct inode *dir, struct dentry *child)
1435 struct nameidata *nd)
1436{ 1435{
1437 if (child->d_inode) 1436 if (child->d_inode)
1438 return -EEXIST; 1437 return -EEXIST;
@@ -1504,7 +1503,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
1504int vfs_create(struct inode *dir, struct dentry *dentry, int mode, 1503int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1505 struct nameidata *nd) 1504 struct nameidata *nd)
1506{ 1505{
1507 int error = may_create(dir, dentry, nd); 1506 int error = may_create(dir, dentry);
1508 1507
1509 if (error) 1508 if (error)
1510 return error; 1509 return error;
@@ -1948,7 +1947,7 @@ EXPORT_SYMBOL_GPL(lookup_create);
1948 1947
1949int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 1948int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1950{ 1949{
1951 int error = may_create(dir, dentry, NULL); 1950 int error = may_create(dir, dentry);
1952 1951
1953 if (error) 1952 if (error)
1954 return error; 1953 return error;
@@ -2049,7 +2048,7 @@ asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev)
2049 2048
2050int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 2049int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2051{ 2050{
2052 int error = may_create(dir, dentry, NULL); 2051 int error = may_create(dir, dentry);
2053 2052
2054 if (error) 2053 if (error)
2055 return error; 2054 return error;
@@ -2316,7 +2315,7 @@ asmlinkage long sys_unlink(const char __user *pathname)
2316 2315
2317int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) 2316int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
2318{ 2317{
2319 int error = may_create(dir, dentry, NULL); 2318 int error = may_create(dir, dentry);
2320 2319
2321 if (error) 2320 if (error)
2322 return error; 2321 return error;
@@ -2386,7 +2385,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2386 if (!inode) 2385 if (!inode)
2387 return -ENOENT; 2386 return -ENOENT;
2388 2387
2389 error = may_create(dir, new_dentry, NULL); 2388 error = may_create(dir, new_dentry);
2390 if (error) 2389 if (error)
2391 return error; 2390 return error;
2392 2391
@@ -2595,7 +2594,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2595 return error; 2594 return error;
2596 2595
2597 if (!new_dentry->d_inode) 2596 if (!new_dentry->d_inode)
2598 error = may_create(new_dir, new_dentry, NULL); 2597 error = may_create(new_dir, new_dentry);
2599 else 2598 else
2600 error = may_delete(new_dir, new_dentry, is_dir); 2599 error = may_delete(new_dir, new_dentry, is_dir);
2601 if (error) 2600 if (error)
diff --git a/fs/namespace.c b/fs/namespace.c
index 411728c0c8b..6e283c93b50 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1667,31 +1667,31 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags,
1667 if (IS_ERR(mnt)) 1667 if (IS_ERR(mnt))
1668 return PTR_ERR(mnt); 1668 return PTR_ERR(mnt);
1669 1669
1670 return do_add_mount(mnt, nd, mnt_flags, NULL); 1670 return do_add_mount(mnt, &nd->path, mnt_flags, NULL);
1671} 1671}
1672 1672
1673/* 1673/*
1674 * add a mount into a namespace's mount tree 1674 * add a mount into a namespace's mount tree
1675 * - provide the option of adding the new mount to an expiration list 1675 * - provide the option of adding the new mount to an expiration list
1676 */ 1676 */
1677int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, 1677int do_add_mount(struct vfsmount *newmnt, struct path *path,
1678 int mnt_flags, struct list_head *fslist) 1678 int mnt_flags, struct list_head *fslist)
1679{ 1679{
1680 int err; 1680 int err;
1681 1681
1682 down_write(&namespace_sem); 1682 down_write(&namespace_sem);
1683 /* Something was mounted here while we slept */ 1683 /* Something was mounted here while we slept */
1684 while (d_mountpoint(nd->path.dentry) && 1684 while (d_mountpoint(path->dentry) &&
1685 follow_down(&nd->path.mnt, &nd->path.dentry)) 1685 follow_down(&path->mnt, &path->dentry))
1686 ; 1686 ;
1687 err = -EINVAL; 1687 err = -EINVAL;
1688 if (!check_mnt(nd->path.mnt)) 1688 if (!check_mnt(path->mnt))
1689 goto unlock; 1689 goto unlock;
1690 1690
1691 /* Refuse the same filesystem on the same mount point */ 1691 /* Refuse the same filesystem on the same mount point */
1692 err = -EBUSY; 1692 err = -EBUSY;
1693 if (nd->path.mnt->mnt_sb == newmnt->mnt_sb && 1693 if (path->mnt->mnt_sb == newmnt->mnt_sb &&
1694 nd->path.mnt->mnt_root == nd->path.dentry) 1694 path->mnt->mnt_root == path->dentry)
1695 goto unlock; 1695 goto unlock;
1696 1696
1697 err = -EINVAL; 1697 err = -EINVAL;
@@ -1699,7 +1699,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
1699 goto unlock; 1699 goto unlock;
1700 1700
1701 newmnt->mnt_flags = mnt_flags; 1701 newmnt->mnt_flags = mnt_flags;
1702 if ((err = graft_tree(newmnt, &nd->path))) 1702 if ((err = graft_tree(newmnt, path)))
1703 goto unlock; 1703 goto unlock;
1704 1704
1705 if (fslist) /* add to the specified expiration list */ 1705 if (fslist) /* add to the specified expiration list */
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 2f285ef7639..66df08dd1ca 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -129,7 +129,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
129 goto out_err; 129 goto out_err;
130 130
131 mntget(mnt); 131 mntget(mnt);
132 err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, 132 err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
133 &nfs_automount_list); 133 &nfs_automount_list);
134 if (err < 0) { 134 if (err < 0) {
135 mntput(mnt); 135 mntput(mnt);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1db080135c6..506c24fb507 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1073,12 +1073,15 @@ static void ocfs2_write_failure(struct inode *inode,
1073 for(i = 0; i < wc->w_num_pages; i++) { 1073 for(i = 0; i < wc->w_num_pages; i++) {
1074 tmppage = wc->w_pages[i]; 1074 tmppage = wc->w_pages[i];
1075 1075
1076 if (ocfs2_should_order_data(inode)) 1076 if (page_has_buffers(tmppage)) {
1077 walk_page_buffers(wc->w_handle, page_buffers(tmppage), 1077 if (ocfs2_should_order_data(inode))
1078 from, to, NULL, 1078 walk_page_buffers(wc->w_handle,
1079 ocfs2_journal_dirty_data); 1079 page_buffers(tmppage),
1080 1080 from, to, NULL,
1081 block_commit_write(tmppage, from, to); 1081 ocfs2_journal_dirty_data);
1082
1083 block_commit_write(tmppage, from, to);
1084 }
1082 } 1085 }
1083} 1086}
1084 1087
@@ -1901,12 +1904,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1901 to = PAGE_CACHE_SIZE; 1904 to = PAGE_CACHE_SIZE;
1902 } 1905 }
1903 1906
1904 if (ocfs2_should_order_data(inode)) 1907 if (page_has_buffers(tmppage)) {
1905 walk_page_buffers(wc->w_handle, page_buffers(tmppage), 1908 if (ocfs2_should_order_data(inode))
1906 from, to, NULL, 1909 walk_page_buffers(wc->w_handle,
1907 ocfs2_journal_dirty_data); 1910 page_buffers(tmppage),
1908 1911 from, to, NULL,
1909 block_commit_write(tmppage, from, to); 1912 ocfs2_journal_dirty_data);
1913 block_commit_write(tmppage, from, to);
1914 }
1910 } 1915 }
1911 1916
1912out_write_size: 1917out_write_size:
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index be2dd95d3a1..ec2ed15c3da 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1766,8 +1766,8 @@ out_inode_unlock:
1766out_rw_unlock: 1766out_rw_unlock:
1767 ocfs2_rw_unlock(inode, 1); 1767 ocfs2_rw_unlock(inode, 1);
1768 1768
1769 mutex_unlock(&inode->i_mutex);
1770out: 1769out:
1770 mutex_unlock(&inode->i_mutex);
1771 return ret; 1771 return ret;
1772} 1772}
1773 1773
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a8c19cb3cfd..7a37240f7a3 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -57,7 +57,7 @@ static int __ocfs2_recovery_thread(void *arg);
57static int ocfs2_commit_cache(struct ocfs2_super *osb); 57static int ocfs2_commit_cache(struct ocfs2_super *osb);
58static int ocfs2_wait_on_mount(struct ocfs2_super *osb); 58static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
60 int dirty); 60 int dirty, int replayed);
61static int ocfs2_trylock_journal(struct ocfs2_super *osb, 61static int ocfs2_trylock_journal(struct ocfs2_super *osb,
62 int slot_num); 62 int slot_num);
63static int ocfs2_recover_orphans(struct ocfs2_super *osb, 63static int ocfs2_recover_orphans(struct ocfs2_super *osb,
@@ -562,8 +562,18 @@ done:
562 return status; 562 return status;
563} 563}
564 564
565static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
566{
567 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
568}
569
570static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
571{
572 return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
573}
574
565static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 575static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
566 int dirty) 576 int dirty, int replayed)
567{ 577{
568 int status; 578 int status;
569 unsigned int flags; 579 unsigned int flags;
@@ -593,6 +603,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
593 flags &= ~OCFS2_JOURNAL_DIRTY_FL; 603 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
594 fe->id1.journal1.ij_flags = cpu_to_le32(flags); 604 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
595 605
606 if (replayed)
607 ocfs2_bump_recovery_generation(fe);
608
596 status = ocfs2_write_block(osb, bh, journal->j_inode); 609 status = ocfs2_write_block(osb, bh, journal->j_inode);
597 if (status < 0) 610 if (status < 0)
598 mlog_errno(status); 611 mlog_errno(status);
@@ -667,7 +680,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
667 * Do not toggle if flush was unsuccessful otherwise 680 * Do not toggle if flush was unsuccessful otherwise
668 * will leave dirty metadata in a "clean" journal 681 * will leave dirty metadata in a "clean" journal
669 */ 682 */
670 status = ocfs2_journal_toggle_dirty(osb, 0); 683 status = ocfs2_journal_toggle_dirty(osb, 0, 0);
671 if (status < 0) 684 if (status < 0)
672 mlog_errno(status); 685 mlog_errno(status);
673 } 686 }
@@ -710,7 +723,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
710 } 723 }
711} 724}
712 725
713int ocfs2_journal_load(struct ocfs2_journal *journal, int local) 726int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
714{ 727{
715 int status = 0; 728 int status = 0;
716 struct ocfs2_super *osb; 729 struct ocfs2_super *osb;
@@ -729,7 +742,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
729 742
730 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); 743 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
731 744
732 status = ocfs2_journal_toggle_dirty(osb, 1); 745 status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
733 if (status < 0) { 746 if (status < 0) {
734 mlog_errno(status); 747 mlog_errno(status);
735 goto done; 748 goto done;
@@ -771,7 +784,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
771 goto bail; 784 goto bail;
772 } 785 }
773 786
774 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0); 787 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
775 if (status < 0) 788 if (status < 0)
776 mlog_errno(status); 789 mlog_errno(status);
777 790
@@ -1034,6 +1047,12 @@ restart:
1034 spin_unlock(&osb->osb_lock); 1047 spin_unlock(&osb->osb_lock);
1035 mlog(0, "All nodes recovered\n"); 1048 mlog(0, "All nodes recovered\n");
1036 1049
1050 /* Refresh all journal recovery generations from disk */
1051 status = ocfs2_check_journals_nolocks(osb);
1052 status = (status == -EROFS) ? 0 : status;
1053 if (status < 0)
1054 mlog_errno(status);
1055
1037 ocfs2_super_unlock(osb, 1); 1056 ocfs2_super_unlock(osb, 1);
1038 1057
1039 /* We always run recovery on our own orphan dir - the dead 1058 /* We always run recovery on our own orphan dir - the dead
@@ -1096,6 +1115,42 @@ out:
1096 mlog_exit_void(); 1115 mlog_exit_void();
1097} 1116}
1098 1117
1118static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1119 int slot_num,
1120 struct buffer_head **bh,
1121 struct inode **ret_inode)
1122{
1123 int status = -EACCES;
1124 struct inode *inode = NULL;
1125
1126 BUG_ON(slot_num >= osb->max_slots);
1127
1128 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1129 slot_num);
1130 if (!inode || is_bad_inode(inode)) {
1131 mlog_errno(status);
1132 goto bail;
1133 }
1134 SET_INODE_JOURNAL(inode);
1135
1136 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode);
1137 if (status < 0) {
1138 mlog_errno(status);
1139 goto bail;
1140 }
1141
1142 status = 0;
1143
1144bail:
1145 if (inode) {
1146 if (status || !ret_inode)
1147 iput(inode);
1148 else
1149 *ret_inode = inode;
1150 }
1151 return status;
1152}
1153
1099/* Does the actual journal replay and marks the journal inode as 1154/* Does the actual journal replay and marks the journal inode as
1100 * clean. Will only replay if the journal inode is marked dirty. */ 1155 * clean. Will only replay if the journal inode is marked dirty. */
1101static int ocfs2_replay_journal(struct ocfs2_super *osb, 1156static int ocfs2_replay_journal(struct ocfs2_super *osb,
@@ -1109,22 +1164,36 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1109 struct ocfs2_dinode *fe; 1164 struct ocfs2_dinode *fe;
1110 journal_t *journal = NULL; 1165 journal_t *journal = NULL;
1111 struct buffer_head *bh = NULL; 1166 struct buffer_head *bh = NULL;
1167 u32 slot_reco_gen;
1112 1168
1113 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 1169 status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
1114 slot_num); 1170 if (status) {
1115 if (inode == NULL) {
1116 status = -EACCES;
1117 mlog_errno(status); 1171 mlog_errno(status);
1118 goto done; 1172 goto done;
1119 } 1173 }
1120 if (is_bad_inode(inode)) { 1174
1121 status = -EACCES; 1175 fe = (struct ocfs2_dinode *)bh->b_data;
1122 iput(inode); 1176 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1123 inode = NULL; 1177 brelse(bh);
1124 mlog_errno(status); 1178 bh = NULL;
1179
1180 /*
1181 * As the fs recovery is asynchronous, there is a small chance that
1182 * another node mounted (and recovered) the slot before the recovery
1183 * thread could get the lock. To handle that, we dirty read the journal
1184 * inode for that slot to get the recovery generation. If it is
1185 * different than what we expected, the slot has been recovered.
1186 * If not, it needs recovery.
1187 */
1188 if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
1189 mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num,
1190 osb->slot_recovery_generations[slot_num], slot_reco_gen);
1191 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1192 status = -EBUSY;
1125 goto done; 1193 goto done;
1126 } 1194 }
1127 SET_INODE_JOURNAL(inode); 1195
1196 /* Continue with recovery as the journal has not yet been recovered */
1128 1197
1129 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); 1198 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
1130 if (status < 0) { 1199 if (status < 0) {
@@ -1138,9 +1207,12 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1138 fe = (struct ocfs2_dinode *) bh->b_data; 1207 fe = (struct ocfs2_dinode *) bh->b_data;
1139 1208
1140 flags = le32_to_cpu(fe->id1.journal1.ij_flags); 1209 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
1210 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1141 1211
1142 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { 1212 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
1143 mlog(0, "No recovery required for node %d\n", node_num); 1213 mlog(0, "No recovery required for node %d\n", node_num);
1214 /* Refresh recovery generation for the slot */
1215 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1144 goto done; 1216 goto done;
1145 } 1217 }
1146 1218
@@ -1188,6 +1260,11 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1188 flags &= ~OCFS2_JOURNAL_DIRTY_FL; 1260 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
1189 fe->id1.journal1.ij_flags = cpu_to_le32(flags); 1261 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
1190 1262
1263 /* Increment recovery generation to indicate successful recovery */
1264 ocfs2_bump_recovery_generation(fe);
1265 osb->slot_recovery_generations[slot_num] =
1266 ocfs2_get_recovery_generation(fe);
1267
1191 status = ocfs2_write_block(osb, bh, inode); 1268 status = ocfs2_write_block(osb, bh, inode);
1192 if (status < 0) 1269 if (status < 0)
1193 mlog_errno(status); 1270 mlog_errno(status);
@@ -1252,6 +1329,13 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1252 1329
1253 status = ocfs2_replay_journal(osb, node_num, slot_num); 1330 status = ocfs2_replay_journal(osb, node_num, slot_num);
1254 if (status < 0) { 1331 if (status < 0) {
1332 if (status == -EBUSY) {
1333 mlog(0, "Skipping recovery for slot %u (node %u) "
1334 "as another node has recovered it\n", slot_num,
1335 node_num);
1336 status = 0;
1337 goto done;
1338 }
1255 mlog_errno(status); 1339 mlog_errno(status);
1256 goto done; 1340 goto done;
1257 } 1341 }
@@ -1334,12 +1418,29 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1334{ 1418{
1335 unsigned int node_num; 1419 unsigned int node_num;
1336 int status, i; 1420 int status, i;
1421 struct buffer_head *bh = NULL;
1422 struct ocfs2_dinode *di;
1337 1423
1338 /* This is called with the super block cluster lock, so we 1424 /* This is called with the super block cluster lock, so we
1339 * know that the slot map can't change underneath us. */ 1425 * know that the slot map can't change underneath us. */
1340 1426
1341 spin_lock(&osb->osb_lock); 1427 spin_lock(&osb->osb_lock);
1342 for (i = 0; i < osb->max_slots; i++) { 1428 for (i = 0; i < osb->max_slots; i++) {
1429 /* Read journal inode to get the recovery generation */
1430 status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
1431 if (status) {
1432 mlog_errno(status);
1433 goto bail;
1434 }
1435 di = (struct ocfs2_dinode *)bh->b_data;
1436 osb->slot_recovery_generations[i] =
1437 ocfs2_get_recovery_generation(di);
1438 brelse(bh);
1439 bh = NULL;
1440
1441 mlog(0, "Slot %u recovery generation is %u\n", i,
1442 osb->slot_recovery_generations[i]);
1443
1343 if (i == osb->slot_num) 1444 if (i == osb->slot_num)
1344 continue; 1445 continue;
1345 1446
@@ -1603,49 +1704,41 @@ static int ocfs2_commit_thread(void *arg)
1603 return 0; 1704 return 0;
1604} 1705}
1605 1706
1606/* Look for a dirty journal without taking any cluster locks. Used for 1707/* Reads all the journal inodes without taking any cluster locks. Used
1607 * hard readonly access to determine whether the file system journals 1708 * for hard readonly access to determine whether any journal requires
1608 * require recovery. */ 1709 * recovery. Also used to refresh the recovery generation numbers after
1710 * a journal has been recovered by another node.
1711 */
1609int ocfs2_check_journals_nolocks(struct ocfs2_super *osb) 1712int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
1610{ 1713{
1611 int ret = 0; 1714 int ret = 0;
1612 unsigned int slot; 1715 unsigned int slot;
1613 struct buffer_head *di_bh; 1716 struct buffer_head *di_bh = NULL;
1614 struct ocfs2_dinode *di; 1717 struct ocfs2_dinode *di;
1615 struct inode *journal = NULL; 1718 int journal_dirty = 0;
1616 1719
1617 for(slot = 0; slot < osb->max_slots; slot++) { 1720 for(slot = 0; slot < osb->max_slots; slot++) {
1618 journal = ocfs2_get_system_file_inode(osb, 1721 ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
1619 JOURNAL_SYSTEM_INODE, 1722 if (ret) {
1620 slot);
1621 if (!journal || is_bad_inode(journal)) {
1622 ret = -EACCES;
1623 mlog_errno(ret);
1624 goto out;
1625 }
1626
1627 di_bh = NULL;
1628 ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh,
1629 0, journal);
1630 if (ret < 0) {
1631 mlog_errno(ret); 1723 mlog_errno(ret);
1632 goto out; 1724 goto out;
1633 } 1725 }
1634 1726
1635 di = (struct ocfs2_dinode *) di_bh->b_data; 1727 di = (struct ocfs2_dinode *) di_bh->b_data;
1636 1728
1729 osb->slot_recovery_generations[slot] =
1730 ocfs2_get_recovery_generation(di);
1731
1637 if (le32_to_cpu(di->id1.journal1.ij_flags) & 1732 if (le32_to_cpu(di->id1.journal1.ij_flags) &
1638 OCFS2_JOURNAL_DIRTY_FL) 1733 OCFS2_JOURNAL_DIRTY_FL)
1639 ret = -EROFS; 1734 journal_dirty = 1;
1640 1735
1641 brelse(di_bh); 1736 brelse(di_bh);
1642 if (ret) 1737 di_bh = NULL;
1643 break;
1644 } 1738 }
1645 1739
1646out: 1740out:
1647 if (journal) 1741 if (journal_dirty)
1648 iput(journal); 1742 ret = -EROFS;
1649
1650 return ret; 1743 return ret;
1651} 1744}
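
Stripped of the buffer and locking details, the recovery-generation scheme is: cache each slot's counter, re-read it locklessly before replaying, and skip the slot if the on-disk value has already moved on. A minimal sketch of just that comparison, assuming the generations have already been read into plain integers:

#include <errno.h>
#include <stdio.h>

/*
 * Decide whether a slot still needs replay.  cached_gen is what this node
 * recorded earlier; disk_gen is a fresh lockless read of
 * ij_recovery_generation.  Returns 0 to recover, -EBUSY to skip.
 */
static int should_replay_slot(unsigned int *cached_gen, unsigned int disk_gen)
{
        if (*cached_gen != disk_gen) {
                /* Someone bumped the generation: slot already recovered. */
                *cached_gen = disk_gen;
                return -EBUSY;
        }
        return 0;
}

int main(void)
{
        unsigned int cached = 7;

        printf("%d\n", should_replay_slot(&cached, 7)); /* 0: replay      */
        printf("%d\n", should_replay_slot(&cached, 8)); /* -EBUSY: skip   */
        return 0;
}
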
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index db82be2532e..2178ebffa05 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -161,7 +161,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal,
161void ocfs2_journal_shutdown(struct ocfs2_super *osb); 161void ocfs2_journal_shutdown(struct ocfs2_super *osb);
162int ocfs2_journal_wipe(struct ocfs2_journal *journal, 162int ocfs2_journal_wipe(struct ocfs2_journal *journal,
163 int full); 163 int full);
164int ocfs2_journal_load(struct ocfs2_journal *journal, int local); 164int ocfs2_journal_load(struct ocfs2_journal *journal, int local,
165 int replayed);
165int ocfs2_check_journals_nolocks(struct ocfs2_super *osb); 166int ocfs2_check_journals_nolocks(struct ocfs2_super *osb);
166void ocfs2_recovery_thread(struct ocfs2_super *osb, 167void ocfs2_recovery_thread(struct ocfs2_super *osb,
167 int node_num); 168 int node_num);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1cb814be8ef..7f625f2b111 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -204,6 +204,8 @@ struct ocfs2_super
204 204
205 struct ocfs2_slot_info *slot_info; 205 struct ocfs2_slot_info *slot_info;
206 206
207 u32 *slot_recovery_generations;
208
207 spinlock_t node_map_lock; 209 spinlock_t node_map_lock;
208 210
209 u64 root_blkno; 211 u64 root_blkno;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3f194517762..4f619850ccf 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -660,7 +660,10 @@ struct ocfs2_dinode {
660 struct { /* Info for journal system 660 struct { /* Info for journal system
661 inodes */ 661 inodes */
662 __le32 ij_flags; /* Mounted, version, etc. */ 662 __le32 ij_flags; /* Mounted, version, etc. */
663 __le32 ij_pad; 663 __le32 ij_recovery_generation; /* Incremented when the
664 journal is recovered
665 after an unclean
666 shutdown */
664 } journal1; 667 } journal1;
665 } id1; /* Inode type dependant 1 */ 668 } id1; /* Inode type dependant 1 */
666/*C0*/ union { 669/*C0*/ union {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2560b33889a..88255d3f52b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1442,6 +1442,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
1442 } 1442 }
1443 mlog(0, "max_slots for this device: %u\n", osb->max_slots); 1443 mlog(0, "max_slots for this device: %u\n", osb->max_slots);
1444 1444
1445 osb->slot_recovery_generations =
1446 kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations),
1447 GFP_KERNEL);
1448 if (!osb->slot_recovery_generations) {
1449 status = -ENOMEM;
1450 mlog_errno(status);
1451 goto bail;
1452 }
1453
1445 init_waitqueue_head(&osb->osb_wipe_event); 1454 init_waitqueue_head(&osb->osb_wipe_event);
1446 osb->osb_orphan_wipes = kcalloc(osb->max_slots, 1455 osb->osb_orphan_wipes = kcalloc(osb->max_slots,
1447 sizeof(*osb->osb_orphan_wipes), 1456 sizeof(*osb->osb_orphan_wipes),
@@ -1703,7 +1712,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
1703 local = ocfs2_mount_local(osb); 1712 local = ocfs2_mount_local(osb);
1704 1713
1705 /* will play back anything left in the journal. */ 1714 /* will play back anything left in the journal. */
1706 status = ocfs2_journal_load(osb->journal, local); 1715 status = ocfs2_journal_load(osb->journal, local, dirty);
1707 if (status < 0) { 1716 if (status < 0) {
1708 mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); 1717 mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status);
1709 goto finally; 1718 goto finally;
@@ -1768,6 +1777,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
1768 ocfs2_free_slot_info(osb); 1777 ocfs2_free_slot_info(osb);
1769 1778
1770 kfree(osb->osb_orphan_wipes); 1779 kfree(osb->osb_orphan_wipes);
1780 kfree(osb->slot_recovery_generations);
1771 /* FIXME 1781 /* FIXME
1772 * This belongs in journal shutdown, but because we have to 1782 * This belongs in journal shutdown, but because we have to
1773 * allocate osb->journal at the start of ocfs2_initalize_osb(), 1783 * allocate osb->journal at the start of ocfs2_initalize_osb(),
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
index dc75f22be3f..697663b01ba 100644
--- a/fs/omfs/bitmap.c
+++ b/fs/omfs/bitmap.c
@@ -71,10 +71,10 @@ static int set_run(struct super_block *sb, int map,
71 } 71 }
72 if (set) { 72 if (set) {
73 set_bit(bit, sbi->s_imap[map]); 73 set_bit(bit, sbi->s_imap[map]);
74 set_bit(bit, (long *) bh->b_data); 74 set_bit(bit, (unsigned long *)bh->b_data);
75 } else { 75 } else {
76 clear_bit(bit, sbi->s_imap[map]); 76 clear_bit(bit, sbi->s_imap[map]);
77 clear_bit(bit, (long *) bh->b_data); 77 clear_bit(bit, (unsigned long *)bh->b_data);
78 } 78 }
79 } 79 }
80 mark_buffer_dirty(bh); 80 mark_buffer_dirty(bh);
@@ -109,7 +109,7 @@ int omfs_allocate_block(struct super_block *sb, u64 block)
109 if (!bh) 109 if (!bh)
110 goto out; 110 goto out;
111 111
112 set_bit(bit, (long *) bh->b_data); 112 set_bit(bit, (unsigned long *)bh->b_data);
113 mark_buffer_dirty(bh); 113 mark_buffer_dirty(bh);
114 brelse(bh); 114 brelse(bh);
115 } 115 }
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 05a5bc31e4b..c0757e99887 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -104,7 +104,7 @@ int omfs_make_empty(struct inode *inode, struct super_block *sb)
104 104
105 oi = (struct omfs_inode *) bh->b_data; 105 oi = (struct omfs_inode *) bh->b_data;
106 oi->i_head.h_self = cpu_to_be64(inode->i_ino); 106 oi->i_head.h_self = cpu_to_be64(inode->i_ino);
107 oi->i_sibling = ~0ULL; 107 oi->i_sibling = ~cpu_to_be64(0ULL);
108 108
109 mark_buffer_dirty(bh); 109 mark_buffer_dirty(bh);
110 brelse(bh); 110 brelse(bh);
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 66e01fae438..7e2499053e4 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -30,11 +30,11 @@ void omfs_make_empty_table(struct buffer_head *bh, int offset)
30{ 30{
31 struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset]; 31 struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset];
32 32
33 oe->e_next = ~0ULL; 33 oe->e_next = ~cpu_to_be64(0ULL);
34 oe->e_extent_count = cpu_to_be32(1), 34 oe->e_extent_count = cpu_to_be32(1),
35 oe->e_fill = cpu_to_be32(0x22), 35 oe->e_fill = cpu_to_be32(0x22),
36 oe->e_entry.e_cluster = ~0ULL; 36 oe->e_entry.e_cluster = ~cpu_to_be64(0ULL);
37 oe->e_entry.e_blocks = ~0ULL; 37 oe->e_entry.e_blocks = ~cpu_to_be64(0ULL);
38} 38}
39 39
40int omfs_shrink_inode(struct inode *inode) 40int omfs_shrink_inode(struct inode *inode)
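
~cpu_to_be64(0ULL) and ~0ULL produce the same all-ones bit pattern; the new spelling only keeps the expression typed as a big-endian quantity for sparse. A small user-space check, with <endian.h>'s htobe64 standing in for the kernel's cpu_to_be64:

#define _DEFAULT_SOURCE
#include <endian.h>
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
        uint64_t a = ~htobe64(0ULL);   /* complement of a big-endian zero */
        uint64_t b = htobe64(~0ULL);   /* big-endian view of all-ones     */

        /* All-ones has no byte order, so both spellings agree. */
        printf("a=%#" PRIx64 " b=%#" PRIx64 " equal=%d\n", a, b, a == b);
        return 0;
}
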
diff --git a/fs/open.c b/fs/open.c
index 52647be277a..07da9359481 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -963,62 +963,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
963} 963}
964EXPORT_SYMBOL(dentry_open); 964EXPORT_SYMBOL(dentry_open);
965 965
966/*
967 * Find an empty file descriptor entry, and mark it busy.
968 */
969int get_unused_fd_flags(int flags)
970{
971 struct files_struct * files = current->files;
972 int fd, error;
973 struct fdtable *fdt;
974
975 spin_lock(&files->file_lock);
976
977repeat:
978 fdt = files_fdtable(files);
979 fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
980 files->next_fd);
981
982 /* Do we need to expand the fd array or fd set? */
983 error = expand_files(files, fd);
984 if (error < 0)
985 goto out;
986
987 if (error) {
988 /*
989 * If we needed to expand the fs array we
990 * might have blocked - try again.
991 */
992 goto repeat;
993 }
994
995 FD_SET(fd, fdt->open_fds);
996 if (flags & O_CLOEXEC)
997 FD_SET(fd, fdt->close_on_exec);
998 else
999 FD_CLR(fd, fdt->close_on_exec);
1000 files->next_fd = fd + 1;
1001#if 1
1002 /* Sanity check */
1003 if (fdt->fd[fd] != NULL) {
1004 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
1005 fdt->fd[fd] = NULL;
1006 }
1007#endif
1008 error = fd;
1009
1010out:
1011 spin_unlock(&files->file_lock);
1012 return error;
1013}
1014
1015int get_unused_fd(void)
1016{
1017 return get_unused_fd_flags(0);
1018}
1019
1020EXPORT_SYMBOL(get_unused_fd);
1021
1022static void __put_unused_fd(struct files_struct *files, unsigned int fd) 966static void __put_unused_fd(struct files_struct *files, unsigned int fd)
1023{ 967{
1024 struct fdtable *fdt = files_fdtable(files); 968 struct fdtable *fdt = files_fdtable(files);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index cb4096cc3fb..4fb81e9c94e 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -300,10 +300,10 @@ out:
300 return rtn; 300 return rtn;
301} 301}
302 302
303static DEFINE_IDR(proc_inum_idr); 303static DEFINE_IDA(proc_inum_ida);
304static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 304static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
305 305
306#define PROC_DYNAMIC_FIRST 0xF0000000UL 306#define PROC_DYNAMIC_FIRST 0xF0000000U
307 307
308/* 308/*
309 * Return an inode number between PROC_DYNAMIC_FIRST and 309 * Return an inode number between PROC_DYNAMIC_FIRST and
@@ -311,36 +311,33 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
311 */ 311 */
312static unsigned int get_inode_number(void) 312static unsigned int get_inode_number(void)
313{ 313{
314 int i, inum = 0; 314 unsigned int i;
315 int error; 315 int error;
316 316
317retry: 317retry:
318 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 318 if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
319 return 0; 319 return 0;
320 320
321 spin_lock(&proc_inum_lock); 321 spin_lock(&proc_inum_lock);
322 error = idr_get_new(&proc_inum_idr, NULL, &i); 322 error = ida_get_new(&proc_inum_ida, &i);
323 spin_unlock(&proc_inum_lock); 323 spin_unlock(&proc_inum_lock);
324 if (error == -EAGAIN) 324 if (error == -EAGAIN)
325 goto retry; 325 goto retry;
326 else if (error) 326 else if (error)
327 return 0; 327 return 0;
328 328
329 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 329 if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
330 330 spin_lock(&proc_inum_lock);
331 /* inum will never be more than 0xf0ffffff, so no check 331 ida_remove(&proc_inum_ida, i);
332 * for overflow. 332 spin_unlock(&proc_inum_lock);
333 */ 333 }
334 334 return PROC_DYNAMIC_FIRST + i;
335 return inum;
336} 335}
337 336
338static void release_inode_number(unsigned int inum) 337static void release_inode_number(unsigned int inum)
339{ 338{
340 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;
341
342 spin_lock(&proc_inum_lock); 339 spin_lock(&proc_inum_lock);
343 idr_remove(&proc_inum_idr, id); 340 ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
344 spin_unlock(&proc_inum_lock); 341 spin_unlock(&proc_inum_lock);
345} 342}
346 343
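
With the IDA handing out small integers from zero, the only subtle step above is offsetting them into the reserved 0xF0000000+ range without wrapping past UINT_MAX. A user-space sketch of that mapping, with a trivial counter standing in for ida_get_new()/ida_remove() and no locking:

#include <limits.h>
#include <stdio.h>

#define DYNAMIC_FIRST 0xF0000000U

static unsigned int next_id;            /* stand-in for ida_get_new() */

/* Map an allocator ID into the reserved inode range; 0 means failure. */
static unsigned int model_get_inode_number(void)
{
        unsigned int i = next_id++;

        if (i > UINT_MAX - DYNAMIC_FIRST) {
                next_id--;              /* give the ID back (ida_remove()) */
                return 0;               /* range exhausted */
        }
        return DYNAMIC_FIRST + i;
}

int main(void)
{
        printf("%#x %#x\n", model_get_inode_number(), model_get_inode_number());
        /* prints 0xf0000000 0xf0000001 */
        return 0;
}
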
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 879e54d35c2..282a13596c7 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2076,8 +2076,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2076 return err; 2076 return err;
2077 /* Quotafile not on the same filesystem? */ 2077 /* Quotafile not on the same filesystem? */
2078 if (nd.path.mnt->mnt_sb != sb) { 2078 if (nd.path.mnt->mnt_sb != sb) {
2079 path_put(&nd.path); 2079 err = -EXDEV;
2080 return -EXDEV; 2080 goto out;
2081 } 2081 }
2082 inode = nd.path.dentry->d_inode; 2082 inode = nd.path.dentry->d_inode;
2083 /* We must not pack tails for quota files on reiserfs for quota IO to work */ 2083 /* We must not pack tails for quota files on reiserfs for quota IO to work */
@@ -2087,8 +2087,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2087 reiserfs_warning(sb, 2087 reiserfs_warning(sb,
2088 "reiserfs: Unpacking tail of quota file failed" 2088 "reiserfs: Unpacking tail of quota file failed"
2089 " (%d). Cannot turn on quotas.", err); 2089 " (%d). Cannot turn on quotas.", err);
2090 path_put(&nd.path); 2090 err = -EINVAL;
2091 return -EINVAL; 2091 goto out;
2092 } 2092 }
2093 mark_inode_dirty(inode); 2093 mark_inode_dirty(inode);
2094 } 2094 }
@@ -2109,13 +2109,15 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2109 /* Just start temporary transaction and finish it */ 2109 /* Just start temporary transaction and finish it */
2110 err = journal_begin(&th, sb, 1); 2110 err = journal_begin(&th, sb, 1);
2111 if (err) 2111 if (err)
2112 return err; 2112 goto out;
2113 err = journal_end_sync(&th, sb, 1); 2113 err = journal_end_sync(&th, sb, 1);
2114 if (err) 2114 if (err)
2115 return err; 2115 goto out;
2116 } 2116 }
2117 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
2118out:
2117 path_put(&nd.path); 2119 path_put(&nd.path);
2118 return vfs_quota_on(sb, type, format_id, path, 0); 2120 return err;
2119} 2121}
2120 2122
2121/* Read data from quotafile - avoid pagecache and such because we cannot afford 2123/* Read data from quotafile - avoid pagecache and such because we cannot afford
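
The reiserfs_quota_on() rework funnels every early return through a single label so path_put() runs exactly once on success and on every error. The same goto-cleanup shape in a self-contained user-space form, with fopen()/fclose() standing in for the path lookup and path_put(); the file name is arbitrary:

#include <errno.h>
#include <stdio.h>

/* One acquisition, one release point; all errors funnel through "out". */
static int process_file(const char *name)
{
        FILE *f = fopen(name, "r");
        int err = 0, c;

        if (!f)
                return -errno;

        c = fgetc(f);
        if (c == EOF) {
                err = -EINVAL;          /* empty file: bail via cleanup */
                goto out;
        }
        if (c != '#') {
                err = -EINVAL;          /* wrong header: same cleanup path */
                goto out;
        }
        /* ... real work would go here ... */
out:
        fclose(f);                      /* the single path_put() analogue */
        return err;
}

int main(void)
{
        printf("%d\n", process_file("/etc/hostname"));
        return 0;
}
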
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 8e51a2aaa97..60d2f822e87 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -418,7 +418,8 @@ static int
418romfs_readpage(struct file *file, struct page * page) 418romfs_readpage(struct file *file, struct page * page)
419{ 419{
420 struct inode *inode = page->mapping->host; 420 struct inode *inode = page->mapping->host;
421 loff_t offset, avail, readlen; 421 loff_t offset, size;
422 unsigned long filled;
422 void *buf; 423 void *buf;
423 int result = -EIO; 424 int result = -EIO;
424 425
@@ -430,21 +431,29 @@ romfs_readpage(struct file *file, struct page * page)
430 431
431 /* 32 bit warning -- but not for us :) */ 432 /* 32 bit warning -- but not for us :) */
432 offset = page_offset(page); 433 offset = page_offset(page);
433 if (offset < i_size_read(inode)) { 434 size = i_size_read(inode);
434 avail = inode->i_size-offset; 435 filled = 0;
435 readlen = min_t(unsigned long, avail, PAGE_SIZE); 436 result = 0;
436 if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) { 437 if (offset < size) {
437 if (readlen < PAGE_SIZE) { 438 unsigned long readlen;
438 memset(buf + readlen,0,PAGE_SIZE-readlen); 439
439 } 440 size -= offset;
440 SetPageUptodate(page); 441 readlen = size > PAGE_SIZE ? PAGE_SIZE : size;
441 result = 0; 442
443 filled = romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen);
444
445 if (filled != readlen) {
446 SetPageError(page);
447 filled = 0;
448 result = -EIO;
442 } 449 }
443 } 450 }
444 if (result) { 451
445 memset(buf, 0, PAGE_SIZE); 452 if (filled < PAGE_SIZE)
446 SetPageError(page); 453 memset(buf + filled, 0, PAGE_SIZE-filled);
447 } 454
455 if (!result)
456 SetPageUptodate(page);
448 flush_dcache_page(page); 457 flush_dcache_page(page);
449 458
450 unlock_page(page); 459 unlock_page(page);