Diffstat (limited to 'fs')
-rw-r--r--  fs/aio.c | 24
-rw-r--r--  fs/autofs4/autofs_i.h | 1
-rw-r--r--  fs/autofs4/dev-ioctl.c | 1
-rw-r--r--  fs/autofs4/expire.c | 2
-rw-r--r--  fs/autofs4/inode.c | 2
-rw-r--r--  fs/autofs4/waitq.c | 22
-rw-r--r--  fs/binfmt_aout.c | 14
-rw-r--r--  fs/binfmt_elf.c | 2
-rw-r--r--  fs/bio.c | 10
-rw-r--r--  fs/btrfs/backref.c | 14
-rw-r--r--  fs/btrfs/check-integrity.c | 13
-rw-r--r--  fs/btrfs/compression.c | 2
-rw-r--r--  fs/btrfs/ctree.h | 2
-rw-r--r--  fs/btrfs/disk-io.c | 19
-rw-r--r--  fs/btrfs/extent-tree.c | 100
-rw-r--r--  fs/btrfs/extent_io.c | 115
-rw-r--r--  fs/btrfs/extent_io.h | 1
-rw-r--r--  fs/btrfs/extent_map.h | 4
-rw-r--r--  fs/btrfs/file.c | 29
-rw-r--r--  fs/btrfs/free-space-cache.c | 6
-rw-r--r--  fs/btrfs/inode-map.c | 6
-rw-r--r--  fs/btrfs/inode.c | 50
-rw-r--r--  fs/btrfs/ioctl.c | 61
-rw-r--r--  fs/btrfs/reada.c | 2
-rw-r--r--  fs/btrfs/scrub.c | 8
-rw-r--r--  fs/btrfs/transaction.c | 16
-rw-r--r--  fs/btrfs/tree-log.c | 6
-rw-r--r--  fs/btrfs/volumes.c | 33
-rw-r--r--  fs/ceph/caps.c | 4
-rw-r--r--  fs/ceph/dir.c | 4
-rw-r--r--  fs/ceph/mds_client.c | 10
-rw-r--r--  fs/ceph/mds_client.h | 7
-rw-r--r--  fs/ceph/xattr.c | 4
-rw-r--r--  fs/cifs/Kconfig | 4
-rw-r--r--  fs/cifs/connect.c | 23
-rw-r--r--  fs/cifs/dir.c | 22
-rw-r--r--  fs/cifs/inode.c | 28
-rw-r--r--  fs/cifs/sess.c | 11
-rw-r--r--  fs/compat.c | 56
-rw-r--r--  fs/dcache.c | 41
-rw-r--r--  fs/direct-io.c | 4
-rw-r--r--  fs/ecryptfs/crypto.c | 68
-rw-r--r--  fs/ecryptfs/ecryptfs_kernel.h | 6
-rw-r--r--  fs/ecryptfs/inode.c | 2
-rw-r--r--  fs/ecryptfs/keystore.c | 9
-rw-r--r--  fs/ecryptfs/miscdev.c | 2
-rw-r--r--  fs/ecryptfs/mmap.c | 4
-rw-r--r--  fs/ecryptfs/read_write.c | 4
-rw-r--r--  fs/ecryptfs/super.c | 14
-rw-r--r--  fs/eventpoll.c | 30
-rw-r--r--  fs/exec.c | 51
-rw-r--r--  fs/fs-writeback.c | 16
-rw-r--r--  fs/gfs2/glock.c | 14
-rw-r--r--  fs/gfs2/inode.c | 5
-rw-r--r--  fs/gfs2/ops_fstype.c | 5
-rw-r--r--  fs/gfs2/rgrp.c | 13
-rw-r--r--  fs/inode.c | 8
-rw-r--r--  fs/ioprio.c | 2
-rw-r--r--  fs/jffs2/erase.c | 2
-rw-r--r--  fs/logfs/dev_mtd.c | 6
-rw-r--r--  fs/logfs/dir.c | 2
-rw-r--r--  fs/logfs/file.c | 2
-rw-r--r--  fs/logfs/gc.c | 2
-rw-r--r--  fs/logfs/inode.c | 4
-rw-r--r--  fs/logfs/journal.c | 1
-rw-r--r--  fs/logfs/logfs.h | 5
-rw-r--r--  fs/logfs/readwrite.c | 51
-rw-r--r--  fs/logfs/segment.c | 51
-rw-r--r--  fs/logfs/super.c | 3
-rw-r--r--  fs/namei.c | 70
-rw-r--r--  fs/nfs/nfs4proc.c | 130
-rw-r--r--  fs/nfs/nfs4state.c | 2
-rw-r--r--  fs/nfs/nfs4xdr.c | 5
-rw-r--r--  fs/nilfs2/ioctl.c | 2
-rw-r--r--  fs/ntfs/attrib.c | 6
-rw-r--r--  fs/ntfs/mft.c | 6
-rw-r--r--  fs/ntfs/super.c | 4
-rw-r--r--  fs/ocfs2/namei.c | 2
-rw-r--r--  fs/proc/base.c | 126
-rw-r--r--  fs/quota/quota.c | 24
-rw-r--r--  fs/select.c | 2
-rw-r--r--  fs/signalfd.c | 15
-rw-r--r--  fs/super.c | 22
-rw-r--r--  fs/sysfs/file.c | 6
-rw-r--r--  fs/sysfs/inode.c | 5
-rw-r--r--  fs/xfs/kmem.h | 6
-rw-r--r--  fs/xfs/xfs_dquot.c | 127
-rw-r--r--  fs/xfs/xfs_log_recover.c | 8
-rw-r--r--  fs/xfs/xfs_qm.c | 291
-rw-r--r--  fs/xfs/xfs_qm.h | 14
-rw-r--r--  fs/xfs/xfs_qm_stats.c | 4
-rw-r--r--  fs/xfs/xfs_qm_syscalls.c | 4
-rw-r--r--  fs/xfs/xfs_trace.h | 5
-rw-r--r--  fs/xfs/xfs_trans.c | 4
-rw-r--r--  fs/xfs/xfs_trans_dquot.c | 10
95 files changed, 1164 insertions, 901 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 969beb0e2231..b9d64d89a043 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -228,12 +228,6 @@ static void __put_ioctx(struct kioctx *ctx)
 	call_rcu(&ctx->rcu_head, ctx_rcu_free);
 }
 
-static inline void get_ioctx(struct kioctx *kioctx)
-{
-	BUG_ON(atomic_read(&kioctx->users) <= 0);
-	atomic_inc(&kioctx->users);
-}
-
 static inline int try_get_ioctx(struct kioctx *kioctx)
 {
 	return atomic_inc_not_zero(&kioctx->users);
@@ -273,7 +267,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	mm = ctx->mm = current->mm;
 	atomic_inc(&mm->mm_count);
 
-	atomic_set(&ctx->users, 1);
+	atomic_set(&ctx->users, 2);
 	spin_lock_init(&ctx->ctx_lock);
 	spin_lock_init(&ctx->ring_info.ring_lock);
 	init_waitqueue_head(&ctx->wait);
@@ -490,6 +484,8 @@ static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch)
 		kmem_cache_free(kiocb_cachep, req);
 		ctx->reqs_active--;
 	}
+	if (unlikely(!ctx->reqs_active && ctx->dead))
+		wake_up_all(&ctx->wait);
 	spin_unlock_irq(&ctx->ctx_lock);
 }
 
@@ -607,11 +603,16 @@ static void aio_fput_routine(struct work_struct *data)
 		fput(req->ki_filp);
 
 		/* Link the iocb into the context's free list */
+		rcu_read_lock();
 		spin_lock_irq(&ctx->ctx_lock);
 		really_put_req(ctx, req);
+		/*
+		 * at that point ctx might've been killed, but actual
+		 * freeing is RCU'd
+		 */
 		spin_unlock_irq(&ctx->ctx_lock);
+		rcu_read_unlock();
 
-		put_ioctx(ctx);
 		spin_lock_irq(&fput_lock);
 	}
 	spin_unlock_irq(&fput_lock);
@@ -642,7 +643,6 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	 * this function will be executed w/out any aio kthread wakeup.
 	 */
 	if (unlikely(!fput_atomic(req->ki_filp))) {
-		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
 		spin_unlock(&fput_lock);
@@ -1336,10 +1336,10 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 	ret = PTR_ERR(ioctx);
 	if (!IS_ERR(ioctx)) {
 		ret = put_user(ioctx->user_id, ctxp);
-		if (!ret)
+		if (!ret) {
+			put_ioctx(ioctx);
 			return 0;
-
-		get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
+		}
 		io_destroy(ioctx);
 	}
 
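[Note] The aio.c hunks above rework the kioctx reference lifecycle: ioctx_alloc() now starts the counter at 2 (one reference owned by the mm's context table, one by the io_setup() caller, dropped with the new put_ioctx() on the success path), and the bare get_ioctx() helper disappears because every lookup goes through try_get_ioctx(), i.e. atomic_inc_not_zero(), so a context already being torn down can never be revived. A minimal user-space sketch of that take-unless-zero pattern, in portable C11 rather than the kernel's atomic API:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct ctx { atomic_int users; };

	/* Succeed only while the object still holds at least one reference;
	 * a count of 0 means teardown has begun and the object must not be
	 * revived (the idea behind the kernel's atomic_inc_not_zero()). */
	static bool try_get(struct ctx *c)
	{
		int old = atomic_load(&c->users);
		while (old != 0) {
			if (atomic_compare_exchange_weak(&c->users, &old, old + 1))
				return true;
		}
		return false;
	}

	static void put(struct ctx *c)
	{
		if (atomic_fetch_sub(&c->users, 1) == 1)
			printf("last reference dropped, free the object\n");
	}

	int main(void)
	{
		struct ctx c = { .users = 2 };	/* as ioctx_alloc() now does */

		printf("lookup %s\n", try_get(&c) ? "succeeded" : "failed");
		put(&c);	/* the lookup's reference */
		put(&c);	/* the io_setup() caller's reference */
		put(&c);	/* the table's reference (io_destroy) */
		return 0;
	}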
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d8d8e7ba6a1e..eb1cc92cd67d 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -110,6 +110,7 @@ struct autofs_sb_info {
 	int sub_version;
 	int min_proto;
 	int max_proto;
+	int compat_daemon;
 	unsigned long exp_timeout;
 	unsigned int type;
 	int reghost_enabled;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 76741d8d7786..85f1fcdb30e7 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -385,6 +385,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 		sbi->pipefd = pipefd;
 		sbi->pipe = pipe;
 		sbi->catatonic = 0;
+		sbi->compat_daemon = is_compat_task();
 	}
 out:
 	mutex_unlock(&sbi->wq_mutex);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 450f529a4eae..1feb68ecef95 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -124,6 +124,7 @@ start:
 	/* Negative dentry - try next */
 	if (!simple_positive(q)) {
 		spin_unlock(&p->d_lock);
+		lock_set_subclass(&q->d_lock.dep_map, 0, _RET_IP_);
 		p = q;
 		goto again;
 	}
@@ -186,6 +187,7 @@ again:
 	/* Negative dentry - try next */
 	if (!simple_positive(ret)) {
 		spin_unlock(&p->d_lock);
+		lock_set_subclass(&ret->d_lock.dep_map, 0, _RET_IP_);
 		p = ret;
 		goto again;
 	}
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index e16980b00b8d..06858d955120 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -19,6 +19,7 @@
 #include <linux/parser.h>
 #include <linux/bitops.h>
 #include <linux/magic.h>
+#include <linux/compat.h>
 #include "autofs_i.h"
 #include <linux/module.h>
 
@@ -224,6 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	set_autofs_type_indirect(&sbi->type);
 	sbi->min_proto = 0;
 	sbi->max_proto = 0;
+	sbi->compat_daemon = is_compat_task();
 	mutex_init(&sbi->wq_mutex);
 	mutex_init(&sbi->pipe_mutex);
 	spin_lock_init(&sbi->fs_lock);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index da8876d38a7b..9c098db43344 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -91,7 +91,24 @@ static int autofs4_write(struct autofs_sb_info *sbi,
 
 	return (bytes > 0);
 }
 
+/*
+ * The autofs_v5 packet was misdesigned.
+ *
+ * The packets are identical on x86-32 and x86-64, but have different
+ * alignment. Which means that 'sizeof()' will give different results.
+ * Fix it up for the case of running 32-bit user mode on a 64-bit kernel.
+ */
+static noinline size_t autofs_v5_packet_size(struct autofs_sb_info *sbi)
+{
+	size_t pktsz = sizeof(struct autofs_v5_packet);
+#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT)
+	if (sbi->compat_daemon > 0)
+		pktsz -= 4;
+#endif
+	return pktsz;
+}
+
 static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 				  struct autofs_wait_queue *wq,
 				  int type)
@@ -155,8 +172,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 	{
 		struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet;
 
-		pktsz = sizeof(*packet);
-
+		pktsz = autofs_v5_packet_size(sbi);
 		packet->wait_queue_token = wq->wait_queue_token;
 		packet->len = wq->name.len;
 		memcpy(packet->name, wq->name.name, wq->name.len);
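[Note] The new autofs_v5_packet_size() exists because sizeof(struct autofs_v5_packet) differs between a 64-bit kernel and a 32-bit daemon: the members sit at the same offsets, but x86-64 pads the struct tail to 8-byte alignment while x86-32 only pads to 4, hence the pktsz -= 4 for a compat task. An illustrative stand-alone C program (simplified layout, not the real autofs_v5_packet) showing the 4-byte tail-padding difference:

	#include <stdint.h>
	#include <stdio.h>

	/* Simplified stand-in: any struct whose strictest member alignment
	 * is 8 on x86-64 but 4 on x86-32 picks up 4 extra bytes of tail
	 * padding only on the 64-bit side. */
	struct v5_like {
		uint32_t proto_version;
		uint32_t type;
		uint64_t wait_queue_token;	/* 8-byte aligned on x86-64 only */
		uint32_t len;			/* 20 bytes of members in total */
	};

	int main(void)
	{
		/* prints 24 as a 64-bit binary, 20 when built with -m32 */
		printf("sizeof = %zu\n", sizeof(struct v5_like));
		return 0;
	}

Built with -m32 the program prints 20; as a 64-bit binary it prints 24 — the same 4-byte delta autofs_v5_packet_size() subtracts for a 32-bit daemon.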
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index a6395bdb26ae..1ff94054d35a 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -259,6 +259,13 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->free_area_cache = current->mm->mmap_base;
 	current->mm->cached_hole_size = 0;
 
+	retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
+	if (retval < 0) {
+		/* Someone check-me: is this error path enough? */
+		send_sig(SIGKILL, current, 0);
+		return retval;
+	}
+
 	install_exec_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
 
@@ -352,13 +359,6 @@ beyond_if:
 		return retval;
 	}
 
-	retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
-	if (retval < 0) {
-		/* Someone check-me: is this error path enough? */
-		send_sig(SIGKILL, current, 0);
-		return retval;
-	}
-
 	current->mm->start_stack =
 		(unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
 #ifdef __alpha__
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index bcb884e2d613..07d096c49920 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1421,7 +1421,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 	for (i = 1; i < view->n; ++i) {
 		const struct user_regset *regset = &view->regsets[i];
 		do_thread_regset_writeback(t->task, regset);
-		if (regset->core_note_type &&
+		if (regset->core_note_type && regset->get &&
 		    (!regset->active || regset->active(t->task, regset))) {
 			int ret;
 			size_t size = regset->n * regset->size;
diff --git a/fs/bio.c b/fs/bio.c
index b1fe82cf88cf..b980ecde026a 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -505,13 +505,9 @@ EXPORT_SYMBOL(bio_clone);
 int bio_get_nr_vecs(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-	int nr_pages;
-
-	nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	if (nr_pages > queue_max_segments(q))
-		nr_pages = queue_max_segments(q);
-
-	return nr_pages;
+	return min_t(unsigned,
+		     queue_max_segments(q),
+		     queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
 }
 EXPORT_SYMBOL(bio_get_nr_vecs);
 
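[Note] A worked example for the new bio_get_nr_vecs() expression: with 4 KiB pages, PAGE_SIZE >> 9 is 8 sectors per page, so a queue limited to 255 sectors allows 255 / 8 + 1 = 32 page vectors, clamped to queue_max_segments(q); the +1 presumably allows for a transfer that does not begin on a page boundary. For the same 255-sector queue the old byte-based round-up, ((255 << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT, also came to 32, but it went through an intermediate byte count and a separate clamp branch to get there.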
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b9a843226de8..0436c12da8c2 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -297,7 +297,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
 	struct rb_node *n = &head->node.rb_node;
 	int sgn;
-	int ret;
+	int ret = 0;
 
 	if (extent_op && extent_op->update_key)
 		btrfs_disk_key_to_cpu(info_key, &extent_op->key);
@@ -392,7 +392,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 			     struct btrfs_key *info_key, int *info_level,
 			     struct list_head *prefs)
 {
-	int ret;
+	int ret = 0;
 	int slot;
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
@@ -583,7 +583,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	struct btrfs_path *path;
 	struct btrfs_key info_key = { 0 };
 	struct btrfs_delayed_ref_root *delayed_refs = NULL;
-	struct btrfs_delayed_ref_head *head = NULL;
+	struct btrfs_delayed_ref_head *head;
 	int info_level = 0;
 	int ret;
 	struct list_head prefs_delayed;
@@ -607,6 +607,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	 * at a specified point in time
 	 */
again:
+	head = NULL;
+
 	ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
 	if (ret < 0)
 		goto out;
@@ -635,8 +637,10 @@ again:
 			goto again;
 		}
 		ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed);
-		if (ret)
+		if (ret) {
+			spin_unlock(&delayed_refs->lock);
 			goto out;
+		}
 	}
 	spin_unlock(&delayed_refs->lock);
 
@@ -892,6 +896,8 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 		if (eb != eb_in)
 			free_extent_buffer(eb);
 		ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
+		if (ret > 0)
+			ret = -ENOENT;
 		if (ret)
 			break;
 		next_inum = found_key.offset;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index ad0b3ba735b7..d986824bb2b4 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -644,7 +644,7 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
 static int btrfsic_process_superblock(struct btrfsic_state *state,
 				      struct btrfs_fs_devices *fs_devices)
 {
-	int ret;
+	int ret = 0;
 	struct btrfs_super_block *selected_super;
 	struct list_head *dev_head = &fs_devices->devices;
 	struct btrfs_device *device;
@@ -1662,7 +1662,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
 	block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
 					       &state->block_hashtable);
 	if (NULL != block) {
-		u64 bytenr;
+		u64 bytenr = 0;
 		struct list_head *elem_ref_to;
 		struct list_head *tmp_ref_to;
 
@@ -2777,9 +2777,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
 			printk(KERN_INFO
 			       "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
 			       " size=%lu, data=%p, bdev=%p)\n",
-			       rw, bh->b_blocknr,
-			       (unsigned long long)dev_bytenr, bh->b_size,
-			       bh->b_data, bh->b_bdev);
+			       rw, (unsigned long)bh->b_blocknr,
+			       (unsigned long long)dev_bytenr,
+			       (unsigned long)bh->b_size, bh->b_data,
+			       bh->b_bdev);
 		btrfsic_process_written_block(dev_state, dev_bytenr,
 					      bh->b_data, bh->b_size, NULL,
 					      NULL, bh, rw);
@@ -2844,7 +2845,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
 			printk(KERN_INFO
 			       "submit_bio(rw=0x%x, bi_vcnt=%u,"
 			       " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
-			       rw, bio->bi_vcnt, bio->bi_sector,
+			       rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
 			       (unsigned long long)dev_bytenr,
 			       bio->bi_bdev);
 
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 14f1c5a0b2d2..d02c27cd14c7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -588,6 +588,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				   page_offset(bio->bi_io_vec->bv_page),
 				   PAGE_CACHE_SIZE);
 	read_unlock(&em_tree->lock);
+	if (!em)
+		return -EIO;
 
 	compressed_len = em->block_len;
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 27ebe61d3ccc..80b6486fd5e6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -886,7 +886,7 @@ struct btrfs_block_rsv {
 	u64 reserved;
 	struct btrfs_space_info *space_info;
 	spinlock_t lock;
-	unsigned int full:1;
+	unsigned int full;
 };
 
 /*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7aa9cd36bf1b..534266fe505f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -962,6 +962,13 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
 	tree = &BTRFS_I(page->mapping->host)->io_tree;
 	map = &BTRFS_I(page->mapping->host)->extent_tree;
 
+	/*
+	 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
+	 * slab allocation from alloc_extent_state down the callchain where
+	 * it'd hit a BUG_ON as those flags are not allowed.
+	 */
+	gfp_flags &= ~GFP_SLAB_BUG_MASK;
+
 	ret = try_release_extent_state(map, tree, page, gfp_flags);
 	if (!ret)
 		return 0;
@@ -2253,6 +2260,12 @@ int open_ctree(struct super_block *sb,
 		goto fail_sb_buffer;
 	}
 
+	if (sectorsize < PAGE_SIZE) {
+		printk(KERN_WARNING "btrfs: Incompatible sector size "
+		       "found on %s\n", sb->s_id);
+		goto fail_sb_buffer;
+	}
+
 	mutex_lock(&fs_info->chunk_mutex);
 	ret = btrfs_read_sys_array(tree_root);
 	mutex_unlock(&fs_info->chunk_mutex);
@@ -2294,6 +2307,12 @@ int open_ctree(struct super_block *sb,
 
 	btrfs_close_extra_devices(fs_devices);
 
+	if (!fs_devices->latest_bdev) {
+		printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
+		       sb->s_id);
+		goto fail_tree_roots;
+	}
+
 retry_root_backup:
 	blocksize = btrfs_level_size(tree_root,
 				     btrfs_super_root_level(disk_super));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 700879ed64cf..37e0a800d34e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -34,23 +34,24 @@
 #include "locking.h"
 #include "free-space-cache.h"
 
-/* control flags for do_chunk_alloc's force field
+/*
+ * control flags for do_chunk_alloc's force field
  * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
  * if we really need one.
  *
- * CHUNK_ALLOC_FORCE means it must try to allocate one
- *
  * CHUNK_ALLOC_LIMITED means to only try and allocate one
  * if we have very few chunks already allocated.  This is
  * used as part of the clustering code to help make sure
  * we have a good pool of storage to cluster in, without
  * filling the FS with empty chunks
  *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one
+ *
  */
 enum {
 	CHUNK_ALLOC_NO_FORCE = 0,
-	CHUNK_ALLOC_FORCE = 1,
-	CHUNK_ALLOC_LIMITED = 2,
+	CHUNK_ALLOC_LIMITED = 1,
+	CHUNK_ALLOC_FORCE = 2,
 };
 
 /*
@@ -3311,7 +3312,8 @@ commit_trans:
 	}
 	data_sinfo->bytes_may_use += bytes;
 	trace_btrfs_space_reservation(root->fs_info, "space_info",
-				      (u64)data_sinfo, bytes, 1);
+				      (u64)(unsigned long)data_sinfo,
+				      bytes, 1);
 	spin_unlock(&data_sinfo->lock);
 
 	return 0;
@@ -3332,7 +3334,8 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
 	spin_lock(&data_sinfo->lock);
 	data_sinfo->bytes_may_use -= bytes;
 	trace_btrfs_space_reservation(root->fs_info, "space_info",
-				      (u64)data_sinfo, bytes, 0);
+				      (u64)(unsigned long)data_sinfo,
+				      bytes, 0);
 	spin_unlock(&data_sinfo->lock);
 }
 
@@ -3414,7 +3417,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 
 again:
 	spin_lock(&space_info->lock);
-	if (space_info->force_alloc)
+	if (force < space_info->force_alloc)
 		force = space_info->force_alloc;
 	if (space_info->full) {
 		spin_unlock(&space_info->lock);
@@ -3610,12 +3613,15 @@ static int may_commit_transaction(struct btrfs_root *root,
 	if (space_info != delayed_rsv->space_info)
 		return -ENOSPC;
 
+	spin_lock(&space_info->lock);
 	spin_lock(&delayed_rsv->lock);
-	if (delayed_rsv->size < bytes) {
+	if (space_info->bytes_pinned + delayed_rsv->size < bytes) {
 		spin_unlock(&delayed_rsv->lock);
+		spin_unlock(&space_info->lock);
 		return -ENOSPC;
 	}
 	spin_unlock(&delayed_rsv->lock);
+	spin_unlock(&space_info->lock);
 
 commit:
 	trans = btrfs_join_transaction(root);
@@ -3694,9 +3700,9 @@ again:
 	if (used + orig_bytes <= space_info->total_bytes) {
 		space_info->bytes_may_use += orig_bytes;
 		trace_btrfs_space_reservation(root->fs_info,
-					      "space_info",
-					      (u64)space_info,
-					      orig_bytes, 1);
+				      "space_info",
+				      (u64)(unsigned long)space_info,
+				      orig_bytes, 1);
 		ret = 0;
 	} else {
 		/*
@@ -3765,9 +3771,9 @@ again:
 		if (used + num_bytes < space_info->total_bytes + avail) {
 			space_info->bytes_may_use += orig_bytes;
 			trace_btrfs_space_reservation(root->fs_info,
-						      "space_info",
-						      (u64)space_info,
-						      orig_bytes, 1);
+					      "space_info",
+					      (u64)(unsigned long)space_info,
+					      orig_bytes, 1);
 			ret = 0;
 		} else {
 			wait_ordered = true;
@@ -3912,8 +3918,8 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
 		spin_lock(&space_info->lock);
 		space_info->bytes_may_use -= num_bytes;
 		trace_btrfs_space_reservation(fs_info, "space_info",
-					      (u64)space_info,
-					      num_bytes, 0);
+				      (u64)(unsigned long)space_info,
+				      num_bytes, 0);
 		space_info->reservation_progress++;
 		spin_unlock(&space_info->lock);
 	}
@@ -4104,7 +4110,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
 	num_bytes += div64_u64(data_used + meta_used, 50);
 
 	if (num_bytes * 3 > meta_used)
-		num_bytes = div64_u64(meta_used, 3);
+		num_bytes = div64_u64(meta_used, 3) * 2;
 
 	return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
 }
@@ -4131,14 +4137,14 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
 		block_rsv->reserved += num_bytes;
 		sinfo->bytes_may_use += num_bytes;
 		trace_btrfs_space_reservation(fs_info, "space_info",
-					      (u64)sinfo, num_bytes, 1);
+				      (u64)(unsigned long)sinfo, num_bytes, 1);
 	}
 
 	if (block_rsv->reserved >= block_rsv->size) {
 		num_bytes = block_rsv->reserved - block_rsv->size;
 		sinfo->bytes_may_use -= num_bytes;
 		trace_btrfs_space_reservation(fs_info, "space_info",
-					      (u64)sinfo, num_bytes, 0);
+				      (u64)(unsigned long)sinfo, num_bytes, 0);
 		sinfo->reservation_progress++;
 		block_rsv->reserved = block_rsv->size;
 		block_rsv->full = 1;
@@ -4191,7 +4197,8 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
 	if (!trans->bytes_reserved)
 		return;
 
-	trace_btrfs_space_reservation(root->fs_info, "transaction", (u64)trans,
+	trace_btrfs_space_reservation(root->fs_info, "transaction",
+				      (u64)(unsigned long)trans,
 				      trans->bytes_reserved, 0);
 	btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
 	trans->bytes_reserved = 0;
@@ -4709,9 +4716,9 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
 		space_info->bytes_reserved += num_bytes;
 		if (reserve == RESERVE_ALLOC) {
 			trace_btrfs_space_reservation(cache->fs_info,
-						      "space_info",
-						      (u64)space_info,
-						      num_bytes, 0);
+					      "space_info",
+					      (u64)(unsigned long)space_info,
+					      num_bytes, 0);
 			space_info->bytes_may_use -= num_bytes;
 		}
 	}
@@ -5794,6 +5801,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 			 u64 search_end, struct btrfs_key *ins,
 			 u64 data)
 {
+	bool final_tried = false;
 	int ret;
 	u64 search_start = 0;
 
@@ -5813,22 +5821,25 @@ again:
 			       search_start, search_end, hint_byte,
 			       ins, data);
 
-	if (ret == -ENOSPC && num_bytes > min_alloc_size) {
-		num_bytes = num_bytes >> 1;
-		num_bytes = num_bytes & ~(root->sectorsize - 1);
-		num_bytes = max(num_bytes, min_alloc_size);
-		do_chunk_alloc(trans, root->fs_info->extent_root,
-			       num_bytes, data, CHUNK_ALLOC_FORCE);
-		goto again;
-	}
-	if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
-		struct btrfs_space_info *sinfo;
-
-		sinfo = __find_space_info(root->fs_info, data);
-		printk(KERN_ERR "btrfs allocation failed flags %llu, "
-		       "wanted %llu\n", (unsigned long long)data,
-		       (unsigned long long)num_bytes);
-		dump_space_info(sinfo, num_bytes, 1);
+	if (ret == -ENOSPC) {
+		if (!final_tried) {
+			num_bytes = num_bytes >> 1;
+			num_bytes = num_bytes & ~(root->sectorsize - 1);
+			num_bytes = max(num_bytes, min_alloc_size);
+			do_chunk_alloc(trans, root->fs_info->extent_root,
+				       num_bytes, data, CHUNK_ALLOC_FORCE);
+			if (num_bytes == min_alloc_size)
+				final_tried = true;
+			goto again;
+		} else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+			struct btrfs_space_info *sinfo;
+
+			sinfo = __find_space_info(root->fs_info, data);
+			printk(KERN_ERR "btrfs allocation failed flags %llu, "
+			       "wanted %llu\n", (unsigned long long)data,
+			       (unsigned long long)num_bytes);
+			dump_space_info(sinfo, num_bytes, 1);
+		}
 	}
 
 	trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
@@ -7881,9 +7892,16 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
 	u64 start;
 	u64 end;
 	u64 trimmed = 0;
+	u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
 	int ret = 0;
 
-	cache = btrfs_lookup_block_group(fs_info, range->start);
+	/*
+	 * try to trim all FS space, our block group may start from non-zero.
+	 */
+	if (range->len == total_bytes)
+		cache = btrfs_lookup_first_block_group(fs_info, range->start);
+	else
+		cache = btrfs_lookup_block_group(fs_info, range->start);
 
 	while (cache) {
 		if (cache->key.objectid >= (range->start + range->len)) {
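[Note] Two extent-tree.c changes above are coupled: swapping the values of CHUNK_ALLOC_LIMITED (now 1) and CHUNK_ALLOC_FORCE (now 2) ranks the flags by strength, which is what lets do_chunk_alloc() replace `if (space_info->force_alloc)` with `if (force < space_info->force_alloc)` so a caller's request is upgraded to a stronger pending one but a FORCE request is never downgraded. A tiny stand-alone sketch of that ordering logic (values copied from the diff, names shortened):

	#include <stdio.h>

	/* Ranking the flags by strength lets a plain numeric compare
	 * upgrade a weaker request without downgrading a stronger one. */
	enum { ALLOC_NO_FORCE = 0, ALLOC_LIMITED = 1, ALLOC_FORCE = 2 };

	int main(void)
	{
		int force = ALLOC_NO_FORCE;	/* this caller's request */
		int force_alloc = ALLOC_FORCE;	/* stronger pending request */

		if (force < force_alloc)	/* the new do_chunk_alloc() test */
			force = force_alloc;
		printf("%d\n", force);		/* prints 2 */
		return 0;
	}

The scattered (u64)(unsigned long) casts in the tracepoint calls exist because casting a pointer straight to u64 provokes a pointer-to-integer-of-different-size warning on 32-bit builds; going through unsigned long first is the usual kernel idiom.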
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9d09a4f81875..a55fbe6252de 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -513,6 +513,15 @@ hit_next:
 	WARN_ON(state->end < start);
 	last_end = state->end;
 
+	if (state->end < end && !need_resched())
+		next_node = rb_next(&state->rb_node);
+	else
+		next_node = NULL;
+
+	/* the state doesn't have the wanted bits, go ahead */
+	if (!(state->state & bits))
+		goto next;
+
 	/*
 	 * | ---- desired range ---- |
 	 * | state | or
@@ -565,20 +574,15 @@ hit_next:
 		goto out;
 	}
 
-	if (state->end < end && prealloc && !need_resched())
-		next_node = rb_next(&state->rb_node);
-	else
-		next_node = NULL;
-
 	set |= clear_state_bit(tree, state, &bits, wake);
+next:
 	if (last_end == (u64)-1)
 		goto out;
 	start = last_end + 1;
 	if (start <= end && next_node) {
 		state = rb_entry(next_node, struct extent_state,
 				 rb_node);
-		if (state->start == start)
-			goto hit_next;
+		goto hit_next;
 	}
 	goto search_again;
 
@@ -961,8 +965,6 @@ hit_next:
 
 		set_state_bits(tree, state, &bits);
 		clear_state_bit(tree, state, &clear_bits, 0);
-
-		merge_state(tree, state);
 		if (last_end == (u64)-1)
 			goto out;
 
@@ -1007,7 +1009,6 @@ hit_next:
 		if (state->end <= end) {
 			set_state_bits(tree, state, &bits);
 			clear_state_bit(tree, state, &clear_bits, 0);
-			merge_state(tree, state);
 			if (last_end == (u64)-1)
 				goto out;
 			start = last_end + 1;
@@ -1068,8 +1069,6 @@ hit_next:
 
 		set_state_bits(tree, prealloc, &bits);
 		clear_state_bit(tree, prealloc, &clear_bits, 0);
-
-		merge_state(tree, prealloc);
 		prealloc = NULL;
 		goto out;
 	}
@@ -2154,13 +2153,46 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 		 "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,
 		 failrec->this_mirror, num_copies, failrec->in_validation);
 
-	tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror,
-					failrec->bio_flags, 0);
-	return 0;
+	ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
+					 failrec->this_mirror,
+					 failrec->bio_flags, 0);
+	return ret;
 }
 
 /* lots and lots of room for performance fixes in the end_bio funcs */
 
+int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
+{
+	int uptodate = (err == 0);
+	struct extent_io_tree *tree;
+	int ret;
+
+	tree = &BTRFS_I(page->mapping->host)->io_tree;
+
+	if (tree->ops && tree->ops->writepage_end_io_hook) {
+		ret = tree->ops->writepage_end_io_hook(page, start,
+						       end, NULL, uptodate);
+		if (ret)
+			uptodate = 0;
+	}
+
+	if (!uptodate && tree->ops &&
+	    tree->ops->writepage_io_failed_hook) {
+		ret = tree->ops->writepage_io_failed_hook(NULL, page,
+							  start, end, NULL);
+		/* Writeback already completed */
+		if (ret == 0)
+			return 1;
+	}
+
+	if (!uptodate) {
+		clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
+		ClearPageUptodate(page);
+		SetPageError(page);
+	}
+	return 0;
+}
+
 /*
  * after a writepage IO is done, we need to:
  * clear the uptodate bits on error
@@ -2172,13 +2204,11 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
  */
 static void end_bio_extent_writepage(struct bio *bio, int err)
 {
-	int uptodate = err == 0;
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct extent_io_tree *tree;
 	u64 start;
 	u64 end;
 	int whole_page;
-	int ret;
 
 	do {
 		struct page *page = bvec->bv_page;
@@ -2195,28 +2225,9 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
 
 		if (--bvec >= bio->bi_io_vec)
 			prefetchw(&bvec->bv_page->flags);
-		if (tree->ops && tree->ops->writepage_end_io_hook) {
-			ret = tree->ops->writepage_end_io_hook(page, start,
-						       end, NULL, uptodate);
-			if (ret)
-				uptodate = 0;
-		}
-
-		if (!uptodate && tree->ops &&
-		    tree->ops->writepage_io_failed_hook) {
-			ret = tree->ops->writepage_io_failed_hook(bio, page,
-							 start, end, NULL);
-			if (ret == 0) {
-				uptodate = (err == 0);
-				continue;
-			}
-		}
 
-		if (!uptodate) {
-			clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
-			ClearPageUptodate(page);
-			SetPageError(page);
-		}
+		if (end_extent_writepage(page, err, start, end))
+			continue;
 
 		if (whole_page)
 			end_page_writeback(page);
@@ -2779,9 +2790,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			delalloc_start = delalloc_end + 1;
 			continue;
 		}
-		tree->ops->fill_delalloc(inode, page, delalloc_start,
-					 delalloc_end, &page_started,
-					 &nr_written);
+		ret = tree->ops->fill_delalloc(inode, page,
+					       delalloc_start,
+					       delalloc_end,
+					       &page_started,
+					       &nr_written);
+		BUG_ON(ret);
 		/*
 		 * delalloc_end is already one less than the total
 		 * length, so we don't subtract one from
@@ -2818,8 +2832,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	if (tree->ops && tree->ops->writepage_start_hook) {
 		ret = tree->ops->writepage_start_hook(page, start,
 						      page_end);
-		if (ret == -EAGAIN) {
-			redirty_page_for_writepage(wbc, page);
+		if (ret) {
+			/* Fixup worker will requeue */
+			if (ret == -EBUSY)
+				wbc->pages_skipped++;
+			else
+				redirty_page_for_writepage(wbc, page);
 			update_nr_written(page, wbc, nr_written);
 			unlock_page(page);
 			ret = 0;
@@ -3289,7 +3307,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
 		len = end - start + 1;
 		write_lock(&map->lock);
 		em = lookup_extent_mapping(map, start, len);
-		if (IS_ERR_OR_NULL(em)) {
+		if (!em) {
 			write_unlock(&map->lock);
 			break;
 		}
@@ -3853,10 +3871,9 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
 	num_pages = num_extent_pages(eb->start, eb->len);
 	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 
-	if (eb_straddles_pages(eb)) {
-		clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-				      cached_state, GFP_NOFS);
-	}
+	clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+			      cached_state, GFP_NOFS);
+
 	for (i = 0; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
 		if (page)
@@ -3909,6 +3926,8 @@ int extent_range_uptodate(struct extent_io_tree *tree,
 	while (start <= end) {
 		index = start >> PAGE_CACHE_SHIFT;
 		page = find_get_page(tree->mapping, index);
+		if (!page)
+			return 1;
 		uptodate = PageUptodate(page);
 		page_cache_release(page);
 		if (!uptodate) {
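[Note] The extent_io.c block above is largely a refactor: the per-page end-of-writeback handling (calling writepage_end_io_hook and writepage_io_failed_hook, then clearing the uptodate bits on failure) moves out of end_bio_extent_writepage() into the new exported end_extent_writepage(), which returns 1 when the failed hook reports that writeback was already completed. That export is what the btrfs/inode.c hunks further below rely on — btrfs_writepage_fixup_worker() now calls end_extent_writepage() to fail a page cleanly when it cannot reserve delalloc space, instead of hitting the old BUG().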
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bc6a042cb6fc..cecc3518c121 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -319,4 +319,5 @@ struct btrfs_mapping_tree;
 int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
 			u64 length, u64 logical, struct page *page,
 			int mirror_num);
+int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
 #endif
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 33a7890b1f40..1195f09761fe 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -26,8 +26,8 @@ struct extent_map {
 	unsigned long flags;
 	struct block_device *bdev;
 	atomic_t refs;
-	unsigned int in_tree:1;
-	unsigned int compress_type:4;
+	unsigned int in_tree;
+	unsigned int compress_type;
 };
 
 struct extent_map_tree {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 859ba2dd8890..e8d06b6b9194 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1605,6 +1605,14 @@ static long btrfs_fallocate(struct file *file, int mode,
 		return -EOPNOTSUPP;
 
 	/*
+	 * Make sure we have enough space before we do the
+	 * allocation.
+	 */
+	ret = btrfs_check_data_free_space(inode, len);
+	if (ret)
+		return ret;
+
+	/*
 	 * wait for ordered IO before we have any locks.  We'll loop again
 	 * below with the locks held.
 	 */
@@ -1667,27 +1675,12 @@ static long btrfs_fallocate(struct file *file, int mode,
 		if (em->block_start == EXTENT_MAP_HOLE ||
 		    (cur_offset >= inode->i_size &&
 		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
-
-			/*
-			 * Make sure we have enough space before we do the
-			 * allocation.
-			 */
-			ret = btrfs_check_data_free_space(inode, last_byte -
-							cur_offset);
-			if (ret) {
-				free_extent_map(em);
-				break;
-			}
-
 			ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
 							last_byte - cur_offset,
 							1 << inode->i_blkbits,
 							offset + len,
 							&alloc_hint);
 
-			/* Let go of our reservation. */
-			btrfs_free_reserved_data_space(inode, last_byte -
-						       cur_offset);
 			if (ret < 0) {
 				free_extent_map(em);
 				break;
@@ -1715,6 +1708,8 @@ static long btrfs_fallocate(struct file *file, int mode,
 			      &cached_state, GFP_NOFS);
 out:
 	mutex_unlock(&inode->i_mutex);
+	/* Let go of our reservation. */
+	btrfs_free_reserved_data_space(inode, len);
 	return ret;
 }
 
@@ -1761,7 +1756,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 					       start - root->sectorsize,
 					       root->sectorsize, 0);
 		if (IS_ERR(em)) {
-			ret = -ENXIO;
+			ret = PTR_ERR(em);
 			goto out;
 		}
 		last_end = em->start + em->len;
@@ -1773,7 +1768,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 	while (1) {
 		em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
 		if (IS_ERR(em)) {
-			ret = -ENXIO;
+			ret = PTR_ERR(em);
 			break;
 		}
 
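[Note] In btrfs_fallocate() the data-space reservation now covers the whole requested len once, up front, and is released once at the end: every exit path passes through the single btrfs_free_reserved_data_space(inode, len) after the out: label, instead of reserving and releasing per extent inside the hole-filling loop, so error paths inside the loop no longer need per-iteration cleanup. The find_desired_extent() change is independent — it stops collapsing every btrfs_get_extent_fiemap() failure into -ENXIO and propagates PTR_ERR(em) instead.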
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d20ff87ca603..710ea380c7ed 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -777,6 +777,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 	spin_lock(&block_group->lock);
 	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
 		spin_unlock(&block_group->lock);
+		btrfs_free_path(path);
 		goto out;
 	}
 	spin_unlock(&block_group->lock);
@@ -2242,7 +2243,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
 		if (entry->bitmap) {
 			ret = btrfs_alloc_from_bitmap(block_group,
 						      cluster, entry, bytes,
-						      min_start);
+						      cluster->window_start);
 			if (ret == 0) {
 				node = rb_next(&entry->offset_index);
 				if (!node)
@@ -2251,6 +2252,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
 						  offset_index);
 				continue;
 			}
+			cluster->window_start += bytes;
 		} else {
 			ret = entry->offset;
 
@@ -2475,7 +2477,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
 	}
 
 	list_for_each_entry(entry, bitmaps, list) {
-		if (entry->bytes < min_bytes)
+		if (entry->bytes < bytes)
 			continue;
 		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
 					   bytes, cont1_bytes, min_bytes);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 213ffa86ce1b..ee15d88b33d2 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -438,7 +438,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
 				      trans->bytes_reserved);
 	if (ret)
 		goto out;
-	trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans,
+	trace_btrfs_space_reservation(root->fs_info, "ino_cache",
+				      (u64)(unsigned long)trans,
 				      trans->bytes_reserved, 1);
 again:
 	inode = lookup_free_ino_inode(root, path);
@@ -500,7 +501,8 @@ again:
 out_put:
 	iput(inode);
 out_release:
-	trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans,
+	trace_btrfs_space_reservation(root->fs_info, "ino_cache",
+				      (u64)(unsigned long)trans,
 				      trans->bytes_reserved, 0);
 	btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
 out:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0da19a0ea00d..892b34785ccc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1555,6 +1555,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 	struct inode *inode;
 	u64 page_start;
 	u64 page_end;
+	int ret;
 
 	fixup = container_of(work, struct btrfs_writepage_fixup, work);
 	page = fixup->page;
@@ -1582,12 +1583,21 @@ again:
 				     page_end, &cached_state, GFP_NOFS);
 		unlock_page(page);
 		btrfs_start_ordered_extent(inode, ordered, 1);
+		btrfs_put_ordered_extent(ordered);
 		goto again;
 	}
 
-	BUG();
+	ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+	if (ret) {
+		mapping_set_error(page->mapping, ret);
+		end_extent_writepage(page, ret, page_start, page_end);
+		ClearPageChecked(page);
+		goto out;
+	}
+
 	btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
 	ClearPageChecked(page);
+	set_page_dirty(page);
 out:
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
 			     &cached_state, GFP_NOFS);
@@ -1630,7 +1640,7 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
 	fixup->work.func = btrfs_writepage_fixup_worker;
 	fixup->page = page;
 	btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
-	return -EAGAIN;
+	return -EBUSY;
 }
 
 static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
@@ -4575,7 +4585,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 		ret = btrfs_insert_dir_item(trans, root, name, name_len,
 					    parent_inode, &key,
 					    btrfs_inode_type(inode), index);
-		BUG_ON(ret);
+		if (ret)
+			goto fail_dir_item;
 
 		btrfs_i_size_write(parent_inode, parent_inode->i_size +
 				   name_len * 2);
@@ -4583,6 +4594,23 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 		ret = btrfs_update_inode(trans, root, parent_inode);
 	}
 	return ret;
+
+fail_dir_item:
+	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
+		u64 local_index;
+		int err;
+		err = btrfs_del_root_ref(trans, root->fs_info->tree_root,
+				 key.objectid, root->root_key.objectid,
+				 parent_ino, &local_index, name, name_len);
+
+	} else if (add_backref) {
+		u64 local_index;
+		int err;
+
+		err = btrfs_del_inode_ref(trans, root, name, name_len,
+					  ino, parent_ino, &local_index);
+	}
+	return ret;
 }
 
 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
@@ -6401,18 +6429,23 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	unsigned long zero_start;
 	loff_t size;
 	int ret;
+	int reserved = 0;
 	u64 page_start;
 	u64 page_end;
 
 	ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
-	if (!ret)
+	if (!ret) {
 		ret = btrfs_update_time(vma->vm_file);
+		reserved = 1;
+	}
 	if (ret) {
 		if (ret == -ENOMEM)
 			ret = VM_FAULT_OOM;
 		else /* -ENOSPC, -EIO, etc */
 			ret = VM_FAULT_SIGBUS;
-		goto out;
+		if (reserved)
+			goto out;
+		goto out_noreserve;
 	}
 
 	ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
@@ -6495,6 +6528,7 @@ out_unlock:
 	unlock_page(page);
 out:
 	btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+out_noreserve:
 	return ret;
 }
 
@@ -6690,8 +6724,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
 	int err;
 	u64 index = 0;
 
-	inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
-				new_dirid, S_IFDIR | 0700, &index);
+	inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
+				new_dirid, new_dirid,
+				S_IFDIR | (~current_umask() & S_IRWXUGO),
+				&index);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 	inode->i_op = &btrfs_dir_inode_operations;
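[Note] btrfs_create_subvol_root() previously hard-coded the new subvolume root's mode to S_IFDIR | 0700; it now applies the caller's umask to a 0777 base (S_IRWXUGO), matching ordinary mkdir semantics. A small user-space illustration of the mode computation (the umask value is hypothetical):

	#include <stdio.h>
	#include <sys/stat.h>

	int main(void)
	{
		mode_t umask_bits = 022;	/* hypothetical process umask */
		mode_t mode = S_IFDIR | (~umask_bits & 0777);

		/* 0777 & ~022 == 0755, so a default umask now yields
		 * rwxr-xr-x instead of the old hard-coded rwx------ */
		printf("%04o\n", mode & 07777);	/* prints 0755 */
		return 0;
	}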
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ab620014bcc3..d8b54715c2de 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -861,6 +861,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 	int i_done;
 	struct btrfs_ordered_extent *ordered;
 	struct extent_state *cached_state = NULL;
+	struct extent_io_tree *tree;
 	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
 
 	if (isize == 0)
@@ -871,18 +872,34 @@ static int cluster_pages_for_defrag(struct inode *inode,
 					num_pages << PAGE_CACHE_SHIFT);
 	if (ret)
 		return ret;
-again:
-	ret = 0;
 	i_done = 0;
+	tree = &BTRFS_I(inode)->io_tree;
 
 	/* step one, lock all the pages */
 	for (i = 0; i < num_pages; i++) {
 		struct page *page;
+again:
 		page = find_or_create_page(inode->i_mapping,
 					   start_index + i, mask);
 		if (!page)
 			break;
 
+		page_start = page_offset(page);
+		page_end = page_start + PAGE_CACHE_SIZE - 1;
+		while (1) {
+			lock_extent(tree, page_start, page_end, GFP_NOFS);
+			ordered = btrfs_lookup_ordered_extent(inode,
+							      page_start);
+			unlock_extent(tree, page_start, page_end, GFP_NOFS);
+			if (!ordered)
+				break;
+
+			unlock_page(page);
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+			lock_page(page);
+		}
+
 		if (!PageUptodate(page)) {
 			btrfs_readpage(NULL, page);
 			lock_page(page);
@@ -893,15 +910,22 @@ again:
 				break;
 			}
 		}
+
 		isize = i_size_read(inode);
 		file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
-		if (!isize || page->index > file_end ||
-		    page->mapping != inode->i_mapping) {
+		if (!isize || page->index > file_end) {
 			/* whoops, we blew past eof, skip this page */
 			unlock_page(page);
 			page_cache_release(page);
 			break;
 		}
+
+		if (page->mapping != inode->i_mapping) {
+			unlock_page(page);
+			page_cache_release(page);
+			goto again;
+		}
+
 		pages[i] = page;
 		i_done++;
 	}
@@ -924,25 +948,6 @@ again:
 	lock_extent_bits(&BTRFS_I(inode)->io_tree,
 			 page_start, page_end - 1, 0, &cached_state,
 			 GFP_NOFS);
-	ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1);
-	if (ordered &&
-	    ordered->file_offset + ordered->len > page_start &&
-	    ordered->file_offset < page_end) {
-		btrfs_put_ordered_extent(ordered);
-		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
-				     page_start, page_end - 1,
-				     &cached_state, GFP_NOFS);
-		for (i = 0; i < i_done; i++) {
-			unlock_page(pages[i]);
-			page_cache_release(pages[i]);
-		}
-		btrfs_wait_ordered_range(inode, page_start,
-					 page_end - page_start);
-		goto again;
-	}
-	if (ordered)
-		btrfs_put_ordered_extent(ordered);
-
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
 			 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
 			 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
@@ -1065,7 +1070,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 		i = range->start >> PAGE_CACHE_SHIFT;
 	}
 	if (!max_to_defrag)
-		max_to_defrag = last_index;
+		max_to_defrag = last_index + 1;
1069 1074
1070 /* 1075 /*
1071 * make writeback start from i, so the defrag range can be 1076 * make writeback start from i, so the defrag range can be
@@ -1327,6 +1332,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1327 goto out; 1332 goto out;
1328 } 1333 }
1329 1334
1335 if (name[0] == '.' &&
1336 (namelen == 1 || (name[1] == '.' && namelen == 2))) {
1337 ret = -EEXIST;
1338 goto out;
1339 }
1340
1330 if (subvol) { 1341 if (subvol) {
1331 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1342 ret = btrfs_mksubvol(&file->f_path, name, namelen,
1332 NULL, transid, readonly); 1343 NULL, transid, readonly);
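Editor's note: the added check refuses "." and ".." as snapshot or subvolume names before any transaction work starts, returning -EEXIST exactly as if the directory entry already existed. The predicate, standalone:

#include <stdio.h>
#include <string.h>

static int is_dot_or_dotdot(const char *name, size_t namelen)
{
        return name[0] == '.' &&
               (namelen == 1 || (name[1] == '.' && namelen == 2));
}

int main(void)
{
        const char *names[] = { ".", "..", ".snap", "a" };

        for (int i = 0; i < 4; i++)
                printf("%-5s -> %s\n", names[i],
                       is_dot_or_dotdot(names[i], strlen(names[i]))
                               ? "-EEXIST" : "ok");
        return 0;
}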
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 2373b39a132b..22db04550f6a 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -305,7 +305,7 @@ again:
305 305
306 spin_lock(&fs_info->reada_lock); 306 spin_lock(&fs_info->reada_lock);
307 ret = radix_tree_insert(&dev->reada_zones, 307 ret = radix_tree_insert(&dev->reada_zones,
308 (unsigned long)zone->end >> PAGE_CACHE_SHIFT, 308 (unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
309 zone); 309 zone);
310 spin_unlock(&fs_info->reada_lock); 310 spin_unlock(&fs_info->reada_lock);
311 311
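Editor's note: the reada.c change is pure operator precedence: a cast binds tighter than >>, so the old expression truncated the 64-bit zone->end to unsigned long before shifting, losing the high bits on 32-bit machines. Demonstration with uint32_t standing in for a 32-bit unsigned long:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t end = 0x140000000ULL;  /* 5 GiB: needs more than 32 bits */
        unsigned shift = 12;            /* stand-in for PAGE_CACHE_SHIFT */

        uint32_t bad  = (uint32_t)end >> shift;   /* truncate, then shift */
        uint32_t good = (uint32_t)(end >> shift); /* shift, then narrow */

        printf("bad=0x%x good=0x%x\n", bad, good); /* 0x40000 vs 0x140000 */
        return 0;
}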
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 9770cc5bfb76..abc0fbffa510 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1367,7 +1367,8 @@ out:
1367} 1367}
1368 1368
1369static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, 1369static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
1370 u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length) 1370 u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length,
1371 u64 dev_offset)
1371{ 1372{
1372 struct btrfs_mapping_tree *map_tree = 1373 struct btrfs_mapping_tree *map_tree =
1373 &sdev->dev->dev_root->fs_info->mapping_tree; 1374 &sdev->dev->dev_root->fs_info->mapping_tree;
@@ -1391,7 +1392,8 @@ static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
1391 goto out; 1392 goto out;
1392 1393
1393 for (i = 0; i < map->num_stripes; ++i) { 1394 for (i = 0; i < map->num_stripes; ++i) {
1394 if (map->stripes[i].dev == sdev->dev) { 1395 if (map->stripes[i].dev == sdev->dev &&
1396 map->stripes[i].physical == dev_offset) {
1395 ret = scrub_stripe(sdev, map, i, chunk_offset, length); 1397 ret = scrub_stripe(sdev, map, i, chunk_offset, length);
1396 if (ret) 1398 if (ret)
1397 goto out; 1399 goto out;
@@ -1487,7 +1489,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1487 break; 1489 break;
1488 } 1490 }
1489 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, 1491 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1490 chunk_offset, length); 1492 chunk_offset, length, found_key.offset);
1491 btrfs_put_block_group(cache); 1493 btrfs_put_block_group(cache);
1492 if (ret) 1494 if (ret)
1493 break; 1495 break;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 287a6728b1ad..04b77e3ceb7a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -327,7 +327,8 @@ again:
327 327
328 if (num_bytes) { 328 if (num_bytes) {
329 trace_btrfs_space_reservation(root->fs_info, "transaction", 329 trace_btrfs_space_reservation(root->fs_info, "transaction",
330 (u64)h, num_bytes, 1); 330 (u64)(unsigned long)h,
331 num_bytes, 1);
331 h->block_rsv = &root->fs_info->trans_block_rsv; 332 h->block_rsv = &root->fs_info->trans_block_rsv;
332 h->bytes_reserved = num_bytes; 333 h->bytes_reserved = num_bytes;
333 } 334 }
@@ -915,7 +916,11 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
915 dentry->d_name.name, dentry->d_name.len, 916 dentry->d_name.name, dentry->d_name.len,
916 parent_inode, &key, 917 parent_inode, &key,
917 BTRFS_FT_DIR, index); 918 BTRFS_FT_DIR, index);
918 BUG_ON(ret); 919 if (ret) {
920 pending->error = -EEXIST;
921 dput(parent);
922 goto fail;
923 }
919 924
920 btrfs_i_size_write(parent_inode, parent_inode->i_size + 925 btrfs_i_size_write(parent_inode, parent_inode->i_size +
921 dentry->d_name.len * 2); 926 dentry->d_name.len * 2);
@@ -993,12 +998,9 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
993{ 998{
994 struct btrfs_pending_snapshot *pending; 999 struct btrfs_pending_snapshot *pending;
995 struct list_head *head = &trans->transaction->pending_snapshots; 1000 struct list_head *head = &trans->transaction->pending_snapshots;
996 int ret;
997 1001
998 list_for_each_entry(pending, head, list) { 1002 list_for_each_entry(pending, head, list)
999 ret = create_pending_snapshot(trans, fs_info, pending); 1003 create_pending_snapshot(trans, fs_info, pending);
1000 BUG_ON(ret);
1001 }
1002 return 0; 1004 return 0;
1003} 1005}
1004 1006
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index cb877e0886a7..966cc74f5d6c 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1957,7 +1957,8 @@ static int wait_log_commit(struct btrfs_trans_handle *trans,
1957 1957
1958 finish_wait(&root->log_commit_wait[index], &wait); 1958 finish_wait(&root->log_commit_wait[index], &wait);
1959 mutex_lock(&root->log_mutex); 1959 mutex_lock(&root->log_mutex);
1960 } while (root->log_transid < transid + 2 && 1960 } while (root->fs_info->last_trans_log_full_commit !=
1961 trans->transid && root->log_transid < transid + 2 &&
1961 atomic_read(&root->log_commit[index])); 1962 atomic_read(&root->log_commit[index]));
1962 return 0; 1963 return 0;
1963} 1964}
@@ -1966,7 +1967,8 @@ static int wait_for_writer(struct btrfs_trans_handle *trans,
1966 struct btrfs_root *root) 1967 struct btrfs_root *root)
1967{ 1968{
1968 DEFINE_WAIT(wait); 1969 DEFINE_WAIT(wait);
1969 while (atomic_read(&root->log_writers)) { 1970 while (root->fs_info->last_trans_log_full_commit !=
1971 trans->transid && atomic_read(&root->log_writers)) {
1970 prepare_to_wait(&root->log_writer_wait, 1972 prepare_to_wait(&root->log_writer_wait,
1971 &wait, TASK_UNINTERRUPTIBLE); 1973 &wait, TASK_UNINTERRUPTIBLE);
1972 mutex_unlock(&root->log_mutex); 1974 mutex_unlock(&root->log_mutex);
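Editor's note: both wait loops gain a last_trans_log_full_commit check so a waiter bails out as soon as a full transaction commit has been requested for this transid, instead of sleeping until the log state changes on its own. The loop shape in plain C (the helpers model the real predicates and the prepare_to_wait()/schedule() cycle):

#include <stdio.h>

static int full_commit = 0, writers = 2;

static int full_commit_requested(void) { return full_commit; }
static int writers_active(void)        { return writers > 0; }

static void sleep_once(void)    /* stand-in for one wait/wake cycle */
{
        writers--;
        full_commit = 1;        /* someone asked for a full commit */
}

int main(void)
{
        int naps = 0;

        /* Both conditions guard the sleep: a requested full commit ends
         * the wait even while writers remain. */
        while (!full_commit_requested() && writers_active()) {
                sleep_once();
                naps++;
        }
        printf("slept %d time(s); full_commit=%d writers=%d\n",
               naps, full_commit, writers);
        return 0;
}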
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0b4e2af7954d..ef41f285a475 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -459,12 +459,23 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
459{ 459{
460 struct btrfs_device *device, *next; 460 struct btrfs_device *device, *next;
461 461
462 struct block_device *latest_bdev = NULL;
463 u64 latest_devid = 0;
464 u64 latest_transid = 0;
465
462 mutex_lock(&uuid_mutex); 466 mutex_lock(&uuid_mutex);
463again: 467again:
464 /* This is the initialized path, it is safe to release the devices. */ 468 /* This is the initialized path, it is safe to release the devices. */
465 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 469 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
466 if (device->in_fs_metadata) 470 if (device->in_fs_metadata) {
471 if (!latest_transid ||
472 device->generation > latest_transid) {
473 latest_devid = device->devid;
474 latest_transid = device->generation;
475 latest_bdev = device->bdev;
476 }
467 continue; 477 continue;
478 }
468 479
469 if (device->bdev) { 480 if (device->bdev) {
470 blkdev_put(device->bdev, device->mode); 481 blkdev_put(device->bdev, device->mode);
@@ -487,6 +498,10 @@ again:
487 goto again; 498 goto again;
488 } 499 }
489 500
501 fs_devices->latest_bdev = latest_bdev;
502 fs_devices->latest_devid = latest_devid;
503 fs_devices->latest_trans = latest_transid;
504
490 mutex_unlock(&uuid_mutex); 505 mutex_unlock(&uuid_mutex);
491 return 0; 506 return 0;
492} 507}
@@ -1953,7 +1968,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1953 em = lookup_extent_mapping(em_tree, chunk_offset, 1); 1968 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
1954 read_unlock(&em_tree->lock); 1969 read_unlock(&em_tree->lock);
1955 1970
1956 BUG_ON(em->start > chunk_offset || 1971 BUG_ON(!em || em->start > chunk_offset ||
1957 em->start + em->len < chunk_offset); 1972 em->start + em->len < chunk_offset);
1958 map = (struct map_lookup *)em->bdev; 1973 map = (struct map_lookup *)em->bdev;
1959 1974
@@ -4356,6 +4371,20 @@ int btrfs_read_sys_array(struct btrfs_root *root)
4356 return -ENOMEM; 4371 return -ENOMEM;
4357 btrfs_set_buffer_uptodate(sb); 4372 btrfs_set_buffer_uptodate(sb);
4358 btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); 4373 btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
4374 /*
4375 * The sb extent buffer is artificial and only used to read the system array.
4376 * The btrfs_set_buffer_uptodate() call does not properly mark all of its
4377 * pages up-to-date when the page is larger: the extent does not cover the
4378 * whole page and consequently check_page_uptodate does not find all
4379 * the page's extents up-to-date (the hole beyond sb), so
4380 * write_extent_buffer then triggers a WARN_ON.
4381 *
4382 * Regular short extents go through the mark_extent_buffer_dirty/writeback
4383 * cycle, but sb spans only this function. Add an explicit SetPageUptodate
4384 * call to silence the warning, e.g. on PowerPC 64.
4385 */
4386 if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
4387 SetPageUptodate(sb->first_page);
4359 4388
4360 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); 4389 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
4361 array_size = btrfs_super_sys_array_size(super_copy); 4390 array_size = btrfs_super_sys_array_size(super_copy);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index b60fc8bfb3e9..620daad201db 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -641,10 +641,10 @@ static int __cap_is_valid(struct ceph_cap *cap)
641 unsigned long ttl; 641 unsigned long ttl;
642 u32 gen; 642 u32 gen;
643 643
644 spin_lock(&cap->session->s_cap_lock); 644 spin_lock(&cap->session->s_gen_ttl_lock);
645 gen = cap->session->s_cap_gen; 645 gen = cap->session->s_cap_gen;
646 ttl = cap->session->s_cap_ttl; 646 ttl = cap->session->s_cap_ttl;
647 spin_unlock(&cap->session->s_cap_lock); 647 spin_unlock(&cap->session->s_gen_ttl_lock);
648 648
649 if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) { 649 if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) {
650 dout("__cap_is_valid %p cap %p issued %s " 650 dout("__cap_is_valid %p cap %p issued %s "
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 618246bc2196..3e8094be4604 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -975,10 +975,10 @@ static int dentry_lease_is_valid(struct dentry *dentry)
975 di = ceph_dentry(dentry); 975 di = ceph_dentry(dentry);
976 if (di->lease_session) { 976 if (di->lease_session) {
977 s = di->lease_session; 977 s = di->lease_session;
978 spin_lock(&s->s_cap_lock); 978 spin_lock(&s->s_gen_ttl_lock);
979 gen = s->s_cap_gen; 979 gen = s->s_cap_gen;
980 ttl = s->s_cap_ttl; 980 ttl = s->s_cap_ttl;
981 spin_unlock(&s->s_cap_lock); 981 spin_unlock(&s->s_gen_ttl_lock);
982 982
983 if (di->lease_gen == gen && 983 if (di->lease_gen == gen &&
984 time_before(jiffies, dentry->d_time) && 984 time_before(jiffies, dentry->d_time) &&
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 23ab6a3f1825..866e8d7ca37d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -262,6 +262,7 @@ static int parse_reply_info(struct ceph_msg *msg,
262 /* trace */ 262 /* trace */
263 ceph_decode_32_safe(&p, end, len, bad); 263 ceph_decode_32_safe(&p, end, len, bad);
264 if (len > 0) { 264 if (len > 0) {
265 ceph_decode_need(&p, end, len, bad);
265 err = parse_reply_info_trace(&p, p+len, info, features); 266 err = parse_reply_info_trace(&p, p+len, info, features);
266 if (err < 0) 267 if (err < 0)
267 goto out_bad; 268 goto out_bad;
@@ -270,6 +271,7 @@ static int parse_reply_info(struct ceph_msg *msg,
270 /* extra */ 271 /* extra */
271 ceph_decode_32_safe(&p, end, len, bad); 272 ceph_decode_32_safe(&p, end, len, bad);
272 if (len > 0) { 273 if (len > 0) {
274 ceph_decode_need(&p, end, len, bad);
273 err = parse_reply_info_extra(&p, p+len, info, features); 275 err = parse_reply_info_extra(&p, p+len, info, features);
274 if (err < 0) 276 if (err < 0)
275 goto out_bad; 277 goto out_bad;
@@ -398,9 +400,11 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
398 s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; 400 s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
399 s->s_con.peer_name.num = cpu_to_le64(mds); 401 s->s_con.peer_name.num = cpu_to_le64(mds);
400 402
401 spin_lock_init(&s->s_cap_lock); 403 spin_lock_init(&s->s_gen_ttl_lock);
402 s->s_cap_gen = 0; 404 s->s_cap_gen = 0;
403 s->s_cap_ttl = 0; 405 s->s_cap_ttl = 0;
406
407 spin_lock_init(&s->s_cap_lock);
404 s->s_renew_requested = 0; 408 s->s_renew_requested = 0;
405 s->s_renew_seq = 0; 409 s->s_renew_seq = 0;
406 INIT_LIST_HEAD(&s->s_caps); 410 INIT_LIST_HEAD(&s->s_caps);
@@ -2326,10 +2330,10 @@ static void handle_session(struct ceph_mds_session *session,
2326 case CEPH_SESSION_STALE: 2330 case CEPH_SESSION_STALE:
2327 pr_info("mds%d caps went stale, renewing\n", 2331 pr_info("mds%d caps went stale, renewing\n",
2328 session->s_mds); 2332 session->s_mds);
2329 spin_lock(&session->s_cap_lock); 2333 spin_lock(&session->s_gen_ttl_lock);
2330 session->s_cap_gen++; 2334 session->s_cap_gen++;
2331 session->s_cap_ttl = 0; 2335 session->s_cap_ttl = 0;
2332 spin_unlock(&session->s_cap_lock); 2336 spin_unlock(&session->s_gen_ttl_lock);
2333 send_renew_caps(mdsc, session); 2337 send_renew_caps(mdsc, session);
2334 break; 2338 break;
2335 2339
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index a50ca0e39475..8c7c04ebb595 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -117,10 +117,13 @@ struct ceph_mds_session {
117 void *s_authorizer_buf, *s_authorizer_reply_buf; 117 void *s_authorizer_buf, *s_authorizer_reply_buf;
118 size_t s_authorizer_buf_len, s_authorizer_reply_buf_len; 118 size_t s_authorizer_buf_len, s_authorizer_reply_buf_len;
119 119
120 /* protected by s_cap_lock */ 120 /* protected by s_gen_ttl_lock */
121 spinlock_t s_cap_lock; 121 spinlock_t s_gen_ttl_lock;
122 u32 s_cap_gen; /* inc each time we get mds stale msg */ 122 u32 s_cap_gen; /* inc each time we get mds stale msg */
123 unsigned long s_cap_ttl; /* when session caps expire */ 123 unsigned long s_cap_ttl; /* when session caps expire */
124
125 /* protected by s_cap_lock */
126 spinlock_t s_cap_lock;
124 struct list_head s_caps; /* all caps issued by this session */ 127 struct list_head s_caps; /* all caps issued by this session */
125 int s_nr_caps, s_trim_caps; 128 int s_nr_caps, s_trim_caps;
126 int s_num_cap_releases; 129 int s_num_cap_releases;
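Editor's note: the point of the split above is lock scope: s_cap_gen and s_cap_ttl are two small, frequently read fields, and giving them their own s_gen_ttl_lock keeps readers off the busier s_cap_lock that guards the cap list. A userspace model of the layout with pthread spinlocks (illustrative only):

#include <pthread.h>
#include <stdio.h>

struct session {
        pthread_spinlock_t gen_ttl_lock; /* protects gen and ttl only */
        unsigned gen;
        unsigned long ttl;

        pthread_spinlock_t cap_lock;     /* protects the (omitted) cap list */
};

static void read_gen_ttl(struct session *s, unsigned *gen, unsigned long *ttl)
{
        pthread_spin_lock(&s->gen_ttl_lock);  /* never touches cap_lock */
        *gen = s->gen;
        *ttl = s->ttl;
        pthread_spin_unlock(&s->gen_ttl_lock);
}

int main(void)
{
        struct session s = { .gen = 3, .ttl = 1000 };
        unsigned gen;
        unsigned long ttl;

        pthread_spin_init(&s.gen_ttl_lock, PTHREAD_PROCESS_PRIVATE);
        pthread_spin_init(&s.cap_lock, PTHREAD_PROCESS_PRIVATE);
        read_gen_ttl(&s, &gen, &ttl);
        printf("gen=%u ttl=%lu\n", gen, ttl);
        return 0;
}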
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 857214ae8c08..a76f697303d9 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -111,8 +111,10 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
111} 111}
112 112
113static struct ceph_vxattr_cb ceph_file_vxattrs[] = { 113static struct ceph_vxattr_cb ceph_file_vxattrs[] = {
114 { true, "ceph.file.layout", ceph_vxattrcb_layout},
115 /* The following extended attribute name is deprecated */
114 { true, "ceph.layout", ceph_vxattrcb_layout}, 116 { true, "ceph.layout", ceph_vxattrcb_layout},
115 { NULL, NULL } 117 { true, NULL, NULL }
116}; 118};
117 119
118static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode) 120static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode)
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 0554b00a7b33..2b243af70aa3 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -139,7 +139,7 @@ config CIFS_DFS_UPCALL
139 points. If unsure, say N. 139 points. If unsure, say N.
140 140
141config CIFS_FSCACHE 141config CIFS_FSCACHE
142 bool "Provide CIFS client caching support (EXPERIMENTAL)" 142 bool "Provide CIFS client caching support"
143 depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y 143 depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
144 help 144 help
145 Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data 145 Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
@@ -147,7 +147,7 @@ config CIFS_FSCACHE
147 manager. If unsure, say N. 147 manager. If unsure, say N.
148 148
149config CIFS_ACL 149config CIFS_ACL
150 bool "Provide CIFS ACL support (EXPERIMENTAL)" 150 bool "Provide CIFS ACL support"
151 depends on CIFS_XATTR && KEYS 151 depends on CIFS_XATTR && KEYS
152 help 152 help
153 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob 153 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 986709a8d903..602f77c304c9 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -773,10 +773,11 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
773 cifs_dump_mem("Bad SMB: ", buf, 773 cifs_dump_mem("Bad SMB: ", buf,
774 min_t(unsigned int, server->total_read, 48)); 774 min_t(unsigned int, server->total_read, 48));
775 775
776 if (mid) 776 if (!mid)
777 handle_mid(mid, server, smb_buffer, length); 777 return length;
778 778
779 return length; 779 handle_mid(mid, server, smb_buffer, length);
780 return 0;
780} 781}
781 782
782static int 783static int
@@ -2125,7 +2126,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
2125 down_read(&key->sem); 2126 down_read(&key->sem);
2126 upayload = key->payload.data; 2127 upayload = key->payload.data;
2127 if (IS_ERR_OR_NULL(upayload)) { 2128 if (IS_ERR_OR_NULL(upayload)) {
2128 rc = PTR_ERR(key); 2129 rc = upayload ? PTR_ERR(upayload) : -EINVAL;
2129 goto out_key_put; 2130 goto out_key_put;
2130 } 2131 }
2131 2132
@@ -2142,14 +2143,14 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
2142 2143
2143 len = delim - payload; 2144 len = delim - payload;
2144 if (len > MAX_USERNAME_SIZE || len <= 0) { 2145 if (len > MAX_USERNAME_SIZE || len <= 0) {
2145 cFYI(1, "Bad value from username search (len=%ld)", len); 2146 cFYI(1, "Bad value from username search (len=%zd)", len);
2146 rc = -EINVAL; 2147 rc = -EINVAL;
2147 goto out_key_put; 2148 goto out_key_put;
2148 } 2149 }
2149 2150
2150 vol->username = kstrndup(payload, len, GFP_KERNEL); 2151 vol->username = kstrndup(payload, len, GFP_KERNEL);
2151 if (!vol->username) { 2152 if (!vol->username) {
2152 cFYI(1, "Unable to allocate %ld bytes for username", len); 2153 cFYI(1, "Unable to allocate %zd bytes for username", len);
2153 rc = -ENOMEM; 2154 rc = -ENOMEM;
2154 goto out_key_put; 2155 goto out_key_put;
2155 } 2156 }
@@ -2157,7 +2158,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
2157 2158
2158 len = key->datalen - (len + 1); 2159 len = key->datalen - (len + 1);
2159 if (len > MAX_PASSWORD_SIZE || len <= 0) { 2160 if (len > MAX_PASSWORD_SIZE || len <= 0) {
2160 cFYI(1, "Bad len for password search (len=%ld)", len); 2161 cFYI(1, "Bad len for password search (len=%zd)", len);
2161 rc = -EINVAL; 2162 rc = -EINVAL;
2162 kfree(vol->username); 2163 kfree(vol->username);
2163 vol->username = NULL; 2164 vol->username = NULL;
@@ -2167,7 +2168,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
2167 ++delim; 2168 ++delim;
2168 vol->password = kstrndup(delim, len, GFP_KERNEL); 2169 vol->password = kstrndup(delim, len, GFP_KERNEL);
2169 if (!vol->password) { 2170 if (!vol->password) {
2170 cFYI(1, "Unable to allocate %ld bytes for password", len); 2171 cFYI(1, "Unable to allocate %zd bytes for password", len);
2171 rc = -ENOMEM; 2172 rc = -ENOMEM;
2172 kfree(vol->username); 2173 kfree(vol->username);
2173 vol->username = NULL; 2174 vol->username = NULL;
@@ -3857,10 +3858,8 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid)
3857 struct smb_vol *vol_info; 3858 struct smb_vol *vol_info;
3858 3859
3859 vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); 3860 vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL);
3860 if (vol_info == NULL) { 3861 if (vol_info == NULL)
3861 tcon = ERR_PTR(-ENOMEM); 3862 return ERR_PTR(-ENOMEM);
3862 goto out;
3863 }
3864 3863
3865 vol_info->local_nls = cifs_sb->local_nls; 3864 vol_info->local_nls = cifs_sb->local_nls;
3866 vol_info->linux_uid = fsuid; 3865 vol_info->linux_uid = fsuid;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index df8fecb5b993..bc7e24420ac0 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -492,7 +492,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
492{ 492{
493 int xid; 493 int xid;
494 int rc = 0; /* to get around spurious gcc warning, set to zero here */ 494 int rc = 0; /* to get around spurious gcc warning, set to zero here */
495 __u32 oplock = 0; 495 __u32 oplock = enable_oplocks ? REQ_OPLOCK : 0;
496 __u16 fileHandle = 0; 496 __u16 fileHandle = 0;
497 bool posix_open = false; 497 bool posix_open = false;
498 struct cifs_sb_info *cifs_sb; 498 struct cifs_sb_info *cifs_sb;
@@ -584,10 +584,26 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
584 * If either that or op not supported returned, follow 584 * If either that or op not supported returned, follow
585 * the normal lookup. 585 * the normal lookup.
586 */ 586 */
587 if ((rc == 0) || (rc == -ENOENT)) 587 switch (rc) {
588 case 0:
589 /*
590 * The server may allow us to open things like
591 * FIFOs, but the client isn't set up to deal
592 * with that. If it's not a regular file, just
593 * close it and proceed as if it were a normal
594 * lookup.
595 */
596 if (newInode && !S_ISREG(newInode->i_mode)) {
597 CIFSSMBClose(xid, pTcon, fileHandle);
598 break;
599 }
600 case -ENOENT:
588 posix_open = true; 601 posix_open = true;
589 else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP)) 602 case -EOPNOTSUPP:
603 break;
604 default:
590 pTcon->broken_posix_open = true; 605 pTcon->broken_posix_open = true;
606 }
591 } 607 }
592 if (!posix_open) 608 if (!posix_open)
593 rc = cifs_get_inode_info_unix(&newInode, full_path, 609 rc = cifs_get_inode_info_unix(&newInode, full_path,
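Editor's note: the rewritten branch is a switch with deliberate fallthrough: a successful posix open of a non-regular file (a FIFO, say) is closed and treated as a plain lookup; otherwise rc == 0 falls into the -ENOENT case, both of which set posix_open; -EOPNOTSUPP simply breaks; and any other error marks the server's posix open as broken. A standalone model (errno values spelled as the literals -2 and -95):

#include <stdio.h>

static void classify(int rc, int is_regular, int *posix_open, int *broken)
{
        switch (rc) {
        case 0:
                if (!is_regular)
                        break;          /* close it, do a normal lookup */
                /* fall through */
        case -2:                        /* -ENOENT */
                *posix_open = 1;
                /* fall through */
        case -95:                       /* -EOPNOTSUPP */
                break;
        default:
                *broken = 1;            /* server mishandles posix opens */
        }
}

int main(void)
{
        int posix_open = 0, broken = 0;

        classify(-2, 1, &posix_open, &broken);
        printf("posix_open=%d broken=%d\n", posix_open, broken);
        return 0;
}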
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a5f54b7d9822..745da3d0653e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -534,6 +534,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
534 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { 534 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
535 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; 535 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
536 fattr->cf_dtype = DT_DIR; 536 fattr->cf_dtype = DT_DIR;
537 /*
538 * The server can return an incorrect NumberOfLinks value for directories
539 * when Unix extensions are disabled - fake it.
540 */
541 fattr->cf_nlink = 2;
537 } else { 542 } else {
538 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; 543 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
539 fattr->cf_dtype = DT_REG; 544 fattr->cf_dtype = DT_REG;
@@ -541,9 +546,9 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
541 /* clear write bits if ATTR_READONLY is set */ 546 /* clear write bits if ATTR_READONLY is set */
542 if (fattr->cf_cifsattrs & ATTR_READONLY) 547 if (fattr->cf_cifsattrs & ATTR_READONLY)
543 fattr->cf_mode &= ~(S_IWUGO); 548 fattr->cf_mode &= ~(S_IWUGO);
544 }
545 549
546 fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); 550 fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
551 }
547 552
548 fattr->cf_uid = cifs_sb->mnt_uid; 553 fattr->cf_uid = cifs_sb->mnt_uid;
549 fattr->cf_gid = cifs_sb->mnt_gid; 554 fattr->cf_gid = cifs_sb->mnt_gid;
@@ -1322,7 +1327,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
1322 } 1327 }
1323/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need 1328/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need
1324 to set uid/gid */ 1329 to set uid/gid */
1325 inc_nlink(inode);
1326 1330
1327 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); 1331 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
1328 cifs_fill_uniqueid(inode->i_sb, &fattr); 1332 cifs_fill_uniqueid(inode->i_sb, &fattr);
@@ -1355,7 +1359,6 @@ mkdir_retry_old:
1355 d_drop(direntry); 1359 d_drop(direntry);
1356 } else { 1360 } else {
1357mkdir_get_info: 1361mkdir_get_info:
1358 inc_nlink(inode);
1359 if (pTcon->unix_ext) 1362 if (pTcon->unix_ext)
1360 rc = cifs_get_inode_info_unix(&newinode, full_path, 1363 rc = cifs_get_inode_info_unix(&newinode, full_path,
1361 inode->i_sb, xid); 1364 inode->i_sb, xid);
@@ -1436,6 +1439,11 @@ mkdir_get_info:
1436 } 1439 }
1437 } 1440 }
1438mkdir_out: 1441mkdir_out:
1442 /*
1443 * Force revalidate to get parent dir info when needed since cached
1444 * attributes are invalid now.
1445 */
1446 CIFS_I(inode)->time = 0;
1439 kfree(full_path); 1447 kfree(full_path);
1440 FreeXid(xid); 1448 FreeXid(xid);
1441 cifs_put_tlink(tlink); 1449 cifs_put_tlink(tlink);
@@ -1475,7 +1483,6 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
1475 cifs_put_tlink(tlink); 1483 cifs_put_tlink(tlink);
1476 1484
1477 if (!rc) { 1485 if (!rc) {
1478 drop_nlink(inode);
1479 spin_lock(&direntry->d_inode->i_lock); 1486 spin_lock(&direntry->d_inode->i_lock);
1480 i_size_write(direntry->d_inode, 0); 1487 i_size_write(direntry->d_inode, 0);
1481 clear_nlink(direntry->d_inode); 1488 clear_nlink(direntry->d_inode);
@@ -1483,12 +1490,15 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
1483 } 1490 }
1484 1491
1485 cifsInode = CIFS_I(direntry->d_inode); 1492 cifsInode = CIFS_I(direntry->d_inode);
1486 cifsInode->time = 0; /* force revalidate to go get info when 1493 /* force revalidate to go get info when needed */
1487 needed */ 1494 cifsInode->time = 0;
1488 1495
1489 cifsInode = CIFS_I(inode); 1496 cifsInode = CIFS_I(inode);
1490 cifsInode->time = 0; /* force revalidate to get parent dir info 1497 /*
1491 since cached search results now invalid */ 1498 * Force revalidate to get parent dir info when needed since cached
1499 * attributes are invalid now.
1500 */
1501 cifsInode->time = 0;
1492 1502
1493 direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime = 1503 direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime =
1494 current_fs_time(inode->i_sb); 1504 current_fs_time(inode->i_sb);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index d85efad5765f..551d0c2b9736 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -246,16 +246,15 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
246 /* copy user */ 246 /* copy user */
247 /* BB what about null user mounts - check that we do this BB */ 247 /* BB what about null user mounts - check that we do this BB */
248 /* copy user */ 248 /* copy user */
249 if (ses->user_name != NULL) 249 if (ses->user_name != NULL) {
250 strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE); 250 strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE);
251 bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
252 }
251 /* else null user mount */ 253 /* else null user mount */
252
253 bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
254 *bcc_ptr = 0; 254 *bcc_ptr = 0;
255 bcc_ptr++; /* account for null termination */ 255 bcc_ptr++; /* account for null termination */
256 256
257 /* copy domain */ 257 /* copy domain */
258
259 if (ses->domainName != NULL) { 258 if (ses->domainName != NULL) {
260 strncpy(bcc_ptr, ses->domainName, 256); 259 strncpy(bcc_ptr, ses->domainName, 256);
261 bcc_ptr += strnlen(ses->domainName, 256); 260 bcc_ptr += strnlen(ses->domainName, 256);
@@ -395,6 +394,10 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
395 ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags); 394 ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
396 tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset); 395 tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset);
397 tilen = le16_to_cpu(pblob->TargetInfoArray.Length); 396 tilen = le16_to_cpu(pblob->TargetInfoArray.Length);
397 if (tioffset > blob_len || tioffset + tilen > blob_len) {
398 cERROR(1, "tioffset + tilen too high %u + %u", tioffset, tilen);
399 return -EINVAL;
400 }
398 if (tilen) { 401 if (tilen) {
399 ses->auth_key.response = kmalloc(tilen, GFP_KERNEL); 402 ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
400 if (!ses->auth_key.response) { 403 if (!ses->auth_key.response) {
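Editor's note: tioffset and tilen arrive straight off the wire, so they must be validated against blob_len before sizing and copying the target info. A standalone version of the check; the first comparison already rejects offsets beyond the blob, and the widening cast here is my addition to make the sum itself immune to wraparound:

#include <stdio.h>
#include <stdint.h>

static int target_info_ok(uint32_t tioffset, uint16_t tilen, uint32_t blob_len)
{
        if (tioffset > blob_len)
                return 0;
        if ((uint64_t)tioffset + tilen > blob_len)  /* widened: cannot wrap */
                return 0;
        return 1;
}

int main(void)
{
        printf("%d\n", target_info_ok(48, 100, 200));        /* 1: fits  */
        printf("%d\n", target_info_ok(4000000000u, 8, 200)); /* 0: bogus */
        return 0;
}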
diff --git a/fs/compat.c b/fs/compat.c
index fa9d721ecfee..07880bae28a9 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -131,41 +131,35 @@ asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_tim
131 131
132static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) 132static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
133{ 133{
134 compat_ino_t ino = stat->ino; 134 struct compat_stat tmp;
135 typeof(ubuf->st_uid) uid = 0;
136 typeof(ubuf->st_gid) gid = 0;
137 int err;
138 135
139 SET_UID(uid, stat->uid); 136 if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
140 SET_GID(gid, stat->gid); 137 return -EOVERFLOW;
141 138
142 if ((u64) stat->size > MAX_NON_LFS || 139 memset(&tmp, 0, sizeof(tmp));
143 !old_valid_dev(stat->dev) || 140 tmp.st_dev = old_encode_dev(stat->dev);
144 !old_valid_dev(stat->rdev)) 141 tmp.st_ino = stat->ino;
142 if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
145 return -EOVERFLOW; 143 return -EOVERFLOW;
146 if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) 144 tmp.st_mode = stat->mode;
145 tmp.st_nlink = stat->nlink;
146 if (tmp.st_nlink != stat->nlink)
147 return -EOVERFLOW; 147 return -EOVERFLOW;
148 148 SET_UID(tmp.st_uid, stat->uid);
149 if (clear_user(ubuf, sizeof(*ubuf))) 149 SET_GID(tmp.st_gid, stat->gid);
150 return -EFAULT; 150 tmp.st_rdev = old_encode_dev(stat->rdev);
151 151 if ((u64) stat->size > MAX_NON_LFS)
152 err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev); 152 return -EOVERFLOW;
153 err |= __put_user(ino, &ubuf->st_ino); 153 tmp.st_size = stat->size;
154 err |= __put_user(stat->mode, &ubuf->st_mode); 154 tmp.st_atime = stat->atime.tv_sec;
155 err |= __put_user(stat->nlink, &ubuf->st_nlink); 155 tmp.st_atime_nsec = stat->atime.tv_nsec;
156 err |= __put_user(uid, &ubuf->st_uid); 156 tmp.st_mtime = stat->mtime.tv_sec;
157 err |= __put_user(gid, &ubuf->st_gid); 157 tmp.st_mtime_nsec = stat->mtime.tv_nsec;
158 err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev); 158 tmp.st_ctime = stat->ctime.tv_sec;
159 err |= __put_user(stat->size, &ubuf->st_size); 159 tmp.st_ctime_nsec = stat->ctime.tv_nsec;
160 err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime); 160 tmp.st_blocks = stat->blocks;
161 err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec); 161 tmp.st_blksize = stat->blksize;
162 err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime); 162 return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0;
163 err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
164 err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
165 err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
166 err |= __put_user(stat->blksize, &ubuf->st_blksize);
167 err |= __put_user(stat->blocks, &ubuf->st_blocks);
168 return err;
169} 163}
170 164
171asmlinkage long compat_sys_newstat(const char __user * filename, 165asmlinkage long compat_sys_newstat(const char __user * filename,
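Editor's note: the rewrite trades clear_user() plus a dozen __put_user() calls for one zeroed struct filled on the stack and a single copy_to_user(); the memset guarantees padding bytes never leak kernel stack, and every narrowing field gets an explicit -EOVERFLOW check. The same fill-check-copy pattern in userspace miniature (toy structs; memcpy stands in for copy_to_user):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct wide   { uint64_t ino; uint64_t size; };
struct narrow { uint32_t st_ino; uint32_t st_size; };

static int cp_narrow_stat(const struct wide *st, struct narrow *ubuf)
{
        struct narrow tmp;

        memset(&tmp, 0, sizeof(tmp));    /* no uninitialized bytes escape */
        tmp.st_ino = (uint32_t)st->ino;
        if (tmp.st_ino != st->ino)       /* narrowing lost bits? */
                return -75;              /* -EOVERFLOW */
        tmp.st_size = (uint32_t)st->size;
        if (tmp.st_size != st->size)
                return -75;
        memcpy(ubuf, &tmp, sizeof(tmp)); /* kernel: one copy_to_user() */
        return 0;
}

int main(void)
{
        struct wide w = { .ino = 42, .size = 1u << 20 };
        struct narrow n;

        printf("rc=%d ino=%u\n", cp_narrow_stat(&w, &n), n.st_ino);
        return 0;
}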
diff --git a/fs/dcache.c b/fs/dcache.c
index 16a53cc2cc02..bcbdb33fcc20 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -104,7 +104,7 @@ static unsigned int d_hash_shift __read_mostly;
104 104
105static struct hlist_bl_head *dentry_hashtable __read_mostly; 105static struct hlist_bl_head *dentry_hashtable __read_mostly;
106 106
107static inline struct hlist_bl_head *d_hash(struct dentry *parent, 107static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
108 unsigned long hash) 108 unsigned long hash)
109{ 109{
110 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; 110 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
@@ -137,6 +137,26 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
137} 137}
138#endif 138#endif
139 139
140/*
141 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
142 * The strings are both count bytes long, and count is non-zero.
143 */
144static inline int dentry_cmp(const unsigned char *cs, size_t scount,
145 const unsigned char *ct, size_t tcount)
146{
147 if (scount != tcount)
148 return 1;
149
150 do {
151 if (*cs != *ct)
152 return 1;
153 cs++;
154 ct++;
155 tcount--;
156 } while (tcount);
157 return 0;
158}
159
140static void __d_free(struct rcu_head *head) 160static void __d_free(struct rcu_head *head)
141{ 161{
142 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); 162 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
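Editor's note: dentry_cmp() checks the lengths before touching any bytes, so names of different length cost one comparison and no memory traffic, and the do/while is valid because the contract says count is non-zero. Reproduced standalone with a tiny harness:

#include <stdio.h>
#include <string.h>

static int dentry_cmp(const unsigned char *cs, size_t scount,
                      const unsigned char *ct, size_t tcount)
{
        if (scount != tcount)
                return 1;
        do {
                if (*cs != *ct)
                        return 1;
                cs++;
                ct++;
                tcount--;
        } while (tcount);
        return 0;
}

int main(void)
{
        const unsigned char a[] = "readme", b[] = "readm", c[] = "readme";

        printf("%d %d\n",
               dentry_cmp(a, 6, b, 5),  /* 1: lengths differ, no bytes read */
               dentry_cmp(a, 6, c, 6)); /* 0: match */
        return 0;
}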
@@ -1717,8 +1737,9 @@ EXPORT_SYMBOL(d_add_ci);
1717 * child is looked up. Thus, an interlocking stepping of sequence lock checks 1737 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1718 * is formed, giving integrity down the path walk. 1738 * is formed, giving integrity down the path walk.
1719 */ 1739 */
1720struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, 1740struct dentry *__d_lookup_rcu(const struct dentry *parent,
1721 unsigned *seq, struct inode **inode) 1741 const struct qstr *name,
1742 unsigned *seqp, struct inode **inode)
1722{ 1743{
1723 unsigned int len = name->len; 1744 unsigned int len = name->len;
1724 unsigned int hash = name->hash; 1745 unsigned int hash = name->hash;
@@ -1748,6 +1769,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1748 * See Documentation/filesystems/path-lookup.txt for more details. 1769 * See Documentation/filesystems/path-lookup.txt for more details.
1749 */ 1770 */
1750 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 1771 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1772 unsigned seq;
1751 struct inode *i; 1773 struct inode *i;
1752 const char *tname; 1774 const char *tname;
1753 int tlen; 1775 int tlen;
@@ -1756,7 +1778,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1756 continue; 1778 continue;
1757 1779
1758seqretry: 1780seqretry:
1759 *seq = read_seqcount_begin(&dentry->d_seq); 1781 seq = read_seqcount_begin(&dentry->d_seq);
1760 if (dentry->d_parent != parent) 1782 if (dentry->d_parent != parent)
1761 continue; 1783 continue;
1762 if (d_unhashed(dentry)) 1784 if (d_unhashed(dentry))
@@ -1771,7 +1793,7 @@ seqretry:
1771 * edge of memory when walking. If we could load this 1793 * edge of memory when walking. If we could load this
1772 * atomically some other way, we could drop this check. 1794 * atomically some other way, we could drop this check.
1773 */ 1795 */
1774 if (read_seqcount_retry(&dentry->d_seq, *seq)) 1796 if (read_seqcount_retry(&dentry->d_seq, seq))
1775 goto seqretry; 1797 goto seqretry;
1776 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { 1798 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1777 if (parent->d_op->d_compare(parent, *inode, 1799 if (parent->d_op->d_compare(parent, *inode,
@@ -1788,6 +1810,7 @@ seqretry:
1788 * order to do anything useful with the returned dentry 1810 * order to do anything useful with the returned dentry
1789 * anyway. 1811 * anyway.
1790 */ 1812 */
1813 *seqp = seq;
1791 *inode = i; 1814 *inode = i;
1792 return dentry; 1815 return dentry;
1793 } 1816 }
@@ -2968,7 +2991,7 @@ __setup("dhash_entries=", set_dhash_entries);
2968 2991
2969static void __init dcache_init_early(void) 2992static void __init dcache_init_early(void)
2970{ 2993{
2971 int loop; 2994 unsigned int loop;
2972 2995
2973 /* If hashes are distributed across NUMA nodes, defer 2996 /* If hashes are distributed across NUMA nodes, defer
2974 * hash allocation until vmalloc space is available. 2997 * hash allocation until vmalloc space is available.
@@ -2986,13 +3009,13 @@ static void __init dcache_init_early(void)
2986 &d_hash_mask, 3009 &d_hash_mask,
2987 0); 3010 0);
2988 3011
2989 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3012 for (loop = 0; loop < (1U << d_hash_shift); loop++)
2990 INIT_HLIST_BL_HEAD(dentry_hashtable + loop); 3013 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
2991} 3014}
2992 3015
2993static void __init dcache_init(void) 3016static void __init dcache_init(void)
2994{ 3017{
2995 int loop; 3018 unsigned int loop;
2996 3019
2997 /* 3020 /*
2998 * A constructor could be added for stable state like the lists, 3021 * A constructor could be added for stable state like the lists,
@@ -3016,7 +3039,7 @@ static void __init dcache_init(void)
3016 &d_hash_mask, 3039 &d_hash_mask,
3017 0); 3040 0);
3018 3041
3019 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3042 for (loop = 0; loop < (1U << d_hash_shift); loop++)
3020 INIT_HLIST_BL_HEAD(dentry_hashtable + loop); 3043 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3021} 3044}
3022 3045
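Editor's note: the loop-counter change matters once d_hash_shift can be 31: 1 << 31 overflows a signed int (undefined behaviour), and an int counter compared against such a bound wraps negative before reaching it. The 1U literal and the unsigned int loop variables make both well-defined. Minimal demonstration:

#include <stdio.h>

int main(void)
{
        unsigned int shift = 31;

        /* 1 << 31 would overflow signed int; the unsigned form is defined. */
        unsigned int buckets = 1U << shift;
        printf("1U << %u = %u\n", shift, buckets);  /* 2147483648 */

        /* An int counter could never count up to this bound, which is why
         * the two hash-table init loops switched to unsigned int. */
        return 0;
}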
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 4a588dbd11bf..f4aadd15b613 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -173,7 +173,7 @@ void inode_dio_wait(struct inode *inode)
173 if (atomic_read(&inode->i_dio_count)) 173 if (atomic_read(&inode->i_dio_count))
174 __inode_dio_wait(inode); 174 __inode_dio_wait(inode);
175} 175}
176EXPORT_SYMBOL_GPL(inode_dio_wait); 176EXPORT_SYMBOL(inode_dio_wait);
177 177
178/* 178/*
179 * inode_dio_done - signal finish of a direct I/O request 179 * inode_dio_done - signal finish of a direct I/O request
@@ -187,7 +187,7 @@ void inode_dio_done(struct inode *inode)
187 if (atomic_dec_and_test(&inode->i_dio_count)) 187 if (atomic_dec_and_test(&inode->i_dio_count))
188 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); 188 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
189} 189}
190EXPORT_SYMBOL_GPL(inode_dio_done); 190EXPORT_SYMBOL(inode_dio_done);
191 191
192/* 192/*
193 * How many pages are in the queue? 193 * How many pages are in the queue?
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 63ab24510649..ea9931281557 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1990,6 +1990,17 @@ out:
1990 return; 1990 return;
1991} 1991}
1992 1992
1993static size_t ecryptfs_max_decoded_size(size_t encoded_size)
1994{
1995 /* Not exact; conservatively long. Every block of 4
1996 * encoded characters decodes into a block of 3
1997 * decoded characters. This segment of code provides
1998 * the caller with the maximum amount of allocated
1999 * space that @dst will need to point to in a
2000 * subsequent call. */
2001 return ((encoded_size + 1) * 3) / 4;
2002}
2003
1993/** 2004/**
1994 * ecryptfs_decode_from_filename 2005 * ecryptfs_decode_from_filename
1995 * @dst: If NULL, this function only sets @dst_size and returns. If 2006 * @dst: If NULL, this function only sets @dst_size and returns. If
@@ -2008,13 +2019,7 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
2008 size_t dst_byte_offset = 0; 2019 size_t dst_byte_offset = 0;
2009 2020
2010 if (dst == NULL) { 2021 if (dst == NULL) {
2011 /* Not exact; conservatively long. Every block of 4 2022 (*dst_size) = ecryptfs_max_decoded_size(src_size);
2012 * encoded characters decodes into a block of 3
2013 * decoded characters. This segment of code provides
2014 * the caller with the maximum amount of allocated
2015 * space that @dst will need to point to in a
2016 * subsequent call. */
2017 (*dst_size) = (((src_size + 1) * 3) / 4);
2018 goto out; 2023 goto out;
2019 } 2024 }
2020 while (src_byte_offset < src_size) { 2025 while (src_byte_offset < src_size) {
@@ -2239,3 +2244,52 @@ out_free:
2239out: 2244out:
2240 return rc; 2245 return rc;
2241} 2246}
2247
2248#define ENC_NAME_MAX_BLOCKLEN_8_OR_16 143
2249
2250int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
2251 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
2252{
2253 struct blkcipher_desc desc;
2254 struct mutex *tfm_mutex;
2255 size_t cipher_blocksize;
2256 int rc;
2257
2258 if (!(mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)) {
2259 (*namelen) = lower_namelen;
2260 return 0;
2261 }
2262
2263 rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex,
2264 mount_crypt_stat->global_default_fn_cipher_name);
2265 if (unlikely(rc)) {
2266 (*namelen) = 0;
2267 return rc;
2268 }
2269
2270 mutex_lock(tfm_mutex);
2271 cipher_blocksize = crypto_blkcipher_blocksize(desc.tfm);
2272 mutex_unlock(tfm_mutex);
2273
2274 /* Return an exact amount for the common cases */
2275 if (lower_namelen == NAME_MAX
2276 && (cipher_blocksize == 8 || cipher_blocksize == 16)) {
2277 (*namelen) = ENC_NAME_MAX_BLOCKLEN_8_OR_16;
2278 return 0;
2279 }
2280
2281 /* Return a safe estimate for the uncommon cases */
2282 (*namelen) = lower_namelen;
2283 (*namelen) -= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE;
2284 /* Since this is the max decoded size, subtract 1 "decoded block" len */
2285 (*namelen) = ecryptfs_max_decoded_size(*namelen) - 3;
2286 (*namelen) -= ECRYPTFS_TAG_70_MAX_METADATA_SIZE;
2287 (*namelen) -= ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES;
2288 /* Worst case is that the filename is padded nearly a full block size */
2289 (*namelen) -= cipher_blocksize - 1;
2290
2291 if ((*namelen) < 0)
2292 (*namelen) = 0;
2293
2294 return 0;
2295}
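Editor's note: the filename-length logic above leans on ecryptfs_max_decoded_size(), the 3/4 bound for the base64-style filename encoding; the +1 keeps the bound safe for encoded lengths that are not multiples of 4. The helper, standalone, with spot checks:

#include <stdio.h>

static size_t max_decoded_size(size_t encoded_size)
{
        /* every 4 encoded bytes decode to at most 3 bytes */
        return ((encoded_size + 1) * 3) / 4;
}

int main(void)
{
        printf("%zu %zu %zu\n",
               max_decoded_size(4),     /* 3 */
               max_decoded_size(143),   /* 108 */
               max_decoded_size(255));  /* 192 */
        return 0;
}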
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index a2362df58ae8..867b64c5d84f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -162,6 +162,10 @@ ecryptfs_get_key_payload_data(struct key *key)
162#define ECRYPTFS_NON_NULL 0x42 /* A reasonable substitute for NULL */ 162#define ECRYPTFS_NON_NULL 0x42 /* A reasonable substitute for NULL */
163#define MD5_DIGEST_SIZE 16 163#define MD5_DIGEST_SIZE 16
164#define ECRYPTFS_TAG_70_DIGEST_SIZE MD5_DIGEST_SIZE 164#define ECRYPTFS_TAG_70_DIGEST_SIZE MD5_DIGEST_SIZE
165#define ECRYPTFS_TAG_70_MIN_METADATA_SIZE (1 + ECRYPTFS_MIN_PKT_LEN_SIZE \
166 + ECRYPTFS_SIG_SIZE + 1 + 1)
167#define ECRYPTFS_TAG_70_MAX_METADATA_SIZE (1 + ECRYPTFS_MAX_PKT_LEN_SIZE \
168 + ECRYPTFS_SIG_SIZE + 1 + 1)
165#define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FEK_ENCRYPTED." 169#define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FEK_ENCRYPTED."
166#define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX_SIZE 23 170#define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX_SIZE 23
167#define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FNEK_ENCRYPTED." 171#define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FNEK_ENCRYPTED."
@@ -701,6 +705,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
701 size_t *packet_size, 705 size_t *packet_size,
702 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, 706 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
703 char *data, size_t max_packet_size); 707 char *data, size_t max_packet_size);
708int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
709 struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
704int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, 710int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
705 loff_t offset); 711 loff_t offset);
706 712
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 19892d7d2ed1..ab35b113003b 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -1085,6 +1085,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
1085 } 1085 }
1086 1086
1087 rc = vfs_setxattr(lower_dentry, name, value, size, flags); 1087 rc = vfs_setxattr(lower_dentry, name, value, size, flags);
1088 if (!rc)
1089 fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode);
1088out: 1090out:
1089 return rc; 1091 return rc;
1090} 1092}
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 8e3b943e330f..2333203a120b 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -679,10 +679,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
679 * Octets N3-N4: Block-aligned encrypted filename 679 * Octets N3-N4: Block-aligned encrypted filename
680 * - Consists of a minimum number of random characters, a \0 680 * - Consists of a minimum number of random characters, a \0
681 * separator, and then the filename */ 681 * separator, and then the filename */
682 s->max_packet_size = (1 /* Tag 70 identifier */ 682 s->max_packet_size = (ECRYPTFS_TAG_70_MAX_METADATA_SIZE
683 + 3 /* Max Tag 70 packet size */
684 + ECRYPTFS_SIG_SIZE /* FNEK sig */
685 + 1 /* Cipher identifier */
686 + s->block_aligned_filename_size); 683 + s->block_aligned_filename_size);
687 if (dest == NULL) { 684 if (dest == NULL) {
688 (*packet_size) = s->max_packet_size; 685 (*packet_size) = s->max_packet_size;
@@ -934,10 +931,10 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
934 goto out; 931 goto out;
935 } 932 }
936 s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; 933 s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
937 if (max_packet_size < (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)) { 934 if (max_packet_size < ECRYPTFS_TAG_70_MIN_METADATA_SIZE) {
938 printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be " 935 printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be "
939 "at least [%d]\n", __func__, max_packet_size, 936 "at least [%d]\n", __func__, max_packet_size,
940 (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)); 937 ECRYPTFS_TAG_70_MIN_METADATA_SIZE);
941 rc = -EINVAL; 938 rc = -EINVAL;
942 goto out; 939 goto out;
943 } 940 }
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 349209dc6a91..3a06f4043df4 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -429,7 +429,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
429 goto memdup; 429 goto memdup;
430 } else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) { 430 } else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) {
431 printk(KERN_WARNING "%s: Acceptable packet size range is " 431 printk(KERN_WARNING "%s: Acceptable packet size range is "
432 "[%d-%lu], but amount of data written is [%zu].", 432 "[%d-%zu], but amount of data written is [%zu].",
433 __func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count); 433 __func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count);
434 return -EINVAL; 434 return -EINVAL;
435 } 435 }
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 10ec695ccd68..a46b3a8fee1e 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -150,7 +150,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
150 /* This is a header extent */ 150 /* This is a header extent */
151 char *page_virt; 151 char *page_virt;
152 152
153 page_virt = kmap_atomic(page, KM_USER0); 153 page_virt = kmap_atomic(page);
154 memset(page_virt, 0, PAGE_CACHE_SIZE); 154 memset(page_virt, 0, PAGE_CACHE_SIZE);
155 /* TODO: Support more than one header extent */ 155 /* TODO: Support more than one header extent */
156 if (view_extent_num == 0) { 156 if (view_extent_num == 0) {
@@ -163,7 +163,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
163 crypt_stat, 163 crypt_stat,
164 &written); 164 &written);
165 } 165 }
166 kunmap_atomic(page_virt, KM_USER0); 166 kunmap_atomic(page_virt);
167 flush_dcache_page(page); 167 flush_dcache_page(page);
168 if (rc) { 168 if (rc) {
169 printk(KERN_ERR "%s: Error reading xattr " 169 printk(KERN_ERR "%s: Error reading xattr "
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 5c0106f75775..b2a34a192f4f 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -156,7 +156,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
156 ecryptfs_page_idx, rc); 156 ecryptfs_page_idx, rc);
157 goto out; 157 goto out;
158 } 158 }
159 ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); 159 ecryptfs_page_virt = kmap_atomic(ecryptfs_page);
160 160
161 /* 161 /*
162 * pos: where we're now writing, offset: where the request was 162 * pos: where we're now writing, offset: where the request was
@@ -179,7 +179,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
179 (data + data_offset), num_bytes); 179 (data + data_offset), num_bytes);
180 data_offset += num_bytes; 180 data_offset += num_bytes;
181 } 181 }
182 kunmap_atomic(ecryptfs_page_virt, KM_USER0); 182 kunmap_atomic(ecryptfs_page_virt);
183 flush_dcache_page(ecryptfs_page); 183 flush_dcache_page(ecryptfs_page);
184 SetPageUptodate(ecryptfs_page); 184 SetPageUptodate(ecryptfs_page);
185 unlock_page(ecryptfs_page); 185 unlock_page(ecryptfs_page);
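Editor's note: the ecryptfs hunks above are a mechanical conversion to the slotless kmap_atomic()/kunmap_atomic() API: the KM_USER0 slot argument is gone and mappings now nest implicitly, so unmaps must stay strictly last-in-first-out. A userspace toy modeling the stacking rule:

#include <stdio.h>

#define DEPTH 8
static void *stack[DEPTH];
static int top;

static void *kmap_atomic_model(void *page)
{
        stack[top++] = page;    /* push: the slot is now implicit */
        return page;            /* the real call returns a mapped address */
}

static void kunmap_atomic_model(void *addr)
{
        if (top == 0 || stack[top - 1] != addr)
                fprintf(stderr, "unmap out of LIFO order!\n");
        else
                top--;
}

int main(void)
{
        char a[64], b[64];
        void *pa = kmap_atomic_model(a);
        void *pb = kmap_atomic_model(b);

        kunmap_atomic_model(pb);        /* last mapped, first unmapped */
        kunmap_atomic_model(pa);
        printf("depth=%d\n", top);      /* 0 */
        return 0;
}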
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 9df7fd6e0c39..cf152823bbf4 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -30,6 +30,8 @@
30#include <linux/seq_file.h> 30#include <linux/seq_file.h>
31#include <linux/file.h> 31#include <linux/file.h>
32#include <linux/crypto.h> 32#include <linux/crypto.h>
33#include <linux/statfs.h>
34#include <linux/magic.h>
33#include "ecryptfs_kernel.h" 35#include "ecryptfs_kernel.h"
34 36
35struct kmem_cache *ecryptfs_inode_info_cache; 37struct kmem_cache *ecryptfs_inode_info_cache;
@@ -102,10 +104,20 @@ static void ecryptfs_destroy_inode(struct inode *inode)
102static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) 104static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
103{ 105{
104 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); 106 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
107 int rc;
105 108
106 if (!lower_dentry->d_sb->s_op->statfs) 109 if (!lower_dentry->d_sb->s_op->statfs)
107 return -ENOSYS; 110 return -ENOSYS;
108 return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf); 111
112 rc = lower_dentry->d_sb->s_op->statfs(lower_dentry, buf);
113 if (rc)
114 return rc;
115
116 buf->f_type = ECRYPTFS_SUPER_MAGIC;
117 rc = ecryptfs_set_f_namelen(&buf->f_namelen, buf->f_namelen,
118 &ecryptfs_superblock_to_private(dentry->d_sb)->mount_crypt_stat);
119
120 return rc;
109} 121}
110 122
111/** 123/**
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index aabdfc38cf24..ea54cdef04dd 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -320,6 +320,11 @@ static inline int ep_is_linked(struct list_head *p)
320 return !list_empty(p); 320 return !list_empty(p);
321} 321}
322 322
323static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
324{
325 return container_of(p, struct eppoll_entry, wait);
326}
327
323/* Get the "struct epitem" from a wait queue pointer */ 328/* Get the "struct epitem" from a wait queue pointer */
324static inline struct epitem *ep_item_from_wait(wait_queue_t *p) 329static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
325{ 330{
@@ -467,6 +472,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
467 put_cpu(); 472 put_cpu();
468} 473}
469 474
475static void ep_remove_wait_queue(struct eppoll_entry *pwq)
476{
477 wait_queue_head_t *whead;
478
479 rcu_read_lock();
480 /* If it is cleared by POLLFREE, it should be rcu-safe */
481 whead = rcu_dereference(pwq->whead);
482 if (whead)
483 remove_wait_queue(whead, &pwq->wait);
484 rcu_read_unlock();
485}
486
470/* 487/*
471 * This function unregisters poll callbacks from the associated file 488 * This function unregisters poll callbacks from the associated file
472 * descriptor. Must be called with "mtx" held (or "epmutex" if called from 489 * descriptor. Must be called with "mtx" held (or "epmutex" if called from
@@ -481,7 +498,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
481 pwq = list_first_entry(lsthead, struct eppoll_entry, llink); 498 pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
482 499
483 list_del(&pwq->llink); 500 list_del(&pwq->llink);
484 remove_wait_queue(pwq->whead, &pwq->wait); 501 ep_remove_wait_queue(pwq);
485 kmem_cache_free(pwq_cache, pwq); 502 kmem_cache_free(pwq_cache, pwq);
486 } 503 }
487} 504}
@@ -842,6 +859,17 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
842 struct epitem *epi = ep_item_from_wait(wait); 859 struct epitem *epi = ep_item_from_wait(wait);
843 struct eventpoll *ep = epi->ep; 860 struct eventpoll *ep = epi->ep;
844 861
862 if ((unsigned long)key & POLLFREE) {
863 ep_pwq_from_wait(wait)->whead = NULL;
864 /*
865 * whead = NULL above can race with ep_remove_wait_queue()
866 * which can do another remove_wait_queue() after us, so we
867 * can't use __remove_wait_queue(). whead->lock is held by
868 * the caller.
869 */
870 list_del_init(&wait->task_list);
871 }
872
845 spin_lock_irqsave(&ep->lock, flags); 873 spin_lock_irqsave(&ep->lock, flags);
846 874
847 /* 875 /*
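Editor's note: the POLLFREE handshake has two halves: the wakeup callback detaches itself with list_del_init() and clears pwq->whead when the queue head is about to be freed, and ep_remove_wait_queue() re-reads whead under rcu_read_lock() so it never dereferences a freed head. A single-threaded toy of the pointer protocol (the real code depends on RCU and the waitqueue lock; POLLFREE_BIT is a local stand-in):

#include <stdio.h>

#define POLLFREE_BIT 0x4000     /* local stand-in for POLLFREE */

struct whead { int alive; };
struct pwq   { struct whead *whead; };

static void poll_callback(struct pwq *p, unsigned long key)
{
        if (key & POLLFREE_BIT)
                p->whead = NULL;  /* the head is being freed: forget it */
}

static void remove_wait_queue_safe(struct pwq *p)
{
        struct whead *w = p->whead;   /* kernel: rcu_dereference() */

        if (w)
                printf("detached from a live head\n");
        else
                printf("head already freed; nothing to do\n");
}

int main(void)
{
        struct whead w = { .alive = 1 };
        struct pwq p = { .whead = &w };

        poll_callback(&p, POLLFREE_BIT);  /* e.g. signalfd tearing down */
        remove_wait_queue_safe(&p);       /* must not touch the dead head */
        return 0;
}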
diff --git a/fs/exec.c b/fs/exec.c
index aeb135c7ff5c..153dee14fe55 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1071,6 +1071,21 @@ void set_task_comm(struct task_struct *tsk, char *buf)
1071 perf_event_comm(tsk); 1071 perf_event_comm(tsk);
1072} 1072}
1073 1073
1074static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len)
1075{
1076 int i, ch;
1077
1078 /* Copies the binary name from after last slash */
1079 for (i = 0; (ch = *(fn++)) != '\0';) {
1080 if (ch == '/')
1081 i = 0; /* overwrite what we wrote */
1082 else
1083 if (i < len - 1)
1084 tcomm[i++] = ch;
1085 }
1086 tcomm[i] = '\0';
1087}
1088
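Editor's note: filename_to_taskname() is the old inline loop from setup_new_exec(), hoisted so flush_old_exec() can fill bprm->tcomm once; it keeps only the text after the last '/' and truncates to the buffer. Exercised standalone:

#include <stdio.h>

static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len)
{
        int i, ch;

        for (i = 0; (ch = *(fn++)) != '\0';) {
                if (ch == '/')
                        i = 0;           /* restart after every slash */
                else if (i < (int)len - 1)
                        tcomm[i++] = ch; /* keep at most len - 1 bytes */
        }
        tcomm[i] = '\0';
}

int main(void)
{
        char comm[16];  /* sizeof(current->comm) is 16 in the kernel */

        filename_to_taskname(comm, "/usr/bin/very-long-interpreter-name",
                             sizeof(comm));
        printf("%s\n", comm);   /* "very-long-inter": basename, truncated */
        return 0;
}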
1074int flush_old_exec(struct linux_binprm * bprm) 1089int flush_old_exec(struct linux_binprm * bprm)
1075{ 1090{
1076 int retval; 1091 int retval;
@@ -1085,6 +1100,7 @@ int flush_old_exec(struct linux_binprm * bprm)
1085 1100
1086 set_mm_exe_file(bprm->mm, bprm->file); 1101 set_mm_exe_file(bprm->mm, bprm->file);
1087 1102
1103 filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm));
1088 /* 1104 /*
1089 * Release all of the old mmap stuff 1105 * Release all of the old mmap stuff
1090 */ 1106 */
@@ -1116,10 +1132,6 @@ EXPORT_SYMBOL(would_dump);
1116 1132
1117void setup_new_exec(struct linux_binprm * bprm) 1133void setup_new_exec(struct linux_binprm * bprm)
1118{ 1134{
1119 int i, ch;
1120 const char *name;
1121 char tcomm[sizeof(current->comm)];
1122
1123 arch_pick_mmap_layout(current->mm); 1135 arch_pick_mmap_layout(current->mm);
1124 1136
1125 /* This is the point of no return */ 1137 /* This is the point of no return */
@@ -1130,18 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm)
1130 else 1142 else
1131 set_dumpable(current->mm, suid_dumpable); 1143 set_dumpable(current->mm, suid_dumpable);
1132 1144
1133 name = bprm->filename; 1145 set_task_comm(current, bprm->tcomm);
1134
1135 /* Copies the binary name from after last slash */
1136 for (i=0; (ch = *(name++)) != '\0';) {
1137 if (ch == '/')
1138 i = 0; /* overwrite what we wrote */
1139 else
1140 if (i < (sizeof(tcomm) - 1))
1141 tcomm[i++] = ch;
1142 }
1143 tcomm[i] = '\0';
1144 set_task_comm(current, tcomm);
1145 1146
1146 /* Set the new mm task size. We have to do that late because it may 1147 /* Set the new mm task size. We have to do that late because it may
1147 * depend on TIF_32BIT which is only updated in flush_thread() on 1148 * depend on TIF_32BIT which is only updated in flush_thread() on
@@ -1914,7 +1915,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
1914{ 1915{
1915 struct task_struct *tsk = current; 1916 struct task_struct *tsk = current;
1916 struct mm_struct *mm = tsk->mm; 1917 struct mm_struct *mm = tsk->mm;
1917 struct completion *vfork_done;
1918 int core_waiters = -EBUSY; 1918 int core_waiters = -EBUSY;
1919 1919
1920 init_completion(&core_state->startup); 1920 init_completion(&core_state->startup);
@@ -1926,22 +1926,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
1926 core_waiters = zap_threads(tsk, mm, core_state, exit_code); 1926 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
1927 up_write(&mm->mmap_sem); 1927 up_write(&mm->mmap_sem);
1928 1928
1929 if (unlikely(core_waiters < 0)) 1929 if (core_waiters > 0)
1930 goto fail;
1931
1932 /*
1933 * Make sure nobody is waiting for us to release the VM,
1934 * otherwise we can deadlock when we wait on each other
1935 */
1936 vfork_done = tsk->vfork_done;
1937 if (vfork_done) {
1938 tsk->vfork_done = NULL;
1939 complete(vfork_done);
1940 }
1941
1942 if (core_waiters)
1943 wait_for_completion(&core_state->startup); 1930 wait_for_completion(&core_state->startup);
1944fail: 1931
1945 return core_waiters; 1932 return core_waiters;
1946} 1933}
1947 1934
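
Two separate cleanups land in exec.c. The comm-truncation loop moves out of setup_new_exec() into filename_to_taskname(), runs earlier from flush_old_exec(), and stores into bprm->tcomm; and coredump_wait() drops its open-coded vfork_done completion, which this series handles generically when the mm is released, so the error path shrinks to a plain return. The string logic is pure C and easy to check standalone; TASK_COMM_LEN below is an assumption matching sizeof(current->comm) of the era:

#include <stdio.h>

#define TASK_COMM_LEN 16        /* assumed sizeof(current->comm) */

static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len)
{
        int i, ch;

        /* Copies the binary name from after the last slash */
        for (i = 0; (ch = *(fn++)) != '\0';) {
                if (ch == '/')
                        i = 0;                  /* restart after each '/' */
                else if (i < len - 1)
                        tcomm[i++] = ch;
        }
        tcomm[i] = '\0';
}

int main(void)
{
        char comm[TASK_COMM_LEN];

        filename_to_taskname(comm, "/usr/bin/a-rather-long-name", sizeof(comm));
        printf("%s\n", comm);   /* prints "a-rather-long-n": 15 chars + NUL */
        return 0;
}
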
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f855916657ba..5b4a9362d5aa 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,14 +53,6 @@ struct wb_writeback_work {
53}; 53};
54 54
55/* 55/*
56 * Include the creation of the trace points after defining the
57 * wb_writeback_work structure so that the definition remains local to this
58 * file.
59 */
60#define CREATE_TRACE_POINTS
61#include <trace/events/writeback.h>
62
63/*
64 * We don't actually have pdflush, but this one is exported though /proc... 56 * We don't actually have pdflush, but this one is exported though /proc...
65 */ 57 */
66int nr_pdflush_threads; 58int nr_pdflush_threads;
@@ -92,6 +84,14 @@ static inline struct inode *wb_inode(struct list_head *head)
92 return list_entry(head, struct inode, i_wb_list); 84 return list_entry(head, struct inode, i_wb_list);
93} 85}
94 86
87/*
88 * Include the creation of the trace points after defining the
89 * wb_writeback_work structure and inline functions so that the definition
90 * remains local to this file.
91 */
92#define CREATE_TRACE_POINTS
93#include <trace/events/writeback.h>
94
95/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */ 95/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
96static void bdi_wakeup_flusher(struct backing_dev_info *bdi) 96static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
97{ 97{
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 376816fcd040..351a3e797789 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -167,14 +167,19 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
167 spin_unlock(&lru_lock); 167 spin_unlock(&lru_lock);
168} 168}
169 169
170static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) 170static void __gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
171{ 171{
172 spin_lock(&lru_lock);
173 if (!list_empty(&gl->gl_lru)) { 172 if (!list_empty(&gl->gl_lru)) {
174 list_del_init(&gl->gl_lru); 173 list_del_init(&gl->gl_lru);
175 atomic_dec(&lru_count); 174 atomic_dec(&lru_count);
176 clear_bit(GLF_LRU, &gl->gl_flags); 175 clear_bit(GLF_LRU, &gl->gl_flags);
177 } 176 }
177}
178
179static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
180{
181 spin_lock(&lru_lock);
182 __gfs2_glock_remove_from_lru(gl);
178 spin_unlock(&lru_lock); 183 spin_unlock(&lru_lock);
179} 184}
180 185
@@ -217,11 +222,12 @@ void gfs2_glock_put(struct gfs2_glock *gl)
217 struct gfs2_sbd *sdp = gl->gl_sbd; 222 struct gfs2_sbd *sdp = gl->gl_sbd;
218 struct address_space *mapping = gfs2_glock2aspace(gl); 223 struct address_space *mapping = gfs2_glock2aspace(gl);
219 224
220 if (atomic_dec_and_test(&gl->gl_ref)) { 225 if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) {
226 __gfs2_glock_remove_from_lru(gl);
227 spin_unlock(&lru_lock);
221 spin_lock_bucket(gl->gl_hash); 228 spin_lock_bucket(gl->gl_hash);
222 hlist_bl_del_rcu(&gl->gl_list); 229 hlist_bl_del_rcu(&gl->gl_list);
223 spin_unlock_bucket(gl->gl_hash); 230 spin_unlock_bucket(gl->gl_hash);
224 gfs2_glock_remove_from_lru(gl);
225 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 231 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
226 GLOCK_BUG_ON(gl, mapping && mapping->nrpages); 232 GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
227 trace_gfs2_glock_put(gl); 233 trace_gfs2_glock_put(gl);
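
gfs2_glock_put() used to drop the last reference first and take lru_lock afterwards, leaving a window in which another CPU could find a zero-referenced glock on the LRU. atomic_dec_and_lock() closes it: when the count would reach zero, the decrement completes with lru_lock already held, and the new lock-held variant __gfs2_glock_remove_from_lru() does the unlink. A userspace sketch of the primitive along the lines of the kernel's generic fallback, with a pthread mutex in place of the spinlock (illustrative, not the kernel implementation):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Decrement *cnt and, iff it reached zero, return true with *lock
 * held so the caller can tear the object down. */
bool dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
        int old = atomic_load(cnt);

        /* Fast path: decrement lock-free while we cannot hit zero. */
        while (old > 1)
                if (atomic_compare_exchange_weak(cnt, &old, old - 1))
                        return false;

        /* Slow path: take the lock first, then decrement under it, so
         * nobody can observe a zero count without the lock held. */
        pthread_mutex_lock(lock);
        if (atomic_fetch_sub(cnt, 1) == 1)
                return true;            /* caller unlocks after teardown */
        pthread_mutex_unlock(lock);
        return false;
}
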
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index a7d611b93f0f..56987460cdae 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -391,10 +391,6 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
391 int error; 391 int error;
392 int dblocks = 1; 392 int dblocks = 1;
393 393
394 error = gfs2_rindex_update(sdp);
395 if (error)
396 fs_warn(sdp, "rindex update returns %d\n", error);
397
398 error = gfs2_inplace_reserve(dip, RES_DINODE); 394 error = gfs2_inplace_reserve(dip, RES_DINODE);
399 if (error) 395 if (error)
400 goto out; 396 goto out;
@@ -1043,6 +1039,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1043 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 1039 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
1044 if (!rgd) 1040 if (!rgd)
1045 goto out_inodes; 1041 goto out_inodes;
1042
1046 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); 1043 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
1047 1044
1048 1045
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 6aacf3f230a2..24f609c9ef91 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -800,6 +800,11 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
800 fs_err(sdp, "can't get quota file inode: %d\n", error); 800 fs_err(sdp, "can't get quota file inode: %d\n", error);
801 goto fail_rindex; 801 goto fail_rindex;
802 } 802 }
803
804 error = gfs2_rindex_update(sdp);
805 if (error)
806 goto fail_qinode;
807
803 return 0; 808 return 0;
804 809
805fail_qinode: 810fail_qinode:
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 981bfa32121a..49ada95209d0 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -683,16 +683,21 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp)
683 struct gfs2_glock *gl = ip->i_gl; 683 struct gfs2_glock *gl = ip->i_gl;
684 struct gfs2_holder ri_gh; 684 struct gfs2_holder ri_gh;
685 int error = 0; 685 int error = 0;
686 int unlock_required = 0;
686 687
687 /* Read new copy from disk if we don't have the latest */ 688 /* Read new copy from disk if we don't have the latest */
688 if (!sdp->sd_rindex_uptodate) { 689 if (!sdp->sd_rindex_uptodate) {
689 mutex_lock(&sdp->sd_rindex_mutex); 690 mutex_lock(&sdp->sd_rindex_mutex);
690 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); 691 if (!gfs2_glock_is_locked_by_me(gl)) {
691 if (error) 692 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
692 return error; 693 if (error)
694 return error;
695 unlock_required = 1;
696 }
693 if (!sdp->sd_rindex_uptodate) 697 if (!sdp->sd_rindex_uptodate)
694 error = gfs2_ri_update(ip); 698 error = gfs2_ri_update(ip);
695 gfs2_glock_dq_uninit(&ri_gh); 699 if (unlock_required)
700 gfs2_glock_dq_uninit(&ri_gh);
696 mutex_unlock(&sdp->sd_rindex_mutex); 701 mutex_unlock(&sdp->sd_rindex_mutex);
697 } 702 }
698 703
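
gfs2_rindex_update() can now be reached from contexts that already hold the rindex glock, so it checks gfs2_glock_is_locked_by_me() before enqueueing and remembers via unlock_required whether the dq/uninit at the end is its job. The take-only-if-not-mine idiom, modeled with a pthread mutex plus explicit owner tracking; the kernel helper does the ownership check for real, the bookkeeping below is purely illustrative:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t glock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t glock_owner;
static bool glock_held;

/* Stands in for gfs2_glock_is_locked_by_me(); only the owning thread
 * ever matches itself, so the unlocked read serves this purpose. */
static bool locked_by_me(void)
{
        return glock_held && pthread_equal(glock_owner, pthread_self());
}

int rindex_update(void)
{
        bool unlock_required = false;

        if (!locked_by_me()) {          /* avoid taking our own lock twice */
                pthread_mutex_lock(&glock);
                glock_owner = pthread_self();
                glock_held = true;
                unlock_required = true;
        }

        /* ... re-read the resource index here ... */

        if (unlock_required) {          /* drop it only if we took it */
                glock_held = false;
                pthread_mutex_unlock(&glock);
        }
        return 0;
}

int main(void)
{
        return rindex_update();
}
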
diff --git a/fs/inode.c b/fs/inode.c
index fb10d86ffad7..d3ebdbe723d0 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1651,7 +1651,7 @@ __setup("ihash_entries=", set_ihash_entries);
1651 */ 1651 */
1652void __init inode_init_early(void) 1652void __init inode_init_early(void)
1653{ 1653{
1654 int loop; 1654 unsigned int loop;
1655 1655
1656 /* If hashes are distributed across NUMA nodes, defer 1656 /* If hashes are distributed across NUMA nodes, defer
1657 * hash allocation until vmalloc space is available. 1657 * hash allocation until vmalloc space is available.
@@ -1669,13 +1669,13 @@ void __init inode_init_early(void)
1669 &i_hash_mask, 1669 &i_hash_mask,
1670 0); 1670 0);
1671 1671
1672 for (loop = 0; loop < (1 << i_hash_shift); loop++) 1672 for (loop = 0; loop < (1U << i_hash_shift); loop++)
1673 INIT_HLIST_HEAD(&inode_hashtable[loop]); 1673 INIT_HLIST_HEAD(&inode_hashtable[loop]);
1674} 1674}
1675 1675
1676void __init inode_init(void) 1676void __init inode_init(void)
1677{ 1677{
1678 int loop; 1678 unsigned int loop;
1679 1679
1680 /* inode slab cache */ 1680 /* inode slab cache */
1681 inode_cachep = kmem_cache_create("inode_cache", 1681 inode_cachep = kmem_cache_create("inode_cache",
@@ -1699,7 +1699,7 @@ void __init inode_init(void)
1699 &i_hash_mask, 1699 &i_hash_mask,
1700 0); 1700 0);
1701 1701
1702 for (loop = 0; loop < (1 << i_hash_shift); loop++) 1702 for (loop = 0; loop < (1U << i_hash_shift); loop++)
1703 INIT_HLIST_HEAD(&inode_hashtable[loop]); 1703 INIT_HLIST_HEAD(&inode_hashtable[loop]);
1704} 1704}
1705 1705
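
The hash-table loops in inode.c switch to unsigned arithmetic because `1 << i_hash_shift` is a signed expression: at 2^31 buckets the shift overflows int, which is undefined behaviour, and the loop bound can appear negative. With unsigned counters and `1U <<`, both sides stay well defined. The bound in isolation:

#include <stdio.h>

int main(void)
{
        unsigned int shift = 31;        /* huge-memory boxes can get here */

        /* int bad = 1 << shift;           undefined: signed overflow   */
        unsigned int good = 1U << shift;        /* well defined */

        printf("%u buckets\n", good);   /* 2147483648 */
        return 0;
}
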
diff --git a/fs/ioprio.c b/fs/ioprio.c
index f84b380d65e5..0f1b9515213b 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
51 ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); 51 ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
52 if (ioc) { 52 if (ioc) {
53 ioc_ioprio_changed(ioc, ioprio); 53 ioc_ioprio_changed(ioc, ioprio);
54 put_io_context(ioc, NULL); 54 put_io_context(ioc);
55 } 55 }
56 56
57 return err; 57 return err;
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index a01cdad6aad1..eafb8d37a6fb 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -335,7 +335,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
335 void *ebuf; 335 void *ebuf;
336 uint32_t ofs; 336 uint32_t ofs;
337 size_t retlen; 337 size_t retlen;
338 int ret = -EIO; 338 int ret;
339 unsigned long *wordebuf; 339 unsigned long *wordebuf;
340 340
341 ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen, 341 ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen,
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index e97404d611e0..9c501449450d 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -152,9 +152,6 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs)
152 filler_t *filler = logfs_mtd_readpage; 152 filler_t *filler = logfs_mtd_readpage;
153 struct mtd_info *mtd = super->s_mtd; 153 struct mtd_info *mtd = super->s_mtd;
154 154
155 if (!mtd_can_have_bb(mtd))
156 return NULL;
157
158 *ofs = 0; 155 *ofs = 0;
159 while (mtd_block_isbad(mtd, *ofs)) { 156 while (mtd_block_isbad(mtd, *ofs)) {
160 *ofs += mtd->erasesize; 157 *ofs += mtd->erasesize;
@@ -172,9 +169,6 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs)
172 filler_t *filler = logfs_mtd_readpage; 169 filler_t *filler = logfs_mtd_readpage;
173 struct mtd_info *mtd = super->s_mtd; 170 struct mtd_info *mtd = super->s_mtd;
174 171
175 if (!mtd_can_have_bb(mtd))
176 return NULL;
177
178 *ofs = mtd->size - mtd->erasesize; 172 *ofs = mtd->size - mtd->erasesize;
179 while (mtd_block_isbad(mtd, *ofs)) { 173 while (mtd_block_isbad(mtd, *ofs)) {
180 *ofs -= mtd->erasesize; 174 *ofs -= mtd->erasesize;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 501043e8966c..3de7a32cadbe 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -71,7 +71,7 @@ static int write_dir(struct inode *dir, struct logfs_disk_dentry *dd,
71 71
72static int write_inode(struct inode *inode) 72static int write_inode(struct inode *inode)
73{ 73{
74 return __logfs_write_inode(inode, WF_LOCK); 74 return __logfs_write_inode(inode, NULL, WF_LOCK);
75} 75}
76 76
77static s64 dir_seek_data(struct inode *inode, s64 pos) 77static s64 dir_seek_data(struct inode *inode, s64 pos)
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index b548c87a86f1..3886cded283c 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -230,7 +230,9 @@ int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
230 return ret; 230 return ret;
231 231
232 mutex_lock(&inode->i_mutex); 232 mutex_lock(&inode->i_mutex);
233 logfs_get_wblocks(sb, NULL, WF_LOCK);
233 logfs_write_anchor(sb); 234 logfs_write_anchor(sb);
235 logfs_put_wblocks(sb, NULL, WF_LOCK);
234 mutex_unlock(&inode->i_mutex); 236 mutex_unlock(&inode->i_mutex);
235 237
236 return 0; 238 return 0;
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
index caa4419285dc..d4efb061bdc5 100644
--- a/fs/logfs/gc.c
+++ b/fs/logfs/gc.c
@@ -367,7 +367,7 @@ static struct gc_candidate *get_candidate(struct super_block *sb)
367 int i, max_dist; 367 int i, max_dist;
368 struct gc_candidate *cand = NULL, *this; 368 struct gc_candidate *cand = NULL, *this;
369 369
370 max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS); 370 max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS - 1);
371 371
372 for (i = max_dist; i >= 0; i--) { 372 for (i = max_dist; i >= 0; i--) {
373 this = first_in_list(&super->s_low_list[i]); 373 this = first_in_list(&super->s_low_list[i]);
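
The gc.c change is an off-by-one: the loop walks s_low_list[] from max_dist down to 0 inclusive, so with the array presumably sized LOGFS_NO_AREAS the clamp has to be LOGFS_NO_AREAS - 1, or the first iteration reads one element past the end. The shape of the bug, reduced to standalone C:

#include <stdio.h>

#define N_AREAS 16                      /* stands in for LOGFS_NO_AREAS */

static int min(int a, int b) { return a < b ? a : b; }

int main(void)
{
        int low_list[N_AREAS] = { 0 };
        int free_segments = 100;        /* arbitrary */
        int max_dist = min(free_segments, N_AREAS - 1);  /* was N_AREAS */
        int i, hits = 0;

        for (i = max_dist; i >= 0; i--) /* inclusive: max_dist + 1 slots */
                hits += low_list[i];

        printf("scanned %d lists, %d hits\n", max_dist + 1, hits);
        return 0;
}
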
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 388df1aa35e5..a422f42238b2 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -286,7 +286,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc)
286 if (logfs_inode(inode)->li_flags & LOGFS_IF_STILLBORN) 286 if (logfs_inode(inode)->li_flags & LOGFS_IF_STILLBORN)
287 return 0; 287 return 0;
288 288
289 ret = __logfs_write_inode(inode, flags); 289 ret = __logfs_write_inode(inode, NULL, flags);
290 LOGFS_BUG_ON(ret, inode->i_sb); 290 LOGFS_BUG_ON(ret, inode->i_sb);
291 return ret; 291 return ret;
292} 292}
@@ -363,7 +363,9 @@ static void logfs_init_once(void *_li)
363 363
364static int logfs_sync_fs(struct super_block *sb, int wait) 364static int logfs_sync_fs(struct super_block *sb, int wait)
365{ 365{
366 logfs_get_wblocks(sb, NULL, WF_LOCK);
366 logfs_write_anchor(sb); 367 logfs_write_anchor(sb);
368 logfs_put_wblocks(sb, NULL, WF_LOCK);
367 return 0; 369 return 0;
368} 370}
369 371
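
logfs_write_anchor() no longer takes the write-block locks itself; logfs_fsync() above and logfs_sync_fs() here now bracket it with logfs_get_wblocks()/logfs_put_wblocks(), which is why the pair gains external linkage in logfs.h below. The calling contract in miniature, with a pthread mutex standing in for the logfs write mutex:

#include <pthread.h>

static pthread_mutex_t s_write_mutex = PTHREAD_MUTEX_INITIALIZER;

static void write_anchor(void)
{
        /* caller must hold s_write_mutex */
}

int sync_fs(void)
{
        pthread_mutex_lock(&s_write_mutex);     /* logfs_get_wblocks() */
        write_anchor();
        pthread_mutex_unlock(&s_write_mutex);   /* logfs_put_wblocks() */
        return 0;
}

int main(void)
{
        return sync_fs();
}
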
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 9da29706f91c..1e1c369df22b 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -612,7 +612,6 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type,
612 if (len == 0) 612 if (len == 0)
613 return logfs_write_header(super, header, 0, type); 613 return logfs_write_header(super, header, 0, type);
614 614
615 BUG_ON(len > sb->s_blocksize);
616 compr_len = logfs_compress(buf, data, len, sb->s_blocksize); 615 compr_len = logfs_compress(buf, data, len, sb->s_blocksize);
617 if (compr_len < 0 || type == JE_ANCHOR) { 616 if (compr_len < 0 || type == JE_ANCHOR) {
618 memcpy(data, buf, len); 617 memcpy(data, buf, len);
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 926373866a55..5f0937609465 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -528,7 +528,7 @@ void logfs_destroy_inode_cache(void);
528void logfs_set_blocks(struct inode *inode, u64 no); 528void logfs_set_blocks(struct inode *inode, u64 no);
529/* these logically belong into inode.c but actually reside in readwrite.c */ 529/* these logically belong into inode.c but actually reside in readwrite.c */
530int logfs_read_inode(struct inode *inode); 530int logfs_read_inode(struct inode *inode);
531int __logfs_write_inode(struct inode *inode, long flags); 531int __logfs_write_inode(struct inode *inode, struct page *, long flags);
532void logfs_evict_inode(struct inode *inode); 532void logfs_evict_inode(struct inode *inode);
533 533
534/* journal.c */ 534/* journal.c */
@@ -577,6 +577,8 @@ void initialize_block_counters(struct page *page, struct logfs_block *block,
577 __be64 *array, int page_is_empty); 577 __be64 *array, int page_is_empty);
578int logfs_exist_block(struct inode *inode, u64 bix); 578int logfs_exist_block(struct inode *inode, u64 bix);
579int get_page_reserve(struct inode *inode, struct page *page); 579int get_page_reserve(struct inode *inode, struct page *page);
580void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock);
581void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock);
580extern struct logfs_block_ops indirect_block_ops; 582extern struct logfs_block_ops indirect_block_ops;
581 583
582/* segment.c */ 584/* segment.c */
@@ -594,6 +596,7 @@ int logfs_init_mapping(struct super_block *sb);
594void logfs_sync_area(struct logfs_area *area); 596void logfs_sync_area(struct logfs_area *area);
595void logfs_sync_segments(struct super_block *sb); 597void logfs_sync_segments(struct super_block *sb);
596void freeseg(struct super_block *sb, u32 segno); 598void freeseg(struct super_block *sb, u32 segno);
599void free_areas(struct super_block *sb);
597 600
598/* area handling */ 601/* area handling */
599int logfs_init_areas(struct super_block *sb); 602int logfs_init_areas(struct super_block *sb);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 2ac4217b7901..4153e65b0148 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -244,8 +244,7 @@ static void preunlock_page(struct super_block *sb, struct page *page, int lock)
244 * is waiting for s_write_mutex. We annotate this fact by setting PG_pre_locked 244 * is waiting for s_write_mutex. We annotate this fact by setting PG_pre_locked
245 * in addition to PG_locked. 245 * in addition to PG_locked.
246 */ 246 */
247static void logfs_get_wblocks(struct super_block *sb, struct page *page, 247void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock)
248 int lock)
249{ 248{
250 struct logfs_super *super = logfs_super(sb); 249 struct logfs_super *super = logfs_super(sb);
251 250
@@ -260,8 +259,7 @@ static void logfs_get_wblocks(struct super_block *sb, struct page *page,
260 } 259 }
261} 260}
262 261
263static void logfs_put_wblocks(struct super_block *sb, struct page *page, 262void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock)
264 int lock)
265{ 263{
266 struct logfs_super *super = logfs_super(sb); 264 struct logfs_super *super = logfs_super(sb);
267 265
@@ -424,7 +422,7 @@ static void inode_write_block(struct logfs_block *block)
424 if (inode->i_ino == LOGFS_INO_MASTER) 422 if (inode->i_ino == LOGFS_INO_MASTER)
425 logfs_write_anchor(inode->i_sb); 423 logfs_write_anchor(inode->i_sb);
426 else { 424 else {
427 ret = __logfs_write_inode(inode, 0); 425 ret = __logfs_write_inode(inode, NULL, 0);
428 /* see indirect_write_block comment */ 426 /* see indirect_write_block comment */
429 BUG_ON(ret); 427 BUG_ON(ret);
430 } 428 }
@@ -560,8 +558,13 @@ static void inode_free_block(struct super_block *sb, struct logfs_block *block)
560static void indirect_free_block(struct super_block *sb, 558static void indirect_free_block(struct super_block *sb,
561 struct logfs_block *block) 559 struct logfs_block *block)
562{ 560{
563 ClearPagePrivate(block->page); 561 struct page *page = block->page;
564 block->page->private = 0; 562
563 if (PagePrivate(page)) {
564 ClearPagePrivate(page);
565 page_cache_release(page);
566 set_page_private(page, 0);
567 }
565 __free_block(sb, block); 568 __free_block(sb, block);
566} 569}
567 570
@@ -650,8 +653,11 @@ static void alloc_data_block(struct inode *inode, struct page *page)
650 logfs_unpack_index(page->index, &bix, &level); 653 logfs_unpack_index(page->index, &bix, &level);
651 block = __alloc_block(inode->i_sb, inode->i_ino, bix, level); 654 block = __alloc_block(inode->i_sb, inode->i_ino, bix, level);
652 block->page = page; 655 block->page = page;
656
653 SetPagePrivate(page); 657 SetPagePrivate(page);
654 page->private = (unsigned long)block; 658 page_cache_get(page);
659 set_page_private(page, (unsigned long) block);
660
655 block->ops = &indirect_block_ops; 661 block->ops = &indirect_block_ops;
656} 662}
657 663
@@ -1570,11 +1576,15 @@ int logfs_write_buf(struct inode *inode, struct page *page, long flags)
1570static int __logfs_delete(struct inode *inode, struct page *page) 1576static int __logfs_delete(struct inode *inode, struct page *page)
1571{ 1577{
1572 long flags = WF_DELETE; 1578 long flags = WF_DELETE;
1579 int err;
1573 1580
1574 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 1581 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
1575 1582
1576 if (page->index < I0_BLOCKS) 1583 if (page->index < I0_BLOCKS)
1577 return logfs_write_direct(inode, page, flags); 1584 return logfs_write_direct(inode, page, flags);
1585 err = grow_inode(inode, page->index, 0);
1586 if (err)
1587 return err;
1578 return logfs_write_rec(inode, page, page->index, 0, flags); 1588 return logfs_write_rec(inode, page, page->index, 0, flags);
1579} 1589}
1580 1590
@@ -1623,7 +1633,7 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
1623 if (inode->i_ino == LOGFS_INO_MASTER) 1633 if (inode->i_ino == LOGFS_INO_MASTER)
1624 logfs_write_anchor(inode->i_sb); 1634 logfs_write_anchor(inode->i_sb);
1625 else { 1635 else {
1626 err = __logfs_write_inode(inode, flags); 1636 err = __logfs_write_inode(inode, page, flags);
1627 } 1637 }
1628 } 1638 }
1629 } 1639 }
@@ -1873,7 +1883,7 @@ int logfs_truncate(struct inode *inode, u64 target)
1873 logfs_get_wblocks(sb, NULL, 1); 1883 logfs_get_wblocks(sb, NULL, 1);
1874 err = __logfs_truncate(inode, size); 1884 err = __logfs_truncate(inode, size);
1875 if (!err) 1885 if (!err)
1876 err = __logfs_write_inode(inode, 0); 1886 err = __logfs_write_inode(inode, NULL, 0);
1877 logfs_put_wblocks(sb, NULL, 1); 1887 logfs_put_wblocks(sb, NULL, 1);
1878 } 1888 }
1879 1889
@@ -1901,8 +1911,11 @@ static void move_page_to_inode(struct inode *inode, struct page *page)
1901 li->li_block = block; 1911 li->li_block = block;
1902 1912
1903 block->page = NULL; 1913 block->page = NULL;
1904 page->private = 0; 1914 if (PagePrivate(page)) {
1905 ClearPagePrivate(page); 1915 ClearPagePrivate(page);
1916 page_cache_release(page);
1917 set_page_private(page, 0);
1918 }
1906} 1919}
1907 1920
1908static void move_inode_to_page(struct page *page, struct inode *inode) 1921static void move_inode_to_page(struct page *page, struct inode *inode)
@@ -1918,8 +1931,12 @@ static void move_inode_to_page(struct page *page, struct inode *inode)
1918 BUG_ON(PagePrivate(page)); 1931 BUG_ON(PagePrivate(page));
1919 block->ops = &indirect_block_ops; 1932 block->ops = &indirect_block_ops;
1920 block->page = page; 1933 block->page = page;
1921 page->private = (unsigned long)block; 1934
1922 SetPagePrivate(page); 1935 if (!PagePrivate(page)) {
1936 SetPagePrivate(page);
1937 page_cache_get(page);
1938 set_page_private(page, (unsigned long) block);
1939 }
1923 1940
1924 block->inode = NULL; 1941 block->inode = NULL;
1925 li->li_block = NULL; 1942 li->li_block = NULL;
@@ -2106,14 +2123,14 @@ void logfs_set_segment_unreserved(struct super_block *sb, u32 segno, u32 ec)
2106 ec_level); 2123 ec_level);
2107} 2124}
2108 2125
2109int __logfs_write_inode(struct inode *inode, long flags) 2126int __logfs_write_inode(struct inode *inode, struct page *page, long flags)
2110{ 2127{
2111 struct super_block *sb = inode->i_sb; 2128 struct super_block *sb = inode->i_sb;
2112 int ret; 2129 int ret;
2113 2130
2114 logfs_get_wblocks(sb, NULL, flags & WF_LOCK); 2131 logfs_get_wblocks(sb, page, flags & WF_LOCK);
2115 ret = do_write_inode(inode); 2132 ret = do_write_inode(inode);
2116 logfs_put_wblocks(sb, NULL, flags & WF_LOCK); 2133 logfs_put_wblocks(sb, page, flags & WF_LOCK);
2117 return ret; 2134 return ret;
2118} 2135}
2119 2136
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 9d5187353255..ab798ed1cc88 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -86,7 +86,11 @@ int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
86 BUG_ON(!page); /* FIXME: reserve a pool */ 86 BUG_ON(!page); /* FIXME: reserve a pool */
87 SetPageUptodate(page); 87 SetPageUptodate(page);
88 memcpy(page_address(page) + offset, buf, copylen); 88 memcpy(page_address(page) + offset, buf, copylen);
89 SetPagePrivate(page); 89
90 if (!PagePrivate(page)) {
91 SetPagePrivate(page);
92 page_cache_get(page);
93 }
90 page_cache_release(page); 94 page_cache_release(page);
91 95
92 buf += copylen; 96 buf += copylen;
@@ -110,7 +114,10 @@ static void pad_partial_page(struct logfs_area *area)
110 page = get_mapping_page(sb, index, 0); 114 page = get_mapping_page(sb, index, 0);
111 BUG_ON(!page); /* FIXME: reserve a pool */ 115 BUG_ON(!page); /* FIXME: reserve a pool */
112 memset(page_address(page) + offset, 0xff, len); 116 memset(page_address(page) + offset, 0xff, len);
113 SetPagePrivate(page); 117 if (!PagePrivate(page)) {
118 SetPagePrivate(page);
119 page_cache_get(page);
120 }
114 page_cache_release(page); 121 page_cache_release(page);
115 } 122 }
116} 123}
@@ -130,7 +137,10 @@ static void pad_full_pages(struct logfs_area *area)
130 BUG_ON(!page); /* FIXME: reserve a pool */ 137 BUG_ON(!page); /* FIXME: reserve a pool */
131 SetPageUptodate(page); 138 SetPageUptodate(page);
132 memset(page_address(page), 0xff, PAGE_CACHE_SIZE); 139 memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
133 SetPagePrivate(page); 140 if (!PagePrivate(page)) {
141 SetPagePrivate(page);
142 page_cache_get(page);
143 }
134 page_cache_release(page); 144 page_cache_release(page);
135 index++; 145 index++;
136 no_indizes--; 146 no_indizes--;
@@ -485,8 +495,12 @@ static void move_btree_to_page(struct inode *inode, struct page *page,
485 mempool_free(item, super->s_alias_pool); 495 mempool_free(item, super->s_alias_pool);
486 } 496 }
487 block->page = page; 497 block->page = page;
488 SetPagePrivate(page); 498
489 page->private = (unsigned long)block; 499 if (!PagePrivate(page)) {
500 SetPagePrivate(page);
501 page_cache_get(page);
502 set_page_private(page, (unsigned long) block);
503 }
490 block->ops = &indirect_block_ops; 504 block->ops = &indirect_block_ops;
491 initialize_block_counters(page, block, data, 0); 505 initialize_block_counters(page, block, data, 0);
492} 506}
@@ -536,8 +550,12 @@ void move_page_to_btree(struct page *page)
536 list_add(&item->list, &block->item_list); 550 list_add(&item->list, &block->item_list);
537 } 551 }
538 block->page = NULL; 552 block->page = NULL;
539 ClearPagePrivate(page); 553
540 page->private = 0; 554 if (PagePrivate(page)) {
555 ClearPagePrivate(page);
556 page_cache_release(page);
557 set_page_private(page, 0);
558 }
541 block->ops = &btree_block_ops; 559 block->ops = &btree_block_ops;
542 err = alias_tree_insert(block->sb, block->ino, block->bix, block->level, 560 err = alias_tree_insert(block->sb, block->ino, block->bix, block->level,
543 block); 561 block);
@@ -702,7 +720,10 @@ void freeseg(struct super_block *sb, u32 segno)
702 page = find_get_page(mapping, ofs >> PAGE_SHIFT); 720 page = find_get_page(mapping, ofs >> PAGE_SHIFT);
703 if (!page) 721 if (!page)
704 continue; 722 continue;
705 ClearPagePrivate(page); 723 if (PagePrivate(page)) {
724 ClearPagePrivate(page);
725 page_cache_release(page);
726 }
706 page_cache_release(page); 727 page_cache_release(page);
707 } 728 }
708} 729}
@@ -841,6 +862,16 @@ static void free_area(struct logfs_area *area)
841 kfree(area); 862 kfree(area);
842} 863}
843 864
865void free_areas(struct super_block *sb)
866{
867 struct logfs_super *super = logfs_super(sb);
868 int i;
869
870 for_each_area(i)
871 free_area(super->s_area[i]);
872 free_area(super->s_journal_area);
873}
874
844static struct logfs_area *alloc_area(struct super_block *sb) 875static struct logfs_area *alloc_area(struct super_block *sb)
845{ 876{
846 struct logfs_area *area; 877 struct logfs_area *area;
@@ -923,10 +954,6 @@ err:
923void logfs_cleanup_areas(struct super_block *sb) 954void logfs_cleanup_areas(struct super_block *sb)
924{ 955{
925 struct logfs_super *super = logfs_super(sb); 956 struct logfs_super *super = logfs_super(sb);
926 int i;
927 957
928 btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias); 958 btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias);
929 for_each_area(i)
930 free_area(super->s_area[i]);
931 free_area(super->s_journal_area);
932} 959}
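
All the PagePrivate churn in readwrite.c and segment.c enforces a single rule: a page carrying logfs private data owns exactly one extra page reference, taken via page_cache_get() when PG_private is set and dropped via page_cache_release() when it is cleared, and every transition is now guarded by PagePrivate() so it happens at most once. The invariant with a plain refcount stand-in:

#include <assert.h>
#include <stdbool.h>

struct page {                           /* illustrative stand-in */
        int refcount;
        bool private;
        unsigned long private_data;
};

static void attach_block(struct page *p, unsigned long block)
{
        if (!p->private) {
                p->private = true;      /* SetPagePrivate() */
                p->refcount++;          /* page_cache_get() */
                p->private_data = block;
        }
}

static void detach_block(struct page *p)
{
        if (p->private) {
                p->private = false;     /* ClearPagePrivate() */
                p->refcount--;          /* page_cache_release() */
                p->private_data = 0;
        }
}

int main(void)
{
        struct page p = { .refcount = 1 };      /* page cache reference */

        attach_block(&p, 0xbeef);
        attach_block(&p, 0xbeef);       /* second set: no double get */
        assert(p.refcount == 2);
        detach_block(&p);
        detach_block(&p);               /* second clear: no double put */
        assert(p.refcount == 1);        /* balanced */
        return 0;
}
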
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index e795c234ea33..c9ee7f5d1caf 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -486,14 +486,15 @@ static void logfs_kill_sb(struct super_block *sb)
486 /* Alias entries slow down mount, so evict as many as possible */ 486 /* Alias entries slow down mount, so evict as many as possible */
487 sync_filesystem(sb); 487 sync_filesystem(sb);
488 logfs_write_anchor(sb); 488 logfs_write_anchor(sb);
489 free_areas(sb);
489 490
490 /* 491 /*
491 * From this point on alias entries are simply dropped - and any 492 * From this point on alias entries are simply dropped - and any
492 * writes to the object store are considered bugs. 493 * writes to the object store are considered bugs.
493 */ 494 */
494 super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN;
495 log_super("LogFS: Now in shutdown\n"); 495 log_super("LogFS: Now in shutdown\n");
496 generic_shutdown_super(sb); 496 generic_shutdown_super(sb);
497 super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN;
497 498
498 BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes); 499 BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes);
499 500
diff --git a/fs/namei.c b/fs/namei.c
index 208c6aa4a989..e2ba62820a0f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1095,8 +1095,10 @@ static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentr
1095 struct dentry *old; 1095 struct dentry *old;
1096 1096
1097 /* Don't create child dentry for a dead directory. */ 1097 /* Don't create child dentry for a dead directory. */
1098 if (unlikely(IS_DEADDIR(inode))) 1098 if (unlikely(IS_DEADDIR(inode))) {
1099 dput(dentry);
1099 return ERR_PTR(-ENOENT); 1100 return ERR_PTR(-ENOENT);
1101 }
1100 1102
1101 old = inode->i_op->lookup(inode, dentry, nd); 1103 old = inode->i_op->lookup(inode, dentry, nd);
1102 if (unlikely(old)) { 1104 if (unlikely(old)) {
@@ -1372,6 +1374,34 @@ static inline int can_lookup(struct inode *inode)
1372 return 1; 1374 return 1;
1373} 1375}
1374 1376
1377unsigned int full_name_hash(const unsigned char *name, unsigned int len)
1378{
1379 unsigned long hash = init_name_hash();
1380 while (len--)
1381 hash = partial_name_hash(*name++, hash);
1382 return end_name_hash(hash);
1383}
1384EXPORT_SYMBOL(full_name_hash);
1385
1386/*
1387 * We know there's a real path component here of at least
1388 * one character.
1389 */
1390static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1391{
1392 unsigned long hash = init_name_hash();
1393 unsigned long len = 0, c;
1394
1395 c = (unsigned char)*name;
1396 do {
1397 len++;
1398 hash = partial_name_hash(c, hash);
1399 c = (unsigned char)name[len];
1400 } while (c && c != '/');
1401 *hashp = end_name_hash(hash);
1402 return len;
1403}
1404
1375/* 1405/*
1376 * Name resolution. 1406 * Name resolution.
1377 * This is the basic name resolution function, turning a pathname into 1407 * This is the basic name resolution function, turning a pathname into
@@ -1392,31 +1422,22 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1392 1422
1393 /* At this point we know we have a real path component. */ 1423 /* At this point we know we have a real path component. */
1394 for(;;) { 1424 for(;;) {
1395 unsigned long hash;
1396 struct qstr this; 1425 struct qstr this;
1397 unsigned int c; 1426 long len;
1398 int type; 1427 int type;
1399 1428
1400 err = may_lookup(nd); 1429 err = may_lookup(nd);
1401 if (err) 1430 if (err)
1402 break; 1431 break;
1403 1432
1433 len = hash_name(name, &this.hash);
1404 this.name = name; 1434 this.name = name;
1405 c = *(const unsigned char *)name; 1435 this.len = len;
1406
1407 hash = init_name_hash();
1408 do {
1409 name++;
1410 hash = partial_name_hash(c, hash);
1411 c = *(const unsigned char *)name;
1412 } while (c && (c != '/'));
1413 this.len = name - (const char *) this.name;
1414 this.hash = end_name_hash(hash);
1415 1436
1416 type = LAST_NORM; 1437 type = LAST_NORM;
1417 if (this.name[0] == '.') switch (this.len) { 1438 if (name[0] == '.') switch (len) {
1418 case 2: 1439 case 2:
1419 if (this.name[1] == '.') { 1440 if (name[1] == '.') {
1420 type = LAST_DOTDOT; 1441 type = LAST_DOTDOT;
1421 nd->flags |= LOOKUP_JUMPED; 1442 nd->flags |= LOOKUP_JUMPED;
1422 } 1443 }
@@ -1435,12 +1456,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1435 } 1456 }
1436 } 1457 }
1437 1458
1438 /* remove trailing slashes? */ 1459 if (!name[len])
1439 if (!c)
1440 goto last_component; 1460 goto last_component;
1441 while (*++name == '/'); 1461 /*
1442 if (!*name) 1462 * If it wasn't NUL, we know it was '/'. Skip that
1463 * slash, and continue until no more slashes.
1464 */
1465 do {
1466 len++;
1467 } while (unlikely(name[len] == '/'));
1468 if (!name[len])
1443 goto last_component; 1469 goto last_component;
1470 name += len;
1444 1471
1445 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW); 1472 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1446 if (err < 0) 1473 if (err < 0)
@@ -1773,24 +1800,21 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1773struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1800struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1774{ 1801{
1775 struct qstr this; 1802 struct qstr this;
1776 unsigned long hash;
1777 unsigned int c; 1803 unsigned int c;
1778 1804
1779 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1805 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1780 1806
1781 this.name = name; 1807 this.name = name;
1782 this.len = len; 1808 this.len = len;
1809 this.hash = full_name_hash(name, len);
1783 if (!len) 1810 if (!len)
1784 return ERR_PTR(-EACCES); 1811 return ERR_PTR(-EACCES);
1785 1812
1786 hash = init_name_hash();
1787 while (len--) { 1813 while (len--) {
1788 c = *(const unsigned char *)name++; 1814 c = *(const unsigned char *)name++;
1789 if (c == '/' || c == '\0') 1815 if (c == '/' || c == '\0')
1790 return ERR_PTR(-EACCES); 1816 return ERR_PTR(-EACCES);
1791 hash = partial_name_hash(c, hash);
1792 } 1817 }
1793 this.hash = end_name_hash(hash);
1794 /* 1818 /*
1795 * See if the low-level filesystem might want 1819 * See if the low-level filesystem might want
1796 * to use its own hash.. 1820 * to use its own hash..
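
link_path_walk() and lookup_one_len() used to open-code the same loop; it is now hoisted into the exported full_name_hash() plus a new hash_name() that also returns the component length, so the caller fills its qstr in one pass and skips trailing slashes by offset instead of bumping the name pointer per character. A standalone version; init/partial/end_name_hash are reproduced from the contemporary <linux/dcache.h> shift-add hash and should be treated as illustrative if your tree differs:

#include <stdio.h>

static unsigned long init_name_hash(void) { return 0; }

static unsigned long partial_name_hash(unsigned long c, unsigned long prev)
{
        return (prev + (c << 4) + (c >> 4)) * 11;
}

static unsigned int end_name_hash(unsigned long hash)
{
        return (unsigned int)hash;
}

unsigned int full_name_hash(const unsigned char *name, unsigned int len)
{
        unsigned long hash = init_name_hash();
        while (len--)
                hash = partial_name_hash(*name++, hash);
        return end_name_hash(hash);
}

/* hash_name() additionally returns the component length, stopping at
 * '/' or NUL, so one pass yields both qstr fields. */
unsigned long hash_name(const char *name, unsigned int *hashp)
{
        unsigned long hash = init_name_hash();
        unsigned long len = 0, c = (unsigned char)*name;

        do {
                len++;
                hash = partial_name_hash(c, hash);
                c = (unsigned char)name[len];
        } while (c && c != '/');
        *hashp = end_name_hash(hash);
        return len;
}

int main(void)
{
        unsigned int hash;
        unsigned long len = hash_name("usr/bin", &hash);

        printf("len=%lu hash=%u\n", len, hash); /* len=3: "usr" */
        return 0;
}
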
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f0c849c98fe4..ec9f6ef6c5dd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3575,8 +3575,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3575 } 3575 }
3576 if (npages > 1) { 3576 if (npages > 1) {
3577 /* for decoding across pages */ 3577 /* for decoding across pages */
3578 args.acl_scratch = alloc_page(GFP_KERNEL); 3578 res.acl_scratch = alloc_page(GFP_KERNEL);
3579 if (!args.acl_scratch) 3579 if (!res.acl_scratch)
3580 goto out_free; 3580 goto out_free;
3581 } 3581 }
3582 args.acl_len = npages * PAGE_SIZE; 3582 args.acl_len = npages * PAGE_SIZE;
@@ -3612,8 +3612,8 @@ out_free:
3612 for (i = 0; i < npages; i++) 3612 for (i = 0; i < npages; i++)
3613 if (pages[i]) 3613 if (pages[i])
3614 __free_page(pages[i]); 3614 __free_page(pages[i]);
3615 if (args.acl_scratch) 3615 if (res.acl_scratch)
3616 __free_page(args.acl_scratch); 3616 __free_page(res.acl_scratch);
3617 return ret; 3617 return ret;
3618} 3618}
3619 3619
@@ -4883,8 +4883,10 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4883 clp->cl_rpcclient->cl_auth->au_flavor); 4883 clp->cl_rpcclient->cl_auth->au_flavor);
4884 4884
4885 res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); 4885 res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
4886 if (unlikely(!res.server_scope)) 4886 if (unlikely(!res.server_scope)) {
4887 return -ENOMEM; 4887 status = -ENOMEM;
4888 goto out;
4889 }
4888 4890
4889 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 4891 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
4890 if (!status) 4892 if (!status)
@@ -4901,12 +4903,13 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4901 clp->server_scope = NULL; 4903 clp->server_scope = NULL;
4902 } 4904 }
4903 4905
4904 if (!clp->server_scope) 4906 if (!clp->server_scope) {
4905 clp->server_scope = res.server_scope; 4907 clp->server_scope = res.server_scope;
4906 else 4908 goto out;
4907 kfree(res.server_scope); 4909 }
4908 } 4910 }
4909 4911 kfree(res.server_scope);
4912out:
4910 dprintk("<-- %s status= %d\n", __func__, status); 4913 dprintk("<-- %s status= %d\n", __func__, status);
4911 return status; 4914 return status;
4912} 4915}
@@ -5008,37 +5011,53 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
5008 return status; 5011 return status;
5009} 5012}
5010 5013
5014static struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags)
5015{
5016 return kcalloc(max_slots, sizeof(struct nfs4_slot), gfp_flags);
5017}
5018
5019static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
5020 struct nfs4_slot *new,
5021 u32 max_slots,
5022 u32 ivalue)
5023{
5024 struct nfs4_slot *old = NULL;
5025 u32 i;
5026
5027 spin_lock(&tbl->slot_tbl_lock);
5028 if (new) {
5029 old = tbl->slots;
5030 tbl->slots = new;
5031 tbl->max_slots = max_slots;
5032 }
5033 tbl->highest_used_slotid = -1; /* no slot is currently used */
5034 for (i = 0; i < tbl->max_slots; i++)
5035 tbl->slots[i].seq_nr = ivalue;
5036 spin_unlock(&tbl->slot_tbl_lock);
5037 kfree(old);
5038}
5039
5011/* 5040/*
5012 * Reset a slot table 5041 * (re)Initialise a slot table
5013 */ 5042 */
5014static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, 5043static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
5015 int ivalue) 5044 u32 ivalue)
5016{ 5045{
5017 struct nfs4_slot *new = NULL; 5046 struct nfs4_slot *new = NULL;
5018 int i; 5047 int ret = -ENOMEM;
5019 int ret = 0;
5020 5048
5021 dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, 5049 dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
5022 max_reqs, tbl->max_slots); 5050 max_reqs, tbl->max_slots);
5023 5051
5024 /* Does the newly negotiated max_reqs match the existing slot table? */ 5052 /* Does the newly negotiated max_reqs match the existing slot table? */
5025 if (max_reqs != tbl->max_slots) { 5053 if (max_reqs != tbl->max_slots) {
5026 ret = -ENOMEM; 5054 new = nfs4_alloc_slots(max_reqs, GFP_NOFS);
5027 new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
5028 GFP_NOFS);
5029 if (!new) 5055 if (!new)
5030 goto out; 5056 goto out;
5031 ret = 0;
5032 kfree(tbl->slots);
5033 } 5057 }
5034 spin_lock(&tbl->slot_tbl_lock); 5058 ret = 0;
5035 if (new) { 5059
5036 tbl->slots = new; 5060 nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue);
5037 tbl->max_slots = max_reqs;
5038 }
5039 for (i = 0; i < tbl->max_slots; ++i)
5040 tbl->slots[i].seq_nr = ivalue;
5041 spin_unlock(&tbl->slot_tbl_lock);
5042 dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, 5061 dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
5043 tbl, tbl->slots, tbl->max_slots); 5062 tbl, tbl->slots, tbl->max_slots);
5044out: 5063out:
@@ -5061,36 +5080,6 @@ static void nfs4_destroy_slot_tables(struct nfs4_session *session)
5061} 5080}
5062 5081
5063/* 5082/*
5064 * Initialize slot table
5065 */
5066static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
5067 int max_slots, int ivalue)
5068{
5069 struct nfs4_slot *slot;
5070 int ret = -ENOMEM;
5071
5072 BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE);
5073
5074 dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
5075
5076 slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS);
5077 if (!slot)
5078 goto out;
5079 ret = 0;
5080
5081 spin_lock(&tbl->slot_tbl_lock);
5082 tbl->max_slots = max_slots;
5083 tbl->slots = slot;
5084 tbl->highest_used_slotid = -1; /* no slot is currently used */
5085 spin_unlock(&tbl->slot_tbl_lock);
5086 dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
5087 tbl, tbl->slots, tbl->max_slots);
5088out:
5089 dprintk("<-- %s: return %d\n", __func__, ret);
5090 return ret;
5091}
5092
5093/*
5094 * Initialize or reset the forechannel and backchannel tables 5083 * Initialize or reset the forechannel and backchannel tables
5095 */ 5084 */
5096static int nfs4_setup_session_slot_tables(struct nfs4_session *ses) 5085static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
@@ -5101,25 +5090,16 @@ static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
5101 dprintk("--> %s\n", __func__); 5090 dprintk("--> %s\n", __func__);
5102 /* Fore channel */ 5091 /* Fore channel */
5103 tbl = &ses->fc_slot_table; 5092 tbl = &ses->fc_slot_table;
5104 if (tbl->slots == NULL) { 5093 status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
5105 status = nfs4_init_slot_table(tbl, ses->fc_attrs.max_reqs, 1); 5094 if (status) /* -ENOMEM */
5106 if (status) /* -ENOMEM */ 5095 return status;
5107 return status;
5108 } else {
5109 status = nfs4_reset_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
5110 if (status)
5111 return status;
5112 }
5113 /* Back channel */ 5096 /* Back channel */
5114 tbl = &ses->bc_slot_table; 5097 tbl = &ses->bc_slot_table;
5115 if (tbl->slots == NULL) { 5098 status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
5116 status = nfs4_init_slot_table(tbl, ses->bc_attrs.max_reqs, 0); 5099 if (status && tbl->slots == NULL)
5117 if (status) 5100 /* Fore and back channel share a connection so get
5118 /* Fore and back channel share a connection so get 5101 * both slot tables or neither */
5119 * both slot tables or neither */ 5102 nfs4_destroy_slot_tables(ses);
5120 nfs4_destroy_slot_tables(ses);
5121 } else
5122 status = nfs4_reset_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
5123 return status; 5103 return status;
5124} 5104}
5125 5105
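
nfs4_init_slot_table() and nfs4_reset_slot_table() collapse into one nfs4_realloc_slot_table() path: allocate a replacement array outside the lock only when the negotiated size changed, swap it in and reinitialise every seq_nr under slot_tbl_lock, and free the old array after dropping the lock (kfree(NULL) makes the size-unchanged case fall through naturally). The same shape in userspace, with calloc/free and a pthread mutex standing in for kcalloc and the spinlock:

#include <pthread.h>
#include <stdlib.h>

struct slot { unsigned int seq_nr; };

struct slot_table {
        pthread_mutex_t lock;           /* slot_tbl_lock stand-in */
        struct slot *slots;
        unsigned int max_slots;
        int highest_used;
};

static void add_and_init_slots(struct slot_table *tbl, struct slot *new,
                               unsigned int max_slots, unsigned int ivalue)
{
        struct slot *old = NULL;
        unsigned int i;

        pthread_mutex_lock(&tbl->lock);
        if (new) {                      /* swap in the resized array */
                old = tbl->slots;
                tbl->slots = new;
                tbl->max_slots = max_slots;
        }
        tbl->highest_used = -1;         /* no slot is currently used */
        for (i = 0; i < tbl->max_slots; i++)
                tbl->slots[i].seq_nr = ivalue;
        pthread_mutex_unlock(&tbl->lock);
        free(old);                      /* never free under the lock */
}

static int realloc_slot_table(struct slot_table *tbl, unsigned int max_reqs,
                              unsigned int ivalue)
{
        struct slot *new = NULL;

        if (max_reqs != tbl->max_slots) {
                new = calloc(max_reqs, sizeof(*new));
                if (!new)
                        return -1;      /* -ENOMEM in the kernel */
        }
        add_and_init_slots(tbl, new, max_reqs, ivalue);
        return 0;
}

int main(void)
{
        struct slot_table tbl = { .lock = PTHREAD_MUTEX_INITIALIZER };

        return realloc_slot_table(&tbl, 8, 1);  /* fresh 8-slot table */
}
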
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a53f33b4ac3a..45392032e7bd 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1132,6 +1132,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
1132{ 1132{
1133 struct nfs_client *clp = server->nfs_client; 1133 struct nfs_client *clp = server->nfs_client;
1134 1134
1135 if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags))
1136 nfs_async_inode_return_delegation(state->inode, &state->stateid);
1135 nfs4_state_mark_reclaim_nograce(clp, state); 1137 nfs4_state_mark_reclaim_nograce(clp, state);
1136 nfs4_schedule_state_manager(clp); 1138 nfs4_schedule_state_manager(clp);
1137} 1139}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 95e92e438407..33bd8d0f745d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2522,7 +2522,6 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
2522 2522
2523 xdr_inline_pages(&req->rq_rcv_buf, replen << 2, 2523 xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
2524 args->acl_pages, args->acl_pgbase, args->acl_len); 2524 args->acl_pages, args->acl_pgbase, args->acl_len);
2525 xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE);
2526 2525
2527 encode_nops(&hdr); 2526 encode_nops(&hdr);
2528} 2527}
@@ -6032,6 +6031,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6032 struct compound_hdr hdr; 6031 struct compound_hdr hdr;
6033 int status; 6032 int status;
6034 6033
6034 if (res->acl_scratch != NULL) {
6035 void *p = page_address(res->acl_scratch);
6036 xdr_set_scratch_buffer(xdr, p, PAGE_SIZE);
6037 }
6035 status = decode_compound_hdr(xdr, &hdr); 6038 status = decode_compound_hdr(xdr, &hdr);
6036 if (status) 6039 if (status)
6037 goto out; 6040 goto out;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 886649627c3d..2a70fce70c65 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -603,6 +603,8 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
603 nsegs = argv[4].v_nmembs; 603 nsegs = argv[4].v_nmembs;
604 if (argv[4].v_size != argsz[4]) 604 if (argv[4].v_size != argsz[4])
605 goto out; 605 goto out;
606 if (nsegs > UINT_MAX / sizeof(__u64))
607 goto out;
606 608
607 /* 609 /*
608 * argv[4] points to segment numbers this ioctl cleans. We 610 * argv[4] points to segment numbers this ioctl cleans. We
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index f14fde2b03d6..e0281992ddc3 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. 2 * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2007 Anton Altaparmakov 4 * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -345,10 +345,10 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
345 unsigned long flags; 345 unsigned long flags;
346 bool is_retry = false; 346 bool is_retry = false;
347 347
348 BUG_ON(!ni);
348 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", 349 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
349 ni->mft_no, (unsigned long long)vcn, 350 ni->mft_no, (unsigned long long)vcn,
350 write_locked ? "write" : "read"); 351 write_locked ? "write" : "read");
351 BUG_ON(!ni);
352 BUG_ON(!NInoNonResident(ni)); 352 BUG_ON(!NInoNonResident(ni));
353 BUG_ON(vcn < 0); 353 BUG_ON(vcn < 0);
354 if (!ni->runlist.rl) { 354 if (!ni->runlist.rl) {
@@ -469,9 +469,9 @@ runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
469 int err = 0; 469 int err = 0;
470 bool is_retry = false; 470 bool is_retry = false;
471 471
472 BUG_ON(!ni);
472 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.", 473 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.",
473 ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out"); 474 ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out");
474 BUG_ON(!ni);
475 BUG_ON(!NInoNonResident(ni)); 475 BUG_ON(!NInoNonResident(ni));
476 BUG_ON(vcn < 0); 476 BUG_ON(vcn < 0);
477 if (!ni->runlist.rl) { 477 if (!ni->runlist.rl) {
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 382857f9c7db..3014a36a255b 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. 2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. 4 * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -1367,7 +1367,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1367 ntfs_error(vol->sb, "Failed to merge runlists for mft " 1367 ntfs_error(vol->sb, "Failed to merge runlists for mft "
1368 "bitmap."); 1368 "bitmap.");
1369 if (ntfs_cluster_free_from_rl(vol, rl2)) { 1369 if (ntfs_cluster_free_from_rl(vol, rl2)) {
1370 ntfs_error(vol->sb, "Failed to dealocate " 1370 ntfs_error(vol->sb, "Failed to deallocate "
1371 "allocated cluster.%s", es); 1371 "allocated cluster.%s", es);
1372 NVolSetErrors(vol); 1372 NVolSetErrors(vol);
1373 } 1373 }
@@ -1805,7 +1805,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1805 ntfs_error(vol->sb, "Failed to merge runlists for mft data " 1805 ntfs_error(vol->sb, "Failed to merge runlists for mft data "
1806 "attribute."); 1806 "attribute.");
1807 if (ntfs_cluster_free_from_rl(vol, rl2)) { 1807 if (ntfs_cluster_free_from_rl(vol, rl2)) {
1808 ntfs_error(vol->sb, "Failed to dealocate clusters " 1808 ntfs_error(vol->sb, "Failed to deallocate clusters "
1809 "from the mft data attribute.%s", es); 1809 "from the mft data attribute.%s", es);
1810 NVolSetErrors(vol); 1810 NVolSetErrors(vol);
1811 } 1811 }
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 5a4a8af5c406..f907611cca73 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. 2 * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. 4 * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
5 * Copyright (c) 2001,2002 Richard Russon 5 * Copyright (c) 2001,2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -1239,7 +1239,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1239{ 1239{
1240 MFT_REF mref; 1240 MFT_REF mref;
1241 struct inode *vi; 1241 struct inode *vi;
1242 ntfs_inode *ni;
1243 struct page *page; 1242 struct page *page;
1244 u32 *kaddr, *kend; 1243 u32 *kaddr, *kend;
1245 ntfs_name *name = NULL; 1244 ntfs_name *name = NULL;
@@ -1290,7 +1289,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1290 "is not the system volume.", i_size_read(vi)); 1289 "is not the system volume.", i_size_read(vi));
1291 goto iput_out; 1290 goto iput_out;
1292 } 1291 }
1293 ni = NTFS_I(vi);
1294 page = ntfs_map_page(vi->i_mapping, 0); 1292 page = ntfs_map_page(vi->i_mapping, 0);
1295 if (IS_ERR(page)) { 1293 if (IS_ERR(page)) {
1296 ntfs_error(vol->sb, "Failed to read from hiberfil.sys."); 1294 ntfs_error(vol->sb, "Failed to read from hiberfil.sys.");
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index be244692550d..a9856e3eaaf0 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1053,7 +1053,7 @@ static int ocfs2_rename(struct inode *old_dir,
1053 handle_t *handle = NULL; 1053 handle_t *handle = NULL;
1054 struct buffer_head *old_dir_bh = NULL; 1054 struct buffer_head *old_dir_bh = NULL;
1055 struct buffer_head *new_dir_bh = NULL; 1055 struct buffer_head *new_dir_bh = NULL;
1056 nlink_t old_dir_nlink = old_dir->i_nlink; 1056 u32 old_dir_nlink = old_dir->i_nlink;
1057 struct ocfs2_dinode *old_di; 1057 struct ocfs2_dinode *old_di;
1058 struct ocfs2_dir_lookup_result old_inode_dot_dot_res = { NULL, }; 1058 struct ocfs2_dir_lookup_result old_inode_dot_dot_res = { NULL, };
1059 struct ocfs2_dir_lookup_result target_lookup_res = { NULL, }; 1059 struct ocfs2_dir_lookup_result target_lookup_res = { NULL, };
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9cde9edf9c4d..d4548dd49b02 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
198 return result; 198 return result;
199} 199}
200 200
201static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
202{
203 struct mm_struct *mm;
204 int err;
205
206 err = mutex_lock_killable(&task->signal->cred_guard_mutex);
207 if (err)
208 return ERR_PTR(err);
209
210 mm = get_task_mm(task);
211 if (mm && mm != current->mm &&
212 !ptrace_may_access(task, mode)) {
213 mmput(mm);
214 mm = ERR_PTR(-EACCES);
215 }
216 mutex_unlock(&task->signal->cred_guard_mutex);
217
218 return mm;
219}
220
221struct mm_struct *mm_for_maps(struct task_struct *task) 201struct mm_struct *mm_for_maps(struct task_struct *task)
222{ 202{
223 return mm_access(task, PTRACE_MODE_READ); 203 return mm_access(task, PTRACE_MODE_READ);
@@ -711,6 +691,13 @@ static int mem_open(struct inode* inode, struct file* file)
711 if (IS_ERR(mm)) 691 if (IS_ERR(mm))
712 return PTR_ERR(mm); 692 return PTR_ERR(mm);
713 693
694 if (mm) {
695 /* ensure this mm_struct can't be freed */
696 atomic_inc(&mm->mm_count);
697 /* but do not pin its memory */
698 mmput(mm);
699 }
700
714 /* OK to pass negative loff_t, we can catch out-of-range */ 701 /* OK to pass negative loff_t, we can catch out-of-range */
715 file->f_mode |= FMODE_UNSIGNED_OFFSET; 702 file->f_mode |= FMODE_UNSIGNED_OFFSET;
716 file->private_data = mm; 703 file->private_data = mm;
@@ -718,57 +705,13 @@ static int mem_open(struct inode* inode, struct file* file)
718 return 0; 705 return 0;
719} 706}
720 707
721static ssize_t mem_read(struct file * file, char __user * buf, 708static ssize_t mem_rw(struct file *file, char __user *buf,
722 size_t count, loff_t *ppos) 709 size_t count, loff_t *ppos, int write)
723{ 710{
724 int ret;
725 char *page;
726 unsigned long src = *ppos;
727 struct mm_struct *mm = file->private_data; 711 struct mm_struct *mm = file->private_data;
728 712 unsigned long addr = *ppos;
729 if (!mm) 713 ssize_t copied;
730 return 0;
731
732 page = (char *)__get_free_page(GFP_TEMPORARY);
733 if (!page)
734 return -ENOMEM;
735
736 ret = 0;
737
738 while (count > 0) {
739 int this_len, retval;
740
741 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
742 retval = access_remote_vm(mm, src, page, this_len, 0);
743 if (!retval) {
744 if (!ret)
745 ret = -EIO;
746 break;
747 }
748
749 if (copy_to_user(buf, page, retval)) {
750 ret = -EFAULT;
751 break;
752 }
753
754 ret += retval;
755 src += retval;
756 buf += retval;
757 count -= retval;
758 }
759 *ppos = src;
760
761 free_page((unsigned long) page);
762 return ret;
763}
764
765static ssize_t mem_write(struct file * file, const char __user *buf,
766 size_t count, loff_t *ppos)
767{
768 int copied;
769 char *page; 714 char *page;
770 unsigned long dst = *ppos;
771 struct mm_struct *mm = file->private_data;
772 715
773 if (!mm) 716 if (!mm)
774 return 0; 717 return 0;
@@ -778,31 +721,54 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
778 return -ENOMEM; 721 return -ENOMEM;
779 722
780 copied = 0; 723 copied = 0;
724 if (!atomic_inc_not_zero(&mm->mm_users))
725 goto free;
726
781 while (count > 0) { 727 while (count > 0) {
782 int this_len, retval; 728 int this_len = min_t(int, count, PAGE_SIZE);
783 729
784 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 730 if (write && copy_from_user(page, buf, this_len)) {
785 if (copy_from_user(page, buf, this_len)) {
786 copied = -EFAULT; 731 copied = -EFAULT;
787 break; 732 break;
788 } 733 }
789 retval = access_remote_vm(mm, dst, page, this_len, 1); 734
790 if (!retval) { 735 this_len = access_remote_vm(mm, addr, page, this_len, write);
736 if (!this_len) {
791 if (!copied) 737 if (!copied)
792 copied = -EIO; 738 copied = -EIO;
793 break; 739 break;
794 } 740 }
795 copied += retval; 741
796 buf += retval; 742 if (!write && copy_to_user(buf, page, this_len)) {
797 dst += retval; 743 copied = -EFAULT;
798 count -= retval; 744 break;
745 }
746
747 buf += this_len;
748 addr += this_len;
749 copied += this_len;
750 count -= this_len;
799 } 751 }
800 *ppos = dst; 752 *ppos = addr;
801 753
754 mmput(mm);
755free:
802 free_page((unsigned long) page); 756 free_page((unsigned long) page);
803 return copied; 757 return copied;
804} 758}
805 759
760static ssize_t mem_read(struct file *file, char __user *buf,
761 size_t count, loff_t *ppos)
762{
763 return mem_rw(file, buf, count, ppos, 0);
764}
765
766static ssize_t mem_write(struct file *file, const char __user *buf,
767 size_t count, loff_t *ppos)
768{
769 return mem_rw(file, (char __user*)buf, count, ppos, 1);
770}
771
806loff_t mem_lseek(struct file *file, loff_t offset, int orig) 772loff_t mem_lseek(struct file *file, loff_t offset, int orig)
807{ 773{
808 switch (orig) { 774 switch (orig) {
@@ -822,8 +788,8 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig)
822static int mem_release(struct inode *inode, struct file *file) 788static int mem_release(struct inode *inode, struct file *file)
823{ 789{
824 struct mm_struct *mm = file->private_data; 790 struct mm_struct *mm = file->private_data;
825 791 if (mm)
826 mmput(mm); 792 mmdrop(mm);
827 return 0; 793 return 0;
828} 794}
829 795
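The fs/proc/base.c change folds mem_read() and mem_write() into one mem_rw(): it pins the target mm with atomic_inc_not_zero(&mm->mm_users), copies through a single bounce page in PAGE_SIZE chunks, and lets the write flag choose the copy direction around access_remote_vm(); open() now keeps only an mm_count reference, so release() pairs with mmdrop(). From userspace the interface is unchanged: the file offset of /proc/<pid>/mem is the target's virtual address. A minimal userspace sketch of that view, assuming a ptrace-attachable target and an address small enough to fit in off_t:

/* Read a few bytes of another process's memory via /proc/<pid>/mem.
 * The file offset is the virtual address, which is what mem_lseek()
 * and FMODE_UNSIGNED_OFFSET make work above. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	char path[64], buf[64];
	unsigned long addr;
	ssize_t n;
	int fd;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <pid> <hex-addr>\n", argv[0]);
		return 1;
	}
	addr = strtoul(argv[2], NULL, 16);
	snprintf(path, sizeof(path), "/proc/%s/mem", argv[1]);

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* pread() at offset == address; mem_rw() copies the bytes out of
	 * the target mm one bounce page at a time. */
	n = pread(fd, buf, sizeof(buf), (off_t)addr);
	if (n < 0)
		perror("pread");
	else
		printf("read %zd bytes at %#lx\n", n, addr);
	close(fd);
	return 0;
}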
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 7898cd688a00..fc2c4388d126 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -292,11 +292,26 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
292 } 292 }
293} 293}
294 294
 295/* Return 1 if 'cmd' will block on a frozen filesystem */
296static int quotactl_cmd_write(int cmd)
297{
298 switch (cmd) {
299 case Q_GETFMT:
300 case Q_GETINFO:
301 case Q_SYNC:
302 case Q_XGETQSTAT:
303 case Q_XGETQUOTA:
304 case Q_XQUOTASYNC:
305 return 0;
306 }
307 return 1;
308}
309
295/* 310/*
296 * look up a superblock on which quota ops will be performed 311 * look up a superblock on which quota ops will be performed
297 * - use the name of a block device to find the superblock thereon 312 * - use the name of a block device to find the superblock thereon
298 */ 313 */
299static struct super_block *quotactl_block(const char __user *special) 314static struct super_block *quotactl_block(const char __user *special, int cmd)
300{ 315{
301#ifdef CONFIG_BLOCK 316#ifdef CONFIG_BLOCK
302 struct block_device *bdev; 317 struct block_device *bdev;
@@ -309,7 +324,10 @@ static struct super_block *quotactl_block(const char __user *special)
309 putname(tmp); 324 putname(tmp);
310 if (IS_ERR(bdev)) 325 if (IS_ERR(bdev))
311 return ERR_CAST(bdev); 326 return ERR_CAST(bdev);
312 sb = get_super(bdev); 327 if (quotactl_cmd_write(cmd))
328 sb = get_super_thawed(bdev);
329 else
330 sb = get_super(bdev);
313 bdput(bdev); 331 bdput(bdev);
314 if (!sb) 332 if (!sb)
315 return ERR_PTR(-ENODEV); 333 return ERR_PTR(-ENODEV);
@@ -361,7 +379,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
361 pathp = &path; 379 pathp = &path;
362 } 380 }
363 381
364 sb = quotactl_block(special); 382 sb = quotactl_block(special, cmds);
365 if (IS_ERR(sb)) { 383 if (IS_ERR(sb)) {
366 ret = PTR_ERR(sb); 384 ret = PTR_ERR(sb);
367 goto out; 385 goto out;
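With quotactl_cmd_write() classifying commands, only state-changing quotactl(2) calls go through get_super_thawed() and sleep on a frozen filesystem; commands on the read-only list above are served immediately via plain get_super(). A hedged userspace sketch of the two paths (the device path is a placeholder):

#include <stdio.h>
#include <sys/quota.h>
#include <sys/types.h>

int main(void)
{
	unsigned int fmt;
	const char *dev = "/dev/sda1";	/* placeholder block device */

	/* Q_GETFMT is on the quotactl_cmd_write() read-only list, so it
	 * takes the plain get_super() path and never waits for a thaw. */
	if (quotactl(QCMD(Q_GETFMT, USRQUOTA), dev, 0, (caddr_t)&fmt) == 0)
		printf("quota format: %u\n", fmt);
	else
		perror("quotactl(Q_GETFMT)");

	/* A state-changing command such as Q_SETQUOTA would instead go
	 * through get_super_thawed() and sleep until the fs is thawed. */
	return 0;
}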
diff --git a/fs/select.c b/fs/select.c
index d33418fdc858..e782258d0de3 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -912,7 +912,7 @@ static long do_restart_poll(struct restart_block *restart_block)
912} 912}
913 913
914SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, 914SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
915 long, timeout_msecs) 915 int, timeout_msecs)
916{ 916{
917 struct timespec end_time, *to = NULL; 917 struct timespec end_time, *to = NULL;
918 int ret; 918 int ret;
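The timeout parameter now matches the userspace prototype, where poll()'s timeout is an int of milliseconds. For reference, the timeout semantics from the caller's side (a self-contained userspace sketch):

#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN };

	/* timeout is an int in milliseconds:
	 *   < 0  block indefinitely
	 *   = 0  return immediately
	 *   > 0  wait up to that many milliseconds
	 */
	int n = poll(&pfd, 1, 1000);	/* wait up to one second */
	if (n < 0)
		perror("poll");
	else if (n == 0)
		printf("timed out\n");
	else if (pfd.revents & POLLIN)
		printf("stdin is readable\n");
	return 0;
}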
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 492465b451dd..7ae2a574cb25 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -30,6 +30,21 @@
30#include <linux/signalfd.h> 30#include <linux/signalfd.h>
31#include <linux/syscalls.h> 31#include <linux/syscalls.h>
32 32
33void signalfd_cleanup(struct sighand_struct *sighand)
34{
35 wait_queue_head_t *wqh = &sighand->signalfd_wqh;
36 /*
37 * The lockless check can race with remove_wait_queue() in progress,
38 * but in this case its caller should run under rcu_read_lock() and
 39 * sighand_cachep is SLAB_DESTROY_BY_RCU, so we can safely return.
40 */
41 if (likely(!waitqueue_active(wqh)))
42 return;
43
44 /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
45 wake_up_poll(wqh, POLLHUP | POLLFREE);
46}
47
33struct signalfd_ctx { 48struct signalfd_ctx {
34 sigset_t sigmask; 49 sigset_t sigmask;
35}; 50};
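signalfd_cleanup() runs as a sighand_struct is torn down (the corresponding call site lives outside this diff): waitqueue_active() is a cheap lockless fast path, and POLLFREE tells waiters to unhook themselves from the about-to-be-freed waitqueue. The waiters it kicks are ordinary signalfd readers, as in this userspace sketch:

#include <signal.h>
#include <stdio.h>
#include <sys/signalfd.h>
#include <unistd.h>

int main(void)
{
	sigset_t mask;
	struct signalfd_siginfo si;
	int sfd;

	sigemptyset(&mask);
	sigaddset(&mask, SIGINT);
	/* Block SIGINT so it is delivered via the fd, not a handler. */
	sigprocmask(SIG_BLOCK, &mask, NULL);

	sfd = signalfd(-1, &mask, 0);
	if (sfd < 0) {
		perror("signalfd");
		return 1;
	}
	/* This read() sleeps on sighand->signalfd_wqh; signalfd_cleanup()
	 * above is what wakes such sleepers with POLLHUP | POLLFREE when
	 * the sighand_struct goes away. */
	if (read(sfd, &si, sizeof(si)) == sizeof(si))
		printf("got signal %u\n", si.ssi_signo);
	close(sfd);
	return 0;
}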
diff --git a/fs/super.c b/fs/super.c
index 6015c02296b7..6277ec6cb60a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -634,6 +634,28 @@ rescan:
634EXPORT_SYMBOL(get_super); 634EXPORT_SYMBOL(get_super);
635 635
636/** 636/**
637 * get_super_thawed - get thawed superblock of a device
638 * @bdev: device to get the superblock for
639 *
640 * Scans the superblock list and finds the superblock of the file system
641 * mounted on the device. The superblock is returned once it is thawed
642 * (or immediately if it was not frozen). %NULL is returned if no match
643 * is found.
644 */
645struct super_block *get_super_thawed(struct block_device *bdev)
646{
647 while (1) {
648 struct super_block *s = get_super(bdev);
649 if (!s || s->s_frozen == SB_UNFROZEN)
650 return s;
651 up_read(&s->s_umount);
652 vfs_check_frozen(s, SB_FREEZE_WRITE);
653 put_super(s);
654 }
655}
656EXPORT_SYMBOL(get_super_thawed);
657
658/**
637 * get_active_super - get an active reference to the superblock of a device 659 * get_active_super - get an active reference to the superblock of a device
638 * @bdev: device to get the superblock for 660 * @bdev: device to get the superblock for
639 * 661 *
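get_super_thawed() loops: take the superblock, and if it is frozen, drop s_umount, wait in vfs_check_frozen(), release the reference, and retry, so the caller always gets a thawed sb with s_umount read-locked. A hedged sketch of the calling convention, modeled on quotactl_block() above (do_write_side_work() is a hypothetical helper, so this is not compilable as-is):

static int frob_filesystem(struct block_device *bdev)
{
	struct super_block *sb = get_super_thawed(bdev);
	int ret;

	if (!sb)
		return -ENODEV;		/* nothing mounted on bdev */

	ret = do_write_side_work(sb);	/* hypothetical write-side work */

	drop_super(sb);			/* releases s_umount and the ref */
	return ret;
}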
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 62f4fb37789e..00012e31829d 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -493,6 +493,12 @@ int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr,
493 const void *ns = NULL; 493 const void *ns = NULL;
494 int err; 494 int err;
495 495
496 if (!dir_sd) {
497 WARN(1, KERN_ERR "sysfs: kobject %s without dirent\n",
498 kobject_name(kobj));
499 return -ENOENT;
500 }
501
496 err = 0; 502 err = 0;
497 if (!sysfs_ns_type(dir_sd)) 503 if (!sysfs_ns_type(dir_sd))
498 goto out; 504 goto out;
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 4a802b4a9056..85eb81683a29 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -318,8 +318,11 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha
318 struct sysfs_addrm_cxt acxt; 318 struct sysfs_addrm_cxt acxt;
319 struct sysfs_dirent *sd; 319 struct sysfs_dirent *sd;
320 320
321 if (!dir_sd) 321 if (!dir_sd) {
322 WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
323 name);
322 return -ENOENT; 324 return -ENOENT;
325 }
323 326
324 sysfs_addrm_start(&acxt, dir_sd); 327 sysfs_addrm_start(&acxt, dir_sd);
325 328
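Both sysfs hunks apply the same defensive idiom: a NULL parent dirent used to oops or fail silently; now it logs a backtrace via WARN() and returns -ENOENT. A generic hedged sketch of the idiom (struct thing and check_thing() are stand-ins, not sysfs code):

static int check_thing(struct thing *t, const char *name)
{
	if (!t) {
		WARN(1, "subsys: cannot handle '%s', no parent\n", name);
		return -ENOENT;		/* fail cleanly instead of oopsing */
	}
	return 0;
}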
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 292eff198030..ab7c53fe346e 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -110,10 +110,4 @@ kmem_zone_destroy(kmem_zone_t *zone)
110extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); 110extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
111extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); 111extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
112 112
113static inline int
114kmem_shake_allow(gfp_t gfp_mask)
115{
116 return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
117}
118
119#endif /* __XFS_SUPPORT_KMEM_H__ */ 113#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index b4ff40b5f918..53db20ee3e77 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -63,82 +63,6 @@ int xfs_dqerror_mod = 33;
63static struct lock_class_key xfs_dquot_other_class; 63static struct lock_class_key xfs_dquot_other_class;
64 64
65/* 65/*
66 * Allocate and initialize a dquot. We don't always allocate fresh memory;
67 * we try to reclaim a free dquot if the number of incore dquots are above
68 * a threshold.
69 * The only field inside the core that gets initialized at this point
70 * is the d_id field. The idea is to fill in the entire q_core
71 * when we read in the on disk dquot.
72 */
73STATIC xfs_dquot_t *
74xfs_qm_dqinit(
75 xfs_mount_t *mp,
76 xfs_dqid_t id,
77 uint type)
78{
79 xfs_dquot_t *dqp;
80 boolean_t brandnewdquot;
81
82 brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
83 dqp->dq_flags = type;
84 dqp->q_core.d_id = cpu_to_be32(id);
85 dqp->q_mount = mp;
86
87 /*
88 * No need to re-initialize these if this is a reclaimed dquot.
89 */
90 if (brandnewdquot) {
91 INIT_LIST_HEAD(&dqp->q_freelist);
92 mutex_init(&dqp->q_qlock);
93 init_waitqueue_head(&dqp->q_pinwait);
94
95 /*
96 * Because we want to use a counting completion, complete
97 * the flush completion once to allow a single access to
98 * the flush completion without blocking.
99 */
100 init_completion(&dqp->q_flush);
101 complete(&dqp->q_flush);
102
103 trace_xfs_dqinit(dqp);
104 } else {
105 /*
106 * Only the q_core portion was zeroed in dqreclaim_one().
107 * So, we need to reset others.
108 */
109 dqp->q_nrefs = 0;
110 dqp->q_blkno = 0;
111 INIT_LIST_HEAD(&dqp->q_mplist);
112 INIT_LIST_HEAD(&dqp->q_hashlist);
113 dqp->q_bufoffset = 0;
114 dqp->q_fileoffset = 0;
115 dqp->q_transp = NULL;
116 dqp->q_gdquot = NULL;
117 dqp->q_res_bcount = 0;
118 dqp->q_res_icount = 0;
119 dqp->q_res_rtbcount = 0;
120 atomic_set(&dqp->q_pincount, 0);
121 dqp->q_hash = NULL;
122 ASSERT(list_empty(&dqp->q_freelist));
123
124 trace_xfs_dqreuse(dqp);
125 }
126
127 /*
128 * In either case we need to make sure group quotas have a different
129 * lock class than user quotas, to make sure lockdep knows we can
 130 * hold locks of one of each at the same time.
131 */
132 if (!(type & XFS_DQ_USER))
133 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
134
135 /*
136 * log item gets initialized later
137 */
138 return (dqp);
139}
140
141/*
142 * This is called to free all the memory associated with a dquot 66 * This is called to free all the memory associated with a dquot
143 */ 67 */
144void 68void
@@ -215,10 +139,10 @@ xfs_qm_adjust_dqtimers(
215 139
216 if (!d->d_btimer) { 140 if (!d->d_btimer) {
217 if ((d->d_blk_softlimit && 141 if ((d->d_blk_softlimit &&
218 (be64_to_cpu(d->d_bcount) >= 142 (be64_to_cpu(d->d_bcount) >
219 be64_to_cpu(d->d_blk_softlimit))) || 143 be64_to_cpu(d->d_blk_softlimit))) ||
220 (d->d_blk_hardlimit && 144 (d->d_blk_hardlimit &&
221 (be64_to_cpu(d->d_bcount) >= 145 (be64_to_cpu(d->d_bcount) >
222 be64_to_cpu(d->d_blk_hardlimit)))) { 146 be64_to_cpu(d->d_blk_hardlimit)))) {
223 d->d_btimer = cpu_to_be32(get_seconds() + 147 d->d_btimer = cpu_to_be32(get_seconds() +
224 mp->m_quotainfo->qi_btimelimit); 148 mp->m_quotainfo->qi_btimelimit);
@@ -227,10 +151,10 @@ xfs_qm_adjust_dqtimers(
227 } 151 }
228 } else { 152 } else {
229 if ((!d->d_blk_softlimit || 153 if ((!d->d_blk_softlimit ||
230 (be64_to_cpu(d->d_bcount) < 154 (be64_to_cpu(d->d_bcount) <=
231 be64_to_cpu(d->d_blk_softlimit))) && 155 be64_to_cpu(d->d_blk_softlimit))) &&
232 (!d->d_blk_hardlimit || 156 (!d->d_blk_hardlimit ||
233 (be64_to_cpu(d->d_bcount) < 157 (be64_to_cpu(d->d_bcount) <=
234 be64_to_cpu(d->d_blk_hardlimit)))) { 158 be64_to_cpu(d->d_blk_hardlimit)))) {
235 d->d_btimer = 0; 159 d->d_btimer = 0;
236 } 160 }
@@ -238,10 +162,10 @@ xfs_qm_adjust_dqtimers(
238 162
239 if (!d->d_itimer) { 163 if (!d->d_itimer) {
240 if ((d->d_ino_softlimit && 164 if ((d->d_ino_softlimit &&
241 (be64_to_cpu(d->d_icount) >= 165 (be64_to_cpu(d->d_icount) >
242 be64_to_cpu(d->d_ino_softlimit))) || 166 be64_to_cpu(d->d_ino_softlimit))) ||
243 (d->d_ino_hardlimit && 167 (d->d_ino_hardlimit &&
244 (be64_to_cpu(d->d_icount) >= 168 (be64_to_cpu(d->d_icount) >
245 be64_to_cpu(d->d_ino_hardlimit)))) { 169 be64_to_cpu(d->d_ino_hardlimit)))) {
246 d->d_itimer = cpu_to_be32(get_seconds() + 170 d->d_itimer = cpu_to_be32(get_seconds() +
247 mp->m_quotainfo->qi_itimelimit); 171 mp->m_quotainfo->qi_itimelimit);
@@ -250,10 +174,10 @@ xfs_qm_adjust_dqtimers(
250 } 174 }
251 } else { 175 } else {
252 if ((!d->d_ino_softlimit || 176 if ((!d->d_ino_softlimit ||
253 (be64_to_cpu(d->d_icount) < 177 (be64_to_cpu(d->d_icount) <=
254 be64_to_cpu(d->d_ino_softlimit))) && 178 be64_to_cpu(d->d_ino_softlimit))) &&
255 (!d->d_ino_hardlimit || 179 (!d->d_ino_hardlimit ||
256 (be64_to_cpu(d->d_icount) < 180 (be64_to_cpu(d->d_icount) <=
257 be64_to_cpu(d->d_ino_hardlimit)))) { 181 be64_to_cpu(d->d_ino_hardlimit)))) {
258 d->d_itimer = 0; 182 d->d_itimer = 0;
259 } 183 }
@@ -261,10 +185,10 @@ xfs_qm_adjust_dqtimers(
261 185
262 if (!d->d_rtbtimer) { 186 if (!d->d_rtbtimer) {
263 if ((d->d_rtb_softlimit && 187 if ((d->d_rtb_softlimit &&
264 (be64_to_cpu(d->d_rtbcount) >= 188 (be64_to_cpu(d->d_rtbcount) >
265 be64_to_cpu(d->d_rtb_softlimit))) || 189 be64_to_cpu(d->d_rtb_softlimit))) ||
266 (d->d_rtb_hardlimit && 190 (d->d_rtb_hardlimit &&
267 (be64_to_cpu(d->d_rtbcount) >= 191 (be64_to_cpu(d->d_rtbcount) >
268 be64_to_cpu(d->d_rtb_hardlimit)))) { 192 be64_to_cpu(d->d_rtb_hardlimit)))) {
269 d->d_rtbtimer = cpu_to_be32(get_seconds() + 193 d->d_rtbtimer = cpu_to_be32(get_seconds() +
270 mp->m_quotainfo->qi_rtbtimelimit); 194 mp->m_quotainfo->qi_rtbtimelimit);
@@ -273,10 +197,10 @@ xfs_qm_adjust_dqtimers(
273 } 197 }
274 } else { 198 } else {
275 if ((!d->d_rtb_softlimit || 199 if ((!d->d_rtb_softlimit ||
276 (be64_to_cpu(d->d_rtbcount) < 200 (be64_to_cpu(d->d_rtbcount) <=
277 be64_to_cpu(d->d_rtb_softlimit))) && 201 be64_to_cpu(d->d_rtb_softlimit))) &&
278 (!d->d_rtb_hardlimit || 202 (!d->d_rtb_hardlimit ||
279 (be64_to_cpu(d->d_rtbcount) < 203 (be64_to_cpu(d->d_rtbcount) <=
280 be64_to_cpu(d->d_rtb_hardlimit)))) { 204 be64_to_cpu(d->d_rtb_hardlimit)))) {
281 d->d_rtbtimer = 0; 205 d->d_rtbtimer = 0;
282 } 206 }
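Note the boundary flip throughout xfs_qm_adjust_dqtimers(): usage exactly equal to a soft or hard limit no longer starts the grace timer (>= became >), and the clearing checks relax accordingly (< became <=); xfs_qm_dqcheck() and xfs_qm_export_dquot() below get the matching fix. A standalone sketch of the new boundary (field names are illustrative, not the on-disk dquot):

#include <stdbool.h>
#include <stdio.h>

/* Start the grace timer only when usage strictly exceeds the soft
 * limit (0 means "no limit"), mirroring the new checks in
 * xfs_qm_adjust_dqtimers(). */
static bool timer_should_run(unsigned long long count,
			     unsigned long long softlimit)
{
	return softlimit && count > softlimit;
}

int main(void)
{
	/* Exactly at the limit: previously over quota, now not. */
	printf("count == soft: %s\n",
	       timer_should_run(100, 100) ? "timer" : "no timer");
	printf("count >  soft: %s\n",
	       timer_should_run(101, 100) ? "timer" : "no timer");
	return 0;
}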
@@ -567,7 +491,32 @@ xfs_qm_dqread(
567 int error; 491 int error;
568 int cancelflags = 0; 492 int cancelflags = 0;
569 493
570 dqp = xfs_qm_dqinit(mp, id, type); 494
495 dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
496
497 dqp->dq_flags = type;
498 dqp->q_core.d_id = cpu_to_be32(id);
499 dqp->q_mount = mp;
500 INIT_LIST_HEAD(&dqp->q_freelist);
501 mutex_init(&dqp->q_qlock);
502 init_waitqueue_head(&dqp->q_pinwait);
503
504 /*
505 * Because we want to use a counting completion, complete
506 * the flush completion once to allow a single access to
507 * the flush completion without blocking.
508 */
509 init_completion(&dqp->q_flush);
510 complete(&dqp->q_flush);
511
512 /*
513 * Make sure group quotas have a different lock class than user
514 * quotas.
515 */
516 if (!(type & XFS_DQ_USER))
517 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
518
519 atomic_inc(&xfs_Gqm->qm_totaldquots);
571 520
572 trace_xfs_dqread(dqp); 521 trace_xfs_dqread(dqp);
573 522
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 541a508adea1..0ed9ee77937c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1489,7 +1489,7 @@ xlog_recover_add_to_cont_trans(
1489 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; 1489 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1490 old_len = item->ri_buf[item->ri_cnt-1].i_len; 1490 old_len = item->ri_buf[item->ri_cnt-1].i_len;
1491 1491
1492 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u); 1492 ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
1493 memcpy(&ptr[old_len], dp, len); /* d, s, l */ 1493 memcpy(&ptr[old_len], dp, len); /* d, s, l */
1494 item->ri_buf[item->ri_cnt-1].i_len += len; 1494 item->ri_buf[item->ri_cnt-1].i_len += len;
1495 item->ri_buf[item->ri_cnt-1].i_addr = ptr; 1495 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
@@ -1981,7 +1981,7 @@ xfs_qm_dqcheck(
1981 1981
1982 if (!errs && ddq->d_id) { 1982 if (!errs && ddq->d_id) {
1983 if (ddq->d_blk_softlimit && 1983 if (ddq->d_blk_softlimit &&
1984 be64_to_cpu(ddq->d_bcount) >= 1984 be64_to_cpu(ddq->d_bcount) >
1985 be64_to_cpu(ddq->d_blk_softlimit)) { 1985 be64_to_cpu(ddq->d_blk_softlimit)) {
1986 if (!ddq->d_btimer) { 1986 if (!ddq->d_btimer) {
1987 if (flags & XFS_QMOPT_DOWARN) 1987 if (flags & XFS_QMOPT_DOWARN)
@@ -1992,7 +1992,7 @@ xfs_qm_dqcheck(
1992 } 1992 }
1993 } 1993 }
1994 if (ddq->d_ino_softlimit && 1994 if (ddq->d_ino_softlimit &&
1995 be64_to_cpu(ddq->d_icount) >= 1995 be64_to_cpu(ddq->d_icount) >
1996 be64_to_cpu(ddq->d_ino_softlimit)) { 1996 be64_to_cpu(ddq->d_ino_softlimit)) {
1997 if (!ddq->d_itimer) { 1997 if (!ddq->d_itimer) {
1998 if (flags & XFS_QMOPT_DOWARN) 1998 if (flags & XFS_QMOPT_DOWARN)
@@ -2003,7 +2003,7 @@ xfs_qm_dqcheck(
2003 } 2003 }
2004 } 2004 }
2005 if (ddq->d_rtb_softlimit && 2005 if (ddq->d_rtb_softlimit &&
2006 be64_to_cpu(ddq->d_rtbcount) >= 2006 be64_to_cpu(ddq->d_rtbcount) >
2007 be64_to_cpu(ddq->d_rtb_softlimit)) { 2007 be64_to_cpu(ddq->d_rtb_softlimit)) {
2008 if (!ddq->d_rtbtimer) { 2008 if (!ddq->d_rtbtimer) {
2009 if (flags & XFS_QMOPT_DOWARN) 2009 if (flags & XFS_QMOPT_DOWARN)
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 671f37eae1c7..c436def733bf 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -50,7 +50,6 @@
50 */ 50 */
51struct mutex xfs_Gqm_lock; 51struct mutex xfs_Gqm_lock;
52struct xfs_qm *xfs_Gqm; 52struct xfs_qm *xfs_Gqm;
53uint ndquot;
54 53
55kmem_zone_t *qm_dqzone; 54kmem_zone_t *qm_dqzone;
56kmem_zone_t *qm_dqtrxzone; 55kmem_zone_t *qm_dqtrxzone;
@@ -93,7 +92,6 @@ xfs_Gqm_init(void)
93 goto out_free_udqhash; 92 goto out_free_udqhash;
94 93
95 hsize /= sizeof(xfs_dqhash_t); 94 hsize /= sizeof(xfs_dqhash_t);
96 ndquot = hsize << 8;
97 95
98 xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); 96 xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
99 xqm->qm_dqhashmask = hsize - 1; 97 xqm->qm_dqhashmask = hsize - 1;
@@ -137,7 +135,6 @@ xfs_Gqm_init(void)
137 xqm->qm_dqtrxzone = qm_dqtrxzone; 135 xqm->qm_dqtrxzone = qm_dqtrxzone;
138 136
139 atomic_set(&xqm->qm_totaldquots, 0); 137 atomic_set(&xqm->qm_totaldquots, 0);
140 xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
141 xqm->qm_nrefs = 0; 138 xqm->qm_nrefs = 0;
142 return xqm; 139 return xqm;
143 140
@@ -1600,216 +1597,150 @@ xfs_qm_init_quotainos(
1600 return 0; 1597 return 0;
1601} 1598}
1602 1599
1600STATIC void
1601xfs_qm_dqfree_one(
1602 struct xfs_dquot *dqp)
1603{
1604 struct xfs_mount *mp = dqp->q_mount;
1605 struct xfs_quotainfo *qi = mp->m_quotainfo;
1603 1606
1607 mutex_lock(&dqp->q_hash->qh_lock);
1608 list_del_init(&dqp->q_hashlist);
1609 dqp->q_hash->qh_version++;
1610 mutex_unlock(&dqp->q_hash->qh_lock);
1604 1611
1605/* 1612 mutex_lock(&qi->qi_dqlist_lock);
1606 * Pop the least recently used dquot off the freelist and recycle it. 1613 list_del_init(&dqp->q_mplist);
1607 */ 1614 qi->qi_dquots--;
1608STATIC struct xfs_dquot * 1615 qi->qi_dqreclaims++;
1609xfs_qm_dqreclaim_one(void) 1616 mutex_unlock(&qi->qi_dqlist_lock);
1617
1618 xfs_qm_dqdestroy(dqp);
1619}
1620
1621STATIC void
1622xfs_qm_dqreclaim_one(
1623 struct xfs_dquot *dqp,
1624 struct list_head *dispose_list)
1610{ 1625{
1611 struct xfs_dquot *dqp; 1626 struct xfs_mount *mp = dqp->q_mount;
1612 int restarts = 0; 1627 int error;
1613 1628
1614 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 1629 if (!xfs_dqlock_nowait(dqp))
1615restart: 1630 goto out_busy;
1616 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
1617 struct xfs_mount *mp = dqp->q_mount;
1618 1631
1619 if (!xfs_dqlock_nowait(dqp)) 1632 /*
 1620 continue; 1633 * This dquot has acquired a reference in the meantime; remove it from
1634 * the freelist and try again.
1635 */
1636 if (dqp->q_nrefs) {
1637 xfs_dqunlock(dqp);
1621 1638
1622 /* 1639 trace_xfs_dqreclaim_want(dqp);
1623 * This dquot has already been grabbed by dqlookup. 1640 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1624 * Remove it from the freelist and try again.
1625 */
1626 if (dqp->q_nrefs) {
1627 trace_xfs_dqreclaim_want(dqp);
1628 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1629
1630 list_del_init(&dqp->q_freelist);
1631 xfs_Gqm->qm_dqfrlist_cnt--;
1632 restarts++;
1633 goto dqunlock;
1634 }
1635 1641
1636 ASSERT(dqp->q_hash); 1642 list_del_init(&dqp->q_freelist);
1637 ASSERT(!list_empty(&dqp->q_mplist)); 1643 xfs_Gqm->qm_dqfrlist_cnt--;
1644 return;
1645 }
1638 1646
1639 /* 1647 ASSERT(dqp->q_hash);
1640 * Try to grab the flush lock. If this dquot is in the process 1648 ASSERT(!list_empty(&dqp->q_mplist));
1641 * of getting flushed to disk, we don't want to reclaim it.
1642 */
1643 if (!xfs_dqflock_nowait(dqp))
1644 goto dqunlock;
1645 1649
1646 /* 1650 /*
1647 * We have the flush lock so we know that this is not in the 1651 * Try to grab the flush lock. If this dquot is in the process of
1648 * process of being flushed. So, if this is dirty, flush it 1652 * getting flushed to disk, we don't want to reclaim it.
1649 * DELWRI so that we don't get a freelist infested with 1653 */
1650 * dirty dquots. 1654 if (!xfs_dqflock_nowait(dqp))
1651 */ 1655 goto out_busy;
1652 if (XFS_DQ_IS_DIRTY(dqp)) {
1653 int error;
1654 1656
1655 trace_xfs_dqreclaim_dirty(dqp); 1657 /*
1658 * We have the flush lock so we know that this is not in the
1659 * process of being flushed. So, if this is dirty, flush it
1660 * DELWRI so that we don't get a freelist infested with
1661 * dirty dquots.
1662 */
1663 if (XFS_DQ_IS_DIRTY(dqp)) {
1664 trace_xfs_dqreclaim_dirty(dqp);
1656 1665
1657 /* 1666 /*
1658 * We flush it delayed write, so don't bother 1667 * We flush it delayed write, so don't bother releasing the
1659 * releasing the freelist lock. 1668 * freelist lock.
1660 */ 1669 */
1661 error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK); 1670 error = xfs_qm_dqflush(dqp, 0);
1662 if (error) { 1671 if (error) {
1663 xfs_warn(mp, "%s: dquot %p flush failed", 1672 xfs_warn(mp, "%s: dquot %p flush failed",
1664 __func__, dqp); 1673 __func__, dqp);
1665 }
1666 goto dqunlock;
1667 } 1674 }
1668 xfs_dqfunlock(dqp);
1669 1675
1670 /* 1676 /*
1671 * Prevent lookup now that we are going to reclaim the dquot. 1677 * Give the dquot another try on the freelist, as the
1672 * Once XFS_DQ_FREEING is set lookup won't touch the dquot, 1678 * flushing will take some time.
1673 * thus we can drop the lock now.
1674 */ 1679 */
1675 dqp->dq_flags |= XFS_DQ_FREEING; 1680 goto out_busy;
1676 xfs_dqunlock(dqp); 1681 }
1677 1682 xfs_dqfunlock(dqp);
1678 mutex_lock(&dqp->q_hash->qh_lock);
1679 list_del_init(&dqp->q_hashlist);
1680 dqp->q_hash->qh_version++;
1681 mutex_unlock(&dqp->q_hash->qh_lock);
1682
1683 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
1684 list_del_init(&dqp->q_mplist);
1685 mp->m_quotainfo->qi_dquots--;
1686 mp->m_quotainfo->qi_dqreclaims++;
1687 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1688 1683
1689 ASSERT(dqp->q_nrefs == 0); 1684 /*
1690 list_del_init(&dqp->q_freelist); 1685 * Prevent lookups now that we are past the point of no return.
1691 xfs_Gqm->qm_dqfrlist_cnt--; 1686 */
1687 dqp->dq_flags |= XFS_DQ_FREEING;
1688 xfs_dqunlock(dqp);
1692 1689
1693 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 1690 ASSERT(dqp->q_nrefs == 0);
1694 return dqp; 1691 list_move_tail(&dqp->q_freelist, dispose_list);
1695dqunlock: 1692 xfs_Gqm->qm_dqfrlist_cnt--;
1696 xfs_dqunlock(dqp);
1697 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1698 break;
1699 goto restart;
1700 }
1701 1693
1702 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 1694 trace_xfs_dqreclaim_done(dqp);
1703 return NULL; 1695 XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
1704} 1696 return;
1705 1697
1706/* 1698out_busy:
1707 * Traverse the freelist of dquots and attempt to reclaim a maximum of 1699 xfs_dqunlock(dqp);
1708 * 'howmany' dquots. This operation races with dqlookup(), and attempts to
1709 * favor the lookup function ...
1710 */
1711STATIC int
1712xfs_qm_shake_freelist(
1713 int howmany)
1714{
1715 int nreclaimed = 0;
1716 xfs_dquot_t *dqp;
1717 1700
1718 if (howmany <= 0) 1701 /*
1719 return 0; 1702 * Move the dquot to the tail of the list so that we don't spin on it.
1703 */
1704 list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
1720 1705
1721 while (nreclaimed < howmany) { 1706 trace_xfs_dqreclaim_busy(dqp);
1722 dqp = xfs_qm_dqreclaim_one(); 1707 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
1723 if (!dqp)
1724 return nreclaimed;
1725 xfs_qm_dqdestroy(dqp);
1726 nreclaimed++;
1727 }
1728 return nreclaimed;
1729} 1708}
1730 1709
1731/*
1732 * The kmem_shake interface is invoked when memory is running low.
1733 */
1734/* ARGSUSED */
1735STATIC int 1710STATIC int
1736xfs_qm_shake( 1711xfs_qm_shake(
1737 struct shrinker *shrink, 1712 struct shrinker *shrink,
1738 struct shrink_control *sc) 1713 struct shrink_control *sc)
1739{ 1714{
1740 int ndqused, nfree, n; 1715 int nr_to_scan = sc->nr_to_scan;
1741 gfp_t gfp_mask = sc->gfp_mask; 1716 LIST_HEAD (dispose_list);
1742 1717 struct xfs_dquot *dqp;
1743 if (!kmem_shake_allow(gfp_mask))
1744 return 0;
1745 if (!xfs_Gqm)
1746 return 0;
1747
1748 nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
1749 /* incore dquots in all f/s's */
1750 ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
1751
1752 ASSERT(ndqused >= 0);
1753 1718
1754 if (nfree <= ndqused && nfree < ndquot) 1719 if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
1755 return 0; 1720 return 0;
1721 if (!nr_to_scan)
1722 goto out;
1756 1723
1757 ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */ 1724 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1758 n = nfree - ndqused - ndquot; /* # over target */ 1725 while (!list_empty(&xfs_Gqm->qm_dqfrlist)) {
1759 1726 if (nr_to_scan-- <= 0)
1760 return xfs_qm_shake_freelist(MAX(nfree, n)); 1727 break;
1761} 1728 dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot,
1762 1729 q_freelist);
1763 1730 xfs_qm_dqreclaim_one(dqp, &dispose_list);
1764/*------------------------------------------------------------------*/
1765
1766/*
1767 * Return a new incore dquot. Depending on the number of
1768 * dquots in the system, we either allocate a new one on the kernel heap,
1769 * or reclaim a free one.
1770 * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
1771 * to reclaim an existing one from the freelist.
1772 */
1773boolean_t
1774xfs_qm_dqalloc_incore(
1775 xfs_dquot_t **O_dqpp)
1776{
1777 xfs_dquot_t *dqp;
1778
1779 /*
1780 * Check against high water mark to see if we want to pop
1781 * a nincompoop dquot off the freelist.
1782 */
1783 if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
1784 /*
1785 * Try to recycle a dquot from the freelist.
1786 */
1787 if ((dqp = xfs_qm_dqreclaim_one())) {
1788 XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
1789 /*
1790 * Just zero the core here. The rest will get
1791 * reinitialized by caller. XXX we shouldn't even
1792 * do this zero ...
1793 */
1794 memset(&dqp->q_core, 0, sizeof(dqp->q_core));
1795 *O_dqpp = dqp;
1796 return B_FALSE;
1797 }
1798 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
1799 } 1731 }
1732 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1800 1733
1801 /* 1734 while (!list_empty(&dispose_list)) {
1802 * Allocate a brand new dquot on the kernel heap and return it 1735 dqp = list_first_entry(&dispose_list, struct xfs_dquot,
1803 * to the caller to initialize. 1736 q_freelist);
1804 */ 1737 list_del_init(&dqp->q_freelist);
1805 ASSERT(xfs_Gqm->qm_dqzone != NULL); 1738 xfs_qm_dqfree_one(dqp);
1806 *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); 1739 }
1807 atomic_inc(&xfs_Gqm->qm_totaldquots); 1740out:
1808 1741 return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure;
1809 return B_TRUE;
1810} 1742}
1811 1743
1812
1813/* 1744/*
1814 * Start a transaction and write the incore superblock changes to 1745 * Start a transaction and write the incore superblock changes to
1815 * disk. flags parameter indicates which fields have changed. 1746 * disk. flags parameter indicates which fields have changed.
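The rewritten shrinker walks the freelist under qm_dqfrlist_lock, moves reclaimable dquots onto a private dispose_list, and only frees them after dropping the lock, so xfs_qm_dqfree_one() can take the hash and mplist mutexes safely. A generic hedged sketch of that dispose-list pattern (struct obj, obj_busy(), obj_free() and the lru globals are stand-ins, not XFS code):

static int shrink_objects(int nr_to_scan)
{
	LIST_HEAD(dispose);
	struct obj *o;
	int freed = 0;

	spin_lock(&lru_lock);
	while (nr_to_scan-- > 0 && !list_empty(&lru_list)) {
		o = list_first_entry(&lru_list, struct obj, lru);
		if (obj_busy(o))
			list_move_tail(&o->lru, &lru_list);	/* re-queue */
		else
			list_move_tail(&o->lru, &dispose);	/* isolate */
	}
	spin_unlock(&lru_lock);

	/* Tear victims down with no locks held; freeing may sleep. */
	while (!list_empty(&dispose)) {
		o = list_first_entry(&dispose, struct obj, lru);
		list_del_init(&o->lru);
		obj_free(o);
		freed++;
	}
	return freed;
}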
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 9b4f3adefbc5..9a9b997e1a0a 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -26,24 +26,12 @@
26struct xfs_qm; 26struct xfs_qm;
27struct xfs_inode; 27struct xfs_inode;
28 28
29extern uint ndquot;
30extern struct mutex xfs_Gqm_lock; 29extern struct mutex xfs_Gqm_lock;
31extern struct xfs_qm *xfs_Gqm; 30extern struct xfs_qm *xfs_Gqm;
32extern kmem_zone_t *qm_dqzone; 31extern kmem_zone_t *qm_dqzone;
33extern kmem_zone_t *qm_dqtrxzone; 32extern kmem_zone_t *qm_dqtrxzone;
34 33
35/* 34/*
36 * Ditto, for xfs_qm_dqreclaim_one.
37 */
38#define XFS_QM_RECLAIM_MAX_RESTARTS 4
39
40/*
41 * Ideal ratio of free to in use dquots. Quota manager makes an attempt
42 * to keep this balance.
43 */
44#define XFS_QM_DQFREE_RATIO 2
45
46/*
47 * Dquot hashtable constants/threshold values. 35 * Dquot hashtable constants/threshold values.
48 */ 36 */
49#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t)) 37#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t))
@@ -74,7 +62,6 @@ typedef struct xfs_qm {
74 int qm_dqfrlist_cnt; 62 int qm_dqfrlist_cnt;
75 atomic_t qm_totaldquots; /* total incore dquots */ 63 atomic_t qm_totaldquots; /* total incore dquots */
76 uint qm_nrefs; /* file systems with quota on */ 64 uint qm_nrefs; /* file systems with quota on */
77 int qm_dqfree_ratio;/* ratio of free to inuse dquots */
78 kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ 65 kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */
79 kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ 66 kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */
80} xfs_qm_t; 67} xfs_qm_t;
@@ -143,7 +130,6 @@ extern int xfs_qm_quotacheck(xfs_mount_t *);
143extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); 130extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
144 131
145/* dquot stuff */ 132/* dquot stuff */
146extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **);
147extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); 133extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint);
148extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); 134extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
149 135
diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c
index 8671a0b32644..5729ba570877 100644
--- a/fs/xfs/xfs_qm_stats.c
+++ b/fs/xfs/xfs_qm_stats.c
@@ -42,9 +42,9 @@ static int xqm_proc_show(struct seq_file *m, void *v)
42{ 42{
43 /* maximum; incore; ratio free to inuse; freelist */ 43 /* maximum; incore; ratio free to inuse; freelist */
44 seq_printf(m, "%d\t%d\t%d\t%u\n", 44 seq_printf(m, "%d\t%d\t%d\t%u\n",
45 ndquot, 45 0,
46 xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, 46 xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
47 xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, 47 0,
48 xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0); 48 xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
49 return 0; 49 return 0;
50} 50}
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index eafbcff81f3a..711a86e39ff0 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -813,11 +813,11 @@ xfs_qm_export_dquot(
813 (XFS_IS_OQUOTA_ENFORCED(mp) && 813 (XFS_IS_OQUOTA_ENFORCED(mp) &&
814 (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && 814 (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
815 dst->d_id != 0) { 815 dst->d_id != 0) {
816 if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && 816 if (((int) dst->d_bcount > (int) dst->d_blk_softlimit) &&
817 (dst->d_blk_softlimit > 0)) { 817 (dst->d_blk_softlimit > 0)) {
818 ASSERT(dst->d_btimer != 0); 818 ASSERT(dst->d_btimer != 0);
819 } 819 }
820 if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) && 820 if (((int) dst->d_icount > (int) dst->d_ino_softlimit) &&
821 (dst->d_ino_softlimit > 0)) { 821 (dst->d_ino_softlimit > 0)) {
822 ASSERT(dst->d_itimer != 0); 822 ASSERT(dst->d_itimer != 0);
823 } 823 }
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 6b6df5802e95..bb134a819930 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -733,11 +733,10 @@ DEFINE_EVENT(xfs_dquot_class, name, \
733DEFINE_DQUOT_EVENT(xfs_dqadjust); 733DEFINE_DQUOT_EVENT(xfs_dqadjust);
734DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); 734DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
735DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); 735DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
736DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); 736DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy);
737DEFINE_DQUOT_EVENT(xfs_dqreclaim_done);
737DEFINE_DQUOT_EVENT(xfs_dqattach_found); 738DEFINE_DQUOT_EVENT(xfs_dqattach_found);
738DEFINE_DQUOT_EVENT(xfs_dqattach_get); 739DEFINE_DQUOT_EVENT(xfs_dqattach_get);
739DEFINE_DQUOT_EVENT(xfs_dqinit);
740DEFINE_DQUOT_EVENT(xfs_dqreuse);
741DEFINE_DQUOT_EVENT(xfs_dqalloc); 740DEFINE_DQUOT_EVENT(xfs_dqalloc);
742DEFINE_DQUOT_EVENT(xfs_dqtobp_read); 741DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
743DEFINE_DQUOT_EVENT(xfs_dqread); 742DEFINE_DQUOT_EVENT(xfs_dqread);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 329b06aba1c2..7adcdf15ae0c 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1151,8 +1151,8 @@ xfs_trans_add_item(
1151{ 1151{
1152 struct xfs_log_item_desc *lidp; 1152 struct xfs_log_item_desc *lidp;
1153 1153
1154 ASSERT(lip->li_mountp = tp->t_mountp); 1154 ASSERT(lip->li_mountp == tp->t_mountp);
1155 ASSERT(lip->li_ailp = tp->t_mountp->m_ail); 1155 ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
1156 1156
1157 lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS); 1157 lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS);
1158 1158
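The old ASSERTs used '=' where '==' was meant, so on DEBUG builds they assigned the expected value and could never fire. A minimal standalone illustration of the bug class:

#include <assert.h>

int main(void)
{
	int a = 1, b = 2;

	/* Buggy: '=' assigns b to a and asserts on the (nonzero) result,
	 * so the check never fires and silently clobbers a. */
	assert(a = b);

	/* Intended: '==' actually compares. This passes only because the
	 * buggy line above already set a to 2. */
	assert(a == b);
	return 0;
}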
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 4d00ee67792d..c4ba366d24e6 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -649,12 +649,12 @@ xfs_trans_dqresv(
649 * nblks. 649 * nblks.
650 */ 650 */
651 if (hardlimit > 0ULL && 651 if (hardlimit > 0ULL &&
652 hardlimit <= nblks + *resbcountp) { 652 hardlimit < nblks + *resbcountp) {
653 xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); 653 xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
654 goto error_return; 654 goto error_return;
655 } 655 }
656 if (softlimit > 0ULL && 656 if (softlimit > 0ULL &&
657 softlimit <= nblks + *resbcountp) { 657 softlimit < nblks + *resbcountp) {
658 if ((timer != 0 && get_seconds() > timer) || 658 if ((timer != 0 && get_seconds() > timer) ||
659 (warns != 0 && warns >= warnlimit)) { 659 (warns != 0 && warns >= warnlimit)) {
660 xfs_quota_warn(mp, dqp, 660 xfs_quota_warn(mp, dqp,
@@ -677,11 +677,13 @@ xfs_trans_dqresv(
677 if (!softlimit) 677 if (!softlimit)
678 softlimit = q->qi_isoftlimit; 678 softlimit = q->qi_isoftlimit;
679 679
680 if (hardlimit > 0ULL && count >= hardlimit) { 680 if (hardlimit > 0ULL &&
681 hardlimit < ninos + count) {
681 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); 682 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
682 goto error_return; 683 goto error_return;
683 } 684 }
684 if (softlimit > 0ULL && count >= softlimit) { 685 if (softlimit > 0ULL &&
686 softlimit < ninos + count) {
685 if ((timer != 0 && get_seconds() > timer) || 687 if ((timer != 0 && get_seconds() > timer) ||
686 (warns != 0 && warns >= warnlimit)) { 688 (warns != 0 && warns >= warnlimit)) {
687 xfs_quota_warn(mp, dqp, 689 xfs_quota_warn(mp, dqp,
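The xfs_trans_dqresv() hunks flip the same boundary as the timer changes earlier: a reservation that lands exactly on a limit is now allowed, and only one that strictly exceeds it fails (the inode check also gains the previously missing ninos term). A standalone sketch of the new check (names are illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Fail only when the reservation would push usage strictly past the
 * hard limit; landing exactly on it is permitted. Mirrors the new
 * xfs_trans_dqresv() block check. */
static bool resv_ok(unsigned long long hardlimit,
		    unsigned long long nblks,
		    unsigned long long reserved)
{
	if (hardlimit > 0ULL && hardlimit < nblks + reserved)
		return false;
	return true;
}

int main(void)
{
	/* Exactly reaching the limit: rejected before, allowed now. */
	printf("90 + 10 vs 100: %s\n", resv_ok(100, 10, 90) ? "ok" : "EDQUOT");
	printf("91 + 10 vs 100: %s\n", resv_ok(100, 10, 91) ? "ok" : "EDQUOT");
	return 0;
}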