aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/main.c4
-rw-r--r--fs/aio.c13
-rw-r--r--fs/btrfs/extent_io.c10
-rw-r--r--fs/btrfs/ordered-data.c11
-rw-r--r--fs/btrfs/volumes.c8
-rw-r--r--fs/buffer.c11
-rw-r--r--fs/cifs/connect.c10
-rw-r--r--fs/cifs/file.c9
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/debugfs/inode.c39
-rw-r--r--fs/direct-io.c23
-rw-r--r--fs/exec.c6
-rw-r--r--fs/ext4/balloc.c1
-rw-r--r--fs/ext4/dir.c25
-rw-r--r--fs/ext4/ext4.h14
-rw-r--r--fs/ext4/extents.c14
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/indirect.c281
-rw-r--r--fs/ext4/inline.c18
-rw-r--r--fs/ext4/inode.c130
-rw-r--r--fs/ext4/mballoc.c41
-rw-r--r--fs/ext4/migrate.c7
-rw-r--r--fs/ext4/move_extent.c3
-rw-r--r--fs/ext4/super.c88
-rw-r--r--fs/f2fs/acl.c6
-rw-r--r--fs/f2fs/checkpoint.c178
-rw-r--r--fs/f2fs/data.c59
-rw-r--r--fs/f2fs/debug.c19
-rw-r--r--fs/f2fs/dir.c87
-rw-r--r--fs/f2fs/f2fs.h50
-rw-r--r--fs/f2fs/file.c45
-rw-r--r--fs/f2fs/gc.c7
-rw-r--r--fs/f2fs/hash.c4
-rw-r--r--fs/f2fs/inline.c1
-rw-r--r--fs/f2fs/inode.c12
-rw-r--r--fs/f2fs/namei.c246
-rw-r--r--fs/f2fs/node.c273
-rw-r--r--fs/f2fs/node.h7
-rw-r--r--fs/f2fs/recovery.c22
-rw-r--r--fs/f2fs/segment.c38
-rw-r--r--fs/f2fs/segment.h8
-rw-r--r--fs/f2fs/super.c21
-rw-r--r--fs/fs-writeback.c3
-rw-r--r--fs/fscache/cookie.c7
-rw-r--r--fs/fscache/internal.h2
-rw-r--r--fs/fscache/main.c18
-rw-r--r--fs/fscache/page.c4
-rw-r--r--fs/fuse/dev.c51
-rw-r--r--fs/fuse/dir.c41
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/fuse/inode.c27
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/gfs2/glock.c39
-rw-r--r--fs/gfs2/glops.c4
-rw-r--r--fs/gfs2/lock_dlm.c12
-rw-r--r--fs/gfs2/ops_fstype.c11
-rw-r--r--fs/gfs2/recovery.c8
-rw-r--r--fs/gfs2/rgrp.c4
-rw-r--r--fs/gfs2/super.c8
-rw-r--r--fs/inode.c7
-rw-r--r--fs/jbd2/transaction.c10
-rw-r--r--fs/kernfs/file.c2
-rw-r--r--fs/lockd/mon.c4
-rw-r--r--fs/locks.c26
-rw-r--r--fs/namei.c3
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c4
-rw-r--r--fs/nfs/idmap.c10
-rw-r--r--fs/nfs/inode.c6
-rw-r--r--fs/nfs/internal.h3
-rw-r--r--fs/nfs/nfs3acl.c43
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs4state.c4
-rw-r--r--fs/nfs/pagelist.c34
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/write.c339
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/open.c5
-rw-r--r--fs/proc/array.c18
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/timerfd.c77
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/xfs_bmap.c7
-rw-r--r--fs/xfs/xfs_bmap.h4
-rw-r--r--fs/xfs/xfs_bmap_util.c53
-rw-r--r--fs/xfs/xfs_bmap_util.h4
-rw-r--r--fs/xfs/xfs_btree.c82
-rw-r--r--fs/xfs/xfs_iomap.c3
-rw-r--r--fs/xfs/xfs_sb.c25
94 files changed, 1883 insertions, 1028 deletions
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 42dd2e499ed8..35de0c04729f 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -55,13 +55,13 @@ static int __init afs_get_client_UUID(void)
55 afs_uuid.time_low = uuidtime; 55 afs_uuid.time_low = uuidtime;
56 afs_uuid.time_mid = uuidtime >> 32; 56 afs_uuid.time_mid = uuidtime >> 32;
57 afs_uuid.time_hi_and_version = (uuidtime >> 48) & AFS_UUID_TIMEHI_MASK; 57 afs_uuid.time_hi_and_version = (uuidtime >> 48) & AFS_UUID_TIMEHI_MASK;
58 afs_uuid.time_hi_and_version = AFS_UUID_VERSION_TIME; 58 afs_uuid.time_hi_and_version |= AFS_UUID_VERSION_TIME;
59 59
60 get_random_bytes(&clockseq, 2); 60 get_random_bytes(&clockseq, 2);
61 afs_uuid.clock_seq_low = clockseq; 61 afs_uuid.clock_seq_low = clockseq;
62 afs_uuid.clock_seq_hi_and_reserved = 62 afs_uuid.clock_seq_hi_and_reserved =
63 (clockseq >> 8) & AFS_UUID_CLOCKHI_MASK; 63 (clockseq >> 8) & AFS_UUID_CLOCKHI_MASK;
64 afs_uuid.clock_seq_hi_and_reserved = AFS_UUID_VARIANT_STD; 64 afs_uuid.clock_seq_hi_and_reserved |= AFS_UUID_VARIANT_STD;
65 65
66 _debug("AFS UUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", 66 _debug("AFS UUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
67 afs_uuid.time_low, 67 afs_uuid.time_low,
diff --git a/fs/aio.c b/fs/aio.c
index 955947ef3e02..bd7ec2cc2674 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -506,6 +506,8 @@ static void free_ioctx(struct work_struct *work)
506 506
507 aio_free_ring(ctx); 507 aio_free_ring(ctx);
508 free_percpu(ctx->cpu); 508 free_percpu(ctx->cpu);
509 percpu_ref_exit(&ctx->reqs);
510 percpu_ref_exit(&ctx->users);
509 kmem_cache_free(kioctx_cachep, ctx); 511 kmem_cache_free(kioctx_cachep, ctx);
510} 512}
511 513
@@ -715,8 +717,8 @@ err_ctx:
715err: 717err:
716 mutex_unlock(&ctx->ring_lock); 718 mutex_unlock(&ctx->ring_lock);
717 free_percpu(ctx->cpu); 719 free_percpu(ctx->cpu);
718 free_percpu(ctx->reqs.pcpu_count); 720 percpu_ref_exit(&ctx->reqs);
719 free_percpu(ctx->users.pcpu_count); 721 percpu_ref_exit(&ctx->users);
720 kmem_cache_free(kioctx_cachep, ctx); 722 kmem_cache_free(kioctx_cachep, ctx);
721 pr_debug("error allocating ioctx %d\n", err); 723 pr_debug("error allocating ioctx %d\n", err);
722 return ERR_PTR(err); 724 return ERR_PTR(err);
@@ -830,16 +832,20 @@ void exit_aio(struct mm_struct *mm)
830static void put_reqs_available(struct kioctx *ctx, unsigned nr) 832static void put_reqs_available(struct kioctx *ctx, unsigned nr)
831{ 833{
832 struct kioctx_cpu *kcpu; 834 struct kioctx_cpu *kcpu;
835 unsigned long flags;
833 836
834 preempt_disable(); 837 preempt_disable();
835 kcpu = this_cpu_ptr(ctx->cpu); 838 kcpu = this_cpu_ptr(ctx->cpu);
836 839
840 local_irq_save(flags);
837 kcpu->reqs_available += nr; 841 kcpu->reqs_available += nr;
842
838 while (kcpu->reqs_available >= ctx->req_batch * 2) { 843 while (kcpu->reqs_available >= ctx->req_batch * 2) {
839 kcpu->reqs_available -= ctx->req_batch; 844 kcpu->reqs_available -= ctx->req_batch;
840 atomic_add(ctx->req_batch, &ctx->reqs_available); 845 atomic_add(ctx->req_batch, &ctx->reqs_available);
841 } 846 }
842 847
848 local_irq_restore(flags);
843 preempt_enable(); 849 preempt_enable();
844} 850}
845 851
@@ -847,10 +853,12 @@ static bool get_reqs_available(struct kioctx *ctx)
847{ 853{
848 struct kioctx_cpu *kcpu; 854 struct kioctx_cpu *kcpu;
849 bool ret = false; 855 bool ret = false;
856 unsigned long flags;
850 857
851 preempt_disable(); 858 preempt_disable();
852 kcpu = this_cpu_ptr(ctx->cpu); 859 kcpu = this_cpu_ptr(ctx->cpu);
853 860
861 local_irq_save(flags);
854 if (!kcpu->reqs_available) { 862 if (!kcpu->reqs_available) {
855 int old, avail = atomic_read(&ctx->reqs_available); 863 int old, avail = atomic_read(&ctx->reqs_available);
856 864
@@ -869,6 +877,7 @@ static bool get_reqs_available(struct kioctx *ctx)
869 ret = true; 877 ret = true;
870 kcpu->reqs_available--; 878 kcpu->reqs_available--;
871out: 879out:
880 local_irq_restore(flags);
872 preempt_enable(); 881 preempt_enable();
873 return ret; 882 return ret;
874} 883}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a389820d158b..3e11aab9f391 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3437,16 +3437,10 @@ done_unlocked:
3437 return 0; 3437 return 0;
3438} 3438}
3439 3439
3440static int eb_wait(void *word)
3441{
3442 io_schedule();
3443 return 0;
3444}
3445
3446void wait_on_extent_buffer_writeback(struct extent_buffer *eb) 3440void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3447{ 3441{
3448 wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait, 3442 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3449 TASK_UNINTERRUPTIBLE); 3443 TASK_UNINTERRUPTIBLE);
3450} 3444}
3451 3445
3452static noinline_for_stack int 3446static noinline_for_stack int
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e12441c7cf1d..7187b14faa6c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -484,8 +484,19 @@ void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
484 log_list); 484 log_list);
485 list_del_init(&ordered->log_list); 485 list_del_init(&ordered->log_list);
486 spin_unlock_irq(&log->log_extents_lock[index]); 486 spin_unlock_irq(&log->log_extents_lock[index]);
487
488 if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
489 !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
490 struct inode *inode = ordered->inode;
491 u64 start = ordered->file_offset;
492 u64 end = ordered->file_offset + ordered->len - 1;
493
494 WARN_ON(!inode);
495 filemap_fdatawrite_range(inode->i_mapping, start, end);
496 }
487 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, 497 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
488 &ordered->flags)); 498 &ordered->flags));
499
489 btrfs_put_ordered_extent(ordered); 500 btrfs_put_ordered_extent(ordered);
490 spin_lock_irq(&log->log_extents_lock[index]); 501 spin_lock_irq(&log->log_extents_lock[index]);
491 } 502 }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6104676857f5..6cb82f62cb7c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1680,11 +1680,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1680 if (device->bdev == root->fs_info->fs_devices->latest_bdev) 1680 if (device->bdev == root->fs_info->fs_devices->latest_bdev)
1681 root->fs_info->fs_devices->latest_bdev = next_device->bdev; 1681 root->fs_info->fs_devices->latest_bdev = next_device->bdev;
1682 1682
1683 if (device->bdev) 1683 if (device->bdev) {
1684 device->fs_devices->open_devices--; 1684 device->fs_devices->open_devices--;
1685 1685 /* remove sysfs entry */
1686 /* remove sysfs entry */ 1686 btrfs_kobj_rm_device(root->fs_info, device);
1687 btrfs_kobj_rm_device(root->fs_info, device); 1687 }
1688 1688
1689 call_rcu(&device->rcu, free_device); 1689 call_rcu(&device->rcu, free_device);
1690 1690
diff --git a/fs/buffer.c b/fs/buffer.c
index eba6e4f621ce..8f05111bbb8b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -61,16 +61,9 @@ inline void touch_buffer(struct buffer_head *bh)
61} 61}
62EXPORT_SYMBOL(touch_buffer); 62EXPORT_SYMBOL(touch_buffer);
63 63
64static int sleep_on_buffer(void *word)
65{
66 io_schedule();
67 return 0;
68}
69
70void __lock_buffer(struct buffer_head *bh) 64void __lock_buffer(struct buffer_head *bh)
71{ 65{
72 wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer, 66 wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
73 TASK_UNINTERRUPTIBLE);
74} 67}
75EXPORT_SYMBOL(__lock_buffer); 68EXPORT_SYMBOL(__lock_buffer);
76 69
@@ -123,7 +116,7 @@ EXPORT_SYMBOL(buffer_check_dirty_writeback);
123 */ 116 */
124void __wait_on_buffer(struct buffer_head * bh) 117void __wait_on_buffer(struct buffer_head * bh)
125{ 118{
126 wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE); 119 wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
127} 120}
128EXPORT_SYMBOL(__wait_on_buffer); 121EXPORT_SYMBOL(__wait_on_buffer);
129 122
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 20d75b8ddb26..b98366f21f9e 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3934,13 +3934,6 @@ cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
3934 return tlink_tcon(cifs_sb_master_tlink(cifs_sb)); 3934 return tlink_tcon(cifs_sb_master_tlink(cifs_sb));
3935} 3935}
3936 3936
3937static int
3938cifs_sb_tcon_pending_wait(void *unused)
3939{
3940 schedule();
3941 return signal_pending(current) ? -ERESTARTSYS : 0;
3942}
3943
3944/* find and return a tlink with given uid */ 3937/* find and return a tlink with given uid */
3945static struct tcon_link * 3938static struct tcon_link *
3946tlink_rb_search(struct rb_root *root, kuid_t uid) 3939tlink_rb_search(struct rb_root *root, kuid_t uid)
@@ -4039,11 +4032,10 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
4039 } else { 4032 } else {
4040wait_for_construction: 4033wait_for_construction:
4041 ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING, 4034 ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
4042 cifs_sb_tcon_pending_wait,
4043 TASK_INTERRUPTIBLE); 4035 TASK_INTERRUPTIBLE);
4044 if (ret) { 4036 if (ret) {
4045 cifs_put_tlink(tlink); 4037 cifs_put_tlink(tlink);
4046 return ERR_PTR(ret); 4038 return ERR_PTR(-ERESTARTSYS);
4047 } 4039 }
4048 4040
4049 /* if it's good, return it */ 4041 /* if it's good, return it */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e90a1e9aa627..b88b1ade4d3d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3618,13 +3618,6 @@ static int cifs_launder_page(struct page *page)
3618 return rc; 3618 return rc;
3619} 3619}
3620 3620
3621static int
3622cifs_pending_writers_wait(void *unused)
3623{
3624 schedule();
3625 return 0;
3626}
3627
3628void cifs_oplock_break(struct work_struct *work) 3621void cifs_oplock_break(struct work_struct *work)
3629{ 3622{
3630 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 3623 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -3636,7 +3629,7 @@ void cifs_oplock_break(struct work_struct *work)
3636 int rc = 0; 3629 int rc = 0;
3637 3630
3638 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, 3631 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3639 cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE); 3632 TASK_UNINTERRUPTIBLE);
3640 3633
3641 server->ops->downgrade_oplock(server, cinode, 3634 server->ops->downgrade_oplock(server, cinode,
3642 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags)); 3635 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a174605f6afa..41de3935caa0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1780,7 +1780,7 @@ cifs_invalidate_mapping(struct inode *inode)
1780 * @word: long word containing the bit lock 1780 * @word: long word containing the bit lock
1781 */ 1781 */
1782static int 1782static int
1783cifs_wait_bit_killable(void *word) 1783cifs_wait_bit_killable(struct wait_bit_key *key)
1784{ 1784{
1785 if (fatal_signal_pending(current)) 1785 if (fatal_signal_pending(current))
1786 return -ERESTARTSYS; 1786 return -ERESTARTSYS;
@@ -1794,8 +1794,8 @@ cifs_revalidate_mapping(struct inode *inode)
1794 int rc; 1794 int rc;
1795 unsigned long *flags = &CIFS_I(inode)->flags; 1795 unsigned long *flags = &CIFS_I(inode)->flags;
1796 1796
1797 rc = wait_on_bit_lock(flags, CIFS_INO_LOCK, cifs_wait_bit_killable, 1797 rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
1798 TASK_KILLABLE); 1798 TASK_KILLABLE);
1799 if (rc) 1799 if (rc)
1800 return rc; 1800 return rc;
1801 1801
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 3b0c62e622da..6bf55d0ed494 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -582,7 +582,7 @@ int cifs_get_writer(struct cifsInodeInfo *cinode)
582 582
583start: 583start:
584 rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK, 584 rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK,
585 cifs_oplock_break_wait, TASK_KILLABLE); 585 TASK_KILLABLE);
586 if (rc) 586 if (rc)
587 return rc; 587 return rc;
588 588
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index e82289047272..afec6450450f 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -59,7 +59,7 @@
59#include <linux/gfp.h> 59#include <linux/gfp.h>
60 60
61#include <net/bluetooth/bluetooth.h> 61#include <net/bluetooth/bluetooth.h>
62#include <net/bluetooth/hci.h> 62#include <net/bluetooth/hci_sock.h>
63#include <net/bluetooth/rfcomm.h> 63#include <net/bluetooth/rfcomm.h>
64 64
65#include <linux/capi.h> 65#include <linux/capi.h>
diff --git a/fs/coredump.c b/fs/coredump.c
index 0b2528fb640e..a93f7e6ea4cf 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -306,7 +306,7 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
306 if (unlikely(nr < 0)) 306 if (unlikely(nr < 0))
307 return nr; 307 return nr;
308 308
309 tsk->flags = PF_DUMPCORE; 309 tsk->flags |= PF_DUMPCORE;
310 if (atomic_read(&mm->mm_users) == nr + 1) 310 if (atomic_read(&mm->mm_users) == nr + 1)
311 goto done; 311 goto done;
312 /* 312 /*
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 63146295153b..76c08c2beb2f 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -451,7 +451,7 @@ static ssize_t read_file_bool(struct file *file, char __user *user_buf,
451{ 451{
452 char buf[3]; 452 char buf[3];
453 u32 *val = file->private_data; 453 u32 *val = file->private_data;
454 454
455 if (*val) 455 if (*val)
456 buf[0] = 'Y'; 456 buf[0] = 'Y';
457 else 457 else
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 8c41b52da358..1e3b99d3db0d 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -66,7 +66,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev
66 break; 66 break;
67 } 67 }
68 } 68 }
69 return inode; 69 return inode;
70} 70}
71 71
72/* SMP-safe */ 72/* SMP-safe */
@@ -317,7 +317,7 @@ static struct dentry *__create_file(const char *name, umode_t mode,
317 goto exit; 317 goto exit;
318 318
319 /* If the parent is not specified, we create it in the root. 319 /* If the parent is not specified, we create it in the root.
320 * We need the root dentry to do this, which is in the super 320 * We need the root dentry to do this, which is in the super
321 * block. A pointer to that is in the struct vfsmount that we 321 * block. A pointer to that is in the struct vfsmount that we
322 * have around. 322 * have around.
323 */ 323 */
@@ -330,7 +330,7 @@ static struct dentry *__create_file(const char *name, umode_t mode,
330 switch (mode & S_IFMT) { 330 switch (mode & S_IFMT) {
331 case S_IFDIR: 331 case S_IFDIR:
332 error = debugfs_mkdir(parent->d_inode, dentry, mode); 332 error = debugfs_mkdir(parent->d_inode, dentry, mode);
333 333
334 break; 334 break;
335 case S_IFLNK: 335 case S_IFLNK:
336 error = debugfs_link(parent->d_inode, dentry, mode, 336 error = debugfs_link(parent->d_inode, dentry, mode,
@@ -534,7 +534,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove);
534 */ 534 */
535void debugfs_remove_recursive(struct dentry *dentry) 535void debugfs_remove_recursive(struct dentry *dentry)
536{ 536{
537 struct dentry *child, *next, *parent; 537 struct dentry *child, *parent;
538 538
539 if (IS_ERR_OR_NULL(dentry)) 539 if (IS_ERR_OR_NULL(dentry))
540 return; 540 return;
@@ -546,30 +546,49 @@ void debugfs_remove_recursive(struct dentry *dentry)
546 parent = dentry; 546 parent = dentry;
547 down: 547 down:
548 mutex_lock(&parent->d_inode->i_mutex); 548 mutex_lock(&parent->d_inode->i_mutex);
549 list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) { 549 loop:
550 /*
551 * The parent->d_subdirs is protected by the d_lock. Outside that
552 * lock, the child can be unlinked and set to be freed which can
553 * use the d_u.d_child as the rcu head and corrupt this list.
554 */
555 spin_lock(&parent->d_lock);
556 list_for_each_entry(child, &parent->d_subdirs, d_u.d_child) {
550 if (!debugfs_positive(child)) 557 if (!debugfs_positive(child))
551 continue; 558 continue;
552 559
553 /* perhaps simple_empty(child) makes more sense */ 560 /* perhaps simple_empty(child) makes more sense */
554 if (!list_empty(&child->d_subdirs)) { 561 if (!list_empty(&child->d_subdirs)) {
562 spin_unlock(&parent->d_lock);
555 mutex_unlock(&parent->d_inode->i_mutex); 563 mutex_unlock(&parent->d_inode->i_mutex);
556 parent = child; 564 parent = child;
557 goto down; 565 goto down;
558 } 566 }
559 up: 567
568 spin_unlock(&parent->d_lock);
569
560 if (!__debugfs_remove(child, parent)) 570 if (!__debugfs_remove(child, parent))
561 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 571 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
572
573 /*
574 * The parent->d_lock protects agaist child from unlinking
575 * from d_subdirs. When releasing the parent->d_lock we can
576 * no longer trust that the next pointer is valid.
577 * Restart the loop. We'll skip this one with the
578 * debugfs_positive() check.
579 */
580 goto loop;
562 } 581 }
582 spin_unlock(&parent->d_lock);
563 583
564 mutex_unlock(&parent->d_inode->i_mutex); 584 mutex_unlock(&parent->d_inode->i_mutex);
565 child = parent; 585 child = parent;
566 parent = parent->d_parent; 586 parent = parent->d_parent;
567 mutex_lock(&parent->d_inode->i_mutex); 587 mutex_lock(&parent->d_inode->i_mutex);
568 588
569 if (child != dentry) { 589 if (child != dentry)
570 next = list_next_entry(child, d_u.d_child); 590 /* go up */
571 goto up; 591 goto loop;
572 }
573 592
574 if (!__debugfs_remove(child, parent)) 593 if (!__debugfs_remove(child, parent))
575 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 594 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 98040ba388ac..17e39b047de5 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -71,7 +71,6 @@ struct dio_submit {
71 been performed at the start of a 71 been performed at the start of a
72 write */ 72 write */
73 int pages_in_io; /* approximate total IO pages */ 73 int pages_in_io; /* approximate total IO pages */
74 size_t size; /* total request size (doesn't change)*/
75 sector_t block_in_file; /* Current offset into the underlying 74 sector_t block_in_file; /* Current offset into the underlying
76 file in dio_block units. */ 75 file in dio_block units. */
77 unsigned blocks_available; /* At block_in_file. changes */ 76 unsigned blocks_available; /* At block_in_file. changes */
@@ -198,9 +197,8 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
198 * L1 cache. 197 * L1 cache.
199 */ 198 */
200static inline struct page *dio_get_page(struct dio *dio, 199static inline struct page *dio_get_page(struct dio *dio,
201 struct dio_submit *sdio, size_t *from, size_t *to) 200 struct dio_submit *sdio)
202{ 201{
203 int n;
204 if (dio_pages_present(sdio) == 0) { 202 if (dio_pages_present(sdio) == 0) {
205 int ret; 203 int ret;
206 204
@@ -209,10 +207,7 @@ static inline struct page *dio_get_page(struct dio *dio,
209 return ERR_PTR(ret); 207 return ERR_PTR(ret);
210 BUG_ON(dio_pages_present(sdio) == 0); 208 BUG_ON(dio_pages_present(sdio) == 0);
211 } 209 }
212 n = sdio->head++; 210 return dio->pages[sdio->head];
213 *from = n ? 0 : sdio->from;
214 *to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
215 return dio->pages[n];
216} 211}
217 212
218/** 213/**
@@ -911,11 +906,15 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
911 while (sdio->block_in_file < sdio->final_block_in_request) { 906 while (sdio->block_in_file < sdio->final_block_in_request) {
912 struct page *page; 907 struct page *page;
913 size_t from, to; 908 size_t from, to;
914 page = dio_get_page(dio, sdio, &from, &to); 909
910 page = dio_get_page(dio, sdio);
915 if (IS_ERR(page)) { 911 if (IS_ERR(page)) {
916 ret = PTR_ERR(page); 912 ret = PTR_ERR(page);
917 goto out; 913 goto out;
918 } 914 }
915 from = sdio->head ? 0 : sdio->from;
916 to = (sdio->head == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
917 sdio->head++;
919 918
920 while (from < to) { 919 while (from < to) {
921 unsigned this_chunk_bytes; /* # of bytes mapped */ 920 unsigned this_chunk_bytes; /* # of bytes mapped */
@@ -1104,7 +1103,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1104 unsigned blkbits = i_blkbits; 1103 unsigned blkbits = i_blkbits;
1105 unsigned blocksize_mask = (1 << blkbits) - 1; 1104 unsigned blocksize_mask = (1 << blkbits) - 1;
1106 ssize_t retval = -EINVAL; 1105 ssize_t retval = -EINVAL;
1107 loff_t end = offset + iov_iter_count(iter); 1106 size_t count = iov_iter_count(iter);
1107 loff_t end = offset + count;
1108 struct dio *dio; 1108 struct dio *dio;
1109 struct dio_submit sdio = { 0, }; 1109 struct dio_submit sdio = { 0, };
1110 struct buffer_head map_bh = { 0, }; 1110 struct buffer_head map_bh = { 0, };
@@ -1287,10 +1287,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1287 */ 1287 */
1288 BUG_ON(retval == -EIOCBQUEUED); 1288 BUG_ON(retval == -EIOCBQUEUED);
1289 if (dio->is_async && retval == 0 && dio->result && 1289 if (dio->is_async && retval == 0 && dio->result &&
1290 ((rw == READ) || (dio->result == sdio.size))) 1290 (rw == READ || dio->result == count))
1291 retval = -EIOCBQUEUED; 1291 retval = -EIOCBQUEUED;
1292 1292 else
1293 if (retval != -EIOCBQUEUED)
1294 dio_await_completion(dio); 1293 dio_await_completion(dio);
1295 1294
1296 if (drop_refcount(dio) == 0) { 1295 if (drop_refcount(dio) == 0) {
diff --git a/fs/exec.c b/fs/exec.c
index a3d33fe592d6..ab1f1200ce5d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1216,7 +1216,7 @@ EXPORT_SYMBOL(install_exec_creds);
1216/* 1216/*
1217 * determine how safe it is to execute the proposed program 1217 * determine how safe it is to execute the proposed program
1218 * - the caller must hold ->cred_guard_mutex to protect against 1218 * - the caller must hold ->cred_guard_mutex to protect against
1219 * PTRACE_ATTACH 1219 * PTRACE_ATTACH or seccomp thread-sync
1220 */ 1220 */
1221static void check_unsafe_exec(struct linux_binprm *bprm) 1221static void check_unsafe_exec(struct linux_binprm *bprm)
1222{ 1222{
@@ -1234,7 +1234,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
1234 * This isn't strictly necessary, but it makes it harder for LSMs to 1234 * This isn't strictly necessary, but it makes it harder for LSMs to
1235 * mess up. 1235 * mess up.
1236 */ 1236 */
1237 if (current->no_new_privs) 1237 if (task_no_new_privs(current))
1238 bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; 1238 bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
1239 1239
1240 t = p; 1240 t = p;
@@ -1272,7 +1272,7 @@ int prepare_binprm(struct linux_binprm *bprm)
1272 bprm->cred->egid = current_egid(); 1272 bprm->cred->egid = current_egid();
1273 1273
1274 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && 1274 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
1275 !current->no_new_privs && 1275 !task_no_new_privs(current) &&
1276 kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && 1276 kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
1277 kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { 1277 kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
1278 /* Set-uid? */ 1278 /* Set-uid? */
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index fca382037ddd..581ef40fbe90 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -639,7 +639,6 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
639 if (!(*errp) && 639 if (!(*errp) &&
640 ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) { 640 ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
641 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 641 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
642 EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
643 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 642 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
644 dquot_alloc_block_nofail(inode, 643 dquot_alloc_block_nofail(inode,
645 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len)); 644 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ef1bed66c14f..0bb3f9ea0832 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -571,6 +571,31 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
571 return 0; 571 return 0;
572} 572}
573 573
574int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
575 int buf_size)
576{
577 struct ext4_dir_entry_2 *de;
578 int nlen, rlen;
579 unsigned int offset = 0;
580 char *top;
581
582 de = (struct ext4_dir_entry_2 *)buf;
583 top = buf + buf_size;
584 while ((char *) de < top) {
585 if (ext4_check_dir_entry(dir, NULL, de, bh,
586 buf, buf_size, offset))
587 return -EIO;
588 nlen = EXT4_DIR_REC_LEN(de->name_len);
589 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
590 de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
591 offset += rlen;
592 }
593 if ((char *) de > top)
594 return -EIO;
595
596 return 0;
597}
598
574const struct file_operations ext4_dir_operations = { 599const struct file_operations ext4_dir_operations = {
575 .llseek = ext4_dir_llseek, 600 .llseek = ext4_dir_llseek,
576 .read = generic_read_dir, 601 .read = generic_read_dir,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7cc5a0e23688..5b19760b1de5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -591,7 +591,6 @@ enum {
591#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 591#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
592#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 592#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
593#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 593#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
594#define EXT4_FREE_BLOCKS_RESERVE 0x0040
595 594
596/* 595/*
597 * ioctl commands 596 * ioctl commands
@@ -2029,6 +2028,8 @@ static inline unsigned char get_dtype(struct super_block *sb, int filetype)
2029 2028
2030 return ext4_filetype_table[filetype]; 2029 return ext4_filetype_table[filetype];
2031} 2030}
2031extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
2032 void *buf, int buf_size);
2032 2033
2033/* fsync.c */ 2034/* fsync.c */
2034extern int ext4_sync_file(struct file *, loff_t, loff_t, int); 2035extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
@@ -2144,8 +2145,8 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
2144extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); 2145extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
2145extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); 2146extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
2146extern void ext4_ind_truncate(handle_t *, struct inode *inode); 2147extern void ext4_ind_truncate(handle_t *, struct inode *inode);
2147extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, 2148extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
2148 ext4_lblk_t first, ext4_lblk_t stop); 2149 ext4_lblk_t start, ext4_lblk_t end);
2149 2150
2150/* ioctl.c */ 2151/* ioctl.c */
2151extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 2152extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -2560,7 +2561,6 @@ extern const struct file_operations ext4_file_operations;
2560extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); 2561extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
2561 2562
2562/* inline.c */ 2563/* inline.c */
2563extern int ext4_has_inline_data(struct inode *inode);
2564extern int ext4_get_max_inline_size(struct inode *inode); 2564extern int ext4_get_max_inline_size(struct inode *inode);
2565extern int ext4_find_inline_data_nolock(struct inode *inode); 2565extern int ext4_find_inline_data_nolock(struct inode *inode);
2566extern int ext4_init_inline_data(handle_t *handle, struct inode *inode, 2566extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
@@ -2626,6 +2626,12 @@ extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
2626 2626
2627extern int ext4_convert_inline_data(struct inode *inode); 2627extern int ext4_convert_inline_data(struct inode *inode);
2628 2628
2629static inline int ext4_has_inline_data(struct inode *inode)
2630{
2631 return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
2632 EXT4_I(inode)->i_inline_off;
2633}
2634
2629/* namei.c */ 2635/* namei.c */
2630extern const struct inode_operations ext4_dir_inode_operations; 2636extern const struct inode_operations ext4_dir_inode_operations;
2631extern const struct inode_operations ext4_special_inode_operations; 2637extern const struct inode_operations ext4_special_inode_operations;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4da228a0e6d0..76c2df382b7d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -161,6 +161,8 @@ int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
161 struct inode *inode, struct ext4_ext_path *path) 161 struct inode *inode, struct ext4_ext_path *path)
162{ 162{
163 int err; 163 int err;
164
165 WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
164 if (path->p_bh) { 166 if (path->p_bh) {
165 ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh)); 167 ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
166 /* path points to block */ 168 /* path points to block */
@@ -1808,8 +1810,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
1808 1810
1809 brelse(path[1].p_bh); 1811 brelse(path[1].p_bh);
1810 ext4_free_blocks(handle, inode, NULL, blk, 1, 1812 ext4_free_blocks(handle, inode, NULL, blk, 1,
1811 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET | 1813 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
1812 EXT4_FREE_BLOCKS_RESERVE);
1813} 1814}
1814 1815
1815/* 1816/*
@@ -3253,7 +3254,7 @@ out:
3253 3254
3254fix_extent_len: 3255fix_extent_len:
3255 ex->ee_len = orig_ex.ee_len; 3256 ex->ee_len = orig_ex.ee_len;
3256 ext4_ext_dirty(handle, inode, path + depth); 3257 ext4_ext_dirty(handle, inode, path + path->p_depth);
3257 return err; 3258 return err;
3258} 3259}
3259 3260
@@ -5403,16 +5404,13 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5403 int ret; 5404 int ret;
5404 5405
5405 /* Collapse range works only on fs block size aligned offsets. */ 5406 /* Collapse range works only on fs block size aligned offsets. */
5406 if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || 5407 if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
5407 len & (EXT4_BLOCK_SIZE(sb) - 1)) 5408 len & (EXT4_CLUSTER_SIZE(sb) - 1))
5408 return -EINVAL; 5409 return -EINVAL;
5409 5410
5410 if (!S_ISREG(inode->i_mode)) 5411 if (!S_ISREG(inode->i_mode))
5411 return -EINVAL; 5412 return -EINVAL;
5412 5413
5413 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1)
5414 return -EOPNOTSUPP;
5415
5416 trace_ext4_collapse_range(inode, offset, len); 5414 trace_ext4_collapse_range(inode, offset, len);
5417 5415
5418 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); 5416 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8695f70af1ef..aca7b24a4432 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -200,10 +200,6 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
200 200
201static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) 201static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
202{ 202{
203 struct address_space *mapping = file->f_mapping;
204
205 if (!mapping->a_ops->readpage)
206 return -ENOEXEC;
207 file_accessed(file); 203 file_accessed(file);
208 vma->vm_ops = &ext4_file_vm_ops; 204 vma->vm_ops = &ext4_file_vm_ops;
209 return 0; 205 return 0;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index fd69da194826..e75f840000a0 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1295,97 +1295,220 @@ do_indirects:
1295 } 1295 }
1296} 1296}
1297 1297
1298static int free_hole_blocks(handle_t *handle, struct inode *inode, 1298/**
1299 struct buffer_head *parent_bh, __le32 *i_data, 1299 * ext4_ind_remove_space - remove space from the range
1300 int level, ext4_lblk_t first, 1300 * @handle: JBD handle for this transaction
1301 ext4_lblk_t count, int max) 1301 * @inode: inode we are dealing with
1302 * @start: First block to remove
1303 * @end: One block after the last block to remove (exclusive)
1304 *
1305 * Free the blocks in the defined range (end is exclusive endpoint of
1306 * range). This is used by ext4_punch_hole().
1307 */
1308int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
1309 ext4_lblk_t start, ext4_lblk_t end)
1302{ 1310{
1303 struct buffer_head *bh = NULL; 1311 struct ext4_inode_info *ei = EXT4_I(inode);
1312 __le32 *i_data = ei->i_data;
1304 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1313 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1305 int ret = 0; 1314 ext4_lblk_t offsets[4], offsets2[4];
1306 int i, inc; 1315 Indirect chain[4], chain2[4];
1307 ext4_lblk_t offset; 1316 Indirect *partial, *partial2;
1308 __le32 blk; 1317 ext4_lblk_t max_block;
1309 1318 __le32 nr = 0, nr2 = 0;
1310 inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level); 1319 int n = 0, n2 = 0;
1311 for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) { 1320 unsigned blocksize = inode->i_sb->s_blocksize;
1312 if (offset >= count + first)
1313 break;
1314 if (*i_data == 0 || (offset + inc) <= first)
1315 continue;
1316 blk = *i_data;
1317 if (level > 0) {
1318 ext4_lblk_t first2;
1319 ext4_lblk_t count2;
1320 1321
1321 bh = sb_bread(inode->i_sb, le32_to_cpu(blk)); 1322 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
1322 if (!bh) { 1323 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
1323 EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk), 1324 if (end >= max_block)
1324 "Read failure"); 1325 end = max_block;
1325 return -EIO; 1326 if ((start >= end) || (start > max_block))
1326 } 1327 return 0;
1327 if (first > offset) { 1328
1328 first2 = first - offset; 1329 n = ext4_block_to_path(inode, start, offsets, NULL);
1329 count2 = count; 1330 n2 = ext4_block_to_path(inode, end, offsets2, NULL);
1331
1332 BUG_ON(n > n2);
1333
1334 if ((n == 1) && (n == n2)) {
1335 /* We're punching only within direct block range */
1336 ext4_free_data(handle, inode, NULL, i_data + offsets[0],
1337 i_data + offsets2[0]);
1338 return 0;
1339 } else if (n2 > n) {
1340 /*
1341 * Start and end are on a different levels so we're going to
1342 * free partial block at start, and partial block at end of
1343 * the range. If there are some levels in between then
1344 * do_indirects label will take care of that.
1345 */
1346
1347 if (n == 1) {
1348 /*
1349 * Start is at the direct block level, free
1350 * everything to the end of the level.
1351 */
1352 ext4_free_data(handle, inode, NULL, i_data + offsets[0],
1353 i_data + EXT4_NDIR_BLOCKS);
1354 goto end_range;
1355 }
1356
1357
1358 partial = ext4_find_shared(inode, n, offsets, chain, &nr);
1359 if (nr) {
1360 if (partial == chain) {
1361 /* Shared branch grows from the inode */
1362 ext4_free_branches(handle, inode, NULL,
1363 &nr, &nr+1, (chain+n-1) - partial);
1364 *partial->p = 0;
1330 } else { 1365 } else {
1331 first2 = 0; 1366 /* Shared branch grows from an indirect block */
1332 count2 = count - (offset - first); 1367 BUFFER_TRACE(partial->bh, "get_write_access");
1368 ext4_free_branches(handle, inode, partial->bh,
1369 partial->p,
1370 partial->p+1, (chain+n-1) - partial);
1333 } 1371 }
1334 ret = free_hole_blocks(handle, inode, bh, 1372 }
1335 (__le32 *)bh->b_data, level - 1, 1373
1336 first2, count2, 1374 /*
1337 inode->i_sb->s_blocksize >> 2); 1375 * Clear the ends of indirect blocks on the shared branch
1338 if (ret) { 1376 * at the start of the range
1339 brelse(bh); 1377 */
1340 goto err; 1378 while (partial > chain) {
1379 ext4_free_branches(handle, inode, partial->bh,
1380 partial->p + 1,
1381 (__le32 *)partial->bh->b_data+addr_per_block,
1382 (chain+n-1) - partial);
1383 BUFFER_TRACE(partial->bh, "call brelse");
1384 brelse(partial->bh);
1385 partial--;
1386 }
1387
1388end_range:
1389 partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
1390 if (nr2) {
1391 if (partial2 == chain2) {
1392 /*
1393 * Remember, end is exclusive so here we're at
1394 * the start of the next level we're not going
1395 * to free. Everything was covered by the start
1396 * of the range.
1397 */
1398 return 0;
1399 } else {
1400 /* Shared branch grows from an indirect block */
1401 partial2--;
1341 } 1402 }
1403 } else {
1404 /*
1405 * ext4_find_shared returns Indirect structure which
1406 * points to the last element which should not be
1407 * removed by truncate. But this is end of the range
1408 * in punch_hole so we need to point to the next element
1409 */
1410 partial2->p++;
1342 } 1411 }
1343 if (level == 0 || 1412
1344 (bh && all_zeroes((__le32 *)bh->b_data, 1413 /*
1345 (__le32 *)bh->b_data + addr_per_block))) { 1414 * Clear the ends of indirect blocks on the shared branch
1346 ext4_free_data(handle, inode, parent_bh, 1415 * at the end of the range
1347 i_data, i_data + 1); 1416 */
1417 while (partial2 > chain2) {
1418 ext4_free_branches(handle, inode, partial2->bh,
1419 (__le32 *)partial2->bh->b_data,
1420 partial2->p,
1421 (chain2+n2-1) - partial2);
1422 BUFFER_TRACE(partial2->bh, "call brelse");
1423 brelse(partial2->bh);
1424 partial2--;
1348 } 1425 }
1349 brelse(bh); 1426 goto do_indirects;
1350 bh = NULL;
1351 } 1427 }
1352 1428
1353err: 1429 /* Punch happened within the same level (n == n2) */
1354 return ret; 1430 partial = ext4_find_shared(inode, n, offsets, chain, &nr);
1355} 1431 partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
1356 1432 /*
1357int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, 1433 * ext4_find_shared returns Indirect structure which
1358 ext4_lblk_t first, ext4_lblk_t stop) 1434 * points to the last element which should not be
1359{ 1435 * removed by truncate. But this is end of the range
1360 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1436 * in punch_hole so we need to point to the next element
1361 int level, ret = 0; 1437 */
1362 int num = EXT4_NDIR_BLOCKS; 1438 partial2->p++;
1363 ext4_lblk_t count, max = EXT4_NDIR_BLOCKS; 1439 while ((partial > chain) || (partial2 > chain2)) {
1364 __le32 *i_data = EXT4_I(inode)->i_data; 1440 /* We're at the same block, so we're almost finished */
1365 1441 if ((partial->bh && partial2->bh) &&
1366 count = stop - first; 1442 (partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
1367 for (level = 0; level < 4; level++, max *= addr_per_block) { 1443 if ((partial > chain) && (partial2 > chain2)) {
1368 if (first < max) { 1444 ext4_free_branches(handle, inode, partial->bh,
1369 ret = free_hole_blocks(handle, inode, NULL, i_data, 1445 partial->p + 1,
1370 level, first, count, num); 1446 partial2->p,
1371 if (ret) 1447 (chain+n-1) - partial);
1372 goto err; 1448 BUFFER_TRACE(partial->bh, "call brelse");
1373 if (count > max - first) 1449 brelse(partial->bh);
1374 count -= max - first; 1450 BUFFER_TRACE(partial2->bh, "call brelse");
1375 else 1451 brelse(partial2->bh);
1376 break; 1452 }
1377 first = 0; 1453 return 0;
1378 } else {
1379 first -= max;
1380 } 1454 }
1381 i_data += num; 1455 /*
1382 if (level == 0) { 1456 * Clear the ends of indirect blocks on the shared branch
1383 num = 1; 1457 * at the start of the range
1384 max = 1; 1458 */
1459 if (partial > chain) {
1460 ext4_free_branches(handle, inode, partial->bh,
1461 partial->p + 1,
1462 (__le32 *)partial->bh->b_data+addr_per_block,
1463 (chain+n-1) - partial);
1464 BUFFER_TRACE(partial->bh, "call brelse");
1465 brelse(partial->bh);
1466 partial--;
1467 }
1468 /*
1469 * Clear the ends of indirect blocks on the shared branch
1470 * at the end of the range
1471 */
1472 if (partial2 > chain2) {
1473 ext4_free_branches(handle, inode, partial2->bh,
1474 (__le32 *)partial2->bh->b_data,
1475 partial2->p,
1476 (chain2+n-1) - partial2);
1477 BUFFER_TRACE(partial2->bh, "call brelse");
1478 brelse(partial2->bh);
1479 partial2--;
1385 } 1480 }
1386 } 1481 }
1387 1482
1388err: 1483do_indirects:
1389 return ret; 1484 /* Kill the remaining (whole) subtrees */
1485 switch (offsets[0]) {
1486 default:
1487 if (++n >= n2)
1488 return 0;
1489 nr = i_data[EXT4_IND_BLOCK];
1490 if (nr) {
1491 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
1492 i_data[EXT4_IND_BLOCK] = 0;
1493 }
1494 case EXT4_IND_BLOCK:
1495 if (++n >= n2)
1496 return 0;
1497 nr = i_data[EXT4_DIND_BLOCK];
1498 if (nr) {
1499 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
1500 i_data[EXT4_DIND_BLOCK] = 0;
1501 }
1502 case EXT4_DIND_BLOCK:
1503 if (++n >= n2)
1504 return 0;
1505 nr = i_data[EXT4_TIND_BLOCK];
1506 if (nr) {
1507 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
1508 i_data[EXT4_TIND_BLOCK] = 0;
1509 }
1510 case EXT4_TIND_BLOCK:
1511 ;
1512 }
1513 return 0;
1390} 1514}
1391
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 645205d8ada6..bea662bd0ca6 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -120,12 +120,6 @@ int ext4_get_max_inline_size(struct inode *inode)
120 return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE; 120 return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
121} 121}
122 122
123int ext4_has_inline_data(struct inode *inode)
124{
125 return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
126 EXT4_I(inode)->i_inline_off;
127}
128
129/* 123/*
130 * this function does not take xattr_sem, which is OK because it is 124 * this function does not take xattr_sem, which is OK because it is
131 * currently only used in a code path coming form ext4_iget, before 125 * currently only used in a code path coming form ext4_iget, before
@@ -1178,6 +1172,18 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
1178 if (error < 0) 1172 if (error < 0)
1179 goto out; 1173 goto out;
1180 1174
1175 /*
1176 * Make sure the inline directory entries pass checks before we try to
1177 * convert them, so that we avoid touching stuff that needs fsck.
1178 */
1179 if (S_ISDIR(inode->i_mode)) {
1180 error = ext4_check_all_de(inode, iloc->bh,
1181 buf + EXT4_INLINE_DOTDOT_SIZE,
1182 inline_size - EXT4_INLINE_DOTDOT_SIZE);
1183 if (error)
1184 goto out;
1185 }
1186
1181 error = ext4_destroy_inline_data_nolock(handle, inode); 1187 error = ext4_destroy_inline_data_nolock(handle, inode);
1182 if (error) 1188 if (error)
1183 goto out; 1189 goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8a064734e6eb..367a60c07cf0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -325,18 +325,6 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
325#endif 325#endif
326 326
327/* 327/*
328 * Calculate the number of metadata blocks need to reserve
329 * to allocate a block located at @lblock
330 */
331static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
332{
333 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
334 return ext4_ext_calc_metadata_amount(inode, lblock);
335
336 return ext4_ind_calc_metadata_amount(inode, lblock);
337}
338
339/*
340 * Called with i_data_sem down, which is important since we can call 328 * Called with i_data_sem down, which is important since we can call
341 * ext4_discard_preallocations() from here. 329 * ext4_discard_preallocations() from here.
342 */ 330 */
@@ -357,35 +345,10 @@ void ext4_da_update_reserve_space(struct inode *inode,
357 used = ei->i_reserved_data_blocks; 345 used = ei->i_reserved_data_blocks;
358 } 346 }
359 347
360 if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
361 ext4_warning(inode->i_sb, "ino %lu, allocated %d "
362 "with only %d reserved metadata blocks "
363 "(releasing %d blocks with reserved %d data blocks)",
364 inode->i_ino, ei->i_allocated_meta_blocks,
365 ei->i_reserved_meta_blocks, used,
366 ei->i_reserved_data_blocks);
367 WARN_ON(1);
368 ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
369 }
370
371 /* Update per-inode reservations */ 348 /* Update per-inode reservations */
372 ei->i_reserved_data_blocks -= used; 349 ei->i_reserved_data_blocks -= used;
373 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; 350 percpu_counter_sub(&sbi->s_dirtyclusters_counter, used);
374 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
375 used + ei->i_allocated_meta_blocks);
376 ei->i_allocated_meta_blocks = 0;
377 351
378 if (ei->i_reserved_data_blocks == 0) {
379 /*
380 * We can release all of the reserved metadata blocks
381 * only when we have written all of the delayed
382 * allocation blocks.
383 */
384 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
385 ei->i_reserved_meta_blocks);
386 ei->i_reserved_meta_blocks = 0;
387 ei->i_da_metadata_calc_len = 0;
388 }
389 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 352 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
390 353
391 /* Update quota subsystem for data blocks */ 354 /* Update quota subsystem for data blocks */
@@ -1222,49 +1185,6 @@ static int ext4_journalled_write_end(struct file *file,
1222} 1185}
1223 1186
1224/* 1187/*
1225 * Reserve a metadata for a single block located at lblock
1226 */
1227static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
1228{
1229 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1230 struct ext4_inode_info *ei = EXT4_I(inode);
1231 unsigned int md_needed;
1232 ext4_lblk_t save_last_lblock;
1233 int save_len;
1234
1235 /*
1236 * recalculate the amount of metadata blocks to reserve
1237 * in order to allocate nrblocks
1238 * worse case is one extent per block
1239 */
1240 spin_lock(&ei->i_block_reservation_lock);
1241 /*
1242 * ext4_calc_metadata_amount() has side effects, which we have
1243 * to be prepared undo if we fail to claim space.
1244 */
1245 save_len = ei->i_da_metadata_calc_len;
1246 save_last_lblock = ei->i_da_metadata_calc_last_lblock;
1247 md_needed = EXT4_NUM_B2C(sbi,
1248 ext4_calc_metadata_amount(inode, lblock));
1249 trace_ext4_da_reserve_space(inode, md_needed);
1250
1251 /*
1252 * We do still charge estimated metadata to the sb though;
1253 * we cannot afford to run out of free blocks.
1254 */
1255 if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
1256 ei->i_da_metadata_calc_len = save_len;
1257 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1258 spin_unlock(&ei->i_block_reservation_lock);
1259 return -ENOSPC;
1260 }
1261 ei->i_reserved_meta_blocks += md_needed;
1262 spin_unlock(&ei->i_block_reservation_lock);
1263
1264 return 0; /* success */
1265}
1266
1267/*
1268 * Reserve a single cluster located at lblock 1188 * Reserve a single cluster located at lblock
1269 */ 1189 */
1270static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) 1190static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
@@ -1273,8 +1193,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1273 struct ext4_inode_info *ei = EXT4_I(inode); 1193 struct ext4_inode_info *ei = EXT4_I(inode);
1274 unsigned int md_needed; 1194 unsigned int md_needed;
1275 int ret; 1195 int ret;
1276 ext4_lblk_t save_last_lblock;
1277 int save_len;
1278 1196
1279 /* 1197 /*
1280 * We will charge metadata quota at writeout time; this saves 1198 * We will charge metadata quota at writeout time; this saves
@@ -1295,25 +1213,15 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1295 * ext4_calc_metadata_amount() has side effects, which we have 1213 * ext4_calc_metadata_amount() has side effects, which we have
1296 * to be prepared undo if we fail to claim space. 1214 * to be prepared undo if we fail to claim space.
1297 */ 1215 */
1298 save_len = ei->i_da_metadata_calc_len; 1216 md_needed = 0;
1299 save_last_lblock = ei->i_da_metadata_calc_last_lblock; 1217 trace_ext4_da_reserve_space(inode, 0);
1300 md_needed = EXT4_NUM_B2C(sbi,
1301 ext4_calc_metadata_amount(inode, lblock));
1302 trace_ext4_da_reserve_space(inode, md_needed);
1303 1218
1304 /* 1219 if (ext4_claim_free_clusters(sbi, 1, 0)) {
1305 * We do still charge estimated metadata to the sb though;
1306 * we cannot afford to run out of free blocks.
1307 */
1308 if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
1309 ei->i_da_metadata_calc_len = save_len;
1310 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1311 spin_unlock(&ei->i_block_reservation_lock); 1220 spin_unlock(&ei->i_block_reservation_lock);
1312 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); 1221 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
1313 return -ENOSPC; 1222 return -ENOSPC;
1314 } 1223 }
1315 ei->i_reserved_data_blocks++; 1224 ei->i_reserved_data_blocks++;
1316 ei->i_reserved_meta_blocks += md_needed;
1317 spin_unlock(&ei->i_block_reservation_lock); 1225 spin_unlock(&ei->i_block_reservation_lock);
1318 1226
1319 return 0; /* success */ 1227 return 0; /* success */
@@ -1346,20 +1254,6 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1346 } 1254 }
1347 ei->i_reserved_data_blocks -= to_free; 1255 ei->i_reserved_data_blocks -= to_free;
1348 1256
1349 if (ei->i_reserved_data_blocks == 0) {
1350 /*
1351 * We can release all of the reserved metadata blocks
1352 * only when we have written all of the delayed
1353 * allocation blocks.
1354 * Note that in case of bigalloc, i_reserved_meta_blocks,
1355 * i_reserved_data_blocks, etc. refer to number of clusters.
1356 */
1357 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
1358 ei->i_reserved_meta_blocks);
1359 ei->i_reserved_meta_blocks = 0;
1360 ei->i_da_metadata_calc_len = 0;
1361 }
1362
1363 /* update fs dirty data blocks counter */ 1257 /* update fs dirty data blocks counter */
1364 percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free); 1258 percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
1365 1259
@@ -1500,10 +1394,6 @@ static void ext4_print_free_blocks(struct inode *inode)
1500 ext4_msg(sb, KERN_CRIT, "Block reservation details"); 1394 ext4_msg(sb, KERN_CRIT, "Block reservation details");
1501 ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u", 1395 ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
1502 ei->i_reserved_data_blocks); 1396 ei->i_reserved_data_blocks);
1503 ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
1504 ei->i_reserved_meta_blocks);
1505 ext4_msg(sb, KERN_CRIT, "i_allocated_meta_blocks=%u",
1506 ei->i_allocated_meta_blocks);
1507 return; 1397 return;
1508} 1398}
1509 1399
@@ -1620,13 +1510,6 @@ add_delayed:
1620 retval = ret; 1510 retval = ret;
1621 goto out_unlock; 1511 goto out_unlock;
1622 } 1512 }
1623 } else {
1624 ret = ext4_da_reserve_metadata(inode, iblock);
1625 if (ret) {
1626 /* not enough space to reserve */
1627 retval = ret;
1628 goto out_unlock;
1629 }
1630 } 1513 }
1631 1514
1632 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 1515 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -2843,8 +2726,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
2843{ 2726{
2844 trace_ext4_alloc_da_blocks(inode); 2727 trace_ext4_alloc_da_blocks(inode);
2845 2728
2846 if (!EXT4_I(inode)->i_reserved_data_blocks && 2729 if (!EXT4_I(inode)->i_reserved_data_blocks)
2847 !EXT4_I(inode)->i_reserved_meta_blocks)
2848 return 0; 2730 return 0;
2849 2731
2850 /* 2732 /*
@@ -3624,7 +3506,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3624 ret = ext4_ext_remove_space(inode, first_block, 3506 ret = ext4_ext_remove_space(inode, first_block,
3625 stop_block - 1); 3507 stop_block - 1);
3626 else 3508 else
3627 ret = ext4_free_hole_blocks(handle, inode, first_block, 3509 ret = ext4_ind_remove_space(handle, inode, first_block,
3628 stop_block); 3510 stop_block);
3629 3511
3630 up_write(&EXT4_I(inode)->i_data_sem); 3512 up_write(&EXT4_I(inode)->i_data_sem);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 2dcb936be90e..956027711faf 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3075,8 +3075,9 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3075 (23 - bsbits)) << 23; 3075 (23 - bsbits)) << 23;
3076 size = 8 * 1024 * 1024; 3076 size = 8 * 1024 * 1024;
3077 } else { 3077 } else {
3078 start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits; 3078 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
3079 size = ac->ac_o_ex.fe_len << bsbits; 3079 size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
3080 ac->ac_o_ex.fe_len) << bsbits;
3080 } 3081 }
3081 size = size >> bsbits; 3082 size = size >> bsbits;
3082 start = start_off >> bsbits; 3083 start = start_off >> bsbits;
@@ -3216,8 +3217,27 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3216static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) 3217static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3217{ 3218{
3218 struct ext4_prealloc_space *pa = ac->ac_pa; 3219 struct ext4_prealloc_space *pa = ac->ac_pa;
3220 struct ext4_buddy e4b;
3221 int err;
3219 3222
3220 if (pa && pa->pa_type == MB_INODE_PA) 3223 if (pa == NULL) {
3224 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
3225 if (err) {
3226 /*
3227 * This should never happen since we pin the
3228 * pages in the ext4_allocation_context so
3229 * ext4_mb_load_buddy() should never fail.
3230 */
3231 WARN(1, "mb_load_buddy failed (%d)", err);
3232 return;
3233 }
3234 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3235 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
3236 ac->ac_f_ex.fe_len);
3237 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3238 return;
3239 }
3240 if (pa->pa_type == MB_INODE_PA)
3221 pa->pa_free += ac->ac_b_ex.fe_len; 3241 pa->pa_free += ac->ac_b_ex.fe_len;
3222} 3242}
3223 3243
@@ -4627,7 +4647,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4627 struct buffer_head *gd_bh; 4647 struct buffer_head *gd_bh;
4628 ext4_group_t block_group; 4648 ext4_group_t block_group;
4629 struct ext4_sb_info *sbi; 4649 struct ext4_sb_info *sbi;
4630 struct ext4_inode_info *ei = EXT4_I(inode);
4631 struct ext4_buddy e4b; 4650 struct ext4_buddy e4b;
4632 unsigned int count_clusters; 4651 unsigned int count_clusters;
4633 int err = 0; 4652 int err = 0;
@@ -4838,19 +4857,7 @@ do_more:
4838 &sbi->s_flex_groups[flex_group].free_clusters); 4857 &sbi->s_flex_groups[flex_group].free_clusters);
4839 } 4858 }
4840 4859
4841 if (flags & EXT4_FREE_BLOCKS_RESERVE && ei->i_reserved_data_blocks) { 4860 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4842 percpu_counter_add(&sbi->s_dirtyclusters_counter,
4843 count_clusters);
4844 spin_lock(&ei->i_block_reservation_lock);
4845 if (flags & EXT4_FREE_BLOCKS_METADATA)
4846 ei->i_reserved_meta_blocks += count_clusters;
4847 else
4848 ei->i_reserved_data_blocks += count_clusters;
4849 spin_unlock(&ei->i_block_reservation_lock);
4850 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4851 dquot_reclaim_block(inode,
4852 EXT4_C2B(sbi, count_clusters));
4853 } else if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4854 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); 4861 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4855 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); 4862 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4856 4863
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index ec092437d3e0..d3567f27bae7 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -39,6 +39,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
39 newext.ee_block = cpu_to_le32(lb->first_block); 39 newext.ee_block = cpu_to_le32(lb->first_block);
40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); 40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
41 ext4_ext_store_pblock(&newext, lb->first_pblock); 41 ext4_ext_store_pblock(&newext, lb->first_pblock);
42 /* Locking only for convinience since we are operating on temp inode */
43 down_write(&EXT4_I(inode)->i_data_sem);
42 path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0); 44 path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
43 45
44 if (IS_ERR(path)) { 46 if (IS_ERR(path)) {
@@ -61,7 +63,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
61 */ 63 */
62 if (needed && ext4_handle_has_enough_credits(handle, 64 if (needed && ext4_handle_has_enough_credits(handle,
63 EXT4_RESERVE_TRANS_BLOCKS)) { 65 EXT4_RESERVE_TRANS_BLOCKS)) {
66 up_write((&EXT4_I(inode)->i_data_sem));
64 retval = ext4_journal_restart(handle, needed); 67 retval = ext4_journal_restart(handle, needed);
68 down_write((&EXT4_I(inode)->i_data_sem));
65 if (retval) 69 if (retval)
66 goto err_out; 70 goto err_out;
67 } else if (needed) { 71 } else if (needed) {
@@ -70,13 +74,16 @@ static int finish_range(handle_t *handle, struct inode *inode,
70 /* 74 /*
71 * IF not able to extend the journal restart the journal 75 * IF not able to extend the journal restart the journal
72 */ 76 */
77 up_write((&EXT4_I(inode)->i_data_sem));
73 retval = ext4_journal_restart(handle, needed); 78 retval = ext4_journal_restart(handle, needed);
79 down_write((&EXT4_I(inode)->i_data_sem));
74 if (retval) 80 if (retval)
75 goto err_out; 81 goto err_out;
76 } 82 }
77 } 83 }
78 retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0); 84 retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
79err_out: 85err_out:
86 up_write((&EXT4_I(inode)->i_data_sem));
80 if (path) { 87 if (path) {
81 ext4_ext_drop_refs(path); 88 ext4_ext_drop_refs(path);
82 kfree(path); 89 kfree(path);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 2484c7ec6a72..671a74b14fd7 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1013,10 +1013,11 @@ data_copy:
1013 *err = -EBUSY; 1013 *err = -EBUSY;
1014 goto unlock_pages; 1014 goto unlock_pages;
1015 } 1015 }
1016 1016 ext4_double_down_write_data_sem(orig_inode, donor_inode);
1017 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, 1017 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
1018 orig_blk_offset, 1018 orig_blk_offset,
1019 block_len_in_page, err); 1019 block_len_in_page, err);
1020 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1020 if (*err) { 1021 if (*err) {
1021 if (replaced_count) { 1022 if (replaced_count) {
1022 block_len_in_page = replaced_count; 1023 block_len_in_page = replaced_count;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6df7bc611dbd..32b43ad154b9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2142,10 +2142,6 @@ static int ext4_check_descriptors(struct super_block *sb,
2142 } 2142 }
2143 if (NULL != first_not_zeroed) 2143 if (NULL != first_not_zeroed)
2144 *first_not_zeroed = grp; 2144 *first_not_zeroed = grp;
2145
2146 ext4_free_blocks_count_set(sbi->s_es,
2147 EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
2148 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
2149 return 1; 2145 return 1;
2150} 2146}
2151 2147
@@ -3883,13 +3879,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3883 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3879 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3884 goto failed_mount2; 3880 goto failed_mount2;
3885 } 3881 }
3886 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
3887 if (!ext4_fill_flex_info(sb)) {
3888 ext4_msg(sb, KERN_ERR,
3889 "unable to initialize "
3890 "flex_bg meta info!");
3891 goto failed_mount2;
3892 }
3893 3882
3894 sbi->s_gdb_count = db_count; 3883 sbi->s_gdb_count = db_count;
3895 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3884 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
@@ -3902,23 +3891,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3902 /* Register extent status tree shrinker */ 3891 /* Register extent status tree shrinker */
3903 ext4_es_register_shrinker(sbi); 3892 ext4_es_register_shrinker(sbi);
3904 3893
3905 err = percpu_counter_init(&sbi->s_freeclusters_counter, 3894 if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) {
3906 ext4_count_free_clusters(sb));
3907 if (!err) {
3908 err = percpu_counter_init(&sbi->s_freeinodes_counter,
3909 ext4_count_free_inodes(sb));
3910 }
3911 if (!err) {
3912 err = percpu_counter_init(&sbi->s_dirs_counter,
3913 ext4_count_dirs(sb));
3914 }
3915 if (!err) {
3916 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
3917 }
3918 if (!err) {
3919 err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0);
3920 }
3921 if (err) {
3922 ext4_msg(sb, KERN_ERR, "insufficient memory"); 3895 ext4_msg(sb, KERN_ERR, "insufficient memory");
3923 goto failed_mount3; 3896 goto failed_mount3;
3924 } 3897 }
@@ -4022,18 +3995,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
4022 3995
4023 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; 3996 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
4024 3997
4025 /*
4026 * The journal may have updated the bg summary counts, so we
4027 * need to update the global counters.
4028 */
4029 percpu_counter_set(&sbi->s_freeclusters_counter,
4030 ext4_count_free_clusters(sb));
4031 percpu_counter_set(&sbi->s_freeinodes_counter,
4032 ext4_count_free_inodes(sb));
4033 percpu_counter_set(&sbi->s_dirs_counter,
4034 ext4_count_dirs(sb));
4035 percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
4036
4037no_journal: 3998no_journal:
4038 if (ext4_mballoc_ready) { 3999 if (ext4_mballoc_ready) {
4039 sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); 4000 sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
@@ -4141,6 +4102,33 @@ no_journal:
4141 goto failed_mount5; 4102 goto failed_mount5;
4142 } 4103 }
4143 4104
4105 block = ext4_count_free_clusters(sb);
4106 ext4_free_blocks_count_set(sbi->s_es,
4107 EXT4_C2B(sbi, block));
4108 err = percpu_counter_init(&sbi->s_freeclusters_counter, block);
4109 if (!err) {
4110 unsigned long freei = ext4_count_free_inodes(sb);
4111 sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
4112 err = percpu_counter_init(&sbi->s_freeinodes_counter, freei);
4113 }
4114 if (!err)
4115 err = percpu_counter_init(&sbi->s_dirs_counter,
4116 ext4_count_dirs(sb));
4117 if (!err)
4118 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
4119 if (err) {
4120 ext4_msg(sb, KERN_ERR, "insufficient memory");
4121 goto failed_mount6;
4122 }
4123
4124 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
4125 if (!ext4_fill_flex_info(sb)) {
4126 ext4_msg(sb, KERN_ERR,
4127 "unable to initialize "
4128 "flex_bg meta info!");
4129 goto failed_mount6;
4130 }
4131
4144 err = ext4_register_li_request(sb, first_not_zeroed); 4132 err = ext4_register_li_request(sb, first_not_zeroed);
4145 if (err) 4133 if (err)
4146 goto failed_mount6; 4134 goto failed_mount6;
@@ -4215,6 +4203,12 @@ failed_mount7:
4215 ext4_unregister_li_request(sb); 4203 ext4_unregister_li_request(sb);
4216failed_mount6: 4204failed_mount6:
4217 ext4_mb_release(sb); 4205 ext4_mb_release(sb);
4206 if (sbi->s_flex_groups)
4207 ext4_kvfree(sbi->s_flex_groups);
4208 percpu_counter_destroy(&sbi->s_freeclusters_counter);
4209 percpu_counter_destroy(&sbi->s_freeinodes_counter);
4210 percpu_counter_destroy(&sbi->s_dirs_counter);
4211 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4218failed_mount5: 4212failed_mount5:
4219 ext4_ext_release(sb); 4213 ext4_ext_release(sb);
4220 ext4_release_system_zone(sb); 4214 ext4_release_system_zone(sb);
@@ -4233,12 +4227,6 @@ failed_mount_wq:
4233failed_mount3: 4227failed_mount3:
4234 ext4_es_unregister_shrinker(sbi); 4228 ext4_es_unregister_shrinker(sbi);
4235 del_timer_sync(&sbi->s_err_report); 4229 del_timer_sync(&sbi->s_err_report);
4236 if (sbi->s_flex_groups)
4237 ext4_kvfree(sbi->s_flex_groups);
4238 percpu_counter_destroy(&sbi->s_freeclusters_counter);
4239 percpu_counter_destroy(&sbi->s_freeinodes_counter);
4240 percpu_counter_destroy(&sbi->s_dirs_counter);
4241 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4242 percpu_counter_destroy(&sbi->s_extent_cache_cnt); 4230 percpu_counter_destroy(&sbi->s_extent_cache_cnt);
4243 if (sbi->s_mmp_tsk) 4231 if (sbi->s_mmp_tsk)
4244 kthread_stop(sbi->s_mmp_tsk); 4232 kthread_stop(sbi->s_mmp_tsk);
@@ -4556,11 +4544,13 @@ static int ext4_commit_super(struct super_block *sb, int sync)
4556 else 4544 else
4557 es->s_kbytes_written = 4545 es->s_kbytes_written =
4558 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 4546 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
4559 ext4_free_blocks_count_set(es, 4547 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
4548 ext4_free_blocks_count_set(es,
4560 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( 4549 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
4561 &EXT4_SB(sb)->s_freeclusters_counter))); 4550 &EXT4_SB(sb)->s_freeclusters_counter)));
4562 es->s_free_inodes_count = 4551 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
4563 cpu_to_le32(percpu_counter_sum_positive( 4552 es->s_free_inodes_count =
4553 cpu_to_le32(percpu_counter_sum_positive(
4564 &EXT4_SB(sb)->s_freeinodes_counter)); 4554 &EXT4_SB(sb)->s_freeinodes_counter));
4565 BUFFER_TRACE(sbh, "marking dirty"); 4555 BUFFER_TRACE(sbh, "marking dirty");
4566 ext4_superblock_csum_set(sb); 4556 ext4_superblock_csum_set(sb);
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index dbe2141d10ad..83b9b5a8d112 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -203,12 +203,6 @@ static int __f2fs_set_acl(struct inode *inode, int type,
203 size_t size = 0; 203 size_t size = 0;
204 int error; 204 int error;
205 205
206 if (acl) {
207 error = posix_acl_valid(acl);
208 if (error < 0)
209 return error;
210 }
211
212 switch (type) { 206 switch (type) {
213 case ACL_TYPE_ACCESS: 207 case ACL_TYPE_ACCESS:
214 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; 208 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0b4710c1d370..6aeed5bada52 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -22,7 +22,7 @@
22#include "segment.h" 22#include "segment.h"
23#include <trace/events/f2fs.h> 23#include <trace/events/f2fs.h>
24 24
25static struct kmem_cache *orphan_entry_slab; 25static struct kmem_cache *ino_entry_slab;
26static struct kmem_cache *inode_entry_slab; 26static struct kmem_cache *inode_entry_slab;
27 27
28/* 28/*
@@ -282,72 +282,120 @@ const struct address_space_operations f2fs_meta_aops = {
282 .set_page_dirty = f2fs_set_meta_page_dirty, 282 .set_page_dirty = f2fs_set_meta_page_dirty,
283}; 283};
284 284
285static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
286{
287 struct ino_entry *e;
288retry:
289 spin_lock(&sbi->ino_lock[type]);
290
291 e = radix_tree_lookup(&sbi->ino_root[type], ino);
292 if (!e) {
293 e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
294 if (!e) {
295 spin_unlock(&sbi->ino_lock[type]);
296 goto retry;
297 }
298 if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
299 spin_unlock(&sbi->ino_lock[type]);
300 kmem_cache_free(ino_entry_slab, e);
301 goto retry;
302 }
303 memset(e, 0, sizeof(struct ino_entry));
304 e->ino = ino;
305
306 list_add_tail(&e->list, &sbi->ino_list[type]);
307 }
308 spin_unlock(&sbi->ino_lock[type]);
309}
310
311static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
312{
313 struct ino_entry *e;
314
315 spin_lock(&sbi->ino_lock[type]);
316 e = radix_tree_lookup(&sbi->ino_root[type], ino);
317 if (e) {
318 list_del(&e->list);
319 radix_tree_delete(&sbi->ino_root[type], ino);
320 if (type == ORPHAN_INO)
321 sbi->n_orphans--;
322 spin_unlock(&sbi->ino_lock[type]);
323 kmem_cache_free(ino_entry_slab, e);
324 return;
325 }
326 spin_unlock(&sbi->ino_lock[type]);
327}
328
329void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
330{
331 /* add new dirty ino entry into list */
332 __add_ino_entry(sbi, ino, type);
333}
334
335void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
336{
337 /* remove dirty ino entry from list */
338 __remove_ino_entry(sbi, ino, type);
339}
340
341/* mode should be APPEND_INO or UPDATE_INO */
342bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
343{
344 struct ino_entry *e;
345 spin_lock(&sbi->ino_lock[mode]);
346 e = radix_tree_lookup(&sbi->ino_root[mode], ino);
347 spin_unlock(&sbi->ino_lock[mode]);
348 return e ? true : false;
349}
350
351static void release_dirty_inode(struct f2fs_sb_info *sbi)
352{
353 struct ino_entry *e, *tmp;
354 int i;
355
356 for (i = APPEND_INO; i <= UPDATE_INO; i++) {
357 spin_lock(&sbi->ino_lock[i]);
358 list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
359 list_del(&e->list);
360 radix_tree_delete(&sbi->ino_root[i], e->ino);
361 kmem_cache_free(ino_entry_slab, e);
362 }
363 spin_unlock(&sbi->ino_lock[i]);
364 }
365}
366
285int acquire_orphan_inode(struct f2fs_sb_info *sbi) 367int acquire_orphan_inode(struct f2fs_sb_info *sbi)
286{ 368{
287 int err = 0; 369 int err = 0;
288 370
289 spin_lock(&sbi->orphan_inode_lock); 371 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
290 if (unlikely(sbi->n_orphans >= sbi->max_orphans)) 372 if (unlikely(sbi->n_orphans >= sbi->max_orphans))
291 err = -ENOSPC; 373 err = -ENOSPC;
292 else 374 else
293 sbi->n_orphans++; 375 sbi->n_orphans++;
294 spin_unlock(&sbi->orphan_inode_lock); 376 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
295 377
296 return err; 378 return err;
297} 379}
298 380
299void release_orphan_inode(struct f2fs_sb_info *sbi) 381void release_orphan_inode(struct f2fs_sb_info *sbi)
300{ 382{
301 spin_lock(&sbi->orphan_inode_lock); 383 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
302 f2fs_bug_on(sbi->n_orphans == 0); 384 f2fs_bug_on(sbi->n_orphans == 0);
303 sbi->n_orphans--; 385 sbi->n_orphans--;
304 spin_unlock(&sbi->orphan_inode_lock); 386 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
305} 387}
306 388
307void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 389void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
308{ 390{
309 struct list_head *head; 391 /* add new orphan ino entry into list */
310 struct orphan_inode_entry *new, *orphan; 392 __add_ino_entry(sbi, ino, ORPHAN_INO);
311
312 new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
313 new->ino = ino;
314
315 spin_lock(&sbi->orphan_inode_lock);
316 head = &sbi->orphan_inode_list;
317 list_for_each_entry(orphan, head, list) {
318 if (orphan->ino == ino) {
319 spin_unlock(&sbi->orphan_inode_lock);
320 kmem_cache_free(orphan_entry_slab, new);
321 return;
322 }
323
324 if (orphan->ino > ino)
325 break;
326 }
327
328 /* add new orphan entry into list which is sorted by inode number */
329 list_add_tail(&new->list, &orphan->list);
330 spin_unlock(&sbi->orphan_inode_lock);
331} 393}
332 394
333void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 395void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
334{ 396{
335 struct list_head *head; 397 /* remove orphan entry from orphan list */
336 struct orphan_inode_entry *orphan; 398 __remove_ino_entry(sbi, ino, ORPHAN_INO);
337
338 spin_lock(&sbi->orphan_inode_lock);
339 head = &sbi->orphan_inode_list;
340 list_for_each_entry(orphan, head, list) {
341 if (orphan->ino == ino) {
342 list_del(&orphan->list);
343 f2fs_bug_on(sbi->n_orphans == 0);
344 sbi->n_orphans--;
345 spin_unlock(&sbi->orphan_inode_lock);
346 kmem_cache_free(orphan_entry_slab, orphan);
347 return;
348 }
349 }
350 spin_unlock(&sbi->orphan_inode_lock);
351} 399}
352 400
353static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 401static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -401,14 +449,14 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
401 unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + 449 unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
402 (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); 450 (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
403 struct page *page = NULL; 451 struct page *page = NULL;
404 struct orphan_inode_entry *orphan = NULL; 452 struct ino_entry *orphan = NULL;
405 453
406 for (index = 0; index < orphan_blocks; index++) 454 for (index = 0; index < orphan_blocks; index++)
407 grab_meta_page(sbi, start_blk + index); 455 grab_meta_page(sbi, start_blk + index);
408 456
409 index = 1; 457 index = 1;
410 spin_lock(&sbi->orphan_inode_lock); 458 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
411 head = &sbi->orphan_inode_list; 459 head = &sbi->ino_list[ORPHAN_INO];
412 460
413 /* loop for each orphan inode entry and write them in Journal block */ 461 /* loop for each orphan inode entry and write them in Journal block */
414 list_for_each_entry(orphan, head, list) { 462 list_for_each_entry(orphan, head, list) {
@@ -448,7 +496,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
448 f2fs_put_page(page, 1); 496 f2fs_put_page(page, 1);
449 } 497 }
450 498
451 spin_unlock(&sbi->orphan_inode_lock); 499 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
452} 500}
453 501
454static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, 502static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -714,10 +762,10 @@ retry_flush_dents:
714 * until finishing nat/sit flush. 762 * until finishing nat/sit flush.
715 */ 763 */
716retry_flush_nodes: 764retry_flush_nodes:
717 mutex_lock(&sbi->node_write); 765 down_write(&sbi->node_write);
718 766
719 if (get_pages(sbi, F2FS_DIRTY_NODES)) { 767 if (get_pages(sbi, F2FS_DIRTY_NODES)) {
720 mutex_unlock(&sbi->node_write); 768 up_write(&sbi->node_write);
721 sync_node_pages(sbi, 0, &wbc); 769 sync_node_pages(sbi, 0, &wbc);
722 goto retry_flush_nodes; 770 goto retry_flush_nodes;
723 } 771 }
@@ -726,7 +774,7 @@ retry_flush_nodes:
726 774
727static void unblock_operations(struct f2fs_sb_info *sbi) 775static void unblock_operations(struct f2fs_sb_info *sbi)
728{ 776{
729 mutex_unlock(&sbi->node_write); 777 up_write(&sbi->node_write);
730 f2fs_unlock_all(sbi); 778 f2fs_unlock_all(sbi);
731} 779}
732 780
@@ -748,6 +796,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
748static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 796static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
749{ 797{
750 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 798 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
799 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
751 nid_t last_nid = 0; 800 nid_t last_nid = 0;
752 block_t start_blk; 801 block_t start_blk;
753 struct page *cp_page; 802 struct page *cp_page;
@@ -761,7 +810,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
761 * This avoids to conduct wrong roll-forward operations and uses 810 * This avoids to conduct wrong roll-forward operations and uses
762 * metapages, so should be called prior to sync_meta_pages below. 811 * metapages, so should be called prior to sync_meta_pages below.
763 */ 812 */
764 discard_next_dnode(sbi); 813 discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
765 814
766 /* Flush all the NAT/SIT pages */ 815 /* Flush all the NAT/SIT pages */
767 while (get_pages(sbi, F2FS_DIRTY_META)) 816 while (get_pages(sbi, F2FS_DIRTY_META))
@@ -885,8 +934,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
885 /* Here, we only have one bio having CP pack */ 934 /* Here, we only have one bio having CP pack */
886 sync_meta_pages(sbi, META_FLUSH, LONG_MAX); 935 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
887 936
888 if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { 937 if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
889 clear_prefree_segments(sbi); 938 clear_prefree_segments(sbi);
939 release_dirty_inode(sbi);
890 F2FS_RESET_SB_DIRT(sbi); 940 F2FS_RESET_SB_DIRT(sbi);
891 } 941 }
892} 942}
@@ -932,31 +982,37 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
932 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 982 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
933} 983}
934 984
935void init_orphan_info(struct f2fs_sb_info *sbi) 985void init_ino_entry_info(struct f2fs_sb_info *sbi)
936{ 986{
937 spin_lock_init(&sbi->orphan_inode_lock); 987 int i;
938 INIT_LIST_HEAD(&sbi->orphan_inode_list); 988
939 sbi->n_orphans = 0; 989 for (i = 0; i < MAX_INO_ENTRY; i++) {
990 INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
991 spin_lock_init(&sbi->ino_lock[i]);
992 INIT_LIST_HEAD(&sbi->ino_list[i]);
993 }
994
940 /* 995 /*
941 * considering 512 blocks in a segment 8 blocks are needed for cp 996 * considering 512 blocks in a segment 8 blocks are needed for cp
942 * and log segment summaries. Remaining blocks are used to keep 997 * and log segment summaries. Remaining blocks are used to keep
943 * orphan entries with the limitation one reserved segment 998 * orphan entries with the limitation one reserved segment
944 * for cp pack we can have max 1020*504 orphan entries 999 * for cp pack we can have max 1020*504 orphan entries
945 */ 1000 */
1001 sbi->n_orphans = 0;
946 sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) 1002 sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
947 * F2FS_ORPHANS_PER_BLOCK; 1003 * F2FS_ORPHANS_PER_BLOCK;
948} 1004}
949 1005
950int __init create_checkpoint_caches(void) 1006int __init create_checkpoint_caches(void)
951{ 1007{
952 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", 1008 ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
953 sizeof(struct orphan_inode_entry)); 1009 sizeof(struct ino_entry));
954 if (!orphan_entry_slab) 1010 if (!ino_entry_slab)
955 return -ENOMEM; 1011 return -ENOMEM;
956 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", 1012 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
957 sizeof(struct dir_inode_entry)); 1013 sizeof(struct dir_inode_entry));
958 if (!inode_entry_slab) { 1014 if (!inode_entry_slab) {
959 kmem_cache_destroy(orphan_entry_slab); 1015 kmem_cache_destroy(ino_entry_slab);
960 return -ENOMEM; 1016 return -ENOMEM;
961 } 1017 }
962 return 0; 1018 return 0;
@@ -964,6 +1020,6 @@ int __init create_checkpoint_caches(void)
964 1020
965void destroy_checkpoint_caches(void) 1021void destroy_checkpoint_caches(void)
966{ 1022{
967 kmem_cache_destroy(orphan_entry_slab); 1023 kmem_cache_destroy(ino_entry_slab);
968 kmem_cache_destroy(inode_entry_slab); 1024 kmem_cache_destroy(inode_entry_slab);
969} 1025}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f8cf619edb5f..03313099c51c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -139,7 +139,10 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
139 /* change META to META_FLUSH in the checkpoint procedure */ 139 /* change META to META_FLUSH in the checkpoint procedure */
140 if (type >= META_FLUSH) { 140 if (type >= META_FLUSH) {
141 io->fio.type = META_FLUSH; 141 io->fio.type = META_FLUSH;
142 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; 142 if (test_opt(sbi, NOBARRIER))
143 io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
144 else
145 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
143 } 146 }
144 __submit_merged_bio(io); 147 __submit_merged_bio(io);
145 up_write(&io->io_rwsem); 148 up_write(&io->io_rwsem);
@@ -626,8 +629,10 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
626 if (check_extent_cache(inode, pgofs, bh_result)) 629 if (check_extent_cache(inode, pgofs, bh_result))
627 goto out; 630 goto out;
628 631
629 if (create) 632 if (create) {
633 f2fs_balance_fs(sbi);
630 f2fs_lock_op(sbi); 634 f2fs_lock_op(sbi);
635 }
631 636
632 /* When reading holes, we need its node page */ 637 /* When reading holes, we need its node page */
633 set_new_dnode(&dn, inode, NULL, NULL, 0); 638 set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -784,9 +789,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
784 !is_cold_data(page) && 789 !is_cold_data(page) &&
785 need_inplace_update(inode))) { 790 need_inplace_update(inode))) {
786 rewrite_data_page(page, old_blkaddr, fio); 791 rewrite_data_page(page, old_blkaddr, fio);
792 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
787 } else { 793 } else {
788 write_data_page(page, &dn, &new_blkaddr, fio); 794 write_data_page(page, &dn, &new_blkaddr, fio);
789 update_extent_cache(new_blkaddr, &dn); 795 update_extent_cache(new_blkaddr, &dn);
796 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
790 } 797 }
791out_writepage: 798out_writepage:
792 f2fs_put_dnode(&dn); 799 f2fs_put_dnode(&dn);
@@ -914,6 +921,16 @@ skip_write:
914 return 0; 921 return 0;
915} 922}
916 923
924static void f2fs_write_failed(struct address_space *mapping, loff_t to)
925{
926 struct inode *inode = mapping->host;
927
928 if (to > inode->i_size) {
929 truncate_pagecache(inode, inode->i_size);
930 truncate_blocks(inode, inode->i_size);
931 }
932}
933
917static int f2fs_write_begin(struct file *file, struct address_space *mapping, 934static int f2fs_write_begin(struct file *file, struct address_space *mapping,
918 loff_t pos, unsigned len, unsigned flags, 935 loff_t pos, unsigned len, unsigned flags,
919 struct page **pagep, void **fsdata) 936 struct page **pagep, void **fsdata)
@@ -931,11 +948,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
931repeat: 948repeat:
932 err = f2fs_convert_inline_data(inode, pos + len); 949 err = f2fs_convert_inline_data(inode, pos + len);
933 if (err) 950 if (err)
934 return err; 951 goto fail;
935 952
936 page = grab_cache_page_write_begin(mapping, index, flags); 953 page = grab_cache_page_write_begin(mapping, index, flags);
937 if (!page) 954 if (!page) {
938 return -ENOMEM; 955 err = -ENOMEM;
956 goto fail;
957 }
939 958
940 /* to avoid latency during memory pressure */ 959 /* to avoid latency during memory pressure */
941 unlock_page(page); 960 unlock_page(page);
@@ -949,10 +968,9 @@ repeat:
949 set_new_dnode(&dn, inode, NULL, NULL, 0); 968 set_new_dnode(&dn, inode, NULL, NULL, 0);
950 err = f2fs_reserve_block(&dn, index); 969 err = f2fs_reserve_block(&dn, index);
951 f2fs_unlock_op(sbi); 970 f2fs_unlock_op(sbi);
952
953 if (err) { 971 if (err) {
954 f2fs_put_page(page, 0); 972 f2fs_put_page(page, 0);
955 return err; 973 goto fail;
956 } 974 }
957inline_data: 975inline_data:
958 lock_page(page); 976 lock_page(page);
@@ -982,19 +1000,20 @@ inline_data:
982 err = f2fs_read_inline_data(inode, page); 1000 err = f2fs_read_inline_data(inode, page);
983 if (err) { 1001 if (err) {
984 page_cache_release(page); 1002 page_cache_release(page);
985 return err; 1003 goto fail;
986 } 1004 }
987 } else { 1005 } else {
988 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 1006 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
989 READ_SYNC); 1007 READ_SYNC);
990 if (err) 1008 if (err)
991 return err; 1009 goto fail;
992 } 1010 }
993 1011
994 lock_page(page); 1012 lock_page(page);
995 if (unlikely(!PageUptodate(page))) { 1013 if (unlikely(!PageUptodate(page))) {
996 f2fs_put_page(page, 1); 1014 f2fs_put_page(page, 1);
997 return -EIO; 1015 err = -EIO;
1016 goto fail;
998 } 1017 }
999 if (unlikely(page->mapping != mapping)) { 1018 if (unlikely(page->mapping != mapping)) {
1000 f2fs_put_page(page, 1); 1019 f2fs_put_page(page, 1);
@@ -1005,6 +1024,9 @@ out:
1005 SetPageUptodate(page); 1024 SetPageUptodate(page);
1006 clear_cold_data(page); 1025 clear_cold_data(page);
1007 return 0; 1026 return 0;
1027fail:
1028 f2fs_write_failed(mapping, pos + len);
1029 return err;
1008} 1030}
1009 1031
1010static int f2fs_write_end(struct file *file, 1032static int f2fs_write_end(struct file *file,
@@ -1016,7 +1038,6 @@ static int f2fs_write_end(struct file *file,
1016 1038
1017 trace_f2fs_write_end(inode, pos, len, copied); 1039 trace_f2fs_write_end(inode, pos, len, copied);
1018 1040
1019 SetPageUptodate(page);
1020 set_page_dirty(page); 1041 set_page_dirty(page);
1021 1042
1022 if (pos + copied > i_size_read(inode)) { 1043 if (pos + copied > i_size_read(inode)) {
@@ -1050,7 +1071,10 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1050 struct iov_iter *iter, loff_t offset) 1071 struct iov_iter *iter, loff_t offset)
1051{ 1072{
1052 struct file *file = iocb->ki_filp; 1073 struct file *file = iocb->ki_filp;
1053 struct inode *inode = file->f_mapping->host; 1074 struct address_space *mapping = file->f_mapping;
1075 struct inode *inode = mapping->host;
1076 size_t count = iov_iter_count(iter);
1077 int err;
1054 1078
1055 /* Let buffer I/O handle the inline data case. */ 1079 /* Let buffer I/O handle the inline data case. */
1056 if (f2fs_has_inline_data(inode)) 1080 if (f2fs_has_inline_data(inode))
@@ -1062,8 +1086,15 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1062 /* clear fsync mark to recover these blocks */ 1086 /* clear fsync mark to recover these blocks */
1063 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); 1087 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
1064 1088
1065 return blockdev_direct_IO(rw, iocb, inode, iter, offset, 1089 trace_f2fs_direct_IO_enter(inode, offset, count, rw);
1066 get_data_block); 1090
1091 err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
1092 if (err < 0 && (rw & WRITE))
1093 f2fs_write_failed(mapping, offset + count);
1094
1095 trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
1096
1097 return err;
1067} 1098}
1068 1099
1069static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, 1100static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index b52c12cf5873..a441ba33be11 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -167,7 +167,7 @@ get_cache:
167 si->cache_mem += npages << PAGE_CACHE_SHIFT; 167 si->cache_mem += npages << PAGE_CACHE_SHIFT;
168 npages = META_MAPPING(sbi)->nrpages; 168 npages = META_MAPPING(sbi)->nrpages;
169 si->cache_mem += npages << PAGE_CACHE_SHIFT; 169 si->cache_mem += npages << PAGE_CACHE_SHIFT;
170 si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); 170 si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
171 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); 171 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
172} 172}
173 173
@@ -345,21 +345,14 @@ void __init f2fs_create_root_stats(void)
345 345
346 f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); 346 f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
347 if (!f2fs_debugfs_root) 347 if (!f2fs_debugfs_root)
348 goto bail; 348 return;
349 349
350 file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, 350 file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
351 NULL, &stat_fops); 351 NULL, &stat_fops);
352 if (!file) 352 if (!file) {
353 goto free_debugfs_dir; 353 debugfs_remove(f2fs_debugfs_root);
354 354 f2fs_debugfs_root = NULL;
355 return; 355 }
356
357free_debugfs_dir:
358 debugfs_remove(f2fs_debugfs_root);
359
360bail:
361 f2fs_debugfs_root = NULL;
362 return;
363} 356}
364 357
365void f2fs_destroy_root_stats(void) 358void f2fs_destroy_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index a4addd72ebbd..bcf893c3d903 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -77,8 +77,8 @@ static unsigned long dir_block_index(unsigned int level,
77 return bidx; 77 return bidx;
78} 78}
79 79
80static bool early_match_name(const char *name, size_t namelen, 80static bool early_match_name(size_t namelen, f2fs_hash_t namehash,
81 f2fs_hash_t namehash, struct f2fs_dir_entry *de) 81 struct f2fs_dir_entry *de)
82{ 82{
83 if (le16_to_cpu(de->name_len) != namelen) 83 if (le16_to_cpu(de->name_len) != namelen)
84 return false; 84 return false;
@@ -90,7 +90,7 @@ static bool early_match_name(const char *name, size_t namelen,
90} 90}
91 91
92static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, 92static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
93 const char *name, size_t namelen, int *max_slots, 93 struct qstr *name, int *max_slots,
94 f2fs_hash_t namehash, struct page **res_page) 94 f2fs_hash_t namehash, struct page **res_page)
95{ 95{
96 struct f2fs_dir_entry *de; 96 struct f2fs_dir_entry *de;
@@ -109,9 +109,10 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
109 continue; 109 continue;
110 } 110 }
111 de = &dentry_blk->dentry[bit_pos]; 111 de = &dentry_blk->dentry[bit_pos];
112 if (early_match_name(name, namelen, namehash, de)) { 112 if (early_match_name(name->len, namehash, de)) {
113 if (!memcmp(dentry_blk->filename[bit_pos], 113 if (!memcmp(dentry_blk->filename[bit_pos],
114 name, namelen)) { 114 name->name,
115 name->len)) {
115 *res_page = dentry_page; 116 *res_page = dentry_page;
116 goto found; 117 goto found;
117 } 118 }
@@ -120,6 +121,13 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
120 *max_slots = max_len; 121 *max_slots = max_len;
121 max_len = 0; 122 max_len = 0;
122 } 123 }
124
125 /*
126 * For the most part, it should be a bug when name_len is zero.
127 * We stop here for figuring out where the bugs are occurred.
128 */
129 f2fs_bug_on(!de->name_len);
130
123 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); 131 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
124 } 132 }
125 133
@@ -132,10 +140,10 @@ found:
132} 140}
133 141
134static struct f2fs_dir_entry *find_in_level(struct inode *dir, 142static struct f2fs_dir_entry *find_in_level(struct inode *dir,
135 unsigned int level, const char *name, size_t namelen, 143 unsigned int level, struct qstr *name,
136 f2fs_hash_t namehash, struct page **res_page) 144 f2fs_hash_t namehash, struct page **res_page)
137{ 145{
138 int s = GET_DENTRY_SLOTS(namelen); 146 int s = GET_DENTRY_SLOTS(name->len);
139 unsigned int nbucket, nblock; 147 unsigned int nbucket, nblock;
140 unsigned int bidx, end_block; 148 unsigned int bidx, end_block;
141 struct page *dentry_page; 149 struct page *dentry_page;
@@ -160,8 +168,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
160 continue; 168 continue;
161 } 169 }
162 170
163 de = find_in_block(dentry_page, name, namelen, 171 de = find_in_block(dentry_page, name, &max_slots,
164 &max_slots, namehash, res_page); 172 namehash, res_page);
165 if (de) 173 if (de)
166 break; 174 break;
167 175
@@ -187,8 +195,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
187struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, 195struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
188 struct qstr *child, struct page **res_page) 196 struct qstr *child, struct page **res_page)
189{ 197{
190 const char *name = child->name;
191 size_t namelen = child->len;
192 unsigned long npages = dir_blocks(dir); 198 unsigned long npages = dir_blocks(dir);
193 struct f2fs_dir_entry *de = NULL; 199 struct f2fs_dir_entry *de = NULL;
194 f2fs_hash_t name_hash; 200 f2fs_hash_t name_hash;
@@ -200,12 +206,11 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
200 206
201 *res_page = NULL; 207 *res_page = NULL;
202 208
203 name_hash = f2fs_dentry_hash(name, namelen); 209 name_hash = f2fs_dentry_hash(child);
204 max_depth = F2FS_I(dir)->i_current_depth; 210 max_depth = F2FS_I(dir)->i_current_depth;
205 211
206 for (level = 0; level < max_depth; level++) { 212 for (level = 0; level < max_depth; level++) {
207 de = find_in_level(dir, level, name, 213 de = find_in_level(dir, level, child, name_hash, res_page);
208 namelen, name_hash, res_page);
209 if (de) 214 if (de)
210 break; 215 break;
211 } 216 }
@@ -298,14 +303,13 @@ static int make_empty_dir(struct inode *inode,
298 struct page *dentry_page; 303 struct page *dentry_page;
299 struct f2fs_dentry_block *dentry_blk; 304 struct f2fs_dentry_block *dentry_blk;
300 struct f2fs_dir_entry *de; 305 struct f2fs_dir_entry *de;
301 void *kaddr;
302 306
303 dentry_page = get_new_data_page(inode, page, 0, true); 307 dentry_page = get_new_data_page(inode, page, 0, true);
304 if (IS_ERR(dentry_page)) 308 if (IS_ERR(dentry_page))
305 return PTR_ERR(dentry_page); 309 return PTR_ERR(dentry_page);
306 310
307 kaddr = kmap_atomic(dentry_page); 311
308 dentry_blk = (struct f2fs_dentry_block *)kaddr; 312 dentry_blk = kmap_atomic(dentry_page);
309 313
310 de = &dentry_blk->dentry[0]; 314 de = &dentry_blk->dentry[0];
311 de->name_len = cpu_to_le16(1); 315 de->name_len = cpu_to_le16(1);
@@ -323,7 +327,7 @@ static int make_empty_dir(struct inode *inode,
323 327
324 test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); 328 test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
325 test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); 329 test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
326 kunmap_atomic(kaddr); 330 kunmap_atomic(dentry_blk);
327 331
328 set_page_dirty(dentry_page); 332 set_page_dirty(dentry_page);
329 f2fs_put_page(dentry_page, 1); 333 f2fs_put_page(dentry_page, 1);
@@ -333,11 +337,12 @@ static int make_empty_dir(struct inode *inode,
333static struct page *init_inode_metadata(struct inode *inode, 337static struct page *init_inode_metadata(struct inode *inode,
334 struct inode *dir, const struct qstr *name) 338 struct inode *dir, const struct qstr *name)
335{ 339{
340 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
336 struct page *page; 341 struct page *page;
337 int err; 342 int err;
338 343
339 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { 344 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
340 page = new_inode_page(inode, name); 345 page = new_inode_page(inode);
341 if (IS_ERR(page)) 346 if (IS_ERR(page))
342 return page; 347 return page;
343 348
@@ -362,7 +367,8 @@ static struct page *init_inode_metadata(struct inode *inode,
362 set_cold_node(inode, page); 367 set_cold_node(inode, page);
363 } 368 }
364 369
365 init_dent_inode(name, page); 370 if (name)
371 init_dent_inode(name, page);
366 372
367 /* 373 /*
368 * This file should be checkpointed during fsync. 374 * This file should be checkpointed during fsync.
@@ -370,6 +376,12 @@ static struct page *init_inode_metadata(struct inode *inode,
370 */ 376 */
371 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { 377 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
372 file_lost_pino(inode); 378 file_lost_pino(inode);
379 /*
380 * If link the tmpfile to alias through linkat path,
381 * we should remove this inode from orphan list.
382 */
383 if (inode->i_nlink == 0)
384 remove_orphan_inode(sbi, inode->i_ino);
373 inc_nlink(inode); 385 inc_nlink(inode);
374 } 386 }
375 return page; 387 return page;
@@ -453,7 +465,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
453 int err = 0; 465 int err = 0;
454 int i; 466 int i;
455 467
456 dentry_hash = f2fs_dentry_hash(name->name, name->len); 468 dentry_hash = f2fs_dentry_hash(name);
457 level = 0; 469 level = 0;
458 current_depth = F2FS_I(dir)->i_current_depth; 470 current_depth = F2FS_I(dir)->i_current_depth;
459 if (F2FS_I(dir)->chash == dentry_hash) { 471 if (F2FS_I(dir)->chash == dentry_hash) {
@@ -529,6 +541,27 @@ fail:
529 return err; 541 return err;
530} 542}
531 543
544int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
545{
546 struct page *page;
547 int err = 0;
548
549 down_write(&F2FS_I(inode)->i_sem);
550 page = init_inode_metadata(inode, dir, NULL);
551 if (IS_ERR(page)) {
552 err = PTR_ERR(page);
553 goto fail;
554 }
555 /* we don't need to mark_inode_dirty now */
556 update_inode(inode, page);
557 f2fs_put_page(page, 1);
558
559 clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
560fail:
561 up_write(&F2FS_I(inode)->i_sem);
562 return err;
563}
564
532/* 565/*
533 * It only removes the dentry from the dentry page,corresponding name 566 * It only removes the dentry from the dentry page,corresponding name
534 * entry in name page does not need to be touched during deletion. 567 * entry in name page does not need to be touched during deletion.
@@ -541,14 +574,13 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
541 struct address_space *mapping = page->mapping; 574 struct address_space *mapping = page->mapping;
542 struct inode *dir = mapping->host; 575 struct inode *dir = mapping->host;
543 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); 576 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
544 void *kaddr = page_address(page);
545 int i; 577 int i;
546 578
547 lock_page(page); 579 lock_page(page);
548 f2fs_wait_on_page_writeback(page, DATA); 580 f2fs_wait_on_page_writeback(page, DATA);
549 581
550 dentry_blk = (struct f2fs_dentry_block *)kaddr; 582 dentry_blk = page_address(page);
551 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; 583 bit_pos = dentry - dentry_blk->dentry;
552 for (i = 0; i < slots; i++) 584 for (i = 0; i < slots; i++)
553 test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); 585 test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
554 586
@@ -603,7 +635,6 @@ bool f2fs_empty_dir(struct inode *dir)
603 unsigned long nblock = dir_blocks(dir); 635 unsigned long nblock = dir_blocks(dir);
604 636
605 for (bidx = 0; bidx < nblock; bidx++) { 637 for (bidx = 0; bidx < nblock; bidx++) {
606 void *kaddr;
607 dentry_page = get_lock_data_page(dir, bidx); 638 dentry_page = get_lock_data_page(dir, bidx);
608 if (IS_ERR(dentry_page)) { 639 if (IS_ERR(dentry_page)) {
609 if (PTR_ERR(dentry_page) == -ENOENT) 640 if (PTR_ERR(dentry_page) == -ENOENT)
@@ -612,8 +643,8 @@ bool f2fs_empty_dir(struct inode *dir)
612 return false; 643 return false;
613 } 644 }
614 645
615 kaddr = kmap_atomic(dentry_page); 646
616 dentry_blk = (struct f2fs_dentry_block *)kaddr; 647 dentry_blk = kmap_atomic(dentry_page);
617 if (bidx == 0) 648 if (bidx == 0)
618 bit_pos = 2; 649 bit_pos = 2;
619 else 650 else
@@ -621,7 +652,7 @@ bool f2fs_empty_dir(struct inode *dir)
621 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, 652 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
622 NR_DENTRY_IN_BLOCK, 653 NR_DENTRY_IN_BLOCK,
623 bit_pos); 654 bit_pos);
624 kunmap_atomic(kaddr); 655 kunmap_atomic(dentry_blk);
625 656
626 f2fs_put_page(dentry_page, 1); 657 f2fs_put_page(dentry_page, 1);
627 658
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 58df97e174d0..4dab5338a97a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -41,6 +41,7 @@
41#define F2FS_MOUNT_INLINE_XATTR 0x00000080 41#define F2FS_MOUNT_INLINE_XATTR 0x00000080
42#define F2FS_MOUNT_INLINE_DATA 0x00000100 42#define F2FS_MOUNT_INLINE_DATA 0x00000100
43#define F2FS_MOUNT_FLUSH_MERGE 0x00000200 43#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
44#define F2FS_MOUNT_NOBARRIER 0x00000400
44 45
45#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 46#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
46#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 47#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -99,8 +100,15 @@ enum {
99 META_SSA 100 META_SSA
100}; 101};
101 102
102/* for the list of orphan inodes */ 103/* for the list of ino */
103struct orphan_inode_entry { 104enum {
105 ORPHAN_INO, /* for orphan ino list */
106 APPEND_INO, /* for append ino list */
107 UPDATE_INO, /* for update ino list */
108 MAX_INO_ENTRY, /* max. list */
109};
110
111struct ino_entry {
104 struct list_head list; /* list head */ 112 struct list_head list; /* list head */
105 nid_t ino; /* inode number */ 113 nid_t ino; /* inode number */
106}; 114};
@@ -256,6 +264,8 @@ struct f2fs_nm_info {
256 unsigned int nat_cnt; /* the # of cached nat entries */ 264 unsigned int nat_cnt; /* the # of cached nat entries */
257 struct list_head nat_entries; /* cached nat entry list (clean) */ 265 struct list_head nat_entries; /* cached nat entry list (clean) */
258 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ 266 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
267 struct list_head nat_entry_set; /* nat entry set list */
268 unsigned int dirty_nat_cnt; /* total num of nat entries in set */
259 269
260 /* free node ids management */ 270 /* free node ids management */
261 struct radix_tree_root free_nid_root;/* root of the free_nid cache */ 271 struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -442,14 +452,17 @@ struct f2fs_sb_info {
442 struct inode *meta_inode; /* cache meta blocks */ 452 struct inode *meta_inode; /* cache meta blocks */
443 struct mutex cp_mutex; /* checkpoint procedure lock */ 453 struct mutex cp_mutex; /* checkpoint procedure lock */
444 struct rw_semaphore cp_rwsem; /* blocking FS operations */ 454 struct rw_semaphore cp_rwsem; /* blocking FS operations */
445 struct mutex node_write; /* locking node writes */ 455 struct rw_semaphore node_write; /* locking node writes */
446 struct mutex writepages; /* mutex for writepages() */ 456 struct mutex writepages; /* mutex for writepages() */
447 bool por_doing; /* recovery is doing or not */ 457 bool por_doing; /* recovery is doing or not */
448 wait_queue_head_t cp_wait; 458 wait_queue_head_t cp_wait;
449 459
450 /* for orphan inode management */ 460 /* for inode management */
451 struct list_head orphan_inode_list; /* orphan inode list */ 461 struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */
452 spinlock_t orphan_inode_lock; /* for orphan inode list */ 462 spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
463 struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
464
465 /* for orphan inode, use 0'th array */
453 unsigned int n_orphans; /* # of orphan inodes */ 466 unsigned int n_orphans; /* # of orphan inodes */
454 unsigned int max_orphans; /* max orphan inodes */ 467 unsigned int max_orphans; /* max orphan inodes */
455 468
@@ -768,7 +781,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
768 if (flag == NAT_BITMAP) 781 if (flag == NAT_BITMAP)
769 return &ckpt->sit_nat_version_bitmap; 782 return &ckpt->sit_nat_version_bitmap;
770 else 783 else
771 return ((unsigned char *)ckpt + F2FS_BLKSIZE); 784 return (unsigned char *)ckpt + F2FS_BLKSIZE;
772 } else { 785 } else {
773 offset = (flag == NAT_BITMAP) ? 786 offset = (flag == NAT_BITMAP) ?
774 le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; 787 le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
@@ -983,11 +996,15 @@ enum {
983 FI_NO_EXTENT, /* not to use the extent cache */ 996 FI_NO_EXTENT, /* not to use the extent cache */
984 FI_INLINE_XATTR, /* used for inline xattr */ 997 FI_INLINE_XATTR, /* used for inline xattr */
985 FI_INLINE_DATA, /* used for inline data*/ 998 FI_INLINE_DATA, /* used for inline data*/
999 FI_APPEND_WRITE, /* inode has appended data */
1000 FI_UPDATE_WRITE, /* inode has in-place-update data */
1001 FI_NEED_IPU, /* used fo ipu for fdatasync */
986}; 1002};
987 1003
988static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) 1004static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
989{ 1005{
990 set_bit(flag, &fi->flags); 1006 if (!test_bit(flag, &fi->flags))
1007 set_bit(flag, &fi->flags);
991} 1008}
992 1009
993static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) 1010static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
@@ -997,7 +1014,8 @@ static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
997 1014
998static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) 1015static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag)
999{ 1016{
1000 clear_bit(flag, &fi->flags); 1017 if (test_bit(flag, &fi->flags))
1018 clear_bit(flag, &fi->flags);
1001} 1019}
1002 1020
1003static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) 1021static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
@@ -1136,6 +1154,7 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
1136int update_dent_inode(struct inode *, const struct qstr *); 1154int update_dent_inode(struct inode *, const struct qstr *);
1137int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); 1155int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
1138void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); 1156void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
1157int f2fs_do_tmpfile(struct inode *, struct inode *);
1139int f2fs_make_empty(struct inode *, struct inode *); 1158int f2fs_make_empty(struct inode *, struct inode *);
1140bool f2fs_empty_dir(struct inode *); 1159bool f2fs_empty_dir(struct inode *);
1141 1160
@@ -1155,7 +1174,7 @@ void f2fs_msg(struct super_block *, const char *, const char *, ...);
1155/* 1174/*
1156 * hash.c 1175 * hash.c
1157 */ 1176 */
1158f2fs_hash_t f2fs_dentry_hash(const char *, size_t); 1177f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
1159 1178
1160/* 1179/*
1161 * node.c 1180 * node.c
@@ -1173,7 +1192,7 @@ int truncate_inode_blocks(struct inode *, pgoff_t);
1173int truncate_xattr_node(struct inode *, struct page *); 1192int truncate_xattr_node(struct inode *, struct page *);
1174int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); 1193int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
1175void remove_inode_page(struct inode *); 1194void remove_inode_page(struct inode *);
1176struct page *new_inode_page(struct inode *, const struct qstr *); 1195struct page *new_inode_page(struct inode *);
1177struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); 1196struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
1178void ra_node_page(struct f2fs_sb_info *, nid_t); 1197void ra_node_page(struct f2fs_sb_info *, nid_t);
1179struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); 1198struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
@@ -1185,6 +1204,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
1185void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1204void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
1186void recover_node_page(struct f2fs_sb_info *, struct page *, 1205void recover_node_page(struct f2fs_sb_info *, struct page *,
1187 struct f2fs_summary *, struct node_info *, block_t); 1206 struct f2fs_summary *, struct node_info *, block_t);
1207void recover_inline_xattr(struct inode *, struct page *);
1188bool recover_xattr_data(struct inode *, struct page *, block_t); 1208bool recover_xattr_data(struct inode *, struct page *, block_t);
1189int recover_inode_page(struct f2fs_sb_info *, struct page *); 1209int recover_inode_page(struct f2fs_sb_info *, struct page *);
1190int restore_node_summary(struct f2fs_sb_info *, unsigned int, 1210int restore_node_summary(struct f2fs_sb_info *, unsigned int,
@@ -1206,7 +1226,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *);
1206void invalidate_blocks(struct f2fs_sb_info *, block_t); 1226void invalidate_blocks(struct f2fs_sb_info *, block_t);
1207void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 1227void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
1208void clear_prefree_segments(struct f2fs_sb_info *); 1228void clear_prefree_segments(struct f2fs_sb_info *);
1209void discard_next_dnode(struct f2fs_sb_info *); 1229void discard_next_dnode(struct f2fs_sb_info *, block_t);
1210int npages_for_summary_flush(struct f2fs_sb_info *); 1230int npages_for_summary_flush(struct f2fs_sb_info *);
1211void allocate_new_segments(struct f2fs_sb_info *); 1231void allocate_new_segments(struct f2fs_sb_info *);
1212struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 1232struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
@@ -1240,6 +1260,9 @@ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
1240struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1260struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
1241int ra_meta_pages(struct f2fs_sb_info *, int, int, int); 1261int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
1242long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1262long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1263void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1264void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1265bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
1243int acquire_orphan_inode(struct f2fs_sb_info *); 1266int acquire_orphan_inode(struct f2fs_sb_info *);
1244void release_orphan_inode(struct f2fs_sb_info *); 1267void release_orphan_inode(struct f2fs_sb_info *);
1245void add_orphan_inode(struct f2fs_sb_info *, nid_t); 1268void add_orphan_inode(struct f2fs_sb_info *, nid_t);
@@ -1251,7 +1274,7 @@ void add_dirty_dir_inode(struct inode *);
1251void remove_dirty_dir_inode(struct inode *); 1274void remove_dirty_dir_inode(struct inode *);
1252void sync_dirty_dir_inodes(struct f2fs_sb_info *); 1275void sync_dirty_dir_inodes(struct f2fs_sb_info *);
1253void write_checkpoint(struct f2fs_sb_info *, bool); 1276void write_checkpoint(struct f2fs_sb_info *, bool);
1254void init_orphan_info(struct f2fs_sb_info *); 1277void init_ino_entry_info(struct f2fs_sb_info *);
1255int __init create_checkpoint_caches(void); 1278int __init create_checkpoint_caches(void);
1256void destroy_checkpoint_caches(void); 1279void destroy_checkpoint_caches(void);
1257 1280
@@ -1295,7 +1318,6 @@ bool space_for_roll_forward(struct f2fs_sb_info *);
1295struct f2fs_stat_info { 1318struct f2fs_stat_info {
1296 struct list_head stat_list; 1319 struct list_head stat_list;
1297 struct f2fs_sb_info *sbi; 1320 struct f2fs_sb_info *sbi;
1298 struct mutex stat_lock;
1299 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; 1321 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
1300 int main_area_segs, main_area_sections, main_area_zones; 1322 int main_area_segs, main_area_sections, main_area_zones;
1301 int hit_ext, total_ext; 1323 int hit_ext, total_ext;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7d8b96275092..208f1a9bd569 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -127,12 +127,30 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
127 return 0; 127 return 0;
128 128
129 trace_f2fs_sync_file_enter(inode); 129 trace_f2fs_sync_file_enter(inode);
130
131 /* if fdatasync is triggered, let's do in-place-update */
132 if (datasync)
133 set_inode_flag(fi, FI_NEED_IPU);
134
130 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 135 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
136 if (datasync)
137 clear_inode_flag(fi, FI_NEED_IPU);
131 if (ret) { 138 if (ret) {
132 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 139 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
133 return ret; 140 return ret;
134 } 141 }
135 142
143 /*
144 * if there is no written data, don't waste time to write recovery info.
145 */
146 if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
147 !exist_written_data(sbi, inode->i_ino, APPEND_INO)) {
148 if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
149 exist_written_data(sbi, inode->i_ino, UPDATE_INO))
150 goto flush_out;
151 goto out;
152 }
153
136 /* guarantee free sections for fsync */ 154 /* guarantee free sections for fsync */
137 f2fs_balance_fs(sbi); 155 f2fs_balance_fs(sbi);
138 156
@@ -188,6 +206,13 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
188 ret = wait_on_node_pages_writeback(sbi, inode->i_ino); 206 ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
189 if (ret) 207 if (ret)
190 goto out; 208 goto out;
209
210 /* once recovery info is written, don't need to tack this */
211 remove_dirty_inode(sbi, inode->i_ino, APPEND_INO);
212 clear_inode_flag(fi, FI_APPEND_WRITE);
213flush_out:
214 remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
215 clear_inode_flag(fi, FI_UPDATE_WRITE);
191 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb)); 216 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
192 } 217 }
193out: 218out:
@@ -206,8 +231,9 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
206 231
207 /* find first dirty page index */ 232 /* find first dirty page index */
208 pagevec_init(&pvec, 0); 233 pagevec_init(&pvec, 0);
209 nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1); 234 nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
210 pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX; 235 PAGECACHE_TAG_DIRTY, 1);
236 pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX;
211 pagevec_release(&pvec); 237 pagevec_release(&pvec);
212 return pgofs; 238 return pgofs;
213} 239}
@@ -272,8 +298,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
272 } 298 }
273 } 299 }
274 300
275 end_offset = IS_INODE(dn.node_page) ? 301 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
276 ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
277 302
278 /* find data/hole in dnode block */ 303 /* find data/hole in dnode block */
279 for (; dn.ofs_in_node < end_offset; 304 for (; dn.ofs_in_node < end_offset;
@@ -380,13 +405,15 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
380 return; 405 return;
381 406
382 lock_page(page); 407 lock_page(page);
383 if (unlikely(page->mapping != inode->i_mapping)) { 408 if (unlikely(!PageUptodate(page) ||
384 f2fs_put_page(page, 1); 409 page->mapping != inode->i_mapping))
385 return; 410 goto out;
386 } 411
387 f2fs_wait_on_page_writeback(page, DATA); 412 f2fs_wait_on_page_writeback(page, DATA);
388 zero_user(page, offset, PAGE_CACHE_SIZE - offset); 413 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
389 set_page_dirty(page); 414 set_page_dirty(page);
415
416out:
390 f2fs_put_page(page, 1); 417 f2fs_put_page(page, 1);
391} 418}
392 419
@@ -645,6 +672,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
645 loff_t off_start, off_end; 672 loff_t off_start, off_end;
646 int ret = 0; 673 int ret = 0;
647 674
675 f2fs_balance_fs(sbi);
676
648 ret = inode_newsize_ok(inode, (len + offset)); 677 ret = inode_newsize_ok(inode, (len + offset));
649 if (ret) 678 if (ret)
650 return ret; 679 return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b90dbe55403a..d7947d90ccc3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -186,7 +186,6 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
186static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) 186static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
187{ 187{
188 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 188 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
189 unsigned int hint = 0;
190 unsigned int secno; 189 unsigned int secno;
191 190
192 /* 191 /*
@@ -194,11 +193,9 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
194 * selected by background GC before. 193 * selected by background GC before.
195 * Those segments guarantee they have small valid blocks. 194 * Those segments guarantee they have small valid blocks.
196 */ 195 */
197next: 196 for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) {
198 secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++);
199 if (secno < TOTAL_SECS(sbi)) {
200 if (sec_usage_check(sbi, secno)) 197 if (sec_usage_check(sbi, secno))
201 goto next; 198 continue;
202 clear_bit(secno, dirty_i->victim_secmap); 199 clear_bit(secno, dirty_i->victim_secmap);
203 return secno * sbi->segs_per_sec; 200 return secno * sbi->segs_per_sec;
204 } 201 }
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 6eb8d269b53b..948d17bf7281 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -69,12 +69,14 @@ static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)
69 *buf++ = pad; 69 *buf++ = pad;
70} 70}
71 71
72f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len) 72f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
73{ 73{
74 __u32 hash; 74 __u32 hash;
75 f2fs_hash_t f2fs_hash; 75 f2fs_hash_t f2fs_hash;
76 const char *p; 76 const char *p;
77 __u32 in[8], buf[4]; 77 __u32 in[8], buf[4];
78 const char *name = name_info->name;
79 size_t len = name_info->len;
78 80
79 if ((len <= 2) && (name[0] == '.') && 81 if ((len <= 2) && (name[0] == '.') &&
80 (name[1] == '.' || name[1] == '\0')) 82 (name[1] == '.' || name[1] == '\0'))
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 1bba5228c197..5beeccef9ae1 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -172,6 +172,7 @@ int f2fs_write_inline_data(struct inode *inode,
172 stat_inc_inline_inode(inode); 172 stat_inc_inline_inode(inode);
173 } 173 }
174 174
175 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
175 sync_inode_page(&dn); 176 sync_inode_page(&dn);
176 f2fs_put_dnode(&dn); 177 f2fs_put_dnode(&dn);
177 178
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2cf6962f6cc8..2c39999f3868 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -267,13 +267,14 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
267void f2fs_evict_inode(struct inode *inode) 267void f2fs_evict_inode(struct inode *inode)
268{ 268{
269 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 269 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
270 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
270 271
271 trace_f2fs_evict_inode(inode); 272 trace_f2fs_evict_inode(inode);
272 truncate_inode_pages_final(&inode->i_data); 273 truncate_inode_pages_final(&inode->i_data);
273 274
274 if (inode->i_ino == F2FS_NODE_INO(sbi) || 275 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
275 inode->i_ino == F2FS_META_INO(sbi)) 276 inode->i_ino == F2FS_META_INO(sbi))
276 goto no_delete; 277 goto out_clear;
277 278
278 f2fs_bug_on(get_dirty_dents(inode)); 279 f2fs_bug_on(get_dirty_dents(inode));
279 remove_dirty_dir_inode(inode); 280 remove_dirty_dir_inode(inode);
@@ -295,6 +296,13 @@ void f2fs_evict_inode(struct inode *inode)
295 296
296 sb_end_intwrite(inode->i_sb); 297 sb_end_intwrite(inode->i_sb);
297no_delete: 298no_delete:
298 clear_inode(inode);
299 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); 299 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
300 if (xnid)
301 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
302 if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE))
303 add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
304 if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE))
305 add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
306out_clear:
307 clear_inode(inode);
300} 308}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a6bdddc33ce2..27b03776ffd2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -13,6 +13,7 @@
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/ctype.h> 15#include <linux/ctype.h>
16#include <linux/dcache.h>
16 17
17#include "f2fs.h" 18#include "f2fs.h"
18#include "node.h" 19#include "node.h"
@@ -22,14 +23,13 @@
22 23
23static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) 24static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
24{ 25{
25 struct super_block *sb = dir->i_sb; 26 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
26 struct f2fs_sb_info *sbi = F2FS_SB(sb);
27 nid_t ino; 27 nid_t ino;
28 struct inode *inode; 28 struct inode *inode;
29 bool nid_free = false; 29 bool nid_free = false;
30 int err; 30 int err;
31 31
32 inode = new_inode(sb); 32 inode = new_inode(dir->i_sb);
33 if (!inode) 33 if (!inode)
34 return ERR_PTR(-ENOMEM); 34 return ERR_PTR(-ENOMEM);
35 35
@@ -102,8 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
102static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 102static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
103 bool excl) 103 bool excl)
104{ 104{
105 struct super_block *sb = dir->i_sb; 105 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
106 struct f2fs_sb_info *sbi = F2FS_SB(sb);
107 struct inode *inode; 106 struct inode *inode;
108 nid_t ino = 0; 107 nid_t ino = 0;
109 int err; 108 int err;
@@ -146,8 +145,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
146 struct dentry *dentry) 145 struct dentry *dentry)
147{ 146{
148 struct inode *inode = old_dentry->d_inode; 147 struct inode *inode = old_dentry->d_inode;
149 struct super_block *sb = dir->i_sb; 148 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
150 struct f2fs_sb_info *sbi = F2FS_SB(sb);
151 int err; 149 int err;
152 150
153 f2fs_balance_fs(sbi); 151 f2fs_balance_fs(sbi);
@@ -207,8 +205,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
207 205
208static int f2fs_unlink(struct inode *dir, struct dentry *dentry) 206static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
209{ 207{
210 struct super_block *sb = dir->i_sb; 208 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
211 struct f2fs_sb_info *sbi = F2FS_SB(sb);
212 struct inode *inode = dentry->d_inode; 209 struct inode *inode = dentry->d_inode;
213 struct f2fs_dir_entry *de; 210 struct f2fs_dir_entry *de;
214 struct page *page; 211 struct page *page;
@@ -242,8 +239,7 @@ fail:
242static int f2fs_symlink(struct inode *dir, struct dentry *dentry, 239static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
243 const char *symname) 240 const char *symname)
244{ 241{
245 struct super_block *sb = dir->i_sb; 242 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
246 struct f2fs_sb_info *sbi = F2FS_SB(sb);
247 struct inode *inode; 243 struct inode *inode;
248 size_t symlen = strlen(symname) + 1; 244 size_t symlen = strlen(symname) + 1;
249 int err; 245 int err;
@@ -330,8 +326,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
330static int f2fs_mknod(struct inode *dir, struct dentry *dentry, 326static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
331 umode_t mode, dev_t rdev) 327 umode_t mode, dev_t rdev)
332{ 328{
333 struct super_block *sb = dir->i_sb; 329 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
334 struct f2fs_sb_info *sbi = F2FS_SB(sb);
335 struct inode *inode; 330 struct inode *inode;
336 int err = 0; 331 int err = 0;
337 332
@@ -369,8 +364,7 @@ out:
369static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, 364static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
370 struct inode *new_dir, struct dentry *new_dentry) 365 struct inode *new_dir, struct dentry *new_dentry)
371{ 366{
372 struct super_block *sb = old_dir->i_sb; 367 struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb);
373 struct f2fs_sb_info *sbi = F2FS_SB(sb);
374 struct inode *old_inode = old_dentry->d_inode; 368 struct inode *old_inode = old_dentry->d_inode;
375 struct inode *new_inode = new_dentry->d_inode; 369 struct inode *new_inode = new_dentry->d_inode;
376 struct page *old_dir_page; 370 struct page *old_dir_page;
@@ -393,8 +387,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
393 goto out_old; 387 goto out_old;
394 } 388 }
395 389
396 f2fs_lock_op(sbi);
397
398 if (new_inode) { 390 if (new_inode) {
399 391
400 err = -ENOTEMPTY; 392 err = -ENOTEMPTY;
@@ -407,6 +399,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
407 if (!new_entry) 399 if (!new_entry)
408 goto out_dir; 400 goto out_dir;
409 401
402 f2fs_lock_op(sbi);
403
410 err = acquire_orphan_inode(sbi); 404 err = acquire_orphan_inode(sbi);
411 if (err) 405 if (err)
412 goto put_out_dir; 406 goto put_out_dir;
@@ -435,9 +429,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
435 update_inode_page(old_inode); 429 update_inode_page(old_inode);
436 update_inode_page(new_inode); 430 update_inode_page(new_inode);
437 } else { 431 } else {
432 f2fs_lock_op(sbi);
433
438 err = f2fs_add_link(new_dentry, old_inode); 434 err = f2fs_add_link(new_dentry, old_inode);
439 if (err) 435 if (err) {
436 f2fs_unlock_op(sbi);
440 goto out_dir; 437 goto out_dir;
438 }
441 439
442 if (old_dir_entry) { 440 if (old_dir_entry) {
443 inc_nlink(new_dir); 441 inc_nlink(new_dir);
@@ -472,6 +470,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
472 return 0; 470 return 0;
473 471
474put_out_dir: 472put_out_dir:
473 f2fs_unlock_op(sbi);
475 kunmap(new_page); 474 kunmap(new_page);
476 f2fs_put_page(new_page, 0); 475 f2fs_put_page(new_page, 0);
477out_dir: 476out_dir:
@@ -479,7 +478,151 @@ out_dir:
479 kunmap(old_dir_page); 478 kunmap(old_dir_page);
480 f2fs_put_page(old_dir_page, 0); 479 f2fs_put_page(old_dir_page, 0);
481 } 480 }
481out_old:
482 kunmap(old_page);
483 f2fs_put_page(old_page, 0);
484out:
485 return err;
486}
487
488static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
489 struct inode *new_dir, struct dentry *new_dentry)
490{
491 struct super_block *sb = old_dir->i_sb;
492 struct f2fs_sb_info *sbi = F2FS_SB(sb);
493 struct inode *old_inode = old_dentry->d_inode;
494 struct inode *new_inode = new_dentry->d_inode;
495 struct page *old_dir_page, *new_dir_page;
496 struct page *old_page, *new_page;
497 struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL;
498 struct f2fs_dir_entry *old_entry, *new_entry;
499 int old_nlink = 0, new_nlink = 0;
500 int err = -ENOENT;
501
502 f2fs_balance_fs(sbi);
503
504 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
505 if (!old_entry)
506 goto out;
507
508 new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page);
509 if (!new_entry)
510 goto out_old;
511
512 /* prepare for updating ".." directory entry info later */
513 if (old_dir != new_dir) {
514 if (S_ISDIR(old_inode->i_mode)) {
515 err = -EIO;
516 old_dir_entry = f2fs_parent_dir(old_inode,
517 &old_dir_page);
518 if (!old_dir_entry)
519 goto out_new;
520 }
521
522 if (S_ISDIR(new_inode->i_mode)) {
523 err = -EIO;
524 new_dir_entry = f2fs_parent_dir(new_inode,
525 &new_dir_page);
526 if (!new_dir_entry)
527 goto out_old_dir;
528 }
529 }
530
531 /*
532 * If cross rename between file and directory those are not
533 * in the same directory, we will inc nlink of file's parent
534 * later, so we should check upper boundary of its nlink.
535 */
536 if ((!old_dir_entry || !new_dir_entry) &&
537 old_dir_entry != new_dir_entry) {
538 old_nlink = old_dir_entry ? -1 : 1;
539 new_nlink = -old_nlink;
540 err = -EMLINK;
541 if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) ||
542 (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX))
543 goto out_new_dir;
544 }
545
546 f2fs_lock_op(sbi);
547
548 err = update_dent_inode(old_inode, &new_dentry->d_name);
549 if (err)
550 goto out_unlock;
551
552 err = update_dent_inode(new_inode, &old_dentry->d_name);
553 if (err)
554 goto out_undo;
555
556 /* update ".." directory entry info of old dentry */
557 if (old_dir_entry)
558 f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir);
559
560 /* update ".." directory entry info of new dentry */
561 if (new_dir_entry)
562 f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir);
563
564 /* update directory entry info of old dir inode */
565 f2fs_set_link(old_dir, old_entry, old_page, new_inode);
566
567 down_write(&F2FS_I(old_inode)->i_sem);
568 file_lost_pino(old_inode);
569 up_write(&F2FS_I(old_inode)->i_sem);
570
571 update_inode_page(old_inode);
572
573 old_dir->i_ctime = CURRENT_TIME;
574 if (old_nlink) {
575 down_write(&F2FS_I(old_dir)->i_sem);
576 if (old_nlink < 0)
577 drop_nlink(old_dir);
578 else
579 inc_nlink(old_dir);
580 up_write(&F2FS_I(old_dir)->i_sem);
581 }
582 mark_inode_dirty(old_dir);
583 update_inode_page(old_dir);
584
585 /* update directory entry info of new dir inode */
586 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
587
588 down_write(&F2FS_I(new_inode)->i_sem);
589 file_lost_pino(new_inode);
590 up_write(&F2FS_I(new_inode)->i_sem);
591
592 update_inode_page(new_inode);
593
594 new_dir->i_ctime = CURRENT_TIME;
595 if (new_nlink) {
596 down_write(&F2FS_I(new_dir)->i_sem);
597 if (new_nlink < 0)
598 drop_nlink(new_dir);
599 else
600 inc_nlink(new_dir);
601 up_write(&F2FS_I(new_dir)->i_sem);
602 }
603 mark_inode_dirty(new_dir);
604 update_inode_page(new_dir);
605
606 f2fs_unlock_op(sbi);
607 return 0;
608out_undo:
609 /* Still we may fail to recover name info of f2fs_inode here */
610 update_dent_inode(old_inode, &old_dentry->d_name);
611out_unlock:
482 f2fs_unlock_op(sbi); 612 f2fs_unlock_op(sbi);
613out_new_dir:
614 if (new_dir_entry) {
615 kunmap(new_dir_page);
616 f2fs_put_page(new_dir_page, 0);
617 }
618out_old_dir:
619 if (old_dir_entry) {
620 kunmap(old_dir_page);
621 f2fs_put_page(old_dir_page, 0);
622 }
623out_new:
624 kunmap(new_page);
625 f2fs_put_page(new_page, 0);
483out_old: 626out_old:
484 kunmap(old_page); 627 kunmap(old_page);
485 f2fs_put_page(old_page, 0); 628 f2fs_put_page(old_page, 0);
@@ -487,6 +630,71 @@ out:
487 return err; 630 return err;
488} 631}
489 632
633static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
634 struct inode *new_dir, struct dentry *new_dentry,
635 unsigned int flags)
636{
637 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
638 return -EINVAL;
639
640 if (flags & RENAME_EXCHANGE) {
641 return f2fs_cross_rename(old_dir, old_dentry,
642 new_dir, new_dentry);
643 }
644 /*
645 * VFS has already handled the new dentry existence case,
646 * here, we just deal with "RENAME_NOREPLACE" as regular rename.
647 */
648 return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry);
649}
650
651static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
652{
653 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
654 struct inode *inode;
655 int err;
656
657 inode = f2fs_new_inode(dir, mode);
658 if (IS_ERR(inode))
659 return PTR_ERR(inode);
660
661 inode->i_op = &f2fs_file_inode_operations;
662 inode->i_fop = &f2fs_file_operations;
663 inode->i_mapping->a_ops = &f2fs_dblock_aops;
664
665 f2fs_lock_op(sbi);
666 err = acquire_orphan_inode(sbi);
667 if (err)
668 goto out;
669
670 err = f2fs_do_tmpfile(inode, dir);
671 if (err)
672 goto release_out;
673
674 /*
675 * add this non-linked tmpfile to orphan list, in this way we could
676 * remove all unused data of tmpfile after abnormal power-off.
677 */
678 add_orphan_inode(sbi, inode->i_ino);
679 f2fs_unlock_op(sbi);
680
681 alloc_nid_done(sbi, inode->i_ino);
682 d_tmpfile(dentry, inode);
683 unlock_new_inode(inode);
684 return 0;
685
686release_out:
687 release_orphan_inode(sbi);
688out:
689 f2fs_unlock_op(sbi);
690 clear_nlink(inode);
691 unlock_new_inode(inode);
692 make_bad_inode(inode);
693 iput(inode);
694 alloc_nid_failed(sbi, inode->i_ino);
695 return err;
696}
697
490const struct inode_operations f2fs_dir_inode_operations = { 698const struct inode_operations f2fs_dir_inode_operations = {
491 .create = f2fs_create, 699 .create = f2fs_create,
492 .lookup = f2fs_lookup, 700 .lookup = f2fs_lookup,
@@ -497,6 +705,8 @@ const struct inode_operations f2fs_dir_inode_operations = {
497 .rmdir = f2fs_rmdir, 705 .rmdir = f2fs_rmdir,
498 .mknod = f2fs_mknod, 706 .mknod = f2fs_mknod,
499 .rename = f2fs_rename, 707 .rename = f2fs_rename,
708 .rename2 = f2fs_rename2,
709 .tmpfile = f2fs_tmpfile,
500 .getattr = f2fs_getattr, 710 .getattr = f2fs_getattr,
501 .setattr = f2fs_setattr, 711 .setattr = f2fs_setattr,
502 .get_acl = f2fs_get_acl, 712 .get_acl = f2fs_get_acl,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4b697ccc9b0c..d3d90d284631 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -25,6 +25,7 @@
25 25
26static struct kmem_cache *nat_entry_slab; 26static struct kmem_cache *nat_entry_slab;
27static struct kmem_cache *free_nid_slab; 27static struct kmem_cache *free_nid_slab;
28static struct kmem_cache *nat_entry_set_slab;
28 29
29bool available_free_memory(struct f2fs_sb_info *sbi, int type) 30bool available_free_memory(struct f2fs_sb_info *sbi, int type)
30{ 31{
@@ -90,12 +91,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
90 91
91 /* get current nat block page with lock */ 92 /* get current nat block page with lock */
92 src_page = get_meta_page(sbi, src_off); 93 src_page = get_meta_page(sbi, src_off);
93
94 /* Dirty src_page means that it is already the new target NAT page. */
95 if (PageDirty(src_page))
96 return src_page;
97
98 dst_page = grab_meta_page(sbi, dst_off); 94 dst_page = grab_meta_page(sbi, dst_off);
95 f2fs_bug_on(PageDirty(src_page));
99 96
100 src_addr = page_address(src_page); 97 src_addr = page_address(src_page);
101 dst_addr = page_address(dst_page); 98 dst_addr = page_address(dst_page);
@@ -845,7 +842,7 @@ void remove_inode_page(struct inode *inode)
845 truncate_node(&dn); 842 truncate_node(&dn);
846} 843}
847 844
848struct page *new_inode_page(struct inode *inode, const struct qstr *name) 845struct page *new_inode_page(struct inode *inode)
849{ 846{
850 struct dnode_of_data dn; 847 struct dnode_of_data dn;
851 848
@@ -1234,12 +1231,12 @@ static int f2fs_write_node_page(struct page *page,
1234 if (wbc->for_reclaim) 1231 if (wbc->for_reclaim)
1235 goto redirty_out; 1232 goto redirty_out;
1236 1233
1237 mutex_lock(&sbi->node_write); 1234 down_read(&sbi->node_write);
1238 set_page_writeback(page); 1235 set_page_writeback(page);
1239 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); 1236 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
1240 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); 1237 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
1241 dec_page_count(sbi, F2FS_DIRTY_NODES); 1238 dec_page_count(sbi, F2FS_DIRTY_NODES);
1242 mutex_unlock(&sbi->node_write); 1239 up_read(&sbi->node_write);
1243 unlock_page(page); 1240 unlock_page(page);
1244 return 0; 1241 return 0;
1245 1242
@@ -1552,7 +1549,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1552 clear_node_page_dirty(page); 1549 clear_node_page_dirty(page);
1553} 1550}
1554 1551
1555static void recover_inline_xattr(struct inode *inode, struct page *page) 1552void recover_inline_xattr(struct inode *inode, struct page *page)
1556{ 1553{
1557 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1554 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1558 void *src_addr, *dst_addr; 1555 void *src_addr, *dst_addr;
@@ -1591,8 +1588,6 @@ bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1591 nid_t new_xnid = nid_of_node(page); 1588 nid_t new_xnid = nid_of_node(page);
1592 struct node_info ni; 1589 struct node_info ni;
1593 1590
1594 recover_inline_xattr(inode, page);
1595
1596 if (!f2fs_has_xattr_block(ofs_of_node(page))) 1591 if (!f2fs_has_xattr_block(ofs_of_node(page)))
1597 return false; 1592 return false;
1598 1593
@@ -1744,7 +1739,90 @@ skip:
1744 return err; 1739 return err;
1745} 1740}
1746 1741
1747static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) 1742static struct nat_entry_set *grab_nat_entry_set(void)
1743{
1744 struct nat_entry_set *nes =
1745 f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
1746
1747 nes->entry_cnt = 0;
1748 INIT_LIST_HEAD(&nes->set_list);
1749 INIT_LIST_HEAD(&nes->entry_list);
1750 return nes;
1751}
1752
1753static void release_nat_entry_set(struct nat_entry_set *nes,
1754 struct f2fs_nm_info *nm_i)
1755{
1756 f2fs_bug_on(!list_empty(&nes->entry_list));
1757
1758 nm_i->dirty_nat_cnt -= nes->entry_cnt;
1759 list_del(&nes->set_list);
1760 kmem_cache_free(nat_entry_set_slab, nes);
1761}
1762
1763static void adjust_nat_entry_set(struct nat_entry_set *nes,
1764 struct list_head *head)
1765{
1766 struct nat_entry_set *next = nes;
1767
1768 if (list_is_last(&nes->set_list, head))
1769 return;
1770
1771 list_for_each_entry_continue(next, head, set_list)
1772 if (nes->entry_cnt <= next->entry_cnt)
1773 break;
1774
1775 list_move_tail(&nes->set_list, &next->set_list);
1776}
1777
1778static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
1779{
1780 struct nat_entry_set *nes;
1781 nid_t start_nid = START_NID(ne->ni.nid);
1782
1783 list_for_each_entry(nes, head, set_list) {
1784 if (nes->start_nid == start_nid) {
1785 list_move_tail(&ne->list, &nes->entry_list);
1786 nes->entry_cnt++;
1787 adjust_nat_entry_set(nes, head);
1788 return;
1789 }
1790 }
1791
1792 nes = grab_nat_entry_set();
1793
1794 nes->start_nid = start_nid;
1795 list_move_tail(&ne->list, &nes->entry_list);
1796 nes->entry_cnt++;
1797 list_add(&nes->set_list, head);
1798}
1799
1800static void merge_nats_in_set(struct f2fs_sb_info *sbi)
1801{
1802 struct f2fs_nm_info *nm_i = NM_I(sbi);
1803 struct list_head *dirty_list = &nm_i->dirty_nat_entries;
1804 struct list_head *set_list = &nm_i->nat_entry_set;
1805 struct nat_entry *ne, *tmp;
1806
1807 write_lock(&nm_i->nat_tree_lock);
1808 list_for_each_entry_safe(ne, tmp, dirty_list, list) {
1809 if (nat_get_blkaddr(ne) == NEW_ADDR)
1810 continue;
1811 add_nat_entry(ne, set_list);
1812 nm_i->dirty_nat_cnt++;
1813 }
1814 write_unlock(&nm_i->nat_tree_lock);
1815}
1816
1817static bool __has_cursum_space(struct f2fs_summary_block *sum, int size)
1818{
1819 if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES)
1820 return true;
1821 else
1822 return false;
1823}
1824
1825static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
1748{ 1826{
1749 struct f2fs_nm_info *nm_i = NM_I(sbi); 1827 struct f2fs_nm_info *nm_i = NM_I(sbi);
1750 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1828 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1752,12 +1830,6 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1752 int i; 1830 int i;
1753 1831
1754 mutex_lock(&curseg->curseg_mutex); 1832 mutex_lock(&curseg->curseg_mutex);
1755
1756 if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
1757 mutex_unlock(&curseg->curseg_mutex);
1758 return false;
1759 }
1760
1761 for (i = 0; i < nats_in_cursum(sum); i++) { 1833 for (i = 0; i < nats_in_cursum(sum); i++) {
1762 struct nat_entry *ne; 1834 struct nat_entry *ne;
1763 struct f2fs_nat_entry raw_ne; 1835 struct f2fs_nat_entry raw_ne;
@@ -1767,23 +1839,21 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1767retry: 1839retry:
1768 write_lock(&nm_i->nat_tree_lock); 1840 write_lock(&nm_i->nat_tree_lock);
1769 ne = __lookup_nat_cache(nm_i, nid); 1841 ne = __lookup_nat_cache(nm_i, nid);
1770 if (ne) { 1842 if (ne)
1771 __set_nat_cache_dirty(nm_i, ne); 1843 goto found;
1772 write_unlock(&nm_i->nat_tree_lock); 1844
1773 continue;
1774 }
1775 ne = grab_nat_entry(nm_i, nid); 1845 ne = grab_nat_entry(nm_i, nid);
1776 if (!ne) { 1846 if (!ne) {
1777 write_unlock(&nm_i->nat_tree_lock); 1847 write_unlock(&nm_i->nat_tree_lock);
1778 goto retry; 1848 goto retry;
1779 } 1849 }
1780 node_info_from_raw_nat(&ne->ni, &raw_ne); 1850 node_info_from_raw_nat(&ne->ni, &raw_ne);
1851found:
1781 __set_nat_cache_dirty(nm_i, ne); 1852 __set_nat_cache_dirty(nm_i, ne);
1782 write_unlock(&nm_i->nat_tree_lock); 1853 write_unlock(&nm_i->nat_tree_lock);
1783 } 1854 }
1784 update_nats_in_cursum(sum, -i); 1855 update_nats_in_cursum(sum, -i);
1785 mutex_unlock(&curseg->curseg_mutex); 1856 mutex_unlock(&curseg->curseg_mutex);
1786 return true;
1787} 1857}
1788 1858
1789/* 1859/*
@@ -1794,80 +1864,91 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1794 struct f2fs_nm_info *nm_i = NM_I(sbi); 1864 struct f2fs_nm_info *nm_i = NM_I(sbi);
1795 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1865 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1796 struct f2fs_summary_block *sum = curseg->sum_blk; 1866 struct f2fs_summary_block *sum = curseg->sum_blk;
1797 struct nat_entry *ne, *cur; 1867 struct nat_entry_set *nes, *tmp;
1798 struct page *page = NULL; 1868 struct list_head *head = &nm_i->nat_entry_set;
1799 struct f2fs_nat_block *nat_blk = NULL; 1869 bool to_journal = true;
1800 nid_t start_nid = 0, end_nid = 0;
1801 bool flushed;
1802 1870
1803 flushed = flush_nats_in_journal(sbi); 1871 /* merge nat entries of dirty list to nat entry set temporarily */
1804 1872 merge_nats_in_set(sbi);
1805 if (!flushed)
1806 mutex_lock(&curseg->curseg_mutex);
1807
1808 /* 1) flush dirty nat caches */
1809 list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
1810 nid_t nid;
1811 struct f2fs_nat_entry raw_ne;
1812 int offset = -1;
1813
1814 if (nat_get_blkaddr(ne) == NEW_ADDR)
1815 continue;
1816 1873
1817 nid = nat_get_nid(ne); 1874 /*
1875 * if there are no enough space in journal to store dirty nat
1876 * entries, remove all entries from journal and merge them
1877 * into nat entry set.
1878 */
1879 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) {
1880 remove_nats_in_journal(sbi);
1818 1881
1819 if (flushed) 1882 /*
1820 goto to_nat_page; 1883 * merge nat entries of dirty list to nat entry set temporarily
1884 */
1885 merge_nats_in_set(sbi);
1886 }
1821 1887
1822 /* if there is room for nat enries in curseg->sumpage */ 1888 if (!nm_i->dirty_nat_cnt)
1823 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); 1889 return;
1824 if (offset >= 0) {
1825 raw_ne = nat_in_journal(sum, offset);
1826 goto flush_now;
1827 }
1828to_nat_page:
1829 if (!page || (start_nid > nid || nid > end_nid)) {
1830 if (page) {
1831 f2fs_put_page(page, 1);
1832 page = NULL;
1833 }
1834 start_nid = START_NID(nid);
1835 end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
1836 1890
1837 /* 1891 /*
1838 * get nat block with dirty flag, increased reference 1892 * there are two steps to flush nat entries:
1839 * count, mapped and lock 1893 * #1, flush nat entries to journal in current hot data summary block.
1840 */ 1894 * #2, flush nat entries to nat page.
1895 */
1896 list_for_each_entry_safe(nes, tmp, head, set_list) {
1897 struct f2fs_nat_block *nat_blk;
1898 struct nat_entry *ne, *cur;
1899 struct page *page;
1900 nid_t start_nid = nes->start_nid;
1901
1902 if (to_journal && !__has_cursum_space(sum, nes->entry_cnt))
1903 to_journal = false;
1904
1905 if (to_journal) {
1906 mutex_lock(&curseg->curseg_mutex);
1907 } else {
1841 page = get_next_nat_page(sbi, start_nid); 1908 page = get_next_nat_page(sbi, start_nid);
1842 nat_blk = page_address(page); 1909 nat_blk = page_address(page);
1910 f2fs_bug_on(!nat_blk);
1843 } 1911 }
1844 1912
1845 f2fs_bug_on(!nat_blk); 1913 /* flush dirty nats in nat entry set */
1846 raw_ne = nat_blk->entries[nid - start_nid]; 1914 list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
1847flush_now: 1915 struct f2fs_nat_entry *raw_ne;
1848 raw_nat_from_node_info(&raw_ne, &ne->ni); 1916 nid_t nid = nat_get_nid(ne);
1849 1917 int offset;
1850 if (offset < 0) { 1918
1851 nat_blk->entries[nid - start_nid] = raw_ne; 1919 if (to_journal) {
1852 } else { 1920 offset = lookup_journal_in_cursum(sum,
1853 nat_in_journal(sum, offset) = raw_ne; 1921 NAT_JOURNAL, nid, 1);
1854 nid_in_journal(sum, offset) = cpu_to_le32(nid); 1922 f2fs_bug_on(offset < 0);
1855 } 1923 raw_ne = &nat_in_journal(sum, offset);
1924 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1925 } else {
1926 raw_ne = &nat_blk->entries[nid - start_nid];
1927 }
1928 raw_nat_from_node_info(raw_ne, &ne->ni);
1856 1929
1857 if (nat_get_blkaddr(ne) == NULL_ADDR && 1930 if (nat_get_blkaddr(ne) == NULL_ADDR &&
1858 add_free_nid(sbi, nid, false) <= 0) { 1931 add_free_nid(sbi, nid, false) <= 0) {
1859 write_lock(&nm_i->nat_tree_lock); 1932 write_lock(&nm_i->nat_tree_lock);
1860 __del_from_nat_cache(nm_i, ne); 1933 __del_from_nat_cache(nm_i, ne);
1861 write_unlock(&nm_i->nat_tree_lock); 1934 write_unlock(&nm_i->nat_tree_lock);
1862 } else { 1935 } else {
1863 write_lock(&nm_i->nat_tree_lock); 1936 write_lock(&nm_i->nat_tree_lock);
1864 __clear_nat_cache_dirty(nm_i, ne); 1937 __clear_nat_cache_dirty(nm_i, ne);
1865 write_unlock(&nm_i->nat_tree_lock); 1938 write_unlock(&nm_i->nat_tree_lock);
1939 }
1866 } 1940 }
1941
1942 if (to_journal)
1943 mutex_unlock(&curseg->curseg_mutex);
1944 else
1945 f2fs_put_page(page, 1);
1946
1947 release_nat_entry_set(nes, nm_i);
1867 } 1948 }
1868 if (!flushed) 1949
1869 mutex_unlock(&curseg->curseg_mutex); 1950 f2fs_bug_on(!list_empty(head));
1870 f2fs_put_page(page, 1); 1951 f2fs_bug_on(nm_i->dirty_nat_cnt);
1871} 1952}
1872 1953
1873static int init_node_manager(struct f2fs_sb_info *sbi) 1954static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1896,6 +1977,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1896 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1977 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1897 INIT_LIST_HEAD(&nm_i->nat_entries); 1978 INIT_LIST_HEAD(&nm_i->nat_entries);
1898 INIT_LIST_HEAD(&nm_i->dirty_nat_entries); 1979 INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
1980 INIT_LIST_HEAD(&nm_i->nat_entry_set);
1899 1981
1900 mutex_init(&nm_i->build_lock); 1982 mutex_init(&nm_i->build_lock);
1901 spin_lock_init(&nm_i->free_nid_list_lock); 1983 spin_lock_init(&nm_i->free_nid_list_lock);
@@ -1976,19 +2058,30 @@ int __init create_node_manager_caches(void)
1976 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 2058 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1977 sizeof(struct nat_entry)); 2059 sizeof(struct nat_entry));
1978 if (!nat_entry_slab) 2060 if (!nat_entry_slab)
1979 return -ENOMEM; 2061 goto fail;
1980 2062
1981 free_nid_slab = f2fs_kmem_cache_create("free_nid", 2063 free_nid_slab = f2fs_kmem_cache_create("free_nid",
1982 sizeof(struct free_nid)); 2064 sizeof(struct free_nid));
1983 if (!free_nid_slab) { 2065 if (!free_nid_slab)
1984 kmem_cache_destroy(nat_entry_slab); 2066 goto destory_nat_entry;
1985 return -ENOMEM; 2067
1986 } 2068 nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
2069 sizeof(struct nat_entry_set));
2070 if (!nat_entry_set_slab)
2071 goto destory_free_nid;
1987 return 0; 2072 return 0;
2073
2074destory_free_nid:
2075 kmem_cache_destroy(free_nid_slab);
2076destory_nat_entry:
2077 kmem_cache_destroy(nat_entry_slab);
2078fail:
2079 return -ENOMEM;
1988} 2080}
1989 2081
1990void destroy_node_manager_caches(void) 2082void destroy_node_manager_caches(void)
1991{ 2083{
2084 kmem_cache_destroy(nat_entry_set_slab);
1992 kmem_cache_destroy(free_nid_slab); 2085 kmem_cache_destroy(free_nid_slab);
1993 kmem_cache_destroy(nat_entry_slab); 2086 kmem_cache_destroy(nat_entry_slab);
1994} 2087}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 7281112cd1c8..8a116a407599 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -89,6 +89,13 @@ enum mem_type {
89 DIRTY_DENTS /* indicates dirty dentry pages */ 89 DIRTY_DENTS /* indicates dirty dentry pages */
90}; 90};
91 91
92struct nat_entry_set {
93 struct list_head set_list; /* link with all nat sets */
94 struct list_head entry_list; /* link with dirty nat entries */
95 nid_t start_nid; /* start nid of nats in set */
96 unsigned int entry_cnt; /* the # of nat entries in set */
97};
98
92/* 99/*
93 * For free nid mangement 100 * For free nid mangement
94 */ 101 */
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index a112368a4a86..fe1c6d921ba2 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -300,6 +300,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
300 struct node_info ni; 300 struct node_info ni;
301 int err = 0, recovered = 0; 301 int err = 0, recovered = 0;
302 302
303 recover_inline_xattr(inode, page);
304
303 if (recover_inline_data(inode, page)) 305 if (recover_inline_data(inode, page))
304 goto out; 306 goto out;
305 307
@@ -434,7 +436,9 @@ next:
434 436
435int recover_fsync_data(struct f2fs_sb_info *sbi) 437int recover_fsync_data(struct f2fs_sb_info *sbi)
436{ 438{
439 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
437 struct list_head inode_list; 440 struct list_head inode_list;
441 block_t blkaddr;
438 int err; 442 int err;
439 bool need_writecp = false; 443 bool need_writecp = false;
440 444
@@ -447,6 +451,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
447 451
448 /* step #1: find fsynced inode numbers */ 452 /* step #1: find fsynced inode numbers */
449 sbi->por_doing = true; 453 sbi->por_doing = true;
454
455 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
456
450 err = find_fsync_dnodes(sbi, &inode_list); 457 err = find_fsync_dnodes(sbi, &inode_list);
451 if (err) 458 if (err)
452 goto out; 459 goto out;
@@ -462,8 +469,21 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
462out: 469out:
463 destroy_fsync_dnodes(&inode_list); 470 destroy_fsync_dnodes(&inode_list);
464 kmem_cache_destroy(fsync_entry_slab); 471 kmem_cache_destroy(fsync_entry_slab);
472
473 if (err) {
474 truncate_inode_pages_final(NODE_MAPPING(sbi));
475 truncate_inode_pages_final(META_MAPPING(sbi));
476 }
477
465 sbi->por_doing = false; 478 sbi->por_doing = false;
466 if (!err && need_writecp) 479 if (err) {
480 discard_next_dnode(sbi, blkaddr);
481
482 /* Flush all the NAT/SIT pages */
483 while (get_pages(sbi, F2FS_DIRTY_META))
484 sync_meta_pages(sbi, META, LONG_MAX);
485 } else if (need_writecp) {
467 write_checkpoint(sbi, false); 486 write_checkpoint(sbi, false);
487 }
468 return err; 488 return err;
469} 489}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index d04613df710a..0dfeebae2a50 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -239,6 +239,12 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
239 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; 239 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
240 struct flush_cmd cmd; 240 struct flush_cmd cmd;
241 241
242 trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
243 test_opt(sbi, FLUSH_MERGE));
244
245 if (test_opt(sbi, NOBARRIER))
246 return 0;
247
242 if (!test_opt(sbi, FLUSH_MERGE)) 248 if (!test_opt(sbi, FLUSH_MERGE))
243 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); 249 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
244 250
@@ -272,13 +278,13 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
272 return -ENOMEM; 278 return -ENOMEM;
273 spin_lock_init(&fcc->issue_lock); 279 spin_lock_init(&fcc->issue_lock);
274 init_waitqueue_head(&fcc->flush_wait_queue); 280 init_waitqueue_head(&fcc->flush_wait_queue);
275 sbi->sm_info->cmd_control_info = fcc; 281 SM_I(sbi)->cmd_control_info = fcc;
276 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, 282 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
277 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); 283 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
278 if (IS_ERR(fcc->f2fs_issue_flush)) { 284 if (IS_ERR(fcc->f2fs_issue_flush)) {
279 err = PTR_ERR(fcc->f2fs_issue_flush); 285 err = PTR_ERR(fcc->f2fs_issue_flush);
280 kfree(fcc); 286 kfree(fcc);
281 sbi->sm_info->cmd_control_info = NULL; 287 SM_I(sbi)->cmd_control_info = NULL;
282 return err; 288 return err;
283 } 289 }
284 290
@@ -287,13 +293,12 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
287 293
288void destroy_flush_cmd_control(struct f2fs_sb_info *sbi) 294void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
289{ 295{
290 struct flush_cmd_control *fcc = 296 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
291 sbi->sm_info->cmd_control_info;
292 297
293 if (fcc && fcc->f2fs_issue_flush) 298 if (fcc && fcc->f2fs_issue_flush)
294 kthread_stop(fcc->f2fs_issue_flush); 299 kthread_stop(fcc->f2fs_issue_flush);
295 kfree(fcc); 300 kfree(fcc);
296 sbi->sm_info->cmd_control_info = NULL; 301 SM_I(sbi)->cmd_control_info = NULL;
297} 302}
298 303
299static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, 304static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
@@ -377,11 +382,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
377 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); 382 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
378} 383}
379 384
380void discard_next_dnode(struct f2fs_sb_info *sbi) 385void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
381{ 386{
382 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
383 block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
384
385 if (f2fs_issue_discard(sbi, blkaddr, 1)) { 387 if (f2fs_issue_discard(sbi, blkaddr, 1)) {
386 struct page *page = grab_meta_page(sbi, blkaddr); 388 struct page *page = grab_meta_page(sbi, blkaddr);
387 /* zero-filled page */ 389 /* zero-filled page */
@@ -437,17 +439,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi,
437static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) 439static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
438{ 440{
439 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 441 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
440 unsigned int segno = -1; 442 unsigned int segno;
441 unsigned int total_segs = TOTAL_SEGS(sbi); 443 unsigned int total_segs = TOTAL_SEGS(sbi);
442 444
443 mutex_lock(&dirty_i->seglist_lock); 445 mutex_lock(&dirty_i->seglist_lock);
444 while (1) { 446 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs)
445 segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
446 segno + 1);
447 if (segno >= total_segs)
448 break;
449 __set_test_and_free(sbi, segno); 447 __set_test_and_free(sbi, segno);
450 }
451 mutex_unlock(&dirty_i->seglist_lock); 448 mutex_unlock(&dirty_i->seglist_lock);
452} 449}
453 450
@@ -974,14 +971,12 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
974{ 971{
975 struct sit_info *sit_i = SIT_I(sbi); 972 struct sit_info *sit_i = SIT_I(sbi);
976 struct curseg_info *curseg; 973 struct curseg_info *curseg;
977 unsigned int old_cursegno;
978 974
979 curseg = CURSEG_I(sbi, type); 975 curseg = CURSEG_I(sbi, type);
980 976
981 mutex_lock(&curseg->curseg_mutex); 977 mutex_lock(&curseg->curseg_mutex);
982 978
983 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 979 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
984 old_cursegno = curseg->segno;
985 980
986 /* 981 /*
987 * __add_sum_entry should be resided under the curseg_mutex 982 * __add_sum_entry should be resided under the curseg_mutex
@@ -1002,7 +997,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1002 * since SSR needs latest valid block information. 997 * since SSR needs latest valid block information.
1003 */ 998 */
1004 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 999 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1005 locate_dirty_segment(sbi, old_cursegno);
1006 1000
1007 mutex_unlock(&sit_i->sentry_lock); 1001 mutex_unlock(&sit_i->sentry_lock);
1008 1002
@@ -1532,7 +1526,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
1532 struct page *page = NULL; 1526 struct page *page = NULL;
1533 struct f2fs_sit_block *raw_sit = NULL; 1527 struct f2fs_sit_block *raw_sit = NULL;
1534 unsigned int start = 0, end = 0; 1528 unsigned int start = 0, end = 0;
1535 unsigned int segno = -1; 1529 unsigned int segno;
1536 bool flushed; 1530 bool flushed;
1537 1531
1538 mutex_lock(&curseg->curseg_mutex); 1532 mutex_lock(&curseg->curseg_mutex);
@@ -1544,7 +1538,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
1544 */ 1538 */
1545 flushed = flush_sits_in_journal(sbi); 1539 flushed = flush_sits_in_journal(sbi);
1546 1540
1547 while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { 1541 for_each_set_bit(segno, bitmap, nsegs) {
1548 struct seg_entry *se = get_seg_entry(sbi, segno); 1542 struct seg_entry *se = get_seg_entry(sbi, segno);
1549 int sit_offset, offset; 1543 int sit_offset, offset;
1550 1544
@@ -1703,7 +1697,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
1703 struct curseg_info *array; 1697 struct curseg_info *array;
1704 int i; 1698 int i;
1705 1699
1706 array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); 1700 array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
1707 if (!array) 1701 if (!array)
1708 return -ENOMEM; 1702 return -ENOMEM;
1709 1703
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7091204680f4..55973f7b0330 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -347,8 +347,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
347 if (test_and_clear_bit(segno, free_i->free_segmap)) { 347 if (test_and_clear_bit(segno, free_i->free_segmap)) {
348 free_i->free_segments++; 348 free_i->free_segments++;
349 349
350 next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), 350 next = find_next_bit(free_i->free_segmap,
351 start_segno); 351 start_segno + sbi->segs_per_sec, start_segno);
352 if (next >= start_segno + sbi->segs_per_sec) { 352 if (next >= start_segno + sbi->segs_per_sec) {
353 if (test_and_clear_bit(secno, free_i->free_secmap)) 353 if (test_and_clear_bit(secno, free_i->free_secmap))
354 free_i->free_sections++; 354 free_i->free_sections++;
@@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
486 if (S_ISDIR(inode->i_mode)) 486 if (S_ISDIR(inode->i_mode))
487 return false; 487 return false;
488 488
489 /* this is only set during fdatasync */
490 if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
491 return true;
492
489 switch (SM_I(sbi)->ipu_policy) { 493 switch (SM_I(sbi)->ipu_policy) {
490 case F2FS_IPU_FORCE: 494 case F2FS_IPU_FORCE:
491 return true; 495 return true;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8f96d9372ade..657582fc7601 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -52,6 +52,7 @@ enum {
52 Opt_inline_xattr, 52 Opt_inline_xattr,
53 Opt_inline_data, 53 Opt_inline_data,
54 Opt_flush_merge, 54 Opt_flush_merge,
55 Opt_nobarrier,
55 Opt_err, 56 Opt_err,
56}; 57};
57 58
@@ -69,6 +70,7 @@ static match_table_t f2fs_tokens = {
69 {Opt_inline_xattr, "inline_xattr"}, 70 {Opt_inline_xattr, "inline_xattr"},
70 {Opt_inline_data, "inline_data"}, 71 {Opt_inline_data, "inline_data"},
71 {Opt_flush_merge, "flush_merge"}, 72 {Opt_flush_merge, "flush_merge"},
73 {Opt_nobarrier, "nobarrier"},
72 {Opt_err, NULL}, 74 {Opt_err, NULL},
73}; 75};
74 76
@@ -339,6 +341,9 @@ static int parse_options(struct super_block *sb, char *options)
339 case Opt_flush_merge: 341 case Opt_flush_merge:
340 set_opt(sbi, FLUSH_MERGE); 342 set_opt(sbi, FLUSH_MERGE);
341 break; 343 break;
344 case Opt_nobarrier:
345 set_opt(sbi, NOBARRIER);
346 break;
342 default: 347 default:
343 f2fs_msg(sb, KERN_ERR, 348 f2fs_msg(sb, KERN_ERR,
344 "Unrecognized mount option \"%s\" or missing value", 349 "Unrecognized mount option \"%s\" or missing value",
@@ -544,6 +549,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
544 seq_puts(seq, ",inline_data"); 549 seq_puts(seq, ",inline_data");
545 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) 550 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
546 seq_puts(seq, ",flush_merge"); 551 seq_puts(seq, ",flush_merge");
552 if (test_opt(sbi, NOBARRIER))
553 seq_puts(seq, ",nobarrier");
547 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 554 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
548 555
549 return 0; 556 return 0;
@@ -615,7 +622,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
615 * Previous and new state of filesystem is RO, 622 * Previous and new state of filesystem is RO,
616 * so skip checking GC and FLUSH_MERGE conditions. 623 * so skip checking GC and FLUSH_MERGE conditions.
617 */ 624 */
618 if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) 625 if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
619 goto skip; 626 goto skip;
620 627
621 /* 628 /*
@@ -642,8 +649,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
642 */ 649 */
643 if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { 650 if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
644 destroy_flush_cmd_control(sbi); 651 destroy_flush_cmd_control(sbi);
645 } else if (test_opt(sbi, FLUSH_MERGE) && 652 } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) {
646 !sbi->sm_info->cmd_control_info) {
647 err = create_flush_cmd_control(sbi); 653 err = create_flush_cmd_control(sbi);
648 if (err) 654 if (err)
649 goto restore_gc; 655 goto restore_gc;
@@ -947,7 +953,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
947 mutex_init(&sbi->gc_mutex); 953 mutex_init(&sbi->gc_mutex);
948 mutex_init(&sbi->writepages); 954 mutex_init(&sbi->writepages);
949 mutex_init(&sbi->cp_mutex); 955 mutex_init(&sbi->cp_mutex);
950 mutex_init(&sbi->node_write); 956 init_rwsem(&sbi->node_write);
951 sbi->por_doing = false; 957 sbi->por_doing = false;
952 spin_lock_init(&sbi->stat_lock); 958 spin_lock_init(&sbi->stat_lock);
953 959
@@ -997,7 +1003,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
997 INIT_LIST_HEAD(&sbi->dir_inode_list); 1003 INIT_LIST_HEAD(&sbi->dir_inode_list);
998 spin_lock_init(&sbi->dir_inode_lock); 1004 spin_lock_init(&sbi->dir_inode_lock);
999 1005
1000 init_orphan_info(sbi); 1006 init_ino_entry_info(sbi);
1001 1007
1002 /* setup f2fs internal modules */ 1008 /* setup f2fs internal modules */
1003 err = build_segment_manager(sbi); 1009 err = build_segment_manager(sbi);
@@ -1034,8 +1040,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1034 goto free_node_inode; 1040 goto free_node_inode;
1035 } 1041 }
1036 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 1042 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
1043 iput(root);
1037 err = -EINVAL; 1044 err = -EINVAL;
1038 goto free_root_inode; 1045 goto free_node_inode;
1039 } 1046 }
1040 1047
1041 sb->s_root = d_make_root(root); /* allocate root dentry */ 1048 sb->s_root = d_make_root(root); /* allocate root dentry */
@@ -1082,7 +1089,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1082 * If filesystem is not mounted as read-only then 1089 * If filesystem is not mounted as read-only then
1083 * do start the gc_thread. 1090 * do start the gc_thread.
1084 */ 1091 */
1085 if (!(sb->s_flags & MS_RDONLY)) { 1092 if (!f2fs_readonly(sb)) {
1086 /* After POR, we can run background GC thread.*/ 1093 /* After POR, we can run background GC thread.*/
1087 err = start_gc_thread(sbi); 1094 err = start_gc_thread(sbi);
1088 if (err) 1095 if (err)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index be568b7311d6..ef9bef118342 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -342,7 +342,8 @@ static void __inode_wait_for_writeback(struct inode *inode)
342 wqh = bit_waitqueue(&inode->i_state, __I_SYNC); 342 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
343 while (inode->i_state & I_SYNC) { 343 while (inode->i_state & I_SYNC) {
344 spin_unlock(&inode->i_lock); 344 spin_unlock(&inode->i_lock);
345 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); 345 __wait_on_bit(wqh, &wq, bit_wait,
346 TASK_UNINTERRUPTIBLE);
346 spin_lock(&inode->i_lock); 347 spin_lock(&inode->i_lock);
347 } 348 }
348} 349}
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index aec01be91b0a..89acec742e0b 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -160,7 +160,7 @@ void __fscache_enable_cookie(struct fscache_cookie *cookie,
160 _enter("%p", cookie); 160 _enter("%p", cookie);
161 161
162 wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK, 162 wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
163 fscache_wait_bit, TASK_UNINTERRUPTIBLE); 163 TASK_UNINTERRUPTIBLE);
164 164
165 if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags)) 165 if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
166 goto out_unlock; 166 goto out_unlock;
@@ -255,7 +255,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
255 if (!fscache_defer_lookup) { 255 if (!fscache_defer_lookup) {
256 _debug("non-deferred lookup %p", &cookie->flags); 256 _debug("non-deferred lookup %p", &cookie->flags);
257 wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP, 257 wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
258 fscache_wait_bit, TASK_UNINTERRUPTIBLE); 258 TASK_UNINTERRUPTIBLE);
259 _debug("complete"); 259 _debug("complete");
260 if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags)) 260 if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags))
261 goto unavailable; 261 goto unavailable;
@@ -463,7 +463,6 @@ void __fscache_wait_on_invalidate(struct fscache_cookie *cookie)
463 _enter("%p", cookie); 463 _enter("%p", cookie);
464 464
465 wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING, 465 wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING,
466 fscache_wait_bit_interruptible,
467 TASK_UNINTERRUPTIBLE); 466 TASK_UNINTERRUPTIBLE);
468 467
469 _leave(""); 468 _leave("");
@@ -525,7 +524,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
525 } 524 }
526 525
527 wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK, 526 wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
528 fscache_wait_bit, TASK_UNINTERRUPTIBLE); 527 TASK_UNINTERRUPTIBLE);
529 if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags)) 528 if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
530 goto out_unlock_enable; 529 goto out_unlock_enable;
531 530
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index bc6c08fcfddd..7872a62ef30c 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -97,8 +97,6 @@ static inline bool fscache_object_congested(void)
97 return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq); 97 return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
98} 98}
99 99
100extern int fscache_wait_bit(void *);
101extern int fscache_wait_bit_interruptible(void *);
102extern int fscache_wait_atomic_t(atomic_t *); 100extern int fscache_wait_atomic_t(atomic_t *);
103 101
104/* 102/*
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 63f868e869b9..a31b83c5cbd9 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -197,24 +197,6 @@ static void __exit fscache_exit(void)
197module_exit(fscache_exit); 197module_exit(fscache_exit);
198 198
199/* 199/*
200 * wait_on_bit() sleep function for uninterruptible waiting
201 */
202int fscache_wait_bit(void *flags)
203{
204 schedule();
205 return 0;
206}
207
208/*
209 * wait_on_bit() sleep function for interruptible waiting
210 */
211int fscache_wait_bit_interruptible(void *flags)
212{
213 schedule();
214 return signal_pending(current);
215}
216
217/*
218 * wait_on_atomic_t() sleep function for uninterruptible waiting 200 * wait_on_atomic_t() sleep function for uninterruptible waiting
219 */ 201 */
220int fscache_wait_atomic_t(atomic_t *p) 202int fscache_wait_atomic_t(atomic_t *p)
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index ed70714503fa..85332b9d19d1 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -298,7 +298,6 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
298 298
299 jif = jiffies; 299 jif = jiffies;
300 if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP, 300 if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
301 fscache_wait_bit_interruptible,
302 TASK_INTERRUPTIBLE) != 0) { 301 TASK_INTERRUPTIBLE) != 0) {
303 fscache_stat(&fscache_n_retrievals_intr); 302 fscache_stat(&fscache_n_retrievals_intr);
304 _leave(" = -ERESTARTSYS"); 303 _leave(" = -ERESTARTSYS");
@@ -342,7 +341,6 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
342 if (stat_op_waits) 341 if (stat_op_waits)
343 fscache_stat(stat_op_waits); 342 fscache_stat(stat_op_waits);
344 if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING, 343 if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
345 fscache_wait_bit_interruptible,
346 TASK_INTERRUPTIBLE) != 0) { 344 TASK_INTERRUPTIBLE) != 0) {
347 ret = fscache_cancel_op(op, do_cancel); 345 ret = fscache_cancel_op(op, do_cancel);
348 if (ret == 0) 346 if (ret == 0)
@@ -351,7 +349,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
351 /* it's been removed from the pending queue by another party, 349 /* it's been removed from the pending queue by another party,
352 * so we should get to run shortly */ 350 * so we should get to run shortly */
353 wait_on_bit(&op->flags, FSCACHE_OP_WAITING, 351 wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
354 fscache_wait_bit, TASK_UNINTERRUPTIBLE); 352 TASK_UNINTERRUPTIBLE);
355 } 353 }
356 _debug("<<< GO"); 354 _debug("<<< GO");
357 355
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 098f97bdcf1b..ca887314aba9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -643,9 +643,8 @@ struct fuse_copy_state {
643 unsigned long seglen; 643 unsigned long seglen;
644 unsigned long addr; 644 unsigned long addr;
645 struct page *pg; 645 struct page *pg;
646 void *mapaddr;
647 void *buf;
648 unsigned len; 646 unsigned len;
647 unsigned offset;
649 unsigned move_pages:1; 648 unsigned move_pages:1;
650}; 649};
651 650
@@ -666,23 +665,17 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
666 if (cs->currbuf) { 665 if (cs->currbuf) {
667 struct pipe_buffer *buf = cs->currbuf; 666 struct pipe_buffer *buf = cs->currbuf;
668 667
669 if (!cs->write) { 668 if (cs->write)
670 kunmap_atomic(cs->mapaddr);
671 } else {
672 kunmap_atomic(cs->mapaddr);
673 buf->len = PAGE_SIZE - cs->len; 669 buf->len = PAGE_SIZE - cs->len;
674 }
675 cs->currbuf = NULL; 670 cs->currbuf = NULL;
676 cs->mapaddr = NULL; 671 } else if (cs->pg) {
677 } else if (cs->mapaddr) {
678 kunmap_atomic(cs->mapaddr);
679 if (cs->write) { 672 if (cs->write) {
680 flush_dcache_page(cs->pg); 673 flush_dcache_page(cs->pg);
681 set_page_dirty_lock(cs->pg); 674 set_page_dirty_lock(cs->pg);
682 } 675 }
683 put_page(cs->pg); 676 put_page(cs->pg);
684 cs->mapaddr = NULL;
685 } 677 }
678 cs->pg = NULL;
686} 679}
687 680
688/* 681/*
@@ -691,7 +684,7 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
691 */ 684 */
692static int fuse_copy_fill(struct fuse_copy_state *cs) 685static int fuse_copy_fill(struct fuse_copy_state *cs)
693{ 686{
694 unsigned long offset; 687 struct page *page;
695 int err; 688 int err;
696 689
697 unlock_request(cs->fc, cs->req); 690 unlock_request(cs->fc, cs->req);
@@ -706,14 +699,12 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
706 699
707 BUG_ON(!cs->nr_segs); 700 BUG_ON(!cs->nr_segs);
708 cs->currbuf = buf; 701 cs->currbuf = buf;
709 cs->mapaddr = kmap_atomic(buf->page); 702 cs->pg = buf->page;
703 cs->offset = buf->offset;
710 cs->len = buf->len; 704 cs->len = buf->len;
711 cs->buf = cs->mapaddr + buf->offset;
712 cs->pipebufs++; 705 cs->pipebufs++;
713 cs->nr_segs--; 706 cs->nr_segs--;
714 } else { 707 } else {
715 struct page *page;
716
717 if (cs->nr_segs == cs->pipe->buffers) 708 if (cs->nr_segs == cs->pipe->buffers)
718 return -EIO; 709 return -EIO;
719 710
@@ -726,8 +717,8 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
726 buf->len = 0; 717 buf->len = 0;
727 718
728 cs->currbuf = buf; 719 cs->currbuf = buf;
729 cs->mapaddr = kmap_atomic(page); 720 cs->pg = page;
730 cs->buf = cs->mapaddr; 721 cs->offset = 0;
731 cs->len = PAGE_SIZE; 722 cs->len = PAGE_SIZE;
732 cs->pipebufs++; 723 cs->pipebufs++;
733 cs->nr_segs++; 724 cs->nr_segs++;
@@ -740,14 +731,13 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
740 cs->iov++; 731 cs->iov++;
741 cs->nr_segs--; 732 cs->nr_segs--;
742 } 733 }
743 err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg); 734 err = get_user_pages_fast(cs->addr, 1, cs->write, &page);
744 if (err < 0) 735 if (err < 0)
745 return err; 736 return err;
746 BUG_ON(err != 1); 737 BUG_ON(err != 1);
747 offset = cs->addr % PAGE_SIZE; 738 cs->pg = page;
748 cs->mapaddr = kmap_atomic(cs->pg); 739 cs->offset = cs->addr % PAGE_SIZE;
749 cs->buf = cs->mapaddr + offset; 740 cs->len = min(PAGE_SIZE - cs->offset, cs->seglen);
750 cs->len = min(PAGE_SIZE - offset, cs->seglen);
751 cs->seglen -= cs->len; 741 cs->seglen -= cs->len;
752 cs->addr += cs->len; 742 cs->addr += cs->len;
753 } 743 }
@@ -760,15 +750,20 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
760{ 750{
761 unsigned ncpy = min(*size, cs->len); 751 unsigned ncpy = min(*size, cs->len);
762 if (val) { 752 if (val) {
753 void *pgaddr = kmap_atomic(cs->pg);
754 void *buf = pgaddr + cs->offset;
755
763 if (cs->write) 756 if (cs->write)
764 memcpy(cs->buf, *val, ncpy); 757 memcpy(buf, *val, ncpy);
765 else 758 else
766 memcpy(*val, cs->buf, ncpy); 759 memcpy(*val, buf, ncpy);
760
761 kunmap_atomic(pgaddr);
767 *val += ncpy; 762 *val += ncpy;
768 } 763 }
769 *size -= ncpy; 764 *size -= ncpy;
770 cs->len -= ncpy; 765 cs->len -= ncpy;
771 cs->buf += ncpy; 766 cs->offset += ncpy;
772 return ncpy; 767 return ncpy;
773} 768}
774 769
@@ -874,8 +869,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
874out_fallback_unlock: 869out_fallback_unlock:
875 unlock_page(newpage); 870 unlock_page(newpage);
876out_fallback: 871out_fallback:
877 cs->mapaddr = kmap_atomic(buf->page); 872 cs->pg = buf->page;
878 cs->buf = cs->mapaddr + buf->offset; 873 cs->offset = buf->offset;
879 874
880 err = lock_request(cs->fc, cs->req); 875 err = lock_request(cs->fc, cs->req);
881 if (err) 876 if (err)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 42198359fa1b..0c6048247a34 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -198,7 +198,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
198 inode = ACCESS_ONCE(entry->d_inode); 198 inode = ACCESS_ONCE(entry->d_inode);
199 if (inode && is_bad_inode(inode)) 199 if (inode && is_bad_inode(inode))
200 goto invalid; 200 goto invalid;
201 else if (fuse_dentry_time(entry) < get_jiffies_64()) { 201 else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
202 (flags & LOOKUP_REVAL)) {
202 int err; 203 int err;
203 struct fuse_entry_out outarg; 204 struct fuse_entry_out outarg;
204 struct fuse_req *req; 205 struct fuse_req *req;
@@ -814,13 +815,6 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
814 return err; 815 return err;
815} 816}
816 817
817static int fuse_rename(struct inode *olddir, struct dentry *oldent,
818 struct inode *newdir, struct dentry *newent)
819{
820 return fuse_rename_common(olddir, oldent, newdir, newent, 0,
821 FUSE_RENAME, sizeof(struct fuse_rename_in));
822}
823
824static int fuse_rename2(struct inode *olddir, struct dentry *oldent, 818static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
825 struct inode *newdir, struct dentry *newent, 819 struct inode *newdir, struct dentry *newent,
826 unsigned int flags) 820 unsigned int flags)
@@ -831,17 +825,30 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
831 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) 825 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
832 return -EINVAL; 826 return -EINVAL;
833 827
834 if (fc->no_rename2 || fc->minor < 23) 828 if (flags) {
835 return -EINVAL; 829 if (fc->no_rename2 || fc->minor < 23)
830 return -EINVAL;
836 831
837 err = fuse_rename_common(olddir, oldent, newdir, newent, flags, 832 err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
838 FUSE_RENAME2, sizeof(struct fuse_rename2_in)); 833 FUSE_RENAME2,
839 if (err == -ENOSYS) { 834 sizeof(struct fuse_rename2_in));
840 fc->no_rename2 = 1; 835 if (err == -ENOSYS) {
841 err = -EINVAL; 836 fc->no_rename2 = 1;
837 err = -EINVAL;
838 }
839 } else {
840 err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
841 FUSE_RENAME,
842 sizeof(struct fuse_rename_in));
842 } 843 }
844
843 return err; 845 return err;
846}
844 847
848static int fuse_rename(struct inode *olddir, struct dentry *oldent,
849 struct inode *newdir, struct dentry *newent)
850{
851 return fuse_rename2(olddir, oldent, newdir, newent, 0);
845} 852}
846 853
847static int fuse_link(struct dentry *entry, struct inode *newdir, 854static int fuse_link(struct dentry *entry, struct inode *newdir,
@@ -985,7 +992,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
985 int err; 992 int err;
986 bool r; 993 bool r;
987 994
988 if (fi->i_time < get_jiffies_64()) { 995 if (time_before64(fi->i_time, get_jiffies_64())) {
989 r = true; 996 r = true;
990 err = fuse_do_getattr(inode, stat, file); 997 err = fuse_do_getattr(inode, stat, file);
991 } else { 998 } else {
@@ -1171,7 +1178,7 @@ static int fuse_permission(struct inode *inode, int mask)
1171 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { 1178 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1172 struct fuse_inode *fi = get_fuse_inode(inode); 1179 struct fuse_inode *fi = get_fuse_inode(inode);
1173 1180
1174 if (fi->i_time < get_jiffies_64()) { 1181 if (time_before64(fi->i_time, get_jiffies_64())) {
1175 refreshed = true; 1182 refreshed = true;
1176 1183
1177 err = fuse_perm_getattr(inode, mask); 1184 err = fuse_perm_getattr(inode, mask);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6e16dad13e9b..40ac2628ddcf 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1687,7 +1687,7 @@ static int fuse_writepage_locked(struct page *page)
1687 error = -EIO; 1687 error = -EIO;
1688 req->ff = fuse_write_file_get(fc, fi); 1688 req->ff = fuse_write_file_get(fc, fi);
1689 if (!req->ff) 1689 if (!req->ff)
1690 goto err_free; 1690 goto err_nofile;
1691 1691
1692 fuse_write_fill(req, req->ff, page_offset(page), 0); 1692 fuse_write_fill(req, req->ff, page_offset(page), 0);
1693 1693
@@ -1715,6 +1715,8 @@ static int fuse_writepage_locked(struct page *page)
1715 1715
1716 return 0; 1716 return 0;
1717 1717
1718err_nofile:
1719 __free_page(tmp_page);
1718err_free: 1720err_free:
1719 fuse_request_free(req); 1721 fuse_request_free(req);
1720err: 1722err:
@@ -1955,8 +1957,8 @@ static int fuse_writepages(struct address_space *mapping,
1955 data.ff = NULL; 1957 data.ff = NULL;
1956 1958
1957 err = -ENOMEM; 1959 err = -ENOMEM;
1958 data.orig_pages = kzalloc(sizeof(struct page *) * 1960 data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
1959 FUSE_MAX_PAGES_PER_REQ, 1961 sizeof(struct page *),
1960 GFP_NOFS); 1962 GFP_NOFS);
1961 if (!data.orig_pages) 1963 if (!data.orig_pages)
1962 goto out; 1964 goto out;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 754dcf23de8a..03246cd9d47a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -478,6 +478,17 @@ static const match_table_t tokens = {
478 {OPT_ERR, NULL} 478 {OPT_ERR, NULL}
479}; 479};
480 480
481static int fuse_match_uint(substring_t *s, unsigned int *res)
482{
483 int err = -ENOMEM;
484 char *buf = match_strdup(s);
485 if (buf) {
486 err = kstrtouint(buf, 10, res);
487 kfree(buf);
488 }
489 return err;
490}
491
481static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) 492static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
482{ 493{
483 char *p; 494 char *p;
@@ -488,6 +499,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
488 while ((p = strsep(&opt, ",")) != NULL) { 499 while ((p = strsep(&opt, ",")) != NULL) {
489 int token; 500 int token;
490 int value; 501 int value;
502 unsigned uv;
491 substring_t args[MAX_OPT_ARGS]; 503 substring_t args[MAX_OPT_ARGS];
492 if (!*p) 504 if (!*p)
493 continue; 505 continue;
@@ -511,18 +523,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
511 break; 523 break;
512 524
513 case OPT_USER_ID: 525 case OPT_USER_ID:
514 if (match_int(&args[0], &value)) 526 if (fuse_match_uint(&args[0], &uv))
515 return 0; 527 return 0;
516 d->user_id = make_kuid(current_user_ns(), value); 528 d->user_id = make_kuid(current_user_ns(), uv);
517 if (!uid_valid(d->user_id)) 529 if (!uid_valid(d->user_id))
518 return 0; 530 return 0;
519 d->user_id_present = 1; 531 d->user_id_present = 1;
520 break; 532 break;
521 533
522 case OPT_GROUP_ID: 534 case OPT_GROUP_ID:
523 if (match_int(&args[0], &value)) 535 if (fuse_match_uint(&args[0], &uv))
524 return 0; 536 return 0;
525 d->group_id = make_kgid(current_user_ns(), value); 537 d->group_id = make_kgid(current_user_ns(), uv);
526 if (!gid_valid(d->group_id)) 538 if (!gid_valid(d->group_id))
527 return 0; 539 return 0;
528 d->group_id_present = 1; 540 d->group_id_present = 1;
@@ -895,9 +907,6 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
895 fc->writeback_cache = 1; 907 fc->writeback_cache = 1;
896 if (arg->time_gran && arg->time_gran <= 1000000000) 908 if (arg->time_gran && arg->time_gran <= 1000000000)
897 fc->sb->s_time_gran = arg->time_gran; 909 fc->sb->s_time_gran = arg->time_gran;
898 else
899 fc->sb->s_time_gran = 1000000000;
900
901 } else { 910 } else {
902 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 911 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
903 fc->no_lock = 1; 912 fc->no_lock = 1;
@@ -926,7 +935,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
926 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | 935 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
927 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | 936 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
928 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | 937 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
929 FUSE_WRITEBACK_CACHE; 938 FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
930 req->in.h.opcode = FUSE_INIT; 939 req->in.h.opcode = FUSE_INIT;
931 req->in.numargs = 1; 940 req->in.numargs = 1;
932 req->in.args[0].size = sizeof(*arg); 941 req->in.args[0].size = sizeof(*arg);
@@ -1006,7 +1015,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1006 1015
1007 sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION); 1016 sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION);
1008 1017
1009 if (!parse_fuse_opt((char *) data, &d, is_bdev)) 1018 if (!parse_fuse_opt(data, &d, is_bdev))
1010 goto err; 1019 goto err;
1011 1020
1012 if (is_bdev) { 1021 if (is_bdev) {
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4fc3a3046174..26b3f952e6b1 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -981,7 +981,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
981 int error = 0; 981 int error = 0;
982 982
983 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; 983 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
984 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; 984 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT;
985 985
986 mutex_lock(&fp->f_fl_mutex); 986 mutex_lock(&fp->f_fl_mutex);
987 987
@@ -991,7 +991,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
991 goto out; 991 goto out;
992 flock_lock_file_wait(file, 992 flock_lock_file_wait(file,
993 &(struct file_lock){.fl_type = F_UNLCK}); 993 &(struct file_lock){.fl_type = F_UNLCK});
994 gfs2_glock_dq_wait(fl_gh); 994 gfs2_glock_dq(fl_gh);
995 gfs2_holder_reinit(state, flags, fl_gh); 995 gfs2_holder_reinit(state, flags, fl_gh);
996 } else { 996 } else {
997 error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, 997 error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c355f7320e44..7f513b1ceb2c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -731,14 +731,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
731 cachep = gfs2_glock_aspace_cachep; 731 cachep = gfs2_glock_aspace_cachep;
732 else 732 else
733 cachep = gfs2_glock_cachep; 733 cachep = gfs2_glock_cachep;
734 gl = kmem_cache_alloc(cachep, GFP_KERNEL); 734 gl = kmem_cache_alloc(cachep, GFP_NOFS);
735 if (!gl) 735 if (!gl)
736 return -ENOMEM; 736 return -ENOMEM;
737 737
738 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); 738 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
739 739
740 if (glops->go_flags & GLOF_LVB) { 740 if (glops->go_flags & GLOF_LVB) {
741 gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL); 741 gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_NOFS);
742 if (!gl->gl_lksb.sb_lvbptr) { 742 if (!gl->gl_lksb.sb_lvbptr) {
743 kmem_cache_free(cachep, gl); 743 kmem_cache_free(cachep, gl);
744 return -ENOMEM; 744 return -ENOMEM;
@@ -856,27 +856,6 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
856} 856}
857 857
858/** 858/**
859 * gfs2_glock_holder_wait
860 * @word: unused
861 *
862 * This function and gfs2_glock_demote_wait both show up in the WCHAN
863 * field. Thus I've separated these otherwise identical functions in
864 * order to be more informative to the user.
865 */
866
867static int gfs2_glock_holder_wait(void *word)
868{
869 schedule();
870 return 0;
871}
872
873static int gfs2_glock_demote_wait(void *word)
874{
875 schedule();
876 return 0;
877}
878
879/**
880 * gfs2_glock_wait - wait on a glock acquisition 859 * gfs2_glock_wait - wait on a glock acquisition
881 * @gh: the glock holder 860 * @gh: the glock holder
882 * 861 *
@@ -888,7 +867,7 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
888 unsigned long time1 = jiffies; 867 unsigned long time1 = jiffies;
889 868
890 might_sleep(); 869 might_sleep();
891 wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); 870 wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
892 if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */ 871 if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
893 /* Lengthen the minimum hold time. */ 872 /* Lengthen the minimum hold time. */
894 gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + 873 gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
@@ -1128,7 +1107,7 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh)
1128 struct gfs2_glock *gl = gh->gh_gl; 1107 struct gfs2_glock *gl = gh->gh_gl;
1129 gfs2_glock_dq(gh); 1108 gfs2_glock_dq(gh);
1130 might_sleep(); 1109 might_sleep();
1131 wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); 1110 wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
1132} 1111}
1133 1112
1134/** 1113/**
@@ -1404,12 +1383,16 @@ __acquires(&lru_lock)
1404 gl = list_entry(list->next, struct gfs2_glock, gl_lru); 1383 gl = list_entry(list->next, struct gfs2_glock, gl_lru);
1405 list_del_init(&gl->gl_lru); 1384 list_del_init(&gl->gl_lru);
1406 if (!spin_trylock(&gl->gl_spin)) { 1385 if (!spin_trylock(&gl->gl_spin)) {
1386add_back_to_lru:
1407 list_add(&gl->gl_lru, &lru_list); 1387 list_add(&gl->gl_lru, &lru_list);
1408 atomic_inc(&lru_count); 1388 atomic_inc(&lru_count);
1409 continue; 1389 continue;
1410 } 1390 }
1391 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1392 spin_unlock(&gl->gl_spin);
1393 goto add_back_to_lru;
1394 }
1411 clear_bit(GLF_LRU, &gl->gl_flags); 1395 clear_bit(GLF_LRU, &gl->gl_flags);
1412 spin_unlock(&lru_lock);
1413 gl->gl_lockref.count++; 1396 gl->gl_lockref.count++;
1414 if (demote_ok(gl)) 1397 if (demote_ok(gl))
1415 handle_callback(gl, LM_ST_UNLOCKED, 0, false); 1398 handle_callback(gl, LM_ST_UNLOCKED, 0, false);
@@ -1417,7 +1400,7 @@ __acquires(&lru_lock)
1417 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 1400 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1418 gl->gl_lockref.count--; 1401 gl->gl_lockref.count--;
1419 spin_unlock(&gl->gl_spin); 1402 spin_unlock(&gl->gl_spin);
1420 spin_lock(&lru_lock); 1403 cond_resched_lock(&lru_lock);
1421 } 1404 }
1422} 1405}
1423 1406
@@ -1442,7 +1425,7 @@ static long gfs2_scan_glock_lru(int nr)
1442 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1425 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1443 1426
1444 /* Test for being demotable */ 1427 /* Test for being demotable */
1445 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1428 if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
1446 list_move(&gl->gl_lru, &dispose); 1429 list_move(&gl->gl_lru, &dispose);
1447 atomic_dec(&lru_count); 1430 atomic_dec(&lru_count);
1448 freed++; 1431 freed++;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index fc1100781bbc..2ffc67dce87f 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -234,8 +234,8 @@ static void inode_go_sync(struct gfs2_glock *gl)
234 * inode_go_inval - prepare a inode glock to be released 234 * inode_go_inval - prepare a inode glock to be released
235 * @gl: the glock 235 * @gl: the glock
236 * @flags: 236 * @flags:
237 * 237 *
238 * Normally we invlidate everything, but if we are moving into 238 * Normally we invalidate everything, but if we are moving into
239 * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we 239 * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
240 * can keep hold of the metadata, since it won't have changed. 240 * can keep hold of the metadata, since it won't have changed.
241 * 241 *
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 91f274de1246..641383a9c1bb 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -936,12 +936,6 @@ fail:
936 return error; 936 return error;
937} 937}
938 938
939static int dlm_recovery_wait(void *word)
940{
941 schedule();
942 return 0;
943}
944
945static int control_first_done(struct gfs2_sbd *sdp) 939static int control_first_done(struct gfs2_sbd *sdp)
946{ 940{
947 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 941 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -976,7 +970,7 @@ restart:
976 fs_info(sdp, "control_first_done wait gen %u\n", start_gen); 970 fs_info(sdp, "control_first_done wait gen %u\n", start_gen);
977 971
978 wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY, 972 wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
979 dlm_recovery_wait, TASK_UNINTERRUPTIBLE); 973 TASK_UNINTERRUPTIBLE);
980 goto restart; 974 goto restart;
981 } 975 }
982 976
@@ -1036,8 +1030,8 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
1036 1030
1037 new_size = old_size + RECOVER_SIZE_INC; 1031 new_size = old_size + RECOVER_SIZE_INC;
1038 1032
1039 submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); 1033 submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
1040 result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); 1034 result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
1041 if (!submit || !result) { 1035 if (!submit || !result) {
1042 kfree(submit); 1036 kfree(submit);
1043 kfree(result); 1037 kfree(result);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index bc564c0d6d16..d3eae244076e 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1024,20 +1024,13 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp)
1024 lm->lm_unmount(sdp); 1024 lm->lm_unmount(sdp);
1025} 1025}
1026 1026
1027static int gfs2_journalid_wait(void *word)
1028{
1029 if (signal_pending(current))
1030 return -EINTR;
1031 schedule();
1032 return 0;
1033}
1034
1035static int wait_on_journal(struct gfs2_sbd *sdp) 1027static int wait_on_journal(struct gfs2_sbd *sdp)
1036{ 1028{
1037 if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) 1029 if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
1038 return 0; 1030 return 0;
1039 1031
1040 return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, gfs2_journalid_wait, TASK_INTERRUPTIBLE); 1032 return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, TASK_INTERRUPTIBLE)
1033 ? -EINTR : 0;
1041} 1034}
1042 1035
1043void gfs2_online_uevent(struct gfs2_sbd *sdp) 1036void gfs2_online_uevent(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 94555d4c5698..573bd3b758fa 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -591,12 +591,6 @@ done:
591 wake_up_bit(&jd->jd_flags, JDF_RECOVERY); 591 wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
592} 592}
593 593
594static int gfs2_recovery_wait(void *word)
595{
596 schedule();
597 return 0;
598}
599
600int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait) 594int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
601{ 595{
602 int rv; 596 int rv;
@@ -609,7 +603,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
609 BUG_ON(!rv); 603 BUG_ON(!rv);
610 604
611 if (wait) 605 if (wait)
612 wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, 606 wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
613 TASK_UNINTERRUPTIBLE); 607 TASK_UNINTERRUPTIBLE);
614 608
615 return wait ? jd->jd_recover_error : 0; 609 return wait ? jd->jd_recover_error : 0;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index db629d1bd1bd..f4cb9c0d6bbd 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -337,7 +337,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le
337 337
338/** 338/**
339 * gfs2_free_extlen - Return extent length of free blocks 339 * gfs2_free_extlen - Return extent length of free blocks
340 * @rbm: Starting position 340 * @rrbm: Starting position
341 * @len: Max length to check 341 * @len: Max length to check
342 * 342 *
343 * Starting at the block specified by the rbm, see how many free blocks 343 * Starting at the block specified by the rbm, see how many free blocks
@@ -2522,7 +2522,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
2522 2522
2523/** 2523/**
2524 * gfs2_rlist_free - free a resource group list 2524 * gfs2_rlist_free - free a resource group list
2525 * @list: the list of resource groups 2525 * @rlist: the list of resource groups
2526 * 2526 *
2527 */ 2527 */
2528 2528
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 1319b5c4ec68..2607ff13d486 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -864,12 +864,6 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
864 return error; 864 return error;
865} 865}
866 866
867static int gfs2_umount_recovery_wait(void *word)
868{
869 schedule();
870 return 0;
871}
872
873/** 867/**
874 * gfs2_put_super - Unmount the filesystem 868 * gfs2_put_super - Unmount the filesystem
875 * @sb: The VFS superblock 869 * @sb: The VFS superblock
@@ -894,7 +888,7 @@ restart:
894 continue; 888 continue;
895 spin_unlock(&sdp->sd_jindex_spin); 889 spin_unlock(&sdp->sd_jindex_spin);
896 wait_on_bit(&jd->jd_flags, JDF_RECOVERY, 890 wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
897 gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE); 891 TASK_UNINTERRUPTIBLE);
898 goto restart; 892 goto restart;
899 } 893 }
900 spin_unlock(&sdp->sd_jindex_spin); 894 spin_unlock(&sdp->sd_jindex_spin);
diff --git a/fs/inode.c b/fs/inode.c
index 6eecb7ff0b9a..5938f3928944 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1695,13 +1695,6 @@ int inode_needs_sync(struct inode *inode)
1695} 1695}
1696EXPORT_SYMBOL(inode_needs_sync); 1696EXPORT_SYMBOL(inode_needs_sync);
1697 1697
1698int inode_wait(void *word)
1699{
1700 schedule();
1701 return 0;
1702}
1703EXPORT_SYMBOL(inode_wait);
1704
1705/* 1698/*
1706 * If we try to find an inode in the inode hash while it is being 1699 * If we try to find an inode in the inode hash while it is being
1707 * deleted, we have to wait until the filesystem completes its 1700 * deleted, we have to wait until the filesystem completes its
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6f0f590cc5a3..5f09370c90a8 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -763,12 +763,6 @@ static void warn_dirty_buffer(struct buffer_head *bh)
763 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); 763 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
764} 764}
765 765
766static int sleep_on_shadow_bh(void *word)
767{
768 io_schedule();
769 return 0;
770}
771
772/* 766/*
773 * If the buffer is already part of the current transaction, then there 767 * If the buffer is already part of the current transaction, then there
774 * is nothing we need to do. If it is already part of a prior 768 * is nothing we need to do. If it is already part of a prior
@@ -906,8 +900,8 @@ repeat:
906 if (buffer_shadow(bh)) { 900 if (buffer_shadow(bh)) {
907 JBUFFER_TRACE(jh, "on shadow: sleep"); 901 JBUFFER_TRACE(jh, "on shadow: sleep");
908 jbd_unlock_bh_state(bh); 902 jbd_unlock_bh_state(bh);
909 wait_on_bit(&bh->b_state, BH_Shadow, 903 wait_on_bit_io(&bh->b_state, BH_Shadow,
910 sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE); 904 TASK_UNINTERRUPTIBLE);
911 goto repeat; 905 goto repeat;
912 } 906 }
913 907
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index d895b4b7b661..4429d6d9217f 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -896,7 +896,7 @@ const struct file_operations kernfs_file_fops = {
896 * @ops: kernfs operations for the file 896 * @ops: kernfs operations for the file
897 * @priv: private data for the file 897 * @priv: private data for the file
898 * @ns: optional namespace tag of the file 898 * @ns: optional namespace tag of the file
899 * @static_name: don't copy file name 899 * @name_is_static: don't copy file name
900 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep 900 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
901 * 901 *
902 * Returns the created node on success, ERR_PTR() value on error. 902 * Returns the created node on success, ERR_PTR() value on error.
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 1812f026960c..daa8e7514eae 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -306,11 +306,9 @@ static struct nsm_handle *nsm_lookup_priv(const struct nsm_private *priv)
306static void nsm_init_private(struct nsm_handle *nsm) 306static void nsm_init_private(struct nsm_handle *nsm)
307{ 307{
308 u64 *p = (u64 *)&nsm->sm_priv.data; 308 u64 *p = (u64 *)&nsm->sm_priv.data;
309 struct timespec ts;
310 s64 ns; 309 s64 ns;
311 310
312 ktime_get_ts(&ts); 311 ns = ktime_get_ns();
313 ns = timespec_to_ns(&ts);
314 put_unaligned(ns, p); 312 put_unaligned(ns, p);
315 put_unaligned((unsigned long)nsm, p + 1); 313 put_unaligned((unsigned long)nsm, p + 1);
316} 314}
diff --git a/fs/locks.c b/fs/locks.c
index 717fbc404e6b..a6f54802d277 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -325,7 +325,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock,
325 return -ENOMEM; 325 return -ENOMEM;
326 326
327 fl->fl_file = filp; 327 fl->fl_file = filp;
328 fl->fl_owner = (fl_owner_t)filp; 328 fl->fl_owner = filp;
329 fl->fl_pid = current->tgid; 329 fl->fl_pid = current->tgid;
330 fl->fl_flags = FL_FLOCK; 330 fl->fl_flags = FL_FLOCK;
331 fl->fl_type = type; 331 fl->fl_type = type;
@@ -431,7 +431,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl)
431 if (assign_type(fl, type) != 0) 431 if (assign_type(fl, type) != 0)
432 return -EINVAL; 432 return -EINVAL;
433 433
434 fl->fl_owner = (fl_owner_t)current->files; 434 fl->fl_owner = current->files;
435 fl->fl_pid = current->tgid; 435 fl->fl_pid = current->tgid;
436 436
437 fl->fl_file = filp; 437 fl->fl_file = filp;
@@ -1155,7 +1155,6 @@ EXPORT_SYMBOL(posix_lock_file_wait);
1155int locks_mandatory_locked(struct file *file) 1155int locks_mandatory_locked(struct file *file)
1156{ 1156{
1157 struct inode *inode = file_inode(file); 1157 struct inode *inode = file_inode(file);
1158 fl_owner_t owner = current->files;
1159 struct file_lock *fl; 1158 struct file_lock *fl;
1160 1159
1161 /* 1160 /*
@@ -1165,7 +1164,8 @@ int locks_mandatory_locked(struct file *file)
1165 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 1164 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
1166 if (!IS_POSIX(fl)) 1165 if (!IS_POSIX(fl))
1167 continue; 1166 continue;
1168 if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file) 1167 if (fl->fl_owner != current->files &&
1168 fl->fl_owner != file)
1169 break; 1169 break;
1170 } 1170 }
1171 spin_unlock(&inode->i_lock); 1171 spin_unlock(&inode->i_lock);
@@ -1205,7 +1205,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
1205 1205
1206 for (;;) { 1206 for (;;) {
1207 if (filp) { 1207 if (filp) {
1208 fl.fl_owner = (fl_owner_t)filp; 1208 fl.fl_owner = filp;
1209 fl.fl_flags &= ~FL_SLEEP; 1209 fl.fl_flags &= ~FL_SLEEP;
1210 error = __posix_lock_file(inode, &fl, NULL); 1210 error = __posix_lock_file(inode, &fl, NULL);
1211 if (!error) 1211 if (!error)
@@ -1948,7 +1948,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
1948 1948
1949 cmd = F_GETLK; 1949 cmd = F_GETLK;
1950 file_lock.fl_flags |= FL_OFDLCK; 1950 file_lock.fl_flags |= FL_OFDLCK;
1951 file_lock.fl_owner = (fl_owner_t)filp; 1951 file_lock.fl_owner = filp;
1952 } 1952 }
1953 1953
1954 error = vfs_test_lock(filp, &file_lock); 1954 error = vfs_test_lock(filp, &file_lock);
@@ -2103,7 +2103,7 @@ again:
2103 2103
2104 cmd = F_SETLK; 2104 cmd = F_SETLK;
2105 file_lock->fl_flags |= FL_OFDLCK; 2105 file_lock->fl_flags |= FL_OFDLCK;
2106 file_lock->fl_owner = (fl_owner_t)filp; 2106 file_lock->fl_owner = filp;
2107 break; 2107 break;
2108 case F_OFD_SETLKW: 2108 case F_OFD_SETLKW:
2109 error = -EINVAL; 2109 error = -EINVAL;
@@ -2112,7 +2112,7 @@ again:
2112 2112
2113 cmd = F_SETLKW; 2113 cmd = F_SETLKW;
2114 file_lock->fl_flags |= FL_OFDLCK; 2114 file_lock->fl_flags |= FL_OFDLCK;
2115 file_lock->fl_owner = (fl_owner_t)filp; 2115 file_lock->fl_owner = filp;
2116 /* Fallthrough */ 2116 /* Fallthrough */
2117 case F_SETLKW: 2117 case F_SETLKW:
2118 file_lock->fl_flags |= FL_SLEEP; 2118 file_lock->fl_flags |= FL_SLEEP;
@@ -2170,7 +2170,7 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
2170 2170
2171 cmd = F_GETLK64; 2171 cmd = F_GETLK64;
2172 file_lock.fl_flags |= FL_OFDLCK; 2172 file_lock.fl_flags |= FL_OFDLCK;
2173 file_lock.fl_owner = (fl_owner_t)filp; 2173 file_lock.fl_owner = filp;
2174 } 2174 }
2175 2175
2176 error = vfs_test_lock(filp, &file_lock); 2176 error = vfs_test_lock(filp, &file_lock);
@@ -2242,7 +2242,7 @@ again:
2242 2242
2243 cmd = F_SETLK64; 2243 cmd = F_SETLK64;
2244 file_lock->fl_flags |= FL_OFDLCK; 2244 file_lock->fl_flags |= FL_OFDLCK;
2245 file_lock->fl_owner = (fl_owner_t)filp; 2245 file_lock->fl_owner = filp;
2246 break; 2246 break;
2247 case F_OFD_SETLKW: 2247 case F_OFD_SETLKW:
2248 error = -EINVAL; 2248 error = -EINVAL;
@@ -2251,7 +2251,7 @@ again:
2251 2251
2252 cmd = F_SETLKW64; 2252 cmd = F_SETLKW64;
2253 file_lock->fl_flags |= FL_OFDLCK; 2253 file_lock->fl_flags |= FL_OFDLCK;
2254 file_lock->fl_owner = (fl_owner_t)filp; 2254 file_lock->fl_owner = filp;
2255 /* Fallthrough */ 2255 /* Fallthrough */
2256 case F_SETLKW64: 2256 case F_SETLKW64:
2257 file_lock->fl_flags |= FL_SLEEP; 2257 file_lock->fl_flags |= FL_SLEEP;
@@ -2324,11 +2324,11 @@ void locks_remove_file(struct file *filp)
2324 if (!inode->i_flock) 2324 if (!inode->i_flock)
2325 return; 2325 return;
2326 2326
2327 locks_remove_posix(filp, (fl_owner_t)filp); 2327 locks_remove_posix(filp, filp);
2328 2328
2329 if (filp->f_op->flock) { 2329 if (filp->f_op->flock) {
2330 struct file_lock fl = { 2330 struct file_lock fl = {
2331 .fl_owner = (fl_owner_t)filp, 2331 .fl_owner = filp,
2332 .fl_pid = current->tgid, 2332 .fl_pid = current->tgid,
2333 .fl_file = filp, 2333 .fl_file = filp,
2334 .fl_flags = FL_FLOCK, 2334 .fl_flags = FL_FLOCK,
diff --git a/fs/namei.c b/fs/namei.c
index 985c6f368485..9eb787e5c167 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2256,9 +2256,10 @@ done:
2256 goto out; 2256 goto out;
2257 } 2257 }
2258 path->dentry = dentry; 2258 path->dentry = dentry;
2259 path->mnt = mntget(nd->path.mnt); 2259 path->mnt = nd->path.mnt;
2260 if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW)) 2260 if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
2261 return 1; 2261 return 1;
2262 mntget(path->mnt);
2262 follow_mount(path); 2263 follow_mount(path);
2263 error = 0; 2264 error = 0;
2264out: 2265out:
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8f98138cbc43..f11b9eed0de1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -756,7 +756,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
756 spin_unlock(&dreq->lock); 756 spin_unlock(&dreq->lock);
757 757
758 while (!list_empty(&hdr->pages)) { 758 while (!list_empty(&hdr->pages)) {
759 bool do_destroy = true;
760 759
761 req = nfs_list_entry(hdr->pages.next); 760 req = nfs_list_entry(hdr->pages.next);
762 nfs_list_remove_request(req); 761 nfs_list_remove_request(req);
@@ -765,7 +764,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
765 case NFS_IOHDR_NEED_COMMIT: 764 case NFS_IOHDR_NEED_COMMIT:
766 kref_get(&req->wb_kref); 765 kref_get(&req->wb_kref);
767 nfs_mark_request_commit(req, hdr->lseg, &cinfo); 766 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
768 do_destroy = false;
769 } 767 }
770 nfs_unlock_and_release_request(req); 768 nfs_unlock_and_release_request(req);
771 } 769 }
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4042ff58fe3f..524dd80d1898 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -361,8 +361,8 @@ start:
361 * Prevent starvation issues if someone is doing a consistency 361 * Prevent starvation issues if someone is doing a consistency
362 * sync-to-disk 362 * sync-to-disk
363 */ 363 */
364 ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, 364 ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
365 nfs_wait_bit_killable, TASK_KILLABLE); 365 nfs_wait_bit_killable, TASK_KILLABLE);
366 if (ret) 366 if (ret)
367 return ret; 367 return ret;
368 368
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 44bf0140a4c7..e2a0361e24c6 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -783,8 +783,8 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
783static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) 783static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
784{ 784{
785 might_sleep(); 785 might_sleep();
786 wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, 786 wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING,
787 nfs_wait_bit_killable, TASK_KILLABLE); 787 nfs_wait_bit_killable, TASK_KILLABLE);
788} 788}
789 789
790static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) 790static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 567983d2c0eb..7dd55b745c4d 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -174,7 +174,9 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
174 174
175static struct key_type key_type_id_resolver = { 175static struct key_type key_type_id_resolver = {
176 .name = "id_resolver", 176 .name = "id_resolver",
177 .instantiate = user_instantiate, 177 .preparse = user_preparse,
178 .free_preparse = user_free_preparse,
179 .instantiate = generic_key_instantiate,
178 .match = user_match, 180 .match = user_match,
179 .revoke = user_revoke, 181 .revoke = user_revoke,
180 .destroy = user_destroy, 182 .destroy = user_destroy,
@@ -282,6 +284,8 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
282 desc, "", 0, idmap); 284 desc, "", 0, idmap);
283 mutex_unlock(&idmap->idmap_mutex); 285 mutex_unlock(&idmap->idmap_mutex);
284 } 286 }
287 if (!IS_ERR(rkey))
288 set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags);
285 289
286 kfree(desc); 290 kfree(desc);
287 return rkey; 291 return rkey;
@@ -394,7 +398,9 @@ static const struct rpc_pipe_ops idmap_upcall_ops = {
394 398
395static struct key_type key_type_id_resolver_legacy = { 399static struct key_type key_type_id_resolver_legacy = {
396 .name = "id_legacy", 400 .name = "id_legacy",
397 .instantiate = user_instantiate, 401 .preparse = user_preparse,
402 .free_preparse = user_free_preparse,
403 .instantiate = generic_key_instantiate,
398 .match = user_match, 404 .match = user_match,
399 .revoke = user_revoke, 405 .revoke = user_revoke,
400 .destroy = user_destroy, 406 .destroy = user_destroy,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9927913c97c2..abd37a380535 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -75,7 +75,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
75 * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks 75 * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
76 * @word: long word containing the bit lock 76 * @word: long word containing the bit lock
77 */ 77 */
78int nfs_wait_bit_killable(void *word) 78int nfs_wait_bit_killable(struct wait_bit_key *key)
79{ 79{
80 if (fatal_signal_pending(current)) 80 if (fatal_signal_pending(current))
81 return -ERESTARTSYS; 81 return -ERESTARTSYS;
@@ -1074,8 +1074,8 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
1074 * the bit lock here if it looks like we're going to be doing that. 1074 * the bit lock here if it looks like we're going to be doing that.
1075 */ 1075 */
1076 for (;;) { 1076 for (;;) {
1077 ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING, 1077 ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
1078 nfs_wait_bit_killable, TASK_KILLABLE); 1078 nfs_wait_bit_killable, TASK_KILLABLE);
1079 if (ret) 1079 if (ret)
1080 goto out; 1080 goto out;
1081 spin_lock(&inode->i_lock); 1081 spin_lock(&inode->i_lock);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 82ddbf46660e..617f36611d4a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -244,6 +244,7 @@ void nfs_pgio_data_release(struct nfs_pgio_data *);
244int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); 244int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
245int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, 245int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
246 const struct rpc_call_ops *, int, int); 246 const struct rpc_call_ops *, int, int);
247void nfs_free_request(struct nfs_page *req);
247 248
248static inline void nfs_iocounter_init(struct nfs_io_counter *c) 249static inline void nfs_iocounter_init(struct nfs_io_counter *c)
249{ 250{
@@ -347,7 +348,7 @@ extern int nfs_drop_inode(struct inode *);
347extern void nfs_clear_inode(struct inode *); 348extern void nfs_clear_inode(struct inode *);
348extern void nfs_evict_inode(struct inode *); 349extern void nfs_evict_inode(struct inode *);
349void nfs_zap_acl_cache(struct inode *inode); 350void nfs_zap_acl_cache(struct inode *inode);
350extern int nfs_wait_bit_killable(void *word); 351extern int nfs_wait_bit_killable(struct wait_bit_key *key);
351 352
352/* super.c */ 353/* super.c */
353extern const struct super_operations nfs_sops; 354extern const struct super_operations nfs_sops;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 871d6eda8dba..8f854dde4150 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -247,3 +247,46 @@ const struct xattr_handler *nfs3_xattr_handlers[] = {
247 &posix_acl_default_xattr_handler, 247 &posix_acl_default_xattr_handler,
248 NULL, 248 NULL,
249}; 249};
250
251static int
252nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
253 size_t size, ssize_t *result)
254{
255 struct posix_acl *acl;
256 char *p = data + *result;
257
258 acl = get_acl(inode, type);
259 if (!acl)
260 return 0;
261
262 posix_acl_release(acl);
263
264 *result += strlen(name);
265 *result += 1;
266 if (!size)
267 return 0;
268 if (*result > size)
269 return -ERANGE;
270
271 strcpy(p, name);
272 return 0;
273}
274
275ssize_t
276nfs3_listxattr(struct dentry *dentry, char *data, size_t size)
277{
278 struct inode *inode = dentry->d_inode;
279 ssize_t result = 0;
280 int error;
281
282 error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS,
283 POSIX_ACL_XATTR_ACCESS, data, size, &result);
284 if (error)
285 return error;
286
287 error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT,
288 POSIX_ACL_XATTR_DEFAULT, data, size, &result);
289 if (error)
290 return error;
291 return result;
292}
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index e7daa42bbc86..f0afa291fd58 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -885,7 +885,7 @@ static const struct inode_operations nfs3_dir_inode_operations = {
885 .getattr = nfs_getattr, 885 .getattr = nfs_getattr,
886 .setattr = nfs_setattr, 886 .setattr = nfs_setattr,
887#ifdef CONFIG_NFS_V3_ACL 887#ifdef CONFIG_NFS_V3_ACL
888 .listxattr = generic_listxattr, 888 .listxattr = nfs3_listxattr,
889 .getxattr = generic_getxattr, 889 .getxattr = generic_getxattr,
890 .setxattr = generic_setxattr, 890 .setxattr = generic_setxattr,
891 .removexattr = generic_removexattr, 891 .removexattr = generic_removexattr,
@@ -899,7 +899,7 @@ static const struct inode_operations nfs3_file_inode_operations = {
899 .getattr = nfs_getattr, 899 .getattr = nfs_getattr,
900 .setattr = nfs_setattr, 900 .setattr = nfs_setattr,
901#ifdef CONFIG_NFS_V3_ACL 901#ifdef CONFIG_NFS_V3_ACL
902 .listxattr = generic_listxattr, 902 .listxattr = nfs3_listxattr,
903 .getxattr = generic_getxattr, 903 .getxattr = generic_getxattr,
904 .setxattr = generic_setxattr, 904 .setxattr = generic_setxattr,
905 .removexattr = generic_removexattr, 905 .removexattr = generic_removexattr,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 848f6853c59e..42f121182167 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1251,8 +1251,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
1251 might_sleep(); 1251 might_sleep();
1252 1252
1253 atomic_inc(&clp->cl_count); 1253 atomic_inc(&clp->cl_count);
1254 res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, 1254 res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
1255 nfs_wait_bit_killable, TASK_KILLABLE); 1255 nfs_wait_bit_killable, TASK_KILLABLE);
1256 if (res) 1256 if (res)
1257 goto out; 1257 goto out;
1258 if (clp->cl_cons_state < 0) 1258 if (clp->cl_cons_state < 0)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index b6ee3a6ee96d..0be5050638f7 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -29,8 +29,6 @@
29static struct kmem_cache *nfs_page_cachep; 29static struct kmem_cache *nfs_page_cachep;
30static const struct rpc_call_ops nfs_pgio_common_ops; 30static const struct rpc_call_ops nfs_pgio_common_ops;
31 31
32static void nfs_free_request(struct nfs_page *);
33
34static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) 32static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
35{ 33{
36 p->npages = pagecount; 34 p->npages = pagecount;
@@ -117,7 +115,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
117 set_bit(NFS_IO_INPROGRESS, &c->flags); 115 set_bit(NFS_IO_INPROGRESS, &c->flags);
118 if (atomic_read(&c->io_count) == 0) 116 if (atomic_read(&c->io_count) == 0)
119 break; 117 break;
120 ret = nfs_wait_bit_killable(&c->flags); 118 ret = nfs_wait_bit_killable(&q.key);
121 } while (atomic_read(&c->io_count) != 0); 119 } while (atomic_read(&c->io_count) != 0);
122 finish_wait(wq, &q.wait); 120 finish_wait(wq, &q.wait);
123 return ret; 121 return ret;
@@ -138,12 +136,6 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
138 return __nfs_iocounter_wait(c); 136 return __nfs_iocounter_wait(c);
139} 137}
140 138
141static int nfs_wait_bit_uninterruptible(void *word)
142{
143 io_schedule();
144 return 0;
145}
146
147/* 139/*
148 * nfs_page_group_lock - lock the head of the page group 140 * nfs_page_group_lock - lock the head of the page group
149 * @req - request in group that is to be locked 141 * @req - request in group that is to be locked
@@ -158,7 +150,6 @@ nfs_page_group_lock(struct nfs_page *req)
158 WARN_ON_ONCE(head != head->wb_head); 150 WARN_ON_ONCE(head != head->wb_head);
159 151
160 wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, 152 wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
161 nfs_wait_bit_uninterruptible,
162 TASK_UNINTERRUPTIBLE); 153 TASK_UNINTERRUPTIBLE);
163} 154}
164 155
@@ -239,20 +230,28 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
239 WARN_ON_ONCE(prev == req); 230 WARN_ON_ONCE(prev == req);
240 231
241 if (!prev) { 232 if (!prev) {
233 /* a head request */
242 req->wb_head = req; 234 req->wb_head = req;
243 req->wb_this_page = req; 235 req->wb_this_page = req;
244 } else { 236 } else {
237 /* a subrequest */
245 WARN_ON_ONCE(prev->wb_this_page != prev->wb_head); 238 WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
246 WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags)); 239 WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
247 req->wb_head = prev->wb_head; 240 req->wb_head = prev->wb_head;
248 req->wb_this_page = prev->wb_this_page; 241 req->wb_this_page = prev->wb_this_page;
249 prev->wb_this_page = req; 242 prev->wb_this_page = req;
250 243
244 /* All subrequests take a ref on the head request until
245 * nfs_page_group_destroy is called */
246 kref_get(&req->wb_head->wb_kref);
247
251 /* grab extra ref if head request has extra ref from 248 /* grab extra ref if head request has extra ref from
252 * the write/commit path to handle handoff between write 249 * the write/commit path to handle handoff between write
253 * and commit lists */ 250 * and commit lists */
254 if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) 251 if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
252 set_bit(PG_INODE_REF, &req->wb_flags);
255 kref_get(&req->wb_kref); 253 kref_get(&req->wb_kref);
254 }
256 } 255 }
257} 256}
258 257
@@ -269,6 +268,10 @@ nfs_page_group_destroy(struct kref *kref)
269 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); 268 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
270 struct nfs_page *tmp, *next; 269 struct nfs_page *tmp, *next;
271 270
271 /* subrequests must release the ref on the head request */
272 if (req->wb_head != req)
273 nfs_release_request(req->wb_head);
274
272 if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) 275 if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
273 return; 276 return;
274 277
@@ -394,7 +397,7 @@ static void nfs_clear_request(struct nfs_page *req)
394 * 397 *
395 * Note: Should never be called with the spinlock held! 398 * Note: Should never be called with the spinlock held!
396 */ 399 */
397static void nfs_free_request(struct nfs_page *req) 400void nfs_free_request(struct nfs_page *req)
398{ 401{
399 WARN_ON_ONCE(req->wb_this_page != req); 402 WARN_ON_ONCE(req->wb_this_page != req);
400 403
@@ -425,9 +428,8 @@ void nfs_release_request(struct nfs_page *req)
425int 428int
426nfs_wait_on_request(struct nfs_page *req) 429nfs_wait_on_request(struct nfs_page *req)
427{ 430{
428 return wait_on_bit(&req->wb_flags, PG_BUSY, 431 return wait_on_bit_io(&req->wb_flags, PG_BUSY,
429 nfs_wait_bit_uninterruptible, 432 TASK_UNINTERRUPTIBLE);
430 TASK_UNINTERRUPTIBLE);
431} 433}
432 434
433/* 435/*
@@ -925,7 +927,6 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
925 nfs_pageio_doio(desc); 927 nfs_pageio_doio(desc);
926 if (desc->pg_error < 0) 928 if (desc->pg_error < 0)
927 return 0; 929 return 0;
928 desc->pg_moreio = 0;
929 if (desc->pg_recoalesce) 930 if (desc->pg_recoalesce)
930 return 0; 931 return 0;
931 /* retry add_request for this subreq */ 932 /* retry add_request for this subreq */
@@ -972,6 +973,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
972 desc->pg_count = 0; 973 desc->pg_count = 0;
973 desc->pg_base = 0; 974 desc->pg_base = 0;
974 desc->pg_recoalesce = 0; 975 desc->pg_recoalesce = 0;
976 desc->pg_moreio = 0;
975 977
976 while (!list_empty(&head)) { 978 while (!list_empty(&head)) {
977 struct nfs_page *req; 979 struct nfs_page *req;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6fdcd233d6f7..a8914b335617 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1885,7 +1885,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1885 if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { 1885 if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
1886 if (!sync) 1886 if (!sync)
1887 goto out; 1887 goto out;
1888 status = wait_on_bit_lock(&nfsi->flags, 1888 status = wait_on_bit_lock_action(&nfsi->flags,
1889 NFS_INO_LAYOUTCOMMITTING, 1889 NFS_INO_LAYOUTCOMMITTING,
1890 nfs_wait_bit_killable, 1890 nfs_wait_bit_killable,
1891 TASK_KILLABLE); 1891 TASK_KILLABLE);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 98ff061ccaf3..962c9ee758be 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_commit_ops;
46static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; 46static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
47static const struct nfs_commit_completion_ops nfs_commit_completion_ops; 47static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
48static const struct nfs_rw_ops nfs_rw_write_ops; 48static const struct nfs_rw_ops nfs_rw_write_ops;
49static void nfs_clear_request_commit(struct nfs_page *req);
49 50
50static struct kmem_cache *nfs_wdata_cachep; 51static struct kmem_cache *nfs_wdata_cachep;
51static mempool_t *nfs_wdata_mempool; 52static mempool_t *nfs_wdata_mempool;
@@ -91,8 +92,15 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
91 set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); 92 set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
92} 93}
93 94
95/*
96 * nfs_page_find_head_request_locked - find head request associated with @page
97 *
98 * must be called while holding the inode lock.
99 *
100 * returns matching head request with reference held, or NULL if not found.
101 */
94static struct nfs_page * 102static struct nfs_page *
95nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) 103nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
96{ 104{
97 struct nfs_page *req = NULL; 105 struct nfs_page *req = NULL;
98 106
@@ -104,25 +112,33 @@ nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
104 /* Linearly search the commit list for the correct req */ 112 /* Linearly search the commit list for the correct req */
105 list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) { 113 list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
106 if (freq->wb_page == page) { 114 if (freq->wb_page == page) {
107 req = freq; 115 req = freq->wb_head;
108 break; 116 break;
109 } 117 }
110 } 118 }
111 } 119 }
112 120
113 if (req) 121 if (req) {
122 WARN_ON_ONCE(req->wb_head != req);
123
114 kref_get(&req->wb_kref); 124 kref_get(&req->wb_kref);
125 }
115 126
116 return req; 127 return req;
117} 128}
118 129
119static struct nfs_page *nfs_page_find_request(struct page *page) 130/*
131 * nfs_page_find_head_request - find head request associated with @page
132 *
133 * returns matching head request with reference held, or NULL if not found.
134 */
135static struct nfs_page *nfs_page_find_head_request(struct page *page)
120{ 136{
121 struct inode *inode = page_file_mapping(page)->host; 137 struct inode *inode = page_file_mapping(page)->host;
122 struct nfs_page *req = NULL; 138 struct nfs_page *req = NULL;
123 139
124 spin_lock(&inode->i_lock); 140 spin_lock(&inode->i_lock);
125 req = nfs_page_find_request_locked(NFS_I(inode), page); 141 req = nfs_page_find_head_request_locked(NFS_I(inode), page);
126 spin_unlock(&inode->i_lock); 142 spin_unlock(&inode->i_lock);
127 return req; 143 return req;
128} 144}
@@ -274,36 +290,246 @@ static void nfs_end_page_writeback(struct nfs_page *req)
274 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 290 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
275} 291}
276 292
277static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock) 293
294/* nfs_page_group_clear_bits
295 * @req - an nfs request
296 * clears all page group related bits from @req
297 */
298static void
299nfs_page_group_clear_bits(struct nfs_page *req)
300{
301 clear_bit(PG_TEARDOWN, &req->wb_flags);
302 clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
303 clear_bit(PG_UPTODATE, &req->wb_flags);
304 clear_bit(PG_WB_END, &req->wb_flags);
305 clear_bit(PG_REMOVE, &req->wb_flags);
306}
307
308
309/*
310 * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
311 *
312 * this is a helper function for nfs_lock_and_join_requests
313 *
314 * @inode - inode associated with request page group, must be holding inode lock
315 * @head - head request of page group, must be holding head lock
316 * @req - request that couldn't lock and needs to wait on the req bit lock
317 * @nonblock - if true, don't actually wait
318 *
319 * NOTE: this must be called holding page_group bit lock and inode spin lock
320 * and BOTH will be released before returning.
321 *
322 * returns 0 on success, < 0 on error.
323 */
324static int
325nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
326 struct nfs_page *req, bool nonblock)
327 __releases(&inode->i_lock)
328{
329 struct nfs_page *tmp;
330 int ret;
331
332 /* relinquish all the locks successfully grabbed this run */
333 for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
334 nfs_unlock_request(tmp);
335
336 WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
337
338 /* grab a ref on the request that will be waited on */
339 kref_get(&req->wb_kref);
340
341 nfs_page_group_unlock(head);
342 spin_unlock(&inode->i_lock);
343
344 /* release ref from nfs_page_find_head_request_locked */
345 nfs_release_request(head);
346
347 if (!nonblock)
348 ret = nfs_wait_on_request(req);
349 else
350 ret = -EAGAIN;
351 nfs_release_request(req);
352
353 return ret;
354}
355
356/*
357 * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
358 *
359 * @destroy_list - request list (using wb_this_page) terminated by @old_head
360 * @old_head - the old head of the list
361 *
362 * All subrequests must be locked and removed from all lists, so at this point
363 * they are only "active" in this function, and possibly in nfs_wait_on_request
364 * with a reference held by some other context.
365 */
366static void
367nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
368 struct nfs_page *old_head)
369{
370 while (destroy_list) {
371 struct nfs_page *subreq = destroy_list;
372
373 destroy_list = (subreq->wb_this_page == old_head) ?
374 NULL : subreq->wb_this_page;
375
376 WARN_ON_ONCE(old_head != subreq->wb_head);
377
378 /* make sure old group is not used */
379 subreq->wb_head = subreq;
380 subreq->wb_this_page = subreq;
381
382 nfs_clear_request_commit(subreq);
383
384 /* subreq is now totally disconnected from page group or any
385 * write / commit lists. last chance to wake any waiters */
386 nfs_unlock_request(subreq);
387
388 if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
389 /* release ref on old head request */
390 nfs_release_request(old_head);
391
392 nfs_page_group_clear_bits(subreq);
393
394 /* release the PG_INODE_REF reference */
395 if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
396 nfs_release_request(subreq);
397 else
398 WARN_ON_ONCE(1);
399 } else {
400 WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
401 /* zombie requests have already released the last
402 * reference and were waiting on the rest of the
403 * group to complete. Since it's no longer part of a
404 * group, simply free the request */
405 nfs_page_group_clear_bits(subreq);
406 nfs_free_request(subreq);
407 }
408 }
409}
410
411/*
412 * nfs_lock_and_join_requests - join all subreqs to the head req and return
413 * a locked reference, cancelling any pending
414 * operations for this page.
415 *
416 * @page - the page used to lookup the "page group" of nfs_page structures
417 * @nonblock - if true, don't block waiting for request locks
418 *
419 * This function joins all sub requests to the head request by first
420 * locking all requests in the group, cancelling any pending operations
421 * and finally updating the head request to cover the whole range covered by
422 * the (former) group. All subrequests are removed from any write or commit
423 * lists, unlinked from the group and destroyed.
424 *
425 * Returns a locked, referenced pointer to the head request - which after
426 * this call is guaranteed to be the only request associated with the page.
427 * Returns NULL if no requests are found for @page, or a ERR_PTR if an
428 * error was encountered.
429 */
430static struct nfs_page *
431nfs_lock_and_join_requests(struct page *page, bool nonblock)
278{ 432{
279 struct inode *inode = page_file_mapping(page)->host; 433 struct inode *inode = page_file_mapping(page)->host;
280 struct nfs_page *req; 434 struct nfs_page *head, *subreq;
435 struct nfs_page *destroy_list = NULL;
436 unsigned int total_bytes;
281 int ret; 437 int ret;
282 438
439try_again:
440 total_bytes = 0;
441
442 WARN_ON_ONCE(destroy_list);
443
283 spin_lock(&inode->i_lock); 444 spin_lock(&inode->i_lock);
284 for (;;) { 445
285 req = nfs_page_find_request_locked(NFS_I(inode), page); 446 /*
286 if (req == NULL) 447 * A reference is taken only on the head request which acts as a
287 break; 448 * reference to the whole page group - the group will not be destroyed
288 if (nfs_lock_request(req)) 449 * until the head reference is released.
289 break; 450 */
290 /* Note: If we hold the page lock, as is the case in nfs_writepage, 451 head = nfs_page_find_head_request_locked(NFS_I(inode), page);
291 * then the call to nfs_lock_request() will always 452
292 * succeed provided that someone hasn't already marked the 453 if (!head) {
293 * request as dirty (in which case we don't care).
294 */
295 spin_unlock(&inode->i_lock); 454 spin_unlock(&inode->i_lock);
296 if (!nonblock) 455 return NULL;
297 ret = nfs_wait_on_request(req); 456 }
298 else 457
299 ret = -EAGAIN; 458 /* lock each request in the page group */
300 nfs_release_request(req); 459 nfs_page_group_lock(head);
301 if (ret != 0) 460 subreq = head;
461 do {
462 /*
463 * Subrequests are always contiguous, non overlapping
464 * and in order. If not, it's a programming error.
465 */
466 WARN_ON_ONCE(subreq->wb_offset !=
467 (head->wb_offset + total_bytes));
468
469 /* keep track of how many bytes this group covers */
470 total_bytes += subreq->wb_bytes;
471
472 if (!nfs_lock_request(subreq)) {
473 /* releases page group bit lock and
474 * inode spin lock and all references */
475 ret = nfs_unroll_locks_and_wait(inode, head,
476 subreq, nonblock);
477
478 if (ret == 0)
479 goto try_again;
480
302 return ERR_PTR(ret); 481 return ERR_PTR(ret);
303 spin_lock(&inode->i_lock); 482 }
483
484 subreq = subreq->wb_this_page;
485 } while (subreq != head);
486
487 /* Now that all requests are locked, make sure they aren't on any list.
488 * Commit list removal accounting is done after locks are dropped */
489 subreq = head;
490 do {
491 nfs_list_remove_request(subreq);
492 subreq = subreq->wb_this_page;
493 } while (subreq != head);
494
495 /* unlink subrequests from head, destroy them later */
496 if (head->wb_this_page != head) {
497 /* destroy list will be terminated by head */
498 destroy_list = head->wb_this_page;
499 head->wb_this_page = head;
500
501 /* change head request to cover whole range that
502 * the former page group covered */
503 head->wb_bytes = total_bytes;
304 } 504 }
505
506 /*
507 * prepare head request to be added to new pgio descriptor
508 */
509 nfs_page_group_clear_bits(head);
510
511 /*
512 * some part of the group was still on the inode list - otherwise
513 * the group wouldn't be involved in async write.
514 * grab a reference for the head request, iff it needs one.
515 */
516 if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
517 kref_get(&head->wb_kref);
518
519 nfs_page_group_unlock(head);
520
521 /* drop lock to clear_request_commit the head req and clean up
522 * requests on destroy list */
305 spin_unlock(&inode->i_lock); 523 spin_unlock(&inode->i_lock);
306 return req; 524
525 nfs_destroy_unlinked_subrequests(destroy_list, head);
526
527 /* clean up commit list state */
528 nfs_clear_request_commit(head);
529
530 /* still holds ref on head from nfs_page_find_head_request_locked
531 * and still has lock on head from lock loop */
532 return head;
307} 533}
308 534
309/* 535/*
@@ -316,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
316 struct nfs_page *req; 542 struct nfs_page *req;
317 int ret = 0; 543 int ret = 0;
318 544
319 req = nfs_find_and_lock_request(page, nonblock); 545 req = nfs_lock_and_join_requests(page, nonblock);
320 if (!req) 546 if (!req)
321 goto out; 547 goto out;
322 ret = PTR_ERR(req); 548 ret = PTR_ERR(req);
@@ -397,7 +623,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
397 int err; 623 int err;
398 624
399 /* Stop dirtying of new pages while we sync */ 625 /* Stop dirtying of new pages while we sync */
400 err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING, 626 err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING,
401 nfs_wait_bit_killable, TASK_KILLABLE); 627 nfs_wait_bit_killable, TASK_KILLABLE);
402 if (err) 628 if (err)
403 goto out_err; 629 goto out_err;
@@ -448,7 +674,9 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
448 set_page_private(req->wb_page, (unsigned long)req); 674 set_page_private(req->wb_page, (unsigned long)req);
449 } 675 }
450 nfsi->npages++; 676 nfsi->npages++;
451 set_bit(PG_INODE_REF, &req->wb_flags); 677 /* this a head request for a page group - mark it as having an
678 * extra reference so sub groups can follow suit */
679 WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
452 kref_get(&req->wb_kref); 680 kref_get(&req->wb_kref);
453 spin_unlock(&inode->i_lock); 681 spin_unlock(&inode->i_lock);
454} 682}
@@ -474,7 +702,9 @@ static void nfs_inode_remove_request(struct nfs_page *req)
474 nfsi->npages--; 702 nfsi->npages--;
475 spin_unlock(&inode->i_lock); 703 spin_unlock(&inode->i_lock);
476 } 704 }
477 nfs_release_request(req); 705
706 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
707 nfs_release_request(req);
478} 708}
479 709
480static void 710static void
@@ -638,7 +868,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
638{ 868{
639 struct nfs_commit_info cinfo; 869 struct nfs_commit_info cinfo;
640 unsigned long bytes = 0; 870 unsigned long bytes = 0;
641 bool do_destroy;
642 871
643 if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) 872 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
644 goto out; 873 goto out;
@@ -668,7 +897,6 @@ remove_req:
668next: 897next:
669 nfs_unlock_request(req); 898 nfs_unlock_request(req);
670 nfs_end_page_writeback(req); 899 nfs_end_page_writeback(req);
671 do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
672 nfs_release_request(req); 900 nfs_release_request(req);
673 } 901 }
674out: 902out:
@@ -769,7 +997,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
769 spin_lock(&inode->i_lock); 997 spin_lock(&inode->i_lock);
770 998
771 for (;;) { 999 for (;;) {
772 req = nfs_page_find_request_locked(NFS_I(inode), page); 1000 req = nfs_page_find_head_request_locked(NFS_I(inode), page);
773 if (req == NULL) 1001 if (req == NULL)
774 goto out_unlock; 1002 goto out_unlock;
775 1003
@@ -877,7 +1105,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
877 * dropped page. 1105 * dropped page.
878 */ 1106 */
879 do { 1107 do {
880 req = nfs_page_find_request(page); 1108 req = nfs_page_find_head_request(page);
881 if (req == NULL) 1109 if (req == NULL)
882 return 0; 1110 return 0;
883 l_ctx = req->wb_lock_context; 1111 l_ctx = req->wb_lock_context;
@@ -1475,7 +1703,7 @@ int nfs_commit_inode(struct inode *inode, int how)
1475 return error; 1703 return error;
1476 if (!may_wait) 1704 if (!may_wait)
1477 goto out_mark_dirty; 1705 goto out_mark_dirty;
1478 error = wait_on_bit(&NFS_I(inode)->flags, 1706 error = wait_on_bit_action(&NFS_I(inode)->flags,
1479 NFS_INO_COMMIT, 1707 NFS_INO_COMMIT,
1480 nfs_wait_bit_killable, 1708 nfs_wait_bit_killable,
1481 TASK_KILLABLE); 1709 TASK_KILLABLE);
@@ -1569,27 +1797,28 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1569 struct nfs_page *req; 1797 struct nfs_page *req;
1570 int ret = 0; 1798 int ret = 0;
1571 1799
1572 for (;;) { 1800 wait_on_page_writeback(page);
1573 wait_on_page_writeback(page); 1801
1574 req = nfs_page_find_request(page); 1802 /* blocking call to cancel all requests and join to a single (head)
1575 if (req == NULL) 1803 * request */
1576 break; 1804 req = nfs_lock_and_join_requests(page, false);
1577 if (nfs_lock_request(req)) { 1805
1578 nfs_clear_request_commit(req); 1806 if (IS_ERR(req)) {
1579 nfs_inode_remove_request(req); 1807 ret = PTR_ERR(req);
1580 /* 1808 } else if (req) {
1581 * In case nfs_inode_remove_request has marked the 1809 /* all requests from this page have been cancelled by
1582 * page as being dirty 1810 * nfs_lock_and_join_requests, so just remove the head
1583 */ 1811 * request from the inode / page_private pointer and
1584 cancel_dirty_page(page, PAGE_CACHE_SIZE); 1812 * release it */
1585 nfs_unlock_and_release_request(req); 1813 nfs_inode_remove_request(req);
1586 break; 1814 /*
1587 } 1815 * In case nfs_inode_remove_request has marked the
1588 ret = nfs_wait_on_request(req); 1816 * page as being dirty
1589 nfs_release_request(req); 1817 */
1590 if (ret < 0) 1818 cancel_dirty_page(page, PAGE_CACHE_SIZE);
1591 break; 1819 nfs_unlock_and_release_request(req);
1592 } 1820 }
1821
1593 return ret; 1822 return ret;
1594} 1823}
1595 1824
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b56b1cc02718..944275c8f56d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2879,6 +2879,7 @@ again:
2879 * return the conflicting open: 2879 * return the conflicting open:
2880 */ 2880 */
2881 if (conf->len) { 2881 if (conf->len) {
2882 kfree(conf->data);
2882 conf->len = 0; 2883 conf->len = 0;
2883 conf->data = NULL; 2884 conf->data = NULL;
2884 goto again; 2885 goto again;
@@ -2891,6 +2892,7 @@ again:
2891 if (conf->len) { 2892 if (conf->len) {
2892 p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8); 2893 p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8);
2893 p = xdr_encode_opaque(p, conf->data, conf->len); 2894 p = xdr_encode_opaque(p, conf->data, conf->len);
2895 kfree(conf->data);
2894 } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ 2896 } else { /* non - nfsv4 lock in conflict, no clientid nor owner */
2895 p = xdr_encode_hyper(p, (u64)0); /* clientid */ 2897 p = xdr_encode_hyper(p, (u64)0); /* clientid */
2896 *p++ = cpu_to_be32(0); /* length of owner name */ 2898 *p++ = cpu_to_be32(0); /* length of owner name */
@@ -2907,7 +2909,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo
2907 nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); 2909 nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid);
2908 else if (nfserr == nfserr_denied) 2910 else if (nfserr == nfserr_denied)
2909 nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied); 2911 nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied);
2910 kfree(lock->lk_denied.ld_owner.data); 2912
2911 return nfserr; 2913 return nfserr;
2912} 2914}
2913 2915
diff --git a/fs/open.c b/fs/open.c
index 36662d036237..d6fd3acde134 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -263,11 +263,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
263 return -EPERM; 263 return -EPERM;
264 264
265 /* 265 /*
266 * We can not allow to do any fallocate operation on an active 266 * We cannot allow any fallocate operation on an active swapfile
267 * swapfile
268 */ 267 */
269 if (IS_SWAPFILE(inode)) 268 if (IS_SWAPFILE(inode))
270 ret = -ETXTBSY; 269 return -ETXTBSY;
271 270
272 /* 271 /*
273 * Revalidate the write permissions, in case security policy has 272 * Revalidate the write permissions, in case security policy has
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 64db2bceac59..cd3653e4f35c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -297,15 +297,11 @@ static void render_cap_t(struct seq_file *m, const char *header,
297 seq_puts(m, header); 297 seq_puts(m, header);
298 CAP_FOR_EACH_U32(__capi) { 298 CAP_FOR_EACH_U32(__capi) {
299 seq_printf(m, "%08x", 299 seq_printf(m, "%08x",
300 a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); 300 a->cap[CAP_LAST_U32 - __capi]);
301 } 301 }
302 seq_putc(m, '\n'); 302 seq_putc(m, '\n');
303} 303}
304 304
305/* Remove non-existent capabilities */
306#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \
307 CAP_TO_MASK(CAP_LAST_CAP + 1) - 1)
308
309static inline void task_cap(struct seq_file *m, struct task_struct *p) 305static inline void task_cap(struct seq_file *m, struct task_struct *p)
310{ 306{
311 const struct cred *cred; 307 const struct cred *cred;
@@ -319,11 +315,6 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
319 cap_bset = cred->cap_bset; 315 cap_bset = cred->cap_bset;
320 rcu_read_unlock(); 316 rcu_read_unlock();
321 317
322 NORM_CAPS(cap_inheritable);
323 NORM_CAPS(cap_permitted);
324 NORM_CAPS(cap_effective);
325 NORM_CAPS(cap_bset);
326
327 render_cap_t(m, "CapInh:\t", &cap_inheritable); 318 render_cap_t(m, "CapInh:\t", &cap_inheritable);
328 render_cap_t(m, "CapPrm:\t", &cap_permitted); 319 render_cap_t(m, "CapPrm:\t", &cap_permitted);
329 render_cap_t(m, "CapEff:\t", &cap_effective); 320 render_cap_t(m, "CapEff:\t", &cap_effective);
@@ -473,13 +464,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
473 priority = task_prio(task); 464 priority = task_prio(task);
474 nice = task_nice(task); 465 nice = task_nice(task);
475 466
476 /* Temporary variable needed for gcc-2.96 */
477 /* convert timespec -> nsec*/
478 start_time =
479 (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC
480 + task->real_start_time.tv_nsec;
481 /* convert nsec -> ticks */ 467 /* convert nsec -> ticks */
482 start_time = nsec_to_clock_t(start_time); 468 start_time = nsec_to_clock_t(task->real_start_time);
483 469
484 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); 470 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
485 seq_put_decimal_ll(m, ' ', ppid); 471 seq_put_decimal_ll(m, ' ', ppid);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 9cd5f63715c0..7f30bdc57d13 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -702,6 +702,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
702 struct dquot *dquot; 702 struct dquot *dquot;
703 unsigned long freed = 0; 703 unsigned long freed = 0;
704 704
705 spin_lock(&dq_list_lock);
705 head = free_dquots.prev; 706 head = free_dquots.prev;
706 while (head != &free_dquots && sc->nr_to_scan) { 707 while (head != &free_dquots && sc->nr_to_scan) {
707 dquot = list_entry(head, struct dquot, dq_free); 708 dquot = list_entry(head, struct dquot, dq_free);
@@ -713,6 +714,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
713 freed++; 714 freed++;
714 head = free_dquots.prev; 715 head = free_dquots.prev;
715 } 716 }
717 spin_unlock(&dq_list_lock);
716 return freed; 718 return freed;
717} 719}
718 720
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 0013142c0475..80c350216ea8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -35,8 +35,9 @@ struct timerfd_ctx {
35 ktime_t moffs; 35 ktime_t moffs;
36 wait_queue_head_t wqh; 36 wait_queue_head_t wqh;
37 u64 ticks; 37 u64 ticks;
38 int expired;
39 int clockid; 38 int clockid;
39 short unsigned expired;
40 short unsigned settime_flags; /* to show in fdinfo */
40 struct rcu_head rcu; 41 struct rcu_head rcu;
41 struct list_head clist; 42 struct list_head clist;
42 bool might_cancel; 43 bool might_cancel;
@@ -92,7 +93,7 @@ static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
92 */ 93 */
93void timerfd_clock_was_set(void) 94void timerfd_clock_was_set(void)
94{ 95{
95 ktime_t moffs = ktime_get_monotonic_offset(); 96 ktime_t moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
96 struct timerfd_ctx *ctx; 97 struct timerfd_ctx *ctx;
97 unsigned long flags; 98 unsigned long flags;
98 99
@@ -125,7 +126,7 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx)
125{ 126{
126 if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) 127 if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
127 return false; 128 return false;
128 ctx->moffs = ktime_get_monotonic_offset(); 129 ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
129 return true; 130 return true;
130} 131}
131 132
@@ -196,6 +197,8 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
196 if (timerfd_canceled(ctx)) 197 if (timerfd_canceled(ctx))
197 return -ECANCELED; 198 return -ECANCELED;
198 } 199 }
200
201 ctx->settime_flags = flags & TFD_SETTIME_FLAGS;
199 return 0; 202 return 0;
200} 203}
201 204
@@ -284,11 +287,77 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
284 return res; 287 return res;
285} 288}
286 289
290#ifdef CONFIG_PROC_FS
291static int timerfd_show(struct seq_file *m, struct file *file)
292{
293 struct timerfd_ctx *ctx = file->private_data;
294 struct itimerspec t;
295
296 spin_lock_irq(&ctx->wqh.lock);
297 t.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
298 t.it_interval = ktime_to_timespec(ctx->tintv);
299 spin_unlock_irq(&ctx->wqh.lock);
300
301 return seq_printf(m,
302 "clockid: %d\n"
303 "ticks: %llu\n"
304 "settime flags: 0%o\n"
305 "it_value: (%llu, %llu)\n"
306 "it_interval: (%llu, %llu)\n",
307 ctx->clockid, (unsigned long long)ctx->ticks,
308 ctx->settime_flags,
309 (unsigned long long)t.it_value.tv_sec,
310 (unsigned long long)t.it_value.tv_nsec,
311 (unsigned long long)t.it_interval.tv_sec,
312 (unsigned long long)t.it_interval.tv_nsec);
313}
314#else
315#define timerfd_show NULL
316#endif
317
318#ifdef CONFIG_CHECKPOINT_RESTORE
319static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
320{
321 struct timerfd_ctx *ctx = file->private_data;
322 int ret = 0;
323
324 switch (cmd) {
325 case TFD_IOC_SET_TICKS: {
326 u64 ticks;
327
328 if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks)))
329 return -EFAULT;
330 if (!ticks)
331 return -EINVAL;
332
333 spin_lock_irq(&ctx->wqh.lock);
334 if (!timerfd_canceled(ctx)) {
335 ctx->ticks = ticks;
336 if (ticks)
337 wake_up_locked(&ctx->wqh);
338 } else
339 ret = -ECANCELED;
340 spin_unlock_irq(&ctx->wqh.lock);
341 break;
342 }
343 default:
344 ret = -ENOTTY;
345 break;
346 }
347
348 return ret;
349}
350#else
351#define timerfd_ioctl NULL
352#endif
353
287static const struct file_operations timerfd_fops = { 354static const struct file_operations timerfd_fops = {
288 .release = timerfd_release, 355 .release = timerfd_release,
289 .poll = timerfd_poll, 356 .poll = timerfd_poll,
290 .read = timerfd_read, 357 .read = timerfd_read,
291 .llseek = noop_llseek, 358 .llseek = noop_llseek,
359 .show_fdinfo = timerfd_show,
360 .unlocked_ioctl = timerfd_ioctl,
292}; 361};
293 362
294static int timerfd_fget(int fd, struct fd *p) 363static int timerfd_fget(int fd, struct fd *p)
@@ -336,7 +405,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
336 else 405 else
337 hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS); 406 hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
338 407
339 ctx->moffs = ktime_get_monotonic_offset(); 408 ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
340 409
341 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 410 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
342 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); 411 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
diff --git a/fs/xattr.c b/fs/xattr.c
index 3377dff18404..c69e6d43a0d2 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -843,7 +843,7 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
843 843
844 /* wrap around? */ 844 /* wrap around? */
845 len = sizeof(*new_xattr) + size; 845 len = sizeof(*new_xattr) + size;
846 if (len <= sizeof(*new_xattr)) 846 if (len < sizeof(*new_xattr))
847 return NULL; 847 return NULL;
848 848
849 new_xattr = kmalloc(len, GFP_KERNEL); 849 new_xattr = kmalloc(len, GFP_KERNEL);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 96175df211b1..75c3fe5f3d9d 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4298,8 +4298,8 @@ xfs_bmapi_delay(
4298} 4298}
4299 4299
4300 4300
4301int 4301static int
4302__xfs_bmapi_allocate( 4302xfs_bmapi_allocate(
4303 struct xfs_bmalloca *bma) 4303 struct xfs_bmalloca *bma)
4304{ 4304{
4305 struct xfs_mount *mp = bma->ip->i_mount; 4305 struct xfs_mount *mp = bma->ip->i_mount;
@@ -4578,9 +4578,6 @@ xfs_bmapi_write(
4578 bma.flist = flist; 4578 bma.flist = flist;
4579 bma.firstblock = firstblock; 4579 bma.firstblock = firstblock;
4580 4580
4581 if (flags & XFS_BMAPI_STACK_SWITCH)
4582 bma.stack_switch = 1;
4583
4584 while (bno < end && n < *nmap) { 4581 while (bno < end && n < *nmap) {
4585 inhole = eof || bma.got.br_startoff > bno; 4582 inhole = eof || bma.got.br_startoff > bno;
4586 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); 4583 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 38ba36e9b2f0..b879ca56a64c 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -77,7 +77,6 @@ typedef struct xfs_bmap_free
77 * from written to unwritten, otherwise convert from unwritten to written. 77 * from written to unwritten, otherwise convert from unwritten to written.
78 */ 78 */
79#define XFS_BMAPI_CONVERT 0x040 79#define XFS_BMAPI_CONVERT 0x040
80#define XFS_BMAPI_STACK_SWITCH 0x080
81 80
82#define XFS_BMAPI_FLAGS \ 81#define XFS_BMAPI_FLAGS \
83 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 82 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
@@ -86,8 +85,7 @@ typedef struct xfs_bmap_free
86 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 85 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
87 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 86 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
88 { XFS_BMAPI_CONTIG, "CONTIG" }, \ 87 { XFS_BMAPI_CONTIG, "CONTIG" }, \
89 { XFS_BMAPI_CONVERT, "CONVERT" }, \ 88 { XFS_BMAPI_CONVERT, "CONVERT" }
90 { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
91 89
92 90
93static inline int xfs_bmapi_aflag(int w) 91static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 703b3ec1796c..64731ef3324d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -249,59 +249,6 @@ xfs_bmap_rtalloc(
249} 249}
250 250
251/* 251/*
252 * Stack switching interfaces for allocation
253 */
254static void
255xfs_bmapi_allocate_worker(
256 struct work_struct *work)
257{
258 struct xfs_bmalloca *args = container_of(work,
259 struct xfs_bmalloca, work);
260 unsigned long pflags;
261 unsigned long new_pflags = PF_FSTRANS;
262
263 /*
264 * we are in a transaction context here, but may also be doing work
265 * in kswapd context, and hence we may need to inherit that state
266 * temporarily to ensure that we don't block waiting for memory reclaim
267 * in any way.
268 */
269 if (args->kswapd)
270 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
271
272 current_set_flags_nested(&pflags, new_pflags);
273
274 args->result = __xfs_bmapi_allocate(args);
275 complete(args->done);
276
277 current_restore_flags_nested(&pflags, new_pflags);
278}
279
280/*
281 * Some allocation requests often come in with little stack to work on. Push
282 * them off to a worker thread so there is lots of stack to use. Otherwise just
283 * call directly to avoid the context switch overhead here.
284 */
285int
286xfs_bmapi_allocate(
287 struct xfs_bmalloca *args)
288{
289 DECLARE_COMPLETION_ONSTACK(done);
290
291 if (!args->stack_switch)
292 return __xfs_bmapi_allocate(args);
293
294
295 args->done = &done;
296 args->kswapd = current_is_kswapd();
297 INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
298 queue_work(xfs_alloc_wq, &args->work);
299 wait_for_completion(&done);
300 destroy_work_on_stack(&args->work);
301 return args->result;
302}
303
304/*
305 * Check if the endoff is outside the last extent. If so the caller will grow 252 * Check if the endoff is outside the last extent. If so the caller will grow
306 * the allocation to a stripe unit boundary. All offsets are considered outside 253 * the allocation to a stripe unit boundary. All offsets are considered outside
307 * the end of file for an empty fork, so 1 is returned in *eof in that case. 254 * the end of file for an empty fork, so 1 is returned in *eof in that case.
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 075f72232a64..2fdb72d2c908 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -55,8 +55,6 @@ struct xfs_bmalloca {
55 bool userdata;/* set if is user data */ 55 bool userdata;/* set if is user data */
56 bool aeof; /* allocated space at eof */ 56 bool aeof; /* allocated space at eof */
57 bool conv; /* overwriting unwritten extents */ 57 bool conv; /* overwriting unwritten extents */
58 bool stack_switch;
59 bool kswapd; /* allocation in kswapd context */
60 int flags; 58 int flags;
61 struct completion *done; 59 struct completion *done;
62 struct work_struct work; 60 struct work_struct work;
@@ -66,8 +64,6 @@ struct xfs_bmalloca {
66int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, 64int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
67 int *committed); 65 int *committed);
68int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); 66int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
69int xfs_bmapi_allocate(struct xfs_bmalloca *args);
70int __xfs_bmapi_allocate(struct xfs_bmalloca *args);
71int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, 67int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
72 int whichfork, int *eof); 68 int whichfork, int *eof);
73int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip, 69int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index bf810c6baf2b..cf893bc1e373 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -33,6 +33,7 @@
33#include "xfs_error.h" 33#include "xfs_error.h"
34#include "xfs_trace.h" 34#include "xfs_trace.h"
35#include "xfs_cksum.h" 35#include "xfs_cksum.h"
36#include "xfs_alloc.h"
36 37
37/* 38/*
38 * Cursor allocation zone. 39 * Cursor allocation zone.
@@ -2323,7 +2324,7 @@ error1:
2323 * record (to be inserted into parent). 2324 * record (to be inserted into parent).
2324 */ 2325 */
2325STATIC int /* error */ 2326STATIC int /* error */
2326xfs_btree_split( 2327__xfs_btree_split(
2327 struct xfs_btree_cur *cur, 2328 struct xfs_btree_cur *cur,
2328 int level, 2329 int level,
2329 union xfs_btree_ptr *ptrp, 2330 union xfs_btree_ptr *ptrp,
@@ -2503,6 +2504,85 @@ error0:
2503 return error; 2504 return error;
2504} 2505}
2505 2506
2507struct xfs_btree_split_args { /* marshals xfs_btree_split() arguments to a worker */
2508 struct xfs_btree_cur *cur; /* forwarded untouched to __xfs_btree_split */
2509 int level; /* forwarded untouched to __xfs_btree_split */
2510 union xfs_btree_ptr *ptrp; /* forwarded untouched to __xfs_btree_split */
2511 union xfs_btree_key *key; /* forwarded untouched to __xfs_btree_split */
2512 struct xfs_btree_cur **curp; /* forwarded untouched to __xfs_btree_split */
2513 int *stat; /* success/failure */
2514 int result; /* error code returned by __xfs_btree_split */
2515 bool kswapd; /* allocation in kswapd context */
2516 struct completion *done; /* completed once the worker has filled in result */
2517 struct work_struct work; /* queued on xfs_alloc_wq; lives on the waiter's stack */
2518};
2519
2520/*
2521 * Stack switching interfaces for allocation
2522 */
2523static void
2524xfs_btree_split_worker(
2525 struct work_struct *work) /* embedded in a struct xfs_btree_split_args */
2526{
2527 struct xfs_btree_split_args *args = container_of(work,
2528 struct xfs_btree_split_args, work);
2529 unsigned long pflags;
2530 unsigned long new_pflags = PF_FSTRANS;
2531
2532 /*
2533 * we are in a transaction context here, but may also be doing work
2534 * in kswapd context, and hence we may need to inherit that state
2535 * temporarily to ensure that we don't block waiting for memory reclaim
2536 * in any way.
2537 */
2538 if (args->kswapd)
2539 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
2540
2541 current_set_flags_nested(&pflags, new_pflags);
2542
2543 args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
2544 args->key, args->curp, args->stat);
2545 complete(args->done); /* args lives on the waiter's stack: do not touch it after this */
2546
2547 current_restore_flags_nested(&pflags, new_pflags);
2548}
2549
2550/*
2551 * BMBT split requests often come in with little stack to work on. Push
2552 * them off to a worker thread so there is lots of stack to use. For the other
2553 * btree types, just call directly to avoid the context switch overhead here.
2554 */
2555STATIC int /* error */
2556xfs_btree_split(
2557 struct xfs_btree_cur *cur,
2558 int level,
2559 union xfs_btree_ptr *ptrp,
2560 union xfs_btree_key *key,
2561 struct xfs_btree_cur **curp,
2562 int *stat) /* success/failure */
2563{
2564 struct xfs_btree_split_args args;
2565 DECLARE_COMPLETION_ONSTACK(done);
2566
2567 if (cur->bc_btnum != XFS_BTNUM_BMAP) /* only BMBT splits are pushed off to a worker */
2568 return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
2569
2570 args.cur = cur;
2571 args.level = level;
2572 args.ptrp = ptrp;
2573 args.key = key;
2574 args.curp = curp;
2575 args.stat = stat;
2576 args.done = &done;
2577 args.kswapd = current_is_kswapd(); /* worker inherits our reclaim context via this flag */
2578 INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
2579 queue_work(xfs_alloc_wq, &args.work);
2580 wait_for_completion(&done); /* args/done are on-stack: must not return before the worker signals */
2581 destroy_work_on_stack(&args.work);
2582 return args.result; /* error code filled in by the worker */
2583}
2584
2585
2506/* 2586/*
2507 * Copy the old inode root contents into a real block and make the 2587 * Copy the old inode root contents into a real block and make the
2508 * broot point to it. 2588 * broot point to it.
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 6c5eb4c551e3..6d3ec2b6ee29 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -749,8 +749,7 @@ xfs_iomap_write_allocate(
749 * pointer that the caller gave to us. 749 * pointer that the caller gave to us.
750 */ 750 */
751 error = xfs_bmapi_write(tp, ip, map_start_fsb, 751 error = xfs_bmapi_write(tp, ip, map_start_fsb,
752 count_fsb, 752 count_fsb, 0,
753 XFS_BMAPI_STACK_SWITCH,
754 &first_block, 1, 753 &first_block, 1,
755 imap, &nimaps, &free_list); 754 imap, &nimaps, &free_list);
756 if (error) 755 if (error)
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index c3453b11f563..7703fa6770ff 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -483,10 +483,16 @@ xfs_sb_quota_to_disk(
483 } 483 }
484 484
485 /* 485 /*
486 * GQUOTINO and PQUOTINO cannot be used together in versions of
487 * superblock that do not have pquotino. from->sb_flags tells us which
488 * quota is active and should be copied to disk. If neither are active,
489 * make sure we write NULLFSINO to the sb_gquotino field as a quota
490 * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature
491 * bit is set.
492 *
493 * Note that we don't need to handle the sb_uquotino or sb_pquotino here
494 * as they do not require any translation. Hence the main sb field loop
495 * will write them appropriately from the in-core superblock.
496 */
491 if ((*fields & XFS_SB_GQUOTINO) && 497 if ((*fields & XFS_SB_GQUOTINO) &&
492 (from->sb_qflags & XFS_GQUOTA_ACCT)) 498 (from->sb_qflags & XFS_GQUOTA_ACCT))
@@ -494,6 +500,17 @@ xfs_sb_quota_to_disk(
494 else if ((*fields & XFS_SB_PQUOTINO) && 500 else if ((*fields & XFS_SB_PQUOTINO) &&
495 (from->sb_qflags & XFS_PQUOTA_ACCT)) 501 (from->sb_qflags & XFS_PQUOTA_ACCT))
496 to->sb_gquotino = cpu_to_be64(from->sb_pquotino); 502 to->sb_gquotino = cpu_to_be64(from->sb_pquotino);
503 else {
504 /*
505 * We can't rely on just the fields being logged to tell us
506 * that it is safe to write NULLFSINO - we should only do that
507 * if quotas are not actually enabled. Hence only write
508 * NULLFSINO if both in-core quota inodes are NULL.
509 */
510 if (from->sb_gquotino == NULLFSINO &&
511 from->sb_pquotino == NULLFSINO)
512 to->sb_gquotino = cpu_to_be64(NULLFSINO);
513 }
497 514
498 *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO); 515 *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO);
499} 516}