Merge tag 'gfs2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes

Pull GFS2 fixes from Steven Whitehouse:
 "Here is a set of small fixes for GFS2.

  There is a fix to drop s_umount, which is copied in from the core
  VFS.  Two patches relate to a hard-to-hit use-after-free and a memory
  leak.  Two patches relate to using DIO and buffered I/O on the same
  file, to ensure correct operation in relation to glock state changes.
  The final patch adds an RCU read lock to ensure correct locking on an
  error path"

* tag 'gfs2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes:
  GFS2: Fix unsafe dereference in dump_holder()
  GFS2: Wait for async DIO in glock state changes
  GFS2: Fix incorrect invalidation for DIO/buffered I/O
  GFS2: Fix slab memory leak in gfs2_bufdata
  GFS2: Fix use-after-free race when calling gfs2_remove_from_ail
  GFS2: don't hold s_umount over blkdev_put
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-01-02 15:45:47 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-01-02 15:45:47 -0500
commit: 152b734a9e38aa2e9668fa072cf66625383ca865 (patch)
tree: eb34048f98817b5fc868ed7d494cf8c186acc2a2
parent: b479667923464591f44531ab3469ae4a0b2e074d (diff)
parent: 0b3a2c9968d453d5827e635a6f3d69129f70af66 (diff)
6 files changed, 58 insertions, 5 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index b7fc035a6943..73f3e4ee4037 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -986,6 +986,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+        struct address_space *mapping = inode->i_mapping;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder gh;
        int rv;
@@ -1006,6 +1007,35 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
        if (rv != 1)
                goto out; /* dio not valid, fall back to buffered i/o */
+        /*
+         * Now since we are holding a deferred (CW) lock at this point, you
+         * might be wondering why this is ever needed. There is a case however
+         * where we've granted a deferred local lock against a cached exclusive
+         * glock. That is ok provided all granted local locks are deferred, but
+         * it also means that it is possible to encounter pages which are
+         * cached and possibly also mapped. So here we check for that and sort
+         * them out ahead of the dio. The glock state machine will take care of
+         * everything else.
+         *
+         * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
+         * the first place, mapping->nrpages will always be zero.
+         */
+        if (mapping->nrpages) {
+                loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
+                loff_t len = iov_length(iov, nr_segs);
+                loff_t end = PAGE_ALIGN(offset + len) - 1;
+                rv = 0;
+                if (len == 0)
+                        goto out;
+                if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+                        unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
+                rv = filemap_write_and_wait_range(mapping, lstart, end);
+                if (rv)
+                        return rv;
+                truncate_inode_pages_range(mapping, lstart, end);
+        }
        rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
                                  offset, nr_segs, gfs2_get_block_direct,
                                  NULL, NULL, 0);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c8420f7e4db6..6f7a47c05259 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1655,6 +1655,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
        struct task_struct *gh_owner = NULL;
        char flags_buf[32];
+        rcu_read_lock();
        if (gh->gh_owner_pid)
                gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
        gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
@@ -1664,6 +1665,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
                       gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
                       gh_owner ? gh_owner->comm : "(ended)",
                       (void *)gh->gh_ip);
+        rcu_read_unlock();
        return 0;
 }
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index db908f697139..f88dcd925010 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -192,8 +192,11 @@ static void inode_go_sync(struct gfs2_glock *gl)
        if (ip && !S_ISREG(ip->i_inode.i_mode))
                ip = NULL;
-        if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+        if (ip) {
-                unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
+                if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+                        unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
+                inode_dio_wait(&ip->i_inode);
+        }
        if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
                return;
@@ -410,6 +413,9 @@ static int inode_go_lock(struct gfs2_holder *gh)
                        return error;
        }
+        if (gh->gh_state != LM_ST_DEFERRED)
+                inode_dio_wait(&ip->i_inode);
        if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
            (gl->gl_state == LM_ST_EXCLUSIVE) &&
            (gh->gh_state == LM_ST_EXCLUSIVE)) {
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 610613fb65b5..9dcb9777a5f8 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -551,10 +551,10 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
        struct buffer_head *bh = bd->bd_bh;
        struct gfs2_glock *gl = bd->bd_gl;
-        gfs2_remove_from_ail(bd);
-        bd->bd_bh = NULL;
        bh->b_private = NULL;
        bd->bd_blkno = bh->b_blocknr;
+        gfs2_remove_from_ail(bd); /* drops ref on bh */
+        bd->bd_bh = NULL;
        bd->bd_ops = &gfs2_revoke_lops;
        sdp->sd_log_num_revoke++;
        atomic_inc(&gl->gl_revokes);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 932415050540..52f177be3bf8 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -258,6 +258,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
        struct address_space *mapping = bh->b_page->mapping;
        struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
        struct gfs2_bufdata *bd = bh->b_private;
+        int was_pinned = 0;
        if (test_clear_buffer_pinned(bh)) {
                trace_gfs2_pin(bd, 0);
@@ -273,12 +274,16 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
                        tr->tr_num_databuf_rm++;
                }
                tr->tr_touched = 1;
+                was_pinned = 1;
                brelse(bh);
        }
        if (bd) {
                spin_lock(&sdp->sd_ail_lock);
                if (bd->bd_tr) {
                        gfs2_trans_add_revoke(sdp, bd);
+                } else if (was_pinned) {
+                        bh->b_private = NULL;
+                        kmem_cache_free(gfs2_bufdata_cachep, bd);
                }
                spin_unlock(&sdp->sd_ail_lock);
        }
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 82303b474958..52fa88314f5c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1366,8 +1366,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
        if (IS_ERR(s))
                goto error_bdev;
-        if (s->s_root)
+        if (s->s_root) {
+                /*
+                 * s_umount nests inside bd_mutex during
+                 * __invalidate_device().  blkdev_put() acquires
+                 * bd_mutex and can't be called under s_umount.  Drop
+                 * s_umount temporarily.  This is safe as we're
+                 * holding an active reference.
+                 */
+                up_write(&s->s_umount);
                blkdev_put(bdev, mode);
+                down_write(&s->s_umount);
+        }
        memset(&args, 0, sizeof(args));
        args.ar_quota = GFS2_QUOTA_DEFAULT;
author	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-02 15:45:47 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-02 15:45:47 -0500
commit	152b734a9e38aa2e9668fa072cf66625383ca865 (patch)
tree	eb34048f98817b5fc868ed7d494cf8c186acc2a2
parent	b479667923464591f44531ab3469ae4a0b2e074d (diff)
parent	0b3a2c9968d453d5827e635a6f3d69129f70af66 (diff)