diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-02 15:45:47 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-02 15:45:47 -0500 |
commit | 152b734a9e38aa2e9668fa072cf66625383ca865 (patch) | |
tree | eb34048f98817b5fc868ed7d494cf8c186acc2a2 | |
parent | b479667923464591f44531ab3469ae4a0b2e074d (diff) | |
parent | 0b3a2c9968d453d5827e635a6f3d69129f70af66 (diff) |
Merge tag 'gfs2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes
Pull GFS2 fixes from Steven Whitehouse:
"Here is a set of small fixes for GFS2. There is a fix to drop
s_umount which is copied in from the core vfs; two patches relate to a
hard-to-hit "use after free" and a memory leak. Two patches relate to
using DIO and buffered I/O on the same file to ensure correct
operation in relation to glock state changes. The final patch adds an
RCU read lock to ensure correct locking on an error path"
* tag 'gfs2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes:
GFS2: Fix unsafe dereference in dump_holder()
GFS2: Wait for async DIO in glock state changes
GFS2: Fix incorrect invalidation for DIO/buffered I/O
GFS2: Fix slab memory leak in gfs2_bufdata
GFS2: Fix use-after-free race when calling gfs2_remove_from_ail
GFS2: don't hold s_umount over blkdev_put
-rw-r--r-- | fs/gfs2/aops.c | 30 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 2 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 10 | ||||
-rw-r--r-- | fs/gfs2/log.c | 4 | ||||
-rw-r--r-- | fs/gfs2/meta_io.c | 5 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 12 |
6 files changed, 58 insertions, 5 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index b7fc035a6943..73f3e4ee4037 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -986,6 +986,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
986 | { | 986 | { |
987 | struct file *file = iocb->ki_filp; | 987 | struct file *file = iocb->ki_filp; |
988 | struct inode *inode = file->f_mapping->host; | 988 | struct inode *inode = file->f_mapping->host; |
989 | struct address_space *mapping = inode->i_mapping; | ||
989 | struct gfs2_inode *ip = GFS2_I(inode); | 990 | struct gfs2_inode *ip = GFS2_I(inode); |
990 | struct gfs2_holder gh; | 991 | struct gfs2_holder gh; |
991 | int rv; | 992 | int rv; |
@@ -1006,6 +1007,35 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
1006 | if (rv != 1) | 1007 | if (rv != 1) |
1007 | goto out; /* dio not valid, fall back to buffered i/o */ | 1008 | goto out; /* dio not valid, fall back to buffered i/o */ |
1008 | 1009 | ||
1010 | /* | ||
1011 | * Now since we are holding a deferred (CW) lock at this point, you | ||
1012 | * might be wondering why this is ever needed. There is a case however | ||
1013 | * where we've granted a deferred local lock against a cached exclusive | ||
1014 | * glock. That is ok provided all granted local locks are deferred, but | ||
1015 | * it also means that it is possible to encounter pages which are | ||
1016 | * cached and possibly also mapped. So here we check for that and sort | ||
1017 | * them out ahead of the dio. The glock state machine will take care of | ||
1018 | * everything else. | ||
1019 | * | ||
1020 | * If in fact the cached glock state (gl->gl_state) is deferred (CW) in | ||
1021 | * the first place, mapping->nrpages will always be zero. | ||
1022 | */ | ||
1023 | if (mapping->nrpages) { | ||
1024 | loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); | ||
1025 | loff_t len = iov_length(iov, nr_segs); | ||
1026 | loff_t end = PAGE_ALIGN(offset + len) - 1; | ||
1027 | |||
1028 | rv = 0; | ||
1029 | if (len == 0) | ||
1030 | goto out; | ||
1031 | if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) | ||
1032 | unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); | ||
1033 | rv = filemap_write_and_wait_range(mapping, lstart, end); | ||
1034 | if (rv) | ||
1035 | return rv; | ||
1036 | truncate_inode_pages_range(mapping, lstart, end); | ||
1037 | } | ||
1038 | |||
1009 | rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 1039 | rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
1010 | offset, nr_segs, gfs2_get_block_direct, | 1040 | offset, nr_segs, gfs2_get_block_direct, |
1011 | NULL, NULL, 0); | 1041 | NULL, NULL, 0); |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c8420f7e4db6..6f7a47c05259 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1655,6 +1655,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) | |||
1655 | struct task_struct *gh_owner = NULL; | 1655 | struct task_struct *gh_owner = NULL; |
1656 | char flags_buf[32]; | 1656 | char flags_buf[32]; |
1657 | 1657 | ||
1658 | rcu_read_lock(); | ||
1658 | if (gh->gh_owner_pid) | 1659 | if (gh->gh_owner_pid) |
1659 | gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); | 1660 | gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); |
1660 | gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", | 1661 | gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", |
@@ -1664,6 +1665,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) | |||
1664 | gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, | 1665 | gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, |
1665 | gh_owner ? gh_owner->comm : "(ended)", | 1666 | gh_owner ? gh_owner->comm : "(ended)", |
1666 | (void *)gh->gh_ip); | 1667 | (void *)gh->gh_ip); |
1668 | rcu_read_unlock(); | ||
1667 | return 0; | 1669 | return 0; |
1668 | } | 1670 | } |
1669 | 1671 | ||
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index db908f697139..f88dcd925010 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -192,8 +192,11 @@ static void inode_go_sync(struct gfs2_glock *gl) | |||
192 | 192 | ||
193 | if (ip && !S_ISREG(ip->i_inode.i_mode)) | 193 | if (ip && !S_ISREG(ip->i_inode.i_mode)) |
194 | ip = NULL; | 194 | ip = NULL; |
195 | if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) | 195 | if (ip) { |
196 | unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); | 196 | if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) |
197 | unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); | ||
198 | inode_dio_wait(&ip->i_inode); | ||
199 | } | ||
197 | if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) | 200 | if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) |
198 | return; | 201 | return; |
199 | 202 | ||
@@ -410,6 +413,9 @@ static int inode_go_lock(struct gfs2_holder *gh) | |||
410 | return error; | 413 | return error; |
411 | } | 414 | } |
412 | 415 | ||
416 | if (gh->gh_state != LM_ST_DEFERRED) | ||
417 | inode_dio_wait(&ip->i_inode); | ||
418 | |||
413 | if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && | 419 | if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && |
414 | (gl->gl_state == LM_ST_EXCLUSIVE) && | 420 | (gl->gl_state == LM_ST_EXCLUSIVE) && |
415 | (gh->gh_state == LM_ST_EXCLUSIVE)) { | 421 | (gh->gh_state == LM_ST_EXCLUSIVE)) { |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 610613fb65b5..9dcb9777a5f8 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -551,10 +551,10 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) | |||
551 | struct buffer_head *bh = bd->bd_bh; | 551 | struct buffer_head *bh = bd->bd_bh; |
552 | struct gfs2_glock *gl = bd->bd_gl; | 552 | struct gfs2_glock *gl = bd->bd_gl; |
553 | 553 | ||
554 | gfs2_remove_from_ail(bd); | ||
555 | bd->bd_bh = NULL; | ||
556 | bh->b_private = NULL; | 554 | bh->b_private = NULL; |
557 | bd->bd_blkno = bh->b_blocknr; | 555 | bd->bd_blkno = bh->b_blocknr; |
556 | gfs2_remove_from_ail(bd); /* drops ref on bh */ | ||
557 | bd->bd_bh = NULL; | ||
558 | bd->bd_ops = &gfs2_revoke_lops; | 558 | bd->bd_ops = &gfs2_revoke_lops; |
559 | sdp->sd_log_num_revoke++; | 559 | sdp->sd_log_num_revoke++; |
560 | atomic_inc(&gl->gl_revokes); | 560 | atomic_inc(&gl->gl_revokes); |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 932415050540..52f177be3bf8 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -258,6 +258,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
258 | struct address_space *mapping = bh->b_page->mapping; | 258 | struct address_space *mapping = bh->b_page->mapping; |
259 | struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); | 259 | struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); |
260 | struct gfs2_bufdata *bd = bh->b_private; | 260 | struct gfs2_bufdata *bd = bh->b_private; |
261 | int was_pinned = 0; | ||
261 | 262 | ||
262 | if (test_clear_buffer_pinned(bh)) { | 263 | if (test_clear_buffer_pinned(bh)) { |
263 | trace_gfs2_pin(bd, 0); | 264 | trace_gfs2_pin(bd, 0); |
@@ -273,12 +274,16 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
273 | tr->tr_num_databuf_rm++; | 274 | tr->tr_num_databuf_rm++; |
274 | } | 275 | } |
275 | tr->tr_touched = 1; | 276 | tr->tr_touched = 1; |
277 | was_pinned = 1; | ||
276 | brelse(bh); | 278 | brelse(bh); |
277 | } | 279 | } |
278 | if (bd) { | 280 | if (bd) { |
279 | spin_lock(&sdp->sd_ail_lock); | 281 | spin_lock(&sdp->sd_ail_lock); |
280 | if (bd->bd_tr) { | 282 | if (bd->bd_tr) { |
281 | gfs2_trans_add_revoke(sdp, bd); | 283 | gfs2_trans_add_revoke(sdp, bd); |
284 | } else if (was_pinned) { | ||
285 | bh->b_private = NULL; | ||
286 | kmem_cache_free(gfs2_bufdata_cachep, bd); | ||
282 | } | 287 | } |
283 | spin_unlock(&sdp->sd_ail_lock); | 288 | spin_unlock(&sdp->sd_ail_lock); |
284 | } | 289 | } |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 82303b474958..52fa88314f5c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -1366,8 +1366,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, | |||
1366 | if (IS_ERR(s)) | 1366 | if (IS_ERR(s)) |
1367 | goto error_bdev; | 1367 | goto error_bdev; |
1368 | 1368 | ||
1369 | if (s->s_root) | 1369 | if (s->s_root) { |
1370 | /* | ||
1371 | * s_umount nests inside bd_mutex during | ||
1372 | * __invalidate_device(). blkdev_put() acquires | ||
1373 | * bd_mutex and can't be called under s_umount. Drop | ||
1374 | * s_umount temporarily. This is safe as we're | ||
1375 | * holding an active reference. | ||
1376 | */ | ||
1377 | up_write(&s->s_umount); | ||
1370 | blkdev_put(bdev, mode); | 1378 | blkdev_put(bdev, mode); |
1379 | down_write(&s->s_umount); | ||
1380 | } | ||
1371 | 1381 | ||
1372 | memset(&args, 0, sizeof(args)); | 1382 | memset(&args, 0, sizeof(args)); |
1373 | args.ar_quota = GFS2_QUOTA_DEFAULT; | 1383 | args.ar_quota = GFS2_QUOTA_DEFAULT; |