From fd90d4dfb94a8c0d626c0c85ca7dcfb905f81a65 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 14 Apr 2015 15:42:42 -0700 Subject: ocfs2: delete unnecessary checks before three function calls kfree(), ocfs2_free_path() and __ocfs2_free_slot_info() test whether their argument is NULL and then return immediately. Thus the test around their calls is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 17 +++++------------ fs/ocfs2/slot_map.c | 2 +- fs/ocfs2/stack_user.c | 2 +- 3 files changed, 7 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 044158bd22be..fdab27c9be99 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -3453,8 +3453,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, subtree_index); } out: - if (right_path) - ocfs2_free_path(right_path); + ocfs2_free_path(right_path); return ret; } @@ -3647,8 +3646,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, right_path, subtree_index); } out: - if (left_path) - ocfs2_free_path(left_path); + ocfs2_free_path(left_path); return ret; } @@ -4431,11 +4429,8 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, } out: - if (left_path) - ocfs2_free_path(left_path); - if (right_path) - ocfs2_free_path(right_path); - + ocfs2_free_path(left_path); + ocfs2_free_path(right_path); return ret; } @@ -6996,9 +6991,7 @@ out_commit: out: if (data_ac) ocfs2_free_alloc_context(data_ac); - if (pages) - kfree(pages); - + kfree(pages); return ret; } diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index d5493e361a38..c5e530a9d1b1 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -452,7 +452,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) osb->slot_info = (struct ocfs2_slot_info *)si; bail: - if (status < 0 && si) + if (status < 0) __ocfs2_free_slot_info(si); return status; diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 720aa389e0ea..c3b7807c65d6 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -1063,7 +1063,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) } out: - if (rc && lc) + if (rc) kfree(lc); return rc; } -- cgit v1.2.2 From 3cc79b795b53a9a04aa1bcc1a943379f06324bb6 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 14 Apr 2015 15:42:45 -0700 Subject: ocfs2: less function calls in ocfs2_convert_inline_data_to_extents() after error detection kfree() was called in a few cases by ocfs2_convert_inline_data_to_extents() during error handling even if the passed variable "pages" contained a null pointer. * Return from this implementation directly after failure detection for the function call "kcalloc". * Corresponding details could be improved by the introduction of another jump label. Signed-off-by: Markus Elfring Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index fdab27c9be99..bf806e58b1cb 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6853,13 +6853,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, if (pages == NULL) { ret = -ENOMEM; mlog_errno(ret); - goto out; + return ret; } ret = ocfs2_reserve_clusters(osb, 1, &data_ac); if (ret) { mlog_errno(ret); - goto out; + goto free_pages; } } @@ -6991,6 +6991,7 @@ out_commit: out: if (data_ac) ocfs2_free_alloc_context(data_ac); +free_pages: kfree(pages); return ret; } -- cgit v1.2.2 From 06a269ccdf0550671667f5cfa00bdabb6bf05259 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 14 Apr 2015 15:42:48 -0700 Subject: ocfs2: less function calls in ocfs2_figure_merge_contig_type() after error detection ocfs2_free_path() was called in some cases by ocfs2_figure_merge_contig_type() during error handling even if the passed variables "left_path" and "right_path" contained still a null pointer. Corresponding implementation details could be improved by adjustments for jump labels according to the current Linux coding style convention. Signed-off-by: Markus Elfring Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index bf806e58b1cb..370b4ea4c23a 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -4332,17 +4332,17 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, } else if (path->p_tree_depth > 0) { status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); if (status) - goto out; + goto exit; if (left_cpos != 0) { left_path = ocfs2_new_path_from_path(path); if (!left_path) - goto out; + goto exit; status = ocfs2_find_path(et->et_ci, left_path, left_cpos); if (status) - goto out; + goto free_left_path; new_el = path_leaf_el(left_path); @@ -4359,7 +4359,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, le16_to_cpu(new_el->l_next_free_rec), le16_to_cpu(new_el->l_count)); status = -EINVAL; - goto out; + goto free_left_path; } rec = &new_el->l_recs[ le16_to_cpu(new_el->l_next_free_rec) - 1]; @@ -4386,18 +4386,18 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, path->p_tree_depth > 0) { status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); if (status) - goto out; + goto free_left_path; if (right_cpos == 0) - goto out; + goto free_left_path; right_path = ocfs2_new_path_from_path(path); if (!right_path) - goto out; + goto free_left_path; status = ocfs2_find_path(et->et_ci, right_path, right_cpos); if (status) - goto out; + goto free_right_path; new_el = path_leaf_el(right_path); rec = &new_el->l_recs[0]; @@ -4411,7 +4411,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, (unsigned long long)le64_to_cpu(eb->h_blkno), le16_to_cpu(new_el->l_next_free_rec)); status = -EINVAL; - goto out; + goto free_right_path; } rec = &new_el->l_recs[1]; } @@ -4428,9 +4428,11 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, ret = contig_type; } -out: - ocfs2_free_path(left_path); +free_right_path: ocfs2_free_path(right_path); +free_left_path: + ocfs2_free_path(left_path); +exit: return ret; } -- cgit v1.2.2 From 992ef6e794c4d6b84e7930ee79310f066c7f6727 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 14 Apr 2015 15:42:51 -0700 Subject: ocfs2: one function call less in ocfs2_merge_rec_left() after error detection ocfs2_free_path() was called by ocfs2_merge_rec_left() even if a call of the ocfs2_get_left_path() function failed. Return from this implementation directly after corresponding exception handling. Signed-off-by: Markus Elfring Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 370b4ea4c23a..4bdc19fb7b85 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -3535,7 +3535,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, ret = ocfs2_get_left_path(et, right_path, &left_path); if (ret) { mlog_errno(ret); - goto out; + return ret; } left_el = path_leaf_el(left_path); -- cgit v1.2.2 From 629a3b5f0b1c09025546e110ea2b2a67335ed8c5 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 14 Apr 2015 15:42:53 -0700 Subject: ocfs2: one function call less in ocfs2_merge_rec_right() after error detection ocfs2_free_path() was called by ocfs2_merge_rec_right() even if a call of the ocfs2_get_right_path() function failed. Return from this implementation directly after corresponding exception handling. Signed-off-by: Markus Elfring Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 4bdc19fb7b85..2d7f76e52c37 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -3370,7 +3370,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, ret = ocfs2_get_right_path(et, left_path, &right_path); if (ret) { mlog_errno(ret); - goto out; + return ret; } right_el = path_leaf_el(right_path); -- cgit v1.2.2 From bb34ed21bce54a900c034089a6b1fde8c09f6a6d Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 14 Apr 2015 15:42:56 -0700 Subject: ocfs2: one function call less in ocfs2_init_slot_info() after error detection __ocfs2_free_slot_info() was called by ocfs2_init_slot_info() even if a call of the kzalloc() function failed. Return from this implementation directly after corresponding exception handling. Signed-off-by: Markus Elfring Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/slot_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index c5e530a9d1b1..e78a203d44c8 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -427,7 +427,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) if (!si) { status = -ENOMEM; mlog_errno(status); - goto bail; + return status; } si->si_extended = ocfs2_uses_extended_slot_map(osb); -- cgit v1.2.2 From 43ee9cad8a81954eea893b52e08d0c00ca9baccc Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 14 Apr 2015 15:42:59 -0700 Subject: ocfs2: one function call less in user_cluster_connect() after error detection kfree() was called by user_cluster_connect() even if a previous call of the kzalloc() function failed. Return from this implementation directly after failure detection. Signed-off-by: Markus Elfring Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/stack_user.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index c3b7807c65d6..2768eb1da2b8 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -1004,10 +1004,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) BUG_ON(conn == NULL); lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); - if (!lc) { - rc = -ENOMEM; - goto out; - } + if (!lc) + return -ENOMEM; init_waitqueue_head(&lc->oc_wait); init_completion(&lc->oc_sync_wait); -- cgit v1.2.2 From 7a8346429d6da4039a4687a8a07f3f8cdaf96d92 Mon Sep 17 00:00:00 2001 From: Daeseok Youn Date: Tue, 14 Apr 2015 15:43:02 -0700 Subject: ocfs2: avoid a pointless delay in o2cb_cluster_check() Fix an off-by-one when attempting to avoid an msleep() on the final loop iteration. Signed-off-by: Daeseok Youn Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/stack_o2cb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 1724d43d3da1..220cae7bbdbc 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -295,7 +295,7 @@ static int o2cb_cluster_check(void) set_bit(node_num, netmap); if (!memcmp(hbmap, netmap, sizeof(hbmap))) return 0; - if (i < O2CB_MAP_STABILIZE_COUNT) + if (i < O2CB_MAP_STABILIZE_COUNT - 1) msleep(1000); } -- cgit v1.2.2 From bdd86215b393b743be34b854299c6cda7cbb361c Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 14 Apr 2015 15:43:05 -0700 Subject: ocfs2: fix a typing error in ocfs2_direct_IO_write Only when direct IO succeeds we need consider zeroing out in case of cluster not aligned. Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/aops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 44db1808cdb5..0c2848a599c9 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -771,7 +771,7 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, if (ret < 0) mlog_errno(ret); } - } else if (written < 0 && append_write && !is_overwrite && + } else if (written > 0 && append_write && !is_overwrite && !cluster_align) { u32 p_cpos = 0; u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); -- cgit v1.2.2 From 7e9b19551c8249baf380cbd274633ee4af95bc99 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 14 Apr 2015 15:43:08 -0700 Subject: ocfs2: no need get dinode bh when zeroing extend Since di_bh won't be used when zeroing extend, set it to NULL. Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/aops.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 0c2848a599c9..2a618dd2577d 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -706,7 +706,7 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, } if (append_write) { - ret = ocfs2_inode_lock(inode, &di_bh, 1); + ret = ocfs2_inode_lock(inode, NULL, 1); if (ret < 0) { mlog_errno(ret); goto clean_orphan; @@ -720,7 +720,6 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, if (ret < 0) { mlog_errno(ret); ocfs2_inode_unlock(inode, 1); - brelse(di_bh); goto clean_orphan; } @@ -728,13 +727,10 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, if (is_overwrite < 0) { mlog_errno(is_overwrite); ocfs2_inode_unlock(inode, 1); - brelse(di_bh); goto clean_orphan; } ocfs2_inode_unlock(inode, 1); - brelse(di_bh); - di_bh = NULL; } written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, -- cgit v1.2.2 From 37a8d89aee2a5b58812e19520d96632efe987f54 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 14 Apr 2015 15:43:10 -0700 Subject: ocfs2: take inode lock when get clusters We need take inode lock when calling ocfs2_get_clusters. And use GFP_NOFS instead of GFP_KERNEL. Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/aops.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 2a618dd2577d..973a636285d1 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -772,10 +772,17 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, u32 p_cpos = 0; u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); + ret = ocfs2_inode_lock(inode, NULL, 0); + if (ret < 0) { + mlog_errno(ret); + goto clean_orphan; + } + ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, &ext_flags); if (ret < 0) { mlog_errno(ret); + ocfs2_inode_unlock(inode, 0); goto clean_orphan; } @@ -783,9 +790,11 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, ret = blkdev_issue_zeroout(osb->sb->s_bdev, p_cpos << (osb->s_clustersize_bits - 9), - zero_len >> 9, GFP_KERNEL, false); + zero_len >> 9, GFP_NOFS, false); if (ret < 0) mlog_errno(ret); + + ocfs2_inode_unlock(inode, 0); } clean_orphan: -- cgit v1.2.2 From 14a5275d8c31ba24832f45eeb2469e835ded660d Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 14 Apr 2015 15:43:13 -0700 Subject: ocfs2: do not use ocfs2_zero_extend during direct IO In ocfs2_direct_IO_write, we use ocfs2_zero_extend to zero allocated clusters in case of cluster not aligned. But ocfs2_zero_extend uses page cache, this may happen that it clears the data which blockdev_direct_IO has already written. We should use blkdev_issue_zeroout instead of ocfs2_zero_extend during direct IO. So fix this issue by introducing ocfs2_direct_IO_zero_extend and ocfs2_direct_IO_extend_no_holes. Reported-by: Yiwen Jiang Signed-off-by: Joseph Qi Tested-by: Yiwen Jiang Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/aops.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 130 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 973a636285d1..1b0463a92b17 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -663,6 +663,117 @@ static int ocfs2_is_overwrite(struct ocfs2_super *osb, return 0; } +static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb, + struct inode *inode, loff_t offset, + u64 zero_len, int cluster_align) +{ + u32 p_cpos = 0; + u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); + unsigned int num_clusters = 0; + unsigned int ext_flags = 0; + int ret = 0; + + if (offset <= i_size_read(inode) || cluster_align) + return 0; + + ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, + &ext_flags); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { + u64 s = i_size_read(inode); + sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) + + (do_div(s, osb->s_clustersize) >> 9); + + ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector, + zero_len >> 9, GFP_NOFS, false); + if (ret < 0) + mlog_errno(ret); + } + + return ret; +} + +static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb, + struct inode *inode, loff_t offset) +{ + u64 zero_start, zero_len, total_zero_len; + u32 p_cpos = 0, clusters_to_add; + u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); + unsigned int num_clusters = 0; + unsigned int ext_flags = 0; + u32 size_div, offset_div; + int ret = 0; + + { + u64 o = offset; + u64 s = i_size_read(inode); + + offset_div = do_div(o, osb->s_clustersize); + size_div = do_div(s, osb->s_clustersize); + } + + if (offset <= i_size_read(inode)) + return 0; + + clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) - + ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode)); + total_zero_len = offset - i_size_read(inode); + if (clusters_to_add) + total_zero_len -= offset_div; + + /* Allocate clusters to fill out holes, and this is only needed + * when we add more than one clusters. Otherwise the cluster will + * be allocated during direct IO */ + if (clusters_to_add > 1) { + ret = ocfs2_extend_allocation(inode, + OCFS2_I(inode)->ip_clusters, + clusters_to_add - 1, 0); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + while (total_zero_len) { + ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, + &ext_flags); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) + + size_div; + zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) - + size_div; + zero_len = min(total_zero_len, zero_len); + + if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { + ret = blkdev_issue_zeroout(osb->sb->s_bdev, + zero_start >> 9, zero_len >> 9, + GFP_NOFS, false); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + } + + total_zero_len -= zero_len; + v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div); + + /* Only at first iteration can be cluster not aligned. + * So set size_div to 0 for the rest */ + size_div = 0; + } + +out: + return ret; +} + static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) @@ -677,8 +788,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, struct buffer_head *di_bh = NULL; size_t count = iter->count; journal_t *journal = osb->journal->j_journal; - u32 zero_len; - int cluster_align; + u64 zero_len_head, zero_len_tail; + int cluster_align_head, cluster_align_tail; loff_t final_size = offset + count; int append_write = offset >= i_size_read(inode) ? 1 : 0; unsigned int num_clusters = 0; @@ -686,9 +797,16 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, { u64 o = offset; + u64 s = i_size_read(inode); + + zero_len_head = do_div(o, 1 << osb->s_clustersize_bits); + cluster_align_head = !zero_len_head; - zero_len = do_div(o, 1 << osb->s_clustersize_bits); - cluster_align = !zero_len; + zero_len_tail = osb->s_clustersize - + do_div(s, osb->s_clustersize); + if ((offset - i_size_read(inode)) < zero_len_tail) + zero_len_tail = offset - i_size_read(inode); + cluster_align_tail = !zero_len_tail; } /* @@ -712,10 +830,13 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, goto clean_orphan; } + /* zeroing out the previously allocated cluster tail + * that but not zeroed */ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) - ret = ocfs2_zero_extend(inode, di_bh, offset); + ret = ocfs2_direct_IO_zero_extend(osb, inode, offset, + zero_len_tail, cluster_align_tail); else - ret = ocfs2_extend_no_holes(inode, di_bh, offset, + ret = ocfs2_direct_IO_extend_no_holes(osb, inode, offset); if (ret < 0) { mlog_errno(ret); @@ -768,7 +889,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, mlog_errno(ret); } } else if (written > 0 && append_write && !is_overwrite && - !cluster_align) { + !cluster_align_head) { + /* zeroing out the allocated cluster head */ u32 p_cpos = 0; u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); @@ -790,7 +912,7 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, ret = blkdev_issue_zeroout(osb->sb->s_bdev, p_cpos << (osb->s_clustersize_bits - 9), - zero_len >> 9, GFP_NOFS, false); + zero_len_head >> 9, GFP_NOFS, false); if (ret < 0) mlog_errno(ret); -- cgit v1.2.2 From d0ba25b905ba1246d04578cd59df83014e9b9152 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 14 Apr 2015 15:43:16 -0700 Subject: ocfs2: fix typo in ocfs2_reserve_local_alloc_bits In ocfs2_reserve_local_alloc_bits, it calls ocfs2_error if local alloc inode bitmap used bits mismatch, but the log mistakes it as free bits. Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/localalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 044013455621..096cff6f9ba8 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -666,7 +666,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, if (le32_to_cpu(alloc->id1.bitmap1.i_used) != ocfs2_local_alloc_count_bits(alloc)) { ocfs2_error(osb->sb, "local alloc inode %llu says it has " - "%u free bits, but a count shows %u", + "%u used bits, but a count shows %u", (unsigned long long)le64_to_cpu(alloc->i_blkno), le32_to_cpu(alloc->id1.bitmap1.i_used), ocfs2_local_alloc_count_bits(alloc)); -- cgit v1.2.2 From e073fc58dfe6a4c9b614320c1d56bb71cb213ec4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Apr 2015 15:43:19 -0700 Subject: ocfs2: dereferencing freed pointers in ocfs2_reflink() The code at the "out" label assumes that "default_acl" and "acl" are NULL, but actually the pointers can be NULL, unitialized, or freed. Signed-off-by: Dan Carpenter Reviewed-by: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/refcounttree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index ee541f92dab4..df3a500789c7 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4276,7 +4276,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, error = posix_acl_create(dir, &mode, &default_acl, &acl); if (error) { mlog_errno(error); - goto out; + return error; } error = ocfs2_create_inode_in_orphan(dir, mode, -- cgit v1.2.2 From e38a573907383b3c57514fe2331322b1fc110eef Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 14 Apr 2015 15:43:22 -0700 Subject: ocfs2: use actual name length when find entry in ocfs2_orphan_del() If the namelen is 20 and name only has actual length 16, it will fail in ocfs2_find_entry because of mismatch. So use actual name length when find entry. Signed-off-by: Joseph Qi Signed-off-by: Yiwen Jiang Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b5c3a5ea3ee6..49837404541e 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2322,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, trace_ocfs2_orphan_del( (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, - name, namelen); + name, strlen(name)); /* find it's spot in the orphan directory */ - status = ocfs2_find_entry(name, namelen, orphan_dir_inode, + status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode, &lookup); if (status) { mlog_errno(status); -- cgit v1.2.2 From 62f8b1f0d609838361d65b5b2114859d464e6baa Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 14 Apr 2015 15:43:24 -0700 Subject: ocfs2: use ENOENT instead of EEXIST when get system file fails When ocfs2_get_system_file_inode fails, it is obscure to set the return value to -EEXIST. So change it to -ENOENT. Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/inode.c | 4 ++-- fs/ocfs2/namei.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 3025c0da6b8a..be71ca0937f7 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -624,7 +624,7 @@ static int ocfs2_remove_inode(struct inode *inode, ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, le16_to_cpu(di->i_suballoc_slot)); if (!inode_alloc_inode) { - status = -EEXIST; + status = -ENOENT; mlog_errno(status); goto bail; } @@ -742,7 +742,7 @@ static int ocfs2_wipe_inode(struct inode *inode, ORPHAN_DIR_SYSTEM_INODE, orphaned_slot); if (!orphan_dir_inode) { - status = -EEXIST; + status = -ENOENT; mlog_errno(status); goto bail; } diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 49837404541e..09f90cbf0e24 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2808,7 +2808,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, ORPHAN_DIR_SYSTEM_INODE, osb->slot_num); if (!orphan_dir_inode) { - status = -EEXIST; + status = -ENOENT; mlog_errno(status); goto leave; } -- cgit v1.2.2 From a47726bcf299db6b5743d574df36c423263b4e65 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 14 Apr 2015 15:43:27 -0700 Subject: ocfs2: rollback the cleared bits if error occurs after ocfs2_block_group_clear_bits ocfs2_block_group_clear_bits will clear bits in block group bitmap. Once it succeeds but fails in the following step, it will cause block group bitmap mismatch the corresponding count recorded in dinode. So rollback the cleared bits if error occurs. Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/suballoc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 0cb889a17ae1..4479029630bb 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -2499,6 +2499,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); + ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh, + start_bit, count); goto bail; } -- cgit v1.2.2 From 7c01ad8fe7c159d7e1ddbd8e586d4d0dfed7ab3d Mon Sep 17 00:00:00 2001 From: Daeseok Youn Date: Tue, 14 Apr 2015 15:43:30 -0700 Subject: ocfs2: remove goto statement in ocfs2_check_dir_for_entry() Signed-off-by: Daeseok Youn Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dir.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index b08050bd3f2e..2241a19b9335 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2047,22 +2047,19 @@ int ocfs2_check_dir_for_entry(struct inode *dir, const char *name, int namelen) { - int ret; + int ret = 0; struct ocfs2_dir_lookup_result lookup = { NULL, }; trace_ocfs2_check_dir_for_entry( (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); - ret = -EEXIST; - if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) - goto bail; + if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) { + ret = -EEXIST; + mlog_errno(ret); + } - ret = 0; -bail: ocfs2_free_dir_lookup_result(&lookup); - if (ret) - mlog_errno(ret); return ret; } -- cgit v1.2.2 From 023d4ea358494ccfeb37abfe5b0fd01b45a6051c Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 14 Apr 2015 15:43:33 -0700 Subject: ocfs2: fix possible uninitialized variable access In ocfs2_local_alloc_find_clear_bits and ocfs2_get_dentry, variable numfound and set may be uninitialized and then used in tracepoint. In ocfs2_xattr_block_get and ocfs2_delete_xattr_in_bucket, variable block_off and xv may be uninitialized and then used in the following logic due to unchecked return value. This patch fixes these possible issues. Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/export.c | 2 +- fs/ocfs2/localalloc.c | 2 +- fs/ocfs2/xattr.c | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 29651167190d..540dc4bdd042 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -82,7 +82,6 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, } status = ocfs2_test_inode_bit(osb, blkno, &set); - trace_ocfs2_get_dentry_test_bit(status, set); if (status < 0) { if (status == -EINVAL) { /* @@ -96,6 +95,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, goto unlock_nfs_sync; } + trace_ocfs2_get_dentry_test_bit(status, set); /* If the inode allocator bit is clear, this inode must be stale */ if (!set) { status = -ESTALE; diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 096cff6f9ba8..857bbbcd39f3 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -839,7 +839,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, u32 *numbits, struct ocfs2_alloc_reservation *resv) { - int numfound, bitoff, left, startoff, lastzero; + int numfound = 0, bitoff, left, startoff, lastzero; int local_resv = 0; struct ocfs2_alloc_reservation r; void *bitmap = NULL; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 85b190dc132f..4ca7533be479 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1238,6 +1238,10 @@ static int ocfs2_xattr_block_get(struct inode *inode, i, &block_off, &name_offset); + if (ret) { + mlog_errno(ret); + goto cleanup; + } xs->base = bucket_block(xs->bucket, block_off); } if (ocfs2_xattr_is_local(xs->here)) { @@ -5665,6 +5669,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, &xv, NULL); + if (ret) { + mlog_errno(ret); + break; + } ret = ocfs2_lock_xattr_remove_allocators(inode, xv, args->ref_ci, -- cgit v1.2.2 From 762515a8e9c7ead55539cf96a63dec2363b1df50 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Tue, 14 Apr 2015 15:43:41 -0700 Subject: ocfs2: fix a typo in the copyright statement Signed-off-by: Jakub Wilk Reviewed-by: Eric Ren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 2241a19b9335..ccd4dcfc3645 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -18,7 +18,7 @@ * * linux/fs/minix/dir.c * - * Copyright (C) 1991, 1992 Linux Torvalds + * Copyright (C) 1991, 1992 Linus Torvalds * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public -- cgit v1.2.2 From e2ac55b6a8e337fac7cc59c6f452caac92ab5ee6 Mon Sep 17 00:00:00 2001 From: Chengyu Song Date: Tue, 14 Apr 2015 15:43:44 -0700 Subject: ocfs2: incorrect check for debugfs returns debugfs_create_dir and debugfs_create_file may return -ENODEV when debugfs is not configured, so the return value should be checked against ERROR_VALUE as well, otherwise the later dereference of the dentry pointer would crash the kernel. This patch tries to solve this problem by fixing certain checks. However, I have that found other call sites are protected by #ifdef CONFIG_DEBUG_FS. In current implementation, if CONFIG_DEBUG_FS is defined, then the above two functions will never return any ERROR_VALUE. So another possibility to fix this is to surround all the buggy checks/functions with the same #ifdef CONFIG_DEBUG_FS. But I'm not sure if this would break any functionality, as only OCFS2_FS_STATS declares dependency on DEBUG_FS. Signed-off-by: Chengyu Song Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/cluster/heartbeat.c | 42 +++++++++++++++++++++++++++++++----------- fs/ocfs2/dlmglue.c | 2 +- fs/ocfs2/super.c | 9 +++++---- 3 files changed, 37 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 16eff45727ee..8e19b9d7aba8 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1312,7 +1312,9 @@ static int o2hb_debug_init(void) int ret = -ENOMEM; o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); - if (!o2hb_debug_dir) { + if (IS_ERR_OR_NULL(o2hb_debug_dir)) { + ret = o2hb_debug_dir ? + PTR_ERR(o2hb_debug_dir) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -1325,7 +1327,9 @@ static int o2hb_debug_init(void) sizeof(o2hb_live_node_bitmap), O2NM_MAX_NODES, o2hb_live_node_bitmap); - if (!o2hb_debug_livenodes) { + if (IS_ERR_OR_NULL(o2hb_debug_livenodes)) { + ret = o2hb_debug_livenodes ? + PTR_ERR(o2hb_debug_livenodes) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -1338,7 +1342,9 @@ static int o2hb_debug_init(void) sizeof(o2hb_live_region_bitmap), O2NM_MAX_REGIONS, o2hb_live_region_bitmap); - if (!o2hb_debug_liveregions) { + if (IS_ERR_OR_NULL(o2hb_debug_liveregions)) { + ret = o2hb_debug_liveregions ? + PTR_ERR(o2hb_debug_liveregions) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -1352,7 +1358,9 @@ static int o2hb_debug_init(void) sizeof(o2hb_quorum_region_bitmap), O2NM_MAX_REGIONS, o2hb_quorum_region_bitmap); - if (!o2hb_debug_quorumregions) { + if (IS_ERR_OR_NULL(o2hb_debug_quorumregions)) { + ret = o2hb_debug_quorumregions ? + PTR_ERR(o2hb_debug_quorumregions) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -1366,7 +1374,9 @@ static int o2hb_debug_init(void) sizeof(o2hb_failed_region_bitmap), O2NM_MAX_REGIONS, o2hb_failed_region_bitmap); - if (!o2hb_debug_failedregions) { + if (IS_ERR_OR_NULL(o2hb_debug_failedregions)) { + ret = o2hb_debug_failedregions ? + PTR_ERR(o2hb_debug_failedregions) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2000,7 +2010,8 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) reg->hr_debug_dir = debugfs_create_dir(config_item_name(®->hr_item), dir); - if (!reg->hr_debug_dir) { + if (IS_ERR_OR_NULL(reg->hr_debug_dir)) { + ret = reg->hr_debug_dir ? PTR_ERR(reg->hr_debug_dir) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2013,7 +2024,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) O2HB_DB_TYPE_REGION_LIVENODES, sizeof(reg->hr_live_node_bitmap), O2NM_MAX_NODES, reg); - if (!reg->hr_debug_livenodes) { + if (IS_ERR_OR_NULL(reg->hr_debug_livenodes)) { + ret = reg->hr_debug_livenodes ? + PTR_ERR(reg->hr_debug_livenodes) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2025,7 +2038,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) sizeof(*(reg->hr_db_regnum)), O2HB_DB_TYPE_REGION_NUMBER, 0, O2NM_MAX_NODES, reg); - if (!reg->hr_debug_regnum) { + if (IS_ERR_OR_NULL(reg->hr_debug_regnum)) { + ret = reg->hr_debug_regnum ? + PTR_ERR(reg->hr_debug_regnum) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2037,7 +2052,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) sizeof(*(reg->hr_db_elapsed_time)), O2HB_DB_TYPE_REGION_ELAPSED_TIME, 0, 0, reg); - if (!reg->hr_debug_elapsed_time) { + if (IS_ERR_OR_NULL(reg->hr_debug_elapsed_time)) { + ret = reg->hr_debug_elapsed_time ? + PTR_ERR(reg->hr_debug_elapsed_time) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2049,13 +2066,16 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) sizeof(*(reg->hr_db_pinned)), O2HB_DB_TYPE_REGION_PINNED, 0, 0, reg); - if (!reg->hr_debug_pinned) { + if (IS_ERR_OR_NULL(reg->hr_debug_pinned)) { + ret = reg->hr_debug_pinned ? + PTR_ERR(reg->hr_debug_pinned) : -ENOMEM; mlog_errno(ret); goto bail; } - ret = 0; + return 0; bail: + debugfs_remove_recursive(reg->hr_debug_dir); return ret; } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 11849a44dc5a..23adcbf374d3 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2954,7 +2954,7 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) osb->osb_debug_root, osb, &ocfs2_dlm_debug_fops); - if (!dlm_debug->d_locking_state) { + if (IS_ERR_OR_NULL(dlm_debug->d_locking_state)) { ret = -EINVAL; mlog(ML_ERROR, "Unable to create locking state debugfs file.\n"); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 26675185b886..fb43de586791 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1112,7 +1112,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, ocfs2_debugfs_root); - if (!osb->osb_debug_root) { + if (IS_ERR_OR_NULL(osb->osb_debug_root)) { status = -EINVAL; mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); goto read_super_error; @@ -1122,7 +1122,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->osb_debug_root, osb, &ocfs2_osb_debug_fops); - if (!osb->osb_ctxt) { + if (IS_ERR_OR_NULL(osb->osb_ctxt)) { status = -EINVAL; mlog_errno(status); goto read_super_error; @@ -1606,8 +1606,9 @@ static int __init ocfs2_init(void) } ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); - if (!ocfs2_debugfs_root) { - status = -ENOMEM; + if (IS_ERR_OR_NULL(ocfs2_debugfs_root)) { + status = ocfs2_debugfs_root ? + PTR_ERR(ocfs2_debugfs_root) : -ENOMEM; mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); goto out4; } -- cgit v1.2.2 From 1543306e75ee40662b2c6d37c43c8659f3d6f880 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 14 Apr 2015 15:43:46 -0700 Subject: ocfs2: logging: remove static buffer, use vsprintf extension %pV Use the vsprintf %pV extension to avoid using a static buffer and remove the now unnecessary buffer. Signed-off-by: Joe Perches Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/super.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index fb43de586791..c558bff99165 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2564,22 +2564,22 @@ static void ocfs2_handle_error(struct super_block *sb) ocfs2_set_ro_flag(osb, 0); } -static char error_buf[1024]; - -void __ocfs2_error(struct super_block *sb, - const char *function, - const char *fmt, ...) +void __ocfs2_error(struct super_block *sb, const char *function, + const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(error_buf, sizeof(error_buf), fmt, args); - va_end(args); + vaf.fmt = fmt; + vaf.va = &args; /* Not using mlog here because we want to show the actual * function the error came from. */ - printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n", - sb->s_id, function, error_buf); + printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV\n", + sb->s_id, function, &vaf); + + va_end(args); ocfs2_handle_error(sb); } @@ -2587,18 +2587,21 @@ void __ocfs2_error(struct super_block *sb, /* Handle critical errors. This is intentionally more drastic than * ocfs2_handle_error, so we only use for things like journal errors, * etc. */ -void __ocfs2_abort(struct super_block* sb, - const char *function, +void __ocfs2_abort(struct super_block *sb, const char *function, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(error_buf, sizeof(error_buf), fmt, args); - va_end(args); - printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n", - sb->s_id, function, error_buf); + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV\n", + sb->s_id, function, &vaf); + + va_end(args); /* We don't have the cluster support yet to go straight to * hard readonly in here. Until then, we want to keep -- cgit v1.2.2 From 2f2eca20a09dac0e9d62bf57ce6a0c6ef6cf91e6 Mon Sep 17 00:00:00 2001 From: alex chen Date: Tue, 14 Apr 2015 15:43:49 -0700 Subject: ocfs2: check if the ocfs2 lock resource has been initialized before calling ocfs2_dlm_lock If ocfs2 lockres has not been initialized before calling ocfs2_dlm_lock, the lock won't be dropped and then will lead umount hung. The case is described below: ocfs2_mknod ocfs2_mknod_locked __ocfs2_mknod_locked ocfs2_journal_access_di Failed because of -ENOMEM or other reasons, the inode lockres has not been initialized yet. iput(inode) ocfs2_evict_inode ocfs2_delete_inode ocfs2_inode_lock ocfs2_inode_lock_full_nested __ocfs2_cluster_lock Succeeds and allocates a new dlm lockres. ocfs2_clear_inode ocfs2_open_unlock ocfs2_drop_inode_locks ocfs2_drop_lock Since lockres has not been initialized, the lock can't be dropped and the lockres can't be migrated, thus umount will hang forever. Signed-off-by: Alex Chen Reviewed-by: Joseph Qi Reviewed-by: joyce.xue Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlmglue.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 23adcbf374d3..956edf67be20 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1391,6 +1391,11 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, int noqueue_attempted = 0; int dlm_locked = 0; + if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) { + mlog_errno(-EINVAL); + return -EINVAL; + } + ocfs2_init_mask_waiter(&mw); if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) -- cgit v1.2.2 From 1d5b897706d11ac59fecd85ba7b1cbf91c44fe50 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 14 Apr 2015 15:43:52 -0700 Subject: ocfs2: make mlog_errno return the errno ocfs2 does mlog_errno(v); return v; in many places. Change mlog_errno() so we can do return mlog_errno(v); For some weird reason this patch reduces the size of ocfs2 by 6k: akpm3:/usr/src/25> size fs/ocfs2/ocfs2.ko text data bss dec hex filename 1146613 82767 832192 2061572 1f7504 fs/ocfs2/ocfs2.ko-before 1140857 82767 832192 2055816 1f5e88 fs/ocfs2/ocfs2.ko-after [dan.carpenter@oracle.com: double evaluation concerns in mlog_errno()] Cc: Mark Fasheh Cc: Joel Becker Cc: alex chen Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/cluster/masklog.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 2260fb9e6508..7fdc25a4d8c0 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -196,13 +196,14 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; } \ } while (0) -#define mlog_errno(st) do { \ +#define mlog_errno(st) ({ \ int _st = (st); \ if (_st != -ERESTARTSYS && _st != -EINTR && \ _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \ _st != -EDQUOT) \ mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ -} while (0) + _st; \ +}) #define mlog_bug_on_msg(cond, fmt, args...) do { \ if (cond) { \ -- cgit v1.2.2 From b9ea25152e56365ce149b9a39637cd7a16eec556 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 14 Apr 2015 15:45:27 -0700 Subject: page_writeback: clean up mess around cancel_dirty_page() This patch replaces cancel_dirty_page() with a helper function account_page_cleaned() which only updates counters. It's called from truncate_complete_page() and from try_to_free_buffers() (hack for ext3). Page is locked in both cases, page-lock protects against concurrent dirtiers: see commit 2d6d7f982846 ("mm: protect set_page_dirty() from ongoing truncation"). Delete_from_page_cache() shouldn't be called for dirty pages, they must be handled by caller (either written or truncated). This patch treats final dirty accounting fixup at the end of __delete_from_page_cache() as a debug check and adds WARN_ON_ONCE() around it. If something removes dirty pages without proper handling that might be a bug and unwritten data might be lost. Hugetlbfs has no dirty pages accounting, ClearPageDirty() is enough here. cancel_dirty_page() in nfs_wb_page_cancel() is redundant. This is helper for nfs_invalidate_page() and it's called only in case complete invalidation. The mess was started in v2.6.20 after commits 46d2277c796f ("Clean up and make try_to_free_buffers() not race with dirty pages") and 3e67c0987d75 ("truncate: clear page dirtiness before running try_to_free_buffers()") first was reverted right in v2.6.20 in commit ecdfc9787fe5 ("Resurrect 'try_to_free_buffers()' VM hackery"), second in v2.6.25 commit a2b345642f53 ("Fix dirty page accounting leak with ext3 data=journal"). Custom fixes were introduced between these points. NFS in v2.6.23, commit 1b3b4a1a2deb ("NFS: Fix a write request leak in nfs_invalidate_page()"). Kludge in __delete_from_page_cache() in v2.6.24, commit 3a6927906f1b ("Do dirty page accounting when removing a page from the page cache"). Since v2.6.25 all of them are redundant. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Konstantin Khlebnikov Cc: Tejun Heo Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 4 ++-- fs/hugetlbfs/inode.c | 2 +- fs/nfs/write.c | 5 ----- 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 20805db2c987..c7a5602d01ee 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page) * to synchronise against __set_page_dirty_buffers and prevent the * dirty bit from being lost. */ - if (ret) - cancel_dirty_page(page, PAGE_CACHE_SIZE); + if (ret && TestClearPageDirty(page)) + account_page_cleaned(page, mapping); spin_unlock(&mapping->private_lock); out: if (buffers_to_free) { diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c274aca8e8dc..db76cec3ce21 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -319,7 +319,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping, static void truncate_huge_page(struct page *page) { - cancel_dirty_page(page, /* No IO accounting for huge pages? */0); + ClearPageDirty(page); ClearPageUptodate(page); delete_from_page_cache(page); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 849ed784d6ac..759931088094 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1876,11 +1876,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) * request from the inode / page_private pointer and * release it */ nfs_inode_remove_request(req); - /* - * In case nfs_inode_remove_request has marked the - * page as being dirty - */ - cancel_dirty_page(page, PAGE_CACHE_SIZE); nfs_unlock_and_release_request(req); } -- cgit v1.2.2 From 58be19dcf7c0c206e60796c2ee18fc4fc1659fea Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 14 Apr 2015 15:46:39 -0700 Subject: ocfs2: copy fs uuid to superblock Currently, maximal number of cleancache enabled filesystems equals 32, which is insufficient nowadays, because a Linux host can have hundreds of containers on board, each of which might want its own filesystem. This patch set targets at removing this limitation - see patch 4 for more details. Patches 1-3 prepare the code for this change. This patch (of 4): This will allow us to remove the uuid argument from cleancache_init_shared_fs. Signed-off-by: Vladimir Davydov Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Vrabel Cc: Mark Fasheh Cc: Joel Becker Cc: Stefan Hengelein Cc: Florian Schmaus Cc: Andor Daam Cc: Dan Magenheimer Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index c558bff99165..a811a95cfd5f 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2070,6 +2070,8 @@ static int ocfs2_initialize_super(struct super_block *sb, cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); + memcpy(sb->s_uuid, di->id2.i_super.s_uuid, + sizeof(di->id2.i_super.s_uuid)); osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; -- cgit v1.2.2 From 9de1626290eaa7d921413ddc83544bc3bae27283 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 14 Apr 2015 15:46:42 -0700 Subject: cleancache: zap uuid arg of cleancache_init_shared_fs Use super_block->s_uuid instead. Every shared filesystem using cleancache must now initialize super_block->s_uuid before calling cleancache_init_shared_fs. The only one on the tree, ocfs2, already meets this requirement. Signed-off-by: Vladimir Davydov Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Vrabel Cc: Mark Fasheh Cc: Joel Becker Cc: Stefan Hengelein Cc: Florian Schmaus Cc: Andor Daam Cc: Dan Magenheimer Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index a811a95cfd5f..837ddce4b659 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2336,7 +2336,7 @@ static int ocfs2_initialize_super(struct super_block *sb, mlog_errno(status); goto bail; } - cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb); + cleancache_init_shared_fs(sb); bail: return status; -- cgit v1.2.2 From 3cb29d11174f29b76addcba4374884b14f8ea4b1 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 14 Apr 2015 15:46:48 -0700 Subject: cleancache: remove limit on the number of cleancache enabled filesystems The limit equals 32 and is imposed by the number of entries in the fs_poolid_map and shared_fs_poolid_map. Nowadays it is insufficient, because with containers on board a Linux host can have hundreds of active fs mounts. These maps were introduced by commit 49a9ab815acb8 ("mm: cleancache: lazy initialization to allow tmem backends to build/run as modules") in order to allow compiling cleancache drivers as modules. Real pool ids are stored in these maps while super_block->cleancache_poolid points to an entry in the map, so that on cleancache registration we can walk over all (if there are <= 32 of them, of course) cleancache-enabled super blocks and assign real pool ids. Actually, there is absolutely no need in these maps, because we can iterate over all super blocks immediately using iterate_supers. This is not racy, because cleancache_init_ops is called from mount_fs with super_block->s_umount held for writing, while iterate_supers takes this semaphore for reading, so if we call iterate_supers after setting cleancache_ops, all super blocks that had been created before cleancache_register_ops was called will be assigned pool ids by the action function of iterate_supers while all newer super blocks will receive it in cleancache_init_fs. This patch therefore removes the maps and hence the artificial limit on the number of cleancache enabled filesystems. Signed-off-by: Vladimir Davydov Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Vrabel Cc: Mark Fasheh Cc: Joel Becker Cc: Stefan Hengelein Cc: Florian Schmaus Cc: Andor Daam Cc: Dan Magenheimer Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index 2b7dc90ccdbb..928c20f47af9 100644 --- a/fs/super.c +++ b/fs/super.c @@ -224,7 +224,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; - s->cleancache_poolid = -1; + s->cleancache_poolid = CLEANCACHE_NO_POOL; s->s_shrink.seeks = DEFAULT_SEEKS; s->s_shrink.scan_objects = super_cache_scan; -- cgit v1.2.2 From a87938b2e246b81b4fb713edb371a9fa3c5c3c86 Mon Sep 17 00:00:00 2001 From: Michael Davidson Date: Tue, 14 Apr 2015 15:47:38 -0700 Subject: fs/binfmt_elf.c: fix bug in loading of PIE binaries With CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE enabled, and a normal top-down address allocation strategy, load_elf_binary() will attempt to map a PIE binary into an address range immediately below mm->mmap_base. Unfortunately, load_elf_ binary() does not take account of the need to allocate sufficient space for the entire binary which means that, while the first PT_LOAD segment is mapped below mm->mmap_base, the subsequent PT_LOAD segment(s) end up being mapped above mm->mmap_base into the are that is supposed to be the "gap" between the stack and the binary. Since the size of the "gap" on x86_64 is only guaranteed to be 128MB this means that binaries with large data segments > 128MB can end up mapping part of their data segment over their stack resulting in corruption of the stack (and the data segment once the binary starts to run). Any PIE binary with a data segment > 128MB is vulnerable to this although address randomization means that the actual gap between the stack and the end of the binary is normally greater than 128MB. The larger the data segment of the binary the higher the probability of failure. Fix this by calculating the total size of the binary in the same way as load_elf_interp(). Signed-off-by: Michael Davidson Cc: Alexander Viro Cc: Jiri Kosina Cc: Kees Cook Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 995986b8e36b..d925f55e4857 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -862,6 +862,7 @@ static int load_elf_binary(struct linux_binprm *bprm) i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { int elf_prot = 0, elf_flags; unsigned long k, vaddr; + unsigned long total_size = 0; if (elf_ppnt->p_type != PT_LOAD) continue; @@ -924,10 +925,16 @@ static int load_elf_binary(struct linux_binprm *bprm) #else load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); #endif + total_size = total_mapping_size(elf_phdata, + loc->elf_ex.e_phnum); + if (!total_size) { + error = -EINVAL; + goto out_free_dentry; + } } error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, - elf_prot, elf_flags, 0); + elf_prot, elf_flags, total_size); if (BAD_ADDR(error)) { retval = IS_ERR((void *)error) ? PTR_ERR((void*)error) : -EINVAL; -- cgit v1.2.2 From d1fd836dcf00d2028c700c7e44d2c23404062c90 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:48:07 -0700 Subject: mm: split ET_DYN ASLR from mmap ASLR This fixes the "offset2lib" weakness in ASLR for arm, arm64, mips, powerpc, and x86. The problem is that if there is a leak of ASLR from the executable (ET_DYN), it means a leak of shared library offset as well (mmap), and vice versa. Further details and a PoC of this attack is available here: http://cybersecurity.upv.es/attacks/offset2lib/offset2lib.html With this patch, a PIE linked executable (ET_DYN) has its own ASLR region: $ ./show_mmaps_pie 54859ccd6000-54859ccd7000 r-xp ... /tmp/show_mmaps_pie 54859ced6000-54859ced7000 r--p ... /tmp/show_mmaps_pie 54859ced7000-54859ced8000 rw-p ... /tmp/show_mmaps_pie 7f75be764000-7f75be91f000 r-xp ... /lib/x86_64-linux-gnu/libc.so.6 7f75be91f000-7f75beb1f000 ---p ... /lib/x86_64-linux-gnu/libc.so.6 7f75beb1f000-7f75beb23000 r--p ... /lib/x86_64-linux-gnu/libc.so.6 7f75beb23000-7f75beb25000 rw-p ... /lib/x86_64-linux-gnu/libc.so.6 7f75beb25000-7f75beb2a000 rw-p ... 7f75beb2a000-7f75beb4d000 r-xp ... /lib64/ld-linux-x86-64.so.2 7f75bed45000-7f75bed46000 rw-p ... 7f75bed46000-7f75bed47000 r-xp ... 7f75bed47000-7f75bed4c000 rw-p ... 7f75bed4c000-7f75bed4d000 r--p ... /lib64/ld-linux-x86-64.so.2 7f75bed4d000-7f75bed4e000 rw-p ... /lib64/ld-linux-x86-64.so.2 7f75bed4e000-7f75bed4f000 rw-p ... 7fffb3741000-7fffb3762000 rw-p ... [stack] 7fffb377b000-7fffb377d000 r--p ... [vvar] 7fffb377d000-7fffb377f000 r-xp ... [vdso] The change is to add a call the newly created arch_mmap_rnd() into the ELF loader for handling ET_DYN ASLR in a separate region from mmap ASLR, as was already done on s390. Removes CONFIG_BINFMT_ELF_RANDOMIZE_PIE, which is no longer needed. Signed-off-by: Kees Cook Reported-by: Hector Marco-Gisbert Cc: Russell King Reviewed-by: Ingo Molnar Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Alexander Viro Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: "David A. Long" Cc: Andrey Ryabinin Cc: Arun Chandran Cc: Yann Droneaud Cc: Min-Hua Chen Cc: Paul Burton Cc: Alex Smith Cc: Markos Chandras Cc: Vineeth Vijayan Cc: Jeff Bailey Cc: Michael Holzheu Cc: Ben Hutchings Cc: Behan Webster Cc: Ismael Ripoll Cc: Jan-Simon Mller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig.binfmt | 3 --- fs/binfmt_elf.c | 18 ++++-------------- 2 files changed, 4 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 270c48148f79..2d0cbbd14cfc 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF bool depends on COMPAT && BINFMT_ELF -config ARCH_BINFMT_ELF_RANDOMIZE_PIE - bool - config ARCH_BINFMT_ELF_STATE bool diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d925f55e4857..b20c05477e90 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -910,21 +911,10 @@ static int load_elf_binary(struct linux_binprm *bprm) * default mmap base, as well as whatever program they * might try to exec. This is because the brk will * follow the loader, and is not movable. */ -#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE - /* Memory randomization might have been switched off - * in runtime via sysctl or explicit setting of - * personality flags. - * If that is the case, retain the original non-zero - * load_bias value in order to establish proper - * non-randomized mappings. - */ + load_bias = ELF_ET_DYN_BASE - vaddr; if (current->flags & PF_RANDOMIZE) - load_bias = 0; - else - load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); -#else - load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); -#endif + load_bias += arch_mmap_rnd(); + load_bias = ELF_PAGESTART(load_bias); total_size = total_mapping_size(elf_phdata, loc->elf_ex.e_phnum); if (!total_size) { -- cgit v1.2.2 From 204db6ed17743000691d930368a5abd6ea541c58 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:48:12 -0700 Subject: mm: fold arch_randomize_brk into ARCH_HAS_ELF_RANDOMIZE The arch_randomize_brk() function is used on several architectures, even those that don't support ET_DYN ASLR. To avoid bulky extern/#define tricks, consolidate the support under CONFIG_ARCH_HAS_ELF_RANDOMIZE for the architectures that support it, while still handling CONFIG_COMPAT_BRK. Signed-off-by: Kees Cook Cc: Hector Marco-Gisbert Cc: Russell King Reviewed-by: Ingo Molnar Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Alexander Viro Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: "David A. Long" Cc: Andrey Ryabinin Cc: Arun Chandran Cc: Yann Droneaud Cc: Min-Hua Chen Cc: Paul Burton Cc: Alex Smith Cc: Markos Chandras Cc: Vineeth Vijayan Cc: Jeff Bailey Cc: Michael Holzheu Cc: Ben Hutchings Cc: Behan Webster Cc: Ismael Ripoll Cc: Jan-Simon Mller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b20c05477e90..241ef68d2893 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1050,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm) current->mm->end_data = end_data; current->mm->start_stack = bprm->p; -#ifdef arch_randomize_brk if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { current->mm->brk = current->mm->start_brk = arch_randomize_brk(current->mm); -#ifdef CONFIG_COMPAT_BRK +#ifdef compat_brk_randomized current->brk_randomized = 1; #endif } -#endif if (current->personality & MMAP_PAGE_ZERO) { /* Why this, you ask??? Well SVr4 maps page 0 as read-only, -- cgit v1.2.2 From 11d83360452ea2a95e699da01f8e1bcc4676a5de Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 14 Apr 2015 15:48:21 -0700 Subject: mm, mempool: do not allow atomic resizing Allocating a large number of elements in atomic context could quickly deplete memory reserves, so just disallow atomic resizing entirely. Nothing currently uses mempool_resize() with anything other than GFP_KERNEL, so convert existing callers to drop the gfp_mask. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: David Rientjes Acked-by: Steffen Maier [zfcp] Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Steve French Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cifs/connect.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 480cf9c81d50..f3bfe08e177b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) length = atomic_dec_return(&tcpSesAllocCount); if (length > 0) - mempool_resize(cifs_req_poolp, length + cifs_min_rcv, - GFP_KERNEL); + mempool_resize(cifs_req_poolp, length + cifs_min_rcv); } static int @@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p) length = atomic_inc_return(&tcpSesAllocCount); if (length > 1) - mempool_resize(cifs_req_poolp, length + cifs_min_rcv, - GFP_KERNEL); + mempool_resize(cifs_req_poolp, length + cifs_min_rcv); set_freezable(); while (server->tcpStatus != CifsExiting) { -- cgit v1.2.2