author    Linus Torvalds <torvalds@linux-foundation.org>    2018-11-03 13:21:43 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2018-11-03 13:21:43 -0400
commit    cddfa11aef3c4914f406a059138ccc354f034d1c (patch)
tree      388031c740f221576e7bb3f1ad35b0b158280525
parent    5f21585384a4a69b8bfdd2cae7e3648ae805f57d (diff)
parent    dd33ad7b251f900481701b2a82d25de583867708 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:

 - more ocfs2 work

 - various leftovers

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  memory_hotplug: cond_resched in __remove_pages
  bfs: add sanity check at bfs_fill_super()
  kernel/sysctl.c: remove duplicated include
  kernel/kexec_file.c: remove some duplicated includes
  mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask
  ocfs2: fix clusters leak in ocfs2_defrag_extent()
  ocfs2: dlmglue: clean up timestamp handling
  ocfs2: don't put and assigning null to bh allocated outside
  ocfs2: fix a misuse a of brelse after failing ocfs2_check_dir_entry
  ocfs2: don't use iocb when EIOCBQUEUED returns
  ocfs2: without quota support, avoid calling quota recovery
  ocfs2: remove ocfs2_is_o2cb_active()
  mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings
  include/linux/notifier.h: SRCU: fix ctags
  mm: handle no memcg case in memcg_kmem_charge() properly
-rw-r--r--    fs/bfs/inode.c                 9
-rw-r--r--    fs/ocfs2/buffer_head_io.c     77
-rw-r--r--    fs/ocfs2/dir.c                 3
-rw-r--r--    fs/ocfs2/dlmglue.c            28
-rw-r--r--    fs/ocfs2/file.c                4
-rw-r--r--    fs/ocfs2/journal.c            51
-rw-r--r--    fs/ocfs2/move_extents.c       17
-rw-r--r--    fs/ocfs2/stackglue.c           6
-rw-r--r--    fs/ocfs2/stackglue.h           3
-rw-r--r--    include/linux/gfp.h           12
-rw-r--r--    include/linux/mempolicy.h      2
-rw-r--r--    include/linux/notifier.h       3
-rw-r--r--    kernel/kexec_file.c            2
-rw-r--r--    kernel/sysctl.c                1
-rw-r--r--    mm/huge_memory.c              38
-rw-r--r--    mm/memcontrol.c                2
-rw-r--r--    mm/memory_hotplug.c            1
-rw-r--r--    mm/mempolicy.c                35
-rw-r--r--    mm/shmem.c                     2
19 files changed, 172 insertions, 124 deletions
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 9a69392f1fb3..d81c148682e7 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -350,7 +350,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 
         s->s_magic = BFS_MAGIC;
 
-        if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) {
+        if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) ||
+            le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) {
                 printf("Superblock is corrupted\n");
                 goto out1;
         }
@@ -359,9 +360,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
                         sizeof(struct bfs_inode)
                         + BFS_ROOT_INO - 1;
         imap_len = (info->si_lasti / 8) + 1;
-        info->si_imap = kzalloc(imap_len, GFP_KERNEL);
-        if (!info->si_imap)
+        info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN);
+        if (!info->si_imap) {
+                printf("Cannot allocate %u bytes\n", imap_len);
                 goto out1;
+        }
         for (i = 0; i < BFS_ROOT_INO; i++)
                 set_bit(i, info->si_imap);
 
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 1d098c3c00e0..4ebbd57cbf84 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -99,25 +99,34 @@ out:
         return ret;
 }
 
+/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
+ * will be easier to handle read failure.
+ */
 int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
                            unsigned int nr, struct buffer_head *bhs[])
 {
         int status = 0;
         unsigned int i;
         struct buffer_head *bh;
+        int new_bh = 0;
 
         trace_ocfs2_read_blocks_sync((unsigned long long)block, nr);
 
         if (!nr)
                 goto bail;
 
+        /* Don't put buffer head and re-assign it to NULL if it is allocated
+         * outside since the caller can't be aware of this alteration!
+         */
+        new_bh = (bhs[0] == NULL);
+
         for (i = 0 ; i < nr ; i++) {
                 if (bhs[i] == NULL) {
                         bhs[i] = sb_getblk(osb->sb, block++);
                         if (bhs[i] == NULL) {
                                 status = -ENOMEM;
                                 mlog_errno(status);
-                                goto bail;
+                                break;
                         }
                 }
                 bh = bhs[i];
@@ -158,9 +167,26 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
                 submit_bh(REQ_OP_READ, 0, bh);
         }
 
+read_failure:
         for (i = nr; i > 0; i--) {
                 bh = bhs[i - 1];
 
+                if (unlikely(status)) {
+                        if (new_bh && bh) {
+                                /* If middle bh fails, let previous bh
+                                 * finish its read and then put it to
+                                 * avoid bh leak
+                                 */
+                                if (!buffer_jbd(bh))
+                                        wait_on_buffer(bh);
+                                put_bh(bh);
+                                bhs[i - 1] = NULL;
+                        } else if (bh && buffer_uptodate(bh)) {
+                                clear_buffer_uptodate(bh);
+                        }
+                        continue;
+                }
+
                 /* No need to wait on the buffer if it's managed by JBD. */
                 if (!buffer_jbd(bh))
                         wait_on_buffer(bh);
@@ -170,8 +196,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
                          * so we can safely record this and loop back
                          * to cleanup the other buffers. */
                         status = -EIO;
-                        put_bh(bh);
-                        bhs[i - 1] = NULL;
+                        goto read_failure;
                 }
         }
 
@@ -179,6 +204,9 @@ bail:
         return status;
 }
 
+/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
+ * will be easier to handle read failure.
+ */
 int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                       struct buffer_head *bhs[], int flags,
                       int (*validate)(struct super_block *sb,
@@ -188,6 +216,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
         int i, ignore_cache = 0;
         struct buffer_head *bh;
         struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+        int new_bh = 0;
 
         trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags);
 
@@ -213,6 +242,11 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                 goto bail;
         }
 
+        /* Don't put buffer head and re-assign it to NULL if it is allocated
+         * outside since the caller can't be aware of this alteration!
+         */
+        new_bh = (bhs[0] == NULL);
+
         ocfs2_metadata_cache_io_lock(ci);
         for (i = 0 ; i < nr ; i++) {
                 if (bhs[i] == NULL) {
@@ -221,7 +255,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                                 ocfs2_metadata_cache_io_unlock(ci);
                                 status = -ENOMEM;
                                 mlog_errno(status);
-                                goto bail;
+                                /* Don't forget to put previous bh! */
+                                break;
                         }
                 }
                 bh = bhs[i];
@@ -316,16 +351,27 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                 }
         }
 
-        status = 0;
-
+read_failure:
         for (i = (nr - 1); i >= 0; i--) {
                 bh = bhs[i];
 
                 if (!(flags & OCFS2_BH_READAHEAD)) {
-                        if (status) {
-                                /* Clear the rest of the buffers on error */
-                                put_bh(bh);
-                                bhs[i] = NULL;
+                        if (unlikely(status)) {
+                                /* Clear the buffers on error including those
+                                 * that ever succeeded in reading
+                                 */
+                                if (new_bh && bh) {
+                                        /* If middle bh fails, let previous bh
+                                         * finish its read and then put it to
+                                         * avoid bh leak
+                                         */
+                                        if (!buffer_jbd(bh))
+                                                wait_on_buffer(bh);
+                                        put_bh(bh);
+                                        bhs[i] = NULL;
+                                } else if (bh && buffer_uptodate(bh)) {
+                                        clear_buffer_uptodate(bh);
+                                }
                                 continue;
                         }
                         /* We know this can't have changed as we hold the
@@ -343,9 +389,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                          * uptodate. */
                         status = -EIO;
                         clear_buffer_needs_validate(bh);
-                        put_bh(bh);
-                        bhs[i] = NULL;
-                        continue;
+                        goto read_failure;
                 }
 
                 if (buffer_needs_validate(bh)) {
@@ -355,11 +399,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                         BUG_ON(buffer_jbd(bh));
                         clear_buffer_needs_validate(bh);
                         status = validate(sb, bh);
-                        if (status) {
-                                put_bh(bh);
-                                bhs[i] = NULL;
-                                continue;
-                        }
+                        if (status)
+                                goto read_failure;
                 }
         }
 
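A note on the calling convention the two hunks above introduce. What follows is a minimal caller sketch, not from the patch (osb, block and the error handling are assumed context): with an all-NULL bhs[] the function owns the heads, so on failure it puts them and resets the slots itself; with an all-pre-allocated bhs[] the caller keeps ownership and the function only clears the uptodate bits, never calling put_bh() behind the caller's back.

        /* Mode 1: all-NULL array -- function-owned buffer heads. */
        struct buffer_head *bhs[2] = { NULL, NULL };
        int status = ocfs2_read_blocks_sync(osb, block, 2, bhs);
        if (status)
                return status;        /* bhs[] already put and NULLed */

        /* Mode 2: all entries pre-allocated by the caller -- on error the
         * slots stay valid (only !uptodate) and the caller brelse()s them. */

Mixing NULL and non-NULL entries is exactly what the new comment forbids, since the cleanup path could not then tell which heads it is allowed to release.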
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b048d4fa3959..c121abbdfc7d 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1897,8 +1897,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
                                 /* On error, skip the f_pos to the
                                    next block. */
                                 ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
-                                brelse(bh);
-                                continue;
+                                break;
                         }
                         if (le64_to_cpu(de->inode)) {
                                 unsigned char d_type = DT_UNKNOWN;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 933aac5da193..7c835824247e 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2123,10 +2123,10 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
 
 /* LVB only has room for 64 bits of time here so we pack it for
  * now. */
-static u64 ocfs2_pack_timespec(struct timespec *spec)
+static u64 ocfs2_pack_timespec(struct timespec64 *spec)
 {
         u64 res;
-        u64 sec = spec->tv_sec;
+        u64 sec = clamp_t(time64_t, spec->tv_sec, 0, 0x3ffffffffull);
         u32 nsec = spec->tv_nsec;
 
         res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
@@ -2142,7 +2142,6 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
         struct ocfs2_inode_info *oi = OCFS2_I(inode);
         struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
         struct ocfs2_meta_lvb *lvb;
-        struct timespec ts;
 
         lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
@@ -2163,15 +2162,12 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
         lvb->lvb_igid = cpu_to_be32(i_gid_read(inode));
         lvb->lvb_imode = cpu_to_be16(inode->i_mode);
         lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
-        ts = timespec64_to_timespec(inode->i_atime);
         lvb->lvb_iatime_packed =
-                cpu_to_be64(ocfs2_pack_timespec(&ts));
-        ts = timespec64_to_timespec(inode->i_ctime);
+                cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
         lvb->lvb_ictime_packed =
-                cpu_to_be64(ocfs2_pack_timespec(&ts));
-        ts = timespec64_to_timespec(inode->i_mtime);
+                cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
         lvb->lvb_imtime_packed =
-                cpu_to_be64(ocfs2_pack_timespec(&ts));
+                cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
         lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
         lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
         lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
@@ -2180,7 +2176,7 @@ out:
         mlog_meta_lvb(0, lockres);
 }
 
-static void ocfs2_unpack_timespec(struct timespec *spec,
+static void ocfs2_unpack_timespec(struct timespec64 *spec,
                                   u64 packed_time)
 {
         spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
@@ -2189,7 +2185,6 @@ static void ocfs2_unpack_timespec(struct timespec *spec,
 
 static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
 {
-        struct timespec ts;
         struct ocfs2_inode_info *oi = OCFS2_I(inode);
         struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
         struct ocfs2_meta_lvb *lvb;
@@ -2217,15 +2212,12 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
         i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
         inode->i_mode = be16_to_cpu(lvb->lvb_imode);
         set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
-        ocfs2_unpack_timespec(&ts,
+        ocfs2_unpack_timespec(&inode->i_atime,
                               be64_to_cpu(lvb->lvb_iatime_packed));
-        inode->i_atime = timespec_to_timespec64(ts);
-        ocfs2_unpack_timespec(&ts,
+        ocfs2_unpack_timespec(&inode->i_mtime,
                               be64_to_cpu(lvb->lvb_imtime_packed));
-        inode->i_mtime = timespec_to_timespec64(ts);
-        ocfs2_unpack_timespec(&ts,
+        ocfs2_unpack_timespec(&inode->i_ctime,
                               be64_to_cpu(lvb->lvb_ictime_packed));
-        inode->i_ctime = timespec_to_timespec64(ts);
         spin_unlock(&oi->ip_lock);
 }
 
@@ -3603,7 +3595,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
          * we can recover correctly from node failure. Otherwise, we may get
          * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
          */
-        if (!ocfs2_is_o2cb_active() &&
+        if (ocfs2_userspace_stack(osb) &&
             lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
                 lvb = 1;
 
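For reference, the LVB time layout the pack/unpack helpers above now agree on. This is a sketch derived from the hunks themselves, assuming the usual dlmglue.c definitions of OCFS2_SEC_SHIFT and OCFS2_NSEC_MASK: nanoseconds sit in the low bits, seconds above them, and the 0x3ffffffff clamp suggests 34 bits of seconds, so out-of-range timespec64 values now saturate instead of silently wrapping in the packing.

        /* pack, as ocfs2_pack_timespec() now does it */
        u64 packed = (clamp_t(time64_t, ts.tv_sec, 0, 0x3ffffffffull)
                      << OCFS2_SEC_SHIFT) | (ts.tv_nsec & OCFS2_NSEC_MASK);

        /* unpack, the exact inverse in ocfs2_unpack_timespec() */
        struct timespec64 out = {
                .tv_sec  = packed >> OCFS2_SEC_SHIFT,
                .tv_nsec = packed & OCFS2_NSEC_MASK,
        };

Working on timespec64 directly is what lets the patch drop the timespec64_to_timespec()/timespec_to_timespec64() shims on both sides.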
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index fe570824b991..d640c5f8a85d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2343,7 +2343,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
 
         written = __generic_file_write_iter(iocb, from);
         /* buffered aio wouldn't have proper lock coverage today */
-        BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
+        BUG_ON(written == -EIOCBQUEUED && !direct_io);
 
         /*
          * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
@@ -2463,7 +2463,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
         trace_generic_file_read_iter_ret(ret);
 
         /* buffered aio wouldn't have proper lock coverage today */
-        BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
+        BUG_ON(ret == -EIOCBQUEUED && !direct_io);
 
         /* see ocfs2_file_write_iter */
         if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index bd3475694e83..b63c97f4318e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1378,15 +1378,23 @@ static int __ocfs2_recovery_thread(void *arg)
         int rm_quota_used = 0, i;
         struct ocfs2_quota_recovery *qrec;
 
+        /* Whether quota is supported. */
+        int quota_enabled = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
+                        OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
+                || OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
+                        OCFS2_FEATURE_RO_COMPAT_GRPQUOTA);
+
         status = ocfs2_wait_on_mount(osb);
         if (status < 0) {
                 goto bail;
         }
 
-        rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
-        if (!rm_quota) {
-                status = -ENOMEM;
-                goto bail;
+        if (quota_enabled) {
+                rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
+                if (!rm_quota) {
+                        status = -ENOMEM;
+                        goto bail;
+                }
         }
 restart:
         status = ocfs2_super_lock(osb, 1);
@@ -1422,9 +1430,14 @@ restart:
          * then quota usage would be out of sync until some node takes
          * the slot. So we remember which nodes need quota recovery
          * and when everything else is done, we recover quotas. */
-                for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
-                if (i == rm_quota_used)
-                        rm_quota[rm_quota_used++] = slot_num;
+                if (quota_enabled) {
+                        for (i = 0; i < rm_quota_used
+                                        && rm_quota[i] != slot_num; i++)
+                                ;
+
+                        if (i == rm_quota_used)
+                                rm_quota[rm_quota_used++] = slot_num;
+                }
 
                 status = ocfs2_recover_node(osb, node_num, slot_num);
 skip_recovery:
@@ -1452,16 +1465,19 @@ skip_recovery:
         /* Now it is right time to recover quotas... We have to do this under
          * superblock lock so that no one can start using the slot (and crash)
          * before we recover it */
-        for (i = 0; i < rm_quota_used; i++) {
-                qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
-                if (IS_ERR(qrec)) {
-                        status = PTR_ERR(qrec);
-                        mlog_errno(status);
-                        continue;
+        if (quota_enabled) {
+                for (i = 0; i < rm_quota_used; i++) {
+                        qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
+                        if (IS_ERR(qrec)) {
+                                status = PTR_ERR(qrec);
+                                mlog_errno(status);
+                                continue;
+                        }
+                        ocfs2_queue_recovery_completion(osb->journal,
+                                        rm_quota[i],
+                                        NULL, NULL, qrec,
+                                        ORPHAN_NEED_TRUNCATE);
                 }
-                ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
-                                                NULL, NULL, qrec,
-                                                ORPHAN_NEED_TRUNCATE);
         }
 
         ocfs2_super_unlock(osb, 1);
@@ -1483,7 +1499,8 @@ bail:
 
         mutex_unlock(&osb->recovery_lock);
 
-        kfree(rm_quota);
+        if (quota_enabled)
+                kfree(rm_quota);
 
         /* no one is calling kthread_stop() for us so the kthread() api
          * requires that we call do_exit(). And it isn't exported, but
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 7eb3b0a6347e..3f1685d7d43b 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -25,6 +25,7 @@
25#include "ocfs2_ioctl.h" 25#include "ocfs2_ioctl.h"
26 26
27#include "alloc.h" 27#include "alloc.h"
28#include "localalloc.h"
28#include "aops.h" 29#include "aops.h"
29#include "dlmglue.h" 30#include "dlmglue.h"
30#include "extent_map.h" 31#include "extent_map.h"
@@ -233,6 +234,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
         struct ocfs2_refcount_tree *ref_tree = NULL;
         u32 new_phys_cpos, new_len;
         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
+        int need_free = 0;
 
         if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
                 BUG_ON(!ocfs2_is_refcount_inode(inode));
@@ -308,6 +310,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
                 if (!partial) {
                         context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE;
                         ret = -ENOSPC;
+                        need_free = 1;
                         goto out_commit;
                 }
         }
@@ -332,6 +335,20 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
                 mlog_errno(ret);
 
 out_commit:
+        if (need_free && context->data_ac) {
+                struct ocfs2_alloc_context *data_ac = context->data_ac;
+
+                if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+                        ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+                                        new_phys_cpos, new_len);
+                else
+                        ocfs2_free_clusters(handle,
+                                        data_ac->ac_inode,
+                                        data_ac->ac_bh,
+                                        ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos),
+                                        new_len);
+        }
+
         ocfs2_commit_trans(osb, handle);
 
 out_unlock_mutex:
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index d6c350ba25b9..c4b029c43464 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -48,12 +48,6 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
  */
 static struct ocfs2_stack_plugin *active_stack;
 
-inline int ocfs2_is_o2cb_active(void)
-{
-        return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
-}
-EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
-
 static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
 {
         struct ocfs2_stack_plugin *p;
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index e3036e1790e8..f2dce10fae54 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -298,9 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
 int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
 void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
 
-/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
-int ocfs2_is_o2cb_active(void);
-
 extern struct kset *ocfs2_kset;
 
 #endif /* STACKGLUE_H */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 24bcc5eec6b4..76f8db0b0e71 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
                         struct vm_area_struct *vma, unsigned long addr,
-                        int node, bool hugepage);
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
-        alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
+                        int node);
 #else
 #define alloc_pages(gfp_mask, order) \
                 alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
-        alloc_pages(gfp_mask, order)
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
         alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr) \
-        alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
+        alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
 #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
-        alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
+        alloc_pages_vma(gfp_mask, 0, vma, addr, node)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5228c62af416..bac395f1d00a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 struct mempolicy *get_task_policy(struct task_struct *p);
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
                 unsigned long addr);
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+                unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index f35c7bf76143..0096a05395e3 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -122,8 +122,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
 
 #ifdef CONFIG_TREE_SRCU
 #define _SRCU_NOTIFIER_HEAD(name, mod)                          \
-        static DEFINE_PER_CPU(struct srcu_data,                 \
-                        name##_head_srcu_data);                 \
+        static DEFINE_PER_CPU(struct srcu_data, name##_head_srcu_data); \
         mod struct srcu_notifier_head name =                    \
                         SRCU_NOTIFIER_INIT(name, name##_head_srcu_data)
 
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index c6a3b6851372..35cf0ad29718 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -25,8 +25,6 @@
 #include <linux/elf.h>
 #include <linux/elfcore.h>
 #include <linux/kernel.h>
-#include <linux/kexec.h>
-#include <linux/slab.h>
 #include <linux/syscalls.h>
 #include <linux/vmalloc.h>
 #include "kexec_internal.h"
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3ae223f7b5df..5fc724e4e454 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -66,7 +66,6 @@
 #include <linux/kexec.h>
 #include <linux/bpf.h>
 #include <linux/mount.h>
-#include <linux/pipe_fs_i.h>
 
 #include <linux/uaccess.h>
 #include <asm/processor.h>
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4e4ef8fa479d..55478ab3c83b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -629,21 +629,40 @@ release:
  *                  available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
         const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+        gfp_t this_node = 0;
+
+#ifdef CONFIG_NUMA
+        struct mempolicy *pol;
+        /*
+         * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
+         * specified, to express a general desire to stay on the current
+         * node for optimistic allocation attempts. If the defrag mode
+         * and/or madvise hint requires the direct reclaim then we prefer
+         * to fallback to other node rather than node reclaim because that
+         * can lead to excessive reclaim even though there is free memory
+         * on other nodes. We expect that NUMA preferences are specified
+         * by memory policies.
+         */
+        pol = get_vma_policy(vma, addr);
+        if (pol->mode != MPOL_BIND)
+                this_node = __GFP_THISNODE;
+        mpol_cond_put(pol);
+#endif
 
         if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
                 return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
         if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-                return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+                return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
         if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
                 return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                             __GFP_KSWAPD_RECLAIM);
+                                                             __GFP_KSWAPD_RECLAIM | this_node);
         if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
                 return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                             0);
-        return GFP_TRANSHUGE_LIGHT;
+                                                             this_node);
+        return GFP_TRANSHUGE_LIGHT | this_node;
 }
 
649/* Caller must hold page table lock. */ 668/* Caller must hold page table lock. */
@@ -715,8 +734,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
                 pte_free(vma->vm_mm, pgtable);
                 return ret;
         }
-        gfp = alloc_hugepage_direct_gfpmask(vma);
-        page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+        gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+        page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
         if (unlikely(!page)) {
                 count_vm_event(THP_FAULT_FALLBACK);
                 return VM_FAULT_FALLBACK;
@@ -1286,8 +1305,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
         if (transparent_hugepage_enabled(vma) &&
             !transparent_hugepage_debug_cow()) {
-                huge_gfp = alloc_hugepage_direct_gfpmask(vma);
-                new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
+                huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+                new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
+                                haddr, numa_node_id());
         } else
                 new_page = NULL;
 
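To see the consolidated mask at work, here is a sketch, not part of the patch, of what is arguably the common case: a MADV_HUGEPAGE vma under the default "madvise" defrag mode (DEFRAG_REQ_MADV_FLAG set) with no MPOL_BIND policy in force.

        /* madvised vma: direct reclaim is allowed, so per the logic above
         * __GFP_THISNODE is deliberately left out and the allocation may
         * fall back to other nodes rather than reclaim the local one */
        gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
        /* here gfp == GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM */
        page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr,
                               numa_node_id());

A non-madvised vma on the same system would instead get GFP_TRANSHUGE_LIGHT | __GFP_THISNODE (no reclaim at all), which is exactly the "optimistic local attempt" the new comment describes.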
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 54920cbc46bf..6e1469b80cb7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2593,7 +2593,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
         struct mem_cgroup *memcg;
         int ret = 0;
 
-        if (memcg_kmem_bypass())
+        if (mem_cgroup_disabled() || memcg_kmem_bypass())
                 return 0;
 
         memcg = get_mem_cgroup_from_current();
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 61972da38d93..2b2b3ccbbfb5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -586,6 +586,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
         for (i = 0; i < sections_to_remove; i++) {
                 unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
 
+                cond_resched();
                 ret = __remove_section(zone, __pfn_to_section(pfn), map_offset,
                                        altmap);
                 map_offset = 0;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index cfd26d7e61a1..5837a067124d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
         } else if (PageTransHuge(page)) {
                 struct page *thp;
 
-                thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-                                         HPAGE_PMD_ORDER);
+                thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+                                address, numa_node_id());
                 if (!thp)
                         return NULL;
                 prep_transhuge_page(thp);
@@ -1662,7 +1662,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task. It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
                                                 unsigned long addr)
 {
         struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2011,7 +2011,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * @vma:  Pointer to VMA or NULL if not available.
  * @addr: Virtual Address of the allocation. Must be inside the VMA.
  * @node: Which node to prefer for allocation (modulo policy).
- * @hugepage: for hugepages try only the preferred node if possible
  *
  * This function allocates a page from the kernel page pool and applies
  * a NUMA policy associated with the VMA or the current process.
@@ -2022,7 +2021,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-                unsigned long addr, int node, bool hugepage)
+                unsigned long addr, int node)
 {
         struct mempolicy *pol;
         struct page *page;
@@ -2040,32 +2039,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
                 goto out;
         }
 
-        if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
-                int hpage_node = node;
-
-                /*
-                 * For hugepage allocation and non-interleave policy which
-                 * allows the current node (or other explicitly preferred
-                 * node) we only try to allocate from the current/preferred
-                 * node and don't fall back to other nodes, as the cost of
-                 * remote accesses would likely offset THP benefits.
-                 *
-                 * If the policy is interleave, or does not allow the current
-                 * node in its nodemask, we allocate the standard way.
-                 */
-                if (pol->mode == MPOL_PREFERRED &&
-                                                !(pol->flags & MPOL_F_LOCAL))
-                        hpage_node = pol->v.preferred_node;
-
-                nmask = policy_nodemask(gfp, pol);
-                if (!nmask || node_isset(hpage_node, *nmask)) {
-                        mpol_cond_put(pol);
-                        page = __alloc_pages_node(hpage_node,
-                                                gfp | __GFP_THISNODE, order);
-                        goto out;
-                }
-        }
-
         nmask = policy_nodemask(gfp, pol);
         preferred_nid = policy_node(gfp, pol, node);
         page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/shmem.c b/mm/shmem.c
index 56bf122e0bb4..ea26d7a0342d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1435,7 +1435,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
         shmem_pseudo_vma_init(&pvma, info, hindex);
         page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-                               HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+                               HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
         shmem_pseudo_vma_destroy(&pvma);
         if (page)
                 prep_transhuge_page(page);