diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-11-03 13:21:43 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-11-03 13:21:43 -0400 |
commit | cddfa11aef3c4914f406a059138ccc354f034d1c (patch) | |
tree | 388031c740f221576e7bb3f1ad35b0b158280525 | |
parent | 5f21585384a4a69b8bfdd2cae7e3648ae805f57d (diff) | |
parent | dd33ad7b251f900481701b2a82d25de583867708 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
- more ocfs2 work
- various leftovers
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
memory_hotplug: cond_resched in __remove_pages
bfs: add sanity check at bfs_fill_super()
kernel/sysctl.c: remove duplicated include
kernel/kexec_file.c: remove some duplicated includes
mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask
ocfs2: fix clusters leak in ocfs2_defrag_extent()
ocfs2: dlmglue: clean up timestamp handling
ocfs2: don't put and assigning null to bh allocated outside
ocfs2: fix a misuse of brelse after failing ocfs2_check_dir_entry
ocfs2: don't use iocb when EIOCBQUEUED returns
ocfs2: without quota support, avoid calling quota recovery
ocfs2: remove ocfs2_is_o2cb_active()
mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings
include/linux/notifier.h: SRCU: fix ctags
mm: handle no memcg case in memcg_kmem_charge() properly
-rw-r--r-- | fs/bfs/inode.c | 9 | ||||
-rw-r--r-- | fs/ocfs2/buffer_head_io.c | 77 | ||||
-rw-r--r-- | fs/ocfs2/dir.c | 3 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.c | 28 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 4 | ||||
-rw-r--r-- | fs/ocfs2/journal.c | 51 | ||||
-rw-r--r-- | fs/ocfs2/move_extents.c | 17 | ||||
-rw-r--r-- | fs/ocfs2/stackglue.c | 6 | ||||
-rw-r--r-- | fs/ocfs2/stackglue.h | 3 | ||||
-rw-r--r-- | include/linux/gfp.h | 12 | ||||
-rw-r--r-- | include/linux/mempolicy.h | 2 | ||||
-rw-r--r-- | include/linux/notifier.h | 3 | ||||
-rw-r--r-- | kernel/kexec_file.c | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 1 | ||||
-rw-r--r-- | mm/huge_memory.c | 38 | ||||
-rw-r--r-- | mm/memcontrol.c | 2 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 1 | ||||
-rw-r--r-- | mm/mempolicy.c | 35 | ||||
-rw-r--r-- | mm/shmem.c | 2 |
19 files changed, 172 insertions, 124 deletions
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 9a69392f1fb3..d81c148682e7 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c | |||
@@ -350,7 +350,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) | |||
350 | 350 | ||
351 | s->s_magic = BFS_MAGIC; | 351 | s->s_magic = BFS_MAGIC; |
352 | 352 | ||
353 | if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) { | 353 | if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) || |
354 | le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) { | ||
354 | printf("Superblock is corrupted\n"); | 355 | printf("Superblock is corrupted\n"); |
355 | goto out1; | 356 | goto out1; |
356 | } | 357 | } |
@@ -359,9 +360,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) | |||
359 | sizeof(struct bfs_inode) | 360 | sizeof(struct bfs_inode) |
360 | + BFS_ROOT_INO - 1; | 361 | + BFS_ROOT_INO - 1; |
361 | imap_len = (info->si_lasti / 8) + 1; | 362 | imap_len = (info->si_lasti / 8) + 1; |
362 | info->si_imap = kzalloc(imap_len, GFP_KERNEL); | 363 | info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN); |
363 | if (!info->si_imap) | 364 | if (!info->si_imap) { |
365 | printf("Cannot allocate %u bytes\n", imap_len); | ||
364 | goto out1; | 366 | goto out1; |
367 | } | ||
365 | for (i = 0; i < BFS_ROOT_INO; i++) | 368 | for (i = 0; i < BFS_ROOT_INO; i++) |
366 | set_bit(i, info->si_imap); | 369 | set_bit(i, info->si_imap); |
367 | 370 | ||
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 1d098c3c00e0..4ebbd57cbf84 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c | |||
@@ -99,25 +99,34 @@ out: | |||
99 | return ret; | 99 | return ret; |
100 | } | 100 | } |
101 | 101 | ||
102 | /* Caller must provide a bhs[] with all NULL or non-NULL entries, so it | ||
103 | * will be easier to handle read failure. | ||
104 | */ | ||
102 | int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, | 105 | int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, |
103 | unsigned int nr, struct buffer_head *bhs[]) | 106 | unsigned int nr, struct buffer_head *bhs[]) |
104 | { | 107 | { |
105 | int status = 0; | 108 | int status = 0; |
106 | unsigned int i; | 109 | unsigned int i; |
107 | struct buffer_head *bh; | 110 | struct buffer_head *bh; |
111 | int new_bh = 0; | ||
108 | 112 | ||
109 | trace_ocfs2_read_blocks_sync((unsigned long long)block, nr); | 113 | trace_ocfs2_read_blocks_sync((unsigned long long)block, nr); |
110 | 114 | ||
111 | if (!nr) | 115 | if (!nr) |
112 | goto bail; | 116 | goto bail; |
113 | 117 | ||
118 | /* Don't put buffer head and re-assign it to NULL if it is allocated | ||
119 | * outside since the caller can't be aware of this alternation! | ||
120 | */ | ||
121 | new_bh = (bhs[0] == NULL); | ||
122 | |||
114 | for (i = 0 ; i < nr ; i++) { | 123 | for (i = 0 ; i < nr ; i++) { |
115 | if (bhs[i] == NULL) { | 124 | if (bhs[i] == NULL) { |
116 | bhs[i] = sb_getblk(osb->sb, block++); | 125 | bhs[i] = sb_getblk(osb->sb, block++); |
117 | if (bhs[i] == NULL) { | 126 | if (bhs[i] == NULL) { |
118 | status = -ENOMEM; | 127 | status = -ENOMEM; |
119 | mlog_errno(status); | 128 | mlog_errno(status); |
120 | goto bail; | 129 | break; |
121 | } | 130 | } |
122 | } | 131 | } |
123 | bh = bhs[i]; | 132 | bh = bhs[i]; |
@@ -158,9 +167,26 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, | |||
158 | submit_bh(REQ_OP_READ, 0, bh); | 167 | submit_bh(REQ_OP_READ, 0, bh); |
159 | } | 168 | } |
160 | 169 | ||
170 | read_failure: | ||
161 | for (i = nr; i > 0; i--) { | 171 | for (i = nr; i > 0; i--) { |
162 | bh = bhs[i - 1]; | 172 | bh = bhs[i - 1]; |
163 | 173 | ||
174 | if (unlikely(status)) { | ||
175 | if (new_bh && bh) { | ||
176 | /* If middle bh fails, let previous bh | ||
177 | * finish its read and then put it to | ||
178 | * avoid bh leak | ||
179 | */ | ||
180 | if (!buffer_jbd(bh)) | ||
181 | wait_on_buffer(bh); | ||
182 | put_bh(bh); | ||
183 | bhs[i - 1] = NULL; | ||
184 | } else if (bh && buffer_uptodate(bh)) { | ||
185 | clear_buffer_uptodate(bh); | ||
186 | } | ||
187 | continue; | ||
188 | } | ||
189 | |||
164 | /* No need to wait on the buffer if it's managed by JBD. */ | 190 | /* No need to wait on the buffer if it's managed by JBD. */ |
165 | if (!buffer_jbd(bh)) | 191 | if (!buffer_jbd(bh)) |
166 | wait_on_buffer(bh); | 192 | wait_on_buffer(bh); |
@@ -170,8 +196,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, | |||
170 | * so we can safely record this and loop back | 196 | * so we can safely record this and loop back |
171 | * to cleanup the other buffers. */ | 197 | * to cleanup the other buffers. */ |
172 | status = -EIO; | 198 | status = -EIO; |
173 | put_bh(bh); | 199 | goto read_failure; |
174 | bhs[i - 1] = NULL; | ||
175 | } | 200 | } |
176 | } | 201 | } |
177 | 202 | ||
@@ -179,6 +204,9 @@ bail: | |||
179 | return status; | 204 | return status; |
180 | } | 205 | } |
181 | 206 | ||
207 | /* Caller must provide a bhs[] with all NULL or non-NULL entries, so it | ||
208 | * will be easier to handle read failure. | ||
209 | */ | ||
182 | int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, | 210 | int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, |
183 | struct buffer_head *bhs[], int flags, | 211 | struct buffer_head *bhs[], int flags, |
184 | int (*validate)(struct super_block *sb, | 212 | int (*validate)(struct super_block *sb, |
@@ -188,6 +216,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, | |||
188 | int i, ignore_cache = 0; | 216 | int i, ignore_cache = 0; |
189 | struct buffer_head *bh; | 217 | struct buffer_head *bh; |
190 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | 218 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); |
219 | int new_bh = 0; | ||
191 | 220 | ||
192 | trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags); | 221 | trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags); |
193 | 222 | ||
@@ -213,6 +242,11 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, | |||
213 | goto bail; | 242 | goto bail; |
214 | } | 243 | } |
215 | 244 | ||
245 | /* Don't put buffer head and re-assign it to NULL if it is allocated | ||
246 | * outside since the caller can't be aware of this alternation! | ||
247 | */ | ||
248 | new_bh = (bhs[0] == NULL); | ||
249 | |||
216 | ocfs2_metadata_cache_io_lock(ci); | 250 | ocfs2_metadata_cache_io_lock(ci); |
217 | for (i = 0 ; i < nr ; i++) { | 251 | for (i = 0 ; i < nr ; i++) { |
218 | if (bhs[i] == NULL) { | 252 | if (bhs[i] == NULL) { |
@@ -221,7 +255,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, | |||
221 | ocfs2_metadata_cache_io_unlock(ci); | 255 | ocfs2_metadata_cache_io_unlock(ci); |
222 | status = -ENOMEM; | 256 | status = -ENOMEM; |
223 | mlog_errno(status); | 257 | mlog_errno(status); |
224 | goto bail; | 258 | /* Don't forget to put previous bh! */ |
259 | break; | ||
225 | } | 260 | } |
226 | } | 261 | } |
227 | bh = bhs[i]; | 262 | bh = bhs[i]; |
@@ -316,16 +351,27 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, | |||
316 | } | 351 | } |
317 | } | 352 | } |
318 | 353 | ||
319 | status = 0; | 354 | read_failure: |
320 | |||
321 | for (i = (nr - 1); i >= 0; i--) { | 355 | for (i = (nr - 1); i >= 0; i--) { |
322 | bh = bhs[i]; | 356 | bh = bhs[i]; |
323 | 357 | ||
324 | if (!(flags & OCFS2_BH_READAHEAD)) { | 358 | if (!(flags & OCFS2_BH_READAHEAD)) { |
325 | if (status) { | 359 | if (unlikely(status)) { |
326 | /* Clear the rest of the buffers on error */ | 360 | /* Clear the buffers on error including those |
327 | put_bh(bh); | 361 | * ever succeeded in reading |
328 | bhs[i] = NULL; | 362 | */ |
363 | if (new_bh && bh) { | ||
364 | /* If middle bh fails, let previous bh | ||
365 | * finish its read and then put it to | ||
366 | * avoid bh leak | ||
367 | */ | ||
368 | if (!buffer_jbd(bh)) | ||
369 | wait_on_buffer(bh); | ||
370 | put_bh(bh); | ||
371 | bhs[i] = NULL; | ||
372 | } else if (bh && buffer_uptodate(bh)) { | ||
373 | clear_buffer_uptodate(bh); | ||
374 | } | ||
329 | continue; | 375 | continue; |
330 | } | 376 | } |
331 | /* We know this can't have changed as we hold the | 377 | /* We know this can't have changed as we hold the |
@@ -343,9 +389,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, | |||
343 | * uptodate. */ | 389 | * uptodate. */ |
344 | status = -EIO; | 390 | status = -EIO; |
345 | clear_buffer_needs_validate(bh); | 391 | clear_buffer_needs_validate(bh); |
346 | put_bh(bh); | 392 | goto read_failure; |
347 | bhs[i] = NULL; | ||
348 | continue; | ||
349 | } | 393 | } |
350 | 394 | ||
351 | if (buffer_needs_validate(bh)) { | 395 | if (buffer_needs_validate(bh)) { |
@@ -355,11 +399,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, | |||
355 | BUG_ON(buffer_jbd(bh)); | 399 | BUG_ON(buffer_jbd(bh)); |
356 | clear_buffer_needs_validate(bh); | 400 | clear_buffer_needs_validate(bh); |
357 | status = validate(sb, bh); | 401 | status = validate(sb, bh); |
358 | if (status) { | 402 | if (status) |
359 | put_bh(bh); | 403 | goto read_failure; |
360 | bhs[i] = NULL; | ||
361 | continue; | ||
362 | } | ||
363 | } | 404 | } |
364 | } | 405 | } |
365 | 406 | ||
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index b048d4fa3959..c121abbdfc7d 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -1897,8 +1897,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, | |||
1897 | /* On error, skip the f_pos to the | 1897 | /* On error, skip the f_pos to the |
1898 | next block. */ | 1898 | next block. */ |
1899 | ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1; | 1899 | ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1; |
1900 | brelse(bh); | 1900 | break; |
1901 | continue; | ||
1902 | } | 1901 | } |
1903 | if (le64_to_cpu(de->inode)) { | 1902 | if (le64_to_cpu(de->inode)) { |
1904 | unsigned char d_type = DT_UNKNOWN; | 1903 | unsigned char d_type = DT_UNKNOWN; |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 933aac5da193..7c835824247e 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -2123,10 +2123,10 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | |||
2123 | 2123 | ||
2124 | /* LVB only has room for 64 bits of time here so we pack it for | 2124 | /* LVB only has room for 64 bits of time here so we pack it for |
2125 | * now. */ | 2125 | * now. */ |
2126 | static u64 ocfs2_pack_timespec(struct timespec *spec) | 2126 | static u64 ocfs2_pack_timespec(struct timespec64 *spec) |
2127 | { | 2127 | { |
2128 | u64 res; | 2128 | u64 res; |
2129 | u64 sec = spec->tv_sec; | 2129 | u64 sec = clamp_t(time64_t, spec->tv_sec, 0, 0x3ffffffffull); |
2130 | u32 nsec = spec->tv_nsec; | 2130 | u32 nsec = spec->tv_nsec; |
2131 | 2131 | ||
2132 | res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); | 2132 | res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); |
@@ -2142,7 +2142,6 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) | |||
2142 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 2142 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
2143 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; | 2143 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
2144 | struct ocfs2_meta_lvb *lvb; | 2144 | struct ocfs2_meta_lvb *lvb; |
2145 | struct timespec ts; | ||
2146 | 2145 | ||
2147 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 2146 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); |
2148 | 2147 | ||
@@ -2163,15 +2162,12 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) | |||
2163 | lvb->lvb_igid = cpu_to_be32(i_gid_read(inode)); | 2162 | lvb->lvb_igid = cpu_to_be32(i_gid_read(inode)); |
2164 | lvb->lvb_imode = cpu_to_be16(inode->i_mode); | 2163 | lvb->lvb_imode = cpu_to_be16(inode->i_mode); |
2165 | lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); | 2164 | lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); |
2166 | ts = timespec64_to_timespec(inode->i_atime); | ||
2167 | lvb->lvb_iatime_packed = | 2165 | lvb->lvb_iatime_packed = |
2168 | cpu_to_be64(ocfs2_pack_timespec(&ts)); | 2166 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); |
2169 | ts = timespec64_to_timespec(inode->i_ctime); | ||
2170 | lvb->lvb_ictime_packed = | 2167 | lvb->lvb_ictime_packed = |
2171 | cpu_to_be64(ocfs2_pack_timespec(&ts)); | 2168 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); |
2172 | ts = timespec64_to_timespec(inode->i_mtime); | ||
2173 | lvb->lvb_imtime_packed = | 2169 | lvb->lvb_imtime_packed = |
2174 | cpu_to_be64(ocfs2_pack_timespec(&ts)); | 2170 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); |
2175 | lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); | 2171 | lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); |
2176 | lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); | 2172 | lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); |
2177 | lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); | 2173 | lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); |
@@ -2180,7 +2176,7 @@ out: | |||
2180 | mlog_meta_lvb(0, lockres); | 2176 | mlog_meta_lvb(0, lockres); |
2181 | } | 2177 | } |
2182 | 2178 | ||
2183 | static void ocfs2_unpack_timespec(struct timespec *spec, | 2179 | static void ocfs2_unpack_timespec(struct timespec64 *spec, |
2184 | u64 packed_time) | 2180 | u64 packed_time) |
2185 | { | 2181 | { |
2186 | spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; | 2182 | spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; |
@@ -2189,7 +2185,6 @@ static void ocfs2_unpack_timespec(struct timespec *spec, | |||
2189 | 2185 | ||
2190 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | 2186 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) |
2191 | { | 2187 | { |
2192 | struct timespec ts; | ||
2193 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 2188 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
2194 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; | 2189 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
2195 | struct ocfs2_meta_lvb *lvb; | 2190 | struct ocfs2_meta_lvb *lvb; |
@@ -2217,15 +2212,12 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | |||
2217 | i_gid_write(inode, be32_to_cpu(lvb->lvb_igid)); | 2212 | i_gid_write(inode, be32_to_cpu(lvb->lvb_igid)); |
2218 | inode->i_mode = be16_to_cpu(lvb->lvb_imode); | 2213 | inode->i_mode = be16_to_cpu(lvb->lvb_imode); |
2219 | set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); | 2214 | set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); |
2220 | ocfs2_unpack_timespec(&ts, | 2215 | ocfs2_unpack_timespec(&inode->i_atime, |
2221 | be64_to_cpu(lvb->lvb_iatime_packed)); | 2216 | be64_to_cpu(lvb->lvb_iatime_packed)); |
2222 | inode->i_atime = timespec_to_timespec64(ts); | 2217 | ocfs2_unpack_timespec(&inode->i_mtime, |
2223 | ocfs2_unpack_timespec(&ts, | ||
2224 | be64_to_cpu(lvb->lvb_imtime_packed)); | 2218 | be64_to_cpu(lvb->lvb_imtime_packed)); |
2225 | inode->i_mtime = timespec_to_timespec64(ts); | 2219 | ocfs2_unpack_timespec(&inode->i_ctime, |
2226 | ocfs2_unpack_timespec(&ts, | ||
2227 | be64_to_cpu(lvb->lvb_ictime_packed)); | 2220 | be64_to_cpu(lvb->lvb_ictime_packed)); |
2228 | inode->i_ctime = timespec_to_timespec64(ts); | ||
2229 | spin_unlock(&oi->ip_lock); | 2221 | spin_unlock(&oi->ip_lock); |
2230 | } | 2222 | } |
2231 | 2223 | ||
@@ -3603,7 +3595,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | |||
3603 | * we can recover correctly from node failure. Otherwise, we may get | 3595 | * we can recover correctly from node failure. Otherwise, we may get |
3604 | * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set. | 3596 | * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set. |
3605 | */ | 3597 | */ |
3606 | if (!ocfs2_is_o2cb_active() && | 3598 | if (ocfs2_userspace_stack(osb) && |
3607 | lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 3599 | lockres->l_ops->flags & LOCK_TYPE_USES_LVB) |
3608 | lvb = 1; | 3600 | lvb = 1; |
3609 | 3601 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index fe570824b991..d640c5f8a85d 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -2343,7 +2343,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
2343 | 2343 | ||
2344 | written = __generic_file_write_iter(iocb, from); | 2344 | written = __generic_file_write_iter(iocb, from); |
2345 | /* buffered aio wouldn't have proper lock coverage today */ | 2345 | /* buffered aio wouldn't have proper lock coverage today */ |
2346 | BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); | 2346 | BUG_ON(written == -EIOCBQUEUED && !direct_io); |
2347 | 2347 | ||
2348 | /* | 2348 | /* |
2349 | * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io | 2349 | * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io |
@@ -2463,7 +2463,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, | |||
2463 | trace_generic_file_read_iter_ret(ret); | 2463 | trace_generic_file_read_iter_ret(ret); |
2464 | 2464 | ||
2465 | /* buffered aio wouldn't have proper lock coverage today */ | 2465 | /* buffered aio wouldn't have proper lock coverage today */ |
2466 | BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); | 2466 | BUG_ON(ret == -EIOCBQUEUED && !direct_io); |
2467 | 2467 | ||
2468 | /* see ocfs2_file_write_iter */ | 2468 | /* see ocfs2_file_write_iter */ |
2469 | if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { | 2469 | if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index bd3475694e83..b63c97f4318e 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -1378,15 +1378,23 @@ static int __ocfs2_recovery_thread(void *arg) | |||
1378 | int rm_quota_used = 0, i; | 1378 | int rm_quota_used = 0, i; |
1379 | struct ocfs2_quota_recovery *qrec; | 1379 | struct ocfs2_quota_recovery *qrec; |
1380 | 1380 | ||
1381 | /* Whether the quota supported. */ | ||
1382 | int quota_enabled = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, | ||
1383 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA) | ||
1384 | || OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, | ||
1385 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA); | ||
1386 | |||
1381 | status = ocfs2_wait_on_mount(osb); | 1387 | status = ocfs2_wait_on_mount(osb); |
1382 | if (status < 0) { | 1388 | if (status < 0) { |
1383 | goto bail; | 1389 | goto bail; |
1384 | } | 1390 | } |
1385 | 1391 | ||
1386 | rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS); | 1392 | if (quota_enabled) { |
1387 | if (!rm_quota) { | 1393 | rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS); |
1388 | status = -ENOMEM; | 1394 | if (!rm_quota) { |
1389 | goto bail; | 1395 | status = -ENOMEM; |
1396 | goto bail; | ||
1397 | } | ||
1390 | } | 1398 | } |
1391 | restart: | 1399 | restart: |
1392 | status = ocfs2_super_lock(osb, 1); | 1400 | status = ocfs2_super_lock(osb, 1); |
@@ -1422,9 +1430,14 @@ restart: | |||
1422 | * then quota usage would be out of sync until some node takes | 1430 | * then quota usage would be out of sync until some node takes |
1423 | * the slot. So we remember which nodes need quota recovery | 1431 | * the slot. So we remember which nodes need quota recovery |
1424 | * and when everything else is done, we recover quotas. */ | 1432 | * and when everything else is done, we recover quotas. */ |
1425 | for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++); | 1433 | if (quota_enabled) { |
1426 | if (i == rm_quota_used) | 1434 | for (i = 0; i < rm_quota_used |
1427 | rm_quota[rm_quota_used++] = slot_num; | 1435 | && rm_quota[i] != slot_num; i++) |
1436 | ; | ||
1437 | |||
1438 | if (i == rm_quota_used) | ||
1439 | rm_quota[rm_quota_used++] = slot_num; | ||
1440 | } | ||
1428 | 1441 | ||
1429 | status = ocfs2_recover_node(osb, node_num, slot_num); | 1442 | status = ocfs2_recover_node(osb, node_num, slot_num); |
1430 | skip_recovery: | 1443 | skip_recovery: |
@@ -1452,16 +1465,19 @@ skip_recovery: | |||
1452 | /* Now it is right time to recover quotas... We have to do this under | 1465 | /* Now it is right time to recover quotas... We have to do this under |
1453 | * superblock lock so that no one can start using the slot (and crash) | 1466 | * superblock lock so that no one can start using the slot (and crash) |
1454 | * before we recover it */ | 1467 | * before we recover it */ |
1455 | for (i = 0; i < rm_quota_used; i++) { | 1468 | if (quota_enabled) { |
1456 | qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); | 1469 | for (i = 0; i < rm_quota_used; i++) { |
1457 | if (IS_ERR(qrec)) { | 1470 | qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); |
1458 | status = PTR_ERR(qrec); | 1471 | if (IS_ERR(qrec)) { |
1459 | mlog_errno(status); | 1472 | status = PTR_ERR(qrec); |
1460 | continue; | 1473 | mlog_errno(status); |
1474 | continue; | ||
1475 | } | ||
1476 | ocfs2_queue_recovery_completion(osb->journal, | ||
1477 | rm_quota[i], | ||
1478 | NULL, NULL, qrec, | ||
1479 | ORPHAN_NEED_TRUNCATE); | ||
1461 | } | 1480 | } |
1462 | ocfs2_queue_recovery_completion(osb->journal, rm_quota[i], | ||
1463 | NULL, NULL, qrec, | ||
1464 | ORPHAN_NEED_TRUNCATE); | ||
1465 | } | 1481 | } |
1466 | 1482 | ||
1467 | ocfs2_super_unlock(osb, 1); | 1483 | ocfs2_super_unlock(osb, 1); |
@@ -1483,7 +1499,8 @@ bail: | |||
1483 | 1499 | ||
1484 | mutex_unlock(&osb->recovery_lock); | 1500 | mutex_unlock(&osb->recovery_lock); |
1485 | 1501 | ||
1486 | kfree(rm_quota); | 1502 | if (quota_enabled) |
1503 | kfree(rm_quota); | ||
1487 | 1504 | ||
1488 | /* no one is calling kthread_stop() for us so the kthread() api | 1505 |
1489 | * requires that we call do_exit(). And it isn't exported, but | 1506 | * requires that we call do_exit(). And it isn't exported, but |
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 7eb3b0a6347e..3f1685d7d43b 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include "ocfs2_ioctl.h" | 25 | #include "ocfs2_ioctl.h" |
26 | 26 | ||
27 | #include "alloc.h" | 27 | #include "alloc.h" |
28 | #include "localalloc.h" | ||
28 | #include "aops.h" | 29 | #include "aops.h" |
29 | #include "dlmglue.h" | 30 | #include "dlmglue.h" |
30 | #include "extent_map.h" | 31 | #include "extent_map.h" |
@@ -233,6 +234,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, | |||
233 | struct ocfs2_refcount_tree *ref_tree = NULL; | 234 | struct ocfs2_refcount_tree *ref_tree = NULL; |
234 | u32 new_phys_cpos, new_len; | 235 | u32 new_phys_cpos, new_len; |
235 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | 236 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); |
237 | int need_free = 0; | ||
236 | 238 | ||
237 | if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { | 239 | if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { |
238 | BUG_ON(!ocfs2_is_refcount_inode(inode)); | 240 | BUG_ON(!ocfs2_is_refcount_inode(inode)); |
@@ -308,6 +310,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, | |||
308 | if (!partial) { | 310 | if (!partial) { |
309 | context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; | 311 | context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; |
310 | ret = -ENOSPC; | 312 | ret = -ENOSPC; |
313 | need_free = 1; | ||
311 | goto out_commit; | 314 | goto out_commit; |
312 | } | 315 | } |
313 | } | 316 | } |
@@ -332,6 +335,20 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, | |||
332 | mlog_errno(ret); | 335 | mlog_errno(ret); |
333 | 336 | ||
334 | out_commit: | 337 | out_commit: |
338 | if (need_free && context->data_ac) { | ||
339 | struct ocfs2_alloc_context *data_ac = context->data_ac; | ||
340 | |||
341 | if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL) | ||
342 | ocfs2_free_local_alloc_bits(osb, handle, data_ac, | ||
343 | new_phys_cpos, new_len); | ||
344 | else | ||
345 | ocfs2_free_clusters(handle, | ||
346 | data_ac->ac_inode, | ||
347 | data_ac->ac_bh, | ||
348 | ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos), | ||
349 | new_len); | ||
350 | } | ||
351 | |||
335 | ocfs2_commit_trans(osb, handle); | 352 | ocfs2_commit_trans(osb, handle); |
336 | 353 | ||
337 | out_unlock_mutex: | 354 | out_unlock_mutex: |
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index d6c350ba25b9..c4b029c43464 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c | |||
@@ -48,12 +48,6 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; | |||
48 | */ | 48 | */ |
49 | static struct ocfs2_stack_plugin *active_stack; | 49 | static struct ocfs2_stack_plugin *active_stack; |
50 | 50 | ||
51 | inline int ocfs2_is_o2cb_active(void) | ||
52 | { | ||
53 | return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB); | ||
54 | } | ||
55 | EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active); | ||
56 | |||
57 | static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) | 51 | static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) |
58 | { | 52 | { |
59 | struct ocfs2_stack_plugin *p; | 53 | struct ocfs2_stack_plugin *p; |
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index e3036e1790e8..f2dce10fae54 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h | |||
@@ -298,9 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p | |||
298 | int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); | 298 | int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); |
299 | void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); | 299 | void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); |
300 | 300 | ||
301 | /* In ocfs2_downconvert_lock(), we need to know which stack we are using */ | ||
302 | int ocfs2_is_o2cb_active(void); | ||
303 | |||
304 | extern struct kset *ocfs2_kset; | 301 | extern struct kset *ocfs2_kset; |
305 | 302 | ||
306 | #endif /* STACKGLUE_H */ | 303 | #endif /* STACKGLUE_H */ |
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 24bcc5eec6b4..76f8db0b0e71 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
@@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order) | |||
510 | } | 510 | } |
511 | extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, | 511 | extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, |
512 | struct vm_area_struct *vma, unsigned long addr, | 512 | struct vm_area_struct *vma, unsigned long addr, |
513 | int node, bool hugepage); | 513 | int node); |
514 | #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ | ||
515 | alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) | ||
516 | #else | 514 | #else |
517 | #define alloc_pages(gfp_mask, order) \ | 515 | #define alloc_pages(gfp_mask, order) \ |
518 | alloc_pages_node(numa_node_id(), gfp_mask, order) | 516 | alloc_pages_node(numa_node_id(), gfp_mask, order) |
519 | #define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ | 517 | #define alloc_pages_vma(gfp_mask, order, vma, addr, node)\ |
520 | alloc_pages(gfp_mask, order) | ||
521 | #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ | ||
522 | alloc_pages(gfp_mask, order) | 518 | alloc_pages(gfp_mask, order) |
523 | #endif | 519 | #endif |
524 | #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) | 520 | #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) |
525 | #define alloc_page_vma(gfp_mask, vma, addr) \ | 521 | #define alloc_page_vma(gfp_mask, vma, addr) \ |
526 | alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) | 522 | alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id()) |
527 | #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ | 523 | #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ |
528 | alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) | 524 | alloc_pages_vma(gfp_mask, 0, vma, addr, node) |
529 | 525 | ||
530 | extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); | 526 | extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); |
531 | extern unsigned long get_zeroed_page(gfp_t gfp_mask); | 527 | extern unsigned long get_zeroed_page(gfp_t gfp_mask); |
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5228c62af416..bac395f1d00a 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h | |||
@@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, | |||
139 | struct mempolicy *get_task_policy(struct task_struct *p); | 139 | struct mempolicy *get_task_policy(struct task_struct *p); |
140 | struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, | 140 | struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, |
141 | unsigned long addr); | 141 | unsigned long addr); |
142 | struct mempolicy *get_vma_policy(struct vm_area_struct *vma, | ||
143 | unsigned long addr); | ||
142 | bool vma_policy_mof(struct vm_area_struct *vma); | 144 | bool vma_policy_mof(struct vm_area_struct *vma); |
143 | 145 | ||
144 | extern void numa_default_policy(void); | 146 | extern void numa_default_policy(void); |
diff --git a/include/linux/notifier.h b/include/linux/notifier.h index f35c7bf76143..0096a05395e3 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h | |||
@@ -122,8 +122,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |||
122 | 122 | ||
123 | #ifdef CONFIG_TREE_SRCU | 123 | #ifdef CONFIG_TREE_SRCU |
124 | #define _SRCU_NOTIFIER_HEAD(name, mod) \ | 124 | #define _SRCU_NOTIFIER_HEAD(name, mod) \ |
125 | static DEFINE_PER_CPU(struct srcu_data, \ | 125 | static DEFINE_PER_CPU(struct srcu_data, name##_head_srcu_data); \ |
126 | name##_head_srcu_data); \ | ||
127 | mod struct srcu_notifier_head name = \ | 126 | mod struct srcu_notifier_head name = \ |
128 | SRCU_NOTIFIER_INIT(name, name##_head_srcu_data) | 127 | SRCU_NOTIFIER_INIT(name, name##_head_srcu_data) |
129 | 128 | ||
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index c6a3b6851372..35cf0ad29718 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c | |||
@@ -25,8 +25,6 @@ | |||
25 | #include <linux/elf.h> | 25 | #include <linux/elf.h> |
26 | #include <linux/elfcore.h> | 26 | #include <linux/elfcore.h> |
27 | #include <linux/kernel.h> | 27 | #include <linux/kernel.h> |
28 | #include <linux/kexec.h> | ||
29 | #include <linux/slab.h> | ||
30 | #include <linux/syscalls.h> | 28 | #include <linux/syscalls.h> |
31 | #include <linux/vmalloc.h> | 29 | #include <linux/vmalloc.h> |
32 | #include "kexec_internal.h" | 30 | #include "kexec_internal.h" |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3ae223f7b5df..5fc724e4e454 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -66,7 +66,6 @@ | |||
66 | #include <linux/kexec.h> | 66 | #include <linux/kexec.h> |
67 | #include <linux/bpf.h> | 67 | #include <linux/bpf.h> |
68 | #include <linux/mount.h> | 68 | #include <linux/mount.h> |
69 | #include <linux/pipe_fs_i.h> | ||
70 | 69 | ||
71 | #include <linux/uaccess.h> | 70 | #include <linux/uaccess.h> |
72 | #include <asm/processor.h> | 71 | #include <asm/processor.h> |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4e4ef8fa479d..55478ab3c83b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -629,21 +629,40 @@ release: | |||
629 | * available | 629 | * available |
630 | * never: never stall for any thp allocation | 630 | * never: never stall for any thp allocation |
631 | */ | 631 | */ |
632 | static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma) | 632 | static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr) |
633 | { | 633 | { |
634 | const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE); | 634 | const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE); |
635 | gfp_t this_node = 0; | ||
636 | |||
637 | #ifdef CONFIG_NUMA | ||
638 | struct mempolicy *pol; | ||
639 | /* | ||
640 | * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not | ||
641 | * specified, to express a general desire to stay on the current | ||
642 | * node for optimistic allocation attempts. If the defrag mode | ||
643 | * and/or madvise hint requires the direct reclaim then we prefer | ||
644 | * to fallback to other node rather than node reclaim because that | ||
645 | * can lead to excessive reclaim even though there is free memory | ||
646 | * on other nodes. We expect that NUMA preferences are specified | ||
647 | * by memory policies. | ||
648 | */ | ||
649 | pol = get_vma_policy(vma, addr); | ||
650 | if (pol->mode != MPOL_BIND) | ||
651 | this_node = __GFP_THISNODE; | ||
652 | mpol_cond_put(pol); | ||
653 | #endif | ||
635 | 654 | ||
636 | if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags)) | 655 | if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags)) |
637 | return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY); | 656 | return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY); |
638 | if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags)) | 657 | if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags)) |
639 | return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM; | 658 | return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node; |
640 | if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags)) | 659 | if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags)) |
641 | return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : | 660 | return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : |
642 | __GFP_KSWAPD_RECLAIM); | 661 | __GFP_KSWAPD_RECLAIM | this_node); |
643 | if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags)) | 662 | if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags)) |
644 | return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : | 663 | return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : |
645 | 0); | 664 | this_node); |
646 | return GFP_TRANSHUGE_LIGHT; | 665 | return GFP_TRANSHUGE_LIGHT | this_node; |
647 | } | 666 | } |
648 | 667 | ||
649 | /* Caller must hold page table lock. */ | 668 | /* Caller must hold page table lock. */ |
@@ -715,8 +734,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) | |||
715 | pte_free(vma->vm_mm, pgtable); | 734 | pte_free(vma->vm_mm, pgtable); |
716 | return ret; | 735 | return ret; |
717 | } | 736 | } |
718 | gfp = alloc_hugepage_direct_gfpmask(vma); | 737 | gfp = alloc_hugepage_direct_gfpmask(vma, haddr); |
719 | page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); | 738 | page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id()); |
720 | if (unlikely(!page)) { | 739 | if (unlikely(!page)) { |
721 | count_vm_event(THP_FAULT_FALLBACK); | 740 | count_vm_event(THP_FAULT_FALLBACK); |
722 | return VM_FAULT_FALLBACK; | 741 | return VM_FAULT_FALLBACK; |
@@ -1286,8 +1305,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) | |||
1286 | alloc: | 1305 | alloc: |
1287 | if (transparent_hugepage_enabled(vma) && | 1306 | if (transparent_hugepage_enabled(vma) && |
1288 | !transparent_hugepage_debug_cow()) { | 1307 | !transparent_hugepage_debug_cow()) { |
1289 | huge_gfp = alloc_hugepage_direct_gfpmask(vma); | 1308 | huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr); |
1290 | new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER); | 1309 | new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma, |
1310 | haddr, numa_node_id()); | ||
1291 | } else | 1311 | } else |
1292 | new_page = NULL; | 1312 | new_page = NULL; |
1293 | 1313 | ||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 54920cbc46bf..6e1469b80cb7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -2593,7 +2593,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) | |||
2593 | struct mem_cgroup *memcg; | 2593 | struct mem_cgroup *memcg; |
2594 | int ret = 0; | 2594 | int ret = 0; |
2595 | 2595 | ||
2596 | if (memcg_kmem_bypass()) | 2596 | if (mem_cgroup_disabled() || memcg_kmem_bypass()) |
2597 | return 0; | 2597 | return 0; |
2598 | 2598 | ||
2599 | memcg = get_mem_cgroup_from_current(); | 2599 | memcg = get_mem_cgroup_from_current(); |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 61972da38d93..2b2b3ccbbfb5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -586,6 +586,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, | |||
586 | for (i = 0; i < sections_to_remove; i++) { | 586 | for (i = 0; i < sections_to_remove; i++) { |
587 | unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; | 587 | unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; |
588 | 588 | ||
589 | cond_resched(); | ||
589 | ret = __remove_section(zone, __pfn_to_section(pfn), map_offset, | 590 | ret = __remove_section(zone, __pfn_to_section(pfn), map_offset, |
590 | altmap); | 591 | altmap); |
591 | map_offset = 0; | 592 | map_offset = 0; |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index cfd26d7e61a1..5837a067124d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start) | |||
1116 | } else if (PageTransHuge(page)) { | 1116 | } else if (PageTransHuge(page)) { |
1117 | struct page *thp; | 1117 | struct page *thp; |
1118 | 1118 | ||
1119 | thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address, | 1119 | thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma, |
1120 | HPAGE_PMD_ORDER); | 1120 | address, numa_node_id()); |
1121 | if (!thp) | 1121 | if (!thp) |
1122 | return NULL; | 1122 | return NULL; |
1123 | prep_transhuge_page(thp); | 1123 | prep_transhuge_page(thp); |
@@ -1662,7 +1662,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, | |||
1662 | * freeing by another task. It is the caller's responsibility to free the | 1662 | * freeing by another task. It is the caller's responsibility to free the |
1663 | * extra reference for shared policies. | 1663 | * extra reference for shared policies. |
1664 | */ | 1664 | */ |
1665 | static struct mempolicy *get_vma_policy(struct vm_area_struct *vma, | 1665 | struct mempolicy *get_vma_policy(struct vm_area_struct *vma, |
1666 | unsigned long addr) | 1666 | unsigned long addr) |
1667 | { | 1667 | { |
1668 | struct mempolicy *pol = __get_vma_policy(vma, addr); | 1668 | struct mempolicy *pol = __get_vma_policy(vma, addr); |
@@ -2011,7 +2011,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, | |||
2011 | * @vma: Pointer to VMA or NULL if not available. | 2011 | * @vma: Pointer to VMA or NULL if not available. |
2012 | * @addr: Virtual Address of the allocation. Must be inside the VMA. | 2012 | * @addr: Virtual Address of the allocation. Must be inside the VMA. |
2013 | * @node: Which node to prefer for allocation (modulo policy). | 2013 | * @node: Which node to prefer for allocation (modulo policy). |
2014 | * @hugepage: for hugepages try only the preferred node if possible | ||
2015 | * | 2014 | * |
2016 | * This function allocates a page from the kernel page pool and applies | 2015 | * This function allocates a page from the kernel page pool and applies |
2017 | * a NUMA policy associated with the VMA or the current process. | 2016 | * a NUMA policy associated with the VMA or the current process. |
@@ -2022,7 +2021,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, | |||
2022 | */ | 2021 | */ |
2023 | struct page * | 2022 | struct page * |
2024 | alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, | 2023 | alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, |
2025 | unsigned long addr, int node, bool hugepage) | 2024 | unsigned long addr, int node) |
2026 | { | 2025 | { |
2027 | struct mempolicy *pol; | 2026 | struct mempolicy *pol; |
2028 | struct page *page; | 2027 | struct page *page; |
@@ -2040,32 +2039,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, | |||
2040 | goto out; | 2039 | goto out; |
2041 | } | 2040 | } |
2042 | 2041 | ||
2043 | if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) { | ||
2044 | int hpage_node = node; | ||
2045 | |||
2046 | /* | ||
2047 | * For hugepage allocation and non-interleave policy which | ||
2048 | * allows the current node (or other explicitly preferred | ||
2049 | * node) we only try to allocate from the current/preferred | ||
2050 | * node and don't fall back to other nodes, as the cost of | ||
2051 | * remote accesses would likely offset THP benefits. | ||
2052 | * | ||
2053 | * If the policy is interleave, or does not allow the current | ||
2054 | * node in its nodemask, we allocate the standard way. | ||
2055 | */ | ||
2056 | if (pol->mode == MPOL_PREFERRED && | ||
2057 | !(pol->flags & MPOL_F_LOCAL)) | ||
2058 | hpage_node = pol->v.preferred_node; | ||
2059 | |||
2060 | nmask = policy_nodemask(gfp, pol); | ||
2061 | if (!nmask || node_isset(hpage_node, *nmask)) { | ||
2062 | mpol_cond_put(pol); | ||
2063 | page = __alloc_pages_node(hpage_node, | ||
2064 | gfp | __GFP_THISNODE, order); | ||
2065 | goto out; | ||
2066 | } | ||
2067 | } | ||
2068 | |||
2069 | nmask = policy_nodemask(gfp, pol); | 2042 | nmask = policy_nodemask(gfp, pol); |
2070 | preferred_nid = policy_node(gfp, pol, node); | 2043 | preferred_nid = policy_node(gfp, pol, node); |
2071 | page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask); | 2044 | page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask); |
diff --git a/mm/shmem.c b/mm/shmem.c index 56bf122e0bb4..ea26d7a0342d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -1435,7 +1435,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp, | |||
1435 | 1435 | ||
1436 | shmem_pseudo_vma_init(&pvma, info, hindex); | 1436 | shmem_pseudo_vma_init(&pvma, info, hindex); |
1437 | page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN, | 1437 | page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN, |
1438 | HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true); | 1438 | HPAGE_PMD_ORDER, &pvma, 0, numa_node_id()); |
1439 | shmem_pseudo_vma_destroy(&pvma); | 1439 | shmem_pseudo_vma_destroy(&pvma); |
1440 | if (page) | 1440 | if (page) |
1441 | prep_transhuge_page(page); | 1441 | prep_transhuge_page(page); |