diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-02-03 13:10:02 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-02-03 13:10:02 -0500 |
commit | b37a05c083c85c2657dca9bbe1f5d79dccf756d5 (patch) | |
tree | 0a9bd376a437484e21a6728ca16f2266a0e3e788 | |
parent | d5bfb96bdad3588961f49a6eff89a625fbaa12bf (diff) | |
parent | 12c9d70bd5056b3ae84746fca973c286f48384cc (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge fixes from Andrew Morton:
"18 fixes"
[ The 18 fixes turned into 17 commits, because one of the fixes was a
fix for another patch in the series that I just folded in by editing
the patch manually - hopefully correctly - Linus ]
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm: fix memory leak in copy_huge_pmd()
drivers/hwspinlock: fix race between radix tree insertion and lookup
radix-tree: fix race in gang lookup
mm/vmpressure.c: fix subtree pressure detection
mm: polish virtual memory accounting
mm: warn about VmData over RLIMIT_DATA
Documentation: cgroup-v2: add memory.stat::sock description
mm: memcontrol: drop superfluous entry in the per-memcg stats array
drivers/scsi/sg.c: mark VMA as VM_IO to prevent migration
proc: revert /proc/<pid>/maps [stack:TID] annotation
numa: fix /proc/<pid>/numa_maps for hugetlbfs on s390
MAINTAINERS: update Seth email
ocfs2/cluster: fix memory leak in o2hb_region_release
lib/test-string_helpers.c: fix and improve string_get_size() tests
thp: limit number of object to scan on deferred_split_scan()
thp: change deferred_split_count() to return number of THP in queue
thp: make split_queue per-node
-rw-r--r-- | Documentation/cgroup-v2.txt | 4 | ||||
-rw-r--r-- | Documentation/filesystems/proc.txt | 13 | ||||
-rw-r--r-- | Documentation/kernel-parameters.txt | 5 | ||||
-rw-r--r-- | MAINTAINERS | 4 | ||||
-rw-r--r-- | drivers/hwspinlock/hwspinlock_core.c | 4 | ||||
-rw-r--r-- | drivers/scsi/sg.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 14 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 73 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 49 | ||||
-rw-r--r-- | include/linux/memcontrol.h | 2 | ||||
-rw-r--r-- | include/linux/mm.h | 9 | ||||
-rw-r--r-- | include/linux/mm_types.h | 6 | ||||
-rw-r--r-- | include/linux/mmzone.h | 6 | ||||
-rw-r--r-- | include/linux/radix-tree.h | 16 | ||||
-rw-r--r-- | lib/radix-tree.c | 12 | ||||
-rw-r--r-- | lib/test-string_helpers.c | 67 | ||||
-rw-r--r-- | mm/huge_memory.c | 87 | ||||
-rw-r--r-- | mm/internal.h | 31 | ||||
-rw-r--r-- | mm/mmap.c | 23 | ||||
-rw-r--r-- | mm/page_alloc.c | 5 | ||||
-rw-r--r-- | mm/util.c | 27 | ||||
-rw-r--r-- | mm/vmpressure.c | 3 |
22 files changed, 269 insertions, 193 deletions
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index 65b3eac8856c..e8d25e784214 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt | |||
@@ -843,6 +843,10 @@ PAGE_SIZE multiple when read back. | |||
843 | Amount of memory used to cache filesystem data, | 843 | Amount of memory used to cache filesystem data, |
844 | including tmpfs and shared memory. | 844 | including tmpfs and shared memory. |
845 | 845 | ||
846 | sock | ||
847 | |||
848 | Amount of memory used in network transmission buffers | ||
849 | |||
846 | file_mapped | 850 | file_mapped |
847 | 851 | ||
848 | Amount of cached filesystem data mapped with mmap() | 852 | Amount of cached filesystem data mapped with mmap() |
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index fde9fd06fa98..843b045b4069 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -240,8 +240,8 @@ Table 1-2: Contents of the status files (as of 4.1) | |||
240 | RssFile size of resident file mappings | 240 | RssFile size of resident file mappings |
241 | RssShmem size of resident shmem memory (includes SysV shm, | 241 | RssShmem size of resident shmem memory (includes SysV shm, |
242 | mapping of tmpfs and shared anonymous mappings) | 242 | mapping of tmpfs and shared anonymous mappings) |
243 | VmData size of data, stack, and text segments | 243 | VmData size of private data segments |
244 | VmStk size of data, stack, and text segments | 244 | VmStk size of stack segments |
245 | VmExe size of text segment | 245 | VmExe size of text segment |
246 | VmLib size of shared library code | 246 | VmLib size of shared library code |
247 | VmPTE size of page table entries | 247 | VmPTE size of page table entries |
@@ -356,7 +356,7 @@ address perms offset dev inode pathname | |||
356 | a7cb1000-a7cb2000 ---p 00000000 00:00 0 | 356 | a7cb1000-a7cb2000 ---p 00000000 00:00 0 |
357 | a7cb2000-a7eb2000 rw-p 00000000 00:00 0 | 357 | a7cb2000-a7eb2000 rw-p 00000000 00:00 0 |
358 | a7eb2000-a7eb3000 ---p 00000000 00:00 0 | 358 | a7eb2000-a7eb3000 ---p 00000000 00:00 0 |
359 | a7eb3000-a7ed5000 rw-p 00000000 00:00 0 [stack:1001] | 359 | a7eb3000-a7ed5000 rw-p 00000000 00:00 0 |
360 | a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 | 360 | a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 |
361 | a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6 | 361 | a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6 |
362 | a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6 | 362 | a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6 |
@@ -388,7 +388,6 @@ is not associated with a file: | |||
388 | 388 | ||
389 | [heap] = the heap of the program | 389 | [heap] = the heap of the program |
390 | [stack] = the stack of the main process | 390 | [stack] = the stack of the main process |
391 | [stack:1001] = the stack of the thread with tid 1001 | ||
392 | [vdso] = the "virtual dynamic shared object", | 391 | [vdso] = the "virtual dynamic shared object", |
393 | the kernel system call handler | 392 | the kernel system call handler |
394 | 393 | ||
@@ -396,10 +395,8 @@ is not associated with a file: | |||
396 | 395 | ||
397 | The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint | 396 | The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint |
398 | of the individual tasks of a process. In this file you will see a mapping marked | 397 | of the individual tasks of a process. In this file you will see a mapping marked |
399 | as [stack] if that task sees it as a stack. This is a key difference from the | 398 | as [stack] if that task sees it as a stack. Hence, for the example above, the |
400 | content of /proc/PID/maps, where you will see all mappings that are being used | 399 | task-level map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this: |
401 | as stack by all of those tasks. Hence, for the example above, the task-level | ||
402 | map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this: | ||
403 | 400 | ||
404 | 08048000-08049000 r-xp 00000000 03:00 8312 /opt/test | 401 | 08048000-08049000 r-xp 00000000 03:00 8312 /opt/test |
405 | 08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test | 402 | 08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test |
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 87d40a72f6a1..551ecf09c8dd 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1496,6 +1496,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1496 | could change it dynamically, usually by | 1496 | could change it dynamically, usually by |
1497 | /sys/module/printk/parameters/ignore_loglevel. | 1497 | /sys/module/printk/parameters/ignore_loglevel. |
1498 | 1498 | ||
1499 | ignore_rlimit_data | ||
1500 | Ignore RLIMIT_DATA setting for data mappings, | ||
1501 | print warning at first misuse. Can be changed via | ||
1502 | /sys/module/kernel/parameters/ignore_rlimit_data. | ||
1503 | |||
1499 | ihash_entries= [KNL] | 1504 | ihash_entries= [KNL] |
1500 | Set number of hash buckets for inode cache. | 1505 | Set number of hash buckets for inode cache. |
1501 | 1506 | ||
diff --git a/MAINTAINERS b/MAINTAINERS index c245e42cf1d5..24c5b9a29670 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -12150,7 +12150,7 @@ F: drivers/net/hamradio/*scc.c | |||
12150 | F: drivers/net/hamradio/z8530.h | 12150 | F: drivers/net/hamradio/z8530.h |
12151 | 12151 | ||
12152 | ZBUD COMPRESSED PAGE ALLOCATOR | 12152 | ZBUD COMPRESSED PAGE ALLOCATOR |
12153 | M: Seth Jennings <sjennings@variantweb.net> | 12153 | M: Seth Jennings <sjenning@redhat.com> |
12154 | L: linux-mm@kvack.org | 12154 | L: linux-mm@kvack.org |
12155 | S: Maintained | 12155 | S: Maintained |
12156 | F: mm/zbud.c | 12156 | F: mm/zbud.c |
@@ -12205,7 +12205,7 @@ F: include/linux/zsmalloc.h | |||
12205 | F: Documentation/vm/zsmalloc.txt | 12205 | F: Documentation/vm/zsmalloc.txt |
12206 | 12206 | ||
12207 | ZSWAP COMPRESSED SWAP CACHING | 12207 | ZSWAP COMPRESSED SWAP CACHING |
12208 | M: Seth Jennings <sjennings@variantweb.net> | 12208 | M: Seth Jennings <sjenning@redhat.com> |
12209 | L: linux-mm@kvack.org | 12209 | L: linux-mm@kvack.org |
12210 | S: Maintained | 12210 | S: Maintained |
12211 | F: mm/zswap.c | 12211 | F: mm/zswap.c |
diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c index 52f708bcf77f..d50c701b19d6 100644 --- a/drivers/hwspinlock/hwspinlock_core.c +++ b/drivers/hwspinlock/hwspinlock_core.c | |||
@@ -313,6 +313,10 @@ int of_hwspin_lock_get_id(struct device_node *np, int index) | |||
313 | hwlock = radix_tree_deref_slot(slot); | 313 | hwlock = radix_tree_deref_slot(slot); |
314 | if (unlikely(!hwlock)) | 314 | if (unlikely(!hwlock)) |
315 | continue; | 315 | continue; |
316 | if (radix_tree_is_indirect_ptr(hwlock)) { | ||
317 | slot = radix_tree_iter_retry(&iter); | ||
318 | continue; | ||
319 | } | ||
316 | 320 | ||
317 | if (hwlock->bank->dev->of_node == args.np) { | 321 | if (hwlock->bank->dev->of_node == args.np) { |
318 | ret = 0; | 322 | ret = 0; |
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 503ab8b46c0b..5e820674432c 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c | |||
@@ -1261,7 +1261,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) | |||
1261 | } | 1261 | } |
1262 | 1262 | ||
1263 | sfp->mmap_called = 1; | 1263 | sfp->mmap_called = 1; |
1264 | vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; | 1264 | vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; |
1265 | vma->vm_private_data = sfp; | 1265 | vma->vm_private_data = sfp; |
1266 | vma->vm_ops = &sg_mmap_vm_ops; | 1266 | vma->vm_ops = &sg_mmap_vm_ops; |
1267 | return 0; | 1267 | return 0; |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index a3cc6d2fc896..a76b9ea7722e 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -1254,15 +1254,15 @@ static const struct file_operations o2hb_debug_fops = { | |||
1254 | 1254 | ||
1255 | void o2hb_exit(void) | 1255 | void o2hb_exit(void) |
1256 | { | 1256 | { |
1257 | kfree(o2hb_db_livenodes); | ||
1258 | kfree(o2hb_db_liveregions); | ||
1259 | kfree(o2hb_db_quorumregions); | ||
1260 | kfree(o2hb_db_failedregions); | ||
1261 | debugfs_remove(o2hb_debug_failedregions); | 1257 | debugfs_remove(o2hb_debug_failedregions); |
1262 | debugfs_remove(o2hb_debug_quorumregions); | 1258 | debugfs_remove(o2hb_debug_quorumregions); |
1263 | debugfs_remove(o2hb_debug_liveregions); | 1259 | debugfs_remove(o2hb_debug_liveregions); |
1264 | debugfs_remove(o2hb_debug_livenodes); | 1260 | debugfs_remove(o2hb_debug_livenodes); |
1265 | debugfs_remove(o2hb_debug_dir); | 1261 | debugfs_remove(o2hb_debug_dir); |
1262 | kfree(o2hb_db_livenodes); | ||
1263 | kfree(o2hb_db_liveregions); | ||
1264 | kfree(o2hb_db_quorumregions); | ||
1265 | kfree(o2hb_db_failedregions); | ||
1266 | } | 1266 | } |
1267 | 1267 | ||
1268 | static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, | 1268 | static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, |
@@ -1438,13 +1438,15 @@ static void o2hb_region_release(struct config_item *item) | |||
1438 | 1438 | ||
1439 | kfree(reg->hr_slots); | 1439 | kfree(reg->hr_slots); |
1440 | 1440 | ||
1441 | kfree(reg->hr_db_regnum); | ||
1442 | kfree(reg->hr_db_livenodes); | ||
1443 | debugfs_remove(reg->hr_debug_livenodes); | 1441 | debugfs_remove(reg->hr_debug_livenodes); |
1444 | debugfs_remove(reg->hr_debug_regnum); | 1442 | debugfs_remove(reg->hr_debug_regnum); |
1445 | debugfs_remove(reg->hr_debug_elapsed_time); | 1443 | debugfs_remove(reg->hr_debug_elapsed_time); |
1446 | debugfs_remove(reg->hr_debug_pinned); | 1444 | debugfs_remove(reg->hr_debug_pinned); |
1447 | debugfs_remove(reg->hr_debug_dir); | 1445 | debugfs_remove(reg->hr_debug_dir); |
1446 | kfree(reg->hr_db_livenodes); | ||
1447 | kfree(reg->hr_db_regnum); | ||
1448 | kfree(reg->hr_debug_elapsed_time); | ||
1449 | kfree(reg->hr_debug_pinned); | ||
1448 | 1450 | ||
1449 | spin_lock(&o2hb_live_lock); | 1451 | spin_lock(&o2hb_live_lock); |
1450 | list_del(&reg->hr_all_item); | 1452 | list_del(&reg->hr_all_item); |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 85d16c67c33e..fa95ab2d3674 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -259,23 +259,29 @@ static int do_maps_open(struct inode *inode, struct file *file, | |||
259 | sizeof(struct proc_maps_private)); | 259 | sizeof(struct proc_maps_private)); |
260 | } | 260 | } |
261 | 261 | ||
262 | static pid_t pid_of_stack(struct proc_maps_private *priv, | 262 | /* |
263 | struct vm_area_struct *vma, bool is_pid) | 263 | * Indicate if the VMA is a stack for the given task; for |
264 | * /proc/PID/maps that is the stack of the main task. | ||
265 | */ | ||
266 | static int is_stack(struct proc_maps_private *priv, | ||
267 | struct vm_area_struct *vma, int is_pid) | ||
264 | { | 268 | { |
265 | struct inode *inode = priv->inode; | 269 | int stack = 0; |
266 | struct task_struct *task; | 270 | |
267 | pid_t ret = 0; | 271 | if (is_pid) { |
272 | stack = vma->vm_start <= vma->vm_mm->start_stack && | ||
273 | vma->vm_end >= vma->vm_mm->start_stack; | ||
274 | } else { | ||
275 | struct inode *inode = priv->inode; | ||
276 | struct task_struct *task; | ||
268 | 277 | ||
269 | rcu_read_lock(); | 278 | rcu_read_lock(); |
270 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | 279 | task = pid_task(proc_pid(inode), PIDTYPE_PID); |
271 | if (task) { | ||
272 | task = task_of_stack(task, vma, is_pid); | ||
273 | if (task) | 280 | if (task) |
274 | ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); | 281 | stack = vma_is_stack_for_task(vma, task); |
282 | rcu_read_unlock(); | ||
275 | } | 283 | } |
276 | rcu_read_unlock(); | 284 | return stack; |
277 | |||
278 | return ret; | ||
279 | } | 285 | } |
280 | 286 | ||
281 | static void | 287 | static void |
@@ -335,8 +341,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) | |||
335 | 341 | ||
336 | name = arch_vma_name(vma); | 342 | name = arch_vma_name(vma); |
337 | if (!name) { | 343 | if (!name) { |
338 | pid_t tid; | ||
339 | |||
340 | if (!mm) { | 344 | if (!mm) { |
341 | name = "[vdso]"; | 345 | name = "[vdso]"; |
342 | goto done; | 346 | goto done; |
@@ -348,21 +352,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) | |||
348 | goto done; | 352 | goto done; |
349 | } | 353 | } |
350 | 354 | ||
351 | tid = pid_of_stack(priv, vma, is_pid); | 355 | if (is_stack(priv, vma, is_pid)) |
352 | if (tid != 0) { | 356 | name = "[stack]"; |
353 | /* | ||
354 | * Thread stack in /proc/PID/task/TID/maps or | ||
355 | * the main process stack. | ||
356 | */ | ||
357 | if (!is_pid || (vma->vm_start <= mm->start_stack && | ||
358 | vma->vm_end >= mm->start_stack)) { | ||
359 | name = "[stack]"; | ||
360 | } else { | ||
361 | /* Thread stack in /proc/PID/maps */ | ||
362 | seq_pad(m, ' '); | ||
363 | seq_printf(m, "[stack:%d]", tid); | ||
364 | } | ||
365 | } | ||
366 | } | 357 | } |
367 | 358 | ||
368 | done: | 359 | done: |
@@ -1552,18 +1543,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | |||
1552 | static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, | 1543 | static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, |
1553 | unsigned long addr, unsigned long end, struct mm_walk *walk) | 1544 | unsigned long addr, unsigned long end, struct mm_walk *walk) |
1554 | { | 1545 | { |
1546 | pte_t huge_pte = huge_ptep_get(pte); | ||
1555 | struct numa_maps *md; | 1547 | struct numa_maps *md; |
1556 | struct page *page; | 1548 | struct page *page; |
1557 | 1549 | ||
1558 | if (!pte_present(*pte)) | 1550 | if (!pte_present(huge_pte)) |
1559 | return 0; | 1551 | return 0; |
1560 | 1552 | ||
1561 | page = pte_page(*pte); | 1553 | page = pte_page(huge_pte); |
1562 | if (!page) | 1554 | if (!page) |
1563 | return 0; | 1555 | return 0; |
1564 | 1556 | ||
1565 | md = walk->private; | 1557 | md = walk->private; |
1566 | gather_stats(page, md, pte_dirty(*pte), 1); | 1558 | gather_stats(page, md, pte_dirty(huge_pte), 1); |
1567 | return 0; | 1559 | return 0; |
1568 | } | 1560 | } |
1569 | 1561 | ||
@@ -1617,19 +1609,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1617 | seq_file_path(m, file, "\n\t= "); | 1609 | seq_file_path(m, file, "\n\t= "); |
1618 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { | 1610 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { |
1619 | seq_puts(m, " heap"); | 1611 | seq_puts(m, " heap"); |
1620 | } else { | 1612 | } else if (is_stack(proc_priv, vma, is_pid)) { |
1621 | pid_t tid = pid_of_stack(proc_priv, vma, is_pid); | 1613 | seq_puts(m, " stack"); |
1622 | if (tid != 0) { | ||
1623 | /* | ||
1624 | * Thread stack in /proc/PID/task/TID/maps or | ||
1625 | * the main process stack. | ||
1626 | */ | ||
1627 | if (!is_pid || (vma->vm_start <= mm->start_stack && | ||
1628 | vma->vm_end >= mm->start_stack)) | ||
1629 | seq_puts(m, " stack"); | ||
1630 | else | ||
1631 | seq_printf(m, " stack:%d", tid); | ||
1632 | } | ||
1633 | } | 1614 | } |
1634 | 1615 | ||
1635 | if (is_vm_hugetlb_page(vma)) | 1616 | if (is_vm_hugetlb_page(vma)) |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index e0d64c92e4f6..faacb0c0d857 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -123,23 +123,26 @@ unsigned long task_statm(struct mm_struct *mm, | |||
123 | return size; | 123 | return size; |
124 | } | 124 | } |
125 | 125 | ||
126 | static pid_t pid_of_stack(struct proc_maps_private *priv, | 126 | static int is_stack(struct proc_maps_private *priv, |
127 | struct vm_area_struct *vma, bool is_pid) | 127 | struct vm_area_struct *vma, int is_pid) |
128 | { | 128 | { |
129 | struct inode *inode = priv->inode; | 129 | struct mm_struct *mm = vma->vm_mm; |
130 | struct task_struct *task; | 130 | int stack = 0; |
131 | pid_t ret = 0; | 131 | |
132 | 132 | if (is_pid) { | |
133 | rcu_read_lock(); | 133 | stack = vma->vm_start <= mm->start_stack && |
134 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | 134 | vma->vm_end >= mm->start_stack; |
135 | if (task) { | 135 | } else { |
136 | task = task_of_stack(task, vma, is_pid); | 136 | struct inode *inode = priv->inode; |
137 | struct task_struct *task; | ||
138 | |||
139 | rcu_read_lock(); | ||
140 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | ||
137 | if (task) | 141 | if (task) |
138 | ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); | 142 | stack = vma_is_stack_for_task(vma, task); |
143 | rcu_read_unlock(); | ||
139 | } | 144 | } |
140 | rcu_read_unlock(); | 145 | return stack; |
141 | |||
142 | return ret; | ||
143 | } | 146 | } |
144 | 147 | ||
145 | /* | 148 | /* |
@@ -181,21 +184,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, | |||
181 | if (file) { | 184 | if (file) { |
182 | seq_pad(m, ' '); | 185 | seq_pad(m, ' '); |
183 | seq_file_path(m, file, ""); | 186 | seq_file_path(m, file, ""); |
184 | } else if (mm) { | 187 | } else if (mm && is_stack(priv, vma, is_pid)) { |
185 | pid_t tid = pid_of_stack(priv, vma, is_pid); | 188 | seq_pad(m, ' '); |
186 | 189 | seq_printf(m, "[stack]"); | |
187 | if (tid != 0) { | ||
188 | seq_pad(m, ' '); | ||
189 | /* | ||
190 | * Thread stack in /proc/PID/task/TID/maps or | ||
191 | * the main process stack. | ||
192 | */ | ||
193 | if (!is_pid || (vma->vm_start <= mm->start_stack && | ||
194 | vma->vm_end >= mm->start_stack)) | ||
195 | seq_printf(m, "[stack]"); | ||
196 | else | ||
197 | seq_printf(m, "[stack:%d]", tid); | ||
198 | } | ||
199 | } | 190 | } |
200 | 191 | ||
201 | seq_putc(m, '\n'); | 192 | seq_putc(m, '\n'); |
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9ae48d4aeb5e..792c8981e633 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -51,7 +51,7 @@ enum mem_cgroup_stat_index { | |||
51 | MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ | 51 | MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ |
52 | MEM_CGROUP_STAT_NSTATS, | 52 | MEM_CGROUP_STAT_NSTATS, |
53 | /* default hierarchy stats */ | 53 | /* default hierarchy stats */ |
54 | MEMCG_SOCK, | 54 | MEMCG_SOCK = MEM_CGROUP_STAT_NSTATS, |
55 | MEMCG_NR_STAT, | 55 | MEMCG_NR_STAT, |
56 | }; | 56 | }; |
57 | 57 | ||
diff --git a/include/linux/mm.h b/include/linux/mm.h index f1cd22f2df1a..516e14944339 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -201,11 +201,13 @@ extern unsigned int kobjsize(const void *objp); | |||
201 | #endif | 201 | #endif |
202 | 202 | ||
203 | #ifdef CONFIG_STACK_GROWSUP | 203 | #ifdef CONFIG_STACK_GROWSUP |
204 | #define VM_STACK_FLAGS (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) | 204 | #define VM_STACK VM_GROWSUP |
205 | #else | 205 | #else |
206 | #define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) | 206 | #define VM_STACK VM_GROWSDOWN |
207 | #endif | 207 | #endif |
208 | 208 | ||
209 | #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) | ||
210 | |||
209 | /* | 211 | /* |
210 | * Special vmas that are non-mergable, non-mlock()able. | 212 | * Special vmas that are non-mergable, non-mlock()able. |
211 | * Note: mm/huge_memory.c VM_NO_THP depends on this definition. | 213 | * Note: mm/huge_memory.c VM_NO_THP depends on this definition. |
@@ -1341,8 +1343,7 @@ static inline int stack_guard_page_end(struct vm_area_struct *vma, | |||
1341 | !vma_growsup(vma->vm_next, addr); | 1343 | !vma_growsup(vma->vm_next, addr); |
1342 | } | 1344 | } |
1343 | 1345 | ||
1344 | extern struct task_struct *task_of_stack(struct task_struct *task, | 1346 | int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t); |
1345 | struct vm_area_struct *vma, bool in_group); | ||
1346 | 1347 | ||
1347 | extern unsigned long move_page_tables(struct vm_area_struct *vma, | 1348 | extern unsigned long move_page_tables(struct vm_area_struct *vma, |
1348 | unsigned long old_addr, struct vm_area_struct *new_vma, | 1349 | unsigned long old_addr, struct vm_area_struct *new_vma, |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d3ebb9d21a53..624b78b848b8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -424,9 +424,9 @@ struct mm_struct { | |||
424 | unsigned long total_vm; /* Total pages mapped */ | 424 | unsigned long total_vm; /* Total pages mapped */ |
425 | unsigned long locked_vm; /* Pages that have PG_mlocked set */ | 425 | unsigned long locked_vm; /* Pages that have PG_mlocked set */ |
426 | unsigned long pinned_vm; /* Refcount permanently increased */ | 426 | unsigned long pinned_vm; /* Refcount permanently increased */ |
427 | unsigned long data_vm; /* VM_WRITE & ~VM_SHARED/GROWSDOWN */ | 427 | unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ |
428 | unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */ | 428 | unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ |
429 | unsigned long stack_vm; /* VM_GROWSUP/DOWN */ | 429 | unsigned long stack_vm; /* VM_STACK */ |
430 | unsigned long def_flags; | 430 | unsigned long def_flags; |
431 | unsigned long start_code, end_code, start_data, end_data; | 431 | unsigned long start_code, end_code, start_data, end_data; |
432 | unsigned long start_brk, brk, start_stack; | 432 | unsigned long start_brk, brk, start_stack; |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 33bb1b19273e..7b6c2cfee390 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -682,6 +682,12 @@ typedef struct pglist_data { | |||
682 | */ | 682 | */ |
683 | unsigned long first_deferred_pfn; | 683 | unsigned long first_deferred_pfn; |
684 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ | 684 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ |
685 | |||
686 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
687 | spinlock_t split_queue_lock; | ||
688 | struct list_head split_queue; | ||
689 | unsigned long split_queue_len; | ||
690 | #endif | ||
685 | } pg_data_t; | 691 | } pg_data_t; |
686 | 692 | ||
687 | #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) | 693 | #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) |
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 7c88ad156a29..00b17c526c1f 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h | |||
@@ -379,6 +379,22 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, | |||
379 | struct radix_tree_iter *iter, unsigned flags); | 379 | struct radix_tree_iter *iter, unsigned flags); |
380 | 380 | ||
381 | /** | 381 | /** |
382 | * radix_tree_iter_retry - retry this chunk of the iteration | ||
383 | * @iter: iterator state | ||
384 | * | ||
385 | * If we iterate over a tree protected only by the RCU lock, a race | ||
386 | * against deletion or creation may result in seeing a slot for which | ||
387 | * radix_tree_deref_retry() returns true. If so, call this function | ||
388 | * and continue the iteration. | ||
389 | */ | ||
390 | static inline __must_check | ||
391 | void **radix_tree_iter_retry(struct radix_tree_iter *iter) | ||
392 | { | ||
393 | iter->next_index = iter->index; | ||
394 | return NULL; | ||
395 | } | ||
396 | |||
397 | /** | ||
382 | * radix_tree_chunk_size - get current chunk size | 398 | * radix_tree_chunk_size - get current chunk size |
383 | * | 399 | * |
384 | * @iter: pointer to radix tree iterator | 400 | * @iter: pointer to radix tree iterator |
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index fcf5d98574ce..6b79e9026e24 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
@@ -1019,9 +1019,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, | |||
1019 | return 0; | 1019 | return 0; |
1020 | 1020 | ||
1021 | radix_tree_for_each_slot(slot, root, &iter, first_index) { | 1021 | radix_tree_for_each_slot(slot, root, &iter, first_index) { |
1022 | results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); | 1022 | results[ret] = rcu_dereference_raw(*slot); |
1023 | if (!results[ret]) | 1023 | if (!results[ret]) |
1024 | continue; | 1024 | continue; |
1025 | if (radix_tree_is_indirect_ptr(results[ret])) { | ||
1026 | slot = radix_tree_iter_retry(&iter); | ||
1027 | continue; | ||
1028 | } | ||
1025 | if (++ret == max_items) | 1029 | if (++ret == max_items) |
1026 | break; | 1030 | break; |
1027 | } | 1031 | } |
@@ -1098,9 +1102,13 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, | |||
1098 | return 0; | 1102 | return 0; |
1099 | 1103 | ||
1100 | radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { | 1104 | radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { |
1101 | results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); | 1105 | results[ret] = rcu_dereference_raw(*slot); |
1102 | if (!results[ret]) | 1106 | if (!results[ret]) |
1103 | continue; | 1107 | continue; |
1108 | if (radix_tree_is_indirect_ptr(results[ret])) { | ||
1109 | slot = radix_tree_iter_retry(&iter); | ||
1110 | continue; | ||
1111 | } | ||
1104 | if (++ret == max_items) | 1112 | if (++ret == max_items) |
1105 | break; | 1113 | break; |
1106 | } | 1114 | } |
diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c index 98866a770770..25b5cbfb7615 100644 --- a/lib/test-string_helpers.c +++ b/lib/test-string_helpers.c | |||
@@ -327,36 +327,67 @@ out: | |||
327 | } | 327 | } |
328 | 328 | ||
329 | #define string_get_size_maxbuf 16 | 329 | #define string_get_size_maxbuf 16 |
330 | #define test_string_get_size_one(size, blk_size, units, exp_result) \ | 330 | #define test_string_get_size_one(size, blk_size, exp_result10, exp_result2) \ |
331 | do { \ | 331 | do { \ |
332 | BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf); \ | 332 | BUILD_BUG_ON(sizeof(exp_result10) >= string_get_size_maxbuf); \ |
333 | __test_string_get_size((size), (blk_size), (units), \ | 333 | BUILD_BUG_ON(sizeof(exp_result2) >= string_get_size_maxbuf); \ |
334 | (exp_result)); \ | 334 | __test_string_get_size((size), (blk_size), (exp_result10), \ |
335 | (exp_result2)); \ | ||
335 | } while (0) | 336 | } while (0) |
336 | 337 | ||
337 | 338 | ||
338 | static __init void __test_string_get_size(const u64 size, const u64 blk_size, | 339 | static __init void test_string_get_size_check(const char *units, |
339 | const enum string_size_units units, | 340 | const char *exp, |
340 | const char *exp_result) | 341 | char *res, |
342 | const u64 size, | ||
343 | const u64 blk_size) | ||
341 | { | 344 | { |
342 | char buf[string_get_size_maxbuf]; | 345 | if (!memcmp(res, exp, strlen(exp) + 1)) |
343 | |||
344 | string_get_size(size, blk_size, units, buf, sizeof(buf)); | ||
345 | if (!memcmp(buf, exp_result, strlen(exp_result) + 1)) | ||
346 | return; | 346 | return; |
347 | 347 | ||
348 | buf[sizeof(buf) - 1] = '\0'; | 348 | res[string_get_size_maxbuf - 1] = '\0'; |
349 | pr_warn("Test 'test_string_get_size_one' failed!\n"); | 349 | |
350 | pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n", | 350 | pr_warn("Test 'test_string_get_size' failed!\n"); |
351 | pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %s)\n", | ||
351 | size, blk_size, units); | 352 | size, blk_size, units); |
352 | pr_warn("expected: '%s', got '%s'\n", exp_result, buf); | 353 | pr_warn("expected: '%s', got '%s'\n", exp, res); |
354 | } | ||
355 | |||
356 | static __init void __test_string_get_size(const u64 size, const u64 blk_size, | ||
357 | const char *exp_result10, | ||
358 | const char *exp_result2) | ||
359 | { | ||
360 | char buf10[string_get_size_maxbuf]; | ||
361 | char buf2[string_get_size_maxbuf]; | ||
362 | |||
363 | string_get_size(size, blk_size, STRING_UNITS_10, buf10, sizeof(buf10)); | ||
364 | string_get_size(size, blk_size, STRING_UNITS_2, buf2, sizeof(buf2)); | ||
365 | |||
366 | test_string_get_size_check("STRING_UNITS_10", exp_result10, buf10, | ||
367 | size, blk_size); | ||
368 | |||
369 | test_string_get_size_check("STRING_UNITS_2", exp_result2, buf2, | ||
370 | size, blk_size); | ||
353 | } | 371 | } |
354 | 372 | ||
355 | static __init void test_string_get_size(void) | 373 | static __init void test_string_get_size(void) |
356 | { | 374 | { |
357 | test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB"); | 375 | /* small values */ |
358 | test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB"); | 376 | test_string_get_size_one(0, 512, "0 B", "0 B"); |
359 | test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B"); | 377 | test_string_get_size_one(1, 512, "512 B", "512 B"); |
378 | test_string_get_size_one(1100, 1, "1.10 kB", "1.07 KiB"); | ||
379 | |||
380 | /* normal values */ | ||
381 | test_string_get_size_one(16384, 512, "8.39 MB", "8.00 MiB"); | ||
382 | test_string_get_size_one(500118192, 512, "256 GB", "238 GiB"); | ||
383 | test_string_get_size_one(8192, 4096, "33.6 MB", "32.0 MiB"); | ||
384 | |||
385 | /* weird block sizes */ | ||
386 | test_string_get_size_one(3000, 1900, "5.70 MB", "5.44 MiB"); | ||
387 | |||
388 | /* huge values */ | ||
389 | test_string_get_size_one(U64_MAX, 4096, "75.6 ZB", "64.0 ZiB"); | ||
390 | test_string_get_size_one(4096, U64_MAX, "75.6 ZB", "64.0 ZiB"); | ||
360 | } | 391 | } |
361 | 392 | ||
362 | static int __init test_string_helpers_init(void) | 393 | static int __init test_string_helpers_init(void) |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index fd3a07b3e6f4..36c070167b71 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -138,9 +138,6 @@ static struct khugepaged_scan khugepaged_scan = { | |||
138 | .mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head), | 138 | .mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head), |
139 | }; | 139 | }; |
140 | 140 | ||
141 | static DEFINE_SPINLOCK(split_queue_lock); | ||
142 | static LIST_HEAD(split_queue); | ||
143 | static unsigned long split_queue_len; | ||
144 | static struct shrinker deferred_split_shrinker; | 141 | static struct shrinker deferred_split_shrinker; |
145 | 142 | ||
146 | static void set_recommended_min_free_kbytes(void) | 143 | static void set_recommended_min_free_kbytes(void) |
@@ -861,7 +858,8 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, | |||
861 | return false; | 858 | return false; |
862 | entry = mk_pmd(zero_page, vma->vm_page_prot); | 859 | entry = mk_pmd(zero_page, vma->vm_page_prot); |
863 | entry = pmd_mkhuge(entry); | 860 | entry = pmd_mkhuge(entry); |
864 | pgtable_trans_huge_deposit(mm, pmd, pgtable); | 861 | if (pgtable) |
862 | pgtable_trans_huge_deposit(mm, pmd, pgtable); | ||
865 | set_pmd_at(mm, haddr, pmd, entry); | 863 | set_pmd_at(mm, haddr, pmd, entry); |
866 | atomic_long_inc(&mm->nr_ptes); | 864 | atomic_long_inc(&mm->nr_ptes); |
867 | return true; | 865 | return true; |
@@ -1039,13 +1037,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
1039 | spinlock_t *dst_ptl, *src_ptl; | 1037 | spinlock_t *dst_ptl, *src_ptl; |
1040 | struct page *src_page; | 1038 | struct page *src_page; |
1041 | pmd_t pmd; | 1039 | pmd_t pmd; |
1042 | pgtable_t pgtable; | 1040 | pgtable_t pgtable = NULL; |
1043 | int ret; | 1041 | int ret; |
1044 | 1042 | ||
1045 | ret = -ENOMEM; | 1043 | if (!vma_is_dax(vma)) { |
1046 | pgtable = pte_alloc_one(dst_mm, addr); | 1044 | ret = -ENOMEM; |
1047 | if (unlikely(!pgtable)) | 1045 | pgtable = pte_alloc_one(dst_mm, addr); |
1048 | goto out; | 1046 | if (unlikely(!pgtable)) |
1047 | goto out; | ||
1048 | } | ||
1049 | 1049 | ||
1050 | dst_ptl = pmd_lock(dst_mm, dst_pmd); | 1050 | dst_ptl = pmd_lock(dst_mm, dst_pmd); |
1051 | src_ptl = pmd_lockptr(src_mm, src_pmd); | 1051 | src_ptl = pmd_lockptr(src_mm, src_pmd); |
@@ -1076,7 +1076,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
1076 | goto out_unlock; | 1076 | goto out_unlock; |
1077 | } | 1077 | } |
1078 | 1078 | ||
1079 | if (pmd_trans_huge(pmd)) { | 1079 | if (!vma_is_dax(vma)) { |
1080 | /* thp accounting separate from pmd_devmap accounting */ | 1080 | /* thp accounting separate from pmd_devmap accounting */ |
1081 | src_page = pmd_page(pmd); | 1081 | src_page = pmd_page(pmd); |
1082 | VM_BUG_ON_PAGE(!PageHead(src_page), src_page); | 1082 | VM_BUG_ON_PAGE(!PageHead(src_page), src_page); |
@@ -3358,6 +3358,7 @@ int total_mapcount(struct page *page) | |||
3358 | int split_huge_page_to_list(struct page *page, struct list_head *list) | 3358 | int split_huge_page_to_list(struct page *page, struct list_head *list) |
3359 | { | 3359 | { |
3360 | struct page *head = compound_head(page); | 3360 | struct page *head = compound_head(page); |
3361 | struct pglist_data *pgdata = NODE_DATA(page_to_nid(head)); | ||
3361 | struct anon_vma *anon_vma; | 3362 | struct anon_vma *anon_vma; |
3362 | int count, mapcount, ret; | 3363 | int count, mapcount, ret; |
3363 | bool mlocked; | 3364 | bool mlocked; |
@@ -3401,19 +3402,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) | |||
3401 | lru_add_drain(); | 3402 | lru_add_drain(); |
3402 | 3403 | ||
3403 | /* Prevent deferred_split_scan() touching ->_count */ | 3404 | /* Prevent deferred_split_scan() touching ->_count */ |
3404 | spin_lock_irqsave(&split_queue_lock, flags); | 3405 | spin_lock_irqsave(&pgdata->split_queue_lock, flags); |
3405 | count = page_count(head); | 3406 | count = page_count(head); |
3406 | mapcount = total_mapcount(head); | 3407 | mapcount = total_mapcount(head); |
3407 | if (!mapcount && count == 1) { | 3408 | if (!mapcount && count == 1) { |
3408 | if (!list_empty(page_deferred_list(head))) { | 3409 | if (!list_empty(page_deferred_list(head))) { |
3409 | split_queue_len--; | 3410 | pgdata->split_queue_len--; |
3410 | list_del(page_deferred_list(head)); | 3411 | list_del(page_deferred_list(head)); |
3411 | } | 3412 | } |
3412 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3413 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
3413 | __split_huge_page(page, list); | 3414 | __split_huge_page(page, list); |
3414 | ret = 0; | 3415 | ret = 0; |
3415 | } else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { | 3416 | } else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { |
3416 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3417 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
3417 | pr_alert("total_mapcount: %u, page_count(): %u\n", | 3418 | pr_alert("total_mapcount: %u, page_count(): %u\n", |
3418 | mapcount, count); | 3419 | mapcount, count); |
3419 | if (PageTail(page)) | 3420 | if (PageTail(page)) |
@@ -3421,7 +3422,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) | |||
3421 | dump_page(page, "total_mapcount(head) > 0"); | 3422 | dump_page(page, "total_mapcount(head) > 0"); |
3422 | BUG(); | 3423 | BUG(); |
3423 | } else { | 3424 | } else { |
3424 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3425 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
3425 | unfreeze_page(anon_vma, head); | 3426 | unfreeze_page(anon_vma, head); |
3426 | ret = -EBUSY; | 3427 | ret = -EBUSY; |
3427 | } | 3428 | } |
@@ -3436,64 +3437,65 @@ out: | |||
3436 | 3437 | ||
3437 | void free_transhuge_page(struct page *page) | 3438 | void free_transhuge_page(struct page *page) |
3438 | { | 3439 | { |
3440 | struct pglist_data *pgdata = NODE_DATA(page_to_nid(page)); | ||
3439 | unsigned long flags; | 3441 | unsigned long flags; |
3440 | 3442 | ||
3441 | spin_lock_irqsave(&split_queue_lock, flags); | 3443 | spin_lock_irqsave(&pgdata->split_queue_lock, flags); |
3442 | if (!list_empty(page_deferred_list(page))) { | 3444 | if (!list_empty(page_deferred_list(page))) { |
3443 | split_queue_len--; | 3445 | pgdata->split_queue_len--; |
3444 | list_del(page_deferred_list(page)); | 3446 | list_del(page_deferred_list(page)); |
3445 | } | 3447 | } |
3446 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3448 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
3447 | free_compound_page(page); | 3449 | free_compound_page(page); |
3448 | } | 3450 | } |
3449 | 3451 | ||
3450 | void deferred_split_huge_page(struct page *page) | 3452 | void deferred_split_huge_page(struct page *page) |
3451 | { | 3453 | { |
3454 | struct pglist_data *pgdata = NODE_DATA(page_to_nid(page)); | ||
3452 | unsigned long flags; | 3455 | unsigned long flags; |
3453 | 3456 | ||
3454 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); | 3457 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); |
3455 | 3458 | ||
3456 | spin_lock_irqsave(&split_queue_lock, flags); | 3459 | spin_lock_irqsave(&pgdata->split_queue_lock, flags); |
3457 | if (list_empty(page_deferred_list(page))) { | 3460 | if (list_empty(page_deferred_list(page))) { |
3458 | list_add_tail(page_deferred_list(page), &split_queue); | 3461 | list_add_tail(page_deferred_list(page), &pgdata->split_queue); |
3459 | split_queue_len++; | 3462 | pgdata->split_queue_len++; |
3460 | } | 3463 | } |
3461 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3464 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
3462 | } | 3465 | } |
3463 | 3466 | ||
3464 | static unsigned long deferred_split_count(struct shrinker *shrink, | 3467 | static unsigned long deferred_split_count(struct shrinker *shrink, |
3465 | struct shrink_control *sc) | 3468 | struct shrink_control *sc) |
3466 | { | 3469 | { |
3467 | /* | 3470 | struct pglist_data *pgdata = NODE_DATA(sc->nid); |
3468 | * Split a page from split_queue will free up at least one page, | 3471 | return ACCESS_ONCE(pgdata->split_queue_len); |
3469 | * at most HPAGE_PMD_NR - 1. We don't track exact number. | ||
3470 | * Let's use HPAGE_PMD_NR / 2 as ballpark. | ||
3471 | */ | ||
3472 | return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2; | ||
3473 | } | 3472 | } |
3474 | 3473 | ||
3475 | static unsigned long deferred_split_scan(struct shrinker *shrink, | 3474 | static unsigned long deferred_split_scan(struct shrinker *shrink, |
3476 | struct shrink_control *sc) | 3475 | struct shrink_control *sc) |
3477 | { | 3476 | { |
3477 | struct pglist_data *pgdata = NODE_DATA(sc->nid); | ||
3478 | unsigned long flags; | 3478 | unsigned long flags; |
3479 | LIST_HEAD(list), *pos, *next; | 3479 | LIST_HEAD(list), *pos, *next; |
3480 | struct page *page; | 3480 | struct page *page; |
3481 | int split = 0; | 3481 | int split = 0; |
3482 | 3482 | ||
3483 | spin_lock_irqsave(&split_queue_lock, flags); | 3483 | spin_lock_irqsave(&pgdata->split_queue_lock, flags); |
3484 | list_splice_init(&split_queue, &list); | ||
3485 | |||
3486 | /* Take pin on all head pages to avoid freeing them under us */ | 3484 | /* Take pin on all head pages to avoid freeing them under us */ |
3487 | list_for_each_safe(pos, next, &list) { | 3485 | list_for_each_safe(pos, next, &pgdata->split_queue) { |
3488 | page = list_entry((void *)pos, struct page, mapping); | 3486 | page = list_entry((void *)pos, struct page, mapping); |
3489 | page = compound_head(page); | 3487 | page = compound_head(page); |
3490 | /* race with put_compound_page() */ | 3488 | if (get_page_unless_zero(page)) { |
3491 | if (!get_page_unless_zero(page)) { | 3489 | list_move(page_deferred_list(page), &list); |
3490 | } else { | ||
3491 | /* We lost race with put_compound_page() */ | ||
3492 | list_del_init(page_deferred_list(page)); | 3492 | list_del_init(page_deferred_list(page)); |
3493 | split_queue_len--; | 3493 | pgdata->split_queue_len--; |
3494 | } | 3494 | } |
3495 | if (!--sc->nr_to_scan) | ||
3496 | break; | ||
3495 | } | 3497 | } |
3496 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3498 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
3497 | 3499 | ||
3498 | list_for_each_safe(pos, next, &list) { | 3500 | list_for_each_safe(pos, next, &list) { |
3499 | page = list_entry((void *)pos, struct page, mapping); | 3501 | page = list_entry((void *)pos, struct page, mapping); |
@@ -3505,17 +3507,24 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, | |||
3505 | put_page(page); | 3507 | put_page(page); |
3506 | } | 3508 | } |
3507 | 3509 | ||
3508 | spin_lock_irqsave(&split_queue_lock, flags); | 3510 | spin_lock_irqsave(&pgdata->split_queue_lock, flags); |
3509 | list_splice_tail(&list, &split_queue); | 3511 | list_splice_tail(&list, &pgdata->split_queue); |
3510 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3512 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
3511 | 3513 | ||
3512 | return split * HPAGE_PMD_NR / 2; | 3514 | /* |
3515 | * Stop shrinker if we didn't split any page, but the queue is empty. | ||
3516 | * This can happen if pages were freed under us. | ||
3517 | */ | ||
3518 | if (!split && list_empty(&pgdata->split_queue)) | ||
3519 | return SHRINK_STOP; | ||
3520 | return split; | ||
3513 | } | 3521 | } |
3514 | 3522 | ||
3515 | static struct shrinker deferred_split_shrinker = { | 3523 | static struct shrinker deferred_split_shrinker = { |
3516 | .count_objects = deferred_split_count, | 3524 | .count_objects = deferred_split_count, |
3517 | .scan_objects = deferred_split_scan, | 3525 | .scan_objects = deferred_split_scan, |
3518 | .seeks = DEFAULT_SEEKS, | 3526 | .seeks = DEFAULT_SEEKS, |
3527 | .flags = SHRINKER_NUMA_AWARE, | ||
3519 | }; | 3528 | }; |
3520 | 3529 | ||
3521 | #ifdef CONFIG_DEBUG_FS | 3530 | #ifdef CONFIG_DEBUG_FS |
diff --git a/mm/internal.h b/mm/internal.h index ed8b5ffcf9b1..a38a21ebddb4 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
@@ -216,6 +216,37 @@ static inline bool is_cow_mapping(vm_flags_t flags) | |||
216 | return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; | 216 | return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; |
217 | } | 217 | } |
218 | 218 | ||
219 | /* | ||
220 | * These three helpers classify VMAs for virtual memory accounting. | ||
221 | */ | ||
222 | |||
223 | /* | ||
224 | * Executable code area - executable, not writable, not stack | ||
225 | */ | ||
226 | static inline bool is_exec_mapping(vm_flags_t flags) | ||
227 | { | ||
228 | return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC; | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * Stack area - automatically grows in one direction | ||
233 | * | ||
234 | * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous: | ||
235 | * do_mmap() forbids all other combinations. | ||
236 | */ | ||
237 | static inline bool is_stack_mapping(vm_flags_t flags) | ||
238 | { | ||
239 | return (flags & VM_STACK) == VM_STACK; | ||
240 | } | ||
241 | |||
242 | /* | ||
243 | * Data area - private, writable, not stack | ||
244 | */ | ||
245 | static inline bool is_data_mapping(vm_flags_t flags) | ||
246 | { | ||
247 | return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE; | ||
248 | } | ||
249 | |||
219 | /* mm/util.c */ | 250 | /* mm/util.c */ |
220 | void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, | 251 | void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, |
221 | struct vm_area_struct *prev, struct rb_node *rb_parent); | 252 | struct vm_area_struct *prev, struct rb_node *rb_parent); |
@@ -42,6 +42,7 @@ | |||
42 | #include <linux/memory.h> | 42 | #include <linux/memory.h> |
43 | #include <linux/printk.h> | 43 | #include <linux/printk.h> |
44 | #include <linux/userfaultfd_k.h> | 44 | #include <linux/userfaultfd_k.h> |
45 | #include <linux/moduleparam.h> | ||
45 | 46 | ||
46 | #include <asm/uaccess.h> | 47 | #include <asm/uaccess.h> |
47 | #include <asm/cacheflush.h> | 48 | #include <asm/cacheflush.h> |
@@ -69,6 +70,8 @@ const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX; | |||
69 | int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS; | 70 | int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS; |
70 | #endif | 71 | #endif |
71 | 72 | ||
73 | static bool ignore_rlimit_data = true; | ||
74 | core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644); | ||
72 | 75 | ||
73 | static void unmap_region(struct mm_struct *mm, | 76 | static void unmap_region(struct mm_struct *mm, |
74 | struct vm_area_struct *vma, struct vm_area_struct *prev, | 77 | struct vm_area_struct *vma, struct vm_area_struct *prev, |
@@ -2982,9 +2985,17 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages) | |||
2982 | if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT) | 2985 | if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT) |
2983 | return false; | 2986 | return false; |
2984 | 2987 | ||
2985 | if ((flags & (VM_WRITE | VM_SHARED | (VM_STACK_FLAGS & | 2988 | if (is_data_mapping(flags) && |
2986 | (VM_GROWSUP | VM_GROWSDOWN)))) == VM_WRITE) | 2989 | mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) { |
2987 | return mm->data_vm + npages <= rlimit(RLIMIT_DATA); | 2990 | if (ignore_rlimit_data) |
2991 | pr_warn_once("%s (%d): VmData %lu exceed data ulimit " | ||
2992 | "%lu. Will be forbidden soon.\n", | ||
2993 | current->comm, current->pid, | ||
2994 | (mm->data_vm + npages) << PAGE_SHIFT, | ||
2995 | rlimit(RLIMIT_DATA)); | ||
2996 | else | ||
2997 | return false; | ||
2998 | } | ||
2988 | 2999 | ||
2989 | return true; | 3000 | return true; |
2990 | } | 3001 | } |
@@ -2993,11 +3004,11 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages) | |||
2993 | { | 3004 | { |
2994 | mm->total_vm += npages; | 3005 | mm->total_vm += npages; |
2995 | 3006 | ||
2996 | if ((flags & (VM_EXEC | VM_WRITE)) == VM_EXEC) | 3007 | if (is_exec_mapping(flags)) |
2997 | mm->exec_vm += npages; | 3008 | mm->exec_vm += npages; |
2998 | else if (flags & (VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN))) | 3009 | else if (is_stack_mapping(flags)) |
2999 | mm->stack_vm += npages; | 3010 | mm->stack_vm += npages; |
3000 | else if ((flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) | 3011 | else if (is_data_mapping(flags)) |
3001 | mm->data_vm += npages; | 3012 | mm->data_vm += npages; |
3002 | } | 3013 | } |
3003 | 3014 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 63358d9f9aa9..ea2c4d3e0c03 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -5210,6 +5210,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) | |||
5210 | pgdat->numabalancing_migrate_nr_pages = 0; | 5210 | pgdat->numabalancing_migrate_nr_pages = 0; |
5211 | pgdat->numabalancing_migrate_next_window = jiffies; | 5211 | pgdat->numabalancing_migrate_next_window = jiffies; |
5212 | #endif | 5212 | #endif |
5213 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
5214 | spin_lock_init(&pgdat->split_queue_lock); | ||
5215 | INIT_LIST_HEAD(&pgdat->split_queue); | ||
5216 | pgdat->split_queue_len = 0; | ||
5217 | #endif | ||
5213 | init_waitqueue_head(&pgdat->kswapd_wait); | 5218 | init_waitqueue_head(&pgdat->kswapd_wait); |
5214 | init_waitqueue_head(&pgdat->pfmemalloc_wait); | 5219 | init_waitqueue_head(&pgdat->pfmemalloc_wait); |
5215 | pgdat_page_ext_init(pgdat); | 5220 | pgdat_page_ext_init(pgdat); |
@@ -230,36 +230,11 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, | |||
230 | } | 230 | } |
231 | 231 | ||
232 | /* Check if the vma is being used as a stack by this task */ | 232 | /* Check if the vma is being used as a stack by this task */ |
233 | static int vm_is_stack_for_task(struct task_struct *t, | 233 | int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t) |
234 | struct vm_area_struct *vma) | ||
235 | { | 234 | { |
236 | return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t)); | 235 | return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t)); |
237 | } | 236 | } |
238 | 237 | ||
239 | /* | ||
240 | * Check if the vma is being used as a stack. | ||
241 | * If is_group is non-zero, check in the entire thread group or else | ||
242 | * just check in the current task. Returns the task_struct of the task | ||
243 | * that the vma is stack for. Must be called under rcu_read_lock(). | ||
244 | */ | ||
245 | struct task_struct *task_of_stack(struct task_struct *task, | ||
246 | struct vm_area_struct *vma, bool in_group) | ||
247 | { | ||
248 | if (vm_is_stack_for_task(task, vma)) | ||
249 | return task; | ||
250 | |||
251 | if (in_group) { | ||
252 | struct task_struct *t; | ||
253 | |||
254 | for_each_thread(task, t) { | ||
255 | if (vm_is_stack_for_task(t, vma)) | ||
256 | return t; | ||
257 | } | ||
258 | } | ||
259 | |||
260 | return NULL; | ||
261 | } | ||
262 | |||
263 | #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT) | 238 | #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT) |
264 | void arch_pick_mmap_layout(struct mm_struct *mm) | 239 | void arch_pick_mmap_layout(struct mm_struct *mm) |
265 | { | 240 | { |
diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 9a6c0704211c..149fdf6c5c56 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c | |||
@@ -248,9 +248,8 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, | |||
248 | 248 | ||
249 | if (tree) { | 249 | if (tree) { |
250 | spin_lock(&vmpr->sr_lock); | 250 | spin_lock(&vmpr->sr_lock); |
251 | vmpr->tree_scanned += scanned; | 251 | scanned = vmpr->tree_scanned += scanned; |
252 | vmpr->tree_reclaimed += reclaimed; | 252 | vmpr->tree_reclaimed += reclaimed; |
253 | scanned = vmpr->scanned; | ||
254 | spin_unlock(&vmpr->sr_lock); | 253 | spin_unlock(&vmpr->sr_lock); |
255 | 254 | ||
256 | if (scanned < vmpressure_win) | 255 | if (scanned < vmpressure_win) |