author    Linus Torvalds <torvalds@linux-foundation.org>	2015-02-12 21:54:28 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>	2015-02-12 21:54:28 -0500
commit    818099574b04c5301eacbbcd441022b353a65466
tree      77b3645b375105cb0389df2b4ea5ffa90329f7f8 /mm
parent    802ea9d8645d33d24b7b4cd4537c14f3e698bde0
parent    6016daed58ee482a2f7684e93342e89139cf4419
Merge branch 'akpm' (patches from Andrew)
Merge third set of updates from Andrew Morton:
- the rest of MM
[ This includes getting rid of the numa hinting bits, in favor of
just generic protnone logic. Yay. - Linus ]
- core kernel
- procfs
- some of lib/ (lots of lib/ material this time)
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (104 commits)
lib/lcm.c: replace include
lib/percpu_ida.c: remove redundant includes
lib/strncpy_from_user.c: replace module.h include
lib/stmp_device.c: replace module.h include
lib/sort.c: move include inside #if 0
lib/show_mem.c: remove redundant include
lib/radix-tree.c: change to simpler include
lib/plist.c: remove redundant include
lib/nlattr.c: remove redundant include
lib/kobject_uevent.c: remove redundant include
lib/llist.c: remove redundant include
lib/md5.c: simplify include
lib/list_sort.c: rearrange includes
lib/genalloc.c: remove redundant include
lib/idr.c: remove redundant include
lib/halfmd4.c: simplify includes
lib/dynamic_queue_limits.c: simplify includes
lib/sort.c: use simpler includes
lib/interval_tree.c: simplify includes
hexdump: make it return number of bytes placed in buffer
...
Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig           |  10
-rw-r--r--  mm/compaction.c      |  23
-rw-r--r--  mm/gup.c             |  10
-rw-r--r--  mm/huge_memory.c     |  50
-rw-r--r--  mm/internal.h        |   6
-rw-r--r--  mm/list_lru.c        | 467
-rw-r--r--  mm/memcontrol.c      | 188
-rw-r--r--  mm/memory-failure.c  |  13
-rw-r--r--  mm/memory.c          |  20
-rw-r--r--  mm/mempolicy.c       |   2
-rw-r--r--  mm/migrate.c         |   8
-rw-r--r--  mm/mm_init.c         |   4
-rw-r--r--  mm/mprotect.c        |  48
-rw-r--r--  mm/page_alloc.c      |  19
-rw-r--r--  mm/pgtable-generic.c |   2
-rw-r--r--  mm/slab.c            |  17
-rw-r--r--  mm/slab.h            |  67
-rw-r--r--  mm/slab_common.c     | 197
-rw-r--r--  mm/slob.c            |   2
-rw-r--r--  mm/slub.c            | 117
-rw-r--r--  mm/vmscan.c          |  85
-rw-r--r--  mm/workingset.c      |   9
-rw-r--r--  mm/zbud.c            |   3
-rw-r--r--  mm/zpool.c           |   6
-rw-r--r--  mm/zsmalloc.c        | 239
-rw-r--r--  mm/zswap.c           |   5
26 files changed, 1217 insertions(+), 400 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index 4395b12869c8..de5239c152f9 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -602,6 +602,16 @@ config PGTABLE_MAPPING | |||
602 | You can check speed with zsmalloc benchmark: | 602 | You can check speed with zsmalloc benchmark: |
603 | https://github.com/spartacus06/zsmapbench | 603 | https://github.com/spartacus06/zsmapbench |
604 | 604 | ||
605 | config ZSMALLOC_STAT | ||
606 | bool "Export zsmalloc statistics" | ||
607 | depends on ZSMALLOC | ||
608 | select DEBUG_FS | ||
609 | help | ||
610 | This option enables code in the zsmalloc to collect various | ||
611 | statistics about whats happening in zsmalloc and exports that | ||
612 | information to userspace via debugfs. | ||
613 | If unsure, say N. | ||
614 | |||
605 | config GENERIC_EARLY_IOREMAP | 615 | config GENERIC_EARLY_IOREMAP |
606 | bool | 616 | bool |
607 | 617 | ||
diff --git a/mm/compaction.c b/mm/compaction.c
index b68736c8a1ce..d50d6de6f1b6 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -490,6 +490,13 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, | |||
490 | 490 | ||
491 | /* If a page was split, advance to the end of it */ | 491 | /* If a page was split, advance to the end of it */ |
492 | if (isolated) { | 492 | if (isolated) { |
493 | cc->nr_freepages += isolated; | ||
494 | if (!strict && | ||
495 | cc->nr_migratepages <= cc->nr_freepages) { | ||
496 | blockpfn += isolated; | ||
497 | break; | ||
498 | } | ||
499 | |||
493 | blockpfn += isolated - 1; | 500 | blockpfn += isolated - 1; |
494 | cursor += isolated - 1; | 501 | cursor += isolated - 1; |
495 | continue; | 502 | continue; |
@@ -899,7 +906,6 @@ static void isolate_freepages(struct compact_control *cc) | |||
899 | unsigned long isolate_start_pfn; /* exact pfn we start at */ | 906 | unsigned long isolate_start_pfn; /* exact pfn we start at */ |
900 | unsigned long block_end_pfn; /* end of current pageblock */ | 907 | unsigned long block_end_pfn; /* end of current pageblock */ |
901 | unsigned long low_pfn; /* lowest pfn scanner is able to scan */ | 908 | unsigned long low_pfn; /* lowest pfn scanner is able to scan */ |
902 | int nr_freepages = cc->nr_freepages; | ||
903 | struct list_head *freelist = &cc->freepages; | 909 | struct list_head *freelist = &cc->freepages; |
904 | 910 | ||
905 | /* | 911 | /* |
@@ -924,11 +930,11 @@ static void isolate_freepages(struct compact_control *cc) | |||
924 | * pages on cc->migratepages. We stop searching if the migrate | 930 | * pages on cc->migratepages. We stop searching if the migrate |
925 | * and free page scanners meet or enough free pages are isolated. | 931 | * and free page scanners meet or enough free pages are isolated. |
926 | */ | 932 | */ |
927 | for (; block_start_pfn >= low_pfn && cc->nr_migratepages > nr_freepages; | 933 | for (; block_start_pfn >= low_pfn && |
934 | cc->nr_migratepages > cc->nr_freepages; | ||
928 | block_end_pfn = block_start_pfn, | 935 | block_end_pfn = block_start_pfn, |
929 | block_start_pfn -= pageblock_nr_pages, | 936 | block_start_pfn -= pageblock_nr_pages, |
930 | isolate_start_pfn = block_start_pfn) { | 937 | isolate_start_pfn = block_start_pfn) { |
931 | unsigned long isolated; | ||
932 | 938 | ||
933 | /* | 939 | /* |
934 | * This can iterate a massively long zone without finding any | 940 | * This can iterate a massively long zone without finding any |
@@ -953,9 +959,8 @@ static void isolate_freepages(struct compact_control *cc) | |||
953 | continue; | 959 | continue; |
954 | 960 | ||
955 | /* Found a block suitable for isolating free pages from. */ | 961 | /* Found a block suitable for isolating free pages from. */ |
956 | isolated = isolate_freepages_block(cc, &isolate_start_pfn, | 962 | isolate_freepages_block(cc, &isolate_start_pfn, |
957 | block_end_pfn, freelist, false); | 963 | block_end_pfn, freelist, false); |
958 | nr_freepages += isolated; | ||
959 | 964 | ||
960 | /* | 965 | /* |
961 | * Remember where the free scanner should restart next time, | 966 | * Remember where the free scanner should restart next time, |
@@ -987,8 +992,6 @@ static void isolate_freepages(struct compact_control *cc) | |||
987 | */ | 992 | */ |
988 | if (block_start_pfn < low_pfn) | 993 | if (block_start_pfn < low_pfn) |
989 | cc->free_pfn = cc->migrate_pfn; | 994 | cc->free_pfn = cc->migrate_pfn; |
990 | |||
991 | cc->nr_freepages = nr_freepages; | ||
992 | } | 995 | } |
993 | 996 | ||
994 | /* | 997 | /* |
@@ -1100,8 +1103,10 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
1100 | low_pfn = isolate_migratepages_block(cc, low_pfn, end_pfn, | 1103 | low_pfn = isolate_migratepages_block(cc, low_pfn, end_pfn, |
1101 | isolate_mode); | 1104 | isolate_mode); |
1102 | 1105 | ||
1103 | if (!low_pfn || cc->contended) | 1106 | if (!low_pfn || cc->contended) { |
1107 | acct_isolated(zone, cc); | ||
1104 | return ISOLATE_ABORT; | 1108 | return ISOLATE_ABORT; |
1109 | } | ||
1105 | 1110 | ||
1106 | /* | 1111 | /* |
1107 | * Either we isolated something and proceed with migration. Or | 1112 | * Either we isolated something and proceed with migration. Or |
@@ -1173,7 +1178,7 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc, | |||
1173 | return COMPACT_PARTIAL; | 1178 | return COMPACT_PARTIAL; |
1174 | 1179 | ||
1175 | /* Job done if allocation would set block type */ | 1180 | /* Job done if allocation would set block type */ |
1176 | if (cc->order >= pageblock_order && area->nr_free) | 1181 | if (order >= pageblock_order && area->nr_free) |
1177 | return COMPACT_PARTIAL; | 1182 | return COMPACT_PARTIAL; |
1178 | } | 1183 | } |
1179 | 1184 | ||
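The compaction hunks above move free-page accounting into isolate_freepages_block() and let the free scanner stop as soon as it has isolated at least as many pages as are queued for migration. A minimal standalone sketch of that termination logic, with simplified types and none of the real locking, pfn or pageblock handling (all names below are illustrative stand-ins, not kernel symbols):

/* Illustrative only -- simplified stand-ins for compact_control and the scanners. */
struct cc_sketch {
	unsigned long nr_freepages;	/* free pages isolated so far */
	unsigned long nr_migratepages;	/* pages queued for migration */
};

/* stands in for isolate_freepages_block(): the callee now does the accounting */
static unsigned long isolate_block_sketch(struct cc_sketch *cc, unsigned long avail)
{
	cc->nr_freepages += avail;
	return avail;
}

/* stands in for the scan loop in isolate_freepages() */
static void free_scanner_sketch(struct cc_sketch *cc,
				const unsigned long *blocks, int nblocks)
{
	int i;

	/* keep scanning only while migration targets still outnumber free pages */
	for (i = 0; i < nblocks && cc->nr_migratepages > cc->nr_freepages; i++)
		isolate_block_sketch(cc, blocks[i]);
}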
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -64,7 +64,7 @@ retry:
64 | migration_entry_wait(mm, pmd, address); | 64 | migration_entry_wait(mm, pmd, address); |
65 | goto retry; | 65 | goto retry; |
66 | } | 66 | } |
67 | if ((flags & FOLL_NUMA) && pte_numa(pte)) | 67 | if ((flags & FOLL_NUMA) && pte_protnone(pte)) |
68 | goto no_page; | 68 | goto no_page; |
69 | if ((flags & FOLL_WRITE) && !pte_write(pte)) { | 69 | if ((flags & FOLL_WRITE) && !pte_write(pte)) { |
70 | pte_unmap_unlock(ptep, ptl); | 70 | pte_unmap_unlock(ptep, ptl); |
@@ -184,7 +184,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma, | |||
184 | return page; | 184 | return page; |
185 | return no_page_table(vma, flags); | 185 | return no_page_table(vma, flags); |
186 | } | 186 | } |
187 | if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) | 187 | if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) |
188 | return no_page_table(vma, flags); | 188 | return no_page_table(vma, flags); |
189 | if (pmd_trans_huge(*pmd)) { | 189 | if (pmd_trans_huge(*pmd)) { |
190 | if (flags & FOLL_SPLIT) { | 190 | if (flags & FOLL_SPLIT) { |
@@ -906,10 +906,10 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, | |||
906 | 906 | ||
907 | /* | 907 | /* |
908 | * Similar to the PMD case below, NUMA hinting must take slow | 908 | * Similar to the PMD case below, NUMA hinting must take slow |
909 | * path | 909 | * path using the pte_protnone check. |
910 | */ | 910 | */ |
911 | if (!pte_present(pte) || pte_special(pte) || | 911 | if (!pte_present(pte) || pte_special(pte) || |
912 | pte_numa(pte) || (write && !pte_write(pte))) | 912 | pte_protnone(pte) || (write && !pte_write(pte))) |
913 | goto pte_unmap; | 913 | goto pte_unmap; |
914 | 914 | ||
915 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | 915 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); |
@@ -1104,7 +1104,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | |||
1104 | * slowpath for accounting purposes and so that they | 1104 | * slowpath for accounting purposes and so that they |
1105 | * can be serialised against THP migration. | 1105 | * can be serialised against THP migration. |
1106 | */ | 1106 | */ |
1107 | if (pmd_numa(pmd)) | 1107 | if (pmd_protnone(pmd)) |
1108 | return 0; | 1108 | return 0; |
1109 | 1109 | ||
1110 | if (!gup_huge_pmd(pmd, pmdp, addr, next, write, | 1110 | if (!gup_huge_pmd(pmd, pmdp, addr, next, write, |
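These gup.c hunks are the heart of the series' theme: a NUMA hinting entry is no longer marked with dedicated _PAGE_NUMA bits but is simply a PROT_NONE-style mapping, so get_user_pages() probes it with pte_protnone()/pmd_protnone() and punts to the slow path. A condensed, kernel-context sketch of the fast-path filter built only from the helpers visible in the diff (gup_fast_pte_ok_sketch is an illustrative name, not a kernel function):

#include <linux/mm.h>

/*
 * Sketch of the fast-GUP pte filter after this series: bail to the slow
 * path for anything not present, special, PROT_NONE (i.e. a NUMA hinting
 * entry), or not writable when a write is requested.
 */
static bool gup_fast_pte_ok_sketch(pte_t pte, bool write)
{
	if (!pte_present(pte) || pte_special(pte) || pte_protnone(pte))
		return false;
	return !write || pte_write(pte);
}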
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cb7be110cad3..fc00c8cb5a82 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1211,7 +1211,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, | |||
1211 | return ERR_PTR(-EFAULT); | 1211 | return ERR_PTR(-EFAULT); |
1212 | 1212 | ||
1213 | /* Full NUMA hinting faults to serialise migration in fault paths */ | 1213 | /* Full NUMA hinting faults to serialise migration in fault paths */ |
1214 | if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) | 1214 | if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) |
1215 | goto out; | 1215 | goto out; |
1216 | 1216 | ||
1217 | page = pmd_page(*pmd); | 1217 | page = pmd_page(*pmd); |
@@ -1262,6 +1262,9 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1262 | bool migrated = false; | 1262 | bool migrated = false; |
1263 | int flags = 0; | 1263 | int flags = 0; |
1264 | 1264 | ||
1265 | /* A PROT_NONE fault should not end up here */ | ||
1266 | BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))); | ||
1267 | |||
1265 | ptl = pmd_lock(mm, pmdp); | 1268 | ptl = pmd_lock(mm, pmdp); |
1266 | if (unlikely(!pmd_same(pmd, *pmdp))) | 1269 | if (unlikely(!pmd_same(pmd, *pmdp))) |
1267 | goto out_unlock; | 1270 | goto out_unlock; |
@@ -1272,8 +1275,9 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1272 | * check_same as the page may no longer be mapped. | 1275 | * check_same as the page may no longer be mapped. |
1273 | */ | 1276 | */ |
1274 | if (unlikely(pmd_trans_migrating(*pmdp))) { | 1277 | if (unlikely(pmd_trans_migrating(*pmdp))) { |
1278 | page = pmd_page(*pmdp); | ||
1275 | spin_unlock(ptl); | 1279 | spin_unlock(ptl); |
1276 | wait_migrate_huge_page(vma->anon_vma, pmdp); | 1280 | wait_on_page_locked(page); |
1277 | goto out; | 1281 | goto out; |
1278 | } | 1282 | } |
1279 | 1283 | ||
@@ -1341,7 +1345,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1341 | 1345 | ||
1342 | /* | 1346 | /* |
1343 | * Migrate the THP to the requested node, returns with page unlocked | 1347 | * Migrate the THP to the requested node, returns with page unlocked |
1344 | * and pmd_numa cleared. | 1348 | * and access rights restored. |
1345 | */ | 1349 | */ |
1346 | spin_unlock(ptl); | 1350 | spin_unlock(ptl); |
1347 | migrated = migrate_misplaced_transhuge_page(mm, vma, | 1351 | migrated = migrate_misplaced_transhuge_page(mm, vma, |
@@ -1354,9 +1358,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1354 | goto out; | 1358 | goto out; |
1355 | clear_pmdnuma: | 1359 | clear_pmdnuma: |
1356 | BUG_ON(!PageLocked(page)); | 1360 | BUG_ON(!PageLocked(page)); |
1357 | pmd = pmd_mknonnuma(pmd); | 1361 | pmd = pmd_modify(pmd, vma->vm_page_prot); |
1358 | set_pmd_at(mm, haddr, pmdp, pmd); | 1362 | set_pmd_at(mm, haddr, pmdp, pmd); |
1359 | VM_BUG_ON(pmd_numa(*pmdp)); | ||
1360 | update_mmu_cache_pmd(vma, addr, pmdp); | 1363 | update_mmu_cache_pmd(vma, addr, pmdp); |
1361 | unlock_page(page); | 1364 | unlock_page(page); |
1362 | out_unlock: | 1365 | out_unlock: |
@@ -1479,29 +1482,24 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
1479 | 1482 | ||
1480 | if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { | 1483 | if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { |
1481 | pmd_t entry; | 1484 | pmd_t entry; |
1482 | ret = 1; | 1485 | |
1483 | if (!prot_numa) { | 1486 | /* |
1487 | * Avoid trapping faults against the zero page. The read-only | ||
1488 | * data is likely to be read-cached on the local CPU and | ||
1489 | * local/remote hits to the zero page are not interesting. | ||
1490 | */ | ||
1491 | if (prot_numa && is_huge_zero_pmd(*pmd)) { | ||
1492 | spin_unlock(ptl); | ||
1493 | return 0; | ||
1494 | } | ||
1495 | |||
1496 | if (!prot_numa || !pmd_protnone(*pmd)) { | ||
1497 | ret = 1; | ||
1484 | entry = pmdp_get_and_clear_notify(mm, addr, pmd); | 1498 | entry = pmdp_get_and_clear_notify(mm, addr, pmd); |
1485 | if (pmd_numa(entry)) | ||
1486 | entry = pmd_mknonnuma(entry); | ||
1487 | entry = pmd_modify(entry, newprot); | 1499 | entry = pmd_modify(entry, newprot); |
1488 | ret = HPAGE_PMD_NR; | 1500 | ret = HPAGE_PMD_NR; |
1489 | set_pmd_at(mm, addr, pmd, entry); | 1501 | set_pmd_at(mm, addr, pmd, entry); |
1490 | BUG_ON(pmd_write(entry)); | 1502 | BUG_ON(pmd_write(entry)); |
1491 | } else { | ||
1492 | struct page *page = pmd_page(*pmd); | ||
1493 | |||
1494 | /* | ||
1495 | * Do not trap faults against the zero page. The | ||
1496 | * read-only data is likely to be read-cached on the | ||
1497 | * local CPU cache and it is less useful to know about | ||
1498 | * local vs remote hits on the zero page. | ||
1499 | */ | ||
1500 | if (!is_huge_zero_page(page) && | ||
1501 | !pmd_numa(*pmd)) { | ||
1502 | pmdp_set_numa(mm, addr, pmd); | ||
1503 | ret = HPAGE_PMD_NR; | ||
1504 | } | ||
1505 | } | 1503 | } |
1506 | spin_unlock(ptl); | 1504 | spin_unlock(ptl); |
1507 | } | 1505 | } |
@@ -1766,9 +1764,9 @@ static int __split_huge_page_map(struct page *page, | |||
1766 | pte_t *pte, entry; | 1764 | pte_t *pte, entry; |
1767 | BUG_ON(PageCompound(page+i)); | 1765 | BUG_ON(PageCompound(page+i)); |
1768 | /* | 1766 | /* |
1769 | * Note that pmd_numa is not transferred deliberately | 1767 | * Note that NUMA hinting access restrictions are not |
1770 | * to avoid any possibility that pte_numa leaks to | 1768 | * transferred to avoid any possibility of altering |
1771 | * a PROT_NONE VMA by accident. | 1769 | * permissions across VMAs. |
1772 | */ | 1770 | */ |
1773 | entry = mk_pte(page + i, vma->vm_page_prot); | 1771 | entry = mk_pte(page + i, vma->vm_page_prot); |
1774 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 1772 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
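Net effect of the huge_memory.c changes: a THP hinting fault is armed by turning the pmd into a PROT_NONE entry and cleared by rebuilding it from the protection bits, and change_huge_pmd() now skips the huge zero page and pmds that are already PROT_NONE. A compressed kernel-context sketch of that decision, using the helpers that appear in the hunk and omitting the locking, mmu-notifier and TLB work of the real function (prot_numa_one_pmd_sketch is an illustrative name):

#include <linux/mm.h>
#include <linux/huge_mm.h>

static int prot_numa_one_pmd_sketch(struct vm_area_struct *vma, pmd_t *pmd,
				    unsigned long addr, pgprot_t newprot,
				    bool prot_numa)
{
	if (prot_numa && is_huge_zero_pmd(*pmd))
		return 0;			/* zero-page hits are not interesting */
	if (prot_numa && pmd_protnone(*pmd))
		return 0;			/* already carries a hinting fault */

	/* real code uses pmdp_get_and_clear_notify() before rewriting */
	set_pmd_at(vma->vm_mm, addr, pmd, pmd_modify(*pmd, newprot));
	return HPAGE_PMD_NR;
}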
diff --git a/mm/internal.h b/mm/internal.h
index c4d6c9b43491..a96da5b0029d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -351,8 +351,10 @@ extern int mminit_loglevel; | |||
351 | #define mminit_dprintk(level, prefix, fmt, arg...) \ | 351 | #define mminit_dprintk(level, prefix, fmt, arg...) \ |
352 | do { \ | 352 | do { \ |
353 | if (level < mminit_loglevel) { \ | 353 | if (level < mminit_loglevel) { \ |
354 | printk(level <= MMINIT_WARNING ? KERN_WARNING : KERN_DEBUG); \ | 354 | if (level <= MMINIT_WARNING) \ |
355 | printk(KERN_CONT "mminit::" prefix " " fmt, ##arg); \ | 355 | printk(KERN_WARNING "mminit::" prefix " " fmt, ##arg); \ |
356 | else \ | ||
357 | printk(KERN_DEBUG "mminit::" prefix " " fmt, ##arg); \ | ||
356 | } \ | 358 | } \ |
357 | } while (0) | 359 | } while (0) |
358 | 360 | ||
diff --git a/mm/list_lru.c b/mm/list_lru.c
index f1a0db194173..909eca2c820e 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -9,18 +9,100 @@ | |||
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/list_lru.h> | 10 | #include <linux/list_lru.h> |
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | #include <linux/mutex.h> | ||
13 | #include <linux/memcontrol.h> | ||
14 | |||
15 | #ifdef CONFIG_MEMCG_KMEM | ||
16 | static LIST_HEAD(list_lrus); | ||
17 | static DEFINE_MUTEX(list_lrus_mutex); | ||
18 | |||
19 | static void list_lru_register(struct list_lru *lru) | ||
20 | { | ||
21 | mutex_lock(&list_lrus_mutex); | ||
22 | list_add(&lru->list, &list_lrus); | ||
23 | mutex_unlock(&list_lrus_mutex); | ||
24 | } | ||
25 | |||
26 | static void list_lru_unregister(struct list_lru *lru) | ||
27 | { | ||
28 | mutex_lock(&list_lrus_mutex); | ||
29 | list_del(&lru->list); | ||
30 | mutex_unlock(&list_lrus_mutex); | ||
31 | } | ||
32 | #else | ||
33 | static void list_lru_register(struct list_lru *lru) | ||
34 | { | ||
35 | } | ||
36 | |||
37 | static void list_lru_unregister(struct list_lru *lru) | ||
38 | { | ||
39 | } | ||
40 | #endif /* CONFIG_MEMCG_KMEM */ | ||
41 | |||
42 | #ifdef CONFIG_MEMCG_KMEM | ||
43 | static inline bool list_lru_memcg_aware(struct list_lru *lru) | ||
44 | { | ||
45 | return !!lru->node[0].memcg_lrus; | ||
46 | } | ||
47 | |||
48 | static inline struct list_lru_one * | ||
49 | list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx) | ||
50 | { | ||
51 | /* | ||
52 | * The lock protects the array of per cgroup lists from relocation | ||
53 | * (see memcg_update_list_lru_node). | ||
54 | */ | ||
55 | lockdep_assert_held(&nlru->lock); | ||
56 | if (nlru->memcg_lrus && idx >= 0) | ||
57 | return nlru->memcg_lrus->lru[idx]; | ||
58 | |||
59 | return &nlru->lru; | ||
60 | } | ||
61 | |||
62 | static inline struct list_lru_one * | ||
63 | list_lru_from_kmem(struct list_lru_node *nlru, void *ptr) | ||
64 | { | ||
65 | struct mem_cgroup *memcg; | ||
66 | |||
67 | if (!nlru->memcg_lrus) | ||
68 | return &nlru->lru; | ||
69 | |||
70 | memcg = mem_cgroup_from_kmem(ptr); | ||
71 | if (!memcg) | ||
72 | return &nlru->lru; | ||
73 | |||
74 | return list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg)); | ||
75 | } | ||
76 | #else | ||
77 | static inline bool list_lru_memcg_aware(struct list_lru *lru) | ||
78 | { | ||
79 | return false; | ||
80 | } | ||
81 | |||
82 | static inline struct list_lru_one * | ||
83 | list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx) | ||
84 | { | ||
85 | return &nlru->lru; | ||
86 | } | ||
87 | |||
88 | static inline struct list_lru_one * | ||
89 | list_lru_from_kmem(struct list_lru_node *nlru, void *ptr) | ||
90 | { | ||
91 | return &nlru->lru; | ||
92 | } | ||
93 | #endif /* CONFIG_MEMCG_KMEM */ | ||
12 | 94 | ||
13 | bool list_lru_add(struct list_lru *lru, struct list_head *item) | 95 | bool list_lru_add(struct list_lru *lru, struct list_head *item) |
14 | { | 96 | { |
15 | int nid = page_to_nid(virt_to_page(item)); | 97 | int nid = page_to_nid(virt_to_page(item)); |
16 | struct list_lru_node *nlru = &lru->node[nid]; | 98 | struct list_lru_node *nlru = &lru->node[nid]; |
99 | struct list_lru_one *l; | ||
17 | 100 | ||
18 | spin_lock(&nlru->lock); | 101 | spin_lock(&nlru->lock); |
19 | WARN_ON_ONCE(nlru->nr_items < 0); | 102 | l = list_lru_from_kmem(nlru, item); |
20 | if (list_empty(item)) { | 103 | if (list_empty(item)) { |
21 | list_add_tail(item, &nlru->list); | 104 | list_add_tail(item, &l->list); |
22 | if (nlru->nr_items++ == 0) | 105 | l->nr_items++; |
23 | node_set(nid, lru->active_nodes); | ||
24 | spin_unlock(&nlru->lock); | 106 | spin_unlock(&nlru->lock); |
25 | return true; | 107 | return true; |
26 | } | 108 | } |
@@ -33,13 +115,13 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item) | |||
33 | { | 115 | { |
34 | int nid = page_to_nid(virt_to_page(item)); | 116 | int nid = page_to_nid(virt_to_page(item)); |
35 | struct list_lru_node *nlru = &lru->node[nid]; | 117 | struct list_lru_node *nlru = &lru->node[nid]; |
118 | struct list_lru_one *l; | ||
36 | 119 | ||
37 | spin_lock(&nlru->lock); | 120 | spin_lock(&nlru->lock); |
121 | l = list_lru_from_kmem(nlru, item); | ||
38 | if (!list_empty(item)) { | 122 | if (!list_empty(item)) { |
39 | list_del_init(item); | 123 | list_del_init(item); |
40 | if (--nlru->nr_items == 0) | 124 | l->nr_items--; |
41 | node_clear(nid, lru->active_nodes); | ||
42 | WARN_ON_ONCE(nlru->nr_items < 0); | ||
43 | spin_unlock(&nlru->lock); | 125 | spin_unlock(&nlru->lock); |
44 | return true; | 126 | return true; |
45 | } | 127 | } |
@@ -48,33 +130,72 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item) | |||
48 | } | 130 | } |
49 | EXPORT_SYMBOL_GPL(list_lru_del); | 131 | EXPORT_SYMBOL_GPL(list_lru_del); |
50 | 132 | ||
51 | unsigned long | 133 | void list_lru_isolate(struct list_lru_one *list, struct list_head *item) |
52 | list_lru_count_node(struct list_lru *lru, int nid) | 134 | { |
135 | list_del_init(item); | ||
136 | list->nr_items--; | ||
137 | } | ||
138 | EXPORT_SYMBOL_GPL(list_lru_isolate); | ||
139 | |||
140 | void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item, | ||
141 | struct list_head *head) | ||
142 | { | ||
143 | list_move(item, head); | ||
144 | list->nr_items--; | ||
145 | } | ||
146 | EXPORT_SYMBOL_GPL(list_lru_isolate_move); | ||
147 | |||
148 | static unsigned long __list_lru_count_one(struct list_lru *lru, | ||
149 | int nid, int memcg_idx) | ||
53 | { | 150 | { |
54 | unsigned long count = 0; | ||
55 | struct list_lru_node *nlru = &lru->node[nid]; | 151 | struct list_lru_node *nlru = &lru->node[nid]; |
152 | struct list_lru_one *l; | ||
153 | unsigned long count; | ||
56 | 154 | ||
57 | spin_lock(&nlru->lock); | 155 | spin_lock(&nlru->lock); |
58 | WARN_ON_ONCE(nlru->nr_items < 0); | 156 | l = list_lru_from_memcg_idx(nlru, memcg_idx); |
59 | count += nlru->nr_items; | 157 | count = l->nr_items; |
60 | spin_unlock(&nlru->lock); | 158 | spin_unlock(&nlru->lock); |
61 | 159 | ||
62 | return count; | 160 | return count; |
63 | } | 161 | } |
162 | |||
163 | unsigned long list_lru_count_one(struct list_lru *lru, | ||
164 | int nid, struct mem_cgroup *memcg) | ||
165 | { | ||
166 | return __list_lru_count_one(lru, nid, memcg_cache_id(memcg)); | ||
167 | } | ||
168 | EXPORT_SYMBOL_GPL(list_lru_count_one); | ||
169 | |||
170 | unsigned long list_lru_count_node(struct list_lru *lru, int nid) | ||
171 | { | ||
172 | long count = 0; | ||
173 | int memcg_idx; | ||
174 | |||
175 | count += __list_lru_count_one(lru, nid, -1); | ||
176 | if (list_lru_memcg_aware(lru)) { | ||
177 | for_each_memcg_cache_index(memcg_idx) | ||
178 | count += __list_lru_count_one(lru, nid, memcg_idx); | ||
179 | } | ||
180 | return count; | ||
181 | } | ||
64 | EXPORT_SYMBOL_GPL(list_lru_count_node); | 182 | EXPORT_SYMBOL_GPL(list_lru_count_node); |
65 | 183 | ||
66 | unsigned long | 184 | static unsigned long |
67 | list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate, | 185 | __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx, |
68 | void *cb_arg, unsigned long *nr_to_walk) | 186 | list_lru_walk_cb isolate, void *cb_arg, |
187 | unsigned long *nr_to_walk) | ||
69 | { | 188 | { |
70 | 189 | ||
71 | struct list_lru_node *nlru = &lru->node[nid]; | 190 | struct list_lru_node *nlru = &lru->node[nid]; |
191 | struct list_lru_one *l; | ||
72 | struct list_head *item, *n; | 192 | struct list_head *item, *n; |
73 | unsigned long isolated = 0; | 193 | unsigned long isolated = 0; |
74 | 194 | ||
75 | spin_lock(&nlru->lock); | 195 | spin_lock(&nlru->lock); |
196 | l = list_lru_from_memcg_idx(nlru, memcg_idx); | ||
76 | restart: | 197 | restart: |
77 | list_for_each_safe(item, n, &nlru->list) { | 198 | list_for_each_safe(item, n, &l->list) { |
78 | enum lru_status ret; | 199 | enum lru_status ret; |
79 | 200 | ||
80 | /* | 201 | /* |
@@ -85,14 +206,11 @@ restart: | |||
85 | break; | 206 | break; |
86 | --*nr_to_walk; | 207 | --*nr_to_walk; |
87 | 208 | ||
88 | ret = isolate(item, &nlru->lock, cb_arg); | 209 | ret = isolate(item, l, &nlru->lock, cb_arg); |
89 | switch (ret) { | 210 | switch (ret) { |
90 | case LRU_REMOVED_RETRY: | 211 | case LRU_REMOVED_RETRY: |
91 | assert_spin_locked(&nlru->lock); | 212 | assert_spin_locked(&nlru->lock); |
92 | case LRU_REMOVED: | 213 | case LRU_REMOVED: |
93 | if (--nlru->nr_items == 0) | ||
94 | node_clear(nid, lru->active_nodes); | ||
95 | WARN_ON_ONCE(nlru->nr_items < 0); | ||
96 | isolated++; | 214 | isolated++; |
97 | /* | 215 | /* |
98 | * If the lru lock has been dropped, our list | 216 | * If the lru lock has been dropped, our list |
@@ -103,7 +221,7 @@ restart: | |||
103 | goto restart; | 221 | goto restart; |
104 | break; | 222 | break; |
105 | case LRU_ROTATE: | 223 | case LRU_ROTATE: |
106 | list_move_tail(item, &nlru->list); | 224 | list_move_tail(item, &l->list); |
107 | break; | 225 | break; |
108 | case LRU_SKIP: | 226 | case LRU_SKIP: |
109 | break; | 227 | break; |
@@ -122,31 +240,322 @@ restart: | |||
122 | spin_unlock(&nlru->lock); | 240 | spin_unlock(&nlru->lock); |
123 | return isolated; | 241 | return isolated; |
124 | } | 242 | } |
243 | |||
244 | unsigned long | ||
245 | list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, | ||
246 | list_lru_walk_cb isolate, void *cb_arg, | ||
247 | unsigned long *nr_to_walk) | ||
248 | { | ||
249 | return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg), | ||
250 | isolate, cb_arg, nr_to_walk); | ||
251 | } | ||
252 | EXPORT_SYMBOL_GPL(list_lru_walk_one); | ||
253 | |||
254 | unsigned long list_lru_walk_node(struct list_lru *lru, int nid, | ||
255 | list_lru_walk_cb isolate, void *cb_arg, | ||
256 | unsigned long *nr_to_walk) | ||
257 | { | ||
258 | long isolated = 0; | ||
259 | int memcg_idx; | ||
260 | |||
261 | isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg, | ||
262 | nr_to_walk); | ||
263 | if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) { | ||
264 | for_each_memcg_cache_index(memcg_idx) { | ||
265 | isolated += __list_lru_walk_one(lru, nid, memcg_idx, | ||
266 | isolate, cb_arg, nr_to_walk); | ||
267 | if (*nr_to_walk <= 0) | ||
268 | break; | ||
269 | } | ||
270 | } | ||
271 | return isolated; | ||
272 | } | ||
125 | EXPORT_SYMBOL_GPL(list_lru_walk_node); | 273 | EXPORT_SYMBOL_GPL(list_lru_walk_node); |
126 | 274 | ||
127 | int list_lru_init_key(struct list_lru *lru, struct lock_class_key *key) | 275 | static void init_one_lru(struct list_lru_one *l) |
276 | { | ||
277 | INIT_LIST_HEAD(&l->list); | ||
278 | l->nr_items = 0; | ||
279 | } | ||
280 | |||
281 | #ifdef CONFIG_MEMCG_KMEM | ||
282 | static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus, | ||
283 | int begin, int end) | ||
284 | { | ||
285 | int i; | ||
286 | |||
287 | for (i = begin; i < end; i++) | ||
288 | kfree(memcg_lrus->lru[i]); | ||
289 | } | ||
290 | |||
291 | static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus, | ||
292 | int begin, int end) | ||
293 | { | ||
294 | int i; | ||
295 | |||
296 | for (i = begin; i < end; i++) { | ||
297 | struct list_lru_one *l; | ||
298 | |||
299 | l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL); | ||
300 | if (!l) | ||
301 | goto fail; | ||
302 | |||
303 | init_one_lru(l); | ||
304 | memcg_lrus->lru[i] = l; | ||
305 | } | ||
306 | return 0; | ||
307 | fail: | ||
308 | __memcg_destroy_list_lru_node(memcg_lrus, begin, i - 1); | ||
309 | return -ENOMEM; | ||
310 | } | ||
311 | |||
312 | static int memcg_init_list_lru_node(struct list_lru_node *nlru) | ||
313 | { | ||
314 | int size = memcg_nr_cache_ids; | ||
315 | |||
316 | nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL); | ||
317 | if (!nlru->memcg_lrus) | ||
318 | return -ENOMEM; | ||
319 | |||
320 | if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) { | ||
321 | kfree(nlru->memcg_lrus); | ||
322 | return -ENOMEM; | ||
323 | } | ||
324 | |||
325 | return 0; | ||
326 | } | ||
327 | |||
328 | static void memcg_destroy_list_lru_node(struct list_lru_node *nlru) | ||
329 | { | ||
330 | __memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids); | ||
331 | kfree(nlru->memcg_lrus); | ||
332 | } | ||
333 | |||
334 | static int memcg_update_list_lru_node(struct list_lru_node *nlru, | ||
335 | int old_size, int new_size) | ||
336 | { | ||
337 | struct list_lru_memcg *old, *new; | ||
338 | |||
339 | BUG_ON(old_size > new_size); | ||
340 | |||
341 | old = nlru->memcg_lrus; | ||
342 | new = kmalloc(new_size * sizeof(void *), GFP_KERNEL); | ||
343 | if (!new) | ||
344 | return -ENOMEM; | ||
345 | |||
346 | if (__memcg_init_list_lru_node(new, old_size, new_size)) { | ||
347 | kfree(new); | ||
348 | return -ENOMEM; | ||
349 | } | ||
350 | |||
351 | memcpy(new, old, old_size * sizeof(void *)); | ||
352 | |||
353 | /* | ||
354 | * The lock guarantees that we won't race with a reader | ||
355 | * (see list_lru_from_memcg_idx). | ||
356 | * | ||
357 | * Since list_lru_{add,del} may be called under an IRQ-safe lock, | ||
358 | * we have to use IRQ-safe primitives here to avoid deadlock. | ||
359 | */ | ||
360 | spin_lock_irq(&nlru->lock); | ||
361 | nlru->memcg_lrus = new; | ||
362 | spin_unlock_irq(&nlru->lock); | ||
363 | |||
364 | kfree(old); | ||
365 | return 0; | ||
366 | } | ||
367 | |||
368 | static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru, | ||
369 | int old_size, int new_size) | ||
370 | { | ||
371 | /* do not bother shrinking the array back to the old size, because we | ||
372 | * cannot handle allocation failures here */ | ||
373 | __memcg_destroy_list_lru_node(nlru->memcg_lrus, old_size, new_size); | ||
374 | } | ||
375 | |||
376 | static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) | ||
377 | { | ||
378 | int i; | ||
379 | |||
380 | for (i = 0; i < nr_node_ids; i++) { | ||
381 | if (!memcg_aware) | ||
382 | lru->node[i].memcg_lrus = NULL; | ||
383 | else if (memcg_init_list_lru_node(&lru->node[i])) | ||
384 | goto fail; | ||
385 | } | ||
386 | return 0; | ||
387 | fail: | ||
388 | for (i = i - 1; i >= 0; i--) | ||
389 | memcg_destroy_list_lru_node(&lru->node[i]); | ||
390 | return -ENOMEM; | ||
391 | } | ||
392 | |||
393 | static void memcg_destroy_list_lru(struct list_lru *lru) | ||
394 | { | ||
395 | int i; | ||
396 | |||
397 | if (!list_lru_memcg_aware(lru)) | ||
398 | return; | ||
399 | |||
400 | for (i = 0; i < nr_node_ids; i++) | ||
401 | memcg_destroy_list_lru_node(&lru->node[i]); | ||
402 | } | ||
403 | |||
404 | static int memcg_update_list_lru(struct list_lru *lru, | ||
405 | int old_size, int new_size) | ||
406 | { | ||
407 | int i; | ||
408 | |||
409 | if (!list_lru_memcg_aware(lru)) | ||
410 | return 0; | ||
411 | |||
412 | for (i = 0; i < nr_node_ids; i++) { | ||
413 | if (memcg_update_list_lru_node(&lru->node[i], | ||
414 | old_size, new_size)) | ||
415 | goto fail; | ||
416 | } | ||
417 | return 0; | ||
418 | fail: | ||
419 | for (i = i - 1; i >= 0; i--) | ||
420 | memcg_cancel_update_list_lru_node(&lru->node[i], | ||
421 | old_size, new_size); | ||
422 | return -ENOMEM; | ||
423 | } | ||
424 | |||
425 | static void memcg_cancel_update_list_lru(struct list_lru *lru, | ||
426 | int old_size, int new_size) | ||
427 | { | ||
428 | int i; | ||
429 | |||
430 | if (!list_lru_memcg_aware(lru)) | ||
431 | return; | ||
432 | |||
433 | for (i = 0; i < nr_node_ids; i++) | ||
434 | memcg_cancel_update_list_lru_node(&lru->node[i], | ||
435 | old_size, new_size); | ||
436 | } | ||
437 | |||
438 | int memcg_update_all_list_lrus(int new_size) | ||
439 | { | ||
440 | int ret = 0; | ||
441 | struct list_lru *lru; | ||
442 | int old_size = memcg_nr_cache_ids; | ||
443 | |||
444 | mutex_lock(&list_lrus_mutex); | ||
445 | list_for_each_entry(lru, &list_lrus, list) { | ||
446 | ret = memcg_update_list_lru(lru, old_size, new_size); | ||
447 | if (ret) | ||
448 | goto fail; | ||
449 | } | ||
450 | out: | ||
451 | mutex_unlock(&list_lrus_mutex); | ||
452 | return ret; | ||
453 | fail: | ||
454 | list_for_each_entry_continue_reverse(lru, &list_lrus, list) | ||
455 | memcg_cancel_update_list_lru(lru, old_size, new_size); | ||
456 | goto out; | ||
457 | } | ||
458 | |||
459 | static void memcg_drain_list_lru_node(struct list_lru_node *nlru, | ||
460 | int src_idx, int dst_idx) | ||
461 | { | ||
462 | struct list_lru_one *src, *dst; | ||
463 | |||
464 | /* | ||
465 | * Since list_lru_{add,del} may be called under an IRQ-safe lock, | ||
466 | * we have to use IRQ-safe primitives here to avoid deadlock. | ||
467 | */ | ||
468 | spin_lock_irq(&nlru->lock); | ||
469 | |||
470 | src = list_lru_from_memcg_idx(nlru, src_idx); | ||
471 | dst = list_lru_from_memcg_idx(nlru, dst_idx); | ||
472 | |||
473 | list_splice_init(&src->list, &dst->list); | ||
474 | dst->nr_items += src->nr_items; | ||
475 | src->nr_items = 0; | ||
476 | |||
477 | spin_unlock_irq(&nlru->lock); | ||
478 | } | ||
479 | |||
480 | static void memcg_drain_list_lru(struct list_lru *lru, | ||
481 | int src_idx, int dst_idx) | ||
482 | { | ||
483 | int i; | ||
484 | |||
485 | if (!list_lru_memcg_aware(lru)) | ||
486 | return; | ||
487 | |||
488 | for (i = 0; i < nr_node_ids; i++) | ||
489 | memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_idx); | ||
490 | } | ||
491 | |||
492 | void memcg_drain_all_list_lrus(int src_idx, int dst_idx) | ||
493 | { | ||
494 | struct list_lru *lru; | ||
495 | |||
496 | mutex_lock(&list_lrus_mutex); | ||
497 | list_for_each_entry(lru, &list_lrus, list) | ||
498 | memcg_drain_list_lru(lru, src_idx, dst_idx); | ||
499 | mutex_unlock(&list_lrus_mutex); | ||
500 | } | ||
501 | #else | ||
502 | static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) | ||
503 | { | ||
504 | return 0; | ||
505 | } | ||
506 | |||
507 | static void memcg_destroy_list_lru(struct list_lru *lru) | ||
508 | { | ||
509 | } | ||
510 | #endif /* CONFIG_MEMCG_KMEM */ | ||
511 | |||
512 | int __list_lru_init(struct list_lru *lru, bool memcg_aware, | ||
513 | struct lock_class_key *key) | ||
128 | { | 514 | { |
129 | int i; | 515 | int i; |
130 | size_t size = sizeof(*lru->node) * nr_node_ids; | 516 | size_t size = sizeof(*lru->node) * nr_node_ids; |
517 | int err = -ENOMEM; | ||
518 | |||
519 | memcg_get_cache_ids(); | ||
131 | 520 | ||
132 | lru->node = kzalloc(size, GFP_KERNEL); | 521 | lru->node = kzalloc(size, GFP_KERNEL); |
133 | if (!lru->node) | 522 | if (!lru->node) |
134 | return -ENOMEM; | 523 | goto out; |
135 | 524 | ||
136 | nodes_clear(lru->active_nodes); | ||
137 | for (i = 0; i < nr_node_ids; i++) { | 525 | for (i = 0; i < nr_node_ids; i++) { |
138 | spin_lock_init(&lru->node[i].lock); | 526 | spin_lock_init(&lru->node[i].lock); |
139 | if (key) | 527 | if (key) |
140 | lockdep_set_class(&lru->node[i].lock, key); | 528 | lockdep_set_class(&lru->node[i].lock, key); |
141 | INIT_LIST_HEAD(&lru->node[i].list); | 529 | init_one_lru(&lru->node[i].lru); |
142 | lru->node[i].nr_items = 0; | ||
143 | } | 530 | } |
144 | return 0; | 531 | |
532 | err = memcg_init_list_lru(lru, memcg_aware); | ||
533 | if (err) { | ||
534 | kfree(lru->node); | ||
535 | goto out; | ||
536 | } | ||
537 | |||
538 | list_lru_register(lru); | ||
539 | out: | ||
540 | memcg_put_cache_ids(); | ||
541 | return err; | ||
145 | } | 542 | } |
146 | EXPORT_SYMBOL_GPL(list_lru_init_key); | 543 | EXPORT_SYMBOL_GPL(__list_lru_init); |
147 | 544 | ||
148 | void list_lru_destroy(struct list_lru *lru) | 545 | void list_lru_destroy(struct list_lru *lru) |
149 | { | 546 | { |
547 | /* Already destroyed or not yet initialized? */ | ||
548 | if (!lru->node) | ||
549 | return; | ||
550 | |||
551 | memcg_get_cache_ids(); | ||
552 | |||
553 | list_lru_unregister(lru); | ||
554 | |||
555 | memcg_destroy_list_lru(lru); | ||
150 | kfree(lru->node); | 556 | kfree(lru->node); |
557 | lru->node = NULL; | ||
558 | |||
559 | memcg_put_cache_ids(); | ||
151 | } | 560 | } |
152 | EXPORT_SYMBOL_GPL(list_lru_destroy); | 561 | EXPORT_SYMBOL_GPL(list_lru_destroy); |
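The rework above turns struct list_lru into per-node, per-memcg struct list_lru_one lists: lookups go through list_lru_from_kmem()/list_lru_from_memcg_idx(), the walk callback now receives the list it is iterating, and isolation goes through list_lru_isolate()/list_lru_isolate_move(). A minimal kernel-context usage sketch against the API exactly as it appears in this diff; struct my_object, my_lru and the my_* function names are illustrative, and error handling is elided:

#include <linux/list_lru.h>
#include <linux/memcontrol.h>

struct my_object {			/* hypothetical cached object */
	struct list_head lru;
	/* ... payload ... */
};

static struct list_lru my_lru;

static int my_setup(void)
{
	/* second argument asks for a memcg-aware (per-cgroup) lru */
	return __list_lru_init(&my_lru, true, NULL);
}

static bool my_add(struct my_object *obj)
{
	/* the item is routed to the right per-memcg list internally */
	return list_lru_add(&my_lru, &obj->lru);
}

/* new callback signature: the owning list_lru_one is passed in */
static enum lru_status my_isolate(struct list_head *item,
				  struct list_lru_one *list,
				  spinlock_t *lock, void *cb_arg)
{
	struct list_head *dispose = cb_arg;	/* caller-private free list */

	/* unhook from the lru and park the item for freeing after the walk */
	list_lru_isolate_move(list, item, dispose);
	return LRU_REMOVED;
}

static unsigned long my_shrink(int nid, struct mem_cgroup *memcg,
			       unsigned long nr_to_walk,
			       struct list_head *dispose)
{
	/* walk only the list belonging to 'memcg' on node 'nid' */
	return list_lru_walk_one(&my_lru, nid, memcg, my_isolate,
				 dispose, &nr_to_walk);
}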
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 095c1f96fbec..d18d3a6e7337 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -332,8 +332,10 @@ struct mem_cgroup { | |||
332 | struct cg_proto tcp_mem; | 332 | struct cg_proto tcp_mem; |
333 | #endif | 333 | #endif |
334 | #if defined(CONFIG_MEMCG_KMEM) | 334 | #if defined(CONFIG_MEMCG_KMEM) |
335 | /* Index in the kmem_cache->memcg_params->memcg_caches array */ | 335 | /* Index in the kmem_cache->memcg_params.memcg_caches array */ |
336 | int kmemcg_id; | 336 | int kmemcg_id; |
337 | bool kmem_acct_activated; | ||
338 | bool kmem_acct_active; | ||
337 | #endif | 339 | #endif |
338 | 340 | ||
339 | int last_scanned_node; | 341 | int last_scanned_node; |
@@ -352,9 +354,9 @@ struct mem_cgroup { | |||
352 | }; | 354 | }; |
353 | 355 | ||
354 | #ifdef CONFIG_MEMCG_KMEM | 356 | #ifdef CONFIG_MEMCG_KMEM |
355 | static bool memcg_kmem_is_active(struct mem_cgroup *memcg) | 357 | bool memcg_kmem_is_active(struct mem_cgroup *memcg) |
356 | { | 358 | { |
357 | return memcg->kmemcg_id >= 0; | 359 | return memcg->kmem_acct_active; |
358 | } | 360 | } |
359 | #endif | 361 | #endif |
360 | 362 | ||
@@ -517,33 +519,35 @@ struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg) | |||
517 | } | 519 | } |
518 | EXPORT_SYMBOL(tcp_proto_cgroup); | 520 | EXPORT_SYMBOL(tcp_proto_cgroup); |
519 | 521 | ||
520 | static void disarm_sock_keys(struct mem_cgroup *memcg) | ||
521 | { | ||
522 | if (!memcg_proto_activated(&memcg->tcp_mem)) | ||
523 | return; | ||
524 | static_key_slow_dec(&memcg_socket_limit_enabled); | ||
525 | } | ||
526 | #else | ||
527 | static void disarm_sock_keys(struct mem_cgroup *memcg) | ||
528 | { | ||
529 | } | ||
530 | #endif | 522 | #endif |
531 | 523 | ||
532 | #ifdef CONFIG_MEMCG_KMEM | 524 | #ifdef CONFIG_MEMCG_KMEM |
533 | /* | 525 | /* |
534 | * This will be the memcg's index in each cache's ->memcg_params->memcg_caches. | 526 | * This will be the memcg's index in each cache's ->memcg_params.memcg_caches. |
535 | * The main reason for not using cgroup id for this: | 527 | * The main reason for not using cgroup id for this: |
536 | * this works better in sparse environments, where we have a lot of memcgs, | 528 | * this works better in sparse environments, where we have a lot of memcgs, |
537 | * but only a few kmem-limited. Or also, if we have, for instance, 200 | 529 | * but only a few kmem-limited. Or also, if we have, for instance, 200 |
538 | * memcgs, and none but the 200th is kmem-limited, we'd have to have a | 530 | * memcgs, and none but the 200th is kmem-limited, we'd have to have a |
539 | * 200 entry array for that. | 531 | * 200 entry array for that. |
540 | * | 532 | * |
541 | * The current size of the caches array is stored in | 533 | * The current size of the caches array is stored in memcg_nr_cache_ids. It |
542 | * memcg_limited_groups_array_size. It will double each time we have to | 534 | * will double each time we have to increase it. |
543 | * increase it. | ||
544 | */ | 535 | */ |
545 | static DEFINE_IDA(kmem_limited_groups); | 536 | static DEFINE_IDA(memcg_cache_ida); |
546 | int memcg_limited_groups_array_size; | 537 | int memcg_nr_cache_ids; |
538 | |||
539 | /* Protects memcg_nr_cache_ids */ | ||
540 | static DECLARE_RWSEM(memcg_cache_ids_sem); | ||
541 | |||
542 | void memcg_get_cache_ids(void) | ||
543 | { | ||
544 | down_read(&memcg_cache_ids_sem); | ||
545 | } | ||
546 | |||
547 | void memcg_put_cache_ids(void) | ||
548 | { | ||
549 | up_read(&memcg_cache_ids_sem); | ||
550 | } | ||
547 | 551 | ||
548 | /* | 552 | /* |
549 | * MIN_SIZE is different than 1, because we would like to avoid going through | 553 | * MIN_SIZE is different than 1, because we would like to avoid going through |
@@ -569,32 +573,8 @@ int memcg_limited_groups_array_size; | |||
569 | struct static_key memcg_kmem_enabled_key; | 573 | struct static_key memcg_kmem_enabled_key; |
570 | EXPORT_SYMBOL(memcg_kmem_enabled_key); | 574 | EXPORT_SYMBOL(memcg_kmem_enabled_key); |
571 | 575 | ||
572 | static void memcg_free_cache_id(int id); | ||
573 | |||
574 | static void disarm_kmem_keys(struct mem_cgroup *memcg) | ||
575 | { | ||
576 | if (memcg_kmem_is_active(memcg)) { | ||
577 | static_key_slow_dec(&memcg_kmem_enabled_key); | ||
578 | memcg_free_cache_id(memcg->kmemcg_id); | ||
579 | } | ||
580 | /* | ||
581 | * This check can't live in kmem destruction function, | ||
582 | * since the charges will outlive the cgroup | ||
583 | */ | ||
584 | WARN_ON(page_counter_read(&memcg->kmem)); | ||
585 | } | ||
586 | #else | ||
587 | static void disarm_kmem_keys(struct mem_cgroup *memcg) | ||
588 | { | ||
589 | } | ||
590 | #endif /* CONFIG_MEMCG_KMEM */ | 576 | #endif /* CONFIG_MEMCG_KMEM */ |
591 | 577 | ||
592 | static void disarm_static_keys(struct mem_cgroup *memcg) | ||
593 | { | ||
594 | disarm_sock_keys(memcg); | ||
595 | disarm_kmem_keys(memcg); | ||
596 | } | ||
597 | |||
598 | static struct mem_cgroup_per_zone * | 578 | static struct mem_cgroup_per_zone * |
599 | mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone) | 579 | mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone) |
600 | { | 580 | { |
@@ -2538,18 +2518,19 @@ static int memcg_alloc_cache_id(void) | |||
2538 | int id, size; | 2518 | int id, size; |
2539 | int err; | 2519 | int err; |
2540 | 2520 | ||
2541 | id = ida_simple_get(&kmem_limited_groups, | 2521 | id = ida_simple_get(&memcg_cache_ida, |
2542 | 0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL); | 2522 | 0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL); |
2543 | if (id < 0) | 2523 | if (id < 0) |
2544 | return id; | 2524 | return id; |
2545 | 2525 | ||
2546 | if (id < memcg_limited_groups_array_size) | 2526 | if (id < memcg_nr_cache_ids) |
2547 | return id; | 2527 | return id; |
2548 | 2528 | ||
2549 | /* | 2529 | /* |
2550 | * There's no space for the new id in memcg_caches arrays, | 2530 | * There's no space for the new id in memcg_caches arrays, |
2551 | * so we have to grow them. | 2531 | * so we have to grow them. |
2552 | */ | 2532 | */ |
2533 | down_write(&memcg_cache_ids_sem); | ||
2553 | 2534 | ||
2554 | size = 2 * (id + 1); | 2535 | size = 2 * (id + 1); |
2555 | if (size < MEMCG_CACHES_MIN_SIZE) | 2536 | if (size < MEMCG_CACHES_MIN_SIZE) |
@@ -2558,8 +2539,15 @@ static int memcg_alloc_cache_id(void) | |||
2558 | size = MEMCG_CACHES_MAX_SIZE; | 2539 | size = MEMCG_CACHES_MAX_SIZE; |
2559 | 2540 | ||
2560 | err = memcg_update_all_caches(size); | 2541 | err = memcg_update_all_caches(size); |
2542 | if (!err) | ||
2543 | err = memcg_update_all_list_lrus(size); | ||
2544 | if (!err) | ||
2545 | memcg_nr_cache_ids = size; | ||
2546 | |||
2547 | up_write(&memcg_cache_ids_sem); | ||
2548 | |||
2561 | if (err) { | 2549 | if (err) { |
2562 | ida_simple_remove(&kmem_limited_groups, id); | 2550 | ida_simple_remove(&memcg_cache_ida, id); |
2563 | return err; | 2551 | return err; |
2564 | } | 2552 | } |
2565 | return id; | 2553 | return id; |
@@ -2567,17 +2555,7 @@ static int memcg_alloc_cache_id(void) | |||
2567 | 2555 | ||
2568 | static void memcg_free_cache_id(int id) | 2556 | static void memcg_free_cache_id(int id) |
2569 | { | 2557 | { |
2570 | ida_simple_remove(&kmem_limited_groups, id); | 2558 | ida_simple_remove(&memcg_cache_ida, id); |
2571 | } | ||
2572 | |||
2573 | /* | ||
2574 | * We should update the current array size iff all caches updates succeed. This | ||
2575 | * can only be done from the slab side. The slab mutex needs to be held when | ||
2576 | * calling this. | ||
2577 | */ | ||
2578 | void memcg_update_array_size(int num) | ||
2579 | { | ||
2580 | memcg_limited_groups_array_size = num; | ||
2581 | } | 2559 | } |
2582 | 2560 | ||
2583 | struct memcg_kmem_cache_create_work { | 2561 | struct memcg_kmem_cache_create_work { |
@@ -2656,18 +2634,19 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep) | |||
2656 | { | 2634 | { |
2657 | struct mem_cgroup *memcg; | 2635 | struct mem_cgroup *memcg; |
2658 | struct kmem_cache *memcg_cachep; | 2636 | struct kmem_cache *memcg_cachep; |
2637 | int kmemcg_id; | ||
2659 | 2638 | ||
2660 | VM_BUG_ON(!cachep->memcg_params); | 2639 | VM_BUG_ON(!is_root_cache(cachep)); |
2661 | VM_BUG_ON(!cachep->memcg_params->is_root_cache); | ||
2662 | 2640 | ||
2663 | if (current->memcg_kmem_skip_account) | 2641 | if (current->memcg_kmem_skip_account) |
2664 | return cachep; | 2642 | return cachep; |
2665 | 2643 | ||
2666 | memcg = get_mem_cgroup_from_mm(current->mm); | 2644 | memcg = get_mem_cgroup_from_mm(current->mm); |
2667 | if (!memcg_kmem_is_active(memcg)) | 2645 | kmemcg_id = ACCESS_ONCE(memcg->kmemcg_id); |
2646 | if (kmemcg_id < 0) | ||
2668 | goto out; | 2647 | goto out; |
2669 | 2648 | ||
2670 | memcg_cachep = cache_from_memcg_idx(cachep, memcg_cache_id(memcg)); | 2649 | memcg_cachep = cache_from_memcg_idx(cachep, kmemcg_id); |
2671 | if (likely(memcg_cachep)) | 2650 | if (likely(memcg_cachep)) |
2672 | return memcg_cachep; | 2651 | return memcg_cachep; |
2673 | 2652 | ||
@@ -2692,7 +2671,7 @@ out: | |||
2692 | void __memcg_kmem_put_cache(struct kmem_cache *cachep) | 2671 | void __memcg_kmem_put_cache(struct kmem_cache *cachep) |
2693 | { | 2672 | { |
2694 | if (!is_root_cache(cachep)) | 2673 | if (!is_root_cache(cachep)) |
2695 | css_put(&cachep->memcg_params->memcg->css); | 2674 | css_put(&cachep->memcg_params.memcg->css); |
2696 | } | 2675 | } |
2697 | 2676 | ||
2698 | /* | 2677 | /* |
@@ -2757,6 +2736,24 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order) | |||
2757 | memcg_uncharge_kmem(memcg, 1 << order); | 2736 | memcg_uncharge_kmem(memcg, 1 << order); |
2758 | page->mem_cgroup = NULL; | 2737 | page->mem_cgroup = NULL; |
2759 | } | 2738 | } |
2739 | |||
2740 | struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr) | ||
2741 | { | ||
2742 | struct mem_cgroup *memcg = NULL; | ||
2743 | struct kmem_cache *cachep; | ||
2744 | struct page *page; | ||
2745 | |||
2746 | page = virt_to_head_page(ptr); | ||
2747 | if (PageSlab(page)) { | ||
2748 | cachep = page->slab_cache; | ||
2749 | if (!is_root_cache(cachep)) | ||
2750 | memcg = cachep->memcg_params.memcg; | ||
2751 | } else | ||
2752 | /* page allocated by alloc_kmem_pages */ | ||
2753 | memcg = page->mem_cgroup; | ||
2754 | |||
2755 | return memcg; | ||
2756 | } | ||
2760 | #endif /* CONFIG_MEMCG_KMEM */ | 2757 | #endif /* CONFIG_MEMCG_KMEM */ |
2761 | 2758 | ||
2762 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 2759 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
@@ -3291,8 +3288,9 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg, | |||
3291 | int err = 0; | 3288 | int err = 0; |
3292 | int memcg_id; | 3289 | int memcg_id; |
3293 | 3290 | ||
3294 | if (memcg_kmem_is_active(memcg)) | 3291 | BUG_ON(memcg->kmemcg_id >= 0); |
3295 | return 0; | 3292 | BUG_ON(memcg->kmem_acct_activated); |
3293 | BUG_ON(memcg->kmem_acct_active); | ||
3296 | 3294 | ||
3297 | /* | 3295 | /* |
3298 | * For simplicity, we won't allow this to be disabled. It also can't | 3296 | * For simplicity, we won't allow this to be disabled. It also can't |
@@ -3335,6 +3333,8 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg, | |||
3335 | * patched. | 3333 | * patched. |
3336 | */ | 3334 | */ |
3337 | memcg->kmemcg_id = memcg_id; | 3335 | memcg->kmemcg_id = memcg_id; |
3336 | memcg->kmem_acct_activated = true; | ||
3337 | memcg->kmem_acct_active = true; | ||
3338 | out: | 3338 | out: |
3339 | return err; | 3339 | return err; |
3340 | } | 3340 | } |
@@ -4014,9 +4014,59 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | |||
4014 | return mem_cgroup_sockets_init(memcg, ss); | 4014 | return mem_cgroup_sockets_init(memcg, ss); |
4015 | } | 4015 | } |
4016 | 4016 | ||
4017 | static void memcg_deactivate_kmem(struct mem_cgroup *memcg) | ||
4018 | { | ||
4019 | struct cgroup_subsys_state *css; | ||
4020 | struct mem_cgroup *parent, *child; | ||
4021 | int kmemcg_id; | ||
4022 | |||
4023 | if (!memcg->kmem_acct_active) | ||
4024 | return; | ||
4025 | |||
4026 | /* | ||
4027 | * Clear the 'active' flag before clearing memcg_caches arrays entries. | ||
4028 | * Since we take the slab_mutex in memcg_deactivate_kmem_caches(), it | ||
4029 | * guarantees no cache will be created for this cgroup after we are | ||
4030 | * done (see memcg_create_kmem_cache()). | ||
4031 | */ | ||
4032 | memcg->kmem_acct_active = false; | ||
4033 | |||
4034 | memcg_deactivate_kmem_caches(memcg); | ||
4035 | |||
4036 | kmemcg_id = memcg->kmemcg_id; | ||
4037 | BUG_ON(kmemcg_id < 0); | ||
4038 | |||
4039 | parent = parent_mem_cgroup(memcg); | ||
4040 | if (!parent) | ||
4041 | parent = root_mem_cgroup; | ||
4042 | |||
4043 | /* | ||
4044 | * Change kmemcg_id of this cgroup and all its descendants to the | ||
4045 | * parent's id, and then move all entries from this cgroup's list_lrus | ||
4046 | * to ones of the parent. After we have finished, all list_lrus | ||
4047 | * corresponding to this cgroup are guaranteed to remain empty. The | ||
4048 | * ordering is imposed by list_lru_node->lock taken by | ||
4049 | * memcg_drain_all_list_lrus(). | ||
4050 | */ | ||
4051 | css_for_each_descendant_pre(css, &memcg->css) { | ||
4052 | child = mem_cgroup_from_css(css); | ||
4053 | BUG_ON(child->kmemcg_id != kmemcg_id); | ||
4054 | child->kmemcg_id = parent->kmemcg_id; | ||
4055 | if (!memcg->use_hierarchy) | ||
4056 | break; | ||
4057 | } | ||
4058 | memcg_drain_all_list_lrus(kmemcg_id, parent->kmemcg_id); | ||
4059 | |||
4060 | memcg_free_cache_id(kmemcg_id); | ||
4061 | } | ||
4062 | |||
4017 | static void memcg_destroy_kmem(struct mem_cgroup *memcg) | 4063 | static void memcg_destroy_kmem(struct mem_cgroup *memcg) |
4018 | { | 4064 | { |
4019 | memcg_destroy_kmem_caches(memcg); | 4065 | if (memcg->kmem_acct_activated) { |
4066 | memcg_destroy_kmem_caches(memcg); | ||
4067 | static_key_slow_dec(&memcg_kmem_enabled_key); | ||
4068 | WARN_ON(page_counter_read(&memcg->kmem)); | ||
4069 | } | ||
4020 | mem_cgroup_sockets_destroy(memcg); | 4070 | mem_cgroup_sockets_destroy(memcg); |
4021 | } | 4071 | } |
4022 | #else | 4072 | #else |
@@ -4025,6 +4075,10 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | |||
4025 | return 0; | 4075 | return 0; |
4026 | } | 4076 | } |
4027 | 4077 | ||
4078 | static void memcg_deactivate_kmem(struct mem_cgroup *memcg) | ||
4079 | { | ||
4080 | } | ||
4081 | |||
4028 | static void memcg_destroy_kmem(struct mem_cgroup *memcg) | 4082 | static void memcg_destroy_kmem(struct mem_cgroup *memcg) |
4029 | { | 4083 | { |
4030 | } | 4084 | } |
@@ -4443,8 +4497,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) | |||
4443 | free_mem_cgroup_per_zone_info(memcg, node); | 4497 | free_mem_cgroup_per_zone_info(memcg, node); |
4444 | 4498 | ||
4445 | free_percpu(memcg->stat); | 4499 | free_percpu(memcg->stat); |
4446 | |||
4447 | disarm_static_keys(memcg); | ||
4448 | kfree(memcg); | 4500 | kfree(memcg); |
4449 | } | 4501 | } |
4450 | 4502 | ||
@@ -4581,6 +4633,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) | |||
4581 | spin_unlock(&memcg->event_list_lock); | 4633 | spin_unlock(&memcg->event_list_lock); |
4582 | 4634 | ||
4583 | vmpressure_cleanup(&memcg->vmpressure); | 4635 | vmpressure_cleanup(&memcg->vmpressure); |
4636 | |||
4637 | memcg_deactivate_kmem(memcg); | ||
4584 | } | 4638 | } |
4585 | 4639 | ||
4586 | static void mem_cgroup_css_free(struct cgroup_subsys_state *css) | 4640 | static void mem_cgroup_css_free(struct cgroup_subsys_state *css) |
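On the memcontrol.c side the cache-id machinery is renamed (memcg_cache_ida, memcg_nr_cache_ids) and resizing is serialised by memcg_cache_ids_sem; memcg_get_cache_ids()/memcg_put_cache_ids() give readers a stable view while they size per-memcg arrays, which is how __list_lru_init() above uses them. A kernel-context sketch of that read-side pattern (alloc_per_memcg_array_sketch and its registration step are illustrative, not kernel code):

#include <linux/memcontrol.h>
#include <linux/slab.h>

/*
 * While the read side of memcg_cache_ids_sem is held (via
 * memcg_get_cache_ids()), memcg_nr_cache_ids cannot grow under us, so an
 * array sized here stays consistent with the ids handed out so far.
 */
static void **alloc_per_memcg_array_sketch(void)
{
	void **arr;

	memcg_get_cache_ids();
	arr = kcalloc(memcg_nr_cache_ids, sizeof(*arr), GFP_KERNEL);
	/* ... register 'arr' where the resize path (cf. memcg_update_all_list_lrus) can find it ... */
	memcg_put_cache_ids();

	return arr;
}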
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index feb803bf3443..d487f8dc6d39 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -242,15 +242,8 @@ void shake_page(struct page *p, int access) | |||
242 | * Only call shrink_node_slabs here (which would also shrink | 242 | * Only call shrink_node_slabs here (which would also shrink |
243 | * other caches) if access is not potentially fatal. | 243 | * other caches) if access is not potentially fatal. |
244 | */ | 244 | */ |
245 | if (access) { | 245 | if (access) |
246 | int nr; | 246 | drop_slab_node(page_to_nid(p)); |
247 | int nid = page_to_nid(p); | ||
248 | do { | ||
249 | nr = shrink_node_slabs(GFP_KERNEL, nid, 1000, 1000); | ||
250 | if (page_count(p) == 1) | ||
251 | break; | ||
252 | } while (nr > 10); | ||
253 | } | ||
254 | } | 247 | } |
255 | EXPORT_SYMBOL_GPL(shake_page); | 248 | EXPORT_SYMBOL_GPL(shake_page); |
256 | 249 | ||
@@ -1654,8 +1647,6 @@ static int __soft_offline_page(struct page *page, int flags) | |||
1654 | * setting PG_hwpoison. | 1647 | * setting PG_hwpoison. |
1655 | */ | 1648 | */ |
1656 | if (!is_free_buddy_page(page)) | 1649 | if (!is_free_buddy_page(page)) |
1657 | lru_add_drain_all(); | ||
1658 | if (!is_free_buddy_page(page)) | ||
1659 | drain_all_pages(page_zone(page)); | 1650 | drain_all_pages(page_zone(page)); |
1660 | SetPageHWPoison(page); | 1651 | SetPageHWPoison(page); |
1661 | if (!is_free_buddy_page(page)) | 1652 | if (!is_free_buddy_page(page)) |
diff --git a/mm/memory.c b/mm/memory.c
index bbe6a73a899d..99275325f303 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3013,14 +3013,17 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3013 | bool migrated = false; | 3013 | bool migrated = false; |
3014 | int flags = 0; | 3014 | int flags = 0; |
3015 | 3015 | ||
3016 | /* A PROT_NONE fault should not end up here */ | ||
3017 | BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))); | ||
3018 | |||
3016 | /* | 3019 | /* |
3017 | * The "pte" at this point cannot be used safely without | 3020 | * The "pte" at this point cannot be used safely without |
3018 | * validation through pte_unmap_same(). It's of NUMA type but | 3021 | * validation through pte_unmap_same(). It's of NUMA type but |
3019 | * the pfn may be screwed if the read is non atomic. | 3022 | * the pfn may be screwed if the read is non atomic. |
3020 | * | 3023 | * |
3021 | * ptep_modify_prot_start is not called as this is clearing | 3024 | * We can safely just do a "set_pte_at()", because the old |
3022 | * the _PAGE_NUMA bit and it is not really expected that there | 3025 | * page table entry is not accessible, so there would be no |
3023 | * would be concurrent hardware modifications to the PTE. | 3026 | * concurrent hardware modifications to the PTE. |
3024 | */ | 3027 | */ |
3025 | ptl = pte_lockptr(mm, pmd); | 3028 | ptl = pte_lockptr(mm, pmd); |
3026 | spin_lock(ptl); | 3029 | spin_lock(ptl); |
@@ -3029,7 +3032,9 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3029 | goto out; | 3032 | goto out; |
3030 | } | 3033 | } |
3031 | 3034 | ||
3032 | pte = pte_mknonnuma(pte); | 3035 | /* Make it present again */ |
3036 | pte = pte_modify(pte, vma->vm_page_prot); | ||
3037 | pte = pte_mkyoung(pte); | ||
3033 | set_pte_at(mm, addr, ptep, pte); | 3038 | set_pte_at(mm, addr, ptep, pte); |
3034 | update_mmu_cache(vma, addr, ptep); | 3039 | update_mmu_cache(vma, addr, ptep); |
3035 | 3040 | ||
@@ -3038,7 +3043,6 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3038 | pte_unmap_unlock(ptep, ptl); | 3043 | pte_unmap_unlock(ptep, ptl); |
3039 | return 0; | 3044 | return 0; |
3040 | } | 3045 | } |
3041 | BUG_ON(is_zero_pfn(page_to_pfn(page))); | ||
3042 | 3046 | ||
3043 | /* | 3047 | /* |
3044 | * Avoid grouping on DSO/COW pages in specific and RO pages | 3048 | * Avoid grouping on DSO/COW pages in specific and RO pages |
@@ -3124,7 +3128,7 @@ static int handle_pte_fault(struct mm_struct *mm, | |||
3124 | pte, pmd, flags, entry); | 3128 | pte, pmd, flags, entry); |
3125 | } | 3129 | } |
3126 | 3130 | ||
3127 | if (pte_numa(entry)) | 3131 | if (pte_protnone(entry)) |
3128 | return do_numa_page(mm, vma, address, entry, pte, pmd); | 3132 | return do_numa_page(mm, vma, address, entry, pte, pmd); |
3129 | 3133 | ||
3130 | ptl = pte_lockptr(mm, pmd); | 3134 | ptl = pte_lockptr(mm, pmd); |
@@ -3202,7 +3206,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3202 | if (pmd_trans_splitting(orig_pmd)) | 3206 | if (pmd_trans_splitting(orig_pmd)) |
3203 | return 0; | 3207 | return 0; |
3204 | 3208 | ||
3205 | if (pmd_numa(orig_pmd)) | 3209 | if (pmd_protnone(orig_pmd)) |
3206 | return do_huge_pmd_numa_page(mm, vma, address, | 3210 | return do_huge_pmd_numa_page(mm, vma, address, |
3207 | orig_pmd, pmd); | 3211 | orig_pmd, pmd); |
3208 | 3212 | ||
@@ -3458,7 +3462,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, | |||
3458 | if (follow_phys(vma, addr, write, &prot, &phys_addr)) | 3462 | if (follow_phys(vma, addr, write, &prot, &phys_addr)) |
3459 | return -EINVAL; | 3463 | return -EINVAL; |
3460 | 3464 | ||
3461 | maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot); | 3465 | maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); |
3462 | if (write) | 3466 | if (write) |
3463 | memcpy_toio(maddr + offset, buf, len); | 3467 | memcpy_toio(maddr + offset, buf, len); |
3464 | else | 3468 | else |
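With the special NUMA pte encoding gone, the do_numa_page() hunk above resolves a hinting fault by simply re-applying the VMA's normal protection and marking the pte young. A toy userspace model of that flow; the bit names and helpers below are invented for illustration and are not the kernel's:

#include <stdio.h>

#define P_PRESENT  0x1
#define P_READ     0x2
#define P_WRITE    0x4
#define P_ACCESSED 0x8

typedef unsigned int pte_toy;

/* Toy pte_protnone(): mapped by the kernel but with no access bits set. */
static int pte_protnone_toy(pte_toy pte)
{
	return (pte & P_PRESENT) && !(pte & (P_READ | P_WRITE));
}

/* Toy pte_modify(): replace the protection bits with the VMA's default. */
static pte_toy pte_modify_toy(pte_toy pte, pte_toy vma_prot)
{
	return (pte & P_PRESENT) | vma_prot;
}

int main(void)
{
	pte_toy vma_prot = P_READ | P_WRITE;
	pte_toy pte = P_PRESENT | vma_prot | P_ACCESSED;

	/* NUMA hinting: strip the access bits so the next touch faults. */
	pte &= ~(P_READ | P_WRITE | P_ACCESSED);
	printf("hinted:   protnone=%d\n", pte_protnone_toy(pte));

	/* do_numa_page(): make it present again and mark it young. */
	pte = pte_modify_toy(pte, vma_prot);
	pte |= P_ACCESSED;
	printf("resolved: protnone=%d\n", pte_protnone_toy(pte));
	return 0;
}

The same idea shows up below in mm/mempolicy.c, where change_prot_numa() now passes PAGE_NONE as the new protection instead of vma->vm_page_prot.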
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index f1bd23803576..c75f4dcec808 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -569,7 +569,7 @@ unsigned long change_prot_numa(struct vm_area_struct *vma, | |||
569 | { | 569 | { |
570 | int nr_updated; | 570 | int nr_updated; |
571 | 571 | ||
572 | nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1); | 572 | nr_updated = change_protection(vma, addr, end, PAGE_NONE, 0, 1); |
573 | if (nr_updated) | 573 | if (nr_updated) |
574 | count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated); | 574 | count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated); |
575 | 575 | ||
diff --git a/mm/migrate.c b/mm/migrate.c index f98067e5d353..85e042686031 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -1654,12 +1654,6 @@ bool pmd_trans_migrating(pmd_t pmd) | |||
1654 | return PageLocked(page); | 1654 | return PageLocked(page); |
1655 | } | 1655 | } |
1656 | 1656 | ||
1657 | void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd) | ||
1658 | { | ||
1659 | struct page *page = pmd_page(*pmd); | ||
1660 | wait_on_page_locked(page); | ||
1661 | } | ||
1662 | |||
1663 | /* | 1657 | /* |
1664 | * Attempt to migrate a misplaced page to the specified destination | 1658 | * Attempt to migrate a misplaced page to the specified destination |
1665 | * node. Caller is expected to have an elevated reference count on | 1659 | * node. Caller is expected to have an elevated reference count on |
@@ -1853,7 +1847,7 @@ out_fail: | |||
1853 | out_dropref: | 1847 | out_dropref: |
1854 | ptl = pmd_lock(mm, pmd); | 1848 | ptl = pmd_lock(mm, pmd); |
1855 | if (pmd_same(*pmd, entry)) { | 1849 | if (pmd_same(*pmd, entry)) { |
1856 | entry = pmd_mknonnuma(entry); | 1850 | entry = pmd_modify(entry, vma->vm_page_prot); |
1857 | set_pmd_at(mm, mmun_start, pmd, entry); | 1851 | set_pmd_at(mm, mmun_start, pmd, entry); |
1858 | update_mmu_cache_pmd(vma, address, &entry); | 1852 | update_mmu_cache_pmd(vma, address, &entry); |
1859 | } | 1853 | } |
diff --git a/mm/mm_init.c b/mm/mm_init.c index 4074caf9936b..5f420f7fafa1 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c | |||
@@ -14,14 +14,14 @@ | |||
14 | #include "internal.h" | 14 | #include "internal.h" |
15 | 15 | ||
16 | #ifdef CONFIG_DEBUG_MEMORY_INIT | 16 | #ifdef CONFIG_DEBUG_MEMORY_INIT |
17 | int mminit_loglevel; | 17 | int __meminitdata mminit_loglevel; |
18 | 18 | ||
19 | #ifndef SECTIONS_SHIFT | 19 | #ifndef SECTIONS_SHIFT |
20 | #define SECTIONS_SHIFT 0 | 20 | #define SECTIONS_SHIFT 0 |
21 | #endif | 21 | #endif |
22 | 22 | ||
23 | /* The zonelists are simply reported, validation is manual. */ | 23 | /* The zonelists are simply reported, validation is manual. */ |
24 | void mminit_verify_zonelist(void) | 24 | void __init mminit_verify_zonelist(void) |
25 | { | 25 | { |
26 | int nid; | 26 | int nid; |
27 | 27 | ||
diff --git a/mm/mprotect.c b/mm/mprotect.c index 33121662f08b..44727811bf4c 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
@@ -75,36 +75,34 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
75 | oldpte = *pte; | 75 | oldpte = *pte; |
76 | if (pte_present(oldpte)) { | 76 | if (pte_present(oldpte)) { |
77 | pte_t ptent; | 77 | pte_t ptent; |
78 | bool updated = false; | ||
79 | 78 | ||
80 | if (!prot_numa) { | 79 | /* |
81 | ptent = ptep_modify_prot_start(mm, addr, pte); | 80 | * Avoid trapping faults against the zero or KSM |
82 | if (pte_numa(ptent)) | 81 | * pages. See similar comment in change_huge_pmd. |
83 | ptent = pte_mknonnuma(ptent); | 82 | */ |
84 | ptent = pte_modify(ptent, newprot); | 83 | if (prot_numa) { |
85 | /* | ||
86 | * Avoid taking write faults for pages we | ||
87 | * know to be dirty. | ||
88 | */ | ||
89 | if (dirty_accountable && pte_dirty(ptent) && | ||
90 | (pte_soft_dirty(ptent) || | ||
91 | !(vma->vm_flags & VM_SOFTDIRTY))) | ||
92 | ptent = pte_mkwrite(ptent); | ||
93 | ptep_modify_prot_commit(mm, addr, pte, ptent); | ||
94 | updated = true; | ||
95 | } else { | ||
96 | struct page *page; | 84 | struct page *page; |
97 | 85 | ||
98 | page = vm_normal_page(vma, addr, oldpte); | 86 | page = vm_normal_page(vma, addr, oldpte); |
99 | if (page && !PageKsm(page)) { | 87 | if (!page || PageKsm(page)) |
100 | if (!pte_numa(oldpte)) { | 88 | continue; |
101 | ptep_set_numa(mm, addr, pte); | 89 | |
102 | updated = true; | 90 | /* Avoid TLB flush if possible */ |
103 | } | 91 | if (pte_protnone(oldpte)) |
104 | } | 92 | continue; |
105 | } | 93 | } |
106 | if (updated) | 94 | |
107 | pages++; | 95 | ptent = ptep_modify_prot_start(mm, addr, pte); |
96 | ptent = pte_modify(ptent, newprot); | ||
97 | |||
98 | /* Avoid taking write faults for known dirty pages */ | ||
99 | if (dirty_accountable && pte_dirty(ptent) && | ||
100 | (pte_soft_dirty(ptent) || | ||
101 | !(vma->vm_flags & VM_SOFTDIRTY))) { | ||
102 | ptent = pte_mkwrite(ptent); | ||
103 | } | ||
104 | ptep_modify_prot_commit(mm, addr, pte, ptent); | ||
105 | pages++; | ||
108 | } else if (IS_ENABLED(CONFIG_MIGRATION)) { | 106 | } else if (IS_ENABLED(CONFIG_MIGRATION)) { |
109 | swp_entry_t entry = pte_to_swp_entry(oldpte); | 107 | swp_entry_t entry = pte_to_swp_entry(oldpte); |
110 | 108 | ||
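The rewritten change_pte_range() loop above handles prot_numa by filtering first: entries backed by no normal page or by a KSM page are skipped, as are entries that are already prot-none (avoiding a pointless TLB flush), and everything else falls through to one common modify-and-commit path. A compressed sketch of that control flow with obviously fake page/pte types:

#include <stdbool.h>
#include <stdio.h>

struct toy_pte  { bool present, protnone; };
struct toy_page { bool ksm; };

/* NULL means "no normal page backs this pte" (e.g. the zero page). */
static struct toy_page *vm_normal_page_toy(int i)
{
	static struct toy_page pages[4] = { {false}, {true}, {false}, {false} };
	return i == 2 ? NULL : &pages[i];
}

int main(void)
{
	struct toy_pte ptes[4] = {
		{true, false}, {true, false}, {true, false}, {true, true},
	};
	int updated = 0;

	for (int i = 0; i < 4; i++) {
		struct toy_page *page = vm_normal_page_toy(i);

		/* prot_numa filtering, mirroring the new early-continue shape */
		if (!page || page->ksm)
			continue;        /* don't trap zero/KSM pages */
		if (ptes[i].protnone)
			continue;        /* already hinted: skip, no TLB flush */

		ptes[i].protnone = true; /* common modify-and-commit path */
		updated++;
	}
	printf("updated %d of 4 ptes\n", updated);
	return 0;
}

Note that pages++ is now counted once on the common commit path rather than through the old updated flag.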
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8d52ab18fe0d..cb4758263f6b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -172,7 +172,7 @@ static void __free_pages_ok(struct page *page, unsigned int order); | |||
172 | * 1G machine -> (16M dma, 784M normal, 224M high) | 172 | * 1G machine -> (16M dma, 784M normal, 224M high) |
173 | * NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA | 173 | * NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA |
174 | * HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL | 174 | * HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL |
175 | * HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA | 175 | * HIGHMEM allocation will leave (224M+784M)/256 of ram reserved in ZONE_DMA |
176 | * | 176 | * |
177 | * TBD: should special case ZONE_DMA32 machines here - in those we normally | 177 | * TBD: should special case ZONE_DMA32 machines here - in those we normally |
178 | * don't need any ZONE_NORMAL reservation | 178 | * don't need any ZONE_NORMAL reservation |
@@ -3871,18 +3871,29 @@ static int __build_all_zonelists(void *data) | |||
3871 | return 0; | 3871 | return 0; |
3872 | } | 3872 | } |
3873 | 3873 | ||
3874 | static noinline void __init | ||
3875 | build_all_zonelists_init(void) | ||
3876 | { | ||
3877 | __build_all_zonelists(NULL); | ||
3878 | mminit_verify_zonelist(); | ||
3879 | cpuset_init_current_mems_allowed(); | ||
3880 | } | ||
3881 | |||
3874 | /* | 3882 | /* |
3875 | * Called with zonelists_mutex held always | 3883 | * Called with zonelists_mutex held always |
3876 | * unless system_state == SYSTEM_BOOTING. | 3884 | * unless system_state == SYSTEM_BOOTING. |
3885 | * | ||
3886 | * __ref due to (1) call of __meminit annotated setup_zone_pageset | ||
3887 | * [we're only called with non-NULL zone through __meminit paths] and | ||
3888 | * (2) call of __init annotated helper build_all_zonelists_init | ||
3889 | * [protected by SYSTEM_BOOTING]. | ||
3877 | */ | 3890 | */ |
3878 | void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone) | 3891 | void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone) |
3879 | { | 3892 | { |
3880 | set_zonelist_order(); | 3893 | set_zonelist_order(); |
3881 | 3894 | ||
3882 | if (system_state == SYSTEM_BOOTING) { | 3895 | if (system_state == SYSTEM_BOOTING) { |
3883 | __build_all_zonelists(NULL); | 3896 | build_all_zonelists_init(); |
3884 | mminit_verify_zonelist(); | ||
3885 | cpuset_init_current_mems_allowed(); | ||
3886 | } else { | 3897 | } else { |
3887 | #ifdef CONFIG_MEMORY_HOTPLUG | 3898 | #ifdef CONFIG_MEMORY_HOTPLUG |
3888 | if (zone) | 3899 | if (zone) |
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index dfb79e028ecb..c25f94b33811 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c | |||
@@ -193,8 +193,6 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, | |||
193 | pmd_t *pmdp) | 193 | pmd_t *pmdp) |
194 | { | 194 | { |
195 | pmd_t entry = *pmdp; | 195 | pmd_t entry = *pmdp; |
196 | if (pmd_numa(entry)) | ||
197 | entry = pmd_mknonnuma(entry); | ||
198 | set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry)); | 196 | set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry)); |
199 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | 197 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); |
200 | } | 198 | } |
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
@@ -2382,7 +2382,7 @@ out: | |||
2382 | return nr_freed; | 2382 | return nr_freed; |
2383 | } | 2383 | } |
2384 | 2384 | ||
2385 | int __kmem_cache_shrink(struct kmem_cache *cachep) | 2385 | int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate) |
2386 | { | 2386 | { |
2387 | int ret = 0; | 2387 | int ret = 0; |
2388 | int node; | 2388 | int node; |
@@ -2404,7 +2404,7 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep) | |||
2404 | { | 2404 | { |
2405 | int i; | 2405 | int i; |
2406 | struct kmem_cache_node *n; | 2406 | struct kmem_cache_node *n; |
2407 | int rc = __kmem_cache_shrink(cachep); | 2407 | int rc = __kmem_cache_shrink(cachep, false); |
2408 | 2408 | ||
2409 | if (rc) | 2409 | if (rc) |
2410 | return rc; | 2410 | return rc; |
@@ -3708,8 +3708,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | |||
3708 | int batchcount, int shared, gfp_t gfp) | 3708 | int batchcount, int shared, gfp_t gfp) |
3709 | { | 3709 | { |
3710 | int ret; | 3710 | int ret; |
3711 | struct kmem_cache *c = NULL; | 3711 | struct kmem_cache *c; |
3712 | int i = 0; | ||
3713 | 3712 | ||
3714 | ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp); | 3713 | ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp); |
3715 | 3714 | ||
@@ -3719,12 +3718,10 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | |||
3719 | if ((ret < 0) || !is_root_cache(cachep)) | 3718 | if ((ret < 0) || !is_root_cache(cachep)) |
3720 | return ret; | 3719 | return ret; |
3721 | 3720 | ||
3722 | VM_BUG_ON(!mutex_is_locked(&slab_mutex)); | 3721 | lockdep_assert_held(&slab_mutex); |
3723 | for_each_memcg_cache_index(i) { | 3722 | for_each_memcg_cache(c, cachep) { |
3724 | c = cache_from_memcg_idx(cachep, i); | 3723 | /* return value determined by the root cache only */ |
3725 | if (c) | 3724 | __do_tune_cpucache(c, limit, batchcount, shared, gfp); |
3726 | /* return value determined by the parent cache only */ | ||
3727 | __do_tune_cpucache(c, limit, batchcount, shared, gfp); | ||
3728 | } | 3725 | } |
3729 | 3726 | ||
3730 | return ret; | 3727 | return ret; |
diff --git a/mm/slab.h b/mm/slab.h --- a/mm/slab.h +++ b/mm/slab.h | |||
@@ -86,8 +86,6 @@ extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size, | |||
86 | extern void create_boot_cache(struct kmem_cache *, const char *name, | 86 | extern void create_boot_cache(struct kmem_cache *, const char *name, |
87 | size_t size, unsigned long flags); | 87 | size_t size, unsigned long flags); |
88 | 88 | ||
89 | struct mem_cgroup; | ||
90 | |||
91 | int slab_unmergeable(struct kmem_cache *s); | 89 | int slab_unmergeable(struct kmem_cache *s); |
92 | struct kmem_cache *find_mergeable(size_t size, size_t align, | 90 | struct kmem_cache *find_mergeable(size_t size, size_t align, |
93 | unsigned long flags, const char *name, void (*ctor)(void *)); | 91 | unsigned long flags, const char *name, void (*ctor)(void *)); |
@@ -140,7 +138,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, | |||
140 | #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) | 138 | #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) |
141 | 139 | ||
142 | int __kmem_cache_shutdown(struct kmem_cache *); | 140 | int __kmem_cache_shutdown(struct kmem_cache *); |
143 | int __kmem_cache_shrink(struct kmem_cache *); | 141 | int __kmem_cache_shrink(struct kmem_cache *, bool); |
144 | void slab_kmem_cache_release(struct kmem_cache *); | 142 | void slab_kmem_cache_release(struct kmem_cache *); |
145 | 143 | ||
146 | struct seq_file; | 144 | struct seq_file; |
@@ -165,16 +163,27 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer, | |||
165 | size_t count, loff_t *ppos); | 163 | size_t count, loff_t *ppos); |
166 | 164 | ||
167 | #ifdef CONFIG_MEMCG_KMEM | 165 | #ifdef CONFIG_MEMCG_KMEM |
166 | /* | ||
167 | * Iterate over all memcg caches of the given root cache. The caller must hold | ||
168 | * slab_mutex. | ||
169 | */ | ||
170 | #define for_each_memcg_cache(iter, root) \ | ||
171 | list_for_each_entry(iter, &(root)->memcg_params.list, \ | ||
172 | memcg_params.list) | ||
173 | |||
174 | #define for_each_memcg_cache_safe(iter, tmp, root) \ | ||
175 | list_for_each_entry_safe(iter, tmp, &(root)->memcg_params.list, \ | ||
176 | memcg_params.list) | ||
177 | |||
168 | static inline bool is_root_cache(struct kmem_cache *s) | 178 | static inline bool is_root_cache(struct kmem_cache *s) |
169 | { | 179 | { |
170 | return !s->memcg_params || s->memcg_params->is_root_cache; | 180 | return s->memcg_params.is_root_cache; |
171 | } | 181 | } |
172 | 182 | ||
173 | static inline bool slab_equal_or_root(struct kmem_cache *s, | 183 | static inline bool slab_equal_or_root(struct kmem_cache *s, |
174 | struct kmem_cache *p) | 184 | struct kmem_cache *p) |
175 | { | 185 | { |
176 | return (p == s) || | 186 | return p == s || p == s->memcg_params.root_cache; |
177 | (s->memcg_params && (p == s->memcg_params->root_cache)); | ||
178 | } | 187 | } |
179 | 188 | ||
180 | /* | 189 | /* |
@@ -185,37 +194,30 @@ static inline bool slab_equal_or_root(struct kmem_cache *s, | |||
185 | static inline const char *cache_name(struct kmem_cache *s) | 194 | static inline const char *cache_name(struct kmem_cache *s) |
186 | { | 195 | { |
187 | if (!is_root_cache(s)) | 196 | if (!is_root_cache(s)) |
188 | return s->memcg_params->root_cache->name; | 197 | s = s->memcg_params.root_cache; |
189 | return s->name; | 198 | return s->name; |
190 | } | 199 | } |
191 | 200 | ||
192 | /* | 201 | /* |
193 | * Note, we protect with RCU only the memcg_caches array, not per-memcg caches. | 202 | * Note, we protect with RCU only the memcg_caches array, not per-memcg caches. |
194 | * That said the caller must assure the memcg's cache won't go away. Since once | 203 | * That said the caller must assure the memcg's cache won't go away by either |
195 | * created a memcg's cache is destroyed only along with the root cache, it is | 204 | * taking a css reference to the owner cgroup, or holding the slab_mutex. |
196 | * true if we are going to allocate from the cache or hold a reference to the | ||
197 | * root cache by other means. Otherwise, we should hold either the slab_mutex | ||
198 | * or the memcg's slab_caches_mutex while calling this function and accessing | ||
199 | * the returned value. | ||
200 | */ | 205 | */ |
201 | static inline struct kmem_cache * | 206 | static inline struct kmem_cache * |
202 | cache_from_memcg_idx(struct kmem_cache *s, int idx) | 207 | cache_from_memcg_idx(struct kmem_cache *s, int idx) |
203 | { | 208 | { |
204 | struct kmem_cache *cachep; | 209 | struct kmem_cache *cachep; |
205 | struct memcg_cache_params *params; | 210 | struct memcg_cache_array *arr; |
206 | |||
207 | if (!s->memcg_params) | ||
208 | return NULL; | ||
209 | 211 | ||
210 | rcu_read_lock(); | 212 | rcu_read_lock(); |
211 | params = rcu_dereference(s->memcg_params); | 213 | arr = rcu_dereference(s->memcg_params.memcg_caches); |
212 | 214 | ||
213 | /* | 215 | /* |
214 | * Make sure we will access the up-to-date value. The code updating | 216 | * Make sure we will access the up-to-date value. The code updating |
215 | * memcg_caches issues a write barrier to match this (see | 217 | * memcg_caches issues a write barrier to match this (see |
216 | * memcg_register_cache()). | 218 | * memcg_create_kmem_cache()). |
217 | */ | 219 | */ |
218 | cachep = lockless_dereference(params->memcg_caches[idx]); | 220 | cachep = lockless_dereference(arr->entries[idx]); |
219 | rcu_read_unlock(); | 221 | rcu_read_unlock(); |
220 | 222 | ||
221 | return cachep; | 223 | return cachep; |
@@ -225,7 +227,7 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s) | |||
225 | { | 227 | { |
226 | if (is_root_cache(s)) | 228 | if (is_root_cache(s)) |
227 | return s; | 229 | return s; |
228 | return s->memcg_params->root_cache; | 230 | return s->memcg_params.root_cache; |
229 | } | 231 | } |
230 | 232 | ||
231 | static __always_inline int memcg_charge_slab(struct kmem_cache *s, | 233 | static __always_inline int memcg_charge_slab(struct kmem_cache *s, |
@@ -235,7 +237,7 @@ static __always_inline int memcg_charge_slab(struct kmem_cache *s, | |||
235 | return 0; | 237 | return 0; |
236 | if (is_root_cache(s)) | 238 | if (is_root_cache(s)) |
237 | return 0; | 239 | return 0; |
238 | return memcg_charge_kmem(s->memcg_params->memcg, gfp, 1 << order); | 240 | return memcg_charge_kmem(s->memcg_params.memcg, gfp, 1 << order); |
239 | } | 241 | } |
240 | 242 | ||
241 | static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order) | 243 | static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order) |
@@ -244,9 +246,18 @@ static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order) | |||
244 | return; | 246 | return; |
245 | if (is_root_cache(s)) | 247 | if (is_root_cache(s)) |
246 | return; | 248 | return; |
247 | memcg_uncharge_kmem(s->memcg_params->memcg, 1 << order); | 249 | memcg_uncharge_kmem(s->memcg_params.memcg, 1 << order); |
248 | } | 250 | } |
249 | #else | 251 | |
252 | extern void slab_init_memcg_params(struct kmem_cache *); | ||
253 | |||
254 | #else /* !CONFIG_MEMCG_KMEM */ | ||
255 | |||
256 | #define for_each_memcg_cache(iter, root) \ | ||
257 | for ((void)(iter), (void)(root); 0; ) | ||
258 | #define for_each_memcg_cache_safe(iter, tmp, root) \ | ||
259 | for ((void)(iter), (void)(tmp), (void)(root); 0; ) | ||
260 | |||
250 | static inline bool is_root_cache(struct kmem_cache *s) | 261 | static inline bool is_root_cache(struct kmem_cache *s) |
251 | { | 262 | { |
252 | return true; | 263 | return true; |
@@ -282,7 +293,11 @@ static inline int memcg_charge_slab(struct kmem_cache *s, gfp_t gfp, int order) | |||
282 | static inline void memcg_uncharge_slab(struct kmem_cache *s, int order) | 293 | static inline void memcg_uncharge_slab(struct kmem_cache *s, int order) |
283 | { | 294 | { |
284 | } | 295 | } |
285 | #endif | 296 | |
297 | static inline void slab_init_memcg_params(struct kmem_cache *s) | ||
298 | { | ||
299 | } | ||
300 | #endif /* CONFIG_MEMCG_KMEM */ | ||
286 | 301 | ||
287 | static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) | 302 | static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) |
288 | { | 303 | { |
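The core of the mm/slab.h changes above is that per-memcg child caches now hang off their root cache on an embedded list, so for_each_memcg_cache() is plain list_for_each_entry() and the old index-array walks disappear. A standalone sketch of that intrusive-list pattern, with toy types and a hand-rolled container_of instead of the kernel headers:

#include <stddef.h>
#include <stdio.h>

struct list_node { struct list_node *next; };

/* Recover the containing struct from a pointer to its embedded node. */
#define container_of_toy(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct toy_params { struct list_node link; };
struct toy_cache {
	const char *name;
	struct toy_params memcg_params;
};

/* Toy for_each_memcg_cache(): walk children hanging off the root's list. */
#define for_each_child_cache(iter, root)                                    \
	for (struct list_node *_n = (root)->memcg_params.link.next;         \
	     _n && ((iter) = container_of_toy(_n, struct toy_cache,         \
					      memcg_params.link), 1);        \
	     _n = _n->next)

int main(void)
{
	struct toy_cache root = { "dentry",        { { NULL } } };
	struct toy_cache a    = { "dentry(2:foo)", { { NULL } } };
	struct toy_cache b    = { "dentry(3:bar)", { { NULL } } };
	struct toy_cache *c;

	/* list_add()-style insertion at the head of the root's list */
	a.memcg_params.link.next = root.memcg_params.link.next;
	root.memcg_params.link.next = &a.memcg_params.link;
	b.memcg_params.link.next = root.memcg_params.link.next;
	root.memcg_params.link.next = &b.memcg_params.link;

	for_each_child_cache(c, &root)
		printf("child cache: %s\n", c->name);
	return 0;
}

Children are inserted at the head, mirroring the list_add() in memcg_create_kmem_cache() later in this series, so iteration visits the most recently created children first.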
diff --git a/mm/slab_common.c b/mm/slab_common.c index 6e1e4cf65836..1a1cc89acaa3 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c | |||
@@ -106,62 +106,67 @@ static inline int kmem_cache_sanity_check(const char *name, size_t size) | |||
106 | #endif | 106 | #endif |
107 | 107 | ||
108 | #ifdef CONFIG_MEMCG_KMEM | 108 | #ifdef CONFIG_MEMCG_KMEM |
109 | static int memcg_alloc_cache_params(struct mem_cgroup *memcg, | 109 | void slab_init_memcg_params(struct kmem_cache *s) |
110 | struct kmem_cache *s, struct kmem_cache *root_cache) | ||
111 | { | 110 | { |
112 | size_t size; | 111 | s->memcg_params.is_root_cache = true; |
112 | INIT_LIST_HEAD(&s->memcg_params.list); | ||
113 | RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL); | ||
114 | } | ||
115 | |||
116 | static int init_memcg_params(struct kmem_cache *s, | ||
117 | struct mem_cgroup *memcg, struct kmem_cache *root_cache) | ||
118 | { | ||
119 | struct memcg_cache_array *arr; | ||
113 | 120 | ||
114 | if (!memcg_kmem_enabled()) | 121 | if (memcg) { |
122 | s->memcg_params.is_root_cache = false; | ||
123 | s->memcg_params.memcg = memcg; | ||
124 | s->memcg_params.root_cache = root_cache; | ||
115 | return 0; | 125 | return 0; |
126 | } | ||
116 | 127 | ||
117 | if (!memcg) { | 128 | slab_init_memcg_params(s); |
118 | size = offsetof(struct memcg_cache_params, memcg_caches); | ||
119 | size += memcg_limited_groups_array_size * sizeof(void *); | ||
120 | } else | ||
121 | size = sizeof(struct memcg_cache_params); | ||
122 | 129 | ||
123 | s->memcg_params = kzalloc(size, GFP_KERNEL); | 130 | if (!memcg_nr_cache_ids) |
124 | if (!s->memcg_params) | 131 | return 0; |
125 | return -ENOMEM; | ||
126 | 132 | ||
127 | if (memcg) { | 133 | arr = kzalloc(sizeof(struct memcg_cache_array) + |
128 | s->memcg_params->memcg = memcg; | 134 | memcg_nr_cache_ids * sizeof(void *), |
129 | s->memcg_params->root_cache = root_cache; | 135 | GFP_KERNEL); |
130 | } else | 136 | if (!arr) |
131 | s->memcg_params->is_root_cache = true; | 137 | return -ENOMEM; |
132 | 138 | ||
139 | RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr); | ||
133 | return 0; | 140 | return 0; |
134 | } | 141 | } |
135 | 142 | ||
136 | static void memcg_free_cache_params(struct kmem_cache *s) | 143 | static void destroy_memcg_params(struct kmem_cache *s) |
137 | { | 144 | { |
138 | kfree(s->memcg_params); | 145 | if (is_root_cache(s)) |
146 | kfree(rcu_access_pointer(s->memcg_params.memcg_caches)); | ||
139 | } | 147 | } |
140 | 148 | ||
141 | static int memcg_update_cache_params(struct kmem_cache *s, int num_memcgs) | 149 | static int update_memcg_params(struct kmem_cache *s, int new_array_size) |
142 | { | 150 | { |
143 | int size; | 151 | struct memcg_cache_array *old, *new; |
144 | struct memcg_cache_params *new_params, *cur_params; | ||
145 | |||
146 | BUG_ON(!is_root_cache(s)); | ||
147 | 152 | ||
148 | size = offsetof(struct memcg_cache_params, memcg_caches); | 153 | if (!is_root_cache(s)) |
149 | size += num_memcgs * sizeof(void *); | 154 | return 0; |
150 | 155 | ||
151 | new_params = kzalloc(size, GFP_KERNEL); | 156 | new = kzalloc(sizeof(struct memcg_cache_array) + |
152 | if (!new_params) | 157 | new_array_size * sizeof(void *), GFP_KERNEL); |
158 | if (!new) | ||
153 | return -ENOMEM; | 159 | return -ENOMEM; |
154 | 160 | ||
155 | cur_params = s->memcg_params; | 161 | old = rcu_dereference_protected(s->memcg_params.memcg_caches, |
156 | memcpy(new_params->memcg_caches, cur_params->memcg_caches, | 162 | lockdep_is_held(&slab_mutex)); |
157 | memcg_limited_groups_array_size * sizeof(void *)); | 163 | if (old) |
158 | 164 | memcpy(new->entries, old->entries, | |
159 | new_params->is_root_cache = true; | 165 | memcg_nr_cache_ids * sizeof(void *)); |
160 | |||
161 | rcu_assign_pointer(s->memcg_params, new_params); | ||
162 | if (cur_params) | ||
163 | kfree_rcu(cur_params, rcu_head); | ||
164 | 166 | ||
167 | rcu_assign_pointer(s->memcg_params.memcg_caches, new); | ||
168 | if (old) | ||
169 | kfree_rcu(old, rcu); | ||
165 | return 0; | 170 | return 0; |
166 | } | 171 | } |
167 | 172 | ||
@@ -169,34 +174,28 @@ int memcg_update_all_caches(int num_memcgs) | |||
169 | { | 174 | { |
170 | struct kmem_cache *s; | 175 | struct kmem_cache *s; |
171 | int ret = 0; | 176 | int ret = 0; |
172 | mutex_lock(&slab_mutex); | ||
173 | 177 | ||
178 | mutex_lock(&slab_mutex); | ||
174 | list_for_each_entry(s, &slab_caches, list) { | 179 | list_for_each_entry(s, &slab_caches, list) { |
175 | if (!is_root_cache(s)) | 180 | ret = update_memcg_params(s, num_memcgs); |
176 | continue; | ||
177 | |||
178 | ret = memcg_update_cache_params(s, num_memcgs); | ||
179 | /* | 181 | /* |
180 | * Instead of freeing the memory, we'll just leave the caches | 182 | * Instead of freeing the memory, we'll just leave the caches |
181 | * up to this point in an updated state. | 183 | * up to this point in an updated state. |
182 | */ | 184 | */ |
183 | if (ret) | 185 | if (ret) |
184 | goto out; | 186 | break; |
185 | } | 187 | } |
186 | |||
187 | memcg_update_array_size(num_memcgs); | ||
188 | out: | ||
189 | mutex_unlock(&slab_mutex); | 188 | mutex_unlock(&slab_mutex); |
190 | return ret; | 189 | return ret; |
191 | } | 190 | } |
192 | #else | 191 | #else |
193 | static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg, | 192 | static inline int init_memcg_params(struct kmem_cache *s, |
194 | struct kmem_cache *s, struct kmem_cache *root_cache) | 193 | struct mem_cgroup *memcg, struct kmem_cache *root_cache) |
195 | { | 194 | { |
196 | return 0; | 195 | return 0; |
197 | } | 196 | } |
198 | 197 | ||
199 | static inline void memcg_free_cache_params(struct kmem_cache *s) | 198 | static inline void destroy_memcg_params(struct kmem_cache *s) |
200 | { | 199 | { |
201 | } | 200 | } |
202 | #endif /* CONFIG_MEMCG_KMEM */ | 201 | #endif /* CONFIG_MEMCG_KMEM */ |
@@ -314,7 +313,7 @@ do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align, | |||
314 | s->align = align; | 313 | s->align = align; |
315 | s->ctor = ctor; | 314 | s->ctor = ctor; |
316 | 315 | ||
317 | err = memcg_alloc_cache_params(memcg, s, root_cache); | 316 | err = init_memcg_params(s, memcg, root_cache); |
318 | if (err) | 317 | if (err) |
319 | goto out_free_cache; | 318 | goto out_free_cache; |
320 | 319 | ||
@@ -330,7 +329,7 @@ out: | |||
330 | return s; | 329 | return s; |
331 | 330 | ||
332 | out_free_cache: | 331 | out_free_cache: |
333 | memcg_free_cache_params(s); | 332 | destroy_memcg_params(s); |
334 | kmem_cache_free(kmem_cache, s); | 333 | kmem_cache_free(kmem_cache, s); |
335 | goto out; | 334 | goto out; |
336 | } | 335 | } |
@@ -369,6 +368,7 @@ kmem_cache_create(const char *name, size_t size, size_t align, | |||
369 | 368 | ||
370 | get_online_cpus(); | 369 | get_online_cpus(); |
371 | get_online_mems(); | 370 | get_online_mems(); |
371 | memcg_get_cache_ids(); | ||
372 | 372 | ||
373 | mutex_lock(&slab_mutex); | 373 | mutex_lock(&slab_mutex); |
374 | 374 | ||
@@ -407,6 +407,7 @@ kmem_cache_create(const char *name, size_t size, size_t align, | |||
407 | out_unlock: | 407 | out_unlock: |
408 | mutex_unlock(&slab_mutex); | 408 | mutex_unlock(&slab_mutex); |
409 | 409 | ||
410 | memcg_put_cache_ids(); | ||
410 | put_online_mems(); | 411 | put_online_mems(); |
411 | put_online_cpus(); | 412 | put_online_cpus(); |
412 | 413 | ||
@@ -439,13 +440,8 @@ static int do_kmem_cache_shutdown(struct kmem_cache *s, | |||
439 | *need_rcu_barrier = true; | 440 | *need_rcu_barrier = true; |
440 | 441 | ||
441 | #ifdef CONFIG_MEMCG_KMEM | 442 | #ifdef CONFIG_MEMCG_KMEM |
442 | if (!is_root_cache(s)) { | 443 | if (!is_root_cache(s)) |
443 | struct kmem_cache *root_cache = s->memcg_params->root_cache; | 444 | list_del(&s->memcg_params.list); |
444 | int memcg_id = memcg_cache_id(s->memcg_params->memcg); | ||
445 | |||
446 | BUG_ON(root_cache->memcg_params->memcg_caches[memcg_id] != s); | ||
447 | root_cache->memcg_params->memcg_caches[memcg_id] = NULL; | ||
448 | } | ||
449 | #endif | 445 | #endif |
450 | list_move(&s->list, release); | 446 | list_move(&s->list, release); |
451 | return 0; | 447 | return 0; |
@@ -482,9 +478,11 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg, | |||
482 | struct kmem_cache *root_cache) | 478 | struct kmem_cache *root_cache) |
483 | { | 479 | { |
484 | static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */ | 480 | static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */ |
485 | int memcg_id = memcg_cache_id(memcg); | 481 | struct cgroup_subsys_state *css = mem_cgroup_css(memcg); |
482 | struct memcg_cache_array *arr; | ||
486 | struct kmem_cache *s = NULL; | 483 | struct kmem_cache *s = NULL; |
487 | char *cache_name; | 484 | char *cache_name; |
485 | int idx; | ||
488 | 486 | ||
489 | get_online_cpus(); | 487 | get_online_cpus(); |
490 | get_online_mems(); | 488 | get_online_mems(); |
@@ -492,17 +490,27 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg, | |||
492 | mutex_lock(&slab_mutex); | 490 | mutex_lock(&slab_mutex); |
493 | 491 | ||
494 | /* | 492 | /* |
493 | * The memory cgroup could have been deactivated while the cache | ||
494 | * creation work was pending. | ||
495 | */ | ||
496 | if (!memcg_kmem_is_active(memcg)) | ||
497 | goto out_unlock; | ||
498 | |||
499 | idx = memcg_cache_id(memcg); | ||
500 | arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches, | ||
501 | lockdep_is_held(&slab_mutex)); | ||
502 | |||
503 | /* | ||
495 | * Since per-memcg caches are created asynchronously on first | 504 | * Since per-memcg caches are created asynchronously on first |
496 | * allocation (see memcg_kmem_get_cache()), several threads can try to | 505 | * allocation (see memcg_kmem_get_cache()), several threads can try to |
497 | * create the same cache, but only one of them may succeed. | 506 | * create the same cache, but only one of them may succeed. |
498 | */ | 507 | */ |
499 | if (cache_from_memcg_idx(root_cache, memcg_id)) | 508 | if (arr->entries[idx]) |
500 | goto out_unlock; | 509 | goto out_unlock; |
501 | 510 | ||
502 | cgroup_name(mem_cgroup_css(memcg)->cgroup, | 511 | cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf)); |
503 | memcg_name_buf, sizeof(memcg_name_buf)); | ||
504 | cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name, | 512 | cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name, |
505 | memcg_cache_id(memcg), memcg_name_buf); | 513 | css->id, memcg_name_buf); |
506 | if (!cache_name) | 514 | if (!cache_name) |
507 | goto out_unlock; | 515 | goto out_unlock; |
508 | 516 | ||
@@ -520,13 +528,15 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg, | |||
520 | goto out_unlock; | 528 | goto out_unlock; |
521 | } | 529 | } |
522 | 530 | ||
531 | list_add(&s->memcg_params.list, &root_cache->memcg_params.list); | ||
532 | |||
523 | /* | 533 | /* |
524 | * Since readers won't lock (see cache_from_memcg_idx()), we need a | 534 | * Since readers won't lock (see cache_from_memcg_idx()), we need a |
525 | * barrier here to ensure nobody will see the kmem_cache partially | 535 | * barrier here to ensure nobody will see the kmem_cache partially |
526 | * initialized. | 536 | * initialized. |
527 | */ | 537 | */ |
528 | smp_wmb(); | 538 | smp_wmb(); |
529 | root_cache->memcg_params->memcg_caches[memcg_id] = s; | 539 | arr->entries[idx] = s; |
530 | 540 | ||
531 | out_unlock: | 541 | out_unlock: |
532 | mutex_unlock(&slab_mutex); | 542 | mutex_unlock(&slab_mutex); |
@@ -535,6 +545,37 @@ out_unlock: | |||
535 | put_online_cpus(); | 545 | put_online_cpus(); |
536 | } | 546 | } |
537 | 547 | ||
548 | void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) | ||
549 | { | ||
550 | int idx; | ||
551 | struct memcg_cache_array *arr; | ||
552 | struct kmem_cache *s, *c; | ||
553 | |||
554 | idx = memcg_cache_id(memcg); | ||
555 | |||
556 | get_online_cpus(); | ||
557 | get_online_mems(); | ||
558 | |||
559 | mutex_lock(&slab_mutex); | ||
560 | list_for_each_entry(s, &slab_caches, list) { | ||
561 | if (!is_root_cache(s)) | ||
562 | continue; | ||
563 | |||
564 | arr = rcu_dereference_protected(s->memcg_params.memcg_caches, | ||
565 | lockdep_is_held(&slab_mutex)); | ||
566 | c = arr->entries[idx]; | ||
567 | if (!c) | ||
568 | continue; | ||
569 | |||
570 | __kmem_cache_shrink(c, true); | ||
571 | arr->entries[idx] = NULL; | ||
572 | } | ||
573 | mutex_unlock(&slab_mutex); | ||
574 | |||
575 | put_online_mems(); | ||
576 | put_online_cpus(); | ||
577 | } | ||
578 | |||
538 | void memcg_destroy_kmem_caches(struct mem_cgroup *memcg) | 579 | void memcg_destroy_kmem_caches(struct mem_cgroup *memcg) |
539 | { | 580 | { |
540 | LIST_HEAD(release); | 581 | LIST_HEAD(release); |
@@ -546,7 +587,7 @@ void memcg_destroy_kmem_caches(struct mem_cgroup *memcg) | |||
546 | 587 | ||
547 | mutex_lock(&slab_mutex); | 588 | mutex_lock(&slab_mutex); |
548 | list_for_each_entry_safe(s, s2, &slab_caches, list) { | 589 | list_for_each_entry_safe(s, s2, &slab_caches, list) { |
549 | if (is_root_cache(s) || s->memcg_params->memcg != memcg) | 590 | if (is_root_cache(s) || s->memcg_params.memcg != memcg) |
550 | continue; | 591 | continue; |
551 | /* | 592 | /* |
552 | * The cgroup is about to be freed and therefore has no charges | 593 | * The cgroup is about to be freed and therefore has no charges |
@@ -565,18 +606,20 @@ void memcg_destroy_kmem_caches(struct mem_cgroup *memcg) | |||
565 | 606 | ||
566 | void slab_kmem_cache_release(struct kmem_cache *s) | 607 | void slab_kmem_cache_release(struct kmem_cache *s) |
567 | { | 608 | { |
568 | memcg_free_cache_params(s); | 609 | destroy_memcg_params(s); |
569 | kfree(s->name); | 610 | kfree(s->name); |
570 | kmem_cache_free(kmem_cache, s); | 611 | kmem_cache_free(kmem_cache, s); |
571 | } | 612 | } |
572 | 613 | ||
573 | void kmem_cache_destroy(struct kmem_cache *s) | 614 | void kmem_cache_destroy(struct kmem_cache *s) |
574 | { | 615 | { |
575 | int i; | 616 | struct kmem_cache *c, *c2; |
576 | LIST_HEAD(release); | 617 | LIST_HEAD(release); |
577 | bool need_rcu_barrier = false; | 618 | bool need_rcu_barrier = false; |
578 | bool busy = false; | 619 | bool busy = false; |
579 | 620 | ||
621 | BUG_ON(!is_root_cache(s)); | ||
622 | |||
580 | get_online_cpus(); | 623 | get_online_cpus(); |
581 | get_online_mems(); | 624 | get_online_mems(); |
582 | 625 | ||
@@ -586,10 +629,8 @@ void kmem_cache_destroy(struct kmem_cache *s) | |||
586 | if (s->refcount) | 629 | if (s->refcount) |
587 | goto out_unlock; | 630 | goto out_unlock; |
588 | 631 | ||
589 | for_each_memcg_cache_index(i) { | 632 | for_each_memcg_cache_safe(c, c2, s) { |
590 | struct kmem_cache *c = cache_from_memcg_idx(s, i); | 633 | if (do_kmem_cache_shutdown(c, &release, &need_rcu_barrier)) |
591 | |||
592 | if (c && do_kmem_cache_shutdown(c, &release, &need_rcu_barrier)) | ||
593 | busy = true; | 634 | busy = true; |
594 | } | 635 | } |
595 | 636 | ||
@@ -619,7 +660,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep) | |||
619 | 660 | ||
620 | get_online_cpus(); | 661 | get_online_cpus(); |
621 | get_online_mems(); | 662 | get_online_mems(); |
622 | ret = __kmem_cache_shrink(cachep); | 663 | ret = __kmem_cache_shrink(cachep, false); |
623 | put_online_mems(); | 664 | put_online_mems(); |
624 | put_online_cpus(); | 665 | put_online_cpus(); |
625 | return ret; | 666 | return ret; |
@@ -641,6 +682,9 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz | |||
641 | s->name = name; | 682 | s->name = name; |
642 | s->size = s->object_size = size; | 683 | s->size = s->object_size = size; |
643 | s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size); | 684 | s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size); |
685 | |||
686 | slab_init_memcg_params(s); | ||
687 | |||
644 | err = __kmem_cache_create(s, flags); | 688 | err = __kmem_cache_create(s, flags); |
645 | 689 | ||
646 | if (err) | 690 | if (err) |
@@ -920,16 +964,11 @@ memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info) | |||
920 | { | 964 | { |
921 | struct kmem_cache *c; | 965 | struct kmem_cache *c; |
922 | struct slabinfo sinfo; | 966 | struct slabinfo sinfo; |
923 | int i; | ||
924 | 967 | ||
925 | if (!is_root_cache(s)) | 968 | if (!is_root_cache(s)) |
926 | return; | 969 | return; |
927 | 970 | ||
928 | for_each_memcg_cache_index(i) { | 971 | for_each_memcg_cache(c, s) { |
929 | c = cache_from_memcg_idx(s, i); | ||
930 | if (!c) | ||
931 | continue; | ||
932 | |||
933 | memset(&sinfo, 0, sizeof(sinfo)); | 972 | memset(&sinfo, 0, sizeof(sinfo)); |
934 | get_slabinfo(c, &sinfo); | 973 | get_slabinfo(c, &sinfo); |
935 | 974 | ||
@@ -981,7 +1020,7 @@ int memcg_slab_show(struct seq_file *m, void *p) | |||
981 | 1020 | ||
982 | if (p == slab_caches.next) | 1021 | if (p == slab_caches.next) |
983 | print_slabinfo_header(m); | 1022 | print_slabinfo_header(m); |
984 | if (!is_root_cache(s) && s->memcg_params->memcg == memcg) | 1023 | if (!is_root_cache(s) && s->memcg_params.memcg == memcg) |
985 | cache_show(s, m); | 1024 | cache_show(s, m); |
986 | return 0; | 1025 | return 0; |
987 | } | 1026 | } |
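In the mm/slab_common.c hunks above, update_memcg_params() grows the root cache's memcg_caches array with a copy-and-publish pattern: allocate a larger flexible-array struct under slab_mutex, copy the old entries, rcu_assign_pointer() the new array in, and defer freeing the old one with kfree_rcu(). A userspace approximation without real RCU, where a plain pointer assignment and an immediate free stand in for the publish and the grace period:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Mirrors the shape of struct memcg_cache_array: header plus flexible array. */
struct cache_array {
	size_t size;
	void *entries[];
};

static struct cache_array *alloc_array(size_t size)
{
	struct cache_array *arr;

	arr = calloc(1, sizeof(*arr) + size * sizeof(void *));
	if (arr)
		arr->size = size;
	return arr;
}

/* Toy update_memcg_params(): grow by copy-and-swap. In the kernel the
 * swap is rcu_assign_pointer() and the free is deferred via kfree_rcu(). */
static int grow_array(struct cache_array **slot, size_t new_size)
{
	struct cache_array *old = *slot;
	struct cache_array *new = alloc_array(new_size);

	if (!new)
		return -1;
	if (old)
		memcpy(new->entries, old->entries, old->size * sizeof(void *));
	*slot = new;      /* rcu_assign_pointer() in the kernel */
	free(old);        /* kfree_rcu() in the kernel */
	return 0;
}

int main(void)
{
	struct cache_array *caches = alloc_array(2);
	int dummy;

	if (!caches)
		return 1;
	caches->entries[1] = &dummy;
	grow_array(&caches, 8);
	printf("size=%zu, entry[1] preserved: %d\n",
	       caches->size, caches->entries[1] == &dummy);
	free(caches);
	return 0;
}

Readers under rcu_read_lock() keep seeing either the old or the new array, which is why cache_from_memcg_idx() in the mm/slab.h hunk dereferences memcg_caches with rcu_dereference()/lockless_dereference().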
diff --git a/mm/slob.c b/mm/slob.c --- a/mm/slob.c +++ b/mm/slob.c | |||
@@ -618,7 +618,7 @@ int __kmem_cache_shutdown(struct kmem_cache *c) | |||
618 | return 0; | 618 | return 0; |
619 | } | 619 | } |
620 | 620 | ||
621 | int __kmem_cache_shrink(struct kmem_cache *d) | 621 | int __kmem_cache_shrink(struct kmem_cache *d, bool deactivate) |
622 | { | 622 | { |
623 | return 0; | 623 | return 0; |
624 | } | 624 | } |
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c | |||
@@ -2007,6 +2007,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) | |||
2007 | int pages; | 2007 | int pages; |
2008 | int pobjects; | 2008 | int pobjects; |
2009 | 2009 | ||
2010 | preempt_disable(); | ||
2010 | do { | 2011 | do { |
2011 | pages = 0; | 2012 | pages = 0; |
2012 | pobjects = 0; | 2013 | pobjects = 0; |
@@ -2040,6 +2041,14 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) | |||
2040 | 2041 | ||
2041 | } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) | 2042 | } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) |
2042 | != oldpage); | 2043 | != oldpage); |
2044 | if (unlikely(!s->cpu_partial)) { | ||
2045 | unsigned long flags; | ||
2046 | |||
2047 | local_irq_save(flags); | ||
2048 | unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); | ||
2049 | local_irq_restore(flags); | ||
2050 | } | ||
2051 | preempt_enable(); | ||
2043 | #endif | 2052 | #endif |
2044 | } | 2053 | } |
2045 | 2054 | ||
@@ -3358,69 +3367,92 @@ void kfree(const void *x) | |||
3358 | } | 3367 | } |
3359 | EXPORT_SYMBOL(kfree); | 3368 | EXPORT_SYMBOL(kfree); |
3360 | 3369 | ||
3370 | #define SHRINK_PROMOTE_MAX 32 | ||
3371 | |||
3361 | /* | 3372 | /* |
3362 | * kmem_cache_shrink removes empty slabs from the partial lists and sorts | 3373 | * kmem_cache_shrink discards empty slabs and promotes the slabs filled |
3363 | * the remaining slabs by the number of items in use. The slabs with the | 3374 | * up most to the head of the partial lists. New allocations will then |
3364 | * most items in use come first. New allocations will then fill those up | 3375 | * fill those up and thus they can be removed from the partial lists. |
3365 | * and thus they can be removed from the partial lists. | ||
3366 | * | 3376 | * |
3367 | * The slabs with the least items are placed last. This results in them | 3377 | * The slabs with the least items are placed last. This results in them |
3368 | * being allocated from last increasing the chance that the last objects | 3378 | * being allocated from last increasing the chance that the last objects |
3369 | * are freed in them. | 3379 | * are freed in them. |
3370 | */ | 3380 | */ |
3371 | int __kmem_cache_shrink(struct kmem_cache *s) | 3381 | int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) |
3372 | { | 3382 | { |
3373 | int node; | 3383 | int node; |
3374 | int i; | 3384 | int i; |
3375 | struct kmem_cache_node *n; | 3385 | struct kmem_cache_node *n; |
3376 | struct page *page; | 3386 | struct page *page; |
3377 | struct page *t; | 3387 | struct page *t; |
3378 | int objects = oo_objects(s->max); | 3388 | struct list_head discard; |
3379 | struct list_head *slabs_by_inuse = | 3389 | struct list_head promote[SHRINK_PROMOTE_MAX]; |
3380 | kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL); | ||
3381 | unsigned long flags; | 3390 | unsigned long flags; |
3391 | int ret = 0; | ||
3382 | 3392 | ||
3383 | if (!slabs_by_inuse) | 3393 | if (deactivate) { |
3384 | return -ENOMEM; | 3394 | /* |
3395 | * Disable empty slabs caching. Used to avoid pinning offline | ||
3396 | * memory cgroups by kmem pages that can be freed. | ||
3397 | */ | ||
3398 | s->cpu_partial = 0; | ||
3399 | s->min_partial = 0; | ||
3400 | |||
3401 | /* | ||
3402 | * s->cpu_partial is checked locklessly (see put_cpu_partial), | ||
3403 | * so we have to make sure the change is visible. | ||
3404 | */ | ||
3405 | kick_all_cpus_sync(); | ||
3406 | } | ||
3385 | 3407 | ||
3386 | flush_all(s); | 3408 | flush_all(s); |
3387 | for_each_kmem_cache_node(s, node, n) { | 3409 | for_each_kmem_cache_node(s, node, n) { |
3388 | if (!n->nr_partial) | 3410 | INIT_LIST_HEAD(&discard); |
3389 | continue; | 3411 | for (i = 0; i < SHRINK_PROMOTE_MAX; i++) |
3390 | 3412 | INIT_LIST_HEAD(promote + i); | |
3391 | for (i = 0; i < objects; i++) | ||
3392 | INIT_LIST_HEAD(slabs_by_inuse + i); | ||
3393 | 3413 | ||
3394 | spin_lock_irqsave(&n->list_lock, flags); | 3414 | spin_lock_irqsave(&n->list_lock, flags); |
3395 | 3415 | ||
3396 | /* | 3416 | /* |
3397 | * Build lists indexed by the items in use in each slab. | 3417 | * Build lists of slabs to discard or promote. |
3398 | * | 3418 | * |
3399 | * Note that concurrent frees may occur while we hold the | 3419 | * Note that concurrent frees may occur while we hold the |
3400 | * list_lock. page->inuse here is the upper limit. | 3420 | * list_lock. page->inuse here is the upper limit. |
3401 | */ | 3421 | */ |
3402 | list_for_each_entry_safe(page, t, &n->partial, lru) { | 3422 | list_for_each_entry_safe(page, t, &n->partial, lru) { |
3403 | list_move(&page->lru, slabs_by_inuse + page->inuse); | 3423 | int free = page->objects - page->inuse; |
3404 | if (!page->inuse) | 3424 | |
3425 | /* Do not reread page->inuse */ | ||
3426 | barrier(); | ||
3427 | |||
3428 | /* We do not keep full slabs on the list */ | ||
3429 | BUG_ON(free <= 0); | ||
3430 | |||
3431 | if (free == page->objects) { | ||
3432 | list_move(&page->lru, &discard); | ||
3405 | n->nr_partial--; | 3433 | n->nr_partial--; |
3434 | } else if (free <= SHRINK_PROMOTE_MAX) | ||
3435 | list_move(&page->lru, promote + free - 1); | ||
3406 | } | 3436 | } |
3407 | 3437 | ||
3408 | /* | 3438 | /* |
3409 | * Rebuild the partial list with the slabs filled up most | 3439 | * Promote the slabs filled up most to the head of the |
3410 | * first and the least used slabs at the end. | 3440 | * partial list. |
3411 | */ | 3441 | */ |
3412 | for (i = objects - 1; i > 0; i--) | 3442 | for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) |
3413 | list_splice(slabs_by_inuse + i, n->partial.prev); | 3443 | list_splice(promote + i, &n->partial); |
3414 | 3444 | ||
3415 | spin_unlock_irqrestore(&n->list_lock, flags); | 3445 | spin_unlock_irqrestore(&n->list_lock, flags); |
3416 | 3446 | ||
3417 | /* Release empty slabs */ | 3447 | /* Release empty slabs */ |
3418 | list_for_each_entry_safe(page, t, slabs_by_inuse, lru) | 3448 | list_for_each_entry_safe(page, t, &discard, lru) |
3419 | discard_slab(s, page); | 3449 | discard_slab(s, page); |
3450 | |||
3451 | if (slabs_node(s, node)) | ||
3452 | ret = 1; | ||
3420 | } | 3453 | } |
3421 | 3454 | ||
3422 | kfree(slabs_by_inuse); | 3455 | return ret; |
3423 | return 0; | ||
3424 | } | 3456 | } |
3425 | 3457 | ||
3426 | static int slab_mem_going_offline_callback(void *arg) | 3458 | static int slab_mem_going_offline_callback(void *arg) |
@@ -3429,7 +3461,7 @@ static int slab_mem_going_offline_callback(void *arg) | |||
3429 | 3461 | ||
3430 | mutex_lock(&slab_mutex); | 3462 | mutex_lock(&slab_mutex); |
3431 | list_for_each_entry(s, &slab_caches, list) | 3463 | list_for_each_entry(s, &slab_caches, list) |
3432 | __kmem_cache_shrink(s); | 3464 | __kmem_cache_shrink(s, false); |
3433 | mutex_unlock(&slab_mutex); | 3465 | mutex_unlock(&slab_mutex); |
3434 | 3466 | ||
3435 | return 0; | 3467 | return 0; |
@@ -3577,6 +3609,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache) | |||
3577 | p->slab_cache = s; | 3609 | p->slab_cache = s; |
3578 | #endif | 3610 | #endif |
3579 | } | 3611 | } |
3612 | slab_init_memcg_params(s); | ||
3580 | list_add(&s->list, &slab_caches); | 3613 | list_add(&s->list, &slab_caches); |
3581 | return s; | 3614 | return s; |
3582 | } | 3615 | } |
@@ -3635,13 +3668,10 @@ struct kmem_cache * | |||
3635 | __kmem_cache_alias(const char *name, size_t size, size_t align, | 3668 | __kmem_cache_alias(const char *name, size_t size, size_t align, |
3636 | unsigned long flags, void (*ctor)(void *)) | 3669 | unsigned long flags, void (*ctor)(void *)) |
3637 | { | 3670 | { |
3638 | struct kmem_cache *s; | 3671 | struct kmem_cache *s, *c; |
3639 | 3672 | ||
3640 | s = find_mergeable(size, align, flags, name, ctor); | 3673 | s = find_mergeable(size, align, flags, name, ctor); |
3641 | if (s) { | 3674 | if (s) { |
3642 | int i; | ||
3643 | struct kmem_cache *c; | ||
3644 | |||
3645 | s->refcount++; | 3675 | s->refcount++; |
3646 | 3676 | ||
3647 | /* | 3677 | /* |
@@ -3651,10 +3681,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align, | |||
3651 | s->object_size = max(s->object_size, (int)size); | 3681 | s->object_size = max(s->object_size, (int)size); |
3652 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); | 3682 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); |
3653 | 3683 | ||
3654 | for_each_memcg_cache_index(i) { | 3684 | for_each_memcg_cache(c, s) { |
3655 | c = cache_from_memcg_idx(s, i); | ||
3656 | if (!c) | ||
3657 | continue; | ||
3658 | c->object_size = s->object_size; | 3685 | c->object_size = s->object_size; |
3659 | c->inuse = max_t(int, c->inuse, | 3686 | c->inuse = max_t(int, c->inuse, |
3660 | ALIGN(size, sizeof(void *))); | 3687 | ALIGN(size, sizeof(void *))); |
@@ -4691,12 +4718,9 @@ static ssize_t shrink_show(struct kmem_cache *s, char *buf) | |||
4691 | static ssize_t shrink_store(struct kmem_cache *s, | 4718 | static ssize_t shrink_store(struct kmem_cache *s, |
4692 | const char *buf, size_t length) | 4719 | const char *buf, size_t length) |
4693 | { | 4720 | { |
4694 | if (buf[0] == '1') { | 4721 | if (buf[0] == '1') |
4695 | int rc = kmem_cache_shrink(s); | 4722 | kmem_cache_shrink(s); |
4696 | 4723 | else | |
4697 | if (rc) | ||
4698 | return rc; | ||
4699 | } else | ||
4700 | return -EINVAL; | 4724 | return -EINVAL; |
4701 | return length; | 4725 | return length; |
4702 | } | 4726 | } |
@@ -4920,7 +4944,7 @@ static ssize_t slab_attr_store(struct kobject *kobj, | |||
4920 | err = attribute->store(s, buf, len); | 4944 | err = attribute->store(s, buf, len); |
4921 | #ifdef CONFIG_MEMCG_KMEM | 4945 | #ifdef CONFIG_MEMCG_KMEM |
4922 | if (slab_state >= FULL && err >= 0 && is_root_cache(s)) { | 4946 | if (slab_state >= FULL && err >= 0 && is_root_cache(s)) { |
4923 | int i; | 4947 | struct kmem_cache *c; |
4924 | 4948 | ||
4925 | mutex_lock(&slab_mutex); | 4949 | mutex_lock(&slab_mutex); |
4926 | if (s->max_attr_size < len) | 4950 | if (s->max_attr_size < len) |
@@ -4943,11 +4967,8 @@ static ssize_t slab_attr_store(struct kobject *kobj, | |||
4943 | * directly either failed or succeeded, in which case we loop | 4967 | * directly either failed or succeeded, in which case we loop |
4944 | * through the descendants with best-effort propagation. | 4968 | * through the descendants with best-effort propagation. |
4945 | */ | 4969 | */ |
4946 | for_each_memcg_cache_index(i) { | 4970 | for_each_memcg_cache(c, s) |
4947 | struct kmem_cache *c = cache_from_memcg_idx(s, i); | 4971 | attribute->store(c, buf, len); |
4948 | if (c) | ||
4949 | attribute->store(c, buf, len); | ||
4950 | } | ||
4951 | mutex_unlock(&slab_mutex); | 4972 | mutex_unlock(&slab_mutex); |
4952 | } | 4973 | } |
4953 | #endif | 4974 | #endif |
@@ -4964,7 +4985,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) | |||
4964 | if (is_root_cache(s)) | 4985 | if (is_root_cache(s)) |
4965 | return; | 4986 | return; |
4966 | 4987 | ||
4967 | root_cache = s->memcg_params->root_cache; | 4988 | root_cache = s->memcg_params.root_cache; |
4968 | 4989 | ||
4969 | /* | 4990 | /* |
4970 | * This mean this cache had no attribute written. Therefore, no point | 4991 | * This mean this cache had no attribute written. Therefore, no point |
@@ -5044,7 +5065,7 @@ static inline struct kset *cache_kset(struct kmem_cache *s) | |||
5044 | { | 5065 | { |
5045 | #ifdef CONFIG_MEMCG_KMEM | 5066 | #ifdef CONFIG_MEMCG_KMEM |
5046 | if (!is_root_cache(s)) | 5067 | if (!is_root_cache(s)) |
5047 | return s->memcg_params->root_cache->memcg_kset; | 5068 | return s->memcg_params.root_cache->memcg_kset; |
5048 | #endif | 5069 | #endif |
5049 | return slab_kset; | 5070 | return slab_kset; |
5050 | } | 5071 | } |
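The headline change in the mm/slub.c section above is __kmem_cache_shrink(): instead of kmalloc'ing one list head per possible inuse count, it now uses a discard list for completely empty slabs plus a fixed array of SHRINK_PROMOTE_MAX buckets keyed by free-object count, and splices the buckets back most-full-first. A userspace sketch of that bucketing, with a toy slab struct and plain next pointers standing in for list_head:

#include <stdio.h>

#define PROMOTE_MAX 4	/* SHRINK_PROMOTE_MAX is 32 in the patch */

struct toy_slab {
	const char *name;
	int objects, inuse;
	struct toy_slab *next;
};

int main(void)
{
	struct toy_slab slabs[] = {
		{ "s0", 8, 8, NULL }, { "s1", 8, 0, NULL },
		{ "s2", 8, 7, NULL }, { "s3", 8, 5, NULL },
		{ "s4", 8, 6, NULL },
	};
	struct toy_slab *partial = NULL, *discard = NULL;
	struct toy_slab *promote[PROMOTE_MAX] = { NULL };

	/* Sort slabs into the discard list or a promote bucket keyed by
	 * how many objects are still free. */
	for (int i = 0; i < 5; i++) {
		struct toy_slab *s = &slabs[i];
		int free = s->objects - s->inuse;

		if (free == 0)
			continue;		/* a full slab would not be on the partial list */
		if (free == s->objects) {
			s->next = discard;	/* completely empty: release it */
			discard = s;
		} else if (free <= PROMOTE_MAX) {
			s->next = promote[free - 1];
			promote[free - 1] = s;	/* nearly full: promote */
		} else {
			s->next = partial;	/* plenty of room left: keep as-is */
			partial = s;
		}
	}

	/* Splice buckets back most-full-first (fewest free objects at the head). */
	for (int i = PROMOTE_MAX - 1; i >= 0; i--) {
		struct toy_slab *s = promote[i];

		while (s) {
			struct toy_slab *n = s->next;

			s->next = partial;
			partial = s;
			s = n;
		}
	}

	for (struct toy_slab *s = partial; s; s = s->next)
		printf("partial: %s (%d free)\n", s->name, s->objects - s->inuse);
	for (struct toy_slab *s = discard; s; s = s->next)
		printf("discard: %s\n", s->name);
	return 0;
}

Using a fixed 32-bucket array also removes the kmalloc() of oo_objects() list heads and with it the -ENOMEM failure path visible in the deleted lines.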
diff --git a/mm/vmscan.c b/mm/vmscan.c index 224dd298fdcd..5e8eadd71bac 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -232,10 +232,10 @@ EXPORT_SYMBOL(unregister_shrinker); | |||
232 | 232 | ||
233 | #define SHRINK_BATCH 128 | 233 | #define SHRINK_BATCH 128 |
234 | 234 | ||
235 | static unsigned long shrink_slabs(struct shrink_control *shrinkctl, | 235 | static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, |
236 | struct shrinker *shrinker, | 236 | struct shrinker *shrinker, |
237 | unsigned long nr_scanned, | 237 | unsigned long nr_scanned, |
238 | unsigned long nr_eligible) | 238 | unsigned long nr_eligible) |
239 | { | 239 | { |
240 | unsigned long freed = 0; | 240 | unsigned long freed = 0; |
241 | unsigned long long delta; | 241 | unsigned long long delta; |
@@ -344,9 +344,10 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl, | |||
344 | } | 344 | } |
345 | 345 | ||
346 | /** | 346 | /** |
347 | * shrink_node_slabs - shrink slab caches of a given node | 347 | * shrink_slab - shrink slab caches |
348 | * @gfp_mask: allocation context | 348 | * @gfp_mask: allocation context |
349 | * @nid: node whose slab caches to target | 349 | * @nid: node whose slab caches to target |
350 | * @memcg: memory cgroup whose slab caches to target | ||
350 | * @nr_scanned: pressure numerator | 351 | * @nr_scanned: pressure numerator |
351 | * @nr_eligible: pressure denominator | 352 | * @nr_eligible: pressure denominator |
352 | * | 353 | * |
@@ -355,6 +356,12 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl, | |||
355 | * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set, | 356 | * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set, |
356 | * unaware shrinkers will receive a node id of 0 instead. | 357 | * unaware shrinkers will receive a node id of 0 instead. |
357 | * | 358 | * |
359 | * @memcg specifies the memory cgroup to target. If it is not NULL, | ||
360 | * only shrinkers with SHRINKER_MEMCG_AWARE set will be called to scan | ||
361 | * objects from the memory cgroup specified. Otherwise all shrinkers | ||
362 | * are called, and memcg aware shrinkers are supposed to scan the | ||
363 | * global list then. | ||
364 | * | ||
358 | * @nr_scanned and @nr_eligible form a ratio that indicate how much of | 365 | * @nr_scanned and @nr_eligible form a ratio that indicate how much of |
359 | * the available objects should be scanned. Page reclaim for example | 366 | * the available objects should be scanned. Page reclaim for example |
360 | * passes the number of pages scanned and the number of pages on the | 367 | * passes the number of pages scanned and the number of pages on the |
@@ -365,13 +372,17 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl, | |||
365 | * | 372 | * |
366 | * Returns the number of reclaimed slab objects. | 373 | * Returns the number of reclaimed slab objects. |
367 | */ | 374 | */ |
368 | unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid, | 375 | static unsigned long shrink_slab(gfp_t gfp_mask, int nid, |
369 | unsigned long nr_scanned, | 376 | struct mem_cgroup *memcg, |
370 | unsigned long nr_eligible) | 377 | unsigned long nr_scanned, |
378 | unsigned long nr_eligible) | ||
371 | { | 379 | { |
372 | struct shrinker *shrinker; | 380 | struct shrinker *shrinker; |
373 | unsigned long freed = 0; | 381 | unsigned long freed = 0; |
374 | 382 | ||
383 | if (memcg && !memcg_kmem_is_active(memcg)) | ||
384 | return 0; | ||
385 | |||
375 | if (nr_scanned == 0) | 386 | if (nr_scanned == 0) |
376 | nr_scanned = SWAP_CLUSTER_MAX; | 387 | nr_scanned = SWAP_CLUSTER_MAX; |
377 | 388 | ||
@@ -390,12 +401,16 @@ unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid, | |||
390 | struct shrink_control sc = { | 401 | struct shrink_control sc = { |
391 | .gfp_mask = gfp_mask, | 402 | .gfp_mask = gfp_mask, |
392 | .nid = nid, | 403 | .nid = nid, |
404 | .memcg = memcg, | ||
393 | }; | 405 | }; |
394 | 406 | ||
407 | if (memcg && !(shrinker->flags & SHRINKER_MEMCG_AWARE)) | ||
408 | continue; | ||
409 | |||
395 | if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) | 410 | if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) |
396 | sc.nid = 0; | 411 | sc.nid = 0; |
397 | 412 | ||
398 | freed += shrink_slabs(&sc, shrinker, nr_scanned, nr_eligible); | 413 | freed += do_shrink_slab(&sc, shrinker, nr_scanned, nr_eligible); |
399 | } | 414 | } |
400 | 415 | ||
401 | up_read(&shrinker_rwsem); | 416 | up_read(&shrinker_rwsem); |
@@ -404,6 +419,29 @@ out: | |||
404 | return freed; | 419 | return freed; |
405 | } | 420 | } |
406 | 421 | ||
422 | void drop_slab_node(int nid) | ||
423 | { | ||
424 | unsigned long freed; | ||
425 | |||
426 | do { | ||
427 | struct mem_cgroup *memcg = NULL; | ||
428 | |||
429 | freed = 0; | ||
430 | do { | ||
431 | freed += shrink_slab(GFP_KERNEL, nid, memcg, | ||
432 | 1000, 1000); | ||
433 | } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); | ||
434 | } while (freed > 10); | ||
435 | } | ||
436 | |||
437 | void drop_slab(void) | ||
438 | { | ||
439 | int nid; | ||
440 | |||
441 | for_each_online_node(nid) | ||
442 | drop_slab_node(nid); | ||
443 | } | ||
444 | |||
407 | static inline int is_page_cache_freeable(struct page *page) | 445 | static inline int is_page_cache_freeable(struct page *page) |
408 | { | 446 | { |
409 | /* | 447 | /* |
@@ -2276,6 +2314,7 @@ static inline bool should_continue_reclaim(struct zone *zone, | |||
2276 | static bool shrink_zone(struct zone *zone, struct scan_control *sc, | 2314 | static bool shrink_zone(struct zone *zone, struct scan_control *sc, |
2277 | bool is_classzone) | 2315 | bool is_classzone) |
2278 | { | 2316 | { |
2317 | struct reclaim_state *reclaim_state = current->reclaim_state; | ||
2279 | unsigned long nr_reclaimed, nr_scanned; | 2318 | unsigned long nr_reclaimed, nr_scanned; |
2280 | bool reclaimable = false; | 2319 | bool reclaimable = false; |
2281 | 2320 | ||
@@ -2294,6 +2333,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, | |||
2294 | memcg = mem_cgroup_iter(root, NULL, &reclaim); | 2333 | memcg = mem_cgroup_iter(root, NULL, &reclaim); |
2295 | do { | 2334 | do { |
2296 | unsigned long lru_pages; | 2335 | unsigned long lru_pages; |
2336 | unsigned long scanned; | ||
2297 | struct lruvec *lruvec; | 2337 | struct lruvec *lruvec; |
2298 | int swappiness; | 2338 | int swappiness; |
2299 | 2339 | ||
@@ -2305,10 +2345,16 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, | |||
2305 | 2345 | ||
2306 | lruvec = mem_cgroup_zone_lruvec(zone, memcg); | 2346 | lruvec = mem_cgroup_zone_lruvec(zone, memcg); |
2307 | swappiness = mem_cgroup_swappiness(memcg); | 2347 | swappiness = mem_cgroup_swappiness(memcg); |
2348 | scanned = sc->nr_scanned; | ||
2308 | 2349 | ||
2309 | shrink_lruvec(lruvec, swappiness, sc, &lru_pages); | 2350 | shrink_lruvec(lruvec, swappiness, sc, &lru_pages); |
2310 | zone_lru_pages += lru_pages; | 2351 | zone_lru_pages += lru_pages; |
2311 | 2352 | ||
2353 | if (memcg && is_classzone) | ||
2354 | shrink_slab(sc->gfp_mask, zone_to_nid(zone), | ||
2355 | memcg, sc->nr_scanned - scanned, | ||
2356 | lru_pages); | ||
2357 | |||
2312 | /* | 2358 | /* |
2313 | * Direct reclaim and kswapd have to scan all memory | 2359 | * Direct reclaim and kswapd have to scan all memory |
2314 | * cgroups to fulfill the overall scan target for the | 2360 | * cgroups to fulfill the overall scan target for the |
@@ -2330,19 +2376,14 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, | |||
2330 | * Shrink the slab caches in the same proportion that | 2376 | * Shrink the slab caches in the same proportion that |
2331 | * the eligible LRU pages were scanned. | 2377 | * the eligible LRU pages were scanned. |
2332 | */ | 2378 | */ |
2333 | if (global_reclaim(sc) && is_classzone) { | 2379 | if (global_reclaim(sc) && is_classzone) |
2334 | struct reclaim_state *reclaim_state; | 2380 | shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL, |
2335 | 2381 | sc->nr_scanned - nr_scanned, | |
2336 | shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone), | 2382 | zone_lru_pages); |
2337 | sc->nr_scanned - nr_scanned, | 2383 | |
2338 | zone_lru_pages); | 2384 | if (reclaim_state) { |
2339 | 2385 | sc->nr_reclaimed += reclaim_state->reclaimed_slab; | |
2340 | reclaim_state = current->reclaim_state; | 2386 | reclaim_state->reclaimed_slab = 0; |
2341 | if (reclaim_state) { | ||
2342 | sc->nr_reclaimed += | ||
2343 | reclaim_state->reclaimed_slab; | ||
2344 | reclaim_state->reclaimed_slab = 0; | ||
2345 | } | ||
2346 | } | 2387 | } |
2347 | 2388 | ||
2348 | vmpressure(sc->gfp_mask, sc->target_mem_cgroup, | 2389 | vmpressure(sc->gfp_mask, sc->target_mem_cgroup, |
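The vmscan.c hunks above replace shrink_node_slabs() with a memcg-aware shrink_slab(): the shrink_control now carries the target memcg, shrinkers that do not set SHRINKER_MEMCG_AWARE are skipped whenever a memcg is passed, shrink_zone() invokes the shrinkers once per memcg for the class zone, and drop_slab_node() loops over all memcgs. A minimal sketch of a shrinker opting in to both per-node and per-memcg reclaim via the list_lru helpers introduced elsewhere in this series; all names here (my_lru, my_isolate, my_count, my_scan, my_shrinker) are illustrative and not part of the patch:

#include <linux/shrinker.h>
#include <linux/list_lru.h>

static struct list_lru my_lru;	/* assumed to be initialized elsewhere as a memcg-aware list_lru */

static enum lru_status my_isolate(struct list_head *item,
				  struct list_lru_one *lru,
				  spinlock_t *lru_lock, void *arg)
{
	/* A real shrinker would detach the object with list_lru_isolate(lru, item)
	 * and free it; this stub leaves everything on the list. */
	return LRU_SKIP;
}

static unsigned long my_count(struct shrinker *shrinker,
			      struct shrink_control *sc)
{
	/* sc->nid and sc->memcg are filled in by shrink_slab() */
	return list_lru_shrink_count(&my_lru, sc);
}

static unsigned long my_scan(struct shrinker *shrinker,
			     struct shrink_control *sc)
{
	return list_lru_shrink_walk(&my_lru, sc, my_isolate, NULL);
}

static struct shrinker my_shrinker = {
	.count_objects	= my_count,
	.scan_objects	= my_scan,
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
};

Registration is unchanged (register_shrinker(&my_shrinker)); a shrinker that leaves SHRINKER_MEMCG_AWARE unset keeps the old behaviour and is only called for global, memcg == NULL reclaim, exactly as the new check in shrink_slab() shows.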
diff --git a/mm/workingset.c b/mm/workingset.c index f7216fa7da27..aa017133744b 100644 --- a/mm/workingset.c +++ b/mm/workingset.c | |||
@@ -275,7 +275,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, | |||
275 | 275 | ||
276 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | 276 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ |
277 | local_irq_disable(); | 277 | local_irq_disable(); |
278 | shadow_nodes = list_lru_count_node(&workingset_shadow_nodes, sc->nid); | 278 | shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc); |
279 | local_irq_enable(); | 279 | local_irq_enable(); |
280 | 280 | ||
281 | pages = node_present_pages(sc->nid); | 281 | pages = node_present_pages(sc->nid); |
@@ -302,6 +302,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, | |||
302 | } | 302 | } |
303 | 303 | ||
304 | static enum lru_status shadow_lru_isolate(struct list_head *item, | 304 | static enum lru_status shadow_lru_isolate(struct list_head *item, |
305 | struct list_lru_one *lru, | ||
305 | spinlock_t *lru_lock, | 306 | spinlock_t *lru_lock, |
306 | void *arg) | 307 | void *arg) |
307 | { | 308 | { |
@@ -332,7 +333,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, | |||
332 | goto out; | 333 | goto out; |
333 | } | 334 | } |
334 | 335 | ||
335 | list_del_init(item); | 336 | list_lru_isolate(lru, item); |
336 | spin_unlock(lru_lock); | 337 | spin_unlock(lru_lock); |
337 | 338 | ||
338 | /* | 339 | /* |
@@ -376,8 +377,8 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker, | |||
376 | 377 | ||
377 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | 378 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ |
378 | local_irq_disable(); | 379 | local_irq_disable(); |
379 | ret = list_lru_walk_node(&workingset_shadow_nodes, sc->nid, | 380 | ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc, |
380 | shadow_lru_isolate, NULL, &sc->nr_to_scan); | 381 | shadow_lru_isolate, NULL); |
381 | local_irq_enable(); | 382 | local_irq_enable(); |
382 | return ret; | 383 | return ret; |
383 | } | 384 | } |
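The workingset shadow-node shrinker is converted from the per-node list_lru calls (list_lru_count_node(), list_lru_walk_node()) to the new shrink_control-based helpers, and the isolate callback now also receives the struct list_lru_one the item sits on so it can use list_lru_isolate() instead of a bare list_del_init(). Judging by how they are used here, the new helpers presumably just forward the nid and memcg carried in the shrink_control; a rough sketch of what they likely boil down to (not the exact kernel definitions):

static inline unsigned long
list_lru_shrink_count(struct list_lru *lru, struct shrink_control *sc)
{
	return list_lru_count_one(lru, sc->nid, sc->memcg);
}

static inline unsigned long
list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
		     list_lru_walk_cb isolate, void *cb_arg)
{
	return list_lru_walk_one(lru, sc->nid, sc->memcg, isolate,
				 cb_arg, &sc->nr_to_scan);
}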
diff --git a/mm/zbud.c b/mm/zbud.c --- a/mm/zbud.c +++ b/mm/zbud.c | |||
@@ -130,7 +130,8 @@ static struct zbud_ops zbud_zpool_ops = { | |||
130 | .evict = zbud_zpool_evict | 130 | .evict = zbud_zpool_evict |
131 | }; | 131 | }; |
132 | 132 | ||
133 | static void *zbud_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops) | 133 | static void *zbud_zpool_create(char *name, gfp_t gfp, |
134 | struct zpool_ops *zpool_ops) | ||
134 | { | 135 | { |
135 | return zbud_create_pool(gfp, zpool_ops ? &zbud_zpool_ops : NULL); | 136 | return zbud_create_pool(gfp, zpool_ops ? &zbud_zpool_ops : NULL); |
136 | } | 137 | } |
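The zpool backend ->create() callback grows a name argument so that backends with per-pool state to label (zsmalloc's debugfs statistics below) know what to call the pool; zbud has nothing to name and simply ignores it. A hypothetical driver-side sketch, not from this patch, assuming the usual struct zpool_driver fields:

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/zpool.h>

struct my_pool {
	gfp_t gfp;
};

static void *my_create(char *name, gfp_t gfp, struct zpool_ops *ops)
{
	/* Backends without per-pool statistics can ignore 'name',
	 * exactly as zbud does above. */
	struct my_pool *p = kzalloc(sizeof(*p), gfp);

	if (p)
		p->gfp = gfp;
	return p;
}

static struct zpool_driver my_driver = {
	.type	= "my_backend",
	.owner	= THIS_MODULE,
	.create	= my_create,
	/* remaining zpool_driver callbacks are unchanged by this series */
};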
diff --git a/mm/zpool.c b/mm/zpool.c index 739cdf0d183a..bacdab6e47de 100644 --- a/mm/zpool.c +++ b/mm/zpool.c | |||
@@ -129,6 +129,7 @@ static void zpool_put_driver(struct zpool_driver *driver) | |||
129 | /** | 129 | /** |
130 | * zpool_create_pool() - Create a new zpool | 130 | * zpool_create_pool() - Create a new zpool |
131 | * @type The type of the zpool to create (e.g. zbud, zsmalloc) | 131 | * @type The type of the zpool to create (e.g. zbud, zsmalloc) |
132 | * @name The name of the zpool (e.g. zram0, zswap) | ||
132 | * @gfp The GFP flags to use when allocating the pool. | 133 | * @gfp The GFP flags to use when allocating the pool. |
133 | * @ops The optional ops callback. | 134 | * @ops The optional ops callback. |
134 | * | 135 | * |
@@ -140,7 +141,8 @@ static void zpool_put_driver(struct zpool_driver *driver) | |||
140 | * | 141 | * |
141 | * Returns: New zpool on success, NULL on failure. | 142 | * Returns: New zpool on success, NULL on failure. |
142 | */ | 143 | */ |
143 | struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops) | 144 | struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, |
145 | struct zpool_ops *ops) | ||
144 | { | 146 | { |
145 | struct zpool_driver *driver; | 147 | struct zpool_driver *driver; |
146 | struct zpool *zpool; | 148 | struct zpool *zpool; |
@@ -168,7 +170,7 @@ struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops) | |||
168 | 170 | ||
169 | zpool->type = driver->type; | 171 | zpool->type = driver->type; |
170 | zpool->driver = driver; | 172 | zpool->driver = driver; |
171 | zpool->pool = driver->create(gfp, ops); | 173 | zpool->pool = driver->create(name, gfp, ops); |
172 | zpool->ops = ops; | 174 | zpool->ops = ops; |
173 | 175 | ||
174 | if (!zpool->pool) { | 176 | if (!zpool->pool) { |
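On the caller side, zpool_create_pool() simply threads the new name through to driver->create(); zswap's conversion is visible in the last hunk of this diff. A small usage sketch (the pool label "my_pool" and the setup function are illustrative):

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/zpool.h>

static struct zpool *pool;

static int my_zpool_setup(void)
{
	/* "zbud" selects the backend; "my_pool" only labels the pool,
	 * e.g. for zsmalloc's debugfs directory. The ops callback is optional. */
	pool = zpool_create_pool("zbud", "my_pool", GFP_KERNEL, NULL);
	return pool ? 0 : -ENOMEM;
}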
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b72403927aa4..0dec1fa5f656 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c | |||
@@ -91,6 +91,7 @@ | |||
91 | #include <linux/hardirq.h> | 91 | #include <linux/hardirq.h> |
92 | #include <linux/spinlock.h> | 92 | #include <linux/spinlock.h> |
93 | #include <linux/types.h> | 93 | #include <linux/types.h> |
94 | #include <linux/debugfs.h> | ||
94 | #include <linux/zsmalloc.h> | 95 | #include <linux/zsmalloc.h> |
95 | #include <linux/zpool.h> | 96 | #include <linux/zpool.h> |
96 | 97 | ||
@@ -168,6 +169,22 @@ enum fullness_group { | |||
168 | ZS_FULL | 169 | ZS_FULL |
169 | }; | 170 | }; |
170 | 171 | ||
172 | enum zs_stat_type { | ||
173 | OBJ_ALLOCATED, | ||
174 | OBJ_USED, | ||
175 | NR_ZS_STAT_TYPE, | ||
176 | }; | ||
177 | |||
178 | #ifdef CONFIG_ZSMALLOC_STAT | ||
179 | |||
180 | static struct dentry *zs_stat_root; | ||
181 | |||
182 | struct zs_size_stat { | ||
183 | unsigned long objs[NR_ZS_STAT_TYPE]; | ||
184 | }; | ||
185 | |||
186 | #endif | ||
187 | |||
171 | /* | 188 | /* |
172 | * number of size_classes | 189 | * number of size_classes |
173 | */ | 190 | */ |
@@ -200,6 +217,10 @@ struct size_class { | |||
200 | /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ | 217 | /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ |
201 | int pages_per_zspage; | 218 | int pages_per_zspage; |
202 | 219 | ||
220 | #ifdef CONFIG_ZSMALLOC_STAT | ||
221 | struct zs_size_stat stats; | ||
222 | #endif | ||
223 | |||
203 | spinlock_t lock; | 224 | spinlock_t lock; |
204 | 225 | ||
205 | struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS]; | 226 | struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS]; |
@@ -217,10 +238,16 @@ struct link_free { | |||
217 | }; | 238 | }; |
218 | 239 | ||
219 | struct zs_pool { | 240 | struct zs_pool { |
241 | char *name; | ||
242 | |||
220 | struct size_class **size_class; | 243 | struct size_class **size_class; |
221 | 244 | ||
222 | gfp_t flags; /* allocation flags used when growing pool */ | 245 | gfp_t flags; /* allocation flags used when growing pool */ |
223 | atomic_long_t pages_allocated; | 246 | atomic_long_t pages_allocated; |
247 | |||
248 | #ifdef CONFIG_ZSMALLOC_STAT | ||
249 | struct dentry *stat_dentry; | ||
250 | #endif | ||
224 | }; | 251 | }; |
225 | 252 | ||
226 | /* | 253 | /* |
@@ -246,9 +273,9 @@ struct mapping_area { | |||
246 | 273 | ||
247 | #ifdef CONFIG_ZPOOL | 274 | #ifdef CONFIG_ZPOOL |
248 | 275 | ||
249 | static void *zs_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops) | 276 | static void *zs_zpool_create(char *name, gfp_t gfp, struct zpool_ops *zpool_ops) |
250 | { | 277 | { |
251 | return zs_create_pool(gfp); | 278 | return zs_create_pool(name, gfp); |
252 | } | 279 | } |
253 | 280 | ||
254 | static void zs_zpool_destroy(void *pool) | 281 | static void zs_zpool_destroy(void *pool) |
@@ -942,6 +969,166 @@ static bool can_merge(struct size_class *prev, int size, int pages_per_zspage) | |||
942 | return true; | 969 | return true; |
943 | } | 970 | } |
944 | 971 | ||
972 | #ifdef CONFIG_ZSMALLOC_STAT | ||
973 | |||
974 | static inline void zs_stat_inc(struct size_class *class, | ||
975 | enum zs_stat_type type, unsigned long cnt) | ||
976 | { | ||
977 | class->stats.objs[type] += cnt; | ||
978 | } | ||
979 | |||
980 | static inline void zs_stat_dec(struct size_class *class, | ||
981 | enum zs_stat_type type, unsigned long cnt) | ||
982 | { | ||
983 | class->stats.objs[type] -= cnt; | ||
984 | } | ||
985 | |||
986 | static inline unsigned long zs_stat_get(struct size_class *class, | ||
987 | enum zs_stat_type type) | ||
988 | { | ||
989 | return class->stats.objs[type]; | ||
990 | } | ||
991 | |||
992 | static int __init zs_stat_init(void) | ||
993 | { | ||
994 | if (!debugfs_initialized()) | ||
995 | return -ENODEV; | ||
996 | |||
997 | zs_stat_root = debugfs_create_dir("zsmalloc", NULL); | ||
998 | if (!zs_stat_root) | ||
999 | return -ENOMEM; | ||
1000 | |||
1001 | return 0; | ||
1002 | } | ||
1003 | |||
1004 | static void __exit zs_stat_exit(void) | ||
1005 | { | ||
1006 | debugfs_remove_recursive(zs_stat_root); | ||
1007 | } | ||
1008 | |||
1009 | static int zs_stats_size_show(struct seq_file *s, void *v) | ||
1010 | { | ||
1011 | int i; | ||
1012 | struct zs_pool *pool = s->private; | ||
1013 | struct size_class *class; | ||
1014 | int objs_per_zspage; | ||
1015 | unsigned long obj_allocated, obj_used, pages_used; | ||
1016 | unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0; | ||
1017 | |||
1018 | seq_printf(s, " %5s %5s %13s %10s %10s\n", "class", "size", | ||
1019 | "obj_allocated", "obj_used", "pages_used"); | ||
1020 | |||
1021 | for (i = 0; i < zs_size_classes; i++) { | ||
1022 | class = pool->size_class[i]; | ||
1023 | |||
1024 | if (class->index != i) | ||
1025 | continue; | ||
1026 | |||
1027 | spin_lock(&class->lock); | ||
1028 | obj_allocated = zs_stat_get(class, OBJ_ALLOCATED); | ||
1029 | obj_used = zs_stat_get(class, OBJ_USED); | ||
1030 | spin_unlock(&class->lock); | ||
1031 | |||
1032 | objs_per_zspage = get_maxobj_per_zspage(class->size, | ||
1033 | class->pages_per_zspage); | ||
1034 | pages_used = obj_allocated / objs_per_zspage * | ||
1035 | class->pages_per_zspage; | ||
1036 | |||
1037 | seq_printf(s, " %5u %5u %10lu %10lu %10lu\n", i, | ||
1038 | class->size, obj_allocated, obj_used, pages_used); | ||
1039 | |||
1040 | total_objs += obj_allocated; | ||
1041 | total_used_objs += obj_used; | ||
1042 | total_pages += pages_used; | ||
1043 | } | ||
1044 | |||
1045 | seq_puts(s, "\n"); | ||
1046 | seq_printf(s, " %5s %5s %10lu %10lu %10lu\n", "Total", "", | ||
1047 | total_objs, total_used_objs, total_pages); | ||
1048 | |||
1049 | return 0; | ||
1050 | } | ||
1051 | |||
1052 | static int zs_stats_size_open(struct inode *inode, struct file *file) | ||
1053 | { | ||
1054 | return single_open(file, zs_stats_size_show, inode->i_private); | ||
1055 | } | ||
1056 | |||
1057 | static const struct file_operations zs_stat_size_ops = { | ||
1058 | .open = zs_stats_size_open, | ||
1059 | .read = seq_read, | ||
1060 | .llseek = seq_lseek, | ||
1061 | .release = single_release, | ||
1062 | }; | ||
1063 | |||
1064 | static int zs_pool_stat_create(char *name, struct zs_pool *pool) | ||
1065 | { | ||
1066 | struct dentry *entry; | ||
1067 | |||
1068 | if (!zs_stat_root) | ||
1069 | return -ENODEV; | ||
1070 | |||
1071 | entry = debugfs_create_dir(name, zs_stat_root); | ||
1072 | if (!entry) { | ||
1073 | pr_warn("debugfs dir <%s> creation failed\n", name); | ||
1074 | return -ENOMEM; | ||
1075 | } | ||
1076 | pool->stat_dentry = entry; | ||
1077 | |||
1078 | entry = debugfs_create_file("obj_in_classes", S_IFREG | S_IRUGO, | ||
1079 | pool->stat_dentry, pool, &zs_stat_size_ops); | ||
1080 | if (!entry) { | ||
1081 | pr_warn("%s: debugfs file entry <%s> creation failed\n", | ||
1082 | name, "obj_in_classes"); | ||
1083 | return -ENOMEM; | ||
1084 | } | ||
1085 | |||
1086 | return 0; | ||
1087 | } | ||
1088 | |||
1089 | static void zs_pool_stat_destroy(struct zs_pool *pool) | ||
1090 | { | ||
1091 | debugfs_remove_recursive(pool->stat_dentry); | ||
1092 | } | ||
1093 | |||
1094 | #else /* CONFIG_ZSMALLOC_STAT */ | ||
1095 | |||
1096 | static inline void zs_stat_inc(struct size_class *class, | ||
1097 | enum zs_stat_type type, unsigned long cnt) | ||
1098 | { | ||
1099 | } | ||
1100 | |||
1101 | static inline void zs_stat_dec(struct size_class *class, | ||
1102 | enum zs_stat_type type, unsigned long cnt) | ||
1103 | { | ||
1104 | } | ||
1105 | |||
1106 | static inline unsigned long zs_stat_get(struct size_class *class, | ||
1107 | enum zs_stat_type type) | ||
1108 | { | ||
1109 | return 0; | ||
1110 | } | ||
1111 | |||
1112 | static int __init zs_stat_init(void) | ||
1113 | { | ||
1114 | return 0; | ||
1115 | } | ||
1116 | |||
1117 | static void __exit zs_stat_exit(void) | ||
1118 | { | ||
1119 | } | ||
1120 | |||
1121 | static inline int zs_pool_stat_create(char *name, struct zs_pool *pool) | ||
1122 | { | ||
1123 | return 0; | ||
1124 | } | ||
1125 | |||
1126 | static inline void zs_pool_stat_destroy(struct zs_pool *pool) | ||
1127 | { | ||
1128 | } | ||
1129 | |||
1130 | #endif | ||
1131 | |||
945 | unsigned long zs_get_total_pages(struct zs_pool *pool) | 1132 | unsigned long zs_get_total_pages(struct zs_pool *pool) |
946 | { | 1133 | { |
947 | return atomic_long_read(&pool->pages_allocated); | 1134 | return atomic_long_read(&pool->pages_allocated); |
@@ -1074,7 +1261,10 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size) | |||
1074 | set_zspage_mapping(first_page, class->index, ZS_EMPTY); | 1261 | set_zspage_mapping(first_page, class->index, ZS_EMPTY); |
1075 | atomic_long_add(class->pages_per_zspage, | 1262 | atomic_long_add(class->pages_per_zspage, |
1076 | &pool->pages_allocated); | 1263 | &pool->pages_allocated); |
1264 | |||
1077 | spin_lock(&class->lock); | 1265 | spin_lock(&class->lock); |
1266 | zs_stat_inc(class, OBJ_ALLOCATED, get_maxobj_per_zspage( | ||
1267 | class->size, class->pages_per_zspage)); | ||
1078 | } | 1268 | } |
1079 | 1269 | ||
1080 | obj = (unsigned long)first_page->freelist; | 1270 | obj = (unsigned long)first_page->freelist; |
@@ -1088,6 +1278,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size) | |||
1088 | kunmap_atomic(vaddr); | 1278 | kunmap_atomic(vaddr); |
1089 | 1279 | ||
1090 | first_page->inuse++; | 1280 | first_page->inuse++; |
1281 | zs_stat_inc(class, OBJ_USED, 1); | ||
1091 | /* Now move the zspage to another fullness group, if required */ | 1282 | /* Now move the zspage to another fullness group, if required */ |
1092 | fix_fullness_group(pool, first_page); | 1283 | fix_fullness_group(pool, first_page); |
1093 | spin_unlock(&class->lock); | 1284 | spin_unlock(&class->lock); |
@@ -1128,6 +1319,12 @@ void zs_free(struct zs_pool *pool, unsigned long obj) | |||
1128 | 1319 | ||
1129 | first_page->inuse--; | 1320 | first_page->inuse--; |
1130 | fullness = fix_fullness_group(pool, first_page); | 1321 | fullness = fix_fullness_group(pool, first_page); |
1322 | |||
1323 | zs_stat_dec(class, OBJ_USED, 1); | ||
1324 | if (fullness == ZS_EMPTY) | ||
1325 | zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage( | ||
1326 | class->size, class->pages_per_zspage)); | ||
1327 | |||
1131 | spin_unlock(&class->lock); | 1328 | spin_unlock(&class->lock); |
1132 | 1329 | ||
1133 | if (fullness == ZS_EMPTY) { | 1330 | if (fullness == ZS_EMPTY) { |
@@ -1148,7 +1345,7 @@ EXPORT_SYMBOL_GPL(zs_free); | |||
1148 | * On success, a pointer to the newly created pool is returned, | 1345 | * On success, a pointer to the newly created pool is returned, |
1149 | * otherwise NULL. | 1346 | * otherwise NULL. |
1150 | */ | 1347 | */ |
1151 | struct zs_pool *zs_create_pool(gfp_t flags) | 1348 | struct zs_pool *zs_create_pool(char *name, gfp_t flags) |
1152 | { | 1349 | { |
1153 | int i; | 1350 | int i; |
1154 | struct zs_pool *pool; | 1351 | struct zs_pool *pool; |
@@ -1158,9 +1355,16 @@ struct zs_pool *zs_create_pool(gfp_t flags) | |||
1158 | if (!pool) | 1355 | if (!pool) |
1159 | return NULL; | 1356 | return NULL; |
1160 | 1357 | ||
1358 | pool->name = kstrdup(name, GFP_KERNEL); | ||
1359 | if (!pool->name) { | ||
1360 | kfree(pool); | ||
1361 | return NULL; | ||
1362 | } | ||
1363 | |||
1161 | pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *), | 1364 | pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *), |
1162 | GFP_KERNEL); | 1365 | GFP_KERNEL); |
1163 | if (!pool->size_class) { | 1366 | if (!pool->size_class) { |
1367 | kfree(pool->name); | ||
1164 | kfree(pool); | 1368 | kfree(pool); |
1165 | return NULL; | 1369 | return NULL; |
1166 | } | 1370 | } |
@@ -1210,6 +1414,9 @@ struct zs_pool *zs_create_pool(gfp_t flags) | |||
1210 | 1414 | ||
1211 | pool->flags = flags; | 1415 | pool->flags = flags; |
1212 | 1416 | ||
1417 | if (zs_pool_stat_create(name, pool)) | ||
1418 | goto err; | ||
1419 | |||
1213 | return pool; | 1420 | return pool; |
1214 | 1421 | ||
1215 | err: | 1422 | err: |
@@ -1222,6 +1429,8 @@ void zs_destroy_pool(struct zs_pool *pool) | |||
1222 | { | 1429 | { |
1223 | int i; | 1430 | int i; |
1224 | 1431 | ||
1432 | zs_pool_stat_destroy(pool); | ||
1433 | |||
1225 | for (i = 0; i < zs_size_classes; i++) { | 1434 | for (i = 0; i < zs_size_classes; i++) { |
1226 | int fg; | 1435 | int fg; |
1227 | struct size_class *class = pool->size_class[i]; | 1436 | struct size_class *class = pool->size_class[i]; |
@@ -1242,6 +1451,7 @@ void zs_destroy_pool(struct zs_pool *pool) | |||
1242 | } | 1451 | } |
1243 | 1452 | ||
1244 | kfree(pool->size_class); | 1453 | kfree(pool->size_class); |
1454 | kfree(pool->name); | ||
1245 | kfree(pool); | 1455 | kfree(pool); |
1246 | } | 1456 | } |
1247 | EXPORT_SYMBOL_GPL(zs_destroy_pool); | 1457 | EXPORT_SYMBOL_GPL(zs_destroy_pool); |
@@ -1250,17 +1460,30 @@ static int __init zs_init(void) | |||
1250 | { | 1460 | { |
1251 | int ret = zs_register_cpu_notifier(); | 1461 | int ret = zs_register_cpu_notifier(); |
1252 | 1462 | ||
1253 | if (ret) { | 1463 | if (ret) |
1254 | zs_unregister_cpu_notifier(); | 1464 | goto notifier_fail; |
1255 | return ret; | ||
1256 | } | ||
1257 | 1465 | ||
1258 | init_zs_size_classes(); | 1466 | init_zs_size_classes(); |
1259 | 1467 | ||
1260 | #ifdef CONFIG_ZPOOL | 1468 | #ifdef CONFIG_ZPOOL |
1261 | zpool_register_driver(&zs_zpool_driver); | 1469 | zpool_register_driver(&zs_zpool_driver); |
1262 | #endif | 1470 | #endif |
1471 | |||
1472 | ret = zs_stat_init(); | ||
1473 | if (ret) { | ||
1474 | pr_err("zs stat initialization failed\n"); | ||
1475 | goto stat_fail; | ||
1476 | } | ||
1263 | return 0; | 1477 | return 0; |
1478 | |||
1479 | stat_fail: | ||
1480 | #ifdef CONFIG_ZPOOL | ||
1481 | zpool_unregister_driver(&zs_zpool_driver); | ||
1482 | #endif | ||
1483 | notifier_fail: | ||
1484 | zs_unregister_cpu_notifier(); | ||
1485 | |||
1486 | return ret; | ||
1264 | } | 1487 | } |
1265 | 1488 | ||
1266 | static void __exit zs_exit(void) | 1489 | static void __exit zs_exit(void) |
@@ -1269,6 +1492,8 @@ static void __exit zs_exit(void) | |||
1269 | zpool_unregister_driver(&zs_zpool_driver); | 1492 | zpool_unregister_driver(&zs_zpool_driver); |
1270 | #endif | 1493 | #endif |
1271 | zs_unregister_cpu_notifier(); | 1494 | zs_unregister_cpu_notifier(); |
1495 | |||
1496 | zs_stat_exit(); | ||
1272 | } | 1497 | } |
1273 | 1498 | ||
1274 | module_init(zs_init); | 1499 | module_init(zs_init); |
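With CONFIG_ZSMALLOC_STAT enabled, every pool created through the now-named zs_create_pool() gets its own directory under the "zsmalloc" debugfs root, and the per-class counters added above (obj_allocated, obj_used and the derived pages_used per size class, plus a Total line) can be read from the obj_in_classes file, typically /sys/kernel/debug/zsmalloc/<name>/obj_in_classes assuming debugfs is mounted in the usual place. A minimal kernel-side sketch of a caller using the new signature (my_pool_init/my_pool_exit and the pool name are illustrative):

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/zsmalloc.h>

static struct zs_pool *pool;

static int my_pool_init(void)
{
	/* The pool name now doubles as the debugfs directory name. */
	pool = zs_create_pool("my_pool", GFP_KERNEL);
	return pool ? 0 : -ENOMEM;
}

static void my_pool_exit(void)
{
	zs_destroy_pool(pool);	/* also tears down the pool's debugfs entries */
}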
diff --git a/mm/zswap.c b/mm/zswap.c index 0cfce9bc51e4..4249e82ff934 100644 --- a/mm/zswap.c +++ b/mm/zswap.c | |||
@@ -906,11 +906,12 @@ static int __init init_zswap(void) | |||
906 | 906 | ||
907 | pr_info("loading zswap\n"); | 907 | pr_info("loading zswap\n"); |
908 | 908 | ||
909 | zswap_pool = zpool_create_pool(zswap_zpool_type, gfp, &zswap_zpool_ops); | 909 | zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp, |
910 | &zswap_zpool_ops); | ||
910 | if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) { | 911 | if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) { |
911 | pr_info("%s zpool not available\n", zswap_zpool_type); | 912 | pr_info("%s zpool not available\n", zswap_zpool_type); |
912 | zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; | 913 | zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; |
913 | zswap_pool = zpool_create_pool(zswap_zpool_type, gfp, | 914 | zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp, |
914 | &zswap_zpool_ops); | 915 | &zswap_zpool_ops); |
915 | } | 916 | } |
916 | if (!zswap_pool) { | 917 | if (!zswap_pool) { |