diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-31 21:46:22 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-31 21:46:22 -0500 |
| commit | 73da9e1a9f310a449eeb9bf5735a9cd475fef5e2 (patch) | |
| tree | 82cd78255b0a480340a8427e7ba5586df8280ac4 /include/linux | |
| parent | b2fe5fa68642860e7de76167c3111623aa0d5de1 (diff) | |
| parent | 3f56a2f8030071cf86520ef4fc3045ba6856e610 (diff) | |
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
- misc fixes
- ocfs2 updates
- most of MM
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (118 commits)
mm: remove PG_highmem description
tools, vm: new option to specify kpageflags file
mm/swap.c: make functions and their kernel-doc agree
mm, memory_hotplug: fix memmap initialization
mm: correct comments regarding do_fault_around()
mm: numa: do not trap faults on shared data section pages.
hugetlb, mbind: fall back to default policy if vma is NULL
hugetlb, mempolicy: fix the mbind hugetlb migration
mm, hugetlb: further simplify hugetlb allocation API
mm, hugetlb: get rid of surplus page accounting tricks
mm, hugetlb: do not rely on overcommit limit during migration
mm, hugetlb: integrate giga hugetlb more naturally to the allocation path
mm, hugetlb: unify core page allocation accounting and initialization
mm/memcontrol.c: try harder to decrease [memory,memsw].limit_in_bytes
mm/memcontrol.c: make local symbol static
mm/hmm: fix uninitialized use of 'entry' in hmm_vma_walk_pmd()
include/linux/mmzone.h: fix explanation of lower bits in the SPARSEMEM mem_map pointer
mm/compaction.c: fix comment for try_to_compact_pages()
mm/page_ext.c: make page_ext_init a noop when CONFIG_PAGE_EXTENSION but nothing uses it
zsmalloc: use U suffix for negative literals being shifted
...
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/hugetlb.h | 21 | ||||
| -rw-r--r-- | include/linux/memcontrol.h | 165 | ||||
| -rw-r--r-- | include/linux/mm.h | 26 | ||||
| -rw-r--r-- | include/linux/mm_types.h | 154 | ||||
| -rw-r--r-- | include/linux/mmu_notifier.h | 30 | ||||
| -rw-r--r-- | include/linux/mmzone.h | 12 | ||||
| -rw-r--r-- | include/linux/page-flags.h | 5 | ||||
| -rw-r--r-- | include/linux/pagevec.h | 6 | ||||
| -rw-r--r-- | include/linux/sched/mm.h | 24 | ||||
| -rw-r--r-- | include/linux/shmem_fs.h | 6 | ||||
| -rw-r--r-- | include/linux/swap.h | 2 | ||||
| -rw-r--r-- | include/linux/vmstat.h | 17 | ||||
| -rw-r--r-- | include/linux/zpool.h | 2 |
13 files changed, 254 insertions, 216 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 82a25880714a..36fa6a2a82e3 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
| @@ -119,6 +119,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end, | |||
| 119 | long freed); | 119 | long freed); |
| 120 | bool isolate_huge_page(struct page *page, struct list_head *list); | 120 | bool isolate_huge_page(struct page *page, struct list_head *list); |
| 121 | void putback_active_hugepage(struct page *page); | 121 | void putback_active_hugepage(struct page *page); |
| 122 | void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason); | ||
| 122 | void free_huge_page(struct page *page); | 123 | void free_huge_page(struct page *page); |
| 123 | void hugetlb_fix_reserve_counts(struct inode *inode); | 124 | void hugetlb_fix_reserve_counts(struct inode *inode); |
| 124 | extern struct mutex *hugetlb_fault_mutex_table; | 125 | extern struct mutex *hugetlb_fault_mutex_table; |
| @@ -129,7 +130,6 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, | |||
| 129 | 130 | ||
| 130 | pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); | 131 | pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); |
| 131 | 132 | ||
| 132 | extern int hugepages_treat_as_movable; | ||
| 133 | extern int sysctl_hugetlb_shm_group; | 133 | extern int sysctl_hugetlb_shm_group; |
| 134 | extern struct list_head huge_boot_pages; | 134 | extern struct list_head huge_boot_pages; |
| 135 | 135 | ||
| @@ -158,6 +158,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, | |||
| 158 | unsigned long address, unsigned long end, pgprot_t newprot); | 158 | unsigned long address, unsigned long end, pgprot_t newprot); |
| 159 | 159 | ||
| 160 | bool is_hugetlb_entry_migration(pte_t pte); | 160 | bool is_hugetlb_entry_migration(pte_t pte); |
| 161 | |||
| 161 | #else /* !CONFIG_HUGETLB_PAGE */ | 162 | #else /* !CONFIG_HUGETLB_PAGE */ |
| 162 | 163 | ||
| 163 | static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) | 164 | static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) |
| @@ -198,6 +199,7 @@ static inline bool isolate_huge_page(struct page *page, struct list_head *list) | |||
| 198 | return false; | 199 | return false; |
| 199 | } | 200 | } |
| 200 | #define putback_active_hugepage(p) do {} while (0) | 201 | #define putback_active_hugepage(p) do {} while (0) |
| 202 | #define move_hugetlb_state(old, new, reason) do {} while (0) | ||
| 201 | 203 | ||
| 202 | static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma, | 204 | static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma, |
| 203 | unsigned long address, unsigned long end, pgprot_t newprot) | 205 | unsigned long address, unsigned long end, pgprot_t newprot) |
| @@ -271,6 +273,17 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) | |||
| 271 | return sb->s_fs_info; | 273 | return sb->s_fs_info; |
| 272 | } | 274 | } |
| 273 | 275 | ||
| 276 | struct hugetlbfs_inode_info { | ||
| 277 | struct shared_policy policy; | ||
| 278 | struct inode vfs_inode; | ||
| 279 | unsigned int seals; | ||
| 280 | }; | ||
| 281 | |||
| 282 | static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) | ||
| 283 | { | ||
| 284 | return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); | ||
| 285 | } | ||
| 286 | |||
| 274 | extern const struct file_operations hugetlbfs_file_operations; | 287 | extern const struct file_operations hugetlbfs_file_operations; |
| 275 | extern const struct vm_operations_struct hugetlb_vm_ops; | 288 | extern const struct vm_operations_struct hugetlb_vm_ops; |
| 276 | struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, | 289 | struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, |
| @@ -343,10 +356,10 @@ struct huge_bootmem_page { | |||
| 343 | struct page *alloc_huge_page(struct vm_area_struct *vma, | 356 | struct page *alloc_huge_page(struct vm_area_struct *vma, |
| 344 | unsigned long addr, int avoid_reserve); | 357 | unsigned long addr, int avoid_reserve); |
| 345 | struct page *alloc_huge_page_node(struct hstate *h, int nid); | 358 | struct page *alloc_huge_page_node(struct hstate *h, int nid); |
| 346 | struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, | ||
| 347 | unsigned long addr, int avoid_reserve); | ||
| 348 | struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, | 359 | struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, |
| 349 | nodemask_t *nmask); | 360 | nodemask_t *nmask); |
| 361 | struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, | ||
| 362 | unsigned long address); | ||
| 350 | int huge_add_to_page_cache(struct page *page, struct address_space *mapping, | 363 | int huge_add_to_page_cache(struct page *page, struct address_space *mapping, |
| 351 | pgoff_t idx); | 364 | pgoff_t idx); |
| 352 | 365 | ||
| @@ -524,7 +537,7 @@ struct hstate {}; | |||
| 524 | #define alloc_huge_page(v, a, r) NULL | 537 | #define alloc_huge_page(v, a, r) NULL |
| 525 | #define alloc_huge_page_node(h, nid) NULL | 538 | #define alloc_huge_page_node(h, nid) NULL |
| 526 | #define alloc_huge_page_nodemask(h, preferred_nid, nmask) NULL | 539 | #define alloc_huge_page_nodemask(h, preferred_nid, nmask) NULL |
| 527 | #define alloc_huge_page_noerr(v, a, r) NULL | 540 | #define alloc_huge_page_vma(h, vma, address) NULL |
| 528 | #define alloc_bootmem_huge_page(h) NULL | 541 | #define alloc_bootmem_huge_page(h) NULL |
| 529 | #define hstate_file(f) NULL | 542 | #define hstate_file(f) NULL |
| 530 | #define hstate_sizelog(s) NULL | 543 | #define hstate_sizelog(s) NULL |
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 69966c461d1c..882046863581 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
| @@ -108,7 +108,10 @@ struct lruvec_stat { | |||
| 108 | */ | 108 | */ |
| 109 | struct mem_cgroup_per_node { | 109 | struct mem_cgroup_per_node { |
| 110 | struct lruvec lruvec; | 110 | struct lruvec lruvec; |
| 111 | struct lruvec_stat __percpu *lruvec_stat; | 111 | |
| 112 | struct lruvec_stat __percpu *lruvec_stat_cpu; | ||
| 113 | atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS]; | ||
| 114 | |||
| 112 | unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; | 115 | unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; |
| 113 | 116 | ||
| 114 | struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1]; | 117 | struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1]; |
| @@ -227,10 +230,10 @@ struct mem_cgroup { | |||
| 227 | spinlock_t move_lock; | 230 | spinlock_t move_lock; |
| 228 | struct task_struct *move_lock_task; | 231 | struct task_struct *move_lock_task; |
| 229 | unsigned long move_lock_flags; | 232 | unsigned long move_lock_flags; |
| 230 | /* | 233 | |
| 231 | * percpu counter. | 234 | struct mem_cgroup_stat_cpu __percpu *stat_cpu; |
| 232 | */ | 235 | atomic_long_t stat[MEMCG_NR_STAT]; |
| 233 | struct mem_cgroup_stat_cpu __percpu *stat; | 236 | atomic_long_t events[MEMCG_NR_EVENTS]; |
| 234 | 237 | ||
| 235 | unsigned long socket_pressure; | 238 | unsigned long socket_pressure; |
| 236 | 239 | ||
| @@ -265,6 +268,12 @@ struct mem_cgroup { | |||
| 265 | /* WARNING: nodeinfo must be the last member here */ | 268 | /* WARNING: nodeinfo must be the last member here */ |
| 266 | }; | 269 | }; |
| 267 | 270 | ||
| 271 | /* | ||
| 272 | * size of first charge trial. "32" comes from vmscan.c's magic value. | ||
| 273 | * TODO: maybe necessary to use big numbers in big irons. | ||
| 274 | */ | ||
| 275 | #define MEMCG_CHARGE_BATCH 32U | ||
| 276 | |||
| 268 | extern struct mem_cgroup *root_mem_cgroup; | 277 | extern struct mem_cgroup *root_mem_cgroup; |
| 269 | 278 | ||
| 270 | static inline bool mem_cgroup_disabled(void) | 279 | static inline bool mem_cgroup_disabled(void) |
| @@ -272,13 +281,6 @@ static inline bool mem_cgroup_disabled(void) | |||
| 272 | return !cgroup_subsys_enabled(memory_cgrp_subsys); | 281 | return !cgroup_subsys_enabled(memory_cgrp_subsys); |
| 273 | } | 282 | } |
| 274 | 283 | ||
| 275 | static inline void mem_cgroup_event(struct mem_cgroup *memcg, | ||
| 276 | enum memcg_event_item event) | ||
| 277 | { | ||
| 278 | this_cpu_inc(memcg->stat->events[event]); | ||
| 279 | cgroup_file_notify(&memcg->events_file); | ||
| 280 | } | ||
| 281 | |||
| 282 | bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); | 284 | bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); |
| 283 | 285 | ||
| 284 | int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, | 286 | int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, |
| @@ -492,32 +494,38 @@ void unlock_page_memcg(struct page *page); | |||
| 492 | static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, | 494 | static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, |
| 493 | int idx) | 495 | int idx) |
| 494 | { | 496 | { |
| 495 | long val = 0; | 497 | long x = atomic_long_read(&memcg->stat[idx]); |
| 496 | int cpu; | 498 | #ifdef CONFIG_SMP |
| 497 | 499 | if (x < 0) | |
| 498 | for_each_possible_cpu(cpu) | 500 | x = 0; |
| 499 | val += per_cpu(memcg->stat->count[idx], cpu); | 501 | #endif |
| 500 | 502 | return x; | |
| 501 | if (val < 0) | ||
| 502 | val = 0; | ||
| 503 | |||
| 504 | return val; | ||
| 505 | } | 503 | } |
| 506 | 504 | ||
| 507 | /* idx can be of type enum memcg_stat_item or node_stat_item */ | 505 | /* idx can be of type enum memcg_stat_item or node_stat_item */ |
| 508 | static inline void __mod_memcg_state(struct mem_cgroup *memcg, | 506 | static inline void __mod_memcg_state(struct mem_cgroup *memcg, |
| 509 | int idx, int val) | 507 | int idx, int val) |
| 510 | { | 508 | { |
| 511 | if (!mem_cgroup_disabled()) | 509 | long x; |
| 512 | __this_cpu_add(memcg->stat->count[idx], val); | 510 | |
| 511 | if (mem_cgroup_disabled()) | ||
| 512 | return; | ||
| 513 | |||
| 514 | x = val + __this_cpu_read(memcg->stat_cpu->count[idx]); | ||
| 515 | if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) { | ||
| 516 | atomic_long_add(x, &memcg->stat[idx]); | ||
| 517 | x = 0; | ||
| 518 | } | ||
| 519 | __this_cpu_write(memcg->stat_cpu->count[idx], x); | ||
| 513 | } | 520 | } |
| 514 | 521 | ||
| 515 | /* idx can be of type enum memcg_stat_item or node_stat_item */ | 522 | /* idx can be of type enum memcg_stat_item or node_stat_item */ |
| 516 | static inline void mod_memcg_state(struct mem_cgroup *memcg, | 523 | static inline void mod_memcg_state(struct mem_cgroup *memcg, |
| 517 | int idx, int val) | 524 | int idx, int val) |
| 518 | { | 525 | { |
| 519 | if (!mem_cgroup_disabled()) | 526 | preempt_disable(); |
| 520 | this_cpu_add(memcg->stat->count[idx], val); | 527 | __mod_memcg_state(memcg, idx, val); |
| 528 | preempt_enable(); | ||
| 521 | } | 529 | } |
| 522 | 530 | ||
| 523 | /** | 531 | /** |
| @@ -555,87 +563,108 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec, | |||
| 555 | enum node_stat_item idx) | 563 | enum node_stat_item idx) |
| 556 | { | 564 | { |
| 557 | struct mem_cgroup_per_node *pn; | 565 | struct mem_cgroup_per_node *pn; |
| 558 | long val = 0; | 566 | long x; |
| 559 | int cpu; | ||
| 560 | 567 | ||
| 561 | if (mem_cgroup_disabled()) | 568 | if (mem_cgroup_disabled()) |
| 562 | return node_page_state(lruvec_pgdat(lruvec), idx); | 569 | return node_page_state(lruvec_pgdat(lruvec), idx); |
| 563 | 570 | ||
| 564 | pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); | 571 | pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); |
| 565 | for_each_possible_cpu(cpu) | 572 | x = atomic_long_read(&pn->lruvec_stat[idx]); |
| 566 | val += per_cpu(pn->lruvec_stat->count[idx], cpu); | 573 | #ifdef CONFIG_SMP |
| 567 | 574 | if (x < 0) | |
| 568 | if (val < 0) | 575 | x = 0; |
| 569 | val = 0; | 576 | #endif |
| 570 | 577 | return x; | |
| 571 | return val; | ||
| 572 | } | 578 | } |
| 573 | 579 | ||
| 574 | static inline void __mod_lruvec_state(struct lruvec *lruvec, | 580 | static inline void __mod_lruvec_state(struct lruvec *lruvec, |
| 575 | enum node_stat_item idx, int val) | 581 | enum node_stat_item idx, int val) |
| 576 | { | 582 | { |
| 577 | struct mem_cgroup_per_node *pn; | 583 | struct mem_cgroup_per_node *pn; |
| 584 | long x; | ||
| 578 | 585 | ||
| 586 | /* Update node */ | ||
| 579 | __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); | 587 | __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); |
| 588 | |||
| 580 | if (mem_cgroup_disabled()) | 589 | if (mem_cgroup_disabled()) |
| 581 | return; | 590 | return; |
| 591 | |||
| 582 | pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); | 592 | pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); |
| 593 | |||
| 594 | /* Update memcg */ | ||
| 583 | __mod_memcg_state(pn->memcg, idx, val); | 595 | __mod_memcg_state(pn->memcg, idx, val); |
| 584 | __this_cpu_add(pn->lruvec_stat->count[idx], val); | 596 | |
| 597 | /* Update lruvec */ | ||
| 598 | x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]); | ||
| 599 | if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) { | ||
| 600 | atomic_long_add(x, &pn->lruvec_stat[idx]); | ||
| 601 | x = 0; | ||
| 602 | } | ||
| 603 | __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x); | ||
| 585 | } | 604 | } |
| 586 | 605 | ||
| 587 | static inline void mod_lruvec_state(struct lruvec *lruvec, | 606 | static inline void mod_lruvec_state(struct lruvec *lruvec, |
| 588 | enum node_stat_item idx, int val) | 607 | enum node_stat_item idx, int val) |
| 589 | { | 608 | { |
| 590 | struct mem_cgroup_per_node *pn; | 609 | preempt_disable(); |
| 591 | 610 | __mod_lruvec_state(lruvec, idx, val); | |
| 592 | mod_node_page_state(lruvec_pgdat(lruvec), idx, val); | 611 | preempt_enable(); |
| 593 | if (mem_cgroup_disabled()) | ||
| 594 | return; | ||
| 595 | pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); | ||
| 596 | mod_memcg_state(pn->memcg, idx, val); | ||
| 597 | this_cpu_add(pn->lruvec_stat->count[idx], val); | ||
| 598 | } | 612 | } |
| 599 | 613 | ||
| 600 | static inline void __mod_lruvec_page_state(struct page *page, | 614 | static inline void __mod_lruvec_page_state(struct page *page, |
| 601 | enum node_stat_item idx, int val) | 615 | enum node_stat_item idx, int val) |
| 602 | { | 616 | { |
| 603 | struct mem_cgroup_per_node *pn; | 617 | pg_data_t *pgdat = page_pgdat(page); |
| 618 | struct lruvec *lruvec; | ||
| 604 | 619 | ||
| 605 | __mod_node_page_state(page_pgdat(page), idx, val); | 620 | /* Untracked pages have no memcg, no lruvec. Update only the node */ |
| 606 | if (mem_cgroup_disabled() || !page->mem_cgroup) | 621 | if (!page->mem_cgroup) { |
| 622 | __mod_node_page_state(pgdat, idx, val); | ||
| 607 | return; | 623 | return; |
| 608 | __mod_memcg_state(page->mem_cgroup, idx, val); | 624 | } |
| 609 | pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; | 625 | |
| 610 | __this_cpu_add(pn->lruvec_stat->count[idx], val); | 626 | lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup); |
| 627 | __mod_lruvec_state(lruvec, idx, val); | ||
| 611 | } | 628 | } |
| 612 | 629 | ||
| 613 | static inline void mod_lruvec_page_state(struct page *page, | 630 | static inline void mod_lruvec_page_state(struct page *page, |
| 614 | enum node_stat_item idx, int val) | 631 | enum node_stat_item idx, int val) |
| 615 | { | 632 | { |
| 616 | struct mem_cgroup_per_node *pn; | 633 | preempt_disable(); |
| 617 | 634 | __mod_lruvec_page_state(page, idx, val); | |
| 618 | mod_node_page_state(page_pgdat(page), idx, val); | 635 | preempt_enable(); |
| 619 | if (mem_cgroup_disabled() || !page->mem_cgroup) | ||
| 620 | return; | ||
| 621 | mod_memcg_state(page->mem_cgroup, idx, val); | ||
| 622 | pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; | ||
| 623 | this_cpu_add(pn->lruvec_stat->count[idx], val); | ||
| 624 | } | 636 | } |
| 625 | 637 | ||
| 626 | unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, | 638 | unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, |
| 627 | gfp_t gfp_mask, | 639 | gfp_t gfp_mask, |
| 628 | unsigned long *total_scanned); | 640 | unsigned long *total_scanned); |
| 629 | 641 | ||
| 642 | /* idx can be of type enum memcg_event_item or vm_event_item */ | ||
| 643 | static inline void __count_memcg_events(struct mem_cgroup *memcg, | ||
| 644 | int idx, unsigned long count) | ||
| 645 | { | ||
| 646 | unsigned long x; | ||
| 647 | |||
| 648 | if (mem_cgroup_disabled()) | ||
| 649 | return; | ||
| 650 | |||
| 651 | x = count + __this_cpu_read(memcg->stat_cpu->events[idx]); | ||
| 652 | if (unlikely(x > MEMCG_CHARGE_BATCH)) { | ||
| 653 | atomic_long_add(x, &memcg->events[idx]); | ||
| 654 | x = 0; | ||
| 655 | } | ||
| 656 | __this_cpu_write(memcg->stat_cpu->events[idx], x); | ||
| 657 | } | ||
| 658 | |||
| 630 | static inline void count_memcg_events(struct mem_cgroup *memcg, | 659 | static inline void count_memcg_events(struct mem_cgroup *memcg, |
| 631 | enum vm_event_item idx, | 660 | int idx, unsigned long count) |
| 632 | unsigned long count) | ||
| 633 | { | 661 | { |
| 634 | if (!mem_cgroup_disabled()) | 662 | preempt_disable(); |
| 635 | this_cpu_add(memcg->stat->events[idx], count); | 663 | __count_memcg_events(memcg, idx, count); |
| 664 | preempt_enable(); | ||
| 636 | } | 665 | } |
| 637 | 666 | ||
| 638 | /* idx can be of type enum memcg_stat_item or node_stat_item */ | 667 | /* idx can be of type enum memcg_event_item or vm_event_item */ |
| 639 | static inline void count_memcg_page_event(struct page *page, | 668 | static inline void count_memcg_page_event(struct page *page, |
| 640 | int idx) | 669 | int idx) |
| 641 | { | 670 | { |
| @@ -654,12 +683,20 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, | |||
| 654 | rcu_read_lock(); | 683 | rcu_read_lock(); |
| 655 | memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); | 684 | memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); |
| 656 | if (likely(memcg)) { | 685 | if (likely(memcg)) { |
| 657 | this_cpu_inc(memcg->stat->events[idx]); | 686 | count_memcg_events(memcg, idx, 1); |
| 658 | if (idx == OOM_KILL) | 687 | if (idx == OOM_KILL) |
| 659 | cgroup_file_notify(&memcg->events_file); | 688 | cgroup_file_notify(&memcg->events_file); |
| 660 | } | 689 | } |
| 661 | rcu_read_unlock(); | 690 | rcu_read_unlock(); |
| 662 | } | 691 | } |
| 692 | |||
| 693 | static inline void mem_cgroup_event(struct mem_cgroup *memcg, | ||
| 694 | enum memcg_event_item event) | ||
| 695 | { | ||
| 696 | count_memcg_events(memcg, event, 1); | ||
| 697 | cgroup_file_notify(&memcg->events_file); | ||
| 698 | } | ||
| 699 | |||
| 663 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 700 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
| 664 | void mem_cgroup_split_huge_fixup(struct page *head); | 701 | void mem_cgroup_split_huge_fixup(struct page *head); |
| 665 | #endif | 702 | #endif |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 7fc92384977e..173d2484f6e3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
| @@ -1312,8 +1312,6 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, | |||
| 1312 | unsigned long end, unsigned long floor, unsigned long ceiling); | 1312 | unsigned long end, unsigned long floor, unsigned long ceiling); |
| 1313 | int copy_page_range(struct mm_struct *dst, struct mm_struct *src, | 1313 | int copy_page_range(struct mm_struct *dst, struct mm_struct *src, |
| 1314 | struct vm_area_struct *vma); | 1314 | struct vm_area_struct *vma); |
| 1315 | void unmap_mapping_range(struct address_space *mapping, | ||
| 1316 | loff_t const holebegin, loff_t const holelen, int even_cows); | ||
| 1317 | int follow_pte_pmd(struct mm_struct *mm, unsigned long address, | 1315 | int follow_pte_pmd(struct mm_struct *mm, unsigned long address, |
| 1318 | unsigned long *start, unsigned long *end, | 1316 | unsigned long *start, unsigned long *end, |
| 1319 | pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); | 1317 | pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); |
| @@ -1324,12 +1322,6 @@ int follow_phys(struct vm_area_struct *vma, unsigned long address, | |||
| 1324 | int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, | 1322 | int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, |
| 1325 | void *buf, int len, int write); | 1323 | void *buf, int len, int write); |
| 1326 | 1324 | ||
| 1327 | static inline void unmap_shared_mapping_range(struct address_space *mapping, | ||
| 1328 | loff_t const holebegin, loff_t const holelen) | ||
| 1329 | { | ||
| 1330 | unmap_mapping_range(mapping, holebegin, holelen, 0); | ||
| 1331 | } | ||
| 1332 | |||
| 1333 | extern void truncate_pagecache(struct inode *inode, loff_t new); | 1325 | extern void truncate_pagecache(struct inode *inode, loff_t new); |
| 1334 | extern void truncate_setsize(struct inode *inode, loff_t newsize); | 1326 | extern void truncate_setsize(struct inode *inode, loff_t newsize); |
| 1335 | void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); | 1327 | void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); |
| @@ -1344,6 +1336,10 @@ extern int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, | |||
| 1344 | extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, | 1336 | extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, |
| 1345 | unsigned long address, unsigned int fault_flags, | 1337 | unsigned long address, unsigned int fault_flags, |
| 1346 | bool *unlocked); | 1338 | bool *unlocked); |
| 1339 | void unmap_mapping_pages(struct address_space *mapping, | ||
| 1340 | pgoff_t start, pgoff_t nr, bool even_cows); | ||
| 1341 | void unmap_mapping_range(struct address_space *mapping, | ||
| 1342 | loff_t const holebegin, loff_t const holelen, int even_cows); | ||
| 1347 | #else | 1343 | #else |
| 1348 | static inline int handle_mm_fault(struct vm_area_struct *vma, | 1344 | static inline int handle_mm_fault(struct vm_area_struct *vma, |
| 1349 | unsigned long address, unsigned int flags) | 1345 | unsigned long address, unsigned int flags) |
| @@ -1360,10 +1356,20 @@ static inline int fixup_user_fault(struct task_struct *tsk, | |||
| 1360 | BUG(); | 1356 | BUG(); |
| 1361 | return -EFAULT; | 1357 | return -EFAULT; |
| 1362 | } | 1358 | } |
| 1359 | static inline void unmap_mapping_pages(struct address_space *mapping, | ||
| 1360 | pgoff_t start, pgoff_t nr, bool even_cows) { } | ||
| 1361 | static inline void unmap_mapping_range(struct address_space *mapping, | ||
| 1362 | loff_t const holebegin, loff_t const holelen, int even_cows) { } | ||
| 1363 | #endif | 1363 | #endif |
| 1364 | 1364 | ||
| 1365 | extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, | 1365 | static inline void unmap_shared_mapping_range(struct address_space *mapping, |
| 1366 | unsigned int gup_flags); | 1366 | loff_t const holebegin, loff_t const holelen) |
| 1367 | { | ||
| 1368 | unmap_mapping_range(mapping, holebegin, holelen, 0); | ||
| 1369 | } | ||
| 1370 | |||
| 1371 | extern int access_process_vm(struct task_struct *tsk, unsigned long addr, | ||
| 1372 | void *buf, int len, unsigned int gup_flags); | ||
| 1367 | extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, | 1373 | extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, |
| 1368 | void *buf, int len, unsigned int gup_flags); | 1374 | void *buf, int len, unsigned int gup_flags); |
| 1369 | extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, | 1375 | extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index cfd0ac4e5e0e..fd1af6b9591d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
| @@ -31,28 +31,56 @@ struct hmm; | |||
| 31 | * it to keep track of whatever it is we are using the page for at the | 31 | * it to keep track of whatever it is we are using the page for at the |
| 32 | * moment. Note that we have no way to track which tasks are using | 32 | * moment. Note that we have no way to track which tasks are using |
| 33 | * a page, though if it is a pagecache page, rmap structures can tell us | 33 | * a page, though if it is a pagecache page, rmap structures can tell us |
| 34 | * who is mapping it. | 34 | * who is mapping it. If you allocate the page using alloc_pages(), you |
| 35 | * can use some of the space in struct page for your own purposes. | ||
| 35 | * | 36 | * |
| 36 | * The objects in struct page are organized in double word blocks in | 37 | * Pages that were once in the page cache may be found under the RCU lock |
| 37 | * order to allows us to use atomic double word operations on portions | 38 | * even after they have been recycled to a different purpose. The page |
| 38 | * of struct page. That is currently only used by slub but the arrangement | 39 | * cache reads and writes some of the fields in struct page to pin the |
| 39 | * allows the use of atomic double word operations on the flags/mapping | 40 | * page before checking that it's still in the page cache. It is vital |
| 40 | * and lru list pointers also. | 41 | * that all users of struct page: |
| 42 | * 1. Use the first word as PageFlags. | ||
| 43 | * 2. Clear or preserve bit 0 of page->compound_head. It is used as | ||
| 44 | * PageTail for compound pages, and the page cache must not see false | ||
| 45 | * positives. Some users put a pointer here (guaranteed to be at least | ||
| 46 | * 4-byte aligned), other users avoid using the field altogether. | ||
| 47 | * 3. page->_refcount must either not be used, or must be used in such a | ||
| 48 | * way that other CPUs temporarily incrementing and then decrementing the | ||
| 49 | * refcount does not cause problems. On receiving the page from | ||
| 50 | * alloc_pages(), the refcount will be positive. | ||
| 51 | * 4. Either preserve page->_mapcount or restore it to -1 before freeing it. | ||
| 52 | * | ||
| 53 | * If you allocate pages of order > 0, you can use the fields in the struct | ||
| 54 | * page associated with each page, but bear in mind that the pages may have | ||
| 55 | * been inserted individually into the page cache, so you must use the above | ||
| 56 | * four fields in a compatible way for each struct page. | ||
| 57 | * | ||
| 58 | * SLUB uses cmpxchg_double() to atomically update its freelist and | ||
| 59 | * counters. That requires that freelist & counters be adjacent and | ||
| 60 | * double-word aligned. We align all struct pages to double-word | ||
| 61 | * boundaries, and ensure that 'freelist' is aligned within the | ||
| 62 | * struct. | ||
| 41 | */ | 63 | */ |
| 64 | #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE | ||
| 65 | #define _struct_page_alignment __aligned(2 * sizeof(unsigned long)) | ||
| 66 | #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) | ||
| 67 | #define _slub_counter_t unsigned long | ||
| 68 | #else | ||
| 69 | #define _slub_counter_t unsigned int | ||
| 70 | #endif | ||
| 71 | #else /* !CONFIG_HAVE_ALIGNED_STRUCT_PAGE */ | ||
| 72 | #define _struct_page_alignment | ||
| 73 | #define _slub_counter_t unsigned int | ||
| 74 | #endif /* !CONFIG_HAVE_ALIGNED_STRUCT_PAGE */ | ||
| 75 | |||
| 42 | struct page { | 76 | struct page { |
| 43 | /* First double word block */ | 77 | /* First double word block */ |
| 44 | unsigned long flags; /* Atomic flags, some possibly | 78 | unsigned long flags; /* Atomic flags, some possibly |
| 45 | * updated asynchronously */ | 79 | * updated asynchronously */ |
| 46 | union { | 80 | union { |
| 47 | struct address_space *mapping; /* If low bit clear, points to | 81 | /* See page-flags.h for the definition of PAGE_MAPPING_FLAGS */ |
| 48 | * inode address_space, or NULL. | 82 | struct address_space *mapping; |
| 49 | * If page mapped as anonymous | 83 | |
| 50 | * memory, low bit is set, and | ||
| 51 | * it points to anon_vma object | ||
| 52 | * or KSM private structure. See | ||
| 53 | * PAGE_MAPPING_ANON and | ||
| 54 | * PAGE_MAPPING_KSM. | ||
| 55 | */ | ||
| 56 | void *s_mem; /* slab first object */ | 84 | void *s_mem; /* slab first object */ |
| 57 | atomic_t compound_mapcount; /* first tail page */ | 85 | atomic_t compound_mapcount; /* first tail page */ |
| 58 | /* page_deferred_list().next -- second tail page */ | 86 | /* page_deferred_list().next -- second tail page */ |
| @@ -66,40 +94,27 @@ struct page { | |||
| 66 | }; | 94 | }; |
| 67 | 95 | ||
| 68 | union { | 96 | union { |
| 69 | #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ | 97 | _slub_counter_t counters; |
| 70 | defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) | 98 | unsigned int active; /* SLAB */ |
| 71 | /* Used for cmpxchg_double in slub */ | 99 | struct { /* SLUB */ |
| 72 | unsigned long counters; | 100 | unsigned inuse:16; |
| 73 | #else | 101 | unsigned objects:15; |
| 74 | /* | 102 | unsigned frozen:1; |
| 75 | * Keep _refcount separate from slub cmpxchg_double data. | 103 | }; |
| 76 | * As the rest of the double word is protected by slab_lock | 104 | int units; /* SLOB */ |
| 77 | * but _refcount is not. | 105 | |
| 78 | */ | 106 | struct { /* Page cache */ |
| 79 | unsigned counters; | 107 | /* |
| 80 | #endif | 108 | * Count of ptes mapped in mms, to show when |
| 81 | struct { | 109 | * page is mapped & limit reverse map searches. |
| 110 | * | ||
| 111 | * Extra information about page type may be | ||
| 112 | * stored here for pages that are never mapped, | ||
| 113 | * in which case the value MUST BE <= -2. | ||
| 114 | * See page-flags.h for more details. | ||
| 115 | */ | ||
| 116 | atomic_t _mapcount; | ||
| 82 | 117 | ||
| 83 | union { | ||
| 84 | /* | ||
| 85 | * Count of ptes mapped in mms, to show when | ||
| 86 | * page is mapped & limit reverse map searches. | ||
| 87 | * | ||
| 88 | * Extra information about page type may be | ||
| 89 | * stored here for pages that are never mapped, | ||
| 90 | * in which case the value MUST BE <= -2. | ||
| 91 | * See page-flags.h for more details. | ||
| 92 | */ | ||
| 93 | atomic_t _mapcount; | ||
| 94 | |||
| 95 | unsigned int active; /* SLAB */ | ||
| 96 | struct { /* SLUB */ | ||
| 97 | unsigned inuse:16; | ||
| 98 | unsigned objects:15; | ||
| 99 | unsigned frozen:1; | ||
| 100 | }; | ||
| 101 | int units; /* SLOB */ | ||
| 102 | }; | ||
| 103 | /* | 118 | /* |
| 104 | * Usage count, *USE WRAPPER FUNCTION* when manual | 119 | * Usage count, *USE WRAPPER FUNCTION* when manual |
| 105 | * accounting. See page_ref.h | 120 | * accounting. See page_ref.h |
| @@ -109,8 +124,6 @@ struct page { | |||
| 109 | }; | 124 | }; |
| 110 | 125 | ||
| 111 | /* | 126 | /* |
| 112 | * Third double word block | ||
| 113 | * | ||
| 114 | * WARNING: bit 0 of the first word encodes PageTail(). That means | 127 | * WARNING: bit 0 of the first word encodes PageTail(). That means |
| 115 | * the other users of the storage space MUST NOT use the bit to | 128 | * the other users of the storage space MUST NOT use the bit to |
| 116 | * avoid collision and false-positive PageTail(). | 129 | * avoid collision and false-positive PageTail(). |
| @@ -145,19 +158,9 @@ struct page { | |||
| 145 | unsigned long compound_head; /* If bit zero is set */ | 158 | unsigned long compound_head; /* If bit zero is set */ |
| 146 | 159 | ||
| 147 | /* First tail page only */ | 160 | /* First tail page only */ |
| 148 | #ifdef CONFIG_64BIT | 161 | unsigned char compound_dtor; |
| 149 | /* | 162 | unsigned char compound_order; |
| 150 | * On 64 bit system we have enough space in struct page | 163 | /* two/six bytes available here */ |
| 151 | * to encode compound_dtor and compound_order with | ||
| 152 | * unsigned int. It can help compiler generate better or | ||
| 153 | * smaller code on some architectures. | ||
| 154 | */ | ||
| 155 | unsigned int compound_dtor; | ||
| 156 | unsigned int compound_order; | ||
| 157 | #else | ||
| 158 | unsigned short int compound_dtor; | ||
| 159 | unsigned short int compound_order; | ||
| 160 | #endif | ||
| 161 | }; | 164 | }; |
| 162 | 165 | ||
| 163 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS | 166 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS |
| @@ -171,15 +174,14 @@ struct page { | |||
| 171 | #endif | 174 | #endif |
| 172 | }; | 175 | }; |
| 173 | 176 | ||
| 174 | /* Remainder is not double word aligned */ | ||
| 175 | union { | 177 | union { |
| 176 | unsigned long private; /* Mapping-private opaque data: | 178 | /* |
| 177 | * usually used for buffer_heads | 179 | * Mapping-private opaque data: |
| 178 | * if PagePrivate set; used for | 180 | * Usually used for buffer_heads if PagePrivate |
| 179 | * swp_entry_t if PageSwapCache; | 181 | * Used for swp_entry_t if PageSwapCache |
| 180 | * indicates order in the buddy | 182 | * Indicates order in the buddy system if PageBuddy |
| 181 | * system if PG_buddy is set. | 183 | */ |
| 182 | */ | 184 | unsigned long private; |
| 183 | #if USE_SPLIT_PTE_PTLOCKS | 185 | #if USE_SPLIT_PTE_PTLOCKS |
| 184 | #if ALLOC_SPLIT_PTLOCKS | 186 | #if ALLOC_SPLIT_PTLOCKS |
| 185 | spinlock_t *ptl; | 187 | spinlock_t *ptl; |
| @@ -212,15 +214,7 @@ struct page { | |||
| 212 | #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS | 214 | #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS |
| 213 | int _last_cpupid; | 215 | int _last_cpupid; |
| 214 | #endif | 216 | #endif |
| 215 | } | 217 | } _struct_page_alignment; |
| 216 | /* | ||
| 217 | * The struct page can be forced to be double word aligned so that atomic ops | ||
| 218 | * on double words work. The SLUB allocator can make use of such a feature. | ||
| 219 | */ | ||
| 220 | #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE | ||
| 221 | __aligned(2 * sizeof(unsigned long)) | ||
| 222 | #endif | ||
| 223 | ; | ||
| 224 | 218 | ||
| 225 | #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) | 219 | #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) |
| 226 | #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) | 220 | #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) |
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index b25dc9db19fc..2d07a1ed5a31 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | #ifndef _LINUX_MMU_NOTIFIER_H | 2 | #ifndef _LINUX_MMU_NOTIFIER_H |
| 3 | #define _LINUX_MMU_NOTIFIER_H | 3 | #define _LINUX_MMU_NOTIFIER_H |
| 4 | 4 | ||
| 5 | #include <linux/types.h> | ||
| 5 | #include <linux/list.h> | 6 | #include <linux/list.h> |
| 6 | #include <linux/spinlock.h> | 7 | #include <linux/spinlock.h> |
| 7 | #include <linux/mm_types.h> | 8 | #include <linux/mm_types.h> |
| @@ -10,6 +11,9 @@ | |||
| 10 | struct mmu_notifier; | 11 | struct mmu_notifier; |
| 11 | struct mmu_notifier_ops; | 12 | struct mmu_notifier_ops; |
| 12 | 13 | ||
| 14 | /* mmu_notifier_ops flags */ | ||
| 15 | #define MMU_INVALIDATE_DOES_NOT_BLOCK (0x01) | ||
| 16 | |||
| 13 | #ifdef CONFIG_MMU_NOTIFIER | 17 | #ifdef CONFIG_MMU_NOTIFIER |
| 14 | 18 | ||
| 15 | /* | 19 | /* |
| @@ -27,6 +31,15 @@ struct mmu_notifier_mm { | |||
| 27 | 31 | ||
| 28 | struct mmu_notifier_ops { | 32 | struct mmu_notifier_ops { |
| 29 | /* | 33 | /* |
| 34 | * Flags to specify behavior of callbacks for this MMU notifier. | ||
| 35 | * Used to determine in which context an operation may be called. | ||
| 36 | * | ||
| 37 | * MMU_INVALIDATE_DOES_NOT_BLOCK: invalidate_range_* callbacks do not | ||
| 38 | * block | ||
| 39 | */ | ||
| 40 | int flags; | ||
| 41 | |||
| 42 | /* | ||
| 30 | * Called either by mmu_notifier_unregister or when the mm is | 43 | * Called either by mmu_notifier_unregister or when the mm is |
| 31 | * being destroyed by exit_mmap, always before all pages are | 44 | * being destroyed by exit_mmap, always before all pages are |
| 32 | * freed. This can run concurrently with other mmu notifier | 45 | * freed. This can run concurrently with other mmu notifier |
| @@ -137,6 +150,10 @@ struct mmu_notifier_ops { | |||
| 137 | * page. Pages will no longer be referenced by the linux | 150 | * page. Pages will no longer be referenced by the linux |
| 138 | * address space but may still be referenced by sptes until | 151 | * address space but may still be referenced by sptes until |
| 139 | * the last refcount is dropped. | 152 | * the last refcount is dropped. |
| 153 | * | ||
| 154 | * If both of these callbacks cannot block, and invalidate_range | ||
| 155 | * cannot block, mmu_notifier_ops.flags should have | ||
| 156 | * MMU_INVALIDATE_DOES_NOT_BLOCK set. | ||
| 140 | */ | 157 | */ |
| 141 | void (*invalidate_range_start)(struct mmu_notifier *mn, | 158 | void (*invalidate_range_start)(struct mmu_notifier *mn, |
| 142 | struct mm_struct *mm, | 159 | struct mm_struct *mm, |
| @@ -159,12 +176,13 @@ struct mmu_notifier_ops { | |||
| 159 | * external TLB range needs to be flushed. For more in depth | 176 | * external TLB range needs to be flushed. For more in depth |
| 160 | * discussion on this see Documentation/vm/mmu_notifier.txt | 177 | * discussion on this see Documentation/vm/mmu_notifier.txt |
| 161 | * | 178 | * |
| 162 | * The invalidate_range() function is called under the ptl | ||
| 163 | * spin-lock and not allowed to sleep. | ||
| 164 | * | ||
| 165 | * Note that this function might be called with just a sub-range | 179 | * Note that this function might be called with just a sub-range |
| 166 | * of what was passed to invalidate_range_start()/end(), if | 180 | * of what was passed to invalidate_range_start()/end(), if |
| 167 | * called between those functions. | 181 | * called between those functions. |
| 182 | * | ||
| 183 | * If this callback cannot block, and invalidate_range_{start,end} | ||
| 184 | * cannot block, mmu_notifier_ops.flags should have | ||
| 185 | * MMU_INVALIDATE_DOES_NOT_BLOCK set. | ||
| 168 | */ | 186 | */ |
| 169 | void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm, | 187 | void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm, |
| 170 | unsigned long start, unsigned long end); | 188 | unsigned long start, unsigned long end); |
| @@ -218,6 +236,7 @@ extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, | |||
| 218 | bool only_end); | 236 | bool only_end); |
| 219 | extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, | 237 | extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, |
| 220 | unsigned long start, unsigned long end); | 238 | unsigned long start, unsigned long end); |
| 239 | extern bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm); | ||
| 221 | 240 | ||
| 222 | static inline void mmu_notifier_release(struct mm_struct *mm) | 241 | static inline void mmu_notifier_release(struct mm_struct *mm) |
| 223 | { | 242 | { |
| @@ -457,6 +476,11 @@ static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, | |||
| 457 | { | 476 | { |
| 458 | } | 477 | } |
| 459 | 478 | ||
| 479 | static inline bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm) | ||
| 480 | { | ||
| 481 | return false; | ||
| 482 | } | ||
| 483 | |||
| 460 | static inline void mmu_notifier_mm_init(struct mm_struct *mm) | 484 | static inline void mmu_notifier_mm_init(struct mm_struct *mm) |
| 461 | { | 485 | { |
| 462 | } | 486 | } |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 67f2e3c38939..7522a6987595 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
| @@ -1166,8 +1166,16 @@ extern unsigned long usemap_size(void); | |||
| 1166 | 1166 | ||
| 1167 | /* | 1167 | /* |
| 1168 | * We use the lower bits of the mem_map pointer to store | 1168 | * We use the lower bits of the mem_map pointer to store |
| 1169 | * a little bit of information. There should be at least | 1169 | * a little bit of information. The pointer is calculated |
| 1170 | * 3 bits here due to 32-bit alignment. | 1170 | * as mem_map - section_nr_to_pfn(pnum). The result is |
| 1171 | * aligned to the minimum alignment of the two values: | ||
| 1172 | * 1. All mem_map arrays are page-aligned. | ||
| 1173 | * 2. section_nr_to_pfn() always clears PFN_SECTION_SHIFT | ||
| 1174 | * lowest bits. PFN_SECTION_SHIFT is arch-specific | ||
| 1175 | * (equal to SECTION_SIZE_BITS - PAGE_SHIFT), and the | ||
| 1176 | * worst combination is powerpc with 256k pages, | ||
| 1177 | * which results in PFN_SECTION_SHIFT equal to 6. | ||
| 1178 | * To sum it up, at least 6 bits are available. | ||
| 1171 | */ | 1179 | */ |
| 1172 | #define SECTION_MARKED_PRESENT (1UL<<0) | 1180 | #define SECTION_MARKED_PRESENT (1UL<<0) |
| 1173 | #define SECTION_HAS_MEM_MAP (1UL<<1) | 1181 | #define SECTION_HAS_MEM_MAP (1UL<<1) |
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 3ec44e27aa9d..50c2b8786831 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
| @@ -46,11 +46,6 @@ | |||
| 46 | * guarantees that this bit is cleared for a page when it first is entered into | 46 | * guarantees that this bit is cleared for a page when it first is entered into |
| 47 | * the page cache. | 47 | * the page cache. |
| 48 | * | 48 | * |
| 49 | * PG_highmem pages are not permanently mapped into the kernel virtual address | ||
| 50 | * space, they need to be kmapped separately for doing IO on the pages. The | ||
| 51 | * struct page (these bits with information) are always mapped into kernel | ||
| 52 | * address space... | ||
| 53 | * | ||
| 54 | * PG_hwpoison indicates that a page got corrupted in hardware and contains | 49 | * PG_hwpoison indicates that a page got corrupted in hardware and contains |
| 55 | * data with incorrect ECC bits that triggered a machine check. Accessing is | 50 | * data with incorrect ECC bits that triggered a machine check. Accessing is |
| 56 | * not safe since it may cause another machine check. Don't touch! | 51 | * not safe since it may cause another machine check. Don't touch! |
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 5fb6580f7f23..6dc456ac6136 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h | |||
| @@ -9,14 +9,14 @@ | |||
| 9 | #ifndef _LINUX_PAGEVEC_H | 9 | #ifndef _LINUX_PAGEVEC_H |
| 10 | #define _LINUX_PAGEVEC_H | 10 | #define _LINUX_PAGEVEC_H |
| 11 | 11 | ||
| 12 | /* 14 pointers + two long's align the pagevec structure to a power of two */ | 12 | /* 15 pointers + header align the pagevec structure to a power of two */ |
| 13 | #define PAGEVEC_SIZE 14 | 13 | #define PAGEVEC_SIZE 15 |
| 14 | 14 | ||
| 15 | struct page; | 15 | struct page; |
| 16 | struct address_space; | 16 | struct address_space; |
| 17 | 17 | ||
| 18 | struct pagevec { | 18 | struct pagevec { |
| 19 | unsigned long nr; | 19 | unsigned char nr; |
| 20 | bool percpu_pvec_drained; | 20 | bool percpu_pvec_drained; |
| 21 | struct page *pages[PAGEVEC_SIZE]; | 21 | struct page *pages[PAGEVEC_SIZE]; |
| 22 | }; | 22 | }; |
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 3d49b91b674d..bd422561a75e 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | /* | 11 | /* |
| 12 | * Routines for handling mm_structs | 12 | * Routines for handling mm_structs |
| 13 | */ | 13 | */ |
| 14 | extern struct mm_struct * mm_alloc(void); | 14 | extern struct mm_struct *mm_alloc(void); |
| 15 | 15 | ||
| 16 | /** | 16 | /** |
| 17 | * mmgrab() - Pin a &struct mm_struct. | 17 | * mmgrab() - Pin a &struct mm_struct. |
| @@ -35,27 +35,7 @@ static inline void mmgrab(struct mm_struct *mm) | |||
| 35 | atomic_inc(&mm->mm_count); | 35 | atomic_inc(&mm->mm_count); |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | /* mmdrop drops the mm and the page tables */ | 38 | extern void mmdrop(struct mm_struct *mm); |
| 39 | extern void __mmdrop(struct mm_struct *); | ||
| 40 | static inline void mmdrop(struct mm_struct *mm) | ||
| 41 | { | ||
| 42 | if (unlikely(atomic_dec_and_test(&mm->mm_count))) | ||
| 43 | __mmdrop(mm); | ||
| 44 | } | ||
| 45 | |||
| 46 | static inline void mmdrop_async_fn(struct work_struct *work) | ||
| 47 | { | ||
| 48 | struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); | ||
| 49 | __mmdrop(mm); | ||
| 50 | } | ||
| 51 | |||
| 52 | static inline void mmdrop_async(struct mm_struct *mm) | ||
| 53 | { | ||
| 54 | if (unlikely(atomic_dec_and_test(&mm->mm_count))) { | ||
| 55 | INIT_WORK(&mm->async_put_work, mmdrop_async_fn); | ||
| 56 | schedule_work(&mm->async_put_work); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | 39 | ||
| 60 | /** | 40 | /** |
| 61 | * mmget() - Pin the address space associated with a &struct mm_struct. | 41 | * mmget() - Pin the address space associated with a &struct mm_struct. |
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 06b295bec00d..73b5e655a76e 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h | |||
| @@ -112,13 +112,11 @@ extern void shmem_uncharge(struct inode *inode, long pages); | |||
| 112 | 112 | ||
| 113 | #ifdef CONFIG_TMPFS | 113 | #ifdef CONFIG_TMPFS |
| 114 | 114 | ||
| 115 | extern int shmem_add_seals(struct file *file, unsigned int seals); | 115 | extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg); |
| 116 | extern int shmem_get_seals(struct file *file); | ||
| 117 | extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg); | ||
| 118 | 116 | ||
| 119 | #else | 117 | #else |
| 120 | 118 | ||
| 121 | static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a) | 119 | static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned long a) |
| 122 | { | 120 | { |
| 123 | return -EINVAL; | 121 | return -EINVAL; |
| 124 | } | 122 | } |
diff --git a/include/linux/swap.h b/include/linux/swap.h index c2b8128799c1..7b6a59f722a3 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
| @@ -332,7 +332,6 @@ extern void mark_page_accessed(struct page *); | |||
| 332 | extern void lru_add_drain(void); | 332 | extern void lru_add_drain(void); |
| 333 | extern void lru_add_drain_cpu(int cpu); | 333 | extern void lru_add_drain_cpu(int cpu); |
| 334 | extern void lru_add_drain_all(void); | 334 | extern void lru_add_drain_all(void); |
| 335 | extern void lru_add_drain_all_cpuslocked(void); | ||
| 336 | extern void rotate_reclaimable_page(struct page *page); | 335 | extern void rotate_reclaimable_page(struct page *page); |
| 337 | extern void deactivate_file_page(struct page *page); | 336 | extern void deactivate_file_page(struct page *page); |
| 338 | extern void mark_page_lazyfree(struct page *page); | 337 | extern void mark_page_lazyfree(struct page *page); |
| @@ -345,7 +344,6 @@ extern void lru_cache_add_active_or_unevictable(struct page *page, | |||
| 345 | 344 | ||
| 346 | /* linux/mm/vmscan.c */ | 345 | /* linux/mm/vmscan.c */ |
| 347 | extern unsigned long zone_reclaimable_pages(struct zone *zone); | 346 | extern unsigned long zone_reclaimable_pages(struct zone *zone); |
| 348 | extern unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat); | ||
| 349 | extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | 347 | extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, |
| 350 | gfp_t gfp_mask, nodemask_t *mask); | 348 | gfp_t gfp_mask, nodemask_t *mask); |
| 351 | extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); | 349 | extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); |
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 1779c9817b39..a4c2317d8b9f 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h | |||
| @@ -216,23 +216,6 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, | |||
| 216 | return x; | 216 | return x; |
| 217 | } | 217 | } |
| 218 | 218 | ||
| 219 | static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat, | ||
| 220 | enum node_stat_item item) | ||
| 221 | { | ||
| 222 | long x = atomic_long_read(&pgdat->vm_stat[item]); | ||
| 223 | |||
| 224 | #ifdef CONFIG_SMP | ||
| 225 | int cpu; | ||
| 226 | for_each_online_cpu(cpu) | ||
| 227 | x += per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->vm_node_stat_diff[item]; | ||
| 228 | |||
| 229 | if (x < 0) | ||
| 230 | x = 0; | ||
| 231 | #endif | ||
| 232 | return x; | ||
| 233 | } | ||
| 234 | |||
| 235 | |||
| 236 | #ifdef CONFIG_NUMA | 219 | #ifdef CONFIG_NUMA |
| 237 | extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item); | 220 | extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item); |
| 238 | extern unsigned long sum_zone_node_page_state(int node, | 221 | extern unsigned long sum_zone_node_page_state(int node, |
diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 004ba807df96..7238865e75b0 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h | |||
| @@ -108,4 +108,6 @@ void zpool_register_driver(struct zpool_driver *driver); | |||
| 108 | 108 | ||
| 109 | int zpool_unregister_driver(struct zpool_driver *driver); | 109 | int zpool_unregister_driver(struct zpool_driver *driver); |
| 110 | 110 | ||
| 111 | bool zpool_evictable(struct zpool *pool); | ||
| 112 | |||
| 111 | #endif | 113 | #endif |
