Diffstat (limited to 'include/linux/mm.h')
 -rw-r--r--  include/linux/mm.h  150
 1 file changed, 110 insertions(+), 40 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dd5ea3016fc4..47a93928b90f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -138,7 +138,6 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
-#define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
 #define VM_ARCH_1	0x01000000	/* Architecture-specific flag */
 #define VM_ARCH_2	0x02000000
 #define VM_DONTDUMP	0x04000000	/* Do not include in the core dump */
@@ -206,27 +205,26 @@ extern unsigned int kobjsize(const void *objp);
 extern pgprot_t protection_map[16];
 
 #define FAULT_FLAG_WRITE	0x01	/* Fault was a write access */
-#define FAULT_FLAG_NONLINEAR	0x02	/* Fault was via a nonlinear mapping */
-#define FAULT_FLAG_MKWRITE	0x04	/* Fault was mkwrite of existing pte */
-#define FAULT_FLAG_ALLOW_RETRY	0x08	/* Retry fault if blocking */
-#define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
-#define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
-#define FAULT_FLAG_TRIED	0x40	/* second try */
-#define FAULT_FLAG_USER		0x80	/* The fault originated in userspace */
+#define FAULT_FLAG_MKWRITE	0x02	/* Fault was mkwrite of existing pte */
+#define FAULT_FLAG_ALLOW_RETRY	0x04	/* Retry fault if blocking */
+#define FAULT_FLAG_RETRY_NOWAIT	0x08	/* Don't drop mmap_sem and wait when retrying */
+#define FAULT_FLAG_KILLABLE	0x10	/* The fault task is in SIGKILL killable region */
+#define FAULT_FLAG_TRIED	0x20	/* Second try */
+#define FAULT_FLAG_USER		0x40	/* The fault originated in userspace */
 
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
  * ->fault function. The vma's ->fault is responsible for returning a bitmask
  * of VM_FAULT_xxx flags that give details about how the fault was handled.
  *
- * pgoff should be used in favour of virtual_address, if possible. If pgoff
- * is used, one may implement ->remap_pages to get nonlinear mapping support.
+ * pgoff should be used in favour of virtual_address, if possible.
  */
 struct vm_fault {
 	unsigned int flags;		/* FAULT_FLAG_xxx flags */
 	pgoff_t pgoff;			/* Logical page offset based on vma */
 	void __user *virtual_address;	/* Faulting virtual address */
 
+	struct page *cow_page;		/* Handler may choose to COW */
 	struct page *page;		/* ->fault handlers should return a
 					 * page here, unless VM_FAULT_NOPAGE
 					 * is set (which is also implied by
@@ -287,9 +285,13 @@ struct vm_operations_struct {
 	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
 					unsigned long addr);
 #endif
-	/* called by sys_remap_file_pages() to populate non-linear mapping */
-	int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
-			   unsigned long size, pgoff_t pgoff);
+	/*
+	 * Called by vm_normal_page() for special PTEs to find the
+	 * page for @addr. This is useful if the default behavior
+	 * (using pte_page()) would not find the correct page.
+	 */
+	struct page *(*find_special_page)(struct vm_area_struct *vma,
+					  unsigned long addr);
 };
 
 struct mmu_gather;
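
For context, a minimal sketch of how a driver could wire up the new callback. The driver name, the mydrv_mapping struct, and its pages[] array are invented for illustration and are not part of this diff; only the callback signature comes from the hunk above.

/* Hypothetical driver-side use of ->find_special_page (illustration only). */
struct mydrv_mapping {
	struct page **pages;		/* pages backing this vma, driver-owned */
};

static struct page *mydrv_find_special_page(struct vm_area_struct *vma,
					    unsigned long addr)
{
	struct mydrv_mapping *map = vma->vm_private_data;

	/* resolve the page that vm_normal_page() could not get via pte_page() */
	return map->pages[(addr - vma->vm_start) >> PAGE_SHIFT];
}

static const struct vm_operations_struct mydrv_vm_ops = {
	.find_special_page = mydrv_find_special_page,
};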
@@ -446,6 +448,12 @@ static inline struct page *compound_head_by_tail(struct page *tail)
 	return tail;
 }
 
+/*
+ * Since either compound page could be dismantled asynchronously in THP
+ * or we access asynchronously arbitrary positioned struct page, there
+ * would be tail flag race. To handle this race, we should call
+ * smp_rmb() before checking tail flag. compound_head_by_tail() did it.
+ */
 static inline struct page *compound_head(struct page *page)
 {
 	if (unlikely(PageTail(page)))
@@ -454,6 +462,18 @@ static inline struct page *compound_head(struct page *page)
 }
 
 /*
+ * If we access compound page synchronously such as access to
+ * allocated page, there is no need to handle tail flag race, so we can
+ * check tail flag directly without any synchronization primitive.
+ */
+static inline struct page *compound_head_fast(struct page *page)
+{
+	if (unlikely(PageTail(page)))
+		return page->first_page;
+	return page;
+}
+
+/*
  * The atomic page->_mapcount, starts from -1: so that transitions
  * both from it and to it can be tracked, using atomic_inc_and_test
  * and atomic_add_negative(-1).
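
A short illustrative contrast of the two helpers, assuming only the definitions added above; the two wrapper functions are invented for this sketch and do not appear in the diff.

/* A page we effectively pin (e.g. an object we allocated): no tail-flag
 * race is possible, so the _fast variant is sufficient. */
static struct page *head_of_owned_object(void *obj)
{
	return compound_head_fast(virt_to_page(obj));
}

/* An arbitrary pfn: the compound page may be dismantled under us (THP
 * split), so take the smp_rmb()-protected compound_head() path. */
static struct page *head_of_arbitrary_pfn(unsigned long pfn)
{
	return compound_head(pfn_to_page(pfn));
}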
@@ -465,7 +485,8 @@ static inline void page_mapcount_reset(struct page *page)
 
 static inline int page_mapcount(struct page *page)
 {
-	return atomic_read(&(page)->_mapcount) + 1;
+	VM_BUG_ON_PAGE(PageSlab(page), page);
+	return atomic_read(&page->_mapcount) + 1;
 }
 
 static inline int page_count(struct page *page)
@@ -531,7 +552,14 @@ static inline void get_page(struct page *page)
 static inline struct page *virt_to_head_page(const void *x)
 {
 	struct page *page = virt_to_page(x);
-	return compound_head(page);
+
+	/*
+	 * We don't need to worry about synchronization of tail flag
+	 * when we call virt_to_head_page() since it is only called for
+	 * already allocated page and this page won't be freed until
+	 * this virt_to_head_page() is finished. So use _fast variant.
+	 */
+	return compound_head_fast(page);
 }
 
 /*
@@ -601,29 +629,28 @@ int split_free_page(struct page *page);
  * prototype for that function and accessor functions.
  * These are _only_ valid on the head of a PG_compound page.
  */
-typedef void compound_page_dtor(struct page *);
 
 static inline void set_compound_page_dtor(struct page *page,
 						compound_page_dtor *dtor)
 {
-	page[1].lru.next = (void *)dtor;
+	page[1].compound_dtor = dtor;
 }
 
 static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
 {
-	return (compound_page_dtor *)page[1].lru.next;
+	return page[1].compound_dtor;
 }
 
 static inline int compound_order(struct page *page)
 {
 	if (!PageHead(page))
 		return 0;
-	return (unsigned long)page[1].lru.prev;
+	return page[1].compound_order;
 }
 
 static inline void set_compound_order(struct page *page, unsigned long order)
 {
-	page[1].lru.prev = (void *)order;
+	page[1].compound_order = order;
 }
 
 #ifdef CONFIG_MMU
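
As a usage sketch (not taken from this diff), a compound-page owner installs its destructor through the accessor rather than poking page[1] directly, in the same spirit as hugetlb's free_huge_page(). Both functions below are hypothetical, and the actual return-to-pool work is elided.

/* Hypothetical destructor: owner-specific cleanup when the last reference
 * to the compound page is dropped. */
static void my_compound_dtor(struct page *page)
{
	pr_debug("order-%d compound page released\n", compound_order(page));
	/* ... return the page to a private pool, update counters, etc. ... */
}

static void my_track_compound_page(struct page *page)
{
	/* overrides the default destructor set by prep_compound_page() */
	set_compound_page_dtor(page, my_compound_dtor);
}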
@@ -1121,7 +1148,6 @@ extern void user_shm_unlock(size_t, struct user_struct *);
  * Parameter block passed down to zap_pte_range in exceptional cases.
  */
 struct zap_details {
-	struct vm_area_struct *nonlinear_vma;	/* Check page->index if set */
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
@@ -1139,8 +1165,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 
 /**
  * mm_walk - callbacks for walk_page_range
- * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
  * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
  *	       this handler is required to be able to handle
  *	       pmd_trans_huge() pmds.  They may simply choose to
@@ -1148,16 +1172,18 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
 * @pte_hole: if set, called for each hole at all levels
 * @hugetlb_entry: if set, called for each hugetlb entry
- *		  *Caution*: The caller must hold mmap_sem() if @hugetlb_entry
- *		  is used.
+ * @test_walk: caller specific callback function to determine whether
+ *             we walk over the current vma or not. A positive returned
+ *             value means "do page table walk over the current vma,"
+ *             and a negative one means "abort current page table walk
+ *             right now." 0 means "skip the current vma."
+ * @mm:        mm_struct representing the target process of page table walk
+ * @vma:       vma currently walked (NULL if walking outside vmas)
+ * @private:   private data for callbacks' usage
 *
- * (see walk_page_range for more details)
+ * (see the comment on walk_page_range() for more details)
 */
 struct mm_walk {
-	int (*pgd_entry)(pgd_t *pgd, unsigned long addr,
-			 unsigned long next, struct mm_walk *walk);
-	int (*pud_entry)(pud_t *pud, unsigned long addr,
-			 unsigned long next, struct mm_walk *walk);
 	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
			 unsigned long next, struct mm_walk *walk);
 	int (*pte_entry)(pte_t *pte, unsigned long addr,
@@ -1167,12 +1193,16 @@ struct mm_walk {
 	int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
 			     unsigned long addr, unsigned long next,
 			     struct mm_walk *walk);
+	int (*test_walk)(unsigned long addr, unsigned long next,
+			 struct mm_walk *walk);
 	struct mm_struct *mm;
+	struct vm_area_struct *vma;
 	void *private;
 };
 
 int walk_page_range(unsigned long addr, unsigned long end,
 		struct mm_walk *walk);
+int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
 void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
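
A hedged sketch of how a walker might use the new ->test_walk callback and walk_page_vma() entry point. The skip_pfnmap/count_pte helpers, the counting policy, and the assumption that the caller already holds mmap_sem are all illustrative, not from this diff.

/* Skip VM_PFNMAP vmas entirely (0 = skip, >0 = walk, <0 = abort). */
static int skip_pfnmap(unsigned long addr, unsigned long next,
		       struct mm_walk *walk)
{
	return (walk->vma->vm_flags & VM_PFNMAP) ? 0 : 1;
}

/* Count present ptes; walk->private carries the accumulator. */
static int count_pte(pte_t *pte, unsigned long addr,
		     unsigned long next, struct mm_walk *walk)
{
	unsigned long *nr = walk->private;

	if (pte_present(*pte))
		(*nr)++;
	return 0;
}

static unsigned long count_present_ptes(struct vm_area_struct *vma)
{
	unsigned long nr = 0;
	struct mm_walk walk = {
		.pte_entry = count_pte,
		.test_walk = skip_pfnmap,
		.mm	   = vma->vm_mm,
		.private   = &nr,
	};

	walk_page_vma(vma, &walk);	/* caller assumed to hold mmap_sem */
	return nr;
}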
@@ -1236,6 +1266,17 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		    unsigned long start, unsigned long nr_pages,
 		    int write, int force, struct page **pages,
 		    struct vm_area_struct **vmas);
+long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm,
+		    unsigned long start, unsigned long nr_pages,
+		    int write, int force, struct page **pages,
+		    int *locked);
+long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+		    unsigned long start, unsigned long nr_pages,
+		    int write, int force, struct page **pages,
+		    unsigned int gup_flags);
+long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+		    unsigned long start, unsigned long nr_pages,
+		    int write, int force, struct page **pages);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
 struct kvec;
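
A sketch of the calling convention the new _locked variant implies, based only on the declarations above; pin_one_page() is an invented wrapper, not a kernel function. The caller takes mmap_sem, and the *locked output tells it whether the core dropped the semaphore while faulting pages in.

static long pin_one_page(unsigned long addr, struct page **page)
{
	struct mm_struct *mm = current->mm;
	int locked = 1;
	long ret;

	down_read(&mm->mmap_sem);
	ret = get_user_pages_locked(current, mm, addr, 1,
				    1 /* write */, 0 /* force */,
				    page, &locked);
	if (locked)
		up_read(&mm->mmap_sem);	/* only unlock if still held */
	return ret;
}

get_user_pages_unlocked() goes one step further and takes and drops mmap_sem itself, so callers with no other reason to hold the semaphore can drop that boilerplate entirely.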
@@ -1368,6 +1409,11 @@ static inline void update_hiwater_vm(struct mm_struct *mm)
 		mm->hiwater_vm = mm->total_vm;
 }
 
+static inline void reset_mm_hiwater_rss(struct mm_struct *mm)
+{
+	mm->hiwater_rss = get_mm_rss(mm);
+}
+
 static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
 					 struct mm_struct *mm)
 {
@@ -1407,14 +1453,45 @@ static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,
 int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
 #endif
 
-#ifdef __PAGETABLE_PMD_FOLDED
+#if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU)
 static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
 						unsigned long address)
 {
 	return 0;
 }
+
+static inline void mm_nr_pmds_init(struct mm_struct *mm) {}
+
+static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void mm_inc_nr_pmds(struct mm_struct *mm) {}
+static inline void mm_dec_nr_pmds(struct mm_struct *mm) {}
+
 #else
 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
+
+static inline void mm_nr_pmds_init(struct mm_struct *mm)
+{
+	atomic_long_set(&mm->nr_pmds, 0);
+}
+
+static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
+{
+	return atomic_long_read(&mm->nr_pmds);
+}
+
+static inline void mm_inc_nr_pmds(struct mm_struct *mm)
+{
+	atomic_long_inc(&mm->nr_pmds);
+}
+
+static inline void mm_dec_nr_pmds(struct mm_struct *mm)
+{
+	atomic_long_dec(&mm->nr_pmds);
+}
 #endif
 
 int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
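
For context, a hedged sketch of how the new counter is meant to pair with pmd table allocation: charge the mm when a pmd page is actually installed, and skip the charge when the allocation loses a populate race. The function is illustrative only (it omits barriers and split-lock details) and is not the in-tree __pmd_alloc().

static int example_pmd_alloc(struct mm_struct *mm, pud_t *pud,
			     unsigned long address)
{
	pmd_t *new = pmd_alloc_one(mm, address);

	if (!new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
	if (!pud_present(*pud)) {
		mm_inc_nr_pmds(mm);	/* one more pmd table charged to mm */
		pud_populate(mm, pud, new);
	} else {
		pmd_free(mm, new);	/* lost the race; nothing to account */
	}
	spin_unlock(&mm->page_table_lock);
	return 0;
}

The matching mm_dec_nr_pmds() would sit next to pmd_free() on the page-table teardown path, so mm_nr_pmds() reflects the number of pmd pages currently charged to the process.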
@@ -1777,12 +1854,6 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
 	for (vma = vma_interval_tree_iter_first(root, start, last);	\
 	     vma; vma = vma_interval_tree_iter_next(vma, start, last))
 
-static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
-					struct list_head *list)
-{
-	list_add_tail(&vma->shared.nonlinear, list);
-}
-
 void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
 				   struct rb_root *root);
 void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
@@ -2110,9 +2181,8 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 #endif
 
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-				unsigned long nr_scanned,
-				unsigned long nr_eligible);
+void drop_slab(void);
+void drop_slab_node(int nid);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0