Diffstat (limited to 'include/linux/mm.h')
 -rw-r--r--  include/linux/mm.h | 150
 1 file changed, 110 insertions, 40 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dd5ea3016fc4..47a93928b90f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -138,7 +138,6 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
-#define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
 #define VM_ARCH_1	0x01000000	/* Architecture-specific flag */
 #define VM_ARCH_2	0x02000000
 #define VM_DONTDUMP	0x04000000	/* Do not include in the core dump */
@@ -206,27 +205,26 @@ extern unsigned int kobjsize(const void *objp);
 extern pgprot_t protection_map[16];
 
 #define FAULT_FLAG_WRITE	0x01	/* Fault was a write access */
-#define FAULT_FLAG_NONLINEAR	0x02	/* Fault was via a nonlinear mapping */
-#define FAULT_FLAG_MKWRITE	0x04	/* Fault was mkwrite of existing pte */
-#define FAULT_FLAG_ALLOW_RETRY	0x08	/* Retry fault if blocking */
-#define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
-#define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
-#define FAULT_FLAG_TRIED	0x40	/* second try */
-#define FAULT_FLAG_USER		0x80	/* The fault originated in userspace */
+#define FAULT_FLAG_MKWRITE	0x02	/* Fault was mkwrite of existing pte */
+#define FAULT_FLAG_ALLOW_RETRY	0x04	/* Retry fault if blocking */
+#define FAULT_FLAG_RETRY_NOWAIT	0x08	/* Don't drop mmap_sem and wait when retrying */
+#define FAULT_FLAG_KILLABLE	0x10	/* The fault task is in SIGKILL killable region */
+#define FAULT_FLAG_TRIED	0x20	/* Second try */
+#define FAULT_FLAG_USER		0x40	/* The fault originated in userspace */
 
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
  * ->fault function. The vma's ->fault is responsible for returning a bitmask
  * of VM_FAULT_xxx flags that give details about how the fault was handled.
  *
- * pgoff should be used in favour of virtual_address, if possible. If pgoff
- * is used, one may implement ->remap_pages to get nonlinear mapping support.
+ * pgoff should be used in favour of virtual_address, if possible.
  */
 struct vm_fault {
 	unsigned int flags;		/* FAULT_FLAG_xxx flags */
 	pgoff_t pgoff;			/* Logical page offset based on vma */
 	void __user *virtual_address;	/* Faulting virtual address */
 
+	struct page *cow_page;		/* Handler may choose to COW */
 	struct page *page;		/* ->fault handlers should return a
 					 * page here, unless VM_FAULT_NOPAGE
 					 * is set (which is also implied by
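For context, here is a hedged sketch of a minimal ->fault handler built on the fields above. The bound EXAMPLE_NR_PAGES and the lookup helper are hypothetical, and the handler signature matches the vm_operations_struct ->fault of this kernel generation; treat it as an illustration, not code from the patch.

/* Hedged sketch, not from the patch: a minimal ->fault handler. */
static int example_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page;

	if (vmf->pgoff >= EXAMPLE_NR_PAGES)		/* hypothetical bound */
		return VM_FAULT_SIGBUS;

	page = example_lookup_page(vmf->pgoff);		/* hypothetical helper */
	if (!page)
		return VM_FAULT_OOM;

	get_page(page);
	vmf->page = page;	/* ->fault handlers return the page here */
	return 0;
}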
@@ -287,9 +285,13 @@ struct vm_operations_struct {
 	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
 					unsigned long addr);
 #endif
-	/* called by sys_remap_file_pages() to populate non-linear mapping */
-	int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
-			   unsigned long size, pgoff_t pgoff);
+	/*
+	 * Called by vm_normal_page() for special PTEs to find the
+	 * page for @addr. This is useful if the default behavior
+	 * (using pte_page()) would not find the correct page.
+	 */
+	struct page *(*find_special_page)(struct vm_area_struct *vma,
+					  unsigned long addr);
 };
 
 struct mmu_gather;
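To illustrate the new hook, a driver whose mappings use special PTEs might wire it up roughly as below. All example_* names and the private struct are hypothetical (the in-tree user at the time was Xen's gntdev), so this is a sketch of the pattern rather than the canonical implementation.

/* Hedged sketch: resolve a special PTE back to the driver's own page. */
struct example_map {
	struct page **pages;		/* hypothetical per-mapping page array */
};

static struct page *example_find_special_page(struct vm_area_struct *vma,
					      unsigned long addr)
{
	struct example_map *map = vma->vm_private_data;

	return map->pages[(addr - vma->vm_start) >> PAGE_SHIFT];
}

static const struct vm_operations_struct example_vm_ops = {
	.find_special_page = example_find_special_page,
};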
@@ -446,6 +448,12 @@ static inline struct page *compound_head_by_tail(struct page *tail)
 	return tail;
 }
 
+/*
+ * Since either compound page could be dismantled asynchronously in THP
+ * or we access asynchronously arbitrary positioned struct page, there
+ * would be tail flag race. To handle this race, we should call
+ * smp_rmb() before checking tail flag. compound_head_by_tail() did it.
+ */
 static inline struct page *compound_head(struct page *page)
 {
 	if (unlikely(PageTail(page)))
@@ -454,6 +462,18 @@ static inline struct page *compound_head(struct page *page)
 }
 
 /*
+ * If we access compound page synchronously such as access to
+ * allocated page, there is no need to handle tail flag race, so we can
+ * check tail flag directly without any synchronization primitive.
+ */
+static inline struct page *compound_head_fast(struct page *page)
+{
+	if (unlikely(PageTail(page)))
+		return page->first_page;
+	return page;
+}
+
+/*
  * The atomic page->_mapcount, starts from -1: so that transitions
  * both from it and to it can be tracked, using atomic_inc_and_test
  * and atomic_add_negative(-1).
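As a usage sketch, a caller that already holds a reference on the page (so it cannot be split or freed underneath it) can combine the fast variant with the compound-page accessors. The helper name below is hypothetical and only illustrates when compound_head_fast() is safe to use.

/* Hedged sketch: page is pinned by the caller, so no tail-flag race. */
static inline int held_page_order(struct page *page)
{
	return compound_order(compound_head_fast(page));
}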
@@ -465,7 +485,8 @@ static inline void page_mapcount_reset(struct page *page)
 
 static inline int page_mapcount(struct page *page)
 {
-	return atomic_read(&(page)->_mapcount) + 1;
+	VM_BUG_ON_PAGE(PageSlab(page), page);
+	return atomic_read(&page->_mapcount) + 1;
 }
 
 static inline int page_count(struct page *page)
@@ -531,7 +552,14 @@ static inline void get_page(struct page *page)
 static inline struct page *virt_to_head_page(const void *x)
 {
 	struct page *page = virt_to_page(x);
-	return compound_head(page);
+
+	/*
+	 * We don't need to worry about synchronization of tail flag
+	 * when we call virt_to_head_page() since it is only called for
+	 * already allocated page and this page won't be freed until
+	 * this virt_to_head_page() is finished. So use _fast variant.
+	 */
+	return compound_head_fast(page);
 }
 
 /*
@@ -601,29 +629,28 @@ int split_free_page(struct page *page);
  * prototype for that function and accessor functions.
  * These are _only_ valid on the head of a PG_compound page.
  */
-typedef void compound_page_dtor(struct page *);
 
 static inline void set_compound_page_dtor(struct page *page,
 						compound_page_dtor *dtor)
 {
-	page[1].lru.next = (void *)dtor;
+	page[1].compound_dtor = dtor;
 }
 
 static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
 {
-	return (compound_page_dtor *)page[1].lru.next;
+	return page[1].compound_dtor;
 }
 
 static inline int compound_order(struct page *page)
 {
 	if (!PageHead(page))
 		return 0;
-	return (unsigned long)page[1].lru.prev;
+	return page[1].compound_order;
 }
 
 static inline void set_compound_order(struct page *page, unsigned long order)
 {
-	page[1].lru.prev = (void *)order;
+	page[1].compound_order = order;
 }
 
 #ifdef CONFIG_MMU
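The intended pairing of these accessors (set at preparation time, fetched at teardown) might look like the following hedged sketch. The destructor and both wrappers carry hypothetical names; they are not the patch's code, only an illustration of how the stored destructor is dispatched.

static void example_compound_dtor(struct page *page)	/* hypothetical dtor */
{
	/* e.g. return the pages to a driver-private pool */
}

static void example_prep_compound(struct page *page, unsigned long order)
{
	set_compound_order(page, order);
	set_compound_page_dtor(page, example_compound_dtor);
}

static void example_destroy_compound(struct page *page)
{
	compound_page_dtor *dtor = get_compound_page_dtor(page);

	(*dtor)(page);		/* dispatch whatever destructor was registered */
}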
@@ -1121,7 +1148,6 @@ extern void user_shm_unlock(size_t, struct user_struct *);
  * Parameter block passed down to zap_pte_range in exceptional cases.
  */
 struct zap_details {
-	struct vm_area_struct *nonlinear_vma;	/* Check page->index if set */
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
@@ -1139,8 +1165,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 
 /**
  * mm_walk - callbacks for walk_page_range
- * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
  * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
  *	       this handler is required to be able to handle
  *	       pmd_trans_huge() pmds. They may simply choose to
@@ -1148,16 +1172,18 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
  * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
  * @pte_hole: if set, called for each hole at all levels
  * @hugetlb_entry: if set, called for each hugetlb entry
- *		   *Caution*: The caller must hold mmap_sem() if @hugetlb_entry
- *		   is used.
+ * @test_walk: caller specific callback function to determine whether
+ *             we walk over the current vma or not. A positive returned
+ *             value means "do page table walk over the current vma,"
+ *             and a negative one means "abort current page table walk
+ *             right now." 0 means "skip the current vma."
+ * @mm:        mm_struct representing the target process of page table walk
+ * @vma:       vma currently walked (NULL if walking outside vmas)
+ * @private:   private data for callbacks' usage
  *
- * (see walk_page_range for more details)
+ * (see the comment on walk_page_range() for more details)
  */
 struct mm_walk {
-	int (*pgd_entry)(pgd_t *pgd, unsigned long addr,
-			 unsigned long next, struct mm_walk *walk);
-	int (*pud_entry)(pud_t *pud, unsigned long addr,
-			 unsigned long next, struct mm_walk *walk);
 	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
 			 unsigned long next, struct mm_walk *walk);
 	int (*pte_entry)(pte_t *pte, unsigned long addr,
@@ -1167,12 +1193,16 @@ struct mm_walk {
 	int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
 			     unsigned long addr, unsigned long next,
 			     struct mm_walk *walk);
+	int (*test_walk)(unsigned long addr, unsigned long next,
+			struct mm_walk *walk);
 	struct mm_struct *mm;
+	struct vm_area_struct *vma;
 	void *private;
 };
 
 int walk_page_range(unsigned long addr, unsigned long end,
 		struct mm_walk *walk);
+int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
 void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
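A hedged sketch of a walker built on the new hooks follows. All example_* names are hypothetical, and the ->test_walk return convention mirrors the comment documented above (positive: walk the vma, 0: skip it, negative: abort); it is a usage sketch, not code from the patch.

static int example_test_walk(unsigned long addr, unsigned long next,
			     struct mm_walk *walk)
{
	/* walk anonymous vmas only; skip file-backed ones */
	return walk->vma->vm_file ? 0 : 1;
}

static int example_pte_entry(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	unsigned long *present = walk->private;

	if (pte_present(*pte))
		(*present)++;
	return 0;
}

static unsigned long example_count_present(struct vm_area_struct *vma)
{
	unsigned long present = 0;
	struct mm_walk walk = {
		.test_walk = example_test_walk,
		.pte_entry = example_pte_entry,
		.mm        = vma->vm_mm,
		.private   = &present,
	};

	walk_page_vma(vma, &walk);
	return present;
}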
@@ -1236,6 +1266,17 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		    unsigned long start, unsigned long nr_pages,
 		    int write, int force, struct page **pages,
 		    struct vm_area_struct **vmas);
+long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm,
+		    unsigned long start, unsigned long nr_pages,
+		    int write, int force, struct page **pages,
+		    int *locked);
+long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+		    unsigned long start, unsigned long nr_pages,
+		    int write, int force, struct page **pages,
+		    unsigned int gup_flags);
+long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+		    unsigned long start, unsigned long nr_pages,
+		    int write, int force, struct page **pages);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
 struct kvec;
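A hedged sketch of the intended calling convention for get_user_pages_locked(): the caller takes mmap_sem for read and only releases it afterwards if the helper has not already dropped it on the caller's behalf. The wrapper name and the write/force choices are illustrative only.

static long example_pin_user_pages(unsigned long start, unsigned long nr,
				   struct page **pages)
{
	struct mm_struct *mm = current->mm;
	int locked = 1;
	long ret;

	down_read(&mm->mmap_sem);
	ret = get_user_pages_locked(current, mm, start, nr,
				    1 /* write */, 0 /* force */,
				    pages, &locked);
	if (locked)
		up_read(&mm->mmap_sem);	/* may already have been dropped */
	return ret;
}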
@@ -1368,6 +1409,11 @@ static inline void update_hiwater_vm(struct mm_struct *mm)
 		mm->hiwater_vm = mm->total_vm;
 }
 
+static inline void reset_mm_hiwater_rss(struct mm_struct *mm)
+{
+	mm->hiwater_rss = get_mm_rss(mm);
+}
+
 static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
 					 struct mm_struct *mm)
 {
@@ -1407,14 +1453,45 @@ static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,
 int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
 #endif
 
-#ifdef __PAGETABLE_PMD_FOLDED
+#if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU)
 static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
 						unsigned long address)
 {
 	return 0;
 }
+
+static inline void mm_nr_pmds_init(struct mm_struct *mm) {}
+
+static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void mm_inc_nr_pmds(struct mm_struct *mm) {}
+static inline void mm_dec_nr_pmds(struct mm_struct *mm) {}
+
 #else
 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
+
+static inline void mm_nr_pmds_init(struct mm_struct *mm)
+{
+	atomic_long_set(&mm->nr_pmds, 0);
+}
+
+static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
+{
+	return atomic_long_read(&mm->nr_pmds);
+}
+
+static inline void mm_inc_nr_pmds(struct mm_struct *mm)
+{
+	atomic_long_inc(&mm->nr_pmds);
+}
+
+static inline void mm_dec_nr_pmds(struct mm_struct *mm)
+{
+	atomic_long_dec(&mm->nr_pmds);
+}
 #endif
 
 int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
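The counter is meant to pair with page-table allocation and teardown. Below is a hedged sketch of the allocation side, modeled on the usual __pmd_alloc pattern rather than quoted from the patch; memory-ordering details of the real code are omitted and the function name is hypothetical.

static int example_alloc_pmd(struct mm_struct *mm, pud_t *pud,
			     unsigned long address)
{
	pmd_t *new = pmd_alloc_one(mm, address);

	if (!new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
	if (pud_present(*pud)) {
		pmd_free(mm, new);	/* lost the race, pmd already there */
	} else {
		mm_inc_nr_pmds(mm);	/* account the new pmd page table */
		pud_populate(mm, pud, new);
	}
	spin_unlock(&mm->page_table_lock);
	return 0;
}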
@@ -1777,12 +1854,6 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
 	for (vma = vma_interval_tree_iter_first(root, start, last);	\
 	     vma; vma = vma_interval_tree_iter_next(vma, start, last))
 
-static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
-					struct list_head *list)
-{
-	list_add_tail(&vma->shared.nonlinear, list);
-}
-
 void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
 				   struct rb_root *root);
 void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
@@ -2110,9 +2181,8 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
 			void __user *, size_t *, loff_t *);
 #endif
 
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-				unsigned long nr_scanned,
-				unsigned long nr_eligible);
+void drop_slab(void);
+void drop_slab_node(int nid);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
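For reference, the two new declarations suggest a per-node iteration along these lines; this is a hedged sketch of the expected shape, not the actual mm/vmscan.c body.

void drop_slab(void)
{
	int nid;

	for_each_online_node(nid)
		drop_slab_node(nid);	/* shrink slab caches on this node */
}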