author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /include/linux/mm.h
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)

Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c

Diffstat (limited to 'include/linux/mm.h')
-rw-r--r--	include/linux/mm.h | 384
1 file changed, 269 insertions(+), 115 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 74949fbef8c6..9670f71d7be9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -14,6 +14,7 @@
 #include <linux/mm_types.h>
 #include <linux/range.h>
 #include <linux/pfn.h>
+#include <linux/bit_spinlock.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -82,6 +83,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_GROWSUP	0x00000200
 #else
 #define VM_GROWSUP	0x00000000
+#define VM_NOHUGEPAGE	0x00000200	/* MADV_NOHUGEPAGE marked this vma */
 #endif
 #define VM_PFNMAP	0x00000400	/* Page-ranges managed without "struct page", just pure PFN */
 #define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
@@ -101,7 +103,11 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
+#ifndef CONFIG_TRANSPARENT_HUGEPAGE
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
+#else
+#define VM_HUGEPAGE	0x01000000	/* MADV_HUGEPAGE marked this vma */
+#endif
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
 #define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */
 
@@ -131,7 +137,8 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_RandomReadHint(v)		((v)->vm_flags & VM_RAND_READ)
 
 /*
- * special vmas that are non-mergable, non-mlock()able
+ * Special vmas that are non-mergable, non-mlock()able.
+ * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
  */
 #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
 
@@ -144,6 +151,9 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_WRITE	0x01	/* Fault was a write access */
 #define FAULT_FLAG_NONLINEAR	0x02	/* Fault was via a nonlinear mapping */
 #define FAULT_FLAG_MKWRITE	0x04	/* Fault was mkwrite of existing pte */
+#define FAULT_FLAG_ALLOW_RETRY	0x08	/* Retry fault if blocking */
+#define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
+#define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
 
 /*
  * This interface is used by x86 PAT code to identify a pfn mapping that is
@@ -155,12 +165,12 @@ extern pgprot_t protection_map[16];
  */
 static inline int is_linear_pfn_mapping(struct vm_area_struct *vma)
 {
-	return (vma->vm_flags & VM_PFN_AT_MMAP);
+	return !!(vma->vm_flags & VM_PFN_AT_MMAP);
 }
 
 static inline int is_pfn_mapping(struct vm_area_struct *vma)
 {
-	return (vma->vm_flags & VM_PFNMAP);
+	return !!(vma->vm_flags & VM_PFNMAP);
 }
 
 /*
@@ -241,6 +251,7 @@ struct inode;
  * files which need it (119 of them)
  */
 #include <linux/page-flags.h>
+#include <linux/huge_mm.h>
 
 /*
  * Methods to modify the page usage count.
@@ -304,6 +315,39 @@ static inline int is_vmalloc_or_module_addr(const void *x)
 }
 #endif
 
+static inline void compound_lock(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	bit_spin_lock(PG_compound_lock, &page->flags);
+#endif
+}
+
+static inline void compound_unlock(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	bit_spin_unlock(PG_compound_lock, &page->flags);
+#endif
+}
+
+static inline unsigned long compound_lock_irqsave(struct page *page)
+{
+	unsigned long uninitialized_var(flags);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	local_irq_save(flags);
+	compound_lock(page);
+#endif
+	return flags;
+}
+
+static inline void compound_unlock_irqrestore(struct page *page,
+					      unsigned long flags)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	compound_unlock(page);
+	local_irq_restore(flags);
+#endif
+}
+
 static inline struct page *compound_head(struct page *page)
 {
 	if (unlikely(PageTail(page)))
@@ -318,9 +362,29 @@ static inline int page_count(struct page *page)
 
 static inline void get_page(struct page *page)
 {
-	page = compound_head(page);
-	VM_BUG_ON(atomic_read(&page->_count) == 0);
+	/*
+	 * Getting a normal page or the head of a compound page
+	 * requires to already have an elevated page->_count. Only if
+	 * we're getting a tail page, the elevated page->_count is
+	 * required only in the head page, so for tail pages the
+	 * bugcheck only verifies that the page->_count isn't
+	 * negative.
+	 */
+	VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page));
 	atomic_inc(&page->_count);
+	/*
+	 * Getting a tail page will elevate both the head and tail
+	 * page->_count(s).
+	 */
+	if (unlikely(PageTail(page))) {
+		/*
+		 * This is safe only because
+		 * __split_huge_page_refcount can't run under
+		 * get_page().
+		 */
+		VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
+		atomic_inc(&page->first_page->_count);
+	}
 }
 
 static inline struct page *virt_to_head_page(const void *x)
@@ -338,6 +402,34 @@ static inline void init_page_count(struct page *page)
 	atomic_set(&page->_count, 1);
 }
 
+/*
+ * PageBuddy() indicate that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ *
+ * PAGE_BUDDY_MAPCOUNT_VALUE must be <= -2 but better not too close to
+ * -2 so that an underflow of the page_mapcount() won't be mistaken
+ * for a genuine PAGE_BUDDY_MAPCOUNT_VALUE. -128 can be created very
+ * efficiently by most CPU architectures.
+ */
+#define PAGE_BUDDY_MAPCOUNT_VALUE (-128)
+
+static inline int PageBuddy(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == PAGE_BUDDY_MAPCOUNT_VALUE;
+}
+
+static inline void __SetPageBuddy(struct page *page)
+{
+	VM_BUG_ON(atomic_read(&page->_mapcount) != -1);
+	atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE);
+}
+
+static inline void __ClearPageBuddy(struct page *page)
+{
+	VM_BUG_ON(!PageBuddy(page));
+	atomic_set(&page->_mapcount, -1);
+}
+
 void put_page(struct page *page);
 void put_pages_list(struct list_head *pages);
 
@@ -369,11 +461,40 @@ static inline int compound_order(struct page *page)
 	return (unsigned long)page[1].lru.prev;
 }
 
+static inline int compound_trans_order(struct page *page)
+{
+	int order;
+	unsigned long flags;
+
+	if (!PageHead(page))
+		return 0;
+
+	flags = compound_lock_irqsave(page);
+	order = compound_order(page);
+	compound_unlock_irqrestore(page, flags);
+	return order;
+}
+
 static inline void set_compound_order(struct page *page, unsigned long order)
 {
 	page[1].lru.prev = (void *)order;
 }
 
+#ifdef CONFIG_MMU
+/*
+ * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
+ * servicing faults for write access.  In the normal case, do always want
+ * pte_mkwrite.  But get_user_pages can cause write faults for mappings
+ * that do not have writing enabled, when used by access_process_vm.
+ */
+static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_flags & VM_WRITE))
+		pte = pte_mkwrite(pte);
+	return pte;
+}
+#endif
+
 /*
  * Multiple processes may "see" the same page. E.g. for untouched
  * mappings of /dev/null, all processes see the same page full of
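The new maybe_mkwrite() helper lets a fault path make a pte writable only when the vma itself is writable. A minimal sketch of how a write-fault handler might use it (locals such as 'entry', 'page_table' and 'write_access' are illustrative, not from this diff; locking and page handling are omitted):

	/* Sketch: dirty the pte and conditionally make it writable. */
	pte_t entry = pte_mkyoung(*page_table);
	if (write_access)
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
	set_pte_at(mm, address, page_table, entry);
	update_mmu_cache(vma, address, page_table);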
@@ -484,12 +605,8 @@ static inline void set_compound_order(struct page *page, unsigned long order)
 #define NODE_NOT_IN_PAGE_FLAGS
 #endif
 
-#ifndef PFN_SECTION_SHIFT
-#define PFN_SECTION_SHIFT 0
-#endif
-
 /*
- * Define the bit shifts to access each section.  For non-existant
+ * Define the bit shifts to access each section.  For non-existent
  * sections we define the shift as 0; that plus a 0 mask ensures
  * the compiler will optimise away reference to them.
  */
@@ -497,8 +614,8 @@ static inline void set_compound_order(struct page *page, unsigned long order)
 #define NODES_PGSHIFT		(NODES_PGOFF * (NODES_WIDTH != 0))
 #define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_WIDTH != 0))
 
-/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allcator */
-#ifdef NODE_NOT_IN_PAGEFLAGS
+/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
+#ifdef NODE_NOT_IN_PAGE_FLAGS
 #define ZONEID_SHIFT		(SECTIONS_SHIFT + ZONES_SHIFT)
 #define ZONEID_PGOFF		((SECTIONS_PGOFF < ZONES_PGOFF)? \
 				SECTIONS_PGOFF : ZONES_PGOFF)
@@ -561,6 +678,12 @@ static inline struct zone *page_zone(struct page *page)
 }
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+static inline void set_page_section(struct page *page, unsigned long section)
+{
+	page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
+	page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
+}
+
 static inline unsigned long page_to_section(struct page *page)
 {
 	return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
@@ -579,18 +702,14 @@ static inline void set_page_node(struct page *page, unsigned long node)
 	page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
 }
 
-static inline void set_page_section(struct page *page, unsigned long section)
-{
-	page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
-	page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
-}
-
 static inline void set_page_links(struct page *page, enum zone_type zone,
 	unsigned long node, unsigned long pfn)
 {
 	set_page_zone(page, zone);
 	set_page_node(page, node);
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 	set_page_section(page, pfn_to_section_nr(pfn));
+#endif
 }
 
 /*
596/* 715/*
@@ -656,7 +775,7 @@ static inline struct address_space *page_mapping(struct page *page)
 	VM_BUG_ON(PageSlab(page));
 	if (unlikely(PageSwapCache(page)))
 		mapping = &swapper_space;
-	else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON))
+	else if ((unsigned long)mapping & PAGE_MAPPING_ANON)
 		mapping = NULL;
 	return mapping;
 }
@@ -718,12 +837,21 @@ static inline int page_mapped(struct page *page)
 #define VM_FAULT_SIGBUS	0x0002
 #define VM_FAULT_MAJOR	0x0004
 #define VM_FAULT_WRITE	0x0008	/* Special case for get_user_pages */
-#define VM_FAULT_HWPOISON 0x0010	/* Hit poisoned page */
+#define VM_FAULT_HWPOISON 0x0010	/* Hit poisoned small page */
+#define VM_FAULT_HWPOISON_LARGE 0x0020  /* Hit poisoned large page. Index encoded in upper bits */
 
 #define VM_FAULT_NOPAGE	0x0100	/* ->fault installed the pte, not return page */
 #define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */
+#define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
+
+#define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
 
-#define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)
+#define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \
+			 VM_FAULT_HWPOISON_LARGE)
+
+/* Encode hstate index for a hwpoisoned large page */
+#define VM_FAULT_SET_HINDEX(x) ((x) << 12)
+#define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf)
 
 /*
  * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
@@ -732,20 +860,19 @@ extern void pagefault_out_of_memory(void);
 
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
-extern void show_free_areas(void);
+/*
+ * Flags passed to show_mem() and show_free_areas() to suppress output in
+ * various contexts.
+ */
+#define SHOW_MEM_FILTER_NODES	(0x0001u)	/* filter disallowed nodes */
+
+extern void show_free_areas(unsigned int flags);
+extern bool skip_free_areas_node(unsigned int flags, int nid);
 
 int shmem_lock(struct file *file, int lock, struct user_struct *user);
 struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags);
 int shmem_zero_setup(struct vm_area_struct *);
 
-#ifndef CONFIG_MMU
-extern unsigned long shmem_get_unmapped_area(struct file *file,
-					     unsigned long addr,
-					     unsigned long len,
-					     unsigned long pgoff,
-					     unsigned long flags);
-#endif
-
 extern int can_do_mlock(void);
 extern int user_shm_lock(size_t, struct user_struct *);
 extern void user_shm_unlock(size_t, struct user_struct *);
@@ -758,8 +885,6 @@ struct zap_details {
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
-	spinlock_t *i_mmap_lock;		/* For unmap_mapping_range: */
-	unsigned long truncate_count;		/* Compare vm_truncate_count */
 };
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -769,7 +894,7 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size);
 unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb,
+unsigned long unmap_vmas(struct mmu_gather *tlb,
 		struct vm_area_struct *start_vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *);
@@ -779,6 +904,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlb,
  * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
  * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
  * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
+ *	       this handler is required to be able to handle
+ *	       pmd_trans_huge() pmds.  They may simply choose to
+ *	       split_huge_page() instead of handling it explicitly.
  * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
  * @pte_hole: if set, called for each hole at all levels
  * @hugetlb_entry: if set, called for each hugetlb entry
@@ -844,7 +972,13 @@ static inline int handle_mm_fault(struct mm_struct *mm,
 
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
+extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
+		void *buf, int len, int write);
 
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		     unsigned long start, int len, unsigned int foll_flags,
+		     struct page **pages, struct vm_area_struct **vmas,
+		     int *nonblocking);
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			unsigned long start, int nr_pages, int write, int force,
 			struct page **pages, struct vm_area_struct **vmas);
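get_user_pages() keeps its existing prototype here; __get_user_pages() is the new lower-level variant that takes FOLL_* flags directly plus an optional 'nonblocking' pointer. A hedged sketch of the classic pinning pattern of this era (error handling trimmed; 'addr' is assumed to be a valid user address in current->mm):

	/* Sketch: pin one user page for I/O, dirty it, then release it. */
	struct page *page;
	int ret;

	down_read(&current->mm->mmap_sem);
	ret = get_user_pages(current, current->mm, addr & PAGE_MASK,
			     1, 1 /* write */, 0 /* force */, &page, NULL);
	up_read(&current->mm->mmap_sem);

	if (ret == 1) {
		/* ... access the page contents via kmap() etc. ... */
		set_page_dirty_lock(page);
		put_page(page);
	}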
@@ -860,16 +994,39 @@ int __set_page_dirty_no_writeback(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
 				struct page *page);
 void account_page_dirtied(struct page *page, struct address_space *mapping);
+void account_page_writeback(struct page *page);
 int set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
 int clear_page_dirty_for_io(struct page *page);
 
 /* Is the vma a continuation of the stack vma above it? */
-static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr)
+static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
 {
 	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
 }
 
+static inline int stack_guard_page_start(struct vm_area_struct *vma,
+					     unsigned long addr)
+{
+	return (vma->vm_flags & VM_GROWSDOWN) &&
+		(vma->vm_start == addr) &&
+		!vma_growsdown(vma->vm_prev, addr);
+}
+
+/* Is the vma a continuation of the stack vma below it? */
+static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
+{
+	return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
+}
+
+static inline int stack_guard_page_end(struct vm_area_struct *vma,
+					   unsigned long addr)
+{
+	return (vma->vm_flags & VM_GROWSUP) &&
+		(vma->vm_end == addr) &&
+		!vma_growsup(vma->vm_next, addr);
+}
+
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len);
@@ -888,65 +1045,35 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 /*
  * per-process(per-mm_struct) statistics.
  */
-#if defined(SPLIT_RSS_COUNTING)
-/*
- * The mm counters are not protected by its page_table_lock,
- * so must be incremented atomically.
- */
 static inline void set_mm_counter(struct mm_struct *mm, int member, long value)
 {
 	atomic_long_set(&mm->rss_stat.count[member], value);
 }
 
+#if defined(SPLIT_RSS_COUNTING)
 unsigned long get_mm_counter(struct mm_struct *mm, int member);
-
-static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
-{
-	atomic_long_add(value, &mm->rss_stat.count[member]);
-}
-
-static inline void inc_mm_counter(struct mm_struct *mm, int member)
-{
-	atomic_long_inc(&mm->rss_stat.count[member]);
-}
-
-static inline void dec_mm_counter(struct mm_struct *mm, int member)
-{
-	atomic_long_dec(&mm->rss_stat.count[member]);
-}
-
-#else /* !USE_SPLIT_PTLOCKS */
-/*
- * The mm counters are protected by its page_table_lock,
- * so can be incremented directly.
- */
-static inline void set_mm_counter(struct mm_struct *mm, int member, long value)
-{
-	mm->rss_stat.count[member] = value;
-}
-
+#else
 static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
 {
-	return mm->rss_stat.count[member];
+	return atomic_long_read(&mm->rss_stat.count[member]);
 }
+#endif
 
 static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
 {
-	mm->rss_stat.count[member] += value;
+	atomic_long_add(value, &mm->rss_stat.count[member]);
 }
 
 static inline void inc_mm_counter(struct mm_struct *mm, int member)
 {
-	mm->rss_stat.count[member]++;
+	atomic_long_inc(&mm->rss_stat.count[member]);
 }
 
 static inline void dec_mm_counter(struct mm_struct *mm, int member)
 {
-	mm->rss_stat.count[member]--;
+	atomic_long_dec(&mm->rss_stat.count[member]);
 }
 
-#endif /* !USE_SPLIT_PTLOCKS */
-
 static inline unsigned long get_mm_rss(struct mm_struct *mm)
 {
 	return get_mm_counter(mm, MM_FILEPAGES) +
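With the split and non-split variants unified, the rss counters are always atomic_long_t and the inline helpers can be used unconditionally. A small usage sketch (MM_ANONPAGES/MM_FILEPAGES are the existing counter indices; 'some_limit' and the surrounding fault context are assumptions, not from this diff):

	/* Sketch: account one newly mapped anonymous page and read
	 * back the aggregate RSS.
	 */
	inc_mm_counter(mm, MM_ANONPAGES);

	if (get_mm_rss(mm) > some_limit)
		pr_debug("rss now %lu pages\n", get_mm_rss(mm));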
@@ -995,13 +1122,24 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
 #endif
 
 /*
+ * This struct is used to pass information from page reclaim to the shrinkers.
+ * We consolidate the values for easier extention later.
+ */
+struct shrink_control {
+	gfp_t gfp_mask;
+
+	/* How many slab objects shrinker() should scan and try to reclaim */
+	unsigned long nr_to_scan;
+};
+
+/*
  * A callback you can register to apply pressure to ageable caches.
  *
- * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'.  It should
- * look through the least-recently-used 'nr_to_scan' entries and
- * attempt to free them up.  It should return the number of objects
- * which remain in the cache.  If it returns -1, it means it cannot do
- * any scanning at this time (eg. there is a risk of deadlock).
+ * 'sc' is passed shrink_control which includes a count 'nr_to_scan'
+ * and a 'gfpmask'.  It should look through the least-recently-used
+ * 'nr_to_scan' entries and attempt to free them up.  It should return
+ * the number of objects which remain in the cache.  If it returns -1, it means
+ * it cannot do any scanning at this time (eg. there is a risk of deadlock).
  *
  * The 'gfpmask' refers to the allocation we are currently trying to
  * fulfil.
@@ -1010,7 +1148,7 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
  * querying the cache size, so a fastpath for that case is appropriate.
  */
 struct shrinker {
-	int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask);
+	int (*shrink)(struct shrinker *, struct shrink_control *sc);
 	int seeks;	/* seeks to recreate an obj */
 
 	/* These are for internal use */
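The shrink callback now receives a struct shrink_control instead of separate nr_to_scan/gfp_mask arguments. A minimal sketch of a shrinker written against the new prototype ('demo_cache_count' and 'demo_cache_trim' stand in for a driver's own cache bookkeeping, not functions from this diff):

	/* Sketch: a cache shrinker using the shrink_control interface. */
	static int demo_shrink(struct shrinker *s, struct shrink_control *sc)
	{
		if (sc->nr_to_scan)
			demo_cache_trim(sc->nr_to_scan, sc->gfp_mask);
		/* always report how many objects remain */
		return demo_cache_count();
	}

	static struct shrinker demo_shrinker = {
		.shrink	= demo_shrink,
		.seeks	= DEFAULT_SEEKS,
	};

	/* register_shrinker(&demo_shrinker) at init,
	 * unregister_shrinker(&demo_shrinker) at teardown. */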
@@ -1023,7 +1161,15 @@ extern void unregister_shrinker(struct shrinker *);
 
 int vma_wants_writenotify(struct vm_area_struct *vma);
 
-extern pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl);
+extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
+			       spinlock_t **ptl);
+static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
+				    spinlock_t **ptl)
+{
+	pte_t *ptep;
+	__cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl));
+	return ptep;
+}
 
 #ifdef __PAGETABLE_PUD_FOLDED
 static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,
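get_locked_pte() is now a wrapper around __get_locked_pte() so sparse can see the conditional lock via __cond_lock(). Its usual pairing, a hedged sketch with error handling trimmed ('mm' and 'addr' are assumed to be valid):

	/* Sketch: look up/allocate a pte with its page-table lock held,
	 * then drop both the mapping and the lock.
	 */
	spinlock_t *ptl;
	pte_t *pte = get_locked_pte(mm, addr, &ptl);
	if (!pte)
		return -ENOMEM;
	/* ... examine or set *pte while ptl is held ... */
	pte_unmap_unlock(pte, ptl);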
@@ -1045,7 +1191,8 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
 #endif
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+		pmd_t *pmd, unsigned long address);
 int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
 
 /*
@@ -1114,16 +1261,18 @@ static inline void pgtable_page_dtor(struct page *page)
 	pte_unmap(pte);					\
 } while (0)
 
-#define pte_alloc_map(mm, pmd, address)			\
-	((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
-		NULL: pte_offset_map(pmd, address))
+#define pte_alloc_map(mm, vma, pmd, address)		\
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma,	\
+							pmd, address))?	\
+	 NULL: pte_offset_map(pmd, address))
 
 #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
-	((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL,	\
+							pmd, address))?	\
 		NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)			\
-	((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
 		NULL: pte_offset_kernel(pmd, address))
 
 extern void free_area_init(unsigned long * zones_size);
@@ -1175,8 +1324,8 @@ extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
 int add_from_early_node_map(struct range *range, int az,
 				   int nr_range, int nid);
-void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
-				 u64 goal, u64 limit);
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
+					u64 goal, u64 limit);
 typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
 extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
 extern void sparse_memory_present_with_active_regions(int nid);
@@ -1201,14 +1350,16 @@ extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_zone(unsigned long, int, unsigned long,
 				unsigned long, enum memmap_context);
 extern void setup_per_zone_wmarks(void);
-extern void calculate_zone_inactive_ratio(struct zone *zone);
+extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
 extern void __init mmap_init(void);
-extern void show_mem(void);
+extern void show_mem(unsigned int flags);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern int after_bootmem;
 
+extern void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);
+
 extern void setup_per_cpu_pageset(void);
 
 extern void zone_pcp_update(struct zone *zone);
@@ -1257,17 +1408,11 @@ extern void exit_mmap(struct mm_struct *);
 extern int mm_take_all_locks(struct mm_struct *mm);
 extern void mm_drop_all_locks(struct mm_struct *mm);
 
-#ifdef CONFIG_PROC_FS
 /* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */
 extern void added_exe_file_vma(struct mm_struct *mm);
 extern void removed_exe_file_vma(struct mm_struct *mm);
-#else
-static inline void added_exe_file_vma(struct mm_struct *mm)
-{}
-
-static inline void removed_exe_file_vma(struct mm_struct *mm)
-{}
-#endif /* CONFIG_PROC_FS */
+extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
+extern struct file *get_mm_exe_file(struct mm_struct *mm);
 
 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
 extern int install_special_mapping(struct mm_struct *mm,
@@ -1281,7 +1426,7 @@ extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long flag, unsigned long pgoff);
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long flags,
-	unsigned int vm_flags, unsigned long pgoff);
+	vm_flags_t vm_flags, unsigned long pgoff);
 
 static inline unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
@@ -1338,15 +1483,17 @@ unsigned long ra_submit(struct file_ra_state *ra,
 				struct address_space *mapping,
 				struct file *filp);
 
-/* Do stack extension */
+/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+
+/* CONFIG_STACK_GROWSUP still needs to to grow downwards at some places */
+extern int expand_downwards(struct vm_area_struct *vma,
+		unsigned long address);
 #if VM_GROWSUP
 extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
 #else
   #define expand_upwards(vma, address) do { } while (0)
 #endif
-extern int expand_stack_downwards(struct vm_area_struct *vma,
-				  unsigned long address);
 
 /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
 extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
@@ -1394,6 +1541,11 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_GET	0x04	/* do get_page on page */
 #define FOLL_DUMP	0x08	/* give error on hole if it would be zero */
 #define FOLL_FORCE	0x10	/* get_user_pages read/write w/o permission */
+#define FOLL_NOWAIT	0x20	/* if a disk transfer is needed, start the IO
+				 * and return without waiting upon it */
+#define FOLL_MLOCK	0x40	/* mark page as mlocked */
+#define FOLL_SPLIT	0x80	/* don't return transhuge pages, split them */
+#define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
@@ -1432,19 +1584,20 @@ static inline bool kernel_page_present(struct page *page) { return true; }
 #endif /* CONFIG_HIBERNATION */
 #endif
 
-extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
+extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
 #ifdef __HAVE_ARCH_GATE_AREA
-int in_gate_area_no_task(unsigned long addr);
-int in_gate_area(struct task_struct *task, unsigned long addr);
+int in_gate_area_no_mm(unsigned long addr);
+int in_gate_area(struct mm_struct *mm, unsigned long addr);
 #else
-int in_gate_area_no_task(unsigned long addr);
-#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
+int in_gate_area_no_mm(unsigned long addr);
+#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);})
 #endif	/* __HAVE_ARCH_GATE_AREA */
 
 int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
-unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
-			unsigned long lru_pages);
+unsigned long shrink_slab(struct shrink_control *shrink,
+			  unsigned long nr_pages_scanned,
+			  unsigned long lru_pages);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
@@ -1486,16 +1639,17 @@ extern int sysctl_memory_failure_recovery;
 extern void shake_page(struct page *p, int access);
 extern atomic_long_t mce_bad_pages;
 extern int soft_offline_page(struct page *page, int flags);
-#ifdef CONFIG_MEMORY_FAILURE
-int is_hwpoison_address(unsigned long addr);
-#else
-static inline int is_hwpoison_address(unsigned long addr)
-{
-	return 0;
-}
-#endif
 
 extern void dump_page(struct page *page);
 
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
+extern void clear_huge_page(struct page *page,
+			    unsigned long addr,
+			    unsigned int pages_per_huge_page);
+extern void copy_user_huge_page(struct page *dst, struct page *src,
+				unsigned long addr, struct vm_area_struct *vma,
+				unsigned int pages_per_huge_page);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */