aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/buffer_head.h6
-rw-r--r--include/linux/hugetlb.h2
-rw-r--r--include/linux/memory.h94
-rw-r--r--include/linux/memory_hotplug.h104
-rw-r--r--include/linux/mempolicy.h7
-rw-r--r--include/linux/mm.h150
-rw-r--r--include/linux/mmzone.h28
-rw-r--r--include/linux/rmap.h4
-rw-r--r--include/linux/rwsem-spinlock.h5
-rw-r--r--include/linux/scatterlist.h17
-rw-r--r--include/linux/sched.h65
-rw-r--r--include/linux/vmalloc.h8
12 files changed, 409 insertions, 81 deletions
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 88af42f5e04a..c937d6e65502 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -126,8 +126,8 @@ BUFFER_FNS(Eopnotsupp, eopnotsupp)
126/* If we *know* page->private refers to buffer_heads */ 126/* If we *know* page->private refers to buffer_heads */
127#define page_buffers(page) \ 127#define page_buffers(page) \
128 ({ \ 128 ({ \
129 BUG_ON(!PagePrivate(page)); \ 129 BUG_ON(!PagePrivate(page)); \
130 ((struct buffer_head *)(page)->private); \ 130 ((struct buffer_head *)page_private(page)); \
131 }) 131 })
132#define page_has_buffers(page) PagePrivate(page) 132#define page_has_buffers(page) PagePrivate(page)
133 133
@@ -219,7 +219,7 @@ static inline void attach_page_buffers(struct page *page,
219{ 219{
220 page_cache_get(page); 220 page_cache_get(page);
221 SetPagePrivate(page); 221 SetPagePrivate(page);
222 page->private = (unsigned long)head; 222 set_page_private(page, (unsigned long)head);
223} 223}
224 224
225static inline void get_bh(struct buffer_head *bh) 225static inline void get_bh(struct buffer_head *bh)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d664330d900e..0cea162b08c0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -16,7 +16,6 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
16int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); 16int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
17int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); 17int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
18int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); 18int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
19void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
20void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); 19void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
21int hugetlb_prefault(struct address_space *, struct vm_area_struct *); 20int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
22int hugetlb_report_meminfo(char *); 21int hugetlb_report_meminfo(char *);
@@ -87,7 +86,6 @@ static inline unsigned long hugetlb_total_pages(void)
87#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) 86#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
88#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) 87#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
89#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) 88#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; })
90#define zap_hugepage_range(vma, start, len) BUG()
91#define unmap_hugepage_range(vma, start, end) BUG() 89#define unmap_hugepage_range(vma, start, end) BUG()
92#define is_hugepage_mem_enough(size) 0 90#define is_hugepage_mem_enough(size) 0
93#define hugetlb_report_meminfo(buf) 0 91#define hugetlb_report_meminfo(buf) 0
diff --git a/include/linux/memory.h b/include/linux/memory.h
new file mode 100644
index 000000000000..0def328ab5cf
--- /dev/null
+++ b/include/linux/memory.h
@@ -0,0 +1,94 @@
1/*
2 * include/linux/memory.h - generic memory definition
3 *
4 * This is mainly for topological representation. We define the
5 * basic "struct memory_block" here, which can be embedded in per-arch
6 * definitions or NUMA information.
7 *
8 * Basic handling of the devices is done in drivers/base/memory.c
9 * and system devices are handled in drivers/base/sys.c.
10 *
11 * Memory block are exported via sysfs in the class/memory/devices/
12 * directory.
13 *
14 */
15#ifndef _LINUX_MEMORY_H_
16#define _LINUX_MEMORY_H_
17
18#include <linux/sysdev.h>
19#include <linux/node.h>
20#include <linux/compiler.h>
21
22#include <asm/semaphore.h>
23
24struct memory_block {
25 unsigned long phys_index;
26 unsigned long state;
27 /*
28 * This serializes all state change requests. It isn't
29 * held during creation because the control files are
30 * created long after the critical areas during
31 * initialization.
32 */
33 struct semaphore state_sem;
34 int phys_device; /* to which fru does this belong? */
35 void *hw; /* optional pointer to fw/hw data */
36 int (*phys_callback)(struct memory_block *);
37 struct sys_device sysdev;
38};
39
40/* These states are exposed to userspace as text strings in sysfs */
41#define MEM_ONLINE (1<<0) /* exposed to userspace */
42#define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */
43#define MEM_OFFLINE (1<<2) /* exposed to userspace */
44
45/*
46 * All of these states are currently kernel-internal for notifying
47 * kernel components and architectures.
48 *
49 * For MEM_MAPPING_INVALID, all notifier chains with priority >0
50 * are called before pfn_to_page() becomes invalid. The priority=0
51 * entry is reserved for the function that actually makes
52 * pfn_to_page() stop working. Any notifiers that want to be called
53 * after that should have priority <0.
54 */
55#define MEM_MAPPING_INVALID (1<<3)
56
57#ifndef CONFIG_MEMORY_HOTPLUG
58static inline int memory_dev_init(void)
59{
60 return 0;
61}
62static inline int register_memory_notifier(struct notifier_block *nb)
63{
64 return 0;
65}
66static inline void unregister_memory_notifier(struct notifier_block *nb)
67{
68}
69#else
70extern int register_memory(struct memory_block *, struct mem_section *section, struct node *);
71extern int register_new_memory(struct mem_section *);
72extern int unregister_memory_section(struct mem_section *);
73extern int memory_dev_init(void);
74extern int register_memory_notifier(struct notifier_block *nb);
75extern void unregister_memory_notifier(struct notifier_block *nb);
76
77#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
78
79extern int invalidate_phys_mapping(unsigned long, unsigned long);
80struct notifier_block;
81
82extern int register_memory_notifier(struct notifier_block *nb);
83extern void unregister_memory_notifier(struct notifier_block *nb);
84
85extern struct sysdev_class memory_sysdev_class;
86#endif /* CONFIG_MEMORY_HOTPLUG */
87
88#define hotplug_memory_notifier(fn, pri) { \
89 static struct notifier_block fn##_mem_nb = \
90 { .notifier_call = fn, .priority = pri }; \
91 register_memory_notifier(&fn##_mem_nb); \
92}
93
94#endif /* _LINUX_MEMORY_H_ */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
new file mode 100644
index 000000000000..01f03bc06eff
--- /dev/null
+++ b/include/linux/memory_hotplug.h
@@ -0,0 +1,104 @@
1#ifndef __LINUX_MEMORY_HOTPLUG_H
2#define __LINUX_MEMORY_HOTPLUG_H
3
4#include <linux/mmzone.h>
5#include <linux/spinlock.h>
6#include <linux/mmzone.h>
7#include <linux/notifier.h>
8
9#ifdef CONFIG_MEMORY_HOTPLUG
10/*
11 * pgdat resizing functions
12 */
13static inline
14void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
15{
16 spin_lock_irqsave(&pgdat->node_size_lock, *flags);
17}
18static inline
19void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
20{
21 spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
22}
23static inline
24void pgdat_resize_init(struct pglist_data *pgdat)
25{
26 spin_lock_init(&pgdat->node_size_lock);
27}
28/*
29 * Zone resizing functions
30 */
31static inline unsigned zone_span_seqbegin(struct zone *zone)
32{
33 return read_seqbegin(&zone->span_seqlock);
34}
35static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
36{
37 return read_seqretry(&zone->span_seqlock, iv);
38}
39static inline void zone_span_writelock(struct zone *zone)
40{
41 write_seqlock(&zone->span_seqlock);
42}
43static inline void zone_span_writeunlock(struct zone *zone)
44{
45 write_sequnlock(&zone->span_seqlock);
46}
47static inline void zone_seqlock_init(struct zone *zone)
48{
49 seqlock_init(&zone->span_seqlock);
50}
51extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
52extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
53extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
54/* need some defines for these for archs that don't support it */
55extern void online_page(struct page *page);
56/* VM interface that may be used by firmware interface */
57extern int add_memory(u64 start, u64 size);
58extern int remove_memory(u64 start, u64 size);
59extern int online_pages(unsigned long, unsigned long);
60
61/* reasonably generic interface to expand the physical pages in a zone */
62extern int __add_pages(struct zone *zone, unsigned long start_pfn,
63 unsigned long nr_pages);
64#else /* ! CONFIG_MEMORY_HOTPLUG */
65/*
66 * Stub functions for when hotplug is off
67 */
68static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
69static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
70static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
71
72static inline unsigned zone_span_seqbegin(struct zone *zone)
73{
74 return 0;
75}
76static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
77{
78 return 0;
79}
80static inline void zone_span_writelock(struct zone *zone) {}
81static inline void zone_span_writeunlock(struct zone *zone) {}
82static inline void zone_seqlock_init(struct zone *zone) {}
83
84static inline int mhp_notimplemented(const char *func)
85{
86 printk(KERN_WARNING "%s() called, with CONFIG_MEMORY_HOTPLUG disabled\n", func);
87 dump_stack();
88 return -ENOSYS;
89}
90
91static inline int __add_pages(struct zone *zone, unsigned long start_pfn,
92 unsigned long nr_pages)
93{
94 return mhp_notimplemented(__FUNCTION__);
95}
96#endif /* ! CONFIG_MEMORY_HOTPLUG */
97static inline int __remove_pages(struct zone *zone, unsigned long start_pfn,
98 unsigned long nr_pages)
99{
100 printk(KERN_WARNING "%s() called, not yet supported\n", __FUNCTION__);
101 dump_stack();
102 return -ENOSYS;
103}
104#endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 58385ee1c0ac..7af8cb836e78 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -27,10 +27,10 @@
27 27
28#include <linux/config.h> 28#include <linux/config.h>
29#include <linux/mmzone.h> 29#include <linux/mmzone.h>
30#include <linux/bitmap.h>
31#include <linux/slab.h> 30#include <linux/slab.h>
32#include <linux/rbtree.h> 31#include <linux/rbtree.h>
33#include <linux/spinlock.h> 32#include <linux/spinlock.h>
33#include <linux/nodemask.h>
34 34
35struct vm_area_struct; 35struct vm_area_struct;
36 36
@@ -47,8 +47,7 @@ struct vm_area_struct;
47 * Locking policy for interlave: 47 * Locking policy for interlave:
48 * In process context there is no locking because only the process accesses 48 * In process context there is no locking because only the process accesses
49 * its own state. All vma manipulation is somewhat protected by a down_read on 49 * its own state. All vma manipulation is somewhat protected by a down_read on
50 * mmap_sem. For allocating in the interleave policy the page_table_lock 50 * mmap_sem.
51 * must be also aquired to protect il_next.
52 * 51 *
53 * Freeing policy: 52 * Freeing policy:
54 * When policy is MPOL_BIND v.zonelist is kmalloc'ed and must be kfree'd. 53 * When policy is MPOL_BIND v.zonelist is kmalloc'ed and must be kfree'd.
@@ -63,7 +62,7 @@ struct mempolicy {
63 union { 62 union {
64 struct zonelist *zonelist; /* bind */ 63 struct zonelist *zonelist; /* bind */
65 short preferred_node; /* preferred */ 64 short preferred_node; /* preferred */
66 DECLARE_BITMAP(nodes, MAX_NUMNODES); /* interleave */ 65 nodemask_t nodes; /* interleave */
67 /* undefined for default */ 66 /* undefined for default */
68 } v; 67 } v;
69}; 68};
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e1649578fb0c..5c1fb0a2e806 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -157,7 +157,7 @@ extern unsigned int kobjsize(const void *objp);
157 157
158#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ 158#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
159#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ 159#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
160#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */ 160#define VM_RESERVED 0x00080000 /* Pages managed in a special way */
161#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ 161#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
162#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ 162#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
163#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ 163#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
@@ -226,13 +226,18 @@ struct page {
226 * to show when page is mapped 226 * to show when page is mapped
227 * & limit reverse map searches. 227 * & limit reverse map searches.
228 */ 228 */
229 unsigned long private; /* Mapping-private opaque data: 229 union {
230 unsigned long private; /* Mapping-private opaque data:
230 * usually used for buffer_heads 231 * usually used for buffer_heads
231 * if PagePrivate set; used for 232 * if PagePrivate set; used for
232 * swp_entry_t if PageSwapCache 233 * swp_entry_t if PageSwapCache
233 * When page is free, this indicates 234 * When page is free, this indicates
234 * order in the buddy system. 235 * order in the buddy system.
235 */ 236 */
237#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
238 spinlock_t ptl;
239#endif
240 } u;
236 struct address_space *mapping; /* If low bit clear, points to 241 struct address_space *mapping; /* If low bit clear, points to
237 * inode address_space, or NULL. 242 * inode address_space, or NULL.
238 * If page mapped as anonymous 243 * If page mapped as anonymous
@@ -260,6 +265,9 @@ struct page {
260#endif /* WANT_PAGE_VIRTUAL */ 265#endif /* WANT_PAGE_VIRTUAL */
261}; 266};
262 267
268#define page_private(page) ((page)->u.private)
269#define set_page_private(page, v) ((page)->u.private = (v))
270
263/* 271/*
264 * FIXME: take this include out, include page-flags.h in 272 * FIXME: take this include out, include page-flags.h in
265 * files which need it (119 of them) 273 * files which need it (119 of them)
@@ -311,17 +319,17 @@ extern void FASTCALL(__page_cache_release(struct page *));
311 319
312#ifdef CONFIG_HUGETLB_PAGE 320#ifdef CONFIG_HUGETLB_PAGE
313 321
314static inline int page_count(struct page *p) 322static inline int page_count(struct page *page)
315{ 323{
316 if (PageCompound(p)) 324 if (PageCompound(page))
317 p = (struct page *)p->private; 325 page = (struct page *)page_private(page);
318 return atomic_read(&(p)->_count) + 1; 326 return atomic_read(&page->_count) + 1;
319} 327}
320 328
321static inline void get_page(struct page *page) 329static inline void get_page(struct page *page)
322{ 330{
323 if (unlikely(PageCompound(page))) 331 if (unlikely(PageCompound(page)))
324 page = (struct page *)page->private; 332 page = (struct page *)page_private(page);
325 atomic_inc(&page->_count); 333 atomic_inc(&page->_count);
326} 334}
327 335
@@ -338,7 +346,7 @@ static inline void get_page(struct page *page)
338 346
339static inline void put_page(struct page *page) 347static inline void put_page(struct page *page)
340{ 348{
341 if (!PageReserved(page) && put_page_testzero(page)) 349 if (put_page_testzero(page))
342 __page_cache_release(page); 350 __page_cache_release(page);
343} 351}
344 352
@@ -587,7 +595,7 @@ static inline int PageAnon(struct page *page)
587static inline pgoff_t page_index(struct page *page) 595static inline pgoff_t page_index(struct page *page)
588{ 596{
589 if (unlikely(PageSwapCache(page))) 597 if (unlikely(PageSwapCache(page)))
590 return page->private; 598 return page_private(page);
591 return page->index; 599 return page->index;
592} 600}
593 601
@@ -682,7 +690,7 @@ struct zap_details {
682 690
683unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, 691unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
684 unsigned long size, struct zap_details *); 692 unsigned long size, struct zap_details *);
685unsigned long unmap_vmas(struct mmu_gather **tlb, struct mm_struct *mm, 693unsigned long unmap_vmas(struct mmu_gather **tlb,
686 struct vm_area_struct *start_vma, unsigned long start_addr, 694 struct vm_area_struct *start_vma, unsigned long start_addr,
687 unsigned long end_addr, unsigned long *nr_accounted, 695 unsigned long end_addr, unsigned long *nr_accounted,
688 struct zap_details *); 696 struct zap_details *);
@@ -704,10 +712,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
704} 712}
705 713
706extern int vmtruncate(struct inode * inode, loff_t offset); 714extern int vmtruncate(struct inode * inode, loff_t offset);
707extern pud_t *FASTCALL(__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
708extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address));
709extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
710extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
711extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot); 715extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
712extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot); 716extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
713extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); 717extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
@@ -723,6 +727,7 @@ void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
723 727
724int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, 728int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
725 int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); 729 int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
730void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
726 731
727int __set_page_dirty_buffers(struct page *page); 732int __set_page_dirty_buffers(struct page *page);
728int __set_page_dirty_nobuffers(struct page *page); 733int __set_page_dirty_nobuffers(struct page *page);
@@ -759,38 +764,83 @@ struct shrinker;
759extern struct shrinker *set_shrinker(int, shrinker_t); 764extern struct shrinker *set_shrinker(int, shrinker_t);
760extern void remove_shrinker(struct shrinker *shrinker); 765extern void remove_shrinker(struct shrinker *shrinker);
761 766
762/* 767int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
763 * On a two-level or three-level page table, this ends up being trivial. Thus 768int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
764 * the inlining and the symmetry break with pte_alloc_map() that does all 769int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
765 * of this out-of-line. 770int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
766 */ 771
767/* 772/*
768 * The following ifdef needed to get the 4level-fixup.h header to work. 773 * The following ifdef needed to get the 4level-fixup.h header to work.
769 * Remove it when 4level-fixup.h has been removed. 774 * Remove it when 4level-fixup.h has been removed.
770 */ 775 */
771#ifdef CONFIG_MMU 776#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
772#ifndef __ARCH_HAS_4LEVEL_HACK
773static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) 777static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
774{ 778{
775 if (pgd_none(*pgd)) 779 return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
776 return __pud_alloc(mm, pgd, address); 780 NULL: pud_offset(pgd, address);
777 return pud_offset(pgd, address);
778} 781}
779 782
780static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) 783static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
781{ 784{
782 if (pud_none(*pud)) 785 return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
783 return __pmd_alloc(mm, pud, address); 786 NULL: pmd_offset(pud, address);
784 return pmd_offset(pud, address);
785} 787}
786#endif 788#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
787#endif /* CONFIG_MMU */ 789
790#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
791/*
792 * We tuck a spinlock to guard each pagetable page into its struct page,
793 * at page->private, with BUILD_BUG_ON to make sure that this will not
794 * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
795 * When freeing, reset page->mapping so free_pages_check won't complain.
796 */
797#define __pte_lockptr(page) &((page)->u.ptl)
798#define pte_lock_init(_page) do { \
799 spin_lock_init(__pte_lockptr(_page)); \
800} while (0)
801#define pte_lock_deinit(page) ((page)->mapping = NULL)
802#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
803#else
804/*
805 * We use mm->page_table_lock to guard all pagetable pages of the mm.
806 */
807#define pte_lock_init(page) do {} while (0)
808#define pte_lock_deinit(page) do {} while (0)
809#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
810#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
811
812#define pte_offset_map_lock(mm, pmd, address, ptlp) \
813({ \
814 spinlock_t *__ptl = pte_lockptr(mm, pmd); \
815 pte_t *__pte = pte_offset_map(pmd, address); \
816 *(ptlp) = __ptl; \
817 spin_lock(__ptl); \
818 __pte; \
819})
820
821#define pte_unmap_unlock(pte, ptl) do { \
822 spin_unlock(ptl); \
823 pte_unmap(pte); \
824} while (0)
825
826#define pte_alloc_map(mm, pmd, address) \
827 ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
828 NULL: pte_offset_map(pmd, address))
829
830#define pte_alloc_map_lock(mm, pmd, address, ptlp) \
831 ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
832 NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
833
834#define pte_alloc_kernel(pmd, address) \
835 ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
836 NULL: pte_offset_kernel(pmd, address))
788 837
789extern void free_area_init(unsigned long * zones_size); 838extern void free_area_init(unsigned long * zones_size);
790extern void free_area_init_node(int nid, pg_data_t *pgdat, 839extern void free_area_init_node(int nid, pg_data_t *pgdat,
791 unsigned long * zones_size, unsigned long zone_start_pfn, 840 unsigned long * zones_size, unsigned long zone_start_pfn,
792 unsigned long *zholes_size); 841 unsigned long *zholes_size);
793extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long); 842extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
843extern void setup_per_zone_pages_min(void);
794extern void mem_init(void); 844extern void mem_init(void);
795extern void show_mem(void); 845extern void show_mem(void);
796extern void si_meminfo(struct sysinfo * val); 846extern void si_meminfo(struct sysinfo * val);
@@ -834,6 +884,7 @@ extern int split_vma(struct mm_struct *,
834extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); 884extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
835extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, 885extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
836 struct rb_node **, struct rb_node *); 886 struct rb_node **, struct rb_node *);
887extern void unlink_file_vma(struct vm_area_struct *);
837extern struct vm_area_struct *copy_vma(struct vm_area_struct **, 888extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
838 unsigned long addr, unsigned long len, pgoff_t pgoff); 889 unsigned long addr, unsigned long len, pgoff_t pgoff);
839extern void exit_mmap(struct mm_struct *); 890extern void exit_mmap(struct mm_struct *);
@@ -894,7 +945,8 @@ void handle_ra_miss(struct address_space *mapping,
894unsigned long max_sane_readahead(unsigned long nr); 945unsigned long max_sane_readahead(unsigned long nr);
895 946
896/* Do stack extension */ 947/* Do stack extension */
897extern int expand_stack(struct vm_area_struct * vma, unsigned long address); 948extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
949extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
898 950
899/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ 951/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
900extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); 952extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
@@ -917,40 +969,28 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
917 return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 969 return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
918} 970}
919 971
920extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr); 972struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
973struct page *vmalloc_to_page(void *addr);
974unsigned long vmalloc_to_pfn(void *addr);
975int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
976 unsigned long pfn, unsigned long size, pgprot_t);
921 977
922extern struct page * vmalloc_to_page(void *addr); 978struct page *follow_page(struct mm_struct *, unsigned long address,
923extern unsigned long vmalloc_to_pfn(void *addr); 979 unsigned int foll_flags);
924extern struct page * follow_page(struct mm_struct *mm, unsigned long address, 980#define FOLL_WRITE 0x01 /* check pte is writable */
925 int write); 981#define FOLL_TOUCH 0x02 /* mark page accessed */
926extern int check_user_page_readable(struct mm_struct *mm, unsigned long address); 982#define FOLL_GET 0x04 /* do get_page on page */
927int remap_pfn_range(struct vm_area_struct *, unsigned long, 983#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */
928 unsigned long, unsigned long, pgprot_t);
929 984
930#ifdef CONFIG_PROC_FS 985#ifdef CONFIG_PROC_FS
931void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long); 986void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
932#else 987#else
933static inline void __vm_stat_account(struct mm_struct *mm, 988static inline void vm_stat_account(struct mm_struct *mm,
934 unsigned long flags, struct file *file, long pages) 989 unsigned long flags, struct file *file, long pages)
935{ 990{
936} 991}
937#endif /* CONFIG_PROC_FS */ 992#endif /* CONFIG_PROC_FS */
938 993
939static inline void vm_stat_account(struct vm_area_struct *vma)
940{
941 __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
942 vma_pages(vma));
943}
944
945static inline void vm_stat_unaccount(struct vm_area_struct *vma)
946{
947 __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
948 -vma_pages(vma));
949}
950
951/* update per process rss and vm hiwater data */
952extern void update_mem_hiwater(struct task_struct *tsk);
953
954#ifndef CONFIG_DEBUG_PAGEALLOC 994#ifndef CONFIG_DEBUG_PAGEALLOC
955static inline void 995static inline void
956kernel_map_pages(struct page *page, int numpages, int enable) 996kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7519eb4191e7..f5fa3082fd6a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -12,6 +12,7 @@
12#include <linux/threads.h> 12#include <linux/threads.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/seqlock.h>
15#include <asm/atomic.h> 16#include <asm/atomic.h>
16 17
17/* Free memory management - zoned buddy allocator. */ 18/* Free memory management - zoned buddy allocator. */
@@ -137,6 +138,10 @@ struct zone {
137 * free areas of different sizes 138 * free areas of different sizes
138 */ 139 */
139 spinlock_t lock; 140 spinlock_t lock;
141#ifdef CONFIG_MEMORY_HOTPLUG
142 /* see spanned/present_pages for more description */
143 seqlock_t span_seqlock;
144#endif
140 struct free_area free_area[MAX_ORDER]; 145 struct free_area free_area[MAX_ORDER];
141 146
142 147
@@ -220,6 +225,16 @@ struct zone {
220 /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ 225 /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
221 unsigned long zone_start_pfn; 226 unsigned long zone_start_pfn;
222 227
228 /*
229 * zone_start_pfn, spanned_pages and present_pages are all
230 * protected by span_seqlock. It is a seqlock because it has
231 * to be read outside of zone->lock, and it is done in the main
232 * allocator path. But, it is written quite infrequently.
233 *
234 * The lock is declared along with zone->lock because it is
235 * frequently read in proximity to zone->lock. It's good to
236 * give them a chance of being in the same cacheline.
237 */
223 unsigned long spanned_pages; /* total size, including holes */ 238 unsigned long spanned_pages; /* total size, including holes */
224 unsigned long present_pages; /* amount of memory (excluding holes) */ 239 unsigned long present_pages; /* amount of memory (excluding holes) */
225 240
@@ -273,6 +288,16 @@ typedef struct pglist_data {
273 struct page *node_mem_map; 288 struct page *node_mem_map;
274#endif 289#endif
275 struct bootmem_data *bdata; 290 struct bootmem_data *bdata;
291#ifdef CONFIG_MEMORY_HOTPLUG
292 /*
293 * Must be held any time you expect node_start_pfn, node_present_pages
294 * or node_spanned_pages stay constant. Holding this will also
295 * guarantee that any pfn_valid() stays that way.
296 *
297 * Nests above zone->lock and zone->size_seqlock.
298 */
299 spinlock_t node_size_lock;
300#endif
276 unsigned long node_start_pfn; 301 unsigned long node_start_pfn;
277 unsigned long node_present_pages; /* total number of physical pages */ 302 unsigned long node_present_pages; /* total number of physical pages */
278 unsigned long node_spanned_pages; /* total size of physical page 303 unsigned long node_spanned_pages; /* total size of physical page
@@ -293,6 +318,8 @@ typedef struct pglist_data {
293#endif 318#endif
294#define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) 319#define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr))
295 320
321#include <linux/memory_hotplug.h>
322
296extern struct pglist_data *pgdat_list; 323extern struct pglist_data *pgdat_list;
297 324
298void __get_zone_counts(unsigned long *active, unsigned long *inactive, 325void __get_zone_counts(unsigned long *active, unsigned long *inactive,
@@ -509,6 +536,7 @@ static inline struct mem_section *__nr_to_section(unsigned long nr)
509 return NULL; 536 return NULL;
510 return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; 537 return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
511} 538}
539extern int __section_nr(struct mem_section* ms);
512 540
513/* 541/*
514 * We use the lower bits of the mem_map pointer to store 542 * We use the lower bits of the mem_map pointer to store
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index e80fb7ee6efd..35b30e6c8cf8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -95,8 +95,8 @@ int try_to_unmap(struct page *);
95/* 95/*
96 * Called from mm/filemap_xip.c to unmap empty zero page 96 * Called from mm/filemap_xip.c to unmap empty zero page
97 */ 97 */
98pte_t *page_check_address(struct page *, struct mm_struct *, unsigned long); 98pte_t *page_check_address(struct page *, struct mm_struct *,
99 99 unsigned long, spinlock_t **);
100 100
101/* 101/*
102 * Used by swapoff to help locate where page is expected in vma. 102 * Used by swapoff to help locate where page is expected in vma.
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index b52a2af25f1f..f30f805080ae 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -61,5 +61,10 @@ extern void FASTCALL(__up_read(struct rw_semaphore *sem));
61extern void FASTCALL(__up_write(struct rw_semaphore *sem)); 61extern void FASTCALL(__up_write(struct rw_semaphore *sem));
62extern void FASTCALL(__downgrade_write(struct rw_semaphore *sem)); 62extern void FASTCALL(__downgrade_write(struct rw_semaphore *sem));
63 63
64static inline int rwsem_is_locked(struct rw_semaphore *sem)
65{
66 return (sem->activity != 0);
67}
68
64#endif /* __KERNEL__ */ 69#endif /* __KERNEL__ */
65#endif /* _LINUX_RWSEM_SPINLOCK_H */ 70#endif /* _LINUX_RWSEM_SPINLOCK_H */
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 7f717e95ae37..66ff545552f7 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -1,14 +1,23 @@
1#ifndef _LINUX_SCATTERLIST_H 1#ifndef _LINUX_SCATTERLIST_H
2#define _LINUX_SCATTERLIST_H 2#define _LINUX_SCATTERLIST_H
3 3
4static inline void sg_init_one(struct scatterlist *sg, 4#include <asm/scatterlist.h>
5 u8 *buf, unsigned int buflen) 5#include <linux/mm.h>
6{ 6#include <linux/string.h>
7 memset(sg, 0, sizeof(*sg));
8 7
8static inline void sg_set_buf(struct scatterlist *sg, void *buf,
9 unsigned int buflen)
10{
9 sg->page = virt_to_page(buf); 11 sg->page = virt_to_page(buf);
10 sg->offset = offset_in_page(buf); 12 sg->offset = offset_in_page(buf);
11 sg->length = buflen; 13 sg->length = buflen;
12} 14}
13 15
16static inline void sg_init_one(struct scatterlist *sg, void *buf,
17 unsigned int buflen)
18{
19 memset(sg, 0, sizeof(*sg));
20 sg_set_buf(sg, buf, buflen);
21}
22
14#endif /* _LINUX_SCATTERLIST_H */ 23#endif /* _LINUX_SCATTERLIST_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 27519df0f987..1c30bc308ef1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -249,6 +249,36 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
249extern void arch_unmap_area(struct mm_struct *, unsigned long); 249extern void arch_unmap_area(struct mm_struct *, unsigned long);
250extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); 250extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
251 251
252#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
253/*
254 * The mm counters are not protected by its page_table_lock,
255 * so must be incremented atomically.
256 */
257#ifdef ATOMIC64_INIT
258#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value)
259#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member))
260#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member)
261#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member)
262#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member)
263typedef atomic64_t mm_counter_t;
264#else /* !ATOMIC64_INIT */
265/*
266 * The counters wrap back to 0 at 2^32 * PAGE_SIZE,
267 * that is, at 16TB if using 4kB page size.
268 */
269#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value)
270#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member))
271#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member)
272#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member)
273#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member)
274typedef atomic_t mm_counter_t;
275#endif /* !ATOMIC64_INIT */
276
277#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
278/*
279 * The mm counters are protected by its page_table_lock,
280 * so can be incremented directly.
281 */
252#define set_mm_counter(mm, member, value) (mm)->_##member = (value) 282#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
253#define get_mm_counter(mm, member) ((mm)->_##member) 283#define get_mm_counter(mm, member) ((mm)->_##member)
254#define add_mm_counter(mm, member, value) (mm)->_##member += (value) 284#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
@@ -256,6 +286,20 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
256#define dec_mm_counter(mm, member) (mm)->_##member-- 286#define dec_mm_counter(mm, member) (mm)->_##member--
257typedef unsigned long mm_counter_t; 287typedef unsigned long mm_counter_t;
258 288
289#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
290
291#define get_mm_rss(mm) \
292 (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
293#define update_hiwater_rss(mm) do { \
294 unsigned long _rss = get_mm_rss(mm); \
295 if ((mm)->hiwater_rss < _rss) \
296 (mm)->hiwater_rss = _rss; \
297} while (0)
298#define update_hiwater_vm(mm) do { \
299 if ((mm)->hiwater_vm < (mm)->total_vm) \
300 (mm)->hiwater_vm = (mm)->total_vm; \
301} while (0)
302
259struct mm_struct { 303struct mm_struct {
260 struct vm_area_struct * mmap; /* list of VMAs */ 304 struct vm_area_struct * mmap; /* list of VMAs */
261 struct rb_root mm_rb; 305 struct rb_root mm_rb;
@@ -279,15 +323,20 @@ struct mm_struct {
279 * by mmlist_lock 323 * by mmlist_lock
280 */ 324 */
281 325
326 /* Special counters, in some configurations protected by the
327 * page_table_lock, in other configurations by being atomic.
328 */
329 mm_counter_t _file_rss;
330 mm_counter_t _anon_rss;
331
332 unsigned long hiwater_rss; /* High-watermark of RSS usage */
333 unsigned long hiwater_vm; /* High-water virtual memory usage */
334
335 unsigned long total_vm, locked_vm, shared_vm, exec_vm;
336 unsigned long stack_vm, reserved_vm, def_flags, nr_ptes;
282 unsigned long start_code, end_code, start_data, end_data; 337 unsigned long start_code, end_code, start_data, end_data;
283 unsigned long start_brk, brk, start_stack; 338 unsigned long start_brk, brk, start_stack;
284 unsigned long arg_start, arg_end, env_start, env_end; 339 unsigned long arg_start, arg_end, env_start, env_end;
285 unsigned long total_vm, locked_vm, shared_vm;
286 unsigned long exec_vm, stack_vm, reserved_vm, def_flags, nr_ptes;
287
288 /* Special counters protected by the page_table_lock */
289 mm_counter_t _rss;
290 mm_counter_t _anon_rss;
291 340
292 unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ 341 unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
293 342
@@ -308,11 +357,7 @@ struct mm_struct {
308 /* aio bits */ 357 /* aio bits */
309 rwlock_t ioctx_list_lock; 358 rwlock_t ioctx_list_lock;
310 struct kioctx *ioctx_list; 359 struct kioctx *ioctx_list;
311
312 struct kioctx default_kioctx; 360 struct kioctx default_kioctx;
313
314 unsigned long hiwater_rss; /* High-water RSS usage */
315 unsigned long hiwater_vm; /* High-water virtual memory usage */
316}; 361};
317 362
318struct sighand_struct { 363struct sighand_struct {
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 3701a0673d2c..1d5577b2b752 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -32,10 +32,14 @@ struct vm_struct {
32 * Highlevel APIs for driver use 32 * Highlevel APIs for driver use
33 */ 33 */
34extern void *vmalloc(unsigned long size); 34extern void *vmalloc(unsigned long size);
35extern void *vmalloc_node(unsigned long size, int node);
35extern void *vmalloc_exec(unsigned long size); 36extern void *vmalloc_exec(unsigned long size);
36extern void *vmalloc_32(unsigned long size); 37extern void *vmalloc_32(unsigned long size);
37extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); 38extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
38extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot); 39extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
40 pgprot_t prot);
41extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
42 pgprot_t prot, int node);
39extern void vfree(void *addr); 43extern void vfree(void *addr);
40 44
41extern void *vmap(struct page **pages, unsigned int count, 45extern void *vmap(struct page **pages, unsigned int count,
@@ -48,6 +52,8 @@ extern void vunmap(void *addr);
48extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); 52extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
49extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, 53extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
50 unsigned long start, unsigned long end); 54 unsigned long start, unsigned long end);
55extern struct vm_struct *get_vm_area_node(unsigned long size,
56 unsigned long flags, int node);
51extern struct vm_struct *remove_vm_area(void *addr); 57extern struct vm_struct *remove_vm_area(void *addr);
52extern struct vm_struct *__remove_vm_area(void *addr); 58extern struct vm_struct *__remove_vm_area(void *addr);
53extern int map_vm_area(struct vm_struct *area, pgprot_t prot, 59extern int map_vm_area(struct vm_struct *area, pgprot_t prot,