author		Jeff Garzik <jgarzik@pobox.com>	2005-10-30 01:56:31 -0500
committer	Jeff Garzik <jgarzik@pobox.com>	2005-10-30 01:56:31 -0500
commit		81cfb8864c73230eb1c37753aba517db15cf4d8f (patch)
tree		649ff25543834cf9983ea41b93126bea97d75475 /include
parent		0169e284f6b6b263cc7c2ed25986b96cd6fda610 (diff)
parent		9f75e1eff3edb2bb07349b94c28f4f2a6c66ca43 (diff)
Merge branch 'master'
Diffstat (limited to 'include')
-rw-r--r--	include/asm-alpha/barrier.h	2
-rw-r--r--	include/asm-alpha/rwsem.h	5
-rw-r--r--	include/asm-arm/tlb.h	23
-rw-r--r--	include/asm-arm26/tlb.h	47
-rw-r--r--	include/asm-generic/4level-fixup.h	11
-rw-r--r--	include/asm-generic/pgtable.h	2
-rw-r--r--	include/asm-generic/tlb.h	23
-rw-r--r--	include/asm-i386/mmzone.h	6
-rw-r--r--	include/asm-i386/pgtable.h	3
-rw-r--r--	include/asm-i386/rwsem.h	5
-rw-r--r--	include/asm-ia64/rwsem.h	5
-rw-r--r--	include/asm-ia64/tlb.h	19
-rw-r--r--	include/asm-m32r/mmzone.h	6
-rw-r--r--	include/asm-parisc/cacheflush.h	35
-rw-r--r--	include/asm-parisc/mmzone.h	6
-rw-r--r--	include/asm-parisc/tlbflush.h	3
-rw-r--r--	include/asm-ppc/rwsem.h	5
-rw-r--r--	include/asm-ppc64/mmzone.h	3
-rw-r--r--	include/asm-ppc64/pgtable.h	4
-rw-r--r--	include/asm-ppc64/rwsem.h	5
-rw-r--r--	include/asm-s390/rwsem.h	5
-rw-r--r--	include/asm-sh/rwsem.h	5
-rw-r--r--	include/asm-sparc64/rwsem.h	5
-rw-r--r--	include/asm-sparc64/tlb.h	29
-rw-r--r--	include/asm-um/pgtable.h	2
-rw-r--r--	include/asm-x86_64/rwsem.h	5
-rw-r--r--	include/linux/buffer_head.h	6
-rw-r--r--	include/linux/hugetlb.h	2
-rw-r--r--	include/linux/memory.h	94
-rw-r--r--	include/linux/memory_hotplug.h	104
-rw-r--r--	include/linux/mempolicy.h	7
-rw-r--r--	include/linux/mm.h	150
-rw-r--r--	include/linux/mmzone.h	28
-rw-r--r--	include/linux/rmap.h	4
-rw-r--r--	include/linux/rwsem-spinlock.h	5
-rw-r--r--	include/linux/scatterlist.h	17
-rw-r--r--	include/linux/sched.h	65
-rw-r--r--	include/linux/vmalloc.h	8
38 files changed, 521 insertions(+), 238 deletions(-)
diff --git a/include/asm-alpha/barrier.h b/include/asm-alpha/barrier.h
index 229c83fe77cb..681ff581afa5 100644
--- a/include/asm-alpha/barrier.h
+++ b/include/asm-alpha/barrier.h
@@ -1,6 +1,8 @@
 #ifndef __BARRIER_H
 #define __BARRIER_H
 
+#include <asm/compiler.h>
+
 #define mb() \
 __asm__ __volatile__("mb": : :"memory")
 
diff --git a/include/asm-alpha/rwsem.h b/include/asm-alpha/rwsem.h
index 8e058a67c9a4..fafdd4f7010a 100644
--- a/include/asm-alpha/rwsem.h
+++ b/include/asm-alpha/rwsem.h
@@ -262,5 +262,10 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
 #endif
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _ALPHA_RWSEM_H */
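Every architecture's rwsem.h gains the same rwsem_is_locked() helper in this merge; it only reports sem->count != 0, so it cannot tell a read hold from a write hold. A minimal sketch of the intended kind of caller (the assertion helper below is hypothetical, not part of the patch):

	/* Hypothetical debug helper: assert that a semaphore protecting
	 * some state is held before that state is touched.  Relies only
	 * on the rwsem_is_locked() added above.
	 */
	static inline void assert_rwsem_held(struct rw_semaphore *sem)
	{
		BUG_ON(!rwsem_is_locked(sem));
	}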
diff --git a/include/asm-arm/tlb.h b/include/asm-arm/tlb.h
index 9bb325c54645..f49bfb78c221 100644
--- a/include/asm-arm/tlb.h
+++ b/include/asm-arm/tlb.h
@@ -27,11 +27,7 @@
  */
 struct mmu_gather {
 	struct mm_struct	*mm;
-	unsigned int		freed;
 	unsigned int		fullmm;
-
-	unsigned int		flushes;
-	unsigned int		avoided_flushes;
 };
 
 DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -39,11 +35,9 @@ DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
 static inline struct mmu_gather *
 tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	int cpu = smp_processor_id();
-	struct mmu_gather *tlb = &per_cpu(mmu_gathers, cpu);
+	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
-	tlb->freed = 0;
 	tlb->fullmm = full_mm_flush;
 
 	return tlb;
@@ -52,24 +46,13 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	struct mm_struct *mm = tlb->mm;
-	unsigned long freed = tlb->freed;
-	int rss = get_mm_counter(mm, rss);
-
-	if (rss < freed)
-		freed = rss;
-	add_mm_counter(mm, rss, -freed);
-
 	if (tlb->fullmm)
-		flush_tlb_mm(mm);
+		flush_tlb_mm(tlb->mm);
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
-}
 
-static inline unsigned int tlb_is_full_mm(struct mmu_gather *tlb)
-{
-	return tlb->fullmm;
+	put_cpu_var(mmu_gathers);
 }
 
 #define tlb_remove_tlb_entry(tlb,ptep,address)	do { } while (0)
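The conversion from per_cpu(mmu_gathers, smp_processor_id()) to a get_cpu_var()/put_cpu_var() pair is what lets the page table lock be dropped during unmapping: get_cpu_var() disables preemption, so the per-CPU gather cannot migrate to another CPU mid-shootdown, and put_cpu_var() in tlb_finish_mmu() re-enables it. A sketch of the resulting calling pattern (the function below is illustrative, not from the patch):

	/* Illustrative caller: the gather is preemption-safe between
	 * tlb_gather_mmu() (get_cpu_var inside, preempt off) and
	 * tlb_finish_mmu() (put_cpu_var inside, preempt back on).
	 */
	static void example_unmap_all(struct mm_struct *mm)
	{
		struct mmu_gather *tlb = tlb_gather_mmu(mm, 1);
		/* ... remove ptes, tlb_remove_page() each page ... */
		tlb_finish_mmu(tlb, 0, ~0UL);
	}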
diff --git a/include/asm-arm26/tlb.h b/include/asm-arm26/tlb.h
index 1316352a58f3..08ddd85b8d35 100644
--- a/include/asm-arm26/tlb.h
+++ b/include/asm-arm26/tlb.h
@@ -10,24 +10,20 @@
  */
 struct mmu_gather {
 	struct mm_struct	*mm;
-	unsigned int		freed;
+	unsigned int		need_flush;
 	unsigned int		fullmm;
-
-	unsigned int		flushes;
-	unsigned int		avoided_flushes;
 };
 
-extern struct mmu_gather mmu_gathers[NR_CPUS];
+DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 static inline struct mmu_gather *
 tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	int cpu = smp_processor_id();
-	struct mmu_gather *tlb = &mmu_gathers[cpu];
+	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
-	tlb->freed = 0;
+	tlb->need_flush = 0;
 	tlb->fullmm = full_mm_flush;
 
 	return tlb;
 }
@@ -35,30 +31,13 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	struct mm_struct *mm = tlb->mm;
-	unsigned long freed = tlb->freed;
-	int rss = get_mm_counter(mm, rss);
-
-	if (rss < freed)
-		freed = rss;
-	add_mm_counter(mm, rss, -freed);
-
-	if (freed) {
-		flush_tlb_mm(mm);
-		tlb->flushes++;
-	} else {
-		tlb->avoided_flushes++;
-	}
+	if (tlb->need_flush)
+		flush_tlb_mm(tlb->mm);
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
-}
-
 
-static inline unsigned int
-tlb_is_full_mm(struct mmu_gather *tlb)
-{
-	return tlb->fullmm;
+	put_cpu_var(mmu_gathers);
 }
 
 #define tlb_remove_tlb_entry(tlb,ptep,address) do { } while (0)
@@ -71,7 +50,13 @@ tlb_is_full_mm(struct mmu_gather *tlb)
 	} while (0)
 #define tlb_end_vma(tlb,vma)			do { } while (0)
 
-#define tlb_remove_page(tlb,page)	free_page_and_swap_cache(page)
+static inline void
+tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	tlb->need_flush = 1;
+	free_page_and_swap_cache(page);
+}
+
 #define pte_free_tlb(tlb,ptep)	pte_free(ptep)
 #define pmd_free_tlb(tlb,pmdp)	pmd_free(pmdp)
 
diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h
index c20ec257ecc0..68c6fea994d9 100644
--- a/include/asm-generic/4level-fixup.h
+++ b/include/asm-generic/4level-fixup.h
@@ -10,14 +10,9 @@
 
 #define pud_t				pgd_t
 
 #define pmd_alloc(mm, pud, address) \
-({	pmd_t *ret; \
-	if (pgd_none(*pud)) \
-		ret = __pmd_alloc(mm, pud, address); \
-	else \
-		ret = pmd_offset(pud, address); \
-	ret; \
-})
+	((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \
+		NULL: pmd_offset(pud, address))
 
 #define pud_alloc(mm, pgd, address)	(pgd)
 #define pud_offset(pgd, start)		(pgd)
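The rewritten pmd_alloc() leans on &&'s short-circuit evaluation: __pmd_alloc() runs only when the pgd slot is empty, and a nonzero return (allocation failure) selects NULL; otherwise pmd_offset() is evaluated against a slot that is by then known to be populated. An equivalent open-coded expansion, for illustration only:

	/* Open-coded equivalent of the new pmd_alloc() macro. */
	static pmd_t *pmd_alloc_expanded(struct mm_struct *mm, pud_t *pud,
					 unsigned long address)
	{
		if (unlikely(pgd_none(*pud)) && __pmd_alloc(mm, pud, address))
			return NULL;			/* allocation failed */
		return pmd_offset(pud, address);	/* slot populated */
	}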
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index ff28c8b31f58..7dca30a26c53 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -8,7 +8,7 @@
  * - update the page tables
  * - inform the TLB about the new one
  *
- * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock.
+ * We hold the mm semaphore for reading, and the pte lock.
  *
  * Note: the old pte is known to not be writable, so we don't need to
  * worry about dirty bits etc getting lost.
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 7d0298347ee7..cdd4145243cd 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -35,16 +35,13 @@
 #endif
 
 /* struct mmu_gather is an opaque type used by the mm code for passing around
- * any data needed by arch specific code for tlb_remove_page.  This structure
- * can be per-CPU or per-MM as the page table lock is held for the duration of
- * TLB shootdown.
+ * any data needed by arch specific code for tlb_remove_page.
  */
 struct mmu_gather {
 	struct mm_struct	*mm;
 	unsigned int		nr;	/* set to ~0U means fast mode */
 	unsigned int		need_flush;/* Really unmapped some ptes? */
 	unsigned int		fullmm; /* non-zero means full mm flush */
-	unsigned long		freed;
 	struct page *		pages[FREE_PTE_NR];
 };
 
@@ -57,7 +54,7 @@ DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
 static inline struct mmu_gather *
 tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *tlb = &per_cpu(mmu_gathers, smp_processor_id());
+	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
 
@@ -65,7 +62,6 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 	tlb->nr = num_online_cpus() > 1 ? 0U : ~0U;
 
 	tlb->fullmm = full_mm_flush;
-	tlb->freed = 0;
 
 	return tlb;
 }
@@ -85,28 +81,17 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 
 /* tlb_finish_mmu
  *	Called at the end of the shootdown operation to free up any resources
- *	that were required.  The page table lock is still held at this point.
+ *	that were required.
  */
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	int freed = tlb->freed;
-	struct mm_struct *mm = tlb->mm;
-	int rss = get_mm_counter(mm, rss);
-
-	if (rss < freed)
-		freed = rss;
-	add_mm_counter(mm, rss, -freed);
 	tlb_flush_mmu(tlb, start, end);
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
-}
 
-static inline unsigned int
-tlb_is_full_mm(struct mmu_gather *tlb)
-{
-	return tlb->fullmm;
+	put_cpu_var(mmu_gathers);
 }
 
 /* tlb_remove_page
diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h
index 348fe3a4879d..620a90641ea8 100644
--- a/include/asm-i386/mmzone.h
+++ b/include/asm-i386/mmzone.h
@@ -88,12 +88,6 @@ static inline int pfn_to_nid(unsigned long pfn)
 	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;	\
 })
 
-#define local_mapnr(kvaddr)					\
-({								\
-	unsigned long __pfn = __pa(kvaddr) >> PAGE_SHIFT;	\
-	(__pfn - node_start_pfn(pfn_to_nid(__pfn)));		\
-})
-
 /* XXX: FIXME -- wli */
 #define kern_addr_valid(kaddr)	(0)
 
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index d101ac414f07..0e3ec809352d 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -203,7 +203,8 @@ extern unsigned long pg0[];
 #define pte_present(x)	((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
 #define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
 
-#define pmd_none(x)	(!pmd_val(x))
+/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
+#define pmd_none(x)	(!(unsigned long)pmd_val(x))
 #define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
 #define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
 #define pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h
index 7625a675852f..be4ab859238e 100644
--- a/include/asm-i386/rwsem.h
+++ b/include/asm-i386/rwsem.h
@@ -284,5 +284,10 @@ LOCK_PREFIX "xadd %0,(%2)"
 	return tmp+delta;
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _I386_RWSEM_H */
diff --git a/include/asm-ia64/rwsem.h b/include/asm-ia64/rwsem.h
index e18b5ab0cb75..1327c91ea39c 100644
--- a/include/asm-ia64/rwsem.h
+++ b/include/asm-ia64/rwsem.h
@@ -186,4 +186,9 @@ __downgrade_write (struct rw_semaphore *sem)
 #define rwsem_atomic_add(delta, sem)	atomic64_add(delta, (atomic64_t *)(&(sem)->count))
 #define rwsem_atomic_update(delta, sem)	atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* _ASM_IA64_RWSEM_H */
diff --git a/include/asm-ia64/tlb.h b/include/asm-ia64/tlb.h
index 3a9a6d1be75c..834370b9dea1 100644
--- a/include/asm-ia64/tlb.h
+++ b/include/asm-ia64/tlb.h
@@ -60,7 +60,6 @@ struct mmu_gather {
 	unsigned int		nr;		/* == ~0U => fast mode */
 	unsigned char		fullmm;		/* non-zero means full mm flush */
 	unsigned char		need_flush;	/* really unmapped some PTEs? */
-	unsigned long		freed;		/* number of pages freed */
 	unsigned long		start_addr;
 	unsigned long		end_addr;
 	struct page		*pages[FREE_PTE_NR];
@@ -129,7 +128,7 @@ ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long e
 static inline struct mmu_gather *
 tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *tlb = &__get_cpu_var(mmu_gathers);
+	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
 	/*
@@ -147,25 +146,17 @@ tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
 	 */
 	tlb->nr = (num_online_cpus() == 1) ? ~0U : 0;
 	tlb->fullmm = full_mm_flush;
-	tlb->freed = 0;
 	tlb->start_addr = ~0UL;
 	return tlb;
 }
 
 /*
  * Called at the end of the shootdown operation to free up any resources that were
- * collected.  The page table lock is still held at this point.
+ * collected.
  */
 static inline void
 tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	unsigned long freed = tlb->freed;
-	struct mm_struct *mm = tlb->mm;
-	unsigned long rss = get_mm_counter(mm, rss);
-
-	if (rss < freed)
-		freed = rss;
-	add_mm_counter(mm, rss, -freed);
 	/*
 	 * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
 	 * tlb->end_addr.
@@ -174,12 +165,8 @@ tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
-}
 
-static inline unsigned int
-tlb_is_full_mm(struct mmu_gather *tlb)
-{
-	return tlb->fullmm;
+	put_cpu_var(mmu_gathers);
 }
 
 /*
diff --git a/include/asm-m32r/mmzone.h b/include/asm-m32r/mmzone.h
index d58878ec899e..adc7970a77ec 100644
--- a/include/asm-m32r/mmzone.h
+++ b/include/asm-m32r/mmzone.h
@@ -21,12 +21,6 @@ extern struct pglist_data *node_data[];
 	__pgdat->node_start_pfn + __pgdat->node_spanned_pages - 1; \
 })
 
-#define local_mapnr(kvaddr)					\
-({								\
-	unsigned long __pfn = __pa(kvaddr) >> PAGE_SHIFT;	\
-	(__pfn - node_start_pfn(pfn_to_nid(__pfn)));		\
-})
-
 #define pfn_to_page(pfn)					\
 ({								\
 	unsigned long __pfn = pfn;				\
diff --git a/include/asm-parisc/cacheflush.h b/include/asm-parisc/cacheflush.h
index aa592d8c0e39..1bc3c83ee74b 100644
--- a/include/asm-parisc/cacheflush.h
+++ b/include/asm-parisc/cacheflush.h
@@ -100,30 +100,34 @@ static inline void flush_cache_range(struct vm_area_struct *vma,
 
 /* Simple function to work out if we have an existing address translation
  * for a user space vma. */
-static inline pte_t *__translation_exists(struct mm_struct *mm,
-					  unsigned long addr)
+static inline int translation_exists(struct vm_area_struct *vma,
+		unsigned long addr, unsigned long pfn)
 {
-	pgd_t *pgd = pgd_offset(mm, addr);
+	pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
 	pmd_t *pmd;
-	pte_t *pte;
+	pte_t pte;
 
 	if(pgd_none(*pgd))
-		return NULL;
+		return 0;
 
 	pmd = pmd_offset(pgd, addr);
 	if(pmd_none(*pmd) || pmd_bad(*pmd))
-		return NULL;
+		return 0;
 
-	pte = pte_offset_map(pmd, addr);
+	/* We cannot take the pte lock here: flush_cache_page is usually
+	 * called with pte lock already held.  Whereas flush_dcache_page
+	 * takes flush_dcache_mmap_lock, which is lower in the hierarchy:
+	 * the vma itself is secure, but the pte might come or go racily.
+	 */
+	pte = *pte_offset_map(pmd, addr);
+	/* But pte_unmap() does nothing on this architecture */
 
-	/* The PA flush mappings show up as pte_none, but they're
-	 * valid none the less */
-	if(pte_none(*pte) && ((pte_val(*pte) & _PAGE_FLUSH) == 0))
-		return NULL;
-	return pte;
-}
-#define	translation_exists(vma, addr)	__translation_exists((vma)->vm_mm, addr)
+	/* Filter out coincidental file entries and swap entries */
+	if (!(pte_val(pte) & (_PAGE_FLUSH|_PAGE_PRESENT)))
+		return 0;
 
+	return pte_pfn(pte) == pfn;
+}
 
 /* Private function to flush a page from the cache of a non-current
  * process.  cr25 contains the Page Directory of the current user
@@ -175,9 +179,8 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
 {
 	BUG_ON(!vma->vm_mm->context);
 
-	if(likely(translation_exists(vma, vmaddr)))
+	if (likely(translation_exists(vma, vmaddr, pfn)))
 		__flush_cache_page(vma, vmaddr);
 
 }
 #endif
-
diff --git a/include/asm-parisc/mmzone.h b/include/asm-parisc/mmzone.h
index 595d3dce120a..ae039f4fd711 100644
--- a/include/asm-parisc/mmzone.h
+++ b/include/asm-parisc/mmzone.h
@@ -27,12 +27,6 @@ extern struct node_map_data node_data[];
 })
 #define node_localnr(pfn, nid)		((pfn) - node_start_pfn(nid))
 
-#define local_mapnr(kvaddr)					\
-({								\
-	unsigned long __pfn = __pa(kvaddr) >> PAGE_SHIFT;	\
-	(__pfn - node_start_pfn(pfn_to_nid(__pfn)));		\
-})
-
 #define pfn_to_page(pfn)						\
 ({									\
 	unsigned long __pfn = (pfn);					\
diff --git a/include/asm-parisc/tlbflush.h b/include/asm-parisc/tlbflush.h
index 84af4ab1fe51..e97aa8d1eff5 100644
--- a/include/asm-parisc/tlbflush.h
+++ b/include/asm-parisc/tlbflush.h
@@ -88,7 +88,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 	if (npages >= 512)  /* 2MB of space: arbitrary, should be tuned */
 		flush_tlb_all();
 	else {
-
+		preempt_disable();
 		mtsp(vma->vm_mm->context,1);
 		purge_tlb_start();
 		if (split_tlb) {
@@ -102,6 +102,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 				pdtlb(start);
 				start += PAGE_SIZE;
 			}
+			preempt_enable();
 		}
 		purge_tlb_end();
 	}
diff --git a/include/asm-ppc/rwsem.h b/include/asm-ppc/rwsem.h
index 3e738f483c11..3501ea72f88c 100644
--- a/include/asm-ppc/rwsem.h
+++ b/include/asm-ppc/rwsem.h
@@ -168,5 +168,10 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
 	return atomic_add_return(delta, (atomic_t *)(&sem->count));
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _PPC_RWSEM_XADD_H */
diff --git a/include/asm-ppc64/mmzone.h b/include/asm-ppc64/mmzone.h
index ed473f4b0152..80a708e7093a 100644
--- a/include/asm-ppc64/mmzone.h
+++ b/include/asm-ppc64/mmzone.h
@@ -67,9 +67,6 @@ static inline int pa_to_nid(unsigned long pa)
 #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
 #define node_end_pfn(nid)	(NODE_DATA(nid)->node_end_pfn)
 
-#define local_mapnr(kvaddr) \
-	( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr))
-
 #ifdef CONFIG_DISCONTIGMEM
 
 /*
diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h
index c83679c9d2b0..2eb1778a3a15 100644
--- a/include/asm-ppc64/pgtable.h
+++ b/include/asm-ppc64/pgtable.h
@@ -478,10 +478,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
 #define __HAVE_ARCH_PTE_SAME
 #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
 
+#define pte_ERROR(e) \
+	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
 #define pmd_ERROR(e) \
 	printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
 #define pud_ERROR(e) \
-	printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e))
+	printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
diff --git a/include/asm-ppc64/rwsem.h b/include/asm-ppc64/rwsem.h
index bd5c2f093575..7a647fae3765 100644
--- a/include/asm-ppc64/rwsem.h
+++ b/include/asm-ppc64/rwsem.h
@@ -163,5 +163,10 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
 	return atomic_add_return(delta, (atomic_t *)(&sem->count));
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _PPC_RWSEM_XADD_H */
diff --git a/include/asm-s390/rwsem.h b/include/asm-s390/rwsem.h
index 8c0cebbfc034..0422a085dd56 100644
--- a/include/asm-s390/rwsem.h
+++ b/include/asm-s390/rwsem.h
@@ -351,5 +351,10 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
 	return new;
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _S390_RWSEM_H */
diff --git a/include/asm-sh/rwsem.h b/include/asm-sh/rwsem.h
index 1be4337f5259..0262d3d1e5e0 100644
--- a/include/asm-sh/rwsem.h
+++ b/include/asm-sh/rwsem.h
@@ -166,5 +166,10 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
 	return atomic_add_return(delta, (atomic_t *)(&sem->count));
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_SH_RWSEM_H */
diff --git a/include/asm-sparc64/rwsem.h b/include/asm-sparc64/rwsem.h
index 4568ee4022df..cef5e8270421 100644
--- a/include/asm-sparc64/rwsem.h
+++ b/include/asm-sparc64/rwsem.h
@@ -56,6 +56,11 @@ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
 	atomic_add(delta, (atomic_t *)(&sem->count));
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _SPARC64_RWSEM_H */
diff --git a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h
index 9baf57db01d2..66138d959df5 100644
--- a/include/asm-sparc64/tlb.h
+++ b/include/asm-sparc64/tlb.h
@@ -25,9 +25,8 @@ struct mmu_gather {
 	struct mm_struct *mm;
 	unsigned int pages_nr;
 	unsigned int need_flush;
-	unsigned int tlb_frozen;
+	unsigned int fullmm;
 	unsigned int tlb_nr;
-	unsigned long freed;
 	unsigned long vaddrs[TLB_BATCH_NR];
 	struct page *pages[FREE_PTE_NR];
 };
@@ -44,14 +43,13 @@ extern void flush_tlb_pending(void);
 
 static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+	struct mmu_gather *mp = &get_cpu_var(mmu_gathers);
 
 	BUG_ON(mp->tlb_nr);
 
 	mp->mm = mm;
 	mp->pages_nr = num_online_cpus() > 1 ? 0U : ~0U;
-	mp->tlb_frozen = full_mm_flush;
-	mp->freed = 0;
+	mp->fullmm = full_mm_flush;
 
 	return mp;
 }
@@ -78,30 +76,19 @@ extern void smp_flush_tlb_mm(struct mm_struct *mm);
 
 static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, unsigned long end)
 {
-	unsigned long freed = mp->freed;
-	struct mm_struct *mm = mp->mm;
-	unsigned long rss = get_mm_counter(mm, rss);
-
-	if (rss < freed)
-		freed = rss;
-	add_mm_counter(mm, rss, -freed);
-
 	tlb_flush_mmu(mp);
 
-	if (mp->tlb_frozen) {
-		if (CTX_VALID(mm->context))
-			do_flush_tlb_mm(mm);
-		mp->tlb_frozen = 0;
+	if (mp->fullmm) {
+		if (CTX_VALID(mp->mm->context))
+			do_flush_tlb_mm(mp->mm);
+		mp->fullmm = 0;
 	} else
 		flush_tlb_pending();
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
-}
 
-static inline unsigned int tlb_is_full_mm(struct mmu_gather *mp)
-{
-	return mp->tlb_frozen;
+	put_cpu_var(mmu_gathers);
 }
 
 static inline void tlb_remove_page(struct mmu_gather *mp, struct page *page)
diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h
index 616d02b57ea9..ac64eb955868 100644
--- a/include/asm-um/pgtable.h
+++ b/include/asm-um/pgtable.h
@@ -138,7 +138,7 @@ extern unsigned long pg0[1024];
 
 #define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE))
 
-#define pmd_none(x)	(!(pmd_val(x) & ~_PAGE_NEWPAGE))
+#define pmd_none(x)	(!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE))
 #define	pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
 #define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
 #define pmd_clear(xp)	do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0)
diff --git a/include/asm-x86_64/rwsem.h b/include/asm-x86_64/rwsem.h
index c002175b6e82..46077e9c1910 100644
--- a/include/asm-x86_64/rwsem.h
+++ b/include/asm-x86_64/rwsem.h
@@ -274,5 +274,10 @@ LOCK_PREFIX "xaddl %0,(%2)"
 	return tmp+delta;
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _X8664_RWSEM_H */
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 88af42f5e04a..c937d6e65502 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -126,8 +126,8 @@ BUFFER_FNS(Eopnotsupp, eopnotsupp)
 /* If we *know* page->private refers to buffer_heads */
 #define page_buffers(page)					\
 	({							\
 		BUG_ON(!PagePrivate(page));			\
-		((struct buffer_head *)(page)->private);	\
+		((struct buffer_head *)page_private(page));	\
 	})
 #define page_has_buffers(page)	PagePrivate(page)
 
@@ -219,7 +219,7 @@ static inline void attach_page_buffers(struct page *page,
 {
 	page_cache_get(page);
 	SetPagePrivate(page);
-	page->private = (unsigned long)head;
+	set_page_private(page, (unsigned long)head);
 }
 
 static inline void get_bh(struct buffer_head *bh)
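All direct page->private accesses become page_private()/set_page_private() so the field can move into a union with the new pte spinlock (see the linux/mm.h hunks below). Reading the value back goes through the same accessor; the page_buffers() macro above already has exactly this shape, sketched here as a function for clarity (illustrative, not part of the patch):

	/* Sketch of the accessor pair in use: set_page_private() stores
	 * the head of the buffer list; page_private() retrieves it.
	 */
	static inline struct buffer_head *first_buffer(struct page *page)
	{
		BUG_ON(!PagePrivate(page));
		return (struct buffer_head *)page_private(page);
	}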
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d664330d900e..0cea162b08c0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -16,7 +16,6 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
-void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
 void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
 int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
 int hugetlb_report_meminfo(char *);
@@ -87,7 +86,6 @@ static inline unsigned long hugetlb_total_pages(void)
 #define follow_huge_addr(mm, addr, write)	ERR_PTR(-EINVAL)
 #define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })
 #define hugetlb_prefault(mapping, vma)		({ BUG(); 0; })
-#define zap_hugepage_range(vma, start, len)	BUG()
 #define unmap_hugepage_range(vma, start, end)	BUG()
 #define is_hugepage_mem_enough(size)		0
 #define hugetlb_report_meminfo(buf)		0
diff --git a/include/linux/memory.h b/include/linux/memory.h
new file mode 100644
index 000000000000..0def328ab5cf
--- /dev/null
+++ b/include/linux/memory.h
@@ -0,0 +1,94 @@
+/*
+ * include/linux/memory.h - generic memory definition
+ *
+ * This is mainly for topological representation. We define the
+ * basic "struct memory_block" here, which can be embedded in per-arch
+ * definitions or NUMA information.
+ *
+ * Basic handling of the devices is done in drivers/base/memory.c
+ * and system devices are handled in drivers/base/sys.c.
+ *
+ * Memory blocks are exported via sysfs in the class/memory/devices/
+ * directory.
+ *
+ */
+#ifndef _LINUX_MEMORY_H_
+#define _LINUX_MEMORY_H_
+
+#include <linux/sysdev.h>
+#include <linux/node.h>
+#include <linux/compiler.h>
+
+#include <asm/semaphore.h>
+
+struct memory_block {
+	unsigned long phys_index;
+	unsigned long state;
+	/*
+	 * This serializes all state change requests.  It isn't
+	 * held during creation because the control files are
+	 * created long after the critical areas during
+	 * initialization.
+	 */
+	struct semaphore state_sem;
+	int phys_device;		/* to which fru does this belong? */
+	void *hw;			/* optional pointer to fw/hw data */
+	int (*phys_callback)(struct memory_block *);
+	struct sys_device sysdev;
+};
+
+/* These states are exposed to userspace as text strings in sysfs */
+#define	MEM_ONLINE		(1<<0) /* exposed to userspace */
+#define	MEM_GOING_OFFLINE	(1<<1) /* exposed to userspace */
+#define	MEM_OFFLINE		(1<<2) /* exposed to userspace */
+
+/*
+ * All of these states are currently kernel-internal for notifying
+ * kernel components and architectures.
+ *
+ * For MEM_MAPPING_INVALID, all notifier chains with priority >0
+ * are called before pfn_to_page() becomes invalid.  The priority=0
+ * entry is reserved for the function that actually makes
+ * pfn_to_page() stop working.  Any notifiers that want to be called
+ * after that should have priority <0.
+ */
+#define	MEM_MAPPING_INVALID	(1<<3)
+
+#ifndef CONFIG_MEMORY_HOTPLUG
+static inline int memory_dev_init(void)
+{
+	return 0;
+}
+static inline int register_memory_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+static inline void unregister_memory_notifier(struct notifier_block *nb)
+{
+}
+#else
+extern int register_memory(struct memory_block *, struct mem_section *section, struct node *);
+extern int register_new_memory(struct mem_section *);
+extern int unregister_memory_section(struct mem_section *);
+extern int memory_dev_init(void);
+extern int register_memory_notifier(struct notifier_block *nb);
+extern void unregister_memory_notifier(struct notifier_block *nb);
+
+#define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
+
+extern int invalidate_phys_mapping(unsigned long, unsigned long);
+struct notifier_block;
+
+extern int register_memory_notifier(struct notifier_block *nb);
+extern void unregister_memory_notifier(struct notifier_block *nb);
+
+extern struct sysdev_class memory_sysdev_class;
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+#define hotplug_memory_notifier(fn, pri) {			\
+	static struct notifier_block fn##_mem_nb =		\
+		{ .notifier_call = fn, .priority = pri };	\
+	register_memory_notifier(&fn##_mem_nb);			\
+}
+
+#endif /* _LINUX_MEMORY_H_ */
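hotplug_memory_notifier() hides the notifier_block boilerplate: it declares a static block named after the callback and registers it on the memory notifier chain. A hypothetical client of the new interface (the callback name and body are illustrative, not part of the patch):

	/* Hypothetical subscriber reacting to memory blocks coming online. */
	static int example_mem_event(struct notifier_block *nb,
				     unsigned long action, void *data)
	{
		if (action == MEM_ONLINE)
			printk(KERN_INFO "memory block onlined\n");
		return 0;
	}

	static int __init example_init(void)
	{
		hotplug_memory_notifier(example_mem_event, 0);
		return 0;
	}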
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
new file mode 100644
index 000000000000..01f03bc06eff
--- /dev/null
+++ b/include/linux/memory_hotplug.h
@@ -0,0 +1,104 @@
+#ifndef __LINUX_MEMORY_HOTPLUG_H
+#define __LINUX_MEMORY_HOTPLUG_H
+
+#include <linux/mmzone.h>
+#include <linux/spinlock.h>
+#include <linux/mmzone.h>
+#include <linux/notifier.h>
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * pgdat resizing functions
+ */
+static inline
+void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
+{
+	spin_lock_irqsave(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
+{
+	spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_init(struct pglist_data *pgdat)
+{
+	spin_lock_init(&pgdat->node_size_lock);
+}
+/*
+ * Zone resizing functions
+ */
+static inline unsigned zone_span_seqbegin(struct zone *zone)
+{
+	return read_seqbegin(&zone->span_seqlock);
+}
+static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
+{
+	return read_seqretry(&zone->span_seqlock, iv);
+}
+static inline void zone_span_writelock(struct zone *zone)
+{
+	write_seqlock(&zone->span_seqlock);
+}
+static inline void zone_span_writeunlock(struct zone *zone)
+{
+	write_sequnlock(&zone->span_seqlock);
+}
+static inline void zone_seqlock_init(struct zone *zone)
+{
+	seqlock_init(&zone->span_seqlock);
+}
+extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
+extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
+extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
+/* need some defines for these for archs that don't support it */
+extern void online_page(struct page *page);
+/* VM interface that may be used by firmware interface */
+extern int add_memory(u64 start, u64 size);
+extern int remove_memory(u64 start, u64 size);
+extern int online_pages(unsigned long, unsigned long);
+
+/* reasonably generic interface to expand the physical pages in a zone */
+extern int __add_pages(struct zone *zone, unsigned long start_pfn,
+	unsigned long nr_pages);
+#else /* ! CONFIG_MEMORY_HOTPLUG */
+/*
+ * Stub functions for when hotplug is off
+ */
+static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
+
+static inline unsigned zone_span_seqbegin(struct zone *zone)
+{
+	return 0;
+}
+static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
+{
+	return 0;
+}
+static inline void zone_span_writelock(struct zone *zone) {}
+static inline void zone_span_writeunlock(struct zone *zone) {}
+static inline void zone_seqlock_init(struct zone *zone) {}
+
+static inline int mhp_notimplemented(const char *func)
+{
+	printk(KERN_WARNING "%s() called, with CONFIG_MEMORY_HOTPLUG disabled\n", func);
+	dump_stack();
+	return -ENOSYS;
+}
+
+static inline int __add_pages(struct zone *zone, unsigned long start_pfn,
+	unsigned long nr_pages)
+{
+	return mhp_notimplemented(__FUNCTION__);
+}
+#endif /* ! CONFIG_MEMORY_HOTPLUG */
+static inline int __remove_pages(struct zone *zone, unsigned long start_pfn,
+	unsigned long nr_pages)
+{
+	printk(KERN_WARNING "%s() called, not yet supported\n", __FUNCTION__);
+	dump_stack();
+	return -ENOSYS;
+}
+#endif /* __LINUX_MEMORY_HOTPLUG_H */
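pgdat_resize_lock() exists because node_start_pfn/node_spanned_pages can now change at runtime; a walker of a node's span brackets its reads with the lock so a concurrent add_memory() cannot resize the node underneath it. A sketch of the intended pattern (the helper below is illustrative, not from the patch):

	/* Illustrative reader of a node's span under the resize lock. */
	static unsigned long example_node_end_pfn(struct pglist_data *pgdat)
	{
		unsigned long flags, end_pfn;

		pgdat_resize_lock(pgdat, &flags);
		end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;
		pgdat_resize_unlock(pgdat, &flags);
		return end_pfn;
	}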
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 58385ee1c0ac..7af8cb836e78 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -27,10 +27,10 @@
 
 #include <linux/config.h>
 #include <linux/mmzone.h>
-#include <linux/bitmap.h>
 #include <linux/slab.h>
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
+#include <linux/nodemask.h>
 
 struct vm_area_struct;
 
@@ -47,8 +47,7 @@ struct vm_area_struct;
  * Locking policy for interlave:
  * In process context there is no locking because only the process accesses
  * its own state. All vma manipulation is somewhat protected by a down_read on
- * mmap_sem. For allocating in the interleave policy the page_table_lock
- * must be also aquired to protect il_next.
+ * mmap_sem.
  *
  * Freeing policy:
  * When policy is MPOL_BIND v.zonelist is kmalloc'ed and must be kfree'd.
@@ -63,7 +62,7 @@ struct mempolicy {
 	union {
 		struct zonelist  *zonelist;	/* bind */
 		short 		 preferred_node; /* preferred */
-		DECLARE_BITMAP(nodes, MAX_NUMNODES); /* interleave */
+		nodemask_t	 nodes;		/* interleave */
 		/* undefined for default */
 	} v;
 };
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e1649578fb0c..5c1fb0a2e806 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -157,7 +157,7 @@ extern unsigned int kobjsize(const void *objp);
 
 #define VM_DONTCOPY	0x00020000      /* Do not copy this vma on fork */
 #define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
-#define VM_RESERVED	0x00080000	/* Don't unmap it from swap_out */
+#define VM_RESERVED	0x00080000	/* Pages managed in a special way */
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
@@ -226,13 +226,18 @@ struct page {
 					 * to show when page is mapped
 					 * & limit reverse map searches.
 					 */
-	unsigned long private;		/* Mapping-private opaque data:
+	union {
+		unsigned long private;	/* Mapping-private opaque data:
 					 * usually used for buffer_heads
 					 * if PagePrivate set; used for
 					 * swp_entry_t if PageSwapCache
 					 * When page is free, this indicates
 					 * order in the buddy system.
 					 */
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+		spinlock_t ptl;
+#endif
+	} u;
 	struct address_space *mapping;	/* If low bit clear, points to
 					 * inode address_space, or NULL.
 					 * If page mapped as anonymous
@@ -260,6 +265,9 @@ struct page {
 #endif /* WANT_PAGE_VIRTUAL */
 };
 
+#define page_private(page)		((page)->u.private)
+#define set_page_private(page, v)	((page)->u.private = (v))
+
 /*
  * FIXME: take this include out, include page-flags.h in
  * files which need it (119 of them)
@@ -311,17 +319,17 @@ extern void FASTCALL(__page_cache_release(struct page *));
 
 #ifdef CONFIG_HUGETLB_PAGE
 
-static inline int page_count(struct page *p)
+static inline int page_count(struct page *page)
 {
-	if (PageCompound(p))
-		p = (struct page *)p->private;
-	return atomic_read(&(p)->_count) + 1;
+	if (PageCompound(page))
+		page = (struct page *)page_private(page);
+	return atomic_read(&page->_count) + 1;
 }
 
 static inline void get_page(struct page *page)
 {
 	if (unlikely(PageCompound(page)))
-		page = (struct page *)page->private;
+		page = (struct page *)page_private(page);
 	atomic_inc(&page->_count);
 }
 
@@ -338,7 +346,7 @@ static inline void get_page(struct page *page)
 
 static inline void put_page(struct page *page)
 {
-	if (!PageReserved(page) && put_page_testzero(page))
+	if (put_page_testzero(page))
 		__page_cache_release(page);
 }
 
@@ -587,7 +595,7 @@ static inline int PageAnon(struct page *page)
 static inline pgoff_t page_index(struct page *page)
 {
 	if (unlikely(PageSwapCache(page)))
-		return page->private;
+		return page_private(page);
 	return page->index;
 }
 
@@ -682,7 +690,7 @@ struct zap_details {
 
 unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlb,
 		struct vm_area_struct *start_vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *);
@@ -704,10 +712,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 }
 
 extern int vmtruncate(struct inode * inode, loff_t offset);
-extern pud_t *FASTCALL(__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
-extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address));
-extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
-extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
 extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
 extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
 extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
@@ -723,6 +727,7 @@ void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
 		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
+void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
 
 int __set_page_dirty_buffers(struct page *page);
 int __set_page_dirty_nobuffers(struct page *page);
@@ -759,38 +764,83 @@ struct shrinker;
 extern struct shrinker *set_shrinker(int, shrinker_t);
 extern void remove_shrinker(struct shrinker *shrinker);
 
-/*
- * On a two-level or three-level page table, this ends up being trivial. Thus
- * the inlining and the symmetry break with pte_alloc_map() that does all
- * of this out-of-line.
- */
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+
 /*
  * The following ifdef needed to get the 4level-fixup.h header to work.
  * Remove it when 4level-fixup.h has been removed.
  */
-#ifdef CONFIG_MMU
-#ifndef __ARCH_HAS_4LEVEL_HACK
+#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
 static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 {
-	if (pgd_none(*pgd))
-		return __pud_alloc(mm, pgd, address);
-	return pud_offset(pgd, address);
+	return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
+		NULL: pud_offset(pgd, address);
 }
 
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 {
-	if (pud_none(*pud))
-		return __pmd_alloc(mm, pud, address);
-	return pmd_offset(pud, address);
+	return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
+		NULL: pmd_offset(pud, address);
 }
-#endif
-#endif /* CONFIG_MMU */
+#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
+
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * We tuck a spinlock to guard each pagetable page into its struct page,
+ * at page->private, with BUILD_BUG_ON to make sure that this will not
+ * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
+ * When freeing, reset page->mapping so free_pages_check won't complain.
+ */
+#define __pte_lockptr(page)	&((page)->u.ptl)
+#define pte_lock_init(_page)	do {					\
+	spin_lock_init(__pte_lockptr(_page));				\
+} while (0)
+#define pte_lock_deinit(page)	((page)->mapping = NULL)
+#define pte_lockptr(mm, pmd)	({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
+#else
+/*
+ * We use mm->page_table_lock to guard all pagetable pages of the mm.
+ */
+#define pte_lock_init(page)	do {} while (0)
+#define pte_lock_deinit(page)	do {} while (0)
+#define pte_lockptr(mm, pmd)	({(void)(pmd); &(mm)->page_table_lock;})
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+
+#define pte_offset_map_lock(mm, pmd, address, ptlp)	\
+({							\
+	spinlock_t *__ptl = pte_lockptr(mm, pmd);	\
+	pte_t *__pte = pte_offset_map(pmd, address);	\
+	*(ptlp) = __ptl;				\
+	spin_lock(__ptl);				\
+	__pte;						\
+})
+
+#define pte_unmap_unlock(pte, ptl)	do {		\
+	spin_unlock(ptl);				\
+	pte_unmap(pte);					\
+} while (0)
+
+#define pte_alloc_map(mm, pmd, address)			\
+	((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+		NULL: pte_offset_map(pmd, address))
+
+#define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
+	((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+		NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
+
+#define pte_alloc_kernel(pmd, address)			\
+	((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+		NULL: pte_offset_kernel(pmd, address))
 
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, pg_data_t *pgdat,
 	unsigned long * zones_size, unsigned long zone_start_pfn,
 	unsigned long *zholes_size);
 extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
+extern void setup_per_zone_pages_min(void);
 extern void mem_init(void);
 extern void show_mem(void);
 extern void si_meminfo(struct sysinfo * val);
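pte_offset_map_lock() is the new walk idiom that replaces taking mm->page_table_lock directly: pte_lockptr() resolves to either the per-page ptl (split case) or mm->page_table_lock, the pte page is mapped and the lock taken, and pte_unmap_unlock() undoes both. A sketch of a walker using it (the function is illustrative, not from the patch):

	/* Illustrative pte walker under the new split pte lock. */
	static int example_touch_pte(struct mm_struct *mm, pmd_t *pmd,
				     unsigned long addr)
	{
		spinlock_t *ptl;
		pte_t *pte;

		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
		if (!pte_present(*pte)) {
			pte_unmap_unlock(pte, ptl);
			return -EFAULT;
		}
		/* ... operate on *pte while the pte lock is held ... */
		pte_unmap_unlock(pte, ptl);
		return 0;
	}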
@@ -834,6 +884,7 @@ extern int split_vma(struct mm_struct *,
 extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
 	struct rb_node **, struct rb_node *);
+extern void unlink_file_vma(struct vm_area_struct *);
 extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
 	unsigned long addr, unsigned long len, pgoff_t pgoff);
 extern void exit_mmap(struct mm_struct *);
@@ -894,7 +945,8 @@ void handle_ra_miss(struct address_space *mapping,
894unsigned long max_sane_readahead(unsigned long nr); 945unsigned long max_sane_readahead(unsigned long nr);
895 946
896/* Do stack extension */ 947/* Do stack extension */
897extern int expand_stack(struct vm_area_struct * vma, unsigned long address); 948extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
949extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
898 950
899/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ 951/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
900extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); 952extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
@@ -917,40 +969,28 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
917 return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 969 return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
918} 970}
919 971
920extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr); 972struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
973struct page *vmalloc_to_page(void *addr);
974unsigned long vmalloc_to_pfn(void *addr);
975int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
976 unsigned long pfn, unsigned long size, pgprot_t);
921 977
922extern struct page * vmalloc_to_page(void *addr); 978struct page *follow_page(struct mm_struct *, unsigned long address,
923extern unsigned long vmalloc_to_pfn(void *addr); 979 unsigned int foll_flags);
924extern struct page * follow_page(struct mm_struct *mm, unsigned long address, 980#define FOLL_WRITE 0x01 /* check pte is writable */
925 int write); 981#define FOLL_TOUCH 0x02 /* mark page accessed */
926extern int check_user_page_readable(struct mm_struct *mm, unsigned long address); 982#define FOLL_GET 0x04 /* do get_page on page */
927int remap_pfn_range(struct vm_area_struct *, unsigned long, 983#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */
928 unsigned long, unsigned long, pgprot_t);
929 984
930#ifdef CONFIG_PROC_FS 985#ifdef CONFIG_PROC_FS
931void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long); 986void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
932#else 987#else
933static inline void __vm_stat_account(struct mm_struct *mm, 988static inline void vm_stat_account(struct mm_struct *mm,
934 unsigned long flags, struct file *file, long pages) 989 unsigned long flags, struct file *file, long pages)
935{ 990{
936} 991}
937#endif /* CONFIG_PROC_FS */ 992#endif /* CONFIG_PROC_FS */
938 993
939static inline void vm_stat_account(struct vm_area_struct *vma)
940{
941 __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
942 vma_pages(vma));
943}
944
945static inline void vm_stat_unaccount(struct vm_area_struct *vma)
946{
947 __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
948 -vma_pages(vma));
949}
950
951/* update per process rss and vm hiwater data */
952extern void update_mem_hiwater(struct task_struct *tsk);
953
954#ifndef CONFIG_DEBUG_PAGEALLOC 994#ifndef CONFIG_DEBUG_PAGEALLOC
955static inline void 995static inline void
956kernel_map_pages(struct page *page, int numpages, int enable) 996kernel_map_pages(struct page *page, int numpages, int enable)
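
follow_page() now takes a mask of the FOLL_* flags above in place of the old write argument, so one call site can state whether it needs write permission, a page reference, or accessed-bit maintenance. A hedged sketch of a caller under the new signature (hypothetical helper; assumes mmap_sem is held):

#include <linux/mm.h>

/* Hypothetical: find the page backing addr, take a reference on it and
 * mark it accessed; returns NULL if there is no suitable mapping. */
static struct page *grab_user_page(struct mm_struct *mm,
                                   unsigned long addr, int write)
{
        unsigned int flags = FOLL_GET | FOLL_TOUCH;

        if (write)
                flags |= FOLL_WRITE;    /* require a writable pte */
        return follow_page(mm, addr, flags);
}
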
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7519eb4191e7..f5fa3082fd6a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -12,6 +12,7 @@
12#include <linux/threads.h> 12#include <linux/threads.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/seqlock.h>
15#include <asm/atomic.h> 16#include <asm/atomic.h>
16 17
17/* Free memory management - zoned buddy allocator. */ 18/* Free memory management - zoned buddy allocator. */
@@ -137,6 +138,10 @@ struct zone {
137 * free areas of different sizes 138 * free areas of different sizes
138 */ 139 */
139 spinlock_t lock; 140 spinlock_t lock;
141#ifdef CONFIG_MEMORY_HOTPLUG
142 /* see spanned/present_pages for more description */
143 seqlock_t span_seqlock;
144#endif
140 struct free_area free_area[MAX_ORDER]; 145 struct free_area free_area[MAX_ORDER];
141 146
142 147
@@ -220,6 +225,16 @@ struct zone {
220 /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ 225 /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
221 unsigned long zone_start_pfn; 226 unsigned long zone_start_pfn;
222 227
228 /*
229 * zone_start_pfn, spanned_pages and present_pages are all
230 * protected by span_seqlock. It is a seqlock because it has
231 * to be read outside of zone->lock, and it is done in the main
232 * allocator path. But, it is written quite infrequently.
233 *
234 * The lock is declared along with zone->lock because it is
235 * frequently read in proximity to zone->lock. It's good to
236 * give them a chance of being in the same cacheline.
237 */
223 unsigned long spanned_pages; /* total size, including holes */ 238 unsigned long spanned_pages; /* total size, including holes */
224 unsigned long present_pages; /* amount of memory (excluding holes) */ 239 unsigned long present_pages; /* amount of memory (excluding holes) */
225 240
@@ -273,6 +288,16 @@ typedef struct pglist_data {
273 struct page *node_mem_map; 288 struct page *node_mem_map;
274#endif 289#endif
275 struct bootmem_data *bdata; 290 struct bootmem_data *bdata;
291#ifdef CONFIG_MEMORY_HOTPLUG
292 /*
293 * Must be held any time you expect node_start_pfn, node_present_pages
 294 * or node_spanned_pages to stay constant. Holding this will also
 295 * guarantee that any pfn_valid() stays that way.
 296 *
 297 * Nests above zone->lock and zone->span_seqlock.
298 */
299 spinlock_t node_size_lock;
300#endif
276 unsigned long node_start_pfn; 301 unsigned long node_start_pfn;
277 unsigned long node_present_pages; /* total number of physical pages */ 302 unsigned long node_present_pages; /* total number of physical pages */
278 unsigned long node_spanned_pages; /* total size of physical page 303 unsigned long node_spanned_pages; /* total size of physical page
@@ -293,6 +318,8 @@ typedef struct pglist_data {
293#endif 318#endif
294#define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) 319#define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr))
295 320
321#include <linux/memory_hotplug.h>
322
296extern struct pglist_data *pgdat_list; 323extern struct pglist_data *pgdat_list;
297 324
298void __get_zone_counts(unsigned long *active, unsigned long *inactive, 325void __get_zone_counts(unsigned long *active, unsigned long *inactive,
@@ -509,6 +536,7 @@ static inline struct mem_section *__nr_to_section(unsigned long nr)
509 return NULL; 536 return NULL;
510 return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; 537 return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
511} 538}
539extern int __section_nr(struct mem_section* ms);
512 540
513/* 541/*
514 * We use the lower bits of the mem_map pointer to store 542 * We use the lower bits of the mem_map pointer to store
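
The span_seqlock comment above describes the usual seqlock read pattern: sample the sequence count, read the span fields, and retry if a hotplug writer intervened. A sketch of a reader, assuming the zone_span_seqbegin()/zone_span_seqretry() wrappers provided by the memory_hotplug.h this header now includes (with hotplug disabled they reduce to no-ops, so the loop runs once):

#include <linux/mmzone.h>
#include <linux/memory_hotplug.h>

/* Check whether pfn lies in zone's current span, rereading if a
 * hot-add writer moved zone_start_pfn/spanned_pages underneath us. */
static int pfn_in_zone_span(struct zone *zone, unsigned long pfn)
{
        unsigned seq;
        int in_span;

        do {
                seq = zone_span_seqbegin(zone);
                in_span = pfn >= zone->zone_start_pfn &&
                          pfn < zone->zone_start_pfn + zone->spanned_pages;
        } while (zone_span_seqretry(zone, seq));

        return in_span;
}
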
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index e80fb7ee6efd..35b30e6c8cf8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -95,8 +95,8 @@ int try_to_unmap(struct page *);
95/* 95/*
96 * Called from mm/filemap_xip.c to unmap empty zero page 96 * Called from mm/filemap_xip.c to unmap empty zero page
97 */ 97 */
98pte_t *page_check_address(struct page *, struct mm_struct *, unsigned long); 98pte_t *page_check_address(struct page *, struct mm_struct *,
99 99 unsigned long, spinlock_t **);
100 100
101/* 101/*
102 * Used by swapoff to help locate where page is expected in vma. 102 * Used by swapoff to help locate where page is expected in vma.
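
page_check_address() now hands back the pte with its page table lock already held, returning the lock through the new spinlock_t ** argument; the caller releases both with pte_unmap_unlock() from mm.h. A sketch under that assumption (hypothetical helper; relies on the NULL return meaning the page is not mapped there):

#include <linux/mm.h>
#include <linux/rmap.h>

/* Hypothetical: report whether page is currently mapped at address. */
static int page_mapped_here(struct page *page, struct mm_struct *mm,
                            unsigned long address)
{
        spinlock_t *ptl;
        pte_t *pte;

        pte = page_check_address(page, mm, address, &ptl);
        if (!pte)
                return 0;
        /* pte is mapped and ptl is held here; release both together. */
        pte_unmap_unlock(pte, ptl);
        return 1;
}
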
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index b52a2af25f1f..f30f805080ae 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -61,5 +61,10 @@ extern void FASTCALL(__up_read(struct rw_semaphore *sem));
61extern void FASTCALL(__up_write(struct rw_semaphore *sem)); 61extern void FASTCALL(__up_write(struct rw_semaphore *sem));
62extern void FASTCALL(__downgrade_write(struct rw_semaphore *sem)); 62extern void FASTCALL(__downgrade_write(struct rw_semaphore *sem));
63 63
64static inline int rwsem_is_locked(struct rw_semaphore *sem)
65{
66 return (sem->activity != 0);
67}
68
64#endif /* __KERNEL__ */ 69#endif /* __KERNEL__ */
65#endif /* _LINUX_RWSEM_SPINLOCK_H */ 70#endif /* _LINUX_RWSEM_SPINLOCK_H */
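
rwsem_is_locked() reads ->activity (->count in the per-arch variants) without taking wait_lock, so it is a momentary hint rather than a synchronized test: fine for assertions, wrong for locking decisions. An illustrative debug check (not from this patch):

#include <linux/kernel.h>
#include <linux/rwsem.h>

static DECLARE_RWSEM(example_sem);

/* Callers of this path are expected to already hold example_sem. */
static void assert_example_sem_held(void)
{
        BUG_ON(!rwsem_is_locked(&example_sem));
}
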
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 7f717e95ae37..66ff545552f7 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -1,14 +1,23 @@
1#ifndef _LINUX_SCATTERLIST_H 1#ifndef _LINUX_SCATTERLIST_H
2#define _LINUX_SCATTERLIST_H 2#define _LINUX_SCATTERLIST_H
3 3
4static inline void sg_init_one(struct scatterlist *sg, 4#include <asm/scatterlist.h>
5 u8 *buf, unsigned int buflen) 5#include <linux/mm.h>
6{ 6#include <linux/string.h>
7 memset(sg, 0, sizeof(*sg));
8 7
8static inline void sg_set_buf(struct scatterlist *sg, void *buf,
9 unsigned int buflen)
10{
9 sg->page = virt_to_page(buf); 11 sg->page = virt_to_page(buf);
10 sg->offset = offset_in_page(buf); 12 sg->offset = offset_in_page(buf);
11 sg->length = buflen; 13 sg->length = buflen;
12} 14}
13 15
16static inline void sg_init_one(struct scatterlist *sg, void *buf,
17 unsigned int buflen)
18{
19 memset(sg, 0, sizeof(*sg));
20 sg_set_buf(sg, buf, buflen);
21}
22
14#endif /* _LINUX_SCATTERLIST_H */ 23#endif /* _LINUX_SCATTERLIST_H */
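
Splitting sg_set_buf() out of sg_init_one() lets a caller either set up a single zeroed entry in one call or fill successive entries of a larger list after its own memset(). A sketch of the single-entry case (hypothetical function; the buffer must be page-backed lowmem, e.g. from kmalloc(), because sg_set_buf() uses virt_to_page()):

#include <linux/scatterlist.h>
#include <linux/slab.h>

/* Hypothetical: build a one-entry scatterlist over a fresh 512-byte
 * buffer; the buffer stays reachable through sg->page afterwards. */
static int example_fill_sg(struct scatterlist *sg)
{
        void *buf = kmalloc(512, GFP_KERNEL);

        if (!buf)
                return -ENOMEM;
        sg_init_one(sg, buf, 512);      /* memset entry, then point it at buf */
        return 0;
}
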
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 27519df0f987..1c30bc308ef1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -249,6 +249,36 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
249extern void arch_unmap_area(struct mm_struct *, unsigned long); 249extern void arch_unmap_area(struct mm_struct *, unsigned long);
250extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); 250extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
251 251
252#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
253/*
254 * The mm counters are not protected by the mm's page_table_lock,
255 * so they must be incremented atomically.
256 */
257#ifdef ATOMIC64_INIT
258#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value)
259#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member))
260#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member)
261#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member)
262#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member)
263typedef atomic64_t mm_counter_t;
264#else /* !ATOMIC64_INIT */
265/*
266 * The counters wrap back to 0 at 2^32 * PAGE_SIZE,
267 * that is, at 16TB if using 4kB page size.
268 */
269#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value)
270#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member))
271#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member)
272#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member)
273#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member)
274typedef atomic_t mm_counter_t;
275#endif /* !ATOMIC64_INIT */
276
277#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
278/*
279 * The mm counters are protected by the mm's page_table_lock,
280 * so they can be incremented directly.
281 */
252#define set_mm_counter(mm, member, value) (mm)->_##member = (value) 282#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
253#define get_mm_counter(mm, member) ((mm)->_##member) 283#define get_mm_counter(mm, member) ((mm)->_##member)
254#define add_mm_counter(mm, member, value) (mm)->_##member += (value) 284#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
@@ -256,6 +286,20 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
256#define dec_mm_counter(mm, member) (mm)->_##member-- 286#define dec_mm_counter(mm, member) (mm)->_##member--
257typedef unsigned long mm_counter_t; 287typedef unsigned long mm_counter_t;
258 288
289#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
290
291#define get_mm_rss(mm) \
292 (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
293#define update_hiwater_rss(mm) do { \
294 unsigned long _rss = get_mm_rss(mm); \
295 if ((mm)->hiwater_rss < _rss) \
296 (mm)->hiwater_rss = _rss; \
297} while (0)
298#define update_hiwater_vm(mm) do { \
299 if ((mm)->hiwater_vm < (mm)->total_vm) \
300 (mm)->hiwater_vm = (mm)->total_vm; \
301} while (0)
302
259struct mm_struct { 303struct mm_struct {
260 struct vm_area_struct * mmap; /* list of VMAs */ 304 struct vm_area_struct * mmap; /* list of VMAs */
261 struct rb_root mm_rb; 305 struct rb_root mm_rb;
@@ -279,15 +323,20 @@ struct mm_struct {
279 * by mmlist_lock 323 * by mmlist_lock
280 */ 324 */
281 325
326 /* Special counters, in some configurations protected by the
327 * page_table_lock, in other configurations by being atomic.
328 */
329 mm_counter_t _file_rss;
330 mm_counter_t _anon_rss;
331
332 unsigned long hiwater_rss; /* High-watermark of RSS usage */
333 unsigned long hiwater_vm; /* High-water virtual memory usage */
334
335 unsigned long total_vm, locked_vm, shared_vm, exec_vm;
336 unsigned long stack_vm, reserved_vm, def_flags, nr_ptes;
282 unsigned long start_code, end_code, start_data, end_data; 337 unsigned long start_code, end_code, start_data, end_data;
283 unsigned long start_brk, brk, start_stack; 338 unsigned long start_brk, brk, start_stack;
284 unsigned long arg_start, arg_end, env_start, env_end; 339 unsigned long arg_start, arg_end, env_start, env_end;
285 unsigned long total_vm, locked_vm, shared_vm;
286 unsigned long exec_vm, stack_vm, reserved_vm, def_flags, nr_ptes;
287
288 /* Special counters protected by the page_table_lock */
289 mm_counter_t _rss;
290 mm_counter_t _anon_rss;
291 340
292 unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ 341 unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
293 342
@@ -308,11 +357,7 @@ struct mm_struct {
308 /* aio bits */ 357 /* aio bits */
309 rwlock_t ioctx_list_lock; 358 rwlock_t ioctx_list_lock;
310 struct kioctx *ioctx_list; 359 struct kioctx *ioctx_list;
311
312 struct kioctx default_kioctx; 360 struct kioctx default_kioctx;
313
314 unsigned long hiwater_rss; /* High-water RSS usage */
315 unsigned long hiwater_vm; /* High-water virtual memory usage */
316}; 361};
317 362
318struct sighand_struct { 363struct sighand_struct {
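
Because _file_rss and _anon_rss may now be updated without page_table_lock, every update has to go through the accessor macros, which expand to atomic operations on split-ptlock configurations and to plain arithmetic otherwise; the hiwater marks are refreshed explicitly with the update_hiwater_*() macros. An illustrative fragment, not taken from the patch itself:

#include <linux/sched.h>

/* Illustrative: account one newly mapped anonymous page to mm.  With
 * NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS inc_mm_counter() is atomic;
 * otherwise the caller must hold mm->page_table_lock. */
static void account_anon_page(struct mm_struct *mm)
{
        inc_mm_counter(mm, anon_rss);
        update_hiwater_rss(mm); /* raise hiwater_rss if rss now exceeds it */
}
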
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 3701a0673d2c..1d5577b2b752 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -32,10 +32,14 @@ struct vm_struct {
32 * Highlevel APIs for driver use 32 * Highlevel APIs for driver use
33 */ 33 */
34extern void *vmalloc(unsigned long size); 34extern void *vmalloc(unsigned long size);
35extern void *vmalloc_node(unsigned long size, int node);
35extern void *vmalloc_exec(unsigned long size); 36extern void *vmalloc_exec(unsigned long size);
36extern void *vmalloc_32(unsigned long size); 37extern void *vmalloc_32(unsigned long size);
37extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); 38extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
38extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot); 39extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
40 pgprot_t prot);
41extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
42 pgprot_t prot, int node);
39extern void vfree(void *addr); 43extern void vfree(void *addr);
40 44
41extern void *vmap(struct page **pages, unsigned int count, 45extern void *vmap(struct page **pages, unsigned int count,
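
vmalloc_node() and __vmalloc_node() extend the vmalloc family with NUMA placement: the backing pages are allocated on the given node while the mapping remains global. A hedged sketch of a caller (hypothetical helper; the explicit vmalloc() fallback is the caller's own policy, not something the API promises):

#include <linux/vmalloc.h>

/* Hypothetical: per-node scratch buffer, preferring memory on 'node'. */
static void *alloc_node_scratch(int node, unsigned long size)
{
        void *p = vmalloc_node(size, node);

        if (!p)
                p = vmalloc(size);      /* any node beats failing outright */
        return p;
}
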
@@ -48,6 +52,8 @@ extern void vunmap(void *addr);
48extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); 52extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
49extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, 53extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
50 unsigned long start, unsigned long end); 54 unsigned long start, unsigned long end);
55extern struct vm_struct *get_vm_area_node(unsigned long size,
56 unsigned long flags, int node);
51extern struct vm_struct *remove_vm_area(void *addr); 57extern struct vm_struct *remove_vm_area(void *addr);
52extern struct vm_struct *__remove_vm_area(void *addr); 58extern struct vm_struct *__remove_vm_area(void *addr);
53extern int map_vm_area(struct vm_struct *area, pgprot_t prot, 59extern int map_vm_area(struct vm_struct *area, pgprot_t prot,