Diffstat (limited to 'arch')
 arch/alpha/mm/numa.c                |    3
 arch/alpha/mm/remap.c               |    6
 arch/arm/kernel/signal.c            |   96
 arch/arm/kernel/traps.c             |   14
 arch/arm/mm/consistent.c            |    6
 arch/arm/mm/fault-armv.c            |    7
 arch/arm/mm/ioremap.c               |    4
 arch/arm/mm/mm-armv.c               |   15
 arch/arm/oprofile/backtrace.c       |   46
 arch/arm26/mm/memc.c                |   18
 arch/cris/arch-v32/mm/tlb.c         |    6
 arch/cris/mm/ioremap.c              |    4
 arch/frv/mm/dma-alloc.c             |    5
 arch/frv/mm/pgalloc.c               |    4
 arch/i386/kernel/vm86.c             |   17
 arch/i386/mm/discontig.c            |    4
 arch/i386/mm/init.c                 |   62
 arch/i386/mm/ioremap.c              |    4
 arch/i386/mm/pgtable.c              |   11
 arch/i386/oprofile/backtrace.c      |   38
 arch/ia64/kernel/perfmon.c          |    3
 arch/ia64/mm/discontig.c            |    7
 arch/ia64/mm/fault.c                |   34
 arch/ia64/mm/init.c                 |   13
 arch/ia64/mm/tlb.c                  |    2
 arch/m32r/mm/init.c                 |    9
 arch/m32r/mm/ioremap.c              |    4
 arch/m68k/Kconfig                   |   24
 arch/m68k/atari/stram.c             |  918
 arch/m68k/mm/kmap.c                 |    2
 arch/m68k/sun3x/dvma.c              |    2
 arch/mips/kernel/irixelf.c          |    1
 arch/mips/mm/ioremap.c              |    4
 arch/parisc/kernel/cache.c          |   24
 arch/parisc/kernel/pci-dma.c        |    2
 arch/parisc/mm/init.c               |    3
 arch/parisc/mm/ioremap.c            |    6
 arch/ppc/kernel/dma-mapping.c       |    6
 arch/ppc/mm/4xx_mmu.c               |    4
 arch/ppc/mm/pgtable.c               |    4
 arch/ppc64/kernel/vdso.c            |   12
 arch/ppc64/mm/imalloc.c             |    5
 arch/ppc64/mm/init.c                |   87
 arch/s390/mm/ioremap.c              |    4
 arch/sh/mm/fault.c                  |   40
 arch/sh/mm/hugetlbpage.c            |    2
 arch/sh/mm/ioremap.c                |    4
 arch/sh64/mm/cache.c                |   68
 arch/sh64/mm/hugetlbpage.c          |  188
 arch/sh64/mm/ioremap.c              |    4
 arch/sparc/mm/generic.c             |    7
 arch/sparc64/kernel/binfmt_aout32.c |    1
 arch/sparc64/mm/generic.c           |    9
 arch/sparc64/mm/tlb.c               |    7
 arch/um/include/tlb.h               |    1
 arch/um/kernel/process_kern.c       |    8
 arch/um/kernel/skas/mmu.c           |    4
 arch/um/kernel/tt/tlb.c             |   36
 arch/x86_64/ia32/ia32_aout.c        |    1
 arch/x86_64/mm/ioremap.c            |    4
 60 files changed, 390 insertions(+), 1544 deletions(-)
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index c7481d59b6df..6d5251254f68 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -371,6 +371,8 @@ show_mem(void)
 	show_free_areas();
 	printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_online_node(nid) {
+		unsigned long flags;
+		pgdat_resize_lock(NODE_DATA(nid), &flags);
 		i = node_spanned_pages(nid);
 		while (i-- > 0) {
 			struct page *page = nid_page_nr(nid, i);
@@ -384,6 +386,7 @@ show_mem(void)
 			else
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(NODE_DATA(nid), &flags);
 	}
 	printk("%ld pages of RAM\n",total);
 	printk("%ld free pages\n",free);
diff --git a/arch/alpha/mm/remap.c b/arch/alpha/mm/remap.c
index 19817ad3d89b..a78356c3ead5 100644
--- a/arch/alpha/mm/remap.c
+++ b/arch/alpha/mm/remap.c
@@ -2,7 +2,6 @@
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 
-/* called with the page_table_lock held */
 static inline void
 remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
 	       unsigned long phys_addr, unsigned long flags)
@@ -31,7 +30,6 @@ remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
 	} while (address && (address < end));
 }
 
-/* called with the page_table_lock held */
 static inline int
 remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
 	       unsigned long phys_addr, unsigned long flags)
@@ -46,7 +44,7 @@ remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address,
@@ -70,7 +68,6 @@ __alpha_remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd;
 		pmd = pmd_alloc(&init_mm, dir, address);
@@ -84,7 +81,6 @@ __alpha_remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	return error;
 }
 
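Every ioremap-style implementation in this series gets the same two-part change seen above: pte_alloc_kernel() loses its &init_mm argument, and the explicit init_mm.page_table_lock bracket around the walk disappears, because allocation of kernel page tables is now serialized inside the helper. A minimal sketch of a caller under the narrowed 2.6.15-era interface; map_kernel_page() is a hypothetical name, not a function from this patch:

static int map_kernel_page(unsigned long addr, struct page *page, pgprot_t prot)
{
	pgd_t *pgd = pgd_offset_k(addr);
	pud_t *pud = pud_alloc(&init_mm, pgd, addr);
	pmd_t *pmd;
	pte_t *pte;

	if (!pud)
		return -ENOMEM;
	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	/* allocation of the pte page is serialized internally now */
	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	/* caller is assumed to have exclusive use of this address range */
	set_pte(pte, mk_pte(page, prot));
	return 0;
}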
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index a94d75fef598..a917e3dd3666 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -139,93 +139,33 @@ struct iwmmxt_sigframe {
 	unsigned long	storage[0x98/4];
 };
 
-static int page_present(struct mm_struct *mm, void __user *uptr, int wr)
-{
-	unsigned long addr = (unsigned long)uptr;
-	pgd_t *pgd = pgd_offset(mm, addr);
-	if (pgd_present(*pgd)) {
-		pmd_t *pmd = pmd_offset(pgd, addr);
-		if (pmd_present(*pmd)) {
-			pte_t *pte = pte_offset_map(pmd, addr);
-			return (pte_present(*pte) && (!wr || pte_write(*pte)));
-		}
-	}
-	return 0;
-}
-
-static int copy_locked(void __user *uptr, void *kptr, size_t size, int write,
-		       void (*copyfn)(void *, void __user *))
-{
-	unsigned char v, __user *userptr = uptr;
-	int err = 0;
-
-	do {
-		struct mm_struct *mm;
-
-		if (write) {
-			__put_user_error(0, userptr, err);
-			__put_user_error(0, userptr + size - 1, err);
-		} else {
-			__get_user_error(v, userptr, err);
-			__get_user_error(v, userptr + size - 1, err);
-		}
-
-		if (err)
-			break;
-
-		mm = current->mm;
-		spin_lock(&mm->page_table_lock);
-		if (page_present(mm, userptr, write) &&
-		    page_present(mm, userptr + size - 1, write)) {
-			copyfn(kptr, uptr);
-		} else
-			err = 1;
-		spin_unlock(&mm->page_table_lock);
-	} while (err);
-
-	return err;
-}
-
 static int preserve_iwmmxt_context(struct iwmmxt_sigframe *frame)
 {
-	int err = 0;
+	char kbuf[sizeof(*frame) + 8];
+	struct iwmmxt_sigframe *kframe;
 
 	/* the iWMMXt context must be 64 bit aligned */
-	WARN_ON((unsigned long)frame & 7);
-
-	__put_user_error(IWMMXT_MAGIC0, &frame->magic0, err);
-	__put_user_error(IWMMXT_MAGIC1, &frame->magic1, err);
-
-	/*
-	 * iwmmxt_task_copy() doesn't check user permissions.
-	 * Let's do a dummy write on the upper boundary to ensure
-	 * access to user mem is OK all way up.
-	 */
-	err |= copy_locked(&frame->storage, current_thread_info(),
-			   sizeof(frame->storage), 1, iwmmxt_task_copy);
-	return err;
+	kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7);
+	kframe->magic0 = IWMMXT_MAGIC0;
+	kframe->magic1 = IWMMXT_MAGIC1;
+	iwmmxt_task_copy(current_thread_info(), &kframe->storage);
+	return __copy_to_user(frame, kframe, sizeof(*frame));
 }
 
 static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame)
 {
-	unsigned long magic0, magic1;
-	int err = 0;
+	char kbuf[sizeof(*frame) + 8];
+	struct iwmmxt_sigframe *kframe;
 
-	/* the iWMMXt context is 64 bit aligned */
-	WARN_ON((unsigned long)frame & 7);
-
-	/*
-	 * Validate iWMMXt context signature.
-	 * Also, iwmmxt_task_restore() doesn't check user permissions.
-	 * Let's do a dummy write on the upper boundary to ensure
-	 * access to user mem is OK all way up.
-	 */
-	__get_user_error(magic0, &frame->magic0, err);
-	__get_user_error(magic1, &frame->magic1, err);
-	if (!err && magic0 == IWMMXT_MAGIC0 && magic1 == IWMMXT_MAGIC1)
-		err = copy_locked(&frame->storage, current_thread_info(),
-				  sizeof(frame->storage), 0, iwmmxt_task_restore);
-	return err;
+	/* the iWMMXt context must be 64 bit aligned */
+	kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7);
+	if (__copy_from_user(kframe, frame, sizeof(*frame)))
+		return -1;
+	if (kframe->magic0 != IWMMXT_MAGIC0 ||
+	    kframe->magic1 != IWMMXT_MAGIC1)
+		return -1;
+	iwmmxt_task_restore(current_thread_info(), &kframe->storage);
+	return 0;
 }
 
 #endif
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index baa09601a64e..66e5a0516f23 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -483,29 +483,33 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		unsigned long addr = regs->ARM_r2;
 		struct mm_struct *mm = current->mm;
 		pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+		spinlock_t *ptl;
 
 		regs->ARM_cpsr &= ~PSR_C_BIT;
-		spin_lock(&mm->page_table_lock);
+		down_read(&mm->mmap_sem);
 		pgd = pgd_offset(mm, addr);
 		if (!pgd_present(*pgd))
 			goto bad_access;
 		pmd = pmd_offset(pgd, addr);
 		if (!pmd_present(*pmd))
 			goto bad_access;
-		pte = pte_offset_map(pmd, addr);
-		if (!pte_present(*pte) || !pte_write(*pte))
+		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+		if (!pte_present(*pte) || !pte_write(*pte)) {
+			pte_unmap_unlock(pte, ptl);
 			goto bad_access;
+		}
 		val = *(unsigned long *)addr;
 		val -= regs->ARM_r0;
 		if (val == 0) {
 			*(unsigned long *)addr = regs->ARM_r1;
 			regs->ARM_cpsr |= PSR_C_BIT;
 		}
-		spin_unlock(&mm->page_table_lock);
+		pte_unmap_unlock(pte, ptl);
+		up_read(&mm->mmap_sem);
 		return val;
 
 	bad_access:
-		spin_unlock(&mm->page_table_lock);
+		up_read(&mm->mmap_sem);
 		/* simulate a write access fault */
 		do_DataAbort(addr, 15 + (1 << 11), regs);
 		return -1;
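The SWP-emulation conversion above is the template for the whole series: the mm-wide page_table_lock becomes a read hold on mmap_sem (so the page tables cannot be freed underneath us), and pte_offset_map_lock() takes the per-page-table spinlock, which may be split per pte page, only around the pte test and update. A sketch of that idiom, assuming the 2.6.15-era interfaces and a hypothetical helper name; the four-level walk is spelled out even though it folds away on two-level architectures:

static int touch_user_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	spinlock_t *ptl;
	int ret = -EFAULT;

	down_read(&mm->mmap_sem);	/* keeps the page tables stable */
	pgd = pgd_offset(mm, addr);
	if (pgd_none_or_clear_bad(pgd))
		goto out;
	pud = pud_offset(pgd, addr);
	if (pud_none_or_clear_bad(pud))
		goto out;
	pmd = pmd_offset(pud, addr);
	if (pmd_none_or_clear_bad(pmd))
		goto out;
	/* maps the pte page and takes its (possibly split) lock */
	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (pte_present(*pte)) {
		set_pte(pte, pte_mkyoung(*pte));	/* example update */
		ret = 0;
	}
	pte_unmap_unlock(pte, ptl);
out:
	up_read(&mm->mmap_sem);
	return ret;
}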
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c
index 82f4d5e27c54..47b0b767f080 100644
--- a/arch/arm/mm/consistent.c
+++ b/arch/arm/mm/consistent.c
@@ -397,8 +397,6 @@ static int __init consistent_init(void)
 	pte_t *pte;
 	int ret = 0;
 
-	spin_lock(&init_mm.page_table_lock);
-
 	do {
 		pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
 		pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
@@ -409,7 +407,7 @@ static int __init consistent_init(void)
 		}
 		WARN_ON(!pmd_none(*pmd));
 
-		pte = pte_alloc_kernel(&init_mm, pmd, CONSISTENT_BASE);
+		pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
 		if (!pte) {
 			printk(KERN_ERR "%s: no pte tables\n", __func__);
 			ret = -ENOMEM;
@@ -419,8 +417,6 @@ static int __init consistent_init(void)
 		consistent_pte = pte;
 	} while (0);
 
-	spin_unlock(&init_mm.page_table_lock);
-
 	return ret;
 }
 
426 422
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index be4ab3d73c91..7fc1b35a6746 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -26,6 +26,11 @@ static unsigned long shared_pte_mask = L_PTE_CACHEABLE;
 /*
  * We take the easy way out of this problem - we make the
  * PTE uncacheable.  However, we leave the write buffer on.
+ *
+ * Note that the pte lock held when calling update_mmu_cache must also
+ * guard the pte (somewhere else in the same mm) that we modify here.
+ * Therefore those configurations which might call adjust_pte (those
+ * without CONFIG_CPU_CACHE_VIPT) cannot support split page_table_lock.
  */
 static int adjust_pte(struct vm_area_struct *vma, unsigned long address)
 {
@@ -127,7 +132,7 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page);
  *  2. If we have multiple shared mappings of the same space in
  *     an object, we need to deal with the cache aliasing issues.
  *
- * Note that the page_table_lock will be held.
+ * Note that the pte lock will be held.
  */
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 {
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 6fb1258df1b5..0f128c28fee4 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -75,7 +75,7 @@ remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
 
 	pgprot = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_WRITE | flags);
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, pgprot);
@@ -97,7 +97,6 @@ remap_area_pages(unsigned long start, unsigned long phys_addr,
 	phys_addr -= address;
 	dir = pgd_offset(&init_mm, address);
 	BUG_ON(address >= end);
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
 		if (!pmd) {
@@ -114,7 +113,6 @@ remap_area_pages(unsigned long start, unsigned long phys_addr,
 		dir++;
 	} while (address && (address < end));
 
-	spin_unlock(&init_mm.page_table_lock);
 	flush_cache_vmap(start, end);
 	return err;
 }
diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c
index 61bc2fa0511e..1221fdde1769 100644
--- a/arch/arm/mm/mm-armv.c
+++ b/arch/arm/mm/mm-armv.c
@@ -180,11 +180,6 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 
 	if (!vectors_high()) {
 		/*
-		 * This lock is here just to satisfy pmd_alloc and pte_lock
-		 */
-		spin_lock(&mm->page_table_lock);
-
-		/*
 		 * On ARM, first page must always be allocated since it
 		 * contains the machine vectors.
 		 */
@@ -201,23 +196,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 		set_pte(new_pte, *init_pte);
 		pte_unmap_nested(init_pte);
 		pte_unmap(new_pte);
-
-		spin_unlock(&mm->page_table_lock);
 	}
 
 	return new_pgd;
 
 no_pte:
-	spin_unlock(&mm->page_table_lock);
 	pmd_free(new_pmd);
-	free_pages((unsigned long)new_pgd, 2);
-	return NULL;
-
 no_pmd:
-	spin_unlock(&mm->page_table_lock);
 	free_pages((unsigned long)new_pgd, 2);
-	return NULL;
-
 no_pgd:
 	return NULL;
 }
@@ -243,6 +229,7 @@ void free_pgd_slow(pgd_t *pgd)
 	pte = pmd_page(*pmd);
 	pmd_clear(pmd);
 	dec_page_state(nr_page_table_pages);
+	pte_lock_deinit(pte);
 	pte_free(pte);
 	pmd_free(pmd);
 free:
diff --git a/arch/arm/oprofile/backtrace.c b/arch/arm/oprofile/backtrace.c
index df35c452a8bf..7c22c12618cc 100644
--- a/arch/arm/oprofile/backtrace.c
+++ b/arch/arm/oprofile/backtrace.c
@@ -49,42 +49,22 @@ static struct frame_tail* kernel_backtrace(struct frame_tail *tail)
 
 static struct frame_tail* user_backtrace(struct frame_tail *tail)
 {
-	struct frame_tail buftail;
+	struct frame_tail buftail[2];
 
-	/* hardware pte might not be valid due to dirty/accessed bit emulation
-	 * so we use copy_from_user and benefit from exception fixups */
-	if (copy_from_user(&buftail, tail, sizeof(struct frame_tail)))
+	/* Also check accessibility of one struct frame_tail beyond */
+	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+		return NULL;
+	if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail)))
 		return NULL;
 
-	oprofile_add_trace(buftail.lr);
+	oprofile_add_trace(buftail[0].lr);
 
 	/* frame pointers should strictly progress back up the stack
 	 * (towards higher addresses) */
-	if (tail >= buftail.fp)
+	if (tail >= buftail[0].fp)
 		return NULL;
 
-	return buftail.fp-1;
-}
-
-/* Compare two addresses and see if they're on the same page */
-#define CMP_ADDR_EQUAL(x,y,offset) ((((unsigned long) x) >> PAGE_SHIFT) \
-	== ((((unsigned long) y) + offset) >> PAGE_SHIFT))
-
-/* check that the page(s) containing the frame tail are present */
-static int pages_present(struct frame_tail *tail)
-{
-	struct mm_struct * mm = current->mm;
-
-	if (!check_user_page_readable(mm, (unsigned long)tail))
-		return 0;
-
-	if (CMP_ADDR_EQUAL(tail, tail, 8))
-		return 1;
-
-	if (!check_user_page_readable(mm, ((unsigned long)tail) + 8))
-		return 0;
-
-	return 1;
+	return buftail[0].fp-1;
 }
 
 /*
@@ -118,7 +98,6 @@ static int valid_kernel_stack(struct frame_tail *tail, struct pt_regs *regs)
 void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
 	struct frame_tail *tail;
-	unsigned long last_address = 0;
 
 	tail = ((struct frame_tail *) regs->ARM_fp) - 1;
 
@@ -132,13 +111,6 @@ void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
 		return;
 	}
 
-	while (depth-- && tail && !((unsigned long) tail & 3)) {
-		if ((!CMP_ADDR_EQUAL(last_address, tail, 0)
-			|| !CMP_ADDR_EQUAL(last_address, tail, 8))
-			&& !pages_present(tail))
-			return;
-		last_address = (unsigned long) tail;
+	while (depth-- && tail && !((unsigned long) tail & 3))
 		tail = user_backtrace(tail);
-	}
 }
-
diff --git a/arch/arm26/mm/memc.c b/arch/arm26/mm/memc.c
index 8e8a2bb2487d..34def6397c3c 100644
--- a/arch/arm26/mm/memc.c
+++ b/arch/arm26/mm/memc.c
@@ -79,12 +79,6 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 		goto no_pgd;
 
 	/*
-	 * This lock is here just to satisfy pmd_alloc and pte_lock
-	 * FIXME: I bet we could avoid taking it pretty much altogether
-	 */
-	spin_lock(&mm->page_table_lock);
-
-	/*
 	 * On ARM, first page must always be allocated since it contains
 	 * the machine vectors.
 	 */
@@ -92,7 +86,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	if (!new_pmd)
 		goto no_pmd;
 
-	new_pte = pte_alloc_kernel(mm, new_pmd, 0);
+	new_pte = pte_alloc_map(mm, new_pmd, 0);
 	if (!new_pte)
 		goto no_pte;
 
@@ -101,6 +95,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	init_pte = pte_offset(init_pmd, 0);
 
 	set_pte(new_pte, *init_pte);
+	pte_unmap(new_pte);
 
 	/*
 	 * the page table entries are zeroed
@@ -112,23 +107,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
 		(PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
 
-	spin_unlock(&mm->page_table_lock);
-
 	/* update MEMC tables */
 	cpu_memc_update_all(new_pgd);
 	return new_pgd;
 
 no_pte:
-	spin_unlock(&mm->page_table_lock);
 	pmd_free(new_pmd);
-	free_pgd_slow(new_pgd);
-	return NULL;
-
 no_pmd:
-	spin_unlock(&mm->page_table_lock);
 	free_pgd_slow(new_pgd);
-	return NULL;
-
 no_pgd:
 	return NULL;
 }
diff --git a/arch/cris/arch-v32/mm/tlb.c b/arch/cris/arch-v32/mm/tlb.c
index 8233406798d3..b08a28bb58ab 100644
--- a/arch/cris/arch-v32/mm/tlb.c
+++ b/arch/cris/arch-v32/mm/tlb.c
@@ -175,6 +175,8 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	return 0;
 }
 
+static DEFINE_SPINLOCK(mmu_context_lock);
+
 /* Called in schedule() just before actually doing the switch_to. */
 void
 switch_mm(struct mm_struct *prev, struct mm_struct *next,
@@ -183,10 +185,10 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	int cpu = smp_processor_id();
 
 	/* Make sure there is a MMU context. */
-	spin_lock(&next->page_table_lock);
+	spin_lock(&mmu_context_lock);
 	get_mmu_context(next);
 	cpu_set(cpu, next->cpu_vm_mask);
-	spin_unlock(&next->page_table_lock);
+	spin_unlock(&mmu_context_lock);
 
 	/*
 	 * Remember the pgd for the fault handlers. Keep a seperate copy of it
diff --git a/arch/cris/mm/ioremap.c b/arch/cris/mm/ioremap.c
index ebba11e270fa..a92ac9877582 100644
--- a/arch/cris/mm/ioremap.c
+++ b/arch/cris/mm/ioremap.c
@@ -52,7 +52,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, prot);
@@ -74,7 +74,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pud_t *pud;
 		pmd_t *pmd;
@@ -94,7 +93,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }
diff --git a/arch/frv/mm/dma-alloc.c b/arch/frv/mm/dma-alloc.c
index cfc4f97490c6..342823aad758 100644
--- a/arch/frv/mm/dma-alloc.c
+++ b/arch/frv/mm/dma-alloc.c
@@ -55,21 +55,18 @@ static int map_page(unsigned long va, unsigned long pa, pgprot_t prot)
 	pte_t *pte;
 	int err = -ENOMEM;
 
-	spin_lock(&init_mm.page_table_lock);
-
 	/* Use upper 10 bits of VA to index the first level map */
 	pge = pgd_offset_k(va);
 	pue = pud_offset(pge, va);
 	pme = pmd_offset(pue, va);
 
 	/* Use middle 10 bits of VA to index the second-level map */
-	pte = pte_alloc_kernel(&init_mm, pme, va);
+	pte = pte_alloc_kernel(pme, va);
 	if (pte != 0) {
 		err = 0;
 		set_pte(pte, mk_pte_phys(pa & PAGE_MASK, prot));
 	}
 
-	spin_unlock(&init_mm.page_table_lock);
 	return err;
 }
 
diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c
index 4eaec0f3525b..2c67dfe5a6b3 100644
--- a/arch/frv/mm/pgalloc.c
+++ b/arch/frv/mm/pgalloc.c
@@ -87,14 +87,14 @@ static inline void pgd_list_add(pgd_t *pgd)
 	if (pgd_list)
 		pgd_list->private = (unsigned long) &page->index;
 	pgd_list = page;
-	page->private = (unsigned long) &pgd_list;
+	set_page_private(page, (unsigned long)&pgd_list);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
 	struct page *next, **pprev, *page = virt_to_page(pgd);
 	next = (struct page *) page->index;
-	pprev = (struct page **) page->private;
+	pprev = (struct page **)page_private(page);
 	*pprev = next;
 	if (next)
 		next->private = (unsigned long) pprev;
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index 16b485009622..fc1993564f98 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -134,17 +134,16 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
 	return ret;
 }
 
-static void mark_screen_rdonly(struct task_struct * tsk)
+static void mark_screen_rdonly(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte, *mapped;
+	pte_t *pte;
+	spinlock_t *ptl;
 	int i;
 
-	preempt_disable();
-	spin_lock(&tsk->mm->page_table_lock);
-	pgd = pgd_offset(tsk->mm, 0xA0000);
+	pgd = pgd_offset(mm, 0xA0000);
 	if (pgd_none_or_clear_bad(pgd))
 		goto out;
 	pud = pud_offset(pgd, 0xA0000);
@@ -153,16 +152,14 @@ static void mark_screen_rdonly(struct task_struct * tsk)
 	pmd = pmd_offset(pud, 0xA0000);
 	if (pmd_none_or_clear_bad(pmd))
 		goto out;
-	pte = mapped = pte_offset_map(pmd, 0xA0000);
+	pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
 	for (i = 0; i < 32; i++) {
 		if (pte_present(*pte))
 			set_pte(pte, pte_wrprotect(*pte));
 		pte++;
 	}
-	pte_unmap(mapped);
+	pte_unmap_unlock(pte, ptl);
 out:
-	spin_unlock(&tsk->mm->page_table_lock);
-	preempt_enable();
 	flush_tlb();
 }
 
@@ -306,7 +303,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 
 	tsk->thread.screen_bitmap = info->screen_bitmap;
 	if (info->flags & VM86_SCREEN_BITMAP)
-		mark_screen_rdonly(tsk);
+		mark_screen_rdonly(tsk->mm);
 	__asm__ __volatile__(
 		"xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
 		"movl %0,%%esp\n\t"
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 244d8ec66be2..c4af9638dbfa 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -98,7 +98,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
 
 extern unsigned long find_max_low_pfn(void);
 extern void find_max_pfn(void);
-extern void one_highpage_init(struct page *, int, int);
+extern void add_one_highpage_init(struct page *, int, int);
 
 extern struct e820map e820;
 extern unsigned long init_pg_tables_end;
@@ -427,7 +427,7 @@ void __init set_highmem_pages_init(int bad_ppro)
 			if (!pfn_valid(node_pfn))
 				continue;
 			page = pfn_to_page(node_pfn);
-			one_highpage_init(page, node_pfn, bad_ppro);
+			add_one_highpage_init(page, node_pfn, bad_ppro);
 		}
 	}
 	totalram_pages += totalhigh_pages;
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 2ebaf75f732e..542d9298da5e 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/efi.h>
+#include <linux/memory_hotplug.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -266,17 +267,46 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 	pkmap_page_table = pte;
 }
 
-void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+void __devinit free_new_highpage(struct page *page)
+{
+	set_page_count(page, 1);
+	__free_page(page);
+	totalhigh_pages++;
+}
+
+void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
 {
 	if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
 		ClearPageReserved(page);
-		set_page_count(page, 1);
-		__free_page(page);
-		totalhigh_pages++;
+		free_new_highpage(page);
 	} else
 		SetPageReserved(page);
 }
 
+static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
+{
+	free_new_highpage(page);
+	totalram_pages++;
+#ifdef CONFIG_FLATMEM
+	max_mapnr = max(pfn, max_mapnr);
+#endif
+	num_physpages++;
+	return 0;
+}
+
+/*
+ * Not currently handling the NUMA case.
+ * Assuming single node and all memory that
+ * has been added dynamically that would be
+ * onlined here is in HIGHMEM
+ */
+void online_page(struct page *page)
+{
+	ClearPageReserved(page);
+	add_one_highpage_hotplug(page, page_to_pfn(page));
+}
+
+
 #ifdef CONFIG_NUMA
 extern void set_highmem_pages_init(int);
 #else
@@ -284,7 +314,7 @@ static void __init set_highmem_pages_init(int bad_ppro)
 {
 	int pfn;
 	for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
-		one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
+		add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
 	totalram_pages += totalhigh_pages;
 }
 #endif /* CONFIG_FLATMEM */
@@ -615,6 +645,28 @@ void __init mem_init(void)
 #endif
 }
 
+/*
+ * this is for the non-NUMA, single node SMP system case.
+ * Specifically, in the case of x86, we will always add
+ * memory to the highmem for now.
+ */
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+int add_memory(u64 start, u64 size)
+{
+	struct pglist_data *pgdata = &contig_page_data;
+	struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+
+	return __add_pages(zone, start_pfn, nr_pages);
+}
+
+int remove_memory(u64 start, u64 size)
+{
+	return -EINVAL;
+}
+#endif
+
 kmem_cache_t *pgd_cache;
 kmem_cache_t *pmd_cache;
 
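add_memory() and remove_memory() are the arch-level hooks the memory-hotplug core expects: __add_pages() grows the zone's struct page coverage, and individual pages are brought live later through online_page() above. A hedged sketch of how a platform driver might call it, with an invented base address and size:

static int __init fake_dimm_probe(void)
{
	u64 start = 0x40000000ULL;	/* hypothetical 1GB base */
	u64 size = 128ULL << 20;	/* hypothetical 128MB DIMM */
	int ret;

	ret = add_memory(start, size);
	if (ret)
		printk(KERN_ERR "add_memory failed: %d\n", ret);
	return ret;
}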
diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
index f379b8d67558..5d09de8d1c6b 100644
--- a/arch/i386/mm/ioremap.c
+++ b/arch/i386/mm/ioremap.c
@@ -28,7 +28,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
 	unsigned long pfn;
 
 	pfn = phys_addr >> PAGE_SHIFT;
-	pte = pte_alloc_kernel(&init_mm, pmd, addr);
+	pte = pte_alloc_kernel(pmd, addr);
 	if (!pte)
 		return -ENOMEM;
 	do {
@@ -87,14 +87,12 @@ static int ioremap_page_range(unsigned long addr,
 	flush_cache_all();
 	phys_addr -= addr;
 	pgd = pgd_offset_k(addr);
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		next = pgd_addr_end(addr, end);
 		err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return err;
 }
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index dcdce2c6c532..9db3242103be 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -31,11 +31,13 @@ void show_mem(void)
 	pg_data_t *pgdat;
 	unsigned long i;
 	struct page_state ps;
+	unsigned long flags;
 
 	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
 	printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
 			page = pgdat_page_nr(pgdat, i);
 			total++;
@@ -48,6 +50,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 	printk(KERN_INFO "%d pages of RAM\n", total);
 	printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
@@ -188,19 +191,19 @@ static inline void pgd_list_add(pgd_t *pgd)
 	struct page *page = virt_to_page(pgd);
 	page->index = (unsigned long)pgd_list;
 	if (pgd_list)
-		pgd_list->private = (unsigned long)&page->index;
+		set_page_private(pgd_list, (unsigned long)&page->index);
 	pgd_list = page;
-	page->private = (unsigned long)&pgd_list;
+	set_page_private(page, (unsigned long)&pgd_list);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
 	struct page *next, **pprev, *page = virt_to_page(pgd);
 	next = (struct page *)page->index;
-	pprev = (struct page **)page->private;
+	pprev = (struct page **)page_private(page);
 	*pprev = next;
 	if (next)
-		next->private = (unsigned long)pprev;
+		set_page_private(next, (unsigned long)pprev);
 }
 
 void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
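Both pgd_list implementations (frv above, i386 here) stop touching page->private directly and go through page_private()/set_page_private(). In this era those accessors are thin macros over the same field (roughly #define page_private(page) ((page)->private)), but funnelling every access through them is what allows a later patch in the series to overlay the field with the split page-table spinlock. A sketch of the accessor pattern with a hypothetical singly-linked page cache; nothing below is from the patch itself:

static struct page *cache_head;

static void cache_push(struct page *page)
{
	/* chain pages through the private field via the accessors */
	set_page_private(page, (unsigned long)cache_head);
	cache_head = page;
}

static struct page *cache_pop(void)
{
	struct page *page = cache_head;

	if (page)
		cache_head = (struct page *)page_private(page);
	return page;
}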
diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c
index 65dfd2edb671..21654be3f73f 100644
--- a/arch/i386/oprofile/backtrace.c
+++ b/arch/i386/oprofile/backtrace.c
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <asm/ptrace.h>
+#include <asm/uaccess.h>
 
 struct frame_head {
 	struct frame_head * ebp;
@@ -21,26 +22,22 @@ struct frame_head {
 static struct frame_head *
 dump_backtrace(struct frame_head * head)
 {
-	oprofile_add_trace(head->ret);
+	struct frame_head bufhead[2];
 
-	/* frame pointers should strictly progress back up the stack
-	 * (towards higher addresses) */
-	if (head >= head->ebp)
+	/* Also check accessibility of one struct frame_head beyond */
+	if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
+		return NULL;
+	if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
 		return NULL;
 
-	return head->ebp;
-}
-
-/* check that the page(s) containing the frame head are present */
-static int pages_present(struct frame_head * head)
-{
-	struct mm_struct * mm = current->mm;
+	oprofile_add_trace(bufhead[0].ret);
 
-	/* FIXME: only necessary once per page */
-	if (!check_user_page_readable(mm, (unsigned long)head))
-		return 0;
+	/* frame pointers should strictly progress back up the stack
+	 * (towards higher addresses) */
+	if (head >= bufhead[0].ebp)
+		return NULL;
 
-	return check_user_page_readable(mm, (unsigned long)(head + 1));
+	return bufhead[0].ebp;
 }
 
 /*
@@ -97,15 +94,6 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 		return;
 	}
 
-#ifdef CONFIG_SMP
-	if (!spin_trylock(&current->mm->page_table_lock))
-		return;
-#endif
-
-	while (depth-- && head && pages_present(head))
+	while (depth-- && head)
 		head = dump_backtrace(head);
-
-#ifdef CONFIG_SMP
-	spin_unlock(&current->mm->page_table_lock);
-#endif
 }
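Both oprofile backtracers now read user stack frames the same way: profiling samples arrive in interrupt or NMI context, where taking mm locks or sleeping on a fault is not an option, so the old page-table probing is replaced by access_ok() plus __copy_from_user_inatomic(), which simply fails when the page is not resident. A minimal sketch of that pattern with a hypothetical helper name:

static int read_user_word(const unsigned long __user *uptr, unsigned long *val)
{
	if (!access_ok(VERIFY_READ, uptr, sizeof(*val)))
		return -EFAULT;
	/* returns nonzero instead of faulting if the page is absent */
	if (__copy_from_user_inatomic(val, uptr, sizeof(*val)))
		return -EFAULT;
	return 0;
}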
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index d71731ee5b61..f7dfc107cb7b 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2352,7 +2352,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 	insert_vm_struct(mm, vma);
 
 	mm->total_vm  += size >> PAGE_SHIFT;
-	vm_stat_account(vma);
+	vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
+						vma_pages(vma));
 	up_write(&task->mm->mmap_sem);
 
 	/*
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index a3788fb84809..a88cdb7232f8 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -555,9 +555,13 @@ void show_mem(void)
 	show_free_areas();
 	printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
-		unsigned long present = pgdat->node_present_pages;
+		unsigned long present;
+		unsigned long flags;
 		int shared = 0, cached = 0, reserved = 0;
+
 		printk("Node ID: %d\n", pgdat->node_id);
+		pgdat_resize_lock(pgdat, &flags);
+		present = pgdat->node_present_pages;
 		for(i = 0; i < pgdat->node_spanned_pages; i++) {
 			struct page *page;
 			if (pfn_valid(pgdat->node_start_pfn + i))
@@ -571,6 +575,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page)-1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 		total_present += present;
 		total_reserved += reserved;
 		total_cached += cached;
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 3c32af910d60..af7eb087dca7 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -20,32 +20,6 @@
 extern void die (char *, struct pt_regs *, long);
 
 /*
- * This routine is analogous to expand_stack() but instead grows the
- * register backing store (which grows towards higher addresses).
- * Since the register backing store is access sequentially, we
- * disallow growing the RBS by more than a page at a time.  Note that
- * the VM_GROWSUP flag can be set on any VM area but that's fine
- * because the total process size is still limited by RLIMIT_STACK and
- * RLIMIT_AS.
- */
-static inline long
-expand_backing_store (struct vm_area_struct *vma, unsigned long address)
-{
-	unsigned long grow;
-
-	grow = PAGE_SIZE >> PAGE_SHIFT;
-	if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur
-	    || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur))
-		return -ENOMEM;
-	vma->vm_end += PAGE_SIZE;
-	vma->vm_mm->total_vm += grow;
-	if (vma->vm_flags & VM_LOCKED)
-		vma->vm_mm->locked_vm += grow;
-	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
-	return 0;
-}
-
-/*
  * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
  * (inside region 5, on ia64) and that page is present.
  */
@@ -185,7 +159,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 		if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
 		    || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
 			goto bad_area;
-		if (expand_backing_store(vma, address))
+		/*
+		 * Since the register backing store is accessed sequentially,
+		 * we disallow growing it by more than a page at a time.
+		 */
+		if (address > vma->vm_end + PAGE_SIZE - sizeof(long))
+			goto bad_area;
+		if (expand_upwards(vma, address))
 			goto bad_area;
 	}
 	goto good_area;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 98246acd4991..e3215ba64ffd 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -158,7 +158,7 @@ ia64_init_addr_space (void)
 	vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 	vma->vm_end = vma->vm_start + PAGE_SIZE;
 	vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
-	vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP;
+	vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
 	down_write(&current->mm->mmap_sem);
 	if (insert_vm_struct(current->mm, vma)) {
 		up_write(&current->mm->mmap_sem);
@@ -275,26 +275,21 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
 
 	pgd = pgd_offset_k(address);	/* note: this is NOT pgd_offset()! */
 
-	spin_lock(&init_mm.page_table_lock);
 	{
 		pud = pud_alloc(&init_mm, pgd, address);
 		if (!pud)
 			goto out;
-
 		pmd = pmd_alloc(&init_mm, pud, address);
 		if (!pmd)
 			goto out;
-		pte = pte_alloc_map(&init_mm, pmd, address);
+		pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			goto out;
-		if (!pte_none(*pte)) {
-			pte_unmap(pte);
+		if (!pte_none(*pte))
 			goto out;
-		}
 		set_pte(pte, mk_pte(page, pgprot));
-		pte_unmap(pte);
 	}
-  out:	spin_unlock(&init_mm.page_table_lock);
+  out:
 	/* no need for flush_tlb */
 	return page;
 }
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index c93e0f2b5fea..c79a9b96d02b 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -158,10 +158,12 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long
 # ifdef CONFIG_SMP
 	platform_global_tlb_purge(mm, start, end, nbits);
 # else
+	preempt_disable();
 	do {
 		ia64_ptcl(start, (nbits<<2));
 		start += (1UL << nbits);
 	} while (start < end);
+	preempt_enable();
 # endif
 
 	ia64_srlz_i();	/* srlz.i implies srlz.d */
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index d9a40b1fe8ba..6facf15b04f3 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -48,6 +48,8 @@ void show_mem(void)
 	show_free_areas();
 	printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
+		unsigned long flags;
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
 			page = pgdat_page_nr(pgdat, i);
 			total++;
@@ -60,6 +62,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 	printk("%d pages of RAM\n", total);
 	printk("%d pages of HIGHMEM\n",highmem);
@@ -150,10 +153,14 @@ int __init reservedpages_count(void)
 	int reservedpages, nid, i;
 
 	reservedpages = 0;
-	for_each_online_node(nid)
+	for_each_online_node(nid) {
+		unsigned long flags;
+		pgdat_resize_lock(NODE_DATA(nid), &flags);
 		for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++)
 			if (PageReserved(nid_page_nr(nid, i)))
 				reservedpages++;
+		pgdat_resize_unlock(NODE_DATA(nid), &flags);
+	}
 
 	return reservedpages;
 }
diff --git a/arch/m32r/mm/ioremap.c b/arch/m32r/mm/ioremap.c
index 70c59055c19c..a151849a605e 100644
--- a/arch/m32r/mm/ioremap.c
+++ b/arch/m32r/mm/ioremap.c
@@ -67,7 +67,7 @@ remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -90,7 +90,6 @@ remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd;
 		pmd = pmd_alloc(&init_mm, dir, address);
@@ -104,7 +103,6 @@ remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index ba960bbc8e6d..1dd5d18b2201 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -388,33 +388,11 @@ config AMIGA_PCMCIA
 	  Include support in the kernel for pcmcia on Amiga 1200 and Amiga
 	  600. If you intend to use pcmcia cards say Y; otherwise say N.
 
-config STRAM_SWAP
-	bool "Support for ST-RAM as swap space"
-	depends on ATARI && BROKEN
-	---help---
-	  Some Atari 68k machines (including the 520STF and 1020STE) divide
-	  their addressable memory into ST and TT sections.  The TT section
-	  (up to 512MB) is the main memory; the ST section (up to 4MB) is
-	  accessible to the built-in graphics board, runs slower, and is
-	  present mainly for backward compatibility with older machines.
-
-	  This enables support for using (parts of) ST-RAM as swap space,
-	  instead of as normal system memory. This can first enhance system
-	  performance if you have lots of alternate RAM (compared to the size
-	  of ST-RAM), because executable code always will reside in faster
-	  memory.  ST-RAM will remain as ultra-fast swap space. On the other
-	  hand, it allows much improved dynamic allocations of ST-RAM buffers
-	  for device driver modules (e.g. floppy, ACSI, SLM printer, DMA
-	  sound). The probability that such allocations at module load time
-	  fail is drastically reduced.
-
 config STRAM_PROC
 	bool "ST-RAM statistics in /proc"
 	depends on ATARI
 	help
-	  Say Y here to report ST-RAM usage statistics in /proc/stram.  See
-	  the help for CONFIG_STRAM_SWAP for discussion of ST-RAM and its
-	  uses.
+	  Say Y here to report ST-RAM usage statistics in /proc/stram.
 
 config HEARTBEAT
 	bool "Use power LED as a heartbeat" if AMIGA || APOLLO || ATARI || MAC ||Q40
diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c
index 5a3c106b40c8..22e0481a5f7b 100644
--- a/arch/m68k/atari/stram.c
+++ b/arch/m68k/atari/stram.c
@@ -15,11 +15,9 @@
 #include <linux/kdev_t.h>
 #include <linux/major.h>
 #include <linux/init.h>
-#include <linux/swap.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
-#include <linux/shm.h>
 #include <linux/bootmem.h>
 #include <linux/mount.h>
 #include <linux/blkdev.h>
@@ -33,8 +31,6 @@
 #include <asm/io.h>
 #include <asm/semaphore.h>
 
-#include <linux/swapops.h>
-
 #undef DEBUG
 
 #ifdef DEBUG
@@ -49,8 +45,7 @@
 #include <linux/proc_fs.h>
 #endif
 
-/* Pre-swapping comments:
- *
+/*
  * ++roman:
  *
  * New version of ST-Ram buffer allocation. Instead of using the
@@ -75,76 +70,6 @@
75 * 70 *
76 */ 71 */
77 72
78/*
79 * New Nov 1997: Use ST-RAM as swap space!
80 *
81 * In the past, there were often problems with modules that require ST-RAM
82 * buffers. Such drivers have to use __get_dma_pages(), which unfortunately
83 * often isn't very successful in allocating more than 1 page :-( [1] The net
84 * result was that most of the time you couldn't insmod such modules (ataflop,
85 * ACSI, SCSI on Falcon, Atari internal framebuffer, not to speak of acsi_slm,
86 * which needs a 1 MB buffer... :-).
87 *
88 * To overcome this limitation, ST-RAM can now be turned into a very
89 * high-speed swap space. If a request for an ST-RAM buffer comes, the kernel
90 * now tries to unswap some pages on that swap device to make some free (and
91 * contiguous) space. This works much better in comparison to
92 * __get_dma_pages(), since used swap pages can be selectively freed by either
93 * moving them to somewhere else in swap space, or by reading them back into
94 * system memory. Ok, there operation of unswapping isn't really cheap (for
95 * each page, one has to go through the page tables of all processes), but it
96 * doesn't happen that often (only when allocation ST-RAM, i.e. when loading a
97 * module that needs ST-RAM). But it at least makes it possible to load such
98 * modules!
99 *
100 * It could also be that overall system performance increases a bit due to
101 * ST-RAM swapping, since slow ST-RAM isn't used anymore for holding data or
102 * executing code in. It's then just a (very fast, compared to disk) back
103 * storage for not-so-often needed data. (But this effect must be compared
104 * with the loss of total memory...) Don't know if the effect is already
105 * visible on a TT, where the speed difference between ST- and TT-RAM isn't
106 * that dramatic, but it should on machines where TT-RAM is really much faster
107 * (e.g. Afterburner).
108 *
109 * [1]: __get_free_pages() does a fine job if you only want one page, but if
110 * you want more (contiguous) pages, it can give you such a block only if
111 * there's already a free one. The algorithm can't try to free buffers or swap
112 * out something in order to make more free space, since all that page-freeing
113 * mechanisms work "target-less", i.e. they just free something, but not in a
114 * specific place. I.e., __get_free_pages() can't do anything to free
115 * *adjacent* pages :-( This situation becomes even worse for DMA memory,
116 * since the freeing algorithms are also blind to DMA capability of pages.
117 */
118
119/* 1998-10-20: ++andreas
120 unswap_by_move disabled because it does not handle swapped shm pages.
121*/
122
123/* 2000-05-01: ++andreas
124 Integrated with bootmem. Remove all traces of unswap_by_move.
125*/
126
127#ifdef CONFIG_STRAM_SWAP
128#define ALIGN_IF_SWAP(x) PAGE_ALIGN(x)
129#else
130#define ALIGN_IF_SWAP(x) (x)
131#endif
132
133/* get index of swap page at address 'addr' */
134#define SWAP_NR(addr) (((addr) - swap_start) >> PAGE_SHIFT)
135
136/* get address of swap page #'nr' */
137#define SWAP_ADDR(nr) (swap_start + ((nr) << PAGE_SHIFT))
138
139/* get number of pages for 'n' bytes (already page-aligned) */
140#define N_PAGES(n) ((n) >> PAGE_SHIFT)
141
142/* The following two numbers define the maximum fraction of ST-RAM in total
143 * memory, below which the kernel would automatically use ST-RAM as swap
144 * space. This decision can be overridden with stram_swap= */
145#define MAX_STRAM_FRACTION_NOM 1
146#define MAX_STRAM_FRACTION_DENOM 3
147
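
To make the heuristic concrete, here is a minimal stand-alone sketch of the fraction test as it is applied later in atari_stram_reserve_pages(); the memory sizes are hypothetical and a 4 kB page size is assumed:

#include <stdio.h>

#define MAX_STRAM_FRACTION_NOM    1
#define MAX_STRAM_FRACTION_DENOM  3

int main(void)
{
	/* hypothetical machine: 4 MB of ST-RAM, 16 MB of total memory */
	unsigned long st_pages = (4UL << 20) >> 12;
	unsigned long total_pages = (16UL << 20) >> 12;

	/* swapping is used when ST-RAM is at most NOM/DENOM (1/3) of
	 * total memory, tested in cross-multiplied integer form so no
	 * division or rounding is needed */
	if (st_pages * MAX_STRAM_FRACTION_DENOM <=
	    total_pages * MAX_STRAM_FRACTION_NOM)
		printf("ST-RAM swap enabled\n");
	else
		printf("ST-RAM swap disabled\n");
	return 0;
}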
148/* Start and end (virtual) of ST-RAM */ 73/* Start and end (virtual) of ST-RAM */
149static void *stram_start, *stram_end; 74static void *stram_start, *stram_end;
150 75
@@ -164,10 +89,9 @@ typedef struct stram_block {
164} BLOCK; 89} BLOCK;
165 90
166/* values for flags field */ 91/* values for flags field */
167#define BLOCK_FREE 0x01 /* free structure in the BLOCKs pool */ 92#define BLOCK_FREE 0x01 /* free structure in the BLOCKs pool */
168#define BLOCK_KMALLOCED 0x02 /* structure allocated by kmalloc() */ 93#define BLOCK_KMALLOCED 0x02 /* structure allocated by kmalloc() */
169#define BLOCK_GFP 0x08 /* block allocated with __get_dma_pages() */ 94#define BLOCK_GFP 0x08 /* block allocated with __get_dma_pages() */
170#define BLOCK_INSWAP 0x10 /* block allocated in swap space */
171 95
172/* list of allocated blocks */ 96/* list of allocated blocks */
173static BLOCK *alloc_list; 97static BLOCK *alloc_list;
@@ -179,60 +103,8 @@ static BLOCK *alloc_list;
179#define N_STATIC_BLOCKS 20 103#define N_STATIC_BLOCKS 20
180static BLOCK static_blocks[N_STATIC_BLOCKS]; 104static BLOCK static_blocks[N_STATIC_BLOCKS];
181 105
182#ifdef CONFIG_STRAM_SWAP
183/* max. number of bytes to use for swapping
184 * 0 = no ST-RAM swapping
185 * -1 = do swapping (to whole ST-RAM) if it's less than MAX_STRAM_FRACTION of
186 * total memory
187 */
188static int max_swap_size = -1;
189
190/* start and end of swapping area */
191static void *swap_start, *swap_end;
192
193/* The ST-RAM's swap info structure */
194static struct swap_info_struct *stram_swap_info;
195
196/* The ST-RAM's swap type */
197static int stram_swap_type;
198
199/* Semaphore for get_stram_region. */
200static DECLARE_MUTEX(stram_swap_sem);
201
202/* major and minor device number of the ST-RAM device; for the major, we use
203 * the same as Amiga z2ram, which is really similar and cannot exist on Atari,
204 * and for the minor a relatively odd number to avoid the user creating and
205 * using that device. */
206#define STRAM_MAJOR Z2RAM_MAJOR
207#define STRAM_MINOR 13
208
209/* Some impossible pointer value */
210#define MAGIC_FILE_P (struct file *)0xffffdead
211
212#ifdef DO_PROC
213static unsigned stat_swap_read;
214static unsigned stat_swap_write;
215static unsigned stat_swap_force;
216#endif /* DO_PROC */
217
218#endif /* CONFIG_STRAM_SWAP */
219
220/***************************** Prototypes *****************************/ 106/***************************** Prototypes *****************************/
221 107
222#ifdef CONFIG_STRAM_SWAP
223static int swap_init(void *start_mem, void *swap_data);
224static void *get_stram_region( unsigned long n_pages );
225static void free_stram_region( unsigned long offset, unsigned long n_pages
226 );
227static int in_some_region(void *addr);
228static unsigned long find_free_region( unsigned long n_pages, unsigned long
229 *total_free, unsigned long
230 *region_free );
231static void do_stram_request(request_queue_t *);
232static int stram_open( struct inode *inode, struct file *filp );
233static int stram_release( struct inode *inode, struct file *filp );
234static void reserve_region(void *start, void *end);
235#endif
236static BLOCK *add_region( void *addr, unsigned long size ); 108static BLOCK *add_region( void *addr, unsigned long size );
237static BLOCK *find_region( void *addr ); 109static BLOCK *find_region( void *addr );
238static int remove_region( BLOCK *block ); 110static int remove_region( BLOCK *block );
@@ -279,84 +151,11 @@ void __init atari_stram_init(void)
279 */ 151 */
280void __init atari_stram_reserve_pages(void *start_mem) 152void __init atari_stram_reserve_pages(void *start_mem)
281{ 153{
282#ifdef CONFIG_STRAM_SWAP
283 /* if max_swap_size is negative (i.e. no stram_swap= option given),
284 * determine at run time whether to use ST-RAM swapping */
285 if (max_swap_size < 0)
286 /* Use swapping if ST-RAM doesn't make up more than MAX_STRAM_FRACTION
287 * of total memory. In that case, the max. size is set to 16 MB,
288 * because ST-RAM can never be bigger than that.
290 * Also, never use swapping on a Hades; there's no separate ST-RAM in
290 * that machine. */
291 max_swap_size =
292 (!MACH_IS_HADES &&
293 (N_PAGES(stram_end-stram_start)*MAX_STRAM_FRACTION_DENOM <=
294 ((unsigned long)high_memory>>PAGE_SHIFT)*MAX_STRAM_FRACTION_NOM)) ? 16*1024*1024 : 0;
295 DPRINTK( "atari_stram_reserve_pages: max_swap_size = %d\n", max_swap_size );
296#endif
297
298 /* always reserve first page of ST-RAM, the first 2 kB are 154 /* always reserve first page of ST-RAM, the first 2 kB are
299 * supervisor-only! */ 155 * supervisor-only! */
300 if (!kernel_in_stram) 156 if (!kernel_in_stram)
301 reserve_bootmem (0, PAGE_SIZE); 157 reserve_bootmem (0, PAGE_SIZE);
302 158
303#ifdef CONFIG_STRAM_SWAP
304 {
305 void *swap_data;
306
307 start_mem = (void *) PAGE_ALIGN ((unsigned long) start_mem);
308 /* determine first page to use as swap: if the kernel is
309 in TT-RAM, this is the first page of (usable) ST-RAM;
310 otherwise just use the end of kernel data (= start_mem) */
311 swap_start = !kernel_in_stram ? stram_start + PAGE_SIZE : start_mem;
312 /* decrement by one page, rest of kernel assumes that first swap page
313 * is always reserved and maybe doesn't handle swp_entry == 0
314 * correctly */
315 swap_start -= PAGE_SIZE;
316 swap_end = stram_end;
317 if (swap_end-swap_start > max_swap_size)
318 swap_end = swap_start + max_swap_size;
319 DPRINTK( "atari_stram_reserve_pages: swapping enabled; "
320 "swap=%p-%p\n", swap_start, swap_end);
321
322 /* reserve some amount of memory for maintenance of
323 * swapping itself: one page for each 2048 (PAGE_SIZE/2)
324 * swap pages. (2 bytes for each page) */
325 swap_data = start_mem;
326 start_mem += ((SWAP_NR(swap_end) + PAGE_SIZE/2 - 1)
327 >> (PAGE_SHIFT-1)) << PAGE_SHIFT;
328 /* correct swap_start if necessary */
329 if (swap_start + PAGE_SIZE == swap_data)
330 swap_start = start_mem - PAGE_SIZE;
331
332 if (!swap_init( start_mem, swap_data )) {
333 printk( KERN_ERR "ST-RAM swap space initialization failed\n" );
334 max_swap_size = 0;
335 return;
336 }
337 /* reserve region for swapping meta-data */
338 reserve_region(swap_data, start_mem);
339 /* reserve swapping area itself */
340 reserve_region(swap_start + PAGE_SIZE, swap_end);
341
342 /*
343 * If the whole ST-RAM is used for swapping, there are no allocatable
344 * dma pages left. But unfortunately, some shared parts of the kernel
345 * (particularly the SCSI mid-level) call __get_dma_pages()
346 * unconditionally :-( These calls then fail, and scsi.c doesn't even
347 * check for NULL return values and just crashes. The quick fix for
348 * this (instead of doing much clean up work in the SCSI code) is to
349 * pretend all pages are DMA-able by setting mach_max_dma_address to
350 * ULONG_MAX. This doesn't change any functionality so far, since
351 * get_dma_pages() shouldn't be used on Atari anyway anymore (better
352 * use atari_stram_alloc()), and the Atari SCSI drivers don't need DMA
353 * memory. But unfortunately there's now no kind of warning (not even
354 * a NULL return value) if you use get_dma_pages() nevertheless :-(
355 * You will just get non-DMA-able memory...
356 */
357 mach_max_dma_address = 0xffffffff;
358 }
359#endif
360} 159}
361 160
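
The swap_map reservation being removed above sized the metadata at one unsigned short per swap page, rounded up to whole pages. A stand-alone sketch of that arithmetic, with a hypothetical 16 MB swap area and 4 kB pages:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
	unsigned long swap_bytes = 16UL << 20;              /* 16 MB area */
	unsigned long swap_pages = swap_bytes >> PAGE_SHIFT;

	/* 2 bytes of swap_map per swap page, so one page of swap_map
	 * covers PAGE_SIZE/2 entries; round up exactly as the removed
	 * code did */
	unsigned long map_bytes = ((swap_pages + PAGE_SIZE/2 - 1)
				   >> (PAGE_SHIFT - 1)) << PAGE_SHIFT;

	printf("%lu swap pages -> %lu bytes (%lu pages) of swap_map\n",
	       swap_pages, map_bytes, map_bytes >> PAGE_SHIFT);
	return 0;
}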
362void atari_stram_mem_init_hook (void) 161void atari_stram_mem_init_hook (void)
@@ -367,7 +166,6 @@ void atari_stram_mem_init_hook (void)
367 166
368/* 167/*
369 * This is main public interface: somehow allocate a ST-RAM block 168 * This is main public interface: somehow allocate a ST-RAM block
370 * There are three strategies:
371 * 169 *
372 * - If we're before mem_init(), we have to make a static allocation. The 170 * - If we're before mem_init(), we have to make a static allocation. The
373 * region is taken in the kernel data area (if the kernel is in ST-RAM) or 171 * region is taken in the kernel data area (if the kernel is in ST-RAM) or
@@ -375,14 +173,9 @@ void atari_stram_mem_init_hook (void)
375 * rsvd_stram_* region. The ST-RAM is somewhere in the middle of kernel 173 * rsvd_stram_* region. The ST-RAM is somewhere in the middle of kernel
376 * address space in the latter case. 174 * address space in the latter case.
377 * 175 *
378 * - If mem_init() has already been called and ST-RAM swapping is enabled, 176 * - If mem_init() has already been called, try with __get_dma_pages().
379 * try to get the memory from the (pseudo) swap-space, either free already 177 * This has the disadvantage that it's very hard to get more than 1 page,
380 * or by moving some other pages out of the swap. 178 * and it is likely to fail :-(
381 *
382 * - If mem_init() has already been called, and ST-RAM swapping is not
383 * enabled, the only possibility is to try with __get_dma_pages(). This has
384 * the disadvantage that it's very hard to get more than 1 page, and it is
385 * likely to fail :-(
386 * 179 *
387 */ 180 */
388void *atari_stram_alloc(long size, const char *owner) 181void *atari_stram_alloc(long size, const char *owner)
@@ -393,27 +186,13 @@ void *atari_stram_alloc(long size, const char *owner)
393 186
394 DPRINTK("atari_stram_alloc(size=%08lx,owner=%s)\n", size, owner); 187 DPRINTK("atari_stram_alloc(size=%08lx,owner=%s)\n", size, owner);
395 188
396 size = ALIGN_IF_SWAP(size);
397 DPRINTK( "atari_stram_alloc: rounded size = %08lx\n", size );
398#ifdef CONFIG_STRAM_SWAP
399 if (max_swap_size) {
400 /* If swapping is active: make some free space in the swap
401 "device". */
402 DPRINTK( "atari_stram_alloc: after mem_init, swapping ok, "
403 "calling get_region\n" );
404 addr = get_stram_region( N_PAGES(size) );
405 flags = BLOCK_INSWAP;
406 }
407 else
408#endif
409 if (!mem_init_done) 189 if (!mem_init_done)
410 return alloc_bootmem_low(size); 190 return alloc_bootmem_low(size);
411 else { 191 else {
412 /* After mem_init() and no swapping: can only resort to 192 /* After mem_init(): can only resort to __get_dma_pages() */
413 * __get_dma_pages() */
414 addr = (void *)__get_dma_pages(GFP_KERNEL, get_order(size)); 193 addr = (void *)__get_dma_pages(GFP_KERNEL, get_order(size));
415 flags = BLOCK_GFP; 194 flags = BLOCK_GFP;
416 DPRINTK( "atari_stram_alloc: after mem_init, swapping off, " 195 DPRINTK( "atari_stram_alloc: after mem_init, "
417 "get_pages=%p\n", addr ); 196 "get_pages=%p\n", addr );
418 } 197 }
419 198
@@ -422,12 +201,7 @@ void *atari_stram_alloc(long size, const char *owner)
422 /* out of memory for BLOCK structure :-( */ 201 /* out of memory for BLOCK structure :-( */
423 DPRINTK( "atari_stram_alloc: out of mem for BLOCK -- " 202 DPRINTK( "atari_stram_alloc: out of mem for BLOCK -- "
424 "freeing again\n" ); 203 "freeing again\n" );
425#ifdef CONFIG_STRAM_SWAP 204 free_pages((unsigned long)addr, get_order(size));
426 if (flags == BLOCK_INSWAP)
427 free_stram_region( SWAP_NR(addr), N_PAGES(size) );
428 else
429#endif
430 free_pages((unsigned long)addr, get_order(size));
431 return( NULL ); 205 return( NULL );
432 } 206 }
433 block->owner = owner; 207 block->owner = owner;
@@ -451,25 +225,12 @@ void atari_stram_free( void *addr )
451 DPRINTK( "atari_stram_free: found block (%p): size=%08lx, owner=%s, " 225 DPRINTK( "atari_stram_free: found block (%p): size=%08lx, owner=%s, "
452 "flags=%02x\n", block, block->size, block->owner, block->flags ); 226 "flags=%02x\n", block, block->size, block->owner, block->flags );
453 227
454#ifdef CONFIG_STRAM_SWAP 228 if (!(block->flags & BLOCK_GFP))
455 if (!max_swap_size) {
456#endif
457 if (block->flags & BLOCK_GFP) {
458 DPRINTK("atari_stram_free: is kmalloced, order_size=%d\n",
459 get_order(block->size));
460 free_pages((unsigned long)addr, get_order(block->size));
461 }
462 else
463 goto fail;
464#ifdef CONFIG_STRAM_SWAP
465 }
466 else if (block->flags & BLOCK_INSWAP) {
467 DPRINTK( "atari_stram_free: is swap-alloced\n" );
468 free_stram_region( SWAP_NR(block->start), N_PAGES(block->size) );
469 }
470 else
471 goto fail; 229 goto fail;
472#endif 230
231 DPRINTK("atari_stram_free: is kmalloced, order_size=%d\n",
232 get_order(block->size));
233 free_pages((unsigned long)addr, get_order(block->size));
473 remove_region( block ); 234 remove_region( block );
474 return; 235 return;
475 236
@@ -478,612 +239,6 @@ void atari_stram_free( void *addr )
478 "(called from %p)\n", addr, __builtin_return_address(0) ); 239 "(called from %p)\n", addr, __builtin_return_address(0) );
479} 240}
480 241
481
482#ifdef CONFIG_STRAM_SWAP
483
484
485/* ------------------------------------------------------------------------ */
486/* Main Swapping Functions */
487/* ------------------------------------------------------------------------ */
488
489
490/*
491 * Initialize ST-RAM swap device
492 * (lots copied and modified from sys_swapon() in mm/swapfile.c)
493 */
494static int __init swap_init(void *start_mem, void *swap_data)
495{
496 static struct dentry fake_dentry;
497 static struct vfsmount fake_vfsmnt;
498 struct swap_info_struct *p;
499 struct inode swap_inode;
500 unsigned int type;
501 void *addr;
502 int i, j, k, prev;
503
504 DPRINTK("swap_init(start_mem=%p, swap_data=%p)\n",
505 start_mem, swap_data);
506
507 /* need at least one page for swapping to (and this also isn't very
508 * much... :-) */
509 if (swap_end - swap_start < 2*PAGE_SIZE) {
510 printk( KERN_WARNING "stram_swap_init: swap space too small\n" );
511 return( 0 );
512 }
513
514 /* find free slot in swap_info */
515 for( p = swap_info, type = 0; type < nr_swapfiles; type++, p++ )
516 if (!(p->flags & SWP_USED))
517 break;
518 if (type >= MAX_SWAPFILES) {
519 printk( KERN_WARNING "stram_swap_init: max. number of "
520 "swap devices exhausted\n" );
521 return( 0 );
522 }
523 if (type >= nr_swapfiles)
524 nr_swapfiles = type+1;
525
526 stram_swap_info = p;
527 stram_swap_type = type;
528
529 /* fake some dir cache entries to give us some name in /proc/swaps */
530 fake_dentry.d_parent = &fake_dentry;
531 fake_dentry.d_name.name = "stram (internal)";
532 fake_dentry.d_name.len = 16;
533 fake_vfsmnt.mnt_parent = &fake_vfsmnt;
534
535 p->flags = SWP_USED;
536 p->swap_file = &fake_dentry;
537 p->swap_vfsmnt = &fake_vfsmnt;
538 p->swap_map = swap_data;
539 p->cluster_nr = 0;
540 p->next = -1;
541 p->prio = 0x7ff0; /* a rather high priority, but not the highest
542 * to give the user a chance to override */
543
544 /* call stram_open() directly, avoids at least the overhead in
545 * constructing a dummy file structure... */
546 swap_inode.i_rdev = MKDEV( STRAM_MAJOR, STRAM_MINOR );
547 stram_open( &swap_inode, MAGIC_FILE_P );
548 p->max = SWAP_NR(swap_end);
549
550 /* initialize swap_map: set regions that are already allocated or belong
551 * to kernel data space to SWAP_MAP_BAD, otherwise to free */
552 j = 0; /* # of free pages */
553 k = 0; /* # of already allocated pages (from pre-mem_init stram_alloc()) */
554 p->lowest_bit = 0;
555 p->highest_bit = 0;
556 for( i = 1, addr = SWAP_ADDR(1); i < p->max;
557 i++, addr += PAGE_SIZE ) {
558 if (in_some_region( addr )) {
559 p->swap_map[i] = SWAP_MAP_BAD;
560 ++k;
561 }
562 else if (kernel_in_stram && addr < start_mem ) {
563 p->swap_map[i] = SWAP_MAP_BAD;
564 }
565 else {
566 p->swap_map[i] = 0;
567 ++j;
568 if (!p->lowest_bit) p->lowest_bit = i;
569 p->highest_bit = i;
570 }
571 }
572 /* first page always reserved (and doesn't really belong to swap space) */
573 p->swap_map[0] = SWAP_MAP_BAD;
574
575 /* now swapping to this device ok */
576 p->pages = j + k;
577 swap_list_lock();
578 nr_swap_pages += j;
579 p->flags = SWP_WRITEOK;
580
581 /* insert swap space into swap_list */
582 prev = -1;
583 for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
584 if (p->prio >= swap_info[i].prio) {
585 break;
586 }
587 prev = i;
588 }
589 p->next = i;
590 if (prev < 0) {
591 swap_list.head = swap_list.next = p - swap_info;
592 } else {
593 swap_info[prev].next = p - swap_info;
594 }
595 swap_list_unlock();
596
597 printk( KERN_INFO "Using %dk (%d pages) of ST-RAM as swap space.\n",
598 p->pages << 2, p->pages );
599 return( 1 );
600}
601
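
The tail of swap_init() threads the new entry into swap_list in descending priority order, using array indices as links and -1 as the terminator. A stand-alone sketch of that insertion, with simplified types and hypothetical priorities:

#include <stdio.h>

struct fake_swap_info { int prio; int next; };

static struct fake_swap_info tbl[8];
static int head = -1;

/* insert slot idx so the list stays sorted, highest priority first */
static void insert_by_prio(int idx)
{
	int i, prev = -1;

	for (i = head; i >= 0; i = tbl[i].next) {
		if (tbl[idx].prio >= tbl[i].prio)
			break;
		prev = i;
	}
	tbl[idx].next = i;
	if (prev < 0)
		head = idx;
	else
		tbl[prev].next = idx;
}

int main(void)
{
	int i;

	tbl[0].prio = -1;     insert_by_prio(0);  /* an ordinary device  */
	tbl[1].prio = 0x7ff0; insert_by_prio(1);  /* the ST-RAM device   */

	for (i = head; i >= 0; i = tbl[i].next)
		printf("slot %d prio %d\n", i, tbl[i].prio);
	return 0;
}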
602
603/*
604 * The swap entry has been read in advance, and the page it refers to
605 * has already been brought back into memory by the caller.
606 *
607 * Always set the resulting pte to be nowrite (the same as COW pages
608 * after one process has exited). We don't know just how many PTEs will
609 * share this swap entry, so be cautious and let do_wp_page work out
610 * what to do if a write is requested later.
611 */
612static inline void unswap_pte(struct vm_area_struct * vma, unsigned long
613 address, pte_t *dir, swp_entry_t entry,
614 struct page *page)
615{
616 pte_t pte = *dir;
617
618 if (pte_none(pte))
619 return;
620 if (pte_present(pte)) {
621 /* If this entry is swap-cached, then page must already
622 hold the right address for any copies in physical
623 memory */
624 if (pte_page(pte) != page)
625 return;
626 /* We will be removing the swap cache in a moment, so... */
627 set_pte(dir, pte_mkdirty(pte));
628 return;
629 }
630 if (pte_val(pte) != entry.val)
631 return;
632
633 DPRINTK("unswap_pte: replacing entry %08lx by new page %p",
634 entry.val, page);
635 set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
636 swap_free(entry);
637 get_page(page);
638 inc_mm_counter(vma->vm_mm, rss);
639}
640
641static inline void unswap_pmd(struct vm_area_struct * vma, pmd_t *dir,
642 unsigned long address, unsigned long size,
643 unsigned long offset, swp_entry_t entry,
644 struct page *page)
645{
646 pte_t * pte;
647 unsigned long end;
648
649 if (pmd_none(*dir))
650 return;
651 if (pmd_bad(*dir)) {
652 pmd_ERROR(*dir);
653 pmd_clear(dir);
654 return;
655 }
656 pte = pte_offset_kernel(dir, address);
657 offset += address & PMD_MASK;
658 address &= ~PMD_MASK;
659 end = address + size;
660 if (end > PMD_SIZE)
661 end = PMD_SIZE;
662 do {
663 unswap_pte(vma, offset+address-vma->vm_start, pte, entry, page);
664 address += PAGE_SIZE;
665 pte++;
666 } while (address < end);
667}
668
669static inline void unswap_pgd(struct vm_area_struct * vma, pgd_t *dir,
670 unsigned long address, unsigned long size,
671 swp_entry_t entry, struct page *page)
672{
673 pmd_t * pmd;
674 unsigned long offset, end;
675
676 if (pgd_none(*dir))
677 return;
678 if (pgd_bad(*dir)) {
679 pgd_ERROR(*dir);
680 pgd_clear(dir);
681 return;
682 }
683 pmd = pmd_offset(dir, address);
684 offset = address & PGDIR_MASK;
685 address &= ~PGDIR_MASK;
686 end = address + size;
687 if (end > PGDIR_SIZE)
688 end = PGDIR_SIZE;
689 do {
690 unswap_pmd(vma, pmd, address, end - address, offset, entry,
691 page);
692 address = (address + PMD_SIZE) & PMD_MASK;
693 pmd++;
694 } while (address < end);
695}
696
697static void unswap_vma(struct vm_area_struct * vma, pgd_t *pgdir,
698 swp_entry_t entry, struct page *page)
699{
700 unsigned long start = vma->vm_start, end = vma->vm_end;
701
702 do {
703 unswap_pgd(vma, pgdir, start, end - start, entry, page);
704 start = (start + PGDIR_SIZE) & PGDIR_MASK;
705 pgdir++;
706 } while (start < end);
707}
708
709static void unswap_process(struct mm_struct * mm, swp_entry_t entry,
710 struct page *page)
711{
712 struct vm_area_struct* vma;
713
714 /*
715 * Go through process' page directory.
716 */
717 if (!mm)
718 return;
719 for (vma = mm->mmap; vma; vma = vma->vm_next) {
720 pgd_t * pgd = pgd_offset(mm, vma->vm_start);
721 unswap_vma(vma, pgd, entry, page);
722 }
723}
724
725
726static int unswap_by_read(unsigned short *map, unsigned long max,
727 unsigned long start, unsigned long n_pages)
728{
729 struct task_struct *p;
730 struct page *page;
731 swp_entry_t entry;
732 unsigned long i;
733
734 DPRINTK( "unswapping %lu..%lu by reading in\n",
735 start, start+n_pages-1 );
736
737 for( i = start; i < start+n_pages; ++i ) {
738 if (map[i] == SWAP_MAP_BAD) {
739 printk( KERN_ERR "get_stram_region: page %lu already "
740 "reserved??\n", i );
741 continue;
742 }
743
744 if (map[i]) {
745 entry = swp_entry(stram_swap_type, i);
746 DPRINTK("unswap: map[i=%lu]=%u nr_swap=%ld\n",
747 i, map[i], nr_swap_pages);
748
749 swap_device_lock(stram_swap_info);
750 map[i]++;
751 swap_device_unlock(stram_swap_info);
752 /* Get a page for the entry, using the existing
753 swap cache page if there is one. Otherwise,
754 get a clean page and read the swap into it. */
755 page = read_swap_cache_async(entry, NULL, 0);
756 if (!page) {
757 swap_free(entry);
758 return -ENOMEM;
759 }
760 read_lock(&tasklist_lock);
761 for_each_process(p)
762 unswap_process(p->mm, entry, page);
763 read_unlock(&tasklist_lock);
764 shmem_unuse(entry, page);
765 /* Now get rid of the extra reference to the
766 temporary page we've been using. */
767 if (PageSwapCache(page))
768 delete_from_swap_cache(page);
769 __free_page(page);
770 #ifdef DO_PROC
771 stat_swap_force++;
772 #endif
773 }
774
775 DPRINTK( "unswap: map[i=%lu]=%u nr_swap=%ld\n",
776 i, map[i], nr_swap_pages );
777 swap_list_lock();
778 swap_device_lock(stram_swap_info);
779 map[i] = SWAP_MAP_BAD;
780 if (stram_swap_info->lowest_bit == i)
781 stram_swap_info->lowest_bit++;
782 if (stram_swap_info->highest_bit == i)
783 stram_swap_info->highest_bit--;
784 --nr_swap_pages;
785 swap_device_unlock(stram_swap_info);
786 swap_list_unlock();
787 }
788
789 return 0;
790}
791
792/*
793 * reserve a region in ST-RAM swap space for an allocation
794 */
795static void *get_stram_region( unsigned long n_pages )
796{
797 unsigned short *map = stram_swap_info->swap_map;
798 unsigned long max = stram_swap_info->max;
799 unsigned long start, total_free, region_free;
800 int err;
801 void *ret = NULL;
802
803 DPRINTK( "get_stram_region(n_pages=%lu)\n", n_pages );
804
805 down(&stram_swap_sem);
806
807 /* disallow writing to the swap device now */
808 stram_swap_info->flags = SWP_USED;
809
810 /* find a region of n_pages pages in the swap space including as many free
811 * pages as possible (and excluding any already-reserved pages). */
812 if (!(start = find_free_region( n_pages, &total_free, &region_free )))
813 goto end;
814 DPRINTK( "get_stram_region: region starts at %lu, has %lu free pages\n",
815 start, region_free );
816
817 err = unswap_by_read(map, max, start, n_pages);
818 if (err)
819 goto end;
820
821 ret = SWAP_ADDR(start);
822 end:
823 /* allow using swap device again */
824 stram_swap_info->flags = SWP_WRITEOK;
825 up(&stram_swap_sem);
826 DPRINTK( "get_stram_region: returning %p\n", ret );
827 return( ret );
828}
829
830
831/*
832 * free a reserved region in ST-RAM swap space
833 */
834static void free_stram_region( unsigned long offset, unsigned long n_pages )
835{
836 unsigned short *map = stram_swap_info->swap_map;
837
838 DPRINTK( "free_stram_region(offset=%lu,n_pages=%lu)\n", offset, n_pages );
839
840 if (offset < 1 || offset + n_pages > stram_swap_info->max) {
841 printk( KERN_ERR "free_stram_region: Trying to free non-ST-RAM\n" );
842 return;
843 }
844
845 swap_list_lock();
846 swap_device_lock(stram_swap_info);
847 /* un-reserve the freed pages */
848 for( ; n_pages > 0; ++offset, --n_pages ) {
849 if (map[offset] != SWAP_MAP_BAD)
850 printk( KERN_ERR "free_stram_region: Swap page %lu was not "
851 "reserved\n", offset );
852 map[offset] = 0;
853 }
854
855 /* update swapping meta-data */
856 if (offset < stram_swap_info->lowest_bit)
857 stram_swap_info->lowest_bit = offset;
858 if (offset+n_pages-1 > stram_swap_info->highest_bit)
859 stram_swap_info->highest_bit = offset+n_pages-1;
860 if (stram_swap_info->prio > swap_info[swap_list.next].prio)
861 swap_list.next = swap_list.head;
862 nr_swap_pages += n_pages;
863 swap_device_unlock(stram_swap_info);
864 swap_list_unlock();
865}
866
867
868/* ------------------------------------------------------------------------ */
869/* Utility Functions for Swapping */
870/* ------------------------------------------------------------------------ */
871
872
873/* is addr in any of the allocated regions? */
874static int in_some_region(void *addr)
875{
876 BLOCK *p;
877
878 for( p = alloc_list; p; p = p->next ) {
879 if (p->start <= addr && addr < p->start + p->size)
880 return( 1 );
881 }
882 return( 0 );
883}
884
885
886static unsigned long find_free_region(unsigned long n_pages,
887 unsigned long *total_free,
888 unsigned long *region_free)
889{
890 unsigned short *map = stram_swap_info->swap_map;
891 unsigned long max = stram_swap_info->max;
892 unsigned long head, tail, max_start;
893 long nfree, max_free;
894
895 /* first scan the swap space for a suitable place for the allocation */
896 head = 1;
897 max_start = 0;
898 max_free = -1;
899 *total_free = 0;
900
901 start_over:
902 /* increment tail until final window size reached, and count free pages */
903 nfree = 0;
904 for( tail = head; tail-head < n_pages && tail < max; ++tail ) {
905 if (map[tail] == SWAP_MAP_BAD) {
906 head = tail+1;
907 goto start_over;
908 }
909 if (!map[tail]) {
910 ++nfree;
911 ++*total_free;
912 }
913 }
914 if (tail-head < n_pages)
915 goto out;
916 if (nfree > max_free) {
917 max_start = head;
918 max_free = nfree;
919 if (max_free >= n_pages)
920 /* don't need more free pages... :-) */
921 goto out;
922 }
923
924 /* now shift the window and look for the area where as many pages as
925 * possible are free */
926 while( tail < max ) {
927 nfree -= (map[head++] == 0);
928 if (map[tail] == SWAP_MAP_BAD) {
929 head = tail+1;
930 goto start_over;
931 }
932 if (!map[tail]) {
933 ++nfree;
934 ++*total_free;
935 }
936 ++tail;
937 if (nfree > max_free) {
938 max_start = head;
939 max_free = nfree;
940 if (max_free >= n_pages)
941 /* don't need more free pages... :-) */
942 goto out;
943 }
944 }
945
946 out:
947 if (max_free < 0) {
948 printk( KERN_NOTICE "get_stram_region: ST-RAM too full or fragmented "
949 "-- can't allocate %lu pages\n", n_pages );
950 return( 0 );
951 }
952
953 *region_free = max_free;
954 return( max_start );
955}
956
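
For reference, a stand-alone sketch of the sliding-window scan that find_free_region() performs: grow a window to n_pages, restart past any reserved slot, then slide the window while remembering the placement with the most free slots. The map contents below are hypothetical, and BAD stands in for SWAP_MAP_BAD:

#include <stdio.h>

#define BAD 0x8000

static unsigned long best_window(unsigned short *map, unsigned long max,
				 unsigned long n_pages)
{
	unsigned long head = 1, tail, best_start = 0; /* slot 0 reserved */
	long nfree = 0, best_free = -1;

	for (tail = head; tail < max; tail++) {
		if (map[tail] == BAD) {       /* window may not cover BAD */
			head = tail + 1;
			nfree = 0;
			continue;
		}
		if (!map[tail])
			nfree++;
		if (tail - head + 1 < n_pages)
			continue;             /* window not full yet */
		if (tail - head + 1 > n_pages)
			nfree -= (map[head++] == 0);  /* slide: drop head */
		if (nfree > best_free) {
			best_free = nfree;
			best_start = head;
			if (best_free >= (long)n_pages)
				break;        /* fully free window found */
		}
	}
	return best_free < 0 ? 0 : best_start;
}

int main(void)
{
	unsigned short map[] = { BAD, 0, 1, 0, 0, BAD, 0, 0, 0, 1 };

	printf("window starts at %lu\n",
	       best_window(map, sizeof(map)/sizeof(map[0]), 3)); /* -> 6 */
	return 0;
}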
957
958/* setup parameters from command line */
959void __init stram_swap_setup(char *str, int *ints)
960{
961 if (ints[0] >= 1)
962 max_swap_size = ((ints[1] < 0 ? 0 : ints[1]) * 1024) & PAGE_MASK;
963}
964
965
966/* ------------------------------------------------------------------------ */
967/* ST-RAM device */
968/* ------------------------------------------------------------------------ */
969
970static int refcnt;
971
972static void do_stram_request(request_queue_t *q)
973{
974 struct request *req;
975
976 while ((req = elv_next_request(q)) != NULL) {
977 void *start = swap_start + (req->sector << 9);
978 unsigned long len = req->current_nr_sectors << 9;
979 if ((start + len) > swap_end) {
980 printk( KERN_ERR "stram: bad access beyond end of device: "
981 "block=%ld, count=%d\n",
982 req->sector,
983 req->current_nr_sectors );
984 end_request(req, 0);
985 continue;
986 }
987
988 if (req->cmd == READ) {
989 memcpy(req->buffer, start, len);
990#ifdef DO_PROC
991 stat_swap_read += N_PAGES(len);
992#endif
993 }
994 else {
995 memcpy(start, req->buffer, len);
996#ifdef DO_PROC
997 stat_swap_write += N_PAGES(len);
998#endif
999 }
1000 end_request(req, 1);
1001 }
1002}
1003
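
do_stram_request() above is a classic RAM-disk request loop: a 512-byte sector index becomes a byte offset into the swap area, the transfer is a plain memcpy, and anything past swap_end is failed. A tiny stand-alone sketch of the offset arithmetic, with hypothetical request values:

#include <stdio.h>

int main(void)
{
	unsigned long dev_bytes = 16UL << 20;   /* hypothetical device size */
	unsigned long sector = 24, nr_sectors = 8;

	unsigned long offset = sector << 9;     /* sectors are 512 bytes */
	unsigned long len = nr_sectors << 9;

	if (offset + len > dev_bytes)
		printf("bad access beyond end of device\n");
	else
		printf("sector %lu, %lu sectors -> bytes %lu..%lu\n",
		       sector, nr_sectors, offset, offset + len - 1);
	return 0;
}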
1004
1005static int stram_open( struct inode *inode, struct file *filp )
1006{
1007 if (filp != MAGIC_FILE_P) {
1008 printk( KERN_NOTICE "Only kernel can open ST-RAM device\n" );
1009 return( -EPERM );
1010 }
1011 if (refcnt)
1012 return( -EBUSY );
1013 ++refcnt;
1014 return( 0 );
1015}
1016
1017static int stram_release( struct inode *inode, struct file *filp )
1018{
1019 if (filp != MAGIC_FILE_P) {
1020 printk( KERN_NOTICE "Only kernel can close ST-RAM device\n" );
1021 return( -EPERM );
1022 }
1023 if (refcnt > 0)
1024 --refcnt;
1025 return( 0 );
1026}
1027
1028
1029static struct block_device_operations stram_fops = {
1030 .open = stram_open,
1031 .release = stram_release,
1032};
1033
1034static struct gendisk *stram_disk;
1035static struct request_queue *stram_queue;
1036static DEFINE_SPINLOCK(stram_lock);
1037
1038int __init stram_device_init(void)
1039{
1040 if (!MACH_IS_ATARI)
1041 /* no point in initializing this, I hope */
1042 return -ENXIO;
1043
1044 if (!max_swap_size)
1045 /* swapping not enabled */
1046 return -ENXIO;
1047 stram_disk = alloc_disk(1);
1048 if (!stram_disk)
1049 return -ENOMEM;
1050
1051 if (register_blkdev(STRAM_MAJOR, "stram")) {
1052 put_disk(stram_disk);
1053 return -ENXIO;
1054 }
1055
1056 stram_queue = blk_init_queue(do_stram_request, &stram_lock);
1057 if (!stram_queue) {
1058 unregister_blkdev(STRAM_MAJOR, "stram");
1059 put_disk(stram_disk);
1060 return -ENOMEM;
1061 }
1062
1063 stram_disk->major = STRAM_MAJOR;
1064 stram_disk->first_minor = STRAM_MINOR;
1065 stram_disk->fops = &stram_fops;
1066 stram_disk->queue = stram_queue;
1067 sprintf(stram_disk->disk_name, "stram");
1068 set_capacity(stram_disk, (swap_end - swap_start)/512);
1069 add_disk(stram_disk);
1070 return 0;
1071}
1072
1073
1074
1075/* ------------------------------------------------------------------------ */
1076/* Misc Utility Functions */
1077/* ------------------------------------------------------------------------ */
1078
1079/* reserve a range of pages */
1080static void reserve_region(void *start, void *end)
1081{
1082 reserve_bootmem (virt_to_phys(start), end - start);
1083}
1084
1085#endif /* CONFIG_STRAM_SWAP */
1086
1087 242
1088/* ------------------------------------------------------------------------ */ 243/* ------------------------------------------------------------------------ */
1089/* Region Management */ 244/* Region Management */
@@ -1173,50 +328,9 @@ int get_stram_list( char *buf )
1173{ 328{
1174 int len = 0; 329 int len = 0;
1175 BLOCK *p; 330 BLOCK *p;
1176#ifdef CONFIG_STRAM_SWAP
1177 int i;
1178 unsigned short *map = stram_swap_info->swap_map;
1179 unsigned long max = stram_swap_info->max;
1180 unsigned free = 0, used = 0, rsvd = 0;
1181#endif
1182 331
1183#ifdef CONFIG_STRAM_SWAP 332 PRINT_PROC("Total ST-RAM: %8u kB\n",
1184 if (max_swap_size) {
1185 for( i = 1; i < max; ++i ) {
1186 if (!map[i])
1187 ++free;
1188 else if (map[i] == SWAP_MAP_BAD)
1189 ++rsvd;
1190 else
1191 ++used;
1192 }
1193 PRINT_PROC(
1194 "Total ST-RAM: %8u kB\n"
1195 "Total ST-RAM swap: %8lu kB\n"
1196 "Free swap: %8u kB\n"
1197 "Used swap: %8u kB\n"
1198 "Allocated swap: %8u kB\n"
1199 "Swap Reads: %8u\n"
1200 "Swap Writes: %8u\n"
1201 "Swap Forced Reads: %8u\n",
1202 (stram_end - stram_start) >> 10,
1203 (max-1) << (PAGE_SHIFT-10),
1204 free << (PAGE_SHIFT-10),
1205 used << (PAGE_SHIFT-10),
1206 rsvd << (PAGE_SHIFT-10),
1207 stat_swap_read,
1208 stat_swap_write,
1209 stat_swap_force );
1210 }
1211 else {
1212#endif
1213 PRINT_PROC( "ST-RAM swapping disabled\n" );
1214 PRINT_PROC("Total ST-RAM: %8u kB\n",
1215 (stram_end - stram_start) >> 10); 333 (stram_end - stram_start) >> 10);
1216#ifdef CONFIG_STRAM_SWAP
1217 }
1218#endif
1219
1220 PRINT_PROC( "Allocated regions:\n" ); 334 PRINT_PROC( "Allocated regions:\n" );
1221 for( p = alloc_list; p; p = p->next ) { 335 for( p = alloc_list; p; p = p->next ) {
1222 if (len + 50 >= PAGE_SIZE) 336 if (len + 50 >= PAGE_SIZE)
@@ -1227,8 +341,6 @@ int get_stram_list( char *buf )
1227 p->owner); 341 p->owner);
1228 if (p->flags & BLOCK_GFP) 342 if (p->flags & BLOCK_GFP)
1229 PRINT_PROC( "page-alloced)\n" ); 343 PRINT_PROC( "page-alloced)\n" );
1230 else if (p->flags & BLOCK_INSWAP)
1231 PRINT_PROC( "in swap)\n" );
1232 else 344 else
1233 PRINT_PROC( "??)\n" ); 345 PRINT_PROC( "??)\n" );
1234 } 346 }
diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 5dcb3fa35ea9..fe2383e36b06 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -201,7 +201,7 @@ void *__ioremap(unsigned long physaddr, unsigned long size, int cacheflag)
201 virtaddr += PTRTREESIZE; 201 virtaddr += PTRTREESIZE;
202 size -= PTRTREESIZE; 202 size -= PTRTREESIZE;
203 } else { 203 } else {
204 pte_dir = pte_alloc_kernel(&init_mm, pmd_dir, virtaddr); 204 pte_dir = pte_alloc_kernel(pmd_dir, virtaddr);
205 if (!pte_dir) { 205 if (!pte_dir) {
206 printk("ioremap: no mem for pte_dir\n"); 206 printk("ioremap: no mem for pte_dir\n");
207 return NULL; 207 return NULL;
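
This one-line change is the pattern repeated across the ioremap/remap hunks in this series: pte_alloc_kernel() loses its mm argument, and the call sites drop their explicit locking of init_mm.page_table_lock, which the hunks suggest has moved inside the allocator. In outline (a kernel-style fragment, not a complete function):

	/* before: the caller named init_mm and, in the remap_area_pages()
	 * style callers, wrapped the whole walk in page_table_lock */
	spin_lock(&init_mm.page_table_lock);
	pte = pte_alloc_kernel(&init_mm, pmd, address);
	/* ... fill in ptes ... */
	spin_unlock(&init_mm.page_table_lock);

	/* after: init_mm is implied and no lock is taken at the call
	 * site, so the sequence reduces to */
	pte = pte_alloc_kernel(pmd, address);
	if (!pte)
		return -ENOMEM;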
diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c
index 32e55adfeb8e..117481e86305 100644
--- a/arch/m68k/sun3x/dvma.c
+++ b/arch/m68k/sun3x/dvma.c
@@ -116,7 +116,7 @@ inline int dvma_map_cpu(unsigned long kaddr,
116 pte_t *pte; 116 pte_t *pte;
117 unsigned long end3; 117 unsigned long end3;
118 118
119 if((pte = pte_alloc_kernel(&init_mm, pmd, vaddr)) == NULL) { 119 if((pte = pte_alloc_kernel(pmd, vaddr)) == NULL) {
120 ret = -ENOMEM; 120 ret = -ENOMEM;
121 goto out; 121 goto out;
122 } 122 }
diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c
index 99262fe64560..7ce34d4aa220 100644
--- a/arch/mips/kernel/irixelf.c
+++ b/arch/mips/kernel/irixelf.c
@@ -697,7 +697,6 @@ static int load_irix_binary(struct linux_binprm * bprm, struct pt_regs * regs)
697 /* Do this so that we can load the interpreter, if need be. We will 697 /* Do this so that we can load the interpreter, if need be. We will
698 * change some of these later. 698 * change some of these later.
699 */ 699 */
700 set_mm_counter(current->mm, rss, 0);
701 setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); 700 setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
702 current->mm->start_stack = bprm->p; 701 current->mm->start_stack = bprm->p;
703 702
diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c
index 9c44ca70befa..3101d1db5592 100644
--- a/arch/mips/mm/ioremap.c
+++ b/arch/mips/mm/ioremap.c
@@ -55,7 +55,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address,
55 if (address >= end) 55 if (address >= end)
56 BUG(); 56 BUG();
57 do { 57 do {
58 pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); 58 pte_t * pte = pte_alloc_kernel(pmd, address);
59 if (!pte) 59 if (!pte)
60 return -ENOMEM; 60 return -ENOMEM;
61 remap_area_pte(pte, address, end - address, address + phys_addr, flags); 61 remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -77,7 +77,6 @@ static int remap_area_pages(unsigned long address, phys_t phys_addr,
77 flush_cache_all(); 77 flush_cache_all();
78 if (address >= end) 78 if (address >= end)
79 BUG(); 79 BUG();
80 spin_lock(&init_mm.page_table_lock);
81 do { 80 do {
82 pud_t *pud; 81 pud_t *pud;
83 pmd_t *pmd; 82 pmd_t *pmd;
@@ -96,7 +95,6 @@ static int remap_area_pages(unsigned long address, phys_t phys_addr,
96 address = (address + PGDIR_SIZE) & PGDIR_MASK; 95 address = (address + PGDIR_SIZE) & PGDIR_MASK;
97 dir++; 96 dir++;
98 } while (address && (address < end)); 97 } while (address && (address < end));
99 spin_unlock(&init_mm.page_table_lock);
100 flush_tlb_all(); 98 flush_tlb_all();
101 return error; 99 return error;
102} 100}
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index e15f09eaed12..a065349aee37 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -270,7 +270,6 @@ void flush_dcache_page(struct page *page)
270 unsigned long offset; 270 unsigned long offset;
271 unsigned long addr; 271 unsigned long addr;
272 pgoff_t pgoff; 272 pgoff_t pgoff;
273 pte_t *pte;
274 unsigned long pfn = page_to_pfn(page); 273 unsigned long pfn = page_to_pfn(page);
275 274
276 275
@@ -301,21 +300,16 @@ void flush_dcache_page(struct page *page)
301 * taking a page fault if the pte doesn't exist. 300 * taking a page fault if the pte doesn't exist.
302 * This is just for speed. If the page translation 301 * This is just for speed. If the page translation
303 * isn't there, there's no point exciting the 302 * isn't there, there's no point exciting the
304 * nadtlb handler into a nullification frenzy */ 303 * nadtlb handler into a nullification frenzy.
305 304 *
306 305 * Make sure we really have this page: the private
307 if(!(pte = translation_exists(mpnt, addr)))
308 continue;
309
310 /* make sure we really have this page: the private
311 * mappings may cover this area but have COW'd this 306 * mappings may cover this area but have COW'd this
312 * particular page */ 307 * particular page.
313 if(pte_pfn(*pte) != pfn) 308 */
314 continue; 309 if (translation_exists(mpnt, addr, pfn)) {
315 310 __flush_cache_page(mpnt, addr);
316 __flush_cache_page(mpnt, addr); 311 break;
317 312 }
318 break;
319 } 313 }
320 flush_dcache_mmap_unlock(mapping); 314 flush_dcache_mmap_unlock(mapping);
321} 315}
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index ae6213d71670..f94a02ef3d95 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -114,7 +114,7 @@ static inline int map_pmd_uncached(pmd_t * pmd, unsigned long vaddr,
114 if (end > PGDIR_SIZE) 114 if (end > PGDIR_SIZE)
115 end = PGDIR_SIZE; 115 end = PGDIR_SIZE;
116 do { 116 do {
117 pte_t * pte = pte_alloc_kernel(&init_mm, pmd, vaddr); 117 pte_t * pte = pte_alloc_kernel(pmd, vaddr);
118 if (!pte) 118 if (!pte)
119 return -ENOMEM; 119 return -ENOMEM;
120 if (map_pte_uncached(pte, orig_vaddr, end - vaddr, paddr_ptr)) 120 if (map_pte_uncached(pte, orig_vaddr, end - vaddr, paddr_ptr))
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 2886ad70db48..29b998e430e6 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -505,7 +505,9 @@ void show_mem(void)
505 505
506 for (j = node_start_pfn(i); j < node_end_pfn(i); j++) { 506 for (j = node_start_pfn(i); j < node_end_pfn(i); j++) {
507 struct page *p; 507 struct page *p;
508 unsigned long flags;
508 509
510 pgdat_resize_lock(NODE_DATA(i), &flags);
509 p = nid_page_nr(i, j) - node_start_pfn(i); 511 p = nid_page_nr(i, j) - node_start_pfn(i);
510 512
511 total++; 513 total++;
@@ -517,6 +519,7 @@ void show_mem(void)
517 free++; 519 free++;
518 else 520 else
519 shared += page_count(p) - 1; 521 shared += page_count(p) - 1;
522 pgdat_resize_unlock(NODE_DATA(i), &flags);
520 } 523 }
521 } 524 }
522#endif 525#endif
diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c
index f2df502cdae3..5c7a1b3b9326 100644
--- a/arch/parisc/mm/ioremap.c
+++ b/arch/parisc/mm/ioremap.c
@@ -52,7 +52,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
52 if (address >= end) 52 if (address >= end)
53 BUG(); 53 BUG();
54 do { 54 do {
55 pte_t * pte = pte_alloc_kernel(NULL, pmd, address); 55 pte_t * pte = pte_alloc_kernel(pmd, address);
56 if (!pte) 56 if (!pte)
57 return -ENOMEM; 57 return -ENOMEM;
58 remap_area_pte(pte, address, end - address, address + phys_addr, flags); 58 remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -75,10 +75,9 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
75 flush_cache_all(); 75 flush_cache_all();
76 if (address >= end) 76 if (address >= end)
77 BUG(); 77 BUG();
78 spin_lock(&init_mm.page_table_lock);
79 do { 78 do {
80 pmd_t *pmd; 79 pmd_t *pmd;
81 pmd = pmd_alloc(dir, address); 80 pmd = pmd_alloc(&init_mm, dir, address);
82 error = -ENOMEM; 81 error = -ENOMEM;
83 if (!pmd) 82 if (!pmd)
84 break; 83 break;
@@ -89,7 +88,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
89 address = (address + PGDIR_SIZE) & PGDIR_MASK; 88 address = (address + PGDIR_SIZE) & PGDIR_MASK;
90 dir++; 89 dir++;
91 } while (address && (address < end)); 90 } while (address && (address < end));
92 spin_unlock(&init_mm.page_table_lock);
93 flush_tlb_all(); 91 flush_tlb_all();
94 return error; 92 return error;
95} 93}
diff --git a/arch/ppc/kernel/dma-mapping.c b/arch/ppc/kernel/dma-mapping.c
index 0f710d2baec6..685fd0defe23 100644
--- a/arch/ppc/kernel/dma-mapping.c
+++ b/arch/ppc/kernel/dma-mapping.c
@@ -335,8 +335,6 @@ static int __init dma_alloc_init(void)
335 pte_t *pte; 335 pte_t *pte;
336 int ret = 0; 336 int ret = 0;
337 337
338 spin_lock(&init_mm.page_table_lock);
339
340 do { 338 do {
341 pgd = pgd_offset(&init_mm, CONSISTENT_BASE); 339 pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
342 pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE); 340 pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
@@ -347,7 +345,7 @@ static int __init dma_alloc_init(void)
347 } 345 }
348 WARN_ON(!pmd_none(*pmd)); 346 WARN_ON(!pmd_none(*pmd));
349 347
350 pte = pte_alloc_kernel(&init_mm, pmd, CONSISTENT_BASE); 348 pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
351 if (!pte) { 349 if (!pte) {
352 printk(KERN_ERR "%s: no pte tables\n", __func__); 350 printk(KERN_ERR "%s: no pte tables\n", __func__);
353 ret = -ENOMEM; 351 ret = -ENOMEM;
@@ -357,8 +355,6 @@ static int __init dma_alloc_init(void)
357 consistent_pte = pte; 355 consistent_pte = pte;
358 } while (0); 356 } while (0);
359 357
360 spin_unlock(&init_mm.page_table_lock);
361
362 return ret; 358 return ret;
363} 359}
364 360
diff --git a/arch/ppc/mm/4xx_mmu.c b/arch/ppc/mm/4xx_mmu.c
index b7bcbc232f39..4d006aa1a0d1 100644
--- a/arch/ppc/mm/4xx_mmu.c
+++ b/arch/ppc/mm/4xx_mmu.c
@@ -110,13 +110,11 @@ unsigned long __init mmu_mapin_ram(void)
110 pmd_t *pmdp; 110 pmd_t *pmdp;
111 unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE; 111 unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
112 112
113 spin_lock(&init_mm.page_table_lock);
114 pmdp = pmd_offset(pgd_offset_k(v), v); 113 pmdp = pmd_offset(pgd_offset_k(v), v);
115 pmd_val(*pmdp++) = val; 114 pmd_val(*pmdp++) = val;
116 pmd_val(*pmdp++) = val; 115 pmd_val(*pmdp++) = val;
117 pmd_val(*pmdp++) = val; 116 pmd_val(*pmdp++) = val;
118 pmd_val(*pmdp++) = val; 117 pmd_val(*pmdp++) = val;
119 spin_unlock(&init_mm.page_table_lock);
120 118
121 v += LARGE_PAGE_SIZE_16M; 119 v += LARGE_PAGE_SIZE_16M;
122 p += LARGE_PAGE_SIZE_16M; 120 p += LARGE_PAGE_SIZE_16M;
@@ -127,10 +125,8 @@ unsigned long __init mmu_mapin_ram(void)
127 pmd_t *pmdp; 125 pmd_t *pmdp;
128 unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE; 126 unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
129 127
130 spin_lock(&init_mm.page_table_lock);
131 pmdp = pmd_offset(pgd_offset_k(v), v); 128 pmdp = pmd_offset(pgd_offset_k(v), v);
132 pmd_val(*pmdp) = val; 129 pmd_val(*pmdp) = val;
133 spin_unlock(&init_mm.page_table_lock);
134 130
135 v += LARGE_PAGE_SIZE_4M; 131 v += LARGE_PAGE_SIZE_4M;
136 p += LARGE_PAGE_SIZE_4M; 132 p += LARGE_PAGE_SIZE_4M;
diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c
index 43505b1fc5d8..6ea9185fd120 100644
--- a/arch/ppc/mm/pgtable.c
+++ b/arch/ppc/mm/pgtable.c
@@ -280,18 +280,16 @@ map_page(unsigned long va, phys_addr_t pa, int flags)
280 pte_t *pg; 280 pte_t *pg;
281 int err = -ENOMEM; 281 int err = -ENOMEM;
282 282
283 spin_lock(&init_mm.page_table_lock);
284 /* Use upper 10 bits of VA to index the first level map */ 283 /* Use upper 10 bits of VA to index the first level map */
285 pd = pmd_offset(pgd_offset_k(va), va); 284 pd = pmd_offset(pgd_offset_k(va), va);
286 /* Use middle 10 bits of VA to index the second-level map */ 285 /* Use middle 10 bits of VA to index the second-level map */
287 pg = pte_alloc_kernel(&init_mm, pd, va); 286 pg = pte_alloc_kernel(pd, va);
288 if (pg != 0) { 287 if (pg != 0) {
289 err = 0; 288 err = 0;
290 set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); 289 set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
291 if (mem_init_done) 290 if (mem_init_done)
292 flush_HPTE(0, va, pmd_val(*pd)); 291 flush_HPTE(0, va, pmd_val(*pd));
293 } 292 }
294 spin_unlock(&init_mm.page_table_lock);
295 return err; 293 return err;
296} 294}
297 295
diff --git a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c
index efa985f05aca..4aacf521e3e4 100644
--- a/arch/ppc64/kernel/vdso.c
+++ b/arch/ppc64/kernel/vdso.c
@@ -176,13 +176,13 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
176 return NOPAGE_SIGBUS; 176 return NOPAGE_SIGBUS;
177 177
178 /* 178 /*
179 * Last page is systemcfg, special handling here, no get_page() a 179 * Last page is systemcfg.
180 * this is a reserved page
181 */ 180 */
182 if ((vma->vm_end - address) <= PAGE_SIZE) 181 if ((vma->vm_end - address) <= PAGE_SIZE)
183 return virt_to_page(systemcfg); 182 pg = virt_to_page(systemcfg);
183 else
184 pg = virt_to_page(vbase + offset);
184 185
185 pg = virt_to_page(vbase + offset);
186 get_page(pg); 186 get_page(pg);
187 DBG(" ->page count: %d\n", page_count(pg)); 187 DBG(" ->page count: %d\n", page_count(pg));
188 188
@@ -259,7 +259,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack)
259 * gettimeofday will be totally dead. It's fine to use that for setting 259 * gettimeofday will be totally dead. It's fine to use that for setting
260 * breakpoints in the vDSO code pages though 260 * breakpoints in the vDSO code pages though
261 */ 261 */
262 vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; 262 vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_RESERVED;
263 vma->vm_flags |= mm->def_flags; 263 vma->vm_flags |= mm->def_flags;
264 vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; 264 vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
265 vma->vm_ops = &vdso_vmops; 265 vma->vm_ops = &vdso_vmops;
@@ -603,6 +603,8 @@ void __init vdso_init(void)
603 ClearPageReserved(pg); 603 ClearPageReserved(pg);
604 get_page(pg); 604 get_page(pg);
605 } 605 }
606
607 get_page(virt_to_page(systemcfg));
606} 608}
607 609
608int in_gate_area_no_task(unsigned long addr) 610int in_gate_area_no_task(unsigned long addr)
diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c
index c65b87b92756..f4ca29cf5364 100644
--- a/arch/ppc64/mm/imalloc.c
+++ b/arch/ppc64/mm/imalloc.c
@@ -300,12 +300,7 @@ void im_free(void * addr)
300 for (p = &imlist ; (tmp = *p) ; p = &tmp->next) { 300 for (p = &imlist ; (tmp = *p) ; p = &tmp->next) {
301 if (tmp->addr == addr) { 301 if (tmp->addr == addr) {
302 *p = tmp->next; 302 *p = tmp->next;
303
304 /* XXX: do we need the lock? */
305 spin_lock(&init_mm.page_table_lock);
306 unmap_vm_area(tmp); 303 unmap_vm_area(tmp);
307 spin_unlock(&init_mm.page_table_lock);
308
309 kfree(tmp); 304 kfree(tmp);
310 up(&imlist_sem); 305 up(&imlist_sem);
311 return; 306 return;
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
index be64b157afce..e2bd7776622f 100644
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -104,6 +104,8 @@ void show_mem(void)
104 show_free_areas(); 104 show_free_areas();
105 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); 105 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
106 for_each_pgdat(pgdat) { 106 for_each_pgdat(pgdat) {
107 unsigned long flags;
108 pgdat_resize_lock(pgdat, &flags);
107 for (i = 0; i < pgdat->node_spanned_pages; i++) { 109 for (i = 0; i < pgdat->node_spanned_pages; i++) {
108 page = pgdat_page_nr(pgdat, i); 110 page = pgdat_page_nr(pgdat, i);
109 total++; 111 total++;
@@ -114,6 +116,7 @@ void show_mem(void)
114 else if (page_count(page)) 116 else if (page_count(page))
115 shared += page_count(page) - 1; 117 shared += page_count(page) - 1;
116 } 118 }
119 pgdat_resize_unlock(pgdat, &flags);
117 } 120 }
118 printk("%ld pages of RAM\n", total); 121 printk("%ld pages of RAM\n", total);
119 printk("%ld reserved pages\n", reserved); 122 printk("%ld reserved pages\n", reserved);
@@ -155,7 +158,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
155 unsigned long vsid; 158 unsigned long vsid;
156 159
157 if (mem_init_done) { 160 if (mem_init_done) {
158 spin_lock(&init_mm.page_table_lock);
159 pgdp = pgd_offset_k(ea); 161 pgdp = pgd_offset_k(ea);
160 pudp = pud_alloc(&init_mm, pgdp, ea); 162 pudp = pud_alloc(&init_mm, pgdp, ea);
161 if (!pudp) 163 if (!pudp)
@@ -163,12 +165,11 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
163 pmdp = pmd_alloc(&init_mm, pudp, ea); 165 pmdp = pmd_alloc(&init_mm, pudp, ea);
164 if (!pmdp) 166 if (!pmdp)
165 return -ENOMEM; 167 return -ENOMEM;
166 ptep = pte_alloc_kernel(&init_mm, pmdp, ea); 168 ptep = pte_alloc_kernel(pmdp, ea);
167 if (!ptep) 169 if (!ptep)
168 return -ENOMEM; 170 return -ENOMEM;
169 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, 171 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
170 __pgprot(flags))); 172 __pgprot(flags)));
171 spin_unlock(&init_mm.page_table_lock);
172 } else { 173 } else {
173 unsigned long va, vpn, hash, hpteg; 174 unsigned long va, vpn, hash, hpteg;
174 175
@@ -649,11 +650,14 @@ void __init mem_init(void)
649#endif 650#endif
650 651
651 for_each_pgdat(pgdat) { 652 for_each_pgdat(pgdat) {
653 unsigned long flags;
654 pgdat_resize_lock(pgdat, &flags);
652 for (i = 0; i < pgdat->node_spanned_pages; i++) { 655 for (i = 0; i < pgdat->node_spanned_pages; i++) {
653 page = pgdat_page_nr(pgdat, i); 656 page = pgdat_page_nr(pgdat, i);
654 if (PageReserved(page)) 657 if (PageReserved(page))
655 reservedpages++; 658 reservedpages++;
656 } 659 }
660 pgdat_resize_unlock(pgdat, &flags);
657 } 661 }
658 662
659 codesize = (unsigned long)&_etext - (unsigned long)&_stext; 663 codesize = (unsigned long)&_etext - (unsigned long)&_stext;
@@ -867,3 +871,80 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
867 return vma_prot; 871 return vma_prot;
868} 872}
869EXPORT_SYMBOL(phys_mem_access_prot); 873EXPORT_SYMBOL(phys_mem_access_prot);
874
875#ifdef CONFIG_MEMORY_HOTPLUG
876
877void online_page(struct page *page)
878{
879 ClearPageReserved(page);
880 free_cold_page(page);
881 totalram_pages++;
882 num_physpages++;
883}
884
885/*
886 * This works only for the non-NUMA case. Later, we'll need a lookup
887 * to convert real physical addresses to a nid without using
888 * pfn_to_nid().
889 */
890int __devinit add_memory(u64 start, u64 size)
891{
892 struct pglist_data *pgdata = NODE_DATA(0);
893 struct zone *zone;
894 unsigned long start_pfn = start >> PAGE_SHIFT;
895 unsigned long nr_pages = size >> PAGE_SHIFT;
896
897 /* this should work for most non-highmem platforms */
898 zone = pgdata->node_zones;
899
900 return __add_pages(zone, start_pfn, nr_pages);
901
902
903}
904
905/*
906 * This first pass only checks whether the remove request falls
907 * within the RMO; removal within the RMO is not allowed.
908 */
909int __devinit remove_memory(u64 start, u64 size)
910{
911 struct zone *zone;
912 unsigned long start_pfn, end_pfn, nr_pages;
913
914 start_pfn = start >> PAGE_SHIFT;
915 nr_pages = size >> PAGE_SHIFT;
916 end_pfn = start_pfn + nr_pages;
917
918 printk("%s(): Attempting to remove memory in range "
919 "%lx to %lx\n", __func__, start, start+size);
920 /*
921 * check for range within RMO
922 */
923 zone = page_zone(pfn_to_page(start_pfn));
924
925 printk("%s(): memory will be removed from "
926 "the %s zone\n", __func__, zone->name);
927
928 /*
929 * not handling removing memory ranges that
930 * overlap multiple zones yet
931 */
932 if (end_pfn > (zone->zone_start_pfn + zone->spanned_pages))
933 goto overlap;
934
935 /* make sure it is NOT in RMO */
936 if ((start < lmb.rmo_size) || ((start+size) < lmb.rmo_size)) {
937 printk("%s(): range to be removed must NOT be in RMO!\n",
938 __func__);
939 goto in_rmo;
940 }
941
942 return __remove_pages(zone, start_pfn, nr_pages);
943
944overlap:
945 printk("%s(): memory range to be removed overlaps "
946 "multiple zones!!!\n", __func__);
947in_rmo:
948 return -1;
949}
950#endif /* CONFIG_MEMORY_HOTPLUG */
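
add_memory() and remove_memory() above reduce their byte-addressed arguments to page frame numbers before calling __add_pages()/__remove_pages(). A stand-alone sketch of that conversion, with hypothetical values and a 4 kB page size assumed:

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned long long start = 0x10000000ULL;   /* 256 MB mark */
	unsigned long long size = 64ULL << 20;      /* 64 MB block */

	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;

	printf("start_pfn=%lu nr_pages=%lu end_pfn=%lu\n",
	       start_pfn, nr_pages, start_pfn + nr_pages);
	return 0;
}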
diff --git a/arch/s390/mm/ioremap.c b/arch/s390/mm/ioremap.c
index c6c39d868bc8..0f6e9ecbefe2 100644
--- a/arch/s390/mm/ioremap.c
+++ b/arch/s390/mm/ioremap.c
@@ -58,7 +58,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
58 if (address >= end) 58 if (address >= end)
59 BUG(); 59 BUG();
60 do { 60 do {
61 pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); 61 pte_t * pte = pte_alloc_kernel(pmd, address);
62 if (!pte) 62 if (!pte)
63 return -ENOMEM; 63 return -ENOMEM;
64 remap_area_pte(pte, address, end - address, address + phys_addr, flags); 64 remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -80,7 +80,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
80 flush_cache_all(); 80 flush_cache_all();
81 if (address >= end) 81 if (address >= end)
82 BUG(); 82 BUG();
83 spin_lock(&init_mm.page_table_lock);
84 do { 83 do {
85 pmd_t *pmd; 84 pmd_t *pmd;
86 pmd = pmd_alloc(&init_mm, dir, address); 85 pmd = pmd_alloc(&init_mm, dir, address);
@@ -94,7 +93,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
94 address = (address + PGDIR_SIZE) & PGDIR_MASK; 93 address = (address + PGDIR_SIZE) & PGDIR_MASK;
95 dir++; 94 dir++;
96 } while (address && (address < end)); 95 } while (address && (address < end));
97 spin_unlock(&init_mm.page_table_lock);
98 flush_tlb_all(); 96 flush_tlb_all();
99 return 0; 97 return 0;
100} 98}
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 7abba2161da6..775f86cd3fe8 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -194,10 +194,13 @@ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
194 unsigned long address) 194 unsigned long address)
195{ 195{
196 unsigned long addrmax = P4SEG; 196 unsigned long addrmax = P4SEG;
197 pgd_t *dir; 197 pgd_t *pgd;
198 pmd_t *pmd; 198 pmd_t *pmd;
199 pte_t *pte; 199 pte_t *pte;
200 pte_t entry; 200 pte_t entry;
201 struct mm_struct *mm;
202 spinlock_t *ptl;
203 int ret = 1;
201 204
202#ifdef CONFIG_SH_KGDB 205#ifdef CONFIG_SH_KGDB
203 if (kgdb_nofault && kgdb_bus_err_hook) 206 if (kgdb_nofault && kgdb_bus_err_hook)
@@ -208,28 +211,28 @@ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
208 addrmax = P4SEG_STORE_QUE + 0x04000000; 211 addrmax = P4SEG_STORE_QUE + 0x04000000;
209#endif 212#endif
210 213
211 if (address >= P3SEG && address < addrmax) 214 if (address >= P3SEG && address < addrmax) {
212 dir = pgd_offset_k(address); 215 pgd = pgd_offset_k(address);
213 else if (address >= TASK_SIZE) 216 mm = NULL;
217 } else if (address >= TASK_SIZE)
214 return 1; 218 return 1;
215 else if (!current->mm) 219 else if (!(mm = current->mm))
216 return 1; 220 return 1;
217 else 221 else
218 dir = pgd_offset(current->mm, address); 222 pgd = pgd_offset(mm, address);
219 223
220 pmd = pmd_offset(dir, address); 224 pmd = pmd_offset(pgd, address);
221 if (pmd_none(*pmd)) 225 if (pmd_none_or_clear_bad(pmd))
222 return 1;
223 if (pmd_bad(*pmd)) {
224 pmd_ERROR(*pmd);
225 pmd_clear(pmd);
226 return 1; 226 return 1;
227 } 227 if (mm)
228 pte = pte_offset_kernel(pmd, address); 228 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
229 else
230 pte = pte_offset_kernel(pmd, address);
231
229 entry = *pte; 232 entry = *pte;
230 if (pte_none(entry) || pte_not_present(entry) 233 if (pte_none(entry) || pte_not_present(entry)
231 || (writeaccess && !pte_write(entry))) 234 || (writeaccess && !pte_write(entry)))
232 return 1; 235 goto unlock;
233 236
234 if (writeaccess) 237 if (writeaccess)
235 entry = pte_mkdirty(entry); 238 entry = pte_mkdirty(entry);
@@ -251,8 +254,11 @@ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
251 254
252 set_pte(pte, entry); 255 set_pte(pte, entry);
253 update_mmu_cache(NULL, address, entry); 256 update_mmu_cache(NULL, address, entry);
254 257 ret = 0;
255 return 0; 258unlock:
259 if (mm)
260 pte_unmap_unlock(pte, ptl);
261 return ret;
256} 262}
257 263
258void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) 264void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
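
The fault-path change above shows the lookup split this series introduces: kernel addresses keep the lock-free pte_offset_kernel(), while user addresses take the page-table lock together with the mapping and release both in one call. In outline (a kernel-style fragment drawn from the hunk):

	if (mm) {
		/* user mapping: map the pte and take its lock atomically */
		pte = pte_offset_map_lock(mm, pmd, address, &ptl);
		/* ... inspect or update *pte ... */
		pte_unmap_unlock(pte, ptl);
	} else {
		/* kernel mapping: no per-mm page-table lock to take */
		pte = pte_offset_kernel(pmd, address);
	}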
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 95bb1a6c6060..6b7a7688c98e 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -54,8 +54,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
54 return pte; 54 return pte;
55} 55}
56 56
57#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
58
59void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 57void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
60 pte_t *ptep, pte_t entry) 58 pte_t *ptep, pte_t entry)
61{ 59{
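
Note: mk_pte_huge() can go because the huge pte is now built outside this file; all the macro ever did was fold _PAGE_SZHUGE into an ordinary pte. The equivalent, sketched from the deleted macro body (illustrative):

static pte_t make_huge_pte(struct page *page, pgprot_t prot)
{
	pte_t entry = mk_pte(page, prot);

	pte_val(entry) |= _PAGE_SZHUGE;	/* what mk_pte_huge() did */
	return entry;
}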
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
index 9f490c2742f0..e794e27a72f1 100644
--- a/arch/sh/mm/ioremap.c
+++ b/arch/sh/mm/ioremap.c
@@ -57,7 +57,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address,
57 if (address >= end) 57 if (address >= end)
58 BUG(); 58 BUG();
59 do { 59 do {
60 pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); 60 pte_t * pte = pte_alloc_kernel(pmd, address);
61 if (!pte) 61 if (!pte)
62 return -ENOMEM; 62 return -ENOMEM;
63 remap_area_pte(pte, address, end - address, address + phys_addr, flags); 63 remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -79,7 +79,6 @@ int remap_area_pages(unsigned long address, unsigned long phys_addr,
79 flush_cache_all(); 79 flush_cache_all();
80 if (address >= end) 80 if (address >= end)
81 BUG(); 81 BUG();
82 spin_lock(&init_mm.page_table_lock);
83 do { 82 do {
84 pmd_t *pmd; 83 pmd_t *pmd;
85 pmd = pmd_alloc(&init_mm, dir, address); 84 pmd = pmd_alloc(&init_mm, dir, address);
@@ -93,7 +92,6 @@ int remap_area_pages(unsigned long address, unsigned long phys_addr,
93 address = (address + PGDIR_SIZE) & PGDIR_MASK; 92 address = (address + PGDIR_SIZE) & PGDIR_MASK;
94 dir++; 93 dir++;
95 } while (address && (address < end)); 94 } while (address && (address < end));
96 spin_unlock(&init_mm.page_table_lock);
97 flush_tlb_all(); 95 flush_tlb_all();
98 return error; 96 return error;
99} 97}
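
Note: pte_alloc_kernel() loses its mm argument throughout this patch. It only ever operated on init_mm, and it now takes init_mm.page_table_lock internally while populating the pmd, which is what lets the surrounding spin_lock/spin_unlock pair disappear here. The new calling convention, sketched (illustrative; fill_kernel_pmd is an invented helper):

static int fill_kernel_pmd(pmd_t *pmd, unsigned long address,
			   unsigned long pfn, pgprot_t prot)
{
	pte_t *pte = pte_alloc_kernel(pmd, address);	/* locks internally */

	if (!pte)
		return -ENOMEM;
	set_pte(pte, pfn_pte(pfn, prot));
	return 0;
}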
diff --git a/arch/sh64/mm/cache.c b/arch/sh64/mm/cache.c
index 3b87e25ea773..c0c1b21350d8 100644
--- a/arch/sh64/mm/cache.c
+++ b/arch/sh64/mm/cache.c
@@ -584,32 +584,36 @@ static void sh64_dcache_purge_phy_page(unsigned long paddr)
584 } 584 }
585} 585}
586 586
587static void sh64_dcache_purge_user_page(struct mm_struct *mm, unsigned long eaddr) 587static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
588 unsigned long addr, unsigned long end)
588{ 589{
589 pgd_t *pgd; 590 pgd_t *pgd;
590 pmd_t *pmd; 591 pmd_t *pmd;
591 pte_t *pte; 592 pte_t *pte;
592 pte_t entry; 593 pte_t entry;
594 spinlock_t *ptl;
593 unsigned long paddr; 595 unsigned long paddr;
594 596
595 /* NOTE : all the callers of this have mm->page_table_lock held, so the 597 if (!mm)
596 following page table traversal is safe even on SMP/pre-emptible. */ 598 return; /* No way to find physical address of page */
597 599
598 if (!mm) return; /* No way to find physical address of page */ 600 pgd = pgd_offset(mm, addr);
599 pgd = pgd_offset(mm, eaddr); 601 if (pgd_bad(*pgd))
600 if (pgd_bad(*pgd)) return; 602 return;
601 603
602 pmd = pmd_offset(pgd, eaddr); 604 pmd = pmd_offset(pgd, addr);
603 if (pmd_none(*pmd) || pmd_bad(*pmd)) return; 605 if (pmd_none(*pmd) || pmd_bad(*pmd))
604 606 return;
605 pte = pte_offset_kernel(pmd, eaddr); 607
606 entry = *pte; 608 pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
607 if (pte_none(entry) || !pte_present(entry)) return; 609 do {
608 610 entry = *pte;
609 paddr = pte_val(entry) & PAGE_MASK; 611 if (pte_none(entry) || !pte_present(entry))
610 612 continue;
611 sh64_dcache_purge_coloured_phy_page(paddr, eaddr); 613 paddr = pte_val(entry) & PAGE_MASK;
612 614 sh64_dcache_purge_coloured_phy_page(paddr, addr);
615 } while (pte++, addr += PAGE_SIZE, addr != end);
616 pte_unmap_unlock(pte - 1, ptl);
613} 617}
614/****************************************************************************/ 618/****************************************************************************/
615 619
@@ -668,7 +672,7 @@ static void sh64_dcache_purge_user_range(struct mm_struct *mm,
668 int n_pages; 672 int n_pages;
669 673
670 n_pages = ((end - start) >> PAGE_SHIFT); 674 n_pages = ((end - start) >> PAGE_SHIFT);
671 if (n_pages >= 64) { 675 if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
672#if 1 676#if 1
673 sh64_dcache_purge_all(); 677 sh64_dcache_purge_all();
674#else 678#else
@@ -707,20 +711,10 @@ static void sh64_dcache_purge_user_range(struct mm_struct *mm,
707 } 711 }
708#endif 712#endif
709 } else { 713 } else {
710 /* 'Small' range */ 714 /* Small range, covered by a single page table page */
711 unsigned long aligned_start; 715 start &= PAGE_MASK; /* should already be so */
712 unsigned long eaddr; 716 end = PAGE_ALIGN(end); /* should already be so */
713 unsigned long last_page_start; 717 sh64_dcache_purge_user_pages(mm, start, end);
714
715 aligned_start = start & PAGE_MASK;
716 /* 'end' is 1 byte beyond the end of the range */
717 last_page_start = (end - 1) & PAGE_MASK;
718
719 eaddr = aligned_start;
720 while (eaddr <= last_page_start) {
721 sh64_dcache_purge_user_page(mm, eaddr);
722 eaddr += PAGE_SIZE;
723 }
724 } 718 }
725 return; 719 return;
726} 720}
@@ -880,9 +874,7 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
880 addresses from the user address space specified by mm, after writing 874 addresses from the user address space specified by mm, after writing
881 back any dirty data. 875 back any dirty data.
882 876
883 Note(1), 'end' is 1 byte beyond the end of the range to flush. 877 Note, 'end' is 1 byte beyond the end of the range to flush. */
884
885 Note(2), this is called with mm->page_table_lock held.*/
886 878
887 sh64_dcache_purge_user_range(mm, start, end); 879 sh64_dcache_purge_user_range(mm, start, end);
888 sh64_icache_inv_user_page_range(mm, start, end); 880 sh64_icache_inv_user_page_range(mm, start, end);
@@ -898,7 +890,7 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned
898 the I-cache must be searched too in case the page in question is 890 the I-cache must be searched too in case the page in question is
899 both writable and being executed from (e.g. stack trampolines.) 891 both writable and being executed from (e.g. stack trampolines.)
900 892
901 Note(1), this is called with mm->page_table_lock held. 893 Note, this is called with pte lock held.
902 */ 894 */
903 895
904 sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT); 896 sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
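
Note: sh64_dcache_purge_user_pages() holds a single pte lock, so it may only walk one page-table page; the caller's new test diverts any range whose first and last byte sit under different pmd entries to the purge-all path. The predicate in isolation (illustrative):

/* Nonzero iff [start, end) does not fit under one pmd entry: any bit
 * at or above PMD_SHIFT that differs between the first and last byte
 * of the range survives the PMD_MASK. */
static inline int crosses_pmd(unsigned long start, unsigned long end)
{
	return ((start ^ (end - 1)) & PMD_MASK) != 0;
}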
diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c
index dcd9c8a8baf8..ed6a505b3ee2 100644
--- a/arch/sh64/mm/hugetlbpage.c
+++ b/arch/sh64/mm/hugetlbpage.c
@@ -54,41 +54,31 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
54 return pte; 54 return pte;
55} 55}
56 56
57#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0) 57void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
58 58 pte_t *ptep, pte_t entry)
59static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
60 struct page *page, pte_t * page_table, int write_access)
61{ 59{
62 unsigned long i; 60 int i;
63 pte_t entry;
64
65 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
66
67 if (write_access)
68 entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
69 vma->vm_page_prot)));
70 else
71 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
72 entry = pte_mkyoung(entry);
73 mk_pte_huge(entry);
74 61
75 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { 62 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
76 set_pte(page_table, entry); 63 set_pte_at(mm, addr, ptep, entry);
77 page_table++; 64 ptep++;
78 65 addr += PAGE_SIZE;
79 pte_val(entry) += PAGE_SIZE; 66 pte_val(entry) += PAGE_SIZE;
80 } 67 }
81} 68}
82 69
83pte_t huge_ptep_get_and_clear(pte_t *ptep) 70pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
71 pte_t *ptep)
84{ 72{
85 pte_t entry; 73 pte_t entry;
74 int i;
86 75
87 entry = *ptep; 76 entry = *ptep;
88 77
89 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { 78 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
90 pte_clear(pte); 79 pte_clear(mm, addr, ptep);
91 pte++; 80 addr += PAGE_SIZE;
81 ptep++;
92 } 82 }
93 83
94 return entry; 84 return entry;
@@ -106,79 +96,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
106 return 0; 96 return 0;
107} 97}
108 98
109int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
110 struct vm_area_struct *vma)
111{
112 pte_t *src_pte, *dst_pte, entry;
113 struct page *ptepage;
114 unsigned long addr = vma->vm_start;
115 unsigned long end = vma->vm_end;
116 int i;
117
118 while (addr < end) {
119 dst_pte = huge_pte_alloc(dst, addr);
120 if (!dst_pte)
121 goto nomem;
122 src_pte = huge_pte_offset(src, addr);
123 BUG_ON(!src_pte || pte_none(*src_pte));
124 entry = *src_pte;
125 ptepage = pte_page(entry);
126 get_page(ptepage);
127 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
128 set_pte(dst_pte, entry);
129 pte_val(entry) += PAGE_SIZE;
130 dst_pte++;
131 }
132 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
133 addr += HPAGE_SIZE;
134 }
135 return 0;
136
137nomem:
138 return -ENOMEM;
139}
140
141int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
142 struct page **pages, struct vm_area_struct **vmas,
143 unsigned long *position, int *length, int i)
144{
145 unsigned long vaddr = *position;
146 int remainder = *length;
147
148 WARN_ON(!is_vm_hugetlb_page(vma));
149
150 while (vaddr < vma->vm_end && remainder) {
151 if (pages) {
152 pte_t *pte;
153 struct page *page;
154
155 pte = huge_pte_offset(mm, vaddr);
156
157 /* hugetlb should be locked, and hence, prefaulted */
158 BUG_ON(!pte || pte_none(*pte));
159
160 page = pte_page(*pte);
161
162 WARN_ON(!PageCompound(page));
163
164 get_page(page);
165 pages[i] = page;
166 }
167
168 if (vmas)
169 vmas[i] = vma;
170
171 vaddr += PAGE_SIZE;
172 --remainder;
173 ++i;
174 }
175
176 *length = remainder;
177 *position = vaddr;
178
179 return i;
180}
181
182struct page *follow_huge_addr(struct mm_struct *mm, 99struct page *follow_huge_addr(struct mm_struct *mm,
183 unsigned long address, int write) 100 unsigned long address, int write)
184{ 101{
@@ -195,84 +112,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
195{ 112{
196 return NULL; 113 return NULL;
197} 114}
198
199void unmap_hugepage_range(struct vm_area_struct *vma,
200 unsigned long start, unsigned long end)
201{
202 struct mm_struct *mm = vma->vm_mm;
203 unsigned long address;
204 pte_t *pte;
205 struct page *page;
206 int i;
207
208 BUG_ON(start & (HPAGE_SIZE - 1));
209 BUG_ON(end & (HPAGE_SIZE - 1));
210
211 for (address = start; address < end; address += HPAGE_SIZE) {
212 pte = huge_pte_offset(mm, address);
213 BUG_ON(!pte);
214 if (pte_none(*pte))
215 continue;
216 page = pte_page(*pte);
217 put_page(page);
218 for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
219 pte_clear(mm, address+(i*PAGE_SIZE), pte);
220 pte++;
221 }
222 }
223 add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
224 flush_tlb_range(vma, start, end);
225}
226
227int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
228{
229 struct mm_struct *mm = current->mm;
230 unsigned long addr;
231 int ret = 0;
232
233 BUG_ON(vma->vm_start & ~HPAGE_MASK);
234 BUG_ON(vma->vm_end & ~HPAGE_MASK);
235
236 spin_lock(&mm->page_table_lock);
237 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
238 unsigned long idx;
239 pte_t *pte = huge_pte_alloc(mm, addr);
240 struct page *page;
241
242 if (!pte) {
243 ret = -ENOMEM;
244 goto out;
245 }
246 if (!pte_none(*pte))
247 continue;
248
249 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
250 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
251 page = find_get_page(mapping, idx);
252 if (!page) {
253 /* charge the fs quota first */
254 if (hugetlb_get_quota(mapping)) {
255 ret = -ENOMEM;
256 goto out;
257 }
258 page = alloc_huge_page();
259 if (!page) {
260 hugetlb_put_quota(mapping);
261 ret = -ENOMEM;
262 goto out;
263 }
264 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
265 if (! ret) {
266 unlock_page(page);
267 } else {
268 hugetlb_put_quota(mapping);
269 free_huge_page(page);
270 goto out;
271 }
272 }
273 set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
274 }
275out:
276 spin_unlock(&mm->page_table_lock);
277 return ret;
278}
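
Note: the bulk of the deletions here (copy_hugetlb_page_range, follow_hugetlb_page, unmap_hugepage_range, hugetlb_prefault) retire arch-private copies in favour of the generic versions in mm/hugetlb.c. What remains is the pair of hooks that spread one huge mapping over (1 << HUGETLB_PAGE_ORDER) consecutive ptes; their contract, sketched (illustrative; replace_huge_pte is an invented caller):

static void replace_huge_pte(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep, pte_t newpte)
{
	/* Both hooks step addr and ptep by PAGE_SIZE internally, so the
	 * caller passes only the first pte of the huge run. */
	pte_t old = huge_ptep_get_and_clear(mm, addr, ptep);

	if (pte_dirty(old))
		SetPageDirty(pte_page(old));	/* example use of the old pte */
	set_huge_pte_at(mm, addr, ptep, newpte);
}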
diff --git a/arch/sh64/mm/ioremap.c b/arch/sh64/mm/ioremap.c
index f4003da556bc..fb1866fa2c9d 100644
--- a/arch/sh64/mm/ioremap.c
+++ b/arch/sh64/mm/ioremap.c
@@ -79,7 +79,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
79 BUG(); 79 BUG();
80 80
81 do { 81 do {
82 pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); 82 pte_t * pte = pte_alloc_kernel(pmd, address);
83 if (!pte) 83 if (!pte)
84 return -ENOMEM; 84 return -ENOMEM;
85 remap_area_pte(pte, address, end - address, address + phys_addr, flags); 85 remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -101,7 +101,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
101 flush_cache_all(); 101 flush_cache_all();
102 if (address >= end) 102 if (address >= end)
103 BUG(); 103 BUG();
104 spin_lock(&init_mm.page_table_lock);
105 do { 104 do {
106 pmd_t *pmd = pmd_alloc(&init_mm, dir, address); 105 pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
107 error = -ENOMEM; 106 error = -ENOMEM;
@@ -115,7 +114,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
115 address = (address + PGDIR_SIZE) & PGDIR_MASK; 114 address = (address + PGDIR_SIZE) & PGDIR_MASK;
116 dir++; 115 dir++;
117 } while (address && (address < end)); 116 } while (address && (address < end));
118 spin_unlock(&init_mm.page_table_lock);
119 flush_tlb_all(); 117 flush_tlb_all();
120 return 0; 118 return 0;
121} 119}
diff --git a/arch/sparc/mm/generic.c b/arch/sparc/mm/generic.c
index 20ccb957fb77..9604893ffdbd 100644
--- a/arch/sparc/mm/generic.c
+++ b/arch/sparc/mm/generic.c
@@ -73,14 +73,16 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
73 int space = GET_IOSPACE(pfn); 73 int space = GET_IOSPACE(pfn);
74 unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT; 74 unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
75 75
76 /* See comment in mm/memory.c remap_pfn_range */
77 vma->vm_flags |= VM_IO | VM_RESERVED;
78
76 prot = __pgprot(pg_iobits); 79 prot = __pgprot(pg_iobits);
77 offset -= from; 80 offset -= from;
78 dir = pgd_offset(mm, from); 81 dir = pgd_offset(mm, from);
79 flush_cache_range(vma, beg, end); 82 flush_cache_range(vma, beg, end);
80 83
81 spin_lock(&mm->page_table_lock);
82 while (from < end) { 84 while (from < end) {
83 pmd_t *pmd = pmd_alloc(current->mm, dir, from); 85 pmd_t *pmd = pmd_alloc(mm, dir, from);
84 error = -ENOMEM; 86 error = -ENOMEM;
85 if (!pmd) 87 if (!pmd)
86 break; 88 break;
@@ -90,7 +92,6 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
90 from = (from + PGDIR_SIZE) & PGDIR_MASK; 92 from = (from + PGDIR_SIZE) & PGDIR_MASK;
91 dir++; 93 dir++;
92 } 94 }
93 spin_unlock(&mm->page_table_lock);
94 95
95 flush_tlb_range(vma, beg, end); 96 flush_tlb_range(vma, beg, end);
96 return error; 97 return error;
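
Note: two changes meet in this hunk. The vma is flagged VM_IO | VM_RESERVED before any pte is installed, so the core VM never treats the raw pfns behind it as ordinary pages; and the explicit mm->page_table_lock around the allocation loop goes, since pmd_alloc() (and pud_alloc() in the sparc64 twin of this hunk below) locks internally when it must install a new table, while mmap_sem already serializes the callers. The resulting shape (illustrative; alloc_io_range is an invented helper):

static int alloc_io_range(struct vm_area_struct *vma, unsigned long from,
			  unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *dir = pgd_offset(mm, from);

	vma->vm_flags |= VM_IO | VM_RESERVED;	/* before the first pte */

	while (from < end) {
		pmd_t *pmd = pmd_alloc(mm, dir, from);	/* self-locking */
		if (!pmd)
			return -ENOMEM;
		from = (from + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	return 0;
}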
diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c
index b2854ef221d0..edf52d06b280 100644
--- a/arch/sparc64/kernel/binfmt_aout32.c
+++ b/arch/sparc64/kernel/binfmt_aout32.c
@@ -241,7 +241,6 @@ static int load_aout32_binary(struct linux_binprm * bprm, struct pt_regs * regs)
241 current->mm->brk = ex.a_bss + 241 current->mm->brk = ex.a_bss +
242 (current->mm->start_brk = N_BSSADDR(ex)); 242 (current->mm->start_brk = N_BSSADDR(ex));
243 243
244 set_mm_counter(current->mm, rss, 0);
245 current->mm->mmap = NULL; 244 current->mm->mmap = NULL;
246 compute_creds(bprm); 245 compute_creds(bprm);
247 current->flags &= ~PF_FORKNOEXEC; 246 current->flags &= ~PF_FORKNOEXEC;
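
Note: the dropped set_mm_counter(current->mm, rss, 0) here (and again in the x86_64 ia32_aout.c hunk below) was redundant: the mm's counters are zeroed once when the mm is created, so binfmt loaders need not reset them. Roughly, and assuming the file_rss/anon_rss counter split this patch series introduces elsewhere:

static void init_mm_counters(struct mm_struct *mm)
{
	/* done at mm creation, never again on the exec path */
	set_mm_counter(mm, file_rss, 0);
	set_mm_counter(mm, anon_rss, 0);
}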
diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c
index c954d91f01d0..112c316e7cd2 100644
--- a/arch/sparc64/mm/generic.c
+++ b/arch/sparc64/mm/generic.c
@@ -127,14 +127,16 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
127 int space = GET_IOSPACE(pfn); 127 int space = GET_IOSPACE(pfn);
128 unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT; 128 unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
129 129
130 /* See comment in mm/memory.c remap_pfn_range */
131 vma->vm_flags |= VM_IO | VM_RESERVED;
132
130 prot = __pgprot(pg_iobits); 133 prot = __pgprot(pg_iobits);
131 offset -= from; 134 offset -= from;
132 dir = pgd_offset(mm, from); 135 dir = pgd_offset(mm, from);
133 flush_cache_range(vma, beg, end); 136 flush_cache_range(vma, beg, end);
134 137
135 spin_lock(&mm->page_table_lock);
136 while (from < end) { 138 while (from < end) {
137 pud_t *pud = pud_alloc(current->mm, dir, from); 139 pud_t *pud = pud_alloc(mm, dir, from);
138 error = -ENOMEM; 140 error = -ENOMEM;
139 if (!pud) 141 if (!pud)
140 break; 142 break;
@@ -144,8 +146,7 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
144 from = (from + PGDIR_SIZE) & PGDIR_MASK; 146 from = (from + PGDIR_SIZE) & PGDIR_MASK;
145 dir++; 147 dir++;
146 } 148 }
147 flush_tlb_range(vma, beg, end);
148 spin_unlock(&mm->page_table_lock);
149 149
150 flush_tlb_range(vma, beg, end);
150 return error; 151 return error;
151} 152}
diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
index 90ca99d0b89c..8b104be4662b 100644
--- a/arch/sparc64/mm/tlb.c
+++ b/arch/sparc64/mm/tlb.c
@@ -18,8 +18,7 @@
18 18
19/* Heavily inspired by the ppc64 code. */ 19/* Heavily inspired by the ppc64 code. */
20 20
21DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) = 21DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) = { 0, };
22 { NULL, 0, 0, 0, 0, 0, { 0 }, { NULL }, };
23 22
24void flush_tlb_pending(void) 23void flush_tlb_pending(void)
25{ 24{
@@ -72,7 +71,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t
72 71
73no_cache_flush: 72no_cache_flush:
74 73
75 if (mp->tlb_frozen) 74 if (mp->fullmm)
76 return; 75 return;
77 76
78 nr = mp->tlb_nr; 77 nr = mp->tlb_nr;
@@ -97,7 +96,7 @@ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long
97 unsigned long nr = mp->tlb_nr; 96 unsigned long nr = mp->tlb_nr;
98 long s = start, e = end, vpte_base; 97 long s = start, e = end, vpte_base;
99 98
100 if (mp->tlb_frozen) 99 if (mp->fullmm)
101 return; 100 return;
102 101
103 /* If start is greater than end, that is a real problem. */ 102 /* If start is greater than end, that is a real problem. */
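
Note: tlb_frozen was sparc64's private name for what the generic mmu_gather calls fullmm: the flag set when an entire address space is being torn down, where one context-wide flush at tlb_finish_mmu() time replaces per-address batching. The test in isolation (illustrative; tlb_skip_batching is an invented name):

static inline int tlb_skip_batching(struct mmu_gather *mp)
{
	/* whole mm going away: a single full flush follows, so
	 * recording individual vaddrs would be wasted work */
	return mp->fullmm;
}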
diff --git a/arch/um/include/tlb.h b/arch/um/include/tlb.h
index 45d7da6c3b2c..8efc1e0f1b84 100644
--- a/arch/um/include/tlb.h
+++ b/arch/um/include/tlb.h
@@ -34,7 +34,6 @@ struct host_vm_op {
34 } u; 34 } u;
35}; 35};
36 36
37extern void mprotect_kernel_vm(int w);
38extern void force_flush_all(void); 37extern void force_flush_all(void);
39extern void fix_range_common(struct mm_struct *mm, unsigned long start_addr, 38extern void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
40 unsigned long end_addr, int force, 39 unsigned long end_addr, int force,
diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c
index 0d73ceeece72..34b54a3e2132 100644
--- a/arch/um/kernel/process_kern.c
+++ b/arch/um/kernel/process_kern.c
@@ -222,6 +222,7 @@ void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
222 pud_t *pud; 222 pud_t *pud;
223 pmd_t *pmd; 223 pmd_t *pmd;
224 pte_t *pte; 224 pte_t *pte;
225 pte_t ptent;
225 226
226 if(task->mm == NULL) 227 if(task->mm == NULL)
227 return(ERR_PTR(-EINVAL)); 228 return(ERR_PTR(-EINVAL));
@@ -238,12 +239,13 @@ void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
238 return(ERR_PTR(-EINVAL)); 239 return(ERR_PTR(-EINVAL));
239 240
240 pte = pte_offset_kernel(pmd, addr); 241 pte = pte_offset_kernel(pmd, addr);
241 if(!pte_present(*pte)) 242 ptent = *pte;
243 if(!pte_present(ptent))
242 return(ERR_PTR(-EINVAL)); 244 return(ERR_PTR(-EINVAL));
243 245
244 if(pte_out != NULL) 246 if(pte_out != NULL)
245 *pte_out = *pte; 247 *pte_out = ptent;
246 return((void *) (pte_val(*pte) & PAGE_MASK) + (addr & ~PAGE_MASK)); 248 return((void *) (pte_val(ptent) & PAGE_MASK) + (addr & ~PAGE_MASK));
247} 249}
248 250
249char *current_cmd(void) 251char *current_cmd(void)
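
Note: with no page_table_lock held here any more, *pte can change between the presence test and the later uses, so it is read once into a local pte_t and only that snapshot is examined. The idiom, sketched (illustrative; phys_of_pte is an invented name):

static unsigned long phys_of_pte(pte_t *pte)
{
	pte_t ptent = *pte;	/* one racy read, then a stable snapshot */

	if (!pte_present(ptent))
		return 0;
	return pte_val(ptent) & PAGE_MASK;
}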
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 240143b616a2..9e5e39cea821 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -28,7 +28,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
28 pmd_t *pmd; 28 pmd_t *pmd;
29 pte_t *pte; 29 pte_t *pte;
30 30
31 spin_lock(&mm->page_table_lock);
32 pgd = pgd_offset(mm, proc); 31 pgd = pgd_offset(mm, proc);
33 pud = pud_alloc(mm, pgd, proc); 32 pud = pud_alloc(mm, pgd, proc);
34 if (!pud) 33 if (!pud)
@@ -63,7 +62,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
63 *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); 62 *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
64 *pte = pte_mkexec(*pte); 63 *pte = pte_mkexec(*pte);
65 *pte = pte_wrprotect(*pte); 64 *pte = pte_wrprotect(*pte);
66 spin_unlock(&mm->page_table_lock);
67 return(0); 65 return(0);
68 66
69 out_pmd: 67 out_pmd:
@@ -71,7 +69,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
71 out_pte: 69 out_pte:
72 pmd_free(pmd); 70 pmd_free(pmd);
73 out: 71 out:
74 spin_unlock(&mm->page_table_lock);
75 return(-ENOMEM); 72 return(-ENOMEM);
76} 73}
77 74
@@ -147,6 +144,7 @@ void destroy_context_skas(struct mm_struct *mm)
147 144
148 if(!proc_mm || !ptrace_faultinfo){ 145 if(!proc_mm || !ptrace_faultinfo){
149 free_page(mmu->id.stack); 146 free_page(mmu->id.stack);
147 pte_lock_deinit(virt_to_page(mmu->last_page_table));
150 pte_free_kernel((pte_t *) mmu->last_page_table); 148 pte_free_kernel((pte_t *) mmu->last_page_table);
151 dec_page_state(nr_page_table_pages); 149 dec_page_state(nr_page_table_pages);
152#ifdef CONFIG_3_LEVEL_PGTABLES 150#ifdef CONFIG_3_LEVEL_PGTABLES
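
Note: the added pte_lock_deinit() is the flip side of the split pte locks this series introduces: a page-table page's spinlock lives in its struct page and is set up when the page becomes a page table, so a table freed by hand must be de-initialized first. Teardown order, sketched (illustrative; free_stub_page_table is an invented name):

static void free_stub_page_table(pte_t *pte)
{
	pte_lock_deinit(virt_to_page(pte));	/* retire the embedded lock */
	pte_free_kernel(pte);			/* only then free the page */
}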
diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c
index f1d85dbb45b9..ae6217c86135 100644
--- a/arch/um/kernel/tt/tlb.c
+++ b/arch/um/kernel/tt/tlb.c
@@ -74,42 +74,6 @@ void flush_tlb_kernel_range_tt(unsigned long start, unsigned long end)
74 atomic_inc(&vmchange_seq); 74 atomic_inc(&vmchange_seq);
75} 75}
76 76
77static void protect_vm_page(unsigned long addr, int w, int must_succeed)
78{
79 int err;
80
81 err = protect_memory(addr, PAGE_SIZE, 1, w, 1, must_succeed);
82 if(err == 0) return;
83 else if((err == -EFAULT) || (err == -ENOMEM)){
84 flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
85 protect_vm_page(addr, w, 1);
86 }
87 else panic("protect_vm_page : protect failed, errno = %d\n", err);
88}
89
90void mprotect_kernel_vm(int w)
91{
92 struct mm_struct *mm;
93 pgd_t *pgd;
94 pud_t *pud;
95 pmd_t *pmd;
96 pte_t *pte;
97 unsigned long addr;
98
99 mm = &init_mm;
100 for(addr = start_vm; addr < end_vm;){
101 pgd = pgd_offset(mm, addr);
102 pud = pud_offset(pgd, addr);
103 pmd = pmd_offset(pud, addr);
104 if(pmd_present(*pmd)){
105 pte = pte_offset_kernel(pmd, addr);
106 if(pte_present(*pte)) protect_vm_page(addr, w, 0);
107 addr += PAGE_SIZE;
108 }
109 else addr += PMD_SIZE;
110 }
111}
112
113void flush_tlb_kernel_vm_tt(void) 77void flush_tlb_kernel_vm_tt(void)
114{ 78{
115 flush_tlb_kernel_range(start_vm, end_vm); 79 flush_tlb_kernel_range(start_vm, end_vm);
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c
index 3e6780fa0186..93c60f4aa47a 100644
--- a/arch/x86_64/ia32/ia32_aout.c
+++ b/arch/x86_64/ia32/ia32_aout.c
@@ -314,7 +314,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
314 current->mm->free_area_cache = TASK_UNMAPPED_BASE; 314 current->mm->free_area_cache = TASK_UNMAPPED_BASE;
315 current->mm->cached_hole_size = 0; 315 current->mm->cached_hole_size = 0;
316 316
317 set_mm_counter(current->mm, rss, 0);
318 current->mm->mmap = NULL; 317 current->mm->mmap = NULL;
319 compute_creds(bprm); 318 compute_creds(bprm);
320 current->flags &= ~PF_FORKNOEXEC; 319 current->flags &= ~PF_FORKNOEXEC;
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index 6972df480d2b..ecf7acb5db9b 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -60,7 +60,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
60 if (address >= end) 60 if (address >= end)
61 BUG(); 61 BUG();
62 do { 62 do {
63 pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); 63 pte_t * pte = pte_alloc_kernel(pmd, address);
64 if (!pte) 64 if (!pte)
65 return -ENOMEM; 65 return -ENOMEM;
66 remap_area_pte(pte, address, end - address, address + phys_addr, flags); 66 remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -105,7 +105,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
105 flush_cache_all(); 105 flush_cache_all();
106 if (address >= end) 106 if (address >= end)
107 BUG(); 107 BUG();
108 spin_lock(&init_mm.page_table_lock);
109 do { 108 do {
110 pud_t *pud; 109 pud_t *pud;
111 pud = pud_alloc(&init_mm, pgd, address); 110 pud = pud_alloc(&init_mm, pgd, address);
@@ -119,7 +118,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
119 address = (address + PGDIR_SIZE) & PGDIR_MASK; 118 address = (address + PGDIR_SIZE) & PGDIR_MASK;
120 pgd++; 119 pgd++;
121 } while (address && (address < end)); 120 } while (address && (address < end));
122 spin_unlock(&init_mm.page_table_lock);
123 flush_tlb_all(); 121 flush_tlb_all();
124 return error; 122 return error;
125} 123}