author    Linus Torvalds <torvalds@ppc970.osdl.org>    2005-04-16 18:20:36 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org>    2005-04-16 18:20:36 -0400
commit    1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree      0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/alpha/mm
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/alpha/mm')
-rw-r--r--  arch/alpha/mm/Makefile  |   9
-rw-r--r--  arch/alpha/mm/extable.c |  34
-rw-r--r--  arch/alpha/mm/fault.c   | 247
-rw-r--r--  arch/alpha/mm/init.c    | 382
-rw-r--r--  arch/alpha/mm/numa.c    | 395
-rw-r--r--  arch/alpha/mm/remap.c   |  90
6 files changed, 1157 insertions(+), 0 deletions(-)
diff --git a/arch/alpha/mm/Makefile b/arch/alpha/mm/Makefile
new file mode 100644
index 000000000000..6edd9a09ea4f
--- /dev/null
+++ b/arch/alpha/mm/Makefile
@@ -0,0 +1,9 @@
1#
2# Makefile for the linux alpha-specific parts of the memory manager.
3#
4
5EXTRA_CFLAGS := -Werror
6
7obj-y := init.o fault.o extable.o remap.o
8
9obj-$(CONFIG_DISCONTIGMEM) += numa.o
diff --git a/arch/alpha/mm/extable.c b/arch/alpha/mm/extable.c
new file mode 100644
index 000000000000..c3849baebd57
--- /dev/null
+++ b/arch/alpha/mm/extable.c
@@ -0,0 +1,34 @@
1/*
2 * linux/arch/alpha/mm/extable.c
3 */
4
5#include <linux/config.h>
6#include <linux/module.h>
7#include <asm/uaccess.h>
8
9void sort_extable(struct exception_table_entry *start,
10 struct exception_table_entry *finish)
11{
12}
13
14const struct exception_table_entry *
15search_extable(const struct exception_table_entry *first,
16 const struct exception_table_entry *last,
17 unsigned long value)
18{
19 while (first <= last) {
20 const struct exception_table_entry *mid;
21 unsigned long mid_value;
22
23 mid = (last - first) / 2 + first;
24 mid_value = (unsigned long)&mid->insn + mid->insn;
25 if (mid_value == value)
26 return mid;
27 else if (mid_value < value)
28 first = mid+1;
29 else
30 last = mid-1;
31 }
32
33 return NULL;
34}
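The lookup above resolves each entry self-relatively: `(unsigned long)&mid->insn + mid->insn` adds a 32-bit offset stored in the entry to the entry's own location, which keeps the field at 32 bits even on a 64-bit machine. A minimal standalone sketch of that addressing scheme, using made-up struct and helper names rather than the kernel's types:

#include <stdio.h>
#include <stdint.h>

struct rel_entry {
	int32_t insn;	/* offset of the target address, relative to &insn */
};

static unsigned long entry_addr(const struct rel_entry *e)
{
	/* Same computation as search_extable(): field address plus stored offset. */
	return (unsigned long)&e->insn + e->insn;
}

int main(void)
{
	/* Pretend the targets live 64 and 128 bytes past their entries. */
	struct rel_entry table[2] = { { .insn = 64 }, { .insn = 128 } };

	for (int i = 0; i < 2; i++)
		printf("entry %d resolves to %#lx\n", i, entry_addr(&table[i]));
	return 0;
}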
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
new file mode 100644
index 000000000000..64ace5a9cd3d
--- /dev/null
+++ b/arch/alpha/mm/fault.c
@@ -0,0 +1,247 @@
1/*
2 * linux/arch/alpha/mm/fault.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 */
6
7#include <linux/config.h>
8#include <linux/sched.h>
9#include <linux/kernel.h>
10#include <linux/mm.h>
11#include <asm/io.h>
12
13#define __EXTERN_INLINE inline
14#include <asm/mmu_context.h>
15#include <asm/tlbflush.h>
16#undef __EXTERN_INLINE
17
18#include <linux/signal.h>
19#include <linux/errno.h>
20#include <linux/string.h>
21#include <linux/types.h>
22#include <linux/ptrace.h>
23#include <linux/mman.h>
24#include <linux/smp.h>
25#include <linux/smp_lock.h>
26#include <linux/interrupt.h>
27#include <linux/module.h>
28
29#include <asm/system.h>
30#include <asm/uaccess.h>
31
32extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *);
33
34
35/*
36 * Force a new ASN for a task.
37 */
38
39#ifndef CONFIG_SMP
40unsigned long last_asn = ASN_FIRST_VERSION;
41#endif
42
43void
44__load_new_mm_context(struct mm_struct *next_mm)
45{
46 unsigned long mmc;
47 struct pcb_struct *pcb;
48
49 mmc = __get_new_mm_context(next_mm, smp_processor_id());
50 next_mm->context[smp_processor_id()] = mmc;
51
52 pcb = &current_thread_info()->pcb;
53 pcb->asn = mmc & HARDWARE_ASN_MASK;
54 pcb->ptbr = ((unsigned long) next_mm->pgd - IDENT_ADDR) >> PAGE_SHIFT;
55
56 __reload_thread(pcb);
57}
58
59
60/*
61 * This routine handles page faults. It determines the address,
62 * and the problem, and then passes it off to handle_mm_fault().
63 *
64 * mmcsr:
65 * 0 = translation not valid
66 * 1 = access violation
67 * 2 = fault-on-read
68 * 3 = fault-on-execute
69 * 4 = fault-on-write
70 *
71 * cause:
72 * -1 = instruction fetch
73 * 0 = load
74 * 1 = store
75 *
76 * Registers $9 through $15 are saved in a block just prior to `regs' and
77 * are saved and restored around the call to allow exception code to
78 * modify them.
79 */
80
81/* Macro for exception fixup code to access integer registers. */
82#define dpf_reg(r) \
83 (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \
84 (r) <= 18 ? (r)+8 : (r)-10])
85
86asmlinkage void
87do_page_fault(unsigned long address, unsigned long mmcsr,
88 long cause, struct pt_regs *regs)
89{
90 struct vm_area_struct * vma;
91 struct mm_struct *mm = current->mm;
92 const struct exception_table_entry *fixup;
93 int fault, si_code = SEGV_MAPERR;
94 siginfo_t info;
95
96 /* As of EV6, a load into $31/$f31 is a prefetch, and never faults
97 (or is suppressed by the PALcode). Support that for older CPUs
98 by ignoring such an instruction. */
99 if (cause == 0) {
100 unsigned int insn;
101 __get_user(insn, (unsigned int __user *)regs->pc);
102 if ((insn >> 21 & 0x1f) == 0x1f &&
103 /* ldq ldl ldt lds ldg ldf ldwu ldbu */
104 (1ul << (insn >> 26) & 0x30f00001400ul)) {
105 regs->pc += 4;
106 return;
107 }
108 }
109
110 /* If we're in an interrupt context, or have no user context,
111 we must not take the fault. */
112 if (!mm || in_interrupt())
113 goto no_context;
114
115#ifdef CONFIG_ALPHA_LARGE_VMALLOC
116 if (address >= TASK_SIZE)
117 goto vmalloc_fault;
118#endif
119
120 down_read(&mm->mmap_sem);
121 vma = find_vma(mm, address);
122 if (!vma)
123 goto bad_area;
124 if (vma->vm_start <= address)
125 goto good_area;
126 if (!(vma->vm_flags & VM_GROWSDOWN))
127 goto bad_area;
128 if (expand_stack(vma, address))
129 goto bad_area;
130
131 /* Ok, we have a good vm_area for this memory access, so
132 we can handle it. */
133 good_area:
134 si_code = SEGV_ACCERR;
135 if (cause < 0) {
136 if (!(vma->vm_flags & VM_EXEC))
137 goto bad_area;
138 } else if (!cause) {
139 /* Allow reads even for write-only mappings */
140 if (!(vma->vm_flags & (VM_READ | VM_WRITE)))
141 goto bad_area;
142 } else {
143 if (!(vma->vm_flags & VM_WRITE))
144 goto bad_area;
145 }
146
147 survive:
148 /* If for any reason at all we couldn't handle the fault,
149 make sure we exit gracefully rather than endlessly redo
150 the fault. */
151 fault = handle_mm_fault(mm, vma, address, cause > 0);
152 up_read(&mm->mmap_sem);
153
154 switch (fault) {
155 case VM_FAULT_MINOR:
156 current->min_flt++;
157 break;
158 case VM_FAULT_MAJOR:
159 current->maj_flt++;
160 break;
161 case VM_FAULT_SIGBUS:
162 goto do_sigbus;
163 case VM_FAULT_OOM:
164 goto out_of_memory;
165 default:
166 BUG();
167 }
168 return;
169
170 /* Something tried to access memory that isn't in our memory map.
171 Fix it, but check if it's kernel or user first. */
172 bad_area:
173 up_read(&mm->mmap_sem);
174
175 if (user_mode(regs))
176 goto do_sigsegv;
177
178 no_context:
179 /* Are we prepared to handle this fault as an exception? */
180 if ((fixup = search_exception_tables(regs->pc)) != 0) {
181 unsigned long newpc;
182 newpc = fixup_exception(dpf_reg, fixup, regs->pc);
183 regs->pc = newpc;
184 return;
185 }
186
187 /* Oops. The kernel tried to access some bad page. We'll have to
188 terminate things with extreme prejudice. */
189 printk(KERN_ALERT "Unable to handle kernel paging request at "
190 "virtual address %016lx\n", address);
191 die_if_kernel("Oops", regs, cause, (unsigned long*)regs - 16);
192 do_exit(SIGKILL);
193
194 /* We ran out of memory, or some other thing happened to us that
195 made us unable to handle the page fault gracefully. */
196 out_of_memory:
197 if (current->pid == 1) {
198 yield();
199 down_read(&mm->mmap_sem);
200 goto survive;
201 }
202 printk(KERN_ALERT "VM: killing process %s(%d)\n",
203 current->comm, current->pid);
204 if (!user_mode(regs))
205 goto no_context;
206 do_exit(SIGKILL);
207
208 do_sigbus:
209 /* Send a sigbus, regardless of whether we were in kernel
210 or user mode. */
211 info.si_signo = SIGBUS;
212 info.si_errno = 0;
213 info.si_code = BUS_ADRERR;
214 info.si_addr = (void __user *) address;
215 force_sig_info(SIGBUS, &info, current);
216 if (!user_mode(regs))
217 goto no_context;
218 return;
219
220 do_sigsegv:
221 info.si_signo = SIGSEGV;
222 info.si_errno = 0;
223 info.si_code = si_code;
224 info.si_addr = (void __user *) address;
225 force_sig_info(SIGSEGV, &info, current);
226 return;
227
228#ifdef CONFIG_ALPHA_LARGE_VMALLOC
229 vmalloc_fault:
230 if (user_mode(regs))
231 goto do_sigsegv;
232 else {
233 /* Synchronize this task's top level page-table
234 with the "reference" page table from init. */
235 long index = pgd_index(address);
236 pgd_t *pgd, *pgd_k;
237
238 pgd = current->active_mm->pgd + index;
239 pgd_k = swapper_pg_dir + index;
240 if (!pgd_present(*pgd) && pgd_present(*pgd_k)) {
241 pgd_val(*pgd) = pgd_val(*pgd_k);
242 return;
243 }
244 goto no_context;
245 }
246#endif
247}
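The header comment in fault.c defines the `cause` convention (-1 = instruction fetch, 0 = load, 1 = store) that drives the permission checks in do_page_fault(). A small standalone sketch, with a made-up helper name, mapping that convention to the VMA flag each branch of the `good_area` path requires:

#include <stdio.h>

static const char *access_kind(long cause)
{
	if (cause < 0)
		return "instruction fetch -> needs VM_EXEC";
	if (cause == 0)
		return "read -> needs VM_READ or VM_WRITE";
	return "write -> needs VM_WRITE";
}

int main(void)
{
	long causes[] = { -1, 0, 1 };

	for (int i = 0; i < 3; i++)
		printf("cause %2ld: %s\n", causes[i], access_kind(causes[i]));
	return 0;
}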
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
new file mode 100644
index 000000000000..90752f6d8867
--- /dev/null
+++ b/arch/alpha/mm/init.c
@@ -0,0 +1,382 @@
1/*
2 * linux/arch/alpha/mm/init.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 */
6
7/* 2.3.x zone allocator, 1999 Andrea Arcangeli <andrea@suse.de> */
8
9#include <linux/config.h>
10#include <linux/signal.h>
11#include <linux/sched.h>
12#include <linux/kernel.h>
13#include <linux/errno.h>
14#include <linux/string.h>
15#include <linux/types.h>
16#include <linux/ptrace.h>
17#include <linux/mman.h>
18#include <linux/mm.h>
19#include <linux/swap.h>
20#include <linux/init.h>
21#include <linux/bootmem.h> /* max_low_pfn */
22#include <linux/vmalloc.h>
23
24#include <asm/system.h>
25#include <asm/uaccess.h>
26#include <asm/pgtable.h>
27#include <asm/pgalloc.h>
28#include <asm/hwrpb.h>
29#include <asm/dma.h>
30#include <asm/mmu_context.h>
31#include <asm/console.h>
32#include <asm/tlb.h>
33
34DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
35
36extern void die_if_kernel(char *,struct pt_regs *,long);
37
38static struct pcb_struct original_pcb;
39
40pgd_t *
41pgd_alloc(struct mm_struct *mm)
42{
43 pgd_t *ret, *init;
44
45 ret = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
46 init = pgd_offset(&init_mm, 0UL);
47 if (ret) {
48#ifdef CONFIG_ALPHA_LARGE_VMALLOC
49 memcpy (ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
50 (PTRS_PER_PGD - USER_PTRS_PER_PGD - 1)*sizeof(pgd_t));
51#else
52 pgd_val(ret[PTRS_PER_PGD-2]) = pgd_val(init[PTRS_PER_PGD-2]);
53#endif
54
55 /* The last PGD entry is the VPTB self-map. */
56 pgd_val(ret[PTRS_PER_PGD-1])
57 = pte_val(mk_pte(virt_to_page(ret), PAGE_KERNEL));
58 }
59 return ret;
60}
61
62pte_t *
63pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
64{
65 pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
66 return pte;
67}
68
69
70/*
71 * BAD_PAGE is the page that is used for page faults when linux
72 * is out-of-memory. Older versions of linux just did a
73 * do_exit(), but using this instead means there is less risk
74 * for a process dying in kernel mode, possibly leaving an inode
75 * unused etc..
76 *
77 * BAD_PAGETABLE is the accompanying page-table: it is initialized
78 * to point to BAD_PAGE entries.
79 *
80 * ZERO_PAGE is a special page that is used for zero-initialized
81 * data and COW.
82 */
83pmd_t *
84__bad_pagetable(void)
85{
86 memset((void *) EMPTY_PGT, 0, PAGE_SIZE);
87 return (pmd_t *) EMPTY_PGT;
88}
89
90pte_t
91__bad_page(void)
92{
93 memset((void *) EMPTY_PGE, 0, PAGE_SIZE);
94 return pte_mkdirty(mk_pte(virt_to_page(EMPTY_PGE), PAGE_SHARED));
95}
96
97#ifndef CONFIG_DISCONTIGMEM
98void
99show_mem(void)
100{
101 long i,free = 0,total = 0,reserved = 0;
102 long shared = 0, cached = 0;
103
104 printk("\nMem-info:\n");
105 show_free_areas();
106 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
107 i = max_mapnr;
108 while (i-- > 0) {
109 total++;
110 if (PageReserved(mem_map+i))
111 reserved++;
112 else if (PageSwapCache(mem_map+i))
113 cached++;
114 else if (!page_count(mem_map+i))
115 free++;
116 else
117 shared += page_count(mem_map + i) - 1;
118 }
119 printk("%ld pages of RAM\n",total);
120 printk("%ld free pages\n",free);
121 printk("%ld reserved pages\n",reserved);
122 printk("%ld pages shared\n",shared);
123 printk("%ld pages swap cached\n",cached);
124}
125#endif
126
127static inline unsigned long
128load_PCB(struct pcb_struct *pcb)
129{
130 register unsigned long sp __asm__("$30");
131 pcb->ksp = sp;
132 return __reload_thread(pcb);
133}
134
135/* Set up initial PCB, VPTB, and other such niceties. */
136
137static inline void
138switch_to_system_map(void)
139{
140 unsigned long newptbr;
141 unsigned long original_pcb_ptr;
142
143 /* Initialize the kernel's page tables. Linux puts the vptb in
144 the last slot of the L1 page table. */
145 memset(swapper_pg_dir, 0, PAGE_SIZE);
146 newptbr = ((unsigned long) swapper_pg_dir - PAGE_OFFSET) >> PAGE_SHIFT;
147 pgd_val(swapper_pg_dir[1023]) =
148 (newptbr << 32) | pgprot_val(PAGE_KERNEL);
149
150 /* Set the vptb. This is often done by the bootloader, but
151 shouldn't be required. */
152 if (hwrpb->vptb != 0xfffffffe00000000UL) {
153 wrvptptr(0xfffffffe00000000UL);
154 hwrpb->vptb = 0xfffffffe00000000UL;
155 hwrpb_update_checksum(hwrpb);
156 }
157
158 /* Also set up the real kernel PCB while we're at it. */
159 init_thread_info.pcb.ptbr = newptbr;
160 init_thread_info.pcb.flags = 1; /* set FEN, clear everything else */
161 original_pcb_ptr = load_PCB(&init_thread_info.pcb);
162 tbia();
163
164 /* Save off the contents of the original PCB so that we can
165 restore the original console's page tables for a clean reboot.
166
167 Note that the PCB is supposed to be a physical address, but
168 since KSEG values also happen to work, folks get confused.
169 Check this here. */
170
171 if (original_pcb_ptr < PAGE_OFFSET) {
172 original_pcb_ptr = (unsigned long)
173 phys_to_virt(original_pcb_ptr);
174 }
175 original_pcb = *(struct pcb_struct *) original_pcb_ptr;
176}
177
178int callback_init_done;
179
180void * __init
181callback_init(void * kernel_end)
182{
183 struct crb_struct * crb;
184 pgd_t *pgd;
185 pmd_t *pmd;
186 void *two_pages;
187
188 /* Starting at the HWRPB, locate the CRB. */
189 crb = (struct crb_struct *)((char *)hwrpb + hwrpb->crb_offset);
190
191 if (alpha_using_srm) {
192 /* Tell the console whither it is to be remapped. */
193 if (srm_fixup(VMALLOC_START, (unsigned long)hwrpb))
194 __halt(); /* "We're boned." --Bender */
195
196 /* Edit the procedure descriptors for DISPATCH and FIXUP. */
197 crb->dispatch_va = (struct procdesc_struct *)
198 (VMALLOC_START + (unsigned long)crb->dispatch_va
199 - crb->map[0].va);
200 crb->fixup_va = (struct procdesc_struct *)
201 (VMALLOC_START + (unsigned long)crb->fixup_va
202 - crb->map[0].va);
203 }
204
205 switch_to_system_map();
206
207 /* Allocate one PGD and one PMD. In the case of SRM, we'll need
208 these to actually remap the console. There is an assumption
209 here that only one of each is needed, and this allows for 8MB.
210 On systems with larger consoles, additional pages will be
211 allocated as needed during the mapping process.
212
213 In the case of not SRM, but not CONFIG_ALPHA_LARGE_VMALLOC,
214 we need to allocate the PGD we use for vmalloc before we start
215 forking other tasks. */
216
217 two_pages = (void *)
218 (((unsigned long)kernel_end + ~PAGE_MASK) & PAGE_MASK);
219 kernel_end = two_pages + 2*PAGE_SIZE;
220 memset(two_pages, 0, 2*PAGE_SIZE);
221
222 pgd = pgd_offset_k(VMALLOC_START);
223 pgd_set(pgd, (pmd_t *)two_pages);
224 pmd = pmd_offset(pgd, VMALLOC_START);
225 pmd_set(pmd, (pte_t *)(two_pages + PAGE_SIZE));
226
227 if (alpha_using_srm) {
228 static struct vm_struct console_remap_vm;
229 unsigned long vaddr = VMALLOC_START;
230 unsigned long i, j;
231
232 /* Set up the third level PTEs and update the virtual
233 addresses of the CRB entries. */
234 for (i = 0; i < crb->map_entries; ++i) {
235 unsigned long pfn = crb->map[i].pa >> PAGE_SHIFT;
236 crb->map[i].va = vaddr;
237 for (j = 0; j < crb->map[i].count; ++j) {
238				/* Newer consoles (especially on larger
239 systems) may require more pages of
240 PTEs. Grab additional pages as needed. */
241 if (pmd != pmd_offset(pgd, vaddr)) {
242 memset(kernel_end, 0, PAGE_SIZE);
243 pmd = pmd_offset(pgd, vaddr);
244 pmd_set(pmd, (pte_t *)kernel_end);
245 kernel_end += PAGE_SIZE;
246 }
247 set_pte(pte_offset_kernel(pmd, vaddr),
248 pfn_pte(pfn, PAGE_KERNEL));
249 pfn++;
250 vaddr += PAGE_SIZE;
251 }
252 }
253
254 /* Let vmalloc know that we've allocated some space. */
255 console_remap_vm.flags = VM_ALLOC;
256 console_remap_vm.addr = (void *) VMALLOC_START;
257 console_remap_vm.size = vaddr - VMALLOC_START;
258 vmlist = &console_remap_vm;
259 }
260
261 callback_init_done = 1;
262 return kernel_end;
263}
264
265
266#ifndef CONFIG_DISCONTIGMEM
267/*
268 * paging_init() sets up the memory map.
269 */
270void
271paging_init(void)
272{
273 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
274 unsigned long dma_pfn, high_pfn;
275
276 dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
277 high_pfn = max_pfn = max_low_pfn;
278
279 if (dma_pfn >= high_pfn)
280 zones_size[ZONE_DMA] = high_pfn;
281 else {
282 zones_size[ZONE_DMA] = dma_pfn;
283 zones_size[ZONE_NORMAL] = high_pfn - dma_pfn;
284 }
285
286 /* Initialize mem_map[]. */
287 free_area_init(zones_size);
288
289 /* Initialize the kernel's ZERO_PGE. */
290 memset((void *)ZERO_PGE, 0, PAGE_SIZE);
291}
292#endif /* CONFIG_DISCONTIGMEM */
293
294#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM)
295void
296srm_paging_stop (void)
297{
298 /* Move the vptb back to where the SRM console expects it. */
299 swapper_pg_dir[1] = swapper_pg_dir[1023];
300 tbia();
301 wrvptptr(0x200000000UL);
302 hwrpb->vptb = 0x200000000UL;
303 hwrpb_update_checksum(hwrpb);
304
305 /* Reload the page tables that the console had in use. */
306 load_PCB(&original_pcb);
307 tbia();
308}
309#endif
310
311#ifndef CONFIG_DISCONTIGMEM
312static void __init
313printk_memory_info(void)
314{
315 unsigned long codesize, reservedpages, datasize, initsize, tmp;
316 extern int page_is_ram(unsigned long) __init;
317 extern char _text, _etext, _data, _edata;
318 extern char __init_begin, __init_end;
319
320	/* printk all information */
321 reservedpages = 0;
322 for (tmp = 0; tmp < max_low_pfn; tmp++)
323 /*
324 * Only count reserved RAM pages
325 */
326 if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
327 reservedpages++;
328
329 codesize = (unsigned long) &_etext - (unsigned long) &_text;
330 datasize = (unsigned long) &_edata - (unsigned long) &_data;
331 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
332
333 printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, %luk data, %luk init)\n",
334 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
335 max_mapnr << (PAGE_SHIFT-10),
336 codesize >> 10,
337 reservedpages << (PAGE_SHIFT-10),
338 datasize >> 10,
339 initsize >> 10);
340}
341
342void __init
343mem_init(void)
344{
345 max_mapnr = num_physpages = max_low_pfn;
346 totalram_pages += free_all_bootmem();
347 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
348
349 printk_memory_info();
350}
351#endif /* CONFIG_DISCONTIGMEM */
352
353void
354free_reserved_mem(void *start, void *end)
355{
356 void *__start = start;
357 for (; __start < end; __start += PAGE_SIZE) {
358 ClearPageReserved(virt_to_page(__start));
359 set_page_count(virt_to_page(__start), 1);
360 free_page((long)__start);
361 totalram_pages++;
362 }
363}
364
365void
366free_initmem(void)
367{
368 extern char __init_begin, __init_end;
369
370 free_reserved_mem(&__init_begin, &__init_end);
371 printk ("Freeing unused kernel memory: %ldk freed\n",
372 (&__init_end - &__init_begin) >> 10);
373}
374
375#ifdef CONFIG_BLK_DEV_INITRD
376void
377free_initrd_mem(unsigned long start, unsigned long end)
378{
379 free_reserved_mem((void *)start, (void *)end);
380 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
381}
382#endif
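paging_init() above sizes the zones purely by page-frame number: every frame below the ISA DMA limit goes to ZONE_DMA and the remainder to ZONE_NORMAL. A standalone sketch of that arithmetic with example values only (16 MB DMA limit, 128 MB of RAM, 8 KB Alpha pages); the variable names are illustrative, not the kernel's:

#include <stdio.h>

int main(void)
{
	unsigned long dma_pfn = 2048;	/* 16 MB / 8 KB pages */
	unsigned long high_pfn = 16384;	/* 128 MB / 8 KB pages */
	unsigned long zone_dma, zone_normal = 0;

	if (dma_pfn >= high_pfn) {
		zone_dma = high_pfn;		/* all memory is DMA-able */
	} else {
		zone_dma = dma_pfn;
		zone_normal = high_pfn - dma_pfn;
	}
	printf("ZONE_DMA = %lu pages, ZONE_NORMAL = %lu pages\n",
	       zone_dma, zone_normal);
	return 0;
}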
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
new file mode 100644
index 000000000000..ba81c4422aaf
--- /dev/null
+++ b/arch/alpha/mm/numa.c
@@ -0,0 +1,395 @@
1/*
2 * linux/arch/alpha/mm/numa.c
3 *
4 * DISCONTIGMEM NUMA alpha support.
5 *
6 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
7 */
8
9#include <linux/config.h>
10#include <linux/types.h>
11#include <linux/kernel.h>
12#include <linux/mm.h>
13#include <linux/bootmem.h>
14#include <linux/swap.h>
15#include <linux/initrd.h>
16
17#include <asm/hwrpb.h>
18#include <asm/pgalloc.h>
19
20pg_data_t node_data[MAX_NUMNODES];
21bootmem_data_t node_bdata[MAX_NUMNODES];
22
23#undef DEBUG_DISCONTIG
24#ifdef DEBUG_DISCONTIG
25#define DBGDCONT(args...) printk(args)
26#else
27#define DBGDCONT(args...)
28#endif
29
30#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
31#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
32#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
33#define for_each_mem_cluster(memdesc, cluster, i) \
34 for ((cluster) = (memdesc)->cluster, (i) = 0; \
35 (i) < (memdesc)->numclusters; (i)++, (cluster)++)
36
37static void __init show_mem_layout(void)
38{
39 struct memclust_struct * cluster;
40 struct memdesc_struct * memdesc;
41 int i;
42
43 /* Find free clusters, and init and free the bootmem accordingly. */
44 memdesc = (struct memdesc_struct *)
45 (hwrpb->mddt_offset + (unsigned long) hwrpb);
46
47 printk("Raw memory layout:\n");
48 for_each_mem_cluster(memdesc, cluster, i) {
49 printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n",
50 i, cluster->usage, cluster->start_pfn,
51 cluster->start_pfn + cluster->numpages);
52 }
53}
54
55static void __init
56setup_memory_node(int nid, void *kernel_end)
57{
58 extern unsigned long mem_size_limit;
59 struct memclust_struct * cluster;
60 struct memdesc_struct * memdesc;
61 unsigned long start_kernel_pfn, end_kernel_pfn;
62 unsigned long bootmap_size, bootmap_pages, bootmap_start;
63 unsigned long start, end;
64 unsigned long node_pfn_start, node_pfn_end;
65 unsigned long node_min_pfn, node_max_pfn;
66 int i;
67 unsigned long node_datasz = PFN_UP(sizeof(pg_data_t));
68 int show_init = 0;
69
70 /* Find the bounds of current node */
71 node_pfn_start = (node_mem_start(nid)) >> PAGE_SHIFT;
72 node_pfn_end = node_pfn_start + (node_mem_size(nid) >> PAGE_SHIFT);
73
74 /* Find free clusters, and init and free the bootmem accordingly. */
75 memdesc = (struct memdesc_struct *)
76 (hwrpb->mddt_offset + (unsigned long) hwrpb);
77
78 /* find the bounds of this node (node_min_pfn/node_max_pfn) */
79 node_min_pfn = ~0UL;
80 node_max_pfn = 0UL;
81 for_each_mem_cluster(memdesc, cluster, i) {
82 /* Bit 0 is console/PALcode reserved. Bit 1 is
83 non-volatile memory -- we might want to mark
84 this for later. */
85 if (cluster->usage & 3)
86 continue;
87
88 start = cluster->start_pfn;
89 end = start + cluster->numpages;
90
91 if (start >= node_pfn_end || end <= node_pfn_start)
92 continue;
93
94 if (!show_init) {
95 show_init = 1;
96 printk("Initializing bootmem allocator on Node ID %d\n", nid);
97 }
98 printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n",
99 i, cluster->usage, cluster->start_pfn,
100 cluster->start_pfn + cluster->numpages);
101
102 if (start < node_pfn_start)
103 start = node_pfn_start;
104 if (end > node_pfn_end)
105 end = node_pfn_end;
106
107 if (start < node_min_pfn)
108 node_min_pfn = start;
109 if (end > node_max_pfn)
110 node_max_pfn = end;
111 }
112
113 if (mem_size_limit && node_max_pfn > mem_size_limit) {
114 static int msg_shown = 0;
115 if (!msg_shown) {
116 msg_shown = 1;
117 printk("setup: forcing memory size to %ldK (from %ldK).\n",
118 mem_size_limit << (PAGE_SHIFT - 10),
119 node_max_pfn << (PAGE_SHIFT - 10));
120 }
121 node_max_pfn = mem_size_limit;
122 }
123
124 if (node_min_pfn >= node_max_pfn)
125 return;
126
127 /* Update global {min,max}_low_pfn from node information. */
128 if (node_min_pfn < min_low_pfn)
129 min_low_pfn = node_min_pfn;
130 if (node_max_pfn > max_low_pfn)
131 max_pfn = max_low_pfn = node_max_pfn;
132
133 num_physpages += node_max_pfn - node_min_pfn;
134
135#if 0 /* we'll try this one again in a little while */
136 /* Cute trick to make sure our local node data is on local memory */
137 node_data[nid] = (pg_data_t *)(__va(node_min_pfn << PAGE_SHIFT));
138#endif
139 /* Quasi-mark the pg_data_t as in-use */
140 node_min_pfn += node_datasz;
141 if (node_min_pfn >= node_max_pfn) {
142 printk(" not enough mem to reserve NODE_DATA");
143 return;
144 }
145 NODE_DATA(nid)->bdata = &node_bdata[nid];
146
147 printk(" Detected node memory: start %8lu, end %8lu\n",
148 node_min_pfn, node_max_pfn);
149
150 DBGDCONT(" DISCONTIG: node_data[%d] is at 0x%p\n", nid, NODE_DATA(nid));
151 DBGDCONT(" DISCONTIG: NODE_DATA(%d)->bdata is at 0x%p\n", nid, NODE_DATA(nid)->bdata);
152
153 /* Find the bounds of kernel memory. */
154 start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
155 end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
156 bootmap_start = -1;
157
158 if (!nid && (node_max_pfn < end_kernel_pfn || node_min_pfn > start_kernel_pfn))
159 panic("kernel loaded out of ram");
160
161 /* Zone start phys-addr must be 2^(MAX_ORDER-1) aligned.
162 Note that we round this down, not up - node memory
163 has much larger alignment than 8Mb, so it's safe. */
164 node_min_pfn &= ~((1UL << (MAX_ORDER-1))-1);
165
166 /* We need to know how many physically contiguous pages
167 we'll need for the bootmap. */
168 bootmap_pages = bootmem_bootmap_pages(node_max_pfn-node_min_pfn);
169
170 /* Now find a good region where to allocate the bootmap. */
171 for_each_mem_cluster(memdesc, cluster, i) {
172 if (cluster->usage & 3)
173 continue;
174
175 start = cluster->start_pfn;
176 end = start + cluster->numpages;
177
178 if (start >= node_max_pfn || end <= node_min_pfn)
179 continue;
180
181 if (end > node_max_pfn)
182 end = node_max_pfn;
183 if (start < node_min_pfn)
184 start = node_min_pfn;
185
186 if (start < start_kernel_pfn) {
187 if (end > end_kernel_pfn
188 && end - end_kernel_pfn >= bootmap_pages) {
189 bootmap_start = end_kernel_pfn;
190 break;
191 } else if (end > start_kernel_pfn)
192 end = start_kernel_pfn;
193 } else if (start < end_kernel_pfn)
194 start = end_kernel_pfn;
195 if (end - start >= bootmap_pages) {
196 bootmap_start = start;
197 break;
198 }
199 }
200
201 if (bootmap_start == -1)
202		panic("couldn't find a contiguous place for the bootmap");
203
204 /* Allocate the bootmap and mark the whole MM as reserved. */
205 bootmap_size = init_bootmem_node(NODE_DATA(nid), bootmap_start,
206 node_min_pfn, node_max_pfn);
207 DBGDCONT(" bootmap_start %lu, bootmap_size %lu, bootmap_pages %lu\n",
208 bootmap_start, bootmap_size, bootmap_pages);
209
210 /* Mark the free regions. */
211 for_each_mem_cluster(memdesc, cluster, i) {
212 if (cluster->usage & 3)
213 continue;
214
215 start = cluster->start_pfn;
216 end = cluster->start_pfn + cluster->numpages;
217
218 if (start >= node_max_pfn || end <= node_min_pfn)
219 continue;
220
221 if (end > node_max_pfn)
222 end = node_max_pfn;
223 if (start < node_min_pfn)
224 start = node_min_pfn;
225
226 if (start < start_kernel_pfn) {
227 if (end > end_kernel_pfn) {
228 free_bootmem_node(NODE_DATA(nid), PFN_PHYS(start),
229 (PFN_PHYS(start_kernel_pfn)
230 - PFN_PHYS(start)));
231 printk(" freeing pages %ld:%ld\n",
232 start, start_kernel_pfn);
233 start = end_kernel_pfn;
234 } else if (end > start_kernel_pfn)
235 end = start_kernel_pfn;
236 } else if (start < end_kernel_pfn)
237 start = end_kernel_pfn;
238 if (start >= end)
239 continue;
240
241 free_bootmem_node(NODE_DATA(nid), PFN_PHYS(start), PFN_PHYS(end) - PFN_PHYS(start));
242 printk(" freeing pages %ld:%ld\n", start, end);
243 }
244
245 /* Reserve the bootmap memory. */
246 reserve_bootmem_node(NODE_DATA(nid), PFN_PHYS(bootmap_start), bootmap_size);
247 printk(" reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size));
248
249 node_set_online(nid);
250}
251
252void __init
253setup_memory(void *kernel_end)
254{
255 int nid;
256
257 show_mem_layout();
258
259 nodes_clear(node_online_map);
260
261 min_low_pfn = ~0UL;
262 max_low_pfn = 0UL;
263 for (nid = 0; nid < MAX_NUMNODES; nid++)
264 setup_memory_node(nid, kernel_end);
265
266#ifdef CONFIG_BLK_DEV_INITRD
267 initrd_start = INITRD_START;
268 if (initrd_start) {
269 extern void *move_initrd(unsigned long);
270
271 initrd_end = initrd_start+INITRD_SIZE;
272 printk("Initial ramdisk at: 0x%p (%lu bytes)\n",
273 (void *) initrd_start, INITRD_SIZE);
274
275 if ((void *)initrd_end > phys_to_virt(PFN_PHYS(max_low_pfn))) {
276 if (!move_initrd(PFN_PHYS(max_low_pfn)))
277 printk("initrd extends beyond end of memory "
278 "(0x%08lx > 0x%p)\ndisabling initrd\n",
279 initrd_end,
280 phys_to_virt(PFN_PHYS(max_low_pfn)));
281 } else {
282 nid = kvaddr_to_nid(initrd_start);
283 reserve_bootmem_node(NODE_DATA(nid),
284 virt_to_phys((void *)initrd_start),
285 INITRD_SIZE);
286 }
287 }
288#endif /* CONFIG_BLK_DEV_INITRD */
289}
290
291void __init paging_init(void)
292{
293 unsigned int nid;
294 unsigned long zones_size[MAX_NR_ZONES] = {0, };
295 unsigned long dma_local_pfn;
296
297 /*
298 * The old global MAX_DMA_ADDRESS per-arch API doesn't fit
299 * in the NUMA model, for now we convert it to a pfn and
300 * we interpret this pfn as a local per-node information.
301 * This issue isn't very important since none of these machines
303	 * have legacy ISA slots anyway.
303 */
304 dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
305
306 for_each_online_node(nid) {
307 unsigned long start_pfn = node_bdata[nid].node_boot_start >> PAGE_SHIFT;
308 unsigned long end_pfn = node_bdata[nid].node_low_pfn;
309
310 if (dma_local_pfn >= end_pfn - start_pfn)
311 zones_size[ZONE_DMA] = end_pfn - start_pfn;
312 else {
313 zones_size[ZONE_DMA] = dma_local_pfn;
314 zones_size[ZONE_NORMAL] = (end_pfn - start_pfn) - dma_local_pfn;
315 }
316 free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn, NULL);
317 }
318
319 /* Initialize the kernel's ZERO_PGE. */
320 memset((void *)ZERO_PGE, 0, PAGE_SIZE);
321}
322
323void __init mem_init(void)
324{
325 unsigned long codesize, reservedpages, datasize, initsize, pfn;
326 extern int page_is_ram(unsigned long) __init;
327 extern char _text, _etext, _data, _edata;
328 extern char __init_begin, __init_end;
329 unsigned long nid, i;
330 struct page * lmem_map;
331
332 high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
333
334 reservedpages = 0;
335 for_each_online_node(nid) {
336 /*
337 * This will free up the bootmem, ie, slot 0 memory
338 */
339 totalram_pages += free_all_bootmem_node(NODE_DATA(nid));
340
341 lmem_map = node_mem_map(nid);
342 pfn = NODE_DATA(nid)->node_start_pfn;
343 for (i = 0; i < node_spanned_pages(nid); i++, pfn++)
344 if (page_is_ram(pfn) && PageReserved(lmem_map+i))
345 reservedpages++;
346 }
347
348 codesize = (unsigned long) &_etext - (unsigned long) &_text;
349 datasize = (unsigned long) &_edata - (unsigned long) &_data;
350 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
351
352 printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, "
353 "%luk data, %luk init)\n",
354 (unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
355 num_physpages << (PAGE_SHIFT-10),
356 codesize >> 10,
357 reservedpages << (PAGE_SHIFT-10),
358 datasize >> 10,
359 initsize >> 10);
360#if 0
361 mem_stress();
362#endif
363}
364
365void
366show_mem(void)
367{
368 long i,free = 0,total = 0,reserved = 0;
369 long shared = 0, cached = 0;
370 int nid;
371
372 printk("\nMem-info:\n");
373 show_free_areas();
374 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
375 for_each_online_node(nid) {
376 struct page * lmem_map = node_mem_map(nid);
377 i = node_spanned_pages(nid);
378 while (i-- > 0) {
379 total++;
380 if (PageReserved(lmem_map+i))
381 reserved++;
382 else if (PageSwapCache(lmem_map+i))
383 cached++;
384 else if (!page_count(lmem_map+i))
385 free++;
386 else
387 shared += page_count(lmem_map + i) - 1;
388 }
389 }
390 printk("%ld pages of RAM\n",total);
391 printk("%ld free pages\n",free);
392 printk("%ld reserved pages\n",reserved);
393 printk("%ld pages shared\n",shared);
394 printk("%ld pages swap cached\n",cached);
395}
diff --git a/arch/alpha/mm/remap.c b/arch/alpha/mm/remap.c
new file mode 100644
index 000000000000..19817ad3d89b
--- /dev/null
+++ b/arch/alpha/mm/remap.c
@@ -0,0 +1,90 @@
1#include <linux/vmalloc.h>
2#include <asm/pgalloc.h>
3#include <asm/cacheflush.h>
4
5/* called with the page_table_lock held */
6static inline void
7remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
8 unsigned long phys_addr, unsigned long flags)
9{
10 unsigned long end;
11 unsigned long pfn;
12
13 address &= ~PMD_MASK;
14 end = address + size;
15 if (end > PMD_SIZE)
16 end = PMD_SIZE;
17 if (address >= end)
18 BUG();
19 pfn = phys_addr >> PAGE_SHIFT;
20 do {
21 if (!pte_none(*pte)) {
22 printk("remap_area_pte: page already exists\n");
23 BUG();
24 }
25 set_pte(pte, pfn_pte(pfn,
26 __pgprot(_PAGE_VALID | _PAGE_ASM |
27 _PAGE_KRE | _PAGE_KWE | flags)));
28 address += PAGE_SIZE;
29 pfn++;
30 pte++;
31 } while (address && (address < end));
32}
33
34/* called with the page_table_lock held */
35static inline int
36remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
37 unsigned long phys_addr, unsigned long flags)
38{
39 unsigned long end;
40
41 address &= ~PGDIR_MASK;
42 end = address + size;
43 if (end > PGDIR_SIZE)
44 end = PGDIR_SIZE;
45 phys_addr -= address;
46 if (address >= end)
47 BUG();
48 do {
49 pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
50 if (!pte)
51 return -ENOMEM;
52 remap_area_pte(pte, address, end - address,
53 address + phys_addr, flags);
54 address = (address + PMD_SIZE) & PMD_MASK;
55 pmd++;
56 } while (address && (address < end));
57 return 0;
58}
59
60int
61__alpha_remap_area_pages(unsigned long address, unsigned long phys_addr,
62 unsigned long size, unsigned long flags)
63{
64 pgd_t * dir;
65 int error = 0;
66 unsigned long end = address + size;
67
68 phys_addr -= address;
69 dir = pgd_offset(&init_mm, address);
70 flush_cache_all();
71 if (address >= end)
72 BUG();
73 spin_lock(&init_mm.page_table_lock);
74 do {
75 pmd_t *pmd;
76 pmd = pmd_alloc(&init_mm, dir, address);
77 error = -ENOMEM;
78 if (!pmd)
79 break;
80 if (remap_area_pmd(pmd, address, end - address,
81 phys_addr + address, flags))
82 break;
83 error = 0;
84 address = (address + PGDIR_SIZE) & PGDIR_MASK;
85 dir++;
86 } while (address && (address < end));
87 spin_unlock(&init_mm.page_table_lock);
88 return error;
89}
90
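Both remap_area_pmd() and __alpha_remap_area_pages() above walk the target range one table-sized chunk at a time, advancing the cursor to the next aligned boundary and stopping once it reaches `end`. A standalone sketch of that walking pattern with a toy chunk size (not the kernel's PGDIR_SIZE; all names here are illustrative):

#include <stdio.h>

#define CHUNK_SIZE 0x2000UL		/* toy stand-in for PGDIR_SIZE */
#define CHUNK_MASK (~(CHUNK_SIZE - 1))

int main(void)
{
	unsigned long address = 0x1234UL;	/* unaligned start */
	unsigned long end = address + 0x5000UL;	/* spans several chunks */

	do {
		/* Each pass covers from the cursor up to the next chunk
		   boundary, or to 'end', whichever comes first. */
		unsigned long next = (address + CHUNK_SIZE) & CHUNK_MASK;
		unsigned long stop = next < end ? next : end;

		printf("map [%#lx, %#lx)\n", address, stop);
		address = next;		/* same advance as the kernel loop */
	} while (address && (address < end));

	return 0;
}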