aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/kernel
diff options
context:
space:
mode:
authorJes Sorensen <jes@wildopensource.com>2005-06-21 20:15:02 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-21 21:46:18 -0400
commitf14f75b81187cdbe10cc53a521bf9fdf97b59f8c (patch)
tree5c0d48c8a3338e6f1747e6cd55f699be96ffef1a /arch/ia64/kernel
parent2caaad41e4aa8f5dd999695b4ddeaa0e7f3912a4 (diff)
[PATCH] ia64 uncached alloc
This patch contains the ia64 uncached page allocator and the generic allocator (genalloc). The uncached allocator was formerly part of the SN2 mspec driver but there are several other users of it so it has been split off from the driver. The generic allocator can be used by device drivers to manage special memory etc. The generic allocator is based on the allocator from the sym53c8xx_2 driver. Various users on ia64 need uncached memory. The SGI SN architecture requires it for inter-partition communication between partitions within a large NUMA cluster. The specific user for this is the XPC code. Another application is large MPI style applications which use it for synchronization; on SN this can be done using special 'fetchop' operations, but it also benefits non-SN hardware which may use regular uncached memory for this purpose. The performance difference between doing this through uncached and cached memory is pretty substantial. This is handled by the mspec driver which I will push out in a separate patch. Rather than creating a specific allocator for just uncached memory I came up with genalloc, which is a general purpose allocator that can be used by device drivers and other subsystems as they please, for instance to handle onboard device memory. It was derived from the sym53c8xx_2 driver's allocator, which is also an example of a potential user (I am refraining from modifying sym2 right now as it seems to have been under fairly heavy development recently). On ia64 memory has various properties within a granule, i.e. it isn't safe to access memory as uncached within a granule that currently has memory accessed in cached mode. The regular system therefore doesn't utilize memory in the lower granules which is mixed in with device PAL code etc. The uncached driver walks the EFI memmap and pulls out the spill uncached pages and sticks them into the uncached pool. Only after these chunks have been utilized will it start converting regular cached memory into uncached memory.
Hence the reason for the EFI related code additions. Signed-off-by: Jes Sorensen <jes@wildopensource.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r--arch/ia64/kernel/Makefile1
-rw-r--r--arch/ia64/kernel/efi.c32
-rw-r--r--arch/ia64/kernel/uncached.c246
3 files changed, 279 insertions, 0 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index c1a02bbc252c..4c73d8ba2e3d 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o
20obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o 20obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
21obj-$(CONFIG_IA64_CYCLONE) += cyclone.o 21obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
22obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o 22obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
23obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
23mca_recovery-y += mca_drv.o mca_drv_asm.o 24mca_recovery-y += mca_drv.o mca_drv_asm.o
24 25
25# The gate DSO image is built using a special linker script. 26# The gate DSO image is built using a special linker script.
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 4a3b1aac43e7..179f230816ed 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -410,6 +410,38 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
410} 410}
411 411
412/* 412/*
413 * Walk the EFI memory map to pull out leftover pages in the lower
414 * memory regions which do not end up in the regular memory map and
415 * stick them into the uncached allocator
416 *
417 * The regular walk function is significantly more complex than the
418 * uncached walk which means it really doesn't make sense to try and
419 * merge the two.
420 */
421void __init
422efi_memmap_walk_uc (efi_freemem_callback_t callback)
423{
424 void *efi_map_start, *efi_map_end, *p;
425 efi_memory_desc_t *md;
426 u64 efi_desc_size, start, end;
427
428 efi_map_start = __va(ia64_boot_param->efi_memmap);
429 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
430 efi_desc_size = ia64_boot_param->efi_memdesc_size;
431
432 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
433 md = p;
434 if (md->attribute == EFI_MEMORY_UC) {
435 start = PAGE_ALIGN(md->phys_addr);
436 end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK);
437 if ((*callback)(start, end, NULL) < 0)
438 return;
439 }
440 }
441}
442
443
444/*
413 * Look for the PAL_CODE region reported by EFI and maps it using an 445 * Look for the PAL_CODE region reported by EFI and maps it using an
414 * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor 446 * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor
415 * Abstraction Layer chapter 11 in ADAG 447 * Abstraction Layer chapter 11 in ADAG
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
new file mode 100644
index 000000000000..490dfc9ab47f
--- /dev/null
+++ b/arch/ia64/kernel/uncached.c
@@ -0,0 +1,246 @@
1/*
2 * Copyright (C) 2001-2005 Silicon Graphics, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License
6 * as published by the Free Software Foundation.
7 *
8 * A simple uncached page allocator using the generic allocator. This
9 * allocator first utilizes the spare (spill) pages found in the EFI
10 * memmap and will then start converting cached pages to uncached ones
11 * at a granule at a time. Node awareness is implemented by having a
12 * pool of pages per node.
13 */
14
15#include <linux/types.h>
16#include <linux/kernel.h>
17#include <linux/module.h>
18#include <linux/init.h>
19#include <linux/errno.h>
20#include <linux/string.h>
21#include <linux/slab.h>
22#include <linux/efi.h>
23#include <linux/genalloc.h>
24#include <asm/page.h>
25#include <asm/pal.h>
26#include <asm/system.h>
27#include <asm/pgtable.h>
28#include <asm/atomic.h>
29#include <asm/tlbflush.h>
30#include <asm/sn/arch.h>
31
32#define DEBUG 0
33
34#if DEBUG
35#define dprintk printk
36#else
37#define dprintk(x...) do { } while (0)
38#endif
39
40void __init efi_memmap_walk_uc (efi_freemem_callback_t callback);
41
42#define MAX_UNCACHED_GRANULES 5
43static int allocated_granules;
44
45struct gen_pool *uncached_pool[MAX_NUMNODES];
46
47
48static void uncached_ipi_visibility(void *data)
49{
50 int status;
51
52 status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
53 if ((status != PAL_VISIBILITY_OK) &&
54 (status != PAL_VISIBILITY_OK_REMOTE_NEEDED))
55 printk(KERN_DEBUG "pal_prefetch_visibility() returns %i on "
56 "CPU %i\n", status, get_cpu());
57}
58
59
60static void uncached_ipi_mc_drain(void *data)
61{
62 int status;
63 status = ia64_pal_mc_drain();
64 if (status)
65 printk(KERN_WARNING "ia64_pal_mc_drain() failed with %i on "
66 "CPU %i\n", status, get_cpu());
67}
68
69
70static unsigned long
71uncached_get_new_chunk(struct gen_pool *poolp)
72{
73 struct page *page;
74 void *tmp;
75 int status, i;
76 unsigned long addr, node;
77
78 if (allocated_granules >= MAX_UNCACHED_GRANULES)
79 return 0;
80
81 node = poolp->private;
82 page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO,
83 IA64_GRANULE_SHIFT-PAGE_SHIFT);
84
85 dprintk(KERN_INFO "get_new_chunk page %p, addr %lx\n",
86 page, (unsigned long)(page-vmem_map) << PAGE_SHIFT);
87
88 /*
89 * Do magic if no mem on local node! XXX
90 */
91 if (!page)
92 return 0;
93 tmp = page_address(page);
94
95 /*
96 * There's a small race here where it's possible for someone to
97 * access the page through /dev/mem halfway through the conversion
98 * to uncached - not sure it's really worth bothering about
99 */
100 for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
101 SetPageUncached(&page[i]);
102
103 flush_tlb_kernel_range(tmp, tmp + IA64_GRANULE_SIZE);
104
105 status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
106
107 dprintk(KERN_INFO "pal_prefetch_visibility() returns %i on cpu %i\n",
108 status, get_cpu());
109
110 if (!status) {
111 status = smp_call_function(uncached_ipi_visibility, NULL, 0, 1);
112 if (status)
113 printk(KERN_WARNING "smp_call_function failed for "
114 "uncached_ipi_visibility! (%i)\n", status);
115 }
116
117 if (ia64_platform_is("sn2"))
118 sn_flush_all_caches((unsigned long)tmp, IA64_GRANULE_SIZE);
119 else
120 flush_icache_range((unsigned long)tmp,
121 (unsigned long)tmp+IA64_GRANULE_SIZE);
122
123 ia64_pal_mc_drain();
124 status = smp_call_function(uncached_ipi_mc_drain, NULL, 0, 1);
125 if (status)
126 printk(KERN_WARNING "smp_call_function failed for "
127 "uncached_ipi_mc_drain! (%i)\n", status);
128
129 addr = (unsigned long)tmp - PAGE_OFFSET + __IA64_UNCACHED_OFFSET;
130
131 allocated_granules++;
132 return addr;
133}
134
135
136/*
137 * uncached_alloc_page
138 *
139 * Allocate 1 uncached page. Allocates on the requested node. If no
140 * uncached pages are available on the requested node, roundrobin starting
141 * with higher nodes.
142 */
143unsigned long
144uncached_alloc_page(int nid)
145{
146 unsigned long maddr;
147
148 maddr = gen_pool_alloc(uncached_pool[nid], PAGE_SIZE);
149
150 dprintk(KERN_DEBUG "uncached_alloc_page returns %lx on node %i\n",
151 maddr, nid);
152
153 /*
154 * If no memory is availble on our local node, try the
155 * remaining nodes in the system.
156 */
157 if (!maddr) {
158 int i;
159
160 for (i = MAX_NUMNODES - 1; i >= 0; i--) {
161 if (i == nid || !node_online(i))
162 continue;
163 maddr = gen_pool_alloc(uncached_pool[i], PAGE_SIZE);
164 dprintk(KERN_DEBUG "uncached_alloc_page alternate search "
165 "returns %lx on node %i\n", maddr, i);
166 if (maddr) {
167 break;
168 }
169 }
170 }
171
172 return maddr;
173}
174EXPORT_SYMBOL(uncached_alloc_page);
175
176
177/*
178 * uncached_free_page
179 *
180 * Free a single uncached page.
181 */
182void
183uncached_free_page(unsigned long maddr)
184{
185 int node;
186
187 node = nasid_to_cnodeid(NASID_GET(maddr));
188
189 dprintk(KERN_DEBUG "uncached_free_page(%lx) on node %i\n", maddr, node);
190
191 if ((maddr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET)
192 panic("uncached_free_page invalid address %lx\n", maddr);
193
194 gen_pool_free(uncached_pool[node], maddr, PAGE_SIZE);
195}
196EXPORT_SYMBOL(uncached_free_page);
197
198
199/*
200 * uncached_build_memmap,
201 *
202 * Called at boot time to build a map of pages that can be used for
203 * memory special operations.
204 */
205static int __init
206uncached_build_memmap(unsigned long start, unsigned long end, void *arg)
207{
208 long length;
209 unsigned long vstart, vend;
210 int node;
211
212 length = end - start;
213 vstart = start + __IA64_UNCACHED_OFFSET;
214 vend = end + __IA64_UNCACHED_OFFSET;
215
216 dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end);
217
218 memset((char *)vstart, 0, length);
219
220 node = nasid_to_cnodeid(NASID_GET(start));
221
222 for (; vstart < vend ; vstart += PAGE_SIZE) {
223 dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart);
224 gen_pool_free(uncached_pool[node], vstart, PAGE_SIZE);
225 }
226
227 return 0;
228}
229
230
231static int __init uncached_init(void) {
232 int i;
233
234 for (i = 0; i < MAX_NUMNODES; i++) {
235 if (!node_online(i))
236 continue;
237 uncached_pool[i] = gen_pool_create(0, IA64_GRANULE_SHIFT,
238 &uncached_get_new_chunk, i);
239 }
240
241 efi_memmap_walk_uc(uncached_build_memmap);
242
243 return 0;
244}
245
246__initcall(uncached_init);