aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2006-11-29 06:27:42 -0500
committerPaul Mackerras <paulus@samba.org>2006-12-10 21:49:49 -0500
commit0204568a088fecd5478153504f9476ee2c46d5bf (patch)
tree6c22d9590bbedd3b4ba22e53737c37c6862f5e48
parenta223535425eb28082a0925b0ce2f02f962936cf4 (diff)
[POWERPC] Support ibm,dynamic-reconfiguration-memory nodes
For PAPR partitions with large amounts of memory, the firmware has an alternative, more compact representation for the information about the memory in the partition and its NUMA associativity information. This adds the code to the kernel to parse this alternative representation. The other part of this patch is telling the firmware that we can handle the alternative representation. There is however a subtlety here, because the firmware will invoke a reboot if the memory representation we request is different from the representation that firmware is currently using. This is because firmware can't change the representation on the fly. Further, some firmware versions used on POWER5+ machines have a bug where this reboot leaves the machine with an altered value of load-base, which will prevent any kernel booting until it is reset to the normal value (0x4000). Because of this bug, we do NOT set fake_elf.rpanote.new_mem_def = 1, and thus we do not request the new representation on POWER5+ and earlier machines. We do request the new representation on POWER6, which uses the ibm,client-architecture-support call. Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r--arch/powerpc/kernel/prom.c55
-rw-r--r--arch/powerpc/kernel/prom_init.c2
-rw-r--r--arch/powerpc/mm/numa.c65
3 files changed, 121 insertions, 1 deletions
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index c18dbe77fdc2..1fc732a552db 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -804,6 +804,56 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
804 return of_read_ulong(p, s); 804 return of_read_ulong(p, s);
805} 805}
806 806
807#ifdef CONFIG_PPC_PSERIES
808/*
809 * Interpret the ibm,dynamic-memory property in the
810 * /ibm,dynamic-reconfiguration-memory node.
811 * This contains a list of memory blocks along with NUMA affinity
812 * information.
813 */
814static int __init early_init_dt_scan_drconf_memory(unsigned long node)
815{
816 cell_t *dm, *ls;
817 unsigned long l, n;
818 unsigned long base, size, lmb_size, flags;
819
820 ls = (cell_t *)of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
821 if (ls == NULL || l < dt_root_size_cells * sizeof(cell_t))
822 return 0;
823 lmb_size = dt_mem_next_cell(dt_root_size_cells, &ls);
824
825 dm = (cell_t *)of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
826 if (dm == NULL || l < sizeof(cell_t))
827 return 0;
828
829 n = *dm++; /* number of entries */
830 if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(cell_t))
831 return 0;
832
833 for (; n != 0; --n) {
834 base = dt_mem_next_cell(dt_root_addr_cells, &dm);
835 flags = dm[3];
836 /* skip DRC index, pad, assoc. list index, flags */
837 dm += 4;
838 /* skip this block if the reserved bit is set in flags (0x80)
839 or if the block is not assigned to this partition (0x8) */
840 if ((flags & 0x80) || !(flags & 0x8))
841 continue;
842 size = lmb_size;
843 if (iommu_is_off) {
844 if (base >= 0x80000000ul)
845 continue;
846 if ((base + size) > 0x80000000ul)
847 size = 0x80000000ul - base;
848 }
849 lmb_add(base, size);
850 }
851 lmb_dump_all();
852 return 0;
853}
854#else
855#define early_init_dt_scan_drconf_memory(node) 0
856#endif /* CONFIG_PPC_PSERIES */
807 857
808static int __init early_init_dt_scan_memory(unsigned long node, 858static int __init early_init_dt_scan_memory(unsigned long node,
809 const char *uname, int depth, void *data) 859 const char *uname, int depth, void *data)
@@ -812,6 +862,11 @@ static int __init early_init_dt_scan_memory(unsigned long node,
812 cell_t *reg, *endp; 862 cell_t *reg, *endp;
813 unsigned long l; 863 unsigned long l;
814 864
865 /* Look for the ibm,dynamic-reconfiguration-memory node */
866 if (depth == 1 &&
867 strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0)
868 return early_init_dt_scan_drconf_memory(node);
869
815 /* We are scanning "memory" nodes only */ 870 /* We are scanning "memory" nodes only */
816 if (type == NULL) { 871 if (type == NULL) {
817 /* 872 /*
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 46cf32670ddb..520ef42f642e 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -679,7 +679,7 @@ static unsigned char ibm_architecture_vec[] = {
679 /* option vector 5: PAPR/OF options */ 679 /* option vector 5: PAPR/OF options */
680 3 - 2, /* length */ 680 3 - 2, /* length */
681 0, /* don't ignore, don't halt */ 681 0, /* don't ignore, don't halt */
682 OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES, 682 OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY,
683}; 683};
684 684
685/* Old method - ELF header with PT_NOTE sections */ 685/* Old method - ELF header with PT_NOTE sections */
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9da01dc8cfd9..262790910ff2 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -295,6 +295,63 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start,
295 return lmb_end_of_DRAM() - start; 295 return lmb_end_of_DRAM() - start;
296} 296}
297 297
298/*
299 * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
300 * node. This assumes n_mem_{addr,size}_cells have been set.
301 */
302static void __init parse_drconf_memory(struct device_node *memory)
303{
304 const unsigned int *lm, *dm, *aa;
305 unsigned int ls, ld, la;
306 unsigned int n, aam, aalen;
307 unsigned long lmb_size, size;
308 int nid, default_nid = 0;
309 unsigned int start, ai, flags;
310
311 lm = get_property(memory, "ibm,lmb-size", &ls);
312 dm = get_property(memory, "ibm,dynamic-memory", &ld);
313 aa = get_property(memory, "ibm,associativity-lookup-arrays", &la);
314 if (!lm || !dm || !aa ||
315 ls < sizeof(unsigned int) || ld < sizeof(unsigned int) ||
316 la < 2 * sizeof(unsigned int))
317 return;
318
319 lmb_size = read_n_cells(n_mem_size_cells, &lm);
320 n = *dm++; /* number of LMBs */
321 aam = *aa++; /* number of associativity lists */
322 aalen = *aa++; /* length of each associativity list */
323 if (ld < (n * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int) ||
324 la < (aam * aalen + 2) * sizeof(unsigned int))
325 return;
326
327 for (; n != 0; --n) {
328 start = read_n_cells(n_mem_addr_cells, &dm);
329 ai = dm[2];
330 flags = dm[3];
331 dm += 4;
332 /* 0x80 == reserved, 0x8 = assigned to us */
333 if ((flags & 0x80) || !(flags & 0x8))
334 continue;
335 nid = default_nid;
336 /* flags & 0x40 means associativity index is invalid */
337 if (min_common_depth > 0 && min_common_depth <= aalen &&
338 (flags & 0x40) == 0 && ai < aam) {
339 /* this is like of_node_to_nid_single */
340 nid = aa[ai * aalen + min_common_depth - 1];
341 if (nid == 0xffff || nid >= MAX_NUMNODES)
342 nid = default_nid;
343 }
344 node_set_online(nid);
345
346 size = numa_enforce_memory_limit(start, lmb_size);
347 if (!size)
348 continue;
349
350 add_active_range(nid, start >> PAGE_SHIFT,
351 (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
352 }
353}
354
298static int __init parse_numa_properties(void) 355static int __init parse_numa_properties(void)
299{ 356{
300 struct device_node *cpu = NULL; 357 struct device_node *cpu = NULL;
@@ -385,6 +442,14 @@ new_range:
385 goto new_range; 442 goto new_range;
386 } 443 }
387 444
445 /*
446 * Now do the same thing for each LMB listed in the ibm,dynamic-memory
447 * property in the ibm,dynamic-reconfiguration-memory node.
448 */
449 memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
450 if (memory)
451 parse_drconf_memory(memory);
452
388 return 0; 453 return 0;
389} 454}
390 455