aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTony Luck <tony.luck@intel.com>2010-01-07 19:10:57 -0500
committerTony Luck <tony.luck@intel.com>2010-01-07 19:10:57 -0500
commit6c57a332901f851bd092aba7a2b4d8ef4e643829 (patch)
tree049e913b0c7b4c678ba699ac02f62e16db8cb515
parent410dc0aac63d1500faeabcbaecce4f4266380ed1 (diff)
[IA64] __per_cpu_idtrs[] is a memory hog
__per_cpu_idtrs is statically allocated ... on CONFIG_NR_CPUS=4096 systems it hogs 16MB of memory. This is way too much for a quite probably unused facility (only KVM uses dynamic TR registers). Change to an array of pointers, and allocate entries as needed on a per cpu basis. Change the name too as the __per_cpu_ prefix is confusing (this isn't a classic <linux/percpu.h> type object). Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r--arch/ia64/include/asm/tlb.h2
-rw-r--r--arch/ia64/kernel/mca.c5
-rw-r--r--arch/ia64/mm/tlb.c32
3 files changed, 24 insertions, 15 deletions
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 85d965cb19a0..23cce999eb1c 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -74,7 +74,7 @@ struct ia64_tr_entry {
74extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size); 74extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size);
75extern void ia64_ptr_entry(u64 target_mask, int slot); 75extern void ia64_ptr_entry(u64 target_mask, int slot);
76 76
77extern struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX]; 77extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
78 78
79/* 79/*
80 region register macros 80 region register macros
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 32f2639e9b0a..378b4833024f 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1225,9 +1225,12 @@ static void mca_insert_tr(u64 iord)
1225 unsigned long psr; 1225 unsigned long psr;
1226 int cpu = smp_processor_id(); 1226 int cpu = smp_processor_id();
1227 1227
1228 if (!ia64_idtrs[cpu])
1229 return;
1230
1228 psr = ia64_clear_ic(); 1231 psr = ia64_clear_ic();
1229 for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) { 1232 for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
1230 p = &__per_cpu_idtrs[cpu][iord-1][i]; 1233 p = ia64_idtrs[cpu] + (iord - 1) * IA64_TR_ALLOC_MAX;
1231 if (p->pte & 0x1) { 1234 if (p->pte & 0x1) {
1232 old_rr = ia64_get_rr(p->ifa); 1235 old_rr = ia64_get_rr(p->ifa);
1233 if (old_rr != p->rr) { 1236 if (old_rr != p->rr) {
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index ee09d261f2e6..f3de9d7a98b4 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -48,7 +48,7 @@ DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
48DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/ 48DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/
49DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/ 49DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/
50 50
51struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX]; 51struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
52 52
53/* 53/*
54 * Initializes the ia64_ctx.bitmap array based on max_ctx+1. 54 * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
@@ -429,10 +429,16 @@ int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
429 struct ia64_tr_entry *p; 429 struct ia64_tr_entry *p;
430 int cpu = smp_processor_id(); 430 int cpu = smp_processor_id();
431 431
432 if (!ia64_idtrs[cpu]) {
433 ia64_idtrs[cpu] = kmalloc(2 * IA64_TR_ALLOC_MAX *
434 sizeof (struct ia64_tr_entry), GFP_KERNEL);
435 if (!ia64_idtrs[cpu])
436 return -ENOMEM;
437 }
432 r = -EINVAL; 438 r = -EINVAL;
433 /*Check overlap with existing TR entries*/ 439 /*Check overlap with existing TR entries*/
434 if (target_mask & 0x1) { 440 if (target_mask & 0x1) {
435 p = &__per_cpu_idtrs[cpu][0][0]; 441 p = ia64_idtrs[cpu];
436 for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); 442 for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
437 i++, p++) { 443 i++, p++) {
438 if (p->pte & 0x1) 444 if (p->pte & 0x1)
@@ -444,7 +450,7 @@ int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
444 } 450 }
445 } 451 }
446 if (target_mask & 0x2) { 452 if (target_mask & 0x2) {
447 p = &__per_cpu_idtrs[cpu][1][0]; 453 p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX;
448 for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); 454 for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
449 i++, p++) { 455 i++, p++) {
450 if (p->pte & 0x1) 456 if (p->pte & 0x1)
@@ -459,16 +465,16 @@ int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
459 for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) { 465 for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
460 switch (target_mask & 0x3) { 466 switch (target_mask & 0x3) {
461 case 1: 467 case 1:
462 if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1)) 468 if (!((ia64_idtrs[cpu] + i)->pte & 0x1))
463 goto found; 469 goto found;
464 continue; 470 continue;
465 case 2: 471 case 2:
466 if (!(__per_cpu_idtrs[cpu][1][i].pte & 0x1)) 472 if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
467 goto found; 473 goto found;
468 continue; 474 continue;
469 case 3: 475 case 3:
470 if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1) && 476 if (!((ia64_idtrs[cpu] + i)->pte & 0x1) &&
471 !(__per_cpu_idtrs[cpu][1][i].pte & 0x1)) 477 !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
472 goto found; 478 goto found;
473 continue; 479 continue;
474 default: 480 default:
@@ -488,7 +494,7 @@ found:
488 if (target_mask & 0x1) { 494 if (target_mask & 0x1) {
489 ia64_itr(0x1, i, va, pte, log_size); 495 ia64_itr(0x1, i, va, pte, log_size);
490 ia64_srlz_i(); 496 ia64_srlz_i();
491 p = &__per_cpu_idtrs[cpu][0][i]; 497 p = ia64_idtrs[cpu] + i;
492 p->ifa = va; 498 p->ifa = va;
493 p->pte = pte; 499 p->pte = pte;
494 p->itir = log_size << 2; 500 p->itir = log_size << 2;
@@ -497,7 +503,7 @@ found:
497 if (target_mask & 0x2) { 503 if (target_mask & 0x2) {
498 ia64_itr(0x2, i, va, pte, log_size); 504 ia64_itr(0x2, i, va, pte, log_size);
499 ia64_srlz_i(); 505 ia64_srlz_i();
500 p = &__per_cpu_idtrs[cpu][1][i]; 506 p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i;
501 p->ifa = va; 507 p->ifa = va;
502 p->pte = pte; 508 p->pte = pte;
503 p->itir = log_size << 2; 509 p->itir = log_size << 2;
@@ -528,7 +534,7 @@ void ia64_ptr_entry(u64 target_mask, int slot)
528 return; 534 return;
529 535
530 if (target_mask & 0x1) { 536 if (target_mask & 0x1) {
531 p = &__per_cpu_idtrs[cpu][0][slot]; 537 p = ia64_idtrs[cpu] + slot;
532 if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { 538 if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
533 p->pte = 0; 539 p->pte = 0;
534 ia64_ptr(0x1, p->ifa, p->itir>>2); 540 ia64_ptr(0x1, p->ifa, p->itir>>2);
@@ -537,7 +543,7 @@ void ia64_ptr_entry(u64 target_mask, int slot)
537 } 543 }
538 544
539 if (target_mask & 0x2) { 545 if (target_mask & 0x2) {
540 p = &__per_cpu_idtrs[cpu][1][slot]; 546 p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot;
541 if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { 547 if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
542 p->pte = 0; 548 p->pte = 0;
543 ia64_ptr(0x2, p->ifa, p->itir>>2); 549 ia64_ptr(0x2, p->ifa, p->itir>>2);
@@ -546,8 +552,8 @@ void ia64_ptr_entry(u64 target_mask, int slot)
546 } 552 }
547 553
548 for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) { 554 for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
549 if ((__per_cpu_idtrs[cpu][0][i].pte & 0x1) || 555 if (((ia64_idtrs[cpu] + i)->pte & 0x1) ||
550 (__per_cpu_idtrs[cpu][1][i].pte & 0x1)) 556 ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
551 break; 557 break;
552 } 558 }
553 per_cpu(ia64_tr_used, cpu) = i; 559 per_cpu(ia64_tr_used, cpu) = i;