diff options
author | Tony Luck <tony.luck@intel.com> | 2010-01-07 19:10:57 -0500 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2010-01-07 19:10:57 -0500 |
commit | 6c57a332901f851bd092aba7a2b4d8ef4e643829 (patch) | |
tree | 049e913b0c7b4c678ba699ac02f62e16db8cb515 | |
parent | 410dc0aac63d1500faeabcbaecce4f4266380ed1 (diff) |
[IA64] __per_cpu_idtrs[] is a memory hog
__per_cpu_idtrs is statically allocated ... on CONFIG_NR_CPUS=4096
systems it hogs 16MB of memory. This is way too much for a quite
probably unused facility (only KVM uses dynamic TR registers).
Change to an array of pointers, and allocate entries as needed on
a per cpu basis. Change the name too as the __per_cpu_ prefix is
confusing (this isn't a classic <linux/percpu.h> type object).
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- | arch/ia64/include/asm/tlb.h | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/mca.c | 5 | ||||
-rw-r--r-- | arch/ia64/mm/tlb.c | 32 |
3 files changed, 24 insertions, 15 deletions
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 85d965cb19a0..23cce999eb1c 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h | |||
@@ -74,7 +74,7 @@ struct ia64_tr_entry { | |||
74 | extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size); | 74 | extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size); |
75 | extern void ia64_ptr_entry(u64 target_mask, int slot); | 75 | extern void ia64_ptr_entry(u64 target_mask, int slot); |
76 | 76 | ||
77 | extern struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX]; | 77 | extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; |
78 | 78 | ||
79 | /* | 79 | /* |
80 | region register macros | 80 | region register macros |
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 32f2639e9b0a..378b4833024f 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c | |||
@@ -1225,9 +1225,12 @@ static void mca_insert_tr(u64 iord) | |||
1225 | unsigned long psr; | 1225 | unsigned long psr; |
1226 | int cpu = smp_processor_id(); | 1226 | int cpu = smp_processor_id(); |
1227 | 1227 | ||
1228 | if (!ia64_idtrs[cpu]) | ||
1229 | return; | ||
1230 | |||
1228 | psr = ia64_clear_ic(); | 1231 | psr = ia64_clear_ic(); |
1229 | for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) { | 1232 | for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) { |
1230 | p = &__per_cpu_idtrs[cpu][iord-1][i]; | 1233 | p = ia64_idtrs[cpu] + (iord - 1) * IA64_TR_ALLOC_MAX; |
1231 | if (p->pte & 0x1) { | 1234 | if (p->pte & 0x1) { |
1232 | old_rr = ia64_get_rr(p->ifa); | 1235 | old_rr = ia64_get_rr(p->ifa); |
1233 | if (old_rr != p->rr) { | 1236 | if (old_rr != p->rr) { |
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index ee09d261f2e6..f3de9d7a98b4 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c | |||
@@ -48,7 +48,7 @@ DEFINE_PER_CPU(u8, ia64_need_tlb_flush); | |||
48 | DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/ | 48 | DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/ |
49 | DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/ | 49 | DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/ |
50 | 50 | ||
51 | struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX]; | 51 | struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; |
52 | 52 | ||
53 | /* | 53 | /* |
54 | * Initializes the ia64_ctx.bitmap array based on max_ctx+1. | 54 | * Initializes the ia64_ctx.bitmap array based on max_ctx+1. |
@@ -429,10 +429,16 @@ int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size) | |||
429 | struct ia64_tr_entry *p; | 429 | struct ia64_tr_entry *p; |
430 | int cpu = smp_processor_id(); | 430 | int cpu = smp_processor_id(); |
431 | 431 | ||
432 | if (!ia64_idtrs[cpu]) { | ||
433 | ia64_idtrs[cpu] = kmalloc(2 * IA64_TR_ALLOC_MAX * | ||
434 | sizeof (struct ia64_tr_entry), GFP_KERNEL); | ||
435 | if (!ia64_idtrs[cpu]) | ||
436 | return -ENOMEM; | ||
437 | } | ||
432 | r = -EINVAL; | 438 | r = -EINVAL; |
433 | /*Check overlap with existing TR entries*/ | 439 | /*Check overlap with existing TR entries*/ |
434 | if (target_mask & 0x1) { | 440 | if (target_mask & 0x1) { |
435 | p = &__per_cpu_idtrs[cpu][0][0]; | 441 | p = ia64_idtrs[cpu]; |
436 | for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); | 442 | for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); |
437 | i++, p++) { | 443 | i++, p++) { |
438 | if (p->pte & 0x1) | 444 | if (p->pte & 0x1) |
@@ -444,7 +450,7 @@ int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size) | |||
444 | } | 450 | } |
445 | } | 451 | } |
446 | if (target_mask & 0x2) { | 452 | if (target_mask & 0x2) { |
447 | p = &__per_cpu_idtrs[cpu][1][0]; | 453 | p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX; |
448 | for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); | 454 | for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); |
449 | i++, p++) { | 455 | i++, p++) { |
450 | if (p->pte & 0x1) | 456 | if (p->pte & 0x1) |
@@ -459,16 +465,16 @@ int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size) | |||
459 | for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) { | 465 | for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) { |
460 | switch (target_mask & 0x3) { | 466 | switch (target_mask & 0x3) { |
461 | case 1: | 467 | case 1: |
462 | if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1)) | 468 | if (!((ia64_idtrs[cpu] + i)->pte & 0x1)) |
463 | goto found; | 469 | goto found; |
464 | continue; | 470 | continue; |
465 | case 2: | 471 | case 2: |
466 | if (!(__per_cpu_idtrs[cpu][1][i].pte & 0x1)) | 472 | if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) |
467 | goto found; | 473 | goto found; |
468 | continue; | 474 | continue; |
469 | case 3: | 475 | case 3: |
470 | if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1) && | 476 | if (!((ia64_idtrs[cpu] + i)->pte & 0x1) && |
471 | !(__per_cpu_idtrs[cpu][1][i].pte & 0x1)) | 477 | !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) |
472 | goto found; | 478 | goto found; |
473 | continue; | 479 | continue; |
474 | default: | 480 | default: |
@@ -488,7 +494,7 @@ found: | |||
488 | if (target_mask & 0x1) { | 494 | if (target_mask & 0x1) { |
489 | ia64_itr(0x1, i, va, pte, log_size); | 495 | ia64_itr(0x1, i, va, pte, log_size); |
490 | ia64_srlz_i(); | 496 | ia64_srlz_i(); |
491 | p = &__per_cpu_idtrs[cpu][0][i]; | 497 | p = ia64_idtrs[cpu] + i; |
492 | p->ifa = va; | 498 | p->ifa = va; |
493 | p->pte = pte; | 499 | p->pte = pte; |
494 | p->itir = log_size << 2; | 500 | p->itir = log_size << 2; |
@@ -497,7 +503,7 @@ found: | |||
497 | if (target_mask & 0x2) { | 503 | if (target_mask & 0x2) { |
498 | ia64_itr(0x2, i, va, pte, log_size); | 504 | ia64_itr(0x2, i, va, pte, log_size); |
499 | ia64_srlz_i(); | 505 | ia64_srlz_i(); |
500 | p = &__per_cpu_idtrs[cpu][1][i]; | 506 | p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i; |
501 | p->ifa = va; | 507 | p->ifa = va; |
502 | p->pte = pte; | 508 | p->pte = pte; |
503 | p->itir = log_size << 2; | 509 | p->itir = log_size << 2; |
@@ -528,7 +534,7 @@ void ia64_ptr_entry(u64 target_mask, int slot) | |||
528 | return; | 534 | return; |
529 | 535 | ||
530 | if (target_mask & 0x1) { | 536 | if (target_mask & 0x1) { |
531 | p = &__per_cpu_idtrs[cpu][0][slot]; | 537 | p = ia64_idtrs[cpu] + slot; |
532 | if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { | 538 | if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { |
533 | p->pte = 0; | 539 | p->pte = 0; |
534 | ia64_ptr(0x1, p->ifa, p->itir>>2); | 540 | ia64_ptr(0x1, p->ifa, p->itir>>2); |
@@ -537,7 +543,7 @@ void ia64_ptr_entry(u64 target_mask, int slot) | |||
537 | } | 543 | } |
538 | 544 | ||
539 | if (target_mask & 0x2) { | 545 | if (target_mask & 0x2) { |
540 | p = &__per_cpu_idtrs[cpu][1][slot]; | 546 | p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot; |
541 | if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { | 547 | if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { |
542 | p->pte = 0; | 548 | p->pte = 0; |
543 | ia64_ptr(0x2, p->ifa, p->itir>>2); | 549 | ia64_ptr(0x2, p->ifa, p->itir>>2); |
@@ -546,8 +552,8 @@ void ia64_ptr_entry(u64 target_mask, int slot) | |||
546 | } | 552 | } |
547 | 553 | ||
548 | for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) { | 554 | for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) { |
549 | if ((__per_cpu_idtrs[cpu][0][i].pte & 0x1) || | 555 | if (((ia64_idtrs[cpu] + i)->pte & 0x1) || |
550 | (__per_cpu_idtrs[cpu][1][i].pte & 0x1)) | 556 | ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) |
551 | break; | 557 | break; |
552 | } | 558 | } |
553 | per_cpu(ia64_tr_used, cpu) = i; | 559 | per_cpu(ia64_tr_used, cpu) = i; |