diff options
author | Robin Holt <holt@sgi.com> | 2005-04-25 16:13:16 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2005-04-25 16:13:16 -0400 |
commit | fde740e4dd4a05ca8957490d468fa9b2770f5bd6 (patch) | |
tree | 04bc0221bc6c59379a17f3631fc4bd3c886e1d61 /include/asm-ia64/pgalloc.h | |
parent | ff3eb55ed97db3f12964beeffe3d34602d295367 (diff) |
[IA64] Percpu quicklist for combined allocator for pgd/pmd/pte.
This patch introduces using the quicklists for pgd, pmd, and pte levels
by combining the alloc and free functions into a common set of routines.
This greatly simplifies the reading of this header file.
This patch is simple but necessary for large numa configurations.
It simply ensures that only pages from the local node are added to a
cpu's quicklist. This prevents the trapping of pages on a remote node's
quicklist by starting a process, touching a large number of pages to
fill pmd and pte entries, migrating to another node, and then unmapping
or exiting. With those conditions, the pages get trapped and if the
machine has more than 100 nodes of the same size, the calculation of
the pgtable high water mark will be larger than any single node so page
table cache flushing will never occur.
I ran lmbench lat_proc fork and lat_proc exec on a zx1 with and without
this patch and did not notice any change.
On an sn2 machine, there was a slight improvement which is possibly
due to pages from other nodes trapped on the test node before starting
the run. I did not investigate further.
This patch shrinks the quicklist based upon free memory on the node
instead of the high/low water marks. I have written it to enable
preemption periodically and recalculate the amount to shrink every time
we have freed enough pages that the quicklist size should have grown.
I rescan the node's zones each pass because other processes may be
draining node memory at the same time as we are adding.
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'include/asm-ia64/pgalloc.h')
-rw-r--r-- | include/asm-ia64/pgalloc.h | 144 |
1 files changed, 61 insertions, 83 deletions
diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h index 0f05dc8bd460..e86a8c331ee6 100644 --- a/include/asm-ia64/pgalloc.h +++ b/include/asm-ia64/pgalloc.h | |||
@@ -22,146 +22,124 @@ | |||
22 | 22 | ||
23 | #include <asm/mmu_context.h> | 23 | #include <asm/mmu_context.h> |
24 | 24 | ||
25 | /* | 25 | DECLARE_PER_CPU(unsigned long *, __pgtable_quicklist); |
26 | * Very stupidly, we used to get new pgd's and pmd's, init their contents | 26 | #define pgtable_quicklist __ia64_per_cpu_var(__pgtable_quicklist) |
27 | * to point to the NULL versions of the next level page table, later on | 27 | DECLARE_PER_CPU(long, __pgtable_quicklist_size); |
28 | * completely re-init them the same way, then free them up. This wasted | 28 | #define pgtable_quicklist_size __ia64_per_cpu_var(__pgtable_quicklist_size) |
29 | * a lot of work and caused unnecessary memory traffic. How broken... | ||
30 | * We fix this by caching them. | ||
31 | */ | ||
32 | #define pgd_quicklist (local_cpu_data->pgd_quick) | ||
33 | #define pmd_quicklist (local_cpu_data->pmd_quick) | ||
34 | #define pgtable_cache_size (local_cpu_data->pgtable_cache_sz) | ||
35 | 29 | ||
36 | static inline pgd_t* | 30 | static inline long pgtable_quicklist_total_size(void) |
37 | pgd_alloc_one_fast (struct mm_struct *mm) | 31 | { |
32 | long ql_size; | ||
33 | int cpuid; | ||
34 | |||
35 | for_each_online_cpu(cpuid) { | ||
36 | ql_size += per_cpu(__pgtable_quicklist_size, cpuid); | ||
37 | } | ||
38 | return ql_size; | ||
39 | } | ||
40 | |||
41 | static inline void *pgtable_quicklist_alloc(void) | ||
38 | { | 42 | { |
39 | unsigned long *ret = NULL; | 43 | unsigned long *ret = NULL; |
40 | 44 | ||
41 | preempt_disable(); | 45 | preempt_disable(); |
42 | 46 | ||
43 | ret = pgd_quicklist; | 47 | ret = pgtable_quicklist; |
44 | if (likely(ret != NULL)) { | 48 | if (likely(ret != NULL)) { |
45 | pgd_quicklist = (unsigned long *)(*ret); | 49 | pgtable_quicklist = (unsigned long *)(*ret); |
46 | ret[0] = 0; | 50 | ret[0] = 0; |
47 | --pgtable_cache_size; | 51 | --pgtable_quicklist_size; |
48 | } else | 52 | } else { |
49 | ret = NULL; | 53 | ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO); |
54 | } | ||
50 | 55 | ||
51 | preempt_enable(); | 56 | preempt_enable(); |
52 | 57 | ||
53 | return (pgd_t *) ret; | 58 | return ret; |
54 | } | 59 | } |
55 | 60 | ||
56 | static inline pgd_t* | 61 | static inline void pgtable_quicklist_free(void *pgtable_entry) |
57 | pgd_alloc (struct mm_struct *mm) | ||
58 | { | 62 | { |
59 | /* the VM system never calls pgd_alloc_one_fast(), so we do it here. */ | 63 | #ifdef CONFIG_NUMA |
60 | pgd_t *pgd = pgd_alloc_one_fast(mm); | 64 | unsigned long nid = page_to_nid(virt_to_page(pgtable_entry)); |
61 | 65 | ||
62 | if (unlikely(pgd == NULL)) { | 66 | if (unlikely(nid != numa_node_id())) { |
63 | pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); | 67 | free_page((unsigned long)pgtable_entry); |
68 | return; | ||
64 | } | 69 | } |
65 | return pgd; | 70 | #endif |
66 | } | ||
67 | 71 | ||
68 | static inline void | ||
69 | pgd_free (pgd_t *pgd) | ||
70 | { | ||
71 | preempt_disable(); | 72 | preempt_disable(); |
72 | *(unsigned long *)pgd = (unsigned long) pgd_quicklist; | 73 | *(unsigned long *)pgtable_entry = (unsigned long)pgtable_quicklist; |
73 | pgd_quicklist = (unsigned long *) pgd; | 74 | pgtable_quicklist = (unsigned long *)pgtable_entry; |
74 | ++pgtable_cache_size; | 75 | ++pgtable_quicklist_size; |
75 | preempt_enable(); | 76 | preempt_enable(); |
76 | } | 77 | } |
77 | 78 | ||
78 | static inline void | 79 | static inline pgd_t *pgd_alloc(struct mm_struct *mm) |
79 | pud_populate (struct mm_struct *mm, pud_t *pud_entry, pmd_t *pmd) | ||
80 | { | 80 | { |
81 | pud_val(*pud_entry) = __pa(pmd); | 81 | return pgtable_quicklist_alloc(); |
82 | } | 82 | } |
83 | 83 | ||
84 | static inline pmd_t* | 84 | static inline void pgd_free(pgd_t * pgd) |
85 | pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr) | ||
86 | { | 85 | { |
87 | unsigned long *ret = NULL; | 86 | pgtable_quicklist_free(pgd); |
88 | |||
89 | preempt_disable(); | ||
90 | |||
91 | ret = (unsigned long *)pmd_quicklist; | ||
92 | if (likely(ret != NULL)) { | ||
93 | pmd_quicklist = (unsigned long *)(*ret); | ||
94 | ret[0] = 0; | ||
95 | --pgtable_cache_size; | ||
96 | } | ||
97 | |||
98 | preempt_enable(); | ||
99 | |||
100 | return (pmd_t *)ret; | ||
101 | } | 87 | } |
102 | 88 | ||
103 | static inline pmd_t* | 89 | static inline void |
104 | pmd_alloc_one (struct mm_struct *mm, unsigned long addr) | 90 | pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd) |
105 | { | 91 | { |
106 | pmd_t *pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); | 92 | pud_val(*pud_entry) = __pa(pmd); |
93 | } | ||
107 | 94 | ||
108 | return pmd; | 95 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) |
96 | { | ||
97 | return pgtable_quicklist_alloc(); | ||
109 | } | 98 | } |
110 | 99 | ||
111 | static inline void | 100 | static inline void pmd_free(pmd_t * pmd) |
112 | pmd_free (pmd_t *pmd) | ||
113 | { | 101 | { |
114 | preempt_disable(); | 102 | pgtable_quicklist_free(pmd); |
115 | *(unsigned long *)pmd = (unsigned long) pmd_quicklist; | ||
116 | pmd_quicklist = (unsigned long *) pmd; | ||
117 | ++pgtable_cache_size; | ||
118 | preempt_enable(); | ||
119 | } | 103 | } |
120 | 104 | ||
121 | #define __pmd_free_tlb(tlb, pmd) pmd_free(pmd) | 105 | #define __pmd_free_tlb(tlb, pmd) pmd_free(pmd) |
122 | 106 | ||
123 | static inline void | 107 | static inline void |
124 | pmd_populate (struct mm_struct *mm, pmd_t *pmd_entry, struct page *pte) | 108 | pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, struct page *pte) |
125 | { | 109 | { |
126 | pmd_val(*pmd_entry) = page_to_phys(pte); | 110 | pmd_val(*pmd_entry) = page_to_phys(pte); |
127 | } | 111 | } |
128 | 112 | ||
129 | static inline void | 113 | static inline void |
130 | pmd_populate_kernel (struct mm_struct *mm, pmd_t *pmd_entry, pte_t *pte) | 114 | pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte) |
131 | { | 115 | { |
132 | pmd_val(*pmd_entry) = __pa(pte); | 116 | pmd_val(*pmd_entry) = __pa(pte); |
133 | } | 117 | } |
134 | 118 | ||
135 | static inline struct page * | 119 | static inline struct page *pte_alloc_one(struct mm_struct *mm, |
136 | pte_alloc_one (struct mm_struct *mm, unsigned long addr) | 120 | unsigned long addr) |
137 | { | 121 | { |
138 | struct page *pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); | 122 | return virt_to_page(pgtable_quicklist_alloc()); |
139 | |||
140 | return pte; | ||
141 | } | 123 | } |
142 | 124 | ||
143 | static inline pte_t * | 125 | static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, |
144 | pte_alloc_one_kernel (struct mm_struct *mm, unsigned long addr) | 126 | unsigned long addr) |
145 | { | 127 | { |
146 | pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); | 128 | return pgtable_quicklist_alloc(); |
147 | |||
148 | return pte; | ||
149 | } | 129 | } |
150 | 130 | ||
151 | static inline void | 131 | static inline void pte_free(struct page *pte) |
152 | pte_free (struct page *pte) | ||
153 | { | 132 | { |
154 | __free_page(pte); | 133 | pgtable_quicklist_free(page_address(pte)); |
155 | } | 134 | } |
156 | 135 | ||
157 | static inline void | 136 | static inline void pte_free_kernel(pte_t * pte) |
158 | pte_free_kernel (pte_t *pte) | ||
159 | { | 137 | { |
160 | free_page((unsigned long) pte); | 138 | pgtable_quicklist_free(pte); |
161 | } | 139 | } |
162 | 140 | ||
163 | #define __pte_free_tlb(tlb, pte) tlb_remove_page((tlb), (pte)) | 141 | #define __pte_free_tlb(tlb, pte) pte_free(pte) |
164 | 142 | ||
165 | extern void check_pgt_cache (void); | 143 | extern void check_pgt_cache(void); |
166 | 144 | ||
167 | #endif /* _ASM_IA64_PGALLOC_H */ | 145 | #endif /* _ASM_IA64_PGALLOC_H */ |