diff options
author | Dave Jones <davej@redhat.com> | 2006-09-05 17:20:21 -0400 |
---|---|---|
committer | Dave Jones <davej@redhat.com> | 2006-09-05 17:20:21 -0400 |
commit | 115b384cf87249d76adb0b21aca11ee22128927d (patch) | |
tree | f39a2a54863e9d82d1196906f92c82ab5991c6af /mm | |
parent | 8eb7925f93af75e66a240d148efdec212f95bcb7 (diff) | |
parent | c336923b668fdcf0312efbec3b44895d713f4d81 (diff) |
Merge ../linus
Diffstat (limited to 'mm')
-rw-r--r-- | mm/fadvise.c | 3 | ||||
-rw-r--r-- | mm/filemap.c | 2 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 44 | ||||
-rw-r--r-- | mm/mempolicy.c | 10 | ||||
-rw-r--r-- | mm/mempool.c | 9 | ||||
-rw-r--r-- | mm/slab.c | 4 | ||||
-rw-r--r-- | mm/swap.c | 20 | ||||
-rw-r--r-- | mm/swapfile.c | 3 | ||||
-rw-r--r-- | mm/vmstat.c | 151 |
9 files changed, 196 insertions, 50 deletions
diff --git a/mm/fadvise.c b/mm/fadvise.c index 60a5d55e51d9..168c78a121bb 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c | |||
@@ -73,7 +73,6 @@ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) | |||
73 | file->f_ra.ra_pages = bdi->ra_pages * 2; | 73 | file->f_ra.ra_pages = bdi->ra_pages * 2; |
74 | break; | 74 | break; |
75 | case POSIX_FADV_WILLNEED: | 75 | case POSIX_FADV_WILLNEED: |
76 | case POSIX_FADV_NOREUSE: | ||
77 | if (!mapping->a_ops->readpage) { | 76 | if (!mapping->a_ops->readpage) { |
78 | ret = -EINVAL; | 77 | ret = -EINVAL; |
79 | break; | 78 | break; |
@@ -94,6 +93,8 @@ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) | |||
94 | if (ret > 0) | 93 | if (ret > 0) |
95 | ret = 0; | 94 | ret = 0; |
96 | break; | 95 | break; |
96 | case POSIX_FADV_NOREUSE: | ||
97 | break; | ||
97 | case POSIX_FADV_DONTNEED: | 98 | case POSIX_FADV_DONTNEED: |
98 | if (!bdi_write_congested(mapping->backing_dev_info)) | 99 | if (!bdi_write_congested(mapping->backing_dev_info)) |
99 | filemap_flush(mapping); | 100 | filemap_flush(mapping); |
diff --git a/mm/filemap.c b/mm/filemap.c index d087fc3d3281..b9a60c43b61a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -849,8 +849,6 @@ static void shrink_readahead_size_eio(struct file *filp, | |||
849 | return; | 849 | return; |
850 | 850 | ||
851 | ra->ra_pages /= 4; | 851 | ra->ra_pages /= 4; |
852 | printk(KERN_WARNING "Reducing readahead size to %luK\n", | ||
853 | ra->ra_pages << (PAGE_CACHE_SHIFT - 10)); | ||
854 | } | 852 | } |
855 | 853 | ||
856 | /** | 854 | /** |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 01c9fb97c619..c37319542b70 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -52,6 +52,9 @@ static int __add_section(struct zone *zone, unsigned long phys_start_pfn) | |||
52 | int nr_pages = PAGES_PER_SECTION; | 52 | int nr_pages = PAGES_PER_SECTION; |
53 | int ret; | 53 | int ret; |
54 | 54 | ||
55 | if (pfn_valid(phys_start_pfn)) | ||
56 | return -EEXIST; | ||
57 | |||
55 | ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages); | 58 | ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages); |
56 | 59 | ||
57 | if (ret < 0) | 60 | if (ret < 0) |
@@ -76,15 +79,22 @@ int __add_pages(struct zone *zone, unsigned long phys_start_pfn, | |||
76 | { | 79 | { |
77 | unsigned long i; | 80 | unsigned long i; |
78 | int err = 0; | 81 | int err = 0; |
82 | int start_sec, end_sec; | ||
83 | /* during initialize mem_map, align hot-added range to section */ | ||
84 | start_sec = pfn_to_section_nr(phys_start_pfn); | ||
85 | end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); | ||
79 | 86 | ||
80 | for (i = 0; i < nr_pages; i += PAGES_PER_SECTION) { | 87 | for (i = start_sec; i <= end_sec; i++) { |
81 | err = __add_section(zone, phys_start_pfn + i); | 88 | err = __add_section(zone, i << PFN_SECTION_SHIFT); |
82 | 89 | ||
83 | /* We want to keep adding the rest of the | 90 | /* |
84 | * sections if the first ones already exist | 91 | * EEXIST is finally dealed with by ioresource collision |
92 | * check. see add_memory() => register_memory_resource() | ||
93 | * Warning will be printed if there is collision. | ||
85 | */ | 94 | */ |
86 | if (err && (err != -EEXIST)) | 95 | if (err && (err != -EEXIST)) |
87 | break; | 96 | break; |
97 | err = 0; | ||
88 | } | 98 | } |
89 | 99 | ||
90 | return err; | 100 | return err; |
@@ -156,7 +166,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) | |||
156 | res.flags = IORESOURCE_MEM; /* we just need system ram */ | 166 | res.flags = IORESOURCE_MEM; /* we just need system ram */ |
157 | section_end = res.end; | 167 | section_end = res.end; |
158 | 168 | ||
159 | while (find_next_system_ram(&res) >= 0) { | 169 | while ((res.start < res.end) && (find_next_system_ram(&res) >= 0)) { |
160 | start_pfn = (unsigned long)(res.start >> PAGE_SHIFT); | 170 | start_pfn = (unsigned long)(res.start >> PAGE_SHIFT); |
161 | nr_pages = (unsigned long) | 171 | nr_pages = (unsigned long) |
162 | ((res.end + 1 - res.start) >> PAGE_SHIFT); | 172 | ((res.end + 1 - res.start) >> PAGE_SHIFT); |
@@ -213,10 +223,9 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat) | |||
213 | } | 223 | } |
214 | 224 | ||
215 | /* add this memory to iomem resource */ | 225 | /* add this memory to iomem resource */ |
216 | static void register_memory_resource(u64 start, u64 size) | 226 | static struct resource *register_memory_resource(u64 start, u64 size) |
217 | { | 227 | { |
218 | struct resource *res; | 228 | struct resource *res; |
219 | |||
220 | res = kzalloc(sizeof(struct resource), GFP_KERNEL); | 229 | res = kzalloc(sizeof(struct resource), GFP_KERNEL); |
221 | BUG_ON(!res); | 230 | BUG_ON(!res); |
222 | 231 | ||
@@ -228,7 +237,18 @@ static void register_memory_resource(u64 start, u64 size) | |||
228 | printk("System RAM resource %llx - %llx cannot be added\n", | 237 | printk("System RAM resource %llx - %llx cannot be added\n", |
229 | (unsigned long long)res->start, (unsigned long long)res->end); | 238 | (unsigned long long)res->start, (unsigned long long)res->end); |
230 | kfree(res); | 239 | kfree(res); |
240 | res = NULL; | ||
231 | } | 241 | } |
242 | return res; | ||
243 | } | ||
244 | |||
245 | static void release_memory_resource(struct resource *res) | ||
246 | { | ||
247 | if (!res) | ||
248 | return; | ||
249 | release_resource(res); | ||
250 | kfree(res); | ||
251 | return; | ||
232 | } | 252 | } |
233 | 253 | ||
234 | 254 | ||
@@ -237,8 +257,13 @@ int add_memory(int nid, u64 start, u64 size) | |||
237 | { | 257 | { |
238 | pg_data_t *pgdat = NULL; | 258 | pg_data_t *pgdat = NULL; |
239 | int new_pgdat = 0; | 259 | int new_pgdat = 0; |
260 | struct resource *res; | ||
240 | int ret; | 261 | int ret; |
241 | 262 | ||
263 | res = register_memory_resource(start, size); | ||
264 | if (!res) | ||
265 | return -EEXIST; | ||
266 | |||
242 | if (!node_online(nid)) { | 267 | if (!node_online(nid)) { |
243 | pgdat = hotadd_new_pgdat(nid, start); | 268 | pgdat = hotadd_new_pgdat(nid, start); |
244 | if (!pgdat) | 269 | if (!pgdat) |
@@ -268,14 +293,13 @@ int add_memory(int nid, u64 start, u64 size) | |||
268 | BUG_ON(ret); | 293 | BUG_ON(ret); |
269 | } | 294 | } |
270 | 295 | ||
271 | /* register this memory as resource */ | ||
272 | register_memory_resource(start, size); | ||
273 | |||
274 | return ret; | 296 | return ret; |
275 | error: | 297 | error: |
276 | /* rollback pgdat allocation and others */ | 298 | /* rollback pgdat allocation and others */ |
277 | if (new_pgdat) | 299 | if (new_pgdat) |
278 | rollback_node_hotadd(nid, pgdat); | 300 | rollback_node_hotadd(nid, pgdat); |
301 | if (res) | ||
302 | release_memory_resource(res); | ||
279 | 303 | ||
280 | return ret; | 304 | return ret; |
281 | } | 305 | } |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e07e27e846a2..a9963ceddd65 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -1176,7 +1176,15 @@ static inline unsigned interleave_nid(struct mempolicy *pol, | |||
1176 | if (vma) { | 1176 | if (vma) { |
1177 | unsigned long off; | 1177 | unsigned long off; |
1178 | 1178 | ||
1179 | off = vma->vm_pgoff; | 1179 | /* |
1180 | * for small pages, there is no difference between | ||
1181 | * shift and PAGE_SHIFT, so the bit-shift is safe. | ||
1182 | * for huge pages, since vm_pgoff is in units of small | ||
1183 | * pages, we need to shift off the always 0 bits to get | ||
1184 | * a useful offset. | ||
1185 | */ | ||
1186 | BUG_ON(shift < PAGE_SHIFT); | ||
1187 | off = vma->vm_pgoff >> (shift - PAGE_SHIFT); | ||
1180 | off += (addr - vma->vm_start) >> shift; | 1188 | off += (addr - vma->vm_start) >> shift; |
1181 | return offset_il_node(pol, vma, off); | 1189 | return offset_il_node(pol, vma, off); |
1182 | } else | 1190 | } else |
diff --git a/mm/mempool.c b/mm/mempool.c index fe6e05289cc5..ccd8cb8cd41f 100644 --- a/mm/mempool.c +++ b/mm/mempool.c | |||
@@ -238,8 +238,13 @@ repeat_alloc: | |||
238 | init_wait(&wait); | 238 | init_wait(&wait); |
239 | prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); | 239 | prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); |
240 | smp_mb(); | 240 | smp_mb(); |
241 | if (!pool->curr_nr) | 241 | if (!pool->curr_nr) { |
242 | io_schedule(); | 242 | /* |
243 | * FIXME: this should be io_schedule(). The timeout is there | ||
244 | * as a workaround for some DM problems in 2.6.18. | ||
245 | */ | ||
246 | io_schedule_timeout(5*HZ); | ||
247 | } | ||
243 | finish_wait(&pool->wait, &wait); | 248 | finish_wait(&pool->wait, &wait); |
244 | 249 | ||
245 | goto repeat_alloc; | 250 | goto repeat_alloc; |
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
@@ -1106,7 +1106,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | |||
1106 | 1106 | ||
1107 | #endif | 1107 | #endif |
1108 | 1108 | ||
1109 | static int __devinit cpuup_callback(struct notifier_block *nfb, | 1109 | static int __cpuinit cpuup_callback(struct notifier_block *nfb, |
1110 | unsigned long action, void *hcpu) | 1110 | unsigned long action, void *hcpu) |
1111 | { | 1111 | { |
1112 | long cpu = (long)hcpu; | 1112 | long cpu = (long)hcpu; |
@@ -3224,7 +3224,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
3224 | EXPORT_SYMBOL(kmem_cache_alloc); | 3224 | EXPORT_SYMBOL(kmem_cache_alloc); |
3225 | 3225 | ||
3226 | /** | 3226 | /** |
3227 | * kmem_cache_alloc - Allocate an object. The memory is set to zero. | 3227 | * kmem_cache_zalloc - Allocate an object. The memory is set to zero. |
3228 | * @cache: The cache to allocate from. | 3228 | * @cache: The cache to allocate from. |
3229 | * @flags: See kmalloc(). | 3229 | * @flags: See kmalloc(). |
3230 | * | 3230 | * |
diff --git a/mm/swap.c b/mm/swap.c --- a/mm/swap.c +++ b/mm/swap.c | |||
@@ -54,6 +54,26 @@ void put_page(struct page *page) | |||
54 | } | 54 | } |
55 | EXPORT_SYMBOL(put_page); | 55 | EXPORT_SYMBOL(put_page); |
56 | 56 | ||
57 | /** | ||
58 | * put_pages_list(): release a list of pages | ||
59 | * | ||
60 | * Release a list of pages which are strung together on page.lru. Currently | ||
61 | * used by read_cache_pages() and related error recovery code. | ||
62 | * | ||
63 | * @pages: list of pages threaded on page->lru | ||
64 | */ | ||
65 | void put_pages_list(struct list_head *pages) | ||
66 | { | ||
67 | while (!list_empty(pages)) { | ||
68 | struct page *victim; | ||
69 | |||
70 | victim = list_entry(pages->prev, struct page, lru); | ||
71 | list_del(&victim->lru); | ||
72 | page_cache_release(victim); | ||
73 | } | ||
74 | } | ||
75 | EXPORT_SYMBOL(put_pages_list); | ||
76 | |||
57 | /* | 77 | /* |
58 | * Writeback is about to end against a page which has been marked for immediate | 78 | * Writeback is about to end against a page which has been marked for immediate |
59 | * reclaim. If it still appears to be reclaimable, move it to the tail of the | 79 | * reclaim. If it still appears to be reclaimable, move it to the tail of the |
diff --git a/mm/swapfile.c b/mm/swapfile.c index e70d6c6d6fee..f1f5ec783781 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -442,11 +442,12 @@ int swap_type_of(dev_t device) | |||
442 | 442 | ||
443 | if (!(swap_info[i].flags & SWP_WRITEOK)) | 443 | if (!(swap_info[i].flags & SWP_WRITEOK)) |
444 | continue; | 444 | continue; |
445 | |||
445 | if (!device) { | 446 | if (!device) { |
446 | spin_unlock(&swap_lock); | 447 | spin_unlock(&swap_lock); |
447 | return i; | 448 | return i; |
448 | } | 449 | } |
449 | inode = swap_info->swap_file->f_dentry->d_inode; | 450 | inode = swap_info[i].swap_file->f_dentry->d_inode; |
450 | if (S_ISBLK(inode->i_mode) && | 451 | if (S_ISBLK(inode->i_mode) && |
451 | device == MKDEV(imajor(inode), iminor(inode))) { | 452 | device == MKDEV(imajor(inode), iminor(inode))) { |
452 | spin_unlock(&swap_lock); | 453 | spin_unlock(&swap_lock); |
diff --git a/mm/vmstat.c b/mm/vmstat.c index dfdf24133901..c1b5f4106b38 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/config.h> | 12 | #include <linux/config.h> |
13 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/cpu.h> | ||
15 | 16 | ||
16 | void __get_zone_counts(unsigned long *active, unsigned long *inactive, | 17 | void __get_zone_counts(unsigned long *active, unsigned long *inactive, |
17 | unsigned long *free, struct pglist_data *pgdat) | 18 | unsigned long *free, struct pglist_data *pgdat) |
@@ -114,17 +115,72 @@ EXPORT_SYMBOL(vm_stat); | |||
114 | 115 | ||
115 | #ifdef CONFIG_SMP | 116 | #ifdef CONFIG_SMP |
116 | 117 | ||
117 | #define STAT_THRESHOLD 32 | 118 | static int calculate_threshold(struct zone *zone) |
119 | { | ||
120 | int threshold; | ||
121 | int mem; /* memory in 128 MB units */ | ||
122 | |||
123 | /* | ||
124 | * The threshold scales with the number of processors and the amount | ||
125 | * of memory per zone. More memory means that we can defer updates for | ||
126 | * longer, more processors could lead to more contention. | ||
127 | * fls() is used to have a cheap way of logarithmic scaling. | ||
128 | * | ||
129 | * Some sample thresholds: | ||
130 | * | ||
131 | * Threshold Processors (fls) Zonesize fls(mem+1) | ||
132 | * ------------------------------------------------------------------ | ||
133 | * 8 1 1 0.9-1 GB 4 | ||
134 | * 16 2 2 0.9-1 GB 4 | ||
135 | * 20 2 2 1-2 GB 5 | ||
136 | * 24 2 2 2-4 GB 6 | ||
137 | * 28 2 2 4-8 GB 7 | ||
138 | * 32 2 2 8-16 GB 8 | ||
139 | * 4 2 2 <128M 1 | ||
140 | * 30 4 3 2-4 GB 5 | ||
141 | * 48 4 3 8-16 GB 8 | ||
142 | * 32 8 4 1-2 GB 4 | ||
143 | * 32 8 4 0.9-1GB 4 | ||
144 | * 10 16 5 <128M 1 | ||
145 | * 40 16 5 900M 4 | ||
146 | * 70 64 7 2-4 GB 5 | ||
147 | * 84 64 7 4-8 GB 6 | ||
148 | * 108 512 9 4-8 GB 6 | ||
149 | * 125 1024 10 8-16 GB 8 | ||
150 | * 125 1024 10 16-32 GB 9 | ||
151 | */ | ||
152 | |||
153 | mem = zone->present_pages >> (27 - PAGE_SHIFT); | ||
154 | |||
155 | threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem)); | ||
156 | |||
157 | /* | ||
158 | * Maximum threshold is 125 | ||
159 | */ | ||
160 | threshold = min(125, threshold); | ||
161 | |||
162 | return threshold; | ||
163 | } | ||
118 | 164 | ||
119 | /* | 165 | /* |
120 | * Determine pointer to currently valid differential byte given a zone and | 166 | * Refresh the thresholds for each zone. |
121 | * the item number. | ||
122 | * | ||
123 | * Preemption must be off | ||
124 | */ | 167 | */ |
125 | static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item) | 168 | static void refresh_zone_stat_thresholds(void) |
126 | { | 169 | { |
127 | return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item]; | 170 | struct zone *zone; |
171 | int cpu; | ||
172 | int threshold; | ||
173 | |||
174 | for_each_zone(zone) { | ||
175 | |||
176 | if (!zone->present_pages) | ||
177 | continue; | ||
178 | |||
179 | threshold = calculate_threshold(zone); | ||
180 | |||
181 | for_each_online_cpu(cpu) | ||
182 | zone_pcp(zone, cpu)->stat_threshold = threshold; | ||
183 | } | ||
128 | } | 184 | } |
129 | 185 | ||
130 | /* | 186 | /* |
@@ -133,17 +189,16 @@ static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item) | |||
133 | void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | 189 | void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, |
134 | int delta) | 190 | int delta) |
135 | { | 191 | { |
136 | s8 *p; | 192 | struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); |
193 | s8 *p = pcp->vm_stat_diff + item; | ||
137 | long x; | 194 | long x; |
138 | 195 | ||
139 | p = diff_pointer(zone, item); | ||
140 | x = delta + *p; | 196 | x = delta + *p; |
141 | 197 | ||
142 | if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) { | 198 | if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) { |
143 | zone_page_state_add(x, zone, item); | 199 | zone_page_state_add(x, zone, item); |
144 | x = 0; | 200 | x = 0; |
145 | } | 201 | } |
146 | |||
147 | *p = x; | 202 | *p = x; |
148 | } | 203 | } |
149 | EXPORT_SYMBOL(__mod_zone_page_state); | 204 | EXPORT_SYMBOL(__mod_zone_page_state); |
@@ -172,10 +227,12 @@ EXPORT_SYMBOL(mod_zone_page_state); | |||
172 | * No overflow check is necessary and therefore the differential can be | 227 | * No overflow check is necessary and therefore the differential can be |
173 | * incremented or decremented in place which may allow the compilers to | 228 | * incremented or decremented in place which may allow the compilers to |
174 | * generate better code. | 229 | * generate better code. |
175 | * | ||
176 | * The increment or decrement is known and therefore one boundary check can | 230 | * The increment or decrement is known and therefore one boundary check can |
177 | * be omitted. | 231 | * be omitted. |
178 | * | 232 | * |
233 | * NOTE: These functions are very performance sensitive. Change only | ||
234 | * with care. | ||
235 | * | ||
179 | * Some processors have inc/dec instructions that are atomic vs an interrupt. | 236 | * Some processors have inc/dec instructions that are atomic vs an interrupt. |
180 | * However, the code must first determine the differential location in a zone | 237 | * However, the code must first determine the differential location in a zone |
181 | * based on the processor number and then inc/dec the counter. There is no | 238 | * based on the processor number and then inc/dec the counter. There is no |
@@ -185,13 +242,16 @@ EXPORT_SYMBOL(mod_zone_page_state); | |||
185 | */ | 242 | */ |
186 | static void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | 243 | static void __inc_zone_state(struct zone *zone, enum zone_stat_item item) |
187 | { | 244 | { |
188 | s8 *p = diff_pointer(zone, item); | 245 | struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); |
246 | s8 *p = pcp->vm_stat_diff + item; | ||
189 | 247 | ||
190 | (*p)++; | 248 | (*p)++; |
191 | 249 | ||
192 | if (unlikely(*p > STAT_THRESHOLD)) { | 250 | if (unlikely(*p > pcp->stat_threshold)) { |
193 | zone_page_state_add(*p, zone, item); | 251 | int overstep = pcp->stat_threshold / 2; |
194 | *p = 0; | 252 | |
253 | zone_page_state_add(*p + overstep, zone, item); | ||
254 | *p = -overstep; | ||
195 | } | 255 | } |
196 | } | 256 | } |
197 | 257 | ||
@@ -204,13 +264,16 @@ EXPORT_SYMBOL(__inc_zone_page_state); | |||
204 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | 264 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) |
205 | { | 265 | { |
206 | struct zone *zone = page_zone(page); | 266 | struct zone *zone = page_zone(page); |
207 | s8 *p = diff_pointer(zone, item); | 267 | struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); |
268 | s8 *p = pcp->vm_stat_diff + item; | ||
208 | 269 | ||
209 | (*p)--; | 270 | (*p)--; |
210 | 271 | ||
211 | if (unlikely(*p < -STAT_THRESHOLD)) { | 272 | if (unlikely(*p < - pcp->stat_threshold)) { |
212 | zone_page_state_add(*p, zone, item); | 273 | int overstep = pcp->stat_threshold / 2; |
213 | *p = 0; | 274 | |
275 | zone_page_state_add(*p - overstep, zone, item); | ||
276 | *p = overstep; | ||
214 | } | 277 | } |
215 | } | 278 | } |
216 | EXPORT_SYMBOL(__dec_zone_page_state); | 279 | EXPORT_SYMBOL(__dec_zone_page_state); |
@@ -239,19 +302,9 @@ EXPORT_SYMBOL(inc_zone_page_state); | |||
239 | void dec_zone_page_state(struct page *page, enum zone_stat_item item) | 302 | void dec_zone_page_state(struct page *page, enum zone_stat_item item) |
240 | { | 303 | { |
241 | unsigned long flags; | 304 | unsigned long flags; |
242 | struct zone *zone; | ||
243 | s8 *p; | ||
244 | 305 | ||
245 | zone = page_zone(page); | ||
246 | local_irq_save(flags); | 306 | local_irq_save(flags); |
247 | p = diff_pointer(zone, item); | 307 | __dec_zone_page_state(page, item); |
248 | |||
249 | (*p)--; | ||
250 | |||
251 | if (unlikely(*p < -STAT_THRESHOLD)) { | ||
252 | zone_page_state_add(*p, zone, item); | ||
253 | *p = 0; | ||
254 | } | ||
255 | local_irq_restore(flags); | 308 | local_irq_restore(flags); |
256 | } | 309 | } |
257 | EXPORT_SYMBOL(dec_zone_page_state); | 310 | EXPORT_SYMBOL(dec_zone_page_state); |
@@ -525,6 +578,10 @@ static int zoneinfo_show(struct seq_file *m, void *arg) | |||
525 | pageset->pcp[j].high, | 578 | pageset->pcp[j].high, |
526 | pageset->pcp[j].batch); | 579 | pageset->pcp[j].batch); |
527 | } | 580 | } |
581 | #ifdef CONFIG_SMP | ||
582 | seq_printf(m, "\n vm stats threshold: %d", | ||
583 | pageset->stat_threshold); | ||
584 | #endif | ||
528 | } | 585 | } |
529 | seq_printf(m, | 586 | seq_printf(m, |
530 | "\n all_unreclaimable: %u" | 587 | "\n all_unreclaimable: %u" |
@@ -613,3 +670,35 @@ struct seq_operations vmstat_op = { | |||
613 | 670 | ||
614 | #endif /* CONFIG_PROC_FS */ | 671 | #endif /* CONFIG_PROC_FS */ |
615 | 672 | ||
673 | #ifdef CONFIG_SMP | ||
674 | /* | ||
675 | * Use the cpu notifier to insure that the thresholds are recalculated | ||
676 | * when necessary. | ||
677 | */ | ||
678 | static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, | ||
679 | unsigned long action, | ||
680 | void *hcpu) | ||
681 | { | ||
682 | switch (action) { | ||
683 | case CPU_UP_PREPARE: | ||
684 | case CPU_UP_CANCELED: | ||
685 | case CPU_DEAD: | ||
686 | refresh_zone_stat_thresholds(); | ||
687 | break; | ||
688 | default: | ||
689 | break; | ||
690 | } | ||
691 | return NOTIFY_OK; | ||
692 | } | ||
693 | |||
694 | static struct notifier_block __cpuinitdata vmstat_notifier = | ||
695 | { &vmstat_cpuup_callback, NULL, 0 }; | ||
696 | |||
697 | int __init setup_vmstat(void) | ||
698 | { | ||
699 | refresh_zone_stat_thresholds(); | ||
700 | register_cpu_notifier(&vmstat_notifier); | ||
701 | return 0; | ||
702 | } | ||
703 | module_init(setup_vmstat) | ||
704 | #endif | ||