Diffstat (limited to 'mm/page_alloc.c')

 mm/page_alloc.c | 670
 1 file changed, 135 insertions(+), 535 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 253a450c400d..3e792a583f3b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -14,7 +14,6 @@
  * (lots of bits borrowed from Ingo Molnar & Andrew Morton)
  */
 
-#include <linux/config.h>
 #include <linux/stddef.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
@@ -37,6 +36,7 @@
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
 #include <linux/mempolicy.h>
+#include <linux/stop_machine.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -83,8 +83,8 @@ EXPORT_SYMBOL(zone_table);
 static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
 int min_free_kbytes = 1024;
 
-unsigned long __initdata nr_kernel_pages;
-unsigned long __initdata nr_all_pages;
+unsigned long __meminitdata nr_kernel_pages;
+unsigned long __meminitdata nr_all_pages;
 
 #ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
@@ -265,7 +265,7 @@ static inline void rmv_page_order(struct page *page)
  * satisfies the following equation:
  * P = B & ~(1 << O)
  *
- * Assumption: *_mem_map is contigious at least up to MAX_ORDER
+ * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
  */
 static inline struct page *
 __page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order)
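The buddy/parent equations in this comment are easier to see with concrete numbers. A standalone userspace sketch of just the index arithmetic (nothing kernel-specific; the values are made up):

#include <stdio.h>

/* For a block at index B of order O, the comment above gives the buddy as
 * B ^ (1 << O) and the combined (parent) block as B & ~(1 << O). */
int main(void)
{
	unsigned long page_idx = 12;	/* hypothetical block index */
	unsigned int order = 2;		/* block of 1 << 2 = 4 pages */

	unsigned long buddy_idx = page_idx ^ (1UL << order);	/* -> 8 */
	unsigned long parent_idx = page_idx & ~(1UL << order);	/* -> 8 */

	printf("buddy of %lu at order %u: %lu, parent: %lu\n",
	       page_idx, order, buddy_idx, parent_idx);
	return 0;
}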
@@ -286,22 +286,27 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
  * we can do coalesce a page and its buddy if
  * (a) the buddy is not in a hole &&
  * (b) the buddy is in the buddy system &&
- * (c) a page and its buddy have the same order.
+ * (c) a page and its buddy have the same order &&
+ * (d) a page and its buddy are in the same zone.
  *
  * For recording whether a page is in the buddy system, we use PG_buddy.
  * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
  *
  * For recording page's order, we use page_private(page).
  */
-static inline int page_is_buddy(struct page *page, int order)
+static inline int page_is_buddy(struct page *page, struct page *buddy,
+								int order)
 {
 #ifdef CONFIG_HOLES_IN_ZONE
-	if (!pfn_valid(page_to_pfn(page)))
+	if (!pfn_valid(page_to_pfn(buddy)))
 		return 0;
 #endif
 
-	if (PageBuddy(page) && page_order(page) == order) {
-		BUG_ON(page_count(page) != 0);
+	if (page_zone_id(page) != page_zone_id(buddy))
+		return 0;
+
+	if (PageBuddy(buddy) && page_order(buddy) == order) {
+		BUG_ON(page_count(buddy) != 0);
 		return 1;
 	}
 	return 0;
@@ -352,7 +357,7 @@ static inline void __free_one_page(struct page *page,
 		struct page *buddy;
 
 		buddy = __page_find_buddy(page, page_idx, order);
-		if (!page_is_buddy(buddy, order))
+		if (!page_is_buddy(page, buddy, order))
 			break;		/* Move the buddy up one level. */
 
 		list_del(&buddy->lru);
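The loop being changed here is the coalescing walk: while the buddy of the block being freed is itself free at the same order (and, after this patch, in the same zone), merge the pair and retry one order higher. A toy model of that walk, with a plain array standing in for the kernel's page flags and free lists (all names here are hypothetical):

#define MAX_ORDER_DEMO 11

struct block_demo {
	int free;	/* "in the buddy system" */
	int order;	/* recorded order, cf. page_private() */
};

static unsigned long merge_up(struct block_demo *blk, unsigned long idx,
			      unsigned int order)
{
	while (order < MAX_ORDER_DEMO - 1) {
		unsigned long buddy = idx ^ (1UL << order);

		/* Mirrors page_is_buddy(): the buddy must be free and
		 * recorded at the same order. */
		if (!blk[buddy].free || blk[buddy].order != (int)order)
			break;

		blk[buddy].free = 0;		/* unlink the buddy */
		idx &= ~(1UL << order);		/* combined (parent) index */
		order++;
	}
	blk[idx].free = 1;
	blk[idx].order = order;
	return idx;
}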
@@ -440,8 +445,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 
 	arch_free_page(page, order);
 	if (!PageHighMem(page))
-		mutex_debug_check_no_locks_freed(page_address(page),
-						 PAGE_SIZE<<order);
+		debug_check_no_locks_freed(page_address(page),
+					   PAGE_SIZE<<order);
 
 	for (i = 0 ; i < (1 << order) ; ++i)
 		reserved += free_pages_check(page + i);
@@ -450,7 +455,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 
 	kernel_map_pages(page, 1 << order, 0);
 	local_irq_save(flags);
-	__mod_page_state(pgfree, 1 << order);
+	__count_vm_events(PGFREE, 1 << order);
 	free_one_page(page_zone(page), page, order);
 	local_irq_restore(flags);
 }
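This is the first of several conversions from the old page_state accumulators to the vm-event counters: instead of bumping a named struct field per CPU, the kernel now indexes an array by an event id, which is what lets one helper take both an event and a delta. A single-threaded toy of that shape (demo names, not the kernel's):

/* Minimal model of the counter change: ps.pgfree++ becomes an indexed
 * bump, so __count_vm_events(PGFREE, 1 << order) needs no per-field
 * helpers.  The real arrays are per CPU. */
enum vm_event_demo { PGFREE_DEMO, PGALLOC_DEMO, NR_VM_EVENTS_DEMO };

static unsigned long vm_events_demo[NR_VM_EVENTS_DEMO];

static void count_vm_events_demo(enum vm_event_demo item, long delta)
{
	vm_events_demo[item] += delta;
}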
@@ -703,27 +708,6 @@ void drain_local_pages(void)
 }
 #endif /* CONFIG_PM */
 
-static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
-{
-#ifdef CONFIG_NUMA
-	pg_data_t *pg = z->zone_pgdat;
-	pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
-	struct per_cpu_pageset *p;
-
-	p = zone_pcp(z, cpu);
-	if (pg == orig) {
-		p->numa_hit++;
-	} else {
-		p->numa_miss++;
-		zone_pcp(zonelist->zones[0], cpu)->numa_foreign++;
-	}
-	if (pg == NODE_DATA(numa_node_id()))
-		p->local_node++;
-	else
-		p->other_node++;
-#endif
-}
-
 /*
  * Free a 0-order page
  */
@@ -744,7 +728,7 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 	local_irq_save(flags);
-	__inc_page_state(pgfree);
+	__count_vm_event(PGFREE);
 	list_add(&page->lru, &pcp->list);
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
@@ -820,8 +804,8 @@ again:
 		goto failed;
 	}
 
-	__mod_page_state_zone(zone, pgalloc, 1 << order);
-	zone_statistics(zonelist, zone, cpu);
+	__count_zone_vm_events(PGALLOC, zone, 1 << order);
+	zone_statistics(zonelist, zone);
 	local_irq_restore(flags);
 	put_cpu();
 
@@ -951,8 +935,7 @@ restart:
 		goto got_pg;
 
 	do {
-		if (cpuset_zone_allowed(*z, gfp_mask|__GFP_HARDWALL))
-			wakeup_kswapd(*z, order);
+		wakeup_kswapd(*z, order);
 	} while (*(++z));
 
 	/*
@@ -1226,141 +1209,6 @@ static void show_node(struct zone *zone)
 #define show_node(zone) do { } while (0)
 #endif
 
-/*
- * Accumulate the page_state information across all CPUs.
- * The result is unavoidably approximate - it can change
- * during and after execution of this function.
- */
-static DEFINE_PER_CPU(struct page_state, page_states) = {0};
-
-atomic_t nr_pagecache = ATOMIC_INIT(0);
-EXPORT_SYMBOL(nr_pagecache);
-#ifdef CONFIG_SMP
-DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
-#endif
-
-static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
-{
-	unsigned cpu;
-
-	memset(ret, 0, nr * sizeof(unsigned long));
-	cpus_and(*cpumask, *cpumask, cpu_online_map);
-
-	for_each_cpu_mask(cpu, *cpumask) {
-		unsigned long *in;
-		unsigned long *out;
-		unsigned off;
-		unsigned next_cpu;
-
-		in = (unsigned long *)&per_cpu(page_states, cpu);
-
-		next_cpu = next_cpu(cpu, *cpumask);
-		if (likely(next_cpu < NR_CPUS))
-			prefetch(&per_cpu(page_states, next_cpu));
-
-		out = (unsigned long *)ret;
-		for (off = 0; off < nr; off++)
-			*out++ += *in++;
-	}
-}
-
-void get_page_state_node(struct page_state *ret, int node)
-{
-	int nr;
-	cpumask_t mask = node_to_cpumask(node);
-
-	nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
-	nr /= sizeof(unsigned long);
-
-	__get_page_state(ret, nr+1, &mask);
-}
-
-void get_page_state(struct page_state *ret)
-{
-	int nr;
-	cpumask_t mask = CPU_MASK_ALL;
-
-	nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
-	nr /= sizeof(unsigned long);
-
-	__get_page_state(ret, nr + 1, &mask);
-}
-
-void get_full_page_state(struct page_state *ret)
-{
-	cpumask_t mask = CPU_MASK_ALL;
-
-	__get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
-}
-
-unsigned long read_page_state_offset(unsigned long offset)
-{
-	unsigned long ret = 0;
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		unsigned long in;
-
-		in = (unsigned long)&per_cpu(page_states, cpu) + offset;
-		ret += *((unsigned long *)in);
-	}
-	return ret;
-}
-
-void __mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
-	void *ptr;
-
-	ptr = &__get_cpu_var(page_states);
-	*(unsigned long *)(ptr + offset) += delta;
-}
-EXPORT_SYMBOL(__mod_page_state_offset);
-
-void mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
-	unsigned long flags;
-	void *ptr;
-
-	local_irq_save(flags);
-	ptr = &__get_cpu_var(page_states);
-	*(unsigned long *)(ptr + offset) += delta;
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(mod_page_state_offset);
-
-void __get_zone_counts(unsigned long *active, unsigned long *inactive,
-			unsigned long *free, struct pglist_data *pgdat)
-{
-	struct zone *zones = pgdat->node_zones;
-	int i;
-
-	*active = 0;
-	*inactive = 0;
-	*free = 0;
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		*active += zones[i].nr_active;
-		*inactive += zones[i].nr_inactive;
-		*free += zones[i].free_pages;
-	}
-}
-
-void get_zone_counts(unsigned long *active,
-		unsigned long *inactive, unsigned long *free)
-{
-	struct pglist_data *pgdat;
-
-	*active = 0;
-	*inactive = 0;
-	*free = 0;
-	for_each_online_pgdat(pgdat) {
-		unsigned long l, m, n;
-		__get_zone_counts(&l, &m, &n, pgdat);
-		*active += l;
-		*inactive += m;
-		*free += n;
-	}
-}
-
 void si_meminfo(struct sysinfo *val)
 {
 	val->totalram = totalram_pages;
@@ -1401,7 +1249,6 @@ void si_meminfo_node(struct sysinfo *val, int nid)
  */
 void show_free_areas(void)
 {
-	struct page_state ps;
 	int cpu, temperature;
 	unsigned long active;
 	unsigned long inactive;
@@ -1433,7 +1280,6 @@ void show_free_areas(void)
 		}
 	}
 
-	get_page_state(&ps);
 	get_zone_counts(&active, &inactive, &free);
 
 	printk("Free pages: %11ukB (%ukB HighMem)\n",
@@ -1444,13 +1290,13 @@ void show_free_areas(void)
 		"unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n",
 		active,
 		inactive,
-		ps.nr_dirty,
-		ps.nr_writeback,
-		ps.nr_unstable,
+		global_page_state(NR_FILE_DIRTY),
+		global_page_state(NR_WRITEBACK),
+		global_page_state(NR_UNSTABLE_NFS),
 		nr_free_pages(),
-		ps.nr_slab,
-		ps.nr_mapped,
-		ps.nr_page_table_pages);
+		global_page_state(NR_SLAB),
+		global_page_state(NR_FILE_MAPPED),
+		global_page_state(NR_PAGETABLE));
 
 	for_each_zone(zone) {
 		int i;
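The ps.* fields read here were snapshots gathered from every CPU; global_page_state() instead sums live per-zone counters on demand. A sketch of that summation under hypothetical demo types (the real counters are atomic, batched per CPU, and clamped so a transiently negative sum never shows through):

enum zone_stat_demo { NR_FILE_DIRTY_DEMO, NR_WRITEBACK_DEMO, NR_STATS_DEMO };

struct zone_demo { long vm_stat[NR_STATS_DEMO]; };

static long global_page_state_demo(struct zone_demo *zones, int nr_zones,
				   enum zone_stat_demo item)
{
	long total = 0;

	for (int i = 0; i < nr_zones; i++)
		total += zones[i].vm_stat[item];
	return total < 0 ? 0 : total;	/* never report a negative count */
}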
@@ -1485,7 +1331,7 @@ void show_free_areas(void)
 	}
 
 	for_each_zone(zone) {
-		unsigned long nr, flags, order, total = 0;
+		unsigned long nr[MAX_ORDER], flags, order, total = 0;
 
 		show_node(zone);
 		printk("%s: ", zone->name);
@@ -1496,11 +1342,12 @@ void show_free_areas(void)
 
 		spin_lock_irqsave(&zone->lock, flags);
 		for (order = 0; order < MAX_ORDER; order++) {
-			nr = zone->free_area[order].nr_free;
-			total += nr << order;
-			printk("%lu*%lukB ", nr, K(1UL) << order);
+			nr[order] = zone->free_area[order].nr_free;
+			total += nr[order] << order;
 		}
 		spin_unlock_irqrestore(&zone->lock, flags);
+		for (order = 0; order < MAX_ORDER; order++)
+			printk("%lu*%lukB ", nr[order], K(1UL) << order);
 		printk("= %lukB\n", K(total));
 	}
 
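The point of the nr[] array above is to move printk() out of the zone->lock/IRQ-disabled region: the snapshot under the lock is cheap, and the slow console output happens only after the lock is dropped. The same pattern in ordinary userspace C, with a mutex standing in for the spinlock and 4K pages assumed for the kB math:

#include <pthread.h>
#include <stdio.h>

#define MAX_ORDER_DEMO 11

void report(pthread_mutex_t *lock, const unsigned long *nr_free)
{
	unsigned long nr[MAX_ORDER_DEMO];
	int order;

	pthread_mutex_lock(lock);
	for (order = 0; order < MAX_ORDER_DEMO; order++)
		nr[order] = nr_free[order];		/* fast: copy only */
	pthread_mutex_unlock(lock);

	for (order = 0; order < MAX_ORDER_DEMO; order++)
		printf("%lu*%dkB ", nr[order], 4 << order);	/* slow: unlocked */
	printf("\n");
}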
@@ -1512,7 +1359,7 @@ void show_free_areas(void)
  *
  * Add all populated zones of a node to the zonelist.
  */
-static int __init build_zonelists_node(pg_data_t *pgdat,
+static int __meminit build_zonelists_node(pg_data_t *pgdat,
 		struct zonelist *zonelist, int nr_zones, int zone_type)
 {
 	struct zone *zone;
@@ -1548,7 +1395,7 @@ static inline int highest_zone(int zone_bits)
 
 #ifdef CONFIG_NUMA
 #define MAX_NODE_LOAD (num_online_nodes())
-static int __initdata node_load[MAX_NUMNODES];
+static int __meminitdata node_load[MAX_NUMNODES];
 /**
  * find_next_best_node - find the next node that should appear in a given node's fallback list
  * @node: node whose fallback list we're appending
@@ -1563,7 +1410,7 @@ static int __initdata node_load[MAX_NUMNODES];
  * on them otherwise.
  * It returns -1 if no node is found.
  */
-static int __init find_next_best_node(int node, nodemask_t *used_node_mask)
+static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask)
 {
 	int n, val;
 	int min_val = INT_MAX;
@@ -1609,7 +1456,7 @@ static int __init find_next_best_node(int node, nodemask_t *used_node_mask)
 	return best_node;
 }
 
-static void __init build_zonelists(pg_data_t *pgdat)
+static void __meminit build_zonelists(pg_data_t *pgdat)
 {
 	int i, j, k, node, local_node;
 	int prev_node, load;
@@ -1661,7 +1508,7 @@ static void __init build_zonelists(pg_data_t *pgdat)
 
 #else	/* CONFIG_NUMA */
 
-static void __init build_zonelists(pg_data_t *pgdat)
+static void __meminit build_zonelists(pg_data_t *pgdat)
 {
 	int i, j, k, node, local_node;
 
@@ -1699,14 +1546,29 @@ static void __init build_zonelists(pg_data_t *pgdat)
 
 #endif	/* CONFIG_NUMA */
 
-void __init build_all_zonelists(void)
+/* return values int ....just for stop_machine_run() */
+static int __meminit __build_all_zonelists(void *dummy)
 {
-	int i;
+	int nid;
+	for_each_online_node(nid)
+		build_zonelists(NODE_DATA(nid));
+	return 0;
+}
 
-	for_each_online_node(i)
-		build_zonelists(NODE_DATA(i));
-	printk("Built %i zonelists\n", num_online_nodes());
-	cpuset_init_current_mems_allowed();
+void __meminit build_all_zonelists(void)
+{
+	if (system_state == SYSTEM_BOOTING) {
+		__build_all_zonelists(0);
+		cpuset_init_current_mems_allowed();
+	} else {
+		/* we have to stop all cpus to guaranntee there is no user
+		   of zonelist */
+		stop_machine_run(__build_all_zonelists, NULL, NR_CPUS);
+		/* cpuset refresh routine should be here */
+	}
+	vm_total_pages = nr_free_pagecache_pages();
+	printk("Built %i zonelists. Total pages: %ld\n",
+			num_online_nodes(), vm_total_pages);
 }
 
 /*
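After boot, build_all_zonelists() can run while other CPUs are allocating, so the rebuild is wrapped in stop_machine_run(), which quiesces every online CPU while the callback executes (the int return type exists only to fit the callback signature). A userspace analogy with a rwlock in place of freezing CPUs (illustration only; the kernel's zonelist readers pay no locking cost, which is exactly why all CPUs must be stopped instead):

#include <pthread.h>

static pthread_rwlock_t zonelist_lock = PTHREAD_RWLOCK_INITIALIZER;
static int zonelist_demo[4];

static void reader(void)
{
	pthread_rwlock_rdlock(&zonelist_lock);
	/* ... walk zonelist_demo, as the allocator walks zonelists ... */
	pthread_rwlock_unlock(&zonelist_lock);
}

static int rebuild_all_zonelists(void)	/* int return mirrors the callback */
{
	pthread_rwlock_wrlock(&zonelist_lock);	/* no reader mid-traversal */
	for (int i = 0; i < 4; i++)
		zonelist_demo[i] = 3 - i;	/* new fallback order */
	pthread_rwlock_unlock(&zonelist_lock);
	return 0;
}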
@@ -1722,7 +1584,8 @@ void __init build_all_zonelists(void)
  */
 #define PAGES_PER_WAITQUEUE	256
 
-static inline unsigned long wait_table_size(unsigned long pages)
+#ifndef CONFIG_MEMORY_HOTPLUG
+static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
 {
 	unsigned long size = 1;
 
@@ -1740,6 +1603,29 @@ static inline unsigned long wait_table_size(unsigned long pages)
 
 	return max(size, 4UL);
 }
+#else
+/*
+ * A zone's size might be changed by hot-add, so it is not possible to determine
+ * a suitable size for its wait_table. So we use the maximum size now.
+ *
+ * The max wait table size = 4096 x sizeof(wait_queue_head_t). ie:
+ *
+ *    i386 (preemption config)    : 4096 x 16 = 64Kbyte.
+ *    ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte.
+ *    ia64, x86-64 (preemption)   : 4096 x 24 = 96Kbyte.
+ *
+ * The maximum entries are prepared when a zone's memory is (512K + 256) pages
+ * or more by the traditional way. (See above). It equals:
+ *
+ *    i386, x86-64, powerpc(4K page size) : =  ( 2G + 1M)byte.
+ *    ia64(16K page size)                 : =  ( 8G + 4M)byte.
+ *    powerpc (64K page size)             : =  (32G +16M)byte.
+ */
+static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
+{
+	return 4096UL;
+}
+#endif
 
 /*
  * This is an integer logarithm so that shifts can be used later
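The figures in this comment follow from the boot-time rule the #ifndef branch keeps: double the table until there is at least one waitqueue per PAGES_PER_WAITQUEUE (256) pages, clamped to at most 4096 and at least 4 entries. A runnable restatement of that rule:

#include <stdio.h>

#define PAGES_PER_WAITQUEUE 256

/* Same sizing rule as the boot-time wait_table_hash_nr_entries() above:
 * smallest power of two giving one waitqueue per 256 pages, in [4, 4096]. */
static unsigned long wait_table_entries(unsigned long pages)
{
	unsigned long size = 1;

	pages /= PAGES_PER_WAITQUEUE;
	while (size < pages)
		size <<= 1;
	if (size > 4096)
		size = 4096;		/* the cap the hotplug branch hard-codes */
	return size < 4 ? 4 : size;
}

int main(void)
{
	/* (512K + 256) pages of 4K is just over 2GB, the point at which the
	 * traditional sizing first reaches the 4096-entry maximum. */
	printf("%lu\n", wait_table_entries((512UL << 10) + 256));	/* 4096 */
	return 0;
}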
@@ -1964,7 +1850,7 @@ static inline void free_zone_pagesets(int cpu)
 	}
 }
 
-static int pageset_cpuup_callback(struct notifier_block *nfb,
+static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
 		unsigned long action,
 		void *hcpu)
 {
@@ -1986,7 +1872,7 @@ static int pageset_cpuup_callback(struct notifier_block *nfb,
 	return ret;
 }
 
-static struct notifier_block pageset_notifier =
+static struct notifier_block __cpuinitdata pageset_notifier =
 	{ &pageset_cpuup_callback, NULL, 0 };
 
 void __init setup_per_cpu_pageset(void)
@@ -2005,23 +1891,46 @@ void __init setup_per_cpu_pageset(void)
 #endif
 
 static __meminit
-void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
+int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 {
 	int i;
 	struct pglist_data *pgdat = zone->zone_pgdat;
+	size_t alloc_size;
 
 	/*
 	 * The per-page waitqueue mechanism uses hashed waitqueues
 	 * per zone.
 	 */
-	zone->wait_table_size = wait_table_size(zone_size_pages);
-	zone->wait_table_bits = wait_table_bits(zone->wait_table_size);
-	zone->wait_table = (wait_queue_head_t *)
-		alloc_bootmem_node(pgdat, zone->wait_table_size
-					* sizeof(wait_queue_head_t));
+	zone->wait_table_hash_nr_entries =
+		 wait_table_hash_nr_entries(zone_size_pages);
+	zone->wait_table_bits =
+		wait_table_bits(zone->wait_table_hash_nr_entries);
+	alloc_size = zone->wait_table_hash_nr_entries
+					* sizeof(wait_queue_head_t);
+
+	if (system_state == SYSTEM_BOOTING) {
+		zone->wait_table = (wait_queue_head_t *)
+			alloc_bootmem_node(pgdat, alloc_size);
+	} else {
+		/*
+		 * This case means that a zone whose size was 0 gets new memory
+		 * via memory hot-add.
+		 * But it may be the case that a new node was hot-added. In
+		 * this case vmalloc() will not be able to use this new node's
+		 * memory - this wait_table must be initialized to use this new
+		 * node itself as well.
+		 * To use this new node's memory, further consideration will be
+		 * necessary.
+		 */
+		zone->wait_table = (wait_queue_head_t *)vmalloc(alloc_size);
+	}
+	if (!zone->wait_table)
+		return -ENOMEM;
 
-	for(i = 0; i < zone->wait_table_size; ++i)
+	for(i = 0; i < zone->wait_table_hash_nr_entries; ++i)
 		init_waitqueue_head(zone->wait_table + i);
+
+	return 0;
 }
 
 static __meminit void zone_pcp_init(struct zone *zone)
@@ -2043,12 +1952,15 @@ static __meminit void zone_pcp_init(struct zone *zone)
 		zone->name, zone->present_pages, batch);
 }
 
-static __meminit void init_currently_empty_zone(struct zone *zone,
-		unsigned long zone_start_pfn, unsigned long size)
+__meminit int init_currently_empty_zone(struct zone *zone,
+					unsigned long zone_start_pfn,
+					unsigned long size)
 {
 	struct pglist_data *pgdat = zone->zone_pgdat;
-
-	zone_wait_table_init(zone, size);
+	int ret;
+	ret = zone_wait_table_init(zone, size);
+	if (ret)
+		return ret;
 	pgdat->nr_zones = zone_idx(zone) + 1;
 
 	zone->zone_start_pfn = zone_start_pfn;
@@ -2056,6 +1968,8 @@ static __meminit void init_currently_empty_zone(struct zone *zone,
 	memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
 
 	zone_init_free_lists(pgdat, zone, zone->spanned_pages);
+
+	return 0;
 }
 
 /*
@@ -2064,12 +1978,13 @@ static __meminit void init_currently_empty_zone(struct zone *zone,
  * - mark all memory queues empty
  * - clear the memory bitmaps
  */
-static void __init free_area_init_core(struct pglist_data *pgdat,
+static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		unsigned long *zones_size, unsigned long *zholes_size)
 {
 	unsigned long j;
 	int nid = pgdat->node_id;
 	unsigned long zone_start_pfn = pgdat->node_start_pfn;
+	int ret;
 
 	pgdat_resize_init(pgdat);
 	pgdat->nr_zones = 0;
@@ -2106,12 +2021,14 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 		zone->nr_scan_inactive = 0;
 		zone->nr_active = 0;
 		zone->nr_inactive = 0;
+		zap_zone_vm_stats(zone);
 		atomic_set(&zone->reclaim_in_progress, 0);
 		if (!size)
 			continue;
 
 		zonetable_add(zone, nid, j, zone_start_pfn, size);
-		init_currently_empty_zone(zone, zone_start_pfn, size);
+		ret = init_currently_empty_zone(zone, zone_start_pfn, size);
+		BUG_ON(ret);
 		zone_start_pfn += size;
 	}
 }
@@ -2152,7 +2069,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
 #endif /* CONFIG_FLAT_NODE_MEM_MAP */
 }
 
-void __init free_area_init_node(int nid, struct pglist_data *pgdat,
+void __meminit free_area_init_node(int nid, struct pglist_data *pgdat,
 		unsigned long *zones_size, unsigned long node_start_pfn,
 		unsigned long *zholes_size)
 {
@@ -2178,307 +2095,18 @@ void __init free_area_init(unsigned long *zones_size)
 		__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
 }
 
-#ifdef CONFIG_PROC_FS
-
-#include <linux/seq_file.h>
-
-static void *frag_start(struct seq_file *m, loff_t *pos)
-{
-	pg_data_t *pgdat;
-	loff_t node = *pos;
-	for (pgdat = first_online_pgdat();
-	     pgdat && node;
-	     pgdat = next_online_pgdat(pgdat))
-		--node;
-
-	return pgdat;
-}
-
-static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
-{
-	pg_data_t *pgdat = (pg_data_t *)arg;
-
-	(*pos)++;
-	return next_online_pgdat(pgdat);
-}
-
-static void frag_stop(struct seq_file *m, void *arg)
-{
-}
-
-/*
- * This walks the free areas for each zone.
- */
-static int frag_show(struct seq_file *m, void *arg)
-{
-	pg_data_t *pgdat = (pg_data_t *)arg;
-	struct zone *zone;
-	struct zone *node_zones = pgdat->node_zones;
-	unsigned long flags;
-	int order;
-
-	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
-		if (!populated_zone(zone))
-			continue;
-
-		spin_lock_irqsave(&zone->lock, flags);
-		seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
-		for (order = 0; order < MAX_ORDER; ++order)
-			seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
-		spin_unlock_irqrestore(&zone->lock, flags);
-		seq_putc(m, '\n');
-	}
-	return 0;
-}
-
-struct seq_operations fragmentation_op = {
-	.start	= frag_start,
-	.next	= frag_next,
-	.stop	= frag_stop,
-	.show	= frag_show,
-};
-
-/*
- * Output information about zones in @pgdat.
- */
-static int zoneinfo_show(struct seq_file *m, void *arg)
-{
-	pg_data_t *pgdat = arg;
-	struct zone *zone;
-	struct zone *node_zones = pgdat->node_zones;
-	unsigned long flags;
-
-	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
-		int i;
-
-		if (!populated_zone(zone))
-			continue;
-
-		spin_lock_irqsave(&zone->lock, flags);
-		seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
-		seq_printf(m,
-			"\n pages free %lu"
-			"\n min %lu"
-			"\n low %lu"
-			"\n high %lu"
-			"\n active %lu"
-			"\n inactive %lu"
-			"\n scanned %lu (a: %lu i: %lu)"
-			"\n spanned %lu"
-			"\n present %lu",
-			zone->free_pages,
-			zone->pages_min,
-			zone->pages_low,
-			zone->pages_high,
-			zone->nr_active,
-			zone->nr_inactive,
-			zone->pages_scanned,
-			zone->nr_scan_active, zone->nr_scan_inactive,
-			zone->spanned_pages,
-			zone->present_pages);
-		seq_printf(m,
-			"\n protection: (%lu",
-			zone->lowmem_reserve[0]);
-		for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
-			seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
-		seq_printf(m,
-			")"
-			"\n pagesets");
-		for_each_online_cpu(i) {
-			struct per_cpu_pageset *pageset;
-			int j;
-
-			pageset = zone_pcp(zone, i);
-			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-				if (pageset->pcp[j].count)
-					break;
-			}
-			if (j == ARRAY_SIZE(pageset->pcp))
-				continue;
-			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-				seq_printf(m,
-					"\n cpu: %i pcp: %i"
-					"\n count: %i"
-					"\n high: %i"
-					"\n batch: %i",
-					i, j,
-					pageset->pcp[j].count,
-					pageset->pcp[j].high,
-					pageset->pcp[j].batch);
-			}
-#ifdef CONFIG_NUMA
-			seq_printf(m,
-				"\n numa_hit: %lu"
-				"\n numa_miss: %lu"
-				"\n numa_foreign: %lu"
-				"\n interleave_hit: %lu"
-				"\n local_node: %lu"
-				"\n other_node: %lu",
-				pageset->numa_hit,
-				pageset->numa_miss,
-				pageset->numa_foreign,
-				pageset->interleave_hit,
-				pageset->local_node,
-				pageset->other_node);
-#endif
-		}
-		seq_printf(m,
-			"\n all_unreclaimable: %u"
-			"\n prev_priority: %i"
-			"\n temp_priority: %i"
-			"\n start_pfn: %lu",
-			zone->all_unreclaimable,
-			zone->prev_priority,
-			zone->temp_priority,
-			zone->zone_start_pfn);
-		spin_unlock_irqrestore(&zone->lock, flags);
-		seq_putc(m, '\n');
-	}
-	return 0;
-}
-
-struct seq_operations zoneinfo_op = {
-	.start	= frag_start, /* iterate over all zones. The same as in
-			       * fragmentation. */
-	.next	= frag_next,
-	.stop	= frag_stop,
-	.show	= zoneinfo_show,
-};
-
-static char *vmstat_text[] = {
-	"nr_dirty",
-	"nr_writeback",
-	"nr_unstable",
-	"nr_page_table_pages",
-	"nr_mapped",
-	"nr_slab",
-
-	"pgpgin",
-	"pgpgout",
-	"pswpin",
-	"pswpout",
-
-	"pgalloc_high",
-	"pgalloc_normal",
-	"pgalloc_dma32",
-	"pgalloc_dma",
-
-	"pgfree",
-	"pgactivate",
-	"pgdeactivate",
-
-	"pgfault",
-	"pgmajfault",
-
-	"pgrefill_high",
-	"pgrefill_normal",
-	"pgrefill_dma32",
-	"pgrefill_dma",
-
-	"pgsteal_high",
-	"pgsteal_normal",
-	"pgsteal_dma32",
-	"pgsteal_dma",
-
-	"pgscan_kswapd_high",
-	"pgscan_kswapd_normal",
-	"pgscan_kswapd_dma32",
-	"pgscan_kswapd_dma",
-
-	"pgscan_direct_high",
-	"pgscan_direct_normal",
-	"pgscan_direct_dma32",
-	"pgscan_direct_dma",
-
-	"pginodesteal",
-	"slabs_scanned",
-	"kswapd_steal",
-	"kswapd_inodesteal",
-	"pageoutrun",
-	"allocstall",
-
-	"pgrotated",
-	"nr_bounce",
-};
-
-static void *vmstat_start(struct seq_file *m, loff_t *pos)
-{
-	struct page_state *ps;
-
-	if (*pos >= ARRAY_SIZE(vmstat_text))
-		return NULL;
-
-	ps = kmalloc(sizeof(*ps), GFP_KERNEL);
-	m->private = ps;
-	if (!ps)
-		return ERR_PTR(-ENOMEM);
-	get_full_page_state(ps);
-	ps->pgpgin /= 2;		/* sectors -> kbytes */
-	ps->pgpgout /= 2;
-	return (unsigned long *)ps + *pos;
-}
-
-static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
-{
-	(*pos)++;
-	if (*pos >= ARRAY_SIZE(vmstat_text))
-		return NULL;
-	return (unsigned long *)m->private + *pos;
-}
-
-static int vmstat_show(struct seq_file *m, void *arg)
-{
-	unsigned long *l = arg;
-	unsigned long off = l - (unsigned long *)m->private;
-
-	seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
-	return 0;
-}
-
-static void vmstat_stop(struct seq_file *m, void *arg)
-{
-	kfree(m->private);
-	m->private = NULL;
-}
-
-struct seq_operations vmstat_op = {
-	.start	= vmstat_start,
-	.next	= vmstat_next,
-	.stop	= vmstat_stop,
-	.show	= vmstat_show,
-};
-
-#endif /* CONFIG_PROC_FS */
-
 #ifdef CONFIG_HOTPLUG_CPU
 static int page_alloc_cpu_notify(struct notifier_block *self,
 		unsigned long action, void *hcpu)
 {
 	int cpu = (unsigned long)hcpu;
-	long *count;
-	unsigned long *src, *dest;
 
 	if (action == CPU_DEAD) {
-		int i;
-
-		/* Drain local pagecache count. */
-		count = &per_cpu(nr_pagecache_local, cpu);
-		atomic_add(*count, &nr_pagecache);
-		*count = 0;
 		local_irq_disable();
 		__drain_pages(cpu);
-
-		/* Add dead cpu's page_states to our own. */
-		dest = (unsigned long *)&__get_cpu_var(page_states);
-		src = (unsigned long *)&per_cpu(page_states, cpu);
-
-		for (i = 0; i < sizeof(struct page_state)/sizeof(unsigned long);
-				i++) {
-			dest[i] += src[i];
-			src[i] = 0;
-		}
-
+		vm_events_fold_cpu(cpu);
 		local_irq_enable();
+		refresh_cpu_vm_stats(cpu);
 	}
 	return NOTIFY_OK;
 }
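With the per-CPU page_states gone, a dead CPU's counts can no longer be folded by walking a struct field by field; vm_events_fold_cpu() and refresh_cpu_vm_stats() do the equivalent against the new counter arrays. A toy of the fold step, with plain arrays standing in for per-CPU data (demo names):

/* Move a dead CPU's event counts onto a surviving CPU and zero the
 * source, so the totals stay correct after the CPU goes away. */
#define NR_EVENTS_DEMO 8

static unsigned long events_demo[4][NR_EVENTS_DEMO];	/* [cpu][event] */

static void events_fold_cpu_demo(int dead_cpu, int this_cpu)
{
	for (int i = 0; i < NR_EVENTS_DEMO; i++) {
		events_demo[this_cpu][i] += events_demo[dead_cpu][i];
		events_demo[dead_cpu][i] = 0;
	}
}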
@@ -2804,42 +2432,14 @@ void *__init alloc_large_system_hash(const char *tablename,
 }
 
 #ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
-/*
- * pfn <-> page translation. out-of-line version.
- * (see asm-generic/memory_model.h)
- */
-#if defined(CONFIG_FLATMEM)
-struct page *pfn_to_page(unsigned long pfn)
-{
-	return mem_map + (pfn - ARCH_PFN_OFFSET);
-}
-unsigned long page_to_pfn(struct page *page)
-{
-	return (page - mem_map) + ARCH_PFN_OFFSET;
-}
-#elif defined(CONFIG_DISCONTIGMEM)
-struct page *pfn_to_page(unsigned long pfn)
-{
-	int nid = arch_pfn_to_nid(pfn);
-	return NODE_DATA(nid)->node_mem_map + arch_local_page_offset(pfn,nid);
-}
-unsigned long page_to_pfn(struct page *page)
-{
-	struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
-	return (page - pgdat->node_mem_map) + pgdat->node_start_pfn;
-}
-#elif defined(CONFIG_SPARSEMEM)
 struct page *pfn_to_page(unsigned long pfn)
 {
-	return __section_mem_map_addr(__pfn_to_section(pfn)) + pfn;
+	return __pfn_to_page(pfn);
 }
-
 unsigned long page_to_pfn(struct page *page)
 {
-	long section_id = page_to_section(page);
-	return page - __section_mem_map_addr(__nr_to_section(section_id));
+	return __page_to_pfn(page);
 }
-#endif /* CONFIG_FLATMEM/DISCONTIGMME/SPARSEMEM */
 EXPORT_SYMBOL(pfn_to_page);
 EXPORT_SYMBOL(page_to_pfn);
 #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
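The three open-coded memory-model variants deleted here now live behind the __pfn_to_page()/__page_to_pfn() macros in asm-generic/memory_model.h, so the out-of-line wrappers collapse to a single definition. A sketch of the FLATMEM case those macros cover, under stub demo types (ARCH_PFN_OFFSET_DEMO is a stand-in for the architecture's first valid pfn):

/* With one contiguous mem_map, pfn <-> page is pure pointer arithmetic. */
struct page_demo { unsigned long flags; };

#define ARCH_PFN_OFFSET_DEMO 0UL
static struct page_demo mem_map_demo[1024];

static struct page_demo *pfn_to_page_demo(unsigned long pfn)
{
	return mem_map_demo + (pfn - ARCH_PFN_OFFSET_DEMO);
}

static unsigned long page_to_pfn_demo(struct page_demo *page)
{
	return (unsigned long)(page - mem_map_demo) + ARCH_PFN_OFFSET_DEMO;
}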