Diffstat (limited to 'include/linux/mmzone.h')
-rw-r--r--  include/linux/mmzone.h  219
1 file changed, 116 insertions, 103 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6cbd1b6c3d20..318df7051850 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -143,6 +143,7 @@ enum zone_stat_item {
 	NR_SHMEM,	/* shmem pages (included tmpfs/GEM pages) */
 	NR_DIRTIED,	/* page dirtyings since bootup */
 	NR_WRITTEN,	/* page writings since bootup */
+	NR_PAGES_SCANNED,	/* pages scanned since last reclaim */
 #ifdef CONFIG_NUMA
 	NUMA_HIT,	/* allocated in intended node */
 	NUMA_MISS,	/* allocated in non intended node */
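The new NR_PAGES_SCANNED entry turns the old zone->pages_scanned field (removed further down) into an ordinary vmstat counter, so scan accounting goes through the per-cpu vmstat machinery instead of a field updated under zone->lock. A minimal sketch of how callers might use it; zone_page_state() and __mod_zone_page_state() are the standard accessors from include/linux/vmstat.h, while the two helper names and the "six times reclaimable" threshold here are only illustrative:

#include <linux/mmzone.h>
#include <linux/vmstat.h>

/* Illustrative reclaim-side accounting: credit scanned pages to the zone. */
static void note_pages_scanned(struct zone *zone, unsigned long nr_scanned)
{
	__mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
}

/*
 * Illustrative consumer: treat the zone as effectively unreclaimable once
 * several times its reclaimable pages have been scanned since pages were
 * last freed (the caller supplies the reclaimable estimate).
 */
static bool zone_scanned_excessively(struct zone *zone, unsigned long reclaimable)
{
	return zone_page_state(zone, NR_PAGES_SCANNED) >= reclaimable * 6;
}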
@@ -324,19 +325,12 @@ enum zone_type {
 #ifndef __GENERATING_BOUNDS_H
 
 struct zone {
-	/* Fields commonly accessed by the page allocator */
+	/* Read-mostly fields */
 
 	/* zone watermarks, access with *_wmark_pages(zone) macros */
 	unsigned long watermark[NR_WMARK];
 
 	/*
-	 * When free pages are below this point, additional steps are taken
-	 * when reading the number of free pages to avoid per-cpu counter
-	 * drift allowing watermarks to be breached
-	 */
-	unsigned long percpu_drift_mark;
-
-	/*
 	 * We don't know if the memory that we're going to allocate will be freeable
 	 * or/and it will be released eventually, so to avoid totally wasting several
 	 * GB of ram we must reserve some of the lower zone memory (otherwise we risk
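The comment kept above refers to the *_wmark_pages(zone) macros defined earlier in this header (min_wmark_pages(), low_wmark_pages(), high_wmark_pages()), which simply index the watermark[] array. A hedged sketch of a typical reader; zone_below_low_wmark() is an illustrative name, not an existing helper:

#include <linux/mmzone.h>
#include <linux/vmstat.h>

/* Illustrative check in the style of a kswapd wakeup test. */
static bool zone_below_low_wmark(struct zone *zone)
{
	/* low_wmark_pages(zone) expands to zone->watermark[WMARK_LOW]. */
	return zone_page_state(zone, NR_FREE_PAGES) < low_wmark_pages(zone);
}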
@@ -344,41 +338,26 @@ struct zone {
 	 * on the higher zones). This array is recalculated at runtime if the
 	 * sysctl_lowmem_reserve_ratio sysctl changes.
 	 */
-	unsigned long lowmem_reserve[MAX_NR_ZONES];
-
-	/*
-	 * This is a per-zone reserve of pages that should not be
-	 * considered dirtyable memory.
-	 */
-	unsigned long dirty_balance_reserve;
+	long lowmem_reserve[MAX_NR_ZONES];
 
 #ifdef CONFIG_NUMA
 	int node;
+#endif
+
 	/*
-	 * zone reclaim becomes active if more unmapped pages exist.
+	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+	 * this zone's LRU. Maintained by the pageout code.
 	 */
-	unsigned long min_unmapped_pages;
-	unsigned long min_slab_pages;
-#endif
+	unsigned int inactive_ratio;
+
+	struct pglist_data *zone_pgdat;
 	struct per_cpu_pageset __percpu *pageset;
+
 	/*
-	 * free areas of different sizes
+	 * This is a per-zone reserve of pages that should not be
+	 * considered dirtyable memory.
 	 */
-	spinlock_t lock;
-#if defined CONFIG_COMPACTION || defined CONFIG_CMA
-	/* Set to true when the PG_migrate_skip bits should be cleared */
-	bool compact_blockskip_flush;
-
-	/* pfn where compaction free scanner should start */
-	unsigned long compact_cached_free_pfn;
-	/* pfn where async and sync compaction migration scanner should start */
-	unsigned long compact_cached_migrate_pfn[2];
-#endif
-#ifdef CONFIG_MEMORY_HOTPLUG
-	/* see spanned/present_pages for more description */
-	seqlock_t span_seqlock;
-#endif
-	struct free_area free_area[MAX_ORDER];
+	unsigned long dirty_balance_reserve;
 
 #ifndef CONFIG_SPARSEMEM
 	/*
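lowmem_reserve[] becomes a signed long here, which lets watermark checks mix it with signed arithmetic without casts. A simplified sketch in the spirit of __zone_watermark_ok() in mm/page_alloc.c; the real check also accounts for alloc flags, per-order free lists and atomic reserves, so treat this as an approximation only:

#include <linux/mmzone.h>
#include <linux/vmstat.h>

/*
 * Approximate watermark test: after an order-N allocation the zone must
 * stay above the watermark plus the lowmem reservation it holds back
 * from allocations targeting the given class zone.
 */
static bool zone_watermark_ok_sketch(struct zone *z, unsigned int order,
				     unsigned long mark, int classzone_idx)
{
	long min = mark;
	long free_pages = zone_page_state(z, NR_FREE_PAGES);

	return free_pages - (1 << order) + 1 >
	       min + z->lowmem_reserve[classzone_idx];
}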
@@ -388,74 +367,14 @@ struct zone {
 	unsigned long *pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
-#ifdef CONFIG_COMPACTION
-	/*
-	 * On compaction failure, 1<<compact_defer_shift compactions
-	 * are skipped before trying again. The number attempted since
-	 * last failure is tracked with compact_considered.
-	 */
-	unsigned int compact_considered;
-	unsigned int compact_defer_shift;
-	int compact_order_failed;
-#endif
-
-	ZONE_PADDING(_pad1_)
-
-	/* Fields commonly accessed by the page reclaim scanner */
-	spinlock_t lru_lock;
-	struct lruvec lruvec;
-
-	/* Evictions & activations on the inactive file list */
-	atomic_long_t inactive_age;
-
-	unsigned long pages_scanned; /* since last reclaim */
-	unsigned long flags; /* zone flags, see below */
-
-	/* Zone statistics */
-	atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
-
-	/*
-	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
-	 * this zone's LRU. Maintained by the pageout code.
-	 */
-	unsigned int inactive_ratio;
-
-
-	ZONE_PADDING(_pad2_)
-	/* Rarely used or read-mostly fields */
-
+#ifdef CONFIG_NUMA
 	/*
-	 * wait_table -- the array holding the hash table
-	 * wait_table_hash_nr_entries -- the size of the hash table array
-	 * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
-	 *
-	 * The purpose of all these is to keep track of the people
-	 * waiting for a page to become available and make them
-	 * runnable again when possible. The trouble is that this
-	 * consumes a lot of space, especially when so few things
-	 * wait on pages at a given time. So instead of using
-	 * per-page waitqueues, we use a waitqueue hash table.
-	 *
-	 * The bucket discipline is to sleep on the same queue when
-	 * colliding and wake all in that wait queue when removing.
-	 * When something wakes, it must check to be sure its page is
-	 * truly available, a la thundering herd. The cost of a
-	 * collision is great, but given the expected load of the
-	 * table, they should be so rare as to be outweighed by the
-	 * benefits from the saved space.
-	 *
-	 * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
-	 * primary users of these fields, and in mm/page_alloc.c
-	 * free_area_init_core() performs the initialization of them.
+	 * zone reclaim becomes active if more unmapped pages exist.
 	 */
-	wait_queue_head_t * wait_table;
-	unsigned long wait_table_hash_nr_entries;
-	unsigned long wait_table_bits;
+	unsigned long min_unmapped_pages;
+	unsigned long min_slab_pages;
+#endif /* CONFIG_NUMA */
 
-	/*
-	 * Discontig memory support fields.
-	 */
-	struct pglist_data *zone_pgdat;
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
 	unsigned long zone_start_pfn;
 
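min_unmapped_pages and min_slab_pages remain the NUMA zone-reclaim thresholds: reclaiming a local zone is only attempted once it holds more unmapped page cache, or more reclaimable slab, than these sysctl-derived values. A hedged sketch of that gate, loosely shaped like the check in zone_reclaim() in mm/vmscan.c; the unmapped-pagecache figure is passed in here to keep the example self-contained:

#include <linux/mmzone.h>
#include <linux/vmstat.h>

/* Illustrative gate: is zone reclaim worth attempting for this zone? */
static bool zone_reclaim_worthwhile(struct zone *zone,
				    unsigned long unmapped_pagecache)
{
	if (unmapped_pagecache > zone->min_unmapped_pages)
		return true;
	return zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
	       zone->min_slab_pages;
}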
@@ -500,9 +419,11 @@ struct zone {
 	 * adjust_managed_page_count() should be used instead of directly
 	 * touching zone->managed_pages and totalram_pages.
 	 */
+	unsigned long managed_pages;
 	unsigned long spanned_pages;
 	unsigned long present_pages;
-	unsigned long managed_pages;
+
+	const char *name;
 
 	/*
 	 * Number of MIGRATE_RESEVE page block. To maintain for just
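The three page counts keep their usual relationship, managed_pages <= present_pages <= spanned_pages: spanned covers the zone's whole pfn range including holes, present excludes the holes, and managed further excludes memmap and other bootmem reservations. The span is also what the existing zone_end_pfn() helper in this header is derived from; shown for reference:

/* Existing helper elsewhere in include/linux/mmzone.h (for reference):
 * a zone spans the pfn range [zone_start_pfn, zone_start_pfn + spanned_pages). */
static inline unsigned long zone_end_pfn(const struct zone *zone)
{
	return zone->zone_start_pfn + zone->spanned_pages;
}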
@@ -510,10 +431,94 @@
 	 */
 	int nr_migrate_reserve_block;
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+	/* see spanned/present_pages for more description */
+	seqlock_t span_seqlock;
+#endif
+
 	/*
-	 * rarely used fields:
+	 * wait_table -- the array holding the hash table
+	 * wait_table_hash_nr_entries -- the size of the hash table array
+	 * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
+	 *
+	 * The purpose of all these is to keep track of the people
+	 * waiting for a page to become available and make them
+	 * runnable again when possible. The trouble is that this
+	 * consumes a lot of space, especially when so few things
+	 * wait on pages at a given time. So instead of using
+	 * per-page waitqueues, we use a waitqueue hash table.
+	 *
+	 * The bucket discipline is to sleep on the same queue when
+	 * colliding and wake all in that wait queue when removing.
+	 * When something wakes, it must check to be sure its page is
+	 * truly available, a la thundering herd. The cost of a
+	 * collision is great, but given the expected load of the
+	 * table, they should be so rare as to be outweighed by the
+	 * benefits from the saved space.
+	 *
+	 * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
+	 * primary users of these fields, and in mm/page_alloc.c
+	 * free_area_init_core() performs the initialization of them.
 	 */
-	const char *name;
+	wait_queue_head_t *wait_table;
+	unsigned long wait_table_hash_nr_entries;
+	unsigned long wait_table_bits;
+
+	ZONE_PADDING(_pad1_)
+
+	/* Write-intensive fields used from the page allocator */
+	spinlock_t lock;
+
+	/* free areas of different sizes */
+	struct free_area free_area[MAX_ORDER];
+
+	/* zone flags, see below */
+	unsigned long flags;
+
+	ZONE_PADDING(_pad2_)
+
+	/* Write-intensive fields used by page reclaim */
+
+	/* Fields commonly accessed by the page reclaim scanner */
+	spinlock_t lru_lock;
+	struct lruvec lruvec;
+
+	/* Evictions & activations on the inactive file list */
+	atomic_long_t inactive_age;
+
+	/*
+	 * When free pages are below this point, additional steps are taken
+	 * when reading the number of free pages to avoid per-cpu counter
+	 * drift allowing watermarks to be breached
+	 */
+	unsigned long percpu_drift_mark;
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+	/* pfn where compaction free scanner should start */
+	unsigned long compact_cached_free_pfn;
+	/* pfn where async and sync compaction migration scanner should start */
+	unsigned long compact_cached_migrate_pfn[2];
+#endif
+
+#ifdef CONFIG_COMPACTION
+	/*
+	 * On compaction failure, 1<<compact_defer_shift compactions
+	 * are skipped before trying again. The number attempted since
+	 * last failure is tracked with compact_considered.
+	 */
+	unsigned int compact_considered;
+	unsigned int compact_defer_shift;
+	int compact_order_failed;
+#endif
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+	/* Set to true when the PG_migrate_skip bits should be cleared */
+	bool compact_blockskip_flush;
+#endif
+
+	ZONE_PADDING(_pad3_)
+	/* Zone statistics */
+	atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
 } ____cacheline_internodealigned_in_smp;
 
 typedef enum {
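The _pad1_/_pad2_/_pad3_ markers split struct zone into a read-mostly block, write-intensive allocator fields, write-intensive reclaim fields and the statistics array, so the hot paths that dirty different groups of fields do not bounce the same cache line between nodes. ZONE_PADDING is defined earlier in this header; on SMP builds it is roughly the following, and on UP it expands to nothing:

/* Approximate definition from earlier in include/linux/mmzone.h. */
#if defined(CONFIG_SMP)
struct zone_padding {
	char x[0];
} ____cacheline_internodealigned_in_smp;
#define ZONE_PADDING(name)	struct zone_padding name;
#else
#define ZONE_PADDING(name)
#endif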
@@ -529,6 +534,7 @@ typedef enum {
 	ZONE_WRITEBACK,	/* reclaim scanning has recently found
 			 * many pages under writeback
 			 */
+	ZONE_FAIR_DEPLETED,	/* fair zone policy batch depleted */
 } zone_flags_t;
 
 static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
@@ -566,6 +572,11 @@ static inline int zone_is_reclaim_locked(const struct zone *zone)
 	return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
 }
 
+static inline int zone_is_fair_depleted(const struct zone *zone)
+{
+	return test_bit(ZONE_FAIR_DEPLETED, &zone->flags);
+}
+
 static inline int zone_is_oom_locked(const struct zone *zone)
 {
 	return test_bit(ZONE_OOM_LOCKED, &zone->flags);
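ZONE_FAIR_DEPLETED and zone_is_fair_depleted() give the allocator a cheap, cached hint that a zone's fair-policy allocation batch (tracked in the NR_ALLOC_BATCH vmstat counter) has been used up. A hedged sketch of how a caller might consume and reset the hint; reset_fair_batch(), zone_usable_for_fair_policy() and the way the new batch is computed are illustrative, not the exact mm/page_alloc.c code:

#include <linux/mmzone.h>
#include <linux/vmstat.h>

/* Illustrative: refill the fair-policy batch and drop the depleted hint. */
static void reset_fair_batch(struct zone *zone, long new_batch)
{
	mod_zone_page_state(zone, NR_ALLOC_BATCH,
			    new_batch - zone_page_state(zone, NR_ALLOC_BATCH));
	zone_clear_flag(zone, ZONE_FAIR_DEPLETED);
}

/* Illustrative fast-path test built on the new helper. */
static bool zone_usable_for_fair_policy(struct zone *zone)
{
	return !zone_is_fair_depleted(zone);
}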
@@ -872,6 +883,8 @@ static inline int zone_movable_is_highmem(void)
 {
 #if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
 	return movable_zone == ZONE_HIGHMEM;
+#elif defined(CONFIG_HIGHMEM)
+	return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
 #else
 	return 0;
 #endif
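With the new #elif branch, kernels built with CONFIG_HIGHMEM but without CONFIG_HAVE_MEMBLOCK_NODE_MAP also report ZONE_MOVABLE as highmem whenever the zone directly below it is ZONE_HIGHMEM, i.e. whenever movable memory is carved out of highmem. For clarity, the function as it reads after this hunk (the closing brace lies outside the hunk context):

static inline int zone_movable_is_highmem(void)
{
#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
	return movable_zone == ZONE_HIGHMEM;
#elif defined(CONFIG_HIGHMEM)
	return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
#else
	return 0;
#endif
}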