diff options
Diffstat (limited to 'include/linux/mm.h')
| -rw-r--r-- | include/linux/mm.h | 275 |
1 files changed, 229 insertions, 46 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index 8b6e55ee8855..0548eb201e05 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
| @@ -50,6 +50,10 @@ extern int sysctl_legacy_va_layout; | |||
| 50 | #include <asm/pgtable.h> | 50 | #include <asm/pgtable.h> |
| 51 | #include <asm/processor.h> | 51 | #include <asm/processor.h> |
| 52 | 52 | ||
| 53 | #ifndef __pa_symbol | ||
| 54 | #define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) | ||
| 55 | #endif | ||
| 56 | |||
| 53 | extern unsigned long sysctl_user_reserve_kbytes; | 57 | extern unsigned long sysctl_user_reserve_kbytes; |
| 54 | extern unsigned long sysctl_admin_reserve_kbytes; | 58 | extern unsigned long sysctl_admin_reserve_kbytes; |
| 55 | 59 | ||
| @@ -297,12 +301,26 @@ static inline int put_page_testzero(struct page *page) | |||
| 297 | /* | 301 | /* |
| 298 | * Try to grab a ref unless the page has a refcount of zero, return false if | 302 | * Try to grab a ref unless the page has a refcount of zero, return false if |
| 299 | * that is the case. | 303 | * that is the case. |
| 304 | * This can be called when MMU is off so it must not access | ||
| 305 | * any of the virtual mappings. | ||
| 300 | */ | 306 | */ |
| 301 | static inline int get_page_unless_zero(struct page *page) | 307 | static inline int get_page_unless_zero(struct page *page) |
| 302 | { | 308 | { |
| 303 | return atomic_inc_not_zero(&page->_count); | 309 | return atomic_inc_not_zero(&page->_count); |
| 304 | } | 310 | } |
| 305 | 311 | ||
| 312 | /* | ||
| 313 | * Try to drop a ref unless the page has a refcount of one, return false if | ||
| 314 | * that is the case. | ||
| 315 | * This is to make sure that the refcount won't become zero after this drop. | ||
| 316 | * This can be called when MMU is off so it must not access | ||
| 317 | * any of the virtual mappings. | ||
| 318 | */ | ||
| 319 | static inline int put_page_unless_one(struct page *page) | ||
| 320 | { | ||
| 321 | return atomic_add_unless(&page->_count, -1, 1); | ||
| 322 | } | ||
| 323 | |||
| 306 | extern int page_is_ram(unsigned long pfn); | 324 | extern int page_is_ram(unsigned long pfn); |
| 307 | 325 | ||
| 308 | /* Support for virtually mapped pages */ | 326 | /* Support for virtually mapped pages */ |
| @@ -581,11 +599,11 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) | |||
| 581 | * sets it, so none of the operations on it need to be atomic. | 599 | * sets it, so none of the operations on it need to be atomic. |
| 582 | */ | 600 | */ |
| 583 | 601 | ||
| 584 | /* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_NID] | ... | FLAGS | */ | 602 | /* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ |
| 585 | #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) | 603 | #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) |
| 586 | #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) | 604 | #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) |
| 587 | #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) | 605 | #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) |
| 588 | #define LAST_NID_PGOFF (ZONES_PGOFF - LAST_NID_WIDTH) | 606 | #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) |
| 589 | 607 | ||
| 590 | /* | 608 | /* |
| 591 | * Define the bit shifts to access each section. For non-existent | 609 | * Define the bit shifts to access each section. For non-existent |
| @@ -595,7 +613,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) | |||
| 595 | #define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) | 613 | #define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) |
| 596 | #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) | 614 | #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) |
| 597 | #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) | 615 | #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) |
| 598 | #define LAST_NID_PGSHIFT (LAST_NID_PGOFF * (LAST_NID_WIDTH != 0)) | 616 | #define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0)) |
| 599 | 617 | ||
| 600 | /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ | 618 | /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ |
| 601 | #ifdef NODE_NOT_IN_PAGE_FLAGS | 619 | #ifdef NODE_NOT_IN_PAGE_FLAGS |
| @@ -617,7 +635,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) | |||
| 617 | #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) | 635 | #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) |
| 618 | #define NODES_MASK ((1UL << NODES_WIDTH) - 1) | 636 | #define NODES_MASK ((1UL << NODES_WIDTH) - 1) |
| 619 | #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) | 637 | #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) |
| 620 | #define LAST_NID_MASK ((1UL << LAST_NID_WIDTH) - 1) | 638 | #define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_WIDTH) - 1) |
| 621 | #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) | 639 | #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) |
| 622 | 640 | ||
| 623 | static inline enum zone_type page_zonenum(const struct page *page) | 641 | static inline enum zone_type page_zonenum(const struct page *page) |
| @@ -661,51 +679,117 @@ static inline int page_to_nid(const struct page *page) | |||
| 661 | #endif | 679 | #endif |
| 662 | 680 | ||
| 663 | #ifdef CONFIG_NUMA_BALANCING | 681 | #ifdef CONFIG_NUMA_BALANCING |
| 664 | #ifdef LAST_NID_NOT_IN_PAGE_FLAGS | 682 | static inline int cpu_pid_to_cpupid(int cpu, int pid) |
| 665 | static inline int page_nid_xchg_last(struct page *page, int nid) | ||
| 666 | { | 683 | { |
| 667 | return xchg(&page->_last_nid, nid); | 684 | return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK); |
| 668 | } | 685 | } |
| 669 | 686 | ||
| 670 | static inline int page_nid_last(struct page *page) | 687 | static inline int cpupid_to_pid(int cpupid) |
| 671 | { | 688 | { |
| 672 | return page->_last_nid; | 689 | return cpupid & LAST__PID_MASK; |
| 673 | } | 690 | } |
| 674 | static inline void page_nid_reset_last(struct page *page) | 691 | |
| 692 | static inline int cpupid_to_cpu(int cpupid) | ||
| 675 | { | 693 | { |
| 676 | page->_last_nid = -1; | 694 | return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK; |
| 677 | } | 695 | } |
| 678 | #else | 696 | |
| 679 | static inline int page_nid_last(struct page *page) | 697 | static inline int cpupid_to_nid(int cpupid) |
| 680 | { | 698 | { |
| 681 | return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK; | 699 | return cpu_to_node(cpupid_to_cpu(cpupid)); |
| 682 | } | 700 | } |
| 683 | 701 | ||
| 684 | extern int page_nid_xchg_last(struct page *page, int nid); | 702 | static inline bool cpupid_pid_unset(int cpupid) |
| 703 | { | ||
| 704 | return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK); | ||
| 705 | } | ||
| 685 | 706 | ||
| 686 | static inline void page_nid_reset_last(struct page *page) | 707 | static inline bool cpupid_cpu_unset(int cpupid) |
| 687 | { | 708 | { |
| 688 | int nid = (1 << LAST_NID_SHIFT) - 1; | 709 | return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK); |
| 710 | } | ||
| 689 | 711 | ||
| 690 | page->flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT); | 712 | static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) |
| 691 | page->flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT; | 713 | { |
| 714 | return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid); | ||
| 715 | } | ||
| 716 | |||
| 717 | #define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid) | ||
| 718 | #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS | ||
| 719 | static inline int page_cpupid_xchg_last(struct page *page, int cpupid) | ||
| 720 | { | ||
| 721 | return xchg(&page->_last_cpupid, cpupid); | ||
| 722 | } | ||
| 723 | |||
| 724 | static inline int page_cpupid_last(struct page *page) | ||
| 725 | { | ||
| 726 | return page->_last_cpupid; | ||
| 727 | } | ||
| 728 | static inline void page_cpupid_reset_last(struct page *page) | ||
| 729 | { | ||
| 730 | page->_last_cpupid = -1; | ||
| 692 | } | 731 | } |
| 693 | #endif /* LAST_NID_NOT_IN_PAGE_FLAGS */ | ||
| 694 | #else | 732 | #else |
| 695 | static inline int page_nid_xchg_last(struct page *page, int nid) | 733 | static inline int page_cpupid_last(struct page *page) |
| 734 | { | ||
| 735 | return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; | ||
| 736 | } | ||
| 737 | |||
| 738 | extern int page_cpupid_xchg_last(struct page *page, int cpupid); | ||
| 739 | |||
| 740 | static inline void page_cpupid_reset_last(struct page *page) | ||
| 696 | { | 741 | { |
| 697 | return page_to_nid(page); | 742 | int cpupid = (1 << LAST_CPUPID_SHIFT) - 1; |
| 743 | |||
| 744 | page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT); | ||
| 745 | page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT; | ||
| 746 | } | ||
| 747 | #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ | ||
| 748 | #else /* !CONFIG_NUMA_BALANCING */ | ||
| 749 | static inline int page_cpupid_xchg_last(struct page *page, int cpupid) | ||
| 750 | { | ||
| 751 | return page_to_nid(page); /* XXX */ | ||
| 698 | } | 752 | } |
| 699 | 753 | ||
| 700 | static inline int page_nid_last(struct page *page) | 754 | static inline int page_cpupid_last(struct page *page) |
| 701 | { | 755 | { |
| 702 | return page_to_nid(page); | 756 | return page_to_nid(page); /* XXX */ |
| 703 | } | 757 | } |
| 704 | 758 | ||
| 705 | static inline void page_nid_reset_last(struct page *page) | 759 | static inline int cpupid_to_nid(int cpupid) |
| 706 | { | 760 | { |
| 761 | return -1; | ||
| 762 | } | ||
| 763 | |||
| 764 | static inline int cpupid_to_pid(int cpupid) | ||
| 765 | { | ||
| 766 | return -1; | ||
| 707 | } | 767 | } |
| 708 | #endif | 768 | |
| 769 | static inline int cpupid_to_cpu(int cpupid) | ||
| 770 | { | ||
| 771 | return -1; | ||
| 772 | } | ||
| 773 | |||
| 774 | static inline int cpu_pid_to_cpupid(int nid, int pid) | ||
| 775 | { | ||
| 776 | return -1; | ||
| 777 | } | ||
| 778 | |||
| 779 | static inline bool cpupid_pid_unset(int cpupid) | ||
| 780 | { | ||
| 781 | return 1; | ||
| 782 | } | ||
| 783 | |||
| 784 | static inline void page_cpupid_reset_last(struct page *page) | ||
| 785 | { | ||
| 786 | } | ||
| 787 | |||
| 788 | static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) | ||
| 789 | { | ||
| 790 | return false; | ||
| 791 | } | ||
| 792 | #endif /* CONFIG_NUMA_BALANCING */ | ||
| 709 | 793 | ||
| 710 | static inline struct zone *page_zone(const struct page *page) | 794 | static inline struct zone *page_zone(const struct page *page) |
| 711 | { | 795 | { |
| @@ -1232,32 +1316,85 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a | |||
| 1232 | } | 1316 | } |
| 1233 | #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ | 1317 | #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ |
| 1234 | 1318 | ||
| 1235 | #if USE_SPLIT_PTLOCKS | 1319 | #if USE_SPLIT_PTE_PTLOCKS |
| 1236 | /* | 1320 | #if BLOATED_SPINLOCKS |
| 1237 | * We tuck a spinlock to guard each pagetable page into its struct page, | 1321 | void __init ptlock_cache_init(void); |
| 1238 | * at page->private, with BUILD_BUG_ON to make sure that this will not | 1322 | extern bool ptlock_alloc(struct page *page); |
| 1239 | * overflow into the next struct page (as it might with DEBUG_SPINLOCK). | 1323 | extern void ptlock_free(struct page *page); |
| 1240 | * When freeing, reset page->mapping so free_pages_check won't complain. | 1324 | |
| 1241 | */ | 1325 | static inline spinlock_t *ptlock_ptr(struct page *page) |
| 1242 | #define __pte_lockptr(page) &((page)->ptl) | 1326 | { |
| 1243 | #define pte_lock_init(_page) do { \ | 1327 | return page->ptl; |
| 1244 | spin_lock_init(__pte_lockptr(_page)); \ | 1328 | } |
| 1245 | } while (0) | 1329 | #else /* BLOATED_SPINLOCKS */ |
| 1246 | #define pte_lock_deinit(page) ((page)->mapping = NULL) | 1330 | static inline void ptlock_cache_init(void) {} |
| 1247 | #define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) | 1331 | static inline bool ptlock_alloc(struct page *page) |
| 1248 | #else /* !USE_SPLIT_PTLOCKS */ | 1332 | { |
| 1333 | return true; | ||
| 1334 | } | ||
| 1335 | |||
| 1336 | static inline void ptlock_free(struct page *page) | ||
| 1337 | { | ||
| 1338 | } | ||
| 1339 | |||
| 1340 | static inline spinlock_t *ptlock_ptr(struct page *page) | ||
| 1341 | { | ||
| 1342 | return &page->ptl; | ||
| 1343 | } | ||
| 1344 | #endif /* BLOATED_SPINLOCKS */ | ||
| 1345 | |||
| 1346 | static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) | ||
| 1347 | { | ||
| 1348 | return ptlock_ptr(pmd_page(*pmd)); | ||
| 1349 | } | ||
| 1350 | |||
| 1351 | static inline bool ptlock_init(struct page *page) | ||
| 1352 | { | ||
| 1353 | /* | ||
| 1354 | * prep_new_page() initializes page->private (and therefore page->ptl) | ||
| 1355 | * with 0. Make sure nobody has started using it in between. | ||
| 1356 | * | ||
| 1357 | * This can happen if an arch tries to use slab for page table allocation: | ||
| 1358 | * slab code uses page->slab_cache and page->first_page (for tail | ||
| 1359 | * pages), which share storage with page->ptl. | ||
| 1360 | */ | ||
| 1361 | VM_BUG_ON(*(unsigned long *)&page->ptl); | ||
| 1362 | if (!ptlock_alloc(page)) | ||
| 1363 | return false; | ||
| 1364 | spin_lock_init(ptlock_ptr(page)); | ||
| 1365 | return true; | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | /* Reset page->mapping so free_pages_check won't complain. */ | ||
| 1369 | static inline void pte_lock_deinit(struct page *page) | ||
| 1370 | { | ||
| 1371 | page->mapping = NULL; | ||
| 1372 | ptlock_free(page); | ||
| 1373 | } | ||
| 1374 | |||
| 1375 | #else /* !USE_SPLIT_PTE_PTLOCKS */ | ||
| 1249 | /* | 1376 | /* |
| 1250 | * We use mm->page_table_lock to guard all pagetable pages of the mm. | 1377 | * We use mm->page_table_lock to guard all pagetable pages of the mm. |
| 1251 | */ | 1378 | */ |
| 1252 | #define pte_lock_init(page) do {} while (0) | 1379 | static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) |
| 1253 | #define pte_lock_deinit(page) do {} while (0) | 1380 | { |
| 1254 | #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) | 1381 | return &mm->page_table_lock; |
| 1255 | #endif /* USE_SPLIT_PTLOCKS */ | 1382 | } |
| 1383 | static inline void ptlock_cache_init(void) {} | ||
| 1384 | static inline bool ptlock_init(struct page *page) { return true; } | ||
| 1385 | static inline void pte_lock_deinit(struct page *page) {} | ||
| 1386 | #endif /* USE_SPLIT_PTE_PTLOCKS */ | ||
| 1387 | |||
| 1388 | static inline void pgtable_init(void) | ||
| 1389 | { | ||
| 1390 | ptlock_cache_init(); | ||
| 1391 | pgtable_cache_init(); | ||
| 1392 | } | ||
| 1256 | 1393 | ||
| 1257 | static inline void pgtable_page_ctor(struct page *page) | 1394 | static inline bool pgtable_page_ctor(struct page *page) |
| 1258 | { | 1395 | { |
| 1259 | pte_lock_init(page); | ||
| 1260 | inc_zone_page_state(page, NR_PAGETABLE); | 1396 | inc_zone_page_state(page, NR_PAGETABLE); |
| 1397 | return ptlock_init(page); | ||
| 1261 | } | 1398 | } |
| 1262 | 1399 | ||
| 1263 | static inline void pgtable_page_dtor(struct page *page) | 1400 | static inline void pgtable_page_dtor(struct page *page) |
| @@ -1294,6 +1431,52 @@ static inline void pgtable_page_dtor(struct page *page) | |||
| 1294 | ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ | 1431 | ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ |
| 1295 | NULL: pte_offset_kernel(pmd, address)) | 1432 | NULL: pte_offset_kernel(pmd, address)) |
| 1296 | 1433 | ||
| 1434 | #if USE_SPLIT_PMD_PTLOCKS | ||
| 1435 | |||
| 1436 | static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) | ||
| 1437 | { | ||
| 1438 | return ptlock_ptr(virt_to_page(pmd)); | ||
| 1439 | } | ||
| 1440 | |||
| 1441 | static inline bool pgtable_pmd_page_ctor(struct page *page) | ||
| 1442 | { | ||
| 1443 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
| 1444 | page->pmd_huge_pte = NULL; | ||
| 1445 | #endif | ||
| 1446 | return ptlock_init(page); | ||
| 1447 | } | ||
| 1448 | |||
| 1449 | static inline void pgtable_pmd_page_dtor(struct page *page) | ||
| 1450 | { | ||
| 1451 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
| 1452 | VM_BUG_ON(page->pmd_huge_pte); | ||
| 1453 | #endif | ||
| 1454 | ptlock_free(page); | ||
| 1455 | } | ||
| 1456 | |||
| 1457 | #define pmd_huge_pte(mm, pmd) (virt_to_page(pmd)->pmd_huge_pte) | ||
| 1458 | |||
| 1459 | #else | ||
| 1460 | |||
| 1461 | static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) | ||
| 1462 | { | ||
| 1463 | return &mm->page_table_lock; | ||
| 1464 | } | ||
| 1465 | |||
| 1466 | static inline bool pgtable_pmd_page_ctor(struct page *page) { return true; } | ||
| 1467 | static inline void pgtable_pmd_page_dtor(struct page *page) {} | ||
| 1468 | |||
| 1469 | #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) | ||
| 1470 | |||
| 1471 | #endif | ||
| 1472 | |||
| 1473 | static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) | ||
| 1474 | { | ||
| 1475 | spinlock_t *ptl = pmd_lockptr(mm, pmd); | ||
| 1476 | spin_lock(ptl); | ||
| 1477 | return ptl; | ||
| 1478 | } | ||
| 1479 | |||
| 1297 | extern void free_area_init(unsigned long * zones_size); | 1480 | extern void free_area_init(unsigned long * zones_size); |
| 1298 | extern void free_area_init_node(int nid, unsigned long * zones_size, | 1481 | extern void free_area_init_node(int nid, unsigned long * zones_size, |
| 1299 | unsigned long zone_start_pfn, unsigned long *zholes_size); | 1482 | unsigned long zone_start_pfn, unsigned long *zholes_size); |
