Diffstat (limited to 'include/linux/mm.h')
-rw-r--r--   include/linux/mm.h   266
1 file changed, 220 insertions(+), 46 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b6e55ee8855..35527173cf50 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -50,6 +50,10 @@ extern int sysctl_legacy_va_layout;
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 
+#ifndef __pa_symbol
+#define __pa_symbol(x)  __pa(RELOC_HIDE((unsigned long)(x), 0))
+#endif
+
 extern unsigned long sysctl_user_reserve_kbytes;
 extern unsigned long sysctl_admin_reserve_kbytes;
 
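The hunk above adds a generic fallback for __pa_symbol() so that common code can turn the address of a kernel symbol into a physical address even on architectures that do not define their own version. A rough usage sketch, not part of the diff (the _text/_end linker symbols and the reservation call are only illustrative):

	/* Reserve the kernel image by physical address, e.g. during early boot. */
	memblock_reserve(__pa_symbol(_text), (phys_addr_t)(_end - _text));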
@@ -297,12 +301,26 @@ static inline int put_page_testzero(struct page *page)
 /*
  * Try to grab a ref unless the page has a refcount of zero, return false if
  * that is the case.
+ * This can be called when MMU is off so it must not access
+ * any of the virtual mappings.
  */
 static inline int get_page_unless_zero(struct page *page)
 {
 	return atomic_inc_not_zero(&page->_count);
 }
 
+/*
+ * Try to drop a ref unless the page has a refcount of one, return false if
+ * that is the case.
+ * This is to make sure that the refcount won't become zero after this drop.
+ * This can be called when MMU is off so it must not access
+ * any of the virtual mappings.
+ */
+static inline int put_page_unless_one(struct page *page)
+{
+	return atomic_add_unless(&page->_count, -1, 1);
+}
+
 extern int page_is_ram(unsigned long pfn);
 
 /* Support for virtually mapped pages */
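get_page_unless_zero() is the building block for speculative, lockless page lookups, and the new put_page_unless_one() is the mirror operation: it drops a reference only if doing so cannot make the refcount reach zero. A hedged sketch of the usual acquire pattern (the lookup context is illustrative, not part of the diff):

	/* Speculatively pin a page that was found without holding a reference. */
	if (!get_page_unless_zero(page))
		return NULL;		/* lost the race: the page is being freed */
	/* ... the page cannot be freed under us here ... */
	put_page(page);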
@@ -581,11 +599,11 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
  * sets it, so none of the operations on it need to be atomic.
  */
 
-/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_NID] | ... | FLAGS | */
+/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */
 #define SECTIONS_PGOFF		((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
 #define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
 #define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
-#define LAST_NID_PGOFF		(ZONES_PGOFF - LAST_NID_WIDTH)
+#define LAST_CPUPID_PGOFF	(ZONES_PGOFF - LAST_CPUPID_WIDTH)
 
 /*
  * Define the bit shifts to access each section.  For non-existent
@@ -595,7 +613,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 #define SECTIONS_PGSHIFT	(SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
 #define NODES_PGSHIFT		(NODES_PGOFF * (NODES_WIDTH != 0))
 #define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_WIDTH != 0))
-#define LAST_NID_PGSHIFT	(LAST_NID_PGOFF * (LAST_NID_WIDTH != 0))
+#define LAST_CPUPID_PGSHIFT	(LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0))
 
 /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
 #ifdef NODE_NOT_IN_PAGE_FLAGS
@@ -617,7 +635,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 #define ZONES_MASK		((1UL << ZONES_WIDTH) - 1)
 #define NODES_MASK		((1UL << NODES_WIDTH) - 1)
 #define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
-#define LAST_NID_MASK		((1UL << LAST_NID_WIDTH) - 1)
+#define LAST_CPUPID_MASK	((1UL << LAST_CPUPID_WIDTH) - 1)
 #define ZONEID_MASK		((1UL << ZONEID_SHIFT) - 1)
 
 static inline enum zone_type page_zonenum(const struct page *page)
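The three hunks above only rename the LAST_NID page-flags field to LAST_CPUPID; the packing scheme itself is unchanged. Fields are carved out of page->flags from the top down, so with illustrative widths (say SECTIONS_WIDTH 0, NODES_WIDTH 10, ZONES_WIDTH 2 on a 64-bit build) NODES_PGOFF = 64 - 10 = 54, ZONES_PGOFF = 54 - 2 = 52, and LAST_CPUPID_PGOFF sits just below that. Extraction always follows the same shift-and-mask pattern; a minimal sketch (the real widths come from <linux/page-flags-layout.h>):

	/* Same pattern page_zonenum() uses for the zone field. */
	int zone   = (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
	int cpupid = (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK;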
@@ -661,51 +679,117 @@ static inline int page_to_nid(const struct page *page)
 #endif
 
 #ifdef CONFIG_NUMA_BALANCING
-#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
-static inline int page_nid_xchg_last(struct page *page, int nid)
+static inline int cpu_pid_to_cpupid(int cpu, int pid)
 {
-	return xchg(&page->_last_nid, nid);
+	return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);
 }
 
-static inline int page_nid_last(struct page *page)
+static inline int cpupid_to_pid(int cpupid)
 {
-	return page->_last_nid;
+	return cpupid & LAST__PID_MASK;
 }
-static inline void page_nid_reset_last(struct page *page)
+
+static inline int cpupid_to_cpu(int cpupid)
 {
-	page->_last_nid = -1;
+	return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK;
 }
-#else
-static inline int page_nid_last(struct page *page)
+
+static inline int cpupid_to_nid(int cpupid)
+{
+	return cpu_to_node(cpupid_to_cpu(cpupid));
+}
+
+static inline bool cpupid_pid_unset(int cpupid)
 {
-	return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK;
+	return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK);
 }
 
-extern int page_nid_xchg_last(struct page *page, int nid);
+static inline bool cpupid_cpu_unset(int cpupid)
+{
+	return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK);
+}
 
-static inline void page_nid_reset_last(struct page *page)
+static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid)
 {
-	int nid = (1 << LAST_NID_SHIFT) - 1;
+	return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid);
+}
 
-	page->flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT);
-	page->flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT;
+#define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid)
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
+{
+	return xchg(&page->_last_cpupid, cpupid);
+}
+
+static inline int page_cpupid_last(struct page *page)
+{
+	return page->_last_cpupid;
+}
+static inline void page_cpupid_reset_last(struct page *page)
+{
+	page->_last_cpupid = -1;
 }
-#endif /* LAST_NID_NOT_IN_PAGE_FLAGS */
 #else
-static inline int page_nid_xchg_last(struct page *page, int nid)
+static inline int page_cpupid_last(struct page *page)
+{
+	return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK;
+}
+
+extern int page_cpupid_xchg_last(struct page *page, int cpupid);
+
+static inline void page_cpupid_reset_last(struct page *page)
+{
+	int cpupid = (1 << LAST_CPUPID_SHIFT) - 1;
+
+	page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT);
+	page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT;
+}
+#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */
+#else /* !CONFIG_NUMA_BALANCING */
+static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
 {
-	return page_to_nid(page);
+	return page_to_nid(page); /* XXX */
 }
 
-static inline int page_nid_last(struct page *page)
+static inline int page_cpupid_last(struct page *page)
 {
-	return page_to_nid(page);
+	return page_to_nid(page); /* XXX */
 }
 
-static inline void page_nid_reset_last(struct page *page)
+static inline int cpupid_to_nid(int cpupid)
+{
+	return -1;
+}
+
+static inline int cpupid_to_pid(int cpupid)
+{
+	return -1;
+}
+
+static inline int cpupid_to_cpu(int cpupid)
+{
+	return -1;
+}
+
+static inline int cpu_pid_to_cpupid(int nid, int pid)
+{
+	return -1;
+}
+
+static inline bool cpupid_pid_unset(int cpupid)
+{
+	return 1;
+}
+
+static inline void page_cpupid_reset_last(struct page *page)
 {
 }
-#endif
+
+static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
+{
+	return false;
+}
+#endif /* CONFIG_NUMA_BALANCING */
 
 static inline struct zone *page_zone(const struct page *page)
 {
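A cpupid packs the CPU and the low bits of the pid of the task that last touched the page into one integer: the pid occupies the low LAST__PID_SHIFT bits and the CPU sits above it. A worked round trip, assuming an 8-bit pid field (the real widths come from <linux/page-flags-layout.h>):

	int cpupid = cpu_pid_to_cpupid(5, 0x1234);	/* stores cpu 5 and pid bits 0x34 */

	cpupid_to_cpu(cpupid);			/* -> 5 */
	cpupid_to_pid(cpupid);			/* -> 0x34, only the low pid bits survive */
	cpupid_match_pid(current, cpupid);	/* true only if current->pid has the same low bits */

Because only the truncated pid is stored, a reset value of -1 reads back as the all-ones pattern, which is exactly what cpupid_pid_unset() and cpupid_cpu_unset() test for.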
@@ -1232,32 +1316,76 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 }
 #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
 
-#if USE_SPLIT_PTLOCKS
-/*
- * We tuck a spinlock to guard each pagetable page into its struct page,
- * at page->private, with BUILD_BUG_ON to make sure that this will not
- * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
- * When freeing, reset page->mapping so free_pages_check won't complain.
- */
-#define __pte_lockptr(page)	&((page)->ptl)
-#define pte_lock_init(_page)	do {					\
-	spin_lock_init(__pte_lockptr(_page));				\
-} while (0)
-#define pte_lock_deinit(page)	((page)->mapping = NULL)
-#define pte_lockptr(mm, pmd)	({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
-#else	/* !USE_SPLIT_PTLOCKS */
+#if USE_SPLIT_PTE_PTLOCKS
+#if ALLOC_SPLIT_PTLOCKS
+extern bool ptlock_alloc(struct page *page);
+extern void ptlock_free(struct page *page);
+
+static inline spinlock_t *ptlock_ptr(struct page *page)
+{
+	return page->ptl;
+}
+#else /* ALLOC_SPLIT_PTLOCKS */
+static inline bool ptlock_alloc(struct page *page)
+{
+	return true;
+}
+
+static inline void ptlock_free(struct page *page)
+{
+}
+
+static inline spinlock_t *ptlock_ptr(struct page *page)
+{
+	return &page->ptl;
+}
+#endif /* ALLOC_SPLIT_PTLOCKS */
+
+static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
+{
+	return ptlock_ptr(pmd_page(*pmd));
+}
+
+static inline bool ptlock_init(struct page *page)
+{
+	/*
+	 * prep_new_page() initialize page->private (and therefore page->ptl)
+	 * with 0. Make sure nobody took it in use in between.
+	 *
+	 * It can happen if arch try to use slab for page table allocation:
+	 * slab code uses page->slab_cache and page->first_page (for tail
+	 * pages), which share storage with page->ptl.
+	 */
+	VM_BUG_ON(*(unsigned long *)&page->ptl);
+	if (!ptlock_alloc(page))
+		return false;
+	spin_lock_init(ptlock_ptr(page));
+	return true;
+}
+
+/* Reset page->mapping so free_pages_check won't complain. */
+static inline void pte_lock_deinit(struct page *page)
+{
+	page->mapping = NULL;
+	ptlock_free(page);
+}
+
+#else	/* !USE_SPLIT_PTE_PTLOCKS */
 /*
  * We use mm->page_table_lock to guard all pagetable pages of the mm.
  */
-#define pte_lock_init(page)	do {} while (0)
-#define pte_lock_deinit(page)	do {} while (0)
-#define pte_lockptr(mm, pmd)	({(void)(pmd); &(mm)->page_table_lock;})
-#endif /* USE_SPLIT_PTLOCKS */
+static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
+{
+	return &mm->page_table_lock;
+}
+static inline bool ptlock_init(struct page *page) { return true; }
+static inline void pte_lock_deinit(struct page *page) {}
+#endif /* USE_SPLIT_PTE_PTLOCKS */
 
-static inline void pgtable_page_ctor(struct page *page)
+static inline bool pgtable_page_ctor(struct page *page)
 {
-	pte_lock_init(page);
 	inc_zone_page_state(page, NR_PAGETABLE);
+	return ptlock_init(page);
 }
 
 static inline void pgtable_page_dtor(struct page *page)
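Because ptlock_alloc() can fail when ALLOC_SPLIT_PTLOCKS is in effect, pgtable_page_ctor() now returns bool and callers have to be able to back out. A hedged sketch of how an architecture's pte_alloc_one() would typically be adjusted (the allocation call and GFP flags are illustrative):

	struct page *pte;

	pte = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!pte)
		return NULL;
	if (!pgtable_page_ctor(pte)) {
		__free_page(pte);
		return NULL;
	}
	return pte;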
@@ -1294,6 +1422,52 @@ static inline void pgtable_page_dtor(struct page *page)
 	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
 		NULL: pte_offset_kernel(pmd, address))
 
+#if USE_SPLIT_PMD_PTLOCKS
+
+static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
+{
+	return ptlock_ptr(virt_to_page(pmd));
+}
+
+static inline bool pgtable_pmd_page_ctor(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	page->pmd_huge_pte = NULL;
+#endif
+	return ptlock_init(page);
+}
+
+static inline void pgtable_pmd_page_dtor(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	VM_BUG_ON(page->pmd_huge_pte);
+#endif
+	ptlock_free(page);
+}
+
+#define pmd_huge_pte(mm, pmd) (virt_to_page(pmd)->pmd_huge_pte)
+
+#else
+
+static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
+{
+	return &mm->page_table_lock;
+}
+
+static inline bool pgtable_pmd_page_ctor(struct page *page) { return true; }
+static inline void pgtable_pmd_page_dtor(struct page *page) {}
+
+#define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte)
+
+#endif
+
+static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
+{
+	spinlock_t *ptl = pmd_lockptr(mm, pmd);
+	spin_lock(ptl);
+	return ptl;
+}
+
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
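pmd_lock() gives PMD-level users (for example transparent huge page code) the same take-and-return convenience that pte_lockptr() plus spin_lock() provide at the PTE level; whether it resolves to a per-page-table lock or to mm->page_table_lock depends on USE_SPLIT_PMD_PTLOCKS. A minimal usage sketch (the critical section is illustrative):

	spinlock_t *ptl = pmd_lock(mm, pmd);

	/* ... examine or modify the huge pmd while it cannot change under us ... */
	spin_unlock(ptl);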