Diffstat (limited to 'mm')
-rw-r--r--  mm/memory.c    |  18
-rw-r--r--  mm/migrate.c   | 128
-rw-r--r--  mm/mprotect.c  |  23
-rw-r--r--  mm/rmap.c      |  38
-rw-r--r--  mm/swapfile.c  |  20
5 files changed, 195 insertions, 32 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 7e3683fd4f3c..11673c5d2c20 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -434,7 +434,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	/* pte contains position in swap or file, so copy. */
 	if (unlikely(!pte_present(pte))) {
 		if (!pte_file(pte)) {
-			swap_duplicate(pte_to_swp_entry(pte));
+			swp_entry_t entry = pte_to_swp_entry(pte);
+
+			swap_duplicate(entry);
 			/* make sure dst_mm is on swapoff's mmlist. */
 			if (unlikely(list_empty(&dst_mm->mmlist))) {
 				spin_lock(&mmlist_lock);
@@ -443,6 +445,16 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 						 &src_mm->mmlist);
 				spin_unlock(&mmlist_lock);
 			}
+			if (is_write_migration_entry(entry) &&
+					is_cow_mapping(vm_flags)) {
+				/*
+				 * COW mappings require pages in both parent
+				 * and child to be set to read.
+				 */
+				make_migration_entry_read(&entry);
+				pte = swp_entry_to_pte(entry);
+				set_pte_at(src_mm, addr, src_pte, pte);
+			}
 		}
 		goto out_set_pte;
 	}
@@ -1879,6 +1891,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out;
 
 	entry = pte_to_swp_entry(orig_pte);
+	if (is_migration_entry(entry)) {
+		migration_entry_wait(mm, pmd, address);
+		goto out;
+	}
 	page = lookup_swap_cache(entry);
 	if (!page) {
 		swapin_readahead(entry, address, vma);
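
The memory.c hunks above (and the mprotect.c and rmap.c hunks below) lean on migration-entry helpers -- is_migration_entry(), is_write_migration_entry(), make_migration_entry(), make_migration_entry_read(), migration_entry_to_page() -- that are defined in include/linux/swapops.h and therefore fall outside this diffstat, which is limited to 'mm'. As a rough, hedged sketch of what those helpers are assumed to look like (the constant names and exact type numbering here are illustrative, not taken from this patch), a migration entry is simply a swp_entry_t built from a reserved swap type plus the page's pfn as the offset:

/* Sketch only -- assumed shape of the swapops.h side of this patch. */
#define SWP_MIGRATION_READ	(MAX_SWAPFILES + 0)	/* illustrative type values */
#define SWP_MIGRATION_WRITE	(MAX_SWAPFILES + 1)

static inline swp_entry_t make_migration_entry(struct page *page, int write)
{
	/* Encode the pfn, remembering whether the pte was writable. */
	return swp_entry(write ? SWP_MIGRATION_WRITE : SWP_MIGRATION_READ,
			 page_to_pfn(page));
}

static inline int is_migration_entry(swp_entry_t entry)
{
	return swp_type(entry) == SWP_MIGRATION_READ ||
	       swp_type(entry) == SWP_MIGRATION_WRITE;
}

static inline int is_write_migration_entry(swp_entry_t entry)
{
	return swp_type(entry) == SWP_MIGRATION_WRITE;
}

static inline void make_migration_entry_read(swp_entry_t *entry)
{
	/* Drop the write permission recorded in the entry. */
	*entry = swp_entry(SWP_MIGRATION_READ, swp_offset(*entry));
}

static inline struct page *migration_entry_to_page(swp_entry_t entry)
{
	return pfn_to_page(swp_offset(entry));
}
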
diff --git a/mm/migrate.c b/mm/migrate.c
index 5a340f4ca212..0a011e421bb4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -15,6 +15,7 @@
 #include <linux/migrate.h>
 #include <linux/module.h>
 #include <linux/swap.h>
+#include <linux/swapops.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/mm_inline.h>
@@ -23,7 +24,6 @@
 #include <linux/topology.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
-#include <linux/swapops.h>
 
 #include "internal.h"
 
@@ -119,6 +119,132 @@ int putback_lru_pages(struct list_head *l)
 	return count;
 }
 
+static inline int is_swap_pte(pte_t pte)
+{
+	return !pte_none(pte) && !pte_present(pte) && !pte_file(pte);
+}
+
+/*
+ * Restore a potential migration pte to a working pte entry
+ */
+static void remove_migration_pte(struct vm_area_struct *vma, unsigned long addr,
+		struct page *old, struct page *new)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	swp_entry_t entry;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return;
+
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return;
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd))
+		return;
+
+	ptep = pte_offset_map(pmd, addr);
+
+	if (!is_swap_pte(*ptep)) {
+		pte_unmap(ptep);
+		return;
+	}
+
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+	pte = *ptep;
+	if (!is_swap_pte(pte))
+		goto out;
+
+	entry = pte_to_swp_entry(pte);
+
+	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
+		goto out;
+
+	inc_mm_counter(mm, anon_rss);
+	get_page(new);
+	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
+	if (is_write_migration_entry(entry))
+		pte = pte_mkwrite(pte);
+	set_pte_at(mm, addr, ptep, pte);
+	page_add_anon_rmap(new, vma, addr);
+out:
+	pte_unmap_unlock(ptep, ptl);
+}
+
+/*
+ * Get rid of all migration entries and replace them by
+ * references to the indicated page.
+ *
+ * Must hold mmap_sem lock on at least one of the vmas containing
+ * the page so that the anon_vma cannot vanish.
+ */
+static void remove_migration_ptes(struct page *old, struct page *new)
+{
+	struct anon_vma *anon_vma;
+	struct vm_area_struct *vma;
+	unsigned long mapping;
+
+	mapping = (unsigned long)new->mapping;
+
+	if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
+		return;
+
+	/*
+	 * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
+	 */
+	anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
+	spin_lock(&anon_vma->lock);
+
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
+		remove_migration_pte(vma, page_address_in_vma(new, vma),
+					old, new);
+
+	spin_unlock(&anon_vma->lock);
+}
+
+/*
+ * Something used the pte of a page under migration. We need to
+ * get to the page and wait until migration is finished.
+ * When we return from this function the fault will be retried.
+ *
+ * This function is called from do_swap_page().
+ */
+void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
+				unsigned long address)
+{
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+	swp_entry_t entry;
+	struct page *page;
+
+	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+	pte = *ptep;
+	if (!is_swap_pte(pte))
+		goto out;
+
+	entry = pte_to_swp_entry(pte);
+	if (!is_migration_entry(entry))
+		goto out;
+
+	page = migration_entry_to_page(entry);
+
+	get_page(page);
+	pte_unmap_unlock(ptep, ptl);
+	wait_on_page_locked(page);
+	put_page(page);
+	return;
+out:
+	pte_unmap_unlock(ptep, ptl);
+}
+
 /*
  * swapout a single page
  * page is locked upon entry, unlocked on exit
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 5faf01ad3ef8..14f93e62270f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -19,7 +19,8 @@
 #include <linux/mempolicy.h>
 #include <linux/personality.h>
 #include <linux/syscalls.h>
-
+#include <linux/swap.h>
+#include <linux/swapops.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -28,12 +29,13 @@
 static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot)
 {
-	pte_t *pte;
+	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	do {
-		if (pte_present(*pte)) {
+		oldpte = *pte;
+		if (pte_present(oldpte)) {
 			pte_t ptent;
 
 			/* Avoid an SMP race with hardware updated dirty/clean
@@ -43,7 +45,22 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 			ptent = pte_modify(ptep_get_and_clear(mm, addr, pte), newprot);
 			set_pte_at(mm, addr, pte, ptent);
 			lazy_mmu_prot_update(ptent);
+#ifdef CONFIG_MIGRATION
+		} else if (!pte_file(oldpte)) {
+			swp_entry_t entry = pte_to_swp_entry(oldpte);
+
+			if (is_write_migration_entry(entry)) {
+				/*
+				 * A protection check is difficult so
+				 * just be safe and disable write
+				 */
+				make_migration_entry_read(&entry);
+				set_pte_at(mm, addr, pte,
+					swp_entry_to_pte(entry));
+			}
+#endif
 		}
+
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap_unlock(pte - 1, ptl);
 }
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -103,7 +103,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 		spin_lock(&mm->page_table_lock);
 		if (likely(!vma->anon_vma)) {
 			vma->anon_vma = anon_vma;
-			list_add(&vma->anon_vma_node, &anon_vma->head);
+			list_add_tail(&vma->anon_vma_node, &anon_vma->head);
 			allocated = NULL;
 		}
 		spin_unlock(&mm->page_table_lock);
@@ -127,7 +127,7 @@ void __anon_vma_link(struct vm_area_struct *vma)
 	struct anon_vma *anon_vma = vma->anon_vma;
 
 	if (anon_vma) {
-		list_add(&vma->anon_vma_node, &anon_vma->head);
+		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
 		validate_anon_vma(vma);
 	}
 }
@@ -138,7 +138,7 @@ void anon_vma_link(struct vm_area_struct *vma)
 
 	if (anon_vma) {
 		spin_lock(&anon_vma->lock);
-		list_add(&vma->anon_vma_node, &anon_vma->head);
+		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
 		validate_anon_vma(vma);
 		spin_unlock(&anon_vma->lock);
 	}
@@ -620,17 +620,27 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 
 	if (PageAnon(page)) {
 		swp_entry_t entry = { .val = page_private(page) };
-		/*
-		 * Store the swap location in the pte.
-		 * See handle_pte_fault() ...
-		 */
-		BUG_ON(!PageSwapCache(page));
-		swap_duplicate(entry);
-		if (list_empty(&mm->mmlist)) {
-			spin_lock(&mmlist_lock);
-			if (list_empty(&mm->mmlist))
-				list_add(&mm->mmlist, &init_mm.mmlist);
-			spin_unlock(&mmlist_lock);
+
+		if (PageSwapCache(page)) {
+			/*
+			 * Store the swap location in the pte.
+			 * See handle_pte_fault() ...
+			 */
+			swap_duplicate(entry);
+			if (list_empty(&mm->mmlist)) {
+				spin_lock(&mmlist_lock);
+				if (list_empty(&mm->mmlist))
+					list_add(&mm->mmlist, &init_mm.mmlist);
+				spin_unlock(&mmlist_lock);
+			}
+		} else {
+			/*
+			 * Store the pfn of the page in a special migration
+			 * pte. do_swap_page() will wait until the migration
+			 * pte is removed and then restart fault handling.
+			 */
+			BUG_ON(!migration);
+			entry = make_migration_entry(page, pte_write(pteval));
 		}
 		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
 		BUG_ON(pte_file(*pte));
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 47a6812f5f8c..e3b1362372c2 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -395,6 +395,9 @@ void free_swap_and_cache(swp_entry_t entry)
 	struct swap_info_struct * p;
 	struct page *page = NULL;
 
+	if (is_migration_entry(entry))
+		return;
+
 	p = swap_info_get(entry);
 	if (p) {
 		if (swap_entry_free(p, swp_offset(entry)) == 1) {
@@ -1400,19 +1403,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 		if (!(p->flags & SWP_USED))
 			break;
 	error = -EPERM;
-	/*
-	 * Test if adding another swap device is possible. There are
-	 * two limiting factors: 1) the number of bits for the swap
-	 * type swp_entry_t definition and 2) the number of bits for
-	 * the swap type in the swap ptes as defined by the different
-	 * architectures. To honor both limitations a swap entry
-	 * with swap offset 0 and swap type ~0UL is created, encoded
-	 * to a swap pte, decoded to a swp_entry_t again and finally
-	 * the swap type part is extracted. This will mask all bits
-	 * from the initial ~0UL that can't be encoded in either the
-	 * swp_entry_t or the architecture definition of a swap pte.
-	 */
-	if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) {
+	if (type >= MAX_SWAPFILES) {
 		spin_unlock(&swap_lock);
 		goto out;
 	}
@@ -1702,6 +1693,9 @@ int swap_duplicate(swp_entry_t entry)
 	unsigned long offset, type;
 	int result = 0;
 
+	if (is_migration_entry(entry))
+		return 1;
+
 	type = swp_type(entry);
 	if (type >= nr_swapfiles)
 		goto bad_file;
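
Taken together, these hunks implement the pte side of swapless page migration: try_to_unmap_one() installs a migration entry instead of a swap entry for an anonymous page being moved, do_swap_page() parks any task that faults on such an entry in migration_entry_wait(), and remove_migration_ptes() later rewires every mapping to the new page. A hedged sketch of how a caller inside mm/migrate.c is expected to string these together -- copy_the_page_contents() is a placeholder, the try_to_unmap() signature with a migration flag is assumed rather than shown in this diff, and the mmap_sem/anon_vma locking that remove_migration_ptes() documents is elided:

/* Sketch only -- not part of this diff; error handling kept minimal. */
static int migrate_one_page_sketch(struct page *old, struct page *new)
{
	int rc = -EAGAIN;

	lock_page(old);		/* faulting tasks will block on this lock */

	/*
	 * Replace every pte mapping 'old' with a migration entry
	 * (try_to_unmap_one() above, reached with migration != 0).
	 * Concurrent faults now hit do_swap_page(), recognize the
	 * migration entry and sleep in migration_entry_wait().
	 */
	if (try_to_unmap(old, 1) != SWAP_SUCCESS)
		goto unlock;

	rc = copy_the_page_contents(old, new);	/* placeholder for the actual move */
	if (rc)
		goto unlock;

	/* Point all former mappings at 'new' and make the ptes live again. */
	remove_migration_ptes(old, new);
	rc = 0;
unlock:
	unlock_page(old);	/* wakes anyone waiting in migration_entry_wait() */
	return rc;
}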