 include/linux/ksm.h  | 13
 include/linux/rmap.h |  6
 mm/ksm.c             | 65
 mm/migrate.c         | 85
 mm/rmap.c            | 79
 5 files changed, 181 insertions(+), 67 deletions(-)
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 157d83dbaef8..bed5f16ba827 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -88,6 +88,9 @@ static inline struct page *ksm_might_need_to_copy(struct page *page,
 int page_referenced_ksm(struct page *page,
 			struct mem_cgroup *memcg, unsigned long *vm_flags);
 int try_to_unmap_ksm(struct page *page, enum ttu_flags flags);
+int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg);
+void ksm_migrate_page(struct page *newpage, struct page *oldpage);
 
 #else /* !CONFIG_KSM */
 
@@ -127,6 +130,16 @@ static inline int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
 {
 	return 0;
 }
+
+static inline int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page*,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	return 0;
+}
+
+static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
+{
+}
 #endif /* !CONFIG_KSM */
 
 #endif /* __LINUX_KSM_H */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 980094a527ee..b019ae64e2ab 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -164,6 +164,12 @@ struct anon_vma *page_lock_anon_vma(struct page *page);
 void page_unlock_anon_vma(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
+/*
+ * Called by migrate.c to remove migration ptes, but might be used more later.
+ */
+int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg);
+
 #else /* !CONFIG_MMU */
 
 #define anon_vma_init() do {} while (0)
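The declaration above fixes the walker contract: rmap_walk() calls rmap_one() once for each vma that covers the locked page's address, keeps going while the callback returns SWAP_AGAIN, and hands any other return value straight back to the caller. A minimal illustrative caller, not part of this patch (count_one and count_covering_vmas are invented names):

static int count_one(struct page *page, struct vm_area_struct *vma,
		unsigned long address, void *arg)
{
	int *count = arg;

	(*count)++;			/* visit every covering vma */
	return SWAP_AGAIN;
}

static int count_covering_vmas(struct page *page)
{
	int count = 0;

	VM_BUG_ON(!PageLocked(page));	/* rmap_walk() insists on the page lock */
	rmap_walk(page, count_one, &count);
	return count;
}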
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1656,6 +1656,71 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_MIGRATION
+int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	struct stable_node *stable_node;
+	struct hlist_node *hlist;
+	struct rmap_item *rmap_item;
+	int ret = SWAP_AGAIN;
+	int search_new_forks = 0;
+
+	VM_BUG_ON(!PageKsm(page));
+	VM_BUG_ON(!PageLocked(page));
+
+	stable_node = page_stable_node(page);
+	if (!stable_node)
+		return ret;
+again:
+	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
+		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct vm_area_struct *vma;
+
+		spin_lock(&anon_vma->lock);
+		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+			if (rmap_item->address < vma->vm_start ||
+			    rmap_item->address >= vma->vm_end)
+				continue;
+			/*
+			 * Initially we examine only the vma which covers this
+			 * rmap_item; but later, if there is still work to do,
+			 * we examine covering vmas in other mms: in case they
+			 * were forked from the original since ksmd passed.
+			 */
+			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
+				continue;
+
+			ret = rmap_one(page, vma, rmap_item->address, arg);
+			if (ret != SWAP_AGAIN) {
+				spin_unlock(&anon_vma->lock);
+				goto out;
+			}
+		}
+		spin_unlock(&anon_vma->lock);
+	}
+	if (!search_new_forks++)
+		goto again;
+out:
+	return ret;
+}
+
+void ksm_migrate_page(struct page *newpage, struct page *oldpage)
+{
+	struct stable_node *stable_node;
+
+	VM_BUG_ON(!PageLocked(oldpage));
+	VM_BUG_ON(!PageLocked(newpage));
+	VM_BUG_ON(newpage->mapping != oldpage->mapping);
+
+	stable_node = page_stable_node(newpage);
+	if (stable_node) {
+		VM_BUG_ON(stable_node->page != oldpage);
+		stable_node->page = newpage;
+	}
+}
+#endif /* CONFIG_MIGRATION */
+
 #ifdef CONFIG_SYSFS
 /*
  * This all compiles without CONFIG_SYSFS, but is a waste of space.
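The one subtle test in rmap_walk_ksm() above is (rmap_item->mm == vma->vm_mm) == search_new_forks. On the first pass (search_new_forks == 0) it skips every vma except the one in the mm the rmap_item was recorded against; unless the callback has already ended the walk, a second pass (search_new_forks == 1) then skips that original mm and visits covering vmas in the other mms sharing the anon_vma, i.e. forks created since ksmd last scanned the item. The same predicate written out long-hand, purely for illustration (visit_on_this_pass is an invented name):

static inline int visit_on_this_pass(struct rmap_item *rmap_item,
		struct vm_area_struct *vma, int search_new_forks)
{
	int in_original_mm = (rmap_item->mm == vma->vm_mm);

	if (!search_new_forks)
		return in_original_mm;	/* pass 0: only the recorded mm */
	return !in_original_mm;		/* pass 1: only the forked mms */
}

rmap_walk_ksm() skips a vma exactly when this would return false, which is what the single == comparison does.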
diff --git a/mm/migrate.c b/mm/migrate.c
index 367272d04423..0b714747c028 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -21,6 +21,7 @@
 #include <linux/mm_inline.h>
 #include <linux/nsproxy.h>
 #include <linux/pagevec.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
 #include <linux/cpu.h>
@@ -78,8 +79,8 @@ int putback_lru_pages(struct list_head *l)
 /*
  * Restore a potential migration pte to a working pte entry
  */
-static void remove_migration_pte(struct vm_area_struct *vma,
-		struct page *old, struct page *new)
+static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
+		unsigned long addr, void *old)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	swp_entry_t entry;
@@ -88,40 +89,37 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 	spinlock_t *ptl;
-	unsigned long addr = page_address_in_vma(new, vma);
-
-	if (addr == -EFAULT)
-		return;
 
 	pgd = pgd_offset(mm, addr);
 	if (!pgd_present(*pgd))
-		return;
+		goto out;
 
 	pud = pud_offset(pgd, addr);
 	if (!pud_present(*pud))
-		return;
+		goto out;
 
 	pmd = pmd_offset(pud, addr);
 	if (!pmd_present(*pmd))
-		return;
+		goto out;
 
 	ptep = pte_offset_map(pmd, addr);
 
 	if (!is_swap_pte(*ptep)) {
 		pte_unmap(ptep);
-		return;
+		goto out;
 	}
 
 	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
-		goto out;
+		goto unlock;
 
 	entry = pte_to_swp_entry(pte);
 
-	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
-		goto out;
+	if (!is_migration_entry(entry) ||
+	    migration_entry_to_page(entry) != old)
+		goto unlock;
 
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
@@ -137,55 +135,10 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, addr, pte);
-
-out:
+unlock:
 	pte_unmap_unlock(ptep, ptl);
-}
-
-/*
- * Note that remove_file_migration_ptes will only work on regular mappings,
- * Nonlinear mappings do not use migration entries.
- */
-static void remove_file_migration_ptes(struct page *old, struct page *new)
-{
-	struct vm_area_struct *vma;
-	struct address_space *mapping = new->mapping;
-	struct prio_tree_iter iter;
-	pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-
-	if (!mapping)
-		return;
-
-	spin_lock(&mapping->i_mmap_lock);
-
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
-		remove_migration_pte(vma, old, new);
-
-	spin_unlock(&mapping->i_mmap_lock);
-}
-
-/*
- * Must hold mmap_sem lock on at least one of the vmas containing
- * the page so that the anon_vma cannot vanish.
- */
-static void remove_anon_migration_ptes(struct page *old, struct page *new)
-{
-	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
-
-	/*
-	 * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
-	 */
-	anon_vma = page_anon_vma(new);
-	if (!anon_vma)
-		return;
-
-	spin_lock(&anon_vma->lock);
-
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
-		remove_migration_pte(vma, old, new);
-
-	spin_unlock(&anon_vma->lock);
+out:
+	return SWAP_AGAIN;
 }
 
 /*
@@ -194,10 +147,7 @@ static void remove_anon_migration_ptes(struct page *old, struct page *new)
  */
 static void remove_migration_ptes(struct page *old, struct page *new)
 {
-	if (PageAnon(new))
-		remove_anon_migration_ptes(old, new);
-	else
-		remove_file_migration_ptes(old, new);
+	rmap_walk(new, remove_migration_pte, old);
 }
 
 /*
@@ -358,6 +308,7 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 	}
 
 	mlock_migrate_page(newpage, page);
+	ksm_migrate_page(newpage, page);
 
 	ClearPageSwapCache(page);
 	ClearPagePrivate(page);
@@ -577,9 +528,9 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 	else
 		rc = fallback_migrate_page(mapping, newpage, page);
 
-	if (!rc) {
+	if (!rc)
 		remove_migration_ptes(page, newpage);
-	} else
+	else
 		newpage->mapping = NULL;
 
 	unlock_page(newpage);
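Note that remove_migration_pte() above returns SWAP_AGAIN on every path, so the walk started by remove_migration_ptes() always visits every covering vma; the return value only matters to callers that want rmap_walk() to stop early. A hedged sketch of such a caller, with invented names (struct rmap_walk_hit, take_first); SWAP_FAIL is borrowed here only as a value distinct from SWAP_AGAIN:

struct rmap_walk_hit {
	struct vm_area_struct *vma;
	unsigned long address;
};

static int take_first(struct page *page, struct vm_area_struct *vma,
		unsigned long address, void *arg)
{
	struct rmap_walk_hit *hit = arg;

	hit->vma = vma;
	hit->address = address;
	return SWAP_FAIL;	/* any value != SWAP_AGAIN ends the walk here */
}

With the page locked, rmap_walk(page, take_first, &hit) returns SWAP_FAIL as soon as the first covering vma has been recorded, or SWAP_AGAIN if there was none.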
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1203,3 +1203,82 @@ int try_to_munlock(struct page *page)
 	else
 		return try_to_unmap_file(page, TTU_MUNLOCK);
 }
+
+#ifdef CONFIG_MIGRATION
+/*
+ * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
+ * Called by migrate.c to remove migration ptes, but might be used more later.
+ */
+static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	struct anon_vma *anon_vma;
+	struct vm_area_struct *vma;
+	int ret = SWAP_AGAIN;
+
+	/*
+	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma()
+	 * because that depends on page_mapped(); but not all its usages
+	 * are holding mmap_sem, which also gave the necessary guarantee
+	 * (that this anon_vma's slab has not already been destroyed).
+	 * This needs to be reviewed later: avoiding page_lock_anon_vma()
+	 * is risky, and currently limits the usefulness of rmap_walk().
+	 */
+	anon_vma = page_anon_vma(page);
+	if (!anon_vma)
+		return ret;
+	spin_lock(&anon_vma->lock);
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		unsigned long address = vma_address(page, vma);
+		if (address == -EFAULT)
+			continue;
+		ret = rmap_one(page, vma, address, arg);
+		if (ret != SWAP_AGAIN)
+			break;
+	}
+	spin_unlock(&anon_vma->lock);
+	return ret;
+}
+
+static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	struct address_space *mapping = page->mapping;
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+	int ret = SWAP_AGAIN;
+
+	if (!mapping)
+		return ret;
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		unsigned long address = vma_address(page, vma);
+		if (address == -EFAULT)
+			continue;
+		ret = rmap_one(page, vma, address, arg);
+		if (ret != SWAP_AGAIN)
+			break;
+	}
+	/*
+	 * No nonlinear handling: being always shared, nonlinear vmas
+	 * never contain migration ptes.  Decide what to do about this
+	 * limitation to linear when we need rmap_walk() on nonlinear.
+	 */
+	spin_unlock(&mapping->i_mmap_lock);
+	return ret;
+}
+
+int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	VM_BUG_ON(!PageLocked(page));
+
+	if (unlikely(PageKsm(page)))
+		return rmap_walk_ksm(page, rmap_one, arg);
+	else if (PageAnon(page))
+		return rmap_walk_anon(page, rmap_one, arg);
+	else
+		return rmap_walk_file(page, rmap_one, arg);
+}
+#endif /* CONFIG_MIGRATION */