Diffstat (limited to 'mm/memory.c')
 -rw-r--r--  mm/memory.c  152
1 file changed, 143 insertions, 9 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 6c1eac92a316..aa8af0e20269 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1146,6 +1146,129 @@ int zeromap_page_range(struct vm_area_struct *vma,
 	return err;
 }
 
+pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
+{
+	pgd_t * pgd = pgd_offset(mm, addr);
+	pud_t * pud = pud_alloc(mm, pgd, addr);
+	if (pud) {
+		pmd_t * pmd = pmd_alloc(mm, pud, addr);
+		if (pmd)
+			return pte_alloc_map_lock(mm, pmd, addr, ptl);
+	}
+	return NULL;
+}
+
+/*
+ * This is the old fallback for page remapping.
+ *
+ * For historical reasons, it only allows reserved pages. Only
+ * old drivers should use this, and they needed to mark their
+ * pages reserved for the old functions anyway.
+ */
+static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *page, pgprot_t prot)
+{
+	int retval;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	retval = -EINVAL;
+	if (PageAnon(page))
+		goto out;
+	retval = -ENOMEM;
+	flush_dcache_page(page);
+	pte = get_locked_pte(mm, addr, &ptl);
+	if (!pte)
+		goto out;
+	retval = -EBUSY;
+	if (!pte_none(*pte))
+		goto out_unlock;
+
+	/* Ok, finally just insert the thing.. */
+	get_page(page);
+	inc_mm_counter(mm, file_rss);
+	page_add_file_rmap(page);
+	set_pte_at(mm, addr, pte, mk_pte(page, prot));
+
+	retval = 0;
+out_unlock:
+	pte_unmap_unlock(pte, ptl);
+out:
+	return retval;
+}
+
+/*
+ * This allows drivers to insert individual pages they've allocated
+ * into a user vma.
+ *
+ * The page has to be a nice clean _individual_ kernel allocation.
+ * If you allocate a compound page, you need to have marked it as
+ * such (__GFP_COMP), or manually just split the page up yourself
+ * (which is mainly an issue of doing "set_page_count(page, 1)" for
+ * each sub-page, and then freeing them one by one when you free
+ * them rather than freeing it as a compound page).
+ *
+ * NOTE! Traditionally this was done with "remap_pfn_range()" which
+ * took an arbitrary page protection parameter. This doesn't allow
+ * that. Your vma protection will have to be set up correctly, which
+ * means that if you want a shared writable mapping, you'd better
+ * ask for a shared writable mapping!
+ *
+ * The page does not need to be reserved.
+ */
+int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page)
+{
+	if (addr < vma->vm_start || addr >= vma->vm_end)
+		return -EFAULT;
+	if (!page_count(page))
+		return -EINVAL;
+	return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_page);
+
+/*
+ * Somebody does a pfn remapping that doesn't actually work as a vma.
+ *
+ * Do it as individual pages instead, and warn about it. It's bad form,
+ * and very inefficient.
+ */
+static int incomplete_pfn_remap(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end,
+		unsigned long pfn, pgprot_t prot)
+{
+	static int warn = 10;
+	struct page *page;
+	int retval;
+
+	if (!(vma->vm_flags & VM_INCOMPLETE)) {
+		if (warn) {
+			warn--;
+			printk("%s does an incomplete pfn remapping", current->comm);
+			dump_stack();
+		}
+	}
+	vma->vm_flags |= VM_INCOMPLETE | VM_IO | VM_RESERVED;
+
+	if (start < vma->vm_start || end > vma->vm_end)
+		return -EINVAL;
+
+	if (!pfn_valid(pfn))
+		return -EINVAL;
+
+	page = pfn_to_page(pfn);
+	if (!PageReserved(page))
+		return -EINVAL;
+
+	retval = 0;
+	while (start < end) {
+		retval = insert_page(vma->vm_mm, start, page, prot);
+		if (retval < 0)
+			break;
+		start += PAGE_SIZE;
+		page++;
+	}
+	return retval;
+}
+
 /*
  * maps a range of physical memory into the requested pages. the old
  * mappings are removed. any references to nonexistent pages results
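A usage sketch (not part of this patch): a driver that has allocated an ordinary, non-compound kernel page could expose it to user space from its ->mmap() handler roughly as below. The names mydrv_buf and mydrv_mmap are hypothetical, and error handling is kept minimal; the point is that no pgprot argument is passed — the protection comes from vma->vm_page_prot, i.e. from whatever the caller asked mmap() for.

#include <linux/fs.h>
#include <linux/mm.h>

/* hypothetical single page, allocated elsewhere with alloc_page(GFP_KERNEL) */
static struct page *mydrv_buf;

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* this sketch backs exactly one page of the mapping */
	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;

	/* vma->vm_page_prot already reflects the mmap() flags */
	return vm_insert_page(vma, vma->vm_start, mydrv_buf);
}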
| @@ -1220,6 +1343,9 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, | |||
| 1220 | struct mm_struct *mm = vma->vm_mm; | 1343 | struct mm_struct *mm = vma->vm_mm; |
| 1221 | int err; | 1344 | int err; |
| 1222 | 1345 | ||
| 1346 | if (addr != vma->vm_start || end != vma->vm_end) | ||
| 1347 | return incomplete_pfn_remap(vma, addr, end, pfn, prot); | ||
| 1348 | |||
| 1223 | /* | 1349 | /* |
| 1224 | * Physically remapped pages are special. Tell the | 1350 | * Physically remapped pages are special. Tell the |
| 1225 | * rest of the world about it: | 1351 | * rest of the world about it: |
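For contrast, a hedged sketch of the pattern the new check favors: covering the entire vma in a single remap_pfn_range() call, so the addr/end test above never diverts the request to incomplete_pfn_remap(). The MMIO base mydrv_phys_base is a hypothetical, page-aligned physical address.

#include <linux/fs.h>
#include <linux/mm.h>

static unsigned long mydrv_phys_base;	/* hypothetical, page-aligned MMIO base */

static int mydrv_mmap_mmio(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	/* map the whole vma, vm_start through vm_end, in one call */
	return remap_pfn_range(vma, vma->vm_start,
			       mydrv_phys_base >> PAGE_SHIFT,
			       size, vma->vm_page_prot);
}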
| @@ -1300,8 +1426,15 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo | |||
| 1300 | */ | 1426 | */ |
| 1301 | if (unlikely(!src)) { | 1427 | if (unlikely(!src)) { |
| 1302 | void *kaddr = kmap_atomic(dst, KM_USER0); | 1428 | void *kaddr = kmap_atomic(dst, KM_USER0); |
| 1303 | unsigned long left = __copy_from_user_inatomic(kaddr, (void __user *)va, PAGE_SIZE); | 1429 | void __user *uaddr = (void __user *)(va & PAGE_MASK); |
| 1304 | if (left) | 1430 | |
| 1431 | /* | ||
| 1432 | * This really shouldn't fail, because the page is there | ||
| 1433 | * in the page tables. But it might just be unreadable, | ||
| 1434 | * in which case we just give up and fill the result with | ||
| 1435 | * zeroes. | ||
| 1436 | */ | ||
| 1437 | if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) | ||
| 1305 | memset(kaddr, 0, PAGE_SIZE); | 1438 | memset(kaddr, 0, PAGE_SIZE); |
| 1306 | kunmap_atomic(kaddr, KM_USER0); | 1439 | kunmap_atomic(kaddr, KM_USER0); |
| 1307 | return; | 1440 | return; |
| @@ -1332,12 +1465,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 1332 | unsigned long address, pte_t *page_table, pmd_t *pmd, | 1465 | unsigned long address, pte_t *page_table, pmd_t *pmd, |
| 1333 | spinlock_t *ptl, pte_t orig_pte) | 1466 | spinlock_t *ptl, pte_t orig_pte) |
| 1334 | { | 1467 | { |
| 1335 | struct page *old_page, *src_page, *new_page; | 1468 | struct page *old_page, *new_page; |
| 1336 | pte_t entry; | 1469 | pte_t entry; |
| 1337 | int ret = VM_FAULT_MINOR; | 1470 | int ret = VM_FAULT_MINOR; |
| 1338 | 1471 | ||
| 1339 | old_page = vm_normal_page(vma, address, orig_pte); | 1472 | old_page = vm_normal_page(vma, address, orig_pte); |
| 1340 | src_page = old_page; | ||
| 1341 | if (!old_page) | 1473 | if (!old_page) |
| 1342 | goto gotten; | 1474 | goto gotten; |
| 1343 | 1475 | ||
| @@ -1345,7 +1477,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 1345 | int reuse = can_share_swap_page(old_page); | 1477 | int reuse = can_share_swap_page(old_page); |
| 1346 | unlock_page(old_page); | 1478 | unlock_page(old_page); |
| 1347 | if (reuse) { | 1479 | if (reuse) { |
| 1348 | flush_cache_page(vma, address, pfn); | 1480 | flush_cache_page(vma, address, pte_pfn(orig_pte)); |
| 1349 | entry = pte_mkyoung(orig_pte); | 1481 | entry = pte_mkyoung(orig_pte); |
| 1350 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 1482 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
| 1351 | ptep_set_access_flags(vma, address, page_table, entry, 1); | 1483 | ptep_set_access_flags(vma, address, page_table, entry, 1); |
| @@ -1365,7 +1497,7 @@ gotten: | |||
| 1365 | 1497 | ||
| 1366 | if (unlikely(anon_vma_prepare(vma))) | 1498 | if (unlikely(anon_vma_prepare(vma))) |
| 1367 | goto oom; | 1499 | goto oom; |
| 1368 | if (src_page == ZERO_PAGE(address)) { | 1500 | if (old_page == ZERO_PAGE(address)) { |
| 1369 | new_page = alloc_zeroed_user_highpage(vma, address); | 1501 | new_page = alloc_zeroed_user_highpage(vma, address); |
| 1370 | if (!new_page) | 1502 | if (!new_page) |
| 1371 | goto oom; | 1503 | goto oom; |
| @@ -1373,7 +1505,7 @@ gotten: | |||
| 1373 | new_page = alloc_page_vma(GFP_HIGHUSER, vma, address); | 1505 | new_page = alloc_page_vma(GFP_HIGHUSER, vma, address); |
| 1374 | if (!new_page) | 1506 | if (!new_page) |
| 1375 | goto oom; | 1507 | goto oom; |
| 1376 | cow_user_page(new_page, src_page, address); | 1508 | cow_user_page(new_page, old_page, address); |
| 1377 | } | 1509 | } |
| 1378 | 1510 | ||
| 1379 | /* | 1511 | /* |
| @@ -1389,7 +1521,7 @@ gotten: | |||
| 1389 | } | 1521 | } |
| 1390 | } else | 1522 | } else |
| 1391 | inc_mm_counter(mm, anon_rss); | 1523 | inc_mm_counter(mm, anon_rss); |
| 1392 | flush_cache_page(vma, address, pfn); | 1524 | flush_cache_page(vma, address, pte_pfn(orig_pte)); |
| 1393 | entry = mk_pte(new_page, vma->vm_page_prot); | 1525 | entry = mk_pte(new_page, vma->vm_page_prot); |
| 1394 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 1526 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
| 1395 | ptep_establish(vma, address, page_table, entry); | 1527 | ptep_establish(vma, address, page_table, entry); |
| @@ -1909,6 +2041,8 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 1909 | int anon = 0; | 2041 | int anon = 0; |
| 1910 | 2042 | ||
| 1911 | pte_unmap(page_table); | 2043 | pte_unmap(page_table); |
| 2044 | BUG_ON(vma->vm_flags & VM_PFNMAP); | ||
| 2045 | |||
| 1912 | if (vma->vm_file) { | 2046 | if (vma->vm_file) { |
| 1913 | mapping = vma->vm_file->f_mapping; | 2047 | mapping = vma->vm_file->f_mapping; |
| 1914 | sequence = mapping->truncate_count; | 2048 | sequence = mapping->truncate_count; |
| @@ -1941,7 +2075,7 @@ retry: | |||
| 1941 | page = alloc_page_vma(GFP_HIGHUSER, vma, address); | 2075 | page = alloc_page_vma(GFP_HIGHUSER, vma, address); |
| 1942 | if (!page) | 2076 | if (!page) |
| 1943 | goto oom; | 2077 | goto oom; |
| 1944 | cow_user_page(page, new_page, address); | 2078 | copy_user_highpage(page, new_page, address); |
| 1945 | page_cache_release(new_page); | 2079 | page_cache_release(new_page); |
| 1946 | new_page = page; | 2080 | new_page = page; |
| 1947 | anon = 1; | 2081 | anon = 1; |
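Why the last hunk can switch to copy_user_highpage(): in do_no_page() the source page was just returned by the vma's ->nopage() handler, so it is always a real, readable kernel page, and the unreadable-source fallback that cow_user_page() carries is never needed there. As a rough, illustrative sketch of what that plain highmem copy amounts to (simplified from the highmem helpers of this era, not the exact kernel code):

/* illustrative only: approximately what copy_user_highpage() did circa 2.6.15 */
static inline void copy_user_highpage_sketch(struct page *to, struct page *from,
					     unsigned long vaddr)
{
	char *vfrom = kmap_atomic(from, KM_USER0);
	char *vto   = kmap_atomic(to, KM_USER1);

	/* arch-specific copy that can account for virtual cache aliasing */
	copy_user_page(vto, vfrom, vaddr, to);

	kunmap_atomic(vfrom, KM_USER0);
	kunmap_atomic(vto, KM_USER1);
}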
