Diffstat (limited to 'mm')
-rw-r--r--  mm/fremap.c      |  24
-rw-r--r--  mm/memory.c      | 152
-rw-r--r--  mm/page_alloc.c  |  16
-rw-r--r--  mm/rmap.c        |   2
4 files changed, 154 insertions, 40 deletions
diff --git a/mm/fremap.c b/mm/fremap.c
index f851775e09c2..9f381e58bf44 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -55,20 +55,10 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	pgoff_t size;
 	int err = -ENOMEM;
 	pte_t *pte;
-	pmd_t *pmd;
-	pud_t *pud;
-	pgd_t *pgd;
 	pte_t pte_val;
 	spinlock_t *ptl;
 
-	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
-	if (!pud)
-		goto out;
-	pmd = pmd_alloc(mm, pud, addr);
-	if (!pmd)
-		goto out;
-	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+	pte = get_locked_pte(mm, addr, &ptl);
 	if (!pte)
 		goto out;
 
@@ -110,20 +100,10 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	int err = -ENOMEM;
 	pte_t *pte;
-	pmd_t *pmd;
-	pud_t *pud;
-	pgd_t *pgd;
 	pte_t pte_val;
 	spinlock_t *ptl;
 
-	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
-	if (!pud)
-		goto out;
-	pmd = pmd_alloc(mm, pud, addr);
-	if (!pmd)
-		goto out;
-	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+	pte = get_locked_pte(mm, addr, &ptl);
 	if (!pte)
 		goto out;
 
diff --git a/mm/memory.c b/mm/memory.c
index 6c1eac92a316..aa8af0e20269 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1146,6 +1146,129 @@ int zeromap_page_range(struct vm_area_struct *vma,
 	return err;
 }
 
+pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
+{
+	pgd_t * pgd = pgd_offset(mm, addr);
+	pud_t * pud = pud_alloc(mm, pgd, addr);
+	if (pud) {
+		pmd_t * pmd = pmd_alloc(mm, pud, addr);
+		if (pmd)
+			return pte_alloc_map_lock(mm, pmd, addr, ptl);
+	}
+	return NULL;
+}
+
+/*
+ * This is the old fallback for page remapping.
+ *
+ * For historical reasons, it only allows reserved pages. Only
+ * old drivers should use this, and they needed to mark their
+ * pages reserved for the old functions anyway.
+ */
+static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *page, pgprot_t prot)
+{
+	int retval;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	retval = -EINVAL;
+	if (PageAnon(page))
+		goto out;
+	retval = -ENOMEM;
+	flush_dcache_page(page);
+	pte = get_locked_pte(mm, addr, &ptl);
+	if (!pte)
+		goto out;
+	retval = -EBUSY;
+	if (!pte_none(*pte))
+		goto out_unlock;
+
+	/* Ok, finally just insert the thing.. */
+	get_page(page);
+	inc_mm_counter(mm, file_rss);
+	page_add_file_rmap(page);
+	set_pte_at(mm, addr, pte, mk_pte(page, prot));
+
+	retval = 0;
+out_unlock:
+	pte_unmap_unlock(pte, ptl);
+out:
+	return retval;
+}
+
+/*
+ * This allows drivers to insert individual pages they've allocated
+ * into a user vma.
+ *
+ * The page has to be a nice clean _individual_ kernel allocation.
+ * If you allocate a compound page, you need to have marked it as
+ * such (__GFP_COMP), or manually just split the page up yourself
+ * (which is mainly an issue of doing "set_page_count(page, 1)" for
+ * each sub-page, and then freeing them one by one when you free
+ * them rather than freeing it as a compound page).
+ *
+ * NOTE! Traditionally this was done with "remap_pfn_range()" which
+ * took an arbitrary page protection parameter. This doesn't allow
+ * that. Your vma protection will have to be set up correctly, which
+ * means that if you want a shared writable mapping, you'd better
+ * ask for a shared writable mapping!
+ *
+ * The page does not need to be reserved.
+ */
+int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page)
+{
+	if (addr < vma->vm_start || addr >= vma->vm_end)
+		return -EFAULT;
+	if (!page_count(page))
+		return -EINVAL;
+	return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_page);
+
+/*
+ * Somebody does a pfn remapping that doesn't actually work as a vma.
+ *
+ * Do it as individual pages instead, and warn about it. It's bad form,
+ * and very inefficient.
+ */
+static int incomplete_pfn_remap(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end,
+		unsigned long pfn, pgprot_t prot)
+{
+	static int warn = 10;
+	struct page *page;
+	int retval;
+
+	if (!(vma->vm_flags & VM_INCOMPLETE)) {
+		if (warn) {
+			warn--;
+			printk("%s does an incomplete pfn remapping", current->comm);
+			dump_stack();
+		}
+	}
+	vma->vm_flags |= VM_INCOMPLETE | VM_IO | VM_RESERVED;
+
+	if (start < vma->vm_start || end > vma->vm_end)
+		return -EINVAL;
+
+	if (!pfn_valid(pfn))
+		return -EINVAL;
+
+	page = pfn_to_page(pfn);
+	if (!PageReserved(page))
+		return -EINVAL;
+
+	retval = 0;
+	while (start < end) {
+		retval = insert_page(vma->vm_mm, start, page, prot);
+		if (retval < 0)
+			break;
+		start += PAGE_SIZE;
+		page++;
+	}
+	return retval;
+}
+
 /*
  * maps a range of physical memory into the requested pages. the old
  * mappings are removed. any references to nonexistent pages results
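
The comment block above vm_insert_page() spells out the constraints (individual, non-compound pages, and a vma whose protection is already set up). Purely as an illustration of the new export, and not part of this patch, a driver mmap() handler could look like the sketch below; "struct my_dev", dev->pages[] and dev->npages are hypothetical names invented for the example.

/*
 * Hypothetical usage sketch (not in this patch): map a driver's
 * pre-allocated, non-compound pages into a user vma one by one.
 * struct my_dev, dev->pages[] and dev->npages are assumptions
 * made up for this example.
 */
static int my_dev_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct my_dev *dev = file->private_data;
	unsigned long npages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
	unsigned long uaddr = vma->vm_start;
	unsigned long i;
	int err;

	if (npages > dev->npages)
		return -EINVAL;

	for (i = 0; i < npages; i++) {
		/* Each page must be an individual kernel allocation. */
		err = vm_insert_page(vma, uaddr, dev->pages[i]);
		if (err)
			return err;
		uaddr += PAGE_SIZE;
	}
	return 0;
}
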
@@ -1220,6 +1343,9 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	struct mm_struct *mm = vma->vm_mm;
 	int err;
 
+	if (addr != vma->vm_start || end != vma->vm_end)
+		return incomplete_pfn_remap(vma, addr, end, pfn, prot);
+
 	/*
 	 * Physically remapped pages are special. Tell the
 	 * rest of the world about it:
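
The practical effect of the new check is that only callers remapping their entire vma stay on the remap_pfn_range() fast path; partial remaps now fall back to incomplete_pfn_remap() and get warned about. A hypothetical full-range caller, not part of this patch and with my_phys_base standing in for an assumed device physical address, would look like this:

/*
 * Hypothetical illustration (not in this patch): a full-vma remap
 * keeps the normal remap_pfn_range() path; remapping only part of
 * the vma now triggers the incomplete_pfn_remap() fallback.
 */
static int my_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	return remap_pfn_range(vma, vma->vm_start,
			my_phys_base >> PAGE_SHIFT, size,
			vma->vm_page_prot);
}
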
@@ -1300,8 +1426,15 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
 	 */
 	if (unlikely(!src)) {
 		void *kaddr = kmap_atomic(dst, KM_USER0);
-		unsigned long left = __copy_from_user_inatomic(kaddr, (void __user *)va, PAGE_SIZE);
-		if (left)
+		void __user *uaddr = (void __user *)(va & PAGE_MASK);
+
+		/*
+		 * This really shouldn't fail, because the page is there
+		 * in the page tables. But it might just be unreadable,
+		 * in which case we just give up and fill the result with
+		 * zeroes.
+		 */
+		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
 			memset(kaddr, 0, PAGE_SIZE);
 		kunmap_atomic(kaddr, KM_USER0);
 		return;
@@ -1332,12 +1465,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
 		spinlock_t *ptl, pte_t orig_pte)
 {
-	struct page *old_page, *src_page, *new_page;
+	struct page *old_page, *new_page;
 	pte_t entry;
 	int ret = VM_FAULT_MINOR;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
-	src_page = old_page;
 	if (!old_page)
 		goto gotten;
 
@@ -1345,7 +1477,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		int reuse = can_share_swap_page(old_page);
 		unlock_page(old_page);
 		if (reuse) {
-			flush_cache_page(vma, address, pfn);
+			flush_cache_page(vma, address, pte_pfn(orig_pte));
 			entry = pte_mkyoung(orig_pte);
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 			ptep_set_access_flags(vma, address, page_table, entry, 1);
@@ -1365,7 +1497,7 @@ gotten:
 
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
-	if (src_page == ZERO_PAGE(address)) {
+	if (old_page == ZERO_PAGE(address)) {
 		new_page = alloc_zeroed_user_highpage(vma, address);
 		if (!new_page)
 			goto oom;
@@ -1373,7 +1505,7 @@ gotten:
 		new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 		if (!new_page)
 			goto oom;
-		cow_user_page(new_page, src_page, address);
+		cow_user_page(new_page, old_page, address);
 	}
 
1379 /* 1511 /*
@@ -1389,7 +1521,7 @@ gotten:
 			}
 		} else
 			inc_mm_counter(mm, anon_rss);
-		flush_cache_page(vma, address, pfn);
+		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		ptep_establish(vma, address, page_table, entry);
@@ -1909,6 +2041,8 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int anon = 0;
 
 	pte_unmap(page_table);
+	BUG_ON(vma->vm_flags & VM_PFNMAP);
+
 	if (vma->vm_file) {
 		mapping = vma->vm_file->f_mapping;
 		sequence = mapping->truncate_count;
@@ -1941,7 +2075,7 @@ retry:
 		page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 		if (!page)
 			goto oom;
-		cow_user_page(page, new_page, address);
+		copy_user_highpage(page, new_page, address);
 		page_cache_release(new_page);
 		new_page = page;
 		anon = 1;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b257720edfc8..3b21a13d841c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1772,16 +1772,16 @@ static int __devinit zone_batchsize(struct zone *zone)
 		batch = 1;
 
 	/*
-	 * We will be trying to allcoate bigger chunks of contiguous
-	 * memory of the order of fls(batch). This should result in
-	 * better cache coloring.
+	 * Clamp the batch to a 2^n - 1 value. Having a power
+	 * of 2 value was found to be more likely to have
+	 * suboptimal cache aliasing properties in some cases.
 	 *
-	 * A sanity check also to ensure that batch is still in limits.
+	 * For example if 2 tasks are alternately allocating
+	 * batches of pages, one task can end up with a lot
+	 * of pages of one half of the possible page colors
+	 * and the other with pages of the other colors.
 	 */
-	batch = (1 << fls(batch + batch/2));
-
-	if (fls(batch) >= (PAGE_SHIFT + MAX_ORDER - 2))
-		batch = PAGE_SHIFT + ((MAX_ORDER - 1 - PAGE_SHIFT)/2);
+	batch = (1 << (fls(batch + batch/2)-1)) - 1;
 
 	return batch;
 }
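
To make the new rounding concrete: for a raw batch of 16, fls(16 + 8) = 5, so batch becomes (1 << 4) - 1 = 15; a raw 32 becomes 31, a raw 64 becomes 63. The small user-space sketch below (fls() emulated, since the kernel helper lives in <linux/bitops.h>) just prints that mapping and is illustration only, not kernel code.

/* Illustrative user-space sketch, not kernel code. */
#include <stdio.h>

/* Emulate the kernel's fls(): 1-based position of the most significant set bit. */
static int fls_emul(unsigned int x)
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

int main(void)
{
	int raw;

	for (raw = 4; raw <= 64; raw <<= 1) {
		int batch = (1 << (fls_emul(raw + raw / 2) - 1)) - 1;

		/* e.g. raw 16 -> 15, raw 32 -> 31, raw 64 -> 63 */
		printf("raw %2d -> batch %2d\n", raw, batch);
	}
	return 0;
}
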
diff --git a/mm/rmap.c b/mm/rmap.c
index 491ac350048f..f853c6def159 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -641,7 +641,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 			continue;
 
 		/* Nuke the page table entry. */
-		flush_cache_page(vma, address, pfn);
+		flush_cache_page(vma, address, pte_pfn(*pte));
 		pteval = ptep_clear_flush(vma, address, pte);
 
 		/* If nonlinear, store the file page offset in the pte. */