path: root/mm
author		Jens Axboe <jaxboe@fusionio.com>	2011-05-20 14:33:15 -0400
committer	Jens Axboe <jaxboe@fusionio.com>	2011-05-20 14:33:15 -0400
commit		698567f3fa790fea37509a54dea855302dd88331 (patch)
tree		7a1df976a0eb12cab03e82c18809a30d5482fee4 /mm
parent		d70d0711edd8076ec2ce0ed109106e2df950681b (diff)
parent		61c4f2c81c61f73549928dfd9f3e8f26aa36a8cf (diff)
Merge commit 'v2.6.39' into for-2.6.40/core
Since for-2.6.40/core was forked off the 2.6.39 devel tree, we've had
churn in the core area that makes it difficult to handle patches for
eg cfq or blk-throttle. Instead of requiring that they be based in
older versions with bugs that have been fixed later in the rc cycle,
merge in 2.6.39 final.

Also fixes up conflicts in the below files.

Conflicts:
	drivers/block/paride/pcd.c
	drivers/cdrom/viocd.c
	drivers/ide/ide-cd.c

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'mm')
-rw-r--r--   mm/huge_memory.c    43
-rw-r--r--   mm/memory.c         21
-rw-r--r--   mm/mlock.c           5
-rw-r--r--   mm/mmap.c           11
-rw-r--r--   mm/oom_kill.c        9
-rw-r--r--   mm/page_alloc.c     57
-rw-r--r--   mm/page_cgroup.c     2
-rw-r--r--   mm/shmem.c         149
-rw-r--r--   mm/slub.c            4
-rw-r--r--   mm/swap.c            3
-rw-r--r--   mm/vmscan.c          2
11 files changed, 179 insertions(+), 127 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 470dcda10add..83326ad66d9b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1408,6 +1408,9 @@ out:
 	return ret;
 }
 
+#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \
+		   VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
+
 int hugepage_madvise(struct vm_area_struct *vma,
 		     unsigned long *vm_flags, int advice)
 {
@@ -1416,11 +1419,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
 		/*
 		 * Be somewhat over-protective like KSM for now!
 		 */
-		if (*vm_flags & (VM_HUGEPAGE |
-				 VM_SHARED | VM_MAYSHARE |
-				 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
-				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
-				 VM_MIXEDMAP | VM_SAO))
+		if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
 			return -EINVAL;
 		*vm_flags &= ~VM_NOHUGEPAGE;
 		*vm_flags |= VM_HUGEPAGE;
@@ -1436,11 +1435,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
 		/*
 		 * Be somewhat over-protective like KSM for now!
 		 */
-		if (*vm_flags & (VM_NOHUGEPAGE |
-				 VM_SHARED | VM_MAYSHARE |
-				 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
-				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
-				 VM_MIXEDMAP | VM_SAO))
+		if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
 			return -EINVAL;
 		*vm_flags &= ~VM_HUGEPAGE;
 		*vm_flags |= VM_NOHUGEPAGE;
@@ -1574,10 +1569,14 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
 		 * page fault if needed.
 		 */
 		return 0;
-	if (vma->vm_file || vma->vm_ops)
+	if (vma->vm_ops)
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+	/*
+	 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
+	 * true too, verify it here.
+	 */
+	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
@@ -1828,12 +1827,15 @@ static void collapse_huge_page(struct mm_struct *mm,
 	    (vma->vm_flags & VM_NOHUGEPAGE))
 		goto out;
 
-	/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
-	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+	if (!vma->anon_vma || vma->vm_ops)
 		goto out;
 	if (is_vma_temporary_stack(vma))
 		goto out;
-	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+	/*
+	 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
+	 * true too, verify it here.
+	 */
+	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
@@ -2066,13 +2068,16 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 			progress++;
 			continue;
 		}
-		/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
-		if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+		if (!vma->anon_vma || vma->vm_ops)
 			goto skip;
 		if (is_vma_temporary_stack(vma))
 			goto skip;
-
-		VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+		/*
+		 * If is_pfn_mapping() is true is_learn_pfn_mapping()
+		 * must be true too, verify it here.
+		 */
+		VM_BUG_ON(is_linear_pfn_mapping(vma) ||
+			  vma->vm_flags & VM_NO_THP);
 
 		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 		hend = vma->vm_end & HPAGE_PMD_MASK;
diff --git a/mm/memory.c b/mm/memory.c
index ce22a250926f..61e66f026563 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1359,7 +1359,7 @@ split_fallthrough:
 		 */
 		mark_page_accessed(page);
 	}
-	if (flags & FOLL_MLOCK) {
+	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
 		/*
 		 * The preliminary mapping check is mainly to avoid the
 		 * pointless overhead of lock_page on the ZERO_PAGE
@@ -1412,9 +1412,8 @@ no_page_table:
 
 static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
 {
-	return (vma->vm_flags & VM_GROWSDOWN) &&
-		(vma->vm_start == addr) &&
-		!vma_stack_continue(vma->vm_prev, addr);
+	return stack_guard_page_start(vma, addr) ||
+	       stack_guard_page_end(vma, addr+PAGE_SIZE);
 }
 
 /**
@@ -1551,13 +1550,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			continue;
 		}
 
-		/*
-		 * If we don't actually want the page itself,
-		 * and it's the stack guard page, just skip it.
-		 */
-		if (!pages && stack_guard_page(vma, start))
-			goto next_page;
-
 		do {
 			struct page *page;
 			unsigned int foll_flags = gup_flags;
@@ -1574,6 +1566,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				int ret;
 				unsigned int fault_flags = 0;
 
+				/* For mlock, just skip the stack guard page. */
+				if (foll_flags & FOLL_MLOCK) {
+					if (stack_guard_page(vma, start))
+						goto next_page;
+				}
 				if (foll_flags & FOLL_WRITE)
 					fault_flags |= FAULT_FLAG_WRITE;
 				if (nonblocking)
@@ -3396,7 +3393,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * run pte_offset_map on the pmd, if an huge pmd could
 	 * materialize from under us from a different thread.
 	 */
-	if (unlikely(__pte_alloc(mm, vma, pmd, address)))
+	if (unlikely(pmd_none(*pmd)) && __pte_alloc(mm, vma, pmd, address))
 		return VM_FAULT_OOM;
 	/* if an huge pmd materialized from under us just retry later */
 	if (unlikely(pmd_trans_huge(*pmd)))
diff --git a/mm/mlock.c b/mm/mlock.c
index 6b55e3efe0df..516b2c2ddd5a 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -162,7 +162,7 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	VM_BUG_ON(end > vma->vm_end);
 	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
-	gup_flags = FOLL_TOUCH;
+	gup_flags = FOLL_TOUCH | FOLL_MLOCK;
 	/*
 	 * We want to touch writable mappings with a write fault in order
 	 * to break COW, except for shared mappings because these don't COW
@@ -178,9 +178,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
 		gup_flags |= FOLL_FORCE;
 
-	if (vma->vm_flags & VM_LOCKED)
-		gup_flags |= FOLL_MLOCK;
-
 	return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
 				NULL, NULL, nonblocking);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index e27e0cf0de03..772140c53ab1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1767,10 +1767,13 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 		size = address - vma->vm_start;
 		grow = (address - vma->vm_end) >> PAGE_SHIFT;
 
-		error = acct_stack_growth(vma, size, grow);
-		if (!error) {
-			vma->vm_end = address;
-			perf_event_mmap(vma);
+		error = -ENOMEM;
+		if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
+			error = acct_stack_growth(vma, size, grow);
+			if (!error) {
+				vma->vm_end = address;
+				perf_event_mmap(vma);
+			}
 		}
 	}
 	vma_unlock_anon_vma(vma);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 83fb72c108b7..f52e85c80e8d 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -172,10 +172,13 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
 
 	/*
 	 * The baseline for the badness score is the proportion of RAM that each
-	 * task's rss and swap space use.
+	 * task's rss, pagetable and swap space use.
 	 */
-	points = (get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS)) * 1000 /
-			totalpages;
+	points = get_mm_rss(p->mm) + p->mm->nr_ptes;
+	points += get_mm_counter(p->mm, MM_SWAPENTS);
+
+	points *= 1000;
+	points /= totalpages;
 	task_unlock(p);
 
 	/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9f8a97b9a350..3f8bce264df6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2317,6 +2317,21 @@ void free_pages(unsigned long addr, unsigned int order)
 
 EXPORT_SYMBOL(free_pages);
 
+static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
+{
+	if (addr) {
+		unsigned long alloc_end = addr + (PAGE_SIZE << order);
+		unsigned long used = addr + PAGE_ALIGN(size);
+
+		split_page(virt_to_page((void *)addr), order);
+		while (used < alloc_end) {
+			free_page(used);
+			used += PAGE_SIZE;
+		}
+	}
+	return (void *)addr;
+}
+
 /**
  * alloc_pages_exact - allocate an exact number physically-contiguous pages.
  * @size: the number of bytes to allocate
@@ -2336,22 +2351,33 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
 	unsigned long addr;
 
 	addr = __get_free_pages(gfp_mask, order);
-	if (addr) {
-		unsigned long alloc_end = addr + (PAGE_SIZE << order);
-		unsigned long used = addr + PAGE_ALIGN(size);
-
-		split_page(virt_to_page((void *)addr), order);
-		while (used < alloc_end) {
-			free_page(used);
-			used += PAGE_SIZE;
-		}
-	}
-
-	return (void *)addr;
+	return make_alloc_exact(addr, order, size);
 }
 EXPORT_SYMBOL(alloc_pages_exact);
 
 /**
+ * alloc_pages_exact_nid - allocate an exact number of physically-contiguous
+ *			   pages on a node.
+ * @nid: the preferred node ID where memory should be allocated
+ * @size: the number of bytes to allocate
+ * @gfp_mask: GFP flags for the allocation
+ *
+ * Like alloc_pages_exact(), but try to allocate on node nid first before falling
+ * back.
+ * Note this is not alloc_pages_exact_node() which allocates on a specific node,
+ * but is not exact.
+ */
+void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+{
+	unsigned order = get_order(size);
+	struct page *p = alloc_pages_node(nid, gfp_mask, order);
+	if (!p)
+		return NULL;
+	return make_alloc_exact((unsigned long)page_address(p), order, size);
+}
+EXPORT_SYMBOL(alloc_pages_exact_nid);
+
+/**
  * free_pages_exact - release memory allocated via alloc_pages_exact()
  * @virt: the value returned by alloc_pages_exact.
  * @size: size of allocation, same value as passed to alloc_pages_exact().
@@ -3564,7 +3590,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 
 	if (!slab_is_available()) {
 		zone->wait_table = (wait_queue_head_t *)
-			alloc_bootmem_node(pgdat, alloc_size);
+			alloc_bootmem_node_nopanic(pgdat, alloc_size);
 	} else {
 		/*
 		 * This case means that a zone whose size was 0 gets new memory
@@ -4141,7 +4167,8 @@ static void __init setup_usemap(struct pglist_data *pgdat,
 	unsigned long usemapsize = usemap_size(zonesize);
 	zone->pageblock_flags = NULL;
 	if (usemapsize)
-		zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
+		zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
+								   usemapsize);
 }
 #else
 static inline void setup_usemap(struct pglist_data *pgdat,
@@ -4307,7 +4334,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 		size = (end - start) * sizeof(struct page);
 		map = alloc_remap(pgdat->node_id, size);
 		if (!map)
-			map = alloc_bootmem_node(pgdat, size);
+			map = alloc_bootmem_node_nopanic(pgdat, size);
 		pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
 	}
 #ifndef CONFIG_NEED_MULTIPLE_NODES
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 99055010cece..2daadc322ba6 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -134,7 +134,7 @@ static void *__init_refok alloc_page_cgroup(size_t size, int nid)
 {
 	void *addr = NULL;
 
-	addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
+	addr = alloc_pages_exact_nid(nid, size, GFP_KERNEL | __GFP_NOWARN);
 	if (addr)
 		return addr;
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 8fa27e4e582a..dfc7069102ee 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_
 
 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
 {
-	struct inode *inode;
+	struct address_space *mapping;
 	unsigned long idx;
 	unsigned long size;
 	unsigned long limit;
@@ -875,8 +875,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 	if (size > SHMEM_NR_DIRECT)
 		size = SHMEM_NR_DIRECT;
 	offset = shmem_find_swp(entry, ptr, ptr+size);
-	if (offset >= 0)
+	if (offset >= 0) {
+		shmem_swp_balance_unmap();
 		goto found;
+	}
 	if (!info->i_indirect)
 		goto lost2;
 
@@ -914,11 +916,11 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 			if (size > ENTRIES_PER_PAGE)
 				size = ENTRIES_PER_PAGE;
 			offset = shmem_find_swp(entry, ptr, ptr+size);
-			shmem_swp_unmap(ptr);
 			if (offset >= 0) {
 				shmem_dir_unmap(dir);
 				goto found;
 			}
+			shmem_swp_unmap(ptr);
 		}
 	}
 lost1:
@@ -928,8 +930,7 @@ lost2:
 	return 0;
 found:
 	idx += offset;
-	inode = igrab(&info->vfs_inode);
-	spin_unlock(&info->lock);
+	ptr += offset;
 
 	/*
 	 * Move _head_ to start search for next from here.
@@ -940,37 +941,18 @@ found:
 	 */
 	if (shmem_swaplist.next != &info->swaplist)
 		list_move_tail(&shmem_swaplist, &info->swaplist);
-	mutex_unlock(&shmem_swaplist_mutex);
 
-	error = 1;
-	if (!inode)
-		goto out;
 	/*
-	 * Charge page using GFP_KERNEL while we can wait.
-	 * Charged back to the user(not to caller) when swap account is used.
-	 * add_to_page_cache() will be called with GFP_NOWAIT.
+	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
+	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
+	 * beneath us (pagelock doesn't help until the page is in pagecache).
 	 */
-	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
-	if (error)
-		goto out;
-	error = radix_tree_preload(GFP_KERNEL);
-	if (error) {
-		mem_cgroup_uncharge_cache_page(page);
-		goto out;
-	}
-	error = 1;
-
-	spin_lock(&info->lock);
-	ptr = shmem_swp_entry(info, idx, NULL);
-	if (ptr && ptr->val == entry.val) {
-		error = add_to_page_cache_locked(page, inode->i_mapping,
-						idx, GFP_NOWAIT);
-		/* does mem_cgroup_uncharge_cache_page on error */
-	} else /* we must compensate for our precharge above */
-		mem_cgroup_uncharge_cache_page(page);
+	mapping = info->vfs_inode.i_mapping;
+	error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
+	/* which does mem_cgroup_uncharge_cache_page on error */
 
 	if (error == -EEXIST) {
-		struct page *filepage = find_get_page(inode->i_mapping, idx);
+		struct page *filepage = find_get_page(mapping, idx);
 		error = 1;
 		if (filepage) {
 			/*
@@ -990,14 +972,8 @@ found:
 		swap_free(entry);
 		error = 1;	/* not an error, but entry was found */
 	}
-	if (ptr)
-		shmem_swp_unmap(ptr);
+	shmem_swp_unmap(ptr);
 	spin_unlock(&info->lock);
-	radix_tree_preload_end();
-out:
-	unlock_page(page);
-	page_cache_release(page);
-	iput(inode);		/* allows for NULL */
 	return error;
 }
 
@@ -1009,6 +985,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 	struct list_head *p, *next;
 	struct shmem_inode_info *info;
 	int found = 0;
+	int error;
+
+	/*
+	 * Charge page using GFP_KERNEL while we can wait, before taking
+	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
+	 * Charged back to the user (not to caller) when swap account is used.
+	 * add_to_page_cache() will be called with GFP_NOWAIT.
+	 */
+	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
+	if (error)
+		goto out;
+	/*
+	 * Try to preload while we can wait, to not make a habit of
+	 * draining atomic reserves; but don't latch on to this cpu,
+	 * it's okay if sometimes we get rescheduled after this.
+	 */
+	error = radix_tree_preload(GFP_KERNEL);
+	if (error)
+		goto uncharge;
+	radix_tree_preload_end();
 
 	mutex_lock(&shmem_swaplist_mutex);
 	list_for_each_safe(p, next, &shmem_swaplist) {
@@ -1016,17 +1012,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 		found = shmem_unuse_inode(info, entry, page);
 		cond_resched();
 		if (found)
-			goto out;
+			break;
 	}
 	mutex_unlock(&shmem_swaplist_mutex);
-	/*
-	 * Can some race bring us here? We've been holding page lock,
-	 * so I think not; but would rather try again later than BUG()
-	 */
+
+uncharge:
+	if (!found)
+		mem_cgroup_uncharge_cache_page(page);
+	if (found < 0)
+		error = found;
+out:
 	unlock_page(page);
 	page_cache_release(page);
-out:
-	return (found < 0) ? found : 0;
+	return error;
 }
 
 /*
@@ -1064,7 +1062,25 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	else
 		swap.val = 0;
 
+	/*
+	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
+	 * if it's not already there. Do it now because we cannot take
+	 * mutex while holding spinlock, and must do so before the page
+	 * is moved to swap cache, when its pagelock no longer protects
+	 * the inode from eviction. But don't unlock the mutex until
+	 * we've taken the spinlock, because shmem_unuse_inode() will
+	 * prune a !swapped inode from the swaplist under both locks.
+	 */
+	if (swap.val) {
+		mutex_lock(&shmem_swaplist_mutex);
+		if (list_empty(&info->swaplist))
+			list_add_tail(&info->swaplist, &shmem_swaplist);
+	}
+
 	spin_lock(&info->lock);
+	if (swap.val)
+		mutex_unlock(&shmem_swaplist_mutex);
+
 	if (index >= info->next_index) {
 		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
 		goto unlock;
@@ -1084,21 +1100,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		delete_from_page_cache(page);
 		shmem_swp_set(info, entry, swap.val);
 		shmem_swp_unmap(entry);
-		if (list_empty(&info->swaplist))
-			inode = igrab(inode);
-		else
-			inode = NULL;
 		spin_unlock(&info->lock);
 		swap_shmem_alloc(swap);
 		BUG_ON(page_mapped(page));
 		swap_writepage(page, wbc);
-		if (inode) {
-			mutex_lock(&shmem_swaplist_mutex);
-			/* move instead of add in case we're racing */
-			list_move_tail(&info->swaplist, &shmem_swaplist);
-			mutex_unlock(&shmem_swaplist_mutex);
-			iput(inode);
-		}
 		return 0;
 	}
 
@@ -1400,20 +1405,14 @@ repeat:
 	if (sbinfo->max_blocks) {
 		if (percpu_counter_compare(&sbinfo->used_blocks,
 					sbinfo->max_blocks) >= 0 ||
-		    shmem_acct_block(info->flags)) {
-			spin_unlock(&info->lock);
-			error = -ENOSPC;
-			goto failed;
-		}
+		    shmem_acct_block(info->flags))
+			goto nospace;
 		percpu_counter_inc(&sbinfo->used_blocks);
 		spin_lock(&inode->i_lock);
 		inode->i_blocks += BLOCKS_PER_PAGE;
 		spin_unlock(&inode->i_lock);
-	} else if (shmem_acct_block(info->flags)) {
-		spin_unlock(&info->lock);
-		error = -ENOSPC;
-		goto failed;
-	}
+	} else if (shmem_acct_block(info->flags))
+		goto nospace;
 
 	if (!filepage) {
 		int ret;
@@ -1493,6 +1492,24 @@ done:
 	error = 0;
 	goto out;
 
+nospace:
+	/*
+	 * Perhaps the page was brought in from swap between find_lock_page
+	 * and taking info->lock? We allow for that at add_to_page_cache_lru,
+	 * but must also avoid reporting a spurious ENOSPC while working on a
+	 * full tmpfs. (When filepage has been passed in to shmem_getpage, it
+	 * is already in page cache, which prevents this race from occurring.)
+	 */
+	if (!filepage) {
+		struct page *page = find_get_page(mapping, idx);
+		if (page) {
+			spin_unlock(&info->lock);
+			page_cache_release(page);
+			goto repeat;
+		}
+	}
+	spin_unlock(&info->lock);
+	error = -ENOSPC;
 failed:
 	if (*pagep != filepage) {
 		unlock_page(filepage);
diff --git a/mm/slub.c b/mm/slub.c
index 94d2a33a866e..9d2e5e46bf09 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1940,7 +1940,7 @@ redo:
 	 * Since this is without lock semantics the protection is only against
 	 * code executing on this cpu *not* from access by other cpus.
 	 */
-	if (unlikely(!this_cpu_cmpxchg_double(
+	if (unlikely(!irqsafe_cpu_cmpxchg_double(
 			s->cpu_slab->freelist, s->cpu_slab->tid,
 			object, tid,
 			get_freepointer(s, object), next_tid(tid)))) {
@@ -2145,7 +2145,7 @@ redo:
 		set_freepointer(s, object, c->freelist);
 
 #ifdef CONFIG_CMPXCHG_LOCAL
-		if (unlikely(!this_cpu_cmpxchg_double(
+		if (unlikely(!irqsafe_cpu_cmpxchg_double(
 				s->cpu_slab->freelist, s->cpu_slab->tid,
 				c->freelist, tid,
 				object, next_tid(tid)))) {
diff --git a/mm/swap.c b/mm/swap.c
index a448db377cb0..5602f1a1b1e7 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -396,6 +396,9 @@ static void lru_deactivate_fn(struct page *page, void *arg)
 	if (!PageLRU(page))
 		return;
 
+	if (PageUnevictable(page))
+		return;
+
 	/* Some processes are using the page */
 	if (page_mapped(page))
 		return;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f6b435c80079..8bfd45050a61 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -937,7 +937,7 @@ keep_lumpy:
 	 * back off and wait for congestion to clear because further reclaim
 	 * will encounter the same problem
 	 */
-	if (nr_dirty == nr_congested && nr_dirty != 0)
+	if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc))
 		zone_set_flag(zone, ZONE_CONGESTED);
 
 	free_page_list(&free_pages);