Diffstat (limited to 'mm')
-rw-r--r--  mm/memory.c        16
-rw-r--r--  mm/mmap.c          11
-rw-r--r--  mm/page_alloc.c    57
-rw-r--r--  mm/page_cgroup.c    2
-rw-r--r--  mm/shmem.c        149
-rw-r--r--  mm/swap.c           3
-rw-r--r--  mm/vmscan.c         2
7 files changed, 144 insertions(+), 96 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 27f425378112..61e66f026563 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1412,9 +1412,8 @@ no_page_table:
 
 static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
 {
-	return (vma->vm_flags & VM_GROWSDOWN) &&
-		(vma->vm_start == addr) &&
-		!vma_stack_continue(vma->vm_prev, addr);
+	return stack_guard_page_start(vma, addr) ||
+	       stack_guard_page_end(vma, addr+PAGE_SIZE);
 }
 
 /**
@@ -1551,12 +1550,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			continue;
 		}
 
-		/*
-		 * For mlock, just skip the stack guard page.
-		 */
-		if ((gup_flags & FOLL_MLOCK) && stack_guard_page(vma, start))
-			goto next_page;
-
 		do {
 			struct page *page;
 			unsigned int foll_flags = gup_flags;
@@ -1573,6 +1566,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				int ret;
 				unsigned int fault_flags = 0;
 
+				/* For mlock, just skip the stack guard page. */
+				if (foll_flags & FOLL_MLOCK) {
+					if (stack_guard_page(vma, start))
+						goto next_page;
+				}
 				if (foll_flags & FOLL_WRITE)
 					fault_flags |= FAULT_FLAG_WRITE;
 				if (nonblocking)
diff --git a/mm/mmap.c b/mm/mmap.c
index e27e0cf0de03..772140c53ab1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1767,10 +1767,13 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 		size = address - vma->vm_start;
 		grow = (address - vma->vm_end) >> PAGE_SHIFT;
 
-		error = acct_stack_growth(vma, size, grow);
-		if (!error) {
-			vma->vm_end = address;
-			perf_event_mmap(vma);
+		error = -ENOMEM;
+		if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
+			error = acct_stack_growth(vma, size, grow);
+			if (!error) {
+				vma->vm_end = address;
+				perf_event_mmap(vma);
+			}
 		}
 	}
 	vma_unlock_anon_vma(vma);
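The added check refuses a stack expansion whose page count would wrap vma->vm_pgoff past the top of its range. A minimal user-space sketch of the same unsigned wrap-around test; the function name and values below are illustrative stand-ins, not kernel code:

#include <stdio.h>

/* Illustrative only: the "a + b >= a" idiom used above to detect an
 * unsigned wrap when growing a page offset. */
static int grows_without_wrap(unsigned long pgoff, unsigned long extra_pages)
{
	return pgoff + extra_pages >= pgoff;	/* false if the addition wrapped */
}

int main(void)
{
	unsigned long pgoff = (unsigned long)-10;	/* near the top of the range */

	printf("grow by 5 pages:  %s\n", grows_without_wrap(pgoff, 5) ? "ok" : "would wrap");
	printf("grow by 20 pages: %s\n", grows_without_wrap(pgoff, 20) ? "ok" : "would wrap");
	return 0;
}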
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9f8a97b9a350..3f8bce264df6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2317,6 +2317,21 @@ void free_pages(unsigned long addr, unsigned int order)
 
 EXPORT_SYMBOL(free_pages);
 
+static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
+{
+	if (addr) {
+		unsigned long alloc_end = addr + (PAGE_SIZE << order);
+		unsigned long used = addr + PAGE_ALIGN(size);
+
+		split_page(virt_to_page((void *)addr), order);
+		while (used < alloc_end) {
+			free_page(used);
+			used += PAGE_SIZE;
+		}
+	}
+	return (void *)addr;
+}
+
 /**
  * alloc_pages_exact - allocate an exact number physically-contiguous pages.
  * @size: the number of bytes to allocate
@@ -2336,22 +2351,33 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
 	unsigned long addr;
 
 	addr = __get_free_pages(gfp_mask, order);
-	if (addr) {
-		unsigned long alloc_end = addr + (PAGE_SIZE << order);
-		unsigned long used = addr + PAGE_ALIGN(size);
-
-		split_page(virt_to_page((void *)addr), order);
-		while (used < alloc_end) {
-			free_page(used);
-			used += PAGE_SIZE;
-		}
-	}
-
-	return (void *)addr;
+	return make_alloc_exact(addr, order, size);
 }
 EXPORT_SYMBOL(alloc_pages_exact);
 
 /**
+ * alloc_pages_exact_nid - allocate an exact number of physically-contiguous
+ *			   pages on a node.
+ * @nid: the preferred node ID where memory should be allocated
+ * @size: the number of bytes to allocate
+ * @gfp_mask: GFP flags for the allocation
+ *
+ * Like alloc_pages_exact(), but try to allocate on node nid first before falling
+ * back.
+ * Note this is not alloc_pages_exact_node() which allocates on a specific node,
+ * but is not exact.
+ */
+void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+{
+	unsigned order = get_order(size);
+	struct page *p = alloc_pages_node(nid, gfp_mask, order);
+	if (!p)
+		return NULL;
+	return make_alloc_exact((unsigned long)page_address(p), order, size);
+}
+EXPORT_SYMBOL(alloc_pages_exact_nid);
+
+/**
  * free_pages_exact - release memory allocated via alloc_pages_exact()
  * @virt: the value returned by alloc_pages_exact.
  * @size: size of allocation, same value as passed to alloc_pages_exact().
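make_alloc_exact() turns a power-of-two allocation of 1 << order pages into an exact-size one: it splits the compound block into single pages and frees every page past PAGE_ALIGN(size). A rough user-space sketch of that arithmetic, with PAGE_SIZE, PAGE_ALIGN and the helper below as stand-ins rather than the kernel's definitions:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/* Illustrative only: given a block of (1 << order) pages at 'addr', report
 * how many trailing pages alloc_pages_exact-style code would hand back to
 * the allocator for a request of 'size' bytes. */
static unsigned long tail_pages_freed(unsigned long addr, unsigned order, size_t size)
{
	unsigned long alloc_end = addr + (PAGE_SIZE << order);
	unsigned long used = addr + PAGE_ALIGN(size);
	unsigned long freed = 0;

	while (used < alloc_end) {
		freed++;		/* free_page(used) in the kernel version */
		used += PAGE_SIZE;
	}
	return freed;
}

int main(void)
{
	/* Ask for 5 pages' worth of bytes: an order-3 block has 8 pages, so 3 are freed. */
	printf("%lu tail pages freed\n", tail_pages_freed(0x100000, 3, 5 * PAGE_SIZE));
	return 0;
}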
@@ -3564,7 +3590,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 
 	if (!slab_is_available()) {
 		zone->wait_table = (wait_queue_head_t *)
-			alloc_bootmem_node(pgdat, alloc_size);
+			alloc_bootmem_node_nopanic(pgdat, alloc_size);
 	} else {
 		/*
 		 * This case means that a zone whose size was 0 gets new memory
@@ -4141,7 +4167,8 @@ static void __init setup_usemap(struct pglist_data *pgdat,
 	unsigned long usemapsize = usemap_size(zonesize);
 	zone->pageblock_flags = NULL;
 	if (usemapsize)
-		zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
+		zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
+								   usemapsize);
 }
 #else
 static inline void setup_usemap(struct pglist_data *pgdat,
@@ -4307,7 +4334,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 		size = (end - start) * sizeof(struct page);
 		map = alloc_remap(pgdat->node_id, size);
 		if (!map)
-			map = alloc_bootmem_node(pgdat, size);
+			map = alloc_bootmem_node_nopanic(pgdat, size);
 		pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
 	}
 #ifndef CONFIG_NEED_MULTIPLE_NODES
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 99055010cece..2daadc322ba6 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -134,7 +134,7 @@ static void *__init_refok alloc_page_cgroup(size_t size, int nid)
 {
 	void *addr = NULL;
 
-	addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
+	addr = alloc_pages_exact_nid(nid, size, GFP_KERNEL | __GFP_NOWARN);
 	if (addr)
 		return addr;
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 8fa27e4e582a..dfc7069102ee 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_
 
 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
 {
-	struct inode *inode;
+	struct address_space *mapping;
 	unsigned long idx;
 	unsigned long size;
 	unsigned long limit;
@@ -875,8 +875,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 	if (size > SHMEM_NR_DIRECT)
 		size = SHMEM_NR_DIRECT;
 	offset = shmem_find_swp(entry, ptr, ptr+size);
-	if (offset >= 0)
+	if (offset >= 0) {
+		shmem_swp_balance_unmap();
 		goto found;
+	}
 	if (!info->i_indirect)
 		goto lost2;
 
@@ -914,11 +916,11 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 			if (size > ENTRIES_PER_PAGE)
 				size = ENTRIES_PER_PAGE;
 			offset = shmem_find_swp(entry, ptr, ptr+size);
-			shmem_swp_unmap(ptr);
 			if (offset >= 0) {
 				shmem_dir_unmap(dir);
 				goto found;
 			}
+			shmem_swp_unmap(ptr);
 		}
 	}
 lost1:
@@ -928,8 +930,7 @@ lost2:
 	return 0;
 found:
 	idx += offset;
-	inode = igrab(&info->vfs_inode);
-	spin_unlock(&info->lock);
+	ptr += offset;
 
 	/*
 	 * Move _head_ to start search for next from here.
@@ -940,37 +941,18 @@ found:
 	 */
 	if (shmem_swaplist.next != &info->swaplist)
 		list_move_tail(&shmem_swaplist, &info->swaplist);
-	mutex_unlock(&shmem_swaplist_mutex);
 
-	error = 1;
-	if (!inode)
-		goto out;
 	/*
-	 * Charge page using GFP_KERNEL while we can wait.
-	 * Charged back to the user(not to caller) when swap account is used.
-	 * add_to_page_cache() will be called with GFP_NOWAIT.
+	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
+	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
+	 * beneath us (pagelock doesn't help until the page is in pagecache).
 	 */
-	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
-	if (error)
-		goto out;
-	error = radix_tree_preload(GFP_KERNEL);
-	if (error) {
-		mem_cgroup_uncharge_cache_page(page);
-		goto out;
-	}
-	error = 1;
-
-	spin_lock(&info->lock);
-	ptr = shmem_swp_entry(info, idx, NULL);
-	if (ptr && ptr->val == entry.val) {
-		error = add_to_page_cache_locked(page, inode->i_mapping,
-						idx, GFP_NOWAIT);
-		/* does mem_cgroup_uncharge_cache_page on error */
-	} else /* we must compensate for our precharge above */
-		mem_cgroup_uncharge_cache_page(page);
+	mapping = info->vfs_inode.i_mapping;
+	error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
+	/* which does mem_cgroup_uncharge_cache_page on error */
 
 	if (error == -EEXIST) {
-		struct page *filepage = find_get_page(inode->i_mapping, idx);
+		struct page *filepage = find_get_page(mapping, idx);
 		error = 1;
 		if (filepage) {
 			/*
@@ -990,14 +972,8 @@ found:
 		swap_free(entry);
 		error = 1;	/* not an error, but entry was found */
 	}
-	if (ptr)
-		shmem_swp_unmap(ptr);
+	shmem_swp_unmap(ptr);
 	spin_unlock(&info->lock);
-	radix_tree_preload_end();
-out:
-	unlock_page(page);
-	page_cache_release(page);
-	iput(inode);		/* allows for NULL */
 	return error;
 }
 
@@ -1009,6 +985,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 	struct list_head *p, *next;
 	struct shmem_inode_info *info;
 	int found = 0;
+	int error;
+
+	/*
+	 * Charge page using GFP_KERNEL while we can wait, before taking
+	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
+	 * Charged back to the user (not to caller) when swap account is used.
+	 * add_to_page_cache() will be called with GFP_NOWAIT.
+	 */
+	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
+	if (error)
+		goto out;
+	/*
+	 * Try to preload while we can wait, to not make a habit of
+	 * draining atomic reserves; but don't latch on to this cpu,
+	 * it's okay if sometimes we get rescheduled after this.
+	 */
+	error = radix_tree_preload(GFP_KERNEL);
+	if (error)
+		goto uncharge;
+	radix_tree_preload_end();
 
 	mutex_lock(&shmem_swaplist_mutex);
 	list_for_each_safe(p, next, &shmem_swaplist) {
@@ -1016,17 +1012,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 		found = shmem_unuse_inode(info, entry, page);
 		cond_resched();
 		if (found)
-			goto out;
+			break;
 	}
 	mutex_unlock(&shmem_swaplist_mutex);
-	/*
-	 * Can some race bring us here? We've been holding page lock,
-	 * so I think not; but would rather try again later than BUG()
-	 */
+
+uncharge:
+	if (!found)
+		mem_cgroup_uncharge_cache_page(page);
+	if (found < 0)
+		error = found;
+out:
 	unlock_page(page);
 	page_cache_release(page);
-out:
-	return (found < 0) ? found : 0;
+	return error;
 }
 
 /*
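The reworked shmem_unuse() does the steps that may sleep (mem_cgroup_cache_charge() and radix_tree_preload()) before taking shmem_swaplist_mutex, then unwinds through the uncharge/out labels when nothing consumed the charge. A stripped-down sketch of that acquire-then-unwind ordering; the helpers and error values below are illustrative stand-ins, not the kernel API:

#include <stdio.h>

/* Stand-in helpers, purely illustrative. Each step can be made to fail so
 * the unwind order is visible. */
static int charge_page(int fail)	{ return fail ? -12 : 0; }	/* -ENOMEM */
static int preload_tree(int fail)	{ return fail ? -12 : 0; }
static void uncharge_page(void)		{ puts("  uncharge"); }

/* Sketch of the control flow above: charge and preload while sleeping is
 * still allowed, only then take the mutex; give the charge back if no
 * inode consumed it. */
static int unuse(int fail_charge, int fail_preload, int found)
{
	int error;

	error = charge_page(fail_charge);	/* may sleep: done before the mutex */
	if (error)
		goto out;
	error = preload_tree(fail_preload);	/* likewise done while we can wait */
	if (error)
		goto uncharge;

	/* ... mutex_lock(), walk the swaplist, mutex_unlock() ... */

uncharge:
	if (!found)
		uncharge_page();		/* nothing consumed the precharge */
	if (found < 0)
		error = found;
out:
	return error;
}

int main(void)
{
	printf("charge fails  -> %d\n", unuse(1, 0, 0));
	printf("preload fails -> %d\n", unuse(0, 1, 0));
	printf("nothing found -> %d\n", unuse(0, 0, 0));
	printf("page found    -> %d\n", unuse(0, 0, 1));
	return 0;
}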
@@ -1064,7 +1062,25 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	else
 		swap.val = 0;
 
+	/*
+	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
+	 * if it's not already there. Do it now because we cannot take
+	 * mutex while holding spinlock, and must do so before the page
+	 * is moved to swap cache, when its pagelock no longer protects
+	 * the inode from eviction. But don't unlock the mutex until
+	 * we've taken the spinlock, because shmem_unuse_inode() will
+	 * prune a !swapped inode from the swaplist under both locks.
+	 */
+	if (swap.val) {
+		mutex_lock(&shmem_swaplist_mutex);
+		if (list_empty(&info->swaplist))
+			list_add_tail(&info->swaplist, &shmem_swaplist);
+	}
+
 	spin_lock(&info->lock);
+	if (swap.val)
+		mutex_unlock(&shmem_swaplist_mutex);
+
 	if (index >= info->next_index) {
 		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
 		goto unlock;
@@ -1084,21 +1100,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		delete_from_page_cache(page);
 		shmem_swp_set(info, entry, swap.val);
 		shmem_swp_unmap(entry);
-		if (list_empty(&info->swaplist))
-			inode = igrab(inode);
-		else
-			inode = NULL;
 		spin_unlock(&info->lock);
 		swap_shmem_alloc(swap);
 		BUG_ON(page_mapped(page));
 		swap_writepage(page, wbc);
-		if (inode) {
-			mutex_lock(&shmem_swaplist_mutex);
-			/* move instead of add in case we're racing */
-			list_move_tail(&info->swaplist, &shmem_swaplist);
-			mutex_unlock(&shmem_swaplist_mutex);
-			iput(inode);
-		}
 		return 0;
 	}
 
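shmem_writepage() now takes shmem_swaplist_mutex before info->lock and only drops the mutex once the spinlock is held, so shmem_unuse_inode() can never prune the inode in the window between them. A minimal sketch of that lock handoff, assuming plain pthread mutexes as stand-ins for the kernel mutex and spinlock:

#include <pthread.h>
#include <stdio.h>

/* Illustrative only: take the outer lock, then the inner lock, and release
 * the outer one only after the inner one is held. */
static pthread_mutex_t swaplist_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t info_lock = PTHREAD_MUTEX_INITIALIZER;

static void writepage_path(void)
{
	pthread_mutex_lock(&swaplist_lock);	/* add the inode to the swaplist */
	puts("inode on swaplist");

	pthread_mutex_lock(&info_lock);		/* take the inner lock first... */
	pthread_mutex_unlock(&swaplist_lock);	/* ...then let the outer one go */

	puts("inner lock held; safe to move the page to swap cache");
	pthread_mutex_unlock(&info_lock);
}

int main(void)
{
	writepage_path();	/* single-threaded demo of the ordering */
	return 0;
}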
@@ -1400,20 +1405,14 @@ repeat:
 		if (sbinfo->max_blocks) {
 			if (percpu_counter_compare(&sbinfo->used_blocks,
 						sbinfo->max_blocks) >= 0 ||
-			    shmem_acct_block(info->flags)) {
-				spin_unlock(&info->lock);
-				error = -ENOSPC;
-				goto failed;
-			}
+			    shmem_acct_block(info->flags))
+				goto nospace;
 			percpu_counter_inc(&sbinfo->used_blocks);
 			spin_lock(&inode->i_lock);
 			inode->i_blocks += BLOCKS_PER_PAGE;
 			spin_unlock(&inode->i_lock);
-		} else if (shmem_acct_block(info->flags)) {
-			spin_unlock(&info->lock);
-			error = -ENOSPC;
-			goto failed;
-		}
+		} else if (shmem_acct_block(info->flags))
+			goto nospace;
 
 		if (!filepage) {
 			int ret;
@@ -1493,6 +1492,24 @@ done:
 	error = 0;
 	goto out;
 
+nospace:
+	/*
+	 * Perhaps the page was brought in from swap between find_lock_page
+	 * and taking info->lock? We allow for that at add_to_page_cache_lru,
+	 * but must also avoid reporting a spurious ENOSPC while working on a
+	 * full tmpfs. (When filepage has been passed in to shmem_getpage, it
+	 * is already in page cache, which prevents this race from occurring.)
+	 */
+	if (!filepage) {
+		struct page *page = find_get_page(mapping, idx);
+		if (page) {
+			spin_unlock(&info->lock);
+			page_cache_release(page);
+			goto repeat;
+		}
+	}
+	spin_unlock(&info->lock);
+	error = -ENOSPC;
 failed:
 	if (*pagep != filepage) {
 		unlock_page(filepage);
diff --git a/mm/swap.c b/mm/swap.c
index a448db377cb0..5602f1a1b1e7 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -396,6 +396,9 @@ static void lru_deactivate_fn(struct page *page, void *arg)
 	if (!PageLRU(page))
 		return;
 
+	if (PageUnevictable(page))
+		return;
+
 	/* Some processes are using the page */
 	if (page_mapped(page))
 		return;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f6b435c80079..8bfd45050a61 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -937,7 +937,7 @@ keep_lumpy:
 	 * back off and wait for congestion to clear because further reclaim
 	 * will encounter the same problem
 	 */
-	if (nr_dirty == nr_congested && nr_dirty != 0)
+	if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc))
 		zone_set_flag(zone, ZONE_CONGESTED);
 
 	free_page_list(&free_pages);