aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2010-02-25 03:40:22 -0500
committerIngo Molnar <mingo@elte.hu>2010-02-25 03:40:26 -0500
commit996de8c6fe95c5a9fc524241cc8f142ef0605d3d (patch)
tree0f637ab0d80d6d7e213707ac2d8c1cc16b69523c /mm
parent017c426138122c8e9b9f5057fbd0567c37b35247 (diff)
parent60b341b778cc2929df16c0a504c91621b3c6a4ad (diff)
Merge commit 'v2.6.33' into core/rcu
Merge reason: Update from -rc4 to -final. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm')
-rw-r--r--mm/filemap.c103
-rw-r--r--mm/hugetlb.c7
-rw-r--r--mm/memcontrol.c11
-rw-r--r--mm/migrate.c39
-rw-r--r--mm/nommu.c102
-rw-r--r--mm/oom_kill.c2
-rw-r--r--mm/page_alloc.c9
-rw-r--r--mm/truncate.c30
-rw-r--r--mm/util.c2
-rw-r--r--mm/vmalloc.c114
-rw-r--r--mm/vmscan.c3
11 files changed, 279 insertions, 143 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 96ac6b0eb6cb..698ea80f2102 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1634,14 +1634,15 @@ EXPORT_SYMBOL(generic_file_readonly_mmap);
1634static struct page *__read_cache_page(struct address_space *mapping, 1634static struct page *__read_cache_page(struct address_space *mapping,
1635 pgoff_t index, 1635 pgoff_t index,
1636 int (*filler)(void *,struct page*), 1636 int (*filler)(void *,struct page*),
1637 void *data) 1637 void *data,
1638 gfp_t gfp)
1638{ 1639{
1639 struct page *page; 1640 struct page *page;
1640 int err; 1641 int err;
1641repeat: 1642repeat:
1642 page = find_get_page(mapping, index); 1643 page = find_get_page(mapping, index);
1643 if (!page) { 1644 if (!page) {
1644 page = page_cache_alloc_cold(mapping); 1645 page = __page_cache_alloc(gfp | __GFP_COLD);
1645 if (!page) 1646 if (!page)
1646 return ERR_PTR(-ENOMEM); 1647 return ERR_PTR(-ENOMEM);
1647 err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); 1648 err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
@@ -1661,31 +1662,18 @@ repeat:
1661 return page; 1662 return page;
1662} 1663}
1663 1664
1664/** 1665static struct page *do_read_cache_page(struct address_space *mapping,
1665 * read_cache_page_async - read into page cache, fill it if needed
1666 * @mapping: the page's address_space
1667 * @index: the page index
1668 * @filler: function to perform the read
1669 * @data: destination for read data
1670 *
1671 * Same as read_cache_page, but don't wait for page to become unlocked
1672 * after submitting it to the filler.
1673 *
1674 * Read into the page cache. If a page already exists, and PageUptodate() is
1675 * not set, try to fill the page but don't wait for it to become unlocked.
1676 *
1677 * If the page does not get brought uptodate, return -EIO.
1678 */
1679struct page *read_cache_page_async(struct address_space *mapping,
1680 pgoff_t index, 1666 pgoff_t index,
1681 int (*filler)(void *,struct page*), 1667 int (*filler)(void *,struct page*),
1682 void *data) 1668 void *data,
1669 gfp_t gfp)
1670
1683{ 1671{
1684 struct page *page; 1672 struct page *page;
1685 int err; 1673 int err;
1686 1674
1687retry: 1675retry:
1688 page = __read_cache_page(mapping, index, filler, data); 1676 page = __read_cache_page(mapping, index, filler, data, gfp);
1689 if (IS_ERR(page)) 1677 if (IS_ERR(page))
1690 return page; 1678 return page;
1691 if (PageUptodate(page)) 1679 if (PageUptodate(page))
@@ -1710,8 +1698,67 @@ out:
1710 mark_page_accessed(page); 1698 mark_page_accessed(page);
1711 return page; 1699 return page;
1712} 1700}
1701
1702/**
1703 * read_cache_page_async - read into page cache, fill it if needed
1704 * @mapping: the page's address_space
1705 * @index: the page index
1706 * @filler: function to perform the read
1707 * @data: destination for read data
1708 *
1709 * Same as read_cache_page, but don't wait for page to become unlocked
1710 * after submitting it to the filler.
1711 *
1712 * Read into the page cache. If a page already exists, and PageUptodate() is
1713 * not set, try to fill the page but don't wait for it to become unlocked.
1714 *
1715 * If the page does not get brought uptodate, return -EIO.
1716 */
1717struct page *read_cache_page_async(struct address_space *mapping,
1718 pgoff_t index,
1719 int (*filler)(void *,struct page*),
1720 void *data)
1721{
1722 return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
1723}
1713EXPORT_SYMBOL(read_cache_page_async); 1724EXPORT_SYMBOL(read_cache_page_async);
1714 1725
1726static struct page *wait_on_page_read(struct page *page)
1727{
1728 if (!IS_ERR(page)) {
1729 wait_on_page_locked(page);
1730 if (!PageUptodate(page)) {
1731 page_cache_release(page);
1732 page = ERR_PTR(-EIO);
1733 }
1734 }
1735 return page;
1736}
1737
1738/**
1739 * read_cache_page_gfp - read into page cache, using specified page allocation flags.
1740 * @mapping: the page's address_space
1741 * @index: the page index
1742 * @gfp: the page allocator flags to use if allocating
1743 *
1744 * This is the same as "read_mapping_page(mapping, index, NULL)", but with
1745 * any new page allocations done using the specified allocation flags. Note
1746 * that the Radix tree operations will still use GFP_KERNEL, so you can't
1747 * expect to do this atomically or anything like that - but you can pass in
1748 * other page requirements.
1749 *
1750 * If the page does not get brought uptodate, return -EIO.
1751 */
1752struct page *read_cache_page_gfp(struct address_space *mapping,
1753 pgoff_t index,
1754 gfp_t gfp)
1755{
1756 filler_t *filler = (filler_t *)mapping->a_ops->readpage;
1757
1758 return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp));
1759}
1760EXPORT_SYMBOL(read_cache_page_gfp);
1761
1715/** 1762/**
1716 * read_cache_page - read into page cache, fill it if needed 1763 * read_cache_page - read into page cache, fill it if needed
1717 * @mapping: the page's address_space 1764 * @mapping: the page's address_space
@@ -1729,18 +1776,7 @@ struct page *read_cache_page(struct address_space *mapping,
1729 int (*filler)(void *,struct page*), 1776 int (*filler)(void *,struct page*),
1730 void *data) 1777 void *data)
1731{ 1778{
1732 struct page *page; 1779 return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
1733
1734 page = read_cache_page_async(mapping, index, filler, data);
1735 if (IS_ERR(page))
1736 goto out;
1737 wait_on_page_locked(page);
1738 if (!PageUptodate(page)) {
1739 page_cache_release(page);
1740 page = ERR_PTR(-EIO);
1741 }
1742 out:
1743 return page;
1744} 1780}
1745EXPORT_SYMBOL(read_cache_page); 1781EXPORT_SYMBOL(read_cache_page);
1746 1782
@@ -2196,6 +2232,9 @@ again:
2196 if (unlikely(status)) 2232 if (unlikely(status))
2197 break; 2233 break;
2198 2234
2235 if (mapping_writably_mapped(mapping))
2236 flush_dcache_page(page);
2237
2199 pagefault_disable(); 2238 pagefault_disable();
2200 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); 2239 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
2201 pagefault_enable(); 2240 pagefault_enable();
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e91b81b63670..2d16fa6b8c2d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1515,10 +1515,9 @@ static struct attribute_group hstate_attr_group = {
1515 .attrs = hstate_attrs, 1515 .attrs = hstate_attrs,
1516}; 1516};
1517 1517
1518static int __init hugetlb_sysfs_add_hstate(struct hstate *h, 1518static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent,
1519 struct kobject *parent, 1519 struct kobject **hstate_kobjs,
1520 struct kobject **hstate_kobjs, 1520 struct attribute_group *hstate_attr_group)
1521 struct attribute_group *hstate_attr_group)
1522{ 1521{
1523 int retval; 1522 int retval;
1524 int hi = h - hstates; 1523 int hi = h - hstates;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 488b644e0e8e..954032b80bed 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2586,7 +2586,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
2586 if (free_all) 2586 if (free_all)
2587 goto try_to_free; 2587 goto try_to_free;
2588move_account: 2588move_account:
2589 while (mem->res.usage > 0) { 2589 do {
2590 ret = -EBUSY; 2590 ret = -EBUSY;
2591 if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) 2591 if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
2592 goto out; 2592 goto out;
@@ -2614,8 +2614,8 @@ move_account:
2614 if (ret == -ENOMEM) 2614 if (ret == -ENOMEM)
2615 goto try_to_free; 2615 goto try_to_free;
2616 cond_resched(); 2616 cond_resched();
2617 } 2617 /* "ret" should also be checked to ensure all lists are empty. */
2618 ret = 0; 2618 } while (mem->res.usage > 0 || ret);
2619out: 2619out:
2620 css_put(&mem->css); 2620 css_put(&mem->css);
2621 return ret; 2621 return ret;
@@ -2648,10 +2648,7 @@ try_to_free:
2648 } 2648 }
2649 lru_add_drain(); 2649 lru_add_drain();
2650 /* try move_account...there may be some *locked* pages. */ 2650 /* try move_account...there may be some *locked* pages. */
2651 if (mem->res.usage) 2651 goto move_account;
2652 goto move_account;
2653 ret = 0;
2654 goto out;
2655} 2652}
2656 2653
2657int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) 2654int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
diff --git a/mm/migrate.c b/mm/migrate.c
index efddbf0926b2..880bd592d38e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -912,6 +912,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
912 goto out_pm; 912 goto out_pm;
913 913
914 err = -ENODEV; 914 err = -ENODEV;
915 if (node < 0 || node >= MAX_NUMNODES)
916 goto out_pm;
917
915 if (!node_state(node, N_HIGH_MEMORY)) 918 if (!node_state(node, N_HIGH_MEMORY))
916 goto out_pm; 919 goto out_pm;
917 920
@@ -999,33 +1002,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
999#define DO_PAGES_STAT_CHUNK_NR 16 1002#define DO_PAGES_STAT_CHUNK_NR 16
1000 const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR]; 1003 const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
1001 int chunk_status[DO_PAGES_STAT_CHUNK_NR]; 1004 int chunk_status[DO_PAGES_STAT_CHUNK_NR];
1002 unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR;
1003 int err;
1004 1005
1005 for (i = 0; i < nr_pages; i += chunk_nr) { 1006 while (nr_pages) {
1006 if (chunk_nr > nr_pages - i) 1007 unsigned long chunk_nr;
1007 chunk_nr = nr_pages - i;
1008 1008
1009 err = copy_from_user(chunk_pages, &pages[i], 1009 chunk_nr = nr_pages;
1010 chunk_nr * sizeof(*chunk_pages)); 1010 if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
1011 if (err) { 1011 chunk_nr = DO_PAGES_STAT_CHUNK_NR;
1012 err = -EFAULT; 1012
1013 goto out; 1013 if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
1014 } 1014 break;
1015 1015
1016 do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status); 1016 do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
1017 1017
1018 err = copy_to_user(&status[i], chunk_status, 1018 if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
1019 chunk_nr * sizeof(*chunk_status)); 1019 break;
1020 if (err) {
1021 err = -EFAULT;
1022 goto out;
1023 }
1024 }
1025 err = 0;
1026 1020
1027out: 1021 pages += chunk_nr;
1028 return err; 1022 status += chunk_nr;
1023 nr_pages -= chunk_nr;
1024 }
1025 return nr_pages ? -EFAULT : 0;
1029} 1026}
1030 1027
1031/* 1028/*
diff --git a/mm/nommu.c b/mm/nommu.c
index 17773862619b..48a2ecfaf059 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -552,11 +552,11 @@ static void free_page_series(unsigned long from, unsigned long to)
552static void __put_nommu_region(struct vm_region *region) 552static void __put_nommu_region(struct vm_region *region)
553 __releases(nommu_region_sem) 553 __releases(nommu_region_sem)
554{ 554{
555 kenter("%p{%d}", region, atomic_read(&region->vm_usage)); 555 kenter("%p{%d}", region, region->vm_usage);
556 556
557 BUG_ON(!nommu_region_tree.rb_node); 557 BUG_ON(!nommu_region_tree.rb_node);
558 558
559 if (atomic_dec_and_test(&region->vm_usage)) { 559 if (--region->vm_usage == 0) {
560 if (region->vm_top > region->vm_start) 560 if (region->vm_top > region->vm_start)
561 delete_nommu_region(region); 561 delete_nommu_region(region);
562 up_write(&nommu_region_sem); 562 up_write(&nommu_region_sem);
@@ -1205,7 +1205,7 @@ unsigned long do_mmap_pgoff(struct file *file,
1205 if (!vma) 1205 if (!vma)
1206 goto error_getting_vma; 1206 goto error_getting_vma;
1207 1207
1208 atomic_set(&region->vm_usage, 1); 1208 region->vm_usage = 1;
1209 region->vm_flags = vm_flags; 1209 region->vm_flags = vm_flags;
1210 region->vm_pgoff = pgoff; 1210 region->vm_pgoff = pgoff;
1211 1211
@@ -1272,7 +1272,7 @@ unsigned long do_mmap_pgoff(struct file *file,
1272 } 1272 }
1273 1273
1274 /* we've found a region we can share */ 1274 /* we've found a region we can share */
1275 atomic_inc(&pregion->vm_usage); 1275 pregion->vm_usage++;
1276 vma->vm_region = pregion; 1276 vma->vm_region = pregion;
1277 start = pregion->vm_start; 1277 start = pregion->vm_start;
1278 start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; 1278 start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
@@ -1289,7 +1289,7 @@ unsigned long do_mmap_pgoff(struct file *file,
1289 vma->vm_region = NULL; 1289 vma->vm_region = NULL;
1290 vma->vm_start = 0; 1290 vma->vm_start = 0;
1291 vma->vm_end = 0; 1291 vma->vm_end = 0;
1292 atomic_dec(&pregion->vm_usage); 1292 pregion->vm_usage--;
1293 pregion = NULL; 1293 pregion = NULL;
1294 goto error_just_free; 1294 goto error_just_free;
1295 } 1295 }
@@ -1441,10 +1441,9 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
1441 1441
1442 kenter(""); 1442 kenter("");
1443 1443
1444 /* we're only permitted to split anonymous regions that have a single 1444 /* we're only permitted to split anonymous regions (these should have
1445 * owner */ 1445 * only a single usage on the region) */
1446 if (vma->vm_file || 1446 if (vma->vm_file)
1447 atomic_read(&vma->vm_region->vm_usage) != 1)
1448 return -ENOMEM; 1447 return -ENOMEM;
1449 1448
1450 if (mm->map_count >= sysctl_max_map_count) 1449 if (mm->map_count >= sysctl_max_map_count)
@@ -1518,7 +1517,7 @@ static int shrink_vma(struct mm_struct *mm,
1518 1517
1519 /* cut the backing region down to size */ 1518 /* cut the backing region down to size */
1520 region = vma->vm_region; 1519 region = vma->vm_region;
1521 BUG_ON(atomic_read(&region->vm_usage) != 1); 1520 BUG_ON(region->vm_usage != 1);
1522 1521
1523 down_write(&nommu_region_sem); 1522 down_write(&nommu_region_sem);
1524 delete_nommu_region(region); 1523 delete_nommu_region(region);
@@ -1762,27 +1761,6 @@ void unmap_mapping_range(struct address_space *mapping,
1762EXPORT_SYMBOL(unmap_mapping_range); 1761EXPORT_SYMBOL(unmap_mapping_range);
1763 1762
1764/* 1763/*
1765 * ask for an unmapped area at which to create a mapping on a file
1766 */
1767unsigned long get_unmapped_area(struct file *file, unsigned long addr,
1768 unsigned long len, unsigned long pgoff,
1769 unsigned long flags)
1770{
1771 unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
1772 unsigned long, unsigned long);
1773
1774 get_area = current->mm->get_unmapped_area;
1775 if (file && file->f_op && file->f_op->get_unmapped_area)
1776 get_area = file->f_op->get_unmapped_area;
1777
1778 if (!get_area)
1779 return -ENOSYS;
1780
1781 return get_area(file, addr, len, pgoff, flags);
1782}
1783EXPORT_SYMBOL(get_unmapped_area);
1784
1785/*
1786 * Check that a process has enough memory to allocate a new virtual 1764 * Check that a process has enough memory to allocate a new virtual
1787 * mapping. 0 means there is enough memory for the allocation to 1765 * mapping. 0 means there is enough memory for the allocation to
1788 * succeed and -ENOMEM implies there is not. 1766 * succeed and -ENOMEM implies there is not.
@@ -1936,3 +1914,65 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
1936 mmput(mm); 1914 mmput(mm);
1937 return len; 1915 return len;
1938} 1916}
1917
1918/**
1919 * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode
1920 * @inode: The inode to check
1921 * @size: The current filesize of the inode
1922 * @newsize: The proposed filesize of the inode
1923 *
1924 * Check the shared mappings on an inode on behalf of a shrinking truncate to
1925 * make sure that that any outstanding VMAs aren't broken and then shrink the
1926 * vm_regions that extend that beyond so that do_mmap_pgoff() doesn't
1927 * automatically grant mappings that are too large.
1928 */
1929int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
1930 size_t newsize)
1931{
1932 struct vm_area_struct *vma;
1933 struct prio_tree_iter iter;
1934 struct vm_region *region;
1935 pgoff_t low, high;
1936 size_t r_size, r_top;
1937
1938 low = newsize >> PAGE_SHIFT;
1939 high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
1940
1941 down_write(&nommu_region_sem);
1942
1943 /* search for VMAs that fall within the dead zone */
1944 vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
1945 low, high) {
1946 /* found one - only interested if it's shared out of the page
1947 * cache */
1948 if (vma->vm_flags & VM_SHARED) {
1949 up_write(&nommu_region_sem);
1950 return -ETXTBSY; /* not quite true, but near enough */
1951 }
1952 }
1953
1954 /* reduce any regions that overlap the dead zone - if in existence,
1955 * these will be pointed to by VMAs that don't overlap the dead zone
1956 *
1957 * we don't check for any regions that start beyond the EOF as there
1958 * shouldn't be any
1959 */
1960 vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
1961 0, ULONG_MAX) {
1962 if (!(vma->vm_flags & VM_SHARED))
1963 continue;
1964
1965 region = vma->vm_region;
1966 r_size = region->vm_top - region->vm_start;
1967 r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size;
1968
1969 if (r_top > newsize) {
1970 region->vm_top -= r_top - newsize;
1971 if (region->vm_end > region->vm_top)
1972 region->vm_end = region->vm_top;
1973 }
1974 }
1975
1976 up_write(&nommu_region_sem);
1977 return 0;
1978}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f52481b1c1e5..237050478f28 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -459,6 +459,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
459 list_for_each_entry(c, &p->children, sibling) { 459 list_for_each_entry(c, &p->children, sibling) {
460 if (c->mm == p->mm) 460 if (c->mm == p->mm)
461 continue; 461 continue;
462 if (mem && !task_in_mem_cgroup(c, mem))
463 continue;
462 if (!oom_kill_task(c)) 464 if (!oom_kill_task(c))
463 return 0; 465 return 0;
464 } 466 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4e9f5cc5fb59..8deb9d0fd5b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -556,8 +556,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
556 page = list_entry(list->prev, struct page, lru); 556 page = list_entry(list->prev, struct page, lru);
557 /* must delete as __free_one_page list manipulates */ 557 /* must delete as __free_one_page list manipulates */
558 list_del(&page->lru); 558 list_del(&page->lru);
559 __free_one_page(page, zone, 0, migratetype); 559 /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
560 trace_mm_page_pcpu_drain(page, 0, migratetype); 560 __free_one_page(page, zone, 0, page_private(page));
561 trace_mm_page_pcpu_drain(page, 0, page_private(page));
561 } while (--count && --batch_free && !list_empty(list)); 562 } while (--count && --batch_free && !list_empty(list));
562 } 563 }
563 spin_unlock(&zone->lock); 564 spin_unlock(&zone->lock);
@@ -1222,10 +1223,10 @@ again:
1222 } 1223 }
1223 spin_lock_irqsave(&zone->lock, flags); 1224 spin_lock_irqsave(&zone->lock, flags);
1224 page = __rmqueue(zone, order, migratetype); 1225 page = __rmqueue(zone, order, migratetype);
1225 __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
1226 spin_unlock(&zone->lock); 1226 spin_unlock(&zone->lock);
1227 if (!page) 1227 if (!page)
1228 goto failed; 1228 goto failed;
1229 __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
1229 } 1230 }
1230 1231
1231 __count_zone_vm_events(PGALLOC, zone, 1 << order); 1232 __count_zone_vm_events(PGALLOC, zone, 1 << order);
@@ -3998,7 +3999,7 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
3998 } 3999 }
3999 4000
4000 /* Merge backward if suitable */ 4001 /* Merge backward if suitable */
4001 if (start_pfn < early_node_map[i].end_pfn && 4002 if (start_pfn < early_node_map[i].start_pfn &&
4002 end_pfn >= early_node_map[i].start_pfn) { 4003 end_pfn >= early_node_map[i].start_pfn) {
4003 early_node_map[i].start_pfn = start_pfn; 4004 early_node_map[i].start_pfn = start_pfn;
4004 return; 4005 return;
diff --git a/mm/truncate.c b/mm/truncate.c
index 342deee22684..e87e37244829 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -522,22 +522,20 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
522 */ 522 */
523void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) 523void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
524{ 524{
525 if (new < old) { 525 struct address_space *mapping = inode->i_mapping;
526 struct address_space *mapping = inode->i_mapping; 526
527 527 /*
528 /* 528 * unmap_mapping_range is called twice, first simply for
529 * unmap_mapping_range is called twice, first simply for 529 * efficiency so that truncate_inode_pages does fewer
530 * efficiency so that truncate_inode_pages does fewer 530 * single-page unmaps. However after this first call, and
531 * single-page unmaps. However after this first call, and 531 * before truncate_inode_pages finishes, it is possible for
532 * before truncate_inode_pages finishes, it is possible for 532 * private pages to be COWed, which remain after
533 * private pages to be COWed, which remain after 533 * truncate_inode_pages finishes, hence the second
534 * truncate_inode_pages finishes, hence the second 534 * unmap_mapping_range call must be made for correctness.
535 * unmap_mapping_range call must be made for correctness. 535 */
536 */ 536 unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
537 unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); 537 truncate_inode_pages(mapping, new);
538 truncate_inode_pages(mapping, new); 538 unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
539 unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
540 }
541} 539}
542EXPORT_SYMBOL(truncate_pagecache); 540EXPORT_SYMBOL(truncate_pagecache);
543 541
diff --git a/mm/util.c b/mm/util.c
index 7c35ad95f927..834db7be240f 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -220,7 +220,7 @@ char *strndup_user(const char __user *s, long n)
220} 220}
221EXPORT_SYMBOL(strndup_user); 221EXPORT_SYMBOL(strndup_user);
222 222
223#ifndef HAVE_ARCH_PICK_MMAP_LAYOUT 223#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
224void arch_pick_mmap_layout(struct mm_struct *mm) 224void arch_pick_mmap_layout(struct mm_struct *mm)
225{ 225{
226 mm->mmap_base = TASK_UNMAPPED_BASE; 226 mm->mmap_base = TASK_UNMAPPED_BASE;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 37e69295f250..ae007462b7f6 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void)
509 509
510static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); 510static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
511 511
512/* for per-CPU blocks */
513static void purge_fragmented_blocks_allcpus(void);
514
512/* 515/*
513 * Purges all lazily-freed vmap areas. 516 * Purges all lazily-freed vmap areas.
514 * 517 *
@@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
539 } else 542 } else
540 spin_lock(&purge_lock); 543 spin_lock(&purge_lock);
541 544
545 if (sync)
546 purge_fragmented_blocks_allcpus();
547
542 rcu_read_lock(); 548 rcu_read_lock();
543 list_for_each_entry_rcu(va, &vmap_area_list, list) { 549 list_for_each_entry_rcu(va, &vmap_area_list, list) {
544 if (va->flags & VM_LAZY_FREE) { 550 if (va->flags & VM_LAZY_FREE) {
@@ -555,10 +561,8 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
555 } 561 }
556 rcu_read_unlock(); 562 rcu_read_unlock();
557 563
558 if (nr) { 564 if (nr)
559 BUG_ON(nr > atomic_read(&vmap_lazy_nr));
560 atomic_sub(nr, &vmap_lazy_nr); 565 atomic_sub(nr, &vmap_lazy_nr);
561 }
562 566
563 if (nr || force_flush) 567 if (nr || force_flush)
564 flush_tlb_kernel_range(*start, *end); 568 flush_tlb_kernel_range(*start, *end);
@@ -669,8 +673,6 @@ static bool vmap_initialized __read_mostly = false;
669struct vmap_block_queue { 673struct vmap_block_queue {
670 spinlock_t lock; 674 spinlock_t lock;
671 struct list_head free; 675 struct list_head free;
672 struct list_head dirty;
673 unsigned int nr_dirty;
674}; 676};
675 677
676struct vmap_block { 678struct vmap_block {
@@ -680,10 +682,9 @@ struct vmap_block {
680 unsigned long free, dirty; 682 unsigned long free, dirty;
681 DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); 683 DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
682 DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); 684 DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
683 union { 685 struct list_head free_list;
684 struct list_head free_list; 686 struct rcu_head rcu_head;
685 struct rcu_head rcu_head; 687 struct list_head purge;
686 };
687}; 688};
688 689
689/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ 690/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
@@ -759,7 +760,7 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
759 vbq = &get_cpu_var(vmap_block_queue); 760 vbq = &get_cpu_var(vmap_block_queue);
760 vb->vbq = vbq; 761 vb->vbq = vbq;
761 spin_lock(&vbq->lock); 762 spin_lock(&vbq->lock);
762 list_add(&vb->free_list, &vbq->free); 763 list_add_rcu(&vb->free_list, &vbq->free);
763 spin_unlock(&vbq->lock); 764 spin_unlock(&vbq->lock);
764 put_cpu_var(vmap_block_queue); 765 put_cpu_var(vmap_block_queue);
765 766
@@ -778,8 +779,6 @@ static void free_vmap_block(struct vmap_block *vb)
778 struct vmap_block *tmp; 779 struct vmap_block *tmp;
779 unsigned long vb_idx; 780 unsigned long vb_idx;
780 781
781 BUG_ON(!list_empty(&vb->free_list));
782
783 vb_idx = addr_to_vb_idx(vb->va->va_start); 782 vb_idx = addr_to_vb_idx(vb->va->va_start);
784 spin_lock(&vmap_block_tree_lock); 783 spin_lock(&vmap_block_tree_lock);
785 tmp = radix_tree_delete(&vmap_block_tree, vb_idx); 784 tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
@@ -790,12 +789,61 @@ static void free_vmap_block(struct vmap_block *vb)
790 call_rcu(&vb->rcu_head, rcu_free_vb); 789 call_rcu(&vb->rcu_head, rcu_free_vb);
791} 790}
792 791
792static void purge_fragmented_blocks(int cpu)
793{
794 LIST_HEAD(purge);
795 struct vmap_block *vb;
796 struct vmap_block *n_vb;
797 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
798
799 rcu_read_lock();
800 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
801
802 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
803 continue;
804
805 spin_lock(&vb->lock);
806 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
807 vb->free = 0; /* prevent further allocs after releasing lock */
808 vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
809 bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
810 bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
811 spin_lock(&vbq->lock);
812 list_del_rcu(&vb->free_list);
813 spin_unlock(&vbq->lock);
814 spin_unlock(&vb->lock);
815 list_add_tail(&vb->purge, &purge);
816 } else
817 spin_unlock(&vb->lock);
818 }
819 rcu_read_unlock();
820
821 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
822 list_del(&vb->purge);
823 free_vmap_block(vb);
824 }
825}
826
827static void purge_fragmented_blocks_thiscpu(void)
828{
829 purge_fragmented_blocks(smp_processor_id());
830}
831
832static void purge_fragmented_blocks_allcpus(void)
833{
834 int cpu;
835
836 for_each_possible_cpu(cpu)
837 purge_fragmented_blocks(cpu);
838}
839
793static void *vb_alloc(unsigned long size, gfp_t gfp_mask) 840static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
794{ 841{
795 struct vmap_block_queue *vbq; 842 struct vmap_block_queue *vbq;
796 struct vmap_block *vb; 843 struct vmap_block *vb;
797 unsigned long addr = 0; 844 unsigned long addr = 0;
798 unsigned int order; 845 unsigned int order;
846 int purge = 0;
799 847
800 BUG_ON(size & ~PAGE_MASK); 848 BUG_ON(size & ~PAGE_MASK);
801 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); 849 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
@@ -808,24 +856,38 @@ again:
808 int i; 856 int i;
809 857
810 spin_lock(&vb->lock); 858 spin_lock(&vb->lock);
859 if (vb->free < 1UL << order)
860 goto next;
861
811 i = bitmap_find_free_region(vb->alloc_map, 862 i = bitmap_find_free_region(vb->alloc_map,
812 VMAP_BBMAP_BITS, order); 863 VMAP_BBMAP_BITS, order);
813 864
814 if (i >= 0) { 865 if (i < 0) {
815 addr = vb->va->va_start + (i << PAGE_SHIFT); 866 if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
816 BUG_ON(addr_to_vb_idx(addr) != 867 /* fragmented and no outstanding allocations */
817 addr_to_vb_idx(vb->va->va_start)); 868 BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
818 vb->free -= 1UL << order; 869 purge = 1;
819 if (vb->free == 0) {
820 spin_lock(&vbq->lock);
821 list_del_init(&vb->free_list);
822 spin_unlock(&vbq->lock);
823 } 870 }
824 spin_unlock(&vb->lock); 871 goto next;
825 break; 872 }
873 addr = vb->va->va_start + (i << PAGE_SHIFT);
874 BUG_ON(addr_to_vb_idx(addr) !=
875 addr_to_vb_idx(vb->va->va_start));
876 vb->free -= 1UL << order;
877 if (vb->free == 0) {
878 spin_lock(&vbq->lock);
879 list_del_rcu(&vb->free_list);
880 spin_unlock(&vbq->lock);
826 } 881 }
827 spin_unlock(&vb->lock); 882 spin_unlock(&vb->lock);
883 break;
884next:
885 spin_unlock(&vb->lock);
828 } 886 }
887
888 if (purge)
889 purge_fragmented_blocks_thiscpu();
890
829 put_cpu_var(vmap_block_queue); 891 put_cpu_var(vmap_block_queue);
830 rcu_read_unlock(); 892 rcu_read_unlock();
831 893
@@ -862,11 +924,11 @@ static void vb_free(const void *addr, unsigned long size)
862 BUG_ON(!vb); 924 BUG_ON(!vb);
863 925
864 spin_lock(&vb->lock); 926 spin_lock(&vb->lock);
865 bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order); 927 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
866 928
867 vb->dirty += 1UL << order; 929 vb->dirty += 1UL << order;
868 if (vb->dirty == VMAP_BBMAP_BITS) { 930 if (vb->dirty == VMAP_BBMAP_BITS) {
869 BUG_ON(vb->free || !list_empty(&vb->free_list)); 931 BUG_ON(vb->free);
870 spin_unlock(&vb->lock); 932 spin_unlock(&vb->lock);
871 free_vmap_block(vb); 933 free_vmap_block(vb);
872 } else 934 } else
@@ -1035,8 +1097,6 @@ void __init vmalloc_init(void)
1035 vbq = &per_cpu(vmap_block_queue, i); 1097 vbq = &per_cpu(vmap_block_queue, i);
1036 spin_lock_init(&vbq->lock); 1098 spin_lock_init(&vbq->lock);
1037 INIT_LIST_HEAD(&vbq->free); 1099 INIT_LIST_HEAD(&vbq->free);
1038 INIT_LIST_HEAD(&vbq->dirty);
1039 vbq->nr_dirty = 0;
1040 } 1100 }
1041 1101
1042 /* Import existing vmlist entries. */ 1102 /* Import existing vmlist entries. */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 885207a6b6b7..c26986c85ce0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1922,6 +1922,9 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
1922 if (!populated_zone(zone)) 1922 if (!populated_zone(zone))
1923 continue; 1923 continue;
1924 1924
1925 if (zone_is_all_unreclaimable(zone))
1926 continue;
1927
1925 if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), 1928 if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
1926 0, 0)) 1929 0, 0))
1927 return 1; 1930 return 1;