path: root/mm
author	David S. Miller <davem@davemloft.net>	2010-02-17 01:09:29 -0500
committer	David S. Miller <davem@davemloft.net>	2010-02-17 01:09:29 -0500
commit	2bb4646fce8d09916b351d1a62f98db7cec6fc41 (patch)
tree	c1f0d002e69868606eca9d1b919835f422892063 /mm
parent	6836b9bdd98e3b500cd49512484df68f46e14659 (diff)
parent	b0483e78e5c4c9871fc5541875b3bc006846d46b (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
Diffstat (limited to 'mm')
-rw-r--r--	mm/filemap.c	103
-rw-r--r--	mm/hugetlb.c	7
-rw-r--r--	mm/migrate.c	3
-rw-r--r--	mm/page_alloc.c	5
-rw-r--r--	mm/vmalloc.c	110
5 files changed, 166 insertions(+), 62 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 96ac6b0eb6cb..698ea80f2102 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1634,14 +1634,15 @@ EXPORT_SYMBOL(generic_file_readonly_mmap);
 static struct page *__read_cache_page(struct address_space *mapping,
 				pgoff_t index,
 				int (*filler)(void *,struct page*),
-				void *data)
+				void *data,
+				gfp_t gfp)
 {
 	struct page *page;
 	int err;
 repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
-		page = page_cache_alloc_cold(mapping);
+		page = __page_cache_alloc(gfp | __GFP_COLD);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
 		err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
@@ -1661,31 +1662,18 @@ repeat:
 	return page;
 }
 
-/**
- * read_cache_page_async - read into page cache, fill it if needed
- * @mapping: the page's address_space
- * @index: the page index
- * @filler: function to perform the read
- * @data: destination for read data
- *
- * Same as read_cache_page, but don't wait for page to become unlocked
- * after submitting it to the filler.
- *
- * Read into the page cache. If a page already exists, and PageUptodate() is
- * not set, try to fill the page but don't wait for it to become unlocked.
- *
- * If the page does not get brought uptodate, return -EIO.
- */
-struct page *read_cache_page_async(struct address_space *mapping,
+static struct page *do_read_cache_page(struct address_space *mapping,
 				pgoff_t index,
 				int (*filler)(void *,struct page*),
-				void *data)
+				void *data,
+				gfp_t gfp)
+
 {
 	struct page *page;
 	int err;
 
 retry:
-	page = __read_cache_page(mapping, index, filler, data);
+	page = __read_cache_page(mapping, index, filler, data, gfp);
 	if (IS_ERR(page))
 		return page;
 	if (PageUptodate(page))
@@ -1710,8 +1698,67 @@ out:
 	mark_page_accessed(page);
 	return page;
 }
+
+/**
+ * read_cache_page_async - read into page cache, fill it if needed
+ * @mapping: the page's address_space
+ * @index: the page index
+ * @filler: function to perform the read
+ * @data: destination for read data
+ *
+ * Same as read_cache_page, but don't wait for page to become unlocked
+ * after submitting it to the filler.
+ *
+ * Read into the page cache. If a page already exists, and PageUptodate() is
+ * not set, try to fill the page but don't wait for it to become unlocked.
+ *
+ * If the page does not get brought uptodate, return -EIO.
+ */
+struct page *read_cache_page_async(struct address_space *mapping,
+				pgoff_t index,
+				int (*filler)(void *,struct page*),
+				void *data)
+{
+	return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
+}
 EXPORT_SYMBOL(read_cache_page_async);
 
+static struct page *wait_on_page_read(struct page *page)
+{
+	if (!IS_ERR(page)) {
+		wait_on_page_locked(page);
+		if (!PageUptodate(page)) {
+			page_cache_release(page);
+			page = ERR_PTR(-EIO);
+		}
+	}
+	return page;
+}
+
+/**
+ * read_cache_page_gfp - read into page cache, using specified page allocation flags.
+ * @mapping: the page's address_space
+ * @index: the page index
+ * @gfp: the page allocator flags to use if allocating
+ *
+ * This is the same as "read_mapping_page(mapping, index, NULL)", but with
+ * any new page allocations done using the specified allocation flags. Note
+ * that the Radix tree operations will still use GFP_KERNEL, so you can't
+ * expect to do this atomically or anything like that - but you can pass in
+ * other page requirements.
+ *
+ * If the page does not get brought uptodate, return -EIO.
+ */
+struct page *read_cache_page_gfp(struct address_space *mapping,
+				pgoff_t index,
+				gfp_t gfp)
+{
+	filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+
+	return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp));
+}
+EXPORT_SYMBOL(read_cache_page_gfp);
+
 /**
  * read_cache_page - read into page cache, fill it if needed
  * @mapping: the page's address_space
@@ -1729,18 +1776,7 @@ struct page *read_cache_page(struct address_space *mapping,
 				int (*filler)(void *,struct page*),
 				void *data)
 {
-	struct page *page;
-
-	page = read_cache_page_async(mapping, index, filler, data);
-	if (IS_ERR(page))
-		goto out;
-	wait_on_page_locked(page);
-	if (!PageUptodate(page)) {
-		page_cache_release(page);
-		page = ERR_PTR(-EIO);
-	}
- out:
-	return page;
+	return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
 }
 EXPORT_SYMBOL(read_cache_page);
 
@@ -2196,6 +2232,9 @@ again:
 		if (unlikely(status))
 			break;
 
+		if (mapping_writably_mapped(mapping))
+			flush_dcache_page(page);
+
 		pagefault_disable();
 		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 		pagefault_enable();
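
Usage note (reviewer sketch, not part of the patch): the new read_cache_page_gfp() lets a caller read a page through the page cache while constraining how any newly allocated page is obtained, for example with GFP_NOFS to avoid recursing into the filesystem from reclaim. The signature comes from the hunk above; my_read_meta_page() is a hypothetical caller shown only for illustration.

static struct page *my_read_meta_page(struct address_space *mapping,
				      pgoff_t index)
{
	struct page *page;

	/*
	 * Any newly allocated cache page uses GFP_NOFS here; the radix
	 * tree insertion itself still uses GFP_KERNEL, as the kernel-doc
	 * in the patch points out.
	 */
	page = read_cache_page_gfp(mapping, index, GFP_NOFS);
	if (IS_ERR(page))
		return page;	/* -ENOMEM from allocation or -EIO from the filler */

	/* Page is uptodate here; the caller drops the reference when done. */
	return page;
}
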
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e91b81b63670..2d16fa6b8c2d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1515,10 +1515,9 @@ static struct attribute_group hstate_attr_group = {
 	.attrs = hstate_attrs,
 };
 
-static int __init hugetlb_sysfs_add_hstate(struct hstate *h,
-				struct kobject *parent,
-				struct kobject **hstate_kobjs,
-				struct attribute_group *hstate_attr_group)
+static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent,
+				    struct kobject **hstate_kobjs,
+				    struct attribute_group *hstate_attr_group)
 {
 	int retval;
 	int hi = h - hstates;
diff --git a/mm/migrate.c b/mm/migrate.c
index efddbf0926b2..9a0db5bbabe4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -912,6 +912,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
 			goto out_pm;
 
 		err = -ENODEV;
+		if (node < 0 || node >= MAX_NUMNODES)
+			goto out_pm;
+
 		if (!node_state(node, N_HIGH_MEMORY))
 			goto out_pm;
 
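
Reviewer sketch (not part of the patch): the added bounds check matters because the node id in do_pages_move() comes from the user's move_pages() request, and node_state() indexes fixed-size nodemasks, so an out-of-range value must be rejected with -ENODEV before it is used. A hypothetical helper restating the same validation:

/* Hypothetical helper, shown only to restate the check added above. */
static inline bool move_pages_node_valid(int node)
{
	if (node < 0 || node >= MAX_NUMNODES)
		return false;		/* caller returns -ENODEV */
	return node_state(node, N_HIGH_MEMORY);
}
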
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d2a8889b4c58..8deb9d0fd5b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -556,8 +556,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			page = list_entry(list->prev, struct page, lru);
 			/* must delete as __free_one_page list manipulates */
 			list_del(&page->lru);
-			__free_one_page(page, zone, 0, migratetype);
-			trace_mm_page_pcpu_drain(page, 0, migratetype);
+			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
+			__free_one_page(page, zone, 0, page_private(page));
+			trace_mm_page_pcpu_drain(page, 0, page_private(page));
 		} while (--count && --batch_free && !list_empty(list));
 	}
 	spin_unlock(&zone->lock);
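
Reviewer sketch (not part of the patch): on this kernel the migratetype a page carried when it was freed to the per-cpu list is recorded in page_private(), and the MIGRATE_MOVABLE pcp list can also hold, for example, MIGRATE_RESERVE pages, so the bulk drain must free each page with its own recorded type rather than the list's nominal one. A hypothetical accessor naming that value:

/*
 * Hypothetical accessor; page_private() is where the per-cpu freeing
 * path recorded the page's own migratetype before it went onto the list.
 */
static inline int pcp_page_migratetype(struct page *page)
{
	return page_private(page);
}
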
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d55d905463eb..ae007462b7f6 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void)
 
 static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
 
+/* for per-CPU blocks */
+static void purge_fragmented_blocks_allcpus(void);
+
 /*
  * Purges all lazily-freed vmap areas.
  *
@@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
 	} else
 		spin_lock(&purge_lock);
 
+	if (sync)
+		purge_fragmented_blocks_allcpus();
+
 	rcu_read_lock();
 	list_for_each_entry_rcu(va, &vmap_area_list, list) {
 		if (va->flags & VM_LAZY_FREE) {
@@ -667,8 +673,6 @@ static bool vmap_initialized __read_mostly = false;
 struct vmap_block_queue {
 	spinlock_t lock;
 	struct list_head free;
-	struct list_head dirty;
-	unsigned int nr_dirty;
 };
 
 struct vmap_block {
@@ -678,10 +682,9 @@ struct vmap_block {
 	unsigned long free, dirty;
 	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
 	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
-	union {
-		struct list_head free_list;
-		struct rcu_head rcu_head;
-	};
+	struct list_head free_list;
+	struct rcu_head rcu_head;
+	struct list_head purge;
 };
 
 /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
@@ -757,7 +760,7 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
 	vbq = &get_cpu_var(vmap_block_queue);
 	vb->vbq = vbq;
 	spin_lock(&vbq->lock);
-	list_add(&vb->free_list, &vbq->free);
+	list_add_rcu(&vb->free_list, &vbq->free);
 	spin_unlock(&vbq->lock);
 	put_cpu_var(vmap_block_queue);
 
@@ -776,8 +779,6 @@ static void free_vmap_block(struct vmap_block *vb)
 	struct vmap_block *tmp;
 	unsigned long vb_idx;
 
-	BUG_ON(!list_empty(&vb->free_list));
-
 	vb_idx = addr_to_vb_idx(vb->va->va_start);
 	spin_lock(&vmap_block_tree_lock);
 	tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
@@ -788,12 +789,61 @@ static void free_vmap_block(struct vmap_block *vb)
 	call_rcu(&vb->rcu_head, rcu_free_vb);
 }
 
+static void purge_fragmented_blocks(int cpu)
+{
+	LIST_HEAD(purge);
+	struct vmap_block *vb;
+	struct vmap_block *n_vb;
+	struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+
+		if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
+			continue;
+
+		spin_lock(&vb->lock);
+		if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
+			vb->free = 0; /* prevent further allocs after releasing lock */
+			vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
+			bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
+			bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
+			spin_lock(&vbq->lock);
+			list_del_rcu(&vb->free_list);
+			spin_unlock(&vbq->lock);
+			spin_unlock(&vb->lock);
+			list_add_tail(&vb->purge, &purge);
+		} else
+			spin_unlock(&vb->lock);
+	}
+	rcu_read_unlock();
+
+	list_for_each_entry_safe(vb, n_vb, &purge, purge) {
+		list_del(&vb->purge);
+		free_vmap_block(vb);
+	}
+}
+
+static void purge_fragmented_blocks_thiscpu(void)
+{
+	purge_fragmented_blocks(smp_processor_id());
+}
+
+static void purge_fragmented_blocks_allcpus(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		purge_fragmented_blocks(cpu);
+}
+
 static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
 {
 	struct vmap_block_queue *vbq;
 	struct vmap_block *vb;
 	unsigned long addr = 0;
 	unsigned int order;
+	int purge = 0;
 
 	BUG_ON(size & ~PAGE_MASK);
 	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
@@ -806,24 +856,38 @@ again:
 		int i;
 
 		spin_lock(&vb->lock);
+		if (vb->free < 1UL << order)
+			goto next;
+
 		i = bitmap_find_free_region(vb->alloc_map,
 						VMAP_BBMAP_BITS, order);
 
-		if (i >= 0) {
-			addr = vb->va->va_start + (i << PAGE_SHIFT);
-			BUG_ON(addr_to_vb_idx(addr) !=
-					addr_to_vb_idx(vb->va->va_start));
-			vb->free -= 1UL << order;
-			if (vb->free == 0) {
-				spin_lock(&vbq->lock);
-				list_del_init(&vb->free_list);
-				spin_unlock(&vbq->lock);
+		if (i < 0) {
+			if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
+				/* fragmented and no outstanding allocations */
+				BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
+				purge = 1;
 			}
-			spin_unlock(&vb->lock);
-			break;
+			goto next;
 		}
+		addr = vb->va->va_start + (i << PAGE_SHIFT);
+		BUG_ON(addr_to_vb_idx(addr) !=
+				addr_to_vb_idx(vb->va->va_start));
+		vb->free -= 1UL << order;
+		if (vb->free == 0) {
+			spin_lock(&vbq->lock);
+			list_del_rcu(&vb->free_list);
+			spin_unlock(&vbq->lock);
+		}
+		spin_unlock(&vb->lock);
+		break;
+next:
 		spin_unlock(&vb->lock);
 	}
+
+	if (purge)
+		purge_fragmented_blocks_thiscpu();
+
 	put_cpu_var(vmap_block_queue);
 	rcu_read_unlock();
 
@@ -860,11 +924,11 @@ static void vb_free(const void *addr, unsigned long size)
 	BUG_ON(!vb);
 
 	spin_lock(&vb->lock);
-	bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
+	BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
 
 	vb->dirty += 1UL << order;
 	if (vb->dirty == VMAP_BBMAP_BITS) {
-		BUG_ON(vb->free || !list_empty(&vb->free_list));
+		BUG_ON(vb->free);
 		spin_unlock(&vb->lock);
 		free_vmap_block(vb);
 	} else
@@ -1033,8 +1097,6 @@ void __init vmalloc_init(void)
 		vbq = &per_cpu(vmap_block_queue, i);
 		spin_lock_init(&vbq->lock);
 		INIT_LIST_HEAD(&vbq->free);
-		INIT_LIST_HEAD(&vbq->dirty);
-		vbq->nr_dirty = 0;
 	}
 
 	/* Import existing vmlist entries. */
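
Reviewer sketch (not part of the patch): the purge test that appears twice in purge_fragmented_blocks() reads as "no allocation in this block is still live (free plus dirty covers every bit), yet the block is not entirely dirty, so vb_free() never got the chance to free it whole". A hypothetical predicate spelling that condition out:

/*
 * Hypothetical predicate restating the condition checked (first unlocked,
 * then again under vb->lock) in purge_fragmented_blocks() above.
 */
static inline bool vmap_block_is_fragmented(const struct vmap_block *vb)
{
	return vb->free + vb->dirty == VMAP_BBMAP_BITS &&
	       vb->dirty != VMAP_BBMAP_BITS;
}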