author    Paul Mackerras <paulus@samba.org>  2006-03-16 20:01:19 -0500
committer Paul Mackerras <paulus@samba.org>  2006-03-16 20:01:19 -0500
commit    23dd64011285010ac291f7dddf6e287bdb43a0ad
tree      0e4f4569d38d82f4dceb4150d5ad940e0fd5f24f /mm
parent    516450179454de9e689e0a53ed8f34b896e8651c
parent    485ff09990416c75ae9593ddc71619939ab9dd51
Merge ../linux-2.6
Diffstat (limited to 'mm')
-rw-r--r--  mm/memory_hotplug.c    1
-rw-r--r--  mm/mempolicy.c         8
-rw-r--r--  mm/page_alloc.c       17
-rw-r--r--  mm/rmap.c              3
-rw-r--r--  mm/slab.c             65
-rw-r--r--  mm/vmscan.c           21
6 files changed, 89 insertions, 26 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a918f77f02f3..1fe76d963ac2 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -130,6 +130,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 		onlined_pages++;
 	}
 	zone->present_pages += onlined_pages;
+	zone->zone_pgdat->node_present_pages += onlined_pages;
 
 	setup_per_zone_pages_min();
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 954981b14303..2a8206009422 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -748,7 +748,7 @@ long do_mbind(unsigned long start, unsigned long len,
 		    MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
 	    || mode > MPOL_MAX)
 		return -EINVAL;
-	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE))
+	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
 		return -EPERM;
 
 	if (start & ~PAGE_MASK)
@@ -942,20 +942,20 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 	 */
 	if ((current->euid != task->suid) && (current->euid != task->uid) &&
 	    (current->uid != task->suid) && (current->uid != task->uid) &&
-	    !capable(CAP_SYS_ADMIN)) {
+	    !capable(CAP_SYS_NICE)) {
 		err = -EPERM;
 		goto out;
 	}
 
 	task_nodes = cpuset_mems_allowed(task);
 	/* Is the user allowed to access the target nodes? */
-	if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_ADMIN)) {
+	if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_NICE)) {
 		err = -EPERM;
 		goto out;
 	}
 
 	err = do_migrate_pages(mm, &old, &new,
-		capable(CAP_SYS_ADMIN) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
+		capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
 out:
 	mmput(mm);
 	return err;
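
Note: the mempolicy.c hunks above change the privilege check for moving shared pages or pages outside the target task's allowed node set from CAP_SYS_RESOURCE/CAP_SYS_ADMIN to CAP_SYS_NICE. The standalone C sketch below only illustrates the decision pattern ("requested nodes must be a subset of the allowed nodes unless the caller is privileged"); the nodemask type and helper names are simplified stand-ins for the kernel's nodemask_t, nodes_subset() and capable(), not kernel code.

/*
 * Standalone sketch (not kernel code) of the permission pattern used in
 * sys_migrate_pages(): an unprivileged caller may only target nodes inside
 * the task's allowed set; a privileged caller (CAP_SYS_NICE in the kernel)
 * may target any node.
 */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long nodemask;			/* one bit per NUMA node */

static bool mask_subset(nodemask a, nodemask b)
{
	return (a & ~b) == 0;			/* every bit of a is also in b */
}

static int check_migrate_perm(nodemask requested, nodemask allowed,
			      bool privileged)
{
	if (!mask_subset(requested, allowed) && !privileged)
		return -1;			/* the kernel returns -EPERM here */
	return 0;
}

int main(void)
{
	nodemask allowed = 0x3;			/* task may use nodes 0 and 1 */

	printf("%d\n", check_migrate_perm(0x1, allowed, false));	/* 0: ok */
	printf("%d\n", check_migrate_perm(0x4, allowed, false));	/* -1: denied */
	printf("%d\n", check_migrate_perm(0x4, allowed, true));	/* 0: privileged */
	return 0;
}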
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 791690d7d3fa..234bd4895d14 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -590,21 +590,20 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 }
 
 #ifdef CONFIG_NUMA
-/* Called from the slab reaper to drain remote pagesets */
-void drain_remote_pages(void)
+/*
+ * Called from the slab reaper to drain pagesets on a particular node that
+ * belong to the currently executing processor.
+ */
+void drain_node_pages(int nodeid)
 {
-	struct zone *zone;
-	int i;
+	int i, z;
 	unsigned long flags;
 
 	local_irq_save(flags);
-	for_each_zone(zone) {
+	for (z = 0; z < MAX_NR_ZONES; z++) {
+		struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
 		struct per_cpu_pageset *pset;
 
-		/* Do not drain local pagesets */
-		if (zone->zone_pgdat->node_id == numa_node_id())
-			continue;
-
 		pset = zone_pcp(zone, smp_processor_id());
 		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
 			struct per_cpu_pages *pcp;
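
For readers unfamiliar with the pageset draining change above: drain_remote_pages() used to walk every zone in the system and skip the local node, while drain_node_pages(nodeid) walks only the zones of one explicitly named node. The user-space sketch below illustrates that per-node iteration; the data layout and the drain routine are hypothetical simplifications, not the kernel's structures.

/*
 * Simplified, standalone illustration (not kernel code) of the loop change:
 * only the zones belonging to the requested node are visited.
 */
#include <stdio.h>

#define MAX_NR_ZONES	3
#define MAX_NUMNODES	2

struct zone { int pcp_pages; };

/* node_zones[node][z] loosely mirrors NODE_DATA(nodeid)->node_zones + z */
static struct zone node_zones[MAX_NUMNODES][MAX_NR_ZONES] = {
	{ {5}, {2}, {0} },
	{ {7}, {1}, {3} },
};

static void drain_node_pages(int nodeid)
{
	for (int z = 0; z < MAX_NR_ZONES; z++) {
		struct zone *zone = &node_zones[nodeid][z];

		printf("draining %d cached pages from node %d zone %d\n",
		       zone->pcp_pages, nodeid, z);
		zone->pcp_pages = 0;
	}
}

int main(void)
{
	drain_node_pages(1);	/* only node 1's zones are touched */
	return 0;
}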
diff --git a/mm/rmap.c b/mm/rmap.c
index d8ce5ff61454..67f0e20b101f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -537,9 +537,6 @@ void page_add_new_anon_rmap(struct page *page,
  */
 void page_add_file_rmap(struct page *page)
 {
-	BUG_ON(PageAnon(page));
-	BUG_ON(!pfn_valid(page_to_pfn(page)));
-
 	if (atomic_inc_and_test(&page->_mapcount))
 		__inc_page_state(nr_mapped);
 }
diff --git a/mm/slab.c b/mm/slab.c
index 61800b88e241..d0bd7f07ab04 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -789,6 +789,47 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, char *
 	dump_stack();
 }
 
+#ifdef CONFIG_NUMA
+/*
+ * Special reaping functions for NUMA systems called from cache_reap().
+ * These take care of doing round robin flushing of alien caches (containing
+ * objects freed on different nodes from which they were allocated) and the
+ * flushing of remote pcps by calling drain_node_pages.
+ */
+static DEFINE_PER_CPU(unsigned long, reap_node);
+
+static void init_reap_node(int cpu)
+{
+	int node;
+
+	node = next_node(cpu_to_node(cpu), node_online_map);
+	if (node == MAX_NUMNODES)
+		node = 0;
+
+	__get_cpu_var(reap_node) = node;
+}
+
+static void next_reap_node(void)
+{
+	int node = __get_cpu_var(reap_node);
+
+	/*
+	 * Also drain per cpu pages on remote zones
+	 */
+	if (node != numa_node_id())
+		drain_node_pages(node);
+
+	node = next_node(node, node_online_map);
+	if (unlikely(node >= MAX_NUMNODES))
+		node = first_node(node_online_map);
+	__get_cpu_var(reap_node) = node;
+}
+
+#else
+#define init_reap_node(cpu) do { } while (0)
+#define next_reap_node(void) do { } while (0)
+#endif
+
 /*
  * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz
  * via the workqueue/eventd.
@@ -806,6 +847,7 @@ static void __devinit start_cpu_timer(int cpu)
 	 * at that time.
 	 */
 	if (keventd_up() && reap_work->func == NULL) {
+		init_reap_node(cpu);
 		INIT_WORK(reap_work, cache_reap, NULL);
 		schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
 	}
@@ -884,6 +926,23 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
 	}
 }
 
+/*
+ * Called from cache_reap() to regularly drain alien caches round robin.
+ */
+static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
+{
+	int node = __get_cpu_var(reap_node);
+
+	if (l3->alien) {
+		struct array_cache *ac = l3->alien[node];
+		if (ac && ac->avail) {
+			spin_lock_irq(&ac->lock);
+			__drain_alien_cache(cachep, ac, node);
+			spin_unlock_irq(&ac->lock);
+		}
+	}
+}
+
 static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
 {
 	int i = 0;
@@ -902,6 +961,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **al
 #else
 
 #define drain_alien_cache(cachep, alien) do { } while (0)
+#define reap_alien(cachep, l3) do { } while (0)
 
 static inline struct array_cache **alloc_alien_cache(int node, int limit)
 {
@@ -3497,8 +3557,7 @@ static void cache_reap(void *unused)
 		check_irq_on();
 
 		l3 = searchp->nodelists[numa_node_id()];
-		if (l3->alien)
-			drain_alien_cache(searchp, l3->alien);
+		reap_alien(searchp, l3);
 		spin_lock_irq(&l3->list_lock);
 
 		drain_array_locked(searchp, cpu_cache_get(searchp), 0,
@@ -3548,7 +3607,7 @@ static void cache_reap(void *unused)
 	}
 	check_irq_on();
 	mutex_unlock(&cache_chain_mutex);
-	drain_remote_pages();
+	next_reap_node();
 	/* Setup the next iteration */
 	schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
 }
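
The slab.c changes above introduce a per-CPU reap_node cursor that cache_reap() advances round robin over the online nodes, draining one remote node's alien caches and pcp pages per pass. A minimal user-space sketch of that wrap-around selection follows; the bitmap plus the next_node() and first_node() helpers below are simplified stand-ins for the kernel's nodemask API, assuming a small fixed set of online nodes.

/*
 * Standalone sketch (not the kernel implementation) of the round-robin
 * node selection done by init_reap_node()/next_reap_node(): each call
 * advances a cursor to the next online node and wraps around at the end.
 */
#include <stdio.h>

#define MAX_NUMNODES 8

static unsigned int node_online_map = 0x0b;	/* nodes 0, 1 and 3 online */
static int reap_node;				/* per-CPU in the kernel   */

static int next_node(int n, unsigned int map)
{
	for (int i = n + 1; i < MAX_NUMNODES; i++)
		if (map & (1u << i))
			return i;
	return MAX_NUMNODES;			/* no further node found */
}

static int first_node(unsigned int map)
{
	return next_node(-1, map);
}

static void next_reap_node(void)
{
	int node = next_node(reap_node, node_online_map);

	if (node >= MAX_NUMNODES)
		node = first_node(node_online_map);
	reap_node = node;
}

int main(void)
{
	reap_node = first_node(node_online_map);
	for (int i = 0; i < 6; i++) {		/* prints 0 1 3 0 1 3 */
		printf("%d ", reap_node);
		next_reap_node();
	}
	printf("\n");
	return 0;
}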
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b0af7593d01e..4fe7e3aa02e2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -700,7 +700,7 @@ int migrate_page_remove_references(struct page *newpage,
 	 * the page.
 	 */
 	if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
-		return 1;
+		return -EAGAIN;
 
 	/*
 	 * Establish swap ptes for anonymous pages or destroy pte
@@ -721,13 +721,15 @@ int migrate_page_remove_references(struct page *newpage,
 	 * If the page was not migrated then the PageSwapCache bit
 	 * is still set and the operation may continue.
 	 */
-	try_to_unmap(page, 1);
+	if (try_to_unmap(page, 1) == SWAP_FAIL)
+		/* A vma has VM_LOCKED set -> Permanent failure */
+		return -EPERM;
 
 	/*
 	 * Give up if we were unable to remove all mappings.
 	 */
 	if (page_mapcount(page))
-		return 1;
+		return -EAGAIN;
 
 	write_lock_irq(&mapping->tree_lock);
 
@@ -738,7 +740,7 @@ int migrate_page_remove_references(struct page *newpage,
 	if (!page_mapping(page) || page_count(page) != nr_refs ||
 			*radix_pointer != page) {
 		write_unlock_irq(&mapping->tree_lock);
-		return 1;
+		return -EAGAIN;
 	}
 
 	/*
@@ -813,10 +815,14 @@ EXPORT_SYMBOL(migrate_page_copy);
  */
 int migrate_page(struct page *newpage, struct page *page)
 {
+	int rc;
+
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	if (migrate_page_remove_references(newpage, page, 2))
-		return -EAGAIN;
+	rc = migrate_page_remove_references(newpage, page, 2);
+
+	if (rc)
+		return rc;
 
 	migrate_page_copy(newpage, page);
 
@@ -1883,7 +1889,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 
 	if (!(gfp_mask & __GFP_WAIT) ||
 		zone->all_unreclaimable ||
-		atomic_read(&zone->reclaim_in_progress) > 0)
+		atomic_read(&zone->reclaim_in_progress) > 0 ||
+		(p->flags & PF_MEMALLOC))
 		return 0;
 
 	node_id = zone->zone_pgdat->node_id;
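
The vmscan.c hunks replace the bare "return 1" failure value with distinct error codes, so callers of migrate_page() can tell a transient condition (-EAGAIN, worth retrying) from a permanent one (-EPERM, a VM_LOCKED vma). The small sketch below shows the retry pattern this enables; migrate_one() is a hypothetical stand-in, not the kernel function.

/*
 * Standalone sketch (not kernel code): retry on -EAGAIN, stop on success
 * or on a permanent error such as -EPERM.
 */
#include <errno.h>
#include <stdio.h>

static int attempt;

/* Pretend the first attempt races with another user of the page. */
static int migrate_one(void)
{
	return (attempt++ == 0) ? -EAGAIN : 0;
}

int main(void)
{
	int rc;

	for (int tries = 0; tries < 3; tries++) {
		rc = migrate_one();
		if (rc != -EAGAIN)
			break;			/* success, or a permanent error */
	}

	if (rc == -EPERM)
		printf("permanent failure, do not retry\n");
	else if (rc)
		printf("still busy after retries: %d\n", rc);
	else
		printf("migrated\n");
	return 0;
}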