about summary refs log tree commit diff stats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- mm/page_alloc.c 195
1 files changed, 85 insertions, 110 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b37dc0f78d07..845b91749a42 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -732,9 +732,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
732 } 732 }
733 local_irq_restore(flags); 733 local_irq_restore(flags);
734 put_cpu(); 734 put_cpu();
735 } 735 } else {
736
737 if (page == NULL) {
738 spin_lock_irqsave(&zone->lock, flags); 736 spin_lock_irqsave(&zone->lock, flags);
739 page = __rmqueue(zone, order); 737 page = __rmqueue(zone, order);
740 spin_unlock_irqrestore(&zone->lock, flags); 738 spin_unlock_irqrestore(&zone->lock, flags);
@@ -754,20 +752,25 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
754 return page; 752 return page;
755} 753}
756 754
755#define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */
756#define ALLOC_HARDER 0x02 /* try to alloc harder */
757#define ALLOC_HIGH 0x04 /* __GFP_HIGH set */
758#define ALLOC_CPUSET 0x08 /* check for correct cpuset */
759
757/* 760/*
758 * Return 1 if free pages are above 'mark'. This takes into account the order 761 * Return 1 if free pages are above 'mark'. This takes into account the order
759 * of the allocation. 762 * of the allocation.
760 */ 763 */
761int zone_watermark_ok(struct zone *z, int order, unsigned long mark, 764int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
762 int classzone_idx, int can_try_harder, gfp_t gfp_high) 765 int classzone_idx, int alloc_flags)
763{ 766{
764 /* free_pages may go negative - that's OK */ 767 /* free_pages may go negative - that's OK */
765 long min = mark, free_pages = z->free_pages - (1 << order) + 1; 768 long min = mark, free_pages = z->free_pages - (1 << order) + 1;
766 int o; 769 int o;
767 770
768 if (gfp_high) 771 if (alloc_flags & ALLOC_HIGH)
769 min -= min / 2; 772 min -= min / 2;
770 if (can_try_harder) 773 if (alloc_flags & ALLOC_HARDER)
771 min -= min / 4; 774 min -= min / 4;
772 775
773 if (free_pages <= min + z->lowmem_reserve[classzone_idx]) 776 if (free_pages <= min + z->lowmem_reserve[classzone_idx])
@@ -785,14 +788,40 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
785 return 1; 788 return 1;
786} 789}
787 790
788static inline int 791/*
789should_reclaim_zone(struct zone *z, gfp_t gfp_mask) 792 * get_page_from_freelist goes through the zonelist trying to allocate
793 * a page.
794 */
795static struct page *
796get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
797 struct zonelist *zonelist, int alloc_flags)
790{ 798{
791 if (!z->reclaim_pages) 799 struct zone **z = zonelist->zones;
792 return 0; 800 struct page *page = NULL;
793 if (gfp_mask & __GFP_NORECLAIM) 801 int classzone_idx = zone_idx(*z);
794 return 0; 802
795 return 1; 803 /*
804 * Go through the zonelist once, looking for a zone with enough free.
805 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
806 */
807 do {
808 if ((alloc_flags & ALLOC_CPUSET) &&
809 !cpuset_zone_allowed(*z, gfp_mask))
810 continue;
811
812 if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
813 if (!zone_watermark_ok(*z, order, (*z)->pages_low,
814 classzone_idx, alloc_flags))
815 continue;
816 }
817
818 page = buffered_rmqueue(*z, order, gfp_mask);
819 if (page) {
820 zone_statistics(zonelist, *z);
821 break;
822 }
823 } while (*(++z) != NULL);
824 return page;
796} 825}
797 826
798/* 827/*
@@ -803,92 +832,60 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
803 struct zonelist *zonelist) 832 struct zonelist *zonelist)
804{ 833{
805 const gfp_t wait = gfp_mask & __GFP_WAIT; 834 const gfp_t wait = gfp_mask & __GFP_WAIT;
806 struct zone **zones, *z; 835 struct zone **z;
807 struct page *page; 836 struct page *page;
808 struct reclaim_state reclaim_state; 837 struct reclaim_state reclaim_state;
809 struct task_struct *p = current; 838 struct task_struct *p = current;
810 int i;
811 int classzone_idx;
812 int do_retry; 839 int do_retry;
813 int can_try_harder; 840 int alloc_flags;
814 int did_some_progress; 841 int did_some_progress;
815 842
816 might_sleep_if(wait); 843 might_sleep_if(wait);
817 844
818 /* 845 z = zonelist->zones; /* the list of zones suitable for gfp_mask */
819 * The caller may dip into page reserves a bit more if the caller
820 * cannot run direct reclaim, or is the caller has realtime scheduling
821 * policy
822 */
823 can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
824
825 zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
826 846
827 if (unlikely(zones[0] == NULL)) { 847 if (unlikely(*z == NULL)) {
828 /* Should this ever happen?? */ 848 /* Should this ever happen?? */
829 return NULL; 849 return NULL;
830 } 850 }
851restart:
852 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
853 zonelist, ALLOC_CPUSET);
854 if (page)
855 goto got_pg;
831 856
832 classzone_idx = zone_idx(zones[0]); 857 do
858 wakeup_kswapd(*z, order);
859 while (*(++z));
833 860
834restart:
835 /* 861 /*
836 * Go through the zonelist once, looking for a zone with enough free. 862 * OK, we're below the kswapd watermark and have kicked background
837 * See also cpuset_zone_allowed() comment in kernel/cpuset.c. 863 * reclaim. Now things get more complex, so set up alloc_flags according
864 * to how we want to proceed.
865 *
866 * The caller may dip into page reserves a bit more if the caller
867 * cannot run direct reclaim, or if the caller has realtime scheduling
868 * policy.
838 */ 869 */
839 for (i = 0; (z = zones[i]) != NULL; i++) { 870 alloc_flags = 0;
840 int do_reclaim = should_reclaim_zone(z, gfp_mask); 871 if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
841 872 alloc_flags |= ALLOC_HARDER;
842 if (!cpuset_zone_allowed(z, __GFP_HARDWALL)) 873 if (gfp_mask & __GFP_HIGH)
843 continue; 874 alloc_flags |= ALLOC_HIGH;
844 875 if (wait)
845 /* 876 alloc_flags |= ALLOC_CPUSET;
846 * If the zone is to attempt early page reclaim then this loop
847 * will try to reclaim pages and check the watermark a second
848 * time before giving up and falling back to the next zone.
849 */
850zone_reclaim_retry:
851 if (!zone_watermark_ok(z, order, z->pages_low,
852 classzone_idx, 0, 0)) {
853 if (!do_reclaim)
854 continue;
855 else {
856 zone_reclaim(z, gfp_mask, order);
857 /* Only try reclaim once */
858 do_reclaim = 0;
859 goto zone_reclaim_retry;
860 }
861 }
862
863 page = buffered_rmqueue(z, order, gfp_mask);
864 if (page)
865 goto got_pg;
866 }
867
868 for (i = 0; (z = zones[i]) != NULL; i++)
869 wakeup_kswapd(z, order);
870 877
871 /* 878 /*
872 * Go through the zonelist again. Let __GFP_HIGH and allocations 879 * Go through the zonelist again. Let __GFP_HIGH and allocations
873 * coming from realtime tasks to go deeper into reserves 880 * coming from realtime tasks go deeper into reserves.
874 * 881 *
875 * This is the last chance, in general, before the goto nopage. 882 * This is the last chance, in general, before the goto nopage.
876 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. 883 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
877 * See also cpuset_zone_allowed() comment in kernel/cpuset.c. 884 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
878 */ 885 */
879 for (i = 0; (z = zones[i]) != NULL; i++) { 886 page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
880 if (!zone_watermark_ok(z, order, z->pages_min, 887 if (page)
881 classzone_idx, can_try_harder, 888 goto got_pg;
882 gfp_mask & __GFP_HIGH))
883 continue;
884
885 if (wait && !cpuset_zone_allowed(z, gfp_mask))
886 continue;
887
888 page = buffered_rmqueue(z, order, gfp_mask);
889 if (page)
890 goto got_pg;
891 }
892 889
893 /* This allocation should allow future memory freeing. */ 890 /* This allocation should allow future memory freeing. */
894 891
@@ -897,13 +894,10 @@ zone_reclaim_retry:
897 if (!(gfp_mask & __GFP_NOMEMALLOC)) { 894 if (!(gfp_mask & __GFP_NOMEMALLOC)) {
898nofail_alloc: 895nofail_alloc:
899 /* go through the zonelist yet again, ignoring mins */ 896 /* go through the zonelist yet again, ignoring mins */
900 for (i = 0; (z = zones[i]) != NULL; i++) { 897 page = get_page_from_freelist(gfp_mask, order,
901 if (!cpuset_zone_allowed(z, gfp_mask)) 898 zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
902 continue; 899 if (page)
903 page = buffered_rmqueue(z, order, gfp_mask); 900 goto got_pg;
904 if (page)
905 goto got_pg;
906 }
907 if (gfp_mask & __GFP_NOFAIL) { 901 if (gfp_mask & __GFP_NOFAIL) {
908 blk_congestion_wait(WRITE, HZ/50); 902 blk_congestion_wait(WRITE, HZ/50);
909 goto nofail_alloc; 903 goto nofail_alloc;
@@ -924,7 +918,7 @@ rebalance:
924 reclaim_state.reclaimed_slab = 0; 918 reclaim_state.reclaimed_slab = 0;
925 p->reclaim_state = &reclaim_state; 919 p->reclaim_state = &reclaim_state;
926 920
927 did_some_progress = try_to_free_pages(zones, gfp_mask); 921 did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
928 922
929 p->reclaim_state = NULL; 923 p->reclaim_state = NULL;
930 p->flags &= ~PF_MEMALLOC; 924 p->flags &= ~PF_MEMALLOC;
@@ -932,19 +926,10 @@ rebalance:
932 cond_resched(); 926 cond_resched();
933 927
934 if (likely(did_some_progress)) { 928 if (likely(did_some_progress)) {
935 for (i = 0; (z = zones[i]) != NULL; i++) { 929 page = get_page_from_freelist(gfp_mask, order,
936 if (!zone_watermark_ok(z, order, z->pages_min, 930 zonelist, alloc_flags);
937 classzone_idx, can_try_harder, 931 if (page)
938 gfp_mask & __GFP_HIGH)) 932 goto got_pg;
939 continue;
940
941 if (!cpuset_zone_allowed(z, gfp_mask))
942 continue;
943
944 page = buffered_rmqueue(z, order, gfp_mask);
945 if (page)
946 goto got_pg;
947 }
948 } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { 933 } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
949 /* 934 /*
950 * Go through the zonelist yet one more time, keep 935 * Go through the zonelist yet one more time, keep
@@ -952,18 +937,10 @@ rebalance:
952 * a parallel oom killing, we must fail if we're still 937 * a parallel oom killing, we must fail if we're still
953 * under heavy pressure. 938 * under heavy pressure.
954 */ 939 */
955 for (i = 0; (z = zones[i]) != NULL; i++) { 940 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
956 if (!zone_watermark_ok(z, order, z->pages_high, 941 zonelist, ALLOC_CPUSET);
957 classzone_idx, 0, 0)) 942 if (page)
958 continue; 943 goto got_pg;
959
960 if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
961 continue;
962
963 page = buffered_rmqueue(z, order, gfp_mask);
964 if (page)
965 goto got_pg;
966 }
967 944
968 out_of_memory(gfp_mask, order); 945 out_of_memory(gfp_mask, order);
969 goto restart; 946 goto restart;
@@ -996,9 +973,7 @@ nopage:
996 dump_stack(); 973 dump_stack();
997 show_mem(); 974 show_mem();
998 } 975 }
999 return NULL;
1000got_pg: 976got_pg:
1001 zone_statistics(zonelist, z);
1002 return page; 977 return page;
1003} 978}
1004 979