diff options
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 195 |
1 files changed, 85 insertions, 110 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b37dc0f78d07..845b91749a42 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -732,9 +732,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) | |||
732 | } | 732 | } |
733 | local_irq_restore(flags); | 733 | local_irq_restore(flags); |
734 | put_cpu(); | 734 | put_cpu(); |
735 | } | 735 | } else { |
736 | |||
737 | if (page == NULL) { | ||
738 | spin_lock_irqsave(&zone->lock, flags); | 736 | spin_lock_irqsave(&zone->lock, flags); |
739 | page = __rmqueue(zone, order); | 737 | page = __rmqueue(zone, order); |
740 | spin_unlock_irqrestore(&zone->lock, flags); | 738 | spin_unlock_irqrestore(&zone->lock, flags); |
@@ -754,20 +752,25 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) | |||
754 | return page; | 752 | return page; |
755 | } | 753 | } |
756 | 754 | ||
755 | #define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */ | ||
756 | #define ALLOC_HARDER 0x02 /* try to alloc harder */ | ||
757 | #define ALLOC_HIGH 0x04 /* __GFP_HIGH set */ | ||
758 | #define ALLOC_CPUSET 0x08 /* check for correct cpuset */ | ||
759 | |||
757 | /* | 760 | /* |
758 | * Return 1 if free pages are above 'mark'. This takes into account the order | 761 | * Return 1 if free pages are above 'mark'. This takes into account the order |
759 | * of the allocation. | 762 | * of the allocation. |
760 | */ | 763 | */ |
761 | int zone_watermark_ok(struct zone *z, int order, unsigned long mark, | 764 | int zone_watermark_ok(struct zone *z, int order, unsigned long mark, |
762 | int classzone_idx, int can_try_harder, gfp_t gfp_high) | 765 | int classzone_idx, int alloc_flags) |
763 | { | 766 | { |
764 | /* free_pages may go negative - that's OK */ | 767 | /* free_pages may go negative - that's OK */ |
765 | long min = mark, free_pages = z->free_pages - (1 << order) + 1; | 768 | long min = mark, free_pages = z->free_pages - (1 << order) + 1; |
766 | int o; | 769 | int o; |
767 | 770 | ||
768 | if (gfp_high) | 771 | if (alloc_flags & ALLOC_HIGH) |
769 | min -= min / 2; | 772 | min -= min / 2; |
770 | if (can_try_harder) | 773 | if (alloc_flags & ALLOC_HARDER) |
771 | min -= min / 4; | 774 | min -= min / 4; |
772 | 775 | ||
773 | if (free_pages <= min + z->lowmem_reserve[classzone_idx]) | 776 | if (free_pages <= min + z->lowmem_reserve[classzone_idx]) |
@@ -785,14 +788,40 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, | |||
785 | return 1; | 788 | return 1; |
786 | } | 789 | } |
787 | 790 | ||
788 | static inline int | 791 | /* |
789 | should_reclaim_zone(struct zone *z, gfp_t gfp_mask) | 792 | * get_page_from_freelist goes through the zonelist trying to allocate |
793 | * a page. | ||
794 | */ | ||
795 | static struct page * | ||
796 | get_page_from_freelist(gfp_t gfp_mask, unsigned int order, | ||
797 | struct zonelist *zonelist, int alloc_flags) | ||
790 | { | 798 | { |
791 | if (!z->reclaim_pages) | 799 | struct zone **z = zonelist->zones; |
792 | return 0; | 800 | struct page *page = NULL; |
793 | if (gfp_mask & __GFP_NORECLAIM) | 801 | int classzone_idx = zone_idx(*z); |
794 | return 0; | 802 | |
795 | return 1; | 803 | /* |
804 | * Go through the zonelist once, looking for a zone with enough free. | ||
805 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. | ||
806 | */ | ||
807 | do { | ||
808 | if ((alloc_flags & ALLOC_CPUSET) && | ||
809 | !cpuset_zone_allowed(*z, gfp_mask)) | ||
810 | continue; | ||
811 | |||
812 | if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { | ||
813 | if (!zone_watermark_ok(*z, order, (*z)->pages_low, | ||
814 | classzone_idx, alloc_flags)) | ||
815 | continue; | ||
816 | } | ||
817 | |||
818 | page = buffered_rmqueue(*z, order, gfp_mask); | ||
819 | if (page) { | ||
820 | zone_statistics(zonelist, *z); | ||
821 | break; | ||
822 | } | ||
823 | } while (*(++z) != NULL); | ||
824 | return page; | ||
796 | } | 825 | } |
797 | 826 | ||
798 | /* | 827 | /* |
@@ -803,92 +832,60 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, | |||
803 | struct zonelist *zonelist) | 832 | struct zonelist *zonelist) |
804 | { | 833 | { |
805 | const gfp_t wait = gfp_mask & __GFP_WAIT; | 834 | const gfp_t wait = gfp_mask & __GFP_WAIT; |
806 | struct zone **zones, *z; | 835 | struct zone **z; |
807 | struct page *page; | 836 | struct page *page; |
808 | struct reclaim_state reclaim_state; | 837 | struct reclaim_state reclaim_state; |
809 | struct task_struct *p = current; | 838 | struct task_struct *p = current; |
810 | int i; | ||
811 | int classzone_idx; | ||
812 | int do_retry; | 839 | int do_retry; |
813 | int can_try_harder; | 840 | int alloc_flags; |
814 | int did_some_progress; | 841 | int did_some_progress; |
815 | 842 | ||
816 | might_sleep_if(wait); | 843 | might_sleep_if(wait); |
817 | 844 | ||
818 | /* | 845 | z = zonelist->zones; /* the list of zones suitable for gfp_mask */ |
819 | * The caller may dip into page reserves a bit more if the caller | ||
820 | * cannot run direct reclaim, or is the caller has realtime scheduling | ||
821 | * policy | ||
822 | */ | ||
823 | can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait; | ||
824 | |||
825 | zones = zonelist->zones; /* the list of zones suitable for gfp_mask */ | ||
826 | 846 | ||
827 | if (unlikely(zones[0] == NULL)) { | 847 | if (unlikely(*z == NULL)) { |
828 | /* Should this ever happen?? */ | 848 | /* Should this ever happen?? */ |
829 | return NULL; | 849 | return NULL; |
830 | } | 850 | } |
851 | restart: | ||
852 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, | ||
853 | zonelist, ALLOC_CPUSET); | ||
854 | if (page) | ||
855 | goto got_pg; | ||
831 | 856 | ||
832 | classzone_idx = zone_idx(zones[0]); | 857 | do |
858 | wakeup_kswapd(*z, order); | ||
859 | while (*(++z)); | ||
833 | 860 | ||
834 | restart: | ||
835 | /* | 861 | /* |
836 | * Go through the zonelist once, looking for a zone with enough free. | 862 | * OK, we're below the kswapd watermark and have kicked background |
837 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. | 863 | * reclaim. Now things get more complex, so set up alloc_flags according |
864 | * to how we want to proceed. | ||
865 | * | ||
866 | * The caller may dip into page reserves a bit more if the caller | ||
867 | * cannot run direct reclaim, or if the caller has realtime scheduling | ||
868 | * policy. | ||
838 | */ | 869 | */ |
839 | for (i = 0; (z = zones[i]) != NULL; i++) { | 870 | alloc_flags = 0; |
840 | int do_reclaim = should_reclaim_zone(z, gfp_mask); | 871 | if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait) |
841 | 872 | alloc_flags |= ALLOC_HARDER; | |
842 | if (!cpuset_zone_allowed(z, __GFP_HARDWALL)) | 873 | if (gfp_mask & __GFP_HIGH) |
843 | continue; | 874 | alloc_flags |= ALLOC_HIGH; |
844 | 875 | if (wait) | |
845 | /* | 876 | alloc_flags |= ALLOC_CPUSET; |
846 | * If the zone is to attempt early page reclaim then this loop | ||
847 | * will try to reclaim pages and check the watermark a second | ||
848 | * time before giving up and falling back to the next zone. | ||
849 | */ | ||
850 | zone_reclaim_retry: | ||
851 | if (!zone_watermark_ok(z, order, z->pages_low, | ||
852 | classzone_idx, 0, 0)) { | ||
853 | if (!do_reclaim) | ||
854 | continue; | ||
855 | else { | ||
856 | zone_reclaim(z, gfp_mask, order); | ||
857 | /* Only try reclaim once */ | ||
858 | do_reclaim = 0; | ||
859 | goto zone_reclaim_retry; | ||
860 | } | ||
861 | } | ||
862 | |||
863 | page = buffered_rmqueue(z, order, gfp_mask); | ||
864 | if (page) | ||
865 | goto got_pg; | ||
866 | } | ||
867 | |||
868 | for (i = 0; (z = zones[i]) != NULL; i++) | ||
869 | wakeup_kswapd(z, order); | ||
870 | 877 | ||
871 | /* | 878 | /* |
872 | * Go through the zonelist again. Let __GFP_HIGH and allocations | 879 | * Go through the zonelist again. Let __GFP_HIGH and allocations |
873 | * coming from realtime tasks to go deeper into reserves | 880 | * coming from realtime tasks go deeper into reserves. |
874 | * | 881 | * |
875 | * This is the last chance, in general, before the goto nopage. | 882 | * This is the last chance, in general, before the goto nopage. |
876 | * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. | 883 | * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. |
877 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. | 884 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. |
878 | */ | 885 | */ |
879 | for (i = 0; (z = zones[i]) != NULL; i++) { | 886 | page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags); |
880 | if (!zone_watermark_ok(z, order, z->pages_min, | 887 | if (page) |
881 | classzone_idx, can_try_harder, | 888 | goto got_pg; |
882 | gfp_mask & __GFP_HIGH)) | ||
883 | continue; | ||
884 | |||
885 | if (wait && !cpuset_zone_allowed(z, gfp_mask)) | ||
886 | continue; | ||
887 | |||
888 | page = buffered_rmqueue(z, order, gfp_mask); | ||
889 | if (page) | ||
890 | goto got_pg; | ||
891 | } | ||
892 | 889 | ||
893 | /* This allocation should allow future memory freeing. */ | 890 | /* This allocation should allow future memory freeing. */ |
894 | 891 | ||
@@ -897,13 +894,10 @@ zone_reclaim_retry: | |||
897 | if (!(gfp_mask & __GFP_NOMEMALLOC)) { | 894 | if (!(gfp_mask & __GFP_NOMEMALLOC)) { |
898 | nofail_alloc: | 895 | nofail_alloc: |
899 | /* go through the zonelist yet again, ignoring mins */ | 896 | /* go through the zonelist yet again, ignoring mins */ |
900 | for (i = 0; (z = zones[i]) != NULL; i++) { | 897 | page = get_page_from_freelist(gfp_mask, order, |
901 | if (!cpuset_zone_allowed(z, gfp_mask)) | 898 | zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET); |
902 | continue; | 899 | if (page) |
903 | page = buffered_rmqueue(z, order, gfp_mask); | 900 | goto got_pg; |
904 | if (page) | ||
905 | goto got_pg; | ||
906 | } | ||
907 | if (gfp_mask & __GFP_NOFAIL) { | 901 | if (gfp_mask & __GFP_NOFAIL) { |
908 | blk_congestion_wait(WRITE, HZ/50); | 902 | blk_congestion_wait(WRITE, HZ/50); |
909 | goto nofail_alloc; | 903 | goto nofail_alloc; |
@@ -924,7 +918,7 @@ rebalance: | |||
924 | reclaim_state.reclaimed_slab = 0; | 918 | reclaim_state.reclaimed_slab = 0; |
925 | p->reclaim_state = &reclaim_state; | 919 | p->reclaim_state = &reclaim_state; |
926 | 920 | ||
927 | did_some_progress = try_to_free_pages(zones, gfp_mask); | 921 | did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask); |
928 | 922 | ||
929 | p->reclaim_state = NULL; | 923 | p->reclaim_state = NULL; |
930 | p->flags &= ~PF_MEMALLOC; | 924 | p->flags &= ~PF_MEMALLOC; |
@@ -932,19 +926,10 @@ rebalance: | |||
932 | cond_resched(); | 926 | cond_resched(); |
933 | 927 | ||
934 | if (likely(did_some_progress)) { | 928 | if (likely(did_some_progress)) { |
935 | for (i = 0; (z = zones[i]) != NULL; i++) { | 929 | page = get_page_from_freelist(gfp_mask, order, |
936 | if (!zone_watermark_ok(z, order, z->pages_min, | 930 | zonelist, alloc_flags); |
937 | classzone_idx, can_try_harder, | 931 | if (page) |
938 | gfp_mask & __GFP_HIGH)) | 932 | goto got_pg; |
939 | continue; | ||
940 | |||
941 | if (!cpuset_zone_allowed(z, gfp_mask)) | ||
942 | continue; | ||
943 | |||
944 | page = buffered_rmqueue(z, order, gfp_mask); | ||
945 | if (page) | ||
946 | goto got_pg; | ||
947 | } | ||
948 | } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { | 933 | } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { |
949 | /* | 934 | /* |
950 | * Go through the zonelist yet one more time, keep | 935 | * Go through the zonelist yet one more time, keep |
@@ -952,18 +937,10 @@ rebalance: | |||
952 | * a parallel oom killing, we must fail if we're still | 937 | * a parallel oom killing, we must fail if we're still |
953 | * under heavy pressure. | 938 | * under heavy pressure. |
954 | */ | 939 | */ |
955 | for (i = 0; (z = zones[i]) != NULL; i++) { | 940 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, |
956 | if (!zone_watermark_ok(z, order, z->pages_high, | 941 | zonelist, ALLOC_CPUSET); |
957 | classzone_idx, 0, 0)) | 942 | if (page) |
958 | continue; | 943 | goto got_pg; |
959 | |||
960 | if (!cpuset_zone_allowed(z, __GFP_HARDWALL)) | ||
961 | continue; | ||
962 | |||
963 | page = buffered_rmqueue(z, order, gfp_mask); | ||
964 | if (page) | ||
965 | goto got_pg; | ||
966 | } | ||
967 | 944 | ||
968 | out_of_memory(gfp_mask, order); | 945 | out_of_memory(gfp_mask, order); |
969 | goto restart; | 946 | goto restart; |
@@ -996,9 +973,7 @@ nopage: | |||
996 | dump_stack(); | 973 | dump_stack(); |
997 | show_mem(); | 974 | show_mem(); |
998 | } | 975 | } |
999 | return NULL; | ||
1000 | got_pg: | 976 | got_pg: |
1001 | zone_statistics(zonelist, z); | ||
1002 | return page; | 977 | return page; |
1003 | } | 978 | } |
1004 | 979 | ||