aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Rafael J. Wysocki <rjw@sisk.pl> 2006-12-06 23:34:18 -0500
committer Linus Torvalds <torvalds@woody.osdl.org> 2006-12-07 11:39:27 -0500
commit 8357376d3df21b7d6f857931a57ac50da9c66e26 (patch)
tree daf2c369e9b79d24c1666323b3ae75189e482a4a
parent bf73bae6ba0dc4bd4f1e570feb34a06b72725af6 (diff)
[PATCH] swsusp: Improve handling of highmem
Currently swsusp saves the contents of highmem pages by copying them to the normal zone, which is quite inefficient (e.g. it requires two normal pages to be used for saving one highmem page). This may be improved by using highmem for saving the contents of saveable highmem pages. Namely, during the suspend phase of the suspend-resume cycle we try to allocate as many free highmem pages as there are saveable highmem pages. If there are not enough highmem image pages to store the contents of all of the saveable highmem pages, some of them will be stored in the "normal" memory. Next, we allocate as many free "normal" pages as needed to store the (remaining) image data. We use a memory bitmap to mark the allocated free pages (i.e. highmem as well as "normal" image pages). Now, we use another memory bitmap to mark all of the saveable pages (highmem as well as "normal") and the contents of the saveable pages are copied into the image pages. Then, the second bitmap is used to save the pfns corresponding to the saveable pages and the first one is used to save their data. During the resume phase the pfns of the pages that were saveable during the suspend are loaded from the image and used to mark the "unsafe" page frames. Next, we try to allocate as many free highmem page frames as are needed to load all of the image data that had been in the highmem before the suspend, and we allocate as many free "normal" page frames as needed to make the total number of allocated free pages (highmem and "normal") equal to the size of the image. While doing this we have to make sure that there will be some extra free "normal" and "safe" page frames for two lists of PBEs constructed later. Now, the image data are loaded, if possible, into their "original" page frames.
The image data that cannot be written into their "original" page frames are loaded into "safe" page frames and their "original" kernel virtual addresses, as well as the addresses of the "safe" pages containing their copies, are stored in one of two lists of PBEs. One list of PBEs is for the copies of "normal" suspend pages (i.e. "normal" pages that were saveable during the suspend) and it is used in the same way as previously (i.e. by the architecture-dependent parts of swsusp). The other list of PBEs is for the copies of highmem suspend pages. The pages in this list are restored (in a reversible way) right before the arch-dependent code is called.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- include/linux/suspend.h 9
-rw-r--r-- kernel/power/power.h 2
-rw-r--r-- kernel/power/snapshot.c 851
-rw-r--r-- kernel/power/swap.c 2
-rw-r--r-- kernel/power/swsusp.c 53
-rw-r--r-- kernel/power/user.c 2
-rw-r--r-- mm/vmscan.c 3
7 files changed, 680 insertions, 242 deletions
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index b1237f16ecde..bf99bd49f8ef 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -9,10 +9,13 @@
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/pm.h> 10#include <linux/pm.h>
11 11
12/* page backup entry */ 12/* struct pbe is used for creating lists of pages that should be restored
13 * atomically during the resume from disk, because the page frames they have
14 * occupied before the suspend are in use.
15 */
13struct pbe { 16struct pbe {
14 unsigned long address; /* address of the copy */ 17 void *address; /* address of the copy */
15 unsigned long orig_address; /* original address of page */ 18 void *orig_address; /* original address of a page */
16 struct pbe *next; 19 struct pbe *next;
17}; 20};
18 21
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 7dbfd9f67e1c..3763343bde2f 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -103,8 +103,8 @@ struct snapshot_handle {
103extern unsigned int snapshot_additional_pages(struct zone *zone); 103extern unsigned int snapshot_additional_pages(struct zone *zone);
104extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); 104extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
105extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); 105extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
106extern void snapshot_write_finalize(struct snapshot_handle *handle);
106extern int snapshot_image_loaded(struct snapshot_handle *handle); 107extern int snapshot_image_loaded(struct snapshot_handle *handle);
107extern void snapshot_free_unused_memory(struct snapshot_handle *handle);
108 108
109/* 109/*
110 * This structure is used to pass the values needed for the identification 110 * This structure is used to pass the values needed for the identification
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 99f9b7d177d6..fd8251d40eb8 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1,15 +1,15 @@
1/* 1/*
2 * linux/kernel/power/snapshot.c 2 * linux/kernel/power/snapshot.c
3 * 3 *
4 * This file provide system snapshot/restore functionality. 4 * This file provides system snapshot/restore functionality for swsusp.
5 * 5 *
6 * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz> 6 * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
7 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
7 * 8 *
8 * This file is released under the GPLv2, and is based on swsusp.c. 9 * This file is released under the GPLv2.
9 * 10 *
10 */ 11 */
11 12
12
13#include <linux/version.h> 13#include <linux/version.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
@@ -34,137 +34,24 @@
34 34
35#include "power.h" 35#include "power.h"
36 36
37/* List of PBEs used for creating and restoring the suspend image */ 37/* List of PBEs needed for restoring the pages that were allocated before
38 * the suspend and included in the suspend image, but have also been
39 * allocated by the "resume" kernel, so their contents cannot be written
40 * directly to their "original" page frames.
41 */
38struct pbe *restore_pblist; 42struct pbe *restore_pblist;
39 43
40static unsigned int nr_copy_pages; 44/* Pointer to an auxiliary buffer (1 page) */
41static unsigned int nr_meta_pages;
42static void *buffer; 45static void *buffer;
43 46
44#ifdef CONFIG_HIGHMEM
45unsigned int count_highmem_pages(void)
46{
47 struct zone *zone;
48 unsigned long zone_pfn;
49 unsigned int n = 0;
50
51 for_each_zone (zone)
52 if (is_highmem(zone)) {
53 mark_free_pages(zone);
54 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) {
55 struct page *page;
56 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
57 if (!pfn_valid(pfn))
58 continue;
59 page = pfn_to_page(pfn);
60 if (PageReserved(page))
61 continue;
62 if (PageNosaveFree(page))
63 continue;
64 n++;
65 }
66 }
67 return n;
68}
69
70struct highmem_page {
71 char *data;
72 struct page *page;
73 struct highmem_page *next;
74};
75
76static struct highmem_page *highmem_copy;
77
78static int save_highmem_zone(struct zone *zone)
79{
80 unsigned long zone_pfn;
81 mark_free_pages(zone);
82 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
83 struct page *page;
84 struct highmem_page *save;
85 void *kaddr;
86 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
87
88 if (!(pfn%10000))
89 printk(".");
90 if (!pfn_valid(pfn))
91 continue;
92 page = pfn_to_page(pfn);
93 /*
94 * This condition results from rvmalloc() sans vmalloc_32()
95 * and architectural memory reservations. This should be
96 * corrected eventually when the cases giving rise to this
97 * are better understood.
98 */
99 if (PageReserved(page))
100 continue;
101 BUG_ON(PageNosave(page));
102 if (PageNosaveFree(page))
103 continue;
104 save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
105 if (!save)
106 return -ENOMEM;
107 save->next = highmem_copy;
108 save->page = page;
109 save->data = (void *) get_zeroed_page(GFP_ATOMIC);
110 if (!save->data) {
111 kfree(save);
112 return -ENOMEM;
113 }
114 kaddr = kmap_atomic(page, KM_USER0);
115 memcpy(save->data, kaddr, PAGE_SIZE);
116 kunmap_atomic(kaddr, KM_USER0);
117 highmem_copy = save;
118 }
119 return 0;
120}
121
122int save_highmem(void)
123{
124 struct zone *zone;
125 int res = 0;
126
127 pr_debug("swsusp: Saving Highmem");
128 drain_local_pages();
129 for_each_zone (zone) {
130 if (is_highmem(zone))
131 res = save_highmem_zone(zone);
132 if (res)
133 return res;
134 }
135 printk("\n");
136 return 0;
137}
138
139int restore_highmem(void)
140{
141 printk("swsusp: Restoring Highmem\n");
142 while (highmem_copy) {
143 struct highmem_page *save = highmem_copy;
144 void *kaddr;
145 highmem_copy = save->next;
146
147 kaddr = kmap_atomic(save->page, KM_USER0);
148 memcpy(kaddr, save->data, PAGE_SIZE);
149 kunmap_atomic(kaddr, KM_USER0);
150 free_page((long) save->data);
151 kfree(save);
152 }
153 return 0;
154}
155#else
156static inline unsigned int count_highmem_pages(void) {return 0;}
157static inline int save_highmem(void) {return 0;}
158static inline int restore_highmem(void) {return 0;}
159#endif
160
161/** 47/**
162 * @safe_needed - on resume, for storing the PBE list and the image, 48 * @safe_needed - on resume, for storing the PBE list and the image,
163 * we can only use memory pages that do not conflict with the pages 49 * we can only use memory pages that do not conflict with the pages
164 * used before suspend. 50 * used before suspend. The unsafe pages have PageNosaveFree set
51 * and we count them using unsafe_pages.
165 * 52 *
166 * The unsafe pages are marked with the PG_nosave_free flag 53 * Each allocated image page is marked as PageNosave and PageNosaveFree
167 * and we count them using unsafe_pages 54 * so that swsusp_free() can release it.
168 */ 55 */
169 56
170#define PG_ANY 0 57#define PG_ANY 0
@@ -174,7 +61,7 @@ static inline int restore_highmem(void) {return 0;}
174 61
175static unsigned int allocated_unsafe_pages; 62static unsigned int allocated_unsafe_pages;
176 63
177static void *alloc_image_page(gfp_t gfp_mask, int safe_needed) 64static void *get_image_page(gfp_t gfp_mask, int safe_needed)
178{ 65{
179 void *res; 66 void *res;
180 67
@@ -195,20 +82,38 @@ static void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
195 82
196unsigned long get_safe_page(gfp_t gfp_mask) 83unsigned long get_safe_page(gfp_t gfp_mask)
197{ 84{
198 return (unsigned long)alloc_image_page(gfp_mask, PG_SAFE); 85 return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
86}
87
88static struct page *alloc_image_page(gfp_t gfp_mask) {
89 struct page *page;
90
91 page = alloc_page(gfp_mask);
92 if (page) {
93 SetPageNosave(page);
94 SetPageNosaveFree(page);
95 }
96 return page;
199} 97}
200 98
201/** 99/**
202 * free_image_page - free page represented by @addr, allocated with 100 * free_image_page - free page represented by @addr, allocated with
203 * alloc_image_page (page flags set by it must be cleared) 101 * get_image_page (page flags set by it must be cleared)
204 */ 102 */
205 103
206static inline void free_image_page(void *addr, int clear_nosave_free) 104static inline void free_image_page(void *addr, int clear_nosave_free)
207{ 105{
208 ClearPageNosave(virt_to_page(addr)); 106 struct page *page;
107
108 BUG_ON(!virt_addr_valid(addr));
109
110 page = virt_to_page(addr);
111
112 ClearPageNosave(page);
209 if (clear_nosave_free) 113 if (clear_nosave_free)
210 ClearPageNosaveFree(virt_to_page(addr)); 114 ClearPageNosaveFree(page);
211 free_page((unsigned long)addr); 115
116 __free_page(page);
212} 117}
213 118
214/* struct linked_page is used to build chains of pages */ 119/* struct linked_page is used to build chains of pages */
@@ -269,7 +174,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
269 if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { 174 if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
270 struct linked_page *lp; 175 struct linked_page *lp;
271 176
272 lp = alloc_image_page(ca->gfp_mask, ca->safe_needed); 177 lp = get_image_page(ca->gfp_mask, ca->safe_needed);
273 if (!lp) 178 if (!lp)
274 return NULL; 179 return NULL;
275 180
@@ -446,8 +351,8 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
446 351
447 /* Compute the number of zones */ 352 /* Compute the number of zones */
448 nr = 0; 353 nr = 0;
449 for_each_zone (zone) 354 for_each_zone(zone)
450 if (populated_zone(zone) && !is_highmem(zone)) 355 if (populated_zone(zone))
451 nr++; 356 nr++;
452 357
453 /* Allocate the list of zones bitmap objects */ 358 /* Allocate the list of zones bitmap objects */
@@ -459,10 +364,10 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
459 } 364 }
460 365
461 /* Initialize the zone bitmap objects */ 366 /* Initialize the zone bitmap objects */
462 for_each_zone (zone) { 367 for_each_zone(zone) {
463 unsigned long pfn; 368 unsigned long pfn;
464 369
465 if (!populated_zone(zone) || is_highmem(zone)) 370 if (!populated_zone(zone))
466 continue; 371 continue;
467 372
468 zone_bm->start_pfn = zone->zone_start_pfn; 373 zone_bm->start_pfn = zone->zone_start_pfn;
@@ -481,7 +386,7 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
481 while (bb) { 386 while (bb) {
482 unsigned long *ptr; 387 unsigned long *ptr;
483 388
484 ptr = alloc_image_page(gfp_mask, safe_needed); 389 ptr = get_image_page(gfp_mask, safe_needed);
485 bb->data = ptr; 390 bb->data = ptr;
486 if (!ptr) 391 if (!ptr)
487 goto Free; 392 goto Free;
@@ -669,9 +574,81 @@ unsigned int snapshot_additional_pages(struct zone *zone)
669 574
670 res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); 575 res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
671 res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE); 576 res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
672 return res; 577 return 2 * res;
673} 578}
674 579
580#ifdef CONFIG_HIGHMEM
581/**
582 * count_free_highmem_pages - compute the total number of free highmem
583 * pages, system-wide.
584 */
585
586static unsigned int count_free_highmem_pages(void)
587{
588 struct zone *zone;
589 unsigned int cnt = 0;
590
591 for_each_zone(zone)
592 if (populated_zone(zone) && is_highmem(zone))
593 cnt += zone->free_pages;
594
595 return cnt;
596}
597
598/**
599 * saveable_highmem_page - Determine whether a highmem page should be
600 * included in the suspend image.
601 *
602 * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
603 * and it isn't a part of a free chunk of pages.
604 */
605
606static struct page *saveable_highmem_page(unsigned long pfn)
607{
608 struct page *page;
609
610 if (!pfn_valid(pfn))
611 return NULL;
612
613 page = pfn_to_page(pfn);
614
615 BUG_ON(!PageHighMem(page));
616
617 if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page))
618 return NULL;
619
620 return page;
621}
622
623/**
624 * count_highmem_pages - compute the total number of saveable highmem
625 * pages.
626 */
627
628unsigned int count_highmem_pages(void)
629{
630 struct zone *zone;
631 unsigned int n = 0;
632
633 for_each_zone(zone) {
634 unsigned long pfn, max_zone_pfn;
635
636 if (!is_highmem(zone))
637 continue;
638
639 mark_free_pages(zone);
640 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
641 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
642 if (saveable_highmem_page(pfn))
643 n++;
644 }
645 return n;
646}
647#else
648static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
649static inline unsigned int count_highmem_pages(void) { return 0; }
650#endif /* CONFIG_HIGHMEM */
651
675/** 652/**
676 * pfn_is_nosave - check if given pfn is in the 'nosave' section 653 * pfn_is_nosave - check if given pfn is in the 'nosave' section
677 */ 654 */
@@ -684,12 +661,12 @@ static inline int pfn_is_nosave(unsigned long pfn)
684} 661}
685 662
686/** 663/**
687 * saveable - Determine whether a page should be cloned or not. 664 * saveable - Determine whether a non-highmem page should be included in
688 * @pfn: The page 665 * the suspend image.
689 * 666 *
690 * We save a page if it isn't Nosave, and is not in the range of pages 667 * We should save the page if it isn't Nosave, and is not in the range
691 * statically defined as 'unsaveable', and it 668 * of pages statically defined as 'unsaveable', and it isn't a part of
692 * isn't a part of a free chunk of pages. 669 * a free chunk of pages.
693 */ 670 */
694 671
695static struct page *saveable_page(unsigned long pfn) 672static struct page *saveable_page(unsigned long pfn)
@@ -701,76 +678,130 @@ static struct page *saveable_page(unsigned long pfn)
701 678
702 page = pfn_to_page(pfn); 679 page = pfn_to_page(pfn);
703 680
704 if (PageNosave(page)) 681 BUG_ON(PageHighMem(page));
682
683 if (PageNosave(page) || PageNosaveFree(page))
705 return NULL; 684 return NULL;
685
706 if (PageReserved(page) && pfn_is_nosave(pfn)) 686 if (PageReserved(page) && pfn_is_nosave(pfn))
707 return NULL; 687 return NULL;
708 if (PageNosaveFree(page))
709 return NULL;
710 688
711 return page; 689 return page;
712} 690}
713 691
692/**
693 * count_data_pages - compute the total number of saveable non-highmem
694 * pages.
695 */
696
714unsigned int count_data_pages(void) 697unsigned int count_data_pages(void)
715{ 698{
716 struct zone *zone; 699 struct zone *zone;
717 unsigned long pfn, max_zone_pfn; 700 unsigned long pfn, max_zone_pfn;
718 unsigned int n = 0; 701 unsigned int n = 0;
719 702
720 for_each_zone (zone) { 703 for_each_zone(zone) {
721 if (is_highmem(zone)) 704 if (is_highmem(zone))
722 continue; 705 continue;
706
723 mark_free_pages(zone); 707 mark_free_pages(zone);
724 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 708 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
725 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 709 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
726 n += !!saveable_page(pfn); 710 if(saveable_page(pfn))
711 n++;
727 } 712 }
728 return n; 713 return n;
729} 714}
730 715
731static inline void copy_data_page(long *dst, long *src) 716/* This is needed, because copy_page and memcpy are not usable for copying
717 * task structs.
718 */
719static inline void do_copy_page(long *dst, long *src)
732{ 720{
733 int n; 721 int n;
734 722
735 /* copy_page and memcpy are not usable for copying task structs. */
736 for (n = PAGE_SIZE / sizeof(long); n; n--) 723 for (n = PAGE_SIZE / sizeof(long); n; n--)
737 *dst++ = *src++; 724 *dst++ = *src++;
738} 725}
739 726
727#ifdef CONFIG_HIGHMEM
728static inline struct page *
729page_is_saveable(struct zone *zone, unsigned long pfn)
730{
731 return is_highmem(zone) ?
732 saveable_highmem_page(pfn) : saveable_page(pfn);
733}
734
735static inline void
736copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
737{
738 struct page *s_page, *d_page;
739 void *src, *dst;
740
741 s_page = pfn_to_page(src_pfn);
742 d_page = pfn_to_page(dst_pfn);
743 if (PageHighMem(s_page)) {
744 src = kmap_atomic(s_page, KM_USER0);
745 dst = kmap_atomic(d_page, KM_USER1);
746 do_copy_page(dst, src);
747 kunmap_atomic(src, KM_USER0);
748 kunmap_atomic(dst, KM_USER1);
749 } else {
750 src = page_address(s_page);
751 if (PageHighMem(d_page)) {
752 /* Page pointed to by src may contain some kernel
753 * data modified by kmap_atomic()
754 */
755 do_copy_page(buffer, src);
756 dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0);
757 memcpy(dst, buffer, PAGE_SIZE);
758 kunmap_atomic(dst, KM_USER0);
759 } else {
760 dst = page_address(d_page);
761 do_copy_page(dst, src);
762 }
763 }
764}
765#else
766#define page_is_saveable(zone, pfn) saveable_page(pfn)
767
768static inline void
769copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
770{
771 do_copy_page(page_address(pfn_to_page(dst_pfn)),
772 page_address(pfn_to_page(src_pfn)));
773}
774#endif /* CONFIG_HIGHMEM */
775
740static void 776static void
741copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) 777copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
742{ 778{
743 struct zone *zone; 779 struct zone *zone;
744 unsigned long pfn; 780 unsigned long pfn;
745 781
746 for_each_zone (zone) { 782 for_each_zone(zone) {
747 unsigned long max_zone_pfn; 783 unsigned long max_zone_pfn;
748 784
749 if (is_highmem(zone))
750 continue;
751
752 mark_free_pages(zone); 785 mark_free_pages(zone);
753 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 786 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
754 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 787 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
755 if (saveable_page(pfn)) 788 if (page_is_saveable(zone, pfn))
756 memory_bm_set_bit(orig_bm, pfn); 789 memory_bm_set_bit(orig_bm, pfn);
757 } 790 }
758 memory_bm_position_reset(orig_bm); 791 memory_bm_position_reset(orig_bm);
759 memory_bm_position_reset(copy_bm); 792 memory_bm_position_reset(copy_bm);
760 do { 793 do {
761 pfn = memory_bm_next_pfn(orig_bm); 794 pfn = memory_bm_next_pfn(orig_bm);
762 if (likely(pfn != BM_END_OF_MAP)) { 795 if (likely(pfn != BM_END_OF_MAP))
763 struct page *page; 796 copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
764 void *src;
765
766 page = pfn_to_page(pfn);
767 src = page_address(page);
768 page = pfn_to_page(memory_bm_next_pfn(copy_bm));
769 copy_data_page(page_address(page), src);
770 }
771 } while (pfn != BM_END_OF_MAP); 797 } while (pfn != BM_END_OF_MAP);
772} 798}
773 799
800/* Total number of image pages */
801static unsigned int nr_copy_pages;
802/* Number of pages needed for saving the original pfns of the image pages */
803static unsigned int nr_meta_pages;
804
774/** 805/**
775 * swsusp_free - free pages allocated for the suspend. 806 * swsusp_free - free pages allocated for the suspend.
776 * 807 *
@@ -792,7 +823,7 @@ void swsusp_free(void)
792 if (PageNosave(page) && PageNosaveFree(page)) { 823 if (PageNosave(page) && PageNosaveFree(page)) {
793 ClearPageNosave(page); 824 ClearPageNosave(page);
794 ClearPageNosaveFree(page); 825 ClearPageNosaveFree(page);
795 free_page((long) page_address(page)); 826 __free_page(page);
796 } 827 }
797 } 828 }
798 } 829 }
@@ -802,34 +833,108 @@ void swsusp_free(void)
802 buffer = NULL; 833 buffer = NULL;
803} 834}
804 835
836#ifdef CONFIG_HIGHMEM
837/**
838 * count_pages_for_highmem - compute the number of non-highmem pages
839 * that will be necessary for creating copies of highmem pages.
840 */
841
842static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
843{
844 unsigned int free_highmem = count_free_highmem_pages();
845
846 if (free_highmem >= nr_highmem)
847 nr_highmem = 0;
848 else
849 nr_highmem -= free_highmem;
850
851 return nr_highmem;
852}
853#else
854static unsigned int
855count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
856#endif /* CONFIG_HIGHMEM */
805 857
806/** 858/**
807 * enough_free_mem - Make sure we enough free memory to snapshot. 859 * enough_free_mem - Make sure we have enough free memory for the
808 * 860 * snapshot image.
809 * Returns TRUE or FALSE after checking the number of available
810 * free pages.
811 */ 861 */
812 862
813static int enough_free_mem(unsigned int nr_pages) 863static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
814{ 864{
815 struct zone *zone; 865 struct zone *zone;
816 unsigned int free = 0, meta = 0; 866 unsigned int free = 0, meta = 0;
817 867
818 for_each_zone (zone) 868 for_each_zone(zone) {
819 if (!is_highmem(zone)) { 869 meta += snapshot_additional_pages(zone);
870 if (!is_highmem(zone))
820 free += zone->free_pages; 871 free += zone->free_pages;
821 meta += snapshot_additional_pages(zone); 872 }
822 }
823 873
824 pr_debug("swsusp: pages needed: %u + %u + %u, available pages: %u\n", 874 nr_pages += count_pages_for_highmem(nr_highmem);
875 pr_debug("swsusp: Normal pages needed: %u + %u + %u, available pages: %u\n",
825 nr_pages, PAGES_FOR_IO, meta, free); 876 nr_pages, PAGES_FOR_IO, meta, free);
826 877
827 return free > nr_pages + PAGES_FOR_IO + meta; 878 return free > nr_pages + PAGES_FOR_IO + meta;
828} 879}
829 880
881#ifdef CONFIG_HIGHMEM
882/**
883 * get_highmem_buffer - if there are some highmem pages in the suspend
884 * image, we may need the buffer to copy them and/or load their data.
885 */
886
887static inline int get_highmem_buffer(int safe_needed)
888{
889 buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
890 return buffer ? 0 : -ENOMEM;
891}
892
893/**
894 * alloc_highmem_image_pages - allocate some highmem pages for the image.
895 * Try to allocate as many pages as needed, but if the number of free
896 * highmem pages is lesser than that, allocate them all.
897 */
898
899static inline unsigned int
900alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
901{
902 unsigned int to_alloc = count_free_highmem_pages();
903
904 if (to_alloc > nr_highmem)
905 to_alloc = nr_highmem;
906
907 nr_highmem -= to_alloc;
908 while (to_alloc-- > 0) {
909 struct page *page;
910
911 page = alloc_image_page(__GFP_HIGHMEM);
912 memory_bm_set_bit(bm, page_to_pfn(page));
913 }
914 return nr_highmem;
915}
916#else
917static inline int get_highmem_buffer(int safe_needed) { return 0; }
918
919static inline unsigned int
920alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
921#endif /* CONFIG_HIGHMEM */
922
923/**
924 * swsusp_alloc - allocate memory for the suspend image
925 *
926 * We first try to allocate as many highmem pages as there are
927 * saveable highmem pages in the system. If that fails, we allocate
928 * non-highmem pages for the copies of the remaining highmem ones.
929 *
930 * In this approach it is likely that the copies of highmem pages will
931 * also be located in the high memory, because of the way in which
932 * copy_data_pages() works.
933 */
934
830static int 935static int
831swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, 936swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
832 unsigned int nr_pages) 937 unsigned int nr_pages, unsigned int nr_highmem)
833{ 938{
834 int error; 939 int error;
835 940
@@ -841,13 +946,19 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
841 if (error) 946 if (error)
842 goto Free; 947 goto Free;
843 948
949 if (nr_highmem > 0) {
950 error = get_highmem_buffer(PG_ANY);
951 if (error)
952 goto Free;
953
954 nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem);
955 }
844 while (nr_pages-- > 0) { 956 while (nr_pages-- > 0) {
845 struct page *page = alloc_page(GFP_ATOMIC | __GFP_COLD); 957 struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
958
846 if (!page) 959 if (!page)
847 goto Free; 960 goto Free;
848 961
849 SetPageNosave(page);
850 SetPageNosaveFree(page);
851 memory_bm_set_bit(copy_bm, page_to_pfn(page)); 962 memory_bm_set_bit(copy_bm, page_to_pfn(page));
852 } 963 }
853 return 0; 964 return 0;
@@ -857,30 +968,39 @@ Free:
857 return -ENOMEM; 968 return -ENOMEM;
858} 969}
859 970
860/* Memory bitmap used for marking saveable pages */ 971/* Memory bitmap used for marking saveable pages (during suspend) or the
972 * suspend image pages (during resume)
973 */
861static struct memory_bitmap orig_bm; 974static struct memory_bitmap orig_bm;
862/* Memory bitmap used for marking allocated pages that will contain the copies 975/* Memory bitmap used on suspend for marking allocated pages that will contain
863 * of saveable pages 976 * the copies of saveable pages. During resume it is initially used for
977 * marking the suspend image pages, but then its set bits are duplicated in
978 * @orig_bm and it is released. Next, on systems with high memory, it may be
979 * used for marking "safe" highmem pages, but it has to be reinitialized for
980 * this purpose.
864 */ 981 */
865static struct memory_bitmap copy_bm; 982static struct memory_bitmap copy_bm;
866 983
867asmlinkage int swsusp_save(void) 984asmlinkage int swsusp_save(void)
868{ 985{
869 unsigned int nr_pages; 986 unsigned int nr_pages, nr_highmem;
870 987
871 pr_debug("swsusp: critical section: \n"); 988 printk("swsusp: critical section: \n");
872 989
873 drain_local_pages(); 990 drain_local_pages();
874 nr_pages = count_data_pages(); 991 nr_pages = count_data_pages();
875 printk("swsusp: Need to copy %u pages\n", nr_pages); 992 nr_highmem = count_highmem_pages();
993 printk("swsusp: Need to copy %u pages\n", nr_pages + nr_highmem);
876 994
877 if (!enough_free_mem(nr_pages)) { 995 if (!enough_free_mem(nr_pages, nr_highmem)) {
878 printk(KERN_ERR "swsusp: Not enough free memory\n"); 996 printk(KERN_ERR "swsusp: Not enough free memory\n");
879 return -ENOMEM; 997 return -ENOMEM;
880 } 998 }
881 999
882 if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages)) 1000 if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
1001 printk(KERN_ERR "swsusp: Memory allocation failed\n");
883 return -ENOMEM; 1002 return -ENOMEM;
1003 }
884 1004
885 /* During allocating of suspend pagedir, new cold pages may appear. 1005 /* During allocating of suspend pagedir, new cold pages may appear.
886 * Kill them. 1006 * Kill them.
@@ -894,10 +1014,12 @@ asmlinkage int swsusp_save(void)
894 * touch swap space! Except we must write out our image of course. 1014 * touch swap space! Except we must write out our image of course.
895 */ 1015 */
896 1016
1017 nr_pages += nr_highmem;
897 nr_copy_pages = nr_pages; 1018 nr_copy_pages = nr_pages;
898 nr_meta_pages = (nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT; 1019 nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
899 1020
900 printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages); 1021 printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages);
1022
901 return 0; 1023 return 0;
902} 1024}
903 1025
@@ -960,7 +1082,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
960 1082
961 if (!buffer) { 1083 if (!buffer) {
962 /* This makes the buffer be freed by swsusp_free() */ 1084 /* This makes the buffer be freed by swsusp_free() */
963 buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); 1085 buffer = get_image_page(GFP_ATOMIC, PG_ANY);
964 if (!buffer) 1086 if (!buffer)
965 return -ENOMEM; 1087 return -ENOMEM;
966 } 1088 }
@@ -975,9 +1097,23 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
975 memset(buffer, 0, PAGE_SIZE); 1097 memset(buffer, 0, PAGE_SIZE);
976 pack_pfns(buffer, &orig_bm); 1098 pack_pfns(buffer, &orig_bm);
977 } else { 1099 } else {
978 unsigned long pfn = memory_bm_next_pfn(&copy_bm); 1100 struct page *page;
979 1101
980 handle->buffer = page_address(pfn_to_page(pfn)); 1102 page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1103 if (PageHighMem(page)) {
1104 /* Highmem pages are copied to the buffer,
1105 * because we can't return with a kmapped
1106 * highmem page (we may not be called again).
1107 */
1108 void *kaddr;
1109
1110 kaddr = kmap_atomic(page, KM_USER0);
1111 memcpy(buffer, kaddr, PAGE_SIZE);
1112 kunmap_atomic(kaddr, KM_USER0);
1113 handle->buffer = buffer;
1114 } else {
1115 handle->buffer = page_address(page);
1116 }
981 } 1117 }
982 handle->prev = handle->cur; 1118 handle->prev = handle->cur;
983 } 1119 }
@@ -1005,7 +1141,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm)
1005 unsigned long pfn, max_zone_pfn; 1141 unsigned long pfn, max_zone_pfn;
1006 1142
1007 /* Clear page flags */ 1143 /* Clear page flags */
1008 for_each_zone (zone) { 1144 for_each_zone(zone) {
1009 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 1145 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1010 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1146 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1011 if (pfn_valid(pfn)) 1147 if (pfn_valid(pfn))
@@ -1101,6 +1237,218 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1101 } 1237 }
1102} 1238}
1103 1239
1240/* List of "safe" pages that may be used to store data loaded from the suspend
1241 * image
1242 */
1243static struct linked_page *safe_pages_list;
1244
1245#ifdef CONFIG_HIGHMEM
1246/* struct highmem_pbe is used for creating the list of highmem pages that
1247 * should be restored atomically during the resume from disk, because the page
1248 * frames they have occupied before the suspend are in use.
1249 */
1250struct highmem_pbe {
1251 struct page *copy_page; /* data is here now */
1252 struct page *orig_page; /* data was here before the suspend */
1253 struct highmem_pbe *next;
1254};
1255
1256/* List of highmem PBEs needed for restoring the highmem pages that were
1257 * allocated before the suspend and included in the suspend image, but have
1258 * also been allocated by the "resume" kernel, so their contents cannot be
1259 * written directly to their "original" page frames.
1260 */
1261static struct highmem_pbe *highmem_pblist;
1262
1263/**
1264 * count_highmem_image_pages - compute the number of highmem pages in the
1265 * suspend image. The bits in the memory bitmap @bm that correspond to the
1266 * image pages are assumed to be set.
1267 */
1268
1269static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
1270{
1271 unsigned long pfn;
1272 unsigned int cnt = 0;
1273
1274 memory_bm_position_reset(bm);
1275 pfn = memory_bm_next_pfn(bm);
1276 while (pfn != BM_END_OF_MAP) {
1277 if (PageHighMem(pfn_to_page(pfn)))
1278 cnt++;
1279
1280 pfn = memory_bm_next_pfn(bm);
1281 }
1282 return cnt;
1283}
1284
1285/**
1286 * prepare_highmem_image - try to allocate as many highmem pages as
1287 * there are highmem image pages (@nr_highmem_p points to the variable
1288 * containing the number of highmem image pages). The pages that are
1289 * "safe" (ie. will not be overwritten when the suspend image is
1290 * restored) have the corresponding bits set in @bm (it must be
1291 * uninitialized).
1292 *
1293 * NOTE: This function should not be called if there are no highmem
1294 * image pages.
1295 */
1296
1297static unsigned int safe_highmem_pages;
1298
1299static struct memory_bitmap *safe_highmem_bm;
1300
1301static int
1302prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1303{
1304 unsigned int to_alloc;
1305
1306 if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
1307 return -ENOMEM;
1308
1309 if (get_highmem_buffer(PG_SAFE))
1310 return -ENOMEM;
1311
1312 to_alloc = count_free_highmem_pages();
1313 if (to_alloc > *nr_highmem_p)
1314 to_alloc = *nr_highmem_p;
1315 else
1316 *nr_highmem_p = to_alloc;
1317
1318 safe_highmem_pages = 0;
1319 while (to_alloc-- > 0) {
1320 struct page *page;
1321
1322 page = alloc_page(__GFP_HIGHMEM);
1323 if (!PageNosaveFree(page)) {
1324 /* The page is "safe", set its bit in the bitmap */
1325 memory_bm_set_bit(bm, page_to_pfn(page));
1326 safe_highmem_pages++;
1327 }
1328 /* Mark the page as allocated */
1329 SetPageNosave(page);
1330 SetPageNosaveFree(page);
1331 }
1332 memory_bm_position_reset(bm);
1333 safe_highmem_bm = bm;
1334 return 0;
1335}
1336
1337/**
1338 * get_highmem_page_buffer - for given highmem image page find the buffer
1339 * that snapshot_write_next() should set for its caller to write to.
1340 *
1341 * If the page is to be saved to its "original" page frame or a copy of
1342 * the page is to be made in the highmem, @buffer is returned. Otherwise,
1343 * the copy of the page is to be made in normal memory, so the address of
1344 * the copy is returned.
1345 *
1346 * If @buffer is returned, the caller of snapshot_write_next() will write
1347 * the page's contents to @buffer, so they will have to be copied to the
1348 * right location on the next call to snapshot_write_next() and it is done
1349 * with the help of copy_last_highmem_page(). For this purpose, if
1350 * @buffer is returned, @last_highmem_page is set to the page to which
1352 */
1353
1354static struct page *last_highmem_page;
1355
1356static void *
1357get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1358{
1359 struct highmem_pbe *pbe;
1360 void *kaddr;
1361
1362 if (PageNosave(page) && PageNosaveFree(page)) {
1363 /* We have allocated the "original" page frame and we can
1364 * use it directly to store the loaded page.
1365 */
1366 last_highmem_page = page;
1367 return buffer;
1368 }
1369 /* The "original" page frame has not been allocated and we have to
1370 * use a "safe" page frame to store the loaded page.
1371 */
1372 pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
1373 if (!pbe) {
1374 swsusp_free();
1375 return NULL;
1376 }
1377 pbe->orig_page = page;
1378 if (safe_highmem_pages > 0) {
1379 struct page *tmp;
1380
1381 /* Copy of the page will be stored in high memory */
1382 kaddr = buffer;
1383 tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
1384 safe_highmem_pages--;
1385 last_highmem_page = tmp;
1386 pbe->copy_page = tmp;
1387 } else {
1388 /* Copy of the page will be stored in normal memory */
1389 kaddr = safe_pages_list;
1390 safe_pages_list = safe_pages_list->next;
1391 pbe->copy_page = virt_to_page(kaddr);
1392 }
1393 pbe->next = highmem_pblist;
1394 highmem_pblist = pbe;
1395 return kaddr;
1396}
1397
1398/**
1399 * copy_last_highmem_page - copy the contents of a highmem image from
1400 * @buffer, where the caller of snapshot_write_next() has placed them,
1401 * to the right location represented by @last_highmem_page.
1402 */
1403
1404static void copy_last_highmem_page(void)
1405{
1406 if (last_highmem_page) {
1407 void *dst;
1408
1409 dst = kmap_atomic(last_highmem_page, KM_USER0);
1410 memcpy(dst, buffer, PAGE_SIZE);
1411 kunmap_atomic(dst, KM_USER0);
1412 last_highmem_page = NULL;
1413 }
1414}
1415
1416static inline int last_highmem_page_copied(void)
1417{
1418 return !last_highmem_page;
1419}
1420
1421static inline void free_highmem_data(void)
1422{
1423 if (safe_highmem_bm)
1424 memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
1425
1426 if (buffer)
1427 free_image_page(buffer, PG_UNSAFE_CLEAR);
1428}
1429#else
1430static inline int get_safe_write_buffer(void) { return 0; }
1431
1432static unsigned int
1433count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
1434
1435static inline int
1436prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1437{
1438 return 0;
1439}
1440
1441static inline void *
1442get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1443{
1444 return NULL;
1445}
1446
1447static inline void copy_last_highmem_page(void) {}
1448static inline int last_highmem_page_copied(void) { return 1; }
1449static inline void free_highmem_data(void) {}
1450#endif /* CONFIG_HIGHMEM */
1451
1104/** 1452/**
1105 * prepare_image - use the memory bitmap @bm to mark the pages that will 1453 * prepare_image - use the memory bitmap @bm to mark the pages that will
1106 * be overwritten in the process of restoring the system memory state 1454 * be overwritten in the process of restoring the system memory state
@@ -1110,20 +1458,25 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1110 * The idea is to allocate a new memory bitmap first and then allocate 1458 * The idea is to allocate a new memory bitmap first and then allocate
1111 * as many pages as needed for the image data, but not to assign these 1459 * as many pages as needed for the image data, but not to assign these
1112 * pages to specific tasks initially. Instead, we just mark them as 1460 * pages to specific tasks initially. Instead, we just mark them as
1113 * allocated and create a list of "safe" pages that will be used later. 1461 * allocated and create lists of "safe" pages that will be used
1462 * later. On systems with high memory a list of "safe" highmem pages is
1463 * also created.
1114 */ 1464 */
1115 1465
1116#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) 1466#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
1117 1467
1118static struct linked_page *safe_pages_list;
1119
1120static int 1468static int
1121prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) 1469prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
1122{ 1470{
1123 unsigned int nr_pages; 1471 unsigned int nr_pages, nr_highmem;
1124 struct linked_page *sp_list, *lp; 1472 struct linked_page *sp_list, *lp;
1125 int error; 1473 int error;
1126 1474
1475 /* If there is no highmem, the buffer will not be necessary */
1476 free_image_page(buffer, PG_UNSAFE_CLEAR);
1477 buffer = NULL;
1478
1479 nr_highmem = count_highmem_image_pages(bm);
1127 error = mark_unsafe_pages(bm); 1480 error = mark_unsafe_pages(bm);
1128 if (error) 1481 if (error)
1129 goto Free; 1482 goto Free;
@@ -1134,6 +1487,11 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
1134 1487
1135 duplicate_memory_bitmap(new_bm, bm); 1488 duplicate_memory_bitmap(new_bm, bm);
1136 memory_bm_free(bm, PG_UNSAFE_KEEP); 1489 memory_bm_free(bm, PG_UNSAFE_KEEP);
1490 if (nr_highmem > 0) {
1491 error = prepare_highmem_image(bm, &nr_highmem);
1492 if (error)
1493 goto Free;
1494 }
1137 /* Reserve some safe pages for potential later use. 1495 /* Reserve some safe pages for potential later use.
1138 * 1496 *
1139 * NOTE: This way we make sure there will be enough safe pages for the 1497 * NOTE: This way we make sure there will be enough safe pages for the
@@ -1142,10 +1500,10 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
1142 */ 1500 */
1143 sp_list = NULL; 1501 sp_list = NULL;
1144 /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ 1502 /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */
1145 nr_pages = nr_copy_pages - allocated_unsafe_pages; 1503 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
1146 nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); 1504 nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
1147 while (nr_pages > 0) { 1505 while (nr_pages > 0) {
1148 lp = alloc_image_page(GFP_ATOMIC, PG_SAFE); 1506 lp = get_image_page(GFP_ATOMIC, PG_SAFE);
1149 if (!lp) { 1507 if (!lp) {
1150 error = -ENOMEM; 1508 error = -ENOMEM;
1151 goto Free; 1509 goto Free;
@@ -1156,7 +1514,7 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
1156 } 1514 }
1157 /* Preallocate memory for the image */ 1515 /* Preallocate memory for the image */
1158 safe_pages_list = NULL; 1516 safe_pages_list = NULL;
1159 nr_pages = nr_copy_pages - allocated_unsafe_pages; 1517 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
1160 while (nr_pages > 0) { 1518 while (nr_pages > 0) {
1161 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); 1519 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
1162 if (!lp) { 1520 if (!lp) {
@@ -1196,6 +1554,9 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
1196 struct pbe *pbe; 1554 struct pbe *pbe;
1197 struct page *page = pfn_to_page(memory_bm_next_pfn(bm)); 1555 struct page *page = pfn_to_page(memory_bm_next_pfn(bm));
1198 1556
1557 if (PageHighMem(page))
1558 return get_highmem_page_buffer(page, ca);
1559
1199 if (PageNosave(page) && PageNosaveFree(page)) 1560 if (PageNosave(page) && PageNosaveFree(page))
1200 /* We have allocated the "original" page frame and we can 1561 /* We have allocated the "original" page frame and we can
1201 * use it directly to store the loaded page. 1562 * use it directly to store the loaded page.
@@ -1210,12 +1571,12 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
1210 swsusp_free(); 1571 swsusp_free();
1211 return NULL; 1572 return NULL;
1212 } 1573 }
1213 pbe->orig_address = (unsigned long)page_address(page); 1574 pbe->orig_address = page_address(page);
1214 pbe->address = (unsigned long)safe_pages_list; 1575 pbe->address = safe_pages_list;
1215 safe_pages_list = safe_pages_list->next; 1576 safe_pages_list = safe_pages_list->next;
1216 pbe->next = restore_pblist; 1577 pbe->next = restore_pblist;
1217 restore_pblist = pbe; 1578 restore_pblist = pbe;
1218 return (void *)pbe->address; 1579 return pbe->address;
1219} 1580}
1220 1581
1221/** 1582/**
@@ -1249,14 +1610,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
1249 if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) 1610 if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
1250 return 0; 1611 return 0;
1251 1612
1252 if (!buffer) { 1613 if (handle->offset == 0) {
1253 /* This makes the buffer be freed by swsusp_free() */ 1614 if (!buffer)
1254 buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); 1615 /* This makes the buffer be freed by swsusp_free() */
1616 buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1617
1255 if (!buffer) 1618 if (!buffer)
1256 return -ENOMEM; 1619 return -ENOMEM;
1257 } 1620
1258 if (!handle->offset)
1259 handle->buffer = buffer; 1621 handle->buffer = buffer;
1622 }
1260 handle->sync_read = 1; 1623 handle->sync_read = 1;
1261 if (handle->prev < handle->cur) { 1624 if (handle->prev < handle->cur) {
1262 if (handle->prev == 0) { 1625 if (handle->prev == 0) {
@@ -1284,8 +1647,10 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
1284 return -ENOMEM; 1647 return -ENOMEM;
1285 } 1648 }
1286 } else { 1649 } else {
1650 copy_last_highmem_page();
1287 handle->buffer = get_buffer(&orig_bm, &ca); 1651 handle->buffer = get_buffer(&orig_bm, &ca);
1288 handle->sync_read = 0; 1652 if (handle->buffer != buffer)
1653 handle->sync_read = 0;
1289 } 1654 }
1290 handle->prev = handle->cur; 1655 handle->prev = handle->cur;
1291 } 1656 }
@@ -1301,15 +1666,73 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
1301 return count; 1666 return count;
1302} 1667}
1303 1668
1669/**
1670 * snapshot_write_finalize - must be called after the last call to
1671 * snapshot_write_next() in case the last page in the image happens
1672 * to be a highmem page and its contents should be stored in the
1673 * highmem. Additionally, it releases the memory that will not be
1674 * used any more.
1675 */
1676
1677void snapshot_write_finalize(struct snapshot_handle *handle)
1678{
1679 copy_last_highmem_page();
1680 /* Free only if we have loaded the image entirely */
1681 if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) {
1682 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
1683 free_highmem_data();
1684 }
1685}
1686
1304int snapshot_image_loaded(struct snapshot_handle *handle) 1687int snapshot_image_loaded(struct snapshot_handle *handle)
1305{ 1688{
1306 return !(!nr_copy_pages || 1689 return !(!nr_copy_pages || !last_highmem_page_copied() ||
1307 handle->cur <= nr_meta_pages + nr_copy_pages); 1690 handle->cur <= nr_meta_pages + nr_copy_pages);
1308} 1691}
1309 1692
1310void snapshot_free_unused_memory(struct snapshot_handle *handle) 1693#ifdef CONFIG_HIGHMEM
1694/* Assumes that @buf is ready and points to a "safe" page */
1695static inline void
1696swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
1311{ 1697{
1312 /* Free only if we have loaded the image entirely */ 1698 void *kaddr1, *kaddr2;
1313 if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) 1699
1314 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); 1700 kaddr1 = kmap_atomic(p1, KM_USER0);
1701 kaddr2 = kmap_atomic(p2, KM_USER1);
1702 memcpy(buf, kaddr1, PAGE_SIZE);
1703 memcpy(kaddr1, kaddr2, PAGE_SIZE);
1704 memcpy(kaddr2, buf, PAGE_SIZE);
1705 kunmap_atomic(kaddr1, KM_USER0);
1706 kunmap_atomic(kaddr2, KM_USER1);
1707}
1708
1709/**
1710 * restore_highmem - for each highmem page that was allocated before
1711 * the suspend and included in the suspend image, and also has been
1712 * allocated by the "resume" kernel swap its current (ie. "before
1713 * resume") contents with the previous (ie. "before suspend") one.
1714 *
1715 * If the resume eventually fails, we can call this function once
1716 * again and restore the "before resume" highmem state.
1717 */
1718
1719int restore_highmem(void)
1720{
1721 struct highmem_pbe *pbe = highmem_pblist;
1722 void *buf;
1723
1724 if (!pbe)
1725 return 0;
1726
1727 buf = get_image_page(GFP_ATOMIC, PG_SAFE);
1728 if (!buf)
1729 return -ENOMEM;
1730
1731 while (pbe) {
1732 swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
1733 pbe = pbe->next;
1734 }
1735 free_image_page(buf, PG_UNSAFE_CLEAR);
1736 return 0;
1315} 1737}
1738#endif /* CONFIG_HIGHMEM */
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index aa5a9bff01f1..cbd187e90410 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -558,7 +558,7 @@ static int load_image(struct swap_map_handle *handle,
558 error = err2; 558 error = err2;
559 if (!error) { 559 if (!error) {
560 printk("\b\b\b\bdone\n"); 560 printk("\b\b\b\bdone\n");
561 snapshot_free_unused_memory(snapshot); 561 snapshot_write_finalize(snapshot);
562 if (!snapshot_image_loaded(snapshot)) 562 if (!snapshot_image_loaded(snapshot))
563 error = -ENODATA; 563 error = -ENODATA;
564 } 564 }
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 4147a756a8c7..68de5c1dbd78 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -64,10 +64,8 @@ int in_suspend __nosavedata = 0;
64 64
65#ifdef CONFIG_HIGHMEM 65#ifdef CONFIG_HIGHMEM
66unsigned int count_highmem_pages(void); 66unsigned int count_highmem_pages(void);
67int save_highmem(void);
68int restore_highmem(void); 67int restore_highmem(void);
69#else 68#else
70static inline int save_highmem(void) { return 0; }
71static inline int restore_highmem(void) { return 0; } 69static inline int restore_highmem(void) { return 0; }
72static inline unsigned int count_highmem_pages(void) { return 0; } 70static inline unsigned int count_highmem_pages(void) { return 0; }
73#endif 71#endif
@@ -184,7 +182,7 @@ static inline unsigned long __shrink_memory(long tmp)
184 182
185int swsusp_shrink_memory(void) 183int swsusp_shrink_memory(void)
186{ 184{
187 long size, tmp; 185 long tmp;
188 struct zone *zone; 186 struct zone *zone;
189 unsigned long pages = 0; 187 unsigned long pages = 0;
190 unsigned int i = 0; 188 unsigned int i = 0;
@@ -192,15 +190,27 @@ int swsusp_shrink_memory(void)
192 190
193 printk("Shrinking memory... "); 191 printk("Shrinking memory... ");
194 do { 192 do {
195 size = 2 * count_highmem_pages(); 193 long size, highmem_size;
196 size += size / 50 + count_data_pages() + PAGES_FOR_IO; 194
195 highmem_size = count_highmem_pages();
196 size = count_data_pages() + PAGES_FOR_IO;
197 tmp = size; 197 tmp = size;
198 size += highmem_size;
198 for_each_zone (zone) 199 for_each_zone (zone)
199 if (!is_highmem(zone) && populated_zone(zone)) { 200 if (populated_zone(zone)) {
200 tmp -= zone->free_pages; 201 if (is_highmem(zone)) {
201 tmp += zone->lowmem_reserve[ZONE_NORMAL]; 202 highmem_size -= zone->free_pages;
202 tmp += snapshot_additional_pages(zone); 203 } else {
204 tmp -= zone->free_pages;
205 tmp += zone->lowmem_reserve[ZONE_NORMAL];
206 tmp += snapshot_additional_pages(zone);
207 }
203 } 208 }
209
210 if (highmem_size < 0)
211 highmem_size = 0;
212
213 tmp += highmem_size;
204 if (tmp > 0) { 214 if (tmp > 0) {
205 tmp = __shrink_memory(tmp); 215 tmp = __shrink_memory(tmp);
206 if (!tmp) 216 if (!tmp)
@@ -223,6 +233,7 @@ int swsusp_suspend(void)
223 233
224 if ((error = arch_prepare_suspend())) 234 if ((error = arch_prepare_suspend()))
225 return error; 235 return error;
236
226 local_irq_disable(); 237 local_irq_disable();
227 /* At this point, device_suspend() has been called, but *not* 238 /* At this point, device_suspend() has been called, but *not*
228 * device_power_down(). We *must* device_power_down() now. 239 * device_power_down(). We *must* device_power_down() now.
@@ -235,18 +246,11 @@ int swsusp_suspend(void)
235 goto Enable_irqs; 246 goto Enable_irqs;
236 } 247 }
237 248
238 if ((error = save_highmem())) {
239 printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
240 goto Restore_highmem;
241 }
242
243 save_processor_state(); 249 save_processor_state();
244 if ((error = swsusp_arch_suspend())) 250 if ((error = swsusp_arch_suspend()))
245 printk(KERN_ERR "Error %d suspending\n", error); 251 printk(KERN_ERR "Error %d suspending\n", error);
246 /* Restore control flow magically appears here */ 252 /* Restore control flow magically appears here */
247 restore_processor_state(); 253 restore_processor_state();
248Restore_highmem:
249 restore_highmem();
250 /* NOTE: device_power_up() is just a resume() for devices 254 /* NOTE: device_power_up() is just a resume() for devices
251 * that suspended with irqs off ... no overall powerup. 255 * that suspended with irqs off ... no overall powerup.
252 */ 256 */
@@ -268,18 +272,23 @@ int swsusp_resume(void)
268 printk(KERN_ERR "Some devices failed to power down, very bad\n"); 272 printk(KERN_ERR "Some devices failed to power down, very bad\n");
269 /* We'll ignore saved state, but this gets preempt count (etc) right */ 273 /* We'll ignore saved state, but this gets preempt count (etc) right */
270 save_processor_state(); 274 save_processor_state();
271 error = swsusp_arch_resume(); 275 error = restore_highmem();
272 /* Code below is only ever reached in case of failure. Otherwise 276 if (!error) {
273 * execution continues at place where swsusp_arch_suspend was called 277 error = swsusp_arch_resume();
274 */ 278 /* The code below is only ever reached in case of a failure.
275 BUG_ON(!error); 279 * Otherwise execution continues at place where
280 * swsusp_arch_suspend() was called
281 */
282 BUG_ON(!error);
283 /* This call to restore_highmem() undoes the previous one */
284 restore_highmem();
285 }
276 /* The only reason why swsusp_arch_resume() can fail is memory being 286 /* The only reason why swsusp_arch_resume() can fail is memory being
277 * very tight, so we have to free it as soon as we can to avoid 287 * very tight, so we have to free it as soon as we can to avoid
278 * subsequent failures 288 * subsequent failures
279 */ 289 */
280 swsusp_free(); 290 swsusp_free();
281 restore_processor_state(); 291 restore_processor_state();
282 restore_highmem();
283 touch_softlockup_watchdog(); 292 touch_softlockup_watchdog();
284 device_power_up(); 293 device_power_up();
285 local_irq_enable(); 294 local_irq_enable();
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 05c58a2c0dd4..a63b25c63b49 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -194,12 +194,12 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
194 break; 194 break;
195 195
196 case SNAPSHOT_ATOMIC_RESTORE: 196 case SNAPSHOT_ATOMIC_RESTORE:
197 snapshot_write_finalize(&data->handle);
197 if (data->mode != O_WRONLY || !data->frozen || 198 if (data->mode != O_WRONLY || !data->frozen ||
198 !snapshot_image_loaded(&data->handle)) { 199 !snapshot_image_loaded(&data->handle)) {
199 error = -EPERM; 200 error = -EPERM;
200 break; 201 break;
201 } 202 }
202 snapshot_free_unused_memory(&data->handle);
203 down(&pm_sem); 203 down(&pm_sem);
204 pm_prepare_console(); 204 pm_prepare_console();
205 suspend_console(); 205 suspend_console();
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2e97baa3b2aa..2a6a79f68138 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1260,6 +1260,9 @@ out:
1260 } 1260 }
1261 if (!all_zones_ok) { 1261 if (!all_zones_ok) {
1262 cond_resched(); 1262 cond_resched();
1263
1264 try_to_freeze();
1265
1263 goto loop_again; 1266 goto loop_again;
1264 } 1267 }
1265 1268