diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-06-04 15:28:45 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-06-04 15:28:45 -0400 |
commit | a3fe778c7895cd847d23c25ad566d83346282a77 (patch) | |
tree | 53ba1b736dac08680a08753f7163a1890d7a0d03 /mm/swapfile.c | |
parent | 9171c670b4915e30360c2aed530b8377fbbcc852 (diff) | |
parent | 165c8aed5bbc6bdddbccae0ba9db451732558ff9 (diff) |
Merge tag 'stable/frontswap.v16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/mm
Pull frontswap feature from Konrad Rzeszutek Wilk:
"Frontswap provides a "transcendent memory" interface for swap pages.
In some environments, dramatic performance savings may be obtained
because swapped pages are saved in RAM (or a RAM-like device) instead
of a swap disk. This tag provides the basic infrastructure along with
some changes to the existing backends."
Fix up trivial conflict in mm/Makefile due to removal of swap token code
changing a line next to the new frontswap entry.
This pull request came in before the merge window even opened, it got
delayed to after the merge window by me just wanting to make sure it had
actual users. Apparently IBM is using this on their embedded side, and
Jan Beulich says that it's already made available for SLES and OpenSUSE
users.
Also acked by Rik van Riel, and Konrad points to other people liking it
too. So in it goes.
By Dan Magenheimer (4) and Konrad Rzeszutek Wilk (2)
via Konrad Rzeszutek Wilk
* tag 'stable/frontswap.v16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/mm:
frontswap: s/put_page/store/g s/get_page/load
MAINTAINER: Add myself for the frontswap API
mm: frontswap: config and doc files
mm: frontswap: core frontswap functionality
mm: frontswap: core swap subsystem hooks and headers
mm: frontswap: add frontswap header file
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- | mm/swapfile.c | 54 |
1 files changed, 41 insertions, 13 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c index 457b10baef59..de5bc51c4a66 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -31,6 +31,8 @@ | |||
31 | #include <linux/memcontrol.h> | 31 | #include <linux/memcontrol.h> |
32 | #include <linux/poll.h> | 32 | #include <linux/poll.h> |
33 | #include <linux/oom.h> | 33 | #include <linux/oom.h> |
34 | #include <linux/frontswap.h> | ||
35 | #include <linux/swapfile.h> | ||
34 | 36 | ||
35 | #include <asm/pgtable.h> | 37 | #include <asm/pgtable.h> |
36 | #include <asm/tlbflush.h> | 38 | #include <asm/tlbflush.h> |
@@ -42,7 +44,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t, | |||
42 | static void free_swap_count_continuations(struct swap_info_struct *); | 44 | static void free_swap_count_continuations(struct swap_info_struct *); |
43 | static sector_t map_swap_entry(swp_entry_t, struct block_device**); | 45 | static sector_t map_swap_entry(swp_entry_t, struct block_device**); |
44 | 46 | ||
45 | static DEFINE_SPINLOCK(swap_lock); | 47 | DEFINE_SPINLOCK(swap_lock); |
46 | static unsigned int nr_swapfiles; | 48 | static unsigned int nr_swapfiles; |
47 | long nr_swap_pages; | 49 | long nr_swap_pages; |
48 | long total_swap_pages; | 50 | long total_swap_pages; |
@@ -53,9 +55,9 @@ static const char Unused_file[] = "Unused swap file entry "; | |||
53 | static const char Bad_offset[] = "Bad swap offset entry "; | 55 | static const char Bad_offset[] = "Bad swap offset entry "; |
54 | static const char Unused_offset[] = "Unused swap offset entry "; | 56 | static const char Unused_offset[] = "Unused swap offset entry "; |
55 | 57 | ||
56 | static struct swap_list_t swap_list = {-1, -1}; | 58 | struct swap_list_t swap_list = {-1, -1}; |
57 | 59 | ||
58 | static struct swap_info_struct *swap_info[MAX_SWAPFILES]; | 60 | struct swap_info_struct *swap_info[MAX_SWAPFILES]; |
59 | 61 | ||
60 | static DEFINE_MUTEX(swapon_mutex); | 62 | static DEFINE_MUTEX(swapon_mutex); |
61 | 63 | ||
@@ -556,6 +558,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p, | |||
556 | swap_list.next = p->type; | 558 | swap_list.next = p->type; |
557 | nr_swap_pages++; | 559 | nr_swap_pages++; |
558 | p->inuse_pages--; | 560 | p->inuse_pages--; |
561 | frontswap_invalidate_page(p->type, offset); | ||
559 | if ((p->flags & SWP_BLKDEV) && | 562 | if ((p->flags & SWP_BLKDEV) && |
560 | disk->fops->swap_slot_free_notify) | 563 | disk->fops->swap_slot_free_notify) |
561 | disk->fops->swap_slot_free_notify(p->bdev, offset); | 564 | disk->fops->swap_slot_free_notify(p->bdev, offset); |
@@ -985,11 +988,12 @@ static int unuse_mm(struct mm_struct *mm, | |||
985 | } | 988 | } |
986 | 989 | ||
987 | /* | 990 | /* |
988 | * Scan swap_map from current position to next entry still in use. | 991 | * Scan swap_map (or frontswap_map if frontswap parameter is true) |
992 | * from current position to next entry still in use. | ||
989 | * Recycle to start on reaching the end, returning 0 when empty. | 993 | * Recycle to start on reaching the end, returning 0 when empty. |
990 | */ | 994 | */ |
991 | static unsigned int find_next_to_unuse(struct swap_info_struct *si, | 995 | static unsigned int find_next_to_unuse(struct swap_info_struct *si, |
992 | unsigned int prev) | 996 | unsigned int prev, bool frontswap) |
993 | { | 997 | { |
994 | unsigned int max = si->max; | 998 | unsigned int max = si->max; |
995 | unsigned int i = prev; | 999 | unsigned int i = prev; |
@@ -1015,6 +1019,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, | |||
1015 | prev = 0; | 1019 | prev = 0; |
1016 | i = 1; | 1020 | i = 1; |
1017 | } | 1021 | } |
1022 | if (frontswap) { | ||
1023 | if (frontswap_test(si, i)) | ||
1024 | break; | ||
1025 | else | ||
1026 | continue; | ||
1027 | } | ||
1018 | count = si->swap_map[i]; | 1028 | count = si->swap_map[i]; |
1019 | if (count && swap_count(count) != SWAP_MAP_BAD) | 1029 | if (count && swap_count(count) != SWAP_MAP_BAD) |
1020 | break; | 1030 | break; |
@@ -1026,8 +1036,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, | |||
1026 | * We completely avoid races by reading each swap page in advance, | 1036 | * We completely avoid races by reading each swap page in advance, |
1027 | * and then search for the process using it. All the necessary | 1037 | * and then search for the process using it. All the necessary |
1028 | * page table adjustments can then be made atomically. | 1038 | * page table adjustments can then be made atomically. |
1039 | * | ||
1040 | * if the boolean frontswap is true, only unuse pages_to_unuse pages; | ||
1041 | * pages_to_unuse==0 means all pages; ignored if frontswap is false | ||
1029 | */ | 1042 | */ |
1030 | static int try_to_unuse(unsigned int type) | 1043 | int try_to_unuse(unsigned int type, bool frontswap, |
1044 | unsigned long pages_to_unuse) | ||
1031 | { | 1045 | { |
1032 | struct swap_info_struct *si = swap_info[type]; | 1046 | struct swap_info_struct *si = swap_info[type]; |
1033 | struct mm_struct *start_mm; | 1047 | struct mm_struct *start_mm; |
@@ -1060,7 +1074,7 @@ static int try_to_unuse(unsigned int type) | |||
1060 | * one pass through swap_map is enough, but not necessarily: | 1074 | * one pass through swap_map is enough, but not necessarily: |
1061 | * there are races when an instance of an entry might be missed. | 1075 | * there are races when an instance of an entry might be missed. |
1062 | */ | 1076 | */ |
1063 | while ((i = find_next_to_unuse(si, i)) != 0) { | 1077 | while ((i = find_next_to_unuse(si, i, frontswap)) != 0) { |
1064 | if (signal_pending(current)) { | 1078 | if (signal_pending(current)) { |
1065 | retval = -EINTR; | 1079 | retval = -EINTR; |
1066 | break; | 1080 | break; |
@@ -1227,6 +1241,10 @@ static int try_to_unuse(unsigned int type) | |||
1227 | * interactive performance. | 1241 | * interactive performance. |
1228 | */ | 1242 | */ |
1229 | cond_resched(); | 1243 | cond_resched(); |
1244 | if (frontswap && pages_to_unuse > 0) { | ||
1245 | if (!--pages_to_unuse) | ||
1246 | break; | ||
1247 | } | ||
1230 | } | 1248 | } |
1231 | 1249 | ||
1232 | mmput(start_mm); | 1250 | mmput(start_mm); |
@@ -1486,7 +1504,8 @@ bad_bmap: | |||
1486 | } | 1504 | } |
1487 | 1505 | ||
1488 | static void enable_swap_info(struct swap_info_struct *p, int prio, | 1506 | static void enable_swap_info(struct swap_info_struct *p, int prio, |
1489 | unsigned char *swap_map) | 1507 | unsigned char *swap_map, |
1508 | unsigned long *frontswap_map) | ||
1490 | { | 1509 | { |
1491 | int i, prev; | 1510 | int i, prev; |
1492 | 1511 | ||
@@ -1496,6 +1515,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, | |||
1496 | else | 1515 | else |
1497 | p->prio = --least_priority; | 1516 | p->prio = --least_priority; |
1498 | p->swap_map = swap_map; | 1517 | p->swap_map = swap_map; |
1518 | frontswap_map_set(p, frontswap_map); | ||
1499 | p->flags |= SWP_WRITEOK; | 1519 | p->flags |= SWP_WRITEOK; |
1500 | nr_swap_pages += p->pages; | 1520 | nr_swap_pages += p->pages; |
1501 | total_swap_pages += p->pages; | 1521 | total_swap_pages += p->pages; |
@@ -1512,6 +1532,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, | |||
1512 | swap_list.head = swap_list.next = p->type; | 1532 | swap_list.head = swap_list.next = p->type; |
1513 | else | 1533 | else |
1514 | swap_info[prev]->next = p->type; | 1534 | swap_info[prev]->next = p->type; |
1535 | frontswap_init(p->type); | ||
1515 | spin_unlock(&swap_lock); | 1536 | spin_unlock(&swap_lock); |
1516 | } | 1537 | } |
1517 | 1538 | ||
@@ -1585,7 +1606,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1585 | spin_unlock(&swap_lock); | 1606 | spin_unlock(&swap_lock); |
1586 | 1607 | ||
1587 | oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); | 1608 | oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); |
1588 | err = try_to_unuse(type); | 1609 | err = try_to_unuse(type, false, 0); /* force all pages to be unused */ |
1589 | compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); | 1610 | compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); |
1590 | 1611 | ||
1591 | if (err) { | 1612 | if (err) { |
@@ -1596,7 +1617,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1596 | * sys_swapoff for this swap_info_struct at this point. | 1617 | * sys_swapoff for this swap_info_struct at this point. |
1597 | */ | 1618 | */ |
1598 | /* re-insert swap space back into swap_list */ | 1619 | /* re-insert swap space back into swap_list */ |
1599 | enable_swap_info(p, p->prio, p->swap_map); | 1620 | enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); |
1600 | goto out_dput; | 1621 | goto out_dput; |
1601 | } | 1622 | } |
1602 | 1623 | ||
@@ -1622,9 +1643,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1622 | swap_map = p->swap_map; | 1643 | swap_map = p->swap_map; |
1623 | p->swap_map = NULL; | 1644 | p->swap_map = NULL; |
1624 | p->flags = 0; | 1645 | p->flags = 0; |
1646 | frontswap_invalidate_area(type); | ||
1625 | spin_unlock(&swap_lock); | 1647 | spin_unlock(&swap_lock); |
1626 | mutex_unlock(&swapon_mutex); | 1648 | mutex_unlock(&swapon_mutex); |
1627 | vfree(swap_map); | 1649 | vfree(swap_map); |
1650 | vfree(frontswap_map_get(p)); | ||
1628 | /* Destroy swap account informatin */ | 1651 | /* Destroy swap account informatin */ |
1629 | swap_cgroup_swapoff(type); | 1652 | swap_cgroup_swapoff(type); |
1630 | 1653 | ||
@@ -1988,6 +2011,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
1988 | sector_t span; | 2011 | sector_t span; |
1989 | unsigned long maxpages; | 2012 | unsigned long maxpages; |
1990 | unsigned char *swap_map = NULL; | 2013 | unsigned char *swap_map = NULL; |
2014 | unsigned long *frontswap_map = NULL; | ||
1991 | struct page *page = NULL; | 2015 | struct page *page = NULL; |
1992 | struct inode *inode = NULL; | 2016 | struct inode *inode = NULL; |
1993 | 2017 | ||
@@ -2071,6 +2095,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
2071 | error = nr_extents; | 2095 | error = nr_extents; |
2072 | goto bad_swap; | 2096 | goto bad_swap; |
2073 | } | 2097 | } |
2098 | /* frontswap enabled? set up bit-per-page map for frontswap */ | ||
2099 | if (frontswap_enabled) | ||
2100 | frontswap_map = vzalloc(maxpages / sizeof(long)); | ||
2074 | 2101 | ||
2075 | if (p->bdev) { | 2102 | if (p->bdev) { |
2076 | if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { | 2103 | if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { |
@@ -2086,14 +2113,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
2086 | if (swap_flags & SWAP_FLAG_PREFER) | 2113 | if (swap_flags & SWAP_FLAG_PREFER) |
2087 | prio = | 2114 | prio = |
2088 | (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; | 2115 | (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; |
2089 | enable_swap_info(p, prio, swap_map); | 2116 | enable_swap_info(p, prio, swap_map, frontswap_map); |
2090 | 2117 | ||
2091 | printk(KERN_INFO "Adding %uk swap on %s. " | 2118 | printk(KERN_INFO "Adding %uk swap on %s. " |
2092 | "Priority:%d extents:%d across:%lluk %s%s\n", | 2119 | "Priority:%d extents:%d across:%lluk %s%s%s\n", |
2093 | p->pages<<(PAGE_SHIFT-10), name, p->prio, | 2120 | p->pages<<(PAGE_SHIFT-10), name, p->prio, |
2094 | nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), | 2121 | nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), |
2095 | (p->flags & SWP_SOLIDSTATE) ? "SS" : "", | 2122 | (p->flags & SWP_SOLIDSTATE) ? "SS" : "", |
2096 | (p->flags & SWP_DISCARDABLE) ? "D" : ""); | 2123 | (p->flags & SWP_DISCARDABLE) ? "D" : "", |
2124 | (frontswap_map) ? "FS" : ""); | ||
2097 | 2125 | ||
2098 | mutex_unlock(&swapon_mutex); | 2126 | mutex_unlock(&swapon_mutex); |
2099 | atomic_inc(&proc_poll_event); | 2127 | atomic_inc(&proc_poll_event); |