aboutsummaryrefslogtreecommitdiffstats
path: root/mm/swapfile.c
diff options
context:
space:
mode:
author Glenn Elliott <gelliott@cs.unc.edu> 2012-03-04 19:47:13 -0500
committer Glenn Elliott <gelliott@cs.unc.edu> 2012-03-04 19:47:13 -0500
commit c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /mm/swapfile.c
parent ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent 6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp [wip-k-fmlp]
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- mm/swapfile.c 479
1 files changed, 267 insertions, 212 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 7c703ff2f36f..ff8dc1a18cb4 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -14,7 +14,7 @@
14#include <linux/vmalloc.h> 14#include <linux/vmalloc.h>
15#include <linux/pagemap.h> 15#include <linux/pagemap.h>
16#include <linux/namei.h> 16#include <linux/namei.h>
17#include <linux/shm.h> 17#include <linux/shmem_fs.h>
18#include <linux/blkdev.h> 18#include <linux/blkdev.h>
19#include <linux/random.h> 19#include <linux/random.h>
20#include <linux/writeback.h> 20#include <linux/writeback.h>
@@ -30,6 +30,8 @@
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/syscalls.h> 31#include <linux/syscalls.h>
32#include <linux/memcontrol.h> 32#include <linux/memcontrol.h>
33#include <linux/poll.h>
34#include <linux/oom.h>
33 35
34#include <asm/pgtable.h> 36#include <asm/pgtable.h>
35#include <asm/tlbflush.h> 37#include <asm/tlbflush.h>
@@ -58,6 +60,10 @@ static struct swap_info_struct *swap_info[MAX_SWAPFILES];
58 60
59static DEFINE_MUTEX(swapon_mutex); 61static DEFINE_MUTEX(swapon_mutex);
60 62
63static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait);
64/* Activity counter to indicate that a swapon or swapoff has occurred */
65static atomic_t proc_poll_event = ATOMIC_INIT(0);
66
61static inline unsigned char swap_count(unsigned char ent) 67static inline unsigned char swap_count(unsigned char ent)
62{ 68{
63 return ent & ~SWAP_HAS_CACHE; /* may include SWAP_HAS_CONT flag */ 69 return ent & ~SWAP_HAS_CACHE; /* may include SWAP_HAS_CONT flag */
@@ -90,39 +96,6 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
90} 96}
91 97
92/* 98/*
93 * We need this because the bdev->unplug_fn can sleep and we cannot
94 * hold swap_lock while calling the unplug_fn. And swap_lock
95 * cannot be turned into a mutex.
96 */
97static DECLARE_RWSEM(swap_unplug_sem);
98
99void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
100{
101 swp_entry_t entry;
102
103 down_read(&swap_unplug_sem);
104 entry.val = page_private(page);
105 if (PageSwapCache(page)) {
106 struct block_device *bdev = swap_info[swp_type(entry)]->bdev;
107 struct backing_dev_info *bdi;
108
109 /*
110 * If the page is removed from swapcache from under us (with a
111 * racy try_to_unuse/swapoff) we need an additional reference
112 * count to avoid reading garbage from page_private(page) above.
113 * If the WARN_ON triggers during a swapoff it maybe the race
114 * condition and it's harmless. However if it triggers without
115 * swapoff it signals a problem.
116 */
117 WARN_ON(page_count(page) <= 1);
118
119 bdi = bdev->bd_inode->i_mapping->backing_dev_info;
120 blk_run_backing_dev(bdi, page);
121 }
122 up_read(&swap_unplug_sem);
123}
124
125/*
126 * swapon tell device that all the old swap contents can be discarded, 99 * swapon tell device that all the old swap contents can be discarded,
127 * to allow the swap device to optimize its wear-levelling. 100 * to allow the swap device to optimize its wear-levelling.
128 */ 101 */
@@ -139,7 +112,7 @@ static int discard_swap(struct swap_info_struct *si)
139 nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); 112 nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
140 if (nr_blocks) { 113 if (nr_blocks) {
141 err = blkdev_issue_discard(si->bdev, start_block, 114 err = blkdev_issue_discard(si->bdev, start_block,
142 nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT); 115 nr_blocks, GFP_KERNEL, 0);
143 if (err) 116 if (err)
144 return err; 117 return err;
145 cond_resched(); 118 cond_resched();
@@ -150,7 +123,7 @@ static int discard_swap(struct swap_info_struct *si)
150 nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); 123 nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
151 124
152 err = blkdev_issue_discard(si->bdev, start_block, 125 err = blkdev_issue_discard(si->bdev, start_block,
153 nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT); 126 nr_blocks, GFP_KERNEL, 0);
154 if (err) 127 if (err)
155 break; 128 break;
156 129
@@ -189,7 +162,7 @@ static void discard_swap_cluster(struct swap_info_struct *si,
189 start_block <<= PAGE_SHIFT - 9; 162 start_block <<= PAGE_SHIFT - 9;
190 nr_blocks <<= PAGE_SHIFT - 9; 163 nr_blocks <<= PAGE_SHIFT - 9;
191 if (blkdev_issue_discard(si->bdev, start_block, 164 if (blkdev_issue_discard(si->bdev, start_block,
192 nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT)) 165 nr_blocks, GFP_NOIO, 0))
193 break; 166 break;
194 } 167 }
195 168
@@ -207,8 +180,8 @@ static int wait_for_discard(void *word)
207#define SWAPFILE_CLUSTER 256 180#define SWAPFILE_CLUSTER 256
208#define LATENCY_LIMIT 256 181#define LATENCY_LIMIT 256
209 182
210static inline unsigned long scan_swap_map(struct swap_info_struct *si, 183static unsigned long scan_swap_map(struct swap_info_struct *si,
211 unsigned char usage) 184 unsigned char usage)
212{ 185{
213 unsigned long offset; 186 unsigned long offset;
214 unsigned long scan_base; 187 unsigned long scan_base;
@@ -875,7 +848,7 @@ unsigned int count_swap_pages(int type, int free)
875static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, 848static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
876 unsigned long addr, swp_entry_t entry, struct page *page) 849 unsigned long addr, swp_entry_t entry, struct page *page)
877{ 850{
878 struct mem_cgroup *ptr = NULL; 851 struct mem_cgroup *ptr;
879 spinlock_t *ptl; 852 spinlock_t *ptl;
880 pte_t *pte; 853 pte_t *pte;
881 int ret = 1; 854 int ret = 1;
@@ -959,6 +932,8 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
959 pmd = pmd_offset(pud, addr); 932 pmd = pmd_offset(pud, addr);
960 do { 933 do {
961 next = pmd_addr_end(addr, end); 934 next = pmd_addr_end(addr, end);
935 if (unlikely(pmd_trans_huge(*pmd)))
936 continue;
962 if (pmd_none_or_clear_bad(pmd)) 937 if (pmd_none_or_clear_bad(pmd))
963 continue; 938 continue;
964 ret = unuse_pte_range(vma, pmd, addr, next, entry, page); 939 ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
@@ -1543,6 +1518,36 @@ bad_bmap:
1543 goto out; 1518 goto out;
1544} 1519}
1545 1520
1521static void enable_swap_info(struct swap_info_struct *p, int prio,
1522 unsigned char *swap_map)
1523{
1524 int i, prev;
1525
1526 spin_lock(&swap_lock);
1527 if (prio >= 0)
1528 p->prio = prio;
1529 else
1530 p->prio = --least_priority;
1531 p->swap_map = swap_map;
1532 p->flags |= SWP_WRITEOK;
1533 nr_swap_pages += p->pages;
1534 total_swap_pages += p->pages;
1535
1536 /* insert swap space into swap_list: */
1537 prev = -1;
1538 for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
1539 if (p->prio >= swap_info[i]->prio)
1540 break;
1541 prev = i;
1542 }
1543 p->next = i;
1544 if (prev < 0)
1545 swap_list.head = swap_list.next = p->type;
1546 else
1547 swap_info[prev]->next = p->type;
1548 spin_unlock(&swap_lock);
1549}
1550
1546SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) 1551SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1547{ 1552{
1548 struct swap_info_struct *p = NULL; 1553 struct swap_info_struct *p = NULL;
@@ -1551,6 +1556,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1551 struct address_space *mapping; 1556 struct address_space *mapping;
1552 struct inode *inode; 1557 struct inode *inode;
1553 char *pathname; 1558 char *pathname;
1559 int oom_score_adj;
1554 int i, type, prev; 1560 int i, type, prev;
1555 int err; 1561 int err;
1556 1562
@@ -1609,37 +1615,22 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1609 p->flags &= ~SWP_WRITEOK; 1615 p->flags &= ~SWP_WRITEOK;
1610 spin_unlock(&swap_lock); 1616 spin_unlock(&swap_lock);
1611 1617
1612 current->flags |= PF_OOM_ORIGIN; 1618 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
1613 err = try_to_unuse(type); 1619 err = try_to_unuse(type);
1614 current->flags &= ~PF_OOM_ORIGIN; 1620 test_set_oom_score_adj(oom_score_adj);
1615 1621
1616 if (err) { 1622 if (err) {
1623 /*
1624 * reading p->prio and p->swap_map outside the lock is
1625 * safe here because only sys_swapon and sys_swapoff
1626 * change them, and there can be no other sys_swapon or
1627 * sys_swapoff for this swap_info_struct at this point.
1628 */
1617 /* re-insert swap space back into swap_list */ 1629 /* re-insert swap space back into swap_list */
1618 spin_lock(&swap_lock); 1630 enable_swap_info(p, p->prio, p->swap_map);
1619 if (p->prio < 0)
1620 p->prio = --least_priority;
1621 prev = -1;
1622 for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
1623 if (p->prio >= swap_info[i]->prio)
1624 break;
1625 prev = i;
1626 }
1627 p->next = i;
1628 if (prev < 0)
1629 swap_list.head = swap_list.next = type;
1630 else
1631 swap_info[prev]->next = type;
1632 nr_swap_pages += p->pages;
1633 total_swap_pages += p->pages;
1634 p->flags |= SWP_WRITEOK;
1635 spin_unlock(&swap_lock);
1636 goto out_dput; 1631 goto out_dput;
1637 } 1632 }
1638 1633
1639 /* wait for any unplug function to finish */
1640 down_write(&swap_unplug_sem);
1641 up_write(&swap_unplug_sem);
1642
1643 destroy_swap_extents(p); 1634 destroy_swap_extents(p);
1644 if (p->flags & SWP_CONTINUED) 1635 if (p->flags & SWP_CONTINUED)
1645 free_swap_count_continuations(p); 1636 free_swap_count_continuations(p);
@@ -1672,7 +1663,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1672 if (S_ISBLK(inode->i_mode)) { 1663 if (S_ISBLK(inode->i_mode)) {
1673 struct block_device *bdev = I_BDEV(inode); 1664 struct block_device *bdev = I_BDEV(inode);
1674 set_blocksize(bdev, p->old_block_size); 1665 set_blocksize(bdev, p->old_block_size);
1675 bd_release(bdev); 1666 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1676 } else { 1667 } else {
1677 mutex_lock(&inode->i_mutex); 1668 mutex_lock(&inode->i_mutex);
1678 inode->i_flags &= ~S_SWAPFILE; 1669 inode->i_flags &= ~S_SWAPFILE;
@@ -1680,6 +1671,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1680 } 1671 }
1681 filp_close(swap_file, NULL); 1672 filp_close(swap_file, NULL);
1682 err = 0; 1673 err = 0;
1674 atomic_inc(&proc_poll_event);
1675 wake_up_interruptible(&proc_poll_wait);
1683 1676
1684out_dput: 1677out_dput:
1685 filp_close(victim, NULL); 1678 filp_close(victim, NULL);
@@ -1688,6 +1681,25 @@ out:
1688} 1681}
1689 1682
1690#ifdef CONFIG_PROC_FS 1683#ifdef CONFIG_PROC_FS
1684struct proc_swaps {
1685 struct seq_file seq;
1686 int event;
1687};
1688
1689static unsigned swaps_poll(struct file *file, poll_table *wait)
1690{
1691 struct proc_swaps *s = file->private_data;
1692
1693 poll_wait(file, &proc_poll_wait, wait);
1694
1695 if (s->event != atomic_read(&proc_poll_event)) {
1696 s->event = atomic_read(&proc_poll_event);
1697 return POLLIN | POLLRDNORM | POLLERR | POLLPRI;
1698 }
1699
1700 return POLLIN | POLLRDNORM;
1701}
1702
1691/* iterator */ 1703/* iterator */
1692static void *swap_start(struct seq_file *swap, loff_t *pos) 1704static void *swap_start(struct seq_file *swap, loff_t *pos)
1693{ 1705{
@@ -1771,7 +1783,24 @@ static const struct seq_operations swaps_op = {
1771 1783
1772static int swaps_open(struct inode *inode, struct file *file) 1784static int swaps_open(struct inode *inode, struct file *file)
1773{ 1785{
1774 return seq_open(file, &swaps_op); 1786 struct proc_swaps *s;
1787 int ret;
1788
1789 s = kmalloc(sizeof(struct proc_swaps), GFP_KERNEL);
1790 if (!s)
1791 return -ENOMEM;
1792
1793 file->private_data = s;
1794
1795 ret = seq_open(file, &swaps_op);
1796 if (ret) {
1797 kfree(s);
1798 return ret;
1799 }
1800
1801 s->seq.private = s;
1802 s->event = atomic_read(&proc_poll_event);
1803 return ret;
1775} 1804}
1776 1805
1777static const struct file_operations proc_swaps_operations = { 1806static const struct file_operations proc_swaps_operations = {
@@ -1779,6 +1808,7 @@ static const struct file_operations proc_swaps_operations = {
1779 .read = seq_read, 1808 .read = seq_read,
1780 .llseek = seq_lseek, 1809 .llseek = seq_lseek,
1781 .release = seq_release, 1810 .release = seq_release,
1811 .poll = swaps_poll,
1782}; 1812};
1783 1813
1784static int __init procswaps_init(void) 1814static int __init procswaps_init(void)
@@ -1798,49 +1828,24 @@ static int __init max_swapfiles_check(void)
1798late_initcall(max_swapfiles_check); 1828late_initcall(max_swapfiles_check);
1799#endif 1829#endif
1800 1830
1801/* 1831static struct swap_info_struct *alloc_swap_info(void)
1802 * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
1803 *
1804 * The swapon system call
1805 */
1806SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1807{ 1832{
1808 struct swap_info_struct *p; 1833 struct swap_info_struct *p;
1809 char *name = NULL;
1810 struct block_device *bdev = NULL;
1811 struct file *swap_file = NULL;
1812 struct address_space *mapping;
1813 unsigned int type; 1834 unsigned int type;
1814 int i, prev;
1815 int error;
1816 union swap_header *swap_header;
1817 unsigned int nr_good_pages;
1818 int nr_extents = 0;
1819 sector_t span;
1820 unsigned long maxpages;
1821 unsigned long swapfilepages;
1822 unsigned char *swap_map = NULL;
1823 struct page *page = NULL;
1824 struct inode *inode = NULL;
1825 int did_down = 0;
1826
1827 if (!capable(CAP_SYS_ADMIN))
1828 return -EPERM;
1829 1835
1830 p = kzalloc(sizeof(*p), GFP_KERNEL); 1836 p = kzalloc(sizeof(*p), GFP_KERNEL);
1831 if (!p) 1837 if (!p)
1832 return -ENOMEM; 1838 return ERR_PTR(-ENOMEM);
1833 1839
1834 spin_lock(&swap_lock); 1840 spin_lock(&swap_lock);
1835 for (type = 0; type < nr_swapfiles; type++) { 1841 for (type = 0; type < nr_swapfiles; type++) {
1836 if (!(swap_info[type]->flags & SWP_USED)) 1842 if (!(swap_info[type]->flags & SWP_USED))
1837 break; 1843 break;
1838 } 1844 }
1839 error = -EPERM;
1840 if (type >= MAX_SWAPFILES) { 1845 if (type >= MAX_SWAPFILES) {
1841 spin_unlock(&swap_lock); 1846 spin_unlock(&swap_lock);
1842 kfree(p); 1847 kfree(p);
1843 goto out; 1848 return ERR_PTR(-EPERM);
1844 } 1849 }
1845 if (type >= nr_swapfiles) { 1850 if (type >= nr_swapfiles) {
1846 p->type = type; 1851 p->type = type;
@@ -1865,80 +1870,49 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1865 p->next = -1; 1870 p->next = -1;
1866 spin_unlock(&swap_lock); 1871 spin_unlock(&swap_lock);
1867 1872
1868 name = getname(specialfile); 1873 return p;
1869 error = PTR_ERR(name); 1874}
1870 if (IS_ERR(name)) {
1871 name = NULL;
1872 goto bad_swap_2;
1873 }
1874 swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0);
1875 error = PTR_ERR(swap_file);
1876 if (IS_ERR(swap_file)) {
1877 swap_file = NULL;
1878 goto bad_swap_2;
1879 }
1880
1881 p->swap_file = swap_file;
1882 mapping = swap_file->f_mapping;
1883 inode = mapping->host;
1884
1885 error = -EBUSY;
1886 for (i = 0; i < nr_swapfiles; i++) {
1887 struct swap_info_struct *q = swap_info[i];
1888 1875
1889 if (i == type || !q->swap_file) 1876static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
1890 continue; 1877{
1891 if (mapping == q->swap_file->f_mapping) 1878 int error;
1892 goto bad_swap;
1893 }
1894 1879
1895 error = -EINVAL;
1896 if (S_ISBLK(inode->i_mode)) { 1880 if (S_ISBLK(inode->i_mode)) {
1897 bdev = I_BDEV(inode); 1881 p->bdev = bdgrab(I_BDEV(inode));
1898 error = bd_claim(bdev, sys_swapon); 1882 error = blkdev_get(p->bdev,
1883 FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1884 sys_swapon);
1899 if (error < 0) { 1885 if (error < 0) {
1900 bdev = NULL; 1886 p->bdev = NULL;
1901 error = -EINVAL; 1887 return -EINVAL;
1902 goto bad_swap;
1903 } 1888 }
1904 p->old_block_size = block_size(bdev); 1889 p->old_block_size = block_size(p->bdev);
1905 error = set_blocksize(bdev, PAGE_SIZE); 1890 error = set_blocksize(p->bdev, PAGE_SIZE);
1906 if (error < 0) 1891 if (error < 0)
1907 goto bad_swap; 1892 return error;
1908 p->bdev = bdev;
1909 p->flags |= SWP_BLKDEV; 1893 p->flags |= SWP_BLKDEV;
1910 } else if (S_ISREG(inode->i_mode)) { 1894 } else if (S_ISREG(inode->i_mode)) {
1911 p->bdev = inode->i_sb->s_bdev; 1895 p->bdev = inode->i_sb->s_bdev;
1912 mutex_lock(&inode->i_mutex); 1896 mutex_lock(&inode->i_mutex);
1913 did_down = 1; 1897 if (IS_SWAPFILE(inode))
1914 if (IS_SWAPFILE(inode)) { 1898 return -EBUSY;
1915 error = -EBUSY; 1899 } else
1916 goto bad_swap; 1900 return -EINVAL;
1917 }
1918 } else {
1919 goto bad_swap;
1920 }
1921 1901
1922 swapfilepages = i_size_read(inode) >> PAGE_SHIFT; 1902 return 0;
1903}
1923 1904
1924 /* 1905static unsigned long read_swap_header(struct swap_info_struct *p,
1925 * Read the swap header. 1906 union swap_header *swap_header,
1926 */ 1907 struct inode *inode)
1927 if (!mapping->a_ops->readpage) { 1908{
1928 error = -EINVAL; 1909 int i;
1929 goto bad_swap; 1910 unsigned long maxpages;
1930 } 1911 unsigned long swapfilepages;
1931 page = read_mapping_page(mapping, 0, swap_file);
1932 if (IS_ERR(page)) {
1933 error = PTR_ERR(page);
1934 goto bad_swap;
1935 }
1936 swap_header = kmap(page);
1937 1912
1938 if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { 1913 if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
1939 printk(KERN_ERR "Unable to find swap-space signature\n"); 1914 printk(KERN_ERR "Unable to find swap-space signature\n");
1940 error = -EINVAL; 1915 return 0;
1941 goto bad_swap;
1942 } 1916 }
1943 1917
1944 /* swap partition endianess hack... */ 1918 /* swap partition endianess hack... */
@@ -1954,8 +1928,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1954 printk(KERN_WARNING 1928 printk(KERN_WARNING
1955 "Unable to handle swap header version %d\n", 1929 "Unable to handle swap header version %d\n",
1956 swap_header->info.version); 1930 swap_header->info.version);
1957 error = -EINVAL; 1931 return 0;
1958 goto bad_swap;
1959 } 1932 }
1960 1933
1961 p->lowest_bit = 1; 1934 p->lowest_bit = 1;
@@ -1986,62 +1959,156 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1986 } 1959 }
1987 p->highest_bit = maxpages - 1; 1960 p->highest_bit = maxpages - 1;
1988 1961
1989 error = -EINVAL;
1990 if (!maxpages) 1962 if (!maxpages)
1991 goto bad_swap; 1963 return 0;
1964 swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
1992 if (swapfilepages && maxpages > swapfilepages) { 1965 if (swapfilepages && maxpages > swapfilepages) {
1993 printk(KERN_WARNING 1966 printk(KERN_WARNING
1994 "Swap area shorter than signature indicates\n"); 1967 "Swap area shorter than signature indicates\n");
1995 goto bad_swap; 1968 return 0;
1996 } 1969 }
1997 if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) 1970 if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
1998 goto bad_swap; 1971 return 0;
1999 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) 1972 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
2000 goto bad_swap; 1973 return 0;
2001 1974
2002 /* OK, set up the swap map and apply the bad block list */ 1975 return maxpages;
2003 swap_map = vmalloc(maxpages); 1976}
2004 if (!swap_map) { 1977
2005 error = -ENOMEM; 1978static int setup_swap_map_and_extents(struct swap_info_struct *p,
2006 goto bad_swap; 1979 union swap_header *swap_header,
2007 } 1980 unsigned char *swap_map,
1981 unsigned long maxpages,
1982 sector_t *span)
1983{
1984 int i;
1985 unsigned int nr_good_pages;
1986 int nr_extents;
2008 1987
2009 memset(swap_map, 0, maxpages);
2010 nr_good_pages = maxpages - 1; /* omit header page */ 1988 nr_good_pages = maxpages - 1; /* omit header page */
2011 1989
2012 for (i = 0; i < swap_header->info.nr_badpages; i++) { 1990 for (i = 0; i < swap_header->info.nr_badpages; i++) {
2013 unsigned int page_nr = swap_header->info.badpages[i]; 1991 unsigned int page_nr = swap_header->info.badpages[i];
2014 if (page_nr == 0 || page_nr > swap_header->info.last_page) { 1992 if (page_nr == 0 || page_nr > swap_header->info.last_page)
2015 error = -EINVAL; 1993 return -EINVAL;
2016 goto bad_swap;
2017 }
2018 if (page_nr < maxpages) { 1994 if (page_nr < maxpages) {
2019 swap_map[page_nr] = SWAP_MAP_BAD; 1995 swap_map[page_nr] = SWAP_MAP_BAD;
2020 nr_good_pages--; 1996 nr_good_pages--;
2021 } 1997 }
2022 } 1998 }
2023 1999
2024 error = swap_cgroup_swapon(type, maxpages);
2025 if (error)
2026 goto bad_swap;
2027
2028 if (nr_good_pages) { 2000 if (nr_good_pages) {
2029 swap_map[0] = SWAP_MAP_BAD; 2001 swap_map[0] = SWAP_MAP_BAD;
2030 p->max = maxpages; 2002 p->max = maxpages;
2031 p->pages = nr_good_pages; 2003 p->pages = nr_good_pages;
2032 nr_extents = setup_swap_extents(p, &span); 2004 nr_extents = setup_swap_extents(p, span);
2033 if (nr_extents < 0) { 2005 if (nr_extents < 0)
2034 error = nr_extents; 2006 return nr_extents;
2035 goto bad_swap;
2036 }
2037 nr_good_pages = p->pages; 2007 nr_good_pages = p->pages;
2038 } 2008 }
2039 if (!nr_good_pages) { 2009 if (!nr_good_pages) {
2040 printk(KERN_WARNING "Empty swap-file\n"); 2010 printk(KERN_WARNING "Empty swap-file\n");
2011 return -EINVAL;
2012 }
2013
2014 return nr_extents;
2015}
2016
2017SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2018{
2019 struct swap_info_struct *p;
2020 char *name;
2021 struct file *swap_file = NULL;
2022 struct address_space *mapping;
2023 int i;
2024 int prio;
2025 int error;
2026 union swap_header *swap_header;
2027 int nr_extents;
2028 sector_t span;
2029 unsigned long maxpages;
2030 unsigned char *swap_map = NULL;
2031 struct page *page = NULL;
2032 struct inode *inode = NULL;
2033
2034 if (!capable(CAP_SYS_ADMIN))
2035 return -EPERM;
2036
2037 p = alloc_swap_info();
2038 if (IS_ERR(p))
2039 return PTR_ERR(p);
2040
2041 name = getname(specialfile);
2042 if (IS_ERR(name)) {
2043 error = PTR_ERR(name);
2044 name = NULL;
2045 goto bad_swap;
2046 }
2047 swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0);
2048 if (IS_ERR(swap_file)) {
2049 error = PTR_ERR(swap_file);
2050 swap_file = NULL;
2051 goto bad_swap;
2052 }
2053
2054 p->swap_file = swap_file;
2055 mapping = swap_file->f_mapping;
2056
2057 for (i = 0; i < nr_swapfiles; i++) {
2058 struct swap_info_struct *q = swap_info[i];
2059
2060 if (q == p || !q->swap_file)
2061 continue;
2062 if (mapping == q->swap_file->f_mapping) {
2063 error = -EBUSY;
2064 goto bad_swap;
2065 }
2066 }
2067
2068 inode = mapping->host;
2069 /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
2070 error = claim_swapfile(p, inode);
2071 if (unlikely(error))
2072 goto bad_swap;
2073
2074 /*
2075 * Read the swap header.
2076 */
2077 if (!mapping->a_ops->readpage) {
2078 error = -EINVAL;
2079 goto bad_swap;
2080 }
2081 page = read_mapping_page(mapping, 0, swap_file);
2082 if (IS_ERR(page)) {
2083 error = PTR_ERR(page);
2084 goto bad_swap;
2085 }
2086 swap_header = kmap(page);
2087
2088 maxpages = read_swap_header(p, swap_header, inode);
2089 if (unlikely(!maxpages)) {
2041 error = -EINVAL; 2090 error = -EINVAL;
2042 goto bad_swap; 2091 goto bad_swap;
2043 } 2092 }
2044 2093
2094 /* OK, set up the swap map and apply the bad block list */
2095 swap_map = vzalloc(maxpages);
2096 if (!swap_map) {
2097 error = -ENOMEM;
2098 goto bad_swap;
2099 }
2100
2101 error = swap_cgroup_swapon(p->type, maxpages);
2102 if (error)
2103 goto bad_swap;
2104
2105 nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
2106 maxpages, &span);
2107 if (unlikely(nr_extents < 0)) {
2108 error = nr_extents;
2109 goto bad_swap;
2110 }
2111
2045 if (p->bdev) { 2112 if (p->bdev) {
2046 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { 2113 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
2047 p->flags |= SWP_SOLIDSTATE; 2114 p->flags |= SWP_SOLIDSTATE;
@@ -2052,55 +2119,46 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2052 } 2119 }
2053 2120
2054 mutex_lock(&swapon_mutex); 2121 mutex_lock(&swapon_mutex);
2055 spin_lock(&swap_lock); 2122 prio = -1;
2056 if (swap_flags & SWAP_FLAG_PREFER) 2123 if (swap_flags & SWAP_FLAG_PREFER)
2057 p->prio = 2124 prio =
2058 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; 2125 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
2059 else 2126 enable_swap_info(p, prio, swap_map);
2060 p->prio = --least_priority;
2061 p->swap_map = swap_map;
2062 p->flags |= SWP_WRITEOK;
2063 nr_swap_pages += nr_good_pages;
2064 total_swap_pages += nr_good_pages;
2065 2127
2066 printk(KERN_INFO "Adding %uk swap on %s. " 2128 printk(KERN_INFO "Adding %uk swap on %s. "
2067 "Priority:%d extents:%d across:%lluk %s%s\n", 2129 "Priority:%d extents:%d across:%lluk %s%s\n",
2068 nr_good_pages<<(PAGE_SHIFT-10), name, p->prio, 2130 p->pages<<(PAGE_SHIFT-10), name, p->prio,
2069 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), 2131 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
2070 (p->flags & SWP_SOLIDSTATE) ? "SS" : "", 2132 (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
2071 (p->flags & SWP_DISCARDABLE) ? "D" : ""); 2133 (p->flags & SWP_DISCARDABLE) ? "D" : "");
2072 2134
2073 /* insert swap space into swap_list: */
2074 prev = -1;
2075 for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
2076 if (p->prio >= swap_info[i]->prio)
2077 break;
2078 prev = i;
2079 }
2080 p->next = i;
2081 if (prev < 0)
2082 swap_list.head = swap_list.next = type;
2083 else
2084 swap_info[prev]->next = type;
2085 spin_unlock(&swap_lock);
2086 mutex_unlock(&swapon_mutex); 2135 mutex_unlock(&swapon_mutex);
2136 atomic_inc(&proc_poll_event);
2137 wake_up_interruptible(&proc_poll_wait);
2138
2139 if (S_ISREG(inode->i_mode))
2140 inode->i_flags |= S_SWAPFILE;
2087 error = 0; 2141 error = 0;
2088 goto out; 2142 goto out;
2089bad_swap: 2143bad_swap:
2090 if (bdev) { 2144 if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
2091 set_blocksize(bdev, p->old_block_size); 2145 set_blocksize(p->bdev, p->old_block_size);
2092 bd_release(bdev); 2146 blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2093 } 2147 }
2094 destroy_swap_extents(p); 2148 destroy_swap_extents(p);
2095 swap_cgroup_swapoff(type); 2149 swap_cgroup_swapoff(p->type);
2096bad_swap_2:
2097 spin_lock(&swap_lock); 2150 spin_lock(&swap_lock);
2098 p->swap_file = NULL; 2151 p->swap_file = NULL;
2099 p->flags = 0; 2152 p->flags = 0;
2100 spin_unlock(&swap_lock); 2153 spin_unlock(&swap_lock);
2101 vfree(swap_map); 2154 vfree(swap_map);
2102 if (swap_file) 2155 if (swap_file) {
2156 if (inode && S_ISREG(inode->i_mode)) {
2157 mutex_unlock(&inode->i_mutex);
2158 inode = NULL;
2159 }
2103 filp_close(swap_file, NULL); 2160 filp_close(swap_file, NULL);
2161 }
2104out: 2162out:
2105 if (page && !IS_ERR(page)) { 2163 if (page && !IS_ERR(page)) {
2106 kunmap(page); 2164 kunmap(page);
@@ -2108,11 +2166,8 @@ out:
2108 } 2166 }
2109 if (name) 2167 if (name)
2110 putname(name); 2168 putname(name);
2111 if (did_down) { 2169 if (inode && S_ISREG(inode->i_mode))
2112 if (!error)
2113 inode->i_flags |= S_SWAPFILE;
2114 mutex_unlock(&inode->i_mutex); 2170 mutex_unlock(&inode->i_mutex);
2115 }
2116 return error; 2171 return error;
2117} 2172}
2118 2173