diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /mm/swapfile.c | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- | mm/swapfile.c | 479 |
1 files changed, 267 insertions, 212 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c index 7c703ff2f36f..ff8dc1a18cb4 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -14,7 +14,7 @@ | |||
14 | #include <linux/vmalloc.h> | 14 | #include <linux/vmalloc.h> |
15 | #include <linux/pagemap.h> | 15 | #include <linux/pagemap.h> |
16 | #include <linux/namei.h> | 16 | #include <linux/namei.h> |
17 | #include <linux/shm.h> | 17 | #include <linux/shmem_fs.h> |
18 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
19 | #include <linux/random.h> | 19 | #include <linux/random.h> |
20 | #include <linux/writeback.h> | 20 | #include <linux/writeback.h> |
@@ -30,6 +30,8 @@ | |||
30 | #include <linux/capability.h> | 30 | #include <linux/capability.h> |
31 | #include <linux/syscalls.h> | 31 | #include <linux/syscalls.h> |
32 | #include <linux/memcontrol.h> | 32 | #include <linux/memcontrol.h> |
33 | #include <linux/poll.h> | ||
34 | #include <linux/oom.h> | ||
33 | 35 | ||
34 | #include <asm/pgtable.h> | 36 | #include <asm/pgtable.h> |
35 | #include <asm/tlbflush.h> | 37 | #include <asm/tlbflush.h> |
@@ -58,6 +60,10 @@ static struct swap_info_struct *swap_info[MAX_SWAPFILES]; | |||
58 | 60 | ||
59 | static DEFINE_MUTEX(swapon_mutex); | 61 | static DEFINE_MUTEX(swapon_mutex); |
60 | 62 | ||
63 | static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait); | ||
64 | /* Activity counter to indicate that a swapon or swapoff has occurred */ | ||
65 | static atomic_t proc_poll_event = ATOMIC_INIT(0); | ||
66 | |||
61 | static inline unsigned char swap_count(unsigned char ent) | 67 | static inline unsigned char swap_count(unsigned char ent) |
62 | { | 68 | { |
63 | return ent & ~SWAP_HAS_CACHE; /* may include SWAP_HAS_CONT flag */ | 69 | return ent & ~SWAP_HAS_CACHE; /* may include SWAP_HAS_CONT flag */ |
@@ -90,39 +96,6 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset) | |||
90 | } | 96 | } |
91 | 97 | ||
92 | /* | 98 | /* |
93 | * We need this because the bdev->unplug_fn can sleep and we cannot | ||
94 | * hold swap_lock while calling the unplug_fn. And swap_lock | ||
95 | * cannot be turned into a mutex. | ||
96 | */ | ||
97 | static DECLARE_RWSEM(swap_unplug_sem); | ||
98 | |||
99 | void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page) | ||
100 | { | ||
101 | swp_entry_t entry; | ||
102 | |||
103 | down_read(&swap_unplug_sem); | ||
104 | entry.val = page_private(page); | ||
105 | if (PageSwapCache(page)) { | ||
106 | struct block_device *bdev = swap_info[swp_type(entry)]->bdev; | ||
107 | struct backing_dev_info *bdi; | ||
108 | |||
109 | /* | ||
110 | * If the page is removed from swapcache from under us (with a | ||
111 | * racy try_to_unuse/swapoff) we need an additional reference | ||
112 | * count to avoid reading garbage from page_private(page) above. | ||
113 | * If the WARN_ON triggers during a swapoff it maybe the race | ||
114 | * condition and it's harmless. However if it triggers without | ||
115 | * swapoff it signals a problem. | ||
116 | */ | ||
117 | WARN_ON(page_count(page) <= 1); | ||
118 | |||
119 | bdi = bdev->bd_inode->i_mapping->backing_dev_info; | ||
120 | blk_run_backing_dev(bdi, page); | ||
121 | } | ||
122 | up_read(&swap_unplug_sem); | ||
123 | } | ||
124 | |||
125 | /* | ||
126 | * swapon tell device that all the old swap contents can be discarded, | 99 | * swapon tell device that all the old swap contents can be discarded, |
127 | * to allow the swap device to optimize its wear-levelling. | 100 | * to allow the swap device to optimize its wear-levelling. |
128 | */ | 101 | */ |
@@ -139,7 +112,7 @@ static int discard_swap(struct swap_info_struct *si) | |||
139 | nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); | 112 | nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); |
140 | if (nr_blocks) { | 113 | if (nr_blocks) { |
141 | err = blkdev_issue_discard(si->bdev, start_block, | 114 | err = blkdev_issue_discard(si->bdev, start_block, |
142 | nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT); | 115 | nr_blocks, GFP_KERNEL, 0); |
143 | if (err) | 116 | if (err) |
144 | return err; | 117 | return err; |
145 | cond_resched(); | 118 | cond_resched(); |
@@ -150,7 +123,7 @@ static int discard_swap(struct swap_info_struct *si) | |||
150 | nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); | 123 | nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); |
151 | 124 | ||
152 | err = blkdev_issue_discard(si->bdev, start_block, | 125 | err = blkdev_issue_discard(si->bdev, start_block, |
153 | nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT); | 126 | nr_blocks, GFP_KERNEL, 0); |
154 | if (err) | 127 | if (err) |
155 | break; | 128 | break; |
156 | 129 | ||
@@ -189,7 +162,7 @@ static void discard_swap_cluster(struct swap_info_struct *si, | |||
189 | start_block <<= PAGE_SHIFT - 9; | 162 | start_block <<= PAGE_SHIFT - 9; |
190 | nr_blocks <<= PAGE_SHIFT - 9; | 163 | nr_blocks <<= PAGE_SHIFT - 9; |
191 | if (blkdev_issue_discard(si->bdev, start_block, | 164 | if (blkdev_issue_discard(si->bdev, start_block, |
192 | nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT)) | 165 | nr_blocks, GFP_NOIO, 0)) |
193 | break; | 166 | break; |
194 | } | 167 | } |
195 | 168 | ||
@@ -207,8 +180,8 @@ static int wait_for_discard(void *word) | |||
207 | #define SWAPFILE_CLUSTER 256 | 180 | #define SWAPFILE_CLUSTER 256 |
208 | #define LATENCY_LIMIT 256 | 181 | #define LATENCY_LIMIT 256 |
209 | 182 | ||
210 | static inline unsigned long scan_swap_map(struct swap_info_struct *si, | 183 | static unsigned long scan_swap_map(struct swap_info_struct *si, |
211 | unsigned char usage) | 184 | unsigned char usage) |
212 | { | 185 | { |
213 | unsigned long offset; | 186 | unsigned long offset; |
214 | unsigned long scan_base; | 187 | unsigned long scan_base; |
@@ -875,7 +848,7 @@ unsigned int count_swap_pages(int type, int free) | |||
875 | static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, | 848 | static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, |
876 | unsigned long addr, swp_entry_t entry, struct page *page) | 849 | unsigned long addr, swp_entry_t entry, struct page *page) |
877 | { | 850 | { |
878 | struct mem_cgroup *ptr = NULL; | 851 | struct mem_cgroup *ptr; |
879 | spinlock_t *ptl; | 852 | spinlock_t *ptl; |
880 | pte_t *pte; | 853 | pte_t *pte; |
881 | int ret = 1; | 854 | int ret = 1; |
@@ -959,6 +932,8 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, | |||
959 | pmd = pmd_offset(pud, addr); | 932 | pmd = pmd_offset(pud, addr); |
960 | do { | 933 | do { |
961 | next = pmd_addr_end(addr, end); | 934 | next = pmd_addr_end(addr, end); |
935 | if (unlikely(pmd_trans_huge(*pmd))) | ||
936 | continue; | ||
962 | if (pmd_none_or_clear_bad(pmd)) | 937 | if (pmd_none_or_clear_bad(pmd)) |
963 | continue; | 938 | continue; |
964 | ret = unuse_pte_range(vma, pmd, addr, next, entry, page); | 939 | ret = unuse_pte_range(vma, pmd, addr, next, entry, page); |
@@ -1543,6 +1518,36 @@ bad_bmap: | |||
1543 | goto out; | 1518 | goto out; |
1544 | } | 1519 | } |
1545 | 1520 | ||
1521 | static void enable_swap_info(struct swap_info_struct *p, int prio, | ||
1522 | unsigned char *swap_map) | ||
1523 | { | ||
1524 | int i, prev; | ||
1525 | |||
1526 | spin_lock(&swap_lock); | ||
1527 | if (prio >= 0) | ||
1528 | p->prio = prio; | ||
1529 | else | ||
1530 | p->prio = --least_priority; | ||
1531 | p->swap_map = swap_map; | ||
1532 | p->flags |= SWP_WRITEOK; | ||
1533 | nr_swap_pages += p->pages; | ||
1534 | total_swap_pages += p->pages; | ||
1535 | |||
1536 | /* insert swap space into swap_list: */ | ||
1537 | prev = -1; | ||
1538 | for (i = swap_list.head; i >= 0; i = swap_info[i]->next) { | ||
1539 | if (p->prio >= swap_info[i]->prio) | ||
1540 | break; | ||
1541 | prev = i; | ||
1542 | } | ||
1543 | p->next = i; | ||
1544 | if (prev < 0) | ||
1545 | swap_list.head = swap_list.next = p->type; | ||
1546 | else | ||
1547 | swap_info[prev]->next = p->type; | ||
1548 | spin_unlock(&swap_lock); | ||
1549 | } | ||
1550 | |||
1546 | SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | 1551 | SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) |
1547 | { | 1552 | { |
1548 | struct swap_info_struct *p = NULL; | 1553 | struct swap_info_struct *p = NULL; |
@@ -1551,6 +1556,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1551 | struct address_space *mapping; | 1556 | struct address_space *mapping; |
1552 | struct inode *inode; | 1557 | struct inode *inode; |
1553 | char *pathname; | 1558 | char *pathname; |
1559 | int oom_score_adj; | ||
1554 | int i, type, prev; | 1560 | int i, type, prev; |
1555 | int err; | 1561 | int err; |
1556 | 1562 | ||
@@ -1609,37 +1615,22 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1609 | p->flags &= ~SWP_WRITEOK; | 1615 | p->flags &= ~SWP_WRITEOK; |
1610 | spin_unlock(&swap_lock); | 1616 | spin_unlock(&swap_lock); |
1611 | 1617 | ||
1612 | current->flags |= PF_OOM_ORIGIN; | 1618 | oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); |
1613 | err = try_to_unuse(type); | 1619 | err = try_to_unuse(type); |
1614 | current->flags &= ~PF_OOM_ORIGIN; | 1620 | test_set_oom_score_adj(oom_score_adj); |
1615 | 1621 | ||
1616 | if (err) { | 1622 | if (err) { |
1623 | /* | ||
1624 | * reading p->prio and p->swap_map outside the lock is | ||
1625 | * safe here because only sys_swapon and sys_swapoff | ||
1626 | * change them, and there can be no other sys_swapon or | ||
1627 | * sys_swapoff for this swap_info_struct at this point. | ||
1628 | */ | ||
1617 | /* re-insert swap space back into swap_list */ | 1629 | /* re-insert swap space back into swap_list */ |
1618 | spin_lock(&swap_lock); | 1630 | enable_swap_info(p, p->prio, p->swap_map); |
1619 | if (p->prio < 0) | ||
1620 | p->prio = --least_priority; | ||
1621 | prev = -1; | ||
1622 | for (i = swap_list.head; i >= 0; i = swap_info[i]->next) { | ||
1623 | if (p->prio >= swap_info[i]->prio) | ||
1624 | break; | ||
1625 | prev = i; | ||
1626 | } | ||
1627 | p->next = i; | ||
1628 | if (prev < 0) | ||
1629 | swap_list.head = swap_list.next = type; | ||
1630 | else | ||
1631 | swap_info[prev]->next = type; | ||
1632 | nr_swap_pages += p->pages; | ||
1633 | total_swap_pages += p->pages; | ||
1634 | p->flags |= SWP_WRITEOK; | ||
1635 | spin_unlock(&swap_lock); | ||
1636 | goto out_dput; | 1631 | goto out_dput; |
1637 | } | 1632 | } |
1638 | 1633 | ||
1639 | /* wait for any unplug function to finish */ | ||
1640 | down_write(&swap_unplug_sem); | ||
1641 | up_write(&swap_unplug_sem); | ||
1642 | |||
1643 | destroy_swap_extents(p); | 1634 | destroy_swap_extents(p); |
1644 | if (p->flags & SWP_CONTINUED) | 1635 | if (p->flags & SWP_CONTINUED) |
1645 | free_swap_count_continuations(p); | 1636 | free_swap_count_continuations(p); |
@@ -1672,7 +1663,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1672 | if (S_ISBLK(inode->i_mode)) { | 1663 | if (S_ISBLK(inode->i_mode)) { |
1673 | struct block_device *bdev = I_BDEV(inode); | 1664 | struct block_device *bdev = I_BDEV(inode); |
1674 | set_blocksize(bdev, p->old_block_size); | 1665 | set_blocksize(bdev, p->old_block_size); |
1675 | bd_release(bdev); | 1666 | blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); |
1676 | } else { | 1667 | } else { |
1677 | mutex_lock(&inode->i_mutex); | 1668 | mutex_lock(&inode->i_mutex); |
1678 | inode->i_flags &= ~S_SWAPFILE; | 1669 | inode->i_flags &= ~S_SWAPFILE; |
@@ -1680,6 +1671,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1680 | } | 1671 | } |
1681 | filp_close(swap_file, NULL); | 1672 | filp_close(swap_file, NULL); |
1682 | err = 0; | 1673 | err = 0; |
1674 | atomic_inc(&proc_poll_event); | ||
1675 | wake_up_interruptible(&proc_poll_wait); | ||
1683 | 1676 | ||
1684 | out_dput: | 1677 | out_dput: |
1685 | filp_close(victim, NULL); | 1678 | filp_close(victim, NULL); |
@@ -1688,6 +1681,25 @@ out: | |||
1688 | } | 1681 | } |
1689 | 1682 | ||
1690 | #ifdef CONFIG_PROC_FS | 1683 | #ifdef CONFIG_PROC_FS |
1684 | struct proc_swaps { | ||
1685 | struct seq_file seq; | ||
1686 | int event; | ||
1687 | }; | ||
1688 | |||
1689 | static unsigned swaps_poll(struct file *file, poll_table *wait) | ||
1690 | { | ||
1691 | struct proc_swaps *s = file->private_data; | ||
1692 | |||
1693 | poll_wait(file, &proc_poll_wait, wait); | ||
1694 | |||
1695 | if (s->event != atomic_read(&proc_poll_event)) { | ||
1696 | s->event = atomic_read(&proc_poll_event); | ||
1697 | return POLLIN | POLLRDNORM | POLLERR | POLLPRI; | ||
1698 | } | ||
1699 | |||
1700 | return POLLIN | POLLRDNORM; | ||
1701 | } | ||
1702 | |||
1691 | /* iterator */ | 1703 | /* iterator */ |
1692 | static void *swap_start(struct seq_file *swap, loff_t *pos) | 1704 | static void *swap_start(struct seq_file *swap, loff_t *pos) |
1693 | { | 1705 | { |
@@ -1771,7 +1783,24 @@ static const struct seq_operations swaps_op = { | |||
1771 | 1783 | ||
1772 | static int swaps_open(struct inode *inode, struct file *file) | 1784 | static int swaps_open(struct inode *inode, struct file *file) |
1773 | { | 1785 | { |
1774 | return seq_open(file, &swaps_op); | 1786 | struct proc_swaps *s; |
1787 | int ret; | ||
1788 | |||
1789 | s = kmalloc(sizeof(struct proc_swaps), GFP_KERNEL); | ||
1790 | if (!s) | ||
1791 | return -ENOMEM; | ||
1792 | |||
1793 | file->private_data = s; | ||
1794 | |||
1795 | ret = seq_open(file, &swaps_op); | ||
1796 | if (ret) { | ||
1797 | kfree(s); | ||
1798 | return ret; | ||
1799 | } | ||
1800 | |||
1801 | s->seq.private = s; | ||
1802 | s->event = atomic_read(&proc_poll_event); | ||
1803 | return ret; | ||
1775 | } | 1804 | } |
1776 | 1805 | ||
1777 | static const struct file_operations proc_swaps_operations = { | 1806 | static const struct file_operations proc_swaps_operations = { |
@@ -1779,6 +1808,7 @@ static const struct file_operations proc_swaps_operations = { | |||
1779 | .read = seq_read, | 1808 | .read = seq_read, |
1780 | .llseek = seq_lseek, | 1809 | .llseek = seq_lseek, |
1781 | .release = seq_release, | 1810 | .release = seq_release, |
1811 | .poll = swaps_poll, | ||
1782 | }; | 1812 | }; |
1783 | 1813 | ||
1784 | static int __init procswaps_init(void) | 1814 | static int __init procswaps_init(void) |
@@ -1798,49 +1828,24 @@ static int __init max_swapfiles_check(void) | |||
1798 | late_initcall(max_swapfiles_check); | 1828 | late_initcall(max_swapfiles_check); |
1799 | #endif | 1829 | #endif |
1800 | 1830 | ||
1801 | /* | 1831 | static struct swap_info_struct *alloc_swap_info(void) |
1802 | * Written 01/25/92 by Simmule Turner, heavily changed by Linus. | ||
1803 | * | ||
1804 | * The swapon system call | ||
1805 | */ | ||
1806 | SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | ||
1807 | { | 1832 | { |
1808 | struct swap_info_struct *p; | 1833 | struct swap_info_struct *p; |
1809 | char *name = NULL; | ||
1810 | struct block_device *bdev = NULL; | ||
1811 | struct file *swap_file = NULL; | ||
1812 | struct address_space *mapping; | ||
1813 | unsigned int type; | 1834 | unsigned int type; |
1814 | int i, prev; | ||
1815 | int error; | ||
1816 | union swap_header *swap_header; | ||
1817 | unsigned int nr_good_pages; | ||
1818 | int nr_extents = 0; | ||
1819 | sector_t span; | ||
1820 | unsigned long maxpages; | ||
1821 | unsigned long swapfilepages; | ||
1822 | unsigned char *swap_map = NULL; | ||
1823 | struct page *page = NULL; | ||
1824 | struct inode *inode = NULL; | ||
1825 | int did_down = 0; | ||
1826 | |||
1827 | if (!capable(CAP_SYS_ADMIN)) | ||
1828 | return -EPERM; | ||
1829 | 1835 | ||
1830 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 1836 | p = kzalloc(sizeof(*p), GFP_KERNEL); |
1831 | if (!p) | 1837 | if (!p) |
1832 | return -ENOMEM; | 1838 | return ERR_PTR(-ENOMEM); |
1833 | 1839 | ||
1834 | spin_lock(&swap_lock); | 1840 | spin_lock(&swap_lock); |
1835 | for (type = 0; type < nr_swapfiles; type++) { | 1841 | for (type = 0; type < nr_swapfiles; type++) { |
1836 | if (!(swap_info[type]->flags & SWP_USED)) | 1842 | if (!(swap_info[type]->flags & SWP_USED)) |
1837 | break; | 1843 | break; |
1838 | } | 1844 | } |
1839 | error = -EPERM; | ||
1840 | if (type >= MAX_SWAPFILES) { | 1845 | if (type >= MAX_SWAPFILES) { |
1841 | spin_unlock(&swap_lock); | 1846 | spin_unlock(&swap_lock); |
1842 | kfree(p); | 1847 | kfree(p); |
1843 | goto out; | 1848 | return ERR_PTR(-EPERM); |
1844 | } | 1849 | } |
1845 | if (type >= nr_swapfiles) { | 1850 | if (type >= nr_swapfiles) { |
1846 | p->type = type; | 1851 | p->type = type; |
@@ -1865,80 +1870,49 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
1865 | p->next = -1; | 1870 | p->next = -1; |
1866 | spin_unlock(&swap_lock); | 1871 | spin_unlock(&swap_lock); |
1867 | 1872 | ||
1868 | name = getname(specialfile); | 1873 | return p; |
1869 | error = PTR_ERR(name); | 1874 | } |
1870 | if (IS_ERR(name)) { | ||
1871 | name = NULL; | ||
1872 | goto bad_swap_2; | ||
1873 | } | ||
1874 | swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0); | ||
1875 | error = PTR_ERR(swap_file); | ||
1876 | if (IS_ERR(swap_file)) { | ||
1877 | swap_file = NULL; | ||
1878 | goto bad_swap_2; | ||
1879 | } | ||
1880 | |||
1881 | p->swap_file = swap_file; | ||
1882 | mapping = swap_file->f_mapping; | ||
1883 | inode = mapping->host; | ||
1884 | |||
1885 | error = -EBUSY; | ||
1886 | for (i = 0; i < nr_swapfiles; i++) { | ||
1887 | struct swap_info_struct *q = swap_info[i]; | ||
1888 | 1875 | ||
1889 | if (i == type || !q->swap_file) | 1876 | static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) |
1890 | continue; | 1877 | { |
1891 | if (mapping == q->swap_file->f_mapping) | 1878 | int error; |
1892 | goto bad_swap; | ||
1893 | } | ||
1894 | 1879 | ||
1895 | error = -EINVAL; | ||
1896 | if (S_ISBLK(inode->i_mode)) { | 1880 | if (S_ISBLK(inode->i_mode)) { |
1897 | bdev = I_BDEV(inode); | 1881 | p->bdev = bdgrab(I_BDEV(inode)); |
1898 | error = bd_claim(bdev, sys_swapon); | 1882 | error = blkdev_get(p->bdev, |
1883 | FMODE_READ | FMODE_WRITE | FMODE_EXCL, | ||
1884 | sys_swapon); | ||
1899 | if (error < 0) { | 1885 | if (error < 0) { |
1900 | bdev = NULL; | 1886 | p->bdev = NULL; |
1901 | error = -EINVAL; | 1887 | return -EINVAL; |
1902 | goto bad_swap; | ||
1903 | } | 1888 | } |
1904 | p->old_block_size = block_size(bdev); | 1889 | p->old_block_size = block_size(p->bdev); |
1905 | error = set_blocksize(bdev, PAGE_SIZE); | 1890 | error = set_blocksize(p->bdev, PAGE_SIZE); |
1906 | if (error < 0) | 1891 | if (error < 0) |
1907 | goto bad_swap; | 1892 | return error; |
1908 | p->bdev = bdev; | ||
1909 | p->flags |= SWP_BLKDEV; | 1893 | p->flags |= SWP_BLKDEV; |
1910 | } else if (S_ISREG(inode->i_mode)) { | 1894 | } else if (S_ISREG(inode->i_mode)) { |
1911 | p->bdev = inode->i_sb->s_bdev; | 1895 | p->bdev = inode->i_sb->s_bdev; |
1912 | mutex_lock(&inode->i_mutex); | 1896 | mutex_lock(&inode->i_mutex); |
1913 | did_down = 1; | 1897 | if (IS_SWAPFILE(inode)) |
1914 | if (IS_SWAPFILE(inode)) { | 1898 | return -EBUSY; |
1915 | error = -EBUSY; | 1899 | } else |
1916 | goto bad_swap; | 1900 | return -EINVAL; |
1917 | } | ||
1918 | } else { | ||
1919 | goto bad_swap; | ||
1920 | } | ||
1921 | 1901 | ||
1922 | swapfilepages = i_size_read(inode) >> PAGE_SHIFT; | 1902 | return 0; |
1903 | } | ||
1923 | 1904 | ||
1924 | /* | 1905 | static unsigned long read_swap_header(struct swap_info_struct *p, |
1925 | * Read the swap header. | 1906 | union swap_header *swap_header, |
1926 | */ | 1907 | struct inode *inode) |
1927 | if (!mapping->a_ops->readpage) { | 1908 | { |
1928 | error = -EINVAL; | 1909 | int i; |
1929 | goto bad_swap; | 1910 | unsigned long maxpages; |
1930 | } | 1911 | unsigned long swapfilepages; |
1931 | page = read_mapping_page(mapping, 0, swap_file); | ||
1932 | if (IS_ERR(page)) { | ||
1933 | error = PTR_ERR(page); | ||
1934 | goto bad_swap; | ||
1935 | } | ||
1936 | swap_header = kmap(page); | ||
1937 | 1912 | ||
1938 | if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { | 1913 | if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { |
1939 | printk(KERN_ERR "Unable to find swap-space signature\n"); | 1914 | printk(KERN_ERR "Unable to find swap-space signature\n"); |
1940 | error = -EINVAL; | 1915 | return 0; |
1941 | goto bad_swap; | ||
1942 | } | 1916 | } |
1943 | 1917 | ||
1944 | /* swap partition endianess hack... */ | 1918 | /* swap partition endianess hack... */ |
@@ -1954,8 +1928,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
1954 | printk(KERN_WARNING | 1928 | printk(KERN_WARNING |
1955 | "Unable to handle swap header version %d\n", | 1929 | "Unable to handle swap header version %d\n", |
1956 | swap_header->info.version); | 1930 | swap_header->info.version); |
1957 | error = -EINVAL; | 1931 | return 0; |
1958 | goto bad_swap; | ||
1959 | } | 1932 | } |
1960 | 1933 | ||
1961 | p->lowest_bit = 1; | 1934 | p->lowest_bit = 1; |
@@ -1986,62 +1959,156 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
1986 | } | 1959 | } |
1987 | p->highest_bit = maxpages - 1; | 1960 | p->highest_bit = maxpages - 1; |
1988 | 1961 | ||
1989 | error = -EINVAL; | ||
1990 | if (!maxpages) | 1962 | if (!maxpages) |
1991 | goto bad_swap; | 1963 | return 0; |
1964 | swapfilepages = i_size_read(inode) >> PAGE_SHIFT; | ||
1992 | if (swapfilepages && maxpages > swapfilepages) { | 1965 | if (swapfilepages && maxpages > swapfilepages) { |
1993 | printk(KERN_WARNING | 1966 | printk(KERN_WARNING |
1994 | "Swap area shorter than signature indicates\n"); | 1967 | "Swap area shorter than signature indicates\n"); |
1995 | goto bad_swap; | 1968 | return 0; |
1996 | } | 1969 | } |
1997 | if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) | 1970 | if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) |
1998 | goto bad_swap; | 1971 | return 0; |
1999 | if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) | 1972 | if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) |
2000 | goto bad_swap; | 1973 | return 0; |
2001 | 1974 | ||
2002 | /* OK, set up the swap map and apply the bad block list */ | 1975 | return maxpages; |
2003 | swap_map = vmalloc(maxpages); | 1976 | } |
2004 | if (!swap_map) { | 1977 | |
2005 | error = -ENOMEM; | 1978 | static int setup_swap_map_and_extents(struct swap_info_struct *p, |
2006 | goto bad_swap; | 1979 | union swap_header *swap_header, |
2007 | } | 1980 | unsigned char *swap_map, |
1981 | unsigned long maxpages, | ||
1982 | sector_t *span) | ||
1983 | { | ||
1984 | int i; | ||
1985 | unsigned int nr_good_pages; | ||
1986 | int nr_extents; | ||
2008 | 1987 | ||
2009 | memset(swap_map, 0, maxpages); | ||
2010 | nr_good_pages = maxpages - 1; /* omit header page */ | 1988 | nr_good_pages = maxpages - 1; /* omit header page */ |
2011 | 1989 | ||
2012 | for (i = 0; i < swap_header->info.nr_badpages; i++) { | 1990 | for (i = 0; i < swap_header->info.nr_badpages; i++) { |
2013 | unsigned int page_nr = swap_header->info.badpages[i]; | 1991 | unsigned int page_nr = swap_header->info.badpages[i]; |
2014 | if (page_nr == 0 || page_nr > swap_header->info.last_page) { | 1992 | if (page_nr == 0 || page_nr > swap_header->info.last_page) |
2015 | error = -EINVAL; | 1993 | return -EINVAL; |
2016 | goto bad_swap; | ||
2017 | } | ||
2018 | if (page_nr < maxpages) { | 1994 | if (page_nr < maxpages) { |
2019 | swap_map[page_nr] = SWAP_MAP_BAD; | 1995 | swap_map[page_nr] = SWAP_MAP_BAD; |
2020 | nr_good_pages--; | 1996 | nr_good_pages--; |
2021 | } | 1997 | } |
2022 | } | 1998 | } |
2023 | 1999 | ||
2024 | error = swap_cgroup_swapon(type, maxpages); | ||
2025 | if (error) | ||
2026 | goto bad_swap; | ||
2027 | |||
2028 | if (nr_good_pages) { | 2000 | if (nr_good_pages) { |
2029 | swap_map[0] = SWAP_MAP_BAD; | 2001 | swap_map[0] = SWAP_MAP_BAD; |
2030 | p->max = maxpages; | 2002 | p->max = maxpages; |
2031 | p->pages = nr_good_pages; | 2003 | p->pages = nr_good_pages; |
2032 | nr_extents = setup_swap_extents(p, &span); | 2004 | nr_extents = setup_swap_extents(p, span); |
2033 | if (nr_extents < 0) { | 2005 | if (nr_extents < 0) |
2034 | error = nr_extents; | 2006 | return nr_extents; |
2035 | goto bad_swap; | ||
2036 | } | ||
2037 | nr_good_pages = p->pages; | 2007 | nr_good_pages = p->pages; |
2038 | } | 2008 | } |
2039 | if (!nr_good_pages) { | 2009 | if (!nr_good_pages) { |
2040 | printk(KERN_WARNING "Empty swap-file\n"); | 2010 | printk(KERN_WARNING "Empty swap-file\n"); |
2011 | return -EINVAL; | ||
2012 | } | ||
2013 | |||
2014 | return nr_extents; | ||
2015 | } | ||
2016 | |||
2017 | SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | ||
2018 | { | ||
2019 | struct swap_info_struct *p; | ||
2020 | char *name; | ||
2021 | struct file *swap_file = NULL; | ||
2022 | struct address_space *mapping; | ||
2023 | int i; | ||
2024 | int prio; | ||
2025 | int error; | ||
2026 | union swap_header *swap_header; | ||
2027 | int nr_extents; | ||
2028 | sector_t span; | ||
2029 | unsigned long maxpages; | ||
2030 | unsigned char *swap_map = NULL; | ||
2031 | struct page *page = NULL; | ||
2032 | struct inode *inode = NULL; | ||
2033 | |||
2034 | if (!capable(CAP_SYS_ADMIN)) | ||
2035 | return -EPERM; | ||
2036 | |||
2037 | p = alloc_swap_info(); | ||
2038 | if (IS_ERR(p)) | ||
2039 | return PTR_ERR(p); | ||
2040 | |||
2041 | name = getname(specialfile); | ||
2042 | if (IS_ERR(name)) { | ||
2043 | error = PTR_ERR(name); | ||
2044 | name = NULL; | ||
2045 | goto bad_swap; | ||
2046 | } | ||
2047 | swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0); | ||
2048 | if (IS_ERR(swap_file)) { | ||
2049 | error = PTR_ERR(swap_file); | ||
2050 | swap_file = NULL; | ||
2051 | goto bad_swap; | ||
2052 | } | ||
2053 | |||
2054 | p->swap_file = swap_file; | ||
2055 | mapping = swap_file->f_mapping; | ||
2056 | |||
2057 | for (i = 0; i < nr_swapfiles; i++) { | ||
2058 | struct swap_info_struct *q = swap_info[i]; | ||
2059 | |||
2060 | if (q == p || !q->swap_file) | ||
2061 | continue; | ||
2062 | if (mapping == q->swap_file->f_mapping) { | ||
2063 | error = -EBUSY; | ||
2064 | goto bad_swap; | ||
2065 | } | ||
2066 | } | ||
2067 | |||
2068 | inode = mapping->host; | ||
2069 | /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */ | ||
2070 | error = claim_swapfile(p, inode); | ||
2071 | if (unlikely(error)) | ||
2072 | goto bad_swap; | ||
2073 | |||
2074 | /* | ||
2075 | * Read the swap header. | ||
2076 | */ | ||
2077 | if (!mapping->a_ops->readpage) { | ||
2078 | error = -EINVAL; | ||
2079 | goto bad_swap; | ||
2080 | } | ||
2081 | page = read_mapping_page(mapping, 0, swap_file); | ||
2082 | if (IS_ERR(page)) { | ||
2083 | error = PTR_ERR(page); | ||
2084 | goto bad_swap; | ||
2085 | } | ||
2086 | swap_header = kmap(page); | ||
2087 | |||
2088 | maxpages = read_swap_header(p, swap_header, inode); | ||
2089 | if (unlikely(!maxpages)) { | ||
2041 | error = -EINVAL; | 2090 | error = -EINVAL; |
2042 | goto bad_swap; | 2091 | goto bad_swap; |
2043 | } | 2092 | } |
2044 | 2093 | ||
2094 | /* OK, set up the swap map and apply the bad block list */ | ||
2095 | swap_map = vzalloc(maxpages); | ||
2096 | if (!swap_map) { | ||
2097 | error = -ENOMEM; | ||
2098 | goto bad_swap; | ||
2099 | } | ||
2100 | |||
2101 | error = swap_cgroup_swapon(p->type, maxpages); | ||
2102 | if (error) | ||
2103 | goto bad_swap; | ||
2104 | |||
2105 | nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map, | ||
2106 | maxpages, &span); | ||
2107 | if (unlikely(nr_extents < 0)) { | ||
2108 | error = nr_extents; | ||
2109 | goto bad_swap; | ||
2110 | } | ||
2111 | |||
2045 | if (p->bdev) { | 2112 | if (p->bdev) { |
2046 | if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { | 2113 | if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { |
2047 | p->flags |= SWP_SOLIDSTATE; | 2114 | p->flags |= SWP_SOLIDSTATE; |
@@ -2052,55 +2119,46 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
2052 | } | 2119 | } |
2053 | 2120 | ||
2054 | mutex_lock(&swapon_mutex); | 2121 | mutex_lock(&swapon_mutex); |
2055 | spin_lock(&swap_lock); | 2122 | prio = -1; |
2056 | if (swap_flags & SWAP_FLAG_PREFER) | 2123 | if (swap_flags & SWAP_FLAG_PREFER) |
2057 | p->prio = | 2124 | prio = |
2058 | (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; | 2125 | (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; |
2059 | else | 2126 | enable_swap_info(p, prio, swap_map); |
2060 | p->prio = --least_priority; | ||
2061 | p->swap_map = swap_map; | ||
2062 | p->flags |= SWP_WRITEOK; | ||
2063 | nr_swap_pages += nr_good_pages; | ||
2064 | total_swap_pages += nr_good_pages; | ||
2065 | 2127 | ||
2066 | printk(KERN_INFO "Adding %uk swap on %s. " | 2128 | printk(KERN_INFO "Adding %uk swap on %s. " |
2067 | "Priority:%d extents:%d across:%lluk %s%s\n", | 2129 | "Priority:%d extents:%d across:%lluk %s%s\n", |
2068 | nr_good_pages<<(PAGE_SHIFT-10), name, p->prio, | 2130 | p->pages<<(PAGE_SHIFT-10), name, p->prio, |
2069 | nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), | 2131 | nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), |
2070 | (p->flags & SWP_SOLIDSTATE) ? "SS" : "", | 2132 | (p->flags & SWP_SOLIDSTATE) ? "SS" : "", |
2071 | (p->flags & SWP_DISCARDABLE) ? "D" : ""); | 2133 | (p->flags & SWP_DISCARDABLE) ? "D" : ""); |
2072 | 2134 | ||
2073 | /* insert swap space into swap_list: */ | ||
2074 | prev = -1; | ||
2075 | for (i = swap_list.head; i >= 0; i = swap_info[i]->next) { | ||
2076 | if (p->prio >= swap_info[i]->prio) | ||
2077 | break; | ||
2078 | prev = i; | ||
2079 | } | ||
2080 | p->next = i; | ||
2081 | if (prev < 0) | ||
2082 | swap_list.head = swap_list.next = type; | ||
2083 | else | ||
2084 | swap_info[prev]->next = type; | ||
2085 | spin_unlock(&swap_lock); | ||
2086 | mutex_unlock(&swapon_mutex); | 2135 | mutex_unlock(&swapon_mutex); |
2136 | atomic_inc(&proc_poll_event); | ||
2137 | wake_up_interruptible(&proc_poll_wait); | ||
2138 | |||
2139 | if (S_ISREG(inode->i_mode)) | ||
2140 | inode->i_flags |= S_SWAPFILE; | ||
2087 | error = 0; | 2141 | error = 0; |
2088 | goto out; | 2142 | goto out; |
2089 | bad_swap: | 2143 | bad_swap: |
2090 | if (bdev) { | 2144 | if (inode && S_ISBLK(inode->i_mode) && p->bdev) { |
2091 | set_blocksize(bdev, p->old_block_size); | 2145 | set_blocksize(p->bdev, p->old_block_size); |
2092 | bd_release(bdev); | 2146 | blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); |
2093 | } | 2147 | } |
2094 | destroy_swap_extents(p); | 2148 | destroy_swap_extents(p); |
2095 | swap_cgroup_swapoff(type); | 2149 | swap_cgroup_swapoff(p->type); |
2096 | bad_swap_2: | ||
2097 | spin_lock(&swap_lock); | 2150 | spin_lock(&swap_lock); |
2098 | p->swap_file = NULL; | 2151 | p->swap_file = NULL; |
2099 | p->flags = 0; | 2152 | p->flags = 0; |
2100 | spin_unlock(&swap_lock); | 2153 | spin_unlock(&swap_lock); |
2101 | vfree(swap_map); | 2154 | vfree(swap_map); |
2102 | if (swap_file) | 2155 | if (swap_file) { |
2156 | if (inode && S_ISREG(inode->i_mode)) { | ||
2157 | mutex_unlock(&inode->i_mutex); | ||
2158 | inode = NULL; | ||
2159 | } | ||
2103 | filp_close(swap_file, NULL); | 2160 | filp_close(swap_file, NULL); |
2161 | } | ||
2104 | out: | 2162 | out: |
2105 | if (page && !IS_ERR(page)) { | 2163 | if (page && !IS_ERR(page)) { |
2106 | kunmap(page); | 2164 | kunmap(page); |
@@ -2108,11 +2166,8 @@ out: | |||
2108 | } | 2166 | } |
2109 | if (name) | 2167 | if (name) |
2110 | putname(name); | 2168 | putname(name); |
2111 | if (did_down) { | 2169 | if (inode && S_ISREG(inode->i_mode)) |
2112 | if (!error) | ||
2113 | inode->i_flags |= S_SWAPFILE; | ||
2114 | mutex_unlock(&inode->i_mutex); | 2170 | mutex_unlock(&inode->i_mutex); |
2115 | } | ||
2116 | return error; | 2171 | return error; |
2117 | } | 2172 | } |
2118 | 2173 | ||