diff options
| -rw-r--r-- | Documentation/kernel-parameters.txt | 8 | ||||
| -rw-r--r-- | arch/x86/mm/numa.c | 21 | ||||
| -rw-r--r-- | fs/buffer.c | 6 | ||||
| -rw-r--r-- | fs/ocfs2/alloc.c | 38 | ||||
| -rw-r--r-- | fs/ocfs2/localalloc.c | 42 | ||||
| -rw-r--r-- | fs/ocfs2/localalloc.h | 6 | ||||
| -rw-r--r-- | include/linux/page-flags.h | 4 | ||||
| -rw-r--r-- | mm/page-writeback.c | 5 | ||||
| -rw-r--r-- | mm/swap_state.c | 63 | ||||
| -rw-r--r-- | mm/swapfile.c | 11 |
10 files changed, 178 insertions, 26 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8f441dab0396..7116fda7077f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
| @@ -1726,16 +1726,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
| 1726 | option description. | 1726 | option description. |
| 1727 | 1727 | ||
| 1728 | memmap=nn[KMG]@ss[KMG] | 1728 | memmap=nn[KMG]@ss[KMG] |
| 1729 | [KNL] Force usage of a specific region of memory | 1729 | [KNL] Force usage of a specific region of memory. |
| 1730 | Region of memory to be used, from ss to ss+nn. | 1730 | Region of memory to be used is from ss to ss+nn. |
| 1731 | 1731 | ||
| 1732 | memmap=nn[KMG]#ss[KMG] | 1732 | memmap=nn[KMG]#ss[KMG] |
| 1733 | [KNL,ACPI] Mark specific memory as ACPI data. | 1733 | [KNL,ACPI] Mark specific memory as ACPI data. |
| 1734 | Region of memory to be used, from ss to ss+nn. | 1734 | Region of memory to be marked is from ss to ss+nn. |
| 1735 | 1735 | ||
| 1736 | memmap=nn[KMG]$ss[KMG] | 1736 | memmap=nn[KMG]$ss[KMG] |
| 1737 | [KNL,ACPI] Mark specific memory as reserved. | 1737 | [KNL,ACPI] Mark specific memory as reserved. |
| 1738 | Region of memory to be used, from ss to ss+nn. | 1738 | Region of memory to be reserved is from ss to ss+nn. |
| 1739 | Example: Exclude memory from 0x18690000-0x1869ffff | 1739 | Example: Exclude memory from 0x18690000-0x1869ffff |
| 1740 | memmap=64K$0x18690000 | 1740 | memmap=64K$0x18690000 |
| 1741 | or | 1741 | or |
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 81b2750f3666..27aa0455fab3 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
| @@ -493,14 +493,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) | |||
| 493 | struct numa_memblk *mb = &mi->blk[i]; | 493 | struct numa_memblk *mb = &mi->blk[i]; |
| 494 | memblock_set_node(mb->start, mb->end - mb->start, | 494 | memblock_set_node(mb->start, mb->end - mb->start, |
| 495 | &memblock.memory, mb->nid); | 495 | &memblock.memory, mb->nid); |
| 496 | |||
| 497 | /* | ||
| 498 | * At this time, all memory regions reserved by memblock are | ||
| 499 | * used by the kernel. Set the nid in memblock.reserved will | ||
| 500 | * mark out all the nodes the kernel resides in. | ||
| 501 | */ | ||
| 502 | memblock_set_node(mb->start, mb->end - mb->start, | ||
| 503 | &memblock.reserved, mb->nid); | ||
| 504 | } | 496 | } |
| 505 | 497 | ||
| 506 | /* | 498 | /* |
| @@ -565,10 +557,21 @@ static void __init numa_init_array(void) | |||
| 565 | static void __init numa_clear_kernel_node_hotplug(void) | 557 | static void __init numa_clear_kernel_node_hotplug(void) |
| 566 | { | 558 | { |
| 567 | int i, nid; | 559 | int i, nid; |
| 568 | nodemask_t numa_kernel_nodes; | 560 | nodemask_t numa_kernel_nodes = NODE_MASK_NONE; |
| 569 | unsigned long start, end; | 561 | unsigned long start, end; |
| 570 | struct memblock_type *type = &memblock.reserved; | 562 | struct memblock_type *type = &memblock.reserved; |
| 571 | 563 | ||
| 564 | /* | ||
| 565 | * At this time, all memory regions reserved by memblock are | ||
| 566 | * used by the kernel. Set the nid in memblock.reserved will | ||
| 567 | * mark out all the nodes the kernel resides in. | ||
| 568 | */ | ||
| 569 | for (i = 0; i < numa_meminfo.nr_blks; i++) { | ||
| 570 | struct numa_memblk *mb = &numa_meminfo.blk[i]; | ||
| 571 | memblock_set_node(mb->start, mb->end - mb->start, | ||
| 572 | &memblock.reserved, mb->nid); | ||
| 573 | } | ||
| 574 | |||
| 572 | /* Mark all kernel nodes. */ | 575 | /* Mark all kernel nodes. */ |
| 573 | for (i = 0; i < type->cnt; i++) | 576 | for (i = 0; i < type->cnt; i++) |
| 574 | node_set(type->regions[i].nid, numa_kernel_nodes); | 577 | node_set(type->regions[i].nid, numa_kernel_nodes); |
diff --git a/fs/buffer.c b/fs/buffer.c index 651dba10b9c2..27265a8b43c1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -654,14 +654,16 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); | |||
| 654 | static void __set_page_dirty(struct page *page, | 654 | static void __set_page_dirty(struct page *page, |
| 655 | struct address_space *mapping, int warn) | 655 | struct address_space *mapping, int warn) |
| 656 | { | 656 | { |
| 657 | spin_lock_irq(&mapping->tree_lock); | 657 | unsigned long flags; |
| 658 | |||
| 659 | spin_lock_irqsave(&mapping->tree_lock, flags); | ||
| 658 | if (page->mapping) { /* Race with truncate? */ | 660 | if (page->mapping) { /* Race with truncate? */ |
| 659 | WARN_ON_ONCE(warn && !PageUptodate(page)); | 661 | WARN_ON_ONCE(warn && !PageUptodate(page)); |
| 660 | account_page_dirtied(page, mapping); | 662 | account_page_dirtied(page, mapping); |
| 661 | radix_tree_tag_set(&mapping->page_tree, | 663 | radix_tree_tag_set(&mapping->page_tree, |
| 662 | page_index(page), PAGECACHE_TAG_DIRTY); | 664 | page_index(page), PAGECACHE_TAG_DIRTY); |
| 663 | } | 665 | } |
| 664 | spin_unlock_irq(&mapping->tree_lock); | 666 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
| 665 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 667 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); |
| 666 | } | 668 | } |
| 667 | 669 | ||
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 8750ae1b8636..aada5801567a 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
| @@ -4742,6 +4742,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, | |||
| 4742 | enum ocfs2_alloc_restarted *reason_ret) | 4742 | enum ocfs2_alloc_restarted *reason_ret) |
| 4743 | { | 4743 | { |
| 4744 | int status = 0, err = 0; | 4744 | int status = 0, err = 0; |
| 4745 | int need_free = 0; | ||
| 4745 | int free_extents; | 4746 | int free_extents; |
| 4746 | enum ocfs2_alloc_restarted reason = RESTART_NONE; | 4747 | enum ocfs2_alloc_restarted reason = RESTART_NONE; |
| 4747 | u32 bit_off, num_bits; | 4748 | u32 bit_off, num_bits; |
| @@ -4796,7 +4797,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, | |||
| 4796 | OCFS2_JOURNAL_ACCESS_WRITE); | 4797 | OCFS2_JOURNAL_ACCESS_WRITE); |
| 4797 | if (status < 0) { | 4798 | if (status < 0) { |
| 4798 | mlog_errno(status); | 4799 | mlog_errno(status); |
| 4799 | goto leave; | 4800 | need_free = 1; |
| 4801 | goto bail; | ||
| 4800 | } | 4802 | } |
| 4801 | 4803 | ||
| 4802 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); | 4804 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); |
| @@ -4807,7 +4809,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, | |||
| 4807 | num_bits, flags, meta_ac); | 4809 | num_bits, flags, meta_ac); |
| 4808 | if (status < 0) { | 4810 | if (status < 0) { |
| 4809 | mlog_errno(status); | 4811 | mlog_errno(status); |
| 4810 | goto leave; | 4812 | need_free = 1; |
| 4813 | goto bail; | ||
| 4811 | } | 4814 | } |
| 4812 | 4815 | ||
| 4813 | ocfs2_journal_dirty(handle, et->et_root_bh); | 4816 | ocfs2_journal_dirty(handle, et->et_root_bh); |
| @@ -4821,6 +4824,19 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, | |||
| 4821 | reason = RESTART_TRANS; | 4824 | reason = RESTART_TRANS; |
| 4822 | } | 4825 | } |
| 4823 | 4826 | ||
| 4827 | bail: | ||
| 4828 | if (need_free) { | ||
| 4829 | if (data_ac->ac_which == OCFS2_AC_USE_LOCAL) | ||
| 4830 | ocfs2_free_local_alloc_bits(osb, handle, data_ac, | ||
| 4831 | bit_off, num_bits); | ||
| 4832 | else | ||
| 4833 | ocfs2_free_clusters(handle, | ||
| 4834 | data_ac->ac_inode, | ||
| 4835 | data_ac->ac_bh, | ||
| 4836 | ocfs2_clusters_to_blocks(osb->sb, bit_off), | ||
| 4837 | num_bits); | ||
| 4838 | } | ||
| 4839 | |||
| 4824 | leave: | 4840 | leave: |
| 4825 | if (reason_ret) | 4841 | if (reason_ret) |
| 4826 | *reason_ret = reason; | 4842 | *reason_ret = reason; |
| @@ -6805,6 +6821,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
| 6805 | struct buffer_head *di_bh) | 6821 | struct buffer_head *di_bh) |
| 6806 | { | 6822 | { |
| 6807 | int ret, i, has_data, num_pages = 0; | 6823 | int ret, i, has_data, num_pages = 0; |
| 6824 | int need_free = 0; | ||
| 6825 | u32 bit_off, num; | ||
| 6808 | handle_t *handle; | 6826 | handle_t *handle; |
| 6809 | u64 uninitialized_var(block); | 6827 | u64 uninitialized_var(block); |
| 6810 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 6828 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
| @@ -6850,7 +6868,6 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
| 6850 | } | 6868 | } |
| 6851 | 6869 | ||
| 6852 | if (has_data) { | 6870 | if (has_data) { |
| 6853 | u32 bit_off, num; | ||
| 6854 | unsigned int page_end; | 6871 | unsigned int page_end; |
| 6855 | u64 phys; | 6872 | u64 phys; |
| 6856 | 6873 | ||
| @@ -6886,6 +6903,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
| 6886 | ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages); | 6903 | ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages); |
| 6887 | if (ret) { | 6904 | if (ret) { |
| 6888 | mlog_errno(ret); | 6905 | mlog_errno(ret); |
| 6906 | need_free = 1; | ||
| 6889 | goto out_commit; | 6907 | goto out_commit; |
| 6890 | } | 6908 | } |
| 6891 | 6909 | ||
| @@ -6896,6 +6914,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
| 6896 | ret = ocfs2_read_inline_data(inode, pages[0], di_bh); | 6914 | ret = ocfs2_read_inline_data(inode, pages[0], di_bh); |
| 6897 | if (ret) { | 6915 | if (ret) { |
| 6898 | mlog_errno(ret); | 6916 | mlog_errno(ret); |
| 6917 | need_free = 1; | ||
| 6899 | goto out_commit; | 6918 | goto out_commit; |
| 6900 | } | 6919 | } |
| 6901 | 6920 | ||
| @@ -6927,6 +6946,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
| 6927 | ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL); | 6946 | ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL); |
| 6928 | if (ret) { | 6947 | if (ret) { |
| 6929 | mlog_errno(ret); | 6948 | mlog_errno(ret); |
| 6949 | need_free = 1; | ||
| 6930 | goto out_commit; | 6950 | goto out_commit; |
| 6931 | } | 6951 | } |
| 6932 | 6952 | ||
| @@ -6938,6 +6958,18 @@ out_commit: | |||
| 6938 | dquot_free_space_nodirty(inode, | 6958 | dquot_free_space_nodirty(inode, |
| 6939 | ocfs2_clusters_to_bytes(osb->sb, 1)); | 6959 | ocfs2_clusters_to_bytes(osb->sb, 1)); |
| 6940 | 6960 | ||
| 6961 | if (need_free) { | ||
| 6962 | if (data_ac->ac_which == OCFS2_AC_USE_LOCAL) | ||
| 6963 | ocfs2_free_local_alloc_bits(osb, handle, data_ac, | ||
| 6964 | bit_off, num); | ||
| 6965 | else | ||
| 6966 | ocfs2_free_clusters(handle, | ||
| 6967 | data_ac->ac_inode, | ||
| 6968 | data_ac->ac_bh, | ||
| 6969 | ocfs2_clusters_to_blocks(osb->sb, bit_off), | ||
| 6970 | num); | ||
| 6971 | } | ||
| 6972 | |||
| 6941 | ocfs2_commit_trans(osb, handle); | 6973 | ocfs2_commit_trans(osb, handle); |
| 6942 | 6974 | ||
| 6943 | out_unlock: | 6975 | out_unlock: |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index cd5496b7a0a3..044013455621 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
| @@ -781,6 +781,48 @@ bail: | |||
| 781 | return status; | 781 | return status; |
| 782 | } | 782 | } |
| 783 | 783 | ||
| 784 | int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb, | ||
| 785 | handle_t *handle, | ||
| 786 | struct ocfs2_alloc_context *ac, | ||
| 787 | u32 bit_off, | ||
| 788 | u32 num_bits) | ||
| 789 | { | ||
| 790 | int status, start; | ||
| 791 | u32 clear_bits; | ||
| 792 | struct inode *local_alloc_inode; | ||
| 793 | void *bitmap; | ||
| 794 | struct ocfs2_dinode *alloc; | ||
| 795 | struct ocfs2_local_alloc *la; | ||
| 796 | |||
| 797 | BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); | ||
| 798 | |||
| 799 | local_alloc_inode = ac->ac_inode; | ||
| 800 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; | ||
| 801 | la = OCFS2_LOCAL_ALLOC(alloc); | ||
| 802 | |||
| 803 | bitmap = la->la_bitmap; | ||
| 804 | start = bit_off - le32_to_cpu(la->la_bm_off); | ||
| 805 | clear_bits = num_bits; | ||
| 806 | |||
| 807 | status = ocfs2_journal_access_di(handle, | ||
| 808 | INODE_CACHE(local_alloc_inode), | ||
| 809 | osb->local_alloc_bh, | ||
| 810 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 811 | if (status < 0) { | ||
| 812 | mlog_errno(status); | ||
| 813 | goto bail; | ||
| 814 | } | ||
| 815 | |||
| 816 | while (clear_bits--) | ||
| 817 | ocfs2_clear_bit(start++, bitmap); | ||
| 818 | |||
| 819 | le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits); | ||
| 820 | ocfs2_journal_dirty(handle, osb->local_alloc_bh); | ||
| 821 | |||
| 822 | bail: | ||
| 823 | return status; | ||
| 824 | } | ||
| 825 | |||
| 784 | static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) | 826 | static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) |
| 785 | { | 827 | { |
| 786 | u32 count; | 828 | u32 count; |
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h index 1be9b5864460..44a7d1fb2dec 100644 --- a/fs/ocfs2/localalloc.h +++ b/fs/ocfs2/localalloc.h | |||
| @@ -55,6 +55,12 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, | |||
| 55 | u32 *bit_off, | 55 | u32 *bit_off, |
| 56 | u32 *num_bits); | 56 | u32 *num_bits); |
| 57 | 57 | ||
| 58 | int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb, | ||
| 59 | handle_t *handle, | ||
| 60 | struct ocfs2_alloc_context *ac, | ||
| 61 | u32 bit_off, | ||
| 62 | u32 num_bits); | ||
| 63 | |||
| 58 | void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, | 64 | void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, |
| 59 | unsigned int num_clusters); | 65 | unsigned int num_clusters); |
| 60 | void ocfs2_la_enable_worker(struct work_struct *work); | 66 | void ocfs2_la_enable_worker(struct work_struct *work); |
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e464b4e987e8..d1fe1a761047 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
| @@ -228,9 +228,9 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1) | |||
| 228 | TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) | 228 | TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) |
| 229 | PAGEFLAG(MappedToDisk, mappedtodisk) | 229 | PAGEFLAG(MappedToDisk, mappedtodisk) |
| 230 | 230 | ||
| 231 | /* PG_readahead is only used for file reads; PG_reclaim is only for writes */ | 231 | /* PG_readahead is only used for reads; PG_reclaim is only for writes */ |
| 232 | PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) | 232 | PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) |
| 233 | PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */ | 233 | PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim) |
| 234 | 234 | ||
| 235 | #ifdef CONFIG_HIGHMEM | 235 | #ifdef CONFIG_HIGHMEM |
| 236 | /* | 236 | /* |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 2d30e2cfe804..7106cb1aca8e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
| @@ -2173,11 +2173,12 @@ int __set_page_dirty_nobuffers(struct page *page) | |||
| 2173 | if (!TestSetPageDirty(page)) { | 2173 | if (!TestSetPageDirty(page)) { |
| 2174 | struct address_space *mapping = page_mapping(page); | 2174 | struct address_space *mapping = page_mapping(page); |
| 2175 | struct address_space *mapping2; | 2175 | struct address_space *mapping2; |
| 2176 | unsigned long flags; | ||
| 2176 | 2177 | ||
| 2177 | if (!mapping) | 2178 | if (!mapping) |
| 2178 | return 1; | 2179 | return 1; |
| 2179 | 2180 | ||
| 2180 | spin_lock_irq(&mapping->tree_lock); | 2181 | spin_lock_irqsave(&mapping->tree_lock, flags); |
| 2181 | mapping2 = page_mapping(page); | 2182 | mapping2 = page_mapping(page); |
| 2182 | if (mapping2) { /* Race with truncate? */ | 2183 | if (mapping2) { /* Race with truncate? */ |
| 2183 | BUG_ON(mapping2 != mapping); | 2184 | BUG_ON(mapping2 != mapping); |
| @@ -2186,7 +2187,7 @@ int __set_page_dirty_nobuffers(struct page *page) | |||
| 2186 | radix_tree_tag_set(&mapping->page_tree, | 2187 | radix_tree_tag_set(&mapping->page_tree, |
| 2187 | page_index(page), PAGECACHE_TAG_DIRTY); | 2188 | page_index(page), PAGECACHE_TAG_DIRTY); |
| 2188 | } | 2189 | } |
| 2189 | spin_unlock_irq(&mapping->tree_lock); | 2190 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
| 2190 | if (mapping->host) { | 2191 | if (mapping->host) { |
| 2191 | /* !PageAnon && !swapper_space */ | 2192 | /* !PageAnon && !swapper_space */ |
| 2192 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 2193 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); |
diff --git a/mm/swap_state.c b/mm/swap_state.c index 98e85e9c2b2d..e76ace30d436 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
| @@ -63,6 +63,8 @@ unsigned long total_swapcache_pages(void) | |||
| 63 | return ret; | 63 | return ret; |
| 64 | } | 64 | } |
| 65 | 65 | ||
| 66 | static atomic_t swapin_readahead_hits = ATOMIC_INIT(4); | ||
| 67 | |||
| 66 | void show_swap_cache_info(void) | 68 | void show_swap_cache_info(void) |
| 67 | { | 69 | { |
| 68 | printk("%lu pages in swap cache\n", total_swapcache_pages()); | 70 | printk("%lu pages in swap cache\n", total_swapcache_pages()); |
| @@ -286,8 +288,11 @@ struct page * lookup_swap_cache(swp_entry_t entry) | |||
| 286 | 288 | ||
| 287 | page = find_get_page(swap_address_space(entry), entry.val); | 289 | page = find_get_page(swap_address_space(entry), entry.val); |
| 288 | 290 | ||
| 289 | if (page) | 291 | if (page) { |
| 290 | INC_CACHE_INFO(find_success); | 292 | INC_CACHE_INFO(find_success); |
| 293 | if (TestClearPageReadahead(page)) | ||
| 294 | atomic_inc(&swapin_readahead_hits); | ||
| 295 | } | ||
| 291 | 296 | ||
| 292 | INC_CACHE_INFO(find_total); | 297 | INC_CACHE_INFO(find_total); |
| 293 | return page; | 298 | return page; |
| @@ -389,6 +394,50 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, | |||
| 389 | return found_page; | 394 | return found_page; |
| 390 | } | 395 | } |
| 391 | 396 | ||
| 397 | static unsigned long swapin_nr_pages(unsigned long offset) | ||
| 398 | { | ||
| 399 | static unsigned long prev_offset; | ||
| 400 | unsigned int pages, max_pages, last_ra; | ||
| 401 | static atomic_t last_readahead_pages; | ||
| 402 | |||
| 403 | max_pages = 1 << ACCESS_ONCE(page_cluster); | ||
| 404 | if (max_pages <= 1) | ||
| 405 | return 1; | ||
| 406 | |||
| 407 | /* | ||
| 408 | * This heuristic has been found to work well on both sequential and | ||
| 409 | * random loads, swapping to hard disk or to SSD: please don't ask | ||
| 410 | * what the "+ 2" means, it just happens to work well, that's all. | ||
| 411 | */ | ||
| 412 | pages = atomic_xchg(&swapin_readahead_hits, 0) + 2; | ||
| 413 | if (pages == 2) { | ||
| 414 | /* | ||
| 415 | * We can have no readahead hits to judge by: but must not get | ||
| 416 | * stuck here forever, so check for an adjacent offset instead | ||
| 417 | * (and don't even bother to check whether swap type is same). | ||
| 418 | */ | ||
| 419 | if (offset != prev_offset + 1 && offset != prev_offset - 1) | ||
| 420 | pages = 1; | ||
| 421 | prev_offset = offset; | ||
| 422 | } else { | ||
| 423 | unsigned int roundup = 4; | ||
| 424 | while (roundup < pages) | ||
| 425 | roundup <<= 1; | ||
| 426 | pages = roundup; | ||
| 427 | } | ||
| 428 | |||
| 429 | if (pages > max_pages) | ||
| 430 | pages = max_pages; | ||
| 431 | |||
| 432 | /* Don't shrink readahead too fast */ | ||
| 433 | last_ra = atomic_read(&last_readahead_pages) / 2; | ||
| 434 | if (pages < last_ra) | ||
| 435 | pages = last_ra; | ||
| 436 | atomic_set(&last_readahead_pages, pages); | ||
| 437 | |||
| 438 | return pages; | ||
| 439 | } | ||
| 440 | |||
| 392 | /** | 441 | /** |
| 393 | * swapin_readahead - swap in pages in hope we need them soon | 442 | * swapin_readahead - swap in pages in hope we need them soon |
| 394 | * @entry: swap entry of this memory | 443 | * @entry: swap entry of this memory |
| @@ -412,11 +461,16 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, | |||
| 412 | struct vm_area_struct *vma, unsigned long addr) | 461 | struct vm_area_struct *vma, unsigned long addr) |
| 413 | { | 462 | { |
| 414 | struct page *page; | 463 | struct page *page; |
| 415 | unsigned long offset = swp_offset(entry); | 464 | unsigned long entry_offset = swp_offset(entry); |
| 465 | unsigned long offset = entry_offset; | ||
| 416 | unsigned long start_offset, end_offset; | 466 | unsigned long start_offset, end_offset; |
| 417 | unsigned long mask = (1UL << page_cluster) - 1; | 467 | unsigned long mask; |
| 418 | struct blk_plug plug; | 468 | struct blk_plug plug; |
| 419 | 469 | ||
| 470 | mask = swapin_nr_pages(offset) - 1; | ||
| 471 | if (!mask) | ||
| 472 | goto skip; | ||
| 473 | |||
| 420 | /* Read a page_cluster sized and aligned cluster around offset. */ | 474 | /* Read a page_cluster sized and aligned cluster around offset. */ |
| 421 | start_offset = offset & ~mask; | 475 | start_offset = offset & ~mask; |
| 422 | end_offset = offset | mask; | 476 | end_offset = offset | mask; |
| @@ -430,10 +484,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, | |||
| 430 | gfp_mask, vma, addr); | 484 | gfp_mask, vma, addr); |
| 431 | if (!page) | 485 | if (!page) |
| 432 | continue; | 486 | continue; |
| 487 | if (offset != entry_offset) | ||
| 488 | SetPageReadahead(page); | ||
| 433 | page_cache_release(page); | 489 | page_cache_release(page); |
| 434 | } | 490 | } |
| 435 | blk_finish_plug(&plug); | 491 | blk_finish_plug(&plug); |
| 436 | 492 | ||
| 437 | lru_add_drain(); /* Push any new pages onto the LRU now */ | 493 | lru_add_drain(); /* Push any new pages onto the LRU now */ |
| 494 | skip: | ||
| 438 | return read_swap_cache_async(entry, gfp_mask, vma, addr); | 495 | return read_swap_cache_async(entry, gfp_mask, vma, addr); |
| 439 | } | 496 | } |
diff --git a/mm/swapfile.c b/mm/swapfile.c index c6c13b050a58..4a7f7e6992b6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
| @@ -1923,7 +1923,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
| 1923 | p->swap_map = NULL; | 1923 | p->swap_map = NULL; |
| 1924 | cluster_info = p->cluster_info; | 1924 | cluster_info = p->cluster_info; |
| 1925 | p->cluster_info = NULL; | 1925 | p->cluster_info = NULL; |
| 1926 | p->flags = 0; | ||
| 1927 | frontswap_map = frontswap_map_get(p); | 1926 | frontswap_map = frontswap_map_get(p); |
| 1928 | spin_unlock(&p->lock); | 1927 | spin_unlock(&p->lock); |
| 1929 | spin_unlock(&swap_lock); | 1928 | spin_unlock(&swap_lock); |
| @@ -1949,6 +1948,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
| 1949 | mutex_unlock(&inode->i_mutex); | 1948 | mutex_unlock(&inode->i_mutex); |
| 1950 | } | 1949 | } |
| 1951 | filp_close(swap_file, NULL); | 1950 | filp_close(swap_file, NULL); |
| 1951 | |||
| 1952 | /* | ||
| 1953 | * Clear the SWP_USED flag after all resources are freed so that swapon | ||
| 1954 | * can reuse this swap_info in alloc_swap_info() safely. It is ok to | ||
| 1955 | * not hold p->lock after we cleared its SWP_WRITEOK. | ||
| 1956 | */ | ||
| 1957 | spin_lock(&swap_lock); | ||
| 1958 | p->flags = 0; | ||
| 1959 | spin_unlock(&swap_lock); | ||
| 1960 | |||
| 1952 | err = 0; | 1961 | err = 0; |
| 1953 | atomic_inc(&proc_poll_event); | 1962 | atomic_inc(&proc_poll_event); |
| 1954 | wake_up_interruptible(&proc_poll_wait); | 1963 | wake_up_interruptible(&proc_poll_wait); |
