aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-02-06 16:49:03 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2014-02-06 16:49:03 -0500
commit9343224bfd4be6a02e6ae0c0d66426c955c7d76e (patch)
treec5d2287ff3a8fdfc15186dd35b9c00896003114d
parentf2de3a159937bfb1ab1ca671e0f2d06cda286a24 (diff)
parent227d53b397a32a7614667b3ecaf1d89902fb6c12 (diff)
Merge branch 'akpm' (patches from Andrew Morton)
Merge a bunch of fixes from Andrew Morton: "Commit 579f82901f6f ("swap: add a simple detector for inappropriate swapin readahead") is a feature. No probs if you decide to defer it until the next merge window. It has been sitting in my tree for over a year because of my dislike of all the magic numbers, but recent discussion with Hugh has made me give up" * emailed patches fron Andrew Morton <akpm@linux-foundation.org>: mm: __set_page_dirty uses spin_lock_irqsave instead of spin_lock_irq arch/x86/mm/numa.c: fix array index overflow when synchronizing nid to memblock.reserved. arch/x86/mm/numa.c: initialize numa_kernel_nodes in numa_clear_kernel_node_hotplug() mm: __set_page_dirty_nobuffers() uses spin_lock_irqsave() instead of spin_lock_irq() mm/swap: fix race on swap_info reuse between swapoff and swapon swap: add a simple detector for inappropriate swapin readahead ocfs2: free allocated clusters if error occurs after ocfs2_claim_clusters Documentation/kernel-parameters.txt: fix memmap= language
-rw-r--r--Documentation/kernel-parameters.txt8
-rw-r--r--arch/x86/mm/numa.c21
-rw-r--r--fs/buffer.c6
-rw-r--r--fs/ocfs2/alloc.c38
-rw-r--r--fs/ocfs2/localalloc.c42
-rw-r--r--fs/ocfs2/localalloc.h6
-rw-r--r--include/linux/page-flags.h4
-rw-r--r--mm/page-writeback.c5
-rw-r--r--mm/swap_state.c63
-rw-r--r--mm/swapfile.c11
10 files changed, 178 insertions, 26 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8f441dab0396..7116fda7077f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1726,16 +1726,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1726 option description. 1726 option description.
1727 1727
1728 memmap=nn[KMG]@ss[KMG] 1728 memmap=nn[KMG]@ss[KMG]
1729 [KNL] Force usage of a specific region of memory 1729 [KNL] Force usage of a specific region of memory.
1730 Region of memory to be used, from ss to ss+nn. 1730 Region of memory to be used is from ss to ss+nn.
1731 1731
1732 memmap=nn[KMG]#ss[KMG] 1732 memmap=nn[KMG]#ss[KMG]
1733 [KNL,ACPI] Mark specific memory as ACPI data. 1733 [KNL,ACPI] Mark specific memory as ACPI data.
1734 Region of memory to be used, from ss to ss+nn. 1734 Region of memory to be marked is from ss to ss+nn.
1735 1735
1736 memmap=nn[KMG]$ss[KMG] 1736 memmap=nn[KMG]$ss[KMG]
1737 [KNL,ACPI] Mark specific memory as reserved. 1737 [KNL,ACPI] Mark specific memory as reserved.
1738 Region of memory to be used, from ss to ss+nn. 1738 Region of memory to be reserved is from ss to ss+nn.
1739 Example: Exclude memory from 0x18690000-0x1869ffff 1739 Example: Exclude memory from 0x18690000-0x1869ffff
1740 memmap=64K$0x18690000 1740 memmap=64K$0x18690000
1741 or 1741 or
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 81b2750f3666..27aa0455fab3 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -493,14 +493,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
493 struct numa_memblk *mb = &mi->blk[i]; 493 struct numa_memblk *mb = &mi->blk[i];
494 memblock_set_node(mb->start, mb->end - mb->start, 494 memblock_set_node(mb->start, mb->end - mb->start,
495 &memblock.memory, mb->nid); 495 &memblock.memory, mb->nid);
496
497 /*
498 * At this time, all memory regions reserved by memblock are
499 * used by the kernel. Set the nid in memblock.reserved will
500 * mark out all the nodes the kernel resides in.
501 */
502 memblock_set_node(mb->start, mb->end - mb->start,
503 &memblock.reserved, mb->nid);
504 } 496 }
505 497
506 /* 498 /*
@@ -565,10 +557,21 @@ static void __init numa_init_array(void)
565static void __init numa_clear_kernel_node_hotplug(void) 557static void __init numa_clear_kernel_node_hotplug(void)
566{ 558{
567 int i, nid; 559 int i, nid;
568 nodemask_t numa_kernel_nodes; 560 nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
569 unsigned long start, end; 561 unsigned long start, end;
570 struct memblock_type *type = &memblock.reserved; 562 struct memblock_type *type = &memblock.reserved;
571 563
564 /*
565 * At this time, all memory regions reserved by memblock are
566 * used by the kernel. Set the nid in memblock.reserved will
567 * mark out all the nodes the kernel resides in.
568 */
569 for (i = 0; i < numa_meminfo.nr_blks; i++) {
570 struct numa_memblk *mb = &numa_meminfo.blk[i];
571 memblock_set_node(mb->start, mb->end - mb->start,
572 &memblock.reserved, mb->nid);
573 }
574
572 /* Mark all kernel nodes. */ 575 /* Mark all kernel nodes. */
573 for (i = 0; i < type->cnt; i++) 576 for (i = 0; i < type->cnt; i++)
574 node_set(type->regions[i].nid, numa_kernel_nodes); 577 node_set(type->regions[i].nid, numa_kernel_nodes);
diff --git a/fs/buffer.c b/fs/buffer.c
index 651dba10b9c2..27265a8b43c1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -654,14 +654,16 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
654static void __set_page_dirty(struct page *page, 654static void __set_page_dirty(struct page *page,
655 struct address_space *mapping, int warn) 655 struct address_space *mapping, int warn)
656{ 656{
657 spin_lock_irq(&mapping->tree_lock); 657 unsigned long flags;
658
659 spin_lock_irqsave(&mapping->tree_lock, flags);
658 if (page->mapping) { /* Race with truncate? */ 660 if (page->mapping) { /* Race with truncate? */
659 WARN_ON_ONCE(warn && !PageUptodate(page)); 661 WARN_ON_ONCE(warn && !PageUptodate(page));
660 account_page_dirtied(page, mapping); 662 account_page_dirtied(page, mapping);
661 radix_tree_tag_set(&mapping->page_tree, 663 radix_tree_tag_set(&mapping->page_tree,
662 page_index(page), PAGECACHE_TAG_DIRTY); 664 page_index(page), PAGECACHE_TAG_DIRTY);
663 } 665 }
664 spin_unlock_irq(&mapping->tree_lock); 666 spin_unlock_irqrestore(&mapping->tree_lock, flags);
665 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 667 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
666} 668}
667 669
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 8750ae1b8636..aada5801567a 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4742,6 +4742,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4742 enum ocfs2_alloc_restarted *reason_ret) 4742 enum ocfs2_alloc_restarted *reason_ret)
4743{ 4743{
4744 int status = 0, err = 0; 4744 int status = 0, err = 0;
4745 int need_free = 0;
4745 int free_extents; 4746 int free_extents;
4746 enum ocfs2_alloc_restarted reason = RESTART_NONE; 4747 enum ocfs2_alloc_restarted reason = RESTART_NONE;
4747 u32 bit_off, num_bits; 4748 u32 bit_off, num_bits;
@@ -4796,7 +4797,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4796 OCFS2_JOURNAL_ACCESS_WRITE); 4797 OCFS2_JOURNAL_ACCESS_WRITE);
4797 if (status < 0) { 4798 if (status < 0) {
4798 mlog_errno(status); 4799 mlog_errno(status);
4799 goto leave; 4800 need_free = 1;
4801 goto bail;
4800 } 4802 }
4801 4803
4802 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 4804 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
@@ -4807,7 +4809,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4807 num_bits, flags, meta_ac); 4809 num_bits, flags, meta_ac);
4808 if (status < 0) { 4810 if (status < 0) {
4809 mlog_errno(status); 4811 mlog_errno(status);
4810 goto leave; 4812 need_free = 1;
4813 goto bail;
4811 } 4814 }
4812 4815
4813 ocfs2_journal_dirty(handle, et->et_root_bh); 4816 ocfs2_journal_dirty(handle, et->et_root_bh);
@@ -4821,6 +4824,19 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4821 reason = RESTART_TRANS; 4824 reason = RESTART_TRANS;
4822 } 4825 }
4823 4826
4827bail:
4828 if (need_free) {
4829 if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
4830 ocfs2_free_local_alloc_bits(osb, handle, data_ac,
4831 bit_off, num_bits);
4832 else
4833 ocfs2_free_clusters(handle,
4834 data_ac->ac_inode,
4835 data_ac->ac_bh,
4836 ocfs2_clusters_to_blocks(osb->sb, bit_off),
4837 num_bits);
4838 }
4839
4824leave: 4840leave:
4825 if (reason_ret) 4841 if (reason_ret)
4826 *reason_ret = reason; 4842 *reason_ret = reason;
@@ -6805,6 +6821,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6805 struct buffer_head *di_bh) 6821 struct buffer_head *di_bh)
6806{ 6822{
6807 int ret, i, has_data, num_pages = 0; 6823 int ret, i, has_data, num_pages = 0;
6824 int need_free = 0;
6825 u32 bit_off, num;
6808 handle_t *handle; 6826 handle_t *handle;
6809 u64 uninitialized_var(block); 6827 u64 uninitialized_var(block);
6810 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6828 struct ocfs2_inode_info *oi = OCFS2_I(inode);
@@ -6850,7 +6868,6 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6850 } 6868 }
6851 6869
6852 if (has_data) { 6870 if (has_data) {
6853 u32 bit_off, num;
6854 unsigned int page_end; 6871 unsigned int page_end;
6855 u64 phys; 6872 u64 phys;
6856 6873
@@ -6886,6 +6903,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6886 ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages); 6903 ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
6887 if (ret) { 6904 if (ret) {
6888 mlog_errno(ret); 6905 mlog_errno(ret);
6906 need_free = 1;
6889 goto out_commit; 6907 goto out_commit;
6890 } 6908 }
6891 6909
@@ -6896,6 +6914,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6896 ret = ocfs2_read_inline_data(inode, pages[0], di_bh); 6914 ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
6897 if (ret) { 6915 if (ret) {
6898 mlog_errno(ret); 6916 mlog_errno(ret);
6917 need_free = 1;
6899 goto out_commit; 6918 goto out_commit;
6900 } 6919 }
6901 6920
@@ -6927,6 +6946,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6927 ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL); 6946 ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
6928 if (ret) { 6947 if (ret) {
6929 mlog_errno(ret); 6948 mlog_errno(ret);
6949 need_free = 1;
6930 goto out_commit; 6950 goto out_commit;
6931 } 6951 }
6932 6952
@@ -6938,6 +6958,18 @@ out_commit:
6938 dquot_free_space_nodirty(inode, 6958 dquot_free_space_nodirty(inode,
6939 ocfs2_clusters_to_bytes(osb->sb, 1)); 6959 ocfs2_clusters_to_bytes(osb->sb, 1));
6940 6960
6961 if (need_free) {
6962 if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
6963 ocfs2_free_local_alloc_bits(osb, handle, data_ac,
6964 bit_off, num);
6965 else
6966 ocfs2_free_clusters(handle,
6967 data_ac->ac_inode,
6968 data_ac->ac_bh,
6969 ocfs2_clusters_to_blocks(osb->sb, bit_off),
6970 num);
6971 }
6972
6941 ocfs2_commit_trans(osb, handle); 6973 ocfs2_commit_trans(osb, handle);
6942 6974
6943out_unlock: 6975out_unlock:
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index cd5496b7a0a3..044013455621 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -781,6 +781,48 @@ bail:
781 return status; 781 return status;
782} 782}
783 783
784int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
785 handle_t *handle,
786 struct ocfs2_alloc_context *ac,
787 u32 bit_off,
788 u32 num_bits)
789{
790 int status, start;
791 u32 clear_bits;
792 struct inode *local_alloc_inode;
793 void *bitmap;
794 struct ocfs2_dinode *alloc;
795 struct ocfs2_local_alloc *la;
796
797 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
798
799 local_alloc_inode = ac->ac_inode;
800 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
801 la = OCFS2_LOCAL_ALLOC(alloc);
802
803 bitmap = la->la_bitmap;
804 start = bit_off - le32_to_cpu(la->la_bm_off);
805 clear_bits = num_bits;
806
807 status = ocfs2_journal_access_di(handle,
808 INODE_CACHE(local_alloc_inode),
809 osb->local_alloc_bh,
810 OCFS2_JOURNAL_ACCESS_WRITE);
811 if (status < 0) {
812 mlog_errno(status);
813 goto bail;
814 }
815
816 while (clear_bits--)
817 ocfs2_clear_bit(start++, bitmap);
818
819 le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits);
820 ocfs2_journal_dirty(handle, osb->local_alloc_bh);
821
822bail:
823 return status;
824}
825
784static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) 826static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
785{ 827{
786 u32 count; 828 u32 count;
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 1be9b5864460..44a7d1fb2dec 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -55,6 +55,12 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
55 u32 *bit_off, 55 u32 *bit_off,
56 u32 *num_bits); 56 u32 *num_bits);
57 57
58int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
59 handle_t *handle,
60 struct ocfs2_alloc_context *ac,
61 u32 bit_off,
62 u32 num_bits);
63
58void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, 64void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
59 unsigned int num_clusters); 65 unsigned int num_clusters);
60void ocfs2_la_enable_worker(struct work_struct *work); 66void ocfs2_la_enable_worker(struct work_struct *work);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index e464b4e987e8..d1fe1a761047 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -228,9 +228,9 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1)
228TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) 228TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
229PAGEFLAG(MappedToDisk, mappedtodisk) 229PAGEFLAG(MappedToDisk, mappedtodisk)
230 230
231/* PG_readahead is only used for file reads; PG_reclaim is only for writes */ 231/* PG_readahead is only used for reads; PG_reclaim is only for writes */
232PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) 232PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
233PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */ 233PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim)
234 234
235#ifdef CONFIG_HIGHMEM 235#ifdef CONFIG_HIGHMEM
236/* 236/*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2d30e2cfe804..7106cb1aca8e 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2173,11 +2173,12 @@ int __set_page_dirty_nobuffers(struct page *page)
2173 if (!TestSetPageDirty(page)) { 2173 if (!TestSetPageDirty(page)) {
2174 struct address_space *mapping = page_mapping(page); 2174 struct address_space *mapping = page_mapping(page);
2175 struct address_space *mapping2; 2175 struct address_space *mapping2;
2176 unsigned long flags;
2176 2177
2177 if (!mapping) 2178 if (!mapping)
2178 return 1; 2179 return 1;
2179 2180
2180 spin_lock_irq(&mapping->tree_lock); 2181 spin_lock_irqsave(&mapping->tree_lock, flags);
2181 mapping2 = page_mapping(page); 2182 mapping2 = page_mapping(page);
2182 if (mapping2) { /* Race with truncate? */ 2183 if (mapping2) { /* Race with truncate? */
2183 BUG_ON(mapping2 != mapping); 2184 BUG_ON(mapping2 != mapping);
@@ -2186,7 +2187,7 @@ int __set_page_dirty_nobuffers(struct page *page)
2186 radix_tree_tag_set(&mapping->page_tree, 2187 radix_tree_tag_set(&mapping->page_tree,
2187 page_index(page), PAGECACHE_TAG_DIRTY); 2188 page_index(page), PAGECACHE_TAG_DIRTY);
2188 } 2189 }
2189 spin_unlock_irq(&mapping->tree_lock); 2190 spin_unlock_irqrestore(&mapping->tree_lock, flags);
2190 if (mapping->host) { 2191 if (mapping->host) {
2191 /* !PageAnon && !swapper_space */ 2192 /* !PageAnon && !swapper_space */
2192 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 2193 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 98e85e9c2b2d..e76ace30d436 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -63,6 +63,8 @@ unsigned long total_swapcache_pages(void)
63 return ret; 63 return ret;
64} 64}
65 65
66static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);
67
66void show_swap_cache_info(void) 68void show_swap_cache_info(void)
67{ 69{
68 printk("%lu pages in swap cache\n", total_swapcache_pages()); 70 printk("%lu pages in swap cache\n", total_swapcache_pages());
@@ -286,8 +288,11 @@ struct page * lookup_swap_cache(swp_entry_t entry)
286 288
287 page = find_get_page(swap_address_space(entry), entry.val); 289 page = find_get_page(swap_address_space(entry), entry.val);
288 290
289 if (page) 291 if (page) {
290 INC_CACHE_INFO(find_success); 292 INC_CACHE_INFO(find_success);
293 if (TestClearPageReadahead(page))
294 atomic_inc(&swapin_readahead_hits);
295 }
291 296
292 INC_CACHE_INFO(find_total); 297 INC_CACHE_INFO(find_total);
293 return page; 298 return page;
@@ -389,6 +394,50 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
389 return found_page; 394 return found_page;
390} 395}
391 396
397static unsigned long swapin_nr_pages(unsigned long offset)
398{
399 static unsigned long prev_offset;
400 unsigned int pages, max_pages, last_ra;
401 static atomic_t last_readahead_pages;
402
403 max_pages = 1 << ACCESS_ONCE(page_cluster);
404 if (max_pages <= 1)
405 return 1;
406
407 /*
408 * This heuristic has been found to work well on both sequential and
409 * random loads, swapping to hard disk or to SSD: please don't ask
410 * what the "+ 2" means, it just happens to work well, that's all.
411 */
412 pages = atomic_xchg(&swapin_readahead_hits, 0) + 2;
413 if (pages == 2) {
414 /*
415 * We can have no readahead hits to judge by: but must not get
416 * stuck here forever, so check for an adjacent offset instead
417 * (and don't even bother to check whether swap type is same).
418 */
419 if (offset != prev_offset + 1 && offset != prev_offset - 1)
420 pages = 1;
421 prev_offset = offset;
422 } else {
423 unsigned int roundup = 4;
424 while (roundup < pages)
425 roundup <<= 1;
426 pages = roundup;
427 }
428
429 if (pages > max_pages)
430 pages = max_pages;
431
432 /* Don't shrink readahead too fast */
433 last_ra = atomic_read(&last_readahead_pages) / 2;
434 if (pages < last_ra)
435 pages = last_ra;
436 atomic_set(&last_readahead_pages, pages);
437
438 return pages;
439}
440
392/** 441/**
393 * swapin_readahead - swap in pages in hope we need them soon 442 * swapin_readahead - swap in pages in hope we need them soon
394 * @entry: swap entry of this memory 443 * @entry: swap entry of this memory
@@ -412,11 +461,16 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
412 struct vm_area_struct *vma, unsigned long addr) 461 struct vm_area_struct *vma, unsigned long addr)
413{ 462{
414 struct page *page; 463 struct page *page;
415 unsigned long offset = swp_offset(entry); 464 unsigned long entry_offset = swp_offset(entry);
465 unsigned long offset = entry_offset;
416 unsigned long start_offset, end_offset; 466 unsigned long start_offset, end_offset;
417 unsigned long mask = (1UL << page_cluster) - 1; 467 unsigned long mask;
418 struct blk_plug plug; 468 struct blk_plug plug;
419 469
470 mask = swapin_nr_pages(offset) - 1;
471 if (!mask)
472 goto skip;
473
420 /* Read a page_cluster sized and aligned cluster around offset. */ 474 /* Read a page_cluster sized and aligned cluster around offset. */
421 start_offset = offset & ~mask; 475 start_offset = offset & ~mask;
422 end_offset = offset | mask; 476 end_offset = offset | mask;
@@ -430,10 +484,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
430 gfp_mask, vma, addr); 484 gfp_mask, vma, addr);
431 if (!page) 485 if (!page)
432 continue; 486 continue;
487 if (offset != entry_offset)
488 SetPageReadahead(page);
433 page_cache_release(page); 489 page_cache_release(page);
434 } 490 }
435 blk_finish_plug(&plug); 491 blk_finish_plug(&plug);
436 492
437 lru_add_drain(); /* Push any new pages onto the LRU now */ 493 lru_add_drain(); /* Push any new pages onto the LRU now */
494skip:
438 return read_swap_cache_async(entry, gfp_mask, vma, addr); 495 return read_swap_cache_async(entry, gfp_mask, vma, addr);
439} 496}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c6c13b050a58..4a7f7e6992b6 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1923,7 +1923,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1923 p->swap_map = NULL; 1923 p->swap_map = NULL;
1924 cluster_info = p->cluster_info; 1924 cluster_info = p->cluster_info;
1925 p->cluster_info = NULL; 1925 p->cluster_info = NULL;
1926 p->flags = 0;
1927 frontswap_map = frontswap_map_get(p); 1926 frontswap_map = frontswap_map_get(p);
1928 spin_unlock(&p->lock); 1927 spin_unlock(&p->lock);
1929 spin_unlock(&swap_lock); 1928 spin_unlock(&swap_lock);
@@ -1949,6 +1948,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1949 mutex_unlock(&inode->i_mutex); 1948 mutex_unlock(&inode->i_mutex);
1950 } 1949 }
1951 filp_close(swap_file, NULL); 1950 filp_close(swap_file, NULL);
1951
1952 /*
1953 * Clear the SWP_USED flag after all resources are freed so that swapon
1954 * can reuse this swap_info in alloc_swap_info() safely. It is ok to
1955 * not hold p->lock after we cleared its SWP_WRITEOK.
1956 */
1957 spin_lock(&swap_lock);
1958 p->flags = 0;
1959 spin_unlock(&swap_lock);
1960
1952 err = 0; 1961 err = 0;
1953 atomic_inc(&proc_poll_event); 1962 atomic_inc(&proc_poll_event);
1954 wake_up_interruptible(&proc_poll_wait); 1963 wake_up_interruptible(&proc_poll_wait);