Diffstat (limited to 'mm'):

 mm/Kconfig      |  22
 mm/filemap.c    |  14
 mm/hugetlb.c    |   2
 mm/madvise.c    |   9
 mm/memory.c     |   7
 mm/mempolicy.c  |  12
 mm/mremap.c     |   4
 mm/page_alloc.c |  29
 mm/rmap.c       |  29
 mm/shmem.c      |  91
 mm/slab.c       |  33
 mm/sparse.c     |  75
 mm/swap_state.c |   6
 mm/swapfile.c   | 412
 mm/vmalloc.c    |   2
 mm/vmscan.c     |  12
 16 files changed, 410 insertions(+), 349 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index cd379936cac6..4e9937ac3529 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -89,3 +89,25 @@ config NEED_MULTIPLE_NODES
 config HAVE_MEMORY_PRESENT
 	def_bool y
 	depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM
+
+#
+# SPARSEMEM_EXTREME (which is the default) does some bootmem
+# allocations when memory_present() is called.  If this can not
+# be done on your architecture, select this option.  However,
+# statically allocating the mem_section[] array can potentially
+# consume vast quantities of .bss, so be careful.
+#
+# This option will also potentially produce smaller runtime code
+# with gcc 3.4 and later.
+#
+config SPARSEMEM_STATIC
+	def_bool n
+
+#
+# Architecture platforms which require a two level mem_section in SPARSEMEM
+# must select this option. This is usually for architecture platforms with
+# an extremely sparse physical address space.
+#
+config SPARSEMEM_EXTREME
+	def_bool y
+	depends on SPARSEMEM && !SPARSEMEM_STATIC
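The tradeoff the two options encode: SPARSEMEM_STATIC keeps one flat mem_section[] array in .bss, while SPARSEMEM_EXTREME keeps only an array of root pointers and allocates each root block on demand. A rough userspace sketch of the two-level lookup; the sizes and names here are invented for illustration, not the kernel's real constants:

#include <stdio.h>
#include <stdlib.h>

struct mem_section { unsigned long section_mem_map; };

#define SECTIONS_PER_ROOT 256
#define NR_SECTION_ROOTS  128

/* EXTREME flavour: only NR_SECTION_ROOTS pointers live in .bss;
 * each root of SECTIONS_PER_ROOT entries is allocated lazily. */
static struct mem_section *roots[NR_SECTION_ROOTS];

static struct mem_section *nr_to_section(unsigned long nr)
{
	struct mem_section *root = roots[nr / SECTIONS_PER_ROOT];

	return root ? &root[nr % SECTIONS_PER_ROOT] : NULL;
}

int main(void)
{
	unsigned long nr = 1000;

	roots[nr / SECTIONS_PER_ROOT] =
		calloc(SECTIONS_PER_ROOT, sizeof(struct mem_section));
	printf("section %lu -> %p\n", nr, (void *)nr_to_section(nr));
	return 0;
}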
diff --git a/mm/filemap.c b/mm/filemap.c
index c11418dd94e8..88611928e71f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -54,9 +54,8 @@
  *
  *  ->i_mmap_lock		(vmtruncate)
  *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
- *      ->swap_list_lock
- *        ->swap_device_lock	(exclusive_swap_page, others)
- *          ->mapping->tree_lock
+ *      ->swap_lock		(exclusive_swap_page, others)
+ *        ->mapping->tree_lock
  *
  *  ->i_sem
  *    ->i_mmap_lock		(truncate->unmap_mapping_range)
@@ -86,7 +85,7 @@
  *    ->page_table_lock		(anon_vma_prepare and various)
  *
  *  ->page_table_lock
- *    ->swap_device_lock	(try_to_unmap_one)
+ *    ->swap_lock		(try_to_unmap_one)
  *    ->private_lock		(try_to_unmap_one)
  *    ->tree_lock		(try_to_unmap_one)
  *    ->zone.lru_lock		(follow_page->mark_page_accessed)
@@ -1505,8 +1504,12 @@ repeat:
 		return -EINVAL;
 
 	page = filemap_getpage(file, pgoff, nonblock);
+
+	/* XXX: This is wrong, a filesystem I/O error may have happened. Fix that as
+	 * done in shmem_populate calling shmem_getpage */
 	if (!page && !nonblock)
 		return -ENOMEM;
+
 	if (page) {
 		err = install_page(mm, vma, addr, page, prot);
 		if (err) {
@@ -1514,6 +1517,9 @@ repeat:
 			return err;
 		}
 	} else {
+		/* No page was found just because we can't read it in now (being
+		 * here implies nonblock != 0), but the page may exist, so set
+		 * the PTE to fault it in later. */
 		err = install_file_pte(mm, vma, addr, pgoff, prot);
 		if (err)
 			return err;
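The two branches above encode one rule: if the page is already readable, map it now; if it could not be read only because nonblock forbids I/O, leave a file PTE behind so the page gets faulted in later. A toy model of that decision, with invented types standing in for the kernel API:

#include <stdio.h>

struct toy_pte { int present; long pgoff; };

static int populate_one(struct toy_pte *pte, int have_page,
			long pgoff, int nonblock)
{
	if (!have_page && !nonblock)
		return -1;		/* blocking mode: report the failure */
	if (have_page)
		pte->present = 1;	/* like install_page() */
	else {
		pte->present = 0;	/* like install_file_pte(): defer */
		pte->pgoff = pgoff;
	}
	return 0;
}

int main(void)
{
	struct toy_pte pte = { 0, 0 };

	populate_one(&pte, 0, 42, 1);
	printf("deferred: present=%d pgoff=%ld\n", pte.present, pte.pgoff);
	return 0;
}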
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6bf720bc662c..901ac523a1c3 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -360,8 +360,6 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
 			ret = -ENOMEM;
 			goto out;
 		}
-		if (! pte_none(*pte))
-			hugetlb_clean_stale_pgtable(pte);
 
 		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
 			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
diff --git a/mm/madvise.c b/mm/madvise.c
index c8c01a12fea4..4454936f87d1 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -37,7 +37,7 @@ static long madvise_behavior(struct vm_area_struct * vma,
 
 	if (new_flags == vma->vm_flags) {
 		*prev = vma;
-		goto success;
+		goto out;
 	}
 
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
@@ -62,6 +62,7 @@ static long madvise_behavior(struct vm_area_struct * vma,
 		goto out;
 	}
 
+success:
 	/*
 	 * vm_flags is protected by the mmap_sem held in write mode.
 	 */
@@ -70,7 +71,6 @@ static long madvise_behavior(struct vm_area_struct * vma,
 out:
 	if (error == -ENOMEM)
 		error = -EAGAIN;
-success:
 	return error;
 }
 
@@ -237,8 +237,9 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
 	 * - different from the way of handling in mlock etc.
 	 */
 	vma = find_vma_prev(current->mm, start, &prev);
-	if (!vma && prev)
-		vma = prev->vm_next;
+	if (vma && start > vma->vm_start)
+		prev = vma;
+
 	for (;;) {
 		/* Still start < end. */
 		error = -ENOMEM;
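The old code only patched things up when the lookup missed entirely; the new check instead fixes up prev when start lands inside the returned vma, so the loop begins with a consistent cursor pair. A small self-contained model of the lookup plus the patched check, on a toy range list with invented names:

#include <stdio.h>

struct vma { unsigned long start, end; struct vma *next; };

static struct vma *find_vma_prev(struct vma *head, unsigned long addr,
				 struct vma **prev)
{
	*prev = NULL;
	for (struct vma *v = head; v; *prev = v, v = v->next)
		if (addr < v->end)	/* first vma ending past addr */
			return v;
	return NULL;
}

int main(void)
{
	struct vma b = { 0x3000, 0x4000, NULL };
	struct vma a = { 0x1000, 0x2000, &b };
	struct vma *prev, *vma = find_vma_prev(&a, 0x3800, &prev);

	if (vma && 0x3800 > vma->start)	/* the patched fixup */
		prev = vma;
	printf("vma=%#lx prev=%#lx\n", vma->start, prev->start);
	return 0;
}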
diff --git a/mm/memory.c b/mm/memory.c
index a596c1172248..788a62810340 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -562,7 +562,8 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 					page->index > details->last_index))
 					continue;
 			}
-			ptent = ptep_get_and_clear(tlb->mm, addr, pte);
+			ptent = ptep_get_and_clear_full(tlb->mm, addr, pte,
+							tlb->fullmm);
 			tlb_remove_tlb_entry(tlb, pte, addr);
 			if (unlikely(!page))
 				continue;
@@ -590,7 +591,7 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 			continue;
 		if (!pte_file(ptent))
 			free_swap_and_cache(pte_to_swp_entry(ptent));
-		pte_clear(tlb->mm, addr, pte);
+		pte_clear_full(tlb->mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap(pte - 1);
 }
@@ -1955,7 +1956,7 @@ static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma,
 	 * Fall back to the linear mapping if the fs does not support
 	 * ->populate:
 	 */
-	if (!vma->vm_ops || !vma->vm_ops->populate ||
+	if (!vma->vm_ops->populate ||
 			(write_access && !(vma->vm_flags & VM_SHARED))) {
 		pte_clear(mm, address, pte);
 		return do_no_page(mm, vma, address, write_access, pte, pmd);
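The point of the new "full" variants is that tlb->fullmm marks a whole-address-space teardown, where no other user can be walking these page tables, so an architecture may take a cheaper clearing path. A userspace sketch of that idea, assuming the atomic/non-atomic split is the interesting part; it need not match any real architecture's implementation:

#include <stdatomic.h>
#include <stdio.h>

typedef _Atomic unsigned long toy_pte_t;

static unsigned long pte_get_and_clear_full(toy_pte_t *pte, int fullmm)
{
	if (fullmm) {
		/* exit()/whole-mm unmap: no concurrent walkers, so a
		 * plain load + store is enough */
		unsigned long v = atomic_load_explicit(pte, memory_order_relaxed);
		atomic_store_explicit(pte, 0, memory_order_relaxed);
		return v;
	}
	/* partial unmap: other threads may race, pay for the RMW */
	return atomic_exchange(pte, 0);
}

int main(void)
{
	toy_pte_t pte = 0x1234;

	printf("old=%#lx\n", pte_get_and_clear_full(&pte, 1));
	return 0;
}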
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b4eababc8198..13492d66b7c8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -664,10 +664,10 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
 #endif
 
 /* Return effective policy for a VMA */
-static struct mempolicy *
-get_vma_policy(struct vm_area_struct *vma, unsigned long addr)
+struct mempolicy *
+get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr)
 {
-	struct mempolicy *pol = current->mempolicy;
+	struct mempolicy *pol = task->mempolicy;
 
 	if (vma) {
 		if (vma->vm_ops && vma->vm_ops->get_policy)
@@ -786,7 +786,7 @@ static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned or
 struct page *
 alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr)
 {
-	struct mempolicy *pol = get_vma_policy(vma, addr);
+	struct mempolicy *pol = get_vma_policy(current, vma, addr);
 
 	cpuset_update_current_mems_allowed();
 
@@ -908,7 +908,7 @@ void __mpol_free(struct mempolicy *p)
 /* Find first node suitable for an allocation */
 int mpol_first_node(struct vm_area_struct *vma, unsigned long addr)
 {
-	struct mempolicy *pol = get_vma_policy(vma, addr);
+	struct mempolicy *pol = get_vma_policy(current, vma, addr);
 
 	switch (pol->policy) {
 	case MPOL_DEFAULT:
@@ -928,7 +928,7 @@ int mpol_first_node(struct vm_area_struct *vma, unsigned long addr)
 /* Find secondary valid nodes for an allocation */
 int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr)
 {
-	struct mempolicy *pol = get_vma_policy(vma, addr);
+	struct mempolicy *pol = get_vma_policy(current, vma, addr);
 
 	switch (pol->policy) {
 	case MPOL_PREFERRED:
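The change is mechanical but worth naming: the lookup now takes the task explicitly instead of reading the global current, which lets callers query another task's policy; existing callers just pass current. The same refactoring in miniature, with invented toy types:

#include <stdio.h>

struct policy { int mode; };
struct task { struct policy *mempolicy; };

/* vma_pol stands in for a VMA's own policy, if any; it takes priority */
static struct policy *get_policy(struct task *task, struct policy *vma_pol)
{
	return vma_pol ? vma_pol : task->mempolicy;
}

int main(void)
{
	struct policy def = { 0 };
	struct task cur = { &def };

	printf("mode=%d\n", get_policy(&cur, NULL)->mode);
	return 0;
}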
diff --git a/mm/mremap.c b/mm/mremap.c
index fc45dc9a617b..a32fed454bd7 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -141,6 +141,10 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
 	if (dst) {
 		pte_t pte;
 		pte = ptep_clear_flush(vma, old_addr, src);
+		/* ZERO_PAGE can be dependant on virtual addr */
+		if (pfn_valid(pte_pfn(pte)) &&
+		    pte_page(pte) == ZERO_PAGE(old_addr))
+			pte = pte_wrprotect(mk_pte(ZERO_PAGE(new_addr), new_vma->vm_page_prot));
 		set_pte_at(mm, new_addr, dst, pte);
 	} else
 		error = -ENOMEM;
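The hunk exists because, on some architectures, ZERO_PAGE(addr) selects one of several zero pages by virtual-address colour, so a PTE moved to a new address must be re-pointed at the zero page for that new address. A toy illustration; the colouring rule below is invented, not any real architecture's:

#include <stdio.h>

static char zero_pages[4][4096];
#define ZERO_PAGE(addr)	(zero_pages[((addr) >> 12) & 3])

int main(void)
{
	unsigned long old_addr = 0x1000, new_addr = 0x7000;

	/* different colours: the moved PTE must switch zero pages */
	printf("old colour %p, new colour %p\n",
	       (void *)ZERO_PAGE(old_addr), (void *)ZERO_PAGE(new_addr));
	return 0;
}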
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8d088371196a..b06a9636d971 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -329,7 +329,7 @@ static inline void free_pages_check(const char *function, struct page *page)
 			1 << PG_writeback )))
 		bad_page(function, page);
 	if (PageDirty(page))
-		ClearPageDirty(page);
+		__ClearPageDirty(page);
 }
 
 /*
@@ -1130,19 +1130,20 @@ EXPORT_SYMBOL(nr_pagecache);
 DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
 #endif
 
-void __get_page_state(struct page_state *ret, int nr)
+void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
 {
 	int cpu = 0;
 
 	memset(ret, 0, sizeof(*ret));
+	cpus_and(*cpumask, *cpumask, cpu_online_map);
 
-	cpu = first_cpu(cpu_online_map);
+	cpu = first_cpu(*cpumask);
 	while (cpu < NR_CPUS) {
 		unsigned long *in, *out, off;
 
 		in = (unsigned long *)&per_cpu(page_states, cpu);
 
-		cpu = next_cpu(cpu, cpu_online_map);
+		cpu = next_cpu(cpu, *cpumask);
 
 		if (cpu < NR_CPUS)
 			prefetch(&per_cpu(page_states, cpu));
@@ -1153,19 +1154,33 @@ void __get_page_state(struct page_state *ret, int nr)
 	}
 }
 
+void get_page_state_node(struct page_state *ret, int node)
+{
+	int nr;
+	cpumask_t mask = node_to_cpumask(node);
+
+	nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
+	nr /= sizeof(unsigned long);
+
+	__get_page_state(ret, nr+1, &mask);
+}
+
 void get_page_state(struct page_state *ret)
 {
 	int nr;
+	cpumask_t mask = CPU_MASK_ALL;
 
 	nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
 	nr /= sizeof(unsigned long);
 
-	__get_page_state(ret, nr + 1);
+	__get_page_state(ret, nr + 1, &mask);
 }
 
 void get_full_page_state(struct page_state *ret)
 {
-	__get_page_state(ret, sizeof(*ret) / sizeof(unsigned long));
+	cpumask_t mask = CPU_MASK_ALL;
+
+	__get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
 }
 
 unsigned long __read_page_state(unsigned long offset)
@@ -1909,7 +1924,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 		zone->nr_scan_inactive = 0;
 		zone->nr_active = 0;
 		zone->nr_inactive = 0;
-		atomic_set(&zone->reclaim_in_progress, -1);
+		atomic_set(&zone->reclaim_in_progress, 0);
 		if (!size)
 			continue;
 
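The new cpumask parameter restricts the per-CPU accumulation to a subset of CPUs, which is what lets get_page_state_node() report a single node while get_page_state() keeps passing all CPUs. The core loop, modeled in userspace with a plain bitmask standing in for cpumask_t:

#include <stdio.h>

#define NR_CPUS 8

static long per_cpu_counter[NR_CPUS] = { 1, 2, 3, 4, 5, 6, 7, 8 };

static long sum_over_mask(unsigned mask)
{
	long sum = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		if (mask & (1u << cpu))		/* only CPUs in the mask */
			sum += per_cpu_counter[cpu];
	return sum;
}

int main(void)
{
	printf("all=%ld node0(cpus 0-3)=%ld\n",
	       sum_over_mask(0xffu), sum_over_mask(0x0fu));
	return 0;
}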
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -34,9 +34,8 @@
  *       anon_vma->lock
  *         mm->page_table_lock
  *           zone->lru_lock (in mark_page_accessed)
- *             swap_list_lock (in swap_free etc's swap_info_get)
+ *             swap_lock (in swap_duplicate, swap_info_get)
  *               mmlist_lock (in mmput, drain_mmlist and others)
- *               swap_device_lock (in swap_duplicate, swap_info_get)
  *               mapping->private_lock (in __set_page_dirty_buffers)
  *               inode_lock (in set_page_dirty's __mark_inode_dirty)
  *               sb_lock (within inode_lock in fs/fs-writeback.c)
@@ -290,8 +289,6 @@ static int page_referenced_one(struct page *page,
 	pte_t *pte;
 	int referenced = 0;
 
-	if (!get_mm_counter(mm, rss))
-		goto out;
 	address = vma_address(page, vma);
 	if (address == -EFAULT)
 		goto out;
@@ -442,22 +439,19 @@ int page_referenced(struct page *page, int is_locked, int ignore_token)
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
-	pgoff_t index;
-
 	BUG_ON(PageReserved(page));
-	BUG_ON(!anon_vma);
 
 	inc_mm_counter(vma->vm_mm, anon_rss);
 
-	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
-	index = (address - vma->vm_start) >> PAGE_SHIFT;
-	index += vma->vm_pgoff;
-	index >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
-
 	if (atomic_inc_and_test(&page->_mapcount)) {
-		page->index = index;
+		struct anon_vma *anon_vma = vma->anon_vma;
+
+		BUG_ON(!anon_vma);
+		anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 		page->mapping = (struct address_space *) anon_vma;
+
+		page->index = linear_page_index(vma, address);
+
 		inc_page_state(nr_mapped);
 	}
 	/* else checking page index and mapping is racy */
@@ -518,8 +512,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 	pte_t pteval;
 	int ret = SWAP_AGAIN;
 
-	if (!get_mm_counter(mm, rss))
-		goto out;
 	address = vma_address(page, vma);
 	if (address == -EFAULT)
 		goto out;
@@ -532,6 +524,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 	 * If the page is mlock()d, we cannot swap it out.
 	 * If it's recently referenced (perhaps page_referenced
 	 * skipped over this mm) then we should reactivate it.
+	 *
+	 * Pages belonging to VM_RESERVED regions should not happen here.
 	 */
 	if ((vma->vm_flags & (VM_LOCKED|VM_RESERVED)) ||
 			ptep_clear_flush_young(vma, address, pte)) {
@@ -767,8 +761,7 @@ static int try_to_unmap_file(struct page *page)
 		if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
 			continue;
 		cursor = (unsigned long) vma->vm_private_data;
-		while (get_mm_counter(vma->vm_mm, rss) &&
-			cursor < max_nl_cursor &&
+		while ( cursor < max_nl_cursor &&
 			cursor < vma->vm_end - vma->vm_start) {
 			try_to_unmap_cluster(cursor, &mapcount, vma);
 			cursor += CLUSTER_SIZE;
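linear_page_index() replaces the open-coded index arithmetic deleted above; when the page-cache and hardware page sizes match, it reduces to the following (standalone sketch, 4K pages assumed):

#include <stdio.h>

#define PAGE_SHIFT 12

static unsigned long linear_page_index(unsigned long vm_start,
				       unsigned long vm_pgoff,
				       unsigned long address)
{
	/* pages into the mapping, plus the mapping's file offset */
	return ((address - vm_start) >> PAGE_SHIFT) + vm_pgoff;
}

int main(void)
{
	/* page two pages past vm_start, in a mapping starting at pgoff 10 */
	printf("index=%lu\n", linear_page_index(0x10000, 10, 0x12000));
	return 0;
}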
diff --git a/mm/shmem.c b/mm/shmem.c
index 5a81b1ee4f7a..bdc4bbb6ddbb 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -45,7 +45,6 @@
 #include <linux/swapops.h>
 #include <linux/mempolicy.h>
 #include <linux/namei.h>
-#include <linux/xattr.h>
 #include <asm/uaccess.h>
 #include <asm/div64.h>
 #include <asm/pgtable.h>
@@ -179,7 +178,6 @@ static struct address_space_operations shmem_aops;
 static struct file_operations shmem_file_operations;
 static struct inode_operations shmem_inode_operations;
 static struct inode_operations shmem_dir_inode_operations;
-static struct inode_operations shmem_special_inode_operations;
 static struct vm_operations_struct shmem_vm_ops;
 
 static struct backing_dev_info shmem_backing_dev_info = {
@@ -1195,6 +1193,7 @@ static int shmem_populate(struct vm_area_struct *vma,
 		err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
 		if (err)
 			return err;
+		/* Page may still be null, but only if nonblock was set. */
 		if (page) {
 			mark_page_accessed(page);
 			err = install_page(mm, vma, addr, page, prot);
@@ -1202,7 +1201,10 @@ static int shmem_populate(struct vm_area_struct *vma,
 				page_cache_release(page);
 				return err;
 			}
-		} else if (nonblock) {
+		} else {
+			/* No page was found just because we can't read it in
+			 * now (being here implies nonblock != 0), but the page
+			 * may exist, so set the PTE to fault it in later. */
 			err = install_file_pte(mm, vma, addr, pgoff, prot);
 			if (err)
 				return err;
@@ -1296,7 +1298,6 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
 
 		switch (mode & S_IFMT) {
 		default:
-			inode->i_op = &shmem_special_inode_operations;
 			init_special_inode(inode, mode, dev);
 			break;
 		case S_IFREG:
@@ -1800,12 +1801,6 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co
 static struct inode_operations shmem_symlink_inline_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= shmem_follow_link_inline,
-#ifdef CONFIG_TMPFS_XATTR
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.listxattr	= generic_listxattr,
-	.removexattr	= generic_removexattr,
-#endif
 };
 
 static struct inode_operations shmem_symlink_inode_operations = {
@@ -1813,12 +1808,6 @@ static struct inode_operations shmem_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= shmem_follow_link,
 	.put_link	= shmem_put_link,
-#ifdef CONFIG_TMPFS_XATTR
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.listxattr	= generic_listxattr,
-	.removexattr	= generic_removexattr,
-#endif
 };
 
 static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
@@ -1938,12 +1927,6 @@ static void shmem_put_super(struct super_block *sb)
 	sb->s_fs_info = NULL;
 }
 
-#ifdef CONFIG_TMPFS_XATTR
-static struct xattr_handler *shmem_xattr_handlers[];
-#else
-#define shmem_xattr_handlers NULL
-#endif
-
 static int shmem_fill_super(struct super_block *sb,
 			    void *data, int silent)
 {
@@ -1994,7 +1977,6 @@ static int shmem_fill_super(struct super_block *sb,
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = TMPFS_MAGIC;
 	sb->s_op = &shmem_ops;
-	sb->s_xattr = shmem_xattr_handlers;
 
 	inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
 	if (!inode)
@@ -2083,12 +2065,6 @@ static struct file_operations shmem_file_operations = {
 static struct inode_operations shmem_inode_operations = {
 	.truncate	= shmem_truncate,
 	.setattr	= shmem_notify_change,
-#ifdef CONFIG_TMPFS_XATTR
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.listxattr	= generic_listxattr,
-	.removexattr	= generic_removexattr,
-#endif
 };
 
 static struct inode_operations shmem_dir_inode_operations = {
@@ -2102,21 +2078,6 @@ static struct inode_operations shmem_dir_inode_operations = {
 	.rmdir		= shmem_rmdir,
 	.mknod		= shmem_mknod,
 	.rename		= shmem_rename,
-#ifdef CONFIG_TMPFS_XATTR
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.listxattr	= generic_listxattr,
-	.removexattr	= generic_removexattr,
-#endif
-#endif
-};
-
-static struct inode_operations shmem_special_inode_operations = {
-#ifdef CONFIG_TMPFS_XATTR
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.listxattr	= generic_listxattr,
-	.removexattr	= generic_removexattr,
 #endif
 };
 
@@ -2142,48 +2103,6 @@ static struct vm_operations_struct shmem_vm_ops = {
 };
 
 
-#ifdef CONFIG_TMPFS_SECURITY
-
-static size_t shmem_xattr_security_list(struct inode *inode, char *list, size_t list_len,
-					const char *name, size_t name_len)
-{
-	return security_inode_listsecurity(inode, list, list_len);
-}
-
-static int shmem_xattr_security_get(struct inode *inode, const char *name, void *buffer, size_t size)
-{
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-	return security_inode_getsecurity(inode, name, buffer, size);
-}
-
-static int shmem_xattr_security_set(struct inode *inode, const char *name, const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-	return security_inode_setsecurity(inode, name, value, size, flags);
-}
-
-static struct xattr_handler shmem_xattr_security_handler = {
-	.prefix	= XATTR_SECURITY_PREFIX,
-	.list	= shmem_xattr_security_list,
-	.get	= shmem_xattr_security_get,
-	.set	= shmem_xattr_security_set,
-};
-
-#endif	/* CONFIG_TMPFS_SECURITY */
-
-#ifdef CONFIG_TMPFS_XATTR
-
-static struct xattr_handler *shmem_xattr_handlers[] = {
-#ifdef CONFIG_TMPFS_SECURITY
-	&shmem_xattr_security_handler,
-#endif
-	NULL
-};
-
-#endif	/* CONFIG_TMPFS_XATTR */
-
 static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -189,6 +189,7 @@
  * is less than 512 (PAGE_SIZE<<3), but greater than 256.
  */
 
+typedef unsigned int kmem_bufctl_t;
 #define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
 #define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
 #define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-2)
@@ -600,7 +601,7 @@ static inline kmem_cache_t *__find_general_cachep(size_t size,
 		csizep++;
 
 	/*
-	 * Really subtile: The last entry with cs->cs_size==ULONG_MAX
+	 * Really subtle: The last entry with cs->cs_size==ULONG_MAX
 	 * has cs_{dma,}cachep==NULL. Thus no special case
 	 * for large kmalloc calls required.
 	 */
@@ -2165,7 +2166,9 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, unsigned int __nocast fl
 		objp = cache_alloc_refill(cachep, flags);
 	}
 	local_irq_restore(save_flags);
-	objp = cache_alloc_debugcheck_after(cachep, flags, objp, __builtin_return_address(0));
+	objp = cache_alloc_debugcheck_after(cachep, flags, objp,
+					__builtin_return_address(0));
+	prefetchw(objp);
 	return objp;
 }
 
@@ -3073,20 +3076,24 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer,
 }
 #endif
 
+/**
+ * ksize - get the actual amount of memory allocated for a given object
+ * @objp: Pointer to the object
+ *
+ * kmalloc may internally round up allocations and return more memory
+ * than requested. ksize() can be used to determine the actual amount of
+ * memory allocated. The caller may use this additional memory, even though
+ * a smaller amount of memory was initially specified with the kmalloc call.
+ * The caller must guarantee that objp points to a valid object previously
+ * allocated with either kmalloc() or kmem_cache_alloc(). The object
+ * must not be freed during the duration of the call.
+ */
 unsigned int ksize(const void *objp)
 {
-	kmem_cache_t *c;
-	unsigned long flags;
-	unsigned int size = 0;
-
-	if (likely(objp != NULL)) {
-		local_irq_save(flags);
-		c = GET_PAGE_CACHE(virt_to_page(objp));
-		size = kmem_cache_size(c);
-		local_irq_restore(flags);
-	}
+	if (unlikely(objp == NULL))
+		return 0;
 
-	return size;
+	return obj_reallen(GET_PAGE_CACHE(virt_to_page(objp)));
 }
 
 
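The contract spelled out in the new ksize() comment - the allocator may round the request up, and the caller may legitimately use the rounded-up size - is the same one glibc exposes in userspace as malloc_usable_size(), which makes for an easy demonstration (ksize() itself is kernel-only):

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char *p = malloc(100);		/* the allocator may round this up */
	size_t usable = malloc_usable_size(p);

	memset(p, 0, usable);		/* all 'usable' bytes are ours */
	printf("asked 100, got %zu\n", usable);
	free(p);
	return 0;
}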
diff --git a/mm/sparse.c b/mm/sparse.c
index b54e304df4a7..347249a4917a 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -6,6 +6,7 @@
 #include <linux/mmzone.h>
 #include <linux/bootmem.h>
 #include <linux/module.h>
+#include <linux/spinlock.h>
 #include <asm/dma.h>
 
 /*
@@ -13,9 +14,64 @@
  *
  * 1) mem_section	- memory sections, mem_map's for valid memory
  */
-struct mem_section mem_section[NR_MEM_SECTIONS];
+#ifdef CONFIG_SPARSEMEM_EXTREME
+struct mem_section *mem_section[NR_SECTION_ROOTS]
+	____cacheline_maxaligned_in_smp;
+#else
+struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
+	____cacheline_maxaligned_in_smp;
+#endif
 EXPORT_SYMBOL(mem_section);
 
+#ifdef CONFIG_SPARSEMEM_EXTREME
+static struct mem_section *sparse_index_alloc(int nid)
+{
+	struct mem_section *section = NULL;
+	unsigned long array_size = SECTIONS_PER_ROOT *
+				   sizeof(struct mem_section);
+
+	section = alloc_bootmem_node(NODE_DATA(nid), array_size);
+
+	if (section)
+		memset(section, 0, array_size);
+
+	return section;
+}
+
+static int sparse_index_init(unsigned long section_nr, int nid)
+{
+	static spinlock_t index_init_lock = SPIN_LOCK_UNLOCKED;
+	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
+	struct mem_section *section;
+	int ret = 0;
+
+	if (mem_section[root])
+		return -EEXIST;
+
+	section = sparse_index_alloc(nid);
+	/*
+	 * This lock keeps two different sections from
+	 * reallocating for the same index
+	 */
+	spin_lock(&index_init_lock);
+
+	if (mem_section[root]) {
+		ret = -EEXIST;
+		goto out;
+	}
+
+	mem_section[root] = section;
+out:
+	spin_unlock(&index_init_lock);
+	return ret;
+}
+#else /* !SPARSEMEM_EXTREME */
+static inline int sparse_index_init(unsigned long section_nr, int nid)
+{
+	return 0;
+}
+#endif
+
 /* Record a memory area against a node. */
 void memory_present(int nid, unsigned long start, unsigned long end)
 {
@@ -24,8 +80,13 @@ void memory_present(int nid, unsigned long start, unsigned long end)
 	start &= PAGE_SECTION_MASK;
 	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
 		unsigned long section = pfn_to_section_nr(pfn);
-		if (!mem_section[section].section_mem_map)
-			mem_section[section].section_mem_map = SECTION_MARKED_PRESENT;
+		struct mem_section *ms;
+
+		sparse_index_init(section, nid);
+
+		ms = __nr_to_section(section);
+		if (!ms->section_mem_map)
+			ms->section_mem_map = SECTION_MARKED_PRESENT;
 	}
 }
 
@@ -85,6 +146,7 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum)
 {
 	struct page *map;
 	int nid = early_pfn_to_nid(section_nr_to_pfn(pnum));
+	struct mem_section *ms = __nr_to_section(pnum);
 
 	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
 	if (map)
@@ -96,7 +158,7 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum)
 		return map;
 
 	printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
-	mem_section[pnum].section_mem_map = 0;
+	ms->section_mem_map = 0;
 	return NULL;
 }
 
@@ -114,8 +176,9 @@ void sparse_init(void)
 			continue;
 
 		map = sparse_early_mem_map_alloc(pnum);
-		if (map)
-			sparse_init_one_section(&mem_section[pnum], pnum, map);
+		if (!map)
+			continue;
+		sparse_init_one_section(__nr_to_section(pnum), pnum, map);
 	}
 }
 
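sparse_index_init() allocates the root outside the lock and re-checks under it, so two CPUs racing to initialize the same root cannot both install one. The same pattern in a userspace model, with a pthread mutex standing in for the spinlock (unlike the kernel's bootmem, the race loser here can free its allocation):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define NROOTS 16

static void *roots[NROOTS];
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;

static int index_init(int root, size_t size)
{
	if (roots[root])
		return -1;			/* -EEXIST */

	void *sec = calloc(1, size);		/* allocate before locking */

	pthread_mutex_lock(&init_lock);
	if (roots[root]) {			/* someone beat us to it */
		pthread_mutex_unlock(&init_lock);
		free(sec);
		return -1;
	}
	roots[root] = sec;
	pthread_mutex_unlock(&init_lock);
	return 0;
}

int main(void)
{
	printf("%d then %d\n", index_init(3, 4096), index_init(3, 4096));
	return 0;
}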
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 4f251775ef90..029e56eb5e77 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -124,6 +124,7 @@ void __delete_from_swap_cache(struct page *page)
 	BUG_ON(!PageLocked(page));
 	BUG_ON(!PageSwapCache(page));
 	BUG_ON(PageWriteback(page));
+	BUG_ON(PagePrivate(page));
 
 	radix_tree_delete(&swapper_space.page_tree, page->private);
 	page->private = 0;
@@ -196,11 +197,6 @@ void delete_from_swap_cache(struct page *page)
 {
 	swp_entry_t entry;
 
-	BUG_ON(!PageSwapCache(page));
-	BUG_ON(!PageLocked(page));
-	BUG_ON(PageWriteback(page));
-	BUG_ON(PagePrivate(page));
-
 	entry.val = page->private;
 
 	write_lock_irq(&swapper_space.tree_lock);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 60cd24a55204..4b6e8bf986bc 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,7 +31,7 @@
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
 
-DEFINE_SPINLOCK(swaplock);
+DEFINE_SPINLOCK(swap_lock);
 unsigned int nr_swapfiles;
 long total_swap_pages;
 static int swap_overflow;
@@ -51,13 +51,11 @@ static DECLARE_MUTEX(swapon_sem);
 
 /*
  * We need this because the bdev->unplug_fn can sleep and we cannot
- * hold swap_list_lock while calling the unplug_fn. And swap_list_lock
+ * hold swap_lock while calling the unplug_fn. And swap_lock
  * cannot be turned into a semaphore.
 */
 static DECLARE_RWSEM(swap_unplug_sem);
 
-#define SWAPFILE_CLUSTER 256
-
 void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
 {
 	swp_entry_t entry;
@@ -84,116 +82,135 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
 	up_read(&swap_unplug_sem);
 }
 
-static inline int scan_swap_map(struct swap_info_struct *si)
+#define SWAPFILE_CLUSTER	256
+#define LATENCY_LIMIT		256
+
+static inline unsigned long scan_swap_map(struct swap_info_struct *si)
 {
-	unsigned long offset;
+	unsigned long offset, last_in_cluster;
+	int latency_ration = LATENCY_LIMIT;
+
 	/*
-	 * We try to cluster swap pages by allocating them
-	 * sequentially in swap.  Once we've allocated
-	 * SWAPFILE_CLUSTER pages this way, however, we resort to
-	 * first-free allocation, starting a new cluster.  This
-	 * prevents us from scattering swap pages all over the entire
-	 * swap partition, so that we reduce overall disk seek times
-	 * between swap pages. -- sct */
-	if (si->cluster_nr) {
-		while (si->cluster_next <= si->highest_bit) {
-			offset = si->cluster_next++;
+	 * We try to cluster swap pages by allocating them sequentially
+	 * in swap.  Once we've allocated SWAPFILE_CLUSTER pages this
+	 * way, however, we resort to first-free allocation, starting
+	 * a new cluster.  This prevents us from scattering swap pages
+	 * all over the entire swap partition, so that we reduce
+	 * overall disk seek times between swap pages.  -- sct
+	 * But we do now try to find an empty cluster.  -Andrea
+	 */
+
+	si->flags += SWP_SCANNING;
+	if (unlikely(!si->cluster_nr)) {
+		si->cluster_nr = SWAPFILE_CLUSTER - 1;
+		if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER)
+			goto lowest;
+		spin_unlock(&swap_lock);
+
+		offset = si->lowest_bit;
+		last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
+
+		/* Locate the first empty (unaligned) cluster */
+		for (; last_in_cluster <= si->highest_bit; offset++) {
 			if (si->swap_map[offset])
-				continue;
-			si->cluster_nr--;
-			goto got_page;
-		}
-	}
-	si->cluster_nr = SWAPFILE_CLUSTER;
-
-	/* try to find an empty (even not aligned) cluster. */
-	offset = si->lowest_bit;
-check_next_cluster:
-	if (offset+SWAPFILE_CLUSTER-1 <= si->highest_bit)
-	{
-		unsigned long nr;
-		for (nr = offset; nr < offset+SWAPFILE_CLUSTER; nr++)
-			if (si->swap_map[nr])
-			{
-				offset = nr+1;
-				goto check_next_cluster;
-			}
-		/* We found a completly empty cluster, so start
-		 * using it.
-		 */
-		goto got_page;
+				last_in_cluster = offset + SWAPFILE_CLUSTER;
+			else if (offset == last_in_cluster) {
+				spin_lock(&swap_lock);
+				si->cluster_next = offset-SWAPFILE_CLUSTER-1;
+				goto cluster;
+			}
+			if (unlikely(--latency_ration < 0)) {
+				cond_resched();
+				latency_ration = LATENCY_LIMIT;
+			}
+		}
+		spin_lock(&swap_lock);
+		goto lowest;
 	}
-	/* No luck, so now go finegrined as usual. -Andrea */
-	for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
-		if (si->swap_map[offset])
-			continue;
-		si->lowest_bit = offset+1;
-got_page:
+
+	si->cluster_nr--;
+cluster:
+	offset = si->cluster_next;
+	if (offset > si->highest_bit)
+lowest:		offset = si->lowest_bit;
+checks:	if (!(si->flags & SWP_WRITEOK))
+		goto no_page;
+	if (!si->highest_bit)
+		goto no_page;
+	if (!si->swap_map[offset]) {
 		if (offset == si->lowest_bit)
 			si->lowest_bit++;
 		if (offset == si->highest_bit)
 			si->highest_bit--;
-		if (si->lowest_bit > si->highest_bit) {
+		si->inuse_pages++;
+		if (si->inuse_pages == si->pages) {
 			si->lowest_bit = si->max;
 			si->highest_bit = 0;
 		}
 		si->swap_map[offset] = 1;
-		si->inuse_pages++;
-		nr_swap_pages--;
-		si->cluster_next = offset+1;
+		si->cluster_next = offset + 1;
+		si->flags -= SWP_SCANNING;
 		return offset;
 	}
-	si->lowest_bit = si->max;
-	si->highest_bit = 0;
+
+	spin_unlock(&swap_lock);
+	while (++offset <= si->highest_bit) {
+		if (!si->swap_map[offset]) {
+			spin_lock(&swap_lock);
+			goto checks;
+		}
+		if (unlikely(--latency_ration < 0)) {
+			cond_resched();
+			latency_ration = LATENCY_LIMIT;
+		}
+	}
+	spin_lock(&swap_lock);
+	goto lowest;
+
+no_page:
+	si->flags -= SWP_SCANNING;
 	return 0;
 }
 
 swp_entry_t get_swap_page(void)
 {
-	struct swap_info_struct * p;
-	unsigned long offset;
-	swp_entry_t entry;
-	int type, wrapped = 0;
+	struct swap_info_struct *si;
+	pgoff_t offset;
+	int type, next;
+	int wrapped = 0;
 
-	entry.val = 0;	/* Out of memory */
-	swap_list_lock();
-	type = swap_list.next;
-	if (type < 0)
-		goto out;
+	spin_lock(&swap_lock);
 	if (nr_swap_pages <= 0)
-		goto out;
+		goto noswap;
+	nr_swap_pages--;
+
+	for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
+		si = swap_info + type;
+		next = si->next;
+		if (next < 0 ||
+		    (!wrapped && si->prio != swap_info[next].prio)) {
+			next = swap_list.head;
+			wrapped++;
+		}
 
-	while (1) {
-		p = &swap_info[type];
-		if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) {
-			swap_device_lock(p);
-			offset = scan_swap_map(p);
-			swap_device_unlock(p);
-			if (offset) {
-				entry = swp_entry(type,offset);
-				type = swap_info[type].next;
-				if (type < 0 ||
-					p->prio != swap_info[type].prio) {
-					swap_list.next = swap_list.head;
-				} else {
-					swap_list.next = type;
-				}
-				goto out;
-			}
+		if (!si->highest_bit)
+			continue;
+		if (!(si->flags & SWP_WRITEOK))
+			continue;
+
+		swap_list.next = next;
+		offset = scan_swap_map(si);
+		if (offset) {
+			spin_unlock(&swap_lock);
+			return swp_entry(type, offset);
 		}
-		type = p->next;
-		if (!wrapped) {
-			if (type < 0 || p->prio != swap_info[type].prio) {
-				type = swap_list.head;
-				wrapped = 1;
-			}
-		} else
-			if (type < 0)
-				goto out;	/* out of swap space */
+		next = swap_list.next;
 	}
-out:
-	swap_list_unlock();
-	return entry;
+
+	nr_swap_pages++;
+noswap:
+	spin_unlock(&swap_lock);
+	return (swp_entry_t) {0};
 }
 
 static struct swap_info_struct * swap_info_get(swp_entry_t entry)
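Two ideas carry the rewritten allocator: scan for the first run of SWAPFILE_CLUSTER free slots rather than walking slot by slot forever, and give back the CPU every LATENCY_LIMIT probes (cond_resched() in the kernel) so a long scan cannot stall the box. The cluster-finding loop, modeled on a plain array with shrunken sizes:

#include <sched.h>
#include <stdio.h>

#define NSLOTS	64
#define CLUSTER	8
#define LATENCY	16

static unsigned char swap_map[NSLOTS];

static long find_empty_cluster(void)
{
	long offset = 0, last_in_cluster = CLUSTER - 1;
	int latency_ration = LATENCY;

	for (; last_in_cluster < NSLOTS; offset++) {
		if (swap_map[offset])
			last_in_cluster = offset + CLUSTER;	/* restart the run */
		else if (offset == last_in_cluster)
			return offset - CLUSTER + 1;		/* whole run free */
		if (--latency_ration < 0) {
			sched_yield();				/* cond_resched() */
			latency_ration = LATENCY;
		}
	}
	return -1;
}

int main(void)
{
	swap_map[3] = 1;		/* dirty one slot */
	printf("first free cluster at %ld\n", find_empty_cluster());
	return 0;
}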
@@ -214,10 +231,7 @@ static struct swap_info_struct * swap_info_get(swp_entry_t entry) | |||
214 | goto bad_offset; | 231 | goto bad_offset; |
215 | if (!p->swap_map[offset]) | 232 | if (!p->swap_map[offset]) |
216 | goto bad_free; | 233 | goto bad_free; |
217 | swap_list_lock(); | 234 | spin_lock(&swap_lock); |
218 | if (p->prio > swap_info[swap_list.next].prio) | ||
219 | swap_list.next = type; | ||
220 | swap_device_lock(p); | ||
221 | return p; | 235 | return p; |
222 | 236 | ||
223 | bad_free: | 237 | bad_free: |
@@ -235,12 +249,6 @@ out: | |||
235 | return NULL; | 249 | return NULL; |
236 | } | 250 | } |
237 | 251 | ||
238 | static void swap_info_put(struct swap_info_struct * p) | ||
239 | { | ||
240 | swap_device_unlock(p); | ||
241 | swap_list_unlock(); | ||
242 | } | ||
243 | |||
244 | static int swap_entry_free(struct swap_info_struct *p, unsigned long offset) | 252 | static int swap_entry_free(struct swap_info_struct *p, unsigned long offset) |
245 | { | 253 | { |
246 | int count = p->swap_map[offset]; | 254 | int count = p->swap_map[offset]; |
@@ -253,6 +261,8 @@ static int swap_entry_free(struct swap_info_struct *p, unsigned long offset) | |||
253 | p->lowest_bit = offset; | 261 | p->lowest_bit = offset; |
254 | if (offset > p->highest_bit) | 262 | if (offset > p->highest_bit) |
255 | p->highest_bit = offset; | 263 | p->highest_bit = offset; |
264 | if (p->prio > swap_info[swap_list.next].prio) | ||
265 | swap_list.next = p - swap_info; | ||
256 | nr_swap_pages++; | 266 | nr_swap_pages++; |
257 | p->inuse_pages--; | 267 | p->inuse_pages--; |
258 | } | 268 | } |
@@ -271,7 +281,7 @@ void swap_free(swp_entry_t entry) | |||
271 | p = swap_info_get(entry); | 281 | p = swap_info_get(entry); |
272 | if (p) { | 282 | if (p) { |
273 | swap_entry_free(p, swp_offset(entry)); | 283 | swap_entry_free(p, swp_offset(entry)); |
274 | swap_info_put(p); | 284 | spin_unlock(&swap_lock); |
275 | } | 285 | } |
276 | } | 286 | } |
277 | 287 | ||
@@ -289,7 +299,7 @@ static inline int page_swapcount(struct page *page) | |||
289 | if (p) { | 299 | if (p) { |
290 | /* Subtract the 1 for the swap cache itself */ | 300 | /* Subtract the 1 for the swap cache itself */ |
291 | count = p->swap_map[swp_offset(entry)] - 1; | 301 | count = p->swap_map[swp_offset(entry)] - 1; |
292 | swap_info_put(p); | 302 | spin_unlock(&swap_lock); |
293 | } | 303 | } |
294 | return count; | 304 | return count; |
295 | } | 305 | } |
@@ -346,7 +356,7 @@ int remove_exclusive_swap_page(struct page *page) | |||
346 | } | 356 | } |
347 | write_unlock_irq(&swapper_space.tree_lock); | 357 | write_unlock_irq(&swapper_space.tree_lock); |
348 | } | 358 | } |
349 | swap_info_put(p); | 359 | spin_unlock(&swap_lock); |
350 | 360 | ||
351 | if (retval) { | 361 | if (retval) { |
352 | swap_free(entry); | 362 | swap_free(entry); |
@@ -369,7 +379,7 @@ void free_swap_and_cache(swp_entry_t entry) | |||
369 | if (p) { | 379 | if (p) { |
370 | if (swap_entry_free(p, swp_offset(entry)) == 1) | 380 | if (swap_entry_free(p, swp_offset(entry)) == 1) |
371 | page = find_trylock_page(&swapper_space, entry.val); | 381 | page = find_trylock_page(&swapper_space, entry.val); |
372 | swap_info_put(p); | 382 | spin_unlock(&swap_lock); |
373 | } | 383 | } |
374 | if (page) { | 384 | if (page) { |
375 | int one_user; | 385 | int one_user; |
@@ -531,17 +541,18 @@ static int unuse_mm(struct mm_struct *mm, | |||
531 | * Scan swap_map from current position to next entry still in use. | 541 | * Scan swap_map from current position to next entry still in use. |
532 | * Recycle to start on reaching the end, returning 0 when empty. | 542 | * Recycle to start on reaching the end, returning 0 when empty. |
533 | */ | 543 | */ |
534 | static int find_next_to_unuse(struct swap_info_struct *si, int prev) | 544 | static unsigned int find_next_to_unuse(struct swap_info_struct *si, |
545 | unsigned int prev) | ||
535 | { | 546 | { |
536 | int max = si->max; | 547 | unsigned int max = si->max; |
537 | int i = prev; | 548 | unsigned int i = prev; |
538 | int count; | 549 | int count; |
539 | 550 | ||
540 | /* | 551 | /* |
541 | * No need for swap_device_lock(si) here: we're just looking | 552 | * No need for swap_lock here: we're just looking |
542 | * for whether an entry is in use, not modifying it; false | 553 | * for whether an entry is in use, not modifying it; false |
543 | * hits are okay, and sys_swapoff() has already prevented new | 554 | * hits are okay, and sys_swapoff() has already prevented new |
544 | * allocations from this area (while holding swap_list_lock()). | 555 | * allocations from this area (while holding swap_lock). |
545 | */ | 556 | */ |
546 | for (;;) { | 557 | for (;;) { |
547 | if (++i >= max) { | 558 | if (++i >= max) { |
@@ -577,7 +588,7 @@ static int try_to_unuse(unsigned int type) | |||
577 | unsigned short swcount; | 588 | unsigned short swcount; |
578 | struct page *page; | 589 | struct page *page; |
579 | swp_entry_t entry; | 590 | swp_entry_t entry; |
580 | int i = 0; | 591 | unsigned int i = 0; |
581 | int retval = 0; | 592 | int retval = 0; |
582 | int reset_overflow = 0; | 593 | int reset_overflow = 0; |
583 | int shmem; | 594 | int shmem; |
@@ -731,9 +742,9 @@ static int try_to_unuse(unsigned int type) | |||
731 | * report them; but do report if we reset SWAP_MAP_MAX. | 742 | * report them; but do report if we reset SWAP_MAP_MAX. |
732 | */ | 743 | */ |
733 | if (*swap_map == SWAP_MAP_MAX) { | 744 | if (*swap_map == SWAP_MAP_MAX) { |
734 | swap_device_lock(si); | 745 | spin_lock(&swap_lock); |
735 | *swap_map = 1; | 746 | *swap_map = 1; |
736 | swap_device_unlock(si); | 747 | spin_unlock(&swap_lock); |
737 | reset_overflow = 1; | 748 | reset_overflow = 1; |
738 | } | 749 | } |
739 | 750 | ||
@@ -797,9 +808,9 @@ static int try_to_unuse(unsigned int type) | |||
797 | } | 808 | } |
798 | 809 | ||
799 | /* | 810 | /* |
800 | * After a successful try_to_unuse, if no swap is now in use, we know we | 811 | * After a successful try_to_unuse, if no swap is now in use, we know |
801 | * can empty the mmlist. swap_list_lock must be held on entry and exit. | 812 | * we can empty the mmlist. swap_lock must be held on entry and exit. |
802 | * Note that mmlist_lock nests inside swap_list_lock, and an mm must be | 813 | * Note that mmlist_lock nests inside swap_lock, and an mm must be |
803 | * added to the mmlist just after page_duplicate - before would be racy. | 814 | * added to the mmlist just after page_duplicate - before would be racy. |
804 | */ | 815 | */ |
805 | static void drain_mmlist(void) | 816 | static void drain_mmlist(void) |
@@ -832,9 +843,9 @@ sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset) | |||
832 | offset < (se->start_page + se->nr_pages)) { | 843 | offset < (se->start_page + se->nr_pages)) { |
833 | return se->start_block + (offset - se->start_page); | 844 | return se->start_block + (offset - se->start_page); |
834 | } | 845 | } |
835 | lh = se->list.prev; | 846 | lh = se->list.next; |
836 | if (lh == &sis->extent_list) | 847 | if (lh == &sis->extent_list) |
837 | lh = lh->prev; | 848 | lh = lh->next; |
838 | se = list_entry(lh, struct swap_extent, list); | 849 | se = list_entry(lh, struct swap_extent, list); |
839 | sis->curr_swap_extent = se; | 850 | sis->curr_swap_extent = se; |
840 | BUG_ON(se == start_se); /* It *must* be present */ | 851 | BUG_ON(se == start_se); /* It *must* be present */ |
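With the extent list now sorted in page order, the cached cursor advances with ->next and wraps past the list head, rather than walking ->prev as before. A runnable userspace sketch of the lookup (an array plus index stand in for the kernel's circular list_head ring; all names are illustrative):

    #include <stdio.h>

    struct extent {
            unsigned long start_page;
            unsigned long nr_pages;
            unsigned long start_block;
    };

    /* Find the block backing `offset`, starting from the cached cursor. */
    static unsigned long map_page(const struct extent *ext, int nr_ext,
                                  int *cursor, unsigned long offset)
    {
            int start = *cursor;

            for (;;) {
                    const struct extent *se = &ext[*cursor];

                    if (offset >= se->start_page &&
                        offset < se->start_page + se->nr_pages)
                            return se->start_block + (offset - se->start_page);

                    *cursor = (*cursor + 1) % nr_ext;   /* ->next, wrapping */
                    if (*cursor == start)
                            break;                      /* "must be present" */
            }
            return ~0UL;
    }

    int main(void)
    {
            struct extent ext[] = {
                    {  0, 10, 100 },    /* pages 0..9   -> blocks 100..109 */
                    { 10,  5, 300 },    /* pages 10..14 -> blocks 300..304 */
            };
            int cursor = 0;

            printf("%lu\n", map_page(ext, 2, &cursor, 12)); /* prints 302 */
            return 0;
    }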
@@ -854,15 +865,13 @@ static void destroy_swap_extents(struct swap_info_struct *sis) | |||
854 | list_del(&se->list); | 865 | list_del(&se->list); |
855 | kfree(se); | 866 | kfree(se); |
856 | } | 867 | } |
857 | sis->nr_extents = 0; | ||
858 | } | 868 | } |
859 | 869 | ||
860 | /* | 870 | /* |
861 | * Add a block range (and the corresponding page range) into this swapdev's | 871 | * Add a block range (and the corresponding page range) into this swapdev's |
862 | * extent list. The extent list is kept sorted in block order. | 872 | * extent list. The extent list is kept sorted in page order. |
863 | * | 873 | * |
864 | * This function rather assumes that it is called in ascending sector_t order. | 874 | * This function rather assumes that it is called in ascending page order. |
865 | * It doesn't look for extent coalescing opportunities. | ||
866 | */ | 875 | */ |
867 | static int | 876 | static int |
868 | add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, | 877 | add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, |
@@ -872,16 +881,15 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, | |||
872 | struct swap_extent *new_se; | 881 | struct swap_extent *new_se; |
873 | struct list_head *lh; | 882 | struct list_head *lh; |
874 | 883 | ||
875 | lh = sis->extent_list.next; /* The highest-addressed block */ | 884 | lh = sis->extent_list.prev; /* The highest page extent */ |
876 | while (lh != &sis->extent_list) { | 885 | if (lh != &sis->extent_list) { |
877 | se = list_entry(lh, struct swap_extent, list); | 886 | se = list_entry(lh, struct swap_extent, list); |
878 | if (se->start_block + se->nr_pages == start_block && | 887 | BUG_ON(se->start_page + se->nr_pages != start_page); |
879 | se->start_page + se->nr_pages == start_page) { | 888 | if (se->start_block + se->nr_pages == start_block) { |
880 | /* Merge it */ | 889 | /* Merge it */ |
881 | se->nr_pages += nr_pages; | 890 | se->nr_pages += nr_pages; |
882 | return 0; | 891 | return 0; |
883 | } | 892 | } |
884 | lh = lh->next; | ||
885 | } | 893 | } |
886 | 894 | ||
887 | /* | 895 | /* |
@@ -894,16 +902,8 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, | |||
894 | new_se->nr_pages = nr_pages; | 902 | new_se->nr_pages = nr_pages; |
895 | new_se->start_block = start_block; | 903 | new_se->start_block = start_block; |
896 | 904 | ||
897 | lh = sis->extent_list.prev; /* The lowest block */ | 905 | list_add_tail(&new_se->list, &sis->extent_list); |
898 | while (lh != &sis->extent_list) { | 906 | return 1; |
899 | se = list_entry(lh, struct swap_extent, list); | ||
900 | if (se->start_block > start_block) | ||
901 | break; | ||
902 | lh = lh->prev; | ||
903 | } | ||
904 | list_add_tail(&new_se->list, lh); | ||
905 | sis->nr_extents++; | ||
906 | return 0; | ||
907 | } | 907 | } |
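Because setup_swap_extents() now adds ranges in strictly ascending page order, only the tail extent can possibly merge; anything else is appended and reported to the caller. A userspace sketch of that rule (names are illustrative; the assert mirrors the BUG_ON above):

    #include <assert.h>

    struct extent {
            unsigned long start_page, nr_pages, start_block;
    };

    /* Merge into the tail extent if the block run continues (return 0),
     * otherwise emit one new extent via *out (return 1, so the caller
     * can keep a running nr_extents count). */
    static int add_extent(struct extent *tail, struct extent *out,
                          unsigned long start_page, unsigned long nr_pages,
                          unsigned long start_block)
    {
            if (tail) {
                    /* pages must arrive contiguously, as the BUG_ON asserts */
                    assert(tail->start_page + tail->nr_pages == start_page);
                    if (tail->start_block + tail->nr_pages == start_block) {
                            tail->nr_pages += nr_pages;     /* merge it */
                            return 0;
                    }
            }
            out->start_page = start_page;
            out->nr_pages = nr_pages;
            out->start_block = start_block;
            return 1;
    }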
908 | 908 | ||
909 | /* | 909 | /* |
@@ -926,7 +926,7 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, | |||
926 | * requirements, they are simply tossed out - we will never use those blocks | 926 | * requirements, they are simply tossed out - we will never use those blocks |
927 | * for swapping. | 927 | * for swapping. |
928 | * | 928 | * |
929 | * For S_ISREG swapfiles we hold i_sem across the life of the swapon. This | 929 | * For S_ISREG swapfiles we set S_SWAPFILE across the life of the swapon. This |
930 | * prevents root from shooting her foot off by ftruncating an in-use swapfile, | 930 | * prevents root from shooting her foot off by ftruncating an in-use swapfile, |
931 | * which will scribble on the fs. | 931 | * which will scribble on the fs. |
932 | * | 932 | * |
@@ -937,7 +937,7 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, | |||
937 | * This is extremely effective. The average number of iterations in | 937 | * This is extremely effective. The average number of iterations in |
938 | * map_swap_page() has been measured at about 0.3 per page. - akpm. | 938 | * map_swap_page() has been measured at about 0.3 per page. - akpm. |
939 | */ | 939 | */ |
940 | static int setup_swap_extents(struct swap_info_struct *sis) | 940 | static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) |
941 | { | 941 | { |
942 | struct inode *inode; | 942 | struct inode *inode; |
943 | unsigned blocks_per_page; | 943 | unsigned blocks_per_page; |
@@ -945,11 +945,15 @@ static int setup_swap_extents(struct swap_info_struct *sis) | |||
945 | unsigned blkbits; | 945 | unsigned blkbits; |
946 | sector_t probe_block; | 946 | sector_t probe_block; |
947 | sector_t last_block; | 947 | sector_t last_block; |
948 | sector_t lowest_block = -1; | ||
949 | sector_t highest_block = 0; | ||
950 | int nr_extents = 0; | ||
948 | int ret; | 951 | int ret; |
949 | 952 | ||
950 | inode = sis->swap_file->f_mapping->host; | 953 | inode = sis->swap_file->f_mapping->host; |
951 | if (S_ISBLK(inode->i_mode)) { | 954 | if (S_ISBLK(inode->i_mode)) { |
952 | ret = add_swap_extent(sis, 0, sis->max, 0); | 955 | ret = add_swap_extent(sis, 0, sis->max, 0); |
956 | *span = sis->pages; | ||
953 | goto done; | 957 | goto done; |
954 | } | 958 | } |
955 | 959 | ||
@@ -994,22 +998,32 @@ static int setup_swap_extents(struct swap_info_struct *sis) | |||
994 | } | 998 | } |
995 | } | 999 | } |
996 | 1000 | ||
1001 | first_block >>= (PAGE_SHIFT - blkbits); | ||
1002 | if (page_no) { /* exclude the header page */ | ||
1003 | if (first_block < lowest_block) | ||
1004 | lowest_block = first_block; | ||
1005 | if (first_block > highest_block) | ||
1006 | highest_block = first_block; | ||
1007 | } | ||
1008 | |||
997 | /* | 1009 | /* |
998 | * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks | 1010 | * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks |
999 | */ | 1011 | */ |
1000 | ret = add_swap_extent(sis, page_no, 1, | 1012 | ret = add_swap_extent(sis, page_no, 1, first_block); |
1001 | first_block >> (PAGE_SHIFT - blkbits)); | 1013 | if (ret < 0) |
1002 | if (ret) | ||
1003 | goto out; | 1014 | goto out; |
1015 | nr_extents += ret; | ||
1004 | page_no++; | 1016 | page_no++; |
1005 | probe_block += blocks_per_page; | 1017 | probe_block += blocks_per_page; |
1006 | reprobe: | 1018 | reprobe: |
1007 | continue; | 1019 | continue; |
1008 | } | 1020 | } |
1009 | ret = 0; | 1021 | ret = nr_extents; |
1022 | *span = 1 + highest_block - lowest_block; | ||
1010 | if (page_no == 0) | 1023 | if (page_no == 0) |
1011 | ret = -EINVAL; | 1024 | page_no = 1; /* force Empty message */ |
1012 | sis->max = page_no; | 1025 | sis->max = page_no; |
1026 | sis->pages = page_no - 1; | ||
1013 | sis->highest_bit = page_no - 1; | 1027 | sis->highest_bit = page_no - 1; |
1014 | done: | 1028 | done: |
1015 | sis->curr_swap_extent = list_entry(sis->extent_list.prev, | 1029 | sis->curr_swap_extent = list_entry(sis->extent_list.prev, |
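The new span is the on-disk distance covered by the good pages, measured in page-sized units (first_block has already been shifted down by PAGE_SHIFT - blkbits). A worked fragment with illustrative values:

    unsigned long long lowest_block = 38;       /* first PAGE_SIZE run found */
    unsigned long long highest_block = 26213;   /* last run found */
    unsigned long long span = 1 + highest_block - lowest_block; /* 26176 pages */

For S_ISBLK devices no probing happens at all, so the code simply reports span = sis->pages.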
@@ -1069,7 +1083,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile) | |||
1069 | 1083 | ||
1070 | mapping = victim->f_mapping; | 1084 | mapping = victim->f_mapping; |
1071 | prev = -1; | 1085 | prev = -1; |
1072 | swap_list_lock(); | 1086 | spin_lock(&swap_lock); |
1073 | for (type = swap_list.head; type >= 0; type = swap_info[type].next) { | 1087 | for (type = swap_list.head; type >= 0; type = swap_info[type].next) { |
1074 | p = swap_info + type; | 1088 | p = swap_info + type; |
1075 | if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) { | 1089 | if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) { |
@@ -1080,14 +1094,14 @@ asmlinkage long sys_swapoff(const char __user * specialfile) | |||
1080 | } | 1094 | } |
1081 | if (type < 0) { | 1095 | if (type < 0) { |
1082 | err = -EINVAL; | 1096 | err = -EINVAL; |
1083 | swap_list_unlock(); | 1097 | spin_unlock(&swap_lock); |
1084 | goto out_dput; | 1098 | goto out_dput; |
1085 | } | 1099 | } |
1086 | if (!security_vm_enough_memory(p->pages)) | 1100 | if (!security_vm_enough_memory(p->pages)) |
1087 | vm_unacct_memory(p->pages); | 1101 | vm_unacct_memory(p->pages); |
1088 | else { | 1102 | else { |
1089 | err = -ENOMEM; | 1103 | err = -ENOMEM; |
1090 | swap_list_unlock(); | 1104 | spin_unlock(&swap_lock); |
1091 | goto out_dput; | 1105 | goto out_dput; |
1092 | } | 1106 | } |
1093 | if (prev < 0) { | 1107 | if (prev < 0) { |
@@ -1102,18 +1116,15 @@ asmlinkage long sys_swapoff(const char __user * specialfile) | |||
1102 | nr_swap_pages -= p->pages; | 1116 | nr_swap_pages -= p->pages; |
1103 | total_swap_pages -= p->pages; | 1117 | total_swap_pages -= p->pages; |
1104 | p->flags &= ~SWP_WRITEOK; | 1118 | p->flags &= ~SWP_WRITEOK; |
1105 | swap_list_unlock(); | 1119 | spin_unlock(&swap_lock); |
1120 | |||
1106 | current->flags |= PF_SWAPOFF; | 1121 | current->flags |= PF_SWAPOFF; |
1107 | err = try_to_unuse(type); | 1122 | err = try_to_unuse(type); |
1108 | current->flags &= ~PF_SWAPOFF; | 1123 | current->flags &= ~PF_SWAPOFF; |
1109 | 1124 | ||
1110 | /* wait for any unplug function to finish */ | ||
1111 | down_write(&swap_unplug_sem); | ||
1112 | up_write(&swap_unplug_sem); | ||
1113 | |||
1114 | if (err) { | 1125 | if (err) { |
1115 | /* re-insert swap space back into swap_list */ | 1126 | /* re-insert swap space back into swap_list */ |
1116 | swap_list_lock(); | 1127 | spin_lock(&swap_lock); |
1117 | for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next) | 1128 | for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next) |
1118 | if (p->prio >= swap_info[i].prio) | 1129 | if (p->prio >= swap_info[i].prio) |
1119 | break; | 1130 | break; |
@@ -1125,22 +1136,35 @@ asmlinkage long sys_swapoff(const char __user * specialfile) | |||
1125 | nr_swap_pages += p->pages; | 1136 | nr_swap_pages += p->pages; |
1126 | total_swap_pages += p->pages; | 1137 | total_swap_pages += p->pages; |
1127 | p->flags |= SWP_WRITEOK; | 1138 | p->flags |= SWP_WRITEOK; |
1128 | swap_list_unlock(); | 1139 | spin_unlock(&swap_lock); |
1129 | goto out_dput; | 1140 | goto out_dput; |
1130 | } | 1141 | } |
1142 | |||
1143 | /* wait for any unplug function to finish */ | ||
1144 | down_write(&swap_unplug_sem); | ||
1145 | up_write(&swap_unplug_sem); | ||
1146 | |||
1147 | destroy_swap_extents(p); | ||
1131 | down(&swapon_sem); | 1148 | down(&swapon_sem); |
1132 | swap_list_lock(); | 1149 | spin_lock(&swap_lock); |
1133 | drain_mmlist(); | 1150 | drain_mmlist(); |
1134 | swap_device_lock(p); | 1151 | |
1152 | /* wait for anyone still in scan_swap_map */ | ||
1153 | p->highest_bit = 0; /* cuts scans short */ | ||
1154 | while (p->flags >= SWP_SCANNING) { | ||
1155 | spin_unlock(&swap_lock); | ||
1156 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
1157 | schedule_timeout(1); | ||
1158 | spin_lock(&swap_lock); | ||
1159 | } | ||
1160 | |||
1135 | swap_file = p->swap_file; | 1161 | swap_file = p->swap_file; |
1136 | p->swap_file = NULL; | 1162 | p->swap_file = NULL; |
1137 | p->max = 0; | 1163 | p->max = 0; |
1138 | swap_map = p->swap_map; | 1164 | swap_map = p->swap_map; |
1139 | p->swap_map = NULL; | 1165 | p->swap_map = NULL; |
1140 | p->flags = 0; | 1166 | p->flags = 0; |
1141 | destroy_swap_extents(p); | 1167 | spin_unlock(&swap_lock); |
1142 | swap_device_unlock(p); | ||
1143 | swap_list_unlock(); | ||
1144 | up(&swapon_sem); | 1168 | up(&swapon_sem); |
1145 | vfree(swap_map); | 1169 | vfree(swap_map); |
1146 | inode = mapping->host; | 1170 | inode = mapping->host; |
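With the per-device lock gone, swapoff drains concurrent allocators by a polling handshake instead. The scanner side is not shown in this hunk; a hedged sketch of it (assuming, per this series, that SWP_SCANNING is a high flag bit added to and subtracted from the flags word as a refcount by scan_swap_map()):

    spin_lock(&swap_lock);
    si->flags += SWP_SCANNING;      /* announce: scanner inside the map */
    /* ... may drop swap_lock while probing swap_map for a free slot ... */
    si->flags -= SWP_SCANNING;      /* last one out lets swapoff proceed */
    spin_unlock(&swap_lock);

Since highest_bit has been forced to 0, any in-flight scan gives up quickly, so the schedule_timeout(1) loop above converges after at most a few ticks.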
@@ -1213,7 +1237,7 @@ static int swap_show(struct seq_file *swap, void *v) | |||
1213 | 1237 | ||
1214 | file = ptr->swap_file; | 1238 | file = ptr->swap_file; |
1215 | len = seq_path(swap, file->f_vfsmnt, file->f_dentry, " \t\n\\"); | 1239 | len = seq_path(swap, file->f_vfsmnt, file->f_dentry, " \t\n\\"); |
1216 | seq_printf(swap, "%*s%s\t%d\t%ld\t%d\n", | 1240 | seq_printf(swap, "%*s%s\t%u\t%u\t%d\n", |
1217 | len < 40 ? 40 - len : 1, " ", | 1241 | len < 40 ? 40 - len : 1, " ", |
1218 | S_ISBLK(file->f_dentry->d_inode->i_mode) ? | 1242 | S_ISBLK(file->f_dentry->d_inode->i_mode) ? |
1219 | "partition" : "file\t", | 1243 | "partition" : "file\t", |
@@ -1272,7 +1296,9 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1272 | static int least_priority; | 1296 | static int least_priority; |
1273 | union swap_header *swap_header = NULL; | 1297 | union swap_header *swap_header = NULL; |
1274 | int swap_header_version; | 1298 | int swap_header_version; |
1275 | int nr_good_pages = 0; | 1299 | unsigned int nr_good_pages = 0; |
1300 | int nr_extents = 0; | ||
1301 | sector_t span; | ||
1276 | unsigned long maxpages = 1; | 1302 | unsigned long maxpages = 1; |
1277 | int swapfilesize; | 1303 | int swapfilesize; |
1278 | unsigned short *swap_map; | 1304 | unsigned short *swap_map; |
@@ -1282,7 +1308,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1282 | 1308 | ||
1283 | if (!capable(CAP_SYS_ADMIN)) | 1309 | if (!capable(CAP_SYS_ADMIN)) |
1284 | return -EPERM; | 1310 | return -EPERM; |
1285 | swap_list_lock(); | 1311 | spin_lock(&swap_lock); |
1286 | p = swap_info; | 1312 | p = swap_info; |
1287 | for (type = 0 ; type < nr_swapfiles ; type++,p++) | 1313 | for (type = 0 ; type < nr_swapfiles ; type++,p++) |
1288 | if (!(p->flags & SWP_USED)) | 1314 | if (!(p->flags & SWP_USED)) |
@@ -1301,14 +1327,13 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1301 | * swp_entry_t or the architecture definition of a swap pte. | 1327 | * swp_entry_t or the architecture definition of a swap pte. |
1302 | */ | 1328 | */ |
1303 | if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) { | 1329 | if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) { |
1304 | swap_list_unlock(); | 1330 | spin_unlock(&swap_lock); |
1305 | goto out; | 1331 | goto out; |
1306 | } | 1332 | } |
1307 | if (type >= nr_swapfiles) | 1333 | if (type >= nr_swapfiles) |
1308 | nr_swapfiles = type+1; | 1334 | nr_swapfiles = type+1; |
1309 | INIT_LIST_HEAD(&p->extent_list); | 1335 | INIT_LIST_HEAD(&p->extent_list); |
1310 | p->flags = SWP_USED; | 1336 | p->flags = SWP_USED; |
1311 | p->nr_extents = 0; | ||
1312 | p->swap_file = NULL; | 1337 | p->swap_file = NULL; |
1313 | p->old_block_size = 0; | 1338 | p->old_block_size = 0; |
1314 | p->swap_map = NULL; | 1339 | p->swap_map = NULL; |
@@ -1316,7 +1341,6 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1316 | p->highest_bit = 0; | 1341 | p->highest_bit = 0; |
1317 | p->cluster_nr = 0; | 1342 | p->cluster_nr = 0; |
1318 | p->inuse_pages = 0; | 1343 | p->inuse_pages = 0; |
1319 | spin_lock_init(&p->sdev_lock); | ||
1320 | p->next = -1; | 1344 | p->next = -1; |
1321 | if (swap_flags & SWAP_FLAG_PREFER) { | 1345 | if (swap_flags & SWAP_FLAG_PREFER) { |
1322 | p->prio = | 1346 | p->prio = |
@@ -1324,7 +1348,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1324 | } else { | 1348 | } else { |
1325 | p->prio = --least_priority; | 1349 | p->prio = --least_priority; |
1326 | } | 1350 | } |
1327 | swap_list_unlock(); | 1351 | spin_unlock(&swap_lock); |
1328 | name = getname(specialfile); | 1352 | name = getname(specialfile); |
1329 | error = PTR_ERR(name); | 1353 | error = PTR_ERR(name); |
1330 | if (IS_ERR(name)) { | 1354 | if (IS_ERR(name)) { |
@@ -1426,6 +1450,8 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1426 | } | 1450 | } |
1427 | 1451 | ||
1428 | p->lowest_bit = 1; | 1452 | p->lowest_bit = 1; |
1453 | p->cluster_next = 1; | ||
1454 | |||
1429 | /* | 1455 | /* |
1430 | * Find out how many pages are allowed for a single swap | 1456 | * Find out how many pages are allowed for a single swap |
1431 | * device. There are two limiting factors: 1) the number of | 1457 | * device. There are two limiting factors: 1) the number of |
@@ -1446,6 +1472,10 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1446 | p->highest_bit = maxpages - 1; | 1472 | p->highest_bit = maxpages - 1; |
1447 | 1473 | ||
1448 | error = -EINVAL; | 1474 | error = -EINVAL; |
1475 | if (!maxpages) | ||
1476 | goto bad_swap; | ||
1477 | if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) | ||
1478 | goto bad_swap; | ||
1449 | if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) | 1479 | if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) |
1450 | goto bad_swap; | 1480 | goto bad_swap; |
1451 | 1481 | ||
@@ -1470,35 +1500,40 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1470 | if (error) | 1500 | if (error) |
1471 | goto bad_swap; | 1501 | goto bad_swap; |
1472 | } | 1502 | } |
1473 | 1503 | ||
1474 | if (swapfilesize && maxpages > swapfilesize) { | 1504 | if (swapfilesize && maxpages > swapfilesize) { |
1475 | printk(KERN_WARNING | 1505 | printk(KERN_WARNING |
1476 | "Swap area shorter than signature indicates\n"); | 1506 | "Swap area shorter than signature indicates\n"); |
1477 | error = -EINVAL; | 1507 | error = -EINVAL; |
1478 | goto bad_swap; | 1508 | goto bad_swap; |
1479 | } | 1509 | } |
1510 | if (nr_good_pages) { | ||
1511 | p->swap_map[0] = SWAP_MAP_BAD; | ||
1512 | p->max = maxpages; | ||
1513 | p->pages = nr_good_pages; | ||
1514 | nr_extents = setup_swap_extents(p, &span); | ||
1515 | if (nr_extents < 0) { | ||
1516 | error = nr_extents; | ||
1517 | goto bad_swap; | ||
1518 | } | ||
1519 | nr_good_pages = p->pages; | ||
1520 | } | ||
1480 | if (!nr_good_pages) { | 1521 | if (!nr_good_pages) { |
1481 | printk(KERN_WARNING "Empty swap-file\n"); | 1522 | printk(KERN_WARNING "Empty swap-file\n"); |
1482 | error = -EINVAL; | 1523 | error = -EINVAL; |
1483 | goto bad_swap; | 1524 | goto bad_swap; |
1484 | } | 1525 | } |
1485 | p->swap_map[0] = SWAP_MAP_BAD; | ||
1486 | p->max = maxpages; | ||
1487 | p->pages = nr_good_pages; | ||
1488 | |||
1489 | error = setup_swap_extents(p); | ||
1490 | if (error) | ||
1491 | goto bad_swap; | ||
1492 | 1526 | ||
1493 | down(&swapon_sem); | 1527 | down(&swapon_sem); |
1494 | swap_list_lock(); | 1528 | spin_lock(&swap_lock); |
1495 | swap_device_lock(p); | ||
1496 | p->flags = SWP_ACTIVE; | 1529 | p->flags = SWP_ACTIVE; |
1497 | nr_swap_pages += nr_good_pages; | 1530 | nr_swap_pages += nr_good_pages; |
1498 | total_swap_pages += nr_good_pages; | 1531 | total_swap_pages += nr_good_pages; |
1499 | printk(KERN_INFO "Adding %dk swap on %s. Priority:%d extents:%d\n", | 1532 | |
1500 | nr_good_pages<<(PAGE_SHIFT-10), name, | 1533 | printk(KERN_INFO "Adding %uk swap on %s. " |
1501 | p->prio, p->nr_extents); | 1534 | "Priority:%d extents:%d across:%lluk\n", |
1535 | nr_good_pages<<(PAGE_SHIFT-10), name, p->prio, | ||
1536 | nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10)); | ||
1502 | 1537 | ||
1503 | /* insert swap space into swap_list: */ | 1538 | /* insert swap space into swap_list: */ |
1504 | prev = -1; | 1539 | prev = -1; |
@@ -1514,8 +1549,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1514 | } else { | 1549 | } else { |
1515 | swap_info[prev].next = p - swap_info; | 1550 | swap_info[prev].next = p - swap_info; |
1516 | } | 1551 | } |
1517 | swap_device_unlock(p); | 1552 | spin_unlock(&swap_lock); |
1518 | swap_list_unlock(); | ||
1519 | up(&swapon_sem); | 1553 | up(&swapon_sem); |
1520 | error = 0; | 1554 | error = 0; |
1521 | goto out; | 1555 | goto out; |
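With the extra fields, the first swapon of a 1 GiB partition (one extent, default priority; all values illustrative) would log something like:

    Adding 1048572k swap on /dev/hda2. Priority:-1 extents:1 across:1048572k

For a fragmented swapfile the across: figure exceeds the size figure, making the on-disk spread visible at a glance.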
@@ -1524,16 +1558,16 @@ bad_swap: | |||
1524 | set_blocksize(bdev, p->old_block_size); | 1558 | set_blocksize(bdev, p->old_block_size); |
1525 | bd_release(bdev); | 1559 | bd_release(bdev); |
1526 | } | 1560 | } |
1561 | destroy_swap_extents(p); | ||
1527 | bad_swap_2: | 1562 | bad_swap_2: |
1528 | swap_list_lock(); | 1563 | spin_lock(&swap_lock); |
1529 | swap_map = p->swap_map; | 1564 | swap_map = p->swap_map; |
1530 | p->swap_file = NULL; | 1565 | p->swap_file = NULL; |
1531 | p->swap_map = NULL; | 1566 | p->swap_map = NULL; |
1532 | p->flags = 0; | 1567 | p->flags = 0; |
1533 | if (!(swap_flags & SWAP_FLAG_PREFER)) | 1568 | if (!(swap_flags & SWAP_FLAG_PREFER)) |
1534 | ++least_priority; | 1569 | ++least_priority; |
1535 | swap_list_unlock(); | 1570 | spin_unlock(&swap_lock); |
1536 | destroy_swap_extents(p); | ||
1537 | vfree(swap_map); | 1571 | vfree(swap_map); |
1538 | if (swap_file) | 1572 | if (swap_file) |
1539 | filp_close(swap_file, NULL); | 1573 | filp_close(swap_file, NULL); |
@@ -1557,7 +1591,7 @@ void si_swapinfo(struct sysinfo *val) | |||
1557 | unsigned int i; | 1591 | unsigned int i; |
1558 | unsigned long nr_to_be_unused = 0; | 1592 | unsigned long nr_to_be_unused = 0; |
1559 | 1593 | ||
1560 | swap_list_lock(); | 1594 | spin_lock(&swap_lock); |
1561 | for (i = 0; i < nr_swapfiles; i++) { | 1595 | for (i = 0; i < nr_swapfiles; i++) { |
1562 | if (!(swap_info[i].flags & SWP_USED) || | 1596 | if (!(swap_info[i].flags & SWP_USED) || |
1563 | (swap_info[i].flags & SWP_WRITEOK)) | 1597 | (swap_info[i].flags & SWP_WRITEOK)) |
@@ -1566,7 +1600,7 @@ void si_swapinfo(struct sysinfo *val) | |||
1566 | } | 1600 | } |
1567 | val->freeswap = nr_swap_pages + nr_to_be_unused; | 1601 | val->freeswap = nr_swap_pages + nr_to_be_unused; |
1568 | val->totalswap = total_swap_pages + nr_to_be_unused; | 1602 | val->totalswap = total_swap_pages + nr_to_be_unused; |
1569 | swap_list_unlock(); | 1603 | spin_unlock(&swap_lock); |
1570 | } | 1604 | } |
1571 | 1605 | ||
1572 | /* | 1606 | /* |
@@ -1587,7 +1621,7 @@ int swap_duplicate(swp_entry_t entry) | |||
1587 | p = type + swap_info; | 1621 | p = type + swap_info; |
1588 | offset = swp_offset(entry); | 1622 | offset = swp_offset(entry); |
1589 | 1623 | ||
1590 | swap_device_lock(p); | 1624 | spin_lock(&swap_lock); |
1591 | if (offset < p->max && p->swap_map[offset]) { | 1625 | if (offset < p->max && p->swap_map[offset]) { |
1592 | if (p->swap_map[offset] < SWAP_MAP_MAX - 1) { | 1626 | if (p->swap_map[offset] < SWAP_MAP_MAX - 1) { |
1593 | p->swap_map[offset]++; | 1627 | p->swap_map[offset]++; |
@@ -1599,7 +1633,7 @@ int swap_duplicate(swp_entry_t entry) | |||
1599 | result = 1; | 1633 | result = 1; |
1600 | } | 1634 | } |
1601 | } | 1635 | } |
1602 | swap_device_unlock(p); | 1636 | spin_unlock(&swap_lock); |
1603 | out: | 1637 | out: |
1604 | return result; | 1638 | return result; |
1605 | 1639 | ||
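The duplicate path saturates rather than overflowing. A userspace sketch of the counting rule (SWAP_MAP_MAX's value is assumed from <linux/swap.h>; the branch elided from this hunk is what pins the count at the ceiling, to be repaired later by the try_to_unuse() reset shown earlier):

    #define SWAP_MAP_MAX 0x7fff     /* assumed; defined in <linux/swap.h> */

    static int dup(unsigned short *count)
    {
            if (!*count)
                    return 0;               /* free slot: cannot duplicate */
            if (*count < SWAP_MAP_MAX - 1)
                    (*count)++;             /* normal reference bump */
            else
                    *count = SWAP_MAP_MAX;  /* saturate; try_to_unuse()
                                               resets this overflow later */
            return 1;
    }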
@@ -1615,7 +1649,7 @@ get_swap_info_struct(unsigned type) | |||
1615 | } | 1649 | } |
1616 | 1650 | ||
1617 | /* | 1651 | /* |
1618 | * swap_device_lock prevents swap_map being freed. Don't grab an extra | 1652 | * swap_lock prevents swap_map being freed. Don't grab an extra |
1619 | * reference on the swaphandle, it doesn't matter if it becomes unused. | 1653 | * reference on the swaphandle, it doesn't matter if it becomes unused. |
1620 | */ | 1654 | */ |
1621 | int valid_swaphandles(swp_entry_t entry, unsigned long *offset) | 1655 | int valid_swaphandles(swp_entry_t entry, unsigned long *offset) |
@@ -1631,7 +1665,7 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset) | |||
1631 | toff++, i--; | 1665 | toff++, i--; |
1632 | *offset = toff; | 1666 | *offset = toff; |
1633 | 1667 | ||
1634 | swap_device_lock(swapdev); | 1668 | spin_lock(&swap_lock); |
1635 | do { | 1669 | do { |
1636 | /* Don't read-ahead past the end of the swap area */ | 1670 | /* Don't read-ahead past the end of the swap area */ |
1637 | if (toff >= swapdev->max) | 1671 | if (toff >= swapdev->max) |
@@ -1644,6 +1678,6 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset) | |||
1644 | toff++; | 1678 | toff++; |
1645 | ret++; | 1679 | ret++; |
1646 | } while (--i); | 1680 | } while (--i); |
1647 | swap_device_unlock(swapdev); | 1681 | spin_unlock(&swap_lock); |
1648 | return ret; | 1682 | return ret; |
1649 | } | 1683 | } |
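A hedged userspace sketch of that readahead window logic (map stands in for swapdev->swap_map, CLUSTER for the page_cluster sizing; the lines elided from the hunk also stop the window at SWAP_MAP_BAD slots):

    #define CLUSTER 8       /* illustrative; the kernel derives this
                               from page_cluster */

    /* Back offset up to its aligned cluster start, then count how many
     * consecutive in-use slots follow, so swapin can read them in one go. */
    static int readahead_window(const unsigned short *map, unsigned long max,
                                unsigned long *offset)
    {
            unsigned long toff = *offset - (*offset % CLUSTER);
            int i = CLUSTER, ret = 0;

            if (!toff)
                    toff++, i--;    /* never read the header page */
            *offset = toff;

            do {
                    if (toff >= max)
                            break;  /* don't read past the swap area */
                    if (!map[toff])
                            break;  /* a free slot ends the window */
                    toff++;
                    ret++;
            } while (--i);
            return ret;
    }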
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8ff16a1eee6a..67b358e57ef6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -158,8 +158,6 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) | |||
158 | return err; | 158 | return err; |
159 | } | 159 | } |
160 | 160 | ||
161 | #define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */ | ||
162 | |||
163 | struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, | 161 | struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, |
164 | unsigned long start, unsigned long end) | 162 | unsigned long start, unsigned long end) |
165 | { | 163 | { |
diff --git a/mm/vmscan.c b/mm/vmscan.c index cfffe5098d53..0095533cdde9 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -822,6 +822,8 @@ shrink_zone(struct zone *zone, struct scan_control *sc) | |||
822 | unsigned long nr_active; | 822 | unsigned long nr_active; |
823 | unsigned long nr_inactive; | 823 | unsigned long nr_inactive; |
824 | 824 | ||
825 | atomic_inc(&zone->reclaim_in_progress); | ||
826 | |||
825 | /* | 827 | /* |
826 | * Add one to `nr_to_scan' just to make sure that the kernel will | 828 | * Add one to `nr_to_scan' just to make sure that the kernel will |
827 | * slowly sift through the active list. | 829 | * slowly sift through the active list. |
@@ -861,6 +863,8 @@ shrink_zone(struct zone *zone, struct scan_control *sc) | |||
861 | } | 863 | } |
862 | 864 | ||
863 | throttle_vm_writeout(); | 865 | throttle_vm_writeout(); |
866 | |||
867 | atomic_dec(&zone->reclaim_in_progress); | ||
864 | } | 868 | } |
865 | 869 | ||
866 | /* | 870 | /* |
@@ -900,9 +904,7 @@ shrink_caches(struct zone **zones, struct scan_control *sc) | |||
900 | if (zone->all_unreclaimable && sc->priority != DEF_PRIORITY) | 904 | if (zone->all_unreclaimable && sc->priority != DEF_PRIORITY) |
901 | continue; /* Let kswapd poll it */ | 905 | continue; /* Let kswapd poll it */ |
902 | 906 | ||
903 | atomic_inc(&zone->reclaim_in_progress); | ||
904 | shrink_zone(zone, sc); | 907 | shrink_zone(zone, sc); |
905 | atomic_dec(&zone->reclaim_in_progress); | ||
906 | } | 908 | } |
907 | } | 909 | } |
908 | 910 | ||
@@ -1358,14 +1360,13 @@ int zone_reclaim(struct zone *zone, unsigned int gfp_mask, unsigned int order) | |||
1358 | sc.swap_cluster_max = SWAP_CLUSTER_MAX; | 1360 | sc.swap_cluster_max = SWAP_CLUSTER_MAX; |
1359 | 1361 | ||
1360 | /* Don't reclaim the zone if there are other reclaimers active */ | 1362 | /* Don't reclaim the zone if there are other reclaimers active */ |
1361 | if (!atomic_inc_and_test(&zone->reclaim_in_progress)) | 1363 | if (atomic_read(&zone->reclaim_in_progress) > 0) |
1362 | goto out; | 1364 | goto out; |
1363 | 1365 | ||
1364 | shrink_zone(zone, &sc); | 1366 | shrink_zone(zone, &sc); |
1365 | total_reclaimed = sc.nr_reclaimed; | 1367 | total_reclaimed = sc.nr_reclaimed; |
1366 | 1368 | ||
1367 | out: | 1369 | out: |
1368 | atomic_dec(&zone->reclaim_in_progress); | ||
1369 | return total_reclaimed; | 1370 | return total_reclaimed; |
1370 | } | 1371 | } |
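A sketch of what the new guard buys: the check is purely advisory. The atomic_read() can race with another reclaimer, but the worst case is two threads shrinking the same zone, while the counter itself stays correct because the inc/dec pair now lives inside shrink_zone() for every caller:

    if (atomic_read(&zone->reclaim_in_progress) > 0)
            goto out;               /* someone else is already at it */
    shrink_zone(zone, &sc);         /* inc/dec happen inside */

This also fixes the early-exit path, which previously had to undo an increment it may never have fully owned; now goto out touches no counter at all.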
1371 | 1372 | ||
@@ -1375,6 +1376,9 @@ asmlinkage long sys_set_zone_reclaim(unsigned int node, unsigned int zone, | |||
1375 | struct zone *z; | 1376 | struct zone *z; |
1376 | int i; | 1377 | int i; |
1377 | 1378 | ||
1379 | if (!capable(CAP_SYS_ADMIN)) | ||
1380 | return -EACCES; | ||
1381 | |||
1378 | if (node >= MAX_NUMNODES || !node_online(node)) | 1382 | if (node >= MAX_NUMNODES || !node_online(node)) |
1379 | return -EINVAL; | 1383 | return -EINVAL; |
1380 | 1384 | ||
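Ordering matters here: the privilege check precedes argument validation, so unprivileged callers can no longer probe for valid node numbers. The userspace view (illustrative fragment; assumes the architecture wires up a __NR_set_zone_reclaim syscall number, as ia64 did in this era):

    #include <errno.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    /* long rc = syscall(__NR_set_zone_reclaim, node, zone, state); */
    /* rc == -1 && errno == EACCES  =>  caller lacks CAP_SYS_ADMIN  */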