diff options
author | Hugh Dickins <hugh@veritas.com> | 2005-09-03 18:54:41 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@evo.osdl.org> | 2005-09-05 03:05:42 -0400 |
commit | 5d337b9194b1ce3b6fd5f3cb2799455ed2f9a3d1 (patch) | |
tree | 91ed9ef6f4cb5f6a1832f2baaaabd53fcd83513e | |
parent | 048c27fd72816b44e096997d1c6901c3abbfd45b (diff) |
[PATCH] swap: swap_lock replace list+device
The idea of a swap_device_lock per device, and a swap_list_lock over them all,
is appealing; but in practice almost every holder of swap_device_lock must
already hold swap_list_lock, which defeats the purpose of the split.
The only exceptions have been swap_duplicate, valid_swaphandles and an
untrodden path in try_to_unuse (plus a few places added in this series).
valid_swaphandles doesn't show up high in profiles, but swap_duplicate does
demand attention. However, with the hold time in get_swap_pages so much
reduced, I've not yet found a load and set of swap device priorities to show
even swap_duplicate benefitting from the split. Certainly the split is mere
overhead in the common case of a single swap device.
So, replace swap_list_lock and swap_device_lock by spinlock_t swap_lock
(generally we seem to prefer an _ in the name, and not hide in a macro).
If someone can show a regression in swap_duplicate, then probably we should
add a hashlock for the swap_map entries alone (shorts being non-atomic), so as
to help the case of the single swap device too.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | Documentation/vm/locking | 15 | ||||
-rw-r--r-- | include/linux/swap.h | 11 | ||||
-rw-r--r-- | mm/filemap.c | 7 | ||||
-rw-r--r-- | mm/rmap.c | 3 | ||||
-rw-r--r-- | mm/swapfile.c | 125 |
5 files changed, 66 insertions, 95 deletions
diff --git a/Documentation/vm/locking b/Documentation/vm/locking index c3ef09ae3bb1..f366fa956179 100644 --- a/Documentation/vm/locking +++ b/Documentation/vm/locking | |||
@@ -83,19 +83,18 @@ single address space optimization, so that the zap_page_range (from | |||
83 | vmtruncate) does not lose sending ipi's to cloned threads that might | 83 | vmtruncate) does not lose sending ipi's to cloned threads that might |
84 | be spawned underneath it and go to user mode to drag in pte's into tlbs. | 84 | be spawned underneath it and go to user mode to drag in pte's into tlbs. |
85 | 85 | ||
86 | swap_list_lock/swap_device_lock | 86 | swap_lock |
87 | ------------------------------- | 87 | -------------- |
88 | The swap devices are chained in priority order from the "swap_list" header. | 88 | The swap devices are chained in priority order from the "swap_list" header. |
89 | The "swap_list" is used for the round-robin swaphandle allocation strategy. | 89 | The "swap_list" is used for the round-robin swaphandle allocation strategy. |
90 | The #free swaphandles is maintained in "nr_swap_pages". These two together | 90 | The #free swaphandles is maintained in "nr_swap_pages". These two together |
91 | are protected by the swap_list_lock. | 91 | are protected by the swap_lock. |
92 | 92 | ||
93 | The swap_device_lock, which is per swap device, protects the reference | 93 | The swap_lock also protects all the device reference counts on the |
94 | counts on the corresponding swaphandles, maintained in the "swap_map" | 94 | corresponding swaphandles, maintained in the "swap_map" array, and the |
95 | array, and the "highest_bit" and "lowest_bit" fields. | 95 | "highest_bit" and "lowest_bit" fields. |
96 | 96 | ||
97 | Both of these are spinlocks, and are never acquired from intr level. The | 97 | The swap_lock is a spinlock, and is never acquired from intr level. |
98 | locking hierarchy is swap_list_lock -> swap_device_lock. | ||
99 | 98 | ||
100 | To prevent races between swap space deletion or async readahead swapins | 99 | To prevent races between swap space deletion or async readahead swapins |
101 | deciding whether a swap handle is being used, ie worthy of being read in | 100 | deciding whether a swap handle is being used, ie worthy of being read in |
diff --git a/include/linux/swap.h b/include/linux/swap.h index db3b5de7c92f..3c9ff0048153 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -121,7 +121,7 @@ enum { | |||
121 | */ | 121 | */ |
122 | struct swap_info_struct { | 122 | struct swap_info_struct { |
123 | unsigned int flags; | 123 | unsigned int flags; |
124 | spinlock_t sdev_lock; | 124 | int prio; /* swap priority */ |
125 | struct file *swap_file; | 125 | struct file *swap_file; |
126 | struct block_device *bdev; | 126 | struct block_device *bdev; |
127 | struct list_head extent_list; | 127 | struct list_head extent_list; |
@@ -135,7 +135,6 @@ struct swap_info_struct { | |||
135 | unsigned int pages; | 135 | unsigned int pages; |
136 | unsigned int max; | 136 | unsigned int max; |
137 | unsigned int inuse_pages; | 137 | unsigned int inuse_pages; |
138 | int prio; /* swap priority */ | ||
139 | int next; /* next entry on swap list */ | 138 | int next; /* next entry on swap list */ |
140 | }; | 139 | }; |
141 | 140 | ||
@@ -221,13 +220,7 @@ extern int can_share_swap_page(struct page *); | |||
221 | extern int remove_exclusive_swap_page(struct page *); | 220 | extern int remove_exclusive_swap_page(struct page *); |
222 | struct backing_dev_info; | 221 | struct backing_dev_info; |
223 | 222 | ||
224 | extern struct swap_list_t swap_list; | 223 | extern spinlock_t swap_lock; |
225 | extern spinlock_t swaplock; | ||
226 | |||
227 | #define swap_list_lock() spin_lock(&swaplock) | ||
228 | #define swap_list_unlock() spin_unlock(&swaplock) | ||
229 | #define swap_device_lock(p) spin_lock(&p->sdev_lock) | ||
230 | #define swap_device_unlock(p) spin_unlock(&p->sdev_lock) | ||
231 | 224 | ||
232 | /* linux/mm/thrash.c */ | 225 | /* linux/mm/thrash.c */ |
233 | extern struct mm_struct * swap_token_mm; | 226 | extern struct mm_struct * swap_token_mm; |
diff --git a/mm/filemap.c b/mm/filemap.c index c11418dd94e8..edc54436fa94 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -54,9 +54,8 @@ | |||
54 | * | 54 | * |
55 | * ->i_mmap_lock (vmtruncate) | 55 | * ->i_mmap_lock (vmtruncate) |
56 | * ->private_lock (__free_pte->__set_page_dirty_buffers) | 56 | * ->private_lock (__free_pte->__set_page_dirty_buffers) |
57 | * ->swap_list_lock | 57 | * ->swap_lock (exclusive_swap_page, others) |
58 | * ->swap_device_lock (exclusive_swap_page, others) | 58 | * ->mapping->tree_lock |
59 | * ->mapping->tree_lock | ||
60 | * | 59 | * |
61 | * ->i_sem | 60 | * ->i_sem |
62 | * ->i_mmap_lock (truncate->unmap_mapping_range) | 61 | * ->i_mmap_lock (truncate->unmap_mapping_range) |
@@ -86,7 +85,7 @@ | |||
86 | * ->page_table_lock (anon_vma_prepare and various) | 85 | * ->page_table_lock (anon_vma_prepare and various) |
87 | * | 86 | * |
88 | * ->page_table_lock | 87 | * ->page_table_lock |
89 | * ->swap_device_lock (try_to_unmap_one) | 88 | * ->swap_lock (try_to_unmap_one) |
90 | * ->private_lock (try_to_unmap_one) | 89 | * ->private_lock (try_to_unmap_one) |
91 | * ->tree_lock (try_to_unmap_one) | 90 | * ->tree_lock (try_to_unmap_one) |
92 | * ->zone.lru_lock (follow_page->mark_page_accessed) | 91 | * ->zone.lru_lock (follow_page->mark_page_accessed) |
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
@@ -34,9 +34,8 @@ | |||
34 | * anon_vma->lock | 34 | * anon_vma->lock |
35 | * mm->page_table_lock | 35 | * mm->page_table_lock |
36 | * zone->lru_lock (in mark_page_accessed) | 36 | * zone->lru_lock (in mark_page_accessed) |
37 | * swap_list_lock (in swap_free etc's swap_info_get) | 37 | * swap_lock (in swap_duplicate, swap_info_get) |
38 | * mmlist_lock (in mmput, drain_mmlist and others) | 38 | * mmlist_lock (in mmput, drain_mmlist and others) |
39 | * swap_device_lock (in swap_duplicate, swap_info_get) | ||
40 | * mapping->private_lock (in __set_page_dirty_buffers) | 39 | * mapping->private_lock (in __set_page_dirty_buffers) |
41 | * inode_lock (in set_page_dirty's __mark_inode_dirty) | 40 | * inode_lock (in set_page_dirty's __mark_inode_dirty) |
42 | * sb_lock (within inode_lock in fs/fs-writeback.c) | 41 | * sb_lock (within inode_lock in fs/fs-writeback.c) |
diff --git a/mm/swapfile.c b/mm/swapfile.c index e675ae55f87d..4b6e8bf986bc 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -31,7 +31,7 @@ | |||
31 | #include <asm/tlbflush.h> | 31 | #include <asm/tlbflush.h> |
32 | #include <linux/swapops.h> | 32 | #include <linux/swapops.h> |
33 | 33 | ||
34 | DEFINE_SPINLOCK(swaplock); | 34 | DEFINE_SPINLOCK(swap_lock); |
35 | unsigned int nr_swapfiles; | 35 | unsigned int nr_swapfiles; |
36 | long total_swap_pages; | 36 | long total_swap_pages; |
37 | static int swap_overflow; | 37 | static int swap_overflow; |
@@ -51,7 +51,7 @@ static DECLARE_MUTEX(swapon_sem); | |||
51 | 51 | ||
52 | /* | 52 | /* |
53 | * We need this because the bdev->unplug_fn can sleep and we cannot | 53 | * We need this because the bdev->unplug_fn can sleep and we cannot |
54 | * hold swap_list_lock while calling the unplug_fn. And swap_list_lock | 54 | * hold swap_lock while calling the unplug_fn. And swap_lock |
55 | * cannot be turned into a semaphore. | 55 | * cannot be turned into a semaphore. |
56 | */ | 56 | */ |
57 | static DECLARE_RWSEM(swap_unplug_sem); | 57 | static DECLARE_RWSEM(swap_unplug_sem); |
@@ -105,7 +105,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si) | |||
105 | si->cluster_nr = SWAPFILE_CLUSTER - 1; | 105 | si->cluster_nr = SWAPFILE_CLUSTER - 1; |
106 | if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) | 106 | if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) |
107 | goto lowest; | 107 | goto lowest; |
108 | swap_device_unlock(si); | 108 | spin_unlock(&swap_lock); |
109 | 109 | ||
110 | offset = si->lowest_bit; | 110 | offset = si->lowest_bit; |
111 | last_in_cluster = offset + SWAPFILE_CLUSTER - 1; | 111 | last_in_cluster = offset + SWAPFILE_CLUSTER - 1; |
@@ -115,7 +115,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si) | |||
115 | if (si->swap_map[offset]) | 115 | if (si->swap_map[offset]) |
116 | last_in_cluster = offset + SWAPFILE_CLUSTER; | 116 | last_in_cluster = offset + SWAPFILE_CLUSTER; |
117 | else if (offset == last_in_cluster) { | 117 | else if (offset == last_in_cluster) { |
118 | swap_device_lock(si); | 118 | spin_lock(&swap_lock); |
119 | si->cluster_next = offset-SWAPFILE_CLUSTER-1; | 119 | si->cluster_next = offset-SWAPFILE_CLUSTER-1; |
120 | goto cluster; | 120 | goto cluster; |
121 | } | 121 | } |
@@ -124,7 +124,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si) | |||
124 | latency_ration = LATENCY_LIMIT; | 124 | latency_ration = LATENCY_LIMIT; |
125 | } | 125 | } |
126 | } | 126 | } |
127 | swap_device_lock(si); | 127 | spin_lock(&swap_lock); |
128 | goto lowest; | 128 | goto lowest; |
129 | } | 129 | } |
130 | 130 | ||
@@ -153,10 +153,10 @@ checks: if (!(si->flags & SWP_WRITEOK)) | |||
153 | return offset; | 153 | return offset; |
154 | } | 154 | } |
155 | 155 | ||
156 | swap_device_unlock(si); | 156 | spin_unlock(&swap_lock); |
157 | while (++offset <= si->highest_bit) { | 157 | while (++offset <= si->highest_bit) { |
158 | if (!si->swap_map[offset]) { | 158 | if (!si->swap_map[offset]) { |
159 | swap_device_lock(si); | 159 | spin_lock(&swap_lock); |
160 | goto checks; | 160 | goto checks; |
161 | } | 161 | } |
162 | if (unlikely(--latency_ration < 0)) { | 162 | if (unlikely(--latency_ration < 0)) { |
@@ -164,7 +164,7 @@ checks: if (!(si->flags & SWP_WRITEOK)) | |||
164 | latency_ration = LATENCY_LIMIT; | 164 | latency_ration = LATENCY_LIMIT; |
165 | } | 165 | } |
166 | } | 166 | } |
167 | swap_device_lock(si); | 167 | spin_lock(&swap_lock); |
168 | goto lowest; | 168 | goto lowest; |
169 | 169 | ||
170 | no_page: | 170 | no_page: |
@@ -179,7 +179,7 @@ swp_entry_t get_swap_page(void) | |||
179 | int type, next; | 179 | int type, next; |
180 | int wrapped = 0; | 180 | int wrapped = 0; |
181 | 181 | ||
182 | swap_list_lock(); | 182 | spin_lock(&swap_lock); |
183 | if (nr_swap_pages <= 0) | 183 | if (nr_swap_pages <= 0) |
184 | goto noswap; | 184 | goto noswap; |
185 | nr_swap_pages--; | 185 | nr_swap_pages--; |
@@ -199,19 +199,17 @@ swp_entry_t get_swap_page(void) | |||
199 | continue; | 199 | continue; |
200 | 200 | ||
201 | swap_list.next = next; | 201 | swap_list.next = next; |
202 | swap_device_lock(si); | ||
203 | swap_list_unlock(); | ||
204 | offset = scan_swap_map(si); | 202 | offset = scan_swap_map(si); |
205 | swap_device_unlock(si); | 203 | if (offset) { |
206 | if (offset) | 204 | spin_unlock(&swap_lock); |
207 | return swp_entry(type, offset); | 205 | return swp_entry(type, offset); |
208 | swap_list_lock(); | 206 | } |
209 | next = swap_list.next; | 207 | next = swap_list.next; |
210 | } | 208 | } |
211 | 209 | ||
212 | nr_swap_pages++; | 210 | nr_swap_pages++; |
213 | noswap: | 211 | noswap: |
214 | swap_list_unlock(); | 212 | spin_unlock(&swap_lock); |
215 | return (swp_entry_t) {0}; | 213 | return (swp_entry_t) {0}; |
216 | } | 214 | } |
217 | 215 | ||
@@ -233,8 +231,7 @@ static struct swap_info_struct * swap_info_get(swp_entry_t entry) | |||
233 | goto bad_offset; | 231 | goto bad_offset; |
234 | if (!p->swap_map[offset]) | 232 | if (!p->swap_map[offset]) |
235 | goto bad_free; | 233 | goto bad_free; |
236 | swap_list_lock(); | 234 | spin_lock(&swap_lock); |
237 | swap_device_lock(p); | ||
238 | return p; | 235 | return p; |
239 | 236 | ||
240 | bad_free: | 237 | bad_free: |
@@ -252,12 +249,6 @@ out: | |||
252 | return NULL; | 249 | return NULL; |
253 | } | 250 | } |
254 | 251 | ||
255 | static void swap_info_put(struct swap_info_struct * p) | ||
256 | { | ||
257 | swap_device_unlock(p); | ||
258 | swap_list_unlock(); | ||
259 | } | ||
260 | |||
261 | static int swap_entry_free(struct swap_info_struct *p, unsigned long offset) | 252 | static int swap_entry_free(struct swap_info_struct *p, unsigned long offset) |
262 | { | 253 | { |
263 | int count = p->swap_map[offset]; | 254 | int count = p->swap_map[offset]; |
@@ -290,7 +281,7 @@ void swap_free(swp_entry_t entry) | |||
290 | p = swap_info_get(entry); | 281 | p = swap_info_get(entry); |
291 | if (p) { | 282 | if (p) { |
292 | swap_entry_free(p, swp_offset(entry)); | 283 | swap_entry_free(p, swp_offset(entry)); |
293 | swap_info_put(p); | 284 | spin_unlock(&swap_lock); |
294 | } | 285 | } |
295 | } | 286 | } |
296 | 287 | ||
@@ -308,7 +299,7 @@ static inline int page_swapcount(struct page *page) | |||
308 | if (p) { | 299 | if (p) { |
309 | /* Subtract the 1 for the swap cache itself */ | 300 | /* Subtract the 1 for the swap cache itself */ |
310 | count = p->swap_map[swp_offset(entry)] - 1; | 301 | count = p->swap_map[swp_offset(entry)] - 1; |
311 | swap_info_put(p); | 302 | spin_unlock(&swap_lock); |
312 | } | 303 | } |
313 | return count; | 304 | return count; |
314 | } | 305 | } |
@@ -365,7 +356,7 @@ int remove_exclusive_swap_page(struct page *page) | |||
365 | } | 356 | } |
366 | write_unlock_irq(&swapper_space.tree_lock); | 357 | write_unlock_irq(&swapper_space.tree_lock); |
367 | } | 358 | } |
368 | swap_info_put(p); | 359 | spin_unlock(&swap_lock); |
369 | 360 | ||
370 | if (retval) { | 361 | if (retval) { |
371 | swap_free(entry); | 362 | swap_free(entry); |
@@ -388,7 +379,7 @@ void free_swap_and_cache(swp_entry_t entry) | |||
388 | if (p) { | 379 | if (p) { |
389 | if (swap_entry_free(p, swp_offset(entry)) == 1) | 380 | if (swap_entry_free(p, swp_offset(entry)) == 1) |
390 | page = find_trylock_page(&swapper_space, entry.val); | 381 | page = find_trylock_page(&swapper_space, entry.val); |
391 | swap_info_put(p); | 382 | spin_unlock(&swap_lock); |
392 | } | 383 | } |
393 | if (page) { | 384 | if (page) { |
394 | int one_user; | 385 | int one_user; |
@@ -558,10 +549,10 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, | |||
558 | int count; | 549 | int count; |
559 | 550 | ||
560 | /* | 551 | /* |
561 | * No need for swap_device_lock(si) here: we're just looking | 552 | * No need for swap_lock here: we're just looking |
562 | * for whether an entry is in use, not modifying it; false | 553 | * for whether an entry is in use, not modifying it; false |
563 | * hits are okay, and sys_swapoff() has already prevented new | 554 | * hits are okay, and sys_swapoff() has already prevented new |
564 | * allocations from this area (while holding swap_list_lock()). | 555 | * allocations from this area (while holding swap_lock). |
565 | */ | 556 | */ |
566 | for (;;) { | 557 | for (;;) { |
567 | if (++i >= max) { | 558 | if (++i >= max) { |
@@ -751,9 +742,9 @@ static int try_to_unuse(unsigned int type) | |||
751 | * report them; but do report if we reset SWAP_MAP_MAX. | 742 | * report them; but do report if we reset SWAP_MAP_MAX. |
752 | */ | 743 | */ |
753 | if (*swap_map == SWAP_MAP_MAX) { | 744 | if (*swap_map == SWAP_MAP_MAX) { |
754 | swap_device_lock(si); | 745 | spin_lock(&swap_lock); |
755 | *swap_map = 1; | 746 | *swap_map = 1; |
756 | swap_device_unlock(si); | 747 | spin_unlock(&swap_lock); |
757 | reset_overflow = 1; | 748 | reset_overflow = 1; |
758 | } | 749 | } |
759 | 750 | ||
@@ -817,9 +808,9 @@ static int try_to_unuse(unsigned int type) | |||
817 | } | 808 | } |
818 | 809 | ||
819 | /* | 810 | /* |
820 | * After a successful try_to_unuse, if no swap is now in use, we know we | 811 | * After a successful try_to_unuse, if no swap is now in use, we know |
821 | * can empty the mmlist. swap_list_lock must be held on entry and exit. | 812 | * we can empty the mmlist. swap_lock must be held on entry and exit. |
822 | * Note that mmlist_lock nests inside swap_list_lock, and an mm must be | 813 | * Note that mmlist_lock nests inside swap_lock, and an mm must be |
823 | * added to the mmlist just after page_duplicate - before would be racy. | 814 | * added to the mmlist just after page_duplicate - before would be racy. |
824 | */ | 815 | */ |
825 | static void drain_mmlist(void) | 816 | static void drain_mmlist(void) |
@@ -1092,7 +1083,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile) | |||
1092 | 1083 | ||
1093 | mapping = victim->f_mapping; | 1084 | mapping = victim->f_mapping; |
1094 | prev = -1; | 1085 | prev = -1; |
1095 | swap_list_lock(); | 1086 | spin_lock(&swap_lock); |
1096 | for (type = swap_list.head; type >= 0; type = swap_info[type].next) { | 1087 | for (type = swap_list.head; type >= 0; type = swap_info[type].next) { |
1097 | p = swap_info + type; | 1088 | p = swap_info + type; |
1098 | if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) { | 1089 | if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) { |
@@ -1103,14 +1094,14 @@ asmlinkage long sys_swapoff(const char __user * specialfile) | |||
1103 | } | 1094 | } |
1104 | if (type < 0) { | 1095 | if (type < 0) { |
1105 | err = -EINVAL; | 1096 | err = -EINVAL; |
1106 | swap_list_unlock(); | 1097 | spin_unlock(&swap_lock); |
1107 | goto out_dput; | 1098 | goto out_dput; |
1108 | } | 1099 | } |
1109 | if (!security_vm_enough_memory(p->pages)) | 1100 | if (!security_vm_enough_memory(p->pages)) |
1110 | vm_unacct_memory(p->pages); | 1101 | vm_unacct_memory(p->pages); |
1111 | else { | 1102 | else { |
1112 | err = -ENOMEM; | 1103 | err = -ENOMEM; |
1113 | swap_list_unlock(); | 1104 | spin_unlock(&swap_lock); |
1114 | goto out_dput; | 1105 | goto out_dput; |
1115 | } | 1106 | } |
1116 | if (prev < 0) { | 1107 | if (prev < 0) { |
@@ -1124,10 +1115,8 @@ asmlinkage long sys_swapoff(const char __user * specialfile) | |||
1124 | } | 1115 | } |
1125 | nr_swap_pages -= p->pages; | 1116 | nr_swap_pages -= p->pages; |
1126 | total_swap_pages -= p->pages; | 1117 | total_swap_pages -= p->pages; |
1127 | swap_device_lock(p); | ||
1128 | p->flags &= ~SWP_WRITEOK; | 1118 | p->flags &= ~SWP_WRITEOK; |
1129 | swap_device_unlock(p); | 1119 | spin_unlock(&swap_lock); |
1130 | swap_list_unlock(); | ||
1131 | 1120 | ||
1132 | current->flags |= PF_SWAPOFF; | 1121 | current->flags |= PF_SWAPOFF; |
1133 | err = try_to_unuse(type); | 1122 | err = try_to_unuse(type); |
@@ -1135,7 +1124,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile) | |||
1135 | 1124 | ||
1136 | if (err) { | 1125 | if (err) { |
1137 | /* re-insert swap space back into swap_list */ | 1126 | /* re-insert swap space back into swap_list */ |
1138 | swap_list_lock(); | 1127 | spin_lock(&swap_lock); |
1139 | for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next) | 1128 | for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next) |
1140 | if (p->prio >= swap_info[i].prio) | 1129 | if (p->prio >= swap_info[i].prio) |
1141 | break; | 1130 | break; |
@@ -1146,10 +1135,8 @@ asmlinkage long sys_swapoff(const char __user * specialfile) | |||
1146 | swap_info[prev].next = p - swap_info; | 1135 | swap_info[prev].next = p - swap_info; |
1147 | nr_swap_pages += p->pages; | 1136 | nr_swap_pages += p->pages; |
1148 | total_swap_pages += p->pages; | 1137 | total_swap_pages += p->pages; |
1149 | swap_device_lock(p); | ||
1150 | p->flags |= SWP_WRITEOK; | 1138 | p->flags |= SWP_WRITEOK; |
1151 | swap_device_unlock(p); | 1139 | spin_unlock(&swap_lock); |
1152 | swap_list_unlock(); | ||
1153 | goto out_dput; | 1140 | goto out_dput; |
1154 | } | 1141 | } |
1155 | 1142 | ||
@@ -1157,30 +1144,27 @@ asmlinkage long sys_swapoff(const char __user * specialfile) | |||
1157 | down_write(&swap_unplug_sem); | 1144 | down_write(&swap_unplug_sem); |
1158 | up_write(&swap_unplug_sem); | 1145 | up_write(&swap_unplug_sem); |
1159 | 1146 | ||
1147 | destroy_swap_extents(p); | ||
1148 | down(&swapon_sem); | ||
1149 | spin_lock(&swap_lock); | ||
1150 | drain_mmlist(); | ||
1151 | |||
1160 | /* wait for anyone still in scan_swap_map */ | 1152 | /* wait for anyone still in scan_swap_map */ |
1161 | swap_device_lock(p); | ||
1162 | p->highest_bit = 0; /* cuts scans short */ | 1153 | p->highest_bit = 0; /* cuts scans short */ |
1163 | while (p->flags >= SWP_SCANNING) { | 1154 | while (p->flags >= SWP_SCANNING) { |
1164 | swap_device_unlock(p); | 1155 | spin_unlock(&swap_lock); |
1165 | set_current_state(TASK_UNINTERRUPTIBLE); | 1156 | set_current_state(TASK_UNINTERRUPTIBLE); |
1166 | schedule_timeout(1); | 1157 | schedule_timeout(1); |
1167 | swap_device_lock(p); | 1158 | spin_lock(&swap_lock); |
1168 | } | 1159 | } |
1169 | swap_device_unlock(p); | ||
1170 | 1160 | ||
1171 | destroy_swap_extents(p); | ||
1172 | down(&swapon_sem); | ||
1173 | swap_list_lock(); | ||
1174 | drain_mmlist(); | ||
1175 | swap_device_lock(p); | ||
1176 | swap_file = p->swap_file; | 1161 | swap_file = p->swap_file; |
1177 | p->swap_file = NULL; | 1162 | p->swap_file = NULL; |
1178 | p->max = 0; | 1163 | p->max = 0; |
1179 | swap_map = p->swap_map; | 1164 | swap_map = p->swap_map; |
1180 | p->swap_map = NULL; | 1165 | p->swap_map = NULL; |
1181 | p->flags = 0; | 1166 | p->flags = 0; |
1182 | swap_device_unlock(p); | 1167 | spin_unlock(&swap_lock); |
1183 | swap_list_unlock(); | ||
1184 | up(&swapon_sem); | 1168 | up(&swapon_sem); |
1185 | vfree(swap_map); | 1169 | vfree(swap_map); |
1186 | inode = mapping->host; | 1170 | inode = mapping->host; |
@@ -1324,7 +1308,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1324 | 1308 | ||
1325 | if (!capable(CAP_SYS_ADMIN)) | 1309 | if (!capable(CAP_SYS_ADMIN)) |
1326 | return -EPERM; | 1310 | return -EPERM; |
1327 | swap_list_lock(); | 1311 | spin_lock(&swap_lock); |
1328 | p = swap_info; | 1312 | p = swap_info; |
1329 | for (type = 0 ; type < nr_swapfiles ; type++,p++) | 1313 | for (type = 0 ; type < nr_swapfiles ; type++,p++) |
1330 | if (!(p->flags & SWP_USED)) | 1314 | if (!(p->flags & SWP_USED)) |
@@ -1343,7 +1327,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1343 | * swp_entry_t or the architecture definition of a swap pte. | 1327 | * swp_entry_t or the architecture definition of a swap pte. |
1344 | */ | 1328 | */ |
1345 | if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) { | 1329 | if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) { |
1346 | swap_list_unlock(); | 1330 | spin_unlock(&swap_lock); |
1347 | goto out; | 1331 | goto out; |
1348 | } | 1332 | } |
1349 | if (type >= nr_swapfiles) | 1333 | if (type >= nr_swapfiles) |
@@ -1357,7 +1341,6 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1357 | p->highest_bit = 0; | 1341 | p->highest_bit = 0; |
1358 | p->cluster_nr = 0; | 1342 | p->cluster_nr = 0; |
1359 | p->inuse_pages = 0; | 1343 | p->inuse_pages = 0; |
1360 | spin_lock_init(&p->sdev_lock); | ||
1361 | p->next = -1; | 1344 | p->next = -1; |
1362 | if (swap_flags & SWAP_FLAG_PREFER) { | 1345 | if (swap_flags & SWAP_FLAG_PREFER) { |
1363 | p->prio = | 1346 | p->prio = |
@@ -1365,7 +1348,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1365 | } else { | 1348 | } else { |
1366 | p->prio = --least_priority; | 1349 | p->prio = --least_priority; |
1367 | } | 1350 | } |
1368 | swap_list_unlock(); | 1351 | spin_unlock(&swap_lock); |
1369 | name = getname(specialfile); | 1352 | name = getname(specialfile); |
1370 | error = PTR_ERR(name); | 1353 | error = PTR_ERR(name); |
1371 | if (IS_ERR(name)) { | 1354 | if (IS_ERR(name)) { |
@@ -1542,8 +1525,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1542 | } | 1525 | } |
1543 | 1526 | ||
1544 | down(&swapon_sem); | 1527 | down(&swapon_sem); |
1545 | swap_list_lock(); | 1528 | spin_lock(&swap_lock); |
1546 | swap_device_lock(p); | ||
1547 | p->flags = SWP_ACTIVE; | 1529 | p->flags = SWP_ACTIVE; |
1548 | nr_swap_pages += nr_good_pages; | 1530 | nr_swap_pages += nr_good_pages; |
1549 | total_swap_pages += nr_good_pages; | 1531 | total_swap_pages += nr_good_pages; |
@@ -1567,8 +1549,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1567 | } else { | 1549 | } else { |
1568 | swap_info[prev].next = p - swap_info; | 1550 | swap_info[prev].next = p - swap_info; |
1569 | } | 1551 | } |
1570 | swap_device_unlock(p); | 1552 | spin_unlock(&swap_lock); |
1571 | swap_list_unlock(); | ||
1572 | up(&swapon_sem); | 1553 | up(&swapon_sem); |
1573 | error = 0; | 1554 | error = 0; |
1574 | goto out; | 1555 | goto out; |
@@ -1579,14 +1560,14 @@ bad_swap: | |||
1579 | } | 1560 | } |
1580 | destroy_swap_extents(p); | 1561 | destroy_swap_extents(p); |
1581 | bad_swap_2: | 1562 | bad_swap_2: |
1582 | swap_list_lock(); | 1563 | spin_lock(&swap_lock); |
1583 | swap_map = p->swap_map; | 1564 | swap_map = p->swap_map; |
1584 | p->swap_file = NULL; | 1565 | p->swap_file = NULL; |
1585 | p->swap_map = NULL; | 1566 | p->swap_map = NULL; |
1586 | p->flags = 0; | 1567 | p->flags = 0; |
1587 | if (!(swap_flags & SWAP_FLAG_PREFER)) | 1568 | if (!(swap_flags & SWAP_FLAG_PREFER)) |
1588 | ++least_priority; | 1569 | ++least_priority; |
1589 | swap_list_unlock(); | 1570 | spin_unlock(&swap_lock); |
1590 | vfree(swap_map); | 1571 | vfree(swap_map); |
1591 | if (swap_file) | 1572 | if (swap_file) |
1592 | filp_close(swap_file, NULL); | 1573 | filp_close(swap_file, NULL); |
@@ -1610,7 +1591,7 @@ void si_swapinfo(struct sysinfo *val) | |||
1610 | unsigned int i; | 1591 | unsigned int i; |
1611 | unsigned long nr_to_be_unused = 0; | 1592 | unsigned long nr_to_be_unused = 0; |
1612 | 1593 | ||
1613 | swap_list_lock(); | 1594 | spin_lock(&swap_lock); |
1614 | for (i = 0; i < nr_swapfiles; i++) { | 1595 | for (i = 0; i < nr_swapfiles; i++) { |
1615 | if (!(swap_info[i].flags & SWP_USED) || | 1596 | if (!(swap_info[i].flags & SWP_USED) || |
1616 | (swap_info[i].flags & SWP_WRITEOK)) | 1597 | (swap_info[i].flags & SWP_WRITEOK)) |
@@ -1619,7 +1600,7 @@ void si_swapinfo(struct sysinfo *val) | |||
1619 | } | 1600 | } |
1620 | val->freeswap = nr_swap_pages + nr_to_be_unused; | 1601 | val->freeswap = nr_swap_pages + nr_to_be_unused; |
1621 | val->totalswap = total_swap_pages + nr_to_be_unused; | 1602 | val->totalswap = total_swap_pages + nr_to_be_unused; |
1622 | swap_list_unlock(); | 1603 | spin_unlock(&swap_lock); |
1623 | } | 1604 | } |
1624 | 1605 | ||
1625 | /* | 1606 | /* |
@@ -1640,7 +1621,7 @@ int swap_duplicate(swp_entry_t entry) | |||
1640 | p = type + swap_info; | 1621 | p = type + swap_info; |
1641 | offset = swp_offset(entry); | 1622 | offset = swp_offset(entry); |
1642 | 1623 | ||
1643 | swap_device_lock(p); | 1624 | spin_lock(&swap_lock); |
1644 | if (offset < p->max && p->swap_map[offset]) { | 1625 | if (offset < p->max && p->swap_map[offset]) { |
1645 | if (p->swap_map[offset] < SWAP_MAP_MAX - 1) { | 1626 | if (p->swap_map[offset] < SWAP_MAP_MAX - 1) { |
1646 | p->swap_map[offset]++; | 1627 | p->swap_map[offset]++; |
@@ -1652,7 +1633,7 @@ int swap_duplicate(swp_entry_t entry) | |||
1652 | result = 1; | 1633 | result = 1; |
1653 | } | 1634 | } |
1654 | } | 1635 | } |
1655 | swap_device_unlock(p); | 1636 | spin_unlock(&swap_lock); |
1656 | out: | 1637 | out: |
1657 | return result; | 1638 | return result; |
1658 | 1639 | ||
@@ -1668,7 +1649,7 @@ get_swap_info_struct(unsigned type) | |||
1668 | } | 1649 | } |
1669 | 1650 | ||
1670 | /* | 1651 | /* |
1671 | * swap_device_lock prevents swap_map being freed. Don't grab an extra | 1652 | * swap_lock prevents swap_map being freed. Don't grab an extra |
1672 | * reference on the swaphandle, it doesn't matter if it becomes unused. | 1653 | * reference on the swaphandle, it doesn't matter if it becomes unused. |
1673 | */ | 1654 | */ |
1674 | int valid_swaphandles(swp_entry_t entry, unsigned long *offset) | 1655 | int valid_swaphandles(swp_entry_t entry, unsigned long *offset) |
@@ -1684,7 +1665,7 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset) | |||
1684 | toff++, i--; | 1665 | toff++, i--; |
1685 | *offset = toff; | 1666 | *offset = toff; |
1686 | 1667 | ||
1687 | swap_device_lock(swapdev); | 1668 | spin_lock(&swap_lock); |
1688 | do { | 1669 | do { |
1689 | /* Don't read-ahead past the end of the swap area */ | 1670 | /* Don't read-ahead past the end of the swap area */ |
1690 | if (toff >= swapdev->max) | 1671 | if (toff >= swapdev->max) |
@@ -1697,6 +1678,6 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset) | |||
1697 | toff++; | 1678 | toff++; |
1698 | ret++; | 1679 | ret++; |
1699 | } while (--i); | 1680 | } while (--i); |
1700 | swap_device_unlock(swapdev); | 1681 | spin_unlock(&swap_lock); |
1701 | return ret; | 1682 | return ret; |
1702 | } | 1683 | } |