aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Hugh Dickins <hugh@veritas.com> 2005-09-03 18:54:41 -0400
committer Linus Torvalds <torvalds@evo.osdl.org> 2005-09-05 03:05:42 -0400
commit 5d337b9194b1ce3b6fd5f3cb2799455ed2f9a3d1 (patch)
tree 91ed9ef6f4cb5f6a1832f2baaaabd53fcd83513e
parent 048c27fd72816b44e096997d1c6901c3abbfd45b (diff)
[PATCH] swap: swap_lock replace list+device
The idea of a swap_device_lock per device, and a swap_list_lock over them all, is appealing; but in practice almost every holder of swap_device_lock must already hold swap_list_lock, which defeats the purpose of the split.

The only exceptions have been swap_duplicate, valid_swaphandles and an untrodden path in try_to_unuse (plus a few places added in this series). valid_swaphandles doesn't show up high in profiles, but swap_duplicate does demand attention. However, with the hold time in get_swap_pages so much reduced, I've not yet found a load and set of swap device priorities to show even swap_duplicate benefitting from the split. Certainly the split is mere overhead in the common case of a single swap device.

So, replace swap_list_lock and swap_device_lock by spinlock_t swap_lock (generally we seem to prefer an _ in the name, and not hide in a macro).

If someone can show a regression in swap_duplicate, then probably we should add a hashlock for the swap_map entries alone (shorts being anatomic), so as to help the case of the single swap device too.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- Documentation/vm/locking 15
-rw-r--r-- include/linux/swap.h 11
-rw-r--r-- mm/filemap.c 7
-rw-r--r-- mm/rmap.c 3
-rw-r--r-- mm/swapfile.c 125
5 files changed, 66 insertions, 95 deletions
diff --git a/Documentation/vm/locking b/Documentation/vm/locking
index c3ef09ae3bb1..f366fa956179 100644
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@@ -83,19 +83,18 @@ single address space optimization, so that the zap_page_range (from
83vmtruncate) does not lose sending ipi's to cloned threads that might 83vmtruncate) does not lose sending ipi's to cloned threads that might
84be spawned underneath it and go to user mode to drag in pte's into tlbs. 84be spawned underneath it and go to user mode to drag in pte's into tlbs.
85 85
86swap_list_lock/swap_device_lock 86swap_lock
87------------------------------- 87--------------
88The swap devices are chained in priority order from the "swap_list" header. 88The swap devices are chained in priority order from the "swap_list" header.
89The "swap_list" is used for the round-robin swaphandle allocation strategy. 89The "swap_list" is used for the round-robin swaphandle allocation strategy.
90The #free swaphandles is maintained in "nr_swap_pages". These two together 90The #free swaphandles is maintained in "nr_swap_pages". These two together
91are protected by the swap_list_lock. 91are protected by the swap_lock.
92 92
93The swap_device_lock, which is per swap device, protects the reference 93The swap_lock also protects all the device reference counts on the
94counts on the corresponding swaphandles, maintained in the "swap_map" 94corresponding swaphandles, maintained in the "swap_map" array, and the
95array, and the "highest_bit" and "lowest_bit" fields. 95"highest_bit" and "lowest_bit" fields.
96 96
97Both of these are spinlocks, and are never acquired from intr level. The 97The swap_lock is a spinlock, and is never acquired from intr level.
98locking hierarchy is swap_list_lock -> swap_device_lock.
99 98
100To prevent races between swap space deletion or async readahead swapins 99To prevent races between swap space deletion or async readahead swapins
101deciding whether a swap handle is being used, ie worthy of being read in 100deciding whether a swap handle is being used, ie worthy of being read in
diff --git a/include/linux/swap.h b/include/linux/swap.h
index db3b5de7c92f..3c9ff0048153 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -121,7 +121,7 @@ enum {
121 */ 121 */
122struct swap_info_struct { 122struct swap_info_struct {
123 unsigned int flags; 123 unsigned int flags;
124 spinlock_t sdev_lock; 124 int prio; /* swap priority */
125 struct file *swap_file; 125 struct file *swap_file;
126 struct block_device *bdev; 126 struct block_device *bdev;
127 struct list_head extent_list; 127 struct list_head extent_list;
@@ -135,7 +135,6 @@ struct swap_info_struct {
135 unsigned int pages; 135 unsigned int pages;
136 unsigned int max; 136 unsigned int max;
137 unsigned int inuse_pages; 137 unsigned int inuse_pages;
138 int prio; /* swap priority */
139 int next; /* next entry on swap list */ 138 int next; /* next entry on swap list */
140}; 139};
141 140
@@ -221,13 +220,7 @@ extern int can_share_swap_page(struct page *);
221extern int remove_exclusive_swap_page(struct page *); 220extern int remove_exclusive_swap_page(struct page *);
222struct backing_dev_info; 221struct backing_dev_info;
223 222
224extern struct swap_list_t swap_list; 223extern spinlock_t swap_lock;
225extern spinlock_t swaplock;
226
227#define swap_list_lock() spin_lock(&swaplock)
228#define swap_list_unlock() spin_unlock(&swaplock)
229#define swap_device_lock(p) spin_lock(&p->sdev_lock)
230#define swap_device_unlock(p) spin_unlock(&p->sdev_lock)
231 224
232/* linux/mm/thrash.c */ 225/* linux/mm/thrash.c */
233extern struct mm_struct * swap_token_mm; 226extern struct mm_struct * swap_token_mm;
diff --git a/mm/filemap.c b/mm/filemap.c
index c11418dd94e8..edc54436fa94 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -54,9 +54,8 @@
54 * 54 *
55 * ->i_mmap_lock (vmtruncate) 55 * ->i_mmap_lock (vmtruncate)
56 * ->private_lock (__free_pte->__set_page_dirty_buffers) 56 * ->private_lock (__free_pte->__set_page_dirty_buffers)
57 * ->swap_list_lock 57 * ->swap_lock (exclusive_swap_page, others)
58 * ->swap_device_lock (exclusive_swap_page, others) 58 * ->mapping->tree_lock
59 * ->mapping->tree_lock
60 * 59 *
61 * ->i_sem 60 * ->i_sem
62 * ->i_mmap_lock (truncate->unmap_mapping_range) 61 * ->i_mmap_lock (truncate->unmap_mapping_range)
@@ -86,7 +85,7 @@
86 * ->page_table_lock (anon_vma_prepare and various) 85 * ->page_table_lock (anon_vma_prepare and various)
87 * 86 *
88 * ->page_table_lock 87 * ->page_table_lock
89 * ->swap_device_lock (try_to_unmap_one) 88 * ->swap_lock (try_to_unmap_one)
90 * ->private_lock (try_to_unmap_one) 89 * ->private_lock (try_to_unmap_one)
91 * ->tree_lock (try_to_unmap_one) 90 * ->tree_lock (try_to_unmap_one)
92 * ->zone.lru_lock (follow_page->mark_page_accessed) 91 * ->zone.lru_lock (follow_page->mark_page_accessed)
diff --git a/mm/rmap.c b/mm/rmap.c
index 08ac5c7fa91f..facb8cdca665 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -34,9 +34,8 @@
34 * anon_vma->lock 34 * anon_vma->lock
35 * mm->page_table_lock 35 * mm->page_table_lock
36 * zone->lru_lock (in mark_page_accessed) 36 * zone->lru_lock (in mark_page_accessed)
37 * swap_list_lock (in swap_free etc's swap_info_get) 37 * swap_lock (in swap_duplicate, swap_info_get)
38 * mmlist_lock (in mmput, drain_mmlist and others) 38 * mmlist_lock (in mmput, drain_mmlist and others)
39 * swap_device_lock (in swap_duplicate, swap_info_get)
40 * mapping->private_lock (in __set_page_dirty_buffers) 39 * mapping->private_lock (in __set_page_dirty_buffers)
41 * inode_lock (in set_page_dirty's __mark_inode_dirty) 40 * inode_lock (in set_page_dirty's __mark_inode_dirty)
42 * sb_lock (within inode_lock in fs/fs-writeback.c) 41 * sb_lock (within inode_lock in fs/fs-writeback.c)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e675ae55f87d..4b6e8bf986bc 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,7 +31,7 @@
31#include <asm/tlbflush.h> 31#include <asm/tlbflush.h>
32#include <linux/swapops.h> 32#include <linux/swapops.h>
33 33
34DEFINE_SPINLOCK(swaplock); 34DEFINE_SPINLOCK(swap_lock);
35unsigned int nr_swapfiles; 35unsigned int nr_swapfiles;
36long total_swap_pages; 36long total_swap_pages;
37static int swap_overflow; 37static int swap_overflow;
@@ -51,7 +51,7 @@ static DECLARE_MUTEX(swapon_sem);
51 51
52/* 52/*
53 * We need this because the bdev->unplug_fn can sleep and we cannot 53 * We need this because the bdev->unplug_fn can sleep and we cannot
54 * hold swap_list_lock while calling the unplug_fn. And swap_list_lock 54 * hold swap_lock while calling the unplug_fn. And swap_lock
55 * cannot be turned into a semaphore. 55 * cannot be turned into a semaphore.
56 */ 56 */
57static DECLARE_RWSEM(swap_unplug_sem); 57static DECLARE_RWSEM(swap_unplug_sem);
@@ -105,7 +105,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
105 si->cluster_nr = SWAPFILE_CLUSTER - 1; 105 si->cluster_nr = SWAPFILE_CLUSTER - 1;
106 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) 106 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER)
107 goto lowest; 107 goto lowest;
108 swap_device_unlock(si); 108 spin_unlock(&swap_lock);
109 109
110 offset = si->lowest_bit; 110 offset = si->lowest_bit;
111 last_in_cluster = offset + SWAPFILE_CLUSTER - 1; 111 last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
@@ -115,7 +115,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
115 if (si->swap_map[offset]) 115 if (si->swap_map[offset])
116 last_in_cluster = offset + SWAPFILE_CLUSTER; 116 last_in_cluster = offset + SWAPFILE_CLUSTER;
117 else if (offset == last_in_cluster) { 117 else if (offset == last_in_cluster) {
118 swap_device_lock(si); 118 spin_lock(&swap_lock);
119 si->cluster_next = offset-SWAPFILE_CLUSTER-1; 119 si->cluster_next = offset-SWAPFILE_CLUSTER-1;
120 goto cluster; 120 goto cluster;
121 } 121 }
@@ -124,7 +124,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
124 latency_ration = LATENCY_LIMIT; 124 latency_ration = LATENCY_LIMIT;
125 } 125 }
126 } 126 }
127 swap_device_lock(si); 127 spin_lock(&swap_lock);
128 goto lowest; 128 goto lowest;
129 } 129 }
130 130
@@ -153,10 +153,10 @@ checks: if (!(si->flags & SWP_WRITEOK))
153 return offset; 153 return offset;
154 } 154 }
155 155
156 swap_device_unlock(si); 156 spin_unlock(&swap_lock);
157 while (++offset <= si->highest_bit) { 157 while (++offset <= si->highest_bit) {
158 if (!si->swap_map[offset]) { 158 if (!si->swap_map[offset]) {
159 swap_device_lock(si); 159 spin_lock(&swap_lock);
160 goto checks; 160 goto checks;
161 } 161 }
162 if (unlikely(--latency_ration < 0)) { 162 if (unlikely(--latency_ration < 0)) {
@@ -164,7 +164,7 @@ checks: if (!(si->flags & SWP_WRITEOK))
164 latency_ration = LATENCY_LIMIT; 164 latency_ration = LATENCY_LIMIT;
165 } 165 }
166 } 166 }
167 swap_device_lock(si); 167 spin_lock(&swap_lock);
168 goto lowest; 168 goto lowest;
169 169
170no_page: 170no_page:
@@ -179,7 +179,7 @@ swp_entry_t get_swap_page(void)
179 int type, next; 179 int type, next;
180 int wrapped = 0; 180 int wrapped = 0;
181 181
182 swap_list_lock(); 182 spin_lock(&swap_lock);
183 if (nr_swap_pages <= 0) 183 if (nr_swap_pages <= 0)
184 goto noswap; 184 goto noswap;
185 nr_swap_pages--; 185 nr_swap_pages--;
@@ -199,19 +199,17 @@ swp_entry_t get_swap_page(void)
199 continue; 199 continue;
200 200
201 swap_list.next = next; 201 swap_list.next = next;
202 swap_device_lock(si);
203 swap_list_unlock();
204 offset = scan_swap_map(si); 202 offset = scan_swap_map(si);
205 swap_device_unlock(si); 203 if (offset) {
206 if (offset) 204 spin_unlock(&swap_lock);
207 return swp_entry(type, offset); 205 return swp_entry(type, offset);
208 swap_list_lock(); 206 }
209 next = swap_list.next; 207 next = swap_list.next;
210 } 208 }
211 209
212 nr_swap_pages++; 210 nr_swap_pages++;
213noswap: 211noswap:
214 swap_list_unlock(); 212 spin_unlock(&swap_lock);
215 return (swp_entry_t) {0}; 213 return (swp_entry_t) {0};
216} 214}
217 215
@@ -233,8 +231,7 @@ static struct swap_info_struct * swap_info_get(swp_entry_t entry)
233 goto bad_offset; 231 goto bad_offset;
234 if (!p->swap_map[offset]) 232 if (!p->swap_map[offset])
235 goto bad_free; 233 goto bad_free;
236 swap_list_lock(); 234 spin_lock(&swap_lock);
237 swap_device_lock(p);
238 return p; 235 return p;
239 236
240bad_free: 237bad_free:
@@ -252,12 +249,6 @@ out:
252 return NULL; 249 return NULL;
253} 250}
254 251
255static void swap_info_put(struct swap_info_struct * p)
256{
257 swap_device_unlock(p);
258 swap_list_unlock();
259}
260
261static int swap_entry_free(struct swap_info_struct *p, unsigned long offset) 252static int swap_entry_free(struct swap_info_struct *p, unsigned long offset)
262{ 253{
263 int count = p->swap_map[offset]; 254 int count = p->swap_map[offset];
@@ -290,7 +281,7 @@ void swap_free(swp_entry_t entry)
290 p = swap_info_get(entry); 281 p = swap_info_get(entry);
291 if (p) { 282 if (p) {
292 swap_entry_free(p, swp_offset(entry)); 283 swap_entry_free(p, swp_offset(entry));
293 swap_info_put(p); 284 spin_unlock(&swap_lock);
294 } 285 }
295} 286}
296 287
@@ -308,7 +299,7 @@ static inline int page_swapcount(struct page *page)
308 if (p) { 299 if (p) {
309 /* Subtract the 1 for the swap cache itself */ 300 /* Subtract the 1 for the swap cache itself */
310 count = p->swap_map[swp_offset(entry)] - 1; 301 count = p->swap_map[swp_offset(entry)] - 1;
311 swap_info_put(p); 302 spin_unlock(&swap_lock);
312 } 303 }
313 return count; 304 return count;
314} 305}
@@ -365,7 +356,7 @@ int remove_exclusive_swap_page(struct page *page)
365 } 356 }
366 write_unlock_irq(&swapper_space.tree_lock); 357 write_unlock_irq(&swapper_space.tree_lock);
367 } 358 }
368 swap_info_put(p); 359 spin_unlock(&swap_lock);
369 360
370 if (retval) { 361 if (retval) {
371 swap_free(entry); 362 swap_free(entry);
@@ -388,7 +379,7 @@ void free_swap_and_cache(swp_entry_t entry)
388 if (p) { 379 if (p) {
389 if (swap_entry_free(p, swp_offset(entry)) == 1) 380 if (swap_entry_free(p, swp_offset(entry)) == 1)
390 page = find_trylock_page(&swapper_space, entry.val); 381 page = find_trylock_page(&swapper_space, entry.val);
391 swap_info_put(p); 382 spin_unlock(&swap_lock);
392 } 383 }
393 if (page) { 384 if (page) {
394 int one_user; 385 int one_user;
@@ -558,10 +549,10 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
558 int count; 549 int count;
559 550
560 /* 551 /*
561 * No need for swap_device_lock(si) here: we're just looking 552 * No need for swap_lock here: we're just looking
562 * for whether an entry is in use, not modifying it; false 553 * for whether an entry is in use, not modifying it; false
563 * hits are okay, and sys_swapoff() has already prevented new 554 * hits are okay, and sys_swapoff() has already prevented new
564 * allocations from this area (while holding swap_list_lock()). 555 * allocations from this area (while holding swap_lock).
565 */ 556 */
566 for (;;) { 557 for (;;) {
567 if (++i >= max) { 558 if (++i >= max) {
@@ -751,9 +742,9 @@ static int try_to_unuse(unsigned int type)
751 * report them; but do report if we reset SWAP_MAP_MAX. 742 * report them; but do report if we reset SWAP_MAP_MAX.
752 */ 743 */
753 if (*swap_map == SWAP_MAP_MAX) { 744 if (*swap_map == SWAP_MAP_MAX) {
754 swap_device_lock(si); 745 spin_lock(&swap_lock);
755 *swap_map = 1; 746 *swap_map = 1;
756 swap_device_unlock(si); 747 spin_unlock(&swap_lock);
757 reset_overflow = 1; 748 reset_overflow = 1;
758 } 749 }
759 750
@@ -817,9 +808,9 @@ static int try_to_unuse(unsigned int type)
817} 808}
818 809
819/* 810/*
820 * After a successful try_to_unuse, if no swap is now in use, we know we 811 * After a successful try_to_unuse, if no swap is now in use, we know
821 * can empty the mmlist. swap_list_lock must be held on entry and exit. 812 * we can empty the mmlist. swap_lock must be held on entry and exit.
822 * Note that mmlist_lock nests inside swap_list_lock, and an mm must be 813 * Note that mmlist_lock nests inside swap_lock, and an mm must be
823 * added to the mmlist just after page_duplicate - before would be racy. 814 * added to the mmlist just after page_duplicate - before would be racy.
824 */ 815 */
825static void drain_mmlist(void) 816static void drain_mmlist(void)
@@ -1092,7 +1083,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
1092 1083
1093 mapping = victim->f_mapping; 1084 mapping = victim->f_mapping;
1094 prev = -1; 1085 prev = -1;
1095 swap_list_lock(); 1086 spin_lock(&swap_lock);
1096 for (type = swap_list.head; type >= 0; type = swap_info[type].next) { 1087 for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
1097 p = swap_info + type; 1088 p = swap_info + type;
1098 if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) { 1089 if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) {
@@ -1103,14 +1094,14 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
1103 } 1094 }
1104 if (type < 0) { 1095 if (type < 0) {
1105 err = -EINVAL; 1096 err = -EINVAL;
1106 swap_list_unlock(); 1097 spin_unlock(&swap_lock);
1107 goto out_dput; 1098 goto out_dput;
1108 } 1099 }
1109 if (!security_vm_enough_memory(p->pages)) 1100 if (!security_vm_enough_memory(p->pages))
1110 vm_unacct_memory(p->pages); 1101 vm_unacct_memory(p->pages);
1111 else { 1102 else {
1112 err = -ENOMEM; 1103 err = -ENOMEM;
1113 swap_list_unlock(); 1104 spin_unlock(&swap_lock);
1114 goto out_dput; 1105 goto out_dput;
1115 } 1106 }
1116 if (prev < 0) { 1107 if (prev < 0) {
@@ -1124,10 +1115,8 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
1124 } 1115 }
1125 nr_swap_pages -= p->pages; 1116 nr_swap_pages -= p->pages;
1126 total_swap_pages -= p->pages; 1117 total_swap_pages -= p->pages;
1127 swap_device_lock(p);
1128 p->flags &= ~SWP_WRITEOK; 1118 p->flags &= ~SWP_WRITEOK;
1129 swap_device_unlock(p); 1119 spin_unlock(&swap_lock);
1130 swap_list_unlock();
1131 1120
1132 current->flags |= PF_SWAPOFF; 1121 current->flags |= PF_SWAPOFF;
1133 err = try_to_unuse(type); 1122 err = try_to_unuse(type);
@@ -1135,7 +1124,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
1135 1124
1136 if (err) { 1125 if (err) {
1137 /* re-insert swap space back into swap_list */ 1126 /* re-insert swap space back into swap_list */
1138 swap_list_lock(); 1127 spin_lock(&swap_lock);
1139 for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next) 1128 for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next)
1140 if (p->prio >= swap_info[i].prio) 1129 if (p->prio >= swap_info[i].prio)
1141 break; 1130 break;
@@ -1146,10 +1135,8 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
1146 swap_info[prev].next = p - swap_info; 1135 swap_info[prev].next = p - swap_info;
1147 nr_swap_pages += p->pages; 1136 nr_swap_pages += p->pages;
1148 total_swap_pages += p->pages; 1137 total_swap_pages += p->pages;
1149 swap_device_lock(p);
1150 p->flags |= SWP_WRITEOK; 1138 p->flags |= SWP_WRITEOK;
1151 swap_device_unlock(p); 1139 spin_unlock(&swap_lock);
1152 swap_list_unlock();
1153 goto out_dput; 1140 goto out_dput;
1154 } 1141 }
1155 1142
@@ -1157,30 +1144,27 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
1157 down_write(&swap_unplug_sem); 1144 down_write(&swap_unplug_sem);
1158 up_write(&swap_unplug_sem); 1145 up_write(&swap_unplug_sem);
1159 1146
1147 destroy_swap_extents(p);
1148 down(&swapon_sem);
1149 spin_lock(&swap_lock);
1150 drain_mmlist();
1151
1160 /* wait for anyone still in scan_swap_map */ 1152 /* wait for anyone still in scan_swap_map */
1161 swap_device_lock(p);
1162 p->highest_bit = 0; /* cuts scans short */ 1153 p->highest_bit = 0; /* cuts scans short */
1163 while (p->flags >= SWP_SCANNING) { 1154 while (p->flags >= SWP_SCANNING) {
1164 swap_device_unlock(p); 1155 spin_unlock(&swap_lock);
1165 set_current_state(TASK_UNINTERRUPTIBLE); 1156 set_current_state(TASK_UNINTERRUPTIBLE);
1166 schedule_timeout(1); 1157 schedule_timeout(1);
1167 swap_device_lock(p); 1158 spin_lock(&swap_lock);
1168 } 1159 }
1169 swap_device_unlock(p);
1170 1160
1171 destroy_swap_extents(p);
1172 down(&swapon_sem);
1173 swap_list_lock();
1174 drain_mmlist();
1175 swap_device_lock(p);
1176 swap_file = p->swap_file; 1161 swap_file = p->swap_file;
1177 p->swap_file = NULL; 1162 p->swap_file = NULL;
1178 p->max = 0; 1163 p->max = 0;
1179 swap_map = p->swap_map; 1164 swap_map = p->swap_map;
1180 p->swap_map = NULL; 1165 p->swap_map = NULL;
1181 p->flags = 0; 1166 p->flags = 0;
1182 swap_device_unlock(p); 1167 spin_unlock(&swap_lock);
1183 swap_list_unlock();
1184 up(&swapon_sem); 1168 up(&swapon_sem);
1185 vfree(swap_map); 1169 vfree(swap_map);
1186 inode = mapping->host; 1170 inode = mapping->host;
@@ -1324,7 +1308,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1324 1308
1325 if (!capable(CAP_SYS_ADMIN)) 1309 if (!capable(CAP_SYS_ADMIN))
1326 return -EPERM; 1310 return -EPERM;
1327 swap_list_lock(); 1311 spin_lock(&swap_lock);
1328 p = swap_info; 1312 p = swap_info;
1329 for (type = 0 ; type < nr_swapfiles ; type++,p++) 1313 for (type = 0 ; type < nr_swapfiles ; type++,p++)
1330 if (!(p->flags & SWP_USED)) 1314 if (!(p->flags & SWP_USED))
@@ -1343,7 +1327,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1343 * swp_entry_t or the architecture definition of a swap pte. 1327 * swp_entry_t or the architecture definition of a swap pte.
1344 */ 1328 */
1345 if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) { 1329 if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) {
1346 swap_list_unlock(); 1330 spin_unlock(&swap_lock);
1347 goto out; 1331 goto out;
1348 } 1332 }
1349 if (type >= nr_swapfiles) 1333 if (type >= nr_swapfiles)
@@ -1357,7 +1341,6 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1357 p->highest_bit = 0; 1341 p->highest_bit = 0;
1358 p->cluster_nr = 0; 1342 p->cluster_nr = 0;
1359 p->inuse_pages = 0; 1343 p->inuse_pages = 0;
1360 spin_lock_init(&p->sdev_lock);
1361 p->next = -1; 1344 p->next = -1;
1362 if (swap_flags & SWAP_FLAG_PREFER) { 1345 if (swap_flags & SWAP_FLAG_PREFER) {
1363 p->prio = 1346 p->prio =
@@ -1365,7 +1348,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1365 } else { 1348 } else {
1366 p->prio = --least_priority; 1349 p->prio = --least_priority;
1367 } 1350 }
1368 swap_list_unlock(); 1351 spin_unlock(&swap_lock);
1369 name = getname(specialfile); 1352 name = getname(specialfile);
1370 error = PTR_ERR(name); 1353 error = PTR_ERR(name);
1371 if (IS_ERR(name)) { 1354 if (IS_ERR(name)) {
@@ -1542,8 +1525,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1542 } 1525 }
1543 1526
1544 down(&swapon_sem); 1527 down(&swapon_sem);
1545 swap_list_lock(); 1528 spin_lock(&swap_lock);
1546 swap_device_lock(p);
1547 p->flags = SWP_ACTIVE; 1529 p->flags = SWP_ACTIVE;
1548 nr_swap_pages += nr_good_pages; 1530 nr_swap_pages += nr_good_pages;
1549 total_swap_pages += nr_good_pages; 1531 total_swap_pages += nr_good_pages;
@@ -1567,8 +1549,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1567 } else { 1549 } else {
1568 swap_info[prev].next = p - swap_info; 1550 swap_info[prev].next = p - swap_info;
1569 } 1551 }
1570 swap_device_unlock(p); 1552 spin_unlock(&swap_lock);
1571 swap_list_unlock();
1572 up(&swapon_sem); 1553 up(&swapon_sem);
1573 error = 0; 1554 error = 0;
1574 goto out; 1555 goto out;
@@ -1579,14 +1560,14 @@ bad_swap:
1579 } 1560 }
1580 destroy_swap_extents(p); 1561 destroy_swap_extents(p);
1581bad_swap_2: 1562bad_swap_2:
1582 swap_list_lock(); 1563 spin_lock(&swap_lock);
1583 swap_map = p->swap_map; 1564 swap_map = p->swap_map;
1584 p->swap_file = NULL; 1565 p->swap_file = NULL;
1585 p->swap_map = NULL; 1566 p->swap_map = NULL;
1586 p->flags = 0; 1567 p->flags = 0;
1587 if (!(swap_flags & SWAP_FLAG_PREFER)) 1568 if (!(swap_flags & SWAP_FLAG_PREFER))
1588 ++least_priority; 1569 ++least_priority;
1589 swap_list_unlock(); 1570 spin_unlock(&swap_lock);
1590 vfree(swap_map); 1571 vfree(swap_map);
1591 if (swap_file) 1572 if (swap_file)
1592 filp_close(swap_file, NULL); 1573 filp_close(swap_file, NULL);
@@ -1610,7 +1591,7 @@ void si_swapinfo(struct sysinfo *val)
1610 unsigned int i; 1591 unsigned int i;
1611 unsigned long nr_to_be_unused = 0; 1592 unsigned long nr_to_be_unused = 0;
1612 1593
1613 swap_list_lock(); 1594 spin_lock(&swap_lock);
1614 for (i = 0; i < nr_swapfiles; i++) { 1595 for (i = 0; i < nr_swapfiles; i++) {
1615 if (!(swap_info[i].flags & SWP_USED) || 1596 if (!(swap_info[i].flags & SWP_USED) ||
1616 (swap_info[i].flags & SWP_WRITEOK)) 1597 (swap_info[i].flags & SWP_WRITEOK))
@@ -1619,7 +1600,7 @@ void si_swapinfo(struct sysinfo *val)
1619 } 1600 }
1620 val->freeswap = nr_swap_pages + nr_to_be_unused; 1601 val->freeswap = nr_swap_pages + nr_to_be_unused;
1621 val->totalswap = total_swap_pages + nr_to_be_unused; 1602 val->totalswap = total_swap_pages + nr_to_be_unused;
1622 swap_list_unlock(); 1603 spin_unlock(&swap_lock);
1623} 1604}
1624 1605
1625/* 1606/*
@@ -1640,7 +1621,7 @@ int swap_duplicate(swp_entry_t entry)
1640 p = type + swap_info; 1621 p = type + swap_info;
1641 offset = swp_offset(entry); 1622 offset = swp_offset(entry);
1642 1623
1643 swap_device_lock(p); 1624 spin_lock(&swap_lock);
1644 if (offset < p->max && p->swap_map[offset]) { 1625 if (offset < p->max && p->swap_map[offset]) {
1645 if (p->swap_map[offset] < SWAP_MAP_MAX - 1) { 1626 if (p->swap_map[offset] < SWAP_MAP_MAX - 1) {
1646 p->swap_map[offset]++; 1627 p->swap_map[offset]++;
@@ -1652,7 +1633,7 @@ int swap_duplicate(swp_entry_t entry)
1652 result = 1; 1633 result = 1;
1653 } 1634 }
1654 } 1635 }
1655 swap_device_unlock(p); 1636 spin_unlock(&swap_lock);
1656out: 1637out:
1657 return result; 1638 return result;
1658 1639
@@ -1668,7 +1649,7 @@ get_swap_info_struct(unsigned type)
1668} 1649}
1669 1650
1670/* 1651/*
1671 * swap_device_lock prevents swap_map being freed. Don't grab an extra 1652 * swap_lock prevents swap_map being freed. Don't grab an extra
1672 * reference on the swaphandle, it doesn't matter if it becomes unused. 1653 * reference on the swaphandle, it doesn't matter if it becomes unused.
1673 */ 1654 */
1674int valid_swaphandles(swp_entry_t entry, unsigned long *offset) 1655int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
@@ -1684,7 +1665,7 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
1684 toff++, i--; 1665 toff++, i--;
1685 *offset = toff; 1666 *offset = toff;
1686 1667
1687 swap_device_lock(swapdev); 1668 spin_lock(&swap_lock);
1688 do { 1669 do {
1689 /* Don't read-ahead past the end of the swap area */ 1670 /* Don't read-ahead past the end of the swap area */
1690 if (toff >= swapdev->max) 1671 if (toff >= swapdev->max)
@@ -1697,6 +1678,6 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
1697 toff++; 1678 toff++;
1698 ret++; 1679 ret++;
1699 } while (--i); 1680 } while (--i);
1700 swap_device_unlock(swapdev); 1681 spin_unlock(&swap_lock);
1701 return ret; 1682 return ret;
1702} 1683}