aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Hugh Dickins <hugh@veritas.com> 2005-09-03 18:54:39 -0400
committer Linus Torvalds <torvalds@evo.osdl.org> 2005-09-05 03:05:41 -0400
commit 52b7efdbe5f5696fc80338560a3fc51e0b0a993c (patch)
tree 30162de9fc8fe3dddb6462f8ff82f1594067cadd
parent 7dfad4183bf9cd92f977caa3c12cc74f0eefc0e6 (diff)
[PATCH] swap: scan_swap_map drop swap_device_lock
get_swap_page has often shown up on latency traces, doing lengthy scans while holding two spinlocks. swap_list_lock is already dropped; now scan_swap_map drops swap_device_lock before scanning the swap_map.

While scanning for an empty cluster, don't worry that racing tasks may allocate what was free and free what was allocated; but when allocating an entry, check it's still free after retaking the lock. Avoid dropping the lock in the expected common path. No barriers beyond the locks, just let the cookie crumble; highest_bit limit is volatile, but benign.

Guard against swapoff: must check SWP_WRITEOK before allocating, must raise SWP_SCANNING reference count while in scan_swap_map, swapoff waits for that to fall - just use schedule_timeout, we don't want to burden scan_swap_map itself, and it's very unlikely that anyone can really still be in scan_swap_map once swapoff gets this far.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- include/linux/swap.h | 2
-rw-r--r-- mm/swapfile.c | 42
2 files changed, 37 insertions, 7 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 93f0eca7f916..db3b5de7c92f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -107,6 +107,8 @@ enum {
107 SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ 107 SWP_USED = (1 << 0), /* is slot in swap_info[] used? */
108 SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ 108 SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */
109 SWP_ACTIVE = (SWP_USED | SWP_WRITEOK), 109 SWP_ACTIVE = (SWP_USED | SWP_WRITEOK),
110 /* add others here before... */
111 SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */
110}; 112};
111 113
112#define SWAP_CLUSTER_MAX 32 114#define SWAP_CLUSTER_MAX 32
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c70248aab536..fdee145afc6f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -98,10 +98,12 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
98 * But we do now try to find an empty cluster. -Andrea 98 * But we do now try to find an empty cluster. -Andrea
99 */ 99 */
100 100
101 si->flags += SWP_SCANNING;
101 if (unlikely(!si->cluster_nr)) { 102 if (unlikely(!si->cluster_nr)) {
102 si->cluster_nr = SWAPFILE_CLUSTER - 1; 103 si->cluster_nr = SWAPFILE_CLUSTER - 1;
103 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) 104 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER)
104 goto lowest; 105 goto lowest;
106 swap_device_unlock(si);
105 107
106 offset = si->lowest_bit; 108 offset = si->lowest_bit;
107 last_in_cluster = offset + SWAPFILE_CLUSTER - 1; 109 last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
@@ -111,10 +113,12 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
111 if (si->swap_map[offset]) 113 if (si->swap_map[offset])
112 last_in_cluster = offset + SWAPFILE_CLUSTER; 114 last_in_cluster = offset + SWAPFILE_CLUSTER;
113 else if (offset == last_in_cluster) { 115 else if (offset == last_in_cluster) {
116 swap_device_lock(si);
114 si->cluster_next = offset-SWAPFILE_CLUSTER-1; 117 si->cluster_next = offset-SWAPFILE_CLUSTER-1;
115 goto cluster; 118 goto cluster;
116 } 119 }
117 } 120 }
121 swap_device_lock(si);
118 goto lowest; 122 goto lowest;
119 } 123 }
120 124
@@ -123,10 +127,12 @@ cluster:
123 offset = si->cluster_next; 127 offset = si->cluster_next;
124 if (offset > si->highest_bit) 128 if (offset > si->highest_bit)
125lowest: offset = si->lowest_bit; 129lowest: offset = si->lowest_bit;
130checks: if (!(si->flags & SWP_WRITEOK))
131 goto no_page;
126 if (!si->highest_bit) 132 if (!si->highest_bit)
127 goto no_page; 133 goto no_page;
128 if (!si->swap_map[offset]) { 134 if (!si->swap_map[offset]) {
129got_page: if (offset == si->lowest_bit) 135 if (offset == si->lowest_bit)
130 si->lowest_bit++; 136 si->lowest_bit++;
131 if (offset == si->highest_bit) 137 if (offset == si->highest_bit)
132 si->highest_bit--; 138 si->highest_bit--;
@@ -137,16 +143,22 @@ got_page: if (offset == si->lowest_bit)
137 } 143 }
138 si->swap_map[offset] = 1; 144 si->swap_map[offset] = 1;
139 si->cluster_next = offset + 1; 145 si->cluster_next = offset + 1;
146 si->flags -= SWP_SCANNING;
140 return offset; 147 return offset;
141 } 148 }
142 149
150 swap_device_unlock(si);
143 while (++offset <= si->highest_bit) { 151 while (++offset <= si->highest_bit) {
144 if (!si->swap_map[offset]) 152 if (!si->swap_map[offset]) {
145 goto got_page; 153 swap_device_lock(si);
154 goto checks;
155 }
146 } 156 }
157 swap_device_lock(si);
147 goto lowest; 158 goto lowest;
148 159
149no_page: 160no_page:
161 si->flags -= SWP_SCANNING;
150 return 0; 162 return 0;
151} 163}
152 164
@@ -1111,10 +1123,6 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
1111 err = try_to_unuse(type); 1123 err = try_to_unuse(type);
1112 current->flags &= ~PF_SWAPOFF; 1124 current->flags &= ~PF_SWAPOFF;
1113 1125
1114 /* wait for any unplug function to finish */
1115 down_write(&swap_unplug_sem);
1116 up_write(&swap_unplug_sem);
1117
1118 if (err) { 1126 if (err) {
1119 /* re-insert swap space back into swap_list */ 1127 /* re-insert swap space back into swap_list */
1120 swap_list_lock(); 1128 swap_list_lock();
@@ -1128,10 +1136,28 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
1128 swap_info[prev].next = p - swap_info; 1136 swap_info[prev].next = p - swap_info;
1129 nr_swap_pages += p->pages; 1137 nr_swap_pages += p->pages;
1130 total_swap_pages += p->pages; 1138 total_swap_pages += p->pages;
1139 swap_device_lock(p);
1131 p->flags |= SWP_WRITEOK; 1140 p->flags |= SWP_WRITEOK;
1141 swap_device_unlock(p);
1132 swap_list_unlock(); 1142 swap_list_unlock();
1133 goto out_dput; 1143 goto out_dput;
1134 } 1144 }
1145
1146 /* wait for any unplug function to finish */
1147 down_write(&swap_unplug_sem);
1148 up_write(&swap_unplug_sem);
1149
1150 /* wait for anyone still in scan_swap_map */
1151 swap_device_lock(p);
1152 p->highest_bit = 0; /* cuts scans short */
1153 while (p->flags >= SWP_SCANNING) {
1154 swap_device_unlock(p);
1155 set_current_state(TASK_UNINTERRUPTIBLE);
1156 schedule_timeout(1);
1157 swap_device_lock(p);
1158 }
1159 swap_device_unlock(p);
1160
1135 destroy_swap_extents(p); 1161 destroy_swap_extents(p);
1136 down(&swapon_sem); 1162 down(&swapon_sem);
1137 swap_list_lock(); 1163 swap_list_lock();
@@ -1431,6 +1457,8 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1431 } 1457 }
1432 1458
1433 p->lowest_bit = 1; 1459 p->lowest_bit = 1;
1460 p->cluster_next = 1;
1461
1434 /* 1462 /*
1435 * Find out how many pages are allowed for a single swap 1463 * Find out how many pages are allowed for a single swap
1436 * device. There are two limiting factors: 1) the number of 1464 * device. There are two limiting factors: 1) the number of