diff options
author | Steve French <sfrench@us.ibm.com> | 2006-02-22 18:38:53 -0500 |
---|---|---|
committer | Steve French <sfrench@us.ibm.com> | 2006-02-22 18:38:53 -0500 |
commit | 5d2f248a5f3acac4b763439327c92091be7abb1c (patch) | |
tree | 8f30098a6d17c0367adfbb780e5a8d9a5c43ad5a /mm | |
parent | a048d7a8704b35ff6372fdf5eedd4533f37b1885 (diff) | |
parent | 9e956c2dac9bec602ed1ba29181b45ba6d2b6448 (diff) |
Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Signed-off-by: Steve French <sfrench@us.ibm.com>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/mempolicy.c | 4 | ||||
-rw-r--r-- | mm/nommu.c | 2 | ||||
-rw-r--r-- | mm/oom_kill.c | 123 | ||||
-rw-r--r-- | mm/page_alloc.c | 2 | ||||
-rw-r--r-- | mm/shmem.c | 81 |
5 files changed, 165 insertions, 47 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index bedfa4f09c80..880831bd3003 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -587,7 +587,7 @@ redo: | |||
587 | } | 587 | } |
588 | list_add(&page->lru, &newlist); | 588 | list_add(&page->lru, &newlist); |
589 | nr_pages++; | 589 | nr_pages++; |
590 | if (nr_pages > MIGRATE_CHUNK_SIZE); | 590 | if (nr_pages > MIGRATE_CHUNK_SIZE) |
591 | break; | 591 | break; |
592 | } | 592 | } |
593 | err = migrate_pages(pagelist, &newlist, &moved, &failed); | 593 | err = migrate_pages(pagelist, &newlist, &moved, &failed); |
@@ -808,7 +808,7 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, | |||
808 | nodes_clear(*nodes); | 808 | nodes_clear(*nodes); |
809 | if (maxnode == 0 || !nmask) | 809 | if (maxnode == 0 || !nmask) |
810 | return 0; | 810 | return 0; |
811 | if (maxnode > PAGE_SIZE) | 811 | if (maxnode > PAGE_SIZE*BITS_PER_BYTE) |
812 | return -EINVAL; | 812 | return -EINVAL; |
813 | 813 | ||
814 | nlongs = BITS_TO_LONGS(maxnode); | 814 | nlongs = BITS_TO_LONGS(maxnode); |
diff --git a/mm/nommu.c b/mm/nommu.c index c10262d68232..99d21020ec9d 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -57,6 +57,8 @@ EXPORT_SYMBOL(vmalloc); | |||
57 | EXPORT_SYMBOL(vfree); | 57 | EXPORT_SYMBOL(vfree); |
58 | EXPORT_SYMBOL(vmalloc_to_page); | 58 | EXPORT_SYMBOL(vmalloc_to_page); |
59 | EXPORT_SYMBOL(vmalloc_32); | 59 | EXPORT_SYMBOL(vmalloc_32); |
60 | EXPORT_SYMBOL(vmap); | ||
61 | EXPORT_SYMBOL(vunmap); | ||
60 | 62 | ||
61 | /* | 63 | /* |
62 | * Handle all mappings that got truncated by a "truncate()" | 64 | * Handle all mappings that got truncated by a "truncate()" |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b05ab8f2a562..8123fad5a485 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -58,15 +58,17 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) | |||
58 | 58 | ||
59 | /* | 59 | /* |
60 | * Processes which fork a lot of child processes are likely | 60 | * Processes which fork a lot of child processes are likely |
61 | * a good choice. We add the vmsize of the children if they | 61 | * a good choice. We add half the vmsize of the children if they |
62 | * have an own mm. This prevents forking servers to flood the | 62 | * have an own mm. This prevents forking servers to flood the |
63 | * machine with an endless amount of children | 63 | * machine with an endless amount of children. In case a single |
64 | * child is eating the vast majority of memory, adding only half | ||
65 | * to the parents will make the child our kill candidate of choice. | ||
64 | */ | 66 | */ |
65 | list_for_each(tsk, &p->children) { | 67 | list_for_each(tsk, &p->children) { |
66 | struct task_struct *chld; | 68 | struct task_struct *chld; |
67 | chld = list_entry(tsk, struct task_struct, sibling); | 69 | chld = list_entry(tsk, struct task_struct, sibling); |
68 | if (chld->mm != p->mm && chld->mm) | 70 | if (chld->mm != p->mm && chld->mm) |
69 | points += chld->mm->total_vm; | 71 | points += chld->mm->total_vm/2 + 1; |
70 | } | 72 | } |
71 | 73 | ||
72 | /* | 74 | /* |
@@ -131,17 +133,47 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) | |||
131 | } | 133 | } |
132 | 134 | ||
133 | /* | 135 | /* |
136 | * Types of limitations to the nodes from which allocations may occur | ||
137 | */ | ||
138 | #define CONSTRAINT_NONE 1 | ||
139 | #define CONSTRAINT_MEMORY_POLICY 2 | ||
140 | #define CONSTRAINT_CPUSET 3 | ||
141 | |||
142 | /* | ||
143 | * Determine the type of allocation constraint. | ||
144 | */ | ||
145 | static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask) | ||
146 | { | ||
147 | #ifdef CONFIG_NUMA | ||
148 | struct zone **z; | ||
149 | nodemask_t nodes = node_online_map; | ||
150 | |||
151 | for (z = zonelist->zones; *z; z++) | ||
152 | if (cpuset_zone_allowed(*z, gfp_mask)) | ||
153 | node_clear((*z)->zone_pgdat->node_id, | ||
154 | nodes); | ||
155 | else | ||
156 | return CONSTRAINT_CPUSET; | ||
157 | |||
158 | if (!nodes_empty(nodes)) | ||
159 | return CONSTRAINT_MEMORY_POLICY; | ||
160 | #endif | ||
161 | |||
162 | return CONSTRAINT_NONE; | ||
163 | } | ||
164 | |||
165 | /* | ||
134 | * Simple selection loop. We chose the process with the highest | 166 | * Simple selection loop. We chose the process with the highest |
135 | * number of 'points'. We expect the caller will lock the tasklist. | 167 | * number of 'points'. We expect the caller will lock the tasklist. |
136 | * | 168 | * |
137 | * (not docbooked, we don't want this one cluttering up the manual) | 169 | * (not docbooked, we don't want this one cluttering up the manual) |
138 | */ | 170 | */ |
139 | static struct task_struct * select_bad_process(void) | 171 | static struct task_struct *select_bad_process(unsigned long *ppoints) |
140 | { | 172 | { |
141 | unsigned long maxpoints = 0; | ||
142 | struct task_struct *g, *p; | 173 | struct task_struct *g, *p; |
143 | struct task_struct *chosen = NULL; | 174 | struct task_struct *chosen = NULL; |
144 | struct timespec uptime; | 175 | struct timespec uptime; |
176 | *ppoints = 0; | ||
145 | 177 | ||
146 | do_posix_clock_monotonic_gettime(&uptime); | 178 | do_posix_clock_monotonic_gettime(&uptime); |
147 | do_each_thread(g, p) { | 179 | do_each_thread(g, p) { |
@@ -169,9 +201,9 @@ static struct task_struct * select_bad_process(void) | |||
169 | return p; | 201 | return p; |
170 | 202 | ||
171 | points = badness(p, uptime.tv_sec); | 203 | points = badness(p, uptime.tv_sec); |
172 | if (points > maxpoints || !chosen) { | 204 | if (points > *ppoints || !chosen) { |
173 | chosen = p; | 205 | chosen = p; |
174 | maxpoints = points; | 206 | *ppoints = points; |
175 | } | 207 | } |
176 | } while_each_thread(g, p); | 208 | } while_each_thread(g, p); |
177 | return chosen; | 209 | return chosen; |
@@ -182,7 +214,7 @@ static struct task_struct * select_bad_process(void) | |||
182 | * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that | 214 | * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that |
183 | * we select a process with CAP_SYS_RAW_IO set). | 215 | * we select a process with CAP_SYS_RAW_IO set). |
184 | */ | 216 | */ |
185 | static void __oom_kill_task(task_t *p) | 217 | static void __oom_kill_task(task_t *p, const char *message) |
186 | { | 218 | { |
187 | if (p->pid == 1) { | 219 | if (p->pid == 1) { |
188 | WARN_ON(1); | 220 | WARN_ON(1); |
@@ -198,8 +230,8 @@ static void __oom_kill_task(task_t *p) | |||
198 | return; | 230 | return; |
199 | } | 231 | } |
200 | task_unlock(p); | 232 | task_unlock(p); |
201 | printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", | 233 | printk(KERN_ERR "%s: Killed process %d (%s).\n", |
202 | p->pid, p->comm); | 234 | message, p->pid, p->comm); |
203 | 235 | ||
204 | /* | 236 | /* |
205 | * We give our sacrificial lamb high priority and access to | 237 | * We give our sacrificial lamb high priority and access to |
@@ -212,7 +244,7 @@ static void __oom_kill_task(task_t *p) | |||
212 | force_sig(SIGKILL, p); | 244 | force_sig(SIGKILL, p); |
213 | } | 245 | } |
214 | 246 | ||
215 | static struct mm_struct *oom_kill_task(task_t *p) | 247 | static struct mm_struct *oom_kill_task(task_t *p, const char *message) |
216 | { | 248 | { |
217 | struct mm_struct *mm = get_task_mm(p); | 249 | struct mm_struct *mm = get_task_mm(p); |
218 | task_t * g, * q; | 250 | task_t * g, * q; |
@@ -224,35 +256,38 @@ static struct mm_struct *oom_kill_task(task_t *p) | |||
224 | return NULL; | 256 | return NULL; |
225 | } | 257 | } |
226 | 258 | ||
227 | __oom_kill_task(p); | 259 | __oom_kill_task(p, message); |
228 | /* | 260 | /* |
229 | * kill all processes that share the ->mm (i.e. all threads), | 261 | * kill all processes that share the ->mm (i.e. all threads), |
230 | * but are in a different thread group | 262 | * but are in a different thread group |
231 | */ | 263 | */ |
232 | do_each_thread(g, q) | 264 | do_each_thread(g, q) |
233 | if (q->mm == mm && q->tgid != p->tgid) | 265 | if (q->mm == mm && q->tgid != p->tgid) |
234 | __oom_kill_task(q); | 266 | __oom_kill_task(q, message); |
235 | while_each_thread(g, q); | 267 | while_each_thread(g, q); |
236 | 268 | ||
237 | return mm; | 269 | return mm; |
238 | } | 270 | } |
239 | 271 | ||
240 | static struct mm_struct *oom_kill_process(struct task_struct *p) | 272 | static struct mm_struct *oom_kill_process(struct task_struct *p, |
273 | unsigned long points, const char *message) | ||
241 | { | 274 | { |
242 | struct mm_struct *mm; | 275 | struct mm_struct *mm; |
243 | struct task_struct *c; | 276 | struct task_struct *c; |
244 | struct list_head *tsk; | 277 | struct list_head *tsk; |
245 | 278 | ||
279 | printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li and " | ||
280 | "children.\n", p->pid, p->comm, points); | ||
246 | /* Try to kill a child first */ | 281 | /* Try to kill a child first */ |
247 | list_for_each(tsk, &p->children) { | 282 | list_for_each(tsk, &p->children) { |
248 | c = list_entry(tsk, struct task_struct, sibling); | 283 | c = list_entry(tsk, struct task_struct, sibling); |
249 | if (c->mm == p->mm) | 284 | if (c->mm == p->mm) |
250 | continue; | 285 | continue; |
251 | mm = oom_kill_task(c); | 286 | mm = oom_kill_task(c, message); |
252 | if (mm) | 287 | if (mm) |
253 | return mm; | 288 | return mm; |
254 | } | 289 | } |
255 | return oom_kill_task(p); | 290 | return oom_kill_task(p, message); |
256 | } | 291 | } |
257 | 292 | ||
258 | /** | 293 | /** |
@@ -263,10 +298,11 @@ static struct mm_struct *oom_kill_process(struct task_struct *p) | |||
263 | * OR try to be smart about which process to kill. Note that we | 298 | * OR try to be smart about which process to kill. Note that we |
264 | * don't have to be perfect here, we just have to be good. | 299 | * don't have to be perfect here, we just have to be good. |
265 | */ | 300 | */ |
266 | void out_of_memory(gfp_t gfp_mask, int order) | 301 | void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) |
267 | { | 302 | { |
268 | struct mm_struct *mm = NULL; | 303 | struct mm_struct *mm = NULL; |
269 | task_t * p; | 304 | task_t *p; |
305 | unsigned long points; | ||
270 | 306 | ||
271 | if (printk_ratelimit()) { | 307 | if (printk_ratelimit()) { |
272 | printk("oom-killer: gfp_mask=0x%x, order=%d\n", | 308 | printk("oom-killer: gfp_mask=0x%x, order=%d\n", |
@@ -277,25 +313,48 @@ void out_of_memory(gfp_t gfp_mask, int order) | |||
277 | 313 | ||
278 | cpuset_lock(); | 314 | cpuset_lock(); |
279 | read_lock(&tasklist_lock); | 315 | read_lock(&tasklist_lock); |
316 | |||
317 | /* | ||
318 | * Check if there were limitations on the allocation (only relevant for | ||
319 | * NUMA) that may require different handling. | ||
320 | */ | ||
321 | switch (constrained_alloc(zonelist, gfp_mask)) { | ||
322 | case CONSTRAINT_MEMORY_POLICY: | ||
323 | mm = oom_kill_process(current, points, | ||
324 | "No available memory (MPOL_BIND)"); | ||
325 | break; | ||
326 | |||
327 | case CONSTRAINT_CPUSET: | ||
328 | mm = oom_kill_process(current, points, | ||
329 | "No available memory in cpuset"); | ||
330 | break; | ||
331 | |||
332 | case CONSTRAINT_NONE: | ||
280 | retry: | 333 | retry: |
281 | p = select_bad_process(); | 334 | /* |
335 | * Rambo mode: Shoot down a process and hope it solves whatever | ||
336 | * issues we may have. | ||
337 | */ | ||
338 | p = select_bad_process(&points); | ||
282 | 339 | ||
283 | if (PTR_ERR(p) == -1UL) | 340 | if (PTR_ERR(p) == -1UL) |
284 | goto out; | 341 | goto out; |
285 | 342 | ||
286 | /* Found nothing?!?! Either we hang forever, or we panic. */ | 343 | /* Found nothing?!?! Either we hang forever, or we panic. */ |
287 | if (!p) { | 344 | if (!p) { |
288 | read_unlock(&tasklist_lock); | 345 | read_unlock(&tasklist_lock); |
289 | cpuset_unlock(); | 346 | cpuset_unlock(); |
290 | panic("Out of memory and no killable processes...\n"); | 347 | panic("Out of memory and no killable processes...\n"); |
291 | } | 348 | } |
292 | 349 | ||
293 | mm = oom_kill_process(p); | 350 | mm = oom_kill_process(p, points, "Out of memory"); |
294 | if (!mm) | 351 | if (!mm) |
295 | goto retry; | 352 | goto retry; |
353 | |||
354 | break; | ||
355 | } | ||
296 | 356 | ||
297 | out: | 357 | out: |
298 | read_unlock(&tasklist_lock); | ||
299 | cpuset_unlock(); | 358 | cpuset_unlock(); |
300 | if (mm) | 359 | if (mm) |
301 | mmput(mm); | 360 | mmput(mm); |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 208812b25597..791690d7d3fa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -1015,7 +1015,7 @@ rebalance: | |||
1015 | if (page) | 1015 | if (page) |
1016 | goto got_pg; | 1016 | goto got_pg; |
1017 | 1017 | ||
1018 | out_of_memory(gfp_mask, order); | 1018 | out_of_memory(zonelist, gfp_mask, order); |
1019 | goto restart; | 1019 | goto restart; |
1020 | } | 1020 | } |
1021 | 1021 | ||
diff --git a/mm/shmem.c b/mm/shmem.c index f7ac7b812f92..7c455fbaff7b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/swapops.h> | 45 | #include <linux/swapops.h> |
46 | #include <linux/mempolicy.h> | 46 | #include <linux/mempolicy.h> |
47 | #include <linux/namei.h> | 47 | #include <linux/namei.h> |
48 | #include <linux/ctype.h> | ||
48 | #include <asm/uaccess.h> | 49 | #include <asm/uaccess.h> |
49 | #include <asm/div64.h> | 50 | #include <asm/div64.h> |
50 | #include <asm/pgtable.h> | 51 | #include <asm/pgtable.h> |
@@ -874,6 +875,51 @@ redirty: | |||
874 | } | 875 | } |
875 | 876 | ||
876 | #ifdef CONFIG_NUMA | 877 | #ifdef CONFIG_NUMA |
878 | static int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes) | ||
879 | { | ||
880 | char *nodelist = strchr(value, ':'); | ||
881 | int err = 1; | ||
882 | |||
883 | if (nodelist) { | ||
884 | /* NUL-terminate policy string */ | ||
885 | *nodelist++ = '\0'; | ||
886 | if (nodelist_parse(nodelist, *policy_nodes)) | ||
887 | goto out; | ||
888 | } | ||
889 | if (!strcmp(value, "default")) { | ||
890 | *policy = MPOL_DEFAULT; | ||
891 | /* Don't allow a nodelist */ | ||
892 | if (!nodelist) | ||
893 | err = 0; | ||
894 | } else if (!strcmp(value, "prefer")) { | ||
895 | *policy = MPOL_PREFERRED; | ||
896 | /* Insist on a nodelist of one node only */ | ||
897 | if (nodelist) { | ||
898 | char *rest = nodelist; | ||
899 | while (isdigit(*rest)) | ||
900 | rest++; | ||
901 | if (!*rest) | ||
902 | err = 0; | ||
903 | } | ||
904 | } else if (!strcmp(value, "bind")) { | ||
905 | *policy = MPOL_BIND; | ||
906 | /* Insist on a nodelist */ | ||
907 | if (nodelist) | ||
908 | err = 0; | ||
909 | } else if (!strcmp(value, "interleave")) { | ||
910 | *policy = MPOL_INTERLEAVE; | ||
911 | /* Default to nodes online if no nodelist */ | ||
912 | if (!nodelist) | ||
913 | *policy_nodes = node_online_map; | ||
914 | err = 0; | ||
915 | } | ||
916 | out: | ||
917 | /* Restore string for error message */ | ||
918 | if (nodelist) | ||
919 | *--nodelist = ':'; | ||
920 | return err; | ||
921 | } | ||
922 | |||
877 | static struct page *shmem_swapin_async(struct shared_policy *p, | 923 | static struct page *shmem_swapin_async(struct shared_policy *p, |
878 | swp_entry_t entry, unsigned long idx) | 924 | swp_entry_t entry, unsigned long idx) |
879 | { | 925 | { |
@@ -926,6 +972,11 @@ shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info, | |||
926 | return page; | 972 | return page; |
927 | } | 973 | } |
928 | #else | 974 | #else |
975 | static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes) | ||
976 | { | ||
977 | return 1; | ||
978 | } | ||
979 | |||
929 | static inline struct page * | 980 | static inline struct page * |
930 | shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx) | 981 | shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx) |
931 | { | 982 | { |
@@ -1859,7 +1910,23 @@ static int shmem_parse_options(char *options, int *mode, uid_t *uid, | |||
1859 | { | 1910 | { |
1860 | char *this_char, *value, *rest; | 1911 | char *this_char, *value, *rest; |
1861 | 1912 | ||
1862 | while ((this_char = strsep(&options, ",")) != NULL) { | 1913 | while (options != NULL) { |
1914 | this_char = options; | ||
1915 | for (;;) { | ||
1916 | /* | ||
1917 | * NUL-terminate this option: unfortunately, | ||
1918 | * mount options form a comma-separated list, | ||
1919 | * but mpol's nodelist may also contain commas. | ||
1920 | */ | ||
1921 | options = strchr(options, ','); | ||
1922 | if (options == NULL) | ||
1923 | break; | ||
1924 | options++; | ||
1925 | if (!isdigit(*options)) { | ||
1926 | options[-1] = '\0'; | ||
1927 | break; | ||
1928 | } | ||
1929 | } | ||
1863 | if (!*this_char) | 1930 | if (!*this_char) |
1864 | continue; | 1931 | continue; |
1865 | if ((value = strchr(this_char,'=')) != NULL) { | 1932 | if ((value = strchr(this_char,'=')) != NULL) { |
@@ -1910,18 +1977,8 @@ static int shmem_parse_options(char *options, int *mode, uid_t *uid, | |||
1910 | if (*rest) | 1977 | if (*rest) |
1911 | goto bad_val; | 1978 | goto bad_val; |
1912 | } else if (!strcmp(this_char,"mpol")) { | 1979 | } else if (!strcmp(this_char,"mpol")) { |
1913 | if (!strcmp(value,"default")) | 1980 | if (shmem_parse_mpol(value,policy,policy_nodes)) |
1914 | *policy = MPOL_DEFAULT; | ||
1915 | else if (!strcmp(value,"preferred")) | ||
1916 | *policy = MPOL_PREFERRED; | ||
1917 | else if (!strcmp(value,"bind")) | ||
1918 | *policy = MPOL_BIND; | ||
1919 | else if (!strcmp(value,"interleave")) | ||
1920 | *policy = MPOL_INTERLEAVE; | ||
1921 | else | ||
1922 | goto bad_val; | 1981 | goto bad_val; |
1923 | } else if (!strcmp(this_char,"mpol_nodelist")) { | ||
1924 | nodelist_parse(value, *policy_nodes); | ||
1925 | } else { | 1982 | } else { |
1926 | printk(KERN_ERR "tmpfs: Bad mount option %s\n", | 1983 | printk(KERN_ERR "tmpfs: Bad mount option %s\n", |
1927 | this_char); | 1984 | this_char); |