Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig       |    7
-rw-r--r--  mm/Makefile      |    6
-rw-r--r--  mm/fadvise.c     |    5
-rw-r--r--  mm/filemap.c     |   48
-rw-r--r--  mm/hugetlb.c     |    4
-rw-r--r--  mm/memory.c      |    2
-rw-r--r--  mm/mempolicy.c   |  561
-rw-r--r--  mm/oom_kill.c    |    5
-rw-r--r--  mm/page_alloc.c  |  129
-rw-r--r--  mm/pdflush.c     |    2
-rw-r--r--  mm/rmap.c        |    7
-rw-r--r--  mm/slab.c        | 1139
-rw-r--r--  mm/slob.c        |  385
-rw-r--r--  mm/sparse.c      |    4
-rw-r--r--  mm/swap_state.c  |    4
-rw-r--r--  mm/swapfile.c    |   12
-rw-r--r--  mm/truncate.c    |    1
-rw-r--r--  mm/util.c        |   39
-rw-r--r--  mm/vmscan.c      |  343
19 files changed, 1934 insertions, 769 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index b3db11f137e0..a9cb80ae6409 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -132,3 +132,10 @@ config SPLIT_PTLOCK_CPUS | |||
132 | default "4096" if ARM && !CPU_CACHE_VIPT | 132 | default "4096" if ARM && !CPU_CACHE_VIPT |
133 | default "4096" if PARISC && !PA20 | 133 | default "4096" if PARISC && !PA20 |
134 | default "4" | 134 | default "4" |
135 | |||
136 | # | ||
137 | # support for page migration | ||
138 | # | ||
139 | config MIGRATION | ||
140 | def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM | ||
141 | depends on SWAP | ||
diff --git a/mm/Makefile b/mm/Makefile
index 2fa6d2ca9f28..9aa03fa1dcc3 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -9,8 +9,8 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ | |||
9 | 9 | ||
10 | obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ | 10 | obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ |
11 | page_alloc.o page-writeback.o pdflush.o \ | 11 | page_alloc.o page-writeback.o pdflush.o \ |
12 | readahead.o slab.o swap.o truncate.o vmscan.o \ | 12 | readahead.o swap.o truncate.o vmscan.o \ |
13 | prio_tree.o $(mmu-y) | 13 | prio_tree.o util.o $(mmu-y) |
14 | 14 | ||
15 | obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o | 15 | obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o |
16 | obj-$(CONFIG_HUGETLBFS) += hugetlb.o | 16 | obj-$(CONFIG_HUGETLBFS) += hugetlb.o |
@@ -18,5 +18,7 @@ obj-$(CONFIG_NUMA) += mempolicy.o | |||
18 | obj-$(CONFIG_SPARSEMEM) += sparse.o | 18 | obj-$(CONFIG_SPARSEMEM) += sparse.o |
19 | obj-$(CONFIG_SHMEM) += shmem.o | 19 | obj-$(CONFIG_SHMEM) += shmem.o |
20 | obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o | 20 | obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o |
21 | obj-$(CONFIG_SLOB) += slob.o | ||
22 | obj-$(CONFIG_SLAB) += slab.o | ||
21 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o | 23 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o |
22 | obj-$(CONFIG_FS_XIP) += filemap_xip.o | 24 | obj-$(CONFIG_FS_XIP) += filemap_xip.o |
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 5f19e87bc5af..d257c89e7704 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -37,6 +37,11 @@ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) | |||
37 | if (!file) | 37 | if (!file) |
38 | return -EBADF; | 38 | return -EBADF; |
39 | 39 | ||
40 | if (S_ISFIFO(file->f_dentry->d_inode->i_mode)) { | ||
41 | ret = -ESPIPE; | ||
42 | goto out; | ||
43 | } | ||
44 | |||
40 | mapping = file->f_mapping; | 45 | mapping = file->f_mapping; |
41 | if (!mapping || len < 0) { | 46 | if (!mapping || len < 0) { |
42 | ret = -EINVAL; | 47 | ret = -EINVAL; |
diff --git a/mm/filemap.c b/mm/filemap.c
index 4ef24a397684..478f4c74cc31 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -280,7 +280,7 @@ static int wait_on_page_writeback_range(struct address_space *mapping, | |||
280 | * it is otherwise livelockable. | 280 | * it is otherwise livelockable. |
281 | */ | 281 | */ |
282 | int sync_page_range(struct inode *inode, struct address_space *mapping, | 282 | int sync_page_range(struct inode *inode, struct address_space *mapping, |
283 | loff_t pos, size_t count) | 283 | loff_t pos, loff_t count) |
284 | { | 284 | { |
285 | pgoff_t start = pos >> PAGE_CACHE_SHIFT; | 285 | pgoff_t start = pos >> PAGE_CACHE_SHIFT; |
286 | pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; | 286 | pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; |
@@ -305,9 +305,8 @@ EXPORT_SYMBOL(sync_page_range); | |||
305 | * as it forces O_SYNC writers to different parts of the same file | 305 | * as it forces O_SYNC writers to different parts of the same file |
306 | * to be serialised right until io completion. | 306 | * to be serialised right until io completion. |
307 | */ | 307 | */ |
308 | static int sync_page_range_nolock(struct inode *inode, | 308 | int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, |
309 | struct address_space *mapping, | 309 | loff_t pos, loff_t count) |
310 | loff_t pos, size_t count) | ||
311 | { | 310 | { |
312 | pgoff_t start = pos >> PAGE_CACHE_SHIFT; | 311 | pgoff_t start = pos >> PAGE_CACHE_SHIFT; |
313 | pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; | 312 | pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; |
@@ -322,6 +321,7 @@ static int sync_page_range_nolock(struct inode *inode, | |||
322 | ret = wait_on_page_writeback_range(mapping, start, end); | 321 | ret = wait_on_page_writeback_range(mapping, start, end); |
323 | return ret; | 322 | return ret; |
324 | } | 323 | } |
324 | EXPORT_SYMBOL(sync_page_range_nolock); | ||
325 | 325 | ||
326 | /** | 326 | /** |
327 | * filemap_fdatawait - walk the list of under-writeback pages of the given | 327 | * filemap_fdatawait - walk the list of under-writeback pages of the given |
@@ -343,30 +343,44 @@ EXPORT_SYMBOL(filemap_fdatawait); | |||
343 | 343 | ||
344 | int filemap_write_and_wait(struct address_space *mapping) | 344 | int filemap_write_and_wait(struct address_space *mapping) |
345 | { | 345 | { |
346 | int retval = 0; | 346 | int err = 0; |
347 | 347 | ||
348 | if (mapping->nrpages) { | 348 | if (mapping->nrpages) { |
349 | retval = filemap_fdatawrite(mapping); | 349 | err = filemap_fdatawrite(mapping); |
350 | if (retval == 0) | 350 | /* |
351 | retval = filemap_fdatawait(mapping); | 351 | * Even if the above returned error, the pages may be |
352 | * written partially (e.g. -ENOSPC), so we wait for it. | ||
353 | * But the -EIO is special case, it may indicate the worst | ||
354 | * thing (e.g. bug) happened, so we avoid waiting for it. | ||
355 | */ | ||
356 | if (err != -EIO) { | ||
357 | int err2 = filemap_fdatawait(mapping); | ||
358 | if (!err) | ||
359 | err = err2; | ||
360 | } | ||
352 | } | 361 | } |
353 | return retval; | 362 | return err; |
354 | } | 363 | } |
364 | EXPORT_SYMBOL(filemap_write_and_wait); | ||
355 | 365 | ||
356 | int filemap_write_and_wait_range(struct address_space *mapping, | 366 | int filemap_write_and_wait_range(struct address_space *mapping, |
357 | loff_t lstart, loff_t lend) | 367 | loff_t lstart, loff_t lend) |
358 | { | 368 | { |
359 | int retval = 0; | 369 | int err = 0; |
360 | 370 | ||
361 | if (mapping->nrpages) { | 371 | if (mapping->nrpages) { |
362 | retval = __filemap_fdatawrite_range(mapping, lstart, lend, | 372 | err = __filemap_fdatawrite_range(mapping, lstart, lend, |
363 | WB_SYNC_ALL); | 373 | WB_SYNC_ALL); |
364 | if (retval == 0) | 374 | /* See comment of filemap_write_and_wait() */ |
365 | retval = wait_on_page_writeback_range(mapping, | 375 | if (err != -EIO) { |
366 | lstart >> PAGE_CACHE_SHIFT, | 376 | int err2 = wait_on_page_writeback_range(mapping, |
367 | lend >> PAGE_CACHE_SHIFT); | 377 | lstart >> PAGE_CACHE_SHIFT, |
378 | lend >> PAGE_CACHE_SHIFT); | ||
379 | if (!err) | ||
380 | err = err2; | ||
381 | } | ||
368 | } | 382 | } |
369 | return retval; | 383 | return err; |
370 | } | 384 | } |
371 | 385 | ||
372 | /* | 386 | /* |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f4c43d7980ba..b21d78c941b5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/nodemask.h> | 12 | #include <linux/nodemask.h> |
13 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
14 | #include <linux/mempolicy.h> | 14 | #include <linux/mempolicy.h> |
15 | #include <linux/cpuset.h> | ||
15 | 16 | ||
16 | #include <asm/page.h> | 17 | #include <asm/page.h> |
17 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
@@ -48,7 +49,8 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, | |||
48 | 49 | ||
49 | for (z = zonelist->zones; *z; z++) { | 50 | for (z = zonelist->zones; *z; z++) { |
50 | nid = (*z)->zone_pgdat->node_id; | 51 | nid = (*z)->zone_pgdat->node_id; |
51 | if (!list_empty(&hugepage_freelists[nid])) | 52 | if (cpuset_zone_allowed(*z, GFP_HIGHUSER) && |
53 | !list_empty(&hugepage_freelists[nid])) | ||
52 | break; | 54 | break; |
53 | } | 55 | } |
54 | 56 | ||
diff --git a/mm/memory.c b/mm/memory.c
index 7197f9bcd384..3944fec38012 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2267,6 +2267,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2267 | return handle_pte_fault(mm, vma, address, pte, pmd, write_access); | 2267 | return handle_pte_fault(mm, vma, address, pte, pmd, write_access); |
2268 | } | 2268 | } |
2269 | 2269 | ||
2270 | EXPORT_SYMBOL_GPL(__handle_mm_fault); | ||
2271 | |||
2270 | #ifndef __PAGETABLE_PUD_FOLDED | 2272 | #ifndef __PAGETABLE_PUD_FOLDED |
2271 | /* | 2273 | /* |
2272 | * Allocate page upper directory. | 2274 | * Allocate page upper directory. |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0f1d2b8a952b..1850d0aef4ac 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -83,9 +83,18 @@ | |||
83 | #include <linux/init.h> | 83 | #include <linux/init.h> |
84 | #include <linux/compat.h> | 84 | #include <linux/compat.h> |
85 | #include <linux/mempolicy.h> | 85 | #include <linux/mempolicy.h> |
86 | #include <linux/swap.h> | ||
87 | #include <linux/seq_file.h> | ||
88 | #include <linux/proc_fs.h> | ||
89 | |||
86 | #include <asm/tlbflush.h> | 90 | #include <asm/tlbflush.h> |
87 | #include <asm/uaccess.h> | 91 | #include <asm/uaccess.h> |
88 | 92 | ||
93 | /* Internal flags */ | ||
94 | #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */ | ||
95 | #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */ | ||
96 | #define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2) /* Gather statistics */ | ||
97 | |||
89 | static kmem_cache_t *policy_cache; | 98 | static kmem_cache_t *policy_cache; |
90 | static kmem_cache_t *sn_cache; | 99 | static kmem_cache_t *sn_cache; |
91 | 100 | ||
@@ -171,12 +180,19 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes) | |||
171 | break; | 180 | break; |
172 | } | 181 | } |
173 | policy->policy = mode; | 182 | policy->policy = mode; |
183 | policy->cpuset_mems_allowed = cpuset_mems_allowed(current); | ||
174 | return policy; | 184 | return policy; |
175 | } | 185 | } |
176 | 186 | ||
177 | /* Ensure all existing pages follow the policy. */ | 187 | static void gather_stats(struct page *, void *); |
188 | static void migrate_page_add(struct vm_area_struct *vma, | ||
189 | struct page *page, struct list_head *pagelist, unsigned long flags); | ||
190 | |||
191 | /* Scan through pages checking if pages follow certain conditions. */ | ||
178 | static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | 192 | static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, |
179 | unsigned long addr, unsigned long end, nodemask_t *nodes) | 193 | unsigned long addr, unsigned long end, |
194 | const nodemask_t *nodes, unsigned long flags, | ||
195 | void *private) | ||
180 | { | 196 | { |
181 | pte_t *orig_pte; | 197 | pte_t *orig_pte; |
182 | pte_t *pte; | 198 | pte_t *pte; |
@@ -193,7 +209,17 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
193 | if (!page) | 209 | if (!page) |
194 | continue; | 210 | continue; |
195 | nid = page_to_nid(page); | 211 | nid = page_to_nid(page); |
196 | if (!node_isset(nid, *nodes)) | 212 | if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) |
213 | continue; | ||
214 | |||
215 | if (flags & MPOL_MF_STATS) | ||
216 | gather_stats(page, private); | ||
217 | else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { | ||
218 | spin_unlock(ptl); | ||
219 | migrate_page_add(vma, page, private, flags); | ||
220 | spin_lock(ptl); | ||
221 | } | ||
222 | else | ||
197 | break; | 223 | break; |
198 | } while (pte++, addr += PAGE_SIZE, addr != end); | 224 | } while (pte++, addr += PAGE_SIZE, addr != end); |
199 | pte_unmap_unlock(orig_pte, ptl); | 225 | pte_unmap_unlock(orig_pte, ptl); |
@@ -201,7 +227,9 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
201 | } | 227 | } |
202 | 228 | ||
203 | static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, | 229 | static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, |
204 | unsigned long addr, unsigned long end, nodemask_t *nodes) | 230 | unsigned long addr, unsigned long end, |
231 | const nodemask_t *nodes, unsigned long flags, | ||
232 | void *private) | ||
205 | { | 233 | { |
206 | pmd_t *pmd; | 234 | pmd_t *pmd; |
207 | unsigned long next; | 235 | unsigned long next; |
@@ -211,14 +239,17 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, | |||
211 | next = pmd_addr_end(addr, end); | 239 | next = pmd_addr_end(addr, end); |
212 | if (pmd_none_or_clear_bad(pmd)) | 240 | if (pmd_none_or_clear_bad(pmd)) |
213 | continue; | 241 | continue; |
214 | if (check_pte_range(vma, pmd, addr, next, nodes)) | 242 | if (check_pte_range(vma, pmd, addr, next, nodes, |
243 | flags, private)) | ||
215 | return -EIO; | 244 | return -EIO; |
216 | } while (pmd++, addr = next, addr != end); | 245 | } while (pmd++, addr = next, addr != end); |
217 | return 0; | 246 | return 0; |
218 | } | 247 | } |
219 | 248 | ||
220 | static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd, | 249 | static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd, |
221 | unsigned long addr, unsigned long end, nodemask_t *nodes) | 250 | unsigned long addr, unsigned long end, |
251 | const nodemask_t *nodes, unsigned long flags, | ||
252 | void *private) | ||
222 | { | 253 | { |
223 | pud_t *pud; | 254 | pud_t *pud; |
224 | unsigned long next; | 255 | unsigned long next; |
@@ -228,14 +259,17 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd, | |||
228 | next = pud_addr_end(addr, end); | 259 | next = pud_addr_end(addr, end); |
229 | if (pud_none_or_clear_bad(pud)) | 260 | if (pud_none_or_clear_bad(pud)) |
230 | continue; | 261 | continue; |
231 | if (check_pmd_range(vma, pud, addr, next, nodes)) | 262 | if (check_pmd_range(vma, pud, addr, next, nodes, |
263 | flags, private)) | ||
232 | return -EIO; | 264 | return -EIO; |
233 | } while (pud++, addr = next, addr != end); | 265 | } while (pud++, addr = next, addr != end); |
234 | return 0; | 266 | return 0; |
235 | } | 267 | } |
236 | 268 | ||
237 | static inline int check_pgd_range(struct vm_area_struct *vma, | 269 | static inline int check_pgd_range(struct vm_area_struct *vma, |
238 | unsigned long addr, unsigned long end, nodemask_t *nodes) | 270 | unsigned long addr, unsigned long end, |
271 | const nodemask_t *nodes, unsigned long flags, | ||
272 | void *private) | ||
239 | { | 273 | { |
240 | pgd_t *pgd; | 274 | pgd_t *pgd; |
241 | unsigned long next; | 275 | unsigned long next; |
@@ -245,16 +279,30 @@ static inline int check_pgd_range(struct vm_area_struct *vma, | |||
245 | next = pgd_addr_end(addr, end); | 279 | next = pgd_addr_end(addr, end); |
246 | if (pgd_none_or_clear_bad(pgd)) | 280 | if (pgd_none_or_clear_bad(pgd)) |
247 | continue; | 281 | continue; |
248 | if (check_pud_range(vma, pgd, addr, next, nodes)) | 282 | if (check_pud_range(vma, pgd, addr, next, nodes, |
283 | flags, private)) | ||
249 | return -EIO; | 284 | return -EIO; |
250 | } while (pgd++, addr = next, addr != end); | 285 | } while (pgd++, addr = next, addr != end); |
251 | return 0; | 286 | return 0; |
252 | } | 287 | } |
253 | 288 | ||
254 | /* Step 1: check the range */ | 289 | /* Check if a vma is migratable */ |
290 | static inline int vma_migratable(struct vm_area_struct *vma) | ||
291 | { | ||
292 | if (vma->vm_flags & ( | ||
293 | VM_LOCKED|VM_IO|VM_HUGETLB|VM_PFNMAP)) | ||
294 | return 0; | ||
295 | return 1; | ||
296 | } | ||
297 | |||
298 | /* | ||
299 | * Check if all pages in a range are on a set of nodes. | ||
300 | * If pagelist != NULL then isolate pages from the LRU and | ||
301 | * put them on the pagelist. | ||
302 | */ | ||
255 | static struct vm_area_struct * | 303 | static struct vm_area_struct * |
256 | check_range(struct mm_struct *mm, unsigned long start, unsigned long end, | 304 | check_range(struct mm_struct *mm, unsigned long start, unsigned long end, |
257 | nodemask_t *nodes, unsigned long flags) | 305 | const nodemask_t *nodes, unsigned long flags, void *private) |
258 | { | 306 | { |
259 | int err; | 307 | int err; |
260 | struct vm_area_struct *first, *vma, *prev; | 308 | struct vm_area_struct *first, *vma, *prev; |
@@ -264,17 +312,24 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, | |||
264 | return ERR_PTR(-EFAULT); | 312 | return ERR_PTR(-EFAULT); |
265 | prev = NULL; | 313 | prev = NULL; |
266 | for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { | 314 | for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { |
267 | if (!vma->vm_next && vma->vm_end < end) | 315 | if (!(flags & MPOL_MF_DISCONTIG_OK)) { |
268 | return ERR_PTR(-EFAULT); | 316 | if (!vma->vm_next && vma->vm_end < end) |
269 | if (prev && prev->vm_end < vma->vm_start) | 317 | return ERR_PTR(-EFAULT); |
270 | return ERR_PTR(-EFAULT); | 318 | if (prev && prev->vm_end < vma->vm_start) |
271 | if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) { | 319 | return ERR_PTR(-EFAULT); |
320 | } | ||
321 | if (!is_vm_hugetlb_page(vma) && | ||
322 | ((flags & MPOL_MF_STRICT) || | ||
323 | ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && | ||
324 | vma_migratable(vma)))) { | ||
272 | unsigned long endvma = vma->vm_end; | 325 | unsigned long endvma = vma->vm_end; |
326 | |||
273 | if (endvma > end) | 327 | if (endvma > end) |
274 | endvma = end; | 328 | endvma = end; |
275 | if (vma->vm_start > start) | 329 | if (vma->vm_start > start) |
276 | start = vma->vm_start; | 330 | start = vma->vm_start; |
277 | err = check_pgd_range(vma, start, endvma, nodes); | 331 | err = check_pgd_range(vma, start, endvma, nodes, |
332 | flags, private); | ||
278 | if (err) { | 333 | if (err) { |
279 | first = ERR_PTR(err); | 334 | first = ERR_PTR(err); |
280 | break; | 335 | break; |
@@ -333,51 +388,10 @@ static int contextualize_policy(int mode, nodemask_t *nodes) | |||
333 | if (!nodes) | 388 | if (!nodes) |
334 | return 0; | 389 | return 0; |
335 | 390 | ||
336 | /* Update current mems_allowed */ | 391 | cpuset_update_task_memory_state(); |
337 | cpuset_update_current_mems_allowed(); | 392 | if (!cpuset_nodes_subset_current_mems_allowed(*nodes)) |
338 | /* Ignore nodes not set in current->mems_allowed */ | ||
339 | cpuset_restrict_to_mems_allowed(nodes->bits); | ||
340 | return mpol_check_policy(mode, nodes); | ||
341 | } | ||
342 | |||
343 | long do_mbind(unsigned long start, unsigned long len, | ||
344 | unsigned long mode, nodemask_t *nmask, unsigned long flags) | ||
345 | { | ||
346 | struct vm_area_struct *vma; | ||
347 | struct mm_struct *mm = current->mm; | ||
348 | struct mempolicy *new; | ||
349 | unsigned long end; | ||
350 | int err; | ||
351 | |||
352 | if ((flags & ~(unsigned long)(MPOL_MF_STRICT)) || mode > MPOL_MAX) | ||
353 | return -EINVAL; | ||
354 | if (start & ~PAGE_MASK) | ||
355 | return -EINVAL; | ||
356 | if (mode == MPOL_DEFAULT) | ||
357 | flags &= ~MPOL_MF_STRICT; | ||
358 | len = (len + PAGE_SIZE - 1) & PAGE_MASK; | ||
359 | end = start + len; | ||
360 | if (end < start) | ||
361 | return -EINVAL; | 393 | return -EINVAL; |
362 | if (end == start) | 394 | return mpol_check_policy(mode, nodes); |
363 | return 0; | ||
364 | if (mpol_check_policy(mode, nmask)) | ||
365 | return -EINVAL; | ||
366 | new = mpol_new(mode, nmask); | ||
367 | if (IS_ERR(new)) | ||
368 | return PTR_ERR(new); | ||
369 | |||
370 | PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len, | ||
371 | mode,nodes_addr(nodes)[0]); | ||
372 | |||
373 | down_write(&mm->mmap_sem); | ||
374 | vma = check_range(mm, start, end, nmask, flags); | ||
375 | err = PTR_ERR(vma); | ||
376 | if (!IS_ERR(vma)) | ||
377 | err = mbind_range(vma, start, end, new); | ||
378 | up_write(&mm->mmap_sem); | ||
379 | mpol_free(new); | ||
380 | return err; | ||
381 | } | 395 | } |
382 | 396 | ||
383 | /* Set the process memory policy */ | 397 | /* Set the process memory policy */ |
@@ -448,7 +462,7 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask, | |||
448 | struct vm_area_struct *vma = NULL; | 462 | struct vm_area_struct *vma = NULL; |
449 | struct mempolicy *pol = current->mempolicy; | 463 | struct mempolicy *pol = current->mempolicy; |
450 | 464 | ||
451 | cpuset_update_current_mems_allowed(); | 465 | cpuset_update_task_memory_state(); |
452 | if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR)) | 466 | if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR)) |
453 | return -EINVAL; | 467 | return -EINVAL; |
454 | if (flags & MPOL_F_ADDR) { | 468 | if (flags & MPOL_F_ADDR) { |
@@ -500,11 +514,177 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask, | |||
500 | } | 514 | } |
501 | 515 | ||
502 | /* | 516 | /* |
517 | * page migration | ||
518 | */ | ||
519 | |||
520 | /* Check if we are the only process mapping the page in question */ | ||
521 | static inline int single_mm_mapping(struct mm_struct *mm, | ||
522 | struct address_space *mapping) | ||
523 | { | ||
524 | struct vm_area_struct *vma; | ||
525 | struct prio_tree_iter iter; | ||
526 | int rc = 1; | ||
527 | |||
528 | spin_lock(&mapping->i_mmap_lock); | ||
529 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) | ||
530 | if (mm != vma->vm_mm) { | ||
531 | rc = 0; | ||
532 | goto out; | ||
533 | } | ||
534 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) | ||
535 | if (mm != vma->vm_mm) { | ||
536 | rc = 0; | ||
537 | goto out; | ||
538 | } | ||
539 | out: | ||
540 | spin_unlock(&mapping->i_mmap_lock); | ||
541 | return rc; | ||
542 | } | ||
543 | |||
544 | /* | ||
545 | * Add a page to be migrated to the pagelist | ||
546 | */ | ||
547 | static void migrate_page_add(struct vm_area_struct *vma, | ||
548 | struct page *page, struct list_head *pagelist, unsigned long flags) | ||
549 | { | ||
550 | /* | ||
551 | * Avoid migrating a page that is shared by others and not writable. | ||
552 | */ | ||
553 | if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) || | ||
554 | mapping_writably_mapped(page->mapping) || | ||
555 | single_mm_mapping(vma->vm_mm, page->mapping)) { | ||
556 | int rc = isolate_lru_page(page); | ||
557 | |||
558 | if (rc == 1) | ||
559 | list_add(&page->lru, pagelist); | ||
560 | /* | ||
561 | * If the isolate attempt was not successful then we just | ||
562 | * encountered an unswappable page. Something must be wrong. | ||
563 | */ | ||
564 | WARN_ON(rc == 0); | ||
565 | } | ||
566 | } | ||
567 | |||
568 | static int swap_pages(struct list_head *pagelist) | ||
569 | { | ||
570 | LIST_HEAD(moved); | ||
571 | LIST_HEAD(failed); | ||
572 | int n; | ||
573 | |||
574 | n = migrate_pages(pagelist, NULL, &moved, &failed); | ||
575 | putback_lru_pages(&failed); | ||
576 | putback_lru_pages(&moved); | ||
577 | |||
578 | return n; | ||
579 | } | ||
580 | |||
581 | /* | ||
582 | * For now migrate_pages simply swaps out the pages from nodes that are in | ||
583 | * the source set but not in the target set. In the future, we would | ||
584 | * want a function that moves pages between the two nodesets in such | ||
585 | * a way as to preserve the physical layout as much as possible. | ||
586 | * | ||
587 | * Returns the number of pages that could not be moved. | ||
588 | */ | ||
589 | int do_migrate_pages(struct mm_struct *mm, | ||
590 | const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) | ||
591 | { | ||
592 | LIST_HEAD(pagelist); | ||
593 | int count = 0; | ||
594 | nodemask_t nodes; | ||
595 | |||
596 | nodes_andnot(nodes, *from_nodes, *to_nodes); | ||
597 | |||
598 | down_read(&mm->mmap_sem); | ||
599 | check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes, | ||
600 | flags | MPOL_MF_DISCONTIG_OK, &pagelist); | ||
601 | |||
602 | if (!list_empty(&pagelist)) { | ||
603 | count = swap_pages(&pagelist); | ||
604 | putback_lru_pages(&pagelist); | ||
605 | } | ||
606 | |||
607 | up_read(&mm->mmap_sem); | ||
608 | return count; | ||
609 | } | ||
610 | |||
611 | long do_mbind(unsigned long start, unsigned long len, | ||
612 | unsigned long mode, nodemask_t *nmask, unsigned long flags) | ||
613 | { | ||
614 | struct vm_area_struct *vma; | ||
615 | struct mm_struct *mm = current->mm; | ||
616 | struct mempolicy *new; | ||
617 | unsigned long end; | ||
618 | int err; | ||
619 | LIST_HEAD(pagelist); | ||
620 | |||
621 | if ((flags & ~(unsigned long)(MPOL_MF_STRICT | | ||
622 | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) | ||
623 | || mode > MPOL_MAX) | ||
624 | return -EINVAL; | ||
625 | if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE)) | ||
626 | return -EPERM; | ||
627 | |||
628 | if (start & ~PAGE_MASK) | ||
629 | return -EINVAL; | ||
630 | |||
631 | if (mode == MPOL_DEFAULT) | ||
632 | flags &= ~MPOL_MF_STRICT; | ||
633 | |||
634 | len = (len + PAGE_SIZE - 1) & PAGE_MASK; | ||
635 | end = start + len; | ||
636 | |||
637 | if (end < start) | ||
638 | return -EINVAL; | ||
639 | if (end == start) | ||
640 | return 0; | ||
641 | |||
642 | if (mpol_check_policy(mode, nmask)) | ||
643 | return -EINVAL; | ||
644 | |||
645 | new = mpol_new(mode, nmask); | ||
646 | if (IS_ERR(new)) | ||
647 | return PTR_ERR(new); | ||
648 | |||
649 | /* | ||
650 | * If we are using the default policy then operation | ||
651 | * on discontinuous address spaces is okay after all | ||
652 | */ | ||
653 | if (!new) | ||
654 | flags |= MPOL_MF_DISCONTIG_OK; | ||
655 | |||
656 | PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len, | ||
657 | mode,nodes_addr(nodes)[0]); | ||
658 | |||
659 | down_write(&mm->mmap_sem); | ||
660 | vma = check_range(mm, start, end, nmask, | ||
661 | flags | MPOL_MF_INVERT, &pagelist); | ||
662 | |||
663 | err = PTR_ERR(vma); | ||
664 | if (!IS_ERR(vma)) { | ||
665 | int nr_failed = 0; | ||
666 | |||
667 | err = mbind_range(vma, start, end, new); | ||
668 | if (!list_empty(&pagelist)) | ||
669 | nr_failed = swap_pages(&pagelist); | ||
670 | |||
671 | if (!err && nr_failed && (flags & MPOL_MF_STRICT)) | ||
672 | err = -EIO; | ||
673 | } | ||
674 | if (!list_empty(&pagelist)) | ||
675 | putback_lru_pages(&pagelist); | ||
676 | |||
677 | up_write(&mm->mmap_sem); | ||
678 | mpol_free(new); | ||
679 | return err; | ||
680 | } | ||
681 | |||
682 | /* | ||
503 | * User space interface with variable sized bitmaps for nodelists. | 683 | * User space interface with variable sized bitmaps for nodelists. |
504 | */ | 684 | */ |
505 | 685 | ||
506 | /* Copy a node mask from user space. */ | 686 | /* Copy a node mask from user space. */ |
507 | static int get_nodes(nodemask_t *nodes, unsigned long __user *nmask, | 687 | static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, |
508 | unsigned long maxnode) | 688 | unsigned long maxnode) |
509 | { | 689 | { |
510 | unsigned long k; | 690 | unsigned long k; |
@@ -593,6 +773,65 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, | |||
593 | return do_set_mempolicy(mode, &nodes); | 773 | return do_set_mempolicy(mode, &nodes); |
594 | } | 774 | } |
595 | 775 | ||
776 | asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, | ||
777 | const unsigned long __user *old_nodes, | ||
778 | const unsigned long __user *new_nodes) | ||
779 | { | ||
780 | struct mm_struct *mm; | ||
781 | struct task_struct *task; | ||
782 | nodemask_t old; | ||
783 | nodemask_t new; | ||
784 | nodemask_t task_nodes; | ||
785 | int err; | ||
786 | |||
787 | err = get_nodes(&old, old_nodes, maxnode); | ||
788 | if (err) | ||
789 | return err; | ||
790 | |||
791 | err = get_nodes(&new, new_nodes, maxnode); | ||
792 | if (err) | ||
793 | return err; | ||
794 | |||
795 | /* Find the mm_struct */ | ||
796 | read_lock(&tasklist_lock); | ||
797 | task = pid ? find_task_by_pid(pid) : current; | ||
798 | if (!task) { | ||
799 | read_unlock(&tasklist_lock); | ||
800 | return -ESRCH; | ||
801 | } | ||
802 | mm = get_task_mm(task); | ||
803 | read_unlock(&tasklist_lock); | ||
804 | |||
805 | if (!mm) | ||
806 | return -EINVAL; | ||
807 | |||
808 | /* | ||
809 | * Check if this process has the right to modify the specified | ||
810 | * process. The right exists if the process has administrative | ||
811 | * capabilities, superuser privileges or the same | ||
812 | * userid as the target process. | ||
813 | */ | ||
814 | if ((current->euid != task->suid) && (current->euid != task->uid) && | ||
815 | (current->uid != task->suid) && (current->uid != task->uid) && | ||
816 | !capable(CAP_SYS_ADMIN)) { | ||
817 | err = -EPERM; | ||
818 | goto out; | ||
819 | } | ||
820 | |||
821 | task_nodes = cpuset_mems_allowed(task); | ||
822 | /* Is the user allowed to access the target nodes? */ | ||
823 | if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_ADMIN)) { | ||
824 | err = -EPERM; | ||
825 | goto out; | ||
826 | } | ||
827 | |||
828 | err = do_migrate_pages(mm, &old, &new, MPOL_MF_MOVE); | ||
829 | out: | ||
830 | mmput(mm); | ||
831 | return err; | ||
832 | } | ||
833 | |||
834 | |||
596 | /* Retrieve NUMA policy */ | 835 | /* Retrieve NUMA policy */ |
597 | asmlinkage long sys_get_mempolicy(int __user *policy, | 836 | asmlinkage long sys_get_mempolicy(int __user *policy, |
598 | unsigned long __user *nmask, | 837 | unsigned long __user *nmask, |
@@ -699,8 +938,8 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, | |||
699 | #endif | 938 | #endif |
700 | 939 | ||
701 | /* Return effective policy for a VMA */ | 940 | /* Return effective policy for a VMA */ |
702 | struct mempolicy * | 941 | static struct mempolicy * get_vma_policy(struct task_struct *task, |
703 | get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) | 942 | struct vm_area_struct *vma, unsigned long addr) |
704 | { | 943 | { |
705 | struct mempolicy *pol = task->mempolicy; | 944 | struct mempolicy *pol = task->mempolicy; |
706 | 945 | ||
@@ -848,7 +1087,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) | |||
848 | { | 1087 | { |
849 | struct mempolicy *pol = get_vma_policy(current, vma, addr); | 1088 | struct mempolicy *pol = get_vma_policy(current, vma, addr); |
850 | 1089 | ||
851 | cpuset_update_current_mems_allowed(); | 1090 | cpuset_update_task_memory_state(); |
852 | 1091 | ||
853 | if (unlikely(pol->policy == MPOL_INTERLEAVE)) { | 1092 | if (unlikely(pol->policy == MPOL_INTERLEAVE)) { |
854 | unsigned nid; | 1093 | unsigned nid; |
@@ -874,7 +1113,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) | |||
874 | * interrupt context and apply the current process NUMA policy. | 1113 | * interrupt context and apply the current process NUMA policy. |
875 | * Returns NULL when no page can be allocated. | 1114 | * Returns NULL when no page can be allocated. |
876 | * | 1115 | * |
877 | * Don't call cpuset_update_current_mems_allowed() unless | 1116 | * Don't call cpuset_update_task_memory_state() unless |
878 | * 1) it's ok to take cpuset_sem (can WAIT), and | 1117 | * 1) it's ok to take cpuset_sem (can WAIT), and |
879 | * 2) allocating for current task (not interrupt). | 1118 | * 2) allocating for current task (not interrupt). |
880 | */ | 1119 | */ |
@@ -883,7 +1122,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) | |||
883 | struct mempolicy *pol = current->mempolicy; | 1122 | struct mempolicy *pol = current->mempolicy; |
884 | 1123 | ||
885 | if ((gfp & __GFP_WAIT) && !in_interrupt()) | 1124 | if ((gfp & __GFP_WAIT) && !in_interrupt()) |
886 | cpuset_update_current_mems_allowed(); | 1125 | cpuset_update_task_memory_state(); |
887 | if (!pol || in_interrupt()) | 1126 | if (!pol || in_interrupt()) |
888 | pol = &default_policy; | 1127 | pol = &default_policy; |
889 | if (pol->policy == MPOL_INTERLEAVE) | 1128 | if (pol->policy == MPOL_INTERLEAVE) |
@@ -892,6 +1131,15 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) | |||
892 | } | 1131 | } |
893 | EXPORT_SYMBOL(alloc_pages_current); | 1132 | EXPORT_SYMBOL(alloc_pages_current); |
894 | 1133 | ||
1134 | /* | ||
1135 | * If mpol_copy() sees current->cpuset == cpuset_being_rebound, then it | ||
1136 | * rebinds the mempolicy its copying by calling mpol_rebind_policy() | ||
1137 | * with the mems_allowed returned by cpuset_mems_allowed(). This | ||
1138 | * keeps mempolicies cpuset relative after its cpuset moves. See | ||
1139 | * further kernel/cpuset.c update_nodemask(). | ||
1140 | */ | ||
1141 | void *cpuset_being_rebound; | ||
1142 | |||
895 | /* Slow path of a mempolicy copy */ | 1143 | /* Slow path of a mempolicy copy */ |
896 | struct mempolicy *__mpol_copy(struct mempolicy *old) | 1144 | struct mempolicy *__mpol_copy(struct mempolicy *old) |
897 | { | 1145 | { |
@@ -899,6 +1147,10 @@ struct mempolicy *__mpol_copy(struct mempolicy *old) | |||
899 | 1147 | ||
900 | if (!new) | 1148 | if (!new) |
901 | return ERR_PTR(-ENOMEM); | 1149 | return ERR_PTR(-ENOMEM); |
1150 | if (current_cpuset_is_being_rebound()) { | ||
1151 | nodemask_t mems = cpuset_mems_allowed(current); | ||
1152 | mpol_rebind_policy(old, &mems); | ||
1153 | } | ||
902 | *new = *old; | 1154 | *new = *old; |
903 | atomic_set(&new->refcnt, 1); | 1155 | atomic_set(&new->refcnt, 1); |
904 | if (new->policy == MPOL_BIND) { | 1156 | if (new->policy == MPOL_BIND) { |
@@ -1173,25 +1425,31 @@ void numa_default_policy(void) | |||
1173 | } | 1425 | } |
1174 | 1426 | ||
1175 | /* Migrate a policy to a different set of nodes */ | 1427 | /* Migrate a policy to a different set of nodes */ |
1176 | static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, | 1428 | void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) |
1177 | const nodemask_t *new) | ||
1178 | { | 1429 | { |
1430 | nodemask_t *mpolmask; | ||
1179 | nodemask_t tmp; | 1431 | nodemask_t tmp; |
1180 | 1432 | ||
1181 | if (!pol) | 1433 | if (!pol) |
1182 | return; | 1434 | return; |
1435 | mpolmask = &pol->cpuset_mems_allowed; | ||
1436 | if (nodes_equal(*mpolmask, *newmask)) | ||
1437 | return; | ||
1183 | 1438 | ||
1184 | switch (pol->policy) { | 1439 | switch (pol->policy) { |
1185 | case MPOL_DEFAULT: | 1440 | case MPOL_DEFAULT: |
1186 | break; | 1441 | break; |
1187 | case MPOL_INTERLEAVE: | 1442 | case MPOL_INTERLEAVE: |
1188 | nodes_remap(tmp, pol->v.nodes, *old, *new); | 1443 | nodes_remap(tmp, pol->v.nodes, *mpolmask, *newmask); |
1189 | pol->v.nodes = tmp; | 1444 | pol->v.nodes = tmp; |
1190 | current->il_next = node_remap(current->il_next, *old, *new); | 1445 | *mpolmask = *newmask; |
1446 | current->il_next = node_remap(current->il_next, | ||
1447 | *mpolmask, *newmask); | ||
1191 | break; | 1448 | break; |
1192 | case MPOL_PREFERRED: | 1449 | case MPOL_PREFERRED: |
1193 | pol->v.preferred_node = node_remap(pol->v.preferred_node, | 1450 | pol->v.preferred_node = node_remap(pol->v.preferred_node, |
1194 | *old, *new); | 1451 | *mpolmask, *newmask); |
1452 | *mpolmask = *newmask; | ||
1195 | break; | 1453 | break; |
1196 | case MPOL_BIND: { | 1454 | case MPOL_BIND: { |
1197 | nodemask_t nodes; | 1455 | nodemask_t nodes; |
@@ -1201,7 +1459,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, | |||
1201 | nodes_clear(nodes); | 1459 | nodes_clear(nodes); |
1202 | for (z = pol->v.zonelist->zones; *z; z++) | 1460 | for (z = pol->v.zonelist->zones; *z; z++) |
1203 | node_set((*z)->zone_pgdat->node_id, nodes); | 1461 | node_set((*z)->zone_pgdat->node_id, nodes); |
1204 | nodes_remap(tmp, nodes, *old, *new); | 1462 | nodes_remap(tmp, nodes, *mpolmask, *newmask); |
1205 | nodes = tmp; | 1463 | nodes = tmp; |
1206 | 1464 | ||
1207 | zonelist = bind_zonelist(&nodes); | 1465 | zonelist = bind_zonelist(&nodes); |
@@ -1216,6 +1474,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, | |||
1216 | kfree(pol->v.zonelist); | 1474 | kfree(pol->v.zonelist); |
1217 | pol->v.zonelist = zonelist; | 1475 | pol->v.zonelist = zonelist; |
1218 | } | 1476 | } |
1477 | *mpolmask = *newmask; | ||
1219 | break; | 1478 | break; |
1220 | } | 1479 | } |
1221 | default: | 1480 | default: |
@@ -1225,12 +1484,156 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, | |||
1225 | } | 1484 | } |
1226 | 1485 | ||
1227 | /* | 1486 | /* |
1228 | * Someone moved this task to different nodes. Fixup mempolicies. | 1487 | * Wrapper for mpol_rebind_policy() that just requires task |
1488 | * pointer, and updates task mempolicy. | ||
1489 | */ | ||
1490 | |||
1491 | void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) | ||
1492 | { | ||
1493 | mpol_rebind_policy(tsk->mempolicy, new); | ||
1494 | } | ||
1495 | |||
1496 | /* | ||
1497 | * Rebind each vma in mm to new nodemask. | ||
1229 | * | 1498 | * |
1230 | * TODO - fixup current->mm->vma and shmfs/tmpfs/hugetlbfs policies as well, | 1499 | * Call holding a reference to mm. Takes mm->mmap_sem during call. |
1231 | * once we have a cpuset mechanism to mark which cpuset subtree is migrating. | ||
1232 | */ | 1500 | */ |
1233 | void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new) | 1501 | |
1502 | void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) | ||
1234 | { | 1503 | { |
1235 | rebind_policy(current->mempolicy, old, new); | 1504 | struct vm_area_struct *vma; |
1505 | |||
1506 | down_write(&mm->mmap_sem); | ||
1507 | for (vma = mm->mmap; vma; vma = vma->vm_next) | ||
1508 | mpol_rebind_policy(vma->vm_policy, new); | ||
1509 | up_write(&mm->mmap_sem); | ||
1236 | } | 1510 | } |
1511 | |||
1512 | /* | ||
1513 | * Display pages allocated per node and memory policy via /proc. | ||
1514 | */ | ||
1515 | |||
1516 | static const char *policy_types[] = { "default", "prefer", "bind", | ||
1517 | "interleave" }; | ||
1518 | |||
1519 | /* | ||
1520 | * Convert a mempolicy into a string. | ||
1521 | * Returns the number of characters in buffer (if positive) | ||
1522 | * or an error (negative) | ||
1523 | */ | ||
1524 | static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) | ||
1525 | { | ||
1526 | char *p = buffer; | ||
1527 | int l; | ||
1528 | nodemask_t nodes; | ||
1529 | int mode = pol ? pol->policy : MPOL_DEFAULT; | ||
1530 | |||
1531 | switch (mode) { | ||
1532 | case MPOL_DEFAULT: | ||
1533 | nodes_clear(nodes); | ||
1534 | break; | ||
1535 | |||
1536 | case MPOL_PREFERRED: | ||
1537 | nodes_clear(nodes); | ||
1538 | node_set(pol->v.preferred_node, nodes); | ||
1539 | break; | ||
1540 | |||
1541 | case MPOL_BIND: | ||
1542 | get_zonemask(pol, &nodes); | ||
1543 | break; | ||
1544 | |||
1545 | case MPOL_INTERLEAVE: | ||
1546 | nodes = pol->v.nodes; | ||
1547 | break; | ||
1548 | |||
1549 | default: | ||
1550 | BUG(); | ||
1551 | return -EFAULT; | ||
1552 | } | ||
1553 | |||
1554 | l = strlen(policy_types[mode]); | ||
1555 | if (buffer + maxlen < p + l + 1) | ||
1556 | return -ENOSPC; | ||
1557 | |||
1558 | strcpy(p, policy_types[mode]); | ||
1559 | p += l; | ||
1560 | |||
1561 | if (!nodes_empty(nodes)) { | ||
1562 | if (buffer + maxlen < p + 2) | ||
1563 | return -ENOSPC; | ||
1564 | *p++ = '='; | ||
1565 | p += nodelist_scnprintf(p, buffer + maxlen - p, nodes); | ||
1566 | } | ||
1567 | return p - buffer; | ||
1568 | } | ||
1569 | |||
1570 | struct numa_maps { | ||
1571 | unsigned long pages; | ||
1572 | unsigned long anon; | ||
1573 | unsigned long mapped; | ||
1574 | unsigned long mapcount_max; | ||
1575 | unsigned long node[MAX_NUMNODES]; | ||
1576 | }; | ||
1577 | |||
1578 | static void gather_stats(struct page *page, void *private) | ||
1579 | { | ||
1580 | struct numa_maps *md = private; | ||
1581 | int count = page_mapcount(page); | ||
1582 | |||
1583 | if (count) | ||
1584 | md->mapped++; | ||
1585 | |||
1586 | if (count > md->mapcount_max) | ||
1587 | md->mapcount_max = count; | ||
1588 | |||
1589 | md->pages++; | ||
1590 | |||
1591 | if (PageAnon(page)) | ||
1592 | md->anon++; | ||
1593 | |||
1594 | md->node[page_to_nid(page)]++; | ||
1595 | cond_resched(); | ||
1596 | } | ||
1597 | |||
1598 | int show_numa_map(struct seq_file *m, void *v) | ||
1599 | { | ||
1600 | struct task_struct *task = m->private; | ||
1601 | struct vm_area_struct *vma = v; | ||
1602 | struct numa_maps *md; | ||
1603 | int n; | ||
1604 | char buffer[50]; | ||
1605 | |||
1606 | if (!vma->vm_mm) | ||
1607 | return 0; | ||
1608 | |||
1609 | md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL); | ||
1610 | if (!md) | ||
1611 | return 0; | ||
1612 | |||
1613 | check_pgd_range(vma, vma->vm_start, vma->vm_end, | ||
1614 | &node_online_map, MPOL_MF_STATS, md); | ||
1615 | |||
1616 | if (md->pages) { | ||
1617 | mpol_to_str(buffer, sizeof(buffer), | ||
1618 | get_vma_policy(task, vma, vma->vm_start)); | ||
1619 | |||
1620 | seq_printf(m, "%08lx %s pages=%lu mapped=%lu maxref=%lu", | ||
1621 | vma->vm_start, buffer, md->pages, | ||
1622 | md->mapped, md->mapcount_max); | ||
1623 | |||
1624 | if (md->anon) | ||
1625 | seq_printf(m," anon=%lu",md->anon); | ||
1626 | |||
1627 | for_each_online_node(n) | ||
1628 | if (md->node[n]) | ||
1629 | seq_printf(m, " N%d=%lu", n, md->node[n]); | ||
1630 | |||
1631 | seq_putc(m, '\n'); | ||
1632 | } | ||
1633 | kfree(md); | ||
1634 | |||
1635 | if (m->count < m->size) | ||
1636 | m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; | ||
1637 | return 0; | ||
1638 | } | ||
1639 | |||
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d348b9035955..4748b906aff2 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -298,7 +298,8 @@ retry: | |||
298 | 298 | ||
299 | /* | 299 | /* |
300 | * Give "p" a good chance of killing itself before we | 300 | * Give "p" a good chance of killing itself before we |
301 | * retry to allocate memory. | 301 | * retry to allocate memory unless "p" is current |
302 | */ | 302 | */ |
303 | schedule_timeout_interruptible(1); | 303 | if (!test_thread_flag(TIF_MEMDIE)) |
304 | schedule_timeout_interruptible(1); | ||
304 | } | 305 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fd47494cb989..e0e84924171b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -53,6 +53,7 @@ struct pglist_data *pgdat_list __read_mostly; | |||
53 | unsigned long totalram_pages __read_mostly; | 53 | unsigned long totalram_pages __read_mostly; |
54 | unsigned long totalhigh_pages __read_mostly; | 54 | unsigned long totalhigh_pages __read_mostly; |
55 | long nr_swap_pages; | 55 | long nr_swap_pages; |
56 | int percpu_pagelist_fraction; | ||
56 | 57 | ||
57 | static void fastcall free_hot_cold_page(struct page *page, int cold); | 58 | static void fastcall free_hot_cold_page(struct page *page, int cold); |
58 | 59 | ||
@@ -307,7 +308,7 @@ static inline int page_is_buddy(struct page *page, int order) | |||
307 | * -- wli | 308 | * -- wli |
308 | */ | 309 | */ |
309 | 310 | ||
310 | static inline void __free_pages_bulk (struct page *page, | 311 | static inline void __free_one_page(struct page *page, |
311 | struct zone *zone, unsigned int order) | 312 | struct zone *zone, unsigned int order) |
312 | { | 313 | { |
313 | unsigned long page_idx; | 314 | unsigned long page_idx; |
@@ -382,40 +383,42 @@ static inline int free_pages_check(struct page *page) | |||
382 | * And clear the zone's pages_scanned counter, to hold off the "all pages are | 383 | * And clear the zone's pages_scanned counter, to hold off the "all pages are |
383 | * pinned" detection logic. | 384 | * pinned" detection logic. |
384 | */ | 385 | */ |
385 | static int | 386 | static void free_pages_bulk(struct zone *zone, int count, |
386 | free_pages_bulk(struct zone *zone, int count, | 387 | struct list_head *list, int order) |
387 | struct list_head *list, unsigned int order) | ||
388 | { | 388 | { |
389 | struct page *page = NULL; | ||
390 | int ret = 0; | ||
391 | |||
392 | spin_lock(&zone->lock); | 389 | spin_lock(&zone->lock); |
393 | zone->all_unreclaimable = 0; | 390 | zone->all_unreclaimable = 0; |
394 | zone->pages_scanned = 0; | 391 | zone->pages_scanned = 0; |
395 | while (!list_empty(list) && count--) { | 392 | while (count--) { |
393 | struct page *page; | ||
394 | |||
395 | BUG_ON(list_empty(list)); | ||
396 | page = list_entry(list->prev, struct page, lru); | 396 | page = list_entry(list->prev, struct page, lru); |
397 | /* have to delete it as __free_pages_bulk list manipulates */ | 397 | /* have to delete it as __free_one_page list manipulates */ |
398 | list_del(&page->lru); | 398 | list_del(&page->lru); |
399 | __free_pages_bulk(page, zone, order); | 399 | __free_one_page(page, zone, order); |
400 | ret++; | ||
401 | } | 400 | } |
402 | spin_unlock(&zone->lock); | 401 | spin_unlock(&zone->lock); |
403 | return ret; | ||
404 | } | 402 | } |
405 | 403 | ||
406 | void __free_pages_ok(struct page *page, unsigned int order) | 404 | static void free_one_page(struct zone *zone, struct page *page, int order) |
407 | { | 405 | { |
408 | unsigned long flags; | ||
409 | LIST_HEAD(list); | 406 | LIST_HEAD(list); |
407 | list_add(&page->lru, &list); | ||
408 | free_pages_bulk(zone, 1, &list, order); | ||
409 | } | ||
410 | |||
411 | static void __free_pages_ok(struct page *page, unsigned int order) | ||
412 | { | ||
413 | unsigned long flags; | ||
410 | int i; | 414 | int i; |
411 | int reserved = 0; | 415 | int reserved = 0; |
412 | 416 | ||
413 | arch_free_page(page, order); | 417 | arch_free_page(page, order); |
414 | 418 | ||
415 | #ifndef CONFIG_MMU | 419 | #ifndef CONFIG_MMU |
416 | if (order > 0) | 420 | for (i = 1 ; i < (1 << order) ; ++i) |
417 | for (i = 1 ; i < (1 << order) ; ++i) | 421 | __put_page(page + i); |
418 | __put_page(page + i); | ||
419 | #endif | 422 | #endif |
420 | 423 | ||
421 | for (i = 0 ; i < (1 << order) ; ++i) | 424 | for (i = 0 ; i < (1 << order) ; ++i) |
@@ -423,11 +426,10 @@ void __free_pages_ok(struct page *page, unsigned int order) | |||
423 | if (reserved) | 426 | if (reserved) |
424 | return; | 427 | return; |
425 | 428 | ||
426 | list_add(&page->lru, &list); | 429 | kernel_map_pages(page, 1 << order, 0); |
427 | kernel_map_pages(page, 1<<order, 0); | ||
428 | local_irq_save(flags); | 430 | local_irq_save(flags); |
429 | __mod_page_state(pgfree, 1 << order); | 431 | __mod_page_state(pgfree, 1 << order); |
430 | free_pages_bulk(page_zone(page), 1, &list, order); | 432 | free_one_page(page_zone(page), page, order); |
431 | local_irq_restore(flags); | 433 | local_irq_restore(flags); |
432 | } | 434 | } |
433 | 435 | ||
@@ -596,14 +598,13 @@ void drain_remote_pages(void) | |||
596 | if (zone->zone_pgdat->node_id == numa_node_id()) | 598 | if (zone->zone_pgdat->node_id == numa_node_id()) |
597 | continue; | 599 | continue; |
598 | 600 | ||
599 | pset = zone->pageset[smp_processor_id()]; | 601 | pset = zone_pcp(zone, smp_processor_id()); |
600 | for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { | 602 | for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { |
601 | struct per_cpu_pages *pcp; | 603 | struct per_cpu_pages *pcp; |
602 | 604 | ||
603 | pcp = &pset->pcp[i]; | 605 | pcp = &pset->pcp[i]; |
604 | if (pcp->count) | 606 | free_pages_bulk(zone, pcp->count, &pcp->list, 0); |
605 | pcp->count -= free_pages_bulk(zone, pcp->count, | 607 | pcp->count = 0; |
606 | &pcp->list, 0); | ||
607 | } | 608 | } |
608 | } | 609 | } |
609 | local_irq_restore(flags); | 610 | local_irq_restore(flags); |
@@ -626,8 +627,8 @@ static void __drain_pages(unsigned int cpu) | |||
626 | 627 | ||
627 | pcp = &pset->pcp[i]; | 628 | pcp = &pset->pcp[i]; |
628 | local_irq_save(flags); | 629 | local_irq_save(flags); |
629 | pcp->count -= free_pages_bulk(zone, pcp->count, | 630 | free_pages_bulk(zone, pcp->count, &pcp->list, 0); |
630 | &pcp->list, 0); | 631 | pcp->count = 0; |
631 | local_irq_restore(flags); | 632 | local_irq_restore(flags); |
632 | } | 633 | } |
633 | } | 634 | } |
@@ -718,8 +719,10 @@ static void fastcall free_hot_cold_page(struct page *page, int cold) | |||
718 | __inc_page_state(pgfree); | 719 | __inc_page_state(pgfree); |
719 | list_add(&page->lru, &pcp->list); | 720 | list_add(&page->lru, &pcp->list); |
720 | pcp->count++; | 721 | pcp->count++; |
721 | if (pcp->count >= pcp->high) | 722 | if (pcp->count >= pcp->high) { |
722 | pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0); | 723 | free_pages_bulk(zone, pcp->batch, &pcp->list, 0); |
724 | pcp->count -= pcp->batch; | ||
725 | } | ||
723 | local_irq_restore(flags); | 726 | local_irq_restore(flags); |
724 | put_cpu(); | 727 | put_cpu(); |
725 | } | 728 | } |
@@ -758,7 +761,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist, | |||
758 | 761 | ||
759 | again: | 762 | again: |
760 | cpu = get_cpu(); | 763 | cpu = get_cpu(); |
761 | if (order == 0) { | 764 | if (likely(order == 0)) { |
762 | struct per_cpu_pages *pcp; | 765 | struct per_cpu_pages *pcp; |
763 | 766 | ||
764 | pcp = &zone_pcp(zone, cpu)->pcp[cold]; | 767 | pcp = &zone_pcp(zone, cpu)->pcp[cold]; |
@@ -973,6 +976,7 @@ rebalance: | |||
973 | cond_resched(); | 976 | cond_resched(); |
974 | 977 | ||
975 | /* We now go into synchronous reclaim */ | 978 | /* We now go into synchronous reclaim */ |
979 | cpuset_memory_pressure_bump(); | ||
976 | p->flags |= PF_MEMALLOC; | 980 | p->flags |= PF_MEMALLOC; |
977 | reclaim_state.reclaimed_slab = 0; | 981 | reclaim_state.reclaimed_slab = 0; |
978 | p->reclaim_state = &reclaim_state; | 982 | p->reclaim_state = &reclaim_state; |
@@ -1204,6 +1208,7 @@ static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) | |||
1204 | int cpu = 0; | 1208 | int cpu = 0; |
1205 | 1209 | ||
1206 | memset(ret, 0, sizeof(*ret)); | 1210 | memset(ret, 0, sizeof(*ret)); |
1211 | cpus_and(*cpumask, *cpumask, cpu_online_map); | ||
1207 | 1212 | ||
1208 | cpu = first_cpu(*cpumask); | 1213 | cpu = first_cpu(*cpumask); |
1209 | while (cpu < NR_CPUS) { | 1214 | while (cpu < NR_CPUS) { |
@@ -1256,7 +1261,7 @@ unsigned long read_page_state_offset(unsigned long offset) | |||
1256 | unsigned long ret = 0; | 1261 | unsigned long ret = 0; |
1257 | int cpu; | 1262 | int cpu; |
1258 | 1263 | ||
1259 | for_each_cpu(cpu) { | 1264 | for_each_online_cpu(cpu) { |
1260 | unsigned long in; | 1265 | unsigned long in; |
1261 | 1266 | ||
1262 | in = (unsigned long)&per_cpu(page_states, cpu) + offset; | 1267 | in = (unsigned long)&per_cpu(page_states, cpu) + offset; |
@@ -1830,6 +1835,24 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) | |||
1830 | INIT_LIST_HEAD(&pcp->list); | 1835 | INIT_LIST_HEAD(&pcp->list); |
1831 | } | 1836 | } |
1832 | 1837 | ||
1838 | /* | ||
1839 | * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist | ||
1840 | * to the value high for the pageset p. | ||
1841 | */ | ||
1842 | |||
1843 | static void setup_pagelist_highmark(struct per_cpu_pageset *p, | ||
1844 | unsigned long high) | ||
1845 | { | ||
1846 | struct per_cpu_pages *pcp; | ||
1847 | |||
1848 | pcp = &p->pcp[0]; /* hot list */ | ||
1849 | pcp->high = high; | ||
1850 | pcp->batch = max(1UL, high/4); | ||
1851 | if ((high/4) > (PAGE_SHIFT * 8)) | ||
1852 | pcp->batch = PAGE_SHIFT * 8; | ||
1853 | } | ||
1854 | |||
1855 | |||
1833 | #ifdef CONFIG_NUMA | 1856 | #ifdef CONFIG_NUMA |
1834 | /* | 1857 | /* |
1835 | * Boot pageset table. One per cpu which is going to be used for all | 1858 | * Boot pageset table. One per cpu which is going to be used for all |
@@ -1861,12 +1884,16 @@ static int __devinit process_zones(int cpu) | |||
1861 | 1884 | ||
1862 | for_each_zone(zone) { | 1885 | for_each_zone(zone) { |
1863 | 1886 | ||
1864 | zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset), | 1887 | zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), |
1865 | GFP_KERNEL, cpu_to_node(cpu)); | 1888 | GFP_KERNEL, cpu_to_node(cpu)); |
1866 | if (!zone->pageset[cpu]) | 1889 | if (!zone_pcp(zone, cpu)) |
1867 | goto bad; | 1890 | goto bad; |
1868 | 1891 | ||
1869 | setup_pageset(zone->pageset[cpu], zone_batchsize(zone)); | 1892 | setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone)); |
1893 | |||
1894 | if (percpu_pagelist_fraction) | ||
1895 | setup_pagelist_highmark(zone_pcp(zone, cpu), | ||
1896 | (zone->present_pages / percpu_pagelist_fraction)); | ||
1870 | } | 1897 | } |
1871 | 1898 | ||
1872 | return 0; | 1899 | return 0; |
@@ -1874,15 +1901,14 @@ bad: | |||
1874 | for_each_zone(dzone) { | 1901 | for_each_zone(dzone) { |
1875 | if (dzone == zone) | 1902 | if (dzone == zone) |
1876 | break; | 1903 | break; |
1877 | kfree(dzone->pageset[cpu]); | 1904 | kfree(zone_pcp(dzone, cpu)); |
1878 | dzone->pageset[cpu] = NULL; | 1905 | zone_pcp(dzone, cpu) = NULL; |
1879 | } | 1906 | } |
1880 | return -ENOMEM; | 1907 | return -ENOMEM; |
1881 | } | 1908 | } |
1882 | 1909 | ||
1883 | static inline void free_zone_pagesets(int cpu) | 1910 | static inline void free_zone_pagesets(int cpu) |
1884 | { | 1911 | { |
1885 | #ifdef CONFIG_NUMA | ||
1886 | struct zone *zone; | 1912 | struct zone *zone; |
1887 | 1913 | ||
1888 | for_each_zone(zone) { | 1914 | for_each_zone(zone) { |
@@ -1891,7 +1917,6 @@ static inline void free_zone_pagesets(int cpu) | |||
1891 | zone_pcp(zone, cpu) = NULL; | 1917 | zone_pcp(zone, cpu) = NULL; |
1892 | kfree(pset); | 1918 | kfree(pset); |
1893 | } | 1919 | } |
1894 | #endif | ||
1895 | } | 1920 | } |
1896 | 1921 | ||
1897 | static int __devinit pageset_cpuup_callback(struct notifier_block *nfb, | 1922 | static int __devinit pageset_cpuup_callback(struct notifier_block *nfb, |
@@ -1962,7 +1987,7 @@ static __devinit void zone_pcp_init(struct zone *zone) | |||
1962 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 1987 | for (cpu = 0; cpu < NR_CPUS; cpu++) { |
1963 | #ifdef CONFIG_NUMA | 1988 | #ifdef CONFIG_NUMA |
1964 | /* Early boot. Slab allocator not functional yet */ | 1989 | /* Early boot. Slab allocator not functional yet */ |
1965 | zone->pageset[cpu] = &boot_pageset[cpu]; | 1990 | zone_pcp(zone, cpu) = &boot_pageset[cpu]; |
1966 | setup_pageset(&boot_pageset[cpu],0); | 1991 | setup_pageset(&boot_pageset[cpu],0); |
1967 | #else | 1992 | #else |
1968 | setup_pageset(zone_pcp(zone,cpu), batch); | 1993 | setup_pageset(zone_pcp(zone,cpu), batch); |
@@ -2205,7 +2230,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg) | |||
2205 | seq_printf(m, | 2230 | seq_printf(m, |
2206 | ")" | 2231 | ")" |
2207 | "\n pagesets"); | 2232 | "\n pagesets"); |
2208 | for (i = 0; i < ARRAY_SIZE(zone->pageset); i++) { | 2233 | for_each_online_cpu(i) { |
2209 | struct per_cpu_pageset *pageset; | 2234 | struct per_cpu_pageset *pageset; |
2210 | int j; | 2235 | int j; |
2211 | 2236 | ||
@@ -2568,6 +2593,32 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, | |||
2568 | return 0; | 2593 | return 0; |
2569 | } | 2594 | } |
2570 | 2595 | ||
2596 | /* | ||
2597 | * percpu_pagelist_fraction - changes the pcp->high for each zone on each | ||
2598 | * cpu. It is the fraction of total pages in each zone that a hot per cpu pagelist | ||
2599 | * can have before it gets flushed back to buddy allocator. | ||
2600 | */ | ||
2601 | |||
2602 | int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, | ||
2603 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | ||
2604 | { | ||
2605 | struct zone *zone; | ||
2606 | unsigned int cpu; | ||
2607 | int ret; | ||
2608 | |||
2609 | ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); | ||
2610 | if (!write || (ret == -EINVAL)) | ||
2611 | return ret; | ||
2612 | for_each_zone(zone) { | ||
2613 | for_each_online_cpu(cpu) { | ||
2614 | unsigned long high; | ||
2615 | high = zone->present_pages / percpu_pagelist_fraction; | ||
2616 | setup_pagelist_highmark(zone_pcp(zone, cpu), high); | ||
2617 | } | ||
2618 | } | ||
2619 | return 0; | ||
2620 | } | ||
2621 | |||
2571 | __initdata int hashdist = HASHDIST_DEFAULT; | 2622 | __initdata int hashdist = HASHDIST_DEFAULT; |
2572 | 2623 | ||
2573 | #ifdef CONFIG_NUMA | 2624 | #ifdef CONFIG_NUMA |
diff --git a/mm/pdflush.c b/mm/pdflush.c
index 52822c98c489..c4b6d0afd736 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -90,7 +90,7 @@ struct pdflush_work { | |||
90 | 90 | ||
91 | static int __pdflush(struct pdflush_work *my_work) | 91 | static int __pdflush(struct pdflush_work *my_work) |
92 | { | 92 | { |
93 | current->flags |= PF_FLUSHER; | 93 | current->flags |= PF_FLUSHER | PF_SWAPWRITE; |
94 | my_work->fn = NULL; | 94 | my_work->fn = NULL; |
95 | my_work->who = current; | 95 | my_work->who = current; |
96 | INIT_LIST_HEAD(&my_work->list); | 96 | INIT_LIST_HEAD(&my_work->list); |
@@ -514,6 +514,13 @@ void page_add_file_rmap(struct page *page) | |||
514 | void page_remove_rmap(struct page *page) | 514 | void page_remove_rmap(struct page *page) |
515 | { | 515 | { |
516 | if (atomic_add_negative(-1, &page->_mapcount)) { | 516 | if (atomic_add_negative(-1, &page->_mapcount)) { |
517 | if (page_mapcount(page) < 0) { | ||
518 | printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page)); | ||
519 | printk (KERN_EMERG " page->flags = %lx\n", page->flags); | ||
520 | printk (KERN_EMERG " page->count = %x\n", page_count(page)); | ||
521 | printk (KERN_EMERG " page->mapping = %p\n", page->mapping); | ||
522 | } | ||
523 | |||
517 | BUG_ON(page_mapcount(page) < 0); | 524 | BUG_ON(page_mapcount(page) < 0); |
518 | /* | 525 | /* |
519 | * It would be tidy to reset the PageAnon mapping here, | 526 | * It would be tidy to reset the PageAnon mapping here, |
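[Editor's note: the diagnostics added to page_remove_rmap() above fire when page_mapcount() goes negative, i.e. a page was unmapped more times than it was mapped. _mapcount is biased so that -1 means "no mappings", which is why atomic_add_negative(-1, ...) flags the last unmap. A userspace model of that detection, using C11 atomics in place of the kernel's atomic_t:]

#include <stdatomic.h>
#include <stdio.h>

/* Biased mapcount: -1 = unmapped, 0 = one mapping, and so on. */
static atomic_int mapcount = ATOMIC_VAR_INIT(-1);

/* Returns true if the counter is negative after adding delta. */
static int add_negative(atomic_int *v, int delta)
{
        return atomic_fetch_add(v, delta) + delta < 0;
}

static void page_add_rmap(void)
{
        atomic_fetch_add(&mapcount, 1);
}

static void page_remove_rmap(void)
{
        if (add_negative(&mapcount, -1)) {
                int mc = atomic_load(&mapcount);

                /* mc + 1 models page_mapcount(); below zero means imbalance. */
                if (mc < -1)
                        printf("Eeek! mapcount went negative (%d)\n", mc + 1);
                else
                        printf("last mapping removed\n");
        }
}

int main(void)
{
        page_add_rmap();
        page_remove_rmap();     /* balanced: "last mapping removed" */
        page_remove_rmap();     /* unbalanced: triggers the diagnostic */
        return 0;
}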
@@ -130,7 +130,6 @@ | |||
130 | #define FORCED_DEBUG 0 | 130 | #define FORCED_DEBUG 0 |
131 | #endif | 131 | #endif |
132 | 132 | ||
133 | |||
134 | /* Shouldn't this be in a header file somewhere? */ | 133 | /* Shouldn't this be in a header file somewhere? */ |
135 | #define BYTES_PER_WORD sizeof(void *) | 134 | #define BYTES_PER_WORD sizeof(void *) |
136 | 135 | ||
@@ -217,12 +216,12 @@ static unsigned long offslab_limit; | |||
217 | * Slabs are chained into three list: fully used, partial, fully free slabs. | 216 | * Slabs are chained into three list: fully used, partial, fully free slabs. |
218 | */ | 217 | */ |
219 | struct slab { | 218 | struct slab { |
220 | struct list_head list; | 219 | struct list_head list; |
221 | unsigned long colouroff; | 220 | unsigned long colouroff; |
222 | void *s_mem; /* including colour offset */ | 221 | void *s_mem; /* including colour offset */ |
223 | unsigned int inuse; /* num of objs active in slab */ | 222 | unsigned int inuse; /* num of objs active in slab */ |
224 | kmem_bufctl_t free; | 223 | kmem_bufctl_t free; |
225 | unsigned short nodeid; | 224 | unsigned short nodeid; |
226 | }; | 225 | }; |
227 | 226 | ||
228 | /* | 227 | /* |
@@ -242,9 +241,9 @@ struct slab { | |||
242 | * We assume struct slab_rcu can overlay struct slab when destroying. | 241 | * We assume struct slab_rcu can overlay struct slab when destroying. |
243 | */ | 242 | */ |
244 | struct slab_rcu { | 243 | struct slab_rcu { |
245 | struct rcu_head head; | 244 | struct rcu_head head; |
246 | kmem_cache_t *cachep; | 245 | kmem_cache_t *cachep; |
247 | void *addr; | 246 | void *addr; |
248 | }; | 247 | }; |
249 | 248 | ||
250 | /* | 249 | /* |
@@ -279,23 +278,23 @@ struct array_cache { | |||
279 | #define BOOT_CPUCACHE_ENTRIES 1 | 278 | #define BOOT_CPUCACHE_ENTRIES 1 |
280 | struct arraycache_init { | 279 | struct arraycache_init { |
281 | struct array_cache cache; | 280 | struct array_cache cache; |
282 | void * entries[BOOT_CPUCACHE_ENTRIES]; | 281 | void *entries[BOOT_CPUCACHE_ENTRIES]; |
283 | }; | 282 | }; |
284 | 283 | ||
285 | /* | 284 | /* |
286 | * The slab lists for all objects. | 285 | * The slab lists for all objects. |
287 | */ | 286 | */ |
288 | struct kmem_list3 { | 287 | struct kmem_list3 { |
289 | struct list_head slabs_partial; /* partial list first, better asm code */ | 288 | struct list_head slabs_partial; /* partial list first, better asm code */ |
290 | struct list_head slabs_full; | 289 | struct list_head slabs_full; |
291 | struct list_head slabs_free; | 290 | struct list_head slabs_free; |
292 | unsigned long free_objects; | 291 | unsigned long free_objects; |
293 | unsigned long next_reap; | 292 | unsigned long next_reap; |
294 | int free_touched; | 293 | int free_touched; |
295 | unsigned int free_limit; | 294 | unsigned int free_limit; |
296 | spinlock_t list_lock; | 295 | spinlock_t list_lock; |
297 | struct array_cache *shared; /* shared per node */ | 296 | struct array_cache *shared; /* shared per node */ |
298 | struct array_cache **alien; /* on other nodes */ | 297 | struct array_cache **alien; /* on other nodes */ |
299 | }; | 298 | }; |
300 | 299 | ||
301 | /* | 300 | /* |
@@ -367,63 +366,63 @@ static inline void kmem_list3_init(struct kmem_list3 *parent) | |||
367 | * | 366 | * |
368 | * manages a cache. | 367 | * manages a cache. |
369 | */ | 368 | */ |
370 | 369 | ||
371 | struct kmem_cache { | 370 | struct kmem_cache { |
372 | /* 1) per-cpu data, touched during every alloc/free */ | 371 | /* 1) per-cpu data, touched during every alloc/free */ |
373 | struct array_cache *array[NR_CPUS]; | 372 | struct array_cache *array[NR_CPUS]; |
374 | unsigned int batchcount; | 373 | unsigned int batchcount; |
375 | unsigned int limit; | 374 | unsigned int limit; |
376 | unsigned int shared; | 375 | unsigned int shared; |
377 | unsigned int objsize; | 376 | unsigned int objsize; |
378 | /* 2) touched by every alloc & free from the backend */ | 377 | /* 2) touched by every alloc & free from the backend */ |
379 | struct kmem_list3 *nodelists[MAX_NUMNODES]; | 378 | struct kmem_list3 *nodelists[MAX_NUMNODES]; |
380 | unsigned int flags; /* constant flags */ | 379 | unsigned int flags; /* constant flags */ |
381 | unsigned int num; /* # of objs per slab */ | 380 | unsigned int num; /* # of objs per slab */ |
382 | spinlock_t spinlock; | 381 | spinlock_t spinlock; |
383 | 382 | ||
384 | /* 3) cache_grow/shrink */ | 383 | /* 3) cache_grow/shrink */ |
385 | /* order of pgs per slab (2^n) */ | 384 | /* order of pgs per slab (2^n) */ |
386 | unsigned int gfporder; | 385 | unsigned int gfporder; |
387 | 386 | ||
388 | /* force GFP flags, e.g. GFP_DMA */ | 387 | /* force GFP flags, e.g. GFP_DMA */ |
389 | gfp_t gfpflags; | 388 | gfp_t gfpflags; |
390 | 389 | ||
391 | size_t colour; /* cache colouring range */ | 390 | size_t colour; /* cache colouring range */ |
392 | unsigned int colour_off; /* colour offset */ | 391 | unsigned int colour_off; /* colour offset */ |
393 | unsigned int colour_next; /* cache colouring */ | 392 | unsigned int colour_next; /* cache colouring */ |
394 | kmem_cache_t *slabp_cache; | 393 | kmem_cache_t *slabp_cache; |
395 | unsigned int slab_size; | 394 | unsigned int slab_size; |
396 | unsigned int dflags; /* dynamic flags */ | 395 | unsigned int dflags; /* dynamic flags */ |
397 | 396 | ||
398 | /* constructor func */ | 397 | /* constructor func */ |
399 | void (*ctor)(void *, kmem_cache_t *, unsigned long); | 398 | void (*ctor) (void *, kmem_cache_t *, unsigned long); |
400 | 399 | ||
401 | /* de-constructor func */ | 400 | /* de-constructor func */ |
402 | void (*dtor)(void *, kmem_cache_t *, unsigned long); | 401 | void (*dtor) (void *, kmem_cache_t *, unsigned long); |
403 | 402 | ||
404 | /* 4) cache creation/removal */ | 403 | /* 4) cache creation/removal */ |
405 | const char *name; | 404 | const char *name; |
406 | struct list_head next; | 405 | struct list_head next; |
407 | 406 | ||
408 | /* 5) statistics */ | 407 | /* 5) statistics */ |
409 | #if STATS | 408 | #if STATS |
410 | unsigned long num_active; | 409 | unsigned long num_active; |
411 | unsigned long num_allocations; | 410 | unsigned long num_allocations; |
412 | unsigned long high_mark; | 411 | unsigned long high_mark; |
413 | unsigned long grown; | 412 | unsigned long grown; |
414 | unsigned long reaped; | 413 | unsigned long reaped; |
415 | unsigned long errors; | 414 | unsigned long errors; |
416 | unsigned long max_freeable; | 415 | unsigned long max_freeable; |
417 | unsigned long node_allocs; | 416 | unsigned long node_allocs; |
418 | unsigned long node_frees; | 417 | unsigned long node_frees; |
419 | atomic_t allochit; | 418 | atomic_t allochit; |
420 | atomic_t allocmiss; | 419 | atomic_t allocmiss; |
421 | atomic_t freehit; | 420 | atomic_t freehit; |
422 | atomic_t freemiss; | 421 | atomic_t freemiss; |
423 | #endif | 422 | #endif |
424 | #if DEBUG | 423 | #if DEBUG |
425 | int dbghead; | 424 | int dbghead; |
426 | int reallen; | 425 | int reallen; |
427 | #endif | 426 | #endif |
428 | }; | 427 | }; |
429 | 428 | ||
@@ -523,14 +522,15 @@ static unsigned long *dbg_redzone2(kmem_cache_t *cachep, void *objp) | |||
523 | { | 522 | { |
524 | BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); | 523 | BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); |
525 | if (cachep->flags & SLAB_STORE_USER) | 524 | if (cachep->flags & SLAB_STORE_USER) |
526 | return (unsigned long*) (objp+cachep->objsize-2*BYTES_PER_WORD); | 525 | return (unsigned long *)(objp + cachep->objsize - |
527 | return (unsigned long*) (objp+cachep->objsize-BYTES_PER_WORD); | 526 | 2 * BYTES_PER_WORD); |
527 | return (unsigned long *)(objp + cachep->objsize - BYTES_PER_WORD); | ||
528 | } | 528 | } |
529 | 529 | ||
530 | static void **dbg_userword(kmem_cache_t *cachep, void *objp) | 530 | static void **dbg_userword(kmem_cache_t *cachep, void *objp) |
531 | { | 531 | { |
532 | BUG_ON(!(cachep->flags & SLAB_STORE_USER)); | 532 | BUG_ON(!(cachep->flags & SLAB_STORE_USER)); |
533 | return (void**)(objp+cachep->objsize-BYTES_PER_WORD); | 533 | return (void **)(objp + cachep->objsize - BYTES_PER_WORD); |
534 | } | 534 | } |
535 | 535 | ||
536 | #else | 536 | #else |
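[Editor's note on the debug accessors above: with SLAB_RED_ZONE and SLAB_STORE_USER enabled, the trailing red-zone word and the last-user word are appended inside the object, so dbg_redzone2()/dbg_userword() locate them purely by offset from the end of objsize. A small sketch of that offset arithmetic; the object size below is an arbitrary example.]

#include <stdio.h>

#define BYTES_PER_WORD ((unsigned long)sizeof(void *))

int main(void)
{
        unsigned long objsize = 128;    /* example: total size including debug words */
        int store_user = 1;             /* is SLAB_STORE_USER set? */

        /* The last word of the object holds the caller address when stored. */
        unsigned long userword_off = objsize - BYTES_PER_WORD;

        /* The second red zone sits just before it, or is itself the last word. */
        unsigned long redzone2_off = store_user ?
                objsize - 2 * BYTES_PER_WORD : objsize - BYTES_PER_WORD;

        printf("trailing red zone at offset %lu\n", redzone2_off);
        if (store_user)
                printf("last-user word at offset %lu\n", userword_off);
        return 0;
}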
@@ -607,31 +607,31 @@ struct cache_names { | |||
607 | static struct cache_names __initdata cache_names[] = { | 607 | static struct cache_names __initdata cache_names[] = { |
608 | #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" }, | 608 | #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" }, |
609 | #include <linux/kmalloc_sizes.h> | 609 | #include <linux/kmalloc_sizes.h> |
610 | { NULL, } | 610 | {NULL,} |
611 | #undef CACHE | 611 | #undef CACHE |
612 | }; | 612 | }; |
613 | 613 | ||
614 | static struct arraycache_init initarray_cache __initdata = | 614 | static struct arraycache_init initarray_cache __initdata = |
615 | { { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; | 615 | { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; |
616 | static struct arraycache_init initarray_generic = | 616 | static struct arraycache_init initarray_generic = |
617 | { { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; | 617 | { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; |
618 | 618 | ||
619 | /* internal cache of cache description objs */ | 619 | /* internal cache of cache description objs */ |
620 | static kmem_cache_t cache_cache = { | 620 | static kmem_cache_t cache_cache = { |
621 | .batchcount = 1, | 621 | .batchcount = 1, |
622 | .limit = BOOT_CPUCACHE_ENTRIES, | 622 | .limit = BOOT_CPUCACHE_ENTRIES, |
623 | .shared = 1, | 623 | .shared = 1, |
624 | .objsize = sizeof(kmem_cache_t), | 624 | .objsize = sizeof(kmem_cache_t), |
625 | .flags = SLAB_NO_REAP, | 625 | .flags = SLAB_NO_REAP, |
626 | .spinlock = SPIN_LOCK_UNLOCKED, | 626 | .spinlock = SPIN_LOCK_UNLOCKED, |
627 | .name = "kmem_cache", | 627 | .name = "kmem_cache", |
628 | #if DEBUG | 628 | #if DEBUG |
629 | .reallen = sizeof(kmem_cache_t), | 629 | .reallen = sizeof(kmem_cache_t), |
630 | #endif | 630 | #endif |
631 | }; | 631 | }; |
632 | 632 | ||
633 | /* Guard access to the cache-chain. */ | 633 | /* Guard access to the cache-chain. */ |
634 | static struct semaphore cache_chain_sem; | 634 | static struct semaphore cache_chain_sem; |
635 | static struct list_head cache_chain; | 635 | static struct list_head cache_chain; |
636 | 636 | ||
637 | /* | 637 | /* |
@@ -655,9 +655,9 @@ static enum { | |||
655 | 655 | ||
656 | static DEFINE_PER_CPU(struct work_struct, reap_work); | 656 | static DEFINE_PER_CPU(struct work_struct, reap_work); |
657 | 657 | ||
658 | static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node); | 658 | static void free_block(kmem_cache_t *cachep, void **objpp, int len, int node); |
659 | static void enable_cpucache (kmem_cache_t *cachep); | 659 | static void enable_cpucache(kmem_cache_t *cachep); |
660 | static void cache_reap (void *unused); | 660 | static void cache_reap(void *unused); |
661 | static int __node_shrink(kmem_cache_t *cachep, int node); | 661 | static int __node_shrink(kmem_cache_t *cachep, int node); |
662 | 662 | ||
663 | static inline struct array_cache *ac_data(kmem_cache_t *cachep) | 663 | static inline struct array_cache *ac_data(kmem_cache_t *cachep) |
@@ -671,9 +671,9 @@ static inline kmem_cache_t *__find_general_cachep(size_t size, gfp_t gfpflags) | |||
671 | 671 | ||
672 | #if DEBUG | 672 | #if DEBUG |
673 | /* This happens if someone tries to call | 673 | /* This happens if someone tries to call |
674 | * kmem_cache_create(), or __kmalloc(), before | 674 | * kmem_cache_create(), or __kmalloc(), before |
675 | * the generic caches are initialized. | 675 | * the generic caches are initialized. |
676 | */ | 676 | */ |
677 | BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL); | 677 | BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL); |
678 | #endif | 678 | #endif |
679 | while (size > csizep->cs_size) | 679 | while (size > csizep->cs_size) |
@@ -697,10 +697,10 @@ EXPORT_SYMBOL(kmem_find_general_cachep); | |||
697 | 697 | ||
698 | /* Cal the num objs, wastage, and bytes left over for a given slab size. */ | 698 | /* Cal the num objs, wastage, and bytes left over for a given slab size. */ |
699 | static void cache_estimate(unsigned long gfporder, size_t size, size_t align, | 699 | static void cache_estimate(unsigned long gfporder, size_t size, size_t align, |
700 | int flags, size_t *left_over, unsigned int *num) | 700 | int flags, size_t *left_over, unsigned int *num) |
701 | { | 701 | { |
702 | int i; | 702 | int i; |
703 | size_t wastage = PAGE_SIZE<<gfporder; | 703 | size_t wastage = PAGE_SIZE << gfporder; |
704 | size_t extra = 0; | 704 | size_t extra = 0; |
705 | size_t base = 0; | 705 | size_t base = 0; |
706 | 706 | ||
@@ -709,7 +709,7 @@ static void cache_estimate(unsigned long gfporder, size_t size, size_t align, | |||
709 | extra = sizeof(kmem_bufctl_t); | 709 | extra = sizeof(kmem_bufctl_t); |
710 | } | 710 | } |
711 | i = 0; | 711 | i = 0; |
712 | while (i*size + ALIGN(base+i*extra, align) <= wastage) | 712 | while (i * size + ALIGN(base + i * extra, align) <= wastage) |
713 | i++; | 713 | i++; |
714 | if (i > 0) | 714 | if (i > 0) |
715 | i--; | 715 | i--; |
@@ -718,8 +718,8 @@ static void cache_estimate(unsigned long gfporder, size_t size, size_t align, | |||
718 | i = SLAB_LIMIT; | 718 | i = SLAB_LIMIT; |
719 | 719 | ||
720 | *num = i; | 720 | *num = i; |
721 | wastage -= i*size; | 721 | wastage -= i * size; |
722 | wastage -= ALIGN(base+i*extra, align); | 722 | wastage -= ALIGN(base + i * extra, align); |
723 | *left_over = wastage; | 723 | *left_over = wastage; |
724 | } | 724 | } |
725 | 725 | ||
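[Editor's note: cache_estimate(), reformatted above, packs as many objects as possible into a 2^gfporder page block while reserving room for on-slab management data (a struct slab plus one kmem_bufctl_t index per object), and reports the leftover bytes later used for cache colouring. A self-contained model of that calculation follows; the management-structure sizes are stand-ins, not the kernel's exact layout.]

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

static void cache_estimate(unsigned int order, unsigned long size,
                           unsigned long align, int on_slab_mgmt,
                           unsigned long *left_over, unsigned int *num)
{
        unsigned long wastage = PAGE_SIZE << order;
        unsigned long base = 0, extra = 0;
        unsigned int i = 0;

        if (on_slab_mgmt) {
                base = 40;      /* stand-in for sizeof(struct slab) */
                extra = 4;      /* stand-in for sizeof(kmem_bufctl_t) */
        }

        /* Count objects until the block overflows, then step back one. */
        while (i * size + ALIGN(base + i * extra, align) <= wastage)
                i++;
        if (i > 0)
                i--;

        *num = i;
        wastage -= i * size;
        wastage -= ALIGN(base + i * extra, align);
        *left_over = wastage;
}

int main(void)
{
        unsigned long left;
        unsigned int num;

        cache_estimate(0, 256, 64, 1, &left, &num);
        printf("order 0, 256-byte objects: %u per slab, %lu bytes left over\n",
               num, left);
        return 0;
}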
@@ -728,7 +728,7 @@ static void cache_estimate(unsigned long gfporder, size_t size, size_t align, | |||
728 | static void __slab_error(const char *function, kmem_cache_t *cachep, char *msg) | 728 | static void __slab_error(const char *function, kmem_cache_t *cachep, char *msg) |
729 | { | 729 | { |
730 | printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", | 730 | printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", |
731 | function, cachep->name, msg); | 731 | function, cachep->name, msg); |
732 | dump_stack(); | 732 | dump_stack(); |
733 | } | 733 | } |
734 | 734 | ||
@@ -755,9 +755,9 @@ static void __devinit start_cpu_timer(int cpu) | |||
755 | } | 755 | } |
756 | 756 | ||
757 | static struct array_cache *alloc_arraycache(int node, int entries, | 757 | static struct array_cache *alloc_arraycache(int node, int entries, |
758 | int batchcount) | 758 | int batchcount) |
759 | { | 759 | { |
760 | int memsize = sizeof(void*)*entries+sizeof(struct array_cache); | 760 | int memsize = sizeof(void *) * entries + sizeof(struct array_cache); |
761 | struct array_cache *nc = NULL; | 761 | struct array_cache *nc = NULL; |
762 | 762 | ||
763 | nc = kmalloc_node(memsize, GFP_KERNEL, node); | 763 | nc = kmalloc_node(memsize, GFP_KERNEL, node); |
@@ -775,7 +775,7 @@ static struct array_cache *alloc_arraycache(int node, int entries, | |||
775 | static inline struct array_cache **alloc_alien_cache(int node, int limit) | 775 | static inline struct array_cache **alloc_alien_cache(int node, int limit) |
776 | { | 776 | { |
777 | struct array_cache **ac_ptr; | 777 | struct array_cache **ac_ptr; |
778 | int memsize = sizeof(void*)*MAX_NUMNODES; | 778 | int memsize = sizeof(void *) * MAX_NUMNODES; |
779 | int i; | 779 | int i; |
780 | 780 | ||
781 | if (limit > 1) | 781 | if (limit > 1) |
@@ -789,7 +789,7 @@ static inline struct array_cache **alloc_alien_cache(int node, int limit) | |||
789 | } | 789 | } |
790 | ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); | 790 | ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); |
791 | if (!ac_ptr[i]) { | 791 | if (!ac_ptr[i]) { |
792 | for (i--; i <=0; i--) | 792 | for (i--; i <= 0; i--) |
793 | kfree(ac_ptr[i]); | 793 | kfree(ac_ptr[i]); |
794 | kfree(ac_ptr); | 794 | kfree(ac_ptr); |
795 | return NULL; | 795 | return NULL; |
@@ -807,12 +807,13 @@ static inline void free_alien_cache(struct array_cache **ac_ptr) | |||
807 | return; | 807 | return; |
808 | 808 | ||
809 | for_each_node(i) | 809 | for_each_node(i) |
810 | kfree(ac_ptr[i]); | 810 | kfree(ac_ptr[i]); |
811 | 811 | ||
812 | kfree(ac_ptr); | 812 | kfree(ac_ptr); |
813 | } | 813 | } |
814 | 814 | ||
815 | static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache *ac, int node) | 815 | static inline void __drain_alien_cache(kmem_cache_t *cachep, |
816 | struct array_cache *ac, int node) | ||
816 | { | 817 | { |
817 | struct kmem_list3 *rl3 = cachep->nodelists[node]; | 818 | struct kmem_list3 *rl3 = cachep->nodelists[node]; |
818 | 819 | ||
@@ -826,7 +827,7 @@ static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache | |||
826 | 827 | ||
827 | static void drain_alien_cache(kmem_cache_t *cachep, struct kmem_list3 *l3) | 828 | static void drain_alien_cache(kmem_cache_t *cachep, struct kmem_list3 *l3) |
828 | { | 829 | { |
829 | int i=0; | 830 | int i = 0; |
830 | struct array_cache *ac; | 831 | struct array_cache *ac; |
831 | unsigned long flags; | 832 | unsigned long flags; |
832 | 833 | ||
@@ -846,14 +847,13 @@ static void drain_alien_cache(kmem_cache_t *cachep, struct kmem_list3 *l3) | |||
846 | #endif | 847 | #endif |
847 | 848 | ||
848 | static int __devinit cpuup_callback(struct notifier_block *nfb, | 849 | static int __devinit cpuup_callback(struct notifier_block *nfb, |
849 | unsigned long action, void *hcpu) | 850 | unsigned long action, void *hcpu) |
850 | { | 851 | { |
851 | long cpu = (long)hcpu; | 852 | long cpu = (long)hcpu; |
852 | kmem_cache_t* cachep; | 853 | kmem_cache_t *cachep; |
853 | struct kmem_list3 *l3 = NULL; | 854 | struct kmem_list3 *l3 = NULL; |
854 | int node = cpu_to_node(cpu); | 855 | int node = cpu_to_node(cpu); |
855 | int memsize = sizeof(struct kmem_list3); | 856 | int memsize = sizeof(struct kmem_list3); |
856 | struct array_cache *nc = NULL; | ||
857 | 857 | ||
858 | switch (action) { | 858 | switch (action) { |
859 | case CPU_UP_PREPARE: | 859 | case CPU_UP_PREPARE: |
@@ -871,27 +871,29 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
871 | */ | 871 | */ |
872 | if (!cachep->nodelists[node]) { | 872 | if (!cachep->nodelists[node]) { |
873 | if (!(l3 = kmalloc_node(memsize, | 873 | if (!(l3 = kmalloc_node(memsize, |
874 | GFP_KERNEL, node))) | 874 | GFP_KERNEL, node))) |
875 | goto bad; | 875 | goto bad; |
876 | kmem_list3_init(l3); | 876 | kmem_list3_init(l3); |
877 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 877 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + |
878 | ((unsigned long)cachep)%REAPTIMEOUT_LIST3; | 878 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
879 | 879 | ||
880 | cachep->nodelists[node] = l3; | 880 | cachep->nodelists[node] = l3; |
881 | } | 881 | } |
882 | 882 | ||
883 | spin_lock_irq(&cachep->nodelists[node]->list_lock); | 883 | spin_lock_irq(&cachep->nodelists[node]->list_lock); |
884 | cachep->nodelists[node]->free_limit = | 884 | cachep->nodelists[node]->free_limit = |
885 | (1 + nr_cpus_node(node)) * | 885 | (1 + nr_cpus_node(node)) * |
886 | cachep->batchcount + cachep->num; | 886 | cachep->batchcount + cachep->num; |
887 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); | 887 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); |
888 | } | 888 | } |
889 | 889 | ||
890 | /* Now we can go ahead with allocating the shared array's | 890 | /* Now we can go ahead with allocating the shared array's |
891 | & array cache's */ | 891 | & array cache's */ |
892 | list_for_each_entry(cachep, &cache_chain, next) { | 892 | list_for_each_entry(cachep, &cache_chain, next) { |
893 | struct array_cache *nc; | ||
894 | |||
893 | nc = alloc_arraycache(node, cachep->limit, | 895 | nc = alloc_arraycache(node, cachep->limit, |
894 | cachep->batchcount); | 896 | cachep->batchcount); |
895 | if (!nc) | 897 | if (!nc) |
896 | goto bad; | 898 | goto bad; |
897 | cachep->array[cpu] = nc; | 899 | cachep->array[cpu] = nc; |
@@ -900,12 +902,13 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
900 | BUG_ON(!l3); | 902 | BUG_ON(!l3); |
901 | if (!l3->shared) { | 903 | if (!l3->shared) { |
902 | if (!(nc = alloc_arraycache(node, | 904 | if (!(nc = alloc_arraycache(node, |
903 | cachep->shared*cachep->batchcount, | 905 | cachep->shared * |
904 | 0xbaadf00d))) | 906 | cachep->batchcount, |
905 | goto bad; | 907 | 0xbaadf00d))) |
908 | goto bad; | ||
906 | 909 | ||
907 | /* we are serialised from CPU_DEAD or | 910 | /* we are serialised from CPU_DEAD or |
908 | CPU_UP_CANCELLED by the cpucontrol lock */ | 911 | CPU_UP_CANCELLED by the cpucontrol lock */ |
909 | l3->shared = nc; | 912 | l3->shared = nc; |
910 | } | 913 | } |
911 | } | 914 | } |
@@ -942,13 +945,13 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
942 | free_block(cachep, nc->entry, nc->avail, node); | 945 | free_block(cachep, nc->entry, nc->avail, node); |
943 | 946 | ||
944 | if (!cpus_empty(mask)) { | 947 | if (!cpus_empty(mask)) { |
945 | spin_unlock(&l3->list_lock); | 948 | spin_unlock(&l3->list_lock); |
946 | goto unlock_cache; | 949 | goto unlock_cache; |
947 | } | 950 | } |
948 | 951 | ||
949 | if (l3->shared) { | 952 | if (l3->shared) { |
950 | free_block(cachep, l3->shared->entry, | 953 | free_block(cachep, l3->shared->entry, |
951 | l3->shared->avail, node); | 954 | l3->shared->avail, node); |
952 | kfree(l3->shared); | 955 | kfree(l3->shared); |
953 | l3->shared = NULL; | 956 | l3->shared = NULL; |
954 | } | 957 | } |
@@ -966,7 +969,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
966 | } else { | 969 | } else { |
967 | spin_unlock(&l3->list_lock); | 970 | spin_unlock(&l3->list_lock); |
968 | } | 971 | } |
969 | unlock_cache: | 972 | unlock_cache: |
970 | spin_unlock_irq(&cachep->spinlock); | 973 | spin_unlock_irq(&cachep->spinlock); |
971 | kfree(nc); | 974 | kfree(nc); |
972 | } | 975 | } |
@@ -975,7 +978,7 @@ unlock_cache: | |||
975 | #endif | 978 | #endif |
976 | } | 979 | } |
977 | return NOTIFY_OK; | 980 | return NOTIFY_OK; |
978 | bad: | 981 | bad: |
979 | up(&cache_chain_sem); | 982 | up(&cache_chain_sem); |
980 | return NOTIFY_BAD; | 983 | return NOTIFY_BAD; |
981 | } | 984 | } |
@@ -985,8 +988,7 @@ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 }; | |||
985 | /* | 988 | /* |
986 | * swap the static kmem_list3 with kmalloced memory | 989 | * swap the static kmem_list3 with kmalloced memory |
987 | */ | 990 | */ |
988 | static void init_list(kmem_cache_t *cachep, struct kmem_list3 *list, | 991 | static void init_list(kmem_cache_t *cachep, struct kmem_list3 *list, int nodeid) |
989 | int nodeid) | ||
990 | { | 992 | { |
991 | struct kmem_list3 *ptr; | 993 | struct kmem_list3 *ptr; |
992 | 994 | ||
@@ -1055,14 +1057,14 @@ void __init kmem_cache_init(void) | |||
1055 | cache_cache.objsize = ALIGN(cache_cache.objsize, cache_line_size()); | 1057 | cache_cache.objsize = ALIGN(cache_cache.objsize, cache_line_size()); |
1056 | 1058 | ||
1057 | cache_estimate(0, cache_cache.objsize, cache_line_size(), 0, | 1059 | cache_estimate(0, cache_cache.objsize, cache_line_size(), 0, |
1058 | &left_over, &cache_cache.num); | 1060 | &left_over, &cache_cache.num); |
1059 | if (!cache_cache.num) | 1061 | if (!cache_cache.num) |
1060 | BUG(); | 1062 | BUG(); |
1061 | 1063 | ||
1062 | cache_cache.colour = left_over/cache_cache.colour_off; | 1064 | cache_cache.colour = left_over / cache_cache.colour_off; |
1063 | cache_cache.colour_next = 0; | 1065 | cache_cache.colour_next = 0; |
1064 | cache_cache.slab_size = ALIGN(cache_cache.num*sizeof(kmem_bufctl_t) + | 1066 | cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + |
1065 | sizeof(struct slab), cache_line_size()); | 1067 | sizeof(struct slab), cache_line_size()); |
1066 | 1068 | ||
1067 | /* 2+3) create the kmalloc caches */ | 1069 | /* 2+3) create the kmalloc caches */ |
1068 | sizes = malloc_sizes; | 1070 | sizes = malloc_sizes; |
@@ -1074,14 +1076,18 @@ void __init kmem_cache_init(void) | |||
1074 | */ | 1076 | */ |
1075 | 1077 | ||
1076 | sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, | 1078 | sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, |
1077 | sizes[INDEX_AC].cs_size, ARCH_KMALLOC_MINALIGN, | 1079 | sizes[INDEX_AC].cs_size, |
1078 | (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL); | 1080 | ARCH_KMALLOC_MINALIGN, |
1081 | (ARCH_KMALLOC_FLAGS | | ||
1082 | SLAB_PANIC), NULL, NULL); | ||
1079 | 1083 | ||
1080 | if (INDEX_AC != INDEX_L3) | 1084 | if (INDEX_AC != INDEX_L3) |
1081 | sizes[INDEX_L3].cs_cachep = | 1085 | sizes[INDEX_L3].cs_cachep = |
1082 | kmem_cache_create(names[INDEX_L3].name, | 1086 | kmem_cache_create(names[INDEX_L3].name, |
1083 | sizes[INDEX_L3].cs_size, ARCH_KMALLOC_MINALIGN, | 1087 | sizes[INDEX_L3].cs_size, |
1084 | (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL); | 1088 | ARCH_KMALLOC_MINALIGN, |
1089 | (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, | ||
1090 | NULL); | ||
1085 | 1091 | ||
1086 | while (sizes->cs_size != ULONG_MAX) { | 1092 | while (sizes->cs_size != ULONG_MAX) { |
1087 | /* | 1093 | /* |
@@ -1091,35 +1097,41 @@ void __init kmem_cache_init(void) | |||
1091 | * Note for systems short on memory removing the alignment will | 1097 | * Note for systems short on memory removing the alignment will |
1092 | * allow tighter packing of the smaller caches. | 1098 | * allow tighter packing of the smaller caches. |
1093 | */ | 1099 | */ |
1094 | if(!sizes->cs_cachep) | 1100 | if (!sizes->cs_cachep) |
1095 | sizes->cs_cachep = kmem_cache_create(names->name, | 1101 | sizes->cs_cachep = kmem_cache_create(names->name, |
1096 | sizes->cs_size, ARCH_KMALLOC_MINALIGN, | 1102 | sizes->cs_size, |
1097 | (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL); | 1103 | ARCH_KMALLOC_MINALIGN, |
1104 | (ARCH_KMALLOC_FLAGS | ||
1105 | | SLAB_PANIC), | ||
1106 | NULL, NULL); | ||
1098 | 1107 | ||
1099 | /* Inc off-slab bufctl limit until the ceiling is hit. */ | 1108 | /* Inc off-slab bufctl limit until the ceiling is hit. */ |
1100 | if (!(OFF_SLAB(sizes->cs_cachep))) { | 1109 | if (!(OFF_SLAB(sizes->cs_cachep))) { |
1101 | offslab_limit = sizes->cs_size-sizeof(struct slab); | 1110 | offslab_limit = sizes->cs_size - sizeof(struct slab); |
1102 | offslab_limit /= sizeof(kmem_bufctl_t); | 1111 | offslab_limit /= sizeof(kmem_bufctl_t); |
1103 | } | 1112 | } |
1104 | 1113 | ||
1105 | sizes->cs_dmacachep = kmem_cache_create(names->name_dma, | 1114 | sizes->cs_dmacachep = kmem_cache_create(names->name_dma, |
1106 | sizes->cs_size, ARCH_KMALLOC_MINALIGN, | 1115 | sizes->cs_size, |
1107 | (ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC), | 1116 | ARCH_KMALLOC_MINALIGN, |
1108 | NULL, NULL); | 1117 | (ARCH_KMALLOC_FLAGS | |
1118 | SLAB_CACHE_DMA | | ||
1119 | SLAB_PANIC), NULL, | ||
1120 | NULL); | ||
1109 | 1121 | ||
1110 | sizes++; | 1122 | sizes++; |
1111 | names++; | 1123 | names++; |
1112 | } | 1124 | } |
1113 | /* 4) Replace the bootstrap head arrays */ | 1125 | /* 4) Replace the bootstrap head arrays */ |
1114 | { | 1126 | { |
1115 | void * ptr; | 1127 | void *ptr; |
1116 | 1128 | ||
1117 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | 1129 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); |
1118 | 1130 | ||
1119 | local_irq_disable(); | 1131 | local_irq_disable(); |
1120 | BUG_ON(ac_data(&cache_cache) != &initarray_cache.cache); | 1132 | BUG_ON(ac_data(&cache_cache) != &initarray_cache.cache); |
1121 | memcpy(ptr, ac_data(&cache_cache), | 1133 | memcpy(ptr, ac_data(&cache_cache), |
1122 | sizeof(struct arraycache_init)); | 1134 | sizeof(struct arraycache_init)); |
1123 | cache_cache.array[smp_processor_id()] = ptr; | 1135 | cache_cache.array[smp_processor_id()] = ptr; |
1124 | local_irq_enable(); | 1136 | local_irq_enable(); |
1125 | 1137 | ||
@@ -1127,11 +1139,11 @@ void __init kmem_cache_init(void) | |||
1127 | 1139 | ||
1128 | local_irq_disable(); | 1140 | local_irq_disable(); |
1129 | BUG_ON(ac_data(malloc_sizes[INDEX_AC].cs_cachep) | 1141 | BUG_ON(ac_data(malloc_sizes[INDEX_AC].cs_cachep) |
1130 | != &initarray_generic.cache); | 1142 | != &initarray_generic.cache); |
1131 | memcpy(ptr, ac_data(malloc_sizes[INDEX_AC].cs_cachep), | 1143 | memcpy(ptr, ac_data(malloc_sizes[INDEX_AC].cs_cachep), |
1132 | sizeof(struct arraycache_init)); | 1144 | sizeof(struct arraycache_init)); |
1133 | malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = | 1145 | malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = |
1134 | ptr; | 1146 | ptr; |
1135 | local_irq_enable(); | 1147 | local_irq_enable(); |
1136 | } | 1148 | } |
1137 | /* 5) Replace the bootstrap kmem_list3's */ | 1149 | /* 5) Replace the bootstrap kmem_list3's */ |
@@ -1139,16 +1151,16 @@ void __init kmem_cache_init(void) | |||
1139 | int node; | 1151 | int node; |
1140 | /* Replace the static kmem_list3 structures for the boot cpu */ | 1152 | /* Replace the static kmem_list3 structures for the boot cpu */ |
1141 | init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], | 1153 | init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], |
1142 | numa_node_id()); | 1154 | numa_node_id()); |
1143 | 1155 | ||
1144 | for_each_online_node(node) { | 1156 | for_each_online_node(node) { |
1145 | init_list(malloc_sizes[INDEX_AC].cs_cachep, | 1157 | init_list(malloc_sizes[INDEX_AC].cs_cachep, |
1146 | &initkmem_list3[SIZE_AC+node], node); | 1158 | &initkmem_list3[SIZE_AC + node], node); |
1147 | 1159 | ||
1148 | if (INDEX_AC != INDEX_L3) { | 1160 | if (INDEX_AC != INDEX_L3) { |
1149 | init_list(malloc_sizes[INDEX_L3].cs_cachep, | 1161 | init_list(malloc_sizes[INDEX_L3].cs_cachep, |
1150 | &initkmem_list3[SIZE_L3+node], | 1162 | &initkmem_list3[SIZE_L3 + node], |
1151 | node); | 1163 | node); |
1152 | } | 1164 | } |
1153 | } | 1165 | } |
1154 | } | 1166 | } |
@@ -1158,7 +1170,7 @@ void __init kmem_cache_init(void) | |||
1158 | kmem_cache_t *cachep; | 1170 | kmem_cache_t *cachep; |
1159 | down(&cache_chain_sem); | 1171 | down(&cache_chain_sem); |
1160 | list_for_each_entry(cachep, &cache_chain, next) | 1172 | list_for_each_entry(cachep, &cache_chain, next) |
1161 | enable_cpucache(cachep); | 1173 | enable_cpucache(cachep); |
1162 | up(&cache_chain_sem); | 1174 | up(&cache_chain_sem); |
1163 | } | 1175 | } |
1164 | 1176 | ||
@@ -1184,7 +1196,7 @@ static int __init cpucache_init(void) | |||
1184 | * pages to gfp. | 1196 | * pages to gfp. |
1185 | */ | 1197 | */ |
1186 | for_each_online_cpu(cpu) | 1198 | for_each_online_cpu(cpu) |
1187 | start_cpu_timer(cpu); | 1199 | start_cpu_timer(cpu); |
1188 | 1200 | ||
1189 | return 0; | 1201 | return 0; |
1190 | } | 1202 | } |
@@ -1226,7 +1238,7 @@ static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid) | |||
1226 | */ | 1238 | */ |
1227 | static void kmem_freepages(kmem_cache_t *cachep, void *addr) | 1239 | static void kmem_freepages(kmem_cache_t *cachep, void *addr) |
1228 | { | 1240 | { |
1229 | unsigned long i = (1<<cachep->gfporder); | 1241 | unsigned long i = (1 << cachep->gfporder); |
1230 | struct page *page = virt_to_page(addr); | 1242 | struct page *page = virt_to_page(addr); |
1231 | const unsigned long nr_freed = i; | 1243 | const unsigned long nr_freed = i; |
1232 | 1244 | ||
@@ -1239,13 +1251,13 @@ static void kmem_freepages(kmem_cache_t *cachep, void *addr) | |||
1239 | if (current->reclaim_state) | 1251 | if (current->reclaim_state) |
1240 | current->reclaim_state->reclaimed_slab += nr_freed; | 1252 | current->reclaim_state->reclaimed_slab += nr_freed; |
1241 | free_pages((unsigned long)addr, cachep->gfporder); | 1253 | free_pages((unsigned long)addr, cachep->gfporder); |
1242 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1254 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
1243 | atomic_sub(1<<cachep->gfporder, &slab_reclaim_pages); | 1255 | atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages); |
1244 | } | 1256 | } |
1245 | 1257 | ||
1246 | static void kmem_rcu_free(struct rcu_head *head) | 1258 | static void kmem_rcu_free(struct rcu_head *head) |
1247 | { | 1259 | { |
1248 | struct slab_rcu *slab_rcu = (struct slab_rcu *) head; | 1260 | struct slab_rcu *slab_rcu = (struct slab_rcu *)head; |
1249 | kmem_cache_t *cachep = slab_rcu->cachep; | 1261 | kmem_cache_t *cachep = slab_rcu->cachep; |
1250 | 1262 | ||
1251 | kmem_freepages(cachep, slab_rcu->addr); | 1263 | kmem_freepages(cachep, slab_rcu->addr); |
@@ -1257,19 +1269,19 @@ static void kmem_rcu_free(struct rcu_head *head) | |||
1257 | 1269 | ||
1258 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1270 | #ifdef CONFIG_DEBUG_PAGEALLOC |
1259 | static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, | 1271 | static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, |
1260 | unsigned long caller) | 1272 | unsigned long caller) |
1261 | { | 1273 | { |
1262 | int size = obj_reallen(cachep); | 1274 | int size = obj_reallen(cachep); |
1263 | 1275 | ||
1264 | addr = (unsigned long *)&((char*)addr)[obj_dbghead(cachep)]; | 1276 | addr = (unsigned long *)&((char *)addr)[obj_dbghead(cachep)]; |
1265 | 1277 | ||
1266 | if (size < 5*sizeof(unsigned long)) | 1278 | if (size < 5 * sizeof(unsigned long)) |
1267 | return; | 1279 | return; |
1268 | 1280 | ||
1269 | *addr++=0x12345678; | 1281 | *addr++ = 0x12345678; |
1270 | *addr++=caller; | 1282 | *addr++ = caller; |
1271 | *addr++=smp_processor_id(); | 1283 | *addr++ = smp_processor_id(); |
1272 | size -= 3*sizeof(unsigned long); | 1284 | size -= 3 * sizeof(unsigned long); |
1273 | { | 1285 | { |
1274 | unsigned long *sptr = &caller; | 1286 | unsigned long *sptr = &caller; |
1275 | unsigned long svalue; | 1287 | unsigned long svalue; |
@@ -1277,7 +1289,7 @@ static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, | |||
1277 | while (!kstack_end(sptr)) { | 1289 | while (!kstack_end(sptr)) { |
1278 | svalue = *sptr++; | 1290 | svalue = *sptr++; |
1279 | if (kernel_text_address(svalue)) { | 1291 | if (kernel_text_address(svalue)) { |
1280 | *addr++=svalue; | 1292 | *addr++ = svalue; |
1281 | size -= sizeof(unsigned long); | 1293 | size -= sizeof(unsigned long); |
1282 | if (size <= sizeof(unsigned long)) | 1294 | if (size <= sizeof(unsigned long)) |
1283 | break; | 1295 | break; |
@@ -1285,25 +1297,25 @@ static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, | |||
1285 | } | 1297 | } |
1286 | 1298 | ||
1287 | } | 1299 | } |
1288 | *addr++=0x87654321; | 1300 | *addr++ = 0x87654321; |
1289 | } | 1301 | } |
1290 | #endif | 1302 | #endif |
1291 | 1303 | ||
1292 | static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val) | 1304 | static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val) |
1293 | { | 1305 | { |
1294 | int size = obj_reallen(cachep); | 1306 | int size = obj_reallen(cachep); |
1295 | addr = &((char*)addr)[obj_dbghead(cachep)]; | 1307 | addr = &((char *)addr)[obj_dbghead(cachep)]; |
1296 | 1308 | ||
1297 | memset(addr, val, size); | 1309 | memset(addr, val, size); |
1298 | *(unsigned char *)(addr+size-1) = POISON_END; | 1310 | *(unsigned char *)(addr + size - 1) = POISON_END; |
1299 | } | 1311 | } |
1300 | 1312 | ||
1301 | static void dump_line(char *data, int offset, int limit) | 1313 | static void dump_line(char *data, int offset, int limit) |
1302 | { | 1314 | { |
1303 | int i; | 1315 | int i; |
1304 | printk(KERN_ERR "%03x:", offset); | 1316 | printk(KERN_ERR "%03x:", offset); |
1305 | for (i=0;i<limit;i++) { | 1317 | for (i = 0; i < limit; i++) { |
1306 | printk(" %02x", (unsigned char)data[offset+i]); | 1318 | printk(" %02x", (unsigned char)data[offset + i]); |
1307 | } | 1319 | } |
1308 | printk("\n"); | 1320 | printk("\n"); |
1309 | } | 1321 | } |
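[Editor's note: the poisoning helpers above fill a free object with POISON_FREE and terminate it with POISON_END; check_poison_obj() then scans for the first byte that no longer matches and hexdumps the surrounding 16-byte line. A minimal fill-and-verify sketch; the 0x6b/0xa5 values follow the kernel's convention but are hard-coded here.]

#include <stdio.h>
#include <string.h>

#define POISON_FREE 0x6b
#define POISON_END  0xa5

static void poison_obj(unsigned char *obj, int size)
{
        memset(obj, POISON_FREE, size);
        obj[size - 1] = POISON_END;     /* marker byte at the very end */
}

/* Returns the offset of the first corrupted byte, or -1 if the poison is intact. */
static int check_poison_obj(const unsigned char *obj, int size)
{
        for (int i = 0; i < size; i++) {
                unsigned char exp = (i == size - 1) ? POISON_END : POISON_FREE;

                if (obj[i] != exp)
                        return i;
        }
        return -1;
}

int main(void)
{
        unsigned char obj[64];
        int bad;

        poison_obj(obj, sizeof(obj));
        obj[20] = 0x00;                 /* simulate a use-after-free scribble */

        bad = check_poison_obj(obj, sizeof(obj));
        if (bad >= 0)
                printf("Slab corruption at offset %d (16-byte line starts at %d)\n",
                       bad, (bad / 16) * 16);
        return 0;
}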
@@ -1318,24 +1330,24 @@ static void print_objinfo(kmem_cache_t *cachep, void *objp, int lines) | |||
1318 | 1330 | ||
1319 | if (cachep->flags & SLAB_RED_ZONE) { | 1331 | if (cachep->flags & SLAB_RED_ZONE) { |
1320 | printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", | 1332 | printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", |
1321 | *dbg_redzone1(cachep, objp), | 1333 | *dbg_redzone1(cachep, objp), |
1322 | *dbg_redzone2(cachep, objp)); | 1334 | *dbg_redzone2(cachep, objp)); |
1323 | } | 1335 | } |
1324 | 1336 | ||
1325 | if (cachep->flags & SLAB_STORE_USER) { | 1337 | if (cachep->flags & SLAB_STORE_USER) { |
1326 | printk(KERN_ERR "Last user: [<%p>]", | 1338 | printk(KERN_ERR "Last user: [<%p>]", |
1327 | *dbg_userword(cachep, objp)); | 1339 | *dbg_userword(cachep, objp)); |
1328 | print_symbol("(%s)", | 1340 | print_symbol("(%s)", |
1329 | (unsigned long)*dbg_userword(cachep, objp)); | 1341 | (unsigned long)*dbg_userword(cachep, objp)); |
1330 | printk("\n"); | 1342 | printk("\n"); |
1331 | } | 1343 | } |
1332 | realobj = (char*)objp+obj_dbghead(cachep); | 1344 | realobj = (char *)objp + obj_dbghead(cachep); |
1333 | size = obj_reallen(cachep); | 1345 | size = obj_reallen(cachep); |
1334 | for (i=0; i<size && lines;i+=16, lines--) { | 1346 | for (i = 0; i < size && lines; i += 16, lines--) { |
1335 | int limit; | 1347 | int limit; |
1336 | limit = 16; | 1348 | limit = 16; |
1337 | if (i+limit > size) | 1349 | if (i + limit > size) |
1338 | limit = size-i; | 1350 | limit = size - i; |
1339 | dump_line(realobj, i, limit); | 1351 | dump_line(realobj, i, limit); |
1340 | } | 1352 | } |
1341 | } | 1353 | } |
@@ -1346,27 +1358,28 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp) | |||
1346 | int size, i; | 1358 | int size, i; |
1347 | int lines = 0; | 1359 | int lines = 0; |
1348 | 1360 | ||
1349 | realobj = (char*)objp+obj_dbghead(cachep); | 1361 | realobj = (char *)objp + obj_dbghead(cachep); |
1350 | size = obj_reallen(cachep); | 1362 | size = obj_reallen(cachep); |
1351 | 1363 | ||
1352 | for (i=0;i<size;i++) { | 1364 | for (i = 0; i < size; i++) { |
1353 | char exp = POISON_FREE; | 1365 | char exp = POISON_FREE; |
1354 | if (i == size-1) | 1366 | if (i == size - 1) |
1355 | exp = POISON_END; | 1367 | exp = POISON_END; |
1356 | if (realobj[i] != exp) { | 1368 | if (realobj[i] != exp) { |
1357 | int limit; | 1369 | int limit; |
1358 | /* Mismatch ! */ | 1370 | /* Mismatch ! */ |
1359 | /* Print header */ | 1371 | /* Print header */ |
1360 | if (lines == 0) { | 1372 | if (lines == 0) { |
1361 | printk(KERN_ERR "Slab corruption: start=%p, len=%d\n", | 1373 | printk(KERN_ERR |
1362 | realobj, size); | 1374 | "Slab corruption: start=%p, len=%d\n", |
1375 | realobj, size); | ||
1363 | print_objinfo(cachep, objp, 0); | 1376 | print_objinfo(cachep, objp, 0); |
1364 | } | 1377 | } |
1365 | /* Hexdump the affected line */ | 1378 | /* Hexdump the affected line */ |
1366 | i = (i/16)*16; | 1379 | i = (i / 16) * 16; |
1367 | limit = 16; | 1380 | limit = 16; |
1368 | if (i+limit > size) | 1381 | if (i + limit > size) |
1369 | limit = size-i; | 1382 | limit = size - i; |
1370 | dump_line(realobj, i, limit); | 1383 | dump_line(realobj, i, limit); |
1371 | i += 16; | 1384 | i += 16; |
1372 | lines++; | 1385 | lines++; |
@@ -1382,19 +1395,19 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp) | |||
1382 | struct slab *slabp = page_get_slab(virt_to_page(objp)); | 1395 | struct slab *slabp = page_get_slab(virt_to_page(objp)); |
1383 | int objnr; | 1396 | int objnr; |
1384 | 1397 | ||
1385 | objnr = (objp-slabp->s_mem)/cachep->objsize; | 1398 | objnr = (objp - slabp->s_mem) / cachep->objsize; |
1386 | if (objnr) { | 1399 | if (objnr) { |
1387 | objp = slabp->s_mem+(objnr-1)*cachep->objsize; | 1400 | objp = slabp->s_mem + (objnr - 1) * cachep->objsize; |
1388 | realobj = (char*)objp+obj_dbghead(cachep); | 1401 | realobj = (char *)objp + obj_dbghead(cachep); |
1389 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", | 1402 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", |
1390 | realobj, size); | 1403 | realobj, size); |
1391 | print_objinfo(cachep, objp, 2); | 1404 | print_objinfo(cachep, objp, 2); |
1392 | } | 1405 | } |
1393 | if (objnr+1 < cachep->num) { | 1406 | if (objnr + 1 < cachep->num) { |
1394 | objp = slabp->s_mem+(objnr+1)*cachep->objsize; | 1407 | objp = slabp->s_mem + (objnr + 1) * cachep->objsize; |
1395 | realobj = (char*)objp+obj_dbghead(cachep); | 1408 | realobj = (char *)objp + obj_dbghead(cachep); |
1396 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", | 1409 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", |
1397 | realobj, size); | 1410 | realobj, size); |
1398 | print_objinfo(cachep, objp, 2); | 1411 | print_objinfo(cachep, objp, 2); |
1399 | } | 1412 | } |
1400 | } | 1413 | } |
@@ -1405,7 +1418,7 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp) | |||
1405 | * Before calling the slab must have been unlinked from the cache. | 1418 | * Before calling the slab must have been unlinked from the cache. |
1406 | * The cache-lock is not held/needed. | 1419 | * The cache-lock is not held/needed. |
1407 | */ | 1420 | */ |
1408 | static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp) | 1421 | static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp) |
1409 | { | 1422 | { |
1410 | void *addr = slabp->s_mem - slabp->colouroff; | 1423 | void *addr = slabp->s_mem - slabp->colouroff; |
1411 | 1424 | ||
@@ -1416,8 +1429,11 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp) | |||
1416 | 1429 | ||
1417 | if (cachep->flags & SLAB_POISON) { | 1430 | if (cachep->flags & SLAB_POISON) { |
1418 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1431 | #ifdef CONFIG_DEBUG_PAGEALLOC |
1419 | if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep)) | 1432 | if ((cachep->objsize % PAGE_SIZE) == 0 |
1420 | kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE,1); | 1433 | && OFF_SLAB(cachep)) |
1434 | kernel_map_pages(virt_to_page(objp), | ||
1435 | cachep->objsize / PAGE_SIZE, | ||
1436 | 1); | ||
1421 | else | 1437 | else |
1422 | check_poison_obj(cachep, objp); | 1438 | check_poison_obj(cachep, objp); |
1423 | #else | 1439 | #else |
@@ -1427,20 +1443,20 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp) | |||
1427 | if (cachep->flags & SLAB_RED_ZONE) { | 1443 | if (cachep->flags & SLAB_RED_ZONE) { |
1428 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) | 1444 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) |
1429 | slab_error(cachep, "start of a freed object " | 1445 | slab_error(cachep, "start of a freed object " |
1430 | "was overwritten"); | 1446 | "was overwritten"); |
1431 | if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) | 1447 | if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) |
1432 | slab_error(cachep, "end of a freed object " | 1448 | slab_error(cachep, "end of a freed object " |
1433 | "was overwritten"); | 1449 | "was overwritten"); |
1434 | } | 1450 | } |
1435 | if (cachep->dtor && !(cachep->flags & SLAB_POISON)) | 1451 | if (cachep->dtor && !(cachep->flags & SLAB_POISON)) |
1436 | (cachep->dtor)(objp+obj_dbghead(cachep), cachep, 0); | 1452 | (cachep->dtor) (objp + obj_dbghead(cachep), cachep, 0); |
1437 | } | 1453 | } |
1438 | #else | 1454 | #else |
1439 | if (cachep->dtor) { | 1455 | if (cachep->dtor) { |
1440 | int i; | 1456 | int i; |
1441 | for (i = 0; i < cachep->num; i++) { | 1457 | for (i = 0; i < cachep->num; i++) { |
1442 | void* objp = slabp->s_mem+cachep->objsize*i; | 1458 | void *objp = slabp->s_mem + cachep->objsize * i; |
1443 | (cachep->dtor)(objp, cachep, 0); | 1459 | (cachep->dtor) (objp, cachep, 0); |
1444 | } | 1460 | } |
1445 | } | 1461 | } |
1446 | #endif | 1462 | #endif |
@@ -1448,7 +1464,7 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp) | |||
1448 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { | 1464 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { |
1449 | struct slab_rcu *slab_rcu; | 1465 | struct slab_rcu *slab_rcu; |
1450 | 1466 | ||
1451 | slab_rcu = (struct slab_rcu *) slabp; | 1467 | slab_rcu = (struct slab_rcu *)slabp; |
1452 | slab_rcu->cachep = cachep; | 1468 | slab_rcu->cachep = cachep; |
1453 | slab_rcu->addr = addr; | 1469 | slab_rcu->addr = addr; |
1454 | call_rcu(&slab_rcu->head, kmem_rcu_free); | 1470 | call_rcu(&slab_rcu->head, kmem_rcu_free); |
@@ -1466,11 +1482,58 @@ static inline void set_up_list3s(kmem_cache_t *cachep, int index) | |||
1466 | int node; | 1482 | int node; |
1467 | 1483 | ||
1468 | for_each_online_node(node) { | 1484 | for_each_online_node(node) { |
1469 | cachep->nodelists[node] = &initkmem_list3[index+node]; | 1485 | cachep->nodelists[node] = &initkmem_list3[index + node]; |
1470 | cachep->nodelists[node]->next_reap = jiffies + | 1486 | cachep->nodelists[node]->next_reap = jiffies + |
1471 | REAPTIMEOUT_LIST3 + | 1487 | REAPTIMEOUT_LIST3 + |
1472 | ((unsigned long)cachep)%REAPTIMEOUT_LIST3; | 1488 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
1489 | } | ||
1490 | } | ||
1491 | |||
1492 | /** | ||
1493 | * calculate_slab_order - calculate size (page order) of slabs and the number | ||
1494 | * of objects per slab. | ||
1495 | * | ||
1496 | * This could be made much more intelligent. For now, try to avoid using | ||
1497 | * high order pages for slabs. When the gfp() functions are more friendly | ||
1498 | * towards high-order requests, this should be changed. | ||
1499 | */ | ||
1500 | static inline size_t calculate_slab_order(kmem_cache_t *cachep, size_t size, | ||
1501 | size_t align, gfp_t flags) | ||
1502 | { | ||
1503 | size_t left_over = 0; | ||
1504 | |||
1505 | for (;; cachep->gfporder++) { | ||
1506 | unsigned int num; | ||
1507 | size_t remainder; | ||
1508 | |||
1509 | if (cachep->gfporder > MAX_GFP_ORDER) { | ||
1510 | cachep->num = 0; | ||
1511 | break; | ||
1512 | } | ||
1513 | |||
1514 | cache_estimate(cachep->gfporder, size, align, flags, | ||
1515 | &remainder, &num); | ||
1516 | if (!num) | ||
1517 | continue; | ||
1518 | /* More than offslab_limit objects will cause problems */ | ||
1519 | if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) | ||
1520 | break; | ||
1521 | |||
1522 | cachep->num = num; | ||
1523 | left_over = remainder; | ||
1524 | |||
1525 | /* | ||
1526 | * Large number of objects is good, but very large slabs are | ||
1527 | * currently bad for the gfp()s. | ||
1528 | */ | ||
1529 | if (cachep->gfporder >= slab_break_gfp_order) | ||
1530 | break; | ||
1531 | |||
1532 | if ((left_over * 8) <= (PAGE_SIZE << cachep->gfporder)) | ||
1533 | /* Acceptable internal fragmentation */ | ||
1534 | break; | ||
1473 | } | 1535 | } |
1536 | return left_over; | ||
1474 | } | 1537 | } |
1475 | 1538 | ||
1476 | /** | 1539 | /** |
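[Editor's note: calculate_slab_order(), introduced above, replaces the goto-based loop removed later in this patch: it raises gfporder until the slab holds at least one object and either internal fragmentation drops to 1/8 of the slab or a cap (MAX_GFP_ORDER, the off-slab bufctl limit, slab_break_gfp_order) is hit. A simplified userspace model of that search; the limits and object size are example values and per-object management overhead is ignored.]

#include <stdio.h>

#define PAGE_SIZE   4096UL
#define MAX_ORDER   5
#define BREAK_ORDER 1           /* prefer small orders, like slab_break_gfp_order */

static void estimate(unsigned int order, unsigned long size,
                     unsigned int *num, unsigned long *left)
{
        unsigned long bytes = PAGE_SIZE << order;

        *num = bytes / size;
        *left = bytes - *num * size;
}

int main(void)
{
        unsigned long size = 1100;      /* awkward object size to show the search */
        unsigned long left = 0;
        unsigned int num = 0, order;

        for (order = 0; order <= MAX_ORDER; order++) {
                estimate(order, size, &num, &left);
                if (!num)
                        continue;       /* object does not even fit yet */
                if (order >= BREAK_ORDER)
                        break;          /* very large slabs are bad for the page allocator */
                if (left * 8 <= (PAGE_SIZE << order))
                        break;          /* internal fragmentation acceptable */
        }
        printf("chose order %u: %u objects, %lu bytes wasted\n", order, num, left);
        return 0;
}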
@@ -1519,14 +1582,13 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1519 | * Sanity checks... these are all serious usage bugs. | 1582 | * Sanity checks... these are all serious usage bugs. |
1520 | */ | 1583 | */ |
1521 | if ((!name) || | 1584 | if ((!name) || |
1522 | in_interrupt() || | 1585 | in_interrupt() || |
1523 | (size < BYTES_PER_WORD) || | 1586 | (size < BYTES_PER_WORD) || |
1524 | (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) || | 1587 | (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { |
1525 | (dtor && !ctor)) { | 1588 | printk(KERN_ERR "%s: Early error in slab %s\n", |
1526 | printk(KERN_ERR "%s: Early error in slab %s\n", | 1589 | __FUNCTION__, name); |
1527 | __FUNCTION__, name); | 1590 | BUG(); |
1528 | BUG(); | 1591 | } |
1529 | } | ||
1530 | 1592 | ||
1531 | down(&cache_chain_sem); | 1593 | down(&cache_chain_sem); |
1532 | 1594 | ||
@@ -1546,11 +1608,11 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1546 | set_fs(old_fs); | 1608 | set_fs(old_fs); |
1547 | if (res) { | 1609 | if (res) { |
1548 | printk("SLAB: cache with size %d has lost its name\n", | 1610 | printk("SLAB: cache with size %d has lost its name\n", |
1549 | pc->objsize); | 1611 | pc->objsize); |
1550 | continue; | 1612 | continue; |
1551 | } | 1613 | } |
1552 | 1614 | ||
1553 | if (!strcmp(pc->name,name)) { | 1615 | if (!strcmp(pc->name, name)) { |
1554 | printk("kmem_cache_create: duplicate cache %s\n", name); | 1616 | printk("kmem_cache_create: duplicate cache %s\n", name); |
1555 | dump_stack(); | 1617 | dump_stack(); |
1556 | goto oops; | 1618 | goto oops; |
@@ -1562,10 +1624,9 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1562 | if ((flags & SLAB_DEBUG_INITIAL) && !ctor) { | 1624 | if ((flags & SLAB_DEBUG_INITIAL) && !ctor) { |
1563 | /* No constructor, but inital state check requested */ | 1625 | /* No constructor, but inital state check requested */ |
1564 | printk(KERN_ERR "%s: No con, but init state check " | 1626 | printk(KERN_ERR "%s: No con, but init state check " |
1565 | "requested - %s\n", __FUNCTION__, name); | 1627 | "requested - %s\n", __FUNCTION__, name); |
1566 | flags &= ~SLAB_DEBUG_INITIAL; | 1628 | flags &= ~SLAB_DEBUG_INITIAL; |
1567 | } | 1629 | } |
1568 | |||
1569 | #if FORCED_DEBUG | 1630 | #if FORCED_DEBUG |
1570 | /* | 1631 | /* |
1571 | * Enable redzoning and last user accounting, except for caches with | 1632 | * Enable redzoning and last user accounting, except for caches with |
@@ -1573,8 +1634,9 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1573 | * above the next power of two: caches with object sizes just above a | 1634 | * above the next power of two: caches with object sizes just above a |
1574 | * power of two have a significant amount of internal fragmentation. | 1635 | * power of two have a significant amount of internal fragmentation. |
1575 | */ | 1636 | */ |
1576 | if ((size < 4096 || fls(size-1) == fls(size-1+3*BYTES_PER_WORD))) | 1637 | if ((size < 4096 |
1577 | flags |= SLAB_RED_ZONE|SLAB_STORE_USER; | 1638 | || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD))) |
1639 | flags |= SLAB_RED_ZONE | SLAB_STORE_USER; | ||
1578 | if (!(flags & SLAB_DESTROY_BY_RCU)) | 1640 | if (!(flags & SLAB_DESTROY_BY_RCU)) |
1579 | flags |= SLAB_POISON; | 1641 | flags |= SLAB_POISON; |
1580 | #endif | 1642 | #endif |
@@ -1595,9 +1657,9 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1595 | * unaligned accesses for some archs when redzoning is used, and makes | 1657 | * unaligned accesses for some archs when redzoning is used, and makes |
1596 | * sure any on-slab bufctl's are also correctly aligned. | 1658 | * sure any on-slab bufctl's are also correctly aligned. |
1597 | */ | 1659 | */ |
1598 | if (size & (BYTES_PER_WORD-1)) { | 1660 | if (size & (BYTES_PER_WORD - 1)) { |
1599 | size += (BYTES_PER_WORD-1); | 1661 | size += (BYTES_PER_WORD - 1); |
1600 | size &= ~(BYTES_PER_WORD-1); | 1662 | size &= ~(BYTES_PER_WORD - 1); |
1601 | } | 1663 | } |
1602 | 1664 | ||
1603 | /* calculate out the final buffer alignment: */ | 1665 | /* calculate out the final buffer alignment: */ |
@@ -1608,7 +1670,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1608 | * objects into one cacheline. | 1670 | * objects into one cacheline. |
1609 | */ | 1671 | */ |
1610 | ralign = cache_line_size(); | 1672 | ralign = cache_line_size(); |
1611 | while (size <= ralign/2) | 1673 | while (size <= ralign / 2) |
1612 | ralign /= 2; | 1674 | ralign /= 2; |
1613 | } else { | 1675 | } else { |
1614 | ralign = BYTES_PER_WORD; | 1676 | ralign = BYTES_PER_WORD; |
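[Editor's note: the loop touched above implements the SLAB_HWCACHE_ALIGN default — start from the cache-line size and halve the alignment while at least two objects would still fit in one line, so small objects do not each consume a full line. A tiny sketch, assuming a 64-byte line.]

#include <stdio.h>

int main(void)
{
        unsigned long cache_line = 64;  /* assumed L1 line size */

        for (unsigned long size = 8; size <= 128; size *= 2) {
                unsigned long ralign = cache_line;

                /* Halve while two or more objects fit in the current alignment. */
                while (size <= ralign / 2)
                        ralign /= 2;
                printf("objsize %3lu -> alignment %lu\n", size, ralign);
        }
        return 0;
}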
@@ -1617,13 +1679,13 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1617 | if (ralign < ARCH_SLAB_MINALIGN) { | 1679 | if (ralign < ARCH_SLAB_MINALIGN) { |
1618 | ralign = ARCH_SLAB_MINALIGN; | 1680 | ralign = ARCH_SLAB_MINALIGN; |
1619 | if (ralign > BYTES_PER_WORD) | 1681 | if (ralign > BYTES_PER_WORD) |
1620 | flags &= ~(SLAB_RED_ZONE|SLAB_STORE_USER); | 1682 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); |
1621 | } | 1683 | } |
1622 | /* 3) caller mandated alignment: disables debug if necessary */ | 1684 | /* 3) caller mandated alignment: disables debug if necessary */ |
1623 | if (ralign < align) { | 1685 | if (ralign < align) { |
1624 | ralign = align; | 1686 | ralign = align; |
1625 | if (ralign > BYTES_PER_WORD) | 1687 | if (ralign > BYTES_PER_WORD) |
1626 | flags &= ~(SLAB_RED_ZONE|SLAB_STORE_USER); | 1688 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); |
1627 | } | 1689 | } |
1628 | /* 4) Store it. Note that the debug code below can reduce | 1690 | /* 4) Store it. Note that the debug code below can reduce |
1629 | * the alignment to BYTES_PER_WORD. | 1691 | * the alignment to BYTES_PER_WORD. |
@@ -1645,7 +1707,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1645 | 1707 | ||
1646 | /* add space for red zone words */ | 1708 | /* add space for red zone words */ |
1647 | cachep->dbghead += BYTES_PER_WORD; | 1709 | cachep->dbghead += BYTES_PER_WORD; |
1648 | size += 2*BYTES_PER_WORD; | 1710 | size += 2 * BYTES_PER_WORD; |
1649 | } | 1711 | } |
1650 | if (flags & SLAB_STORE_USER) { | 1712 | if (flags & SLAB_STORE_USER) { |
1651 | /* user store requires word alignment and | 1713 | /* user store requires word alignment and |
@@ -1656,7 +1718,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1656 | size += BYTES_PER_WORD; | 1718 | size += BYTES_PER_WORD; |
1657 | } | 1719 | } |
1658 | #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) | 1720 | #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) |
1659 | if (size >= malloc_sizes[INDEX_L3+1].cs_size && cachep->reallen > cache_line_size() && size < PAGE_SIZE) { | 1721 | if (size >= malloc_sizes[INDEX_L3 + 1].cs_size |
1722 | && cachep->reallen > cache_line_size() && size < PAGE_SIZE) { | ||
1660 | cachep->dbghead += PAGE_SIZE - size; | 1723 | cachep->dbghead += PAGE_SIZE - size; |
1661 | size = PAGE_SIZE; | 1724 | size = PAGE_SIZE; |
1662 | } | 1725 | } |
@@ -1664,7 +1727,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1664 | #endif | 1727 | #endif |
1665 | 1728 | ||
1666 | /* Determine if the slab management is 'on' or 'off' slab. */ | 1729 | /* Determine if the slab management is 'on' or 'off' slab. */ |
1667 | if (size >= (PAGE_SIZE>>3)) | 1730 | if (size >= (PAGE_SIZE >> 3)) |
1668 | /* | 1731 | /* |
1669 | * Size is large, assume best to place the slab management obj | 1732 | * Size is large, assume best to place the slab management obj |
1670 | * off-slab (should allow better packing of objs). | 1733 | * off-slab (should allow better packing of objs). |
@@ -1681,47 +1744,9 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1681 | */ | 1744 | */ |
1682 | cachep->gfporder = 0; | 1745 | cachep->gfporder = 0; |
1683 | cache_estimate(cachep->gfporder, size, align, flags, | 1746 | cache_estimate(cachep->gfporder, size, align, flags, |
1684 | &left_over, &cachep->num); | 1747 | &left_over, &cachep->num); |
1685 | } else { | 1748 | } else |
1686 | /* | 1749 | left_over = calculate_slab_order(cachep, size, align, flags); |
1687 | * Calculate size (in pages) of slabs, and the num of objs per | ||
1688 | * slab. This could be made much more intelligent. For now, | ||
1689 | * try to avoid using high page-orders for slabs. When the | ||
1690 | * gfp() funcs are more friendly towards high-order requests, | ||
1691 | * this should be changed. | ||
1692 | */ | ||
1693 | do { | ||
1694 | unsigned int break_flag = 0; | ||
1695 | cal_wastage: | ||
1696 | cache_estimate(cachep->gfporder, size, align, flags, | ||
1697 | &left_over, &cachep->num); | ||
1698 | if (break_flag) | ||
1699 | break; | ||
1700 | if (cachep->gfporder >= MAX_GFP_ORDER) | ||
1701 | break; | ||
1702 | if (!cachep->num) | ||
1703 | goto next; | ||
1704 | if (flags & CFLGS_OFF_SLAB && | ||
1705 | cachep->num > offslab_limit) { | ||
1706 | /* This num of objs will cause problems. */ | ||
1707 | cachep->gfporder--; | ||
1708 | break_flag++; | ||
1709 | goto cal_wastage; | ||
1710 | } | ||
1711 | |||
1712 | /* | ||
1713 | * Large num of objs is good, but v. large slabs are | ||
1714 | * currently bad for the gfp()s. | ||
1715 | */ | ||
1716 | if (cachep->gfporder >= slab_break_gfp_order) | ||
1717 | break; | ||
1718 | |||
1719 | if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder)) | ||
1720 | break; /* Acceptable internal fragmentation. */ | ||
1721 | next: | ||
1722 | cachep->gfporder++; | ||
1723 | } while (1); | ||
1724 | } | ||
1725 | 1750 | ||
1726 | if (!cachep->num) { | 1751 | if (!cachep->num) { |
1727 | printk("kmem_cache_create: couldn't create cache %s.\n", name); | 1752 | printk("kmem_cache_create: couldn't create cache %s.\n", name); |
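The hunk above drops slab.c's open-coded search for a suitable slab page order in favour of a single call to calculate_slab_order(), a helper this patch adds earlier in the file (outside this excerpt). As a rough, stand-alone sketch of the heuristic the removed loop implemented: cache_estimate() is reduced to a naive stand-in here, MAX_GFP_ORDER and BREAK_GFP_ORDER are illustrative constants rather than the kernel's values, and the off-slab bufctl limit is omitted entirely.

        #include <stdio.h>
        #include <stddef.h>

        #define PAGE_SIZE       4096UL
        #define MAX_GFP_ORDER   5       /* illustrative cap, not the kernel constant */
        #define BREAK_GFP_ORDER 1       /* stand-in for slab_break_gfp_order */

        /* Simplified stand-in for cache_estimate(): pack as many objects as fit
         * into 2^order pages and report the leftover bytes. */
        static void estimate(unsigned int order, size_t size,
                             size_t *left_over, unsigned int *num)
        {
                size_t slab_bytes = PAGE_SIZE << order;

                *num = slab_bytes / size;
                *left_over = slab_bytes - *num * size;
        }

        /* Mimics the removed loop: raise the order until objects fit and the
         * internal fragmentation drops below 1/8th of the slab. */
        static unsigned int pick_order(size_t size, size_t *left_over,
                                       unsigned int *num)
        {
                unsigned int order;

                for (order = 0; order < MAX_GFP_ORDER; order++) {
                        estimate(order, size, left_over, num);
                        if (!*num)
                                continue;       /* object does not fit yet */
                        if (order >= BREAK_GFP_ORDER)
                                break;          /* avoid high-order pages */
                        if (*left_over * 8 <= (PAGE_SIZE << order))
                                break;          /* fragmentation acceptable */
                }
                return order;
        }

        int main(void)
        {
                size_t sizes[] = { 96, 700, 3000 };

                for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                        size_t left;
                        unsigned int num;
                        unsigned int order = pick_order(sizes[i], &left, &num);

                        printf("size %4zu -> order %u, %u objs, %zu bytes wasted\n",
                               sizes[i], order, num, left);
                }
                return 0;
        }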
@@ -1729,8 +1754,8 @@ next: | |||
1729 | cachep = NULL; | 1754 | cachep = NULL; |
1730 | goto oops; | 1755 | goto oops; |
1731 | } | 1756 | } |
1732 | slab_size = ALIGN(cachep->num*sizeof(kmem_bufctl_t) | 1757 | slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) |
1733 | + sizeof(struct slab), align); | 1758 | + sizeof(struct slab), align); |
1734 | 1759 | ||
1735 | /* | 1760 | /* |
1736 | * If the slab has been placed off-slab, and we have enough space then | 1761 | * If the slab has been placed off-slab, and we have enough space then |
@@ -1743,14 +1768,15 @@ next: | |||
1743 | 1768 | ||
1744 | if (flags & CFLGS_OFF_SLAB) { | 1769 | if (flags & CFLGS_OFF_SLAB) { |
1745 | /* really off slab. No need for manual alignment */ | 1770 | /* really off slab. No need for manual alignment */ |
1746 | slab_size = cachep->num*sizeof(kmem_bufctl_t)+sizeof(struct slab); | 1771 | slab_size = |
1772 | cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); | ||
1747 | } | 1773 | } |
1748 | 1774 | ||
1749 | cachep->colour_off = cache_line_size(); | 1775 | cachep->colour_off = cache_line_size(); |
1750 | /* Offset must be a multiple of the alignment. */ | 1776 | /* Offset must be a multiple of the alignment. */ |
1751 | if (cachep->colour_off < align) | 1777 | if (cachep->colour_off < align) |
1752 | cachep->colour_off = align; | 1778 | cachep->colour_off = align; |
1753 | cachep->colour = left_over/cachep->colour_off; | 1779 | cachep->colour = left_over / cachep->colour_off; |
1754 | cachep->slab_size = slab_size; | 1780 | cachep->slab_size = slab_size; |
1755 | cachep->flags = flags; | 1781 | cachep->flags = flags; |
1756 | cachep->gfpflags = 0; | 1782 | cachep->gfpflags = 0; |
@@ -1777,7 +1803,7 @@ next: | |||
1777 | * the creation of further caches will BUG(). | 1803 | * the creation of further caches will BUG(). |
1778 | */ | 1804 | */ |
1779 | cachep->array[smp_processor_id()] = | 1805 | cachep->array[smp_processor_id()] = |
1780 | &initarray_generic.cache; | 1806 | &initarray_generic.cache; |
1781 | 1807 | ||
1782 | /* If the cache that's used by | 1808 | /* If the cache that's used by |
1783 | * kmalloc(sizeof(kmem_list3)) is the first cache, | 1809 | * kmalloc(sizeof(kmem_list3)) is the first cache, |
@@ -1791,8 +1817,7 @@ next: | |||
1791 | g_cpucache_up = PARTIAL_AC; | 1817 | g_cpucache_up = PARTIAL_AC; |
1792 | } else { | 1818 | } else { |
1793 | cachep->array[smp_processor_id()] = | 1819 | cachep->array[smp_processor_id()] = |
1794 | kmalloc(sizeof(struct arraycache_init), | 1820 | kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); |
1795 | GFP_KERNEL); | ||
1796 | 1821 | ||
1797 | if (g_cpucache_up == PARTIAL_AC) { | 1822 | if (g_cpucache_up == PARTIAL_AC) { |
1798 | set_up_list3s(cachep, SIZE_L3); | 1823 | set_up_list3s(cachep, SIZE_L3); |
@@ -1802,16 +1827,18 @@ next: | |||
1802 | for_each_online_node(node) { | 1827 | for_each_online_node(node) { |
1803 | 1828 | ||
1804 | cachep->nodelists[node] = | 1829 | cachep->nodelists[node] = |
1805 | kmalloc_node(sizeof(struct kmem_list3), | 1830 | kmalloc_node(sizeof |
1806 | GFP_KERNEL, node); | 1831 | (struct kmem_list3), |
1832 | GFP_KERNEL, node); | ||
1807 | BUG_ON(!cachep->nodelists[node]); | 1833 | BUG_ON(!cachep->nodelists[node]); |
1808 | kmem_list3_init(cachep->nodelists[node]); | 1834 | kmem_list3_init(cachep-> |
1835 | nodelists[node]); | ||
1809 | } | 1836 | } |
1810 | } | 1837 | } |
1811 | } | 1838 | } |
1812 | cachep->nodelists[numa_node_id()]->next_reap = | 1839 | cachep->nodelists[numa_node_id()]->next_reap = |
1813 | jiffies + REAPTIMEOUT_LIST3 + | 1840 | jiffies + REAPTIMEOUT_LIST3 + |
1814 | ((unsigned long)cachep)%REAPTIMEOUT_LIST3; | 1841 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
1815 | 1842 | ||
1816 | BUG_ON(!ac_data(cachep)); | 1843 | BUG_ON(!ac_data(cachep)); |
1817 | ac_data(cachep)->avail = 0; | 1844 | ac_data(cachep)->avail = 0; |
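One detail in the hunk above that is easy to read past: next_reap is set to jiffies plus REAPTIMEOUT_LIST3 plus the cache pointer taken modulo REAPTIMEOUT_LIST3. The per-cache offset appears to be there so that caches set up back to back do not all fall due for reaping in the same tick; each cache's deadline lands somewhere inside the reap window rather than at its start.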
@@ -1820,15 +1847,15 @@ next: | |||
1820 | ac_data(cachep)->touched = 0; | 1847 | ac_data(cachep)->touched = 0; |
1821 | cachep->batchcount = 1; | 1848 | cachep->batchcount = 1; |
1822 | cachep->limit = BOOT_CPUCACHE_ENTRIES; | 1849 | cachep->limit = BOOT_CPUCACHE_ENTRIES; |
1823 | } | 1850 | } |
1824 | 1851 | ||
1825 | /* cache setup completed, link it into the list */ | 1852 | /* cache setup completed, link it into the list */ |
1826 | list_add(&cachep->next, &cache_chain); | 1853 | list_add(&cachep->next, &cache_chain); |
1827 | unlock_cpu_hotplug(); | 1854 | unlock_cpu_hotplug(); |
1828 | oops: | 1855 | oops: |
1829 | if (!cachep && (flags & SLAB_PANIC)) | 1856 | if (!cachep && (flags & SLAB_PANIC)) |
1830 | panic("kmem_cache_create(): failed to create slab `%s'\n", | 1857 | panic("kmem_cache_create(): failed to create slab `%s'\n", |
1831 | name); | 1858 | name); |
1832 | up(&cache_chain_sem); | 1859 | up(&cache_chain_sem); |
1833 | return cachep; | 1860 | return cachep; |
1834 | } | 1861 | } |
@@ -1871,7 +1898,7 @@ static inline void check_spinlock_acquired_node(kmem_cache_t *cachep, int node) | |||
1871 | /* | 1898 | /* |
1872 | * Waits for all CPUs to execute func(). | 1899 | * Waits for all CPUs to execute func(). |
1873 | */ | 1900 | */ |
1874 | static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg) | 1901 | static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg) |
1875 | { | 1902 | { |
1876 | check_irq_on(); | 1903 | check_irq_on(); |
1877 | preempt_disable(); | 1904 | preempt_disable(); |
@@ -1886,12 +1913,12 @@ static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg) | |||
1886 | preempt_enable(); | 1913 | preempt_enable(); |
1887 | } | 1914 | } |
1888 | 1915 | ||
1889 | static void drain_array_locked(kmem_cache_t* cachep, | 1916 | static void drain_array_locked(kmem_cache_t *cachep, struct array_cache *ac, |
1890 | struct array_cache *ac, int force, int node); | 1917 | int force, int node); |
1891 | 1918 | ||
1892 | static void do_drain(void *arg) | 1919 | static void do_drain(void *arg) |
1893 | { | 1920 | { |
1894 | kmem_cache_t *cachep = (kmem_cache_t*)arg; | 1921 | kmem_cache_t *cachep = (kmem_cache_t *) arg; |
1895 | struct array_cache *ac; | 1922 | struct array_cache *ac; |
1896 | int node = numa_node_id(); | 1923 | int node = numa_node_id(); |
1897 | 1924 | ||
@@ -1911,7 +1938,7 @@ static void drain_cpu_caches(kmem_cache_t *cachep) | |||
1911 | smp_call_function_all_cpus(do_drain, cachep); | 1938 | smp_call_function_all_cpus(do_drain, cachep); |
1912 | check_irq_on(); | 1939 | check_irq_on(); |
1913 | spin_lock_irq(&cachep->spinlock); | 1940 | spin_lock_irq(&cachep->spinlock); |
1914 | for_each_online_node(node) { | 1941 | for_each_online_node(node) { |
1915 | l3 = cachep->nodelists[node]; | 1942 | l3 = cachep->nodelists[node]; |
1916 | if (l3) { | 1943 | if (l3) { |
1917 | spin_lock(&l3->list_lock); | 1944 | spin_lock(&l3->list_lock); |
@@ -1949,8 +1976,7 @@ static int __node_shrink(kmem_cache_t *cachep, int node) | |||
1949 | slab_destroy(cachep, slabp); | 1976 | slab_destroy(cachep, slabp); |
1950 | spin_lock_irq(&l3->list_lock); | 1977 | spin_lock_irq(&l3->list_lock); |
1951 | } | 1978 | } |
1952 | ret = !list_empty(&l3->slabs_full) || | 1979 | ret = !list_empty(&l3->slabs_full) || !list_empty(&l3->slabs_partial); |
1953 | !list_empty(&l3->slabs_partial); | ||
1954 | return ret; | 1980 | return ret; |
1955 | } | 1981 | } |
1956 | 1982 | ||
@@ -2006,7 +2032,7 @@ EXPORT_SYMBOL(kmem_cache_shrink); | |||
2006 | * The caller must guarantee that noone will allocate memory from the cache | 2032 | * The caller must guarantee that noone will allocate memory from the cache |
2007 | * during the kmem_cache_destroy(). | 2033 | * during the kmem_cache_destroy(). |
2008 | */ | 2034 | */ |
2009 | int kmem_cache_destroy(kmem_cache_t * cachep) | 2035 | int kmem_cache_destroy(kmem_cache_t *cachep) |
2010 | { | 2036 | { |
2011 | int i; | 2037 | int i; |
2012 | struct kmem_list3 *l3; | 2038 | struct kmem_list3 *l3; |
@@ -2028,7 +2054,7 @@ int kmem_cache_destroy(kmem_cache_t * cachep) | |||
2028 | if (__cache_shrink(cachep)) { | 2054 | if (__cache_shrink(cachep)) { |
2029 | slab_error(cachep, "Can't free all objects"); | 2055 | slab_error(cachep, "Can't free all objects"); |
2030 | down(&cache_chain_sem); | 2056 | down(&cache_chain_sem); |
2031 | list_add(&cachep->next,&cache_chain); | 2057 | list_add(&cachep->next, &cache_chain); |
2032 | up(&cache_chain_sem); | 2058 | up(&cache_chain_sem); |
2033 | unlock_cpu_hotplug(); | 2059 | unlock_cpu_hotplug(); |
2034 | return 1; | 2060 | return 1; |
@@ -2038,7 +2064,7 @@ int kmem_cache_destroy(kmem_cache_t * cachep) | |||
2038 | synchronize_rcu(); | 2064 | synchronize_rcu(); |
2039 | 2065 | ||
2040 | for_each_online_cpu(i) | 2066 | for_each_online_cpu(i) |
2041 | kfree(cachep->array[i]); | 2067 | kfree(cachep->array[i]); |
2042 | 2068 | ||
2043 | /* NUMA: free the list3 structures */ | 2069 | /* NUMA: free the list3 structures */ |
2044 | for_each_online_node(i) { | 2070 | for_each_online_node(i) { |
@@ -2057,39 +2083,39 @@ int kmem_cache_destroy(kmem_cache_t * cachep) | |||
2057 | EXPORT_SYMBOL(kmem_cache_destroy); | 2083 | EXPORT_SYMBOL(kmem_cache_destroy); |
2058 | 2084 | ||
2059 | /* Get the memory for a slab management obj. */ | 2085 | /* Get the memory for a slab management obj. */ |
2060 | static struct slab* alloc_slabmgmt(kmem_cache_t *cachep, void *objp, | 2086 | static struct slab *alloc_slabmgmt(kmem_cache_t *cachep, void *objp, |
2061 | int colour_off, gfp_t local_flags) | 2087 | int colour_off, gfp_t local_flags) |
2062 | { | 2088 | { |
2063 | struct slab *slabp; | 2089 | struct slab *slabp; |
2064 | 2090 | ||
2065 | if (OFF_SLAB(cachep)) { | 2091 | if (OFF_SLAB(cachep)) { |
2066 | /* Slab management obj is off-slab. */ | 2092 | /* Slab management obj is off-slab. */ |
2067 | slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags); | 2093 | slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags); |
2068 | if (!slabp) | 2094 | if (!slabp) |
2069 | return NULL; | 2095 | return NULL; |
2070 | } else { | 2096 | } else { |
2071 | slabp = objp+colour_off; | 2097 | slabp = objp + colour_off; |
2072 | colour_off += cachep->slab_size; | 2098 | colour_off += cachep->slab_size; |
2073 | } | 2099 | } |
2074 | slabp->inuse = 0; | 2100 | slabp->inuse = 0; |
2075 | slabp->colouroff = colour_off; | 2101 | slabp->colouroff = colour_off; |
2076 | slabp->s_mem = objp+colour_off; | 2102 | slabp->s_mem = objp + colour_off; |
2077 | 2103 | ||
2078 | return slabp; | 2104 | return slabp; |
2079 | } | 2105 | } |
2080 | 2106 | ||
2081 | static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) | 2107 | static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) |
2082 | { | 2108 | { |
2083 | return (kmem_bufctl_t *)(slabp+1); | 2109 | return (kmem_bufctl_t *) (slabp + 1); |
2084 | } | 2110 | } |
2085 | 2111 | ||
2086 | static void cache_init_objs(kmem_cache_t *cachep, | 2112 | static void cache_init_objs(kmem_cache_t *cachep, |
2087 | struct slab *slabp, unsigned long ctor_flags) | 2113 | struct slab *slabp, unsigned long ctor_flags) |
2088 | { | 2114 | { |
2089 | int i; | 2115 | int i; |
2090 | 2116 | ||
2091 | for (i = 0; i < cachep->num; i++) { | 2117 | for (i = 0; i < cachep->num; i++) { |
2092 | void *objp = slabp->s_mem+cachep->objsize*i; | 2118 | void *objp = slabp->s_mem + cachep->objsize * i; |
2093 | #if DEBUG | 2119 | #if DEBUG |
2094 | /* need to poison the objs? */ | 2120 | /* need to poison the objs? */ |
2095 | if (cachep->flags & SLAB_POISON) | 2121 | if (cachep->flags & SLAB_POISON) |
@@ -2107,25 +2133,28 @@ static void cache_init_objs(kmem_cache_t *cachep, | |||
2107 | * Otherwise, deadlock. They must also be threaded. | 2133 | * Otherwise, deadlock. They must also be threaded. |
2108 | */ | 2134 | */ |
2109 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) | 2135 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) |
2110 | cachep->ctor(objp+obj_dbghead(cachep), cachep, ctor_flags); | 2136 | cachep->ctor(objp + obj_dbghead(cachep), cachep, |
2137 | ctor_flags); | ||
2111 | 2138 | ||
2112 | if (cachep->flags & SLAB_RED_ZONE) { | 2139 | if (cachep->flags & SLAB_RED_ZONE) { |
2113 | if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) | 2140 | if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) |
2114 | slab_error(cachep, "constructor overwrote the" | 2141 | slab_error(cachep, "constructor overwrote the" |
2115 | " end of an object"); | 2142 | " end of an object"); |
2116 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) | 2143 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) |
2117 | slab_error(cachep, "constructor overwrote the" | 2144 | slab_error(cachep, "constructor overwrote the" |
2118 | " start of an object"); | 2145 | " start of an object"); |
2119 | } | 2146 | } |
2120 | if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) | 2147 | if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) |
2121 | kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0); | 2148 | && cachep->flags & SLAB_POISON) |
2149 | kernel_map_pages(virt_to_page(objp), | ||
2150 | cachep->objsize / PAGE_SIZE, 0); | ||
2122 | #else | 2151 | #else |
2123 | if (cachep->ctor) | 2152 | if (cachep->ctor) |
2124 | cachep->ctor(objp, cachep, ctor_flags); | 2153 | cachep->ctor(objp, cachep, ctor_flags); |
2125 | #endif | 2154 | #endif |
2126 | slab_bufctl(slabp)[i] = i+1; | 2155 | slab_bufctl(slabp)[i] = i + 1; |
2127 | } | 2156 | } |
2128 | slab_bufctl(slabp)[i-1] = BUFCTL_END; | 2157 | slab_bufctl(slabp)[i - 1] = BUFCTL_END; |
2129 | slabp->free = 0; | 2158 | slabp->free = 0; |
2130 | } | 2159 | } |
2131 | 2160 | ||
@@ -2161,17 +2190,17 @@ static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp) | |||
2161 | */ | 2190 | */ |
2162 | static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid) | 2191 | static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid) |
2163 | { | 2192 | { |
2164 | struct slab *slabp; | 2193 | struct slab *slabp; |
2165 | void *objp; | 2194 | void *objp; |
2166 | size_t offset; | 2195 | size_t offset; |
2167 | gfp_t local_flags; | 2196 | gfp_t local_flags; |
2168 | unsigned long ctor_flags; | 2197 | unsigned long ctor_flags; |
2169 | struct kmem_list3 *l3; | 2198 | struct kmem_list3 *l3; |
2170 | 2199 | ||
2171 | /* Be lazy and only check for valid flags here, | 2200 | /* Be lazy and only check for valid flags here, |
2172 | * keeping it out of the critical path in kmem_cache_alloc(). | 2201 | * keeping it out of the critical path in kmem_cache_alloc(). |
2173 | */ | 2202 | */ |
2174 | if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) | 2203 | if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)) |
2175 | BUG(); | 2204 | BUG(); |
2176 | if (flags & SLAB_NO_GROW) | 2205 | if (flags & SLAB_NO_GROW) |
2177 | return 0; | 2206 | return 0; |
@@ -2237,9 +2266,9 @@ static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid) | |||
2237 | l3->free_objects += cachep->num; | 2266 | l3->free_objects += cachep->num; |
2238 | spin_unlock(&l3->list_lock); | 2267 | spin_unlock(&l3->list_lock); |
2239 | return 1; | 2268 | return 1; |
2240 | opps1: | 2269 | opps1: |
2241 | kmem_freepages(cachep, objp); | 2270 | kmem_freepages(cachep, objp); |
2242 | failed: | 2271 | failed: |
2243 | if (local_flags & __GFP_WAIT) | 2272 | if (local_flags & __GFP_WAIT) |
2244 | local_irq_disable(); | 2273 | local_irq_disable(); |
2245 | return 0; | 2274 | return 0; |
@@ -2259,18 +2288,19 @@ static void kfree_debugcheck(const void *objp) | |||
2259 | 2288 | ||
2260 | if (!virt_addr_valid(objp)) { | 2289 | if (!virt_addr_valid(objp)) { |
2261 | printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n", | 2290 | printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n", |
2262 | (unsigned long)objp); | 2291 | (unsigned long)objp); |
2263 | BUG(); | 2292 | BUG(); |
2264 | } | 2293 | } |
2265 | page = virt_to_page(objp); | 2294 | page = virt_to_page(objp); |
2266 | if (!PageSlab(page)) { | 2295 | if (!PageSlab(page)) { |
2267 | printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n", (unsigned long)objp); | 2296 | printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n", |
2297 | (unsigned long)objp); | ||
2268 | BUG(); | 2298 | BUG(); |
2269 | } | 2299 | } |
2270 | } | 2300 | } |
2271 | 2301 | ||
2272 | static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, | 2302 | static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, |
2273 | void *caller) | 2303 | void *caller) |
2274 | { | 2304 | { |
2275 | struct page *page; | 2305 | struct page *page; |
2276 | unsigned int objnr; | 2306 | unsigned int objnr; |
@@ -2281,20 +2311,26 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, | |||
2281 | page = virt_to_page(objp); | 2311 | page = virt_to_page(objp); |
2282 | 2312 | ||
2283 | if (page_get_cache(page) != cachep) { | 2313 | if (page_get_cache(page) != cachep) { |
2284 | printk(KERN_ERR "mismatch in kmem_cache_free: expected cache %p, got %p\n", | 2314 | printk(KERN_ERR |
2285 | page_get_cache(page),cachep); | 2315 | "mismatch in kmem_cache_free: expected cache %p, got %p\n", |
2316 | page_get_cache(page), cachep); | ||
2286 | printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); | 2317 | printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); |
2287 | printk(KERN_ERR "%p is %s.\n", page_get_cache(page), page_get_cache(page)->name); | 2318 | printk(KERN_ERR "%p is %s.\n", page_get_cache(page), |
2319 | page_get_cache(page)->name); | ||
2288 | WARN_ON(1); | 2320 | WARN_ON(1); |
2289 | } | 2321 | } |
2290 | slabp = page_get_slab(page); | 2322 | slabp = page_get_slab(page); |
2291 | 2323 | ||
2292 | if (cachep->flags & SLAB_RED_ZONE) { | 2324 | if (cachep->flags & SLAB_RED_ZONE) { |
2293 | if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || *dbg_redzone2(cachep, objp) != RED_ACTIVE) { | 2325 | if (*dbg_redzone1(cachep, objp) != RED_ACTIVE |
2294 | slab_error(cachep, "double free, or memory outside" | 2326 | || *dbg_redzone2(cachep, objp) != RED_ACTIVE) { |
2295 | " object was overwritten"); | 2327 | slab_error(cachep, |
2296 | printk(KERN_ERR "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", | 2328 | "double free, or memory outside" |
2297 | objp, *dbg_redzone1(cachep, objp), *dbg_redzone2(cachep, objp)); | 2329 | " object was overwritten"); |
2330 | printk(KERN_ERR | ||
2331 | "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", | ||
2332 | objp, *dbg_redzone1(cachep, objp), | ||
2333 | *dbg_redzone2(cachep, objp)); | ||
2298 | } | 2334 | } |
2299 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; | 2335 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; |
2300 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; | 2336 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; |
@@ -2302,30 +2338,31 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, | |||
2302 | if (cachep->flags & SLAB_STORE_USER) | 2338 | if (cachep->flags & SLAB_STORE_USER) |
2303 | *dbg_userword(cachep, objp) = caller; | 2339 | *dbg_userword(cachep, objp) = caller; |
2304 | 2340 | ||
2305 | objnr = (objp-slabp->s_mem)/cachep->objsize; | 2341 | objnr = (objp - slabp->s_mem) / cachep->objsize; |
2306 | 2342 | ||
2307 | BUG_ON(objnr >= cachep->num); | 2343 | BUG_ON(objnr >= cachep->num); |
2308 | BUG_ON(objp != slabp->s_mem + objnr*cachep->objsize); | 2344 | BUG_ON(objp != slabp->s_mem + objnr * cachep->objsize); |
2309 | 2345 | ||
2310 | if (cachep->flags & SLAB_DEBUG_INITIAL) { | 2346 | if (cachep->flags & SLAB_DEBUG_INITIAL) { |
2311 | /* Need to call the slab's constructor so the | 2347 | /* Need to call the slab's constructor so the |
2312 | * caller can perform a verify of its state (debugging). | 2348 | * caller can perform a verify of its state (debugging). |
2313 | * Called without the cache-lock held. | 2349 | * Called without the cache-lock held. |
2314 | */ | 2350 | */ |
2315 | cachep->ctor(objp+obj_dbghead(cachep), | 2351 | cachep->ctor(objp + obj_dbghead(cachep), |
2316 | cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY); | 2352 | cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); |
2317 | } | 2353 | } |
2318 | if (cachep->flags & SLAB_POISON && cachep->dtor) { | 2354 | if (cachep->flags & SLAB_POISON && cachep->dtor) { |
2319 | /* we want to cache poison the object, | 2355 | /* we want to cache poison the object, |
2320 | * call the destruction callback | 2356 | * call the destruction callback |
2321 | */ | 2357 | */ |
2322 | cachep->dtor(objp+obj_dbghead(cachep), cachep, 0); | 2358 | cachep->dtor(objp + obj_dbghead(cachep), cachep, 0); |
2323 | } | 2359 | } |
2324 | if (cachep->flags & SLAB_POISON) { | 2360 | if (cachep->flags & SLAB_POISON) { |
2325 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2361 | #ifdef CONFIG_DEBUG_PAGEALLOC |
2326 | if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { | 2362 | if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { |
2327 | store_stackinfo(cachep, objp, (unsigned long)caller); | 2363 | store_stackinfo(cachep, objp, (unsigned long)caller); |
2328 | kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0); | 2364 | kernel_map_pages(virt_to_page(objp), |
2365 | cachep->objsize / PAGE_SIZE, 0); | ||
2329 | } else { | 2366 | } else { |
2330 | poison_obj(cachep, objp, POISON_FREE); | 2367 | poison_obj(cachep, objp, POISON_FREE); |
2331 | } | 2368 | } |
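The red-zone checks rewrapped in the hunks above follow a simple pattern: a guard word sits on each side of the object, is marked active on allocation and inactive on free, and any other value seen at free time means a double free or an overrun. A minimal user-space sketch of that idea; the RED_* values and helper names below are invented for illustration and are not slab.c's.

        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>

        #define RED_ACTIVE   0xA55AA55AUL    /* arbitrary guard patterns */
        #define RED_INACTIVE 0x5AA55AA5UL

        /* Layout: [redzone1][payload][redzone2] */
        static void *guarded_alloc(size_t size)
        {
                unsigned char *raw = malloc(size + 2 * sizeof(unsigned long));
                unsigned long red = RED_ACTIVE;

                if (!raw)
                        return NULL;
                memcpy(raw, &red, sizeof(red));                      /* redzone 1 */
                memcpy(raw + sizeof(red) + size, &red, sizeof(red)); /* redzone 2 */
                return raw + sizeof(red);
        }

        static void guarded_free(void *obj, size_t size)
        {
                unsigned char *raw = (unsigned char *)obj - sizeof(unsigned long);
                unsigned long r1, r2, inactive = RED_INACTIVE;

                memcpy(&r1, raw, sizeof(r1));
                memcpy(&r2, raw + sizeof(r1) + size, sizeof(r2));
                if (r1 != RED_ACTIVE || r2 != RED_ACTIVE)
                        fprintf(stderr, "double free, or memory outside"
                                " object was overwritten\n");
                /* mirror the RED_ACTIVE -> RED_INACTIVE transition on free */
                memcpy(raw, &inactive, sizeof(inactive));
                memcpy(raw + sizeof(r1) + size, &inactive, sizeof(inactive));
                free(raw);
        }

        int main(void)
        {
                char *p = guarded_alloc(16);

                if (!p)
                        return 1;
                memset(p, 0, 17);        /* one-byte overrun clobbers redzone 2 */
                guarded_free(p, 16);     /* prints the complaint above */
                return 0;
        }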
@@ -2340,7 +2377,7 @@ static void check_slabp(kmem_cache_t *cachep, struct slab *slabp) | |||
2340 | { | 2377 | { |
2341 | kmem_bufctl_t i; | 2378 | kmem_bufctl_t i; |
2342 | int entries = 0; | 2379 | int entries = 0; |
2343 | 2380 | ||
2344 | /* Check slab's freelist to see if this obj is there. */ | 2381 | /* Check slab's freelist to see if this obj is there. */ |
2345 | for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { | 2382 | for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { |
2346 | entries++; | 2383 | entries++; |
@@ -2348,13 +2385,16 @@ static void check_slabp(kmem_cache_t *cachep, struct slab *slabp) | |||
2348 | goto bad; | 2385 | goto bad; |
2349 | } | 2386 | } |
2350 | if (entries != cachep->num - slabp->inuse) { | 2387 | if (entries != cachep->num - slabp->inuse) { |
2351 | bad: | 2388 | bad: |
2352 | printk(KERN_ERR "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", | 2389 | printk(KERN_ERR |
2353 | cachep->name, cachep->num, slabp, slabp->inuse); | 2390 | "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", |
2354 | for (i=0;i<sizeof(slabp)+cachep->num*sizeof(kmem_bufctl_t);i++) { | 2391 | cachep->name, cachep->num, slabp, slabp->inuse); |
2355 | if ((i%16)==0) | 2392 | for (i = 0; |
2393 | i < sizeof(slabp) + cachep->num * sizeof(kmem_bufctl_t); | ||
2394 | i++) { | ||
2395 | if ((i % 16) == 0) | ||
2356 | printk("\n%03x:", i); | 2396 | printk("\n%03x:", i); |
2357 | printk(" %02x", ((unsigned char*)slabp)[i]); | 2397 | printk(" %02x", ((unsigned char *)slabp)[i]); |
2358 | } | 2398 | } |
2359 | printk("\n"); | 2399 | printk("\n"); |
2360 | BUG(); | 2400 | BUG(); |
@@ -2374,7 +2414,7 @@ static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags) | |||
2374 | 2414 | ||
2375 | check_irq_off(); | 2415 | check_irq_off(); |
2376 | ac = ac_data(cachep); | 2416 | ac = ac_data(cachep); |
2377 | retry: | 2417 | retry: |
2378 | batchcount = ac->batchcount; | 2418 | batchcount = ac->batchcount; |
2379 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { | 2419 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { |
2380 | /* if there was little recent activity on this | 2420 | /* if there was little recent activity on this |
@@ -2396,8 +2436,8 @@ retry: | |||
2396 | shared_array->avail -= batchcount; | 2436 | shared_array->avail -= batchcount; |
2397 | ac->avail = batchcount; | 2437 | ac->avail = batchcount; |
2398 | memcpy(ac->entry, | 2438 | memcpy(ac->entry, |
2399 | &(shared_array->entry[shared_array->avail]), | 2439 | &(shared_array->entry[shared_array->avail]), |
2400 | sizeof(void*)*batchcount); | 2440 | sizeof(void *) * batchcount); |
2401 | shared_array->touched = 1; | 2441 | shared_array->touched = 1; |
2402 | goto alloc_done; | 2442 | goto alloc_done; |
2403 | } | 2443 | } |
@@ -2425,7 +2465,7 @@ retry: | |||
2425 | 2465 | ||
2426 | /* get obj pointer */ | 2466 | /* get obj pointer */ |
2427 | ac->entry[ac->avail++] = slabp->s_mem + | 2467 | ac->entry[ac->avail++] = slabp->s_mem + |
2428 | slabp->free*cachep->objsize; | 2468 | slabp->free * cachep->objsize; |
2429 | 2469 | ||
2430 | slabp->inuse++; | 2470 | slabp->inuse++; |
2431 | next = slab_bufctl(slabp)[slabp->free]; | 2471 | next = slab_bufctl(slabp)[slabp->free]; |
@@ -2433,7 +2473,7 @@ retry: | |||
2433 | slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; | 2473 | slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; |
2434 | WARN_ON(numa_node_id() != slabp->nodeid); | 2474 | WARN_ON(numa_node_id() != slabp->nodeid); |
2435 | #endif | 2475 | #endif |
2436 | slabp->free = next; | 2476 | slabp->free = next; |
2437 | } | 2477 | } |
2438 | check_slabp(cachep, slabp); | 2478 | check_slabp(cachep, slabp); |
2439 | 2479 | ||
@@ -2445,9 +2485,9 @@ retry: | |||
2445 | list_add(&slabp->list, &l3->slabs_partial); | 2485 | list_add(&slabp->list, &l3->slabs_partial); |
2446 | } | 2486 | } |
2447 | 2487 | ||
2448 | must_grow: | 2488 | must_grow: |
2449 | l3->free_objects -= ac->avail; | 2489 | l3->free_objects -= ac->avail; |
2450 | alloc_done: | 2490 | alloc_done: |
2451 | spin_unlock(&l3->list_lock); | 2491 | spin_unlock(&l3->list_lock); |
2452 | 2492 | ||
2453 | if (unlikely(!ac->avail)) { | 2493 | if (unlikely(!ac->avail)) { |
@@ -2459,7 +2499,7 @@ alloc_done: | |||
2459 | if (!x && ac->avail == 0) // no objects in sight? abort | 2499 | if (!x && ac->avail == 0) // no objects in sight? abort |
2460 | return NULL; | 2500 | return NULL; |
2461 | 2501 | ||
2462 | if (!ac->avail) // objects refilled by interrupt? | 2502 | if (!ac->avail) // objects refilled by interrupt? |
2463 | goto retry; | 2503 | goto retry; |
2464 | } | 2504 | } |
2465 | ac->touched = 1; | 2505 | ac->touched = 1; |
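Most of the churn above is reformatting inside cache_alloc_refill(), whose shape is worth keeping in mind: grab a batch of object pointers from the per-node shared array if it has any, otherwise peel objects off the partial and free slab lists, and only then fall back to cache_grow(). A condensed user-space model of that batch-refill fast path; the structure layout mirrors the code above, but LIMIT, BATCHCOUNT and the object pool are invented for the sketch.

        #include <stdio.h>
        #include <string.h>

        #define LIMIT      8    /* per-CPU array capacity (illustrative) */
        #define BATCHCOUNT 4    /* objects moved per refill (illustrative) */

        struct array_cache {
                unsigned int avail;
                void *entry[LIMIT];
        };

        static int pool[6];                      /* stand-ins for slab objects */
        static struct array_cache cpu_cache;     /* "ac" in the code above */
        static struct array_cache shared = {     /* per-node shared array */
                .avail = 6,
                .entry = { &pool[0], &pool[1], &pool[2],
                           &pool[3], &pool[4], &pool[5] },
        };

        /* Mirrors the fast path: pull up to BATCHCOUNT pointers from the
         * shared array in one memcpy. */
        static int refill(struct array_cache *ac)
        {
                unsigned int batch = BATCHCOUNT;

                if (!shared.avail)
                        return 0;               /* would fall back to the slab lists */
                if (batch > shared.avail)
                        batch = shared.avail;
                shared.avail -= batch;
                memcpy(ac->entry, &shared.entry[shared.avail],
                       sizeof(void *) * batch);
                ac->avail = batch;
                return 1;
        }

        static void *cache_alloc(void)
        {
                if (!cpu_cache.avail && !refill(&cpu_cache))
                        return NULL;            /* cache_grow() would run here */
                return cpu_cache.entry[--cpu_cache.avail];
        }

        int main(void)
        {
                for (int i = 0; i < 7; i++)
                        printf("alloc -> %p\n", cache_alloc());
                return 0;
        }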
@@ -2476,16 +2516,16 @@ cache_alloc_debugcheck_before(kmem_cache_t *cachep, gfp_t flags) | |||
2476 | } | 2516 | } |
2477 | 2517 | ||
2478 | #if DEBUG | 2518 | #if DEBUG |
2479 | static void * | 2519 | static void *cache_alloc_debugcheck_after(kmem_cache_t *cachep, gfp_t flags, |
2480 | cache_alloc_debugcheck_after(kmem_cache_t *cachep, | 2520 | void *objp, void *caller) |
2481 | gfp_t flags, void *objp, void *caller) | ||
2482 | { | 2521 | { |
2483 | if (!objp) | 2522 | if (!objp) |
2484 | return objp; | 2523 | return objp; |
2485 | if (cachep->flags & SLAB_POISON) { | 2524 | if (cachep->flags & SLAB_POISON) { |
2486 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2525 | #ifdef CONFIG_DEBUG_PAGEALLOC |
2487 | if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) | 2526 | if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) |
2488 | kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 1); | 2527 | kernel_map_pages(virt_to_page(objp), |
2528 | cachep->objsize / PAGE_SIZE, 1); | ||
2489 | else | 2529 | else |
2490 | check_poison_obj(cachep, objp); | 2530 | check_poison_obj(cachep, objp); |
2491 | #else | 2531 | #else |
@@ -2497,24 +2537,28 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep, | |||
2497 | *dbg_userword(cachep, objp) = caller; | 2537 | *dbg_userword(cachep, objp) = caller; |
2498 | 2538 | ||
2499 | if (cachep->flags & SLAB_RED_ZONE) { | 2539 | if (cachep->flags & SLAB_RED_ZONE) { |
2500 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || *dbg_redzone2(cachep, objp) != RED_INACTIVE) { | 2540 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE |
2501 | slab_error(cachep, "double free, or memory outside" | 2541 | || *dbg_redzone2(cachep, objp) != RED_INACTIVE) { |
2502 | " object was overwritten"); | 2542 | slab_error(cachep, |
2503 | printk(KERN_ERR "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", | 2543 | "double free, or memory outside" |
2504 | objp, *dbg_redzone1(cachep, objp), *dbg_redzone2(cachep, objp)); | 2544 | " object was overwritten"); |
2545 | printk(KERN_ERR | ||
2546 | "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", | ||
2547 | objp, *dbg_redzone1(cachep, objp), | ||
2548 | *dbg_redzone2(cachep, objp)); | ||
2505 | } | 2549 | } |
2506 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; | 2550 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; |
2507 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; | 2551 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; |
2508 | } | 2552 | } |
2509 | objp += obj_dbghead(cachep); | 2553 | objp += obj_dbghead(cachep); |
2510 | if (cachep->ctor && cachep->flags & SLAB_POISON) { | 2554 | if (cachep->ctor && cachep->flags & SLAB_POISON) { |
2511 | unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR; | 2555 | unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR; |
2512 | 2556 | ||
2513 | if (!(flags & __GFP_WAIT)) | 2557 | if (!(flags & __GFP_WAIT)) |
2514 | ctor_flags |= SLAB_CTOR_ATOMIC; | 2558 | ctor_flags |= SLAB_CTOR_ATOMIC; |
2515 | 2559 | ||
2516 | cachep->ctor(objp, cachep, ctor_flags); | 2560 | cachep->ctor(objp, cachep, ctor_flags); |
2517 | } | 2561 | } |
2518 | return objp; | 2562 | return objp; |
2519 | } | 2563 | } |
2520 | #else | 2564 | #else |
@@ -2523,7 +2567,7 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep, | |||
2523 | 2567 | ||
2524 | static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags) | 2568 | static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags) |
2525 | { | 2569 | { |
2526 | void* objp; | 2570 | void *objp; |
2527 | struct array_cache *ac; | 2571 | struct array_cache *ac; |
2528 | 2572 | ||
2529 | check_irq_off(); | 2573 | check_irq_off(); |
@@ -2542,7 +2586,7 @@ static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags) | |||
2542 | static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) | 2586 | static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) |
2543 | { | 2587 | { |
2544 | unsigned long save_flags; | 2588 | unsigned long save_flags; |
2545 | void* objp; | 2589 | void *objp; |
2546 | 2590 | ||
2547 | cache_alloc_debugcheck_before(cachep, flags); | 2591 | cache_alloc_debugcheck_before(cachep, flags); |
2548 | 2592 | ||
@@ -2550,7 +2594,7 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) | |||
2550 | objp = ____cache_alloc(cachep, flags); | 2594 | objp = ____cache_alloc(cachep, flags); |
2551 | local_irq_restore(save_flags); | 2595 | local_irq_restore(save_flags); |
2552 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, | 2596 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, |
2553 | __builtin_return_address(0)); | 2597 | __builtin_return_address(0)); |
2554 | prefetchw(objp); | 2598 | prefetchw(objp); |
2555 | return objp; | 2599 | return objp; |
2556 | } | 2600 | } |
@@ -2562,74 +2606,75 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) | |||
2562 | static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) | 2606 | static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) |
2563 | { | 2607 | { |
2564 | struct list_head *entry; | 2608 | struct list_head *entry; |
2565 | struct slab *slabp; | 2609 | struct slab *slabp; |
2566 | struct kmem_list3 *l3; | 2610 | struct kmem_list3 *l3; |
2567 | void *obj; | 2611 | void *obj; |
2568 | kmem_bufctl_t next; | 2612 | kmem_bufctl_t next; |
2569 | int x; | 2613 | int x; |
2570 | 2614 | ||
2571 | l3 = cachep->nodelists[nodeid]; | 2615 | l3 = cachep->nodelists[nodeid]; |
2572 | BUG_ON(!l3); | 2616 | BUG_ON(!l3); |
2573 | 2617 | ||
2574 | retry: | 2618 | retry: |
2575 | spin_lock(&l3->list_lock); | 2619 | spin_lock(&l3->list_lock); |
2576 | entry = l3->slabs_partial.next; | 2620 | entry = l3->slabs_partial.next; |
2577 | if (entry == &l3->slabs_partial) { | 2621 | if (entry == &l3->slabs_partial) { |
2578 | l3->free_touched = 1; | 2622 | l3->free_touched = 1; |
2579 | entry = l3->slabs_free.next; | 2623 | entry = l3->slabs_free.next; |
2580 | if (entry == &l3->slabs_free) | 2624 | if (entry == &l3->slabs_free) |
2581 | goto must_grow; | 2625 | goto must_grow; |
2582 | } | 2626 | } |
2583 | 2627 | ||
2584 | slabp = list_entry(entry, struct slab, list); | 2628 | slabp = list_entry(entry, struct slab, list); |
2585 | check_spinlock_acquired_node(cachep, nodeid); | 2629 | check_spinlock_acquired_node(cachep, nodeid); |
2586 | check_slabp(cachep, slabp); | 2630 | check_slabp(cachep, slabp); |
2587 | 2631 | ||
2588 | STATS_INC_NODEALLOCS(cachep); | 2632 | STATS_INC_NODEALLOCS(cachep); |
2589 | STATS_INC_ACTIVE(cachep); | 2633 | STATS_INC_ACTIVE(cachep); |
2590 | STATS_SET_HIGH(cachep); | 2634 | STATS_SET_HIGH(cachep); |
2591 | 2635 | ||
2592 | BUG_ON(slabp->inuse == cachep->num); | 2636 | BUG_ON(slabp->inuse == cachep->num); |
2593 | 2637 | ||
2594 | /* get obj pointer */ | 2638 | /* get obj pointer */ |
2595 | obj = slabp->s_mem + slabp->free*cachep->objsize; | 2639 | obj = slabp->s_mem + slabp->free * cachep->objsize; |
2596 | slabp->inuse++; | 2640 | slabp->inuse++; |
2597 | next = slab_bufctl(slabp)[slabp->free]; | 2641 | next = slab_bufctl(slabp)[slabp->free]; |
2598 | #if DEBUG | 2642 | #if DEBUG |
2599 | slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; | 2643 | slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; |
2600 | #endif | 2644 | #endif |
2601 | slabp->free = next; | 2645 | slabp->free = next; |
2602 | check_slabp(cachep, slabp); | 2646 | check_slabp(cachep, slabp); |
2603 | l3->free_objects--; | 2647 | l3->free_objects--; |
2604 | /* move slabp to correct slabp list: */ | 2648 | /* move slabp to correct slabp list: */ |
2605 | list_del(&slabp->list); | 2649 | list_del(&slabp->list); |
2606 | 2650 | ||
2607 | if (slabp->free == BUFCTL_END) { | 2651 | if (slabp->free == BUFCTL_END) { |
2608 | list_add(&slabp->list, &l3->slabs_full); | 2652 | list_add(&slabp->list, &l3->slabs_full); |
2609 | } else { | 2653 | } else { |
2610 | list_add(&slabp->list, &l3->slabs_partial); | 2654 | list_add(&slabp->list, &l3->slabs_partial); |
2611 | } | 2655 | } |
2612 | 2656 | ||
2613 | spin_unlock(&l3->list_lock); | 2657 | spin_unlock(&l3->list_lock); |
2614 | goto done; | 2658 | goto done; |
2615 | 2659 | ||
2616 | must_grow: | 2660 | must_grow: |
2617 | spin_unlock(&l3->list_lock); | 2661 | spin_unlock(&l3->list_lock); |
2618 | x = cache_grow(cachep, flags, nodeid); | 2662 | x = cache_grow(cachep, flags, nodeid); |
2619 | 2663 | ||
2620 | if (!x) | 2664 | if (!x) |
2621 | return NULL; | 2665 | return NULL; |
2622 | 2666 | ||
2623 | goto retry; | 2667 | goto retry; |
2624 | done: | 2668 | done: |
2625 | return obj; | 2669 | return obj; |
2626 | } | 2670 | } |
2627 | #endif | 2671 | #endif |
2628 | 2672 | ||
2629 | /* | 2673 | /* |
2630 | * Caller needs to acquire correct kmem_list's list_lock | 2674 | * Caller needs to acquire correct kmem_list's list_lock |
2631 | */ | 2675 | */ |
2632 | static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node) | 2676 | static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, |
2677 | int node) | ||
2633 | { | 2678 | { |
2634 | int i; | 2679 | int i; |
2635 | struct kmem_list3 *l3; | 2680 | struct kmem_list3 *l3; |
@@ -2652,7 +2697,7 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int n | |||
2652 | 2697 | ||
2653 | if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { | 2698 | if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { |
2654 | printk(KERN_ERR "slab: double free detected in cache " | 2699 | printk(KERN_ERR "slab: double free detected in cache " |
2655 | "'%s', objp %p\n", cachep->name, objp); | 2700 | "'%s', objp %p\n", cachep->name, objp); |
2656 | BUG(); | 2701 | BUG(); |
2657 | } | 2702 | } |
2658 | #endif | 2703 | #endif |
@@ -2696,20 +2741,19 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac) | |||
2696 | spin_lock(&l3->list_lock); | 2741 | spin_lock(&l3->list_lock); |
2697 | if (l3->shared) { | 2742 | if (l3->shared) { |
2698 | struct array_cache *shared_array = l3->shared; | 2743 | struct array_cache *shared_array = l3->shared; |
2699 | int max = shared_array->limit-shared_array->avail; | 2744 | int max = shared_array->limit - shared_array->avail; |
2700 | if (max) { | 2745 | if (max) { |
2701 | if (batchcount > max) | 2746 | if (batchcount > max) |
2702 | batchcount = max; | 2747 | batchcount = max; |
2703 | memcpy(&(shared_array->entry[shared_array->avail]), | 2748 | memcpy(&(shared_array->entry[shared_array->avail]), |
2704 | ac->entry, | 2749 | ac->entry, sizeof(void *) * batchcount); |
2705 | sizeof(void*)*batchcount); | ||
2706 | shared_array->avail += batchcount; | 2750 | shared_array->avail += batchcount; |
2707 | goto free_done; | 2751 | goto free_done; |
2708 | } | 2752 | } |
2709 | } | 2753 | } |
2710 | 2754 | ||
2711 | free_block(cachep, ac->entry, batchcount, node); | 2755 | free_block(cachep, ac->entry, batchcount, node); |
2712 | free_done: | 2756 | free_done: |
2713 | #if STATS | 2757 | #if STATS |
2714 | { | 2758 | { |
2715 | int i = 0; | 2759 | int i = 0; |
@@ -2731,10 +2775,9 @@ free_done: | |||
2731 | spin_unlock(&l3->list_lock); | 2775 | spin_unlock(&l3->list_lock); |
2732 | ac->avail -= batchcount; | 2776 | ac->avail -= batchcount; |
2733 | memmove(ac->entry, &(ac->entry[batchcount]), | 2777 | memmove(ac->entry, &(ac->entry[batchcount]), |
2734 | sizeof(void*)*ac->avail); | 2778 | sizeof(void *) * ac->avail); |
2735 | } | 2779 | } |
2736 | 2780 | ||
2737 | |||
2738 | /* | 2781 | /* |
2739 | * __cache_free | 2782 | * __cache_free |
2740 | * Release an obj back to its cache. If the obj has a constructed | 2783 | * Release an obj back to its cache. If the obj has a constructed |
@@ -2759,7 +2802,8 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) | |||
2759 | if (unlikely(slabp->nodeid != numa_node_id())) { | 2802 | if (unlikely(slabp->nodeid != numa_node_id())) { |
2760 | struct array_cache *alien = NULL; | 2803 | struct array_cache *alien = NULL; |
2761 | int nodeid = slabp->nodeid; | 2804 | int nodeid = slabp->nodeid; |
2762 | struct kmem_list3 *l3 = cachep->nodelists[numa_node_id()]; | 2805 | struct kmem_list3 *l3 = |
2806 | cachep->nodelists[numa_node_id()]; | ||
2763 | 2807 | ||
2764 | STATS_INC_NODEFREES(cachep); | 2808 | STATS_INC_NODEFREES(cachep); |
2765 | if (l3->alien && l3->alien[nodeid]) { | 2809 | if (l3->alien && l3->alien[nodeid]) { |
@@ -2767,15 +2811,15 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) | |||
2767 | spin_lock(&alien->lock); | 2811 | spin_lock(&alien->lock); |
2768 | if (unlikely(alien->avail == alien->limit)) | 2812 | if (unlikely(alien->avail == alien->limit)) |
2769 | __drain_alien_cache(cachep, | 2813 | __drain_alien_cache(cachep, |
2770 | alien, nodeid); | 2814 | alien, nodeid); |
2771 | alien->entry[alien->avail++] = objp; | 2815 | alien->entry[alien->avail++] = objp; |
2772 | spin_unlock(&alien->lock); | 2816 | spin_unlock(&alien->lock); |
2773 | } else { | 2817 | } else { |
2774 | spin_lock(&(cachep->nodelists[nodeid])-> | 2818 | spin_lock(&(cachep->nodelists[nodeid])-> |
2775 | list_lock); | 2819 | list_lock); |
2776 | free_block(cachep, &objp, 1, nodeid); | 2820 | free_block(cachep, &objp, 1, nodeid); |
2777 | spin_unlock(&(cachep->nodelists[nodeid])-> | 2821 | spin_unlock(&(cachep->nodelists[nodeid])-> |
2778 | list_lock); | 2822 | list_lock); |
2779 | } | 2823 | } |
2780 | return; | 2824 | return; |
2781 | } | 2825 | } |
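The two hunks above are pure re-indentation, but they cover the NUMA branch of __cache_free(): an object freed on a node other than its home node is parked in a per-node alien array and pushed back to the owning node in bulk once the array fills (or, with no alien array, freed straight onto the remote node's lists under that node's lock). A toy sketch of that queue-then-drain idea, with ALIEN_LIMIT and the helper names invented for illustration.

        #include <stdio.h>

        #define ALIEN_LIMIT 3   /* illustrative queue depth */

        struct alien_cache {
                unsigned int avail;
                void *entry[ALIEN_LIMIT];
        };

        /* Stand-in for free_block(): give objects back to their home node. */
        static void free_to_node(int node, void **objs, unsigned int nr)
        {
                printf("flushing %u object(s) to node %d\n", nr, node);
        }

        /* Mirrors the shape of the remote-free path: batch frees destined for
         * another node and drain the batch once the queue is full. */
        static void remote_free(struct alien_cache *alien, int home_node, void *obj)
        {
                if (alien->avail == ALIEN_LIMIT) {
                        free_to_node(home_node, alien->entry, alien->avail);
                        alien->avail = 0;
                }
                alien->entry[alien->avail++] = obj;
        }

        int main(void)
        {
                static int objs[5];
                struct alien_cache alien = { 0 };

                for (int i = 0; i < 5; i++)
                        remote_free(&alien, 1, &objs[i]);
                /* two objects are still queued, waiting for the next drain */
                printf("%u object(s) still queued\n", alien.avail);
                return 0;
        }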
@@ -2822,9 +2866,9 @@ EXPORT_SYMBOL(kmem_cache_alloc); | |||
2822 | */ | 2866 | */ |
2823 | int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr) | 2867 | int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr) |
2824 | { | 2868 | { |
2825 | unsigned long addr = (unsigned long) ptr; | 2869 | unsigned long addr = (unsigned long)ptr; |
2826 | unsigned long min_addr = PAGE_OFFSET; | 2870 | unsigned long min_addr = PAGE_OFFSET; |
2827 | unsigned long align_mask = BYTES_PER_WORD-1; | 2871 | unsigned long align_mask = BYTES_PER_WORD - 1; |
2828 | unsigned long size = cachep->objsize; | 2872 | unsigned long size = cachep->objsize; |
2829 | struct page *page; | 2873 | struct page *page; |
2830 | 2874 | ||
@@ -2844,7 +2888,7 @@ int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr) | |||
2844 | if (unlikely(page_get_cache(page) != cachep)) | 2888 | if (unlikely(page_get_cache(page) != cachep)) |
2845 | goto out; | 2889 | goto out; |
2846 | return 1; | 2890 | return 1; |
2847 | out: | 2891 | out: |
2848 | return 0; | 2892 | return 0; |
2849 | } | 2893 | } |
2850 | 2894 | ||
@@ -2871,8 +2915,10 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) | |||
2871 | 2915 | ||
2872 | if (unlikely(!cachep->nodelists[nodeid])) { | 2916 | if (unlikely(!cachep->nodelists[nodeid])) { |
2873 | /* Fall back to __cache_alloc if we run into trouble */ | 2917 | /* Fall back to __cache_alloc if we run into trouble */ |
2874 | printk(KERN_WARNING "slab: not allocating in inactive node %d for cache %s\n", nodeid, cachep->name); | 2918 | printk(KERN_WARNING |
2875 | return __cache_alloc(cachep,flags); | 2919 | "slab: not allocating in inactive node %d for cache %s\n", |
2920 | nodeid, cachep->name); | ||
2921 | return __cache_alloc(cachep, flags); | ||
2876 | } | 2922 | } |
2877 | 2923 | ||
2878 | cache_alloc_debugcheck_before(cachep, flags); | 2924 | cache_alloc_debugcheck_before(cachep, flags); |
@@ -2882,7 +2928,9 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) | |||
2882 | else | 2928 | else |
2883 | ptr = __cache_alloc_node(cachep, flags, nodeid); | 2929 | ptr = __cache_alloc_node(cachep, flags, nodeid); |
2884 | local_irq_restore(save_flags); | 2930 | local_irq_restore(save_flags); |
2885 | ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, __builtin_return_address(0)); | 2931 | ptr = |
2932 | cache_alloc_debugcheck_after(cachep, flags, ptr, | ||
2933 | __builtin_return_address(0)); | ||
2886 | 2934 | ||
2887 | return ptr; | 2935 | return ptr; |
2888 | } | 2936 | } |
@@ -2944,12 +2992,11 @@ EXPORT_SYMBOL(__kmalloc); | |||
2944 | * Objects should be dereferenced using the per_cpu_ptr macro only. | 2992 | * Objects should be dereferenced using the per_cpu_ptr macro only. |
2945 | * | 2993 | * |
2946 | * @size: how many bytes of memory are required. | 2994 | * @size: how many bytes of memory are required. |
2947 | * @align: the alignment, which can't be greater than SMP_CACHE_BYTES. | ||
2948 | */ | 2995 | */ |
2949 | void *__alloc_percpu(size_t size, size_t align) | 2996 | void *__alloc_percpu(size_t size) |
2950 | { | 2997 | { |
2951 | int i; | 2998 | int i; |
2952 | struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL); | 2999 | struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL); |
2953 | 3000 | ||
2954 | if (!pdata) | 3001 | if (!pdata) |
2955 | return NULL; | 3002 | return NULL; |
@@ -2973,9 +3020,9 @@ void *__alloc_percpu(size_t size, size_t align) | |||
2973 | } | 3020 | } |
2974 | 3021 | ||
2975 | /* Catch derefs w/o wrappers */ | 3022 | /* Catch derefs w/o wrappers */ |
2976 | return (void *) (~(unsigned long) pdata); | 3023 | return (void *)(~(unsigned long)pdata); |
2977 | 3024 | ||
2978 | unwind_oom: | 3025 | unwind_oom: |
2979 | while (--i >= 0) { | 3026 | while (--i >= 0) { |
2980 | if (!cpu_possible(i)) | 3027 | if (!cpu_possible(i)) |
2981 | continue; | 3028 | continue; |
@@ -3006,20 +3053,6 @@ void kmem_cache_free(kmem_cache_t *cachep, void *objp) | |||
3006 | EXPORT_SYMBOL(kmem_cache_free); | 3053 | EXPORT_SYMBOL(kmem_cache_free); |
3007 | 3054 | ||
3008 | /** | 3055 | /** |
3009 | * kzalloc - allocate memory. The memory is set to zero. | ||
3010 | * @size: how many bytes of memory are required. | ||
3011 | * @flags: the type of memory to allocate. | ||
3012 | */ | ||
3013 | void *kzalloc(size_t size, gfp_t flags) | ||
3014 | { | ||
3015 | void *ret = kmalloc(size, flags); | ||
3016 | if (ret) | ||
3017 | memset(ret, 0, size); | ||
3018 | return ret; | ||
3019 | } | ||
3020 | EXPORT_SYMBOL(kzalloc); | ||
3021 | |||
3022 | /** | ||
3023 | * kfree - free previously allocated memory | 3056 | * kfree - free previously allocated memory |
3024 | * @objp: pointer returned by kmalloc. | 3057 | * @objp: pointer returned by kmalloc. |
3025 | * | 3058 | * |
@@ -3038,7 +3071,7 @@ void kfree(const void *objp) | |||
3038 | local_irq_save(flags); | 3071 | local_irq_save(flags); |
3039 | kfree_debugcheck(objp); | 3072 | kfree_debugcheck(objp); |
3040 | c = page_get_cache(virt_to_page(objp)); | 3073 | c = page_get_cache(virt_to_page(objp)); |
3041 | __cache_free(c, (void*)objp); | 3074 | __cache_free(c, (void *)objp); |
3042 | local_irq_restore(flags); | 3075 | local_irq_restore(flags); |
3043 | } | 3076 | } |
3044 | EXPORT_SYMBOL(kfree); | 3077 | EXPORT_SYMBOL(kfree); |
@@ -3051,17 +3084,16 @@ EXPORT_SYMBOL(kfree); | |||
3051 | * Don't free memory not originally allocated by alloc_percpu() | 3084 | * Don't free memory not originally allocated by alloc_percpu() |
3052 | * The complemented objp is to check for that. | 3085 | * The complemented objp is to check for that. |
3053 | */ | 3086 | */ |
3054 | void | 3087 | void free_percpu(const void *objp) |
3055 | free_percpu(const void *objp) | ||
3056 | { | 3088 | { |
3057 | int i; | 3089 | int i; |
3058 | struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp); | 3090 | struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp); |
3059 | 3091 | ||
3060 | /* | 3092 | /* |
3061 | * We allocate for all cpus so we cannot use for online cpu here. | 3093 | * We allocate for all cpus so we cannot use for online cpu here. |
3062 | */ | 3094 | */ |
3063 | for_each_cpu(i) | 3095 | for_each_cpu(i) |
3064 | kfree(p->ptrs[i]); | 3096 | kfree(p->ptrs[i]); |
3065 | kfree(p); | 3097 | kfree(p); |
3066 | } | 3098 | } |
3067 | EXPORT_SYMBOL(free_percpu); | 3099 | EXPORT_SYMBOL(free_percpu); |
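The __alloc_percpu()/free_percpu() hunks above preserve one deliberate quirk: the value handed back to callers is the bitwise complement of the real percpu_data pointer ("Catch derefs w/o wrappers"), so code that dereferences it without the per-cpu accessor is very likely to fault instead of silently reading the wrong data. A stand-alone illustration of the complement trick; NR_CPUS here and the _demo names are assumptions for the sketch, and it relies, as the kernel code does, on pointers fitting in an unsigned long.

        #include <stdio.h>
        #include <stdlib.h>

        #define NR_CPUS 4    /* illustrative */

        struct percpu_data {
                void *ptrs[NR_CPUS];
        };

        /* Allocate one object per "cpu" and return the complemented descriptor. */
        static void *alloc_percpu_demo(size_t size)
        {
                struct percpu_data *pdata = malloc(sizeof(*pdata));

                if (!pdata)
                        return NULL;
                for (int i = 0; i < NR_CPUS; i++) {
                        pdata->ptrs[i] = calloc(1, size);
                        if (!pdata->ptrs[i])
                                exit(1);        /* demo: skip proper unwinding */
                }
                /* Catch derefs w/o wrappers: hand out ~pdata, not pdata. */
                return (void *)(~(unsigned long)pdata);
        }

        /* The accessor undoes the complement before indexing by cpu. */
        #define per_cpu_ptr_demo(ptr, cpu) \
                (((struct percpu_data *)(~(unsigned long)(ptr)))->ptrs[(cpu)])

        static void free_percpu_demo(const void *objp)
        {
                struct percpu_data *p =
                        (struct percpu_data *)(~(unsigned long)objp);

                for (int i = 0; i < NR_CPUS; i++)
                        free(p->ptrs[i]);
                free(p);
        }

        int main(void)
        {
                void *counters = alloc_percpu_demo(sizeof(long));

                *(long *)per_cpu_ptr_demo(counters, 2) = 42;
                printf("cpu 2 counter = %ld\n",
                       *(long *)per_cpu_ptr_demo(counters, 2));
                free_percpu_demo(counters);
                return 0;
        }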
@@ -3095,44 +3127,44 @@ static int alloc_kmemlist(kmem_cache_t *cachep) | |||
3095 | if (!(new_alien = alloc_alien_cache(node, cachep->limit))) | 3127 | if (!(new_alien = alloc_alien_cache(node, cachep->limit))) |
3096 | goto fail; | 3128 | goto fail; |
3097 | #endif | 3129 | #endif |
3098 | if (!(new = alloc_arraycache(node, (cachep->shared* | 3130 | if (!(new = alloc_arraycache(node, (cachep->shared * |
3099 | cachep->batchcount), 0xbaadf00d))) | 3131 | cachep->batchcount), |
3132 | 0xbaadf00d))) | ||
3100 | goto fail; | 3133 | goto fail; |
3101 | if ((l3 = cachep->nodelists[node])) { | 3134 | if ((l3 = cachep->nodelists[node])) { |
3102 | 3135 | ||
3103 | spin_lock_irq(&l3->list_lock); | 3136 | spin_lock_irq(&l3->list_lock); |
3104 | 3137 | ||
3105 | if ((nc = cachep->nodelists[node]->shared)) | 3138 | if ((nc = cachep->nodelists[node]->shared)) |
3106 | free_block(cachep, nc->entry, | 3139 | free_block(cachep, nc->entry, nc->avail, node); |
3107 | nc->avail, node); | ||
3108 | 3140 | ||
3109 | l3->shared = new; | 3141 | l3->shared = new; |
3110 | if (!cachep->nodelists[node]->alien) { | 3142 | if (!cachep->nodelists[node]->alien) { |
3111 | l3->alien = new_alien; | 3143 | l3->alien = new_alien; |
3112 | new_alien = NULL; | 3144 | new_alien = NULL; |
3113 | } | 3145 | } |
3114 | l3->free_limit = (1 + nr_cpus_node(node))* | 3146 | l3->free_limit = (1 + nr_cpus_node(node)) * |
3115 | cachep->batchcount + cachep->num; | 3147 | cachep->batchcount + cachep->num; |
3116 | spin_unlock_irq(&l3->list_lock); | 3148 | spin_unlock_irq(&l3->list_lock); |
3117 | kfree(nc); | 3149 | kfree(nc); |
3118 | free_alien_cache(new_alien); | 3150 | free_alien_cache(new_alien); |
3119 | continue; | 3151 | continue; |
3120 | } | 3152 | } |
3121 | if (!(l3 = kmalloc_node(sizeof(struct kmem_list3), | 3153 | if (!(l3 = kmalloc_node(sizeof(struct kmem_list3), |
3122 | GFP_KERNEL, node))) | 3154 | GFP_KERNEL, node))) |
3123 | goto fail; | 3155 | goto fail; |
3124 | 3156 | ||
3125 | kmem_list3_init(l3); | 3157 | kmem_list3_init(l3); |
3126 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 3158 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + |
3127 | ((unsigned long)cachep)%REAPTIMEOUT_LIST3; | 3159 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
3128 | l3->shared = new; | 3160 | l3->shared = new; |
3129 | l3->alien = new_alien; | 3161 | l3->alien = new_alien; |
3130 | l3->free_limit = (1 + nr_cpus_node(node))* | 3162 | l3->free_limit = (1 + nr_cpus_node(node)) * |
3131 | cachep->batchcount + cachep->num; | 3163 | cachep->batchcount + cachep->num; |
3132 | cachep->nodelists[node] = l3; | 3164 | cachep->nodelists[node] = l3; |
3133 | } | 3165 | } |
3134 | return err; | 3166 | return err; |
3135 | fail: | 3167 | fail: |
3136 | err = -ENOMEM; | 3168 | err = -ENOMEM; |
3137 | return err; | 3169 | return err; |
3138 | } | 3170 | } |
@@ -3154,18 +3186,19 @@ static void do_ccupdate_local(void *info) | |||
3154 | new->new[smp_processor_id()] = old; | 3186 | new->new[smp_processor_id()] = old; |
3155 | } | 3187 | } |
3156 | 3188 | ||
3157 | |||
3158 | static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount, | 3189 | static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount, |
3159 | int shared) | 3190 | int shared) |
3160 | { | 3191 | { |
3161 | struct ccupdate_struct new; | 3192 | struct ccupdate_struct new; |
3162 | int i, err; | 3193 | int i, err; |
3163 | 3194 | ||
3164 | memset(&new.new,0,sizeof(new.new)); | 3195 | memset(&new.new, 0, sizeof(new.new)); |
3165 | for_each_online_cpu(i) { | 3196 | for_each_online_cpu(i) { |
3166 | new.new[i] = alloc_arraycache(cpu_to_node(i), limit, batchcount); | 3197 | new.new[i] = |
3198 | alloc_arraycache(cpu_to_node(i), limit, batchcount); | ||
3167 | if (!new.new[i]) { | 3199 | if (!new.new[i]) { |
3168 | for (i--; i >= 0; i--) kfree(new.new[i]); | 3200 | for (i--; i >= 0; i--) |
3201 | kfree(new.new[i]); | ||
3169 | return -ENOMEM; | 3202 | return -ENOMEM; |
3170 | } | 3203 | } |
3171 | } | 3204 | } |
@@ -3193,13 +3226,12 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount, | |||
3193 | err = alloc_kmemlist(cachep); | 3226 | err = alloc_kmemlist(cachep); |
3194 | if (err) { | 3227 | if (err) { |
3195 | printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n", | 3228 | printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n", |
3196 | cachep->name, -err); | 3229 | cachep->name, -err); |
3197 | BUG(); | 3230 | BUG(); |
3198 | } | 3231 | } |
3199 | return 0; | 3232 | return 0; |
3200 | } | 3233 | } |
3201 | 3234 | ||
3202 | |||
3203 | static void enable_cpucache(kmem_cache_t *cachep) | 3235 | static void enable_cpucache(kmem_cache_t *cachep) |
3204 | { | 3236 | { |
3205 | int err; | 3237 | int err; |
@@ -3246,14 +3278,14 @@ static void enable_cpucache(kmem_cache_t *cachep) | |||
3246 | if (limit > 32) | 3278 | if (limit > 32) |
3247 | limit = 32; | 3279 | limit = 32; |
3248 | #endif | 3280 | #endif |
3249 | err = do_tune_cpucache(cachep, limit, (limit+1)/2, shared); | 3281 | err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared); |
3250 | if (err) | 3282 | if (err) |
3251 | printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", | 3283 | printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", |
3252 | cachep->name, -err); | 3284 | cachep->name, -err); |
3253 | } | 3285 | } |
3254 | 3286 | ||
3255 | static void drain_array_locked(kmem_cache_t *cachep, | 3287 | static void drain_array_locked(kmem_cache_t *cachep, struct array_cache *ac, |
3256 | struct array_cache *ac, int force, int node) | 3288 | int force, int node) |
3257 | { | 3289 | { |
3258 | int tofree; | 3290 | int tofree; |
3259 | 3291 | ||
@@ -3261,14 +3293,14 @@ static void drain_array_locked(kmem_cache_t *cachep, | |||
3261 | if (ac->touched && !force) { | 3293 | if (ac->touched && !force) { |
3262 | ac->touched = 0; | 3294 | ac->touched = 0; |
3263 | } else if (ac->avail) { | 3295 | } else if (ac->avail) { |
3264 | tofree = force ? ac->avail : (ac->limit+4)/5; | 3296 | tofree = force ? ac->avail : (ac->limit + 4) / 5; |
3265 | if (tofree > ac->avail) { | 3297 | if (tofree > ac->avail) { |
3266 | tofree = (ac->avail+1)/2; | 3298 | tofree = (ac->avail + 1) / 2; |
3267 | } | 3299 | } |
3268 | free_block(cachep, ac->entry, tofree, node); | 3300 | free_block(cachep, ac->entry, tofree, node); |
3269 | ac->avail -= tofree; | 3301 | ac->avail -= tofree; |
3270 | memmove(ac->entry, &(ac->entry[tofree]), | 3302 | memmove(ac->entry, &(ac->entry[tofree]), |
3271 | sizeof(void*)*ac->avail); | 3303 | sizeof(void *) * ac->avail); |
3272 | } | 3304 | } |
3273 | } | 3305 | } |
3274 | 3306 | ||
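For reference, the drain policy reindented above frees ac->avail entries when forced, and otherwise (limit + 4) / 5 of the array, capped at (avail + 1) / 2. With, say, limit = 120 and avail = 30 that is 24 objects per pass; with avail = 10 the cap kicks in and only 5 are released, so a lightly used array is trimmed gently rather than emptied.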
@@ -3291,13 +3323,14 @@ static void cache_reap(void *unused) | |||
3291 | 3323 | ||
3292 | if (down_trylock(&cache_chain_sem)) { | 3324 | if (down_trylock(&cache_chain_sem)) { |
3293 | /* Give up. Setup the next iteration. */ | 3325 | /* Give up. Setup the next iteration. */ |
3294 | schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); | 3326 | schedule_delayed_work(&__get_cpu_var(reap_work), |
3327 | REAPTIMEOUT_CPUC); | ||
3295 | return; | 3328 | return; |
3296 | } | 3329 | } |
3297 | 3330 | ||
3298 | list_for_each(walk, &cache_chain) { | 3331 | list_for_each(walk, &cache_chain) { |
3299 | kmem_cache_t *searchp; | 3332 | kmem_cache_t *searchp; |
3300 | struct list_head* p; | 3333 | struct list_head *p; |
3301 | int tofree; | 3334 | int tofree; |
3302 | struct slab *slabp; | 3335 | struct slab *slabp; |
3303 | 3336 | ||
@@ -3314,7 +3347,7 @@ static void cache_reap(void *unused) | |||
3314 | spin_lock_irq(&l3->list_lock); | 3347 | spin_lock_irq(&l3->list_lock); |
3315 | 3348 | ||
3316 | drain_array_locked(searchp, ac_data(searchp), 0, | 3349 | drain_array_locked(searchp, ac_data(searchp), 0, |
3317 | numa_node_id()); | 3350 | numa_node_id()); |
3318 | 3351 | ||
3319 | if (time_after(l3->next_reap, jiffies)) | 3352 | if (time_after(l3->next_reap, jiffies)) |
3320 | goto next_unlock; | 3353 | goto next_unlock; |
@@ -3323,14 +3356,16 @@ static void cache_reap(void *unused) | |||
3323 | 3356 | ||
3324 | if (l3->shared) | 3357 | if (l3->shared) |
3325 | drain_array_locked(searchp, l3->shared, 0, | 3358 | drain_array_locked(searchp, l3->shared, 0, |
3326 | numa_node_id()); | 3359 | numa_node_id()); |
3327 | 3360 | ||
3328 | if (l3->free_touched) { | 3361 | if (l3->free_touched) { |
3329 | l3->free_touched = 0; | 3362 | l3->free_touched = 0; |
3330 | goto next_unlock; | 3363 | goto next_unlock; |
3331 | } | 3364 | } |
3332 | 3365 | ||
3333 | tofree = (l3->free_limit+5*searchp->num-1)/(5*searchp->num); | 3366 | tofree = |
3367 | (l3->free_limit + 5 * searchp->num - | ||
3368 | 1) / (5 * searchp->num); | ||
3334 | do { | 3369 | do { |
3335 | p = l3->slabs_free.next; | 3370 | p = l3->slabs_free.next; |
3336 | if (p == &(l3->slabs_free)) | 3371 | if (p == &(l3->slabs_free)) |
@@ -3350,10 +3385,10 @@ static void cache_reap(void *unused) | |||
3350 | spin_unlock_irq(&l3->list_lock); | 3385 | spin_unlock_irq(&l3->list_lock); |
3351 | slab_destroy(searchp, slabp); | 3386 | slab_destroy(searchp, slabp); |
3352 | spin_lock_irq(&l3->list_lock); | 3387 | spin_lock_irq(&l3->list_lock); |
3353 | } while(--tofree > 0); | 3388 | } while (--tofree > 0); |
3354 | next_unlock: | 3389 | next_unlock: |
3355 | spin_unlock_irq(&l3->list_lock); | 3390 | spin_unlock_irq(&l3->list_lock); |
3356 | next: | 3391 | next: |
3357 | cond_resched(); | 3392 | cond_resched(); |
3358 | } | 3393 | } |
3359 | check_irq_on(); | 3394 | check_irq_on(); |
@@ -3365,32 +3400,37 @@ next: | |||
3365 | 3400 | ||
3366 | #ifdef CONFIG_PROC_FS | 3401 | #ifdef CONFIG_PROC_FS |
3367 | 3402 | ||
3368 | static void *s_start(struct seq_file *m, loff_t *pos) | 3403 | static void print_slabinfo_header(struct seq_file *m) |
3369 | { | 3404 | { |
3370 | loff_t n = *pos; | 3405 | /* |
3371 | struct list_head *p; | 3406 | * Output format version, so at least we can change it |
3372 | 3407 | * without _too_ many complaints. | |
3373 | down(&cache_chain_sem); | 3408 | */ |
3374 | if (!n) { | ||
3375 | /* | ||
3376 | * Output format version, so at least we can change it | ||
3377 | * without _too_ many complaints. | ||
3378 | */ | ||
3379 | #if STATS | 3409 | #if STATS |
3380 | seq_puts(m, "slabinfo - version: 2.1 (statistics)\n"); | 3410 | seq_puts(m, "slabinfo - version: 2.1 (statistics)\n"); |
3381 | #else | 3411 | #else |
3382 | seq_puts(m, "slabinfo - version: 2.1\n"); | 3412 | seq_puts(m, "slabinfo - version: 2.1\n"); |
3383 | #endif | 3413 | #endif |
3384 | seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>"); | 3414 | seq_puts(m, "# name <active_objs> <num_objs> <objsize> " |
3385 | seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); | 3415 | "<objperslab> <pagesperslab>"); |
3386 | seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); | 3416 | seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); |
3417 | seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); | ||
3387 | #if STATS | 3418 | #if STATS |
3388 | seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped>" | 3419 | seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> " |
3389 | " <error> <maxfreeable> <nodeallocs> <remotefrees>"); | 3420 | "<error> <maxfreeable> <nodeallocs> <remotefrees>"); |
3390 | seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); | 3421 | seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); |
3391 | #endif | 3422 | #endif |
3392 | seq_putc(m, '\n'); | 3423 | seq_putc(m, '\n'); |
3393 | } | 3424 | } |
3425 | |||
3426 | static void *s_start(struct seq_file *m, loff_t *pos) | ||
3427 | { | ||
3428 | loff_t n = *pos; | ||
3429 | struct list_head *p; | ||
3430 | |||
3431 | down(&cache_chain_sem); | ||
3432 | if (!n) | ||
3433 | print_slabinfo_header(m); | ||
3394 | p = cache_chain.next; | 3434 | p = cache_chain.next; |
3395 | while (n--) { | 3435 | while (n--) { |
3396 | p = p->next; | 3436 | p = p->next; |
@@ -3405,7 +3445,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos) | |||
3405 | kmem_cache_t *cachep = p; | 3445 | kmem_cache_t *cachep = p; |
3406 | ++*pos; | 3446 | ++*pos; |
3407 | return cachep->next.next == &cache_chain ? NULL | 3447 | return cachep->next.next == &cache_chain ? NULL |
3408 | : list_entry(cachep->next.next, kmem_cache_t, next); | 3448 | : list_entry(cachep->next.next, kmem_cache_t, next); |
3409 | } | 3449 | } |
3410 | 3450 | ||
3411 | static void s_stop(struct seq_file *m, void *p) | 3451 | static void s_stop(struct seq_file *m, void *p) |
@@ -3417,11 +3457,11 @@ static int s_show(struct seq_file *m, void *p) | |||
3417 | { | 3457 | { |
3418 | kmem_cache_t *cachep = p; | 3458 | kmem_cache_t *cachep = p; |
3419 | struct list_head *q; | 3459 | struct list_head *q; |
3420 | struct slab *slabp; | 3460 | struct slab *slabp; |
3421 | unsigned long active_objs; | 3461 | unsigned long active_objs; |
3422 | unsigned long num_objs; | 3462 | unsigned long num_objs; |
3423 | unsigned long active_slabs = 0; | 3463 | unsigned long active_slabs = 0; |
3424 | unsigned long num_slabs, free_objects = 0, shared_avail = 0; | 3464 | unsigned long num_slabs, free_objects = 0, shared_avail = 0; |
3425 | const char *name; | 3465 | const char *name; |
3426 | char *error = NULL; | 3466 | char *error = NULL; |
3427 | int node; | 3467 | int node; |
@@ -3438,14 +3478,14 @@ static int s_show(struct seq_file *m, void *p) | |||
3438 | 3478 | ||
3439 | spin_lock(&l3->list_lock); | 3479 | spin_lock(&l3->list_lock); |
3440 | 3480 | ||
3441 | list_for_each(q,&l3->slabs_full) { | 3481 | list_for_each(q, &l3->slabs_full) { |
3442 | slabp = list_entry(q, struct slab, list); | 3482 | slabp = list_entry(q, struct slab, list); |
3443 | if (slabp->inuse != cachep->num && !error) | 3483 | if (slabp->inuse != cachep->num && !error) |
3444 | error = "slabs_full accounting error"; | 3484 | error = "slabs_full accounting error"; |
3445 | active_objs += cachep->num; | 3485 | active_objs += cachep->num; |
3446 | active_slabs++; | 3486 | active_slabs++; |
3447 | } | 3487 | } |
3448 | list_for_each(q,&l3->slabs_partial) { | 3488 | list_for_each(q, &l3->slabs_partial) { |
3449 | slabp = list_entry(q, struct slab, list); | 3489 | slabp = list_entry(q, struct slab, list); |
3450 | if (slabp->inuse == cachep->num && !error) | 3490 | if (slabp->inuse == cachep->num && !error) |
3451 | error = "slabs_partial inuse accounting error"; | 3491 | error = "slabs_partial inuse accounting error"; |
@@ -3454,7 +3494,7 @@ static int s_show(struct seq_file *m, void *p) | |||
3454 | active_objs += slabp->inuse; | 3494 | active_objs += slabp->inuse; |
3455 | active_slabs++; | 3495 | active_slabs++; |
3456 | } | 3496 | } |
3457 | list_for_each(q,&l3->slabs_free) { | 3497 | list_for_each(q, &l3->slabs_free) { |
3458 | slabp = list_entry(q, struct slab, list); | 3498 | slabp = list_entry(q, struct slab, list); |
3459 | if (slabp->inuse && !error) | 3499 | if (slabp->inuse && !error) |
3460 | error = "slabs_free/inuse accounting error"; | 3500 | error = "slabs_free/inuse accounting error"; |
@@ -3465,25 +3505,24 @@ static int s_show(struct seq_file *m, void *p) | |||
3465 | 3505 | ||
3466 | spin_unlock(&l3->list_lock); | 3506 | spin_unlock(&l3->list_lock); |
3467 | } | 3507 | } |
3468 | num_slabs+=active_slabs; | 3508 | num_slabs += active_slabs; |
3469 | num_objs = num_slabs*cachep->num; | 3509 | num_objs = num_slabs * cachep->num; |
3470 | if (num_objs - active_objs != free_objects && !error) | 3510 | if (num_objs - active_objs != free_objects && !error) |
3471 | error = "free_objects accounting error"; | 3511 | error = "free_objects accounting error"; |
3472 | 3512 | ||
3473 | name = cachep->name; | 3513 | name = cachep->name; |
3474 | if (error) | 3514 | if (error) |
3475 | printk(KERN_ERR "slab: cache %s error: %s\n", name, error); | 3515 | printk(KERN_ERR "slab: cache %s error: %s\n", name, error); |
3476 | 3516 | ||
3477 | seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", | 3517 | seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", |
3478 | name, active_objs, num_objs, cachep->objsize, | 3518 | name, active_objs, num_objs, cachep->objsize, |
3479 | cachep->num, (1<<cachep->gfporder)); | 3519 | cachep->num, (1 << cachep->gfporder)); |
3480 | seq_printf(m, " : tunables %4u %4u %4u", | 3520 | seq_printf(m, " : tunables %4u %4u %4u", |
3481 | cachep->limit, cachep->batchcount, | 3521 | cachep->limit, cachep->batchcount, cachep->shared); |
3482 | cachep->shared); | ||
3483 | seq_printf(m, " : slabdata %6lu %6lu %6lu", | 3522 | seq_printf(m, " : slabdata %6lu %6lu %6lu", |
3484 | active_slabs, num_slabs, shared_avail); | 3523 | active_slabs, num_slabs, shared_avail); |
3485 | #if STATS | 3524 | #if STATS |
3486 | { /* list3 stats */ | 3525 | { /* list3 stats */ |
3487 | unsigned long high = cachep->high_mark; | 3526 | unsigned long high = cachep->high_mark; |
3488 | unsigned long allocs = cachep->num_allocations; | 3527 | unsigned long allocs = cachep->num_allocations; |
3489 | unsigned long grown = cachep->grown; | 3528 | unsigned long grown = cachep->grown; |
@@ -3494,9 +3533,7 @@ static int s_show(struct seq_file *m, void *p) | |||
3494 | unsigned long node_frees = cachep->node_frees; | 3533 | unsigned long node_frees = cachep->node_frees; |
3495 | 3534 | ||
3496 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ | 3535 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ |
3497 | %4lu %4lu %4lu %4lu", | 3536 | %4lu %4lu %4lu %4lu", allocs, high, grown, reaped, errors, max_freeable, node_allocs, node_frees); |
3498 | allocs, high, grown, reaped, errors, | ||
3499 | max_freeable, node_allocs, node_frees); | ||
3500 | } | 3537 | } |
3501 | /* cpu stats */ | 3538 | /* cpu stats */ |
3502 | { | 3539 | { |
@@ -3506,7 +3543,7 @@ static int s_show(struct seq_file *m, void *p) | |||
3506 | unsigned long freemiss = atomic_read(&cachep->freemiss); | 3543 | unsigned long freemiss = atomic_read(&cachep->freemiss); |
3507 | 3544 | ||
3508 | seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu", | 3545 | seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu", |
3509 | allochit, allocmiss, freehit, freemiss); | 3546 | allochit, allocmiss, freehit, freemiss); |
3510 | } | 3547 | } |
3511 | #endif | 3548 | #endif |
3512 | seq_putc(m, '\n'); | 3549 | seq_putc(m, '\n'); |
@@ -3529,10 +3566,10 @@ static int s_show(struct seq_file *m, void *p) | |||
3529 | */ | 3566 | */ |
3530 | 3567 | ||
3531 | struct seq_operations slabinfo_op = { | 3568 | struct seq_operations slabinfo_op = { |
3532 | .start = s_start, | 3569 | .start = s_start, |
3533 | .next = s_next, | 3570 | .next = s_next, |
3534 | .stop = s_stop, | 3571 | .stop = s_stop, |
3535 | .show = s_show, | 3572 | .show = s_show, |
3536 | }; | 3573 | }; |
3537 | 3574 | ||
3538 | #define MAX_SLABINFO_WRITE 128 | 3575 | #define MAX_SLABINFO_WRITE 128 |
@@ -3543,18 +3580,18 @@ struct seq_operations slabinfo_op = { | |||
3543 | * @count: data length | 3580 | * @count: data length |
3544 | * @ppos: unused | 3581 | * @ppos: unused |
3545 | */ | 3582 | */ |
3546 | ssize_t slabinfo_write(struct file *file, const char __user *buffer, | 3583 | ssize_t slabinfo_write(struct file *file, const char __user * buffer, |
3547 | size_t count, loff_t *ppos) | 3584 | size_t count, loff_t *ppos) |
3548 | { | 3585 | { |
3549 | char kbuf[MAX_SLABINFO_WRITE+1], *tmp; | 3586 | char kbuf[MAX_SLABINFO_WRITE + 1], *tmp; |
3550 | int limit, batchcount, shared, res; | 3587 | int limit, batchcount, shared, res; |
3551 | struct list_head *p; | 3588 | struct list_head *p; |
3552 | 3589 | ||
3553 | if (count > MAX_SLABINFO_WRITE) | 3590 | if (count > MAX_SLABINFO_WRITE) |
3554 | return -EINVAL; | 3591 | return -EINVAL; |
3555 | if (copy_from_user(&kbuf, buffer, count)) | 3592 | if (copy_from_user(&kbuf, buffer, count)) |
3556 | return -EFAULT; | 3593 | return -EFAULT; |
3557 | kbuf[MAX_SLABINFO_WRITE] = '\0'; | 3594 | kbuf[MAX_SLABINFO_WRITE] = '\0'; |
3558 | 3595 | ||
3559 | tmp = strchr(kbuf, ' '); | 3596 | tmp = strchr(kbuf, ' '); |
3560 | if (!tmp) | 3597 | if (!tmp) |
@@ -3567,18 +3604,17 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer, | |||
3567 | /* Find the cache in the chain of caches. */ | 3604 | /* Find the cache in the chain of caches. */ |
3568 | down(&cache_chain_sem); | 3605 | down(&cache_chain_sem); |
3569 | res = -EINVAL; | 3606 | res = -EINVAL; |
3570 | list_for_each(p,&cache_chain) { | 3607 | list_for_each(p, &cache_chain) { |
3571 | kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next); | 3608 | kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next); |
3572 | 3609 | ||
3573 | if (!strcmp(cachep->name, kbuf)) { | 3610 | if (!strcmp(cachep->name, kbuf)) { |
3574 | if (limit < 1 || | 3611 | if (limit < 1 || |
3575 | batchcount < 1 || | 3612 | batchcount < 1 || |
3576 | batchcount > limit || | 3613 | batchcount > limit || shared < 0) { |
3577 | shared < 0) { | ||
3578 | res = 0; | 3614 | res = 0; |
3579 | } else { | 3615 | } else { |
3580 | res = do_tune_cpucache(cachep, limit, | 3616 | res = do_tune_cpucache(cachep, limit, |
3581 | batchcount, shared); | 3617 | batchcount, shared); |
3582 | } | 3618 | } |
3583 | break; | 3619 | break; |
3584 | } | 3620 | } |
@@ -3609,26 +3645,3 @@ unsigned int ksize(const void *objp) | |||
3609 | 3645 | ||
3610 | return obj_reallen(page_get_cache(virt_to_page(objp))); | 3646 | return obj_reallen(page_get_cache(virt_to_page(objp))); |
3611 | } | 3647 | } |
3612 | |||
3613 | |||
3614 | /* | ||
3615 | * kstrdup - allocate space for and copy an existing string | ||
3616 | * | ||
3617 | * @s: the string to duplicate | ||
3618 | * @gfp: the GFP mask used in the kmalloc() call when allocating memory | ||
3619 | */ | ||
3620 | char *kstrdup(const char *s, gfp_t gfp) | ||
3621 | { | ||
3622 | size_t len; | ||
3623 | char *buf; | ||
3624 | |||
3625 | if (!s) | ||
3626 | return NULL; | ||
3627 | |||
3628 | len = strlen(s) + 1; | ||
3629 | buf = kmalloc(len, gfp); | ||
3630 | if (buf) | ||
3631 | memcpy(buf, s, len); | ||
3632 | return buf; | ||
3633 | } | ||
3634 | EXPORT_SYMBOL(kstrdup); | ||
diff --git a/mm/slob.c b/mm/slob.c new file mode 100644 index 000000000000..1c240c4b71d9 --- /dev/null +++ b/mm/slob.c | |||
@@ -0,0 +1,385 @@ | |||
1 | /* | ||
2 | * SLOB Allocator: Simple List Of Blocks | ||
3 | * | ||
4 | * Matt Mackall <mpm@selenic.com> 12/30/03 | ||
5 | * | ||
6 | * How SLOB works: | ||
7 | * | ||
8 | * The core of SLOB is a traditional K&R style heap allocator, with | ||
9 | * support for returning aligned objects. The granularity of this | ||
10 | * allocator is 8 bytes on x86, though it's perhaps possible to reduce | ||
11 | * this to 4 if it's deemed worth the effort. The slob heap is a | ||
12 | * singly-linked list of pages from __get_free_page, grown on demand | ||
13 | * and allocation from the heap is currently first-fit. | ||
14 | * | ||
15 | * Above this is an implementation of kmalloc/kfree. Blocks returned | ||
16 | * from kmalloc are 8-byte aligned and prepended with an 8-byte header. | ||
17 | * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls | ||
18 | * __get_free_pages directly so that it can return page-aligned blocks | ||
19 | * and keeps a linked list of such pages and their orders. These | ||
20 | * objects are detected in kfree() by their page alignment. | ||
21 | * | ||
22 | * SLAB is emulated on top of SLOB by simply calling constructors and | ||
23 | * destructors for every SLAB allocation. Objects are returned with | ||
24 | * the 8-byte alignment unless the SLAB_MUST_HWCACHE_ALIGN flag is | ||
25 | * set, in which case the low-level allocator will fragment blocks to | ||
26 | * create the proper alignment. Again, objects of page-size or greater | ||
27 | * are allocated by calling __get_free_pages. As SLAB objects know | ||
28 | * their size, no separate size bookkeeping is necessary and there is | ||
29 | * essentially no allocation space overhead. | ||
30 | */ | ||
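As the comment above describes, a small kmalloc() request is carved out of a first-fit free list in SLOB_UNIT-sized pieces, with one extra unit prepended as a size header that kfree() and ksize() later read back. A standalone userspace sketch of just that header bookkeeping; the struct mirrors slob_block below, but none of this is kernel code:

#include <stdio.h>
#include <stdlib.h>

/* mirrors struct slob_block: one unit both of granularity and of header */
struct unit {
        int units;
        struct unit *next;
};

#define UNIT            sizeof(struct unit)
#define UNITS(size)     (((size) + UNIT - 1) / UNIT)    /* round up to whole units */

int main(void)
{
        size_t req = 100;       /* an arbitrary small request */

        /* kmalloc() asks the heap for the payload plus one header unit */
        struct unit *block = malloc((UNITS(req) + 1) * UNIT);
        if (!block)
                return 1;
        block->units = UNITS(req) + 1;  /* header records the size in units */

        void *payload = block + 1;      /* what kmalloc() hands back */

        /* ksize() steps back one header and converts the unit count to bytes */
        size_t bytes = ((struct unit *)payload - 1)->units * UNIT;
        printf("requested %zu bytes, header says %zu bytes\n", req, bytes);

        free(block);
        return 0;
}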
31 | |||
32 | #include <linux/config.h> | ||
33 | #include <linux/slab.h> | ||
34 | #include <linux/mm.h> | ||
35 | #include <linux/cache.h> | ||
36 | #include <linux/init.h> | ||
37 | #include <linux/module.h> | ||
38 | #include <linux/timer.h> | ||
39 | |||
40 | struct slob_block { | ||
41 | int units; | ||
42 | struct slob_block *next; | ||
43 | }; | ||
44 | typedef struct slob_block slob_t; | ||
45 | |||
46 | #define SLOB_UNIT sizeof(slob_t) | ||
47 | #define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT) | ||
48 | #define SLOB_ALIGN L1_CACHE_BYTES | ||
49 | |||
50 | struct bigblock { | ||
51 | int order; | ||
52 | void *pages; | ||
53 | struct bigblock *next; | ||
54 | }; | ||
55 | typedef struct bigblock bigblock_t; | ||
56 | |||
57 | static slob_t arena = { .next = &arena, .units = 1 }; | ||
58 | static slob_t *slobfree = &arena; | ||
59 | static bigblock_t *bigblocks; | ||
60 | static DEFINE_SPINLOCK(slob_lock); | ||
61 | static DEFINE_SPINLOCK(block_lock); | ||
62 | |||
63 | static void slob_free(void *b, int size); | ||
64 | |||
65 | static void *slob_alloc(size_t size, gfp_t gfp, int align) | ||
66 | { | ||
67 | slob_t *prev, *cur, *aligned = 0; | ||
68 | int delta = 0, units = SLOB_UNITS(size); | ||
69 | unsigned long flags; | ||
70 | |||
71 | spin_lock_irqsave(&slob_lock, flags); | ||
72 | prev = slobfree; | ||
73 | for (cur = prev->next; ; prev = cur, cur = cur->next) { | ||
74 | if (align) { | ||
75 | aligned = (slob_t *)ALIGN((unsigned long)cur, align); | ||
76 | delta = aligned - cur; | ||
77 | } | ||
78 | if (cur->units >= units + delta) { /* room enough? */ | ||
79 | if (delta) { /* need to fragment head to align? */ | ||
80 | aligned->units = cur->units - delta; | ||
81 | aligned->next = cur->next; | ||
82 | cur->next = aligned; | ||
83 | cur->units = delta; | ||
84 | prev = cur; | ||
85 | cur = aligned; | ||
86 | } | ||
87 | |||
88 | if (cur->units == units) /* exact fit? */ | ||
89 | prev->next = cur->next; /* unlink */ | ||
90 | else { /* fragment */ | ||
91 | prev->next = cur + units; | ||
92 | prev->next->units = cur->units - units; | ||
93 | prev->next->next = cur->next; | ||
94 | cur->units = units; | ||
95 | } | ||
96 | |||
97 | slobfree = prev; | ||
98 | spin_unlock_irqrestore(&slob_lock, flags); | ||
99 | return cur; | ||
100 | } | ||
101 | if (cur == slobfree) { | ||
102 | spin_unlock_irqrestore(&slob_lock, flags); | ||
103 | |||
104 | if (size == PAGE_SIZE) /* trying to shrink arena? */ | ||
105 | return 0; | ||
106 | |||
107 | cur = (slob_t *)__get_free_page(gfp); | ||
108 | if (!cur) | ||
109 | return 0; | ||
110 | |||
111 | slob_free(cur, PAGE_SIZE); | ||
112 | spin_lock_irqsave(&slob_lock, flags); | ||
113 | cur = slobfree; | ||
114 | } | ||
115 | } | ||
116 | } | ||
117 | |||
118 | static void slob_free(void *block, int size) | ||
119 | { | ||
120 | slob_t *cur, *b = (slob_t *)block; | ||
121 | unsigned long flags; | ||
122 | |||
123 | if (!block) | ||
124 | return; | ||
125 | |||
126 | if (size) | ||
127 | b->units = SLOB_UNITS(size); | ||
128 | |||
129 | /* Find reinsertion point */ | ||
130 | spin_lock_irqsave(&slob_lock, flags); | ||
131 | for (cur = slobfree; !(b > cur && b < cur->next); cur = cur->next) | ||
132 | if (cur >= cur->next && (b > cur || b < cur->next)) | ||
133 | break; | ||
134 | |||
135 | if (b + b->units == cur->next) { | ||
136 | b->units += cur->next->units; | ||
137 | b->next = cur->next->next; | ||
138 | } else | ||
139 | b->next = cur->next; | ||
140 | |||
141 | if (cur + cur->units == b) { | ||
142 | cur->units += b->units; | ||
143 | cur->next = b->next; | ||
144 | } else | ||
145 | cur->next = b; | ||
146 | |||
147 | slobfree = cur; | ||
148 | |||
149 | spin_unlock_irqrestore(&slob_lock, flags); | ||
150 | } | ||
151 | |||
152 | static int FASTCALL(find_order(int size)); | ||
153 | static int fastcall find_order(int size) | ||
154 | { | ||
155 | int order = 0; | ||
156 | for ( ; size > 4096 ; size >>=1) | ||
157 | order++; | ||
158 | return order; | ||
159 | } | ||
160 | |||
161 | void *kmalloc(size_t size, gfp_t gfp) | ||
162 | { | ||
163 | slob_t *m; | ||
164 | bigblock_t *bb; | ||
165 | unsigned long flags; | ||
166 | |||
167 | if (size < PAGE_SIZE - SLOB_UNIT) { | ||
168 | m = slob_alloc(size + SLOB_UNIT, gfp, 0); | ||
169 | return m ? (void *)(m + 1) : 0; | ||
170 | } | ||
171 | |||
172 | bb = slob_alloc(sizeof(bigblock_t), gfp, 0); | ||
173 | if (!bb) | ||
174 | return 0; | ||
175 | |||
176 | bb->order = find_order(size); | ||
177 | bb->pages = (void *)__get_free_pages(gfp, bb->order); | ||
178 | |||
179 | if (bb->pages) { | ||
180 | spin_lock_irqsave(&block_lock, flags); | ||
181 | bb->next = bigblocks; | ||
182 | bigblocks = bb; | ||
183 | spin_unlock_irqrestore(&block_lock, flags); | ||
184 | return bb->pages; | ||
185 | } | ||
186 | |||
187 | slob_free(bb, sizeof(bigblock_t)); | ||
188 | return 0; | ||
189 | } | ||
190 | |||
191 | EXPORT_SYMBOL(kmalloc); | ||
192 | |||
193 | void kfree(const void *block) | ||
194 | { | ||
195 | bigblock_t *bb, **last = &bigblocks; | ||
196 | unsigned long flags; | ||
197 | |||
198 | if (!block) | ||
199 | return; | ||
200 | |||
201 | if (!((unsigned long)block & (PAGE_SIZE-1))) { | ||
202 | /* might be on the big block list */ | ||
203 | spin_lock_irqsave(&block_lock, flags); | ||
204 | for (bb = bigblocks; bb; last = &bb->next, bb = bb->next) { | ||
205 | if (bb->pages == block) { | ||
206 | *last = bb->next; | ||
207 | spin_unlock_irqrestore(&block_lock, flags); | ||
208 | free_pages((unsigned long)block, bb->order); | ||
209 | slob_free(bb, sizeof(bigblock_t)); | ||
210 | return; | ||
211 | } | ||
212 | } | ||
213 | spin_unlock_irqrestore(&block_lock, flags); | ||
214 | } | ||
215 | |||
216 | slob_free((slob_t *)block - 1, 0); | ||
217 | return; | ||
218 | } | ||
219 | |||
220 | EXPORT_SYMBOL(kfree); | ||
221 | |||
222 | unsigned int ksize(const void *block) | ||
223 | { | ||
224 | bigblock_t *bb; | ||
225 | unsigned long flags; | ||
226 | |||
227 | if (!block) | ||
228 | return 0; | ||
229 | |||
230 | if (!((unsigned long)block & (PAGE_SIZE-1))) { | ||
231 | spin_lock_irqsave(&block_lock, flags); | ||
232 | for (bb = bigblocks; bb; bb = bb->next) | ||
233 | if (bb->pages == block) { | ||
234 | spin_unlock_irqrestore(&block_lock, flags); | ||
235 | return PAGE_SIZE << bb->order; | ||
236 | } | ||
237 | spin_unlock_irqrestore(&block_lock, flags); | ||
238 | } | ||
239 | |||
240 | return ((slob_t *)block - 1)->units * SLOB_UNIT; | ||
241 | } | ||
242 | |||
243 | struct kmem_cache { | ||
244 | unsigned int size, align; | ||
245 | const char *name; | ||
246 | void (*ctor)(void *, struct kmem_cache *, unsigned long); | ||
247 | void (*dtor)(void *, struct kmem_cache *, unsigned long); | ||
248 | }; | ||
249 | |||
250 | struct kmem_cache *kmem_cache_create(const char *name, size_t size, | ||
251 | size_t align, unsigned long flags, | ||
252 | void (*ctor)(void*, struct kmem_cache *, unsigned long), | ||
253 | void (*dtor)(void*, struct kmem_cache *, unsigned long)) | ||
254 | { | ||
255 | struct kmem_cache *c; | ||
256 | |||
257 | c = slob_alloc(sizeof(struct kmem_cache), flags, 0); | ||
258 | |||
259 | if (c) { | ||
260 | c->name = name; | ||
261 | c->size = size; | ||
262 | c->ctor = ctor; | ||
263 | c->dtor = dtor; | ||
264 | /* ignore alignment unless it's forced */ | ||
265 | c->align = (flags & SLAB_MUST_HWCACHE_ALIGN) ? SLOB_ALIGN : 0; | ||
266 | if (c->align < align) | ||
267 | c->align = align; | ||
268 | } | ||
269 | |||
270 | return c; | ||
271 | } | ||
272 | EXPORT_SYMBOL(kmem_cache_create); | ||
273 | |||
274 | int kmem_cache_destroy(struct kmem_cache *c) | ||
275 | { | ||
276 | slob_free(c, sizeof(struct kmem_cache)); | ||
277 | return 0; | ||
278 | } | ||
279 | EXPORT_SYMBOL(kmem_cache_destroy); | ||
280 | |||
281 | void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags) | ||
282 | { | ||
283 | void *b; | ||
284 | |||
285 | if (c->size < PAGE_SIZE) | ||
286 | b = slob_alloc(c->size, flags, c->align); | ||
287 | else | ||
288 | b = (void *)__get_free_pages(flags, find_order(c->size)); | ||
289 | |||
290 | if (c->ctor) | ||
291 | c->ctor(b, c, SLAB_CTOR_CONSTRUCTOR); | ||
292 | |||
293 | return b; | ||
294 | } | ||
295 | EXPORT_SYMBOL(kmem_cache_alloc); | ||
296 | |||
297 | void kmem_cache_free(struct kmem_cache *c, void *b) | ||
298 | { | ||
299 | if (c->dtor) | ||
300 | c->dtor(b, c, 0); | ||
301 | |||
302 | if (c->size < PAGE_SIZE) | ||
303 | slob_free(b, c->size); | ||
304 | else | ||
305 | free_pages((unsigned long)b, find_order(c->size)); | ||
306 | } | ||
307 | EXPORT_SYMBOL(kmem_cache_free); | ||
308 | |||
309 | unsigned int kmem_cache_size(struct kmem_cache *c) | ||
310 | { | ||
311 | return c->size; | ||
312 | } | ||
313 | EXPORT_SYMBOL(kmem_cache_size); | ||
314 | |||
315 | const char *kmem_cache_name(struct kmem_cache *c) | ||
316 | { | ||
317 | return c->name; | ||
318 | } | ||
319 | EXPORT_SYMBOL(kmem_cache_name); | ||
320 | |||
321 | static struct timer_list slob_timer = TIMER_INITIALIZER( | ||
322 | (void (*)(unsigned long))kmem_cache_init, 0, 0); | ||
323 | |||
324 | void kmem_cache_init(void) | ||
325 | { | ||
326 | void *p = slob_alloc(PAGE_SIZE, 0, PAGE_SIZE-1); | ||
327 | |||
328 | if (p) | ||
329 | free_page((unsigned long)p); | ||
330 | |||
331 | mod_timer(&slob_timer, jiffies + HZ); | ||
332 | } | ||
333 | |||
334 | atomic_t slab_reclaim_pages = ATOMIC_INIT(0); | ||
335 | EXPORT_SYMBOL(slab_reclaim_pages); | ||
336 | |||
337 | #ifdef CONFIG_SMP | ||
338 | |||
339 | void *__alloc_percpu(size_t size, size_t align) | ||
340 | { | ||
341 | int i; | ||
342 | struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL); | ||
343 | |||
344 | if (!pdata) | ||
345 | return NULL; | ||
346 | |||
347 | for (i = 0; i < NR_CPUS; i++) { | ||
348 | if (!cpu_possible(i)) | ||
349 | continue; | ||
350 | pdata->ptrs[i] = kmalloc(size, GFP_KERNEL); | ||
351 | if (!pdata->ptrs[i]) | ||
352 | goto unwind_oom; | ||
353 | memset(pdata->ptrs[i], 0, size); | ||
354 | } | ||
355 | |||
356 | /* Catch derefs w/o wrappers */ | ||
357 | return (void *) (~(unsigned long) pdata); | ||
358 | |||
359 | unwind_oom: | ||
360 | while (--i >= 0) { | ||
361 | if (!cpu_possible(i)) | ||
362 | continue; | ||
363 | kfree(pdata->ptrs[i]); | ||
364 | } | ||
365 | kfree(pdata); | ||
366 | return NULL; | ||
367 | } | ||
368 | EXPORT_SYMBOL(__alloc_percpu); | ||
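The "~(unsigned long) pdata" returned by __alloc_percpu() above is deliberate pointer mangling: dereferencing the returned cookie directly faults, so callers are forced through the per-cpu accessor, which undoes the complement before indexing ptrs[]. A rough illustration of the accessor side of that trick; the real macro lives in include/linux/percpu.h and may differ in detail:

/* hypothetical demo macro, not the kernel's definition */
#define percpu_ptr_demo(cookie, cpu) \
        (((struct percpu_data *)(~(unsigned long)(cookie)))->ptrs[(cpu)])

/* __alloc_percpu() hands out ~pdata; complementing it again recovers the
 * struct percpu_data pointer, so only wrapped accesses ever see it. */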
369 | |||
370 | void | ||
371 | free_percpu(const void *objp) | ||
372 | { | ||
373 | int i; | ||
374 | struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp); | ||
375 | |||
376 | for (i = 0; i < NR_CPUS; i++) { | ||
377 | if (!cpu_possible(i)) | ||
378 | continue; | ||
379 | kfree(p->ptrs[i]); | ||
380 | } | ||
381 | kfree(p); | ||
382 | } | ||
383 | EXPORT_SYMBOL(free_percpu); | ||
384 | |||
385 | #endif | ||
diff --git a/mm/sparse.c b/mm/sparse.c index 72079b538e2d..0a51f36ba3a1 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -18,10 +18,10 @@ | |||
18 | */ | 18 | */ |
19 | #ifdef CONFIG_SPARSEMEM_EXTREME | 19 | #ifdef CONFIG_SPARSEMEM_EXTREME |
20 | struct mem_section *mem_section[NR_SECTION_ROOTS] | 20 | struct mem_section *mem_section[NR_SECTION_ROOTS] |
21 | ____cacheline_maxaligned_in_smp; | 21 | ____cacheline_internodealigned_in_smp; |
22 | #else | 22 | #else |
23 | struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] | 23 | struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] |
24 | ____cacheline_maxaligned_in_smp; | 24 | ____cacheline_internodealigned_in_smp; |
25 | #endif | 25 | #endif |
26 | EXPORT_SYMBOL(mem_section); | 26 | EXPORT_SYMBOL(mem_section); |
27 | 27 | ||
diff --git a/mm/swap_state.c b/mm/swap_state.c index fc2aecb70a95..7b09ac503fec 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
@@ -141,7 +141,7 @@ void __delete_from_swap_cache(struct page *page) | |||
141 | * Allocate swap space for the page and add the page to the | 141 | * Allocate swap space for the page and add the page to the |
142 | * swap cache. Caller needs to hold the page lock. | 142 | * swap cache. Caller needs to hold the page lock. |
143 | */ | 143 | */ |
144 | int add_to_swap(struct page * page) | 144 | int add_to_swap(struct page * page, gfp_t gfp_mask) |
145 | { | 145 | { |
146 | swp_entry_t entry; | 146 | swp_entry_t entry; |
147 | int err; | 147 | int err; |
@@ -166,7 +166,7 @@ int add_to_swap(struct page * page) | |||
166 | * Add it to the swap cache and mark it dirty | 166 | * Add it to the swap cache and mark it dirty |
167 | */ | 167 | */ |
168 | err = __add_to_swap_cache(page, entry, | 168 | err = __add_to_swap_cache(page, entry, |
169 | GFP_ATOMIC|__GFP_NOMEMALLOC|__GFP_NOWARN); | 169 | gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN); |
170 | 170 | ||
171 | switch (err) { | 171 | switch (err) { |
172 | case 0: /* Success */ | 172 | case 0: /* Success */ |
diff --git a/mm/swapfile.c b/mm/swapfile.c index 6da4b28b896b..80f948a2028b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -1493,7 +1493,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1493 | goto bad_swap; | 1493 | goto bad_swap; |
1494 | if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) | 1494 | if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) |
1495 | goto bad_swap; | 1495 | goto bad_swap; |
1496 | 1496 | ||
1497 | /* OK, set up the swap map and apply the bad block list */ | 1497 | /* OK, set up the swap map and apply the bad block list */ |
1498 | if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) { | 1498 | if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) { |
1499 | error = -ENOMEM; | 1499 | error = -ENOMEM; |
@@ -1502,17 +1502,17 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | |||
1502 | 1502 | ||
1503 | error = 0; | 1503 | error = 0; |
1504 | memset(p->swap_map, 0, maxpages * sizeof(short)); | 1504 | memset(p->swap_map, 0, maxpages * sizeof(short)); |
1505 | for (i=0; i<swap_header->info.nr_badpages; i++) { | 1505 | for (i = 0; i < swap_header->info.nr_badpages; i++) { |
1506 | int page = swap_header->info.badpages[i]; | 1506 | int page_nr = swap_header->info.badpages[i]; |
1507 | if (page <= 0 || page >= swap_header->info.last_page) | 1507 | if (page_nr <= 0 || page_nr >= swap_header->info.last_page) |
1508 | error = -EINVAL; | 1508 | error = -EINVAL; |
1509 | else | 1509 | else |
1510 | p->swap_map[page] = SWAP_MAP_BAD; | 1510 | p->swap_map[page_nr] = SWAP_MAP_BAD; |
1511 | } | 1511 | } |
1512 | nr_good_pages = swap_header->info.last_page - | 1512 | nr_good_pages = swap_header->info.last_page - |
1513 | swap_header->info.nr_badpages - | 1513 | swap_header->info.nr_badpages - |
1514 | 1 /* header page */; | 1514 | 1 /* header page */; |
1515 | if (error) | 1515 | if (error) |
1516 | goto bad_swap; | 1516 | goto bad_swap; |
1517 | } | 1517 | } |
1518 | 1518 | ||
diff --git a/mm/truncate.c b/mm/truncate.c index 7dee32745901..b1a463d0fe71 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -249,7 +249,6 @@ unlock: | |||
249 | break; | 249 | break; |
250 | } | 250 | } |
251 | pagevec_release(&pvec); | 251 | pagevec_release(&pvec); |
252 | cond_resched(); | ||
253 | } | 252 | } |
254 | return ret; | 253 | return ret; |
255 | } | 254 | } |
diff --git a/mm/util.c b/mm/util.c new file mode 100644 index 000000000000..5f4bb59da63c --- /dev/null +++ b/mm/util.c | |||
@@ -0,0 +1,39 @@ | |||
1 | #include <linux/slab.h> | ||
2 | #include <linux/string.h> | ||
3 | #include <linux/module.h> | ||
4 | |||
5 | /** | ||
6 | * kzalloc - allocate memory. The memory is set to zero. | ||
7 | * @size: how many bytes of memory are required. | ||
8 | * @flags: the type of memory to allocate. | ||
9 | */ | ||
10 | void *kzalloc(size_t size, gfp_t flags) | ||
11 | { | ||
12 | void *ret = kmalloc(size, flags); | ||
13 | if (ret) | ||
14 | memset(ret, 0, size); | ||
15 | return ret; | ||
16 | } | ||
17 | EXPORT_SYMBOL(kzalloc); | ||
18 | |||
19 | /* | ||
20 | * kstrdup - allocate space for and copy an existing string | ||
21 | * | ||
22 | * @s: the string to duplicate | ||
23 | * @gfp: the GFP mask used in the kmalloc() call when allocating memory | ||
24 | */ | ||
25 | char *kstrdup(const char *s, gfp_t gfp) | ||
26 | { | ||
27 | size_t len; | ||
28 | char *buf; | ||
29 | |||
30 | if (!s) | ||
31 | return NULL; | ||
32 | |||
33 | len = strlen(s) + 1; | ||
34 | buf = kmalloc(len, gfp); | ||
35 | if (buf) | ||
36 | memcpy(buf, s, len); | ||
37 | return buf; | ||
38 | } | ||
39 | EXPORT_SYMBOL(kstrdup); | ||
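kzalloc() and kstrdup() are thin wrappers around kmalloc(); a typical in-kernel usage pattern looks roughly like the sketch below, where the structure and function names are made up purely for illustration:

/* illustrative only: a made-up object built with the helpers above */
struct demo_ctx {
        char *name;
        int count;
};

static struct demo_ctx *demo_ctx_create(const char *name)
{
        struct demo_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

        if (!ctx)
                return NULL;
        ctx->name = kstrdup(name, GFP_KERNEL);  /* returns NULL for a NULL input */
        if (name && !ctx->name) {
                kfree(ctx);
                return NULL;
        }
        return ctx;     /* remaining fields already zeroed by kzalloc() */
}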
diff --git a/mm/vmscan.c b/mm/vmscan.c index be8235fb1939..bf903b2d198f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -180,8 +180,7 @@ EXPORT_SYMBOL(remove_shrinker); | |||
180 | * | 180 | * |
181 | * Returns the number of slab objects which we shrunk. | 181 | * Returns the number of slab objects which we shrunk. |
182 | */ | 182 | */ |
183 | static int shrink_slab(unsigned long scanned, gfp_t gfp_mask, | 183 | int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages) |
184 | unsigned long lru_pages) | ||
185 | { | 184 | { |
186 | struct shrinker *shrinker; | 185 | struct shrinker *shrinker; |
187 | int ret = 0; | 186 | int ret = 0; |
@@ -269,9 +268,7 @@ static inline int is_page_cache_freeable(struct page *page) | |||
269 | 268 | ||
270 | static int may_write_to_queue(struct backing_dev_info *bdi) | 269 | static int may_write_to_queue(struct backing_dev_info *bdi) |
271 | { | 270 | { |
272 | if (current_is_kswapd()) | 271 | if (current->flags & PF_SWAPWRITE) |
273 | return 1; | ||
274 | if (current_is_pdflush()) /* This is unlikely, but why not... */ | ||
275 | return 1; | 272 | return 1; |
276 | if (!bdi_write_congested(bdi)) | 273 | if (!bdi_write_congested(bdi)) |
277 | return 1; | 274 | return 1; |
@@ -376,6 +373,43 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) | |||
376 | return PAGE_CLEAN; | 373 | return PAGE_CLEAN; |
377 | } | 374 | } |
378 | 375 | ||
376 | static int remove_mapping(struct address_space *mapping, struct page *page) | ||
377 | { | ||
378 | if (!mapping) | ||
379 | return 0; /* truncate got there first */ | ||
380 | |||
381 | write_lock_irq(&mapping->tree_lock); | ||
382 | |||
383 | /* | ||
384 | * The non-racy check for busy page. It is critical to check | ||
385 | * PageDirty _after_ making sure that the page is freeable and | ||
386 | * not in use by anybody. (pagecache + us == 2) | ||
387 | */ | ||
388 | if (unlikely(page_count(page) != 2)) | ||
389 | goto cannot_free; | ||
390 | smp_rmb(); | ||
391 | if (unlikely(PageDirty(page))) | ||
392 | goto cannot_free; | ||
393 | |||
394 | if (PageSwapCache(page)) { | ||
395 | swp_entry_t swap = { .val = page_private(page) }; | ||
396 | __delete_from_swap_cache(page); | ||
397 | write_unlock_irq(&mapping->tree_lock); | ||
398 | swap_free(swap); | ||
399 | __put_page(page); /* The pagecache ref */ | ||
400 | return 1; | ||
401 | } | ||
402 | |||
403 | __remove_from_page_cache(page); | ||
404 | write_unlock_irq(&mapping->tree_lock); | ||
405 | __put_page(page); | ||
406 | return 1; | ||
407 | |||
408 | cannot_free: | ||
409 | write_unlock_irq(&mapping->tree_lock); | ||
410 | return 0; | ||
411 | } | ||
412 | |||
379 | /* | 413 | /* |
380 | * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed | 414 | * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed |
381 | */ | 415 | */ |
@@ -424,7 +458,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) | |||
424 | * Try to allocate it some swap space here. | 458 | * Try to allocate it some swap space here. |
425 | */ | 459 | */ |
426 | if (PageAnon(page) && !PageSwapCache(page)) { | 460 | if (PageAnon(page) && !PageSwapCache(page)) { |
427 | if (!add_to_swap(page)) | 461 | if (!add_to_swap(page, GFP_ATOMIC)) |
428 | goto activate_locked; | 462 | goto activate_locked; |
429 | } | 463 | } |
430 | #endif /* CONFIG_SWAP */ | 464 | #endif /* CONFIG_SWAP */ |
@@ -507,36 +541,8 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) | |||
507 | goto free_it; | 541 | goto free_it; |
508 | } | 542 | } |
509 | 543 | ||
510 | if (!mapping) | 544 | if (!remove_mapping(mapping, page)) |
511 | goto keep_locked; /* truncate got there first */ | 545 | goto keep_locked; |
512 | |||
513 | write_lock_irq(&mapping->tree_lock); | ||
514 | |||
515 | /* | ||
516 | * The non-racy check for busy page. It is critical to check | ||
517 | * PageDirty _after_ making sure that the page is freeable and | ||
518 | * not in use by anybody. (pagecache + us == 2) | ||
519 | */ | ||
520 | if (unlikely(page_count(page) != 2)) | ||
521 | goto cannot_free; | ||
522 | smp_rmb(); | ||
523 | if (unlikely(PageDirty(page))) | ||
524 | goto cannot_free; | ||
525 | |||
526 | #ifdef CONFIG_SWAP | ||
527 | if (PageSwapCache(page)) { | ||
528 | swp_entry_t swap = { .val = page_private(page) }; | ||
529 | __delete_from_swap_cache(page); | ||
530 | write_unlock_irq(&mapping->tree_lock); | ||
531 | swap_free(swap); | ||
532 | __put_page(page); /* The pagecache ref */ | ||
533 | goto free_it; | ||
534 | } | ||
535 | #endif /* CONFIG_SWAP */ | ||
536 | |||
537 | __remove_from_page_cache(page); | ||
538 | write_unlock_irq(&mapping->tree_lock); | ||
539 | __put_page(page); | ||
540 | 546 | ||
541 | free_it: | 547 | free_it: |
542 | unlock_page(page); | 548 | unlock_page(page); |
@@ -545,10 +551,6 @@ free_it: | |||
545 | __pagevec_release_nonlru(&freed_pvec); | 551 | __pagevec_release_nonlru(&freed_pvec); |
546 | continue; | 552 | continue; |
547 | 553 | ||
548 | cannot_free: | ||
549 | write_unlock_irq(&mapping->tree_lock); | ||
550 | goto keep_locked; | ||
551 | |||
552 | activate_locked: | 554 | activate_locked: |
553 | SetPageActive(page); | 555 | SetPageActive(page); |
554 | pgactivate++; | 556 | pgactivate++; |
@@ -566,6 +568,241 @@ keep: | |||
566 | return reclaimed; | 568 | return reclaimed; |
567 | } | 569 | } |
568 | 570 | ||
571 | #ifdef CONFIG_MIGRATION | ||
572 | static inline void move_to_lru(struct page *page) | ||
573 | { | ||
574 | list_del(&page->lru); | ||
575 | if (PageActive(page)) { | ||
576 | /* | ||
577 | * lru_cache_add_active checks that | ||
578 | * the PG_active bit is off. | ||
579 | */ | ||
580 | ClearPageActive(page); | ||
581 | lru_cache_add_active(page); | ||
582 | } else { | ||
583 | lru_cache_add(page); | ||
584 | } | ||
585 | put_page(page); | ||
586 | } | ||
587 | |||
588 | /* | ||
589 | * Add isolated pages on the list back to the LRU | ||
590 | * | ||
591 | * returns the number of pages put back. | ||
592 | */ | ||
593 | int putback_lru_pages(struct list_head *l) | ||
594 | { | ||
595 | struct page *page; | ||
596 | struct page *page2; | ||
597 | int count = 0; | ||
598 | |||
599 | list_for_each_entry_safe(page, page2, l, lru) { | ||
600 | move_to_lru(page); | ||
601 | count++; | ||
602 | } | ||
603 | return count; | ||
604 | } | ||
605 | |||
606 | /* | ||
607 | * swapout a single page | ||
608 | * page is locked upon entry, unlocked on exit | ||
609 | */ | ||
610 | static int swap_page(struct page *page) | ||
611 | { | ||
612 | struct address_space *mapping = page_mapping(page); | ||
613 | |||
614 | if (page_mapped(page) && mapping) | ||
615 | if (try_to_unmap(page) != SWAP_SUCCESS) | ||
616 | goto unlock_retry; | ||
617 | |||
618 | if (PageDirty(page)) { | ||
619 | /* Page is dirty, try to write it out here */ | ||
620 | switch(pageout(page, mapping)) { | ||
621 | case PAGE_KEEP: | ||
622 | case PAGE_ACTIVATE: | ||
623 | goto unlock_retry; | ||
624 | |||
625 | case PAGE_SUCCESS: | ||
626 | goto retry; | ||
627 | |||
628 | case PAGE_CLEAN: | ||
629 | ; /* try to free the page below */ | ||
630 | } | ||
631 | } | ||
632 | |||
633 | if (PagePrivate(page)) { | ||
634 | if (!try_to_release_page(page, GFP_KERNEL) || | ||
635 | (!mapping && page_count(page) == 1)) | ||
636 | goto unlock_retry; | ||
637 | } | ||
638 | |||
639 | if (remove_mapping(mapping, page)) { | ||
640 | /* Success */ | ||
641 | unlock_page(page); | ||
642 | return 0; | ||
643 | } | ||
644 | |||
645 | unlock_retry: | ||
646 | unlock_page(page); | ||
647 | |||
648 | retry: | ||
649 | return -EAGAIN; | ||
650 | } | ||
651 | /* | ||
652 | * migrate_pages | ||
653 | * | ||
654 | * Two lists are passed to this function. The first list | ||
655 | * contains the pages isolated from the LRU to be migrated. | ||
656 | * The second list contains new pages to which the isolated | ||
657 | * pages can be moved. If the second list is NULL then all | ||
658 | * pages are swapped out. | ||
659 | * | ||
660 | * The function returns after 10 attempts or if no pages | ||
661 | * are movable anymore because the "to" list has become empty | ||
662 | * or no retryable pages exist anymore. | ||
663 | * | ||
664 | * SIMPLIFIED VERSION: This implementation of migrate_pages | ||
665 | * only swaps out pages and never touches the second | ||
666 | * list. The direct migration patchset | ||
667 | * extends this function to avoid the use of swap. | ||
668 | * | ||
669 | * Return: Number of pages not migrated when "to" ran empty. | ||
670 | */ | ||
671 | int migrate_pages(struct list_head *from, struct list_head *to, | ||
672 | struct list_head *moved, struct list_head *failed) | ||
673 | { | ||
674 | int retry; | ||
675 | int nr_failed = 0; | ||
676 | int pass = 0; | ||
677 | struct page *page; | ||
678 | struct page *page2; | ||
679 | int swapwrite = current->flags & PF_SWAPWRITE; | ||
680 | int rc; | ||
681 | |||
682 | if (!swapwrite) | ||
683 | current->flags |= PF_SWAPWRITE; | ||
684 | |||
685 | redo: | ||
686 | retry = 0; | ||
687 | |||
688 | list_for_each_entry_safe(page, page2, from, lru) { | ||
689 | cond_resched(); | ||
690 | |||
691 | rc = 0; | ||
692 | if (page_count(page) == 1) | ||
693 | /* page was freed from under us. So we are done. */ | ||
694 | goto next; | ||
695 | |||
696 | /* | ||
697 | * Skip locked pages during the first two passes to give the | ||
698 | * functions holding the lock time to release the page. Later we | ||
699 | * use lock_page() to have a higher chance of acquiring the | ||
700 | * lock. | ||
701 | */ | ||
702 | rc = -EAGAIN; | ||
703 | if (pass > 2) | ||
704 | lock_page(page); | ||
705 | else | ||
706 | if (TestSetPageLocked(page)) | ||
707 | goto next; | ||
708 | |||
709 | /* | ||
710 | * Only wait on writeback if we have already done a pass where | ||
711 | * we may have triggered writeouts for lots of pages. | ||
712 | */ | ||
713 | if (pass > 0) { | ||
714 | wait_on_page_writeback(page); | ||
715 | } else { | ||
716 | if (PageWriteback(page)) | ||
717 | goto unlock_page; | ||
718 | } | ||
719 | |||
720 | /* | ||
721 | * Anonymous pages must have swap cache references otherwise | ||
722 | * the information contained in the page maps cannot be | ||
723 | * preserved. | ||
724 | */ | ||
725 | if (PageAnon(page) && !PageSwapCache(page)) { | ||
726 | if (!add_to_swap(page, GFP_KERNEL)) { | ||
727 | rc = -ENOMEM; | ||
728 | goto unlock_page; | ||
729 | } | ||
730 | } | ||
731 | |||
732 | /* | ||
733 | * Page is properly locked and writeback is complete. | ||
734 | * Try to migrate the page. | ||
735 | */ | ||
736 | rc = swap_page(page); | ||
737 | goto next; | ||
738 | |||
739 | unlock_page: | ||
740 | unlock_page(page); | ||
741 | |||
742 | next: | ||
743 | if (rc == -EAGAIN) { | ||
744 | retry++; | ||
745 | } else if (rc) { | ||
746 | /* Permanent failure */ | ||
747 | list_move(&page->lru, failed); | ||
748 | nr_failed++; | ||
749 | } else { | ||
750 | /* Success */ | ||
751 | list_move(&page->lru, moved); | ||
752 | } | ||
753 | } | ||
754 | if (retry && pass++ < 10) | ||
755 | goto redo; | ||
756 | |||
757 | if (!swapwrite) | ||
758 | current->flags &= ~PF_SWAPWRITE; | ||
759 | |||
760 | return nr_failed + retry; | ||
761 | } | ||
762 | |||
763 | static void lru_add_drain_per_cpu(void *dummy) | ||
764 | { | ||
765 | lru_add_drain(); | ||
766 | } | ||
767 | |||
768 | /* | ||
769 | * Isolate one page from the LRU lists and put it on the | ||
770 | * indicated list. Do necessary cache draining if the | ||
771 | * page is not on the LRU lists yet. | ||
772 | * | ||
773 | * Result: | ||
774 | * 0 = page not on LRU list | ||
775 | * 1 = page removed from LRU list and added to the specified list. | ||
776 | * -ENOENT = page is being freed elsewhere. | ||
777 | */ | ||
778 | int isolate_lru_page(struct page *page) | ||
779 | { | ||
780 | int rc = 0; | ||
781 | struct zone *zone = page_zone(page); | ||
782 | |||
783 | redo: | ||
784 | spin_lock_irq(&zone->lru_lock); | ||
785 | rc = __isolate_lru_page(page); | ||
786 | if (rc == 1) { | ||
787 | if (PageActive(page)) | ||
788 | del_page_from_active_list(zone, page); | ||
789 | else | ||
790 | del_page_from_inactive_list(zone, page); | ||
791 | } | ||
792 | spin_unlock_irq(&zone->lru_lock); | ||
793 | if (rc == 0) { | ||
794 | /* | ||
795 | * Maybe this page is still waiting for a cpu to drain it | ||
796 | * from one of the lru lists? | ||
797 | */ | ||
798 | rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL); | ||
799 | if (rc == 0 && PageLRU(page)) | ||
800 | goto redo; | ||
801 | } | ||
802 | return rc; | ||
803 | } | ||
804 | #endif | ||
805 | |||
569 | /* | 806 | /* |
570 | * zone->lru_lock is heavily contended. Some of the functions that | 807 | * zone->lru_lock is heavily contended. Some of the functions that |
571 | * shrink the lists perform better by taking out a batch of pages | 808 | * shrink the lists perform better by taking out a batch of pages |
@@ -594,20 +831,18 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src, | |||
594 | page = lru_to_page(src); | 831 | page = lru_to_page(src); |
595 | prefetchw_prev_lru_page(page, src, flags); | 832 | prefetchw_prev_lru_page(page, src, flags); |
596 | 833 | ||
597 | if (!TestClearPageLRU(page)) | 834 | switch (__isolate_lru_page(page)) { |
598 | BUG(); | 835 | case 1: |
599 | list_del(&page->lru); | 836 | /* Succeeded to isolate page */ |
600 | if (get_page_testone(page)) { | 837 | list_move(&page->lru, dst); |
601 | /* | ||
602 | * It is being freed elsewhere | ||
603 | */ | ||
604 | __put_page(page); | ||
605 | SetPageLRU(page); | ||
606 | list_add(&page->lru, src); | ||
607 | continue; | ||
608 | } else { | ||
609 | list_add(&page->lru, dst); | ||
610 | nr_taken++; | 838 | nr_taken++; |
839 | break; | ||
840 | case -ENOENT: | ||
841 | /* Not possible to isolate */ | ||
842 | list_move(&page->lru, src); | ||
843 | break; | ||
844 | default: | ||
845 | BUG(); | ||
611 | } | 846 | } |
612 | } | 847 | } |
613 | 848 | ||
@@ -1226,7 +1461,7 @@ static int kswapd(void *p) | |||
1226 | * us from recursively trying to free more memory as we're | 1461 | * us from recursively trying to free more memory as we're |
1227 | * trying to free the first piece of memory in the first place). | 1462 | * trying to free the first piece of memory in the first place). |
1228 | */ | 1463 | */ |
1229 | tsk->flags |= PF_MEMALLOC|PF_KSWAPD; | 1464 | tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; |
1230 | 1465 | ||
1231 | order = 0; | 1466 | order = 0; |
1232 | for ( ; ; ) { | 1467 | for ( ; ; ) { |