diff options
| -rw-r--r-- | include/linux/mm_types.h | 5 | ||||
| -rw-r--r-- | include/linux/slub_def.h | 4 | ||||
| -rw-r--r-- | mm/slub.c | 204 |
3 files changed, 92 insertions, 121 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bfee0bd1d43..34023c65d46 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
| @@ -64,10 +64,7 @@ struct page { | |||
| 64 | #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS | 64 | #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS |
| 65 | spinlock_t ptl; | 65 | spinlock_t ptl; |
| 66 | #endif | 66 | #endif |
| 67 | struct { | 67 | struct kmem_cache *slab; /* SLUB: Pointer to slab */ |
| 68 | struct kmem_cache *slab; /* SLUB: Pointer to slab */ | ||
| 69 | void *end; /* SLUB: end marker */ | ||
| 70 | }; | ||
| 71 | struct page *first_page; /* Compound tail pages */ | 68 | struct page *first_page; /* Compound tail pages */ |
| 72 | }; | 69 | }; |
| 73 | union { | 70 | union { |
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 57deecc79d5..b00c1c73eb0 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h | |||
| @@ -61,7 +61,7 @@ struct kmem_cache { | |||
| 61 | int size; /* The size of an object including meta data */ | 61 | int size; /* The size of an object including meta data */ |
| 62 | int objsize; /* The size of an object without meta data */ | 62 | int objsize; /* The size of an object without meta data */ |
| 63 | int offset; /* Free pointer offset. */ | 63 | int offset; /* Free pointer offset. */ |
| 64 | int order; | 64 | int order; /* Current preferred allocation order */ |
| 65 | 65 | ||
| 66 | /* | 66 | /* |
| 67 | * Avoid an extra cache line for UP, SMP and for the node local to | 67 | * Avoid an extra cache line for UP, SMP and for the node local to |
| @@ -138,11 +138,11 @@ static __always_inline int kmalloc_index(size_t size) | |||
| 138 | if (size <= 512) return 9; | 138 | if (size <= 512) return 9; |
| 139 | if (size <= 1024) return 10; | 139 | if (size <= 1024) return 10; |
| 140 | if (size <= 2 * 1024) return 11; | 140 | if (size <= 2 * 1024) return 11; |
| 141 | if (size <= 4 * 1024) return 12; | ||
| 141 | /* | 142 | /* |
| 142 | * The following is only needed to support architectures with a larger page | 143 | * The following is only needed to support architectures with a larger page |
| 143 | * size than 4k. | 144 | * size than 4k. |
| 144 | */ | 145 | */ |
| 145 | if (size <= 4 * 1024) return 12; | ||
| 146 | if (size <= 8 * 1024) return 13; | 146 | if (size <= 8 * 1024) return 13; |
| 147 | if (size <= 16 * 1024) return 14; | 147 | if (size <= 16 * 1024) return 14; |
| 148 | if (size <= 32 * 1024) return 15; | 148 | if (size <= 32 * 1024) return 15; |
| @@ -291,32 +291,16 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) | |||
| 291 | #endif | 291 | #endif |
| 292 | } | 292 | } |
| 293 | 293 | ||
| 294 | /* | 294 | /* Verify that a pointer has an address that is valid within a slab page */ |
| 295 | * The end pointer in a slab is special. It points to the first object in the | ||
| 296 | * slab but has bit 0 set to mark it. | ||
| 297 | * | ||
| 298 | * Note that SLUB relies on page_mapping returning NULL for pages with bit 0 | ||
| 299 | * in the mapping set. | ||
| 300 | */ | ||
| 301 | static inline int is_end(void *addr) | ||
| 302 | { | ||
| 303 | return (unsigned long)addr & PAGE_MAPPING_ANON; | ||
| 304 | } | ||
| 305 | |||
| 306 | static void *slab_address(struct page *page) | ||
| 307 | { | ||
| 308 | return page->end - PAGE_MAPPING_ANON; | ||
| 309 | } | ||
| 310 | |||
| 311 | static inline int check_valid_pointer(struct kmem_cache *s, | 295 | static inline int check_valid_pointer(struct kmem_cache *s, |
| 312 | struct page *page, const void *object) | 296 | struct page *page, const void *object) |
| 313 | { | 297 | { |
| 314 | void *base; | 298 | void *base; |
| 315 | 299 | ||
| 316 | if (object == page->end) | 300 | if (!object) |
| 317 | return 1; | 301 | return 1; |
| 318 | 302 | ||
| 319 | base = slab_address(page); | 303 | base = page_address(page); |
| 320 | if (object < base || object >= base + s->objects * s->size || | 304 | if (object < base || object >= base + s->objects * s->size || |
| 321 | (object - base) % s->size) { | 305 | (object - base) % s->size) { |
| 322 | return 0; | 306 | return 0; |
| @@ -349,8 +333,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) | |||
| 349 | 333 | ||
| 350 | /* Scan freelist */ | 334 | /* Scan freelist */ |
| 351 | #define for_each_free_object(__p, __s, __free) \ | 335 | #define for_each_free_object(__p, __s, __free) \ |
| 352 | for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\ | 336 | for (__p = (__free); __p; __p = get_freepointer((__s), __p)) |
| 353 | __p)) | ||
| 354 | 337 | ||
| 355 | /* Determine object index from a given position */ | 338 | /* Determine object index from a given position */ |
| 356 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) | 339 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) |
| @@ -502,7 +485,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...) | |||
| 502 | static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | 485 | static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) |
| 503 | { | 486 | { |
| 504 | unsigned int off; /* Offset of last byte */ | 487 | unsigned int off; /* Offset of last byte */ |
| 505 | u8 *addr = slab_address(page); | 488 | u8 *addr = page_address(page); |
| 506 | 489 | ||
| 507 | print_tracking(s, p); | 490 | print_tracking(s, p); |
| 508 | 491 | ||
| @@ -637,7 +620,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, | |||
| 637 | * A. Free pointer (if we cannot overwrite object on free) | 620 | * A. Free pointer (if we cannot overwrite object on free) |
| 638 | * B. Tracking data for SLAB_STORE_USER | 621 | * B. Tracking data for SLAB_STORE_USER |
| 639 | * C. Padding to reach required alignment boundary or at minimum | 622 | * C. Padding to reach required alignment boundary or at minimum |
| 640 | * one word if debuggin is on to be able to detect writes | 623 | * one word if debugging is on to be able to detect writes |
| 641 | * before the word boundary. | 624 | * before the word boundary. |
| 642 | * | 625 | * |
| 643 | * Padding is done using 0x5a (POISON_INUSE) | 626 | * Padding is done using 0x5a (POISON_INUSE) |
| @@ -680,7 +663,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) | |||
| 680 | if (!(s->flags & SLAB_POISON)) | 663 | if (!(s->flags & SLAB_POISON)) |
| 681 | return 1; | 664 | return 1; |
| 682 | 665 | ||
| 683 | start = slab_address(page); | 666 | start = page_address(page); |
| 684 | end = start + (PAGE_SIZE << s->order); | 667 | end = start + (PAGE_SIZE << s->order); |
| 685 | length = s->objects * s->size; | 668 | length = s->objects * s->size; |
| 686 | remainder = end - (start + length); | 669 | remainder = end - (start + length); |
| @@ -748,7 +731,7 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
| 748 | * of the free objects in this slab. May cause | 731 | * of the free objects in this slab. May cause |
| 749 | * another error because the object count is now wrong. | 732 | * another error because the object count is now wrong. |
| 750 | */ | 733 | */ |
| 751 | set_freepointer(s, p, page->end); | 734 | set_freepointer(s, p, NULL); |
| 752 | return 0; | 735 | return 0; |
| 753 | } | 736 | } |
| 754 | return 1; | 737 | return 1; |
| @@ -782,18 +765,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) | |||
| 782 | void *fp = page->freelist; | 765 | void *fp = page->freelist; |
| 783 | void *object = NULL; | 766 | void *object = NULL; |
| 784 | 767 | ||
| 785 | while (fp != page->end && nr <= s->objects) { | 768 | while (fp && nr <= s->objects) { |
| 786 | if (fp == search) | 769 | if (fp == search) |
| 787 | return 1; | 770 | return 1; |
| 788 | if (!check_valid_pointer(s, page, fp)) { | 771 | if (!check_valid_pointer(s, page, fp)) { |
| 789 | if (object) { | 772 | if (object) { |
| 790 | object_err(s, page, object, | 773 | object_err(s, page, object, |
| 791 | "Freechain corrupt"); | 774 | "Freechain corrupt"); |
| 792 | set_freepointer(s, object, page->end); | 775 | set_freepointer(s, object, NULL); |
| 793 | break; | 776 | break; |
| 794 | } else { | 777 | } else { |
| 795 | slab_err(s, page, "Freepointer corrupt"); | 778 | slab_err(s, page, "Freepointer corrupt"); |
| 796 | page->freelist = page->end; | 779 | page->freelist = NULL; |
| 797 | page->inuse = s->objects; | 780 | page->inuse = s->objects; |
| 798 | slab_fix(s, "Freelist cleared"); | 781 | slab_fix(s, "Freelist cleared"); |
| 799 | return 0; | 782 | return 0; |
| @@ -870,7 +853,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page, | |||
| 870 | if (!check_slab(s, page)) | 853 | if (!check_slab(s, page)) |
| 871 | goto bad; | 854 | goto bad; |
| 872 | 855 | ||
| 873 | if (object && !on_freelist(s, page, object)) { | 856 | if (!on_freelist(s, page, object)) { |
| 874 | object_err(s, page, object, "Object already allocated"); | 857 | object_err(s, page, object, "Object already allocated"); |
| 875 | goto bad; | 858 | goto bad; |
| 876 | } | 859 | } |
| @@ -880,7 +863,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page, | |||
| 880 | goto bad; | 863 | goto bad; |
| 881 | } | 864 | } |
| 882 | 865 | ||
| 883 | if (object && !check_object(s, page, object, 0)) | 866 | if (!check_object(s, page, object, 0)) |
| 884 | goto bad; | 867 | goto bad; |
| 885 | 868 | ||
| 886 | /* Success perform special debug activities for allocs */ | 869 | /* Success perform special debug activities for allocs */ |
| @@ -899,7 +882,7 @@ bad: | |||
| 899 | */ | 882 | */ |
| 900 | slab_fix(s, "Marking all objects used"); | 883 | slab_fix(s, "Marking all objects used"); |
| 901 | page->inuse = s->objects; | 884 | page->inuse = s->objects; |
| 902 | page->freelist = page->end; | 885 | page->freelist = NULL; |
| 903 | } | 886 | } |
| 904 | return 0; | 887 | return 0; |
| 905 | } | 888 | } |
| @@ -939,7 +922,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page, | |||
| 939 | } | 922 | } |
| 940 | 923 | ||
| 941 | /* Special debug activities for freeing objects */ | 924 | /* Special debug activities for freeing objects */ |
| 942 | if (!SlabFrozen(page) && page->freelist == page->end) | 925 | if (!SlabFrozen(page) && !page->freelist) |
| 943 | remove_full(s, page); | 926 | remove_full(s, page); |
| 944 | if (s->flags & SLAB_STORE_USER) | 927 | if (s->flags & SLAB_STORE_USER) |
| 945 | set_track(s, object, TRACK_FREE, addr); | 928 | set_track(s, object, TRACK_FREE, addr); |
| @@ -1015,30 +998,11 @@ static unsigned long kmem_cache_flags(unsigned long objsize, | |||
| 1015 | void (*ctor)(struct kmem_cache *, void *)) | 998 | void (*ctor)(struct kmem_cache *, void *)) |
| 1016 | { | 999 | { |
| 1017 | /* | 1000 | /* |
| 1018 | * The page->offset field is only 16 bit wide. This is an offset | 1001 | * Enable debugging if selected on the kernel commandline. |
| 1019 | * in units of words from the beginning of an object. If the slab | ||
| 1020 | * size is bigger then we cannot move the free pointer behind the | ||
| 1021 | * object anymore. | ||
| 1022 | * | ||
| 1023 | * On 32 bit platforms the limit is 256k. On 64bit platforms | ||
| 1024 | * the limit is 512k. | ||
| 1025 | * | ||
| 1026 | * Debugging or ctor may create a need to move the free | ||
| 1027 | * pointer. Fail if this happens. | ||
| 1028 | */ | 1002 | */ |
| 1029 | if (objsize >= 65535 * sizeof(void *)) { | 1003 | if (slub_debug && (!slub_debug_slabs || |
| 1030 | BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON | | 1004 | strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0)) |
| 1031 | SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); | 1005 | flags |= slub_debug; |
| 1032 | BUG_ON(ctor); | ||
| 1033 | } else { | ||
| 1034 | /* | ||
| 1035 | * Enable debugging if selected on the kernel commandline. | ||
| 1036 | */ | ||
| 1037 | if (slub_debug && (!slub_debug_slabs || | ||
| 1038 | strncmp(slub_debug_slabs, name, | ||
| 1039 | strlen(slub_debug_slabs)) == 0)) | ||
| 1040 | flags |= slub_debug; | ||
| 1041 | } | ||
| 1042 | 1006 | ||
| 1043 | return flags; | 1007 | return flags; |
| 1044 | } | 1008 | } |
| @@ -1124,7 +1088,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
| 1124 | SetSlabDebug(page); | 1088 | SetSlabDebug(page); |
| 1125 | 1089 | ||
| 1126 | start = page_address(page); | 1090 | start = page_address(page); |
| 1127 | page->end = start + 1; | ||
| 1128 | 1091 | ||
| 1129 | if (unlikely(s->flags & SLAB_POISON)) | 1092 | if (unlikely(s->flags & SLAB_POISON)) |
| 1130 | memset(start, POISON_INUSE, PAGE_SIZE << s->order); | 1093 | memset(start, POISON_INUSE, PAGE_SIZE << s->order); |
| @@ -1136,7 +1099,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
| 1136 | last = p; | 1099 | last = p; |
| 1137 | } | 1100 | } |
| 1138 | setup_object(s, page, last); | 1101 | setup_object(s, page, last); |
| 1139 | set_freepointer(s, last, page->end); | 1102 | set_freepointer(s, last, NULL); |
| 1140 | 1103 | ||
| 1141 | page->freelist = start; | 1104 | page->freelist = start; |
| 1142 | page->inuse = 0; | 1105 | page->inuse = 0; |
| @@ -1152,7 +1115,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
| 1152 | void *p; | 1115 | void *p; |
| 1153 | 1116 | ||
| 1154 | slab_pad_check(s, page); | 1117 | slab_pad_check(s, page); |
| 1155 | for_each_object(p, s, slab_address(page)) | 1118 | for_each_object(p, s, page_address(page)) |
| 1156 | check_object(s, page, p, 0); | 1119 | check_object(s, page, p, 0); |
| 1157 | ClearSlabDebug(page); | 1120 | ClearSlabDebug(page); |
| 1158 | } | 1121 | } |
| @@ -1162,7 +1125,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
| 1162 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, | 1125 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
| 1163 | -pages); | 1126 | -pages); |
| 1164 | 1127 | ||
| 1165 | page->mapping = NULL; | ||
| 1166 | __free_pages(page, s->order); | 1128 | __free_pages(page, s->order); |
| 1167 | } | 1129 | } |
| 1168 | 1130 | ||
| @@ -1307,7 +1269,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | |||
| 1307 | * may return off node objects because partial slabs are obtained | 1269 | * may return off node objects because partial slabs are obtained |
| 1308 | * from other nodes and filled up. | 1270 | * from other nodes and filled up. |
| 1309 | * | 1271 | * |
| 1310 | * If /sys/slab/xx/defrag_ratio is set to 100 (which makes | 1272 | * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes |
| 1311 | * defrag_ratio = 1000) then every (well almost) allocation will | 1273 | * defrag_ratio = 1000) then every (well almost) allocation will |
| 1312 | * first attempt to defrag slab caches on other nodes. This means | 1274 | * first attempt to defrag slab caches on other nodes. This means |
| 1313 | * scanning over all nodes to look for partial slabs which may be | 1275 | * scanning over all nodes to look for partial slabs which may be |
| @@ -1366,7 +1328,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
| 1366 | ClearSlabFrozen(page); | 1328 | ClearSlabFrozen(page); |
| 1367 | if (page->inuse) { | 1329 | if (page->inuse) { |
| 1368 | 1330 | ||
| 1369 | if (page->freelist != page->end) { | 1331 | if (page->freelist) { |
| 1370 | add_partial(n, page, tail); | 1332 | add_partial(n, page, tail); |
| 1371 | stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); | 1333 | stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); |
| 1372 | } else { | 1334 | } else { |
| @@ -1382,9 +1344,11 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
| 1382 | * Adding an empty slab to the partial slabs in order | 1344 | * Adding an empty slab to the partial slabs in order |
| 1383 | * to avoid page allocator overhead. This slab needs | 1345 | * to avoid page allocator overhead. This slab needs |
| 1384 | * to come after the other slabs with objects in | 1346 | * to come after the other slabs with objects in |
| 1385 | * order to fill them up. That way the size of the | 1347 | * so that the others get filled first. That way the |
| 1386 | * partial list stays small. kmem_cache_shrink can | 1348 | * size of the partial list stays small. |
| 1387 | * reclaim empty slabs from the partial list. | 1349 | * |
| 1350 | * kmem_cache_shrink can reclaim any empty slabs from the | ||
| 1351 | * partial list. | ||
| 1388 | */ | 1352 | */ |
| 1389 | add_partial(n, page, 1); | 1353 | add_partial(n, page, 1); |
| 1390 | slab_unlock(page); | 1354 | slab_unlock(page); |
| @@ -1407,15 +1371,11 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
| 1407 | if (c->freelist) | 1371 | if (c->freelist) |
| 1408 | stat(c, DEACTIVATE_REMOTE_FREES); | 1372 | stat(c, DEACTIVATE_REMOTE_FREES); |
| 1409 | /* | 1373 | /* |
| 1410 | * Merge cpu freelist into freelist. Typically we get here | 1374 | * Merge cpu freelist into slab freelist. Typically we get here |
| 1411 | * because both freelists are empty. So this is unlikely | 1375 | * because both freelists are empty. So this is unlikely |
| 1412 | * to occur. | 1376 | * to occur. |
| 1413 | * | ||
| 1414 | * We need to use _is_end here because deactivate slab may | ||
| 1415 | * be called for a debug slab. Then c->freelist may contain | ||
| 1416 | * a dummy pointer. | ||
| 1417 | */ | 1377 | */ |
| 1418 | while (unlikely(!is_end(c->freelist))) { | 1378 | while (unlikely(c->freelist)) { |
| 1419 | void **object; | 1379 | void **object; |
| 1420 | 1380 | ||
| 1421 | tail = 0; /* Hot objects. Put the slab first */ | 1381 | tail = 0; /* Hot objects. Put the slab first */ |
| @@ -1442,6 +1402,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
| 1442 | 1402 | ||
| 1443 | /* | 1403 | /* |
| 1444 | * Flush cpu slab. | 1404 | * Flush cpu slab. |
| 1405 | * | ||
| 1445 | * Called from IPI handler with interrupts disabled. | 1406 | * Called from IPI handler with interrupts disabled. |
| 1446 | */ | 1407 | */ |
| 1447 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) | 1408 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) |
| @@ -1500,7 +1461,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node) | |||
| 1500 | * rest of the freelist to the lockless freelist. | 1461 | * rest of the freelist to the lockless freelist. |
| 1501 | * | 1462 | * |
| 1502 | * And if we were unable to get a new slab from the partial slab lists then | 1463 | * And if we were unable to get a new slab from the partial slab lists then |
| 1503 | * we need to allocate a new slab. This is slowest path since we may sleep. | 1464 | * we need to allocate a new slab. This is the slowest path since it involves |
| 1465 | * a call to the page allocator and the setup of a new slab. | ||
| 1504 | */ | 1466 | */ |
| 1505 | static void *__slab_alloc(struct kmem_cache *s, | 1467 | static void *__slab_alloc(struct kmem_cache *s, |
| 1506 | gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) | 1468 | gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) |
| @@ -1514,18 +1476,19 @@ static void *__slab_alloc(struct kmem_cache *s, | |||
| 1514 | slab_lock(c->page); | 1476 | slab_lock(c->page); |
| 1515 | if (unlikely(!node_match(c, node))) | 1477 | if (unlikely(!node_match(c, node))) |
| 1516 | goto another_slab; | 1478 | goto another_slab; |
| 1479 | |||
| 1517 | stat(c, ALLOC_REFILL); | 1480 | stat(c, ALLOC_REFILL); |
| 1481 | |||
| 1518 | load_freelist: | 1482 | load_freelist: |
| 1519 | object = c->page->freelist; | 1483 | object = c->page->freelist; |
| 1520 | if (unlikely(object == c->page->end)) | 1484 | if (unlikely(!object)) |
| 1521 | goto another_slab; | 1485 | goto another_slab; |
| 1522 | if (unlikely(SlabDebug(c->page))) | 1486 | if (unlikely(SlabDebug(c->page))) |
| 1523 | goto debug; | 1487 | goto debug; |
| 1524 | 1488 | ||
| 1525 | object = c->page->freelist; | ||
| 1526 | c->freelist = object[c->offset]; | 1489 | c->freelist = object[c->offset]; |
| 1527 | c->page->inuse = s->objects; | 1490 | c->page->inuse = s->objects; |
| 1528 | c->page->freelist = c->page->end; | 1491 | c->page->freelist = NULL; |
| 1529 | c->node = page_to_nid(c->page); | 1492 | c->node = page_to_nid(c->page); |
| 1530 | unlock_out: | 1493 | unlock_out: |
| 1531 | slab_unlock(c->page); | 1494 | slab_unlock(c->page); |
| @@ -1578,7 +1541,6 @@ new_slab: | |||
| 1578 | 1541 | ||
| 1579 | return NULL; | 1542 | return NULL; |
| 1580 | debug: | 1543 | debug: |
| 1581 | object = c->page->freelist; | ||
| 1582 | if (!alloc_debug_processing(s, c->page, object, addr)) | 1544 | if (!alloc_debug_processing(s, c->page, object, addr)) |
| 1583 | goto another_slab; | 1545 | goto another_slab; |
| 1584 | 1546 | ||
| @@ -1607,7 +1569,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
| 1607 | 1569 | ||
| 1608 | local_irq_save(flags); | 1570 | local_irq_save(flags); |
| 1609 | c = get_cpu_slab(s, smp_processor_id()); | 1571 | c = get_cpu_slab(s, smp_processor_id()); |
| 1610 | if (unlikely(is_end(c->freelist) || !node_match(c, node))) | 1572 | if (unlikely(!c->freelist || !node_match(c, node))) |
| 1611 | 1573 | ||
| 1612 | object = __slab_alloc(s, gfpflags, node, addr, c); | 1574 | object = __slab_alloc(s, gfpflags, node, addr, c); |
| 1613 | 1575 | ||
| @@ -1659,6 +1621,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
| 1659 | 1621 | ||
| 1660 | if (unlikely(SlabDebug(page))) | 1622 | if (unlikely(SlabDebug(page))) |
| 1661 | goto debug; | 1623 | goto debug; |
| 1624 | |||
| 1662 | checks_ok: | 1625 | checks_ok: |
| 1663 | prior = object[offset] = page->freelist; | 1626 | prior = object[offset] = page->freelist; |
| 1664 | page->freelist = object; | 1627 | page->freelist = object; |
| @@ -1673,11 +1636,10 @@ checks_ok: | |||
| 1673 | goto slab_empty; | 1636 | goto slab_empty; |
| 1674 | 1637 | ||
| 1675 | /* | 1638 | /* |
| 1676 | * Objects left in the slab. If it | 1639 | * Objects left in the slab. If it was not on the partial list before |
| 1677 | * was not on the partial list before | ||
| 1678 | * then add it. | 1640 | * then add it. |
| 1679 | */ | 1641 | */ |
| 1680 | if (unlikely(prior == page->end)) { | 1642 | if (unlikely(!prior)) { |
| 1681 | add_partial(get_node(s, page_to_nid(page)), page, 1); | 1643 | add_partial(get_node(s, page_to_nid(page)), page, 1); |
| 1682 | stat(c, FREE_ADD_PARTIAL); | 1644 | stat(c, FREE_ADD_PARTIAL); |
| 1683 | } | 1645 | } |
| @@ -1687,7 +1649,7 @@ out_unlock: | |||
| 1687 | return; | 1649 | return; |
| 1688 | 1650 | ||
| 1689 | slab_empty: | 1651 | slab_empty: |
| 1690 | if (prior != page->end) { | 1652 | if (prior) { |
| 1691 | /* | 1653 | /* |
| 1692 | * Slab still on the partial list. | 1654 | * Slab still on the partial list. |
| 1693 | */ | 1655 | */ |
| @@ -1724,8 +1686,8 @@ static __always_inline void slab_free(struct kmem_cache *s, | |||
| 1724 | unsigned long flags; | 1686 | unsigned long flags; |
| 1725 | 1687 | ||
| 1726 | local_irq_save(flags); | 1688 | local_irq_save(flags); |
| 1727 | debug_check_no_locks_freed(object, s->objsize); | ||
| 1728 | c = get_cpu_slab(s, smp_processor_id()); | 1689 | c = get_cpu_slab(s, smp_processor_id()); |
| 1690 | debug_check_no_locks_freed(object, c->objsize); | ||
| 1729 | if (likely(page == c->page && c->node >= 0)) { | 1691 | if (likely(page == c->page && c->node >= 0)) { |
| 1730 | object[c->offset] = c->freelist; | 1692 | object[c->offset] = c->freelist; |
| 1731 | c->freelist = object; | 1693 | c->freelist = object; |
| @@ -1888,13 +1850,11 @@ static unsigned long calculate_alignment(unsigned long flags, | |||
| 1888 | unsigned long align, unsigned long size) | 1850 | unsigned long align, unsigned long size) |
| 1889 | { | 1851 | { |
| 1890 | /* | 1852 | /* |
| 1891 | * If the user wants hardware cache aligned objects then | 1853 | * If the user wants hardware cache aligned objects then follow that |
| 1892 | * follow that suggestion if the object is sufficiently | 1854 | * suggestion if the object is sufficiently large. |
| 1893 | * large. | ||
| 1894 | * | 1855 | * |
| 1895 | * The hardware cache alignment cannot override the | 1856 | * The hardware cache alignment cannot override the specified |
| 1896 | * specified alignment though. If that is greater | 1857 | * alignment though. If that is greater then use it. |
| 1897 | * then use it. | ||
| 1898 | */ | 1858 | */ |
| 1899 | if ((flags & SLAB_HWCACHE_ALIGN) && | 1859 | if ((flags & SLAB_HWCACHE_ALIGN) && |
| 1900 | size > cache_line_size() / 2) | 1860 | size > cache_line_size() / 2) |
| @@ -1910,7 +1870,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s, | |||
| 1910 | struct kmem_cache_cpu *c) | 1870 | struct kmem_cache_cpu *c) |
| 1911 | { | 1871 | { |
| 1912 | c->page = NULL; | 1872 | c->page = NULL; |
| 1913 | c->freelist = (void *)PAGE_MAPPING_ANON; | 1873 | c->freelist = NULL; |
| 1914 | c->node = 0; | 1874 | c->node = 0; |
| 1915 | c->offset = s->offset / sizeof(void *); | 1875 | c->offset = s->offset / sizeof(void *); |
| 1916 | c->objsize = s->objsize; | 1876 | c->objsize = s->objsize; |
| @@ -2092,6 +2052,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, | |||
| 2092 | #endif | 2052 | #endif |
| 2093 | init_kmem_cache_node(n); | 2053 | init_kmem_cache_node(n); |
| 2094 | atomic_long_inc(&n->nr_slabs); | 2054 | atomic_long_inc(&n->nr_slabs); |
| 2055 | |||
| 2095 | /* | 2056 | /* |
| 2096 | * lockdep requires consistent irq usage for each lock | 2057 | * lockdep requires consistent irq usage for each lock |
| 2097 | * so even though there cannot be a race this early in | 2058 | * so even though there cannot be a race this early in |
| @@ -2173,6 +2134,14 @@ static int calculate_sizes(struct kmem_cache *s) | |||
| 2173 | unsigned long align = s->align; | 2134 | unsigned long align = s->align; |
| 2174 | 2135 | ||
| 2175 | /* | 2136 | /* |
| 2137 | * Round up object size to the next word boundary. We can only | ||
| 2138 | * place the free pointer at word boundaries and this determines | ||
| 2139 | * the possible location of the free pointer. | ||
| 2140 | */ | ||
| 2141 | size = ALIGN(size, sizeof(void *)); | ||
| 2142 | |||
| 2143 | #ifdef CONFIG_SLUB_DEBUG | ||
| 2144 | /* | ||
| 2176 | * Determine if we can poison the object itself. If the user of | 2145 | * Determine if we can poison the object itself. If the user of |
| 2177 | * the slab may touch the object after free or before allocation | 2146 | * the slab may touch the object after free or before allocation |
| 2178 | * then we should never poison the object itself. | 2147 | * then we should never poison the object itself. |
| @@ -2183,14 +2152,7 @@ static int calculate_sizes(struct kmem_cache *s) | |||
| 2183 | else | 2152 | else |
| 2184 | s->flags &= ~__OBJECT_POISON; | 2153 | s->flags &= ~__OBJECT_POISON; |
| 2185 | 2154 | ||
| 2186 | /* | ||
| 2187 | * Round up object size to the next word boundary. We can only | ||
| 2188 | * place the free pointer at word boundaries and this determines | ||
| 2189 | * the possible location of the free pointer. | ||
| 2190 | */ | ||
| 2191 | size = ALIGN(size, sizeof(void *)); | ||
| 2192 | 2155 | ||
| 2193 | #ifdef CONFIG_SLUB_DEBUG | ||
| 2194 | /* | 2156 | /* |
| 2195 | * If we are Redzoning then check if there is some space between the | 2157 | * If we are Redzoning then check if there is some space between the |
| 2196 | * end of the object and the free pointer. If not then add an | 2158 | * end of the object and the free pointer. If not then add an |
| @@ -2343,7 +2305,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object) | |||
| 2343 | /* | 2305 | /* |
| 2344 | * We could also check if the object is on the slabs freelist. | 2306 | * We could also check if the object is on the slabs freelist. |
| 2345 | * But this would be too expensive and it seems that the main | 2307 | * But this would be too expensive and it seems that the main |
| 2346 | * purpose of kmem_ptr_valid is to check if the object belongs | 2308 | * purpose of kmem_ptr_valid() is to check if the object belongs |
| 2347 | * to a certain slab. | 2309 | * to a certain slab. |
| 2348 | */ | 2310 | */ |
| 2349 | return 1; | 2311 | return 1; |
| @@ -2630,13 +2592,24 @@ void *__kmalloc(size_t size, gfp_t flags) | |||
| 2630 | } | 2592 | } |
| 2631 | EXPORT_SYMBOL(__kmalloc); | 2593 | EXPORT_SYMBOL(__kmalloc); |
| 2632 | 2594 | ||
| 2595 | static void *kmalloc_large_node(size_t size, gfp_t flags, int node) | ||
| 2596 | { | ||
| 2597 | struct page *page = alloc_pages_node(node, flags | __GFP_COMP, | ||
| 2598 | get_order(size)); | ||
| 2599 | |||
| 2600 | if (page) | ||
| 2601 | return page_address(page); | ||
| 2602 | else | ||
| 2603 | return NULL; | ||
| 2604 | } | ||
| 2605 | |||
| 2633 | #ifdef CONFIG_NUMA | 2606 | #ifdef CONFIG_NUMA |
| 2634 | void *__kmalloc_node(size_t size, gfp_t flags, int node) | 2607 | void *__kmalloc_node(size_t size, gfp_t flags, int node) |
| 2635 | { | 2608 | { |
| 2636 | struct kmem_cache *s; | 2609 | struct kmem_cache *s; |
| 2637 | 2610 | ||
| 2638 | if (unlikely(size > PAGE_SIZE)) | 2611 | if (unlikely(size > PAGE_SIZE)) |
| 2639 | return kmalloc_large(size, flags); | 2612 | return kmalloc_large_node(size, flags, node); |
| 2640 | 2613 | ||
| 2641 | s = get_slab(size, flags); | 2614 | s = get_slab(size, flags); |
| 2642 | 2615 | ||
| @@ -2653,19 +2626,17 @@ size_t ksize(const void *object) | |||
| 2653 | struct page *page; | 2626 | struct page *page; |
| 2654 | struct kmem_cache *s; | 2627 | struct kmem_cache *s; |
| 2655 | 2628 | ||
| 2656 | BUG_ON(!object); | ||
| 2657 | if (unlikely(object == ZERO_SIZE_PTR)) | 2629 | if (unlikely(object == ZERO_SIZE_PTR)) |
| 2658 | return 0; | 2630 | return 0; |
| 2659 | 2631 | ||
| 2660 | page = virt_to_head_page(object); | 2632 | page = virt_to_head_page(object); |
| 2661 | BUG_ON(!page); | ||
| 2662 | 2633 | ||
| 2663 | if (unlikely(!PageSlab(page))) | 2634 | if (unlikely(!PageSlab(page))) |
| 2664 | return PAGE_SIZE << compound_order(page); | 2635 | return PAGE_SIZE << compound_order(page); |
| 2665 | 2636 | ||
| 2666 | s = page->slab; | 2637 | s = page->slab; |
| 2667 | BUG_ON(!s); | ||
| 2668 | 2638 | ||
| 2639 | #ifdef CONFIG_SLUB_DEBUG | ||
| 2669 | /* | 2640 | /* |
| 2670 | * Debugging requires use of the padding between object | 2641 | * Debugging requires use of the padding between object |
| 2671 | * and whatever may come after it. | 2642 | * and whatever may come after it. |
| @@ -2673,6 +2644,7 @@ size_t ksize(const void *object) | |||
| 2673 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) | 2644 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) |
| 2674 | return s->objsize; | 2645 | return s->objsize; |
| 2675 | 2646 | ||
| 2647 | #endif | ||
| 2676 | /* | 2648 | /* |
| 2677 | * If we have the need to store the freelist pointer | 2649 | * If we have the need to store the freelist pointer |
| 2678 | * back there or track user information then we can | 2650 | * back there or track user information then we can |
| @@ -2680,7 +2652,6 @@ size_t ksize(const void *object) | |||
| 2680 | */ | 2652 | */ |
| 2681 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) | 2653 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) |
| 2682 | return s->inuse; | 2654 | return s->inuse; |
| 2683 | |||
| 2684 | /* | 2655 | /* |
| 2685 | * Else we can use all the padding etc for the allocation | 2656 | * Else we can use all the padding etc for the allocation |
| 2686 | */ | 2657 | */ |
| @@ -2957,7 +2928,7 @@ void __init kmem_cache_init(void) | |||
| 2957 | /* | 2928 | /* |
| 2958 | * Patch up the size_index table if we have strange large alignment | 2929 | * Patch up the size_index table if we have strange large alignment |
| 2959 | * requirements for the kmalloc array. This is only the case for | 2930 | * requirements for the kmalloc array. This is only the case for |
| 2960 | * mips it seems. The standard arches will not generate any code here. | 2931 | * MIPS it seems. The standard arches will not generate any code here. |
| 2961 | * | 2932 | * |
| 2962 | * Largest permitted alignment is 256 bytes due to the way we | 2933 | * Largest permitted alignment is 256 bytes due to the way we |
| 2963 | * handle the index determination for the smaller caches. | 2934 | * handle the index determination for the smaller caches. |
| @@ -2986,7 +2957,6 @@ void __init kmem_cache_init(void) | |||
| 2986 | kmem_size = sizeof(struct kmem_cache); | 2957 | kmem_size = sizeof(struct kmem_cache); |
| 2987 | #endif | 2958 | #endif |
| 2988 | 2959 | ||
| 2989 | |||
| 2990 | printk(KERN_INFO | 2960 | printk(KERN_INFO |
| 2991 | "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," | 2961 | "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," |
| 2992 | " CPUs=%d, Nodes=%d\n", | 2962 | " CPUs=%d, Nodes=%d\n", |
| @@ -3083,12 +3053,15 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, | |||
| 3083 | */ | 3053 | */ |
| 3084 | for_each_online_cpu(cpu) | 3054 | for_each_online_cpu(cpu) |
| 3085 | get_cpu_slab(s, cpu)->objsize = s->objsize; | 3055 | get_cpu_slab(s, cpu)->objsize = s->objsize; |
| 3056 | |||
| 3086 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); | 3057 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); |
| 3087 | up_write(&slub_lock); | 3058 | up_write(&slub_lock); |
| 3059 | |||
| 3088 | if (sysfs_slab_alias(s, name)) | 3060 | if (sysfs_slab_alias(s, name)) |
| 3089 | goto err; | 3061 | goto err; |
| 3090 | return s; | 3062 | return s; |
| 3091 | } | 3063 | } |
| 3064 | |||
| 3092 | s = kmalloc(kmem_size, GFP_KERNEL); | 3065 | s = kmalloc(kmem_size, GFP_KERNEL); |
| 3093 | if (s) { | 3066 | if (s) { |
| 3094 | if (kmem_cache_open(s, GFP_KERNEL, name, | 3067 | if (kmem_cache_open(s, GFP_KERNEL, name, |
| @@ -3184,7 +3157,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, | |||
| 3184 | struct kmem_cache *s; | 3157 | struct kmem_cache *s; |
| 3185 | 3158 | ||
| 3186 | if (unlikely(size > PAGE_SIZE)) | 3159 | if (unlikely(size > PAGE_SIZE)) |
| 3187 | return kmalloc_large(size, gfpflags); | 3160 | return kmalloc_large_node(size, gfpflags, node); |
| 3188 | 3161 | ||
| 3189 | s = get_slab(size, gfpflags); | 3162 | s = get_slab(size, gfpflags); |
| 3190 | 3163 | ||
| @@ -3199,7 +3172,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page, | |||
| 3199 | unsigned long *map) | 3172 | unsigned long *map) |
| 3200 | { | 3173 | { |
| 3201 | void *p; | 3174 | void *p; |
| 3202 | void *addr = slab_address(page); | 3175 | void *addr = page_address(page); |
| 3203 | 3176 | ||
| 3204 | if (!check_slab(s, page) || | 3177 | if (!check_slab(s, page) || |
| 3205 | !on_freelist(s, page, NULL)) | 3178 | !on_freelist(s, page, NULL)) |
| @@ -3482,7 +3455,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, | |||
| 3482 | static void process_slab(struct loc_track *t, struct kmem_cache *s, | 3455 | static void process_slab(struct loc_track *t, struct kmem_cache *s, |
| 3483 | struct page *page, enum track_item alloc) | 3456 | struct page *page, enum track_item alloc) |
| 3484 | { | 3457 | { |
| 3485 | void *addr = slab_address(page); | 3458 | void *addr = page_address(page); |
| 3486 | DECLARE_BITMAP(map, s->objects); | 3459 | DECLARE_BITMAP(map, s->objects); |
| 3487 | void *p; | 3460 | void *p; |
| 3488 | 3461 | ||
| @@ -3591,8 +3564,8 @@ enum slab_stat_type { | |||
| 3591 | #define SO_CPU (1 << SL_CPU) | 3564 | #define SO_CPU (1 << SL_CPU) |
| 3592 | #define SO_OBJECTS (1 << SL_OBJECTS) | 3565 | #define SO_OBJECTS (1 << SL_OBJECTS) |
| 3593 | 3566 | ||
| 3594 | static unsigned long slab_objects(struct kmem_cache *s, | 3567 | static ssize_t show_slab_objects(struct kmem_cache *s, |
| 3595 | char *buf, unsigned long flags) | 3568 | char *buf, unsigned long flags) |
| 3596 | { | 3569 | { |
| 3597 | unsigned long total = 0; | 3570 | unsigned long total = 0; |
| 3598 | int cpu; | 3571 | int cpu; |
| @@ -3602,6 +3575,8 @@ static unsigned long slab_objects(struct kmem_cache *s, | |||
| 3602 | unsigned long *per_cpu; | 3575 | unsigned long *per_cpu; |
| 3603 | 3576 | ||
| 3604 | nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); | 3577 | nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); |
| 3578 | if (!nodes) | ||
| 3579 | return -ENOMEM; | ||
| 3605 | per_cpu = nodes + nr_node_ids; | 3580 | per_cpu = nodes + nr_node_ids; |
| 3606 | 3581 | ||
| 3607 | for_each_possible_cpu(cpu) { | 3582 | for_each_possible_cpu(cpu) { |
| @@ -3754,25 +3729,25 @@ SLAB_ATTR_RO(aliases); | |||
| 3754 | 3729 | ||
| 3755 | static ssize_t slabs_show(struct kmem_cache *s, char *buf) | 3730 | static ssize_t slabs_show(struct kmem_cache *s, char *buf) |
| 3756 | { | 3731 | { |
| 3757 | return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); | 3732 | return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); |
| 3758 | } | 3733 | } |
| 3759 | SLAB_ATTR_RO(slabs); | 3734 | SLAB_ATTR_RO(slabs); |
| 3760 | 3735 | ||
| 3761 | static ssize_t partial_show(struct kmem_cache *s, char *buf) | 3736 | static ssize_t partial_show(struct kmem_cache *s, char *buf) |
| 3762 | { | 3737 | { |
| 3763 | return slab_objects(s, buf, SO_PARTIAL); | 3738 | return show_slab_objects(s, buf, SO_PARTIAL); |
| 3764 | } | 3739 | } |
| 3765 | SLAB_ATTR_RO(partial); | 3740 | SLAB_ATTR_RO(partial); |
| 3766 | 3741 | ||
| 3767 | static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) | 3742 | static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) |
| 3768 | { | 3743 | { |
| 3769 | return slab_objects(s, buf, SO_CPU); | 3744 | return show_slab_objects(s, buf, SO_CPU); |
| 3770 | } | 3745 | } |
| 3771 | SLAB_ATTR_RO(cpu_slabs); | 3746 | SLAB_ATTR_RO(cpu_slabs); |
| 3772 | 3747 | ||
| 3773 | static ssize_t objects_show(struct kmem_cache *s, char *buf) | 3748 | static ssize_t objects_show(struct kmem_cache *s, char *buf) |
| 3774 | { | 3749 | { |
| 3775 | return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); | 3750 | return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); |
| 3776 | } | 3751 | } |
| 3777 | SLAB_ATTR_RO(objects); | 3752 | SLAB_ATTR_RO(objects); |
| 3778 | 3753 | ||
| @@ -3971,7 +3946,6 @@ SLAB_ATTR(remote_node_defrag_ratio); | |||
| 3971 | #endif | 3946 | #endif |
| 3972 | 3947 | ||
| 3973 | #ifdef CONFIG_SLUB_STATS | 3948 | #ifdef CONFIG_SLUB_STATS |
| 3974 | |||
| 3975 | static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) | 3949 | static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) |
| 3976 | { | 3950 | { |
| 3977 | unsigned long sum = 0; | 3951 | unsigned long sum = 0; |
| @@ -4155,8 +4129,8 @@ static struct kset *slab_kset; | |||
| 4155 | #define ID_STR_LENGTH 64 | 4129 | #define ID_STR_LENGTH 64 |
| 4156 | 4130 | ||
| 4157 | /* Create a unique string id for a slab cache: | 4131 | /* Create a unique string id for a slab cache: |
| 4158 | * format | 4132 | * |
| 4159 | * :[flags-]size:[memory address of kmemcache] | 4133 | * Format :[flags-]size |
| 4160 | */ | 4134 | */ |
| 4161 | static char *create_unique_id(struct kmem_cache *s) | 4135 | static char *create_unique_id(struct kmem_cache *s) |
| 4162 | { | 4136 | { |
