Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 520 |
1 file changed, 349 insertions(+), 171 deletions(-)
@@ -90,7 +90,7 @@ | |||
90 | * One use of this flag is to mark slabs that are | 90 | * One use of this flag is to mark slabs that are |
91 | * used for allocations. Then such a slab becomes a cpu | 91 | * used for allocations. Then such a slab becomes a cpu |
92 | * slab. The cpu slab may be equipped with an additional | 92 | * slab. The cpu slab may be equipped with an additional |
93 | * lockless_freelist that allows lockless access to | 93 | * freelist that allows lockless access to |
94 | * free objects in addition to the regular freelist | 94 | * free objects in addition to the regular freelist |
95 | * that requires the slab lock. | 95 | * that requires the slab lock. |
96 | * | 96 | * |
@@ -140,11 +140,6 @@ static inline void ClearSlabDebug(struct page *page) | |||
140 | /* | 140 | /* |
141 | * Issues still to be resolved: | 141 | * Issues still to be resolved: |
142 | * | 142 | * |
143 | * - The per cpu array is updated for each new slab and and is a remote | ||
144 | * cacheline for most nodes. This could become a bouncing cacheline given | ||
145 | * enough frequent updates. There are 16 pointers in a cacheline, so at | ||
146 | * max 16 cpus could compete for the cacheline which may be okay. | ||
147 | * | ||
148 | * - Support PAGE_ALLOC_DEBUG. Should be easy to do. | 143 | * - Support PAGE_ALLOC_DEBUG. Should be easy to do. |
149 | * | 144 | * |
150 | * - Variable sizing of the per node arrays | 145 | * - Variable sizing of the per node arrays |
@@ -205,11 +200,6 @@ static inline void ClearSlabDebug(struct page *page) | |||
205 | #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) | 200 | #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) |
206 | #endif | 201 | #endif |
207 | 202 | ||
208 | /* | ||
209 | * The page->inuse field is 16 bit thus we have this limitation | ||
210 | */ | ||
211 | #define MAX_OBJECTS_PER_SLAB 65535 | ||
212 | |||
213 | /* Internal SLUB flags */ | 203 | /* Internal SLUB flags */ |
214 | #define __OBJECT_POISON 0x80000000 /* Poison object */ | 204 | #define __OBJECT_POISON 0x80000000 /* Poison object */ |
215 | #define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ | 205 | #define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ |
@@ -277,6 +267,15 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) | |||
277 | #endif | 267 | #endif |
278 | } | 268 | } |
279 | 269 | ||
270 | static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) | ||
271 | { | ||
272 | #ifdef CONFIG_SMP | ||
273 | return s->cpu_slab[cpu]; | ||
274 | #else | ||
275 | return &s->cpu_slab; | ||
276 | #endif | ||
277 | } | ||
278 | |||
280 | static inline int check_valid_pointer(struct kmem_cache *s, | 279 | static inline int check_valid_pointer(struct kmem_cache *s, |
281 | struct page *page, const void *object) | 280 | struct page *page, const void *object) |
282 | { | 281 | { |
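The get_cpu_slab() helper added above hands out a per-cpu structure whose definition is not part of this file and therefore not shown in this diff. A rough sketch of what the rest of the patch implies, pieced together from init_kmem_cache_cpu() and the kmem_size calculation further down (treat it as an illustration, not the authoritative definition):

/*
 * Sketch only: inferred from get_cpu_slab(), init_kmem_cache_cpu() and the
 * kmem_size computation in this diff.  The real definition lives in the
 * SLUB header, which is not part of this file.
 */
struct kmem_cache_cpu {
	void **freelist;	/* per cpu freelist, popped with irqs off only */
	struct page *page;	/* the cpu slab currently allocated from */
	int node;		/* node of the cpu slab, -1 for a debug slab */
	unsigned int offset;	/* free pointer offset within an object, in words */
	unsigned int objsize;	/* object size, used for __GFP_ZERO clearing */
};

struct kmem_cache {
	/* ... the usual fields ... */
#ifdef CONFIG_SMP
	struct kmem_cache_cpu *cpu_slab[NR_CPUS];	/* kmem_size trims this to nr_cpu_ids */
#else
	struct kmem_cache_cpu cpu_slab;			/* embedded on UP */
#endif
};

On SMP each cache ends in an array of pointers so the structures themselves can live in the per-cpu area set up later in this diff; on UP a single structure is embedded and get_cpu_slab() simply returns its address.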
@@ -729,11 +728,6 @@ static int check_slab(struct kmem_cache *s, struct page *page) | |||
729 | slab_err(s, page, "Not a valid slab page"); | 728 | slab_err(s, page, "Not a valid slab page"); |
730 | return 0; | 729 | return 0; |
731 | } | 730 | } |
732 | if (page->offset * sizeof(void *) != s->offset) { | ||
733 | slab_err(s, page, "Corrupted offset %lu", | ||
734 | (unsigned long)(page->offset * sizeof(void *))); | ||
735 | return 0; | ||
736 | } | ||
737 | if (page->inuse > s->objects) { | 731 | if (page->inuse > s->objects) { |
738 | slab_err(s, page, "inuse %u > max %u", | 732 | slab_err(s, page, "inuse %u > max %u", |
739 | s->name, page->inuse, s->objects); | 733 | s->name, page->inuse, s->objects); |
@@ -872,8 +866,6 @@ bad: | |||
872 | slab_fix(s, "Marking all objects used"); | 866 | slab_fix(s, "Marking all objects used"); |
873 | page->inuse = s->objects; | 867 | page->inuse = s->objects; |
874 | page->freelist = NULL; | 868 | page->freelist = NULL; |
875 | /* Fix up fields that may be corrupted */ | ||
876 | page->offset = s->offset / sizeof(void *); | ||
877 | } | 869 | } |
878 | return 0; | 870 | return 0; |
879 | } | 871 | } |
@@ -988,7 +980,7 @@ __setup("slub_debug", setup_slub_debug); | |||
988 | 980 | ||
989 | static unsigned long kmem_cache_flags(unsigned long objsize, | 981 | static unsigned long kmem_cache_flags(unsigned long objsize, |
990 | unsigned long flags, const char *name, | 982 | unsigned long flags, const char *name, |
991 | void (*ctor)(void *, struct kmem_cache *, unsigned long)) | 983 | void (*ctor)(struct kmem_cache *, void *)) |
992 | { | 984 | { |
993 | /* | 985 | /* |
994 | * The page->offset field is only 16 bit wide. This is an offset | 986 | * The page->offset field is only 16 bit wide. This is an offset |
@@ -1035,7 +1027,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page, | |||
1035 | static inline void add_full(struct kmem_cache_node *n, struct page *page) {} | 1027 | static inline void add_full(struct kmem_cache_node *n, struct page *page) {} |
1036 | static inline unsigned long kmem_cache_flags(unsigned long objsize, | 1028 | static inline unsigned long kmem_cache_flags(unsigned long objsize, |
1037 | unsigned long flags, const char *name, | 1029 | unsigned long flags, const char *name, |
1038 | void (*ctor)(void *, struct kmem_cache *, unsigned long)) | 1030 | void (*ctor)(struct kmem_cache *, void *)) |
1039 | { | 1031 | { |
1040 | return flags; | 1032 | return flags; |
1041 | } | 1033 | } |
@@ -1055,6 +1047,9 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1055 | if (s->flags & SLAB_CACHE_DMA) | 1047 | if (s->flags & SLAB_CACHE_DMA) |
1056 | flags |= SLUB_DMA; | 1048 | flags |= SLUB_DMA; |
1057 | 1049 | ||
1050 | if (s->flags & SLAB_RECLAIM_ACCOUNT) | ||
1051 | flags |= __GFP_RECLAIMABLE; | ||
1052 | |||
1058 | if (node == -1) | 1053 | if (node == -1) |
1059 | page = alloc_pages(flags, s->order); | 1054 | page = alloc_pages(flags, s->order); |
1060 | else | 1055 | else |
@@ -1076,7 +1071,7 @@ static void setup_object(struct kmem_cache *s, struct page *page, | |||
1076 | { | 1071 | { |
1077 | setup_object_debug(s, page, object); | 1072 | setup_object_debug(s, page, object); |
1078 | if (unlikely(s->ctor)) | 1073 | if (unlikely(s->ctor)) |
1079 | s->ctor(object, s, 0); | 1074 | s->ctor(s, object); |
1080 | } | 1075 | } |
1081 | 1076 | ||
1082 | static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | 1077 | static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) |
@@ -1088,19 +1083,16 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1088 | void *last; | 1083 | void *last; |
1089 | void *p; | 1084 | void *p; |
1090 | 1085 | ||
1091 | BUG_ON(flags & ~(GFP_DMA | __GFP_ZERO | GFP_LEVEL_MASK)); | 1086 | BUG_ON(flags & GFP_SLAB_BUG_MASK); |
1092 | |||
1093 | if (flags & __GFP_WAIT) | ||
1094 | local_irq_enable(); | ||
1095 | 1087 | ||
1096 | page = allocate_slab(s, flags & GFP_LEVEL_MASK, node); | 1088 | page = allocate_slab(s, |
1089 | flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); | ||
1097 | if (!page) | 1090 | if (!page) |
1098 | goto out; | 1091 | goto out; |
1099 | 1092 | ||
1100 | n = get_node(s, page_to_nid(page)); | 1093 | n = get_node(s, page_to_nid(page)); |
1101 | if (n) | 1094 | if (n) |
1102 | atomic_long_inc(&n->nr_slabs); | 1095 | atomic_long_inc(&n->nr_slabs); |
1103 | page->offset = s->offset / sizeof(void *); | ||
1104 | page->slab = s; | 1096 | page->slab = s; |
1105 | page->flags |= 1 << PG_slab; | 1097 | page->flags |= 1 << PG_slab; |
1106 | if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | | 1098 | if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | |
@@ -1123,11 +1115,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1123 | set_freepointer(s, last, NULL); | 1115 | set_freepointer(s, last, NULL); |
1124 | 1116 | ||
1125 | page->freelist = start; | 1117 | page->freelist = start; |
1126 | page->lockless_freelist = NULL; | ||
1127 | page->inuse = 0; | 1118 | page->inuse = 0; |
1128 | out: | 1119 | out: |
1129 | if (flags & __GFP_WAIT) | ||
1130 | local_irq_disable(); | ||
1131 | return page; | 1120 | return page; |
1132 | } | 1121 | } |
1133 | 1122 | ||
@@ -1149,7 +1138,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1149 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, | 1138 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
1150 | - pages); | 1139 | - pages); |
1151 | 1140 | ||
1152 | page->mapping = NULL; | ||
1153 | __free_pages(page, s->order); | 1141 | __free_pages(page, s->order); |
1154 | } | 1142 | } |
1155 | 1143 | ||
@@ -1383,33 +1371,34 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page) | |||
1383 | /* | 1371 | /* |
1384 | * Remove the cpu slab | 1372 | * Remove the cpu slab |
1385 | */ | 1373 | */ |
1386 | static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu) | 1374 | static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) |
1387 | { | 1375 | { |
1376 | struct page *page = c->page; | ||
1388 | /* | 1377 | /* |
1389 | * Merge cpu freelist into freelist. Typically we get here | 1378 | * Merge cpu freelist into freelist. Typically we get here |
1390 | * because both freelists are empty. So this is unlikely | 1379 | * because both freelists are empty. So this is unlikely |
1391 | * to occur. | 1380 | * to occur. |
1392 | */ | 1381 | */ |
1393 | while (unlikely(page->lockless_freelist)) { | 1382 | while (unlikely(c->freelist)) { |
1394 | void **object; | 1383 | void **object; |
1395 | 1384 | ||
1396 | /* Retrieve object from cpu_freelist */ | 1385 | /* Retrieve object from cpu_freelist */ |
1397 | object = page->lockless_freelist; | 1386 | object = c->freelist; |
1398 | page->lockless_freelist = page->lockless_freelist[page->offset]; | 1387 | c->freelist = c->freelist[c->offset]; |
1399 | 1388 | ||
1400 | /* And put onto the regular freelist */ | 1389 | /* And put onto the regular freelist */ |
1401 | object[page->offset] = page->freelist; | 1390 | object[c->offset] = page->freelist; |
1402 | page->freelist = object; | 1391 | page->freelist = object; |
1403 | page->inuse--; | 1392 | page->inuse--; |
1404 | } | 1393 | } |
1405 | s->cpu_slab[cpu] = NULL; | 1394 | c->page = NULL; |
1406 | unfreeze_slab(s, page); | 1395 | unfreeze_slab(s, page); |
1407 | } | 1396 | } |
1408 | 1397 | ||
1409 | static inline void flush_slab(struct kmem_cache *s, struct page *page, int cpu) | 1398 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) |
1410 | { | 1399 | { |
1411 | slab_lock(page); | 1400 | slab_lock(c->page); |
1412 | deactivate_slab(s, page, cpu); | 1401 | deactivate_slab(s, c); |
1413 | } | 1402 | } |
1414 | 1403 | ||
1415 | /* | 1404 | /* |
@@ -1418,18 +1407,17 @@ static inline void flush_slab(struct kmem_cache *s, struct page *page, int cpu) | |||
1418 | */ | 1407 | */ |
1419 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) | 1408 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) |
1420 | { | 1409 | { |
1421 | struct page *page = s->cpu_slab[cpu]; | 1410 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); |
1422 | 1411 | ||
1423 | if (likely(page)) | 1412 | if (likely(c && c->page)) |
1424 | flush_slab(s, page, cpu); | 1413 | flush_slab(s, c); |
1425 | } | 1414 | } |
1426 | 1415 | ||
1427 | static void flush_cpu_slab(void *d) | 1416 | static void flush_cpu_slab(void *d) |
1428 | { | 1417 | { |
1429 | struct kmem_cache *s = d; | 1418 | struct kmem_cache *s = d; |
1430 | int cpu = smp_processor_id(); | ||
1431 | 1419 | ||
1432 | __flush_cpu_slab(s, cpu); | 1420 | __flush_cpu_slab(s, smp_processor_id()); |
1433 | } | 1421 | } |
1434 | 1422 | ||
1435 | static void flush_all(struct kmem_cache *s) | 1423 | static void flush_all(struct kmem_cache *s) |
@@ -1446,6 +1434,19 @@ static void flush_all(struct kmem_cache *s) | |||
1446 | } | 1434 | } |
1447 | 1435 | ||
1448 | /* | 1436 | /* |
1437 | * Check if the objects in a per cpu structure fit numa | ||
1438 | * locality expectations. | ||
1439 | */ | ||
1440 | static inline int node_match(struct kmem_cache_cpu *c, int node) | ||
1441 | { | ||
1442 | #ifdef CONFIG_NUMA | ||
1443 | if (node != -1 && c->node != node) | ||
1444 | return 0; | ||
1445 | #endif | ||
1446 | return 1; | ||
1447 | } | ||
1448 | |||
1449 | /* | ||
1449 | * Slow path. The lockless freelist is empty or we need to perform | 1450 | * Slow path. The lockless freelist is empty or we need to perform |
1450 | * debugging duties. | 1451 | * debugging duties. |
1451 | * | 1452 | * |
@@ -1463,45 +1464,53 @@ static void flush_all(struct kmem_cache *s) | |||
1463 | * we need to allocate a new slab. This is slowest path since we may sleep. | 1464 | * we need to allocate a new slab. This is slowest path since we may sleep. |
1464 | */ | 1465 | */ |
1465 | static void *__slab_alloc(struct kmem_cache *s, | 1466 | static void *__slab_alloc(struct kmem_cache *s, |
1466 | gfp_t gfpflags, int node, void *addr, struct page *page) | 1467 | gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) |
1467 | { | 1468 | { |
1468 | void **object; | 1469 | void **object; |
1469 | int cpu = smp_processor_id(); | 1470 | struct page *new; |
1470 | 1471 | ||
1471 | if (!page) | 1472 | if (!c->page) |
1472 | goto new_slab; | 1473 | goto new_slab; |
1473 | 1474 | ||
1474 | slab_lock(page); | 1475 | slab_lock(c->page); |
1475 | if (unlikely(node != -1 && page_to_nid(page) != node)) | 1476 | if (unlikely(!node_match(c, node))) |
1476 | goto another_slab; | 1477 | goto another_slab; |
1477 | load_freelist: | 1478 | load_freelist: |
1478 | object = page->freelist; | 1479 | object = c->page->freelist; |
1479 | if (unlikely(!object)) | 1480 | if (unlikely(!object)) |
1480 | goto another_slab; | 1481 | goto another_slab; |
1481 | if (unlikely(SlabDebug(page))) | 1482 | if (unlikely(SlabDebug(c->page))) |
1482 | goto debug; | 1483 | goto debug; |
1483 | 1484 | ||
1484 | object = page->freelist; | 1485 | object = c->page->freelist; |
1485 | page->lockless_freelist = object[page->offset]; | 1486 | c->freelist = object[c->offset]; |
1486 | page->inuse = s->objects; | 1487 | c->page->inuse = s->objects; |
1487 | page->freelist = NULL; | 1488 | c->page->freelist = NULL; |
1488 | slab_unlock(page); | 1489 | c->node = page_to_nid(c->page); |
1490 | slab_unlock(c->page); | ||
1489 | return object; | 1491 | return object; |
1490 | 1492 | ||
1491 | another_slab: | 1493 | another_slab: |
1492 | deactivate_slab(s, page, cpu); | 1494 | deactivate_slab(s, c); |
1493 | 1495 | ||
1494 | new_slab: | 1496 | new_slab: |
1495 | page = get_partial(s, gfpflags, node); | 1497 | new = get_partial(s, gfpflags, node); |
1496 | if (page) { | 1498 | if (new) { |
1497 | s->cpu_slab[cpu] = page; | 1499 | c->page = new; |
1498 | goto load_freelist; | 1500 | goto load_freelist; |
1499 | } | 1501 | } |
1500 | 1502 | ||
1501 | page = new_slab(s, gfpflags, node); | 1503 | if (gfpflags & __GFP_WAIT) |
1502 | if (page) { | 1504 | local_irq_enable(); |
1503 | cpu = smp_processor_id(); | 1505 | |
1504 | if (s->cpu_slab[cpu]) { | 1506 | new = new_slab(s, gfpflags, node); |
1507 | |||
1508 | if (gfpflags & __GFP_WAIT) | ||
1509 | local_irq_disable(); | ||
1510 | |||
1511 | if (new) { | ||
1512 | c = get_cpu_slab(s, smp_processor_id()); | ||
1513 | if (c->page) { | ||
1505 | /* | 1514 | /* |
1506 | * Someone else populated the cpu_slab while we | 1515 | * Someone else populated the cpu_slab while we |
1507 | * enabled interrupts, or we have gotten scheduled | 1516 | * enabled interrupts, or we have gotten scheduled |
@@ -1509,34 +1518,33 @@ new_slab: | |||
1509 | * requested node even if __GFP_THISNODE was | 1518 | * requested node even if __GFP_THISNODE was |
1510 | * specified. So we need to recheck. | 1519 | * specified. So we need to recheck. |
1511 | */ | 1520 | */ |
1512 | if (node == -1 || | 1521 | if (node_match(c, node)) { |
1513 | page_to_nid(s->cpu_slab[cpu]) == node) { | ||
1514 | /* | 1522 | /* |
1515 | * Current cpuslab is acceptable and we | 1523 | * Current cpuslab is acceptable and we |
1516 | * want the current one since its cache hot | 1524 | * want the current one since its cache hot |
1517 | */ | 1525 | */ |
1518 | discard_slab(s, page); | 1526 | discard_slab(s, new); |
1519 | page = s->cpu_slab[cpu]; | 1527 | slab_lock(c->page); |
1520 | slab_lock(page); | ||
1521 | goto load_freelist; | 1528 | goto load_freelist; |
1522 | } | 1529 | } |
1523 | /* New slab does not fit our expectations */ | 1530 | /* New slab does not fit our expectations */ |
1524 | flush_slab(s, s->cpu_slab[cpu], cpu); | 1531 | flush_slab(s, c); |
1525 | } | 1532 | } |
1526 | slab_lock(page); | 1533 | slab_lock(new); |
1527 | SetSlabFrozen(page); | 1534 | SetSlabFrozen(new); |
1528 | s->cpu_slab[cpu] = page; | 1535 | c->page = new; |
1529 | goto load_freelist; | 1536 | goto load_freelist; |
1530 | } | 1537 | } |
1531 | return NULL; | 1538 | return NULL; |
1532 | debug: | 1539 | debug: |
1533 | object = page->freelist; | 1540 | object = c->page->freelist; |
1534 | if (!alloc_debug_processing(s, page, object, addr)) | 1541 | if (!alloc_debug_processing(s, c->page, object, addr)) |
1535 | goto another_slab; | 1542 | goto another_slab; |
1536 | 1543 | ||
1537 | page->inuse++; | 1544 | c->page->inuse++; |
1538 | page->freelist = object[page->offset]; | 1545 | c->page->freelist = object[c->offset]; |
1539 | slab_unlock(page); | 1546 | c->node = -1; |
1547 | slab_unlock(c->page); | ||
1540 | return object; | 1548 | return object; |
1541 | } | 1549 | } |
1542 | 1550 | ||
@@ -1553,25 +1561,24 @@ debug: | |||
1553 | static void __always_inline *slab_alloc(struct kmem_cache *s, | 1561 | static void __always_inline *slab_alloc(struct kmem_cache *s, |
1554 | gfp_t gfpflags, int node, void *addr) | 1562 | gfp_t gfpflags, int node, void *addr) |
1555 | { | 1563 | { |
1556 | struct page *page; | ||
1557 | void **object; | 1564 | void **object; |
1558 | unsigned long flags; | 1565 | unsigned long flags; |
1566 | struct kmem_cache_cpu *c; | ||
1559 | 1567 | ||
1560 | local_irq_save(flags); | 1568 | local_irq_save(flags); |
1561 | page = s->cpu_slab[smp_processor_id()]; | 1569 | c = get_cpu_slab(s, smp_processor_id()); |
1562 | if (unlikely(!page || !page->lockless_freelist || | 1570 | if (unlikely(!c->freelist || !node_match(c, node))) |
1563 | (node != -1 && page_to_nid(page) != node))) | ||
1564 | 1571 | ||
1565 | object = __slab_alloc(s, gfpflags, node, addr, page); | 1572 | object = __slab_alloc(s, gfpflags, node, addr, c); |
1566 | 1573 | ||
1567 | else { | 1574 | else { |
1568 | object = page->lockless_freelist; | 1575 | object = c->freelist; |
1569 | page->lockless_freelist = object[page->offset]; | 1576 | c->freelist = object[c->offset]; |
1570 | } | 1577 | } |
1571 | local_irq_restore(flags); | 1578 | local_irq_restore(flags); |
1572 | 1579 | ||
1573 | if (unlikely((gfpflags & __GFP_ZERO) && object)) | 1580 | if (unlikely((gfpflags & __GFP_ZERO) && object)) |
1574 | memset(object, 0, s->objsize); | 1581 | memset(object, 0, c->objsize); |
1575 | 1582 | ||
1576 | return object; | 1583 | return object; |
1577 | } | 1584 | } |
@@ -1599,7 +1606,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node); | |||
1599 | * handling required then we can return immediately. | 1606 | * handling required then we can return immediately. |
1600 | */ | 1607 | */ |
1601 | static void __slab_free(struct kmem_cache *s, struct page *page, | 1608 | static void __slab_free(struct kmem_cache *s, struct page *page, |
1602 | void *x, void *addr) | 1609 | void *x, void *addr, unsigned int offset) |
1603 | { | 1610 | { |
1604 | void *prior; | 1611 | void *prior; |
1605 | void **object = (void *)x; | 1612 | void **object = (void *)x; |
@@ -1609,7 +1616,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
1609 | if (unlikely(SlabDebug(page))) | 1616 | if (unlikely(SlabDebug(page))) |
1610 | goto debug; | 1617 | goto debug; |
1611 | checks_ok: | 1618 | checks_ok: |
1612 | prior = object[page->offset] = page->freelist; | 1619 | prior = object[offset] = page->freelist; |
1613 | page->freelist = object; | 1620 | page->freelist = object; |
1614 | page->inuse--; | 1621 | page->inuse--; |
1615 | 1622 | ||
@@ -1664,15 +1671,16 @@ static void __always_inline slab_free(struct kmem_cache *s, | |||
1664 | { | 1671 | { |
1665 | void **object = (void *)x; | 1672 | void **object = (void *)x; |
1666 | unsigned long flags; | 1673 | unsigned long flags; |
1674 | struct kmem_cache_cpu *c; | ||
1667 | 1675 | ||
1668 | local_irq_save(flags); | 1676 | local_irq_save(flags); |
1669 | debug_check_no_locks_freed(object, s->objsize); | 1677 | debug_check_no_locks_freed(object, s->objsize); |
1670 | if (likely(page == s->cpu_slab[smp_processor_id()] && | 1678 | c = get_cpu_slab(s, smp_processor_id()); |
1671 | !SlabDebug(page))) { | 1679 | if (likely(page == c->page && c->node >= 0)) { |
1672 | object[page->offset] = page->lockless_freelist; | 1680 | object[c->offset] = c->freelist; |
1673 | page->lockless_freelist = object; | 1681 | c->freelist = object; |
1674 | } else | 1682 | } else |
1675 | __slab_free(s, page, x, addr); | 1683 | __slab_free(s, page, x, addr, c->offset); |
1676 | 1684 | ||
1677 | local_irq_restore(flags); | 1685 | local_irq_restore(flags); |
1678 | } | 1686 | } |
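Both fast paths above (slab_alloc() and slab_free()) rely on the free pointer being stored inside the object itself, c->offset words from its start, so an allocation or free touches only the object and the per-cpu structure. A small userspace model of that object[c->offset] indexing, with invented sizes and names:

/*
 * Userspace model of the object[c->offset] free pointer indexing used by
 * the fast paths above.  Sizes and names are invented; this is not kernel
 * code.
 */
#include <stdio.h>

#define OBJ_WORDS	8	/* pretend each object is 8 words */
#define NR_OBJS		4
#define FP_OFFSET	0	/* free pointer in word 0, i.e. s->offset == 0 */

static void *slab[NR_OBJS][OBJ_WORDS];	/* stand-in for one slab page */
static void **freelist;			/* stand-in for c->freelist */

static void *pop(void)			/* slab_alloc() fast path */
{
	void **object = freelist;

	if (object)
		freelist = object[FP_OFFSET];	/* c->freelist = c->freelist[c->offset] */
	return object;
}

static void push(void *x)		/* slab_free() fast path */
{
	void **object = x;

	object[FP_OFFSET] = freelist;		/* object[c->offset] = c->freelist */
	freelist = object;
}

int main(void)
{
	int i;

	for (i = NR_OBJS - 1; i >= 0; i--)	/* thread the objects like new_slab() does */
		push(slab[i]);

	void *a = pop();
	void *b = pop();

	printf("got %p then %p\n", a, b);
	push(a);				/* freed object goes back to the head */
	printf("reused: %p\n", pop());
	return 0;
}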
@@ -1759,14 +1767,6 @@ static inline int slab_order(int size, int min_objects, | |||
1759 | int rem; | 1767 | int rem; |
1760 | int min_order = slub_min_order; | 1768 | int min_order = slub_min_order; |
1761 | 1769 | ||
1762 | /* | ||
1763 | * If we would create too many object per slab then reduce | ||
1764 | * the slab order even if it goes below slub_min_order. | ||
1765 | */ | ||
1766 | while (min_order > 0 && | ||
1767 | (PAGE_SIZE << min_order) >= MAX_OBJECTS_PER_SLAB * size) | ||
1768 | min_order--; | ||
1769 | |||
1770 | for (order = max(min_order, | 1770 | for (order = max(min_order, |
1771 | fls(min_objects * size - 1) - PAGE_SHIFT); | 1771 | fls(min_objects * size - 1) - PAGE_SHIFT); |
1772 | order <= max_order; order++) { | 1772 | order <= max_order; order++) { |
@@ -1781,9 +1781,6 @@ static inline int slab_order(int size, int min_objects, | |||
1781 | if (rem <= slab_size / fract_leftover) | 1781 | if (rem <= slab_size / fract_leftover) |
1782 | break; | 1782 | break; |
1783 | 1783 | ||
1784 | /* If the next size is too high then exit now */ | ||
1785 | if (slab_size * 2 >= MAX_OBJECTS_PER_SLAB * size) | ||
1786 | break; | ||
1787 | } | 1784 | } |
1788 | 1785 | ||
1789 | return order; | 1786 | return order; |
@@ -1858,6 +1855,16 @@ static unsigned long calculate_alignment(unsigned long flags, | |||
1858 | return ALIGN(align, sizeof(void *)); | 1855 | return ALIGN(align, sizeof(void *)); |
1859 | } | 1856 | } |
1860 | 1857 | ||
1858 | static void init_kmem_cache_cpu(struct kmem_cache *s, | ||
1859 | struct kmem_cache_cpu *c) | ||
1860 | { | ||
1861 | c->page = NULL; | ||
1862 | c->freelist = NULL; | ||
1863 | c->node = 0; | ||
1864 | c->offset = s->offset / sizeof(void *); | ||
1865 | c->objsize = s->objsize; | ||
1866 | } | ||
1867 | |||
1861 | static void init_kmem_cache_node(struct kmem_cache_node *n) | 1868 | static void init_kmem_cache_node(struct kmem_cache_node *n) |
1862 | { | 1869 | { |
1863 | n->nr_partial = 0; | 1870 | n->nr_partial = 0; |
@@ -1869,6 +1876,131 @@ static void init_kmem_cache_node(struct kmem_cache_node *n) | |||
1869 | #endif | 1876 | #endif |
1870 | } | 1877 | } |
1871 | 1878 | ||
1879 | #ifdef CONFIG_SMP | ||
1880 | /* | ||
1881 | * Per cpu array for per cpu structures. | ||
1882 | * | ||
1883 | * The per cpu array places all kmem_cache_cpu structures from one processor | ||
1884 | * close together meaning that it becomes possible that multiple per cpu | ||
1885 | * structures are contained in one cacheline. This may be particularly | ||
1886 | * beneficial for the kmalloc caches. | ||
1887 | * | ||
1888 | * A desktop system typically has around 60-80 slabs. With 100 here we are | ||
1889 | * likely able to get per cpu structures for all caches from the array defined | ||
1890 | * here. We must be able to cover all kmalloc caches during bootstrap. | ||
1891 | * | ||
1892 | * If the per cpu array is exhausted then fall back to kmalloc | ||
1893 | * of individual cachelines. No sharing is possible then. | ||
1894 | */ | ||
1895 | #define NR_KMEM_CACHE_CPU 100 | ||
1896 | |||
1897 | static DEFINE_PER_CPU(struct kmem_cache_cpu, | ||
1898 | kmem_cache_cpu)[NR_KMEM_CACHE_CPU]; | ||
1899 | |||
1900 | static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free); | ||
1901 | static cpumask_t kmem_cach_cpu_free_init_once = CPU_MASK_NONE; | ||
1902 | |||
1903 | static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, | ||
1904 | int cpu, gfp_t flags) | ||
1905 | { | ||
1906 | struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu); | ||
1907 | |||
1908 | if (c) | ||
1909 | per_cpu(kmem_cache_cpu_free, cpu) = | ||
1910 | (void *)c->freelist; | ||
1911 | else { | ||
1912 | /* Table overflow: So allocate ourselves */ | ||
1913 | c = kmalloc_node( | ||
1914 | ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()), | ||
1915 | flags, cpu_to_node(cpu)); | ||
1916 | if (!c) | ||
1917 | return NULL; | ||
1918 | } | ||
1919 | |||
1920 | init_kmem_cache_cpu(s, c); | ||
1921 | return c; | ||
1922 | } | ||
1923 | |||
1924 | static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) | ||
1925 | { | ||
1926 | if (c < per_cpu(kmem_cache_cpu, cpu) || | ||
1927 | c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { | ||
1928 | kfree(c); | ||
1929 | return; | ||
1930 | } | ||
1931 | c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu); | ||
1932 | per_cpu(kmem_cache_cpu_free, cpu) = c; | ||
1933 | } | ||
1934 | |||
1935 | static void free_kmem_cache_cpus(struct kmem_cache *s) | ||
1936 | { | ||
1937 | int cpu; | ||
1938 | |||
1939 | for_each_online_cpu(cpu) { | ||
1940 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | ||
1941 | |||
1942 | if (c) { | ||
1943 | s->cpu_slab[cpu] = NULL; | ||
1944 | free_kmem_cache_cpu(c, cpu); | ||
1945 | } | ||
1946 | } | ||
1947 | } | ||
1948 | |||
1949 | static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) | ||
1950 | { | ||
1951 | int cpu; | ||
1952 | |||
1953 | for_each_online_cpu(cpu) { | ||
1954 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | ||
1955 | |||
1956 | if (c) | ||
1957 | continue; | ||
1958 | |||
1959 | c = alloc_kmem_cache_cpu(s, cpu, flags); | ||
1960 | if (!c) { | ||
1961 | free_kmem_cache_cpus(s); | ||
1962 | return 0; | ||
1963 | } | ||
1964 | s->cpu_slab[cpu] = c; | ||
1965 | } | ||
1966 | return 1; | ||
1967 | } | ||
1968 | |||
1969 | /* | ||
1970 | * Initialize the per cpu array. | ||
1971 | */ | ||
1972 | static void init_alloc_cpu_cpu(int cpu) | ||
1973 | { | ||
1974 | int i; | ||
1975 | |||
1976 | if (cpu_isset(cpu, kmem_cach_cpu_free_init_once)) | ||
1977 | return; | ||
1978 | |||
1979 | for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--) | ||
1980 | free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu); | ||
1981 | |||
1982 | cpu_set(cpu, kmem_cach_cpu_free_init_once); | ||
1983 | } | ||
1984 | |||
1985 | static void __init init_alloc_cpu(void) | ||
1986 | { | ||
1987 | int cpu; | ||
1988 | |||
1989 | for_each_online_cpu(cpu) | ||
1990 | init_alloc_cpu_cpu(cpu); | ||
1991 | } | ||
1992 | |||
1993 | #else | ||
1994 | static inline void free_kmem_cache_cpus(struct kmem_cache *s) {} | ||
1995 | static inline void init_alloc_cpu(void) {} | ||
1996 | |||
1997 | static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) | ||
1998 | { | ||
1999 | init_kmem_cache_cpu(s, &s->cpu_slab); | ||
2000 | return 1; | ||
2001 | } | ||
2002 | #endif | ||
2003 | |||
1872 | #ifdef CONFIG_NUMA | 2004 | #ifdef CONFIG_NUMA |
1873 | /* | 2005 | /* |
1874 | * No kmalloc_node yet so do it by hand. We know that this is the first | 2006 | * No kmalloc_node yet so do it by hand. We know that this is the first |
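The bootstrap code added above hands out kmem_cache_cpu structures from a statically sized per-cpu array, chaining the unused entries through their own freelist field and falling back to kmalloc_node() once the NR_KMEM_CACHE_CPU entries are exhausted. A userspace model of that pool-with-fallback pattern (pool size and names here are made up):

/*
 * Userspace model of the alloc_kmem_cache_cpu()/free_kmem_cache_cpu()
 * pattern above: a fixed pool whose unused entries are chained through one
 * of their own fields, with a heap fallback once the pool runs dry.
 * Illustrative only; the kernel pool is per cpu and NR_KMEM_CACHE_CPU big.
 */
#include <stdio.h>
#include <stdlib.h>

#define POOL_SIZE 4

struct pcpu {
	void *freelist;		/* doubles as the "next free" link while pooled */
	int payload;
};

static struct pcpu pool[POOL_SIZE];
static struct pcpu *pool_free;

static void pool_init(void)		/* like init_alloc_cpu_cpu() */
{
	int i;

	for (i = POOL_SIZE - 1; i >= 0; i--) {
		pool[i].freelist = pool_free;
		pool_free = &pool[i];
	}
}

static struct pcpu *pcpu_alloc(void)
{
	struct pcpu *c = pool_free;

	if (c) {
		pool_free = c->freelist;	/* unlink from the static pool */
		return c;
	}
	return malloc(sizeof(*c));		/* "table overflow": allocate ourselves */
}

static void pcpu_free(struct pcpu *c)
{
	if (c < pool || c >= pool + POOL_SIZE) {	/* did not come from the pool */
		free(c);
		return;
	}
	c->freelist = pool_free;		/* push back onto the pool free list */
	pool_free = c;
}

int main(void)
{
	struct pcpu *a, *b;

	pool_init();
	a = pcpu_alloc();
	b = pcpu_alloc();
	printf("a from pool: %d\n", a >= pool && a < pool + POOL_SIZE);
	pcpu_free(b);
	pcpu_free(a);
	return 0;
}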
@@ -1876,10 +2008,11 @@ static void init_kmem_cache_node(struct kmem_cache_node *n) | |||
1876 | * possible. | 2008 | * possible. |
1877 | * | 2009 | * |
1878 | * Note that this function only works on the kmalloc_node_cache | 2010 | * Note that this function only works on the kmalloc_node_cache |
1879 | * when allocating for the kmalloc_node_cache. | 2011 | * when allocating for the kmalloc_node_cache. This is used for bootstrapping |
2012 | * memory on a fresh node that has no slab structures yet. | ||
1880 | */ | 2013 | */ |
1881 | static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflags, | 2014 | static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, |
1882 | int node) | 2015 | int node) |
1883 | { | 2016 | { |
1884 | struct page *page; | 2017 | struct page *page; |
1885 | struct kmem_cache_node *n; | 2018 | struct kmem_cache_node *n; |
@@ -1908,12 +2041,6 @@ static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflag | |||
1908 | init_kmem_cache_node(n); | 2041 | init_kmem_cache_node(n); |
1909 | atomic_long_inc(&n->nr_slabs); | 2042 | atomic_long_inc(&n->nr_slabs); |
1910 | add_partial(n, page); | 2043 | add_partial(n, page); |
1911 | |||
1912 | /* | ||
1913 | * new_slab() disables interupts. If we do not reenable interrupts here | ||
1914 | * then bootup would continue with interrupts disabled. | ||
1915 | */ | ||
1916 | local_irq_enable(); | ||
1917 | return n; | 2044 | return n; |
1918 | } | 2045 | } |
1919 | 2046 | ||
@@ -1921,7 +2048,7 @@ static void free_kmem_cache_nodes(struct kmem_cache *s) | |||
1921 | { | 2048 | { |
1922 | int node; | 2049 | int node; |
1923 | 2050 | ||
1924 | for_each_online_node(node) { | 2051 | for_each_node_state(node, N_NORMAL_MEMORY) { |
1925 | struct kmem_cache_node *n = s->node[node]; | 2052 | struct kmem_cache_node *n = s->node[node]; |
1926 | if (n && n != &s->local_node) | 2053 | if (n && n != &s->local_node) |
1927 | kmem_cache_free(kmalloc_caches, n); | 2054 | kmem_cache_free(kmalloc_caches, n); |
@@ -1939,7 +2066,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) | |||
1939 | else | 2066 | else |
1940 | local_node = 0; | 2067 | local_node = 0; |
1941 | 2068 | ||
1942 | for_each_online_node(node) { | 2069 | for_each_node_state(node, N_NORMAL_MEMORY) { |
1943 | struct kmem_cache_node *n; | 2070 | struct kmem_cache_node *n; |
1944 | 2071 | ||
1945 | if (local_node == node) | 2072 | if (local_node == node) |
@@ -2077,21 +2204,14 @@ static int calculate_sizes(struct kmem_cache *s) | |||
2077 | */ | 2204 | */ |
2078 | s->objects = (PAGE_SIZE << s->order) / size; | 2205 | s->objects = (PAGE_SIZE << s->order) / size; |
2079 | 2206 | ||
2080 | /* | 2207 | return !!s->objects; |
2081 | * Verify that the number of objects is within permitted limits. | ||
2082 | * The page->inuse field is only 16 bit wide! So we cannot have | ||
2083 | * more than 64k objects per slab. | ||
2084 | */ | ||
2085 | if (!s->objects || s->objects > MAX_OBJECTS_PER_SLAB) | ||
2086 | return 0; | ||
2087 | return 1; | ||
2088 | 2208 | ||
2089 | } | 2209 | } |
2090 | 2210 | ||
2091 | static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, | 2211 | static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, |
2092 | const char *name, size_t size, | 2212 | const char *name, size_t size, |
2093 | size_t align, unsigned long flags, | 2213 | size_t align, unsigned long flags, |
2094 | void (*ctor)(void *, struct kmem_cache *, unsigned long)) | 2214 | void (*ctor)(struct kmem_cache *, void *)) |
2095 | { | 2215 | { |
2096 | memset(s, 0, kmem_size); | 2216 | memset(s, 0, kmem_size); |
2097 | s->name = name; | 2217 | s->name = name; |
@@ -2107,9 +2227,12 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, | |||
2107 | #ifdef CONFIG_NUMA | 2227 | #ifdef CONFIG_NUMA |
2108 | s->defrag_ratio = 100; | 2228 | s->defrag_ratio = 100; |
2109 | #endif | 2229 | #endif |
2230 | if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) | ||
2231 | goto error; | ||
2110 | 2232 | ||
2111 | if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) | 2233 | if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) |
2112 | return 1; | 2234 | return 1; |
2235 | free_kmem_cache_nodes(s); | ||
2113 | error: | 2236 | error: |
2114 | if (flags & SLAB_PANIC) | 2237 | if (flags & SLAB_PANIC) |
2115 | panic("Cannot create slab %s size=%lu realsize=%u " | 2238 | panic("Cannot create slab %s size=%lu realsize=%u " |
@@ -2192,7 +2315,8 @@ static inline int kmem_cache_close(struct kmem_cache *s) | |||
2192 | flush_all(s); | 2315 | flush_all(s); |
2193 | 2316 | ||
2194 | /* Attempt to free all objects */ | 2317 | /* Attempt to free all objects */ |
2195 | for_each_online_node(node) { | 2318 | free_kmem_cache_cpus(s); |
2319 | for_each_node_state(node, N_NORMAL_MEMORY) { | ||
2196 | struct kmem_cache_node *n = get_node(s, node); | 2320 | struct kmem_cache_node *n = get_node(s, node); |
2197 | 2321 | ||
2198 | n->nr_partial -= free_list(s, n, &n->partial); | 2322 | n->nr_partial -= free_list(s, n, &n->partial); |
@@ -2227,11 +2351,11 @@ EXPORT_SYMBOL(kmem_cache_destroy); | |||
2227 | * Kmalloc subsystem | 2351 | * Kmalloc subsystem |
2228 | *******************************************************************/ | 2352 | *******************************************************************/ |
2229 | 2353 | ||
2230 | struct kmem_cache kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __cacheline_aligned; | 2354 | struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned; |
2231 | EXPORT_SYMBOL(kmalloc_caches); | 2355 | EXPORT_SYMBOL(kmalloc_caches); |
2232 | 2356 | ||
2233 | #ifdef CONFIG_ZONE_DMA | 2357 | #ifdef CONFIG_ZONE_DMA |
2234 | static struct kmem_cache *kmalloc_caches_dma[KMALLOC_SHIFT_HIGH + 1]; | 2358 | static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT]; |
2235 | #endif | 2359 | #endif |
2236 | 2360 | ||
2237 | static int __init setup_slub_min_order(char *str) | 2361 | static int __init setup_slub_min_order(char *str) |
@@ -2397,12 +2521,8 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) | |||
2397 | return ZERO_SIZE_PTR; | 2521 | return ZERO_SIZE_PTR; |
2398 | 2522 | ||
2399 | index = size_index[(size - 1) / 8]; | 2523 | index = size_index[(size - 1) / 8]; |
2400 | } else { | 2524 | } else |
2401 | if (size > KMALLOC_MAX_SIZE) | ||
2402 | return NULL; | ||
2403 | |||
2404 | index = fls(size - 1); | 2525 | index = fls(size - 1); |
2405 | } | ||
2406 | 2526 | ||
2407 | #ifdef CONFIG_ZONE_DMA | 2527 | #ifdef CONFIG_ZONE_DMA |
2408 | if (unlikely((flags & SLUB_DMA))) | 2528 | if (unlikely((flags & SLUB_DMA))) |
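In get_slab() above, the large-size branch now computes the cache index as plain fls(size - 1), so each kmalloc cache covers one power-of-two bucket; the small-size branch keeps using the size_index[] table, which is outside this diff, and with the hunks that follow, sizes above PAGE_SIZE/2 never reach get_slab() at all. A quick userspace illustration of the fls() mapping (fls_() is a stand-in for the kernel's fls()):

/*
 * Illustrative only: how a kmalloc() size maps to a power-of-two cache via
 * fls(), mirroring the else branch of get_slab() above.
 */
#include <stdio.h>

static int fls_(unsigned int x)
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

int main(void)
{
	unsigned int sizes[] = { 256, 1000, 1024, 2048 };

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		int index = fls_(sizes[i] - 1);

		printf("kmalloc(%u) -> kmalloc-%u (kmalloc_caches[%d])\n",
		       sizes[i], 1u << index, index);
	}
	return 0;
}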
@@ -2414,9 +2534,15 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) | |||
2414 | 2534 | ||
2415 | void *__kmalloc(size_t size, gfp_t flags) | 2535 | void *__kmalloc(size_t size, gfp_t flags) |
2416 | { | 2536 | { |
2417 | struct kmem_cache *s = get_slab(size, flags); | 2537 | struct kmem_cache *s; |
2418 | 2538 | ||
2419 | if (ZERO_OR_NULL_PTR(s)) | 2539 | if (unlikely(size > PAGE_SIZE / 2)) |
2540 | return (void *)__get_free_pages(flags | __GFP_COMP, | ||
2541 | get_order(size)); | ||
2542 | |||
2543 | s = get_slab(size, flags); | ||
2544 | |||
2545 | if (unlikely(ZERO_OR_NULL_PTR(s))) | ||
2420 | return s; | 2546 | return s; |
2421 | 2547 | ||
2422 | return slab_alloc(s, flags, -1, __builtin_return_address(0)); | 2548 | return slab_alloc(s, flags, -1, __builtin_return_address(0)); |
@@ -2426,9 +2552,15 @@ EXPORT_SYMBOL(__kmalloc); | |||
2426 | #ifdef CONFIG_NUMA | 2552 | #ifdef CONFIG_NUMA |
2427 | void *__kmalloc_node(size_t size, gfp_t flags, int node) | 2553 | void *__kmalloc_node(size_t size, gfp_t flags, int node) |
2428 | { | 2554 | { |
2429 | struct kmem_cache *s = get_slab(size, flags); | 2555 | struct kmem_cache *s; |
2430 | 2556 | ||
2431 | if (ZERO_OR_NULL_PTR(s)) | 2557 | if (unlikely(size > PAGE_SIZE / 2)) |
2558 | return (void *)__get_free_pages(flags | __GFP_COMP, | ||
2559 | get_order(size)); | ||
2560 | |||
2561 | s = get_slab(size, flags); | ||
2562 | |||
2563 | if (unlikely(ZERO_OR_NULL_PTR(s))) | ||
2432 | return s; | 2564 | return s; |
2433 | 2565 | ||
2434 | return slab_alloc(s, flags, node, __builtin_return_address(0)); | 2566 | return slab_alloc(s, flags, node, __builtin_return_address(0)); |
@@ -2441,7 +2573,8 @@ size_t ksize(const void *object) | |||
2441 | struct page *page; | 2573 | struct page *page; |
2442 | struct kmem_cache *s; | 2574 | struct kmem_cache *s; |
2443 | 2575 | ||
2444 | if (ZERO_OR_NULL_PTR(object)) | 2576 | BUG_ON(!object); |
2577 | if (unlikely(object == ZERO_SIZE_PTR)) | ||
2445 | return 0; | 2578 | return 0; |
2446 | 2579 | ||
2447 | page = get_object_page(object); | 2580 | page = get_object_page(object); |
@@ -2473,22 +2606,17 @@ EXPORT_SYMBOL(ksize); | |||
2473 | 2606 | ||
2474 | void kfree(const void *x) | 2607 | void kfree(const void *x) |
2475 | { | 2608 | { |
2476 | struct kmem_cache *s; | ||
2477 | struct page *page; | 2609 | struct page *page; |
2478 | 2610 | ||
2479 | /* | 2611 | if (unlikely(ZERO_OR_NULL_PTR(x))) |
2480 | * This has to be an unsigned comparison. According to Linus | ||
2481 | * some gcc version treat a pointer as a signed entity. Then | ||
2482 | * this comparison would be true for all "negative" pointers | ||
2483 | * (which would cover the whole upper half of the address space). | ||
2484 | */ | ||
2485 | if (ZERO_OR_NULL_PTR(x)) | ||
2486 | return; | 2612 | return; |
2487 | 2613 | ||
2488 | page = virt_to_head_page(x); | 2614 | page = virt_to_head_page(x); |
2489 | s = page->slab; | 2615 | if (unlikely(!PageSlab(page))) { |
2490 | 2616 | put_page(page); | |
2491 | slab_free(s, page, (void *)x, __builtin_return_address(0)); | 2617 | return; |
2618 | } | ||
2619 | slab_free(page->slab, page, (void *)x, __builtin_return_address(0)); | ||
2492 | } | 2620 | } |
2493 | EXPORT_SYMBOL(kfree); | 2621 | EXPORT_SYMBOL(kfree); |
2494 | 2622 | ||
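The kfree() change above is the receiving end of the large-allocation pass-through added to __kmalloc() and friends: requests above PAGE_SIZE/2 come straight from the page allocator as compound pages, and kfree() now recognizes them via PageSlab() and releases them with put_page(). A minimal kernel-module sketch of that round trip, assuming a tree with this diff applied (the module name and message are invented):

/*
 * Kernel-module sketch (assumed context: a tree with this diff applied).
 * Shows that a > PAGE_SIZE/2 request bypasses the slab caches and that
 * kfree() detects the resulting compound page via PageSlab().
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/mm.h>

static int __init large_kmalloc_demo_init(void)
{
	void *p = kmalloc(2 * PAGE_SIZE, GFP_KERNEL);	/* served by __get_free_pages() */

	if (!p)
		return -ENOMEM;
	printk(KERN_INFO "PageSlab=%d\n",		/* prints 0: not a slab page */
	       (int)PageSlab(virt_to_head_page(p)));
	kfree(p);					/* !PageSlab() -> put_page() */
	return 0;
}

static void __exit large_kmalloc_demo_exit(void)
{
}

module_init(large_kmalloc_demo_init);
module_exit(large_kmalloc_demo_exit);
MODULE_LICENSE("GPL");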
@@ -2517,7 +2645,7 @@ int kmem_cache_shrink(struct kmem_cache *s) | |||
2517 | return -ENOMEM; | 2645 | return -ENOMEM; |
2518 | 2646 | ||
2519 | flush_all(s); | 2647 | flush_all(s); |
2520 | for_each_online_node(node) { | 2648 | for_each_node_state(node, N_NORMAL_MEMORY) { |
2521 | n = get_node(s, node); | 2649 | n = get_node(s, node); |
2522 | 2650 | ||
2523 | if (!n->nr_partial) | 2651 | if (!n->nr_partial) |
@@ -2575,6 +2703,8 @@ void __init kmem_cache_init(void) | |||
2575 | int i; | 2703 | int i; |
2576 | int caches = 0; | 2704 | int caches = 0; |
2577 | 2705 | ||
2706 | init_alloc_cpu(); | ||
2707 | |||
2578 | #ifdef CONFIG_NUMA | 2708 | #ifdef CONFIG_NUMA |
2579 | /* | 2709 | /* |
2580 | * Must first have the slab cache available for the allocations of the | 2710 | * Must first have the slab cache available for the allocations of the |
@@ -2602,7 +2732,7 @@ void __init kmem_cache_init(void) | |||
2602 | caches++; | 2732 | caches++; |
2603 | } | 2733 | } |
2604 | 2734 | ||
2605 | for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { | 2735 | for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) { |
2606 | create_kmalloc_cache(&kmalloc_caches[i], | 2736 | create_kmalloc_cache(&kmalloc_caches[i], |
2607 | "kmalloc", 1 << i, GFP_KERNEL); | 2737 | "kmalloc", 1 << i, GFP_KERNEL); |
2608 | caches++; | 2738 | caches++; |
@@ -2629,16 +2759,18 @@ void __init kmem_cache_init(void) | |||
2629 | slab_state = UP; | 2759 | slab_state = UP; |
2630 | 2760 | ||
2631 | /* Provide the correct kmalloc names now that the caches are up */ | 2761 | /* Provide the correct kmalloc names now that the caches are up */ |
2632 | for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) | 2762 | for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) |
2633 | kmalloc_caches[i]. name = | 2763 | kmalloc_caches[i]. name = |
2634 | kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); | 2764 | kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); |
2635 | 2765 | ||
2636 | #ifdef CONFIG_SMP | 2766 | #ifdef CONFIG_SMP |
2637 | register_cpu_notifier(&slab_notifier); | 2767 | register_cpu_notifier(&slab_notifier); |
2768 | kmem_size = offsetof(struct kmem_cache, cpu_slab) + | ||
2769 | nr_cpu_ids * sizeof(struct kmem_cache_cpu *); | ||
2770 | #else | ||
2771 | kmem_size = sizeof(struct kmem_cache); | ||
2638 | #endif | 2772 | #endif |
2639 | 2773 | ||
2640 | kmem_size = offsetof(struct kmem_cache, cpu_slab) + | ||
2641 | nr_cpu_ids * sizeof(struct page *); | ||
2642 | 2774 | ||
2643 | printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," | 2775 | printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," |
2644 | " CPUs=%d, Nodes=%d\n", | 2776 | " CPUs=%d, Nodes=%d\n", |
@@ -2669,7 +2801,7 @@ static int slab_unmergeable(struct kmem_cache *s) | |||
2669 | 2801 | ||
2670 | static struct kmem_cache *find_mergeable(size_t size, | 2802 | static struct kmem_cache *find_mergeable(size_t size, |
2671 | size_t align, unsigned long flags, const char *name, | 2803 | size_t align, unsigned long flags, const char *name, |
2672 | void (*ctor)(void *, struct kmem_cache *, unsigned long)) | 2804 | void (*ctor)(struct kmem_cache *, void *)) |
2673 | { | 2805 | { |
2674 | struct kmem_cache *s; | 2806 | struct kmem_cache *s; |
2675 | 2807 | ||
@@ -2710,19 +2842,28 @@ static struct kmem_cache *find_mergeable(size_t size, | |||
2710 | 2842 | ||
2711 | struct kmem_cache *kmem_cache_create(const char *name, size_t size, | 2843 | struct kmem_cache *kmem_cache_create(const char *name, size_t size, |
2712 | size_t align, unsigned long flags, | 2844 | size_t align, unsigned long flags, |
2713 | void (*ctor)(void *, struct kmem_cache *, unsigned long)) | 2845 | void (*ctor)(struct kmem_cache *, void *)) |
2714 | { | 2846 | { |
2715 | struct kmem_cache *s; | 2847 | struct kmem_cache *s; |
2716 | 2848 | ||
2717 | down_write(&slub_lock); | 2849 | down_write(&slub_lock); |
2718 | s = find_mergeable(size, align, flags, name, ctor); | 2850 | s = find_mergeable(size, align, flags, name, ctor); |
2719 | if (s) { | 2851 | if (s) { |
2852 | int cpu; | ||
2853 | |||
2720 | s->refcount++; | 2854 | s->refcount++; |
2721 | /* | 2855 | /* |
2722 | * Adjust the object sizes so that we clear | 2856 | * Adjust the object sizes so that we clear |
2723 | * the complete object on kzalloc. | 2857 | * the complete object on kzalloc. |
2724 | */ | 2858 | */ |
2725 | s->objsize = max(s->objsize, (int)size); | 2859 | s->objsize = max(s->objsize, (int)size); |
2860 | |||
2861 | /* | ||
2862 | * And then we need to update the object size in the | ||
2863 | * per cpu structures | ||
2864 | */ | ||
2865 | for_each_online_cpu(cpu) | ||
2866 | get_cpu_slab(s, cpu)->objsize = s->objsize; | ||
2726 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); | 2867 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); |
2727 | up_write(&slub_lock); | 2868 | up_write(&slub_lock); |
2728 | if (sysfs_slab_alias(s, name)) | 2869 | if (sysfs_slab_alias(s, name)) |
@@ -2765,15 +2906,29 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, | |||
2765 | unsigned long flags; | 2906 | unsigned long flags; |
2766 | 2907 | ||
2767 | switch (action) { | 2908 | switch (action) { |
2909 | case CPU_UP_PREPARE: | ||
2910 | case CPU_UP_PREPARE_FROZEN: | ||
2911 | init_alloc_cpu_cpu(cpu); | ||
2912 | down_read(&slub_lock); | ||
2913 | list_for_each_entry(s, &slab_caches, list) | ||
2914 | s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu, | ||
2915 | GFP_KERNEL); | ||
2916 | up_read(&slub_lock); | ||
2917 | break; | ||
2918 | |||
2768 | case CPU_UP_CANCELED: | 2919 | case CPU_UP_CANCELED: |
2769 | case CPU_UP_CANCELED_FROZEN: | 2920 | case CPU_UP_CANCELED_FROZEN: |
2770 | case CPU_DEAD: | 2921 | case CPU_DEAD: |
2771 | case CPU_DEAD_FROZEN: | 2922 | case CPU_DEAD_FROZEN: |
2772 | down_read(&slub_lock); | 2923 | down_read(&slub_lock); |
2773 | list_for_each_entry(s, &slab_caches, list) { | 2924 | list_for_each_entry(s, &slab_caches, list) { |
2925 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | ||
2926 | |||
2774 | local_irq_save(flags); | 2927 | local_irq_save(flags); |
2775 | __flush_cpu_slab(s, cpu); | 2928 | __flush_cpu_slab(s, cpu); |
2776 | local_irq_restore(flags); | 2929 | local_irq_restore(flags); |
2930 | free_kmem_cache_cpu(c, cpu); | ||
2931 | s->cpu_slab[cpu] = NULL; | ||
2777 | } | 2932 | } |
2778 | up_read(&slub_lock); | 2933 | up_read(&slub_lock); |
2779 | break; | 2934 | break; |
@@ -2790,9 +2945,14 @@ static struct notifier_block __cpuinitdata slab_notifier = | |||
2790 | 2945 | ||
2791 | void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) | 2946 | void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) |
2792 | { | 2947 | { |
2793 | struct kmem_cache *s = get_slab(size, gfpflags); | 2948 | struct kmem_cache *s; |
2949 | |||
2950 | if (unlikely(size > PAGE_SIZE / 2)) | ||
2951 | return (void *)__get_free_pages(gfpflags | __GFP_COMP, | ||
2952 | get_order(size)); | ||
2953 | s = get_slab(size, gfpflags); | ||
2794 | 2954 | ||
2795 | if (ZERO_OR_NULL_PTR(s)) | 2955 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
2796 | return s; | 2956 | return s; |
2797 | 2957 | ||
2798 | return slab_alloc(s, gfpflags, -1, caller); | 2958 | return slab_alloc(s, gfpflags, -1, caller); |
@@ -2801,9 +2961,14 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) | |||
2801 | void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, | 2961 | void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, |
2802 | int node, void *caller) | 2962 | int node, void *caller) |
2803 | { | 2963 | { |
2804 | struct kmem_cache *s = get_slab(size, gfpflags); | 2964 | struct kmem_cache *s; |
2965 | |||
2966 | if (unlikely(size > PAGE_SIZE / 2)) | ||
2967 | return (void *)__get_free_pages(gfpflags | __GFP_COMP, | ||
2968 | get_order(size)); | ||
2969 | s = get_slab(size, gfpflags); | ||
2805 | 2970 | ||
2806 | if (ZERO_OR_NULL_PTR(s)) | 2971 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
2807 | return s; | 2972 | return s; |
2808 | 2973 | ||
2809 | return slab_alloc(s, gfpflags, node, caller); | 2974 | return slab_alloc(s, gfpflags, node, caller); |
@@ -2902,7 +3067,7 @@ static long validate_slab_cache(struct kmem_cache *s) | |||
2902 | return -ENOMEM; | 3067 | return -ENOMEM; |
2903 | 3068 | ||
2904 | flush_all(s); | 3069 | flush_all(s); |
2905 | for_each_online_node(node) { | 3070 | for_each_node_state(node, N_NORMAL_MEMORY) { |
2906 | struct kmem_cache_node *n = get_node(s, node); | 3071 | struct kmem_cache_node *n = get_node(s, node); |
2907 | 3072 | ||
2908 | count += validate_slab_node(s, n, map); | 3073 | count += validate_slab_node(s, n, map); |
@@ -3116,13 +3281,13 @@ static int list_locations(struct kmem_cache *s, char *buf, | |||
3116 | int node; | 3281 | int node; |
3117 | 3282 | ||
3118 | if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), | 3283 | if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), |
3119 | GFP_KERNEL)) | 3284 | GFP_TEMPORARY)) |
3120 | return sprintf(buf, "Out of memory\n"); | 3285 | return sprintf(buf, "Out of memory\n"); |
3121 | 3286 | ||
3122 | /* Push back cpu slabs */ | 3287 | /* Push back cpu slabs */ |
3123 | flush_all(s); | 3288 | flush_all(s); |
3124 | 3289 | ||
3125 | for_each_online_node(node) { | 3290 | for_each_node_state(node, N_NORMAL_MEMORY) { |
3126 | struct kmem_cache_node *n = get_node(s, node); | 3291 | struct kmem_cache_node *n = get_node(s, node); |
3127 | unsigned long flags; | 3292 | unsigned long flags; |
3128 | struct page *page; | 3293 | struct page *page; |
@@ -3230,11 +3395,18 @@ static unsigned long slab_objects(struct kmem_cache *s, | |||
3230 | per_cpu = nodes + nr_node_ids; | 3395 | per_cpu = nodes + nr_node_ids; |
3231 | 3396 | ||
3232 | for_each_possible_cpu(cpu) { | 3397 | for_each_possible_cpu(cpu) { |
3233 | struct page *page = s->cpu_slab[cpu]; | 3398 | struct page *page; |
3234 | int node; | 3399 | int node; |
3400 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | ||
3235 | 3401 | ||
3402 | if (!c) | ||
3403 | continue; | ||
3404 | |||
3405 | page = c->page; | ||
3406 | node = c->node; | ||
3407 | if (node < 0) | ||
3408 | continue; | ||
3236 | if (page) { | 3409 | if (page) { |
3237 | node = page_to_nid(page); | ||
3238 | if (flags & SO_CPU) { | 3410 | if (flags & SO_CPU) { |
3239 | int x = 0; | 3411 | int x = 0; |
3240 | 3412 | ||
@@ -3249,7 +3421,7 @@ static unsigned long slab_objects(struct kmem_cache *s, | |||
3249 | } | 3421 | } |
3250 | } | 3422 | } |
3251 | 3423 | ||
3252 | for_each_online_node(node) { | 3424 | for_each_node_state(node, N_NORMAL_MEMORY) { |
3253 | struct kmem_cache_node *n = get_node(s, node); | 3425 | struct kmem_cache_node *n = get_node(s, node); |
3254 | 3426 | ||
3255 | if (flags & SO_PARTIAL) { | 3427 | if (flags & SO_PARTIAL) { |
@@ -3277,7 +3449,7 @@ static unsigned long slab_objects(struct kmem_cache *s, | |||
3277 | 3449 | ||
3278 | x = sprintf(buf, "%lu", total); | 3450 | x = sprintf(buf, "%lu", total); |
3279 | #ifdef CONFIG_NUMA | 3451 | #ifdef CONFIG_NUMA |
3280 | for_each_online_node(node) | 3452 | for_each_node_state(node, N_NORMAL_MEMORY) |
3281 | if (nodes[node]) | 3453 | if (nodes[node]) |
3282 | x += sprintf(buf + x, " N%d=%lu", | 3454 | x += sprintf(buf + x, " N%d=%lu", |
3283 | node, nodes[node]); | 3455 | node, nodes[node]); |
@@ -3291,13 +3463,19 @@ static int any_slab_objects(struct kmem_cache *s) | |||
3291 | int node; | 3463 | int node; |
3292 | int cpu; | 3464 | int cpu; |
3293 | 3465 | ||
3294 | for_each_possible_cpu(cpu) | 3466 | for_each_possible_cpu(cpu) { |
3295 | if (s->cpu_slab[cpu]) | 3467 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); |
3468 | |||
3469 | if (c && c->page) | ||
3296 | return 1; | 3470 | return 1; |
3471 | } | ||
3297 | 3472 | ||
3298 | for_each_node(node) { | 3473 | for_each_online_node(node) { |
3299 | struct kmem_cache_node *n = get_node(s, node); | 3474 | struct kmem_cache_node *n = get_node(s, node); |
3300 | 3475 | ||
3476 | if (!n) | ||
3477 | continue; | ||
3478 | |||
3301 | if (n->nr_partial || atomic_long_read(&n->nr_slabs)) | 3479 | if (n->nr_partial || atomic_long_read(&n->nr_slabs)) |
3302 | return 1; | 3480 | return 1; |
3303 | } | 3481 | } |