Diffstat (limited to 'mm/slub.c')
-rw-r--r--  mm/slub.c  520
1 file changed, 349 insertions(+), 171 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index addb20a6d67d..e29a42988c78 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -90,7 +90,7 @@
90 * One use of this flag is to mark slabs that are 90 * One use of this flag is to mark slabs that are
91 * used for allocations. Then such a slab becomes a cpu 91 * used for allocations. Then such a slab becomes a cpu
92 * slab. The cpu slab may be equipped with an additional 92 * slab. The cpu slab may be equipped with an additional
93 * lockless_freelist that allows lockless access to 93 * freelist that allows lockless access to
94 * free objects in addition to the regular freelist 94 * free objects in addition to the regular freelist
95 * that requires the slab lock. 95 * that requires the slab lock.
96 * 96 *
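
The comment above describes SLUB's central trick: besides the slab's regular freelist, which requires the slab lock, the cpu slab keeps a second freelist that the owning cpu can pop from without locking. A minimal userspace sketch of that dual-freelist idea (toy model, not the kernel structures; objects here chain through their first word):

#include <stdio.h>

/* Toy model: free objects are chained through their first word. */
struct slab_model {
	void *freelist;		/* regular list, needs the slab lock in SLUB */
	void *cpu_freelist;	/* lockless list, touched only with irqs off */
};

static void *pop(void **list)
{
	void *object = *list;

	if (object)
		*list = *(void **)object;	/* next pointer lives in the object */
	return object;
}

static void *toy_alloc(struct slab_model *s)
{
	void *object = pop(&s->cpu_freelist);

	if (!object) {
		/* slow path: hand the whole regular list to the fast path */
		s->cpu_freelist = s->freelist;
		s->freelist = NULL;
		object = pop(&s->cpu_freelist);
	}
	return object;
}

int main(void)
{
	static void *objs[3];
	struct slab_model s = { .freelist = &objs[0], .cpu_freelist = NULL };

	objs[0] = &objs[1];
	objs[1] = &objs[2];
	objs[2] = NULL;

	for (int i = 0; i < 4; i++)
		printf("alloc -> %p\n", toy_alloc(&s));
	return 0;
}

In the patch the lockless list moves from page->lockless_freelist into the new per-cpu kmem_cache_cpu structure, but the mechanics stay the same.
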
@@ -140,11 +140,6 @@ static inline void ClearSlabDebug(struct page *page)
140/* 140/*
141 * Issues still to be resolved: 141 * Issues still to be resolved:
142 * 142 *
143 * - The per cpu array is updated for each new slab and and is a remote
144 * cacheline for most nodes. This could become a bouncing cacheline given
145 * enough frequent updates. There are 16 pointers in a cacheline, so at
146 * max 16 cpus could compete for the cacheline which may be okay.
147 *
148 * - Support PAGE_ALLOC_DEBUG. Should be easy to do. 143 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
149 * 144 *
150 * - Variable sizing of the per node arrays 145 * - Variable sizing of the per node arrays
@@ -205,11 +200,6 @@ static inline void ClearSlabDebug(struct page *page)
205#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) 200#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
206#endif 201#endif
207 202
208/*
209 * The page->inuse field is 16 bit thus we have this limitation
210 */
211#define MAX_OBJECTS_PER_SLAB 65535
212
213/* Internal SLUB flags */ 203/* Internal SLUB flags */
214#define __OBJECT_POISON 0x80000000 /* Poison object */ 204#define __OBJECT_POISON 0x80000000 /* Poison object */
215#define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ 205#define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */
@@ -277,6 +267,15 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
277#endif 267#endif
278} 268}
279 269
270static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
271{
272#ifdef CONFIG_SMP
273 return s->cpu_slab[cpu];
274#else
275 return &s->cpu_slab;
276#endif
277}
278
280static inline int check_valid_pointer(struct kmem_cache *s, 279static inline int check_valid_pointer(struct kmem_cache *s,
281 struct page *page, const void *object) 280 struct page *page, const void *object)
282{ 281{
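
The new get_cpu_slab() helper hides whether the per-cpu data sits behind an array of per-cpu pointers (SMP) or is embedded directly in the kmem_cache (UP). A sketch of the same layout split, using invented names (my_cache, my_cpu) purely for illustration:

#include <stdio.h>

#define NR_CPUS 4
#define SMP 1			/* the #else branches show the UP layout */

struct my_cpu { void *freelist; };

struct my_cache {
#if SMP
	struct my_cpu *cpu_slab[NR_CPUS];	/* one pointer per possible cpu */
#else
	struct my_cpu cpu_slab;			/* single embedded copy */
#endif
};

static struct my_cpu *get_cpu_data(struct my_cache *s, int cpu)
{
#if SMP
	return s->cpu_slab[cpu];
#else
	(void)cpu;
	return &s->cpu_slab;
#endif
}

int main(void)
{
	struct my_cache s;
#if SMP
	static struct my_cpu percpu[NR_CPUS];
	int i;

	for (i = 0; i < NR_CPUS; i++)
		s.cpu_slab[i] = &percpu[i];	/* SMP: wire up the pointers */
#endif
	printf("cpu 2 data at %p\n", (void *)get_cpu_data(&s, 2));
	return 0;
}
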
@@ -729,11 +728,6 @@ static int check_slab(struct kmem_cache *s, struct page *page)
729 slab_err(s, page, "Not a valid slab page"); 728 slab_err(s, page, "Not a valid slab page");
730 return 0; 729 return 0;
731 } 730 }
732 if (page->offset * sizeof(void *) != s->offset) {
733 slab_err(s, page, "Corrupted offset %lu",
734 (unsigned long)(page->offset * sizeof(void *)));
735 return 0;
736 }
737 if (page->inuse > s->objects) { 731 if (page->inuse > s->objects) {
738 slab_err(s, page, "inuse %u > max %u", 732 slab_err(s, page, "inuse %u > max %u",
739 s->name, page->inuse, s->objects); 733 s->name, page->inuse, s->objects);
@@ -872,8 +866,6 @@ bad:
872 slab_fix(s, "Marking all objects used"); 866 slab_fix(s, "Marking all objects used");
873 page->inuse = s->objects; 867 page->inuse = s->objects;
874 page->freelist = NULL; 868 page->freelist = NULL;
875 /* Fix up fields that may be corrupted */
876 page->offset = s->offset / sizeof(void *);
877 } 869 }
878 return 0; 870 return 0;
879} 871}
@@ -988,7 +980,7 @@ __setup("slub_debug", setup_slub_debug);
988 980
989static unsigned long kmem_cache_flags(unsigned long objsize, 981static unsigned long kmem_cache_flags(unsigned long objsize,
990 unsigned long flags, const char *name, 982 unsigned long flags, const char *name,
991 void (*ctor)(void *, struct kmem_cache *, unsigned long)) 983 void (*ctor)(struct kmem_cache *, void *))
992{ 984{
993 /* 985 /*
994 * The page->offset field is only 16 bit wide. This is an offset 986 * The page->offset field is only 16 bit wide. This is an offset
@@ -1035,7 +1027,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page,
1035static inline void add_full(struct kmem_cache_node *n, struct page *page) {} 1027static inline void add_full(struct kmem_cache_node *n, struct page *page) {}
1036static inline unsigned long kmem_cache_flags(unsigned long objsize, 1028static inline unsigned long kmem_cache_flags(unsigned long objsize,
1037 unsigned long flags, const char *name, 1029 unsigned long flags, const char *name,
1038 void (*ctor)(void *, struct kmem_cache *, unsigned long)) 1030 void (*ctor)(struct kmem_cache *, void *))
1039{ 1031{
1040 return flags; 1032 return flags;
1041} 1033}
@@ -1055,6 +1047,9 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1055 if (s->flags & SLAB_CACHE_DMA) 1047 if (s->flags & SLAB_CACHE_DMA)
1056 flags |= SLUB_DMA; 1048 flags |= SLUB_DMA;
1057 1049
1050 if (s->flags & SLAB_RECLAIM_ACCOUNT)
1051 flags |= __GFP_RECLAIMABLE;
1052
1058 if (node == -1) 1053 if (node == -1)
1059 page = alloc_pages(flags, s->order); 1054 page = alloc_pages(flags, s->order);
1060 else 1055 else
@@ -1076,7 +1071,7 @@ static void setup_object(struct kmem_cache *s, struct page *page,
1076{ 1071{
1077 setup_object_debug(s, page, object); 1072 setup_object_debug(s, page, object);
1078 if (unlikely(s->ctor)) 1073 if (unlikely(s->ctor))
1079 s->ctor(object, s, 0); 1074 s->ctor(s, object);
1080} 1075}
1081 1076
1082static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) 1077static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -1088,19 +1083,16 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1088 void *last; 1083 void *last;
1089 void *p; 1084 void *p;
1090 1085
1091 BUG_ON(flags & ~(GFP_DMA | __GFP_ZERO | GFP_LEVEL_MASK)); 1086 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1092
1093 if (flags & __GFP_WAIT)
1094 local_irq_enable();
1095 1087
1096 page = allocate_slab(s, flags & GFP_LEVEL_MASK, node); 1088 page = allocate_slab(s,
1089 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1097 if (!page) 1090 if (!page)
1098 goto out; 1091 goto out;
1099 1092
1100 n = get_node(s, page_to_nid(page)); 1093 n = get_node(s, page_to_nid(page));
1101 if (n) 1094 if (n)
1102 atomic_long_inc(&n->nr_slabs); 1095 atomic_long_inc(&n->nr_slabs);
1103 page->offset = s->offset / sizeof(void *);
1104 page->slab = s; 1096 page->slab = s;
1105 page->flags |= 1 << PG_slab; 1097 page->flags |= 1 << PG_slab;
1106 if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | 1098 if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
@@ -1123,11 +1115,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1123 set_freepointer(s, last, NULL); 1115 set_freepointer(s, last, NULL);
1124 1116
1125 page->freelist = start; 1117 page->freelist = start;
1126 page->lockless_freelist = NULL;
1127 page->inuse = 0; 1118 page->inuse = 0;
1128out: 1119out:
1129 if (flags & __GFP_WAIT)
1130 local_irq_disable();
1131 return page; 1120 return page;
1132} 1121}
1133 1122
@@ -1149,7 +1138,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1149 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1138 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1150 - pages); 1139 - pages);
1151 1140
1152 page->mapping = NULL;
1153 __free_pages(page, s->order); 1141 __free_pages(page, s->order);
1154} 1142}
1155 1143
@@ -1383,33 +1371,34 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page)
1383/* 1371/*
1384 * Remove the cpu slab 1372 * Remove the cpu slab
1385 */ 1373 */
1386static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu) 1374static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1387{ 1375{
1376 struct page *page = c->page;
1388 /* 1377 /*
1389 * Merge cpu freelist into freelist. Typically we get here 1378 * Merge cpu freelist into freelist. Typically we get here
1390 * because both freelists are empty. So this is unlikely 1379 * because both freelists are empty. So this is unlikely
1391 * to occur. 1380 * to occur.
1392 */ 1381 */
1393 while (unlikely(page->lockless_freelist)) { 1382 while (unlikely(c->freelist)) {
1394 void **object; 1383 void **object;
1395 1384
1396 /* Retrieve object from cpu_freelist */ 1385 /* Retrieve object from cpu_freelist */
1397 object = page->lockless_freelist; 1386 object = c->freelist;
1398 page->lockless_freelist = page->lockless_freelist[page->offset]; 1387 c->freelist = c->freelist[c->offset];
1399 1388
1400 /* And put onto the regular freelist */ 1389 /* And put onto the regular freelist */
1401 object[page->offset] = page->freelist; 1390 object[c->offset] = page->freelist;
1402 page->freelist = object; 1391 page->freelist = object;
1403 page->inuse--; 1392 page->inuse--;
1404 } 1393 }
1405 s->cpu_slab[cpu] = NULL; 1394 c->page = NULL;
1406 unfreeze_slab(s, page); 1395 unfreeze_slab(s, page);
1407} 1396}
1408 1397
1409static inline void flush_slab(struct kmem_cache *s, struct page *page, int cpu) 1398static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1410{ 1399{
1411 slab_lock(page); 1400 slab_lock(c->page);
1412 deactivate_slab(s, page, cpu); 1401 deactivate_slab(s, c);
1413} 1402}
1414 1403
1415/* 1404/*
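
A self-contained sketch of the merge loop deactivate_slab() performs above: objects are popped off the per-cpu lockless list and pushed back onto the regular freelist, and the in-use count drops for each one (toy model; objects chain through their first word here instead of through c->offset):

#include <stdio.h>

/* Toy model: free objects are chained through their first word. */
static void merge_back(void **cpu_freelist, void **freelist, int *inuse)
{
	while (*cpu_freelist) {
		void *object = *cpu_freelist;

		*cpu_freelist = *(void **)object;	/* pop from the lockless list */
		*(void **)object = *freelist;		/* push onto the regular list */
		*freelist = object;
		(*inuse)--;	/* objects on the cpu list were counted as in use */
	}
}

int main(void)
{
	static void *objs[3];
	void *cpu_freelist, *freelist = NULL;
	int inuse = 3;

	objs[0] = &objs[1];		/* three objects sitting on the cpu list */
	objs[1] = &objs[2];
	objs[2] = NULL;
	cpu_freelist = &objs[0];

	merge_back(&cpu_freelist, &freelist, &inuse);
	printf("inuse after merge: %d, regular list head: %p\n",
	       inuse, freelist);
	return 0;
}
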
@@ -1418,18 +1407,17 @@ static inline void flush_slab(struct kmem_cache *s, struct page *page, int cpu)
1418 */ 1407 */
1419static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) 1408static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
1420{ 1409{
1421 struct page *page = s->cpu_slab[cpu]; 1410 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
1422 1411
1423 if (likely(page)) 1412 if (likely(c && c->page))
1424 flush_slab(s, page, cpu); 1413 flush_slab(s, c);
1425} 1414}
1426 1415
1427static void flush_cpu_slab(void *d) 1416static void flush_cpu_slab(void *d)
1428{ 1417{
1429 struct kmem_cache *s = d; 1418 struct kmem_cache *s = d;
1430 int cpu = smp_processor_id();
1431 1419
1432 __flush_cpu_slab(s, cpu); 1420 __flush_cpu_slab(s, smp_processor_id());
1433} 1421}
1434 1422
1435static void flush_all(struct kmem_cache *s) 1423static void flush_all(struct kmem_cache *s)
@@ -1446,6 +1434,19 @@ static void flush_all(struct kmem_cache *s)
1446} 1434}
1447 1435
1448/* 1436/*
1437 * Check if the objects in a per cpu structure fit numa
1438 * locality expectations.
1439 */
1440static inline int node_match(struct kmem_cache_cpu *c, int node)
1441{
1442#ifdef CONFIG_NUMA
1443 if (node != -1 && c->node != node)
1444 return 0;
1445#endif
1446 return 1;
1447}
1448
1449/*
1449 * Slow path. The lockless freelist is empty or we need to perform 1450 * Slow path. The lockless freelist is empty or we need to perform
1450 * debugging duties. 1451 * debugging duties.
1451 * 1452 *
@@ -1463,45 +1464,53 @@ static void flush_all(struct kmem_cache *s)
1463 * we need to allocate a new slab. This is slowest path since we may sleep. 1464 * we need to allocate a new slab. This is slowest path since we may sleep.
1464 */ 1465 */
1465static void *__slab_alloc(struct kmem_cache *s, 1466static void *__slab_alloc(struct kmem_cache *s,
1466 gfp_t gfpflags, int node, void *addr, struct page *page) 1467 gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
1467{ 1468{
1468 void **object; 1469 void **object;
1469 int cpu = smp_processor_id(); 1470 struct page *new;
1470 1471
1471 if (!page) 1472 if (!c->page)
1472 goto new_slab; 1473 goto new_slab;
1473 1474
1474 slab_lock(page); 1475 slab_lock(c->page);
1475 if (unlikely(node != -1 && page_to_nid(page) != node)) 1476 if (unlikely(!node_match(c, node)))
1476 goto another_slab; 1477 goto another_slab;
1477load_freelist: 1478load_freelist:
1478 object = page->freelist; 1479 object = c->page->freelist;
1479 if (unlikely(!object)) 1480 if (unlikely(!object))
1480 goto another_slab; 1481 goto another_slab;
1481 if (unlikely(SlabDebug(page))) 1482 if (unlikely(SlabDebug(c->page)))
1482 goto debug; 1483 goto debug;
1483 1484
1484 object = page->freelist; 1485 object = c->page->freelist;
1485 page->lockless_freelist = object[page->offset]; 1486 c->freelist = object[c->offset];
1486 page->inuse = s->objects; 1487 c->page->inuse = s->objects;
1487 page->freelist = NULL; 1488 c->page->freelist = NULL;
1488 slab_unlock(page); 1489 c->node = page_to_nid(c->page);
1490 slab_unlock(c->page);
1489 return object; 1491 return object;
1490 1492
1491another_slab: 1493another_slab:
1492 deactivate_slab(s, page, cpu); 1494 deactivate_slab(s, c);
1493 1495
1494new_slab: 1496new_slab:
1495 page = get_partial(s, gfpflags, node); 1497 new = get_partial(s, gfpflags, node);
1496 if (page) { 1498 if (new) {
1497 s->cpu_slab[cpu] = page; 1499 c->page = new;
1498 goto load_freelist; 1500 goto load_freelist;
1499 } 1501 }
1500 1502
1501 page = new_slab(s, gfpflags, node); 1503 if (gfpflags & __GFP_WAIT)
1502 if (page) { 1504 local_irq_enable();
1503 cpu = smp_processor_id(); 1505
1504 if (s->cpu_slab[cpu]) { 1506 new = new_slab(s, gfpflags, node);
1507
1508 if (gfpflags & __GFP_WAIT)
1509 local_irq_disable();
1510
1511 if (new) {
1512 c = get_cpu_slab(s, smp_processor_id());
1513 if (c->page) {
1505 /* 1514 /*
1506 * Someone else populated the cpu_slab while we 1515 * Someone else populated the cpu_slab while we
1507 * enabled interrupts, or we have gotten scheduled 1516 * enabled interrupts, or we have gotten scheduled
@@ -1509,34 +1518,33 @@ new_slab:
1509 * requested node even if __GFP_THISNODE was 1518 * requested node even if __GFP_THISNODE was
1510 * specified. So we need to recheck. 1519 * specified. So we need to recheck.
1511 */ 1520 */
1512 if (node == -1 || 1521 if (node_match(c, node)) {
1513 page_to_nid(s->cpu_slab[cpu]) == node) {
1514 /* 1522 /*
1515 * Current cpuslab is acceptable and we 1523 * Current cpuslab is acceptable and we
1516 * want the current one since its cache hot 1524 * want the current one since its cache hot
1517 */ 1525 */
1518 discard_slab(s, page); 1526 discard_slab(s, new);
1519 page = s->cpu_slab[cpu]; 1527 slab_lock(c->page);
1520 slab_lock(page);
1521 goto load_freelist; 1528 goto load_freelist;
1522 } 1529 }
1523 /* New slab does not fit our expectations */ 1530 /* New slab does not fit our expectations */
1524 flush_slab(s, s->cpu_slab[cpu], cpu); 1531 flush_slab(s, c);
1525 } 1532 }
1526 slab_lock(page); 1533 slab_lock(new);
1527 SetSlabFrozen(page); 1534 SetSlabFrozen(new);
1528 s->cpu_slab[cpu] = page; 1535 c->page = new;
1529 goto load_freelist; 1536 goto load_freelist;
1530 } 1537 }
1531 return NULL; 1538 return NULL;
1532debug: 1539debug:
1533 object = page->freelist; 1540 object = c->page->freelist;
1534 if (!alloc_debug_processing(s, page, object, addr)) 1541 if (!alloc_debug_processing(s, c->page, object, addr))
1535 goto another_slab; 1542 goto another_slab;
1536 1543
1537 page->inuse++; 1544 c->page->inuse++;
1538 page->freelist = object[page->offset]; 1545 c->page->freelist = object[c->offset];
1539 slab_unlock(page); 1546 c->node = -1;
1547 slab_unlock(c->page);
1540 return object; 1548 return object;
1541} 1549}
1542 1550
@@ -1553,25 +1561,24 @@ debug:
1553static void __always_inline *slab_alloc(struct kmem_cache *s, 1561static void __always_inline *slab_alloc(struct kmem_cache *s,
1554 gfp_t gfpflags, int node, void *addr) 1562 gfp_t gfpflags, int node, void *addr)
1555{ 1563{
1556 struct page *page;
1557 void **object; 1564 void **object;
1558 unsigned long flags; 1565 unsigned long flags;
1566 struct kmem_cache_cpu *c;
1559 1567
1560 local_irq_save(flags); 1568 local_irq_save(flags);
1561 page = s->cpu_slab[smp_processor_id()]; 1569 c = get_cpu_slab(s, smp_processor_id());
1562 if (unlikely(!page || !page->lockless_freelist || 1570 if (unlikely(!c->freelist || !node_match(c, node)))
1563 (node != -1 && page_to_nid(page) != node)))
1564 1571
1565 object = __slab_alloc(s, gfpflags, node, addr, page); 1572 object = __slab_alloc(s, gfpflags, node, addr, c);
1566 1573
1567 else { 1574 else {
1568 object = page->lockless_freelist; 1575 object = c->freelist;
1569 page->lockless_freelist = object[page->offset]; 1576 c->freelist = object[c->offset];
1570 } 1577 }
1571 local_irq_restore(flags); 1578 local_irq_restore(flags);
1572 1579
1573 if (unlikely((gfpflags & __GFP_ZERO) && object)) 1580 if (unlikely((gfpflags & __GFP_ZERO) && object))
1574 memset(object, 0, s->objsize); 1581 memset(object, 0, c->objsize);
1575 1582
1576 return object; 1583 return object;
1577} 1584}
@@ -1599,7 +1606,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
1599 * handling required then we can return immediately. 1606 * handling required then we can return immediately.
1600 */ 1607 */
1601static void __slab_free(struct kmem_cache *s, struct page *page, 1608static void __slab_free(struct kmem_cache *s, struct page *page,
1602 void *x, void *addr) 1609 void *x, void *addr, unsigned int offset)
1603{ 1610{
1604 void *prior; 1611 void *prior;
1605 void **object = (void *)x; 1612 void **object = (void *)x;
@@ -1609,7 +1616,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
1609 if (unlikely(SlabDebug(page))) 1616 if (unlikely(SlabDebug(page)))
1610 goto debug; 1617 goto debug;
1611checks_ok: 1618checks_ok:
1612 prior = object[page->offset] = page->freelist; 1619 prior = object[offset] = page->freelist;
1613 page->freelist = object; 1620 page->freelist = object;
1614 page->inuse--; 1621 page->inuse--;
1615 1622
@@ -1664,15 +1671,16 @@ static void __always_inline slab_free(struct kmem_cache *s,
1664{ 1671{
1665 void **object = (void *)x; 1672 void **object = (void *)x;
1666 unsigned long flags; 1673 unsigned long flags;
1674 struct kmem_cache_cpu *c;
1667 1675
1668 local_irq_save(flags); 1676 local_irq_save(flags);
1669 debug_check_no_locks_freed(object, s->objsize); 1677 debug_check_no_locks_freed(object, s->objsize);
1670 if (likely(page == s->cpu_slab[smp_processor_id()] && 1678 c = get_cpu_slab(s, smp_processor_id());
1671 !SlabDebug(page))) { 1679 if (likely(page == c->page && c->node >= 0)) {
1672 object[page->offset] = page->lockless_freelist; 1680 object[c->offset] = c->freelist;
1673 page->lockless_freelist = object; 1681 c->freelist = object;
1674 } else 1682 } else
1675 __slab_free(s, page, x, addr); 1683 __slab_free(s, page, x, addr, c->offset);
1676 1684
1677 local_irq_restore(flags); 1685 local_irq_restore(flags);
1678} 1686}
@@ -1759,14 +1767,6 @@ static inline int slab_order(int size, int min_objects,
1759 int rem; 1767 int rem;
1760 int min_order = slub_min_order; 1768 int min_order = slub_min_order;
1761 1769
1762 /*
1763 * If we would create too many object per slab then reduce
1764 * the slab order even if it goes below slub_min_order.
1765 */
1766 while (min_order > 0 &&
1767 (PAGE_SIZE << min_order) >= MAX_OBJECTS_PER_SLAB * size)
1768 min_order--;
1769
1770 for (order = max(min_order, 1770 for (order = max(min_order,
1771 fls(min_objects * size - 1) - PAGE_SHIFT); 1771 fls(min_objects * size - 1) - PAGE_SHIFT);
1772 order <= max_order; order++) { 1772 order <= max_order; order++) {
@@ -1781,9 +1781,6 @@ static inline int slab_order(int size, int min_objects,
1781 if (rem <= slab_size / fract_leftover) 1781 if (rem <= slab_size / fract_leftover)
1782 break; 1782 break;
1783 1783
1784 /* If the next size is too high then exit now */
1785 if (slab_size * 2 >= MAX_OBJECTS_PER_SLAB * size)
1786 break;
1787 } 1784 }
1788 1785
1789 return order; 1786 return order;
@@ -1858,6 +1855,16 @@ static unsigned long calculate_alignment(unsigned long flags,
1858 return ALIGN(align, sizeof(void *)); 1855 return ALIGN(align, sizeof(void *));
1859} 1856}
1860 1857
1858static void init_kmem_cache_cpu(struct kmem_cache *s,
1859 struct kmem_cache_cpu *c)
1860{
1861 c->page = NULL;
1862 c->freelist = NULL;
1863 c->node = 0;
1864 c->offset = s->offset / sizeof(void *);
1865 c->objsize = s->objsize;
1866}
1867
1861static void init_kmem_cache_node(struct kmem_cache_node *n) 1868static void init_kmem_cache_node(struct kmem_cache_node *n)
1862{ 1869{
1863 n->nr_partial = 0; 1870 n->nr_partial = 0;
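
init_kmem_cache_cpu() stores the free-pointer offset as a word index (s->offset / sizeof(void *)), so the hot paths can link objects with object[c->offset] instead of doing byte arithmetic. A worked sketch of that indexing; the 8-byte payload is just an example layout:

#include <stdio.h>
#include <stddef.h>

/* Toy object layout: the free pointer lives 8 bytes into the object,
 * e.g. because the leading bytes hold poison or constructed state. */
struct toy_obj {
	char payload[8];
	void *next_free;
};

int main(void)
{
	unsigned int offset = offsetof(struct toy_obj, next_free) / sizeof(void *);
	struct toy_obj o0, o1;
	void **object = (void **)&o0;

	/* This is the object[c->offset] idiom used in the fast paths. */
	object[offset] = &o1;

	printf("byte offset %zu -> word offset %u\n",
	       offsetof(struct toy_obj, next_free), offset);
	printf("o0's next free object: %p (o1 at %p)\n",
	       object[offset], (void *)&o1);
	return 0;
}
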
@@ -1869,6 +1876,131 @@ static void init_kmem_cache_node(struct kmem_cache_node *n)
1869#endif 1876#endif
1870} 1877}
1871 1878
1879#ifdef CONFIG_SMP
1880/*
1881 * Per cpu array for per cpu structures.
1882 *
1883 * The per cpu array places all kmem_cache_cpu structures from one processor
1884 * close together meaning that it becomes possible that multiple per cpu
1885 * structures are contained in one cacheline. This may be particularly
1886 * beneficial for the kmalloc caches.
1887 *
1888 * A desktop system typically has around 60-80 slabs. With 100 here we are
1889 * likely able to get per cpu structures for all caches from the array defined
1890 * here. We must be able to cover all kmalloc caches during bootstrap.
1891 *
1892 * If the per cpu array is exhausted then fall back to kmalloc
1893 * of individual cachelines. No sharing is possible then.
1894 */
1895#define NR_KMEM_CACHE_CPU 100
1896
1897static DEFINE_PER_CPU(struct kmem_cache_cpu,
1898 kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
1899
1900static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
1901static cpumask_t kmem_cach_cpu_free_init_once = CPU_MASK_NONE;
1902
1903static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
1904 int cpu, gfp_t flags)
1905{
1906 struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu);
1907
1908 if (c)
1909 per_cpu(kmem_cache_cpu_free, cpu) =
1910 (void *)c->freelist;
1911 else {
1912 /* Table overflow: So allocate ourselves */
1913 c = kmalloc_node(
1914 ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()),
1915 flags, cpu_to_node(cpu));
1916 if (!c)
1917 return NULL;
1918 }
1919
1920 init_kmem_cache_cpu(s, c);
1921 return c;
1922}
1923
1924static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
1925{
1926 if (c < per_cpu(kmem_cache_cpu, cpu) ||
1927 c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
1928 kfree(c);
1929 return;
1930 }
1931 c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu);
1932 per_cpu(kmem_cache_cpu_free, cpu) = c;
1933}
1934
1935static void free_kmem_cache_cpus(struct kmem_cache *s)
1936{
1937 int cpu;
1938
1939 for_each_online_cpu(cpu) {
1940 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
1941
1942 if (c) {
1943 s->cpu_slab[cpu] = NULL;
1944 free_kmem_cache_cpu(c, cpu);
1945 }
1946 }
1947}
1948
1949static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
1950{
1951 int cpu;
1952
1953 for_each_online_cpu(cpu) {
1954 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
1955
1956 if (c)
1957 continue;
1958
1959 c = alloc_kmem_cache_cpu(s, cpu, flags);
1960 if (!c) {
1961 free_kmem_cache_cpus(s);
1962 return 0;
1963 }
1964 s->cpu_slab[cpu] = c;
1965 }
1966 return 1;
1967}
1968
1969/*
1970 * Initialize the per cpu array.
1971 */
1972static void init_alloc_cpu_cpu(int cpu)
1973{
1974 int i;
1975
1976 if (cpu_isset(cpu, kmem_cach_cpu_free_init_once))
1977 return;
1978
1979 for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
1980 free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
1981
1982 cpu_set(cpu, kmem_cach_cpu_free_init_once);
1983}
1984
1985static void __init init_alloc_cpu(void)
1986{
1987 int cpu;
1988
1989 for_each_online_cpu(cpu)
1990 init_alloc_cpu_cpu(cpu);
1991 }
1992
1993#else
1994static inline void free_kmem_cache_cpus(struct kmem_cache *s) {}
1995static inline void init_alloc_cpu(void) {}
1996
1997static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
1998{
1999 init_kmem_cache_cpu(s, &s->cpu_slab);
2000 return 1;
2001}
2002#endif
2003
1872#ifdef CONFIG_NUMA 2004#ifdef CONFIG_NUMA
1873/* 2005/*
1874 * No kmalloc_node yet so do it by hand. We know that this is the first 2006 * No kmalloc_node yet so do it by hand. We know that this is the first
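
The bootstrap code in the hunk above hands out kmem_cache_cpu structures from a fixed per-cpu array and threads the unused entries through their freelist member, falling back to kmalloc when the array is exhausted. A self-contained sketch of that technique (single global pool instead of per-cpu, and malloc standing in for kmalloc):

#include <stdio.h>
#include <stdlib.h>

#define POOL_SIZE 4

struct pcpu { void *freelist; int offset; };

static struct pcpu pool[POOL_SIZE];
static struct pcpu *pool_free;		/* head of the chain of unused slots */

static void pool_release(struct pcpu *c)
{
	if (c < pool || c >= pool + POOL_SIZE) {
		free(c);		/* came from the heap, not the pool */
		return;
	}
	c->freelist = pool_free;	/* thread it back through ->freelist */
	pool_free = c;
}

static struct pcpu *pool_get(void)
{
	struct pcpu *c = pool_free;

	if (c)
		pool_free = c->freelist;
	else
		c = malloc(sizeof(*c));	/* pool exhausted: fall back to the heap */
	return c;
}

int main(void)
{
	int i;

	for (i = POOL_SIZE - 1; i >= 0; i--)	/* seed the chain once at startup */
		pool_release(&pool[i]);

	for (i = 0; i < POOL_SIZE + 1; i++)	/* the last one falls back to malloc */
		printf("slot %d: %p\n", i, (void *)pool_get());
	return 0;
}
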
@@ -1876,10 +2008,11 @@ static void init_kmem_cache_node(struct kmem_cache_node *n)
1876 * possible. 2008 * possible.
1877 * 2009 *
1878 * Note that this function only works on the kmalloc_node_cache 2010 * Note that this function only works on the kmalloc_node_cache
1879 * when allocating for the kmalloc_node_cache. 2011 * when allocating for the kmalloc_node_cache. This is used for bootstrapping
2012 * memory on a fresh node that has no slab structures yet.
1880 */ 2013 */
1881static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflags, 2014static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
1882 int node) 2015 int node)
1883{ 2016{
1884 struct page *page; 2017 struct page *page;
1885 struct kmem_cache_node *n; 2018 struct kmem_cache_node *n;
@@ -1908,12 +2041,6 @@ static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflag
1908 init_kmem_cache_node(n); 2041 init_kmem_cache_node(n);
1909 atomic_long_inc(&n->nr_slabs); 2042 atomic_long_inc(&n->nr_slabs);
1910 add_partial(n, page); 2043 add_partial(n, page);
1911
1912 /*
1913 * new_slab() disables interupts. If we do not reenable interrupts here
1914 * then bootup would continue with interrupts disabled.
1915 */
1916 local_irq_enable();
1917 return n; 2044 return n;
1918} 2045}
1919 2046
@@ -1921,7 +2048,7 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
1921{ 2048{
1922 int node; 2049 int node;
1923 2050
1924 for_each_online_node(node) { 2051 for_each_node_state(node, N_NORMAL_MEMORY) {
1925 struct kmem_cache_node *n = s->node[node]; 2052 struct kmem_cache_node *n = s->node[node];
1926 if (n && n != &s->local_node) 2053 if (n && n != &s->local_node)
1927 kmem_cache_free(kmalloc_caches, n); 2054 kmem_cache_free(kmalloc_caches, n);
@@ -1939,7 +2066,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
1939 else 2066 else
1940 local_node = 0; 2067 local_node = 0;
1941 2068
1942 for_each_online_node(node) { 2069 for_each_node_state(node, N_NORMAL_MEMORY) {
1943 struct kmem_cache_node *n; 2070 struct kmem_cache_node *n;
1944 2071
1945 if (local_node == node) 2072 if (local_node == node)
@@ -2077,21 +2204,14 @@ static int calculate_sizes(struct kmem_cache *s)
2077 */ 2204 */
2078 s->objects = (PAGE_SIZE << s->order) / size; 2205 s->objects = (PAGE_SIZE << s->order) / size;
2079 2206
2080 /* 2207 return !!s->objects;
2081 * Verify that the number of objects is within permitted limits.
2082 * The page->inuse field is only 16 bit wide! So we cannot have
2083 * more than 64k objects per slab.
2084 */
2085 if (!s->objects || s->objects > MAX_OBJECTS_PER_SLAB)
2086 return 0;
2087 return 1;
2088 2208
2089} 2209}
2090 2210
2091static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, 2211static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
2092 const char *name, size_t size, 2212 const char *name, size_t size,
2093 size_t align, unsigned long flags, 2213 size_t align, unsigned long flags,
2094 void (*ctor)(void *, struct kmem_cache *, unsigned long)) 2214 void (*ctor)(struct kmem_cache *, void *))
2095{ 2215{
2096 memset(s, 0, kmem_size); 2216 memset(s, 0, kmem_size);
2097 s->name = name; 2217 s->name = name;
@@ -2107,9 +2227,12 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
2107#ifdef CONFIG_NUMA 2227#ifdef CONFIG_NUMA
2108 s->defrag_ratio = 100; 2228 s->defrag_ratio = 100;
2109#endif 2229#endif
2230 if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
2231 goto error;
2110 2232
2111 if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) 2233 if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA))
2112 return 1; 2234 return 1;
2235 free_kmem_cache_nodes(s);
2113error: 2236error:
2114 if (flags & SLAB_PANIC) 2237 if (flags & SLAB_PANIC)
2115 panic("Cannot create slab %s size=%lu realsize=%u " 2238 panic("Cannot create slab %s size=%lu realsize=%u "
@@ -2192,7 +2315,8 @@ static inline int kmem_cache_close(struct kmem_cache *s)
2192 flush_all(s); 2315 flush_all(s);
2193 2316
2194 /* Attempt to free all objects */ 2317 /* Attempt to free all objects */
2195 for_each_online_node(node) { 2318 free_kmem_cache_cpus(s);
2319 for_each_node_state(node, N_NORMAL_MEMORY) {
2196 struct kmem_cache_node *n = get_node(s, node); 2320 struct kmem_cache_node *n = get_node(s, node);
2197 2321
2198 n->nr_partial -= free_list(s, n, &n->partial); 2322 n->nr_partial -= free_list(s, n, &n->partial);
@@ -2227,11 +2351,11 @@ EXPORT_SYMBOL(kmem_cache_destroy);
2227 * Kmalloc subsystem 2351 * Kmalloc subsystem
2228 *******************************************************************/ 2352 *******************************************************************/
2229 2353
2230struct kmem_cache kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __cacheline_aligned; 2354struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned;
2231EXPORT_SYMBOL(kmalloc_caches); 2355EXPORT_SYMBOL(kmalloc_caches);
2232 2356
2233#ifdef CONFIG_ZONE_DMA 2357#ifdef CONFIG_ZONE_DMA
2234static struct kmem_cache *kmalloc_caches_dma[KMALLOC_SHIFT_HIGH + 1]; 2358static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT];
2235#endif 2359#endif
2236 2360
2237static int __init setup_slub_min_order(char *str) 2361static int __init setup_slub_min_order(char *str)
@@ -2397,12 +2521,8 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
2397 return ZERO_SIZE_PTR; 2521 return ZERO_SIZE_PTR;
2398 2522
2399 index = size_index[(size - 1) / 8]; 2523 index = size_index[(size - 1) / 8];
2400 } else { 2524 } else
2401 if (size > KMALLOC_MAX_SIZE)
2402 return NULL;
2403
2404 index = fls(size - 1); 2525 index = fls(size - 1);
2405 }
2406 2526
2407#ifdef CONFIG_ZONE_DMA 2527#ifdef CONFIG_ZONE_DMA
2408 if (unlikely((flags & SLUB_DMA))) 2528 if (unlikely((flags & SLUB_DMA)))
@@ -2414,9 +2534,15 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
2414 2534
2415void *__kmalloc(size_t size, gfp_t flags) 2535void *__kmalloc(size_t size, gfp_t flags)
2416{ 2536{
2417 struct kmem_cache *s = get_slab(size, flags); 2537 struct kmem_cache *s;
2418 2538
2419 if (ZERO_OR_NULL_PTR(s)) 2539 if (unlikely(size > PAGE_SIZE / 2))
2540 return (void *)__get_free_pages(flags | __GFP_COMP,
2541 get_order(size));
2542
2543 s = get_slab(size, flags);
2544
2545 if (unlikely(ZERO_OR_NULL_PTR(s)))
2420 return s; 2546 return s;
2421 2547
2422 return slab_alloc(s, flags, -1, __builtin_return_address(0)); 2548 return slab_alloc(s, flags, -1, __builtin_return_address(0));
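
After this change, kmalloc() requests larger than half a page skip the slab caches entirely and come straight from the page allocator as compound pages. A userspace sketch of the size routing; get_order_demo() is a from-scratch stand-in for the kernel's get_order() and PAGE_SIZE is assumed to be 4096:

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Smallest order such that (PAGE_SIZE << order) >= size. */
static int get_order_demo(unsigned long size)
{
	int order = 0;

	while ((PAGE_SIZE << order) < size)
		order++;
	return order;
}

static const char *kmalloc_route(unsigned long size)
{
	static char buf[64];

	if (size > PAGE_SIZE / 2) {
		snprintf(buf, sizeof(buf),
			 "page allocator, order %d", get_order_demo(size));
		return buf;
	}
	snprintf(buf, sizeof(buf), "kmalloc slab cache");
	return buf;
}

int main(void)
{
	unsigned long sizes[] = { 64, 2048, 2049, 8192, 70000 };

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("kmalloc(%lu) -> %s\n", sizes[i], kmalloc_route(sizes[i]));
	return 0;
}
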
@@ -2426,9 +2552,15 @@ EXPORT_SYMBOL(__kmalloc);
2426#ifdef CONFIG_NUMA 2552#ifdef CONFIG_NUMA
2427void *__kmalloc_node(size_t size, gfp_t flags, int node) 2553void *__kmalloc_node(size_t size, gfp_t flags, int node)
2428{ 2554{
2429 struct kmem_cache *s = get_slab(size, flags); 2555 struct kmem_cache *s;
2430 2556
2431 if (ZERO_OR_NULL_PTR(s)) 2557 if (unlikely(size > PAGE_SIZE / 2))
2558 return (void *)__get_free_pages(flags | __GFP_COMP,
2559 get_order(size));
2560
2561 s = get_slab(size, flags);
2562
2563 if (unlikely(ZERO_OR_NULL_PTR(s)))
2432 return s; 2564 return s;
2433 2565
2434 return slab_alloc(s, flags, node, __builtin_return_address(0)); 2566 return slab_alloc(s, flags, node, __builtin_return_address(0));
@@ -2441,7 +2573,8 @@ size_t ksize(const void *object)
2441 struct page *page; 2573 struct page *page;
2442 struct kmem_cache *s; 2574 struct kmem_cache *s;
2443 2575
2444 if (ZERO_OR_NULL_PTR(object)) 2576 BUG_ON(!object);
2577 if (unlikely(object == ZERO_SIZE_PTR))
2445 return 0; 2578 return 0;
2446 2579
2447 page = get_object_page(object); 2580 page = get_object_page(object);
@@ -2473,22 +2606,17 @@ EXPORT_SYMBOL(ksize);
2473 2606
2474void kfree(const void *x) 2607void kfree(const void *x)
2475{ 2608{
2476 struct kmem_cache *s;
2477 struct page *page; 2609 struct page *page;
2478 2610
2479 /* 2611 if (unlikely(ZERO_OR_NULL_PTR(x)))
2480 * This has to be an unsigned comparison. According to Linus
2481 * some gcc version treat a pointer as a signed entity. Then
2482 * this comparison would be true for all "negative" pointers
2483 * (which would cover the whole upper half of the address space).
2484 */
2485 if (ZERO_OR_NULL_PTR(x))
2486 return; 2612 return;
2487 2613
2488 page = virt_to_head_page(x); 2614 page = virt_to_head_page(x);
2489 s = page->slab; 2615 if (unlikely(!PageSlab(page))) {
2490 2616 put_page(page);
2491 slab_free(s, page, (void *)x, __builtin_return_address(0)); 2617 return;
2618 }
2619 slab_free(page->slab, page, (void *)x, __builtin_return_address(0));
2492} 2620}
2493EXPORT_SYMBOL(kfree); 2621EXPORT_SYMBOL(kfree);
2494 2622
@@ -2517,7 +2645,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
2517 return -ENOMEM; 2645 return -ENOMEM;
2518 2646
2519 flush_all(s); 2647 flush_all(s);
2520 for_each_online_node(node) { 2648 for_each_node_state(node, N_NORMAL_MEMORY) {
2521 n = get_node(s, node); 2649 n = get_node(s, node);
2522 2650
2523 if (!n->nr_partial) 2651 if (!n->nr_partial)
@@ -2575,6 +2703,8 @@ void __init kmem_cache_init(void)
2575 int i; 2703 int i;
2576 int caches = 0; 2704 int caches = 0;
2577 2705
2706 init_alloc_cpu();
2707
2578#ifdef CONFIG_NUMA 2708#ifdef CONFIG_NUMA
2579 /* 2709 /*
2580 * Must first have the slab cache available for the allocations of the 2710 * Must first have the slab cache available for the allocations of the
@@ -2602,7 +2732,7 @@ void __init kmem_cache_init(void)
2602 caches++; 2732 caches++;
2603 } 2733 }
2604 2734
2605 for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { 2735 for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) {
2606 create_kmalloc_cache(&kmalloc_caches[i], 2736 create_kmalloc_cache(&kmalloc_caches[i],
2607 "kmalloc", 1 << i, GFP_KERNEL); 2737 "kmalloc", 1 << i, GFP_KERNEL);
2608 caches++; 2738 caches++;
@@ -2629,16 +2759,18 @@ void __init kmem_cache_init(void)
2629 slab_state = UP; 2759 slab_state = UP;
2630 2760
2631 /* Provide the correct kmalloc names now that the caches are up */ 2761 /* Provide the correct kmalloc names now that the caches are up */
2632 for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) 2762 for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++)
2633 kmalloc_caches[i]. name = 2763 kmalloc_caches[i]. name =
2634 kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); 2764 kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
2635 2765
2636#ifdef CONFIG_SMP 2766#ifdef CONFIG_SMP
2637 register_cpu_notifier(&slab_notifier); 2767 register_cpu_notifier(&slab_notifier);
2768 kmem_size = offsetof(struct kmem_cache, cpu_slab) +
2769 nr_cpu_ids * sizeof(struct kmem_cache_cpu *);
2770#else
2771 kmem_size = sizeof(struct kmem_cache);
2638#endif 2772#endif
2639 2773
2640 kmem_size = offsetof(struct kmem_cache, cpu_slab) +
2641 nr_cpu_ids * sizeof(struct page *);
2642 2774
2643 printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," 2775 printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
2644 " CPUs=%d, Nodes=%d\n", 2776 " CPUs=%d, Nodes=%d\n",
@@ -2669,7 +2801,7 @@ static int slab_unmergeable(struct kmem_cache *s)
2669 2801
2670static struct kmem_cache *find_mergeable(size_t size, 2802static struct kmem_cache *find_mergeable(size_t size,
2671 size_t align, unsigned long flags, const char *name, 2803 size_t align, unsigned long flags, const char *name,
2672 void (*ctor)(void *, struct kmem_cache *, unsigned long)) 2804 void (*ctor)(struct kmem_cache *, void *))
2673{ 2805{
2674 struct kmem_cache *s; 2806 struct kmem_cache *s;
2675 2807
@@ -2710,19 +2842,28 @@ static struct kmem_cache *find_mergeable(size_t size,
2710 2842
2711struct kmem_cache *kmem_cache_create(const char *name, size_t size, 2843struct kmem_cache *kmem_cache_create(const char *name, size_t size,
2712 size_t align, unsigned long flags, 2844 size_t align, unsigned long flags,
2713 void (*ctor)(void *, struct kmem_cache *, unsigned long)) 2845 void (*ctor)(struct kmem_cache *, void *))
2714{ 2846{
2715 struct kmem_cache *s; 2847 struct kmem_cache *s;
2716 2848
2717 down_write(&slub_lock); 2849 down_write(&slub_lock);
2718 s = find_mergeable(size, align, flags, name, ctor); 2850 s = find_mergeable(size, align, flags, name, ctor);
2719 if (s) { 2851 if (s) {
2852 int cpu;
2853
2720 s->refcount++; 2854 s->refcount++;
2721 /* 2855 /*
2722 * Adjust the object sizes so that we clear 2856 * Adjust the object sizes so that we clear
2723 * the complete object on kzalloc. 2857 * the complete object on kzalloc.
2724 */ 2858 */
2725 s->objsize = max(s->objsize, (int)size); 2859 s->objsize = max(s->objsize, (int)size);
2860
2861 /*
2862 * And then we need to update the object size in the
2863 * per cpu structures
2864 */
2865 for_each_online_cpu(cpu)
2866 get_cpu_slab(s, cpu)->objsize = s->objsize;
2726 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 2867 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
2727 up_write(&slub_lock); 2868 up_write(&slub_lock);
2728 if (sysfs_slab_alias(s, name)) 2869 if (sysfs_slab_alias(s, name))
@@ -2765,15 +2906,29 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
2765 unsigned long flags; 2906 unsigned long flags;
2766 2907
2767 switch (action) { 2908 switch (action) {
2909 case CPU_UP_PREPARE:
2910 case CPU_UP_PREPARE_FROZEN:
2911 init_alloc_cpu_cpu(cpu);
2912 down_read(&slub_lock);
2913 list_for_each_entry(s, &slab_caches, list)
2914 s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu,
2915 GFP_KERNEL);
2916 up_read(&slub_lock);
2917 break;
2918
2768 case CPU_UP_CANCELED: 2919 case CPU_UP_CANCELED:
2769 case CPU_UP_CANCELED_FROZEN: 2920 case CPU_UP_CANCELED_FROZEN:
2770 case CPU_DEAD: 2921 case CPU_DEAD:
2771 case CPU_DEAD_FROZEN: 2922 case CPU_DEAD_FROZEN:
2772 down_read(&slub_lock); 2923 down_read(&slub_lock);
2773 list_for_each_entry(s, &slab_caches, list) { 2924 list_for_each_entry(s, &slab_caches, list) {
2925 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
2926
2774 local_irq_save(flags); 2927 local_irq_save(flags);
2775 __flush_cpu_slab(s, cpu); 2928 __flush_cpu_slab(s, cpu);
2776 local_irq_restore(flags); 2929 local_irq_restore(flags);
2930 free_kmem_cache_cpu(c, cpu);
2931 s->cpu_slab[cpu] = NULL;
2777 } 2932 }
2778 up_read(&slub_lock); 2933 up_read(&slub_lock);
2779 break; 2934 break;
@@ -2790,9 +2945,14 @@ static struct notifier_block __cpuinitdata slab_notifier =
2790 2945
2791void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) 2946void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
2792{ 2947{
2793 struct kmem_cache *s = get_slab(size, gfpflags); 2948 struct kmem_cache *s;
2949
2950 if (unlikely(size > PAGE_SIZE / 2))
2951 return (void *)__get_free_pages(gfpflags | __GFP_COMP,
2952 get_order(size));
2953 s = get_slab(size, gfpflags);
2794 2954
2795 if (ZERO_OR_NULL_PTR(s)) 2955 if (unlikely(ZERO_OR_NULL_PTR(s)))
2796 return s; 2956 return s;
2797 2957
2798 return slab_alloc(s, gfpflags, -1, caller); 2958 return slab_alloc(s, gfpflags, -1, caller);
@@ -2801,9 +2961,14 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
2801void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, 2961void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
2802 int node, void *caller) 2962 int node, void *caller)
2803{ 2963{
2804 struct kmem_cache *s = get_slab(size, gfpflags); 2964 struct kmem_cache *s;
2965
2966 if (unlikely(size > PAGE_SIZE / 2))
2967 return (void *)__get_free_pages(gfpflags | __GFP_COMP,
2968 get_order(size));
2969 s = get_slab(size, gfpflags);
2805 2970
2806 if (ZERO_OR_NULL_PTR(s)) 2971 if (unlikely(ZERO_OR_NULL_PTR(s)))
2807 return s; 2972 return s;
2808 2973
2809 return slab_alloc(s, gfpflags, node, caller); 2974 return slab_alloc(s, gfpflags, node, caller);
@@ -2902,7 +3067,7 @@ static long validate_slab_cache(struct kmem_cache *s)
2902 return -ENOMEM; 3067 return -ENOMEM;
2903 3068
2904 flush_all(s); 3069 flush_all(s);
2905 for_each_online_node(node) { 3070 for_each_node_state(node, N_NORMAL_MEMORY) {
2906 struct kmem_cache_node *n = get_node(s, node); 3071 struct kmem_cache_node *n = get_node(s, node);
2907 3072
2908 count += validate_slab_node(s, n, map); 3073 count += validate_slab_node(s, n, map);
@@ -3116,13 +3281,13 @@ static int list_locations(struct kmem_cache *s, char *buf,
3116 int node; 3281 int node;
3117 3282
3118 if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), 3283 if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
3119 GFP_KERNEL)) 3284 GFP_TEMPORARY))
3120 return sprintf(buf, "Out of memory\n"); 3285 return sprintf(buf, "Out of memory\n");
3121 3286
3122 /* Push back cpu slabs */ 3287 /* Push back cpu slabs */
3123 flush_all(s); 3288 flush_all(s);
3124 3289
3125 for_each_online_node(node) { 3290 for_each_node_state(node, N_NORMAL_MEMORY) {
3126 struct kmem_cache_node *n = get_node(s, node); 3291 struct kmem_cache_node *n = get_node(s, node);
3127 unsigned long flags; 3292 unsigned long flags;
3128 struct page *page; 3293 struct page *page;
@@ -3230,11 +3395,18 @@ static unsigned long slab_objects(struct kmem_cache *s,
3230 per_cpu = nodes + nr_node_ids; 3395 per_cpu = nodes + nr_node_ids;
3231 3396
3232 for_each_possible_cpu(cpu) { 3397 for_each_possible_cpu(cpu) {
3233 struct page *page = s->cpu_slab[cpu]; 3398 struct page *page;
3234 int node; 3399 int node;
3400 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
3235 3401
3402 if (!c)
3403 continue;
3404
3405 page = c->page;
3406 node = c->node;
3407 if (node < 0)
3408 continue;
3236 if (page) { 3409 if (page) {
3237 node = page_to_nid(page);
3238 if (flags & SO_CPU) { 3410 if (flags & SO_CPU) {
3239 int x = 0; 3411 int x = 0;
3240 3412
@@ -3249,7 +3421,7 @@ static unsigned long slab_objects(struct kmem_cache *s,
3249 } 3421 }
3250 } 3422 }
3251 3423
3252 for_each_online_node(node) { 3424 for_each_node_state(node, N_NORMAL_MEMORY) {
3253 struct kmem_cache_node *n = get_node(s, node); 3425 struct kmem_cache_node *n = get_node(s, node);
3254 3426
3255 if (flags & SO_PARTIAL) { 3427 if (flags & SO_PARTIAL) {
@@ -3277,7 +3449,7 @@ static unsigned long slab_objects(struct kmem_cache *s,
3277 3449
3278 x = sprintf(buf, "%lu", total); 3450 x = sprintf(buf, "%lu", total);
3279#ifdef CONFIG_NUMA 3451#ifdef CONFIG_NUMA
3280 for_each_online_node(node) 3452 for_each_node_state(node, N_NORMAL_MEMORY)
3281 if (nodes[node]) 3453 if (nodes[node])
3282 x += sprintf(buf + x, " N%d=%lu", 3454 x += sprintf(buf + x, " N%d=%lu",
3283 node, nodes[node]); 3455 node, nodes[node]);
@@ -3291,13 +3463,19 @@ static int any_slab_objects(struct kmem_cache *s)
3291 int node; 3463 int node;
3292 int cpu; 3464 int cpu;
3293 3465
3294 for_each_possible_cpu(cpu) 3466 for_each_possible_cpu(cpu) {
3295 if (s->cpu_slab[cpu]) 3467 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
3468
3469 if (c && c->page)
3296 return 1; 3470 return 1;
3471 }
3297 3472
3298 for_each_node(node) { 3473 for_each_online_node(node) {
3299 struct kmem_cache_node *n = get_node(s, node); 3474 struct kmem_cache_node *n = get_node(s, node);
3300 3475
3476 if (!n)
3477 continue;
3478
3301 if (n->nr_partial || atomic_long_read(&n->nr_slabs)) 3479 if (n->nr_partial || atomic_long_read(&n->nr_slabs))
3302 return 1; 3480 return 1;
3303 } 3481 }