Diffstat:
-rw-r--r--	include/linux/slub_def.h	  9
-rw-r--r--	mm/slub.c			190
2 files changed, 124 insertions, 75 deletions
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 3b361b2906bd..0a7ae25f7e8d 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -11,6 +11,13 @@
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
 
+struct kmem_cache_cpu {
+	void **freelist;
+	struct page *page;
+	int node;
+	/* Lots of wasted space */
+} ____cacheline_aligned_in_smp;
+
 struct kmem_cache_node {
 	spinlock_t list_lock;	/* Protect partial list and nr_partial */
 	unsigned long nr_partial;
@@ -54,7 +61,7 @@ struct kmem_cache {
 	int defrag_ratio;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 #endif
-	struct page *cpu_slab[NR_CPUS];
+	struct kmem_cache_cpu cpu_slab[NR_CPUS];
 };
 
 /*
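
The slub_def.h change above is the heart of the patch: the per-cpu allocation state that used to be a bare struct page pointer in s->cpu_slab[] (with the lockless freelist stored in the page struct itself) moves into a dedicated, cacheline-aligned struct kmem_cache_cpu. As a rough userspace-only model of the resulting layout (the NR_CPUS value, field comments, and main() are illustrative, not taken from the kernel):

/* Illustrative userspace model -- not part of the patch. */
#include <stdio.h>

#define NR_CPUS 4			/* made-up compile-time maximum */

struct page;				/* stand-in for the kernel's struct page */

struct kmem_cache_cpu {
	void **freelist;		/* lockless freelist, was page->lockless_freelist */
	struct page *page;		/* the cpu slab, was s->cpu_slab[cpu] */
	int node;			/* NUMA node the cpu slab came from */
};

struct kmem_cache {
	/* ...the cache-wide fields are elided... */
	struct kmem_cache_cpu cpu_slab[NR_CPUS];
};

static struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
{
	return &s->cpu_slab[cpu];	/* mirrors the helper added in mm/slub.c */
}

int main(void)
{
	static struct kmem_cache s;	/* zero-initialized */

	printf("per-cpu state: %zu bytes per cpu\n", sizeof(*get_cpu_slab(&s, 0)));
	return 0;
}

The ____cacheline_aligned_in_smp annotation in the real structure pads each CPU's entry out to its own cacheline so CPUs do not false-share each other's allocation state, which is presumably what the "Lots of wasted space" comment refers to.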
diff --git a/mm/slub.c b/mm/slub.c
index a90c4ffc9576..4b8037f14fce 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -90,7 +90,7 @@
  * One use of this flag is to mark slabs that are
  * used for allocations. Then such a slab becomes a cpu
  * slab. The cpu slab may be equipped with an additional
- * lockless_freelist that allows lockless access to
+ * freelist that allows lockless access to
  * free objects in addition to the regular freelist
  * that requires the slab lock.
  *
@@ -140,11 +140,6 @@ static inline void ClearSlabDebug(struct page *page)
 /*
  * Issues still to be resolved:
  *
- * - The per cpu array is updated for each new slab and and is a remote
- *   cacheline for most nodes. This could become a bouncing cacheline given
- *   enough frequent updates. There are 16 pointers in a cacheline, so at
- *   max 16 cpus could compete for the cacheline which may be okay.
- *
  * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
  *
  * - Variable sizing of the per node arrays
@@ -277,6 +272,11 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
 #endif
 }
 
+static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
+{
+	return &s->cpu_slab[cpu];
+}
+
 static inline int check_valid_pointer(struct kmem_cache *s,
 				struct page *page, const void *object)
 {
@@ -1387,33 +1387,34 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page)
 /*
  * Remove the cpu slab
  */
-static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu)
+static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
+	struct page *page = c->page;
 	/*
 	 * Merge cpu freelist into freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
 	 * to occur.
 	 */
-	while (unlikely(page->lockless_freelist)) {
+	while (unlikely(c->freelist)) {
 		void **object;
 
 		/* Retrieve object from cpu_freelist */
-		object = page->lockless_freelist;
-		page->lockless_freelist = page->lockless_freelist[page->offset];
+		object = c->freelist;
+		c->freelist = c->freelist[page->offset];
 
 		/* And put onto the regular freelist */
 		object[page->offset] = page->freelist;
 		page->freelist = object;
 		page->inuse--;
 	}
-	s->cpu_slab[cpu] = NULL;
+	c->page = NULL;
 	unfreeze_slab(s, page);
 }
 
-static inline void flush_slab(struct kmem_cache *s, struct page *page, int cpu)
+static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
-	slab_lock(page);
-	deactivate_slab(s, page, cpu);
+	slab_lock(c->page);
+	deactivate_slab(s, c);
 }
 
 /*
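
The merge loop in deactivate_slab() relies on SLUB's convention that the freelist is threaded through the free objects themselves: each free object stores the pointer to the next free object at word index page->offset. A minimal userspace sketch of that linking, with made-up object size and offset (it models only the idea, not the kernel's actual slab layout):

/* Illustrative model of a freelist threaded through the free objects. */
#include <stdio.h>
#include <stdlib.h>

#define NOBJ   4	/* objects in the fake slab (made up) */
#define WORDS  8	/* pointer-sized words per object (made up) */
#define OFFSET 0	/* word index holding the "next free" link */

int main(void)
{
	void **slab = calloc(NOBJ * WORDS, sizeof(void *));
	void **freelist = NULL;
	int i;

	/* Build the list: each free object stores its successor at [OFFSET]. */
	for (i = NOBJ - 1; i >= 0; i--) {
		void **object = slab + i * WORDS;

		object[OFFSET] = freelist;
		freelist = object;
	}

	/* Pop objects the same way the patch does:
	 * object = c->freelist; c->freelist = object[offset]; */
	while (freelist) {
		void **object = freelist;

		freelist = object[OFFSET];
		printf("allocated object at %p\n", (void *)object);
	}

	free(slab);
	return 0;
}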
@@ -1422,18 +1423,17 @@ static inline void flush_slab(struct kmem_cache *s, struct page *page, int cpu)
  */
 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
 {
-	struct page *page = s->cpu_slab[cpu];
+	struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
 
-	if (likely(page))
-		flush_slab(s, page, cpu);
+	if (likely(c && c->page))
+		flush_slab(s, c);
 }
 
 static void flush_cpu_slab(void *d)
 {
 	struct kmem_cache *s = d;
-	int cpu = smp_processor_id();
 
-	__flush_cpu_slab(s, cpu);
+	__flush_cpu_slab(s, smp_processor_id());
 }
 
 static void flush_all(struct kmem_cache *s)
@@ -1450,6 +1450,19 @@ static void flush_all(struct kmem_cache *s)
 }
 
 /*
+ * Check if the objects in a per cpu structure fit numa
+ * locality expectations.
+ */
+static inline int node_match(struct kmem_cache_cpu *c, int node)
+{
+#ifdef CONFIG_NUMA
+	if (node != -1 && c->node != node)
+		return 0;
+#endif
+	return 1;
+}
+
+/*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
  *
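
node_match() centralizes the check that was previously open-coded as "node != -1 && page_to_nid(page) != node": a requested node of -1 means the caller does not care which NUMA node the objects come from, otherwise the cpu slab's recorded node must match. A standalone sketch of the same predicate, with CONFIG_NUMA assumed enabled and a stand-in struct used purely for illustration:

/* Standalone rendering of the node_match() idea; not the kernel's code. */
#include <assert.h>

struct cpu_state { int node; };		/* stand-in for struct kmem_cache_cpu */

static int node_match(struct cpu_state *c, int node)
{
	if (node != -1 && c->node != node)
		return 0;	/* a specific node was requested and we miss it */
	return 1;		/* node == -1 ("any node") or the nodes agree */
}

int main(void)
{
	struct cpu_state c = { .node = 1 };

	assert(node_match(&c, -1));	/* -1: any node is acceptable */
	assert(node_match(&c, 1));
	assert(!node_match(&c, 0));
	return 0;
}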
@@ -1467,45 +1480,46 @@ static void flush_all(struct kmem_cache *s)
  * we need to allocate a new slab. This is slowest path since we may sleep.
  */
 static void *__slab_alloc(struct kmem_cache *s,
-		gfp_t gfpflags, int node, void *addr, struct page *page)
+		gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
 {
 	void **object;
-	int cpu = smp_processor_id();
+	struct page *new;
 
-	if (!page)
+	if (!c->page)
 		goto new_slab;
 
-	slab_lock(page);
-	if (unlikely(node != -1 && page_to_nid(page) != node))
+	slab_lock(c->page);
+	if (unlikely(!node_match(c, node)))
 		goto another_slab;
 load_freelist:
-	object = page->freelist;
+	object = c->page->freelist;
 	if (unlikely(!object))
 		goto another_slab;
-	if (unlikely(SlabDebug(page)))
+	if (unlikely(SlabDebug(c->page)))
 		goto debug;
 
-	object = page->freelist;
-	page->lockless_freelist = object[page->offset];
-	page->inuse = s->objects;
-	page->freelist = NULL;
-	slab_unlock(page);
+	object = c->page->freelist;
+	c->freelist = object[c->page->offset];
+	c->page->inuse = s->objects;
+	c->page->freelist = NULL;
+	c->node = page_to_nid(c->page);
+	slab_unlock(c->page);
 	return object;
 
 another_slab:
-	deactivate_slab(s, page, cpu);
+	deactivate_slab(s, c);
 
 new_slab:
-	page = get_partial(s, gfpflags, node);
-	if (page) {
-		s->cpu_slab[cpu] = page;
+	new = get_partial(s, gfpflags, node);
+	if (new) {
+		c->page = new;
 		goto load_freelist;
 	}
 
-	page = new_slab(s, gfpflags, node);
-	if (page) {
-		cpu = smp_processor_id();
-		if (s->cpu_slab[cpu]) {
+	new = new_slab(s, gfpflags, node);
+	if (new) {
+		c = get_cpu_slab(s, smp_processor_id());
+		if (c->page) {
 			/*
 			 * Someone else populated the cpu_slab while we
 			 * enabled interrupts, or we have gotten scheduled
@@ -1513,34 +1527,32 @@ new_slab:
 			 * requested node even if __GFP_THISNODE was
 			 * specified. So we need to recheck.
 			 */
-			if (node == -1 ||
-				page_to_nid(s->cpu_slab[cpu]) == node) {
+			if (node_match(c, node)) {
 				/*
 				 * Current cpuslab is acceptable and we
 				 * want the current one since its cache hot
 				 */
-				discard_slab(s, page);
-				page = s->cpu_slab[cpu];
-				slab_lock(page);
+				discard_slab(s, new);
+				slab_lock(c->page);
 				goto load_freelist;
 			}
 			/* New slab does not fit our expectations */
-			flush_slab(s, s->cpu_slab[cpu], cpu);
+			flush_slab(s, c);
 		}
-		slab_lock(page);
-		SetSlabFrozen(page);
-		s->cpu_slab[cpu] = page;
+		slab_lock(new);
+		SetSlabFrozen(new);
+		c->page = new;
 		goto load_freelist;
 	}
 	return NULL;
 debug:
-	object = page->freelist;
-	if (!alloc_debug_processing(s, page, object, addr))
+	object = c->page->freelist;
+	if (!alloc_debug_processing(s, c->page, object, addr))
 		goto another_slab;
 
-	page->inuse++;
-	page->freelist = object[page->offset];
-	slab_unlock(page);
+	c->page->inuse++;
+	c->page->freelist = object[c->page->offset];
+	slab_unlock(c->page);
 	return object;
 }
 
@@ -1557,20 +1569,20 @@ debug:
 static void __always_inline *slab_alloc(struct kmem_cache *s,
 		gfp_t gfpflags, int node, void *addr)
 {
-	struct page *page;
 	void **object;
 	unsigned long flags;
+	struct kmem_cache_cpu *c;
 
 	local_irq_save(flags);
-	page = s->cpu_slab[smp_processor_id()];
-	if (unlikely(!page || !page->lockless_freelist ||
-			(node != -1 && page_to_nid(page) != node)))
+	c = get_cpu_slab(s, smp_processor_id());
+	if (unlikely(!c->page || !c->freelist ||
+					!node_match(c, node)))
 
-		object = __slab_alloc(s, gfpflags, node, addr, page);
+		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {
-		object = page->lockless_freelist;
-		page->lockless_freelist = object[page->offset];
+		object = c->freelist;
+		c->freelist = object[c->page->offset];
 	}
 	local_irq_restore(flags);
 
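
Putting the pieces together, the allocation fast path now touches only the per-cpu structure: if the cpu slab exists, has lockless free objects, and matches the requested node, an object is popped from c->freelist with interrupts disabled but without taking the slab lock. A simplified userspace model of that fast-path/slow-path split (the slow path is stubbed out; all names and sizes here are illustrative only):

/* Simplified model of the slab_alloc() fast path; not kernel code. */
#include <stddef.h>
#include <stdio.h>

struct cpu_state {
	void **freelist;	/* lockless per-cpu freelist */
	void *page;		/* current cpu slab, NULL if none */
	int node;
	size_t offset;		/* word index of the next-free link */
};

static int node_match(struct cpu_state *c, int node)
{
	return node == -1 || c->node == node;
}

static void *slow_alloc(struct cpu_state *c, int node)
{
	(void)c; (void)node;
	return NULL;		/* stand-in for __slab_alloc() refilling the slab */
}

static void *fast_alloc(struct cpu_state *c, int node)
{
	void **object;

	/* In the kernel, interrupts are disabled around this window. */
	if (!c->page || !c->freelist || !node_match(c, node))
		return slow_alloc(c, node);

	object = c->freelist;
	c->freelist = object[c->offset];	/* pop without taking the slab lock */
	return object;
}

int main(void)
{
	void *objects[2];			/* two fake one-word objects */
	struct cpu_state c = { .page = objects, .node = 0, .offset = 0 };

	objects[0] = &objects[1];		/* object 0 links to object 1 */
	objects[1] = NULL;			/* object 1 ends the freelist */
	c.freelist = &objects[0];

	printf("%p %p %p\n", fast_alloc(&c, -1), fast_alloc(&c, -1),
	       fast_alloc(&c, -1));		/* third call falls to the slow path */
	return 0;
}

This models only the happy path; the real __slab_alloc() additionally drains the old cpu slab, pulls from partial slabs, and re-checks the cpu slab after interrupts were re-enabled during page allocation.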
@@ -1668,13 +1680,14 @@ static void __always_inline slab_free(struct kmem_cache *s,
 {
 	void **object = (void *)x;
 	unsigned long flags;
+	struct kmem_cache_cpu *c;
 
 	local_irq_save(flags);
 	debug_check_no_locks_freed(object, s->objsize);
-	if (likely(page == s->cpu_slab[smp_processor_id()] &&
-						!SlabDebug(page))) {
-		object[page->offset] = page->lockless_freelist;
-		page->lockless_freelist = object;
+	c = get_cpu_slab(s, smp_processor_id());
+	if (likely(page == c->page && !SlabDebug(page))) {
+		object[page->offset] = c->freelist;
+		c->freelist = object;
 	} else
 		__slab_free(s, page, x, addr);
 
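
The free fast path is the mirror image: when the freed object belongs to the page that is currently this cpu's slab (and debugging is off), it is simply pushed onto c->freelist; anything else goes through the locked __slab_free() path. A minimal sketch under the same illustrative model as above:

/* Simplified model of the slab_free() fast path; __slab_free() is stubbed
 * and all names are illustrative. */
#include <stddef.h>

struct cpu_state {
	void **freelist;
	void *page;
	size_t offset;
};

static void slow_free(void *page, void *x)
{
	(void)page; (void)x;	/* stand-in for the locked __slab_free() path */
}

static void fast_free(struct cpu_state *c, void *page, void *x)
{
	void **object = x;

	/* In the kernel, interrupts are disabled around this window. */
	if (page == c->page) {
		object[c->offset] = c->freelist;	/* link to the old head */
		c->freelist = object;			/* push, no slab lock taken */
	} else {
		slow_free(page, x);
	}
}

int main(void)
{
	void *obj[1] = { NULL };	/* one fake one-word object */
	struct cpu_state c = { .page = obj, .offset = 0 };

	fast_free(&c, obj, obj);	/* the object becomes the freelist head */
	return c.freelist == (void **)obj ? 0 : 1;
}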
@@ -1862,6 +1875,24 @@ static unsigned long calculate_alignment(unsigned long flags,
 	return ALIGN(align, sizeof(void *));
 }
 
+static void init_kmem_cache_cpu(struct kmem_cache *s,
+			struct kmem_cache_cpu *c)
+{
+	c->page = NULL;
+	c->freelist = NULL;
+	c->node = 0;
+}
+
+static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		init_kmem_cache_cpu(s, get_cpu_slab(s, cpu));
+
+	return 1;
+}
+
 static void init_kmem_cache_node(struct kmem_cache_node *n)
 {
 	n->nr_partial = 0;
@@ -2111,8 +2142,10 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
 #ifdef CONFIG_NUMA
 	s->defrag_ratio = 100;
 #endif
+	if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
+		goto error;
 
-	if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
+	if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA))
 		return 1;
 error:
 	if (flags & SLAB_PANIC)
@@ -2646,7 +2679,7 @@ void __init kmem_cache_init(void)
 #endif
 
 	kmem_size = offsetof(struct kmem_cache, cpu_slab) +
-			nr_cpu_ids * sizeof(struct page *);
+				nr_cpu_ids * sizeof(struct kmem_cache_cpu);
 
 	printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
 		" CPUs=%d, Nodes=%d\n",
@@ -3248,11 +3281,14 @@ static unsigned long slab_objects(struct kmem_cache *s,
 	per_cpu = nodes + nr_node_ids;
 
 	for_each_possible_cpu(cpu) {
-		struct page *page = s->cpu_slab[cpu];
-		int node;
+		struct page *page;
+		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
 
+		if (!c)
+			continue;
+
+		page = c->page;
 		if (page) {
-			node = page_to_nid(page);
 			if (flags & SO_CPU) {
 				int x = 0;
 
@@ -3261,9 +3297,9 @@ static unsigned long slab_objects(struct kmem_cache *s,
 				else
 					x = 1;
 				total += x;
-				nodes[node] += x;
+				nodes[c->node] += x;
 			}
-			per_cpu[node]++;
+			per_cpu[c->node]++;
 		}
 	}
 
@@ -3309,13 +3345,19 @@ static int any_slab_objects(struct kmem_cache *s)
 	int node;
 	int cpu;
 
-	for_each_possible_cpu(cpu)
-		if (s->cpu_slab[cpu])
+	for_each_possible_cpu(cpu) {
+		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+
+		if (c && c->page)
 			return 1;
+	}
 
-	for_each_node(node) {
+	for_each_online_node(node) {
 		struct kmem_cache_node *n = get_node(s, node);
 
+		if (!n)
+			continue;
+
 		if (n->nr_partial || atomic_long_read(&n->nr_slabs))
 			return 1;
 	}