author		Kumar Gala <galak@kernel.crashing.org>	2006-03-20 12:58:02 -0500
committer	Kumar Gala <galak@kernel.crashing.org>	2006-03-20 12:58:02 -0500
commit		1a02e59a2970f9ed28ab51d3b08624b79e54d848 (patch)
tree		470cce472be3b08c160e0c569648e7228651b12a /mm/slab.c
parent		ebcff3c773b42bce6182ec16485abca4e53fba97 (diff)
parent		2c276603c3e5ebf38155a9d1fbbda656d52d138e (diff)
Merge branch 'master'
Diffstat (limited to 'mm/slab.c')
-rw-r--r--	mm/slab.c	132
1 file changed, 99 insertions(+), 33 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index d66c2b0d9715..d0bd7f07ab04 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -789,6 +789,47 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, char *
 	dump_stack();
 }
 
+#ifdef CONFIG_NUMA
+/*
+ * Special reaping functions for NUMA systems called from cache_reap().
+ * These take care of doing round robin flushing of alien caches (containing
+ * objects freed on different nodes from which they were allocated) and the
+ * flushing of remote pcps by calling drain_node_pages.
+ */
+static DEFINE_PER_CPU(unsigned long, reap_node);
+
+static void init_reap_node(int cpu)
+{
+	int node;
+
+	node = next_node(cpu_to_node(cpu), node_online_map);
+	if (node == MAX_NUMNODES)
+		node = 0;
+
+	__get_cpu_var(reap_node) = node;
+}
+
+static void next_reap_node(void)
+{
+	int node = __get_cpu_var(reap_node);
+
+	/*
+	 * Also drain per cpu pages on remote zones
+	 */
+	if (node != numa_node_id())
+		drain_node_pages(node);
+
+	node = next_node(node, node_online_map);
+	if (unlikely(node >= MAX_NUMNODES))
+		node = first_node(node_online_map);
+	__get_cpu_var(reap_node) = node;
+}
+
+#else
+#define init_reap_node(cpu) do { } while (0)
+#define next_reap_node(void) do { } while (0)
+#endif
+
 /*
  * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz
  * via the workqueue/eventd.
@@ -806,6 +847,7 @@ static void __devinit start_cpu_timer(int cpu)
 	 * at that time.
 	 */
 	if (keventd_up() && reap_work->func == NULL) {
+		init_reap_node(cpu);
 		INIT_WORK(reap_work, cache_reap, NULL);
 		schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
 	}
@@ -884,6 +926,23 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
 	}
 }
 
+/*
+ * Called from cache_reap() to regularly drain alien caches round robin.
+ */
+static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
+{
+	int node = __get_cpu_var(reap_node);
+
+	if (l3->alien) {
+		struct array_cache *ac = l3->alien[node];
+		if (ac && ac->avail) {
+			spin_lock_irq(&ac->lock);
+			__drain_alien_cache(cachep, ac, node);
+			spin_unlock_irq(&ac->lock);
+		}
+	}
+}
+
 static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
 {
 	int i = 0;
@@ -902,6 +961,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **al
 #else
 
 #define drain_alien_cache(cachep, alien) do { } while (0)
+#define reap_alien(cachep, l3) do { } while (0)
 
 static inline struct array_cache **alloc_alien_cache(int node, int limit)
 {
@@ -1124,6 +1184,7 @@ void __init kmem_cache_init(void)
 	struct cache_sizes *sizes;
 	struct cache_names *names;
 	int i;
+	int order;
 
 	for (i = 0; i < NUM_INIT_LISTS; i++) {
 		kmem_list3_init(&initkmem_list3[i]);
@@ -1167,11 +1228,15 @@ void __init kmem_cache_init(void)
 
 	cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size());
 
-	cache_estimate(0, cache_cache.buffer_size, cache_line_size(), 0,
-		       &left_over, &cache_cache.num);
+	for (order = 0; order < MAX_ORDER; order++) {
+		cache_estimate(order, cache_cache.buffer_size,
+			cache_line_size(), 0, &left_over, &cache_cache.num);
+		if (cache_cache.num)
+			break;
+	}
 	if (!cache_cache.num)
 		BUG();
-
+	cache_cache.gfporder = order;
 	cache_cache.colour = left_over / cache_cache.colour_off;
 	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
 				      sizeof(struct slab), cache_line_size());
@@ -1628,36 +1693,44 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep,
 			size_t size, size_t align, unsigned long flags)
 {
 	size_t left_over = 0;
+	int gfporder;
 
-	for (;; cachep->gfporder++) {
+	for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) {
 		unsigned int num;
 		size_t remainder;
 
-		if (cachep->gfporder > MAX_GFP_ORDER) {
-			cachep->num = 0;
-			break;
-		}
-
-		cache_estimate(cachep->gfporder, size, align, flags,
-			       &remainder, &num);
+		cache_estimate(gfporder, size, align, flags, &remainder, &num);
 		if (!num)
 			continue;
+
 		/* More than offslab_limit objects will cause problems */
-		if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit)
+		if ((flags & CFLGS_OFF_SLAB) && num > offslab_limit)
 			break;
 
+		/* Found something acceptable - save it away */
 		cachep->num = num;
+		cachep->gfporder = gfporder;
 		left_over = remainder;
 
 		/*
+		 * A VFS-reclaimable slab tends to have most allocations
+		 * as GFP_NOFS and we really don't want to have to be allocating
+		 * higher-order pages when we are unable to shrink dcache.
+		 */
+		if (flags & SLAB_RECLAIM_ACCOUNT)
+			break;
+
+		/*
 		 * Large number of objects is good, but very large slabs are
 		 * currently bad for the gfp()s.
 		 */
-		if (cachep->gfporder >= slab_break_gfp_order)
+		if (gfporder >= slab_break_gfp_order)
 			break;
 
-		if ((left_over * 8) <= (PAGE_SIZE << cachep->gfporder))
-			/* Acceptable internal fragmentation */
+		/*
+		 * Acceptable internal fragmentation?
+		 */
+		if ((left_over * 8) <= (PAGE_SIZE << gfporder))
 			break;
 	}
 	return left_over;
@@ -1717,6 +1790,12 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 		BUG();
 	}
 
+	/*
+	 * Prevent CPUs from coming and going.
+	 * lock_cpu_hotplug() nests outside cache_chain_mutex
+	 */
+	lock_cpu_hotplug();
+
 	mutex_lock(&cache_chain_mutex);
 
 	list_for_each(p, &cache_chain) {
@@ -1863,17 +1942,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 
 	size = ALIGN(size, align);
 
-	if ((flags & SLAB_RECLAIM_ACCOUNT) && size <= PAGE_SIZE) {
-		/*
-		 * A VFS-reclaimable slab tends to have most allocations
-		 * as GFP_NOFS and we really don't want to have to be allocating
-		 * higher-order pages when we are unable to shrink dcache.
-		 */
-		cachep->gfporder = 0;
-		cache_estimate(cachep->gfporder, size, align, flags,
-			       &left_over, &cachep->num);
-	} else
-		left_over = calculate_slab_order(cachep, size, align, flags);
+	left_over = calculate_slab_order(cachep, size, align, flags);
 
 	if (!cachep->num) {
 		printk("kmem_cache_create: couldn't create cache %s.\n", name);
@@ -1918,8 +1987,6 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	cachep->dtor = dtor;
 	cachep->name = name;
 
-	/* Don't let CPUs to come and go */
-	lock_cpu_hotplug();
 
 	if (g_cpucache_up == FULL) {
 		enable_cpucache(cachep);
@@ -1978,12 +2045,12 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 
 	/* cache setup completed, link it into the list */
 	list_add(&cachep->next, &cache_chain);
-	unlock_cpu_hotplug();
 oops:
 	if (!cachep && (flags & SLAB_PANIC))
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
 		      name);
 	mutex_unlock(&cache_chain_mutex);
+	unlock_cpu_hotplug();
 	return cachep;
 }
 EXPORT_SYMBOL(kmem_cache_create);
@@ -2550,7 +2617,7 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
 		       "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n",
 		       cachep->name, cachep->num, slabp, slabp->inuse);
 	for (i = 0;
-	     i < sizeof(slabp) + cachep->num * sizeof(kmem_bufctl_t);
+	     i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
 	     i++) {
 		if ((i % 16) == 0)
 			printk("\n%03x:", i);
@@ -3490,8 +3557,7 @@ static void cache_reap(void *unused)
 		check_irq_on();
 
 		l3 = searchp->nodelists[numa_node_id()];
-		if (l3->alien)
-			drain_alien_cache(searchp, l3->alien);
+		reap_alien(searchp, l3);
 		spin_lock_irq(&l3->list_lock);
 
 		drain_array_locked(searchp, cpu_cache_get(searchp), 0,
@@ -3541,7 +3607,7 @@ static void cache_reap(void *unused)
 	}
 	check_irq_on();
 	mutex_unlock(&cache_chain_mutex);
-	drain_remote_pages();
+	next_reap_node();
 	/* Setup the next iteration */
 	schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
 }
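
For readers skimming the merge, the per-CPU round-robin reaping added above amounts to: remember one node per CPU, drain its alien cache on each cache_reap() tick, then advance to the next online node and wrap around. The following is a minimal standalone C sketch of just that node-selection logic; the 64-bit bitmask and the next_node()/first_node() helpers here are simplified stand-ins for the kernel's nodemask API (illustrative assumptions, not the real implementation).

/* Standalone sketch of round-robin reap-node selection (illustration only). */
#include <stdio.h>

#define MAX_NUMNODES 64

/* Return the first node greater than 'node' that is set in 'mask',
 * or MAX_NUMNODES if there is none. Simplified stand-in for next_node(). */
static int next_node(int node, unsigned long long mask)
{
	for (int n = node + 1; n < MAX_NUMNODES; n++)
		if (mask & (1ULL << n))
			return n;
	return MAX_NUMNODES;
}

/* Return the lowest node set in 'mask'. Stand-in for first_node(). */
static int first_node(unsigned long long mask)
{
	return next_node(-1, mask);
}

int main(void)
{
	/* Pretend nodes 0, 1 and 3 are online. */
	unsigned long long online = (1ULL << 0) | (1ULL << 1) | (1ULL << 3);
	int reap_node = first_node(online);

	/* Each cache_reap() tick would drain the alien cache of 'reap_node'
	 * and then advance, wrapping back to the first online node. */
	for (int tick = 0; tick < 8; tick++) {
		printf("tick %d: reap node %d\n", tick, reap_node);
		reap_node = next_node(reap_node, online);
		if (reap_node >= MAX_NUMNODES)
			reap_node = first_node(online);
	}
	return 0;
}

Run as-is it prints the node sequence 0, 1, 3, 0, 1, 3, ..., which is the same wrap-around behaviour next_reap_node() implements per CPU.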
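Similarly, the reworked calculate_slab_order() above walks page orders from zero upward and stops at the first order whose leftover space is small relative to the slab (left_over * 8 <= PAGE_SIZE << order). The sketch below reproduces only that fragmentation heuristic, assuming a fixed 4 KiB page and ignoring the per-object management overhead that the real cache_estimate() accounts for, so its numbers are indicative rather than what the slab allocator would actually pick.

/* Standalone sketch of the slab order-selection heuristic (illustration only). */
#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE 4096UL
#define MAX_ORDER 11

/* Find the smallest page order whose internal fragmentation is acceptable. */
static int pick_order(size_t obj_size, unsigned int *num_out, size_t *left_out)
{
	for (int order = 0; order <= MAX_ORDER; order++) {
		size_t slab_size = PAGE_SIZE << order;
		unsigned int num = slab_size / obj_size;	/* objects per slab */
		size_t left_over = slab_size - (size_t)num * obj_size;

		if (!num)
			continue;	/* object does not fit at this order yet */

		*num_out = num;
		*left_out = left_over;

		/* Acceptable internal fragmentation? */
		if (left_over * 8 <= slab_size)
			return order;
	}
	return MAX_ORDER;
}

int main(void)
{
	size_t sizes[] = { 96, 700, 5000 };

	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned int num = 0;
		size_t left = 0;
		int order = pick_order(sizes[i], &num, &left);

		printf("obj %5zu: order %d, %u objs/slab, %zu bytes left over\n",
		       sizes[i], order, num, left);
	}
	return 0;
}

The real function additionally stops early for SLAB_RECLAIM_ACCOUNT caches and once gfporder reaches slab_break_gfp_order, as the diff shows; this sketch deliberately leaves those checks out to keep the core loop visible.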