diff options
author | James Bottomley <jejb@mulgrave.il.steeleye.com> | 2006-03-14 15:18:01 -0500 |
---|---|---|
committer | James Bottomley <jejb@mulgrave.il.steeleye.com> | 2006-03-14 15:18:01 -0500 |
commit | f33b5d783b4f56be5ace6a1c98fb5f76b2d2d07d (patch) | |
tree | b027b5f3429d416b3da5b9195024007dab062a5e /mm/slab.c | |
parent | e935d5da8e5d12fabe5b632736c50eae0427e8c8 (diff) | |
parent | 67963132638e67ad3c5aa16765e6f3f2f3cdd85c (diff) |
Merge ../linux-2.6
Diffstat (limited to 'mm/slab.c')
-rw-r--r-- | mm/slab.c | 122 |
1 files changed, 92 insertions, 30 deletions
@@ -789,6 +789,47 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, char * | |||
789 | dump_stack(); | 789 | dump_stack(); |
790 | } | 790 | } |
791 | 791 | ||
792 | #ifdef CONFIG_NUMA | ||
793 | /* | ||
794 | * Special reaping functions for NUMA systems called from cache_reap(). | ||
795 | * These take care of round-robin flushing of the alien caches (which hold | ||
796 | * objects freed on a node other than the one they were allocated from) and of | ||
797 | * flushing remote pcps by calling drain_node_pages. | ||
798 | */ | ||
799 | static DEFINE_PER_CPU(unsigned long, reap_node); | ||
800 | |||
801 | static void init_reap_node(int cpu) | ||
802 | { | ||
803 | int node; | ||
804 | |||
805 | node = next_node(cpu_to_node(cpu), node_online_map); | ||
806 | if (node == MAX_NUMNODES) | ||
807 | node = 0; | ||
808 | |||
809 | __get_cpu_var(reap_node) = node; | ||
810 | } | ||
811 | |||
812 | static void next_reap_node(void) | ||
813 | { | ||
814 | int node = __get_cpu_var(reap_node); | ||
815 | |||
816 | /* | ||
817 | * Also drain per cpu pages on remote zones | ||
818 | */ | ||
819 | if (node != numa_node_id()) | ||
820 | drain_node_pages(node); | ||
821 | |||
822 | node = next_node(node, node_online_map); | ||
823 | if (unlikely(node >= MAX_NUMNODES)) | ||
824 | node = first_node(node_online_map); | ||
825 | __get_cpu_var(reap_node) = node; | ||
826 | } | ||
827 | |||
828 | #else | ||
829 | #define init_reap_node(cpu) do { } while (0) | ||
830 | #define next_reap_node(void) do { } while (0) | ||
831 | #endif | ||
832 | |||
792 | /* | 833 | /* |
793 | * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz | 834 | * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz |
794 | * via the workqueue/eventd. | 835 | * via the workqueue/eventd. |
@@ -806,6 +847,7 @@ static void __devinit start_cpu_timer(int cpu) | |||
806 | * at that time. | 847 | * at that time. |
807 | */ | 848 | */ |
808 | if (keventd_up() && reap_work->func == NULL) { | 849 | if (keventd_up() && reap_work->func == NULL) { |
850 | init_reap_node(cpu); | ||
809 | INIT_WORK(reap_work, cache_reap, NULL); | 851 | INIT_WORK(reap_work, cache_reap, NULL); |
810 | schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); | 852 | schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); |
811 | } | 853 | } |
@@ -884,6 +926,23 @@ static void __drain_alien_cache(struct kmem_cache *cachep, | |||
884 | } | 926 | } |
885 | } | 927 | } |
886 | 928 | ||
929 | /* | ||
930 | * Called from cache_reap() to regularly drain alien caches round robin. | ||
931 | */ | ||
932 | static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) | ||
933 | { | ||
934 | int node = __get_cpu_var(reap_node); | ||
935 | |||
936 | if (l3->alien) { | ||
937 | struct array_cache *ac = l3->alien[node]; | ||
938 | if (ac && ac->avail) { | ||
939 | spin_lock_irq(&ac->lock); | ||
940 | __drain_alien_cache(cachep, ac, node); | ||
941 | spin_unlock_irq(&ac->lock); | ||
942 | } | ||
943 | } | ||
944 | } | ||
945 | |||
887 | static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) | 946 | static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) |
888 | { | 947 | { |
889 | int i = 0; | 948 | int i = 0; |
@@ -902,6 +961,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **al | |||
902 | #else | 961 | #else |
903 | 962 | ||
904 | #define drain_alien_cache(cachep, alien) do { } while (0) | 963 | #define drain_alien_cache(cachep, alien) do { } while (0) |
964 | #define reap_alien(cachep, l3) do { } while (0) | ||
905 | 965 | ||
906 | static inline struct array_cache **alloc_alien_cache(int node, int limit) | 966 | static inline struct array_cache **alloc_alien_cache(int node, int limit) |
907 | { | 967 | { |
@@ -1124,6 +1184,7 @@ void __init kmem_cache_init(void) | |||
1124 | struct cache_sizes *sizes; | 1184 | struct cache_sizes *sizes; |
1125 | struct cache_names *names; | 1185 | struct cache_names *names; |
1126 | int i; | 1186 | int i; |
1187 | int order; | ||
1127 | 1188 | ||
1128 | for (i = 0; i < NUM_INIT_LISTS; i++) { | 1189 | for (i = 0; i < NUM_INIT_LISTS; i++) { |
1129 | kmem_list3_init(&initkmem_list3[i]); | 1190 | kmem_list3_init(&initkmem_list3[i]); |
@@ -1167,11 +1228,15 @@ void __init kmem_cache_init(void) | |||
1167 | 1228 | ||
1168 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); | 1229 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); |
1169 | 1230 | ||
1170 | cache_estimate(0, cache_cache.buffer_size, cache_line_size(), 0, | 1231 | for (order = 0; order < MAX_ORDER; order++) { |
1171 | &left_over, &cache_cache.num); | 1232 | cache_estimate(order, cache_cache.buffer_size, |
1233 | cache_line_size(), 0, &left_over, &cache_cache.num); | ||
1234 | if (cache_cache.num) | ||
1235 | break; | ||
1236 | } | ||
1172 | if (!cache_cache.num) | 1237 | if (!cache_cache.num) |
1173 | BUG(); | 1238 | BUG(); |
1174 | 1239 | cache_cache.gfporder = order; | |
1175 | cache_cache.colour = left_over / cache_cache.colour_off; | 1240 | cache_cache.colour = left_over / cache_cache.colour_off; |
1176 | cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + | 1241 | cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + |
1177 | sizeof(struct slab), cache_line_size()); | 1242 | sizeof(struct slab), cache_line_size()); |
@@ -1628,36 +1693,44 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
1628 | size_t size, size_t align, unsigned long flags) | 1693 | size_t size, size_t align, unsigned long flags) |
1629 | { | 1694 | { |
1630 | size_t left_over = 0; | 1695 | size_t left_over = 0; |
1696 | int gfporder; | ||
1631 | 1697 | ||
1632 | for (;; cachep->gfporder++) { | 1698 | for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) { |
1633 | unsigned int num; | 1699 | unsigned int num; |
1634 | size_t remainder; | 1700 | size_t remainder; |
1635 | 1701 | ||
1636 | if (cachep->gfporder > MAX_GFP_ORDER) { | 1702 | cache_estimate(gfporder, size, align, flags, &remainder, &num); |
1637 | cachep->num = 0; | ||
1638 | break; | ||
1639 | } | ||
1640 | |||
1641 | cache_estimate(cachep->gfporder, size, align, flags, | ||
1642 | &remainder, &num); | ||
1643 | if (!num) | 1703 | if (!num) |
1644 | continue; | 1704 | continue; |
1705 | |||
1645 | /* More than offslab_limit objects will cause problems */ | 1706 | /* More than offslab_limit objects will cause problems */ |
1646 | if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) | 1707 | if ((flags & CFLGS_OFF_SLAB) && num > offslab_limit) |
1647 | break; | 1708 | break; |
1648 | 1709 | ||
1710 | /* Found something acceptable - save it away */ | ||
1649 | cachep->num = num; | 1711 | cachep->num = num; |
1712 | cachep->gfporder = gfporder; | ||
1650 | left_over = remainder; | 1713 | left_over = remainder; |
1651 | 1714 | ||
1652 | /* | 1715 | /* |
1716 | * A VFS-reclaimable slab tends to have most allocations | ||
1717 | * as GFP_NOFS and we really don't want to have to be allocating | ||
1718 | * higher-order pages when we are unable to shrink dcache. | ||
1719 | */ | ||
1720 | if (flags & SLAB_RECLAIM_ACCOUNT) | ||
1721 | break; | ||
1722 | |||
1723 | /* | ||
1653 | * Large number of objects is good, but very large slabs are | 1724 | * Large number of objects is good, but very large slabs are |
1654 | * currently bad for the gfp()s. | 1725 | * currently bad for the gfp()s. |
1655 | */ | 1726 | */ |
1656 | if (cachep->gfporder >= slab_break_gfp_order) | 1727 | if (gfporder >= slab_break_gfp_order) |
1657 | break; | 1728 | break; |
1658 | 1729 | ||
1659 | if ((left_over * 8) <= (PAGE_SIZE << cachep->gfporder)) | 1730 | /* |
1660 | /* Acceptable internal fragmentation */ | 1731 | * Acceptable internal fragmentation? |
1732 | */ | ||
1733 | if ((left_over * 8) <= (PAGE_SIZE << gfporder)) | ||
1661 | break; | 1734 | break; |
1662 | } | 1735 | } |
1663 | return left_over; | 1736 | return left_over; |
@@ -1869,17 +1942,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1869 | 1942 | ||
1870 | size = ALIGN(size, align); | 1943 | size = ALIGN(size, align); |
1871 | 1944 | ||
1872 | if ((flags & SLAB_RECLAIM_ACCOUNT) && size <= PAGE_SIZE) { | 1945 | left_over = calculate_slab_order(cachep, size, align, flags); |
1873 | /* | ||
1874 | * A VFS-reclaimable slab tends to have most allocations | ||
1875 | * as GFP_NOFS and we really don't want to have to be allocating | ||
1876 | * higher-order pages when we are unable to shrink dcache. | ||
1877 | */ | ||
1878 | cachep->gfporder = 0; | ||
1879 | cache_estimate(cachep->gfporder, size, align, flags, | ||
1880 | &left_over, &cachep->num); | ||
1881 | } else | ||
1882 | left_over = calculate_slab_order(cachep, size, align, flags); | ||
1883 | 1946 | ||
1884 | if (!cachep->num) { | 1947 | if (!cachep->num) { |
1885 | printk("kmem_cache_create: couldn't create cache %s.\n", name); | 1948 | printk("kmem_cache_create: couldn't create cache %s.\n", name); |
@@ -2554,7 +2617,7 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) | |||
2554 | "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", | 2617 | "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", |
2555 | cachep->name, cachep->num, slabp, slabp->inuse); | 2618 | cachep->name, cachep->num, slabp, slabp->inuse); |
2556 | for (i = 0; | 2619 | for (i = 0; |
2557 | i < sizeof(slabp) + cachep->num * sizeof(kmem_bufctl_t); | 2620 | i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); |
2558 | i++) { | 2621 | i++) { |
2559 | if ((i % 16) == 0) | 2622 | if ((i % 16) == 0) |
2560 | printk("\n%03x:", i); | 2623 | printk("\n%03x:", i); |
@@ -3494,8 +3557,7 @@ static void cache_reap(void *unused) | |||
3494 | check_irq_on(); | 3557 | check_irq_on(); |
3495 | 3558 | ||
3496 | l3 = searchp->nodelists[numa_node_id()]; | 3559 | l3 = searchp->nodelists[numa_node_id()]; |
3497 | if (l3->alien) | 3560 | reap_alien(searchp, l3); |
3498 | drain_alien_cache(searchp, l3->alien); | ||
3499 | spin_lock_irq(&l3->list_lock); | 3561 | spin_lock_irq(&l3->list_lock); |
3500 | 3562 | ||
3501 | drain_array_locked(searchp, cpu_cache_get(searchp), 0, | 3563 | drain_array_locked(searchp, cpu_cache_get(searchp), 0, |
@@ -3545,7 +3607,7 @@ static void cache_reap(void *unused) | |||
3545 | } | 3607 | } |
3546 | check_irq_on(); | 3608 | check_irq_on(); |
3547 | mutex_unlock(&cache_chain_mutex); | 3609 | mutex_unlock(&cache_chain_mutex); |
3548 | drain_remote_pages(); | 3610 | next_reap_node(); |
3549 | /* Setup the next iteration */ | 3611 | /* Setup the next iteration */ |
3550 | schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); | 3612 | schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); |
3551 | } | 3613 | } |