 drivers/mtd/onenand/omap2.c        |  22
 drivers/mtd/onenand/onenand_base.c | 173
 include/linux/mtd/flashchip.h      |   4
 include/linux/mtd/onenand_regs.h   |   2
 4 files changed, 194 insertions, 7 deletions
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index 0108ed42e877..2dafd0949be5 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -112,10 +112,24 @@ static int omap2_onenand_wait(struct mtd_info *mtd, int state)
 	unsigned long timeout;
 	u32 syscfg;
 
-	if (state == FL_RESETING) {
-		int i;
+	if (state == FL_RESETING || state == FL_PREPARING_ERASE ||
+	    state == FL_VERIFYING_ERASE) {
+		int i = 21;
+		unsigned int intr_flags = ONENAND_INT_MASTER;
+
+		switch (state) {
+		case FL_RESETING:
+			intr_flags |= ONENAND_INT_RESET;
+			break;
+		case FL_PREPARING_ERASE:
+			intr_flags |= ONENAND_INT_ERASE;
+			break;
+		case FL_VERIFYING_ERASE:
+			i = 101;
+			break;
+		}
 
-		for (i = 0; i < 20; i++) {
+		while (--i) {
 			udelay(1);
 			intr = read_reg(c, ONENAND_REG_INTERRUPT);
 			if (intr & ONENAND_INT_MASTER)
@@ -126,7 +140,7 @@ static int omap2_onenand_wait(struct mtd_info *mtd, int state)
 			wait_err("controller error", state, ctrl, intr);
 			return -EIO;
 		}
-		if (!(intr & ONENAND_INT_RESET)) {
+		if ((intr & intr_flags) != intr_flags) {
 			wait_err("timeout", state, ctrl, intr);
 			return -EIO;
 		}
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 894ebadc3e69..266f471901e5 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -34,6 +34,13 @@
 
 #include <asm/io.h>
 
+/*
+ * Multiblock erase if number of blocks to erase is 2 or more.
+ * Maximum number of blocks for simultaneous erase is 64.
+ */
+#define MB_ERASE_MIN_BLK_COUNT	2
+#define MB_ERASE_MAX_BLK_COUNT	64
+
 /* Default Flex-OneNAND boundary and lock respectively */
 static int flex_bdry[MAX_DIES * 2] = { -1, 0, -1, 0 };
 
@@ -353,6 +360,8 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
 		break;
 
 	case ONENAND_CMD_ERASE:
+	case ONENAND_CMD_MULTIBLOCK_ERASE:
+	case ONENAND_CMD_ERASE_VERIFY:
 	case ONENAND_CMD_BUFFERRAM:
 	case ONENAND_CMD_OTP_ACCESS:
 		block = onenand_block(this, addr);
@@ -497,7 +506,7 @@ static int onenand_wait(struct mtd_info *mtd, int state)
 		if (interrupt & flags)
 			break;
 
-		if (state != FL_READING)
+		if (state != FL_READING && state != FL_PREPARING_ERASE)
 			cond_resched();
 	}
 	/* To get correct interrupt status in timeout case */
@@ -530,6 +539,18 @@ static int onenand_wait(struct mtd_info *mtd, int state)
 		return -EIO;
 	}
 
+	if (state == FL_PREPARING_ERASE && !(interrupt & ONENAND_INT_ERASE)) {
+		printk(KERN_ERR "%s: mb erase timeout! ctrl=0x%04x intr=0x%04x\n",
+			__func__, ctrl, interrupt);
+		return -EIO;
+	}
+
+	if (!(interrupt & ONENAND_INT_MASTER)) {
+		printk(KERN_ERR "%s: timeout! ctrl=0x%04x intr=0x%04x\n",
+			__func__, ctrl, interrupt);
+		return -EIO;
+	}
+
 	/* If there's controller error, it's a real error */
 	if (ctrl & ONENAND_CTRL_ERROR) {
 		printk(KERN_ERR "%s: controller error = 0x%04x\n",
@@ -2182,6 +2203,148 @@ static int onenand_block_isbad_nolock(struct mtd_info *mtd, loff_t ofs, int allo
 	return bbm->isbad_bbt(mtd, ofs, allowbbt);
 }
 
+
+static int onenand_multiblock_erase_verify(struct mtd_info *mtd,
+					   struct erase_info *instr)
+{
+	struct onenand_chip *this = mtd->priv;
+	loff_t addr = instr->addr;
+	int len = instr->len;
+	unsigned int block_size = (1 << this->erase_shift);
+	int ret = 0;
+
+	while (len) {
+		this->command(mtd, ONENAND_CMD_ERASE_VERIFY, addr, block_size);
+		ret = this->wait(mtd, FL_VERIFYING_ERASE);
+		if (ret) {
+			printk(KERN_ERR "%s: Failed verify, block %d\n",
+			       __func__, onenand_block(this, addr));
+			instr->state = MTD_ERASE_FAILED;
+			instr->fail_addr = addr;
+			return -1;
+		}
+		len -= block_size;
+		addr += block_size;
+	}
+	return 0;
+}
+
+/**
+ * onenand_multiblock_erase - [Internal] erase block(s) using multiblock erase
+ * @param mtd		MTD device structure
+ * @param instr		erase instruction
+ * @param block_size	erase block size
+ *
+ * Erase one or more blocks, up to 64 blocks at a time
+ */
+static int onenand_multiblock_erase(struct mtd_info *mtd,
+				    struct erase_info *instr,
+				    unsigned int block_size)
+{
+	struct onenand_chip *this = mtd->priv;
+	loff_t addr = instr->addr;
+	int len = instr->len;
+	int eb_count = 0;
+	int ret = 0;
+	int bdry_block = 0;
+
+	instr->state = MTD_ERASING;
+
+	if (ONENAND_IS_DDP(this)) {
+		loff_t bdry_addr = this->chipsize >> 1;
+		if (addr < bdry_addr && (addr + len) > bdry_addr)
+			bdry_block = bdry_addr >> this->erase_shift;
+	}
+
+	/* Pre-check bbs */
+	while (len) {
+		/* Check if we have a bad block, we do not erase bad blocks */
+		if (onenand_block_isbad_nolock(mtd, addr, 0)) {
+			printk(KERN_WARNING "%s: attempt to erase a bad block "
+			       "at addr 0x%012llx\n",
+			       __func__, (unsigned long long) addr);
+			instr->state = MTD_ERASE_FAILED;
+			return -EIO;
+		}
+		len -= block_size;
+		addr += block_size;
+	}
+
+	len = instr->len;
+	addr = instr->addr;
+
+	/* loop over 64 eb batches */
+	while (len) {
+		struct erase_info verify_instr = *instr;
+		int max_eb_count = MB_ERASE_MAX_BLK_COUNT;
+
+		verify_instr.addr = addr;
+		verify_instr.len = 0;
+
+		/* do not cross chip boundary */
+		if (bdry_block) {
+			int this_block = (addr >> this->erase_shift);
+
+			if (this_block < bdry_block) {
+				max_eb_count = min(max_eb_count,
+						   (bdry_block - this_block));
+			}
+		}
+
+		eb_count = 0;
+
+		while (len > block_size && eb_count < (max_eb_count - 1)) {
+			this->command(mtd, ONENAND_CMD_MULTIBLOCK_ERASE,
+				      addr, block_size);
+			onenand_invalidate_bufferram(mtd, addr, block_size);
+
+			ret = this->wait(mtd, FL_PREPARING_ERASE);
+			if (ret) {
+				printk(KERN_ERR "%s: Failed multiblock erase, "
+				       "block %d\n", __func__,
+				       onenand_block(this, addr));
+				instr->state = MTD_ERASE_FAILED;
+				instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
+				return -EIO;
+			}
+
+			len -= block_size;
+			addr += block_size;
+			eb_count++;
+		}
+
+		/* last block of 64-eb series */
+		cond_resched();
+		this->command(mtd, ONENAND_CMD_ERASE, addr, block_size);
+		onenand_invalidate_bufferram(mtd, addr, block_size);
+
+		ret = this->wait(mtd, FL_ERASING);
+		/* Check if it is write protected */
+		if (ret) {
+			printk(KERN_ERR "%s: Failed erase, block %d\n",
+			       __func__, onenand_block(this, addr));
+			instr->state = MTD_ERASE_FAILED;
+			instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
+			return -EIO;
+		}
+
+		len -= block_size;
+		addr += block_size;
+		eb_count++;
+
+		/* verify */
+		verify_instr.len = eb_count * block_size;
+		if (onenand_multiblock_erase_verify(mtd, &verify_instr)) {
+			instr->state = verify_instr.state;
+			instr->fail_addr = verify_instr.fail_addr;
+			return -EIO;
+		}
+
+	}
+	return 0;
+}
+
+
 /**
  * onenand_block_by_block_erase - [Internal] erase block(s) using regular erase
  * @param mtd		MTD device structure
@@ -2315,7 +2478,13 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
 	/* Grab the lock and see if the device is available */
 	onenand_get_device(mtd, FL_ERASING);
 
-	ret = onenand_block_by_block_erase(mtd, instr, region, block_size);
+	if (region || instr->len < MB_ERASE_MIN_BLK_COUNT * block_size) {
+		/* region is set for Flex-OneNAND (no mb erase) */
+		ret = onenand_block_by_block_erase(mtd, instr,
+						   region, block_size);
+	} else {
+		ret = onenand_multiblock_erase(mtd, instr, block_size);
+	}
 
 	/* Deselect and wake up anyone waiting on the device */
 	onenand_release_device(mtd);
diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index f350a4879f75..d0bf422ae374 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -41,9 +41,11 @@ typedef enum {
 	/* These 2 come from nand_state_t, which has been unified here */
 	FL_READING,
 	FL_CACHEDPRG,
-	/* These 2 come from onenand_state_t, which has been unified here */
+	/* These 4 come from onenand_state_t, which has been unified here */
 	FL_RESETING,
 	FL_OTPING,
+	FL_PREPARING_ERASE,
+	FL_VERIFYING_ERASE,
 
 	FL_UNKNOWN
 } flstate_t;
diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h
index acadbf53a69f..cd6f3b431195 100644
--- a/include/linux/mtd/onenand_regs.h
+++ b/include/linux/mtd/onenand_regs.h
@@ -131,6 +131,8 @@
 #define ONENAND_CMD_LOCK_TIGHT		(0x2C)
 #define ONENAND_CMD_UNLOCK_ALL		(0x27)
 #define ONENAND_CMD_ERASE		(0x94)
+#define ONENAND_CMD_MULTIBLOCK_ERASE	(0x95)
+#define ONENAND_CMD_ERASE_VERIFY	(0x71)
 #define ONENAND_CMD_RESET		(0xF0)
 #define ONENAND_CMD_OTP_ACCESS		(0x65)
 #define ONENAND_CMD_READID		(0x90)
521' href='#n2521'>2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 
3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 
3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023
/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks and only
 * uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter <clameter@sgi.com>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/kallsyms.h>

/*
 * Lock order:
 *   1. slab_lock(page)
 *   2. slab->list_lock
 *
 *   The slab_lock protects operations on the object of a particular
 *   slab and its metadata in the page struct. If the slab lock
 *   has been taken then no allocations nor frees can be performed
 *   on the objects in the slab nor can the slab be added or removed
 *   from the partial or full lists since this would mean modifying
 *   the page_struct of the slab.
 *
 *   The list_lock protects the partial and full list on each node and
 *   the partial slab counter. If taken then no new slabs may be added or
 *   removed from the lists nor may the number of partial slabs be modified.
 *   (Note that the total number of slabs is an atomic value that may be
 *   modified without taking the list lock).
 *
 *   The list_lock is a centralized lock and thus we avoid taking it as
 *   much as possible. As long as SLUB does not have to handle partial
 *   slabs, operations can continue without any centralized lock. F.e.
 *   allocating a long series of objects that fill up slabs does not require
 *   the list lock.
 *
 *   The lock order is sometimes inverted when we are trying to get a slab
 *   off a list. We take the list_lock and then look for a page on the list
 *   to use. While we do that objects in the slabs may be freed. We can
 *   only operate on the slab if we have also taken the slab_lock. So we use
 *   a slab_trylock() on the slab. If trylock was successful then no frees
 *   can occur anymore and we can use the slab for allocations etc. If the
 *   slab_trylock() does not succeed then frees are in progress in the slab and
 *   we must stay away from it for a while since we may cause a bouncing
 *   cacheline if we try to acquire the lock. So go onto the next slab.
 *   If all pages are busy then we may allocate a new slab instead of reusing
 *   a partial slab. A new slab has no one operating on it and thus there is
 *   no danger of cacheline contention.
 *
 *   Interrupts are disabled during allocation and deallocation in order to
 *   make the slab allocator safe to use in the context of an irq. In addition
 *   interrupts are disabled to ensure that the processor does not change
 *   while handling per_cpu slabs, due to kernel preemption.
 *
 * SLUB assigns one slab for allocation to each processor.
 * Allocations only occur from these slabs called cpu slabs.
 *
 * Slabs with free elements are kept on a partial list and during regular
 * operations no list for full slabs is used. If an object in a full slab is
 * freed then the slab will show up again on the partial lists.
 * We track full slabs for debugging purposes though because otherwise we
 * cannot scan all objects.
 *
 * Slabs are freed when they become empty. Teardown and setup is
 * minimal so we rely on the page allocators per cpu caches for
 * fast frees and allocs.
 *
 * Overloading of page flags that are otherwise used for LRU management.
 *
 * PageActive 		The slab is frozen and exempt from list processing.
 * 			This means that the slab is dedicated to a purpose
 * 			such as satisfying allocations for a specific
 * 			processor. Objects may be freed in the slab while
 * 			it is frozen but slab_free will then skip the usual
 * 			list operations. It is up to the processor holding
 * 			the slab to integrate the slab into the slab lists
 * 			when the slab is no longer needed.
 *
 * 			One use of this flag is to mark slabs that are
 * 			used for allocations. Then such a slab becomes a cpu
 * 			slab. The cpu slab may be equipped with an additional
 * 			freelist that allows lockless access to
 * 			free objects in addition to the regular freelist
 * 			that requires the slab lock.
 *
 * PageError		Slab requires special handling due to debug
 * 			options set. This moves slab handling out of
 * 			the fast path and disables lockless freelists.
 */
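
/*
 * A minimal sketch of the inverted lock order described above, roughly the
 * shape of the partial-list scan (see get_partial_node() and slab_trylock()
 * further down in this file for the real code):
 *
 *	spin_lock(&n->list_lock);
 *	list_for_each_entry(page, &n->partial, lru) {
 *		if (slab_trylock(page)) {
 *			... freeze and use this slab ...
 *			break;
 *		}
 *	}
 *	spin_unlock(&n->list_lock);
 *
 * A failed trylock simply means frees are in progress on that slab, so the
 * scan moves on instead of bouncing a contended cacheline.
 */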

#define FROZEN (1 << PG_active)

#ifdef CONFIG_SLUB_DEBUG
#define SLABDEBUG (1 << PG_error)
#else
#define SLABDEBUG 0
#endif

static inline int SlabFrozen(struct page *page)
{
	return page->flags & FROZEN;
}

static inline void SetSlabFrozen(struct page *page)
{
	page->flags |= FROZEN;
}

static inline void ClearSlabFrozen(struct page *page)
{
	page->flags &= ~FROZEN;
}

static inline int SlabDebug(struct page *page)
{
	return page->flags & SLABDEBUG;
}

static inline void SetSlabDebug(struct page *page)
{
	page->flags |= SLABDEBUG;
}

static inline void ClearSlabDebug(struct page *page)
{
	page->flags &= ~SLABDEBUG;
}

/*
 * Issues still to be resolved:
 *
 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
 *
 * - Variable sizing of the per node arrays
 */

/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST

#if PAGE_SHIFT <= 12

/*
 * Small page size. Make sure that we do not fragment memory
 */
#define DEFAULT_MAX_ORDER 1
#define DEFAULT_MIN_OBJECTS 4

#else

/*
 * Large page machines are customarily able to handle larger
 * page orders.
 */
#define DEFAULT_MAX_ORDER 2
#define DEFAULT_MIN_OBJECTS 8

#endif

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 2

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * Set of flags that will prevent slab merging
 */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU)

#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA)

#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif

/* Internal SLUB flags */
#define __OBJECT_POISON		0x80000000 /* Poison object */
#define __SYSFS_ADD_DEFERRED	0x40000000 /* Not yet visible via sysfs */

/* Not all arches define cache_line_size */
#ifndef cache_line_size
#define cache_line_size()	L1_CACHE_BYTES
#endif

static int kmem_size = sizeof(struct kmem_cache);

#ifdef CONFIG_SMP
static struct notifier_block slab_notifier;
#endif

static enum {
	DOWN,		/* No slab functionality available */
	PARTIAL,	/* kmem_cache_open() works but kmalloc does not */
	UP,		/* Everything works but does not show up in sysfs */
	SYSFS		/* Sysfs up */
} slab_state = DOWN;

/* A list of all slab caches on the system */
static DECLARE_RWSEM(slub_lock);
static LIST_HEAD(slab_caches);

/*
 * Tracking user of a slab.
 */
struct track {
	void *addr;		/* Called from address */
	int cpu;		/* Was running on cpu */
	int pid;		/* Pid context */
	unsigned long when;	/* When did the operation occur */
};

enum track_item { TRACK_ALLOC, TRACK_FREE };

#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
static inline void sysfs_slab_remove(struct kmem_cache *s) {}
#endif

/********************************************************************
 * 			Core slab cache functions
 *******************************************************************/

int slab_is_available(void)
{
	return slab_state >= UP;
}

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
#ifdef CONFIG_NUMA
	return s->node[node];
#else
	return &s->local_node;
#endif
}

static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
{
#ifdef CONFIG_SMP
	return s->cpu_slab[cpu];
#else
	return &s->cpu_slab;
#endif
}

static inline int check_valid_pointer(struct kmem_cache *s,
				struct page *page, const void *object)
{
	void *base;

	if (!object)
		return 1;

	base = page_address(page);
	if (object < base || object >= base + s->objects * s->size ||
		(object - base) % s->size) {
		return 0;
	}

	return 1;
}

/*
 * Slow version of get and set free pointer.
 *
 * This version requires touching the cache lines of kmem_cache which
 * we avoid to do in the fast alloc free paths. There we obtain the offset
 * from the page struct.
 */
static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
	return *(void **)(object + s->offset);
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	*(void **)(object + s->offset) = fp;
}

/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr) \
	for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\
			__p += (__s)->size)

/* Scan freelist */
#define for_each_free_object(__p, __s, __free) \
	for (__p = (__free); __p; __p = get_freepointer((__s), __p))
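
/*
 * A short usage sketch for the iterators above, along the lines of the
 * bitmap walk done by the validation code later in this file: mark every
 * free object, then treat the unmarked slots as allocated.
 *
 *	void *addr = page_address(page);
 *	for_each_free_object(p, s, page->freelist)
 *		set_bit(slab_index(p, s, addr), map);
 *	for_each_object(p, s, addr)
 *		if (!test_bit(slab_index(p, s, addr), map))
 *			... p is an allocated object ...
 */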

/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
{
	return (p - addr) / s->size;
}

#ifdef CONFIG_SLUB_DEBUG
/*
 * Debug settings:
 */
#ifdef CONFIG_SLUB_DEBUG_ON
static int slub_debug = DEBUG_DEFAULT_FLAGS;
#else
static int slub_debug;
#endif

static char *slub_debug_slabs;

/*
 * Object debugging
 */
static void print_section(char *text, u8 *addr, unsigned int length)
{
	int i, offset;
	int newline = 1;
	char ascii[17];

	ascii[16] = 0;

	for (i = 0; i < length; i++) {
		if (newline) {
			printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
			newline = 0;
		}
		printk(" %02x", addr[i]);
		offset = i % 16;
		ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
		if (offset == 15) {
			printk(" %s\n",ascii);
			newline = 1;
		}
	}
	if (!newline) {
		i %= 16;
		while (i < 16) {
			printk("   ");
			ascii[i] = ' ';
			i++;
		}
		printk(" %s\n", ascii);
	}
}

static struct track *get_track(struct kmem_cache *s, void *object,
	enum track_item alloc)
{
	struct track *p;

	if (s->offset)
		p = object + s->offset + sizeof(void *);
	else
		p = object + s->inuse;

	return p + alloc;
}

static void set_track(struct kmem_cache *s, void *object,
				enum track_item alloc, void *addr)
{
	struct track *p;

	if (s->offset)
		p = object + s->offset + sizeof(void *);
	else
		p = object + s->inuse;

	p += alloc;
	if (addr) {
		p->addr = addr;
		p->cpu = smp_processor_id();
		p->pid = current ? current->pid : -1;
		p->when = jiffies;
	} else
		memset(p, 0, sizeof(struct track));
}

static void init_tracking(struct kmem_cache *s, void *object)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	set_track(s, object, TRACK_FREE, NULL);
	set_track(s, object, TRACK_ALLOC, NULL);
}

static void print_track(const char *s, struct track *t)
{
	if (!t->addr)
		return;

	printk(KERN_ERR "INFO: %s in ", s);
	__print_symbol("%s", (unsigned long)t->addr);
	printk(" age=%lu cpu=%u pid=%d\n", jiffies - t->when, t->cpu, t->pid);
}

static void print_tracking(struct kmem_cache *s, void *object)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	print_track("Allocated", get_track(s, object, TRACK_ALLOC));
	print_track("Freed", get_track(s, object, TRACK_FREE));
}

static void print_page_info(struct page *page)
{
	printk(KERN_ERR "INFO: Slab 0x%p used=%u fp=0x%p flags=0x%04lx\n",
		page, page->inuse, page->freelist, page->flags);

}

static void slab_bug(struct kmem_cache *s, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printk(KERN_ERR "========================================"
			"=====================================\n");
	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
	printk(KERN_ERR "----------------------------------------"
			"-------------------------------------\n\n");
}

static void slab_fix(struct kmem_cache *s, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
}

static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
	unsigned int off;	/* Offset of last byte */
	u8 *addr = page_address(page);

	print_tracking(s, p);

	print_page_info(page);

	printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
			p, p - addr, get_freepointer(s, p));

	if (p > addr + 16)
		print_section("Bytes b4", p - 16, 16);

	print_section("Object", p, min(s->objsize, 128));

	if (s->flags & SLAB_RED_ZONE)
		print_section("Redzone", p + s->objsize,
			s->inuse - s->objsize);

	if (s->offset)
		off = s->offset + sizeof(void *);
	else
		off = s->inuse;

	if (s->flags & SLAB_STORE_USER)
		off += 2 * sizeof(struct track);

	if (off != s->size)
		/* Beginning of the filler is the free pointer */
		print_section("Padding", p + off, s->size - off);

	dump_stack();
}

static void object_err(struct kmem_cache *s, struct page *page,
			u8 *object, char *reason)
{
	slab_bug(s, reason);
	print_trailer(s, page, object);
}

static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	slab_bug(s, fmt);
	print_page_info(page);
	dump_stack();
}

static void init_object(struct kmem_cache *s, void *object, int active)
{
	u8 *p = object;

	if (s->flags & __OBJECT_POISON) {
		memset(p, POISON_FREE, s->objsize - 1);
		p[s->objsize - 1] = POISON_END;
	}

	if (s->flags & SLAB_RED_ZONE)
		memset(p + s->objsize,
			active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE,
			s->inuse - s->objsize);
}

static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
{
	while (bytes) {
		if (*start != (u8)value)
			return start;
		start++;
		bytes--;
	}
	return NULL;
}

static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
						void *from, void *to)
{
	slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
	memset(from, data, to - from);
}

static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
			u8 *object, char *what,
			u8* start, unsigned int value, unsigned int bytes)
{
	u8 *fault;
	u8 *end;

	fault = check_bytes(start, value, bytes);
	if (!fault)
		return 1;

	end = start + bytes;
	while (end > fault && end[-1] == value)
		end--;

	slab_bug(s, "%s overwritten", what);
	printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
					fault, end - 1, fault[0], value);
	print_trailer(s, page, object);

	restore_bytes(s, what, value, fault, end);
	return 0;
}

/*
 * Object layout:
 *
 * object address
 * 	Bytes of the object to be managed.
 * 	If the freepointer may overlay the object then the free
 * 	pointer is the first word of the object.
 *
 * 	Poisoning uses 0x6b (POISON_FREE) and the last byte is
 * 	0xa5 (POISON_END)
 *
 * object + s->objsize
 * 	Padding to reach word boundary. This is also used for Redzoning.
 * 	Padding is extended by another word if Redzoning is enabled and
 * 	objsize == inuse.
 *
 * 	We fill with 0xbb (RED_INACTIVE) for inactive objects and with
 * 	0xcc (RED_ACTIVE) for objects in use.
 *
 * object + s->inuse
 * 	Meta data starts here.
 *
 * 	A. Free pointer (if we cannot overwrite object on free)
 * 	B. Tracking data for SLAB_STORE_USER
 * 	C. Padding to reach required alignment boundary or at minimum
 * 		one word if debugging is on to be able to detect writes
 * 		before the word boundary.
 *
 *	Padding is done using 0x5a (POISON_INUSE)
 *
 * object + s->size
 * 	Nothing is used beyond s->size.
 *
 * If slabcaches are merged then the objsize and inuse boundaries are mostly
 * ignored. And therefore no slab options that rely on these boundaries
 * may be used with merged slabcaches.
 */
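
/*
 * A worked example of the layout above, with hypothetical numbers for a
 * 64-bit build with SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER set and
 * the free pointer kept outside the object: objsize = 24, inuse = 32,
 * offset = 32, size = 88.
 *
 *	 0..23	object; poisoned with 0x6b and a trailing 0xa5 while free
 *	24..31	red zone; 0xbb while free, 0xcc while allocated
 *	32..39	free pointer (s->offset == 32)
 *	40..87	two struct track records (TRACK_ALLOC and TRACK_FREE)
 *
 * Any 0x5a filler between the metadata and s->size is verified by
 * check_pad_bytes() below.
 */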

static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
{
	unsigned long off = s->inuse;	/* The end of info */

	if (s->offset)
		/* Freepointer is placed after the object. */
		off += sizeof(void *);

	if (s->flags & SLAB_STORE_USER)
		/* We also have user information there */
		off += 2 * sizeof(struct track);

	if (s->size == off)
		return 1;

	return check_bytes_and_report(s, page, p, "Object padding",
				p + off, POISON_INUSE, s->size - off);
}

static int slab_pad_check(struct kmem_cache *s, struct page *page)
{
	u8 *start;
	u8 *fault;
	u8 *end;
	int length;
	int remainder;

	if (!(s->flags & SLAB_POISON))
		return 1;

	start = page_address(page);
	end = start + (PAGE_SIZE << s->order);
	length = s->objects * s->size;
	remainder = end - (start + length);
	if (!remainder)
		return 1;

	fault = check_bytes(start + length, POISON_INUSE, remainder);
	if (!fault)
		return 1;
	while (end > fault && end[-1] == POISON_INUSE)
		end--;

	slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
	print_section("Padding", start, length);

	restore_bytes(s, "slab padding", POISON_INUSE, start, end);
	return 0;
}

static int check_object(struct kmem_cache *s, struct page *page,
					void *object, int active)
{
	u8 *p = object;
	u8 *endobject = object + s->objsize;

	if (s->flags & SLAB_RED_ZONE) {
		unsigned int red =
			active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE;

		if (!check_bytes_and_report(s, page, object, "Redzone",
			endobject, red, s->inuse - s->objsize))
			return 0;
	} else {
		if ((s->flags & SLAB_POISON) && s->objsize < s->inuse)
			check_bytes_and_report(s, page, p, "Alignment padding", endobject,
				POISON_INUSE, s->inuse - s->objsize);
	}

	if (s->flags & SLAB_POISON) {
		if (!active && (s->flags & __OBJECT_POISON) &&
			(!check_bytes_and_report(s, page, p, "Poison", p,
					POISON_FREE, s->objsize - 1) ||
			 !check_bytes_and_report(s, page, p, "Poison",
			 	p + s->objsize -1, POISON_END, 1)))
			return 0;
		/*
		 * check_pad_bytes cleans up on its own.
		 */
		check_pad_bytes(s, page, p);
	}

	if (!s->offset && active)
		/*
		 * Object and freepointer overlap. Cannot check
		 * freepointer while object is allocated.
		 */
		return 1;

	/* Check free pointer validity */
	if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
		object_err(s, page, p, "Freepointer corrupt");
		/*
		 * No choice but to zap it and thus lose the remainder
		 * of the free objects in this slab. May cause
		 * another error because the object count is now wrong.
		 */
		set_freepointer(s, p, NULL);
		return 0;
	}
	return 1;
}

static int check_slab(struct kmem_cache *s, struct page *page)
{
	VM_BUG_ON(!irqs_disabled());

	if (!PageSlab(page)) {
		slab_err(s, page, "Not a valid slab page");
		return 0;
	}