95 files changed, 5518 insertions, 4098 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index af50f9bbe68e..4d880b3d1f35 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
| @@ -1014,49 +1014,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
| 1014 | 1014 | ||
| 1015 | mga= [HW,DRM] | 1015 | mga= [HW,DRM] |
| 1016 | 1016 | ||
| 1017 | migration_cost= | ||
| 1018 | [KNL,SMP] debug: override scheduler migration costs | ||
| 1019 | Format: <level-1-usecs>,<level-2-usecs>,... | ||
| 1020 | This debugging option can be used to override the | ||
| 1021 | default scheduler migration cost matrix. The numbers | ||
| 1022 | are indexed by 'CPU domain distance'. | ||
| 1023 | E.g. migration_cost=1000,2000,3000 on an SMT NUMA | ||
| 1024 | box will set up an intra-core migration cost of | ||
| 1025 | 1 msec, an inter-core migration cost of 2 msecs, | ||
| 1026 | and an inter-node migration cost of 3 msecs. | ||
| 1027 | |||
| 1028 | WARNING: using the wrong values here can break | ||
| 1029 | scheduler performance, so it's only for scheduler | ||
| 1030 | development purposes, not production environments. | ||
| 1031 | |||
| 1032 | migration_debug= | ||
| 1033 | [KNL,SMP] migration cost auto-detect verbosity | ||
| 1034 | Format=<0|1|2> | ||
| 1035 | If a system's migration matrix reported at bootup | ||
| 1036 | seems erroneous then this option can be used to | ||
| 1037 | increase verbosity of the detection process. | ||
| 1038 | We default to 0 (no extra messages), 1 will print | ||
| 1039 | some more information, and 2 will be really | ||
| 1040 | verbose (probably only useful if you also have a | ||
| 1041 | serial console attached to the system). | ||
| 1042 | |||
| 1043 | migration_factor= | ||
| 1044 | [KNL,SMP] multiply/divide migration costs by a factor | ||
| 1045 | Format=<percent> | ||
| 1046 | This debug option can be used to proportionally | ||
| 1047 | increase or decrease the auto-detected migration | ||
| 1048 | costs for all entries of the migration matrix. | ||
| 1049 | E.g. migration_factor=150 will increase migration | ||
| 1050 | costs by 50%. (and thus the scheduler will be less | ||
| 1051 | eager to migrate cache-hot tasks) | ||
| 1052 | migration_factor=80 will decrease migration costs | ||
| 1053 | by 20%. (thus the scheduler will be more eager to | ||
| 1054 | migrate tasks) | ||
| 1055 | |||
| 1056 | WARNING: using the wrong values here can break | ||
| 1057 | scheduler performance, so it's only for scheduler | ||
| 1058 | development purposes, not production environments. | ||
| 1059 | |||
| 1060 | mousedev.tap_time= | 1017 | mousedev.tap_time= |
| 1061 | [MOUSE] Maximum time between finger touching and | 1018 | [MOUSE] Maximum time between finger touching and |
| 1062 | leaving touchpad surface for touch to be considered | 1019 | leaving touchpad surface for touch to be considered |
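For reference, the removed migration_factor= parameter applied a plain percentage scale to every entry of the auto-detected migration-cost matrix. A minimal sketch of that scaling, with illustrative names rather than the actual kernel symbols:

    #define MAX_DOMAIN_DISTANCE 8   /* illustrative bound, not the kernel's */

    static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE];

    /* migration_factor=150 scaled every cost up by 50%,
     * migration_factor=80 scaled every cost down by 20%. */
    static void apply_migration_factor(unsigned int percent)
    {
            int i;

            for (i = 0; i < MAX_DOMAIN_DISTANCE; i++)
                    migration_cost[i] = migration_cost[i] * percent / 100;
    }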
diff --git a/Documentation/sched-design-CFS.txt b/Documentation/sched-design-CFS.txt new file mode 100644 index 000000000000..16feebb7bdc0 --- /dev/null +++ b/Documentation/sched-design-CFS.txt | |||
| @@ -0,0 +1,119 @@ | |||
| 1 | |||
| 2 | This is the CFS scheduler. | ||
| 3 | |||
| 4 | 80% of CFS's design can be summed up in a single sentence: CFS basically | ||
| 5 | models an "ideal, precise multi-tasking CPU" on real hardware. | ||
| 6 | |||
| 7 | "Ideal multi-tasking CPU" is a (non-existent :-)) CPU that has 100% | ||
| 8 | physical power and which can run each task at precise equal speed, in | ||
| 9 | parallel, each at 1/nr_running speed. For example: if there are 2 tasks | ||
| 10 | running then it runs each at 50% physical power - totally in parallel. | ||
| 11 | |||
| 12 | On real hardware, we can run only a single task at once, so while that | ||
| 13 | one task runs, the other tasks that are waiting for the CPU are at a | ||
| 14 | disadvantage - the current task gets an unfair amount of CPU time. In | ||
| 15 | CFS this fairness imbalance is expressed and tracked via the per-task | ||
| 16 | p->wait_runtime (nanosec-unit) value. "wait_runtime" is the amount of | ||
| 17 | time the task should now run on the CPU for it to become completely fair | ||
| 18 | and balanced. | ||
| 19 | |||
| 20 | ( small detail: on 'ideal' hardware, the p->wait_runtime value would | ||
| 21 | always be zero - no task would ever get 'out of balance' from the | ||
| 22 | 'ideal' share of CPU time. ) | ||
| 23 | |||
| 24 | CFS's task picking logic is based on this p->wait_runtime value and it | ||
| 25 | is thus very simple: it always tries to run the task with the largest | ||
| 26 | p->wait_runtime value. In other words, CFS tries to run the task with | ||
| 27 | the 'gravest need' for more CPU time. So CFS always tries to split up | ||
| 28 | CPU time between runnable tasks as close to 'ideal multitasking | ||
| 29 | hardware' as possible. | ||
| 30 | |||
| 31 | Most of the rest of CFS's design just falls out of this really simple | ||
| 32 | concept, with a few add-on embellishments like nice levels, | ||
| 33 | multiprocessing and various algorithm variants to recognize sleepers. | ||
| 34 | |||
| 35 | In practice it works like this: the system runs a task a bit, and when | ||
| 36 | the task schedules (or a scheduler tick happens) the task's CPU usage is | ||
| 37 | 'accounted for': the (small) time it just spent using the physical CPU | ||
| 38 | is deducted from p->wait_runtime. [minus the 'fair share' it would have | ||
| 39 | gotten anyway]. Once p->wait_runtime gets low enough so that another | ||
| 40 | task becomes the 'leftmost task' of the time-ordered rbtree it maintains | ||
| 41 | (plus a small amount of 'granularity' distance relative to the leftmost | ||
| 42 | task so that we do not over-schedule tasks and thrash the cache) then the | ||
| 43 | new leftmost task is picked and the current task is preempted. | ||
| 44 | |||
| 45 | The rq->fair_clock value tracks the 'CPU time a runnable task would have | ||
| 46 | fairly gotten, had it been runnable during that time'. So by using | ||
| 47 | rq->fair_clock values we can accurately timestamp and measure the | ||
| 48 | 'expected CPU time' a task should have gotten. All runnable tasks are | ||
| 49 | sorted in the rbtree by the "rq->fair_clock - p->wait_runtime" key, and | ||
| 50 | CFS picks the 'leftmost' task and sticks to it. As the system progresses | ||
| 51 | forwards, newly woken tasks are put into the tree more and more to the | ||
| 52 | right - slowly but surely giving a chance for every task to become the | ||
| 53 | 'leftmost task' and thus get on the CPU within a deterministic amount of | ||
| 54 | time. | ||
| 55 | |||
| 56 | Some implementation details: | ||
| 57 | |||
| 58 | - the introduction of Scheduling Classes: an extensible hierarchy of | ||
| 59 | scheduler modules. These modules encapsulate scheduling policy | ||
| 60 | details and are handled by the scheduler core without the core | ||
| 61 | code assuming too much about them. | ||
| 62 | |||
| 63 | - sched_fair.c implements the 'CFS desktop scheduler': it is a | ||
| 64 | replacement for the vanilla scheduler's SCHED_OTHER interactivity | ||
| 65 | code. | ||
| 66 | |||
| 67 | I'd like to give credit to Con Kolivas for the general approach here: | ||
| 68 | he has proven via RSDL/SD that 'fair scheduling' is possible and that | ||
| 69 | it results in better desktop scheduling. Kudos Con! | ||
| 70 | |||
| 71 | The CFS patch uses a completely different approach and implementation | ||
| 72 | from RSDL/SD. My goal was to make CFS's interactivity quality exceed | ||
| 73 | that of RSDL/SD, which is a high standard to meet :-) Testing | ||
| 74 | feedback is welcome to decide this one way or another. [ and, in any | ||
| 75 | case, all of SD's logic could be added via a kernel/sched_sd.c module | ||
| 76 | as well, if Con is interested in such an approach. ] | ||
| 77 | |||
| 78 | CFS's design is quite radical: it does not use runqueues, it uses a | ||
| 79 | time-ordered rbtree to build a 'timeline' of future task execution, | ||
| 80 | and thus has no 'array switch' artifacts (by which both the vanilla | ||
| 81 | scheduler and RSDL/SD are affected). | ||
| 82 | |||
| 83 | CFS uses nanosecond granularity accounting and does not rely on any | ||
| 84 | jiffies or other HZ detail. Thus the CFS scheduler has no notion of | ||
| 85 | 'timeslices' and has no heuristics whatsoever. There is only one | ||
| 86 | central tunable: | ||
| 87 | |||
| 88 | /proc/sys/kernel/sched_granularity_ns | ||
| 89 | |||
| 90 | which can be used to tune the scheduler from 'desktop' (low | ||
| 91 | latencies) to 'server' (good batching) workloads. It defaults to a | ||
| 92 | setting suitable for desktop workloads. SCHED_BATCH is handled by the | ||
| 93 | CFS scheduler module too. | ||
| 94 | |||
| 95 | Due to its design, the CFS scheduler is not prone to any of the | ||
| 96 | 'attacks' that exist today against the heuristics of the stock | ||
| 97 | scheduler: fiftyp.c, thud.c, chew.c, ring-test.c, massive_intr.c all | ||
| 98 | work fine, do not impact interactivity and produce the expected | ||
| 99 | behavior. | ||
| 100 | |||
| 101 | The CFS scheduler has a much stronger handling of nice levels and | ||
| 102 | SCHED_BATCH: both types of workloads should be isolated much more | ||
| 103 | aggressively than under the vanilla scheduler. | ||
| 104 | |||
| 105 | ( another detail: due to nanosec accounting and timeline sorting, | ||
| 106 | sched_yield() support is very simple under CFS, and in fact under | ||
| 107 | CFS sched_yield() behaves much better than under any other | ||
| 108 | scheduler I have tested so far. ) | ||
| 109 | |||
| 110 | - sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler | ||
| 111 | way than the vanilla scheduler does. It uses 100 runqueues (for all | ||
| 112 | 100 RT priority levels, instead of 140 in the vanilla scheduler) | ||
| 113 | and it needs no expired array. | ||
| 114 | |||
| 115 | - reworked/sanitized SMP load-balancing: the runqueue-walking | ||
| 116 | assumptions are gone from the load-balancing code now, and | ||
| 117 | iterators of the scheduling modules are used. The balancing code got | ||
| 118 | quite a bit simpler as a result. | ||
| 119 | |||
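A minimal user-space sketch of the ordering described above, assuming the simplified field names from this document rather than the exact kernel structures: tasks are keyed by rq->fair_clock - p->wait_runtime, so the task owed the most CPU time has the smallest key and becomes the 'leftmost' task.

    #include <stdio.h>

    struct task {
            const char *name;
            long long wait_runtime;         /* ns of CPU time the task is owed */
    };

    /* rbtree key: fair_clock - wait_runtime; the smallest key runs next */
    static long long cfs_key(long long fair_clock, const struct task *p)
    {
            return fair_clock - p->wait_runtime;
    }

    int main(void)
    {
            long long fair_clock = 1000000;
            struct task a = { "a", 300000 }, b = { "b", 50000 };

            /* a is owed more time, so its key is smaller and it is picked */
            printf("key(a)=%lld key(b)=%lld\n",
                   cfs_key(fair_clock, &a), cfs_key(fair_clock, &b));
            return 0;
    }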
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 88baed1e7e83..0b2954534b8e 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
| @@ -941,17 +941,6 @@ exit: | |||
| 941 | } | 941 | } |
| 942 | #endif | 942 | #endif |
| 943 | 943 | ||
| 944 | static void smp_tune_scheduling(void) | ||
| 945 | { | ||
| 946 | if (cpu_khz) { | ||
| 947 | /* cache size in kB */ | ||
| 948 | long cachesize = boot_cpu_data.x86_cache_size; | ||
| 949 | |||
| 950 | if (cachesize > 0) | ||
| 951 | max_cache_size = cachesize * 1024; | ||
| 952 | } | ||
| 953 | } | ||
| 954 | |||
| 955 | /* | 944 | /* |
| 956 | * Cycle through the processors sending APIC IPIs to boot each. | 945 | * Cycle through the processors sending APIC IPIs to boot each. |
| 957 | */ | 946 | */ |
| @@ -980,7 +969,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) | |||
| 980 | x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; | 969 | x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; |
| 981 | 970 | ||
| 982 | current_thread_info()->cpu = 0; | 971 | current_thread_info()->cpu = 0; |
| 983 | smp_tune_scheduling(); | ||
| 984 | 972 | ||
| 985 | set_cpu_sibling_map(0); | 973 | set_cpu_sibling_map(0); |
| 986 | 974 | ||
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index f64b81f3033b..ea63a30ca3e8 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | * See comments there for proper credits. | 4 | * See comments there for proper credits. |
| 5 | */ | 5 | */ |
| 6 | 6 | ||
| 7 | #include <linux/sched.h> | ||
| 7 | #include <linux/clocksource.h> | 8 | #include <linux/clocksource.h> |
| 8 | #include <linux/workqueue.h> | 9 | #include <linux/workqueue.h> |
| 9 | #include <linux/cpufreq.h> | 10 | #include <linux/cpufreq.h> |
| @@ -106,8 +107,13 @@ unsigned long long sched_clock(void) | |||
| 106 | 107 | ||
| 107 | /* | 108 | /* |
| 108 | * Fall back to jiffies if there's no TSC available: | 109 | * Fall back to jiffies if there's no TSC available: |
| 110 | * ( But note that we still use it if the TSC is marked | ||
| 111 | * unstable. We do this because unlike Time Of Day, | ||
| 112 | * the scheduler clock tolerates small errors and it's | ||
| 113 | * very important for it to be as fast as the platform | ||
| 114 | * can achieve it. ) | ||
| 109 | */ | 115 | */ |
| 110 | if (unlikely(!tsc_enabled)) | 116 | if (unlikely(!tsc_enabled && !tsc_unstable)) |
| 111 | /* No locking but a rare wrong value is not a big deal: */ | 117 | /* No locking but a rare wrong value is not a big deal: */ |
| 112 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); | 118 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); |
| 113 | 119 | ||
| @@ -277,6 +283,7 @@ static struct clocksource clocksource_tsc = { | |||
| 277 | 283 | ||
| 278 | void mark_tsc_unstable(char *reason) | 284 | void mark_tsc_unstable(char *reason) |
| 279 | { | 285 | { |
| 286 | sched_clock_unstable_event(); | ||
| 280 | if (!tsc_unstable) { | 287 | if (!tsc_unstable) { |
| 281 | tsc_unstable = 1; | 288 | tsc_unstable = 1; |
| 282 | tsc_enabled = 0; | 289 | tsc_enabled = 0; |
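The jiffies fallback above charges 1e9/HZ nanoseconds per tick; a small sketch of the arithmetic, with an HZ value assumed purely for illustration:

    #define HZ 250  /* assumed here; the real value is a kernel config choice */

    /* With HZ == 250 every tick advances sched_clock() by 4,000,000 ns */
    static unsigned long long jiffies_to_sched_ns(unsigned long long jiffies)
    {
            return jiffies * (1000000000ULL / HZ);
    }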
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index eaa6a24bc0b6..188fb73c6845 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c | |||
| @@ -805,7 +805,6 @@ static void __cpuinit | |||
| 805 | get_max_cacheline_size (void) | 805 | get_max_cacheline_size (void) |
| 806 | { | 806 | { |
| 807 | unsigned long line_size, max = 1; | 807 | unsigned long line_size, max = 1; |
| 808 | unsigned int cache_size = 0; | ||
| 809 | u64 l, levels, unique_caches; | 808 | u64 l, levels, unique_caches; |
| 810 | pal_cache_config_info_t cci; | 809 | pal_cache_config_info_t cci; |
| 811 | s64 status; | 810 | s64 status; |
| @@ -835,8 +834,6 @@ get_max_cacheline_size (void) | |||
| 835 | line_size = 1 << cci.pcci_line_size; | 834 | line_size = 1 << cci.pcci_line_size; |
| 836 | if (line_size > max) | 835 | if (line_size > max) |
| 837 | max = line_size; | 836 | max = line_size; |
| 838 | if (cache_size < cci.pcci_cache_size) | ||
| 839 | cache_size = cci.pcci_cache_size; | ||
| 840 | if (!cci.pcci_unified) { | 837 | if (!cci.pcci_unified) { |
| 841 | status = ia64_pal_cache_config_info(l, | 838 | status = ia64_pal_cache_config_info(l, |
| 842 | /* cache_type (instruction)= */ 1, | 839 | /* cache_type (instruction)= */ 1, |
| @@ -853,9 +850,6 @@ get_max_cacheline_size (void) | |||
| 853 | ia64_i_cache_stride_shift = cci.pcci_stride; | 850 | ia64_i_cache_stride_shift = cci.pcci_stride; |
| 854 | } | 851 | } |
| 855 | out: | 852 | out: |
| 856 | #ifdef CONFIG_SMP | ||
| 857 | max_cache_size = max(max_cache_size, cache_size); | ||
| 858 | #endif | ||
| 859 | if (max > ia64_max_cacheline_size) | 853 | if (max > ia64_max_cacheline_size) |
| 860 | ia64_max_cacheline_size = max; | 854 | ia64_max_cacheline_size = max; |
| 861 | } | 855 | } |
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 67edfa7ed93a..a1b017f2dbb3 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c | |||
| @@ -51,16 +51,6 @@ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ | |||
| 51 | EXPORT_SYMBOL(phys_cpu_present_map); | 51 | EXPORT_SYMBOL(phys_cpu_present_map); |
| 52 | EXPORT_SYMBOL(cpu_online_map); | 52 | EXPORT_SYMBOL(cpu_online_map); |
| 53 | 53 | ||
| 54 | /* This happens early in bootup, can't really do it better */ | ||
| 55 | static void smp_tune_scheduling (void) | ||
| 56 | { | ||
| 57 | struct cache_desc *cd = &current_cpu_data.scache; | ||
| 58 | unsigned long cachesize = cd->linesz * cd->sets * cd->ways; | ||
| 59 | |||
| 60 | if (cachesize > max_cache_size) | ||
| 61 | max_cache_size = cachesize; | ||
| 62 | } | ||
| 63 | |||
| 64 | extern void __init calibrate_delay(void); | 54 | extern void __init calibrate_delay(void); |
| 65 | extern ATTRIB_NORET void cpu_idle(void); | 55 | extern ATTRIB_NORET void cpu_idle(void); |
| 66 | 56 | ||
| @@ -228,7 +218,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
| 228 | { | 218 | { |
| 229 | init_new_context(current, &init_mm); | 219 | init_new_context(current, &init_mm); |
| 230 | current_thread_info()->cpu = 0; | 220 | current_thread_info()->cpu = 0; |
| 231 | smp_tune_scheduling(); | ||
| 232 | plat_prepare_cpus(max_cpus); | 221 | plat_prepare_cpus(max_cpus); |
| 233 | #ifndef CONFIG_HOTPLUG_CPU | 222 | #ifndef CONFIG_HOTPLUG_CPU |
| 234 | cpu_present_map = cpu_possible_map; | 223 | cpu_present_map = cpu_possible_map; |
diff --git a/arch/sparc/kernel/smp.c b/arch/sparc/kernel/smp.c index 4d9ad59031bb..4fea3ac7bff0 100644 --- a/arch/sparc/kernel/smp.c +++ b/arch/sparc/kernel/smp.c | |||
| @@ -68,16 +68,6 @@ void __cpuinit smp_store_cpu_info(int id) | |||
| 68 | cpu_data(id).prom_node = cpu_node; | 68 | cpu_data(id).prom_node = cpu_node; |
| 69 | cpu_data(id).mid = cpu_get_hwmid(cpu_node); | 69 | cpu_data(id).mid = cpu_get_hwmid(cpu_node); |
| 70 | 70 | ||
| 71 | /* this is required to tune the scheduler correctly */ | ||
| 72 | /* is it possible to have CPUs with different cache sizes? */ | ||
| 73 | if (id == boot_cpu_id) { | ||
| 74 | int cache_line,cache_nlines; | ||
| 75 | cache_line = 0x20; | ||
| 76 | cache_line = prom_getintdefault(cpu_node, "ecache-line-size", cache_line); | ||
| 77 | cache_nlines = 0x8000; | ||
| 78 | cache_nlines = prom_getintdefault(cpu_node, "ecache-nlines", cache_nlines); | ||
| 79 | max_cache_size = cache_line * cache_nlines; | ||
| 80 | } | ||
| 81 | if (cpu_data(id).mid < 0) | 71 | if (cpu_data(id).mid < 0) |
| 82 | panic("No MID found for CPU%d at node 0x%08d", id, cpu_node); | 72 | panic("No MID found for CPU%d at node 0x%08d", id, cpu_node); |
| 83 | } | 73 | } |
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 4dcd7d0b60f2..40e40f968d61 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c | |||
| @@ -1163,32 +1163,6 @@ int setup_profiling_timer(unsigned int multiplier) | |||
| 1163 | return -EINVAL; | 1163 | return -EINVAL; |
| 1164 | } | 1164 | } |
| 1165 | 1165 | ||
| 1166 | static void __init smp_tune_scheduling(void) | ||
| 1167 | { | ||
| 1168 | unsigned int smallest = ~0U; | ||
| 1169 | int i; | ||
| 1170 | |||
| 1171 | for (i = 0; i < NR_CPUS; i++) { | ||
| 1172 | unsigned int val = cpu_data(i).ecache_size; | ||
| 1173 | |||
| 1174 | if (val && val < smallest) | ||
| 1175 | smallest = val; | ||
| 1176 | } | ||
| 1177 | |||
| 1178 | /* Any value less than 256K is nonsense. */ | ||
| 1179 | if (smallest < (256U * 1024U)) | ||
| 1180 | smallest = 256 * 1024; | ||
| 1181 | |||
| 1182 | max_cache_size = smallest; | ||
| 1183 | |||
| 1184 | if (smallest < 1U * 1024U * 1024U) | ||
| 1185 | printk(KERN_INFO "Using max_cache_size of %uKB\n", | ||
| 1186 | smallest / 1024U); | ||
| 1187 | else | ||
| 1188 | printk(KERN_INFO "Using max_cache_size of %uMB\n", | ||
| 1189 | smallest / 1024U / 1024U); | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | /* Constrain the number of cpus to max_cpus. */ | 1166 | /* Constrain the number of cpus to max_cpus. */ |
| 1193 | void __init smp_prepare_cpus(unsigned int max_cpus) | 1167 | void __init smp_prepare_cpus(unsigned int max_cpus) |
| 1194 | { | 1168 | { |
| @@ -1206,7 +1180,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
| 1206 | } | 1180 | } |
| 1207 | 1181 | ||
| 1208 | cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy; | 1182 | cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy; |
| 1209 | smp_tune_scheduling(); | ||
| 1210 | } | 1183 | } |
| 1211 | 1184 | ||
| 1212 | void __devinit smp_prepare_boot_cpu(void) | 1185 | void __devinit smp_prepare_boot_cpu(void) |
diff --git a/drivers/ide/arm/icside.c b/drivers/ide/arm/icside.c index 66f826252aee..444a0b84f5bd 100644 --- a/drivers/ide/arm/icside.c +++ b/drivers/ide/arm/icside.c | |||
| @@ -448,23 +448,21 @@ static int icside_dma_test_irq(ide_drive_t *drive) | |||
| 448 | ICS_ARCIN_V6_INTRSTAT_1)) & 1; | 448 | ICS_ARCIN_V6_INTRSTAT_1)) & 1; |
| 449 | } | 449 | } |
| 450 | 450 | ||
| 451 | static int icside_dma_timeout(ide_drive_t *drive) | 451 | static void icside_dma_timeout(ide_drive_t *drive) |
| 452 | { | 452 | { |
| 453 | printk(KERN_ERR "%s: DMA timeout occurred: ", drive->name); | 453 | printk(KERN_ERR "%s: DMA timeout occurred: ", drive->name); |
| 454 | 454 | ||
| 455 | if (icside_dma_test_irq(drive)) | 455 | if (icside_dma_test_irq(drive)) |
| 456 | return 0; | 456 | return; |
| 457 | 457 | ||
| 458 | ide_dump_status(drive, "DMA timeout", | 458 | ide_dump_status(drive, "DMA timeout", HWIF(drive)->INB(IDE_STATUS_REG)); |
| 459 | HWIF(drive)->INB(IDE_STATUS_REG)); | ||
| 460 | 459 | ||
| 461 | return icside_dma_end(drive); | 460 | icside_dma_end(drive); |
| 462 | } | 461 | } |
| 463 | 462 | ||
| 464 | static int icside_dma_lostirq(ide_drive_t *drive) | 463 | static void icside_dma_lost_irq(ide_drive_t *drive) |
| 465 | { | 464 | { |
| 466 | printk(KERN_ERR "%s: IRQ lost\n", drive->name); | 465 | printk(KERN_ERR "%s: IRQ lost\n", drive->name); |
| 467 | return 1; | ||
| 468 | } | 466 | } |
| 469 | 467 | ||
| 470 | static void icside_dma_init(ide_hwif_t *hwif) | 468 | static void icside_dma_init(ide_hwif_t *hwif) |
| @@ -490,8 +488,8 @@ static void icside_dma_init(ide_hwif_t *hwif) | |||
| 490 | hwif->dma_start = icside_dma_start; | 488 | hwif->dma_start = icside_dma_start; |
| 491 | hwif->ide_dma_end = icside_dma_end; | 489 | hwif->ide_dma_end = icside_dma_end; |
| 492 | hwif->ide_dma_test_irq = icside_dma_test_irq; | 490 | hwif->ide_dma_test_irq = icside_dma_test_irq; |
| 493 | hwif->ide_dma_timeout = icside_dma_timeout; | 491 | hwif->dma_timeout = icside_dma_timeout; |
| 494 | hwif->ide_dma_lostirq = icside_dma_lostirq; | 492 | hwif->dma_lost_irq = icside_dma_lost_irq; |
| 495 | 493 | ||
| 496 | hwif->drives[0].autodma = hwif->autodma; | 494 | hwif->drives[0].autodma = hwif->autodma; |
| 497 | hwif->drives[1].autodma = hwif->autodma; | 495 | hwif->drives[1].autodma = hwif->autodma; |
diff --git a/drivers/ide/cris/ide-cris.c b/drivers/ide/cris/ide-cris.c index ca0341c05e55..886091bc7db0 100644 --- a/drivers/ide/cris/ide-cris.c +++ b/drivers/ide/cris/ide-cris.c | |||
| @@ -819,7 +819,7 @@ init_e100_ide (void) | |||
| 819 | hwif->dma_host_off = &cris_dma_off; | 819 | hwif->dma_host_off = &cris_dma_off; |
| 820 | hwif->dma_host_on = &cris_dma_on; | 820 | hwif->dma_host_on = &cris_dma_on; |
| 821 | hwif->dma_off_quietly = &cris_dma_off; | 821 | hwif->dma_off_quietly = &cris_dma_off; |
| 822 | hwif->udma_four = 0; | 822 | hwif->cbl = ATA_CBL_PATA40; |
| 823 | hwif->ultra_mask = cris_ultra_mask; | 823 | hwif->ultra_mask = cris_ultra_mask; |
| 824 | hwif->mwdma_mask = 0x07; /* Multiword DMA 0-2 */ | 824 | hwif->mwdma_mask = 0x07; /* Multiword DMA 0-2 */ |
| 825 | hwif->autodma = 1; | 825 | hwif->autodma = 1; |
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 252ab8295edf..1486eb212ccc 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c | |||
| @@ -481,7 +481,7 @@ void cdrom_analyze_sense_data(ide_drive_t *drive, | |||
| 481 | else | 481 | else |
| 482 | printk(" Unknown Error Type: "); | 482 | printk(" Unknown Error Type: "); |
| 483 | 483 | ||
| 484 | if (sense->sense_key < ARY_LEN(sense_key_texts)) | 484 | if (sense->sense_key < ARRAY_SIZE(sense_key_texts)) |
| 485 | s = sense_key_texts[sense->sense_key]; | 485 | s = sense_key_texts[sense->sense_key]; |
| 486 | 486 | ||
| 487 | printk("%s -- (Sense key=0x%02x)\n", s, sense->sense_key); | 487 | printk("%s -- (Sense key=0x%02x)\n", s, sense->sense_key); |
| @@ -491,7 +491,7 @@ void cdrom_analyze_sense_data(ide_drive_t *drive, | |||
| 491 | sense->ascq); | 491 | sense->ascq); |
| 492 | s = buf; | 492 | s = buf; |
| 493 | } else { | 493 | } else { |
| 494 | int lo = 0, mid, hi = ARY_LEN(sense_data_texts); | 494 | int lo = 0, mid, hi = ARRAY_SIZE(sense_data_texts); |
| 495 | unsigned long key = (sense->sense_key << 16); | 495 | unsigned long key = (sense->sense_key << 16); |
| 496 | key |= (sense->asc << 8); | 496 | key |= (sense->asc << 8); |
| 497 | if (!(sense->ascq >= 0x80 && sense->ascq <= 0xdd)) | 497 | if (!(sense->ascq >= 0x80 && sense->ascq <= 0xdd)) |
| @@ -524,7 +524,7 @@ void cdrom_analyze_sense_data(ide_drive_t *drive, | |||
| 524 | 524 | ||
| 525 | if (failed_command != NULL) { | 525 | if (failed_command != NULL) { |
| 526 | 526 | ||
| 527 | int lo=0, mid, hi= ARY_LEN (packet_command_texts); | 527 | int lo=0, mid, hi= ARRAY_SIZE(packet_command_texts); |
| 528 | s = NULL; | 528 | s = NULL; |
| 529 | 529 | ||
| 530 | while (hi > lo) { | 530 | while (hi > lo) { |
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h index ad1f2ed14a37..228b29c5d2e4 100644 --- a/drivers/ide/ide-cd.h +++ b/drivers/ide/ide-cd.h | |||
| @@ -498,8 +498,6 @@ struct cdrom_info { | |||
| 498 | * Descriptions of ATAPI error codes. | 498 | * Descriptions of ATAPI error codes. |
| 499 | */ | 499 | */ |
| 500 | 500 | ||
| 501 | #define ARY_LEN(a) ((sizeof(a) / sizeof(a[0]))) | ||
| 502 | |||
| 503 | /* This stuff should be in cdrom.h, since it is now generic... */ | 501 | /* This stuff should be in cdrom.h, since it is now generic... */ |
| 504 | 502 | ||
| 505 | /* ATAPI sense keys (from table 140 of ATAPI 2.6) */ | 503 | /* ATAPI sense keys (from table 140 of ATAPI 2.6) */ |
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index dc2175c81f5e..b1304a7f3e0a 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c | |||
| @@ -1190,11 +1190,11 @@ static int idedisk_ioctl(struct inode *inode, struct file *file, | |||
| 1190 | return generic_ide_ioctl(drive, file, bdev, cmd, arg); | 1190 | return generic_ide_ioctl(drive, file, bdev, cmd, arg); |
| 1191 | 1191 | ||
| 1192 | read_val: | 1192 | read_val: |
| 1193 | down(&ide_setting_sem); | 1193 | mutex_lock(&ide_setting_mtx); |
| 1194 | spin_lock_irqsave(&ide_lock, flags); | 1194 | spin_lock_irqsave(&ide_lock, flags); |
| 1195 | err = *val; | 1195 | err = *val; |
| 1196 | spin_unlock_irqrestore(&ide_lock, flags); | 1196 | spin_unlock_irqrestore(&ide_lock, flags); |
| 1197 | up(&ide_setting_sem); | 1197 | mutex_unlock(&ide_setting_mtx); |
| 1198 | return err >= 0 ? put_user(err, (long __user *)arg) : err; | 1198 | return err >= 0 ? put_user(err, (long __user *)arg) : err; |
| 1199 | 1199 | ||
| 1200 | set_val: | 1200 | set_val: |
| @@ -1204,9 +1204,9 @@ set_val: | |||
| 1204 | if (!capable(CAP_SYS_ADMIN)) | 1204 | if (!capable(CAP_SYS_ADMIN)) |
| 1205 | err = -EACCES; | 1205 | err = -EACCES; |
| 1206 | else { | 1206 | else { |
| 1207 | down(&ide_setting_sem); | 1207 | mutex_lock(&ide_setting_mtx); |
| 1208 | err = setfunc(drive, arg); | 1208 | err = setfunc(drive, arg); |
| 1209 | up(&ide_setting_sem); | 1209 | mutex_unlock(&ide_setting_mtx); |
| 1210 | } | 1210 | } |
| 1211 | } | 1211 | } |
| 1212 | return err; | 1212 | return err; |
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index ead141e2db9e..5fe1d72ab451 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c | |||
| @@ -91,45 +91,45 @@ | |||
| 91 | 91 | ||
| 92 | static const struct drive_list_entry drive_whitelist [] = { | 92 | static const struct drive_list_entry drive_whitelist [] = { |
| 93 | 93 | ||
| 94 | { "Micropolis 2112A" , "ALL" }, | 94 | { "Micropolis 2112A" , NULL }, |
| 95 | { "CONNER CTMA 4000" , "ALL" }, | 95 | { "CONNER CTMA 4000" , NULL }, |
| 96 | { "CONNER CTT8000-A" , "ALL" }, | 96 | { "CONNER CTT8000-A" , NULL }, |
| 97 | { "ST34342A" , "ALL" }, | 97 | { "ST34342A" , NULL }, |
| 98 | { NULL , NULL } | 98 | { NULL , NULL } |
| 99 | }; | 99 | }; |
| 100 | 100 | ||
| 101 | static const struct drive_list_entry drive_blacklist [] = { | 101 | static const struct drive_list_entry drive_blacklist [] = { |
| 102 | 102 | ||
| 103 | { "WDC AC11000H" , "ALL" }, | 103 | { "WDC AC11000H" , NULL }, |
| 104 | { "WDC AC22100H" , "ALL" }, | 104 | { "WDC AC22100H" , NULL }, |
| 105 | { "WDC AC32500H" , "ALL" }, | 105 | { "WDC AC32500H" , NULL }, |
| 106 | { "WDC AC33100H" , "ALL" }, | 106 | { "WDC AC33100H" , NULL }, |
| 107 | { "WDC AC31600H" , "ALL" }, | 107 | { "WDC AC31600H" , NULL }, |
| 108 | { "WDC AC32100H" , "24.09P07" }, | 108 | { "WDC AC32100H" , "24.09P07" }, |
| 109 | { "WDC AC23200L" , "21.10N21" }, | 109 | { "WDC AC23200L" , "21.10N21" }, |
| 110 | { "Compaq CRD-8241B" , "ALL" }, | 110 | { "Compaq CRD-8241B" , NULL }, |
| 111 | { "CRD-8400B" , "ALL" }, | 111 | { "CRD-8400B" , NULL }, |
| 112 | { "CRD-8480B", "ALL" }, | 112 | { "CRD-8480B", NULL }, |
| 113 | { "CRD-8482B", "ALL" }, | 113 | { "CRD-8482B", NULL }, |
| 114 | { "CRD-84" , "ALL" }, | 114 | { "CRD-84" , NULL }, |
| 115 | { "SanDisk SDP3B" , "ALL" }, | 115 | { "SanDisk SDP3B" , NULL }, |
| 116 | { "SanDisk SDP3B-64" , "ALL" }, | 116 | { "SanDisk SDP3B-64" , NULL }, |
| 117 | { "SANYO CD-ROM CRD" , "ALL" }, | 117 | { "SANYO CD-ROM CRD" , NULL }, |
| 118 | { "HITACHI CDR-8" , "ALL" }, | 118 | { "HITACHI CDR-8" , NULL }, |
| 119 | { "HITACHI CDR-8335" , "ALL" }, | 119 | { "HITACHI CDR-8335" , NULL }, |
| 120 | { "HITACHI CDR-8435" , "ALL" }, | 120 | { "HITACHI CDR-8435" , NULL }, |
| 121 | { "Toshiba CD-ROM XM-6202B" , "ALL" }, | 121 | { "Toshiba CD-ROM XM-6202B" , NULL }, |
| 122 | { "TOSHIBA CD-ROM XM-1702BC", "ALL" }, | 122 | { "TOSHIBA CD-ROM XM-1702BC", NULL }, |
| 123 | { "CD-532E-A" , "ALL" }, | 123 | { "CD-532E-A" , NULL }, |
| 124 | { "E-IDE CD-ROM CR-840", "ALL" }, | 124 | { "E-IDE CD-ROM CR-840", NULL }, |
| 125 | { "CD-ROM Drive/F5A", "ALL" }, | 125 | { "CD-ROM Drive/F5A", NULL }, |
| 126 | { "WPI CDD-820", "ALL" }, | 126 | { "WPI CDD-820", NULL }, |
| 127 | { "SAMSUNG CD-ROM SC-148C", "ALL" }, | 127 | { "SAMSUNG CD-ROM SC-148C", NULL }, |
| 128 | { "SAMSUNG CD-ROM SC", "ALL" }, | 128 | { "SAMSUNG CD-ROM SC", NULL }, |
| 129 | { "ATAPI CD-ROM DRIVE 40X MAXIMUM", "ALL" }, | 129 | { "ATAPI CD-ROM DRIVE 40X MAXIMUM", NULL }, |
| 130 | { "_NEC DV5800A", "ALL" }, | 130 | { "_NEC DV5800A", NULL }, |
| 131 | { "SAMSUNG CD-ROM SN-124", "N001" }, | 131 | { "SAMSUNG CD-ROM SN-124", "N001" }, |
| 132 | { "Seagate STT20000A", "ALL" }, | 132 | { "Seagate STT20000A", NULL }, |
| 133 | { NULL , NULL } | 133 | { NULL , NULL } |
| 134 | 134 | ||
| 135 | }; | 135 | }; |
| @@ -147,8 +147,8 @@ int ide_in_drive_list(struct hd_driveid *id, const struct drive_list_entry *driv | |||
| 147 | { | 147 | { |
| 148 | for ( ; drive_table->id_model ; drive_table++) | 148 | for ( ; drive_table->id_model ; drive_table++) |
| 149 | if ((!strcmp(drive_table->id_model, id->model)) && | 149 | if ((!strcmp(drive_table->id_model, id->model)) && |
| 150 | ((strstr(id->fw_rev, drive_table->id_firmware)) || | 150 | (!drive_table->id_firmware || |
| 151 | (!strcmp(drive_table->id_firmware, "ALL")))) | 151 | strstr(id->fw_rev, drive_table->id_firmware))) |
| 152 | return 1; | 152 | return 1; |
| 153 | return 0; | 153 | return 0; |
| 154 | } | 154 | } |
| @@ -702,8 +702,22 @@ static unsigned int ide_get_mode_mask(ide_drive_t *drive, u8 base) | |||
| 702 | mask = id->dma_mword & hwif->mwdma_mask; | 702 | mask = id->dma_mword & hwif->mwdma_mask; |
| 703 | break; | 703 | break; |
| 704 | case XFER_SW_DMA_0: | 704 | case XFER_SW_DMA_0: |
| 705 | if (id->field_valid & 2) | 705 | if (id->field_valid & 2) { |
| 706 | mask = id->dma_1word & hwif->swdma_mask; | 706 | mask = id->dma_1word & hwif->swdma_mask; |
| 707 | } else if (id->tDMA) { | ||
| 708 | /* | ||
| 709 | * ide_fix_driveid() doesn't convert ->tDMA to the | ||
| 710 | * CPU endianness so we need to do it here | ||
| 711 | */ | ||
| 712 | u8 mode = le16_to_cpu(id->tDMA); | ||
| 713 | |||
| 714 | /* | ||
| 715 | * if the mode is valid convert it to the mask | ||
| 716 | * (the maximum allowed mode is XFER_SW_DMA_2) | ||
| 717 | */ | ||
| 718 | if (mode <= 2) | ||
| 719 | mask = ((2 << mode) - 1) & hwif->swdma_mask; | ||
| 720 | } | ||
| 707 | break; | 721 | break; |
| 708 | default: | 722 | default: |
| 709 | BUG(); | 723 | BUG(); |
| @@ -847,27 +861,27 @@ int ide_set_dma(ide_drive_t *drive) | |||
| 847 | return rc; | 861 | return rc; |
| 848 | } | 862 | } |
| 849 | 863 | ||
| 850 | EXPORT_SYMBOL_GPL(ide_set_dma); | ||
| 851 | |||
| 852 | #ifdef CONFIG_BLK_DEV_IDEDMA_PCI | 864 | #ifdef CONFIG_BLK_DEV_IDEDMA_PCI |
| 853 | int __ide_dma_lostirq (ide_drive_t *drive) | 865 | void ide_dma_lost_irq (ide_drive_t *drive) |
| 854 | { | 866 | { |
| 855 | printk("%s: DMA interrupt recovery\n", drive->name); | 867 | printk("%s: DMA interrupt recovery\n", drive->name); |
| 856 | return 1; | ||
| 857 | } | 868 | } |
| 858 | 869 | ||
| 859 | EXPORT_SYMBOL(__ide_dma_lostirq); | 870 | EXPORT_SYMBOL(ide_dma_lost_irq); |
| 860 | 871 | ||
| 861 | int __ide_dma_timeout (ide_drive_t *drive) | 872 | void ide_dma_timeout (ide_drive_t *drive) |
| 862 | { | 873 | { |
| 874 | ide_hwif_t *hwif = HWIF(drive); | ||
| 875 | |||
| 863 | printk(KERN_ERR "%s: timeout waiting for DMA\n", drive->name); | 876 | printk(KERN_ERR "%s: timeout waiting for DMA\n", drive->name); |
| 864 | if (HWIF(drive)->ide_dma_test_irq(drive)) | ||
| 865 | return 0; | ||
| 866 | 877 | ||
| 867 | return HWIF(drive)->ide_dma_end(drive); | 878 | if (hwif->ide_dma_test_irq(drive)) |
| 879 | return; | ||
| 880 | |||
| 881 | hwif->ide_dma_end(drive); | ||
| 868 | } | 882 | } |
| 869 | 883 | ||
| 870 | EXPORT_SYMBOL(__ide_dma_timeout); | 884 | EXPORT_SYMBOL(ide_dma_timeout); |
| 871 | 885 | ||
| 872 | /* | 886 | /* |
| 873 | * Needed for allowing full modular support of ide-driver | 887 | * Needed for allowing full modular support of ide-driver |
| @@ -1018,10 +1032,10 @@ void ide_setup_dma (ide_hwif_t *hwif, unsigned long dma_base, unsigned int num_p | |||
| 1018 | hwif->ide_dma_end = &__ide_dma_end; | 1032 | hwif->ide_dma_end = &__ide_dma_end; |
| 1019 | if (!hwif->ide_dma_test_irq) | 1033 | if (!hwif->ide_dma_test_irq) |
| 1020 | hwif->ide_dma_test_irq = &__ide_dma_test_irq; | 1034 | hwif->ide_dma_test_irq = &__ide_dma_test_irq; |
| 1021 | if (!hwif->ide_dma_timeout) | 1035 | if (!hwif->dma_timeout) |
| 1022 | hwif->ide_dma_timeout = &__ide_dma_timeout; | 1036 | hwif->dma_timeout = &ide_dma_timeout; |
| 1023 | if (!hwif->ide_dma_lostirq) | 1037 | if (!hwif->dma_lost_irq) |
| 1024 | hwif->ide_dma_lostirq = &__ide_dma_lostirq; | 1038 | hwif->dma_lost_irq = &ide_dma_lost_irq; |
| 1025 | 1039 | ||
| 1026 | if (hwif->chipset != ide_trm290) { | 1040 | if (hwif->chipset != ide_trm290) { |
| 1027 | u8 dma_stat = hwif->INB(hwif->dma_status); | 1041 | u8 dma_stat = hwif->INB(hwif->dma_status); |
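The new SWDMA branch converts a highest-supported mode number into a bitmask of that mode and all slower ones; a quick standalone check of the (2 << mode) - 1 expression used above:

    #include <stdio.h>

    int main(void)
    {
            unsigned int mode;

            /* mode 0 -> 0x1, mode 1 -> 0x3, mode 2 -> 0x7 */
            for (mode = 0; mode <= 2; mode++)
                    printf("SWDMA mode %u -> mask 0x%x\n",
                           mode, (2 << mode) - 1);
            return 0;
    }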
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index bfe8f1b712ba..c5b5011da56e 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c | |||
| @@ -1350,7 +1350,7 @@ static ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) | |||
| 1350 | hwif->INB(IDE_STATUS_REG)); | 1350 | hwif->INB(IDE_STATUS_REG)); |
| 1351 | } else { | 1351 | } else { |
| 1352 | printk(KERN_WARNING "%s: DMA timeout retry\n", drive->name); | 1352 | printk(KERN_WARNING "%s: DMA timeout retry\n", drive->name); |
| 1353 | (void) hwif->ide_dma_timeout(drive); | 1353 | hwif->dma_timeout(drive); |
| 1354 | } | 1354 | } |
| 1355 | 1355 | ||
| 1356 | /* | 1356 | /* |
| @@ -1466,7 +1466,7 @@ void ide_timer_expiry (unsigned long data) | |||
| 1466 | startstop = handler(drive); | 1466 | startstop = handler(drive); |
| 1467 | } else if (drive_is_ready(drive)) { | 1467 | } else if (drive_is_ready(drive)) { |
| 1468 | if (drive->waiting_for_dma) | 1468 | if (drive->waiting_for_dma) |
| 1469 | (void) hwgroup->hwif->ide_dma_lostirq(drive); | 1469 | hwgroup->hwif->dma_lost_irq(drive); |
| 1470 | (void)ide_ack_intr(hwif); | 1470 | (void)ide_ack_intr(hwif); |
| 1471 | printk(KERN_WARNING "%s: lost interrupt\n", drive->name); | 1471 | printk(KERN_WARNING "%s: lost interrupt\n", drive->name); |
| 1472 | startstop = handler(drive); | 1472 | startstop = handler(drive); |
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index f0be5f665a0e..92578b6832e9 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c | |||
| @@ -574,7 +574,10 @@ u8 eighty_ninty_three (ide_drive_t *drive) | |||
| 574 | ide_hwif_t *hwif = drive->hwif; | 574 | ide_hwif_t *hwif = drive->hwif; |
| 575 | struct hd_driveid *id = drive->id; | 575 | struct hd_driveid *id = drive->id; |
| 576 | 576 | ||
| 577 | if (hwif->udma_four == 0) | 577 | if (hwif->cbl == ATA_CBL_PATA40_SHORT) |
| 578 | return 1; | ||
| 579 | |||
| 580 | if (hwif->cbl != ATA_CBL_PATA80) | ||
| 578 | goto no_80w; | 581 | goto no_80w; |
| 579 | 582 | ||
| 580 | /* Check for SATA but only if we are ATA5 or higher */ | 583 | /* Check for SATA but only if we are ATA5 or higher */ |
| @@ -600,7 +603,8 @@ no_80w: | |||
| 600 | 603 | ||
| 601 | printk(KERN_WARNING "%s: %s side 80-wire cable detection failed, " | 604 | printk(KERN_WARNING "%s: %s side 80-wire cable detection failed, " |
| 602 | "limiting max speed to UDMA33\n", | 605 | "limiting max speed to UDMA33\n", |
| 603 | drive->name, hwif->udma_four ? "drive" : "host"); | 606 | drive->name, |
| 607 | hwif->cbl == ATA_CBL_PATA80 ? "drive" : "host"); | ||
| 604 | 608 | ||
| 605 | drive->udma33_warned = 1; | 609 | drive->udma33_warned = 1; |
| 606 | 610 | ||
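After the udma_four -> cbl conversion, the cable check above distinguishes three host-side cases; a condensed sketch of just that decision (drive-side detection omitted, and the enum values are illustrative stand-ins for the real <linux/ata.h> ones):

    enum { ATA_CBL_PATA40, ATA_CBL_PATA80, ATA_CBL_PATA40_SHORT };

    static int host_side_allows_udma66(int cbl)
    {
            if (cbl == ATA_CBL_PATA40_SHORT)
                    return 1;               /* short cable: override detection */
            return cbl == ATA_CBL_PATA80;   /* otherwise require 80-wire */
    }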
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index f5ce22c38f82..cc5801399467 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c | |||
| @@ -144,7 +144,7 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd) | |||
| 144 | local_irq_enable(); | 144 | local_irq_enable(); |
| 145 | ide_fix_driveid(id); | 145 | ide_fix_driveid(id); |
| 146 | 146 | ||
| 147 | #if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO) || defined (CONFIG_SCSI_EATA) | 147 | #if defined (CONFIG_SCSI_EATA_PIO) || defined (CONFIG_SCSI_EATA) |
| 148 | /* | 148 | /* |
| 149 | * EATA SCSI controllers do a hardware ATA emulation: | 149 | * EATA SCSI controllers do a hardware ATA emulation: |
| 150 | * Ignore them if there is a driver for them available. | 150 | * Ignore them if there is a driver for them available. |
| @@ -154,7 +154,7 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd) | |||
| 154 | printk("%s: EATA SCSI HBA %.10s\n", drive->name, id->model); | 154 | printk("%s: EATA SCSI HBA %.10s\n", drive->name, id->model); |
| 155 | goto err_misc; | 155 | goto err_misc; |
| 156 | } | 156 | } |
| 157 | #endif /* CONFIG_SCSI_EATA_DMA || CONFIG_SCSI_EATA_PIO */ | 157 | #endif /* CONFIG_SCSI_EATA || CONFIG_SCSI_EATA_PIO */ |
| 158 | 158 | ||
| 159 | /* | 159 | /* |
| 160 | * WIN_IDENTIFY returns little-endian info, | 160 | * WIN_IDENTIFY returns little-endian info, |
| @@ -1025,7 +1025,7 @@ static int init_irq (ide_hwif_t *hwif) | |||
| 1025 | BUG_ON(irqs_disabled()); | 1025 | BUG_ON(irqs_disabled()); |
| 1026 | BUG_ON(hwif == NULL); | 1026 | BUG_ON(hwif == NULL); |
| 1027 | 1027 | ||
| 1028 | down(&ide_cfg_sem); | 1028 | mutex_lock(&ide_cfg_mtx); |
| 1029 | hwif->hwgroup = NULL; | 1029 | hwif->hwgroup = NULL; |
| 1030 | #if MAX_HWIFS > 1 | 1030 | #if MAX_HWIFS > 1 |
| 1031 | /* | 1031 | /* |
| @@ -1154,7 +1154,7 @@ static int init_irq (ide_hwif_t *hwif) | |||
| 1154 | printk(" (%sed with %s)", | 1154 | printk(" (%sed with %s)", |
| 1155 | hwif->sharing_irq ? "shar" : "serializ", match->name); | 1155 | hwif->sharing_irq ? "shar" : "serializ", match->name); |
| 1156 | printk("\n"); | 1156 | printk("\n"); |
| 1157 | up(&ide_cfg_sem); | 1157 | mutex_unlock(&ide_cfg_mtx); |
| 1158 | return 0; | 1158 | return 0; |
| 1159 | out_unlink: | 1159 | out_unlink: |
| 1160 | spin_lock_irq(&ide_lock); | 1160 | spin_lock_irq(&ide_lock); |
| @@ -1177,7 +1177,7 @@ out_unlink: | |||
| 1177 | } | 1177 | } |
| 1178 | spin_unlock_irq(&ide_lock); | 1178 | spin_unlock_irq(&ide_lock); |
| 1179 | out_up: | 1179 | out_up: |
| 1180 | up(&ide_cfg_sem); | 1180 | mutex_unlock(&ide_cfg_mtx); |
| 1181 | return 1; | 1181 | return 1; |
| 1182 | } | 1182 | } |
| 1183 | 1183 | ||
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index ea94c9aa1220..fc1d8ae6a803 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c | |||
| @@ -156,7 +156,7 @@ static int __ide_add_setting(ide_drive_t *drive, const char *name, int rw, int d | |||
| 156 | { | 156 | { |
| 157 | ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting = NULL; | 157 | ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting = NULL; |
| 158 | 158 | ||
| 159 | down(&ide_setting_sem); | 159 | mutex_lock(&ide_setting_mtx); |
| 160 | while ((*p) && strcmp((*p)->name, name) < 0) | 160 | while ((*p) && strcmp((*p)->name, name) < 0) |
| 161 | p = &((*p)->next); | 161 | p = &((*p)->next); |
| 162 | if ((setting = kzalloc(sizeof(*setting), GFP_KERNEL)) == NULL) | 162 | if ((setting = kzalloc(sizeof(*setting), GFP_KERNEL)) == NULL) |
| @@ -177,10 +177,10 @@ static int __ide_add_setting(ide_drive_t *drive, const char *name, int rw, int d | |||
| 177 | if (auto_remove) | 177 | if (auto_remove) |
| 178 | setting->auto_remove = 1; | 178 | setting->auto_remove = 1; |
| 179 | *p = setting; | 179 | *p = setting; |
| 180 | up(&ide_setting_sem); | 180 | mutex_unlock(&ide_setting_mtx); |
| 181 | return 0; | 181 | return 0; |
| 182 | abort: | 182 | abort: |
| 183 | up(&ide_setting_sem); | 183 | mutex_unlock(&ide_setting_mtx); |
| 184 | kfree(setting); | 184 | kfree(setting); |
| 185 | return -1; | 185 | return -1; |
| 186 | } | 186 | } |
| @@ -224,7 +224,7 @@ static void __ide_remove_setting (ide_drive_t *drive, char *name) | |||
| 224 | * | 224 | * |
| 225 | * Automatically remove all the driver specific settings for this | 225 | * Automatically remove all the driver specific settings for this |
| 226 | * drive. This function may not be called from IRQ context. The | 226 | * drive. This function may not be called from IRQ context. The |
| 227 | * caller must hold ide_setting_sem. | 227 | * caller must hold ide_setting_mtx. |
| 228 | */ | 228 | */ |
| 229 | 229 | ||
| 230 | static void auto_remove_settings (ide_drive_t *drive) | 230 | static void auto_remove_settings (ide_drive_t *drive) |
| @@ -269,7 +269,7 @@ static ide_settings_t *ide_find_setting_by_name(ide_drive_t *drive, char *name) | |||
| 269 | * @setting: drive setting | 269 | * @setting: drive setting |
| 270 | * | 270 | * |
| 271 | * Read a drive setting and return the value. The caller | 271 | * Read a drive setting and return the value. The caller |
| 272 | * must hold the ide_setting_sem when making this call. | 272 | * must hold the ide_setting_mtx when making this call. |
| 273 | * | 273 | * |
| 274 | * BUGS: the data return and error are the same return value | 274 | * BUGS: the data return and error are the same return value |
| 275 | * so an error -EINVAL and true return of the same value cannot | 275 | * so an error -EINVAL and true return of the same value cannot |
| @@ -306,7 +306,7 @@ static int ide_read_setting(ide_drive_t *drive, ide_settings_t *setting) | |||
| 306 | * @val: value | 306 | * @val: value |
| 307 | * | 307 | * |
| 308 | * Write a drive setting if it is possible. The caller | 308 | * Write a drive setting if it is possible. The caller |
| 309 | * must hold the ide_setting_sem when making this call. | 309 | * must hold the ide_setting_mtx when making this call. |
| 310 | * | 310 | * |
| 311 | * BUGS: the data return and error are the same return value | 311 | * BUGS: the data return and error are the same return value |
| 312 | * so an error -EINVAL and true return of the same value cannot | 312 | * so an error -EINVAL and true return of the same value cannot |
| @@ -367,7 +367,7 @@ static int set_xfer_rate (ide_drive_t *drive, int arg) | |||
| 367 | * @drive: drive being configured | 367 | * @drive: drive being configured |
| 368 | * | 368 | * |
| 369 | * Add the generic parts of the system settings to the /proc files. | 369 | * Add the generic parts of the system settings to the /proc files. |
| 370 | * The caller must not be holding the ide_setting_sem. | 370 | * The caller must not be holding the ide_setting_mtx. |
| 371 | */ | 371 | */ |
| 372 | 372 | ||
| 373 | void ide_add_generic_settings (ide_drive_t *drive) | 373 | void ide_add_generic_settings (ide_drive_t *drive) |
| @@ -408,7 +408,7 @@ static int proc_ide_read_settings | |||
| 408 | 408 | ||
| 409 | proc_ide_settings_warn(); | 409 | proc_ide_settings_warn(); |
| 410 | 410 | ||
| 411 | down(&ide_setting_sem); | 411 | mutex_lock(&ide_setting_mtx); |
| 412 | out += sprintf(out, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n"); | 412 | out += sprintf(out, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n"); |
| 413 | out += sprintf(out, "----\t\t\t-----\t\t---\t\t---\t\t----\n"); | 413 | out += sprintf(out, "----\t\t\t-----\t\t---\t\t---\t\t----\n"); |
| 414 | while(setting) { | 414 | while(setting) { |
| @@ -428,7 +428,7 @@ static int proc_ide_read_settings | |||
| 428 | setting = setting->next; | 428 | setting = setting->next; |
| 429 | } | 429 | } |
| 430 | len = out - page; | 430 | len = out - page; |
| 431 | up(&ide_setting_sem); | 431 | mutex_unlock(&ide_setting_mtx); |
| 432 | PROC_IDE_READ_RETURN(page,start,off,count,eof,len); | 432 | PROC_IDE_READ_RETURN(page,start,off,count,eof,len); |
| 433 | } | 433 | } |
| 434 | 434 | ||
| @@ -508,16 +508,16 @@ static int proc_ide_write_settings(struct file *file, const char __user *buffer, | |||
| 508 | ++p; | 508 | ++p; |
| 509 | } | 509 | } |
| 510 | 510 | ||
| 511 | down(&ide_setting_sem); | 511 | mutex_lock(&ide_setting_mtx); |
| 512 | setting = ide_find_setting_by_name(drive, name); | 512 | setting = ide_find_setting_by_name(drive, name); |
| 513 | if (!setting) | 513 | if (!setting) |
| 514 | { | 514 | { |
| 515 | up(&ide_setting_sem); | 515 | mutex_unlock(&ide_setting_mtx); |
| 516 | goto parse_error; | 516 | goto parse_error; |
| 517 | } | 517 | } |
| 518 | if (for_real) | 518 | if (for_real) |
| 519 | ide_write_setting(drive, setting, val * setting->div_factor / setting->mul_factor); | 519 | ide_write_setting(drive, setting, val * setting->div_factor / setting->mul_factor); |
| 520 | up(&ide_setting_sem); | 520 | mutex_unlock(&ide_setting_mtx); |
| 521 | } | 521 | } |
| 522 | } while (!for_real++); | 522 | } while (!for_real++); |
| 523 | free_page((unsigned long)buf); | 523 | free_page((unsigned long)buf); |
| @@ -705,7 +705,7 @@ EXPORT_SYMBOL(ide_proc_register_driver); | |||
| 705 | * Clean up the driver specific /proc files and IDE settings | 705 | * Clean up the driver specific /proc files and IDE settings |
| 706 | * for a given drive. | 706 | * for a given drive. |
| 707 | * | 707 | * |
| 708 | * Takes ide_setting_sem and ide_lock. | 708 | * Takes ide_setting_mtx and ide_lock. |
| 709 | * Caller must hold none of the locks. | 709 | * Caller must hold none of the locks. |
| 710 | */ | 710 | */ |
| 711 | 711 | ||
| @@ -715,10 +715,10 @@ void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) | |||
| 715 | 715 | ||
| 716 | ide_remove_proc_entries(drive->proc, driver->proc); | 716 | ide_remove_proc_entries(drive->proc, driver->proc); |
| 717 | 717 | ||
| 718 | down(&ide_setting_sem); | 718 | mutex_lock(&ide_setting_mtx); |
| 719 | spin_lock_irqsave(&ide_lock, flags); | 719 | spin_lock_irqsave(&ide_lock, flags); |
| 720 | /* | 720 | /* |
| 721 | * ide_setting_sem protects the settings list | 721 | * ide_setting_mtx protects the settings list |
| 722 | * ide_lock protects the use of settings | 722 | * ide_lock protects the use of settings |
| 723 | * | 723 | * |
| 724 | * so we need to hold both, ide_settings_sem because we want to | 724 | * so we need to hold both, ide_settings_sem because we want to |
| @@ -726,11 +726,11 @@ void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) | |||
| 726 | * a setting out that is being used. | 726 | * a setting out that is being used. |
| 727 | * | 727 | * |
| 728 | * OTOH both ide_{read,write}_setting are only ever used under | 728 | * OTOH both ide_{read,write}_setting are only ever used under |
| 729 | * ide_setting_sem. | 729 | * ide_setting_mtx. |
| 730 | */ | 730 | */ |
| 731 | auto_remove_settings(drive); | 731 | auto_remove_settings(drive); |
| 732 | spin_unlock_irqrestore(&ide_lock, flags); | 732 | spin_unlock_irqrestore(&ide_lock, flags); |
| 733 | up(&ide_setting_sem); | 733 | mutex_unlock(&ide_setting_mtx); |
| 734 | } | 734 | } |
| 735 | 735 | ||
| 736 | EXPORT_SYMBOL(ide_proc_unregister_driver); | 736 | EXPORT_SYMBOL(ide_proc_unregister_driver); |
diff --git a/drivers/ide/ide-timing.h b/drivers/ide/ide-timing.h index c0864b1e9228..e6cb8593b5ba 100644 --- a/drivers/ide/ide-timing.h +++ b/drivers/ide/ide-timing.h | |||
| @@ -102,66 +102,16 @@ static struct ide_timing ide_timing[] = { | |||
| 102 | #define EZ(v,unit) ((v)?ENOUGH(v,unit):0) | 102 | #define EZ(v,unit) ((v)?ENOUGH(v,unit):0) |
| 103 | 103 | ||
| 104 | #define XFER_MODE 0xf0 | 104 | #define XFER_MODE 0xf0 |
| 105 | #define XFER_UDMA_133 0x48 | ||
| 106 | #define XFER_UDMA_100 0x44 | ||
| 107 | #define XFER_UDMA_66 0x42 | ||
| 108 | #define XFER_UDMA 0x40 | ||
| 109 | #define XFER_MWDMA 0x20 | 105 | #define XFER_MWDMA 0x20 |
| 110 | #define XFER_SWDMA 0x10 | ||
| 111 | #define XFER_EPIO 0x01 | 106 | #define XFER_EPIO 0x01 |
| 112 | #define XFER_PIO 0x00 | 107 | #define XFER_PIO 0x00 |
| 113 | 108 | ||
| 114 | static short ide_find_best_mode(ide_drive_t *drive, int map) | 109 | static short ide_find_best_pio_mode(ide_drive_t *drive) |
| 115 | { | 110 | { |
| 116 | struct hd_driveid *id = drive->id; | 111 | struct hd_driveid *id = drive->id; |
| 117 | short best = 0; | 112 | short best = 0; |
| 118 | 113 | ||
| 119 | if (!id) | 114 | if (id->field_valid & 2) { /* EIDE PIO modes */ |
| 120 | return XFER_PIO_SLOW; | ||
| 121 | |||
| 122 | if ((map & XFER_UDMA) && (id->field_valid & 4)) { /* Want UDMA and UDMA bitmap valid */ | ||
| 123 | |||
| 124 | if ((map & XFER_UDMA_133) == XFER_UDMA_133) | ||
| 125 | if ((best = (id->dma_ultra & 0x0040) ? XFER_UDMA_6 : 0)) return best; | ||
| 126 | |||
| 127 | if ((map & XFER_UDMA_100) == XFER_UDMA_100) | ||
| 128 | if ((best = (id->dma_ultra & 0x0020) ? XFER_UDMA_5 : 0)) return best; | ||
| 129 | |||
| 130 | if ((map & XFER_UDMA_66) == XFER_UDMA_66) | ||
| 131 | if ((best = (id->dma_ultra & 0x0010) ? XFER_UDMA_4 : | ||
| 132 | (id->dma_ultra & 0x0008) ? XFER_UDMA_3 : 0)) return best; | ||
| 133 | |||
| 134 | if ((best = (id->dma_ultra & 0x0004) ? XFER_UDMA_2 : | ||
| 135 | (id->dma_ultra & 0x0002) ? XFER_UDMA_1 : | ||
| 136 | (id->dma_ultra & 0x0001) ? XFER_UDMA_0 : 0)) return best; | ||
| 137 | } | ||
| 138 | |||
| 139 | if ((map & XFER_MWDMA) && (id->field_valid & 2)) { /* Want MWDMA and drive has EIDE fields */ | ||
| 140 | |||
| 141 | if ((best = (id->dma_mword & 0x0004) ? XFER_MW_DMA_2 : | ||
| 142 | (id->dma_mword & 0x0002) ? XFER_MW_DMA_1 : | ||
| 143 | (id->dma_mword & 0x0001) ? XFER_MW_DMA_0 : 0)) return best; | ||
| 144 | } | ||
| 145 | |||
| 146 | if (map & XFER_SWDMA) { /* Want SWDMA */ | ||
| 147 | |||
| 148 | if (id->field_valid & 2) { /* EIDE SWDMA */ | ||
| 149 | |||
| 150 | if ((best = (id->dma_1word & 0x0004) ? XFER_SW_DMA_2 : | ||
| 151 | (id->dma_1word & 0x0002) ? XFER_SW_DMA_1 : | ||
| 152 | (id->dma_1word & 0x0001) ? XFER_SW_DMA_0 : 0)) return best; | ||
| 153 | } | ||
| 154 | |||
| 155 | if (id->capability & 1) { /* Pre-EIDE style SWDMA */ | ||
| 156 | |||
| 157 | if ((best = (id->tDMA == 2) ? XFER_SW_DMA_2 : | ||
| 158 | (id->tDMA == 1) ? XFER_SW_DMA_1 : | ||
| 159 | (id->tDMA == 0) ? XFER_SW_DMA_0 : 0)) return best; | ||
| 160 | } | ||
| 161 | } | ||
| 162 | |||
| 163 | |||
| 164 | if ((map & XFER_EPIO) && (id->field_valid & 2)) { /* EIDE PIO modes */ | ||
| 165 | 115 | ||
| 166 | if ((best = (drive->id->eide_pio_modes & 4) ? XFER_PIO_5 : | 116 | if ((best = (drive->id->eide_pio_modes & 4) ? XFER_PIO_5 : |
| 167 | (drive->id->eide_pio_modes & 2) ? XFER_PIO_4 : | 117 | (drive->id->eide_pio_modes & 2) ? XFER_PIO_4 : |
| @@ -262,7 +212,7 @@ static int ide_timing_compute(ide_drive_t *drive, short speed, struct ide_timing | |||
| 262 | */ | 212 | */ |
| 263 | 213 | ||
| 264 | if ((speed & XFER_MODE) != XFER_PIO) { | 214 | if ((speed & XFER_MODE) != XFER_PIO) { |
| 265 | ide_timing_compute(drive, ide_find_best_mode(drive, XFER_PIO | XFER_EPIO), &p, T, UT); | 215 | ide_timing_compute(drive, ide_find_best_pio_mode(drive), &p, T, UT); |
| 266 | ide_timing_merge(&p, t, t, IDE_TIMING_ALL); | 216 | ide_timing_merge(&p, t, t, IDE_TIMING_ALL); |
| 267 | } | 217 | } |
| 268 | 218 | ||
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 0cd76bf66833..c948a5c17a5d 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c | |||
| @@ -169,7 +169,7 @@ static const u8 ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR, | |||
| 169 | static int idebus_parameter; /* holds the "idebus=" parameter */ | 169 | static int idebus_parameter; /* holds the "idebus=" parameter */ |
| 170 | static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */ | 170 | static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */ |
| 171 | 171 | ||
| 172 | DECLARE_MUTEX(ide_cfg_sem); | 172 | DEFINE_MUTEX(ide_cfg_mtx); |
| 173 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(ide_lock); | 173 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(ide_lock); |
| 174 | 174 | ||
| 175 | #ifdef CONFIG_IDEPCI_PCIBUS_ORDER | 175 | #ifdef CONFIG_IDEPCI_PCIBUS_ORDER |
| @@ -460,6 +460,8 @@ static void ide_hwif_restore(ide_hwif_t *hwif, ide_hwif_t *tmp_hwif) | |||
| 460 | hwif->mwdma_mask = tmp_hwif->mwdma_mask; | 460 | hwif->mwdma_mask = tmp_hwif->mwdma_mask; |
| 461 | hwif->swdma_mask = tmp_hwif->swdma_mask; | 461 | hwif->swdma_mask = tmp_hwif->swdma_mask; |
| 462 | 462 | ||
| 463 | hwif->cbl = tmp_hwif->cbl; | ||
| 464 | |||
| 463 | hwif->chipset = tmp_hwif->chipset; | 465 | hwif->chipset = tmp_hwif->chipset; |
| 464 | hwif->hold = tmp_hwif->hold; | 466 | hwif->hold = tmp_hwif->hold; |
| 465 | 467 | ||
| @@ -496,8 +498,8 @@ static void ide_hwif_restore(ide_hwif_t *hwif, ide_hwif_t *tmp_hwif) | |||
| 496 | hwif->ide_dma_clear_irq = tmp_hwif->ide_dma_clear_irq; | 498 | hwif->ide_dma_clear_irq = tmp_hwif->ide_dma_clear_irq; |
| 497 | hwif->dma_host_on = tmp_hwif->dma_host_on; | 499 | hwif->dma_host_on = tmp_hwif->dma_host_on; |
| 498 | hwif->dma_host_off = tmp_hwif->dma_host_off; | 500 | hwif->dma_host_off = tmp_hwif->dma_host_off; |
| 499 | hwif->ide_dma_lostirq = tmp_hwif->ide_dma_lostirq; | 501 | hwif->dma_lost_irq = tmp_hwif->dma_lost_irq; |
| 500 | hwif->ide_dma_timeout = tmp_hwif->ide_dma_timeout; | 502 | hwif->dma_timeout = tmp_hwif->dma_timeout; |
| 501 | 503 | ||
| 502 | hwif->OUTB = tmp_hwif->OUTB; | 504 | hwif->OUTB = tmp_hwif->OUTB; |
| 503 | hwif->OUTBSYNC = tmp_hwif->OUTBSYNC; | 505 | hwif->OUTBSYNC = tmp_hwif->OUTBSYNC; |
| @@ -533,7 +535,6 @@ static void ide_hwif_restore(ide_hwif_t *hwif, ide_hwif_t *tmp_hwif) | |||
| 533 | hwif->extra_base = tmp_hwif->extra_base; | 535 | hwif->extra_base = tmp_hwif->extra_base; |
| 534 | hwif->extra_ports = tmp_hwif->extra_ports; | 536 | hwif->extra_ports = tmp_hwif->extra_ports; |
| 535 | hwif->autodma = tmp_hwif->autodma; | 537 | hwif->autodma = tmp_hwif->autodma; |
| 536 | hwif->udma_four = tmp_hwif->udma_four; | ||
| 537 | 538 | ||
| 538 | hwif->hwif_data = tmp_hwif->hwif_data; | 539 | hwif->hwif_data = tmp_hwif->hwif_data; |
| 539 | } | 540 | } |
| @@ -564,7 +565,7 @@ void ide_unregister(unsigned int index) | |||
| 564 | { | 565 | { |
| 565 | ide_drive_t *drive; | 566 | ide_drive_t *drive; |
| 566 | ide_hwif_t *hwif, *g; | 567 | ide_hwif_t *hwif, *g; |
| 567 | static ide_hwif_t tmp_hwif; /* protected by ide_cfg_sem */ | 568 | static ide_hwif_t tmp_hwif; /* protected by ide_cfg_mtx */ |
| 568 | ide_hwgroup_t *hwgroup; | 569 | ide_hwgroup_t *hwgroup; |
| 569 | int irq_count = 0, unit; | 570 | int irq_count = 0, unit; |
| 570 | 571 | ||
| @@ -572,7 +573,7 @@ void ide_unregister(unsigned int index) | |||
| 572 | 573 | ||
| 573 | BUG_ON(in_interrupt()); | 574 | BUG_ON(in_interrupt()); |
| 574 | BUG_ON(irqs_disabled()); | 575 | BUG_ON(irqs_disabled()); |
| 575 | down(&ide_cfg_sem); | 576 | mutex_lock(&ide_cfg_mtx); |
| 576 | spin_lock_irq(&ide_lock); | 577 | spin_lock_irq(&ide_lock); |
| 577 | hwif = &ide_hwifs[index]; | 578 | hwif = &ide_hwifs[index]; |
| 578 | if (!hwif->present) | 579 | if (!hwif->present) |
| @@ -679,7 +680,7 @@ void ide_unregister(unsigned int index) | |||
| 679 | 680 | ||
| 680 | abort: | 681 | abort: |
| 681 | spin_unlock_irq(&ide_lock); | 682 | spin_unlock_irq(&ide_lock); |
| 682 | up(&ide_cfg_sem); | 683 | mutex_unlock(&ide_cfg_mtx); |
| 683 | } | 684 | } |
| 684 | 685 | ||
| 685 | EXPORT_SYMBOL(ide_unregister); | 686 | EXPORT_SYMBOL(ide_unregister); |
| @@ -817,9 +818,9 @@ EXPORT_SYMBOL(ide_register_hw); | |||
| 817 | * Locks for IDE setting functionality | 818 | * Locks for IDE setting functionality |
| 818 | */ | 819 | */ |
| 819 | 820 | ||
| 820 | DECLARE_MUTEX(ide_setting_sem); | 821 | DEFINE_MUTEX(ide_setting_mtx); |
| 821 | 822 | ||
| 822 | EXPORT_SYMBOL_GPL(ide_setting_sem); | 823 | EXPORT_SYMBOL_GPL(ide_setting_mtx); |
| 823 | 824 | ||
| 824 | /** | 825 | /** |
| 825 | * ide_spin_wait_hwgroup - wait for group | 826 | * ide_spin_wait_hwgroup - wait for group |
| @@ -1192,11 +1193,11 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device | |||
| 1192 | } | 1193 | } |
| 1193 | 1194 | ||
| 1194 | read_val: | 1195 | read_val: |
| 1195 | down(&ide_setting_sem); | 1196 | mutex_lock(&ide_setting_mtx); |
| 1196 | spin_lock_irqsave(&ide_lock, flags); | 1197 | spin_lock_irqsave(&ide_lock, flags); |
| 1197 | err = *val; | 1198 | err = *val; |
| 1198 | spin_unlock_irqrestore(&ide_lock, flags); | 1199 | spin_unlock_irqrestore(&ide_lock, flags); |
| 1199 | up(&ide_setting_sem); | 1200 | mutex_unlock(&ide_setting_mtx); |
| 1200 | return err >= 0 ? put_user(err, (long __user *)arg) : err; | 1201 | return err >= 0 ? put_user(err, (long __user *)arg) : err; |
| 1201 | 1202 | ||
| 1202 | set_val: | 1203 | set_val: |
| @@ -1206,9 +1207,9 @@ set_val: | |||
| 1206 | if (!capable(CAP_SYS_ADMIN)) | 1207 | if (!capable(CAP_SYS_ADMIN)) |
| 1207 | err = -EACCES; | 1208 | err = -EACCES; |
| 1208 | else { | 1209 | else { |
| 1209 | down(&ide_setting_sem); | 1210 | mutex_lock(&ide_setting_mtx); |
| 1210 | err = setfunc(drive, arg); | 1211 | err = setfunc(drive, arg); |
| 1211 | up(&ide_setting_sem); | 1212 | mutex_unlock(&ide_setting_mtx); |
| 1212 | } | 1213 | } |
| 1213 | } | 1214 | } |
| 1214 | return err; | 1215 | return err; |
| @@ -1548,7 +1549,11 @@ static int __init ide_setup(char *s) | |||
| 1548 | goto bad_option; | 1549 | goto bad_option; |
| 1549 | case -7: /* ata66 */ | 1550 | case -7: /* ata66 */ |
| 1550 | #ifdef CONFIG_BLK_DEV_IDEPCI | 1551 | #ifdef CONFIG_BLK_DEV_IDEPCI |
| 1551 | hwif->udma_four = 1; | 1552 | /* |
| 1553 | * Use ATA_CBL_PATA40_SHORT so drive side | ||
| 1554 | * cable detection is also overridden. | ||
| 1555 | */ | ||
| 1556 | hwif->cbl = ATA_CBL_PATA40_SHORT; | ||
| 1552 | goto obsolete_option; | 1557 | goto obsolete_option; |
| 1553 | #else | 1558 | #else |
| 1554 | goto bad_hwif; | 1559 | goto bad_hwif; |
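The ide.c hunks above are a textbook semaphore-to-mutex conversion: DECLARE_MUTEX() (which, despite its name, declared a semaphore), down() and up() become DEFINE_MUTEX(), mutex_lock() and mutex_unlock(). A minimal sketch of the pattern, using a hypothetical cfg_mtx rather than the driver's real locks:

    #include <linux/mutex.h>

    static DEFINE_MUTEX(cfg_mtx);            /* was: DECLARE_MUTEX(cfg_sem); */

    static void cfg_update(void)
    {
            mutex_lock(&cfg_mtx);            /* was: down(&cfg_sem); */
            /* ... modify state protected by cfg_mtx ... */
            mutex_unlock(&cfg_mtx);          /* was: up(&cfg_sem); */
    }

The mutex API gains strict-owner semantics and lockdep checking that the old semaphore API lacked, which is why these mechanical conversions ran across the whole tree.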
diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c index 45ed03591cd8..661c12f6dda6 100644 --- a/drivers/ide/legacy/hd.c +++ b/drivers/ide/legacy/hd.c | |||
| @@ -130,7 +130,7 @@ struct hd_i_struct { | |||
| 130 | 130 | ||
| 131 | #ifdef HD_TYPE | 131 | #ifdef HD_TYPE |
| 132 | static struct hd_i_struct hd_info[] = { HD_TYPE }; | 132 | static struct hd_i_struct hd_info[] = { HD_TYPE }; |
| 133 | static int NR_HD = ((sizeof (hd_info))/(sizeof (struct hd_i_struct))); | 133 | static int NR_HD = ARRAY_SIZE(hd_info); |
| 134 | #else | 134 | #else |
| 135 | static struct hd_i_struct hd_info[MAX_HD]; | 135 | static struct hd_i_struct hd_info[MAX_HD]; |
| 136 | static int NR_HD; | 136 | static int NR_HD; |
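ARRAY_SIZE() from <linux/kernel.h> is the idiomatic replacement for the open-coded sizeof division and is computed at compile time. A tiny sketch with an invented array:

    #include <linux/kernel.h>        /* ARRAY_SIZE() */

    static int ids[] = { 3, 5, 7 };
    static int nr_ids = ARRAY_SIZE(ids);     /* 3; was sizeof(ids)/sizeof(ids[0]) */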
diff --git a/drivers/ide/legacy/macide.c b/drivers/ide/legacy/macide.c index c211fc78345d..b557c45a5a9d 100644 --- a/drivers/ide/legacy/macide.c +++ b/drivers/ide/legacy/macide.c | |||
| @@ -77,15 +77,6 @@ int macide_ack_intr(ide_hwif_t* hwif) | |||
| 77 | return 0; | 77 | return 0; |
| 78 | } | 78 | } |
| 79 | 79 | ||
| 80 | #ifdef CONFIG_BLK_DEV_MAC_MEDIABAY | ||
| 81 | static void macide_mediabay_interrupt(int irq, void *dev_id) | ||
| 82 | { | ||
| 83 | int state = baboon->mb_status & 0x04; | ||
| 84 | |||
| 85 | printk(KERN_INFO "macide: media bay %s detected\n", state? "removal":"insertion"); | ||
| 86 | } | ||
| 87 | #endif | ||
| 88 | |||
| 89 | /* | 80 | /* |
| 90 | * Probe for a Macintosh IDE interface | 81 | * Probe for a Macintosh IDE interface |
| 91 | */ | 82 | */ |
| @@ -128,11 +119,6 @@ void macide_init(void) | |||
| 128 | ide_drive_t *drive = &ide_hwifs[index].drives[0]; | 119 | ide_drive_t *drive = &ide_hwifs[index].drives[0]; |
| 129 | drive->capacity64 = drive->cyl*drive->head*drive->sect; | 120 | drive->capacity64 = drive->cyl*drive->head*drive->sect; |
| 130 | 121 | ||
| 131 | #ifdef CONFIG_BLK_DEV_MAC_MEDIABAY | ||
| 132 | request_irq(IRQ_BABOON_2, macide_mediabay_interrupt, | ||
| 133 | IRQ_FLG_FAST, "mediabay", | ||
| 134 | macide_mediabay_interrupt); | ||
| 135 | #endif | ||
| 136 | } | 122 | } |
| 137 | break; | 123 | break; |
| 138 | 124 | ||
diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c index ca95e990862e..2e7013a2a7f6 100644 --- a/drivers/ide/mips/au1xxx-ide.c +++ b/drivers/ide/mips/au1xxx-ide.c | |||
| @@ -381,9 +381,7 @@ static int auide_dma_setup(ide_drive_t *drive) | |||
| 381 | 381 | ||
| 382 | static int auide_dma_check(ide_drive_t *drive) | 382 | static int auide_dma_check(ide_drive_t *drive) |
| 383 | { | 383 | { |
| 384 | u8 speed; | 384 | u8 speed = ide_max_dma_mode(drive); |
| 385 | |||
| 386 | #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA | ||
| 387 | 385 | ||
| 388 | if( dbdma_init_done == 0 ){ | 386 | if( dbdma_init_done == 0 ){ |
| 389 | auide_hwif.white_list = ide_in_drive_list(drive->id, | 387 | auide_hwif.white_list = ide_in_drive_list(drive->id, |
| @@ -394,7 +392,6 @@ static int auide_dma_check(ide_drive_t *drive) | |||
| 394 | auide_ddma_init(&auide_hwif); | 392 | auide_ddma_init(&auide_hwif); |
| 395 | dbdma_init_done = 1; | 393 | dbdma_init_done = 1; |
| 396 | } | 394 | } |
| 397 | #endif | ||
| 398 | 395 | ||
| 399 | /* Is the drive in our DMA black list? */ | 396 | /* Is the drive in our DMA black list? */ |
| 400 | 397 | ||
| @@ -409,8 +406,6 @@ static int auide_dma_check(ide_drive_t *drive) | |||
| 409 | else | 406 | else |
| 410 | drive->using_dma = 1; | 407 | drive->using_dma = 1; |
| 411 | 408 | ||
| 412 | speed = ide_find_best_mode(drive, XFER_PIO | XFER_MWDMA); | ||
| 413 | |||
| 414 | if (drive->autodma && (speed & XFER_MODE) != XFER_PIO) | 409 | if (drive->autodma && (speed & XFER_MODE) != XFER_PIO) |
| 415 | return 0; | 410 | return 0; |
| 416 | 411 | ||
| @@ -456,10 +451,9 @@ static void auide_dma_off_quietly(ide_drive_t *drive) | |||
| 456 | drive->using_dma = 0; | 451 | drive->using_dma = 0; |
| 457 | } | 452 | } |
| 458 | 453 | ||
| 459 | static int auide_dma_lostirq(ide_drive_t *drive) | 454 | static void auide_dma_lost_irq(ide_drive_t *drive) |
| 460 | { | 455 | { |
| 461 | printk(KERN_ERR "%s: IRQ lost\n", drive->name); | 456 | printk(KERN_ERR "%s: IRQ lost\n", drive->name); |
| 462 | return 0; | ||
| 463 | } | 457 | } |
| 464 | 458 | ||
| 465 | static void auide_ddma_tx_callback(int irq, void *param) | 459 | static void auide_ddma_tx_callback(int irq, void *param) |
| @@ -489,16 +483,16 @@ static void auide_init_dbdma_dev(dbdev_tab_t *dev, u32 dev_id, u32 tsize, u32 de | |||
| 489 | 483 | ||
| 490 | #if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA) | 484 | #if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA) |
| 491 | 485 | ||
| 492 | static int auide_dma_timeout(ide_drive_t *drive) | 486 | static void auide_dma_timeout(ide_drive_t *drive) |
| 493 | { | 487 | { |
| 494 | // printk("%s\n", __FUNCTION__); | 488 | ide_hwif_t *hwif = HWIF(drive); |
| 495 | 489 | ||
| 496 | printk(KERN_ERR "%s: DMA timeout occurred: ", drive->name); | 490 | printk(KERN_ERR "%s: DMA timeout occurred: ", drive->name); |
| 497 | 491 | ||
| 498 | if (HWIF(drive)->ide_dma_test_irq(drive)) | 492 | if (hwif->ide_dma_test_irq(drive)) |
| 499 | return 0; | 493 | return; |
| 500 | 494 | ||
| 501 | return HWIF(drive)->ide_dma_end(drive); | 495 | hwif->ide_dma_end(drive); |
| 502 | } | 496 | } |
| 503 | 497 | ||
| 504 | 498 | ||
| @@ -721,7 +715,7 @@ static int au_ide_probe(struct device *dev) | |||
| 721 | 715 | ||
| 722 | #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA | 716 | #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA |
| 723 | hwif->dma_off_quietly = &auide_dma_off_quietly; | 717 | hwif->dma_off_quietly = &auide_dma_off_quietly; |
| 724 | hwif->ide_dma_timeout = &auide_dma_timeout; | 718 | hwif->dma_timeout = &auide_dma_timeout; |
| 725 | 719 | ||
| 726 | hwif->ide_dma_check = &auide_dma_check; | 720 | hwif->ide_dma_check = &auide_dma_check; |
| 727 | hwif->dma_exec_cmd = &auide_dma_exec_cmd; | 721 | hwif->dma_exec_cmd = &auide_dma_exec_cmd; |
| @@ -731,7 +725,7 @@ static int au_ide_probe(struct device *dev) | |||
| 731 | hwif->ide_dma_test_irq = &auide_dma_test_irq; | 725 | hwif->ide_dma_test_irq = &auide_dma_test_irq; |
| 732 | hwif->dma_host_off = &auide_dma_host_off; | 726 | hwif->dma_host_off = &auide_dma_host_off; |
| 733 | hwif->dma_host_on = &auide_dma_host_on; | 727 | hwif->dma_host_on = &auide_dma_host_on; |
| 734 | hwif->ide_dma_lostirq = &auide_dma_lostirq; | 728 | hwif->dma_lost_irq = &auide_dma_lost_irq; |
| 735 | hwif->ide_dma_on = &auide_dma_on; | 729 | hwif->ide_dma_on = &auide_dma_on; |
| 736 | 730 | ||
| 737 | hwif->autodma = 1; | 731 | hwif->autodma = 1; |
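The au1xxx hunks track an interface change in ide_hwif_t: the old ide_dma_lostirq/ide_dma_timeout hooks returned an int that callers ignored, while the new dma_lost_irq/dma_timeout hooks return void. A hedged sketch of the new-style handler and its hookup, with a made-up mydrv_ prefix:

    #include <linux/ide.h>

    /* new-style hook: just report the condition, nothing to return */
    static void mydrv_dma_lost_irq(ide_drive_t *drive)
    {
            printk(KERN_ERR "%s: IRQ lost\n", drive->name);
    }

    static void mydrv_setup_hwif(ide_hwif_t *hwif)
    {
            hwif->dma_lost_irq = &mydrv_dma_lost_irq;  /* was ->ide_dma_lostirq */
    }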
diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c index b173bc66ce1e..e5d09367627e 100644 --- a/drivers/ide/pci/aec62xx.c +++ b/drivers/ide/pci/aec62xx.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * linux/drivers/ide/pci/aec62xx.c Version 0.21 Apr 21, 2007 | 2 | * linux/drivers/ide/pci/aec62xx.c Version 0.24 May 24, 2007 |
| 3 | * | 3 | * |
| 4 | * Copyright (C) 1999-2002 Andre Hedrick <andre@linux-ide.org> | 4 | * Copyright (C) 1999-2002 Andre Hedrick <andre@linux-ide.org> |
| 5 | * Copyright (C) 2007 MontaVista Software, Inc. <source@mvista.com> | 5 | * Copyright (C) 2007 MontaVista Software, Inc. <source@mvista.com> |
| @@ -140,25 +140,10 @@ static int aec6260_tune_chipset (ide_drive_t *drive, u8 xferspeed) | |||
| 140 | return(ide_config_drive_speed(drive, speed)); | 140 | return(ide_config_drive_speed(drive, speed)); |
| 141 | } | 141 | } |
| 142 | 142 | ||
| 143 | static int aec62xx_tune_chipset (ide_drive_t *drive, u8 speed) | ||
| 144 | { | ||
| 145 | switch (HWIF(drive)->pci_dev->device) { | ||
| 146 | case PCI_DEVICE_ID_ARTOP_ATP865: | ||
| 147 | case PCI_DEVICE_ID_ARTOP_ATP865R: | ||
| 148 | case PCI_DEVICE_ID_ARTOP_ATP860: | ||
| 149 | case PCI_DEVICE_ID_ARTOP_ATP860R: | ||
| 150 | return ((int) aec6260_tune_chipset(drive, speed)); | ||
| 151 | case PCI_DEVICE_ID_ARTOP_ATP850UF: | ||
| 152 | return ((int) aec6210_tune_chipset(drive, speed)); | ||
| 153 | default: | ||
| 154 | return -1; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | static void aec62xx_tune_drive (ide_drive_t *drive, u8 pio) | 143 | static void aec62xx_tune_drive (ide_drive_t *drive, u8 pio) |
| 159 | { | 144 | { |
| 160 | pio = ide_get_best_pio_mode(drive, pio, 4, NULL); | 145 | pio = ide_get_best_pio_mode(drive, pio, 4, NULL); |
| 161 | (void) aec62xx_tune_chipset(drive, pio + XFER_PIO_0); | 146 | (void) HWIF(drive)->speedproc(drive, pio + XFER_PIO_0); |
| 162 | } | 147 | } |
| 163 | 148 | ||
| 164 | static int aec62xx_config_drive_xfer_rate (ide_drive_t *drive) | 149 | static int aec62xx_config_drive_xfer_rate (ide_drive_t *drive) |
| @@ -172,12 +157,9 @@ static int aec62xx_config_drive_xfer_rate (ide_drive_t *drive) | |||
| 172 | return -1; | 157 | return -1; |
| 173 | } | 158 | } |
| 174 | 159 | ||
| 175 | static int aec62xx_irq_timeout (ide_drive_t *drive) | 160 | static void aec62xx_dma_lost_irq (ide_drive_t *drive) |
| 176 | { | 161 | { |
| 177 | ide_hwif_t *hwif = HWIF(drive); | 162 | switch (HWIF(drive)->pci_dev->device) { |
| 178 | struct pci_dev *dev = hwif->pci_dev; | ||
| 179 | |||
| 180 | switch(dev->device) { | ||
| 181 | case PCI_DEVICE_ID_ARTOP_ATP860: | 163 | case PCI_DEVICE_ID_ARTOP_ATP860: |
| 182 | case PCI_DEVICE_ID_ARTOP_ATP860R: | 164 | case PCI_DEVICE_ID_ARTOP_ATP860R: |
| 183 | case PCI_DEVICE_ID_ARTOP_ATP865: | 165 | case PCI_DEVICE_ID_ARTOP_ATP865: |
| @@ -186,7 +168,6 @@ static int aec62xx_irq_timeout (ide_drive_t *drive) | |||
| 186 | default: | 168 | default: |
| 187 | break; | 169 | break; |
| 188 | } | 170 | } |
| 189 | return 0; | ||
| 190 | } | 171 | } |
| 191 | 172 | ||
| 192 | static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev, const char *name) | 173 | static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev, const char *name) |
| @@ -224,64 +205,46 @@ static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev, const ch | |||
| 224 | 205 | ||
| 225 | static void __devinit init_hwif_aec62xx(ide_hwif_t *hwif) | 206 | static void __devinit init_hwif_aec62xx(ide_hwif_t *hwif) |
| 226 | { | 207 | { |
| 227 | struct pci_dev *dev = hwif->pci_dev; | 208 | struct pci_dev *dev = hwif->pci_dev; |
| 209 | u8 reg54 = 0, mask = hwif->channel ? 0xf0 : 0x0f; | ||
| 210 | unsigned long flags; | ||
| 228 | 211 | ||
| 229 | hwif->autodma = 0; | ||
| 230 | hwif->tuneproc = &aec62xx_tune_drive; | 212 | hwif->tuneproc = &aec62xx_tune_drive; |
| 231 | hwif->speedproc = &aec62xx_tune_chipset; | ||
| 232 | 213 | ||
| 233 | if (dev->device == PCI_DEVICE_ID_ARTOP_ATP850UF) | 214 | if (dev->device == PCI_DEVICE_ID_ARTOP_ATP850UF) { |
| 234 | hwif->serialized = hwif->channel; | 215 | if(hwif->mate) |
| 235 | 216 | hwif->mate->serialized = hwif->serialized = 1; | |
| 236 | if (hwif->mate) | 217 | hwif->speedproc = &aec6210_tune_chipset; |
| 237 | hwif->mate->serialized = hwif->serialized; | 218 | } else |
| 219 | hwif->speedproc = &aec6260_tune_chipset; | ||
| 238 | 220 | ||
| 239 | if (!hwif->dma_base) { | 221 | if (!hwif->dma_base) { |
| 240 | hwif->drives[0].autotune = 1; | 222 | hwif->drives[0].autotune = hwif->drives[1].autotune = 1; |
| 241 | hwif->drives[1].autotune = 1; | ||
| 242 | return; | 223 | return; |
| 243 | } | 224 | } |
| 244 | 225 | ||
| 245 | hwif->ultra_mask = hwif->cds->udma_mask; | 226 | hwif->ultra_mask = hwif->cds->udma_mask; |
| 246 | |||
| 247 | /* atp865 and atp865r */ | ||
| 248 | if (hwif->ultra_mask == 0x3f) { | ||
| 249 | /* check bit 0x10 of DMA status register */ | ||
| 250 | if (inb(pci_resource_start(dev, 4) + 2) & 0x10) | ||
| 251 | hwif->ultra_mask = 0x7f; /* udma0-6 */ | ||
| 252 | } | ||
| 253 | |||
| 254 | hwif->mwdma_mask = 0x07; | 227 | hwif->mwdma_mask = 0x07; |
| 255 | 228 | ||
| 256 | hwif->ide_dma_check = &aec62xx_config_drive_xfer_rate; | 229 | hwif->ide_dma_check = &aec62xx_config_drive_xfer_rate; |
| 257 | hwif->ide_dma_lostirq = &aec62xx_irq_timeout; | 230 | hwif->dma_lost_irq = &aec62xx_dma_lost_irq; |
| 258 | |||
| 259 | if (!noautodma) | ||
| 260 | hwif->autodma = 1; | ||
| 261 | hwif->drives[0].autodma = hwif->autodma; | ||
| 262 | hwif->drives[1].autodma = hwif->autodma; | ||
| 263 | } | ||
| 264 | |||
| 265 | static void __devinit init_dma_aec62xx(ide_hwif_t *hwif, unsigned long dmabase) | ||
| 266 | { | ||
| 267 | struct pci_dev *dev = hwif->pci_dev; | ||
| 268 | 231 | ||
| 269 | if (dev->device == PCI_DEVICE_ID_ARTOP_ATP850UF) { | 232 | if (dev->device == PCI_DEVICE_ID_ARTOP_ATP850UF) { |
| 270 | u8 reg54h = 0; | ||
| 271 | unsigned long flags; | ||
| 272 | |||
| 273 | spin_lock_irqsave(&ide_lock, flags); | 233 | spin_lock_irqsave(&ide_lock, flags); |
| 274 | pci_read_config_byte(dev, 0x54, ®54h); | 234 | pci_read_config_byte (dev, 0x54, ®54); |
| 275 | pci_write_config_byte(dev, 0x54, reg54h & ~(hwif->channel ? 0xF0 : 0x0F)); | 235 | pci_write_config_byte(dev, 0x54, (reg54 & ~mask)); |
| 276 | spin_unlock_irqrestore(&ide_lock, flags); | 236 | spin_unlock_irqrestore(&ide_lock, flags); |
| 277 | } else { | 237 | } else if (hwif->cbl != ATA_CBL_PATA40_SHORT) { |
| 278 | u8 ata66 = 0; | 238 | u8 ata66 = 0, mask = hwif->channel ? 0x02 : 0x01; |
| 239 | |||
| 279 | pci_read_config_byte(hwif->pci_dev, 0x49, &ata66); | 240 | pci_read_config_byte(hwif->pci_dev, 0x49, &ata66); |
| 280 | if (!(hwif->udma_four)) | 241 | |
| 281 | hwif->udma_four = (ata66&(hwif->channel?0x02:0x01))?0:1; | 242 | hwif->cbl = (ata66 & mask) ? ATA_CBL_PATA40 : ATA_CBL_PATA80; |
| 282 | } | 243 | } |
| 283 | 244 | ||
| 284 | ide_setup_dma(hwif, dmabase, 8); | 245 | if (!noautodma) |
| 246 | hwif->autodma = 1; | ||
| 247 | hwif->drives[0].autodma = hwif->drives[1].autodma = hwif->autodma; | ||
| 285 | } | 248 | } |
| 286 | 249 | ||
| 287 | static int __devinit init_setup_aec62xx(struct pci_dev *dev, ide_pci_device_t *d) | 250 | static int __devinit init_setup_aec62xx(struct pci_dev *dev, ide_pci_device_t *d) |
| @@ -291,16 +254,12 @@ static int __devinit init_setup_aec62xx(struct pci_dev *dev, ide_pci_device_t *d | |||
| 291 | 254 | ||
| 292 | static int __devinit init_setup_aec6x80(struct pci_dev *dev, ide_pci_device_t *d) | 255 | static int __devinit init_setup_aec6x80(struct pci_dev *dev, ide_pci_device_t *d) |
| 293 | { | 256 | { |
| 294 | unsigned long bar4reg = pci_resource_start(dev, 4); | 257 | unsigned long dma_base = pci_resource_start(dev, 4); |
| 295 | 258 | ||
| 296 | if (inb(bar4reg+2) & 0x10) { | 259 | if (inb(dma_base + 2) & 0x10) { |
| 297 | strcpy(d->name, "AEC6880"); | 260 | d->name = (dev->device == PCI_DEVICE_ID_ARTOP_ATP865R) ? |
| 298 | if (dev->device == PCI_DEVICE_ID_ARTOP_ATP865R) | 261 | "AEC6880R" : "AEC6880"; |
| 299 | strcpy(d->name, "AEC6880R"); | 262 | d->udma_mask = 0x7f; /* udma0-6 */ |
| 300 | } else { | ||
| 301 | strcpy(d->name, "AEC6280"); | ||
| 302 | if (dev->device == PCI_DEVICE_ID_ARTOP_ATP865R) | ||
| 303 | strcpy(d->name, "AEC6280R"); | ||
| 304 | } | 263 | } |
| 305 | 264 | ||
| 306 | return ide_setup_pci_device(dev, d); | 265 | return ide_setup_pci_device(dev, d); |
| @@ -312,7 +271,6 @@ static ide_pci_device_t aec62xx_chipsets[] __devinitdata = { | |||
| 312 | .init_setup = init_setup_aec62xx, | 271 | .init_setup = init_setup_aec62xx, |
| 313 | .init_chipset = init_chipset_aec62xx, | 272 | .init_chipset = init_chipset_aec62xx, |
| 314 | .init_hwif = init_hwif_aec62xx, | 273 | .init_hwif = init_hwif_aec62xx, |
| 315 | .init_dma = init_dma_aec62xx, | ||
| 316 | .channels = 2, | 274 | .channels = 2, |
| 317 | .autodma = AUTODMA, | 275 | .autodma = AUTODMA, |
| 318 | .enablebits = {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, | 276 | .enablebits = {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, |
| @@ -323,7 +281,6 @@ static ide_pci_device_t aec62xx_chipsets[] __devinitdata = { | |||
| 323 | .init_setup = init_setup_aec62xx, | 281 | .init_setup = init_setup_aec62xx, |
| 324 | .init_chipset = init_chipset_aec62xx, | 282 | .init_chipset = init_chipset_aec62xx, |
| 325 | .init_hwif = init_hwif_aec62xx, | 283 | .init_hwif = init_hwif_aec62xx, |
| 326 | .init_dma = init_dma_aec62xx, | ||
| 327 | .channels = 2, | 284 | .channels = 2, |
| 328 | .autodma = NOAUTODMA, | 285 | .autodma = NOAUTODMA, |
| 329 | .bootable = OFF_BOARD, | 286 | .bootable = OFF_BOARD, |
| @@ -333,28 +290,25 @@ static ide_pci_device_t aec62xx_chipsets[] __devinitdata = { | |||
| 333 | .init_setup = init_setup_aec62xx, | 290 | .init_setup = init_setup_aec62xx, |
| 334 | .init_chipset = init_chipset_aec62xx, | 291 | .init_chipset = init_chipset_aec62xx, |
| 335 | .init_hwif = init_hwif_aec62xx, | 292 | .init_hwif = init_hwif_aec62xx, |
| 336 | .init_dma = init_dma_aec62xx, | ||
| 337 | .channels = 2, | 293 | .channels = 2, |
| 338 | .autodma = AUTODMA, | 294 | .autodma = AUTODMA, |
| 339 | .enablebits = {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, | 295 | .enablebits = {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, |
| 340 | .bootable = NEVER_BOARD, | 296 | .bootable = NEVER_BOARD, |
| 341 | .udma_mask = 0x1f, /* udma0-4 */ | 297 | .udma_mask = 0x1f, /* udma0-4 */ |
| 342 | },{ /* 3 */ | 298 | },{ /* 3 */ |
| 343 | .name = "AEC6X80", | 299 | .name = "AEC6280", |
| 344 | .init_setup = init_setup_aec6x80, | 300 | .init_setup = init_setup_aec6x80, |
| 345 | .init_chipset = init_chipset_aec62xx, | 301 | .init_chipset = init_chipset_aec62xx, |
| 346 | .init_hwif = init_hwif_aec62xx, | 302 | .init_hwif = init_hwif_aec62xx, |
| 347 | .init_dma = init_dma_aec62xx, | ||
| 348 | .channels = 2, | 303 | .channels = 2, |
| 349 | .autodma = AUTODMA, | 304 | .autodma = AUTODMA, |
| 350 | .bootable = OFF_BOARD, | 305 | .bootable = OFF_BOARD, |
| 351 | .udma_mask = 0x3f, /* udma0-5 */ | 306 | .udma_mask = 0x3f, /* udma0-5 */ |
| 352 | },{ /* 4 */ | 307 | },{ /* 4 */ |
| 353 | .name = "AEC6X80R", | 308 | .name = "AEC6280R", |
| 354 | .init_setup = init_setup_aec6x80, | 309 | .init_setup = init_setup_aec6x80, |
| 355 | .init_chipset = init_chipset_aec62xx, | 310 | .init_chipset = init_chipset_aec62xx, |
| 356 | .init_hwif = init_hwif_aec62xx, | 311 | .init_hwif = init_hwif_aec62xx, |
| 357 | .init_dma = init_dma_aec62xx, | ||
| 358 | .channels = 2, | 312 | .channels = 2, |
| 359 | .autodma = AUTODMA, | 313 | .autodma = AUTODMA, |
| 360 | .enablebits = {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, | 314 | .enablebits = {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, |
| @@ -370,13 +324,16 @@ static ide_pci_device_t aec62xx_chipsets[] __devinitdata = { | |||
| 370 | * | 324 | * |
| 371 | * Called when the PCI registration layer (or the IDE initialization) | 325 | * Called when the PCI registration layer (or the IDE initialization) |
| 372 | * finds a device matching our IDE device tables. | 326 | * finds a device matching our IDE device tables. |
| 327 | * | ||
| 328 | * NOTE: since we're going to modify the 'name' field for AEC-6[26]80[R] | ||
| 329 | * chips, pass a local copy of 'ide_pci_device_t' down the call chain. | ||
| 373 | */ | 330 | */ |
| 374 | 331 | ||
| 375 | static int __devinit aec62xx_init_one(struct pci_dev *dev, const struct pci_device_id *id) | 332 | static int __devinit aec62xx_init_one(struct pci_dev *dev, const struct pci_device_id *id) |
| 376 | { | 333 | { |
| 377 | ide_pci_device_t *d = &aec62xx_chipsets[id->driver_data]; | 334 | ide_pci_device_t d = aec62xx_chipsets[id->driver_data]; |
| 378 | 335 | ||
| 379 | return d->init_setup(dev, d); | 336 | return d.init_setup(dev, &d); |
| 380 | } | 337 | } |
| 381 | 338 | ||
| 382 | static struct pci_device_id aec62xx_pci_tbl[] = { | 339 | static struct pci_device_id aec62xx_pci_tbl[] = { |
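aec62xx shows the udma_four -> cbl conversion that runs through this series: instead of a boolean "80-wire" flag, init_hwif stores an ATA_CBL_* cable type in hwif->cbl, and an ATA_CBL_PATA40_SHORT value set earlier (for example by the obsolete ide0=ata66 boot option) is treated as an override and left alone. A sketch of the host-side detection pattern, assuming the same register layout as the hunk above (config register 0x49, one detect bit per channel, bit set meaning 40-wire):

    static void __devinit mydrv_cable_detect(ide_hwif_t *hwif)
    {
            u8 ata66 = 0, mask = hwif->channel ? 0x02 : 0x01;

            /* respect a user- or drive-side override */
            if (hwif->cbl == ATA_CBL_PATA40_SHORT)
                    return;

            pci_read_config_byte(hwif->pci_dev, 0x49, &ata66);
            hwif->cbl = (ata66 & mask) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
    }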
diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c index 27525ec2e19a..8a6b27b3bcc3 100644 --- a/drivers/ide/pci/alim15x3.c +++ b/drivers/ide/pci/alim15x3.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * linux/drivers/ide/pci/alim15x3.c Version 0.21 2007/02/03 | 2 | * linux/drivers/ide/pci/alim15x3.c Version 0.25 Jun 9 2007 |
| 3 | * | 3 | * |
| 4 | * Copyright (C) 1998-2000 Michel Aubry, Maintainer | 4 | * Copyright (C) 1998-2000 Michel Aubry, Maintainer |
| 5 | * Copyright (C) 1998-2000 Andrzej Krzysztofowicz, Maintainer | 5 | * Copyright (C) 1998-2000 Andrzej Krzysztofowicz, Maintainer |
| @@ -10,6 +10,7 @@ | |||
| 10 | * Copyright (C) 2002 Alan Cox <alan@redhat.com> | 10 | * Copyright (C) 2002 Alan Cox <alan@redhat.com> |
| 11 | * ALi (now ULi M5228) support by Clear Zhang <Clear.Zhang@ali.com.tw> | 11 | * ALi (now ULi M5228) support by Clear Zhang <Clear.Zhang@ali.com.tw> |
| 12 | * Copyright (C) 2007 MontaVista Software, Inc. <source@mvista.com> | 12 | * Copyright (C) 2007 MontaVista Software, Inc. <source@mvista.com> |
| 13 | * Copyright (C) 2007 Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> | ||
| 13 | * | 14 | * |
| 14 | * (U)DMA capable version of ali 1533/1543(C), 1535(D) | 15 | * (U)DMA capable version of ali 1533/1543(C), 1535(D) |
| 15 | * | 16 | * |
| @@ -36,6 +37,7 @@ | |||
| 36 | #include <linux/hdreg.h> | 37 | #include <linux/hdreg.h> |
| 37 | #include <linux/ide.h> | 38 | #include <linux/ide.h> |
| 38 | #include <linux/init.h> | 39 | #include <linux/init.h> |
| 40 | #include <linux/dmi.h> | ||
| 39 | 41 | ||
| 40 | #include <asm/io.h> | 42 | #include <asm/io.h> |
| 41 | 43 | ||
| @@ -583,6 +585,35 @@ out: | |||
| 583 | return 0; | 585 | return 0; |
| 584 | } | 586 | } |
| 585 | 587 | ||
| 588 | /* | ||
| 589 | * Cable special cases | ||
| 590 | */ | ||
| 591 | |||
| 592 | static struct dmi_system_id cable_dmi_table[] = { | ||
| 593 | { | ||
| 594 | .ident = "HP Pavilion N5430", | ||
| 595 | .matches = { | ||
| 596 | DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), | ||
| 597 | DMI_MATCH(DMI_BOARD_NAME, "OmniBook N32N-736"), | ||
| 598 | }, | ||
| 599 | }, | ||
| 600 | { } | ||
| 601 | }; | ||
| 602 | |||
| 603 | static int ali_cable_override(struct pci_dev *pdev) | ||
| 604 | { | ||
| 605 | /* Fujitsu P2000 */ | ||
| 606 | if (pdev->subsystem_vendor == 0x10CF && | ||
| 607 | pdev->subsystem_device == 0x10AF) | ||
| 608 | return 1; | ||
| 609 | |||
| 610 | /* Systems by DMI */ | ||
| 611 | if (dmi_check_system(cable_dmi_table)) | ||
| 612 | return 1; | ||
| 613 | |||
| 614 | return 0; | ||
| 615 | } | ||
| 616 | |||
| 586 | /** | 617 | /** |
| 587 | * ata66_ali15x3 - check for UDMA 66 support | 618 | * ata66_ali15x3 - check for UDMA 66 support |
| 588 | * @hwif: IDE interface | 619 | * @hwif: IDE interface |
| @@ -594,37 +625,31 @@ out: | |||
| 594 | * FIXME: frobs bits that are not defined on newer ALi devices | 625 | * FIXME: frobs bits that are not defined on newer ALi devices |
| 595 | */ | 626 | */ |
| 596 | 627 | ||
| 597 | static unsigned int __devinit ata66_ali15x3 (ide_hwif_t *hwif) | 628 | static u8 __devinit ata66_ali15x3(ide_hwif_t *hwif) |
| 598 | { | 629 | { |
| 599 | struct pci_dev *dev = hwif->pci_dev; | 630 | struct pci_dev *dev = hwif->pci_dev; |
| 600 | unsigned int ata66 = 0; | ||
| 601 | u8 cable_80_pin[2] = { 0, 0 }; | ||
| 602 | |||
| 603 | unsigned long flags; | 631 | unsigned long flags; |
| 604 | u8 tmpbyte; | 632 | u8 cbl = ATA_CBL_PATA40, tmpbyte; |
| 605 | 633 | ||
| 606 | local_irq_save(flags); | 634 | local_irq_save(flags); |
| 607 | 635 | ||
| 608 | if (m5229_revision >= 0xC2) { | 636 | if (m5229_revision >= 0xC2) { |
| 609 | /* | 637 | /* |
| 610 | * Ultra66 cable detection (from Host View) | 638 | * m5229 80-pin cable detection (from Host View) |
| 611 | * m5229, 0x4a, bit0: primary, bit1: secondary 80 pin | 639 | * |
| 612 | */ | 640 | * 0x4a bit0 is 0 => primary channel has 80-pin |
| 613 | pci_read_config_byte(dev, 0x4a, &tmpbyte); | 641 | * 0x4a bit1 is 0 => secondary channel has 80-pin |
| 614 | /* | 642 | * |
| 615 | * 0x4a, bit0 is 0 => primary channel | 643 | * Certain laptops use short but suitable cables |
| 616 | * has 80-pin (from host view) | 644 | * and don't implement the detect logic. |
| 617 | */ | ||
| 618 | if (!(tmpbyte & 0x01)) cable_80_pin[0] = 1; | ||
| 619 | /* | ||
| 620 | * 0x4a, bit1 is 0 => secondary channel | ||
| 621 | * has 80-pin (from host view) | ||
| 622 | */ | ||
| 623 | if (!(tmpbyte & 0x02)) cable_80_pin[1] = 1; | ||
| 624 | /* | ||
| 625 | * Allow ata66 if cable of current channel has 80 pins | ||
| 626 | */ | 645 | */ |
| 627 | ata66 = (hwif->channel)?cable_80_pin[1]:cable_80_pin[0]; | 646 | if (ali_cable_override(dev)) |
| 647 | cbl = ATA_CBL_PATA40_SHORT; | ||
| 648 | else { | ||
| 649 | pci_read_config_byte(dev, 0x4a, &tmpbyte); | ||
| 650 | if ((tmpbyte & (1 << hwif->channel)) == 0) | ||
| 651 | cbl = ATA_CBL_PATA80; | ||
| 652 | } | ||
| 628 | } else { | 653 | } else { |
| 629 | /* | 654 | /* |
| 630 | * check m1533, 0x5e, bit 1~4 == 1001 => & 00011110 = 00010010 | 655 | * check m1533, 0x5e, bit 1~4 == 1001 => & 00011110 = 00010010 |
| @@ -657,7 +682,7 @@ static unsigned int __devinit ata66_ali15x3 (ide_hwif_t *hwif) | |||
| 657 | 682 | ||
| 658 | local_irq_restore(flags); | 683 | local_irq_restore(flags); |
| 659 | 684 | ||
| 660 | return(ata66); | 685 | return cbl; |
| 661 | } | 686 | } |
| 662 | 687 | ||
| 663 | /** | 688 | /** |
| @@ -708,8 +733,9 @@ static void __devinit init_hwif_common_ali15x3 (ide_hwif_t *hwif) | |||
| 708 | hwif->dma_setup = &ali15x3_dma_setup; | 733 | hwif->dma_setup = &ali15x3_dma_setup; |
| 709 | if (!noautodma) | 734 | if (!noautodma) |
| 710 | hwif->autodma = 1; | 735 | hwif->autodma = 1; |
| 711 | if (!(hwif->udma_four)) | 736 | |
| 712 | hwif->udma_four = ata66_ali15x3(hwif); | 737 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 738 | hwif->cbl = ata66_ali15x3(hwif); | ||
| 713 | } | 739 | } |
| 714 | hwif->drives[0].autodma = hwif->autodma; | 740 | hwif->drives[0].autodma = hwif->autodma; |
| 715 | hwif->drives[1].autodma = hwif->autodma; | 741 | hwif->drives[1].autodma = hwif->autodma; |
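alim15x3 adds the laptop special cases: machines whose short internal cables are 80-wire quality but lack detect logic are matched by PCI subsystem IDs or by DMI and forced to ATA_CBL_PATA40_SHORT, so the higher UDMA modes stay available. A minimal dmi_check_system() sketch with an invented table entry:

    #include <linux/dmi.h>

    static struct dmi_system_id cable_dmi_table[] = {
            {
                    .ident = "Example Laptop",       /* hypothetical entry */
                    .matches = {
                            DMI_MATCH(DMI_BOARD_VENDOR, "Example Vendor"),
                            DMI_MATCH(DMI_BOARD_NAME, "Example Board"),
                    },
            },
            { }                                      /* terminator */
    };

    static int mydrv_cable_override(void)
    {
            /* non-zero if the running system matches any table entry */
            return dmi_check_system(cable_dmi_table) ? 1 : 0;
    }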
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c index a2be65fcf89c..84ed30cdb324 100644 --- a/drivers/ide/pci/amd74xx.c +++ b/drivers/ide/pci/amd74xx.c | |||
| @@ -1,10 +1,11 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Version 2.16 | 2 | * Version 2.20 |
| 3 | * | 3 | * |
| 4 | * AMD 755/756/766/8111 and nVidia nForce/2/2s/3/3s/CK804/MCP04 | 4 | * AMD 755/756/766/8111 and nVidia nForce/2/2s/3/3s/CK804/MCP04 |
| 5 | * IDE driver for Linux. | 5 | * IDE driver for Linux. |
| 6 | * | 6 | * |
| 7 | * Copyright (c) 2000-2002 Vojtech Pavlik | 7 | * Copyright (c) 2000-2002 Vojtech Pavlik |
| 8 | * Copyright (c) 2007 Bartlomiej Zolnierkiewicz | ||
| 8 | * | 9 | * |
| 9 | * Based on the work of: | 10 | * Based on the work of: |
| 10 | * Andre Hedrick | 11 | * Andre Hedrick |
| @@ -37,11 +38,6 @@ | |||
| 37 | #define AMD_ADDRESS_SETUP (0x0c + amd_config->base) | 38 | #define AMD_ADDRESS_SETUP (0x0c + amd_config->base) |
| 38 | #define AMD_UDMA_TIMING (0x10 + amd_config->base) | 39 | #define AMD_UDMA_TIMING (0x10 + amd_config->base) |
| 39 | 40 | ||
| 40 | #define AMD_UDMA 0x07 | ||
| 41 | #define AMD_UDMA_33 0x01 | ||
| 42 | #define AMD_UDMA_66 0x02 | ||
| 43 | #define AMD_UDMA_100 0x03 | ||
| 44 | #define AMD_UDMA_133 0x04 | ||
| 45 | #define AMD_CHECK_SWDMA 0x08 | 41 | #define AMD_CHECK_SWDMA 0x08 |
| 46 | #define AMD_BAD_SWDMA 0x10 | 42 | #define AMD_BAD_SWDMA 0x10 |
| 47 | #define AMD_BAD_FIFO 0x20 | 43 | #define AMD_BAD_FIFO 0x20 |
| @@ -53,32 +49,33 @@ | |||
| 53 | 49 | ||
| 54 | static struct amd_ide_chip { | 50 | static struct amd_ide_chip { |
| 55 | unsigned short id; | 51 | unsigned short id; |
| 56 | unsigned long base; | 52 | u8 base; |
| 57 | unsigned char flags; | 53 | u8 udma_mask; |
| 54 | u8 flags; | ||
| 58 | } amd_ide_chips[] = { | 55 | } amd_ide_chips[] = { |
| 59 | { PCI_DEVICE_ID_AMD_COBRA_7401, 0x40, AMD_UDMA_33 | AMD_BAD_SWDMA }, | 56 | { PCI_DEVICE_ID_AMD_COBRA_7401, 0x40, ATA_UDMA2, AMD_BAD_SWDMA }, |
| 60 | { PCI_DEVICE_ID_AMD_VIPER_7409, 0x40, AMD_UDMA_66 | AMD_CHECK_SWDMA }, | 57 | { PCI_DEVICE_ID_AMD_VIPER_7409, 0x40, ATA_UDMA4, AMD_CHECK_SWDMA }, |
| 61 | { PCI_DEVICE_ID_AMD_VIPER_7411, 0x40, AMD_UDMA_100 | AMD_BAD_FIFO }, | 58 | { PCI_DEVICE_ID_AMD_VIPER_7411, 0x40, ATA_UDMA5, AMD_BAD_FIFO }, |
| 62 | { PCI_DEVICE_ID_AMD_OPUS_7441, 0x40, AMD_UDMA_100 }, | 59 | { PCI_DEVICE_ID_AMD_OPUS_7441, 0x40, ATA_UDMA5, }, |
| 63 | { PCI_DEVICE_ID_AMD_8111_IDE, 0x40, AMD_UDMA_133 | AMD_CHECK_SERENADE }, | 60 | { PCI_DEVICE_ID_AMD_8111_IDE, 0x40, ATA_UDMA6, AMD_CHECK_SERENADE }, |
| 64 | { PCI_DEVICE_ID_NVIDIA_NFORCE_IDE, 0x50, AMD_UDMA_100 }, | 61 | { PCI_DEVICE_ID_NVIDIA_NFORCE_IDE, 0x50, ATA_UDMA5, }, |
| 65 | { PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE, 0x50, AMD_UDMA_133 }, | 62 | { PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE, 0x50, ATA_UDMA6, }, |
| 66 | { PCI_DEVICE_ID_NVIDIA_NFORCE2S_IDE, 0x50, AMD_UDMA_133 }, | 63 | { PCI_DEVICE_ID_NVIDIA_NFORCE2S_IDE, 0x50, ATA_UDMA6, }, |
| 67 | { PCI_DEVICE_ID_NVIDIA_NFORCE2S_SATA, 0x50, AMD_UDMA_133 }, | 64 | { PCI_DEVICE_ID_NVIDIA_NFORCE2S_SATA, 0x50, ATA_UDMA6, }, |
| 68 | { PCI_DEVICE_ID_NVIDIA_NFORCE3_IDE, 0x50, AMD_UDMA_133 }, | 65 | { PCI_DEVICE_ID_NVIDIA_NFORCE3_IDE, 0x50, ATA_UDMA6, }, |
| 69 | { PCI_DEVICE_ID_NVIDIA_NFORCE3S_IDE, 0x50, AMD_UDMA_133 }, | 66 | { PCI_DEVICE_ID_NVIDIA_NFORCE3S_IDE, 0x50, ATA_UDMA6, }, |
| 70 | { PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA, 0x50, AMD_UDMA_133 }, | 67 | { PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA, 0x50, ATA_UDMA6, }, |
| 71 | { PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2, 0x50, AMD_UDMA_133 }, | 68 | { PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2, 0x50, ATA_UDMA6, }, |
| 72 | { PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE, 0x50, AMD_UDMA_133 }, | 69 | { PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE, 0x50, ATA_UDMA6, }, |
| 73 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE, 0x50, AMD_UDMA_133 }, | 70 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE, 0x50, ATA_UDMA6, }, |
| 74 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE, 0x50, AMD_UDMA_133 }, | 71 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE, 0x50, ATA_UDMA6, }, |
| 75 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, 0x50, AMD_UDMA_133 }, | 72 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, 0x50, ATA_UDMA6, }, |
| 76 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE, 0x50, AMD_UDMA_133 }, | 73 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE, 0x50, ATA_UDMA6, }, |
| 77 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE, 0x50, AMD_UDMA_133 }, | 74 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE, 0x50, ATA_UDMA6, }, |
| 78 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE, 0x50, AMD_UDMA_133 }, | 75 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE, 0x50, ATA_UDMA6, }, |
| 79 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE, 0x50, AMD_UDMA_133 }, | 76 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE, 0x50, ATA_UDMA6, }, |
| 80 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE, 0x50, AMD_UDMA_133 }, | 77 | { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE, 0x50, ATA_UDMA6, }, |
| 81 | { PCI_DEVICE_ID_AMD_CS5536_IDE, 0x40, AMD_UDMA_100 }, | 78 | { PCI_DEVICE_ID_AMD_CS5536_IDE, 0x40, ATA_UDMA5, }, |
| 82 | { 0 } | 79 | { 0 } |
| 83 | }; | 80 | }; |
| 84 | 81 | ||
| @@ -87,7 +84,7 @@ static ide_pci_device_t *amd_chipset; | |||
| 87 | static unsigned int amd_80w; | 84 | static unsigned int amd_80w; |
| 88 | static unsigned int amd_clock; | 85 | static unsigned int amd_clock; |
| 89 | 86 | ||
| 90 | static char *amd_dma[] = { "MWDMA16", "UDMA33", "UDMA66", "UDMA100", "UDMA133" }; | 87 | static char *amd_dma[] = { "16", "25", "33", "44", "66", "100", "133" }; |
| 91 | static unsigned char amd_cyc2udma[] = { 6, 6, 5, 4, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 7 }; | 88 | static unsigned char amd_cyc2udma[] = { 6, 6, 5, 4, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 7 }; |
| 92 | 89 | ||
| 93 | /* | 90 | /* |
| @@ -128,7 +125,7 @@ static int amd74xx_get_info(char *buffer, char **addr, off_t offset, int count) | |||
| 128 | 125 | ||
| 129 | pci_read_config_byte(dev, PCI_REVISION_ID, &t); | 126 | pci_read_config_byte(dev, PCI_REVISION_ID, &t); |
| 130 | amd_print("Revision: IDE %#x", t); | 127 | amd_print("Revision: IDE %#x", t); |
| 131 | amd_print("Highest DMA rate: %s", amd_dma[amd_config->flags & AMD_UDMA]); | 128 | amd_print("Highest DMA rate: UDMA%s", amd_dma[fls(amd_config->udma_mask) - 1]); |
| 132 | 129 | ||
| 133 | amd_print("BM-DMA base: %#lx", amd_base); | 130 | amd_print("BM-DMA base: %#lx", amd_base); |
| 134 | amd_print("PCI clock: %d.%dMHz", amd_clock / 1000, amd_clock / 100 % 10); | 131 | amd_print("PCI clock: %d.%dMHz", amd_clock / 1000, amd_clock / 100 % 10); |
| @@ -221,12 +218,12 @@ static void amd_set_speed(struct pci_dev *dev, unsigned char dn, struct ide_timi | |||
| 221 | pci_write_config_byte(dev, AMD_DRIVE_TIMING + (3 - dn), | 218 | pci_write_config_byte(dev, AMD_DRIVE_TIMING + (3 - dn), |
| 222 | ((FIT(timing->active, 1, 16) - 1) << 4) | (FIT(timing->recover, 1, 16) - 1)); | 219 | ((FIT(timing->active, 1, 16) - 1) << 4) | (FIT(timing->recover, 1, 16) - 1)); |
| 223 | 220 | ||
| 224 | switch (amd_config->flags & AMD_UDMA) { | 221 | switch (amd_config->udma_mask) { |
| 225 | case AMD_UDMA_33: t = timing->udma ? (0xc0 | (FIT(timing->udma, 2, 5) - 2)) : 0x03; break; | 222 | case ATA_UDMA2: t = timing->udma ? (0xc0 | (FIT(timing->udma, 2, 5) - 2)) : 0x03; break; |
| 226 | case AMD_UDMA_66: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 2, 10)]) : 0x03; break; | 223 | case ATA_UDMA4: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 2, 10)]) : 0x03; break; |
| 227 | case AMD_UDMA_100: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 1, 10)]) : 0x03; break; | 224 | case ATA_UDMA5: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 1, 10)]) : 0x03; break; |
| 228 | case AMD_UDMA_133: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 1, 15)]) : 0x03; break; | 225 | case ATA_UDMA6: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 1, 15)]) : 0x03; break; |
| 229 | default: return; | 226 | default: return; |
| 230 | } | 227 | } |
| 231 | 228 | ||
| 232 | pci_write_config_byte(dev, AMD_UDMA_TIMING + (3 - dn), t); | 229 | pci_write_config_byte(dev, AMD_UDMA_TIMING + (3 - dn), t); |
| @@ -248,7 +245,7 @@ static int amd_set_drive(ide_drive_t *drive, u8 speed) | |||
| 248 | ide_config_drive_speed(drive, speed); | 245 | ide_config_drive_speed(drive, speed); |
| 249 | 246 | ||
| 250 | T = 1000000000 / amd_clock; | 247 | T = 1000000000 / amd_clock; |
| 251 | UT = T / min_t(int, max_t(int, amd_config->flags & AMD_UDMA, 1), 2); | 248 | UT = (amd_config->udma_mask == ATA_UDMA2) ? T : (T / 2); |
| 252 | 249 | ||
| 253 | ide_timing_compute(drive, speed, &t, T, UT); | 250 | ide_timing_compute(drive, speed, &t, T, UT); |
| 254 | 251 | ||
| @@ -277,29 +274,19 @@ static int amd_set_drive(ide_drive_t *drive, u8 speed) | |||
| 277 | static void amd74xx_tune_drive(ide_drive_t *drive, u8 pio) | 274 | static void amd74xx_tune_drive(ide_drive_t *drive, u8 pio) |
| 278 | { | 275 | { |
| 279 | if (pio == 255) { | 276 | if (pio == 255) { |
| 280 | amd_set_drive(drive, ide_find_best_mode(drive, XFER_PIO | XFER_EPIO)); | 277 | amd_set_drive(drive, ide_find_best_pio_mode(drive)); |
| 281 | return; | 278 | return; |
| 282 | } | 279 | } |
| 283 | 280 | ||
| 284 | amd_set_drive(drive, XFER_PIO_0 + min_t(byte, pio, 5)); | 281 | amd_set_drive(drive, XFER_PIO_0 + min_t(byte, pio, 5)); |
| 285 | } | 282 | } |
| 286 | 283 | ||
| 287 | /* | ||
| 288 | * amd74xx_dmaproc() is a callback from upper layers that can do | ||
| 289 | * a lot, but we use it for DMA/PIO tuning only, delegating everything | ||
| 290 | * else to the default ide_dmaproc(). | ||
| 291 | */ | ||
| 292 | |||
| 293 | static int amd74xx_ide_dma_check(ide_drive_t *drive) | 284 | static int amd74xx_ide_dma_check(ide_drive_t *drive) |
| 294 | { | 285 | { |
| 295 | int w80 = HWIF(drive)->udma_four; | 286 | u8 speed = ide_max_dma_mode(drive); |
| 296 | 287 | ||
| 297 | u8 speed = ide_find_best_mode(drive, | 288 | if (speed == 0) |
| 298 | XFER_PIO | XFER_EPIO | XFER_MWDMA | XFER_UDMA | | 289 | speed = ide_find_best_pio_mode(drive); |
| 299 | ((amd_config->flags & AMD_BAD_SWDMA) ? 0 : XFER_SWDMA) | | ||
| 300 | (w80 && (amd_config->flags & AMD_UDMA) >= AMD_UDMA_66 ? XFER_UDMA_66 : 0) | | ||
| 301 | (w80 && (amd_config->flags & AMD_UDMA) >= AMD_UDMA_100 ? XFER_UDMA_100 : 0) | | ||
| 302 | (w80 && (amd_config->flags & AMD_UDMA) >= AMD_UDMA_133 ? XFER_UDMA_133 : 0)); | ||
| 303 | 290 | ||
| 304 | amd_set_drive(drive, speed); | 291 | amd_set_drive(drive, speed); |
| 305 | 292 | ||
| @@ -334,10 +321,10 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev, const ch | |||
| 334 | * Check 80-wire cable presence. | 321 | * Check 80-wire cable presence. |
| 335 | */ | 322 | */ |
| 336 | 323 | ||
| 337 | switch (amd_config->flags & AMD_UDMA) { | 324 | switch (amd_config->udma_mask) { |
| 338 | 325 | ||
| 339 | case AMD_UDMA_133: | 326 | case ATA_UDMA6: |
| 340 | case AMD_UDMA_100: | 327 | case ATA_UDMA5: |
| 341 | pci_read_config_byte(dev, AMD_CABLE_DETECT, &t); | 328 | pci_read_config_byte(dev, AMD_CABLE_DETECT, &t); |
| 342 | pci_read_config_dword(dev, AMD_UDMA_TIMING, &u); | 329 | pci_read_config_dword(dev, AMD_UDMA_TIMING, &u); |
| 343 | amd_80w = ((t & 0x3) ? 1 : 0) | ((t & 0xc) ? 2 : 0); | 330 | amd_80w = ((t & 0x3) ? 1 : 0) | ((t & 0xc) ? 2 : 0); |
| @@ -349,7 +336,7 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev, const ch | |||
| 349 | } | 336 | } |
| 350 | break; | 337 | break; |
| 351 | 338 | ||
| 352 | case AMD_UDMA_66: | 339 | case ATA_UDMA4: |
| 353 | /* no host side cable detection */ | 340 | /* no host side cable detection */ |
| 354 | amd_80w = 0x03; | 341 | amd_80w = 0x03; |
| 355 | break; | 342 | break; |
| @@ -370,7 +357,7 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev, const ch | |||
| 370 | if ((amd_config->flags & AMD_CHECK_SERENADE) && | 357 | if ((amd_config->flags & AMD_CHECK_SERENADE) && |
| 371 | dev->subsystem_vendor == PCI_VENDOR_ID_AMD && | 358 | dev->subsystem_vendor == PCI_VENDOR_ID_AMD && |
| 372 | dev->subsystem_device == PCI_DEVICE_ID_AMD_SERENADE) | 359 | dev->subsystem_device == PCI_DEVICE_ID_AMD_SERENADE) |
| 373 | amd_config->flags = AMD_UDMA_100; | 360 | amd_config->udma_mask = ATA_UDMA5; |
| 374 | 361 | ||
| 375 | /* | 362 | /* |
| 376 | * Determine the system bus clock. | 363 | * Determine the system bus clock. |
| @@ -395,8 +382,9 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev, const ch | |||
| 395 | */ | 382 | */ |
| 396 | 383 | ||
| 397 | pci_read_config_byte(dev, PCI_REVISION_ID, &t); | 384 | pci_read_config_byte(dev, PCI_REVISION_ID, &t); |
| 398 | printk(KERN_INFO "%s: %s (rev %02x) %s controller\n", | 385 | printk(KERN_INFO "%s: %s (rev %02x) UDMA%s controller\n", |
| 399 | amd_chipset->name, pci_name(dev), t, amd_dma[amd_config->flags & AMD_UDMA]); | 386 | amd_chipset->name, pci_name(dev), t, |
| 387 | amd_dma[fls(amd_config->udma_mask) - 1]); | ||
| 400 | 388 | ||
| 401 | /* | 389 | /* |
| 402 | * Register /proc/ide/amd74xx entry | 390 | * Register /proc/ide/amd74xx entry |
| @@ -437,12 +425,19 @@ static void __devinit init_hwif_amd74xx(ide_hwif_t *hwif) | |||
| 437 | return; | 425 | return; |
| 438 | 426 | ||
| 439 | hwif->atapi_dma = 1; | 427 | hwif->atapi_dma = 1; |
| 440 | hwif->ultra_mask = 0x7f; | ||
| 441 | hwif->mwdma_mask = 0x07; | ||
| 442 | hwif->swdma_mask = 0x07; | ||
| 443 | 428 | ||
| 444 | if (!hwif->udma_four) | 429 | hwif->ultra_mask = amd_config->udma_mask; |
| 445 | hwif->udma_four = (amd_80w >> hwif->channel) & 1; | 430 | hwif->mwdma_mask = 0x07; |
| 431 | if ((amd_config->flags & AMD_BAD_SWDMA) == 0) | ||
| 432 | hwif->swdma_mask = 0x07; | ||
| 433 | |||
| 434 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) { | ||
| 435 | if ((amd_80w >> hwif->channel) & 1) | ||
| 436 | hwif->cbl = ATA_CBL_PATA80; | ||
| 437 | else | ||
| 438 | hwif->cbl = ATA_CBL_PATA40; | ||
| 439 | } | ||
| 440 | |||
| 446 | hwif->ide_dma_check = &amd74xx_ide_dma_check; | 441 | hwif->ide_dma_check = &amd74xx_ide_dma_check; |
| 447 | if (!noautodma) | 442 | if (!noautodma) |
| 448 | hwif->autodma = 1; | 443 | hwif->autodma = 1; |
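amd74xx drops its private AMD_UDMA_* level encoding in favor of the standard ATA_UDMA* bitmasks, so hwif->ultra_mask can be taken straight from the chip table and the highest supported mode falls out of fls(): ATA_UDMA5 is 0x3f, fls() gives 6, and index 5 of the name table is "100". A sketch using the same name table as the hunk:

    #include <linux/bitops.h>        /* fls() */

    static const char *amd_dma[] = { "16", "25", "33", "44", "66", "100", "133" };

    /* highest set bit of the UDMA mask indexes the speed name */
    static const char *udma_name(u8 udma_mask)
    {
            return amd_dma[fls(udma_mask) - 1];   /* 0x7f -> "133" */
    }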
diff --git a/drivers/ide/pci/atiixp.c b/drivers/ide/pci/atiixp.c index 8ab33faf6f76..2761510309b3 100644 --- a/drivers/ide/pci/atiixp.c +++ b/drivers/ide/pci/atiixp.c | |||
| @@ -264,10 +264,11 @@ static void __devinit init_hwif_atiixp(ide_hwif_t *hwif) | |||
| 264 | hwif->swdma_mask = 0x04; | 264 | hwif->swdma_mask = 0x04; |
| 265 | 265 | ||
| 266 | pci_read_config_byte(pdev, ATIIXP_IDE_UDMA_MODE + ch, &udma_mode); | 266 | pci_read_config_byte(pdev, ATIIXP_IDE_UDMA_MODE + ch, &udma_mode); |
| 267 | |||
| 267 | if ((udma_mode & 0x07) >= 0x04 || (udma_mode & 0x70) >= 0x40) | 268 | if ((udma_mode & 0x07) >= 0x04 || (udma_mode & 0x70) >= 0x40) |
| 268 | hwif->udma_four = 1; | 269 | hwif->cbl = ATA_CBL_PATA80; |
| 269 | else | 270 | else |
| 270 | hwif->udma_four = 0; | 271 | hwif->cbl = ATA_CBL_PATA40; |
| 271 | 272 | ||
| 272 | hwif->dma_host_on = &atiixp_dma_host_on; | 273 | hwif->dma_host_on = &atiixp_dma_host_on; |
| 273 | hwif->dma_host_off = &atiixp_dma_host_off; | 274 | hwif->dma_host_off = &atiixp_dma_host_off; |
diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c index 7c57dc696f52..8631b6c8aa15 100644 --- a/drivers/ide/pci/cmd64x.c +++ b/drivers/ide/pci/cmd64x.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * linux/drivers/ide/pci/cmd64x.c Version 1.47 Mar 19, 2007 | 2 | * linux/drivers/ide/pci/cmd64x.c Version 1.50 May 10, 2007 |
| 3 | * | 3 | * |
| 4 | * cmd64x.c: Enable interrupts at initialization time on Ultra/PCI machines. | 4 | * cmd64x.c: Enable interrupts at initialization time on Ultra/PCI machines. |
| 5 | * Due to massive hardware bugs, UltraDMA is only supported | 5 | * Due to massive hardware bugs, UltraDMA is only supported |
| @@ -52,9 +52,6 @@ | |||
| 52 | #define ARTTIM23_DIS_RA2 0x04 | 52 | #define ARTTIM23_DIS_RA2 0x04 |
| 53 | #define ARTTIM23_DIS_RA3 0x08 | 53 | #define ARTTIM23_DIS_RA3 0x08 |
| 54 | #define ARTTIM23_INTR_CH1 0x10 | 54 | #define ARTTIM23_INTR_CH1 0x10 |
| 55 | #define ARTTIM2 0x57 | ||
| 56 | #define ARTTIM3 0x57 | ||
| 57 | #define DRWTIM23 0x58 | ||
| 58 | #define DRWTIM2 0x58 | 55 | #define DRWTIM2 0x58 |
| 59 | #define BRST 0x59 | 56 | #define BRST 0x59 |
| 60 | #define DRWTIM3 0x5b | 57 | #define DRWTIM3 0x5b |
| @@ -469,71 +466,43 @@ static int cmd646_1_ide_dma_end (ide_drive_t *drive) | |||
| 469 | 466 | ||
| 470 | static unsigned int __devinit init_chipset_cmd64x(struct pci_dev *dev, const char *name) | 467 | static unsigned int __devinit init_chipset_cmd64x(struct pci_dev *dev, const char *name) |
| 471 | { | 468 | { |
| 472 | u32 class_rev = 0; | ||
| 473 | u8 mrdmode = 0; | 469 | u8 mrdmode = 0; |
| 474 | 470 | ||
| 475 | pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); | 471 | if (dev->device == PCI_DEVICE_ID_CMD_646) { |
| 476 | class_rev &= 0xff; | 472 | u8 rev = 0; |
| 477 | 473 | ||
| 478 | switch(dev->device) { | 474 | pci_read_config_byte(dev, PCI_REVISION_ID, &rev); |
| 479 | case PCI_DEVICE_ID_CMD_643: | 475 | |
| 480 | break; | 476 | switch (rev) { |
| 481 | case PCI_DEVICE_ID_CMD_646: | 477 | case 0x07: |
| 482 | printk(KERN_INFO "%s: chipset revision 0x%02X, ", name, class_rev); | 478 | case 0x05: |
| 483 | switch(class_rev) { | 479 | printk("%s: UltraDMA capable", name); |
| 484 | case 0x07: | ||
| 485 | case 0x05: | ||
| 486 | printk("UltraDMA Capable"); | ||
| 487 | break; | ||
| 488 | case 0x03: | ||
| 489 | printk("MultiWord DMA Force Limited"); | ||
| 490 | break; | ||
| 491 | case 0x01: | ||
| 492 | default: | ||
| 493 | printk("MultiWord DMA Limited, IRQ workaround enabled"); | ||
| 494 | break; | ||
| 495 | } | ||
| 496 | printk("\n"); | ||
| 497 | break; | ||
| 498 | case PCI_DEVICE_ID_CMD_648: | ||
| 499 | case PCI_DEVICE_ID_CMD_649: | ||
| 500 | break; | 480 | break; |
| 481 | case 0x03: | ||
| 501 | default: | 482 | default: |
| 483 | printk("%s: MultiWord DMA force limited", name); | ||
| 484 | break; | ||
| 485 | case 0x01: | ||
| 486 | printk("%s: MultiWord DMA limited, " | ||
| 487 | "IRQ workaround enabled\n", name); | ||
| 502 | break; | 488 | break; |
| 489 | } | ||
| 503 | } | 490 | } |
| 504 | 491 | ||
| 505 | /* Set a good latency timer and cache line size value. */ | 492 | /* Set a good latency timer and cache line size value. */ |
| 506 | (void) pci_write_config_byte(dev, PCI_LATENCY_TIMER, 64); | 493 | (void) pci_write_config_byte(dev, PCI_LATENCY_TIMER, 64); |
| 507 | /* FIXME: pci_set_master() to ensure a good latency timer value */ | 494 | /* FIXME: pci_set_master() to ensure a good latency timer value */ |
| 508 | 495 | ||
| 509 | /* Setup interrupts. */ | 496 | /* |
| 510 | (void) pci_read_config_byte(dev, MRDMODE, &mrdmode); | 497 | * Enable interrupts, select MEMORY READ LINE for reads. |
| 511 | mrdmode &= ~(0x30); | 498 | * |
| 512 | (void) pci_write_config_byte(dev, MRDMODE, mrdmode); | 499 | * NOTE: although not mentioned in the PCI0646U specs, |
| 513 | 500 | * bits 0-1 are write only and won't be read back as | |
| 514 | /* Use MEMORY READ LINE for reads. | 501 | * set or not -- PCI0646U2 specs clarify this point. |
| 515 | * NOTE: Although not mentioned in the PCI0646U specs, | ||
| 516 | * these bits are write only and won't be read | ||
| 517 | * back as set or not. The PCI0646U2 specs clarify | ||
| 518 | * this point. | ||
| 519 | */ | 502 | */ |
| 520 | (void) pci_write_config_byte(dev, MRDMODE, mrdmode | 0x02); | 503 | (void) pci_read_config_byte (dev, MRDMODE, &mrdmode); |
| 521 | 504 | mrdmode &= ~0x30; | |
| 522 | /* Set reasonable active/recovery/address-setup values. */ | 505 | (void) pci_write_config_byte(dev, MRDMODE, (mrdmode | 0x02)); |
| 523 | (void) pci_write_config_byte(dev, ARTTIM0, 0x40); | ||
| 524 | (void) pci_write_config_byte(dev, DRWTIM0, 0x3f); | ||
| 525 | (void) pci_write_config_byte(dev, ARTTIM1, 0x40); | ||
| 526 | (void) pci_write_config_byte(dev, DRWTIM1, 0x3f); | ||
| 527 | #ifdef __i386__ | ||
| 528 | (void) pci_write_config_byte(dev, ARTTIM23, 0x1c); | ||
| 529 | #else | ||
| 530 | (void) pci_write_config_byte(dev, ARTTIM23, 0x5c); | ||
| 531 | #endif | ||
| 532 | (void) pci_write_config_byte(dev, DRWTIM23, 0x3f); | ||
| 533 | (void) pci_write_config_byte(dev, DRWTIM3, 0x3f); | ||
| 534 | #ifdef CONFIG_PPC | ||
| 535 | (void) pci_write_config_byte(dev, UDIDETCR0, 0xf0); | ||
| 536 | #endif /* CONFIG_PPC */ | ||
| 537 | 506 | ||
| 538 | #if defined(DISPLAY_CMD64X_TIMINGS) && defined(CONFIG_IDE_PROC_FS) | 507 | #if defined(DISPLAY_CMD64X_TIMINGS) && defined(CONFIG_IDE_PROC_FS) |
| 539 | 508 | ||
| @@ -548,29 +517,27 @@ static unsigned int __devinit init_chipset_cmd64x(struct pci_dev *dev, const cha | |||
| 548 | return 0; | 517 | return 0; |
| 549 | } | 518 | } |
| 550 | 519 | ||
| 551 | static unsigned int __devinit ata66_cmd64x(ide_hwif_t *hwif) | 520 | static u8 __devinit ata66_cmd64x(ide_hwif_t *hwif) |
| 552 | { | 521 | { |
| 553 | u8 ata66 = 0, mask = (hwif->channel) ? 0x02 : 0x01; | 522 | struct pci_dev *dev = hwif->pci_dev; |
| 523 | u8 bmidecsr = 0, mask = hwif->channel ? 0x02 : 0x01; | ||
| 554 | 524 | ||
| 555 | switch(hwif->pci_dev->device) { | 525 | switch (dev->device) { |
| 556 | case PCI_DEVICE_ID_CMD_643: | 526 | case PCI_DEVICE_ID_CMD_648: |
| 557 | case PCI_DEVICE_ID_CMD_646: | 527 | case PCI_DEVICE_ID_CMD_649: |
| 558 | return ata66; | 528 | pci_read_config_byte(dev, BMIDECSR, &bmidecsr); |
| 559 | default: | 529 | return (bmidecsr & mask) ? ATA_CBL_PATA80 : ATA_CBL_PATA40; |
| 560 | break; | 530 | default: |
| 531 | return ATA_CBL_PATA40; | ||
| 561 | } | 532 | } |
| 562 | pci_read_config_byte(hwif->pci_dev, BMIDECSR, &ata66); | ||
| 563 | return (ata66 & mask) ? 1 : 0; | ||
| 564 | } | 533 | } |
| 565 | 534 | ||
| 566 | static void __devinit init_hwif_cmd64x(ide_hwif_t *hwif) | 535 | static void __devinit init_hwif_cmd64x(ide_hwif_t *hwif) |
| 567 | { | 536 | { |
| 568 | struct pci_dev *dev = hwif->pci_dev; | 537 | struct pci_dev *dev = hwif->pci_dev; |
| 569 | unsigned int class_rev; | 538 | u8 rev = 0; |
| 570 | 539 | ||
| 571 | hwif->autodma = 0; | 540 | pci_read_config_byte(dev, PCI_REVISION_ID, &rev); |
| 572 | pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); | ||
| 573 | class_rev &= 0xff; | ||
| 574 | 541 | ||
| 575 | hwif->tuneproc = &cmd64x_tune_drive; | 542 | hwif->tuneproc = &cmd64x_tune_drive; |
| 576 | hwif->speedproc = &cmd64x_tune_chipset; | 543 | hwif->speedproc = &cmd64x_tune_chipset; |
| @@ -580,8 +547,8 @@ static void __devinit init_hwif_cmd64x(ide_hwif_t *hwif) | |||
| 580 | if (!hwif->dma_base) | 547 | if (!hwif->dma_base) |
| 581 | return; | 548 | return; |
| 582 | 549 | ||
| 583 | hwif->atapi_dma = 1; | 550 | hwif->atapi_dma = 1; |
| 584 | 551 | hwif->mwdma_mask = 0x07; | |
| 585 | hwif->ultra_mask = hwif->cds->udma_mask; | 552 | hwif->ultra_mask = hwif->cds->udma_mask; |
| 586 | 553 | ||
| 587 | /* | 554 | /* |
| @@ -596,16 +563,15 @@ static void __devinit init_hwif_cmd64x(ide_hwif_t *hwif) | |||
| 596 | * | 563 | * |
| 597 | * So we only do UltraDMA on revision 0x05 and 0x07 chipsets. | 564 | * So we only do UltraDMA on revision 0x05 and 0x07 chipsets. |
| 598 | */ | 565 | */ |
| 599 | if (dev->device == PCI_DEVICE_ID_CMD_646 && class_rev < 5) | 566 | if (dev->device == PCI_DEVICE_ID_CMD_646 && rev < 5) |
| 600 | hwif->ultra_mask = 0x00; | 567 | hwif->ultra_mask = 0x00; |
| 601 | 568 | ||
| 602 | hwif->mwdma_mask = 0x07; | ||
| 603 | |||
| 604 | hwif->ide_dma_check = &cmd64x_config_drive_for_dma; | 569 | hwif->ide_dma_check = &cmd64x_config_drive_for_dma; |
| 605 | if (!(hwif->udma_four)) | ||
| 606 | hwif->udma_four = ata66_cmd64x(hwif); | ||
| 607 | 570 | ||
| 608 | switch(dev->device) { | 571 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 572 | hwif->cbl = ata66_cmd64x(hwif); | ||
| 573 | |||
| 574 | switch (dev->device) { | ||
| 609 | case PCI_DEVICE_ID_CMD_648: | 575 | case PCI_DEVICE_ID_CMD_648: |
| 610 | case PCI_DEVICE_ID_CMD_649: | 576 | case PCI_DEVICE_ID_CMD_649: |
| 611 | alt_irq_bits: | 577 | alt_irq_bits: |
| @@ -614,10 +580,10 @@ static void __devinit init_hwif_cmd64x(ide_hwif_t *hwif) | |||
| 614 | break; | 580 | break; |
| 615 | case PCI_DEVICE_ID_CMD_646: | 581 | case PCI_DEVICE_ID_CMD_646: |
| 616 | hwif->chipset = ide_cmd646; | 582 | hwif->chipset = ide_cmd646; |
| 617 | if (class_rev == 0x01) { | 583 | if (rev == 0x01) { |
| 618 | hwif->ide_dma_end = &cmd646_1_ide_dma_end; | 584 | hwif->ide_dma_end = &cmd646_1_ide_dma_end; |
| 619 | break; | 585 | break; |
| 620 | } else if (class_rev >= 0x03) | 586 | } else if (rev >= 0x03) |
| 621 | goto alt_irq_bits; | 587 | goto alt_irq_bits; |
| 622 | /* fall thru */ | 588 | /* fall thru */ |
| 623 | default: | 589 | default: |
| @@ -626,11 +592,9 @@ static void __devinit init_hwif_cmd64x(ide_hwif_t *hwif) | |||
| 626 | break; | 592 | break; |
| 627 | } | 593 | } |
| 628 | 594 | ||
| 629 | |||
| 630 | if (!noautodma) | 595 | if (!noautodma) |
| 631 | hwif->autodma = 1; | 596 | hwif->autodma = 1; |
| 632 | hwif->drives[0].autodma = hwif->autodma; | 597 | hwif->drives[0].autodma = hwif->drives[1].autodma = hwif->autodma; |
| 633 | hwif->drives[1].autodma = hwif->autodma; | ||
| 634 | } | 598 | } |
| 635 | 599 | ||
| 636 | static int __devinit init_setup_cmd64x(struct pci_dev *dev, ide_pci_device_t *d) | 600 | static int __devinit init_setup_cmd64x(struct pci_dev *dev, ide_pci_device_t *d) |
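cmd64x (like several drivers in this series) stops fetching the 32-bit PCI_CLASS_REVISION dword and masking off the low byte; reading the single PCI_REVISION_ID byte says the same thing more directly:

    u8 rev = 0;

    /* was: pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
     *      class_rev &= 0xff; */
    pci_read_config_byte(dev, PCI_REVISION_ID, &rev);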
diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c index 41925c47ef05..10f61f38243c 100644 --- a/drivers/ide/pci/cs5535.c +++ b/drivers/ide/pci/cs5535.c | |||
| @@ -187,7 +187,8 @@ static u8 __devinit cs5535_cable_detect(struct pci_dev *dev) | |||
| 187 | 187 | ||
| 188 | /* if a 80 wire cable was detected */ | 188 | /* if a 80 wire cable was detected */ |
| 189 | pci_read_config_byte(dev, CS5535_CABLE_DETECT, &bit); | 189 | pci_read_config_byte(dev, CS5535_CABLE_DETECT, &bit); |
| 190 | return (bit & 1); | 190 | |
| 191 | return (bit & 1) ? ATA_CBL_PATA80 : ATA_CBL_PATA40; | ||
| 191 | } | 192 | } |
| 192 | 193 | ||
| 193 | /**** | 194 | /**** |
| @@ -212,8 +213,7 @@ static void __devinit init_hwif_cs5535(ide_hwif_t *hwif) | |||
| 212 | hwif->ultra_mask = 0x1F; | 213 | hwif->ultra_mask = 0x1F; |
| 213 | hwif->mwdma_mask = 0x07; | 214 | hwif->mwdma_mask = 0x07; |
| 214 | 215 | ||
| 215 | 216 | hwif->cbl = cs5535_cable_detect(hwif->pci_dev); | |
| 216 | hwif->udma_four = cs5535_cable_detect(hwif->pci_dev); | ||
| 217 | 217 | ||
| 218 | if (!noautodma) | 218 | if (!noautodma) |
| 219 | hwif->autodma = 1; | 219 | hwif->autodma = 1; |
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c index c33d0b0f11c9..4b6bae8eee82 100644 --- a/drivers/ide/pci/hpt366.c +++ b/drivers/ide/pci/hpt366.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * linux/drivers/ide/pci/hpt366.c Version 1.06 Jun 27, 2007 | 2 | * linux/drivers/ide/pci/hpt366.c Version 1.10 Jun 29, 2007 |
| 3 | * | 3 | * |
| 4 | * Copyright (C) 1999-2003 Andre Hedrick <andre@linux-ide.org> | 4 | * Copyright (C) 1999-2003 Andre Hedrick <andre@linux-ide.org> |
| 5 | * Portions Copyright (C) 2001 Sun Microsystems, Inc. | 5 | * Portions Copyright (C) 2001 Sun Microsystems, Inc. |
| @@ -77,7 +77,7 @@ | |||
| 77 | * since they may tamper with its fields | 77 | * since they may tamper with its fields |
| 78 | * - prefix the driver startup messages with the real chip name | 78 | * - prefix the driver startup messages with the real chip name |
| 79 | * - claim the extra 240 bytes of I/O space for all chips | 79 | * - claim the extra 240 bytes of I/O space for all chips |
| 80 | * - optimize the rate masking/filtering and the drive list lookup code | 80 | * - optimize the UltraDMA filtering and the drive list lookup code |
| 81 | * - use pci_get_slot() to get to the function 1 of HPT36x/374 | 81 | * - use pci_get_slot() to get to the function 1 of HPT36x/374 |
| 82 | * - cache offset of the channel's misc. control registers (MCRs) being used | 82 | * - cache offset of the channel's misc. control registers (MCRs) being used |
| 83 | * throughout the driver | 83 | * throughout the driver |
| @@ -99,9 +99,9 @@ | |||
| 99 | * stop duplicating it for each channel by storing the pointer in the pci_dev | 99 | * stop duplicating it for each channel by storing the pointer in the pci_dev |
| 100 | * structure: first, at the init_setup stage, point it to a static "template" | 100 | * structure: first, at the init_setup stage, point it to a static "template" |
| 101 | * with only the chip type and its specific base DPLL frequency, the highest | 101 | * with only the chip type and its specific base DPLL frequency, the highest |
| 102 | * supported DMA mode, and the chip settings table pointer filled, then, at | 102 | * UltraDMA mode, and the chip settings table pointer filled, then, at the |
| 103 | * the init_chipset stage, allocate per-chip instance and fill it with the | 103 | * init_chipset stage, allocate per-chip instance and fill it with the rest |
| 104 | * rest of the necessary information | 104 | * of the necessary information |
| 105 | * - get rid of the constant thresholds in the HPT37x PCI clock detection code, | 105 | * - get rid of the constant thresholds in the HPT37x PCI clock detection code, |
| 106 | * switch to calculating PCI clock frequency based on the chip's base DPLL | 106 | * switch to calculating PCI clock frequency based on the chip's base DPLL |
| 107 | * frequency | 107 | * frequency |
| @@ -112,6 +112,7 @@ | |||
| 112 | * also fixing the interchanged 25/40 MHz PCI clock cases for HPT36x chips; | 112 | * also fixing the interchanged 25/40 MHz PCI clock cases for HPT36x chips; |
| 113 | * unify HPT36x/37x timing setup code and the speedproc handlers by joining | 113 | * unify HPT36x/37x timing setup code and the speedproc handlers by joining |
| 114 | * the register setting lists into the table indexed by the clock selected | 114 | * the register setting lists into the table indexed by the clock selected |
| 115 | * - set the correct hwif->ultra_mask for each individual chip | ||
| 115 | * Sergei Shtylyov, <sshtylyov@ru.mvista.com> or <source@mvista.com> | 116 | * Sergei Shtylyov, <sshtylyov@ru.mvista.com> or <source@mvista.com> |
| 116 | */ | 117 | */ |
| 117 | 118 | ||
| @@ -391,7 +392,7 @@ enum ata_clock { | |||
| 391 | 392 | ||
| 392 | struct hpt_info { | 393 | struct hpt_info { |
| 393 | u8 chip_type; /* Chip type */ | 394 | u8 chip_type; /* Chip type */ |
| 394 | u8 max_mode; /* Speeds allowed */ | 395 | u8 max_ultra; /* Max. UltraDMA mode allowed */ |
| 395 | u8 dpll_clk; /* DPLL clock in MHz */ | 396 | u8 dpll_clk; /* DPLL clock in MHz */ |
| 396 | u8 pci_clk; /* PCI clock in MHz */ | 397 | u8 pci_clk; /* PCI clock in MHz */ |
| 397 | u32 **settings; /* Chipset settings table */ | 398 | u32 **settings; /* Chipset settings table */ |
| @@ -430,77 +431,77 @@ static u32 *hpt37x_settings[NUM_ATA_CLOCKS] = { | |||
| 430 | 431 | ||
| 431 | static struct hpt_info hpt36x __devinitdata = { | 432 | static struct hpt_info hpt36x __devinitdata = { |
| 432 | .chip_type = HPT36x, | 433 | .chip_type = HPT36x, |
| 433 | .max_mode = (HPT366_ALLOW_ATA66_4 || HPT366_ALLOW_ATA66_3) ? 2 : 1, | 434 | .max_ultra = HPT366_ALLOW_ATA66_3 ? (HPT366_ALLOW_ATA66_4 ? 4 : 3) : 2, |
| 434 | .dpll_clk = 0, /* no DPLL */ | 435 | .dpll_clk = 0, /* no DPLL */ |
| 435 | .settings = hpt36x_settings | 436 | .settings = hpt36x_settings |
| 436 | }; | 437 | }; |
| 437 | 438 | ||
| 438 | static struct hpt_info hpt370 __devinitdata = { | 439 | static struct hpt_info hpt370 __devinitdata = { |
| 439 | .chip_type = HPT370, | 440 | .chip_type = HPT370, |
| 440 | .max_mode = HPT370_ALLOW_ATA100_5 ? 3 : 2, | 441 | .max_ultra = HPT370_ALLOW_ATA100_5 ? 5 : 4, |
| 441 | .dpll_clk = 48, | 442 | .dpll_clk = 48, |
| 442 | .settings = hpt37x_settings | 443 | .settings = hpt37x_settings |
| 443 | }; | 444 | }; |
| 444 | 445 | ||
| 445 | static struct hpt_info hpt370a __devinitdata = { | 446 | static struct hpt_info hpt370a __devinitdata = { |
| 446 | .chip_type = HPT370A, | 447 | .chip_type = HPT370A, |
| 447 | .max_mode = HPT370_ALLOW_ATA100_5 ? 3 : 2, | 448 | .max_ultra = HPT370_ALLOW_ATA100_5 ? 5 : 4, |
| 448 | .dpll_clk = 48, | 449 | .dpll_clk = 48, |
| 449 | .settings = hpt37x_settings | 450 | .settings = hpt37x_settings |
| 450 | }; | 451 | }; |
| 451 | 452 | ||
| 452 | static struct hpt_info hpt374 __devinitdata = { | 453 | static struct hpt_info hpt374 __devinitdata = { |
| 453 | .chip_type = HPT374, | 454 | .chip_type = HPT374, |
| 454 | .max_mode = 3, | 455 | .max_ultra = 5, |
| 455 | .dpll_clk = 48, | 456 | .dpll_clk = 48, |
| 456 | .settings = hpt37x_settings | 457 | .settings = hpt37x_settings |
| 457 | }; | 458 | }; |
| 458 | 459 | ||
| 459 | static struct hpt_info hpt372 __devinitdata = { | 460 | static struct hpt_info hpt372 __devinitdata = { |
| 460 | .chip_type = HPT372, | 461 | .chip_type = HPT372, |
| 461 | .max_mode = HPT372_ALLOW_ATA133_6 ? 4 : 3, | 462 | .max_ultra = HPT372_ALLOW_ATA133_6 ? 6 : 5, |
| 462 | .dpll_clk = 55, | 463 | .dpll_clk = 55, |
| 463 | .settings = hpt37x_settings | 464 | .settings = hpt37x_settings |
| 464 | }; | 465 | }; |
| 465 | 466 | ||
| 466 | static struct hpt_info hpt372a __devinitdata = { | 467 | static struct hpt_info hpt372a __devinitdata = { |
| 467 | .chip_type = HPT372A, | 468 | .chip_type = HPT372A, |
| 468 | .max_mode = HPT372_ALLOW_ATA133_6 ? 4 : 3, | 469 | .max_ultra = HPT372_ALLOW_ATA133_6 ? 6 : 5, |
| 469 | .dpll_clk = 66, | 470 | .dpll_clk = 66, |
| 470 | .settings = hpt37x_settings | 471 | .settings = hpt37x_settings |
| 471 | }; | 472 | }; |
| 472 | 473 | ||
| 473 | static struct hpt_info hpt302 __devinitdata = { | 474 | static struct hpt_info hpt302 __devinitdata = { |
| 474 | .chip_type = HPT302, | 475 | .chip_type = HPT302, |
| 475 | .max_mode = HPT302_ALLOW_ATA133_6 ? 4 : 3, | 476 | .max_ultra = HPT372_ALLOW_ATA133_6 ? 6 : 5, |
| 476 | .dpll_clk = 66, | 477 | .dpll_clk = 66, |
| 477 | .settings = hpt37x_settings | 478 | .settings = hpt37x_settings |
| 478 | }; | 479 | }; |
| 479 | 480 | ||
| 480 | static struct hpt_info hpt371 __devinitdata = { | 481 | static struct hpt_info hpt371 __devinitdata = { |
| 481 | .chip_type = HPT371, | 482 | .chip_type = HPT371, |
| 482 | .max_mode = HPT371_ALLOW_ATA133_6 ? 4 : 3, | 483 | .max_ultra = HPT371_ALLOW_ATA133_6 ? 6 : 5, |
| 483 | .dpll_clk = 66, | 484 | .dpll_clk = 66, |
| 484 | .settings = hpt37x_settings | 485 | .settings = hpt37x_settings |
| 485 | }; | 486 | }; |
| 486 | 487 | ||
| 487 | static struct hpt_info hpt372n __devinitdata = { | 488 | static struct hpt_info hpt372n __devinitdata = { |
| 488 | .chip_type = HPT372N, | 489 | .chip_type = HPT372N, |
| 489 | .max_mode = HPT372_ALLOW_ATA133_6 ? 4 : 3, | 490 | .max_ultra = HPT372_ALLOW_ATA133_6 ? 6 : 5, |
| 490 | .dpll_clk = 77, | 491 | .dpll_clk = 77, |
| 491 | .settings = hpt37x_settings | 492 | .settings = hpt37x_settings |
| 492 | }; | 493 | }; |
| 493 | 494 | ||
| 494 | static struct hpt_info hpt302n __devinitdata = { | 495 | static struct hpt_info hpt302n __devinitdata = { |
| 495 | .chip_type = HPT302N, | 496 | .chip_type = HPT302N, |
| 496 | .max_mode = HPT302_ALLOW_ATA133_6 ? 4 : 3, | 497 | .max_ultra = HPT302_ALLOW_ATA133_6 ? 6 : 5, |
| 497 | .dpll_clk = 77, | 498 | .dpll_clk = 77, |
| 498 | .settings = hpt37x_settings | 499 | .settings = hpt37x_settings |
| 499 | }; | 500 | }; |
| 500 | 501 | ||
| 501 | static struct hpt_info hpt371n __devinitdata = { | 502 | static struct hpt_info hpt371n __devinitdata = { |
| 502 | .chip_type = HPT371N, | 503 | .chip_type = HPT371N, |
| 503 | .max_mode = HPT371_ALLOW_ATA133_6 ? 4 : 3, | 504 | .max_ultra = HPT371_ALLOW_ATA133_6 ? 6 : 5, |
| 504 | .dpll_clk = 77, | 505 | .dpll_clk = 77, |
| 505 | .settings = hpt37x_settings | 506 | .settings = hpt37x_settings |
| 506 | }; | 507 | }; |
| @@ -523,53 +524,38 @@ static int check_in_drive_list(ide_drive_t *drive, const char **list) | |||
| 523 | static u8 hpt3xx_udma_filter(ide_drive_t *drive) | 524 | static u8 hpt3xx_udma_filter(ide_drive_t *drive) |
| 524 | { | 525 | { |
| 525 | struct hpt_info *info = pci_get_drvdata(HWIF(drive)->pci_dev); | 526 | struct hpt_info *info = pci_get_drvdata(HWIF(drive)->pci_dev); |
| 526 | u8 chip_type = info->chip_type; | ||
| 527 | u8 mode = info->max_mode; | ||
| 528 | u8 mask; | 527 | u8 mask; |
| 529 | 528 | ||
| 530 | switch (mode) { | 529 | switch (info->chip_type) { |
| 531 | case 0x04: | 530 | case HPT370A: |
| 532 | mask = 0x7f; | 531 | if (!HPT370_ALLOW_ATA100_5 || |
| 533 | break; | 532 | check_in_drive_list(drive, bad_ata100_5)) |
| 534 | case 0x03: | 533 | return 0x1f; |
| 534 | else | ||
| 535 | return 0x3f; | ||
| 536 | case HPT370: | ||
| 537 | if (!HPT370_ALLOW_ATA100_5 || | ||
| 538 | check_in_drive_list(drive, bad_ata100_5)) | ||
| 539 | mask = 0x1f; | ||
| 540 | else | ||
| 535 | mask = 0x3f; | 541 | mask = 0x3f; |
| 536 | if (chip_type >= HPT374) | 542 | break; |
| 537 | break; | 543 | case HPT36x: |
| 538 | if (!check_in_drive_list(drive, bad_ata100_5)) | 544 | if (!HPT366_ALLOW_ATA66_4 || |
| 539 | goto check_bad_ata33; | 545 | check_in_drive_list(drive, bad_ata66_4)) |
| 540 | /* fall thru */ | 546 | mask = 0x0f; |
| 541 | case 0x02: | 547 | else |
| 542 | mask = 0x1f; | 548 | mask = 0x1f; |
| 543 | 549 | ||
| 544 | /* | 550 | if (!HPT366_ALLOW_ATA66_3 || |
| 545 | * CHECK ME, Does this need to be changed to HPT374 ?? | 551 | check_in_drive_list(drive, bad_ata66_3)) |
| 546 | */ | ||
| 547 | if (chip_type >= HPT370) | ||
| 548 | goto check_bad_ata33; | ||
| 549 | if (HPT366_ALLOW_ATA66_4 && | ||
| 550 | !check_in_drive_list(drive, bad_ata66_4)) | ||
| 551 | goto check_bad_ata33; | ||
| 552 | |||
| 553 | mask = 0x0f; | ||
| 554 | if (HPT366_ALLOW_ATA66_3 && | ||
| 555 | !check_in_drive_list(drive, bad_ata66_3)) | ||
| 556 | goto check_bad_ata33; | ||
| 557 | /* fall thru */ | ||
| 558 | case 0x01: | ||
| 559 | mask = 0x07; | 552 | mask = 0x07; |
| 560 | 553 | break; | |
| 561 | check_bad_ata33: | 554 | default: |
| 562 | if (chip_type >= HPT370A) | 555 | return 0x7f; |
| 563 | break; | ||
| 564 | if (!check_in_drive_list(drive, bad_ata33)) | ||
| 565 | break; | ||
| 566 | /* fall thru */ | ||
| 567 | case 0x00: | ||
| 568 | default: | ||
| 569 | mask = 0x00; | ||
| 570 | break; | ||
| 571 | } | 556 | } |
| 572 | return mask; | 557 | |
| 558 | return check_in_drive_list(drive, bad_ata33) ? 0x00 : mask; | ||
| 573 | } | 559 | } |
| 574 | 560 | ||
| 575 | static u32 get_speed_setting(u8 speed, struct hpt_info *info) | 561 | static u32 get_speed_setting(u8 speed, struct hpt_info *info) |
| @@ -737,7 +723,7 @@ static int hpt366_config_drive_xfer_rate(ide_drive_t *drive) | |||
| 737 | * This is specific to the HPT366 UDMA chipset | 723 | * This is specific to the HPT366 UDMA chipset |
| 738 | * by HighPoint|Triones Technologies, Inc. | 724 | * by HighPoint|Triones Technologies, Inc. |
| 739 | */ | 725 | */ |
| 740 | static int hpt366_ide_dma_lostirq(ide_drive_t *drive) | 726 | static void hpt366_dma_lost_irq(ide_drive_t *drive) |
| 741 | { | 727 | { |
| 742 | struct pci_dev *dev = HWIF(drive)->pci_dev; | 728 | struct pci_dev *dev = HWIF(drive)->pci_dev; |
| 743 | u8 mcr1 = 0, mcr3 = 0, scr1 = 0; | 729 | u8 mcr1 = 0, mcr3 = 0, scr1 = 0; |
| @@ -749,7 +735,7 @@ static int hpt366_ide_dma_lostirq(ide_drive_t *drive) | |||
| 749 | drive->name, __FUNCTION__, mcr1, mcr3, scr1); | 735 | drive->name, __FUNCTION__, mcr1, mcr3, scr1); |
| 750 | if (scr1 & 0x10) | 736 | if (scr1 & 0x10) |
| 751 | pci_write_config_byte(dev, 0x5a, scr1 & ~0x10); | 737 | pci_write_config_byte(dev, 0x5a, scr1 & ~0x10); |
| 752 | return __ide_dma_lostirq(drive); | 738 | ide_dma_lost_irq(drive); |
| 753 | } | 739 | } |
| 754 | 740 | ||
| 755 | static void hpt370_clear_engine(ide_drive_t *drive) | 741 | static void hpt370_clear_engine(ide_drive_t *drive) |
| @@ -799,10 +785,10 @@ static int hpt370_ide_dma_end(ide_drive_t *drive) | |||
| 799 | return __ide_dma_end(drive); | 785 | return __ide_dma_end(drive); |
| 800 | } | 786 | } |
| 801 | 787 | ||
| 802 | static int hpt370_ide_dma_timeout(ide_drive_t *drive) | 788 | static void hpt370_dma_timeout(ide_drive_t *drive) |
| 803 | { | 789 | { |
| 804 | hpt370_irq_timeout(drive); | 790 | hpt370_irq_timeout(drive); |
| 805 | return __ide_dma_timeout(drive); | 791 | ide_dma_timeout(drive); |
| 806 | } | 792 | } |
| 807 | 793 | ||
| 808 | /* returns 1 if DMA IRQ issued, 0 otherwise */ | 794 | /* returns 1 if DMA IRQ issued, 0 otherwise */ |
| @@ -1150,7 +1136,7 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const cha | |||
| 1150 | * Select 66 MHz DPLL clock only if UltraATA/133 mode is | 1136 | * Select 66 MHz DPLL clock only if UltraATA/133 mode is |
| 1151 | * supported/enabled, use 50 MHz DPLL clock otherwise... | 1137 | * supported/enabled, use 50 MHz DPLL clock otherwise... |
| 1152 | */ | 1138 | */ |
| 1153 | if (info->max_mode == 0x04) { | 1139 | if (info->max_ultra == 6) { |
| 1154 | dpll_clk = 66; | 1140 | dpll_clk = 66; |
| 1155 | clock = ATA_CLOCK_66MHZ; | 1141 | clock = ATA_CLOCK_66MHZ; |
| 1156 | } else if (dpll_clk) { /* HPT36x chips don't have DPLL */ | 1142 | } else if (dpll_clk) { /* HPT36x chips don't have DPLL */ |
| @@ -1243,7 +1229,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif) | |||
| 1243 | struct pci_dev *dev = hwif->pci_dev; | 1229 | struct pci_dev *dev = hwif->pci_dev; |
| 1244 | struct hpt_info *info = pci_get_drvdata(dev); | 1230 | struct hpt_info *info = pci_get_drvdata(dev); |
| 1245 | int serialize = HPT_SERIALIZE_IO; | 1231 | int serialize = HPT_SERIALIZE_IO; |
| 1246 | u8 scr1 = 0, ata66 = (hwif->channel) ? 0x01 : 0x02; | 1232 | u8 scr1 = 0, ata66 = hwif->channel ? 0x01 : 0x02; |
| 1247 | u8 chip_type = info->chip_type; | 1233 | u8 chip_type = info->chip_type; |
| 1248 | u8 new_mcr, old_mcr = 0; | 1234 | u8 new_mcr, old_mcr = 0; |
| 1249 | 1235 | ||
| @@ -1256,7 +1242,9 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif) | |||
| 1256 | hwif->intrproc = &hpt3xx_intrproc; | 1242 | hwif->intrproc = &hpt3xx_intrproc; |
| 1257 | hwif->maskproc = &hpt3xx_maskproc; | 1243 | hwif->maskproc = &hpt3xx_maskproc; |
| 1258 | hwif->busproc = &hpt3xx_busproc; | 1244 | hwif->busproc = &hpt3xx_busproc; |
| 1259 | hwif->udma_filter = &hpt3xx_udma_filter; | 1245 | |
| 1246 | if (chip_type <= HPT370A) | ||
| 1247 | hwif->udma_filter = &hpt3xx_udma_filter; | ||
| 1260 | 1248 | ||
| 1261 | /* | 1249 | /* |
| 1262 | * HPT3xxN chips have some complications: | 1250 | * HPT3xxN chips have some complications: |
| @@ -1305,7 +1293,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif) | |||
| 1305 | return; | 1293 | return; |
| 1306 | } | 1294 | } |
| 1307 | 1295 | ||
| 1308 | hwif->ultra_mask = 0x7f; | 1296 | hwif->ultra_mask = hwif->cds->udma_mask; |
| 1309 | hwif->mwdma_mask = 0x07; | 1297 | hwif->mwdma_mask = 0x07; |
| 1310 | 1298 | ||
| 1311 | /* | 1299 | /* |
| @@ -1342,8 +1330,8 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif) | |||
| 1342 | } else | 1330 | } else |
| 1343 | pci_read_config_byte (dev, 0x5a, &scr1); | 1331 | pci_read_config_byte (dev, 0x5a, &scr1); |
| 1344 | 1332 | ||
| 1345 | if (!hwif->udma_four) | 1333 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 1346 | hwif->udma_four = (scr1 & ata66) ? 0 : 1; | 1334 | hwif->cbl = (scr1 & ata66) ? ATA_CBL_PATA40 : ATA_CBL_PATA80; |
| 1347 | 1335 | ||
| 1348 | hwif->ide_dma_check = &hpt366_config_drive_xfer_rate; | 1336 | hwif->ide_dma_check = &hpt366_config_drive_xfer_rate; |
| 1349 | 1337 | ||
| @@ -1353,9 +1341,9 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif) | |||
| 1353 | } else if (chip_type >= HPT370) { | 1341 | } else if (chip_type >= HPT370) { |
| 1354 | hwif->dma_start = &hpt370_ide_dma_start; | 1342 | hwif->dma_start = &hpt370_ide_dma_start; |
| 1355 | hwif->ide_dma_end = &hpt370_ide_dma_end; | 1343 | hwif->ide_dma_end = &hpt370_ide_dma_end; |
| 1356 | hwif->ide_dma_timeout = &hpt370_ide_dma_timeout; | 1344 | hwif->dma_timeout = &hpt370_dma_timeout; |
| 1357 | } else | 1345 | } else |
| 1358 | hwif->ide_dma_lostirq = &hpt366_ide_dma_lostirq; | 1346 | hwif->dma_lost_irq = &hpt366_dma_lost_irq; |
| 1359 | 1347 | ||
| 1360 | if (!noautodma) | 1348 | if (!noautodma) |
| 1361 | hwif->autodma = 1; | 1349 | hwif->autodma = 1; |
| @@ -1503,9 +1491,35 @@ static int __devinit init_setup_hpt366(struct pci_dev *dev, ide_pci_device_t *d) | |||
| 1503 | 1491 | ||
| 1504 | pci_read_config_byte(dev, PCI_REVISION_ID, &rev); | 1492 | pci_read_config_byte(dev, PCI_REVISION_ID, &rev); |
| 1505 | 1493 | ||
| 1506 | if (rev > 6) | 1494 | switch (rev) { |
| 1495 | case 0: | ||
| 1496 | case 1: | ||
| 1497 | case 2: | ||
| 1498 | /* | ||
| 1499 | * HPT36x chips have one channel per function and have | ||
| 1500 | * both channel enable bits located differently and visible | ||
| 1501 | * to both functions -- really stupid design decision... :-( | ||
| 1502 | * Bit 4 is for the primary channel, bit 5 for the secondary. | ||
| 1503 | */ | ||
| 1504 | d->channels = 1; | ||
| 1505 | d->enablebits[0].mask = d->enablebits[0].val = 0x10; | ||
| 1506 | |||
| 1507 | d->udma_mask = HPT366_ALLOW_ATA66_3 ? | ||
| 1508 | (HPT366_ALLOW_ATA66_4 ? 0x1f : 0x0f) : 0x07; | ||
| 1509 | break; | ||
| 1510 | case 3: | ||
| 1511 | case 4: | ||
| 1512 | d->udma_mask = HPT370_ALLOW_ATA100_5 ? 0x3f : 0x1f; | ||
| 1513 | break; | ||
| 1514 | default: | ||
| 1507 | rev = 6; | 1515 | rev = 6; |
| 1508 | 1516 | /* fall thru */ | |
| 1517 | case 5: | ||
| 1518 | case 6: | ||
| 1519 | d->udma_mask = HPT372_ALLOW_ATA133_6 ? 0x7f : 0x3f; | ||
| 1520 | break; | ||
| 1521 | } | ||
| 1522 | |||
| 1509 | d->name = chipset_names[rev]; | 1523 | d->name = chipset_names[rev]; |
| 1510 | 1524 | ||
| 1511 | pci_set_drvdata(dev, info[rev]); | 1525 | pci_set_drvdata(dev, info[rev]); |
| @@ -1513,15 +1527,6 @@ static int __devinit init_setup_hpt366(struct pci_dev *dev, ide_pci_device_t *d) | |||
| 1513 | if (rev > 2) | 1527 | if (rev > 2) |
| 1514 | goto init_single; | 1528 | goto init_single; |
| 1515 | 1529 | ||
| 1516 | /* | ||
| 1517 | * HPT36x chips have one channel per function and have | ||
| 1518 | * both channel enable bits located differently and visible | ||
| 1519 | * to both functions -- really stupid design decision... :-( | ||
| 1520 | * Bit 4 is for the primary channel, bit 5 for the secondary. | ||
| 1521 | */ | ||
| 1522 | d->channels = 1; | ||
| 1523 | d->enablebits[0].mask = d->enablebits[0].val = 0x10; | ||
| 1524 | |||
| 1525 | if ((dev2 = pci_get_slot(dev->bus, dev->devfn + 1)) != NULL) { | 1530 | if ((dev2 = pci_get_slot(dev->bus, dev->devfn + 1)) != NULL) { |
| 1526 | u8 mcr1 = 0, pin1 = 0, pin2 = 0; | 1531 | u8 mcr1 = 0, pin1 = 0, pin2 = 0; |
| 1527 | int ret; | 1532 | int ret; |
| @@ -1573,6 +1578,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = { | |||
| 1573 | .channels = 2, | 1578 | .channels = 2, |
| 1574 | .autodma = AUTODMA, | 1579 | .autodma = AUTODMA, |
| 1575 | .enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}}, | 1580 | .enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}}, |
| 1581 | .udma_mask = HPT372_ALLOW_ATA133_6 ? 0x7f : 0x3f, | ||
| 1576 | .bootable = OFF_BOARD, | 1582 | .bootable = OFF_BOARD, |
| 1577 | .extra = 240 | 1583 | .extra = 240 |
| 1578 | },{ /* 2 */ | 1584 | },{ /* 2 */ |
| @@ -1584,6 +1590,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = { | |||
| 1584 | .channels = 2, | 1590 | .channels = 2, |
| 1585 | .autodma = AUTODMA, | 1591 | .autodma = AUTODMA, |
| 1586 | .enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}}, | 1592 | .enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}}, |
| 1593 | .udma_mask = HPT302_ALLOW_ATA133_6 ? 0x7f : 0x3f, | ||
| 1587 | .bootable = OFF_BOARD, | 1594 | .bootable = OFF_BOARD, |
| 1588 | .extra = 240 | 1595 | .extra = 240 |
| 1589 | },{ /* 3 */ | 1596 | },{ /* 3 */ |
| @@ -1595,6 +1602,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = { | |||
| 1595 | .channels = 2, | 1602 | .channels = 2, |
| 1596 | .autodma = AUTODMA, | 1603 | .autodma = AUTODMA, |
| 1597 | .enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}}, | 1604 | .enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}}, |
| 1605 | .udma_mask = HPT371_ALLOW_ATA133_6 ? 0x7f : 0x3f, | ||
| 1598 | .bootable = OFF_BOARD, | 1606 | .bootable = OFF_BOARD, |
| 1599 | .extra = 240 | 1607 | .extra = 240 |
| 1600 | },{ /* 4 */ | 1608 | },{ /* 4 */ |
| @@ -1606,6 +1614,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = { | |||
| 1606 | .channels = 2, /* 4 */ | 1614 | .channels = 2, /* 4 */ |
| 1607 | .autodma = AUTODMA, | 1615 | .autodma = AUTODMA, |
| 1608 | .enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}}, | 1616 | .enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}}, |
| 1617 | .udma_mask = 0x3f, | ||
| 1609 | .bootable = OFF_BOARD, | 1618 | .bootable = OFF_BOARD, |
| 1610 | .extra = 240 | 1619 | .extra = 240 |
| 1611 | },{ /* 5 */ | 1620 | },{ /* 5 */ |
| @@ -1617,6 +1626,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = { | |||
| 1617 | .channels = 2, /* 4 */ | 1626 | .channels = 2, /* 4 */ |
| 1618 | .autodma = AUTODMA, | 1627 | .autodma = AUTODMA, |
| 1619 | .enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}}, | 1628 | .enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}}, |
| 1629 | .udma_mask = HPT372_ALLOW_ATA133_6 ? 0x7f : 0x3f, | ||
| 1620 | .bootable = OFF_BOARD, | 1630 | .bootable = OFF_BOARD, |
| 1621 | .extra = 240 | 1631 | .extra = 240 |
| 1622 | } | 1632 | } |
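
The hpt366 rework replaces the driver-private max_mode scale (0 = UDMA off ... 4 = UltraATA/133) with max_ultra, the highest UltraDMA mode number itself, and seeds hwif->ultra_mask from the per-chipset udma_mask now computed in init_setup_hpt366(). Mode number and mask are related by a simple rule; the helper below is illustrative, not part of the patch:

	/* Illustrative only: a mask with bits 0..N set permits UDMA
	 * modes 0..N, so the udma_mask values in the hunks above follow
	 * directly from the highest allowed mode. */
	static unsigned char udma_mask_from_max_ultra(unsigned char max_ultra)
	{
		return (unsigned char)((1 << (max_ultra + 1)) - 1);
	}

	/*
	 * max_ultra 2 -> 0x07 (UDMA/33,  old max_mode 1)
	 * max_ultra 4 -> 0x1f (UDMA/66,  old max_mode 2)
	 * max_ultra 5 -> 0x3f (UDMA/100, old max_mode 3)
	 * max_ultra 6 -> 0x7f (UDMA/133, old max_mode 4)
	 */

With ultra_mask seeded from udma_mask, hpt3xx_udma_filter() only has to handle the chips whose limits depend on the attached drive (HPT36x/370/370A with their bad-drive lists), which is why the filter is now hooked up only for chip_type <= HPT370A.
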
diff --git a/drivers/ide/pci/it8213.c b/drivers/ide/pci/it8213.c index c04a02687b95..ff48c23e571e 100644 --- a/drivers/ide/pci/it8213.c +++ b/drivers/ide/pci/it8213.c | |||
| @@ -231,7 +231,7 @@ static int it8213_config_drive_for_dma (ide_drive_t *drive) | |||
| 231 | 231 | ||
| 232 | static void __devinit init_hwif_it8213(ide_hwif_t *hwif) | 232 | static void __devinit init_hwif_it8213(ide_hwif_t *hwif) |
| 233 | { | 233 | { |
| 234 | u8 reg42h = 0, ata66 = 0; | 234 | u8 reg42h = 0; |
| 235 | 235 | ||
| 236 | hwif->speedproc = &it8213_tune_chipset; | 236 | hwif->speedproc = &it8213_tune_chipset; |
| 237 | hwif->tuneproc = &it8213_tuneproc; | 237 | hwif->tuneproc = &it8213_tuneproc; |
| @@ -250,11 +250,11 @@ static void __devinit init_hwif_it8213(ide_hwif_t *hwif) | |||
| 250 | hwif->swdma_mask = 0x04; | 250 | hwif->swdma_mask = 0x04; |
| 251 | 251 | ||
| 252 | pci_read_config_byte(hwif->pci_dev, 0x42, &reg42h); | 252 | pci_read_config_byte(hwif->pci_dev, 0x42, &reg42h); |
| 253 | ata66 = (reg42h & 0x02) ? 0 : 1; | ||
| 254 | 253 | ||
| 255 | hwif->ide_dma_check = &it8213_config_drive_for_dma; | 254 | hwif->ide_dma_check = &it8213_config_drive_for_dma; |
| 256 | if (!(hwif->udma_four)) | 255 | |
| 257 | hwif->udma_four = ata66; | 256 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 257 | hwif->cbl = (reg42h & 0x02) ? ATA_CBL_PATA40 : ATA_CBL_PATA80; | ||
| 258 | 258 | ||
| 259 | /* | 259 | /* |
| 260 | * The BIOS often doesn't set up DMA on this controller | 260 | * The BIOS often doesn't set up DMA on this controller |
diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c index 3aeb7f1b7916..8197b653ba1e 100644 --- a/drivers/ide/pci/it821x.c +++ b/drivers/ide/pci/it821x.c | |||
| @@ -491,10 +491,10 @@ static int it821x_config_drive_for_dma (ide_drive_t *drive) | |||
| 491 | * the needed logic onboard. | 491 | * the needed logic onboard. |
| 492 | */ | 492 | */ |
| 493 | 493 | ||
| 494 | static unsigned int __devinit ata66_it821x(ide_hwif_t *hwif) | 494 | static u8 __devinit ata66_it821x(ide_hwif_t *hwif) |
| 495 | { | 495 | { |
| 496 | /* The reference driver also only does disk side */ | 496 | /* The reference driver also only does disk side */ |
| 497 | return 1; | 497 | return ATA_CBL_PATA80; |
| 498 | } | 498 | } |
| 499 | 499 | ||
| 500 | /** | 500 | /** |
| @@ -662,8 +662,9 @@ static void __devinit init_hwif_it821x(ide_hwif_t *hwif) | |||
| 662 | hwif->mwdma_mask = 0x07; | 662 | hwif->mwdma_mask = 0x07; |
| 663 | 663 | ||
| 664 | hwif->ide_dma_check = &it821x_config_drive_for_dma; | 664 | hwif->ide_dma_check = &it821x_config_drive_for_dma; |
| 665 | if (!(hwif->udma_four)) | 665 | |
| 666 | hwif->udma_four = ata66_it821x(hwif); | 666 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 667 | hwif->cbl = ata66_it821x(hwif); | ||
| 667 | 668 | ||
| 668 | /* | 669 | /* |
| 669 | * The BIOS often doesn't set up DMA on this controller | 670 | * The BIOS often doesn't set up DMA on this controller |
diff --git a/drivers/ide/pci/jmicron.c b/drivers/ide/pci/jmicron.c index 76ed25147229..a6008f63e71e 100644 --- a/drivers/ide/pci/jmicron.c +++ b/drivers/ide/pci/jmicron.c | |||
| @@ -25,10 +25,10 @@ typedef enum { | |||
| 25 | * ata66_jmicron - Cable check | 25 | * ata66_jmicron - Cable check |
| 26 | * @hwif: IDE port | 26 | * @hwif: IDE port |
| 27 | * | 27 | * |
| 28 | * Return 1 if the cable is 80pin | 28 | * Returns the cable type. |
| 29 | */ | 29 | */ |
| 30 | 30 | ||
| 31 | static int __devinit ata66_jmicron(ide_hwif_t *hwif) | 31 | static u8 __devinit ata66_jmicron(ide_hwif_t *hwif) |
| 32 | { | 32 | { |
| 33 | struct pci_dev *pdev = hwif->pci_dev; | 33 | struct pci_dev *pdev = hwif->pci_dev; |
| 34 | 34 | ||
| @@ -70,16 +70,17 @@ static int __devinit ata66_jmicron(ide_hwif_t *hwif) | |||
| 70 | { | 70 | { |
| 71 | case PORT_PATA0: | 71 | case PORT_PATA0: |
| 72 | if (control & (1 << 3)) /* 40/80 pin primary */ | 72 | if (control & (1 << 3)) /* 40/80 pin primary */ |
| 73 | return 0; | 73 | return ATA_CBL_PATA40; |
| 74 | return 1; | 74 | return ATA_CBL_PATA80; |
| 75 | case PORT_PATA1: | 75 | case PORT_PATA1: |
| 76 | if (control5 & (1 << 19)) /* 40/80 pin secondary */ | 76 | if (control5 & (1 << 19)) /* 40/80 pin secondary */ |
| 77 | return 0; | 77 | return ATA_CBL_PATA40; |
| 78 | return 1; | 78 | return ATA_CBL_PATA80; |
| 79 | case PORT_SATA: | 79 | case PORT_SATA: |
| 80 | break; | 80 | break; |
| 81 | } | 81 | } |
| 82 | return 1; /* Avoid bogus "control reaches end of non-void function" */ | 82 | /* Avoid bogus "control reaches end of non-void function" */ |
| 83 | return ATA_CBL_PATA80; | ||
| 83 | } | 84 | } |
| 84 | 85 | ||
| 85 | static void jmicron_tuneproc (ide_drive_t *drive, byte mode_wanted) | 86 | static void jmicron_tuneproc (ide_drive_t *drive, byte mode_wanted) |
| @@ -159,8 +160,9 @@ static void __devinit init_hwif_jmicron(ide_hwif_t *hwif) | |||
| 159 | hwif->mwdma_mask = 0x07; | 160 | hwif->mwdma_mask = 0x07; |
| 160 | 161 | ||
| 161 | hwif->ide_dma_check = &jmicron_config_drive_for_dma; | 162 | hwif->ide_dma_check = &jmicron_config_drive_for_dma; |
| 162 | if (!(hwif->udma_four)) | 163 | |
| 163 | hwif->udma_four = ata66_jmicron(hwif); | 164 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 165 | hwif->cbl = ata66_jmicron(hwif); | ||
| 164 | 166 | ||
| 165 | hwif->autodma = 1; | 167 | hwif->autodma = 1; |
| 166 | hwif->drives[0].autodma = hwif->autodma; | 168 | hwif->drives[0].autodma = hwif->autodma; |
diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c index 0765dce6948e..ee5020df005d 100644 --- a/drivers/ide/pci/pdc202xx_new.c +++ b/drivers/ide/pci/pdc202xx_new.c | |||
| @@ -225,7 +225,10 @@ static void pdcnew_tune_drive(ide_drive_t *drive, u8 pio) | |||
| 225 | 225 | ||
| 226 | static u8 pdcnew_cable_detect(ide_hwif_t *hwif) | 226 | static u8 pdcnew_cable_detect(ide_hwif_t *hwif) |
| 227 | { | 227 | { |
| 228 | return get_indexed_reg(hwif, 0x0b) & 0x04; | 228 | if (get_indexed_reg(hwif, 0x0b) & 0x04) |
| 229 | return ATA_CBL_PATA40; | ||
| 230 | else | ||
| 231 | return ATA_CBL_PATA80; | ||
| 229 | } | 232 | } |
| 230 | 233 | ||
| 231 | static int pdcnew_config_drive_xfer_rate(ide_drive_t *drive) | 234 | static int pdcnew_config_drive_xfer_rate(ide_drive_t *drive) |
| @@ -509,8 +512,8 @@ static void __devinit init_hwif_pdc202new(ide_hwif_t *hwif) | |||
| 509 | 512 | ||
| 510 | hwif->ide_dma_check = &pdcnew_config_drive_xfer_rate; | 513 | hwif->ide_dma_check = &pdcnew_config_drive_xfer_rate; |
| 511 | 514 | ||
| 512 | if (!hwif->udma_four) | 515 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 513 | hwif->udma_four = pdcnew_cable_detect(hwif) ? 0 : 1; | 516 | hwif->cbl = pdcnew_cable_detect(hwif); |
| 514 | 517 | ||
| 515 | if (!noautodma) | 518 | if (!noautodma) |
| 516 | hwif->autodma = 1; | 519 | hwif->autodma = 1; |
diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c index 23844687deea..41ac4a94959f 100644 --- a/drivers/ide/pci/pdc202xx_old.c +++ b/drivers/ide/pci/pdc202xx_old.c | |||
| @@ -152,8 +152,10 @@ static void pdc202xx_tune_drive(ide_drive_t *drive, u8 pio) | |||
| 152 | static u8 pdc202xx_old_cable_detect (ide_hwif_t *hwif) | 152 | static u8 pdc202xx_old_cable_detect (ide_hwif_t *hwif) |
| 153 | { | 153 | { |
| 154 | u16 CIS = 0, mask = (hwif->channel) ? (1<<11) : (1<<10); | 154 | u16 CIS = 0, mask = (hwif->channel) ? (1<<11) : (1<<10); |
| 155 | |||
| 155 | pci_read_config_word(hwif->pci_dev, 0x50, &CIS); | 156 | pci_read_config_word(hwif->pci_dev, 0x50, &CIS); |
| 156 | return (CIS & mask) ? 1 : 0; | 157 | |
| 158 | return (CIS & mask) ? ATA_CBL_PATA40 : ATA_CBL_PATA80; | ||
| 157 | } | 159 | } |
| 158 | 160 | ||
| 159 | /* | 161 | /* |
| @@ -267,18 +269,24 @@ somebody_else: | |||
| 267 | return (dma_stat & 4) == 4; /* return 1 if INTR asserted */ | 269 | return (dma_stat & 4) == 4; /* return 1 if INTR asserted */ |
| 268 | } | 270 | } |
| 269 | 271 | ||
| 270 | static int pdc202xx_ide_dma_lostirq(ide_drive_t *drive) | 272 | static void pdc202xx_dma_lost_irq(ide_drive_t *drive) |
| 271 | { | 273 | { |
| 272 | if (HWIF(drive)->resetproc != NULL) | 274 | ide_hwif_t *hwif = HWIF(drive); |
| 273 | HWIF(drive)->resetproc(drive); | 275 | |
| 274 | return __ide_dma_lostirq(drive); | 276 | if (hwif->resetproc != NULL) |
| 277 | hwif->resetproc(drive); | ||
| 278 | |||
| 279 | ide_dma_lost_irq(drive); | ||
| 275 | } | 280 | } |
| 276 | 281 | ||
| 277 | static int pdc202xx_ide_dma_timeout(ide_drive_t *drive) | 282 | static void pdc202xx_dma_timeout(ide_drive_t *drive) |
| 278 | { | 283 | { |
| 279 | if (HWIF(drive)->resetproc != NULL) | 284 | ide_hwif_t *hwif = HWIF(drive); |
| 280 | HWIF(drive)->resetproc(drive); | 285 | |
| 281 | return __ide_dma_timeout(drive); | 286 | if (hwif->resetproc != NULL) |
| 287 | hwif->resetproc(drive); | ||
| 288 | |||
| 289 | ide_dma_timeout(drive); | ||
| 282 | } | 290 | } |
| 283 | 291 | ||
| 284 | static void pdc202xx_reset_host (ide_hwif_t *hwif) | 292 | static void pdc202xx_reset_host (ide_hwif_t *hwif) |
| @@ -347,12 +355,13 @@ static void __devinit init_hwif_pdc202xx(ide_hwif_t *hwif) | |||
| 347 | hwif->err_stops_fifo = 1; | 355 | hwif->err_stops_fifo = 1; |
| 348 | 356 | ||
| 349 | hwif->ide_dma_check = &pdc202xx_config_drive_xfer_rate; | 357 | hwif->ide_dma_check = &pdc202xx_config_drive_xfer_rate; |
| 350 | hwif->ide_dma_lostirq = &pdc202xx_ide_dma_lostirq; | 358 | hwif->dma_lost_irq = &pdc202xx_dma_lost_irq; |
| 351 | hwif->ide_dma_timeout = &pdc202xx_ide_dma_timeout; | 359 | hwif->dma_timeout = &pdc202xx_dma_timeout; |
| 352 | 360 | ||
| 353 | if (hwif->pci_dev->device != PCI_DEVICE_ID_PROMISE_20246) { | 361 | if (hwif->pci_dev->device != PCI_DEVICE_ID_PROMISE_20246) { |
| 354 | if (!(hwif->udma_four)) | 362 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 355 | hwif->udma_four = (pdc202xx_old_cable_detect(hwif)) ? 0 : 1; | 363 | hwif->cbl = pdc202xx_old_cable_detect(hwif); |
| 364 | |||
| 356 | hwif->dma_start = &pdc202xx_old_ide_dma_start; | 365 | hwif->dma_start = &pdc202xx_old_ide_dma_start; |
| 357 | hwif->ide_dma_end = &pdc202xx_old_ide_dma_end; | 366 | hwif->ide_dma_end = &pdc202xx_old_ide_dma_end; |
| 358 | } | 367 | } |
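
pdc202xx_old also picks up the series' second recurring conversion: the lost-interrupt and DMA-timeout methods turn from int-returning hooks that chained to __ide_dma_lostirq()/__ide_dma_timeout() into void hooks chaining to the renamed generic helpers. The shape of the conversion, sketched for a hypothetical driver (the foo_* names and the chip-specific reset are assumptions, not from the patch; ide_drive_t and ide_dma_lost_irq() are taken from <linux/ide.h> as modified by this series):

	/* Hypothetical driver, illustrating the hook conversion only. */
	static void foo_reset_host(ide_drive_t *drive);	/* chip-specific */

	static void foo_dma_lost_irq(ide_drive_t *drive)
	{
		foo_reset_host(drive);		/* recover the controller first */
		ide_dma_lost_irq(drive);	/* then the generic handling */
	}

	/* Registration moves from the old to the new method field:
	 *	hwif->dma_lost_irq = &foo_dma_lost_irq;
	 * and likewise hwif->dma_timeout chains to ide_dma_timeout().
	 */
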
diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c index 8b219dd63024..2e0b29ef596a 100644 --- a/drivers/ide/pci/piix.c +++ b/drivers/ide/pci/piix.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * linux/drivers/ide/pci/piix.c Version 0.47 February 8, 2007 | 2 | * linux/drivers/ide/pci/piix.c Version 0.50 Jun 10, 2007 |
| 3 | * | 3 | * |
| 4 | * Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer | 4 | * Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer |
| 5 | * Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org> | 5 | * Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org> |
| @@ -394,14 +394,45 @@ static void piix_dma_clear_irq(ide_drive_t *drive) | |||
| 394 | hwif->OUTB(dma_stat, hwif->dma_status); | 394 | hwif->OUTB(dma_stat, hwif->dma_status); |
| 395 | } | 395 | } |
| 396 | 396 | ||
| 397 | static int __devinit piix_cable_detect(ide_hwif_t *hwif) | 397 | struct ich_laptop { |
| 398 | u16 device; | ||
| 399 | u16 subvendor; | ||
| 400 | u16 subdevice; | ||
| 401 | }; | ||
| 402 | |||
| 403 | /* | ||
| 404 | * List of laptops that use short cables rather than 80 wire | ||
| 405 | */ | ||
| 406 | |||
| 407 | static const struct ich_laptop ich_laptop[] = { | ||
| 408 | /* devid, subvendor, subdev */ | ||
| 409 | { 0x27DF, 0x0005, 0x0280 }, /* ICH7 on Acer 5602WLMi */ | ||
| 410 | { 0x27DF, 0x1025, 0x0110 }, /* ICH7 on Acer 3682WLMi */ | ||
| 411 | { 0x27DF, 0x1043, 0x1267 }, /* ICH7 on Asus W5F */ | ||
| 412 | { 0x24CA, 0x1025, 0x0061 }, /* ICH4 on Acer Aspire 2023WLMi */ | ||
| 413 | /* end marker */ | ||
| 414 | { 0, } | ||
| 415 | }; | ||
| 416 | |||
| 417 | static u8 __devinit piix_cable_detect(ide_hwif_t *hwif) | ||
| 398 | { | 418 | { |
| 399 | struct pci_dev *dev = hwif->pci_dev; | 419 | struct pci_dev *pdev = hwif->pci_dev; |
| 420 | const struct ich_laptop *lap = &ich_laptop[0]; | ||
| 400 | u8 reg54h = 0, mask = hwif->channel ? 0xc0 : 0x30; | 421 | u8 reg54h = 0, mask = hwif->channel ? 0xc0 : 0x30; |
| 401 | 422 | ||
| 402 | pci_read_config_byte(dev, 0x54, &reg54h); | 423 | /* check for specials */ |
| 424 | while (lap->device) { | ||
| 425 | if (lap->device == pdev->device && | ||
| 426 | lap->subvendor == pdev->subsystem_vendor && | ||
| 427 | lap->subdevice == pdev->subsystem_device) { | ||
| 428 | return ATA_CBL_PATA40_SHORT; | ||
| 429 | } | ||
| 430 | lap++; | ||
| 431 | } | ||
| 432 | |||
| 433 | pci_read_config_byte(pdev, 0x54, &reg54h); | ||
| 403 | 434 | ||
| 404 | return (reg54h & mask) ? 1 : 0; | 435 | return (reg54h & mask) ? ATA_CBL_PATA80 : ATA_CBL_PATA40; |
| 405 | } | 436 | } |
| 406 | 437 | ||
| 407 | /** | 438 | /** |
| @@ -444,8 +475,8 @@ static void __devinit init_hwif_piix(ide_hwif_t *hwif) | |||
| 444 | hwif->swdma_mask = 0x04; | 475 | hwif->swdma_mask = 0x04; |
| 445 | 476 | ||
| 446 | if (hwif->ultra_mask & 0x78) { | 477 | if (hwif->ultra_mask & 0x78) { |
| 447 | if (!hwif->udma_four) | 478 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 448 | hwif->udma_four = piix_cable_detect(hwif); | 479 | hwif->cbl = piix_cable_detect(hwif); |
| 449 | } | 480 | } |
| 450 | 481 | ||
| 451 | if (no_piix_dma) | 482 | if (no_piix_dma) |
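
The new ich_laptop[] table is consulted before the classic register 0x54 bit test, and a match reports ATA_CBL_PATA40_SHORT rather than plain PATA40. The rationale, mirroring the equivalent libata quirk list: these machines use a short 40-pin cable that is electrically adequate above UDMA/33, so a bare 40/80 boolean would wrongly cap them. That is also why the converted call sites guard with "hwif->cbl != ATA_CBL_PATA40_SHORT" instead of "!hwif->udma_four" -- a pre-set short-cable type must survive generic detection. A sketch of a cable-based cap keyed off the type (values per the enum sketched after the cs5535 hunk; illustrative, not the patch's code):

	static int cable_caps_udma(unsigned char cbl, unsigned char mode)
	{
		if (mode <= 2)			/* UDMA/33 is safe on any cable */
			return 0;
		return cbl == ATA_CBL_PATA40;	/* only a full-length 40-wire
						   cable forces the cap */
	}
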
diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c index 55bc0a32e34f..7b87488e3daa 100644 --- a/drivers/ide/pci/scc_pata.c +++ b/drivers/ide/pci/scc_pata.c | |||
| @@ -716,7 +716,7 @@ static void __devinit init_hwif_scc(ide_hwif_t *hwif) | |||
| 716 | hwif->atapi_dma = 1; | 716 | hwif->atapi_dma = 1; |
| 717 | 717 | ||
| 718 | /* we support 80c cable only. */ | 718 | /* we support 80c cable only. */ |
| 719 | hwif->udma_four = 1; | 719 | hwif->cbl = ATA_CBL_PATA80; |
| 720 | 720 | ||
| 721 | hwif->autodma = 0; | 721 | hwif->autodma = 0; |
| 722 | if (!noautodma) | 722 | if (!noautodma) |
diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c index d9c4fd1ae996..1371b5bf6bf0 100644 --- a/drivers/ide/pci/serverworks.c +++ b/drivers/ide/pci/serverworks.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * linux/drivers/ide/pci/serverworks.c Version 0.11 Jun 2 2007 | 2 | * linux/drivers/ide/pci/serverworks.c Version 0.20 Jun 3 2007 |
| 3 | * | 3 | * |
| 4 | * Copyright (C) 1998-2000 Michel Aubry | 4 | * Copyright (C) 1998-2000 Michel Aubry |
| 5 | * Copyright (C) 1998-2000 Andrzej Krzysztofowicz | 5 | * Copyright (C) 1998-2000 Andrzej Krzysztofowicz |
| @@ -151,84 +151,11 @@ static int svwks_tune_chipset (ide_drive_t *drive, u8 xferspeed) | |||
| 151 | if(dev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4 && | 151 | if(dev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4 && |
| 152 | drive->media == ide_disk && speed >= XFER_UDMA_0) | 152 | drive->media == ide_disk && speed >= XFER_UDMA_0) |
| 153 | BUG(); | 153 | BUG(); |
| 154 | 154 | ||
| 155 | pci_read_config_byte(dev, drive_pci[drive->dn], &pio_timing); | ||
| 156 | pci_read_config_byte(dev, drive_pci2[drive->dn], &dma_timing); | ||
| 157 | pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing); | 155 | pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing); |
| 158 | pci_read_config_word(dev, 0x4A, &csb5_pio); | 156 | pci_read_config_word(dev, 0x4A, &csb5_pio); |
| 159 | pci_read_config_byte(dev, 0x54, &ultra_enable); | 157 | pci_read_config_byte(dev, 0x54, &ultra_enable); |
| 160 | 158 | ||
| 161 | /* If we are in RAID mode (eg AMI MegaIDE) then we can't it | ||
| 162 | turns out trust the firmware configuration */ | ||
| 163 | |||
| 164 | if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE) | ||
| 165 | goto oem_setup_failed; | ||
| 166 | |||
| 167 | /* Per Specified Design by OEM, and ASIC Architect */ | ||
| 168 | if ((dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE) || | ||
| 169 | (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2)) { | ||
| 170 | if (!drive->init_speed) { | ||
| 171 | u8 dma_stat = inb(hwif->dma_status); | ||
| 172 | |||
| 173 | if (((ultra_enable << (7-drive->dn) & 0x80) == 0x80) && | ||
| 174 | ((dma_stat & (1<<(5+unit))) == (1<<(5+unit)))) { | ||
| 175 | drive->current_speed = drive->init_speed = XFER_UDMA_0 + udma_modes[(ultra_timing >> (4*unit)) & ~(0xF0)]; | ||
| 176 | return 0; | ||
| 177 | } else if ((dma_timing) && | ||
| 178 | ((dma_stat&(1<<(5+unit)))==(1<<(5+unit)))) { | ||
| 179 | u8 dmaspeed; | ||
| 180 | |||
| 181 | switch (dma_timing & 0x77) { | ||
| 182 | case 0x20: | ||
| 183 | dmaspeed = XFER_MW_DMA_2; | ||
| 184 | break; | ||
| 185 | case 0x21: | ||
| 186 | dmaspeed = XFER_MW_DMA_1; | ||
| 187 | break; | ||
| 188 | case 0x77: | ||
| 189 | dmaspeed = XFER_MW_DMA_0; | ||
| 190 | break; | ||
| 191 | default: | ||
| 192 | goto dma_pio; | ||
| 193 | } | ||
| 194 | |||
| 195 | drive->current_speed = drive->init_speed = dmaspeed; | ||
| 196 | return 0; | ||
| 197 | } | ||
| 198 | dma_pio: | ||
| 199 | if (pio_timing) { | ||
| 200 | u8 piospeed; | ||
| 201 | |||
| 202 | switch (pio_timing & 0x7f) { | ||
| 203 | case 0x20: | ||
| 204 | piospeed = XFER_PIO_4; | ||
| 205 | break; | ||
| 206 | case 0x22: | ||
| 207 | piospeed = XFER_PIO_3; | ||
| 208 | break; | ||
| 209 | case 0x34: | ||
| 210 | piospeed = XFER_PIO_2; | ||
| 211 | break; | ||
| 212 | case 0x47: | ||
| 213 | piospeed = XFER_PIO_1; | ||
| 214 | break; | ||
| 215 | case 0x5d: | ||
| 216 | piospeed = XFER_PIO_0; | ||
| 217 | break; | ||
| 218 | default: | ||
| 219 | goto oem_setup_failed; | ||
| 220 | } | ||
| 221 | |||
| 222 | drive->current_speed = drive->init_speed = piospeed; | ||
| 223 | return 0; | ||
| 224 | } | ||
| 225 | } | ||
| 226 | } | ||
| 227 | |||
| 228 | oem_setup_failed: | ||
| 229 | |||
| 230 | pio_timing = 0; | ||
| 231 | dma_timing = 0; | ||
| 232 | ultra_timing &= ~(0x0F << (4*unit)); | 159 | ultra_timing &= ~(0x0F << (4*unit)); |
| 233 | ultra_enable &= ~(0x01 << drive->dn); | 160 | ultra_enable &= ~(0x01 << drive->dn); |
| 234 | csb5_pio &= ~(0x0F << (4*drive->dn)); | 161 | csb5_pio &= ~(0x0F << (4*drive->dn)); |
| @@ -402,9 +329,9 @@ static unsigned int __devinit init_chipset_svwks (struct pci_dev *dev, const cha | |||
| 402 | return dev->irq; | 329 | return dev->irq; |
| 403 | } | 330 | } |
| 404 | 331 | ||
| 405 | static unsigned int __devinit ata66_svwks_svwks (ide_hwif_t *hwif) | 332 | static u8 __devinit ata66_svwks_svwks(ide_hwif_t *hwif) |
| 406 | { | 333 | { |
| 407 | return 1; | 334 | return ATA_CBL_PATA80; |
| 408 | } | 335 | } |
| 409 | 336 | ||
| 410 | /* On Dell PowerEdge servers with a CSB5/CSB6, the top two bits | 337 | /* On Dell PowerEdge servers with a CSB5/CSB6, the top two bits |
| @@ -414,7 +341,7 @@ static unsigned int __devinit ata66_svwks_svwks (ide_hwif_t *hwif) | |||
| 414 | * Bit 14 clear = primary IDE channel does not have 80-pin cable. | 341 | * Bit 14 clear = primary IDE channel does not have 80-pin cable. |
| 415 | * Bit 14 set = primary IDE channel has 80-pin cable. | 342 | * Bit 14 set = primary IDE channel has 80-pin cable. |
| 416 | */ | 343 | */ |
| 417 | static unsigned int __devinit ata66_svwks_dell (ide_hwif_t *hwif) | 344 | static u8 __devinit ata66_svwks_dell(ide_hwif_t *hwif) |
| 418 | { | 345 | { |
| 419 | struct pci_dev *dev = hwif->pci_dev; | 346 | struct pci_dev *dev = hwif->pci_dev; |
| 420 | if (dev->subsystem_vendor == PCI_VENDOR_ID_DELL && | 347 | if (dev->subsystem_vendor == PCI_VENDOR_ID_DELL && |
| @@ -422,8 +349,8 @@ static unsigned int __devinit ata66_svwks_dell (ide_hwif_t *hwif) | |||
| 422 | (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE || | 349 | (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE || |
| 423 | dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE)) | 350 | dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE)) |
| 424 | return ((1 << (hwif->channel + 14)) & | 351 | return ((1 << (hwif->channel + 14)) & |
| 425 | dev->subsystem_device) ? 1 : 0; | 352 | dev->subsystem_device) ? ATA_CBL_PATA80 : ATA_CBL_PATA40; |
| 426 | return 0; | 353 | return ATA_CBL_PATA40; |
| 427 | } | 354 | } |
| 428 | 355 | ||
| 429 | /* Sun Cobalt Alpine hardware avoids the 80-pin cable | 356 | /* Sun Cobalt Alpine hardware avoids the 80-pin cable |
| @@ -432,18 +359,18 @@ static unsigned int __devinit ata66_svwks_dell (ide_hwif_t *hwif) | |||
| 432 | * | 359 | * |
| 433 | * WARNING: this only works on Alpine hardware! | 360 | * WARNING: this only works on Alpine hardware! |
| 434 | */ | 361 | */ |
| 435 | static unsigned int __devinit ata66_svwks_cobalt (ide_hwif_t *hwif) | 362 | static u8 __devinit ata66_svwks_cobalt(ide_hwif_t *hwif) |
| 436 | { | 363 | { |
| 437 | struct pci_dev *dev = hwif->pci_dev; | 364 | struct pci_dev *dev = hwif->pci_dev; |
| 438 | if (dev->subsystem_vendor == PCI_VENDOR_ID_SUN && | 365 | if (dev->subsystem_vendor == PCI_VENDOR_ID_SUN && |
| 439 | dev->vendor == PCI_VENDOR_ID_SERVERWORKS && | 366 | dev->vendor == PCI_VENDOR_ID_SERVERWORKS && |
| 440 | dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) | 367 | dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) |
| 441 | return ((1 << (hwif->channel + 14)) & | 368 | return ((1 << (hwif->channel + 14)) & |
| 442 | dev->subsystem_device) ? 1 : 0; | 369 | dev->subsystem_device) ? ATA_CBL_PATA80 : ATA_CBL_PATA40; |
| 443 | return 0; | 370 | return ATA_CBL_PATA40; |
| 444 | } | 371 | } |
| 445 | 372 | ||
| 446 | static unsigned int __devinit ata66_svwks (ide_hwif_t *hwif) | 373 | static u8 __devinit ata66_svwks(ide_hwif_t *hwif) |
| 447 | { | 374 | { |
| 448 | struct pci_dev *dev = hwif->pci_dev; | 375 | struct pci_dev *dev = hwif->pci_dev; |
| 449 | 376 | ||
| @@ -462,9 +389,9 @@ static unsigned int __devinit ata66_svwks (ide_hwif_t *hwif) | |||
| 462 | /* Per Specified Design by OEM, and ASIC Architect */ | 389 | /* Per Specified Design by OEM, and ASIC Architect */ |
| 463 | if ((dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE) || | 390 | if ((dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE) || |
| 464 | (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2)) | 391 | (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2)) |
| 465 | return 1; | 392 | return ATA_CBL_PATA80; |
| 466 | 393 | ||
| 467 | return 0; | 394 | return ATA_CBL_PATA40; |
| 468 | } | 395 | } |
| 469 | 396 | ||
| 470 | static void __devinit init_hwif_svwks (ide_hwif_t *hwif) | 397 | static void __devinit init_hwif_svwks (ide_hwif_t *hwif) |
| @@ -495,8 +422,8 @@ static void __devinit init_hwif_svwks (ide_hwif_t *hwif) | |||
| 495 | 422 | ||
| 496 | hwif->ide_dma_check = &svwks_config_drive_xfer_rate; | 423 | hwif->ide_dma_check = &svwks_config_drive_xfer_rate; |
| 497 | if (hwif->pci_dev->device != PCI_DEVICE_ID_SERVERWORKS_OSB4IDE) { | 424 | if (hwif->pci_dev->device != PCI_DEVICE_ID_SERVERWORKS_OSB4IDE) { |
| 498 | if (!hwif->udma_four) | 425 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 499 | hwif->udma_four = ata66_svwks(hwif); | 426 | hwif->cbl = ata66_svwks(hwif); |
| 500 | } | 427 | } |
| 501 | if (!noautodma) | 428 | if (!noautodma) |
| 502 | hwif->autodma = 1; | 429 | hwif->autodma = 1; |
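
In ata66_svwks_dell() and ata66_svwks_cobalt() the cable information is encoded straight into the PCI subsystem device ID, one bit per channel: bit 14 set means the primary channel has an 80-pin cable, bit 15 the secondary. The expression used above, isolated as a worked example:

	/* Same bit test as the Dell/Cobalt helpers above. */
	static unsigned char svwks_subsys_cable(unsigned short subsystem_device,
						int channel)
	{
		/* channel 0 -> bit 14, channel 1 -> bit 15 */
		return (subsystem_device & (1 << (channel + 14)))
			? 2 /* ATA_CBL_PATA80 */ : 1 /* ATA_CBL_PATA40 */;
	}

	/* e.g. subsystem_device 0x4000: primary 80-wire, secondary 40-wire;
	 *      0xc000: both channels 80-wire. */
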
diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c index d3185e29a38e..d396b2929ed8 100644 --- a/drivers/ide/pci/sgiioc4.c +++ b/drivers/ide/pci/sgiioc4.c | |||
| @@ -316,14 +316,6 @@ static void sgiioc4_dma_host_off(ide_drive_t * drive) | |||
| 316 | sgiioc4_clearirq(drive); | 316 | sgiioc4_clearirq(drive); |
| 317 | } | 317 | } |
| 318 | 318 | ||
| 319 | static int | ||
| 320 | sgiioc4_ide_dma_lostirq(ide_drive_t * drive) | ||
| 321 | { | ||
| 322 | HWIF(drive)->resetproc(drive); | ||
| 323 | |||
| 324 | return __ide_dma_lostirq(drive); | ||
| 325 | } | ||
| 326 | |||
| 327 | static void | 319 | static void |
| 328 | sgiioc4_resetproc(ide_drive_t * drive) | 320 | sgiioc4_resetproc(ide_drive_t * drive) |
| 329 | { | 321 | { |
| @@ -331,6 +323,14 @@ sgiioc4_resetproc(ide_drive_t * drive) | |||
| 331 | sgiioc4_clearirq(drive); | 323 | sgiioc4_clearirq(drive); |
| 332 | } | 324 | } |
| 333 | 325 | ||
| 326 | static void | ||
| 327 | sgiioc4_dma_lost_irq(ide_drive_t * drive) | ||
| 328 | { | ||
| 329 | sgiioc4_resetproc(drive); | ||
| 330 | |||
| 331 | ide_dma_lost_irq(drive); | ||
| 332 | } | ||
| 333 | |||
| 334 | static u8 | 334 | static u8 |
| 335 | sgiioc4_INB(unsigned long port) | 335 | sgiioc4_INB(unsigned long port) |
| 336 | { | 336 | { |
| @@ -607,8 +607,8 @@ ide_init_sgiioc4(ide_hwif_t * hwif) | |||
| 607 | hwif->ide_dma_test_irq = &sgiioc4_ide_dma_test_irq; | 607 | hwif->ide_dma_test_irq = &sgiioc4_ide_dma_test_irq; |
| 608 | hwif->dma_host_on = &sgiioc4_dma_host_on; | 608 | hwif->dma_host_on = &sgiioc4_dma_host_on; |
| 609 | hwif->dma_host_off = &sgiioc4_dma_host_off; | 609 | hwif->dma_host_off = &sgiioc4_dma_host_off; |
| 610 | hwif->ide_dma_lostirq = &sgiioc4_ide_dma_lostirq; | 610 | hwif->dma_lost_irq = &sgiioc4_dma_lost_irq; |
| 611 | hwif->ide_dma_timeout = &__ide_dma_timeout; | 611 | hwif->dma_timeout = &ide_dma_timeout; |
| 612 | 612 | ||
| 613 | hwif->INB = &sgiioc4_INB; | 613 | hwif->INB = &sgiioc4_INB; |
| 614 | } | 614 | } |
diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c index 1a4444e7226a..1c3e35487893 100644 --- a/drivers/ide/pci/siimage.c +++ b/drivers/ide/pci/siimage.c | |||
| @@ -933,16 +933,17 @@ static void __devinit init_iops_siimage(ide_hwif_t *hwif) | |||
| 933 | * interface. | 933 | * interface. |
| 934 | */ | 934 | */ |
| 935 | 935 | ||
| 936 | static unsigned int __devinit ata66_siimage(ide_hwif_t *hwif) | 936 | static u8 __devinit ata66_siimage(ide_hwif_t *hwif) |
| 937 | { | 937 | { |
| 938 | unsigned long addr = siimage_selreg(hwif, 0); | 938 | unsigned long addr = siimage_selreg(hwif, 0); |
| 939 | if (pci_get_drvdata(hwif->pci_dev) == NULL) { | 939 | u8 ata66 = 0; |
| 940 | u8 ata66 = 0; | 940 | |
| 941 | if (pci_get_drvdata(hwif->pci_dev) == NULL) | ||
| 941 | pci_read_config_byte(hwif->pci_dev, addr, &ata66); | 942 | pci_read_config_byte(hwif->pci_dev, addr, &ata66); |
| 942 | return (ata66 & 0x01) ? 1 : 0; | 943 | else |
| 943 | } | 944 | ata66 = hwif->INB(addr); |
| 944 | 945 | ||
| 945 | return (hwif->INB(addr) & 0x01) ? 1 : 0; | 946 | return (ata66 & 0x01) ? ATA_CBL_PATA80 : ATA_CBL_PATA40; |
| 946 | } | 947 | } |
| 947 | 948 | ||
| 948 | /** | 949 | /** |
| @@ -988,8 +989,9 @@ static void __devinit init_hwif_siimage(ide_hwif_t *hwif) | |||
| 988 | hwif->atapi_dma = 1; | 989 | hwif->atapi_dma = 1; |
| 989 | 990 | ||
| 990 | hwif->ide_dma_check = &siimage_config_drive_for_dma; | 991 | hwif->ide_dma_check = &siimage_config_drive_for_dma; |
| 991 | if (!(hwif->udma_four)) | 992 | |
| 992 | hwif->udma_four = ata66_siimage(hwif); | 993 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 994 | hwif->cbl = ata66_siimage(hwif); | ||
| 993 | 995 | ||
| 994 | if (hwif->mmio) { | 996 | if (hwif->mmio) { |
| 995 | hwif->ide_dma_test_irq = &siimage_mmio_ide_dma_test_irq; | 997 | hwif->ide_dma_test_irq = &siimage_mmio_ide_dma_test_irq; |
diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c index ec0adad9ef61..f875183ac8d9 100644 --- a/drivers/ide/pci/sis5513.c +++ b/drivers/ide/pci/sis5513.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * linux/drivers/ide/pci/sis5513.c Version 0.20 Mar 4, 2007 | 2 | * linux/drivers/ide/pci/sis5513.c Version 0.25 Jun 10, 2007 |
| 3 | * | 3 | * |
| 4 | * Copyright (C) 1999-2000 Andre Hedrick <andre@linux-ide.org> | 4 | * Copyright (C) 1999-2000 Andre Hedrick <andre@linux-ide.org> |
| 5 | * Copyright (C) 2002 Lionel Bouton <Lionel.Bouton@inet6.fr>, Maintainer | 5 | * Copyright (C) 2002 Lionel Bouton <Lionel.Bouton@inet6.fr>, Maintainer |
| @@ -796,10 +796,33 @@ static unsigned int __devinit init_chipset_sis5513 (struct pci_dev *dev, const c | |||
| 796 | return 0; | 796 | return 0; |
| 797 | } | 797 | } |
| 798 | 798 | ||
| 799 | static unsigned int __devinit ata66_sis5513 (ide_hwif_t *hwif) | 799 | struct sis_laptop { |
| 800 | u16 device; | ||
| 801 | u16 subvendor; | ||
| 802 | u16 subdevice; | ||
| 803 | }; | ||
| 804 | |||
| 805 | static const struct sis_laptop sis_laptop[] = { | ||
| 806 | /* devid, subvendor, subdev */ | ||
| 807 | { 0x5513, 0x1043, 0x1107 }, /* ASUS A6K */ | ||
| 808 | /* end marker */ | ||
| 809 | { 0, } | ||
| 810 | }; | ||
| 811 | |||
| 812 | static u8 __devinit ata66_sis5513(ide_hwif_t *hwif) | ||
| 800 | { | 813 | { |
| 814 | struct pci_dev *pdev = hwif->pci_dev; | ||
| 815 | const struct sis_laptop *lap = &sis_laptop[0]; | ||
| 801 | u8 ata66 = 0; | 816 | u8 ata66 = 0; |
| 802 | 817 | ||
| 818 | while (lap->device) { | ||
| 819 | if (lap->device == pdev->device && | ||
| 820 | lap->subvendor == pdev->subsystem_vendor && | ||
| 821 | lap->subdevice == pdev->subsystem_device) | ||
| 822 | return ATA_CBL_PATA40_SHORT; | ||
| 823 | lap++; | ||
| 824 | } | ||
| 825 | |||
| 803 | if (chipset_family >= ATA_133) { | 826 | if (chipset_family >= ATA_133) { |
| 804 | u16 regw = 0; | 827 | u16 regw = 0; |
| 805 | u16 reg_addr = hwif->channel ? 0x52: 0x50; | 828 | u16 reg_addr = hwif->channel ? 0x52: 0x50; |
| @@ -811,7 +834,8 @@ static unsigned int __devinit ata66_sis5513 (ide_hwif_t *hwif) | |||
| 811 | pci_read_config_byte(hwif->pci_dev, 0x48, &reg48h); | 834 | pci_read_config_byte(hwif->pci_dev, 0x48, &reg48h); |
| 812 | ata66 = (reg48h & mask) ? 0 : 1; | 835 | ata66 = (reg48h & mask) ? 0 : 1; |
| 813 | } | 836 | } |
| 814 | return ata66; | 837 | |
| 838 | return ata66 ? ATA_CBL_PATA80 : ATA_CBL_PATA40; | ||
| 815 | } | 839 | } |
| 816 | 840 | ||
| 817 | static void __devinit init_hwif_sis5513 (ide_hwif_t *hwif) | 841 | static void __devinit init_hwif_sis5513 (ide_hwif_t *hwif) |
| @@ -841,8 +865,8 @@ static void __devinit init_hwif_sis5513 (ide_hwif_t *hwif) | |||
| 841 | if (!chipset_family) | 865 | if (!chipset_family) |
| 842 | return; | 866 | return; |
| 843 | 867 | ||
| 844 | if (!(hwif->udma_four)) | 868 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 845 | hwif->udma_four = ata66_sis5513(hwif); | 869 | hwif->cbl = ata66_sis5513(hwif); |
| 846 | 870 | ||
| 847 | if (chipset_family > ATA_16) { | 871 | if (chipset_family > ATA_16) { |
| 848 | hwif->ide_dma_check = &sis5513_config_xfer_rate; | 872 | hwif->ide_dma_check = &sis5513_config_xfer_rate; |
diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c index 7c383d9cc472..487879842af4 100644 --- a/drivers/ide/pci/sl82c105.c +++ b/drivers/ide/pci/sl82c105.c | |||
| @@ -195,7 +195,7 @@ static inline void sl82c105_reset_host(struct pci_dev *dev) | |||
| 195 | * This function is called when the IDE timer expires, the drive | 195 | * This function is called when the IDE timer expires, the drive |
| 196 | * indicates that it is READY, and we were waiting for DMA to complete. | 196 | * indicates that it is READY, and we were waiting for DMA to complete. |
| 197 | */ | 197 | */ |
| 198 | static int sl82c105_ide_dma_lostirq(ide_drive_t *drive) | 198 | static void sl82c105_dma_lost_irq(ide_drive_t *drive) |
| 199 | { | 199 | { |
| 200 | ide_hwif_t *hwif = HWIF(drive); | 200 | ide_hwif_t *hwif = HWIF(drive); |
| 201 | struct pci_dev *dev = hwif->pci_dev; | 201 | struct pci_dev *dev = hwif->pci_dev; |
| @@ -222,9 +222,6 @@ static int sl82c105_ide_dma_lostirq(ide_drive_t *drive) | |||
| 222 | } | 222 | } |
| 223 | 223 | ||
| 224 | sl82c105_reset_host(dev); | 224 | sl82c105_reset_host(dev); |
| 225 | |||
| 226 | /* __ide_dma_lostirq would return 1, so we do as well */ | ||
| 227 | return 1; | ||
| 228 | } | 225 | } |
| 229 | 226 | ||
| 230 | /* | 227 | /* |
| @@ -244,15 +241,12 @@ static void sl82c105_dma_start(ide_drive_t *drive) | |||
| 244 | ide_dma_start(drive); | 241 | ide_dma_start(drive); |
| 245 | } | 242 | } |
| 246 | 243 | ||
| 247 | static int sl82c105_ide_dma_timeout(ide_drive_t *drive) | 244 | static void sl82c105_dma_timeout(ide_drive_t *drive) |
| 248 | { | 245 | { |
| 249 | ide_hwif_t *hwif = HWIF(drive); | 246 | DBG(("sl82c105_dma_timeout(drive:%s)\n", drive->name)); |
| 250 | struct pci_dev *dev = hwif->pci_dev; | ||
| 251 | 247 | ||
| 252 | DBG(("sl82c105_ide_dma_timeout(drive:%s)\n", drive->name)); | 248 | sl82c105_reset_host(HWIF(drive)->pci_dev); |
| 253 | 249 | ide_dma_timeout(drive); | |
| 254 | sl82c105_reset_host(dev); | ||
| 255 | return __ide_dma_timeout(drive); | ||
| 256 | } | 250 | } |
| 257 | 251 | ||
| 258 | static int sl82c105_ide_dma_on(ide_drive_t *drive) | 252 | static int sl82c105_ide_dma_on(ide_drive_t *drive) |
| @@ -441,9 +435,9 @@ static void __devinit init_hwif_sl82c105(ide_hwif_t *hwif) | |||
| 441 | hwif->ide_dma_check = &sl82c105_ide_dma_check; | 435 | hwif->ide_dma_check = &sl82c105_ide_dma_check; |
| 442 | hwif->ide_dma_on = &sl82c105_ide_dma_on; | 436 | hwif->ide_dma_on = &sl82c105_ide_dma_on; |
| 443 | hwif->dma_off_quietly = &sl82c105_dma_off_quietly; | 437 | hwif->dma_off_quietly = &sl82c105_dma_off_quietly; |
| 444 | hwif->ide_dma_lostirq = &sl82c105_ide_dma_lostirq; | 438 | hwif->dma_lost_irq = &sl82c105_dma_lost_irq; |
| 445 | hwif->dma_start = &sl82c105_dma_start; | 439 | hwif->dma_start = &sl82c105_dma_start; |
| 446 | hwif->ide_dma_timeout = &sl82c105_ide_dma_timeout; | 440 | hwif->dma_timeout = &sl82c105_dma_timeout; |
| 447 | 441 | ||
| 448 | if (!noautodma) | 442 | if (!noautodma) |
| 449 | hwif->autodma = 1; | 443 | hwif->autodma = 1; |
diff --git a/drivers/ide/pci/slc90e66.c b/drivers/ide/pci/slc90e66.c index c40f291f91e0..575dbbd8b482 100644 --- a/drivers/ide/pci/slc90e66.c +++ b/drivers/ide/pci/slc90e66.c | |||
| @@ -199,10 +199,9 @@ static void __devinit init_hwif_slc90e66 (ide_hwif_t *hwif) | |||
| 199 | hwif->mwdma_mask = 0x06; | 199 | hwif->mwdma_mask = 0x06; |
| 200 | hwif->swdma_mask = 0x04; | 200 | hwif->swdma_mask = 0x04; |
| 201 | 201 | ||
| 202 | if (!hwif->udma_four) { | 202 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 203 | /* bit[0(1)]: 0:80, 1:40 */ | 203 | /* bit[0(1)]: 0:80, 1:40 */ |
| 204 | hwif->udma_four = (reg47 & mask) ? 0 : 1; | 204 | hwif->cbl = (reg47 & mask) ? ATA_CBL_PATA40 : ATA_CBL_PATA80; |
| 205 | } | ||
| 206 | 205 | ||
| 207 | hwif->ide_dma_check = &slc90e66_config_drive_xfer_rate; | 206 | hwif->ide_dma_check = &slc90e66_config_drive_xfer_rate; |
| 208 | 207 | ||
diff --git a/drivers/ide/pci/tc86c001.c b/drivers/ide/pci/tc86c001.c index cee619bb2eaf..8de1f8e22494 100644 --- a/drivers/ide/pci/tc86c001.c +++ b/drivers/ide/pci/tc86c001.c | |||
| @@ -220,13 +220,13 @@ static void __devinit init_hwif_tc86c001(ide_hwif_t *hwif) | |||
| 220 | hwif->ide_dma_check = &tc86c001_config_drive_xfer_rate; | 220 | hwif->ide_dma_check = &tc86c001_config_drive_xfer_rate; |
| 221 | hwif->dma_start = &tc86c001_dma_start; | 221 | hwif->dma_start = &tc86c001_dma_start; |
| 222 | 222 | ||
| 223 | if (!hwif->udma_four) { | 223 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) { |
| 224 | /* | 224 | /* |
| 225 | * System Control 1 Register bit 13 (PDIAGN): | 225 | * System Control 1 Register bit 13 (PDIAGN): |
| 226 | * 0=80-pin cable, 1=40-pin cable | 226 | * 0=80-pin cable, 1=40-pin cable |
| 227 | */ | 227 | */ |
| 228 | scr1 = hwif->INW(sc_base + 0x00); | 228 | scr1 = hwif->INW(sc_base + 0x00); |
| 229 | hwif->udma_four = (scr1 & 0x2000) ? 0 : 1; | 229 | hwif->cbl = (scr1 & 0x2000) ? ATA_CBL_PATA40 : ATA_CBL_PATA80; |
| 230 | } | 230 | } |
| 231 | 231 | ||
| 232 | if (!noautodma) | 232 | if (!noautodma) |
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c index a508550c4095..d21dd2e7eeb3 100644 --- a/drivers/ide/pci/via82cxxx.c +++ b/drivers/ide/pci/via82cxxx.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * | 2 | * |
| 3 | * Version 3.38 | 3 | * Version 3.45 |
| 4 | * | 4 | * |
| 5 | * VIA IDE driver for Linux. Supported southbridges: | 5 | * VIA IDE driver for Linux. Supported southbridges: |
| 6 | * | 6 | * |
| @@ -9,6 +9,7 @@ | |||
| 9 | * vt8235, vt8237, vt8237a | 9 | * vt8235, vt8237, vt8237a |
| 10 | * | 10 | * |
| 11 | * Copyright (c) 2000-2002 Vojtech Pavlik | 11 | * Copyright (c) 2000-2002 Vojtech Pavlik |
| 12 | * Copyright (c) 2007 Bartlomiej Zolnierkiewicz | ||
| 12 | * | 13 | * |
| 13 | * Based on the work of: | 14 | * Based on the work of: |
| 14 | * Michel Aubry | 15 | * Michel Aubry |
| @@ -33,6 +34,8 @@ | |||
| 33 | #include <linux/pci.h> | 34 | #include <linux/pci.h> |
| 34 | #include <linux/init.h> | 35 | #include <linux/init.h> |
| 35 | #include <linux/ide.h> | 36 | #include <linux/ide.h> |
| 37 | #include <linux/dmi.h> | ||
| 38 | |||
| 36 | #include <asm/io.h> | 39 | #include <asm/io.h> |
| 37 | 40 | ||
| 38 | #ifdef CONFIG_PPC_CHRP | 41 | #ifdef CONFIG_PPC_CHRP |
| @@ -41,8 +44,6 @@ | |||
| 41 | 44 | ||
| 42 | #include "ide-timing.h" | 45 | #include "ide-timing.h" |
| 43 | 46 | ||
| 44 | #define DISPLAY_VIA_TIMINGS | ||
| 45 | |||
| 46 | #define VIA_IDE_ENABLE 0x40 | 47 | #define VIA_IDE_ENABLE 0x40 |
| 47 | #define VIA_IDE_CONFIG 0x41 | 48 | #define VIA_IDE_CONFIG 0x41 |
| 48 | #define VIA_FIFO_CONFIG 0x43 | 49 | #define VIA_FIFO_CONFIG 0x43 |
| @@ -54,18 +55,12 @@ | |||
| 54 | #define VIA_ADDRESS_SETUP 0x4c | 55 | #define VIA_ADDRESS_SETUP 0x4c |
| 55 | #define VIA_UDMA_TIMING 0x50 | 56 | #define VIA_UDMA_TIMING 0x50 |
| 56 | 57 | ||
| 57 | #define VIA_UDMA 0x007 | 58 | #define VIA_BAD_PREQ 0x01 /* Crashes if PREQ# till DDACK# set */ |
| 58 | #define VIA_UDMA_NONE 0x000 | 59 | #define VIA_BAD_CLK66 0x02 /* 66 MHz clock doesn't work correctly */ |
| 59 | #define VIA_UDMA_33 0x001 | 60 | #define VIA_SET_FIFO 0x04 /* Needs to have FIFO split set */ |
| 60 | #define VIA_UDMA_66 0x002 | 61 | #define VIA_NO_UNMASK 0x08 /* Doesn't work with IRQ unmasking on */ |
| 61 | #define VIA_UDMA_100 0x003 | 62 | #define VIA_BAD_ID 0x10 /* Has wrong vendor ID (0x1107) */ |
| 62 | #define VIA_UDMA_133 0x004 | 63 | #define VIA_BAD_AST 0x20 /* Don't touch Address Setup Timing */ |
| 63 | #define VIA_BAD_PREQ 0x010 /* Crashes if PREQ# till DDACK# set */ | ||
| 64 | #define VIA_BAD_CLK66 0x020 /* 66 MHz clock doesn't work correctly */ | ||
| 65 | #define VIA_SET_FIFO 0x040 /* Needs to have FIFO split set */ | ||
| 66 | #define VIA_NO_UNMASK 0x080 /* Doesn't work with IRQ unmasking on */ | ||
| 67 | #define VIA_BAD_ID 0x100 /* Has wrong vendor ID (0x1107) */ | ||
| 68 | #define VIA_BAD_AST 0x200 /* Don't touch Address Setup Timing */ | ||
| 69 | 64 | ||
| 70 | /* | 65 | /* |
| 71 | * VIA SouthBridge chips. | 66 | * VIA SouthBridge chips. |
| @@ -76,36 +71,37 @@ static struct via_isa_bridge { | |||
| 76 | u16 id; | 71 | u16 id; |
| 77 | u8 rev_min; | 72 | u8 rev_min; |
| 78 | u8 rev_max; | 73 | u8 rev_max; |
| 79 | u16 flags; | 74 | u8 udma_mask; |
| 75 | u8 flags; | ||
| 80 | } via_isa_bridges[] = { | 76 | } via_isa_bridges[] = { |
| 81 | { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, | 77 | { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, |
| 82 | { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, | 78 | { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, |
| 83 | { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, | 79 | { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, |
| 84 | { "vt8251", PCI_DEVICE_ID_VIA_8251, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, | 80 | { "vt8251", PCI_DEVICE_ID_VIA_8251, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, |
| 85 | { "vt8237", PCI_DEVICE_ID_VIA_8237, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, | 81 | { "vt8237", PCI_DEVICE_ID_VIA_8237, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, |
| 86 | { "vt8237a", PCI_DEVICE_ID_VIA_8237A, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, | 82 | { "vt8237a", PCI_DEVICE_ID_VIA_8237A, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, |
| 87 | { "vt8235", PCI_DEVICE_ID_VIA_8235, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, | 83 | { "vt8235", PCI_DEVICE_ID_VIA_8235, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, |
| 88 | { "vt8233a", PCI_DEVICE_ID_VIA_8233A, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, | 84 | { "vt8233a", PCI_DEVICE_ID_VIA_8233A, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, |
| 89 | { "vt8233c", PCI_DEVICE_ID_VIA_8233C_0, 0x00, 0x2f, VIA_UDMA_100 }, | 85 | { "vt8233c", PCI_DEVICE_ID_VIA_8233C_0, 0x00, 0x2f, ATA_UDMA5, }, |
| 90 | { "vt8233", PCI_DEVICE_ID_VIA_8233_0, 0x00, 0x2f, VIA_UDMA_100 }, | 86 | { "vt8233", PCI_DEVICE_ID_VIA_8233_0, 0x00, 0x2f, ATA_UDMA5, }, |
| 91 | { "vt8231", PCI_DEVICE_ID_VIA_8231, 0x00, 0x2f, VIA_UDMA_100 }, | 87 | { "vt8231", PCI_DEVICE_ID_VIA_8231, 0x00, 0x2f, ATA_UDMA5, }, |
| 92 | { "vt82c686b", PCI_DEVICE_ID_VIA_82C686, 0x40, 0x4f, VIA_UDMA_100 }, | 88 | { "vt82c686b", PCI_DEVICE_ID_VIA_82C686, 0x40, 0x4f, ATA_UDMA5, }, |
| 93 | { "vt82c686a", PCI_DEVICE_ID_VIA_82C686, 0x10, 0x2f, VIA_UDMA_66 }, | 89 | { "vt82c686a", PCI_DEVICE_ID_VIA_82C686, 0x10, 0x2f, ATA_UDMA4, }, |
| 94 | { "vt82c686", PCI_DEVICE_ID_VIA_82C686, 0x00, 0x0f, VIA_UDMA_33 | VIA_BAD_CLK66 }, | 90 | { "vt82c686", PCI_DEVICE_ID_VIA_82C686, 0x00, 0x0f, ATA_UDMA2, VIA_BAD_CLK66 }, |
| 95 | { "vt82c596b", PCI_DEVICE_ID_VIA_82C596, 0x10, 0x2f, VIA_UDMA_66 }, | 91 | { "vt82c596b", PCI_DEVICE_ID_VIA_82C596, 0x10, 0x2f, ATA_UDMA4, }, |
| 96 | { "vt82c596a", PCI_DEVICE_ID_VIA_82C596, 0x00, 0x0f, VIA_UDMA_33 | VIA_BAD_CLK66 }, | 92 | { "vt82c596a", PCI_DEVICE_ID_VIA_82C596, 0x00, 0x0f, ATA_UDMA2, VIA_BAD_CLK66 }, |
| 97 | { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x47, 0x4f, VIA_UDMA_33 | VIA_SET_FIFO }, | 93 | { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x47, 0x4f, ATA_UDMA2, VIA_SET_FIFO }, |
| 98 | { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x40, 0x46, VIA_UDMA_33 | VIA_SET_FIFO | VIA_BAD_PREQ }, | 94 | { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x40, 0x46, ATA_UDMA2, VIA_SET_FIFO | VIA_BAD_PREQ }, |
| 99 | { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x30, 0x3f, VIA_UDMA_33 | VIA_SET_FIFO }, | 95 | { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x30, 0x3f, ATA_UDMA2, VIA_SET_FIFO }, |
| 100 | { "vt82c586a", PCI_DEVICE_ID_VIA_82C586_0, 0x20, 0x2f, VIA_UDMA_33 | VIA_SET_FIFO }, | 96 | { "vt82c586a", PCI_DEVICE_ID_VIA_82C586_0, 0x20, 0x2f, ATA_UDMA2, VIA_SET_FIFO }, |
| 101 | { "vt82c586", PCI_DEVICE_ID_VIA_82C586_0, 0x00, 0x0f, VIA_UDMA_NONE | VIA_SET_FIFO }, | 97 | { "vt82c586", PCI_DEVICE_ID_VIA_82C586_0, 0x00, 0x0f, 0x00, VIA_SET_FIFO }, |
| 102 | { "vt82c576", PCI_DEVICE_ID_VIA_82C576, 0x00, 0x2f, VIA_UDMA_NONE | VIA_SET_FIFO | VIA_NO_UNMASK }, | 98 | { "vt82c576", PCI_DEVICE_ID_VIA_82C576, 0x00, 0x2f, 0x00, VIA_SET_FIFO | VIA_NO_UNMASK }, |
| 103 | { "vt82c576", PCI_DEVICE_ID_VIA_82C576, 0x00, 0x2f, VIA_UDMA_NONE | VIA_SET_FIFO | VIA_NO_UNMASK | VIA_BAD_ID }, | 99 | { "vt82c576", PCI_DEVICE_ID_VIA_82C576, 0x00, 0x2f, 0x00, VIA_SET_FIFO | VIA_NO_UNMASK | VIA_BAD_ID }, |
| 104 | { NULL } | 100 | { NULL } |
| 105 | }; | 101 | }; |
| 106 | 102 | ||
| 107 | static unsigned int via_clock; | 103 | static unsigned int via_clock; |
| 108 | static char *via_dma[] = { "MWDMA16", "UDMA33", "UDMA66", "UDMA100", "UDMA133" }; | 104 | static char *via_dma[] = { "16", "25", "33", "44", "66", "100", "133" }; |
| 109 | 105 | ||
| 110 | struct via82cxxx_dev | 106 | struct via82cxxx_dev |
| 111 | { | 107 | { |
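Folding the VIA_UDMA_33/66/100/133 "level" encoding into a per-bridge udma_mask works because the ATA_UDMA* constants in <linux/ata.h> are cumulative bitmasks (ATA_UDMA2 == 0x07, ATA_UDMA4 == 0x1f, ATA_UDMA5 == 0x3f, ATA_UDMA6 == 0x7f), so one field both feeds hwif->ultra_mask directly and yields the top supported mode via fls(). A small sketch of the banner indexing that the printk hunk further down relies on:

    #include <stdio.h>

    static const char *via_dma[] = { "16", "25", "33", "44", "66", "100", "133" };

    static int fls32(unsigned v)          /* portable stand-in for the kernel's fls() */
    {
        int n = 0;
        while (v) { n++; v >>= 1; }
        return n;                         /* fls32(0x7f) == 7 */
    }

    static void banner(unsigned char udma_mask)
    {
        printf("IDE %sDMA%s controller\n",
               udma_mask ? "U" : "MW",
               via_dma[udma_mask ? fls32(udma_mask) - 1 : 0]);
    }

    int main(void)
    {
        banner(0x7f);  /* ATA_UDMA6 -> "IDE UDMA133 controller" */
        banner(0x07);  /* ATA_UDMA2 -> "IDE UDMA33 controller"  */
        banner(0x00);  /* no UDMA   -> "IDE MWDMA16 controller" */
        return 0;
    }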
| @@ -140,12 +136,12 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing) | |||
| 140 | pci_write_config_byte(dev, VIA_DRIVE_TIMING + (3 - dn), | 136 | pci_write_config_byte(dev, VIA_DRIVE_TIMING + (3 - dn), |
| 141 | ((FIT(timing->active, 1, 16) - 1) << 4) | (FIT(timing->recover, 1, 16) - 1)); | 137 | ((FIT(timing->active, 1, 16) - 1) << 4) | (FIT(timing->recover, 1, 16) - 1)); |
| 142 | 138 | ||
| 143 | switch (vdev->via_config->flags & VIA_UDMA) { | 139 | switch (vdev->via_config->udma_mask) { |
| 144 | case VIA_UDMA_33: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 5) - 2)) : 0x03; break; | 140 | case ATA_UDMA2: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 5) - 2)) : 0x03; break; |
| 145 | case VIA_UDMA_66: t = timing->udma ? (0xe8 | (FIT(timing->udma, 2, 9) - 2)) : 0x0f; break; | 141 | case ATA_UDMA4: t = timing->udma ? (0xe8 | (FIT(timing->udma, 2, 9) - 2)) : 0x0f; break; |
| 146 | case VIA_UDMA_100: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 9) - 2)) : 0x07; break; | 142 | case ATA_UDMA5: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 9) - 2)) : 0x07; break; |
| 147 | case VIA_UDMA_133: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 9) - 2)) : 0x07; break; | 143 | case ATA_UDMA6: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 9) - 2)) : 0x07; break; |
| 148 | default: return; | 144 | default: return; |
| 149 | } | 145 | } |
| 150 | 146 | ||
| 151 | pci_write_config_byte(dev, VIA_UDMA_TIMING + (3 - dn), t); | 147 | pci_write_config_byte(dev, VIA_UDMA_TIMING + (3 - dn), t); |
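via_set_speed now keys the UDMA timing byte off the bridge's udma_mask instead of the old flag bits. FIT() is a clamp, so the ATA_UDMA4 case packs a cycle count clamped to [2,9] into the low bits under an 0xe8 enable pattern, with 0x0f meaning "UDMA off". A hedged rework of just that encoding step, with FIT re-expressed as an ordinary clamp:

    #include <stdio.h>

    static int fit(int v, int lo, int hi)  /* the kernel's FIT() clamp */
    {
        return v < lo ? lo : (v > hi ? hi : v);
    }

    /* ATA_UDMA4-style bridge, as in the hunk: enable bits 0xe8, cycle field
     * = clamp(udma, 2, 9) - 2; a zero udma cycle count means "UDMA off" */
    static unsigned char udma_timing_byte(int udma_cycles)
    {
        return udma_cycles ? (0xe8 | (fit(udma_cycles, 2, 9) - 2)) : 0x0f;
    }

    int main(void)
    {
        printf("0x%02x\n", udma_timing_byte(2));   /* fastest:  0xe8 */
        printf("0x%02x\n", udma_timing_byte(12));  /* clamped:  0xef */
        printf("0x%02x\n", udma_timing_byte(0));   /* disabled: 0x0f */
        return 0;
    }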
| @@ -173,12 +169,12 @@ static int via_set_drive(ide_drive_t *drive, u8 speed) | |||
| 173 | 169 | ||
| 174 | T = 1000000000 / via_clock; | 170 | T = 1000000000 / via_clock; |
| 175 | 171 | ||
| 176 | switch (vdev->via_config->flags & VIA_UDMA) { | 172 | switch (vdev->via_config->udma_mask) { |
| 177 | case VIA_UDMA_33: UT = T; break; | 173 | case ATA_UDMA2: UT = T; break; |
| 178 | case VIA_UDMA_66: UT = T/2; break; | 174 | case ATA_UDMA4: UT = T/2; break; |
| 179 | case VIA_UDMA_100: UT = T/3; break; | 175 | case ATA_UDMA5: UT = T/3; break; |
| 180 | case VIA_UDMA_133: UT = T/4; break; | 176 | case ATA_UDMA6: UT = T/4; break; |
| 181 | default: UT = T; | 177 | default: UT = T; |
| 182 | } | 178 | } |
| 183 | 179 | ||
| 184 | ide_timing_compute(drive, speed, &t, T, UT); | 180 | ide_timing_compute(drive, speed, &t, T, UT); |
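The UT divisor reflects how fast the UDMA strobe runs relative to the PCI-derived base clock: UDMA33 bridges clock it at 1x, UDMA66 at 2x, UDMA100 at 3x, UDMA133 at 4x, so the per-tick period handed to ide_timing_compute() shrinks accordingly. Worked numbers for a 33333 kHz via_clock:

    #include <stdio.h>

    int main(void)
    {
        unsigned via_clock = 33333;              /* kHz, typical PCI-derived clock */
        unsigned T = 1000000000 / via_clock;     /* base period: 30000 */
        /* strobe period per UDMA generation, as in via_set_drive() */
        printf("UDMA33:  UT = %u\n", T);         /* 30000 */
        printf("UDMA66:  UT = %u\n", T / 2);     /* 15000 */
        printf("UDMA100: UT = %u\n", T / 3);     /* 10000 */
        printf("UDMA133: UT = %u\n", T / 4);     /*  7500 */
        return 0;
    }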
| @@ -208,8 +204,7 @@ static int via_set_drive(ide_drive_t *drive, u8 speed) | |||
| 208 | static void via82cxxx_tune_drive(ide_drive_t *drive, u8 pio) | 204 | static void via82cxxx_tune_drive(ide_drive_t *drive, u8 pio) |
| 209 | { | 205 | { |
| 210 | if (pio == 255) { | 206 | if (pio == 255) { |
| 211 | via_set_drive(drive, | 207 | via_set_drive(drive, ide_find_best_pio_mode(drive)); |
| 212 | ide_find_best_mode(drive, XFER_PIO | XFER_EPIO)); | ||
| 213 | return; | 208 | return; |
| 214 | } | 209 | } |
| 215 | 210 | ||
| @@ -226,16 +221,10 @@ static void via82cxxx_tune_drive(ide_drive_t *drive, u8 pio) | |||
| 226 | 221 | ||
| 227 | static int via82cxxx_ide_dma_check (ide_drive_t *drive) | 222 | static int via82cxxx_ide_dma_check (ide_drive_t *drive) |
| 228 | { | 223 | { |
| 229 | ide_hwif_t *hwif = HWIF(drive); | 224 | u8 speed = ide_max_dma_mode(drive); |
| 230 | struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev); | ||
| 231 | u16 w80 = hwif->udma_four; | ||
| 232 | 225 | ||
| 233 | u16 speed = ide_find_best_mode(drive, | 226 | if (speed == 0) |
| 234 | XFER_PIO | XFER_EPIO | XFER_SWDMA | XFER_MWDMA | | 227 | speed = ide_find_best_pio_mode(drive); |
| 235 | (vdev->via_config->flags & VIA_UDMA ? XFER_UDMA : 0) | | ||
| 236 | (w80 && (vdev->via_config->flags & VIA_UDMA) >= VIA_UDMA_66 ? XFER_UDMA_66 : 0) | | ||
| 237 | (w80 && (vdev->via_config->flags & VIA_UDMA) >= VIA_UDMA_100 ? XFER_UDMA_100 : 0) | | ||
| 238 | (w80 && (vdev->via_config->flags & VIA_UDMA) >= VIA_UDMA_133 ? XFER_UDMA_133 : 0)); | ||
| 239 | 228 | ||
| 240 | via_set_drive(drive, speed); | 229 | via_set_drive(drive, speed); |
| 241 | 230 | ||
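The rewritten via82cxxx_ide_dma_check no longer builds an XFER_* capability map by hand: ide_max_dma_mode() already intersects the drive's advertised modes with the host masks and the cable limit, and a zero result falls back to the best PIO mode. A sketch of that intersection under illustrative data (the real ide_max_dma_mode consults more state than this):

    #include <stdio.h>

    /* illustrative: highest common UDMA mode, capped at UDMA2 on a 40-wire cable */
    static int max_udma_mode(unsigned host_mask, unsigned drive_mask, int cable80)
    {
        unsigned m = host_mask & drive_mask;
        if (!cable80)
            m &= 0x07;                /* ATA_UDMA2: only UDMA0..2 on 40 wires */
        for (int mode = 6; mode >= 0; mode--)
            if (m & (1u << mode))
                return mode;
        return -1;                    /* no common UDMA mode: fall back to PIO */
    }

    int main(void)
    {
        printf("%d\n", max_udma_mode(0x7f, 0x3f, 1));  /* 5 -> UDMA100 */
        printf("%d\n", max_udma_mode(0x7f, 0x3f, 0));  /* 2 -> UDMA33  */
        return 0;
    }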
| @@ -272,8 +261,8 @@ static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u) | |||
| 272 | { | 261 | { |
| 273 | int i; | 262 | int i; |
| 274 | 263 | ||
| 275 | switch (vdev->via_config->flags & VIA_UDMA) { | 264 | switch (vdev->via_config->udma_mask) { |
| 276 | case VIA_UDMA_66: | 265 | case ATA_UDMA4: |
| 277 | for (i = 24; i >= 0; i -= 8) | 266 | for (i = 24; i >= 0; i -= 8) |
| 278 | if (((u >> (i & 16)) & 8) && | 267 | if (((u >> (i & 16)) & 8) && |
| 279 | ((u >> i) & 0x20) && | 268 | ((u >> i) & 0x20) && |
| @@ -286,7 +275,7 @@ static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u) | |||
| 286 | } | 275 | } |
| 287 | break; | 276 | break; |
| 288 | 277 | ||
| 289 | case VIA_UDMA_100: | 278 | case ATA_UDMA5: |
| 290 | for (i = 24; i >= 0; i -= 8) | 279 | for (i = 24; i >= 0; i -= 8) |
| 291 | if (((u >> i) & 0x10) || | 280 | if (((u >> i) & 0x10) || |
| 292 | (((u >> i) & 0x20) && | 281 | (((u >> i) & 0x20) && |
| @@ -298,7 +287,7 @@ static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u) | |||
| 298 | } | 287 | } |
| 299 | break; | 288 | break; |
| 300 | 289 | ||
| 301 | case VIA_UDMA_133: | 290 | case ATA_UDMA6: |
| 302 | for (i = 24; i >= 0; i -= 8) | 291 | for (i = 24; i >= 0; i -= 8) |
| 303 | if (((u >> i) & 0x10) || | 292 | if (((u >> i) & 0x10) || |
| 304 | (((u >> i) & 0x20) && | 293 | (((u >> i) & 0x20) && |
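via_cable_detect walks the four per-drive bytes of the VIA_UDMA_TIMING dword (i = 24, 16, 8, 0) and infers an 80-wire cable on a channel when the BIOS left a drive programmed at a strobe rate only legal on 80 wires; the exact bit tests differ per bridge generation, as the three cases above show. A simplified sketch of the scan shape only — the per-byte predicate below is illustrative, not lifted from the driver:

    #include <stdio.h>

    /* scan four drive timing bytes; offsets 24/16 belong to channel 1,
     * 8/0 to channel 0, which is what i >> 4 maps to */
    static unsigned detect_80w(unsigned u, int (*fast)(unsigned byte))
    {
        unsigned w80 = 0;
        for (int i = 24; i >= 0; i -= 8)
            if (fast((u >> i) & 0xff))
                w80 |= 1u << (i >> 4);
        return w80;                   /* one bit per channel, like vdev->via_80w */
    }

    static int looks_fast(unsigned b)     /* illustrative predicate only */
    {
        return (b & 0x10) || ((b & 0x20) && (b & 7) < 4);
    }

    int main(void)
    {
        printf("0x%x\n", detect_80w(0x20302000, looks_fast));  /* -> 0x3: both channels */
        return 0;
    }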
| @@ -353,7 +342,7 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const | |||
| 353 | 342 | ||
| 354 | via_cable_detect(vdev, u); | 343 | via_cable_detect(vdev, u); |
| 355 | 344 | ||
| 356 | if ((via_config->flags & VIA_UDMA) == VIA_UDMA_66) { | 345 | if (via_config->udma_mask == ATA_UDMA4) { |
| 357 | /* Enable Clk66 */ | 346 | /* Enable Clk66 */ |
| 358 | pci_write_config_dword(dev, VIA_UDMA_TIMING, u|0x80008); | 347 | pci_write_config_dword(dev, VIA_UDMA_TIMING, u|0x80008); |
| 359 | } else if (via_config->flags & VIA_BAD_CLK66) { | 348 | } else if (via_config->flags & VIA_BAD_CLK66) { |
| @@ -416,16 +405,54 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const | |||
| 416 | */ | 405 | */ |
| 417 | 406 | ||
| 418 | pci_read_config_byte(isa, PCI_REVISION_ID, &t); | 407 | pci_read_config_byte(isa, PCI_REVISION_ID, &t); |
| 419 | printk(KERN_INFO "VP_IDE: VIA %s (rev %02x) IDE %s " | 408 | printk(KERN_INFO "VP_IDE: VIA %s (rev %02x) IDE %sDMA%s " |
| 420 | "controller on pci%s\n", | 409 | "controller on pci%s\n", |
| 421 | via_config->name, t, | 410 | via_config->name, t, |
| 422 | via_dma[via_config->flags & VIA_UDMA], | 411 | via_config->udma_mask ? "U" : "MW", |
| 412 | via_dma[via_config->udma_mask ? | ||
| 413 | (fls(via_config->udma_mask) - 1) : 0], | ||
| 423 | pci_name(dev)); | 414 | pci_name(dev)); |
| 424 | 415 | ||
| 425 | pci_dev_put(isa); | 416 | pci_dev_put(isa); |
| 426 | return 0; | 417 | return 0; |
| 427 | } | 418 | } |
| 428 | 419 | ||
| 420 | /* | ||
| 421 | * Cable special cases | ||
| 422 | */ | ||
| 423 | |||
| 424 | static struct dmi_system_id cable_dmi_table[] = { | ||
| 425 | { | ||
| 426 | .ident = "Acer Ferrari 3400", | ||
| 427 | .matches = { | ||
| 428 | DMI_MATCH(DMI_BOARD_VENDOR, "Acer,Inc."), | ||
| 429 | DMI_MATCH(DMI_BOARD_NAME, "Ferrari 3400"), | ||
| 430 | }, | ||
| 431 | }, | ||
| 432 | { } | ||
| 433 | }; | ||
| 434 | |||
| 435 | static int via_cable_override(void) | ||
| 436 | { | ||
| 437 | /* Systems by DMI */ | ||
| 438 | if (dmi_check_system(cable_dmi_table)) | ||
| 439 | return 1; | ||
| 440 | return 0; | ||
| 441 | } | ||
| 442 | |||
| 443 | static u8 __devinit via82cxxx_cable_detect(ide_hwif_t *hwif) | ||
| 444 | { | ||
| 445 | struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev); | ||
| 446 | |||
| 447 | if (via_cable_override()) | ||
| 448 | return ATA_CBL_PATA40_SHORT; | ||
| 449 | |||
| 450 | if ((vdev->via_80w >> hwif->channel) & 1) | ||
| 451 | return ATA_CBL_PATA80; | ||
| 452 | else | ||
| 453 | return ATA_CBL_PATA40; | ||
| 454 | } | ||
| 455 | |||
| 429 | static void __devinit init_hwif_via82cxxx(ide_hwif_t *hwif) | 456 | static void __devinit init_hwif_via82cxxx(ide_hwif_t *hwif) |
| 430 | { | 457 | { |
| 431 | struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev); | 458 | struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev); |
| @@ -454,12 +481,14 @@ static void __devinit init_hwif_via82cxxx(ide_hwif_t *hwif) | |||
| 454 | return; | 481 | return; |
| 455 | 482 | ||
| 456 | hwif->atapi_dma = 1; | 483 | hwif->atapi_dma = 1; |
| 457 | hwif->ultra_mask = 0x7f; | 484 | |
| 485 | hwif->ultra_mask = vdev->via_config->udma_mask; | ||
| 458 | hwif->mwdma_mask = 0x07; | 486 | hwif->mwdma_mask = 0x07; |
| 459 | hwif->swdma_mask = 0x07; | 487 | hwif->swdma_mask = 0x07; |
| 460 | 488 | ||
| 461 | if (!hwif->udma_four) | 489 | if (hwif->cbl != ATA_CBL_PATA40_SHORT) |
| 462 | hwif->udma_four = (vdev->via_80w >> hwif->channel) & 1; | 490 | hwif->cbl = via82cxxx_cable_detect(hwif); |
| 491 | |||
| 463 | hwif->ide_dma_check = &via82cxxx_ide_dma_check; | 492 | hwif->ide_dma_check = &via82cxxx_ide_dma_check; |
| 464 | if (!noautodma) | 493 | if (!noautodma) |
| 465 | hwif->autodma = 1; | 494 | hwif->autodma = 1; |
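The new via_cable_override() hook above gives machines with known-bogus cable detection (here the Acer Ferrari 3400) a hard short-cable verdict before any register probing runs, and init_hwif now takes ultra_mask straight from the bridge table. Whitelisting another model means appending one dmi_system_id entry; a self-contained mock of that matching shape, where the second entry's strings are hypothetical examples, not a real quirk:

    #include <stdio.h>
    #include <string.h>

    /* userspace mock of the dmi_system_id matching shape; the real
     * definitions live in <linux/dmi.h> */
    struct dmi_id { const char *ident, *vendor, *board; };

    static const struct dmi_id cable_dmi_table[] = {
        { "Acer Ferrari 3400", "Acer,Inc.", "Ferrari 3400" },
        { "Example Laptop",    "Example Vendor", "Example Board" },  /* hypothetical */
        { NULL, NULL, NULL }                                         /* terminator */
    };

    static int dmi_check(const char *vendor, const char *board)
    {
        for (const struct dmi_id *id = cable_dmi_table; id->ident; id++)
            if (!strcmp(id->vendor, vendor) && !strcmp(id->board, board))
                return 1;
        return 0;
    }

    int main(void)
    {
        printf("%d\n", dmi_check("Acer,Inc.", "Ferrari 3400"));  /* 1: force short cable */
        printf("%d\n", dmi_check("Other", "Board"));             /* 0: probe registers   */
        return 0;
    }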
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index 45fc36f0f219..e46f47206542 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c | |||
| @@ -942,8 +942,8 @@ pmac_ide_tune_chipset (ide_drive_t *drive, byte speed) | |||
| 942 | return 1; | 942 | return 1; |
| 943 | case XFER_UDMA_4: | 943 | case XFER_UDMA_4: |
| 944 | case XFER_UDMA_3: | 944 | case XFER_UDMA_3: |
| 945 | if (HWIF(drive)->udma_four == 0) | 945 | if (drive->hwif->cbl != ATA_CBL_PATA80) |
| 946 | return 1; | 946 | return 1; |
| 947 | case XFER_UDMA_2: | 947 | case XFER_UDMA_2: |
| 948 | case XFER_UDMA_1: | 948 | case XFER_UDMA_1: |
| 949 | case XFER_UDMA_0: | 949 | case XFER_UDMA_0: |
| @@ -1244,7 +1244,7 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif) | |||
| 1244 | hwif->chipset = ide_pmac; | 1244 | hwif->chipset = ide_pmac; |
| 1245 | hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET] || pmif->mediabay; | 1245 | hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET] || pmif->mediabay; |
| 1246 | hwif->hold = pmif->mediabay; | 1246 | hwif->hold = pmif->mediabay; |
| 1247 | hwif->udma_four = pmif->cable_80; | 1247 | hwif->cbl = pmif->cable_80 ? ATA_CBL_PATA80 : ATA_CBL_PATA40; |
| 1248 | hwif->drives[0].unmask = 1; | 1248 | hwif->drives[0].unmask = 1; |
| 1249 | hwif->drives[1].unmask = 1; | 1249 | hwif->drives[1].unmask = 1; |
| 1250 | hwif->tuneproc = pmac_ide_tuneproc; | 1250 | hwif->tuneproc = pmac_ide_tuneproc; |
| @@ -1821,28 +1821,11 @@ pmac_ide_dma_check(ide_drive_t *drive) | |||
| 1821 | enable = 0; | 1821 | enable = 0; |
| 1822 | 1822 | ||
| 1823 | if (enable) { | 1823 | if (enable) { |
| 1824 | short mode; | 1824 | u8 mode = ide_max_dma_mode(drive); |
| 1825 | 1825 | ||
| 1826 | map = XFER_MWDMA; | 1826 | if (mode >= XFER_UDMA_0) |
| 1827 | if (pmif->kind == controller_kl_ata4 | ||
| 1828 | || pmif->kind == controller_un_ata6 | ||
| 1829 | || pmif->kind == controller_k2_ata6 | ||
| 1830 | || pmif->kind == controller_sh_ata6) { | ||
| 1831 | map |= XFER_UDMA; | ||
| 1832 | if (pmif->cable_80) { | ||
| 1833 | map |= XFER_UDMA_66; | ||
| 1834 | if (pmif->kind == controller_un_ata6 || | ||
| 1835 | pmif->kind == controller_k2_ata6 || | ||
| 1836 | pmif->kind == controller_sh_ata6) | ||
| 1837 | map |= XFER_UDMA_100; | ||
| 1838 | if (pmif->kind == controller_sh_ata6) | ||
| 1839 | map |= XFER_UDMA_133; | ||
| 1840 | } | ||
| 1841 | } | ||
| 1842 | mode = ide_find_best_mode(drive, map); | ||
| 1843 | if (mode & XFER_UDMA) | ||
| 1844 | drive->using_dma = pmac_ide_udma_enable(drive, mode); | 1827 | drive->using_dma = pmac_ide_udma_enable(drive, mode); |
| 1845 | else if (mode & XFER_MWDMA) | 1828 | else if (mode >= XFER_MW_DMA_0) |
| 1846 | drive->using_dma = pmac_ide_mdma_enable(drive, mode); | 1829 | drive->using_dma = pmac_ide_mdma_enable(drive, mode); |
| 1847 | hwif->OUTB(0, IDE_CONTROL_REG); | 1830 | hwif->OUTB(0, IDE_CONTROL_REG); |
| 1848 | /* Apply settings to controller */ | 1831 | /* Apply settings to controller */ |
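The pmac conversion leans on the XFER_* modes forming one ordered number space (PIO0 at 0x08, SWDMA0 at 0x10, MWDMA0 at 0x20, UDMA0 at 0x40 in <linux/hdreg.h>), so ide_max_dma_mode() can return a single byte and the caller classifies it with plain comparisons instead of the old XFER_UDMA/XFER_MWDMA capability bitmaps:

    #include <stdio.h>

    /* base values of the ordered XFER_* mode space */
    enum { XFER_PIO_0 = 0x08, XFER_SW_DMA_0 = 0x10,
           XFER_MW_DMA_0 = 0x20, XFER_UDMA_0 = 0x40 };

    static const char *classify(unsigned char mode)
    {
        if (mode >= XFER_UDMA_0)   return "UDMA";
        if (mode >= XFER_MW_DMA_0) return "MWDMA";
        if (mode >= XFER_SW_DMA_0) return "SWDMA";
        if (mode >= XFER_PIO_0)    return "PIO";
        return "none";
    }

    int main(void)
    {
        printf("%s\n", classify(0x44));  /* XFER_UDMA_4   -> UDMA  */
        printf("%s\n", classify(0x22));  /* XFER_MW_DMA_2 -> MWDMA */
        return 0;
    }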
| @@ -2004,20 +1987,19 @@ static void pmac_ide_dma_host_on(ide_drive_t *drive) | |||
| 2004 | { | 1987 | { |
| 2005 | } | 1988 | } |
| 2006 | 1989 | ||
| 2007 | static int | 1990 | static void |
| 2008 | pmac_ide_dma_lostirq (ide_drive_t *drive) | 1991 | pmac_ide_dma_lost_irq (ide_drive_t *drive) |
| 2009 | { | 1992 | { |
| 2010 | pmac_ide_hwif_t* pmif = (pmac_ide_hwif_t *)HWIF(drive)->hwif_data; | 1993 | pmac_ide_hwif_t* pmif = (pmac_ide_hwif_t *)HWIF(drive)->hwif_data; |
| 2011 | volatile struct dbdma_regs __iomem *dma; | 1994 | volatile struct dbdma_regs __iomem *dma; |
| 2012 | unsigned long status; | 1995 | unsigned long status; |
| 2013 | 1996 | ||
| 2014 | if (pmif == NULL) | 1997 | if (pmif == NULL) |
| 2015 | return 0; | 1998 | return; |
| 2016 | dma = pmif->dma_regs; | 1999 | dma = pmif->dma_regs; |
| 2017 | 2000 | ||
| 2018 | status = readl(&dma->status); | 2001 | status = readl(&dma->status); |
| 2019 | printk(KERN_ERR "ide-pmac lost interrupt, dma status: %lx\n", status); | 2002 | printk(KERN_ERR "ide-pmac lost interrupt, dma status: %lx\n", status); |
| 2020 | return 0; | ||
| 2021 | } | 2003 | } |
| 2022 | 2004 | ||
| 2023 | /* | 2005 | /* |
| @@ -2057,8 +2039,8 @@ pmac_ide_setup_dma(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif) | |||
| 2057 | hwif->ide_dma_test_irq = &pmac_ide_dma_test_irq; | 2039 | hwif->ide_dma_test_irq = &pmac_ide_dma_test_irq; |
| 2058 | hwif->dma_host_off = &pmac_ide_dma_host_off; | 2040 | hwif->dma_host_off = &pmac_ide_dma_host_off; |
| 2059 | hwif->dma_host_on = &pmac_ide_dma_host_on; | 2041 | hwif->dma_host_on = &pmac_ide_dma_host_on; |
| 2060 | hwif->ide_dma_timeout = &__ide_dma_timeout; | 2042 | hwif->dma_timeout = &ide_dma_timeout; |
| 2061 | hwif->ide_dma_lostirq = &pmac_ide_dma_lostirq; | 2043 | hwif->dma_lost_irq = &pmac_ide_dma_lost_irq; |
| 2062 | 2044 | ||
| 2063 | hwif->atapi_dma = 1; | 2045 | hwif->atapi_dma = 1; |
| 2064 | switch(pmif->kind) { | 2046 | switch(pmif->kind) { |
diff --git a/fs/jfs/endian24.h b/fs/jfs/endian24.h index 79494c4f2b10..fa92f7f1d0d0 100644 --- a/fs/jfs/endian24.h +++ b/fs/jfs/endian24.h | |||
| @@ -29,7 +29,7 @@ | |||
| 29 | __u32 __x = (x); \ | 29 | __u32 __x = (x); \ |
| 30 | ((__u32)( \ | 30 | ((__u32)( \ |
| 31 | ((__x & (__u32)0x000000ffUL) << 16) | \ | 31 | ((__x & (__u32)0x000000ffUL) << 16) | \ |
| 32 | (__x & (__u32)0x0000ff00UL) | \ | 32 | (__x & (__u32)0x0000ff00UL) | \ |
| 33 | ((__x & (__u32)0x00ff0000UL) >> 16) )); \ | 33 | ((__x & (__u32)0x00ff0000UL) >> 16) )); \ |
| 34 | }) | 34 | }) |
| 35 | 35 | ||
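The endian24.h hunk above only re-tabs the continuation lines; for reference, the macro byte-swaps a 24-bit little-endian quantity by trading bytes 0 and 2 while the middle byte stays put. A one-line check:

    #include <stdio.h>

    /* same shuffle as JFS's __swab24(): swap bytes 0 and 2 of a 24-bit value */
    static unsigned swab24(unsigned x)
    {
        return ((x & 0x0000ffu) << 16) |
                (x & 0x00ff00u)        |
               ((x & 0xff0000u) >> 16);
    }

    int main(void)
    {
        printf("0x%06x\n", swab24(0x112233));  /* -> 0x332211 */
        return 0;
    }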
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index 9c5d59632aac..887f5759e536 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c | |||
| @@ -26,34 +26,6 @@ | |||
| 26 | #include "jfs_filsys.h" | 26 | #include "jfs_filsys.h" |
| 27 | #include "jfs_debug.h" | 27 | #include "jfs_debug.h" |
| 28 | 28 | ||
| 29 | #ifdef CONFIG_JFS_DEBUG | ||
| 30 | void dump_mem(char *label, void *data, int length) | ||
| 31 | { | ||
| 32 | int i, j; | ||
| 33 | int *intptr = data; | ||
| 34 | char *charptr = data; | ||
| 35 | char buf[10], line[80]; | ||
| 36 | |||
| 37 | printk("%s: dump of %d bytes of data at 0x%p\n\n", label, length, | ||
| 38 | data); | ||
| 39 | for (i = 0; i < length; i += 16) { | ||
| 40 | line[0] = 0; | ||
| 41 | for (j = 0; (j < 4) && (i + j * 4 < length); j++) { | ||
| 42 | sprintf(buf, " %08x", intptr[i / 4 + j]); | ||
| 43 | strcat(line, buf); | ||
| 44 | } | ||
| 45 | buf[0] = ' '; | ||
| 46 | buf[2] = 0; | ||
| 47 | for (j = 0; (j < 16) && (i + j < length); j++) { | ||
| 48 | buf[1] = | ||
| 49 | isprint(charptr[i + j]) ? charptr[i + j] : '.'; | ||
| 50 | strcat(line, buf); | ||
| 51 | } | ||
| 52 | printk("%s\n", line); | ||
| 53 | } | ||
| 54 | } | ||
| 55 | #endif | ||
| 56 | |||
| 57 | #ifdef PROC_FS_JFS /* see jfs_debug.h */ | 29 | #ifdef PROC_FS_JFS /* see jfs_debug.h */ |
| 58 | 30 | ||
| 59 | static struct proc_dir_entry *base; | 31 | static struct proc_dir_entry *base; |
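The private dump_mem() hex dumper is deleted outright (its extern declaration and no-op stub follow in the jfs_debug.h hunk below). By this era the kernel had a generic helper, print_hex_dump() in <linux/kernel.h>, so that is the presumed replacement for any future call site — an assumption here, not something this commit shows. For readers who want the printed shape in userspace, a rough analogue of what the removed helper produced:

    #include <stdio.h>
    #include <ctype.h>

    /* minimal userspace analogue: hex bytes grouped in words, ASCII gutter */
    static void hexdump(const char *label, const void *data, int len)
    {
        const unsigned char *p = data;
        printf("%s: dump of %d bytes of data at %p\n", label, len, data);
        for (int i = 0; i < len; i += 16) {
            for (int j = 0; j < 16 && i + j < len; j++)
                printf("%02x%s", p[i + j], (j % 4 == 3) ? " " : "");
            printf("  ");
            for (int j = 0; j < 16 && i + j < len; j++)
                putchar(isprint(p[i + j]) ? p[i + j] : '.');
            putchar('\n');
        }
    }

    int main(void)
    {
        hexdump("example", "JFS debug buffer", 16);
        return 0;
    }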
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h index 7378798f0b21..044c1e654cc0 100644 --- a/fs/jfs/jfs_debug.h +++ b/fs/jfs/jfs_debug.h | |||
| @@ -62,7 +62,6 @@ extern void jfs_proc_clean(void); | |||
| 62 | 62 | ||
| 63 | extern int jfsloglevel; | 63 | extern int jfsloglevel; |
| 64 | 64 | ||
| 65 | extern void dump_mem(char *label, void *data, int length); | ||
| 66 | extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); | 65 | extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); |
| 67 | 66 | ||
| 68 | /* information message: e.g., configuration, major event */ | 67 | /* information message: e.g., configuration, major event */ |
| @@ -94,7 +93,6 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); | |||
| 94 | * --------- | 93 | * --------- |
| 95 | */ | 94 | */ |
| 96 | #else /* CONFIG_JFS_DEBUG */ | 95 | #else /* CONFIG_JFS_DEBUG */ |
| 97 | #define dump_mem(label,data,length) do {} while (0) | ||
| 98 | #define ASSERT(p) do {} while (0) | 96 | #define ASSERT(p) do {} while (0) |
| 99 | #define jfs_info(fmt, arg...) do {} while (0) | 97 | #define jfs_info(fmt, arg...) do {} while (0) |
| 100 | #define jfs_debug(fmt, arg...) do {} while (0) | 98 | #define jfs_debug(fmt, arg...) do {} while (0) |
diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index 40b20111383c..c387540d3425 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h | |||
| @@ -19,23 +19,23 @@ | |||
| 19 | #define _H_JFS_DINODE | 19 | #define _H_JFS_DINODE |
| 20 | 20 | ||
| 21 | /* | 21 | /* |
| 22 | * jfs_dinode.h: on-disk inode manager | 22 | * jfs_dinode.h: on-disk inode manager |
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #define INODESLOTSIZE 128 | 25 | #define INODESLOTSIZE 128 |
| 26 | #define L2INODESLOTSIZE 7 | 26 | #define L2INODESLOTSIZE 7 |
| 27 | #define log2INODESIZE 9 /* log2(bytes per dinode) */ | 27 | #define log2INODESIZE 9 /* log2(bytes per dinode) */ |
| 28 | 28 | ||
| 29 | 29 | ||
| 30 | /* | 30 | /* |
| 31 | * on-disk inode : 512 bytes | 31 | * on-disk inode : 512 bytes |
| 32 | * | 32 | * |
| 33 | * note: align 64-bit fields on 8-byte boundary. | 33 | * note: align 64-bit fields on 8-byte boundary. |
| 34 | */ | 34 | */ |
| 35 | struct dinode { | 35 | struct dinode { |
| 36 | /* | 36 | /* |
| 37 | * I. base area (128 bytes) | 37 | * I. base area (128 bytes) |
| 38 | * ------------------------ | 38 | * ------------------------ |
| 39 | * | 39 | * |
| 40 | * define generic/POSIX attributes | 40 | * define generic/POSIX attributes |
| 41 | */ | 41 | */ |
| @@ -70,16 +70,16 @@ struct dinode { | |||
| 70 | __le32 di_acltype; /* 4: Type of ACL */ | 70 | __le32 di_acltype; /* 4: Type of ACL */ |
| 71 | 71 | ||
| 72 | /* | 72 | /* |
| 73 | * Extension Areas. | 73 | * Extension Areas. |
| 74 | * | 74 | * |
| 75 | * Historically, the inode was partitioned into 4 128-byte areas, | 75 | * Historically, the inode was partitioned into 4 128-byte areas, |
| 76 | * the last 3 being defined as unions which could have multiple | 76 | * the last 3 being defined as unions which could have multiple |
| 77 | * uses. The first 96 bytes had been completely unused until | 77 | * uses. The first 96 bytes had been completely unused until |
| 78 | * an index table was added to the directory. It is now more | 78 | * an index table was added to the directory. It is now more |
| 79 | * useful to describe the last 3/4 of the inode as a single | 79 | * useful to describe the last 3/4 of the inode as a single |
| 80 | * union. We would probably be better off redesigning the | 80 | * union. We would probably be better off redesigning the |
| 81 | * entire structure from scratch, but we don't want to break | 81 | * entire structure from scratch, but we don't want to break |
| 82 | * commonality with OS/2's JFS at this time. | 82 | * commonality with OS/2's JFS at this time. |
| 83 | */ | 83 | */ |
| 84 | union { | 84 | union { |
| 85 | struct { | 85 | struct { |
| @@ -95,7 +95,7 @@ struct dinode { | |||
| 95 | } _dir; /* (384) */ | 95 | } _dir; /* (384) */ |
| 96 | #define di_dirtable u._dir._table | 96 | #define di_dirtable u._dir._table |
| 97 | #define di_dtroot u._dir._dtroot | 97 | #define di_dtroot u._dir._dtroot |
| 98 | #define di_parent di_dtroot.header.idotdot | 98 | #define di_parent di_dtroot.header.idotdot |
| 99 | #define di_DASD di_dtroot.header.DASD | 99 | #define di_DASD di_dtroot.header.DASD |
| 100 | 100 | ||
| 101 | struct { | 101 | struct { |
| @@ -127,14 +127,14 @@ struct dinode { | |||
| 127 | #define di_inlinedata u._file._u2._special._u | 127 | #define di_inlinedata u._file._u2._special._u |
| 128 | #define di_rdev u._file._u2._special._u._rdev | 128 | #define di_rdev u._file._u2._special._u._rdev |
| 129 | #define di_fastsymlink u._file._u2._special._u._fastsymlink | 129 | #define di_fastsymlink u._file._u2._special._u._fastsymlink |
| 130 | #define di_inlineea u._file._u2._special._inlineea | 130 | #define di_inlineea u._file._u2._special._inlineea |
| 131 | } u; | 131 | } u; |
| 132 | }; | 132 | }; |
| 133 | 133 | ||
| 134 | /* extended mode bits (on-disk inode di_mode) */ | 134 | /* extended mode bits (on-disk inode di_mode) */ |
| 135 | #define IFJOURNAL 0x00010000 /* journalled file */ | 135 | #define IFJOURNAL 0x00010000 /* journalled file */ |
| 136 | #define ISPARSE 0x00020000 /* sparse file enabled */ | 136 | #define ISPARSE 0x00020000 /* sparse file enabled */ |
| 137 | #define INLINEEA 0x00040000 /* inline EA area free */ | 137 | #define INLINEEA 0x00040000 /* inline EA area free */ |
| 138 | #define ISWAPFILE 0x00800000 /* file open for pager swap space */ | 138 | #define ISWAPFILE 0x00800000 /* file open for pager swap space */ |
| 139 | 139 | ||
| 140 | /* more extended mode bits: attributes for OS/2 */ | 140 | /* more extended mode bits: attributes for OS/2 */ |
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index f3b1ebb22280..e1985066b1c6 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c | |||
| @@ -154,12 +154,12 @@ static const s8 budtab[256] = { | |||
| 154 | * the in-core descriptor is initialized from disk. | 154 | * the in-core descriptor is initialized from disk. |
| 155 | * | 155 | * |
| 156 | * PARAMETERS: | 156 | * PARAMETERS: |
| 157 | * ipbmap - pointer to in-core inode for the block map. | 157 | * ipbmap - pointer to in-core inode for the block map. |
| 158 | * | 158 | * |
| 159 | * RETURN VALUES: | 159 | * RETURN VALUES: |
| 160 | * 0 - success | 160 | * 0 - success |
| 161 | * -ENOMEM - insufficient memory | 161 | * -ENOMEM - insufficient memory |
| 162 | * -EIO - i/o error | 162 | * -EIO - i/o error |
| 163 | */ | 163 | */ |
| 164 | int dbMount(struct inode *ipbmap) | 164 | int dbMount(struct inode *ipbmap) |
| 165 | { | 165 | { |
| @@ -232,11 +232,11 @@ int dbMount(struct inode *ipbmap) | |||
| 232 | * the memory for this descriptor is freed. | 232 | * the memory for this descriptor is freed. |
| 233 | * | 233 | * |
| 234 | * PARAMETERS: | 234 | * PARAMETERS: |
| 235 | * ipbmap - pointer to in-core inode for the block map. | 235 | * ipbmap - pointer to in-core inode for the block map. |
| 236 | * | 236 | * |
| 237 | * RETURN VALUES: | 237 | * RETURN VALUES: |
| 238 | * 0 - success | 238 | * 0 - success |
| 239 | * -EIO - i/o error | 239 | * -EIO - i/o error |
| 240 | */ | 240 | */ |
| 241 | int dbUnmount(struct inode *ipbmap, int mounterror) | 241 | int dbUnmount(struct inode *ipbmap, int mounterror) |
| 242 | { | 242 | { |
| @@ -320,13 +320,13 @@ int dbSync(struct inode *ipbmap) | |||
| 320 | * at a time. | 320 | * at a time. |
| 321 | * | 321 | * |
| 322 | * PARAMETERS: | 322 | * PARAMETERS: |
| 323 | * ip - pointer to in-core inode; | 323 | * ip - pointer to in-core inode; |
| 324 | * blkno - starting block number to be freed. | 324 | * blkno - starting block number to be freed. |
| 325 | * nblocks - number of blocks to be freed. | 325 | * nblocks - number of blocks to be freed. |
| 326 | * | 326 | * |
| 327 | * RETURN VALUES: | 327 | * RETURN VALUES: |
| 328 | * 0 - success | 328 | * 0 - success |
| 329 | * -EIO - i/o error | 329 | * -EIO - i/o error |
| 330 | */ | 330 | */ |
| 331 | int dbFree(struct inode *ip, s64 blkno, s64 nblocks) | 331 | int dbFree(struct inode *ip, s64 blkno, s64 nblocks) |
| 332 | { | 332 | { |
| @@ -395,23 +395,23 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) | |||
| 395 | /* | 395 | /* |
| 396 | * NAME: dbUpdatePMap() | 396 | * NAME: dbUpdatePMap() |
| 397 | * | 397 | * |
| 398 | * FUNCTION: update the allocation state (free or allocate) of the | 398 | * FUNCTION: update the allocation state (free or allocate) of the |
| 399 | * specified block range in the persistent block allocation map. | 399 | * specified block range in the persistent block allocation map. |
| 400 | * | 400 | * |
| 401 | * the blocks will be updated in the persistent map one | 401 | * the blocks will be updated in the persistent map one |
| 402 | * dmap at a time. | 402 | * dmap at a time. |
| 403 | * | 403 | * |
| 404 | * PARAMETERS: | 404 | * PARAMETERS: |
| 405 | * ipbmap - pointer to in-core inode for the block map. | 405 | * ipbmap - pointer to in-core inode for the block map. |
| 406 | * free - 'true' if block range is to be freed from the persistent | 406 | * free - 'true' if block range is to be freed from the persistent |
| 407 | * map; 'false' if it is to be allocated. | 407 | * map; 'false' if it is to be allocated. |
| 408 | * blkno - starting block number of the range. | 408 | * blkno - starting block number of the range. |
| 409 | * nblocks - number of contiguous blocks in the range. | 409 | * nblocks - number of contiguous blocks in the range. |
| 410 | * tblk - transaction block; | 410 | * tblk - transaction block; |
| 411 | * | 411 | * |
| 412 | * RETURN VALUES: | 412 | * RETURN VALUES: |
| 413 | * 0 - success | 413 | * 0 - success |
| 414 | * -EIO - i/o error | 414 | * -EIO - i/o error |
| 415 | */ | 415 | */ |
| 416 | int | 416 | int |
| 417 | dbUpdatePMap(struct inode *ipbmap, | 417 | dbUpdatePMap(struct inode *ipbmap, |
| @@ -573,7 +573,7 @@ dbUpdatePMap(struct inode *ipbmap, | |||
| 573 | /* | 573 | /* |
| 574 | * NAME: dbNextAG() | 574 | * NAME: dbNextAG() |
| 575 | * | 575 | * |
| 576 | * FUNCTION: find the preferred allocation group for new allocations. | 576 | * FUNCTION: find the preferred allocation group for new allocations. |
| 577 | * | 577 | * |
| 578 | * Within the allocation groups, we maintain a preferred | 578 | * Within the allocation groups, we maintain a preferred |
| 579 | * allocation group which consists of a group with at least | 579 | * allocation group which consists of a group with at least |
| @@ -589,10 +589,10 @@ dbUpdatePMap(struct inode *ipbmap, | |||
| 589 | * empty ags around for large allocations. | 589 | * empty ags around for large allocations. |
| 590 | * | 590 | * |
| 591 | * PARAMETERS: | 591 | * PARAMETERS: |
| 592 | * ipbmap - pointer to in-core inode for the block map. | 592 | * ipbmap - pointer to in-core inode for the block map. |
| 593 | * | 593 | * |
| 594 | * RETURN VALUES: | 594 | * RETURN VALUES: |
| 595 | * the preferred allocation group number. | 595 | * the preferred allocation group number. |
| 596 | */ | 596 | */ |
| 597 | int dbNextAG(struct inode *ipbmap) | 597 | int dbNextAG(struct inode *ipbmap) |
| 598 | { | 598 | { |
| @@ -656,7 +656,7 @@ unlock: | |||
| 656 | /* | 656 | /* |
| 657 | * NAME: dbAlloc() | 657 | * NAME: dbAlloc() |
| 658 | * | 658 | * |
| 659 | * FUNCTION: attempt to allocate a specified number of contiguous free | 659 | * FUNCTION: attempt to allocate a specified number of contiguous free |
| 660 | * blocks from the working allocation block map. | 660 | * blocks from the working allocation block map. |
| 661 | * | 661 | * |
| 662 | * the block allocation policy uses hints and a multi-step | 662 | * the block allocation policy uses hints and a multi-step |
| @@ -680,16 +680,16 @@ unlock: | |||
| 680 | * size or requests that specify no hint value. | 680 | * size or requests that specify no hint value. |
| 681 | * | 681 | * |
| 682 | * PARAMETERS: | 682 | * PARAMETERS: |
| 683 | * ip - pointer to in-core inode; | 683 | * ip - pointer to in-core inode; |
| 684 | * hint - allocation hint. | 684 | * hint - allocation hint. |
| 685 | * nblocks - number of contiguous blocks in the range. | 685 | * nblocks - number of contiguous blocks in the range. |
| 686 | * results - on successful return, set to the starting block number | 686 | * results - on successful return, set to the starting block number |
| 687 | * of the newly allocated contiguous range. | 687 | * of the newly allocated contiguous range. |
| 688 | * | 688 | * |
| 689 | * RETURN VALUES: | 689 | * RETURN VALUES: |
| 690 | * 0 - success | 690 | * 0 - success |
| 691 | * -ENOSPC - insufficient disk resources | 691 | * -ENOSPC - insufficient disk resources |
| 692 | * -EIO - i/o error | 692 | * -EIO - i/o error |
| 693 | */ | 693 | */ |
| 694 | int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) | 694 | int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) |
| 695 | { | 695 | { |
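dbAlloc's comment block (re-tabbed above) describes a hint-driven, progressively widening search, which in control-flow terms is a fallback ladder: contiguous-at-hint, then near the hint, then the hint's allocation group, then anywhere in the map. A sketch of that ladder's shape only — the helpers are stubbed, and the AG derivation from the hint is illustrative, not JFS's actual geometry:

    #include <stdio.h>

    /* stubs standing in for the dmap helpers documented below;
     * each returns 0 on success or -1 for "no space here" */
    static int alloc_next(long hint, long n, long *res) { return -1; }
    static int alloc_near(long hint, long n, long *res) { return -1; }
    static int alloc_ag(int agno, long n, long *res)    { return -1; }
    static int alloc_any(long n, long *res)             { *res = 0; return 0; }

    static int db_alloc(long hint, long n, long *res)
    {
        if (hint && alloc_next(hint, n, res) == 0) return 0;   /* extend at hint */
        if (hint && alloc_near(hint, n, res) == 0) return 0;   /* close to hint  */
        if (alloc_ag((int)(hint >> 20), n, res) == 0) return 0;/* hint's AG      */
        return alloc_any(n, res);                              /* whole map      */
    }

    int main(void)
    {
        long blk;
        if (db_alloc(4096, 8, &blk) == 0)
            printf("allocated at block %ld\n", blk);
        return 0;
    }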
| @@ -706,12 +706,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) | |||
| 706 | /* assert that nblocks is valid */ | 706 | /* assert that nblocks is valid */ |
| 707 | assert(nblocks > 0); | 707 | assert(nblocks > 0); |
| 708 | 708 | ||
| 709 | #ifdef _STILL_TO_PORT | ||
| 710 | /* DASD limit check F226941 */ | ||
| 711 | if (OVER_LIMIT(ip, nblocks)) | ||
| 712 | return -ENOSPC; | ||
| 713 | #endif /* _STILL_TO_PORT */ | ||
| 714 | |||
| 715 | /* get the log2 number of blocks to be allocated. | 709 | /* get the log2 number of blocks to be allocated. |
| 716 | * if the number of blocks is not a log2 multiple, | 710 | * if the number of blocks is not a log2 multiple, |
| 717 | * it will be rounded up to the next log2 multiple. | 711 | * it will be rounded up to the next log2 multiple. |
| @@ -720,7 +714,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) | |||
| 720 | 714 | ||
| 721 | bmp = JFS_SBI(ip->i_sb)->bmap; | 715 | bmp = JFS_SBI(ip->i_sb)->bmap; |
| 722 | 716 | ||
| 723 | //retry: /* serialize w.r.t.extendfs() */ | ||
| 724 | mapSize = bmp->db_mapsize; | 717 | mapSize = bmp->db_mapsize; |
| 725 | 718 | ||
| 726 | /* the hint should be within the map */ | 719 | /* the hint should be within the map */ |
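Right after the deleted DASD-limit stub, dbAlloc converts nblocks to l2nb, rounding up to the next power of two so the buddy-style dmap trees can be searched by order. The rounding the surviving comment describes:

    #include <stdio.h>

    /* log2 of nblocks, rounded up: the l2nb used to search the dmap trees */
    static int blks_to_l2(long nblocks)
    {
        int l2 = 0;
        while ((1L << l2) < nblocks)
            l2++;
        return l2;
    }

    int main(void)
    {
        printf("%d\n", blks_to_l2(8));   /* exact power of two -> 3 */
        printf("%d\n", blks_to_l2(9));   /* rounded up         -> 4 */
        return 0;
    }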
| @@ -879,17 +872,17 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) | |||
| 879 | /* | 872 | /* |
| 880 | * NAME: dbAllocExact() | 873 | * NAME: dbAllocExact() |
| 881 | * | 874 | * |
| 882 | * FUNCTION: try to allocate the requested extent; | 875 | * FUNCTION: try to allocate the requested extent; |
| 883 | * | 876 | * |
| 884 | * PARAMETERS: | 877 | * PARAMETERS: |
| 885 | * ip - pointer to in-core inode; | 878 | * ip - pointer to in-core inode; |
| 886 | * blkno - extent address; | 879 | * blkno - extent address; |
| 887 | * nblocks - extent length; | 880 | * nblocks - extent length; |
| 888 | * | 881 | * |
| 889 | * RETURN VALUES: | 882 | * RETURN VALUES: |
| 890 | * 0 - success | 883 | * 0 - success |
| 891 | * -ENOSPC - insufficient disk resources | 884 | * -ENOSPC - insufficient disk resources |
| 892 | * -EIO - i/o error | 885 | * -EIO - i/o error |
| 893 | */ | 886 | */ |
| 894 | int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) | 887 | int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) |
| 895 | { | 888 | { |
| @@ -946,7 +939,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) | |||
| 946 | /* | 939 | /* |
| 947 | * NAME: dbReAlloc() | 940 | * NAME: dbReAlloc() |
| 948 | * | 941 | * |
| 949 | * FUNCTION: attempt to extend a current allocation by a specified | 942 | * FUNCTION: attempt to extend a current allocation by a specified |
| 950 | * number of blocks. | 943 | * number of blocks. |
| 951 | * | 944 | * |
| 952 | * this routine attempts to satisfy the allocation request | 945 | * this routine attempts to satisfy the allocation request |
| @@ -959,21 +952,21 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) | |||
| 959 | * number of blocks required. | 952 | * number of blocks required. |
| 960 | * | 953 | * |
| 961 | * PARAMETERS: | 954 | * PARAMETERS: |
| 962 | * ip - pointer to in-core inode requiring allocation. | 955 | * ip - pointer to in-core inode requiring allocation. |
| 963 | * blkno - starting block of the current allocation. | 956 | * blkno - starting block of the current allocation. |
| 964 | * nblocks - number of contiguous blocks within the current | 957 | * nblocks - number of contiguous blocks within the current |
| 965 | * allocation. | 958 | * allocation. |
| 966 | * addnblocks - number of blocks to add to the allocation. | 959 | * addnblocks - number of blocks to add to the allocation. |
| 967 | * results - on successful return, set to the starting block number | 960 | * results - on successful return, set to the starting block number |
| 968 | * of the existing allocation if the existing allocation | 961 | * of the existing allocation if the existing allocation |
| 969 | * was extended in place or to a newly allocated contiguous | 962 | * was extended in place or to a newly allocated contiguous |
| 970 | * range if the existing allocation could not be extended | 963 | * range if the existing allocation could not be extended |
| 971 | * in place. | 964 | * in place. |
| 972 | * | 965 | * |
| 973 | * RETURN VALUES: | 966 | * RETURN VALUES: |
| 974 | * 0 - success | 967 | * 0 - success |
| 975 | * -ENOSPC - insufficient disk resources | 968 | * -ENOSPC - insufficient disk resources |
| 976 | * -EIO - i/o error | 969 | * -EIO - i/o error |
| 977 | */ | 970 | */ |
| 978 | int | 971 | int |
| 979 | dbReAlloc(struct inode *ip, | 972 | dbReAlloc(struct inode *ip, |
| @@ -1004,7 +997,7 @@ dbReAlloc(struct inode *ip, | |||
| 1004 | /* | 997 | /* |
| 1005 | * NAME: dbExtend() | 998 | * NAME: dbExtend() |
| 1006 | * | 999 | * |
| 1007 | * FUNCTION: attempt to extend a current allocation by a specified | 1000 | * FUNCTION: attempt to extend a current allocation by a specified |
| 1008 | * number of blocks. | 1001 | * number of blocks. |
| 1009 | * | 1002 | * |
| 1010 | * this routine attempts to satisfy the allocation request | 1003 | * this routine attempts to satisfy the allocation request |
| @@ -1013,16 +1006,16 @@ dbReAlloc(struct inode *ip, | |||
| 1013 | * immediately following the current allocation. | 1006 | * immediately following the current allocation. |
| 1014 | * | 1007 | * |
| 1015 | * PARAMETERS: | 1008 | * PARAMETERS: |
| 1016 | * ip - pointer to in-core inode requiring allocation. | 1009 | * ip - pointer to in-core inode requiring allocation. |
| 1017 | * blkno - starting block of the current allocation. | 1010 | * blkno - starting block of the current allocation. |
| 1018 | * nblocks - number of contiguous blocks within the current | 1011 | * nblocks - number of contiguous blocks within the current |
| 1019 | * allocation. | 1012 | * allocation. |
| 1020 | * addnblocks - number of blocks to add to the allocation. | 1013 | * addnblocks - number of blocks to add to the allocation. |
| 1021 | * | 1014 | * |
| 1022 | * RETURN VALUES: | 1015 | * RETURN VALUES: |
| 1023 | * 0 - success | 1016 | * 0 - success |
| 1024 | * -ENOSPC - insufficient disk resources | 1017 | * -ENOSPC - insufficient disk resources |
| 1025 | * -EIO - i/o error | 1018 | * -EIO - i/o error |
| 1026 | */ | 1019 | */ |
| 1027 | static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) | 1020 | static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) |
| 1028 | { | 1021 | { |
| @@ -1109,19 +1102,19 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) | |||
| 1109 | /* | 1102 | /* |
| 1110 | * NAME: dbAllocNext() | 1103 | * NAME: dbAllocNext() |
| 1111 | * | 1104 | * |
| 1112 | * FUNCTION: attempt to allocate the blocks of the specified block | 1105 | * FUNCTION: attempt to allocate the blocks of the specified block |
| 1113 | * range within a dmap. | 1106 | * range within a dmap. |
| 1114 | * | 1107 | * |
| 1115 | * PARAMETERS: | 1108 | * PARAMETERS: |
| 1116 | * bmp - pointer to bmap descriptor | 1109 | * bmp - pointer to bmap descriptor |
| 1117 | * dp - pointer to dmap. | 1110 | * dp - pointer to dmap. |
| 1118 | * blkno - starting block number of the range. | 1111 | * blkno - starting block number of the range. |
| 1119 | * nblocks - number of contiguous free blocks of the range. | 1112 | * nblocks - number of contiguous free blocks of the range. |
| 1120 | * | 1113 | * |
| 1121 | * RETURN VALUES: | 1114 | * RETURN VALUES: |
| 1122 | * 0 - success | 1115 | * 0 - success |
| 1123 | * -ENOSPC - insufficient disk resources | 1116 | * -ENOSPC - insufficient disk resources |
| 1124 | * -EIO - i/o error | 1117 | * -EIO - i/o error |
| 1125 | * | 1118 | * |
| 1126 | * serialization: IREAD_LOCK(ipbmap) held on entry/exit; | 1119 | * serialization: IREAD_LOCK(ipbmap) held on entry/exit; |
| 1127 | */ | 1120 | */ |
| @@ -1233,7 +1226,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 1233 | /* | 1226 | /* |
| 1234 | * NAME: dbAllocNear() | 1227 | * NAME: dbAllocNear() |
| 1235 | * | 1228 | * |
| 1236 | * FUNCTION: attempt to allocate a number of contiguous free blocks near | 1229 | * FUNCTION: attempt to allocate a number of contiguous free blocks near |
| 1237 | * a specified block (hint) within a dmap. | 1230 | * a specified block (hint) within a dmap. |
| 1238 | * | 1231 | * |
| 1239 | * starting with the dmap leaf that covers the hint, we'll | 1232 | * starting with the dmap leaf that covers the hint, we'll |
| @@ -1242,18 +1235,18 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 1242 | * the desired free space. | 1235 | * the desired free space. |
| 1243 | * | 1236 | * |
| 1244 | * PARAMETERS: | 1237 | * PARAMETERS: |
| 1245 | * bmp - pointer to bmap descriptor | 1238 | * bmp - pointer to bmap descriptor |
| 1246 | * dp - pointer to dmap. | 1239 | * dp - pointer to dmap. |
| 1247 | * blkno - block number to allocate near. | 1240 | * blkno - block number to allocate near. |
| 1248 | * nblocks - actual number of contiguous free blocks desired. | 1241 | * nblocks - actual number of contiguous free blocks desired. |
| 1249 | * l2nb - log2 number of contiguous free blocks desired. | 1242 | * l2nb - log2 number of contiguous free blocks desired. |
| 1250 | * results - on successful return, set to the starting block number | 1243 | * results - on successful return, set to the starting block number |
| 1251 | * of the newly allocated range. | 1244 | * of the newly allocated range. |
| 1252 | * | 1245 | * |
| 1253 | * RETURN VALUES: | 1246 | * RETURN VALUES: |
| 1254 | * 0 - success | 1247 | * 0 - success |
| 1255 | * -ENOSPC - insufficient disk resources | 1248 | * -ENOSPC - insufficient disk resources |
| 1256 | * -EIO - i/o error | 1249 | * -EIO - i/o error |
| 1257 | * | 1250 | * |
| 1258 | * serialization: IREAD_LOCK(ipbmap) held on entry/exit; | 1251 | * serialization: IREAD_LOCK(ipbmap) held on entry/exit; |
| 1259 | */ | 1252 | */ |
| @@ -1316,7 +1309,7 @@ dbAllocNear(struct bmap * bmp, | |||
| 1316 | /* | 1309 | /* |
| 1317 | * NAME: dbAllocAG() | 1310 | * NAME: dbAllocAG() |
| 1318 | * | 1311 | * |
| 1319 | * FUNCTION: attempt to allocate the specified number of contiguous | 1312 | * FUNCTION: attempt to allocate the specified number of contiguous |
| 1320 | * free blocks within the specified allocation group. | 1313 | * free blocks within the specified allocation group. |
| 1321 | * | 1314 | * |
| 1322 | * unless the allocation group size is equal to the number | 1315 | * unless the allocation group size is equal to the number |
| @@ -1353,17 +1346,17 @@ dbAllocNear(struct bmap * bmp, | |||
| 1353 | * the allocation group. | 1346 | * the allocation group. |
| 1354 | * | 1347 | * |
| 1355 | * PARAMETERS: | 1348 | * PARAMETERS: |
| 1356 | * bmp - pointer to bmap descriptor | 1349 | * bmp - pointer to bmap descriptor |
| 1357 | * agno - allocation group number. | 1350 | * agno - allocation group number. |
| 1358 | * nblocks - actual number of contiguous free blocks desired. | 1351 | * nblocks - actual number of contiguous free blocks desired. |
| 1359 | * l2nb - log2 number of contiguous free blocks desired. | 1352 | * l2nb - log2 number of contiguous free blocks desired. |
| 1360 | * results - on successful return, set to the starting block number | 1353 | * results - on successful return, set to the starting block number |
| 1361 | * of the newly allocated range. | 1354 | * of the newly allocated range. |
| 1362 | * | 1355 | * |
| 1363 | * RETURN VALUES: | 1356 | * RETURN VALUES: |
| 1364 | * 0 - success | 1357 | * 0 - success |
| 1365 | * -ENOSPC - insufficient disk resources | 1358 | * -ENOSPC - insufficient disk resources |
| 1366 | * -EIO - i/o error | 1359 | * -EIO - i/o error |
| 1367 | * | 1360 | * |
| 1368 | * note: IWRITE_LOCK(ipmap) held on entry/exit; | 1361 | * note: IWRITE_LOCK(ipmap) held on entry/exit; |
| 1369 | */ | 1362 | */ |
| @@ -1546,7 +1539,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
| 1546 | /* | 1539 | /* |
| 1547 | * NAME: dbAllocAny() | 1540 | * NAME: dbAllocAny() |
| 1548 | * | 1541 | * |
| 1549 | * FUNCTION: attempt to allocate the specified number of contiguous | 1542 | * FUNCTION: attempt to allocate the specified number of contiguous |
| 1550 | * free blocks anywhere in the file system. | 1543 | * free blocks anywhere in the file system. |
| 1551 | * | 1544 | * |
| 1552 | * dbAllocAny() attempts to find the sufficient free space by | 1545 | * dbAllocAny() attempts to find the sufficient free space by |
| @@ -1556,16 +1549,16 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
| 1556 | * desired free space is allocated. | 1549 | * desired free space is allocated. |
| 1557 | * | 1550 | * |
| 1558 | * PARAMETERS: | 1551 | * PARAMETERS: |
| 1559 | * bmp - pointer to bmap descriptor | 1552 | * bmp - pointer to bmap descriptor |
| 1560 | * nblocks - actual number of contiguous free blocks desired. | 1553 | * nblocks - actual number of contiguous free blocks desired. |
| 1561 | * l2nb - log2 number of contiguous free blocks desired. | 1554 | * l2nb - log2 number of contiguous free blocks desired. |
| 1562 | * results - on successful return, set to the starting block number | 1555 | * results - on successful return, set to the starting block number |
| 1563 | * of the newly allocated range. | 1556 | * of the newly allocated range. |
| 1564 | * | 1557 | * |
| 1565 | * RETURN VALUES: | 1558 | * RETURN VALUES: |
| 1566 | * 0 - success | 1559 | * 0 - success |
| 1567 | * -ENOSPC - insufficient disk resources | 1560 | * -ENOSPC - insufficient disk resources |
| 1568 | * -EIO - i/o error | 1561 | * -EIO - i/o error |
| 1569 | * | 1562 | * |
| 1570 | * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; | 1563 | * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; |
| 1571 | */ | 1564 | */ |
| @@ -1598,9 +1591,9 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) | |||
| 1598 | /* | 1591 | /* |
| 1599 | * NAME: dbFindCtl() | 1592 | * NAME: dbFindCtl() |
| 1600 | * | 1593 | * |
| 1601 | * FUNCTION: starting at a specified dmap control page level and block | 1594 | * FUNCTION: starting at a specified dmap control page level and block |
| 1602 | * number, search down the dmap control levels for a range of | 1595 | * number, search down the dmap control levels for a range of |
| 1603 | * contiguous free blocks large enough to satisfy an allocation | 1596 | * contiguous free blocks large enough to satisfy an allocation |
| 1604 | * request for the specified number of free blocks. | 1597 | * request for the specified number of free blocks. |
| 1605 | * | 1598 | * |
| 1606 | * if sufficient contiguous free blocks are found, this routine | 1599 | * if sufficient contiguous free blocks are found, this routine |
| @@ -1609,17 +1602,17 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) | |||
| 1609 | * is sufficient in size. | 1602 | * is sufficient in size. |
| 1610 | * | 1603 | * |
| 1611 | * PARAMETERS: | 1604 | * PARAMETERS: |
| 1612 | * bmp - pointer to bmap descriptor | 1605 | * bmp - pointer to bmap descriptor |
| 1613 | * level - starting dmap control page level. | 1606 | * level - starting dmap control page level. |
| 1614 | * l2nb - log2 number of contiguous free blocks desired. | 1607 | * l2nb - log2 number of contiguous free blocks desired. |
| 1615 | * *blkno - on entry, starting block number for conducting the search. | 1608 | * *blkno - on entry, starting block number for conducting the search. |
| 1616 | * on successful return, the first block within a dmap page | 1609 | * on successful return, the first block within a dmap page |
| 1617 | * that contains or starts a range of contiguous free blocks. | 1610 | * that contains or starts a range of contiguous free blocks. |
| 1618 | * | 1611 | * |
| 1619 | * RETURN VALUES: | 1612 | * RETURN VALUES: |
| 1620 | * 0 - success | 1613 | * 0 - success |
| 1621 | * -ENOSPC - insufficient disk resources | 1614 | * -ENOSPC - insufficient disk resources |
| 1622 | * -EIO - i/o error | 1615 | * -EIO - i/o error |
| 1623 | * | 1616 | * |
| 1624 | * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; | 1617 | * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; |
| 1625 | */ | 1618 | */ |
| @@ -1699,7 +1692,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) | |||
| 1699 | /* | 1692 | /* |
| 1700 | * NAME: dbAllocCtl() | 1693 | * NAME: dbAllocCtl() |
| 1701 | * | 1694 | * |
| 1702 | * FUNCTION: attempt to allocate a specified number of contiguous | 1695 | * FUNCTION: attempt to allocate a specified number of contiguous |
| 1703 | * blocks starting within a specific dmap. | 1696 | * blocks starting within a specific dmap. |
| 1704 | * | 1697 | * |
| 1705 | * this routine is called by higher level routines that search | 1698 | * this routine is called by higher level routines that search |
| @@ -1726,18 +1719,18 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) | |||
| 1726 | * first dmap (i.e. blkno). | 1719 | * first dmap (i.e. blkno). |
| 1727 | * | 1720 | * |
| 1728 | * PARAMETERS: | 1721 | * PARAMETERS: |
| 1729 | * bmp - pointer to bmap descriptor | 1722 | * bmp - pointer to bmap descriptor |
| 1730 | * nblocks - actual number of contiguous free blocks to allocate. | 1723 | * nblocks - actual number of contiguous free blocks to allocate. |
| 1731 | * l2nb - log2 number of contiguous free blocks to allocate. | 1724 | * l2nb - log2 number of contiguous free blocks to allocate. |
| 1732 | * blkno - starting block number of the dmap to start the allocation | 1725 | * blkno - starting block number of the dmap to start the allocation |
| 1733 | * from. | 1726 | * from. |
| 1734 | * results - on successful return, set to the starting block number | 1727 | * results - on successful return, set to the starting block number |
| 1735 | * of the newly allocated range. | 1728 | * of the newly allocated range. |
| 1736 | * | 1729 | * |
| 1737 | * RETURN VALUES: | 1730 | * RETURN VALUES: |
| 1738 | * 0 - success | 1731 | * 0 - success |
| 1739 | * -ENOSPC - insufficient disk resources | 1732 | * -ENOSPC - insufficient disk resources |
| 1740 | * -EIO - i/o error | 1733 | * -EIO - i/o error |
| 1741 | * | 1734 | * |
| 1742 | * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; | 1735 | * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; |
| 1743 | */ | 1736 | */ |
| @@ -1870,7 +1863,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) | |||
| 1870 | /* | 1863 | /* |
| 1871 | * NAME: dbAllocDmapLev() | 1864 | * NAME: dbAllocDmapLev() |
| 1872 | * | 1865 | * |
| 1873 | * FUNCTION: attempt to allocate a specified number of contiguous blocks | 1866 | * FUNCTION: attempt to allocate a specified number of contiguous blocks |
| 1874 | * from a specified dmap. | 1867 | * from a specified dmap. |
| 1875 | * | 1868 | * |
| 1876 | * this routine checks if the contiguous blocks are available. | 1869 | * this routine checks if the contiguous blocks are available. |
| @@ -1878,17 +1871,17 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) | |||
| 1878 | * returned. | 1871 | * returned. |
| 1879 | * | 1872 | * |
| 1880 | * PARAMETERS: | 1873 | * PARAMETERS: |
| 1881 | * mp - pointer to bmap descriptor | 1874 | * mp - pointer to bmap descriptor |
| 1882 | * dp - pointer to dmap to attempt to allocate blocks from. | 1875 | * dp - pointer to dmap to attempt to allocate blocks from. |
| 1883 | * l2nb - log2 number of contiguous block desired. | 1876 | * l2nb - log2 number of contiguous block desired. |
| 1884 | * nblocks - actual number of contiguous block desired. | 1877 | * nblocks - actual number of contiguous block desired. |
| 1885 | * results - on successful return, set to the starting block number | 1878 | * results - on successful return, set to the starting block number |
| 1886 | * of the newly allocated range. | 1879 | * of the newly allocated range. |
| 1887 | * | 1880 | * |
| 1888 | * RETURN VALUES: | 1881 | * RETURN VALUES: |
| 1889 | * 0 - success | 1882 | * 0 - success |
| 1890 | * -ENOSPC - insufficient disk resources | 1883 | * -ENOSPC - insufficient disk resources |
| 1891 | * -EIO - i/o error | 1884 | * -EIO - i/o error |
| 1892 | * | 1885 | * |
| 1893 | * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or | 1886 | * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or |
| 1894 | * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; | 1887 | * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; |
| @@ -1933,7 +1926,7 @@ dbAllocDmapLev(struct bmap * bmp, | |||
| 1933 | /* | 1926 | /* |
| 1934 | * NAME: dbAllocDmap() | 1927 | * NAME: dbAllocDmap() |
| 1935 | * | 1928 | * |
| 1936 | * FUNCTION: adjust the disk allocation map to reflect the allocation | 1929 | * FUNCTION: adjust the disk allocation map to reflect the allocation |
| 1937 | * of a specified block range within a dmap. | 1930 | * of a specified block range within a dmap. |
| 1938 | * | 1931 | * |
| 1939 | * this routine allocates the specified blocks from the dmap | 1932 | * this routine allocates the specified blocks from the dmap |
| @@ -1946,14 +1939,14 @@ dbAllocDmapLev(struct bmap * bmp, | |||
| 1946 | * covers this dmap. | 1939 | * covers this dmap. |
| 1947 | * | 1940 | * |
| 1948 | * PARAMETERS: | 1941 | * PARAMETERS: |
| 1949 | * bmp - pointer to bmap descriptor | 1942 | * bmp - pointer to bmap descriptor |
| 1950 | * dp - pointer to dmap to allocate the block range from. | 1943 | * dp - pointer to dmap to allocate the block range from. |
| 1951 | * blkno - starting block number of the block to be allocated. | 1944 | * blkno - starting block number of the block to be allocated. |
| 1952 | * nblocks - number of blocks to be allocated. | 1945 | * nblocks - number of blocks to be allocated. |
| 1953 | * | 1946 | * |
| 1954 | * RETURN VALUES: | 1947 | * RETURN VALUES: |
| 1955 | * 0 - success | 1948 | * 0 - success |
| 1956 | * -EIO - i/o error | 1949 | * -EIO - i/o error |
| 1957 | * | 1950 | * |
| 1958 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 1951 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; |
| 1959 | */ | 1952 | */ |
| @@ -1989,7 +1982,7 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 1989 | /* | 1982 | /* |
| 1990 | * NAME: dbFreeDmap() | 1983 | * NAME: dbFreeDmap() |
| 1991 | * | 1984 | * |
| 1992 | * FUNCTION: adjust the disk allocation map to reflect the freeing | 1985 | * FUNCTION: adjust the disk allocation map to reflect the freeing |
| 1993 | * of a specified block range within a dmap. | 1986 | * of a specified block range within a dmap. |
| 1994 | * | 1987 | * |
| 1995 | * this routine frees the specified blocks from the dmap through | 1988 | * this routine frees the specified blocks from the dmap through |
| @@ -1997,18 +1990,18 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 1997 | * causes the maximum string of free blocks within the dmap to | 1990 | * causes the maximum string of free blocks within the dmap to |
| 1998 | * change (i.e. the value of the root of the dmap's dmtree), this | 1991 | * change (i.e. the value of the root of the dmap's dmtree), this |
| 1999 | * routine will cause this change to be reflected up through the | 1992 | * routine will cause this change to be reflected up through the |
| 2000 | * appropriate levels of the dmap control pages by a call to | 1993 | * appropriate levels of the dmap control pages by a call to |
| 2001 | * dbAdjCtl() for the L0 dmap control page that covers this dmap. | 1994 | * dbAdjCtl() for the L0 dmap control page that covers this dmap. |
| 2002 | * | 1995 | * |
| 2003 | * PARAMETERS: | 1996 | * PARAMETERS: |
| 2004 | * bmp - pointer to bmap descriptor | 1997 | * bmp - pointer to bmap descriptor |
| 2005 | * dp - pointer to dmap to free the block range from. | 1998 | * dp - pointer to dmap to free the block range from. |
| 2006 | * blkno - starting block number of the block to be freed. | 1999 | * blkno - starting block number of the block to be freed. |
| 2007 | * nblocks - number of blocks to be freed. | 2000 | * nblocks - number of blocks to be freed. |
| 2008 | * | 2001 | * |
| 2009 | * RETURN VALUES: | 2002 | * RETURN VALUES: |
| 2010 | * 0 - success | 2003 | * 0 - success |
| 2011 | * -EIO - i/o error | 2004 | * -EIO - i/o error |
| 2012 | * | 2005 | * |
| 2013 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 2006 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; |
| 2014 | */ | 2007 | */ |
| @@ -2055,7 +2048,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 2055 | /* | 2048 | /* |
| 2056 | * NAME: dbAllocBits() | 2049 | * NAME: dbAllocBits() |
| 2057 | * | 2050 | * |
| 2058 | * FUNCTION: allocate a specified block range from a dmap. | 2051 | * FUNCTION: allocate a specified block range from a dmap. |
| 2059 | * | 2052 | * |
| 2060 | * this routine updates the dmap to reflect the working | 2053 | * this routine updates the dmap to reflect the working |
| 2061 | * state allocation of the specified block range. it directly | 2054 | * state allocation of the specified block range. it directly |
| @@ -2065,10 +2058,10 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 2065 | * dmap's dmtree, as a whole, to reflect the allocated range. | 2058 | * dmap's dmtree, as a whole, to reflect the allocated range. |
| 2066 | * | 2059 | * |
| 2067 | * PARAMETERS: | 2060 | * PARAMETERS: |
| 2068 | * bmp - pointer to bmap descriptor | 2061 | * bmp - pointer to bmap descriptor |
| 2069 | * dp - pointer to dmap to allocate bits from. | 2062 | * dp - pointer to dmap to allocate bits from. |
| 2070 | * blkno - starting block number of the bits to be allocated. | 2063 | * blkno - starting block number of the bits to be allocated. |
| 2071 | * nblocks - number of bits to be allocated. | 2064 | * nblocks - number of bits to be allocated. |
| 2072 | * | 2065 | * |
| 2073 | * RETURN VALUES: none | 2066 | * RETURN VALUES: none |
| 2074 | * | 2067 | * |
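At its core, the working-map update this header describes is setting a run of bits in a bitmap whose bit 0 is the most significant bit of each word (dbFreeBits() below clears the same mask). A minimal illustrative sketch in that spirit, ignoring the word-at-a-time fast path and the dmtree fixups the kernel performs:

    /* illustrative only: mark nblocks bits allocated, starting at
     * dmap-relative bit bitno; bit 0 of each word is the MSB
     */
    static void alloc_bits(u32 *wmap, int bitno, int nblocks)
    {
        int i;

        for (i = bitno; i < bitno + nblocks; i++)
            wmap[i >> 5] |= 0x80000000u >> (i & 31);
    }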
| @@ -2149,7 +2142,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 2149 | * the allocated words. | 2142 | * the allocated words. |
| 2150 | */ | 2143 | */ |
| 2151 | for (; nwords > 0; nwords -= nw) { | 2144 | for (; nwords > 0; nwords -= nw) { |
| 2152 | if (leaf[word] < BUDMIN) { | 2145 | if (leaf[word] < BUDMIN) { |
| 2153 | jfs_error(bmp->db_ipbmap->i_sb, | 2146 | jfs_error(bmp->db_ipbmap->i_sb, |
| 2154 | "dbAllocBits: leaf page " | 2147 | "dbAllocBits: leaf page " |
| 2155 | "corrupt"); | 2148 | "corrupt"); |
| @@ -2202,7 +2195,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 2202 | /* | 2195 | /* |
| 2203 | * NAME: dbFreeBits() | 2196 | * NAME: dbFreeBits() |
| 2204 | * | 2197 | * |
| 2205 | * FUNCTION: free a specified block range from a dmap. | 2198 | * FUNCTION: free a specified block range from a dmap. |
| 2206 | * | 2199 | * |
| 2207 | * this routine updates the dmap to reflect the working | 2200 | * this routine updates the dmap to reflect the working |
| 2208 | * state deallocation of the specified block range. it directly | 2201 | * state deallocation of the specified block range. it directly |
| @@ -2212,10 +2205,10 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 2212 | * dmtree, as a whole, to reflect the deallocated range. | 2205 | * dmtree, as a whole, to reflect the deallocated range. |
| 2213 | * | 2206 | * |
| 2214 | * PARAMETERS: | 2207 | * PARAMETERS: |
| 2215 | * bmp - pointer to bmap descriptor | 2208 | * bmp - pointer to bmap descriptor |
| 2216 | * dp - pointer to dmap to free bits from. | 2209 | * dp - pointer to dmap to free bits from. |
| 2217 | * blkno - starting block number of the bits to be freed. | 2210 | * blkno - starting block number of the bits to be freed. |
| 2218 | * nblocks - number of bits to be freed. | 2211 | * nblocks - number of bits to be freed. |
| 2219 | * | 2212 | * |
| 2220 | * RETURN VALUES: 0 for success | 2213 | * RETURN VALUES: 0 for success |
| 2221 | * | 2214 | * |
| @@ -2388,19 +2381,19 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 2388 | * the new root value and the next dmap control page level to | 2381 | * the new root value and the next dmap control page level to |
| 2389 | * be adjusted. | 2382 | * be adjusted. |
| 2390 | * PARAMETERS: | 2383 | * PARAMETERS: |
| 2391 | * bmp - pointer to bmap descriptor | 2384 | * bmp - pointer to bmap descriptor |
| 2392 | * blkno - the first block of a block range within a dmap. it is | 2385 | * blkno - the first block of a block range within a dmap. it is |
| 2393 | * the allocation or deallocation of this block range that | 2386 | * the allocation or deallocation of this block range that |
| 2394 | * requires the dmap control page to be adjusted. | 2387 | * requires the dmap control page to be adjusted. |
| 2395 | * newval - the new value of the lower level dmap or dmap control | 2388 | * newval - the new value of the lower level dmap or dmap control |
| 2396 | * page root. | 2389 | * page root. |
| 2397 | * alloc - 'true' if adjustment is due to an allocation. | 2390 | * alloc - 'true' if adjustment is due to an allocation. |
| 2398 | * level - current level of dmap control page (i.e. L0, L1, L2) to | 2391 | * level - current level of dmap control page (i.e. L0, L1, L2) to |
| 2399 | * be adjusted. | 2392 | * be adjusted. |
| 2400 | * | 2393 | * |
| 2401 | * RETURN VALUES: | 2394 | * RETURN VALUES: |
| 2402 | * 0 - success | 2395 | * 0 - success |
| 2403 | * -EIO - i/o error | 2396 | * -EIO - i/o error |
| 2404 | * | 2397 | * |
| 2405 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 2398 | * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; |
| 2406 | */ | 2399 | */ |
| @@ -2544,16 +2537,16 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) | |||
| 2544 | /* | 2537 | /* |
| 2545 | * NAME: dbSplit() | 2538 | * NAME: dbSplit() |
| 2546 | * | 2539 | * |
| 2547 | * FUNCTION: update the leaf of a dmtree with a new value, splitting | 2540 | * FUNCTION: update the leaf of a dmtree with a new value, splitting |
| 2548 | * the leaf from the binary buddy system of the dmtree's | 2541 | * the leaf from the binary buddy system of the dmtree's |
| 2549 | * leaves, as required. | 2542 | * leaves, as required. |
| 2550 | * | 2543 | * |
| 2551 | * PARAMETERS: | 2544 | * PARAMETERS: |
| 2552 | * tp - pointer to the tree containing the leaf. | 2545 | * tp - pointer to the tree containing the leaf. |
| 2553 | * leafno - the number of the leaf to be updated. | 2546 | * leafno - the number of the leaf to be updated. |
| 2554 | * splitsz - the size the binary buddy system starting at the leaf | 2547 | * splitsz - the size the binary buddy system starting at the leaf |
| 2555 | * must be split to, specified as the log2 number of blocks. | 2548 | * must be split to, specified as the log2 number of blocks. |
| 2556 | * newval - the new value for the leaf. | 2549 | * newval - the new value for the leaf. |
| 2557 | * | 2550 | * |
| 2558 | * RETURN VALUES: none | 2551 | * RETURN VALUES: none |
| 2559 | * | 2552 | * |
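The split rule is easiest to see on a bare array of leaf values. A simplified sketch, assuming leaf values are plain log2 buddy sizes (i.e. budmin == 0) and omitting the dbAdjTree() propagation the kernel performs for every leaf it touches:

    static void split_leaves(signed char *leaf, int leafno, int splitsz,
                             int newval)
    {
        int cursz;

        /* cut the system in half repeatedly; each freed right half
         * becomes the head of its own, smaller buddy system
         */
        for (cursz = leaf[leafno] - 1; cursz >= splitsz; cursz--)
            leaf[leafno ^ (1 << cursz)] = cursz;

        leaf[leafno] = newval;  /* leaf now heads a 2^splitsz system */
    }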
| @@ -2600,7 +2593,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) | |||
| 2600 | /* | 2593 | /* |
| 2601 | * NAME: dbBackSplit() | 2594 | * NAME: dbBackSplit() |
| 2602 | * | 2595 | * |
| 2603 | * FUNCTION: back split the binary buddy system of dmtree leaves | 2596 | * FUNCTION: back split the binary buddy system of dmtree leaves |
| 2604 | * that hold a specified leaf until the specified leaf | 2597 | * that hold a specified leaf until the specified leaf |
| 2605 | * starts its own binary buddy system. | 2598 | * starts its own binary buddy system. |
| 2606 | * | 2599 | * |
| @@ -2617,8 +2610,8 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) | |||
| 2617 | * in which a previous join operation must be backed out. | 2610 | * in which a previous join operation must be backed out. |
| 2618 | * | 2611 | * |
| 2619 | * PARAMETERS: | 2612 | * PARAMETERS: |
| 2620 | * tp - pointer to the tree containing the leaf. | 2613 | * tp - pointer to the tree containing the leaf. |
| 2621 | * leafno - the number of the leaf to be updated. | 2614 | * leafno - the number of the leaf to be updated. |
| 2622 | * | 2615 | * |
| 2623 | * RETURN VALUES: none | 2616 | * RETURN VALUES: none |
| 2624 | * | 2617 | * |
| @@ -2692,14 +2685,14 @@ static int dbBackSplit(dmtree_t * tp, int leafno) | |||
| 2692 | /* | 2685 | /* |
| 2693 | * NAME: dbJoin() | 2686 | * NAME: dbJoin() |
| 2694 | * | 2687 | * |
| 2695 | * FUNCTION: update the leaf of a dmtree with a new value, joining | 2688 | * FUNCTION: update the leaf of a dmtree with a new value, joining |
| 2696 | * the leaf with other leaves of the dmtree into a multi-leaf | 2689 | * the leaf with other leaves of the dmtree into a multi-leaf |
| 2697 | * binary buddy system, as required. | 2690 | * binary buddy system, as required. |
| 2698 | * | 2691 | * |
| 2699 | * PARAMETERS: | 2692 | * PARAMETERS: |
| 2700 | * tp - pointer to the tree containing the leaf. | 2693 | * tp - pointer to the tree containing the leaf. |
| 2701 | * leafno - the number of the leaf to be updated. | 2694 | * leafno - the number of the leaf to be updated. |
| 2702 | * newval - the new value for the leaf. | 2695 | * newval - the new value for the leaf. |
| 2703 | * | 2696 | * |
| 2704 | * RETURN VALUES: none | 2697 | * RETURN VALUES: none |
| 2705 | */ | 2698 | */ |
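The join is the inverse walk, again sketched on a bare leaf array with budmin == 0 (NOFREE marks a leaf absorbed into its buddy; the kernel also sanity-checks the buddy's value and routes every update through dbAdjTree()):

    #define NOFREE (-1)

    static void join_leaves(signed char *leaf, int nleafs, int leafno,
                            int newval)
    {
        int budsz = 1 << newval;

        while (budsz < nleafs) {
            int buddy = leafno ^ budsz;  /* the like-sized neighbour */

            if (leaf[buddy] != newval)   /* buddy not entirely free */
                break;

            if (leafno > buddy) {        /* left leaf leads the joined pair */
                leaf[leafno] = NOFREE;
                leafno = buddy;
            } else {
                leaf[buddy] = NOFREE;
            }
            newval++;                    /* joined system is twice as big */
            budsz <<= 1;
        }
        leaf[leafno] = newval;
    }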
| @@ -2785,15 +2778,15 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval) | |||
| 2785 | /* | 2778 | /* |
| 2786 | * NAME: dbAdjTree() | 2779 | * NAME: dbAdjTree() |
| 2787 | * | 2780 | * |
| 2788 | * FUNCTION: update a leaf of a dmtree with a new value, adjusting | 2781 | * FUNCTION: update a leaf of a dmtree with a new value, adjusting |
| 2789 | * the dmtree, as required, to reflect the new leaf value. | 2782 | * the dmtree, as required, to reflect the new leaf value. |
| 2790 | * the combination of any buddies must already be done before | 2783 | * the combination of any buddies must already be done before |
| 2791 | * this is called. | 2784 | * this is called. |
| 2792 | * | 2785 | * |
| 2793 | * PARAMETERS: | 2786 | * PARAMETERS: |
| 2794 | * tp - pointer to the tree to be adjusted. | 2787 | * tp - pointer to the tree to be adjusted. |
| 2795 | * leafno - the number of the leaf to be updated. | 2788 | * leafno - the number of the leaf to be updated. |
| 2796 | * newval - the new value for the leaf. | 2789 | * newval - the new value for the leaf. |
| 2797 | * | 2790 | * |
| 2798 | * RETURN VALUES: none | 2791 | * RETURN VALUES: none |
| 2799 | */ | 2792 | */ |
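The dmtree is an array-packed 4-ary max-tree: root at index 0, the four children of node i at 4*i+1 .. 4*i+4, leaves last (consistent with TREESIZE = 256+64+16+4+1 in jfs_dmap.h below). A standalone sketch of the adjustment, with plain parameters standing in for the dmtree fields:

    static void adj_tree(signed char *stree, int leafidx, int height,
                         int leafno, signed char newval)
    {
        int lp = leafidx + leafno, k;

        if (stree[lp] == newval)
            return;
        stree[lp] = newval;

        for (k = 0; k < height; k++) {
            int pp = (lp - 1) >> 2;       /* parent of lp's 4-group */
            int first = (pp << 2) + 1;    /* first of the four children */
            signed char max = stree[first];
            int i;

            for (i = 1; i < 4; i++)
                if (stree[first + i] > max)
                    max = stree[first + i];

            if (stree[pp] == max)         /* unchanged: stop bubbling */
                break;
            stree[pp] = max;
            lp = pp;
        }
    }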
| @@ -2852,7 +2845,7 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval) | |||
| 2852 | /* | 2845 | /* |
| 2853 | * NAME: dbFindLeaf() | 2846 | * NAME: dbFindLeaf() |
| 2854 | * | 2847 | * |
| 2855 | * FUNCTION: search a dmtree_t for sufficient free blocks, returning | 2848 | * FUNCTION: search a dmtree_t for sufficient free blocks, returning |
| 2856 | * the index of a leaf describing the free blocks if | 2849 | * the index of a leaf describing the free blocks if |
| 2857 | * sufficient free blocks are found. | 2850 | * sufficient free blocks are found. |
| 2858 | * | 2851 | * |
| @@ -2861,15 +2854,15 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval) | |||
| 2861 | * free space. | 2854 | * free space. |
| 2862 | * | 2855 | * |
| 2863 | * PARAMETERS: | 2856 | * PARAMETERS: |
| 2864 | * tp - pointer to the tree to be searched. | 2857 | * tp - pointer to the tree to be searched. |
| 2865 | * l2nb - log2 number of free blocks to search for. | 2858 | * l2nb - log2 number of free blocks to search for. |
| 2866 | * leafidx - return pointer to be set to the index of the leaf | 2859 | * leafidx - return pointer to be set to the index of the leaf |
| 2867 | * describing at least 2^l2nb free blocks if sufficient | 2860 | * describing at least 2^l2nb free blocks if sufficient |
| 2868 | * free blocks are found. | 2861 | * free blocks are found. |
| 2869 | * | 2862 | * |
| 2870 | * RETURN VALUES: | 2863 | * RETURN VALUES: |
| 2871 | * 0 - success | 2864 | * 0 - success |
| 2872 | * -ENOSPC - insufficient free blocks. | 2865 | * -ENOSPC - insufficient free blocks. |
| 2873 | */ | 2866 | */ |
| 2874 | static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) | 2867 | static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) |
| 2875 | { | 2868 | { |
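A sketch of the descent, using the same array layout as the adj_tree() sketch above (height >= 1 assumed): if the root already reports fewer than 2^l2nb free blocks the search fails; otherwise each level takes the leftmost of the four children that still fits, and after height steps the current node is the wanted leaf:

    static int find_leaf(const signed char *stree, int height,
                         int leafidx, int l2nb)
    {
        int ti, x = 1, k, n = 0;

        if (l2nb > stree[0])
            return -1;                   /* -ENOSPC in the kernel */

        for (k = height, ti = 1; k > 0; k--, ti = ((ti + n) << 2) + 1) {
            for (x = ti, n = 0; n < 4; n++)
                if (l2nb <= stree[x + n])
                    break;               /* a fitting child must exist */
        }
        return x + n - leafidx;          /* leaf number, not array index */
    }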
| @@ -2916,18 +2909,18 @@ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) | |||
| 2916 | /* | 2909 | /* |
| 2917 | * NAME: dbFindBits() | 2910 | * NAME: dbFindBits() |
| 2918 | * | 2911 | * |
| 2919 | * FUNCTION: find a specified number of binary buddy free bits within a | 2912 | * FUNCTION: find a specified number of binary buddy free bits within a |
| 2920 | * dmap bitmap word value. | 2913 | * dmap bitmap word value. |
| 2921 | * | 2914 | * |
| 2922 | * this routine searches the bitmap value for (1 << l2nb) free | 2915 | * this routine searches the bitmap value for (1 << l2nb) free |
| 2923 | * bits at (1 << l2nb) alignments within the value. | 2916 | * bits at (1 << l2nb) alignments within the value. |
| 2924 | * | 2917 | * |
| 2925 | * PARAMETERS: | 2918 | * PARAMETERS: |
| 2926 | * word - dmap bitmap word value. | 2919 | * word - dmap bitmap word value. |
| 2927 | * l2nb - number of free bits specified as a log2 number. | 2920 | * l2nb - number of free bits specified as a log2 number. |
| 2928 | * | 2921 | * |
| 2929 | * RETURN VALUES: | 2922 | * RETURN VALUES: |
| 2930 | * starting bit number of free bits. | 2923 | * starting bit number of free bits. |
| 2931 | */ | 2924 | */ |
| 2932 | static int dbFindBits(u32 word, int l2nb) | 2925 | static int dbFindBits(u32 word, int l2nb) |
| 2933 | { | 2926 | { |
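A hypothetical standalone version of this search, assuming (as in the dmap working map) that a 0 bit means free and that bit 0 is the most significant bit of the word; the kernel routine is only called once such a run is known to exist:

    static int find_bits(u32 word, int l2nb)
    {
        int nb = 1 << l2nb;              /* run length, at most 32 */
        u32 mask = (nb == 32) ? 0xffffffffu : ((1u << nb) - 1) << (32 - nb);
        int bitno;

        for (bitno = 0; bitno < 32; bitno += nb, mask >>= nb)
            if ((word & mask) == 0)      /* all nb aligned bits free */
                return bitno;
        return -1;
    }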
| @@ -2963,14 +2956,14 @@ static int dbFindBits(u32 word, int l2nb) | |||
| 2963 | /* | 2956 | /* |
| 2964 | * NAME: dbMaxBud(u8 *cp) | 2957 | * NAME: dbMaxBud(u8 *cp) |
| 2965 | * | 2958 | * |
| 2966 | * FUNCTION: determine the largest binary buddy string of free | 2959 | * FUNCTION: determine the largest binary buddy string of free |
| 2967 | * bits within a 32-bit word of the map. | 2960 | * bits within a 32-bit word of the map. |
| 2968 | * | 2961 | * |
| 2969 | * PARAMETERS: | 2962 | * PARAMETERS: |
| 2970 | * cp - pointer to the 32-bit value. | 2963 | * cp - pointer to the 32-bit value. |
| 2971 | * | 2964 | * |
| 2972 | * RETURN VALUES: | 2965 | * RETURN VALUES: |
| 2973 | * largest binary buddy of free bits within a dmap word. | 2966 | * largest binary buddy of free bits within a dmap word. |
| 2974 | */ | 2967 | */ |
| 2975 | static int dbMaxBud(u8 * cp) | 2968 | static int dbMaxBud(u8 * cp) |
| 2976 | { | 2969 | { |
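The recursive idea behind the routine (the kernel unrolls it into word/halfword tests plus a 256-entry budtab[] lookup): an all-zero field is its own answer; otherwise the best answer comes from one of its two halves. This sketch returns the run length in bits, where the kernel returns its log2:

    static int max_bud_bits(u32 word, int width)   /* call with width = 32 */
    {
        u32 mask = (width == 32) ? 0xffffffffu : ((1u << width) - 1);
        int lo, hi;

        if ((word & mask) == 0)
            return width;                /* the entire field is free */
        if (width == 1)
            return 0;                    /* a lone allocated bit */

        lo = max_bud_bits(word, width / 2);
        hi = max_bud_bits(word >> (width / 2), width / 2);
        return lo > hi ? lo : hi;        /* best aligned run of a half */
    }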
| @@ -3000,14 +2993,14 @@ static int dbMaxBud(u8 * cp) | |||
| 3000 | /* | 2993 | /* |
| 3001 | * NAME: cnttz(uint word) | 2994 | * NAME: cnttz(uint word) |
| 3002 | * | 2995 | * |
| 3003 | * FUNCTION: determine the number of trailing zeros within a 32-bit | 2996 | * FUNCTION: determine the number of trailing zeros within a 32-bit |
| 3004 | * value. | 2997 | * value. |
| 3005 | * | 2998 | * |
| 3006 | * PARAMETERS: | 2999 | * PARAMETERS: |
| 3007 | * value - 32-bit value to be examined. | 3000 | * value - 32-bit value to be examined. |
| 3008 | * | 3001 | * |
| 3009 | * RETURN VALUES: | 3002 | * RETURN VALUES: |
| 3010 | * count of trailing zeros | 3003 | * count of trailing zeros |
| 3011 | */ | 3004 | */ |
| 3012 | static int cnttz(u32 word) | 3005 | static int cnttz(u32 word) |
| 3013 | { | 3006 | { |
| @@ -3025,14 +3018,14 @@ static int cnttz(u32 word) | |||
| 3025 | /* | 3018 | /* |
| 3026 | * NAME: cntlz(u32 value) | 3019 | * NAME: cntlz(u32 value) |
| 3027 | * | 3020 | * |
| 3028 | * FUNCTION: determine the number of leading zeros within a 32-bit | 3021 | * FUNCTION: determine the number of leading zeros within a 32-bit |
| 3029 | * value. | 3022 | * value. |
| 3030 | * | 3023 | * |
| 3031 | * PARAMETERS: | 3024 | * PARAMETERS: |
| 3032 | * value - 32-bit value to be examined. | 3025 | * value - 32-bit value to be examined. |
| 3033 | * | 3026 | * |
| 3034 | * RETURN VALUES: | 3027 | * RETURN VALUES: |
| 3035 | * count of leading zeros | 3028 | * count of leading zeros |
| 3036 | */ | 3029 | */ |
| 3037 | static int cntlz(u32 value) | 3030 | static int cntlz(u32 value) |
| 3038 | { | 3031 | { |
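Both helpers have the obvious shift-loop form. Portable sketches, assuming a non-zero argument (on current kernels one would reach for the generic bitops such as fls() instead):

    static int my_cnttz(u32 word)        /* trailing zeros */
    {
        int n = 0;

        while ((word & 1) == 0) {
            word >>= 1;
            n++;
        }
        return n;
    }

    static int my_cntlz(u32 value)       /* leading zeros */
    {
        int n = 0;

        while ((value & 0x80000000u) == 0) {
            value <<= 1;
            n++;
        }
        return n;
    }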
| @@ -3050,14 +3043,14 @@ static int cntlz(u32 value) | |||
| 3050 | * NAME: blkstol2(s64 nb) | 3043 | * NAME: blkstol2(s64 nb) |
| 3051 | * | 3044 | * |
| 3052 | * FUNCTION: convert a block count to its log2 value. if the block | 3045 | * FUNCTION: convert a block count to its log2 value. if the block |
| 3053 | * count is not a power of 2, it is rounded up to the next | 3046 | * count is not a power of 2, it is rounded up to the next |
| 3054 | * larger power of 2. | 3047 | * larger power of 2. |
| 3055 | * | 3048 | * |
| 3056 | * PARAMETERS: | 3049 | * PARAMETERS: |
| 3057 | * nb - number of blocks | 3050 | * nb - number of blocks |
| 3058 | * | 3051 | * |
| 3059 | * RETURN VALUES: | 3052 | * RETURN VALUES: |
| 3060 | * log2 number of blocks | 3053 | * log2 number of blocks |
| 3061 | */ | 3054 | */ |
| 3062 | static int blkstol2(s64 nb) | 3055 | static int blkstol2(s64 nb) |
| 3063 | { | 3056 | { |
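The rounding rule in miniature: the answer is the smallest l2 with 2^l2 >= nb, so 8 maps to 3 but 9 through 16 map to 4. An illustrative version (the kernel derives the same answer for an s64 from cnttz()/cntlz()):

    static int blocks_to_l2(long long nb)
    {
        int l2 = 0;

        while ((1LL << l2) < nb)         /* smallest l2 with 2^l2 >= nb */
            l2++;
        return l2;
    }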
| @@ -3099,13 +3092,13 @@ static int blkstol2(s64 nb) | |||
| 3099 | * at a time. | 3092 | * at a time. |
| 3100 | * | 3093 | * |
| 3101 | * PARAMETERS: | 3094 | * PARAMETERS: |
| 3102 | * ip - pointer to in-core inode; | 3095 | * ip - pointer to in-core inode; |
| 3103 | * blkno - starting block number to be freed. | 3096 | * blkno - starting block number to be freed. |
| 3104 | * nblocks - number of blocks to be freed. | 3097 | * nblocks - number of blocks to be freed. |
| 3105 | * | 3098 | * |
| 3106 | * RETURN VALUES: | 3099 | * RETURN VALUES: |
| 3107 | * 0 - success | 3100 | * 0 - success |
| 3108 | * -EIO - i/o error | 3101 | * -EIO - i/o error |
| 3109 | */ | 3102 | */ |
| 3110 | int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks) | 3103 | int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks) |
| 3111 | { | 3104 | { |
| @@ -3278,10 +3271,10 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 3278 | * L2 | 3271 | * L2 |
| 3279 | * | | 3272 | * | |
| 3280 | * L1---------------------------------L1 | 3273 | * L1---------------------------------L1 |
| 3281 | * | | | 3274 | * | | |
| 3282 | * L0---------L0---------L0 L0---------L0---------L0 | 3275 | * L0---------L0---------L0 L0---------L0---------L0 |
| 3283 | * | | | | | | | 3276 | * | | | | | | |
| 3284 | * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; | 3277 | * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; |
| 3285 | * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm | 3278 | * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm |
| 3286 | * | 3279 | * |
| 3287 | * <---old---><----------------------------extend-----------------------> | 3280 | * <---old---><----------------------------extend-----------------------> |
| @@ -3307,7 +3300,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) | |||
| 3307 | (long long) blkno, (long long) nblocks, (long long) newsize); | 3300 | (long long) blkno, (long long) nblocks, (long long) newsize); |
| 3308 | 3301 | ||
| 3309 | /* | 3302 | /* |
| 3310 | * initialize bmap control page. | 3303 | * initialize bmap control page. |
| 3311 | * | 3304 | * |
| 3312 | * all the data in bmap control page should exclude | 3305 | * all the data in bmap control page should exclude |
| 3313 | * the mkfs hidden dmap page. | 3306 | * the mkfs hidden dmap page. |
| @@ -3330,7 +3323,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) | |||
| 3330 | bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0; | 3323 | bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0; |
| 3331 | 3324 | ||
| 3332 | /* | 3325 | /* |
| 3333 | * reconfigure db_agfree[] | 3326 | * reconfigure db_agfree[] |
| 3334 | * from old AG configuration to new AG configuration; | 3327 | * from old AG configuration to new AG configuration; |
| 3335 | * | 3328 | * |
| 3336 | * coalesce contiguous k (newAGSize/oldAGSize) AGs; | 3329 | * coalesce contiguous k (newAGSize/oldAGSize) AGs; |
| @@ -3362,7 +3355,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) | |||
| 3362 | bmp->db_maxag = bmp->db_maxag / k; | 3355 | bmp->db_maxag = bmp->db_maxag / k; |
| 3363 | 3356 | ||
| 3364 | /* | 3357 | /* |
| 3365 | * extend bmap | 3358 | * extend bmap |
| 3366 | * | 3359 | * |
| 3367 | * update bit maps and corresponding level control pages; | 3360 | * update bit maps and corresponding level control pages; |
| 3368 | * global control page db_nfree, db_agfree[agno], db_maxfreebud; | 3361 | * global control page db_nfree, db_agfree[agno], db_maxfreebud; |
| @@ -3410,7 +3403,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) | |||
| 3410 | /* compute start L0 */ | 3403 | /* compute start L0 */ |
| 3411 | j = 0; | 3404 | j = 0; |
| 3412 | l1leaf = l1dcp->stree + CTLLEAFIND; | 3405 | l1leaf = l1dcp->stree + CTLLEAFIND; |
| 3413 | p += nbperpage; /* 1st L0 of L1.k */ | 3406 | p += nbperpage; /* 1st L0 of L1.k */ |
| 3414 | } | 3407 | } |
| 3415 | 3408 | ||
| 3416 | /* | 3409 | /* |
| @@ -3548,7 +3541,7 @@ errout: | |||
| 3548 | return -EIO; | 3541 | return -EIO; |
| 3549 | 3542 | ||
| 3550 | /* | 3543 | /* |
| 3551 | * finalize bmap control page | 3544 | * finalize bmap control page |
| 3552 | */ | 3545 | */ |
| 3553 | finalize: | 3546 | finalize: |
| 3554 | 3547 | ||
| @@ -3567,7 +3560,7 @@ void dbFinalizeBmap(struct inode *ipbmap) | |||
| 3567 | int i, n; | 3560 | int i, n; |
| 3568 | 3561 | ||
| 3569 | /* | 3562 | /* |
| 3570 | * finalize bmap control page | 3563 | * finalize bmap control page |
| 3571 | */ | 3564 | */ |
| 3572 | //finalize: | 3565 | //finalize: |
| 3573 | /* | 3566 | /* |
| @@ -3953,8 +3946,8 @@ static int dbGetL2AGSize(s64 nblocks) | |||
| 3953 | * convert number of map pages to the zero origin top dmapctl level | 3946 | * convert number of map pages to the zero origin top dmapctl level |
| 3954 | */ | 3947 | */ |
| 3955 | #define BMAPPGTOLEV(npages) \ | 3948 | #define BMAPPGTOLEV(npages) \ |
| 3956 | (((npages) <= 3 + MAXL0PAGES) ? 0 \ | 3949 | (((npages) <= 3 + MAXL0PAGES) ? 0 : \ |
| 3957 | : ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) | 3950 | ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) |
| 3958 | 3951 | ||
| 3959 | s64 dbMapFileSizeToMapSize(struct inode * ipbmap) | 3952 | s64 dbMapFileSizeToMapSize(struct inode * ipbmap) |
| 3960 | { | 3953 | { |
| @@ -3981,8 +3974,8 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap) | |||
| 3981 | factor = | 3974 | factor = |
| 3982 | (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1); | 3975 | (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1); |
| 3983 | complete = (u32) npages / factor; | 3976 | complete = (u32) npages / factor; |
| 3984 | ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL | 3977 | ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL : |
| 3985 | : ((i == 1) ? LPERCTL : 1)); | 3978 | ((i == 1) ? LPERCTL : 1)); |
| 3986 | 3979 | ||
| 3987 | /* pages in last/incomplete child */ | 3980 | /* pages in last/incomplete child */ |
| 3988 | npages = (u32) npages % factor; | 3981 | npages = (u32) npages % factor; |
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 45ea454c74bd..11e6d471b364 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h | |||
| @@ -83,7 +83,7 @@ static __inline signed char TREEMAX(signed char *cp) | |||
| 83 | * - 1 is added to account for the control page of the map. | 83 | * - 1 is added to account for the control page of the map. |
| 84 | */ | 84 | */ |
| 85 | #define BLKTODMAP(b,s) \ | 85 | #define BLKTODMAP(b,s) \ |
| 86 | ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s)) | 86 | ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s)) |
| 87 | 87 | ||
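Reading the macro term by term: each dmap covers 2^13 blocks, so (b) >> 13 counts the dmap pages before block b; (b) >> 23 and (b) >> 33 count the extra L0 and L1 pages interleaved ahead of it (one per 2^10 dmaps and one per 2^20 dmaps); 3 covers the L2 page, the first L1 and the first L0, and the final 1 is the map's control page, as the note above says; << (s) then converts the page number into a map-file block number, with s the log2 number of blocks per page. A worked case, b = 8192 (the second dmap):

    (8192 >> 13) + (8192 >> 23) + (8192 >> 33) + 3 + 1 = 1 + 0 + 0 + 4 = page 5

while the dmap for block 0 sits at page 4, directly after the control, L2, L1 and first L0 pages.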
| 88 | /* | 88 | /* |
| 89 | * convert disk block number to the logical block number of the LEVEL 0 | 89 | * convert disk block number to the logical block number of the LEVEL 0 |
| @@ -98,7 +98,7 @@ static __inline signed char TREEMAX(signed char *cp) | |||
| 98 | * - 1 is added to account for the control page of the map. | 98 | * - 1 is added to account for the control page of the map. |
| 99 | */ | 99 | */ |
| 100 | #define BLKTOL0(b,s) \ | 100 | #define BLKTOL0(b,s) \ |
| 101 | (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s)) | 101 | (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s)) |
| 102 | 102 | ||
| 103 | /* | 103 | /* |
| 104 | * convert disk block number to the logical block number of the LEVEL 1 | 104 | * convert disk block number to the logical block number of the LEVEL 1 |
| @@ -120,7 +120,7 @@ static __inline signed char TREEMAX(signed char *cp) | |||
| 120 | * at the specified level which describes the disk block. | 120 | * at the specified level which describes the disk block. |
| 121 | */ | 121 | */ |
| 122 | #define BLKTOCTL(b,s,l) \ | 122 | #define BLKTOCTL(b,s,l) \ |
| 123 | (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) | 123 | (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) |
| 124 | 124 | ||
| 125 | /* | 125 | /* |
| 126 | * convert aggregate map size to the zero origin dmapctl level of the | 126 | * convert aggregate map size to the zero origin dmapctl level of the |
| @@ -145,27 +145,27 @@ static __inline signed char TREEMAX(signed char *cp) | |||
| 145 | * dmaptree must be consistent with dmapctl. | 145 | * dmaptree must be consistent with dmapctl. |
| 146 | */ | 146 | */ |
| 147 | struct dmaptree { | 147 | struct dmaptree { |
| 148 | __le32 nleafs; /* 4: number of tree leafs */ | 148 | __le32 nleafs; /* 4: number of tree leafs */ |
| 149 | __le32 l2nleafs; /* 4: l2 number of tree leafs */ | 149 | __le32 l2nleafs; /* 4: l2 number of tree leafs */ |
| 150 | __le32 leafidx; /* 4: index of first tree leaf */ | 150 | __le32 leafidx; /* 4: index of first tree leaf */ |
| 151 | __le32 height; /* 4: height of the tree */ | 151 | __le32 height; /* 4: height of the tree */ |
| 152 | s8 budmin; /* 1: min l2 tree leaf value to combine */ | 152 | s8 budmin; /* 1: min l2 tree leaf value to combine */ |
| 153 | s8 stree[TREESIZE]; /* TREESIZE: tree */ | 153 | s8 stree[TREESIZE]; /* TREESIZE: tree */ |
| 154 | u8 pad[2]; /* 2: pad to word boundary */ | 154 | u8 pad[2]; /* 2: pad to word boundary */ |
| 155 | }; /* - 360 - */ | 155 | }; /* - 360 - */ |
| 156 | 156 | ||
| 157 | /* | 157 | /* |
| 158 | * dmap page per 8K blocks bitmap | 158 | * dmap page per 8K blocks bitmap |
| 159 | */ | 159 | */ |
| 160 | struct dmap { | 160 | struct dmap { |
| 161 | __le32 nblocks; /* 4: num blks covered by this dmap */ | 161 | __le32 nblocks; /* 4: num blks covered by this dmap */ |
| 162 | __le32 nfree; /* 4: num of free blks in this dmap */ | 162 | __le32 nfree; /* 4: num of free blks in this dmap */ |
| 163 | __le64 start; /* 8: starting blkno for this dmap */ | 163 | __le64 start; /* 8: starting blkno for this dmap */ |
| 164 | struct dmaptree tree; /* 360: dmap tree */ | 164 | struct dmaptree tree; /* 360: dmap tree */ |
| 165 | u8 pad[1672]; /* 1672: pad to 2048 bytes */ | 165 | u8 pad[1672]; /* 1672: pad to 2048 bytes */ |
| 166 | __le32 wmap[LPERDMAP]; /* 1024: bits of the working map */ | 166 | __le32 wmap[LPERDMAP]; /* 1024: bits of the working map */ |
| 167 | __le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */ | 167 | __le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */ |
| 168 | }; /* - 4096 - */ | 168 | }; /* - 4096 - */ |
| 169 | 169 | ||
| 170 | /* | 170 | /* |
| 171 | * disk map control page per level. | 171 | * disk map control page per level. |
| @@ -173,14 +173,14 @@ struct dmap { | |||
| 173 | * dmapctl must be consistent with dmaptree. | 173 | * dmapctl must be consistent with dmaptree. |
| 174 | */ | 174 | */ |
| 175 | struct dmapctl { | 175 | struct dmapctl { |
| 176 | __le32 nleafs; /* 4: number of tree leafs */ | 176 | __le32 nleafs; /* 4: number of tree leafs */ |
| 177 | __le32 l2nleafs; /* 4: l2 number of tree leafs */ | 177 | __le32 l2nleafs; /* 4: l2 number of tree leafs */ |
| 178 | __le32 leafidx; /* 4: index of the first tree leaf */ | 178 | __le32 leafidx; /* 4: index of the first tree leaf */ |
| 179 | __le32 height; /* 4: height of tree */ | 179 | __le32 height; /* 4: height of tree */ |
| 180 | s8 budmin; /* 1: minimum l2 tree leaf value */ | 180 | s8 budmin; /* 1: minimum l2 tree leaf value */ |
| 181 | s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */ | 181 | s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */ |
| 182 | u8 pad[2714]; /* 2714: pad to 4096 */ | 182 | u8 pad[2714]; /* 2714: pad to 4096 */ |
| 183 | }; /* - 4096 - */ | 183 | }; /* - 4096 - */ |
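The byte counts annotated in these two structures (and in struct dmaptree above) add up exactly to the stated page sizes, which pins down the two tree-array lengths:

    struct dmaptree: 4+4+4+4 + 1 + TREESIZE + 2           =  360  =>  TREESIZE    = 341 = 256+64+16+4+1
    struct dmap:     4 + 4 + 8 + 360 + 1672 + 1024 + 1024 = 4096
    struct dmapctl:  4+4+4+4 + 1 + CTLTREESIZE + 2714     = 4096  =>  CTLTREESIZE = 1365 = 1024+256+64+16+4+1

i.e. both stree arrays are complete 4-ary trees, over LPERDMAP = 256 leaves in the dmap and LPERCTL = 1024 leaves in a dmapctl.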
| 184 | 184 | ||
| 185 | /* | 185 | /* |
| 186 | * common definition for dmaptree within dmap and dmapctl | 186 | * common definition for dmaptree within dmap and dmapctl |
| @@ -202,41 +202,41 @@ typedef union dmtree { | |||
| 202 | * on-disk aggregate disk allocation map descriptor. | 202 | * on-disk aggregate disk allocation map descriptor. |
| 203 | */ | 203 | */ |
| 204 | struct dbmap_disk { | 204 | struct dbmap_disk { |
| 205 | __le64 dn_mapsize; /* 8: number of blocks in aggregate */ | 205 | __le64 dn_mapsize; /* 8: number of blocks in aggregate */ |
| 206 | __le64 dn_nfree; /* 8: num free blks in aggregate map */ | 206 | __le64 dn_nfree; /* 8: num free blks in aggregate map */ |
| 207 | __le32 dn_l2nbperpage; /* 4: number of blks per page */ | 207 | __le32 dn_l2nbperpage; /* 4: number of blks per page */ |
| 208 | __le32 dn_numag; /* 4: total number of ags */ | 208 | __le32 dn_numag; /* 4: total number of ags */ |
| 209 | __le32 dn_maxlevel; /* 4: number of active ags */ | 209 | __le32 dn_maxlevel; /* 4: number of active ags */ |
| 210 | __le32 dn_maxag; /* 4: max active alloc group number */ | 210 | __le32 dn_maxag; /* 4: max active alloc group number */ |
| 211 | __le32 dn_agpref; /* 4: preferred alloc group (hint) */ | 211 | __le32 dn_agpref; /* 4: preferred alloc group (hint) */ |
| 212 | __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ | 212 | __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ |
| 213 | __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ | 213 | __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ |
| 214 | __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ | 214 | __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ |
| 215 | __le32 dn_agstart; /* 4: start tree index at AG height */ | 215 | __le32 dn_agstart; /* 4: start tree index at AG height */ |
| 216 | __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ | 216 | __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ |
| 217 | __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */ | 217 | __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */ |
| 218 | __le64 dn_agsize; /* 8: num of blks per alloc group */ | 218 | __le64 dn_agsize; /* 8: num of blks per alloc group */ |
| 219 | s8 dn_maxfreebud; /* 1: max free buddy system */ | 219 | s8 dn_maxfreebud; /* 1: max free buddy system */ |
| 220 | u8 pad[3007]; /* 3007: pad to 4096 */ | 220 | u8 pad[3007]; /* 3007: pad to 4096 */ |
| 221 | }; /* - 4096 - */ | 221 | }; /* - 4096 - */ |
| 222 | 222 | ||
| 223 | struct dbmap { | 223 | struct dbmap { |
| 224 | s64 dn_mapsize; /* number of blocks in aggregate */ | 224 | s64 dn_mapsize; /* number of blocks in aggregate */ |
| 225 | s64 dn_nfree; /* num free blks in aggregate map */ | 225 | s64 dn_nfree; /* num free blks in aggregate map */ |
| 226 | int dn_l2nbperpage; /* number of blks per page */ | 226 | int dn_l2nbperpage; /* number of blks per page */ |
| 227 | int dn_numag; /* total number of ags */ | 227 | int dn_numag; /* total number of ags */ |
| 228 | int dn_maxlevel; /* number of active ags */ | 228 | int dn_maxlevel; /* number of active ags */ |
| 229 | int dn_maxag; /* max active alloc group number */ | 229 | int dn_maxag; /* max active alloc group number */ |
| 230 | int dn_agpref; /* preferred alloc group (hint) */ | 230 | int dn_agpref; /* preferred alloc group (hint) */ |
| 231 | int dn_aglevel; /* dmapctl level holding the AG */ | 231 | int dn_aglevel; /* dmapctl level holding the AG */ |
| 232 | int dn_agheigth; /* height in dmapctl of the AG */ | 232 | int dn_agheigth; /* height in dmapctl of the AG */ |
| 233 | int dn_agwidth; /* width in dmapctl of the AG */ | 233 | int dn_agwidth; /* width in dmapctl of the AG */ |
| 234 | int dn_agstart; /* start tree index at AG height */ | 234 | int dn_agstart; /* start tree index at AG height */ |
| 235 | int dn_agl2size; /* l2 num of blks per alloc group */ | 235 | int dn_agl2size; /* l2 num of blks per alloc group */ |
| 236 | s64 dn_agfree[MAXAG]; /* per AG free count */ | 236 | s64 dn_agfree[MAXAG]; /* per AG free count */ |
| 237 | s64 dn_agsize; /* num of blks per alloc group */ | 237 | s64 dn_agsize; /* num of blks per alloc group */ |
| 238 | signed char dn_maxfreebud; /* max free buddy system */ | 238 | signed char dn_maxfreebud; /* max free buddy system */ |
| 239 | }; /* - 4096 - */ | 239 | }; /* - 4096 - */ |
| 240 | /* | 240 | /* |
| 241 | * in-memory aggregate disk allocation map descriptor. | 241 | * in-memory aggregate disk allocation map descriptor. |
| 242 | */ | 242 | */ |
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 6d62f3222892..c14ba3cfa818 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c | |||
| @@ -315,8 +315,8 @@ static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp, | |||
| 315 | lv = &llck->lv[llck->index]; | 315 | lv = &llck->lv[llck->index]; |
| 316 | 316 | ||
| 317 | /* | 317 | /* |
| 318 | * Linelock slot size is twice the size of a directory table | 318 | * Linelock slot size is twice the size of a directory table |
| 319 | * slot. 512 entries per page. | 319 | * slot. 512 entries per page. |
| 320 | */ | 320 | */ |
| 321 | lv->offset = ((index - 2) & 511) >> 1; | 321 | lv->offset = ((index - 2) & 511) >> 1; |
| 322 | lv->length = 1; | 322 | lv->length = 1; |
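Worked through: index is the directory-table index and entries 0 and 1 are reserved ('.' and '..', see jfs_readdir() below), so (index - 2) & 511 is the slot within its 512-entry page, and the final >> 1 maps two directory slots onto one linelock slot. For index = 700: (700 - 2) & 511 = 186, >> 1 = linelock offset 93.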
| @@ -615,7 +615,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, | |||
| 615 | btstack->nsplit = 1; | 615 | btstack->nsplit = 1; |
| 616 | 616 | ||
| 617 | /* | 617 | /* |
| 618 | * search down tree from root: | 618 | * search down tree from root: |
| 619 | * | 619 | * |
| 620 | * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of | 620 | * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of |
| 621 | * internal page, child page Pi contains entry with k, Ki <= K < Kj. | 621 | * internal page, child page Pi contains entry with k, Ki <= K < Kj. |
| @@ -659,7 +659,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, | |||
| 659 | } | 659 | } |
| 660 | if (cmp == 0) { | 660 | if (cmp == 0) { |
| 661 | /* | 661 | /* |
| 662 | * search hit | 662 | * search hit |
| 663 | */ | 663 | */ |
| 664 | /* search hit - leaf page: | 664 | /* search hit - leaf page: |
| 665 | * return the entry found | 665 | * return the entry found |
| @@ -723,7 +723,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, | |||
| 723 | } | 723 | } |
| 724 | 724 | ||
| 725 | /* | 725 | /* |
| 726 | * search miss | 726 | * search miss |
| 727 | * | 727 | * |
| 728 | * base is the smallest index with key (Kj) greater than | 728 | * base is the smallest index with key (Kj) greater than |
| 729 | * search key (K) and may be zero or (maxindex + 1) index. | 729 | * search key (K) and may be zero or (maxindex + 1) index. |
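That invariant is the classic binary-search postcondition. A standalone sketch with hypothetical integer keys (dtSearch() compares component names and returns early on an exact hit, but a miss leaves base positioned exactly like this):

    static int find_base(const int *keys, int nkeys, int key)
    {
        int base = 0, lim, index;

        for (lim = nkeys; lim; lim >>= 1) {
            index = base + (lim >> 1);
            if (keys[index] <= key) {    /* continue in the right half */
                base = index + 1;
                lim--;
            }
        }
        return base;  /* smallest index whose key is greater than 'key' */
    }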
| @@ -834,7 +834,7 @@ int dtInsert(tid_t tid, struct inode *ip, | |||
| 834 | struct lv *lv; | 834 | struct lv *lv; |
| 835 | 835 | ||
| 836 | /* | 836 | /* |
| 837 | * retrieve search result | 837 | * retrieve search result |
| 838 | * | 838 | * |
| 839 | * dtSearch() returns (leaf page pinned, index at which to insert). | 839 | * dtSearch() returns (leaf page pinned, index at which to insert). |
| 840 | * n.b. dtSearch() may return index of (maxindex + 1) of | 840 | * n.b. dtSearch() may return index of (maxindex + 1) of |
| @@ -843,7 +843,7 @@ int dtInsert(tid_t tid, struct inode *ip, | |||
| 843 | DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); | 843 | DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); |
| 844 | 844 | ||
| 845 | /* | 845 | /* |
| 846 | * insert entry for new key | 846 | * insert entry for new key |
| 847 | */ | 847 | */ |
| 848 | if (DO_INDEX(ip)) { | 848 | if (DO_INDEX(ip)) { |
| 849 | if (JFS_IP(ip)->next_index == DIREND) { | 849 | if (JFS_IP(ip)->next_index == DIREND) { |
| @@ -860,9 +860,9 @@ int dtInsert(tid_t tid, struct inode *ip, | |||
| 860 | data.leaf.ino = *fsn; | 860 | data.leaf.ino = *fsn; |
| 861 | 861 | ||
| 862 | /* | 862 | /* |
| 863 | * leaf page does not have enough room for new entry: | 863 | * leaf page does not have enough room for new entry: |
| 864 | * | 864 | * |
| 865 | * extend/split the leaf page; | 865 | * extend/split the leaf page; |
| 866 | * | 866 | * |
| 867 | * dtSplitUp() will insert the entry and unpin the leaf page. | 867 | * dtSplitUp() will insert the entry and unpin the leaf page. |
| 868 | */ | 868 | */ |
| @@ -877,9 +877,9 @@ int dtInsert(tid_t tid, struct inode *ip, | |||
| 877 | } | 877 | } |
| 878 | 878 | ||
| 879 | /* | 879 | /* |
| 880 | * leaf page does have enough room for new entry: | 880 | * leaf page does have enough room for new entry: |
| 881 | * | 881 | * |
| 882 | * insert the new data entry into the leaf page; | 882 | * insert the new data entry into the leaf page; |
| 883 | */ | 883 | */ |
| 884 | BT_MARK_DIRTY(mp, ip); | 884 | BT_MARK_DIRTY(mp, ip); |
| 885 | /* | 885 | /* |
| @@ -967,13 +967,13 @@ static int dtSplitUp(tid_t tid, | |||
| 967 | } | 967 | } |
| 968 | 968 | ||
| 969 | /* | 969 | /* |
| 970 | * split leaf page | 970 | * split leaf page |
| 971 | * | 971 | * |
| 972 | * The split routines insert the new entry, and | 972 | * The split routines insert the new entry, and |
| 973 | * acquire txLock as appropriate. | 973 | * acquire txLock as appropriate. |
| 974 | */ | 974 | */ |
| 975 | /* | 975 | /* |
| 976 | * split root leaf page: | 976 | * split root leaf page: |
| 977 | */ | 977 | */ |
| 978 | if (sp->header.flag & BT_ROOT) { | 978 | if (sp->header.flag & BT_ROOT) { |
| 979 | /* | 979 | /* |
| @@ -1012,7 +1012,7 @@ static int dtSplitUp(tid_t tid, | |||
| 1012 | } | 1012 | } |
| 1013 | 1013 | ||
| 1014 | /* | 1014 | /* |
| 1015 | * extend first leaf page | 1015 | * extend first leaf page |
| 1016 | * | 1016 | * |
| 1017 | * extend the 1st extent if less than buffer page size | 1017 | * extend the 1st extent if less than buffer page size |
| 1018 | * (dtExtendPage() returns leaf page unpinned) | 1018 | * (dtExtendPage() returns leaf page unpinned) |
| @@ -1068,7 +1068,7 @@ static int dtSplitUp(tid_t tid, | |||
| 1068 | } | 1068 | } |
| 1069 | 1069 | ||
| 1070 | /* | 1070 | /* |
| 1071 | * split leaf page <sp> into <sp> and a new right page <rp>. | 1071 | * split leaf page <sp> into <sp> and a new right page <rp>. |
| 1072 | * | 1072 | * |
| 1073 | * return <rp> pinned and its extent descriptor <rpxd> | 1073 | * return <rp> pinned and its extent descriptor <rpxd> |
| 1074 | */ | 1074 | */ |
| @@ -1433,7 +1433,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, | |||
| 1433 | rp->header.freecnt = rp->header.maxslot - fsi; | 1433 | rp->header.freecnt = rp->header.maxslot - fsi; |
| 1434 | 1434 | ||
| 1435 | /* | 1435 | /* |
| 1436 | * sequential append at tail: append without split | 1436 | * sequential append at tail: append without split |
| 1437 | * | 1437 | * |
| 1438 | * If splitting the last page on a level because of appending | 1438 | * If splitting the last page on a level because of appending |
| 1439 | * an entry to it (skip is maxentry), it's likely that the access is | 1439 | * an entry to it (skip is maxentry), it's likely that the access is |
| @@ -1467,7 +1467,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, | |||
| 1467 | } | 1467 | } |
| 1468 | 1468 | ||
| 1469 | /* | 1469 | /* |
| 1470 | * non-sequential insert (possibly at a middle page) | 1470 | * non-sequential insert (possibly at a middle page) |
| 1471 | */ | 1471 | */ |
| 1472 | 1472 | ||
| 1473 | /* | 1473 | /* |
| @@ -1508,7 +1508,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, | |||
| 1508 | left = 0; | 1508 | left = 0; |
| 1509 | 1509 | ||
| 1510 | /* | 1510 | /* |
| 1511 | * compute fill factor for split pages | 1511 | * compute fill factor for split pages |
| 1512 | * | 1512 | * |
| 1513 | * <nxt> traces the next entry to move to rp | 1513 | * <nxt> traces the next entry to move to rp |
| 1514 | * <off> traces the next entry to stay in sp | 1514 | * <off> traces the next entry to stay in sp |
| @@ -1551,7 +1551,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, | |||
| 1551 | /* <nxt> poins to the 1st entry to move */ | 1551 | /* <nxt> poins to the 1st entry to move */ |
| 1552 | 1552 | ||
| 1553 | /* | 1553 | /* |
| 1554 | * move entries to right page | 1554 | * move entries to right page |
| 1555 | * | 1555 | * |
| 1556 | * dtMoveEntry() initializes rp and reserves entry for insertion | 1556 | * dtMoveEntry() initializes rp and reserves entry for insertion |
| 1557 | * | 1557 | * |
| @@ -1677,7 +1677,7 @@ static int dtExtendPage(tid_t tid, | |||
| 1677 | return (rc); | 1677 | return (rc); |
| 1678 | 1678 | ||
| 1679 | /* | 1679 | /* |
| 1680 | * extend the extent | 1680 | * extend the extent |
| 1681 | */ | 1681 | */ |
| 1682 | pxdlist = split->pxdlist; | 1682 | pxdlist = split->pxdlist; |
| 1683 | pxd = &pxdlist->pxd[pxdlist->npxd]; | 1683 | pxd = &pxdlist->pxd[pxdlist->npxd]; |
| @@ -1722,7 +1722,7 @@ static int dtExtendPage(tid_t tid, | |||
| 1722 | } | 1722 | } |
| 1723 | 1723 | ||
| 1724 | /* | 1724 | /* |
| 1725 | * extend the page | 1725 | * extend the page |
| 1726 | */ | 1726 | */ |
| 1727 | sp->header.self = *pxd; | 1727 | sp->header.self = *pxd; |
| 1728 | 1728 | ||
| @@ -1739,9 +1739,6 @@ static int dtExtendPage(tid_t tid, | |||
| 1739 | /* update buffer extent descriptor of extended page */ | 1739 | /* update buffer extent descriptor of extended page */ |
| 1740 | xlen = lengthPXD(pxd); | 1740 | xlen = lengthPXD(pxd); |
| 1741 | xsize = xlen << JFS_SBI(sb)->l2bsize; | 1741 | xsize = xlen << JFS_SBI(sb)->l2bsize; |
| 1742 | #ifdef _STILL_TO_PORT | ||
| 1743 | bmSetXD(smp, xaddr, xsize); | ||
| 1744 | #endif /* _STILL_TO_PORT */ | ||
| 1745 | 1742 | ||
| 1746 | /* | 1743 | /* |
| 1747 | * copy old stbl to new stbl at start of extended area | 1744 | * copy old stbl to new stbl at start of extended area |
| @@ -1836,7 +1833,7 @@ static int dtExtendPage(tid_t tid, | |||
| 1836 | } | 1833 | } |
| 1837 | 1834 | ||
| 1838 | /* | 1835 | /* |
| 1839 | * update parent entry on the parent/root page | 1836 | * update parent entry on the parent/root page |
| 1840 | */ | 1837 | */ |
| 1841 | /* | 1838 | /* |
| 1842 | * acquire a transaction lock on the parent/root page | 1839 | * acquire a transaction lock on the parent/root page |
| @@ -1904,7 +1901,7 @@ static int dtSplitRoot(tid_t tid, | |||
| 1904 | sp = &JFS_IP(ip)->i_dtroot; | 1901 | sp = &JFS_IP(ip)->i_dtroot; |
| 1905 | 1902 | ||
| 1906 | /* | 1903 | /* |
| 1907 | * allocate/initialize a single (right) child page | 1904 | * allocate/initialize a single (right) child page |
| 1908 | * | 1905 | * |
| 1909 | * N.B. at first split, a one (or two) block to fit new entry | 1906 | * N.B. at first split, a one (or two) block to fit new entry |
| 1910 | * is allocated; at subsequent split, a full page is allocated; | 1907 | * is allocated; at subsequent split, a full page is allocated; |
| @@ -1943,7 +1940,7 @@ static int dtSplitRoot(tid_t tid, | |||
| 1943 | rp->header.prev = 0; | 1940 | rp->header.prev = 0; |
| 1944 | 1941 | ||
| 1945 | /* | 1942 | /* |
| 1946 | * move in-line root page into new right page extent | 1943 | * move in-line root page into new right page extent |
| 1947 | */ | 1944 | */ |
| 1948 | /* linelock header + copied entries + new stbl (1st slot) in new page */ | 1945 | /* linelock header + copied entries + new stbl (1st slot) in new page */ |
| 1949 | ASSERT(dtlck->index == 0); | 1946 | ASSERT(dtlck->index == 0); |
| @@ -2016,7 +2013,7 @@ static int dtSplitRoot(tid_t tid, | |||
| 2016 | dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); | 2013 | dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); |
| 2017 | 2014 | ||
| 2018 | /* | 2015 | /* |
| 2019 | * reset parent/root page | 2016 | * reset parent/root page |
| 2020 | * | 2017 | * |
| 2021 | * set the 1st entry offset to 0, which forces the left-most key | 2018 | * set the 1st entry offset to 0, which forces the left-most key |
| 2022 | * at any level of the tree to be less than any search key. | 2019 | * at any level of the tree to be less than any search key. |
| @@ -2102,7 +2099,7 @@ int dtDelete(tid_t tid, | |||
| 2102 | dtpage_t *np; | 2099 | dtpage_t *np; |
| 2103 | 2100 | ||
| 2104 | /* | 2101 | /* |
| 2105 | * search for the entry to delete: | 2102 | * search for the entry to delete: |
| 2106 | * | 2103 | * |
| 2107 | * dtSearch() returns (leaf page pinned, index at which to delete). | 2104 | * dtSearch() returns (leaf page pinned, index at which to delete). |
| 2108 | */ | 2105 | */ |
| @@ -2253,7 +2250,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, | |||
| 2253 | int i; | 2250 | int i; |
| 2254 | 2251 | ||
| 2255 | /* | 2252 | /* |
| 2256 | * keep the root leaf page which has become empty | 2253 | * keep the root leaf page which has become empty |
| 2257 | */ | 2254 | */ |
| 2258 | if (BT_IS_ROOT(fmp)) { | 2255 | if (BT_IS_ROOT(fmp)) { |
| 2259 | /* | 2256 | /* |
| @@ -2269,7 +2266,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, | |||
| 2269 | } | 2266 | } |
| 2270 | 2267 | ||
| 2271 | /* | 2268 | /* |
| 2272 | * free the non-root leaf page | 2269 | * free the non-root leaf page |
| 2273 | */ | 2270 | */ |
| 2274 | /* | 2271 | /* |
| 2275 | * acquire a transaction lock on the page | 2272 | * acquire a transaction lock on the page |
| @@ -2299,7 +2296,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, | |||
| 2299 | discard_metapage(fmp); | 2296 | discard_metapage(fmp); |
| 2300 | 2297 | ||
| 2301 | /* | 2298 | /* |
| 2302 | * propagate page deletion up the directory tree | 2299 | * propagate page deletion up the directory tree |
| 2303 | * | 2300 | * |
| 2304 | * If the delete from the parent page makes it empty, | 2301 | * If the delete from the parent page makes it empty, |
| 2305 | * continue all the way up the tree. | 2302 | * continue all the way up the tree. |
| @@ -2440,10 +2437,10 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, | |||
| 2440 | 2437 | ||
| 2441 | #ifdef _NOTYET | 2438 | #ifdef _NOTYET |
| 2442 | /* | 2439 | /* |
| 2443 | * NAME: dtRelocate() | 2440 | * NAME: dtRelocate() |
| 2444 | * | 2441 | * |
| 2445 | * FUNCTION: relocate dtpage (internal or leaf) of directory; | 2442 | * FUNCTION: relocate dtpage (internal or leaf) of directory; |
| 2446 | * This function is mainly used by the defragfs utility. | 2443 | * This function is mainly used by the defragfs utility. |
| 2447 | */ | 2444 | */ |
| 2448 | int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, | 2445 | int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, |
| 2449 | s64 nxaddr) | 2446 | s64 nxaddr) |
| @@ -2471,8 +2468,8 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, | |||
| 2471 | xlen); | 2468 | xlen); |
| 2472 | 2469 | ||
| 2473 | /* | 2470 | /* |
| 2474 | * 1. get the internal parent dtpage covering | 2471 | * 1. get the internal parent dtpage covering |
| 2475 | * the router entry for the target page to be relocated; | 2472 | * the router entry for the target page to be relocated; |
| 2476 | */ | 2473 | */ |
| 2477 | rc = dtSearchNode(ip, lmxaddr, opxd, &btstack); | 2474 | rc = dtSearchNode(ip, lmxaddr, opxd, &btstack); |
| 2478 | if (rc) | 2475 | if (rc) |
| @@ -2483,7 +2480,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, | |||
| 2483 | jfs_info("dtRelocate: parent router entry validated."); | 2480 | jfs_info("dtRelocate: parent router entry validated."); |
| 2484 | 2481 | ||
| 2485 | /* | 2482 | /* |
| 2486 | * 2. relocate the target dtpage | 2483 | * 2. relocate the target dtpage |
| 2487 | */ | 2484 | */ |
| 2488 | /* read in the target page from src extent */ | 2485 | /* read in the target page from src extent */ |
| 2489 | DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); | 2486 | DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); |
| @@ -2581,9 +2578,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, | |||
| 2581 | 2578 | ||
| 2582 | /* update the buffer extent descriptor of the dtpage */ | 2579 | /* update the buffer extent descriptor of the dtpage */ |
| 2583 | xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; | 2580 | xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; |
| 2584 | #ifdef _STILL_TO_PORT | 2581 | |
| 2585 | bmSetXD(mp, nxaddr, xsize); | ||
| 2586 | #endif /* _STILL_TO_PORT */ | ||
| 2587 | /* unpin the relocated page */ | 2582 | /* unpin the relocated page */ |
| 2588 | DT_PUTPAGE(mp); | 2583 | DT_PUTPAGE(mp); |
| 2589 | jfs_info("dtRelocate: target dtpage relocated."); | 2584 | jfs_info("dtRelocate: target dtpage relocated."); |
| @@ -2594,7 +2589,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, | |||
| 2594 | */ | 2589 | */ |
| 2595 | 2590 | ||
| 2596 | /* | 2591 | /* |
| 2597 | * 3. acquire maplock for the source extent to be freed; | 2592 | * 3. acquire maplock for the source extent to be freed; |
| 2598 | */ | 2593 | */ |
| 2599 | /* for dtpage relocation, write a LOG_NOREDOPAGE record | 2594 | /* for dtpage relocation, write a LOG_NOREDOPAGE record |
| 2600 | * for the source dtpage (logredo() will init NoRedoPage | 2595 | * for the source dtpage (logredo() will init NoRedoPage |
| @@ -2609,7 +2604,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, | |||
| 2609 | pxdlock->index = 1; | 2604 | pxdlock->index = 1; |
| 2610 | 2605 | ||
| 2611 | /* | 2606 | /* |
| 2612 | * 4. update the parent router entry for relocation; | 2607 | * 4. update the parent router entry for relocation; |
| 2613 | * | 2608 | * |
| 2614 | * acquire tlck for the parent entry covering the target dtpage; | 2609 | * acquire tlck for the parent entry covering the target dtpage; |
| 2615 | * write LOG_REDOPAGE to apply after image only; | 2610 | * write LOG_REDOPAGE to apply after image only; |
| @@ -2637,7 +2632,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, | |||
| 2637 | * NAME: dtSearchNode() | 2632 | * NAME: dtSearchNode() |
| 2638 | * | 2633 | * |
| 2639 | * FUNCTION: Search for a dtpage containing a specified address | 2634 | * FUNCTION: Search for a dtpage containing a specified address |
| 2640 | * This function is mainly used by the defragfs utility. | 2635 | * This function is mainly used by the defragfs utility. |
| 2641 | * | 2636 | * |
| 2642 | * NOTE: Search result on stack, the found page is pinned at exit. | 2637 | * NOTE: Search result on stack, the found page is pinned at exit. |
| 2643 | * The result page must be an internal dtpage. | 2638 | * The result page must be an internal dtpage. |
| @@ -2660,7 +2655,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd, | |||
| 2660 | BT_CLR(btstack); /* reset stack */ | 2655 | BT_CLR(btstack); /* reset stack */ |
| 2661 | 2656 | ||
| 2662 | /* | 2657 | /* |
| 2663 | * descend tree to the level with specified leftmost page | 2658 | * descend tree to the level with specified leftmost page |
| 2664 | * | 2659 | * |
| 2665 | * by convention, root bn = 0. | 2660 | * by convention, root bn = 0. |
| 2666 | */ | 2661 | */ |
| @@ -2699,7 +2694,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd, | |||
| 2699 | } | 2694 | } |
| 2700 | 2695 | ||
| 2701 | /* | 2696 | /* |
| 2702 | * search each page at the current level | 2697 | * search each page at the current level |
| 2703 | */ | 2698 | */ |
| 2704 | loop: | 2699 | loop: |
| 2705 | stbl = DT_GETSTBL(p); | 2700 | stbl = DT_GETSTBL(p); |
| @@ -3044,9 +3039,9 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 3044 | if (DO_INDEX(ip)) { | 3039 | if (DO_INDEX(ip)) { |
| 3045 | /* | 3040 | /* |
| 3046 | * persistent index is stored in directory entries. | 3041 | * persistent index is stored in directory entries. |
| 3047 | * Special cases: 0 = . | 3042 | * Special cases: 0 = . |
| 3048 | * 1 = .. | 3043 | * 1 = .. |
| 3049 | * -1 = End of directory | 3044 | * -1 = End of directory |
| 3050 | */ | 3045 | */ |
| 3051 | do_index = 1; | 3046 | do_index = 1; |
| 3052 | 3047 | ||
| @@ -3128,10 +3123,10 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 3128 | /* | 3123 | /* |
| 3129 | * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 | 3124 | * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 |
| 3130 | * | 3125 | * |
| 3131 | * pn = index = 0: First entry "." | 3126 | * pn = index = 0: First entry "." |
| 3132 | * pn = 0; index = 1: Second entry ".." | 3127 | * pn = 0; index = 1: Second entry ".." |
| 3133 | * pn > 0: Real entries, pn=1 -> leftmost page | 3128 | * pn > 0: Real entries, pn=1 -> leftmost page |
| 3134 | * pn = index = -1: No more entries | 3129 | * pn = index = -1: No more entries |
| 3135 | */ | 3130 | */ |
| 3136 | dtpos = filp->f_pos; | 3131 | dtpos = filp->f_pos; |
| 3137 | if (dtpos == 0) { | 3132 | if (dtpos == 0) { |
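In the legacy case the 64-bit f_pos is really a (page number, index) pair rather than a flat index. The function decodes it by overlaying a small struct on the cookie; a sketch mirroring the code just below this hunk in jfs_dtree.c:

        struct dtoffset {       /* overlay on the loff_t readdir cookie */
                s16 pn;         /* dtpage number; 1 = leftmost leaf */
                s16 index;      /* entry index within that dtpage */
                s32 unused;
        } *dtoffset = (struct dtoffset *) &dtpos;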
| @@ -3351,7 +3346,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack) | |||
| 3351 | BT_CLR(btstack); /* reset stack */ | 3346 | BT_CLR(btstack); /* reset stack */ |
| 3352 | 3347 | ||
| 3353 | /* | 3348 | /* |
| 3354 | * descend leftmost path of the tree | 3349 | * descend leftmost path of the tree |
| 3355 | * | 3350 | * |
| 3356 | * by convention, root bn = 0. | 3351 | * by convention, root bn = 0. |
| 3357 | */ | 3352 | */ |
| @@ -4531,7 +4526,7 @@ int dtModify(tid_t tid, struct inode *ip, | |||
| 4531 | struct ldtentry *entry; | 4526 | struct ldtentry *entry; |
| 4532 | 4527 | ||
| 4533 | /* | 4528 | /* |
| 4534 | * search for the entry to modify: | 4529 | * search for the entry to modify: |
| 4535 | * | 4530 | * |
| 4536 | * dtSearch() returns (leaf page pinned, index at which to modify). | 4531 | * dtSearch() returns (leaf page pinned, index at which to modify). |
| 4537 | */ | 4532 | */ |
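A condensed sketch of the flow this comment heads: dtSearch() returns with the leaf pinned and the slot index in hand, the entry is patched under a transaction lock, and the page is unpinned. This follows jfs_dtree.c's conventions but is not a verbatim copy (declarations and lock bookkeeping trimmed):

        if ((rc = dtSearch(ip, key, &orig_ino, &btstack, flag)))
                return rc;
        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);    /* pinned leaf */

        tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
        stbl = DT_GETSTBL(p);
        entry = (struct ldtentry *) &p->slot[stbl[index]];
        entry->inumber = cpu_to_le32(new_ino);  /* the actual modification */

        DT_PUTPAGE(mp);                         /* unpin the leaf */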
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h index af8513f78648..8561c6ecece0 100644 --- a/fs/jfs/jfs_dtree.h +++ b/fs/jfs/jfs_dtree.h | |||
| @@ -35,7 +35,7 @@ typedef union { | |||
| 35 | 35 | ||
| 36 | 36 | ||
| 37 | /* | 37 | /* |
| 38 | * entry segment/slot | 38 | * entry segment/slot |
| 39 | * | 39 | * |
| 40 | * an entry consists of type dependent head/only segment/slot and | 40 | * an entry consists of type dependent head/only segment/slot and |
| 41 | * additional segments/slots linked via the next field; | 41 | * additional segments/slots linked via the next field; |
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index a35bdca6a805..7ae1e3281de9 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c | |||
| @@ -34,8 +34,8 @@ static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *); | |||
| 34 | #endif | 34 | #endif |
| 35 | static s64 extRoundDown(s64 nb); | 35 | static s64 extRoundDown(s64 nb); |
| 36 | 36 | ||
| 37 | #define DPD(a) (printk(#a ": %d\n",(a))) | 37 | #define DPD(a) (printk(#a ": %d\n",(a))) |
| 38 | #define DPC(a) (printk(#a ": %c\n",(a))) | 38 | #define DPC(a) (printk(#a ": %c\n",(a))) |
| 39 | #define DPL1(a) \ | 39 | #define DPL1(a) \ |
| 40 | { \ | 40 | { \ |
| 41 | if ((a) >> 32) \ | 41 | if ((a) >> 32) \ |
| @@ -51,19 +51,19 @@ static s64 extRoundDown(s64 nb); | |||
| 51 | printk(#a ": %x\n",(a) << 32); \ | 51 | printk(#a ": %x\n",(a) << 32); \ |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | #define DPD1(a) (printk(#a ": %d ",(a))) | 54 | #define DPD1(a) (printk(#a ": %d ",(a))) |
| 55 | #define DPX(a) (printk(#a ": %08x\n",(a))) | 55 | #define DPX(a) (printk(#a ": %08x\n",(a))) |
| 56 | #define DPX1(a) (printk(#a ": %08x ",(a))) | 56 | #define DPX1(a) (printk(#a ": %08x ",(a))) |
| 57 | #define DPS(a) (printk("%s\n",(a))) | 57 | #define DPS(a) (printk("%s\n",(a))) |
| 58 | #define DPE(a) (printk("\nENTERING: %s\n",(a))) | 58 | #define DPE(a) (printk("\nENTERING: %s\n",(a))) |
| 59 | #define DPE1(a) (printk("\nENTERING: %s",(a))) | 59 | #define DPE1(a) (printk("\nENTERING: %s",(a))) |
| 60 | #define DPS1(a) (printk(" %s ",(a))) | 60 | #define DPS1(a) (printk(" %s ",(a))) |
| 61 | 61 | ||
| 62 | 62 | ||
| 63 | /* | 63 | /* |
| 64 | * NAME: extAlloc() | 64 | * NAME: extAlloc() |
| 65 | * | 65 | * |
| 66 | * FUNCTION: allocate an extent for a specified page range within a | 66 | * FUNCTION: allocate an extent for a specified page range within a |
| 67 | * file. | 67 | * file. |
| 68 | * | 68 | * |
| 69 | * PARAMETERS: | 69 | * PARAMETERS: |
| @@ -78,9 +78,9 @@ static s64 extRoundDown(s64 nb); | |||
| 78 | * should be marked as allocated but not recorded. | 78 | * should be marked as allocated but not recorded. |
| 79 | * | 79 | * |
| 80 | * RETURN VALUES: | 80 | * RETURN VALUES: |
| 81 | * 0 - success | 81 | * 0 - success |
| 82 | * -EIO - i/o error. | 82 | * -EIO - i/o error. |
| 83 | * -ENOSPC - insufficient disk resources. | 83 | * -ENOSPC - insufficient disk resources. |
| 84 | */ | 84 | */ |
| 85 | int | 85 | int |
| 86 | extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) | 86 | extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) |
| @@ -192,9 +192,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) | |||
| 192 | 192 | ||
| 193 | #ifdef _NOTYET | 193 | #ifdef _NOTYET |
| 194 | /* | 194 | /* |
| 195 | * NAME: extRealloc() | 195 | * NAME: extRealloc() |
| 196 | * | 196 | * |
| 197 | * FUNCTION: extend the allocation of a file extent containing a | 197 | * FUNCTION: extend the allocation of a file extent containing a |
| 198 | * partially backed last page. | 198 | * partially backed last page. |
| 199 | * | 199 | * |
| 200 | * PARAMETERS: | 200 | * PARAMETERS: |
| @@ -207,9 +207,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) | |||
| 207 | * should be marked as allocated but not recorded. | 207 | * should be marked as allocated but not recorded. |
| 208 | * | 208 | * |
| 209 | * RETURN VALUES: | 209 | * RETURN VALUES: |
| 210 | * 0 - success | 210 | * 0 - success |
| 211 | * -EIO - i/o error. | 211 | * -EIO - i/o error. |
| 212 | * -ENOSPC - insufficient disk resources. | 212 | * -ENOSPC - insufficient disk resources. |
| 213 | */ | 213 | */ |
| 214 | int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) | 214 | int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) |
| 215 | { | 215 | { |
| @@ -345,9 +345,9 @@ exit: | |||
| 345 | 345 | ||
| 346 | 346 | ||
| 347 | /* | 347 | /* |
| 348 | * NAME: extHint() | 348 | * NAME: extHint() |
| 349 | * | 349 | * |
| 350 | * FUNCTION: produce an extent allocation hint for a file offset. | 350 | * FUNCTION: produce an extent allocation hint for a file offset. |
| 351 | * | 351 | * |
| 352 | * PARAMETERS: | 352 | * PARAMETERS: |
| 353 | * ip - the inode of the file. | 353 | * ip - the inode of the file. |
| @@ -356,8 +356,8 @@ exit: | |||
| 356 | * the hint. | 356 | * the hint. |
| 357 | * | 357 | * |
| 358 | * RETURN VALUES: | 358 | * RETURN VALUES: |
| 359 | * 0 - success | 359 | * 0 - success |
| 360 | * -EIO - i/o error. | 360 | * -EIO - i/o error. |
| 361 | */ | 361 | */ |
| 362 | int extHint(struct inode *ip, s64 offset, xad_t * xp) | 362 | int extHint(struct inode *ip, s64 offset, xad_t * xp) |
| 363 | { | 363 | { |
| @@ -387,7 +387,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) | |||
| 387 | lxdl.nlxd = 1; | 387 | lxdl.nlxd = 1; |
| 388 | lxdl.lxd = &lxd; | 388 | lxdl.lxd = &lxd; |
| 389 | LXDoffset(&lxd, prev) | 389 | LXDoffset(&lxd, prev) |
| 390 | LXDlength(&lxd, nbperpage); | 390 | LXDlength(&lxd, nbperpage); |
| 391 | 391 | ||
| 392 | xadl.maxnxad = 1; | 392 | xadl.maxnxad = 1; |
| 393 | xadl.nxad = 0; | 393 | xadl.nxad = 0; |
| @@ -397,11 +397,11 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) | |||
| 397 | if ((rc = xtLookupList(ip, &lxdl, &xadl, 0))) | 397 | if ((rc = xtLookupList(ip, &lxdl, &xadl, 0))) |
| 398 | return (rc); | 398 | return (rc); |
| 399 | 399 | ||
| 400 | /* check if not extent exists for the previous page. | 400 | /* check if no extent exists for the previous page. |
| 401 | * this is possible for sparse files. | 401 | * this is possible for sparse files. |
| 402 | */ | 402 | */ |
| 403 | if (xadl.nxad == 0) { | 403 | if (xadl.nxad == 0) { |
| 404 | // assert(ISSPARSE(ip)); | 404 | // assert(ISSPARSE(ip)); |
| 405 | return (0); | 405 | return (0); |
| 406 | } | 406 | } |
| 407 | 407 | ||
| @@ -410,28 +410,28 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) | |||
| 410 | */ | 410 | */ |
| 411 | xp->flag &= XAD_NOTRECORDED; | 411 | xp->flag &= XAD_NOTRECORDED; |
| 412 | 412 | ||
| 413 | if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) { | 413 | if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) { |
| 414 | jfs_error(ip->i_sb, "extHint: corrupt xtree"); | 414 | jfs_error(ip->i_sb, "extHint: corrupt xtree"); |
| 415 | return -EIO; | 415 | return -EIO; |
| 416 | } | 416 | } |
| 417 | 417 | ||
| 418 | return (0); | 418 | return (0); |
| 419 | } | 419 | } |
| 420 | 420 | ||
| 421 | 421 | ||
| 422 | /* | 422 | /* |
| 423 | * NAME: extRecord() | 423 | * NAME: extRecord() |
| 424 | * | 424 | * |
| 425 | * FUNCTION: change a page within a file from not recorded to recorded. | 425 | * FUNCTION: change a page within a file from not recorded to recorded. |
| 426 | * | 426 | * |
| 427 | * PARAMETERS: | 427 | * PARAMETERS: |
| 428 | * ip - inode of the file. | 428 | * ip - inode of the file. |
| 429 | * cp - cbuf of the file page. | 429 | * cp - cbuf of the file page. |
| 430 | * | 430 | * |
| 431 | * RETURN VALUES: | 431 | * RETURN VALUES: |
| 432 | * 0 - success | 432 | * 0 - success |
| 433 | * -EIO - i/o error. | 433 | * -EIO - i/o error. |
| 434 | * -ENOSPC - insufficient disk resources. | 434 | * -ENOSPC - insufficient disk resources. |
| 435 | */ | 435 | */ |
| 436 | int extRecord(struct inode *ip, xad_t * xp) | 436 | int extRecord(struct inode *ip, xad_t * xp) |
| 437 | { | 437 | { |
| @@ -451,9 +451,9 @@ int extRecord(struct inode *ip, xad_t * xp) | |||
| 451 | 451 | ||
| 452 | #ifdef _NOTYET | 452 | #ifdef _NOTYET |
| 453 | /* | 453 | /* |
| 454 | * NAME: extFill() | 454 | * NAME: extFill() |
| 455 | * | 455 | * |
| 456 | * FUNCTION: allocate disk space for a file page that represents | 456 | * FUNCTION: allocate disk space for a file page that represents |
| 457 | * a file hole. | 457 | * a file hole. |
| 458 | * | 458 | * |
| 459 | * PARAMETERS: | 459 | * PARAMETERS: |
| @@ -461,16 +461,16 @@ int extRecord(struct inode *ip, xad_t * xp) | |||
| 461 | * cp - cbuf of the file page represent the hole. | 461 | * cp - cbuf of the file page represent the hole. |
| 462 | * | 462 | * |
| 463 | * RETURN VALUES: | 463 | * RETURN VALUES: |
| 464 | * 0 - success | 464 | * 0 - success |
| 465 | * -EIO - i/o error. | 465 | * -EIO - i/o error. |
| 466 | * -ENOSPC - insufficient disk resources. | 466 | * -ENOSPC - insufficient disk resources. |
| 467 | */ | 467 | */ |
| 468 | int extFill(struct inode *ip, xad_t * xp) | 468 | int extFill(struct inode *ip, xad_t * xp) |
| 469 | { | 469 | { |
| 470 | int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; | 470 | int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; |
| 471 | s64 blkno = offsetXAD(xp) >> ip->i_blkbits; | 471 | s64 blkno = offsetXAD(xp) >> ip->i_blkbits; |
| 472 | 472 | ||
| 473 | // assert(ISSPARSE(ip)); | 473 | // assert(ISSPARSE(ip)); |
| 474 | 474 | ||
| 475 | /* initialize the extent allocation hint */ | 475 | /* initialize the extent allocation hint */ |
| 476 | XADaddress(xp, 0); | 476 | XADaddress(xp, 0); |
| @@ -489,7 +489,7 @@ int extFill(struct inode *ip, xad_t * xp) | |||
| 489 | /* | 489 | /* |
| 490 | * NAME: extBalloc() | 490 | * NAME: extBalloc() |
| 491 | * | 491 | * |
| 492 | * FUNCTION: allocate disk blocks to form an extent. | 492 | * FUNCTION: allocate disk blocks to form an extent. |
| 493 | * | 493 | * |
| 494 | * initially, we will try to allocate disk blocks for the | 494 | * initially, we will try to allocate disk blocks for the |
| 495 | * requested size (nblocks). if this fails (nblocks | 495 | * requested size (nblocks). if this fails (nblocks |
| @@ -513,9 +513,9 @@ int extFill(struct inode *ip, xad_t * xp) | |||
| 513 | * allocated block range. | 513 | * allocated block range. |
| 514 | * | 514 | * |
| 515 | * RETURN VALUES: | 515 | * RETURN VALUES: |
| 516 | * 0 - success | 516 | * 0 - success |
| 517 | * -EIO - i/o error. | 517 | * -EIO - i/o error. |
| 518 | * -ENOSPC - insufficient disk resources. | 518 | * -ENOSPC - insufficient disk resources. |
| 519 | */ | 519 | */ |
| 520 | static int | 520 | static int |
| 521 | extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) | 521 | extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) |
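The allocation policy extBalloc()'s comment describes reduces to a retry loop: keep halving an -ENOSPC'd request (via extRoundDown(), below) until the block allocator succeeds or a single page can no longer be backed. A sketch under that reading; the real body also caps the first attempt at the largest free run in the block map, which is omitted here, and declarations are trimmed:

        nb = nblks = *nblocks;
        while ((rc = dbAlloc(ip, hint, nb, &daddr)) != 0) {
                if (rc != -ENOSPC)
                        return rc;                      /* hard error */
                nb = min(nblks, extRoundDown(nb));      /* shrink request */
                if (nb < nbperpage)
                        return -ENOSPC;                 /* can't back a page */
        }
        *nblocks = nb;          /* what we actually got */
        *blkno = daddr;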
| @@ -580,7 +580,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) | |||
| 580 | /* | 580 | /* |
| 581 | * NAME: extBrealloc() | 581 | * NAME: extBrealloc() |
| 582 | * | 582 | * |
| 583 | * FUNCTION: attempt to extend an extent's allocation. | 583 | * FUNCTION: attempt to extend an extent's allocation. |
| 584 | * | 584 | * |
| 585 | * Initially, we will try to extend the extent's allocation | 585 | * Initially, we will try to extend the extent's allocation |
| 586 | * in place. If this fails, we'll try to move the extent | 586 | * in place. If this fails, we'll try to move the extent |
| @@ -597,8 +597,8 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) | |||
| 597 | * | 597 | * |
| 598 | * PARAMETERS: | 598 | * PARAMETERS: |
| 599 | * ip - the inode of the file. | 599 | * ip - the inode of the file. |
| 600 | * blkno - starting block number of the extent's current allocation. | 600 | * blkno - starting block number of the extent's current allocation. |
| 601 | * nblks - number of blocks within the extent's current allocation. | 601 | * nblks - number of blocks within the extent's current allocation. |
| 602 | * newnblks - pointer to a s64 value. on entry, this value is the | 602 | * newnblks - pointer to a s64 value. on entry, this value is the |
| 603 | * the new desired extent size (number of blocks). on | 603 | * the new desired extent size (number of blocks). on |
| 604 | * successful exit, this value is set to the extent's actual | 604 | * successful exit, this value is set to the extent's actual |
| @@ -606,9 +606,9 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) | |||
| 606 | * newblkno - the starting block number of the extent's new allocation. | 606 | * newblkno - the starting block number of the extent's new allocation. |
| 607 | * | 607 | * |
| 608 | * RETURN VALUES: | 608 | * RETURN VALUES: |
| 609 | * 0 - success | 609 | * 0 - success |
| 610 | * -EIO - i/o error. | 610 | * -EIO - i/o error. |
| 611 | * -ENOSPC - insufficient disk resources. | 611 | * -ENOSPC - insufficient disk resources. |
| 612 | */ | 612 | */ |
| 613 | static int | 613 | static int |
| 614 | extBrealloc(struct inode *ip, | 614 | extBrealloc(struct inode *ip, |
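The extend-or-move strategy boils down to two calls; a minimal sketch (dbExtend() is the block-map helper that grows an allocation in place, and the fallback reuses extBalloc() above; bookkeeping elided):

        /* try to grow the existing extent in place */
        if ((rc = dbExtend(ip, blkno, nblks, *newnblks - nblks)) == 0) {
                *newblkno = blkno;              /* extended in place */
                return 0;
        }
        if (rc != -ENOSPC)
                return rc;

        /* no room behind the extent: relocate to a fresh allocation */
        return extBalloc(ip, blkno, newnblks, newblkno);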
| @@ -634,16 +634,16 @@ extBrealloc(struct inode *ip, | |||
| 634 | 634 | ||
| 635 | 635 | ||
| 636 | /* | 636 | /* |
| 637 | * NAME: extRoundDown() | 637 | * NAME: extRoundDown() |
| 638 | * | 638 | * |
| 639 | * FUNCTION: round down a specified number of blocks to the next | 639 | * FUNCTION: round down a specified number of blocks to the next |
| 640 | * smaller power of 2. | 640 | * smaller power of 2. |
| 641 | * | 641 | * |
| 642 | * PARAMETERS: | 642 | * PARAMETERS: |
| 643 | * nb - number of blocks to round down. | 643 | * nb - number of blocks to round down. |
| 644 | * | 644 | * |
| 645 | * RETURN VALUES: | 645 | * RETURN VALUES: |
| 646 | * next smaller power of 2. | 646 | * next smaller power of 2. |
| 647 | */ | 647 | */ |
| 648 | static s64 extRoundDown(s64 nb) | 648 | static s64 extRoundDown(s64 nb) |
| 649 | { | 649 | { |
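Note the rounding is strict: a value that is already a power of 2 rounds to the next power of 2 below it, which is what guarantees extBalloc()'s retry loop always shrinks. An equivalent compact sketch using the kernel's fls64(); the in-tree version open-codes the bit scan, and the name here is illustrative:

        static s64 round_down_pow2(s64 nb)      /* sketch; assumes nb > 0 */
        {
                s64 k = 1LL << (fls64(nb) - 1); /* highest set bit of nb */

                /* strictly smaller result when nb is already a power of 2 */
                return (nb & (k - 1)) ? k : k >> 1;
        }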
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index 38f70ac03bec..b3f5463fbe52 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h | |||
| @@ -34,9 +34,9 @@ | |||
| 34 | #define JFS_UNICODE 0x00000001 /* unicode name */ | 34 | #define JFS_UNICODE 0x00000001 /* unicode name */ |
| 35 | 35 | ||
| 36 | /* mount time flags for error handling */ | 36 | /* mount time flags for error handling */ |
| 37 | #define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */ | 37 | #define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */ |
| 38 | #define JFS_ERR_CONTINUE 0x00000004 /* continue */ | 38 | #define JFS_ERR_CONTINUE 0x00000004 /* continue */ |
| 39 | #define JFS_ERR_PANIC 0x00000008 /* panic */ | 39 | #define JFS_ERR_PANIC 0x00000008 /* panic */ |
| 40 | 40 | ||
| 41 | /* Quota support */ | 41 | /* Quota support */ |
| 42 | #define JFS_USRQUOTA 0x00000010 | 42 | #define JFS_USRQUOTA 0x00000010 |
| @@ -83,7 +83,6 @@ | |||
| 83 | /* case-insensitive name/directory support */ | 83 | /* case-insensitive name/directory support */ |
| 84 | 84 | ||
| 85 | #define JFS_AIX 0x80000000 /* AIX support */ | 85 | #define JFS_AIX 0x80000000 /* AIX support */ |
| 86 | /* POSIX name/directory support - Never implemented*/ | ||
| 87 | 86 | ||
| 88 | /* | 87 | /* |
| 89 | * buffer cache configuration | 88 | * buffer cache configuration |
| @@ -113,10 +112,10 @@ | |||
| 113 | #define IDATASIZE 256 /* inode inline data size */ | 112 | #define IDATASIZE 256 /* inode inline data size */ |
| 114 | #define IXATTRSIZE 128 /* inode inline extended attribute size */ | 113 | #define IXATTRSIZE 128 /* inode inline extended attribute size */ |
| 115 | 114 | ||
| 116 | #define XTPAGE_SIZE 4096 | 115 | #define XTPAGE_SIZE 4096 |
| 117 | #define log2_PAGESIZE 12 | 116 | #define log2_PAGESIZE 12 |
| 118 | 117 | ||
| 119 | #define IAG_SIZE 4096 | 118 | #define IAG_SIZE 4096 |
| 120 | #define IAG_EXTENT_SIZE 4096 | 119 | #define IAG_EXTENT_SIZE 4096 |
| 121 | #define INOSPERIAG 4096 /* number of disk inodes per iag */ | 120 | #define INOSPERIAG 4096 /* number of disk inodes per iag */ |
| 122 | #define L2INOSPERIAG 12 /* l2 number of disk inodes per iag */ | 121 | #define L2INOSPERIAG 12 /* l2 number of disk inodes per iag */ |
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index c6530227cda6..3870ba8b9086 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
| @@ -93,21 +93,21 @@ static int copy_from_dinode(struct dinode *, struct inode *); | |||
| 93 | static void copy_to_dinode(struct dinode *, struct inode *); | 93 | static void copy_to_dinode(struct dinode *, struct inode *); |
| 94 | 94 | ||
| 95 | /* | 95 | /* |
| 96 | * NAME: diMount() | 96 | * NAME: diMount() |
| 97 | * | 97 | * |
| 98 | * FUNCTION: initialize the incore inode map control structures for | 98 | * FUNCTION: initialize the incore inode map control structures for |
| 99 | * a fileset or aggregate at init time. | 99 | * a fileset or aggregate at init time. |
| 100 | * | 100 | * |
| 101 | * the inode map's control structure (dinomap) is | 101 | * the inode map's control structure (dinomap) is |
| 102 | * brought in from disk and placed in virtual memory. | 102 | * brought in from disk and placed in virtual memory. |
| 103 | * | 103 | * |
| 104 | * PARAMETERS: | 104 | * PARAMETERS: |
| 105 | * ipimap - pointer to inode map inode for the aggregate or fileset. | 105 | * ipimap - pointer to inode map inode for the aggregate or fileset. |
| 106 | * | 106 | * |
| 107 | * RETURN VALUES: | 107 | * RETURN VALUES: |
| 108 | * 0 - success | 108 | * 0 - success |
| 109 | * -ENOMEM - insufficient free virtual memory. | 109 | * -ENOMEM - insufficient free virtual memory. |
| 110 | * -EIO - i/o error. | 110 | * -EIO - i/o error. |
| 111 | */ | 111 | */ |
| 112 | int diMount(struct inode *ipimap) | 112 | int diMount(struct inode *ipimap) |
| 113 | { | 113 | { |
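"Brought in from disk and placed in virtual memory" amounts to a metapage read plus an endian-converting copy; a minimal sketch under that assumption (the field-by-field conversion and locking setup are elided, and the flow is reconstructed, not quoted):

        struct inomap *imap = kmalloc(sizeof(struct inomap), GFP_KERNEL);
        struct metapage *mp;

        if (!imap)
                return -ENOMEM;
        /* the dinomap lives at logical block IMAPBLKNO of the map inode */
        mp = read_metapage(ipimap,
                           IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
                           PSIZE, 0);
        if (!mp) {
                kfree(imap);
                return -EIO;
        }
        /* ... le32_to_cpu() each control field from mp->data ... */
        release_metapage(mp);
        JFS_IP(ipimap)->i_imap = imap;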
| @@ -180,18 +180,18 @@ int diMount(struct inode *ipimap) | |||
| 180 | 180 | ||
| 181 | 181 | ||
| 182 | /* | 182 | /* |
| 183 | * NAME: diUnmount() | 183 | * NAME: diUnmount() |
| 184 | * | 184 | * |
| 185 | * FUNCTION: write to disk the incore inode map control structures for | 185 | * FUNCTION: write to disk the incore inode map control structures for |
| 186 | * a fileset or aggregate at unmount time. | 186 | * a fileset or aggregate at unmount time. |
| 187 | * | 187 | * |
| 188 | * PARAMETERS: | 188 | * PARAMETERS: |
| 189 | * ipimap - pointer to inode map inode for the aggregate or fileset. | 189 | * ipimap - pointer to inode map inode for the aggregate or fileset. |
| 190 | * | 190 | * |
| 191 | * RETURN VALUES: | 191 | * RETURN VALUES: |
| 192 | * 0 - success | 192 | * 0 - success |
| 193 | * -ENOMEM - insufficient free virtual memory. | 193 | * -ENOMEM - insufficient free virtual memory. |
| 194 | * -EIO - i/o error. | 194 | * -EIO - i/o error. |
| 195 | */ | 195 | */ |
| 196 | int diUnmount(struct inode *ipimap, int mounterror) | 196 | int diUnmount(struct inode *ipimap, int mounterror) |
| 197 | { | 197 | { |
| @@ -274,9 +274,9 @@ int diSync(struct inode *ipimap) | |||
| 274 | 274 | ||
| 275 | 275 | ||
| 276 | /* | 276 | /* |
| 277 | * NAME: diRead() | 277 | * NAME: diRead() |
| 278 | * | 278 | * |
| 279 | * FUNCTION: initialize an incore inode from disk. | 279 | * FUNCTION: initialize an incore inode from disk. |
| 280 | * | 280 | * |
| 281 | * on entry, the specified incore inode should itself | 281 | * on entry, the specified incore inode should itself |
| 282 | * specify the disk inode number corresponding to the | 282 | * specify the disk inode number corresponding to the |
| @@ -285,7 +285,7 @@ int diSync(struct inode *ipimap) | |||
| 285 | * this routine handles incore inode initialization for | 285 | * this routine handles incore inode initialization for |
| 286 | * both "special" and "regular" inodes. special inodes | 286 | * both "special" and "regular" inodes. special inodes |
| 287 | * are those required early in the mount process and | 287 | * are those required early in the mount process and |
| 288 | * require special handling since much of the file system | 288 | * require special handling since much of the file system |
| 289 | * is not yet initialized. these "special" inodes are | 289 | * is not yet initialized. these "special" inodes are |
| 290 | * identified by a NULL inode map inode pointer and are | 290 | * identified by a NULL inode map inode pointer and are |
| 291 | * actually initialized by a call to diReadSpecial(). | 291 | * actually initialized by a call to diReadSpecial(). |
| @@ -298,12 +298,12 @@ int diSync(struct inode *ipimap) | |||
| 298 | * incore inode. | 298 | * incore inode. |
| 299 | * | 299 | * |
| 300 | * PARAMETERS: | 300 | * PARAMETERS: |
| 301 | * ip - pointer to incore inode to be initialized from disk. | 301 | * ip - pointer to incore inode to be initialized from disk. |
| 302 | * | 302 | * |
| 303 | * RETURN VALUES: | 303 | * RETURN VALUES: |
| 304 | * 0 - success | 304 | * 0 - success |
| 305 | * -EIO - i/o error. | 305 | * -EIO - i/o error. |
| 306 | * -ENOMEM - insufficient memory | 306 | * -ENOMEM - insufficient memory |
| 307 | * | 307 | * |
| 308 | */ | 308 | */ |
| 309 | int diRead(struct inode *ip) | 309 | int diRead(struct inode *ip) |
| @@ -410,26 +410,26 @@ int diRead(struct inode *ip) | |||
| 410 | 410 | ||
| 411 | 411 | ||
| 412 | /* | 412 | /* |
| 413 | * NAME: diReadSpecial() | 413 | * NAME: diReadSpecial() |
| 414 | * | 414 | * |
| 415 | * FUNCTION: initialize a 'special' inode from disk. | 415 | * FUNCTION: initialize a 'special' inode from disk. |
| 416 | * | 416 | * |
| 417 | * this routine handles aggregate level inodes. The | 417 | * this routine handles aggregate level inodes. The |
| 418 | * inode cache cannot differentiate between the | 418 | * inode cache cannot differentiate between the |
| 419 | * aggregate inodes and the filesystem inodes, so we | 419 | * aggregate inodes and the filesystem inodes, so we |
| 420 | * handle these here. We don't actually use the aggregate | 420 | * handle these here. We don't actually use the aggregate |
| 421 | * inode map, since these inodes are at a fixed location | 421 | * inode map, since these inodes are at a fixed location |
| 422 | * and in some cases the aggregate inode map isn't initialized | 422 | * and in some cases the aggregate inode map isn't initialized |
| 423 | * yet. | 423 | * yet. |
| 424 | * | 424 | * |
| 425 | * PARAMETERS: | 425 | * PARAMETERS: |
| 426 | * sb - filesystem superblock | 426 | * sb - filesystem superblock |
| 427 | * inum - aggregate inode number | 427 | * inum - aggregate inode number |
| 428 | * secondary - 1 if secondary aggregate inode table | 428 | * secondary - 1 if secondary aggregate inode table |
| 429 | * | 429 | * |
| 430 | * RETURN VALUES: | 430 | * RETURN VALUES: |
| 431 | * new inode - success | 431 | * new inode - success |
| 432 | * NULL - i/o error. | 432 | * NULL - i/o error. |
| 433 | */ | 433 | */ |
| 434 | struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) | 434 | struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) |
| 435 | { | 435 | { |
| @@ -502,12 +502,12 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) | |||
| 502 | } | 502 | } |
| 503 | 503 | ||
| 504 | /* | 504 | /* |
| 505 | * NAME: diWriteSpecial() | 505 | * NAME: diWriteSpecial() |
| 506 | * | 506 | * |
| 507 | * FUNCTION: Write the special inode to disk | 507 | * FUNCTION: Write the special inode to disk |
| 508 | * | 508 | * |
| 509 | * PARAMETERS: | 509 | * PARAMETERS: |
| 510 | * ip - special inode | 510 | * ip - special inode |
| 511 | * secondary - 1 if secondary aggregate inode table | 511 | * secondary - 1 if secondary aggregate inode table |
| 512 | * | 512 | * |
| 513 | * RETURN VALUES: none | 513 | * RETURN VALUES: none |
| @@ -554,9 +554,9 @@ void diWriteSpecial(struct inode *ip, int secondary) | |||
| 554 | } | 554 | } |
| 555 | 555 | ||
| 556 | /* | 556 | /* |
| 557 | * NAME: diFreeSpecial() | 557 | * NAME: diFreeSpecial() |
| 558 | * | 558 | * |
| 559 | * FUNCTION: Free allocated space for special inode | 559 | * FUNCTION: Free allocated space for special inode |
| 560 | */ | 560 | */ |
| 561 | void diFreeSpecial(struct inode *ip) | 561 | void diFreeSpecial(struct inode *ip) |
| 562 | { | 562 | { |
| @@ -572,9 +572,9 @@ void diFreeSpecial(struct inode *ip) | |||
| 572 | 572 | ||
| 573 | 573 | ||
| 574 | /* | 574 | /* |
| 575 | * NAME: diWrite() | 575 | * NAME: diWrite() |
| 576 | * | 576 | * |
| 577 | * FUNCTION: write the on-disk inode portion of the in-memory inode | 577 | * FUNCTION: write the on-disk inode portion of the in-memory inode |
| 578 | * to its corresponding on-disk inode. | 578 | * to its corresponding on-disk inode. |
| 579 | * | 579 | * |
| 580 | * on entry, the specified incore inode should itself | 580 | * on entry, the specified incore inode should itself |
| @@ -589,11 +589,11 @@ void diFreeSpecial(struct inode *ip) | |||
| 589 | * | 589 | * |
| 590 | * PARAMETERS: | 590 | * PARAMETERS: |
| 591 | * tid - transaction id | 591 | * tid - transaction id |
| 592 | * ip - pointer to incore inode to be written to the inode extent. | 592 | * ip - pointer to incore inode to be written to the inode extent. |
| 593 | * | 593 | * |
| 594 | * RETURN VALUES: | 594 | * RETURN VALUES: |
| 595 | * 0 - success | 595 | * 0 - success |
| 596 | * -EIO - i/o error. | 596 | * -EIO - i/o error. |
| 597 | */ | 597 | */ |
| 598 | int diWrite(tid_t tid, struct inode *ip) | 598 | int diWrite(tid_t tid, struct inode *ip) |
| 599 | { | 599 | { |
| @@ -730,7 +730,7 @@ int diWrite(tid_t tid, struct inode *ip) | |||
| 730 | ilinelock = (struct linelock *) & tlck->lock; | 730 | ilinelock = (struct linelock *) & tlck->lock; |
| 731 | 731 | ||
| 732 | /* | 732 | /* |
| 733 | * regular file: 16 byte (XAD slot) granularity | 733 | * regular file: 16 byte (XAD slot) granularity |
| 734 | */ | 734 | */ |
| 735 | if (type & tlckXTREE) { | 735 | if (type & tlckXTREE) { |
| 736 | xtpage_t *p, *xp; | 736 | xtpage_t *p, *xp; |
| @@ -755,7 +755,7 @@ int diWrite(tid_t tid, struct inode *ip) | |||
| 755 | xad->flag &= ~(XAD_NEW | XAD_EXTENDED); | 755 | xad->flag &= ~(XAD_NEW | XAD_EXTENDED); |
| 756 | } | 756 | } |
| 757 | /* | 757 | /* |
| 758 | * directory: 32 byte (directory entry slot) granularity | 758 | * directory: 32 byte (directory entry slot) granularity |
| 759 | */ | 759 | */ |
| 760 | else if (type & tlckDTREE) { | 760 | else if (type & tlckDTREE) { |
| 761 | dtpage_t *p, *xp; | 761 | dtpage_t *p, *xp; |
| @@ -800,9 +800,8 @@ int diWrite(tid_t tid, struct inode *ip) | |||
| 800 | } | 800 | } |
| 801 | 801 | ||
| 802 | /* | 802 | /* |
| 803 | * lock/copy inode base: 128 byte slot granularity | 803 | * lock/copy inode base: 128 byte slot granularity |
| 804 | */ | 804 | */ |
| 805 | // baseDinode: | ||
| 806 | lv = & dilinelock->lv[dilinelock->index]; | 805 | lv = & dilinelock->lv[dilinelock->index]; |
| 807 | lv->offset = dioffset >> L2INODESLOTSIZE; | 806 | lv->offset = dioffset >> L2INODESLOTSIZE; |
| 808 | copy_to_dinode(dp, ip); | 807 | copy_to_dinode(dp, ip); |
| @@ -813,17 +812,6 @@ int diWrite(tid_t tid, struct inode *ip) | |||
| 813 | lv->length = 1; | 812 | lv->length = 1; |
| 814 | dilinelock->index++; | 813 | dilinelock->index++; |
| 815 | 814 | ||
| 816 | #ifdef _JFS_FASTDASD | ||
| 817 | /* | ||
| 818 | * We aren't logging changes to the DASD used in directory inodes, | ||
| 819 | * but we need to write them to disk. If we don't unmount cleanly, | ||
| 820 | * mount will recalculate the DASD used. | ||
| 821 | */ | ||
| 822 | if (S_ISDIR(ip->i_mode) | ||
| 823 | && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED)) | ||
| 824 | memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd)); | ||
| 825 | #endif /* _JFS_FASTDASD */ | ||
| 826 | |||
| 827 | /* release the buffer holding the updated on-disk inode. | 815 | /* release the buffer holding the updated on-disk inode. |
| 828 | * the buffer will be later written by commit processing. | 816 | * the buffer will be later written by commit processing. |
| 829 | */ | 817 | */ |
| @@ -834,9 +822,9 @@ int diWrite(tid_t tid, struct inode *ip) | |||
| 834 | 822 | ||
| 835 | 823 | ||
| 836 | /* | 824 | /* |
| 837 | * NAME: diFree(ip) | 825 | * NAME: diFree(ip) |
| 838 | * | 826 | * |
| 839 | * FUNCTION: free a specified inode from the inode working map | 827 | * FUNCTION: free a specified inode from the inode working map |
| 840 | * for a fileset or aggregate. | 828 | * for a fileset or aggregate. |
| 841 | * | 829 | * |
| 842 | * if the inode to be freed represents the first (only) | 830 | * if the inode to be freed represents the first (only) |
| @@ -865,11 +853,11 @@ int diWrite(tid_t tid, struct inode *ip) | |||
| 865 | * any updates and are held until all updates are complete. | 853 | * any updates and are held until all updates are complete. |
| 866 | * | 854 | * |
| 867 | * PARAMETERS: | 855 | * PARAMETERS: |
| 868 | * ip - inode to be freed. | 856 | * ip - inode to be freed. |
| 869 | * | 857 | * |
| 870 | * RETURN VALUES: | 858 | * RETURN VALUES: |
| 871 | * 0 - success | 859 | * 0 - success |
| 872 | * -EIO - i/o error. | 860 | * -EIO - i/o error. |
| 873 | */ | 861 | */ |
| 874 | int diFree(struct inode *ip) | 862 | int diFree(struct inode *ip) |
| 875 | { | 863 | { |
| @@ -902,7 +890,8 @@ int diFree(struct inode *ip) | |||
| 902 | * the map. | 890 | * the map. |
| 903 | */ | 891 | */ |
| 904 | if (iagno >= imap->im_nextiag) { | 892 | if (iagno >= imap->im_nextiag) { |
| 905 | dump_mem("imap", imap, 32); | 893 | print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, |
| 894 | imap, 32, 0); | ||
| 906 | jfs_error(ip->i_sb, | 895 | jfs_error(ip->i_sb, |
| 907 | "diFree: inum = %d, iagno = %d, nextiag = %d", | 896 | "diFree: inum = %d, iagno = %d, nextiag = %d", |
| 908 | (uint) inum, iagno, imap->im_nextiag); | 897 | (uint) inum, iagno, imap->im_nextiag); |
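dump_mem() was a jfs-private helper; the replacement above is the generic hexdump API from lib/hexdump.c. For reference, its prototype:

        void print_hex_dump(const char *level, const char *prefix_str,
                            int prefix_type, int rowsize, int groupsize,
                            const void *buf, size_t len, bool ascii);

So the new call logs 32 bytes of the imap at KERN_ERR as 16-byte rows of 4-byte groups, each row prefixed with "imap: " and its kernel virtual address, with no trailing ASCII column.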
| @@ -964,8 +953,8 @@ int diFree(struct inode *ip) | |||
| 964 | return -EIO; | 953 | return -EIO; |
| 965 | } | 954 | } |
| 966 | /* | 955 | /* |
| 967 | * inode extent still has some inodes or below low water mark: | 956 | * inode extent still has some inodes or below low water mark: |
| 968 | * keep the inode extent; | 957 | * keep the inode extent; |
| 969 | */ | 958 | */ |
| 970 | if (bitmap || | 959 | if (bitmap || |
| 971 | imap->im_agctl[agno].numfree < 96 || | 960 | imap->im_agctl[agno].numfree < 96 || |
| @@ -1047,12 +1036,12 @@ int diFree(struct inode *ip) | |||
| 1047 | 1036 | ||
| 1048 | 1037 | ||
| 1049 | /* | 1038 | /* |
| 1050 | * inode extent has become free and above low water mark: | 1039 | * inode extent has become free and above low water mark: |
| 1051 | * free the inode extent; | 1040 | * free the inode extent; |
| 1052 | */ | 1041 | */ |
| 1053 | 1042 | ||
| 1054 | /* | 1043 | /* |
| 1055 | * prepare to update iag list(s) (careful update step 1) | 1044 | * prepare to update iag list(s) (careful update step 1) |
| 1056 | */ | 1045 | */ |
| 1057 | amp = bmp = cmp = dmp = NULL; | 1046 | amp = bmp = cmp = dmp = NULL; |
| 1058 | fwd = back = -1; | 1047 | fwd = back = -1; |
| @@ -1152,7 +1141,7 @@ int diFree(struct inode *ip) | |||
| 1152 | invalidate_pxd_metapages(ip, freepxd); | 1141 | invalidate_pxd_metapages(ip, freepxd); |
| 1153 | 1142 | ||
| 1154 | /* | 1143 | /* |
| 1155 | * update iag list(s) (careful update step 2) | 1144 | * update iag list(s) (careful update step 2) |
| 1156 | */ | 1145 | */ |
| 1157 | /* add the iag to the ag extent free list if this is the | 1146 | /* add the iag to the ag extent free list if this is the |
| 1158 | * first free extent for the iag. | 1147 | * first free extent for the iag. |
| @@ -1338,20 +1327,20 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) | |||
| 1338 | 1327 | ||
| 1339 | 1328 | ||
| 1340 | /* | 1329 | /* |
| 1341 | * NAME: diAlloc(pip,dir,ip) | 1330 | * NAME: diAlloc(pip,dir,ip) |
| 1342 | * | 1331 | * |
| 1343 | * FUNCTION: allocate a disk inode from the inode working map | 1332 | * FUNCTION: allocate a disk inode from the inode working map |
| 1344 | * for a fileset or aggregate. | 1333 | * for a fileset or aggregate. |
| 1345 | * | 1334 | * |
| 1346 | * PARAMETERS: | 1335 | * PARAMETERS: |
| 1347 | * pip - pointer to incore inode for the parent inode. | 1336 | * pip - pointer to incore inode for the parent inode. |
| 1348 | * dir - 'true' if the new disk inode is for a directory. | 1337 | * dir - 'true' if the new disk inode is for a directory. |
| 1349 | * ip - pointer to a new inode | 1338 | * ip - pointer to a new inode |
| 1350 | * | 1339 | * |
| 1351 | * RETURN VALUES: | 1340 | * RETURN VALUES: |
| 1352 | * 0 - success. | 1341 | * 0 - success. |
| 1353 | * -ENOSPC - insufficient disk resources. | 1342 | * -ENOSPC - insufficient disk resources. |
| 1354 | * -EIO - i/o error. | 1343 | * -EIO - i/o error. |
| 1355 | */ | 1344 | */ |
| 1356 | int diAlloc(struct inode *pip, bool dir, struct inode *ip) | 1345 | int diAlloc(struct inode *pip, bool dir, struct inode *ip) |
| 1357 | { | 1346 | { |
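The policy behind this allocation starts with choosing an allocation group; a sketch of that choice, reconstructed from the body that follows in jfs_imap.c rather than quoted (directories are spread across AGs for balance, regular files stay near their parent):

        if (dir)        /* least-used AG, per the block map */
                agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
        else            /* same AG as the parent inode */
                agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb));
        AG_LOCK(imap, agno);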
| @@ -1433,7 +1422,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
| 1433 | addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); | 1422 | addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); |
| 1434 | 1423 | ||
| 1435 | /* | 1424 | /* |
| 1436 | * try to allocate from the IAG | 1425 | * try to allocate from the IAG |
| 1437 | */ | 1426 | */ |
| 1438 | /* check if the inode may be allocated from the iag | 1427 | /* check if the inode may be allocated from the iag |
| 1439 | * (i.e. the inode has free inodes or new extent can be added). | 1428 | * (i.e. the inode has free inodes or new extent can be added). |
| @@ -1633,9 +1622,9 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
| 1633 | 1622 | ||
| 1634 | 1623 | ||
| 1635 | /* | 1624 | /* |
| 1636 | * NAME: diAllocAG(imap,agno,dir,ip) | 1625 | * NAME: diAllocAG(imap,agno,dir,ip) |
| 1637 | * | 1626 | * |
| 1638 | * FUNCTION: allocate a disk inode from the allocation group. | 1627 | * FUNCTION: allocate a disk inode from the allocation group. |
| 1639 | * | 1628 | * |
| 1640 | * this routine first determines if a new extent of free | 1629 | * this routine first determines if a new extent of free |
| 1641 | * inodes should be added for the allocation group, with | 1630 | * inodes should be added for the allocation group, with |
| @@ -1649,17 +1638,17 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
| 1649 | * PRE CONDITION: Already have the AG lock for this AG. | 1638 | * PRE CONDITION: Already have the AG lock for this AG. |
| 1650 | * | 1639 | * |
| 1651 | * PARAMETERS: | 1640 | * PARAMETERS: |
| 1652 | * imap - pointer to inode map control structure. | 1641 | * imap - pointer to inode map control structure. |
| 1653 | * agno - allocation group to allocate from. | 1642 | * agno - allocation group to allocate from. |
| 1654 | * dir - 'true' if the new disk inode is for a directory. | 1643 | * dir - 'true' if the new disk inode is for a directory. |
| 1655 | * ip - pointer to the new inode to be filled in on successful return | 1644 | * ip - pointer to the new inode to be filled in on successful return |
| 1656 | * with the disk inode number allocated, its extent address | 1645 | * with the disk inode number allocated, its extent address |
| 1657 | * and the start of the ag. | 1646 | * and the start of the ag. |
| 1658 | * | 1647 | * |
| 1659 | * RETURN VALUES: | 1648 | * RETURN VALUES: |
| 1660 | * 0 - success. | 1649 | * 0 - success. |
| 1661 | * -ENOSPC - insufficient disk resources. | 1650 | * -ENOSPC - insufficient disk resources. |
| 1662 | * -EIO - i/o error. | 1651 | * -EIO - i/o error. |
| 1663 | */ | 1652 | */ |
| 1664 | static int | 1653 | static int |
| 1665 | diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) | 1654 | diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) |
| @@ -1709,9 +1698,9 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) | |||
| 1709 | 1698 | ||
| 1710 | 1699 | ||
| 1711 | /* | 1700 | /* |
| 1712 | * NAME: diAllocAny(imap,agno,dir,iap) | 1701 | * NAME: diAllocAny(imap,agno,dir,iap) |
| 1713 | * | 1702 | * |
| 1714 | * FUNCTION: allocate a disk inode from any other allocation group. | 1703 | * FUNCTION: allocate a disk inode from any other allocation group. |
| 1715 | * | 1704 | * |
| 1716 | * this routine is called when an allocation attempt within | 1705 | * this routine is called when an allocation attempt within |
| 1717 | * the primary allocation group has failed. if attempts to | 1706 | * the primary allocation group has failed. if attempts to |
| @@ -1719,17 +1708,17 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) | |||
| 1719 | * specified primary group. | 1708 | * specified primary group. |
| 1720 | * | 1709 | * |
| 1721 | * PARAMETERS: | 1710 | * PARAMETERS: |
| 1722 | * imap - pointer to inode map control structure. | 1711 | * imap - pointer to inode map control structure. |
| 1723 | * agno - primary allocation group (to avoid). | 1712 | * agno - primary allocation group (to avoid). |
| 1724 | * dir - 'true' if the new disk inode is for a directory. | 1713 | * dir - 'true' if the new disk inode is for a directory. |
| 1725 | * ip - pointer to a new inode to be filled in on successful return | 1714 | * ip - pointer to a new inode to be filled in on successful return |
| 1726 | * with the disk inode number allocated, its extent address | 1715 | * with the disk inode number allocated, its extent address |
| 1727 | * and the start of the ag. | 1716 | * and the start of the ag. |
| 1728 | * | 1717 | * |
| 1729 | * RETURN VALUES: | 1718 | * RETURN VALUES: |
| 1730 | * 0 - success. | 1719 | * 0 - success. |
| 1731 | * -ENOSPC - insufficient disk resources. | 1720 | * -ENOSPC - insufficient disk resources. |
| 1732 | * -EIO - i/o error. | 1721 | * -EIO - i/o error. |
| 1733 | */ | 1722 | */ |
| 1734 | static int | 1723 | static int |
| 1735 | diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) | 1724 | diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) |
| @@ -1772,9 +1761,9 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) | |||
| 1772 | 1761 | ||
| 1773 | 1762 | ||
| 1774 | /* | 1763 | /* |
| 1775 | * NAME: diAllocIno(imap,agno,ip) | 1764 | * NAME: diAllocIno(imap,agno,ip) |
| 1776 | * | 1765 | * |
| 1777 | * FUNCTION: allocate a disk inode from the allocation group's free | 1766 | * FUNCTION: allocate a disk inode from the allocation group's free |
| 1778 | * inode list, returning an error if this free list is | 1767 | * inode list, returning an error if this free list is |
| 1779 | * empty (i.e. no iags on the list). | 1768 | * empty (i.e. no iags on the list). |
| 1780 | * | 1769 | * |
| @@ -1785,16 +1774,16 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) | |||
| 1785 | * PRE CONDITION: Already have AG lock for this AG. | 1774 | * PRE CONDITION: Already have AG lock for this AG. |
| 1786 | * | 1775 | * |
| 1787 | * PARAMETERS: | 1776 | * PARAMETERS: |
| 1788 | * imap - pointer to inode map control structure. | 1777 | * imap - pointer to inode map control structure. |
| 1789 | * agno - allocation group. | 1778 | * agno - allocation group. |
| 1790 | * ip - pointer to new inode to be filled in on successful return | 1779 | * ip - pointer to new inode to be filled in on successful return |
| 1791 | * with the disk inode number allocated, its extent address | 1780 | * with the disk inode number allocated, its extent address |
| 1792 | * and the start of the ag. | 1781 | * and the start of the ag. |
| 1793 | * | 1782 | * |
| 1794 | * RETURN VALUES: | 1783 | * RETURN VALUES: |
| 1795 | * 0 - success. | 1784 | * 0 - success. |
| 1796 | * -ENOSPC - insufficient disk resources. | 1785 | * -ENOSPC - insufficient disk resources. |
| 1797 | * -EIO - i/o error. | 1786 | * -EIO - i/o error. |
| 1798 | */ | 1787 | */ |
| 1799 | static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | 1788 | static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) |
| 1800 | { | 1789 | { |
| @@ -1890,7 +1879,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
| 1890 | 1879 | ||
| 1891 | 1880 | ||
| 1892 | /* | 1881 | /* |
| 1893 | * NAME: diAllocExt(imap,agno,ip) | 1882 | * NAME: diAllocExt(imap,agno,ip) |
| 1894 | * | 1883 | * |
| 1895 | * FUNCTION: add a new extent of free inodes to an iag, allocating | 1884 | * FUNCTION: add a new extent of free inodes to an iag, allocating |
| 1896 | * an inode from this extent to satisfy the current allocation | 1885 | * an inode from this extent to satisfy the current allocation |
| @@ -1910,16 +1899,16 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
| 1910 | * for the purpose of satisfying this request. | 1899 | * for the purpose of satisfying this request. |
| 1911 | * | 1900 | * |
| 1912 | * PARAMETERS: | 1901 | * PARAMETERS: |
| 1913 | * imap - pointer to inode map control structure. | 1902 | * imap - pointer to inode map control structure. |
| 1914 | * agno - allocation group number. | 1903 | * agno - allocation group number. |
| 1915 | * ip - pointer to new inode to be filled in on successful return | 1904 | * ip - pointer to new inode to be filled in on successful return |
| 1916 | * with the disk inode number allocated, its extent address | 1905 | * with the disk inode number allocated, its extent address |
| 1917 | * and the start of the ag. | 1906 | * and the start of the ag. |
| 1918 | * | 1907 | * |
| 1919 | * RETURN VALUES: | 1908 | * RETURN VALUES: |
| 1920 | * 0 - success. | 1909 | * 0 - success. |
| 1921 | * -ENOSPC - insufficient disk resources. | 1910 | * -ENOSPC - insufficient disk resources. |
| 1922 | * -EIO - i/o error. | 1911 | * -EIO - i/o error. |
| 1923 | */ | 1912 | */ |
| 1924 | static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | 1913 | static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) |
| 1925 | { | 1914 | { |
| @@ -2010,7 +1999,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | |||
| 2010 | 1999 | ||
| 2011 | 2000 | ||
| 2012 | /* | 2001 | /* |
| 2013 | * NAME: diAllocBit(imap,iagp,ino) | 2002 | * NAME: diAllocBit(imap,iagp,ino) |
| 2014 | * | 2003 | * |
| 2015 | * FUNCTION: allocate a backed inode from an iag. | 2004 | * FUNCTION: allocate a backed inode from an iag. |
| 2016 | * | 2005 | * |
| @@ -2030,14 +2019,14 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | |||
| 2030 | * this AG. Must have read lock on imap inode. | 2019 | * this AG. Must have read lock on imap inode. |
| 2031 | * | 2020 | * |
| 2032 | * PARAMETERS: | 2021 | * PARAMETERS: |
| 2033 | * imap - pointer to inode map control structure. | 2022 | * imap - pointer to inode map control structure. |
| 2034 | * iagp - pointer to iag. | 2023 | * iagp - pointer to iag. |
| 2035 | * ino - inode number to be allocated within the iag. | 2024 | * ino - inode number to be allocated within the iag. |
| 2036 | * | 2025 | * |
| 2037 | * RETURN VALUES: | 2026 | * RETURN VALUES: |
| 2038 | * 0 - success. | 2027 | * 0 - success. |
| 2039 | * -ENOSPC - insufficient disk resources. | 2028 | * -ENOSPC - insufficient disk resources. |
| 2040 | * -EIO - i/o error. | 2029 | * -EIO - i/o error. |
| 2041 | */ | 2030 | */ |
| 2042 | static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) | 2031 | static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) |
| 2043 | { | 2032 | { |
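At its core, allocating a backed inode is a bit set in the iag's working map plus counter updates; a sketch with declarations trimmed (HIGHORDER is the 0x80000000 mask used throughout jfs_imap.c, and INOSPEREXT/L2INOSPEREXT come from jfs_filsys.h):

        extno = ino >> L2INOSPEREXT;            /* inode extent within iag */
        bitno = ino & (INOSPEREXT - 1);         /* bit within that extent */

        iagp->wmap[extno] |= cpu_to_le32(HIGHORDER >> bitno);
        iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1);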
| @@ -2144,11 +2133,11 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) | |||
| 2144 | 2133 | ||
| 2145 | 2134 | ||
| 2146 | /* | 2135 | /* |
| 2147 | * NAME: diNewExt(imap,iagp,extno) | 2136 | * NAME: diNewExt(imap,iagp,extno) |
| 2148 | * | 2137 | * |
| 2149 | * FUNCTION: initialize a new extent of inodes for an iag, allocating | 2138 | * FUNCTION: initialize a new extent of inodes for an iag, allocating |
| 2150 | * the first inode of the extent for use for the current | 2139 | * the first inode of the extent for use for the current |
| 2151 | * allocation request. | 2140 | * allocation request. |
| 2152 | * | 2141 | * |
| 2153 | * disk resources are allocated for the new extent of inodes | 2142 | * disk resources are allocated for the new extent of inodes |
| 2154 | * and the inodes themselves are initialized to reflect their | 2143 | * and the inodes themselves are initialized to reflect their |
| @@ -2177,14 +2166,14 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) | |||
| 2177 | * this AG. Must have read lock on imap inode. | 2166 | * this AG. Must have read lock on imap inode. |
| 2178 | * | 2167 | * |
| 2179 | * PARAMETERS: | 2168 | * PARAMETERS: |
| 2180 | * imap - pointer to inode map control structure. | 2169 | * imap - pointer to inode map control structure. |
| 2181 | * iagp - pointer to iag. | 2170 | * iagp - pointer to iag. |
| 2182 | * extno - extent number. | 2171 | * extno - extent number. |
| 2183 | * | 2172 | * |
| 2184 | * RETURN VALUES: | 2173 | * RETURN VALUES: |
| 2185 | * 0 - success. | 2174 | * 0 - success. |
| 2186 | * -ENOSPC - insufficient disk resources. | 2175 | * -ENOSPC - insufficient disk resources. |
| 2187 | * -EIO - i/o error. | 2176 | * -EIO - i/o error. |
| 2188 | */ | 2177 | */ |
| 2189 | static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) | 2178 | static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) |
| 2190 | { | 2179 | { |
| @@ -2430,7 +2419,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) | |||
| 2430 | 2419 | ||
| 2431 | 2420 | ||
| 2432 | /* | 2421 | /* |
| 2433 | * NAME: diNewIAG(imap,iagnop,agno) | 2422 | * NAME: diNewIAG(imap,iagnop,agno) |
| 2434 | * | 2423 | * |
| 2435 | * FUNCTION: allocate a new iag for an allocation group. | 2424 | * FUNCTION: allocate a new iag for an allocation group. |
| 2436 | * | 2425 | * |
| @@ -2443,16 +2432,16 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) | |||
| 2443 | * and returned to satisfy the request. | 2432 | * and returned to satisfy the request. |
| 2444 | * | 2433 | * |
| 2445 | * PARAMETERS: | 2434 | * PARAMETERS: |
| 2446 | * imap - pointer to inode map control structure. | 2435 | * imap - pointer to inode map control structure. |
| 2447 | * iagnop - pointer to an iag number set with the number of the | 2436 | * iagnop - pointer to an iag number set with the number of the |
| 2448 | * newly allocated iag upon successful return. | 2437 | * newly allocated iag upon successful return. |
| 2449 | * agno - allocation group number. | 2438 | * agno - allocation group number. |
| 2450 | * bpp - Buffer pointer to be filled in with new IAG's buffer | 2439 | * bpp - Buffer pointer to be filled in with new IAG's buffer |
| 2451 | * | 2440 | * |
| 2452 | * RETURN VALUES: | 2441 | * RETURN VALUES: |
| 2453 | * 0 - success. | 2442 | * 0 - success. |
| 2454 | * -ENOSPC - insufficient disk resources. | 2443 | * -ENOSPC - insufficient disk resources. |
| 2455 | * -EIO - i/o error. | 2444 | * -EIO - i/o error. |
| 2456 | * | 2445 | * |
| 2457 | * serialization: | 2446 | * serialization: |
| 2458 | * AG lock held on entry/exit; | 2447 | * AG lock held on entry/exit; |
| @@ -2461,7 +2450,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) | |||
| 2461 | * | 2450 | * |
| 2462 | * note: new iag transaction: | 2451 | * note: new iag transaction: |
| 2463 | * . synchronously write iag; | 2452 | * . synchronously write iag; |
| 2464 | * . write log of xtree and inode of imap; | 2453 | * . write log of xtree and inode of imap; |
| 2465 | * . commit; | 2454 | * . commit; |
| 2466 | * . synchronous write of xtree (right to left, bottom to top); | 2455 | * . synchronous write of xtree (right to left, bottom to top); |
| 2467 | * . at start of logredo(): init in-memory imap with one additional iag page; | 2456 | * . at start of logredo(): init in-memory imap with one additional iag page; |
| @@ -2481,9 +2470,6 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | |||
| 2481 | s64 xaddr = 0; | 2470 | s64 xaddr = 0; |
| 2482 | s64 blkno; | 2471 | s64 blkno; |
| 2483 | tid_t tid; | 2472 | tid_t tid; |
| 2484 | #ifdef _STILL_TO_PORT | ||
| 2485 | xad_t xad; | ||
| 2486 | #endif /* _STILL_TO_PORT */ | ||
| 2487 | struct inode *iplist[1]; | 2473 | struct inode *iplist[1]; |
| 2488 | 2474 | ||
| 2489 | /* pick up pointers to the inode map and mount inodes */ | 2475 | /* pick up pointers to the inode map and mount inodes */ |
| @@ -2674,15 +2660,15 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | |||
| 2674 | } | 2660 | } |
| 2675 | 2661 | ||
| 2676 | /* | 2662 | /* |
| 2677 | * NAME: diIAGRead() | 2663 | * NAME: diIAGRead() |
| 2678 | * | 2664 | * |
| 2679 | * FUNCTION: get the buffer for the specified iag within a fileset | 2665 | * FUNCTION: get the buffer for the specified iag within a fileset |
| 2680 | * or aggregate inode map. | 2666 | * or aggregate inode map. |
| 2681 | * | 2667 | * |
| 2682 | * PARAMETERS: | 2668 | * PARAMETERS: |
| 2683 | * imap - pointer to inode map control structure. | 2669 | * imap - pointer to inode map control structure. |
| 2684 | * iagno - iag number. | 2670 | * iagno - iag number. |
| 2685 | * bpp - pointer to a buffer pointer to be filled in on successful | 2671 | * bpp - pointer to a buffer pointer to be filled in on successful |
| 2686 | * exit. | 2672 | * exit. |
| 2687 | * | 2673 | * |
| 2688 | * SERIALIZATION: | 2674 | * SERIALIZATION: |
| @@ -2691,8 +2677,8 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | |||
| 2691 | * the read lock is unnecessary.) | 2677 | * the read lock is unnecessary.) |
| 2692 | * | 2678 | * |
| 2693 | * RETURN VALUES: | 2679 | * RETURN VALUES: |
| 2694 | * 0 - success. | 2680 | * 0 - success. |
| 2695 | * -EIO - i/o error. | 2681 | * -EIO - i/o error. |
| 2696 | */ | 2682 | */ |
| 2697 | static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) | 2683 | static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) |
| 2698 | { | 2684 | { |
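The body is little more than a metapage read at the iag's logical block; a sketch (IAGTOLBLK is the jfs_imap.h macro that maps an iag number to its logical block, accounting for the dinomap control page at block 0):

        struct inode *ipimap = imap->im_ipimap;
        s64 blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage);

        *mpp = read_metapage(ipimap, blkno, PSIZE, 0);
        return *mpp ? 0 : -EIO;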
| @@ -2712,17 +2698,17 @@ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) | |||
| 2712 | } | 2698 | } |
| 2713 | 2699 | ||
| 2714 | /* | 2700 | /* |
| 2715 | * NAME: diFindFree() | 2701 | * NAME: diFindFree() |
| 2716 | * | 2702 | * |
| 2717 | * FUNCTION: find the first free bit in a word starting at | 2703 | * FUNCTION: find the first free bit in a word starting at |
| 2718 | * the specified bit position. | 2704 | * the specified bit position. |
| 2719 | * | 2705 | * |
| 2720 | * PARAMETERS: | 2706 | * PARAMETERS: |
| 2721 | * word - word to be examined. | 2707 | * word - word to be examined. |
| 2722 | * start - starting bit position. | 2708 | * start - starting bit position. |
| 2723 | * | 2709 | * |
| 2724 | * RETURN VALUES: | 2710 | * RETURN VALUES: |
| 2725 | * bit position of first free bit in the word or 32 if | 2711 | * bit position of first free bit in the word or 32 if |
| 2726 | * no free bits were found. | 2712 | * no free bits were found. |
| 2727 | */ | 2713 | */ |
| 2728 | static int diFindFree(u32 word, int start) | 2714 | static int diFindFree(u32 word, int start) |
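jfs inode maps put inode 0 at the most significant bit of each word, so the scan walks a mask from bit `start' toward the low-order end; an equivalent sketch (the in-tree version is structured the same way; the name here is illustrative):

        static int find_free_bit(u32 word, int start)   /* sketch */
        {
                u32 mask = 0x80000000u >> start;
                int bitno;

                for (bitno = start; bitno < 32; bitno++, mask >>= 1)
                        if ((word & mask) == 0)         /* 0 = free */
                                return bitno;
                return 32;                              /* none free */
        }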
| @@ -2897,7 +2883,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) | |||
| 2897 | atomic_read(&imap->im_numfree)); | 2883 | atomic_read(&imap->im_numfree)); |
| 2898 | 2884 | ||
| 2899 | /* | 2885 | /* |
| 2900 | * reconstruct imap | 2886 | * reconstruct imap |
| 2901 | * | 2887 | * |
| 2902 | * coalesce contiguous k (newAGSize/oldAGSize) AGs; | 2888 | * coalesce contiguous k (newAGSize/oldAGSize) AGs; |
| 2903 | * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; | 2889 | * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; |
| @@ -2913,7 +2899,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) | |||
| 2913 | } | 2899 | } |
| 2914 | 2900 | ||
| 2915 | /* | 2901 | /* |
| 2916 | * process each iag page of the map. | 2902 | * process each iag page of the map. |
| 2917 | * | 2903 | * |
| 2918 | * rebuild AG Free Inode List, AG Free Inode Extent List; | 2904 | * rebuild AG Free Inode List, AG Free Inode Extent List; |
| 2919 | */ | 2905 | */ |
| @@ -2932,7 +2918,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) | |||
| 2932 | 2918 | ||
| 2933 | /* leave free iag in the free iag list */ | 2919 | /* leave free iag in the free iag list */ |
| 2934 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { | 2920 | if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { |
| 2935 | release_metapage(bp); | 2921 | release_metapage(bp); |
| 2936 | continue; | 2922 | continue; |
| 2937 | } | 2923 | } |
| 2938 | 2924 | ||
| @@ -3063,13 +3049,13 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno, | |||
| 3063 | } | 3049 | } |
| 3064 | 3050 | ||
| 3065 | /* | 3051 | /* |
| 3066 | * NAME: copy_from_dinode() | 3052 | * NAME: copy_from_dinode() |
| 3067 | * | 3053 | * |
| 3068 | * FUNCTION: Copies inode info from disk inode to in-memory inode | 3054 | * FUNCTION: Copies inode info from disk inode to in-memory inode |
| 3069 | * | 3055 | * |
| 3070 | * RETURN VALUES: | 3056 | * RETURN VALUES: |
| 3071 | * 0 - success | 3057 | * 0 - success |
| 3072 | * -ENOMEM - insufficient memory | 3058 | * -ENOMEM - insufficient memory |
| 3073 | */ | 3059 | */ |
| 3074 | static int copy_from_dinode(struct dinode * dip, struct inode *ip) | 3060 | static int copy_from_dinode(struct dinode * dip, struct inode *ip) |
| 3075 | { | 3061 | { |
| @@ -3151,9 +3137,9 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) | |||
| 3151 | } | 3137 | } |
| 3152 | 3138 | ||
| 3153 | /* | 3139 | /* |
| 3154 | * NAME: copy_to_dinode() | 3140 | * NAME: copy_to_dinode() |
| 3155 | * | 3141 | * |
| 3156 | * FUNCTION: Copies inode info from in-memory inode to disk inode | 3142 | * FUNCTION: Copies inode info from in-memory inode to disk inode |
| 3157 | */ | 3143 | */ |
| 3158 | static void copy_to_dinode(struct dinode * dip, struct inode *ip) | 3144 | static void copy_to_dinode(struct dinode * dip, struct inode *ip) |
| 3159 | { | 3145 | { |
diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h index 4f9c346ed498..610a0e9d8941 100644 --- a/fs/jfs/jfs_imap.h +++ b/fs/jfs/jfs_imap.h | |||
| @@ -24,17 +24,17 @@ | |||
| 24 | * jfs_imap.h: disk inode manager | 24 | * jfs_imap.h: disk inode manager |
| 25 | */ | 25 | */ |
| 26 | 26 | ||
| 27 | #define EXTSPERIAG 128 /* number of disk inode extents per iag */ | 27 | #define EXTSPERIAG 128 /* number of disk inode extents per iag */ |
| 28 | #define IMAPBLKNO 0 /* lblkno of dinomap within inode map */ | 28 | #define IMAPBLKNO 0 /* lblkno of dinomap within inode map */ |
| 29 | #define SMAPSZ 4 /* number of words per summary map */ | 29 | #define SMAPSZ 4 /* number of words per summary map */ |
| 30 | #define EXTSPERSUM 32 /* number of extents per summary map entry */ | 30 | #define EXTSPERSUM 32 /* number of extents per summary map entry */ |
| 31 | #define L2EXTSPERSUM 5 /* l2 number of extents per summary map */ | 31 | #define L2EXTSPERSUM 5 /* l2 number of extents per summary map */ |
| 32 | #define PGSPERIEXT 4 /* number of 4K pages per dinode extent */ | 32 | #define PGSPERIEXT 4 /* number of 4K pages per dinode extent */ |
| 33 | #define MAXIAGS ((1<<20)-1) /* maximum number of iags */ | 33 | #define MAXIAGS ((1<<20)-1) /* maximum number of iags */ |
| 34 | #define MAXAG 128 /* maximum number of allocation groups */ | 34 | #define MAXAG 128 /* maximum number of allocation groups */ |
| 35 | 35 | ||
| 36 | #define AMAPSIZE 512 /* bytes in the IAG allocation maps */ | 36 | #define AMAPSIZE 512 /* bytes in the IAG allocation maps */ |
| 37 | #define SMAPSIZE 16 /* bytes in the IAG summary maps */ | 37 | #define SMAPSIZE 16 /* bytes in the IAG summary maps */ |
| 38 | 38 | ||
| 39 | /* convert inode number to iag number */ | 39 | /* convert inode number to iag number */ |
| 40 | #define INOTOIAG(ino) ((ino) >> L2INOSPERIAG) | 40 | #define INOTOIAG(ino) ((ino) >> L2INOSPERIAG) |
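INOTOIAG() just shifts off the per-iag index bits of an inode number. Assuming L2INOSPERIAG is 12 (4096 inodes per iag, per the struct iag comment below), a worked example:

    unsigned int iagno = INOTOIAG(8193);          /* 8193 >> 12 == 2 */
    unsigned int inoidx = 8193 & ((1 << 12) - 1); /* index 1 within iag 2 */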
| @@ -60,31 +60,31 @@ | |||
| 60 | * inode allocation group page (per 4096 inodes of an AG) | 60 | * inode allocation group page (per 4096 inodes of an AG) |
| 61 | */ | 61 | */ |
| 62 | struct iag { | 62 | struct iag { |
| 63 | __le64 agstart; /* 8: starting block of ag */ | 63 | __le64 agstart; /* 8: starting block of ag */ |
| 64 | __le32 iagnum; /* 4: inode allocation group number */ | 64 | __le32 iagnum; /* 4: inode allocation group number */ |
| 65 | __le32 inofreefwd; /* 4: ag inode free list forward */ | 65 | __le32 inofreefwd; /* 4: ag inode free list forward */ |
| 66 | __le32 inofreeback; /* 4: ag inode free list back */ | 66 | __le32 inofreeback; /* 4: ag inode free list back */ |
| 67 | __le32 extfreefwd; /* 4: ag inode extent free list forward */ | 67 | __le32 extfreefwd; /* 4: ag inode extent free list forward */ |
| 68 | __le32 extfreeback; /* 4: ag inode extent free list back */ | 68 | __le32 extfreeback; /* 4: ag inode extent free list back */ |
| 69 | __le32 iagfree; /* 4: iag free list */ | 69 | __le32 iagfree; /* 4: iag free list */ |
| 70 | 70 | ||
| 71 | /* summary map: 1 bit per inode extent */ | 71 | /* summary map: 1 bit per inode extent */ |
| 72 | __le32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes; | 72 | __le32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes; |
| 73 | * note: this indicates free and backed | 73 | * note: this indicates free and backed |
| 74 | * inodes, if the extent is not backed the | 74 | * inodes, if the extent is not backed the |
| 75 | * value will be 1. if the extent is | 75 | * value will be 1. if the extent is |
| 76 | * backed but all inodes are being used the | 76 | * backed but all inodes are being used the |
| 77 | * value will be 1. if the extent is | 77 | * value will be 1. if the extent is |
| 78 | * backed but at least one of the inodes is | 78 | * backed but at least one of the inodes is |
| 79 | * free the value will be 0. | 79 | * free the value will be 0. |
| 80 | */ | 80 | */ |
| 81 | __le32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */ | 81 | __le32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */ |
| 82 | __le32 nfreeinos; /* 4: number of free inodes */ | 82 | __le32 nfreeinos; /* 4: number of free inodes */ |
| 83 | __le32 nfreeexts; /* 4: number of free extents */ | 83 | __le32 nfreeexts; /* 4: number of free extents */ |
| 84 | /* (72) */ | 84 | /* (72) */ |
| 85 | u8 pad[1976]; /* 1976: pad to 2048 bytes */ | 85 | u8 pad[1976]; /* 1976: pad to 2048 bytes */ |
| 86 | /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */ | 86 | /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */ |
| 87 | __le32 wmap[EXTSPERIAG]; /* 512: working allocation map */ | 87 | __le32 wmap[EXTSPERIAG]; /* 512: working allocation map */ |
| 88 | __le32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */ | 88 | __le32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */ |
| 89 | pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */ | 89 | pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */ |
| 90 | }; /* (4096) */ | 90 | }; /* (4096) */ |
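The byte counts in these comments are load-bearing: 8 + 6*4 + 16 + 16 + 4 + 4 = 72 bytes of header, padded to 2048, then 512 + 512 + 1024 bytes of maps and extent addresses, for exactly one 4096-byte metapage. A build-time assertion of that invariant (a sketch, not present in the source) would be:

    BUILD_BUG_ON(sizeof(struct iag) != 4096);   /* one 4K metapage exactly */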
| @@ -93,44 +93,44 @@ struct iag { | |||
| 93 | * per AG control information (in inode map control page) | 93 | * per AG control information (in inode map control page) |
| 94 | */ | 94 | */ |
| 95 | struct iagctl_disk { | 95 | struct iagctl_disk { |
| 96 | __le32 inofree; /* 4: free inode list anchor */ | 96 | __le32 inofree; /* 4: free inode list anchor */ |
| 97 | __le32 extfree; /* 4: free extent list anchor */ | 97 | __le32 extfree; /* 4: free extent list anchor */ |
| 98 | __le32 numinos; /* 4: number of backed inodes */ | 98 | __le32 numinos; /* 4: number of backed inodes */ |
| 99 | __le32 numfree; /* 4: number of free inodes */ | 99 | __le32 numfree; /* 4: number of free inodes */ |
| 100 | }; /* (16) */ | 100 | }; /* (16) */ |
| 101 | 101 | ||
| 102 | struct iagctl { | 102 | struct iagctl { |
| 103 | int inofree; /* free inode list anchor */ | 103 | int inofree; /* free inode list anchor */ |
| 104 | int extfree; /* free extent list anchor */ | 104 | int extfree; /* free extent list anchor */ |
| 105 | int numinos; /* number of backed inodes */ | 105 | int numinos; /* number of backed inodes */ |
| 106 | int numfree; /* number of free inodes */ | 106 | int numfree; /* number of free inodes */ |
| 107 | }; | 107 | }; |
| 108 | 108 | ||
| 109 | /* | 109 | /* |
| 110 | * per fileset/aggregate inode map control page | 110 | * per fileset/aggregate inode map control page |
| 111 | */ | 111 | */ |
| 112 | struct dinomap_disk { | 112 | struct dinomap_disk { |
| 113 | __le32 in_freeiag; /* 4: free iag list anchor */ | 113 | __le32 in_freeiag; /* 4: free iag list anchor */ |
| 114 | __le32 in_nextiag; /* 4: next free iag number */ | 114 | __le32 in_nextiag; /* 4: next free iag number */ |
| 115 | __le32 in_numinos; /* 4: num of backed inodes */ | 115 | __le32 in_numinos; /* 4: num of backed inodes */ |
| 116 | __le32 in_numfree; /* 4: num of free backed inodes */ | 116 | __le32 in_numfree; /* 4: num of free backed inodes */ |
| 117 | __le32 in_nbperiext; /* 4: num of blocks per inode extent */ | 117 | __le32 in_nbperiext; /* 4: num of blocks per inode extent */ |
| 118 | __le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */ | 118 | __le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */ |
| 119 | __le32 in_diskblock; /* 4: for standalone test driver */ | 119 | __le32 in_diskblock; /* 4: for standalone test driver */ |
| 120 | __le32 in_maxag; /* 4: for standalone test driver */ | 120 | __le32 in_maxag; /* 4: for standalone test driver */ |
| 121 | u8 pad[2016]; /* 2016: pad to 2048 */ | 121 | u8 pad[2016]; /* 2016: pad to 2048 */ |
| 122 | struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */ | 122 | struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */ |
| 123 | }; /* (4096) */ | 123 | }; /* (4096) */ |
| 124 | 124 | ||
| 125 | struct dinomap { | 125 | struct dinomap { |
| 126 | int in_freeiag; /* free iag list anchor */ | 126 | int in_freeiag; /* free iag list anchor */ |
| 127 | int in_nextiag; /* next free iag number */ | 127 | int in_nextiag; /* next free iag number */ |
| 128 | int in_numinos; /* num of backed inodes */ | 128 | int in_numinos; /* num of backed inodes */ |
| 129 | int in_numfree; /* num of free backed inodes */ | 129 | int in_numfree; /* num of free backed inodes */ |
| 130 | int in_nbperiext; /* num of blocks per inode extent */ | 130 | int in_nbperiext; /* num of blocks per inode extent */ |
| 131 | int in_l2nbperiext; /* l2 of in_nbperiext */ | 131 | int in_l2nbperiext; /* l2 of in_nbperiext */ |
| 132 | int in_diskblock; /* for standalone test driver */ | 132 | int in_diskblock; /* for standalone test driver */ |
| 133 | int in_maxag; /* for standalone test driver */ | 133 | int in_maxag; /* for standalone test driver */ |
| 134 | struct iagctl in_agctl[MAXAG]; /* AG control information */ | 134 | struct iagctl in_agctl[MAXAG]; /* AG control information */ |
| 135 | }; | 135 | }; |
| 136 | 136 | ||
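Each *_disk structure here has an in-core twin with native int fields: the control page is byte-swapped once when the map is read in, not on every access. One plausible shape for that conversion (the real code lives in jfs_imap.c; this is only a sketch):

    static void iagctl_from_disk(struct iagctl *ic,
                                 const struct iagctl_disk *dic)
    {
            ic->inofree = le32_to_cpu(dic->inofree);
            ic->extfree = le32_to_cpu(dic->extfree);
            ic->numinos = le32_to_cpu(dic->numinos);
            ic->numfree = le32_to_cpu(dic->numfree);
    }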
| @@ -139,9 +139,9 @@ struct dinomap { | |||
| 139 | */ | 139 | */ |
| 140 | struct inomap { | 140 | struct inomap { |
| 141 | struct dinomap im_imap; /* 4096: inode allocation control */ | 141 | struct dinomap im_imap; /* 4096: inode allocation control */ |
| 142 | struct inode *im_ipimap; /* 4: ptr to inode for imap */ | 142 | struct inode *im_ipimap; /* 4: ptr to inode for imap */ |
| 143 | struct mutex im_freelock; /* 4: iag free list lock */ | 143 | struct mutex im_freelock; /* 4: iag free list lock */ |
| 144 | struct mutex im_aglock[MAXAG]; /* 512: per AG locks */ | 144 | struct mutex im_aglock[MAXAG]; /* 512: per AG locks */ |
| 145 | u32 *im_DBGdimap; | 145 | u32 *im_DBGdimap; |
| 146 | atomic_t im_numinos; /* num of backed inodes */ | 146 | atomic_t im_numinos; /* num of backed inodes */ |
| 147 | atomic_t im_numfree; /* num of free backed inodes */ | 147 | atomic_t im_numfree; /* num of free backed inodes */ |
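im_aglock gives every allocation group its own mutex, so inode allocation in one AG does not serialize against the others, while im_freelock covers only the shared iag free list. The usage pattern this implies (a sketch; how imap and agno are obtained is elided):

    mutex_lock(&imap->im_aglock[agno]);
    /* allocate or free an inode within allocation group agno */
    mutex_unlock(&imap->im_aglock[agno]);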
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 8f453eff3c83..cb8f30985ad1 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h | |||
| @@ -40,7 +40,7 @@ struct jfs_inode_info { | |||
| 40 | uint mode2; /* jfs-specific mode */ | 40 | uint mode2; /* jfs-specific mode */ |
| 41 | uint saved_uid; /* saved for uid mount option */ | 41 | uint saved_uid; /* saved for uid mount option */ |
| 42 | uint saved_gid; /* saved for gid mount option */ | 42 | uint saved_gid; /* saved for gid mount option */ |
| 43 | pxd_t ixpxd; /* inode extent descriptor */ | 43 | pxd_t ixpxd; /* inode extent descriptor */ |
| 44 | dxd_t acl; /* dxd describing acl */ | 44 | dxd_t acl; /* dxd describing acl */ |
| 45 | dxd_t ea; /* dxd describing ea */ | 45 | dxd_t ea; /* dxd describing ea */ |
| 46 | time_t otime; /* time created */ | 46 | time_t otime; /* time created */ |
| @@ -190,7 +190,7 @@ struct jfs_sb_info { | |||
| 190 | uint gengen; /* inode generation generator*/ | 190 | uint gengen; /* inode generation generator*/ |
| 191 | uint inostamp; /* shows inode belongs to fileset*/ | 191 | uint inostamp; /* shows inode belongs to fileset*/ |
| 192 | 192 | ||
| 193 | /* Formerly in ipbmap */ | 193 | /* Formerly in ipbmap */ |
| 194 | struct bmap *bmap; /* incore bmap descriptor */ | 194 | struct bmap *bmap; /* incore bmap descriptor */ |
| 195 | struct nls_table *nls_tab; /* current codepage */ | 195 | struct nls_table *nls_tab; /* current codepage */ |
| 196 | struct inode *direct_inode; /* metadata inode */ | 196 | struct inode *direct_inode; /* metadata inode */ |
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 44a2f33cb98d..de3e4a506dbc 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c | |||
| @@ -244,7 +244,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 244 | goto writeRecord; | 244 | goto writeRecord; |
| 245 | 245 | ||
| 246 | /* | 246 | /* |
| 247 | * initialize/update page/transaction recovery lsn | 247 | * initialize/update page/transaction recovery lsn |
| 248 | */ | 248 | */ |
| 249 | lsn = log->lsn; | 249 | lsn = log->lsn; |
| 250 | 250 | ||
| @@ -263,7 +263,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 263 | } | 263 | } |
| 264 | 264 | ||
| 265 | /* | 265 | /* |
| 266 | * initialize/update lsn of tblock of the page | 266 | * initialize/update lsn of tblock of the page |
| 267 | * | 267 | * |
| 268 | * transaction inherits oldest lsn of pages associated | 268 | * transaction inherits oldest lsn of pages associated |
| 269 | * with allocation/deallocation of resources (their | 269 | * with allocation/deallocation of resources (their |
| @@ -307,7 +307,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 307 | LOGSYNC_UNLOCK(log, flags); | 307 | LOGSYNC_UNLOCK(log, flags); |
| 308 | 308 | ||
| 309 | /* | 309 | /* |
| 310 | * write the log record | 310 | * write the log record |
| 311 | */ | 311 | */ |
| 312 | writeRecord: | 312 | writeRecord: |
| 313 | lsn = lmWriteRecord(log, tblk, lrd, tlck); | 313 | lsn = lmWriteRecord(log, tblk, lrd, tlck); |
| @@ -372,7 +372,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 372 | goto moveLrd; | 372 | goto moveLrd; |
| 373 | 373 | ||
| 374 | /* | 374 | /* |
| 375 | * move log record data | 375 | * move log record data |
| 376 | */ | 376 | */ |
| 377 | /* retrieve source meta-data page to log */ | 377 | /* retrieve source meta-data page to log */ |
| 378 | if (tlck->flag & tlckPAGELOCK) { | 378 | if (tlck->flag & tlckPAGELOCK) { |
| @@ -465,7 +465,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 465 | } | 465 | } |
| 466 | 466 | ||
| 467 | /* | 467 | /* |
| 468 | * move log record descriptor | 468 | * move log record descriptor |
| 469 | */ | 469 | */ |
| 470 | moveLrd: | 470 | moveLrd: |
| 471 | lrd->length = cpu_to_le16(len); | 471 | lrd->length = cpu_to_le16(len); |
| @@ -574,7 +574,7 @@ static int lmNextPage(struct jfs_log * log) | |||
| 574 | LOGGC_LOCK(log); | 574 | LOGGC_LOCK(log); |
| 575 | 575 | ||
| 576 | /* | 576 | /* |
| 577 | * write or queue the full page at the tail of write queue | 577 | * write or queue the full page at the tail of write queue |
| 578 | */ | 578 | */ |
| 579 | /* get the tail tblk on commit queue */ | 579 | /* get the tail tblk on commit queue */ |
| 580 | if (list_empty(&log->cqueue)) | 580 | if (list_empty(&log->cqueue)) |
| @@ -625,7 +625,7 @@ static int lmNextPage(struct jfs_log * log) | |||
| 625 | LOGGC_UNLOCK(log); | 625 | LOGGC_UNLOCK(log); |
| 626 | 626 | ||
| 627 | /* | 627 | /* |
| 628 | * allocate/initialize next page | 628 | * allocate/initialize next page |
| 629 | */ | 629 | */ |
| 630 | /* if log wraps, the first data page of log is 2 | 630 | /* if log wraps, the first data page of log is 2 |
| 631 | * (0 never used, 1 is superblock). | 631 | * (0 never used, 1 is superblock). |
| @@ -953,7 +953,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) | |||
| 953 | } | 953 | } |
| 954 | 954 | ||
| 955 | /* | 955 | /* |
| 956 | * forward syncpt | 956 | * forward syncpt |
| 957 | */ | 957 | */ |
| 958 | /* if last sync is same as last syncpt, | 958 | /* if last sync is same as last syncpt, |
| 959 | * invoke sync point forward processing to update sync. | 959 | * invoke sync point forward processing to update sync. |
| @@ -989,7 +989,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) | |||
| 989 | lsn = log->lsn; | 989 | lsn = log->lsn; |
| 990 | 990 | ||
| 991 | /* | 991 | /* |
| 992 | * setup next syncpt trigger (SWAG) | 992 | * setup next syncpt trigger (SWAG) |
| 993 | */ | 993 | */ |
| 994 | logsize = log->logsize; | 994 | logsize = log->logsize; |
| 995 | 995 | ||
| @@ -1000,11 +1000,11 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) | |||
| 1000 | if (more < 2 * LOGPSIZE) { | 1000 | if (more < 2 * LOGPSIZE) { |
| 1001 | jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); | 1001 | jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); |
| 1002 | /* | 1002 | /* |
| 1003 | * log wrapping | 1003 | * log wrapping |
| 1004 | * | 1004 | * |
| 1005 | * option 1 - panic? No! | 1005 | * option 1 - panic? No! |
| 1006 | * option 2 - shutdown file systems | 1006 | * option 2 - shutdown file systems |
| 1007 | * associated with log ? | 1007 | * associated with log ? |
| 1008 | * option 3 - extend log ? | 1008 | * option 3 - extend log ? |
| 1009 | */ | 1009 | */ |
| 1010 | /* | 1010 | /* |
| @@ -1062,7 +1062,7 @@ void jfs_syncpt(struct jfs_log *log, int hard_sync) | |||
| 1062 | /* | 1062 | /* |
| 1063 | * NAME: lmLogOpen() | 1063 | * NAME: lmLogOpen() |
| 1064 | * | 1064 | * |
| 1065 | * FUNCTION: open the log on first open; | 1065 | * FUNCTION: open the log on first open; |
| 1066 | * insert filesystem in the active list of the log. | 1066 | * insert filesystem in the active list of the log. |
| 1067 | * | 1067 | * |
| 1068 | * PARAMETER: ipmnt - file system mount inode | 1068 | * PARAMETER: ipmnt - file system mount inode |
| @@ -1113,7 +1113,7 @@ int lmLogOpen(struct super_block *sb) | |||
| 1113 | init_waitqueue_head(&log->syncwait); | 1113 | init_waitqueue_head(&log->syncwait); |
| 1114 | 1114 | ||
| 1115 | /* | 1115 | /* |
| 1116 | * external log as separate logical volume | 1116 | * external log as separate logical volume |
| 1117 | * | 1117 | * |
| 1118 | * file systems to log may have n-to-1 relationship; | 1118 | * file systems to log may have n-to-1 relationship; |
| 1119 | */ | 1119 | */ |
| @@ -1155,7 +1155,7 @@ journal_found: | |||
| 1155 | return 0; | 1155 | return 0; |
| 1156 | 1156 | ||
| 1157 | /* | 1157 | /* |
| 1158 | * unwind on error | 1158 | * unwind on error |
| 1159 | */ | 1159 | */ |
| 1160 | shutdown: /* unwind lbmLogInit() */ | 1160 | shutdown: /* unwind lbmLogInit() */ |
| 1161 | list_del(&log->journal_list); | 1161 | list_del(&log->journal_list); |
| @@ -1427,7 +1427,7 @@ int lmLogInit(struct jfs_log * log) | |||
| 1427 | return 0; | 1427 | return 0; |
| 1428 | 1428 | ||
| 1429 | /* | 1429 | /* |
| 1430 | * unwind on error | 1430 | * unwind on error |
| 1431 | */ | 1431 | */ |
| 1432 | errout30: /* release log page */ | 1432 | errout30: /* release log page */ |
| 1433 | log->wqueue = NULL; | 1433 | log->wqueue = NULL; |
| @@ -1480,7 +1480,7 @@ int lmLogClose(struct super_block *sb) | |||
| 1480 | 1480 | ||
| 1481 | if (test_bit(log_INLINELOG, &log->flag)) { | 1481 | if (test_bit(log_INLINELOG, &log->flag)) { |
| 1482 | /* | 1482 | /* |
| 1483 | * in-line log in host file system | 1483 | * in-line log in host file system |
| 1484 | */ | 1484 | */ |
| 1485 | rc = lmLogShutdown(log); | 1485 | rc = lmLogShutdown(log); |
| 1486 | kfree(log); | 1486 | kfree(log); |
| @@ -1504,7 +1504,7 @@ int lmLogClose(struct super_block *sb) | |||
| 1504 | goto out; | 1504 | goto out; |
| 1505 | 1505 | ||
| 1506 | /* | 1506 | /* |
| 1507 | * external log as separate logical volume | 1507 | * external log as separate logical volume |
| 1508 | */ | 1508 | */ |
| 1509 | list_del(&log->journal_list); | 1509 | list_del(&log->journal_list); |
| 1510 | bdev = log->bdev; | 1510 | bdev = log->bdev; |
| @@ -1622,20 +1622,26 @@ void jfs_flush_journal(struct jfs_log *log, int wait) | |||
| 1622 | if (!list_empty(&log->synclist)) { | 1622 | if (!list_empty(&log->synclist)) { |
| 1623 | struct logsyncblk *lp; | 1623 | struct logsyncblk *lp; |
| 1624 | 1624 | ||
| 1625 | printk(KERN_ERR "jfs_flush_journal: synclist not empty\n"); | ||
| 1625 | list_for_each_entry(lp, &log->synclist, synclist) { | 1626 | list_for_each_entry(lp, &log->synclist, synclist) { |
| 1626 | if (lp->xflag & COMMIT_PAGE) { | 1627 | if (lp->xflag & COMMIT_PAGE) { |
| 1627 | struct metapage *mp = (struct metapage *)lp; | 1628 | struct metapage *mp = (struct metapage *)lp; |
| 1628 | dump_mem("orphan metapage", lp, | 1629 | print_hex_dump(KERN_ERR, "metapage: ", |
| 1629 | sizeof(struct metapage)); | 1630 | DUMP_PREFIX_ADDRESS, 16, 4, |
| 1630 | dump_mem("page", mp->page, sizeof(struct page)); | 1631 | mp, sizeof(struct metapage), 0); |
| 1631 | } | 1632 | print_hex_dump(KERN_ERR, "page: ", |
| 1632 | else | 1633 | DUMP_PREFIX_ADDRESS, 16, |
| 1633 | dump_mem("orphan tblock", lp, | 1634 | sizeof(long), mp->page, |
| 1634 | sizeof(struct tblock)); | 1635 | sizeof(struct page), 0); |
| 1636 | } else | ||
| 1637 | print_hex_dump(KERN_ERR, "tblock:", | ||
| 1638 | DUMP_PREFIX_ADDRESS, 16, 4, | ||
| 1639 | lp, sizeof(struct tblock), 0); | ||
| 1635 | } | 1640 | } |
| 1636 | } | 1641 | } |
| 1642 | #else | ||
| 1643 | WARN_ON(!list_empty(&log->synclist)); | ||
| 1637 | #endif | 1644 | #endif |
| 1638 | //assert(list_empty(&log->synclist)); | ||
| 1639 | clear_bit(log_FLUSH, &log->flag); | 1645 | clear_bit(log_FLUSH, &log->flag); |
| 1640 | } | 1646 | } |
| 1641 | 1647 | ||
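The change above drops JFS's private dump_mem() helper for the generic print_hex_dump(); its trailing arguments are row size in bytes, group size, buffer, length, and whether to append an ASCII column. The call shape used throughout this patch:

    print_hex_dump(KERN_ERR, "tblock: ", DUMP_PREFIX_ADDRESS,
                   16, 4, lp, sizeof(struct tblock), 0);

Note that the page dump passes sizeof(long) as the group size, so the hex output groups by native word width on both 32- and 64-bit kernels.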
| @@ -1723,7 +1729,7 @@ int lmLogShutdown(struct jfs_log * log) | |||
| 1723 | * | 1729 | * |
| 1724 | * PARAMETER: log - pointer to log's inode. | 1730 | * PARAMETER: log - pointer to log's inode. |
| 1725 | * fsdev - kdev_t of filesystem. | 1731 | * fsdev - kdev_t of filesystem. |
| 1726 | * serial - pointer to returned log serial number | 1732 | * serial - pointer to returned log serial number |
| 1727 | * activate - insert/remove device from active list. | 1733 | * activate - insert/remove device from active list. |
| 1728 | * | 1734 | * |
| 1729 | * RETURN: 0 - success | 1735 | * RETURN: 0 - success |
| @@ -1963,7 +1969,7 @@ static void lbmfree(struct lbuf * bp) | |||
| 1963 | * FUNCTION: add a log buffer to the log redrive list | 1969 | * FUNCTION: add a log buffer to the log redrive list |
| 1964 | * | 1970 | * |
| 1965 | * PARAMETER: | 1971 | * PARAMETER: |
| 1966 | * bp - log buffer | 1972 | * bp - log buffer |
| 1967 | * | 1973 | * |
| 1968 | * NOTES: | 1974 | * NOTES: |
| 1969 | * Takes log_redrive_lock. | 1975 | * Takes log_redrive_lock. |
| @@ -2054,7 +2060,7 @@ static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, | |||
| 2054 | bp->l_flag = flag; | 2060 | bp->l_flag = flag; |
| 2055 | 2061 | ||
| 2056 | /* | 2062 | /* |
| 2057 | * insert bp at tail of write queue associated with log | 2063 | * insert bp at tail of write queue associated with log |
| 2058 | * | 2064 | * |
| 2059 | * (request is either for bp already/currently at head of queue | 2065 | * (request is either for bp already/currently at head of queue |
| 2060 | * or new bp to be inserted at tail) | 2066 | * or new bp to be inserted at tail) |
| @@ -2117,7 +2123,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag) | |||
| 2117 | log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); | 2123 | log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); |
| 2118 | 2124 | ||
| 2119 | /* | 2125 | /* |
| 2120 | * initiate pageout of the page | 2126 | * initiate pageout of the page |
| 2121 | */ | 2127 | */ |
| 2122 | lbmStartIO(bp); | 2128 | lbmStartIO(bp); |
| 2123 | } | 2129 | } |
| @@ -2128,7 +2134,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag) | |||
| 2128 | * | 2134 | * |
| 2129 | * FUNCTION: Interface to DD strategy routine | 2135 | * FUNCTION: Interface to DD strategy routine |
| 2130 | * | 2136 | * |
| 2131 | * RETURN: none | 2137 | * RETURN: none |
| 2132 | * | 2138 | * |
| 2133 | * serialization: LCACHE_LOCK() is NOT held during log i/o; | 2139 | * serialization: LCACHE_LOCK() is NOT held during log i/o; |
| 2134 | */ | 2140 | */ |
| @@ -2222,7 +2228,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) | |||
| 2222 | bio_put(bio); | 2228 | bio_put(bio); |
| 2223 | 2229 | ||
| 2224 | /* | 2230 | /* |
| 2225 | * pagein completion | 2231 | * pagein completion |
| 2226 | */ | 2232 | */ |
| 2227 | if (bp->l_flag & lbmREAD) { | 2233 | if (bp->l_flag & lbmREAD) { |
| 2228 | bp->l_flag &= ~lbmREAD; | 2234 | bp->l_flag &= ~lbmREAD; |
| @@ -2236,7 +2242,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) | |||
| 2236 | } | 2242 | } |
| 2237 | 2243 | ||
| 2238 | /* | 2244 | /* |
| 2239 | * pageout completion | 2245 | * pageout completion |
| 2240 | * | 2246 | * |
| 2241 | * the bp at the head of write queue has completed pageout. | 2247 | * the bp at the head of write queue has completed pageout. |
| 2242 | * | 2248 | * |
| @@ -2302,7 +2308,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) | |||
| 2302 | } | 2308 | } |
| 2303 | 2309 | ||
| 2304 | /* | 2310 | /* |
| 2305 | * synchronous pageout: | 2311 | * synchronous pageout: |
| 2306 | * | 2312 | * |
| 2307 | * buffer has not necessarily been removed from write queue | 2313 | * buffer has not necessarily been removed from write queue |
| 2308 | * (e.g., synchronous write of partial-page with COMMIT): | 2314 | * (e.g., synchronous write of partial-page with COMMIT): |
| @@ -2316,7 +2322,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) | |||
| 2316 | } | 2322 | } |
| 2317 | 2323 | ||
| 2318 | /* | 2324 | /* |
| 2319 | * Group Commit pageout: | 2325 | * Group Commit pageout: |
| 2320 | */ | 2326 | */ |
| 2321 | else if (bp->l_flag & lbmGC) { | 2327 | else if (bp->l_flag & lbmGC) { |
| 2322 | LCACHE_UNLOCK(flags); | 2328 | LCACHE_UNLOCK(flags); |
| @@ -2324,7 +2330,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) | |||
| 2324 | } | 2330 | } |
| 2325 | 2331 | ||
| 2326 | /* | 2332 | /* |
| 2327 | * asynchronous pageout: | 2333 | * asynchronous pageout: |
| 2328 | * | 2334 | * |
| 2329 | * buffer must have been removed from write queue: | 2335 | * buffer must have been removed from write queue: |
| 2330 | * insert buffer at head of freelist where it can be recycled | 2336 | * insert buffer at head of freelist where it can be recycled |
| @@ -2375,7 +2381,7 @@ int jfsIOWait(void *arg) | |||
| 2375 | * FUNCTION: format file system log | 2381 | * FUNCTION: format file system log |
| 2376 | * | 2382 | * |
| 2377 | * PARAMETERS: | 2383 | * PARAMETERS: |
| 2378 | * log - volume log | 2384 | * log - volume log |
| 2379 | * logAddress - start address of log space in FS block | 2385 | * logAddress - start address of log space in FS block |
| 2380 | * logSize - length of log space in FS block; | 2386 | * logSize - length of log space in FS block; |
| 2381 | * | 2387 | * |
| @@ -2407,16 +2413,16 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) | |||
| 2407 | npages = logSize >> sbi->l2nbperpage; | 2413 | npages = logSize >> sbi->l2nbperpage; |
| 2408 | 2414 | ||
| 2409 | /* | 2415 | /* |
| 2410 | * log space: | 2416 | * log space: |
| 2411 | * | 2417 | * |
| 2412 | * page 0 - reserved; | 2418 | * page 0 - reserved; |
| 2413 | * page 1 - log superblock; | 2419 | * page 1 - log superblock; |
| 2414 | * page 2 - log data page: A SYNC log record is written | 2420 | * page 2 - log data page: A SYNC log record is written |
| 2415 | * into this page at logform time; | 2421 | * into this page at logform time; |
| 2416 | * pages 3-N - log data page: set to empty log data pages; | 2422 | * pages 3-N - log data page: set to empty log data pages; |
| 2417 | */ | 2423 | */ |
| 2418 | /* | 2424 | /* |
| 2419 | * init log superblock: log page 1 | 2425 | * init log superblock: log page 1 |
| 2420 | */ | 2426 | */ |
| 2421 | logsuper = (struct logsuper *) bp->l_ldata; | 2427 | logsuper = (struct logsuper *) bp->l_ldata; |
| 2422 | 2428 | ||
| @@ -2436,7 +2442,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) | |||
| 2436 | goto exit; | 2442 | goto exit; |
| 2437 | 2443 | ||
| 2438 | /* | 2444 | /* |
| 2439 | * init pages 2 to npages-1 as log data pages: | 2445 | * init pages 2 to npages-1 as log data pages: |
| 2440 | * | 2446 | * |
| 2441 | * log page sequence number (lpsn) initialization: | 2447 | * log page sequence number (lpsn) initialization: |
| 2442 | * | 2448 | * |
| @@ -2479,7 +2485,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) | |||
| 2479 | goto exit; | 2485 | goto exit; |
| 2480 | 2486 | ||
| 2481 | /* | 2487 | /* |
| 2482 | * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) | 2488 | * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) |
| 2483 | */ | 2489 | */ |
| 2484 | for (lspn = 0; lspn < npages - 3; lspn++) { | 2490 | for (lspn = 0; lspn < npages - 3; lspn++) { |
| 2485 | lp->h.page = lp->t.page = cpu_to_le32(lspn); | 2491 | lp->h.page = lp->t.page = cpu_to_le32(lspn); |
| @@ -2495,7 +2501,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) | |||
| 2495 | rc = 0; | 2501 | rc = 0; |
| 2496 | exit: | 2502 | exit: |
| 2497 | /* | 2503 | /* |
| 2498 | * finalize log | 2504 | * finalize log |
| 2499 | */ | 2505 | */ |
| 2500 | /* release the buffer */ | 2506 | /* release the buffer */ |
| 2501 | lbmFree(bp); | 2507 | lbmFree(bp); |
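Taken together, the comments in lmLogFormat() pin down the on-disk geometry of a fresh log. Restating the arithmetic the code above performs (a sketch, no new behavior):

    npages = logSize >> sbi->l2nbperpage;   /* FS blocks -> 4K log pages */
    /* page 0         reserved, never written
     * page 1         log superblock
     * page 2         first data page, holds the initial SYNC record
     * pages 3..N-1   empty data pages; the loop presets lpsn 0..N-4
     */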
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h index a53fb17ea219..1f85ef0ec045 100644 --- a/fs/jfs/jfs_logmgr.h +++ b/fs/jfs/jfs_logmgr.h | |||
| @@ -144,7 +144,7 @@ struct logpage { | |||
| 144 | * | 144 | * |
| 145 | * (this comment should be rewritten!) | 145 | * (this comment should be rewritten!) |
| 146 | * jfs uses only "after" log records (only a single writer is allowed | 146 | * jfs uses only "after" log records (only a single writer is allowed |
| 147 | * in a page, pages are written to temporary paging space | 147 | * in a page, pages are written to temporary paging space |
| 148 | * if they must be written to disk before commit, and i/o is | 148 | * if they must be written to disk before commit, and i/o is |
| 149 | * scheduled for modified pages to their home location after | 149 | * scheduled for modified pages to their home location after |
| 150 | * the log records containing the after values and the commit | 150 | * the log records containing the after values and the commit |
| @@ -153,7 +153,7 @@ struct logpage { | |||
| 153 | * | 153 | * |
| 154 | * a log record consists of a data area of variable length followed by | 154 | * a log record consists of a data area of variable length followed by |
| 155 | * a descriptor of fixed size LOGRDSIZE bytes. | 155 | * a descriptor of fixed size LOGRDSIZE bytes. |
| 156 | * the data area is rounded up to a multiple of 4 bytes and | 156 | * the data area is rounded up to a multiple of 4 bytes and |
| 157 | * must be no longer than LOGPSIZE. | 157 | * must be no longer than LOGPSIZE. |
| 158 | * the descriptor's size is a multiple of 4 bytes, aligned on a | 158 | * the descriptor's size is a multiple of 4 bytes, aligned on a |
| 159 | * 4-byte boundary. | 159 | * 4-byte boundary. |
| @@ -215,13 +215,13 @@ struct lrd { | |||
| 215 | union { | 215 | union { |
| 216 | 216 | ||
| 217 | /* | 217 | /* |
| 218 | * COMMIT: commit | 218 | * COMMIT: commit |
| 219 | * | 219 | * |
| 220 | * transaction commit: no type-dependent information; | 220 | * transaction commit: no type-dependent information; |
| 221 | */ | 221 | */ |
| 222 | 222 | ||
| 223 | /* | 223 | /* |
| 224 | * REDOPAGE: after-image | 224 | * REDOPAGE: after-image |
| 225 | * | 225 | * |
| 226 | * apply after-image; | 226 | * apply after-image; |
| 227 | * | 227 | * |
| @@ -236,7 +236,7 @@ struct lrd { | |||
| 236 | } redopage; /* (20) */ | 236 | } redopage; /* (20) */ |
| 237 | 237 | ||
| 238 | /* | 238 | /* |
| 239 | * NOREDOPAGE: the page is freed | 239 | * NOREDOPAGE: the page is freed |
| 240 | * | 240 | * |
| 241 | * do not apply after-image records which precede this record | 241 | * do not apply after-image records which precede this record |
| 242 | * in the log with the same page block number to this page. | 242 | * in the log with the same page block number to this page. |
| @@ -252,7 +252,7 @@ struct lrd { | |||
| 252 | } noredopage; /* (20) */ | 252 | } noredopage; /* (20) */ |
| 253 | 253 | ||
| 254 | /* | 254 | /* |
| 255 | * UPDATEMAP: update block allocation map | 255 | * UPDATEMAP: update block allocation map |
| 256 | * | 256 | * |
| 257 | * either in-line PXD, | 257 | * either in-line PXD, |
| 258 | * or out-of-line XADLIST; | 258 | * or out-of-line XADLIST; |
| @@ -268,7 +268,7 @@ struct lrd { | |||
| 268 | } updatemap; /* (20) */ | 268 | } updatemap; /* (20) */ |
| 269 | 269 | ||
| 270 | /* | 270 | /* |
| 271 | * NOREDOINOEXT: the inode extent is freed | 271 | * NOREDOINOEXT: the inode extent is freed |
| 272 | * | 272 | * |
| 273 | * do not apply after-image records which precede this | 273 | * do not apply after-image records which precede this |
| 274 | * record in the log with any of the 4 page block | 274 | * record in the log with any of the 4 page block |
| @@ -286,7 +286,7 @@ struct lrd { | |||
| 286 | } noredoinoext; /* (20) */ | 286 | } noredoinoext; /* (20) */ |
| 287 | 287 | ||
| 288 | /* | 288 | /* |
| 289 | * SYNCPT: log sync point | 289 | * SYNCPT: log sync point |
| 290 | * | 290 | * |
| 291 | * replay log up to the syncpt address specified; | 291 | * replay log up to the syncpt address specified; |
| 292 | */ | 292 | */ |
| @@ -295,13 +295,13 @@ struct lrd { | |||
| 295 | } syncpt; | 295 | } syncpt; |
| 296 | 296 | ||
| 297 | /* | 297 | /* |
| 298 | * MOUNT: file system mount | 298 | * MOUNT: file system mount |
| 299 | * | 299 | * |
| 300 | * file system mount: no type-dependent information; | 300 | * file system mount: no type-dependent information; |
| 301 | */ | 301 | */ |
| 302 | 302 | ||
| 303 | /* | 303 | /* |
| 304 | * ? FREEXTENT: free specified extent(s) | 304 | * ? FREEXTENT: free specified extent(s) |
| 305 | * | 305 | * |
| 306 | * free specified extent(s) from block allocation map | 306 | * free specified extent(s) from block allocation map |
| 307 | * N.B.: nextents should be length of data/sizeof(xad_t) | 307 | * N.B.: nextents should be length of data/sizeof(xad_t) |
| @@ -314,7 +314,7 @@ struct lrd { | |||
| 314 | } freextent; | 314 | } freextent; |
| 315 | 315 | ||
| 316 | /* | 316 | /* |
| 317 | * ? NOREDOFILE: this file is freed | 317 | * ? NOREDOFILE: this file is freed |
| 318 | * | 318 | * |
| 319 | * do not apply records which precede this record in the log | 319 | * do not apply records which precede this record in the log |
| 320 | * with the same inode number. | 320 | * with the same inode number. |
| @@ -330,7 +330,7 @@ struct lrd { | |||
| 330 | } noredofile; | 330 | } noredofile; |
| 331 | 331 | ||
| 332 | /* | 332 | /* |
| 333 | * ? NEWPAGE: | 333 | * ? NEWPAGE: |
| 334 | * | 334 | * |
| 335 | * metadata type dependent | 335 | * metadata type dependent |
| 336 | */ | 336 | */ |
| @@ -342,7 +342,7 @@ struct lrd { | |||
| 342 | } newpage; | 342 | } newpage; |
| 343 | 343 | ||
| 344 | /* | 344 | /* |
| 345 | * ? DUMMY: filler | 345 | * ? DUMMY: filler |
| 346 | * | 346 | * |
| 347 | * no type-dependent information | 347 | * no type-dependent information |
| 348 | */ | 348 | */ |
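Every descriptor field is stored little-endian, so a writer fills in an lrd with cpu_to_le*() and hands it to lmLog(), as txCommit() does for LOG_COMMIT later in this patch. Condensed sketch:

    lrd->type = cpu_to_le16(LOG_COMMIT);
    lrd->length = 0;                     /* a commit record carries no data */
    lsn = lmLog(log, tblk, lrd, NULL);   /* queue the record for the log */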
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 43d4f69afbec..77c7f1129dde 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c | |||
| @@ -472,7 +472,8 @@ add_failed: | |||
| 472 | printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); | 472 | printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); |
| 473 | goto skip; | 473 | goto skip; |
| 474 | dump_bio: | 474 | dump_bio: |
| 475 | dump_mem("bio", bio, sizeof(*bio)); | 475 | print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16, |
| 476 | 4, bio, sizeof(*bio), 0); | ||
| 476 | skip: | 477 | skip: |
| 477 | bio_put(bio); | 478 | bio_put(bio); |
| 478 | unlock_page(page); | 479 | unlock_page(page); |
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 4dd479834897..644429acb8c0 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c | |||
| @@ -80,7 +80,7 @@ static int logMOUNT(struct super_block *sb); | |||
| 80 | */ | 80 | */ |
| 81 | int jfs_mount(struct super_block *sb) | 81 | int jfs_mount(struct super_block *sb) |
| 82 | { | 82 | { |
| 83 | int rc = 0; /* Return code */ | 83 | int rc = 0; /* Return code */ |
| 84 | struct jfs_sb_info *sbi = JFS_SBI(sb); | 84 | struct jfs_sb_info *sbi = JFS_SBI(sb); |
| 85 | struct inode *ipaimap = NULL; | 85 | struct inode *ipaimap = NULL; |
| 86 | struct inode *ipaimap2 = NULL; | 86 | struct inode *ipaimap2 = NULL; |
| @@ -169,7 +169,7 @@ int jfs_mount(struct super_block *sb) | |||
| 169 | sbi->ipaimap2 = NULL; | 169 | sbi->ipaimap2 = NULL; |
| 170 | 170 | ||
| 171 | /* | 171 | /* |
| 172 | * mount (the only/single) fileset | 172 | * mount (the only/single) fileset |
| 173 | */ | 173 | */ |
| 174 | /* | 174 | /* |
| 175 | * open fileset inode allocation map (aka fileset inode) | 175 | * open fileset inode allocation map (aka fileset inode) |
| @@ -195,7 +195,7 @@ int jfs_mount(struct super_block *sb) | |||
| 195 | goto out; | 195 | goto out; |
| 196 | 196 | ||
| 197 | /* | 197 | /* |
| 198 | * unwind on error | 198 | * unwind on error |
| 199 | */ | 199 | */ |
| 200 | errout41: /* close fileset inode allocation map inode */ | 200 | errout41: /* close fileset inode allocation map inode */ |
| 201 | diFreeSpecial(ipimap); | 201 | diFreeSpecial(ipimap); |
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 25430d0b0d59..7aa1f7004eaf 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | */ | 18 | */ |
| 19 | 19 | ||
| 20 | /* | 20 | /* |
| 21 | * jfs_txnmgr.c: transaction manager | 21 | * jfs_txnmgr.c: transaction manager |
| 22 | * | 22 | * |
| 23 | * notes: | 23 | * notes: |
| 24 | * transaction starts with txBegin() and ends with txCommit() | 24 | * transaction starts with txBegin() and ends with txCommit() |
| @@ -60,7 +60,7 @@ | |||
| 60 | #include "jfs_debug.h" | 60 | #include "jfs_debug.h" |
| 61 | 61 | ||
| 62 | /* | 62 | /* |
| 63 | * transaction management structures | 63 | * transaction management structures |
| 64 | */ | 64 | */ |
| 65 | static struct { | 65 | static struct { |
| 66 | int freetid; /* index of a free tid structure */ | 66 | int freetid; /* index of a free tid structure */ |
| @@ -103,19 +103,19 @@ module_param(nTxLock, int, 0); | |||
| 103 | MODULE_PARM_DESC(nTxLock, | 103 | MODULE_PARM_DESC(nTxLock, |
| 104 | "Number of transaction locks (max:65536)"); | 104 | "Number of transaction locks (max:65536)"); |
| 105 | 105 | ||
| 106 | struct tblock *TxBlock; /* transaction block table */ | 106 | struct tblock *TxBlock; /* transaction block table */ |
| 107 | static int TxLockLWM; /* Low water mark for number of txLocks used */ | 107 | static int TxLockLWM; /* Low water mark for number of txLocks used */ |
| 108 | static int TxLockHWM; /* High water mark for number of txLocks used */ | 108 | static int TxLockHWM; /* High water mark for number of txLocks used */ |
| 109 | static int TxLockVHWM; /* Very High water mark */ | 109 | static int TxLockVHWM; /* Very High water mark */ |
| 110 | struct tlock *TxLock; /* transaction lock table */ | 110 | struct tlock *TxLock; /* transaction lock table */ |
| 111 | 111 | ||
| 112 | /* | 112 | /* |
| 113 | * transaction management lock | 113 | * transaction management lock |
| 114 | */ | 114 | */ |
| 115 | static DEFINE_SPINLOCK(jfsTxnLock); | 115 | static DEFINE_SPINLOCK(jfsTxnLock); |
| 116 | 116 | ||
| 117 | #define TXN_LOCK() spin_lock(&jfsTxnLock) | 117 | #define TXN_LOCK() spin_lock(&jfsTxnLock) |
| 118 | #define TXN_UNLOCK() spin_unlock(&jfsTxnLock) | 118 | #define TXN_UNLOCK() spin_unlock(&jfsTxnLock) |
| 119 | 119 | ||
| 120 | #define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock); | 120 | #define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock); |
| 121 | #define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) | 121 | #define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) |
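TXN_LOCK() wraps a plain spinlock, while LAZY_LOCK() saves and restores interrupt state; the matching LAZY_UNLOCK() falls just past the end of this hunk. A usage sketch (that the irqsave variant exists because the lazy-commit anchor can be touched from I/O completion context is an assumption, not stated in the excerpt):

    unsigned long flags;

    TXN_LOCK();
    /* ... walk or update the tid/tlock tables ... */
    TXN_UNLOCK();

    LAZY_LOCK(flags);
    /* ... update TxAnchor's lazy-commit state ... */
    LAZY_UNLOCK(flags);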
| @@ -148,7 +148,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) | |||
| 148 | #define TXN_WAKEUP(event) wake_up_all(event) | 148 | #define TXN_WAKEUP(event) wake_up_all(event) |
| 149 | 149 | ||
| 150 | /* | 150 | /* |
| 151 | * statistics | 151 | * statistics |
| 152 | */ | 152 | */ |
| 153 | static struct { | 153 | static struct { |
| 154 | tid_t maxtid; /* 4: biggest tid ever used */ | 154 | tid_t maxtid; /* 4: biggest tid ever used */ |
| @@ -181,8 +181,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 181 | static void LogSyncRelease(struct metapage * mp); | 181 | static void LogSyncRelease(struct metapage * mp); |
| 182 | 182 | ||
| 183 | /* | 183 | /* |
| 184 | * transaction block/lock management | 184 | * transaction block/lock management |
| 185 | * --------------------------------- | 185 | * --------------------------------- |
| 186 | */ | 186 | */ |
| 187 | 187 | ||
| 188 | /* | 188 | /* |
| @@ -227,9 +227,9 @@ static void txLockFree(lid_t lid) | |||
| 227 | } | 227 | } |
| 228 | 228 | ||
| 229 | /* | 229 | /* |
| 230 | * NAME: txInit() | 230 | * NAME: txInit() |
| 231 | * | 231 | * |
| 232 | * FUNCTION: initialize transaction management structures | 232 | * FUNCTION: initialize transaction management structures |
| 233 | * | 233 | * |
| 234 | * RETURN: | 234 | * RETURN: |
| 235 | * | 235 | * |
| @@ -333,9 +333,9 @@ int txInit(void) | |||
| 333 | } | 333 | } |
| 334 | 334 | ||
| 335 | /* | 335 | /* |
| 336 | * NAME: txExit() | 336 | * NAME: txExit() |
| 337 | * | 337 | * |
| 338 | * FUNCTION: clean up when module is unloaded | 338 | * FUNCTION: clean up when module is unloaded |
| 339 | */ | 339 | */ |
| 340 | void txExit(void) | 340 | void txExit(void) |
| 341 | { | 341 | { |
| @@ -346,12 +346,12 @@ void txExit(void) | |||
| 346 | } | 346 | } |
| 347 | 347 | ||
| 348 | /* | 348 | /* |
| 349 | * NAME: txBegin() | 349 | * NAME: txBegin() |
| 350 | * | 350 | * |
| 351 | * FUNCTION: start a transaction. | 351 | * FUNCTION: start a transaction. |
| 352 | * | 352 | * |
| 353 | * PARAMETER: sb - superblock | 353 | * PARAMETER: sb - superblock |
| 354 | * flag - force for nested tx; | 354 | * flag - force for nested tx; |
| 355 | * | 355 | * |
| 356 | * RETURN: tid - transaction id | 356 | * RETURN: tid - transaction id |
| 357 | * | 357 | * |
| @@ -447,13 +447,13 @@ tid_t txBegin(struct super_block *sb, int flag) | |||
| 447 | } | 447 | } |
| 448 | 448 | ||
| 449 | /* | 449 | /* |
| 450 | * NAME: txBeginAnon() | 450 | * NAME: txBeginAnon() |
| 451 | * | 451 | * |
| 452 | * FUNCTION: start an anonymous transaction. | 452 | * FUNCTION: start an anonymous transaction. |
| 453 | * Blocks if logsync or available tlocks are low to prevent | 453 | * Blocks if logsync or available tlocks are low to prevent |
| 454 | * anonymous tlocks from depleting supply. | 454 | * anonymous tlocks from depleting supply. |
| 455 | * | 455 | * |
| 456 | * PARAMETER: sb - superblock | 456 | * PARAMETER: sb - superblock |
| 457 | * | 457 | * |
| 458 | * RETURN: none | 458 | * RETURN: none |
| 459 | */ | 459 | */ |
| @@ -489,11 +489,11 @@ void txBeginAnon(struct super_block *sb) | |||
| 489 | } | 489 | } |
| 490 | 490 | ||
| 491 | /* | 491 | /* |
| 492 | * txEnd() | 492 | * txEnd() |
| 493 | * | 493 | * |
| 494 | * function: free specified transaction block. | 494 | * function: free specified transaction block. |
| 495 | * | 495 | * |
| 496 | * logsync barrier processing: | 496 | * logsync barrier processing: |
| 497 | * | 497 | * |
| 498 | * serialization: | 498 | * serialization: |
| 499 | */ | 499 | */ |
| @@ -577,13 +577,13 @@ wakeup: | |||
| 577 | } | 577 | } |
| 578 | 578 | ||
| 579 | /* | 579 | /* |
| 580 | * txLock() | 580 | * txLock() |
| 581 | * | 581 | * |
| 582 | * function: acquire a transaction lock on the specified <mp> | 582 | * function: acquire a transaction lock on the specified <mp> |
| 583 | * | 583 | * |
| 584 | * parameter: | 584 | * parameter: |
| 585 | * | 585 | * |
| 586 | * return: transaction lock id | 586 | * return: transaction lock id |
| 587 | * | 587 | * |
| 588 | * serialization: | 588 | * serialization: |
| 589 | */ | 589 | */ |
| @@ -829,12 +829,16 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, | |||
| 829 | /* Only locks on ipimap or ipaimap should reach here */ | 829 | /* Only locks on ipimap or ipaimap should reach here */ |
| 830 | /* assert(jfs_ip->fileset == AGGREGATE_I); */ | 830 | /* assert(jfs_ip->fileset == AGGREGATE_I); */ |
| 831 | if (jfs_ip->fileset != AGGREGATE_I) { | 831 | if (jfs_ip->fileset != AGGREGATE_I) { |
| 832 | jfs_err("txLock: trying to lock locked page!"); | 832 | printk(KERN_ERR "txLock: trying to lock locked page!"); |
| 833 | dump_mem("ip", ip, sizeof(struct inode)); | 833 | print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4, |
| 834 | dump_mem("mp", mp, sizeof(struct metapage)); | 834 | ip, sizeof(*ip), 0); |
| 835 | dump_mem("Locker's tblk", tid_to_tblock(tid), | 835 | print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4, |
| 836 | sizeof(struct tblock)); | 836 | mp, sizeof(*mp), 0); |
| 837 | dump_mem("Tlock", tlck, sizeof(struct tlock)); | 837 | print_hex_dump(KERN_ERR, "Locker's tblock: ", |
| 838 | DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid), | ||
| 839 | sizeof(struct tblock), 0); | ||
| 840 | print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4, | ||
| 841 | tlck, sizeof(*tlck), 0); | ||
| 838 | BUG(); | 842 | BUG(); |
| 839 | } | 843 | } |
| 840 | INCREMENT(stattx.waitlock); /* statistics */ | 844 | INCREMENT(stattx.waitlock); /* statistics */ |
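Two failure idioms sit side by side in this patch: jfs_flush_journal() warns and carries on (WARN_ON), while txLock() dumps every relevant object and halts, since a metapage locked by an unknown transaction leaves no safe way forward. The skeleton of that choice (predicate names hypothetical):

    if (recoverable_oddity)
            WARN_ON(1);     /* log a backtrace, keep running */
    else if (fatal_inconsistency)
            BUG();          /* oops the current task; state is unusable */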
| @@ -857,17 +861,17 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, | |||
| 857 | } | 861 | } |
| 858 | 862 | ||
| 859 | /* | 863 | /* |
| 860 | * NAME: txRelease() | 864 | * NAME: txRelease() |
| 861 | * | 865 | * |
| 862 | * FUNCTION: Release buffers associated with transaction locks, but don't | 866 | * FUNCTION: Release buffers associated with transaction locks, but don't |
| 863 | * mark homeok yet. This allows other transactions to modify | 867 | * mark homeok yet. This allows other transactions to modify |
| 864 | * buffers, but won't let them go to disk until commit record | 868 | * buffers, but won't let them go to disk until commit record |
| 865 | * actually gets written. | 869 | * actually gets written. |
| 866 | * | 870 | * |
| 867 | * PARAMETER: | 871 | * PARAMETER: |
| 868 | * tblk - | 872 | * tblk - |
| 869 | * | 873 | * |
| 870 | * RETURN: Errors from subroutines. | 874 | * RETURN: Errors from subroutines. |
| 871 | */ | 875 | */ |
| 872 | static void txRelease(struct tblock * tblk) | 876 | static void txRelease(struct tblock * tblk) |
| 873 | { | 877 | { |
| @@ -896,10 +900,10 @@ static void txRelease(struct tblock * tblk) | |||
| 896 | } | 900 | } |
| 897 | 901 | ||
| 898 | /* | 902 | /* |
| 899 | * NAME: txUnlock() | 903 | * NAME: txUnlock() |
| 900 | * | 904 | * |
| 901 | * FUNCTION: Initiates pageout of pages modified by tid in journalled | 905 | * FUNCTION: Initiates pageout of pages modified by tid in journalled |
| 902 | * objects and frees their lockwords. | 906 | * objects and frees their lockwords. |
| 903 | */ | 907 | */ |
| 904 | static void txUnlock(struct tblock * tblk) | 908 | static void txUnlock(struct tblock * tblk) |
| 905 | { | 909 | { |
| @@ -983,10 +987,10 @@ static void txUnlock(struct tblock * tblk) | |||
| 983 | } | 987 | } |
| 984 | 988 | ||
| 985 | /* | 989 | /* |
| 986 | * txMaplock() | 990 | * txMaplock() |
| 987 | * | 991 | * |
| 988 | * function: allocate a transaction lock for freed page/entry; | 992 | * function: allocate a transaction lock for freed page/entry; |
| 989 | * for freed page, maplock is used as xtlock/dtlock type; | 993 | * for freed page, maplock is used as xtlock/dtlock type; |
| 990 | */ | 994 | */ |
| 991 | struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) | 995 | struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) |
| 992 | { | 996 | { |
| @@ -1057,7 +1061,7 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) | |||
| 1057 | } | 1061 | } |
| 1058 | 1062 | ||
| 1059 | /* | 1063 | /* |
| 1060 | * txLinelock() | 1064 | * txLinelock() |
| 1061 | * | 1065 | * |
| 1062 | * function: allocate a transaction lock for log vector list | 1066 | * function: allocate a transaction lock for log vector list |
| 1063 | */ | 1067 | */ |
| @@ -1092,39 +1096,39 @@ struct linelock *txLinelock(struct linelock * tlock) | |||
| 1092 | } | 1096 | } |
| 1093 | 1097 | ||
| 1094 | /* | 1098 | /* |
| 1095 | * transaction commit management | 1099 | * transaction commit management |
| 1096 | * ----------------------------- | 1100 | * ----------------------------- |
| 1097 | */ | 1101 | */ |
| 1098 | 1102 | ||
| 1099 | /* | 1103 | /* |
| 1100 | * NAME: txCommit() | 1104 | * NAME: txCommit() |
| 1101 | * | 1105 | * |
| 1102 | * FUNCTION: commit the changes to the objects specified in | 1106 | * FUNCTION: commit the changes to the objects specified in |
| 1103 | * clist. For journalled segments only the | 1107 | * clist. For journalled segments only the |
| 1104 | * changes of the caller are committed, i.e. by tid. | 1108 | * changes of the caller are committed, i.e. by tid. |
| 1105 | * for non-journalled segments the data are flushed to | 1109 | * for non-journalled segments the data are flushed to |
| 1106 | * disk and then the change to the disk inode and indirect | 1110 | * disk and then the change to the disk inode and indirect |
| 1107 | * blocks committed (so blocks newly allocated to the | 1111 | * blocks committed (so blocks newly allocated to the |
| 1108 | * segment will be made a part of the segment atomically). | 1112 | * segment will be made a part of the segment atomically). |
| 1109 | * | 1113 | * |
| 1110 | * all of the segments specified in clist must be in | 1114 | * all of the segments specified in clist must be in |
| 1111 | * one file system. no more than 6 segments are needed | 1115 | * one file system. no more than 6 segments are needed |
| 1112 | * to handle all unix svcs. | 1116 | * to handle all unix svcs. |
| 1113 | * | 1117 | * |
| 1114 | * if the i_nlink field (i.e. disk inode link count) | 1118 | * if the i_nlink field (i.e. disk inode link count) |
| 1115 | * is zero, and the type of inode is a regular file or | 1119 | * is zero, and the type of inode is a regular file or |
| 1116 | * directory, or symbolic link, the inode is truncated | 1120 | * directory, or symbolic link, the inode is truncated |
| 1117 | * to zero length. the truncation is committed but the | 1121 | * to zero length. the truncation is committed but the |
| 1118 | * VM resources are unaffected until it is closed (see | 1122 | * VM resources are unaffected until it is closed (see |
| 1119 | * iput and iclose). | 1123 | * iput and iclose). |
| 1120 | * | 1124 | * |
| 1121 | * PARAMETER: | 1125 | * PARAMETER: |
| 1122 | * | 1126 | * |
| 1123 | * RETURN: | 1127 | * RETURN: |
| 1124 | * | 1128 | * |
| 1125 | * serialization: | 1129 | * serialization: |
| 1126 | * on entry the inode lock on each segment is assumed | 1130 | * on entry the inode lock on each segment is assumed |
| 1127 | * to be held. | 1131 | * to be held. |
| 1128 | * | 1132 | * |
| 1129 | * i/o error: | 1133 | * i/o error: |
| 1130 | */ | 1134 | */ |
| @@ -1175,7 +1179,7 @@ int txCommit(tid_t tid, /* transaction identifier */ | |||
| 1175 | if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) | 1179 | if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) |
| 1176 | tblk->xflag |= COMMIT_LAZY; | 1180 | tblk->xflag |= COMMIT_LAZY; |
| 1177 | /* | 1181 | /* |
| 1178 | * prepare non-journaled objects for commit | 1182 | * prepare non-journaled objects for commit |
| 1179 | * | 1183 | * |
| 1180 | * flush data pages of non-journaled file | 1184 | * flush data pages of non-journaled file |
| 1181 | * to prevent the file getting non-initialized disk blocks | 1185 | * to prevent the file getting non-initialized disk blocks |
| @@ -1186,7 +1190,7 @@ int txCommit(tid_t tid, /* transaction identifier */ | |||
| 1186 | cd.nip = nip; | 1190 | cd.nip = nip; |
| 1187 | 1191 | ||
| 1188 | /* | 1192 | /* |
| 1189 | * acquire transaction lock on (on-disk) inodes | 1193 | * acquire transaction lock on (on-disk) inodes |
| 1190 | * | 1194 | * |
| 1191 | * update on-disk inode from in-memory inode | 1195 | * update on-disk inode from in-memory inode |
| 1192 | * acquiring transaction locks for AFTER records | 1196 | * acquiring transaction locks for AFTER records |
| @@ -1262,7 +1266,7 @@ int txCommit(tid_t tid, /* transaction identifier */ | |||
| 1262 | } | 1266 | } |
| 1263 | 1267 | ||
| 1264 | /* | 1268 | /* |
| 1265 | * write log records from transaction locks | 1269 | * write log records from transaction locks |
| 1266 | * | 1270 | * |
| 1267 | * txUpdateMap() resets XAD_NEW in XAD. | 1271 | * txUpdateMap() resets XAD_NEW in XAD. |
| 1268 | */ | 1272 | */ |
| @@ -1294,7 +1298,7 @@ int txCommit(tid_t tid, /* transaction identifier */ | |||
| 1294 | !test_cflag(COMMIT_Nolink, tblk->u.ip))); | 1298 | !test_cflag(COMMIT_Nolink, tblk->u.ip))); |
| 1295 | 1299 | ||
| 1296 | /* | 1300 | /* |
| 1297 | * write COMMIT log record | 1301 | * write COMMIT log record |
| 1298 | */ | 1302 | */ |
| 1299 | lrd->type = cpu_to_le16(LOG_COMMIT); | 1303 | lrd->type = cpu_to_le16(LOG_COMMIT); |
| 1300 | lrd->length = 0; | 1304 | lrd->length = 0; |
| @@ -1303,7 +1307,7 @@ int txCommit(tid_t tid, /* transaction identifier */ | |||
| 1303 | lmGroupCommit(log, tblk); | 1307 | lmGroupCommit(log, tblk); |
| 1304 | 1308 | ||
| 1305 | /* | 1309 | /* |
| 1306 | * - transaction is now committed - | 1310 | * - transaction is now committed - |
| 1307 | */ | 1311 | */ |
| 1308 | 1312 | ||
| 1309 | /* | 1313 | /* |
| @@ -1314,11 +1318,11 @@ int txCommit(tid_t tid, /* transaction identifier */ | |||
| 1314 | txForce(tblk); | 1318 | txForce(tblk); |
| 1315 | 1319 | ||
| 1316 | /* | 1320 | /* |
| 1317 | * update allocation map. | 1321 | * update allocation map. |
| 1318 | * | 1322 | * |
| 1319 | * update inode allocation map and inode: | 1323 | * update inode allocation map and inode: |
| 1320 | * free pager lock on memory object of inode if any. | 1324 | * free pager lock on memory object of inode if any. |
| 1321 | * update block allocation map. | 1325 | * update block allocation map. |
| 1322 | * | 1326 | * |
| 1323 | * txUpdateMap() resets XAD_NEW in XAD. | 1327 | * txUpdateMap() resets XAD_NEW in XAD. |
| 1324 | */ | 1328 | */ |
| @@ -1326,7 +1330,7 @@ int txCommit(tid_t tid, /* transaction identifier */ | |||
| 1326 | txUpdateMap(tblk); | 1330 | txUpdateMap(tblk); |
| 1327 | 1331 | ||
| 1328 | /* | 1332 | /* |
| 1329 | * free transaction locks and pageout/free pages | 1333 | * free transaction locks and pageout/free pages |
| 1330 | */ | 1334 | */ |
| 1331 | txRelease(tblk); | 1335 | txRelease(tblk); |
| 1332 | 1336 | ||
| @@ -1335,7 +1339,7 @@ int txCommit(tid_t tid, /* transaction identifier */ | |||
| 1335 | 1339 | ||
| 1336 | 1340 | ||
| 1337 | /* | 1341 | /* |
| 1338 | * reset in-memory object state | 1342 | * reset in-memory object state |
| 1339 | */ | 1343 | */ |
| 1340 | for (k = 0; k < cd.nip; k++) { | 1344 | for (k = 0; k < cd.nip; k++) { |
| 1341 | ip = cd.iplist[k]; | 1345 | ip = cd.iplist[k]; |
| @@ -1358,11 +1362,11 @@ int txCommit(tid_t tid, /* transaction identifier */ | |||
| 1358 | } | 1362 | } |
| 1359 | 1363 | ||
| 1360 | /* | 1364 | /* |
| 1361 | * NAME: txLog() | 1365 | * NAME: txLog() |
| 1362 | * | 1366 | * |
| 1363 | * FUNCTION: Writes AFTER log records for all lines modified | 1367 | * FUNCTION: Writes AFTER log records for all lines modified |
| 1364 | * by tid for segments specified by inodes in comdata. | 1368 | * by tid for segments specified by inodes in comdata. |
| 1365 | * Code assumes only WRITELOCKS are recorded in lockwords. | 1369 | * Code assumes only WRITELOCKS are recorded in lockwords. |
| 1366 | * | 1370 | * |
| 1367 | * PARAMETERS: | 1371 | * PARAMETERS: |
| 1368 | * | 1372 | * |
| @@ -1421,12 +1425,12 @@ static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd) | |||
| 1421 | } | 1425 | } |
| 1422 | 1426 | ||
| 1423 | /* | 1427 | /* |
| 1424 | * diLog() | 1428 | * diLog() |
| 1425 | * | 1429 | * |
| 1426 | * function: log inode tlock and format maplock to update bmap; | 1430 | * function: log inode tlock and format maplock to update bmap; |
| 1427 | */ | 1431 | */ |
| 1428 | static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | 1432 | static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
| 1429 | struct tlock * tlck, struct commit * cd) | 1433 | struct tlock * tlck, struct commit * cd) |
| 1430 | { | 1434 | { |
| 1431 | int rc = 0; | 1435 | int rc = 0; |
| 1432 | struct metapage *mp; | 1436 | struct metapage *mp; |
| @@ -1442,7 +1446,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1442 | pxd = &lrd->log.redopage.pxd; | 1446 | pxd = &lrd->log.redopage.pxd; |
| 1443 | 1447 | ||
| 1444 | /* | 1448 | /* |
| 1445 | * inode after image | 1449 | * inode after image |
| 1446 | */ | 1450 | */ |
| 1447 | if (tlck->type & tlckENTRY) { | 1451 | if (tlck->type & tlckENTRY) { |
| 1448 | /* log after-image for logredo(): */ | 1452 | /* log after-image for logredo(): */ |
| @@ -1456,7 +1460,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1456 | tlck->flag |= tlckWRITEPAGE; | 1460 | tlck->flag |= tlckWRITEPAGE; |
| 1457 | } else if (tlck->type & tlckFREE) { | 1461 | } else if (tlck->type & tlckFREE) { |
| 1458 | /* | 1462 | /* |
| 1459 | * free inode extent | 1463 | * free inode extent |
| 1460 | * | 1464 | * |
| 1461 | * (pages of the freed inode extent have been invalidated and | 1465 | * (pages of the freed inode extent have been invalidated and |
| 1462 | * a maplock for free of the extent has been formatted at | 1466 | * a maplock for free of the extent has been formatted at |
| @@ -1498,7 +1502,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1498 | jfs_err("diLog: UFO type tlck:0x%p", tlck); | 1502 | jfs_err("diLog: UFO type tlck:0x%p", tlck); |
| 1499 | #ifdef _JFS_WIP | 1503 | #ifdef _JFS_WIP |
| 1500 | /* | 1504 | /* |
| 1501 | * alloc/free external EA extent | 1505 | * alloc/free external EA extent |
| 1502 | * | 1506 | * |
| 1503 | * a maplock for txUpdateMap() to update bPWMAP for alloc/free | 1507 | * a maplock for txUpdateMap() to update bPWMAP for alloc/free |
| 1504 | * of the extent has been formatted at txLock() time; | 1508 | * of the extent has been formatted at txLock() time; |
| @@ -1534,9 +1538,9 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1534 | } | 1538 | } |
| 1535 | 1539 | ||
| 1536 | /* | 1540 | /* |
| 1537 | * dataLog() | 1541 | * dataLog() |
| 1538 | * | 1542 | * |
| 1539 | * function: log data tlock | 1543 | * function: log data tlock |
| 1540 | */ | 1544 | */ |
| 1541 | static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | 1545 | static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
| 1542 | struct tlock * tlck) | 1546 | struct tlock * tlck) |
| @@ -1580,9 +1584,9 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1580 | } | 1584 | } |
| 1581 | 1585 | ||
| 1582 | /* | 1586 | /* |
| 1583 | * dtLog() | 1587 | * dtLog() |
| 1584 | * | 1588 | * |
| 1585 | * function: log dtree tlock and format maplock to update bmap; | 1589 | * function: log dtree tlock and format maplock to update bmap; |
| 1586 | */ | 1590 | */ |
| 1587 | static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | 1591 | static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
| 1588 | struct tlock * tlck) | 1592 | struct tlock * tlck) |
| @@ -1603,10 +1607,10 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1603 | lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); | 1607 | lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); |
| 1604 | 1608 | ||
| 1605 | /* | 1609 | /* |
| 1606 | * page extension via relocation: entry insertion; | 1610 | * page extension via relocation: entry insertion; |
| 1607 | * page extension in-place: entry insertion; | 1611 | * page extension in-place: entry insertion; |
| 1608 | * new right page from page split, reinitialized in-line | 1612 | * new right page from page split, reinitialized in-line |
| 1609 | * root from root page split: entry insertion; | 1613 | * root from root page split: entry insertion; |
| 1610 | */ | 1614 | */ |
| 1611 | if (tlck->type & (tlckNEW | tlckEXTEND)) { | 1615 | if (tlck->type & (tlckNEW | tlckEXTEND)) { |
| 1612 | /* log after-image of the new page for logredo(): | 1616 | /* log after-image of the new page for logredo(): |
| @@ -1641,8 +1645,8 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1641 | } | 1645 | } |
| 1642 | 1646 | ||
| 1643 | /* | 1647 | /* |
| 1644 | * entry insertion/deletion, | 1648 | * entry insertion/deletion, |
| 1645 | * sibling page link update (old right page before split); | 1649 | * sibling page link update (old right page before split); |
| 1646 | */ | 1650 | */ |
| 1647 | if (tlck->type & (tlckENTRY | tlckRELINK)) { | 1651 | if (tlck->type & (tlckENTRY | tlckRELINK)) { |
| 1648 | /* log after-image for logredo(): */ | 1652 | /* log after-image for logredo(): */ |
| @@ -1658,11 +1662,11 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1658 | } | 1662 | } |
| 1659 | 1663 | ||
| 1660 | /* | 1664 | /* |
| 1661 | * page deletion: page has been invalidated | 1665 | * page deletion: page has been invalidated |
| 1662 | * page relocation: source extent | 1666 | * page relocation: source extent |
| 1663 | * | 1667 | * |
| 1664 | * a maplock for free of the page has been formatted | 1668 | * a maplock for free of the page has been formatted |
| 1665 | * at txLock() time); | 1669 | * at txLock() time); |
| 1666 | */ | 1670 | */ |
| 1667 | if (tlck->type & (tlckFREE | tlckRELOCATE)) { | 1671 | if (tlck->type & (tlckFREE | tlckRELOCATE)) { |
| 1668 | /* log LOG_NOREDOPAGE of the deleted page for logredo() | 1672 | /* log LOG_NOREDOPAGE of the deleted page for logredo() |
| @@ -1683,9 +1687,9 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1683 | } | 1687 | } |
| 1684 | 1688 | ||
| 1685 | /* | 1689 | /* |
| 1686 | * xtLog() | 1690 | * xtLog() |
| 1687 | * | 1691 | * |
| 1688 | * function: log xtree tlock and format maplock to update bmap; | 1692 | * function: log xtree tlock and format maplock to update bmap; |
| 1689 | */ | 1693 | */ |
| 1690 | static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | 1694 | static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
| 1691 | struct tlock * tlck) | 1695 | struct tlock * tlck) |
| @@ -1725,8 +1729,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1725 | xadlock = (struct xdlistlock *) maplock; | 1729 | xadlock = (struct xdlistlock *) maplock; |
| 1726 | 1730 | ||
| 1727 | /* | 1731 | /* |
| 1728 | * entry insertion/extension; | 1732 | * entry insertion/extension; |
| 1729 | * sibling page link update (old right page before split); | 1733 | * sibling page link update (old right page before split); |
| 1730 | */ | 1734 | */ |
| 1731 | if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { | 1735 | if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { |
| 1732 | /* log after-image for logredo(): | 1736 | /* log after-image for logredo(): |
| @@ -1801,7 +1805,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1801 | } | 1805 | } |
| 1802 | 1806 | ||
| 1803 | /* | 1807 | /* |
| 1804 | * page deletion: file deletion/truncation (ref. xtTruncate()) | 1808 | * page deletion: file deletion/truncation (ref. xtTruncate()) |
| 1805 | * | 1809 | * |
| 1806 | * (page will be invalidated after log is written and bmap | 1810 | * (page will be invalidated after log is written and bmap |
| 1807 | * is updated from the page); | 1811 | * is updated from the page); |
| @@ -1908,13 +1912,13 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1908 | } | 1912 | } |
| 1909 | 1913 | ||
| 1910 | /* | 1914 | /* |
| 1911 | * page/entry truncation: file truncation (ref. xtTruncate()) | 1915 | * page/entry truncation: file truncation (ref. xtTruncate()) |
| 1912 | * | 1916 | * |
| 1913 | * |----------+------+------+---------------| | 1917 | * |----------+------+------+---------------| |
| 1914 | * | | | | 1918 | * | | | |
| 1915 | * | | hwm - hwm before truncation | 1919 | * | | hwm - hwm before truncation |
| 1916 | * | next - truncation point | 1920 | * | next - truncation point |
| 1917 | * lwm - lwm before truncation | 1921 | * lwm - lwm before truncation |
| 1918 | * header ? | 1922 | * header ? |
| 1919 | */ | 1923 | */ |
| 1920 | if (tlck->type & tlckTRUNCATE) { | 1924 | if (tlck->type & tlckTRUNCATE) { |
| @@ -1937,7 +1941,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1937 | twm = xtlck->twm.offset; | 1941 | twm = xtlck->twm.offset; |
| 1938 | 1942 | ||
| 1939 | /* | 1943 | /* |
| 1940 | * write log records | 1944 | * write log records |
| 1941 | */ | 1945 | */ |
| 1942 | /* log after-image for logredo(): | 1946 | /* log after-image for logredo(): |
| 1943 | * | 1947 | * |
| @@ -1997,7 +2001,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 1997 | } | 2001 | } |
| 1998 | 2002 | ||
| 1999 | /* | 2003 | /* |
| 2000 | * format maplock(s) for txUpdateMap() to update bmap | 2004 | * format maplock(s) for txUpdateMap() to update bmap |
| 2001 | */ | 2005 | */ |
| 2002 | maplock->index = 0; | 2006 | maplock->index = 0; |
| 2003 | 2007 | ||
| @@ -2069,9 +2073,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 2069 | } | 2073 | } |
| 2070 | 2074 | ||
| 2071 | /* | 2075 | /* |
| 2072 | * mapLog() | 2076 | * mapLog() |
| 2073 | * | 2077 | * |
| 2074 | * function: log from maplock of freed data extents; | 2078 | * function: log from maplock of freed data extents; |
| 2075 | */ | 2079 | */ |
| 2076 | static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | 2080 | static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
| 2077 | struct tlock * tlck) | 2081 | struct tlock * tlck) |
| @@ -2081,7 +2085,7 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 2081 | pxd_t *pxd; | 2085 | pxd_t *pxd; |
| 2082 | 2086 | ||
| 2083 | /* | 2087 | /* |
| 2084 | * page relocation: free the source page extent | 2088 | * page relocation: free the source page extent |
| 2085 | * | 2089 | * |
| 2086 | * a maplock for txUpdateMap() for free of the page | 2090 | * a maplock for txUpdateMap() for free of the page |
| 2087 | * has been formatted at txLock() time saving the src | 2091 | * has been formatted at txLock() time saving the src |
| @@ -2155,10 +2159,10 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |||
| 2155 | } | 2159 | } |
| 2156 | 2160 | ||
| 2157 | /* | 2161 | /* |
| 2158 | * txEA() | 2162 | * txEA() |
| 2159 | * | 2163 | * |
| 2160 | * function: acquire maplock for EA/ACL extents or | 2164 | * function: acquire maplock for EA/ACL extents or |
| 2161 | * set COMMIT_INLINE flag; | 2165 | * set COMMIT_INLINE flag; |
| 2162 | */ | 2166 | */ |
| 2163 | void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) | 2167 | void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) |
| 2164 | { | 2168 | { |
| @@ -2207,10 +2211,10 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) | |||
| 2207 | } | 2211 | } |
| 2208 | 2212 | ||
| 2209 | /* | 2213 | /* |
| 2210 | * txForce() | 2214 | * txForce() |
| 2211 | * | 2215 | * |
| 2212 | * function: synchronously write pages locked by transaction | 2216 | * function: synchronously write pages locked by transaction |
| 2213 | * after txLog() but before txUpdateMap(); | 2217 | * after txLog() but before txUpdateMap(); |
| 2214 | */ | 2218 | */ |
| 2215 | static void txForce(struct tblock * tblk) | 2219 | static void txForce(struct tblock * tblk) |
| 2216 | { | 2220 | { |
| @@ -2273,10 +2277,10 @@ static void txForce(struct tblock * tblk) | |||
| 2273 | } | 2277 | } |
| 2274 | 2278 | ||
| 2275 | /* | 2279 | /* |
| 2276 | * txUpdateMap() | 2280 | * txUpdateMap() |
| 2277 | * | 2281 | * |
| 2278 | * function: update persistent allocation map (and working map | 2282 | * function: update persistent allocation map (and working map |
| 2279 | * if appropriate); | 2283 | * if appropriate); |
| 2280 | * | 2284 | * |
| 2281 | * parameter: | 2285 | * parameter: |
| 2282 | */ | 2286 | */ |
| @@ -2298,7 +2302,7 @@ static void txUpdateMap(struct tblock * tblk) | |||
| 2298 | 2302 | ||
| 2299 | 2303 | ||
| 2300 | /* | 2304 | /* |
| 2301 | * update block allocation map | 2305 | * update block allocation map |
| 2302 | * | 2306 | * |
| 2303 | * update allocation state in pmap (and wmap) and | 2307 | * update allocation state in pmap (and wmap) and |
| 2304 | * update lsn of the pmap page; | 2308 | * update lsn of the pmap page; |
| @@ -2382,7 +2386,7 @@ static void txUpdateMap(struct tblock * tblk) | |||
| 2382 | } | 2386 | } |
| 2383 | } | 2387 | } |
| 2384 | /* | 2388 | /* |
| 2385 | * update inode allocation map | 2389 | * update inode allocation map |
| 2386 | * | 2390 | * |
| 2387 | * update allocation state in pmap and | 2391 | * update allocation state in pmap and |
| 2388 | * update lsn of the pmap page; | 2392 | * update lsn of the pmap page; |
| @@ -2407,24 +2411,24 @@ static void txUpdateMap(struct tblock * tblk) | |||
| 2407 | } | 2411 | } |
| 2408 | 2412 | ||
| 2409 | /* | 2413 | /* |
| 2410 | * txAllocPMap() | 2414 | * txAllocPMap() |
| 2411 | * | 2415 | * |
| 2412 | * function: allocate from persistent map; | 2416 | * function: allocate from persistent map; |
| 2413 | * | 2417 | * |
| 2414 | * parameter: | 2418 | * parameter: |
| 2415 | * ipbmap - | 2419 | * ipbmap - |
| 2416 | * malock - | 2420 | * malock - |
| 2417 | * xad list: | 2421 | * xad list: |
| 2418 | * pxd: | 2422 | * pxd: |
| 2419 | * | 2423 | * |
| 2420 | * maptype - | 2424 | * maptype - |
| 2421 | * allocate from persistent map; | 2425 | * allocate from persistent map; |
| 2422 | * free from persistent map; | 2426 | * free from persistent map; |
| 2423 | * (e.g., tmp file - free from working map at release | 2427 | * (e.g., tmp file - free from working map at release |
| 2424 | * of last reference); | 2428 | * of last reference); |
| 2425 | * free from persistent and working map; | 2429 | * free from persistent and working map; |
| 2426 | * | 2430 | * |
| 2427 | * lsn - log sequence number; | 2431 | * lsn - log sequence number; |
| 2428 | */ | 2432 | */ |
| 2429 | static void txAllocPMap(struct inode *ip, struct maplock * maplock, | 2433 | static void txAllocPMap(struct inode *ip, struct maplock * maplock, |
| 2430 | struct tblock * tblk) | 2434 | struct tblock * tblk) |
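
The maptype cases enumerated above reduce to which of the two allocation maps a commit-time update touches: the persistent map always, the working map only for the combined free. A small illustrative enum (names and values belong to this sketch, not to the kernel's flag set):

/* the three maptype cases the comment enumerates */
enum maptype {
        ALLOC_PMAP,     /* allocate in the persistent map            */
        FREE_PMAP,      /* free in pmap only; wmap freed at release  */
        FREE_PWMAP      /* free in persistent and working map        */
};

/* which maps a commit-time update touches for each case */
static void update_maps(enum maptype t, int *touch_pmap, int *touch_wmap)
{
        *touch_pmap = 1;                 /* every case updates pmap  */
        *touch_wmap = (t == FREE_PWMAP); /* only the combined free
                                          * touches the working map  */
}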
| @@ -2478,9 +2482,9 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock, | |||
| 2478 | } | 2482 | } |
| 2479 | 2483 | ||
| 2480 | /* | 2484 | /* |
| 2481 | * txFreeMap() | 2485 | * txFreeMap() |
| 2482 | * | 2486 | * |
| 2483 | * function: free from persistent and/or working map; | 2487 | * function: free from persistent and/or working map; |
| 2484 | * | 2488 | * |
| 2485 | * todo: optimization | 2489 | * todo: optimization |
| 2486 | */ | 2490 | */ |
| @@ -2579,9 +2583,9 @@ void txFreeMap(struct inode *ip, | |||
| 2579 | } | 2583 | } |
| 2580 | 2584 | ||
| 2581 | /* | 2585 | /* |
| 2582 | * txFreelock() | 2586 | * txFreelock() |
| 2583 | * | 2587 | * |
| 2584 | * function: remove tlock from inode anonymous locklist | 2588 | * function: remove tlock from inode anonymous locklist |
| 2585 | */ | 2589 | */ |
| 2586 | void txFreelock(struct inode *ip) | 2590 | void txFreelock(struct inode *ip) |
| 2587 | { | 2591 | { |
| @@ -2619,7 +2623,7 @@ void txFreelock(struct inode *ip) | |||
| 2619 | } | 2623 | } |
| 2620 | 2624 | ||
| 2621 | /* | 2625 | /* |
| 2622 | * txAbort() | 2626 | * txAbort() |
| 2623 | * | 2627 | * |
| 2624 | * function: abort tx before commit; | 2628 | * function: abort tx before commit; |
| 2625 | * | 2629 | * |
| @@ -2679,7 +2683,7 @@ void txAbort(tid_t tid, int dirty) | |||
| 2679 | } | 2683 | } |
| 2680 | 2684 | ||
| 2681 | /* | 2685 | /* |
| 2682 | * txLazyCommit(void) | 2686 | * txLazyCommit(void) |
| 2683 | * | 2687 | * |
| 2684 | * All transactions except those changing ipimap (COMMIT_FORCE) are | 2688 | * All transactions except those changing ipimap (COMMIT_FORCE) are |
| 2685 | * processed by this routine. This ensures that the inode and block | 2689 | * processed by this routine. This ensures that the inode and block |
| @@ -2728,7 +2732,7 @@ static void txLazyCommit(struct tblock * tblk) | |||
| 2728 | } | 2732 | } |
| 2729 | 2733 | ||
| 2730 | /* | 2734 | /* |
| 2731 | * jfs_lazycommit(void) | 2735 | * jfs_lazycommit(void) |
| 2732 | * | 2736 | * |
| 2733 | * To be run as a kernel daemon. If lbmIODone is called in an interrupt | 2737 | * To be run as a kernel daemon. If lbmIODone is called in an interrupt |
| 2734 | * context, or where blocking is not wanted, this routine will process | 2738 | * context, or where blocking is not wanted, this routine will process |
| @@ -2913,7 +2917,7 @@ void txResume(struct super_block *sb) | |||
| 2913 | } | 2917 | } |
| 2914 | 2918 | ||
| 2915 | /* | 2919 | /* |
| 2916 | * jfs_sync(void) | 2920 | * jfs_sync(void) |
| 2917 | * | 2921 | * |
| 2918 | * To be run as a kernel daemon. This is awakened when tlocks run low. | 2922 | * To be run as a kernel daemon. This is awakened when tlocks run low. |
| 2919 | * We write any inodes that have anonymous tlocks so they will become | 2923 | * We write any inodes that have anonymous tlocks so they will become |
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h index 7863cf21afca..ab7288937019 100644 --- a/fs/jfs/jfs_txnmgr.h +++ b/fs/jfs/jfs_txnmgr.h | |||
| @@ -94,7 +94,7 @@ extern struct tblock *TxBlock; /* transaction block table */ | |||
| 94 | */ | 94 | */ |
| 95 | struct tlock { | 95 | struct tlock { |
| 96 | lid_t next; /* 2: index next lockword on tid locklist | 96 | lid_t next; /* 2: index next lockword on tid locklist |
| 97 | * next lockword on freelist | 97 | * next lockword on freelist |
| 98 | */ | 98 | */ |
| 99 | tid_t tid; /* 2: transaction id holding lock */ | 99 | tid_t tid; /* 2: transaction id holding lock */ |
| 100 | 100 | ||
diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h index 09b252958687..649f9817accd 100644 --- a/fs/jfs/jfs_types.h +++ b/fs/jfs/jfs_types.h | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | /* | 21 | /* |
| 22 | * jfs_types.h: | 22 | * jfs_types.h: |
| 23 | * | 23 | * |
| 24 | * basic type/utility definitions | 24 | * basic type/utility definitions |
| 25 | * | 25 | * |
| 26 | * note: this header file must be the 1st include file | 26 | * note: this header file must be the 1st include file |
| 27 | * of JFS include list in all JFS .c files. | 27 | * of JFS include list in all JFS .c files. |
| @@ -54,8 +54,8 @@ struct timestruc_t { | |||
| 54 | */ | 54 | */ |
| 55 | 55 | ||
| 56 | #define LEFTMOSTONE 0x80000000 | 56 | #define LEFTMOSTONE 0x80000000 |
| 57 | #define HIGHORDER 0x80000000u /* high order bit on */ | 57 | #define HIGHORDER 0x80000000u /* high order bit on */ |
| 58 | #define ONES 0xffffffffu /* all bits on */ | 58 | #define ONES 0xffffffffu /* all bits on */ |
| 59 | 59 | ||
| 60 | /* | 60 | /* |
| 61 | * logical xd (lxd) | 61 | * logical xd (lxd) |
| @@ -148,7 +148,7 @@ typedef struct { | |||
| 148 | #define sizeDXD(dxd) le32_to_cpu((dxd)->size) | 148 | #define sizeDXD(dxd) le32_to_cpu((dxd)->size) |
| 149 | 149 | ||
| 150 | /* | 150 | /* |
| 151 | * directory entry argument | 151 | * directory entry argument |
| 152 | */ | 152 | */ |
| 153 | struct component_name { | 153 | struct component_name { |
| 154 | int namlen; | 154 | int namlen; |
| @@ -160,14 +160,14 @@ struct component_name { | |||
| 160 | * DASD limit information - stored in directory inode | 160 | * DASD limit information - stored in directory inode |
| 161 | */ | 161 | */ |
| 162 | struct dasd { | 162 | struct dasd { |
| 163 | u8 thresh; /* Alert Threshold (in percent) */ | 163 | u8 thresh; /* Alert Threshold (in percent) */ |
| 164 | u8 delta; /* Alert Threshold delta (in percent) */ | 164 | u8 delta; /* Alert Threshold delta (in percent) */ |
| 165 | u8 rsrvd1; | 165 | u8 rsrvd1; |
| 166 | u8 limit_hi; /* DASD limit (in logical blocks) */ | 166 | u8 limit_hi; /* DASD limit (in logical blocks) */ |
| 167 | __le32 limit_lo; /* DASD limit (in logical blocks) */ | 167 | __le32 limit_lo; /* DASD limit (in logical blocks) */ |
| 168 | u8 rsrvd2[3]; | 168 | u8 rsrvd2[3]; |
| 169 | u8 used_hi; /* DASD usage (in logical blocks) */ | 169 | u8 used_hi; /* DASD usage (in logical blocks) */ |
| 170 | __le32 used_lo; /* DASD usage (in logical blocks) */ | 170 | __le32 used_lo; /* DASD usage (in logical blocks) */ |
| 171 | }; | 171 | }; |
| 172 | 172 | ||
| 173 | #define DASDLIMIT(dasdp) \ | 173 | #define DASDLIMIT(dasdp) \ |
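
The limit and usage fields are 40-bit block counts split across an 8-bit high byte and a little-endian 32-bit low word; the DASDLIMIT macro above (truncated here) recombines them. A userspace-style sketch of that recombination, assuming the obvious hi:lo packing:

#include <stdint.h>

/* recombine the 8-bit high byte and 32-bit low word of a DASD
 * limit into one 40-bit block count */
static uint64_t dasd_limit(uint8_t limit_hi, uint32_t limit_lo_cpu)
{
        /* limit_lo is __le32 on disk; assume it has already been
         * converted to CPU byte order here */
        return ((uint64_t)limit_hi << 32) | limit_lo_cpu;
}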
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index a386f48c73fc..7971f37534a3 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c | |||
| @@ -60,7 +60,7 @@ int jfs_umount(struct super_block *sb) | |||
| 60 | jfs_info("UnMount JFS: sb:0x%p", sb); | 60 | jfs_info("UnMount JFS: sb:0x%p", sb); |
| 61 | 61 | ||
| 62 | /* | 62 | /* |
| 63 | * update superblock and close log | 63 | * update superblock and close log |
| 64 | * | 64 | * |
| 65 | * if mounted read-write and log based recovery was enabled | 65 | * if mounted read-write and log based recovery was enabled |
| 66 | */ | 66 | */ |
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index acc97c46d8a4..1543906a2e0d 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c | |||
| @@ -16,7 +16,7 @@ | |||
| 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 17 | */ | 17 | */ |
| 18 | /* | 18 | /* |
| 19 | * jfs_xtree.c: extent allocation descriptor B+-tree manager | 19 | * jfs_xtree.c: extent allocation descriptor B+-tree manager |
| 20 | */ | 20 | */ |
| 21 | 21 | ||
| 22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
| @@ -32,30 +32,30 @@ | |||
| 32 | /* | 32 | /* |
| 33 | * xtree local flag | 33 | * xtree local flag |
| 34 | */ | 34 | */ |
| 35 | #define XT_INSERT 0x00000001 | 35 | #define XT_INSERT 0x00000001 |
| 36 | 36 | ||
| 37 | /* | 37 | /* |
| 38 | * xtree key/entry comparison: extent offset | 38 | * xtree key/entry comparison: extent offset |
| 39 | * | 39 | * |
| 40 | * return: | 40 | * return: |
| 41 | * -1: k < start of extent | 41 | * -1: k < start of extent |
| 42 | * 0: start_of_extent <= k <= end_of_extent | 42 | * 0: start_of_extent <= k <= end_of_extent |
| 43 | * 1: k > end_of_extent | 43 | * 1: k > end_of_extent |
| 44 | */ | 44 | */ |
| 45 | #define XT_CMP(CMP, K, X, OFFSET64)\ | 45 | #define XT_CMP(CMP, K, X, OFFSET64)\ |
| 46 | {\ | 46 | {\ |
| 47 | OFFSET64 = offsetXAD(X);\ | 47 | OFFSET64 = offsetXAD(X);\ |
| 48 | (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ | 48 | (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ |
| 49 | ((K) < OFFSET64) ? -1 : 0;\ | 49 | ((K) < OFFSET64) ? -1 : 0;\ |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | /* write a xad entry */ | 52 | /* write a xad entry */ |
| 53 | #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ | 53 | #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ |
| 54 | {\ | 54 | {\ |
| 55 | (XAD)->flag = (FLAG);\ | 55 | (XAD)->flag = (FLAG);\ |
| 56 | XADoffset((XAD), (OFF));\ | 56 | XADoffset((XAD), (OFF));\ |
| 57 | XADlength((XAD), (LEN));\ | 57 | XADlength((XAD), (LEN));\ |
| 58 | XADaddress((XAD), (ADDR));\ | 58 | XADaddress((XAD), (ADDR));\ |
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) | 61 | #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) |
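
The XT_CMP macro reads more clearly as a function. A sketch of the same three-way comparison, reusing the offsetXAD() and lengthXAD() accessors the macro itself depends on (illustrative only, not part of the patch):

/* XT_CMP written as a function, for illustration */
static int xt_cmp(s64 k, const xad_t *x)
{
        s64 off = offsetXAD(x);

        if (k >= off + lengthXAD(x))
                return 1;       /* k lies past the extent   */
        if (k < off)
                return -1;      /* k lies before the extent */
        return 0;               /* off <= k <= end: a hit   */
}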
| @@ -76,13 +76,13 @@ | |||
| 76 | MP = NULL;\ | 76 | MP = NULL;\ |
| 77 | RC = -EIO;\ | 77 | RC = -EIO;\ |
| 78 | }\ | 78 | }\ |
| 79 | }\ | 79 | }\ |
| 80 | } | 80 | } |
| 81 | 81 | ||
| 82 | /* for consistency */ | 82 | /* for consistency */ |
| 83 | #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) | 83 | #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) |
| 84 | 84 | ||
| 85 | #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ | 85 | #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ |
| 86 | BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) | 86 | BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) |
| 87 | /* xtree entry parameter descriptor */ | 87 | /* xtree entry parameter descriptor */ |
| 88 | struct xtsplit { | 88 | struct xtsplit { |
| @@ -97,7 +97,7 @@ struct xtsplit { | |||
| 97 | 97 | ||
| 98 | 98 | ||
| 99 | /* | 99 | /* |
| 100 | * statistics | 100 | * statistics |
| 101 | */ | 101 | */ |
| 102 | #ifdef CONFIG_JFS_STATISTICS | 102 | #ifdef CONFIG_JFS_STATISTICS |
| 103 | static struct { | 103 | static struct { |
| @@ -136,7 +136,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); | |||
| 136 | #endif /* _STILL_TO_PORT */ | 136 | #endif /* _STILL_TO_PORT */ |
| 137 | 137 | ||
| 138 | /* | 138 | /* |
| 139 | * xtLookup() | 139 | * xtLookup() |
| 140 | * | 140 | * |
| 141 | * function: map a single page into a physical extent; | 141 | * function: map a single page into a physical extent; |
| 142 | */ | 142 | */ |
| @@ -179,7 +179,7 @@ int xtLookup(struct inode *ip, s64 lstart, | |||
| 179 | } | 179 | } |
| 180 | 180 | ||
| 181 | /* | 181 | /* |
| 182 | * compute the physical extent covering logical extent | 182 | * compute the physical extent covering logical extent |
| 183 | * | 183 | * |
| 184 | * N.B. search may have failed (e.g., hole in sparse file), | 184 | * N.B. search may have failed (e.g., hole in sparse file), |
| 185 | * and returned the index of the next entry. | 185 | * and returned the index of the next entry. |
| @@ -220,27 +220,27 @@ int xtLookup(struct inode *ip, s64 lstart, | |||
| 220 | 220 | ||
| 221 | 221 | ||
| 222 | /* | 222 | /* |
| 223 | * xtLookupList() | 223 | * xtLookupList() |
| 224 | * | 224 | * |
| 225 | * function: map a single logical extent into a list of physical extents; | 225 | * function: map a single logical extent into a list of physical extents; |
| 226 | * | 226 | * |
| 227 | * parameter: | 227 | * parameter: |
| 228 | * struct inode *ip, | 228 | * struct inode *ip, |
| 229 | * struct lxdlist *lxdlist, lxd list (in) | 229 | * struct lxdlist *lxdlist, lxd list (in) |
| 230 | * struct xadlist *xadlist, xad list (in/out) | 230 | * struct xadlist *xadlist, xad list (in/out) |
| 231 | * int flag) | 231 | * int flag) |
| 232 | * | 232 | * |
| 233 | * coverage of lxd by xad under assumption of | 233 | * coverage of lxd by xad under assumption of |
| 234 | * . lxd's are ordered and disjoint. | 234 | * . lxd's are ordered and disjoint. |
| 235 | * . xad's are ordered and disjoint. | 235 | * . xad's are ordered and disjoint. |
| 236 | * | 236 | * |
| 237 | * return: | 237 | * return: |
| 238 | * 0: success | 238 | * 0: success |
| 239 | * | 239 | * |
| 240 | * note: a page being written (even a single byte) is backed fully, | 240 | * note: a page being written (even a single byte) is backed fully, |
| 241 | * except the last page which is only backed with blocks | 241 | * except the last page which is only backed with blocks |
| 242 | * required to cover the last byte; | 242 | * required to cover the last byte; |
| 243 | * the extent backing a page is fully contained within an xad; | 243 | * the extent backing a page is fully contained within an xad; |
| 244 | */ | 244 | */ |
| 245 | int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, | 245 | int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, |
| 246 | struct xadlist * xadlist, int flag) | 246 | struct xadlist * xadlist, int flag) |
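
Given the two ordering assumptions in the comment (both lists sorted and disjoint), the lxd-to-xad mapping is a linear merge of two interval lists. A standalone sketch of that walk with simplified types; the real routine additionally distinguishes holes and XAD_NOTRECORDED extents, which this omits:

/* generic shape of the lxd-by-xad coverage walk */
struct ext { long long off, len; };

static long long minll(long long a, long long b) { return a < b ? a : b; }
static long long maxll(long long a, long long b) { return a > b ? a : b; }

static int cover(const struct ext *lxd, int nl,
                 const struct ext *xad, int nx,
                 struct ext *out, int maxout)
{
        int i = 0, j = 0, n = 0;

        while (i < nl && j < nx && n < maxout) {
                long long lo = maxll(lxd[i].off, xad[j].off);
                long long hi = minll(lxd[i].off + lxd[i].len,
                                     xad[j].off + xad[j].len);

                if (lo < hi) {          /* overlapping piece: emit */
                        out[n].off = lo;
                        out[n].len = hi - lo;
                        n++;
                }
                /* advance whichever interval ends first */
                if (lxd[i].off + lxd[i].len <= xad[j].off + xad[j].len)
                        i++;
                else
                        j++;
        }
        return n;       /* number of physical pieces produced */
}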
| @@ -284,7 +284,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, | |||
| 284 | return rc; | 284 | return rc; |
| 285 | 285 | ||
| 286 | /* | 286 | /* |
| 287 | * compute the physical extent covering logical extent | 287 | * compute the physical extent covering logical extent |
| 288 | * | 288 | * |
| 289 | * N.B. search may have failed (e.g., hole in sparse file), | 289 | * N.B. search may have failed (e.g., hole in sparse file), |
| 290 | * and returned the index of the next entry. | 290 | * and returned the index of the next entry. |
| @@ -343,7 +343,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, | |||
| 343 | if (lstart >= size) | 343 | if (lstart >= size) |
| 344 | goto mapend; | 344 | goto mapend; |
| 345 | 345 | ||
| 346 | /* compare with the current xad */ | 346 | /* compare with the current xad */ |
| 347 | goto compare1; | 347 | goto compare1; |
| 348 | } | 348 | } |
| 349 | /* lxd is covered by xad */ | 349 | /* lxd is covered by xad */ |
| @@ -430,7 +430,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, | |||
| 430 | /* | 430 | /* |
| 431 | * lxd is partially covered by xad | 431 | * lxd is partially covered by xad |
| 432 | */ | 432 | */ |
| 433 | else { /* (xend < lend) */ | 433 | else { /* (xend < lend) */ |
| 434 | 434 | ||
| 435 | /* | 435 | /* |
| 436 | * get next xad | 436 | * get next xad |
| @@ -477,22 +477,22 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, | |||
| 477 | 477 | ||
| 478 | 478 | ||
| 479 | /* | 479 | /* |
| 480 | * xtSearch() | 480 | * xtSearch() |
| 481 | * | 481 | * |
| 482 | * function: search for the xad entry covering specified offset. | 482 | * function: search for the xad entry covering specified offset. |
| 483 | * | 483 | * |
| 484 | * parameters: | 484 | * parameters: |
| 485 | * ip - file object; | 485 | * ip - file object; |
| 486 | * xoff - extent offset; | 486 | * xoff - extent offset; |
| 487 | * nextp - address of next extent (if any) for search miss | 487 | * nextp - address of next extent (if any) for search miss |
| 488 | * cmpp - comparison result: | 488 | * cmpp - comparison result: |
| 489 | * btstack - traverse stack; | 489 | * btstack - traverse stack; |
| 490 | * flag - search process flag (XT_INSERT); | 490 | * flag - search process flag (XT_INSERT); |
| 491 | * | 491 | * |
| 492 | * returns: | 492 | * returns: |
| 493 | * btstack contains (bn, index) of search path traversed to the entry. | 493 | * btstack contains (bn, index) of search path traversed to the entry. |
| 494 | * *cmpp is set to result of comparison with the entry returned. | 494 | * *cmpp is set to result of comparison with the entry returned. |
| 495 | * the page containing the entry is pinned at exit. | 495 | * the page containing the entry is pinned at exit. |
| 496 | */ | 496 | */ |
| 497 | static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, | 497 | static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, |
| 498 | int *cmpp, struct btstack * btstack, int flag) | 498 | int *cmpp, struct btstack * btstack, int flag) |
| @@ -517,7 +517,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, | |||
| 517 | btstack->nsplit = 0; | 517 | btstack->nsplit = 0; |
| 518 | 518 | ||
| 519 | /* | 519 | /* |
| 520 | * search down tree from root: | 520 | * search down tree from root: |
| 521 | * | 521 | * |
| 522 | * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of | 522 | * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of |
| 523 | * internal page, child page Pi contains entry with k, Ki <= K < Kj. | 523 | * internal page, child page Pi contains entry with k, Ki <= K < Kj. |
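
That invariant (child Pi covers keys with Ki <= K < Kj) makes the descent at each internal page a search for the greatest router key not exceeding the search key. A minimal sketch over a plain key array; the real code binary-searches the xad entries in place:

/* pick the child to descend into: the greatest i with key[i] <= k */
static int pick_child(const long long *key, int nkeys, long long k)
{
        int lo = 0, hi = nkeys - 1, ans = 0;

        while (lo <= hi) {
                int mid = (lo + hi) / 2;

                if (key[mid] <= k) {
                        ans = mid;      /* candidate: Ki <= K  */
                        lo = mid + 1;   /* try a larger Ki     */
                } else {
                        hi = mid - 1;
                }
        }
        return ans;     /* index of Pi with Ki <= K < K(i+1) */
}

The assumption key[0] <= k always holds in the xtree, because the first entry's offset is forced to 0 (see xtSplitRoot() below).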
| @@ -642,7 +642,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, | |||
| 642 | XT_CMP(cmp, xoff, &p->xad[index], t64); | 642 | XT_CMP(cmp, xoff, &p->xad[index], t64); |
| 643 | if (cmp == 0) { | 643 | if (cmp == 0) { |
| 644 | /* | 644 | /* |
| 645 | * search hit | 645 | * search hit |
| 646 | */ | 646 | */ |
| 647 | /* search hit - leaf page: | 647 | /* search hit - leaf page: |
| 648 | * return the entry found | 648 | * return the entry found |
| @@ -692,7 +692,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, | |||
| 692 | } | 692 | } |
| 693 | 693 | ||
| 694 | /* | 694 | /* |
| 695 | * search miss | 695 | * search miss |
| 696 | * | 696 | * |
| 697 | * base is the smallest index with key (Kj) greater than | 697 | * base is the smallest index with key (Kj) greater than |
| 698 | * search key (K) and may be zero or maxentry index. | 698 | * search key (K) and may be zero or maxentry index. |
| @@ -773,22 +773,22 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, | |||
| 773 | } | 773 | } |
| 774 | 774 | ||
| 775 | /* | 775 | /* |
| 776 | * xtInsert() | 776 | * xtInsert() |
| 777 | * | 777 | * |
| 778 | * function: | 778 | * function: |
| 779 | * | 779 | * |
| 780 | * parameter: | 780 | * parameter: |
| 781 | * tid - transaction id; | 781 | * tid - transaction id; |
| 782 | * ip - file object; | 782 | * ip - file object; |
| 783 | * xflag - extent flag (XAD_NOTRECORDED): | 783 | * xflag - extent flag (XAD_NOTRECORDED): |
| 784 | * xoff - extent offset; | 784 | * xoff - extent offset; |
| 785 | * xlen - extent length; | 785 | * xlen - extent length; |
| 786 | * xaddrp - extent address pointer (in/out): | 786 | * xaddrp - extent address pointer (in/out): |
| 787 | * if (*xaddrp) | 787 | * if (*xaddrp) |
| 788 | * caller allocated data extent at *xaddrp; | 788 | * caller allocated data extent at *xaddrp; |
| 789 | * else | 789 | * else |
| 790 | * allocate data extent and return its xaddr; | 790 | * allocate data extent and return its xaddr; |
| 791 | * flag - | 791 | * flag - |
| 792 | * | 792 | * |
| 793 | * return: | 793 | * return: |
| 794 | */ | 794 | */ |
| @@ -813,7 +813,7 @@ int xtInsert(tid_t tid, /* transaction id */ | |||
| 813 | jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); | 813 | jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); |
| 814 | 814 | ||
| 815 | /* | 815 | /* |
| 816 | * search for the entry location at which to insert: | 816 | * search for the entry location at which to insert: |
| 817 | * | 817 | * |
| 818 | * xtFastSearch() and xtSearch() both return (leaf page | 818 | * xtFastSearch() and xtSearch() both return (leaf page |
| 819 | * pinned, index at which to insert). | 819 | * pinned, index at which to insert). |
| @@ -853,13 +853,13 @@ int xtInsert(tid_t tid, /* transaction id */ | |||
| 853 | } | 853 | } |
| 854 | 854 | ||
| 855 | /* | 855 | /* |
| 856 | * insert entry for new extent | 856 | * insert entry for new extent |
| 857 | */ | 857 | */ |
| 858 | xflag |= XAD_NEW; | 858 | xflag |= XAD_NEW; |
| 859 | 859 | ||
| 860 | /* | 860 | /* |
| 861 | * if the leaf page is full, split the page and | 861 | * if the leaf page is full, split the page and |
| 862 | * propagate up the router entry for the new page from split | 862 | * propagate up the router entry for the new page from split |
| 863 | * | 863 | * |
| 864 | * The xtSplitUp() will insert the entry and unpin the leaf page. | 864 | * The xtSplitUp() will insert the entry and unpin the leaf page. |
| 865 | */ | 865 | */ |
| @@ -886,7 +886,7 @@ int xtInsert(tid_t tid, /* transaction id */ | |||
| 886 | } | 886 | } |
| 887 | 887 | ||
| 888 | /* | 888 | /* |
| 889 | * insert the new entry into the leaf page | 889 | * insert the new entry into the leaf page |
| 890 | */ | 890 | */ |
| 891 | /* | 891 | /* |
| 892 | * acquire a transaction lock on the leaf page; | 892 | * acquire a transaction lock on the leaf page; |
| @@ -930,16 +930,16 @@ int xtInsert(tid_t tid, /* transaction id */ | |||
| 930 | 930 | ||
| 931 | 931 | ||
| 932 | /* | 932 | /* |
| 933 | * xtSplitUp() | 933 | * xtSplitUp() |
| 934 | * | 934 | * |
| 935 | * function: | 935 | * function: |
| 936 | * split full pages as propagating insertion up the tree | 936 | * split full pages as propagating insertion up the tree |
| 937 | * | 937 | * |
| 938 | * parameter: | 938 | * parameter: |
| 939 | * tid - transaction id; | 939 | * tid - transaction id; |
| 940 | * ip - file object; | 940 | * ip - file object; |
| 941 | * split - entry parameter descriptor; | 941 | * split - entry parameter descriptor; |
| 942 | * btstack - traverse stack from xtSearch() | 942 | * btstack - traverse stack from xtSearch() |
| 943 | * | 943 | * |
| 944 | * return: | 944 | * return: |
| 945 | */ | 945 | */ |
| @@ -1199,22 +1199,22 @@ xtSplitUp(tid_t tid, | |||
| 1199 | 1199 | ||
| 1200 | 1200 | ||
| 1201 | /* | 1201 | /* |
| 1202 | * xtSplitPage() | 1202 | * xtSplitPage() |
| 1203 | * | 1203 | * |
| 1204 | * function: | 1204 | * function: |
| 1205 | * split a full non-root page into | 1205 | * split a full non-root page into |
| 1206 | * original/split/left page and new right page | 1206 | * original/split/left page and new right page |
| 1207 | * i.e., the original/split page remains as left page. | 1207 | * i.e., the original/split page remains as left page. |
| 1208 | * | 1208 | * |
| 1209 | * parameter: | 1209 | * parameter: |
| 1210 | * int tid, | 1210 | * int tid, |
| 1211 | * struct inode *ip, | 1211 | * struct inode *ip, |
| 1212 | * struct xtsplit *split, | 1212 | * struct xtsplit *split, |
| 1213 | * struct metapage **rmpp, | 1213 | * struct metapage **rmpp, |
| 1214 | * u64 *rbnp, | 1214 | * u64 *rbnp, |
| 1215 | * | 1215 | * |
| 1216 | * return: | 1216 | * return: |
| 1217 | * Pointer to page in which to insert or NULL on error. | 1217 | * Pointer to page in which to insert or NULL on error. |
| 1218 | */ | 1218 | */ |
| 1219 | static int | 1219 | static int |
| 1220 | xtSplitPage(tid_t tid, struct inode *ip, | 1220 | xtSplitPage(tid_t tid, struct inode *ip, |
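
In memory, the heart of a non-root split is moving the upper half of the entries to the new right page and reporting the right page's first key as the router entry for the parent level. A toy model with keys only; the real code also fixes the sibling links and special-cases sequential append:

#include <string.h>

#define MAXENT 8

struct page_s {
        int nent;
        long long key[MAXENT];
};

/* split left into left + right; assumes left->nent >= 2 */
static long long split_page(struct page_s *left, struct page_s *right)
{
        int half = left->nent / 2;

        right->nent = left->nent - half;
        memcpy(right->key, left->key + half,
               right->nent * sizeof(right->key[0]));
        left->nent = half;

        return right->key[0];   /* router key for the parent level */
}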
| @@ -1248,9 +1248,9 @@ xtSplitPage(tid_t tid, struct inode *ip, | |||
| 1248 | rbn = addressPXD(pxd); | 1248 | rbn = addressPXD(pxd); |
| 1249 | 1249 | ||
| 1250 | /* Allocate blocks to quota. */ | 1250 | /* Allocate blocks to quota. */ |
| 1251 | if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { | 1251 | if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { |
| 1252 | rc = -EDQUOT; | 1252 | rc = -EDQUOT; |
| 1253 | goto clean_up; | 1253 | goto clean_up; |
| 1254 | } | 1254 | } |
| 1255 | 1255 | ||
| 1256 | quota_allocation += lengthPXD(pxd); | 1256 | quota_allocation += lengthPXD(pxd); |
| @@ -1304,7 +1304,7 @@ xtSplitPage(tid_t tid, struct inode *ip, | |||
| 1304 | skip = split->index; | 1304 | skip = split->index; |
| 1305 | 1305 | ||
| 1306 | /* | 1306 | /* |
| 1307 | * sequential append at tail (after last entry of last page) | 1307 | * sequential append at tail (after last entry of last page) |
| 1308 | * | 1308 | * |
| 1309 | * if splitting the last page on a level because of appending | 1309 | * if splitting the last page on a level because of appending |
| 1310 | * an entry to it (skip is maxentry), it's likely that the access is | 1310 | * an entry to it (skip is maxentry), it's likely that the access is |
| @@ -1342,7 +1342,7 @@ xtSplitPage(tid_t tid, struct inode *ip, | |||
| 1342 | } | 1342 | } |
| 1343 | 1343 | ||
| 1344 | /* | 1344 | /* |
| 1345 | * non-sequential insert (at possibly middle page) | 1345 | * non-sequential insert (at possibly middle page) |
| 1346 | */ | 1346 | */ |
| 1347 | 1347 | ||
| 1348 | /* | 1348 | /* |
| @@ -1465,25 +1465,24 @@ xtSplitPage(tid_t tid, struct inode *ip, | |||
| 1465 | 1465 | ||
| 1466 | 1466 | ||
| 1467 | /* | 1467 | /* |
| 1468 | * xtSplitRoot() | 1468 | * xtSplitRoot() |
| 1469 | * | 1469 | * |
| 1470 | * function: | 1470 | * function: |
| 1471 | * split the full root page into | 1471 | * split the full root page into original/root/split page and new |
| 1472 | * original/root/split page and new right page | 1472 | * right page |
| 1473 | * i.e., root remains fixed in tree anchor (inode) and | 1473 | * i.e., root remains fixed in tree anchor (inode) and the root is |
| 1474 | * the root is copied to a single new right child page | 1474 | * copied to a single new right child page since root page << |
| 1475 | * since root page << non-root page, and | 1475 | * non-root page, and the split root page contains a single entry |
| 1476 | * the split root page contains a single entry for the | 1476 | * for the new right child page. |
| 1477 | * new right child page. | ||
| 1478 | * | 1477 | * |
| 1479 | * parameter: | 1478 | * parameter: |
| 1480 | * int tid, | 1479 | * int tid, |
| 1481 | * struct inode *ip, | 1480 | * struct inode *ip, |
| 1482 | * struct xtsplit *split, | 1481 | * struct xtsplit *split, |
| 1483 | * struct metapage **rmpp) | 1482 | * struct metapage **rmpp) |
| 1484 | * | 1483 | * |
| 1485 | * return: | 1484 | * return: |
| 1486 | * Pointer to page in which to insert or NULL on error. | 1485 | * Pointer to page in which to insert or NULL on error. |
| 1487 | */ | 1486 | */ |
| 1488 | static int | 1487 | static int |
| 1489 | xtSplitRoot(tid_t tid, | 1488 | xtSplitRoot(tid_t tid, |
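
The same idea for the root, per the comment above: the root cannot move because it lives in the inode, so its contents are copied wholesale into one new right child and the root is reset to a single router entry at offset 0, which preserves left-most coverage. A toy model:

#define NENT 8

struct node {
        int nent;
        long long key[NENT];
        struct node *child[NENT];
};

/* root stays in place; its content moves to one new right child */
static void split_root(struct node *root, struct node *right)
{
        *right = *root;         /* root's entries -> new child page  */

        root->nent = 1;
        root->key[0] = 0;       /* offset 0 forces left-most coverage */
        root->child[0] = right; /* single entry for the new child    */
}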
| @@ -1505,7 +1504,7 @@ xtSplitRoot(tid_t tid, | |||
| 1505 | INCREMENT(xtStat.split); | 1504 | INCREMENT(xtStat.split); |
| 1506 | 1505 | ||
| 1507 | /* | 1506 | /* |
| 1508 | * allocate a single (right) child page | 1507 | * allocate a single (right) child page |
| 1509 | */ | 1508 | */ |
| 1510 | pxdlist = split->pxdlist; | 1509 | pxdlist = split->pxdlist; |
| 1511 | pxd = &pxdlist->pxd[pxdlist->npxd]; | 1510 | pxd = &pxdlist->pxd[pxdlist->npxd]; |
| @@ -1573,7 +1572,7 @@ xtSplitRoot(tid_t tid, | |||
| 1573 | } | 1572 | } |
| 1574 | 1573 | ||
| 1575 | /* | 1574 | /* |
| 1576 | * reset the root | 1575 | * reset the root |
| 1577 | * | 1576 | * |
| 1578 | * init root with the single entry for the new right page | 1577 | * init root with the single entry for the new right page |
| 1579 | * set the 1st entry offset to 0, which forces the left-most key | 1578 | * set the 1st entry offset to 0, which forces the left-most key |
| @@ -1610,7 +1609,7 @@ xtSplitRoot(tid_t tid, | |||
| 1610 | 1609 | ||
| 1611 | 1610 | ||
| 1612 | /* | 1611 | /* |
| 1613 | * xtExtend() | 1612 | * xtExtend() |
| 1614 | * | 1613 | * |
| 1615 | * function: extend in-place; | 1614 | * function: extend in-place; |
| 1616 | * | 1615 | * |
| @@ -1677,7 +1676,7 @@ int xtExtend(tid_t tid, /* transaction id */ | |||
| 1677 | goto extendOld; | 1676 | goto extendOld; |
| 1678 | 1677 | ||
| 1679 | /* | 1678 | /* |
| 1680 | * extent overflow: insert entry for new extent | 1679 | * extent overflow: insert entry for new extent |
| 1681 | */ | 1680 | */ |
| 1682 | //insertNew: | 1681 | //insertNew: |
| 1683 | xoff = offsetXAD(xad) + MAXXLEN; | 1682 | xoff = offsetXAD(xad) + MAXXLEN; |
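
The overflow case exists because an XAD encodes its length in a fixed-width field, so an in-place extension can grow an entry only up to MAXXLEN blocks; anything past that becomes a new entry starting at offset + MAXXLEN, as in the line above. The arithmetic in standalone form (the MAXXLEN value here is illustrative):

#define MAXXLEN 0xffffff        /* illustrative length-field ceiling */

struct piece { long long xoff; int xlen; };

/* returns 1 and fills *extra if the extension overflows one XAD */
static int extend(long long xoff, int oldlen, int growby,
                  struct piece *extra)
{
        if (oldlen + growby <= MAXXLEN)
                return 0;               /* fits in the existing XAD */

        /* existing entry grows to MAXXLEN; the remainder becomes a
         * new entry starting right behind it */
        extra->xoff = xoff + MAXXLEN;
        extra->xlen = oldlen + growby - MAXXLEN;
        return 1;
}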
| @@ -1685,8 +1684,8 @@ int xtExtend(tid_t tid, /* transaction id */ | |||
| 1685 | nextindex = le16_to_cpu(p->header.nextindex); | 1684 | nextindex = le16_to_cpu(p->header.nextindex); |
| 1686 | 1685 | ||
| 1687 | /* | 1686 | /* |
| 1688 | * if the leaf page is full, insert the new entry and | 1687 | * if the leaf page is full, insert the new entry and |
| 1689 | * propagate up the router entry for the new page from split | 1688 | * propagate up the router entry for the new page from split |
| 1690 | * | 1689 | * |
| 1691 | * The xtSplitUp() will insert the entry and unpin the leaf page. | 1690 | * The xtSplitUp() will insert the entry and unpin the leaf page. |
| 1692 | */ | 1691 | */ |
| @@ -1731,7 +1730,7 @@ int xtExtend(tid_t tid, /* transaction id */ | |||
| 1731 | } | 1730 | } |
| 1732 | } | 1731 | } |
| 1733 | /* | 1732 | /* |
| 1734 | * insert the new entry into the leaf page | 1733 | * insert the new entry into the leaf page |
| 1735 | */ | 1734 | */ |
| 1736 | else { | 1735 | else { |
| 1737 | /* insert the new entry: mark the entry NEW */ | 1736 | /* insert the new entry: mark the entry NEW */ |
| @@ -1771,11 +1770,11 @@ int xtExtend(tid_t tid, /* transaction id */ | |||
| 1771 | 1770 | ||
| 1772 | #ifdef _NOTYET | 1771 | #ifdef _NOTYET |
| 1773 | /* | 1772 | /* |
| 1774 | * xtTailgate() | 1773 | * xtTailgate() |
| 1775 | * | 1774 | * |
| 1776 | * function: split existing 'tail' extent | 1775 | * function: split existing 'tail' extent |
| 1777 | * (split offset >= start offset of tail extent), and | 1776 | * (split offset >= start offset of tail extent), and |
| 1778 | * relocate and extend the split tail half; | 1777 | * relocate and extend the split tail half; |
| 1779 | * | 1778 | * |
| 1780 | * note: existing extent may or may not have been committed. | 1779 | * note: existing extent may or may not have been committed. |
| 1781 | * caller is responsible for pager buffer cache update, and | 1780 | * caller is responsible for pager buffer cache update, and |
| @@ -1804,7 +1803,7 @@ int xtTailgate(tid_t tid, /* transaction id */ | |||
| 1804 | 1803 | ||
| 1805 | /* | 1804 | /* |
| 1806 | printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", | 1805 | printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", |
| 1807 | (ulong)xoff, xlen, (ulong)xaddr); | 1806 | (ulong)xoff, xlen, (ulong)xaddr); |
| 1808 | */ | 1807 | */ |
| 1809 | 1808 | ||
| 1810 | /* there must exist extent to be tailgated */ | 1809 | /* there must exist extent to be tailgated */ |
| @@ -1842,18 +1841,18 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", | |||
| 1842 | xad = &p->xad[index]; | 1841 | xad = &p->xad[index]; |
| 1843 | /* | 1842 | /* |
| 1844 | printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", | 1843 | printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", |
| 1845 | (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); | 1844 | (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); |
| 1846 | */ | 1845 | */ |
| 1847 | if ((llen = xoff - offsetXAD(xad)) == 0) | 1846 | if ((llen = xoff - offsetXAD(xad)) == 0) |
| 1848 | goto updateOld; | 1847 | goto updateOld; |
| 1849 | 1848 | ||
| 1850 | /* | 1849 | /* |
| 1851 | * partially replace extent: insert entry for new extent | 1850 | * partially replace extent: insert entry for new extent |
| 1852 | */ | 1851 | */ |
| 1853 | //insertNew: | 1852 | //insertNew: |
| 1854 | /* | 1853 | /* |
| 1855 | * if the leaf page is full, insert the new entry and | 1854 | * if the leaf page is full, insert the new entry and |
| 1856 | * propagate up the router entry for the new page from split | 1855 | * propagate up the router entry for the new page from split |
| 1857 | * | 1856 | * |
| 1858 | * The xtSplitUp() will insert the entry and unpin the leaf page. | 1857 | * The xtSplitUp() will insert the entry and unpin the leaf page. |
| 1859 | */ | 1858 | */ |
| @@ -1898,7 +1897,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", | |||
| 1898 | } | 1897 | } |
| 1899 | } | 1898 | } |
| 1900 | /* | 1899 | /* |
| 1901 | * insert the new entry into the leaf page | 1900 | * insert the new entry into the leaf page |
| 1902 | */ | 1901 | */ |
| 1903 | else { | 1902 | else { |
| 1904 | /* insert the new entry: mark the entry NEW */ | 1903 | /* insert the new entry: mark the entry NEW */ |
| @@ -1955,17 +1954,17 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", | |||
| 1955 | #endif /* _NOTYET */ | 1954 | #endif /* _NOTYET */ |
| 1956 | 1955 | ||
| 1957 | /* | 1956 | /* |
| 1958 | * xtUpdate() | 1957 | * xtUpdate() |
| 1959 | * | 1958 | * |
| 1960 | * function: update XAD; | 1959 | * function: update XAD; |
| 1961 | * | 1960 | * |
| 1962 | * update extent for allocated_but_not_recorded or | 1961 | * update extent for allocated_but_not_recorded or |
| 1963 | * compressed extent; | 1962 | * compressed extent; |
| 1964 | * | 1963 | * |
| 1965 | * parameter: | 1964 | * parameter: |
| 1966 | * nxad - new XAD; | 1965 | * nxad - new XAD; |
| 1967 | * logical extent of the specified XAD must be completely | 1966 | * logical extent of the specified XAD must be completely |
| 1968 | * contained by an existing XAD; | 1967 | * contained by an existing XAD; |
| 1969 | */ | 1968 | */ |
| 1970 | int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) | 1969 | int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) |
| 1971 | { /* new XAD */ | 1970 | { /* new XAD */ |
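
The precondition stated above, that the new XAD's logical extent must be completely contained by an existing XAD, is a plain range check. A sketch:

/* the xtUpdate() precondition as a range check */
static int contained(long long oxoff, int oxlen,   /* existing XAD */
                     long long nxoff, int nxlen)   /* new XAD      */
{
        return nxoff >= oxoff &&
               nxoff + nxlen <= oxoff + oxlen;
}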
| @@ -2416,19 +2415,19 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p); | |||
| 2416 | 2415 | ||
| 2417 | 2416 | ||
| 2418 | /* | 2417 | /* |
| 2419 | * xtAppend() | 2418 | * xtAppend() |
| 2420 | * | 2419 | * |
| 2421 | * function: grow in append mode from contiguous region specified; | 2420 | * function: grow in append mode from contiguous region specified; |
| 2422 | * | 2421 | * |
| 2423 | * parameter: | 2422 | * parameter: |
| 2424 | * tid - transaction id; | 2423 | * tid - transaction id; |
| 2425 | * ip - file object; | 2424 | * ip - file object; |
| 2426 | * xflag - extent flag: | 2425 | * xflag - extent flag: |
| 2427 | * xoff - extent offset; | 2426 | * xoff - extent offset; |
| 2428 | * maxblocks - max extent length; | 2427 | * maxblocks - max extent length; |
| 2429 | * xlen - extent length (in/out); | 2428 | * xlen - extent length (in/out); |
| 2430 | * xaddrp - extent address pointer (in/out): | 2429 | * xaddrp - extent address pointer (in/out): |
| 2431 | * flag - | 2430 | * flag - |
| 2432 | * | 2431 | * |
| 2433 | * return: | 2432 | * return: |
| 2434 | */ | 2433 | */ |
| @@ -2460,7 +2459,7 @@ int xtAppend(tid_t tid, /* transaction id */ | |||
| 2460 | (ulong) xoff, maxblocks, xlen, (ulong) xaddr); | 2459 | (ulong) xoff, maxblocks, xlen, (ulong) xaddr); |
| 2461 | 2460 | ||
| 2462 | /* | 2461 | /* |
| 2463 | * search for the entry location at which to insert: | 2462 | * search for the entry location at which to insert: |
| 2464 | * | 2463 | * |
| 2465 | * xtFastSearch() and xtSearch() both return (leaf page | 2464 | * xtFastSearch() and xtSearch() both return (leaf page |
| 2466 | * pinned, index at which to insert). | 2465 | * pinned, index at which to insert). |
| @@ -2482,13 +2481,13 @@ int xtAppend(tid_t tid, /* transaction id */ | |||
| 2482 | xlen = min(xlen, (int)(next - xoff)); | 2481 | xlen = min(xlen, (int)(next - xoff)); |
| 2483 | //insert: | 2482 | //insert: |
| 2484 | /* | 2483 | /* |
| 2485 | * insert entry for new extent | 2484 | * insert entry for new extent |
| 2486 | */ | 2485 | */ |
| 2487 | xflag |= XAD_NEW; | 2486 | xflag |= XAD_NEW; |
| 2488 | 2487 | ||
| 2489 | /* | 2488 | /* |
| 2490 | * if the leaf page is full, split the page and | 2489 | * if the leaf page is full, split the page and |
| 2491 | * propagate up the router entry for the new page from split | 2490 | * propagate up the router entry for the new page from split |
| 2492 | * | 2491 | * |
| 2493 | * The xtSplitUp() will insert the entry and unpin the leaf page. | 2492 | * The xtSplitUp() will insert the entry and unpin the leaf page. |
| 2494 | */ | 2493 | */ |
| @@ -2545,7 +2544,7 @@ int xtAppend(tid_t tid, /* transaction id */ | |||
| 2545 | return 0; | 2544 | return 0; |
| 2546 | 2545 | ||
| 2547 | /* | 2546 | /* |
| 2548 | * insert the new entry into the leaf page | 2547 | * insert the new entry into the leaf page |
| 2549 | */ | 2548 | */ |
| 2550 | insertLeaf: | 2549 | insertLeaf: |
| 2551 | /* | 2550 | /* |
| @@ -2589,17 +2588,17 @@ int xtAppend(tid_t tid, /* transaction id */ | |||
| 2589 | 2588 | ||
| 2590 | /* - TBD for defragmentation/reorganization - | 2589 | /* - TBD for defragmentation/reorganization - |
| 2591 | * | 2590 | * |
| 2592 | * xtDelete() | 2591 | * xtDelete() |
| 2593 | * | 2592 | * |
| 2594 | * function: | 2593 | * function: |
| 2595 | * delete the entry with the specified key. | 2594 | * delete the entry with the specified key. |
| 2596 | * | 2595 | * |
| 2597 | * N.B.: whole extent of the entry is assumed to be deleted. | 2596 | * N.B.: whole extent of the entry is assumed to be deleted. |
| 2598 | * | 2597 | * |
| 2599 | * parameter: | 2598 | * parameter: |
| 2600 | * | 2599 | * |
| 2601 | * return: | 2600 | * return: |
| 2602 | * ENOENT: if the entry is not found. | 2601 | * ENOENT: if the entry is not found. |
| 2603 | * | 2602 | * |
| 2604 | * exception: | 2603 | * exception: |
| 2605 | */ | 2604 | */ |
| @@ -2665,10 +2664,10 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) | |||
| 2665 | 2664 | ||
| 2666 | /* - TBD for defragmentation/reorganization - | 2665 | /* - TBD for defragmentation/reorganization - |
| 2667 | * | 2666 | * |
| 2668 | * xtDeleteUp() | 2667 | * xtDeleteUp() |
| 2669 | * | 2668 | * |
| 2670 | * function: | 2669 | * function: |
| 2671 | * free empty pages as propagating deletion up the tree | 2670 | * free empty pages as propagating deletion up the tree |
| 2672 | * | 2671 | * |
| 2673 | * parameter: | 2672 | * parameter: |
| 2674 | * | 2673 | * |
| @@ -2815,15 +2814,15 @@ xtDeleteUp(tid_t tid, struct inode *ip, | |||
| 2815 | 2814 | ||
| 2816 | 2815 | ||
| 2817 | /* | 2816 | /* |
| 2818 | * NAME: xtRelocate() | 2817 | * NAME: xtRelocate() |
| 2819 | * | 2818 | * |
| 2820 | * FUNCTION: relocate xtpage or data extent of regular file; | 2819 | * FUNCTION: relocate xtpage or data extent of regular file; |
| 2821 | * This function is mainly used by defragfs utility. | 2820 | * This function is mainly used by defragfs utility. |
| 2822 | * | 2821 | * |
| 2823 | * NOTE: This routine does not have the logic to handle | 2822 | * NOTE: This routine does not have the logic to handle |
| 2824 | * uncommitted allocated extent. The caller should call | 2823 | * uncommitted allocated extent. The caller should call |
| 2825 | * txCommit() to commit all the allocation before call | 2824 | * txCommit() to commit all the allocation before call |
| 2826 | * this routine. | 2825 | * this routine. |
| 2827 | */ | 2826 | */ |
| 2828 | int | 2827 | int |
| 2829 | xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ | 2828 | xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ |
| @@ -2865,8 +2864,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ | |||
| 2865 | xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); | 2864 | xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); |
| 2866 | 2865 | ||
| 2867 | /* | 2866 | /* |
| 2868 | * 1. get and validate the parent xtpage/xad entry | 2867 | * 1. get and validate the parent xtpage/xad entry |
| 2869 | * covering the source extent to be relocated; | 2868 | * covering the source extent to be relocated; |
| 2870 | */ | 2869 | */ |
| 2871 | if (xtype == DATAEXT) { | 2870 | if (xtype == DATAEXT) { |
| 2872 | /* search in leaf entry */ | 2871 | /* search in leaf entry */ |
| @@ -2910,7 +2909,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ | |||
| 2910 | jfs_info("xtRelocate: parent xad entry validated."); | 2909 | jfs_info("xtRelocate: parent xad entry validated."); |
| 2911 | 2910 | ||
| 2912 | /* | 2911 | /* |
| 2913 | * 2. relocate the extent | 2912 | * 2. relocate the extent |
| 2914 | */ | 2913 | */ |
| 2915 | if (xtype == DATAEXT) { | 2914 | if (xtype == DATAEXT) { |
| 2916 | /* if the extent is allocated-but-not-recorded | 2915 | /* if the extent is allocated-but-not-recorded |
| @@ -2923,7 +2922,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ | |||
| 2923 | XT_PUTPAGE(pmp); | 2922 | XT_PUTPAGE(pmp); |
| 2924 | 2923 | ||
| 2925 | /* | 2924 | /* |
| 2926 | * cmRelocate() | 2925 | * cmRelocate() |
| 2927 | * | 2926 | * |
| 2928 | * copy target data pages to be relocated; | 2927 | * copy target data pages to be relocated; |
| 2929 | * | 2928 | * |
| @@ -2945,8 +2944,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ | |||
| 2945 | pno = offset >> CM_L2BSIZE; | 2944 | pno = offset >> CM_L2BSIZE; |
| 2946 | npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; | 2945 | npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; |
| 2947 | /* | 2946 | /* |
| 2948 | npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - | 2947 | npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - |
| 2949 | (offset >> CM_L2BSIZE) + 1; | 2948 | (offset >> CM_L2BSIZE) + 1; |
| 2950 | */ | 2949 | */ |
| 2951 | sxaddr = oxaddr; | 2950 | sxaddr = oxaddr; |
| 2952 | dxaddr = nxaddr; | 2951 | dxaddr = nxaddr; |
| @@ -2981,7 +2980,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ | |||
| 2981 | 2980 | ||
| 2982 | XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); | 2981 | XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); |
| 2983 | jfs_info("xtRelocate: target data extent relocated."); | 2982 | jfs_info("xtRelocate: target data extent relocated."); |
| 2984 | } else { /* (xtype == XTPAGE) */ | 2983 | } else { /* (xtype == XTPAGE) */ |
| 2985 | 2984 | ||
| 2986 | /* | 2985 | /* |
| 2987 | * read in the target xtpage from the source extent; | 2986 | * read in the target xtpage from the source extent; |
| @@ -3026,16 +3025,14 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ | |||
| 3026 | */ | 3025 | */ |
| 3027 | if (lmp) { | 3026 | if (lmp) { |
| 3028 | BT_MARK_DIRTY(lmp, ip); | 3027 | BT_MARK_DIRTY(lmp, ip); |
| 3029 | tlck = | 3028 | tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); |
| 3030 | txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); | ||
| 3031 | lp->header.next = cpu_to_le64(nxaddr); | 3029 | lp->header.next = cpu_to_le64(nxaddr); |
| 3032 | XT_PUTPAGE(lmp); | 3030 | XT_PUTPAGE(lmp); |
| 3033 | } | 3031 | } |
| 3034 | 3032 | ||
| 3035 | if (rmp) { | 3033 | if (rmp) { |
| 3036 | BT_MARK_DIRTY(rmp, ip); | 3034 | BT_MARK_DIRTY(rmp, ip); |
| 3037 | tlck = | 3035 | tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); |
| 3038 | txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); | ||
| 3039 | rp->header.prev = cpu_to_le64(nxaddr); | 3036 | rp->header.prev = cpu_to_le64(nxaddr); |
| 3040 | XT_PUTPAGE(rmp); | 3037 | XT_PUTPAGE(rmp); |
| 3041 | } | 3038 | } |
| @@ -3062,7 +3059,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ | |||
| 3062 | * scan may be skipped by commit() and logredo(); | 3059 | * scan may be skipped by commit() and logredo(); |
| 3063 | */ | 3060 | */ |
| 3064 | BT_MARK_DIRTY(mp, ip); | 3061 | BT_MARK_DIRTY(mp, ip); |
| 3065 | /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ | 3062 | /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ |
| 3066 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); | 3063 | tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); |
| 3067 | xtlck = (struct xtlock *) & tlck->lock; | 3064 | xtlck = (struct xtlock *) & tlck->lock; |
| 3068 | 3065 | ||
| @@ -3084,7 +3081,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ | |||
| 3084 | } | 3081 | } |
| 3085 | 3082 | ||
| 3086 | /* | 3083 | /* |
| 3087 | * 3. acquire maplock for the source extent to be freed; | 3084 | * 3. acquire maplock for the source extent to be freed; |
| 3088 | * | 3085 | * |
| 3089 | * acquire a maplock saving the src relocated extent address; | 3086 | * acquire a maplock saving the src relocated extent address; |
| 3090 | * to free of the extent at commit time; | 3087 | * to free of the extent at commit time; |
| @@ -3105,7 +3102,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ | |||
| 3105 | * is no buffer associated with this lock since the buffer | 3102 | * is no buffer associated with this lock since the buffer |
| 3106 | * has been redirected to the target location. | 3103 | * has been redirected to the target location. |
| 3107 | */ | 3104 | */ |
| 3108 | else /* (xtype == XTPAGE) */ | 3105 | else /* (xtype == XTPAGE) */ |
| 3109 | tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); | 3106 | tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); |
| 3110 | 3107 | ||
| 3111 | pxdlock = (struct pxd_lock *) & tlck->lock; | 3108 | pxdlock = (struct pxd_lock *) & tlck->lock; |
| @@ -3115,7 +3112,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ | |||
| 3115 | pxdlock->index = 1; | 3112 | pxdlock->index = 1; |
| 3116 | 3113 | ||
| 3117 | /* | 3114 | /* |
| 3118 | * 4. update the parent xad entry for relocation; | 3115 | * 4. update the parent xad entry for relocation; |
| 3119 | * | 3116 | * |
| 3120 | * acquire tlck for the parent entry with XAD_NEW as entry | 3117 | * acquire tlck for the parent entry with XAD_NEW as entry |
| 3121 | * update which will write LOG_REDOPAGE and update bmap for | 3118 | * update which will write LOG_REDOPAGE and update bmap for |
| @@ -3143,22 +3140,22 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ | |||
| 3143 | 3140 | ||
| 3144 | 3141 | ||
| 3145 | /* | 3142 | /* |
| 3146 | * xtSearchNode() | 3143 | * xtSearchNode() |
| 3147 | * | 3144 | * |
| 3148 | * function: search for the internal xad entry covering specified extent. | 3145 | * function: search for the internal xad entry covering specified extent. |
| 3149 | * This function is mainly used by defragfs utility. | 3146 | * This function is mainly used by defragfs utility. |
| 3150 | * | 3147 | * |
| 3151 | * parameters: | 3148 | * parameters: |
| 3152 | * ip - file object; | 3149 | * ip - file object; |
| 3153 | * xad - extent to find; | 3150 | * xad - extent to find; |
| 3154 | * cmpp - comparison result: | 3151 | * cmpp - comparison result: |
| 3155 | * btstack - traverse stack; | 3152 | * btstack - traverse stack; |
| 3156 | * flag - search process flag; | 3153 | * flag - search process flag; |
| 3157 | * | 3154 | * |
| 3158 | * returns: | 3155 | * returns: |
| 3159 | * btstack contains (bn, index) of search path traversed to the entry. | 3156 | * btstack contains (bn, index) of search path traversed to the entry. |
| 3160 | * *cmpp is set to result of comparison with the entry returned. | 3157 | * *cmpp is set to result of comparison with the entry returned. |
| 3161 | * the page containing the entry is pinned at exit. | 3158 | * the page containing the entry is pinned at exit. |
| 3162 | */ | 3159 | */ |
| 3163 | static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ | 3160 | static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ |
| 3164 | int *cmpp, struct btstack * btstack, int flag) | 3161 | int *cmpp, struct btstack * btstack, int flag) |
| @@ -3181,7 +3178,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ | |||
| 3181 | xaddr = addressXAD(xad); | 3178 | xaddr = addressXAD(xad); |
| 3182 | 3179 | ||
| 3183 | /* | 3180 | /* |
| 3184 | * search down tree from root: | 3181 | * search down tree from root: |
| 3185 | * | 3182 | * |
| 3186 | * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of | 3183 | * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of |
| 3187 | * internal page, child page Pi contains entry with k, Ki <= K < Kj. | 3184 | * internal page, child page Pi contains entry with k, Ki <= K < Kj. |
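The descent rule this comment states — between consecutive entries <Ki,Pi> and <Kj,Pj>, child Pi covers keys with Ki <= K < Kj — is the usual B+-tree invariant. A minimal standalone sketch of that child selection (toy code, not taken from JFS):

```c
#include <stdio.h>

/* Pick the child slot for search key k among sorted page keys K[0..n-1]:
 * the last entry with K[i] <= k, per the Ki <= K < Kj invariant above. */
static int pick_child(const long *K, int n, long k)
{
	int lo = 0, hi = n - 1, ans = 0;

	while (lo <= hi) {
		int mid = (lo + hi) / 2;

		if (K[mid] <= k) {
			ans = mid;	/* Ki <= k: candidate, look right */
			lo = mid + 1;
		} else {
			hi = mid - 1;	/* Kj > k: look left */
		}
	}
	return ans;
}

int main(void)
{
	long K[] = { 0, 100, 200, 300 };

	printf("%d\n", pick_child(K, 4, 150));	/* 1, since 100 <= 150 < 200 */
	return 0;
}
```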
| @@ -3217,7 +3214,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ | |||
| 3217 | XT_CMP(cmp, xoff, &p->xad[index], t64); | 3214 | XT_CMP(cmp, xoff, &p->xad[index], t64); |
| 3218 | if (cmp == 0) { | 3215 | if (cmp == 0) { |
| 3219 | /* | 3216 | /* |
| 3220 | * search hit | 3217 | * search hit |
| 3221 | * | 3218 | * |
| 3222 | * verify for exact match; | 3219 | * verify for exact match; |
| 3223 | */ | 3220 | */ |
| @@ -3245,7 +3242,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ | |||
| 3245 | } | 3242 | } |
| 3246 | 3243 | ||
| 3247 | /* | 3244 | /* |
| 3248 | * search miss - non-leaf page: | 3245 | * search miss - non-leaf page: |
| 3249 | * | 3246 | * |
| 3250 | * base is the smallest index with key (Kj) greater than | 3247 | * base is the smallest index with key (Kj) greater than |
| 3251 | * search key (K) and may be zero or maxentry index. | 3248 | * search key (K) and may be zero or maxentry index. |
| @@ -3268,15 +3265,15 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ | |||
| 3268 | 3265 | ||
| 3269 | 3266 | ||
| 3270 | /* | 3267 | /* |
| 3271 | * xtRelink() | 3268 | * xtRelink() |
| 3272 | * | 3269 | * |
| 3273 | * function: | 3270 | * function: |
| 3274 | * link around a freed page. | 3271 | * link around a freed page. |
| 3275 | * | 3272 | * |
| 3276 | * Parameter: | 3273 | * Parameter: |
| 3277 | * int tid, | 3274 | * int tid, |
| 3278 | * struct inode *ip, | 3275 | * struct inode *ip, |
| 3279 | * xtpage_t *p) | 3276 | * xtpage_t *p) |
| 3280 | * | 3277 | * |
| 3281 | * returns: | 3278 | * returns: |
| 3282 | */ | 3279 | */ |
| @@ -3338,7 +3335,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) | |||
| 3338 | 3335 | ||
| 3339 | 3336 | ||
| 3340 | /* | 3337 | /* |
| 3341 | * xtInitRoot() | 3338 | * xtInitRoot() |
| 3342 | * | 3339 | * |
| 3343 | * initialize file root (inline in inode) | 3340 | * initialize file root (inline in inode) |
| 3344 | */ | 3341 | */ |
| @@ -3385,42 +3382,42 @@ void xtInitRoot(tid_t tid, struct inode *ip) | |||
| 3385 | #define MAX_TRUNCATE_LEAVES 50 | 3382 | #define MAX_TRUNCATE_LEAVES 50 |
| 3386 | 3383 | ||
| 3387 | /* | 3384 | /* |
| 3388 | * xtTruncate() | 3385 | * xtTruncate() |
| 3389 | * | 3386 | * |
| 3390 | * function: | 3387 | * function: |
| 3391 | * traverse for truncation logging backward bottom up; | 3388 | * traverse for truncation logging backward bottom up; |
| 3392 | * terminate at the last extent entry at the current subtree | 3389 | * terminate at the last extent entry at the current subtree |
| 3393 | * root page covering new down size. | 3390 | * root page covering new down size. |
| 3394 | * truncation may occur within the last extent entry. | 3391 | * truncation may occur within the last extent entry. |
| 3395 | * | 3392 | * |
| 3396 | * parameter: | 3393 | * parameter: |
| 3397 | * int tid, | 3394 | * int tid, |
| 3398 | * struct inode *ip, | 3395 | * struct inode *ip, |
| 3399 | * s64 newsize, | 3396 | * s64 newsize, |
| 3400 | * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} | 3397 | * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} |
| 3401 | * | 3398 | * |
| 3402 | * return: | 3399 | * return: |
| 3403 | * | 3400 | * |
| 3404 | * note: | 3401 | * note: |
| 3405 | * PWMAP: | 3402 | * PWMAP: |
| 3406 | * 1. truncate (non-COMMIT_NOLINK file) | 3403 | * 1. truncate (non-COMMIT_NOLINK file) |
| 3407 | * by jfs_truncate() or jfs_open(O_TRUNC): | 3404 | * by jfs_truncate() or jfs_open(O_TRUNC): |
| 3408 | * xtree is updated; | 3405 | * xtree is updated; |
| 3409 | * 2. truncate index table of directory when last entry removed | 3406 | * 2. truncate index table of directory when last entry removed |
| 3410 | * map update via tlock at commit time; | 3407 | * map update via tlock at commit time; |
| 3411 | * PMAP: | 3408 | * PMAP: |
| 3412 | * Call xtTruncate_pmap instead | 3409 | * Call xtTruncate_pmap instead |
| 3413 | * WMAP: | 3410 | * WMAP: |
| 3414 | * 1. remove (free zero link count) on last reference release | 3411 | * 1. remove (free zero link count) on last reference release |
| 3415 | * (pmap has been freed at commit zero link count); | 3412 | * (pmap has been freed at commit zero link count); |
| 3416 | * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): | 3413 | * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): |
| 3417 | * xtree is updated; | 3414 | * xtree is updated; |
| 3418 | * map update directly at truncation time; | 3415 | * map update directly at truncation time; |
| 3419 | * | 3416 | * |
| 3420 | * if (DELETE) | 3417 | * if (DELETE) |
| 3421 | * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); | 3418 | * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); |
| 3422 | * else if (TRUNCATE) | 3419 | * else if (TRUNCATE) |
| 3423 | * must write LOG_NOREDOPAGE for deleted index page; | 3420 | * must write LOG_NOREDOPAGE for deleted index page; |
| 3424 | * | 3421 | * |
| 3425 | * pages may already have been tlocked by anonymous transactions | 3422 | * pages may already have been tlocked by anonymous transactions |
| 3426 | * during file growth (i.e., write) before truncation; | 3423 | * during file growth (i.e., write) before truncation; |
| @@ -3493,7 +3490,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) | |||
| 3493 | * retained in the new sized file. | 3490 | * retained in the new sized file. |
| 3494 | * if type is PMAP, the data and index pages are NOT | 3491 | * if type is PMAP, the data and index pages are NOT |
| 3495 | * freed, and the data and index blocks are NOT freed | 3492 | * freed, and the data and index blocks are NOT freed |
| 3496 | * from working map. | 3493 | * from working map. |
| 3497 | * (this will allow continued access to data/index of | 3494 | * (this will allow continued access to data/index of |
| 3498 | * temporary file (zerolink count file truncated to zero-length)). | 3495 | * temporary file (zerolink count file truncated to zero-length)). |
| 3499 | */ | 3496 | */ |
| @@ -3542,7 +3539,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) | |||
| 3542 | goto getChild; | 3539 | goto getChild; |
| 3543 | 3540 | ||
| 3544 | /* | 3541 | /* |
| 3545 | * leaf page | 3542 | * leaf page |
| 3546 | */ | 3543 | */ |
| 3547 | freed = 0; | 3544 | freed = 0; |
| 3548 | 3545 | ||
| @@ -3916,7 +3913,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) | |||
| 3916 | } | 3913 | } |
| 3917 | 3914 | ||
| 3918 | /* | 3915 | /* |
| 3919 | * internal page: go down to child page of current entry | 3916 | * internal page: go down to child page of current entry |
| 3920 | */ | 3917 | */ |
| 3921 | getChild: | 3918 | getChild: |
| 3922 | /* save current parent entry for the child page */ | 3919 | /* save current parent entry for the child page */ |
| @@ -3965,7 +3962,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) | |||
| 3965 | 3962 | ||
| 3966 | 3963 | ||
| 3967 | /* | 3964 | /* |
| 3968 | * xtTruncate_pmap() | 3965 | * xtTruncate_pmap() |
| 3969 | * | 3966 | * |
| 3970 | * function: | 3967 | * function: |
| 3971 | * Perform truncate to zero length for deleted file, leaving the | 3968 | * Perform truncate to zero length for deleted file, leaving the |
| @@ -3974,9 +3971,9 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) | |||
| 3974 | * is committed to disk. | 3971 | * is committed to disk. |
| 3975 | * | 3972 | * |
| 3976 | * parameter: | 3973 | * parameter: |
| 3977 | * tid_t tid, | 3974 | * tid_t tid, |
| 3978 | * struct inode *ip, | 3975 | * struct inode *ip, |
| 3979 | * s64 committed_size) | 3976 | * s64 committed_size) |
| 3980 | * | 3977 | * |
| 3981 | * return: new committed size | 3978 | * return: new committed size |
| 3982 | * | 3979 | * |
| @@ -4050,7 +4047,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) | |||
| 4050 | } | 4047 | } |
| 4051 | 4048 | ||
| 4052 | /* | 4049 | /* |
| 4053 | * leaf page | 4050 | * leaf page |
| 4054 | */ | 4051 | */ |
| 4055 | 4052 | ||
| 4056 | if (++locked_leaves > MAX_TRUNCATE_LEAVES) { | 4053 | if (++locked_leaves > MAX_TRUNCATE_LEAVES) { |
| @@ -4062,7 +4059,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) | |||
| 4062 | xoff = offsetXAD(xad); | 4059 | xoff = offsetXAD(xad); |
| 4063 | xlen = lengthXAD(xad); | 4060 | xlen = lengthXAD(xad); |
| 4064 | XT_PUTPAGE(mp); | 4061 | XT_PUTPAGE(mp); |
| 4065 | return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; | 4062 | return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; |
| 4066 | } | 4063 | } |
| 4067 | tlck = txLock(tid, ip, mp, tlckXTREE); | 4064 | tlck = txLock(tid, ip, mp, tlckXTREE); |
| 4068 | tlck->type = tlckXTREE | tlckFREE; | 4065 | tlck->type = tlckXTREE | tlckFREE; |
| @@ -4099,8 +4096,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) | |||
| 4099 | */ | 4096 | */ |
| 4100 | tlck = txLock(tid, ip, mp, tlckXTREE); | 4097 | tlck = txLock(tid, ip, mp, tlckXTREE); |
| 4101 | xtlck = (struct xtlock *) & tlck->lock; | 4098 | xtlck = (struct xtlock *) & tlck->lock; |
| 4102 | xtlck->hwm.offset = | 4099 | xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; |
| 4103 | le16_to_cpu(p->header.nextindex) - 1; | ||
| 4104 | tlck->type = tlckXTREE | tlckFREE; | 4100 | tlck->type = tlckXTREE | tlckFREE; |
| 4105 | 4101 | ||
| 4106 | XT_PUTPAGE(mp); | 4102 | XT_PUTPAGE(mp); |
| @@ -4118,7 +4114,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) | |||
| 4118 | else | 4114 | else |
| 4119 | index--; | 4115 | index--; |
| 4120 | /* | 4116 | /* |
| 4121 | * internal page: go down to child page of current entry | 4117 | * internal page: go down to child page of current entry |
| 4122 | */ | 4118 | */ |
| 4123 | getChild: | 4119 | getChild: |
| 4124 | /* save current parent entry for the child page */ | 4120 | /* save current parent entry for the child page */ |
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h index 164f6f2b1019..70815c8a3d6a 100644 --- a/fs/jfs/jfs_xtree.h +++ b/fs/jfs/jfs_xtree.h | |||
| @@ -19,14 +19,14 @@ | |||
| 19 | #define _H_JFS_XTREE | 19 | #define _H_JFS_XTREE |
| 20 | 20 | ||
| 21 | /* | 21 | /* |
| 22 | * jfs_xtree.h: extent allocation descriptor B+-tree manager | 22 | * jfs_xtree.h: extent allocation descriptor B+-tree manager |
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #include "jfs_btree.h" | 25 | #include "jfs_btree.h" |
| 26 | 26 | ||
| 27 | 27 | ||
| 28 | /* | 28 | /* |
| 29 | * extent allocation descriptor (xad) | 29 | * extent allocation descriptor (xad) |
| 30 | */ | 30 | */ |
| 31 | typedef struct xad { | 31 | typedef struct xad { |
| 32 | unsigned flag:8; /* 1: flag */ | 32 | unsigned flag:8; /* 1: flag */ |
| @@ -38,30 +38,30 @@ typedef struct xad { | |||
| 38 | __le32 addr2; /* 4: address in unit of fsblksize */ | 38 | __le32 addr2; /* 4: address in unit of fsblksize */ |
| 39 | } xad_t; /* (16) */ | 39 | } xad_t; /* (16) */ |
| 40 | 40 | ||
| 41 | #define MAXXLEN ((1 << 24) - 1) | 41 | #define MAXXLEN ((1 << 24) - 1) |
| 42 | 42 | ||
| 43 | #define XTSLOTSIZE 16 | 43 | #define XTSLOTSIZE 16 |
| 44 | #define L2XTSLOTSIZE 4 | 44 | #define L2XTSLOTSIZE 4 |
| 45 | 45 | ||
| 46 | /* xad_t field construction */ | 46 | /* xad_t field construction */ |
| 47 | #define XADoffset(xad, offset64)\ | 47 | #define XADoffset(xad, offset64)\ |
| 48 | {\ | 48 | {\ |
| 49 | (xad)->off1 = ((u64)offset64) >> 32;\ | 49 | (xad)->off1 = ((u64)offset64) >> 32;\ |
| 50 | (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\ | 50 | (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\ |
| 51 | } | 51 | } |
| 52 | #define XADaddress(xad, address64)\ | 52 | #define XADaddress(xad, address64)\ |
| 53 | {\ | 53 | {\ |
| 54 | (xad)->addr1 = ((u64)address64) >> 32;\ | 54 | (xad)->addr1 = ((u64)address64) >> 32;\ |
| 55 | (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ | 55 | (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ |
| 56 | } | 56 | } |
| 57 | #define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32) | 57 | #define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32) |
| 58 | 58 | ||
| 59 | /* xad_t field extraction */ | 59 | /* xad_t field extraction */ |
| 60 | #define offsetXAD(xad)\ | 60 | #define offsetXAD(xad)\ |
| 61 | ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) | 61 | ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) |
| 62 | #define addressXAD(xad)\ | 62 | #define addressXAD(xad)\ |
| 63 | ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2)) | 63 | ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2)) |
| 64 | #define lengthXAD(xad) __le24_to_cpu((xad)->len) | 64 | #define lengthXAD(xad) __le24_to_cpu((xad)->len) |
| 65 | 65 | ||
| 66 | /* xad list */ | 66 | /* xad list */ |
| 67 | struct xadlist { | 67 | struct xadlist { |
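The off1/off2 and addr1/addr2 pairs above split a value wider than 32 bits into an 8-bit high part plus a 32-bit low part. A standalone round-trip sketch of the XADoffset()/offsetXAD() arithmetic (byte-order helpers dropped; the real macros go through __cpu_to_le32):

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t offset64 = 0x12a0005000ULL;		/* a 40-bit file offset */

	/* XADoffset() equivalent: split high 8 bits / low 32 bits */
	uint8_t  off1 = offset64 >> 32;
	uint32_t off2 = offset64 & 0xffffffff;

	/* offsetXAD() equivalent: reassemble the 40-bit value */
	uint64_t back = ((uint64_t)off1 << 32) | off2;

	printf("off1=%#x off2=%#x back=%#llx\n",
	       (unsigned)off1, (unsigned)off2, (unsigned long long)back);
	return 0;					/* back == 0x12a0005000 */
}
```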
| @@ -71,22 +71,22 @@ struct xadlist { | |||
| 71 | }; | 71 | }; |
| 72 | 72 | ||
| 73 | /* xad_t flags */ | 73 | /* xad_t flags */ |
| 74 | #define XAD_NEW 0x01 /* new */ | 74 | #define XAD_NEW 0x01 /* new */ |
| 75 | #define XAD_EXTENDED 0x02 /* extended */ | 75 | #define XAD_EXTENDED 0x02 /* extended */ |
| 76 | #define XAD_COMPRESSED 0x04 /* compressed with recorded length */ | 76 | #define XAD_COMPRESSED 0x04 /* compressed with recorded length */ |
| 77 | #define XAD_NOTRECORDED 0x08 /* allocated but not recorded */ | 77 | #define XAD_NOTRECORDED 0x08 /* allocated but not recorded */ |
| 78 | #define XAD_COW 0x10 /* copy-on-write */ | 78 | #define XAD_COW 0x10 /* copy-on-write */ |
| 79 | 79 | ||
| 80 | 80 | ||
| 81 | /* possible values for maxentry */ | 81 | /* possible values for maxentry */ |
| 82 | #define XTROOTINITSLOT_DIR 6 | 82 | #define XTROOTINITSLOT_DIR 6 |
| 83 | #define XTROOTINITSLOT 10 | 83 | #define XTROOTINITSLOT 10 |
| 84 | #define XTROOTMAXSLOT 18 | 84 | #define XTROOTMAXSLOT 18 |
| 85 | #define XTPAGEMAXSLOT 256 | 85 | #define XTPAGEMAXSLOT 256 |
| 86 | #define XTENTRYSTART 2 | 86 | #define XTENTRYSTART 2 |
| 87 | 87 | ||
| 88 | /* | 88 | /* |
| 89 | * xtree page: | 89 | * xtree page: |
| 90 | */ | 90 | */ |
| 91 | typedef union { | 91 | typedef union { |
| 92 | struct xtheader { | 92 | struct xtheader { |
| @@ -106,7 +106,7 @@ typedef union { | |||
| 106 | } xtpage_t; | 106 | } xtpage_t; |
| 107 | 107 | ||
| 108 | /* | 108 | /* |
| 109 | * external declaration | 109 | * external declaration |
| 110 | */ | 110 | */ |
| 111 | extern int xtLookup(struct inode *ip, s64 lstart, s64 llen, | 111 | extern int xtLookup(struct inode *ip, s64 lstart, s64 llen, |
| 112 | int *pflag, s64 * paddr, int *plen, int flag); | 112 | int *pflag, s64 * paddr, int *plen, int flag); |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 41c204771262..25161c4121e4 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
| @@ -328,7 +328,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) | |||
| 328 | * dentry - child directory dentry | 328 | * dentry - child directory dentry |
| 329 | * | 329 | * |
| 330 | * RETURN: -EINVAL - if name is . or .. | 330 | * RETURN: -EINVAL - if name is . or .. |
| 331 | * -EINVAL - if . or .. exist but are invalid. | 331 | * -EINVAL - if . or .. exist but are invalid. |
| 332 | * errors from subroutines | 332 | * errors from subroutines |
| 333 | * | 333 | * |
| 334 | * note: | 334 | * note: |
| @@ -517,7 +517,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) | |||
| 517 | inode_dec_link_count(ip); | 517 | inode_dec_link_count(ip); |
| 518 | 518 | ||
| 519 | /* | 519 | /* |
| 520 | * commit zero link count object | 520 | * commit zero link count object |
| 521 | */ | 521 | */ |
| 522 | if (ip->i_nlink == 0) { | 522 | if (ip->i_nlink == 0) { |
| 523 | assert(!test_cflag(COMMIT_Nolink, ip)); | 523 | assert(!test_cflag(COMMIT_Nolink, ip)); |
| @@ -596,7 +596,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) | |||
| 596 | /* | 596 | /* |
| 597 | * NAME: commitZeroLink() | 597 | * NAME: commitZeroLink() |
| 598 | * | 598 | * |
| 599 | * FUNCTION: for non-directory, called by jfs_remove(), | 599 | * FUNCTION: for non-directory, called by jfs_remove(), |
| 600 | * truncate a regular file, directory or symbolic | 600 | * truncate a regular file, directory or symbolic |
| 601 | * link to zero length. return 0 if type is not | 601 | * link to zero length. return 0 if type is not |
| 602 | * one of these. | 602 | * one of these. |
| @@ -676,7 +676,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip) | |||
| 676 | /* | 676 | /* |
| 677 | * NAME: jfs_free_zero_link() | 677 | * NAME: jfs_free_zero_link() |
| 678 | * | 678 | * |
| 679 | * FUNCTION: for non-directory, called by iClose(), | 679 | * FUNCTION: for non-directory, called by iClose(), |
| 680 | * free resources of a file from cache and WORKING map | 680 | * free resources of a file from cache and WORKING map |
| 681 | * for a file previously committed with zero link count | 681 | * for a file previously committed with zero link count |
| 682 | * while associated with a pager object, | 682 | * while associated with a pager object, |
| @@ -855,12 +855,12 @@ static int jfs_link(struct dentry *old_dentry, | |||
| 855 | * NAME: jfs_symlink(dip, dentry, name) | 855 | * NAME: jfs_symlink(dip, dentry, name) |
| 856 | * | 856 | * |
| 857 | * FUNCTION: creates a symbolic link to <symlink> by name <name> | 857 | * FUNCTION: creates a symbolic link to <symlink> by name <name> |
| 858 | * in directory <dip> | 858 | * in directory <dip> |
| 859 | * | 859 | * |
| 860 | * PARAMETER: dip - parent directory vnode | 860 | * PARAMETER: dip - parent directory vnode |
| 861 | * dentry - dentry of symbolic link | 861 | * dentry - dentry of symbolic link |
| 862 | * name - the path name of the existing object | 862 | * name - the path name of the existing object |
| 863 | * that will be the source of the link | 863 | * that will be the source of the link |
| 864 | * | 864 | * |
| 865 | * RETURN: errors from subroutines | 865 | * RETURN: errors from subroutines |
| 866 | * | 866 | * |
| @@ -1052,9 +1052,9 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, | |||
| 1052 | 1052 | ||
| 1053 | 1053 | ||
| 1054 | /* | 1054 | /* |
| 1055 | * NAME: jfs_rename | 1055 | * NAME: jfs_rename |
| 1056 | * | 1056 | * |
| 1057 | * FUNCTION: rename a file or directory | 1057 | * FUNCTION: rename a file or directory |
| 1058 | */ | 1058 | */ |
| 1059 | static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | 1059 | static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, |
| 1060 | struct inode *new_dir, struct dentry *new_dentry) | 1060 | struct inode *new_dir, struct dentry *new_dentry) |
| @@ -1331,9 +1331,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 1331 | 1331 | ||
| 1332 | 1332 | ||
| 1333 | /* | 1333 | /* |
| 1334 | * NAME: jfs_mknod | 1334 | * NAME: jfs_mknod |
| 1335 | * | 1335 | * |
| 1336 | * FUNCTION: Create a special file (device) | 1336 | * FUNCTION: Create a special file (device) |
| 1337 | */ | 1337 | */ |
| 1338 | static int jfs_mknod(struct inode *dir, struct dentry *dentry, | 1338 | static int jfs_mknod(struct inode *dir, struct dentry *dentry, |
| 1339 | int mode, dev_t rdev) | 1339 | int mode, dev_t rdev) |
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 79d625f3f733..71984ee95346 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c | |||
| @@ -29,17 +29,17 @@ | |||
| 29 | #include "jfs_txnmgr.h" | 29 | #include "jfs_txnmgr.h" |
| 30 | #include "jfs_debug.h" | 30 | #include "jfs_debug.h" |
| 31 | 31 | ||
| 32 | #define BITSPERPAGE (PSIZE << 3) | 32 | #define BITSPERPAGE (PSIZE << 3) |
| 33 | #define L2MEGABYTE 20 | 33 | #define L2MEGABYTE 20 |
| 34 | #define MEGABYTE (1 << L2MEGABYTE) | 34 | #define MEGABYTE (1 << L2MEGABYTE) |
| 35 | #define MEGABYTE32 (MEGABYTE << 5) | 35 | #define MEGABYTE32 (MEGABYTE << 5) |
| 36 | 36 | ||
| 37 | /* convert block number to bmap file page number */ | 37 | /* convert block number to bmap file page number */ |
| 38 | #define BLKTODMAPN(b)\ | 38 | #define BLKTODMAPN(b)\ |
| 39 | (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) | 39 | (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) |
| 40 | 40 | ||
| 41 | /* | 41 | /* |
| 42 | * jfs_extendfs() | 42 | * jfs_extendfs() |
| 43 | * | 43 | * |
| 44 | * function: extend file system; | 44 | * function: extend file system; |
| 45 | * | 45 | * |
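For BLKTODMAPN() above, my reading of the constants (not stated in the source): each dmap page covers 2^13 blocks, the >>23 and >>33 terms add the L0/L1 control pages those dmaps need, and "+ 3 + 1" covers the fixed top-level pages. A quick evaluation for one block number:

```c
#include <stdio.h>

/* copied from the hunk above */
#define BLKTODMAPN(b) \
	(((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)

int main(void)
{
	long long b = 1LL << 20;	/* block 1048576 */

	/* 128 dmap pages + 0 L0 pages + 0 L1 pages + 4 fixed = 132 */
	printf("bmap page for block %lld: %lld\n", b, BLKTODMAPN(b));
	return 0;
}
```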
| @@ -48,9 +48,9 @@ | |||
| 48 | * workspace space | 48 | * workspace space |
| 49 | * | 49 | * |
| 50 | * input: | 50 | * input: |
| 51 | * new LVSize: in LV blocks (required) | 51 | * new LVSize: in LV blocks (required) |
| 52 | * new LogSize: in LV blocks (optional) | 52 | * new LogSize: in LV blocks (optional) |
| 53 | * new FSSize: in LV blocks (optional) | 53 | * new FSSize: in LV blocks (optional) |
| 54 | * | 54 | * |
| 55 | * new configuration: | 55 | * new configuration: |
| 56 | * 1. set new LogSize as specified or default from new LVSize; | 56 | * 1. set new LogSize as specified or default from new LVSize; |
| @@ -125,8 +125,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
| 125 | } | 125 | } |
| 126 | 126 | ||
| 127 | /* | 127 | /* |
| 128 | * reconfigure LV spaces | 128 | * reconfigure LV spaces |
| 129 | * --------------------- | 129 | * --------------------- |
| 130 | * | 130 | * |
| 131 | * validate new size, or, if not specified, determine new size | 131 | * validate new size, or, if not specified, determine new size |
| 132 | */ | 132 | */ |
| @@ -198,7 +198,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
| 198 | log_formatted = 1; | 198 | log_formatted = 1; |
| 199 | } | 199 | } |
| 200 | /* | 200 | /* |
| 201 | * quiesce file system | 201 | * quiesce file system |
| 202 | * | 202 | * |
| 203 | * (prepare to move the inline log and to prevent map update) | 203 | * (prepare to move the inline log and to prevent map update) |
| 204 | * | 204 | * |
| @@ -270,8 +270,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
| 270 | } | 270 | } |
| 271 | 271 | ||
| 272 | /* | 272 | /* |
| 273 | * extend block allocation map | 273 | * extend block allocation map |
| 274 | * --------------------------- | 274 | * --------------------------- |
| 275 | * | 275 | * |
| 276 | * extendfs() for new extension, retry after crash recovery; | 276 | * extendfs() for new extension, retry after crash recovery; |
| 277 | * | 277 | * |
| @@ -283,7 +283,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
| 283 | * s_size: aggregate size in physical blocks; | 283 | * s_size: aggregate size in physical blocks; |
| 284 | */ | 284 | */ |
| 285 | /* | 285 | /* |
| 286 | * compute the new block allocation map configuration | 286 | * compute the new block allocation map configuration |
| 287 | * | 287 | * |
| 288 | * map dinode: | 288 | * map dinode: |
| 289 | * di_size: map file size in byte; | 289 | * di_size: map file size in byte; |
| @@ -301,7 +301,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
| 301 | newNpages = BLKTODMAPN(t64) + 1; | 301 | newNpages = BLKTODMAPN(t64) + 1; |
| 302 | 302 | ||
| 303 | /* | 303 | /* |
| 304 | * extend map from current map (WITHOUT growing mapfile) | 304 | * extend map from current map (WITHOUT growing mapfile) |
| 305 | * | 305 | * |
| 306 | * map new extension with unmapped part of the last partial | 306 | * map new extension with unmapped part of the last partial |
| 307 | * dmap page, if applicable, and extra page(s) allocated | 307 | * dmap page, if applicable, and extra page(s) allocated |
| @@ -341,8 +341,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
| 341 | XSize -= nblocks; | 341 | XSize -= nblocks; |
| 342 | 342 | ||
| 343 | /* | 343 | /* |
| 344 | * grow map file to cover remaining extension | 344 | * grow map file to cover remaining extension |
| 345 | * and/or one extra dmap page for next extendfs(); | 345 | * and/or one extra dmap page for next extendfs(); |
| 346 | * | 346 | * |
| 347 | * allocate new map pages and its backing blocks, and | 347 | * allocate new map pages and its backing blocks, and |
| 348 | * update map file xtree | 348 | * update map file xtree |
| @@ -422,8 +422,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
| 422 | dbFinalizeBmap(ipbmap); | 422 | dbFinalizeBmap(ipbmap); |
| 423 | 423 | ||
| 424 | /* | 424 | /* |
| 425 | * update inode allocation map | 425 | * update inode allocation map |
| 426 | * --------------------------- | 426 | * --------------------------- |
| 427 | * | 427 | * |
| 428 | * move iag lists from old to new iag; | 428 | * move iag lists from old to new iag; |
| 429 | * agstart field is not updated for logredo() to reconstruct | 429 | * agstart field is not updated for logredo() to reconstruct |
| @@ -442,8 +442,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
| 442 | } | 442 | } |
| 443 | 443 | ||
| 444 | /* | 444 | /* |
| 445 | * finalize | 445 | * finalize |
| 446 | * -------- | 446 | * -------- |
| 447 | * | 447 | * |
| 448 | * extension is committed when on-disk super block is | 448 | * extension is committed when on-disk super block is |
| 449 | * updated with new descriptors: logredo will recover | 449 | * updated with new descriptors: logredo will recover |
| @@ -480,7 +480,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
| 480 | diFreeSpecial(ipbmap2); | 480 | diFreeSpecial(ipbmap2); |
| 481 | 481 | ||
| 482 | /* | 482 | /* |
| 483 | * update superblock | 483 | * update superblock |
| 484 | */ | 484 | */ |
| 485 | if ((rc = readSuper(sb, &bh))) | 485 | if ((rc = readSuper(sb, &bh))) |
| 486 | goto error_out; | 486 | goto error_out; |
| @@ -530,7 +530,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
| 530 | 530 | ||
| 531 | resume: | 531 | resume: |
| 532 | /* | 532 | /* |
| 533 | * resume file system transactions | 533 | * resume file system transactions |
| 534 | */ | 534 | */ |
| 535 | txResume(sb); | 535 | txResume(sb); |
| 536 | 536 | ||
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index b753ba216450..b2375f0774b7 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c | |||
| @@ -63,9 +63,9 @@ | |||
| 63 | * | 63 | * |
| 64 | * On-disk: | 64 | * On-disk: |
| 65 | * | 65 | * |
| 66 | * FEALISTs are stored on disk using blocks allocated by dbAlloc() and | 66 | * FEALISTs are stored on disk using blocks allocated by dbAlloc() and |
| 67 | * written directly. An EA list may be in-lined in the inode if there is | 67 | * written directly. An EA list may be in-lined in the inode if there is |
| 68 | * sufficient room available. | 68 | * sufficient room available. |
| 69 | */ | 69 | */ |
| 70 | 70 | ||
| 71 | struct ea_buffer { | 71 | struct ea_buffer { |
| @@ -590,7 +590,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) | |||
| 590 | size_check: | 590 | size_check: |
| 591 | if (EALIST_SIZE(ea_buf->xattr) != ea_size) { | 591 | if (EALIST_SIZE(ea_buf->xattr) != ea_size) { |
| 592 | printk(KERN_ERR "ea_get: invalid extended attribute\n"); | 592 | printk(KERN_ERR "ea_get: invalid extended attribute\n"); |
| 593 | dump_mem("xattr", ea_buf->xattr, ea_size); | 593 | print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, |
| 594 | ea_buf->xattr, ea_size, 1); | ||
| 594 | ea_release(inode, ea_buf); | 595 | ea_release(inode, ea_buf); |
| 595 | rc = -EIO; | 596 | rc = -EIO; |
| 596 | goto clean_up; | 597 | goto clean_up; |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 74f30e0c0381..98e78e2f18d6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
| @@ -165,7 +165,6 @@ static inline char * task_state(struct task_struct *p, char *buffer) | |||
| 165 | rcu_read_lock(); | 165 | rcu_read_lock(); |
| 166 | buffer += sprintf(buffer, | 166 | buffer += sprintf(buffer, |
| 167 | "State:\t%s\n" | 167 | "State:\t%s\n" |
| 168 | "SleepAVG:\t%lu%%\n" | ||
| 169 | "Tgid:\t%d\n" | 168 | "Tgid:\t%d\n" |
| 170 | "Pid:\t%d\n" | 169 | "Pid:\t%d\n" |
| 171 | "PPid:\t%d\n" | 170 | "PPid:\t%d\n" |
| @@ -173,7 +172,6 @@ static inline char * task_state(struct task_struct *p, char *buffer) | |||
| 173 | "Uid:\t%d\t%d\t%d\t%d\n" | 172 | "Uid:\t%d\t%d\t%d\t%d\n" |
| 174 | "Gid:\t%d\t%d\t%d\t%d\n", | 173 | "Gid:\t%d\t%d\t%d\t%d\n", |
| 175 | get_task_state(p), | 174 | get_task_state(p), |
| 176 | (p->sleep_avg/1024)*100/(1020000000/1024), | ||
| 177 | p->tgid, p->pid, | 175 | p->tgid, p->pid, |
| 178 | pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, | 176 | pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, |
| 179 | pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, | 177 | pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, |
| @@ -312,6 +310,41 @@ int proc_pid_status(struct task_struct *task, char * buffer) | |||
| 312 | return buffer - orig; | 310 | return buffer - orig; |
| 313 | } | 311 | } |
| 314 | 312 | ||
| 313 | static clock_t task_utime(struct task_struct *p) | ||
| 314 | { | ||
| 315 | clock_t utime = cputime_to_clock_t(p->utime), | ||
| 316 | total = utime + cputime_to_clock_t(p->stime); | ||
| 317 | u64 temp; | ||
| 318 | |||
| 319 | /* | ||
| 320 | * Use CFS's precise accounting: | ||
| 321 | */ | ||
| 322 | temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); | ||
| 323 | |||
| 324 | if (total) { | ||
| 325 | temp *= utime; | ||
| 326 | do_div(temp, total); | ||
| 327 | } | ||
| 328 | utime = (clock_t)temp; | ||
| 329 | |||
| 330 | return utime; | ||
| 331 | } | ||
| 332 | |||
| 333 | static clock_t task_stime(struct task_struct *p) | ||
| 334 | { | ||
| 335 | clock_t stime = cputime_to_clock_t(p->stime); | ||
| 336 | |||
| 337 | /* | ||
| 338 | * Use CFS's precise accounting. (we subtract utime from | ||
| 339 | * the total, to make sure the total observed by userspace | ||
| 340 | * grows monotonically - apps rely on that): | ||
| 341 | */ | ||
| 342 | stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p); | ||
| 343 | |||
| 344 | return stime; | ||
| 345 | } | ||
| 346 | |||
| 347 | |||
| 315 | static int do_task_stat(struct task_struct *task, char * buffer, int whole) | 348 | static int do_task_stat(struct task_struct *task, char * buffer, int whole) |
| 316 | { | 349 | { |
| 317 | unsigned long vsize, eip, esp, wchan = ~0UL; | 350 | unsigned long vsize, eip, esp, wchan = ~0UL; |
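The arithmetic in task_utime()/task_stime() above: scale the precise CFS runtime by the tick-sampled utime:stime ratio, then derive stime by subtraction so utime + stime stays monotonic for userspace. A standalone rerun with made-up numbers:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t utime_ticks = 300;	/* tick-sampled user time */
	uint64_t stime_ticks = 100;	/* tick-sampled system time */
	uint64_t precise = 420;		/* se.sum_exec_runtime, in ticks */

	uint64_t total = utime_ticks + stime_ticks;
	/* task_utime(): if total is 0, all of the runtime counts as user */
	uint64_t utime = total ? precise * utime_ticks / total : precise;
	/* task_stime(): total minus utime, so the sum never drifts back */
	uint64_t stime = precise - utime;

	printf("utime=%llu stime=%llu\n",	/* 315 and 105 */
	       (unsigned long long)utime, (unsigned long long)stime);
	return 0;
}
```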
| @@ -326,7 +359,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
| 326 | unsigned long long start_time; | 359 | unsigned long long start_time; |
| 327 | unsigned long cmin_flt = 0, cmaj_flt = 0; | 360 | unsigned long cmin_flt = 0, cmaj_flt = 0; |
| 328 | unsigned long min_flt = 0, maj_flt = 0; | 361 | unsigned long min_flt = 0, maj_flt = 0; |
| 329 | cputime_t cutime, cstime, utime, stime; | 362 | cputime_t cutime, cstime; |
| 363 | clock_t utime, stime; | ||
| 330 | unsigned long rsslim = 0; | 364 | unsigned long rsslim = 0; |
| 331 | char tcomm[sizeof(task->comm)]; | 365 | char tcomm[sizeof(task->comm)]; |
| 332 | unsigned long flags; | 366 | unsigned long flags; |
| @@ -344,7 +378,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
| 344 | 378 | ||
| 345 | sigemptyset(&sigign); | 379 | sigemptyset(&sigign); |
| 346 | sigemptyset(&sigcatch); | 380 | sigemptyset(&sigcatch); |
| 347 | cutime = cstime = utime = stime = cputime_zero; | 381 | cutime = cstime = cputime_zero; |
| 382 | utime = stime = 0; | ||
| 348 | 383 | ||
| 349 | rcu_read_lock(); | 384 | rcu_read_lock(); |
| 350 | if (lock_task_sighand(task, &flags)) { | 385 | if (lock_task_sighand(task, &flags)) { |
| @@ -370,15 +405,15 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
| 370 | do { | 405 | do { |
| 371 | min_flt += t->min_flt; | 406 | min_flt += t->min_flt; |
| 372 | maj_flt += t->maj_flt; | 407 | maj_flt += t->maj_flt; |
| 373 | utime = cputime_add(utime, t->utime); | 408 | utime += task_utime(t); |
| 374 | stime = cputime_add(stime, t->stime); | 409 | stime += task_stime(t); |
| 375 | t = next_thread(t); | 410 | t = next_thread(t); |
| 376 | } while (t != task); | 411 | } while (t != task); |
| 377 | 412 | ||
| 378 | min_flt += sig->min_flt; | 413 | min_flt += sig->min_flt; |
| 379 | maj_flt += sig->maj_flt; | 414 | maj_flt += sig->maj_flt; |
| 380 | utime = cputime_add(utime, sig->utime); | 415 | utime += cputime_to_clock_t(sig->utime); |
| 381 | stime = cputime_add(stime, sig->stime); | 416 | stime += cputime_to_clock_t(sig->stime); |
| 382 | } | 417 | } |
| 383 | 418 | ||
| 384 | sid = signal_session(sig); | 419 | sid = signal_session(sig); |
| @@ -394,8 +429,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
| 394 | if (!whole) { | 429 | if (!whole) { |
| 395 | min_flt = task->min_flt; | 430 | min_flt = task->min_flt; |
| 396 | maj_flt = task->maj_flt; | 431 | maj_flt = task->maj_flt; |
| 397 | utime = task->utime; | 432 | utime = task_utime(task); |
| 398 | stime = task->stime; | 433 | stime = task_stime(task); |
| 399 | } | 434 | } |
| 400 | 435 | ||
| 401 | /* scale priority and nice values from timeslices to -20..20 */ | 436 | /* scale priority and nice values from timeslices to -20..20 */ |
| @@ -426,8 +461,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
| 426 | cmin_flt, | 461 | cmin_flt, |
| 427 | maj_flt, | 462 | maj_flt, |
| 428 | cmaj_flt, | 463 | cmaj_flt, |
| 429 | cputime_to_clock_t(utime), | 464 | utime, |
| 430 | cputime_to_clock_t(stime), | 465 | stime, |
| 431 | cputime_to_clock_t(cutime), | 466 | cputime_to_clock_t(cutime), |
| 432 | cputime_to_clock_t(cstime), | 467 | cputime_to_clock_t(cstime), |
| 433 | priority, | 468 | priority, |
diff --git a/fs/proc/base.c b/fs/proc/base.c index a5fa1fdafc4e..46ea5d56e1bb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -296,7 +296,7 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) | |||
| 296 | */ | 296 | */ |
| 297 | static int proc_pid_schedstat(struct task_struct *task, char *buffer) | 297 | static int proc_pid_schedstat(struct task_struct *task, char *buffer) |
| 298 | { | 298 | { |
| 299 | return sprintf(buffer, "%lu %lu %lu\n", | 299 | return sprintf(buffer, "%llu %llu %lu\n", |
| 300 | task->sched_info.cpu_time, | 300 | task->sched_info.cpu_time, |
| 301 | task->sched_info.run_delay, | 301 | task->sched_info.run_delay, |
| 302 | task->sched_info.pcnt); | 302 | task->sched_info.pcnt); |
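The "%lu" -> "%llu" change above tracks sched_info's counters widening to unsigned long long later in this patch; a mismatched conversion would misread the vararg list where long is 32-bit. A userspace analogue of the correct pairing:

```c
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t cpu_time = 5000000000ULL;	/* does not fit in 32 bits */

	printf("%" PRIu64 "\n", cpu_time);	/* 64-bit format, 64-bit arg */
	/* printf("%lu", cpu_time) would decode garbage on 32-bit longs */
	return 0;
}
```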
| @@ -929,6 +929,69 @@ static const struct file_operations proc_fault_inject_operations = { | |||
| 929 | }; | 929 | }; |
| 930 | #endif | 930 | #endif |
| 931 | 931 | ||
| 932 | #ifdef CONFIG_SCHED_DEBUG | ||
| 933 | /* | ||
| 934 | * Print out various scheduling related per-task fields: | ||
| 935 | */ | ||
| 936 | static int sched_show(struct seq_file *m, void *v) | ||
| 937 | { | ||
| 938 | struct inode *inode = m->private; | ||
| 939 | struct task_struct *p; | ||
| 940 | |||
| 941 | WARN_ON(!inode); | ||
| 942 | |||
| 943 | p = get_proc_task(inode); | ||
| 944 | if (!p) | ||
| 945 | return -ESRCH; | ||
| 946 | proc_sched_show_task(p, m); | ||
| 947 | |||
| 948 | put_task_struct(p); | ||
| 949 | |||
| 950 | return 0; | ||
| 951 | } | ||
| 952 | |||
| 953 | static ssize_t | ||
| 954 | sched_write(struct file *file, const char __user *buf, | ||
| 955 | size_t count, loff_t *offset) | ||
| 956 | { | ||
| 957 | struct inode *inode = file->f_path.dentry->d_inode; | ||
| 958 | struct task_struct *p; | ||
| 959 | |||
| 960 | WARN_ON(!inode); | ||
| 961 | |||
| 962 | p = get_proc_task(inode); | ||
| 963 | if (!p) | ||
| 964 | return -ESRCH; | ||
| 965 | proc_sched_set_task(p); | ||
| 966 | |||
| 967 | put_task_struct(p); | ||
| 968 | |||
| 969 | return count; | ||
| 970 | } | ||
| 971 | |||
| 972 | static int sched_open(struct inode *inode, struct file *filp) | ||
| 973 | { | ||
| 974 | int ret; | ||
| 975 | |||
| 976 | ret = single_open(filp, sched_show, NULL); | ||
| 977 | if (!ret) { | ||
| 978 | struct seq_file *m = filp->private_data; | ||
| 979 | |||
| 980 | m->private = inode; | ||
| 981 | } | ||
| 982 | return ret; | ||
| 983 | } | ||
| 984 | |||
| 985 | static const struct file_operations proc_pid_sched_operations = { | ||
| 986 | .open = sched_open, | ||
| 987 | .read = seq_read, | ||
| 988 | .write = sched_write, | ||
| 989 | .llseek = seq_lseek, | ||
| 990 | .release = seq_release, | ||
| 991 | }; | ||
| 992 | |||
| 993 | #endif | ||
| 994 | |||
| 932 | static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) | 995 | static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) |
| 933 | { | 996 | { |
| 934 | struct inode *inode = dentry->d_inode; | 997 | struct inode *inode = dentry->d_inode; |
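Usage of the file these fops back, as wired up above: reading /proc/<pid>/sched dumps the per-task CFS fields via proc_sched_show_task(), and any write resets them via proc_sched_set_task(). A minimal reader:

```c
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/sched", "r");

	if (!f)
		return 1;	/* kernel built without CONFIG_SCHED_DEBUG */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
```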
| @@ -1963,6 +2026,9 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
| 1963 | INF("environ", S_IRUSR, pid_environ), | 2026 | INF("environ", S_IRUSR, pid_environ), |
| 1964 | INF("auxv", S_IRUSR, pid_auxv), | 2027 | INF("auxv", S_IRUSR, pid_auxv), |
| 1965 | INF("status", S_IRUGO, pid_status), | 2028 | INF("status", S_IRUGO, pid_status), |
| 2029 | #ifdef CONFIG_SCHED_DEBUG | ||
| 2030 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), | ||
| 2031 | #endif | ||
| 1966 | INF("cmdline", S_IRUGO, pid_cmdline), | 2032 | INF("cmdline", S_IRUGO, pid_cmdline), |
| 1967 | INF("stat", S_IRUGO, tgid_stat), | 2033 | INF("stat", S_IRUGO, tgid_stat), |
| 1968 | INF("statm", S_IRUGO, pid_statm), | 2034 | INF("statm", S_IRUGO, pid_statm), |
| @@ -2247,6 +2313,9 @@ static const struct pid_entry tid_base_stuff[] = { | |||
| 2247 | INF("environ", S_IRUSR, pid_environ), | 2313 | INF("environ", S_IRUSR, pid_environ), |
| 2248 | INF("auxv", S_IRUSR, pid_auxv), | 2314 | INF("auxv", S_IRUSR, pid_auxv), |
| 2249 | INF("status", S_IRUGO, pid_status), | 2315 | INF("status", S_IRUGO, pid_status), |
| 2316 | #ifdef CONFIG_SCHED_DEBUG | ||
| 2317 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), | ||
| 2318 | #endif | ||
| 2250 | INF("cmdline", S_IRUGO, pid_cmdline), | 2319 | INF("cmdline", S_IRUGO, pid_cmdline), |
| 2251 | INF("stat", S_IRUGO, tid_stat), | 2320 | INF("stat", S_IRUGO, tid_stat), |
| 2252 | INF("statm", S_IRUGO, pid_statm), | 2321 | INF("statm", S_IRUGO, pid_statm), |
diff --git a/include/asm-generic/bitops/sched.h b/include/asm-generic/bitops/sched.h index 815bb0148060..604fab7031a6 100644 --- a/include/asm-generic/bitops/sched.h +++ b/include/asm-generic/bitops/sched.h | |||
| @@ -6,28 +6,23 @@ | |||
| 6 | 6 | ||
| 7 | /* | 7 | /* |
| 8 | * Every architecture must define this function. It's the fastest | 8 | * Every architecture must define this function. It's the fastest |
| 9 | * way of searching a 140-bit bitmap where the first 100 bits are | 9 | * way of searching a 100-bit bitmap. It's guaranteed that at least |
| 10 | * unlikely to be set. It's guaranteed that at least one of the 140 | 10 | * one of the 100 bits is set. |
| 11 | * bits is cleared. | ||
| 12 | */ | 11 | */ |
| 13 | static inline int sched_find_first_bit(const unsigned long *b) | 12 | static inline int sched_find_first_bit(const unsigned long *b) |
| 14 | { | 13 | { |
| 15 | #if BITS_PER_LONG == 64 | 14 | #if BITS_PER_LONG == 64 |
| 16 | if (unlikely(b[0])) | 15 | if (b[0]) |
| 17 | return __ffs(b[0]); | 16 | return __ffs(b[0]); |
| 18 | if (likely(b[1])) | 17 | return __ffs(b[1]) + 64; |
| 19 | return __ffs(b[1]) + 64; | ||
| 20 | return __ffs(b[2]) + 128; | ||
| 21 | #elif BITS_PER_LONG == 32 | 18 | #elif BITS_PER_LONG == 32 |
| 22 | if (unlikely(b[0])) | 19 | if (b[0]) |
| 23 | return __ffs(b[0]); | 20 | return __ffs(b[0]); |
| 24 | if (unlikely(b[1])) | 21 | if (b[1]) |
| 25 | return __ffs(b[1]) + 32; | 22 | return __ffs(b[1]) + 32; |
| 26 | if (unlikely(b[2])) | 23 | if (b[2]) |
| 27 | return __ffs(b[2]) + 64; | 24 | return __ffs(b[2]) + 64; |
| 28 | if (b[3]) | 25 | return __ffs(b[3]) + 96; |
| 29 | return __ffs(b[3]) + 96; | ||
| 30 | return __ffs(b[4]) + 128; | ||
| 31 | #else | 26 | #else |
| 32 | #error BITS_PER_LONG not defined | 27 | #error BITS_PER_LONG not defined |
| 33 | #endif | 28 | #endif |
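The bitmap shrinks from 140 to 100 bits here, presumably because with CFS only the RT priorities (0..99) remain bitmap-indexed. A portable toy version of the unrolled search above, using a gcc builtin in place of __ffs:

```c
#include <stdint.h>
#include <stdio.h>

static int find_first_set(const uint32_t *b, int nwords)
{
	for (int i = 0; i < nwords; i++)
		if (b[i])
			return i * 32 + __builtin_ctz(b[i]);
	return -1;	/* unreachable if, as above, one bit is always set */
}

int main(void)
{
	uint32_t b[4] = { 0, 1u << 5, 0, 0 };	/* 100-bit map, bit 37 set */

	printf("%d\n", find_first_set(b, 4));	/* prints 37 */
	return 0;
}
```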
diff --git a/include/asm-mips/mach-au1x00/au1xxx_ide.h b/include/asm-mips/mach-au1x00/au1xxx_ide.h index 8fcae21adbd5..4663e8b415c9 100644 --- a/include/asm-mips/mach-au1x00/au1xxx_ide.h +++ b/include/asm-mips/mach-au1x00/au1xxx_ide.h | |||
| @@ -88,26 +88,26 @@ static const struct drive_list_entry dma_white_list [] = { | |||
| 88 | /* | 88 | /* |
| 89 | * Hitachi | 89 | * Hitachi |
| 90 | */ | 90 | */ |
| 91 | { "HITACHI_DK14FA-20" , "ALL" }, | 91 | { "HITACHI_DK14FA-20" , NULL }, |
| 92 | { "HTS726060M9AT00" , "ALL" }, | 92 | { "HTS726060M9AT00" , NULL }, |
| 93 | /* | 93 | /* |
| 94 | * Maxtor | 94 | * Maxtor |
| 95 | */ | 95 | */ |
| 96 | { "Maxtor 6E040L0" , "ALL" }, | 96 | { "Maxtor 6E040L0" , NULL }, |
| 97 | { "Maxtor 6Y080P0" , "ALL" }, | 97 | { "Maxtor 6Y080P0" , NULL }, |
| 98 | { "Maxtor 6Y160P0" , "ALL" }, | 98 | { "Maxtor 6Y160P0" , NULL }, |
| 99 | /* | 99 | /* |
| 100 | * Seagate | 100 | * Seagate |
| 101 | */ | 101 | */ |
| 102 | { "ST3120026A" , "ALL" }, | 102 | { "ST3120026A" , NULL }, |
| 103 | { "ST320014A" , "ALL" }, | 103 | { "ST320014A" , NULL }, |
| 104 | { "ST94011A" , "ALL" }, | 104 | { "ST94011A" , NULL }, |
| 105 | { "ST340016A" , "ALL" }, | 105 | { "ST340016A" , NULL }, |
| 106 | /* | 106 | /* |
| 107 | * Western Digital | 107 | * Western Digital |
| 108 | */ | 108 | */ |
| 109 | { "WDC WD400UE-00HCT0" , "ALL" }, | 109 | { "WDC WD400UE-00HCT0" , NULL }, |
| 110 | { "WDC WD400JB-00JJC0" , "ALL" }, | 110 | { "WDC WD400JB-00JJC0" , NULL }, |
| 111 | { NULL , NULL } | 111 | { NULL , NULL } |
| 112 | }; | 112 | }; |
| 113 | 113 | ||
| @@ -116,9 +116,9 @@ static const struct drive_list_entry dma_black_list [] = { | |||
| 116 | /* | 116 | /* |
| 117 | * Western Digital | 117 | * Western Digital |
| 118 | */ | 118 | */ |
| 119 | { "WDC WD100EB-00CGH0" , "ALL" }, | 119 | { "WDC WD100EB-00CGH0" , NULL }, |
| 120 | { "WDC WD200BB-00AUA1" , "ALL" }, | 120 | { "WDC WD200BB-00AUA1" , NULL }, |
| 121 | { "WDC AC24300L" , "ALL" }, | 121 | { "WDC AC24300L" , NULL }, |
| 122 | { NULL , NULL } | 122 | { NULL , NULL } |
| 123 | }; | 123 | }; |
| 124 | #endif | 124 | #endif |
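My guess at the matching rule the "ALL" -> NULL conversion enables (a sketch, not the driver's actual helper): a NULL firmware field now acts as a wildcard, so no magic string comparison is needed.

```c
#include <stdio.h>
#include <string.h>

struct drive_list_entry {
	const char *id_model;
	const char *id_firmware;	/* NULL == any firmware revision */
};

static int in_drive_list(const char *model, const char *fw_rev,
			 const struct drive_list_entry *t)
{
	for (; t->id_model; t++)
		if (!strcmp(t->id_model, model) &&
		    (!t->id_firmware || strstr(fw_rev, t->id_firmware)))
			return 1;
	return 0;
}

int main(void)
{
	static const struct drive_list_entry list[] = {
		{ "ST3120026A", NULL },
		{ NULL, NULL }
	};

	printf("%d\n", in_drive_list("ST3120026A", "3.06", list)); /* 1 */
	return 0;
}
```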
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 7803014f3a11..8d302298a161 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h | |||
| @@ -79,6 +79,19 @@ | |||
| 79 | #endif | 79 | #endif |
| 80 | 80 | ||
| 81 | #ifdef CONFIG_PREEMPT | 81 | #ifdef CONFIG_PREEMPT |
| 82 | # define PREEMPT_CHECK_OFFSET 1 | ||
| 83 | #else | ||
| 84 | # define PREEMPT_CHECK_OFFSET 0 | ||
| 85 | #endif | ||
| 86 | |||
| 87 | /* | ||
| 88 | * Check whether we were atomic before we did preempt_disable(): | ||
| 89 | * (used by the scheduler) | ||
| 90 | */ | ||
| 91 | #define in_atomic_preempt_off() \ | ||
| 92 | ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) | ||
| 93 | |||
| 94 | #ifdef CONFIG_PREEMPT | ||
| 82 | # define preemptible() (preempt_count() == 0 && !irqs_disabled()) | 95 | # define preemptible() (preempt_count() == 0 && !irqs_disabled()) |
| 83 | # define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1) | 96 | # define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1) |
| 84 | #else | 97 | #else |
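What in_atomic_preempt_off() above checks, modeled in userspace (the PREEMPT_ACTIVE masking is elided): once the scheduler has done its own preempt_disable(), a clean caller's nesting depth is exactly PREEMPT_CHECK_OFFSET; any other value means schedule() was entered while atomic.

```c
#include <stdio.h>

#define PREEMPT_CHECK_OFFSET 1	/* CONFIG_PREEMPT=y case */

static int preempt_count;	/* stand-in for the per-thread counter */

static int in_atomic_preempt_off(void)
{
	return preempt_count != PREEMPT_CHECK_OFFSET;
}

int main(void)
{
	preempt_count++;	/* the scheduler's own preempt_disable() */
	printf("atomic? %d\n", in_atomic_preempt_off());	/* 0: clean */

	preempt_count++;	/* e.g. a lock still held across schedule() */
	printf("atomic? %d\n", in_atomic_preempt_off());	/* 1: bug */
	return 0;
}
```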
diff --git a/include/linux/ide.h b/include/linux/ide.h index 1e365acdd369..19ab25804056 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <asm/system.h> | 25 | #include <asm/system.h> |
| 26 | #include <asm/io.h> | 26 | #include <asm/io.h> |
| 27 | #include <asm/semaphore.h> | 27 | #include <asm/semaphore.h> |
| 28 | #include <asm/mutex.h> | ||
| 28 | 29 | ||
| 29 | /****************************************************************************** | 30 | /****************************************************************************** |
| 30 | * IDE driver configuration options (play with these as desired): | 31 | * IDE driver configuration options (play with these as desired): |
| @@ -685,6 +686,8 @@ typedef struct hwif_s { | |||
| 685 | u8 mwdma_mask; | 686 | u8 mwdma_mask; |
| 686 | u8 swdma_mask; | 687 | u8 swdma_mask; |
| 687 | 688 | ||
| 689 | u8 cbl; /* cable type */ | ||
| 690 | |||
| 688 | hwif_chipset_t chipset; /* sub-module for tuning.. */ | 691 | hwif_chipset_t chipset; /* sub-module for tuning.. */ |
| 689 | 692 | ||
| 690 | struct pci_dev *pci_dev; /* for pci chipsets */ | 693 | struct pci_dev *pci_dev; /* for pci chipsets */ |
| @@ -735,8 +738,8 @@ typedef struct hwif_s { | |||
| 735 | void (*ide_dma_clear_irq)(ide_drive_t *drive); | 738 | void (*ide_dma_clear_irq)(ide_drive_t *drive); |
| 736 | void (*dma_host_on)(ide_drive_t *drive); | 739 | void (*dma_host_on)(ide_drive_t *drive); |
| 737 | void (*dma_host_off)(ide_drive_t *drive); | 740 | void (*dma_host_off)(ide_drive_t *drive); |
| 738 | int (*ide_dma_lostirq)(ide_drive_t *drive); | 741 | void (*dma_lost_irq)(ide_drive_t *drive); |
| 739 | int (*ide_dma_timeout)(ide_drive_t *drive); | 742 | void (*dma_timeout)(ide_drive_t *drive); |
| 740 | 743 | ||
| 741 | void (*OUTB)(u8 addr, unsigned long port); | 744 | void (*OUTB)(u8 addr, unsigned long port); |
| 742 | void (*OUTBSYNC)(ide_drive_t *drive, u8 addr, unsigned long port); | 745 | void (*OUTBSYNC)(ide_drive_t *drive, u8 addr, unsigned long port); |
| @@ -791,7 +794,6 @@ typedef struct hwif_s { | |||
| 791 | unsigned sharing_irq: 1; /* 1 = sharing irq with another hwif */ | 794 | unsigned sharing_irq: 1; /* 1 = sharing irq with another hwif */ |
| 792 | unsigned reset : 1; /* reset after probe */ | 795 | unsigned reset : 1; /* reset after probe */ |
| 793 | unsigned autodma : 1; /* auto-attempt using DMA at boot */ | 796 | unsigned autodma : 1; /* auto-attempt using DMA at boot */ |
| 794 | unsigned udma_four : 1; /* 1=ATA-66 capable, 0=default */ | ||
| 795 | unsigned no_lba48 : 1; /* 1 = cannot do LBA48 */ | 797 | unsigned no_lba48 : 1; /* 1 = cannot do LBA48 */ |
| 796 | unsigned no_lba48_dma : 1; /* 1 = cannot do LBA48 DMA */ | 798 | unsigned no_lba48_dma : 1; /* 1 = cannot do LBA48 DMA */ |
| 797 | unsigned auto_poll : 1; /* supports nop auto-poll */ | 799 | unsigned auto_poll : 1; /* supports nop auto-poll */ |
| @@ -863,7 +865,7 @@ typedef struct hwgroup_s { | |||
| 863 | 865 | ||
| 864 | typedef struct ide_driver_s ide_driver_t; | 866 | typedef struct ide_driver_s ide_driver_t; |
| 865 | 867 | ||
| 866 | extern struct semaphore ide_setting_sem; | 868 | extern struct mutex ide_setting_mtx; |
| 867 | 869 | ||
| 868 | int set_io_32bit(ide_drive_t *, int); | 870 | int set_io_32bit(ide_drive_t *, int); |
| 869 | int set_pio_mode(ide_drive_t *, int); | 871 | int set_pio_mode(ide_drive_t *, int); |
| @@ -1304,8 +1306,8 @@ extern int __ide_dma_check(ide_drive_t *); | |||
| 1304 | extern int ide_dma_setup(ide_drive_t *); | 1306 | extern int ide_dma_setup(ide_drive_t *); |
| 1305 | extern void ide_dma_start(ide_drive_t *); | 1307 | extern void ide_dma_start(ide_drive_t *); |
| 1306 | extern int __ide_dma_end(ide_drive_t *); | 1308 | extern int __ide_dma_end(ide_drive_t *); |
| 1307 | extern int __ide_dma_lostirq(ide_drive_t *); | 1309 | extern void ide_dma_lost_irq(ide_drive_t *); |
| 1308 | extern int __ide_dma_timeout(ide_drive_t *); | 1310 | extern void ide_dma_timeout(ide_drive_t *); |
| 1309 | #endif /* CONFIG_BLK_DEV_IDEDMA_PCI */ | 1311 | #endif /* CONFIG_BLK_DEV_IDEDMA_PCI */ |
| 1310 | 1312 | ||
| 1311 | #else | 1313 | #else |
| @@ -1382,11 +1384,11 @@ extern const ide_pio_timings_t ide_pio_timings[6]; | |||
| 1382 | 1384 | ||
| 1383 | 1385 | ||
| 1384 | extern spinlock_t ide_lock; | 1386 | extern spinlock_t ide_lock; |
| 1385 | extern struct semaphore ide_cfg_sem; | 1387 | extern struct mutex ide_cfg_mtx; |
| 1386 | /* | 1388 | /* |
| 1387 | * Structure locking: | 1389 | * Structure locking: |
| 1388 | * | 1390 | * |
| 1389 | * ide_cfg_sem and ide_lock together protect changes to | 1391 | * ide_cfg_mtx and ide_lock together protect changes to |
| 1390 | * ide_hwif_t->{next,hwgroup} | 1392 | * ide_hwif_t->{next,hwgroup} |
| 1391 | * ide_drive_t->next | 1393 | * ide_drive_t->next |
| 1392 | * | 1394 | * |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 693f0e6c54d4..cfb680585ab8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -34,6 +34,8 @@ | |||
| 34 | #define SCHED_FIFO 1 | 34 | #define SCHED_FIFO 1 |
| 35 | #define SCHED_RR 2 | 35 | #define SCHED_RR 2 |
| 36 | #define SCHED_BATCH 3 | 36 | #define SCHED_BATCH 3 |
| 37 | /* SCHED_ISO: reserved but not implemented yet */ | ||
| 38 | #define SCHED_IDLE 5 | ||
| 37 | 39 | ||
| 38 | #ifdef __KERNEL__ | 40 | #ifdef __KERNEL__ |
| 39 | 41 | ||
| @@ -130,6 +132,26 @@ extern unsigned long nr_active(void); | |||
| 130 | extern unsigned long nr_iowait(void); | 132 | extern unsigned long nr_iowait(void); |
| 131 | extern unsigned long weighted_cpuload(const int cpu); | 133 | extern unsigned long weighted_cpuload(const int cpu); |
| 132 | 134 | ||
| 135 | struct seq_file; | ||
| 136 | struct cfs_rq; | ||
| 137 | #ifdef CONFIG_SCHED_DEBUG | ||
| 138 | extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); | ||
| 139 | extern void proc_sched_set_task(struct task_struct *p); | ||
| 140 | extern void | ||
| 141 | print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now); | ||
| 142 | #else | ||
| 143 | static inline void | ||
| 144 | proc_sched_show_task(struct task_struct *p, struct seq_file *m) | ||
| 145 | { | ||
| 146 | } | ||
| 147 | static inline void proc_sched_set_task(struct task_struct *p) | ||
| 148 | { | ||
| 149 | } | ||
| 150 | static inline void | ||
| 151 | print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now) | ||
| 152 | { | ||
| 153 | } | ||
| 154 | #endif | ||
| 133 | 155 | ||
| 134 | /* | 156 | /* |
| 135 | * Task state bitmask. NOTE! These bits are also | 157 | * Task state bitmask. NOTE! These bits are also |
| @@ -193,6 +215,7 @@ struct task_struct; | |||
| 193 | extern void sched_init(void); | 215 | extern void sched_init(void); |
| 194 | extern void sched_init_smp(void); | 216 | extern void sched_init_smp(void); |
| 195 | extern void init_idle(struct task_struct *idle, int cpu); | 217 | extern void init_idle(struct task_struct *idle, int cpu); |
| 218 | extern void init_idle_bootup_task(struct task_struct *idle); | ||
| 196 | 219 | ||
| 197 | extern cpumask_t nohz_cpu_mask; | 220 | extern cpumask_t nohz_cpu_mask; |
| 198 | #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) | 221 | #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) |
| @@ -479,7 +502,7 @@ struct signal_struct { | |||
| 479 | * from jiffies_to_ns(utime + stime) if sched_clock uses something | 502 | * from jiffies_to_ns(utime + stime) if sched_clock uses something |
| 480 | * other than jiffies.) | 503 | * other than jiffies.) |
| 481 | */ | 504 | */ |
| 482 | unsigned long long sched_time; | 505 | unsigned long long sum_sched_runtime; |
| 483 | 506 | ||
| 484 | /* | 507 | /* |
| 485 | * We don't bother to synchronize most readers of this at all, | 508 | * We don't bother to synchronize most readers of this at all, |
| @@ -521,31 +544,6 @@ struct signal_struct { | |||
| 521 | #define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */ | 544 | #define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */ |
| 522 | #define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */ | 545 | #define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */ |
| 523 | 546 | ||
| 524 | |||
| 525 | /* | ||
| 526 | * Priority of a process goes from 0..MAX_PRIO-1, valid RT | ||
| 527 | * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH | ||
| 528 | * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority | ||
| 529 | * values are inverted: lower p->prio value means higher priority. | ||
| 530 | * | ||
| 531 | * The MAX_USER_RT_PRIO value allows the actual maximum | ||
| 532 | * RT priority to be separate from the value exported to | ||
| 533 | * user-space. This allows kernel threads to set their | ||
| 534 | * priority to a value higher than any user task. Note: | ||
| 535 | * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. | ||
| 536 | */ | ||
| 537 | |||
| 538 | #define MAX_USER_RT_PRIO 100 | ||
| 539 | #define MAX_RT_PRIO MAX_USER_RT_PRIO | ||
| 540 | |||
| 541 | #define MAX_PRIO (MAX_RT_PRIO + 40) | ||
| 542 | |||
| 543 | #define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) | ||
| 544 | #define rt_task(p) rt_prio((p)->prio) | ||
| 545 | #define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) | ||
| 546 | #define is_rt_policy(p) ((p) != SCHED_NORMAL && (p) != SCHED_BATCH) | ||
| 547 | #define has_rt_policy(p) unlikely(is_rt_policy((p)->policy)) | ||
| 548 | |||
| 549 | /* | 547 | /* |
| 550 | * Some day this will be a full-fledged user tracking system.. | 548 | * Some day this will be a full-fledged user tracking system.. |
| 551 | */ | 549 | */ |
| @@ -583,13 +581,13 @@ struct reclaim_state; | |||
| 583 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 581 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
| 584 | struct sched_info { | 582 | struct sched_info { |
| 585 | /* cumulative counters */ | 583 | /* cumulative counters */ |
| 586 | unsigned long cpu_time, /* time spent on the cpu */ | 584 | unsigned long pcnt; /* # of times run on this cpu */ |
| 587 | run_delay, /* time spent waiting on a runqueue */ | 585 | unsigned long long cpu_time, /* time spent on the cpu */ |
| 588 | pcnt; /* # of timeslices run on this cpu */ | 586 | run_delay; /* time spent waiting on a runqueue */ |
| 589 | 587 | ||
| 590 | /* timestamps */ | 588 | /* timestamps */ |
| 591 | unsigned long last_arrival, /* when we last ran on a cpu */ | 589 | unsigned long long last_arrival,/* when we last ran on a cpu */ |
| 592 | last_queued; /* when we were last queued to run */ | 590 | last_queued; /* when we were last queued to run */ |
| 593 | }; | 591 | }; |
| 594 | #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ | 592 | #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ |
| 595 | 593 | ||
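Note the shift in sched_info's units: cpu_time and run_delay grow from unsigned long jiffies counts to unsigned long long values, consistent with the nanosecond-granular accounting the delayacct changes further down rely on (they drop their jiffies-to-ns conversions). pcnt stays an unsigned long and is merely reordered so the 64-bit fields pack together.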
| @@ -639,18 +637,24 @@ static inline int sched_info_on(void) | |||
| 639 | #endif | 637 | #endif |
| 640 | } | 638 | } |
| 641 | 639 | ||
| 642 | enum idle_type | 640 | enum cpu_idle_type { |
| 643 | { | 641 | CPU_IDLE, |
| 644 | SCHED_IDLE, | 642 | CPU_NOT_IDLE, |
| 645 | NOT_IDLE, | 643 | CPU_NEWLY_IDLE, |
| 646 | NEWLY_IDLE, | 644 | CPU_MAX_IDLE_TYPES |
| 647 | MAX_IDLE_TYPES | ||
| 648 | }; | 645 | }; |
| 649 | 646 | ||
| 650 | /* | 647 | /* |
| 651 | * sched-domains (multiprocessor balancing) declarations: | 648 | * sched-domains (multiprocessor balancing) declarations: |
| 652 | */ | 649 | */ |
| 653 | #define SCHED_LOAD_SCALE 128UL /* increase resolution of load */ | 650 | |
| 651 | /* | ||
| 652 | * Increase resolution of nice-level calculations: | ||
| 653 | */ | ||
| 654 | #define SCHED_LOAD_SHIFT 10 | ||
| 655 | #define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) | ||
| 656 | |||
| 657 | #define SCHED_LOAD_SCALE_FUZZ (SCHED_LOAD_SCALE >> 5) | ||
| 654 | 658 | ||
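For reference, with SCHED_LOAD_SHIFT == 10 these evaluate to SCHED_LOAD_SCALE == 1024 and SCHED_LOAD_SCALE_FUZZ == 1024 >> 5 == 32: a nice-0 task now contributes 1024 load units rather than the old 128, and the fuzz term is 1/32nd of one such task.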
| 655 | #ifdef CONFIG_SMP | 659 | #ifdef CONFIG_SMP |
| 656 | #define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */ | 660 | #define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */ |
| @@ -719,14 +723,14 @@ struct sched_domain { | |||
| 719 | 723 | ||
| 720 | #ifdef CONFIG_SCHEDSTATS | 724 | #ifdef CONFIG_SCHEDSTATS |
| 721 | /* load_balance() stats */ | 725 | /* load_balance() stats */ |
| 722 | unsigned long lb_cnt[MAX_IDLE_TYPES]; | 726 | unsigned long lb_cnt[CPU_MAX_IDLE_TYPES]; |
| 723 | unsigned long lb_failed[MAX_IDLE_TYPES]; | 727 | unsigned long lb_failed[CPU_MAX_IDLE_TYPES]; |
| 724 | unsigned long lb_balanced[MAX_IDLE_TYPES]; | 728 | unsigned long lb_balanced[CPU_MAX_IDLE_TYPES]; |
| 725 | unsigned long lb_imbalance[MAX_IDLE_TYPES]; | 729 | unsigned long lb_imbalance[CPU_MAX_IDLE_TYPES]; |
| 726 | unsigned long lb_gained[MAX_IDLE_TYPES]; | 730 | unsigned long lb_gained[CPU_MAX_IDLE_TYPES]; |
| 727 | unsigned long lb_hot_gained[MAX_IDLE_TYPES]; | 731 | unsigned long lb_hot_gained[CPU_MAX_IDLE_TYPES]; |
| 728 | unsigned long lb_nobusyg[MAX_IDLE_TYPES]; | 732 | unsigned long lb_nobusyg[CPU_MAX_IDLE_TYPES]; |
| 729 | unsigned long lb_nobusyq[MAX_IDLE_TYPES]; | 733 | unsigned long lb_nobusyq[CPU_MAX_IDLE_TYPES]; |
| 730 | 734 | ||
| 731 | /* Active load balancing */ | 735 | /* Active load balancing */ |
| 732 | unsigned long alb_cnt; | 736 | unsigned long alb_cnt; |
| @@ -753,12 +757,6 @@ struct sched_domain { | |||
| 753 | extern int partition_sched_domains(cpumask_t *partition1, | 757 | extern int partition_sched_domains(cpumask_t *partition1, |
| 754 | cpumask_t *partition2); | 758 | cpumask_t *partition2); |
| 755 | 759 | ||
| 756 | /* | ||
| 757 | * Maximum cache size the migration-costs auto-tuning code will | ||
| 758 | * search from: | ||
| 759 | */ | ||
| 760 | extern unsigned int max_cache_size; | ||
| 761 | |||
| 762 | #endif /* CONFIG_SMP */ | 760 | #endif /* CONFIG_SMP */ |
| 763 | 761 | ||
| 764 | 762 | ||
| @@ -809,14 +807,86 @@ struct mempolicy; | |||
| 809 | struct pipe_inode_info; | 807 | struct pipe_inode_info; |
| 810 | struct uts_namespace; | 808 | struct uts_namespace; |
| 811 | 809 | ||
| 812 | enum sleep_type { | 810 | struct rq; |
| 813 | SLEEP_NORMAL, | 811 | struct sched_domain; |
| 814 | SLEEP_NONINTERACTIVE, | 812 | |
| 815 | SLEEP_INTERACTIVE, | 813 | struct sched_class { |
| 816 | SLEEP_INTERRUPTED, | 814 | struct sched_class *next; |
| 815 | |||
| 816 | void (*enqueue_task) (struct rq *rq, struct task_struct *p, | ||
| 817 | int wakeup, u64 now); | ||
| 818 | void (*dequeue_task) (struct rq *rq, struct task_struct *p, | ||
| 819 | int sleep, u64 now); | ||
| 820 | void (*yield_task) (struct rq *rq, struct task_struct *p); | ||
| 821 | |||
| 822 | void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); | ||
| 823 | |||
| 824 | struct task_struct * (*pick_next_task) (struct rq *rq, u64 now); | ||
| 825 | void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now); | ||
| 826 | |||
| 827 | int (*load_balance) (struct rq *this_rq, int this_cpu, | ||
| 828 | struct rq *busiest, | ||
| 829 | unsigned long max_nr_move, unsigned long max_load_move, | ||
| 830 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
| 831 | int *all_pinned, unsigned long *total_load_moved); | ||
| 832 | |||
| 833 | void (*set_curr_task) (struct rq *rq); | ||
| 834 | void (*task_tick) (struct rq *rq, struct task_struct *p); | ||
| 835 | void (*task_new) (struct rq *rq, struct task_struct *p); | ||
| 817 | }; | 836 | }; |
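The new sched_class vtable is what lets CFS and the RT code coexist: the core scheduler calls through these hooks instead of open-coding a single policy. A minimal sketch of the dispatch pattern (hypothetical helper and assumed class-list head, not part of this patch):

	extern struct sched_class *sched_class_highest;	/* assumed list head */

	static struct task_struct *pick_next(struct rq *rq, u64 now)
	{
		struct sched_class *class;
		struct task_struct *p;

		/* ask each class, highest priority first */
		for (class = sched_class_highest; class; class = class->next) {
			p = class->pick_next_task(rq, now);
			if (p)
				return p;
		}
		return NULL;	/* not reached: the idle class always returns a task */
	}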
| 818 | 837 | ||
| 819 | struct prio_array; | 838 | struct load_weight { |
| 839 | unsigned long weight, inv_weight; | ||
| 840 | }; | ||
| 841 | |||
| 842 | /* | ||
| 843 | * CFS stats for a schedulable entity (task, task-group etc.) | ||
| 844 | * | ||
| 845 | * Current field usage histogram: | ||
| 846 | * | ||
| 847 | * 4 se->block_start | ||
| 848 | * 4 se->run_node | ||
| 849 | * 4 se->sleep_start | ||
| 850 | * 4 se->sleep_start_fair | ||
| 851 | * 6 se->load.weight | ||
| 852 | * 7 se->delta_fair | ||
| 853 | * 15 se->wait_runtime | ||
| 854 | */ | ||
| 855 | struct sched_entity { | ||
| 856 | long wait_runtime; | ||
| 857 | unsigned long delta_fair_run; | ||
| 858 | unsigned long delta_fair_sleep; | ||
| 859 | unsigned long delta_exec; | ||
| 860 | s64 fair_key; | ||
| 861 | struct load_weight load; /* for load-balancing */ | ||
| 862 | struct rb_node run_node; | ||
| 863 | unsigned int on_rq; | ||
| 864 | |||
| 865 | u64 wait_start_fair; | ||
| 866 | u64 wait_start; | ||
| 867 | u64 exec_start; | ||
| 868 | u64 sleep_start; | ||
| 869 | u64 sleep_start_fair; | ||
| 870 | u64 block_start; | ||
| 871 | u64 sleep_max; | ||
| 872 | u64 block_max; | ||
| 873 | u64 exec_max; | ||
| 874 | u64 wait_max; | ||
| 875 | u64 last_ran; | ||
| 876 | |||
| 877 | u64 sum_exec_runtime; | ||
| 878 | s64 sum_wait_runtime; | ||
| 879 | s64 sum_sleep_runtime; | ||
| 880 | unsigned long wait_runtime_overruns; | ||
| 881 | unsigned long wait_runtime_underruns; | ||
| 882 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 883 | struct sched_entity *parent; | ||
| 884 | /* rq on which this entity is (to be) queued: */ | ||
| 885 | struct cfs_rq *cfs_rq; | ||
| 886 | /* rq "owned" by this entity/group: */ | ||
| 887 | struct cfs_rq *my_q; | ||
| 888 | #endif | ||
| 889 | }; | ||
| 820 | 890 | ||
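load.weight feeds the load-balancer sums, while inv_weight exists so the hot path can scale runtime deltas by a reciprocal multiply instead of a divide. A sketch of the idea, assuming inv_weight caches 2^32 / weight (the exact constant is an assumption here, not something this hunk defines):

	static inline u64 scale_by_weight(u64 delta, struct load_weight *lw)
	{
		/*
		 * If inv_weight caches 2^32 / lw->weight, then
		 * (delta * inv_weight) >> 32 approximates delta / lw->weight,
		 * provided delta fits in 32 bits so the product cannot
		 * overflow. One multiply and a shift replace a division.
		 */
		return (delta * lw->inv_weight) >> 32;
	}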
| 821 | struct task_struct { | 891 | struct task_struct { |
| 822 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ | 892 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ |
| @@ -832,23 +902,20 @@ struct task_struct { | |||
| 832 | int oncpu; | 902 | int oncpu; |
| 833 | #endif | 903 | #endif |
| 834 | #endif | 904 | #endif |
| 835 | int load_weight; /* for niceness load balancing purposes */ | 905 | |
| 836 | int prio, static_prio, normal_prio; | 906 | int prio, static_prio, normal_prio; |
| 837 | struct list_head run_list; | 907 | struct list_head run_list; |
| 838 | struct prio_array *array; | 908 | struct sched_class *sched_class; |
| 909 | struct sched_entity se; | ||
| 839 | 910 | ||
| 840 | unsigned short ioprio; | 911 | unsigned short ioprio; |
| 841 | #ifdef CONFIG_BLK_DEV_IO_TRACE | 912 | #ifdef CONFIG_BLK_DEV_IO_TRACE |
| 842 | unsigned int btrace_seq; | 913 | unsigned int btrace_seq; |
| 843 | #endif | 914 | #endif |
| 844 | unsigned long sleep_avg; | ||
| 845 | unsigned long long timestamp, last_ran; | ||
| 846 | unsigned long long sched_time; /* sched_clock time spent running */ | ||
| 847 | enum sleep_type sleep_type; | ||
| 848 | 915 | ||
| 849 | unsigned int policy; | 916 | unsigned int policy; |
| 850 | cpumask_t cpus_allowed; | 917 | cpumask_t cpus_allowed; |
| 851 | unsigned int time_slice, first_time_slice; | 918 | unsigned int time_slice; |
| 852 | 919 | ||
| 853 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 920 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
| 854 | struct sched_info sched_info; | 921 | struct sched_info sched_info; |
| @@ -1078,6 +1145,37 @@ struct task_struct { | |||
| 1078 | #endif | 1145 | #endif |
| 1079 | }; | 1146 | }; |
| 1080 | 1147 | ||
| 1148 | /* | ||
| 1149 | * Priority of a process goes from 0..MAX_PRIO-1, valid RT | ||
| 1150 | * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH | ||
| 1151 | * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority | ||
| 1152 | * values are inverted: lower p->prio value means higher priority. | ||
| 1153 | * | ||
| 1154 | * The MAX_USER_RT_PRIO value allows the actual maximum | ||
| 1155 | * RT priority to be separate from the value exported to | ||
| 1156 | * user-space. This allows kernel threads to set their | ||
| 1157 | * priority to a value higher than any user task. Note: | ||
| 1158 | * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. | ||
| 1159 | */ | ||
| 1160 | |||
| 1161 | #define MAX_USER_RT_PRIO 100 | ||
| 1162 | #define MAX_RT_PRIO MAX_USER_RT_PRIO | ||
| 1163 | |||
| 1164 | #define MAX_PRIO (MAX_RT_PRIO + 40) | ||
| 1165 | #define DEFAULT_PRIO (MAX_RT_PRIO + 20) | ||
| 1166 | |||
| 1167 | static inline int rt_prio(int prio) | ||
| 1168 | { | ||
| 1169 | if (unlikely(prio < MAX_RT_PRIO)) | ||
| 1170 | return 1; | ||
| 1171 | return 0; | ||
| 1172 | } | ||
| 1173 | |||
| 1174 | static inline int rt_task(struct task_struct *p) | ||
| 1175 | { | ||
| 1176 | return rt_prio(p->prio); | ||
| 1177 | } | ||
| 1178 | |||
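Concretely, with MAX_RT_PRIO == 100 the nice-to-priority mapping (NICE_TO_PRIO() is defined elsewhere in this header, not in this hunk) works out to:

	/*
	 *   nice -20 -> prio 100  (highest non-RT priority)
	 *   nice   0 -> prio 120  (DEFAULT_PRIO)
	 *   nice +19 -> prio 139  (MAX_PRIO - 1, lowest priority)
	 *
	 * Any prio below 100 is an RT priority, so rt_prio() is a
	 * simple range check.
	 */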
| 1081 | static inline pid_t process_group(struct task_struct *tsk) | 1179 | static inline pid_t process_group(struct task_struct *tsk) |
| 1082 | { | 1180 | { |
| 1083 | return tsk->signal->pgrp; | 1181 | return tsk->signal->pgrp; |
| @@ -1223,7 +1321,7 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) | |||
| 1223 | 1321 | ||
| 1224 | extern unsigned long long sched_clock(void); | 1322 | extern unsigned long long sched_clock(void); |
| 1225 | extern unsigned long long | 1323 | extern unsigned long long |
| 1226 | current_sched_time(const struct task_struct *current_task); | 1324 | task_sched_runtime(struct task_struct *task); |
| 1227 | 1325 | ||
| 1228 | /* sched_exec is called by processes performing an exec */ | 1326 | /* sched_exec is called by processes performing an exec */ |
| 1229 | #ifdef CONFIG_SMP | 1327 | #ifdef CONFIG_SMP |
| @@ -1232,6 +1330,8 @@ extern void sched_exec(void); | |||
| 1232 | #define sched_exec() {} | 1330 | #define sched_exec() {} |
| 1233 | #endif | 1331 | #endif |
| 1234 | 1332 | ||
| 1333 | extern void sched_clock_unstable_event(void); | ||
| 1334 | |||
| 1235 | #ifdef CONFIG_HOTPLUG_CPU | 1335 | #ifdef CONFIG_HOTPLUG_CPU |
| 1236 | extern void idle_task_exit(void); | 1336 | extern void idle_task_exit(void); |
| 1237 | #else | 1337 | #else |
| @@ -1240,6 +1340,14 @@ static inline void idle_task_exit(void) {} | |||
| 1240 | 1340 | ||
| 1241 | extern void sched_idle_next(void); | 1341 | extern void sched_idle_next(void); |
| 1242 | 1342 | ||
| 1343 | extern unsigned int sysctl_sched_granularity; | ||
| 1344 | extern unsigned int sysctl_sched_wakeup_granularity; | ||
| 1345 | extern unsigned int sysctl_sched_batch_wakeup_granularity; | ||
| 1346 | extern unsigned int sysctl_sched_stat_granularity; | ||
| 1347 | extern unsigned int sysctl_sched_runtime_limit; | ||
| 1348 | extern unsigned int sysctl_sched_child_runs_first; | ||
| 1349 | extern unsigned int sysctl_sched_features; | ||
| 1350 | |||
| 1243 | #ifdef CONFIG_RT_MUTEXES | 1351 | #ifdef CONFIG_RT_MUTEXES |
| 1244 | extern int rt_mutex_getprio(struct task_struct *p); | 1352 | extern int rt_mutex_getprio(struct task_struct *p); |
| 1245 | extern void rt_mutex_setprio(struct task_struct *p, int prio); | 1353 | extern void rt_mutex_setprio(struct task_struct *p, int prio); |
| @@ -1317,8 +1425,8 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk, | |||
| 1317 | #else | 1425 | #else |
| 1318 | static inline void kick_process(struct task_struct *tsk) { } | 1426 | static inline void kick_process(struct task_struct *tsk) { } |
| 1319 | #endif | 1427 | #endif |
| 1320 | extern void FASTCALL(sched_fork(struct task_struct * p, int clone_flags)); | 1428 | extern void sched_fork(struct task_struct *p, int clone_flags); |
| 1321 | extern void FASTCALL(sched_exit(struct task_struct * p)); | 1429 | extern void sched_dead(struct task_struct *p); |
| 1322 | 1430 | ||
| 1323 | extern int in_group_p(gid_t); | 1431 | extern int in_group_p(gid_t); |
| 1324 | extern int in_egroup_p(gid_t); | 1432 | extern int in_egroup_p(gid_t); |
| @@ -1406,7 +1514,7 @@ extern struct mm_struct * mm_alloc(void); | |||
| 1406 | extern void FASTCALL(__mmdrop(struct mm_struct *)); | 1514 | extern void FASTCALL(__mmdrop(struct mm_struct *)); |
| 1407 | static inline void mmdrop(struct mm_struct * mm) | 1515 | static inline void mmdrop(struct mm_struct * mm) |
| 1408 | { | 1516 | { |
| 1409 | if (atomic_dec_and_test(&mm->mm_count)) | 1517 | if (unlikely(atomic_dec_and_test(&mm->mm_count))) |
| 1410 | __mmdrop(mm); | 1518 | __mmdrop(mm); |
| 1411 | } | 1519 | } |
| 1412 | 1520 | ||
| @@ -1638,10 +1746,7 @@ static inline unsigned int task_cpu(const struct task_struct *p) | |||
| 1638 | return task_thread_info(p)->cpu; | 1746 | return task_thread_info(p)->cpu; |
| 1639 | } | 1747 | } |
| 1640 | 1748 | ||
| 1641 | static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) | 1749 | extern void set_task_cpu(struct task_struct *p, unsigned int cpu); |
| 1642 | { | ||
| 1643 | task_thread_info(p)->cpu = cpu; | ||
| 1644 | } | ||
| 1645 | 1750 | ||
| 1646 | #else | 1751 | #else |
| 1647 | 1752 | ||
diff --git a/include/linux/topology.h b/include/linux/topology.h index a9d1f049cc15..da6c39b2d051 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h | |||
| @@ -98,7 +98,7 @@ | |||
| 98 | .cache_nice_tries = 0, \ | 98 | .cache_nice_tries = 0, \ |
| 99 | .busy_idx = 0, \ | 99 | .busy_idx = 0, \ |
| 100 | .idle_idx = 0, \ | 100 | .idle_idx = 0, \ |
| 101 | .newidle_idx = 1, \ | 101 | .newidle_idx = 0, \ |
| 102 | .wake_idx = 0, \ | 102 | .wake_idx = 0, \ |
| 103 | .forkexec_idx = 0, \ | 103 | .forkexec_idx = 0, \ |
| 104 | .flags = SD_LOAD_BALANCE \ | 104 | .flags = SD_LOAD_BALANCE \ |
| @@ -128,14 +128,15 @@ | |||
| 128 | .imbalance_pct = 125, \ | 128 | .imbalance_pct = 125, \ |
| 129 | .cache_nice_tries = 1, \ | 129 | .cache_nice_tries = 1, \ |
| 130 | .busy_idx = 2, \ | 130 | .busy_idx = 2, \ |
| 131 | .idle_idx = 1, \ | 131 | .idle_idx = 0, \ |
| 132 | .newidle_idx = 2, \ | 132 | .newidle_idx = 0, \ |
| 133 | .wake_idx = 1, \ | 133 | .wake_idx = 1, \ |
| 134 | .forkexec_idx = 1, \ | 134 | .forkexec_idx = 1, \ |
| 135 | .flags = SD_LOAD_BALANCE \ | 135 | .flags = SD_LOAD_BALANCE \ |
| 136 | | SD_BALANCE_NEWIDLE \ | 136 | | SD_BALANCE_NEWIDLE \ |
| 137 | | SD_BALANCE_EXEC \ | 137 | | SD_BALANCE_EXEC \ |
| 138 | | SD_WAKE_AFFINE \ | 138 | | SD_WAKE_AFFINE \ |
| 139 | | SD_WAKE_IDLE \ | ||
| 139 | | SD_SHARE_PKG_RESOURCES\ | 140 | | SD_SHARE_PKG_RESOURCES\ |
| 140 | | BALANCE_FOR_MC_POWER, \ | 141 | | BALANCE_FOR_MC_POWER, \ |
| 141 | .last_balance = jiffies, \ | 142 | .last_balance = jiffies, \ |
| @@ -158,14 +159,15 @@ | |||
| 158 | .imbalance_pct = 125, \ | 159 | .imbalance_pct = 125, \ |
| 159 | .cache_nice_tries = 1, \ | 160 | .cache_nice_tries = 1, \ |
| 160 | .busy_idx = 2, \ | 161 | .busy_idx = 2, \ |
| 161 | .idle_idx = 1, \ | 162 | .idle_idx = 0, \ |
| 162 | .newidle_idx = 2, \ | 163 | .newidle_idx = 0, \ |
| 163 | .wake_idx = 1, \ | 164 | .wake_idx = 1, \ |
| 164 | .forkexec_idx = 1, \ | 165 | .forkexec_idx = 1, \ |
| 165 | .flags = SD_LOAD_BALANCE \ | 166 | .flags = SD_LOAD_BALANCE \ |
| 166 | | SD_BALANCE_NEWIDLE \ | 167 | | SD_BALANCE_NEWIDLE \ |
| 167 | | SD_BALANCE_EXEC \ | 168 | | SD_BALANCE_EXEC \ |
| 168 | | SD_WAKE_AFFINE \ | 169 | | SD_WAKE_AFFINE \ |
| 170 | | SD_WAKE_IDLE \ | ||
| 169 | | BALANCE_FOR_PKG_POWER,\ | 171 | | BALANCE_FOR_PKG_POWER,\ |
| 170 | .last_balance = jiffies, \ | 172 | .last_balance = jiffies, \ |
| 171 | .balance_interval = 1, \ | 173 | .balance_interval = 1, \ |
diff --git a/include/linux/wait.h b/include/linux/wait.h index e820d00e1383..0e686280450b 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h | |||
| @@ -366,15 +366,15 @@ static inline void remove_wait_queue_locked(wait_queue_head_t *q, | |||
| 366 | 366 | ||
| 367 | /* | 367 | /* |
| 368 | * These are the old interfaces to sleep waiting for an event. | 368 | * These are the old interfaces to sleep waiting for an event. |
| 369 | * They are racy. DO NOT use them, use the wait_event* interfaces above. | 369 | * They are racy. DO NOT use them, use the wait_event* interfaces above. |
| 370 | * We plan to remove these interfaces during 2.7. | 370 | * We plan to remove these interfaces. |
| 371 | */ | 371 | */ |
| 372 | extern void FASTCALL(sleep_on(wait_queue_head_t *q)); | 372 | extern void sleep_on(wait_queue_head_t *q); |
| 373 | extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q, | 373 | extern long sleep_on_timeout(wait_queue_head_t *q, |
| 374 | signed long timeout)); | 374 | signed long timeout); |
| 375 | extern void FASTCALL(interruptible_sleep_on(wait_queue_head_t *q)); | 375 | extern void interruptible_sleep_on(wait_queue_head_t *q); |
| 376 | extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q, | 376 | extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, |
| 377 | signed long timeout)); | 377 | signed long timeout); |
| 378 | 378 | ||
| 379 | /* | 379 | /* |
| 380 | * Waitqueues which are removed from the waitqueue_head at wakeup time | 380 | * Waitqueues which are removed from the waitqueue_head at wakeup time |
diff --git a/init/main.c b/init/main.c index eb8bdbae4fc7..0eb1c7463fe4 100644 --- a/init/main.c +++ b/init/main.c | |||
| @@ -436,15 +436,16 @@ static void noinline __init_refok rest_init(void) | |||
| 436 | 436 | ||
| 437 | /* | 437 | /* |
| 438 | * The boot idle thread must execute schedule() | 438 | * The boot idle thread must execute schedule() |
| 439 | * at least one to get things moving: | 439 | * at least once to get things moving: |
| 440 | */ | 440 | */ |
| 441 | init_idle_bootup_task(current); | ||
| 441 | preempt_enable_no_resched(); | 442 | preempt_enable_no_resched(); |
| 442 | schedule(); | 443 | schedule(); |
| 443 | preempt_disable(); | 444 | preempt_disable(); |
| 444 | 445 | ||
| 445 | /* Call into cpu_idle with preempt disabled */ | 446 | /* Call into cpu_idle with preempt disabled */ |
| 446 | cpu_idle(); | 447 | cpu_idle(); |
| 447 | } | 448 | } |
| 448 | 449 | ||
| 449 | /* Check for early params. */ | 450 | /* Check for early params. */ |
| 450 | static int __init do_early_param(char *param, char *val) | 451 | static int __init do_early_param(char *param, char *val) |
diff --git a/kernel/delayacct.c b/kernel/delayacct.c index c0148ae992c4..81e697829633 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c | |||
| @@ -99,9 +99,10 @@ void __delayacct_blkio_end(void) | |||
| 99 | int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) | 99 | int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) |
| 100 | { | 100 | { |
| 101 | s64 tmp; | 101 | s64 tmp; |
| 102 | struct timespec ts; | 102 | unsigned long t1; |
| 103 | unsigned long t1,t2,t3; | 103 | unsigned long long t2, t3; |
| 104 | unsigned long flags; | 104 | unsigned long flags; |
| 105 | struct timespec ts; | ||
| 105 | 106 | ||
| 106 | /* Though tsk->delays accessed later, early exit avoids | 107 | /* Though tsk->delays accessed later, early exit avoids |
| 107 | * unnecessary returning of other data | 108 | * unnecessary returning of other data |
| @@ -124,11 +125,10 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) | |||
| 124 | 125 | ||
| 125 | d->cpu_count += t1; | 126 | d->cpu_count += t1; |
| 126 | 127 | ||
| 127 | jiffies_to_timespec(t2, &ts); | 128 | tmp = (s64)d->cpu_delay_total + t2; |
| 128 | tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts); | ||
| 129 | d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; | 129 | d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; |
| 130 | 130 | ||
| 131 | tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000; | 131 | tmp = (s64)d->cpu_run_virtual_total + t3; |
| 132 | d->cpu_run_virtual_total = | 132 | d->cpu_run_virtual_total = |
| 133 | (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; | 133 | (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; |
| 134 | 134 | ||
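With sched_info now accounted in nanoseconds (see the include/linux/sched.h hunk above), t2 and t3 arrive here as unsigned long long nanosecond totals and can be added to the taskstats fields directly; the old jiffies_to_timespec()/jiffies_to_usecs() round trips are gone.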
diff --git a/kernel/exit.c b/kernel/exit.c index 5c8ecbaa19a5..ca6a11b73023 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -122,9 +122,9 @@ static void __exit_signal(struct task_struct *tsk) | |||
| 122 | sig->maj_flt += tsk->maj_flt; | 122 | sig->maj_flt += tsk->maj_flt; |
| 123 | sig->nvcsw += tsk->nvcsw; | 123 | sig->nvcsw += tsk->nvcsw; |
| 124 | sig->nivcsw += tsk->nivcsw; | 124 | sig->nivcsw += tsk->nivcsw; |
| 125 | sig->sched_time += tsk->sched_time; | ||
| 126 | sig->inblock += task_io_get_inblock(tsk); | 125 | sig->inblock += task_io_get_inblock(tsk); |
| 127 | sig->oublock += task_io_get_oublock(tsk); | 126 | sig->oublock += task_io_get_oublock(tsk); |
| 127 | sig->sum_sched_runtime += tsk->se.sum_exec_runtime; | ||
| 128 | sig = NULL; /* Marker for below. */ | 128 | sig = NULL; /* Marker for below. */ |
| 129 | } | 129 | } |
| 130 | 130 | ||
| @@ -182,7 +182,6 @@ repeat: | |||
| 182 | zap_leader = (leader->exit_signal == -1); | 182 | zap_leader = (leader->exit_signal == -1); |
| 183 | } | 183 | } |
| 184 | 184 | ||
| 185 | sched_exit(p); | ||
| 186 | write_unlock_irq(&tasklist_lock); | 185 | write_unlock_irq(&tasklist_lock); |
| 187 | proc_flush_task(p); | 186 | proc_flush_task(p); |
| 188 | release_thread(p); | 187 | release_thread(p); |
| @@ -291,7 +290,7 @@ static void reparent_to_kthreadd(void) | |||
| 291 | /* Set the exit signal to SIGCHLD so we signal init on exit */ | 290 | /* Set the exit signal to SIGCHLD so we signal init on exit */ |
| 292 | current->exit_signal = SIGCHLD; | 291 | current->exit_signal = SIGCHLD; |
| 293 | 292 | ||
| 294 | if (!has_rt_policy(current) && (task_nice(current) < 0)) | 293 | if (task_nice(current) < 0) |
| 295 | set_user_nice(current, 0); | 294 | set_user_nice(current, 0); |
| 296 | /* cpus_allowed? */ | 295 | /* cpus_allowed? */ |
| 297 | /* rt_priority? */ | 296 | /* rt_priority? */ |
diff --git a/kernel/fork.c b/kernel/fork.c index 73ad5cda1bcd..da3a155bba0d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -877,7 +877,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts | |||
| 877 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; | 877 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; |
| 878 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; | 878 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; |
| 879 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; | 879 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; |
| 880 | sig->sched_time = 0; | 880 | sig->sum_sched_runtime = 0; |
| 881 | INIT_LIST_HEAD(&sig->cpu_timers[0]); | 881 | INIT_LIST_HEAD(&sig->cpu_timers[0]); |
| 882 | INIT_LIST_HEAD(&sig->cpu_timers[1]); | 882 | INIT_LIST_HEAD(&sig->cpu_timers[1]); |
| 883 | INIT_LIST_HEAD(&sig->cpu_timers[2]); | 883 | INIT_LIST_HEAD(&sig->cpu_timers[2]); |
| @@ -1040,7 +1040,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1040 | 1040 | ||
| 1041 | p->utime = cputime_zero; | 1041 | p->utime = cputime_zero; |
| 1042 | p->stime = cputime_zero; | 1042 | p->stime = cputime_zero; |
| 1043 | p->sched_time = 0; | 1043 | |
| 1044 | #ifdef CONFIG_TASK_XACCT | 1044 | #ifdef CONFIG_TASK_XACCT |
| 1045 | p->rchar = 0; /* I/O counter: bytes read */ | 1045 | p->rchar = 0; /* I/O counter: bytes read */ |
| 1046 | p->wchar = 0; /* I/O counter: bytes written */ | 1046 | p->wchar = 0; /* I/O counter: bytes written */ |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 1de710e18373..b53c8fcd9d82 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
| @@ -161,7 +161,7 @@ static inline cputime_t virt_ticks(struct task_struct *p) | |||
| 161 | } | 161 | } |
| 162 | static inline unsigned long long sched_ns(struct task_struct *p) | 162 | static inline unsigned long long sched_ns(struct task_struct *p) |
| 163 | { | 163 | { |
| 164 | return (p == current) ? current_sched_time(p) : p->sched_time; | 164 | return task_sched_runtime(p); |
| 165 | } | 165 | } |
| 166 | 166 | ||
| 167 | int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) | 167 | int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) |
| @@ -246,10 +246,10 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx, | |||
| 246 | } while (t != p); | 246 | } while (t != p); |
| 247 | break; | 247 | break; |
| 248 | case CPUCLOCK_SCHED: | 248 | case CPUCLOCK_SCHED: |
| 249 | cpu->sched = p->signal->sched_time; | 249 | cpu->sched = p->signal->sum_sched_runtime; |
| 250 | /* Add in each other live thread. */ | 250 | /* Add in each other live thread. */ |
| 251 | while ((t = next_thread(t)) != p) { | 251 | while ((t = next_thread(t)) != p) { |
| 252 | cpu->sched += t->sched_time; | 252 | cpu->sched += t->se.sum_exec_runtime; |
| 253 | } | 253 | } |
| 254 | cpu->sched += sched_ns(p); | 254 | cpu->sched += sched_ns(p); |
| 255 | break; | 255 | break; |
| @@ -422,7 +422,7 @@ int posix_cpu_timer_del(struct k_itimer *timer) | |||
| 422 | */ | 422 | */ |
| 423 | static void cleanup_timers(struct list_head *head, | 423 | static void cleanup_timers(struct list_head *head, |
| 424 | cputime_t utime, cputime_t stime, | 424 | cputime_t utime, cputime_t stime, |
| 425 | unsigned long long sched_time) | 425 | unsigned long long sum_exec_runtime) |
| 426 | { | 426 | { |
| 427 | struct cpu_timer_list *timer, *next; | 427 | struct cpu_timer_list *timer, *next; |
| 428 | cputime_t ptime = cputime_add(utime, stime); | 428 | cputime_t ptime = cputime_add(utime, stime); |
| @@ -451,10 +451,10 @@ static void cleanup_timers(struct list_head *head, | |||
| 451 | ++head; | 451 | ++head; |
| 452 | list_for_each_entry_safe(timer, next, head, entry) { | 452 | list_for_each_entry_safe(timer, next, head, entry) { |
| 453 | list_del_init(&timer->entry); | 453 | list_del_init(&timer->entry); |
| 454 | if (timer->expires.sched < sched_time) { | 454 | if (timer->expires.sched < sum_exec_runtime) { |
| 455 | timer->expires.sched = 0; | 455 | timer->expires.sched = 0; |
| 456 | } else { | 456 | } else { |
| 457 | timer->expires.sched -= sched_time; | 457 | timer->expires.sched -= sum_exec_runtime; |
| 458 | } | 458 | } |
| 459 | } | 459 | } |
| 460 | } | 460 | } |
| @@ -467,7 +467,7 @@ static void cleanup_timers(struct list_head *head, | |||
| 467 | void posix_cpu_timers_exit(struct task_struct *tsk) | 467 | void posix_cpu_timers_exit(struct task_struct *tsk) |
| 468 | { | 468 | { |
| 469 | cleanup_timers(tsk->cpu_timers, | 469 | cleanup_timers(tsk->cpu_timers, |
| 470 | tsk->utime, tsk->stime, tsk->sched_time); | 470 | tsk->utime, tsk->stime, tsk->se.sum_exec_runtime); |
| 471 | 471 | ||
| 472 | } | 472 | } |
| 473 | void posix_cpu_timers_exit_group(struct task_struct *tsk) | 473 | void posix_cpu_timers_exit_group(struct task_struct *tsk) |
| @@ -475,7 +475,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk) | |||
| 475 | cleanup_timers(tsk->signal->cpu_timers, | 475 | cleanup_timers(tsk->signal->cpu_timers, |
| 476 | cputime_add(tsk->utime, tsk->signal->utime), | 476 | cputime_add(tsk->utime, tsk->signal->utime), |
| 477 | cputime_add(tsk->stime, tsk->signal->stime), | 477 | cputime_add(tsk->stime, tsk->signal->stime), |
| 478 | tsk->sched_time + tsk->signal->sched_time); | 478 | tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime); |
| 479 | } | 479 | } |
| 480 | 480 | ||
| 481 | 481 | ||
| @@ -536,7 +536,7 @@ static void process_timer_rebalance(struct task_struct *p, | |||
| 536 | nsleft = max_t(unsigned long long, nsleft, 1); | 536 | nsleft = max_t(unsigned long long, nsleft, 1); |
| 537 | do { | 537 | do { |
| 538 | if (likely(!(t->flags & PF_EXITING))) { | 538 | if (likely(!(t->flags & PF_EXITING))) { |
| 539 | ns = t->sched_time + nsleft; | 539 | ns = t->se.sum_exec_runtime + nsleft; |
| 540 | if (t->it_sched_expires == 0 || | 540 | if (t->it_sched_expires == 0 || |
| 541 | t->it_sched_expires > ns) { | 541 | t->it_sched_expires > ns) { |
| 542 | t->it_sched_expires = ns; | 542 | t->it_sched_expires = ns; |
| @@ -1004,7 +1004,7 @@ static void check_thread_timers(struct task_struct *tsk, | |||
| 1004 | struct cpu_timer_list *t = list_first_entry(timers, | 1004 | struct cpu_timer_list *t = list_first_entry(timers, |
| 1005 | struct cpu_timer_list, | 1005 | struct cpu_timer_list, |
| 1006 | entry); | 1006 | entry); |
| 1007 | if (!--maxfire || tsk->sched_time < t->expires.sched) { | 1007 | if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { |
| 1008 | tsk->it_sched_expires = t->expires.sched; | 1008 | tsk->it_sched_expires = t->expires.sched; |
| 1009 | break; | 1009 | break; |
| 1010 | } | 1010 | } |
| @@ -1024,7 +1024,7 @@ static void check_process_timers(struct task_struct *tsk, | |||
| 1024 | int maxfire; | 1024 | int maxfire; |
| 1025 | struct signal_struct *const sig = tsk->signal; | 1025 | struct signal_struct *const sig = tsk->signal; |
| 1026 | cputime_t utime, stime, ptime, virt_expires, prof_expires; | 1026 | cputime_t utime, stime, ptime, virt_expires, prof_expires; |
| 1027 | unsigned long long sched_time, sched_expires; | 1027 | unsigned long long sum_sched_runtime, sched_expires; |
| 1028 | struct task_struct *t; | 1028 | struct task_struct *t; |
| 1029 | struct list_head *timers = sig->cpu_timers; | 1029 | struct list_head *timers = sig->cpu_timers; |
| 1030 | 1030 | ||
| @@ -1044,12 +1044,12 @@ static void check_process_timers(struct task_struct *tsk, | |||
| 1044 | */ | 1044 | */ |
| 1045 | utime = sig->utime; | 1045 | utime = sig->utime; |
| 1046 | stime = sig->stime; | 1046 | stime = sig->stime; |
| 1047 | sched_time = sig->sched_time; | 1047 | sum_sched_runtime = sig->sum_sched_runtime; |
| 1048 | t = tsk; | 1048 | t = tsk; |
| 1049 | do { | 1049 | do { |
| 1050 | utime = cputime_add(utime, t->utime); | 1050 | utime = cputime_add(utime, t->utime); |
| 1051 | stime = cputime_add(stime, t->stime); | 1051 | stime = cputime_add(stime, t->stime); |
| 1052 | sched_time += t->sched_time; | 1052 | sum_sched_runtime += t->se.sum_exec_runtime; |
| 1053 | t = next_thread(t); | 1053 | t = next_thread(t); |
| 1054 | } while (t != tsk); | 1054 | } while (t != tsk); |
| 1055 | ptime = cputime_add(utime, stime); | 1055 | ptime = cputime_add(utime, stime); |
| @@ -1090,7 +1090,7 @@ static void check_process_timers(struct task_struct *tsk, | |||
| 1090 | struct cpu_timer_list *t = list_first_entry(timers, | 1090 | struct cpu_timer_list *t = list_first_entry(timers, |
| 1091 | struct cpu_timer_list, | 1091 | struct cpu_timer_list, |
| 1092 | entry); | 1092 | entry); |
| 1093 | if (!--maxfire || sched_time < t->expires.sched) { | 1093 | if (!--maxfire || sum_sched_runtime < t->expires.sched) { |
| 1094 | sched_expires = t->expires.sched; | 1094 | sched_expires = t->expires.sched; |
| 1095 | break; | 1095 | break; |
| 1096 | } | 1096 | } |
| @@ -1182,7 +1182,7 @@ static void check_process_timers(struct task_struct *tsk, | |||
| 1182 | virt_left = cputime_sub(virt_expires, utime); | 1182 | virt_left = cputime_sub(virt_expires, utime); |
| 1183 | virt_left = cputime_div_non_zero(virt_left, nthreads); | 1183 | virt_left = cputime_div_non_zero(virt_left, nthreads); |
| 1184 | if (sched_expires) { | 1184 | if (sched_expires) { |
| 1185 | sched_left = sched_expires - sched_time; | 1185 | sched_left = sched_expires - sum_sched_runtime; |
| 1186 | do_div(sched_left, nthreads); | 1186 | do_div(sched_left, nthreads); |
| 1187 | sched_left = max_t(unsigned long long, sched_left, 1); | 1187 | sched_left = max_t(unsigned long long, sched_left, 1); |
| 1188 | } else { | 1188 | } else { |
| @@ -1208,7 +1208,7 @@ static void check_process_timers(struct task_struct *tsk, | |||
| 1208 | t->it_virt_expires = ticks; | 1208 | t->it_virt_expires = ticks; |
| 1209 | } | 1209 | } |
| 1210 | 1210 | ||
| 1211 | sched = t->sched_time + sched_left; | 1211 | sched = t->se.sum_exec_runtime + sched_left; |
| 1212 | if (sched_expires && (t->it_sched_expires == 0 || | 1212 | if (sched_expires && (t->it_sched_expires == 0 || |
| 1213 | t->it_sched_expires > sched)) { | 1213 | t->it_sched_expires > sched)) { |
| 1214 | t->it_sched_expires = sched; | 1214 | t->it_sched_expires = sched; |
| @@ -1300,7 +1300,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
| 1300 | 1300 | ||
| 1301 | if (UNEXPIRED(prof) && UNEXPIRED(virt) && | 1301 | if (UNEXPIRED(prof) && UNEXPIRED(virt) && |
| 1302 | (tsk->it_sched_expires == 0 || | 1302 | (tsk->it_sched_expires == 0 || |
| 1303 | tsk->sched_time < tsk->it_sched_expires)) | 1303 | tsk->se.sum_exec_runtime < tsk->it_sched_expires)) |
| 1304 | return; | 1304 | return; |
| 1305 | 1305 | ||
| 1306 | #undef UNEXPIRED | 1306 | #undef UNEXPIRED |
diff --git a/kernel/sched.c b/kernel/sched.c index 50e1a3122699..9fbced64bfee 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -16,13 +16,19 @@ | |||
| 16 | * by Davide Libenzi, preemptible kernel bits by Robert Love. | 16 | * by Davide Libenzi, preemptible kernel bits by Robert Love. |
| 17 | * 2003-09-03 Interactivity tuning by Con Kolivas. | 17 | * 2003-09-03 Interactivity tuning by Con Kolivas. |
| 18 | * 2004-04-02 Scheduler domains code by Nick Piggin | 18 | * 2004-04-02 Scheduler domains code by Nick Piggin |
| 19 | * 2007-04-15 Work begun on replacing all interactivity tuning with a | ||
| 20 | * fair scheduling design by Con Kolivas. | ||
| 21 | * 2007-05-05 Load balancing (smp-nice) and other improvements | ||
| 22 | * by Peter Williams | ||
| 23 | * 2007-05-06 Interactivity improvements to CFS by Mike Galbraith | ||
| 24 | * 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri | ||
| 19 | */ | 25 | */ |
| 20 | 26 | ||
| 21 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
| 22 | #include <linux/module.h> | 28 | #include <linux/module.h> |
| 23 | #include <linux/nmi.h> | 29 | #include <linux/nmi.h> |
| 24 | #include <linux/init.h> | 30 | #include <linux/init.h> |
| 25 | #include <asm/uaccess.h> | 31 | #include <linux/uaccess.h> |
| 26 | #include <linux/highmem.h> | 32 | #include <linux/highmem.h> |
| 27 | #include <linux/smp_lock.h> | 33 | #include <linux/smp_lock.h> |
| 28 | #include <asm/mmu_context.h> | 34 | #include <asm/mmu_context.h> |
| @@ -53,9 +59,9 @@ | |||
| 53 | #include <linux/kprobes.h> | 59 | #include <linux/kprobes.h> |
| 54 | #include <linux/delayacct.h> | 60 | #include <linux/delayacct.h> |
| 55 | #include <linux/reciprocal_div.h> | 61 | #include <linux/reciprocal_div.h> |
| 62 | #include <linux/unistd.h> | ||
| 56 | 63 | ||
| 57 | #include <asm/tlb.h> | 64 | #include <asm/tlb.h> |
| 58 | #include <asm/unistd.h> | ||
| 59 | 65 | ||
| 60 | /* | 66 | /* |
| 61 | * Scheduler clock - returns current time in nanosec units. | 67 | * Scheduler clock - returns current time in nanosec units. |
| @@ -91,6 +97,9 @@ unsigned long long __attribute__((weak)) sched_clock(void) | |||
| 91 | #define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ)) | 97 | #define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ)) |
| 92 | #define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) | 98 | #define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) |
| 93 | 99 | ||
| 100 | #define NICE_0_LOAD SCHED_LOAD_SCALE | ||
| 101 | #define NICE_0_SHIFT SCHED_LOAD_SHIFT | ||
| 102 | |||
| 94 | /* | 103 | /* |
| 95 | * These are the 'tuning knobs' of the scheduler: | 104 | * These are the 'tuning knobs' of the scheduler: |
| 96 | * | 105 | * |
| @@ -100,87 +109,6 @@ unsigned long long __attribute__((weak)) sched_clock(void) | |||
| 100 | */ | 109 | */ |
| 101 | #define MIN_TIMESLICE max(5 * HZ / 1000, 1) | 110 | #define MIN_TIMESLICE max(5 * HZ / 1000, 1) |
| 102 | #define DEF_TIMESLICE (100 * HZ / 1000) | 111 | #define DEF_TIMESLICE (100 * HZ / 1000) |
| 103 | #define ON_RUNQUEUE_WEIGHT 30 | ||
| 104 | #define CHILD_PENALTY 95 | ||
| 105 | #define PARENT_PENALTY 100 | ||
| 106 | #define EXIT_WEIGHT 3 | ||
| 107 | #define PRIO_BONUS_RATIO 25 | ||
| 108 | #define MAX_BONUS (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100) | ||
| 109 | #define INTERACTIVE_DELTA 2 | ||
| 110 | #define MAX_SLEEP_AVG (DEF_TIMESLICE * MAX_BONUS) | ||
| 111 | #define STARVATION_LIMIT (MAX_SLEEP_AVG) | ||
| 112 | #define NS_MAX_SLEEP_AVG (JIFFIES_TO_NS(MAX_SLEEP_AVG)) | ||
| 113 | |||
| 114 | /* | ||
| 115 | * If a task is 'interactive' then we reinsert it in the active | ||
| 116 | * array after it has expired its current timeslice. (it will not | ||
| 117 | * continue to run immediately, it will still roundrobin with | ||
| 118 | * other interactive tasks.) | ||
| 119 | * | ||
| 120 | * This part scales the interactivity limit depending on niceness. | ||
| 121 | * | ||
| 122 | * We scale it linearly, offset by the INTERACTIVE_DELTA delta. | ||
| 123 | * Here are a few examples of different nice levels: | ||
| 124 | * | ||
| 125 | * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0] | ||
| 126 | * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0] | ||
| 127 | * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0] | ||
| 128 | * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0] | ||
| 129 | * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0] | ||
| 130 | * | ||
| 131 | * (the X axis represents the possible -5 ... 0 ... +5 dynamic | ||
| 132 | * priority range a task can explore, a value of '1' means the | ||
| 133 | * task is rated interactive.) | ||
| 134 | * | ||
| 135 | * Ie. nice +19 tasks can never get 'interactive' enough to be | ||
| 136 | * reinserted into the active array. And only heavily CPU-hog nice -20 | ||
| 137 | * tasks will be expired. Default nice 0 tasks are somewhere between, | ||
| 138 | * it takes some effort for them to get interactive, but it's not | ||
| 139 | * too hard. | ||
| 140 | */ | ||
| 141 | |||
| 142 | #define CURRENT_BONUS(p) \ | ||
| 143 | (NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / \ | ||
| 144 | MAX_SLEEP_AVG) | ||
| 145 | |||
| 146 | #define GRANULARITY (10 * HZ / 1000 ? : 1) | ||
| 147 | |||
| 148 | #ifdef CONFIG_SMP | ||
| 149 | #define TIMESLICE_GRANULARITY(p) (GRANULARITY * \ | ||
| 150 | (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \ | ||
| 151 | num_online_cpus()) | ||
| 152 | #else | ||
| 153 | #define TIMESLICE_GRANULARITY(p) (GRANULARITY * \ | ||
| 154 | (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1))) | ||
| 155 | #endif | ||
| 156 | |||
| 157 | #define SCALE(v1,v1_max,v2_max) \ | ||
| 158 | (v1) * (v2_max) / (v1_max) | ||
| 159 | |||
| 160 | #define DELTA(p) \ | ||
| 161 | (SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \ | ||
| 162 | INTERACTIVE_DELTA) | ||
| 163 | |||
| 164 | #define TASK_INTERACTIVE(p) \ | ||
| 165 | ((p)->prio <= (p)->static_prio - DELTA(p)) | ||
| 166 | |||
| 167 | #define INTERACTIVE_SLEEP(p) \ | ||
| 168 | (JIFFIES_TO_NS(MAX_SLEEP_AVG * \ | ||
| 169 | (MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1)) | ||
| 170 | |||
| 171 | #define TASK_PREEMPTS_CURR(p, rq) \ | ||
| 172 | ((p)->prio < (rq)->curr->prio) | ||
| 173 | |||
| 174 | #define SCALE_PRIO(x, prio) \ | ||
| 175 | max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE) | ||
| 176 | |||
| 177 | static unsigned int static_prio_timeslice(int static_prio) | ||
| 178 | { | ||
| 179 | if (static_prio < NICE_TO_PRIO(0)) | ||
| 180 | return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio); | ||
| 181 | else | ||
| 182 | return SCALE_PRIO(DEF_TIMESLICE, static_prio); | ||
| 183 | } | ||
| 184 | 112 | ||
| 185 | #ifdef CONFIG_SMP | 113 | #ifdef CONFIG_SMP |
| 186 | /* | 114 | /* |
| @@ -203,28 +131,87 @@ static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val) | |||
| 203 | } | 131 | } |
| 204 | #endif | 132 | #endif |
| 205 | 133 | ||
| 134 | #define SCALE_PRIO(x, prio) \ | ||
| 135 | max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE) | ||
| 136 | |||
| 206 | /* | 137 | /* |
| 207 | * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ] | 138 | * static_prio_timeslice() scales user-nice values [ -20 ... 0 ... 19 ] |
| 208 | * to time slice values: [800ms ... 100ms ... 5ms] | 139 | * to time slice values: [800ms ... 100ms ... 5ms] |
| 209 | * | ||
| 210 | * The higher a thread's priority, the bigger timeslices | ||
| 211 | * it gets during one round of execution. But even the lowest | ||
| 212 | * priority thread gets MIN_TIMESLICE worth of execution time. | ||
| 213 | */ | 140 | */ |
| 141 | static unsigned int static_prio_timeslice(int static_prio) | ||
| 142 | { | ||
| 143 | if (static_prio == NICE_TO_PRIO(19)) | ||
| 144 | return 1; | ||
| 145 | |||
| 146 | if (static_prio < NICE_TO_PRIO(0)) | ||
| 147 | return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio); | ||
| 148 | else | ||
| 149 | return SCALE_PRIO(DEF_TIMESLICE, static_prio); | ||
| 150 | } | ||
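The new special case pins a nice +19 task at a single jiffy instead of the 5ms the scale would give it. Worked through for HZ = 1000 (so DEF_TIMESLICE == 100 jiffies and MAX_USER_PRIO / 2 == 20):

	/*
	 *   nice -20 (prio 100): 400 * (140 - 100) / 20 = 800ms
	 *   nice  -1 (prio 119): 400 * (140 - 119) / 20 = 420ms
	 *   nice   0 (prio 120): 100 * (140 - 120) / 20 = 100ms
	 *   nice +10 (prio 130): 100 * (140 - 130) / 20 =  50ms
	 *   nice +19 (prio 139): special-cased above to 1 jiffy
	 */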
| 151 | |||
| 152 | static inline int rt_policy(int policy) | ||
| 153 | { | ||
| 154 | if (unlikely(policy == SCHED_FIFO) || unlikely(policy == SCHED_RR)) | ||
| 155 | return 1; | ||
| 156 | return 0; | ||
| 157 | } | ||
| 214 | 158 | ||
| 215 | static inline unsigned int task_timeslice(struct task_struct *p) | 159 | static inline int task_has_rt_policy(struct task_struct *p) |
| 216 | { | 160 | { |
| 217 | return static_prio_timeslice(p->static_prio); | 161 | return rt_policy(p->policy); |
| 218 | } | 162 | } |
| 219 | 163 | ||
| 220 | /* | 164 | /* |
| 221 | * These are the runqueue data structures: | 165 | * This is the priority-queue data structure of the RT scheduling class: |
| 222 | */ | 166 | */ |
| 167 | struct rt_prio_array { | ||
| 168 | DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */ | ||
| 169 | struct list_head queue[MAX_RT_PRIO]; | ||
| 170 | }; | ||
| 171 | |||
| 172 | struct load_stat { | ||
| 173 | struct load_weight load; | ||
| 174 | u64 load_update_start, load_update_last; | ||
| 175 | unsigned long delta_fair, delta_exec, delta_stat; | ||
| 176 | }; | ||
| 177 | |||
| 178 | /* CFS-related fields in a runqueue */ | ||
| 179 | struct cfs_rq { | ||
| 180 | struct load_weight load; | ||
| 181 | unsigned long nr_running; | ||
| 182 | |||
| 183 | s64 fair_clock; | ||
| 184 | u64 exec_clock; | ||
| 185 | s64 wait_runtime; | ||
| 186 | u64 sleeper_bonus; | ||
| 187 | unsigned long wait_runtime_overruns, wait_runtime_underruns; | ||
| 188 | |||
| 189 | struct rb_root tasks_timeline; | ||
| 190 | struct rb_node *rb_leftmost; | ||
| 191 | struct rb_node *rb_load_balance_curr; | ||
| 192 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 193 | /* 'curr' points to currently running entity on this cfs_rq. | ||
| 194 | * It is set to NULL otherwise (i.e. when none are currently running). | ||
| 195 | */ | ||
| 196 | struct sched_entity *curr; | ||
| 197 | struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ | ||
| 223 | 198 | ||
| 224 | struct prio_array { | 199 | /* leaf cfs_rqs are those that hold tasks (lowest schedulable entity in |
| 225 | unsigned int nr_active; | 200 | * a hierarchy). Non-leaf lrqs hold other higher schedulable entities |
| 226 | DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */ | 201 | * (like users, containers etc.) |
| 227 | struct list_head queue[MAX_PRIO]; | 202 | * |
| 203 | * leaf_cfs_rq_list ties together the list of leaf cfs_rqs on a cpu. This | ||
| 204 | * list is used during load balance. | ||
| 205 | */ | ||
| 206 | struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */ | ||
| 207 | #endif | ||
| 208 | }; | ||
| 209 | |||
| 210 | /* Real-Time classes' related field in a runqueue: */ | ||
| 211 | struct rt_rq { | ||
| 212 | struct rt_prio_array active; | ||
| 213 | int rt_load_balance_idx; | ||
| 214 | struct list_head *rt_load_balance_head, *rt_load_balance_curr; | ||
| 228 | }; | 215 | }; |
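cfs_rq->tasks_timeline above is an rbtree keyed on se->fair_key, and rb_leftmost caches the tree's minimum so picking the next entity is O(1). A sketch of how that cached pointer is meant to be consumed (hypothetical helper, assuming <linux/rbtree.h>'s rb_entry() and the field names above):

	static struct sched_entity *first_fair(struct cfs_rq *cfs_rq)
	{
		if (!cfs_rq->rb_leftmost)
			return NULL;	/* tree empty: nothing runnable */

		/* leftmost node == smallest fair_key == most "owed" entity */
		return rb_entry(cfs_rq->rb_leftmost,
				struct sched_entity, run_node);
	}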
| 229 | 216 | ||
| 230 | /* | 217 | /* |
| @@ -235,22 +222,28 @@ struct prio_array { | |||
| 235 | * acquire operations must be ordered by ascending &runqueue. | 222 | * acquire operations must be ordered by ascending &runqueue. |
| 236 | */ | 223 | */ |
| 237 | struct rq { | 224 | struct rq { |
| 238 | spinlock_t lock; | 225 | spinlock_t lock; /* runqueue lock */ |
| 239 | 226 | ||
| 240 | /* | 227 | /* |
| 241 | * nr_running and cpu_load should be in the same cacheline because | 228 | * nr_running and cpu_load should be in the same cacheline because |
| 242 | * remote CPUs use both these fields when doing load calculation. | 229 | * remote CPUs use both these fields when doing load calculation. |
| 243 | */ | 230 | */ |
| 244 | unsigned long nr_running; | 231 | unsigned long nr_running; |
| 245 | unsigned long raw_weighted_load; | 232 | #define CPU_LOAD_IDX_MAX 5 |
| 246 | #ifdef CONFIG_SMP | 233 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; |
| 247 | unsigned long cpu_load[3]; | ||
| 248 | unsigned char idle_at_tick; | 234 | unsigned char idle_at_tick; |
| 249 | #ifdef CONFIG_NO_HZ | 235 | #ifdef CONFIG_NO_HZ |
| 250 | unsigned char in_nohz_recently; | 236 | unsigned char in_nohz_recently; |
| 251 | #endif | 237 | #endif |
| 238 | struct load_stat ls; /* capture load from *all* tasks on this cpu */ | ||
| 239 | unsigned long nr_load_updates; | ||
| 240 | u64 nr_switches; | ||
| 241 | |||
| 242 | struct cfs_rq cfs; | ||
| 243 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 244 | struct list_head leaf_cfs_rq_list; /* list of leaf cfs_rq on this cpu */ | ||
| 252 | #endif | 245 | #endif |
| 253 | unsigned long long nr_switches; | 246 | struct rt_rq rt; |
| 254 | 247 | ||
| 255 | /* | 248 | /* |
| 256 | * This is part of a global counter where only the total sum | 249 | * This is part of a global counter where only the total sum |
| @@ -260,14 +253,18 @@ struct rq { | |||
| 260 | */ | 253 | */ |
| 261 | unsigned long nr_uninterruptible; | 254 | unsigned long nr_uninterruptible; |
| 262 | 255 | ||
| 263 | unsigned long expired_timestamp; | ||
| 264 | /* Cached timestamp set by update_cpu_clock() */ | ||
| 265 | unsigned long long most_recent_timestamp; | ||
| 266 | struct task_struct *curr, *idle; | 256 | struct task_struct *curr, *idle; |
| 267 | unsigned long next_balance; | 257 | unsigned long next_balance; |
| 268 | struct mm_struct *prev_mm; | 258 | struct mm_struct *prev_mm; |
| 269 | struct prio_array *active, *expired, arrays[2]; | 259 | |
| 270 | int best_expired_prio; | 260 | u64 clock, prev_clock_raw; |
| 261 | s64 clock_max_delta; | ||
| 262 | |||
| 263 | unsigned int clock_warps, clock_overflows; | ||
| 264 | unsigned int clock_unstable_events; | ||
| 265 | |||
| 266 | struct sched_class *load_balance_class; | ||
| 267 | |||
| 271 | atomic_t nr_iowait; | 268 | atomic_t nr_iowait; |
| 272 | 269 | ||
| 273 | #ifdef CONFIG_SMP | 270 | #ifdef CONFIG_SMP |
| @@ -307,6 +304,11 @@ struct rq { | |||
| 307 | static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp; | 304 | static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp; |
| 308 | static DEFINE_MUTEX(sched_hotcpu_mutex); | 305 | static DEFINE_MUTEX(sched_hotcpu_mutex); |
| 309 | 306 | ||
| 307 | static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) | ||
| 308 | { | ||
| 309 | rq->curr->sched_class->check_preempt_curr(rq, p); | ||
| 310 | } | ||
| 311 | |||
| 310 | static inline int cpu_of(struct rq *rq) | 312 | static inline int cpu_of(struct rq *rq) |
| 311 | { | 313 | { |
| 312 | #ifdef CONFIG_SMP | 314 | #ifdef CONFIG_SMP |
| @@ -317,6 +319,52 @@ static inline int cpu_of(struct rq *rq) | |||
| 317 | } | 319 | } |
| 318 | 320 | ||
| 319 | /* | 321 | /* |
| 322 | * Per-runqueue clock, as finegrained as the platform can give us: | ||
| 323 | */ | ||
| 324 | static unsigned long long __rq_clock(struct rq *rq) | ||
| 325 | { | ||
| 326 | u64 prev_raw = rq->prev_clock_raw; | ||
| 327 | u64 now = sched_clock(); | ||
| 328 | s64 delta = now - prev_raw; | ||
| 329 | u64 clock = rq->clock; | ||
| 330 | |||
| 331 | /* | ||
| 332 | * Protect against sched_clock() occasionally going backwards: | ||
| 333 | */ | ||
| 334 | if (unlikely(delta < 0)) { | ||
| 335 | clock++; | ||
| 336 | rq->clock_warps++; | ||
| 337 | } else { | ||
| 338 | /* | ||
| 339 | * Catch too large forward jumps too: | ||
| 340 | */ | ||
| 341 | if (unlikely(delta > 2*TICK_NSEC)) { | ||
| 342 | clock++; | ||
| 343 | rq->clock_overflows++; | ||
| 344 | } else { | ||
| 345 | if (unlikely(delta > rq->clock_max_delta)) | ||
| 346 | rq->clock_max_delta = delta; | ||
| 347 | clock += delta; | ||
| 348 | } | ||
| 349 | } | ||
| 350 | |||
| 351 | rq->prev_clock_raw = now; | ||
| 352 | rq->clock = clock; | ||
| 353 | |||
| 354 | return clock; | ||
| 355 | } | ||
| 356 | |||
| 357 | static inline unsigned long long rq_clock(struct rq *rq) | ||
| 358 | { | ||
| 359 | int this_cpu = smp_processor_id(); | ||
| 360 | |||
| 361 | if (this_cpu == cpu_of(rq)) | ||
| 362 | return __rq_clock(rq); | ||
| 363 | |||
| 364 | return rq->clock; | ||
| 365 | } | ||
| 366 | |||
| 367 | /* | ||
| 320 | * The domain tree (rq->sd) is protected by RCU's quiescent state transition. | 368 | * The domain tree (rq->sd) is protected by RCU's quiescent state transition. |
| 321 | * See detach_destroy_domains: synchronize_sched for details. | 369 | * See detach_destroy_domains: synchronize_sched for details. |
| 322 | * | 370 | * |
| @@ -331,6 +379,18 @@ static inline int cpu_of(struct rq *rq) | |||
| 331 | #define task_rq(p) cpu_rq(task_cpu(p)) | 379 | #define task_rq(p) cpu_rq(task_cpu(p)) |
| 332 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | 380 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) |
| 333 | 381 | ||
| 382 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 383 | /* Change a task's ->cfs_rq if it moves across CPUs */ | ||
| 384 | static inline void set_task_cfs_rq(struct task_struct *p) | ||
| 385 | { | ||
| 386 | p->se.cfs_rq = &task_rq(p)->cfs; | ||
| 387 | } | ||
| 388 | #else | ||
| 389 | static inline void set_task_cfs_rq(struct task_struct *p) | ||
| 390 | { | ||
| 391 | } | ||
| 392 | #endif | ||
| 393 | |||
| 334 | #ifndef prepare_arch_switch | 394 | #ifndef prepare_arch_switch |
| 335 | # define prepare_arch_switch(next) do { } while (0) | 395 | # define prepare_arch_switch(next) do { } while (0) |
| 336 | #endif | 396 | #endif |
| @@ -460,134 +520,6 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) | |||
| 460 | spin_unlock_irqrestore(&rq->lock, *flags); | 520 | spin_unlock_irqrestore(&rq->lock, *flags); |
| 461 | } | 521 | } |
| 462 | 522 | ||
| 463 | #ifdef CONFIG_SCHEDSTATS | ||
| 464 | /* | ||
| 465 | * bump this up when changing the output format or the meaning of an existing | ||
| 466 | * format, so that tools can adapt (or abort) | ||
| 467 | */ | ||
| 468 | #define SCHEDSTAT_VERSION 14 | ||
| 469 | |||
| 470 | static int show_schedstat(struct seq_file *seq, void *v) | ||
| 471 | { | ||
| 472 | int cpu; | ||
| 473 | |||
| 474 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); | ||
| 475 | seq_printf(seq, "timestamp %lu\n", jiffies); | ||
| 476 | for_each_online_cpu(cpu) { | ||
| 477 | struct rq *rq = cpu_rq(cpu); | ||
| 478 | #ifdef CONFIG_SMP | ||
| 479 | struct sched_domain *sd; | ||
| 480 | int dcnt = 0; | ||
| 481 | #endif | ||
| 482 | |||
| 483 | /* runqueue-specific stats */ | ||
| 484 | seq_printf(seq, | ||
| 485 | "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", | ||
| 486 | cpu, rq->yld_both_empty, | ||
| 487 | rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt, | ||
| 488 | rq->sched_switch, rq->sched_cnt, rq->sched_goidle, | ||
| 489 | rq->ttwu_cnt, rq->ttwu_local, | ||
| 490 | rq->rq_sched_info.cpu_time, | ||
| 491 | rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt); | ||
| 492 | |||
| 493 | seq_printf(seq, "\n"); | ||
| 494 | |||
| 495 | #ifdef CONFIG_SMP | ||
| 496 | /* domain-specific stats */ | ||
| 497 | preempt_disable(); | ||
| 498 | for_each_domain(cpu, sd) { | ||
| 499 | enum idle_type itype; | ||
| 500 | char mask_str[NR_CPUS]; | ||
| 501 | |||
| 502 | cpumask_scnprintf(mask_str, NR_CPUS, sd->span); | ||
| 503 | seq_printf(seq, "domain%d %s", dcnt++, mask_str); | ||
| 504 | for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; | ||
| 505 | itype++) { | ||
| 506 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu " | ||
| 507 | "%lu", | ||
| 508 | sd->lb_cnt[itype], | ||
| 509 | sd->lb_balanced[itype], | ||
| 510 | sd->lb_failed[itype], | ||
| 511 | sd->lb_imbalance[itype], | ||
| 512 | sd->lb_gained[itype], | ||
| 513 | sd->lb_hot_gained[itype], | ||
| 514 | sd->lb_nobusyq[itype], | ||
| 515 | sd->lb_nobusyg[itype]); | ||
| 516 | } | ||
| 517 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu" | ||
| 518 | " %lu %lu %lu\n", | ||
| 519 | sd->alb_cnt, sd->alb_failed, sd->alb_pushed, | ||
| 520 | sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed, | ||
| 521 | sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, | ||
| 522 | sd->ttwu_wake_remote, sd->ttwu_move_affine, | ||
| 523 | sd->ttwu_move_balance); | ||
| 524 | } | ||
| 525 | preempt_enable(); | ||
| 526 | #endif | ||
| 527 | } | ||
| 528 | return 0; | ||
| 529 | } | ||
| 530 | |||
| 531 | static int schedstat_open(struct inode *inode, struct file *file) | ||
| 532 | { | ||
| 533 | unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); | ||
| 534 | char *buf = kmalloc(size, GFP_KERNEL); | ||
| 535 | struct seq_file *m; | ||
| 536 | int res; | ||
| 537 | |||
| 538 | if (!buf) | ||
| 539 | return -ENOMEM; | ||
| 540 | res = single_open(file, show_schedstat, NULL); | ||
| 541 | if (!res) { | ||
| 542 | m = file->private_data; | ||
| 543 | m->buf = buf; | ||
| 544 | m->size = size; | ||
| 545 | } else | ||
| 546 | kfree(buf); | ||
| 547 | return res; | ||
| 548 | } | ||
| 549 | |||
| 550 | const struct file_operations proc_schedstat_operations = { | ||
| 551 | .open = schedstat_open, | ||
| 552 | .read = seq_read, | ||
| 553 | .llseek = seq_lseek, | ||
| 554 | .release = single_release, | ||
| 555 | }; | ||
| 556 | |||
| 557 | /* | ||
| 558 | * Expects runqueue lock to be held for atomicity of update | ||
| 559 | */ | ||
| 560 | static inline void | ||
| 561 | rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies) | ||
| 562 | { | ||
| 563 | if (rq) { | ||
| 564 | rq->rq_sched_info.run_delay += delta_jiffies; | ||
| 565 | rq->rq_sched_info.pcnt++; | ||
| 566 | } | ||
| 567 | } | ||
| 568 | |||
| 569 | /* | ||
| 570 | * Expects runqueue lock to be held for atomicity of update | ||
| 571 | */ | ||
| 572 | static inline void | ||
| 573 | rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies) | ||
| 574 | { | ||
| 575 | if (rq) | ||
| 576 | rq->rq_sched_info.cpu_time += delta_jiffies; | ||
| 577 | } | ||
| 578 | # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) | ||
| 579 | # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) | ||
| 580 | #else /* !CONFIG_SCHEDSTATS */ | ||
| 581 | static inline void | ||
| 582 | rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies) | ||
| 583 | {} | ||
| 584 | static inline void | ||
| 585 | rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies) | ||
| 586 | {} | ||
| 587 | # define schedstat_inc(rq, field) do { } while (0) | ||
| 588 | # define schedstat_add(rq, field, amt) do { } while (0) | ||
| 589 | #endif | ||
| 590 | |||
| 591 | /* | 523 | /* |
| 592 | * this_rq_lock - lock this runqueue and disable interrupts. | 524 | * this_rq_lock - lock this runqueue and disable interrupts. |
| 593 | */ | 525 | */ |
| @@ -603,177 +535,172 @@ static inline struct rq *this_rq_lock(void) | |||
| 603 | return rq; | 535 | return rq; |
| 604 | } | 536 | } |
| 605 | 537 | ||
| 606 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | ||
| 607 | /* | 538 | /* |
| 608 | * Called when a process is dequeued from the active array and given | 539 | * CPU frequency is/was unstable - start new by setting prev_clock_raw: |
| 609 | * the cpu. We should note that with the exception of interactive | ||
| 610 | * tasks, the expired queue will become the active queue after the active | ||
| 611 | * queue is empty, without explicitly dequeuing and requeuing tasks in the | ||
| 612 | * expired queue. (Interactive tasks may be requeued directly to the | ||
| 613 | * active queue, thus delaying tasks in the expired queue from running; | ||
| 614 | * see scheduler_tick()). | ||
| 615 | * | ||
| 616 | * This function is only called from sched_info_arrive(), rather than | ||
| 617 | * dequeue_task(). Even though a task may be queued and dequeued multiple | ||
| 618 | * times as it is shuffled about, we're really interested in knowing how | ||
| 619 | * long it was from the *first* time it was queued to the time that it | ||
| 620 | * finally hit a cpu. | ||
| 621 | */ | 540 | */ |
| 622 | static inline void sched_info_dequeued(struct task_struct *t) | 541 | void sched_clock_unstable_event(void) |
| 623 | { | 542 | { |
| 624 | t->sched_info.last_queued = 0; | 543 | unsigned long flags; |
| 544 | struct rq *rq; | ||
| 545 | |||
| 546 | rq = task_rq_lock(current, &flags); | ||
| 547 | rq->prev_clock_raw = sched_clock(); | ||
| 548 | rq->clock_unstable_events++; | ||
| 549 | task_rq_unlock(rq, &flags); | ||
| 625 | } | 550 | } |
| 626 | 551 | ||
| 627 | /* | 552 | /* |
| 628 | * Called when a task finally hits the cpu. We can now calculate how | 553 | * resched_task - mark a task 'to be rescheduled now'. |
| 629 | * long it was waiting to run. We also note when it began so that we | 554 | * |
| 630 | * can keep stats on how long its timeslice is. | 555 | * On UP this means the setting of the need_resched flag, on SMP it |
| 556 | * might also involve a cross-CPU call to trigger the scheduler on | ||
| 557 | * the target CPU. | ||
| 631 | */ | 558 | */ |
| 632 | static void sched_info_arrive(struct task_struct *t) | 559 | #ifdef CONFIG_SMP |
| 560 | |||
| 561 | #ifndef tsk_is_polling | ||
| 562 | #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) | ||
| 563 | #endif | ||
| 564 | |||
| 565 | static void resched_task(struct task_struct *p) | ||
| 633 | { | 566 | { |
| 634 | unsigned long now = jiffies, delta_jiffies = 0; | 567 | int cpu; |
| 568 | |||
| 569 | assert_spin_locked(&task_rq(p)->lock); | ||
| 570 | |||
| 571 | if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) | ||
| 572 | return; | ||
| 635 | 573 | ||
| 636 | if (t->sched_info.last_queued) | 574 | set_tsk_thread_flag(p, TIF_NEED_RESCHED); |
| 637 | delta_jiffies = now - t->sched_info.last_queued; | 575 | |
| 638 | sched_info_dequeued(t); | 576 | cpu = task_cpu(p); |
| 639 | t->sched_info.run_delay += delta_jiffies; | 577 | if (cpu == smp_processor_id()) |
| 640 | t->sched_info.last_arrival = now; | 578 | return; |
| 641 | t->sched_info.pcnt++; | ||
| 642 | 579 | ||
| 643 | rq_sched_info_arrive(task_rq(t), delta_jiffies); | 580 | /* NEED_RESCHED must be visible before we test polling */ |
| 581 | smp_mb(); | ||
| 582 | if (!tsk_is_polling(p)) | ||
| 583 | smp_send_reschedule(cpu); | ||
| 644 | } | 584 | } |
| 645 | 585 | ||
| 646 | /* | 586 | static void resched_cpu(int cpu) |
| 647 | * Called when a process is queued into either the active or expired | ||
| 648 | * array. The time is noted and later used to determine how long we | ||
| 649 | * had to wait for us to reach the cpu. Since the expired queue will | ||
| 650 | * become the active queue after active queue is empty, without dequeuing | ||
| 651 | * and requeuing any tasks, we are interested in queuing to either. It | ||
| 652 | * is unusual but not impossible for tasks to be dequeued and immediately | ||
| 653 | * requeued in the same or another array: this can happen in sched_yield(), | ||
| 654 | * set_user_nice(), and even load_balance() as it moves tasks from runqueue | ||
| 655 | * to runqueue. | ||
| 656 | * | ||
| 657 | * This function is only called from enqueue_task(), but also only updates | ||
| 658 | * the timestamp if it is already not set. It's assumed that | ||
| 659 | * sched_info_dequeued() will clear that stamp when appropriate. | ||
| 660 | */ | ||
| 661 | static inline void sched_info_queued(struct task_struct *t) | ||
| 662 | { | 587 | { |
| 663 | if (unlikely(sched_info_on())) | 588 | struct rq *rq = cpu_rq(cpu); |
| 664 | if (!t->sched_info.last_queued) | 589 | unsigned long flags; |
| 665 | t->sched_info.last_queued = jiffies; | 590 | |
| 591 | if (!spin_trylock_irqsave(&rq->lock, flags)) | ||
| 592 | return; | ||
| 593 | resched_task(cpu_curr(cpu)); | ||
| 594 | spin_unlock_irqrestore(&rq->lock, flags); | ||
| 666 | } | 595 | } |
| 596 | #else | ||
| 597 | static inline void resched_task(struct task_struct *p) | ||
| 598 | { | ||
| 599 | assert_spin_locked(&task_rq(p)->lock); | ||
| 600 | set_tsk_need_resched(p); | ||
| 601 | } | ||
| 602 | #endif | ||
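
The ordering in resched_task() above is subtle: the NEED_RESCHED store must become visible to the remote CPU before the polling flag is sampled, otherwise the IPI could be skipped while a polling-idle target never rechecks the flag. A minimal userspace sketch of that handshake, using C11 atomics in place of set_tsk_thread_flag()/smp_mb()/tsk_is_polling() (model_resched, need_resched and target_polling are invented names, not kernel API):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_bool need_resched;    /* models TIF_NEED_RESCHED   */
    static atomic_bool target_polling;  /* models TIF_POLLING_NRFLAG */

    static void send_ipi(void)
    {
        printf("IPI sent\n");           /* smp_send_reschedule() here */
    }

    static void model_resched(void)
    {
        /* the kernel tests and sets separately; one RMW models both */
        if (atomic_exchange(&need_resched, true))
            return;                     /* already marked: nothing to do */

        /* the smp_mb(): the flag store must be ordered before the
         * polling load, or we might skip the IPI while the target
         * never notices the flag */
        atomic_thread_fence(memory_order_seq_cst);

        if (!atomic_load(&target_polling))
            send_ipi();                 /* target is not watching the flag */
    }

    int main(void)
    {
        model_resched();                /* prints "IPI sent"       */
        model_resched();                /* flag already set: no-op */
        return 0;
    }
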
| 667 | 603 | ||
| 668 | /* | 604 | static u64 div64_likely32(u64 divident, unsigned long divisor) |
| 669 | * Called when a process ceases being the active-running process, either | ||
| 670 | * voluntarily or involuntarily. Now we can calculate how long we ran. | ||
| 671 | */ | ||
| 672 | static inline void sched_info_depart(struct task_struct *t) | ||
| 673 | { | 605 | { |
| 674 | unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival; | 606 | #if BITS_PER_LONG == 32 |
| 607 | if (likely(divident <= 0xffffffffULL)) | ||
| 608 | return (u32)divident / divisor; | ||
| 609 | do_div(divident, divisor); | ||
| 675 | 610 | ||
| 676 | t->sched_info.cpu_time += delta_jiffies; | 611 | return divident; |
| 677 | rq_sched_info_depart(task_rq(t), delta_jiffies); | 612 | #else |
| 613 | return divident / divisor; | ||
| 614 | #endif | ||
| 678 | } | 615 | } |
| 679 | 616 | ||
| 680 | /* | 617 | #if BITS_PER_LONG == 32 |
| 681 | * Called when tasks are switched involuntarily due, typically, to expiring | 618 | # define WMULT_CONST (~0UL) |
| 682 | * their time slice. (This may also be called when switching to or from | 619 | #else |
| 683 | * the idle task.) We are only called when prev != next. | 620 | # define WMULT_CONST (1UL << 32) |
| 684 | */ | 621 | #endif |
| 685 | static inline void | 622 | |
| 686 | __sched_info_switch(struct task_struct *prev, struct task_struct *next) | 623 | #define WMULT_SHIFT 32 |
| 624 | |||
| 625 | static inline unsigned long | ||
| 626 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, | ||
| 627 | struct load_weight *lw) | ||
| 687 | { | 628 | { |
| 688 | struct rq *rq = task_rq(prev); | 629 | u64 tmp; |
| 689 | 630 | ||
| 631 | if (unlikely(!lw->inv_weight)) | ||
| 632 | lw->inv_weight = WMULT_CONST / lw->weight; | ||
| 633 | |||
| 634 | tmp = (u64)delta_exec * weight; | ||
| 690 | /* | 635 | /* |
| 691 | * prev now departs the cpu. It's not interesting to record | 636 | * Check whether we'd overflow the 64-bit multiplication: |
| 692 | * stats about how efficient we were at scheduling the idle | ||
| 693 | * process, however. | ||
| 694 | */ | 637 | */ |
| 695 | if (prev != rq->idle) | 638 | if (unlikely(tmp > WMULT_CONST)) { |
| 696 | sched_info_depart(prev); | 639 | tmp = ((tmp >> WMULT_SHIFT/2) * lw->inv_weight) |
| 640 | >> (WMULT_SHIFT/2); | ||
| 641 | } else { | ||
| 642 | tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT; | ||
| 643 | } | ||
| 697 | 644 | ||
| 698 | if (next != rq->idle) | 645 | return (unsigned long)min(tmp, (u64)sysctl_sched_runtime_limit); |
| 699 | sched_info_arrive(next); | ||
| 700 | } | ||
| 701 | static inline void | ||
| 702 | sched_info_switch(struct task_struct *prev, struct task_struct *next) | ||
| 703 | { | ||
| 704 | if (unlikely(sched_info_on())) | ||
| 705 | __sched_info_switch(prev, next); | ||
| 706 | } | 646 | } |
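
calc_delta_mine() above is the fixed-point workhorse of the new code: it computes delta_exec * weight / lw->weight without dividing on the hot path, by caching inv_weight = 2^32 / weight and multiplying by it, splitting the shift into halves when the 64-bit product would overflow. A standalone sketch of the same arithmetic (scale_delta() is an invented name; the sysctl_sched_runtime_limit clamp and the struct load_weight caching are dropped for brevity):

    #include <stdint.h>
    #include <stdio.h>

    #define WMULT_CONST (1ULL << 32)
    #define WMULT_SHIFT 32

    /* delta_exec * weight / lw_weight, with the division replaced by a
     * multiply against the 32.32 fixed-point inverse of lw_weight */
    static unsigned long scale_delta(unsigned long delta_exec,
                                     unsigned long weight,
                                     unsigned long lw_weight)
    {
        uint64_t inv_weight = WMULT_CONST / lw_weight;
        uint64_t tmp = (uint64_t)delta_exec * weight;

        if (tmp > WMULT_CONST)          /* split the shift: avoid overflow */
            return (unsigned long)(((tmp >> (WMULT_SHIFT / 2)) * inv_weight)
                                   >> (WMULT_SHIFT / 2));
        return (unsigned long)((tmp * inv_weight) >> WMULT_SHIFT);
    }

    int main(void)
    {
        /* one nice-0 task (weight 1024) on a runqueue of total weight
         * 3072: 3 ms of wall time becomes ~1 ms of fair time */
        printf("%lu\n", scale_delta(3000000, 1024, 3072)); /* ~1000000 */
        return 0;
    }
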
| 707 | #else | ||
| 708 | #define sched_info_queued(t) do { } while (0) | ||
| 709 | #define sched_info_switch(t, next) do { } while (0) | ||
| 710 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ | ||
| 711 | 647 | ||
| 712 | /* | 648 | static inline unsigned long |
| 713 | * Adding/removing a task to/from a priority array: | 649 | calc_delta_fair(unsigned long delta_exec, struct load_weight *lw) |
| 714 | */ | ||
| 715 | static void dequeue_task(struct task_struct *p, struct prio_array *array) | ||
| 716 | { | 650 | { |
| 717 | array->nr_active--; | 651 | return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); |
| 718 | list_del(&p->run_list); | ||
| 719 | if (list_empty(array->queue + p->prio)) | ||
| 720 | __clear_bit(p->prio, array->bitmap); | ||
| 721 | } | 652 | } |
| 722 | 653 | ||
| 723 | static void enqueue_task(struct task_struct *p, struct prio_array *array) | 654 | static void update_load_add(struct load_weight *lw, unsigned long inc) |
| 724 | { | 655 | { |
| 725 | sched_info_queued(p); | 656 | lw->weight += inc; |
| 726 | list_add_tail(&p->run_list, array->queue + p->prio); | 657 | lw->inv_weight = 0; |
| 727 | __set_bit(p->prio, array->bitmap); | ||
| 728 | array->nr_active++; | ||
| 729 | p->array = array; | ||
| 730 | } | 658 | } |
| 731 | 659 | ||
| 732 | /* | 660 | static void update_load_sub(struct load_weight *lw, unsigned long dec) |
| 733 | * Put task to the end of the run list without the overhead of dequeue | ||
| 734 | * followed by enqueue. | ||
| 735 | */ | ||
| 736 | static void requeue_task(struct task_struct *p, struct prio_array *array) | ||
| 737 | { | 661 | { |
| 738 | list_move_tail(&p->run_list, array->queue + p->prio); | 662 | lw->weight -= dec; |
| 663 | lw->inv_weight = 0; | ||
| 739 | } | 664 | } |
| 740 | 665 | ||
| 741 | static inline void | 666 | static void __update_curr_load(struct rq *rq, struct load_stat *ls) |
| 742 | enqueue_task_head(struct task_struct *p, struct prio_array *array) | ||
| 743 | { | 667 | { |
| 744 | list_add(&p->run_list, array->queue + p->prio); | 668 | if (rq->curr != rq->idle && ls->load.weight) { |
| 745 | __set_bit(p->prio, array->bitmap); | 669 | ls->delta_exec += ls->delta_stat; |
| 746 | array->nr_active++; | 670 | ls->delta_fair += calc_delta_fair(ls->delta_stat, &ls->load); |
| 747 | p->array = array; | 671 | ls->delta_stat = 0; |
| 672 | } | ||
| 748 | } | 673 | } |
| 749 | 674 | ||
| 750 | /* | 675 | /* |
| 751 | * __normal_prio - return the priority that is based on the static | 676 | * Update delta_exec, delta_fair fields for rq. |
| 752 | * priority but is modified by bonuses/penalties. | ||
| 753 | * | 677 | * |
| 754 | * We scale the actual sleep average [0 .... MAX_SLEEP_AVG] | 678 | * delta_fair clock advances at a rate inversely proportional to |
| 755 | * into the -5 ... 0 ... +5 bonus/penalty range. | 679 | * total load (rq->ls.load.weight) on the runqueue, while |
| 680 | * delta_exec advances at the same rate as wall-clock (provided | ||
| 681 | * cpu is not idle). | ||
| 756 | * | 682 | * |
| 757 | * We use 25% of the full 0...39 priority range so that: | 683 | * delta_exec / delta_fair is a measure of the (smoothed) load on this | ||
| 684 | * runqueue over any given interval. This (smoothed) load is used | ||
| 685 | * during load balancing. | ||
| 758 | * | 686 | * |
| 759 | * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. | 687 | * This function is called /before/ updating rq->ls.load |
| 760 | * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. | 688 | * and when switching tasks. |
| 761 | * | ||
| 762 | * Both properties are important to certain workloads. | ||
| 763 | */ | 689 | */ |
| 764 | 690 | static void update_curr_load(struct rq *rq, u64 now) | |
| 765 | static inline int __normal_prio(struct task_struct *p) | ||
| 766 | { | 691 | { |
| 767 | int bonus, prio; | 692 | struct load_stat *ls = &rq->ls; |
| 768 | 693 | u64 start; | |
| 769 | bonus = CURRENT_BONUS(p) - MAX_BONUS / 2; | ||
| 770 | 694 | ||
| 771 | prio = p->static_prio - bonus; | 695 | start = ls->load_update_start; |
| 772 | if (prio < MAX_RT_PRIO) | 696 | ls->load_update_start = now; |
| 773 | prio = MAX_RT_PRIO; | 697 | ls->delta_stat += now - start; |
| 774 | if (prio > MAX_PRIO-1) | 698 | /* |
| 775 | prio = MAX_PRIO-1; | 699 | * Stagger updates to ls->delta_fair. Very frequent updates |
| 776 | return prio; | 700 | * can be expensive. |
| 701 | */ | ||
| 702 | if (ls->delta_stat >= sysctl_sched_stat_granularity) | ||
| 703 | __update_curr_load(rq, ls); | ||
| 777 | } | 704 | } |
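
The delta_exec/delta_fair ratio described in the comment above can be checked with toy numbers: with two runnable nice-0 tasks the fair clock advances at half wall speed, so a 10 ms sample yields delta_fair = 5 ms and an implied load of 2.0 nice-0 tasks. A throwaway sketch of that arithmetic (plain userspace C, not kernel code):

    #include <stdio.h>

    int main(void)
    {
        /* two runnable nice-0 tasks: total weight 2 * 1024 */
        double total_weight = 2048.0;
        double delta_exec = 10.0;       /* ms of non-idle wall time */
        double delta_fair = delta_exec * 1024.0 / total_weight;

        /* the load metric described above: exec time per fair tick */
        printf("delta_fair = %.1f ms, load = %.1f\n",
               delta_fair, delta_exec / delta_fair);  /* 5.0 ms, 2.0 */
        return 0;
    }
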
| 778 | 705 | ||
| 779 | /* | 706 | /* |
| @@ -791,53 +718,146 @@ static inline int __normal_prio(struct task_struct *p) | |||
| 791 | * this code will need modification | 718 | * this code will need modification |
| 792 | */ | 719 | */ |
| 793 | #define TIME_SLICE_NICE_ZERO DEF_TIMESLICE | 720 | #define TIME_SLICE_NICE_ZERO DEF_TIMESLICE |
| 794 | #define LOAD_WEIGHT(lp) \ | 721 | #define load_weight(lp) \ |
| 795 | (((lp) * SCHED_LOAD_SCALE) / TIME_SLICE_NICE_ZERO) | 722 | (((lp) * SCHED_LOAD_SCALE) / TIME_SLICE_NICE_ZERO) |
| 796 | #define PRIO_TO_LOAD_WEIGHT(prio) \ | 723 | #define PRIO_TO_LOAD_WEIGHT(prio) \ |
| 797 | LOAD_WEIGHT(static_prio_timeslice(prio)) | 724 | load_weight(static_prio_timeslice(prio)) |
| 798 | #define RTPRIO_TO_LOAD_WEIGHT(rp) \ | 725 | #define RTPRIO_TO_LOAD_WEIGHT(rp) \ |
| 799 | (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp)) | 726 | (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + load_weight(rp)) |
| 800 | 727 | ||
| 801 | static void set_load_weight(struct task_struct *p) | 728 | #define WEIGHT_IDLEPRIO 2 |
| 802 | { | 729 | #define WMULT_IDLEPRIO (1 << 31) |
| 803 | if (has_rt_policy(p)) { | 730 | |
| 804 | #ifdef CONFIG_SMP | 731 | /* |
| 805 | if (p == task_rq(p)->migration_thread) | 732 | * Nice levels are multiplicative, with a gentle 10% change for every |
| 806 | /* | 733 | * nice level changed. I.e. when a CPU-bound task goes from nice 0 to |
| 807 | * The migration thread does the actual balancing. | 734 | * nice 1, it will get ~10% less CPU time than another CPU-bound task |
| 808 | * Giving its load any weight will skew balancing | 735 | * that remained on nice 0. |
| 809 | * adversely. | 736 | * |
| 810 | */ | 737 | * The "10% effect" is relative and cumulative: from _any_ nice level, |
| 811 | p->load_weight = 0; | 738 | * if you go up 1 level, it's -10% CPU usage, if you go down 1 level |
| 812 | else | 739 | * it's +10% CPU usage. |
| 813 | #endif | 740 | */ |
| 814 | p->load_weight = RTPRIO_TO_LOAD_WEIGHT(p->rt_priority); | 741 | static const int prio_to_weight[40] = { |
| 815 | } else | 742 | /* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921, |
| 816 | p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio); | 743 | /* -10 */ 9537, 7629, 6103, 4883, 3906, 3125, 2500, 2000, 1600, 1280, |
| 817 | } | 744 | /* 0 */ NICE_0_LOAD /* 1024 */, |
| 745 | /* 1 */ 819, 655, 524, 419, 336, 268, 215, 172, 137, | ||
| 746 | /* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15, | ||
| 747 | }; | ||
| 748 | |||
| 749 | static const u32 prio_to_wmult[40] = { | ||
| 750 | 48356, 60446, 75558, 94446, 118058, 147573, | ||
| 751 | 184467, 230589, 288233, 360285, 450347, | ||
| 752 | 562979, 703746, 879575, 1099582, 1374389, | ||
| 753 | 1717986, 2147483, 2684354, 3355443, 4194304, | ||
| 754 | 5244160, 6557201, 8196502, 10250518, 12782640, | ||
| 755 | 16025997, 19976592, 24970740, 31350126, 39045157, | ||
| 756 | 49367440, 61356675, 76695844, 95443717, 119304647, | ||
| 757 | 148102320, 186737708, 238609294, 286331153, | ||
| 758 | }; | ||
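
The two tables are linked: adjacent prio_to_weight[] entries differ by a factor of about 1.25, which is what produces the ~10% relative effect (two tasks one nice level apart split the CPU roughly 55%/45%), and each prio_to_wmult[] entry is the precomputed 2^32 / weight consumed by calc_delta_mine(). A quick consistency check (userspace sketch; the five weights are copied from the nice 0..+4 entries above):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* nice 0..+4 weights from prio_to_weight[] above */
        static const int weight[5] = { 1024, 819, 655, 524, 419 };
        int i;

        for (i = 0; i < 5; i++) {
            uint32_t inv = (uint32_t)((1ULL << 32) / weight[i]);
            printf("nice +%d: weight %4d inv_weight %10u\n",
                   i, weight[i], (unsigned)inv);
        }
        /* prints 4194304, 5244160, 6557201, 8196502, 10250518 --
         * matching the corresponding prio_to_wmult[] entries above */
        return 0;
    }
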
| 818 | 759 | ||
| 819 | static inline void | 760 | static inline void |
| 820 | inc_raw_weighted_load(struct rq *rq, const struct task_struct *p) | 761 | inc_load(struct rq *rq, const struct task_struct *p, u64 now) |
| 821 | { | 762 | { |
| 822 | rq->raw_weighted_load += p->load_weight; | 763 | update_curr_load(rq, now); |
| 764 | update_load_add(&rq->ls.load, p->se.load.weight); | ||
| 823 | } | 765 | } |
| 824 | 766 | ||
| 825 | static inline void | 767 | static inline void |
| 826 | dec_raw_weighted_load(struct rq *rq, const struct task_struct *p) | 768 | dec_load(struct rq *rq, const struct task_struct *p, u64 now) |
| 827 | { | 769 | { |
| 828 | rq->raw_weighted_load -= p->load_weight; | 770 | update_curr_load(rq, now); |
| 771 | update_load_sub(&rq->ls.load, p->se.load.weight); | ||
| 829 | } | 772 | } |
| 830 | 773 | ||
| 831 | static inline void inc_nr_running(struct task_struct *p, struct rq *rq) | 774 | static inline void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now) |
| 832 | { | 775 | { |
| 833 | rq->nr_running++; | 776 | rq->nr_running++; |
| 834 | inc_raw_weighted_load(rq, p); | 777 | inc_load(rq, p, now); |
| 835 | } | 778 | } |
| 836 | 779 | ||
| 837 | static inline void dec_nr_running(struct task_struct *p, struct rq *rq) | 780 | static inline void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now) |
| 838 | { | 781 | { |
| 839 | rq->nr_running--; | 782 | rq->nr_running--; |
| 840 | dec_raw_weighted_load(rq, p); | 783 | dec_load(rq, p, now); |
| 784 | } | ||
| 785 | |||
| 786 | static void activate_task(struct rq *rq, struct task_struct *p, int wakeup); | ||
| 787 | |||
| 788 | /* | ||
| 789 | * runqueue iterator, to support SMP load-balancing between different | ||
| 790 | * scheduling classes, without having to expose their internal data | ||
| 791 | * structures to the load-balancing proper: | ||
| 792 | */ | ||
| 793 | struct rq_iterator { | ||
| 794 | void *arg; | ||
| 795 | struct task_struct *(*start)(void *); | ||
| 796 | struct task_struct *(*next)(void *); | ||
| 797 | }; | ||
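
The struct above is a plain callback iterator: load balancing walks a class's tasks through start()/next() and never sees the class's queue layout. A self-contained sketch of the same pattern over a toy singly-linked task list (struct task, struct cursor and the list_* helpers are invented for illustration):

    #include <stdio.h>

    struct task { int pid; struct task *link; };
    struct cursor { struct task *pos; };

    struct iter {
        void *arg;
        struct task *(*start)(void *);
        struct task *(*next)(void *);
    };

    static struct task *list_start(void *arg)
    {
        return ((struct cursor *)arg)->pos;
    }

    static struct task *list_next(void *arg)
    {
        struct cursor *c = arg;

        c->pos = c->pos ? c->pos->link : NULL;
        return c->pos;
    }

    int main(void)
    {
        struct task c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct cursor cur = { &a };
        struct iter it = { &cur, list_start, list_next };
        struct task *t;

        /* the "balancer" walks tasks without knowing the list layout */
        for (t = it.start(it.arg); t; t = it.next(it.arg))
            printf("pid %d\n", t->pid);
        return 0;
    }
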
| 798 | |||
| 799 | static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
| 800 | unsigned long max_nr_move, unsigned long max_load_move, | ||
| 801 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
| 802 | int *all_pinned, unsigned long *load_moved, | ||
| 803 | int this_best_prio, int best_prio, int best_prio_seen, | ||
| 804 | struct rq_iterator *iterator); | ||
| 805 | |||
| 806 | #include "sched_stats.h" | ||
| 807 | #include "sched_rt.c" | ||
| 808 | #include "sched_fair.c" | ||
| 809 | #include "sched_idletask.c" | ||
| 810 | #ifdef CONFIG_SCHED_DEBUG | ||
| 811 | # include "sched_debug.c" | ||
| 812 | #endif | ||
| 813 | |||
| 814 | #define sched_class_highest (&rt_sched_class) | ||
| 815 | |||
| 816 | static void set_load_weight(struct task_struct *p) | ||
| 817 | { | ||
| 818 | task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime; | ||
| 819 | p->se.wait_runtime = 0; | ||
| 820 | |||
| 821 | if (task_has_rt_policy(p)) { | ||
| 822 | p->se.load.weight = prio_to_weight[0] * 2; | ||
| 823 | p->se.load.inv_weight = prio_to_wmult[0] >> 1; | ||
| 824 | return; | ||
| 825 | } | ||
| 826 | |||
| 827 | /* | ||
| 828 | * SCHED_IDLE tasks get minimal weight: | ||
| 829 | */ | ||
| 830 | if (p->policy == SCHED_IDLE) { | ||
| 831 | p->se.load.weight = WEIGHT_IDLEPRIO; | ||
| 832 | p->se.load.inv_weight = WMULT_IDLEPRIO; | ||
| 833 | return; | ||
| 834 | } | ||
| 835 | |||
| 836 | p->se.load.weight = prio_to_weight[p->static_prio - MAX_RT_PRIO]; | ||
| 837 | p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO]; | ||
| 838 | } | ||
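
Worked through with the tables above: a nice-0 task gets se.load.weight = NICE_0_LOAD = 1024 and inv_weight = 4194304; an RT task gets prio_to_weight[0] * 2 = 88818 * 2 = 177636 with inv_weight = 48356 >> 1 = 24178 (exactly 2^32 / 177636), i.e. twice the heaviest nice level; and a SCHED_IDLE task gets weight 2 with inv_weight 1 << 31, so it is scheduled essentially only when nothing heavier is runnable.
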
| 839 | |||
| 840 | static void | ||
| 841 | enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now) | ||
| 842 | { | ||
| 843 | sched_info_queued(p); | ||
| 844 | p->sched_class->enqueue_task(rq, p, wakeup, now); | ||
| 845 | p->se.on_rq = 1; | ||
| 846 | } | ||
| 847 | |||
| 848 | static void | ||
| 849 | dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now) | ||
| 850 | { | ||
| 851 | p->sched_class->dequeue_task(rq, p, sleep, now); | ||
| 852 | p->se.on_rq = 0; | ||
| 853 | } | ||
| 854 | |||
| 855 | /* | ||
| 856 | * __normal_prio - return the priority that is based on the static prio | ||
| 857 | */ | ||
| 858 | static inline int __normal_prio(struct task_struct *p) | ||
| 859 | { | ||
| 860 | return p->static_prio; | ||
| 841 | } | 861 | } |
| 842 | 862 | ||
| 843 | /* | 863 | /* |
| @@ -851,7 +871,7 @@ static inline int normal_prio(struct task_struct *p) | |||
| 851 | { | 871 | { |
| 852 | int prio; | 872 | int prio; |
| 853 | 873 | ||
| 854 | if (has_rt_policy(p)) | 874 | if (task_has_rt_policy(p)) |
| 855 | prio = MAX_RT_PRIO-1 - p->rt_priority; | 875 | prio = MAX_RT_PRIO-1 - p->rt_priority; |
| 856 | else | 876 | else |
| 857 | prio = __normal_prio(p); | 877 | prio = __normal_prio(p); |
| @@ -879,222 +899,47 @@ static int effective_prio(struct task_struct *p) | |||
| 879 | } | 899 | } |
| 880 | 900 | ||
| 881 | /* | 901 | /* |
| 882 | * __activate_task - move a task to the runqueue. | 902 | * activate_task - move a task to the runqueue. |
| 883 | */ | ||
| 884 | static void __activate_task(struct task_struct *p, struct rq *rq) | ||
| 885 | { | ||
| 886 | struct prio_array *target = rq->active; | ||
| 887 | |||
| 888 | if (batch_task(p)) | ||
| 889 | target = rq->expired; | ||
| 890 | enqueue_task(p, target); | ||
| 891 | inc_nr_running(p, rq); | ||
| 892 | } | ||
| 893 | |||
| 894 | /* | ||
| 895 | * __activate_idle_task - move idle task to the _front_ of runqueue. | ||
| 896 | */ | ||
| 897 | static inline void __activate_idle_task(struct task_struct *p, struct rq *rq) | ||
| 898 | { | ||
| 899 | enqueue_task_head(p, rq->active); | ||
| 900 | inc_nr_running(p, rq); | ||
| 901 | } | ||
| 902 | |||
| 903 | /* | ||
| 904 | * Recalculate p->normal_prio and p->prio after having slept, | ||
| 905 | * updating the sleep-average too: | ||
| 906 | */ | 903 | */ |
| 907 | static int recalc_task_prio(struct task_struct *p, unsigned long long now) | 904 | static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) |
| 908 | { | 905 | { |
| 909 | /* Caller must always ensure 'now >= p->timestamp' */ | 906 | u64 now = rq_clock(rq); |
| 910 | unsigned long sleep_time = now - p->timestamp; | ||
| 911 | |||
| 912 | if (batch_task(p)) | ||
| 913 | sleep_time = 0; | ||
| 914 | |||
| 915 | if (likely(sleep_time > 0)) { | ||
| 916 | /* | ||
| 917 | * This ceiling is set to the lowest priority that would allow | ||
| 918 | * a task to be reinserted into the active array on timeslice | ||
| 919 | * completion. | ||
| 920 | */ | ||
| 921 | unsigned long ceiling = INTERACTIVE_SLEEP(p); | ||
| 922 | |||
| 923 | if (p->mm && sleep_time > ceiling && p->sleep_avg < ceiling) { | ||
| 924 | /* | ||
| 925 | * Prevents user tasks from achieving best priority | ||
| 926 | * with one single large enough sleep. | ||
| 927 | */ | ||
| 928 | p->sleep_avg = ceiling; | ||
| 929 | /* | ||
| 930 | * Using INTERACTIVE_SLEEP() as a ceiling places a | ||
| 931 | * nice(0) task 1ms sleep away from promotion, and | ||
| 932 | * gives it 700ms to round-robin with no chance of | ||
| 933 | * being demoted. This is more than generous, so | ||
| 934 | * mark this sleep as non-interactive to prevent the | ||
| 935 | * on-runqueue bonus logic from intervening should | ||
| 936 | * this task not receive cpu immediately. | ||
| 937 | */ | ||
| 938 | p->sleep_type = SLEEP_NONINTERACTIVE; | ||
| 939 | } else { | ||
| 940 | /* | ||
| 941 | * Tasks waking from uninterruptible sleep are | ||
| 942 | * limited in their sleep_avg rise as they | ||
| 943 | * are likely to be waiting on I/O | ||
| 944 | */ | ||
| 945 | if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) { | ||
| 946 | if (p->sleep_avg >= ceiling) | ||
| 947 | sleep_time = 0; | ||
| 948 | else if (p->sleep_avg + sleep_time >= | ||
| 949 | ceiling) { | ||
| 950 | p->sleep_avg = ceiling; | ||
| 951 | sleep_time = 0; | ||
| 952 | } | ||
| 953 | } | ||
| 954 | 907 | ||
| 955 | /* | 908 | if (p->state == TASK_UNINTERRUPTIBLE) |
| 956 | * This code gives a bonus to interactive tasks. | 909 | rq->nr_uninterruptible--; |
| 957 | * | ||
| 958 | * The boost works by updating the 'average sleep time' | ||
| 959 | * value here, based on ->timestamp. The more time a | ||
| 960 | * task spends sleeping, the higher the average gets - | ||
| 961 | * and the higher the priority boost gets as well. | ||
| 962 | */ | ||
| 963 | p->sleep_avg += sleep_time; | ||
| 964 | |||
| 965 | } | ||
| 966 | if (p->sleep_avg > NS_MAX_SLEEP_AVG) | ||
| 967 | p->sleep_avg = NS_MAX_SLEEP_AVG; | ||
| 968 | } | ||
| 969 | 910 | ||
| 970 | return effective_prio(p); | 911 | enqueue_task(rq, p, wakeup, now); |
| 912 | inc_nr_running(p, rq, now); | ||
| 971 | } | 913 | } |
| 972 | 914 | ||
| 973 | /* | 915 | /* |
| 974 | * activate_task - move a task to the runqueue and do priority recalculation | 916 | * activate_idle_task - move idle task to the _front_ of runqueue. |
| 975 | * | ||
| 976 | * Update all the scheduling statistics stuff. (sleep average | ||
| 977 | * calculation, priority modifiers, etc.) | ||
| 978 | */ | 917 | */ |
| 979 | static void activate_task(struct task_struct *p, struct rq *rq, int local) | 918 | static inline void activate_idle_task(struct task_struct *p, struct rq *rq) |
| 980 | { | 919 | { |
| 981 | unsigned long long now; | 920 | u64 now = rq_clock(rq); |
| 982 | 921 | ||
| 983 | if (rt_task(p)) | 922 | if (p->state == TASK_UNINTERRUPTIBLE) |
| 984 | goto out; | 923 | rq->nr_uninterruptible--; |
| 985 | |||
| 986 | now = sched_clock(); | ||
| 987 | #ifdef CONFIG_SMP | ||
| 988 | if (!local) { | ||
| 989 | /* Compensate for drifting sched_clock */ | ||
| 990 | struct rq *this_rq = this_rq(); | ||
| 991 | now = (now - this_rq->most_recent_timestamp) | ||
| 992 | + rq->most_recent_timestamp; | ||
| 993 | } | ||
| 994 | #endif | ||
| 995 | |||
| 996 | /* | ||
| 997 | * Sleep time is in units of nanosecs, so shift by 20 to get a | ||
| 998 | * milliseconds-range estimation of the amount of time that the task | ||
| 999 | * spent sleeping: | ||
| 1000 | */ | ||
| 1001 | if (unlikely(prof_on == SLEEP_PROFILING)) { | ||
| 1002 | if (p->state == TASK_UNINTERRUPTIBLE) | ||
| 1003 | profile_hits(SLEEP_PROFILING, (void *)get_wchan(p), | ||
| 1004 | (now - p->timestamp) >> 20); | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | p->prio = recalc_task_prio(p, now); | ||
| 1008 | 924 | ||
| 1009 | /* | 925 | enqueue_task(rq, p, 0, now); |
| 1010 | * This checks to make sure it's not an uninterruptible task | 926 | inc_nr_running(p, rq, now); |
| 1011 | * that is now waking up. | ||
| 1012 | */ | ||
| 1013 | if (p->sleep_type == SLEEP_NORMAL) { | ||
| 1014 | /* | ||
| 1015 | * Tasks which were woken up by interrupts (ie. hw events) | ||
| 1016 | * are most likely of interactive nature. So we give them | ||
| 1017 | * the credit of extending their sleep time to the period | ||
| 1018 | * of time they spend on the runqueue, waiting for execution | ||
| 1019 | * on a CPU, first time around: | ||
| 1020 | */ | ||
| 1021 | if (in_interrupt()) | ||
| 1022 | p->sleep_type = SLEEP_INTERRUPTED; | ||
| 1023 | else { | ||
| 1024 | /* | ||
| 1025 | * Normal first-time wakeups get a credit too for | ||
| 1026 | * on-runqueue time, but it will be weighted down: | ||
| 1027 | */ | ||
| 1028 | p->sleep_type = SLEEP_INTERACTIVE; | ||
| 1029 | } | ||
| 1030 | } | ||
| 1031 | p->timestamp = now; | ||
| 1032 | out: | ||
| 1033 | __activate_task(p, rq); | ||
| 1034 | } | 927 | } |
| 1035 | 928 | ||
| 1036 | /* | 929 | /* |
| 1037 | * deactivate_task - remove a task from the runqueue. | 930 | * deactivate_task - remove a task from the runqueue. |
| 1038 | */ | 931 | */ |
| 1039 | static void deactivate_task(struct task_struct *p, struct rq *rq) | 932 | static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) |
| 1040 | { | ||
| 1041 | dec_nr_running(p, rq); | ||
| 1042 | dequeue_task(p, p->array); | ||
| 1043 | p->array = NULL; | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | /* | ||
| 1047 | * resched_task - mark a task 'to be rescheduled now'. | ||
| 1048 | * | ||
| 1049 | * On UP this means the setting of the need_resched flag, on SMP it | ||
| 1050 | * might also involve a cross-CPU call to trigger the scheduler on | ||
| 1051 | * the target CPU. | ||
| 1052 | */ | ||
| 1053 | #ifdef CONFIG_SMP | ||
| 1054 | |||
| 1055 | #ifndef tsk_is_polling | ||
| 1056 | #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) | ||
| 1057 | #endif | ||
| 1058 | |||
| 1059 | static void resched_task(struct task_struct *p) | ||
| 1060 | { | 933 | { |
| 1061 | int cpu; | 934 | u64 now = rq_clock(rq); |
| 1062 | 935 | ||
| 1063 | assert_spin_locked(&task_rq(p)->lock); | 936 | if (p->state == TASK_UNINTERRUPTIBLE) |
| 937 | rq->nr_uninterruptible++; | ||
| 1064 | 938 | ||
| 1065 | if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) | 939 | dequeue_task(rq, p, sleep, now); |
| 1066 | return; | 940 | dec_nr_running(p, rq, now); |
| 1067 | |||
| 1068 | set_tsk_thread_flag(p, TIF_NEED_RESCHED); | ||
| 1069 | |||
| 1070 | cpu = task_cpu(p); | ||
| 1071 | if (cpu == smp_processor_id()) | ||
| 1072 | return; | ||
| 1073 | |||
| 1074 | /* NEED_RESCHED must be visible before we test polling */ | ||
| 1075 | smp_mb(); | ||
| 1076 | if (!tsk_is_polling(p)) | ||
| 1077 | smp_send_reschedule(cpu); | ||
| 1078 | } | 941 | } |
| 1079 | 942 | ||
| 1080 | static void resched_cpu(int cpu) | ||
| 1081 | { | ||
| 1082 | struct rq *rq = cpu_rq(cpu); | ||
| 1083 | unsigned long flags; | ||
| 1084 | |||
| 1085 | if (!spin_trylock_irqsave(&rq->lock, flags)) | ||
| 1086 | return; | ||
| 1087 | resched_task(cpu_curr(cpu)); | ||
| 1088 | spin_unlock_irqrestore(&rq->lock, flags); | ||
| 1089 | } | ||
| 1090 | #else | ||
| 1091 | static inline void resched_task(struct task_struct *p) | ||
| 1092 | { | ||
| 1093 | assert_spin_locked(&task_rq(p)->lock); | ||
| 1094 | set_tsk_need_resched(p); | ||
| 1095 | } | ||
| 1096 | #endif | ||
| 1097 | |||
| 1098 | /** | 943 | /** |
| 1099 | * task_curr - is this task currently executing on a CPU? | 944 | * task_curr - is this task currently executing on a CPU? |
| 1100 | * @p: the task in question. | 945 | * @p: the task in question. |
| @@ -1107,10 +952,42 @@ inline int task_curr(const struct task_struct *p) | |||
| 1107 | /* Used instead of source_load when we know the type == 0 */ | 952 | /* Used instead of source_load when we know the type == 0 */ |
| 1108 | unsigned long weighted_cpuload(const int cpu) | 953 | unsigned long weighted_cpuload(const int cpu) |
| 1109 | { | 954 | { |
| 1110 | return cpu_rq(cpu)->raw_weighted_load; | 955 | return cpu_rq(cpu)->ls.load.weight; |
| 956 | } | ||
| 957 | |||
| 958 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
| 959 | { | ||
| 960 | #ifdef CONFIG_SMP | ||
| 961 | task_thread_info(p)->cpu = cpu; | ||
| 962 | set_task_cfs_rq(p); | ||
| 963 | #endif | ||
| 1111 | } | 964 | } |
| 1112 | 965 | ||
| 1113 | #ifdef CONFIG_SMP | 966 | #ifdef CONFIG_SMP |
| 967 | |||
| 968 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | ||
| 969 | { | ||
| 970 | int old_cpu = task_cpu(p); | ||
| 971 | struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu); | ||
| 972 | u64 clock_offset, fair_clock_offset; | ||
| 973 | |||
| 974 | clock_offset = old_rq->clock - new_rq->clock; | ||
| 975 | fair_clock_offset = old_rq->cfs.fair_clock - | ||
| 976 | new_rq->cfs.fair_clock; | ||
| 977 | if (p->se.wait_start) | ||
| 978 | p->se.wait_start -= clock_offset; | ||
| 979 | if (p->se.wait_start_fair) | ||
| 980 | p->se.wait_start_fair -= fair_clock_offset; | ||
| 981 | if (p->se.sleep_start) | ||
| 982 | p->se.sleep_start -= clock_offset; | ||
| 983 | if (p->se.block_start) | ||
| 984 | p->se.block_start -= clock_offset; | ||
| 985 | if (p->se.sleep_start_fair) | ||
| 986 | p->se.sleep_start_fair -= fair_clock_offset; | ||
| 987 | |||
| 988 | __set_task_cpu(p, new_cpu); | ||
| 989 | } | ||
| 990 | |||
| 1114 | struct migration_req { | 991 | struct migration_req { |
| 1115 | struct list_head list; | 992 | struct list_head list; |
| 1116 | 993 | ||
| @@ -1133,7 +1010,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | |||
| 1133 | * If the task is not on a runqueue (and not running), then | 1010 | * If the task is not on a runqueue (and not running), then |
| 1134 | * it is sufficient to simply update the task's cpu field. | 1011 | * it is sufficient to simply update the task's cpu field. |
| 1135 | */ | 1012 | */ |
| 1136 | if (!p->array && !task_running(rq, p)) { | 1013 | if (!p->se.on_rq && !task_running(rq, p)) { |
| 1137 | set_task_cpu(p, dest_cpu); | 1014 | set_task_cpu(p, dest_cpu); |
| 1138 | return 0; | 1015 | return 0; |
| 1139 | } | 1016 | } |
| @@ -1158,9 +1035,8 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | |||
| 1158 | void wait_task_inactive(struct task_struct *p) | 1035 | void wait_task_inactive(struct task_struct *p) |
| 1159 | { | 1036 | { |
| 1160 | unsigned long flags; | 1037 | unsigned long flags; |
| 1038 | int running, on_rq; | ||
| 1161 | struct rq *rq; | 1039 | struct rq *rq; |
| 1162 | struct prio_array *array; | ||
| 1163 | int running; | ||
| 1164 | 1040 | ||
| 1165 | repeat: | 1041 | repeat: |
| 1166 | /* | 1042 | /* |
| @@ -1192,7 +1068,7 @@ repeat: | |||
| 1192 | */ | 1068 | */ |
| 1193 | rq = task_rq_lock(p, &flags); | 1069 | rq = task_rq_lock(p, &flags); |
| 1194 | running = task_running(rq, p); | 1070 | running = task_running(rq, p); |
| 1195 | array = p->array; | 1071 | on_rq = p->se.on_rq; |
| 1196 | task_rq_unlock(rq, &flags); | 1072 | task_rq_unlock(rq, &flags); |
| 1197 | 1073 | ||
| 1198 | /* | 1074 | /* |
| @@ -1215,7 +1091,7 @@ repeat: | |||
| 1215 | * running right now), it's preempted, and we should | 1091 | * running right now), it's preempted, and we should |
| 1216 | * yield - it could be a while. | 1092 | * yield - it could be a while. |
| 1217 | */ | 1093 | */ |
| 1218 | if (unlikely(array)) { | 1094 | if (unlikely(on_rq)) { |
| 1219 | yield(); | 1095 | yield(); |
| 1220 | goto repeat; | 1096 | goto repeat; |
| 1221 | } | 1097 | } |
| @@ -1261,11 +1137,12 @@ void kick_process(struct task_struct *p) | |||
| 1261 | static inline unsigned long source_load(int cpu, int type) | 1137 | static inline unsigned long source_load(int cpu, int type) |
| 1262 | { | 1138 | { |
| 1263 | struct rq *rq = cpu_rq(cpu); | 1139 | struct rq *rq = cpu_rq(cpu); |
| 1140 | unsigned long total = weighted_cpuload(cpu); | ||
| 1264 | 1141 | ||
| 1265 | if (type == 0) | 1142 | if (type == 0) |
| 1266 | return rq->raw_weighted_load; | 1143 | return total; |
| 1267 | 1144 | ||
| 1268 | return min(rq->cpu_load[type-1], rq->raw_weighted_load); | 1145 | return min(rq->cpu_load[type-1], total); |
| 1269 | } | 1146 | } |
| 1270 | 1147 | ||
| 1271 | /* | 1148 | /* |
| @@ -1275,11 +1152,12 @@ static inline unsigned long source_load(int cpu, int type) | |||
| 1275 | static inline unsigned long target_load(int cpu, int type) | 1152 | static inline unsigned long target_load(int cpu, int type) |
| 1276 | { | 1153 | { |
| 1277 | struct rq *rq = cpu_rq(cpu); | 1154 | struct rq *rq = cpu_rq(cpu); |
| 1155 | unsigned long total = weighted_cpuload(cpu); | ||
| 1278 | 1156 | ||
| 1279 | if (type == 0) | 1157 | if (type == 0) |
| 1280 | return rq->raw_weighted_load; | 1158 | return total; |
| 1281 | 1159 | ||
| 1282 | return max(rq->cpu_load[type-1], rq->raw_weighted_load); | 1160 | return max(rq->cpu_load[type-1], total); |
| 1283 | } | 1161 | } |
| 1284 | 1162 | ||
| 1285 | /* | 1163 | /* |
| @@ -1288,9 +1166,10 @@ static inline unsigned long target_load(int cpu, int type) | |||
| 1288 | static inline unsigned long cpu_avg_load_per_task(int cpu) | 1166 | static inline unsigned long cpu_avg_load_per_task(int cpu) |
| 1289 | { | 1167 | { |
| 1290 | struct rq *rq = cpu_rq(cpu); | 1168 | struct rq *rq = cpu_rq(cpu); |
| 1169 | unsigned long total = weighted_cpuload(cpu); | ||
| 1291 | unsigned long n = rq->nr_running; | 1170 | unsigned long n = rq->nr_running; |
| 1292 | 1171 | ||
| 1293 | return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE; | 1172 | return n ? total / n : SCHED_LOAD_SCALE; |
| 1294 | } | 1173 | } |
| 1295 | 1174 | ||
| 1296 | /* | 1175 | /* |
| @@ -1392,9 +1271,9 @@ static int sched_balance_self(int cpu, int flag) | |||
| 1392 | struct sched_domain *tmp, *sd = NULL; | 1271 | struct sched_domain *tmp, *sd = NULL; |
| 1393 | 1272 | ||
| 1394 | for_each_domain(cpu, tmp) { | 1273 | for_each_domain(cpu, tmp) { |
| 1395 | /* | 1274 | /* |
| 1396 | * If power savings logic is enabled for a domain, stop there. | 1275 | * If power savings logic is enabled for a domain, stop there. |
| 1397 | */ | 1276 | */ |
| 1398 | if (tmp->flags & SD_POWERSAVINGS_BALANCE) | 1277 | if (tmp->flags & SD_POWERSAVINGS_BALANCE) |
| 1399 | break; | 1278 | break; |
| 1400 | if (tmp->flags & flag) | 1279 | if (tmp->flags & flag) |
| @@ -1477,9 +1356,9 @@ static int wake_idle(int cpu, struct task_struct *p) | |||
| 1477 | if (idle_cpu(i)) | 1356 | if (idle_cpu(i)) |
| 1478 | return i; | 1357 | return i; |
| 1479 | } | 1358 | } |
| 1480 | } | 1359 | } else { |
| 1481 | else | ||
| 1482 | break; | 1360 | break; |
| 1361 | } | ||
| 1483 | } | 1362 | } |
| 1484 | return cpu; | 1363 | return cpu; |
| 1485 | } | 1364 | } |
| @@ -1521,7 +1400,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
| 1521 | if (!(old_state & state)) | 1400 | if (!(old_state & state)) |
| 1522 | goto out; | 1401 | goto out; |
| 1523 | 1402 | ||
| 1524 | if (p->array) | 1403 | if (p->se.on_rq) |
| 1525 | goto out_running; | 1404 | goto out_running; |
| 1526 | 1405 | ||
| 1527 | cpu = task_cpu(p); | 1406 | cpu = task_cpu(p); |
| @@ -1576,11 +1455,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
| 1576 | * of the current CPU: | 1455 | * of the current CPU: |
| 1577 | */ | 1456 | */ |
| 1578 | if (sync) | 1457 | if (sync) |
| 1579 | tl -= current->load_weight; | 1458 | tl -= current->se.load.weight; |
| 1580 | 1459 | ||
| 1581 | if ((tl <= load && | 1460 | if ((tl <= load && |
| 1582 | tl + target_load(cpu, idx) <= tl_per_task) || | 1461 | tl + target_load(cpu, idx) <= tl_per_task) || |
| 1583 | 100*(tl + p->load_weight) <= imbalance*load) { | 1462 | 100*(tl + p->se.load.weight) <= imbalance*load) { |
| 1584 | /* | 1463 | /* |
| 1585 | * This domain has SD_WAKE_AFFINE and | 1464 | * This domain has SD_WAKE_AFFINE and |
| 1586 | * p is cache cold in this domain, and | 1465 | * p is cache cold in this domain, and |
| @@ -1614,7 +1493,7 @@ out_set_cpu: | |||
| 1614 | old_state = p->state; | 1493 | old_state = p->state; |
| 1615 | if (!(old_state & state)) | 1494 | if (!(old_state & state)) |
| 1616 | goto out; | 1495 | goto out; |
| 1617 | if (p->array) | 1496 | if (p->se.on_rq) |
| 1618 | goto out_running; | 1497 | goto out_running; |
| 1619 | 1498 | ||
| 1620 | this_cpu = smp_processor_id(); | 1499 | this_cpu = smp_processor_id(); |
| @@ -1623,25 +1502,7 @@ out_set_cpu: | |||
| 1623 | 1502 | ||
| 1624 | out_activate: | 1503 | out_activate: |
| 1625 | #endif /* CONFIG_SMP */ | 1504 | #endif /* CONFIG_SMP */ |
| 1626 | if (old_state == TASK_UNINTERRUPTIBLE) { | 1505 | activate_task(rq, p, 1); |
| 1627 | rq->nr_uninterruptible--; | ||
| 1628 | /* | ||
| 1629 | * Tasks on involuntary sleep don't earn | ||
| 1630 | * sleep_avg beyond just interactive state. | ||
| 1631 | */ | ||
| 1632 | p->sleep_type = SLEEP_NONINTERACTIVE; | ||
| 1633 | } else | ||
| 1634 | |||
| 1635 | /* | ||
| 1636 | * Tasks that have marked their sleep as noninteractive get | ||
| 1637 | * woken up with their sleep average not weighted in an | ||
| 1638 | * interactive way. | ||
| 1639 | */ | ||
| 1640 | if (old_state & TASK_NONINTERACTIVE) | ||
| 1641 | p->sleep_type = SLEEP_NONINTERACTIVE; | ||
| 1642 | |||
| 1643 | |||
| 1644 | activate_task(p, rq, cpu == this_cpu); | ||
| 1645 | /* | 1506 | /* |
| 1646 | * Sync wakeups (i.e. those types of wakeups where the waker | 1507 | * Sync wakeups (i.e. those types of wakeups where the waker |
| 1647 | * has indicated that it will leave the CPU in short order) | 1508 | * has indicated that it will leave the CPU in short order) |
| @@ -1650,10 +1511,8 @@ out_activate: | |||
| 1650 | * the waker guarantees that the freshly woken up task is going | 1511 | * the waker guarantees that the freshly woken up task is going |
| 1651 | * to be considered on this CPU.) | 1512 | * to be considered on this CPU.) |
| 1652 | */ | 1513 | */ |
| 1653 | if (!sync || cpu != this_cpu) { | 1514 | if (!sync || cpu != this_cpu) |
| 1654 | if (TASK_PREEMPTS_CURR(p, rq)) | 1515 | check_preempt_curr(rq, p); |
| 1655 | resched_task(rq->curr); | ||
| 1656 | } | ||
| 1657 | success = 1; | 1516 | success = 1; |
| 1658 | 1517 | ||
| 1659 | out_running: | 1518 | out_running: |
| @@ -1676,19 +1535,36 @@ int fastcall wake_up_state(struct task_struct *p, unsigned int state) | |||
| 1676 | return try_to_wake_up(p, state, 0); | 1535 | return try_to_wake_up(p, state, 0); |
| 1677 | } | 1536 | } |
| 1678 | 1537 | ||
| 1679 | static void task_running_tick(struct rq *rq, struct task_struct *p); | ||
| 1680 | /* | 1538 | /* |
| 1681 | * Perform scheduler related setup for a newly forked process p. | 1539 | * Perform scheduler related setup for a newly forked process p. |
| 1682 | * p is forked by current. | 1540 | * p is forked by current. |
| 1683 | */ | 1541 | * |
| 1684 | void fastcall sched_fork(struct task_struct *p, int clone_flags) | 1542 | * __sched_fork() is basic setup used by init_idle() too: |
| 1685 | { | 1543 | */ |
| 1686 | int cpu = get_cpu(); | 1544 | static void __sched_fork(struct task_struct *p) |
| 1545 | { | ||
| 1546 | p->se.wait_start_fair = 0; | ||
| 1547 | p->se.wait_start = 0; | ||
| 1548 | p->se.exec_start = 0; | ||
| 1549 | p->se.sum_exec_runtime = 0; | ||
| 1550 | p->se.delta_exec = 0; | ||
| 1551 | p->se.delta_fair_run = 0; | ||
| 1552 | p->se.delta_fair_sleep = 0; | ||
| 1553 | p->se.wait_runtime = 0; | ||
| 1554 | p->se.sum_wait_runtime = 0; | ||
| 1555 | p->se.sum_sleep_runtime = 0; | ||
| 1556 | p->se.sleep_start = 0; | ||
| 1557 | p->se.sleep_start_fair = 0; | ||
| 1558 | p->se.block_start = 0; | ||
| 1559 | p->se.sleep_max = 0; | ||
| 1560 | p->se.block_max = 0; | ||
| 1561 | p->se.exec_max = 0; | ||
| 1562 | p->se.wait_max = 0; | ||
| 1563 | p->se.wait_runtime_overruns = 0; | ||
| 1564 | p->se.wait_runtime_underruns = 0; | ||
| 1687 | 1565 | ||
| 1688 | #ifdef CONFIG_SMP | 1566 | INIT_LIST_HEAD(&p->run_list); |
| 1689 | cpu = sched_balance_self(cpu, SD_BALANCE_FORK); | 1567 | p->se.on_rq = 0; |
| 1690 | #endif | ||
| 1691 | set_task_cpu(p, cpu); | ||
| 1692 | 1568 | ||
| 1693 | /* | 1569 | /* |
| 1694 | * We mark the process as running here, but have not actually | 1570 | * We mark the process as running here, but have not actually |
| @@ -1697,16 +1573,29 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags) | |||
| 1697 | * event cannot wake it up and insert it on the runqueue either. | 1573 | * event cannot wake it up and insert it on the runqueue either. |
| 1698 | */ | 1574 | */ |
| 1699 | p->state = TASK_RUNNING; | 1575 | p->state = TASK_RUNNING; |
| 1576 | } | ||
| 1577 | |||
| 1578 | /* | ||
| 1579 | * fork()/clone()-time setup: | ||
| 1580 | */ | ||
| 1581 | void sched_fork(struct task_struct *p, int clone_flags) | ||
| 1582 | { | ||
| 1583 | int cpu = get_cpu(); | ||
| 1584 | |||
| 1585 | __sched_fork(p); | ||
| 1586 | |||
| 1587 | #ifdef CONFIG_SMP | ||
| 1588 | cpu = sched_balance_self(cpu, SD_BALANCE_FORK); | ||
| 1589 | #endif | ||
| 1590 | __set_task_cpu(p, cpu); | ||
| 1700 | 1591 | ||
| 1701 | /* | 1592 | /* |
| 1702 | * Make sure we do not leak PI boosting priority to the child: | 1593 | * Make sure we do not leak PI boosting priority to the child: |
| 1703 | */ | 1594 | */ |
| 1704 | p->prio = current->normal_prio; | 1595 | p->prio = current->normal_prio; |
| 1705 | 1596 | ||
| 1706 | INIT_LIST_HEAD(&p->run_list); | ||
| 1707 | p->array = NULL; | ||
| 1708 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 1597 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
| 1709 | if (unlikely(sched_info_on())) | 1598 | if (likely(sched_info_on())) |
| 1710 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 1599 | memset(&p->sched_info, 0, sizeof(p->sched_info)); |
| 1711 | #endif | 1600 | #endif |
| 1712 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 1601 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) |
| @@ -1716,34 +1605,16 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags) | |||
| 1716 | /* Want to start with kernel preemption disabled. */ | 1605 | /* Want to start with kernel preemption disabled. */ |
| 1717 | task_thread_info(p)->preempt_count = 1; | 1606 | task_thread_info(p)->preempt_count = 1; |
| 1718 | #endif | 1607 | #endif |
| 1719 | /* | ||
| 1720 | * Share the timeslice between parent and child, thus the | ||
| 1721 | * total amount of pending timeslices in the system doesn't change, | ||
| 1722 | * resulting in more scheduling fairness. | ||
| 1723 | */ | ||
| 1724 | local_irq_disable(); | ||
| 1725 | p->time_slice = (current->time_slice + 1) >> 1; | ||
| 1726 | /* | ||
| 1727 | * The remainder of the first timeslice might be recovered by | ||
| 1728 | * the parent if the child exits early enough. | ||
| 1729 | */ | ||
| 1730 | p->first_time_slice = 1; | ||
| 1731 | current->time_slice >>= 1; | ||
| 1732 | p->timestamp = sched_clock(); | ||
| 1733 | if (unlikely(!current->time_slice)) { | ||
| 1734 | /* | ||
| 1735 | * This case is rare, it happens when the parent has only | ||
| 1736 | * a single jiffy left from its timeslice. Taking the | ||
| 1737 | * runqueue lock is not a problem. | ||
| 1738 | */ | ||
| 1739 | current->time_slice = 1; | ||
| 1740 | task_running_tick(cpu_rq(cpu), current); | ||
| 1741 | } | ||
| 1742 | local_irq_enable(); | ||
| 1743 | put_cpu(); | 1608 | put_cpu(); |
| 1744 | } | 1609 | } |
| 1745 | 1610 | ||
| 1746 | /* | 1612 | * After fork, the child runs first by default. If set to 0, the | ||
| 1613 | * parent will (try to) run first. | ||
| 1613 | * parent will (try to) run first. | ||
| 1614 | */ | ||
| 1615 | unsigned int __read_mostly sysctl_sched_child_runs_first = 1; | ||
| 1616 | |||
| 1617 | /* | ||
| 1747 | * wake_up_new_task - wake up a newly created task for the first time. | 1618 | * wake_up_new_task - wake up a newly created task for the first time. |
| 1748 | * | 1619 | * |
| 1749 | * This function will do some initial scheduler statistics housekeeping | 1620 | * This function will do some initial scheduler statistics housekeeping |
| @@ -1752,107 +1623,27 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags) | |||
| 1752 | */ | 1623 | */ |
| 1753 | void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | 1624 | void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) |
| 1754 | { | 1625 | { |
| 1755 | struct rq *rq, *this_rq; | ||
| 1756 | unsigned long flags; | 1626 | unsigned long flags; |
| 1757 | int this_cpu, cpu; | 1627 | struct rq *rq; |
| 1628 | int this_cpu; | ||
| 1758 | 1629 | ||
| 1759 | rq = task_rq_lock(p, &flags); | 1630 | rq = task_rq_lock(p, &flags); |
| 1760 | BUG_ON(p->state != TASK_RUNNING); | 1631 | BUG_ON(p->state != TASK_RUNNING); |
| 1761 | this_cpu = smp_processor_id(); | 1632 | this_cpu = smp_processor_id(); /* parent's CPU */ |
| 1762 | cpu = task_cpu(p); | ||
| 1763 | |||
| 1764 | /* | ||
| 1765 | * We decrease the sleep average of forking parents | ||
| 1766 | * and children as well, to keep max-interactive tasks | ||
| 1767 | * from forking tasks that are max-interactive. The parent | ||
| 1768 | * (current) is done further down, under its lock. | ||
| 1769 | */ | ||
| 1770 | p->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(p) * | ||
| 1771 | CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS); | ||
| 1772 | 1633 | ||
| 1773 | p->prio = effective_prio(p); | 1634 | p->prio = effective_prio(p); |
| 1774 | 1635 | ||
| 1775 | if (likely(cpu == this_cpu)) { | 1636 | if (!sysctl_sched_child_runs_first || (clone_flags & CLONE_VM) || |
| 1776 | if (!(clone_flags & CLONE_VM)) { | 1637 | task_cpu(p) != this_cpu || !current->se.on_rq) { |
| 1777 | /* | 1638 | activate_task(rq, p, 0); |
| 1778 | * The VM isn't cloned, so we're in a good position to | ||
| 1779 | * do child-runs-first in anticipation of an exec. This | ||
| 1780 | * usually avoids a lot of COW overhead. | ||
| 1781 | */ | ||
| 1782 | if (unlikely(!current->array)) | ||
| 1783 | __activate_task(p, rq); | ||
| 1784 | else { | ||
| 1785 | p->prio = current->prio; | ||
| 1786 | p->normal_prio = current->normal_prio; | ||
| 1787 | list_add_tail(&p->run_list, ¤t->run_list); | ||
| 1788 | p->array = current->array; | ||
| 1789 | p->array->nr_active++; | ||
| 1790 | inc_nr_running(p, rq); | ||
| 1791 | } | ||
| 1792 | set_need_resched(); | ||
| 1793 | } else | ||
| 1794 | /* Run child last */ | ||
| 1795 | __activate_task(p, rq); | ||
| 1796 | /* | ||
| 1797 | * We skip the following code due to cpu == this_cpu | ||
| 1798 | * | ||
| 1799 | * task_rq_unlock(rq, &flags); | ||
| 1800 | * this_rq = task_rq_lock(current, &flags); | ||
| 1801 | */ | ||
| 1802 | this_rq = rq; | ||
| 1803 | } else { | 1639 | } else { |
| 1804 | this_rq = cpu_rq(this_cpu); | ||
| 1805 | |||
| 1806 | /* | 1640 | /* |
| 1807 | * Not the local CPU - must adjust timestamp. This should | 1641 | * Let the scheduling class do new task startup |
| 1808 | * get optimised away in the !CONFIG_SMP case. | 1642 | * management (if any): |
| 1809 | */ | 1643 | */ |
| 1810 | p->timestamp = (p->timestamp - this_rq->most_recent_timestamp) | 1644 | p->sched_class->task_new(rq, p); |
| 1811 | + rq->most_recent_timestamp; | ||
| 1812 | __activate_task(p, rq); | ||
| 1813 | if (TASK_PREEMPTS_CURR(p, rq)) | ||
| 1814 | resched_task(rq->curr); | ||
| 1815 | |||
| 1816 | /* | ||
| 1817 | * Parent and child are on different CPUs, now get the | ||
| 1818 | * parent runqueue to update the parent's ->sleep_avg: | ||
| 1819 | */ | ||
| 1820 | task_rq_unlock(rq, &flags); | ||
| 1821 | this_rq = task_rq_lock(current, &flags); | ||
| 1822 | } | ||
| 1823 | current->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(current) * | ||
| 1824 | PARENT_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS); | ||
| 1825 | task_rq_unlock(this_rq, &flags); | ||
| 1826 | } | ||
| 1827 | |||
| 1828 | /* | ||
| 1829 | * Potentially available exiting-child timeslices are | ||
| 1830 | * retrieved here - this way the parent does not get | ||
| 1831 | * penalized for creating too many threads. | ||
| 1832 | * | ||
| 1833 | * (this cannot be used to 'generate' timeslices | ||
| 1834 | * artificially, because any timeslice recovered here | ||
| 1835 | * was given away by the parent in the first place.) | ||
| 1836 | */ | ||
| 1837 | void fastcall sched_exit(struct task_struct *p) | ||
| 1838 | { | ||
| 1839 | unsigned long flags; | ||
| 1840 | struct rq *rq; | ||
| 1841 | |||
| 1842 | /* | ||
| 1843 | * If the child was a (relative-) CPU hog then decrease | ||
| 1844 | * the sleep_avg of the parent as well. | ||
| 1845 | */ | ||
| 1846 | rq = task_rq_lock(p->parent, &flags); | ||
| 1847 | if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) { | ||
| 1848 | p->parent->time_slice += p->time_slice; | ||
| 1849 | if (unlikely(p->parent->time_slice > task_timeslice(p))) | ||
| 1850 | p->parent->time_slice = task_timeslice(p); | ||
| 1851 | } | 1645 | } |
| 1852 | if (p->sleep_avg < p->parent->sleep_avg) | 1646 | check_preempt_curr(rq, p); |
| 1853 | p->parent->sleep_avg = p->parent->sleep_avg / | ||
| 1854 | (EXIT_WEIGHT + 1) * EXIT_WEIGHT + p->sleep_avg / | ||
| 1855 | (EXIT_WEIGHT + 1); | ||
| 1856 | task_rq_unlock(rq, &flags); | 1647 | task_rq_unlock(rq, &flags); |
| 1857 | } | 1648 | } |
| 1858 | 1649 | ||
| @@ -1917,7 +1708,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
| 1917 | /* | 1708 | /* |
| 1918 | * Remove function-return probe instances associated with this | 1709 | * Remove function-return probe instances associated with this |
| 1919 | * task and put them back on the free list. | 1710 | * task and put them back on the free list. |
| 1920 | */ | 1711 | */ |
| 1921 | kprobe_flush_task(prev); | 1712 | kprobe_flush_task(prev); |
| 1922 | put_task_struct(prev); | 1713 | put_task_struct(prev); |
| 1923 | } | 1714 | } |
| @@ -1945,13 +1736,15 @@ asmlinkage void schedule_tail(struct task_struct *prev) | |||
| 1945 | * context_switch - switch to the new MM and the new | 1736 | * context_switch - switch to the new MM and the new |
| 1946 | * thread's register state. | 1737 | * thread's register state. |
| 1947 | */ | 1738 | */ |
| 1948 | static inline struct task_struct * | 1739 | static inline void |
| 1949 | context_switch(struct rq *rq, struct task_struct *prev, | 1740 | context_switch(struct rq *rq, struct task_struct *prev, |
| 1950 | struct task_struct *next) | 1741 | struct task_struct *next) |
| 1951 | { | 1742 | { |
| 1952 | struct mm_struct *mm = next->mm; | 1743 | struct mm_struct *mm, *oldmm; |
| 1953 | struct mm_struct *oldmm = prev->active_mm; | ||
| 1954 | 1744 | ||
| 1745 | prepare_task_switch(rq, next); | ||
| 1746 | mm = next->mm; | ||
| 1747 | oldmm = prev->active_mm; | ||
| 1955 | /* | 1748 | /* |
| 1956 | * For paravirt, this is coupled with an exit in switch_to to | 1749 | * For paravirt, this is coupled with an exit in switch_to to |
| 1957 | * combine the page table reload and the switch backend into | 1750 | * combine the page table reload and the switch backend into |
| @@ -1959,16 +1752,15 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
| 1959 | */ | 1752 | */ |
| 1960 | arch_enter_lazy_cpu_mode(); | 1753 | arch_enter_lazy_cpu_mode(); |
| 1961 | 1754 | ||
| 1962 | if (!mm) { | 1755 | if (unlikely(!mm)) { |
| 1963 | next->active_mm = oldmm; | 1756 | next->active_mm = oldmm; |
| 1964 | atomic_inc(&oldmm->mm_count); | 1757 | atomic_inc(&oldmm->mm_count); |
| 1965 | enter_lazy_tlb(oldmm, next); | 1758 | enter_lazy_tlb(oldmm, next); |
| 1966 | } else | 1759 | } else |
| 1967 | switch_mm(oldmm, mm, next); | 1760 | switch_mm(oldmm, mm, next); |
| 1968 | 1761 | ||
| 1969 | if (!prev->mm) { | 1762 | if (unlikely(!prev->mm)) { |
| 1970 | prev->active_mm = NULL; | 1763 | prev->active_mm = NULL; |
| 1971 | WARN_ON(rq->prev_mm); | ||
| 1972 | rq->prev_mm = oldmm; | 1764 | rq->prev_mm = oldmm; |
| 1973 | } | 1765 | } |
| 1974 | /* | 1766 | /* |
| @@ -1984,7 +1776,13 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
| 1984 | /* Here we just switch the register state and the stack. */ | 1776 | /* Here we just switch the register state and the stack. */ |
| 1985 | switch_to(prev, next, prev); | 1777 | switch_to(prev, next, prev); |
| 1986 | 1778 | ||
| 1987 | return prev; | 1779 | barrier(); |
| 1780 | /* | ||
| 1781 | * this_rq must be evaluated again because prev may have moved | ||
| 1782 | * CPUs since it called schedule(), thus the 'rq' on its stack | ||
| 1783 | * frame will be invalid. | ||
| 1784 | */ | ||
| 1785 | finish_task_switch(this_rq(), prev); | ||
| 1988 | } | 1786 | } |
| 1989 | 1787 | ||
| 1990 | /* | 1788 | /* |
| @@ -2057,17 +1855,65 @@ unsigned long nr_active(void) | |||
| 2057 | return running + uninterruptible; | 1855 | return running + uninterruptible; |
| 2058 | } | 1856 | } |
| 2059 | 1857 | ||
| 2060 | #ifdef CONFIG_SMP | ||
| 2061 | |||
| 2062 | /* | 1858 | /* |
| 2063 | * Is this task likely cache-hot: | 1859 | * Update rq->cpu_load[] statistics. This function is usually called every |
| 1860 | * scheduler tick (TICK_NSEC). | ||
| 2064 | */ | 1861 | */ |
| 2065 | static inline int | 1862 | static void update_cpu_load(struct rq *this_rq) |
| 2066 | task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd) | ||
| 2067 | { | 1863 | { |
| 2068 | return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time; | 1864 | u64 fair_delta64, exec_delta64, idle_delta64, sample_interval64, tmp64; |
| 1865 | unsigned long total_load = this_rq->ls.load.weight; | ||
| 1866 | unsigned long this_load = total_load; | ||
| 1867 | struct load_stat *ls = &this_rq->ls; | ||
| 1868 | u64 now = __rq_clock(this_rq); | ||
| 1869 | int i, scale; | ||
| 1870 | |||
| 1871 | this_rq->nr_load_updates++; | ||
| 1872 | if (unlikely(!(sysctl_sched_features & SCHED_FEAT_PRECISE_CPU_LOAD))) | ||
| 1873 | goto do_avg; | ||
| 1874 | |||
| 1875 | /* Update delta_fair/delta_exec fields first */ | ||
| 1876 | update_curr_load(this_rq, now); | ||
| 1877 | |||
| 1878 | fair_delta64 = ls->delta_fair + 1; | ||
| 1879 | ls->delta_fair = 0; | ||
| 1880 | |||
| 1881 | exec_delta64 = ls->delta_exec + 1; | ||
| 1882 | ls->delta_exec = 0; | ||
| 1883 | |||
| 1884 | sample_interval64 = now - ls->load_update_last; | ||
| 1885 | ls->load_update_last = now; | ||
| 1886 | |||
| 1887 | if ((s64)sample_interval64 < (s64)TICK_NSEC) | ||
| 1888 | sample_interval64 = TICK_NSEC; | ||
| 1889 | |||
| 1890 | if (exec_delta64 > sample_interval64) | ||
| 1891 | exec_delta64 = sample_interval64; | ||
| 1892 | |||
| 1893 | idle_delta64 = sample_interval64 - exec_delta64; | ||
| 1894 | |||
| 1895 | tmp64 = div64_64(SCHED_LOAD_SCALE * exec_delta64, fair_delta64); | ||
| 1896 | tmp64 = div64_64(tmp64 * exec_delta64, sample_interval64); | ||
| 1897 | |||
| 1898 | this_load = (unsigned long)tmp64; | ||
| 1899 | |||
| 1900 | do_avg: | ||
| 1901 | |||
| 1902 | /* Update our load: */ | ||
| 1903 | for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) { | ||
| 1904 | unsigned long old_load, new_load; | ||
| 1905 | |||
| 1906 | /* scale is effectively 1 << i now, and >> i divides by scale */ | ||
| 1907 | |||
| 1908 | old_load = this_rq->cpu_load[i]; | ||
| 1909 | new_load = this_load; | ||
| 1910 | |||
| 1911 | this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i; | ||
| 1912 | } | ||
| 2069 | } | 1913 | } |
| 2070 | 1914 | ||
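
The loop at the end of update_cpu_load() implements a family of exponentially decaying averages: index i blends the old value with weight (scale-1)/scale, where scale = 1 << i, so higher indexes react more slowly to load changes. A standalone sketch (plain userspace C, not kernel code; the index count and sample values are illustrative) of how the entries respond to a load spike:

#include <stdio.h>

#define CPU_LOAD_IDX_MAX 5

int main(void)
{
	unsigned long cpu_load[CPU_LOAD_IDX_MAX] = { 0 };
	unsigned long samples[] = { 1024, 1024, 1024, 0, 0, 0 };
	unsigned int t, i;

	for (t = 0; t < sizeof(samples) / sizeof(samples[0]); t++) {
		unsigned long this_load = samples[t];

		for (i = 0; i < CPU_LOAD_IDX_MAX; i++) {
			unsigned long scale = 1UL << i;
			unsigned long old_load = cpu_load[i];

			/* the same recurrence as in update_cpu_load() */
			cpu_load[i] = (old_load * (scale - 1) + this_load) >> i;
		}
		printf("tick %u: load[0]=%lu load[2]=%lu load[4]=%lu\n",
		       t, cpu_load[0], cpu_load[2], cpu_load[4]);
	}
	return 0;
}
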
| 1915 | #ifdef CONFIG_SMP | ||
| 1916 | |||
| 2071 | /* | 1917 | /* |
| 2072 | * double_rq_lock - safely lock two runqueues | 1918 | * double_rq_lock - safely lock two runqueues |
| 2073 | * | 1919 | * |
| @@ -2184,23 +2030,17 @@ void sched_exec(void) | |||
| 2184 | * pull_task - move a task from a remote runqueue to the local runqueue. | 2030 | * pull_task - move a task from a remote runqueue to the local runqueue. |
| 2185 | * Both runqueues must be locked. | 2031 | * Both runqueues must be locked. |
| 2186 | */ | 2032 | */ |
| 2187 | static void pull_task(struct rq *src_rq, struct prio_array *src_array, | 2033 | static void pull_task(struct rq *src_rq, struct task_struct *p, |
| 2188 | struct task_struct *p, struct rq *this_rq, | 2034 | struct rq *this_rq, int this_cpu) |
| 2189 | struct prio_array *this_array, int this_cpu) | ||
| 2190 | { | 2035 | { |
| 2191 | dequeue_task(p, src_array); | 2036 | deactivate_task(src_rq, p, 0); |
| 2192 | dec_nr_running(p, src_rq); | ||
| 2193 | set_task_cpu(p, this_cpu); | 2037 | set_task_cpu(p, this_cpu); |
| 2194 | inc_nr_running(p, this_rq); | 2038 | activate_task(this_rq, p, 0); |
| 2195 | enqueue_task(p, this_array); | ||
| 2196 | p->timestamp = (p->timestamp - src_rq->most_recent_timestamp) | ||
| 2197 | + this_rq->most_recent_timestamp; | ||
| 2198 | /* | 2039 | /* |
| 2199 | * Note that idle threads have a prio of MAX_PRIO, for this test | 2040 | * Note that idle threads have a prio of MAX_PRIO, for this test |
| 2200 | * to be always true for them. | 2041 | * to be always true for them. |
| 2201 | */ | 2042 | */ |
| 2202 | if (TASK_PREEMPTS_CURR(p, this_rq)) | 2043 | check_preempt_curr(this_rq, p); |
| 2203 | resched_task(this_rq->curr); | ||
| 2204 | } | 2044 | } |
| 2205 | 2045 | ||
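
pull_task() now reuses the ordinary dequeue/enqueue paths instead of manipulating priority arrays directly. A minimal userspace model of the four-step sequence (the helpers are trivial stand-ins for the kernel's, and the priority check is reduced to a single comparison): deactivate on the source, retarget the CPU, activate on the destination, then check for preemption:

#include <stdio.h>

struct task { int cpu; int prio; };
struct rq { int cpu; int nr_running; int curr_prio; };

static void deactivate_task(struct rq *rq, struct task *p) { rq->nr_running--; }
static void activate_task(struct rq *rq, struct task *p)   { rq->nr_running++; }
static void set_task_cpu(struct task *p, int cpu)          { p->cpu = cpu; }

static void check_preempt_curr(struct rq *rq, struct task *p)
{
	if (p->prio < rq->curr_prio)	/* lower value = higher priority */
		printf("resched CPU %d\n", rq->cpu);
}

static void pull_task(struct rq *src, struct task *p, struct rq *dst)
{
	deactivate_task(src, p);
	set_task_cpu(p, dst->cpu);
	activate_task(dst, p);
	check_preempt_curr(dst, p);
}

int main(void)
{
	struct rq busiest = { 0, 3, 120 }, this_rq = { 1, 1, 130 };
	struct task p = { 0, 125 };

	pull_task(&busiest, &p, &this_rq);
	printf("busiest=%d this=%d\n", busiest.nr_running, this_rq.nr_running);
	return 0;
}
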
| 2206 | /* | 2046 | /* |
| @@ -2208,7 +2048,7 @@ static void pull_task(struct rq *src_rq, struct prio_array *src_array, | |||
| 2208 | */ | 2048 | */ |
| 2209 | static | 2049 | static |
| 2210 | int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | 2050 | int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, |
| 2211 | struct sched_domain *sd, enum idle_type idle, | 2051 | struct sched_domain *sd, enum cpu_idle_type idle, |
| 2212 | int *all_pinned) | 2052 | int *all_pinned) |
| 2213 | { | 2053 | { |
| 2214 | /* | 2054 | /* |
| @@ -2225,132 +2065,67 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
| 2225 | return 0; | 2065 | return 0; |
| 2226 | 2066 | ||
| 2227 | /* | 2067 | /* |
| 2228 | * Aggressive migration if: | 2068 | * Aggressive migration if too many balance attempts have failed: |
| 2229 | * 1) task is cache cold, or | ||
| 2230 | * 2) too many balance attempts have failed. | ||
| 2231 | */ | 2069 | */ |
| 2232 | 2070 | if (sd->nr_balance_failed > sd->cache_nice_tries) | |
| 2233 | if (sd->nr_balance_failed > sd->cache_nice_tries) { | ||
| 2234 | #ifdef CONFIG_SCHEDSTATS | ||
| 2235 | if (task_hot(p, rq->most_recent_timestamp, sd)) | ||
| 2236 | schedstat_inc(sd, lb_hot_gained[idle]); | ||
| 2237 | #endif | ||
| 2238 | return 1; | 2071 | return 1; |
| 2239 | } | ||
| 2240 | 2072 | ||
| 2241 | if (task_hot(p, rq->most_recent_timestamp, sd)) | ||
| 2242 | return 0; | ||
| 2243 | return 1; | 2073 | return 1; |
| 2244 | } | 2074 | } |
| 2245 | 2075 | ||
| 2246 | #define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio) | 2076 | static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, |
| 2247 | |||
| 2248 | /* | ||
| 2249 | * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted | ||
| 2250 | * load from busiest to this_rq, as part of a balancing operation within | ||
| 2251 | * "domain". Returns the number of tasks moved. | ||
| 2252 | * | ||
| 2253 | * Called with both runqueues locked. | ||
| 2254 | */ | ||
| 2255 | static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
| 2256 | unsigned long max_nr_move, unsigned long max_load_move, | 2077 | unsigned long max_nr_move, unsigned long max_load_move, |
| 2257 | struct sched_domain *sd, enum idle_type idle, | 2078 | struct sched_domain *sd, enum cpu_idle_type idle, |
| 2258 | int *all_pinned) | 2079 | int *all_pinned, unsigned long *load_moved, |
| 2080 | int this_best_prio, int best_prio, int best_prio_seen, | ||
| 2081 | struct rq_iterator *iterator) | ||
| 2259 | { | 2082 | { |
| 2260 | int idx, pulled = 0, pinned = 0, this_best_prio, best_prio, | 2083 | int pulled = 0, pinned = 0, skip_for_load; |
| 2261 | best_prio_seen, skip_for_load; | 2084 | struct task_struct *p; |
| 2262 | struct prio_array *array, *dst_array; | 2085 | long rem_load_move = max_load_move; |
| 2263 | struct list_head *head, *curr; | ||
| 2264 | struct task_struct *tmp; | ||
| 2265 | long rem_load_move; | ||
| 2266 | 2086 | ||
| 2267 | if (max_nr_move == 0 || max_load_move == 0) | 2087 | if (max_nr_move == 0 || max_load_move == 0) |
| 2268 | goto out; | 2088 | goto out; |
| 2269 | 2089 | ||
| 2270 | rem_load_move = max_load_move; | ||
| 2271 | pinned = 1; | 2090 | pinned = 1; |
| 2272 | this_best_prio = rq_best_prio(this_rq); | ||
| 2273 | best_prio = rq_best_prio(busiest); | ||
| 2274 | /* | ||
| 2275 | * Enable handling of the case where there is more than one task | ||
| 2276 | * with the best priority. If the current running task is one | ||
| 2277 | * of those with prio==best_prio we know it won't be moved | ||
| 2278 | * and therefore it's safe to override the skip (based on load) of | ||
| 2279 | * any task we find with that prio. | ||
| 2280 | */ | ||
| 2281 | best_prio_seen = best_prio == busiest->curr->prio; | ||
| 2282 | 2091 | ||
| 2283 | /* | 2092 | /* |
| 2284 | * We first consider expired tasks. Those will likely not be | 2093 | * Start the load-balancing iterator: |
| 2285 | * executed in the near future, and they are most likely to | ||
| 2286 | * be cache-cold, thus switching CPUs has the least effect | ||
| 2287 | * on them. | ||
| 2288 | */ | 2094 | */ |
| 2289 | if (busiest->expired->nr_active) { | 2095 | p = iterator->start(iterator->arg); |
| 2290 | array = busiest->expired; | 2096 | next: |
| 2291 | dst_array = this_rq->expired; | 2097 | if (!p) |
| 2292 | } else { | ||
| 2293 | array = busiest->active; | ||
| 2294 | dst_array = this_rq->active; | ||
| 2295 | } | ||
| 2296 | |||
| 2297 | new_array: | ||
| 2298 | /* Start searching at priority 0: */ | ||
| 2299 | idx = 0; | ||
| 2300 | skip_bitmap: | ||
| 2301 | if (!idx) | ||
| 2302 | idx = sched_find_first_bit(array->bitmap); | ||
| 2303 | else | ||
| 2304 | idx = find_next_bit(array->bitmap, MAX_PRIO, idx); | ||
| 2305 | if (idx >= MAX_PRIO) { | ||
| 2306 | if (array == busiest->expired && busiest->active->nr_active) { | ||
| 2307 | array = busiest->active; | ||
| 2308 | dst_array = this_rq->active; | ||
| 2309 | goto new_array; | ||
| 2310 | } | ||
| 2311 | goto out; | 2098 | goto out; |
| 2312 | } | ||
| 2313 | |||
| 2314 | head = array->queue + idx; | ||
| 2315 | curr = head->prev; | ||
| 2316 | skip_queue: | ||
| 2317 | tmp = list_entry(curr, struct task_struct, run_list); | ||
| 2318 | |||
| 2319 | curr = curr->prev; | ||
| 2320 | |||
| 2321 | /* | 2099 | /* |
| 2322 | * To help distribute high priority tasks across CPUs we don't | 2100 | * To help distribute high priority tasks across CPUs we don't |
| 2323 | * skip a task if it will be the highest priority task (i.e. smallest | 2101 | * skip a task if it will be the highest priority task (i.e. smallest |
| 2324 | * prio value) on its new queue regardless of its load weight | 2102 | * prio value) on its new queue regardless of its load weight |
| 2325 | */ | 2103 | */ |
| 2326 | skip_for_load = tmp->load_weight > rem_load_move; | 2104 | skip_for_load = (p->se.load.weight >> 1) > rem_load_move + |
| 2327 | if (skip_for_load && idx < this_best_prio) | 2105 | SCHED_LOAD_SCALE_FUZZ; |
| 2328 | skip_for_load = !best_prio_seen && idx == best_prio; | 2106 | if (skip_for_load && p->prio < this_best_prio) |
| 2107 | skip_for_load = !best_prio_seen && p->prio == best_prio; | ||
| 2329 | if (skip_for_load || | 2108 | if (skip_for_load || |
| 2330 | !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) { | 2109 | !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { |
| 2331 | 2110 | ||
| 2332 | best_prio_seen |= idx == best_prio; | 2111 | best_prio_seen |= p->prio == best_prio; |
| 2333 | if (curr != head) | 2112 | p = iterator->next(iterator->arg); |
| 2334 | goto skip_queue; | 2113 | goto next; |
| 2335 | idx++; | ||
| 2336 | goto skip_bitmap; | ||
| 2337 | } | 2114 | } |
| 2338 | 2115 | ||
| 2339 | pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu); | 2116 | pull_task(busiest, p, this_rq, this_cpu); |
| 2340 | pulled++; | 2117 | pulled++; |
| 2341 | rem_load_move -= tmp->load_weight; | 2118 | rem_load_move -= p->se.load.weight; |
| 2342 | 2119 | ||
| 2343 | /* | 2120 | /* |
| 2344 | * We only want to steal up to the prescribed number of tasks | 2121 | * We only want to steal up to the prescribed number of tasks |
| 2345 | * and the prescribed amount of weighted load. | 2122 | * and the prescribed amount of weighted load. |
| 2346 | */ | 2123 | */ |
| 2347 | if (pulled < max_nr_move && rem_load_move > 0) { | 2124 | if (pulled < max_nr_move && rem_load_move > 0) { |
| 2348 | if (idx < this_best_prio) | 2125 | if (p->prio < this_best_prio) |
| 2349 | this_best_prio = idx; | 2126 | this_best_prio = p->prio; |
| 2350 | if (curr != head) | 2127 | p = iterator->next(iterator->arg); |
| 2351 | goto skip_queue; | 2128 | goto next; |
| 2352 | idx++; | ||
| 2353 | goto skip_bitmap; | ||
| 2354 | } | 2129 | } |
| 2355 | out: | 2130 | out: |
| 2356 | /* | 2131 | /* |
| @@ -2362,18 +2137,48 @@ out: | |||
| 2362 | 2137 | ||
| 2363 | if (all_pinned) | 2138 | if (all_pinned) |
| 2364 | *all_pinned = pinned; | 2139 | *all_pinned = pinned; |
| 2140 | *load_moved = max_load_move - rem_load_move; | ||
| 2365 | return pulled; | 2141 | return pulled; |
| 2366 | } | 2142 | } |
| 2367 | 2143 | ||
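
balance_tasks() is now generic over an rq_iterator, so each scheduling class can expose its runnable tasks through start()/next() callbacks instead of the balancer knowing about priority arrays. A compact userspace sketch of the pattern (the array-backed iterator and the single-comparison skip test are stand-ins, not the kernel's logic):

#include <stdio.h>

struct task { int weight; };

struct rq_iterator {
	void *arg;
	struct task *(*start)(void *arg);
	struct task *(*next)(void *arg);
};

/* toy backing store: a flat array standing in for a class's queue */
struct array_it { struct task *tasks; int n, pos; };

static struct task *arr_start(void *arg)
{
	struct array_it *it = arg;

	it->pos = 0;
	return it->pos < it->n ? &it->tasks[it->pos] : NULL;
}

static struct task *arr_next(void *arg)
{
	struct array_it *it = arg;

	it->pos++;
	return it->pos < it->n ? &it->tasks[it->pos] : NULL;
}

static int balance_tasks(long max_load_move, struct rq_iterator *iterator)
{
	long rem = max_load_move;
	int pulled = 0;
	struct task *p;

	for (p = iterator->start(iterator->arg); p;
	     p = iterator->next(iterator->arg)) {
		if (p->weight > rem)	/* stand-in for the skip_for_load test */
			continue;
		rem -= p->weight;	/* "pull" the task */
		pulled++;
		if (rem <= 0)
			break;
	}
	return pulled;
}

int main(void)
{
	struct task tasks[] = { { 512 }, { 2048 }, { 256 }, { 512 } };
	struct array_it it = { tasks, 4, 0 };
	struct rq_iterator iter = { &it, arr_start, arr_next };

	printf("pulled %d tasks\n", balance_tasks(1024, &iter));
	return 0;
}
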
| 2368 | /* | 2144 | /* |
| 2145 | * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted | ||
| 2146 | * load from busiest to this_rq, as part of a balancing operation within | ||
| 2147 | * "domain". Returns the number of tasks moved. | ||
| 2148 | * | ||
| 2149 | * Called with both runqueues locked. | ||
| 2150 | */ | ||
| 2151 | static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
| 2152 | unsigned long max_nr_move, unsigned long max_load_move, | ||
| 2153 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
| 2154 | int *all_pinned) | ||
| 2155 | { | ||
| 2156 | struct sched_class *class = sched_class_highest; | ||
| 2157 | unsigned long load_moved, total_nr_moved = 0, nr_moved; | ||
| 2158 | long rem_load_move = max_load_move; | ||
| 2159 | |||
| 2160 | do { | ||
| 2161 | nr_moved = class->load_balance(this_rq, this_cpu, busiest, | ||
| 2162 | max_nr_move, (unsigned long)rem_load_move, | ||
| 2163 | sd, idle, all_pinned, &load_moved); | ||
| 2164 | total_nr_moved += nr_moved; | ||
| 2165 | max_nr_move -= nr_moved; | ||
| 2166 | rem_load_move -= load_moved; | ||
| 2167 | class = class->next; | ||
| 2168 | } while (class && max_nr_move && rem_load_move > 0); | ||
| 2169 | |||
| 2170 | return total_nr_moved; | ||
| 2171 | } | ||
| 2172 | |||
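
move_tasks() itself shrinks to a loop over the sched_class list: each class gets a chance to move load, highest-priority class first, until the budget runs out. A userspace sketch of the dispatch loop (the two toy classes and their load_balance() behaviour are invented for illustration; the real hooks take many more parameters):

#include <stdio.h>

struct sched_class {
	const char *name;
	struct sched_class *next;
	/* returns tasks moved, reports weighted load moved */
	int (*load_balance)(long max_load, long *load_moved);
};

static int rt_balance(long max_load, long *load_moved)
{
	*load_moved = 0;	/* pretend: no RT tasks to move */
	return 0;
}

static int fair_balance(long max_load, long *load_moved)
{
	*load_moved = max_load > 1024 ? 1024 : max_load;
	return *load_moved ? 1 : 0;
}

static struct sched_class fair_class = { "fair", NULL, fair_balance };
static struct sched_class rt_class = { "rt", &fair_class, rt_balance };
#define sched_class_highest (&rt_class)

int main(void)
{
	struct sched_class *class = sched_class_highest;
	long rem_load = 2048, moved;
	int total = 0;

	do {
		total += class->load_balance(rem_load, &moved);
		rem_load -= moved;
		class = class->next;
	} while (class && rem_load > 0);

	printf("moved %d task(s), %ld load left\n", total, rem_load);
	return 0;
}
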
| 2173 | /* | ||
| 2369 | * find_busiest_group finds and returns the busiest CPU group within the | 2174 | * find_busiest_group finds and returns the busiest CPU group within the |
| 2370 | * domain. It calculates and returns the amount of weighted load which | 2175 | * domain. It calculates and returns the amount of weighted load which |
| 2371 | * should be moved to restore balance via the imbalance parameter. | 2176 | * should be moved to restore balance via the imbalance parameter. |
| 2372 | */ | 2177 | */ |
| 2373 | static struct sched_group * | 2178 | static struct sched_group * |
| 2374 | find_busiest_group(struct sched_domain *sd, int this_cpu, | 2179 | find_busiest_group(struct sched_domain *sd, int this_cpu, |
| 2375 | unsigned long *imbalance, enum idle_type idle, int *sd_idle, | 2180 | unsigned long *imbalance, enum cpu_idle_type idle, |
| 2376 | cpumask_t *cpus, int *balance) | 2181 | int *sd_idle, cpumask_t *cpus, int *balance) |
| 2377 | { | 2182 | { |
| 2378 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; | 2183 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; |
| 2379 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; | 2184 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; |
| @@ -2391,9 +2196,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2391 | max_load = this_load = total_load = total_pwr = 0; | 2196 | max_load = this_load = total_load = total_pwr = 0; |
| 2392 | busiest_load_per_task = busiest_nr_running = 0; | 2197 | busiest_load_per_task = busiest_nr_running = 0; |
| 2393 | this_load_per_task = this_nr_running = 0; | 2198 | this_load_per_task = this_nr_running = 0; |
| 2394 | if (idle == NOT_IDLE) | 2199 | if (idle == CPU_NOT_IDLE) |
| 2395 | load_idx = sd->busy_idx; | 2200 | load_idx = sd->busy_idx; |
| 2396 | else if (idle == NEWLY_IDLE) | 2201 | else if (idle == CPU_NEWLY_IDLE) |
| 2397 | load_idx = sd->newidle_idx; | 2202 | load_idx = sd->newidle_idx; |
| 2398 | else | 2203 | else |
| 2399 | load_idx = sd->idle_idx; | 2204 | load_idx = sd->idle_idx; |
| @@ -2437,7 +2242,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2437 | 2242 | ||
| 2438 | avg_load += load; | 2243 | avg_load += load; |
| 2439 | sum_nr_running += rq->nr_running; | 2244 | sum_nr_running += rq->nr_running; |
| 2440 | sum_weighted_load += rq->raw_weighted_load; | 2245 | sum_weighted_load += weighted_cpuload(i); |
| 2441 | } | 2246 | } |
| 2442 | 2247 | ||
| 2443 | /* | 2248 | /* |
| @@ -2477,8 +2282,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2477 | * Busy processors will not participate in power savings | 2282 | * Busy processors will not participate in power savings |
| 2478 | * balance. | 2283 | * balance. |
| 2479 | */ | 2284 | */ |
| 2480 | if (idle == NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) | 2285 | if (idle == CPU_NOT_IDLE || |
| 2481 | goto group_next; | 2286 | !(sd->flags & SD_POWERSAVINGS_BALANCE)) |
| 2287 | goto group_next; | ||
| 2482 | 2288 | ||
| 2483 | /* | 2289 | /* |
| 2484 | * If the local group is idle or completely loaded | 2290 | * If the local group is idle or completely loaded |
| @@ -2488,42 +2294,42 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2488 | !this_nr_running)) | 2294 | !this_nr_running)) |
| 2489 | power_savings_balance = 0; | 2295 | power_savings_balance = 0; |
| 2490 | 2296 | ||
| 2491 | /* | 2297 | /* |
| 2492 | * If a group is already running at full capacity or idle, | 2298 | * If a group is already running at full capacity or idle, |
| 2493 | * don't include that group in power savings calculations | 2299 | * don't include that group in power savings calculations |
| 2494 | */ | 2300 | */ |
| 2495 | if (!power_savings_balance || sum_nr_running >= group_capacity | 2301 | if (!power_savings_balance || sum_nr_running >= group_capacity |
| 2496 | || !sum_nr_running) | 2302 | || !sum_nr_running) |
| 2497 | goto group_next; | 2303 | goto group_next; |
| 2498 | 2304 | ||
| 2499 | /* | 2305 | /* |
| 2500 | * Calculate the group which has the least non-idle load. | 2306 | * Calculate the group which has the least non-idle load. |
| 2501 | * This is the group from which we need to pick up load | 2307 | * This is the group from which we need to pick up load |
| 2502 | * to save power | 2308 | * to save power |
| 2503 | */ | 2309 | */ |
| 2504 | if ((sum_nr_running < min_nr_running) || | 2310 | if ((sum_nr_running < min_nr_running) || |
| 2505 | (sum_nr_running == min_nr_running && | 2311 | (sum_nr_running == min_nr_running && |
| 2506 | first_cpu(group->cpumask) < | 2312 | first_cpu(group->cpumask) < |
| 2507 | first_cpu(group_min->cpumask))) { | 2313 | first_cpu(group_min->cpumask))) { |
| 2508 | group_min = group; | 2314 | group_min = group; |
| 2509 | min_nr_running = sum_nr_running; | 2315 | min_nr_running = sum_nr_running; |
| 2510 | min_load_per_task = sum_weighted_load / | 2316 | min_load_per_task = sum_weighted_load / |
| 2511 | sum_nr_running; | 2317 | sum_nr_running; |
| 2512 | } | 2318 | } |
| 2513 | 2319 | ||
| 2514 | /* | 2320 | /* |
| 2515 | * Calculate the group which is nearly at its | 2321 | * Calculate the group which is nearly at its |
| 2516 | * capacity but still has some space to pick up some load | 2322 | * capacity but still has some space to pick up some load |
| 2517 | * from other groups and save more power | 2323 | * from other groups and save more power |
| 2518 | */ | 2324 | */ |
| 2519 | if (sum_nr_running <= group_capacity - 1) { | 2325 | if (sum_nr_running <= group_capacity - 1) { |
| 2520 | if (sum_nr_running > leader_nr_running || | 2326 | if (sum_nr_running > leader_nr_running || |
| 2521 | (sum_nr_running == leader_nr_running && | 2327 | (sum_nr_running == leader_nr_running && |
| 2522 | first_cpu(group->cpumask) > | 2328 | first_cpu(group->cpumask) > |
| 2523 | first_cpu(group_leader->cpumask))) { | 2329 | first_cpu(group_leader->cpumask))) { |
| 2524 | group_leader = group; | 2330 | group_leader = group; |
| 2525 | leader_nr_running = sum_nr_running; | 2331 | leader_nr_running = sum_nr_running; |
| 2526 | } | 2332 | } |
| 2527 | } | 2333 | } |
| 2528 | group_next: | 2334 | group_next: |
| 2529 | #endif | 2335 | #endif |
| @@ -2578,7 +2384,7 @@ group_next: | |||
| 2578 | * a think about bumping its value to force at least one task to be | 2384 | * a think about bumping its value to force at least one task to be |
| 2579 | * moved | 2385 | * moved |
| 2580 | */ | 2386 | */ |
| 2581 | if (*imbalance < busiest_load_per_task) { | 2387 | if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task/2) { |
| 2582 | unsigned long tmp, pwr_now, pwr_move; | 2388 | unsigned long tmp, pwr_now, pwr_move; |
| 2583 | unsigned int imbn; | 2389 | unsigned int imbn; |
| 2584 | 2390 | ||
| @@ -2592,7 +2398,8 @@ small_imbalance: | |||
| 2592 | } else | 2398 | } else |
| 2593 | this_load_per_task = SCHED_LOAD_SCALE; | 2399 | this_load_per_task = SCHED_LOAD_SCALE; |
| 2594 | 2400 | ||
| 2595 | if (max_load - this_load >= busiest_load_per_task * imbn) { | 2401 | if (max_load - this_load + SCHED_LOAD_SCALE_FUZZ >= |
| 2402 | busiest_load_per_task * imbn) { | ||
| 2596 | *imbalance = busiest_load_per_task; | 2403 | *imbalance = busiest_load_per_task; |
| 2597 | return busiest; | 2404 | return busiest; |
| 2598 | } | 2405 | } |
| @@ -2639,7 +2446,7 @@ small_imbalance: | |||
| 2639 | 2446 | ||
| 2640 | out_balanced: | 2447 | out_balanced: |
| 2641 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 2448 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
| 2642 | if (idle == NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) | 2449 | if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) |
| 2643 | goto ret; | 2450 | goto ret; |
| 2644 | 2451 | ||
| 2645 | if (this == group_leader && group_leader != group_min) { | 2452 | if (this == group_leader && group_leader != group_min) { |
| @@ -2656,7 +2463,7 @@ ret: | |||
| 2656 | * find_busiest_queue - find the busiest runqueue among the cpus in group. | 2463 | * find_busiest_queue - find the busiest runqueue among the cpus in group. |
| 2657 | */ | 2464 | */ |
| 2658 | static struct rq * | 2465 | static struct rq * |
| 2659 | find_busiest_queue(struct sched_group *group, enum idle_type idle, | 2466 | find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, |
| 2660 | unsigned long imbalance, cpumask_t *cpus) | 2467 | unsigned long imbalance, cpumask_t *cpus) |
| 2661 | { | 2468 | { |
| 2662 | struct rq *busiest = NULL, *rq; | 2469 | struct rq *busiest = NULL, *rq; |
| @@ -2664,17 +2471,19 @@ find_busiest_queue(struct sched_group *group, enum idle_type idle, | |||
| 2664 | int i; | 2471 | int i; |
| 2665 | 2472 | ||
| 2666 | for_each_cpu_mask(i, group->cpumask) { | 2473 | for_each_cpu_mask(i, group->cpumask) { |
| 2474 | unsigned long wl; | ||
| 2667 | 2475 | ||
| 2668 | if (!cpu_isset(i, *cpus)) | 2476 | if (!cpu_isset(i, *cpus)) |
| 2669 | continue; | 2477 | continue; |
| 2670 | 2478 | ||
| 2671 | rq = cpu_rq(i); | 2479 | rq = cpu_rq(i); |
| 2480 | wl = weighted_cpuload(i); | ||
| 2672 | 2481 | ||
| 2673 | if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance) | 2482 | if (rq->nr_running == 1 && wl > imbalance) |
| 2674 | continue; | 2483 | continue; |
| 2675 | 2484 | ||
| 2676 | if (rq->raw_weighted_load > max_load) { | 2485 | if (wl > max_load) { |
| 2677 | max_load = rq->raw_weighted_load; | 2486 | max_load = wl; |
| 2678 | busiest = rq; | 2487 | busiest = rq; |
| 2679 | } | 2488 | } |
| 2680 | } | 2489 | } |
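
The weighted_cpuload() change does not alter find_busiest_queue()'s selection rule: take the queue with the highest weighted load, but skip a queue whose single runnable task already exceeds the imbalance, since moving that lone task would merely shift the problem. A standalone sketch (userspace C, illustrative values):

#include <stdio.h>

struct rq { int nr_running; unsigned long weighted_load; };

static struct rq *find_busiest_queue(struct rq *rqs, int n,
				     unsigned long imbalance)
{
	struct rq *busiest = NULL;
	unsigned long max_load = 0;
	int i;

	for (i = 0; i < n; i++) {
		unsigned long wl = rqs[i].weighted_load;

		/* one big task: moving it would not fix the imbalance */
		if (rqs[i].nr_running == 1 && wl > imbalance)
			continue;
		if (wl > max_load) {
			max_load = wl;
			busiest = &rqs[i];
		}
	}
	return busiest;
}

int main(void)
{
	struct rq rqs[] = { { 1, 3000 }, { 4, 2500 }, { 2, 1000 } };
	struct rq *b = find_busiest_queue(rqs, 3, 2048);

	printf("busiest has %d tasks, load %lu\n",
	       b->nr_running, b->weighted_load);
	return 0;
}
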
| @@ -2698,7 +2507,7 @@ static inline unsigned long minus_1_or_zero(unsigned long n) | |||
| 2698 | * tasks if there is an imbalance. | 2507 | * tasks if there is an imbalance. |
| 2699 | */ | 2508 | */ |
| 2700 | static int load_balance(int this_cpu, struct rq *this_rq, | 2509 | static int load_balance(int this_cpu, struct rq *this_rq, |
| 2701 | struct sched_domain *sd, enum idle_type idle, | 2510 | struct sched_domain *sd, enum cpu_idle_type idle, |
| 2702 | int *balance) | 2511 | int *balance) |
| 2703 | { | 2512 | { |
| 2704 | int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; | 2513 | int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; |
| @@ -2711,10 +2520,10 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
| 2711 | /* | 2520 | /* |
| 2712 | * When power savings policy is enabled for the parent domain, idle | 2521 | * When power savings policy is enabled for the parent domain, idle |
| 2713 | * sibling can pick up load irrespective of busy siblings. In this case, | 2522 | * sibling can pick up load irrespective of busy siblings. In this case, |
| 2714 | * let the state of idle sibling percolate up as IDLE, instead of | 2523 | * let the state of idle sibling percolate up as CPU_IDLE, instead of |
| 2715 | * portraying it as NOT_IDLE. | 2524 | * portraying it as CPU_NOT_IDLE. |
| 2716 | */ | 2525 | */ |
| 2717 | if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && | 2526 | if (idle != CPU_NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && |
| 2718 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 2527 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
| 2719 | sd_idle = 1; | 2528 | sd_idle = 1; |
| 2720 | 2529 | ||
| @@ -2848,7 +2657,7 @@ out_one_pinned: | |||
| 2848 | * Check this_cpu to ensure it is balanced within domain. Attempt to move | 2657 | * Check this_cpu to ensure it is balanced within domain. Attempt to move |
| 2849 | * tasks if there is an imbalance. | 2658 | * tasks if there is an imbalance. |
| 2850 | * | 2659 | * |
| 2851 | * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). | 2660 | * Called from schedule when this_rq is about to become idle (CPU_NEWLY_IDLE). |
| 2852 | * this_rq is locked. | 2661 | * this_rq is locked. |
| 2853 | */ | 2662 | */ |
| 2854 | static int | 2663 | static int |
| @@ -2865,31 +2674,31 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
| 2865 | * When power savings policy is enabled for the parent domain, idle | 2674 | * When power savings policy is enabled for the parent domain, idle |
| 2866 | * sibling can pick up load irrespective of busy siblings. In this case, | 2675 | * sibling can pick up load irrespective of busy siblings. In this case, |
| 2867 | * let the state of idle sibling percolate up as IDLE, instead of | 2676 | * let the state of idle sibling percolate up as CPU_IDLE, instead of |
| 2868 | * portraying it as NOT_IDLE. | 2677 | * portraying it as CPU_NOT_IDLE. |
| 2869 | */ | 2678 | */ |
| 2870 | if (sd->flags & SD_SHARE_CPUPOWER && | 2679 | if (sd->flags & SD_SHARE_CPUPOWER && |
| 2871 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 2680 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
| 2872 | sd_idle = 1; | 2681 | sd_idle = 1; |
| 2873 | 2682 | ||
| 2874 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); | 2683 | schedstat_inc(sd, lb_cnt[CPU_NEWLY_IDLE]); |
| 2875 | redo: | 2684 | redo: |
| 2876 | group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, | 2685 | group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE, |
| 2877 | &sd_idle, &cpus, NULL); | 2686 | &sd_idle, &cpus, NULL); |
| 2878 | if (!group) { | 2687 | if (!group) { |
| 2879 | schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); | 2688 | schedstat_inc(sd, lb_nobusyg[CPU_NEWLY_IDLE]); |
| 2880 | goto out_balanced; | 2689 | goto out_balanced; |
| 2881 | } | 2690 | } |
| 2882 | 2691 | ||
| 2883 | busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance, | 2692 | busiest = find_busiest_queue(group, CPU_NEWLY_IDLE, imbalance, |
| 2884 | &cpus); | 2693 | &cpus); |
| 2885 | if (!busiest) { | 2694 | if (!busiest) { |
| 2886 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); | 2695 | schedstat_inc(sd, lb_nobusyq[CPU_NEWLY_IDLE]); |
| 2887 | goto out_balanced; | 2696 | goto out_balanced; |
| 2888 | } | 2697 | } |
| 2889 | 2698 | ||
| 2890 | BUG_ON(busiest == this_rq); | 2699 | BUG_ON(busiest == this_rq); |
| 2891 | 2700 | ||
| 2892 | schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance); | 2701 | schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance); |
| 2893 | 2702 | ||
| 2894 | nr_moved = 0; | 2703 | nr_moved = 0; |
| 2895 | if (busiest->nr_running > 1) { | 2704 | if (busiest->nr_running > 1) { |
| @@ -2897,7 +2706,7 @@ redo: | |||
| 2897 | double_lock_balance(this_rq, busiest); | 2706 | double_lock_balance(this_rq, busiest); |
| 2898 | nr_moved = move_tasks(this_rq, this_cpu, busiest, | 2707 | nr_moved = move_tasks(this_rq, this_cpu, busiest, |
| 2899 | minus_1_or_zero(busiest->nr_running), | 2708 | minus_1_or_zero(busiest->nr_running), |
| 2900 | imbalance, sd, NEWLY_IDLE, NULL); | 2709 | imbalance, sd, CPU_NEWLY_IDLE, NULL); |
| 2901 | spin_unlock(&busiest->lock); | 2710 | spin_unlock(&busiest->lock); |
| 2902 | 2711 | ||
| 2903 | if (!nr_moved) { | 2712 | if (!nr_moved) { |
| @@ -2908,7 +2717,7 @@ redo: | |||
| 2908 | } | 2717 | } |
| 2909 | 2718 | ||
| 2910 | if (!nr_moved) { | 2719 | if (!nr_moved) { |
| 2911 | schedstat_inc(sd, lb_failed[NEWLY_IDLE]); | 2720 | schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); |
| 2912 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 2721 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
| 2913 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 2722 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
| 2914 | return -1; | 2723 | return -1; |
| @@ -2918,7 +2727,7 @@ redo: | |||
| 2918 | return nr_moved; | 2727 | return nr_moved; |
| 2919 | 2728 | ||
| 2920 | out_balanced: | 2729 | out_balanced: |
| 2921 | schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); | 2730 | schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]); |
| 2922 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 2731 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
| 2923 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 2732 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
| 2924 | return -1; | 2733 | return -1; |
| @@ -2934,8 +2743,8 @@ out_balanced: | |||
| 2934 | static void idle_balance(int this_cpu, struct rq *this_rq) | 2743 | static void idle_balance(int this_cpu, struct rq *this_rq) |
| 2935 | { | 2744 | { |
| 2936 | struct sched_domain *sd; | 2745 | struct sched_domain *sd; |
| 2937 | int pulled_task = 0; | 2746 | int pulled_task = -1; |
| 2938 | unsigned long next_balance = jiffies + 60 * HZ; | 2747 | unsigned long next_balance = jiffies + HZ; |
| 2939 | 2748 | ||
| 2940 | for_each_domain(this_cpu, sd) { | 2749 | for_each_domain(this_cpu, sd) { |
| 2941 | unsigned long interval; | 2750 | unsigned long interval; |
| @@ -2954,12 +2763,13 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
| 2954 | if (pulled_task) | 2763 | if (pulled_task) |
| 2955 | break; | 2764 | break; |
| 2956 | } | 2765 | } |
| 2957 | if (!pulled_task) | 2766 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { |
| 2958 | /* | 2767 | /* |
| 2959 | * We are going idle. next_balance may be set based on | 2768 | * We are going idle. next_balance may be set based on |
| 2960 | * a busy processor. So reset next_balance. | 2769 | * a busy processor. So reset next_balance. |
| 2961 | */ | 2770 | */ |
| 2962 | this_rq->next_balance = next_balance; | 2771 | this_rq->next_balance = next_balance; |
| 2772 | } | ||
| 2963 | } | 2773 | } |
| 2964 | 2774 | ||
| 2965 | /* | 2775 | /* |
| @@ -3003,7 +2813,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
| 3003 | schedstat_inc(sd, alb_cnt); | 2813 | schedstat_inc(sd, alb_cnt); |
| 3004 | 2814 | ||
| 3005 | if (move_tasks(target_rq, target_cpu, busiest_rq, 1, | 2815 | if (move_tasks(target_rq, target_cpu, busiest_rq, 1, |
| 3006 | RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, | 2816 | RTPRIO_TO_LOAD_WEIGHT(100), sd, CPU_IDLE, |
| 3007 | NULL)) | 2817 | NULL)) |
| 3008 | schedstat_inc(sd, alb_pushed); | 2818 | schedstat_inc(sd, alb_pushed); |
| 3009 | else | 2819 | else |
| @@ -3012,32 +2822,6 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
| 3012 | spin_unlock(&target_rq->lock); | 2822 | spin_unlock(&target_rq->lock); |
| 3013 | } | 2823 | } |
| 3014 | 2824 | ||
| 3015 | static void update_load(struct rq *this_rq) | ||
| 3016 | { | ||
| 3017 | unsigned long this_load; | ||
| 3018 | unsigned int i, scale; | ||
| 3019 | |||
| 3020 | this_load = this_rq->raw_weighted_load; | ||
| 3021 | |||
| 3022 | /* Update our load: */ | ||
| 3023 | for (i = 0, scale = 1; i < 3; i++, scale += scale) { | ||
| 3024 | unsigned long old_load, new_load; | ||
| 3025 | |||
| 3026 | /* scale is effectively 1 << i now, and >> i divides by scale */ | ||
| 3027 | |||
| 3028 | old_load = this_rq->cpu_load[i]; | ||
| 3029 | new_load = this_load; | ||
| 3030 | /* | ||
| 3031 | * Round up the averaging division if load is increasing. This | ||
| 3032 | * prevents us from getting stuck on 9 if the load is 10, for | ||
| 3033 | * example. | ||
| 3034 | */ | ||
| 3035 | if (new_load > old_load) | ||
| 3036 | new_load += scale-1; | ||
| 3037 | this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i; | ||
| 3038 | } | ||
| 3039 | } | ||
| 3040 | |||
| 3041 | #ifdef CONFIG_NO_HZ | 2825 | #ifdef CONFIG_NO_HZ |
| 3042 | static struct { | 2826 | static struct { |
| 3043 | atomic_t load_balancer; | 2827 | atomic_t load_balancer; |
| @@ -3120,7 +2904,7 @@ static DEFINE_SPINLOCK(balancing); | |||
| 3120 | * | 2904 | * |
| 3121 | * Balancing parameters are set up in arch_init_sched_domains. | 2905 | * Balancing parameters are set up in arch_init_sched_domains. |
| 3122 | */ | 2906 | */ |
| 3123 | static inline void rebalance_domains(int cpu, enum idle_type idle) | 2907 | static inline void rebalance_domains(int cpu, enum cpu_idle_type idle) |
| 3124 | { | 2908 | { |
| 3125 | int balance = 1; | 2909 | int balance = 1; |
| 3126 | struct rq *rq = cpu_rq(cpu); | 2910 | struct rq *rq = cpu_rq(cpu); |
| @@ -3134,13 +2918,16 @@ static inline void rebalance_domains(int cpu, enum idle_type idle) | |||
| 3134 | continue; | 2918 | continue; |
| 3135 | 2919 | ||
| 3136 | interval = sd->balance_interval; | 2920 | interval = sd->balance_interval; |
| 3137 | if (idle != SCHED_IDLE) | 2921 | if (idle != CPU_IDLE) |
| 3138 | interval *= sd->busy_factor; | 2922 | interval *= sd->busy_factor; |
| 3139 | 2923 | ||
| 3140 | /* scale ms to jiffies */ | 2924 | /* scale ms to jiffies */ |
| 3141 | interval = msecs_to_jiffies(interval); | 2925 | interval = msecs_to_jiffies(interval); |
| 3142 | if (unlikely(!interval)) | 2926 | if (unlikely(!interval)) |
| 3143 | interval = 1; | 2927 | interval = 1; |
| 2928 | if (interval > HZ*NR_CPUS/10) | ||
| 2929 | interval = HZ*NR_CPUS/10; | ||
| 2930 | |||
| 3144 | 2931 | ||
| 3145 | if (sd->flags & SD_SERIALIZE) { | 2932 | if (sd->flags & SD_SERIALIZE) { |
| 3146 | if (!spin_trylock(&balancing)) | 2933 | if (!spin_trylock(&balancing)) |
| @@ -3154,7 +2941,7 @@ static inline void rebalance_domains(int cpu, enum idle_type idle) | |||
| 3154 | * longer idle, or one of our SMT siblings is | 2941 | * longer idle, or one of our SMT siblings is |
| 3155 | * not idle. | 2942 | * not idle. |
| 3156 | */ | 2943 | */ |
| 3157 | idle = NOT_IDLE; | 2944 | idle = CPU_NOT_IDLE; |
| 3158 | } | 2945 | } |
| 3159 | sd->last_balance = jiffies; | 2946 | sd->last_balance = jiffies; |
| 3160 | } | 2947 | } |
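
The new clamp bounds how far busy_factor can stretch the balance interval. A standalone model (userspace C; HZ, NR_CPUS and msecs_to_jiffies() are approximated here, and all values are illustrative) of the whole interval computation:

#include <stdio.h>

#define HZ 250
#define NR_CPUS 8

static unsigned long msecs_to_jiffies(unsigned long ms)
{
	return ms * HZ / 1000;		/* good enough for this model */
}

static unsigned long balance_interval(unsigned long interval_ms,
				      unsigned int busy_factor, int cpu_busy)
{
	unsigned long interval = interval_ms;

	if (cpu_busy)
		interval *= busy_factor;	/* balance less often when busy */

	interval = msecs_to_jiffies(interval);
	if (!interval)
		interval = 1;			/* never drop to zero */
	if (interval > HZ * NR_CPUS / 10)
		interval = HZ * NR_CPUS / 10;	/* the new upper clamp */

	return interval;
}

int main(void)
{
	printf("idle: %lu jiffies, busy: %lu jiffies\n",
	       balance_interval(64, 32, 0), balance_interval(64, 32, 1));
	return 0;
}
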
| @@ -3182,11 +2969,12 @@ out: | |||
| 3182 | */ | 2969 | */ |
| 3183 | static void run_rebalance_domains(struct softirq_action *h) | 2970 | static void run_rebalance_domains(struct softirq_action *h) |
| 3184 | { | 2971 | { |
| 3185 | int local_cpu = smp_processor_id(); | 2972 | int this_cpu = smp_processor_id(); |
| 3186 | struct rq *local_rq = cpu_rq(local_cpu); | 2973 | struct rq *this_rq = cpu_rq(this_cpu); |
| 3187 | enum idle_type idle = local_rq->idle_at_tick ? SCHED_IDLE : NOT_IDLE; | 2974 | enum cpu_idle_type idle = this_rq->idle_at_tick ? |
| 2975 | CPU_IDLE : CPU_NOT_IDLE; | ||
| 3188 | 2976 | ||
| 3189 | rebalance_domains(local_cpu, idle); | 2977 | rebalance_domains(this_cpu, idle); |
| 3190 | 2978 | ||
| 3191 | #ifdef CONFIG_NO_HZ | 2979 | #ifdef CONFIG_NO_HZ |
| 3192 | /* | 2980 | /* |
| @@ -3194,13 +2982,13 @@ static void run_rebalance_domains(struct softirq_action *h) | |||
| 3194 | * balancing on behalf of the other idle cpus whose ticks are | 2982 | * balancing on behalf of the other idle cpus whose ticks are |
| 3195 | * stopped. | 2983 | * stopped. |
| 3196 | */ | 2984 | */ |
| 3197 | if (local_rq->idle_at_tick && | 2985 | if (this_rq->idle_at_tick && |
| 3198 | atomic_read(&nohz.load_balancer) == local_cpu) { | 2986 | atomic_read(&nohz.load_balancer) == this_cpu) { |
| 3199 | cpumask_t cpus = nohz.cpu_mask; | 2987 | cpumask_t cpus = nohz.cpu_mask; |
| 3200 | struct rq *rq; | 2988 | struct rq *rq; |
| 3201 | int balance_cpu; | 2989 | int balance_cpu; |
| 3202 | 2990 | ||
| 3203 | cpu_clear(local_cpu, cpus); | 2991 | cpu_clear(this_cpu, cpus); |
| 3204 | for_each_cpu_mask(balance_cpu, cpus) { | 2992 | for_each_cpu_mask(balance_cpu, cpus) { |
| 3205 | /* | 2993 | /* |
| 3206 | * If this cpu gets work to do, stop the load balancing | 2994 | * If this cpu gets work to do, stop the load balancing |
| @@ -3213,8 +3001,8 @@ static void run_rebalance_domains(struct softirq_action *h) | |||
| 3213 | rebalance_domains(balance_cpu, SCHED_IDLE); | 3001 | rebalance_domains(balance_cpu, CPU_IDLE); |
| 3214 | 3002 | ||
| 3215 | rq = cpu_rq(balance_cpu); | 3003 | rq = cpu_rq(balance_cpu); |
| 3216 | if (time_after(local_rq->next_balance, rq->next_balance)) | 3004 | if (time_after(this_rq->next_balance, rq->next_balance)) |
| 3217 | local_rq->next_balance = rq->next_balance; | 3005 | this_rq->next_balance = rq->next_balance; |
| 3218 | } | 3006 | } |
| 3219 | } | 3007 | } |
| 3220 | #endif | 3008 | #endif |
| @@ -3227,9 +3015,8 @@ static void run_rebalance_domains(struct softirq_action *h) | |||
| 3227 | * idle load balancing owner or decide to stop the periodic load balancing, | 3015 | * idle load balancing owner or decide to stop the periodic load balancing, |
| 3228 | * if the whole system is idle. | 3016 | * if the whole system is idle. |
| 3229 | */ | 3017 | */ |
| 3230 | static inline void trigger_load_balance(int cpu) | 3018 | static inline void trigger_load_balance(struct rq *rq, int cpu) |
| 3231 | { | 3019 | { |
| 3232 | struct rq *rq = cpu_rq(cpu); | ||
| 3233 | #ifdef CONFIG_NO_HZ | 3020 | #ifdef CONFIG_NO_HZ |
| 3234 | /* | 3021 | /* |
| 3235 | * If we were in the nohz mode recently and busy at the current | 3022 | * If we were in the nohz mode recently and busy at the current |
| @@ -3281,13 +3068,29 @@ static inline void trigger_load_balance(int cpu) | |||
| 3281 | if (time_after_eq(jiffies, rq->next_balance)) | 3068 | if (time_after_eq(jiffies, rq->next_balance)) |
| 3282 | raise_softirq(SCHED_SOFTIRQ); | 3069 | raise_softirq(SCHED_SOFTIRQ); |
| 3283 | } | 3070 | } |
| 3284 | #else | 3071 | |
| 3072 | #else /* CONFIG_SMP */ | ||
| 3073 | |||
| 3285 | /* | 3074 | /* |
| 3286 | * on UP we do not need to balance between CPUs: | 3075 | * on UP we do not need to balance between CPUs: |
| 3287 | */ | 3076 | */ |
| 3288 | static inline void idle_balance(int cpu, struct rq *rq) | 3077 | static inline void idle_balance(int cpu, struct rq *rq) |
| 3289 | { | 3078 | { |
| 3290 | } | 3079 | } |
| 3080 | |||
| 3081 | /* Avoid "used but not defined" warning on UP */ | ||
| 3082 | static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
| 3083 | unsigned long max_nr_move, unsigned long max_load_move, | ||
| 3084 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
| 3085 | int *all_pinned, unsigned long *load_moved, | ||
| 3086 | int this_best_prio, int best_prio, int best_prio_seen, | ||
| 3087 | struct rq_iterator *iterator) | ||
| 3088 | { | ||
| 3089 | *load_moved = 0; | ||
| 3090 | |||
| 3091 | return 0; | ||
| 3092 | } | ||
| 3093 | |||
| 3291 | #endif | 3094 | #endif |
| 3292 | 3095 | ||
| 3293 | DEFINE_PER_CPU(struct kernel_stat, kstat); | 3096 | DEFINE_PER_CPU(struct kernel_stat, kstat); |
| @@ -3295,54 +3098,28 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); | |||
| 3295 | EXPORT_PER_CPU_SYMBOL(kstat); | 3098 | EXPORT_PER_CPU_SYMBOL(kstat); |
| 3296 | 3099 | ||
| 3297 | /* | 3100 | /* |
| 3298 | * This is called on clock ticks and on context switches. | 3101 | * Return p->sum_exec_runtime plus any more ns on the sched_clock |
| 3299 | * Bank in p->sched_time the ns elapsed since the last tick or switch. | 3102 | * that have not yet been banked in case the task is currently running. |
| 3300 | */ | ||
| 3301 | static inline void | ||
| 3302 | update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now) | ||
| 3303 | { | ||
| 3304 | p->sched_time += now - p->last_ran; | ||
| 3305 | p->last_ran = rq->most_recent_timestamp = now; | ||
| 3306 | } | ||
| 3307 | |||
| 3308 | /* | ||
| 3309 | * Return current->sched_time plus any more ns on the sched_clock | ||
| 3310 | * that have not yet been banked. | ||
| 3311 | */ | 3103 | */ |
| 3312 | unsigned long long current_sched_time(const struct task_struct *p) | 3104 | unsigned long long task_sched_runtime(struct task_struct *p) |
| 3313 | { | 3105 | { |
| 3314 | unsigned long long ns; | ||
| 3315 | unsigned long flags; | 3106 | unsigned long flags; |
| 3107 | u64 ns, delta_exec; | ||
| 3108 | struct rq *rq; | ||
| 3316 | 3109 | ||
| 3317 | local_irq_save(flags); | 3110 | rq = task_rq_lock(p, &flags); |
| 3318 | ns = p->sched_time + sched_clock() - p->last_ran; | 3111 | ns = p->se.sum_exec_runtime; |
| 3319 | local_irq_restore(flags); | 3112 | if (rq->curr == p) { |
| 3113 | delta_exec = rq_clock(rq) - p->se.exec_start; | ||
| 3114 | if ((s64)delta_exec > 0) | ||
| 3115 | ns += delta_exec; | ||
| 3116 | } | ||
| 3117 | task_rq_unlock(rq, &flags); | ||
| 3320 | 3118 | ||
| 3321 | return ns; | 3119 | return ns; |
| 3322 | } | 3120 | } |
| 3323 | 3121 | ||
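
task_sched_runtime() returns the banked nanoseconds plus, if the task is on a CPU right now, the not-yet-banked slice since exec_start. A userspace sketch of the same arithmetic (clock values are faked and the runqueue locking is omitted):

#include <stdio.h>

struct task {
	unsigned long long sum_exec_runtime;	/* banked ns */
	unsigned long long exec_start;		/* when this slice began */
	int running;
};

static unsigned long long task_sched_runtime(struct task *p,
					     unsigned long long now)
{
	unsigned long long ns = p->sum_exec_runtime;

	if (p->running) {
		long long delta = (long long)(now - p->exec_start);

		if (delta > 0)		/* guard against clock warps */
			ns += delta;
	}
	return ns;
}

int main(void)
{
	struct task p = { 5000000ULL, 9000000ULL, 1 };

	/* banked 5 ms plus a 0.5 ms in-flight slice = 5.5 ms */
	printf("runtime: %llu ns\n", task_sched_runtime(&p, 9500000ULL));
	return 0;
}
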
| 3324 | /* | 3122 | /* |
| 3325 | * We place interactive tasks back into the active array, if possible. | ||
| 3326 | * | ||
| 3327 | * To guarantee that this does not starve expired tasks we ignore the | ||
| 3328 | * interactivity of a task if the first expired task had to wait more | ||
| 3329 | * than a 'reasonable' amount of time. This deadline timeout is | ||
| 3330 | * load-dependent, as the frequency of array switched decreases with | ||
| 3331 | * increasing number of running tasks. We also ignore the interactivity | ||
| 3332 | * if a better static_prio task has expired: | ||
| 3333 | */ | ||
| 3334 | static inline int expired_starving(struct rq *rq) | ||
| 3335 | { | ||
| 3336 | if (rq->curr->static_prio > rq->best_expired_prio) | ||
| 3337 | return 1; | ||
| 3338 | if (!STARVATION_LIMIT || !rq->expired_timestamp) | ||
| 3339 | return 0; | ||
| 3340 | if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running) | ||
| 3341 | return 1; | ||
| 3342 | return 0; | ||
| 3343 | } | ||
| 3344 | |||
| 3345 | /* | ||
| 3346 | * Account user cpu time to a process. | 3123 | * Account user cpu time to a process. |
| 3347 | * @p: the process that the cpu time gets accounted to | 3124 | * @p: the process that the cpu time gets accounted to |
| 3348 | * @hardirq_offset: the offset to subtract from hardirq_count() | 3125 | * @hardirq_offset: the offset to subtract from hardirq_count() |
| @@ -3415,81 +3192,6 @@ void account_steal_time(struct task_struct *p, cputime_t steal) | |||
| 3415 | cpustat->steal = cputime64_add(cpustat->steal, tmp); | 3192 | cpustat->steal = cputime64_add(cpustat->steal, tmp); |
| 3416 | } | 3193 | } |
| 3417 | 3194 | ||
| 3418 | static void task_running_tick(struct rq *rq, struct task_struct *p) | ||
| 3419 | { | ||
| 3420 | if (p->array != rq->active) { | ||
| 3421 | /* Task has expired but was not scheduled yet */ | ||
| 3422 | set_tsk_need_resched(p); | ||
| 3423 | return; | ||
| 3424 | } | ||
| 3425 | spin_lock(&rq->lock); | ||
| 3426 | /* | ||
| 3427 | * The task was running during this tick - update the | ||
| 3428 | * time slice counter. Note: we do not update a thread's | ||
| 3429 | * priority until it either goes to sleep or uses up its | ||
| 3430 | * timeslice. This makes it possible for interactive tasks | ||
| 3431 | * to use up their timeslices at their highest priority levels. | ||
| 3432 | */ | ||
| 3433 | if (rt_task(p)) { | ||
| 3434 | /* | ||
| 3435 | * RR tasks need a special form of timeslice management. | ||
| 3436 | * FIFO tasks have no timeslices. | ||
| 3437 | */ | ||
| 3438 | if ((p->policy == SCHED_RR) && !--p->time_slice) { | ||
| 3439 | p->time_slice = task_timeslice(p); | ||
| 3440 | p->first_time_slice = 0; | ||
| 3441 | set_tsk_need_resched(p); | ||
| 3442 | |||
| 3443 | /* put it at the end of the queue: */ | ||
| 3444 | requeue_task(p, rq->active); | ||
| 3445 | } | ||
| 3446 | goto out_unlock; | ||
| 3447 | } | ||
| 3448 | if (!--p->time_slice) { | ||
| 3449 | dequeue_task(p, rq->active); | ||
| 3450 | set_tsk_need_resched(p); | ||
| 3451 | p->prio = effective_prio(p); | ||
| 3452 | p->time_slice = task_timeslice(p); | ||
| 3453 | p->first_time_slice = 0; | ||
| 3454 | |||
| 3455 | if (!rq->expired_timestamp) | ||
| 3456 | rq->expired_timestamp = jiffies; | ||
| 3457 | if (!TASK_INTERACTIVE(p) || expired_starving(rq)) { | ||
| 3458 | enqueue_task(p, rq->expired); | ||
| 3459 | if (p->static_prio < rq->best_expired_prio) | ||
| 3460 | rq->best_expired_prio = p->static_prio; | ||
| 3461 | } else | ||
| 3462 | enqueue_task(p, rq->active); | ||
| 3463 | } else { | ||
| 3464 | /* | ||
| 3465 | * Prevent a too long timeslice allowing a task to monopolize | ||
| 3466 | * the CPU. We do this by splitting up the timeslice into | ||
| 3467 | * smaller pieces. | ||
| 3468 | * | ||
| 3469 | * Note: this does not mean the task's timeslices expire or | ||
| 3470 | * get lost in any way, they just might be preempted by | ||
| 3471 | * another task of equal priority. (one with higher | ||
| 3472 | * priority would have preempted this task already.) We | ||
| 3473 | * requeue this task to the end of the list on this priority | ||
| 3474 | * level, which is in essence a round-robin of tasks with | ||
| 3475 | * equal priority. | ||
| 3476 | * | ||
| 3477 | * This only applies to tasks in the interactive | ||
| 3478 | * delta range with at least TIMESLICE_GRANULARITY to requeue. | ||
| 3479 | */ | ||
| 3480 | if (TASK_INTERACTIVE(p) && !((task_timeslice(p) - | ||
| 3481 | p->time_slice) % TIMESLICE_GRANULARITY(p)) && | ||
| 3482 | (p->time_slice >= TIMESLICE_GRANULARITY(p)) && | ||
| 3483 | (p->array == rq->active)) { | ||
| 3484 | |||
| 3485 | requeue_task(p, rq->active); | ||
| 3486 | set_tsk_need_resched(p); | ||
| 3487 | } | ||
| 3488 | } | ||
| 3489 | out_unlock: | ||
| 3490 | spin_unlock(&rq->lock); | ||
| 3491 | } | ||
| 3492 | |||
| 3493 | /* | 3195 | /* |
| 3494 | * This function gets called by the timer code, with HZ frequency. | 3196 | * This function gets called by the timer code, with HZ frequency. |
| 3495 | * We call it with interrupts disabled. | 3197 | * We call it with interrupts disabled. |
| @@ -3499,20 +3201,19 @@ out_unlock: | |||
| 3499 | */ | 3201 | */ |
| 3500 | void scheduler_tick(void) | 3202 | void scheduler_tick(void) |
| 3501 | { | 3203 | { |
| 3502 | unsigned long long now = sched_clock(); | ||
| 3503 | struct task_struct *p = current; | ||
| 3504 | int cpu = smp_processor_id(); | 3204 | int cpu = smp_processor_id(); |
| 3505 | int idle_at_tick = idle_cpu(cpu); | ||
| 3506 | struct rq *rq = cpu_rq(cpu); | 3205 | struct rq *rq = cpu_rq(cpu); |
| 3206 | struct task_struct *curr = rq->curr; | ||
| 3507 | 3207 | ||
| 3508 | update_cpu_clock(p, rq, now); | 3208 | spin_lock(&rq->lock); |
| 3209 | if (curr != rq->idle) /* FIXME: needed? */ | ||
| 3210 | curr->sched_class->task_tick(rq, curr); | ||
| 3211 | update_cpu_load(rq); | ||
| 3212 | spin_unlock(&rq->lock); | ||
| 3509 | 3213 | ||
| 3510 | if (!idle_at_tick) | ||
| 3511 | task_running_tick(rq, p); | ||
| 3512 | #ifdef CONFIG_SMP | 3214 | #ifdef CONFIG_SMP |
| 3513 | update_load(rq); | 3215 | rq->idle_at_tick = idle_cpu(cpu); |
| 3514 | rq->idle_at_tick = idle_at_tick; | 3216 | trigger_load_balance(rq, cpu); |
| 3515 | trigger_load_balance(cpu); | ||
| 3516 | #endif | 3217 | #endif |
| 3517 | } | 3218 | } |
| 3518 | 3219 | ||
| @@ -3554,170 +3255,129 @@ EXPORT_SYMBOL(sub_preempt_count); | |||
| 3554 | 3255 | ||
| 3555 | #endif | 3256 | #endif |
| 3556 | 3257 | ||
| 3557 | static inline int interactive_sleep(enum sleep_type sleep_type) | 3258 | /* |
| 3259 | * Print scheduling while atomic bug: | ||
| 3260 | */ | ||
| 3261 | static noinline void __schedule_bug(struct task_struct *prev) | ||
| 3558 | { | 3262 | { |
| 3559 | return (sleep_type == SLEEP_INTERACTIVE || | 3263 | printk(KERN_ERR "BUG: scheduling while atomic: %s/0x%08x/%d\n", |
| 3560 | sleep_type == SLEEP_INTERRUPTED); | 3264 | prev->comm, preempt_count(), prev->pid); |
| 3265 | debug_show_held_locks(prev); | ||
| 3266 | if (irqs_disabled()) | ||
| 3267 | print_irqtrace_events(prev); | ||
| 3268 | dump_stack(); | ||
| 3561 | } | 3269 | } |
| 3562 | 3270 | ||
| 3563 | /* | 3271 | /* |
| 3564 | * schedule() is the main scheduler function. | 3272 | * Various schedule()-time debugging checks and statistics: |
| 3565 | */ | 3273 | */ |
| 3566 | asmlinkage void __sched schedule(void) | 3274 | static inline void schedule_debug(struct task_struct *prev) |
| 3567 | { | 3275 | { |
| 3568 | struct task_struct *prev, *next; | ||
| 3569 | struct prio_array *array; | ||
| 3570 | struct list_head *queue; | ||
| 3571 | unsigned long long now; | ||
| 3572 | unsigned long run_time; | ||
| 3573 | int cpu, idx, new_prio; | ||
| 3574 | long *switch_count; | ||
| 3575 | struct rq *rq; | ||
| 3576 | |||
| 3577 | /* | 3276 | /* |
| 3578 | * Test if we are atomic. Since do_exit() needs to call into | 3277 | * Test if we are atomic. Since do_exit() needs to call into |
| 3579 | * schedule() atomically, we ignore that path for now. | 3278 | * schedule() atomically, we ignore that path for now. |
| 3580 | * Otherwise, whine if we are scheduling when we should not be. | 3279 | * Otherwise, whine if we are scheduling when we should not be. |
| 3581 | */ | 3280 | */ |
| 3582 | if (unlikely(in_atomic() && !current->exit_state)) { | 3281 | if (unlikely(in_atomic_preempt_off()) && unlikely(!prev->exit_state)) |
| 3583 | printk(KERN_ERR "BUG: scheduling while atomic: " | 3282 | __schedule_bug(prev); |
| 3584 | "%s/0x%08x/%d\n", | ||
| 3585 | current->comm, preempt_count(), current->pid); | ||
| 3586 | debug_show_held_locks(current); | ||
| 3587 | if (irqs_disabled()) | ||
| 3588 | print_irqtrace_events(current); | ||
| 3589 | dump_stack(); | ||
| 3590 | } | ||
| 3591 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | ||
| 3592 | 3283 | ||
| 3593 | need_resched: | 3284 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
| 3594 | preempt_disable(); | ||
| 3595 | prev = current; | ||
| 3596 | release_kernel_lock(prev); | ||
| 3597 | need_resched_nonpreemptible: | ||
| 3598 | rq = this_rq(); | ||
| 3599 | 3285 | ||
| 3600 | /* | 3286 | schedstat_inc(this_rq(), sched_cnt); |
| 3601 | * The idle thread is not allowed to schedule! | 3287 | } |
| 3602 | * Remove this check after it has been exercised a bit. | ||
| 3603 | */ | ||
| 3604 | if (unlikely(prev == rq->idle) && prev->state != TASK_RUNNING) { | ||
| 3605 | printk(KERN_ERR "bad: scheduling from the idle thread!\n"); | ||
| 3606 | dump_stack(); | ||
| 3607 | } | ||
| 3608 | 3288 | ||
| 3609 | schedstat_inc(rq, sched_cnt); | 3289 | /* |
| 3610 | now = sched_clock(); | 3290 | * Pick up the highest-prio task: |
| 3611 | if (likely((long long)(now - prev->timestamp) < NS_MAX_SLEEP_AVG)) { | 3291 | */ |
| 3612 | run_time = now - prev->timestamp; | 3292 | static inline struct task_struct * |
| 3613 | if (unlikely((long long)(now - prev->timestamp) < 0)) | 3293 | pick_next_task(struct rq *rq, struct task_struct *prev, u64 now) |
| 3614 | run_time = 0; | 3294 | { |
| 3615 | } else | 3295 | struct sched_class *class; |
| 3616 | run_time = NS_MAX_SLEEP_AVG; | 3296 | struct task_struct *p; |
| 3617 | 3297 | ||
| 3618 | /* | 3298 | /* |
| 3619 | * Tasks charged proportionately less run_time at high sleep_avg to | 3299 | * Optimization: we know that if all tasks are in |
| 3620 | * delay them losing their interactive status | 3300 | * the fair class we can call that function directly: |
| 3621 | */ | 3301 | */ |
| 3622 | run_time /= (CURRENT_BONUS(prev) ? : 1); | 3302 | if (likely(rq->nr_running == rq->cfs.nr_running)) { |
| 3623 | 3303 | p = fair_sched_class.pick_next_task(rq, now); | |
| 3624 | spin_lock_irq(&rq->lock); | 3304 | if (likely(p)) |
| 3625 | 3305 | return p; | |
| 3626 | switch_count = &prev->nivcsw; | ||
| 3627 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | ||
| 3628 | switch_count = &prev->nvcsw; | ||
| 3629 | if (unlikely((prev->state & TASK_INTERRUPTIBLE) && | ||
| 3630 | unlikely(signal_pending(prev)))) | ||
| 3631 | prev->state = TASK_RUNNING; | ||
| 3632 | else { | ||
| 3633 | if (prev->state == TASK_UNINTERRUPTIBLE) | ||
| 3634 | rq->nr_uninterruptible++; | ||
| 3635 | deactivate_task(prev, rq); | ||
| 3636 | } | ||
| 3637 | } | ||
| 3638 | |||
| 3639 | cpu = smp_processor_id(); | ||
| 3640 | if (unlikely(!rq->nr_running)) { | ||
| 3641 | idle_balance(cpu, rq); | ||
| 3642 | if (!rq->nr_running) { | ||
| 3643 | next = rq->idle; | ||
| 3644 | rq->expired_timestamp = 0; | ||
| 3645 | goto switch_tasks; | ||
| 3646 | } | ||
| 3647 | } | 3306 | } |
| 3648 | 3307 | ||
| 3649 | array = rq->active; | 3308 | class = sched_class_highest; |
| 3650 | if (unlikely(!array->nr_active)) { | 3309 | for ( ; ; ) { |
| 3310 | p = class->pick_next_task(rq, now); | ||
| 3311 | if (p) | ||
| 3312 | return p; | ||
| 3651 | /* | 3313 | /* |
| 3652 | * Switch the active and expired arrays. | 3314 | * Will never be NULL as the idle class always |
| 3315 | * returns a non-NULL p: | ||
| 3653 | */ | 3316 | */ |
| 3654 | schedstat_inc(rq, sched_switch); | 3317 | class = class->next; |
| 3655 | rq->active = rq->expired; | ||
| 3656 | rq->expired = array; | ||
| 3657 | array = rq->active; | ||
| 3658 | rq->expired_timestamp = 0; | ||
| 3659 | rq->best_expired_prio = MAX_PRIO; | ||
| 3660 | } | 3318 | } |
| 3319 | } | ||
| 3320 | |||
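
pick_next_task() relies on an invariant rather than a termination check: the idle class sits at the tail of the class list and always returns a task. A userspace sketch of the walk (the three toy classes model the rt/fair/idle chain; the fair-class fast path is left out for brevity):

#include <stdio.h>

struct task { const char *comm; };

struct sched_class {
	struct sched_class *next;
	struct task *(*pick_next_task)(void);
};

static struct task idle_task = { "swapper" };

static struct task *rt_pick(void)   { return NULL; }	/* no RT tasks */
static struct task *fair_pick(void) { return NULL; }	/* CFS queue empty */
static struct task *idle_pick(void) { return &idle_task; }

static struct sched_class idle_class = { NULL, idle_pick };
static struct sched_class fair_class = { &idle_class, fair_pick };
static struct sched_class rt_class   = { &fair_class, rt_pick };
#define sched_class_highest (&rt_class)

static struct task *pick_next_task(void)
{
	struct sched_class *class = sched_class_highest;
	struct task *p;

	for ( ; ; ) {
		p = class->pick_next_task();
		if (p)
			return p;
		class = class->next;	/* idle class guarantees progress */
	}
}

int main(void)
{
	printf("next: %s\n", pick_next_task()->comm);
	return 0;
}
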
| 3321 | /* | ||
| 3322 | * schedule() is the main scheduler function. | ||
| 3323 | */ | ||
| 3324 | asmlinkage void __sched schedule(void) | ||
| 3325 | { | ||
| 3326 | struct task_struct *prev, *next; | ||
| 3327 | long *switch_count; | ||
| 3328 | struct rq *rq; | ||
| 3329 | u64 now; | ||
| 3330 | int cpu; | ||
| 3661 | 3331 | ||
| 3662 | idx = sched_find_first_bit(array->bitmap); | 3332 | need_resched: |
| 3663 | queue = array->queue + idx; | 3333 | preempt_disable(); |
| 3664 | next = list_entry(queue->next, struct task_struct, run_list); | 3334 | cpu = smp_processor_id(); |
| 3335 | rq = cpu_rq(cpu); | ||
| 3336 | rcu_qsctr_inc(cpu); | ||
| 3337 | prev = rq->curr; | ||
| 3338 | switch_count = &prev->nivcsw; | ||
| 3665 | 3339 | ||
| 3666 | if (!rt_task(next) && interactive_sleep(next->sleep_type)) { | 3340 | release_kernel_lock(prev); |
| 3667 | unsigned long long delta = now - next->timestamp; | 3341 | need_resched_nonpreemptible: |
| 3668 | if (unlikely((long long)(now - next->timestamp) < 0)) | ||
| 3669 | delta = 0; | ||
| 3670 | 3342 | ||
| 3671 | if (next->sleep_type == SLEEP_INTERACTIVE) | 3343 | schedule_debug(prev); |
| 3672 | delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128; | ||
| 3673 | 3344 | ||
| 3674 | array = next->array; | 3345 | spin_lock_irq(&rq->lock); |
| 3675 | new_prio = recalc_task_prio(next, next->timestamp + delta); | 3346 | clear_tsk_need_resched(prev); |
| 3676 | 3347 | ||
| 3677 | if (unlikely(next->prio != new_prio)) { | 3348 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
| 3678 | dequeue_task(next, array); | 3349 | if (unlikely((prev->state & TASK_INTERRUPTIBLE) && |
| 3679 | next->prio = new_prio; | 3350 | unlikely(signal_pending(prev)))) { |
| 3680 | enqueue_task(next, array); | 3351 | prev->state = TASK_RUNNING; |
| 3352 | } else { | ||
| 3353 | deactivate_task(rq, prev, 1); | ||
| 3681 | } | 3354 | } |
| 3355 | switch_count = &prev->nvcsw; | ||
| 3682 | } | 3356 | } |
| 3683 | next->sleep_type = SLEEP_NORMAL; | ||
| 3684 | switch_tasks: | ||
| 3685 | if (next == rq->idle) | ||
| 3686 | schedstat_inc(rq, sched_goidle); | ||
| 3687 | prefetch(next); | ||
| 3688 | prefetch_stack(next); | ||
| 3689 | clear_tsk_need_resched(prev); | ||
| 3690 | rcu_qsctr_inc(task_cpu(prev)); | ||
| 3691 | 3357 | ||
| 3692 | update_cpu_clock(prev, rq, now); | 3358 | if (unlikely(!rq->nr_running)) |
| 3359 | idle_balance(cpu, rq); | ||
| 3693 | 3360 | ||
| 3694 | prev->sleep_avg -= run_time; | 3361 | now = __rq_clock(rq); |
| 3695 | if ((long)prev->sleep_avg <= 0) | 3362 | prev->sched_class->put_prev_task(rq, prev, now); |
| 3696 | prev->sleep_avg = 0; | 3363 | next = pick_next_task(rq, prev, now); |
| 3697 | prev->timestamp = prev->last_ran = now; | ||
| 3698 | 3364 | ||
| 3699 | sched_info_switch(prev, next); | 3365 | sched_info_switch(prev, next); |
| 3366 | |||
| 3700 | if (likely(prev != next)) { | 3367 | if (likely(prev != next)) { |
| 3701 | next->timestamp = next->last_ran = now; | ||
| 3702 | rq->nr_switches++; | 3368 | rq->nr_switches++; |
| 3703 | rq->curr = next; | 3369 | rq->curr = next; |
| 3704 | ++*switch_count; | 3370 | ++*switch_count; |
| 3705 | 3371 | ||
| 3706 | prepare_task_switch(rq, next); | 3372 | context_switch(rq, prev, next); /* unlocks the rq */ |
| 3707 | prev = context_switch(rq, prev, next); | ||
| 3708 | barrier(); | ||
| 3709 | /* | ||
| 3710 | * this_rq must be evaluated again because prev may have moved | ||
| 3711 | * CPUs since it called schedule(), thus the 'rq' on its stack | ||
| 3712 | * frame will be invalid. | ||
| 3713 | */ | ||
| 3714 | finish_task_switch(this_rq(), prev); | ||
| 3715 | } else | 3373 | } else |
| 3716 | spin_unlock_irq(&rq->lock); | 3374 | spin_unlock_irq(&rq->lock); |
| 3717 | 3375 | ||
| 3718 | prev = current; | 3376 | if (unlikely(reacquire_kernel_lock(current) < 0)) { |
| 3719 | if (unlikely(reacquire_kernel_lock(prev) < 0)) | 3377 | cpu = smp_processor_id(); |
| 3378 | rq = cpu_rq(cpu); | ||
| 3720 | goto need_resched_nonpreemptible; | 3379 | goto need_resched_nonpreemptible; |
| 3380 | } | ||
| 3721 | preempt_enable_no_resched(); | 3381 | preempt_enable_no_resched(); |
| 3722 | if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) | 3382 | if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) |
| 3723 | goto need_resched; | 3383 | goto need_resched; |
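
One subtlety in the slimmed-down schedule() is the dequeue decision: a task that blocked in TASK_INTERRUPTIBLE but already has a signal pending is flipped back to TASK_RUNNING rather than deactivated, so it stays queued and can handle the signal. A userspace sketch of just that branch (task state is modelled with plain flags):

#include <stdio.h>

#define TASK_RUNNING		0
#define TASK_INTERRUPTIBLE	1

struct task { int state; int sigpending; int queued; };

static void deactivate_task(struct task *p) { p->queued = 0; }

static void schedule_prev(struct task *prev)
{
	if (prev->state) {				/* not TASK_RUNNING */
		if ((prev->state & TASK_INTERRUPTIBLE) && prev->sigpending)
			prev->state = TASK_RUNNING;	/* keep it runnable */
		else
			deactivate_task(prev);		/* really going to sleep */
	}
}

int main(void)
{
	struct task a = { TASK_INTERRUPTIBLE, 1, 1 };
	struct task b = { TASK_INTERRUPTIBLE, 0, 1 };

	schedule_prev(&a);
	schedule_prev(&b);
	printf("a queued=%d b queued=%d\n", a.queued, b.queued);
	return 0;
}
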
| @@ -4045,74 +3705,85 @@ out: | |||
| 4045 | } | 3705 | } |
| 4046 | EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); | 3706 | EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); |
| 4047 | 3707 | ||
| 4048 | 3708 | static inline void | |
| 4049 | #define SLEEP_ON_VAR \ | 3709 | sleep_on_head(wait_queue_head_t *q, wait_queue_t *wait, unsigned long *flags) |
| 4050 | unsigned long flags; \ | 3710 | { |
| 4051 | wait_queue_t wait; \ | 3711 | spin_lock_irqsave(&q->lock, *flags); |
| 4052 | init_waitqueue_entry(&wait, current); | 3712 | __add_wait_queue(q, wait); |
| 4053 | |||
| 4054 | #define SLEEP_ON_HEAD \ | ||
| 4055 | spin_lock_irqsave(&q->lock,flags); \ | ||
| 4056 | __add_wait_queue(q, &wait); \ | ||
| 4057 | spin_unlock(&q->lock); | 3713 | spin_unlock(&q->lock); |
| 3714 | } | ||
| 4058 | 3715 | ||
| 4059 | #define SLEEP_ON_TAIL \ | 3716 | static inline void |
| 4060 | spin_lock_irq(&q->lock); \ | 3717 | sleep_on_tail(wait_queue_head_t *q, wait_queue_t *wait, unsigned long *flags) |
| 4061 | __remove_wait_queue(q, &wait); \ | 3718 | { |
| 4062 | spin_unlock_irqrestore(&q->lock, flags); | 3719 | spin_lock_irq(&q->lock); |
| 3720 | __remove_wait_queue(q, wait); | ||
| 3721 | spin_unlock_irqrestore(&q->lock, *flags); | ||
| 3722 | } | ||
| 4063 | 3723 | ||
| 4064 | void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q) | 3724 | void __sched interruptible_sleep_on(wait_queue_head_t *q) |
| 4065 | { | 3725 | { |
| 4066 | SLEEP_ON_VAR | 3726 | unsigned long flags; |
| 3727 | wait_queue_t wait; | ||
| 3728 | |||
| 3729 | init_waitqueue_entry(&wait, current); | ||
| 4067 | 3730 | ||
| 4068 | current->state = TASK_INTERRUPTIBLE; | 3731 | current->state = TASK_INTERRUPTIBLE; |
| 4069 | 3732 | ||
| 4070 | SLEEP_ON_HEAD | 3733 | sleep_on_head(q, &wait, &flags); |
| 4071 | schedule(); | 3734 | schedule(); |
| 4072 | SLEEP_ON_TAIL | 3735 | sleep_on_tail(q, &wait, &flags); |
| 4073 | } | 3736 | } |
| 4074 | EXPORT_SYMBOL(interruptible_sleep_on); | 3737 | EXPORT_SYMBOL(interruptible_sleep_on); |
| 4075 | 3738 | ||
| 4076 | long fastcall __sched | 3739 | long __sched |
| 4077 | interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) | 3740 | interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) |
| 4078 | { | 3741 | { |
| 4079 | SLEEP_ON_VAR | 3742 | unsigned long flags; |
| 3743 | wait_queue_t wait; | ||
| 3744 | |||
| 3745 | init_waitqueue_entry(&wait, current); | ||
| 4080 | 3746 | ||
| 4081 | current->state = TASK_INTERRUPTIBLE; | 3747 | current->state = TASK_INTERRUPTIBLE; |
| 4082 | 3748 | ||
| 4083 | SLEEP_ON_HEAD | 3749 | sleep_on_head(q, &wait, &flags); |
| 4084 | timeout = schedule_timeout(timeout); | 3750 | timeout = schedule_timeout(timeout); |
| 4085 | SLEEP_ON_TAIL | 3751 | sleep_on_tail(q, &wait, &flags); |
| 4086 | 3752 | ||
| 4087 | return timeout; | 3753 | return timeout; |
| 4088 | } | 3754 | } |
| 4089 | EXPORT_SYMBOL(interruptible_sleep_on_timeout); | 3755 | EXPORT_SYMBOL(interruptible_sleep_on_timeout); |
| 4090 | 3756 | ||
| 4091 | void fastcall __sched sleep_on(wait_queue_head_t *q) | 3757 | void __sched sleep_on(wait_queue_head_t *q) |
| 4092 | { | 3758 | { |
| 4093 | SLEEP_ON_VAR | 3759 | unsigned long flags; |
| 3760 | wait_queue_t wait; | ||
| 3761 | |||
| 3762 | init_waitqueue_entry(&wait, current); | ||
| 4094 | 3763 | ||
| 4095 | current->state = TASK_UNINTERRUPTIBLE; | 3764 | current->state = TASK_UNINTERRUPTIBLE; |
| 4096 | 3765 | ||
| 4097 | SLEEP_ON_HEAD | 3766 | sleep_on_head(q, &wait, &flags); |
| 4098 | schedule(); | 3767 | schedule(); |
| 4099 | SLEEP_ON_TAIL | 3768 | sleep_on_tail(q, &wait, &flags); |
| 4100 | } | 3769 | } |
| 4101 | EXPORT_SYMBOL(sleep_on); | 3770 | EXPORT_SYMBOL(sleep_on); |
| 4102 | 3771 | ||
| 4103 | long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) | 3772 | long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) |
| 4104 | { | 3773 | { |
| 4105 | SLEEP_ON_VAR | 3774 | unsigned long flags; |
| 3775 | wait_queue_t wait; | ||
| 3776 | |||
| 3777 | init_waitqueue_entry(&wait, current); | ||
| 4106 | 3778 | ||
| 4107 | current->state = TASK_UNINTERRUPTIBLE; | 3779 | current->state = TASK_UNINTERRUPTIBLE; |
| 4108 | 3780 | ||
| 4109 | SLEEP_ON_HEAD | 3781 | sleep_on_head(q, &wait, &flags); |
| 4110 | timeout = schedule_timeout(timeout); | 3782 | timeout = schedule_timeout(timeout); |
| 4111 | SLEEP_ON_TAIL | 3783 | sleep_on_tail(q, &wait, &flags); |
| 4112 | 3784 | ||
| 4113 | return timeout; | 3785 | return timeout; |
| 4114 | } | 3786 | } |
| 4115 | |||
| 4116 | EXPORT_SYMBOL(sleep_on_timeout); | 3787 | EXPORT_SYMBOL(sleep_on_timeout); |
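[Editor's aside] The four exported variants above differ only in the task state they set and whether a timeout is used. A natural follow-up refactor, shown here as a hypothetical sketch rather than part of this patch, is a single common helper; schedule_timeout(MAX_SCHEDULE_TIMEOUT) behaves like a plain schedule(), so the non-timeout variants fit the same shape:

	/* Editor's sketch, not in this patch: one helper covering all four
	 * sleep_on variants, built on the sleep_on_head/tail helpers above. */
	static long __sched
	sleep_on_common(wait_queue_head_t *q, int state, long timeout)
	{
		unsigned long flags;
		wait_queue_t wait;

		init_waitqueue_entry(&wait, current);
		current->state = state;

		sleep_on_head(q, &wait, &flags);
		timeout = schedule_timeout(timeout);
		sleep_on_tail(q, &wait, &flags);

		return timeout;
	}

With that, sleep_on(q) becomes sleep_on_common(q, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) and the timeout variants pass their timeout through.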
| 4117 | 3788 | ||
| 4118 | #ifdef CONFIG_RT_MUTEXES | 3789 | #ifdef CONFIG_RT_MUTEXES |
| @@ -4129,29 +3800,30 @@ EXPORT_SYMBOL(sleep_on_timeout); | |||
| 4129 | */ | 3800 | */ |
| 4130 | void rt_mutex_setprio(struct task_struct *p, int prio) | 3801 | void rt_mutex_setprio(struct task_struct *p, int prio) |
| 4131 | { | 3802 | { |
| 4132 | struct prio_array *array; | ||
| 4133 | unsigned long flags; | 3803 | unsigned long flags; |
| 3804 | int oldprio, on_rq; | ||
| 4134 | struct rq *rq; | 3805 | struct rq *rq; |
| 4135 | int oldprio; | 3806 | u64 now; |
| 4136 | 3807 | ||
| 4137 | BUG_ON(prio < 0 || prio > MAX_PRIO); | 3808 | BUG_ON(prio < 0 || prio > MAX_PRIO); |
| 4138 | 3809 | ||
| 4139 | rq = task_rq_lock(p, &flags); | 3810 | rq = task_rq_lock(p, &flags); |
| 3811 | now = rq_clock(rq); | ||
| 4140 | 3812 | ||
| 4141 | oldprio = p->prio; | 3813 | oldprio = p->prio; |
| 4142 | array = p->array; | 3814 | on_rq = p->se.on_rq; |
| 4143 | if (array) | 3815 | if (on_rq) |
| 4144 | dequeue_task(p, array); | 3816 | dequeue_task(rq, p, 0, now); |
| 3817 | |||
| 3818 | if (rt_prio(prio)) | ||
| 3819 | p->sched_class = &rt_sched_class; | ||
| 3820 | else | ||
| 3821 | p->sched_class = &fair_sched_class; | ||
| 3822 | |||
| 4145 | p->prio = prio; | 3823 | p->prio = prio; |
| 4146 | 3824 | ||
| 4147 | if (array) { | 3825 | if (on_rq) { |
| 4148 | /* | 3826 | enqueue_task(rq, p, 0, now); |
| 4149 | * If changing to an RT priority then queue it | ||
| 4150 | * in the active array! | ||
| 4151 | */ | ||
| 4152 | if (rt_task(p)) | ||
| 4153 | array = rq->active; | ||
| 4154 | enqueue_task(p, array); | ||
| 4155 | /* | 3827 | /* |
| 4156 | * Reschedule if we are currently running on this runqueue and | 3828 | * Reschedule if we are currently running on this runqueue and |
| 4157 | * our priority decreased, or if we are not currently running on | 3829 | * our priority decreased, or if we are not currently running on |
| @@ -4160,8 +3832,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
| 4160 | if (task_running(rq, p)) { | 3832 | if (task_running(rq, p)) { |
| 4161 | if (p->prio > oldprio) | 3833 | if (p->prio > oldprio) |
| 4162 | resched_task(rq->curr); | 3834 | resched_task(rq->curr); |
| 4163 | } else if (TASK_PREEMPTS_CURR(p, rq)) | 3835 | } else { |
| 4164 | resched_task(rq->curr); | 3836 | check_preempt_curr(rq, p); |
| 3837 | } | ||
| 4165 | } | 3838 | } |
| 4166 | task_rq_unlock(rq, &flags); | 3839 | task_rq_unlock(rq, &flags); |
| 4167 | } | 3840 | } |
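[Editor's aside] rt_mutex_setprio() above, and set_user_nice() just below, both follow the same dequeue/modify/enqueue idiom that replaces the old prio_array juggling. A distilled sketch, with a hypothetical callback and with details such as the task_running() special case omitted; the caller must hold the task's runqueue lock:

	/* Editor's sketch of the recurring idiom (hypothetical helper). */
	static void modify_queued_task(struct rq *rq, struct task_struct *p,
				       void (*change)(struct task_struct *p))
	{
		int on_rq = p->se.on_rq;
		u64 now = rq_clock(rq);

		if (on_rq)
			dequeue_task(rq, p, 0, now);	/* take p off the queue */
		change(p);				/* prio/class/weight update */
		if (on_rq) {
			enqueue_task(rq, p, 0, now);	/* reinsert at new position */
			check_preempt_curr(rq, p);	/* new position may preempt */
		}
	}

The point of the idiom is that class and priority fields are only ever changed while the task is off the runqueue, so the enqueue sees a consistent task.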
| @@ -4170,10 +3843,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
| 4170 | 3843 | ||
| 4171 | void set_user_nice(struct task_struct *p, long nice) | 3844 | void set_user_nice(struct task_struct *p, long nice) |
| 4172 | { | 3845 | { |
| 4173 | struct prio_array *array; | 3846 | int old_prio, delta, on_rq; |
| 4174 | int old_prio, delta; | ||
| 4175 | unsigned long flags; | 3847 | unsigned long flags; |
| 4176 | struct rq *rq; | 3848 | struct rq *rq; |
| 3849 | u64 now; | ||
| 4177 | 3850 | ||
| 4178 | if (TASK_NICE(p) == nice || nice < -20 || nice > 19) | 3851 | if (TASK_NICE(p) == nice || nice < -20 || nice > 19) |
| 4179 | return; | 3852 | return; |
| @@ -4182,20 +3855,21 @@ void set_user_nice(struct task_struct *p, long nice) | |||
| 4182 | * the task might be in the middle of scheduling on another CPU. | 3855 | * the task might be in the middle of scheduling on another CPU. |
| 4183 | */ | 3856 | */ |
| 4184 | rq = task_rq_lock(p, &flags); | 3857 | rq = task_rq_lock(p, &flags); |
| 3858 | now = rq_clock(rq); | ||
| 4185 | /* | 3859 | /* |
| 4186 | * The RT priorities are set via sched_setscheduler(), but we still | 3860 | * The RT priorities are set via sched_setscheduler(), but we still |
| 4187 | * allow the 'normal' nice value to be set - but as expected | 3861 | * allow the 'normal' nice value to be set - but as expected |
| 4188 | * it won't have any effect on scheduling until the task is | 3862 | * it won't have any effect on scheduling until the task is |
| 4189 | * not SCHED_NORMAL/SCHED_BATCH: | 3863 | * SCHED_FIFO/SCHED_RR: |
| 4190 | */ | 3864 | */ |
| 4191 | if (has_rt_policy(p)) { | 3865 | if (task_has_rt_policy(p)) { |
| 4192 | p->static_prio = NICE_TO_PRIO(nice); | 3866 | p->static_prio = NICE_TO_PRIO(nice); |
| 4193 | goto out_unlock; | 3867 | goto out_unlock; |
| 4194 | } | 3868 | } |
| 4195 | array = p->array; | 3869 | on_rq = p->se.on_rq; |
| 4196 | if (array) { | 3870 | if (on_rq) { |
| 4197 | dequeue_task(p, array); | 3871 | dequeue_task(rq, p, 0, now); |
| 4198 | dec_raw_weighted_load(rq, p); | 3872 | dec_load(rq, p, now); |
| 4199 | } | 3873 | } |
| 4200 | 3874 | ||
| 4201 | p->static_prio = NICE_TO_PRIO(nice); | 3875 | p->static_prio = NICE_TO_PRIO(nice); |
| @@ -4204,9 +3878,9 @@ void set_user_nice(struct task_struct *p, long nice) | |||
| 4204 | p->prio = effective_prio(p); | 3878 | p->prio = effective_prio(p); |
| 4205 | delta = p->prio - old_prio; | 3879 | delta = p->prio - old_prio; |
| 4206 | 3880 | ||
| 4207 | if (array) { | 3881 | if (on_rq) { |
| 4208 | enqueue_task(p, array); | 3882 | enqueue_task(rq, p, 0, now); |
| 4209 | inc_raw_weighted_load(rq, p); | 3883 | inc_load(rq, p, now); |
| 4210 | /* | 3884 | /* |
| 4211 | * If the task increased its priority or is running and | 3885 | * If the task increased its priority or is running and |
| 4212 | * lowered its priority, then reschedule its CPU: | 3886 | * lowered its priority, then reschedule its CPU: |
| @@ -4326,20 +4000,28 @@ static inline struct task_struct *find_process_by_pid(pid_t pid) | |||
| 4326 | } | 4000 | } |
| 4327 | 4001 | ||
| 4328 | /* Actually do priority change: must hold rq lock. */ | 4002 | /* Actually do priority change: must hold rq lock. */ |
| 4329 | static void __setscheduler(struct task_struct *p, int policy, int prio) | 4003 | static void |
| 4004 | __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | ||
| 4330 | { | 4005 | { |
| 4331 | BUG_ON(p->array); | 4006 | BUG_ON(p->se.on_rq); |
| 4332 | 4007 | ||
| 4333 | p->policy = policy; | 4008 | p->policy = policy; |
| 4009 | switch (p->policy) { | ||
| 4010 | case SCHED_NORMAL: | ||
| 4011 | case SCHED_BATCH: | ||
| 4012 | case SCHED_IDLE: | ||
| 4013 | p->sched_class = &fair_sched_class; | ||
| 4014 | break; | ||
| 4015 | case SCHED_FIFO: | ||
| 4016 | case SCHED_RR: | ||
| 4017 | p->sched_class = &rt_sched_class; | ||
| 4018 | break; | ||
| 4019 | } | ||
| 4020 | |||
| 4334 | p->rt_priority = prio; | 4021 | p->rt_priority = prio; |
| 4335 | p->normal_prio = normal_prio(p); | 4022 | p->normal_prio = normal_prio(p); |
| 4336 | /* we are holding p->pi_lock already */ | 4023 | /* we are holding p->pi_lock already */ |
| 4337 | p->prio = rt_mutex_getprio(p); | 4024 | p->prio = rt_mutex_getprio(p); |
| 4338 | /* | ||
| 4339 | * SCHED_BATCH tasks are treated as perpetual CPU hogs: | ||
| 4340 | */ | ||
| 4341 | if (policy == SCHED_BATCH) | ||
| 4342 | p->sleep_avg = 0; | ||
| 4343 | set_load_weight(p); | 4025 | set_load_weight(p); |
| 4344 | } | 4026 | } |
| 4345 | 4027 | ||
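[Editor's aside] For reference, the policy-to-class mapping that __setscheduler() now encodes is what in-kernel callers rely on when they pin a kthread to the RT class; a hypothetical, illustrative snippet (the migration-thread setup later in this patch does the equivalent under the rq lock via __setscheduler() directly):

	/* Editor's illustration: requesting the RT class for a kernel thread. */
	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };

	if (sched_setscheduler(p, SCHED_FIFO, &param) < 0)
		printk(KERN_WARNING "could not make %s SCHED_FIFO\n", p->comm);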
| @@ -4354,8 +4036,7 @@ static void __setscheduler(struct task_struct *p, int policy, int prio) | |||
| 4354 | int sched_setscheduler(struct task_struct *p, int policy, | 4036 | int sched_setscheduler(struct task_struct *p, int policy, |
| 4355 | struct sched_param *param) | 4037 | struct sched_param *param) |
| 4356 | { | 4038 | { |
| 4357 | int retval, oldprio, oldpolicy = -1; | 4039 | int retval, oldprio, oldpolicy = -1, on_rq; |
| 4358 | struct prio_array *array; | ||
| 4359 | unsigned long flags; | 4040 | unsigned long flags; |
| 4360 | struct rq *rq; | 4041 | struct rq *rq; |
| 4361 | 4042 | ||
| @@ -4366,27 +4047,27 @@ recheck: | |||
| 4366 | if (policy < 0) | 4047 | if (policy < 0) |
| 4367 | policy = oldpolicy = p->policy; | 4048 | policy = oldpolicy = p->policy; |
| 4368 | else if (policy != SCHED_FIFO && policy != SCHED_RR && | 4049 | else if (policy != SCHED_FIFO && policy != SCHED_RR && |
| 4369 | policy != SCHED_NORMAL && policy != SCHED_BATCH) | 4050 | policy != SCHED_NORMAL && policy != SCHED_BATCH && |
| 4051 | policy != SCHED_IDLE) | ||
| 4370 | return -EINVAL; | 4052 | return -EINVAL; |
| 4371 | /* | 4053 | /* |
| 4372 | * Valid priorities for SCHED_FIFO and SCHED_RR are | 4054 | * Valid priorities for SCHED_FIFO and SCHED_RR are |
| 4373 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and | 4055 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL, |
| 4374 | * SCHED_BATCH is 0. | 4056 | * SCHED_BATCH and SCHED_IDLE is 0. |
| 4375 | */ | 4057 | */ |
| 4376 | if (param->sched_priority < 0 || | 4058 | if (param->sched_priority < 0 || |
| 4377 | (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || | 4059 | (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || |
| 4378 | (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) | 4060 | (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) |
| 4379 | return -EINVAL; | 4061 | return -EINVAL; |
| 4380 | if (is_rt_policy(policy) != (param->sched_priority != 0)) | 4062 | if (rt_policy(policy) != (param->sched_priority != 0)) |
| 4381 | return -EINVAL; | 4063 | return -EINVAL; |
| 4382 | 4064 | ||
| 4383 | /* | 4065 | /* |
| 4384 | * Allow unprivileged RT tasks to decrease priority: | 4066 | * Allow unprivileged RT tasks to decrease priority: |
| 4385 | */ | 4067 | */ |
| 4386 | if (!capable(CAP_SYS_NICE)) { | 4068 | if (!capable(CAP_SYS_NICE)) { |
| 4387 | if (is_rt_policy(policy)) { | 4069 | if (rt_policy(policy)) { |
| 4388 | unsigned long rlim_rtprio; | 4070 | unsigned long rlim_rtprio; |
| 4389 | unsigned long flags; | ||
| 4390 | 4071 | ||
| 4391 | if (!lock_task_sighand(p, &flags)) | 4072 | if (!lock_task_sighand(p, &flags)) |
| 4392 | return -ESRCH; | 4073 | return -ESRCH; |
| @@ -4402,6 +4083,12 @@ recheck: | |||
| 4402 | param->sched_priority > rlim_rtprio) | 4083 | param->sched_priority > rlim_rtprio) |
| 4403 | return -EPERM; | 4084 | return -EPERM; |
| 4404 | } | 4085 | } |
| 4086 | /* | ||
| 4087 | * Like positive nice levels, don't allow tasks to | ||
| 4088 | * move out of SCHED_IDLE either: | ||
| 4089 | */ | ||
| 4090 | if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) | ||
| 4091 | return -EPERM; | ||
| 4405 | 4092 | ||
| 4406 | /* can't change other user's priorities */ | 4093 | /* can't change other user's priorities */ |
| 4407 | if ((current->euid != p->euid) && | 4094 | if ((current->euid != p->euid) && |
| @@ -4429,13 +4116,13 @@ recheck: | |||
| 4429 | spin_unlock_irqrestore(&p->pi_lock, flags); | 4116 | spin_unlock_irqrestore(&p->pi_lock, flags); |
| 4430 | goto recheck; | 4117 | goto recheck; |
| 4431 | } | 4118 | } |
| 4432 | array = p->array; | 4119 | on_rq = p->se.on_rq; |
| 4433 | if (array) | 4120 | if (on_rq) |
| 4434 | deactivate_task(p, rq); | 4121 | deactivate_task(rq, p, 0); |
| 4435 | oldprio = p->prio; | 4122 | oldprio = p->prio; |
| 4436 | __setscheduler(p, policy, param->sched_priority); | 4123 | __setscheduler(rq, p, policy, param->sched_priority); |
| 4437 | if (array) { | 4124 | if (on_rq) { |
| 4438 | __activate_task(p, rq); | 4125 | activate_task(rq, p, 0); |
| 4439 | /* | 4126 | /* |
| 4440 | * Reschedule if we are currently running on this runqueue and | 4127 | * Reschedule if we are currently running on this runqueue and |
| 4441 | * our priority decreased, or if we are not currently running on | 4128 | * our priority decreased, or if we are not currently running on |
| @@ -4444,8 +4131,9 @@ recheck: | |||
| 4444 | if (task_running(rq, p)) { | 4131 | if (task_running(rq, p)) { |
| 4445 | if (p->prio > oldprio) | 4132 | if (p->prio > oldprio) |
| 4446 | resched_task(rq->curr); | 4133 | resched_task(rq->curr); |
| 4447 | } else if (TASK_PREEMPTS_CURR(p, rq)) | 4134 | } else { |
| 4448 | resched_task(rq->curr); | 4135 | check_preempt_curr(rq, p); |
| 4136 | } | ||
| 4449 | } | 4137 | } |
| 4450 | __task_rq_unlock(rq); | 4138 | __task_rq_unlock(rq); |
| 4451 | spin_unlock_irqrestore(&p->pi_lock, flags); | 4139 | spin_unlock_irqrestore(&p->pi_lock, flags); |
| @@ -4717,41 +4405,18 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, | |||
| 4717 | /** | 4405 | /** |
| 4718 | * sys_sched_yield - yield the current processor to other threads. | 4406 | * sys_sched_yield - yield the current processor to other threads. |
| 4719 | * | 4407 | * |
| 4720 | * This function yields the current CPU by moving the calling thread | 4408 | * This function yields the current CPU to other tasks. If there are no |
| 4721 | * to the expired array. If there are no other threads running on this | 4409 | * other threads running on this CPU then this function will return. |
| 4722 | * CPU then this function will return. | ||
| 4723 | */ | 4410 | */ |
| 4724 | asmlinkage long sys_sched_yield(void) | 4411 | asmlinkage long sys_sched_yield(void) |
| 4725 | { | 4412 | { |
| 4726 | struct rq *rq = this_rq_lock(); | 4413 | struct rq *rq = this_rq_lock(); |
| 4727 | struct prio_array *array = current->array, *target = rq->expired; | ||
| 4728 | 4414 | ||
| 4729 | schedstat_inc(rq, yld_cnt); | 4415 | schedstat_inc(rq, yld_cnt); |
| 4730 | /* | 4416 | if (unlikely(rq->nr_running == 1)) |
| 4731 | * We implement yielding by moving the task into the expired | ||
| 4732 | * queue. | ||
| 4733 | * | ||
| 4734 | * (special rule: RT tasks will just roundrobin in the active | ||
| 4735 | * array.) | ||
| 4736 | */ | ||
| 4737 | if (rt_task(current)) | ||
| 4738 | target = rq->active; | ||
| 4739 | |||
| 4740 | if (array->nr_active == 1) { | ||
| 4741 | schedstat_inc(rq, yld_act_empty); | 4417 | schedstat_inc(rq, yld_act_empty); |
| 4742 | if (!rq->expired->nr_active) | 4418 | else |
| 4743 | schedstat_inc(rq, yld_both_empty); | 4419 | current->sched_class->yield_task(rq, current); |
| 4744 | } else if (!rq->expired->nr_active) | ||
| 4745 | schedstat_inc(rq, yld_exp_empty); | ||
| 4746 | |||
| 4747 | if (array != target) { | ||
| 4748 | dequeue_task(current, array); | ||
| 4749 | enqueue_task(current, target); | ||
| 4750 | } else | ||
| 4751 | /* | ||
| 4752 | * requeue_task is cheaper so perform that if possible. | ||
| 4753 | */ | ||
| 4754 | requeue_task(current, array); | ||
| 4755 | 4420 | ||
| 4756 | /* | 4421 | /* |
| 4757 | * Since we are going to call schedule() anyway, there's | 4422 | * Since we are going to call schedule() anyway, there's |
| @@ -4902,6 +4567,7 @@ asmlinkage long sys_sched_get_priority_max(int policy) | |||
| 4902 | break; | 4567 | break; |
| 4903 | case SCHED_NORMAL: | 4568 | case SCHED_NORMAL: |
| 4904 | case SCHED_BATCH: | 4569 | case SCHED_BATCH: |
| 4570 | case SCHED_IDLE: | ||
| 4905 | ret = 0; | 4571 | ret = 0; |
| 4906 | break; | 4572 | break; |
| 4907 | } | 4573 | } |
| @@ -4926,6 +4592,7 @@ asmlinkage long sys_sched_get_priority_min(int policy) | |||
| 4926 | break; | 4592 | break; |
| 4927 | case SCHED_NORMAL: | 4593 | case SCHED_NORMAL: |
| 4928 | case SCHED_BATCH: | 4594 | case SCHED_BATCH: |
| 4595 | case SCHED_IDLE: | ||
| 4929 | ret = 0; | 4596 | ret = 0; |
| 4930 | } | 4597 | } |
| 4931 | return ret; | 4598 | return ret; |
| @@ -4960,7 +4627,7 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) | |||
| 4960 | goto out_unlock; | 4627 | goto out_unlock; |
| 4961 | 4628 | ||
| 4962 | jiffies_to_timespec(p->policy == SCHED_FIFO ? | 4629 | jiffies_to_timespec(p->policy == SCHED_FIFO ? |
| 4963 | 0 : task_timeslice(p), &t); | 4630 | 0 : static_prio_timeslice(p->static_prio), &t); |
| 4964 | read_unlock(&tasklist_lock); | 4631 | read_unlock(&tasklist_lock); |
| 4965 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; | 4632 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; |
| 4966 | out_nounlock: | 4633 | out_nounlock: |
| @@ -5035,6 +4702,9 @@ void show_state_filter(unsigned long state_filter) | |||
| 5035 | 4702 | ||
| 5036 | touch_all_softlockup_watchdogs(); | 4703 | touch_all_softlockup_watchdogs(); |
| 5037 | 4704 | ||
| 4705 | #ifdef CONFIG_SCHED_DEBUG | ||
| 4706 | sysrq_sched_debug_show(); | ||
| 4707 | #endif | ||
| 5038 | read_unlock(&tasklist_lock); | 4708 | read_unlock(&tasklist_lock); |
| 5039 | /* | 4709 | /* |
| 5040 | * Only show locks if all tasks are dumped: | 4710 | * Only show locks if all tasks are dumped: |
| @@ -5043,6 +4713,11 @@ void show_state_filter(unsigned long state_filter) | |||
| 5043 | debug_show_all_locks(); | 4713 | debug_show_all_locks(); |
| 5044 | } | 4714 | } |
| 5045 | 4715 | ||
| 4716 | void __cpuinit init_idle_bootup_task(struct task_struct *idle) | ||
| 4717 | { | ||
| 4718 | idle->sched_class = &idle_sched_class; | ||
| 4719 | } | ||
| 4720 | |||
| 5046 | /** | 4721 | /** |
| 5047 | * init_idle - set up an idle thread for a given CPU | 4722 | * init_idle - set up an idle thread for a given CPU |
| 5048 | * @idle: task in question | 4723 | * @idle: task in question |
| @@ -5056,13 +4731,12 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
| 5056 | struct rq *rq = cpu_rq(cpu); | 4731 | struct rq *rq = cpu_rq(cpu); |
| 5057 | unsigned long flags; | 4732 | unsigned long flags; |
| 5058 | 4733 | ||
| 5059 | idle->timestamp = sched_clock(); | 4734 | __sched_fork(idle); |
| 5060 | idle->sleep_avg = 0; | 4735 | idle->se.exec_start = sched_clock(); |
| 5061 | idle->array = NULL; | 4736 | |
| 5062 | idle->prio = idle->normal_prio = MAX_PRIO; | 4737 | idle->prio = idle->normal_prio = MAX_PRIO; |
| 5063 | idle->state = TASK_RUNNING; | ||
| 5064 | idle->cpus_allowed = cpumask_of_cpu(cpu); | 4738 | idle->cpus_allowed = cpumask_of_cpu(cpu); |
| 5065 | set_task_cpu(idle, cpu); | 4739 | __set_task_cpu(idle, cpu); |
| 5066 | 4740 | ||
| 5067 | spin_lock_irqsave(&rq->lock, flags); | 4741 | spin_lock_irqsave(&rq->lock, flags); |
| 5068 | rq->curr = rq->idle = idle; | 4742 | rq->curr = rq->idle = idle; |
| @@ -5077,6 +4751,10 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
| 5077 | #else | 4751 | #else |
| 5078 | task_thread_info(idle)->preempt_count = 0; | 4752 | task_thread_info(idle)->preempt_count = 0; |
| 5079 | #endif | 4753 | #endif |
| 4754 | /* | ||
| 4755 | * The idle tasks have their own, simple scheduling class: | ||
| 4756 | */ | ||
| 4757 | idle->sched_class = &idle_sched_class; | ||
| 5080 | } | 4758 | } |
| 5081 | 4759 | ||
| 5082 | /* | 4760 | /* |
| @@ -5088,6 +4766,28 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
| 5088 | */ | 4766 | */ |
| 5089 | cpumask_t nohz_cpu_mask = CPU_MASK_NONE; | 4767 | cpumask_t nohz_cpu_mask = CPU_MASK_NONE; |
| 5090 | 4768 | ||
| 4769 | /* | ||
| 4770 | * Increase the granularity value when there are more CPUs, | ||
| 4771 | * because with more CPUs the 'effective latency' as visible | ||
| 4772 | * to users decreases. But the relationship is not linear, | ||
| 4773 | * so pick a second-best guess by going with the log2 of the | ||
| 4774 | * number of CPUs. | ||
| 4775 | * | ||
| 4776 | * This idea comes from the SD scheduler of Con Kolivas: | ||
| 4777 | */ | ||
| 4778 | static inline void sched_init_granularity(void) | ||
| 4779 | { | ||
| 4780 | unsigned int factor = 1 + ilog2(num_online_cpus()); | ||
| 4781 | const unsigned long gran_limit = 10000000; | ||
| 4782 | |||
| 4783 | sysctl_sched_granularity *= factor; | ||
| 4784 | if (sysctl_sched_granularity > gran_limit) | ||
| 4785 | sysctl_sched_granularity = gran_limit; | ||
| 4786 | |||
| 4787 | sysctl_sched_runtime_limit = sysctl_sched_granularity * 4; | ||
| 4788 | sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2; | ||
| 4789 | } | ||
| 4790 | |||
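[Editor's aside] Worked numbers for the scaling in sched_init_granularity() above, writing the base sysctl value (whose default is not shown in this hunk) as G:

	/* Editor's worked example of factor = 1 + ilog2(num_online_cpus()):
	 *    1 CPU  -> 1 + ilog2(1)  = 1 -> granularity 1*G
	 *    2 CPUs -> 1 + ilog2(2)  = 2 -> granularity 2*G
	 *    8 CPUs -> 1 + ilog2(8)  = 4 -> granularity 4*G
	 *   64 CPUs -> 1 + ilog2(64) = 7 -> granularity 7*G
	 * The product is clamped at gran_limit = 10,000,000 ns (10 ms);
	 * runtime_limit then tracks 4x the result and the wakeup
	 * granularity half of it. */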
| 5091 | #ifdef CONFIG_SMP | 4791 | #ifdef CONFIG_SMP |
| 5092 | /* | 4792 | /* |
| 5093 | * This is how migration works: | 4793 | * This is how migration works: |
| @@ -5161,7 +4861,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed); | |||
| 5161 | static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | 4861 | static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) |
| 5162 | { | 4862 | { |
| 5163 | struct rq *rq_dest, *rq_src; | 4863 | struct rq *rq_dest, *rq_src; |
| 5164 | int ret = 0; | 4864 | int ret = 0, on_rq; |
| 5165 | 4865 | ||
| 5166 | if (unlikely(cpu_is_offline(dest_cpu))) | 4866 | if (unlikely(cpu_is_offline(dest_cpu))) |
| 5167 | return ret; | 4867 | return ret; |
| @@ -5177,20 +4877,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
| 5177 | if (!cpu_isset(dest_cpu, p->cpus_allowed)) | 4877 | if (!cpu_isset(dest_cpu, p->cpus_allowed)) |
| 5178 | goto out; | 4878 | goto out; |
| 5179 | 4879 | ||
| 4880 | on_rq = p->se.on_rq; | ||
| 4881 | if (on_rq) | ||
| 4882 | deactivate_task(rq_src, p, 0); | ||
| 5180 | set_task_cpu(p, dest_cpu); | 4883 | set_task_cpu(p, dest_cpu); |
| 5181 | if (p->array) { | 4884 | if (on_rq) { |
| 5182 | /* | 4885 | activate_task(rq_dest, p, 0); |
| 5183 | * Sync timestamp with rq_dest's before activating. | 4886 | check_preempt_curr(rq_dest, p); |
| 5184 | * The same thing could be achieved by doing this step | ||
| 5185 | * afterwards, and pretending it was a local activate. | ||
| 5186 | * This way is cleaner and logically correct. | ||
| 5187 | */ | ||
| 5188 | p->timestamp = p->timestamp - rq_src->most_recent_timestamp | ||
| 5189 | + rq_dest->most_recent_timestamp; | ||
| 5190 | deactivate_task(p, rq_src); | ||
| 5191 | __activate_task(p, rq_dest); | ||
| 5192 | if (TASK_PREEMPTS_CURR(p, rq_dest)) | ||
| 5193 | resched_task(rq_dest->curr); | ||
| 5194 | } | 4887 | } |
| 5195 | ret = 1; | 4888 | ret = 1; |
| 5196 | out: | 4889 | out: |
| @@ -5342,7 +5035,8 @@ static void migrate_live_tasks(int src_cpu) | |||
| 5342 | write_unlock_irq(&tasklist_lock); | 5035 | write_unlock_irq(&tasklist_lock); |
| 5343 | } | 5036 | } |
| 5344 | 5037 | ||
| 5345 | /* Schedules idle task to be the next runnable task on current CPU. | 5038 | /* |
| 5039 | * Schedules idle task to be the next runnable task on current CPU. | ||
| 5346 | * It does so by boosting its priority to highest possible and adding it to | 5040 | * It does so by boosting its priority to highest possible and adding it to |
| 5347 | * the _front_ of the runqueue. Used by CPU offline code. | 5041 | * the _front_ of the runqueue. Used by CPU offline code. |
| 5348 | */ | 5042 | */ |
| @@ -5362,10 +5056,10 @@ void sched_idle_next(void) | |||
| 5362 | */ | 5056 | */ |
| 5363 | spin_lock_irqsave(&rq->lock, flags); | 5057 | spin_lock_irqsave(&rq->lock, flags); |
| 5364 | 5058 | ||
| 5365 | __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); | 5059 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); |
| 5366 | 5060 | ||
| 5367 | /* Add idle task to the _front_ of its priority queue: */ | 5061 | /* Add idle task to the _front_ of its priority queue: */ |
| 5368 | __activate_idle_task(p, rq); | 5062 | activate_idle_task(p, rq); |
| 5369 | 5063 | ||
| 5370 | spin_unlock_irqrestore(&rq->lock, flags); | 5064 | spin_unlock_irqrestore(&rq->lock, flags); |
| 5371 | } | 5065 | } |
| @@ -5415,16 +5109,15 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) | |||
| 5415 | static void migrate_dead_tasks(unsigned int dead_cpu) | 5109 | static void migrate_dead_tasks(unsigned int dead_cpu) |
| 5416 | { | 5110 | { |
| 5417 | struct rq *rq = cpu_rq(dead_cpu); | 5111 | struct rq *rq = cpu_rq(dead_cpu); |
| 5418 | unsigned int arr, i; | 5112 | struct task_struct *next; |
| 5419 | 5113 | ||
| 5420 | for (arr = 0; arr < 2; arr++) { | 5114 | for ( ; ; ) { |
| 5421 | for (i = 0; i < MAX_PRIO; i++) { | 5115 | if (!rq->nr_running) |
| 5422 | struct list_head *list = &rq->arrays[arr].queue[i]; | 5116 | break; |
| 5423 | 5117 | next = pick_next_task(rq, rq->curr, rq_clock(rq)); | |
| 5424 | while (!list_empty(list)) | 5118 | if (!next) |
| 5425 | migrate_dead(dead_cpu, list_entry(list->next, | 5119 | break; |
| 5426 | struct task_struct, run_list)); | 5120 | migrate_dead(dead_cpu, next); |
| 5427 | } | ||
| 5428 | } | 5121 | } |
| 5429 | } | 5122 | } |
| 5430 | #endif /* CONFIG_HOTPLUG_CPU */ | 5123 | #endif /* CONFIG_HOTPLUG_CPU */ |
| @@ -5448,14 +5141,14 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 5448 | 5141 | ||
| 5449 | case CPU_UP_PREPARE: | 5142 | case CPU_UP_PREPARE: |
| 5450 | case CPU_UP_PREPARE_FROZEN: | 5143 | case CPU_UP_PREPARE_FROZEN: |
| 5451 | p = kthread_create(migration_thread, hcpu, "migration/%d",cpu); | 5144 | p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); |
| 5452 | if (IS_ERR(p)) | 5145 | if (IS_ERR(p)) |
| 5453 | return NOTIFY_BAD; | 5146 | return NOTIFY_BAD; |
| 5454 | p->flags |= PF_NOFREEZE; | 5147 | p->flags |= PF_NOFREEZE; |
| 5455 | kthread_bind(p, cpu); | 5148 | kthread_bind(p, cpu); |
| 5456 | /* Must be high prio: stop_machine expects to yield to it. */ | 5149 | /* Must be high prio: stop_machine expects to yield to it. */ |
| 5457 | rq = task_rq_lock(p, &flags); | 5150 | rq = task_rq_lock(p, &flags); |
| 5458 | __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); | 5151 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); |
| 5459 | task_rq_unlock(rq, &flags); | 5152 | task_rq_unlock(rq, &flags); |
| 5460 | cpu_rq(cpu)->migration_thread = p; | 5153 | cpu_rq(cpu)->migration_thread = p; |
| 5461 | break; | 5154 | break; |
| @@ -5486,9 +5179,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 5486 | rq->migration_thread = NULL; | 5179 | rq->migration_thread = NULL; |
| 5487 | /* Idle task back to normal (off runqueue, low prio) */ | 5180 | /* Idle task back to normal (off runqueue, low prio) */ |
| 5488 | rq = task_rq_lock(rq->idle, &flags); | 5181 | rq = task_rq_lock(rq->idle, &flags); |
| 5489 | deactivate_task(rq->idle, rq); | 5182 | deactivate_task(rq, rq->idle, 0); |
| 5490 | rq->idle->static_prio = MAX_PRIO; | 5183 | rq->idle->static_prio = MAX_PRIO; |
| 5491 | __setscheduler(rq->idle, SCHED_NORMAL, 0); | 5184 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); |
| 5185 | rq->idle->sched_class = &idle_sched_class; | ||
| 5492 | migrate_dead_tasks(cpu); | 5186 | migrate_dead_tasks(cpu); |
| 5493 | task_rq_unlock(rq, &flags); | 5187 | task_rq_unlock(rq, &flags); |
| 5494 | migrate_nr_uninterruptible(rq); | 5188 | migrate_nr_uninterruptible(rq); |
| @@ -5797,483 +5491,6 @@ init_sched_build_groups(cpumask_t span, const cpumask_t *cpu_map, | |||
| 5797 | 5491 | ||
| 5798 | #define SD_NODES_PER_DOMAIN 16 | 5492 | #define SD_NODES_PER_DOMAIN 16 |
| 5799 | 5493 | ||
| 5800 | /* | ||
| 5801 | * Self-tuning task migration cost measurement between source and target CPUs. | ||
| 5802 | * | ||
| 5803 | * This is done by measuring the cost of manipulating buffers of varying | ||
| 5804 | * sizes. For a given buffer-size here are the steps that are taken: | ||
| 5805 | * | ||
| 5806 | * 1) the source CPU reads+dirties a shared buffer | ||
| 5807 | * 2) the target CPU reads+dirties the same shared buffer | ||
| 5808 | * | ||
| 5809 | * We measure how long they take, in the following 4 scenarios: | ||
| 5810 | * | ||
| 5811 | * - source: CPU1, target: CPU2 | cost1 | ||
| 5812 | * - source: CPU2, target: CPU1 | cost2 | ||
| 5813 | * - source: CPU1, target: CPU1 | cost3 | ||
| 5814 | * - source: CPU2, target: CPU2 | cost4 | ||
| 5815 | * | ||
| 5816 | * We then calculate the cost1+cost2-cost3-cost4 difference - this is | ||
| 5817 | * the cost of migration (this is what measure_cost() below computes). | ||
| 5818 | * | ||
| 5819 | * We then start off from a small buffer-size and iterate up to larger | ||
| 5820 | * buffer sizes, in 5% steps - measuring each buffer-size separately, and | ||
| 5821 | * doing a maximum search for the cost. (The maximum cost for a migration | ||
| 5822 | * normally occurs when the working set size is around the effective cache | ||
| 5823 | * size.) | ||
| 5824 | */ | ||
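[Editor's aside] A numeric illustration of the arithmetic just described, with invented numbers:

	/* Editor's illustration (invented values, one buffer size):
	 *   cost1 (CPU1 -> CPU2) = 9.0 ms    cost3 (CPU1 -> CPU1) = 3.0 ms
	 *   cost2 (CPU2 -> CPU1) = 9.2 ms    cost4 (CPU2 -> CPU2) = 3.2 ms
	 * migration cost = (cost1 + cost2) - (cost3 + cost4) = 12.0 ms:
	 * the extra cache-refill work attributable to crossing CPUs at
	 * this working-set size. */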
| 5825 | #define SEARCH_SCOPE 2 | ||
| 5826 | #define MIN_CACHE_SIZE (64*1024U) | ||
| 5827 | #define DEFAULT_CACHE_SIZE (5*1024*1024U) | ||
| 5828 | #define ITERATIONS 1 | ||
| 5829 | #define SIZE_THRESH 130 | ||
| 5830 | #define COST_THRESH 130 | ||
| 5831 | |||
| 5832 | /* | ||
| 5833 | * The migration cost is a function of 'domain distance'. Domain | ||
| 5834 | * distance is the number of steps a CPU has to iterate down its | ||
| 5835 | * domain tree to share a domain with the other CPU. The farther | ||
| 5836 | * two CPUs are from each other, the larger the distance gets. | ||
| 5837 | * | ||
| 5838 | * Note that we use the distance only to cache measurement results, | ||
| 5839 | * the distance value is not used numerically otherwise. When two | ||
| 5840 | * CPUs have the same distance it is assumed that the migration | ||
| 5841 | * cost is the same. (this is a simplification but quite practical) | ||
| 5842 | */ | ||
| 5843 | #define MAX_DOMAIN_DISTANCE 32 | ||
| 5844 | |||
| 5845 | static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] = | ||
| 5846 | { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = | ||
| 5847 | /* | ||
| 5848 | * Architectures may override the migration cost and thus avoid | ||
| 5849 | * boot-time calibration. Unit is nanoseconds. Mostly useful for | ||
| 5850 | * virtualized hardware: | ||
| 5851 | */ | ||
| 5852 | #ifdef CONFIG_DEFAULT_MIGRATION_COST | ||
| 5853 | CONFIG_DEFAULT_MIGRATION_COST | ||
| 5854 | #else | ||
| 5855 | -1LL | ||
| 5856 | #endif | ||
| 5857 | }; | ||
| 5858 | |||
| 5859 | /* | ||
| 5860 | * Allow override of migration cost - in units of microseconds. | ||
| 5861 | * E.g. migration_cost=1000,2000,3000 will set up a level-1 cost | ||
| 5862 | * of 1 msec, level-2 cost of 2 msecs and level3 cost of 3 msecs: | ||
| 5863 | */ | ||
| 5864 | static int __init migration_cost_setup(char *str) | ||
| 5865 | { | ||
| 5866 | int ints[MAX_DOMAIN_DISTANCE+1], i; | ||
| 5867 | |||
| 5868 | str = get_options(str, ARRAY_SIZE(ints), ints); | ||
| 5869 | |||
| 5870 | printk("#ints: %d\n", ints[0]); | ||
| 5871 | for (i = 1; i <= ints[0]; i++) { | ||
| 5872 | migration_cost[i-1] = (unsigned long long)ints[i]*1000; | ||
| 5873 | printk("migration_cost[%d]: %Ld\n", i-1, migration_cost[i-1]); | ||
| 5874 | } | ||
| 5875 | return 1; | ||
| 5876 | } | ||
| 5877 | |||
| 5878 | __setup ("migration_cost=", migration_cost_setup); | ||
| 5879 | |||
| 5880 | /* | ||
| 5881 | * Global multiplier (divisor) for migration-cutoff values, | ||
| 5882 | * in percentiles. E.g. use a value of 150 to get 1.5 times | ||
| 5883 | * longer cache-hot cutoff times. | ||
| 5884 | * | ||
| 5885 | * (We scale it from 100 to 128 to make long long handling easier.) | ||
| 5886 | */ | ||
| 5887 | |||
| 5888 | #define MIGRATION_FACTOR_SCALE 128 | ||
| 5889 | |||
| 5890 | static unsigned int migration_factor = MIGRATION_FACTOR_SCALE; | ||
| 5891 | |||
| 5892 | static int __init setup_migration_factor(char *str) | ||
| 5893 | { | ||
| 5894 | get_option(&str, &migration_factor); | ||
| 5895 | migration_factor = migration_factor * MIGRATION_FACTOR_SCALE / 100; | ||
| 5896 | return 1; | ||
| 5897 | } | ||
| 5898 | |||
| 5899 | __setup("migration_factor=", setup_migration_factor); | ||
| 5900 | |||
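[Editor's aside] An arithmetic check on the scaling above: booting with migration_factor=150 stores 150 * 128 / 100 = 192, so the cutoff returned by measure_migration_cost() below is multiplied by migration_factor / MIGRATION_FACTOR_SCALE = 192 / 128 = 1.5x; migration_factor=80 stores 102, giving roughly 0.8x.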
| 5901 | /* | ||
| 5902 | * Estimated distance of two CPUs, measured via the number of domains | ||
| 5903 | * we have to pass for the two CPUs to be in the same span: | ||
| 5904 | */ | ||
| 5905 | static unsigned long domain_distance(int cpu1, int cpu2) | ||
| 5906 | { | ||
| 5907 | unsigned long distance = 0; | ||
| 5908 | struct sched_domain *sd; | ||
| 5909 | |||
| 5910 | for_each_domain(cpu1, sd) { | ||
| 5911 | WARN_ON(!cpu_isset(cpu1, sd->span)); | ||
| 5912 | if (cpu_isset(cpu2, sd->span)) | ||
| 5913 | return distance; | ||
| 5914 | distance++; | ||
| 5915 | } | ||
| 5916 | if (distance >= MAX_DOMAIN_DISTANCE) { | ||
| 5917 | WARN_ON(1); | ||
| 5918 | distance = MAX_DOMAIN_DISTANCE-1; | ||
| 5919 | } | ||
| 5920 | |||
| 5921 | return distance; | ||
| 5922 | } | ||
| 5923 | |||
| 5924 | static unsigned int migration_debug; | ||
| 5925 | |||
| 5926 | static int __init setup_migration_debug(char *str) | ||
| 5927 | { | ||
| 5928 | get_option(&str, &migration_debug); | ||
| 5929 | return 1; | ||
| 5930 | } | ||
| 5931 | |||
| 5932 | __setup("migration_debug=", setup_migration_debug); | ||
| 5933 | |||
| 5934 | /* | ||
| 5935 | * Maximum cache-size that the scheduler should try to measure. | ||
| 5936 | * Architectures with larger caches should tune this up during | ||
| 5937 | * bootup. Gets used in the domain-setup code (i.e. during SMP | ||
| 5938 | * bootup). | ||
| 5939 | */ | ||
| 5940 | unsigned int max_cache_size; | ||
| 5941 | |||
| 5942 | static int __init setup_max_cache_size(char *str) | ||
| 5943 | { | ||
| 5944 | get_option(&str, &max_cache_size); | ||
| 5945 | return 1; | ||
| 5946 | } | ||
| 5947 | |||
| 5948 | __setup("max_cache_size=", setup_max_cache_size); | ||
| 5949 | |||
| 5950 | /* | ||
| 5951 | * Dirty a big buffer in a hard-to-predict (for the L2 cache) way. This | ||
| 5952 | * is the operation that is timed, so we try to generate unpredictable | ||
| 5953 | * cachemisses that still end up filling the L2 cache: | ||
| 5954 | */ | ||
| 5955 | static void touch_cache(void *__cache, unsigned long __size) | ||
| 5956 | { | ||
| 5957 | unsigned long size = __size / sizeof(long); | ||
| 5958 | unsigned long chunk1 = size / 3; | ||
| 5959 | unsigned long chunk2 = 2 * size / 3; | ||
| 5960 | unsigned long *cache = __cache; | ||
| 5961 | int i; | ||
| 5962 | |||
| 5963 | for (i = 0; i < size/6; i += 8) { | ||
| 5964 | switch (i % 6) { | ||
| 5965 | case 0: cache[i]++; | ||
| 5966 | case 1: cache[size-1-i]++; | ||
| 5967 | case 2: cache[chunk1-i]++; | ||
| 5968 | case 3: cache[chunk1+i]++; | ||
| 5969 | case 4: cache[chunk2-i]++; | ||
| 5970 | case 5: cache[chunk2+i]++; | ||
| 5971 | } | ||
| 5972 | } | ||
| 5973 | } | ||
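[Editor's aside] The switch in touch_cache() above deliberately omits break statements; the fallthrough is what scatters the accesses. A trace of what one loop pass actually does:

	/* Editor's trace: since i advances by 8, i % 6 cycles 0, 2, 4:
	 *   i % 6 == 0: falls through cases 0..5 -> 6 increments
	 *   i % 6 == 2: falls through cases 2..5 -> 4 increments
	 *   i % 6 == 4: falls through cases 4..5 -> 2 increments
	 * so each pass dirties between two and six widely separated
	 * cache lines, which is the intended hard-to-predict pattern. */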
| 5974 | |||
| 5975 | /* | ||
| 5976 | * Measure the cache-cost of one task migration. Returns in units of nsec. | ||
| 5977 | */ | ||
| 5978 | static unsigned long long | ||
| 5979 | measure_one(void *cache, unsigned long size, int source, int target) | ||
| 5980 | { | ||
| 5981 | cpumask_t mask, saved_mask; | ||
| 5982 | unsigned long long t0, t1, t2, t3, cost; | ||
| 5983 | |||
| 5984 | saved_mask = current->cpus_allowed; | ||
| 5985 | |||
| 5986 | /* | ||
| 5987 | * Flush source caches to RAM and invalidate them: | ||
| 5988 | */ | ||
| 5989 | sched_cacheflush(); | ||
| 5990 | |||
| 5991 | /* | ||
| 5992 | * Migrate to the source CPU: | ||
| 5993 | */ | ||
| 5994 | mask = cpumask_of_cpu(source); | ||
| 5995 | set_cpus_allowed(current, mask); | ||
| 5996 | WARN_ON(smp_processor_id() != source); | ||
| 5997 | |||
| 5998 | /* | ||
| 5999 | * Dirty the working set: | ||
| 6000 | */ | ||
| 6001 | t0 = sched_clock(); | ||
| 6002 | touch_cache(cache, size); | ||
| 6003 | t1 = sched_clock(); | ||
| 6004 | |||
| 6005 | /* | ||
| 6006 | * Migrate to the target CPU, dirty the L2 cache and access | ||
| 6007 | * the shared buffer. (which represents the working set | ||
| 6008 | * of a migrated task.) | ||
| 6009 | */ | ||
| 6010 | mask = cpumask_of_cpu(target); | ||
| 6011 | set_cpus_allowed(current, mask); | ||
| 6012 | WARN_ON(smp_processor_id() != target); | ||
| 6013 | |||
| 6014 | t2 = sched_clock(); | ||
| 6015 | touch_cache(cache, size); | ||
| 6016 | t3 = sched_clock(); | ||
| 6017 | |||
| 6018 | cost = t1-t0 + t3-t2; | ||
| 6019 | |||
| 6020 | if (migration_debug >= 2) | ||
| 6021 | printk("[%d->%d]: %8Ld %8Ld %8Ld => %10Ld.\n", | ||
| 6022 | source, target, t1-t0, t2-t1, t3-t2, cost); | ||
| 6023 | /* | ||
| 6024 | * Flush target caches to RAM and invalidate them: | ||
| 6025 | */ | ||
| 6026 | sched_cacheflush(); | ||
| 6027 | |||
| 6028 | set_cpus_allowed(current, saved_mask); | ||
| 6029 | |||
| 6030 | return cost; | ||
| 6031 | } | ||
| 6032 | |||
| 6033 | /* | ||
| 6034 | * Measure a series of task migrations and return the average | ||
| 6035 | * result. Since this code runs early during bootup the system | ||
| 6036 | * is 'undisturbed' and the average latency makes sense. | ||
| 6037 | * | ||
| 6038 | * The algorithm in essence auto-detects the relevant cache-size, | ||
| 6039 | * so it will properly detect different cachesizes for different | ||
| 6040 | * cache-hierarchies, depending on how the CPUs are connected. | ||
| 6041 | * | ||
| 6042 | * Architectures can prime the upper limit of the search range via | ||
| 6043 | * max_cache_size, otherwise the search range defaults to 10MB...64K. | ||
| 6044 | */ | ||
| 6045 | static unsigned long long | ||
| 6046 | measure_cost(int cpu1, int cpu2, void *cache, unsigned int size) | ||
| 6047 | { | ||
| 6048 | unsigned long long cost1, cost2; | ||
| 6049 | int i; | ||
| 6050 | |||
| 6051 | /* | ||
| 6052 | * Measure the migration cost of 'size' bytes, over an | ||
| 6053 | * average of ITERATIONS runs in each direction: | ||
| 6054 | * | ||
| 6055 | * (We perturb the cache size by a small (0..4k) | ||
| 6056 | * value to compensate size/alignment related artifacts. | ||
| 6057 | * We also subtract the cost of the operation done on | ||
| 6058 | * the same CPU.) | ||
| 6059 | */ | ||
| 6060 | cost1 = 0; | ||
| 6061 | |||
| 6062 | /* | ||
| 6063 | * dry run, to make sure we start off cache-cold on cpu1, | ||
| 6064 | * and to get any vmalloc pagefaults in advance: | ||
| 6065 | */ | ||
| 6066 | measure_one(cache, size, cpu1, cpu2); | ||
| 6067 | for (i = 0; i < ITERATIONS; i++) | ||
| 6068 | cost1 += measure_one(cache, size - i * 1024, cpu1, cpu2); | ||
| 6069 | |||
| 6070 | measure_one(cache, size, cpu2, cpu1); | ||
| 6071 | for (i = 0; i < ITERATIONS; i++) | ||
| 6072 | cost1 += measure_one(cache, size - i * 1024, cpu2, cpu1); | ||
| 6073 | |||
| 6074 | /* | ||
| 6075 | * (We measure the non-migrating [cached] cost on both | ||
| 6076 | * cpu1 and cpu2, to handle CPUs with different speeds) | ||
| 6077 | */ | ||
| 6078 | cost2 = 0; | ||
| 6079 | |||
| 6080 | measure_one(cache, size, cpu1, cpu1); | ||
| 6081 | for (i = 0; i < ITERATIONS; i++) | ||
| 6082 | cost2 += measure_one(cache, size - i * 1024, cpu1, cpu1); | ||
| 6083 | |||
| 6084 | measure_one(cache, size, cpu2, cpu2); | ||
| 6085 | for (i = 0; i < ITERATIONS; i++) | ||
| 6086 | cost2 += measure_one(cache, size - i * 1024, cpu2, cpu2); | ||
| 6087 | |||
| 6088 | /* | ||
| 6089 | * Get the per-iteration migration cost: | ||
| 6090 | */ | ||
| 6091 | do_div(cost1, 2 * ITERATIONS); | ||
| 6092 | do_div(cost2, 2 * ITERATIONS); | ||
| 6093 | |||
| 6094 | return cost1 - cost2; | ||
| 6095 | } | ||
| 6096 | |||
| 6097 | static unsigned long long measure_migration_cost(int cpu1, int cpu2) | ||
| 6098 | { | ||
| 6099 | unsigned long long max_cost = 0, fluct = 0, avg_fluct = 0; | ||
| 6100 | unsigned int max_size, size, size_found = 0; | ||
| 6101 | long long cost = 0, prev_cost; | ||
| 6102 | void *cache; | ||
| 6103 | |||
| 6104 | /* | ||
| 6105 | * Search from max_cache_size*2 (SEARCH_SCOPE) down to 64K - the real | ||
| 6106 | * relevant cachesize has to lie somewhere in between. | ||
| 6107 | */ | ||
| 6108 | if (max_cache_size) { | ||
| 6109 | max_size = max(max_cache_size * SEARCH_SCOPE, MIN_CACHE_SIZE); | ||
| 6110 | size = max(max_cache_size / SEARCH_SCOPE, MIN_CACHE_SIZE); | ||
| 6111 | } else { | ||
| 6112 | /* | ||
| 6113 | * Since we have no estimation about the relevant | ||
| 6114 | * search range | ||
| 6115 | */ | ||
| 6116 | max_size = DEFAULT_CACHE_SIZE * SEARCH_SCOPE; | ||
| 6117 | size = MIN_CACHE_SIZE; | ||
| 6118 | } | ||
| 6119 | |||
| 6120 | if (!cpu_online(cpu1) || !cpu_online(cpu2)) { | ||
| 6121 | printk("cpu %d and %d not both online!\n", cpu1, cpu2); | ||
| 6122 | return 0; | ||
| 6123 | } | ||
| 6124 | |||
| 6125 | /* | ||
| 6126 | * Allocate the working set: | ||
| 6127 | */ | ||
| 6128 | cache = vmalloc(max_size); | ||
| 6129 | if (!cache) { | ||
| 6130 | printk("could not vmalloc %d bytes for cache!\n", max_size); | ||
| 6131 | return 1000000; /* return 1 msec on very small boxen */ | ||
| 6132 | } | ||
| 6133 | |||
| 6134 | while (size <= max_size) { | ||
| 6135 | prev_cost = cost; | ||
| 6136 | cost = measure_cost(cpu1, cpu2, cache, size); | ||
| 6137 | |||
| 6138 | /* | ||
| 6139 | * Update the max: | ||
| 6140 | */ | ||
| 6141 | if (cost > 0) { | ||
| 6142 | if (max_cost < cost) { | ||
| 6143 | max_cost = cost; | ||
| 6144 | size_found = size; | ||
| 6145 | } | ||
| 6146 | } | ||
| 6147 | /* | ||
| 6148 | * Calculate average fluctuation, we use this to prevent | ||
| 6149 | * noise from triggering an early break out of the loop: | ||
| 6150 | */ | ||
| 6151 | fluct = abs(cost - prev_cost); | ||
| 6152 | avg_fluct = (avg_fluct + fluct)/2; | ||
| 6153 | |||
| 6154 | if (migration_debug) | ||
| 6155 | printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): " | ||
| 6156 | "(%8Ld %8Ld)\n", | ||
| 6157 | cpu1, cpu2, size, | ||
| 6158 | (long)cost / 1000000, | ||
| 6159 | ((long)cost / 100000) % 10, | ||
| 6160 | (long)max_cost / 1000000, | ||
| 6161 | ((long)max_cost / 100000) % 10, | ||
| 6162 | domain_distance(cpu1, cpu2), | ||
| 6163 | cost, avg_fluct); | ||
| 6164 | |||
| 6165 | /* | ||
| 6166 | * If we iterated at least 20% past the previous maximum, | ||
| 6167 | * and the cost has dropped by more than 20% already, | ||
| 6168 | * (taking fluctuations into account) then we assume to | ||
| 6169 | * have found the maximum and break out of the loop early: | ||
| 6170 | */ | ||
| 6171 | if (size_found && (size*100 > size_found*SIZE_THRESH)) | ||
| 6172 | if (cost+avg_fluct <= 0 || | ||
| 6173 | max_cost*100 > (cost+avg_fluct)*COST_THRESH) { | ||
| 6174 | |||
| 6175 | if (migration_debug) | ||
| 6176 | printk("-> found max.\n"); | ||
| 6177 | break; | ||
| 6178 | } | ||
| 6179 | /* | ||
| 6180 | * Increase the cachesize in 10% steps: | ||
| 6181 | */ | ||
| 6182 | size = size * 10 / 9; | ||
| 6183 | } | ||
| 6184 | |||
| 6185 | if (migration_debug) | ||
| 6186 | printk("[%d][%d] working set size found: %d, cost: %Ld\n", | ||
| 6187 | cpu1, cpu2, size_found, max_cost); | ||
| 6188 | |||
| 6189 | vfree(cache); | ||
| 6190 | |||
| 6191 | /* | ||
| 6192 | * A task is considered 'cache cold' if at least 2 times | ||
| 6193 | * the worst-case cost of migration has passed. | ||
| 6194 | * | ||
| 6195 | * (this limit is only listened to if the load-balancing | ||
| 6196 | * situation is 'nice' - if there is a large imbalance we | ||
| 6197 | * ignore it for the sake of CPU utilization and | ||
| 6198 | * processing fairness.) | ||
| 6199 | */ | ||
| 6200 | return 2 * max_cost * migration_factor / MIGRATION_FACTOR_SCALE; | ||
| 6201 | } | ||
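[Editor's aside] A worked example of the cutoff computed above, with an invented measurement: if the maximum measured cost is 4,000,000 ns and migration_factor is at its default of MIGRATION_FACTOR_SCALE (128), the function returns 2 * 4000000 * 128 / 128 = 8,000,000 ns, i.e. a task is treated as cache-hot for 8 ms after it last ran, and calibrate_migration_costs() below stores that into sd->cache_hot_time for the matching domain distance.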
| 6202 | |||
| 6203 | static void calibrate_migration_costs(const cpumask_t *cpu_map) | ||
| 6204 | { | ||
| 6205 | int cpu1 = -1, cpu2 = -1, cpu, orig_cpu = raw_smp_processor_id(); | ||
| 6206 | unsigned long j0, j1, distance, max_distance = 0; | ||
| 6207 | struct sched_domain *sd; | ||
| 6208 | |||
| 6209 | j0 = jiffies; | ||
| 6210 | |||
| 6211 | /* | ||
| 6212 | * First pass - measure the migration costs between all CPU pairs: | ||
| 6213 | */ | ||
| 6214 | for_each_cpu_mask(cpu1, *cpu_map) { | ||
| 6215 | for_each_cpu_mask(cpu2, *cpu_map) { | ||
| 6216 | if (cpu1 == cpu2) | ||
| 6217 | continue; | ||
| 6218 | distance = domain_distance(cpu1, cpu2); | ||
| 6219 | max_distance = max(max_distance, distance); | ||
| 6220 | /* | ||
| 6221 | * No result cached yet? | ||
| 6222 | */ | ||
| 6223 | if (migration_cost[distance] == -1LL) | ||
| 6224 | migration_cost[distance] = | ||
| 6225 | measure_migration_cost(cpu1, cpu2); | ||
| 6226 | } | ||
| 6227 | } | ||
| 6228 | /* | ||
| 6229 | * Second pass - update the sched domain hierarchy with | ||
| 6230 | * the new cache-hot-time estimations: | ||
| 6231 | */ | ||
| 6232 | for_each_cpu_mask(cpu, *cpu_map) { | ||
| 6233 | distance = 0; | ||
| 6234 | for_each_domain(cpu, sd) { | ||
| 6235 | sd->cache_hot_time = migration_cost[distance]; | ||
| 6236 | distance++; | ||
| 6237 | } | ||
| 6238 | } | ||
| 6239 | /* | ||
| 6240 | * Print the matrix: | ||
| 6241 | */ | ||
| 6242 | if (migration_debug) | ||
| 6243 | printk("migration: max_cache_size: %d, cpu: %d MHz:\n", | ||
| 6244 | max_cache_size, | ||
| 6245 | #ifdef CONFIG_X86 | ||
| 6246 | cpu_khz/1000 | ||
| 6247 | #else | ||
| 6248 | -1 | ||
| 6249 | #endif | ||
| 6250 | ); | ||
| 6251 | if (system_state == SYSTEM_BOOTING && num_online_cpus() > 1) { | ||
| 6252 | printk("migration_cost="); | ||
| 6253 | for (distance = 0; distance <= max_distance; distance++) { | ||
| 6254 | if (distance) | ||
| 6255 | printk(","); | ||
| 6256 | printk("%ld", (long)migration_cost[distance] / 1000); | ||
| 6257 | } | ||
| 6258 | printk("\n"); | ||
| 6259 | } | ||
| 6260 | j1 = jiffies; | ||
| 6261 | if (migration_debug) | ||
| 6262 | printk("migration: %ld seconds\n", (j1-j0) / HZ); | ||
| 6263 | |||
| 6264 | /* | ||
| 6265 | * Move back to the original CPU. NUMA-Q gets confused | ||
| 6266 | * if we migrate to another quad during bootup. | ||
| 6267 | */ | ||
| 6268 | if (raw_smp_processor_id() != orig_cpu) { | ||
| 6269 | cpumask_t mask = cpumask_of_cpu(orig_cpu), | ||
| 6270 | saved_mask = current->cpus_allowed; | ||
| 6271 | |||
| 6272 | set_cpus_allowed(current, mask); | ||
| 6273 | set_cpus_allowed(current, saved_mask); | ||
| 6274 | } | ||
| 6275 | } | ||
| 6276 | |||
| 6277 | #ifdef CONFIG_NUMA | 5494 | #ifdef CONFIG_NUMA |
| 6278 | 5495 | ||
| 6279 | /** | 5496 | /** |
| @@ -6574,7 +5791,6 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
| 6574 | static int build_sched_domains(const cpumask_t *cpu_map) | 5791 | static int build_sched_domains(const cpumask_t *cpu_map) |
| 6575 | { | 5792 | { |
| 6576 | int i; | 5793 | int i; |
| 6577 | struct sched_domain *sd; | ||
| 6578 | #ifdef CONFIG_NUMA | 5794 | #ifdef CONFIG_NUMA |
| 6579 | struct sched_group **sched_group_nodes = NULL; | 5795 | struct sched_group **sched_group_nodes = NULL; |
| 6580 | int sd_allnodes = 0; | 5796 | int sd_allnodes = 0; |
| @@ -6582,7 +5798,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6582 | /* | 5798 | /* |
| 6583 | * Allocate the per-node list of sched groups | 5799 | * Allocate the per-node list of sched groups |
| 6584 | */ | 5800 | */ |
| 6585 | sched_group_nodes = kzalloc(sizeof(struct sched_group*)*MAX_NUMNODES, | 5801 | sched_group_nodes = kzalloc(sizeof(struct sched_group *)*MAX_NUMNODES, |
| 6586 | GFP_KERNEL); | 5802 | GFP_KERNEL); |
| 6587 | if (!sched_group_nodes) { | 5803 | if (!sched_group_nodes) { |
| 6588 | printk(KERN_WARNING "Can not alloc sched group node list\n"); | 5804 | printk(KERN_WARNING "Can not alloc sched group node list\n"); |
| @@ -6601,8 +5817,8 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6601 | cpus_and(nodemask, nodemask, *cpu_map); | 5817 | cpus_and(nodemask, nodemask, *cpu_map); |
| 6602 | 5818 | ||
| 6603 | #ifdef CONFIG_NUMA | 5819 | #ifdef CONFIG_NUMA |
| 6604 | if (cpus_weight(*cpu_map) | 5820 | if (cpus_weight(*cpu_map) > |
| 6605 | > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { | 5821 | SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { |
| 6606 | sd = &per_cpu(allnodes_domains, i); | 5822 | sd = &per_cpu(allnodes_domains, i); |
| 6607 | *sd = SD_ALLNODES_INIT; | 5823 | *sd = SD_ALLNODES_INIT; |
| 6608 | sd->span = *cpu_map; | 5824 | sd->span = *cpu_map; |
| @@ -6661,7 +5877,8 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6661 | if (i != first_cpu(this_sibling_map)) | 5877 | if (i != first_cpu(this_sibling_map)) |
| 6662 | continue; | 5878 | continue; |
| 6663 | 5879 | ||
| 6664 | init_sched_build_groups(this_sibling_map, cpu_map, &cpu_to_cpu_group); | 5880 | init_sched_build_groups(this_sibling_map, cpu_map, |
| 5881 | &cpu_to_cpu_group); | ||
| 6665 | } | 5882 | } |
| 6666 | #endif | 5883 | #endif |
| 6667 | 5884 | ||
| @@ -6672,11 +5889,11 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6672 | cpus_and(this_core_map, this_core_map, *cpu_map); | 5889 | cpus_and(this_core_map, this_core_map, *cpu_map); |
| 6673 | if (i != first_cpu(this_core_map)) | 5890 | if (i != first_cpu(this_core_map)) |
| 6674 | continue; | 5891 | continue; |
| 6675 | init_sched_build_groups(this_core_map, cpu_map, &cpu_to_core_group); | 5892 | init_sched_build_groups(this_core_map, cpu_map, |
| 5893 | &cpu_to_core_group); | ||
| 6676 | } | 5894 | } |
| 6677 | #endif | 5895 | #endif |
| 6678 | 5896 | ||
| 6679 | |||
| 6680 | /* Set up physical groups */ | 5897 | /* Set up physical groups */ |
| 6681 | for (i = 0; i < MAX_NUMNODES; i++) { | 5898 | for (i = 0; i < MAX_NUMNODES; i++) { |
| 6682 | cpumask_t nodemask = node_to_cpumask(i); | 5899 | cpumask_t nodemask = node_to_cpumask(i); |
| @@ -6691,7 +5908,8 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6691 | #ifdef CONFIG_NUMA | 5908 | #ifdef CONFIG_NUMA |
| 6692 | /* Set up node groups */ | 5909 | /* Set up node groups */ |
| 6693 | if (sd_allnodes) | 5910 | if (sd_allnodes) |
| 6694 | init_sched_build_groups(*cpu_map, cpu_map, &cpu_to_allnodes_group); | 5911 | init_sched_build_groups(*cpu_map, cpu_map, |
| 5912 | &cpu_to_allnodes_group); | ||
| 6695 | 5913 | ||
| 6696 | for (i = 0; i < MAX_NUMNODES; i++) { | 5914 | for (i = 0; i < MAX_NUMNODES; i++) { |
| 6697 | /* Set up node groups */ | 5915 | /* Set up node groups */ |
| @@ -6719,6 +5937,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6719 | sched_group_nodes[i] = sg; | 5937 | sched_group_nodes[i] = sg; |
| 6720 | for_each_cpu_mask(j, nodemask) { | 5938 | for_each_cpu_mask(j, nodemask) { |
| 6721 | struct sched_domain *sd; | 5939 | struct sched_domain *sd; |
| 5940 | |||
| 6722 | sd = &per_cpu(node_domains, j); | 5941 | sd = &per_cpu(node_domains, j); |
| 6723 | sd->groups = sg; | 5942 | sd->groups = sg; |
| 6724 | } | 5943 | } |
| @@ -6763,19 +5982,22 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6763 | /* Calculate CPU power for physical packages and nodes */ | 5982 | /* Calculate CPU power for physical packages and nodes */ |
| 6764 | #ifdef CONFIG_SCHED_SMT | 5983 | #ifdef CONFIG_SCHED_SMT |
| 6765 | for_each_cpu_mask(i, *cpu_map) { | 5984 | for_each_cpu_mask(i, *cpu_map) { |
| 6766 | sd = &per_cpu(cpu_domains, i); | 5985 | struct sched_domain *sd = &per_cpu(cpu_domains, i); |
| 5986 | |||
| 6767 | init_sched_groups_power(i, sd); | 5987 | init_sched_groups_power(i, sd); |
| 6768 | } | 5988 | } |
| 6769 | #endif | 5989 | #endif |
| 6770 | #ifdef CONFIG_SCHED_MC | 5990 | #ifdef CONFIG_SCHED_MC |
| 6771 | for_each_cpu_mask(i, *cpu_map) { | 5991 | for_each_cpu_mask(i, *cpu_map) { |
| 6772 | sd = &per_cpu(core_domains, i); | 5992 | struct sched_domain *sd = &per_cpu(core_domains, i); |
| 5993 | |||
| 6773 | init_sched_groups_power(i, sd); | 5994 | init_sched_groups_power(i, sd); |
| 6774 | } | 5995 | } |
| 6775 | #endif | 5996 | #endif |
| 6776 | 5997 | ||
| 6777 | for_each_cpu_mask(i, *cpu_map) { | 5998 | for_each_cpu_mask(i, *cpu_map) { |
| 6778 | sd = &per_cpu(phys_domains, i); | 5999 | struct sched_domain *sd = &per_cpu(phys_domains, i); |
| 6000 | |||
| 6779 | init_sched_groups_power(i, sd); | 6001 | init_sched_groups_power(i, sd); |
| 6780 | } | 6002 | } |
| 6781 | 6003 | ||
| @@ -6803,10 +6025,6 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 6803 | #endif | 6025 | #endif |
| 6804 | cpu_attach_domain(sd, i); | 6026 | cpu_attach_domain(sd, i); |
| 6805 | } | 6027 | } |
| 6806 | /* | ||
| 6807 | * Tune cache-hot values: | ||
| 6808 | */ | ||
| 6809 | calibrate_migration_costs(cpu_map); | ||
| 6810 | 6028 | ||
| 6811 | return 0; | 6029 | return 0; |
| 6812 | 6030 | ||
| @@ -7013,10 +6231,12 @@ void __init sched_init_smp(void) | |||
| 7013 | /* Move init over to a non-isolated CPU */ | 6231 | /* Move init over to a non-isolated CPU */ |
| 7014 | if (set_cpus_allowed(current, non_isolated_cpus) < 0) | 6232 | if (set_cpus_allowed(current, non_isolated_cpus) < 0) |
| 7015 | BUG(); | 6233 | BUG(); |
| 6234 | sched_init_granularity(); | ||
| 7016 | } | 6235 | } |
| 7017 | #else | 6236 | #else |
| 7018 | void __init sched_init_smp(void) | 6237 | void __init sched_init_smp(void) |
| 7019 | { | 6238 | { |
| 6239 | sched_init_granularity(); | ||
| 7020 | } | 6240 | } |
| 7021 | #endif /* CONFIG_SMP */ | 6241 | #endif /* CONFIG_SMP */ |
| 7022 | 6242 | ||
| @@ -7030,28 +6250,51 @@ int in_sched_functions(unsigned long addr) | |||
| 7030 | && addr < (unsigned long)__sched_text_end); | 6250 | && addr < (unsigned long)__sched_text_end); |
| 7031 | } | 6251 | } |
| 7032 | 6252 | ||
| 6253 | static inline void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq) | ||
| 6254 | { | ||
| 6255 | cfs_rq->tasks_timeline = RB_ROOT; | ||
| 6256 | cfs_rq->fair_clock = 1; | ||
| 6257 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 6258 | cfs_rq->rq = rq; | ||
| 6259 | #endif | ||
| 6260 | } | ||
| 6261 | |||
| 7033 | void __init sched_init(void) | 6262 | void __init sched_init(void) |
| 7034 | { | 6263 | { |
| 7035 | int i, j, k; | 6264 | u64 now = sched_clock(); |
| 7036 | int highest_cpu = 0; | 6265 | int highest_cpu = 0; |
| 6266 | int i, j; | ||
| 6267 | |||
| 6268 | /* | ||
| 6269 | * Link up the scheduling class hierarchy: | ||
| 6270 | */ | ||
| 6271 | rt_sched_class.next = &fair_sched_class; | ||
| 6272 | fair_sched_class.next = &idle_sched_class; | ||
| 6273 | idle_sched_class.next = NULL; | ||
| 7037 | 6274 | ||
| 7038 | for_each_possible_cpu(i) { | 6275 | for_each_possible_cpu(i) { |
| 7039 | struct prio_array *array; | 6276 | struct rt_prio_array *array; |
| 7040 | struct rq *rq; | 6277 | struct rq *rq; |
| 7041 | 6278 | ||
| 7042 | rq = cpu_rq(i); | 6279 | rq = cpu_rq(i); |
| 7043 | spin_lock_init(&rq->lock); | 6280 | spin_lock_init(&rq->lock); |
| 7044 | lockdep_set_class(&rq->lock, &rq->rq_lock_key); | 6281 | lockdep_set_class(&rq->lock, &rq->rq_lock_key); |
| 7045 | rq->nr_running = 0; | 6282 | rq->nr_running = 0; |
| 7046 | rq->active = rq->arrays; | 6283 | rq->clock = 1; |
| 7047 | rq->expired = rq->arrays + 1; | 6284 | init_cfs_rq(&rq->cfs, rq); |
| 7048 | rq->best_expired_prio = MAX_PRIO; | 6285 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 6286 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); | ||
| 6287 | list_add(&rq->cfs.leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); | ||
| 6288 | #endif | ||
| 6289 | rq->ls.load_update_last = now; | ||
| 6290 | rq->ls.load_update_start = now; | ||
| 7049 | 6291 | ||
| 6292 | for (j = 0; j < CPU_LOAD_IDX_MAX; j++) | ||
| 6293 | rq->cpu_load[j] = 0; | ||
| 7050 | #ifdef CONFIG_SMP | 6294 | #ifdef CONFIG_SMP |
| 7051 | rq->sd = NULL; | 6295 | rq->sd = NULL; |
| 7052 | for (j = 1; j < 3; j++) | ||
| 7053 | rq->cpu_load[j] = 0; | ||
| 7054 | rq->active_balance = 0; | 6296 | rq->active_balance = 0; |
| 6297 | rq->next_balance = jiffies; | ||
| 7055 | rq->push_cpu = 0; | 6298 | rq->push_cpu = 0; |
| 7056 | rq->cpu = i; | 6299 | rq->cpu = i; |
| 7057 | rq->migration_thread = NULL; | 6300 | rq->migration_thread = NULL; |
| @@ -7059,16 +6302,14 @@ void __init sched_init(void) | |||
| 7059 | #endif | 6302 | #endif |
| 7060 | atomic_set(&rq->nr_iowait, 0); | 6303 | atomic_set(&rq->nr_iowait, 0); |
| 7061 | 6304 | ||
| 7062 | for (j = 0; j < 2; j++) { | 6305 | array = &rq->rt.active; |
| 7063 | array = rq->arrays + j; | 6306 | for (j = 0; j < MAX_RT_PRIO; j++) { |
| 7064 | for (k = 0; k < MAX_PRIO; k++) { | 6307 | INIT_LIST_HEAD(array->queue + j); |
| 7065 | INIT_LIST_HEAD(array->queue + k); | 6308 | __clear_bit(j, array->bitmap); |
| 7066 | __clear_bit(k, array->bitmap); | ||
| 7067 | } | ||
| 7068 | // delimiter for bitsearch | ||
| 7069 | __set_bit(MAX_PRIO, array->bitmap); | ||
| 7070 | } | 6309 | } |
| 7071 | highest_cpu = i; | 6310 | highest_cpu = i; |
| 6311 | /* delimiter for bitsearch: */ | ||
| 6312 | __set_bit(MAX_RT_PRIO, array->bitmap); | ||
| 7072 | } | 6313 | } |
| 7073 | 6314 | ||
| 7074 | set_load_weight(&init_task); | 6315 | set_load_weight(&init_task); |
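The old pair of prio arrays is gone; only real-time tasks keep the bitmap-plus-list layout, now as a single rt_prio_array, and the extra bit set at MAX_RT_PRIO is a sentinel so a bitmap search always terminates. A minimal sketch of how such an array is searched (sched_find_first_bit() is the kernel's scheduler-tuned first-bit helper; the wrapper name sketch_pick_rt() is hypothetical, not from this patch):

        static struct list_head *sketch_pick_rt(struct rt_prio_array *array)
        {
                /* the delimiter bit at MAX_RT_PRIO guarantees a hit */
                int idx = sched_find_first_bit(array->bitmap);

                if (idx >= MAX_RT_PRIO)
                        return NULL;            /* no runnable RT task */
                return array->queue + idx;      /* highest-priority list */
        }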
| @@ -7095,6 +6336,10 @@ void __init sched_init(void) | |||
| 7095 | * when this runqueue becomes "idle". | 6336 | * when this runqueue becomes "idle". |
| 7096 | */ | 6337 | */ |
| 7097 | init_idle(current, smp_processor_id()); | 6338 | init_idle(current, smp_processor_id()); |
| 6339 | /* | ||
| 6340 | * During early bootup we pretend to be a normal task: | ||
| 6341 | */ | ||
| 6342 | current->sched_class = &fair_sched_class; | ||
| 7098 | } | 6343 | } |
| 7099 | 6344 | ||
| 7100 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP | 6345 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP |
| @@ -7125,29 +6370,55 @@ EXPORT_SYMBOL(__might_sleep); | |||
| 7125 | #ifdef CONFIG_MAGIC_SYSRQ | 6370 | #ifdef CONFIG_MAGIC_SYSRQ |
| 7126 | void normalize_rt_tasks(void) | 6371 | void normalize_rt_tasks(void) |
| 7127 | { | 6372 | { |
| 7128 | struct prio_array *array; | ||
| 7129 | struct task_struct *g, *p; | 6373 | struct task_struct *g, *p; |
| 7130 | unsigned long flags; | 6374 | unsigned long flags; |
| 7131 | struct rq *rq; | 6375 | struct rq *rq; |
| 6376 | int on_rq; | ||
| 7132 | 6377 | ||
| 7133 | read_lock_irq(&tasklist_lock); | 6378 | read_lock_irq(&tasklist_lock); |
| 7134 | |||
| 7135 | do_each_thread(g, p) { | 6379 | do_each_thread(g, p) { |
| 7136 | if (!rt_task(p)) | 6380 | p->se.fair_key = 0; |
| 6381 | p->se.wait_runtime = 0; | ||
| 6382 | p->se.wait_start_fair = 0; | ||
| 6383 | p->se.wait_start = 0; | ||
| 6384 | p->se.exec_start = 0; | ||
| 6385 | p->se.sleep_start = 0; | ||
| 6386 | p->se.sleep_start_fair = 0; | ||
| 6387 | p->se.block_start = 0; | ||
| 6388 | task_rq(p)->cfs.fair_clock = 0; | ||
| 6389 | task_rq(p)->clock = 0; | ||
| 6390 | |||
| 6391 | if (!rt_task(p)) { | ||
| 6392 | /* | ||
| 6393 | * Renice negative nice level userspace | ||
| 6394 | * tasks back to 0: | ||
| 6395 | */ | ||
| 6396 | if (TASK_NICE(p) < 0 && p->mm) | ||
| 6397 | set_user_nice(p, 0); | ||
| 7137 | continue; | 6398 | continue; |
| 6399 | } | ||
| 7138 | 6400 | ||
| 7139 | spin_lock_irqsave(&p->pi_lock, flags); | 6401 | spin_lock_irqsave(&p->pi_lock, flags); |
| 7140 | rq = __task_rq_lock(p); | 6402 | rq = __task_rq_lock(p); |
| 6403 | #ifdef CONFIG_SMP | ||
| 6404 | /* | ||
| 6405 | * Do not touch the migration thread: | ||
| 6406 | */ | ||
| 6407 | if (p == rq->migration_thread) | ||
| 6408 | goto out_unlock; | ||
| 6409 | #endif | ||
| 7141 | 6410 | ||
| 7142 | array = p->array; | 6411 | on_rq = p->se.on_rq; |
| 7143 | if (array) | 6412 | if (on_rq) |
| 7144 | deactivate_task(p, task_rq(p)); | 6413 | deactivate_task(task_rq(p), p, 0); |
| 7145 | __setscheduler(p, SCHED_NORMAL, 0); | 6414 | __setscheduler(rq, p, SCHED_NORMAL, 0); |
| 7146 | if (array) { | 6415 | if (on_rq) { |
| 7147 | __activate_task(p, task_rq(p)); | 6416 | activate_task(task_rq(p), p, 0); |
| 7148 | resched_task(rq->curr); | 6417 | resched_task(rq->curr); |
| 7149 | } | 6418 | } |
| 7150 | 6419 | #ifdef CONFIG_SMP | |
| 6420 | out_unlock: | ||
| 6421 | #endif | ||
| 7151 | __task_rq_unlock(rq); | 6422 | __task_rq_unlock(rq); |
| 7152 | spin_unlock_irqrestore(&p->pi_lock, flags); | 6423 | spin_unlock_irqrestore(&p->pi_lock, flags); |
| 7153 | } while_each_thread(g, p); | 6424 | } while_each_thread(g, p); |
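Note the pattern normalize_rt_tasks() shares with sched_setscheduler(): a queued task must be taken off the runqueue before its scheduling parameters change, then re-queued so its position reflects the new parameters. As a hedged, standalone sketch of that sequence (change_policy() is a hypothetical helper, not part of this patch; the callees match the signatures used above):

        static void change_policy(struct rq *rq, struct task_struct *p, int policy)
        {
                int on_rq = p->se.on_rq;

                if (on_rq)
                        deactivate_task(rq, p, 0);      /* leave rbtree/array */
                __setscheduler(rq, p, policy, 0);       /* safe while dequeued */
                if (on_rq) {
                        activate_task(rq, p, 0);        /* re-insert, new key */
                        resched_task(rq->curr);         /* ordering may change */
                }
        }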
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c new file mode 100644 index 000000000000..1baf87cceb7c --- /dev/null +++ b/kernel/sched_debug.c | |||
| @@ -0,0 +1,275 @@ | |||
| 1 | /* | ||
| 2 | * kernel/sched_debug.c | ||
| 3 | * | ||
| 4 | * Print the CFS rbtree | ||
| 5 | * | ||
| 6 | * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License version 2 as | ||
| 10 | * published by the Free Software Foundation. | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/proc_fs.h> | ||
| 14 | #include <linux/sched.h> | ||
| 15 | #include <linux/seq_file.h> | ||
| 16 | #include <linux/kallsyms.h> | ||
| 17 | #include <linux/utsname.h> | ||
| 18 | |||
| 19 | /* | ||
| 20 | * This allows printing both to /proc/sched_debug and | ||
| 21 | * to the console | ||
| 22 | */ | ||
| 23 | #define SEQ_printf(m, x...) \ | ||
| 24 | do { \ | ||
| 25 | if (m) \ | ||
| 26 | seq_printf(m, x); \ | ||
| 27 | else \ | ||
| 28 | printk(x); \ | ||
| 29 | } while (0) | ||
| 30 | |||
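SEQ_printf() is what lets one set of dump routines feed both /proc/sched_debug and the SysRq console path: sysrq_sched_debug_show() further down calls sched_debug_show(NULL, NULL), so every SEQ_printf() falls through to printk(). Illustrative use (demo_print() is hypothetical):

        static void demo_print(struct seq_file *m, int cpu)
        {
                SEQ_printf(m, "cpu#%d\n", cpu); /* seq_printf() or printk() */
        }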
| 31 | static void | ||
| 32 | print_task(struct seq_file *m, struct rq *rq, struct task_struct *p, u64 now) | ||
| 33 | { | ||
| 34 | if (rq->curr == p) | ||
| 35 | SEQ_printf(m, "R"); | ||
| 36 | else | ||
| 37 | SEQ_printf(m, " "); | ||
| 38 | |||
| 39 | SEQ_printf(m, "%15s %5d %15Ld %13Ld %13Ld %9Ld %5d " | ||
| 40 | "%15Ld %15Ld %15Ld %15Ld %15Ld\n", | ||
| 41 | p->comm, p->pid, | ||
| 42 | (long long)p->se.fair_key, | ||
| 43 | (long long)(p->se.fair_key - rq->cfs.fair_clock), | ||
| 44 | (long long)p->se.wait_runtime, | ||
| 45 | (long long)(p->nvcsw + p->nivcsw), | ||
| 46 | p->prio, | ||
| 47 | (long long)p->se.sum_exec_runtime, | ||
| 48 | (long long)p->se.sum_wait_runtime, | ||
| 49 | (long long)p->se.sum_sleep_runtime, | ||
| 50 | (long long)p->se.wait_runtime_overruns, | ||
| 51 | (long long)p->se.wait_runtime_underruns); | ||
| 52 | } | ||
| 53 | |||
| 54 | static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now) | ||
| 55 | { | ||
| 56 | struct task_struct *g, *p; | ||
| 57 | |||
| 58 | SEQ_printf(m, | ||
| 59 | "\nrunnable tasks:\n" | ||
| 60 | " task PID tree-key delta waiting" | ||
| 61 | " switches prio" | ||
| 62 | " sum-exec sum-wait sum-sleep" | ||
| 63 | " wait-overrun wait-underrun\n" | ||
| 64 | "------------------------------------------------------------------" | ||
| 65 | "----------------" | ||
| 66 | "------------------------------------------------" | ||
| 67 | "--------------------------------\n"); | ||
| 68 | |||
| 69 | read_lock_irq(&tasklist_lock); | ||
| 70 | |||
| 71 | do_each_thread(g, p) { | ||
| 72 | if (!p->se.on_rq || task_cpu(p) != rq_cpu) | ||
| 73 | continue; | ||
| 74 | |||
| 75 | print_task(m, rq, p, now); | ||
| 76 | } while_each_thread(g, p); | ||
| 77 | |||
| 78 | read_unlock_irq(&tasklist_lock); | ||
| 79 | } | ||
| 80 | |||
| 81 | static void | ||
| 82 | print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | ||
| 83 | { | ||
| 84 | s64 wait_runtime_rq_sum = 0; | ||
| 85 | struct task_struct *p; | ||
| 86 | struct rb_node *curr; | ||
| 87 | unsigned long flags; | ||
| 88 | struct rq *rq = &per_cpu(runqueues, cpu); | ||
| 89 | |||
| 90 | spin_lock_irqsave(&rq->lock, flags); | ||
| 91 | curr = first_fair(cfs_rq); | ||
| 92 | while (curr) { | ||
| 93 | p = rb_entry(curr, struct task_struct, se.run_node); | ||
| 94 | wait_runtime_rq_sum += p->se.wait_runtime; | ||
| 95 | |||
| 96 | curr = rb_next(curr); | ||
| 97 | } | ||
| 98 | spin_unlock_irqrestore(&rq->lock, flags); | ||
| 99 | |||
| 100 | SEQ_printf(m, " .%-30s: %Ld\n", "wait_runtime_rq_sum", | ||
| 101 | (long long)wait_runtime_rq_sum); | ||
| 102 | } | ||
| 103 | |||
| 104 | void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now) | ||
| 105 | { | ||
| 106 | SEQ_printf(m, "\ncfs_rq %p\n", cfs_rq); | ||
| 107 | |||
| 108 | #define P(x) \ | ||
| 109 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x)) | ||
| 110 | |||
| 111 | P(fair_clock); | ||
| 112 | P(exec_clock); | ||
| 113 | P(wait_runtime); | ||
| 114 | P(wait_runtime_overruns); | ||
| 115 | P(wait_runtime_underruns); | ||
| 116 | P(sleeper_bonus); | ||
| 117 | #undef P | ||
| 118 | |||
| 119 | print_cfs_rq_runtime_sum(m, cpu, cfs_rq); | ||
| 120 | } | ||
| 121 | |||
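The P() macro relies on preprocessor stringification (#x), so the printed label can never drift from the field it prints; P(fair_clock) above expands to:

        SEQ_printf(m, " .%-30s: %Ld\n", "fair_clock",
                        (long long)(cfs_rq->fair_clock));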
| 122 | static void print_cpu(struct seq_file *m, int cpu, u64 now) | ||
| 123 | { | ||
| 124 | struct rq *rq = &per_cpu(runqueues, cpu); | ||
| 125 | |||
| 126 | #ifdef CONFIG_X86 | ||
| 127 | { | ||
| 128 | unsigned int freq = cpu_khz ? : 1; | ||
| 129 | |||
| 130 | SEQ_printf(m, "\ncpu#%d, %u.%03u MHz\n", | ||
| 131 | cpu, freq / 1000, (freq % 1000)); | ||
| 132 | } | ||
| 133 | #else | ||
| 134 | SEQ_printf(m, "\ncpu#%d\n", cpu); | ||
| 135 | #endif | ||
| 136 | |||
| 137 | #define P(x) \ | ||
| 138 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rq->x)) | ||
| 139 | |||
| 140 | P(nr_running); | ||
| 141 | SEQ_printf(m, " .%-30s: %lu\n", "load", | ||
| 142 | rq->ls.load.weight); | ||
| 143 | P(ls.delta_fair); | ||
| 144 | P(ls.delta_exec); | ||
| 145 | P(nr_switches); | ||
| 146 | P(nr_load_updates); | ||
| 147 | P(nr_uninterruptible); | ||
| 148 | SEQ_printf(m, " .%-30s: %lu\n", "jiffies", jiffies); | ||
| 149 | P(next_balance); | ||
| 150 | P(curr->pid); | ||
| 151 | P(clock); | ||
| 152 | P(prev_clock_raw); | ||
| 153 | P(clock_warps); | ||
| 154 | P(clock_overflows); | ||
| 155 | P(clock_unstable_events); | ||
| 156 | P(clock_max_delta); | ||
| 157 | P(cpu_load[0]); | ||
| 158 | P(cpu_load[1]); | ||
| 159 | P(cpu_load[2]); | ||
| 160 | P(cpu_load[3]); | ||
| 161 | P(cpu_load[4]); | ||
| 162 | #undef P | ||
| 163 | |||
| 164 | print_cfs_stats(m, cpu, now); | ||
| 165 | |||
| 166 | print_rq(m, rq, cpu, now); | ||
| 167 | } | ||
| 168 | |||
| 169 | static int sched_debug_show(struct seq_file *m, void *v) | ||
| 170 | { | ||
| 171 | u64 now = ktime_to_ns(ktime_get()); | ||
| 172 | int cpu; | ||
| 173 | |||
| 174 | SEQ_printf(m, "Sched Debug Version: v0.04, cfs-v20, %s %.*s\n", | ||
| 175 | init_utsname()->release, | ||
| 176 | (int)strcspn(init_utsname()->version, " "), | ||
| 177 | init_utsname()->version); | ||
| 178 | |||
| 179 | SEQ_printf(m, "now at %Lu nsecs\n", (unsigned long long)now); | ||
| 180 | |||
| 181 | for_each_online_cpu(cpu) | ||
| 182 | print_cpu(m, cpu, now); | ||
| 183 | |||
| 184 | SEQ_printf(m, "\n"); | ||
| 185 | |||
| 186 | return 0; | ||
| 187 | } | ||
| 188 | |||
| 189 | void sysrq_sched_debug_show(void) | ||
| 190 | { | ||
| 191 | sched_debug_show(NULL, NULL); | ||
| 192 | } | ||
| 193 | |||
| 194 | static int sched_debug_open(struct inode *inode, struct file *filp) | ||
| 195 | { | ||
| 196 | return single_open(filp, sched_debug_show, NULL); | ||
| 197 | } | ||
| 198 | |||
| 199 | static struct file_operations sched_debug_fops = { | ||
| 200 | .open = sched_debug_open, | ||
| 201 | .read = seq_read, | ||
| 202 | .llseek = seq_lseek, | ||
| 203 | .release = seq_release, | ||
| 204 | }; | ||
| 205 | |||
| 206 | static int __init init_sched_debug_procfs(void) | ||
| 207 | { | ||
| 208 | struct proc_dir_entry *pe; | ||
| 209 | |||
| 210 | pe = create_proc_entry("sched_debug", 0644, NULL); | ||
| 211 | if (!pe) | ||
| 212 | return -ENOMEM; | ||
| 213 | |||
| 214 | pe->proc_fops = &sched_debug_fops; | ||
| 215 | |||
| 216 | return 0; | ||
| 217 | } | ||
| 218 | |||
| 219 | __initcall(init_sched_debug_procfs); | ||
| 220 | |||
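Once the initcall has run, the whole dump is available from userspace; an illustrative session (abridged, with made-up numbers):

        $ cat /proc/sched_debug
        Sched Debug Version: v0.04, cfs-v20, ...
        now at 105064923881 nsecs

        cpu#0, 1999.999 MHz
          .nr_running                    : 2
          ...

proc_sched_show_task()/proc_sched_set_task() below serve the per-task side; elsewhere in this patch they are wired up to /proc/<pid>/sched, with a write to that file resetting the tracked maxima via proc_sched_set_task().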
| 221 | void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | ||
| 222 | { | ||
| 223 | unsigned long flags; | ||
| 224 | int num_threads = 1; | ||
| 225 | |||
| 226 | rcu_read_lock(); | ||
| 227 | if (lock_task_sighand(p, &flags)) { | ||
| 228 | num_threads = atomic_read(&p->signal->count); | ||
| 229 | unlock_task_sighand(p, &flags); | ||
| 230 | } | ||
| 231 | rcu_read_unlock(); | ||
| 232 | |||
| 233 | SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, num_threads); | ||
| 234 | SEQ_printf(m, "----------------------------------------------\n"); | ||
| 235 | #define P(F) \ | ||
| 236 | SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F) | ||
| 237 | |||
| 238 | P(se.wait_start); | ||
| 239 | P(se.wait_start_fair); | ||
| 240 | P(se.exec_start); | ||
| 241 | P(se.sleep_start); | ||
| 242 | P(se.sleep_start_fair); | ||
| 243 | P(se.block_start); | ||
| 244 | P(se.sleep_max); | ||
| 245 | P(se.block_max); | ||
| 246 | P(se.exec_max); | ||
| 247 | P(se.wait_max); | ||
| 248 | P(se.wait_runtime); | ||
| 249 | P(se.wait_runtime_overruns); | ||
| 250 | P(se.wait_runtime_underruns); | ||
| 251 | P(se.sum_wait_runtime); | ||
| 252 | P(se.sum_exec_runtime); | ||
| 253 | SEQ_printf(m, "%-25s:%20Ld\n", | ||
| 254 | "nr_switches", (long long)(p->nvcsw + p->nivcsw)); | ||
| 255 | P(se.load.weight); | ||
| 256 | P(policy); | ||
| 257 | P(prio); | ||
| 258 | #undef P | ||
| 259 | |||
| 260 | { | ||
| 261 | u64 t0, t1; | ||
| 262 | |||
| 263 | t0 = sched_clock(); | ||
| 264 | t1 = sched_clock(); | ||
| 265 | SEQ_printf(m, "%-25s:%20Ld\n", | ||
| 266 | "clock-delta", (long long)(t1-t0)); | ||
| 267 | } | ||
| 268 | } | ||
| 269 | |||
| 270 | void proc_sched_set_task(struct task_struct *p) | ||
| 271 | { | ||
| 272 | p->se.sleep_max = p->se.block_max = p->se.exec_max = p->se.wait_max = 0; | ||
| 273 | p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0; | ||
| 274 | p->se.sum_exec_runtime = 0; | ||
| 275 | } | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c new file mode 100644 index 000000000000..6971db0a7160 --- /dev/null +++ b/kernel/sched_fair.c | |||
| @@ -0,0 +1,1131 @@ | |||
| 1 | /* | ||
| 2 | * Completely Fair Scheduling (CFS) Class (SCHED_NORMAL/SCHED_BATCH) | ||
| 3 | * | ||
| 4 | * Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
| 5 | * | ||
| 6 | * Interactivity improvements by Mike Galbraith | ||
| 7 | * (C) 2007 Mike Galbraith <efault@gmx.de> | ||
| 8 | * | ||
| 9 | * Various enhancements by Dmitry Adamushko. | ||
| 10 | * (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com> | ||
| 11 | * | ||
| 12 | * Group scheduling enhancements by Srivatsa Vaddagiri | ||
| 13 | * Copyright IBM Corporation, 2007 | ||
| 14 | * Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com> | ||
| 15 | * | ||
| 16 | * Scaled math optimizations by Thomas Gleixner | ||
| 17 | * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de> | ||
| 18 | */ | ||
| 19 | |||
| 20 | /* | ||
| 21 | * Preemption granularity: | ||
| 22 | * (default: 2 msec, units: nanoseconds) | ||
| 23 | * | ||
| 24 | * NOTE: this granularity value is not the same as the concept of | ||
| 25 | * 'timeslice length' - timeslices in CFS will typically be somewhat | ||
| 26 | * larger than this value. (to see the precise effective timeslice | ||
| 27 | * length of your workload, run vmstat and monitor the context-switches | ||
| 28 | * field) | ||
| 29 | * | ||
| 30 | * On SMP systems the value of this is multiplied by the log2 of the | ||
| 31 | * number of CPUs. (i.e. factor 2x on 2-way systems, 3x on 4-way | ||
| 32 | * systems, 4x on 8-way systems, 5x on 16-way systems, etc.) | ||
| 33 | */ | ||
| 34 | unsigned int sysctl_sched_granularity __read_mostly = 2000000000ULL/HZ; | ||
| 35 | |||
| 36 | /* | ||
| 37 | * SCHED_BATCH wake-up granularity. | ||
| 38 | * (default: 10 msec, units: nanoseconds) | ||
| 39 | * | ||
| 40 | * This option delays the preemption effects of decoupled workloads | ||
| 41 | * and reduces their over-scheduling. Synchronous workloads will still | ||
| 42 | * have immediate wakeup/sleep latencies. | ||
| 43 | */ | ||
| 44 | unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = | ||
| 45 | 10000000000ULL/HZ; | ||
| 46 | |||
| 47 | /* | ||
| 48 | * SCHED_OTHER wake-up granularity. | ||
| 49 | * (default: 1 msec, units: nanoseconds) | ||
| 50 | * | ||
| 51 | * This option delays the preemption effects of decoupled workloads | ||
| 52 | * and reduces their over-scheduling. Synchronous workloads will still | ||
| 53 | * have immediate wakeup/sleep latencies. | ||
| 54 | */ | ||
| 55 | unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000000ULL/HZ; | ||
| 56 | |||
| 57 | unsigned int sysctl_sched_stat_granularity __read_mostly; | ||
| 58 | |||
| 59 | /* | ||
| 60 | * Initialized in sched_init_granularity(): | ||
| 61 | */ | ||
| 62 | unsigned int sysctl_sched_runtime_limit __read_mostly; | ||
| 63 | |||
| 64 | /* | ||
| 65 | * Debugging: various feature bits | ||
| 66 | */ | ||
| 67 | enum { | ||
| 68 | SCHED_FEAT_FAIR_SLEEPERS = 1, | ||
| 69 | SCHED_FEAT_SLEEPER_AVG = 2, | ||
| 70 | SCHED_FEAT_SLEEPER_LOAD_AVG = 4, | ||
| 71 | SCHED_FEAT_PRECISE_CPU_LOAD = 8, | ||
| 72 | SCHED_FEAT_START_DEBIT = 16, | ||
| 73 | SCHED_FEAT_SKIP_INITIAL = 32, | ||
| 74 | }; | ||
| 75 | |||
| 76 | unsigned int sysctl_sched_features __read_mostly = | ||
| 77 | SCHED_FEAT_FAIR_SLEEPERS *1 | | ||
| 78 | SCHED_FEAT_SLEEPER_AVG *1 | | ||
| 79 | SCHED_FEAT_SLEEPER_LOAD_AVG *1 | | ||
| 80 | SCHED_FEAT_PRECISE_CPU_LOAD *1 | | ||
| 81 | SCHED_FEAT_START_DEBIT *1 | | ||
| 82 | SCHED_FEAT_SKIP_INITIAL *0; | ||
| 83 | |||
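The *1 / *0 multipliers are just a readable way to flip each feature's default while keeping one flag per line; the result is an ordinary bitmask, tested later with expressions like sysctl_sched_features & SCHED_FEAT_FAIR_SLEEPERS. The default above is equivalent to the long-hand form (illustrative variable name):

        unsigned int example_features =
                SCHED_FEAT_FAIR_SLEEPERS |
                SCHED_FEAT_SLEEPER_AVG |
                SCHED_FEAT_SLEEPER_LOAD_AVG |
                SCHED_FEAT_PRECISE_CPU_LOAD |
                SCHED_FEAT_START_DEBIT;         /* SKIP_INITIAL left off */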
| 84 | extern struct sched_class fair_sched_class; | ||
| 85 | |||
| 86 | /************************************************************** | ||
| 87 | * CFS operations on generic schedulable entities: | ||
| 88 | */ | ||
| 89 | |||
| 90 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 91 | |||
| 92 | /* cpu runqueue to which this cfs_rq is attached */ | ||
| 93 | static inline struct rq *rq_of(struct cfs_rq *cfs_rq) | ||
| 94 | { | ||
| 95 | return cfs_rq->rq; | ||
| 96 | } | ||
| 97 | |||
| 98 | /* currently running entity (if any) on this cfs_rq */ | ||
| 99 | static inline struct sched_entity *cfs_rq_curr(struct cfs_rq *cfs_rq) | ||
| 100 | { | ||
| 101 | return cfs_rq->curr; | ||
| 102 | } | ||
| 103 | |||
| 104 | /* An entity is a task if it doesn't "own" a runqueue */ | ||
| 105 | #define entity_is_task(se) (!se->my_q) | ||
| 106 | |||
| 107 | static inline void | ||
| 108 | set_cfs_rq_curr(struct cfs_rq *cfs_rq, struct sched_entity *se) | ||
| 109 | { | ||
| 110 | cfs_rq->curr = se; | ||
| 111 | } | ||
| 112 | |||
| 113 | #else /* CONFIG_FAIR_GROUP_SCHED */ | ||
| 114 | |||
| 115 | static inline struct rq *rq_of(struct cfs_rq *cfs_rq) | ||
| 116 | { | ||
| 117 | return container_of(cfs_rq, struct rq, cfs); | ||
| 118 | } | ||
| 119 | |||
| 120 | static inline struct sched_entity *cfs_rq_curr(struct cfs_rq *cfs_rq) | ||
| 121 | { | ||
| 122 | struct rq *rq = rq_of(cfs_rq); | ||
| 123 | |||
| 124 | if (unlikely(rq->curr->sched_class != &fair_sched_class)) | ||
| 125 | return NULL; | ||
| 126 | |||
| 127 | return &rq->curr->se; | ||
| 128 | } | ||
| 129 | |||
| 130 | #define entity_is_task(se) 1 | ||
| 131 | |||
| 132 | static inline void | ||
| 133 | set_cfs_rq_curr(struct cfs_rq *cfs_rq, struct sched_entity *se) { } | ||
| 134 | |||
| 135 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
| 136 | |||
| 137 | static inline struct task_struct *task_of(struct sched_entity *se) | ||
| 138 | { | ||
| 139 | return container_of(se, struct task_struct, se); | ||
| 140 | } | ||
| 141 | |||
| 142 | |||
| 143 | /************************************************************** | ||
| 144 | * Scheduling class tree data structure manipulation methods: | ||
| 145 | */ | ||
| 146 | |||
| 147 | /* | ||
| 148 | * Enqueue an entity into the rb-tree: | ||
| 149 | */ | ||
| 150 | static inline void | ||
| 151 | __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | ||
| 152 | { | ||
| 153 | struct rb_node **link = &cfs_rq->tasks_timeline.rb_node; | ||
| 154 | struct rb_node *parent = NULL; | ||
| 155 | struct sched_entity *entry; | ||
| 156 | s64 key = se->fair_key; | ||
| 157 | int leftmost = 1; | ||
| 158 | |||
| 159 | /* | ||
| 160 | * Find the right place in the rbtree: | ||
| 161 | */ | ||
| 162 | while (*link) { | ||
| 163 | parent = *link; | ||
| 164 | entry = rb_entry(parent, struct sched_entity, run_node); | ||
| 165 | /* | ||
| 166 | * We don't care about collisions. Nodes with | ||
| 167 | * the same key stay together. | ||
| 168 | */ | ||
| 169 | if (key - entry->fair_key < 0) { | ||
| 170 | link = &parent->rb_left; | ||
| 171 | } else { | ||
| 172 | link = &parent->rb_right; | ||
| 173 | leftmost = 0; | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | /* | ||
| 178 | * Maintain a cache of leftmost tree entries (it is frequently | ||
| 179 | * used): | ||
| 180 | */ | ||
| 181 | if (leftmost) | ||
| 182 | cfs_rq->rb_leftmost = &se->run_node; | ||
| 183 | |||
| 184 | rb_link_node(&se->run_node, parent, link); | ||
| 185 | rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); | ||
| 186 | update_load_add(&cfs_rq->load, se->load.weight); | ||
| 187 | cfs_rq->nr_running++; | ||
| 188 | se->on_rq = 1; | ||
| 189 | } | ||
| 190 | |||
| 191 | static inline void | ||
| 192 | __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | ||
| 193 | { | ||
| 194 | if (cfs_rq->rb_leftmost == &se->run_node) | ||
| 195 | cfs_rq->rb_leftmost = rb_next(&se->run_node); | ||
| 196 | rb_erase(&se->run_node, &cfs_rq->tasks_timeline); | ||
| 197 | update_load_sub(&cfs_rq->load, se->load.weight); | ||
| 198 | cfs_rq->nr_running--; | ||
| 199 | se->on_rq = 0; | ||
| 200 | } | ||
| 201 | |||
| 202 | static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq) | ||
| 203 | { | ||
| 204 | return cfs_rq->rb_leftmost; | ||
| 205 | } | ||
| 206 | |||
| 207 | static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) | ||
| 208 | { | ||
| 209 | return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node); | ||
| 210 | } | ||
| 211 | |||
| 212 | /************************************************************** | ||
| 213 | * Scheduling class statistics methods: | ||
| 214 | */ | ||
| 215 | |||
| 216 | /* | ||
| 217 | * We rescale the rescheduling granularity of tasks according to their | ||
| 218 | * nice level, but only linearly, not exponentially: | ||
| 219 | */ | ||
| 220 | static long | ||
| 221 | niced_granularity(struct sched_entity *curr, unsigned long granularity) | ||
| 222 | { | ||
| 223 | u64 tmp; | ||
| 224 | |||
| 225 | /* | ||
| 226 | * Negative nice levels get the same granularity as nice-0: | ||
| 227 | */ | ||
| 228 | if (likely(curr->load.weight >= NICE_0_LOAD)) | ||
| 229 | return granularity; | ||
| 230 | /* | ||
| 231 | * Positive nice level tasks get linearly finer | ||
| 232 | * granularity: | ||
| 233 | */ | ||
| 234 | tmp = curr->load.weight * (u64)granularity; | ||
| 235 | |||
| 236 | /* | ||
| 237 | * It will always fit into 'long': | ||
| 238 | */ | ||
| 239 | return (long) (tmp >> NICE_0_SHIFT); | ||
| 240 | } | ||
| 241 | |||
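Since NICE_0_LOAD == 1 << NICE_0_SHIFT, the returned value is simply the granularity scaled by the task's relative weight, capped from above by the nice-0 case:

        effective_granularity = (granularity * weight) >> NICE_0_SHIFT
                              =  granularity * weight / NICE_0_LOAD

so a positive-nice task at a quarter of the nice-0 weight is rescheduled on a four times finer grain, while nice 0 and negative nice levels keep the full granularity.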
| 242 | static inline void | ||
| 243 | limit_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se) | ||
| 244 | { | ||
| 245 | long limit = sysctl_sched_runtime_limit; | ||
| 246 | |||
| 247 | /* | ||
| 248 | * Niced tasks have the same history dynamic range as | ||
| 249 | * non-niced tasks: | ||
| 250 | */ | ||
| 251 | if (unlikely(se->wait_runtime > limit)) { | ||
| 252 | se->wait_runtime = limit; | ||
| 253 | schedstat_inc(se, wait_runtime_overruns); | ||
| 254 | schedstat_inc(cfs_rq, wait_runtime_overruns); | ||
| 255 | } | ||
| 256 | if (unlikely(se->wait_runtime < -limit)) { | ||
| 257 | se->wait_runtime = -limit; | ||
| 258 | schedstat_inc(se, wait_runtime_underruns); | ||
| 259 | schedstat_inc(cfs_rq, wait_runtime_underruns); | ||
| 260 | } | ||
| 261 | } | ||
| 262 | |||
| 263 | static inline void | ||
| 264 | __add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta) | ||
| 265 | { | ||
| 266 | se->wait_runtime += delta; | ||
| 267 | schedstat_add(se, sum_wait_runtime, delta); | ||
| 268 | limit_wait_runtime(cfs_rq, se); | ||
| 269 | } | ||
| 270 | |||
| 271 | static void | ||
| 272 | add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta) | ||
| 273 | { | ||
| 274 | schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime); | ||
| 275 | __add_wait_runtime(cfs_rq, se, delta); | ||
| 276 | schedstat_add(cfs_rq, wait_runtime, se->wait_runtime); | ||
| 277 | } | ||
| 278 | |||
| 279 | /* | ||
| 280 | * Update the current task's runtime statistics. Skip current tasks that | ||
| 281 | * are not in our scheduling class. | ||
| 282 | */ | ||
| 283 | static inline void | ||
| 284 | __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, u64 now) | ||
| 285 | { | ||
| 286 | unsigned long delta, delta_exec, delta_fair; | ||
| 287 | long delta_mine; | ||
| 288 | struct load_weight *lw = &cfs_rq->load; | ||
| 289 | unsigned long load = lw->weight; | ||
| 290 | |||
| 291 | if (unlikely(!load)) | ||
| 292 | return; | ||
| 293 | |||
| 294 | delta_exec = curr->delta_exec; | ||
| 295 | #ifdef CONFIG_SCHEDSTATS | ||
| 296 | if (unlikely(delta_exec > curr->exec_max)) | ||
| 297 | curr->exec_max = delta_exec; | ||
| 298 | #endif | ||
| 299 | |||
| 300 | curr->sum_exec_runtime += delta_exec; | ||
| 301 | cfs_rq->exec_clock += delta_exec; | ||
| 302 | |||
| 303 | delta_fair = calc_delta_fair(delta_exec, lw); | ||
| 304 | delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw); | ||
| 305 | |||
| 306 | if (cfs_rq->sleeper_bonus > sysctl_sched_stat_granularity) { | ||
| 307 | delta = calc_delta_mine(cfs_rq->sleeper_bonus, | ||
| 308 | curr->load.weight, lw); | ||
| 309 | if (unlikely(delta > cfs_rq->sleeper_bonus)) | ||
| 310 | delta = cfs_rq->sleeper_bonus; | ||
| 311 | |||
| 312 | cfs_rq->sleeper_bonus -= delta; | ||
| 313 | delta_mine -= delta; | ||
| 314 | } | ||
| 315 | |||
| 316 | cfs_rq->fair_clock += delta_fair; | ||
| 317 | /* | ||
| 318 | * We executed delta_exec amount of time on the CPU, | ||
| 319 | * but we were only entitled to delta_mine amount of | ||
| 320 | * time during that period (if nr_running == 1 then | ||
| 321 | * the two values are equal) | ||
| 322 | * [Note: delta_mine - delta_exec is negative]: | ||
| 323 | */ | ||
| 324 | add_wait_runtime(cfs_rq, curr, delta_mine - delta_exec); | ||
| 325 | } | ||
| 326 | |||
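A worked example makes the bookkeeping concrete: with two runnable nice-0 tasks the runqueue load is twice the current task's weight, so after delta_exec = 2 ms of CPU time calc_delta_mine() yields delta_mine = 1 ms and add_wait_runtime() charges the runner

        wait_runtime += delta_mine - delta_exec = -1 ms

i.e. it accumulates exactly the time it ran beyond its fair share, while the fair clock (which advances at a load-normalized rate) credits the same amount to the task that was waiting.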
| 327 | static void update_curr(struct cfs_rq *cfs_rq, u64 now) | ||
| 328 | { | ||
| 329 | struct sched_entity *curr = cfs_rq_curr(cfs_rq); | ||
| 330 | unsigned long delta_exec; | ||
| 331 | |||
| 332 | if (unlikely(!curr)) | ||
| 333 | return; | ||
| 334 | |||
| 335 | /* | ||
| 336 | * Get the amount of time the current task was running | ||
| 337 | * since the last time we changed load (this cannot | ||
| 338 | * overflow on 32 bits): | ||
| 339 | */ | ||
| 340 | delta_exec = (unsigned long)(now - curr->exec_start); | ||
| 341 | |||
| 342 | curr->delta_exec += delta_exec; | ||
| 343 | |||
| 344 | if (unlikely(curr->delta_exec > sysctl_sched_stat_granularity)) { | ||
| 345 | __update_curr(cfs_rq, curr, now); | ||
| 346 | curr->delta_exec = 0; | ||
| 347 | } | ||
| 348 | curr->exec_start = now; | ||
| 349 | } | ||
| 350 | |||
| 351 | static inline void | ||
| 352 | update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) | ||
| 353 | { | ||
| 354 | se->wait_start_fair = cfs_rq->fair_clock; | ||
| 355 | se->wait_start = now; | ||
| 356 | } | ||
| 357 | |||
| 358 | /* | ||
| 359 | * We calculate fair deltas here, so protect against the random effects | ||
| 360 | * of a multiplication overflow by capping it to the runtime limit: | ||
| 361 | */ | ||
| 362 | #if BITS_PER_LONG == 32 | ||
| 363 | static inline unsigned long | ||
| 364 | calc_weighted(unsigned long delta, unsigned long weight, int shift) | ||
| 365 | { | ||
| 366 | u64 tmp = (u64)delta * weight >> shift; | ||
| 367 | |||
| 368 | if (unlikely(tmp > sysctl_sched_runtime_limit*2)) | ||
| 369 | return sysctl_sched_runtime_limit*2; | ||
| 370 | return tmp; | ||
| 371 | } | ||
| 372 | #else | ||
| 373 | static inline unsigned long | ||
| 374 | calc_weighted(unsigned long delta, unsigned long weight, int shift) | ||
| 375 | { | ||
| 376 | return delta * weight >> shift; | ||
| 377 | } | ||
| 378 | #endif | ||
| 379 | |||
| 380 | /* | ||
| 381 | * Task is being enqueued - update stats: | ||
| 382 | */ | ||
| 383 | static void | ||
| 384 | update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) | ||
| 385 | { | ||
| 386 | s64 key; | ||
| 387 | |||
| 388 | /* | ||
| 389 | * Are we enqueueing a waiting task? (for current tasks | ||
| 390 | * a dequeue/enqueue event is a NOP) | ||
| 391 | */ | ||
| 392 | if (se != cfs_rq_curr(cfs_rq)) | ||
| 393 | update_stats_wait_start(cfs_rq, se, now); | ||
| 394 | /* | ||
| 395 | * Update the key: | ||
| 396 | */ | ||
| 397 | key = cfs_rq->fair_clock; | ||
| 398 | |||
| 399 | /* | ||
| 400 | * Optimize the common nice 0 case: | ||
| 401 | */ | ||
| 402 | if (likely(se->load.weight == NICE_0_LOAD)) { | ||
| 403 | key -= se->wait_runtime; | ||
| 404 | } else { | ||
| 405 | u64 tmp; | ||
| 406 | |||
| 407 | if (se->wait_runtime < 0) { | ||
| 408 | tmp = -se->wait_runtime; | ||
| 409 | key += (tmp * se->load.inv_weight) >> | ||
| 410 | (WMULT_SHIFT - NICE_0_SHIFT); | ||
| 411 | } else { | ||
| 412 | tmp = se->wait_runtime; | ||
| 413 | key -= (tmp * se->load.weight) >> NICE_0_SHIFT; | ||
| 414 | } | ||
| 415 | } | ||
| 416 | |||
| 417 | se->fair_key = key; | ||
| 418 | } | ||
| 419 | |||
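For the common nice-0 case the sort key is simply fair_clock - wait_runtime: a task that is owed CPU time (positive wait_runtime) gets a key behind the fair clock and sorts leftward in the tree, i.e. runs sooner, while a task that overran (negative wait_runtime) sorts rightward. With fair_clock = 1000 and wait_runtime = +300 the key is 700; with wait_runtime = -300 it is 1300.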
| 420 | /* | ||
| 421 | * Note: must be called with a freshly updated rq->fair_clock. | ||
| 422 | */ | ||
| 423 | static inline void | ||
| 424 | __update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) | ||
| 425 | { | ||
| 426 | unsigned long delta_fair = se->delta_fair_run; | ||
| 427 | |||
| 428 | #ifdef CONFIG_SCHEDSTATS | ||
| 429 | { | ||
| 430 | s64 delta_wait = now - se->wait_start; | ||
| 431 | if (unlikely(delta_wait > se->wait_max)) | ||
| 432 | se->wait_max = delta_wait; | ||
| 433 | } | ||
| 434 | #endif | ||
| 435 | |||
| 436 | if (unlikely(se->load.weight != NICE_0_LOAD)) | ||
| 437 | delta_fair = calc_weighted(delta_fair, se->load.weight, | ||
| 438 | NICE_0_SHIFT); | ||
| 439 | |||
| 440 | add_wait_runtime(cfs_rq, se, delta_fair); | ||
| 441 | } | ||
| 442 | |||
| 443 | static void | ||
| 444 | update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) | ||
| 445 | { | ||
| 446 | unsigned long delta_fair; | ||
| 447 | |||
| 448 | delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit), | ||
| 449 | (u64)(cfs_rq->fair_clock - se->wait_start_fair)); | ||
| 450 | |||
| 451 | se->delta_fair_run += delta_fair; | ||
| 452 | if (unlikely(abs(se->delta_fair_run) >= | ||
| 453 | sysctl_sched_stat_granularity)) { | ||
| 454 | __update_stats_wait_end(cfs_rq, se, now); | ||
| 455 | se->delta_fair_run = 0; | ||
| 456 | } | ||
| 457 | |||
| 458 | se->wait_start_fair = 0; | ||
| 459 | se->wait_start = 0; | ||
| 460 | } | ||
| 461 | |||
| 462 | static inline void | ||
| 463 | update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) | ||
| 464 | { | ||
| 465 | update_curr(cfs_rq, now); | ||
| 466 | /* | ||
| 467 | * Mark the end of the wait period if dequeueing a | ||
| 468 | * waiting task: | ||
| 469 | */ | ||
| 470 | if (se != cfs_rq_curr(cfs_rq)) | ||
| 471 | update_stats_wait_end(cfs_rq, se, now); | ||
| 472 | } | ||
| 473 | |||
| 474 | /* | ||
| 475 | * We are picking a new current task - update its stats: | ||
| 476 | */ | ||
| 477 | static inline void | ||
| 478 | update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) | ||
| 479 | { | ||
| 480 | /* | ||
| 481 | * We are starting a new run period: | ||
| 482 | */ | ||
| 483 | se->exec_start = now; | ||
| 484 | } | ||
| 485 | |||
| 486 | /* | ||
| 487 | * We are descheduling a task - update its stats: | ||
| 488 | */ | ||
| 489 | static inline void | ||
| 490 | update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) | ||
| 491 | { | ||
| 492 | se->exec_start = 0; | ||
| 493 | } | ||
| 494 | |||
| 495 | /************************************************** | ||
| 496 | * Scheduling class queueing methods: | ||
| 497 | */ | ||
| 498 | |||
| 499 | static void | ||
| 500 | __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) | ||
| 501 | { | ||
| 502 | unsigned long load = cfs_rq->load.weight, delta_fair; | ||
| 503 | long prev_runtime; | ||
| 504 | |||
| 505 | if (sysctl_sched_features & SCHED_FEAT_SLEEPER_LOAD_AVG) | ||
| 506 | load = rq_of(cfs_rq)->cpu_load[2]; | ||
| 507 | |||
| 508 | delta_fair = se->delta_fair_sleep; | ||
| 509 | |||
| 510 | /* | ||
| 511 | * Fix up delta_fair with the effect of us running | ||
| 512 | * during the whole sleep period: | ||
| 513 | */ | ||
| 514 | if (sysctl_sched_features & SCHED_FEAT_SLEEPER_AVG) | ||
| 515 | delta_fair = div64_likely32((u64)delta_fair * load, | ||
| 516 | load + se->load.weight); | ||
| 517 | |||
| 518 | if (unlikely(se->load.weight != NICE_0_LOAD)) | ||
| 519 | delta_fair = calc_weighted(delta_fair, se->load.weight, | ||
| 520 | NICE_0_SHIFT); | ||
| 521 | |||
| 522 | prev_runtime = se->wait_runtime; | ||
| 523 | __add_wait_runtime(cfs_rq, se, delta_fair); | ||
| 524 | delta_fair = se->wait_runtime - prev_runtime; | ||
| 525 | |||
| 526 | /* | ||
| 527 | * Track the amount of bonus we've given to sleepers: | ||
| 528 | */ | ||
| 529 | cfs_rq->sleeper_bonus += delta_fair; | ||
| 530 | |||
| 531 | schedstat_add(cfs_rq, wait_runtime, se->wait_runtime); | ||
| 532 | } | ||
| 533 | |||
| 534 | static void | ||
| 535 | enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) | ||
| 536 | { | ||
| 537 | struct task_struct *tsk = task_of(se); | ||
| 538 | unsigned long delta_fair; | ||
| 539 | |||
| 540 | if ((entity_is_task(se) && tsk->policy == SCHED_BATCH) || | ||
| 541 | !(sysctl_sched_features & SCHED_FEAT_FAIR_SLEEPERS)) | ||
| 542 | return; | ||
| 543 | |||
| 544 | delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit), | ||
| 545 | (u64)(cfs_rq->fair_clock - se->sleep_start_fair)); | ||
| 546 | |||
| 547 | se->delta_fair_sleep += delta_fair; | ||
| 548 | if (unlikely(abs(se->delta_fair_sleep) >= | ||
| 549 | sysctl_sched_stat_granularity)) { | ||
| 550 | __enqueue_sleeper(cfs_rq, se, now); | ||
| 551 | se->delta_fair_sleep = 0; | ||
| 552 | } | ||
| 553 | |||
| 554 | se->sleep_start_fair = 0; | ||
| 555 | |||
| 556 | #ifdef CONFIG_SCHEDSTATS | ||
| 557 | if (se->sleep_start) { | ||
| 558 | u64 delta = now - se->sleep_start; | ||
| 559 | |||
| 560 | if ((s64)delta < 0) | ||
| 561 | delta = 0; | ||
| 562 | |||
| 563 | if (unlikely(delta > se->sleep_max)) | ||
| 564 | se->sleep_max = delta; | ||
| 565 | |||
| 566 | se->sleep_start = 0; | ||
| 567 | se->sum_sleep_runtime += delta; | ||
| 568 | } | ||
| 569 | if (se->block_start) { | ||
| 570 | u64 delta = now - se->block_start; | ||
| 571 | |||
| 572 | if ((s64)delta < 0) | ||
| 573 | delta = 0; | ||
| 574 | |||
| 575 | if (unlikely(delta > se->block_max)) | ||
| 576 | se->block_max = delta; | ||
| 577 | |||
| 578 | se->block_start = 0; | ||
| 579 | se->sum_sleep_runtime += delta; | ||
| 580 | } | ||
| 581 | #endif | ||
| 582 | } | ||
| 583 | |||
| 584 | static void | ||
| 585 | enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, | ||
| 586 | int wakeup, u64 now) | ||
| 587 | { | ||
| 588 | /* | ||
| 589 | * Update the fair clock. | ||
| 590 | */ | ||
| 591 | update_curr(cfs_rq, now); | ||
| 592 | |||
| 593 | if (wakeup) | ||
| 594 | enqueue_sleeper(cfs_rq, se, now); | ||
| 595 | |||
| 596 | update_stats_enqueue(cfs_rq, se, now); | ||
| 597 | __enqueue_entity(cfs_rq, se); | ||
| 598 | } | ||
| 599 | |||
| 600 | static void | ||
| 601 | dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, | ||
| 602 | int sleep, u64 now) | ||
| 603 | { | ||
| 604 | update_stats_dequeue(cfs_rq, se, now); | ||
| 605 | if (sleep) { | ||
| 606 | se->sleep_start_fair = cfs_rq->fair_clock; | ||
| 607 | #ifdef CONFIG_SCHEDSTATS | ||
| 608 | if (entity_is_task(se)) { | ||
| 609 | struct task_struct *tsk = task_of(se); | ||
| 610 | |||
| 611 | if (tsk->state & TASK_INTERRUPTIBLE) | ||
| 612 | se->sleep_start = now; | ||
| 613 | if (tsk->state & TASK_UNINTERRUPTIBLE) | ||
| 614 | se->block_start = now; | ||
| 615 | } | ||
| 616 | cfs_rq->wait_runtime -= se->wait_runtime; | ||
| 617 | #endif | ||
| 618 | } | ||
| 619 | __dequeue_entity(cfs_rq, se); | ||
| 620 | } | ||
| 621 | |||
| 622 | /* | ||
| 623 | * Preempt the current task with a newly woken task if needed: | ||
| 624 | */ | ||
| 625 | static void | ||
| 626 | __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, | ||
| 627 | struct sched_entity *curr, unsigned long granularity) | ||
| 628 | { | ||
| 629 | s64 __delta = curr->fair_key - se->fair_key; | ||
| 630 | |||
| 631 | /* | ||
| 632 | * Take scheduling granularity into account - do not | ||
| 633 | * preempt the current task unless the best task has | ||
| 634 | * a fairness advantage larger than sched_granularity: | ||
| 635 | */ | ||
| 636 | if (__delta > niced_granularity(curr, granularity)) | ||
| 637 | resched_task(rq_of(cfs_rq)->curr); | ||
| 638 | } | ||
| 639 | |||
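Concretely, __delta measures how far the contender is ahead of the current task in fairness terms. With the default 2 ms granularity, a woken task that is only 1 ms ahead does not force a reschedule; only once its advantage exceeds niced_granularity(curr, granularity) is the current task marked for preemption, which is what keeps wakeup storms from degenerating into over-scheduling.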
| 640 | static inline void | ||
| 641 | set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) | ||
| 642 | { | ||
| 643 | /* | ||
| 644 | * Any task has to be enqueued before it gets to execute on | ||
| 645 | * a CPU. So account for the time it spent waiting on the | ||
| 646 | * runqueue. (note, here we rely on pick_next_task() having | ||
| 647 | * done a put_prev_task_fair() shortly before this, which | ||
| 648 | * updated rq->fair_clock - used by update_stats_wait_end()) | ||
| 649 | */ | ||
| 650 | update_stats_wait_end(cfs_rq, se, now); | ||
| 651 | update_stats_curr_start(cfs_rq, se, now); | ||
| 652 | set_cfs_rq_curr(cfs_rq, se); | ||
| 653 | } | ||
| 654 | |||
| 655 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq, u64 now) | ||
| 656 | { | ||
| 657 | struct sched_entity *se = __pick_next_entity(cfs_rq); | ||
| 658 | |||
| 659 | set_next_entity(cfs_rq, se, now); | ||
| 660 | |||
| 661 | return se; | ||
| 662 | } | ||
| 663 | |||
| 664 | static void | ||
| 665 | put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev, u64 now) | ||
| 666 | { | ||
| 667 | /* | ||
| 668 | * If still on the runqueue then deactivate_task() | ||
| 669 | * was not called and update_curr() has to be done: | ||
| 670 | */ | ||
| 671 | if (prev->on_rq) | ||
| 672 | update_curr(cfs_rq, now); | ||
| 673 | |||
| 674 | update_stats_curr_end(cfs_rq, prev, now); | ||
| 675 | |||
| 676 | if (prev->on_rq) | ||
| 677 | update_stats_wait_start(cfs_rq, prev, now); | ||
| 678 | set_cfs_rq_curr(cfs_rq, NULL); | ||
| 679 | } | ||
| 680 | |||
| 681 | static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | ||
| 682 | { | ||
| 683 | struct rq *rq = rq_of(cfs_rq); | ||
| 684 | struct sched_entity *next; | ||
| 685 | u64 now = __rq_clock(rq); | ||
| 686 | |||
| 687 | /* | ||
| 688 | * Dequeue and enqueue the task to update its | ||
| 689 | * position within the tree: | ||
| 690 | */ | ||
| 691 | dequeue_entity(cfs_rq, curr, 0, now); | ||
| 692 | enqueue_entity(cfs_rq, curr, 0, now); | ||
| 693 | |||
| 694 | /* | ||
| 695 | * Reschedule if another task tops the current one. | ||
| 696 | */ | ||
| 697 | next = __pick_next_entity(cfs_rq); | ||
| 698 | if (next == curr) | ||
| 699 | return; | ||
| 700 | |||
| 701 | __check_preempt_curr_fair(cfs_rq, next, curr, sysctl_sched_granularity); | ||
| 702 | } | ||
| 703 | |||
| 704 | /************************************************** | ||
| 705 | * CFS operations on tasks: | ||
| 706 | */ | ||
| 707 | |||
| 708 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 709 | |||
| 710 | /* Walk up scheduling entities hierarchy */ | ||
| 711 | #define for_each_sched_entity(se) \ | ||
| 712 | for (; se; se = se->parent) | ||
| 713 | |||
| 714 | static inline struct cfs_rq *task_cfs_rq(struct task_struct *p) | ||
| 715 | { | ||
| 716 | return p->se.cfs_rq; | ||
| 717 | } | ||
| 718 | |||
| 719 | /* runqueue on which this entity is (to be) queued */ | ||
| 720 | static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se) | ||
| 721 | { | ||
| 722 | return se->cfs_rq; | ||
| 723 | } | ||
| 724 | |||
| 725 | /* runqueue "owned" by this group */ | ||
| 726 | static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) | ||
| 727 | { | ||
| 728 | return grp->my_q; | ||
| 729 | } | ||
| 730 | |||
| 731 | /* Given a group's cfs_rq on one cpu, return its corresponding cfs_rq on | ||
| 732 | * another cpu ('this_cpu') | ||
| 733 | */ | ||
| 734 | static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu) | ||
| 735 | { | ||
| 736 | /* A later patch will take group into account */ | ||
| 737 | return &cpu_rq(this_cpu)->cfs; | ||
| 738 | } | ||
| 739 | |||
| 740 | /* Iterate through all leaf cfs_rq's on a runqueue */ | ||
| 741 | #define for_each_leaf_cfs_rq(rq, cfs_rq) \ | ||
| 742 | list_for_each_entry(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list) | ||
| 743 | |||
| 744 | /* Do the two (enqueued) tasks belong to the same group ? */ | ||
| 745 | static inline int is_same_group(struct task_struct *curr, struct task_struct *p) | ||
| 746 | { | ||
| 747 | if (curr->se.cfs_rq == p->se.cfs_rq) | ||
| 748 | return 1; | ||
| 749 | |||
| 750 | return 0; | ||
| 751 | } | ||
| 752 | |||
| 753 | #else /* CONFIG_FAIR_GROUP_SCHED */ | ||
| 754 | |||
| 755 | #define for_each_sched_entity(se) \ | ||
| 756 | for (; se; se = NULL) | ||
| 757 | |||
| 758 | static inline struct cfs_rq *task_cfs_rq(struct task_struct *p) | ||
| 759 | { | ||
| 760 | return &task_rq(p)->cfs; | ||
| 761 | } | ||
| 762 | |||
| 763 | static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se) | ||
| 764 | { | ||
| 765 | struct task_struct *p = task_of(se); | ||
| 766 | struct rq *rq = task_rq(p); | ||
| 767 | |||
| 768 | return &rq->cfs; | ||
| 769 | } | ||
| 770 | |||
| 771 | /* runqueue "owned" by this group */ | ||
| 772 | static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) | ||
| 773 | { | ||
| 774 | return NULL; | ||
| 775 | } | ||
| 776 | |||
| 777 | static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu) | ||
| 778 | { | ||
| 779 | return &cpu_rq(this_cpu)->cfs; | ||
| 780 | } | ||
| 781 | |||
| 782 | #define for_each_leaf_cfs_rq(rq, cfs_rq) \ | ||
| 783 | for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL) | ||
| 784 | |||
| 785 | static inline int is_same_group(struct task_struct *curr, struct task_struct *p) | ||
| 786 | { | ||
| 787 | return 1; | ||
| 788 | } | ||
| 789 | |||
| 790 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
| 791 | |||
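With CONFIG_FAIR_GROUP_SCHED disabled the hierarchy collapses: for_each_sched_entity() expands to for (; se; se = NULL), so each walk visits exactly one entity. The enqueue path below then effectively reduces to this flattened form (for illustration only; the function name is hypothetical):

        static void
        enqueue_task_fair_flat(struct rq *rq, struct task_struct *p,
                               int wakeup, u64 now)
        {
                struct sched_entity *se = &p->se;

                if (!se->on_rq)
                        enqueue_entity(&rq->cfs, se, wakeup, now);
        }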
| 792 | /* | ||
| 793 | * The enqueue_task method is called before nr_running is | ||
| 794 | * increased. Here we update the fair scheduling stats and | ||
| 795 | * then put the task into the rbtree: | ||
| 796 | */ | ||
| 797 | static void | ||
| 798 | enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now) | ||
| 799 | { | ||
| 800 | struct cfs_rq *cfs_rq; | ||
| 801 | struct sched_entity *se = &p->se; | ||
| 802 | |||
| 803 | for_each_sched_entity(se) { | ||
| 804 | if (se->on_rq) | ||
| 805 | break; | ||
| 806 | cfs_rq = cfs_rq_of(se); | ||
| 807 | enqueue_entity(cfs_rq, se, wakeup, now); | ||
| 808 | } | ||
| 809 | } | ||
| 810 | |||
| 811 | /* | ||
| 812 | * The dequeue_task method is called before nr_running is | ||
| 813 | * decreased. We remove the task from the rbtree and | ||
| 814 | * update the fair scheduling stats: | ||
| 815 | */ | ||
| 816 | static void | ||
| 817 | dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep, u64 now) | ||
| 818 | { | ||
| 819 | struct cfs_rq *cfs_rq; | ||
| 820 | struct sched_entity *se = &p->se; | ||
| 821 | |||
| 822 | for_each_sched_entity(se) { | ||
| 823 | cfs_rq = cfs_rq_of(se); | ||
| 824 | dequeue_entity(cfs_rq, se, sleep, now); | ||
| 825 | /* Don't dequeue parent if it has other entities besides us */ | ||
| 826 | if (cfs_rq->load.weight) | ||
| 827 | break; | ||
| 828 | } | ||
| 829 | } | ||
| 830 | |||
| 831 | /* | ||
| 832 | * sched_yield() support is very simple - we dequeue and enqueue | ||
| 833 | */ | ||
| 834 | static void yield_task_fair(struct rq *rq, struct task_struct *p) | ||
| 835 | { | ||
| 836 | struct cfs_rq *cfs_rq = task_cfs_rq(p); | ||
| 837 | u64 now = __rq_clock(rq); | ||
| 838 | |||
| 839 | /* | ||
| 840 | * Dequeue and enqueue the task to update its | ||
| 841 | * position within the tree: | ||
| 842 | */ | ||
| 843 | dequeue_entity(cfs_rq, &p->se, 0, now); | ||
| 844 | enqueue_entity(cfs_rq, &p->se, 0, now); | ||
| 845 | } | ||
| 846 | |||
| 847 | /* | ||
| 848 | * Preempt the current task with a newly woken task if needed: | ||
| 849 | */ | ||
| 850 | static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p) | ||
| 851 | { | ||
| 852 | struct task_struct *curr = rq->curr; | ||
| 853 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | ||
| 854 | unsigned long gran; | ||
| 855 | |||
| 856 | if (unlikely(rt_prio(p->prio))) { | ||
| 857 | update_curr(cfs_rq, rq_clock(rq)); | ||
| 858 | resched_task(curr); | ||
| 859 | return; | ||
| 860 | } | ||
| 861 | |||
| 862 | gran = sysctl_sched_wakeup_granularity; | ||
| 863 | /* | ||
| 864 | * Batch tasks prefer throughput over latency: | ||
| 865 | */ | ||
| 866 | if (unlikely(p->policy == SCHED_BATCH)) | ||
| 867 | gran = sysctl_sched_batch_wakeup_granularity; | ||
| 868 | |||
| 869 | if (is_same_group(curr, p)) | ||
| 870 | __check_preempt_curr_fair(cfs_rq, &p->se, &curr->se, gran); | ||
| 871 | } | ||
| 872 | |||
| 873 | static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now) | ||
| 874 | { | ||
| 875 | struct cfs_rq *cfs_rq = &rq->cfs; | ||
| 876 | struct sched_entity *se; | ||
| 877 | |||
| 878 | if (unlikely(!cfs_rq->nr_running)) | ||
| 879 | return NULL; | ||
| 880 | |||
| 881 | do { | ||
| 882 | se = pick_next_entity(cfs_rq, now); | ||
| 883 | cfs_rq = group_cfs_rq(se); | ||
| 884 | } while (cfs_rq); | ||
| 885 | |||
| 886 | return task_of(se); | ||
| 887 | } | ||
| 888 | |||
| 889 | /* | ||
| 890 | * Account for a descheduled task: | ||
| 891 | */ | ||
| 892 | static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, u64 now) | ||
| 893 | { | ||
| 894 | struct sched_entity *se = &prev->se; | ||
| 895 | struct cfs_rq *cfs_rq; | ||
| 896 | |||
| 897 | for_each_sched_entity(se) { | ||
| 898 | cfs_rq = cfs_rq_of(se); | ||
| 899 | put_prev_entity(cfs_rq, se, now); | ||
| 900 | } | ||
| 901 | } | ||
| 902 | |||
| 903 | /************************************************** | ||
| 904 | * Fair scheduling class load-balancing methods: | ||
| 905 | */ | ||
| 906 | |||
| 907 | /* | ||
| 908 | * Load-balancing iterator. Note: while the runqueue stays locked | ||
| 909 | * during the whole iteration, the current task might be | ||
| 910 | * dequeued so the iterator has to be dequeue-safe. Here we | ||
| 911 | * achieve that by always pre-iterating before returning | ||
| 912 | * the current task: | ||
| 913 | */ | ||
| 914 | static inline struct task_struct * | ||
| 915 | __load_balance_iterator(struct cfs_rq *cfs_rq, struct rb_node *curr) | ||
| 916 | { | ||
| 917 | struct task_struct *p; | ||
| 918 | |||
| 919 | if (!curr) | ||
| 920 | return NULL; | ||
| 921 | |||
| 922 | p = rb_entry(curr, struct task_struct, se.run_node); | ||
| 923 | cfs_rq->rb_load_balance_curr = rb_next(curr); | ||
| 924 | |||
| 925 | return p; | ||
| 926 | } | ||
| 927 | |||
| 928 | static struct task_struct *load_balance_start_fair(void *arg) | ||
| 929 | { | ||
| 930 | struct cfs_rq *cfs_rq = arg; | ||
| 931 | |||
| 932 | return __load_balance_iterator(cfs_rq, first_fair(cfs_rq)); | ||
| 933 | } | ||
| 934 | |||
| 935 | static struct task_struct *load_balance_next_fair(void *arg) | ||
| 936 | { | ||
| 937 | struct cfs_rq *cfs_rq = arg; | ||
| 938 | |||
| 939 | return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr); | ||
| 940 | } | ||
| 941 | |||
| 942 | static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) | ||
| 943 | { | ||
| 944 | struct sched_entity *curr; | ||
| 945 | struct task_struct *p; | ||
| 946 | |||
| 947 | if (!cfs_rq->nr_running) | ||
| 948 | return MAX_PRIO; | ||
| 949 | |||
| 950 | curr = __pick_next_entity(cfs_rq); | ||
| 951 | p = task_of(curr); | ||
| 952 | |||
| 953 | return p->prio; | ||
| 954 | } | ||
| 955 | |||
| 956 | static int | ||
| 957 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
| 958 | unsigned long max_nr_move, unsigned long max_load_move, | ||
| 959 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
| 960 | int *all_pinned, unsigned long *total_load_moved) | ||
| 961 | { | ||
| 962 | struct cfs_rq *busy_cfs_rq; | ||
| 963 | unsigned long load_moved, total_nr_moved = 0, nr_moved; | ||
| 964 | long rem_load_move = max_load_move; | ||
| 965 | struct rq_iterator cfs_rq_iterator; | ||
| 966 | |||
| 967 | cfs_rq_iterator.start = load_balance_start_fair; | ||
| 968 | cfs_rq_iterator.next = load_balance_next_fair; | ||
| 969 | |||
| 970 | for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { | ||
| 971 | struct cfs_rq *this_cfs_rq; | ||
| 972 | long imbalance; | ||
| 973 | unsigned long maxload; | ||
| 974 | int this_best_prio, best_prio, best_prio_seen = 0; | ||
| 975 | |||
| 976 | this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); | ||
| 977 | |||
| 978 | imbalance = busy_cfs_rq->load.weight - | ||
| 979 | this_cfs_rq->load.weight; | ||
| 980 | /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */ | ||
| 981 | if (imbalance <= 0) | ||
| 982 | continue; | ||
| 983 | |||
| 984 | /* Don't pull more than imbalance/2 */ | ||
| 985 | imbalance /= 2; | ||
| 986 | maxload = min(rem_load_move, imbalance); | ||
| 987 | |||
| 988 | this_best_prio = cfs_rq_best_prio(this_cfs_rq); | ||
| 989 | best_prio = cfs_rq_best_prio(busy_cfs_rq); | ||
| 990 | |||
| 991 | /* | ||
| 992 | * Enable handling of the case where there is more than one task | ||
| 993 | * with the best priority. If the current running task is one | ||
| 994 | * of those with prio==best_prio we know it won't be moved | ||
| 995 | * and therefore it's safe to override the skip (based on load) | ||
| 996 | * of any task we find with that prio. | ||
| 997 | */ | ||
| 998 | if (cfs_rq_curr(busy_cfs_rq) == &busiest->curr->se) | ||
| 999 | best_prio_seen = 1; | ||
| 1000 | |||
| 1001 | /* pass busy_cfs_rq argument into | ||
| 1002 | * load_balance_[start|next]_fair iterators | ||
| 1003 | */ | ||
| 1004 | cfs_rq_iterator.arg = busy_cfs_rq; | ||
| 1005 | nr_moved = balance_tasks(this_rq, this_cpu, busiest, | ||
| 1006 | max_nr_move, maxload, sd, idle, all_pinned, | ||
| 1007 | &load_moved, this_best_prio, best_prio, | ||
| 1008 | best_prio_seen, &cfs_rq_iterator); | ||
| 1009 | |||
| 1010 | total_nr_moved += nr_moved; | ||
| 1011 | max_nr_move -= nr_moved; | ||
| 1012 | rem_load_move -= load_moved; | ||
| 1013 | |||
| 1014 | if (max_nr_move <= 0 || rem_load_move <= 0) | ||
| 1015 | break; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | *total_load_moved = max_load_move - rem_load_move; | ||
| 1019 | |||
| 1020 | return total_nr_moved; | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | /* | ||
| 1024 | * scheduler tick hitting a task of our scheduling class: | ||
| 1025 | */ | ||
| 1026 | static void task_tick_fair(struct rq *rq, struct task_struct *curr) | ||
| 1027 | { | ||
| 1028 | struct cfs_rq *cfs_rq; | ||
| 1029 | struct sched_entity *se = &curr->se; | ||
| 1030 | |||
| 1031 | for_each_sched_entity(se) { | ||
| 1032 | cfs_rq = cfs_rq_of(se); | ||
| 1033 | entity_tick(cfs_rq, se); | ||
| 1034 | } | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | /* | ||
| 1038 | * Share the fairness runtime between parent and child, thus the | ||
| 1039 | * total amount of pressure on the CPU stays equal - new tasks | ||
| 1040 | * get a chance to run but frequent forkers are not allowed to | ||
| 1041 | * monopolize the CPU. Note: the parent runqueue is locked, | ||
| 1042 | * the child is not running yet. | ||
| 1043 | */ | ||
| 1044 | static void task_new_fair(struct rq *rq, struct task_struct *p) | ||
| 1045 | { | ||
| 1046 | struct cfs_rq *cfs_rq = task_cfs_rq(p); | ||
| 1047 | struct sched_entity *se = &p->se; | ||
| 1048 | u64 now = rq_clock(rq); | ||
| 1049 | |||
| 1050 | sched_info_queued(p); | ||
| 1051 | |||
| 1052 | update_stats_enqueue(cfs_rq, se, now); | ||
| 1053 | /* | ||
| 1054 | * Child runs first: we let it run before the parent | ||
| 1055 | * until it reschedules once. We set up the key so that | ||
| 1056 | * it will preempt the parent: | ||
| 1057 | */ | ||
| 1058 | p->se.fair_key = current->se.fair_key - | ||
| 1059 | niced_granularity(&rq->curr->se, sysctl_sched_granularity) - 1; | ||
| 1060 | /* | ||
| 1061 | * The first wait is dominated by the child-runs-first logic, | ||
| 1062 | * so do not credit it with that waiting time yet: | ||
| 1063 | */ | ||
| 1064 | if (sysctl_sched_features & SCHED_FEAT_SKIP_INITIAL) | ||
| 1065 | p->se.wait_start_fair = 0; | ||
| 1066 | |||
| 1067 | /* | ||
| 1068 | * The statistical average of wait_runtime is about | ||
| 1069 | * -granularity/2, so initialize the task with that: | ||
| 1070 | */ | ||
| 1071 | if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) | ||
| 1072 | p->se.wait_runtime = -(sysctl_sched_granularity / 2); | ||
| 1073 | |||
| 1074 | __enqueue_entity(cfs_rq, se); | ||
| 1075 | inc_nr_running(p, rq, now); | ||
| 1076 | } | ||
| 1077 | |||
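Worked through: the child's key lands one unit plus a (nice-scaled) granularity to the left of the parent's, so the child is the leftmost entity and wins the very next pick; with SCHED_FEAT_START_DEBIT it also starts half a granularity (1 ms by default) in debt, so a tight fork loop cannot mint fresh runtime faster than it is consumed.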
| 1078 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 1079 | /* Account for a task changing its policy or group. | ||
| 1080 | * | ||
| 1081 | * This routine is mostly called to set cfs_rq->curr field when a task | ||
| 1082 | * migrates between groups/classes. | ||
| 1083 | */ | ||
| 1084 | static void set_curr_task_fair(struct rq *rq) | ||
| 1085 | { | ||
| 1086 | struct task_struct *curr = rq->curr; | ||
| 1087 | struct sched_entity *se = &curr->se; | ||
| 1088 | u64 now = rq_clock(rq); | ||
| 1089 | struct cfs_rq *cfs_rq; | ||
| 1090 | |||
| 1091 | for_each_sched_entity(se) { | ||
| 1092 | cfs_rq = cfs_rq_of(se); | ||
| 1093 | set_next_entity(cfs_rq, se, now); | ||
| 1094 | } | ||
| 1095 | } | ||
| 1096 | #else | ||
| 1097 | static void set_curr_task_fair(struct rq *rq) | ||
| 1098 | { | ||
| 1099 | } | ||
| 1100 | #endif | ||
| 1101 | |||
| 1102 | /* | ||
| 1103 | * All the scheduling class methods: | ||
| 1104 | */ | ||
| 1105 | struct sched_class fair_sched_class __read_mostly = { | ||
| 1106 | .enqueue_task = enqueue_task_fair, | ||
| 1107 | .dequeue_task = dequeue_task_fair, | ||
| 1108 | .yield_task = yield_task_fair, | ||
| 1109 | |||
| 1110 | .check_preempt_curr = check_preempt_curr_fair, | ||
| 1111 | |||
| 1112 | .pick_next_task = pick_next_task_fair, | ||
| 1113 | .put_prev_task = put_prev_task_fair, | ||
| 1114 | |||
| 1115 | .load_balance = load_balance_fair, | ||
| 1116 | |||
| 1117 | .set_curr_task = set_curr_task_fair, | ||
| 1118 | .task_tick = task_tick_fair, | ||
| 1119 | .task_new = task_new_fair, | ||
| 1120 | }; | ||
| 1121 | |||
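For orientation, a sketch of how the core scheduler is assumed to consume such a class table: kernel/sched.c (not part of this hunk) walks the classes from highest to lowest priority and takes the first task offered; the idle class further below always returns a task, so the walk terminates. The sched_class_highest name and the ->next chaining (rt -> fair -> idle) are assumptions about the core side, not code from this hunk:

	static struct task_struct *pick_next_task(struct rq *rq, u64 now)
	{
		struct sched_class *class = sched_class_highest;
		struct task_struct *p;

		for (;;) {
			p = class->pick_next_task(rq, now);
			if (p)
				return p;
			class = class->next;	/* rt -> fair -> idle */
		}
	}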
| 1122 | #ifdef CONFIG_SCHED_DEBUG | ||
| 1123 | void print_cfs_stats(struct seq_file *m, int cpu, u64 now) | ||
| 1124 | { | ||
| 1125 | struct rq *rq = cpu_rq(cpu); | ||
| 1126 | struct cfs_rq *cfs_rq; | ||
| 1127 | |||
| 1128 | for_each_leaf_cfs_rq(rq, cfs_rq) | ||
| 1129 | print_cfs_rq(m, cpu, cfs_rq, now); | ||
| 1130 | } | ||
| 1131 | #endif | ||
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c new file mode 100644 index 000000000000..41841e741c4a --- /dev/null +++ b/kernel/sched_idletask.c | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | /* | ||
| 2 | * idle-task scheduling class. | ||
| 3 | * | ||
| 4 | * (NOTE: these are not related to SCHED_IDLE tasks, which are | ||
| 5 | * handled in sched_fair.c) | ||
| 6 | */ | ||
| 7 | |||
| 8 | /* | ||
| 9 | * Idle tasks are unconditionally rescheduled: | ||
| 10 | */ | ||
| 11 | static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p) | ||
| 12 | { | ||
| 13 | resched_task(rq->idle); | ||
| 14 | } | ||
| 15 | |||
| 16 | static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now) | ||
| 17 | { | ||
| 18 | schedstat_inc(rq, sched_goidle); | ||
| 19 | |||
| 20 | return rq->idle; | ||
| 21 | } | ||
| 22 | |||
| 23 | /* | ||
| 24 | * It is not legal to sleep in the idle task - print a warning | ||
| 25 | * message if some code attempts to do it: | ||
| 26 | */ | ||
| 27 | static void | ||
| 28 | dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now) | ||
| 29 | { | ||
| 30 | spin_unlock_irq(&rq->lock); | ||
| 31 | printk(KERN_ERR "bad: scheduling from the idle thread!\n"); | ||
| 32 | dump_stack(); | ||
| 33 | spin_lock_irq(&rq->lock); | ||
| 34 | } | ||
| 35 | |||
| 36 | static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now) | ||
| 37 | { | ||
| 38 | } | ||
| 39 | |||
| 40 | static int | ||
| 41 | load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
| 42 | unsigned long max_nr_move, unsigned long max_load_move, | ||
| 43 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
| 44 | int *all_pinned, unsigned long *total_load_moved) | ||
| 45 | { | ||
| 46 | return 0; | ||
| 47 | } | ||
| 48 | |||
| 49 | static void task_tick_idle(struct rq *rq, struct task_struct *curr) | ||
| 50 | { | ||
| 51 | } | ||
| 52 | |||
| 53 | /* | ||
| 54 | * Simple, special scheduling class for the per-CPU idle tasks: | ||
| 55 | */ | ||
| 56 | static struct sched_class idle_sched_class __read_mostly = { | ||
| 57 | /* no enqueue/yield_task for idle tasks */ | ||
| 58 | |||
| 59 | /* dequeue is not valid, we print a debug message there: */ | ||
| 60 | .dequeue_task = dequeue_task_idle, | ||
| 61 | |||
| 62 | .check_preempt_curr = check_preempt_curr_idle, | ||
| 63 | |||
| 64 | .pick_next_task = pick_next_task_idle, | ||
| 65 | .put_prev_task = put_prev_task_idle, | ||
| 66 | |||
| 67 | .load_balance = load_balance_idle, | ||
| 68 | |||
| 69 | .task_tick = task_tick_idle, | ||
| 70 | /* no .task_new for idle tasks */ | ||
| 71 | }; | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c new file mode 100644 index 000000000000..1192a2741b99 --- /dev/null +++ b/kernel/sched_rt.c | |||
| @@ -0,0 +1,255 @@ | |||
| 1 | /* | ||
| 2 | * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR | ||
| 3 | * policies) | ||
| 4 | */ | ||
| 5 | |||
| 6 | /* | ||
| 7 | * Update the current task's runtime statistics. Skip the current | ||
| 8 | * task if it is not in our scheduling class. | ||
| 9 | */ | ||
| 10 | static inline void update_curr_rt(struct rq *rq, u64 now) | ||
| 11 | { | ||
| 12 | struct task_struct *curr = rq->curr; | ||
| 13 | u64 delta_exec; | ||
| 14 | |||
| 15 | if (!task_has_rt_policy(curr)) | ||
| 16 | return; | ||
| 17 | |||
| 18 | delta_exec = now - curr->se.exec_start; | ||
| 19 | if (unlikely((s64)delta_exec < 0)) | ||
| 20 | delta_exec = 0; | ||
| 21 | if (unlikely(delta_exec > curr->se.exec_max)) | ||
| 22 | curr->se.exec_max = delta_exec; | ||
| 23 | |||
| 24 | curr->se.sum_exec_runtime += delta_exec; | ||
| 25 | curr->se.exec_start = now; | ||
| 26 | } | ||
| 27 | |||
| 28 | static void | ||
| 29 | enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now) | ||
| 30 | { | ||
| 31 | struct rt_prio_array *array = &rq->rt.active; | ||
| 32 | |||
| 33 | list_add_tail(&p->run_list, array->queue + p->prio); | ||
| 34 | __set_bit(p->prio, array->bitmap); | ||
| 35 | } | ||
| 36 | |||
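The queue/bitmap pair manipulated above is the classic O(1) priority array. A sketch of the shape these helpers assume (the real definition lives in kernel/sched.c, outside this hunk); the bit one past MAX_RT_PRIO acts as a sentinel for the find-first-bit scans below:

	struct rt_prio_array {
		DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* +1 bit as delimiter */
		struct list_head queue[MAX_RT_PRIO];   /* one FIFO per prio */
	};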
| 37 | /* | ||
| 38 | * Adding/removing a task to/from a priority array: | ||
| 39 | */ | ||
| 40 | static void | ||
| 41 | dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep, u64 now) | ||
| 42 | { | ||
| 43 | struct rt_prio_array *array = &rq->rt.active; | ||
| 44 | |||
| 45 | update_curr_rt(rq, now); | ||
| 46 | |||
| 47 | list_del(&p->run_list); | ||
| 48 | if (list_empty(array->queue + p->prio)) | ||
| 49 | __clear_bit(p->prio, array->bitmap); | ||
| 50 | } | ||
| 51 | |||
| 52 | /* | ||
| 53 | * Put a task at the end of the run list without the overhead of a dequeue | ||
| 54 | * followed by enqueue. | ||
| 55 | */ | ||
| 56 | static void requeue_task_rt(struct rq *rq, struct task_struct *p) | ||
| 57 | { | ||
| 58 | struct rt_prio_array *array = &rq->rt.active; | ||
| 59 | |||
| 60 | list_move_tail(&p->run_list, array->queue + p->prio); | ||
| 61 | } | ||
| 62 | |||
| 63 | static void | ||
| 64 | yield_task_rt(struct rq *rq, struct task_struct *p) | ||
| 65 | { | ||
| 66 | requeue_task_rt(rq, p); | ||
| 67 | } | ||
| 68 | |||
| 69 | /* | ||
| 70 | * Preempt the current task with a newly woken task if needed: | ||
| 71 | */ | ||
| 72 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) | ||
| 73 | { | ||
| 74 | if (p->prio < rq->curr->prio) | ||
| 75 | resched_task(rq->curr); | ||
| 76 | } | ||
| 77 | |||
| 78 | static struct task_struct *pick_next_task_rt(struct rq *rq, u64 now) | ||
| 79 | { | ||
| 80 | struct rt_prio_array *array = &rq->rt.active; | ||
| 81 | struct task_struct *next; | ||
| 82 | struct list_head *queue; | ||
| 83 | int idx; | ||
| 84 | |||
| 85 | idx = sched_find_first_bit(array->bitmap); | ||
| 86 | if (idx >= MAX_RT_PRIO) | ||
| 87 | return NULL; | ||
| 88 | |||
| 89 | queue = array->queue + idx; | ||
| 90 | next = list_entry(queue->next, struct task_struct, run_list); | ||
| 91 | |||
| 92 | next->se.exec_start = now; | ||
| 93 | |||
| 94 | return next; | ||
| 95 | } | ||
| 96 | |||
| 97 | static void put_prev_task_rt(struct rq *rq, struct task_struct *p, u64 now) | ||
| 98 | { | ||
| 99 | update_curr_rt(rq, now); | ||
| 100 | p->se.exec_start = 0; | ||
| 101 | } | ||
| 102 | |||
| 103 | /* | ||
| 104 | * Load-balancing iterator. Note: while the runqueue stays locked | ||
| 105 | * during the whole iteration, the current task might be | ||
| 106 | * dequeued so the iterator has to be dequeue-safe. Here we | ||
| 107 | * achieve that by always pre-iterating before returning | ||
| 108 | * the current task: | ||
| 109 | */ | ||
| 110 | static struct task_struct *load_balance_start_rt(void *arg) | ||
| 111 | { | ||
| 112 | struct rq *rq = arg; | ||
| 113 | struct rt_prio_array *array = &rq->rt.active; | ||
| 114 | struct list_head *head, *curr; | ||
| 115 | struct task_struct *p; | ||
| 116 | int idx; | ||
| 117 | |||
| 118 | idx = sched_find_first_bit(array->bitmap); | ||
| 119 | if (idx >= MAX_RT_PRIO) | ||
| 120 | return NULL; | ||
| 121 | |||
| 122 | head = array->queue + idx; | ||
| 123 | curr = head->prev; | ||
| 124 | |||
| 125 | p = list_entry(curr, struct task_struct, run_list); | ||
| 126 | |||
| 127 | curr = curr->prev; | ||
| 128 | |||
| 129 | rq->rt.rt_load_balance_idx = idx; | ||
| 130 | rq->rt.rt_load_balance_head = head; | ||
| 131 | rq->rt.rt_load_balance_curr = curr; | ||
| 132 | |||
| 133 | return p; | ||
| 134 | } | ||
| 135 | |||
| 136 | static struct task_struct *load_balance_next_rt(void *arg) | ||
| 137 | { | ||
| 138 | struct rq *rq = arg; | ||
| 139 | struct rt_prio_array *array = &rq->rt.active; | ||
| 140 | struct list_head *head, *curr; | ||
| 141 | struct task_struct *p; | ||
| 142 | int idx; | ||
| 143 | |||
| 144 | idx = rq->rt.rt_load_balance_idx; | ||
| 145 | head = rq->rt.rt_load_balance_head; | ||
| 146 | curr = rq->rt.rt_load_balance_curr; | ||
| 147 | |||
| 148 | /* | ||
| 149 | * If we arrived back at the head again then | ||
| 150 | * iterate to the next queue (if any): | ||
| 151 | */ | ||
| 152 | if (unlikely(head == curr)) { | ||
| 153 | int next_idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1); | ||
| 154 | |||
| 155 | if (next_idx >= MAX_RT_PRIO) | ||
| 156 | return NULL; | ||
| 157 | |||
| 158 | idx = next_idx; | ||
| 159 | head = array->queue + idx; | ||
| 160 | curr = head->prev; | ||
| 161 | |||
| 162 | rq->rt.rt_load_balance_idx = idx; | ||
| 163 | rq->rt.rt_load_balance_head = head; | ||
| 164 | } | ||
| 165 | |||
| 166 | p = list_entry(curr, struct task_struct, run_list); | ||
| 167 | |||
| 168 | curr = curr->prev; | ||
| 169 | |||
| 170 | rq->rt.rt_load_balance_curr = curr; | ||
| 171 | |||
| 172 | return p; | ||
| 173 | } | ||
| 174 | |||
| 175 | static int | ||
| 176 | load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
| 177 | unsigned long max_nr_move, unsigned long max_load_move, | ||
| 178 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
| 179 | int *all_pinned, unsigned long *load_moved) | ||
| 180 | { | ||
| 181 | int this_best_prio, best_prio, best_prio_seen = 0; | ||
| 182 | int nr_moved; | ||
| 183 | struct rq_iterator rt_rq_iterator; | ||
| 184 | |||
| 185 | best_prio = sched_find_first_bit(busiest->rt.active.bitmap); | ||
| 186 | this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap); | ||
| 187 | |||
| 188 | /* | ||
| 189 | * Enable handling of the case where there is more than one task | ||
| 190 | * with the best priority. If the currently running task is one | ||
| 191 | * of those with prio==best_prio we know it won't be moved | ||
| 192 | * and therefore it's safe to override the skip (based on load) | ||
| 193 | * of any task we find with that prio. | ||
| 194 | */ | ||
| 195 | if (busiest->curr->prio == best_prio) | ||
| 196 | best_prio_seen = 1; | ||
| 197 | |||
| 198 | rt_rq_iterator.start = load_balance_start_rt; | ||
| 199 | rt_rq_iterator.next = load_balance_next_rt; | ||
| 200 | /* | ||
| 201 | * Pass the 'busiest' rq into the load_balance_[start|next]_rt iterators: | ||
| 202 | */ | ||
| 203 | rt_rq_iterator.arg = busiest; | ||
| 204 | |||
| 205 | nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move, | ||
| 206 | max_load_move, sd, idle, all_pinned, load_moved, | ||
| 207 | this_best_prio, best_prio, best_prio_seen, | ||
| 208 | &rt_rq_iterator); | ||
| 209 | |||
| 210 | return nr_moved; | ||
| 211 | } | ||
| 212 | |||
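balance_tasks() is driven through a small callback iterator rather than knowing about RT queues directly. A sketch of the contract assumed here (the actual struct is defined in kernel/sched.c, not in this hunk): start() positions the walk and returns the first candidate, next() yields further candidates until NULL, and arg carries the runqueue being drained:

	struct rq_iterator {
		void *arg;                          /* opaque: the busiest rq */
		struct task_struct *(*start)(void *);
		struct task_struct *(*next)(void *);
	};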
| 213 | static void task_tick_rt(struct rq *rq, struct task_struct *p) | ||
| 214 | { | ||
| 215 | /* | ||
| 216 | * RR tasks need a special form of timeslice management. | ||
| 217 | * FIFO tasks have no timeslices. | ||
| 218 | */ | ||
| 219 | if (p->policy != SCHED_RR) | ||
| 220 | return; | ||
| 221 | |||
| 222 | if (--p->time_slice) | ||
| 223 | return; | ||
| 224 | |||
| 225 | p->time_slice = static_prio_timeslice(p->static_prio); | ||
| 226 | set_tsk_need_resched(p); | ||
| 227 | |||
| 228 | /* put it at the end of the queue: */ | ||
| 229 | requeue_task_rt(rq, p); | ||
| 230 | } | ||
| 231 | |||
| 232 | /* | ||
| 233 | * No parent/child timeslice management necessary for RT tasks, | ||
| 234 | * just activate them: | ||
| 235 | */ | ||
| 236 | static void task_new_rt(struct rq *rq, struct task_struct *p) | ||
| 237 | { | ||
| 238 | activate_task(rq, p, 1); | ||
| 239 | } | ||
| 240 | |||
| 241 | static struct sched_class rt_sched_class __read_mostly = { | ||
| 242 | .enqueue_task = enqueue_task_rt, | ||
| 243 | .dequeue_task = dequeue_task_rt, | ||
| 244 | .yield_task = yield_task_rt, | ||
| 245 | |||
| 246 | .check_preempt_curr = check_preempt_curr_rt, | ||
| 247 | |||
| 248 | .pick_next_task = pick_next_task_rt, | ||
| 249 | .put_prev_task = put_prev_task_rt, | ||
| 250 | |||
| 251 | .load_balance = load_balance_rt, | ||
| 252 | |||
| 253 | .task_tick = task_tick_rt, | ||
| 254 | .task_new = task_new_rt, | ||
| 255 | }; | ||
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h new file mode 100644 index 000000000000..c63c38f6fa6e --- /dev/null +++ b/kernel/sched_stats.h | |||
| @@ -0,0 +1,235 @@ | |||
| 1 | |||
| 2 | #ifdef CONFIG_SCHEDSTATS | ||
| 3 | /* | ||
| 4 | * bump this up when changing the output format or the meaning of an | ||
| 5 | * existing field, so that tools can adapt (or abort) | ||
| 6 | */ | ||
| 7 | #define SCHEDSTAT_VERSION 14 | ||
| 8 | |||
| 9 | static int show_schedstat(struct seq_file *seq, void *v) | ||
| 10 | { | ||
| 11 | int cpu; | ||
| 12 | |||
| 13 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); | ||
| 14 | seq_printf(seq, "timestamp %lu\n", jiffies); | ||
| 15 | for_each_online_cpu(cpu) { | ||
| 16 | struct rq *rq = cpu_rq(cpu); | ||
| 17 | #ifdef CONFIG_SMP | ||
| 18 | struct sched_domain *sd; | ||
| 19 | int dcnt = 0; | ||
| 20 | #endif | ||
| 21 | |||
| 22 | /* runqueue-specific stats */ | ||
| 23 | seq_printf(seq, | ||
| 24 | "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %llu %llu %lu", | ||
| 25 | cpu, rq->yld_both_empty, | ||
| 26 | rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt, | ||
| 27 | rq->sched_switch, rq->sched_cnt, rq->sched_goidle, | ||
| 28 | rq->ttwu_cnt, rq->ttwu_local, | ||
| 29 | rq->rq_sched_info.cpu_time, | ||
| 30 | rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt); | ||
| 31 | |||
| 32 | seq_printf(seq, "\n"); | ||
| 33 | |||
| 34 | #ifdef CONFIG_SMP | ||
| 35 | /* domain-specific stats */ | ||
| 36 | preempt_disable(); | ||
| 37 | for_each_domain(cpu, sd) { | ||
| 38 | enum cpu_idle_type itype; | ||
| 39 | char mask_str[NR_CPUS]; | ||
| 40 | |||
| 41 | cpumask_scnprintf(mask_str, NR_CPUS, sd->span); | ||
| 42 | seq_printf(seq, "domain%d %s", dcnt++, mask_str); | ||
| 43 | for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; | ||
| 44 | itype++) { | ||
| 45 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu " | ||
| 46 | "%lu", | ||
| 47 | sd->lb_cnt[itype], | ||
| 48 | sd->lb_balanced[itype], | ||
| 49 | sd->lb_failed[itype], | ||
| 50 | sd->lb_imbalance[itype], | ||
| 51 | sd->lb_gained[itype], | ||
| 52 | sd->lb_hot_gained[itype], | ||
| 53 | sd->lb_nobusyq[itype], | ||
| 54 | sd->lb_nobusyg[itype]); | ||
| 55 | } | ||
| 56 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu" | ||
| 57 | " %lu %lu %lu\n", | ||
| 58 | sd->alb_cnt, sd->alb_failed, sd->alb_pushed, | ||
| 59 | sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed, | ||
| 60 | sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, | ||
| 61 | sd->ttwu_wake_remote, sd->ttwu_move_affine, | ||
| 62 | sd->ttwu_move_balance); | ||
| 63 | } | ||
| 64 | preempt_enable(); | ||
| 65 | #endif | ||
| 66 | } | ||
| 67 | return 0; | ||
| 68 | } | ||
| 69 | |||
| 70 | static int schedstat_open(struct inode *inode, struct file *file) | ||
| 71 | { | ||
| 72 | unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); | ||
| 73 | char *buf = kmalloc(size, GFP_KERNEL); | ||
| 74 | struct seq_file *m; | ||
| 75 | int res; | ||
| 76 | |||
| 77 | if (!buf) | ||
| 78 | return -ENOMEM; | ||
| 79 | res = single_open(file, show_schedstat, NULL); | ||
| 80 | if (!res) { | ||
| 81 | m = file->private_data; | ||
| 82 | m->buf = buf; | ||
| 83 | m->size = size; | ||
| 84 | } else | ||
| 85 | kfree(buf); | ||
| 86 | return res; | ||
| 87 | } | ||
| 88 | |||
| 89 | const struct file_operations proc_schedstat_operations = { | ||
| 90 | .open = schedstat_open, | ||
| 91 | .read = seq_read, | ||
| 92 | .llseek = seq_lseek, | ||
| 93 | .release = single_release, | ||
| 94 | }; | ||
| 95 | |||
| 96 | /* | ||
| 97 | * Expects runqueue lock to be held for atomicity of update | ||
| 98 | */ | ||
| 99 | static inline void | ||
| 100 | rq_sched_info_arrive(struct rq *rq, unsigned long long delta) | ||
| 101 | { | ||
| 102 | if (rq) { | ||
| 103 | rq->rq_sched_info.run_delay += delta; | ||
| 104 | rq->rq_sched_info.pcnt++; | ||
| 105 | } | ||
| 106 | } | ||
| 107 | |||
| 108 | /* | ||
| 109 | * Expects runqueue lock to be held for atomicity of update | ||
| 110 | */ | ||
| 111 | static inline void | ||
| 112 | rq_sched_info_depart(struct rq *rq, unsigned long long delta) | ||
| 113 | { | ||
| 114 | if (rq) | ||
| 115 | rq->rq_sched_info.cpu_time += delta; | ||
| 116 | } | ||
| 117 | # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) | ||
| 118 | # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) | ||
| 119 | #else /* !CONFIG_SCHEDSTATS */ | ||
| 120 | static inline void | ||
| 121 | rq_sched_info_arrive(struct rq *rq, unsigned long long delta) | ||
| 122 | {} | ||
| 123 | static inline void | ||
| 124 | rq_sched_info_depart(struct rq *rq, unsigned long long delta) | ||
| 125 | {} | ||
| 126 | # define schedstat_inc(rq, field) do { } while (0) | ||
| 127 | # define schedstat_add(rq, field, amt) do { } while (0) | ||
| 128 | #endif | ||
| 129 | |||
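Usage note: since the !CONFIG_SCHEDSTATS variants expand to empty statements, call sites cost nothing when the option is off. For example, the idle class earlier in this patch counts idle picks with a single line:

	schedstat_inc(rq, sched_goidle);	/* no-op unless CONFIG_SCHEDSTATS */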
| 130 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | ||
| 131 | /* | ||
| 132 | * Called when a process is dequeued from the active array and given | ||
| 133 | * the cpu. We should note that with the exception of interactive | ||
| 134 | * tasks, the expired queue will become the active queue after the active | ||
| 135 | * queue is empty, without explicitly dequeuing and requeuing tasks in the | ||
| 136 | * expired queue. (Interactive tasks may be requeued directly to the | ||
| 137 | * active queue, thus delaying tasks in the expired queue from running; | ||
| 138 | * see scheduler_tick()). | ||
| 139 | * | ||
| 140 | * This function is only called from sched_info_arrive(), rather than | ||
| 141 | * dequeue_task(). Even though a task may be queued and dequeued multiple | ||
| 142 | * times as it is shuffled about, we're really interested in knowing how | ||
| 143 | * long it was from the *first* time it was queued to the time that it | ||
| 144 | * finally hit a cpu. | ||
| 145 | */ | ||
| 146 | static inline void sched_info_dequeued(struct task_struct *t) | ||
| 147 | { | ||
| 148 | t->sched_info.last_queued = 0; | ||
| 149 | } | ||
| 150 | |||
| 151 | /* | ||
| 152 | * Called when a task finally hits the cpu. We can now calculate how | ||
| 153 | * long it was waiting to run. We also note when it began so that we | ||
| 154 | * can keep stats on how long its timeslice is. | ||
| 155 | */ | ||
| 156 | static void sched_info_arrive(struct task_struct *t) | ||
| 157 | { | ||
| 158 | unsigned long long now = sched_clock(), delta = 0; | ||
| 159 | |||
| 160 | if (t->sched_info.last_queued) | ||
| 161 | delta = now - t->sched_info.last_queued; | ||
| 162 | sched_info_dequeued(t); | ||
| 163 | t->sched_info.run_delay += delta; | ||
| 164 | t->sched_info.last_arrival = now; | ||
| 165 | t->sched_info.pcnt++; | ||
| 166 | |||
| 167 | rq_sched_info_arrive(task_rq(t), delta); | ||
| 168 | } | ||
| 169 | |||
| 170 | /* | ||
| 171 | * Called when a process is queued into either the active or expired | ||
| 172 | * array. The time is noted and later used to determine how long the | ||
| 173 | * task had to wait to reach the cpu. Since the expired queue will | ||
| 174 | * become the active queue after active queue is empty, without dequeuing | ||
| 175 | * and requeuing any tasks, we are interested in queuing to either. It | ||
| 176 | * is unusual but not impossible for tasks to be dequeued and immediately | ||
| 177 | * requeued in the same or another array: this can happen in sched_yield(), | ||
| 178 | * set_user_nice(), and even load_balance() as it moves tasks from runqueue | ||
| 179 | * to runqueue. | ||
| 180 | * | ||
| 181 | * This function is only called from enqueue_task(), and it only updates | ||
| 182 | * the timestamp if it is not already set. It's assumed that | ||
| 183 | * sched_info_dequeued() will clear that stamp when appropriate. | ||
| 184 | */ | ||
| 185 | static inline void sched_info_queued(struct task_struct *t) | ||
| 186 | { | ||
| 187 | if (unlikely(sched_info_on())) | ||
| 188 | if (!t->sched_info.last_queued) | ||
| 189 | t->sched_info.last_queued = sched_clock(); | ||
| 190 | } | ||
| 191 | |||
| 192 | /* | ||
| 193 | * Called when a process ceases being the active-running process, either | ||
| 194 | * voluntarily or involuntarily. Now we can calculate how long we ran. | ||
| 195 | */ | ||
| 196 | static inline void sched_info_depart(struct task_struct *t) | ||
| 197 | { | ||
| 198 | unsigned long long delta = sched_clock() - t->sched_info.last_arrival; | ||
| 199 | |||
| 200 | t->sched_info.cpu_time += delta; | ||
| 201 | rq_sched_info_depart(task_rq(t), delta); | ||
| 202 | } | ||
| 203 | |||
| 204 | /* | ||
| 205 | * Called when tasks are switched involuntarily, typically because | ||
| 206 | * their time slice expired. (This may also be called when switching to or from | ||
| 207 | * the idle task.) We are only called when prev != next. | ||
| 208 | */ | ||
| 209 | static inline void | ||
| 210 | __sched_info_switch(struct task_struct *prev, struct task_struct *next) | ||
| 211 | { | ||
| 212 | struct rq *rq = task_rq(prev); | ||
| 213 | |||
| 214 | /* | ||
| 215 | * prev now departs the cpu. It's not interesting to record | ||
| 216 | * stats about how efficient we were at scheduling the idle | ||
| 217 | * process, however. | ||
| 218 | */ | ||
| 219 | if (prev != rq->idle) | ||
| 220 | sched_info_depart(prev); | ||
| 221 | |||
| 222 | if (next != rq->idle) | ||
| 223 | sched_info_arrive(next); | ||
| 224 | } | ||
| 225 | static inline void | ||
| 226 | sched_info_switch(struct task_struct *prev, struct task_struct *next) | ||
| 227 | { | ||
| 228 | if (unlikely(sched_info_on())) | ||
| 229 | __sched_info_switch(prev, next); | ||
| 230 | } | ||
| 231 | #else | ||
| 232 | #define sched_info_queued(t) do { } while (0) | ||
| 233 | #define sched_info_switch(t, next) do { } while (0) | ||
| 234 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ | ||
| 235 | |||
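A sketch of the expected call site, consistent with the comment above that the switch hooks run only for prev != next; this is an assumption about schedule() in kernel/sched.c (outside this hunk), not code from it:

	if (likely(prev != next)) {
		sched_info_switch(prev, next);	/* depart prev, arrive next */
		rq->nr_switches++;
		context_switch(rq, prev, next);	/* unlocks the rq */
	}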
diff --git a/kernel/softirq.c b/kernel/softirq.c index 0b9886a00e74..73217a9e2875 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -488,7 +488,6 @@ void __init softirq_init(void) | |||
| 488 | 488 | ||
| 489 | static int ksoftirqd(void * __bind_cpu) | 489 | static int ksoftirqd(void * __bind_cpu) |
| 490 | { | 490 | { |
| 491 | set_user_nice(current, 19); | ||
| 492 | current->flags |= PF_NOFREEZE; | 491 | current->flags |= PF_NOFREEZE; |
| 493 | 492 | ||
| 494 | set_current_state(TASK_INTERRUPTIBLE); | 493 | set_current_state(TASK_INTERRUPTIBLE); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 30ee462ee79f..51f5dac42a00 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -206,7 +206,87 @@ static ctl_table root_table[] = { | |||
| 206 | { .ctl_name = 0 } | 206 | { .ctl_name = 0 } |
| 207 | }; | 207 | }; |
| 208 | 208 | ||
| 209 | #ifdef CONFIG_SCHED_DEBUG | ||
| 210 | static int min_sched_granularity_ns = 100000; /* 100 usecs; int, as proc_dointvec_minmax takes int bounds */ | ||
| 211 | static int max_sched_granularity_ns = 1000000000; /* 1 second */ | ||
| 212 | static int min_wakeup_granularity_ns; /* 0 usecs */ | ||
| 213 | static int max_wakeup_granularity_ns = 1000000000; /* 1 second */ | ||
| 214 | #endif | ||
| 215 | |||
| 209 | static ctl_table kern_table[] = { | 216 | static ctl_table kern_table[] = { |
| 217 | #ifdef CONFIG_SCHED_DEBUG | ||
| 218 | { | ||
| 219 | .ctl_name = CTL_UNNUMBERED, | ||
| 220 | .procname = "sched_granularity_ns", | ||
| 221 | .data = &sysctl_sched_granularity, | ||
| 222 | .maxlen = sizeof(unsigned int), | ||
| 223 | .mode = 0644, | ||
| 224 | .proc_handler = &proc_dointvec_minmax, | ||
| 225 | .strategy = &sysctl_intvec, | ||
| 226 | .extra1 = &min_sched_granularity_ns, | ||
| 227 | .extra2 = &max_sched_granularity_ns, | ||
| 228 | }, | ||
| 229 | { | ||
| 230 | .ctl_name = CTL_UNNUMBERED, | ||
| 231 | .procname = "sched_wakeup_granularity_ns", | ||
| 232 | .data = &sysctl_sched_wakeup_granularity, | ||
| 233 | .maxlen = sizeof(unsigned int), | ||
| 234 | .mode = 0644, | ||
| 235 | .proc_handler = &proc_dointvec_minmax, | ||
| 236 | .strategy = &sysctl_intvec, | ||
| 237 | .extra1 = &min_wakeup_granularity_ns, | ||
| 238 | .extra2 = &max_wakeup_granularity_ns, | ||
| 239 | }, | ||
| 240 | { | ||
| 241 | .ctl_name = CTL_UNNUMBERED, | ||
| 242 | .procname = "sched_batch_wakeup_granularity_ns", | ||
| 243 | .data = &sysctl_sched_batch_wakeup_granularity, | ||
| 244 | .maxlen = sizeof(unsigned int), | ||
| 245 | .mode = 0644, | ||
| 246 | .proc_handler = &proc_dointvec_minmax, | ||
| 247 | .strategy = &sysctl_intvec, | ||
| 248 | .extra1 = &min_wakeup_granularity_ns, | ||
| 249 | .extra2 = &max_wakeup_granularity_ns, | ||
| 250 | }, | ||
| 251 | { | ||
| 252 | .ctl_name = CTL_UNNUMBERED, | ||
| 253 | .procname = "sched_stat_granularity_ns", | ||
| 254 | .data = &sysctl_sched_stat_granularity, | ||
| 255 | .maxlen = sizeof(unsigned int), | ||
| 256 | .mode = 0644, | ||
| 257 | .proc_handler = &proc_dointvec_minmax, | ||
| 258 | .strategy = &sysctl_intvec, | ||
| 259 | .extra1 = &min_wakeup_granularity_ns, | ||
| 260 | .extra2 = &max_wakeup_granularity_ns, | ||
| 261 | }, | ||
| 262 | { | ||
| 263 | .ctl_name = CTL_UNNUMBERED, | ||
| 264 | .procname = "sched_runtime_limit_ns", | ||
| 265 | .data = &sysctl_sched_runtime_limit, | ||
| 266 | .maxlen = sizeof(unsigned int), | ||
| 267 | .mode = 0644, | ||
| 268 | .proc_handler = &proc_dointvec_minmax, | ||
| 269 | .strategy = &sysctl_intvec, | ||
| 270 | .extra1 = &min_sched_granularity_ns, | ||
| 271 | .extra2 = &max_sched_granularity_ns, | ||
| 272 | }, | ||
| 273 | { | ||
| 274 | .ctl_name = CTL_UNNUMBERED, | ||
| 275 | .procname = "sched_child_runs_first", | ||
| 276 | .data = &sysctl_sched_child_runs_first, | ||
| 277 | .maxlen = sizeof(unsigned int), | ||
| 278 | .mode = 0644, | ||
| 279 | .proc_handler = &proc_dointvec, | ||
| 280 | }, | ||
| 281 | { | ||
| 282 | .ctl_name = CTL_UNNUMBERED, | ||
| 283 | .procname = "sched_features", | ||
| 284 | .data = &sysctl_sched_features, | ||
| 285 | .maxlen = sizeof(unsigned int), | ||
| 286 | .mode = 0644, | ||
| 287 | .proc_handler = &proc_dointvec, | ||
| 288 | }, | ||
| 289 | #endif | ||
| 210 | { | 290 | { |
| 211 | .ctl_name = KERN_PANIC, | 291 | .ctl_name = KERN_PANIC, |
| 212 | .procname = "panic", | 292 | .procname = "panic", |
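The .data pointers in the block above reference tunables assumed to be declared elsewhere in this series (for instance in include/linux/sched.h, outside this hunk); all hold nanosecond values or flag words and become writable at runtime under /proc/sys/kernel/ (e.g. sched_granularity_ns). A sketch of the assumed declarations:

	extern unsigned int sysctl_sched_granularity;
	extern unsigned int sysctl_sched_wakeup_granularity;
	extern unsigned int sysctl_sched_batch_wakeup_granularity;
	extern unsigned int sysctl_sched_stat_granularity;
	extern unsigned int sysctl_sched_runtime_limit;
	extern unsigned int sysctl_sched_child_runs_first;
	extern unsigned int sysctl_sched_features;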
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index da95e10cfd70..fab32a286371 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
| @@ -105,6 +105,15 @@ config DETECT_SOFTLOCKUP | |||
| 105 | can be detected via the NMI-watchdog, on platforms that | 105 | can be detected via the NMI-watchdog, on platforms that |
| 106 | support it.) | 106 | support it.) |
| 107 | 107 | ||
| 108 | config SCHED_DEBUG | ||
| 109 | bool "Collect scheduler debugging info" | ||
| 110 | depends on DEBUG_KERNEL && PROC_FS | ||
| 111 | default y | ||
| 112 | help | ||
| 113 | If you say Y here, the /proc/sched_debug file will be provided | ||
| 114 | that can help debug the scheduler. The runtime overhead of this | ||
| 115 | option is minimal. | ||
| 116 | |||
| 108 | config SCHEDSTATS | 117 | config SCHEDSTATS |
| 109 | bool "Collect scheduler statistics" | 118 | bool "Collect scheduler statistics" |
| 110 | depends on DEBUG_KERNEL && PROC_FS | 119 | depends on DEBUG_KERNEL && PROC_FS |
