163 files changed, 9612 insertions, 4547 deletions
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index 09dd510c4a5f..576ce463cf44 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt | |||
@@ -78,7 +78,8 @@ Identifying GPIOs | |||
78 | ----------------- | 78 | ----------------- |
79 | GPIOs are identified by unsigned integers in the range 0..MAX_INT. That | 79 | GPIOs are identified by unsigned integers in the range 0..MAX_INT. That |
80 | reserves "negative" numbers for other purposes like marking signals as | 80 | reserves "negative" numbers for other purposes like marking signals as |
81 | "not available on this board", or indicating faults. | 81 | "not available on this board", or indicating faults. Code that doesn't |
82 | touch the underlying hardware treats these integers as opaque cookies. | ||
82 | 83 | ||
83 | Platforms define how they use those integers, and usually #define symbols | 84 | Platforms define how they use those integers, and usually #define symbols |
84 | for the GPIO lines so that board-specific setup code directly corresponds | 85 | for the GPIO lines so that board-specific setup code directly corresponds |
@@ -139,10 +140,10 @@ issues including wire-OR and output latencies. | |||
139 | The get/set calls have no error returns because "invalid GPIO" should have | 140 | The get/set calls have no error returns because "invalid GPIO" should have |
140 | been reported earlier in gpio_set_direction(). However, note that not all | 141 | been reported earlier in gpio_set_direction(). However, note that not all |
141 | platforms can read the value of output pins; those that can't should always | 142 | platforms can read the value of output pins; those that can't should always |
142 | return zero. Also, these calls will be ignored for GPIOs that can't safely | 143 | return zero. Also, using these calls for GPIOs that can't safely be accessed |
143 | be accessed wihtout sleeping (see below). | 144 | without sleeping (see below) is an error. |
144 | 145 | ||
145 | Platform-specific implementations are encouraged to optimise the two | 146 | Platform-specific implementations are encouraged to optimize the two |
146 | calls to access the GPIO value in cases where the GPIO number (and for | 147 | calls to access the GPIO value in cases where the GPIO number (and for |
147 | output, value) are constant. It's normal for them to need only a couple | 148 | output, value) are constant. It's normal for them to need only a couple |
148 | of instructions in such cases (reading or writing a hardware register), | 149 | of instructions in such cases (reading or writing a hardware register), |
@@ -239,7 +240,8 @@ options are part of the IRQ interface, e.g. IRQF_TRIGGER_FALLING, as are | |||
239 | system wakeup capabilities. | 240 | system wakeup capabilities. |
240 | 241 | ||
241 | Non-error values returned from irq_to_gpio() would most commonly be used | 242 | Non-error values returned from irq_to_gpio() would most commonly be used |
242 | with gpio_get_value(). | 243 | with gpio_get_value(), for example to initialize or update driver state |
244 | when the IRQ is edge-triggered. | ||
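
As a sketch of that pattern, the handler below caches the line level on each
edge; irq_to_gpio() and gpio_get_value() are the calls described here, while
demo_state and demo_gpio_irq are purely hypothetical names:

#include <linux/interrupt.h>
#include <asm/gpio.h>

/* Hypothetical per-device state, updated on every edge */
struct demo_state {
	int level;
};

static irqreturn_t demo_gpio_irq(int irq, void *dev_id)
{
	struct demo_state *state = dev_id;

	/* Sample the GPIO behind this IRQ and cache its current level */
	state->level = gpio_get_value(irq_to_gpio(irq));
	return IRQ_HANDLED;
}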
243 | 245 | ||
244 | 246 | ||
245 | 247 | ||
@@ -260,9 +262,10 @@ pullups (or pulldowns) so that the on-chip ones should not be used. | |||
260 | There are other system-specific mechanisms that are not specified here, | 262 | There are other system-specific mechanisms that are not specified here, |
261 | like the aforementioned options for input de-glitching and wire-OR output. | 263 | like the aforementioned options for input de-glitching and wire-OR output. |
262 | Hardware may support reading or writing GPIOs in gangs, but that's usually | 264 | Hardware may support reading or writing GPIOs in gangs, but that's usually |
263 | configuration dependednt: for GPIOs sharing the same bank. (GPIOs are | 265 | configuration dependent: for GPIOs sharing the same bank. (GPIOs are |
264 | commonly grouped in banks of 16 or 32, with a given SOC having several such | 266 | commonly grouped in banks of 16 or 32, with a given SOC having several such |
265 | banks.) Code relying on such mechanisms will necessarily be nonportable. | 267 | banks.) Some systems can trigger IRQs from output GPIOs. Code relying on |
268 | such mechanisms will necessarily be nonportable. | ||
266 | 269 | ||
267 | Dynamic definition of GPIOs is not currently supported; for example, as | 270 | Dynamic definition of GPIOs is not currently supported; for example, as |
268 | a side effect of configuring an add-on board with some GPIO expanders. | 271 | a side effect of configuring an add-on board with some GPIO expanders. |
diff --git a/Documentation/hrtimer/timer_stats.txt b/Documentation/hrtimer/timer_stats.txt new file mode 100644 index 000000000000..27f782e3593f --- /dev/null +++ b/Documentation/hrtimer/timer_stats.txt | |||
@@ -0,0 +1,68 @@ | |||
1 | timer_stats - timer usage statistics | ||
2 | ------------------------------------ | ||
3 | |||
4 | timer_stats is a debugging facility to make the timer (ab)usage in a Linux | ||
5 | system visible to kernel and userspace developers. It is not intended for | ||
6 | production usage as it adds significant overhead to the (hr)timer code and the | ||
7 | (hr)timer data structures. | ||
8 | |||
9 | timer_stats should be used by kernel and userspace developers to verify that | ||
10 | their code does not make undue use of timers. This helps to avoid unnecessary | ||
11 | wakeups, which in turn optimizes power consumption. | ||
12 | |||
13 | It can be enabled by CONFIG_TIMER_STATS in the "Kernel hacking" configuration | ||
14 | section. | ||
15 | |||
16 | timer_stats collects information about the timer events which are fired in a | ||
17 | Linux system over a sample period: | ||
18 | |||
19 | - the pid of the task (process) which initialized the timer | ||
20 | - the name of the process which initialized the timer | ||
21 | - the function where the timer was initialized | ||
22 | - the callback function which is associated with the timer | ||
23 | - the number of events (callbacks) | ||
24 | |||
25 | timer_stats adds an entry to /proc: /proc/timer_stats | ||
26 | |||
27 | This entry is used to control the statistics functionality and to read out the | ||
28 | sampled information. | ||
29 | |||
30 | The timer_stats functionality is inactive on bootup. | ||
31 | |||
32 | To activate a sample period, issue: | ||
33 | # echo 1 >/proc/timer_stats | ||
34 | |||
35 | To stop a sample period, issue: | ||
36 | # echo 0 >/proc/timer_stats | ||
37 | |||
38 | The statistics can be retrieved by: | ||
39 | # cat /proc/timer_stats | ||
40 | |||
41 | The readout of /proc/timer_stats automatically disables sampling. The sampled | ||
42 | information is kept until a new sample period is started. This allows multiple | ||
43 | readouts. | ||
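
The same sequence can be driven programmatically. The userspace sketch below
(not part of this patch; the 10 second window is an arbitrary choice) starts
a sample period, stops it, and dumps the result:

#include <stdio.h>
#include <unistd.h>

#define TIMER_STATS "/proc/timer_stats"

static int timer_stats_write(const char *val)
{
	FILE *f = fopen(TIMER_STATS, "w");

	if (!f)
		return -1;
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	char line[256];
	FILE *f;

	if (timer_stats_write("1\n"))	/* start a sample period */
		return 1;
	sleep(10);			/* arbitrary sample window */
	timer_stats_write("0\n");	/* stop the sample period */

	f = fopen(TIMER_STATS, "r");	/* readout */
	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}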
44 | |||
45 | Sample output of /proc/timer_stats: | ||
46 | |||
47 | Timerstats sample period: 3.888770 s | ||
48 | 12, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) | ||
49 | 15, 1 swapper hcd_submit_urb (rh_timer_func) | ||
50 | 4, 959 kedac schedule_timeout (process_timeout) | ||
51 | 1, 0 swapper page_writeback_init (wb_timer_fn) | ||
52 | 28, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) | ||
53 | 22, 2948 IRQ 4 tty_flip_buffer_push (delayed_work_timer_fn) | ||
54 | 3, 3100 bash schedule_timeout (process_timeout) | ||
55 | 1, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) | ||
56 | 1, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) | ||
57 | 1, 1 swapper neigh_table_init_no_netlink (neigh_periodic_timer) | ||
58 | 1, 2292 ip __netdev_watchdog_up (dev_watchdog) | ||
59 | 1, 23 events/1 do_cache_clean (delayed_work_timer_fn) | ||
60 | 90 total events, 30.0 events/sec | ||
61 | |||
62 | The first column is the number of events, the second column the pid, the third | ||
63 | column is the name of the process. The fourth column shows the function which | ||
64 | initialized the timer and, in parentheses, the callback function which was | ||
65 | executed on expiry. | ||
66 | |||
67 | Thomas, Ingo | ||
68 | |||
diff --git a/Documentation/hrtimers/highres.txt b/Documentation/hrtimers/highres.txt new file mode 100644 index 000000000000..ce0e9a91e157 --- /dev/null +++ b/Documentation/hrtimers/highres.txt | |||
@@ -0,0 +1,249 @@ | |||
1 | High resolution timers and dynamic ticks design notes | ||
2 | ----------------------------------------------------- | ||
3 | |||
4 | Further information can be found in the paper accompanying the OLS 2006 talk "hrtimers | ||
5 | and beyond". The paper is part of the OLS 2006 Proceedings Volume 1, which can | ||
6 | be found on the OLS website: | ||
7 | http://www.linuxsymposium.org/2006/linuxsymposium_procv1.pdf | ||
8 | |||
9 | The slides for this talk are available from: | ||
10 | http://tglx.de/projects/hrtimers/ols2006-hrtimers.pdf | ||
11 | |||
12 | The slides contain five figures (pages 2, 15, 18, 20, 22), which illustrate the | ||
13 | changes in the time(r) related Linux subsystems. Figure #1 (p. 2) shows the | ||
14 | design of the Linux time(r) system before hrtimers and other building blocks | ||
15 | got merged into mainline. | ||
16 | |||
17 | Note: the paper and the slides talk about "clock event source", while we have | ||
18 | since switched to the name "clock event devices". | ||
19 | |||
20 | The design contains the following basic building blocks: | ||
21 | |||
22 | - hrtimer base infrastructure | ||
23 | - timeofday and clock source management | ||
24 | - clock event management | ||
25 | - high resolution timer functionality | ||
26 | - dynamic ticks | ||
27 | |||
28 | |||
29 | hrtimer base infrastructure | ||
30 | --------------------------- | ||
31 | |||
32 | The hrtimer base infrastructure was merged into the 2.6.16 kernel. Details of | ||
33 | the base implementation are covered in Documentation/hrtimers/hrtimers.txt. See | ||
34 | also figure #2 (OLS slides p. 15). | ||
35 | |||
36 | The main differences from the timer wheel, which holds the armed timer_list type | ||
37 | timers, are: | ||
38 | - time ordered enqueueing into a rb-tree | ||
39 | - independent of ticks (the processing is based on nanoseconds) | ||
40 | |||
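For orientation, here is a minimal sketch of arming such a timer; the demo_*
names are hypothetical and the mode constants have varied slightly across
kernel versions:

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer demo_timer;

/* Expiry callback; the return value decides whether the timer is re-armed */
static enum hrtimer_restart demo_timer_fn(struct hrtimer *timer)
{
	printk(KERN_INFO "demo hrtimer expired\n");
	return HRTIMER_NORESTART;
}

static void demo_timer_arm(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	demo_timer.function = demo_timer_fn;
	/* Enqueued time ordered into the rb-tree; expires 500us from now */
	hrtimer_start(&demo_timer, ktime_set(0, 500000), HRTIMER_MODE_REL);
}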
41 | |||
42 | timeofday and clock source management | ||
43 | ------------------------------------- | ||
44 | |||
45 | John Stultz's Generic Time Of Day (GTOD) framework moves a large portion of | ||
46 | code out of the architecture-specific areas into a generic management | ||
47 | framework, as illustrated in figure #3 (OLS slides p. 18). The architecture | ||
48 | specific portion is reduced to the low level hardware details of the clock | ||
49 | sources, which are registered in the framework and selected on a quality-based | ||
50 | decision. The low level code provides hardware setup and readout routines and | ||
51 | initializes data structures, which are used by the generic time keeping code to | ||
52 | convert the clock ticks to nanosecond based time values. All other time keeping | ||
53 | related functionality is moved into the generic code. The GTOD base patch got | ||
54 | merged into the 2.6.18 kernel. | ||
55 | |||
56 | Further information about the Generic Time Of Day framework is available in the | ||
57 | OLS 2005 Proceedings Volume 1: | ||
58 | http://www.linuxsymposium.org/2005/linuxsymposium_procv1.pdf | ||
59 | |||
60 | The paper "We Are Not Getting Any Younger: A New Approach to Time and | ||
61 | Timers" was written by J. Stultz, D.V. Hart, & N. Aravamudan. | ||
62 | |||
63 | Figure #3 (OLS slides p.18) illustrates the transformation. | ||
64 | |||
65 | |||
66 | clock event management | ||
67 | ---------------------- | ||
68 | |||
69 | While clock sources provide read access to the monotonically increasing time | ||
70 | value, clock event devices are used to schedule the next event | ||
71 | interrupt(s). The next event is currently defined to be periodic, with its | ||
72 | period defined at compile time. The setup and selection of the event device | ||
73 | for various event driven functionalities is hardwired into the architecture | ||
74 | dependent code. This results in duplicated code across all architectures and | ||
75 | makes it extremely difficult to change the configuration of the system to use | ||
76 | event interrupt devices other than those already built into the | ||
77 | architecture. Another implication of the current design is that it is necessary | ||
78 | to touch all the architecture-specific implementations in order to provide new | ||
79 | functionality like high resolution timers or dynamic ticks. | ||
80 | |||
81 | The clock events subsystem tries to address this problem by providing a generic | ||
82 | solution to manage clock event devices and their usage for the various clock | ||
83 | event driven kernel functionalities. The goal of the clock event subsystem is | ||
84 | to minimize the clock event related architecture dependent code to the pure | ||
85 | hardware related handling and to allow easy addition and utilization of new | ||
86 | clock event devices. It also minimizes the duplicated code across the | ||
87 | architectures as it provides generic functionality down to the interrupt | ||
88 | service handler, which is almost inherently hardware dependent. | ||
89 | |||
90 | Clock event devices are registered either by the architecture dependent boot | ||
91 | code or at module insertion time. Each clock event device fills a data | ||
92 | structure with clock-specific property parameters and callback functions. The | ||
93 | clock event management decides, by using the specified property parameters, the | ||
94 | set of system functions a clock event device will be used to support. This | ||
95 | includes the distinction of per-CPU and per-system global event devices. | ||
96 | |||
97 | System-level global event devices are used for the Linux periodic tick. Per-CPU | ||
98 | event devices are used to provide local CPU functionality such as process | ||
99 | accounting, profiling, and high resolution timers. | ||
100 | |||
101 | The management layer assigns one or more of the following functions to a clock | ||
102 | event device: | ||
103 | - system global periodic tick (jiffies update) | ||
104 | - cpu local update_process_times | ||
105 | - cpu local profiling | ||
106 | - cpu local next event interrupt (non periodic mode) | ||
107 | |||
108 | The clock event device delegates the selection of those timer interrupt related | ||
109 | functions completely to the management layer. The clock management layer stores | ||
110 | a function pointer in the device description structure, which the hardware | ||
111 | level handler has to call. This removes a lot of duplicated code from the | ||
112 | architecture specific timer interrupt handlers and hands the control over the | ||
113 | clock event devices and the assignment of timer interrupt related functionality | ||
114 | to the core code. | ||
115 | |||
116 | The clock event layer API is rather small. Aside from the clock event device | ||
117 | registration interface, it provides functions to schedule the next event | ||
118 | interrupt, a clock event device notification service and support for suspend | ||
119 | and resume. | ||
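
To make the shape of this API concrete, here is a hedged sketch of a
driver-side registration, modeled on the lapic_clockevent conversion later in
this patch; the demo_* names and the 1 MHz rate are hypothetical:

#include <linux/clockchips.h>
#include <linux/time.h>

/* Program the (hypothetical) hardware comparator 'delta' clocks ahead */
static int demo_next_event(unsigned long delta,
			   struct clock_event_device *evt)
{
	/* hardware specific register write omitted */
	return 0;
}

/* Switch the hardware between periodic, oneshot and shutdown modes */
static void demo_set_mode(enum clock_event_mode mode,
			  struct clock_event_device *evt)
{
}

static struct clock_event_device demo_clockevent = {
	.name		= "demo",
	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.shift		= 32,
	.rating		= 100,
	.irq		= -1,
	.set_mode	= demo_set_mode,
	.set_next_event	= demo_next_event,
};

static void demo_clockevent_register(void)
{
	/* Scaled mult factor for an assumed 1 MHz event timer */
	demo_clockevent.mult = div_sc(1000000, NSEC_PER_SEC, 32);
	demo_clockevent.max_delta_ns =
		clockevent_delta2ns(0x7fffffff, &demo_clockevent);
	demo_clockevent.min_delta_ns =
		clockevent_delta2ns(0xf, &demo_clockevent);
	demo_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
	clockevents_register_device(&demo_clockevent);
}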
120 | |||
121 | The framework adds about 700 lines of code which results in a 2KB increase of | ||
122 | the kernel binary size. The conversion of i386 removes about 100 lines of | ||
123 | code. The binary size decrease is in the range of 400 bytes. We believe that the | ||
124 | increase of flexibility and the avoidance of duplicated code across | ||
125 | architectures justifies the slight increase of the binary size. | ||
126 | |||
127 | The conversion of an architecture has no functional impact, but allows the | ||
128 | high resolution and dynamic tick functionalities to be used without any change | ||
129 | to the clock event device and timer interrupt code. After the conversion the | ||
130 | enabling of high resolution timers and dynamic ticks is simply provided by | ||
131 | adding the kernel/time/Kconfig file to the architecture specific Kconfig and | ||
132 | adding the dynamic tick specific calls to the idle routine (a total of 3 lines | ||
133 | added to the idle function and the Kconfig file). | ||
134 | |||
135 | Figure #4 (OLS slides p.20) illustrates the transformation. | ||
136 | |||
137 | |||
138 | high resolution timer functionality | ||
139 | ----------------------------------- | ||
140 | |||
141 | During system boot it is not possible to use the high resolution timer | ||
142 | functionality, while making it possible would be difficult and would serve no | ||
143 | useful function. The initialization of the clock event device framework, the | ||
144 | clock source framework (GTOD) and hrtimers itself has to be done and | ||
145 | appropriate clock sources and clock event devices have to be registered before | ||
146 | the high resolution functionality can work. Up to the point where hrtimers are | ||
147 | initialized, the system works in the usual low resolution periodic mode. The | ||
148 | clock source and the clock event device layers provide notification functions | ||
149 | which inform hrtimers about availability of new hardware. hrtimers validates | ||
150 | the usability of the registered clock sources and clock event devices before | ||
151 | switching to high resolution mode. This also ensures that a kernel which is | ||
152 | configured for high resolution timers can run on a system which lacks the | ||
153 | necessary hardware support. | ||
154 | |||
155 | The high resolution timer code does not support SMP machines which have only | ||
156 | global clock event devices. The support of such hardware would involve IPI | ||
157 | calls when an interrupt happens. The overhead would be much larger than the | ||
158 | benefit. This is the reason why we currently disable high resolution and | ||
159 | dynamic ticks on i386 SMP systems which stop the local APIC in C3 power | ||
160 | state. A workaround is available as an idea, but the problem has not been | ||
161 | tackled yet. | ||
162 | |||
163 | The time ordered insertion of timers provides all the infrastructure to decide | ||
164 | whether the event device has to be reprogrammed when a timer is added. The | ||
165 | decision is made per timer base and synchronized across per-cpu timer bases in | ||
166 | a support function. The design allows the system to utilize separate per-CPU | ||
167 | clock event devices for the per-CPU timer bases, but currently only one | ||
168 | reprogrammable clock event device per-CPU is utilized. | ||
169 | |||
170 | When the timer interrupt happens, the next event interrupt handler is called | ||
171 | from the clock event distribution code and moves expired timers from the | ||
172 | red-black tree to a separate doubly linked list and invokes the softirq | ||
173 | handler. An additional mode field in the hrtimer structure allows the system to | ||
174 | execute callback functions directly from the next event interrupt handler. This | ||
175 | is restricted to code which can safely be executed in the hard interrupt | ||
176 | context. This applies, for example, to the common case of a wakeup function as | ||
177 | used by nanosleep. The advantage of executing the handler in the interrupt | ||
178 | context is the avoidance of up to two context switches - from the interrupted | ||
179 | context to the softirq and to the task which is woken up by the expired | ||
180 | timer. | ||
181 | |||
182 | Once a system has switched to high resolution mode, the periodic tick is | ||
183 | switched off. This disables the per system global periodic clock event device - | ||
184 | e.g. the PIT on i386 SMP systems. | ||
185 | |||
186 | The periodic tick functionality is provided by a per-CPU hrtimer. The callback | ||
187 | function is executed in the next event interrupt context and updates jiffies | ||
188 | and calls update_process_times and profiling. The implementation of the hrtimer | ||
189 | based periodic tick is designed to be extended with dynamic tick functionality. | ||
190 | This allows a single clock event device to be used to schedule high resolution | ||
191 | timer and periodic events (jiffies tick, profiling, process accounting) on UP | ||
192 | systems. This has been proved to work with the PIT on i386 and the Incrementer | ||
193 | on PPC. | ||
194 | |||
195 | The softirq for running the hrtimer queues and executing the callbacks has been | ||
196 | separated from the tick bound timer softirq to allow accurate delivery of high | ||
197 | resolution timer signals which are used by itimer and POSIX interval | ||
198 | timers. The execution of this softirq can still be delayed by other softirqs, | ||
199 | but the overall latencies have been significantly improved by this separation. | ||
200 | |||
201 | Figure #5 (OLS slides p.22) illustrates the transformation. | ||
202 | |||
203 | |||
204 | dynamic ticks | ||
205 | ------------- | ||
206 | |||
207 | Dynamic ticks are the logical consequence of the hrtimer based periodic tick | ||
208 | replacement (sched_tick). The functionality of the sched_tick hrtimer is | ||
209 | extended by three functions: | ||
210 | |||
211 | - hrtimer_stop_sched_tick | ||
212 | - hrtimer_restart_sched_tick | ||
213 | - hrtimer_update_jiffies | ||
214 | |||
215 | hrtimer_stop_sched_tick() is called when a CPU goes into idle state. The code | ||
216 | evaluates the next scheduled timer event (from both hrtimers and the timer | ||
217 | wheel) and, if the next event is further away than the next tick, it | ||
218 | reprograms the sched_tick to this future event, to allow longer idle sleeps | ||
219 | without worthless interruption by the periodic tick. The function is also | ||
220 | called when an interrupt happens during the idle period, which does not cause a | ||
221 | reschedule. The call is necessary as the interrupt handler might have armed a | ||
222 | new timer whose expiry time is before the time which was identified as the | ||
223 | nearest event in the previous call to hrtimer_stop_sched_tick. | ||
224 | |||
225 | hrtimer_restart_sched_tick() is called when the CPU leaves the idle state before | ||
226 | it calls schedule(). hrtimer_restart_sched_tick() resumes the periodic tick, | ||
227 | which is kept active until the next call to hrtimer_stop_sched_tick(). | ||
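
A hedged sketch of how these two calls slot into an architecture's idle loop;
safe_halt() stands in for the architecture specific idle wait and the details
(preemption, interrupts) are simplified:

static void demo_cpu_idle(void)
{
	for (;;) {
		/* Reprogram sched_tick to the next pending timer event */
		hrtimer_stop_sched_tick();
		while (!need_resched())
			safe_halt();	/* arch specific idle wait */
		/* Resume the periodic tick before scheduling */
		hrtimer_restart_sched_tick();
		schedule();
	}
}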
228 | |||
229 | hrtimer_update_jiffies() is called from irq_enter() when an interrupt happens | ||
230 | in the idle period to make sure that jiffies are up to date and the interrupt | ||
231 | handler does not have to deal with a possibly stale jiffies value. | ||
232 | |||
233 | The dynamic tick feature provides statistical values which are exported to | ||
234 | userspace via /proc/stats and can be made available for enhanced power | ||
235 | management control. | ||
236 | |||
237 | The implementation leaves room for further development like full tickless | ||
238 | systems, where the time slice is controlled by the scheduler, variable | ||
239 | frequency profiling, and a complete removal of jiffies in the future. | ||
240 | |||
241 | |||
242 | Aside from the current initial submission of i386 support, the patchset has been | ||
243 | extended to x86_64 and ARM already. Initial (work in progress) support is also | ||
244 | available for MIPS and PowerPC. | ||
245 | |||
246 | Thomas, Ingo | ||
247 | |||
248 | |||
249 | |||
diff --git a/Documentation/hrtimers.txt b/Documentation/hrtimers/hrtimers.txt index ce31f65e12e7..ce31f65e12e7 100644 --- a/Documentation/hrtimers.txt +++ b/Documentation/hrtimers/hrtimers.txt | |||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 22b19962a1a2..abd575cfc759 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -609,6 +609,10 @@ and is between 256 and 4096 characters. It is defined in the file | |||
609 | highmem otherwise. This also works to reduce highmem | 609 | highmem otherwise. This also works to reduce highmem |
610 | size on bigger boxes. | 610 | size on bigger boxes. |
611 | 611 | ||
612 | highres= [KNL] Enable/disable high resolution timer mode. | ||
613 | Valid parameters: "on", "off" | ||
614 | Default: "on" | ||
615 | |||
612 | hisax= [HW,ISDN] | 616 | hisax= [HW,ISDN] |
613 | See Documentation/isdn/README.HiSax. | 617 | See Documentation/isdn/README.HiSax. |
614 | 618 | ||
@@ -1078,6 +1082,10 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1078 | in certain environments such as networked servers or | 1082 | in certain environments such as networked servers or |
1079 | real-time systems. | 1083 | real-time systems. |
1080 | 1084 | ||
1085 | nohz= [KNL] Boottime enable/disable dynamic ticks | ||
1086 | Valid arguments: on, off | ||
1087 | Default: on | ||
1088 | |||
1081 | noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing | 1089 | noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing |
1082 | 1090 | ||
1083 | noirqdebug [IA-32] Disables the code which attempts to detect and | 1091 | noirqdebug [IA-32] Disables the code which attempts to detect and |
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index ec01f08f5642..e101846ab7dd 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c | |||
@@ -159,8 +159,7 @@ void __init init_IRQ(void) | |||
159 | int irq; | 159 | int irq; |
160 | 160 | ||
161 | for (irq = 0; irq < NR_IRQS; irq++) | 161 | for (irq = 0; irq < NR_IRQS; irq++) |
162 | irq_desc[irq].status |= IRQ_NOREQUEST | IRQ_DELAYED_DISABLE | | 162 | irq_desc[irq].status |= IRQ_NOREQUEST | IRQ_NOPROBE; |
163 | IRQ_NOPROBE; | ||
164 | 163 | ||
165 | #ifdef CONFIG_SMP | 164 | #ifdef CONFIG_SMP |
166 | bad_irq_desc.affinity = CPU_MASK_ALL; | 165 | bad_irq_desc.affinity = CPU_MASK_ALL; |
diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c index 40039b2a90b3..2703a730baf7 100644 --- a/arch/arm/mach-imx/time.c +++ b/arch/arm/mach-imx/time.c | |||
@@ -87,7 +87,7 @@ static struct clocksource clocksource_imx = { | |||
87 | .read = imx_get_cycles, | 87 | .read = imx_get_cycles, |
88 | .mask = 0xFFFFFFFF, | 88 | .mask = 0xFFFFFFFF, |
89 | .shift = 20, | 89 | .shift = 20, |
90 | .is_continuous = 1, | 90 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
91 | }; | 91 | }; |
92 | 92 | ||
93 | static int __init imx_clocksource_init(void) | 93 | static int __init imx_clocksource_init(void) |
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 2ec9a9e9a04d..45068c3d8dcc 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c | |||
@@ -395,7 +395,7 @@ static struct clocksource clocksource_ixp4xx = { | |||
395 | .read = ixp4xx_get_cycles, | 395 | .read = ixp4xx_get_cycles, |
396 | .mask = CLOCKSOURCE_MASK(32), | 396 | .mask = CLOCKSOURCE_MASK(32), |
397 | .shift = 20, | 397 | .shift = 20, |
398 | .is_continuous = 1, | 398 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
399 | }; | 399 | }; |
400 | 400 | ||
401 | unsigned long ixp4xx_timer_freq = FREQ; | 401 | unsigned long ixp4xx_timer_freq = FREQ; |
diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c index 5773b55ef4a6..7e132fcccd47 100644 --- a/arch/arm/mach-netx/time.c +++ b/arch/arm/mach-netx/time.c | |||
@@ -62,7 +62,7 @@ static struct clocksource clocksource_netx = { | |||
62 | .read = netx_get_cycles, | 62 | .read = netx_get_cycles, |
63 | .mask = CLOCKSOURCE_MASK(32), | 63 | .mask = CLOCKSOURCE_MASK(32), |
64 | .shift = 20, | 64 | .shift = 20, |
65 | .is_continuous = 1, | 65 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
66 | }; | 66 | }; |
67 | 67 | ||
68 | /* | 68 | /* |
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index ee2beb400414..fc3b82a740a0 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c | |||
@@ -112,7 +112,7 @@ static struct clocksource clocksource_pxa = { | |||
112 | .read = pxa_get_cycles, | 112 | .read = pxa_get_cycles, |
113 | .mask = CLOCKSOURCE_MASK(32), | 113 | .mask = CLOCKSOURCE_MASK(32), |
114 | .shift = 20, | 114 | .shift = 20, |
115 | .is_continuous = 1, | 115 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
116 | }; | 116 | }; |
117 | 117 | ||
118 | static void __init pxa_timer_init(void) | 118 | static void __init pxa_timer_init(void) |
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c index a2f74affaa98..c10833f2ee0c 100644 --- a/arch/avr32/kernel/time.c +++ b/arch/avr32/kernel/time.c | |||
@@ -37,7 +37,7 @@ static struct clocksource clocksource_avr32 = { | |||
37 | .read = read_cycle_count, | 37 | .read = read_cycle_count, |
38 | .mask = CLOCKSOURCE_MASK(32), | 38 | .mask = CLOCKSOURCE_MASK(32), |
39 | .shift = 16, | 39 | .shift = 16, |
40 | .is_continuous = 1, | 40 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
41 | }; | 41 | }; |
42 | 42 | ||
43 | /* | 43 | /* |
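
The hunks above all replace the old is_continuous field with the new flags
member. For reference, a complete registration in the new style might look
like the following sketch, modeled on the converted drivers above (the demo
names and the assumed 1 MHz counter are hypothetical):

#include <linux/clocksource.h>

/* Read the free-running hardware counter (hardware specific, stubbed) */
static cycle_t demo_get_cycles(void)
{
	return 0;
}

static struct clocksource clocksource_demo = {
	.name		= "demo",
	.rating		= 200,
	.read		= demo_get_cycles,
	.mask		= CLOCKSOURCE_MASK(32),
	.shift		= 20,
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init demo_clocksource_init(void)
{
	/* Convert the assumed 1 MHz counter frequency into the scaled
	 * mult factor used by the generic timekeeping code */
	clocksource_demo.mult =
		clocksource_hz2mult(1000000, clocksource_demo.shift);
	return clocksource_register(&clocksource_demo);
}
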
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 595fb771366e..1df4a1f14289 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig | |||
@@ -18,6 +18,18 @@ config GENERIC_TIME | |||
18 | bool | 18 | bool |
19 | default y | 19 | default y |
20 | 20 | ||
21 | config CLOCKSOURCE_WATCHDOG | ||
22 | bool | ||
23 | default y | ||
24 | |||
25 | config GENERIC_CLOCKEVENTS | ||
26 | bool | ||
27 | default y | ||
28 | |||
29 | config GENERIC_CLOCKEVENTS_BROADCAST | ||
30 | bool | ||
31 | default y | ||
32 | |||
21 | config LOCKDEP_SUPPORT | 33 | config LOCKDEP_SUPPORT |
22 | bool | 34 | bool |
23 | default y | 35 | default y |
@@ -74,6 +86,8 @@ source "init/Kconfig" | |||
74 | 86 | ||
75 | menu "Processor type and features" | 87 | menu "Processor type and features" |
76 | 88 | ||
89 | source "kernel/time/Kconfig" | ||
90 | |||
77 | config SMP | 91 | config SMP |
78 | bool "Symmetric multi-processing support" | 92 | bool "Symmetric multi-processing support" |
79 | ---help--- | 93 | ---help--- |
@@ -205,7 +219,7 @@ config PARAVIRT | |||
205 | 219 | ||
206 | config VMI | 220 | config VMI |
207 | bool "VMI Paravirt-ops support" | 221 | bool "VMI Paravirt-ops support" |
208 | depends on PARAVIRT | 222 | depends on PARAVIRT && !NO_HZ |
209 | default y | 223 | default y |
210 | help | 224 | help |
211 | VMI provides a paravirtualized interface to multiple hypervisors | 225 | VMI provides a paravirtualized interface to multiple hypervisors |
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index cbe4e601885c..4ae3dcf1d2f0 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile | |||
@@ -18,7 +18,7 @@ obj-$(CONFIG_X86_MSR) += msr.o | |||
18 | obj-$(CONFIG_X86_CPUID) += cpuid.o | 18 | obj-$(CONFIG_X86_CPUID) += cpuid.o |
19 | obj-$(CONFIG_MICROCODE) += microcode.o | 19 | obj-$(CONFIG_MICROCODE) += microcode.o |
20 | obj-$(CONFIG_APM) += apm.o | 20 | obj-$(CONFIG_APM) += apm.o |
21 | obj-$(CONFIG_X86_SMP) += smp.o smpboot.o | 21 | obj-$(CONFIG_X86_SMP) += smp.o smpboot.o tsc_sync.o |
22 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 22 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o |
23 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o | 23 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o |
24 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o | 24 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o |
@@ -32,7 +32,6 @@ obj-$(CONFIG_KPROBES) += kprobes.o | |||
32 | obj-$(CONFIG_MODULES) += module.o | 32 | obj-$(CONFIG_MODULES) += module.o |
33 | obj-y += sysenter.o vsyscall.o | 33 | obj-y += sysenter.o vsyscall.o |
34 | obj-$(CONFIG_ACPI_SRAT) += srat.o | 34 | obj-$(CONFIG_ACPI_SRAT) += srat.o |
35 | obj-$(CONFIG_HPET_TIMER) += time_hpet.o | ||
36 | obj-$(CONFIG_EFI) += efi.o efi_stub.o | 35 | obj-$(CONFIG_EFI) += efi.o efi_stub.o |
37 | obj-$(CONFIG_DOUBLEFAULT) += doublefault.o | 36 | obj-$(CONFIG_DOUBLEFAULT) += doublefault.o |
38 | obj-$(CONFIG_VM86) += vm86.o | 37 | obj-$(CONFIG_VM86) += vm86.o |
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index e94aff6888ca..fb3e72328a5a 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c | |||
@@ -25,6 +25,7 @@ | |||
25 | 25 | ||
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/acpi.h> | 27 | #include <linux/acpi.h> |
28 | #include <linux/acpi_pmtmr.h> | ||
28 | #include <linux/efi.h> | 29 | #include <linux/efi.h> |
29 | #include <linux/cpumask.h> | 30 | #include <linux/cpumask.h> |
30 | #include <linux/module.h> | 31 | #include <linux/module.h> |
@@ -615,6 +616,7 @@ static int __init acpi_parse_sbf(struct acpi_table_header *table) | |||
615 | } | 616 | } |
616 | 617 | ||
617 | #ifdef CONFIG_HPET_TIMER | 618 | #ifdef CONFIG_HPET_TIMER |
619 | #include <asm/hpet.h> | ||
618 | 620 | ||
619 | static int __init acpi_parse_hpet(struct acpi_table_header *table) | 621 | static int __init acpi_parse_hpet(struct acpi_table_header *table) |
620 | { | 622 | { |
@@ -645,24 +647,11 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) | |||
645 | hpet_res->end = (1 * 1024) - 1; | 647 | hpet_res->end = (1 * 1024) - 1; |
646 | } | 648 | } |
647 | 649 | ||
648 | #ifdef CONFIG_X86_64 | 650 | hpet_address = hpet_tbl->address.address; |
649 | vxtime.hpet_address = hpet_tbl->address.address; | ||
650 | |||
651 | printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", | 651 | printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", |
652 | hpet_tbl->id, vxtime.hpet_address); | 652 | hpet_tbl->id, hpet_address); |
653 | |||
654 | res_start = vxtime.hpet_address; | ||
655 | #else /* X86 */ | ||
656 | { | ||
657 | extern unsigned long hpet_address; | ||
658 | |||
659 | hpet_address = hpet_tbl->address.address; | ||
660 | printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", | ||
661 | hpet_tbl->id, hpet_address); | ||
662 | 653 | ||
663 | res_start = hpet_address; | 654 | res_start = hpet_address; |
664 | } | ||
665 | #endif /* X86 */ | ||
666 | 655 | ||
667 | if (hpet_res) { | 656 | if (hpet_res) { |
668 | hpet_res->start = res_start; | 657 | hpet_res->start = res_start; |
@@ -676,10 +665,6 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) | |||
676 | #define acpi_parse_hpet NULL | 665 | #define acpi_parse_hpet NULL |
677 | #endif | 666 | #endif |
678 | 667 | ||
679 | #ifdef CONFIG_X86_PM_TIMER | ||
680 | extern u32 pmtmr_ioport; | ||
681 | #endif | ||
682 | |||
683 | static int __init acpi_parse_fadt(struct acpi_table_header *table) | 668 | static int __init acpi_parse_fadt(struct acpi_table_header *table) |
684 | { | 669 | { |
685 | 670 | ||
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index f4159e0a7ae9..9655c233e6f1 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c | |||
@@ -25,6 +25,8 @@ | |||
25 | #include <linux/kernel_stat.h> | 25 | #include <linux/kernel_stat.h> |
26 | #include <linux/sysdev.h> | 26 | #include <linux/sysdev.h> |
27 | #include <linux/cpu.h> | 27 | #include <linux/cpu.h> |
28 | #include <linux/clockchips.h> | ||
29 | #include <linux/acpi_pmtmr.h> | ||
28 | #include <linux/module.h> | 30 | #include <linux/module.h> |
29 | 31 | ||
30 | #include <asm/atomic.h> | 32 | #include <asm/atomic.h> |
@@ -45,128 +47,549 @@ | |||
45 | #include "io_ports.h" | 47 | #include "io_ports.h" |
46 | 48 | ||
47 | /* | 49 | /* |
48 | * cpu_mask that denotes the CPUs that needs timer interrupt coming in as | 50 | * Sanity check |
49 | * IPIs in place of local APIC timers | ||
50 | */ | 51 | */ |
51 | static cpumask_t timer_bcast_ipi; | 52 | #if (SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F |
53 | # error SPURIOUS_APIC_VECTOR definition error | ||
54 | #endif | ||
52 | 55 | ||
53 | /* | 56 | /* |
54 | * Knob to control our willingness to enable the local APIC. | 57 | * Knob to control our willingness to enable the local APIC. |
58 | * | ||
59 | * -1=force-disable, +1=force-enable | ||
55 | */ | 60 | */ |
56 | static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ | 61 | static int enable_local_apic __initdata = 0; |
57 | |||
58 | static inline void lapic_disable(void) | ||
59 | { | ||
60 | enable_local_apic = -1; | ||
61 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | ||
62 | } | ||
63 | 62 | ||
64 | static inline void lapic_enable(void) | 63 | /* Local APIC timer verification ok */ |
65 | { | 64 | static int local_apic_timer_verify_ok; |
66 | enable_local_apic = 1; | ||
67 | } | ||
68 | 65 | ||
69 | /* | 66 | /* |
70 | * Debug level | 67 | * Debug level, exported for io_apic.c |
71 | */ | 68 | */ |
72 | int apic_verbosity; | 69 | int apic_verbosity; |
73 | 70 | ||
71 | static unsigned int calibration_result; | ||
74 | 72 | ||
73 | static int lapic_next_event(unsigned long delta, | ||
74 | struct clock_event_device *evt); | ||
75 | static void lapic_timer_setup(enum clock_event_mode mode, | ||
76 | struct clock_event_device *evt); | ||
77 | static void lapic_timer_broadcast(cpumask_t mask); | ||
75 | static void apic_pm_activate(void); | 78 | static void apic_pm_activate(void); |
76 | 79 | ||
80 | /* | ||
81 | * The local apic timer can be used for any function which is CPU local. | ||
82 | */ | ||
83 | static struct clock_event_device lapic_clockevent = { | ||
84 | .name = "lapic", | ||
85 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | ||
86 | | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, | ||
87 | .shift = 32, | ||
88 | .set_mode = lapic_timer_setup, | ||
89 | .set_next_event = lapic_next_event, | ||
90 | .broadcast = lapic_timer_broadcast, | ||
91 | .rating = 100, | ||
92 | .irq = -1, | ||
93 | }; | ||
94 | static DEFINE_PER_CPU(struct clock_event_device, lapic_events); | ||
95 | |||
96 | /* Local APIC was disabled by the BIOS and enabled by the kernel */ | ||
97 | static int enabled_via_apicbase; | ||
98 | |||
99 | /* | ||
100 | * Get the LAPIC version | ||
101 | */ | ||
102 | static inline int lapic_get_version(void) | ||
103 | { | ||
104 | return GET_APIC_VERSION(apic_read(APIC_LVR)); | ||
105 | } | ||
106 | |||
107 | /* | ||
108 | * Check if the APIC is integrated or a separate chip | ||
109 | */ | ||
110 | static inline int lapic_is_integrated(void) | ||
111 | { | ||
112 | return APIC_INTEGRATED(lapic_get_version()); | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * Check whether this is a modern or a first generation APIC | ||
117 | */ | ||
77 | static int modern_apic(void) | 118 | static int modern_apic(void) |
78 | { | 119 | { |
79 | unsigned int lvr, version; | ||
80 | /* AMD systems use old APIC versions, so check the CPU */ | 120 | /* AMD systems use old APIC versions, so check the CPU */ |
81 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | 121 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && |
82 | boot_cpu_data.x86 >= 0xf) | 122 | boot_cpu_data.x86 >= 0xf) |
83 | return 1; | 123 | return 1; |
84 | lvr = apic_read(APIC_LVR); | 124 | return lapic_get_version() >= 0x14; |
85 | version = GET_APIC_VERSION(lvr); | 125 | } |
86 | return version >= 0x14; | 126 | |
127 | /** | ||
128 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 | ||
129 | */ | ||
130 | void enable_NMI_through_LVT0 (void * dummy) | ||
131 | { | ||
132 | unsigned int v = APIC_DM_NMI; | ||
133 | |||
134 | /* Level triggered for 82489DX */ | ||
135 | if (!lapic_is_integrated()) | ||
136 | v |= APIC_LVT_LEVEL_TRIGGER; | ||
137 | apic_write_around(APIC_LVT0, v); | ||
138 | } | ||
139 | |||
140 | /** | ||
141 | * get_physical_broadcast - Get number of physical broadcast IDs | ||
142 | */ | ||
143 | int get_physical_broadcast(void) | ||
144 | { | ||
145 | return modern_apic() ? 0xff : 0xf; | ||
146 | } | ||
147 | |||
148 | /** | ||
149 | * lapic_get_maxlvt - get the maximum number of local vector table entries | ||
150 | */ | ||
151 | int lapic_get_maxlvt(void) | ||
152 | { | ||
153 | unsigned int v = apic_read(APIC_LVR); | ||
154 | |||
155 | /* 82489DXs do not report # of LVT entries. */ | ||
156 | return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; | ||
87 | } | 157 | } |
88 | 158 | ||
89 | /* | 159 | /* |
90 | * 'what should we do if we get a hw irq event on an illegal vector'. | 160 | * Local APIC timer |
91 | * each architecture has to answer this themselves. | ||
92 | */ | 161 | */ |
93 | void ack_bad_irq(unsigned int irq) | 162 | |
163 | /* Clock divisor is set to 16 */ | ||
164 | #define APIC_DIVISOR 16 | ||
165 | |||
166 | /* | ||
167 | * This function sets up the local APIC timer, with a timeout of | ||
168 | * 'clocks' APIC bus clock. During calibration we actually call | ||
169 | * this function twice on the boot CPU, once with a bogus timeout | ||
170 | * value, second time for real. The other (noncalibrating) CPUs | ||
171 | * call this function only once, with the real, calibrated value. | ||
172 | * | ||
173 | * We do reads before writes even if unnecessary, to get around the | ||
174 | * P5 APIC double write bug. | ||
175 | */ | ||
176 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | ||
94 | { | 177 | { |
95 | printk("unexpected IRQ trap at vector %02x\n", irq); | 178 | unsigned int lvtt_value, tmp_value; |
179 | |||
180 | lvtt_value = LOCAL_TIMER_VECTOR; | ||
181 | if (!oneshot) | ||
182 | lvtt_value |= APIC_LVT_TIMER_PERIODIC; | ||
183 | if (!lapic_is_integrated()) | ||
184 | lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); | ||
185 | |||
186 | if (!irqen) | ||
187 | lvtt_value |= APIC_LVT_MASKED; | ||
188 | |||
189 | apic_write_around(APIC_LVTT, lvtt_value); | ||
190 | |||
96 | /* | 191 | /* |
97 | * Currently unexpected vectors happen only on SMP and APIC. | 192 | * Divide PICLK by 16 |
98 | * We _must_ ack these because every local APIC has only N | ||
99 | * irq slots per priority level, and a 'hanging, unacked' IRQ | ||
100 | * holds up an irq slot - in excessive cases (when multiple | ||
101 | * unexpected vectors occur) that might lock up the APIC | ||
102 | * completely. | ||
103 | * But only ack when the APIC is enabled -AK | ||
104 | */ | 193 | */ |
105 | if (cpu_has_apic) | 194 | tmp_value = apic_read(APIC_TDCR); |
106 | ack_APIC_irq(); | 195 | apic_write_around(APIC_TDCR, (tmp_value |
196 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | ||
197 | | APIC_TDR_DIV_16); | ||
198 | |||
199 | if (!oneshot) | ||
200 | apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); | ||
107 | } | 201 | } |
108 | 202 | ||
109 | void __init apic_intr_init(void) | 203 | /* |
204 | * Program the next event, relative to now | ||
205 | */ | ||
206 | static int lapic_next_event(unsigned long delta, | ||
207 | struct clock_event_device *evt) | ||
208 | { | ||
209 | apic_write_around(APIC_TMICT, delta); | ||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | /* | ||
214 | * Setup the lapic timer in periodic or oneshot mode | ||
215 | */ | ||
216 | static void lapic_timer_setup(enum clock_event_mode mode, | ||
217 | struct clock_event_device *evt) | ||
218 | { | ||
219 | unsigned long flags; | ||
220 | unsigned int v; | ||
221 | |||
222 | /* Lapic used for broadcast ? */ | ||
223 | if (!local_apic_timer_verify_ok) | ||
224 | return; | ||
225 | |||
226 | local_irq_save(flags); | ||
227 | |||
228 | switch (mode) { | ||
229 | case CLOCK_EVT_MODE_PERIODIC: | ||
230 | case CLOCK_EVT_MODE_ONESHOT: | ||
231 | __setup_APIC_LVTT(calibration_result, | ||
232 | mode != CLOCK_EVT_MODE_PERIODIC, 1); | ||
233 | break; | ||
234 | case CLOCK_EVT_MODE_UNUSED: | ||
235 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
236 | v = apic_read(APIC_LVTT); | ||
237 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | ||
238 | apic_write_around(APIC_LVTT, v); | ||
239 | break; | ||
240 | } | ||
241 | |||
242 | local_irq_restore(flags); | ||
243 | } | ||
244 | |||
245 | /* | ||
246 | * Local APIC timer broadcast function | ||
247 | */ | ||
248 | static void lapic_timer_broadcast(cpumask_t mask) | ||
110 | { | 249 | { |
111 | #ifdef CONFIG_SMP | 250 | #ifdef CONFIG_SMP |
112 | smp_intr_init(); | 251 | send_IPI_mask(mask, LOCAL_TIMER_VECTOR); |
113 | #endif | 252 | #endif |
114 | /* self generated IPI for local APIC timer */ | 253 | } |
115 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | ||
116 | 254 | ||
117 | /* IPI vectors for APIC spurious and error interrupts */ | 255 | /* |
118 | set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 256 | * Setup the local APIC timer for this CPU. Copy the initialized values
119 | set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 257 | * of the boot CPU and register the clock event in the framework. |
258 | */ | ||
259 | static void __devinit setup_APIC_timer(void) | ||
260 | { | ||
261 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | ||
120 | 262 | ||
121 | /* thermal monitor LVT interrupt */ | 263 | memcpy(levt, &lapic_clockevent, sizeof(*levt)); |
122 | #ifdef CONFIG_X86_MCE_P4THERMAL | 264 | levt->cpumask = cpumask_of_cpu(smp_processor_id()); |
123 | set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | 265 | |
124 | #endif | 266 | clockevents_register_device(levt); |
125 | } | 267 | } |
126 | 268 | ||
127 | /* Using APIC to generate smp_local_timer_interrupt? */ | 269 | /* |
128 | int using_apic_timer __read_mostly = 0; | 270 | * In this functions we calibrate APIC bus clocks to the external timer. |
271 | * | ||
272 | * We want to do the calibration only once since we want to have local timer | ||
273 | * irqs syncron. CPUs connected by the same APIC bus have the very same bus | ||
274 | * frequency. | ||
275 | * | ||
276 | * This was previously done by reading the PIT/HPET and waiting for a wrap | ||
277 | * around to find out that a tick has elapsed. I have a box where the PIT | ||
278 | * readout is broken, so it never gets out of the wait loop again. This was | ||
279 | * also reported by others. | ||
280 | * | ||
281 | * Monitoring the jiffies value is inaccurate and the clockevents | ||
282 | * infrastructure allows us to do a simple substitution of the interrupt | ||
283 | * handler. | ||
284 | * | ||
285 | * The calibration routine also uses the pm_timer when possible, as the PIT | ||
286 | * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes | ||
287 | * back to normal later in the boot process). | ||
288 | */ | ||
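/*
 * Worked example (illustrative numbers, not from a real system): with
 * HZ=250, LAPIC_CAL_LOOPS is HZ/10 = 25 ticks, i.e. a 100ms calibration
 * window. If the APIC count delta over that window is 625000 (measured at
 * the divided clock), calibration_result below becomes
 * (625000 * APIC_DIVISOR) / 25 = 400000 bus clocks per tick, i.e. a
 * 100 MHz bus clock at HZ=250.
 */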
129 | 289 | ||
130 | static int enabled_via_apicbase; | 290 | #define LAPIC_CAL_LOOPS (HZ/10) |
131 | 291 | ||
132 | void enable_NMI_through_LVT0 (void * dummy) | 292 | static __initdata volatile int lapic_cal_loops = -1; |
293 | static __initdata long lapic_cal_t1, lapic_cal_t2; | ||
294 | static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; | ||
295 | static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; | ||
296 | static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; | ||
297 | |||
298 | /* | ||
299 | * Temporary interrupt handler. | ||
300 | */ | ||
301 | static void __init lapic_cal_handler(struct clock_event_device *dev) | ||
133 | { | 302 | { |
134 | unsigned int v, ver; | 303 | unsigned long long tsc = 0; |
304 | long tapic = apic_read(APIC_TMCCT); | ||
305 | unsigned long pm = acpi_pm_read_early(); | ||
135 | 306 | ||
136 | ver = apic_read(APIC_LVR); | 307 | if (cpu_has_tsc) |
137 | ver = GET_APIC_VERSION(ver); | 308 | rdtscll(tsc); |
138 | v = APIC_DM_NMI; /* unmask and set to NMI */ | 309 | |
139 | if (!APIC_INTEGRATED(ver)) /* 82489DX */ | 310 | switch (lapic_cal_loops++) { |
140 | v |= APIC_LVT_LEVEL_TRIGGER; | 311 | case 0: |
141 | apic_write_around(APIC_LVT0, v); | 312 | lapic_cal_t1 = tapic; |
313 | lapic_cal_tsc1 = tsc; | ||
314 | lapic_cal_pm1 = pm; | ||
315 | lapic_cal_j1 = jiffies; | ||
316 | break; | ||
317 | |||
318 | case LAPIC_CAL_LOOPS: | ||
319 | lapic_cal_t2 = tapic; | ||
320 | lapic_cal_tsc2 = tsc; | ||
321 | if (pm < lapic_cal_pm1) | ||
322 | pm += ACPI_PM_OVRRUN; | ||
323 | lapic_cal_pm2 = pm; | ||
324 | lapic_cal_j2 = jiffies; | ||
325 | break; | ||
326 | } | ||
142 | } | 327 | } |
143 | 328 | ||
144 | int get_physical_broadcast(void) | 329 | /* |
330 | * Setup the boot APIC | ||
331 | * | ||
332 | * Calibrate and verify the result. | ||
333 | */ | ||
334 | void __init setup_boot_APIC_clock(void) | ||
145 | { | 335 | { |
146 | if (modern_apic()) | 336 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
147 | return 0xff; | 337 | const long pm_100ms = PMTMR_TICKS_PER_SEC/10; |
148 | else | 338 | const long pm_thresh = pm_100ms/100; |
149 | return 0xf; | 339 | void (*real_handler)(struct clock_event_device *dev); |
340 | unsigned long deltaj; | ||
341 | long delta, deltapm; | ||
342 | |||
343 | apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" | ||
344 | "calibrating APIC timer ...\n"); | ||
345 | |||
346 | local_irq_disable(); | ||
347 | |||
348 | /* Replace the global interrupt handler */ | ||
349 | real_handler = global_clock_event->event_handler; | ||
350 | global_clock_event->event_handler = lapic_cal_handler; | ||
351 | |||
352 | /* | ||
353 | * Setup the APIC counter to 1e9. There is no way the lapic | ||
354 | * can underflow in the 100ms detection time frame | ||
355 | */ | ||
356 | __setup_APIC_LVTT(1000000000, 0, 0); | ||
357 | |||
358 | /* Let the interrupts run */ | ||
359 | local_irq_enable(); | ||
360 | |||
361 | while(lapic_cal_loops <= LAPIC_CAL_LOOPS); | ||
362 | |||
363 | local_irq_disable(); | ||
364 | |||
365 | /* Restore the real event handler */ | ||
366 | global_clock_event->event_handler = real_handler; | ||
367 | |||
368 | /* Build delta t1-t2 as apic timer counts down */ | ||
369 | delta = lapic_cal_t1 - lapic_cal_t2; | ||
370 | apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); | ||
371 | |||
372 | /* Check, if the PM timer is available */ | ||
373 | deltapm = lapic_cal_pm2 - lapic_cal_pm1; | ||
374 | apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); | ||
375 | |||
376 | if (deltapm) { | ||
377 | unsigned long mult; | ||
378 | u64 res; | ||
379 | |||
380 | mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); | ||
381 | |||
382 | if (deltapm > (pm_100ms - pm_thresh) && | ||
383 | deltapm < (pm_100ms + pm_thresh)) { | ||
384 | apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); | ||
385 | } else { | ||
386 | res = (((u64) deltapm) * mult) >> 22; | ||
387 | do_div(res, 1000000); | ||
388 | printk(KERN_WARNING "APIC calibration not consistent " | ||
389 | "with PM Timer: %ldms instead of 100ms\n", | ||
390 | (long)res); | ||
391 | /* Correct the lapic counter value */ | ||
392 | res = (((u64) delta ) * pm_100ms); | ||
393 | do_div(res, deltapm); | ||
394 | printk(KERN_INFO "APIC delta adjusted to PM-Timer: " | ||
395 | "%lu (%ld)\n", (unsigned long) res, delta); | ||
396 | delta = (long) res; | ||
397 | } | ||
398 | } | ||
399 | |||
400 | /* Calculate the scaled math multiplication factor */ | ||
401 | lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32); | ||
402 | lapic_clockevent.max_delta_ns = | ||
403 | clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); | ||
404 | lapic_clockevent.min_delta_ns = | ||
405 | clockevent_delta2ns(0xF, &lapic_clockevent); | ||
406 | |||
407 | calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; | ||
408 | |||
409 | apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); | ||
410 | apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); | ||
411 | apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", | ||
412 | calibration_result); | ||
413 | |||
414 | if (cpu_has_tsc) { | ||
415 | delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); | ||
416 | apic_printk(APIC_VERBOSE, "..... CPU clock speed is " | ||
417 | "%ld.%04ld MHz.\n", | ||
418 | (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), | ||
419 | (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); | ||
420 | } | ||
421 | |||
422 | apic_printk(APIC_VERBOSE, "..... host bus clock speed is " | ||
423 | "%u.%04u MHz.\n", | ||
424 | calibration_result / (1000000 / HZ), | ||
425 | calibration_result % (1000000 / HZ)); | ||
426 | |||
427 | |||
428 | apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); | ||
429 | |||
430 | /* | ||
431 | * Setup the apic timer manually | ||
432 | */ | ||
433 | local_apic_timer_verify_ok = 1; | ||
434 | levt->event_handler = lapic_cal_handler; | ||
435 | lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); | ||
436 | lapic_cal_loops = -1; | ||
437 | |||
438 | /* Let the interrupts run */ | ||
439 | local_irq_enable(); | ||
440 | |||
441 | while(lapic_cal_loops <= LAPIC_CAL_LOOPS); | ||
442 | |||
443 | local_irq_disable(); | ||
444 | |||
445 | /* Stop the lapic timer */ | ||
446 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); | ||
447 | |||
448 | local_irq_enable(); | ||
449 | |||
450 | /* Jiffies delta */ | ||
451 | deltaj = lapic_cal_j2 - lapic_cal_j1; | ||
452 | apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); | ||
453 | |||
454 | /* Check, if the PM timer is available */ | ||
455 | deltapm = lapic_cal_pm2 - lapic_cal_pm1; | ||
456 | apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); | ||
457 | |||
458 | local_apic_timer_verify_ok = 0; | ||
459 | |||
460 | if (deltapm) { | ||
461 | if (deltapm > (pm_100ms - pm_thresh) && | ||
462 | deltapm < (pm_100ms + pm_thresh)) { | ||
463 | apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); | ||
464 | /* Check, if the jiffies result is consistent */ | ||
465 | if (deltaj < LAPIC_CAL_LOOPS-2 || | ||
466 | deltaj > LAPIC_CAL_LOOPS+2) { | ||
467 | /* | ||
468 | * Not sure what we can do about this one. | ||
469 | * When high resolution timers are active | ||
470 | * and the lapic timer does not stop in C3 | ||
471 | * we are fine. Otherwise more trouble might | ||
472 | * be waiting. -- tglx | ||
473 | */ | ||
474 | printk(KERN_WARNING "Global event device %s " | ||
475 | "has wrong frequency " | ||
476 | "(%lu ticks instead of %d)\n", | ||
477 | global_clock_event->name, deltaj, | ||
478 | LAPIC_CAL_LOOPS); | ||
479 | } | ||
480 | local_apic_timer_verify_ok = 1; | ||
481 | } | ||
482 | } else { | ||
483 | /* Check, if the jiffies result is consistent */ | ||
484 | if (deltaj >= LAPIC_CAL_LOOPS-2 && | ||
485 | deltaj <= LAPIC_CAL_LOOPS+2) { | ||
486 | apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); | ||
487 | local_apic_timer_verify_ok = 1; | ||
488 | } | ||
489 | } | ||
490 | |||
491 | if (!local_apic_timer_verify_ok) { | ||
492 | printk(KERN_WARNING | ||
493 | "APIC timer disabled due to verification failure.\n"); | ||
494 | /* No broadcast on UP ! */ | ||
495 | if (num_possible_cpus() == 1) | ||
496 | return; | ||
497 | } else | ||
498 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | ||
499 | |||
500 | /* Setup the lapic or request the broadcast */ | ||
501 | setup_APIC_timer(); | ||
502 | } | ||
503 | |||
504 | void __devinit setup_secondary_APIC_clock(void) | ||
505 | { | ||
506 | setup_APIC_timer(); | ||
150 | } | 507 | } |
151 | 508 | ||
152 | int get_maxlvt(void) | 509 | /* |
510 | * The guts of the apic timer interrupt | ||
511 | */ | ||
512 | static void local_apic_timer_interrupt(void) | ||
153 | { | 513 | { |
154 | unsigned int v, ver, maxlvt; | 514 | int cpu = smp_processor_id(); |
515 | struct clock_event_device *evt = &per_cpu(lapic_events, cpu); | ||
155 | 516 | ||
156 | v = apic_read(APIC_LVR); | 517 | /* |
157 | ver = GET_APIC_VERSION(v); | 518 | * Normally we should not be here till LAPIC has been initialized but |
158 | /* 82489DXs do not report # of LVT entries. */ | 519 | * in some cases like kdump, it's possible that there is a pending LAPIC
159 | maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2; | 520 | * timer interrupt from the previous kernel's context which is delivered in
160 | return maxlvt; | 521 | * the new kernel the moment interrupts are enabled.
522 | * | ||
523 | * Interrupts are enabled early and LAPIC is setup much later, hence | ||
524 | * it's possible that when we get here evt->event_handler is NULL. | ||
525 | * Check for event_handler being NULL and discard the interrupt as | ||
526 | * spurious. | ||
527 | */ | ||
528 | if (!evt->event_handler) { | ||
529 | printk(KERN_WARNING | ||
530 | "Spurious LAPIC timer interrupt on cpu %d\n", cpu); | ||
531 | /* Switch it off */ | ||
532 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); | ||
533 | return; | ||
534 | } | ||
535 | |||
536 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | ||
537 | |||
538 | evt->event_handler(evt); | ||
539 | } | ||
540 | |||
541 | /* | ||
542 | * Local APIC timer interrupt. This is the most natural way for doing | ||
543 | * local interrupts, but local timer interrupts can be emulated by | ||
544 | * broadcast interrupts too. [in case the hw doesn't support APIC timers] | ||
545 | * | ||
546 | * [ if a single-CPU system runs an SMP kernel then we call the local | ||
547 | * interrupt as well. Thus we cannot inline the local irq ... ] | ||
548 | */ | ||
549 | |||
550 | void fastcall smp_apic_timer_interrupt(struct pt_regs *regs) | ||
551 | { | ||
552 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
553 | |||
554 | /* | ||
555 | * NOTE! We'd better ACK the irq immediately, | ||
556 | * because timer handling can be slow. | ||
557 | */ | ||
558 | ack_APIC_irq(); | ||
559 | /* | ||
560 | * update_process_times() expects us to have done irq_enter(). | ||
561 | * Besides, if we don't, timer interrupts ignore the global | ||
562 | * interrupt lock, which is the WrongThing (tm) to do. | ||
563 | */ | ||
564 | exit_idle(); | ||
565 | irq_enter(); | ||
566 | local_apic_timer_interrupt(); | ||
567 | irq_exit(); | ||
568 | |||
569 | set_irq_regs(old_regs); | ||
161 | } | 570 | } |
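
The ordering in smp_apic_timer_interrupt() is the point of the two comments: acknowledge the controller before the (possibly slow) timer work, and bracket that work with irq_enter()/irq_exit() so update_process_times() sees proper interrupt context. Schematically, with the primitives stubbed out so the sketch stands alone:

    static void ack_interrupt_controller(void) { }  /* stub for ack_APIC_irq() */
    static void irq_enter(void) { }                 /* stubs for the real */
    static void irq_exit(void) { }                  /* irq-context bracket */
    static void dispatch_local_timer(void) { }      /* the handler above */

    void timer_vector_entry(void)
    {
            ack_interrupt_controller();     /* ACK first: handling may be slow */
            irq_enter();                    /* before any accounting work */
            dispatch_local_timer();
            irq_exit();                     /* may run softirqs on the way out */
    }
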
162 | 571 | ||
572 | int setup_profiling_timer(unsigned int multiplier) | ||
573 | { | ||
574 | return -EINVAL; | ||
575 | } | ||
576 | |||
577 | /* | ||
578 | * Local APIC start and shutdown | ||
579 | */ | ||
580 | |||
581 | /** | ||
582 | * clear_local_APIC - shutdown the local APIC | ||
583 | * | ||
584 | * This is called when a CPU is disabled and before rebooting, so the state of | ||
585 | * the local APIC has no dangling leftovers. Also used to clean out any BIOS | ||
586 | * leftovers during boot. | ||
587 | */ | ||
163 | void clear_local_APIC(void) | 588 | void clear_local_APIC(void) |
164 | { | 589 | { |
165 | int maxlvt; | 590 | int maxlvt = lapic_get_maxlvt(); |
166 | unsigned long v; | 591 | unsigned long v; |
167 | 592 | ||
168 | maxlvt = get_maxlvt(); | ||
169 | |||
170 | /* | 593 | /* |
171 | * Masking an LVT entry can trigger a local APIC error | 594 | * Masking an LVT entry can trigger a local APIC error |
172 | * if the vector is zero. Mask LVTERR first to prevent this. | 595 | * if the vector is zero. Mask LVTERR first to prevent this. |
@@ -190,7 +613,7 @@ void clear_local_APIC(void) | |||
190 | apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); | 613 | apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); |
191 | } | 614 | } |
192 | 615 | ||
193 | /* let's not touch this if we didn't frob it */ | 616 | /* let's not touch this if we didn't frob it */ |
194 | #ifdef CONFIG_X86_MCE_P4THERMAL | 617 | #ifdef CONFIG_X86_MCE_P4THERMAL |
195 | if (maxlvt >= 5) { | 618 | if (maxlvt >= 5) { |
196 | v = apic_read(APIC_LVTTHMR); | 619 | v = apic_read(APIC_LVTTHMR); |
@@ -212,85 +635,18 @@ void clear_local_APIC(void) | |||
212 | if (maxlvt >= 5) | 635 | if (maxlvt >= 5) |
213 | apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); | 636 | apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); |
214 | #endif | 637 | #endif |
215 | v = GET_APIC_VERSION(apic_read(APIC_LVR)); | 638 | /* Integrated APIC (!82489DX) ? */ |
216 | if (APIC_INTEGRATED(v)) { /* !82489DX */ | 639 | if (lapic_is_integrated()) { |
217 | if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */ | 640 | if (maxlvt > 3) |
641 | /* Clear ESR due to Pentium errata 3AP and 11AP */ | ||
218 | apic_write(APIC_ESR, 0); | 642 | apic_write(APIC_ESR, 0); |
219 | apic_read(APIC_ESR); | 643 | apic_read(APIC_ESR); |
220 | } | 644 | } |
221 | } | 645 | } |
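
The subtlety clear_local_APIC() works around is stated in its first comment: masking an LVT entry whose vector is zero can itself signal an APIC error, so the error entry (LVTERR) must be silenced before any of the others. The ordering, reduced to a toy register file:

    #define LVT_MASKED (1u << 16)           /* the LVT mask bit */
    enum { LVTERR, LVTT, LVT0, LVT1, NR_LVTS };
    static unsigned int lvt[NR_LVTS];       /* fake registers for the sketch */

    static void mask_all_lvts(void)
    {
            lvt[LVTERR] |= LVT_MASKED;      /* error entry first, so that */
            lvt[LVTT]   |= LVT_MASKED;      /* masking the remaining ones */
            lvt[LVT0]   |= LVT_MASKED;      /* cannot raise an APIC error */
            lvt[LVT1]   |= LVT_MASKED;
    }
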
222 | 646 | ||
223 | void __init connect_bsp_APIC(void) | 647 | /** |
224 | { | 648 | * disable_local_APIC - clear and disable the local APIC |
225 | if (pic_mode) { | 649 | */ |
226 | /* | ||
227 | * Do not trust the local APIC being empty at bootup. | ||
228 | */ | ||
229 | clear_local_APIC(); | ||
230 | /* | ||
231 | * PIC mode, enable APIC mode in the IMCR, i.e. | ||
232 | * connect BSP's local APIC to INT and NMI lines. | ||
233 | */ | ||
234 | apic_printk(APIC_VERBOSE, "leaving PIC mode, " | ||
235 | "enabling APIC mode.\n"); | ||
236 | outb(0x70, 0x22); | ||
237 | outb(0x01, 0x23); | ||
238 | } | ||
239 | enable_apic_mode(); | ||
240 | } | ||
241 | |||
242 | void disconnect_bsp_APIC(int virt_wire_setup) | ||
243 | { | ||
244 | if (pic_mode) { | ||
245 | /* | ||
246 | * Put the board back into PIC mode (has an effect | ||
247 | * only on certain older boards). Note that APIC | ||
248 | * interrupts, including IPIs, won't work beyond | ||
249 | * this point! The only exception are INIT IPIs. | ||
250 | */ | ||
251 | apic_printk(APIC_VERBOSE, "disabling APIC mode, " | ||
252 | "entering PIC mode.\n"); | ||
253 | outb(0x70, 0x22); | ||
254 | outb(0x00, 0x23); | ||
255 | } | ||
256 | else { | ||
257 | /* Go back to Virtual Wire compatibility mode */ | ||
258 | unsigned long value; | ||
259 | |||
260 | /* For the spurious interrupt use vector F, and enable it */ | ||
261 | value = apic_read(APIC_SPIV); | ||
262 | value &= ~APIC_VECTOR_MASK; | ||
263 | value |= APIC_SPIV_APIC_ENABLED; | ||
264 | value |= 0xf; | ||
265 | apic_write_around(APIC_SPIV, value); | ||
266 | |||
267 | if (!virt_wire_setup) { | ||
268 | /* For LVT0 make it edge triggered, active high, external and enabled */ | ||
269 | value = apic_read(APIC_LVT0); | ||
270 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
271 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
272 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); | ||
273 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
274 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | ||
275 | apic_write_around(APIC_LVT0, value); | ||
276 | } | ||
277 | else { | ||
278 | /* Disable LVT0 */ | ||
279 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED); | ||
280 | } | ||
281 | |||
282 | /* For LVT1 make it edge triggered, active high, nmi and enabled */ | ||
283 | value = apic_read(APIC_LVT1); | ||
284 | value &= ~( | ||
285 | APIC_MODE_MASK | APIC_SEND_PENDING | | ||
286 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
287 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
288 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
289 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | ||
290 | apic_write_around(APIC_LVT1, value); | ||
291 | } | ||
292 | } | ||
293 | |||
294 | void disable_local_APIC(void) | 650 | void disable_local_APIC(void) |
295 | { | 651 | { |
296 | unsigned long value; | 652 | unsigned long value; |
@@ -305,8 +661,13 @@ void disable_local_APIC(void) | |||
305 | value &= ~APIC_SPIV_APIC_ENABLED; | 661 | value &= ~APIC_SPIV_APIC_ENABLED; |
306 | apic_write_around(APIC_SPIV, value); | 662 | apic_write_around(APIC_SPIV, value); |
307 | 663 | ||
664 | /* | ||
665 | * When LAPIC was disabled by the BIOS and enabled by the kernel, | ||
666 | * restore the disabled state. | ||
667 | */ | ||
308 | if (enabled_via_apicbase) { | 668 | if (enabled_via_apicbase) { |
309 | unsigned int l, h; | 669 | unsigned int l, h; |
670 | |||
310 | rdmsr(MSR_IA32_APICBASE, l, h); | 671 | rdmsr(MSR_IA32_APICBASE, l, h); |
311 | l &= ~MSR_IA32_APICBASE_ENABLE; | 672 | l &= ~MSR_IA32_APICBASE_ENABLE; |
312 | wrmsr(MSR_IA32_APICBASE, l, h); | 673 | wrmsr(MSR_IA32_APICBASE, l, h); |
@@ -314,6 +675,28 @@ void disable_local_APIC(void) | |||
314 | } | 675 | } |
315 | 676 | ||
316 | /* | 677 | /* |
678 | * If Linux enabled the LAPIC against the BIOS default, disable it again before | ||
679 | * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and | ||
680 | * not power off. Additionally, clear all LVT entries before disable_local_APIC | ||
681 | * for the case where Linux didn't enable the LAPIC. | ||
682 | */ | ||
683 | void lapic_shutdown(void) | ||
684 | { | ||
685 | unsigned long flags; | ||
686 | |||
687 | if (!cpu_has_apic) | ||
688 | return; | ||
689 | |||
690 | local_irq_save(flags); | ||
691 | clear_local_APIC(); | ||
692 | |||
693 | if (enabled_via_apicbase) | ||
694 | disable_local_APIC(); | ||
695 | |||
696 | local_irq_restore(flags); | ||
697 | } | ||
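
lapic_shutdown() is careful about two things: the LVT teardown runs with interrupts off, and the APICBASE enable bit is only cleared when the kernel (not the BIOS) set it in the first place. The skeleton, with stand-in flags for cpu_has_apic and enabled_via_apicbase:

    static int have_apic = 1;               /* stand-in for cpu_has_apic */
    static int we_enabled_it;               /* stand-in for enabled_via_apicbase */

    static void clear_all_lvts(void) { }    /* stub */
    static void hw_disable_apic(void) { }   /* stub */

    static void shutdown_sketch(void)
    {
            if (!have_apic)
                    return;
            /* the real code brackets this with local_irq_save/restore */
            clear_all_lvts();
            if (we_enabled_it)              /* restore the BIOS's choice */
                    hw_disable_apic();
    }
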
698 | |||
699 | /* | ||
317 | * This is to verify that we're looking at a real local APIC. | 700 | * This is to verify that we're looking at a real local APIC. |
318 | * Check these against your board if the CPUs aren't getting | 701 | * Check these against your board if the CPUs aren't getting |
319 | * started for no apparent reason. | 702 | * started for no apparent reason. |
@@ -345,7 +728,7 @@ int __init verify_local_APIC(void) | |||
345 | reg1 = GET_APIC_VERSION(reg0); | 728 | reg1 = GET_APIC_VERSION(reg0); |
346 | if (reg1 == 0x00 || reg1 == 0xff) | 729 | if (reg1 == 0x00 || reg1 == 0xff) |
347 | return 0; | 730 | return 0; |
348 | reg1 = get_maxlvt(); | 731 | reg1 = lapic_get_maxlvt(); |
349 | if (reg1 < 0x02 || reg1 == 0xff) | 732 | if (reg1 < 0x02 || reg1 == 0xff) |
350 | return 0; | 733 | return 0; |
351 | 734 | ||
@@ -368,10 +751,15 @@ int __init verify_local_APIC(void) | |||
368 | return 1; | 751 | return 1; |
369 | } | 752 | } |
370 | 753 | ||
754 | /** | ||
755 | * sync_Arb_IDs - synchronize APIC bus arbitration IDs | ||
756 | */ | ||
371 | void __init sync_Arb_IDs(void) | 757 | void __init sync_Arb_IDs(void) |
372 | { | 758 | { |
373 | /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 | 759 | /* |
374 | And not needed on AMD */ | 760 | * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1, and not |
761 | * needed on AMD. | ||
762 | */ | ||
375 | if (modern_apic()) | 763 | if (modern_apic()) |
376 | return; | 764 | return; |
377 | /* | 765 | /* |
@@ -384,14 +772,12 @@ void __init sync_Arb_IDs(void) | |||
384 | | APIC_DM_INIT); | 772 | | APIC_DM_INIT); |
385 | } | 773 | } |
386 | 774 | ||
387 | extern void __error_in_apic_c (void); | ||
388 | |||
389 | /* | 775 | /* |
390 | * An initial setup of the virtual wire mode. | 776 | * An initial setup of the virtual wire mode. |
391 | */ | 777 | */ |
392 | void __init init_bsp_APIC(void) | 778 | void __init init_bsp_APIC(void) |
393 | { | 779 | { |
394 | unsigned long value, ver; | 780 | unsigned long value; |
395 | 781 | ||
396 | /* | 782 | /* |
397 | * Don't do the setup now if we have a SMP BIOS as the | 783 | * Don't do the setup now if we have a SMP BIOS as the |
@@ -400,9 +786,6 @@ void __init init_bsp_APIC(void) | |||
400 | if (smp_found_config || !cpu_has_apic) | 786 | if (smp_found_config || !cpu_has_apic) |
401 | return; | 787 | return; |
402 | 788 | ||
403 | value = apic_read(APIC_LVR); | ||
404 | ver = GET_APIC_VERSION(value); | ||
405 | |||
406 | /* | 789 | /* |
407 | * Do not trust the local APIC being empty at bootup. | 790 | * Do not trust the local APIC being empty at bootup. |
408 | */ | 791 | */ |
@@ -414,9 +797,10 @@ void __init init_bsp_APIC(void) | |||
414 | value = apic_read(APIC_SPIV); | 797 | value = apic_read(APIC_SPIV); |
415 | value &= ~APIC_VECTOR_MASK; | 798 | value &= ~APIC_VECTOR_MASK; |
416 | value |= APIC_SPIV_APIC_ENABLED; | 799 | value |= APIC_SPIV_APIC_ENABLED; |
417 | 800 | ||
418 | /* This bit is reserved on P4/Xeon and should be cleared */ | 801 | /* This bit is reserved on P4/Xeon and should be cleared */ |
419 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15)) | 802 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && |
803 | (boot_cpu_data.x86 == 15)) | ||
420 | value &= ~APIC_SPIV_FOCUS_DISABLED; | 804 | value &= ~APIC_SPIV_FOCUS_DISABLED; |
421 | else | 805 | else |
422 | value |= APIC_SPIV_FOCUS_DISABLED; | 806 | value |= APIC_SPIV_FOCUS_DISABLED; |
@@ -428,14 +812,17 @@ void __init init_bsp_APIC(void) | |||
428 | */ | 812 | */ |
429 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | 813 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); |
430 | value = APIC_DM_NMI; | 814 | value = APIC_DM_NMI; |
431 | if (!APIC_INTEGRATED(ver)) /* 82489DX */ | 815 | if (!lapic_is_integrated()) /* 82489DX */ |
432 | value |= APIC_LVT_LEVEL_TRIGGER; | 816 | value |= APIC_LVT_LEVEL_TRIGGER; |
433 | apic_write_around(APIC_LVT1, value); | 817 | apic_write_around(APIC_LVT1, value); |
434 | } | 818 | } |
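
init_bsp_APIC() programs the classic virtual wire setup: LVT0 passes the 8259A output through as ExtINT, LVT1 carries the NMI pin, and only the external 82489DX needs the NMI entry level-triggered. A sketch of that logic; the encodings are illustrative stand-ins for the APIC_DM_* delivery-mode values, not authoritative register contents:

    #define DM_EXTINT         0x700u        /* assumed ExtINT encoding */
    #define DM_NMI            0x400u        /* assumed NMI encoding */
    #define LVT_LEVEL_TRIGGER (1u << 15)

    static unsigned int lvt0, lvt1;

    static void virtual_wire_setup(int integrated)
    {
            lvt0 = DM_EXTINT;               /* 8259A output as ExtINT */
            lvt1 = DM_NMI;                  /* NMI pin wired through */
            if (!integrated)                /* 82489DX only */
                    lvt1 |= LVT_LEVEL_TRIGGER;
    }
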
435 | 819 | ||
820 | /** | ||
821 | * setup_local_APIC - setup the local APIC | ||
822 | */ | ||
436 | void __devinit setup_local_APIC(void) | 823 | void __devinit setup_local_APIC(void) |
437 | { | 824 | { |
438 | unsigned long oldvalue, value, ver, maxlvt; | 825 | unsigned long oldvalue, value, maxlvt, integrated; |
439 | int i, j; | 826 | int i, j; |
440 | 827 | ||
441 | /* Pound the ESR really hard over the head with a big hammer - mbligh */ | 828 | /* Pound the ESR really hard over the head with a big hammer - mbligh */ |
@@ -446,11 +833,7 @@ void __devinit setup_local_APIC(void) | |||
446 | apic_write(APIC_ESR, 0); | 833 | apic_write(APIC_ESR, 0); |
447 | } | 834 | } |
448 | 835 | ||
449 | value = apic_read(APIC_LVR); | 836 | integrated = lapic_is_integrated(); |
450 | ver = GET_APIC_VERSION(value); | ||
451 | |||
452 | if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) | ||
453 | __error_in_apic_c(); | ||
454 | 837 | ||
455 | /* | 838 | /* |
456 | * Double-check whether this APIC is really registered. | 839 | * Double-check whether this APIC is really registered. |
@@ -521,13 +904,10 @@ void __devinit setup_local_APIC(void) | |||
521 | * like LRU than MRU (the short-term load is more even across CPUs). | 904 | * like LRU than MRU (the short-term load is more even across CPUs). |
522 | * See also the comment in end_level_ioapic_irq(). --macro | 905 | * See also the comment in end_level_ioapic_irq(). --macro |
523 | */ | 906 | */ |
524 | #if 1 | 907 | |
525 | /* Enable focus processor (bit==0) */ | 908 | /* Enable focus processor (bit==0) */ |
526 | value &= ~APIC_SPIV_FOCUS_DISABLED; | 909 | value &= ~APIC_SPIV_FOCUS_DISABLED; |
527 | #else | 910 | |
528 | /* Disable focus processor (bit==1) */ | ||
529 | value |= APIC_SPIV_FOCUS_DISABLED; | ||
530 | #endif | ||
531 | /* | 911 | /* |
532 | * Set spurious IRQ vector | 912 | * Set spurious IRQ vector |
533 | */ | 913 | */ |
@@ -563,17 +943,18 @@ void __devinit setup_local_APIC(void) | |||
563 | value = APIC_DM_NMI; | 943 | value = APIC_DM_NMI; |
564 | else | 944 | else |
565 | value = APIC_DM_NMI | APIC_LVT_MASKED; | 945 | value = APIC_DM_NMI | APIC_LVT_MASKED; |
566 | if (!APIC_INTEGRATED(ver)) /* 82489DX */ | 946 | if (!integrated) /* 82489DX */ |
567 | value |= APIC_LVT_LEVEL_TRIGGER; | 947 | value |= APIC_LVT_LEVEL_TRIGGER; |
568 | apic_write_around(APIC_LVT1, value); | 948 | apic_write_around(APIC_LVT1, value); |
569 | 949 | ||
570 | if (APIC_INTEGRATED(ver) && !esr_disable) { /* !82489DX */ | 950 | if (integrated && !esr_disable) { /* !82489DX */ |
571 | maxlvt = get_maxlvt(); | 951 | maxlvt = lapic_get_maxlvt(); |
572 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | 952 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
573 | apic_write(APIC_ESR, 0); | 953 | apic_write(APIC_ESR, 0); |
574 | oldvalue = apic_read(APIC_ESR); | 954 | oldvalue = apic_read(APIC_ESR); |
575 | 955 | ||
576 | value = ERROR_APIC_VECTOR; // enables sending errors | 956 | /* enables sending errors */ |
957 | value = ERROR_APIC_VECTOR; | ||
577 | apic_write_around(APIC_LVTERR, value); | 958 | apic_write_around(APIC_LVTERR, value); |
578 | /* | 959 | /* |
579 | * spec says clear errors after enabling vector. | 960 | * spec says clear errors after enabling vector. |
@@ -586,207 +967,30 @@ void __devinit setup_local_APIC(void) | |||
586 | "vector: 0x%08lx after: 0x%08lx\n", | 967 | "vector: 0x%08lx after: 0x%08lx\n", |
587 | oldvalue, value); | 968 | oldvalue, value); |
588 | } else { | 969 | } else { |
589 | if (esr_disable) | 970 | if (esr_disable) |
590 | /* | 971 | /* |
591 | * Something untraceable is creating bad interrupts on | 972 | * Something untraceable is creating bad interrupts on |
592 | * secondary quads ... for the moment, just leave the | 973 | * secondary quads ... for the moment, just leave the |
593 | * ESR disabled - we can't do anything useful with the | 974 | * ESR disabled - we can't do anything useful with the |
594 | * errors anyway - mbligh | 975 | * errors anyway - mbligh |
595 | */ | 976 | */ |
596 | printk("Leaving ESR disabled.\n"); | 977 | printk(KERN_INFO "Leaving ESR disabled.\n"); |
597 | else | 978 | else |
598 | printk("No ESR for 82489DX.\n"); | 979 | printk(KERN_INFO "No ESR for 82489DX.\n"); |
599 | } | 980 | } |
600 | 981 | ||
982 | /* Disable the local apic timer */ | ||
983 | value = apic_read(APIC_LVTT); | ||
984 | value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | ||
985 | apic_write_around(APIC_LVTT, value); | ||
986 | |||
601 | setup_apic_nmi_watchdog(NULL); | 987 | setup_apic_nmi_watchdog(NULL); |
602 | apic_pm_activate(); | 988 | apic_pm_activate(); |
603 | } | 989 | } |
604 | 990 | ||
605 | /* | 991 | /* |
606 | * If Linux enabled the LAPIC against the BIOS default | 992 | * Detect and initialize APIC |
607 | * disable it again before re-entering the BIOS on shutdown. | ||
608 | * Otherwise the BIOS may get confused and not power off. | ||
609 | * Additionally clear all LVT entries before disable_local_APIC | ||
610 | * for the case where Linux didn't enable the LAPIC. | ||
611 | */ | ||
612 | void lapic_shutdown(void) | ||
613 | { | ||
614 | unsigned long flags; | ||
615 | |||
616 | if (!cpu_has_apic) | ||
617 | return; | ||
618 | |||
619 | local_irq_save(flags); | ||
620 | clear_local_APIC(); | ||
621 | |||
622 | if (enabled_via_apicbase) | ||
623 | disable_local_APIC(); | ||
624 | |||
625 | local_irq_restore(flags); | ||
626 | } | ||
627 | |||
628 | #ifdef CONFIG_PM | ||
629 | |||
630 | static struct { | ||
631 | int active; | ||
632 | /* r/w apic fields */ | ||
633 | unsigned int apic_id; | ||
634 | unsigned int apic_taskpri; | ||
635 | unsigned int apic_ldr; | ||
636 | unsigned int apic_dfr; | ||
637 | unsigned int apic_spiv; | ||
638 | unsigned int apic_lvtt; | ||
639 | unsigned int apic_lvtpc; | ||
640 | unsigned int apic_lvt0; | ||
641 | unsigned int apic_lvt1; | ||
642 | unsigned int apic_lvterr; | ||
643 | unsigned int apic_tmict; | ||
644 | unsigned int apic_tdcr; | ||
645 | unsigned int apic_thmr; | ||
646 | } apic_pm_state; | ||
647 | |||
648 | static int lapic_suspend(struct sys_device *dev, pm_message_t state) | ||
649 | { | ||
650 | unsigned long flags; | ||
651 | int maxlvt; | ||
652 | |||
653 | if (!apic_pm_state.active) | ||
654 | return 0; | ||
655 | |||
656 | maxlvt = get_maxlvt(); | ||
657 | |||
658 | apic_pm_state.apic_id = apic_read(APIC_ID); | ||
659 | apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); | ||
660 | apic_pm_state.apic_ldr = apic_read(APIC_LDR); | ||
661 | apic_pm_state.apic_dfr = apic_read(APIC_DFR); | ||
662 | apic_pm_state.apic_spiv = apic_read(APIC_SPIV); | ||
663 | apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); | ||
664 | if (maxlvt >= 4) | ||
665 | apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); | ||
666 | apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); | ||
667 | apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); | ||
668 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); | ||
669 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); | ||
670 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); | ||
671 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
672 | if (maxlvt >= 5) | ||
673 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); | ||
674 | #endif | ||
675 | |||
676 | local_irq_save(flags); | ||
677 | disable_local_APIC(); | ||
678 | local_irq_restore(flags); | ||
679 | return 0; | ||
680 | } | ||
681 | |||
682 | static int lapic_resume(struct sys_device *dev) | ||
683 | { | ||
684 | unsigned int l, h; | ||
685 | unsigned long flags; | ||
686 | int maxlvt; | ||
687 | |||
688 | if (!apic_pm_state.active) | ||
689 | return 0; | ||
690 | |||
691 | maxlvt = get_maxlvt(); | ||
692 | |||
693 | local_irq_save(flags); | ||
694 | |||
695 | /* | ||
696 | * Make sure the APICBASE points to the right address | ||
697 | * | ||
698 | * FIXME! This will be wrong if we ever support suspend on | ||
699 | * SMP! We'll need to do this as part of the CPU restore! | ||
700 | */ | ||
701 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
702 | l &= ~MSR_IA32_APICBASE_BASE; | ||
703 | l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; | ||
704 | wrmsr(MSR_IA32_APICBASE, l, h); | ||
705 | |||
706 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); | ||
707 | apic_write(APIC_ID, apic_pm_state.apic_id); | ||
708 | apic_write(APIC_DFR, apic_pm_state.apic_dfr); | ||
709 | apic_write(APIC_LDR, apic_pm_state.apic_ldr); | ||
710 | apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); | ||
711 | apic_write(APIC_SPIV, apic_pm_state.apic_spiv); | ||
712 | apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); | ||
713 | apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); | ||
714 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
715 | if (maxlvt >= 5) | ||
716 | apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); | ||
717 | #endif | ||
718 | if (maxlvt >= 4) | ||
719 | apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); | ||
720 | apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); | ||
721 | apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); | ||
722 | apic_write(APIC_TMICT, apic_pm_state.apic_tmict); | ||
723 | apic_write(APIC_ESR, 0); | ||
724 | apic_read(APIC_ESR); | ||
725 | apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); | ||
726 | apic_write(APIC_ESR, 0); | ||
727 | apic_read(APIC_ESR); | ||
728 | local_irq_restore(flags); | ||
729 | return 0; | ||
730 | } | ||
731 | |||
732 | /* | ||
733 | * This device has no shutdown method - fully functioning local APICs | ||
734 | * are needed on every CPU up until machine_halt/restart/poweroff. | ||
735 | */ | 993 | */ |
736 | |||
737 | static struct sysdev_class lapic_sysclass = { | ||
738 | set_kset_name("lapic"), | ||
739 | .resume = lapic_resume, | ||
740 | .suspend = lapic_suspend, | ||
741 | }; | ||
742 | |||
743 | static struct sys_device device_lapic = { | ||
744 | .id = 0, | ||
745 | .cls = &lapic_sysclass, | ||
746 | }; | ||
747 | |||
748 | static void __devinit apic_pm_activate(void) | ||
749 | { | ||
750 | apic_pm_state.active = 1; | ||
751 | } | ||
752 | |||
753 | static int __init init_lapic_sysfs(void) | ||
754 | { | ||
755 | int error; | ||
756 | |||
757 | if (!cpu_has_apic) | ||
758 | return 0; | ||
759 | /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ | ||
760 | |||
761 | error = sysdev_class_register(&lapic_sysclass); | ||
762 | if (!error) | ||
763 | error = sysdev_register(&device_lapic); | ||
764 | return error; | ||
765 | } | ||
766 | device_initcall(init_lapic_sysfs); | ||
767 | |||
768 | #else /* CONFIG_PM */ | ||
769 | |||
770 | static void apic_pm_activate(void) { } | ||
771 | |||
772 | #endif /* CONFIG_PM */ | ||
773 | |||
774 | /* | ||
775 | * Detect and enable local APICs on non-SMP boards. | ||
776 | * Original code written by Keir Fraser. | ||
777 | */ | ||
778 | |||
779 | static int __init apic_set_verbosity(char *str) | ||
780 | { | ||
781 | if (strcmp("debug", str) == 0) | ||
782 | apic_verbosity = APIC_DEBUG; | ||
783 | else if (strcmp("verbose", str) == 0) | ||
784 | apic_verbosity = APIC_VERBOSE; | ||
785 | return 1; | ||
786 | } | ||
787 | |||
788 | __setup("apic=", apic_set_verbosity); | ||
789 | |||
790 | static int __init detect_init_APIC (void) | 994 | static int __init detect_init_APIC (void) |
791 | { | 995 | { |
792 | u32 h, l, features; | 996 | u32 h, l, features; |
@@ -798,7 +1002,7 @@ static int __init detect_init_APIC (void) | |||
798 | switch (boot_cpu_data.x86_vendor) { | 1002 | switch (boot_cpu_data.x86_vendor) { |
799 | case X86_VENDOR_AMD: | 1003 | case X86_VENDOR_AMD: |
800 | if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || | 1004 | if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || |
801 | (boot_cpu_data.x86 == 15)) | 1005 | (boot_cpu_data.x86 == 15)) |
802 | break; | 1006 | break; |
803 | goto no_apic; | 1007 | goto no_apic; |
804 | case X86_VENDOR_INTEL: | 1008 | case X86_VENDOR_INTEL: |
@@ -812,23 +1016,23 @@ static int __init detect_init_APIC (void) | |||
812 | 1016 | ||
813 | if (!cpu_has_apic) { | 1017 | if (!cpu_has_apic) { |
814 | /* | 1018 | /* |
815 | * Override BIOS and try to enable the local | 1019 | * Override BIOS and try to enable the local APIC only if |
816 | * APIC only if "lapic" is specified. | 1020 | * "lapic" is specified. |
817 | */ | 1021 | */ |
818 | if (enable_local_apic <= 0) { | 1022 | if (enable_local_apic <= 0) { |
819 | printk("Local APIC disabled by BIOS -- " | 1023 | printk(KERN_INFO "Local APIC disabled by BIOS -- " |
820 | "you can enable it with \"lapic\"\n"); | 1024 | "you can enable it with \"lapic\"\n"); |
821 | return -1; | 1025 | return -1; |
822 | } | 1026 | } |
823 | /* | 1027 | /* |
824 | * Some BIOSes disable the local APIC in the | 1028 | * Some BIOSes disable the local APIC in the APIC_BASE |
825 | * APIC_BASE MSR. This can only be done in | 1029 | * MSR. This can only be done in software for Intel P6 or later |
826 | * software for Intel P6 or later and AMD K7 | 1030 | * and AMD K7 (Model > 1) or later. |
827 | * (Model > 1) or later. | ||
828 | */ | 1031 | */ |
829 | rdmsr(MSR_IA32_APICBASE, l, h); | 1032 | rdmsr(MSR_IA32_APICBASE, l, h); |
830 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { | 1033 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { |
831 | printk("Local APIC disabled by BIOS -- reenabling.\n"); | 1034 | printk(KERN_INFO |
1035 | "Local APIC disabled by BIOS -- reenabling.\n"); | ||
832 | l &= ~MSR_IA32_APICBASE_BASE; | 1036 | l &= ~MSR_IA32_APICBASE_BASE; |
833 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; | 1037 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; |
834 | wrmsr(MSR_IA32_APICBASE, l, h); | 1038 | wrmsr(MSR_IA32_APICBASE, l, h); |
@@ -841,7 +1045,7 @@ static int __init detect_init_APIC (void) | |||
841 | */ | 1045 | */ |
842 | features = cpuid_edx(1); | 1046 | features = cpuid_edx(1); |
843 | if (!(features & (1 << X86_FEATURE_APIC))) { | 1047 | if (!(features & (1 << X86_FEATURE_APIC))) { |
844 | printk("Could not enable APIC!\n"); | 1048 | printk(KERN_WARNING "Could not enable APIC!\n"); |
845 | return -1; | 1049 | return -1; |
846 | } | 1050 | } |
847 | set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | 1051 | set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); |
@@ -855,17 +1059,20 @@ static int __init detect_init_APIC (void) | |||
855 | if (nmi_watchdog != NMI_NONE) | 1059 | if (nmi_watchdog != NMI_NONE) |
856 | nmi_watchdog = NMI_LOCAL_APIC; | 1060 | nmi_watchdog = NMI_LOCAL_APIC; |
857 | 1061 | ||
858 | printk("Found and enabled local APIC!\n"); | 1062 | printk(KERN_INFO "Found and enabled local APIC!\n"); |
859 | 1063 | ||
860 | apic_pm_activate(); | 1064 | apic_pm_activate(); |
861 | 1065 | ||
862 | return 0; | 1066 | return 0; |
863 | 1067 | ||
864 | no_apic: | 1068 | no_apic: |
865 | printk("No local APIC present or hardware disabled\n"); | 1069 | printk(KERN_INFO "No local APIC present or hardware disabled\n"); |
866 | return -1; | 1070 | return -1; |
867 | } | 1071 | } |
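
The software re-enable in detect_init_APIC() is a plain read-modify-write of the APICBASE MSR: clear the base field, then set the enable bit together with the default physical base. Extracted as a sketch that operates on a copy of the MSR's low word (the real code wraps this in rdmsr/wrmsr):

    #define APICBASE_ENABLE     (1u << 11)  /* MSR_IA32_APICBASE_ENABLE */
    #define APICBASE_BASE_MASK  0xfffff000u
    #define DEFAULT_PHYS_BASE   0xfee00000u

    static unsigned int reenable_lapic(unsigned int msr_low)
    {
            if (msr_low & APICBASE_ENABLE)
                    return msr_low;                 /* already enabled */
            msr_low &= ~APICBASE_BASE_MASK;         /* reset the base field */
            return msr_low | APICBASE_ENABLE | DEFAULT_PHYS_BASE;
    }
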
868 | 1072 | ||
1073 | /** | ||
1074 | * init_apic_mappings - initialize APIC mappings | ||
1075 | */ | ||
869 | void __init init_apic_mappings(void) | 1076 | void __init init_apic_mappings(void) |
870 | { | 1077 | { |
871 | unsigned long apic_phys; | 1078 | unsigned long apic_phys; |
@@ -925,385 +1132,92 @@ fake_ioapic_page: | |||
925 | } | 1132 | } |
926 | 1133 | ||
927 | /* | 1134 | /* |
928 | * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts | 1135 | * This initializes the IO-APIC and APIC hardware if this is |
929 | * per second. We assume that the caller has already set up the local | 1136 | * a UP kernel. |
930 | * APIC. | ||
931 | * | ||
932 | * The APIC timer is not exactly in sync with the external timer chip; it | ||
933 | * closely follows bus clocks. | ||
934 | */ | ||
935 | |||
936 | /* | ||
937 | * The timer chip is already set up at HZ interrupts per second here, | ||
938 | * but we do not accept timer interrupts yet. We only allow the BP | ||
939 | * to calibrate. | ||
940 | */ | ||
941 | static unsigned int __devinit get_8254_timer_count(void) | ||
942 | { | ||
943 | unsigned long flags; | ||
944 | |||
945 | unsigned int count; | ||
946 | |||
947 | spin_lock_irqsave(&i8253_lock, flags); | ||
948 | |||
949 | outb_p(0x00, PIT_MODE); | ||
950 | count = inb_p(PIT_CH0); | ||
951 | count |= inb_p(PIT_CH0) << 8; | ||
952 | |||
953 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
954 | |||
955 | return count; | ||
956 | } | ||
957 | |||
958 | /* next tick in 8254 can be caught by catching timer wraparound */ | ||
959 | static void __devinit wait_8254_wraparound(void) | ||
960 | { | ||
961 | unsigned int curr_count, prev_count; | ||
962 | |||
963 | curr_count = get_8254_timer_count(); | ||
964 | do { | ||
965 | prev_count = curr_count; | ||
966 | curr_count = get_8254_timer_count(); | ||
967 | |||
968 | /* workaround for broken Mercury/Neptune */ | ||
969 | if (prev_count >= curr_count + 0x100) | ||
970 | curr_count = get_8254_timer_count(); | ||
971 | |||
972 | } while (prev_count >= curr_count); | ||
973 | } | ||
974 | |||
975 | /* | ||
976 | * Default initialization for 8254 timers. If we use other timers like HPET, | ||
977 | * we override this later | ||
978 | */ | ||
979 | void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound; | ||
980 | |||
981 | /* | ||
982 | * This function sets up the local APIC timer, with a timeout of | ||
983 | * 'clocks' APIC bus clock. During calibration we actually call | ||
984 | * this function twice on the boot CPU, once with a bogus timeout | ||
985 | * value, second time for real. The other (noncalibrating) CPUs | ||
986 | * call this function only once, with the real, calibrated value. | ||
987 | * | ||
988 | * We do reads before writes even if unnecessary, to get around the | ||
989 | * P5 APIC double write bug. | ||
990 | */ | 1137 | */ |
991 | 1138 | int __init APIC_init_uniprocessor (void) | |
992 | #define APIC_DIVISOR 16 | ||
993 | |||
994 | static void __setup_APIC_LVTT(unsigned int clocks) | ||
995 | { | 1139 | { |
996 | unsigned int lvtt_value, tmp_value, ver; | 1140 | if (enable_local_apic < 0) |
997 | int cpu = smp_processor_id(); | 1141 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); |
998 | |||
999 | ver = GET_APIC_VERSION(apic_read(APIC_LVR)); | ||
1000 | lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; | ||
1001 | if (!APIC_INTEGRATED(ver)) | ||
1002 | lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); | ||
1003 | |||
1004 | if (cpu_isset(cpu, timer_bcast_ipi)) | ||
1005 | lvtt_value |= APIC_LVT_MASKED; | ||
1006 | 1142 | ||
1007 | apic_write_around(APIC_LVTT, lvtt_value); | 1143 | if (!smp_found_config && !cpu_has_apic) |
1144 | return -1; | ||
1008 | 1145 | ||
1009 | /* | 1146 | /* |
1010 | * Divide PICLK by 16 | 1147 | * Complain if the BIOS pretends there is one. |
1011 | */ | 1148 | */ |
1012 | tmp_value = apic_read(APIC_TDCR); | 1149 | if (!cpu_has_apic && |
1013 | apic_write_around(APIC_TDCR, (tmp_value | 1150 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
1014 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | 1151 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", |
1015 | | APIC_TDR_DIV_16); | 1152 | boot_cpu_physical_apicid); |
1016 | 1153 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | |
1017 | apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); | 1154 | return -1; |
1018 | } | 1155 | } |
1019 | 1156 | ||
1020 | static void __devinit setup_APIC_timer(unsigned int clocks) | 1157 | verify_local_APIC(); |
1021 | { | ||
1022 | unsigned long flags; | ||
1023 | 1158 | ||
1024 | local_irq_save(flags); | 1159 | connect_bsp_APIC(); |
1025 | 1160 | ||
1026 | /* | 1161 | /* |
1027 | * Wait for IRQ0's slice: | 1162 | * Hack: In case of kdump, after a crash, kernel might be booting |
1163 | * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid | ||
1164 | * might be zero if read from MP tables. Get it from LAPIC. | ||
1028 | */ | 1165 | */ |
1029 | wait_timer_tick(); | 1166 | #ifdef CONFIG_CRASH_DUMP |
1167 | boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); | ||
1168 | #endif | ||
1169 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); | ||
1030 | 1170 | ||
1031 | __setup_APIC_LVTT(clocks); | 1171 | setup_local_APIC(); |
1032 | 1172 | ||
1033 | local_irq_restore(flags); | 1173 | #ifdef CONFIG_X86_IO_APIC |
1174 | if (smp_found_config) | ||
1175 | if (!skip_ioapic_setup && nr_ioapics) | ||
1176 | setup_IO_APIC(); | ||
1177 | #endif | ||
1178 | setup_boot_clock(); | ||
1179 | |||
1180 | return 0; | ||
1034 | } | 1181 | } |
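
The CONFIG_CRASH_DUMP fixup above re-derives the boot CPU's physical APIC id from the hardware instead of trusting possibly stale MP-table data. On these CPUs the id lives in the top byte of the ID register, which is all GET_APIC_ID() extracts:

    static inline unsigned int get_apic_id(unsigned int id_reg)
    {
            return (id_reg >> 24) & 0xff;   /* bits 31:24 of APIC_ID */
    }
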
1035 | 1182 | ||
1036 | /* | 1183 | /* |
1037 | * In this function we calibrate APIC bus clocks to the external | 1184 | * APIC command line parameters |
1038 | * timer. Unfortunately we cannot use jiffies and the timer irq | ||
1039 | * to calibrate, since some later bootup code depends on getting | ||
1040 | * the first irq? Ugh. | ||
1041 | * | ||
1042 | * We want to do the calibration only once since we | ||
1043 | * want to have local timer irqs synchronous. CPUs connected | ||
1044 | * by the same APIC bus have the very same bus frequency. | ||
1045 | * And we want to have irqs off anyway, no accidental | ||
1046 | * APIC irq that way. | ||
1047 | */ | 1185 | */ |
1048 | 1186 | static int __init parse_lapic(char *arg) | |
1049 | static int __init calibrate_APIC_clock(void) | ||
1050 | { | ||
1051 | unsigned long long t1 = 0, t2 = 0; | ||
1052 | long tt1, tt2; | ||
1053 | long result; | ||
1054 | int i; | ||
1055 | const int LOOPS = HZ/10; | ||
1056 | |||
1057 | apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n"); | ||
1058 | |||
1059 | /* | ||
1060 | * Put whatever arbitrary (but long enough) timeout | ||
1061 | * value into the APIC clock, we just want to get the | ||
1062 | * counter running for calibration. | ||
1063 | */ | ||
1064 | __setup_APIC_LVTT(1000000000); | ||
1065 | |||
1066 | /* | ||
1067 | * The timer chip counts down to zero. Let's wait | ||
1068 | * for a wraparound to start exact measurement: | ||
1069 | * (the current tick might have been already half done) | ||
1070 | */ | ||
1071 | |||
1072 | wait_timer_tick(); | ||
1073 | |||
1074 | /* | ||
1075 | * We wrapped around just now. Let's start: | ||
1076 | */ | ||
1077 | if (cpu_has_tsc) | ||
1078 | rdtscll(t1); | ||
1079 | tt1 = apic_read(APIC_TMCCT); | ||
1080 | |||
1081 | /* | ||
1082 | * Let's wait LOOPS wraparounds: | ||
1083 | */ | ||
1084 | for (i = 0; i < LOOPS; i++) | ||
1085 | wait_timer_tick(); | ||
1086 | |||
1087 | tt2 = apic_read(APIC_TMCCT); | ||
1088 | if (cpu_has_tsc) | ||
1089 | rdtscll(t2); | ||
1090 | |||
1091 | /* | ||
1092 | * The APIC bus clock counter is 32 bits only, it | ||
1093 | * might have overflowed, but note that we use signed | ||
1094 | * longs, thus no extra care needed. | ||
1095 | * | ||
1096 | * underflowed, to be exact, as the timer counts down ;) | ||
1097 | */ | ||
1098 | |||
1099 | result = (tt1-tt2)*APIC_DIVISOR/LOOPS; | ||
1100 | |||
1101 | if (cpu_has_tsc) | ||
1102 | apic_printk(APIC_VERBOSE, "..... CPU clock speed is " | ||
1103 | "%ld.%04ld MHz.\n", | ||
1104 | ((long)(t2-t1)/LOOPS)/(1000000/HZ), | ||
1105 | ((long)(t2-t1)/LOOPS)%(1000000/HZ)); | ||
1106 | |||
1107 | apic_printk(APIC_VERBOSE, "..... host bus clock speed is " | ||
1108 | "%ld.%04ld MHz.\n", | ||
1109 | result/(1000000/HZ), | ||
1110 | result%(1000000/HZ)); | ||
1111 | |||
1112 | return result; | ||
1113 | } | ||
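
The calibration loop being deleted here boils down to one line of arithmetic: the APIC counter decrements at bus-clock/16, so the observed delta is scaled back up by APIC_DIVISOR and averaged over LOOPS PIT wraparounds. A standalone worked example with made-up counter readings (prints "100.0000 MHz"):

    #include <stdio.h>

    #define APIC_DIVISOR 16
    #define HZ 100

    int main(void)
    {
            const long LOOPS = HZ / 10;             /* ten timer ticks */
            long tt1 = 4000000, tt2 = 3375000;      /* hypothetical TMCCT reads */

            /* the counter counts *down* at bus-clock/16 */
            long result = (tt1 - tt2) * APIC_DIVISOR / LOOPS;

            printf("..... host bus clock speed is %ld.%04ld MHz.\n",
                   result / (1000000 / HZ), result % (1000000 / HZ));
            return 0;
    }
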
1114 | |||
1115 | static unsigned int calibration_result; | ||
1116 | |||
1117 | void __init setup_boot_APIC_clock(void) | ||
1118 | { | ||
1119 | unsigned long flags; | ||
1120 | apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); | ||
1121 | using_apic_timer = 1; | ||
1122 | |||
1123 | local_irq_save(flags); | ||
1124 | |||
1125 | calibration_result = calibrate_APIC_clock(); | ||
1126 | /* | ||
1127 | * Now set up the timer for real. | ||
1128 | */ | ||
1129 | setup_APIC_timer(calibration_result); | ||
1130 | |||
1131 | local_irq_restore(flags); | ||
1132 | } | ||
1133 | |||
1134 | void __devinit setup_secondary_APIC_clock(void) | ||
1135 | { | ||
1136 | setup_APIC_timer(calibration_result); | ||
1137 | } | ||
1138 | |||
1139 | void disable_APIC_timer(void) | ||
1140 | { | ||
1141 | if (using_apic_timer) { | ||
1142 | unsigned long v; | ||
1143 | |||
1144 | v = apic_read(APIC_LVTT); | ||
1145 | /* | ||
1146 | * When an illegal vector value (0-15) is written to an LVT | ||
1147 | * entry and delivery mode is Fixed, the APIC may signal an | ||
1148 | * illegal vector error, without regard to whether the mask | ||
1149 | * bit is set or whether an interrupt is actually seen on input. | ||
1150 | * | ||
1151 | * The boot sequence might call this function when the LVTT has | ||
1152 | * a '0' vector value. So make sure the vector field is set to | ||
1153 | * a valid value. | ||
1154 | */ | ||
1155 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | ||
1156 | apic_write_around(APIC_LVTT, v); | ||
1157 | } | ||
1158 | } | ||
1159 | |||
1160 | void enable_APIC_timer(void) | ||
1161 | { | 1187 | { |
1162 | int cpu = smp_processor_id(); | 1188 | enable_local_apic = 1; |
1163 | 1189 | return 0; | |
1164 | if (using_apic_timer && | ||
1165 | !cpu_isset(cpu, timer_bcast_ipi)) { | ||
1166 | unsigned long v; | ||
1167 | |||
1168 | v = apic_read(APIC_LVTT); | ||
1169 | apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED); | ||
1170 | } | ||
1171 | } | 1190 | } |
1191 | early_param("lapic", parse_lapic); | ||
1172 | 1192 | ||
1173 | void switch_APIC_timer_to_ipi(void *cpumask) | 1193 | static int __init parse_nolapic(char *arg) |
1174 | { | 1194 | { |
1175 | cpumask_t mask = *(cpumask_t *)cpumask; | 1195 | enable_local_apic = -1; |
1176 | int cpu = smp_processor_id(); | 1196 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); |
1177 | 1197 | return 0; | |
1178 | if (cpu_isset(cpu, mask) && | ||
1179 | !cpu_isset(cpu, timer_bcast_ipi)) { | ||
1180 | disable_APIC_timer(); | ||
1181 | cpu_set(cpu, timer_bcast_ipi); | ||
1182 | } | ||
1183 | } | 1198 | } |
1184 | EXPORT_SYMBOL(switch_APIC_timer_to_ipi); | 1199 | early_param("nolapic", parse_nolapic); |
1185 | 1200 | ||
1186 | void switch_ipi_to_APIC_timer(void *cpumask) | 1201 | static int __init apic_set_verbosity(char *str) |
1187 | { | 1202 | { |
1188 | cpumask_t mask = *(cpumask_t *)cpumask; | 1203 | if (strcmp("debug", str) == 0) |
1189 | int cpu = smp_processor_id(); | 1204 | apic_verbosity = APIC_DEBUG; |
1190 | 1205 | else if (strcmp("verbose", str) == 0) | |
1191 | if (cpu_isset(cpu, mask) && | 1206 | apic_verbosity = APIC_VERBOSE; |
1192 | cpu_isset(cpu, timer_bcast_ipi)) { | 1207 | return 1; |
1193 | cpu_clear(cpu, timer_bcast_ipi); | ||
1194 | enable_APIC_timer(); | ||
1195 | } | ||
1196 | } | 1208 | } |
1197 | EXPORT_SYMBOL(switch_ipi_to_APIC_timer); | ||
1198 | 1209 | ||
1199 | #undef APIC_DIVISOR | 1210 | __setup("apic=", apic_set_verbosity); |
1200 | |||
1201 | /* | ||
1202 | * Local timer interrupt handler. It does both profiling and | ||
1203 | * process statistics/rescheduling. | ||
1204 | * | ||
1205 | * We do profiling in every local tick, statistics/rescheduling | ||
1206 | * happen only every 'profiling multiplier' ticks. The default | ||
1207 | * multiplier is 1 and it can be changed by writing the new multiplier | ||
1208 | * value into /proc/profile. | ||
1209 | */ | ||
1210 | |||
1211 | inline void smp_local_timer_interrupt(void) | ||
1212 | { | ||
1213 | profile_tick(CPU_PROFILING); | ||
1214 | #ifdef CONFIG_SMP | ||
1215 | update_process_times(user_mode_vm(get_irq_regs())); | ||
1216 | #endif | ||
1217 | 1211 | ||
1218 | /* | ||
1219 | * We take the 'long' return path, and there every subsystem | ||
1220 | * grabs the apropriate locks (kernel lock/ irq lock). | ||
1221 | * | ||
1222 | * we might want to decouple profiling from the 'long path', | ||
1223 | * and do the profiling totally in assembly. | ||
1224 | * | ||
1225 | * Currently this isn't too much of an issue (performance wise), | ||
1226 | * we can take more than 100K local irqs per second on a 100 MHz P5. | ||
1227 | */ | ||
1228 | } | ||
1229 | 1212 | ||
1230 | /* | 1213 | /* |
1231 | * Local APIC timer interrupt. This is the most natural way for doing | 1214 | * Local APIC interrupts |
1232 | * local interrupts, but local timer interrupts can be emulated by | ||
1233 | * broadcast interrupts too. [in case the hw doesn't support APIC timers] | ||
1234 | * | ||
1235 | * [ if a single-CPU system runs an SMP kernel then we call the local | ||
1236 | * interrupt as well. Thus we cannot inline the local irq ... ] | ||
1237 | */ | 1215 | */ |
1238 | 1216 | ||
1239 | fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) | ||
1240 | { | ||
1241 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
1242 | int cpu = smp_processor_id(); | ||
1243 | |||
1244 | /* | ||
1245 | * the NMI deadlock-detector uses this. | ||
1246 | */ | ||
1247 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | ||
1248 | |||
1249 | /* | ||
1250 | * NOTE! We'd better ACK the irq immediately, | ||
1251 | * because timer handling can be slow. | ||
1252 | */ | ||
1253 | ack_APIC_irq(); | ||
1254 | /* | ||
1255 | * update_process_times() expects us to have done irq_enter(). | ||
1256 | * Besides, if we don't, timer interrupts ignore the global | ||
1257 | * interrupt lock, which is the WrongThing (tm) to do. | ||
1258 | */ | ||
1259 | exit_idle(); | ||
1260 | irq_enter(); | ||
1261 | smp_local_timer_interrupt(); | ||
1262 | irq_exit(); | ||
1263 | set_irq_regs(old_regs); | ||
1264 | } | ||
1265 | |||
1266 | #ifndef CONFIG_SMP | ||
1267 | static void up_apic_timer_interrupt_call(void) | ||
1268 | { | ||
1269 | int cpu = smp_processor_id(); | ||
1270 | |||
1271 | /* | ||
1272 | * the NMI deadlock-detector uses this. | ||
1273 | */ | ||
1274 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | ||
1275 | |||
1276 | smp_local_timer_interrupt(); | ||
1277 | } | ||
1278 | #endif | ||
1279 | |||
1280 | void smp_send_timer_broadcast_ipi(void) | ||
1281 | { | ||
1282 | cpumask_t mask; | ||
1283 | |||
1284 | cpus_and(mask, cpu_online_map, timer_bcast_ipi); | ||
1285 | if (!cpus_empty(mask)) { | ||
1286 | #ifdef CONFIG_SMP | ||
1287 | send_IPI_mask(mask, LOCAL_TIMER_VECTOR); | ||
1288 | #else | ||
1289 | /* | ||
1290 | * We can directly call the apic timer interrupt handler | ||
1291 | * in the UP case, minus all irq-related functions. | ||
1292 | */ | ||
1293 | up_apic_timer_interrupt_call(); | ||
1294 | #endif | ||
1295 | } | ||
1296 | } | ||
1297 | |||
1298 | int setup_profiling_timer(unsigned int multiplier) | ||
1299 | { | ||
1300 | return -EINVAL; | ||
1301 | } | ||
1302 | |||
1303 | /* | 1217 | /* |
1304 | * This interrupt should _never_ happen with our APIC/SMP architecture | 1218 | * This interrupt should _never_ happen with our APIC/SMP architecture |
1305 | */ | 1219 | */ |
1306 | fastcall void smp_spurious_interrupt(struct pt_regs *regs) | 1220 | void smp_spurious_interrupt(struct pt_regs *regs) |
1307 | { | 1221 | { |
1308 | unsigned long v; | 1222 | unsigned long v; |
1309 | 1223 | ||
@@ -1319,16 +1233,15 @@ fastcall void smp_spurious_interrupt(struct pt_regs *regs) | |||
1319 | ack_APIC_irq(); | 1233 | ack_APIC_irq(); |
1320 | 1234 | ||
1321 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ | 1235 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ |
1322 | printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n", | 1236 | printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " |
1323 | smp_processor_id()); | 1237 | "should never happen.\n", smp_processor_id()); |
1324 | irq_exit(); | 1238 | irq_exit(); |
1325 | } | 1239 | } |
1326 | 1240 | ||
1327 | /* | 1241 | /* |
1328 | * This interrupt should never happen with our APIC/SMP architecture | 1242 | * This interrupt should never happen with our APIC/SMP architecture |
1329 | */ | 1243 | */ |
1330 | 1244 | void smp_error_interrupt(struct pt_regs *regs) | |
1331 | fastcall void smp_error_interrupt(struct pt_regs *regs) | ||
1332 | { | 1245 | { |
1333 | unsigned long v, v1; | 1246 | unsigned long v, v1; |
1334 | 1247 | ||
@@ -1352,69 +1265,261 @@ fastcall void smp_error_interrupt(struct pt_regs *regs) | |||
1352 | 7: Illegal register address | 1265 | 7: Illegal register address |
1353 | */ | 1266 | */ |
1354 | printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", | 1267 | printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", |
1355 | smp_processor_id(), v , v1); | 1268 | smp_processor_id(), v , v1); |
1356 | irq_exit(); | 1269 | irq_exit(); |
1357 | } | 1270 | } |
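
The ESR bits that smp_error_interrupt() prints (the tail of the list survives in the context above) decode mechanically; per the Intel manuals the low eight bits are, in order: send checksum error, receive checksum error, send accept error, receive accept error, a reserved bit, send illegal vector, received illegal vector, and illegal register address. A small decoder, assuming that layout:

    #include <stdio.h>

    static const char * const esr_bits[8] = {
            "Send CS error", "Receive CS error",
            "Send accept error", "Receive accept error",
            "Reserved", "Send illegal vector",
            "Received illegal vector", "Illegal register address",
    };

    static void print_esr(unsigned long v)
    {
            int i;

            for (i = 0; i < 8; i++)
                    if (v & (1ul << i))
                            printf("APIC error: %s\n", esr_bits[i]);
    }

    int main(void)
    {
            print_esr(0x40);        /* -> "Received illegal vector" */
            return 0;
    }
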
1358 | 1271 | ||
1359 | /* | 1272 | /* |
1360 | * This initializes the IO-APIC and APIC hardware if this is | 1273 | * Initialize APIC interrupts |
1361 | * a UP kernel. | ||
1362 | */ | 1274 | */ |
1363 | int __init APIC_init_uniprocessor (void) | 1275 | void __init apic_intr_init(void) |
1364 | { | 1276 | { |
1365 | if (enable_local_apic < 0) | 1277 | #ifdef CONFIG_SMP |
1366 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | 1278 | smp_intr_init(); |
1279 | #endif | ||
1280 | /* self generated IPI for local APIC timer */ | ||
1281 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | ||
1367 | 1282 | ||
1368 | if (!smp_found_config && !cpu_has_apic) | 1283 | /* IPI vectors for APIC spurious and error interrupts */ |
1369 | return -1; | 1284 | set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
1285 | set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | ||
1370 | 1286 | ||
1371 | /* | 1287 | /* thermal monitor LVT interrupt */ |
1372 | * Complain if the BIOS pretends there is one. | 1288 | #ifdef CONFIG_X86_MCE_P4THERMAL |
1373 | */ | 1289 | set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); |
1374 | if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | 1290 | #endif |
1375 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", | 1291 | } |
1376 | boot_cpu_physical_apicid); | 1292 | |
1377 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | 1293 | /** |
1378 | return -1; | 1294 | * connect_bsp_APIC - attach the APIC to the interrupt system |
1295 | */ | ||
1296 | void __init connect_bsp_APIC(void) | ||
1297 | { | ||
1298 | if (pic_mode) { | ||
1299 | /* | ||
1300 | * Do not trust the local APIC being empty at bootup. | ||
1301 | */ | ||
1302 | clear_local_APIC(); | ||
1303 | /* | ||
1304 | * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's | ||
1305 | * local APIC to INT and NMI lines. | ||
1306 | */ | ||
1307 | apic_printk(APIC_VERBOSE, "leaving PIC mode, " | ||
1308 | "enabling APIC mode.\n"); | ||
1309 | outb(0x70, 0x22); | ||
1310 | outb(0x01, 0x23); | ||
1379 | } | 1311 | } |
1312 | enable_apic_mode(); | ||
1313 | } | ||
1380 | 1314 | ||
1381 | verify_local_APIC(); | 1315 | /** |
1316 | * disconnect_bsp_APIC - detach the APIC from the interrupt system | ||
1317 | * @virt_wire_setup: indicates, whether virtual wire mode is selected | ||
1318 | * | ||
1319 | * Virtual wire mode is necessary to deliver legacy interrupts even when the | ||
1320 | * APIC is disabled. | ||
1321 | */ | ||
1322 | void disconnect_bsp_APIC(int virt_wire_setup) | ||
1323 | { | ||
1324 | if (pic_mode) { | ||
1325 | /* | ||
1326 | * Put the board back into PIC mode (has an effect only on | ||
1327 | * certain older boards). Note that APIC interrupts, including | ||
1328 | * IPIs, won't work beyond this point! The only exception are | ||
1329 | * INIT IPIs. | ||
1330 | */ | ||
1331 | apic_printk(APIC_VERBOSE, "disabling APIC mode, " | ||
1332 | "entering PIC mode.\n"); | ||
1333 | outb(0x70, 0x22); | ||
1334 | outb(0x00, 0x23); | ||
1335 | } else { | ||
1336 | /* Go back to Virtual Wire compatibility mode */ | ||
1337 | unsigned long value; | ||
1382 | 1338 | ||
1383 | connect_bsp_APIC(); | 1339 | /* For the spurious interrupt use vector F, and enable it */ |
1340 | value = apic_read(APIC_SPIV); | ||
1341 | value &= ~APIC_VECTOR_MASK; | ||
1342 | value |= APIC_SPIV_APIC_ENABLED; | ||
1343 | value |= 0xf; | ||
1344 | apic_write_around(APIC_SPIV, value); | ||
1384 | 1345 | ||
1385 | /* | 1346 | if (!virt_wire_setup) { |
1386 | * Hack: In case of kdump, after a crash, kernel might be booting | 1347 | /* |
1387 | * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid | 1348 | * For LVT0 make it edge triggered, active high, |
1388 | * might be zero if read from MP tables. Get it from LAPIC. | 1349 | * external and enabled |
1389 | */ | 1350 | */ |
1390 | #ifdef CONFIG_CRASH_DUMP | 1351 | value = apic_read(APIC_LVT0); |
1391 | boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); | 1352 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | |
1392 | #endif | 1353 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | |
1393 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); | 1354 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); |
1355 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
1356 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | ||
1357 | apic_write_around(APIC_LVT0, value); | ||
1358 | } else { | ||
1359 | /* Disable LVT0 */ | ||
1360 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED); | ||
1361 | } | ||
1394 | 1362 | ||
1395 | setup_local_APIC(); | 1363 | /* |
1364 | * For LVT1 make it edge triggered, active high, nmi and | ||
1365 | * enabled | ||
1366 | */ | ||
1367 | value = apic_read(APIC_LVT1); | ||
1368 | value &= ~( | ||
1369 | APIC_MODE_MASK | APIC_SEND_PENDING | | ||
1370 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
1371 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
1372 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
1373 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | ||
1374 | apic_write_around(APIC_LVT1, value); | ||
1375 | } | ||
1376 | } | ||
1396 | 1377 | ||
1397 | #ifdef CONFIG_X86_IO_APIC | 1378 | /* |
1398 | if (smp_found_config) | 1379 | * Power management |
1399 | if (!skip_ioapic_setup && nr_ioapics) | 1380 | */ |
1400 | setup_IO_APIC(); | 1381 | #ifdef CONFIG_PM |
1382 | |||
1383 | static struct { | ||
1384 | int active; | ||
1385 | /* r/w apic fields */ | ||
1386 | unsigned int apic_id; | ||
1387 | unsigned int apic_taskpri; | ||
1388 | unsigned int apic_ldr; | ||
1389 | unsigned int apic_dfr; | ||
1390 | unsigned int apic_spiv; | ||
1391 | unsigned int apic_lvtt; | ||
1392 | unsigned int apic_lvtpc; | ||
1393 | unsigned int apic_lvt0; | ||
1394 | unsigned int apic_lvt1; | ||
1395 | unsigned int apic_lvterr; | ||
1396 | unsigned int apic_tmict; | ||
1397 | unsigned int apic_tdcr; | ||
1398 | unsigned int apic_thmr; | ||
1399 | } apic_pm_state; | ||
1400 | |||
1401 | static int lapic_suspend(struct sys_device *dev, pm_message_t state) | ||
1402 | { | ||
1403 | unsigned long flags; | ||
1404 | int maxlvt; | ||
1405 | |||
1406 | if (!apic_pm_state.active) | ||
1407 | return 0; | ||
1408 | |||
1409 | maxlvt = lapic_get_maxlvt(); | ||
1410 | |||
1411 | apic_pm_state.apic_id = apic_read(APIC_ID); | ||
1412 | apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); | ||
1413 | apic_pm_state.apic_ldr = apic_read(APIC_LDR); | ||
1414 | apic_pm_state.apic_dfr = apic_read(APIC_DFR); | ||
1415 | apic_pm_state.apic_spiv = apic_read(APIC_SPIV); | ||
1416 | apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); | ||
1417 | if (maxlvt >= 4) | ||
1418 | apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); | ||
1419 | apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); | ||
1420 | apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); | ||
1421 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); | ||
1422 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); | ||
1423 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); | ||
1424 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
1425 | if (maxlvt >= 5) | ||
1426 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); | ||
1401 | #endif | 1427 | #endif |
1402 | setup_boot_clock(); | ||
1403 | 1428 | ||
1429 | local_irq_save(flags); | ||
1430 | disable_local_APIC(); | ||
1431 | local_irq_restore(flags); | ||
1404 | return 0; | 1432 | return 0; |
1405 | } | 1433 | } |
1406 | 1434 | ||
1407 | static int __init parse_lapic(char *arg) | 1435 | static int lapic_resume(struct sys_device *dev) |
1408 | { | 1436 | { |
1409 | lapic_enable(); | 1437 | unsigned int l, h; |
1438 | unsigned long flags; | ||
1439 | int maxlvt; | ||
1440 | |||
1441 | if (!apic_pm_state.active) | ||
1442 | return 0; | ||
1443 | |||
1444 | maxlvt = lapic_get_maxlvt(); | ||
1445 | |||
1446 | local_irq_save(flags); | ||
1447 | |||
1448 | /* | ||
1449 | * Make sure the APICBASE points to the right address | ||
1450 | * | ||
1451 | * FIXME! This will be wrong if we ever support suspend on | ||
1452 | * SMP! We'll need to do this as part of the CPU restore! | ||
1453 | */ | ||
1454 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
1455 | l &= ~MSR_IA32_APICBASE_BASE; | ||
1456 | l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; | ||
1457 | wrmsr(MSR_IA32_APICBASE, l, h); | ||
1458 | |||
1459 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); | ||
1460 | apic_write(APIC_ID, apic_pm_state.apic_id); | ||
1461 | apic_write(APIC_DFR, apic_pm_state.apic_dfr); | ||
1462 | apic_write(APIC_LDR, apic_pm_state.apic_ldr); | ||
1463 | apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); | ||
1464 | apic_write(APIC_SPIV, apic_pm_state.apic_spiv); | ||
1465 | apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); | ||
1466 | apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); | ||
1467 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
1468 | if (maxlvt >= 5) | ||
1469 | apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); | ||
1470 | #endif | ||
1471 | if (maxlvt >= 4) | ||
1472 | apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); | ||
1473 | apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); | ||
1474 | apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); | ||
1475 | apic_write(APIC_TMICT, apic_pm_state.apic_tmict); | ||
1476 | apic_write(APIC_ESR, 0); | ||
1477 | apic_read(APIC_ESR); | ||
1478 | apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); | ||
1479 | apic_write(APIC_ESR, 0); | ||
1480 | apic_read(APIC_ESR); | ||
1481 | local_irq_restore(flags); | ||
1410 | return 0; | 1482 | return 0; |
1411 | } | 1483 | } |
1412 | early_param("lapic", parse_lapic); | ||
1413 | 1484 | ||
1414 | static int __init parse_nolapic(char *arg) | 1485 | /* |
1486 | * This device has no shutdown method - fully functioning local APICs | ||
1487 | * are needed on every CPU up until machine_halt/restart/poweroff. | ||
1488 | */ | ||
1489 | |||
1490 | static struct sysdev_class lapic_sysclass = { | ||
1491 | set_kset_name("lapic"), | ||
1492 | .resume = lapic_resume, | ||
1493 | .suspend = lapic_suspend, | ||
1494 | }; | ||
1495 | |||
1496 | static struct sys_device device_lapic = { | ||
1497 | .id = 0, | ||
1498 | .cls = &lapic_sysclass, | ||
1499 | }; | ||
1500 | |||
1501 | static void __devinit apic_pm_activate(void) | ||
1415 | { | 1502 | { |
1416 | lapic_disable(); | 1503 | apic_pm_state.active = 1; |
1417 | return 0; | ||
1418 | } | 1504 | } |
1419 | early_param("nolapic", parse_nolapic); | ||
1420 | 1505 | ||
1506 | static int __init init_lapic_sysfs(void) | ||
1507 | { | ||
1508 | int error; | ||
1509 | |||
1510 | if (!cpu_has_apic) | ||
1511 | return 0; | ||
1512 | /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ | ||
1513 | |||
1514 | error = sysdev_class_register(&lapic_sysclass); | ||
1515 | if (!error) | ||
1516 | error = sysdev_register(&device_lapic); | ||
1517 | return error; | ||
1518 | } | ||
1519 | device_initcall(init_lapic_sysfs); | ||
1520 | |||
1521 | #else /* CONFIG_PM */ | ||
1522 | |||
1523 | static void apic_pm_activate(void) { } | ||
1524 | |||
1525 | #endif /* CONFIG_PM */ | ||
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index f9ba0af7ee1f..064bbf2861f4 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c | |||
@@ -236,7 +236,6 @@ | |||
236 | 236 | ||
237 | #include "io_ports.h" | 237 | #include "io_ports.h" |
238 | 238 | ||
239 | extern unsigned long get_cmos_time(void); | ||
240 | extern void machine_real_restart(unsigned char *, int); | 239 | extern void machine_real_restart(unsigned char *, int); |
241 | 240 | ||
242 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) | 241 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) |
@@ -1176,28 +1175,6 @@ out: | |||
1176 | spin_unlock(&user_list_lock); | 1175 | spin_unlock(&user_list_lock); |
1177 | } | 1176 | } |
1178 | 1177 | ||
1179 | static void set_time(void) | ||
1180 | { | ||
1181 | struct timespec ts; | ||
1182 | if (got_clock_diff) { /* Must know time zone in order to set clock */ | ||
1183 | ts.tv_sec = get_cmos_time() + clock_cmos_diff; | ||
1184 | ts.tv_nsec = 0; | ||
1185 | do_settimeofday(&ts); | ||
1186 | } | ||
1187 | } | ||
1188 | |||
1189 | static void get_time_diff(void) | ||
1190 | { | ||
1191 | #ifndef CONFIG_APM_RTC_IS_GMT | ||
1192 | /* | ||
1193 | * Estimate time zone so that set_time can update the clock | ||
1194 | */ | ||
1195 | clock_cmos_diff = -get_cmos_time(); | ||
1196 | clock_cmos_diff += get_seconds(); | ||
1197 | got_clock_diff = 1; | ||
1198 | #endif | ||
1199 | } | ||
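
The two helpers deleted above implemented a simple offset trick: get_time_diff() sampled the system-clock-minus-CMOS difference once before suspend, and set_time() reapplied that offset to the still-ticking RTC after resume. The arithmetic as a standalone example, with made-up epoch values:

    #include <stdio.h>

    int main(void)
    {
            long cmos_before = 1000000000;  /* RTC, e.g. in local time */
            long sys_before  = 1000003600;  /* system clock, e.g. UTC+1h */
            long diff = sys_before - cmos_before;   /* get_time_diff() */

            long cmos_after = 1000000500;   /* RTC kept ticking in suspend */
            printf("restored: %ld\n", cmos_after + diff);   /* set_time() */
            return 0;
    }
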
1200 | |||
1201 | static void reinit_timer(void) | 1178 | static void reinit_timer(void) |
1202 | { | 1179 | { |
1203 | #ifdef INIT_TIMER_AFTER_SUSPEND | 1180 | #ifdef INIT_TIMER_AFTER_SUSPEND |
@@ -1237,19 +1214,6 @@ static int suspend(int vetoable) | |||
1237 | local_irq_disable(); | 1214 | local_irq_disable(); |
1238 | device_power_down(PMSG_SUSPEND); | 1215 | device_power_down(PMSG_SUSPEND); |
1239 | 1216 | ||
1240 | /* serialize with the timer interrupt */ | ||
1241 | write_seqlock(&xtime_lock); | ||
1242 | |||
1243 | /* protect against access to timer chip registers */ | ||
1244 | spin_lock(&i8253_lock); | ||
1245 | |||
1246 | get_time_diff(); | ||
1247 | /* | ||
1248 | * Irq spinlock must be dropped around set_system_power_state. | ||
1249 | * We'll undo any timer changes due to interrupts below. | ||
1250 | */ | ||
1251 | spin_unlock(&i8253_lock); | ||
1252 | write_sequnlock(&xtime_lock); | ||
1253 | local_irq_enable(); | 1217 | local_irq_enable(); |
1254 | 1218 | ||
1255 | save_processor_state(); | 1219 | save_processor_state(); |
@@ -1258,7 +1222,6 @@ static int suspend(int vetoable) | |||
1258 | restore_processor_state(); | 1222 | restore_processor_state(); |
1259 | 1223 | ||
1260 | local_irq_disable(); | 1224 | local_irq_disable(); |
1261 | set_time(); | ||
1262 | reinit_timer(); | 1225 | reinit_timer(); |
1263 | 1226 | ||
1264 | if (err == APM_NO_ERROR) | 1227 | if (err == APM_NO_ERROR) |
@@ -1288,11 +1251,6 @@ static void standby(void) | |||
1288 | 1251 | ||
1289 | local_irq_disable(); | 1252 | local_irq_disable(); |
1290 | device_power_down(PMSG_SUSPEND); | 1253 | device_power_down(PMSG_SUSPEND); |
1291 | /* serialize with the timer interrupt */ | ||
1292 | write_seqlock(&xtime_lock); | ||
1293 | /* If needed, notify drivers here */ | ||
1294 | get_time_diff(); | ||
1295 | write_sequnlock(&xtime_lock); | ||
1296 | local_irq_enable(); | 1254 | local_irq_enable(); |
1297 | 1255 | ||
1298 | err = set_system_power_state(APM_STATE_STANDBY); | 1256 | err = set_system_power_state(APM_STATE_STANDBY); |
@@ -1386,7 +1344,6 @@ static void check_events(void) | |||
1386 | ignore_bounce = 1; | 1344 | ignore_bounce = 1; |
1387 | if ((event != APM_NORMAL_RESUME) | 1345 | if ((event != APM_NORMAL_RESUME) |
1388 | || (ignore_normal_resume == 0)) { | 1346 | || (ignore_normal_resume == 0)) { |
1389 | set_time(); | ||
1390 | device_resume(); | 1347 | device_resume(); |
1391 | pm_send_all(PM_RESUME, (void *)0); | 1348 | pm_send_all(PM_RESUME, (void *)0); |
1392 | queue_event(event, NULL); | 1349 | queue_event(event, NULL); |
@@ -1402,7 +1359,6 @@ static void check_events(void) | |||
1402 | break; | 1359 | break; |
1403 | 1360 | ||
1404 | case APM_UPDATE_TIME: | 1361 | case APM_UPDATE_TIME: |
1405 | set_time(); | ||
1406 | break; | 1362 | break; |
1407 | 1363 | ||
1408 | case APM_CRITICAL_SUSPEND: | 1364 | case APM_CRITICAL_SUSPEND: |
diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index 5299c5bf4454..6c52182ca323 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig | |||
@@ -217,6 +217,15 @@ config X86_LONGHAUL | |||
217 | 217 | ||
218 | If in doubt, say N. | 218 | If in doubt, say N. |
219 | 219 | ||
220 | config X86_E_POWERSAVER | ||
221 | tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" | ||
222 | select CPU_FREQ_TABLE | ||
223 | depends on EXPERIMENTAL | ||
224 | help | ||
225 | This adds the CPUFreq driver for VIA C7 processors. | ||
226 | |||
227 | If in doubt, say N. | ||
228 | |||
220 | comment "shared options" | 229 | comment "shared options" |
221 | 230 | ||
222 | config X86_ACPI_CPUFREQ_PROC_INTF | 231 | config X86_ACPI_CPUFREQ_PROC_INTF |
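With the new Kconfig entry in place, the driver can be enabled as a module once CONFIG_EXPERIMENTAL is set; CPU_FREQ_TABLE is pulled in automatically by the select line above. A plausible .config fragment:

    CONFIG_CPU_FREQ=y
    CONFIG_EXPERIMENTAL=y
    CONFIG_X86_E_POWERSAVER=m

After building, the module loads with "modprobe e_powersaver" (module name per the Makefile entry below).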
diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile index 8de3abe322a9..560f7760dae5 100644 --- a/arch/i386/kernel/cpu/cpufreq/Makefile +++ b/arch/i386/kernel/cpu/cpufreq/Makefile | |||
@@ -2,6 +2,7 @@ obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o | |||
2 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o | 2 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o |
3 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o | 3 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o |
4 | obj-$(CONFIG_X86_LONGHAUL) += longhaul.o | 4 | obj-$(CONFIG_X86_LONGHAUL) += longhaul.o |
5 | obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o | ||
5 | obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o | 6 | obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o |
6 | obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o | 7 | obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o |
7 | obj-$(CONFIG_X86_LONGRUN) += longrun.o | 8 | obj-$(CONFIG_X86_LONGRUN) += longrun.o |
diff --git a/arch/i386/kernel/cpu/cpufreq/e_powersaver.c b/arch/i386/kernel/cpu/cpufreq/e_powersaver.c new file mode 100644 index 000000000000..f43d98e11cc7 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/e_powersaver.c | |||
@@ -0,0 +1,334 @@ | |||
1 | /* | ||
2 | * Based on documentation provided by Dave Jones. Thanks! | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
7 | */ | ||
8 | |||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/cpufreq.h> | ||
13 | #include <linux/ioport.h> | ||
14 | #include <linux/slab.h> | ||
15 | |||
16 | #include <asm/msr.h> | ||
17 | #include <asm/tsc.h> | ||
18 | #include <asm/timex.h> | ||
19 | #include <asm/io.h> | ||
20 | #include <asm/delay.h> | ||
21 | |||
22 | #define EPS_BRAND_C7M 0 | ||
23 | #define EPS_BRAND_C7 1 | ||
24 | #define EPS_BRAND_EDEN 2 | ||
25 | #define EPS_BRAND_C3 3 | ||
26 | |||
27 | struct eps_cpu_data { | ||
28 | u32 fsb; | ||
29 | struct cpufreq_frequency_table freq_table[]; | ||
30 | }; | ||
31 | |||
32 | static struct eps_cpu_data *eps_cpu[NR_CPUS]; | ||
33 | |||
34 | |||
35 | static unsigned int eps_get(unsigned int cpu) | ||
36 | { | ||
37 | struct eps_cpu_data *centaur; | ||
38 | u32 lo, hi; | ||
39 | |||
40 | if (cpu) | ||
41 | return 0; | ||
42 | centaur = eps_cpu[cpu]; | ||
43 | if (centaur == NULL) | ||
44 | return 0; | ||
45 | |||
46 | /* Return current frequency */ | ||
47 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
48 | return centaur->fsb * ((lo >> 8) & 0xff); | ||
49 | } | ||
50 | |||
51 | static int eps_set_state(struct eps_cpu_data *centaur, | ||
52 | unsigned int cpu, | ||
53 | u32 dest_state) | ||
54 | { | ||
55 | struct cpufreq_freqs freqs; | ||
56 | u32 lo, hi; | ||
57 | int err = 0; | ||
58 | int i; | ||
59 | |||
60 | freqs.old = eps_get(cpu); | ||
61 | freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff); | ||
62 | freqs.cpu = cpu; | ||
63 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
64 | |||
65 | /* Wait while CPU is busy */ | ||
66 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
67 | i = 0; | ||
68 | while (lo & ((1 << 16) | (1 << 17))) { | ||
69 | udelay(16); | ||
70 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
71 | i++; | ||
72 | if (unlikely(i > 64)) { | ||
73 | err = -ENODEV; | ||
74 | goto postchange; | ||
75 | } | ||
76 | } | ||
77 | /* Set new multiplier and voltage */ | ||
78 | wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0); | ||
79 | /* Wait until transition end */ | ||
80 | i = 0; | ||
81 | do { | ||
82 | udelay(16); | ||
83 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
84 | i++; | ||
85 | if (unlikely(i > 64)) { | ||
86 | err = -ENODEV; | ||
87 | goto postchange; | ||
88 | } | ||
89 | } while (lo & ((1 << 16) | (1 << 17))); | ||
90 | |||
91 | /* Return current frequency */ | ||
92 | postchange: | ||
93 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
94 | freqs.new = centaur->fsb * ((lo >> 8) & 0xff); | ||
95 | |||
96 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
97 | return err; | ||
98 | } | ||
99 | |||
100 | static int eps_target(struct cpufreq_policy *policy, | ||
101 | unsigned int target_freq, | ||
102 | unsigned int relation) | ||
103 | { | ||
104 | struct eps_cpu_data *centaur; | ||
105 | unsigned int newstate = 0; | ||
106 | unsigned int cpu = policy->cpu; | ||
107 | unsigned int dest_state; | ||
108 | int ret; | ||
109 | |||
110 | if (unlikely(eps_cpu[cpu] == NULL)) | ||
111 | return -ENODEV; | ||
112 | centaur = eps_cpu[cpu]; | ||
113 | |||
114 | if (unlikely(cpufreq_frequency_table_target(policy, | ||
115 | &eps_cpu[cpu]->freq_table[0], | ||
116 | target_freq, | ||
117 | relation, | ||
118 | &newstate))) { | ||
119 | return -EINVAL; | ||
120 | } | ||
121 | |||
122 | /* Make frequency transition */ | ||
123 | dest_state = centaur->freq_table[newstate].index & 0xffff; | ||
124 | ret = eps_set_state(centaur, cpu, dest_state); | ||
125 | if (ret) | ||
126 | printk(KERN_ERR "eps: Timeout!\n"); | ||
127 | return ret; | ||
128 | } | ||
129 | |||
130 | static int eps_verify(struct cpufreq_policy *policy) | ||
131 | { | ||
132 | return cpufreq_frequency_table_verify(policy, | ||
133 | &eps_cpu[policy->cpu]->freq_table[0]); | ||
134 | } | ||
135 | |||
136 | static int eps_cpu_init(struct cpufreq_policy *policy) | ||
137 | { | ||
138 | unsigned int i; | ||
139 | u32 lo, hi; | ||
140 | u64 val; | ||
141 | u8 current_multiplier, current_voltage; | ||
142 | u8 max_multiplier, max_voltage; | ||
143 | u8 min_multiplier, min_voltage; | ||
144 | u8 brand; | ||
145 | u32 fsb; | ||
146 | struct eps_cpu_data *centaur; | ||
147 | struct cpufreq_frequency_table *f_table; | ||
148 | int k, step, voltage; | ||
149 | int ret; | ||
150 | int states; | ||
151 | |||
152 | if (policy->cpu != 0) | ||
153 | return -ENODEV; | ||
154 | |||
155 | /* Check brand */ | ||
156 | printk("eps: Detected VIA "); | ||
157 | rdmsr(0x1153, lo, hi); | ||
158 | brand = (((lo >> 2) ^ lo) >> 18) & 3; | ||
159 | switch(brand) { | ||
160 | case EPS_BRAND_C7M: | ||
161 | printk("C7-M\n"); | ||
162 | break; | ||
163 | case EPS_BRAND_C7: | ||
164 | printk("C7\n"); | ||
165 | break; | ||
166 | case EPS_BRAND_EDEN: | ||
167 | printk("Eden\n"); | ||
168 | break; | ||
169 | case EPS_BRAND_C3: | ||
170 | printk("C3\n"); | ||
171 | return -ENODEV; | ||
172 | break; | ||
173 | } | ||
174 | /* Enable Enhanced PowerSaver */ | ||
175 | rdmsrl(MSR_IA32_MISC_ENABLE, val); | ||
176 | if (!(val & 1 << 16)) { | ||
177 | val |= 1 << 16; | ||
178 | wrmsrl(MSR_IA32_MISC_ENABLE, val); | ||
179 | /* Can be locked at 0 */ | ||
180 | rdmsrl(MSR_IA32_MISC_ENABLE, val); | ||
181 | if (!(val & 1 << 16)) { | ||
182 | printk("eps: Can't enable Enhanced PowerSaver\n"); | ||
183 | return -ENODEV; | ||
184 | } | ||
185 | } | ||
186 | |||
187 | /* Print voltage and multiplier */ | ||
188 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
189 | current_voltage = lo & 0xff; | ||
190 | printk("eps: Current voltage = %dmV\n", current_voltage * 16 + 700); | ||
191 | current_multiplier = (lo >> 8) & 0xff; | ||
192 | printk("eps: Current multiplier = %d\n", current_multiplier); | ||
193 | |||
194 | /* Print limits */ | ||
195 | max_voltage = hi & 0xff; | ||
196 | printk("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700); | ||
197 | max_multiplier = (hi >> 8) & 0xff; | ||
198 | printk("eps: Highest multiplier = %d\n", max_multiplier); | ||
199 | min_voltage = (hi >> 16) & 0xff; | ||
200 | printk("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700); | ||
201 | min_multiplier = (hi >> 24) & 0xff; | ||
202 | printk("eps: Lowest multiplier = %d\n", min_multiplier); | ||
203 | |||
204 | /* Sanity checks */ | ||
205 | if (current_multiplier == 0 || max_multiplier == 0 | ||
206 | || min_multiplier == 0) | ||
207 | return -EINVAL; | ||
208 | if (current_multiplier > max_multiplier | ||
209 | || max_multiplier <= min_multiplier) | ||
210 | return -EINVAL; | ||
211 | if (current_voltage > 0x1c || max_voltage > 0x1c) | ||
212 | return -EINVAL; | ||
213 | if (max_voltage < min_voltage) | ||
214 | return -EINVAL; | ||
215 | |||
216 | /* Calc FSB speed */ | ||
217 | fsb = cpu_khz / current_multiplier; | ||
218 | /* Calc number of p-states supported */ | ||
219 | if (brand == EPS_BRAND_C7M) | ||
220 | states = max_multiplier - min_multiplier + 1; | ||
221 | else | ||
222 | states = 2; | ||
223 | |||
224 | /* Allocate private data and frequency table for current cpu */ | ||
225 | centaur = kzalloc(sizeof(struct eps_cpu_data) | ||
226 | + (states + 1) * sizeof(struct cpufreq_frequency_table), | ||
227 | GFP_KERNEL); | ||
228 | if (!centaur) | ||
229 | return -ENOMEM; | ||
230 | eps_cpu[0] = centaur; | ||
231 | |||
232 | /* Copy basic values */ | ||
233 | centaur->fsb = fsb; | ||
234 | |||
235 | /* Fill frequency and MSR value table */ | ||
236 | f_table = &centaur->freq_table[0]; | ||
237 | if (brand != EPS_BRAND_C7M) { | ||
238 | f_table[0].frequency = fsb * min_multiplier; | ||
239 | f_table[0].index = (min_multiplier << 8) | min_voltage; | ||
240 | f_table[1].frequency = fsb * max_multiplier; | ||
241 | f_table[1].index = (max_multiplier << 8) | max_voltage; | ||
242 | f_table[2].frequency = CPUFREQ_TABLE_END; | ||
243 | } else { | ||
244 | k = 0; | ||
245 | step = ((max_voltage - min_voltage) * 256) | ||
246 | / (max_multiplier - min_multiplier); | ||
247 | for (i = min_multiplier; i <= max_multiplier; i++) { | ||
248 | voltage = (k * step) / 256 + min_voltage; | ||
249 | f_table[k].frequency = fsb * i; | ||
250 | f_table[k].index = (i << 8) | voltage; | ||
251 | k++; | ||
252 | } | ||
253 | f_table[k].frequency = CPUFREQ_TABLE_END; | ||
254 | } | ||
255 | |||
256 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
257 | policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */ | ||
258 | policy->cur = fsb * current_multiplier; | ||
259 | |||
260 | ret = cpufreq_frequency_table_cpuinfo(policy, &centaur->freq_table[0]); | ||
261 | if (ret) { | ||
262 | kfree(centaur); | ||
263 | return ret; | ||
264 | } | ||
265 | |||
266 | cpufreq_frequency_table_get_attr(&centaur->freq_table[0], policy->cpu); | ||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | static int eps_cpu_exit(struct cpufreq_policy *policy) | ||
271 | { | ||
272 | unsigned int cpu = policy->cpu; | ||
273 | struct eps_cpu_data *centaur; | ||
274 | u32 lo, hi; | ||
275 | |||
276 | if (eps_cpu[cpu] == NULL) | ||
277 | return -ENODEV; | ||
278 | centaur = eps_cpu[cpu]; | ||
279 | |||
280 | /* Get max frequency */ | ||
281 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
282 | /* Set max frequency */ | ||
283 | eps_set_state(centaur, cpu, hi & 0xffff); | ||
284 | /* Bye */ | ||
285 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
286 | kfree(eps_cpu[cpu]); | ||
287 | eps_cpu[cpu] = NULL; | ||
288 | return 0; | ||
289 | } | ||
290 | |||
291 | static struct freq_attr* eps_attr[] = { | ||
292 | &cpufreq_freq_attr_scaling_available_freqs, | ||
293 | NULL, | ||
294 | }; | ||
295 | |||
296 | static struct cpufreq_driver eps_driver = { | ||
297 | .verify = eps_verify, | ||
298 | .target = eps_target, | ||
299 | .init = eps_cpu_init, | ||
300 | .exit = eps_cpu_exit, | ||
301 | .get = eps_get, | ||
302 | .name = "e_powersaver", | ||
303 | .owner = THIS_MODULE, | ||
304 | .attr = eps_attr, | ||
305 | }; | ||
306 | |||
307 | static int __init eps_init(void) | ||
308 | { | ||
309 | struct cpuinfo_x86 *c = cpu_data; | ||
310 | |||
311 | /* This driver will work only on Centaur C7 processors with | ||
312 | * Enhanced SpeedStep/PowerSaver registers */ | ||
313 | if (c->x86_vendor != X86_VENDOR_CENTAUR | ||
314 | || c->x86 != 6 || c->x86_model != 10) | ||
315 | return -ENODEV; | ||
316 | if (!cpu_has(c, X86_FEATURE_EST)) | ||
317 | return -ENODEV; | ||
318 | |||
319 | if (cpufreq_register_driver(&eps_driver)) | ||
320 | return -EINVAL; | ||
321 | return 0; | ||
322 | } | ||
323 | |||
324 | static void __exit eps_exit(void) | ||
325 | { | ||
326 | cpufreq_unregister_driver(&eps_driver); | ||
327 | } | ||
328 | |||
329 | MODULE_AUTHOR("Rafał Bilski <rafalbilski@interia.pl>"); | ||
330 | MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPUs."); | ||
331 | MODULE_LICENSE("GPL"); | ||
332 | |||
333 | module_init(eps_init); | ||
334 | module_exit(eps_exit); | ||
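The driver above treats MSR_IA32_PERF_STATUS as its single source of truth: the low word of "lo" holds the current voltage index (700 mV base, 16 mV steps) and the current multiplier, "hi" packs the highest and lowest supported pairs, the running frequency is simply fsb * multiplier, and eps_set_state() polls bits 16/17 of "lo" until a transition completes. A self-contained sketch of that field decoding, with made-up register values purely for demonstration:

    #include <stdio.h>
    #include <stdint.h>

    /* Decode MSR_IA32_PERF_STATUS the way eps_cpu_init() does. */
    static void eps_decode_perf_status(uint32_t lo, uint32_t hi)
    {
            unsigned cur_mV   = (lo & 0xff) * 16 + 700;
            unsigned cur_mult = (lo >> 8) & 0xff;
            unsigned max_mV   = (hi & 0xff) * 16 + 700;
            unsigned max_mult = (hi >> 8) & 0xff;
            unsigned min_mV   = ((hi >> 16) & 0xff) * 16 + 700;
            unsigned min_mult = (hi >> 24) & 0xff;

            printf("current: %u mV, %ux\n", cur_mV, cur_mult);
            printf("range:   %ux..%ux, %u..%u mV\n",
                   min_mult, max_mult, min_mV, max_mV);
            printf("(max voltage index belongs with max multiplier: %u mV)\n",
                   max_mV);
    }

    int main(void)
    {
            /* Example raw values (made up) for demonstration only */
            eps_decode_perf_status(0x00000a09, 0x04061009);
            return 0;
    }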
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index a3db9332d652..b59878a0d9b3 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c | |||
@@ -8,12 +8,11 @@ | |||
8 | * VIA have currently 3 different versions of Longhaul. | 8 | * VIA have currently 3 different versions of Longhaul. |
9 | * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. | 9 | * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. |
10 | * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. | 10 | * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. |
11 | * Version 2 of longhaul is the same as v1, but adds voltage scaling. | 11 | * Version 2 of longhaul is backward compatible with v1, but adds |
12 | Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C) | 12 | LONGHAUL MSR for the purpose of both frequency and voltage scaling. |
13 | * voltage scaling support has currently been disabled in this driver | 13 | * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C). |
14 | * until we have code that gets it right. | ||
15 | * Version 3 of longhaul got renamed to Powersaver and redesigned | 14 | * Version 3 of longhaul got renamed to Powersaver and redesigned |
16 | * to use the POWERSAVER MSR at 0x110a. | 15 | * to use only the POWERSAVER MSR at 0x110a. |
17 | * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. | 16 | * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. |
18 | * It's pretty much the same feature wise to longhaul v2, though | 17 | * It's pretty much the same feature wise to longhaul v2, though |
19 | * there is provision for scaling FSB too, but this doesn't work | 18 | * there is provision for scaling FSB too, but this doesn't work |
@@ -51,10 +50,12 @@ | |||
51 | #define CPU_EZRA 3 | 50 | #define CPU_EZRA 3 |
52 | #define CPU_EZRA_T 4 | 51 | #define CPU_EZRA_T 4 |
53 | #define CPU_NEHEMIAH 5 | 52 | #define CPU_NEHEMIAH 5 |
53 | #define CPU_NEHEMIAH_C 6 | ||
54 | 54 | ||
55 | /* Flags */ | 55 | /* Flags */ |
56 | #define USE_ACPI_C3 (1 << 1) | 56 | #define USE_ACPI_C3 (1 << 1) |
57 | #define USE_NORTHBRIDGE (1 << 2) | 57 | #define USE_NORTHBRIDGE (1 << 2) |
58 | #define USE_VT8235 (1 << 3) | ||
58 | 59 | ||
59 | static int cpu_model; | 60 | static int cpu_model; |
60 | static unsigned int numscales=16; | 61 | static unsigned int numscales=16; |
@@ -63,7 +64,8 @@ static unsigned int fsb; | |||
63 | static struct mV_pos *vrm_mV_table; | 64 | static struct mV_pos *vrm_mV_table; |
64 | static unsigned char *mV_vrm_table; | 65 | static unsigned char *mV_vrm_table; |
65 | struct f_msr { | 66 | struct f_msr { |
66 | unsigned char vrm; | 67 | u8 vrm; |
68 | u8 pos; | ||
67 | }; | 69 | }; |
68 | static struct f_msr f_msr_table[32]; | 70 | static struct f_msr f_msr_table[32]; |
69 | 71 | ||
@@ -73,10 +75,10 @@ static int can_scale_voltage; | |||
73 | static struct acpi_processor *pr = NULL; | 75 | static struct acpi_processor *pr = NULL; |
74 | static struct acpi_processor_cx *cx = NULL; | 76 | static struct acpi_processor_cx *cx = NULL; |
75 | static u8 longhaul_flags; | 77 | static u8 longhaul_flags; |
78 | static u8 longhaul_pos; | ||
76 | 79 | ||
77 | /* Module parameters */ | 80 | /* Module parameters */ |
78 | static int scale_voltage; | 81 | static int scale_voltage; |
79 | static int ignore_latency; | ||
80 | 82 | ||
81 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) | 83 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) |
82 | 84 | ||
@@ -164,26 +166,47 @@ static void do_longhaul1(unsigned int clock_ratio_index) | |||
164 | static void do_powersaver(int cx_address, unsigned int clock_ratio_index) | 166 | static void do_powersaver(int cx_address, unsigned int clock_ratio_index) |
165 | { | 167 | { |
166 | union msr_longhaul longhaul; | 168 | union msr_longhaul longhaul; |
169 | u8 dest_pos; | ||
167 | u32 t; | 170 | u32 t; |
168 | 171 | ||
172 | dest_pos = f_msr_table[clock_ratio_index].pos; | ||
173 | |||
169 | rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); | 174 | rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); |
175 | /* Setup new frequency */ | ||
170 | longhaul.bits.RevisionKey = longhaul.bits.RevisionID; | 176 | longhaul.bits.RevisionKey = longhaul.bits.RevisionID; |
171 | longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; | 177 | longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; |
172 | longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; | 178 | longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; |
173 | longhaul.bits.EnableSoftBusRatio = 1; | 179 | /* Setup new voltage */ |
174 | 180 | if (can_scale_voltage) | |
175 | if (can_scale_voltage) { | ||
176 | longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm; | 181 | longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm; |
182 | /* Sync to timer tick */ | ||
183 | safe_halt(); | ||
184 | /* Raise voltage if necessary */ | ||
185 | if (can_scale_voltage && longhaul_pos < dest_pos) { | ||
177 | longhaul.bits.EnableSoftVID = 1; | 186 | longhaul.bits.EnableSoftVID = 1; |
187 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
188 | /* Change voltage */ | ||
189 | if (!cx_address) { | ||
190 | ACPI_FLUSH_CPU_CACHE(); | ||
191 | halt(); | ||
192 | } else { | ||
193 | ACPI_FLUSH_CPU_CACHE(); | ||
194 | /* Invoke C3 */ | ||
195 | inb(cx_address); | ||
196 | /* Dummy op - must do something useless after P_LVL3 | ||
197 | * read */ | ||
198 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); | ||
199 | } | ||
200 | longhaul.bits.EnableSoftVID = 0; | ||
201 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
202 | longhaul_pos = dest_pos; | ||
178 | } | 203 | } |
179 | 204 | ||
180 | /* Sync to timer tick */ | ||
181 | safe_halt(); | ||
182 | /* Change frequency on next halt or sleep */ | 205 | /* Change frequency on next halt or sleep */ |
206 | longhaul.bits.EnableSoftBusRatio = 1; | ||
183 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | 207 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); |
184 | if (!cx_address) { | 208 | if (!cx_address) { |
185 | ACPI_FLUSH_CPU_CACHE(); | 209 | ACPI_FLUSH_CPU_CACHE(); |
186 | /* Invoke C1 */ | ||
187 | halt(); | 210 | halt(); |
188 | } else { | 211 | } else { |
189 | ACPI_FLUSH_CPU_CACHE(); | 212 | ACPI_FLUSH_CPU_CACHE(); |
@@ -193,12 +216,29 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) | |||
193 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); | 216 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); |
194 | } | 217 | } |
195 | /* Disable bus ratio bit */ | 218 | /* Disable bus ratio bit */ |
196 | local_irq_disable(); | ||
197 | longhaul.bits.RevisionKey = longhaul.bits.RevisionID; | ||
198 | longhaul.bits.EnableSoftBusRatio = 0; | 219 | longhaul.bits.EnableSoftBusRatio = 0; |
199 | longhaul.bits.EnableSoftBSEL = 0; | ||
200 | longhaul.bits.EnableSoftVID = 0; | ||
201 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | 220 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); |
221 | |||
222 | /* Reduce voltage if necessary */ | ||
223 | if (can_scale_voltage && longhaul_pos > dest_pos) { | ||
224 | longhaul.bits.EnableSoftVID = 1; | ||
225 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
226 | /* Change voltage */ | ||
227 | if (!cx_address) { | ||
228 | ACPI_FLUSH_CPU_CACHE(); | ||
229 | halt(); | ||
230 | } else { | ||
231 | ACPI_FLUSH_CPU_CACHE(); | ||
232 | /* Invoke C3 */ | ||
233 | inb(cx_address); | ||
234 | /* Dummy op - must do something useless after P_LVL3 | ||
235 | * read */ | ||
236 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); | ||
237 | } | ||
238 | longhaul.bits.EnableSoftVID = 0; | ||
239 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
240 | longhaul_pos = dest_pos; | ||
241 | } | ||
202 | } | 242 | } |
203 | 243 | ||
204 | /** | 244 | /** |
@@ -257,26 +297,19 @@ static void longhaul_setstate(unsigned int clock_ratio_index) | |||
257 | /* | 297 | /* |
258 | * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) | 298 | * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) |
259 | * Software controlled multipliers only. | 299 | * Software controlled multipliers only. |
260 | * | ||
261 | * *NB* Until we get voltage scaling working v1 & v2 are the same code. | ||
262 | * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5b] and Ezra [C5C] | ||
263 | */ | 300 | */ |
264 | case TYPE_LONGHAUL_V1: | 301 | case TYPE_LONGHAUL_V1: |
265 | case TYPE_LONGHAUL_V2: | ||
266 | do_longhaul1(clock_ratio_index); | 302 | do_longhaul1(clock_ratio_index); |
267 | break; | 303 | break; |
268 | 304 | ||
269 | /* | 305 | /* |
306 | * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C] | ||
307 | * | ||
270 | * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) | 308 | * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) |
271 | * We can scale voltage with this too, but that's currently | ||
272 | * disabled until we come up with a decent 'match freq to voltage' | ||
273 | * algorithm. | ||
274 | * When we add voltage scaling, we will also need to do the | ||
275 | * voltage/freq setting in order depending on the direction | ||
276 | * of scaling (like we do in powernow-k7.c) | ||
277 | * Nehemiah can do FSB scaling too, but this has never been proven | 309 | * Nehemiah can do FSB scaling too, but this has never been proven |
278 | * to work in practice. | 310 | * to work in practice. |
279 | */ | 311 | */ |
312 | case TYPE_LONGHAUL_V2: | ||
280 | case TYPE_POWERSAVER: | 313 | case TYPE_POWERSAVER: |
281 | if (longhaul_flags & USE_ACPI_C3) { | 314 | if (longhaul_flags & USE_ACPI_C3) { |
282 | /* Don't allow wakeup */ | 315 | /* Don't allow wakeup */ |
@@ -301,6 +334,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index) | |||
301 | local_irq_restore(flags); | 334 | local_irq_restore(flags); |
302 | preempt_enable(); | 335 | preempt_enable(); |
303 | 336 | ||
337 | freqs.new = calc_speed(longhaul_get_cpu_mult()); | ||
304 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 338 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
305 | } | 339 | } |
306 | 340 | ||
@@ -315,31 +349,19 @@ static void longhaul_setstate(unsigned int clock_ratio_index) | |||
315 | 349 | ||
316 | #define ROUNDING 0xf | 350 | #define ROUNDING 0xf |
317 | 351 | ||
318 | static int _guess(int guess, int mult) | ||
319 | { | ||
320 | int target; | ||
321 | |||
322 | target = ((mult/10)*guess); | ||
323 | if (mult%10 != 0) | ||
324 | target += (guess/2); | ||
325 | target += ROUNDING/2; | ||
326 | target &= ~ROUNDING; | ||
327 | return target; | ||
328 | } | ||
329 | |||
330 | |||
331 | static int guess_fsb(int mult) | 352 | static int guess_fsb(int mult) |
332 | { | 353 | { |
333 | int speed = (cpu_khz/1000); | 354 | int speed = cpu_khz / 1000; |
334 | int i; | 355 | int i; |
335 | int speeds[] = { 66, 100, 133, 200 }; | 356 | int speeds[] = { 666, 1000, 1333, 2000 }; |
336 | 357 | int f_max, f_min; | |
337 | speed += ROUNDING/2; | 358 | |
338 | speed &= ~ROUNDING; | 359 | for (i = 0; i < 4; i++) { |
339 | 360 | f_max = ((speeds[i] * mult) + 50) / 100; | |
340 | for (i=0; i<4; i++) { | 361 | f_max += (ROUNDING / 2); |
341 | if (_guess(speeds[i], mult) == speed) | 362 | f_min = f_max - ROUNDING; |
342 | return speeds[i]; | 363 | if ((speed <= f_max) && (speed >= f_min)) |
364 | return speeds[i] / 10; | ||
343 | } | 365 | } |
344 | return 0; | 366 | return 0; |
345 | } | 367 | } |
@@ -347,67 +369,40 @@ static int guess_fsb(int mult) | |||
347 | 369 | ||
348 | static int __init longhaul_get_ranges(void) | 370 | static int __init longhaul_get_ranges(void) |
349 | { | 371 | { |
350 | unsigned long invalue; | ||
351 | unsigned int ezra_t_multipliers[32]= { | ||
352 | 90, 30, 40, 100, 55, 35, 45, 95, | ||
353 | 50, 70, 80, 60, 120, 75, 85, 65, | ||
354 | -1, 110, 120, -1, 135, 115, 125, 105, | ||
355 | 130, 150, 160, 140, -1, 155, -1, 145 }; | ||
356 | unsigned int j, k = 0; | 372 | unsigned int j, k = 0; |
357 | union msr_longhaul longhaul; | 373 | int mult; |
358 | int mult = 0; | ||
359 | 374 | ||
360 | switch (longhaul_version) { | 375 | /* Get current frequency */ |
361 | case TYPE_LONGHAUL_V1: | 376 | mult = longhaul_get_cpu_mult(); |
362 | case TYPE_LONGHAUL_V2: | 377 | if (mult == -1) { |
363 | /* Ugh, Longhaul v1 didn't have the min/max MSRs. | 378 | printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n"); |
364 | Assume min=3.0x & max = whatever we booted at. */ | 379 | return -EINVAL; |
380 | } | ||
381 | fsb = guess_fsb(mult); | ||
382 | if (fsb == 0) { | ||
383 | printk(KERN_INFO PFX "Invalid (reserved) FSB!\n"); | ||
384 | return -EINVAL; | ||
385 | } | ||
386 | /* Get max multiplier - as we always did. | ||
387 | * Longhaul MSR is useful only when voltage scaling is enabled. | ||
388 | * C3 is booting at max anyway. */ | ||
389 | maxmult = mult; | ||
390 | /* Get min multiplier */ | ||
391 | switch (cpu_model) { | ||
392 | case CPU_NEHEMIAH: | ||
393 | minmult = 50; | ||
394 | break; | ||
395 | case CPU_NEHEMIAH_C: | ||
396 | minmult = 40; | ||
397 | break; | ||
398 | default: | ||
365 | minmult = 30; | 399 | minmult = 30; |
366 | maxmult = mult = longhaul_get_cpu_mult(); | ||
367 | break; | 400 | break; |
368 | |||
369 | case TYPE_POWERSAVER: | ||
370 | /* Ezra-T */ | ||
371 | if (cpu_model==CPU_EZRA_T) { | ||
372 | minmult = 30; | ||
373 | rdmsrl (MSR_VIA_LONGHAUL, longhaul.val); | ||
374 | invalue = longhaul.bits.MaxMHzBR; | ||
375 | if (longhaul.bits.MaxMHzBR4) | ||
376 | invalue += 16; | ||
377 | maxmult = mult = ezra_t_multipliers[invalue]; | ||
378 | break; | ||
379 | } | ||
380 | |||
381 | /* Nehemiah */ | ||
382 | if (cpu_model==CPU_NEHEMIAH) { | ||
383 | rdmsrl (MSR_VIA_LONGHAUL, longhaul.val); | ||
384 | |||
385 | /* | ||
386 | * TODO: This code works, but raises a lot of questions. | ||
387 | * - Some Nehemiah's seem to have broken Min/MaxMHzBR's. | ||
388 | * We get around this by using a hardcoded multiplier of 4.0x | ||
389 | * for the minimimum speed, and the speed we booted up at for the max. | ||
390 | * This is done in longhaul_get_cpu_mult() by reading the EBLCR register. | ||
391 | * - According to some VIA documentation EBLCR is only | ||
392 | * in pre-Nehemiah C3s. How this still works is a mystery. | ||
393 | * We're possibly using something undocumented and unsupported, | ||
394 | * But it works, so we don't grumble. | ||
395 | */ | ||
396 | minmult=40; | ||
397 | maxmult = mult = longhaul_get_cpu_mult(); | ||
398 | break; | ||
399 | } | ||
400 | } | 401 | } |
401 | fsb = guess_fsb(mult); | ||
402 | 402 | ||
403 | dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", | 403 | dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", |
404 | minmult/10, minmult%10, maxmult/10, maxmult%10); | 404 | minmult/10, minmult%10, maxmult/10, maxmult%10); |
405 | 405 | ||
406 | if (fsb == 0) { | ||
407 | printk (KERN_INFO PFX "Invalid (reserved) FSB!\n"); | ||
408 | return -EINVAL; | ||
409 | } | ||
410 | |||
411 | highest_speed = calc_speed(maxmult); | 406 | highest_speed = calc_speed(maxmult); |
412 | lowest_speed = calc_speed(minmult); | 407 | lowest_speed = calc_speed(minmult); |
413 | dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, | 408 | dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, |
@@ -455,6 +450,7 @@ static void __init longhaul_setup_voltagescaling(void) | |||
455 | union msr_longhaul longhaul; | 450 | union msr_longhaul longhaul; |
456 | struct mV_pos minvid, maxvid; | 451 | struct mV_pos minvid, maxvid; |
457 | unsigned int j, speed, pos, kHz_step, numvscales; | 452 | unsigned int j, speed, pos, kHz_step, numvscales; |
453 | int min_vid_speed; | ||
458 | 454 | ||
459 | rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); | 455 | rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); |
460 | if (!(longhaul.bits.RevisionID & 1)) { | 456 | if (!(longhaul.bits.RevisionID & 1)) { |
@@ -468,14 +464,14 @@ static void __init longhaul_setup_voltagescaling(void) | |||
468 | mV_vrm_table = &mV_vrm85[0]; | 464 | mV_vrm_table = &mV_vrm85[0]; |
469 | } else { | 465 | } else { |
470 | printk (KERN_INFO PFX "Mobile VRM\n"); | 466 | printk (KERN_INFO PFX "Mobile VRM\n"); |
467 | if (cpu_model < CPU_NEHEMIAH) | ||
468 | return; | ||
471 | vrm_mV_table = &mobilevrm_mV[0]; | 469 | vrm_mV_table = &mobilevrm_mV[0]; |
472 | mV_vrm_table = &mV_mobilevrm[0]; | 470 | mV_vrm_table = &mV_mobilevrm[0]; |
473 | } | 471 | } |
474 | 472 | ||
475 | minvid = vrm_mV_table[longhaul.bits.MinimumVID]; | 473 | minvid = vrm_mV_table[longhaul.bits.MinimumVID]; |
476 | maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; | 474 | maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; |
477 | numvscales = maxvid.pos - minvid.pos + 1; | ||
478 | kHz_step = (highest_speed - lowest_speed) / numvscales; | ||
479 | 475 | ||
480 | if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { | 476 | if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { |
481 | printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " | 477 | printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " |
@@ -491,20 +487,59 @@ static void __init longhaul_setup_voltagescaling(void) | |||
491 | return; | 487 | return; |
492 | } | 488 | } |
493 | 489 | ||
494 | printk(KERN_INFO PFX "Max VID=%d.%03d Min VID=%d.%03d, %d possible voltage scales\n", | 490 | /* How many voltage steps */ |
491 | numvscales = maxvid.pos - minvid.pos + 1; | ||
492 | printk(KERN_INFO PFX | ||
493 | "Max VID=%d.%03d " | ||
494 | "Min VID=%d.%03d, " | ||
495 | "%d possible voltage scales\n", | ||
495 | maxvid.mV/1000, maxvid.mV%1000, | 496 | maxvid.mV/1000, maxvid.mV%1000, |
496 | minvid.mV/1000, minvid.mV%1000, | 497 | minvid.mV/1000, minvid.mV%1000, |
497 | numvscales); | 498 | numvscales); |
498 | 499 | ||
500 | /* Calculate max frequency at min voltage */ | ||
501 | j = longhaul.bits.MinMHzBR; | ||
502 | if (longhaul.bits.MinMHzBR4) | ||
503 | j += 16; | ||
504 | min_vid_speed = eblcr_table[j]; | ||
505 | if (min_vid_speed == -1) | ||
506 | return; | ||
507 | switch (longhaul.bits.MinMHzFSB) { | ||
508 | case 0: | ||
509 | min_vid_speed *= 13333; | ||
510 | break; | ||
511 | case 1: | ||
512 | min_vid_speed *= 10000; | ||
513 | break; | ||
514 | case 3: | ||
515 | min_vid_speed *= 6666; | ||
516 | break; | ||
517 | default: | ||
518 | return; | ||
519 | break; | ||
520 | } | ||
521 | if (min_vid_speed >= highest_speed) | ||
522 | return; | ||
523 | /* Calculate kHz for one voltage step */ | ||
524 | kHz_step = (highest_speed - min_vid_speed) / numvscales; | ||
525 | |||
526 | |||
499 | j = 0; | 527 | j = 0; |
500 | while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { | 528 | while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { |
501 | speed = longhaul_table[j].frequency; | 529 | speed = longhaul_table[j].frequency; |
502 | pos = (speed - lowest_speed) / kHz_step + minvid.pos; | 530 | if (speed > min_vid_speed) |
531 | pos = (speed - min_vid_speed) / kHz_step + minvid.pos; | ||
532 | else | ||
533 | pos = minvid.pos; | ||
503 | f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos]; | 534 | f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos]; |
535 | f_msr_table[longhaul_table[j].index].pos = pos; | ||
504 | j++; | 536 | j++; |
505 | } | 537 | } |
506 | 538 | ||
539 | longhaul_pos = maxvid.pos; | ||
507 | can_scale_voltage = 1; | 540 | can_scale_voltage = 1; |
541 | printk(KERN_INFO PFX "Voltage scaling enabled. " | ||
542 | "Use of \"conservative\" governor is highly recommended.\n"); | ||
508 | } | 543 | } |
509 | 544 | ||
510 | 545 | ||
@@ -573,20 +608,51 @@ static int enable_arbiter_disable(void) | |||
573 | if (dev != NULL) { | 608 | if (dev != NULL) { |
574 | /* Enable access to port 0x22 */ | 609 | /* Enable access to port 0x22 */ |
575 | pci_read_config_byte(dev, reg, &pci_cmd); | 610 | pci_read_config_byte(dev, reg, &pci_cmd); |
576 | if ( !(pci_cmd & 1<<7) ) { | 611 | if (!(pci_cmd & 1<<7)) { |
577 | pci_cmd |= 1<<7; | 612 | pci_cmd |= 1<<7; |
578 | pci_write_config_byte(dev, reg, pci_cmd); | 613 | pci_write_config_byte(dev, reg, pci_cmd); |
614 | pci_read_config_byte(dev, reg, &pci_cmd); | ||
615 | if (!(pci_cmd & 1<<7)) { | ||
616 | printk(KERN_ERR PFX | ||
617 | "Can't enable access to port 0x22.\n"); | ||
618 | return 0; | ||
619 | } | ||
579 | } | 620 | } |
580 | return 1; | 621 | return 1; |
581 | } | 622 | } |
582 | return 0; | 623 | return 0; |
583 | } | 624 | } |
584 | 625 | ||
626 | static int longhaul_setup_vt8235(void) | ||
627 | { | ||
628 | struct pci_dev *dev; | ||
629 | u8 pci_cmd; | ||
630 | |||
631 | /* Find VT8235 southbridge */ | ||
632 | dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); | ||
633 | if (dev != NULL) { | ||
634 | /* Set transition time to max */ | ||
635 | pci_read_config_byte(dev, 0xec, &pci_cmd); | ||
636 | pci_cmd &= ~(1 << 2); | ||
637 | pci_write_config_byte(dev, 0xec, pci_cmd); | ||
638 | pci_read_config_byte(dev, 0xe4, &pci_cmd); | ||
639 | pci_cmd &= ~(1 << 7); | ||
640 | pci_write_config_byte(dev, 0xe4, pci_cmd); | ||
641 | pci_read_config_byte(dev, 0xe5, &pci_cmd); | ||
642 | pci_cmd |= 1 << 7; | ||
643 | pci_write_config_byte(dev, 0xe5, pci_cmd); | ||
644 | return 1; | ||
645 | } | ||
646 | return 0; | ||
647 | } | ||
648 | |||
585 | static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | 649 | static int __init longhaul_cpu_init(struct cpufreq_policy *policy) |
586 | { | 650 | { |
587 | struct cpuinfo_x86 *c = cpu_data; | 651 | struct cpuinfo_x86 *c = cpu_data; |
588 | char *cpuname=NULL; | 652 | char *cpuname=NULL; |
589 | int ret; | 653 | int ret; |
654 | u32 lo, hi; | ||
655 | int vt8235_present; | ||
590 | 656 | ||
591 | /* Check what we have on this motherboard */ | 657 | /* Check what we have on this motherboard */ |
592 | switch (c->x86_model) { | 658 | switch (c->x86_model) { |
@@ -599,16 +665,20 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
599 | break; | 665 | break; |
600 | 666 | ||
601 | case 7: | 667 | case 7: |
602 | longhaul_version = TYPE_LONGHAUL_V1; | ||
603 | switch (c->x86_mask) { | 668 | switch (c->x86_mask) { |
604 | case 0: | 669 | case 0: |
670 | longhaul_version = TYPE_LONGHAUL_V1; | ||
605 | cpu_model = CPU_SAMUEL2; | 671 | cpu_model = CPU_SAMUEL2; |
606 | cpuname = "C3 'Samuel 2' [C5B]"; | 672 | cpuname = "C3 'Samuel 2' [C5B]"; |
607 | /* Note, this is not a typo, early Samuel2's had Samuel1 ratios. */ | 673 | /* Note, this is not a typo, early Samuel2's had |
608 | memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); | 674 | * Samuel1 ratios. */ |
609 | memcpy (eblcr_table, samuel2_eblcr, sizeof(samuel2_eblcr)); | 675 | memcpy(clock_ratio, samuel1_clock_ratio, |
676 | sizeof(samuel1_clock_ratio)); | ||
677 | memcpy(eblcr_table, samuel2_eblcr, | ||
678 | sizeof(samuel2_eblcr)); | ||
610 | break; | 679 | break; |
611 | case 1 ... 15: | 680 | case 1 ... 15: |
681 | longhaul_version = TYPE_LONGHAUL_V2; | ||
612 | if (c->x86_mask < 8) { | 682 | if (c->x86_mask < 8) { |
613 | cpu_model = CPU_SAMUEL2; | 683 | cpu_model = CPU_SAMUEL2; |
614 | cpuname = "C3 'Samuel 2' [C5B]"; | 684 | cpuname = "C3 'Samuel 2' [C5B]"; |
@@ -616,8 +686,10 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
616 | cpu_model = CPU_EZRA; | 686 | cpu_model = CPU_EZRA; |
617 | cpuname = "C3 'Ezra' [C5C]"; | 687 | cpuname = "C3 'Ezra' [C5C]"; |
618 | } | 688 | } |
619 | memcpy (clock_ratio, ezra_clock_ratio, sizeof(ezra_clock_ratio)); | 689 | memcpy(clock_ratio, ezra_clock_ratio, |
620 | memcpy (eblcr_table, ezra_eblcr, sizeof(ezra_eblcr)); | 690 | sizeof(ezra_clock_ratio)); |
691 | memcpy(eblcr_table, ezra_eblcr, | ||
692 | sizeof(ezra_eblcr)); | ||
621 | break; | 693 | break; |
622 | } | 694 | } |
623 | break; | 695 | break; |
@@ -632,24 +704,24 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
632 | break; | 704 | break; |
633 | 705 | ||
634 | case 9: | 706 | case 9: |
635 | cpu_model = CPU_NEHEMIAH; | ||
636 | longhaul_version = TYPE_POWERSAVER; | 707 | longhaul_version = TYPE_POWERSAVER; |
637 | numscales=32; | 708 | numscales = 32; |
709 | memcpy(clock_ratio, | ||
710 | nehemiah_clock_ratio, | ||
711 | sizeof(nehemiah_clock_ratio)); | ||
712 | memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr)); | ||
638 | switch (c->x86_mask) { | 713 | switch (c->x86_mask) { |
639 | case 0 ... 1: | 714 | case 0 ... 1: |
640 | cpuname = "C3 'Nehemiah A' [C5N]"; | 715 | cpu_model = CPU_NEHEMIAH; |
641 | memcpy (clock_ratio, nehemiah_a_clock_ratio, sizeof(nehemiah_a_clock_ratio)); | 716 | cpuname = "C3 'Nehemiah A' [C5XLOE]"; |
642 | memcpy (eblcr_table, nehemiah_a_eblcr, sizeof(nehemiah_a_eblcr)); | ||
643 | break; | 717 | break; |
644 | case 2 ... 4: | 718 | case 2 ... 4: |
645 | cpuname = "C3 'Nehemiah B' [C5N]"; | 719 | cpu_model = CPU_NEHEMIAH; |
646 | memcpy (clock_ratio, nehemiah_b_clock_ratio, sizeof(nehemiah_b_clock_ratio)); | 720 | cpuname = "C3 'Nehemiah B' [C5XLOH]"; |
647 | memcpy (eblcr_table, nehemiah_b_eblcr, sizeof(nehemiah_b_eblcr)); | ||
648 | break; | 721 | break; |
649 | case 5 ... 15: | 722 | case 5 ... 15: |
650 | cpuname = "C3 'Nehemiah C' [C5N]"; | 723 | cpu_model = CPU_NEHEMIAH_C; |
651 | memcpy (clock_ratio, nehemiah_c_clock_ratio, sizeof(nehemiah_c_clock_ratio)); | 724 | cpuname = "C3 'Nehemiah C' [C5P]"; |
652 | memcpy (eblcr_table, nehemiah_c_eblcr, sizeof(nehemiah_c_eblcr)); | ||
653 | break; | 725 | break; |
654 | } | 726 | } |
655 | break; | 727 | break; |
@@ -658,6 +730,13 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
658 | cpuname = "Unknown"; | 730 | cpuname = "Unknown"; |
659 | break; | 731 | break; |
660 | } | 732 | } |
733 | /* Check Longhaul ver. 2 */ | ||
734 | if (longhaul_version == TYPE_LONGHAUL_V2) { | ||
735 | rdmsr(MSR_VIA_LONGHAUL, lo, hi); | ||
736 | if (lo == 0 && hi == 0) | ||
737 | /* Looks like MSR isn't present */ | ||
738 | longhaul_version = TYPE_LONGHAUL_V1; | ||
739 | } | ||
661 | 740 | ||
662 | printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); | 741 | printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); |
663 | switch (longhaul_version) { | 742 | switch (longhaul_version) { |
@@ -670,15 +749,18 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
670 | break; | 749 | break; |
671 | }; | 750 | }; |
672 | 751 | ||
752 | /* Doesn't hurt */ | ||
753 | vt8235_present = longhaul_setup_vt8235(); | ||
754 | |||
673 | /* Find ACPI data for processor */ | 755 | /* Find ACPI data for processor */ |
674 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, | 756 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, |
675 | &longhaul_walk_callback, NULL, (void *)&pr); | 757 | ACPI_UINT32_MAX, &longhaul_walk_callback, |
758 | NULL, (void *)&pr); | ||
676 | 759 | ||
677 | /* Check ACPI support for C3 state */ | 760 | /* Check ACPI support for C3 state */ |
678 | if ((pr != NULL) && (longhaul_version == TYPE_POWERSAVER)) { | 761 | if (pr != NULL && longhaul_version != TYPE_LONGHAUL_V1) { |
679 | cx = &pr->power.states[ACPI_STATE_C3]; | 762 | cx = &pr->power.states[ACPI_STATE_C3]; |
680 | if (cx->address > 0 && | 763 | if (cx->address > 0 && cx->latency <= 1000) { |
681 | (cx->latency <= 1000 || ignore_latency != 0) ) { | ||
682 | longhaul_flags |= USE_ACPI_C3; | 764 | longhaul_flags |= USE_ACPI_C3; |
683 | goto print_support_type; | 765 | goto print_support_type; |
684 | } | 766 | } |
@@ -688,8 +770,11 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
688 | longhaul_flags |= USE_NORTHBRIDGE; | 770 | longhaul_flags |= USE_NORTHBRIDGE; |
689 | goto print_support_type; | 771 | goto print_support_type; |
690 | } | 772 | } |
691 | 773 | /* Use VT8235 southbridge if present */ | |
692 | /* No ACPI C3 or we can't use it */ | 774 | if (longhaul_version == TYPE_POWERSAVER && vt8235_present) { |
775 | longhaul_flags |= USE_VT8235; | ||
776 | goto print_support_type; | ||
777 | } | ||
693 | /* Check ACPI support for bus master arbiter disable */ | 778 | /* Check ACPI support for bus master arbiter disable */ |
694 | if ((pr == NULL) || !(pr->flags.bm_control)) { | 779 | if ((pr == NULL) || !(pr->flags.bm_control)) { |
695 | printk(KERN_ERR PFX | 780 | printk(KERN_ERR PFX |
@@ -698,18 +783,18 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
698 | } | 783 | } |
699 | 784 | ||
700 | print_support_type: | 785 | print_support_type: |
701 | if (!(longhaul_flags & USE_NORTHBRIDGE)) { | 786 | if (longhaul_flags & USE_NORTHBRIDGE) |
702 | printk (KERN_INFO PFX "Using ACPI support.\n"); | ||
703 | } else { | ||
704 | printk (KERN_INFO PFX "Using northbridge support.\n"); | 787 | printk (KERN_INFO PFX "Using northbridge support.\n"); |
705 | } | 788 | else if (longhaul_flags & USE_VT8235) |
789 | printk (KERN_INFO PFX "Using VT8235 support.\n"); | ||
790 | else | ||
791 | printk (KERN_INFO PFX "Using ACPI support.\n"); | ||
706 | 792 | ||
707 | ret = longhaul_get_ranges(); | 793 | ret = longhaul_get_ranges(); |
708 | if (ret != 0) | 794 | if (ret != 0) |
709 | return ret; | 795 | return ret; |
710 | 796 | ||
711 | if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) && | 797 | if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0)) |
712 | (scale_voltage != 0)) | ||
713 | longhaul_setup_voltagescaling(); | 798 | longhaul_setup_voltagescaling(); |
714 | 799 | ||
715 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | 800 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; |
@@ -797,8 +882,6 @@ static void __exit longhaul_exit(void) | |||
797 | 882 | ||
798 | module_param (scale_voltage, int, 0644); | 883 | module_param (scale_voltage, int, 0644); |
799 | MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); | 884 | MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); |
800 | module_param(ignore_latency, int, 0644); | ||
801 | MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test"); | ||
802 | 885 | ||
803 | MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); | 886 | MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); |
804 | MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); | 887 | MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); |
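The reworked do_powersaver() finally implements the ordering the removed comment asked for (voltage/frequency changes sequenced by scaling direction, as in powernow-k7): raise the voltage before increasing the multiplier, and lower it only after decreasing the multiplier, so the CPU never runs a high ratio at a low voltage. The control flow, reduced to a self-contained sketch; set_voltage() and set_frequency() here are stand-ins for the EnableSoftVID and EnableSoftBusRatio MSR writes, and a larger "pos" means a higher VID, matching the longhaul_pos bookkeeping above:

    #include <stdio.h>

    /* Stand-ins for the SoftVID / SoftBusRatio MSR transitions. */
    static void set_voltage(int pos)   { printf("VID   -> pos %d\n", pos); }
    static void set_frequency(int pos) { printf("ratio -> pos %d\n", pos); }

    /* Ordering rule: never run a high multiplier at a low voltage. */
    static void transition(int cur_pos, int dest_pos)
    {
            if (cur_pos < dest_pos)         /* speeding up: raise voltage first */
                    set_voltage(dest_pos);
            set_frequency(dest_pos);        /* change the bus ratio */
            if (cur_pos > dest_pos)         /* slowing down: drop voltage last */
                    set_voltage(dest_pos);
    }

    int main(void)
    {
            transition(3, 10);  /* up:   voltage, then frequency */
            transition(10, 3);  /* down: frequency, then voltage */
            return 0;
    }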
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h index bc4682aad69b..bb0a04b1d1ab 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.h +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h | |||
@@ -235,84 +235,14 @@ static int __initdata ezrat_eblcr[32] = { | |||
235 | /* | 235 | /* |
236 | * VIA C3 Nehemiah */ | 236 | * VIA C3 Nehemiah */ |
237 | 237 | ||
238 | static int __initdata nehemiah_a_clock_ratio[32] = { | 238 | static int __initdata nehemiah_clock_ratio[32] = { |
239 | 100, /* 0000 -> 10.0x */ | 239 | 100, /* 0000 -> 10.0x */ |
240 | 160, /* 0001 -> 16.0x */ | 240 | 160, /* 0001 -> 16.0x */ |
241 | -1, /* 0010 -> RESERVED */ | 241 | 40, /* 0010 -> 4.0x */ |
242 | 90, /* 0011 -> 9.0x */ | ||
243 | 95, /* 0100 -> 9.5x */ | ||
244 | -1, /* 0101 -> RESERVED */ | ||
245 | -1, /* 0110 -> RESERVED */ | ||
246 | 55, /* 0111 -> 5.5x */ | ||
247 | 60, /* 1000 -> 6.0x */ | ||
248 | 70, /* 1001 -> 7.0x */ | ||
249 | 80, /* 1010 -> 8.0x */ | ||
250 | 50, /* 1011 -> 5.0x */ | ||
251 | 65, /* 1100 -> 6.5x */ | ||
252 | 75, /* 1101 -> 7.5x */ | ||
253 | 85, /* 1110 -> 8.5x */ | ||
254 | 120, /* 1111 -> 12.0x */ | ||
255 | 100, /* 0000 -> 10.0x */ | ||
256 | -1, /* 0001 -> RESERVED */ | ||
257 | 120, /* 0010 -> 12.0x */ | ||
258 | 90, /* 0011 -> 9.0x */ | ||
259 | 105, /* 0100 -> 10.5x */ | ||
260 | 115, /* 0101 -> 11.5x */ | ||
261 | 125, /* 0110 -> 12.5x */ | ||
262 | 135, /* 0111 -> 13.5x */ | ||
263 | 140, /* 1000 -> 14.0x */ | ||
264 | 150, /* 1001 -> 15.0x */ | ||
265 | 160, /* 1010 -> 16.0x */ | ||
266 | 130, /* 1011 -> 13.0x */ | ||
267 | 145, /* 1100 -> 14.5x */ | ||
268 | 155, /* 1101 -> 15.5x */ | ||
269 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
270 | 120, /* 1111 -> 12.0x */ | ||
271 | }; | ||
272 | |||
273 | static int __initdata nehemiah_b_clock_ratio[32] = { | ||
274 | 100, /* 0000 -> 10.0x */ | ||
275 | 160, /* 0001 -> 16.0x */ | ||
276 | -1, /* 0010 -> RESERVED */ | ||
277 | 90, /* 0011 -> 9.0x */ | ||
278 | 95, /* 0100 -> 9.5x */ | ||
279 | -1, /* 0101 -> RESERVED */ | ||
280 | -1, /* 0110 -> RESERVED */ | ||
281 | 55, /* 0111 -> 5.5x */ | ||
282 | 60, /* 1000 -> 6.0x */ | ||
283 | 70, /* 1001 -> 7.0x */ | ||
284 | 80, /* 1010 -> 8.0x */ | ||
285 | 50, /* 1011 -> 5.0x */ | ||
286 | 65, /* 1100 -> 6.5x */ | ||
287 | 75, /* 1101 -> 7.5x */ | ||
288 | 85, /* 1110 -> 8.5x */ | ||
289 | 120, /* 1111 -> 12.0x */ | ||
290 | 100, /* 0000 -> 10.0x */ | ||
291 | 110, /* 0001 -> 11.0x */ | ||
292 | 120, /* 0010 -> 12.0x */ | ||
293 | 90, /* 0011 -> 9.0x */ | ||
294 | 105, /* 0100 -> 10.5x */ | ||
295 | 115, /* 0101 -> 11.5x */ | ||
296 | 125, /* 0110 -> 12.5x */ | ||
297 | 135, /* 0111 -> 13.5x */ | ||
298 | 140, /* 1000 -> 14.0x */ | ||
299 | 150, /* 1001 -> 15.0x */ | ||
300 | 160, /* 1010 -> 16.0x */ | ||
301 | 130, /* 1011 -> 13.0x */ | ||
302 | 145, /* 1100 -> 14.5x */ | ||
303 | 155, /* 1101 -> 15.5x */ | ||
304 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
305 | 120, /* 1111 -> 12.0x */ | ||
306 | }; | ||
307 | |||
308 | static int __initdata nehemiah_c_clock_ratio[32] = { | ||
309 | 100, /* 0000 -> 10.0x */ | ||
310 | 160, /* 0001 -> 16.0x */ | ||
311 | 40, /* 0010 -> RESERVED */ | ||
312 | 90, /* 0011 -> 9.0x */ | 242 | 90, /* 0011 -> 9.0x */ |
313 | 95, /* 0100 -> 9.5x */ | 243 | 95, /* 0100 -> 9.5x */ |
314 | -1, /* 0101 -> RESERVED */ | 244 | -1, /* 0101 -> RESERVED */ |
315 | 45, /* 0110 -> RESERVED */ | 245 | 45, /* 0110 -> 4.5x */ |
316 | 55, /* 0111 -> 5.5x */ | 246 | 55, /* 0111 -> 5.5x */ |
317 | 60, /* 1000 -> 6.0x */ | 247 | 60, /* 1000 -> 6.0x */ |
318 | 70, /* 1001 -> 7.0x */ | 248 | 70, /* 1001 -> 7.0x */ |
@@ -340,84 +270,14 @@ static int __initdata nehemiah_c_clock_ratio[32] = { | |||
340 | 120, /* 1111 -> 12.0x */ | 270 | 120, /* 1111 -> 12.0x */ |
341 | }; | 271 | }; |
342 | 272 | ||
343 | static int __initdata nehemiah_a_eblcr[32] = { | 273 | static int __initdata nehemiah_eblcr[32] = { |
344 | 50, /* 0000 -> 5.0x */ | ||
345 | 160, /* 0001 -> 16.0x */ | ||
346 | -1, /* 0010 -> RESERVED */ | ||
347 | 100, /* 0011 -> 10.0x */ | ||
348 | 55, /* 0100 -> 5.5x */ | ||
349 | -1, /* 0101 -> RESERVED */ | ||
350 | -1, /* 0110 -> RESERVED */ | ||
351 | 95, /* 0111 -> 9.5x */ | ||
352 | 90, /* 1000 -> 9.0x */ | ||
353 | 70, /* 1001 -> 7.0x */ | ||
354 | 80, /* 1010 -> 8.0x */ | ||
355 | 60, /* 1011 -> 6.0x */ | ||
356 | 120, /* 1100 -> 12.0x */ | ||
357 | 75, /* 1101 -> 7.5x */ | ||
358 | 85, /* 1110 -> 8.5x */ | ||
359 | 65, /* 1111 -> 6.5x */ | ||
360 | 90, /* 0000 -> 9.0x */ | ||
361 | -1, /* 0001 -> RESERVED */ | ||
362 | 120, /* 0010 -> 12.0x */ | ||
363 | 100, /* 0011 -> 10.0x */ | ||
364 | 135, /* 0100 -> 13.5x */ | ||
365 | 115, /* 0101 -> 11.5x */ | ||
366 | 125, /* 0110 -> 12.5x */ | ||
367 | 105, /* 0111 -> 10.5x */ | ||
368 | 130, /* 1000 -> 13.0x */ | ||
369 | 150, /* 1001 -> 15.0x */ | ||
370 | 160, /* 1010 -> 16.0x */ | ||
371 | 140, /* 1011 -> 14.0x */ | ||
372 | 120, /* 1100 -> 12.0x */ | ||
373 | 155, /* 1101 -> 15.5x */ | ||
374 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
375 | 145 /* 1111 -> 14.5x */ | ||
376 | /* end of table */ | ||
377 | }; | ||
378 | static int __initdata nehemiah_b_eblcr[32] = { | ||
379 | 50, /* 0000 -> 5.0x */ | ||
380 | 160, /* 0001 -> 16.0x */ | ||
381 | -1, /* 0010 -> RESERVED */ | ||
382 | 100, /* 0011 -> 10.0x */ | ||
383 | 55, /* 0100 -> 5.5x */ | ||
384 | -1, /* 0101 -> RESERVED */ | ||
385 | -1, /* 0110 -> RESERVED */ | ||
386 | 95, /* 0111 -> 9.5x */ | ||
387 | 90, /* 1000 -> 9.0x */ | ||
388 | 70, /* 1001 -> 7.0x */ | ||
389 | 80, /* 1010 -> 8.0x */ | ||
390 | 60, /* 1011 -> 6.0x */ | ||
391 | 120, /* 1100 -> 12.0x */ | ||
392 | 75, /* 1101 -> 7.5x */ | ||
393 | 85, /* 1110 -> 8.5x */ | ||
394 | 65, /* 1111 -> 6.5x */ | ||
395 | 90, /* 0000 -> 9.0x */ | ||
396 | 110, /* 0001 -> 11.0x */ | ||
397 | 120, /* 0010 -> 12.0x */ | ||
398 | 100, /* 0011 -> 10.0x */ | ||
399 | 135, /* 0100 -> 13.5x */ | ||
400 | 115, /* 0101 -> 11.5x */ | ||
401 | 125, /* 0110 -> 12.5x */ | ||
402 | 105, /* 0111 -> 10.5x */ | ||
403 | 130, /* 1000 -> 13.0x */ | ||
404 | 150, /* 1001 -> 15.0x */ | ||
405 | 160, /* 1010 -> 16.0x */ | ||
406 | 140, /* 1011 -> 14.0x */ | ||
407 | 120, /* 1100 -> 12.0x */ | ||
408 | 155, /* 1101 -> 15.5x */ | ||
409 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
410 | 145 /* 1111 -> 14.5x */ | ||
411 | /* end of table */ | ||
412 | }; | ||
413 | static int __initdata nehemiah_c_eblcr[32] = { | ||
414 | 50, /* 0000 -> 5.0x */ | 274 | 50, /* 0000 -> 5.0x */ |
415 | 160, /* 0001 -> 16.0x */ | 275 | 160, /* 0001 -> 16.0x */ |
416 | 40, /* 0010 -> RESERVED */ | 276 | 40, /* 0010 -> 4.0x */ |
417 | 100, /* 0011 -> 10.0x */ | 277 | 100, /* 0011 -> 10.0x */ |
418 | 55, /* 0100 -> 5.5x */ | 278 | 55, /* 0100 -> 5.5x */ |
419 | -1, /* 0101 -> RESERVED */ | 279 | -1, /* 0101 -> RESERVED */ |
420 | 45, /* 0110 -> RESERVED */ | 280 | 45, /* 0110 -> 4.5x */ |
421 | 95, /* 0111 -> 9.5x */ | 281 | 95, /* 0111 -> 9.5x */ |
422 | 90, /* 1000 -> 9.0x */ | 282 | 90, /* 1000 -> 9.0x */ |
423 | 70, /* 1001 -> 7.0x */ | 283 | 70, /* 1001 -> 7.0x */ |
@@ -443,7 +303,6 @@ static int __initdata nehemiah_c_eblcr[32] = { | |||
443 | 155, /* 1101 -> 15.5x */ | 303 | 155, /* 1101 -> 15.5x */ |
444 | -1, /* 1110 -> RESERVED (13.0x) */ | 304 | -1, /* 1110 -> RESERVED (13.0x) */ |
445 | 145 /* 1111 -> 14.5x */ | 305 | 145 /* 1111 -> 14.5x */ |
446 | /* end of table */ | ||
447 | }; | 306 | }; |
448 | 307 | ||
449 | /* | 308 | /* |
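The consolidated nehemiah_clock_ratio table maps a 5-bit index to a multiplier in tenths (100 == 10.0x), with -1 marking reserved encodings; longhaul.c splits that index across the LONGHAUL MSR exactly as do_powersaver() shows, low four bits into SoftBusRatio and bit 4 into SoftBusRatio4. A small sketch of the lookup, using only the first eight entries shown above:

    #include <stdio.h>

    /* First 8 of the 32 nehemiah_clock_ratio entries; values are
     * multipliers in tenths, -1 means a reserved encoding. */
    static const int clock_ratio[8] = {
            100, 160, 40, 90, 95, -1, 45, 55,
    };

    static void show_ratio(unsigned idx)
    {
            unsigned soft_bus_ratio  = idx & 0xf;         /* MSR low 4 bits */
            unsigned soft_bus_ratio4 = (idx & 0x10) >> 4; /* MSR bit 4 */
            int mult = clock_ratio[idx & 0x7]; /* & 0x1f with the full table */

            if (mult < 0)
                    printf("index %u: reserved\n", idx);
            else
                    printf("index %u (hi=%u lo=%u): %d.%dx\n", idx,
                           soft_bus_ratio4, soft_bus_ratio,
                           mult / 10, mult % 10);
    }

    int main(void)
    {
            unsigned i;

            for (i = 0; i < 8; i++)
                    show_ratio(i);
            return 0;
    }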
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 2d6491672559..fe3b67005ebb 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -1289,7 +1289,11 @@ static unsigned int powernowk8_get (unsigned int cpu) | |||
1289 | if (query_current_values_with_pending_wait(data)) | 1289 | if (query_current_values_with_pending_wait(data)) |
1290 | goto out; | 1290 | goto out; |
1291 | 1291 | ||
1292 | khz = find_khz_freq_from_fid(data->currfid); | 1292 | if (cpu_family == CPU_HW_PSTATE) |
1293 | khz = find_khz_freq_from_fiddid(data->currfid, data->currdid); | ||
1294 | else | ||
1295 | khz = find_khz_freq_from_fid(data->currfid); | ||
1296 | |||
1293 | 1297 | ||
1294 | out: | 1298 | out: |
1295 | set_cpus_allowed(current, oldmask); | 1299 | set_cpus_allowed(current, oldmask); |
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c index 0b29d41322a2..e1006b7acc9e 100644 --- a/arch/i386/kernel/hpet.c +++ b/arch/i386/kernel/hpet.c | |||
@@ -1,4 +1,5 @@ | |||
1 | #include <linux/clocksource.h> | 1 | #include <linux/clocksource.h> |
2 | #include <linux/clockchips.h> | ||
2 | #include <linux/errno.h> | 3 | #include <linux/errno.h> |
3 | #include <linux/hpet.h> | 4 | #include <linux/hpet.h> |
4 | #include <linux/init.h> | 5 | #include <linux/init.h> |
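The bulk of the new hpet.c code below turns HPET timer 0 into a clockevent device. Its periodic setup converts one tick of HZ into HPET cycles with the clockevents mult/shift arithmetic (cycles = ns * mult >> shift). A self-contained sketch of that conversion; the 14.318 MHz counter clock here is only an assumption for illustration, since the kernel derives mult from the period the hardware reports:

    #include <stdio.h>
    #include <stdint.h>

    #define NSEC_PER_SEC 1000000000UL
    #define HZ 250

    int main(void)
    {
            uint64_t hpet_freq = 14318180;   /* assumed counter clock, Hz */
            uint32_t shift = 32;             /* as in hpet_clockevent above */
            uint32_t mult = (uint32_t)((hpet_freq << shift) / NSEC_PER_SEC);

            /* Same computation as hpet_set_mode()'s PERIODIC case */
            uint64_t delta = (uint64_t)(NSEC_PER_SEC / HZ) * mult >> shift;

            printf("one %dHz tick = %llu HPET cycles\n", HZ,
                   (unsigned long long)delta);  /* ~57272 at 14.318 MHz */
            return 0;
    }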
@@ -6,17 +7,278 @@ | |||
6 | #include <asm/hpet.h> | 7 | #include <asm/hpet.h> |
7 | #include <asm/io.h> | 8 | #include <asm/io.h> |
8 | 9 | ||
10 | extern struct clock_event_device *global_clock_event; | ||
11 | |||
9 | #define HPET_MASK CLOCKSOURCE_MASK(32) | 12 | #define HPET_MASK CLOCKSOURCE_MASK(32) |
10 | #define HPET_SHIFT 22 | 13 | #define HPET_SHIFT 22 |
11 | 14 | ||
12 | /* FSEC = 10^-15 NSEC = 10^-9 */ | 15 | /* FSEC = 10^-15 NSEC = 10^-9 */ |
13 | #define FSEC_PER_NSEC 1000000 | 16 | #define FSEC_PER_NSEC 1000000 |
14 | 17 | ||
15 | static void __iomem *hpet_ptr; | 18 | /* |
19 | * HPET address is set in acpi/boot.c, when an ACPI entry exists | ||
20 | */ | ||
21 | unsigned long hpet_address; | ||
22 | static void __iomem * hpet_virt_address; | ||
23 | |||
24 | static inline unsigned long hpet_readl(unsigned long a) | ||
25 | { | ||
26 | return readl(hpet_virt_address + a); | ||
27 | } | ||
28 | |||
29 | static inline void hpet_writel(unsigned long d, unsigned long a) | ||
30 | { | ||
31 | writel(d, hpet_virt_address + a); | ||
32 | } | ||
33 | |||
34 | /* | ||
35 | * HPET command line enable / disable | ||
36 | */ | ||
37 | static int boot_hpet_disable; | ||
38 | |||
39 | static int __init hpet_setup(char* str) | ||
40 | { | ||
41 | if (str) { | ||
42 | if (!strncmp("disable", str, 7)) | ||
43 | boot_hpet_disable = 1; | ||
44 | } | ||
45 | return 1; | ||
46 | } | ||
47 | __setup("hpet=", hpet_setup); | ||
48 | |||
49 | static inline int is_hpet_capable(void) | ||
50 | { | ||
51 | return (!boot_hpet_disable && hpet_address); | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * HPET timer interrupt enable / disable | ||
56 | */ | ||
57 | static int hpet_legacy_int_enabled; | ||
58 | |||
59 | /** | ||
60 | * is_hpet_enabled - check whether the hpet timer interrupt is enabled | ||
61 | */ | ||
62 | int is_hpet_enabled(void) | ||
63 | { | ||
64 | return is_hpet_capable() && hpet_legacy_int_enabled; | ||
65 | } | ||
66 | |||
67 | /* | ||
68 | * When the hpet driver (/dev/hpet) is enabled, we need to reserve | ||
69 | * timer 0 and timer 1 in case of RTC emulation. | ||
70 | */ | ||
71 | #ifdef CONFIG_HPET | ||
72 | static void hpet_reserve_platform_timers(unsigned long id) | ||
73 | { | ||
74 | struct hpet __iomem *hpet = hpet_virt_address; | ||
75 | struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; | ||
76 | unsigned int nrtimers, i; | ||
77 | struct hpet_data hd; | ||
78 | |||
79 | nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; | ||
80 | |||
81 | memset(&hd, 0, sizeof (hd)); | ||
82 | hd.hd_phys_address = hpet_address; | ||
83 | hd.hd_address = hpet_virt_address; | ||
84 | hd.hd_nirqs = nrtimers; | ||
85 | hd.hd_flags = HPET_DATA_PLATFORM; | ||
86 | hpet_reserve_timer(&hd, 0); | ||
87 | |||
88 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
89 | hpet_reserve_timer(&hd, 1); | ||
90 | #endif | ||
91 | |||
92 | hd.hd_irq[0] = HPET_LEGACY_8254; | ||
93 | hd.hd_irq[1] = HPET_LEGACY_RTC; | ||
94 | |||
95 | for (i = 2; i < nrtimers; timer++, i++) | ||
96 | hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> | ||
97 | Tn_INT_ROUTE_CNF_SHIFT; | ||
98 | |||
99 | hpet_alloc(&hd); | ||
100 | |||
101 | } | ||
102 | #else | ||
103 | static void hpet_reserve_platform_timers(unsigned long id) { } | ||
104 | #endif | ||
105 | |||
106 | /* | ||
107 | * Common hpet info | ||
108 | */ | ||
109 | static unsigned long hpet_period; | ||
110 | |||
111 | static void hpet_set_mode(enum clock_event_mode mode, | ||
112 | struct clock_event_device *evt); | ||
113 | static int hpet_next_event(unsigned long delta, | ||
114 | struct clock_event_device *evt); | ||
115 | |||
116 | /* | ||
117 | * The hpet clock event device | ||
118 | */ | ||
119 | static struct clock_event_device hpet_clockevent = { | ||
120 | .name = "hpet", | ||
121 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
122 | .set_mode = hpet_set_mode, | ||
123 | .set_next_event = hpet_next_event, | ||
124 | .shift = 32, | ||
125 | .irq = 0, | ||
126 | }; | ||
127 | |||
128 | static void hpet_start_counter(void) | ||
129 | { | ||
130 | unsigned long cfg = hpet_readl(HPET_CFG); | ||
131 | |||
132 | cfg &= ~HPET_CFG_ENABLE; | ||
133 | hpet_writel(cfg, HPET_CFG); | ||
134 | hpet_writel(0, HPET_COUNTER); | ||
135 | hpet_writel(0, HPET_COUNTER + 4); | ||
136 | cfg |= HPET_CFG_ENABLE; | ||
137 | hpet_writel(cfg, HPET_CFG); | ||
138 | } | ||
139 | |||
140 | static void hpet_enable_int(void) | ||
141 | { | ||
142 | unsigned long cfg = hpet_readl(HPET_CFG); | ||
143 | |||
144 | cfg |= HPET_CFG_LEGACY; | ||
145 | hpet_writel(cfg, HPET_CFG); | ||
146 | hpet_legacy_int_enabled = 1; | ||
147 | } | ||
148 | |||
149 | static void hpet_set_mode(enum clock_event_mode mode, | ||
150 | struct clock_event_device *evt) | ||
151 | { | ||
152 | unsigned long cfg, cmp, now; | ||
153 | uint64_t delta; | ||
154 | |||
155 | switch(mode) { | ||
156 | case CLOCK_EVT_MODE_PERIODIC: | ||
157 | delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult; | ||
158 | delta >>= hpet_clockevent.shift; | ||
159 | now = hpet_readl(HPET_COUNTER); | ||
160 | cmp = now + (unsigned long) delta; | ||
161 | cfg = hpet_readl(HPET_T0_CFG); | ||
162 | cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | | ||
163 | HPET_TN_SETVAL | HPET_TN_32BIT; | ||
164 | hpet_writel(cfg, HPET_T0_CFG); | ||
165 | /* | ||
166 | * The first write after writing TN_SETVAL to the | ||
167 | * config register sets the counter value, the second | ||
168 | * write sets the period. | ||
169 | */ | ||
170 | hpet_writel(cmp, HPET_T0_CMP); | ||
171 | udelay(1); | ||
172 | hpet_writel((unsigned long) delta, HPET_T0_CMP); | ||
173 | break; | ||
174 | |||
175 | case CLOCK_EVT_MODE_ONESHOT: | ||
176 | cfg = hpet_readl(HPET_T0_CFG); | ||
177 | cfg &= ~HPET_TN_PERIODIC; | ||
178 | cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; | ||
179 | hpet_writel(cfg, HPET_T0_CFG); | ||
180 | break; | ||
181 | |||
182 | case CLOCK_EVT_MODE_UNUSED: | ||
183 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
184 | cfg = hpet_readl(HPET_T0_CFG); | ||
185 | cfg &= ~HPET_TN_ENABLE; | ||
186 | hpet_writel(cfg, HPET_T0_CFG); | ||
187 | break; | ||
188 | } | ||
189 | } | ||
190 | |||
191 | static int hpet_next_event(unsigned long delta, | ||
192 | struct clock_event_device *evt) | ||
193 | { | ||
194 | unsigned long cnt; | ||
195 | |||
196 | cnt = hpet_readl(HPET_COUNTER); | ||
197 | cnt += delta; | ||
198 | hpet_writel(cnt, HPET_T0_CMP); | ||
199 | |||
200 | return ((long)(hpet_readl(HPET_COUNTER) - cnt) > 0); | ||
201 | } | ||
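The return expression above is a wraparound-safe lateness check: if the
free-running counter has already passed the freshly written comparator, the
signed 32-bit difference is positive and the event was missed. A minimal
userspace sketch of the same idiom, where unsigned int stands in for the
32-bit unsigned long of i386 (illustrative values, not kernel code):

    #include <stdio.h>

    /* Wraparound-safe "counter is past cmp" test, as in hpet_next_event().
     * Casting the unsigned difference to a signed type keeps the comparison
     * correct even when the counter wraps over 2^32.
     */
    static int counter_passed(unsigned int counter, unsigned int cmp)
    {
            return (int)(counter - cmp) > 0;
    }

    int main(void)
    {
            /* comparator just ahead of the counter: event still pending */
            printf("%d\n", counter_passed(0xfffffff0u, 0x00000010u)); /* 0 */
            /* counter already wrapped past the comparator: event missed */
            printf("%d\n", counter_passed(0x00000020u, 0xfffffff0u)); /* 1 */
            return 0;
    }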
202 | |||
203 | /* | ||
204 | * Try to setup the HPET timer | ||
205 | */ | ||
206 | int __init hpet_enable(void) | ||
207 | { | ||
208 | unsigned long id; | ||
209 | uint64_t hpet_freq; | ||
210 | |||
211 | if (!is_hpet_capable()) | ||
212 | return 0; | ||
213 | |||
214 | hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); | ||
215 | |||
216 | /* | ||
217 | * Read the period and check for a sane value: | ||
218 | */ | ||
219 | hpet_period = hpet_readl(HPET_PERIOD); | ||
220 | if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) | ||
221 | goto out_nohpet; | ||
222 | |||
223 | /* | ||
224 | * The period is in femtoseconds. We need to calculate the | ||
225 | * scaled math multiplication factor for nanosecond to hpet tick | ||
226 | * conversion. | ||
227 | */ | ||
228 | hpet_freq = 1000000000000000ULL; | ||
229 | do_div(hpet_freq, hpet_period); | ||
230 | hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, | ||
231 | NSEC_PER_SEC, 32); | ||
232 | /* Calculate the min / max delta */ | ||
233 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, | ||
234 | &hpet_clockevent); | ||
235 | hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, | ||
236 | &hpet_clockevent); | ||
237 | |||
238 | /* | ||
239 | * Read the HPET ID register to retrieve the IRQ routing | ||
240 | * information and the number of channels | ||
241 | */ | ||
242 | id = hpet_readl(HPET_ID); | ||
243 | |||
244 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
245 | /* | ||
246 | * The legacy routing mode needs at least two channels, tick timer | ||
247 | * and the rtc emulation channel. | ||
248 | */ | ||
249 | if (!(id & HPET_ID_NUMBER)) | ||
250 | goto out_nohpet; | ||
251 | #endif | ||
252 | |||
253 | /* Start the counter */ | ||
254 | hpet_start_counter(); | ||
255 | |||
256 | if (id & HPET_ID_LEGSUP) { | ||
257 | hpet_enable_int(); | ||
258 | hpet_reserve_platform_timers(id); | ||
259 | /* | ||
260 | * Start hpet with the boot cpu mask and make it | ||
261 | * global after the IO_APIC has been initialized. | ||
262 | */ | ||
263 | hpet_clockevent.cpumask = cpumask_of_cpu(0); | ||
264 | clockevents_register_device(&hpet_clockevent); | ||
265 | global_clock_event = &hpet_clockevent; | ||
266 | return 1; | ||
267 | } | ||
268 | return 0; | ||
16 | 269 | ||
270 | out_nohpet: | ||
271 | iounmap(hpet_virt_address); | ||
272 | hpet_virt_address = NULL; | ||
273 | return 0; | ||
274 | } | ||
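To make the scaled math in hpet_enable() concrete: the hardware reports the
tick period in femtoseconds, the code inverts it to a frequency, and div_sc()
produces a fixed-point factor so that a nanosecond delta converts to HPET
ticks as (ns * mult) >> 32. A userspace sketch of that arithmetic, assuming
the common 14.31818 MHz HPET (the period constant is illustrative, not read
from hardware):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            const uint64_t fsec_per_sec = 1000000000000000ULL;
            const uint64_t nsec_per_sec = 1000000000ULL;
            uint64_t period = 69841279;     /* HPET_PERIOD, fs per tick */
            uint64_t freq = fsec_per_sec / period;          /* ~14318179 Hz */
            uint64_t mult = (freq << 32) / nsec_per_sec;    /* ~61496107 */

            /* one tick of a HZ=1000 clockevent: 10^6 ns -> HPET ticks */
            uint64_t ticks = (1000000ULL * mult) >> 32;     /* ~14318 */

            printf("freq=%llu Hz mult=%llu ticks/ms=%llu\n",
                   (unsigned long long)freq, (unsigned long long)mult,
                   (unsigned long long)ticks);
            return 0;
    }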
275 | |||
276 | /* | ||
277 | * Clock source related code | ||
278 | */ | ||
17 | static cycle_t read_hpet(void) | 279 | static cycle_t read_hpet(void) |
18 | { | 280 | { |
19 | return (cycle_t)readl(hpet_ptr); | 281 | return (cycle_t)hpet_readl(HPET_COUNTER); |
20 | } | 282 | } |
21 | 283 | ||
22 | static struct clocksource clocksource_hpet = { | 284 | static struct clocksource clocksource_hpet = { |
@@ -24,28 +286,17 @@ static struct clocksource clocksource_hpet = { | |||
24 | .rating = 250, | 286 | .rating = 250, |
25 | .read = read_hpet, | 287 | .read = read_hpet, |
26 | .mask = HPET_MASK, | 288 | .mask = HPET_MASK, |
27 | .mult = 0, /* set below */ | ||
28 | .shift = HPET_SHIFT, | 289 | .shift = HPET_SHIFT, |
29 | .is_continuous = 1, | 290 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
30 | }; | 291 | }; |
31 | 292 | ||
32 | static int __init init_hpet_clocksource(void) | 293 | static int __init init_hpet_clocksource(void) |
33 | { | 294 | { |
34 | unsigned long hpet_period; | ||
35 | void __iomem* hpet_base; | ||
36 | u64 tmp; | 295 | u64 tmp; |
37 | int err; | ||
38 | 296 | ||
39 | if (!is_hpet_enabled()) | 297 | if (!hpet_virt_address) |
40 | return -ENODEV; | 298 | return -ENODEV; |
41 | 299 | ||
42 | /* calculate the hpet address: */ | ||
43 | hpet_base = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); | ||
44 | hpet_ptr = hpet_base + HPET_COUNTER; | ||
45 | |||
46 | /* calculate the frequency: */ | ||
47 | hpet_period = readl(hpet_base + HPET_PERIOD); | ||
48 | |||
49 | /* | 300 | /* |
50 | * hpet period is in femtoseconds per cycle | 301 | * hpet period is in femtoseconds per cycle |
51 | * so we need to convert this to ns/cyc units | 302 | * so we need to convert this to ns/cyc units |
@@ -61,11 +312,218 @@ static int __init init_hpet_clocksource(void) | |||
61 | do_div(tmp, FSEC_PER_NSEC); | 312 | do_div(tmp, FSEC_PER_NSEC); |
62 | clocksource_hpet.mult = (u32)tmp; | 313 | clocksource_hpet.mult = (u32)tmp; |
63 | 314 | ||
64 | err = clocksource_register(&clocksource_hpet); | 315 | return clocksource_register(&clocksource_hpet); |
65 | if (err) | ||
66 | iounmap(hpet_base); | ||
67 | |||
68 | return err; | ||
69 | } | 316 | } |
70 | 317 | ||
71 | module_init(init_hpet_clocksource); | 318 | module_init(init_hpet_clocksource); |
319 | |||
320 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
321 | |||
322 | /* HPET in LegacyReplacement mode eats up the RTC interrupt line. When HPET | ||
323 | * is enabled, we support RTC interrupt functionality in software. | ||
324 | * The RTC has 3 kinds of interrupts: | ||
325 | * 1) Update Interrupt - generate an interrupt once per second, when the | ||
326 | * RTC clock is updated | ||
327 | * 2) Alarm Interrupt - generate an interrupt at a specific time of day | ||
328 | * 3) Periodic Interrupt - generate a periodic interrupt at frequencies of | ||
329 | * 2Hz-8192Hz (2Hz-64Hz for non-root users); all frequencies are powers of 2 | ||
330 | * (1) and (2) above are implemented using polling at a frequency of | ||
331 | * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt | ||
332 | * overhead (DEFAULT_RTC_INT_FREQ). | ||
333 | * For (3), we use interrupts at 64Hz or the user-specified periodic | ||
334 | * frequency, whichever is higher. | ||
335 | */ | ||
336 | #include <linux/mc146818rtc.h> | ||
337 | #include <linux/rtc.h> | ||
338 | |||
339 | #define DEFAULT_RTC_INT_FREQ 64 | ||
340 | #define DEFAULT_RTC_SHIFT 6 | ||
341 | #define RTC_NUM_INTS 1 | ||
342 | |||
343 | static unsigned long hpet_rtc_flags; | ||
344 | static unsigned long hpet_prev_update_sec; | ||
345 | static struct rtc_time hpet_alarm_time; | ||
346 | static unsigned long hpet_pie_count; | ||
347 | static unsigned long hpet_t1_cmp; | ||
348 | static unsigned long hpet_default_delta; | ||
349 | static unsigned long hpet_pie_delta; | ||
350 | static unsigned long hpet_pie_limit; | ||
351 | |||
352 | /* | ||
353 | * Timer 1 for RTC emulation. We use one-shot mode, as periodic mode | ||
354 | * is not supported by all HPET implementations for timer 1. | ||
355 | * | ||
356 | * hpet_rtc_timer_init() is called when the rtc is initialized. | ||
357 | */ | ||
358 | int hpet_rtc_timer_init(void) | ||
359 | { | ||
360 | unsigned long cfg, cnt, delta, flags; | ||
361 | |||
362 | if (!is_hpet_enabled()) | ||
363 | return 0; | ||
364 | |||
365 | if (!hpet_default_delta) { | ||
366 | uint64_t clc; | ||
367 | |||
368 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; | ||
369 | clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; | ||
370 | hpet_default_delta = (unsigned long) clc; | ||
371 | } | ||
372 | |||
373 | if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) | ||
374 | delta = hpet_default_delta; | ||
375 | else | ||
376 | delta = hpet_pie_delta; | ||
377 | |||
378 | local_irq_save(flags); | ||
379 | |||
380 | cnt = delta + hpet_readl(HPET_COUNTER); | ||
381 | hpet_writel(cnt, HPET_T1_CMP); | ||
382 | hpet_t1_cmp = cnt; | ||
383 | |||
384 | cfg = hpet_readl(HPET_T1_CFG); | ||
385 | cfg &= ~HPET_TN_PERIODIC; | ||
386 | cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; | ||
387 | hpet_writel(cfg, HPET_T1_CFG); | ||
388 | |||
389 | local_irq_restore(flags); | ||
390 | |||
391 | return 1; | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * The functions below are called from the rtc driver. | ||
396 | * Return 0 if HPET is not being used. | ||
397 | * Otherwise do the necessary changes and return 1. | ||
398 | */ | ||
399 | int hpet_mask_rtc_irq_bit(unsigned long bit_mask) | ||
400 | { | ||
401 | if (!is_hpet_enabled()) | ||
402 | return 0; | ||
403 | |||
404 | hpet_rtc_flags &= ~bit_mask; | ||
405 | return 1; | ||
406 | } | ||
407 | |||
408 | int hpet_set_rtc_irq_bit(unsigned long bit_mask) | ||
409 | { | ||
410 | unsigned long oldbits = hpet_rtc_flags; | ||
411 | |||
412 | if (!is_hpet_enabled()) | ||
413 | return 0; | ||
414 | |||
415 | hpet_rtc_flags |= bit_mask; | ||
416 | |||
417 | if (!oldbits) | ||
418 | hpet_rtc_timer_init(); | ||
419 | |||
420 | return 1; | ||
421 | } | ||
422 | |||
423 | int hpet_set_alarm_time(unsigned char hrs, unsigned char min, | ||
424 | unsigned char sec) | ||
425 | { | ||
426 | if (!is_hpet_enabled()) | ||
427 | return 0; | ||
428 | |||
429 | hpet_alarm_time.tm_hour = hrs; | ||
430 | hpet_alarm_time.tm_min = min; | ||
431 | hpet_alarm_time.tm_sec = sec; | ||
432 | |||
433 | return 1; | ||
434 | } | ||
435 | |||
436 | int hpet_set_periodic_freq(unsigned long freq) | ||
437 | { | ||
438 | uint64_t clc; | ||
439 | |||
440 | if (!is_hpet_enabled()) | ||
441 | return 0; | ||
442 | |||
443 | if (freq <= DEFAULT_RTC_INT_FREQ) | ||
444 | hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq; | ||
445 | else { | ||
446 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; | ||
447 | do_div(clc, freq); | ||
448 | clc >>= hpet_clockevent.shift; | ||
449 | hpet_pie_delta = (unsigned long) clc; | ||
450 | } | ||
451 | return 1; | ||
452 | } | ||
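To make the two branches above concrete: at or below 64 Hz the timer keeps
running at DEFAULT_RTC_INT_FREQ and hpet_pie_limit divides in software, while
above 64 Hz the comparator delta is reprogrammed directly. A hedged userspace
sketch with illustrative numbers (mult/shift as a 14.318 MHz HPET would get
from hpet_enable(); not kernel code):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            const uint64_t nsec_per_sec = 1000000000ULL;
            uint64_t mult = 61496107, shift = 32;   /* assumed factor */
            unsigned long freq;

            freq = 2;       /* <= 64 Hz: poll at 64 Hz, deliver every Nth tick */
            printf("pie_limit=%lu\n", 64UL / freq);         /* 32 */

            freq = 1024;    /* > 64 Hz: program the comparator delta directly */
            uint64_t clc = mult * nsec_per_sec / freq;      /* ns->ticks, scaled */
            printf("pie_delta=%llu ticks\n",
                   (unsigned long long)(clc >> shift));     /* ~13982 */
            return 0;
    }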
453 | |||
454 | int hpet_rtc_dropped_irq(void) | ||
455 | { | ||
456 | return is_hpet_enabled(); | ||
457 | } | ||
458 | |||
459 | static void hpet_rtc_timer_reinit(void) | ||
460 | { | ||
461 | unsigned long cfg, delta; | ||
462 | int lost_ints = -1; | ||
463 | |||
464 | if (unlikely(!hpet_rtc_flags)) { | ||
465 | cfg = hpet_readl(HPET_T1_CFG); | ||
466 | cfg &= ~HPET_TN_ENABLE; | ||
467 | hpet_writel(cfg, HPET_T1_CFG); | ||
468 | return; | ||
469 | } | ||
470 | |||
471 | if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) | ||
472 | delta = hpet_default_delta; | ||
473 | else | ||
474 | delta = hpet_pie_delta; | ||
475 | |||
476 | /* | ||
477 | * Increment the comparator value until we are ahead of the | ||
478 | * current count. | ||
479 | */ | ||
480 | do { | ||
481 | hpet_t1_cmp += delta; | ||
482 | hpet_writel(hpet_t1_cmp, HPET_T1_CMP); | ||
483 | lost_ints++; | ||
484 | } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); | ||
485 | |||
486 | if (lost_ints) { | ||
487 | if (hpet_rtc_flags & RTC_PIE) | ||
488 | hpet_pie_count += lost_ints; | ||
489 | if (printk_ratelimit()) | ||
490 | printk(KERN_WARNING "rtc: lost %d interrupts\n", | ||
491 | lost_ints); | ||
492 | } | ||
493 | } | ||
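Note that the do/while in hpet_rtc_timer_reinit() always executes at least
once, which is why lost_ints starts at -1: the first pass is the normal
advance of the comparator, and only additional passes count as genuinely
missed RTC ticks.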
494 | |||
495 | irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) | ||
496 | { | ||
497 | struct rtc_time curr_time; | ||
498 | unsigned long rtc_int_flag = 0; | ||
499 | |||
500 | hpet_rtc_timer_reinit(); | ||
501 | |||
502 | if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) | ||
503 | rtc_get_rtc_time(&curr_time); | ||
504 | |||
505 | if (hpet_rtc_flags & RTC_UIE && | ||
506 | curr_time.tm_sec != hpet_prev_update_sec) { | ||
507 | rtc_int_flag = RTC_UF; | ||
508 | hpet_prev_update_sec = curr_time.tm_sec; | ||
509 | } | ||
510 | |||
511 | if (hpet_rtc_flags & RTC_PIE && | ||
512 | ++hpet_pie_count >= hpet_pie_limit) { | ||
513 | rtc_int_flag |= RTC_PF; | ||
514 | hpet_pie_count = 0; | ||
515 | } | ||
516 | |||
517 | if (hpet_rtc_flags & RTC_AIE && | ||
518 | (curr_time.tm_sec == hpet_alarm_time.tm_sec) && | ||
519 | (curr_time.tm_min == hpet_alarm_time.tm_min) && | ||
520 | (curr_time.tm_hour == hpet_alarm_time.tm_hour)) | ||
521 | rtc_int_flag |= RTC_AF; | ||
522 | |||
523 | if (rtc_int_flag) { | ||
524 | rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); | ||
525 | rtc_interrupt(rtc_int_flag, dev_id); | ||
526 | } | ||
527 | return IRQ_HANDLED; | ||
528 | } | ||
529 | #endif | ||
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c index 9a0060b92e32..a6bc7bb38834 100644 --- a/arch/i386/kernel/i8253.c +++ b/arch/i386/kernel/i8253.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * i8253.c 8253/PIT functions | 2 | * i8253.c 8253/PIT functions |
3 | * | 3 | * |
4 | */ | 4 | */ |
5 | #include <linux/clocksource.h> | 5 | #include <linux/clockchips.h> |
6 | #include <linux/spinlock.h> | 6 | #include <linux/spinlock.h> |
7 | #include <linux/jiffies.h> | 7 | #include <linux/jiffies.h> |
8 | #include <linux/sysdev.h> | 8 | #include <linux/sysdev.h> |
@@ -19,17 +19,97 @@ | |||
19 | DEFINE_SPINLOCK(i8253_lock); | 19 | DEFINE_SPINLOCK(i8253_lock); |
20 | EXPORT_SYMBOL(i8253_lock); | 20 | EXPORT_SYMBOL(i8253_lock); |
21 | 21 | ||
22 | void setup_pit_timer(void) | 22 | /* |
23 | * The HPET replaces the PIT when enabled, so we need to know which of | ||
24 | * the two timers is in use. | ||
25 | */ | ||
26 | struct clock_event_device *global_clock_event; | ||
27 | |||
28 | /* | ||
29 | * Initialize the PIT timer. | ||
30 | * | ||
31 | * This is also called after resume to bring the PIT into operation again. | ||
32 | */ | ||
33 | static void init_pit_timer(enum clock_event_mode mode, | ||
34 | struct clock_event_device *evt) | ||
35 | { | ||
36 | unsigned long flags; | ||
37 | |||
38 | spin_lock_irqsave(&i8253_lock, flags); | ||
39 | |||
40 | switch(mode) { | ||
41 | case CLOCK_EVT_MODE_PERIODIC: | ||
42 | /* binary, mode 2, LSB/MSB, ch 0 */ | ||
43 | outb_p(0x34, PIT_MODE); | ||
44 | udelay(10); | ||
45 | outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ | ||
46 | udelay(10); | ||
47 | outb(LATCH >> 8 , PIT_CH0); /* MSB */ | ||
48 | break; | ||
49 | |||
50 | case CLOCK_EVT_MODE_ONESHOT: | ||
51 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
52 | case CLOCK_EVT_MODE_UNUSED: | ||
53 | /* One-shot setup */ | ||
54 | outb_p(0x38, PIT_MODE); | ||
55 | udelay(10); | ||
56 | break; | ||
57 | } | ||
58 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
59 | } | ||
60 | |||
61 | /* | ||
62 | * Program the next event in oneshot mode | ||
63 | * | ||
64 | * Delta is given in PIT ticks | ||
65 | */ | ||
66 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) | ||
23 | { | 67 | { |
24 | unsigned long flags; | 68 | unsigned long flags; |
25 | 69 | ||
26 | spin_lock_irqsave(&i8253_lock, flags); | 70 | spin_lock_irqsave(&i8253_lock, flags); |
27 | outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | 71 | outb_p(delta & 0xff , PIT_CH0); /* LSB */ |
28 | udelay(10); | 72 | outb(delta >> 8 , PIT_CH0); /* MSB */ |
29 | outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ | ||
30 | udelay(10); | ||
31 | outb(LATCH >> 8 , PIT_CH0); /* MSB */ | ||
32 | spin_unlock_irqrestore(&i8253_lock, flags); | 73 | spin_unlock_irqrestore(&i8253_lock, flags); |
74 | |||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * On UP the PIT can serve all of the possible timer functions. On SMP systems | ||
80 | * it can be used solely for the global tick. | ||
81 | * | ||
82 | * The profiling and update capabilities are switched off once the local apic | ||
83 | * is registered. This mechanism replaces the previous #ifdef LOCAL_APIC - | ||
84 | * !using_apic_timer decisions in do_timer_interrupt_hook() | ||
85 | */ | ||
86 | struct clock_event_device pit_clockevent = { | ||
87 | .name = "pit", | ||
88 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
89 | .set_mode = init_pit_timer, | ||
90 | .set_next_event = pit_next_event, | ||
91 | .shift = 32, | ||
92 | .irq = 0, | ||
93 | }; | ||
94 | |||
95 | /* | ||
96 | * Initialize the conversion factor and the min/max deltas of the clock event | ||
97 | * structure and register the clock event source with the framework. | ||
98 | */ | ||
99 | void __init setup_pit_timer(void) | ||
100 | { | ||
101 | /* | ||
102 | * Start pit with the boot cpu mask and make it global after the | ||
103 | * IO_APIC has been initialized. | ||
104 | */ | ||
105 | pit_clockevent.cpumask = cpumask_of_cpu(0); | ||
106 | pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32); | ||
107 | pit_clockevent.max_delta_ns = | ||
108 | clockevent_delta2ns(0x7FFF, &pit_clockevent); | ||
109 | pit_clockevent.min_delta_ns = | ||
110 | clockevent_delta2ns(0xF, &pit_clockevent); | ||
111 | clockevents_register_device(&pit_clockevent); | ||
112 | global_clock_event = &pit_clockevent; | ||
33 | } | 113 | } |
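For reference, the numbers behind setup_pit_timer(): the PIT input clock is
CLOCK_TICK_RATE = 1193182 Hz, LATCH = (CLOCK_TICK_RATE + HZ/2) / HZ is the
periodic reload value, and clockevent_delta2ns() is the inverse of the
ns-to-ticks factor, so the 0x7FFF oneshot cap works out to roughly 27 ms.
A small userspace sketch of the arithmetic (HZ=1000 assumed):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            const uint64_t clock_tick_rate = 1193182; /* PIT input clock, Hz */
            const uint64_t nsec_per_sec = 1000000000ULL;
            const unsigned int hz = 1000;

            uint64_t latch = (clock_tick_rate + hz / 2) / hz;       /* 1193 */
            uint64_t mult = (clock_tick_rate << 32) / nsec_per_sec;
            uint64_t max_ns = ((uint64_t)0x7FFF << 32) / mult;      /* ~27.4 ms */
            uint64_t min_ns = ((uint64_t)0xF << 32) / mult;         /* ~12.6 us */

            printf("LATCH=%llu mult=%llu max=%lluns min=%lluns\n",
                   (unsigned long long)latch, (unsigned long long)mult,
                   (unsigned long long)max_ns, (unsigned long long)min_ns);
            return 0;
    }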
34 | 114 | ||
35 | /* | 115 | /* |
@@ -46,7 +126,7 @@ static cycle_t pit_read(void) | |||
46 | static u32 old_jifs; | 126 | static u32 old_jifs; |
47 | 127 | ||
48 | spin_lock_irqsave(&i8253_lock, flags); | 128 | spin_lock_irqsave(&i8253_lock, flags); |
49 | /* | 129 | /* |
50 | * Although our caller may have the read side of xtime_lock, | 130 | * Although our caller may have the read side of xtime_lock, |
51 | * this is now a seqlock, and we are cheating in this routine | 131 | * this is now a seqlock, and we are cheating in this routine |
52 | * by having side effects on state that we cannot undo if | 132 | * by having side effects on state that we cannot undo if |
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index c8d45821c788..03abfdb1a6e4 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c | |||
@@ -41,6 +41,7 @@ static void mask_and_ack_8259A(unsigned int); | |||
41 | static struct irq_chip i8259A_chip = { | 41 | static struct irq_chip i8259A_chip = { |
42 | .name = "XT-PIC", | 42 | .name = "XT-PIC", |
43 | .mask = disable_8259A_irq, | 43 | .mask = disable_8259A_irq, |
44 | .disable = disable_8259A_irq, | ||
44 | .unmask = enable_8259A_irq, | 45 | .unmask = enable_8259A_irq, |
45 | .mask_ack = mask_and_ack_8259A, | 46 | .mask_ack = mask_and_ack_8259A, |
46 | }; | 47 | }; |
@@ -410,12 +411,6 @@ void __init native_init_IRQ(void) | |||
410 | intr_init_hook(); | 411 | intr_init_hook(); |
411 | 412 | ||
412 | /* | 413 | /* |
413 | * Set the clock to HZ Hz, we already have a valid | ||
414 | * vector now: | ||
415 | */ | ||
416 | setup_pit_timer(); | ||
417 | |||
418 | /* | ||
419 | * External FPU? Set up irq13 if so, for | 414 | * External FPU? Set up irq13 if so, for |
420 | * original braindamaged IBM FERR coupling. | 415 | * original braindamaged IBM FERR coupling. |
421 | */ | 416 | */ |
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index e30ccedad0b9..4ccebd454e25 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c | |||
@@ -482,8 +482,8 @@ static void do_irq_balance(void) | |||
482 | package_index = CPU_TO_PACKAGEINDEX(i); | 482 | package_index = CPU_TO_PACKAGEINDEX(i); |
483 | for (j = 0; j < NR_IRQS; j++) { | 483 | for (j = 0; j < NR_IRQS; j++) { |
484 | unsigned long value_now, delta; | 484 | unsigned long value_now, delta; |
485 | /* Is this an active IRQ? */ | 485 | /* Is this an active IRQ or is balancing disabled? */ |
486 | if (!irq_desc[j].action) | 486 | if (!irq_desc[j].action || irq_balancing_disabled(j)) |
487 | continue; | 487 | continue; |
488 | if ( package_index == i ) | 488 | if ( package_index == i ) |
489 | IRQ_DELTA(package_index,j) = 0; | 489 | IRQ_DELTA(package_index,j) = 0; |
@@ -1281,11 +1281,9 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) | |||
1281 | trigger == IOAPIC_LEVEL) | 1281 | trigger == IOAPIC_LEVEL) |
1282 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | 1282 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
1283 | handle_fasteoi_irq, "fasteoi"); | 1283 | handle_fasteoi_irq, "fasteoi"); |
1284 | else { | 1284 | else |
1285 | irq_desc[irq].status |= IRQ_DELAYED_DISABLE; | ||
1286 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | 1285 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
1287 | handle_edge_irq, "edge"); | 1286 | handle_edge_irq, "edge"); |
1288 | } | ||
1289 | set_intr_gate(vector, interrupt[irq]); | 1287 | set_intr_gate(vector, interrupt[irq]); |
1290 | } | 1288 | } |
1291 | 1289 | ||
@@ -1588,7 +1586,7 @@ void /*__init*/ print_local_APIC(void * dummy) | |||
1588 | v = apic_read(APIC_LVR); | 1586 | v = apic_read(APIC_LVR); |
1589 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); | 1587 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); |
1590 | ver = GET_APIC_VERSION(v); | 1588 | ver = GET_APIC_VERSION(v); |
1591 | maxlvt = get_maxlvt(); | 1589 | maxlvt = lapic_get_maxlvt(); |
1592 | 1590 | ||
1593 | v = apic_read(APIC_TASKPRI); | 1591 | v = apic_read(APIC_TASKPRI); |
1594 | printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); | 1592 | printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); |
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 5785d84103a6..0f2ca590bf23 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c | |||
@@ -10,7 +10,6 @@ | |||
10 | * io_apic.c.) | 10 | * io_apic.c.) |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <asm/uaccess.h> | ||
14 | #include <linux/module.h> | 13 | #include <linux/module.h> |
15 | #include <linux/seq_file.h> | 14 | #include <linux/seq_file.h> |
16 | #include <linux/interrupt.h> | 15 | #include <linux/interrupt.h> |
@@ -21,19 +20,34 @@ | |||
21 | 20 | ||
22 | #include <asm/idle.h> | 21 | #include <asm/idle.h> |
23 | 22 | ||
23 | #include <asm/apic.h> | ||
24 | #include <asm/uaccess.h> | ||
25 | |||
24 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; | 26 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; |
25 | EXPORT_PER_CPU_SYMBOL(irq_stat); | 27 | EXPORT_PER_CPU_SYMBOL(irq_stat); |
26 | 28 | ||
27 | #ifndef CONFIG_X86_LOCAL_APIC | ||
28 | /* | 29 | /* |
29 | * 'what should we do if we get a hw irq event on an illegal vector'. | 30 | * 'what should we do if we get a hw irq event on an illegal vector'. |
30 | * each architecture has to answer this themselves. | 31 | * each architecture has to answer this themselves. |
31 | */ | 32 | */ |
32 | void ack_bad_irq(unsigned int irq) | 33 | void ack_bad_irq(unsigned int irq) |
33 | { | 34 | { |
34 | printk("unexpected IRQ trap at vector %02x\n", irq); | 35 | printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); |
35 | } | 36 | |
37 | #ifdef CONFIG_X86_LOCAL_APIC | ||
38 | /* | ||
39 | * Currently unexpected vectors happen only on SMP and APIC. | ||
40 | * We _must_ ack these because every local APIC has only N | ||
41 | * irq slots per priority level, and a 'hanging, unacked' IRQ | ||
42 | * holds up an irq slot - in excessive cases (when multiple | ||
43 | * unexpected vectors occur) that might lock up the APIC | ||
44 | * completely. | ||
45 | * But only ack when the APIC is enabled -AK | ||
46 | */ | ||
47 | if (cpu_has_apic) | ||
48 | ack_APIC_irq(); | ||
36 | #endif | 49 | #endif |
50 | } | ||
37 | 51 | ||
38 | #ifdef CONFIG_4KSTACKS | 52 | #ifdef CONFIG_4KSTACKS |
39 | /* | 53 | /* |
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 5d8a07c20281..821df34d2b3a 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/dmi.h> | 23 | #include <linux/dmi.h> |
24 | #include <linux/kprobes.h> | 24 | #include <linux/kprobes.h> |
25 | #include <linux/cpumask.h> | 25 | #include <linux/cpumask.h> |
26 | #include <linux/kernel_stat.h> | ||
26 | 27 | ||
27 | #include <asm/smp.h> | 28 | #include <asm/smp.h> |
28 | #include <asm/nmi.h> | 29 | #include <asm/nmi.h> |
@@ -973,9 +974,13 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
973 | cpu_clear(cpu, backtrace_mask); | 974 | cpu_clear(cpu, backtrace_mask); |
974 | } | 975 | } |
975 | 976 | ||
976 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; | 977 | /* |
978 | * Take the local apic timer and PIT/HPET into account. We don't | ||
979 | * know which one is active when we have highres/dyntick on. | ||
980 | */ | ||
981 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0); | ||
977 | 982 | ||
978 | /* if the apic timer isn't firing, this cpu isn't doing much */ | 983 | /* if none of the timers is firing, this cpu isn't doing much */ |
979 | if (!touched && last_irq_sums[cpu] == sum) { | 984 | if (!touched && last_irq_sums[cpu] == sum) { |
980 | /* | 985 | /* |
981 | * Ayiee, looks like this CPU is stuck ... | 986 | * Ayiee, looks like this CPU is stuck ... |
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 7845d480c293..bea304d48cdb 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/ptrace.h> | 38 | #include <linux/ptrace.h> |
39 | #include <linux/random.h> | 39 | #include <linux/random.h> |
40 | #include <linux/personality.h> | 40 | #include <linux/personality.h> |
41 | #include <linux/tick.h> | ||
41 | 42 | ||
42 | #include <asm/uaccess.h> | 43 | #include <asm/uaccess.h> |
43 | #include <asm/pgtable.h> | 44 | #include <asm/pgtable.h> |
@@ -211,6 +212,7 @@ void cpu_idle(void) | |||
211 | 212 | ||
212 | /* endless idle loop with no priority at all */ | 213 | /* endless idle loop with no priority at all */ |
213 | while (1) { | 214 | while (1) { |
215 | tick_nohz_stop_sched_tick(); | ||
214 | while (!need_resched()) { | 216 | while (!need_resched()) { |
215 | void (*idle)(void); | 217 | void (*idle)(void); |
216 | 218 | ||
@@ -238,6 +240,7 @@ void cpu_idle(void) | |||
238 | idle(); | 240 | idle(); |
239 | __exit_idle(); | 241 | __exit_idle(); |
240 | } | 242 | } |
243 | tick_nohz_restart_sched_tick(); | ||
241 | preempt_enable_no_resched(); | 244 | preempt_enable_no_resched(); |
242 | schedule(); | 245 | schedule(); |
243 | preempt_disable(); | 246 | preempt_disable(); |
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index f46a4d095e6c..48bfcaa13ecc 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
@@ -94,12 +94,6 @@ cpumask_t cpu_possible_map; | |||
94 | EXPORT_SYMBOL(cpu_possible_map); | 94 | EXPORT_SYMBOL(cpu_possible_map); |
95 | static cpumask_t smp_commenced_mask; | 95 | static cpumask_t smp_commenced_mask; |
96 | 96 | ||
97 | /* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there | ||
98 | * is no way to resync one AP against BP. TBD: for prescott and above, we | ||
99 | * should use IA64's algorithm | ||
100 | */ | ||
101 | static int __devinitdata tsc_sync_disabled; | ||
102 | |||
103 | /* Per CPU bogomips and other parameters */ | 97 | /* Per CPU bogomips and other parameters */ |
104 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | 98 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; |
105 | EXPORT_SYMBOL(cpu_data); | 99 | EXPORT_SYMBOL(cpu_data); |
@@ -216,151 +210,6 @@ valid_k7: | |||
216 | ; | 210 | ; |
217 | } | 211 | } |
218 | 212 | ||
219 | /* | ||
220 | * TSC synchronization. | ||
221 | * | ||
222 | * We first check whether all CPUs have their TSC's synchronized, | ||
223 | * then we print a warning if not, and always resync. | ||
224 | */ | ||
225 | |||
226 | static struct { | ||
227 | atomic_t start_flag; | ||
228 | atomic_t count_start; | ||
229 | atomic_t count_stop; | ||
230 | unsigned long long values[NR_CPUS]; | ||
231 | } tsc __cpuinitdata = { | ||
232 | .start_flag = ATOMIC_INIT(0), | ||
233 | .count_start = ATOMIC_INIT(0), | ||
234 | .count_stop = ATOMIC_INIT(0), | ||
235 | }; | ||
236 | |||
237 | #define NR_LOOPS 5 | ||
238 | |||
239 | static void __init synchronize_tsc_bp(void) | ||
240 | { | ||
241 | int i; | ||
242 | unsigned long long t0; | ||
243 | unsigned long long sum, avg; | ||
244 | long long delta; | ||
245 | unsigned int one_usec; | ||
246 | int buggy = 0; | ||
247 | |||
248 | printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); | ||
249 | |||
250 | /* convert from kcyc/sec to cyc/usec */ | ||
251 | one_usec = cpu_khz / 1000; | ||
252 | |||
253 | atomic_set(&tsc.start_flag, 1); | ||
254 | wmb(); | ||
255 | |||
256 | /* | ||
257 | * We loop a few times to get a primed instruction cache, | ||
258 | * then the last pass is more or less synchronized and | ||
259 | * the BP and APs set their cycle counters to zero all at | ||
260 | * once. This reduces the chance of having random offsets | ||
261 | * between the processors, and guarantees that the maximum | ||
262 | * delay between the cycle counters is never bigger than | ||
263 | * the latency of information-passing (cachelines) between | ||
264 | * two CPUs. | ||
265 | */ | ||
266 | for (i = 0; i < NR_LOOPS; i++) { | ||
267 | /* | ||
268 | * all APs synchronize but they loop on '== num_cpus' | ||
269 | */ | ||
270 | while (atomic_read(&tsc.count_start) != num_booting_cpus()-1) | ||
271 | cpu_relax(); | ||
272 | atomic_set(&tsc.count_stop, 0); | ||
273 | wmb(); | ||
274 | /* | ||
275 | * this lets the APs save their current TSC: | ||
276 | */ | ||
277 | atomic_inc(&tsc.count_start); | ||
278 | |||
279 | rdtscll(tsc.values[smp_processor_id()]); | ||
280 | /* | ||
281 | * We clear the TSC in the last loop: | ||
282 | */ | ||
283 | if (i == NR_LOOPS-1) | ||
284 | write_tsc(0, 0); | ||
285 | |||
286 | /* | ||
287 | * Wait for all APs to leave the synchronization point: | ||
288 | */ | ||
289 | while (atomic_read(&tsc.count_stop) != num_booting_cpus()-1) | ||
290 | cpu_relax(); | ||
291 | atomic_set(&tsc.count_start, 0); | ||
292 | wmb(); | ||
293 | atomic_inc(&tsc.count_stop); | ||
294 | } | ||
295 | |||
296 | sum = 0; | ||
297 | for (i = 0; i < NR_CPUS; i++) { | ||
298 | if (cpu_isset(i, cpu_callout_map)) { | ||
299 | t0 = tsc.values[i]; | ||
300 | sum += t0; | ||
301 | } | ||
302 | } | ||
303 | avg = sum; | ||
304 | do_div(avg, num_booting_cpus()); | ||
305 | |||
306 | for (i = 0; i < NR_CPUS; i++) { | ||
307 | if (!cpu_isset(i, cpu_callout_map)) | ||
308 | continue; | ||
309 | delta = tsc.values[i] - avg; | ||
310 | if (delta < 0) | ||
311 | delta = -delta; | ||
312 | /* | ||
313 | * We report bigger than 2 microseconds clock differences. | ||
314 | */ | ||
315 | if (delta > 2*one_usec) { | ||
316 | long long realdelta; | ||
317 | |||
318 | if (!buggy) { | ||
319 | buggy = 1; | ||
320 | printk("\n"); | ||
321 | } | ||
322 | realdelta = delta; | ||
323 | do_div(realdelta, one_usec); | ||
324 | if (tsc.values[i] < avg) | ||
325 | realdelta = -realdelta; | ||
326 | |||
327 | if (realdelta) | ||
328 | printk(KERN_INFO "CPU#%d had %Ld usecs TSC " | ||
329 | "skew, fixed it up.\n", i, realdelta); | ||
330 | } | ||
331 | } | ||
332 | if (!buggy) | ||
333 | printk("passed.\n"); | ||
334 | } | ||
335 | |||
336 | static void __cpuinit synchronize_tsc_ap(void) | ||
337 | { | ||
338 | int i; | ||
339 | |||
340 | /* | ||
341 | * Not every cpu is online at the time | ||
342 | * this gets called, so we first wait for the BP to | ||
343 | * finish SMP initialization: | ||
344 | */ | ||
345 | while (!atomic_read(&tsc.start_flag)) | ||
346 | cpu_relax(); | ||
347 | |||
348 | for (i = 0; i < NR_LOOPS; i++) { | ||
349 | atomic_inc(&tsc.count_start); | ||
350 | while (atomic_read(&tsc.count_start) != num_booting_cpus()) | ||
351 | cpu_relax(); | ||
352 | |||
353 | rdtscll(tsc.values[smp_processor_id()]); | ||
354 | if (i == NR_LOOPS-1) | ||
355 | write_tsc(0, 0); | ||
356 | |||
357 | atomic_inc(&tsc.count_stop); | ||
358 | while (atomic_read(&tsc.count_stop) != num_booting_cpus()) | ||
359 | cpu_relax(); | ||
360 | } | ||
361 | } | ||
362 | #undef NR_LOOPS | ||
363 | |||
364 | extern void calibrate_delay(void); | 213 | extern void calibrate_delay(void); |
365 | 214 | ||
366 | static atomic_t init_deasserted; | 215 | static atomic_t init_deasserted; |
@@ -438,20 +287,12 @@ static void __cpuinit smp_callin(void) | |||
438 | /* | 287 | /* |
439 | * Save our processor parameters | 288 | * Save our processor parameters |
440 | */ | 289 | */ |
441 | smp_store_cpu_info(cpuid); | 290 | smp_store_cpu_info(cpuid); |
442 | |||
443 | disable_APIC_timer(); | ||
444 | 291 | ||
445 | /* | 292 | /* |
446 | * Allow the master to continue. | 293 | * Allow the master to continue. |
447 | */ | 294 | */ |
448 | cpu_set(cpuid, cpu_callin_map); | 295 | cpu_set(cpuid, cpu_callin_map); |
449 | |||
450 | /* | ||
451 | * Synchronize the TSC with the BP | ||
452 | */ | ||
453 | if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled) | ||
454 | synchronize_tsc_ap(); | ||
455 | } | 296 | } |
456 | 297 | ||
457 | static int cpucount; | 298 | static int cpucount; |
@@ -554,13 +395,17 @@ static void __cpuinit start_secondary(void *unused) | |||
554 | smp_callin(); | 395 | smp_callin(); |
555 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) | 396 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) |
556 | rep_nop(); | 397 | rep_nop(); |
398 | /* | ||
399 | * Check TSC synchronization with the BP: | ||
400 | */ | ||
401 | check_tsc_sync_target(); | ||
402 | |||
557 | setup_secondary_clock(); | 403 | setup_secondary_clock(); |
558 | if (nmi_watchdog == NMI_IO_APIC) { | 404 | if (nmi_watchdog == NMI_IO_APIC) { |
559 | disable_8259A_irq(0); | 405 | disable_8259A_irq(0); |
560 | enable_NMI_through_LVT0(NULL); | 406 | enable_NMI_through_LVT0(NULL); |
561 | enable_8259A_irq(0); | 407 | enable_8259A_irq(0); |
562 | } | 408 | } |
563 | enable_APIC_timer(); | ||
564 | /* | 409 | /* |
565 | * low-memory mappings have been cleared, flush them from | 410 | * low-memory mappings have been cleared, flush them from |
566 | * the local TLBs too. | 411 | * the local TLBs too. |
@@ -752,7 +597,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
752 | /* | 597 | /* |
753 | * Due to the Pentium erratum 3AP. | 598 | * Due to the Pentium erratum 3AP. |
754 | */ | 599 | */ |
755 | maxlvt = get_maxlvt(); | 600 | maxlvt = lapic_get_maxlvt(); |
756 | if (maxlvt > 3) { | 601 | if (maxlvt > 3) { |
757 | apic_read_around(APIC_SPIV); | 602 | apic_read_around(APIC_SPIV); |
758 | apic_write(APIC_ESR, 0); | 603 | apic_write(APIC_ESR, 0); |
@@ -849,7 +694,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
849 | */ | 694 | */ |
850 | Dprintk("#startup loops: %d.\n", num_starts); | 695 | Dprintk("#startup loops: %d.\n", num_starts); |
851 | 696 | ||
852 | maxlvt = get_maxlvt(); | 697 | maxlvt = lapic_get_maxlvt(); |
853 | 698 | ||
854 | for (j = 1; j <= num_starts; j++) { | 699 | for (j = 1; j <= num_starts; j++) { |
855 | Dprintk("Sending STARTUP #%d.\n",j); | 700 | Dprintk("Sending STARTUP #%d.\n",j); |
@@ -1125,8 +970,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
1125 | info.cpu = cpu; | 970 | info.cpu = cpu; |
1126 | INIT_WORK(&info.task, do_warm_boot_cpu); | 971 | INIT_WORK(&info.task, do_warm_boot_cpu); |
1127 | 972 | ||
1128 | tsc_sync_disabled = 1; | ||
1129 | |||
1130 | /* init low mem mapping */ | 973 | /* init low mem mapping */ |
1131 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, | 974 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, |
1132 | min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); | 975 | min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); |
@@ -1134,7 +977,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
1134 | schedule_work(&info.task); | 977 | schedule_work(&info.task); |
1135 | wait_for_completion(&done); | 978 | wait_for_completion(&done); |
1136 | 979 | ||
1137 | tsc_sync_disabled = 0; | ||
1138 | zap_low_mappings(); | 980 | zap_low_mappings(); |
1139 | ret = 0; | 981 | ret = 0; |
1140 | exit: | 982 | exit: |
@@ -1331,12 +1173,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) | |||
1331 | smpboot_setup_io_apic(); | 1173 | smpboot_setup_io_apic(); |
1332 | 1174 | ||
1333 | setup_boot_clock(); | 1175 | setup_boot_clock(); |
1334 | |||
1335 | /* | ||
1336 | * Synchronize the TSC with the AP | ||
1337 | */ | ||
1338 | if (cpu_has_tsc && cpucount && cpu_khz) | ||
1339 | synchronize_tsc_bp(); | ||
1340 | } | 1176 | } |
1341 | 1177 | ||
1342 | /* These are wrappers to interface to the new boot process. Someone | 1178 | /* These are wrappers to interface to the new boot process. Someone |
@@ -1471,9 +1307,16 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
1471 | } | 1307 | } |
1472 | 1308 | ||
1473 | local_irq_enable(); | 1309 | local_irq_enable(); |
1310 | |||
1474 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 1311 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; |
1475 | /* Unleash the CPU! */ | 1312 | /* Unleash the CPU! */ |
1476 | cpu_set(cpu, smp_commenced_mask); | 1313 | cpu_set(cpu, smp_commenced_mask); |
1314 | |||
1315 | /* | ||
1316 | * Check TSC synchronization with the AP: | ||
1317 | */ | ||
1318 | check_tsc_sync_source(cpu); | ||
1319 | |||
1477 | while (!cpu_isset(cpu, cpu_online_map)) | 1320 | while (!cpu_isset(cpu, cpu_online_map)) |
1478 | cpu_relax(); | 1321 | cpu_relax(); |
1479 | 1322 | ||
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index a4f67a6e6821..a5350059557a 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c | |||
@@ -159,15 +159,6 @@ EXPORT_SYMBOL(profile_pc); | |||
159 | */ | 159 | */ |
160 | irqreturn_t timer_interrupt(int irq, void *dev_id) | 160 | irqreturn_t timer_interrupt(int irq, void *dev_id) |
161 | { | 161 | { |
162 | /* | ||
163 | * Here we are in the timer irq handler. We just have irqs locally | ||
164 | * disabled but we don't know if the timer_bh is running on the other | ||
165 | * CPU. We need to avoid to SMP race with it. NOTE: we don' t need | ||
166 | * the irq version of write_lock because as just said we have irq | ||
167 | * locally disabled. -arca | ||
168 | */ | ||
169 | write_seqlock(&xtime_lock); | ||
170 | |||
171 | #ifdef CONFIG_X86_IO_APIC | 162 | #ifdef CONFIG_X86_IO_APIC |
172 | if (timer_ack) { | 163 | if (timer_ack) { |
173 | /* | 164 | /* |
@@ -186,7 +177,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
186 | 177 | ||
187 | do_timer_interrupt_hook(); | 178 | do_timer_interrupt_hook(); |
188 | 179 | ||
189 | |||
190 | if (MCA_bus) { | 180 | if (MCA_bus) { |
191 | /* The PS/2 uses level-triggered interrupts. You can't | 181 | /* The PS/2 uses level-triggered interrupts. You can't |
192 | turn them off, nor would you want to (any attempt to | 182 | turn them off, nor would you want to (any attempt to |
@@ -201,18 +191,11 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
201 | outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ | 191 | outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ |
202 | } | 192 | } |
203 | 193 | ||
204 | write_sequnlock(&xtime_lock); | ||
205 | |||
206 | #ifdef CONFIG_X86_LOCAL_APIC | ||
207 | if (using_apic_timer) | ||
208 | smp_send_timer_broadcast_ipi(); | ||
209 | #endif | ||
210 | |||
211 | return IRQ_HANDLED; | 194 | return IRQ_HANDLED; |
212 | } | 195 | } |
213 | 196 | ||
214 | /* not static: needed by APM */ | 197 | /* not static: needed by APM */ |
215 | unsigned long get_cmos_time(void) | 198 | unsigned long read_persistent_clock(void) |
216 | { | 199 | { |
217 | unsigned long retval; | 200 | unsigned long retval; |
218 | unsigned long flags; | 201 | unsigned long flags; |
@@ -225,7 +208,6 @@ unsigned long get_cmos_time(void) | |||
225 | 208 | ||
226 | return retval; | 209 | return retval; |
227 | } | 210 | } |
228 | EXPORT_SYMBOL(get_cmos_time); | ||
229 | 211 | ||
230 | static void sync_cmos_clock(unsigned long dummy); | 212 | static void sync_cmos_clock(unsigned long dummy); |
231 | 213 | ||
@@ -278,114 +260,16 @@ void notify_arch_cmos_timer(void) | |||
278 | mod_timer(&sync_cmos_timer, jiffies + 1); | 260 | mod_timer(&sync_cmos_timer, jiffies + 1); |
279 | } | 261 | } |
280 | 262 | ||
281 | static long clock_cmos_diff; | ||
282 | static unsigned long sleep_start; | ||
283 | |||
284 | static int timer_suspend(struct sys_device *dev, pm_message_t state) | ||
285 | { | ||
286 | /* | ||
287 | * Estimate time zone so that set_time can update the clock | ||
288 | */ | ||
289 | unsigned long ctime = get_cmos_time(); | ||
290 | |||
291 | clock_cmos_diff = -ctime; | ||
292 | clock_cmos_diff += get_seconds(); | ||
293 | sleep_start = ctime; | ||
294 | return 0; | ||
295 | } | ||
296 | |||
297 | static int timer_resume(struct sys_device *dev) | ||
298 | { | ||
299 | unsigned long flags; | ||
300 | unsigned long sec; | ||
301 | unsigned long ctime = get_cmos_time(); | ||
302 | long sleep_length = (ctime - sleep_start) * HZ; | ||
303 | struct timespec ts; | ||
304 | |||
305 | if (sleep_length < 0) { | ||
306 | printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n"); | ||
307 | /* The time after the resume must not be earlier than the time | ||
308 | * before the suspend or some nasty things will happen | ||
309 | */ | ||
310 | sleep_length = 0; | ||
311 | ctime = sleep_start; | ||
312 | } | ||
313 | #ifdef CONFIG_HPET_TIMER | ||
314 | if (is_hpet_enabled()) | ||
315 | hpet_reenable(); | ||
316 | #endif | ||
317 | setup_pit_timer(); | ||
318 | |||
319 | sec = ctime + clock_cmos_diff; | ||
320 | ts.tv_sec = sec; | ||
321 | ts.tv_nsec = 0; | ||
322 | do_settimeofday(&ts); | ||
323 | write_seqlock_irqsave(&xtime_lock, flags); | ||
324 | jiffies_64 += sleep_length; | ||
325 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
326 | touch_softlockup_watchdog(); | ||
327 | return 0; | ||
328 | } | ||
329 | |||
330 | static struct sysdev_class timer_sysclass = { | ||
331 | .resume = timer_resume, | ||
332 | .suspend = timer_suspend, | ||
333 | set_kset_name("timer"), | ||
334 | }; | ||
335 | |||
336 | |||
337 | /* XXX this driverfs stuff should probably go elsewhere later -john */ | ||
338 | static struct sys_device device_timer = { | ||
339 | .id = 0, | ||
340 | .cls = &timer_sysclass, | ||
341 | }; | ||
342 | |||
343 | static int time_init_device(void) | ||
344 | { | ||
345 | int error = sysdev_class_register(&timer_sysclass); | ||
346 | if (!error) | ||
347 | error = sysdev_register(&device_timer); | ||
348 | return error; | ||
349 | } | ||
350 | |||
351 | device_initcall(time_init_device); | ||
352 | |||
353 | #ifdef CONFIG_HPET_TIMER | ||
354 | extern void (*late_time_init)(void); | 263 | extern void (*late_time_init)(void); |
355 | /* Duplicate of time_init() below, with hpet_enable part added */ | 264 | /* Duplicate of time_init() below, with hpet_enable part added */ |
356 | static void __init hpet_time_init(void) | 265 | static void __init hpet_time_init(void) |
357 | { | 266 | { |
358 | struct timespec ts; | 267 | if (!hpet_enable()) |
359 | ts.tv_sec = get_cmos_time(); | 268 | setup_pit_timer(); |
360 | ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); | ||
361 | |||
362 | do_settimeofday(&ts); | ||
363 | |||
364 | if ((hpet_enable() >= 0) && hpet_use_timer) { | ||
365 | printk("Using HPET for base-timer\n"); | ||
366 | } | ||
367 | |||
368 | do_time_init(); | 269 | do_time_init(); |
369 | } | 270 | } |
370 | #endif | ||
371 | 271 | ||
372 | void __init time_init(void) | 272 | void __init time_init(void) |
373 | { | 273 | { |
374 | struct timespec ts; | 274 | late_time_init = hpet_time_init; |
375 | #ifdef CONFIG_HPET_TIMER | ||
376 | if (is_hpet_capable()) { | ||
377 | /* | ||
378 | * HPET initialization needs to do memory-mapped io. So, let | ||
379 | * us do a late initialization after mem_init(). | ||
380 | */ | ||
381 | late_time_init = hpet_time_init; | ||
382 | return; | ||
383 | } | ||
384 | #endif | ||
385 | ts.tv_sec = get_cmos_time(); | ||
386 | ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); | ||
387 | |||
388 | do_settimeofday(&ts); | ||
389 | |||
390 | do_time_init(); | ||
391 | } | 275 | } |
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 46f752a8bbf3..3082a418635c 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c | |||
@@ -60,12 +60,6 @@ static inline int check_tsc_unstable(void) | |||
60 | return tsc_unstable; | 60 | return tsc_unstable; |
61 | } | 61 | } |
62 | 62 | ||
63 | void mark_tsc_unstable(void) | ||
64 | { | ||
65 | tsc_unstable = 1; | ||
66 | } | ||
67 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | ||
68 | |||
69 | /* Accelerators for sched_clock() | 63 | /* Accelerators for sched_clock() |
70 | * convert from cycles(64bits) => nanoseconds (64bits) | 64 | * convert from cycles(64bits) => nanoseconds (64bits) |
71 | * basic equation: | 65 | * basic equation: |
@@ -222,34 +216,6 @@ out_no_tsc: | |||
222 | 216 | ||
223 | #ifdef CONFIG_CPU_FREQ | 217 | #ifdef CONFIG_CPU_FREQ |
224 | 218 | ||
225 | static unsigned int cpufreq_delayed_issched = 0; | ||
226 | static unsigned int cpufreq_init = 0; | ||
227 | static struct work_struct cpufreq_delayed_get_work; | ||
228 | |||
229 | static void handle_cpufreq_delayed_get(struct work_struct *work) | ||
230 | { | ||
231 | unsigned int cpu; | ||
232 | |||
233 | for_each_online_cpu(cpu) | ||
234 | cpufreq_get(cpu); | ||
235 | |||
236 | cpufreq_delayed_issched = 0; | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries | ||
241 | * to verify the CPU frequency the timing core thinks the CPU is running | ||
242 | * at is still correct. | ||
243 | */ | ||
244 | static inline void cpufreq_delayed_get(void) | ||
245 | { | ||
246 | if (cpufreq_init && !cpufreq_delayed_issched) { | ||
247 | cpufreq_delayed_issched = 1; | ||
248 | printk(KERN_DEBUG "Checking if CPU frequency changed.\n"); | ||
249 | schedule_work(&cpufreq_delayed_get_work); | ||
250 | } | ||
251 | } | ||
252 | |||
253 | /* | 219 | /* |
254 | * if the CPU frequency is scaled, TSC-based delays will need a different | 220 | * if the CPU frequency is scaled, TSC-based delays will need a different |
255 | * loops_per_jiffy value to function properly. | 221 | * loops_per_jiffy value to function properly. |
@@ -313,17 +279,9 @@ static struct notifier_block time_cpufreq_notifier_block = { | |||
313 | 279 | ||
314 | static int __init cpufreq_tsc(void) | 280 | static int __init cpufreq_tsc(void) |
315 | { | 281 | { |
316 | int ret; | 282 | return cpufreq_register_notifier(&time_cpufreq_notifier_block, |
317 | 283 | CPUFREQ_TRANSITION_NOTIFIER); | |
318 | INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get); | ||
319 | ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, | ||
320 | CPUFREQ_TRANSITION_NOTIFIER); | ||
321 | if (!ret) | ||
322 | cpufreq_init = 1; | ||
323 | |||
324 | return ret; | ||
325 | } | 284 | } |
326 | |||
327 | core_initcall(cpufreq_tsc); | 285 | core_initcall(cpufreq_tsc); |
328 | 286 | ||
329 | #endif | 287 | #endif |
@@ -331,7 +289,6 @@ core_initcall(cpufreq_tsc); | |||
331 | /* clock source code */ | 289 | /* clock source code */ |
332 | 290 | ||
333 | static unsigned long current_tsc_khz = 0; | 291 | static unsigned long current_tsc_khz = 0; |
334 | static int tsc_update_callback(void); | ||
335 | 292 | ||
336 | static cycle_t read_tsc(void) | 293 | static cycle_t read_tsc(void) |
337 | { | 294 | { |
@@ -349,37 +306,28 @@ static struct clocksource clocksource_tsc = { | |||
349 | .mask = CLOCKSOURCE_MASK(64), | 306 | .mask = CLOCKSOURCE_MASK(64), |
350 | .mult = 0, /* to be set */ | 307 | .mult = 0, /* to be set */ |
351 | .shift = 22, | 308 | .shift = 22, |
352 | .update_callback = tsc_update_callback, | 309 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | |
353 | .is_continuous = 1, | 310 | CLOCK_SOURCE_MUST_VERIFY, |
354 | }; | 311 | }; |
355 | 312 | ||
356 | static int tsc_update_callback(void) | 313 | void mark_tsc_unstable(void) |
357 | { | 314 | { |
358 | int change = 0; | 315 | if (!tsc_unstable) { |
359 | 316 | tsc_unstable = 1; | |
360 | /* check to see if we should switch to the safe clocksource: */ | 317 | /* Can be called before registration */ |
361 | if (clocksource_tsc.rating != 0 && check_tsc_unstable()) { | 318 | if (clocksource_tsc.mult) |
362 | clocksource_tsc.rating = 0; | 319 | clocksource_change_rating(&clocksource_tsc, 0); |
363 | clocksource_reselect(); | 320 | else |
364 | change = 1; | 321 | clocksource_tsc.rating = 0; |
365 | } | ||
366 | |||
367 | /* only update if tsc_khz has changed: */ | ||
368 | if (current_tsc_khz != tsc_khz) { | ||
369 | current_tsc_khz = tsc_khz; | ||
370 | clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, | ||
371 | clocksource_tsc.shift); | ||
372 | change = 1; | ||
373 | } | 322 | } |
374 | |||
375 | return change; | ||
376 | } | 323 | } |
324 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | ||
377 | 325 | ||
378 | static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d) | 326 | static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d) |
379 | { | 327 | { |
380 | printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", | 328 | printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", |
381 | d->ident); | 329 | d->ident); |
382 | mark_tsc_unstable(); | 330 | tsc_unstable = 1; |
383 | return 0; | 331 | return 0; |
384 | } | 332 | } |
385 | 333 | ||
@@ -396,65 +344,44 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { | |||
396 | {} | 344 | {} |
397 | }; | 345 | }; |
398 | 346 | ||
399 | #define TSC_FREQ_CHECK_INTERVAL (10*MSEC_PER_SEC) /* 10sec in MS */ | ||
400 | static struct timer_list verify_tsc_freq_timer; | ||
401 | |||
402 | /* XXX - Probably should add locking */ | ||
403 | static void verify_tsc_freq(unsigned long unused) | ||
404 | { | ||
405 | static u64 last_tsc; | ||
406 | static unsigned long last_jiffies; | ||
407 | |||
408 | u64 now_tsc, interval_tsc; | ||
409 | unsigned long now_jiffies, interval_jiffies; | ||
410 | |||
411 | |||
412 | if (check_tsc_unstable()) | ||
413 | return; | ||
414 | |||
415 | rdtscll(now_tsc); | ||
416 | now_jiffies = jiffies; | ||
417 | |||
418 | if (!last_jiffies) { | ||
419 | goto out; | ||
420 | } | ||
421 | |||
422 | interval_jiffies = now_jiffies - last_jiffies; | ||
423 | interval_tsc = now_tsc - last_tsc; | ||
424 | interval_tsc *= HZ; | ||
425 | do_div(interval_tsc, cpu_khz*1000); | ||
426 | |||
427 | if (interval_tsc < (interval_jiffies * 3 / 4)) { | ||
428 | printk("TSC appears to be running slowly. " | ||
429 | "Marking it as unstable\n"); | ||
430 | mark_tsc_unstable(); | ||
431 | return; | ||
432 | } | ||
433 | |||
434 | out: | ||
435 | last_tsc = now_tsc; | ||
436 | last_jiffies = now_jiffies; | ||
437 | /* set us up to go off on the next interval: */ | ||
438 | mod_timer(&verify_tsc_freq_timer, | ||
439 | jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL)); | ||
440 | } | ||
441 | |||
442 | /* | 347 | /* |
443 | * Make an educated guess if the TSC is trustworthy and synchronized | 348 | * Make an educated guess if the TSC is trustworthy and synchronized |
444 | * over all CPUs. | 349 | * over all CPUs. |
445 | */ | 350 | */ |
446 | static __init int unsynchronized_tsc(void) | 351 | __cpuinit int unsynchronized_tsc(void) |
447 | { | 352 | { |
353 | if (!cpu_has_tsc || tsc_unstable) | ||
354 | return 1; | ||
448 | /* | 355 | /* |
449 | * Intel systems are normally all synchronized. | 356 | * Intel systems are normally all synchronized. |
450 | * Exceptions must mark TSC as unstable: | 357 | * Exceptions must mark TSC as unstable: |
451 | */ | 358 | */ |
452 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | 359 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { |
453 | return 0; | 360 | /* assume multi socket systems are not synchronized: */ |
361 | if (num_possible_cpus() > 1) | ||
362 | tsc_unstable = 1; | ||
363 | } | ||
364 | return tsc_unstable; | ||
365 | } | ||
366 | |||
367 | /* | ||
368 | * Geode_LX - the OLPC CPU possibly has a very reliable TSC | ||
369 | */ | ||
370 | #ifdef CONFIG_MGEODE_LX | ||
371 | /* RTSC counts during suspend */ | ||
372 | #define RTSC_SUSP 0x100 | ||
373 | |||
374 | static void __init check_geode_tsc_reliable(void) | ||
375 | { | ||
376 | unsigned long val; | ||
454 | 377 | ||
455 | /* assume multi socket systems are not synchronized: */ | 378 | rdmsrl(MSR_GEODE_BUSCONT_CONF0, val); |
456 | return num_possible_cpus() > 1; | 379 | if ((val & RTSC_SUSP)) |
380 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | ||
457 | } | 381 | } |
382 | #else | ||
383 | static inline void check_geode_tsc_reliable(void) { } | ||
384 | #endif | ||
458 | 385 | ||
459 | static int __init init_tsc_clocksource(void) | 386 | static int __init init_tsc_clocksource(void) |
460 | { | 387 | { |
@@ -463,20 +390,16 @@ static int __init init_tsc_clocksource(void) | |||
463 | /* check blacklist */ | 390 | /* check blacklist */ |
464 | dmi_check_system(bad_tsc_dmi_table); | 391 | dmi_check_system(bad_tsc_dmi_table); |
465 | 392 | ||
466 | if (unsynchronized_tsc()) /* mark unstable if unsynced */ | 393 | unsynchronized_tsc(); |
467 | mark_tsc_unstable(); | 394 | check_geode_tsc_reliable(); |
468 | current_tsc_khz = tsc_khz; | 395 | current_tsc_khz = tsc_khz; |
469 | clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, | 396 | clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, |
470 | clocksource_tsc.shift); | 397 | clocksource_tsc.shift); |
471 | /* lower the rating if we already know it's unstable: */ | 398 | /* lower the rating if we already know it's unstable: */ |
472 | if (check_tsc_unstable()) | 399 | if (check_tsc_unstable()) { |
473 | clocksource_tsc.rating = 0; | 400 | clocksource_tsc.rating = 0; |
474 | 401 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; | |
475 | init_timer(&verify_tsc_freq_timer); | 402 | } |
476 | verify_tsc_freq_timer.function = verify_tsc_freq; | ||
477 | verify_tsc_freq_timer.expires = | ||
478 | jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL); | ||
479 | add_timer(&verify_tsc_freq_timer); | ||
480 | 403 | ||
481 | return clocksource_register(&clocksource_tsc); | 404 | return clocksource_register(&clocksource_tsc); |
482 | } | 405 | } |
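
The hunk above also drops the old verify_tsc_freq watchdog timer: instead of periodically re-checking the TSC against jiffies, the clocksource is registered once with a mult/shift pair computed by clocksource_khz2mult(). The idea is ordinary fixed-point arithmetic: choose mult so that ns = (cycles * mult) >> shift for a counter ticking at tsc_khz. A minimal userspace sketch of that conversion — the rounding detail and the shift of 22 are assumptions for illustration, not taken from this hunk:

/* Standalone demo (not kernel code) of the khz -> mult conversion. */
#include <stdio.h>
#include <stdint.h>

static uint32_t khz2mult(uint32_t khz, uint32_t shift)
{
	/* one cycle lasts 10^6 / khz ns; scale by 2^shift and round */
	uint64_t tmp = ((uint64_t)1000000) << shift;

	tmp += khz / 2;
	return (uint32_t)(tmp / khz);
}

int main(void)
{
	uint32_t khz = 2400000;			/* hypothetical 2.4 GHz TSC */
	uint32_t shift = 22;			/* assumed clocksource shift */
	uint32_t mult = khz2mult(khz, shift);
	uint64_t cycles = 2400000000ULL;	/* one second of cycles */

	printf("mult=%u -> %llu ns\n", mult,
	       (unsigned long long)((cycles * mult) >> shift));
	return 0;
}

With these figures the demo prints mult=1747627 and converts one second of cycles back to roughly 10^9 ns, which is exactly the property clocksource_register() relies on.
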
diff --git a/arch/i386/kernel/tsc_sync.c b/arch/i386/kernel/tsc_sync.c new file mode 100644 index 000000000000..12424629af87 --- /dev/null +++ b/arch/i386/kernel/tsc_sync.c | |||
@@ -0,0 +1 @@ | |||
#include "../../x86_64/kernel/tsc_sync.c" | |||
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c index 2e2d8dbcbd68..76d2adcae5a3 100644 --- a/arch/i386/kernel/vmitime.c +++ b/arch/i386/kernel/vmitime.c | |||
@@ -115,7 +115,7 @@ static struct clocksource clocksource_vmi = { | |||
115 | .mask = CLOCKSOURCE_MASK(64), | 115 | .mask = CLOCKSOURCE_MASK(64), |
116 | .mult = 0, /* to be set */ | 116 | .mult = 0, /* to be set */ |
117 | .shift = 22, | 117 | .shift = 22, |
118 | .is_continuous = 1, | 118 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
119 | }; | 119 | }; |
120 | 120 | ||
121 | 121 | ||
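
One pattern worth calling out, since it repeats in the vmitime hunk above and in the MIPS and s390 hunks below: the boolean .is_continuous member becomes a bit in a .flags word. A single word can then carry several orthogonal properties — the Geode hunk earlier clears CLOCK_SOURCE_MUST_VERIFY, and init_tsc_clocksource() clears CLOCK_SOURCE_IS_CONTINUOUS once the TSC is known unstable. A toy sketch of the idiom; the flag values here are invented (the real ones live in include/linux/clocksource.h):

#include <stdio.h>

#define SKETCH_IS_CONTINUOUS	0x01	/* illustrative values only */
#define SKETCH_MUST_VERIFY	0x02

struct clocksource_sketch {
	const char *name;
	unsigned long flags;
};

int main(void)
{
	struct clocksource_sketch cs = {
		.name  = "demo",
		.flags = SKETCH_IS_CONTINUOUS | SKETCH_MUST_VERIFY,
	};

	/* a trusted platform can drop the verification requirement: */
	cs.flags &= ~SKETCH_MUST_VERIFY;

	/* an unstable source loses its continuity claim: */
	cs.flags &= ~SKETCH_IS_CONTINUOUS;

	printf("%s flags=%#lx\n", cs.name, cs.flags);
	return 0;
}
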
diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c index cc2f519b2f7f..c78816210706 100644 --- a/arch/i386/mach-default/setup.c +++ b/arch/i386/mach-default/setup.c | |||
@@ -79,7 +79,12 @@ void __init trap_init_hook(void) | |||
79 | { | 79 | { |
80 | } | 80 | } |
81 | 81 | ||
82 | static struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL}; | 82 | static struct irqaction irq0 = { |
83 | .handler = timer_interrupt, | ||
84 | .flags = IRQF_DISABLED | IRQF_NOBALANCING, | ||
85 | .mask = CPU_MASK_NONE, | ||
86 | .name = "timer" | ||
87 | }; | ||
83 | 88 | ||
84 | /** | 89 | /** |
85 | * time_init_hook - do any specific initialisations for the system timer. | 90 | * time_init_hook - do any specific initialisations for the system timer. |
@@ -90,6 +95,7 @@ static struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, | |||
90 | **/ | 95 | **/ |
91 | void __init time_init_hook(void) | 96 | void __init time_init_hook(void) |
92 | { | 97 | { |
98 | irq0.mask = cpumask_of_cpu(0); | ||
93 | setup_irq(0, &irq0); | 99 | setup_irq(0, &irq0); |
94 | } | 100 | } |
95 | 101 | ||
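
Two things happen in this small setup.c change: irq0 is rewritten with C99 designated initializers and gains IRQF_NOBALANCING, and time_init_hook() sets its mask to cpumask_of_cpu(0), presumably so the tick keeps firing on the boot CPU that does the timekeeping. The designated form is also simply more robust, as this userspace sketch shows (the struct is invented for the demo):

#include <stdio.h>

struct action_sketch {
	void (*handler)(int);
	unsigned long flags;
	const char *name;
};

static void tick(int irq) { (void)irq; }

int main(void)
{
	/* positional: silently wrong if members are ever reordered */
	struct action_sketch a = { tick, 0x20, "timer" };

	/* designated: order-independent, unnamed members zeroed */
	struct action_sketch b = {
		.handler = tick,
		.name    = "timer",
		.flags   = 0x20,
	};

	printf("%s %s\n", a.name, b.name);
	return 0;
}
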
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index 545fcbc8cea2..e5e56bd498db 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c | |||
@@ -307,7 +307,7 @@ static unsigned int __init calibrate_hpt(void) | |||
307 | struct clocksource clocksource_mips = { | 307 | struct clocksource clocksource_mips = { |
308 | .name = "MIPS", | 308 | .name = "MIPS", |
309 | .mask = 0xffffffff, | 309 | .mask = 0xffffffff, |
310 | .is_continuous = 1, | 310 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
311 | }; | 311 | }; |
312 | 312 | ||
313 | static void __init init_mips_clocksource(void) | 313 | static void __init init_mips_clocksource(void) |
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 39db12890214..5e5c0e4add91 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c | |||
@@ -305,8 +305,6 @@ static int pmac_pic_host_map(struct irq_host *h, unsigned int virq, | |||
305 | level = !!(level_mask[hw >> 5] & (1UL << (hw & 0x1f))); | 305 | level = !!(level_mask[hw >> 5] & (1UL << (hw & 0x1f))); |
306 | if (level) | 306 | if (level) |
307 | desc->status |= IRQ_LEVEL; | 307 | desc->status |= IRQ_LEVEL; |
308 | else | ||
309 | desc->status |= IRQ_DELAYED_DISABLE; | ||
310 | set_irq_chip_and_handler(virq, &pmac_pic, level ? | 308 | set_irq_chip_and_handler(virq, &pmac_pic, level ? |
311 | handle_level_irq : handle_edge_irq); | 309 | handle_level_irq : handle_edge_irq); |
312 | return 0; | 310 | return 0; |
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 3b91f27ab202..ee9fd7b85928 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c | |||
@@ -312,7 +312,7 @@ static struct clocksource clocksource_tod = { | |||
312 | .mask = -1ULL, | 312 | .mask = -1ULL, |
313 | .mult = 1000, | 313 | .mult = 1000, |
314 | .shift = 12, | 314 | .shift = 12, |
315 | .is_continuous = 1, | 315 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
316 | }; | 316 | }; |
317 | 317 | ||
318 | 318 | ||
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index 925a65240cfe..b2e1fd8e3571 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c | |||
@@ -97,20 +97,22 @@ static int write_sigio_thread(void *unused) | |||
97 | 97 | ||
98 | static int need_poll(struct pollfds *polls, int n) | 98 | static int need_poll(struct pollfds *polls, int n) |
99 | { | 99 | { |
100 | if(n <= polls->size){ | 100 | struct pollfd *new; |
101 | polls->used = n; | 101 | |
102 | if(n <= polls->size) | ||
102 | return 0; | 103 | return 0; |
103 | } | 104 | |
104 | kfree(polls->poll); | 105 | new = um_kmalloc_atomic(n * sizeof(struct pollfd)); |
105 | polls->poll = um_kmalloc_atomic(n * sizeof(struct pollfd)); | 106 | if(new == NULL){ |
106 | if(polls->poll == NULL){ | ||
107 | printk("need_poll : failed to allocate new pollfds\n"); | 107 | printk("need_poll : failed to allocate new pollfds\n"); |
108 | polls->size = 0; | ||
109 | polls->used = 0; | ||
110 | return -ENOMEM; | 108 | return -ENOMEM; |
111 | } | 109 | } |
110 | |||
111 | memcpy(new, polls->poll, polls->used * sizeof(struct pollfd)); | ||
112 | kfree(polls->poll); | ||
113 | |||
114 | polls->poll = new; | ||
112 | polls->size = n; | 115 | polls->size = n; |
113 | polls->used = n; | ||
114 | return 0; | 116 | return 0; |
115 | } | 117 | } |
116 | 118 | ||
@@ -171,15 +173,15 @@ int add_sigio_fd(int fd) | |||
171 | goto out; | 173 | goto out; |
172 | } | 174 | } |
173 | 175 | ||
174 | n = current_poll.used + 1; | 176 | n = current_poll.used; |
175 | err = need_poll(&next_poll, n); | 177 | err = need_poll(&next_poll, n + 1); |
176 | if(err) | 178 | if(err) |
177 | goto out; | 179 | goto out; |
178 | 180 | ||
179 | for(i = 0; i < current_poll.used; i++) | 181 | memcpy(next_poll.poll, current_poll.poll, |
180 | next_poll.poll[i] = current_poll.poll[i]; | 182 | current_poll.used * sizeof(struct pollfd)); |
181 | 183 | next_poll.poll[n] = *p; | |
182 | next_poll.poll[n - 1] = *p; | 184 | next_poll.used = n + 1; |
183 | update_thread(); | 185 | update_thread(); |
184 | out: | 186 | out: |
185 | sigio_unlock(); | 187 | sigio_unlock(); |
@@ -214,6 +216,7 @@ int ignore_sigio_fd(int fd) | |||
214 | if(p->fd != fd) | 216 | if(p->fd != fd) |
215 | next_poll.poll[n++] = *p; | 217 | next_poll.poll[n++] = *p; |
216 | } | 218 | } |
219 | next_poll.used = current_poll.used - 1; | ||
217 | 220 | ||
218 | update_thread(); | 221 | update_thread(); |
219 | out: | 222 | out: |
@@ -331,10 +334,9 @@ void maybe_sigio_broken(int fd, int read) | |||
331 | 334 | ||
332 | sigio_lock(); | 335 | sigio_lock(); |
333 | err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1); | 336 | err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1); |
334 | if(err){ | 337 | if(err) |
335 | printk("maybe_sigio_broken - failed to add pollfd\n"); | ||
336 | goto out; | 338 | goto out; |
337 | } | 339 | |
338 | all_sigio_fds.poll[all_sigio_fds.used++] = | 340 | all_sigio_fds.poll[all_sigio_fds.used++] = |
339 | ((struct pollfd) { .fd = fd, | 341 | ((struct pollfd) { .fd = fd, |
340 | .events = read ? POLLIN : POLLOUT, | 342 | .events = read ? POLLIN : POLLOUT, |
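
The need_poll() rewrite above fixes a real bug: the old code freed polls->poll before allocating the replacement, so the registered descriptors were lost on every grow, and a failed allocation left the structure empty. The new version allocates first, copies the live entries, then frees — effectively a realloc() that leaves the old array intact on failure. A standalone sketch of the pattern (names invented):

#include <stdlib.h>
#include <string.h>

struct fds_sketch {
	struct entry { int fd; short events; } *poll;
	int size;	/* entries allocated */
	int used;	/* entries in use */
};

static int grow(struct fds_sketch *p, int n)
{
	struct entry *new;

	if (n <= p->size)
		return 0;

	new = malloc(n * sizeof(*new));
	if (new == NULL)
		return -1;	/* old array is still valid here */

	if (p->used)
		memcpy(new, p->poll, p->used * sizeof(*new));
	free(p->poll);

	p->poll = new;
	p->size = n;
	return 0;
}

int main(void)
{
	struct fds_sketch p = { NULL, 0, 0 };

	return grow(&p, 8);
}

Note how ignore_sigio_fd() now also sets next_poll.used explicitly, since need_poll() no longer touches the used count.
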
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 7982cbc3bc94..56eb14c98475 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
@@ -24,6 +24,14 @@ config X86 | |||
24 | bool | 24 | bool |
25 | default y | 25 | default y |
26 | 26 | ||
27 | config GENERIC_TIME | ||
28 | bool | ||
29 | default y | ||
30 | |||
31 | config GENERIC_TIME_VSYSCALL | ||
32 | bool | ||
33 | default y | ||
34 | |||
27 | config ZONE_DMA32 | 35 | config ZONE_DMA32 |
28 | bool | 36 | bool |
29 | default y | 37 | default y |
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index ae399458024b..bb47e86f3d02 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile | |||
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ | |||
8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ | 8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ |
9 | x8664_ksyms.o i387.o syscall.o vsyscall.o \ | 9 | x8664_ksyms.o i387.o syscall.o vsyscall.o \ |
10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ | 10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ |
11 | pci-dma.o pci-nommu.o alternative.o | 11 | pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o |
12 | 12 | ||
13 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 13 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
14 | obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o | 14 | obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o |
@@ -19,7 +19,7 @@ obj-$(CONFIG_ACPI) += acpi/ | |||
19 | obj-$(CONFIG_X86_MSR) += msr.o | 19 | obj-$(CONFIG_X86_MSR) += msr.o |
20 | obj-$(CONFIG_MICROCODE) += microcode.o | 20 | obj-$(CONFIG_MICROCODE) += microcode.o |
21 | obj-$(CONFIG_X86_CPUID) += cpuid.o | 21 | obj-$(CONFIG_X86_CPUID) += cpuid.o |
22 | obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o | 22 | obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o |
23 | obj-y += apic.o nmi.o | 23 | obj-y += apic.o nmi.o |
24 | obj-y += io_apic.o mpparse.o \ | 24 | obj-y += io_apic.o mpparse.o \ |
25 | genapic.o genapic_cluster.o genapic_flat.o | 25 | genapic.o genapic_cluster.o genapic_flat.o |
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 124b2d27b4ac..723417d924c0 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <asm/idle.h> | 37 | #include <asm/idle.h> |
38 | #include <asm/proto.h> | 38 | #include <asm/proto.h> |
39 | #include <asm/timex.h> | 39 | #include <asm/timex.h> |
40 | #include <asm/hpet.h> | ||
40 | #include <asm/apic.h> | 41 | #include <asm/apic.h> |
41 | 42 | ||
42 | int apic_mapped; | 43 | int apic_mapped; |
@@ -763,7 +764,7 @@ static void setup_APIC_timer(unsigned int clocks) | |||
763 | local_irq_save(flags); | 764 | local_irq_save(flags); |
764 | 765 | ||
765 | /* wait for irq slice */ | 766 | /* wait for irq slice */ |
766 | if (vxtime.hpet_address && hpet_use_timer) { | 767 | if (hpet_address && hpet_use_timer) { |
767 | int trigger = hpet_readl(HPET_T0_CMP); | 768 | int trigger = hpet_readl(HPET_T0_CMP); |
768 | while (hpet_readl(HPET_COUNTER) >= trigger) | 769 | while (hpet_readl(HPET_COUNTER) >= trigger) |
769 | /* do nothing */ ; | 770 | /* do nothing */ ; |
@@ -785,7 +786,7 @@ static void setup_APIC_timer(unsigned int clocks) | |||
785 | /* Turn off PIT interrupt if we use APIC timer as main timer. | 786 | /* Turn off PIT interrupt if we use APIC timer as main timer. |
786 | Only works with the PM timer right now | 787 | Only works with the PM timer right now |
787 | TBD fix it for HPET too. */ | 788 | TBD fix it for HPET too. */ |
788 | if (vxtime.mode == VXTIME_PMTMR && | 789 | if ((pmtmr_ioport != 0) && |
789 | smp_processor_id() == boot_cpu_id && | 790 | smp_processor_id() == boot_cpu_id && |
790 | apic_runs_main_timer == 1 && | 791 | apic_runs_main_timer == 1 && |
791 | !cpu_isset(boot_cpu_id, timer_interrupt_broadcast_ipi_mask)) { | 792 | !cpu_isset(boot_cpu_id, timer_interrupt_broadcast_ipi_mask)) { |
diff --git a/arch/i386/kernel/time_hpet.c b/arch/x86_64/kernel/hpet.c index 1e4702dfcd01..65a0edd71a17 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/x86_64/kernel/hpet.c | |||
@@ -1,224 +1,138 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/time_hpet.c | ||
3 | * This code largely copied from arch/x86_64/kernel/time.c | ||
4 | * See that file for credits. | ||
5 | * | ||
6 | * 2003-06-30 Venkatesh Pallipadi - Additional changes for HPET support | ||
7 | */ | ||
8 | |||
9 | #include <linux/errno.h> | ||
10 | #include <linux/kernel.h> | 1 | #include <linux/kernel.h> |
11 | #include <linux/param.h> | 2 | #include <linux/sched.h> |
12 | #include <linux/string.h> | ||
13 | #include <linux/init.h> | 3 | #include <linux/init.h> |
14 | #include <linux/smp.h> | 4 | #include <linux/mc146818rtc.h> |
5 | #include <linux/time.h> | ||
6 | #include <linux/clocksource.h> | ||
7 | #include <linux/ioport.h> | ||
8 | #include <linux/acpi.h> | ||
9 | #include <linux/hpet.h> | ||
10 | #include <asm/pgtable.h> | ||
11 | #include <asm/vsyscall.h> | ||
12 | #include <asm/timex.h> | ||
13 | #include <asm/hpet.h> | ||
15 | 14 | ||
16 | #include <asm/timer.h> | 15 | int nohpet __initdata; |
17 | #include <asm/fixmap.h> | ||
18 | #include <asm/apic.h> | ||
19 | 16 | ||
20 | #include <linux/timex.h> | 17 | unsigned long hpet_address; |
18 | unsigned long hpet_period; /* fsecs / HPET clock */ | ||
19 | unsigned long hpet_tick; /* HPET clocks / interrupt */ | ||
21 | 20 | ||
22 | #include <asm/hpet.h> | 21 | int hpet_use_timer; /* Use counter of hpet for time keeping, |
23 | #include <linux/hpet.h> | 22 | * otherwise PIT |
23 | */ | ||
24 | 24 | ||
25 | static unsigned long hpet_period; /* fsecs / HPET clock */ | 25 | #ifdef CONFIG_HPET |
26 | unsigned long hpet_tick; /* hpet clks count per tick */ | 26 | static __init int late_hpet_init(void) |
27 | unsigned long hpet_address; /* hpet memory map physical address */ | 27 | { |
28 | int hpet_use_timer; | 28 | struct hpet_data hd; |
29 | unsigned int ntimer; | ||
29 | 30 | ||
30 | static int use_hpet; /* can be used for runtime check of hpet */ | 31 | if (!hpet_address) |
31 | static int boot_hpet_disable; /* boottime override for HPET timer */ | 32 | return 0; |
32 | static void __iomem * hpet_virt_address; /* hpet kernel virtual address */ | ||
33 | 33 | ||
34 | #define FSEC_TO_USEC (1000000000UL) | 34 | memset(&hd, 0, sizeof(hd)); |
35 | 35 | ||
36 | int hpet_readl(unsigned long a) | 36 | ntimer = hpet_readl(HPET_ID); |
37 | { | 37 | ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT; |
38 | return readl(hpet_virt_address + a); | 38 | ntimer++; |
39 | } | ||
40 | 39 | ||
41 | static void hpet_writel(unsigned long d, unsigned long a) | 40 | /* |
42 | { | 41 | * Register with driver. |
43 | writel(d, hpet_virt_address + a); | 42 | * Timer0 and Timer1 are used by platform. |
44 | } | 43 | */ |
44 | hd.hd_phys_address = hpet_address; | ||
45 | hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE); | ||
46 | hd.hd_nirqs = ntimer; | ||
47 | hd.hd_flags = HPET_DATA_PLATFORM; | ||
48 | hpet_reserve_timer(&hd, 0); | ||
49 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
50 | hpet_reserve_timer(&hd, 1); | ||
51 | #endif | ||
52 | hd.hd_irq[0] = HPET_LEGACY_8254; | ||
53 | hd.hd_irq[1] = HPET_LEGACY_RTC; | ||
54 | if (ntimer > 2) { | ||
55 | struct hpet *hpet; | ||
56 | struct hpet_timer *timer; | ||
57 | int i; | ||
58 | |||
59 | hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE); | ||
60 | timer = &hpet->hpet_timers[2]; | ||
61 | for (i = 2; i < ntimer; timer++, i++) | ||
62 | hd.hd_irq[i] = (timer->hpet_config & | ||
63 | Tn_INT_ROUTE_CNF_MASK) >> | ||
64 | Tn_INT_ROUTE_CNF_SHIFT; | ||
45 | 65 | ||
46 | #ifdef CONFIG_X86_LOCAL_APIC | 66 | } |
47 | /* | ||
48 | * HPET counters dont wrap around on every tick. They just change the | ||
49 | * comparator value and continue. Next tick can be caught by checking | ||
50 | * for a change in the comparator value. Used in apic.c. | ||
51 | */ | ||
52 | static void __devinit wait_hpet_tick(void) | ||
53 | { | ||
54 | unsigned int start_cmp_val, end_cmp_val; | ||
55 | 67 | ||
56 | start_cmp_val = hpet_readl(HPET_T0_CMP); | 68 | hpet_alloc(&hd); |
57 | do { | 69 | return 0; |
58 | end_cmp_val = hpet_readl(HPET_T0_CMP); | ||
59 | } while (start_cmp_val == end_cmp_val); | ||
60 | } | 70 | } |
71 | fs_initcall(late_hpet_init); | ||
61 | #endif | 72 | #endif |
62 | 73 | ||
63 | static int hpet_timer_stop_set_go(unsigned long tick) | 74 | int hpet_timer_stop_set_go(unsigned long tick) |
64 | { | 75 | { |
65 | unsigned int cfg; | 76 | unsigned int cfg; |
66 | 77 | ||
67 | /* | 78 | /* |
68 | * Stop the timers and reset the main counter. | 79 | * Stop the timers and reset the main counter. |
69 | */ | 80 | */ |
81 | |||
70 | cfg = hpet_readl(HPET_CFG); | 82 | cfg = hpet_readl(HPET_CFG); |
71 | cfg &= ~HPET_CFG_ENABLE; | 83 | cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); |
72 | hpet_writel(cfg, HPET_CFG); | 84 | hpet_writel(cfg, HPET_CFG); |
73 | hpet_writel(0, HPET_COUNTER); | 85 | hpet_writel(0, HPET_COUNTER); |
74 | hpet_writel(0, HPET_COUNTER + 4); | 86 | hpet_writel(0, HPET_COUNTER + 4); |
75 | 87 | ||
88 | /* | ||
89 | * Set up timer 0 as periodic with first interrupt to happen at hpet_tick, | ||
90 | * and period also hpet_tick. | ||
91 | */ | ||
76 | if (hpet_use_timer) { | 92 | if (hpet_use_timer) { |
77 | /* | 93 | hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | |
78 | * Set up timer 0, as periodic with first interrupt to happen at | 94 | HPET_TN_32BIT, HPET_T0_CFG); |
79 | * hpet_tick, and period also hpet_tick. | 95 | hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */ |
80 | */ | 96 | hpet_writel(hpet_tick, HPET_T0_CMP); /* period */ |
81 | cfg = hpet_readl(HPET_T0_CFG); | ||
82 | cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | | ||
83 | HPET_TN_SETVAL | HPET_TN_32BIT; | ||
84 | hpet_writel(cfg, HPET_T0_CFG); | ||
85 | |||
86 | /* | ||
87 | * The first write after writing TN_SETVAL to the config register sets | ||
88 | * the counter value, the second write sets the threshold. | ||
89 | */ | ||
90 | hpet_writel(tick, HPET_T0_CMP); | ||
91 | hpet_writel(tick, HPET_T0_CMP); | ||
92 | } | ||
93 | /* | ||
94 | * Go! | ||
95 | */ | ||
96 | cfg = hpet_readl(HPET_CFG); | ||
97 | if (hpet_use_timer) | ||
98 | cfg |= HPET_CFG_LEGACY; | 97 | cfg |= HPET_CFG_LEGACY; |
98 | } | ||
99 | /* | ||
100 | * Go! | ||
101 | */ | ||
102 | |||
99 | cfg |= HPET_CFG_ENABLE; | 103 | cfg |= HPET_CFG_ENABLE; |
100 | hpet_writel(cfg, HPET_CFG); | 104 | hpet_writel(cfg, HPET_CFG); |
101 | 105 | ||
102 | return 0; | 106 | return 0; |
103 | } | 107 | } |
104 | 108 | ||
105 | /* | 109 | int hpet_arch_init(void) |
106 | * Check whether HPET was found by ACPI boot parse. If yes setup HPET | ||
107 | * counter 0 for kernel base timer. | ||
108 | */ | ||
109 | int __init hpet_enable(void) | ||
110 | { | 110 | { |
111 | unsigned int id; | 111 | unsigned int id; |
112 | unsigned long tick_fsec_low, tick_fsec_high; /* tick in femto sec */ | ||
113 | unsigned long hpet_tick_rem; | ||
114 | 112 | ||
115 | if (boot_hpet_disable) | 113 | if (!hpet_address) |
116 | return -1; | 114 | return -1; |
115 | set_fixmap_nocache(FIX_HPET_BASE, hpet_address); | ||
116 | __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE); | ||
117 | |||
118 | /* | ||
119 | * Read the period, compute tick and quotient. | ||
120 | */ | ||
117 | 121 | ||
118 | if (!hpet_address) { | ||
119 | return -1; | ||
120 | } | ||
121 | hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); | ||
122 | /* | ||
123 | * Read the period, compute tick and quotient. | ||
124 | */ | ||
125 | id = hpet_readl(HPET_ID); | 122 | id = hpet_readl(HPET_ID); |
126 | 123 | ||
127 | /* | 124 | if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER)) |
128 | * We are checking for value '1' or more in number field if | ||
129 | * CONFIG_HPET_EMULATE_RTC is set because we will need an | ||
130 | * additional timer for RTC emulation. | ||
131 | * However, we can do with one timer otherwise using the | ||
132 | * the single HPET timer for system time. | ||
133 | */ | ||
134 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
135 | if (!(id & HPET_ID_NUMBER)) { | ||
136 | iounmap(hpet_virt_address); | ||
137 | hpet_virt_address = NULL; | ||
138 | return -1; | 125 | return -1; |
139 | } | ||
140 | #endif | ||
141 | |||
142 | 126 | ||
143 | hpet_period = hpet_readl(HPET_PERIOD); | 127 | hpet_period = hpet_readl(HPET_PERIOD); |
144 | if ((hpet_period < HPET_MIN_PERIOD) || (hpet_period > HPET_MAX_PERIOD)) { | 128 | if (hpet_period < 100000 || hpet_period > 100000000) |
145 | iounmap(hpet_virt_address); | ||
146 | hpet_virt_address = NULL; | ||
147 | return -1; | 129 | return -1; |
148 | } | ||
149 | 130 | ||
150 | /* | 131 | hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period; |
151 | * 64 bit math | ||
152 | * First changing tick into fsec | ||
153 | * Then 64 bit div to find number of hpet clk per tick | ||
154 | */ | ||
155 | ASM_MUL64_REG(tick_fsec_low, tick_fsec_high, | ||
156 | KERNEL_TICK_USEC, FSEC_TO_USEC); | ||
157 | ASM_DIV64_REG(hpet_tick, hpet_tick_rem, | ||
158 | hpet_period, tick_fsec_low, tick_fsec_high); | ||
159 | |||
160 | if (hpet_tick_rem > (hpet_period >> 1)) | ||
161 | hpet_tick++; /* rounding the result */ | ||
162 | |||
163 | hpet_use_timer = id & HPET_ID_LEGSUP; | ||
164 | |||
165 | if (hpet_timer_stop_set_go(hpet_tick)) { | ||
166 | iounmap(hpet_virt_address); | ||
167 | hpet_virt_address = NULL; | ||
168 | return -1; | ||
169 | } | ||
170 | 132 | ||
171 | use_hpet = 1; | 133 | hpet_use_timer = (id & HPET_ID_LEGSUP); |
172 | 134 | ||
173 | #ifdef CONFIG_HPET | 135 | return hpet_timer_stop_set_go(hpet_tick); |
174 | { | ||
175 | struct hpet_data hd; | ||
176 | unsigned int ntimer; | ||
177 | |||
178 | memset(&hd, 0, sizeof (hd)); | ||
179 | |||
180 | ntimer = hpet_readl(HPET_ID); | ||
181 | ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT; | ||
182 | ntimer++; | ||
183 | |||
184 | /* | ||
185 | * Register with driver. | ||
186 | * Timer0 and Timer1 are used by platform. | ||
187 | */ | ||
188 | hd.hd_phys_address = hpet_address; | ||
189 | hd.hd_address = hpet_virt_address; | ||
190 | hd.hd_nirqs = ntimer; | ||
191 | hd.hd_flags = HPET_DATA_PLATFORM; | ||
192 | hpet_reserve_timer(&hd, 0); | ||
193 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
194 | hpet_reserve_timer(&hd, 1); | ||
195 | #endif | ||
196 | hd.hd_irq[0] = HPET_LEGACY_8254; | ||
197 | hd.hd_irq[1] = HPET_LEGACY_RTC; | ||
198 | if (ntimer > 2) { | ||
199 | struct hpet __iomem *hpet; | ||
200 | struct hpet_timer __iomem *timer; | ||
201 | int i; | ||
202 | |||
203 | hpet = hpet_virt_address; | ||
204 | |||
205 | for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer; | ||
206 | timer++, i++) | ||
207 | hd.hd_irq[i] = (timer->hpet_config & | ||
208 | Tn_INT_ROUTE_CNF_MASK) >> | ||
209 | Tn_INT_ROUTE_CNF_SHIFT; | ||
210 | |||
211 | } | ||
212 | |||
213 | hpet_alloc(&hd); | ||
214 | } | ||
215 | #endif | ||
216 | |||
217 | #ifdef CONFIG_X86_LOCAL_APIC | ||
218 | if (hpet_use_timer) | ||
219 | wait_timer_tick = wait_hpet_tick; | ||
220 | #endif | ||
221 | return 0; | ||
222 | } | 136 | } |
223 | 137 | ||
224 | int hpet_reenable(void) | 138 | int hpet_reenable(void) |
@@ -226,28 +140,51 @@ int hpet_reenable(void) | |||
226 | return hpet_timer_stop_set_go(hpet_tick); | 140 | return hpet_timer_stop_set_go(hpet_tick); |
227 | } | 141 | } |
228 | 142 | ||
229 | int is_hpet_enabled(void) | 143 | /* |
230 | { | 144 | * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing |
231 | return use_hpet; | 145 | * it to the HPET timer of known frequency. |
232 | } | 146 | */ |
233 | 147 | ||
234 | int is_hpet_capable(void) | 148 | #define TICK_COUNT 100000000 |
149 | #define TICK_MIN 5000 | ||
150 | |||
151 | /* | ||
152 | * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none | ||
153 | * occurs between the reads of the hpet & TSC. | ||
154 | */ | ||
155 | static void __init read_hpet_tsc(int *hpet, int *tsc) | ||
235 | { | 156 | { |
236 | if (!boot_hpet_disable && hpet_address) | 157 | int tsc1, tsc2, hpet1; |
237 | return 1; | 158 | |
238 | return 0; | 159 | do { |
160 | tsc1 = get_cycles_sync(); | ||
161 | hpet1 = hpet_readl(HPET_COUNTER); | ||
162 | tsc2 = get_cycles_sync(); | ||
163 | } while (tsc2 - tsc1 > TICK_MIN); | ||
164 | *hpet = hpet1; | ||
165 | *tsc = tsc2; | ||
239 | } | 166 | } |
240 | 167 | ||
241 | static int __init hpet_setup(char* str) | 168 | unsigned int __init hpet_calibrate_tsc(void) |
242 | { | 169 | { |
243 | if (str) { | 170 | int tsc_start, hpet_start; |
244 | if (!strncmp("disable", str, 7)) | 171 | int tsc_now, hpet_now; |
245 | boot_hpet_disable = 1; | 172 | unsigned long flags; |
246 | } | 173 | |
247 | return 1; | 174 | local_irq_save(flags); |
248 | } | 175 | |
176 | read_hpet_tsc(&hpet_start, &tsc_start); | ||
249 | 177 | ||
250 | __setup("hpet=", hpet_setup); | 178 | do { |
179 | local_irq_disable(); | ||
180 | read_hpet_tsc(&hpet_now, &tsc_now); | ||
181 | local_irq_restore(flags); | ||
182 | } while ((tsc_now - tsc_start) < TICK_COUNT && | ||
183 | (hpet_now - hpet_start) < TICK_COUNT); | ||
184 | |||
185 | return (tsc_now - tsc_start) * 1000000000L | ||
186 | / ((hpet_now - hpet_start) * hpet_period / 1000); | ||
187 | } | ||
251 | 188 | ||
252 | #ifdef CONFIG_HPET_EMULATE_RTC | 189 | #ifdef CONFIG_HPET_EMULATE_RTC |
253 | /* HPET in LegacyReplacement Mode eats up RTC interrupt line. When HPET | 190 | /* HPET in LegacyReplacement Mode eats up RTC interrupt line. When HPET |
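
A quick sanity check of the units in hpet_calibrate_tsc() above: hpet_period is in femtoseconds per HPET tick, so hpet_delta * hpet_period is the elapsed time in fs, and tsc_delta * 10^9 / (elapsed_fs / 1000) reduces to TSC cycles per millisecond — kHz, the unit tsc_khz expects. A userspace check with invented figures:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t hpet_period = 69841279;	/* fs/tick, ~14.318 MHz HPET */
	uint64_t hpet_delta  = 596591;		/* HPET ticks observed */
	uint64_t tsc_delta   = 100000000;	/* TSC cycles, same window */

	/* same expression as the return statement above: */
	uint64_t khz = tsc_delta * 1000000000ULL /
		       (hpet_delta * hpet_period / 1000);

	printf("~%llu kHz\n", (unsigned long long)khz);	/* ~2.4 GHz */
	return 0;
}
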
@@ -264,7 +201,6 @@ __setup("hpet=", hpet_setup); | |||
264 | * For (3), we use interrupts at 64Hz or user specified periodic | 201 | * For (3), we use interrupts at 64Hz or user specified periodic |
265 | * frequency, whichever is higher. | 202 | * frequency, whichever is higher. |
266 | */ | 203 | */ |
267 | #include <linux/mc146818rtc.h> | ||
268 | #include <linux/rtc.h> | 204 | #include <linux/rtc.h> |
269 | 205 | ||
270 | #define DEFAULT_RTC_INT_FREQ 64 | 206 | #define DEFAULT_RTC_INT_FREQ 64 |
@@ -283,6 +219,11 @@ static unsigned long PIE_count; | |||
283 | static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ | 219 | static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ |
284 | static unsigned int hpet_t1_cmp; /* cached comparator register */ | 220 | static unsigned int hpet_t1_cmp; /* cached comparator register */ |
285 | 221 | ||
222 | int is_hpet_enabled(void) | ||
223 | { | ||
224 | return hpet_address != 0; | ||
225 | } | ||
226 | |||
286 | /* | 227 | /* |
287 | * Timer 1 for RTC, we do not use periodic interrupt feature, | 228 | * Timer 1 for RTC, we do not use periodic interrupt feature, |
288 | * even if HPET supports periodic interrupts on Timer 1. | 229 | * even if HPET supports periodic interrupts on Timer 1. |
@@ -367,8 +308,9 @@ static void hpet_rtc_timer_reinit(void) | |||
367 | if (PIE_on) | 308 | if (PIE_on) |
368 | PIE_count += lost_ints; | 309 | PIE_count += lost_ints; |
369 | 310 | ||
370 | printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", | 311 | if (printk_ratelimit()) |
371 | hpet_rtc_int_freq); | 312 | printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", |
313 | hpet_rtc_int_freq); | ||
372 | } | 314 | } |
373 | } | 315 | } |
374 | 316 | ||
@@ -450,7 +392,7 @@ int hpet_rtc_dropped_irq(void) | |||
450 | return 1; | 392 | return 1; |
451 | } | 393 | } |
452 | 394 | ||
453 | irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) | 395 | irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) |
454 | { | 396 | { |
455 | struct rtc_time curr_time; | 397 | struct rtc_time curr_time; |
456 | unsigned long rtc_int_flag = 0; | 398 | unsigned long rtc_int_flag = 0; |
@@ -495,3 +437,75 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) | |||
495 | } | 437 | } |
496 | #endif | 438 | #endif |
497 | 439 | ||
440 | static int __init nohpet_setup(char *s) | ||
441 | { | ||
442 | nohpet = 1; | ||
443 | return 1; | ||
444 | } | ||
445 | |||
446 | __setup("nohpet", nohpet_setup); | ||
447 | |||
448 | #define HPET_MASK 0xFFFFFFFF | ||
449 | #define HPET_SHIFT 22 | ||
450 | |||
451 | /* FSEC = 10^-15 NSEC = 10^-9 */ | ||
452 | #define FSEC_PER_NSEC 1000000 | ||
453 | |||
454 | static void *hpet_ptr; | ||
455 | |||
456 | static cycle_t read_hpet(void) | ||
457 | { | ||
458 | return (cycle_t)readl(hpet_ptr); | ||
459 | } | ||
460 | |||
461 | static cycle_t __vsyscall_fn vread_hpet(void) | ||
462 | { | ||
463 | return readl((void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); | ||
464 | } | ||
465 | |||
466 | struct clocksource clocksource_hpet = { | ||
467 | .name = "hpet", | ||
468 | .rating = 250, | ||
469 | .read = read_hpet, | ||
470 | .mask = (cycle_t)HPET_MASK, | ||
471 | .mult = 0, /* set below */ | ||
472 | .shift = HPET_SHIFT, | ||
473 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
474 | .vread = vread_hpet, | ||
475 | }; | ||
476 | |||
477 | static int __init init_hpet_clocksource(void) | ||
478 | { | ||
479 | unsigned long hpet_period; | ||
480 | void __iomem *hpet_base; | ||
481 | u64 tmp; | ||
482 | |||
483 | if (!hpet_address) | ||
484 | return -ENODEV; | ||
485 | |||
486 | /* map the hpet address: */ | ||
487 | hpet_base = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); | ||
488 | hpet_ptr = hpet_base + HPET_COUNTER; | ||
489 | |||
490 | /* calculate the frequency: */ | ||
491 | hpet_period = readl(hpet_base + HPET_PERIOD); | ||
492 | |||
493 | /* | ||
494 | * hpet period is in femtoseconds per cycle | ||
495 | * so we need to convert this to ns/cyc units | ||
496 | * approximated by mult/2^shift | ||
497 | * | ||
498 | * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift | ||
499 | * fsec/cyc * 1ns/1000000fsec * 2^shift = mult | ||
500 | * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult | ||
501 | * (fsec/cyc << shift)/1000000 = mult | ||
502 | * (hpet_period << shift)/FSEC_PER_NSEC = mult | ||
503 | */ | ||
504 | tmp = (u64)hpet_period << HPET_SHIFT; | ||
505 | do_div(tmp, FSEC_PER_NSEC); | ||
506 | clocksource_hpet.mult = (u32)tmp; | ||
507 | |||
508 | return clocksource_register(&clocksource_hpet); | ||
509 | } | ||
510 | |||
511 | module_init(init_hpet_clocksource); | ||
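
The derivation in the comment above is easy to verify numerically: for a 14.318 MHz HPET the period register reads about 69841279 fs, and the resulting mult converts HPET cycles to nanoseconds through the same (cycles * mult) >> shift step every clocksource uses. A standalone check (the period value is illustrative):

#include <stdio.h>
#include <stdint.h>

#define SHIFT		22		/* mirrors HPET_SHIFT above */
#define FS_PER_NS	1000000ULL	/* mirrors FSEC_PER_NSEC above */

int main(void)
{
	uint64_t period = 69841279;	/* fs per cycle, ~14.318 MHz */
	uint32_t mult = (uint32_t)((period << SHIFT) / FS_PER_NS);
	uint64_t cycles = 14318180;	/* about one second of ticks */

	/* expect roughly 10^9 ns: */
	printf("mult=%u -> %llu ns\n", mult,
	       (unsigned long long)((cycles * mult) >> SHIFT));
	return 0;
}
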
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index d73c79e821f1..01e2cf0bdeb1 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c | |||
@@ -103,6 +103,7 @@ static void mask_and_ack_8259A(unsigned int); | |||
103 | static struct irq_chip i8259A_chip = { | 103 | static struct irq_chip i8259A_chip = { |
104 | .name = "XT-PIC", | 104 | .name = "XT-PIC", |
105 | .mask = disable_8259A_irq, | 105 | .mask = disable_8259A_irq, |
106 | .disable = disable_8259A_irq, | ||
106 | .unmask = enable_8259A_irq, | 107 | .unmask = enable_8259A_irq, |
107 | .mask_ack = mask_and_ack_8259A, | 108 | .mask_ack = mask_and_ack_8259A, |
108 | }; | 109 | }; |
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 566e64d966c4..950682f35766 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c | |||
@@ -810,11 +810,9 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) | |||
810 | trigger == IOAPIC_LEVEL) | 810 | trigger == IOAPIC_LEVEL) |
811 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | 811 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
812 | handle_fasteoi_irq, "fasteoi"); | 812 | handle_fasteoi_irq, "fasteoi"); |
813 | else { | 813 | else |
814 | irq_desc[irq].status |= IRQ_DELAYED_DISABLE; | ||
815 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | 814 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
816 | handle_edge_irq, "edge"); | 815 | handle_edge_irq, "edge"); |
817 | } | ||
818 | } | 816 | } |
819 | static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) | 817 | static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) |
820 | { | 818 | { |
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c index 7554458dc9cb..ae8f91214f15 100644 --- a/arch/x86_64/kernel/pmtimer.c +++ b/arch/x86_64/kernel/pmtimer.c | |||
@@ -24,15 +24,6 @@ | |||
24 | #include <asm/msr.h> | 24 | #include <asm/msr.h> |
25 | #include <asm/vsyscall.h> | 25 | #include <asm/vsyscall.h> |
26 | 26 | ||
27 | /* The I/O port the PMTMR resides at. | ||
28 | * The location is detected during setup_arch(), | ||
29 | * in arch/i386/kernel/acpi/boot.c */ | ||
30 | u32 pmtmr_ioport __read_mostly; | ||
31 | |||
32 | /* value of the Power timer at last timer interrupt */ | ||
33 | static u32 offset_delay; | ||
34 | static u32 last_pmtmr_tick; | ||
35 | |||
36 | #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ | 27 | #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ |
37 | 28 | ||
38 | static inline u32 cyc2us(u32 cycles) | 29 | static inline u32 cyc2us(u32 cycles) |
@@ -48,38 +39,6 @@ static inline u32 cyc2us(u32 cycles) | |||
48 | return (cycles >> 10); | 39 | return (cycles >> 10); |
49 | } | 40 | } |
50 | 41 | ||
51 | int pmtimer_mark_offset(void) | ||
52 | { | ||
53 | static int first_run = 1; | ||
54 | unsigned long tsc; | ||
55 | u32 lost; | ||
56 | |||
57 | u32 tick = inl(pmtmr_ioport); | ||
58 | u32 delta; | ||
59 | |||
60 | delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK); | ||
61 | |||
62 | last_pmtmr_tick = tick; | ||
63 | monotonic_base += delta * NSEC_PER_USEC; | ||
64 | |||
65 | delta += offset_delay; | ||
66 | |||
67 | lost = delta / (USEC_PER_SEC / HZ); | ||
68 | offset_delay = delta % (USEC_PER_SEC / HZ); | ||
69 | |||
70 | rdtscll(tsc); | ||
71 | vxtime.last_tsc = tsc - offset_delay * (u64)cpu_khz / 1000; | ||
72 | |||
73 | /* don't calculate delay for first run, | ||
74 | or if we've got less then a tick */ | ||
75 | if (first_run || (lost < 1)) { | ||
76 | first_run = 0; | ||
77 | offset_delay = 0; | ||
78 | } | ||
79 | |||
80 | return lost - 1; | ||
81 | } | ||
82 | |||
83 | static unsigned pmtimer_wait_tick(void) | 42 | static unsigned pmtimer_wait_tick(void) |
84 | { | 43 | { |
85 | u32 a, b; | 44 | u32 a, b; |
@@ -101,23 +60,6 @@ void pmtimer_wait(unsigned us) | |||
101 | } while (cyc2us(b - a) < us); | 60 | } while (cyc2us(b - a) < us); |
102 | } | 61 | } |
103 | 62 | ||
104 | void pmtimer_resume(void) | ||
105 | { | ||
106 | last_pmtmr_tick = inl(pmtmr_ioport); | ||
107 | } | ||
108 | |||
109 | unsigned int do_gettimeoffset_pm(void) | ||
110 | { | ||
111 | u32 now, offset, delta = 0; | ||
112 | |||
113 | offset = last_pmtmr_tick; | ||
114 | now = inl(pmtmr_ioport); | ||
115 | delta = (now - offset) & ACPI_PM_MASK; | ||
116 | |||
117 | return offset_delay + cyc2us(delta); | ||
118 | } | ||
119 | |||
120 | |||
121 | static int __init nopmtimer_setup(char *s) | 63 | static int __init nopmtimer_setup(char *s) |
122 | { | 64 | { |
123 | pmtmr_ioport = 0; | 65 | pmtmr_ioport = 0; |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index daf19332f0dd..35443729aad8 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
@@ -148,217 +148,6 @@ static void __cpuinit smp_store_cpu_info(int id) | |||
148 | print_cpu_info(c); | 148 | print_cpu_info(c); |
149 | } | 149 | } |
150 | 150 | ||
151 | /* | ||
152 | * New Funky TSC sync algorithm borrowed from IA64. | ||
153 | * Main advantage is that it doesn't reset the TSCs fully and | ||
154 | * in general looks more robust; it works better than my earlier | ||
155 | * attempts. I believe it was written by David Mosberger. Some minor | ||
156 | * adjustments for x86-64 by me -AK | ||
157 | * | ||
158 | * Original comment reproduced below. | ||
159 | * | ||
160 | * Synchronize TSC of the current (slave) CPU with the TSC of the | ||
161 | * MASTER CPU (normally the time-keeper CPU). We use a closed loop to | ||
162 | * eliminate the possibility of unaccounted-for errors (such as | ||
163 | * getting a machine check in the middle of a calibration step). The | ||
164 | * basic idea is for the slave to ask the master what itc value it has | ||
165 | * and to read its own itc before and after the master responds. Each | ||
166 | * iteration gives us three timestamps: | ||
167 | * | ||
168 | * slave master | ||
169 | * | ||
170 | * t0 ---\ | ||
171 | * ---\ | ||
172 | * ---> | ||
173 | * tm | ||
174 | * /--- | ||
175 | * /--- | ||
176 | * t1 <--- | ||
177 | * | ||
178 | * | ||
179 | * The goal is to adjust the slave's TSC such that tm falls exactly | ||
180 | * half-way between t0 and t1. If we achieve this, the clocks are | ||
181 | * synchronized provided the interconnect between the slave and the | ||
182 | * master is symmetric. Even if the interconnect were asymmetric, we | ||
183 | * would still know that the synchronization error is smaller than the | ||
184 | * roundtrip latency (t0 - t1). | ||
185 | * | ||
186 | * When the interconnect is quiet and symmetric, this lets us | ||
187 | * synchronize the TSC to within one or two cycles. However, we can | ||
188 | * only *guarantee* that the synchronization is accurate to within a | ||
189 | * round-trip time, which is typically in the range of several hundred | ||
190 | * cycles (e.g., ~500 cycles). In practice, this means that the TSCs | ||
191 | * are usually almost perfectly synchronized, but we shouldn't assume | ||
192 | * that the accuracy is much better than half a micro second or so. | ||
193 | * | ||
194 | * [there are other errors like the latency of RDTSC and of the | ||
195 | * WRMSR. These can also amount to hundreds of cycles. So it's | ||
196 | * probably worse. It claims 153 cycles error on a dual Opteron, | ||
197 | * but I suspect the numbers are actually somewhat worse -AK] | ||
198 | */ | ||
199 | |||
200 | #define MASTER 0 | ||
201 | #define SLAVE (SMP_CACHE_BYTES/8) | ||
202 | |||
203 | /* Intentionally don't use cpu_relax() during TSC synchronization | ||
204 | because we don't want to go into funky power save modes or cause | ||
205 | hypervisors to schedule us away. Going to sleep would likely affect | ||
206 | latency and low latency is the primary objective here. -AK */ | ||
207 | #define no_cpu_relax() barrier() | ||
208 | |||
209 | static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); | ||
210 | static volatile __cpuinitdata unsigned long go[SLAVE + 1]; | ||
211 | static int notscsync __cpuinitdata; | ||
212 | |||
213 | #undef DEBUG_TSC_SYNC | ||
214 | |||
215 | #define NUM_ROUNDS 64 /* magic value */ | ||
216 | #define NUM_ITERS 5 /* likewise */ | ||
217 | |||
218 | /* Callback on boot CPU */ | ||
219 | static __cpuinit void sync_master(void *arg) | ||
220 | { | ||
221 | unsigned long flags, i; | ||
222 | |||
223 | go[MASTER] = 0; | ||
224 | |||
225 | local_irq_save(flags); | ||
226 | { | ||
227 | for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { | ||
228 | while (!go[MASTER]) | ||
229 | no_cpu_relax(); | ||
230 | go[MASTER] = 0; | ||
231 | rdtscll(go[SLAVE]); | ||
232 | } | ||
233 | } | ||
234 | local_irq_restore(flags); | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * Return the number of cycles by which our tsc differs from the tsc | ||
239 | * on the master (time-keeper) CPU. A positive number indicates our | ||
240 | * tsc is ahead of the master, negative that it is behind. | ||
241 | */ | ||
242 | static inline long | ||
243 | get_delta(long *rt, long *master) | ||
244 | { | ||
245 | unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; | ||
246 | unsigned long tcenter, t0, t1, tm; | ||
247 | int i; | ||
248 | |||
249 | for (i = 0; i < NUM_ITERS; ++i) { | ||
250 | rdtscll(t0); | ||
251 | go[MASTER] = 1; | ||
252 | while (!(tm = go[SLAVE])) | ||
253 | no_cpu_relax(); | ||
254 | go[SLAVE] = 0; | ||
255 | rdtscll(t1); | ||
256 | |||
257 | if (t1 - t0 < best_t1 - best_t0) | ||
258 | best_t0 = t0, best_t1 = t1, best_tm = tm; | ||
259 | } | ||
260 | |||
261 | *rt = best_t1 - best_t0; | ||
262 | *master = best_tm - best_t0; | ||
263 | |||
264 | /* average best_t0 and best_t1 without overflow: */ | ||
265 | tcenter = (best_t0/2 + best_t1/2); | ||
266 | if (best_t0 % 2 + best_t1 % 2 == 2) | ||
267 | ++tcenter; | ||
268 | return tcenter - best_tm; | ||
269 | } | ||
270 | |||
271 | static __cpuinit void sync_tsc(unsigned int master) | ||
272 | { | ||
273 | int i, done = 0; | ||
274 | long delta, adj, adjust_latency = 0; | ||
275 | unsigned long flags, rt, master_time_stamp, bound; | ||
276 | #ifdef DEBUG_TSC_SYNC | ||
277 | static struct syncdebug { | ||
278 | long rt; /* roundtrip time */ | ||
279 | long master; /* master's timestamp */ | ||
280 | long diff; /* difference between midpoint and master's timestamp */ | ||
281 | long lat; /* estimate of tsc adjustment latency */ | ||
282 | } t[NUM_ROUNDS] __cpuinitdata; | ||
283 | #endif | ||
284 | |||
285 | printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", | ||
286 | smp_processor_id(), master); | ||
287 | |||
288 | go[MASTER] = 1; | ||
289 | |||
290 | /* It is dangerous to broadcast IPI as cpus are coming up, | ||
291 | * as they may not be ready to accept them. Since we | ||
292 | * only need to reach the boot cpu, direct the message | ||
293 | * to it and avoid the race. | ||
294 | */ | ||
295 | smp_call_function_single(master, sync_master, NULL, 1, 0); | ||
296 | |||
297 | while (go[MASTER]) /* wait for master to be ready */ | ||
298 | no_cpu_relax(); | ||
299 | |||
300 | spin_lock_irqsave(&tsc_sync_lock, flags); | ||
301 | { | ||
302 | for (i = 0; i < NUM_ROUNDS; ++i) { | ||
303 | delta = get_delta(&rt, &master_time_stamp); | ||
304 | if (delta == 0) { | ||
305 | done = 1; /* let's lock on to this... */ | ||
306 | bound = rt; | ||
307 | } | ||
308 | |||
309 | if (!done) { | ||
310 | unsigned long t; | ||
311 | if (i > 0) { | ||
312 | adjust_latency += -delta; | ||
313 | adj = -delta + adjust_latency/4; | ||
314 | } else | ||
315 | adj = -delta; | ||
316 | |||
317 | rdtscll(t); | ||
318 | wrmsrl(MSR_IA32_TSC, t + adj); | ||
319 | } | ||
320 | #ifdef DEBUG_TSC_SYNC | ||
321 | t[i].rt = rt; | ||
322 | t[i].master = master_time_stamp; | ||
323 | t[i].diff = delta; | ||
324 | t[i].lat = adjust_latency/4; | ||
325 | #endif | ||
326 | } | ||
327 | } | ||
328 | spin_unlock_irqrestore(&tsc_sync_lock, flags); | ||
329 | |||
330 | #ifdef DEBUG_TSC_SYNC | ||
331 | for (i = 0; i < NUM_ROUNDS; ++i) | ||
332 | printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", | ||
333 | t[i].rt, t[i].master, t[i].diff, t[i].lat); | ||
334 | #endif | ||
335 | |||
336 | printk(KERN_INFO | ||
337 | "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, " | ||
338 | "maxerr %lu cycles)\n", | ||
339 | smp_processor_id(), master, delta, rt); | ||
340 | } | ||
341 | |||
342 | static void __cpuinit tsc_sync_wait(void) | ||
343 | { | ||
344 | /* | ||
345 | * When the CPU has synchronized TSCs assume the BIOS | ||
346 | * or the hardware already synced. Otherwise we could | ||
347 | * mess up a possible perfect synchronization with a | ||
348 | * not-quite-perfect algorithm. | ||
349 | */ | ||
350 | if (notscsync || !cpu_has_tsc || !unsynchronized_tsc()) | ||
351 | return; | ||
352 | sync_tsc(0); | ||
353 | } | ||
354 | |||
355 | static __init int notscsync_setup(char *s) | ||
356 | { | ||
357 | notscsync = 1; | ||
358 | return 1; | ||
359 | } | ||
360 | __setup("notscsync", notscsync_setup); | ||
361 | |||
362 | static atomic_t init_deasserted __cpuinitdata; | 151 | static atomic_t init_deasserted __cpuinitdata; |
363 | 152 | ||
364 | /* | 153 | /* |
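
The comment block being removed here describes the core of the old algorithm: each round yields three timestamps t0, tm, t1, the slave assumes tm was taken at the midpoint (t0 + t1) / 2, and tcenter - tm then estimates the slave-master offset with an error bounded by the round trip t1 - t0. A toy recomputation of get_delta()'s midpoint step, including its overflow-safe averaging (numbers invented):

#include <stdio.h>

int main(void)
{
	long t0 = 1000, t1 = 1400;	/* slave TSC around the exchange */
	long tm = 1450;			/* master TSC reported in between */

	/* average t0 and t1 without overflowing, as get_delta() did: */
	long tcenter = t0 / 2 + t1 / 2 + (t0 % 2 + t1 % 2 == 2);
	long delta = tcenter - tm;	/* > 0: slave ahead of master */

	printf("delta=%ld, bounded by roundtrip %ld\n", delta, t1 - t0);
	return 0;
}
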
@@ -546,6 +335,11 @@ void __cpuinit start_secondary(void) | |||
546 | /* otherwise gcc will move up the smp_processor_id before the cpu_init */ | 335 | /* otherwise gcc will move up the smp_processor_id before the cpu_init */ |
547 | barrier(); | 336 | barrier(); |
548 | 337 | ||
338 | /* | ||
339 | * Check TSC sync first: | ||
340 | */ | ||
341 | check_tsc_sync_target(); | ||
342 | |||
549 | Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); | 343 | Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); |
550 | setup_secondary_APIC_clock(); | 344 | setup_secondary_APIC_clock(); |
551 | 345 | ||
@@ -565,14 +359,6 @@ void __cpuinit start_secondary(void) | |||
565 | */ | 359 | */ |
566 | set_cpu_sibling_map(smp_processor_id()); | 360 | set_cpu_sibling_map(smp_processor_id()); |
567 | 361 | ||
568 | /* | ||
569 | * Wait for TSC sync to not schedule things before. | ||
570 | * We still process interrupts, which could see an inconsistent | ||
571 | * time in that window unfortunately. | ||
572 | * Do this here because TSC sync has global unprotected state. | ||
573 | */ | ||
574 | tsc_sync_wait(); | ||
575 | |||
576 | /* | 362 | /* |
577 | * We need to hold call_lock, so there is no inconsistency | 363 | * We need to hold call_lock, so there is no inconsistency |
578 | * between the time smp_call_function() determines number of | 364 | * between the time smp_call_function() determines number of |
@@ -592,6 +378,7 @@ void __cpuinit start_secondary(void) | |||
592 | cpu_set(smp_processor_id(), cpu_online_map); | 378 | cpu_set(smp_processor_id(), cpu_online_map); |
593 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 379 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
594 | spin_unlock(&vector_lock); | 380 | spin_unlock(&vector_lock); |
381 | |||
595 | unlock_ipi_call_lock(); | 382 | unlock_ipi_call_lock(); |
596 | 383 | ||
597 | cpu_idle(); | 384 | cpu_idle(); |
@@ -1168,6 +955,11 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
1168 | /* Unleash the CPU! */ | 955 | /* Unleash the CPU! */ |
1169 | Dprintk("waiting for cpu %d\n", cpu); | 956 | Dprintk("waiting for cpu %d\n", cpu); |
1170 | 957 | ||
958 | /* | ||
959 | * Make sure and check TSC sync: | ||
960 | */ | ||
961 | check_tsc_sync_source(cpu); | ||
962 | |||
1171 | while (!cpu_isset(cpu, cpu_online_map)) | 963 | while (!cpu_isset(cpu, cpu_online_map)) |
1172 | cpu_relax(); | 964 | cpu_relax(); |
1173 | 965 | ||
@@ -1190,7 +982,6 @@ void __init smp_cpus_done(unsigned int max_cpus) | |||
1190 | smp_cleanup_boot(); | 982 | smp_cleanup_boot(); |
1191 | setup_ioapic_dest(); | 983 | setup_ioapic_dest(); |
1192 | check_nmi_watchdog(); | 984 | check_nmi_watchdog(); |
1193 | time_init_gtod(); | ||
1194 | } | 985 | } |
1195 | 986 | ||
1196 | #ifdef CONFIG_HOTPLUG_CPU | 987 | #ifdef CONFIG_HOTPLUG_CPU |
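
What replaces all of this is the new check-only tsc_sync.c (included verbatim on i386 earlier in the series): rather than adjusting TSCs with WRMSR, the source and target CPUs take turns reading the TSC and verify that the readings never go backwards across CPUs; if they do, the TSC is simply marked unstable. A rough userspace analogue of that handshake, using C11 atomics and a software counter in place of RDTSC (build with -std=c11 -pthread; this is a sketch, not the kernel's code):

#include <stdio.h>
#include <stdatomic.h>
#include <pthread.h>

#define ROUNDS 100000

static atomic_long fake_clock;	/* stands in for the TSC */
static atomic_long last_val;	/* last value seen on either thread */
static atomic_int  turn;	/* 0: main thread, 1: checker */

static void take_turn(int me, int next)
{
	long now;

	while (atomic_load(&turn) != me)
		;	/* spin, as the kernel code does */

	now = atomic_fetch_add(&fake_clock, 1);
	if (now < atomic_load(&last_val))
		printf("clock went backwards - would mark TSC unstable\n");
	atomic_store(&last_val, now);
	atomic_store(&turn, next);
}

static void *checker(void *arg)
{
	(void)arg;
	for (int i = 0; i < ROUNDS; i++)
		take_turn(1, 0);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, checker, NULL);
	for (int i = 0; i < ROUNDS; i++)
		take_turn(0, 1);
	pthread_join(t, NULL);
	return 0;
}
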
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 3cc6886f1fb7..a87c51705620 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -42,10 +42,8 @@ | |||
42 | #include <linux/cpufreq.h> | 42 | #include <linux/cpufreq.h> |
43 | #include <linux/hpet.h> | 43 | #include <linux/hpet.h> |
44 | #include <asm/apic.h> | 44 | #include <asm/apic.h> |
45 | #include <asm/hpet.h> | ||
45 | 46 | ||
46 | #ifdef CONFIG_CPU_FREQ | ||
47 | static void cpufreq_delayed_get(void); | ||
48 | #endif | ||
49 | extern void i8254_timer_resume(void); | 47 | extern void i8254_timer_resume(void); |
50 | extern int using_apic_timer; | 48 | extern int using_apic_timer; |
51 | 49 | ||
@@ -55,128 +53,7 @@ DEFINE_SPINLOCK(rtc_lock); | |||
55 | EXPORT_SYMBOL(rtc_lock); | 53 | EXPORT_SYMBOL(rtc_lock); |
56 | DEFINE_SPINLOCK(i8253_lock); | 54 | DEFINE_SPINLOCK(i8253_lock); |
57 | 55 | ||
58 | int nohpet __initdata = 0; | ||
59 | static int notsc __initdata = 0; | ||
60 | |||
61 | #define USEC_PER_TICK (USEC_PER_SEC / HZ) | ||
62 | #define NSEC_PER_TICK (NSEC_PER_SEC / HZ) | ||
63 | #define FSEC_PER_TICK (FSEC_PER_SEC / HZ) | ||
64 | |||
65 | #define NS_SCALE 10 /* 2^10, carefully chosen */ | ||
66 | #define US_SCALE 32 /* 2^32, arbitrarily chosen */ | ||
67 | |||
68 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ | ||
69 | EXPORT_SYMBOL(cpu_khz); | ||
70 | static unsigned long hpet_period; /* fsecs / HPET clock */ | ||
71 | unsigned long hpet_tick; /* HPET clocks / interrupt */ | ||
72 | int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ | ||
73 | unsigned long vxtime_hz = PIT_TICK_RATE; | ||
74 | int report_lost_ticks; /* command line option */ | ||
75 | unsigned long long monotonic_base; | ||
76 | |||
77 | struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ | ||
78 | |||
79 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; | 56 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; |
80 | struct timespec __xtime __section_xtime; | ||
81 | struct timezone __sys_tz __section_sys_tz; | ||
82 | |||
83 | /* | ||
84 | * do_gettimeoffset() returns microseconds since last timer interrupt was | ||
85 | * triggered by hardware. A memory read of HPET is slower than a register read | ||
86 | * of TSC, but much more reliable. It's also synchronized to the timer | ||
87 | * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a | ||
88 | * timer interrupt has happened already, but vxtime.trigger wasn't updated yet. | ||
89 | * This is not a problem, because jiffies hasn't updated either. They are bound | ||
90 | * together by xtime_lock. | ||
91 | */ | ||
92 | |||
93 | static inline unsigned int do_gettimeoffset_tsc(void) | ||
94 | { | ||
95 | unsigned long t; | ||
96 | unsigned long x; | ||
97 | t = get_cycles_sync(); | ||
98 | if (t < vxtime.last_tsc) | ||
99 | t = vxtime.last_tsc; /* hack */ | ||
100 | x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE; | ||
101 | return x; | ||
102 | } | ||
103 | |||
104 | static inline unsigned int do_gettimeoffset_hpet(void) | ||
105 | { | ||
106 | /* cap counter read to one tick to avoid inconsistencies */ | ||
107 | unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; | ||
108 | return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE; | ||
109 | } | ||
110 | |||
111 | unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; | ||
112 | |||
113 | /* | ||
114 | * This version of gettimeofday() has microsecond resolution and better than | ||
115 | * microsecond precision, as we're using at least a 10 MHz (usually 14.31818 | ||
116 | * MHz) HPET timer. | ||
117 | */ | ||
118 | |||
119 | void do_gettimeofday(struct timeval *tv) | ||
120 | { | ||
121 | unsigned long seq; | ||
122 | unsigned int sec, usec; | ||
123 | |||
124 | do { | ||
125 | seq = read_seqbegin(&xtime_lock); | ||
126 | |||
127 | sec = xtime.tv_sec; | ||
128 | usec = xtime.tv_nsec / NSEC_PER_USEC; | ||
129 | |||
130 | /* i386 does some correction here to keep the clock | ||
131 | monotonic even when ntpd is fixing drift. | ||
132 | But those corrections didn't work for me; there is a | ||
133 | non-monotonic clock with ntp anyway. | ||
134 | I dropped all corrections now until a real solution can | ||
135 | be found. Note when you fix it here you need to do the same | ||
136 | in arch/x86_64/kernel/vsyscall.c and export all needed | ||
137 | variables in vmlinux.lds. -AK */ | ||
138 | usec += do_gettimeoffset(); | ||
139 | |||
140 | } while (read_seqretry(&xtime_lock, seq)); | ||
141 | |||
142 | tv->tv_sec = sec + usec / USEC_PER_SEC; | ||
143 | tv->tv_usec = usec % USEC_PER_SEC; | ||
144 | } | ||
145 | |||
146 | EXPORT_SYMBOL(do_gettimeofday); | ||
147 | |||
148 | /* | ||
149 | * settimeofday() first undoes the correction that gettimeofday would do | ||
150 | * on the time, and then saves it. This is ugly, but has been like this for | ||
151 | * ages already. | ||
152 | */ | ||
153 | |||
154 | int do_settimeofday(struct timespec *tv) | ||
155 | { | ||
156 | time_t wtm_sec, sec = tv->tv_sec; | ||
157 | long wtm_nsec, nsec = tv->tv_nsec; | ||
158 | |||
159 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | ||
160 | return -EINVAL; | ||
161 | |||
162 | write_seqlock_irq(&xtime_lock); | ||
163 | |||
164 | nsec -= do_gettimeoffset() * NSEC_PER_USEC; | ||
165 | |||
166 | wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); | ||
167 | wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); | ||
168 | |||
169 | set_normalized_timespec(&xtime, sec, nsec); | ||
170 | set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); | ||
171 | |||
172 | ntp_clear(); | ||
173 | |||
174 | write_sequnlock_irq(&xtime_lock); | ||
175 | clock_was_set(); | ||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | EXPORT_SYMBOL(do_settimeofday); | ||
180 | 57 | ||
181 | unsigned long profile_pc(struct pt_regs *regs) | 58 | unsigned long profile_pc(struct pt_regs *regs) |
182 | { | 59 | { |
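
The deleted do_gettimeoffset_tsc() is one more mult/shift conversion: vxtime.tsc_quot held microseconds-per-cycle scaled by 2^US_SCALE, so usec = (cycles * tsc_quot) >> 32 (the setup of tsc_quot is not in this hunk, so the exact rounding below is an assumption). A userspace check with an invented clock rate:

#include <stdio.h>
#include <stdint.h>

#define US_SCALE 32

int main(void)
{
	uint64_t cpu_khz = 2400000;	/* hypothetical 2.4 GHz TSC */
	/* usec per cycle = 1000 / cpu_khz, scaled by 2^32: */
	uint64_t tsc_quot = (1000ULL << US_SCALE) / cpu_khz;
	uint64_t cycles = 2400000;	/* about 1 ms worth of cycles */

	/* expect ~1000 usec: */
	printf("%llu usec\n",
	       (unsigned long long)((cycles * tsc_quot) >> US_SCALE));
	return 0;
}
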
@@ -267,84 +144,9 @@ static void set_rtc_mmss(unsigned long nowtime) | |||
267 | } | 144 | } |
268 | 145 | ||
269 | 146 | ||
270 | /* monotonic_clock(): returns # of nanoseconds passed since time_init() | ||
271 | * Note: This function is required to return accurate | ||
272 | * time even in the absence of multiple timer ticks. | ||
273 | */ | ||
274 | static inline unsigned long long cycles_2_ns(unsigned long long cyc); | ||
275 | unsigned long long monotonic_clock(void) | ||
276 | { | ||
277 | unsigned long seq; | ||
278 | u32 last_offset, this_offset, offset; | ||
279 | unsigned long long base; | ||
280 | |||
281 | if (vxtime.mode == VXTIME_HPET) { | ||
282 | do { | ||
283 | seq = read_seqbegin(&xtime_lock); | ||
284 | |||
285 | last_offset = vxtime.last; | ||
286 | base = monotonic_base; | ||
287 | this_offset = hpet_readl(HPET_COUNTER); | ||
288 | } while (read_seqretry(&xtime_lock, seq)); | ||
289 | offset = (this_offset - last_offset); | ||
290 | offset *= NSEC_PER_TICK / hpet_tick; | ||
291 | } else { | ||
292 | do { | ||
293 | seq = read_seqbegin(&xtime_lock); | ||
294 | |||
295 | last_offset = vxtime.last_tsc; | ||
296 | base = monotonic_base; | ||
297 | } while (read_seqretry(&xtime_lock, seq)); | ||
298 | this_offset = get_cycles_sync(); | ||
299 | offset = cycles_2_ns(this_offset - last_offset); | ||
300 | } | ||
301 | return base + offset; | ||
302 | } | ||
303 | EXPORT_SYMBOL(monotonic_clock); | ||
304 | |||
305 | static noinline void handle_lost_ticks(int lost) | ||
306 | { | ||
307 | static long lost_count; | ||
308 | static int warned; | ||
309 | if (report_lost_ticks) { | ||
310 | printk(KERN_WARNING "time.c: Lost %d timer tick(s)! ", lost); | ||
311 | print_symbol("rip %s)\n", get_irq_regs()->rip); | ||
312 | } | ||
313 | |||
314 | if (lost_count == 1000 && !warned) { | ||
315 | printk(KERN_WARNING "warning: many lost ticks.\n" | ||
316 | KERN_WARNING "Your time source seems to be unstable or " | ||
317 | "some driver is hogging interrupts\n"); | ||
318 | print_symbol("rip %s\n", get_irq_regs()->rip); | ||
319 | if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { | ||
320 | printk(KERN_WARNING "Falling back to HPET\n"); | ||
321 | if (hpet_use_timer) | ||
322 | vxtime.last = hpet_readl(HPET_T0_CMP) - | ||
323 | hpet_tick; | ||
324 | else | ||
325 | vxtime.last = hpet_readl(HPET_COUNTER); | ||
326 | vxtime.mode = VXTIME_HPET; | ||
327 | do_gettimeoffset = do_gettimeoffset_hpet; | ||
328 | } | ||
329 | /* else should fall back to PIT, but code missing. */ | ||
330 | warned = 1; | ||
331 | } else | ||
332 | lost_count++; | ||
333 | |||
334 | #ifdef CONFIG_CPU_FREQ | ||
335 | /* In some cases the CPU can change frequency without us noticing | ||
336 | Give cpufreq a chance to catch up. */ | ||
337 | if ((lost_count+1) % 25 == 0) | ||
338 | cpufreq_delayed_get(); | ||
339 | #endif | ||
340 | } | ||
341 | |||
342 | void main_timer_handler(void) | 147 | void main_timer_handler(void) |
343 | { | 148 | { |
344 | static unsigned long rtc_update = 0; | 149 | static unsigned long rtc_update = 0; |
345 | unsigned long tsc; | ||
346 | int delay = 0, offset = 0, lost = 0; | ||
347 | |||
348 | /* | 150 | /* |
349 | * Here we are in the timer irq handler. We have irqs locally disabled (so we | 151 | * Here we are in the timer irq handler. We have irqs locally disabled (so we |
350 | * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running | 152 | * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running |
@@ -354,72 +156,11 @@ void main_timer_handler(void) | |||
354 | 156 | ||
355 | write_seqlock(&xtime_lock); | 157 | write_seqlock(&xtime_lock); |
356 | 158 | ||
357 | if (vxtime.hpet_address) | ||
358 | offset = hpet_readl(HPET_COUNTER); | ||
359 | |||
360 | if (hpet_use_timer) { | ||
361 | /* if we're using the hpet timer functionality, | ||
362 | * we can more accurately know the counter value | ||
363 | * when the timer interrupt occured. | ||
364 | */ | ||
365 | offset = hpet_readl(HPET_T0_CMP) - hpet_tick; | ||
366 | delay = hpet_readl(HPET_COUNTER) - offset; | ||
367 | } else if (!pmtmr_ioport) { | ||
368 | spin_lock(&i8253_lock); | ||
369 | outb_p(0x00, 0x43); | ||
370 | delay = inb_p(0x40); | ||
371 | delay |= inb(0x40) << 8; | ||
372 | spin_unlock(&i8253_lock); | ||
373 | delay = LATCH - 1 - delay; | ||
374 | } | ||
375 | |||
376 | tsc = get_cycles_sync(); | ||
377 | |||
378 | if (vxtime.mode == VXTIME_HPET) { | ||
379 | if (offset - vxtime.last > hpet_tick) { | ||
380 | lost = (offset - vxtime.last) / hpet_tick - 1; | ||
381 | } | ||
382 | |||
383 | monotonic_base += | ||
384 | (offset - vxtime.last) * NSEC_PER_TICK / hpet_tick; | ||
385 | |||
386 | vxtime.last = offset; | ||
387 | #ifdef CONFIG_X86_PM_TIMER | ||
388 | } else if (vxtime.mode == VXTIME_PMTMR) { | ||
389 | lost = pmtimer_mark_offset(); | ||
390 | #endif | ||
391 | } else { | ||
392 | offset = (((tsc - vxtime.last_tsc) * | ||
393 | vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK; | ||
394 | |||
395 | if (offset < 0) | ||
396 | offset = 0; | ||
397 | |||
398 | if (offset > USEC_PER_TICK) { | ||
399 | lost = offset / USEC_PER_TICK; | ||
400 | offset %= USEC_PER_TICK; | ||
401 | } | ||
402 | |||
403 | monotonic_base += cycles_2_ns(tsc - vxtime.last_tsc); | ||
404 | |||
405 | vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; | ||
406 | |||
407 | if ((((tsc - vxtime.last_tsc) * | ||
408 | vxtime.tsc_quot) >> US_SCALE) < offset) | ||
409 | vxtime.last_tsc = tsc - | ||
410 | (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1; | ||
411 | } | ||
412 | |||
413 | if (lost > 0) | ||
414 | handle_lost_ticks(lost); | ||
415 | else | ||
416 | lost = 0; | ||
417 | |||
418 | /* | 159 | /* |
419 | * Do the timer stuff. | 160 | * Do the timer stuff. |
420 | */ | 161 | */ |
421 | 162 | ||
422 | do_timer(lost + 1); | 163 | do_timer(1); |
423 | #ifndef CONFIG_SMP | 164 | #ifndef CONFIG_SMP |
424 | update_process_times(user_mode(get_irq_regs())); | 165 | update_process_times(user_mode(get_irq_regs())); |
425 | #endif | 166 | #endif |
@@ -460,40 +201,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
460 | return IRQ_HANDLED; | 201 | return IRQ_HANDLED; |
461 | } | 202 | } |
462 | 203 | ||
463 | static unsigned int cyc2ns_scale __read_mostly; | ||
464 | |||
465 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | ||
466 | { | ||
467 | cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz; | ||
468 | } | ||
469 | |||
470 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
471 | { | ||
472 | return (cyc * cyc2ns_scale) >> NS_SCALE; | ||
473 | } | ||
474 | |||
475 | unsigned long long sched_clock(void) | ||
476 | { | ||
477 | unsigned long a = 0; | ||
478 | |||
479 | #if 0 | ||
480 | /* Don't do a HPET read here. Using TSC always is much faster | ||
481 | and HPET may not be mapped yet when the scheduler first runs. | ||
482 | Disadvantage is a small drift between CPUs in some configurations, | ||
483 | but that should be tolerable. */ | ||
484 | if (__vxtime.mode == VXTIME_HPET) | ||
485 | return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE; | ||
486 | #endif | ||
487 | |||
488 | /* Could do CPU core sync here. Opteron can execute rdtsc speculatively, | ||
489 | which means it is not completely exact and may not be monotonic between | ||
490 | CPUs. But the errors should be too small to matter for scheduling | ||
491 | purposes. */ | ||
492 | |||
493 | rdtscll(a); | ||
494 | return cycles_2_ns(a); | ||
495 | } | ||
496 | |||
497 | static unsigned long get_cmos_time(void) | 204 | static unsigned long get_cmos_time(void) |
498 | { | 205 | { |
499 | unsigned int year, mon, day, hour, min, sec; | 206 | unsigned int year, mon, day, hour, min, sec; |
@@ -545,164 +252,6 @@ static unsigned long get_cmos_time(void) | |||
545 | return mktime(year, mon, day, hour, min, sec); | 252 | return mktime(year, mon, day, hour, min, sec); |
546 | } | 253 | } |
547 | 254 | ||
548 | #ifdef CONFIG_CPU_FREQ | ||
549 | |||
550 | /* Frequency scaling support. Adjust the TSC-based timer when the CPU frequency | ||
551 | changes. | ||
552 | |||
553 | RED-PEN: On SMP we assume all CPUs run with the same frequency. It's | ||
554 | not that important because current Opteron setups do not support | ||
555 | scaling on SMP anyway. | ||
556 | |||
557 | Should fix up last_tsc too. Currently gettimeofday in the | ||
558 | first tick after the change will be slightly wrong. */ | ||
559 | |||
560 | #include <linux/workqueue.h> | ||
561 | |||
562 | static unsigned int cpufreq_delayed_issched = 0; | ||
563 | static unsigned int cpufreq_init = 0; | ||
564 | static struct work_struct cpufreq_delayed_get_work; | ||
565 | |||
566 | static void handle_cpufreq_delayed_get(struct work_struct *v) | ||
567 | { | ||
568 | unsigned int cpu; | ||
569 | for_each_online_cpu(cpu) { | ||
570 | cpufreq_get(cpu); | ||
571 | } | ||
572 | cpufreq_delayed_issched = 0; | ||
573 | } | ||
574 | |||
575 | /* If we notice lost ticks, schedule a call to cpufreq_get(), which | ||
576 | * tries to verify that the CPU frequency the timing core thinks the | ||
577 | * CPU is running at is still correct. | ||
578 | */ | ||
579 | static void cpufreq_delayed_get(void) | ||
580 | { | ||
581 | static int warned; | ||
582 | if (cpufreq_init && !cpufreq_delayed_issched) { | ||
583 | cpufreq_delayed_issched = 1; | ||
584 | if (!warned) { | ||
585 | warned = 1; | ||
586 | printk(KERN_DEBUG | ||
587 | "Losing some ticks... checking if CPU frequency changed.\n"); | ||
588 | } | ||
589 | schedule_work(&cpufreq_delayed_get_work); | ||
590 | } | ||
591 | } | ||
592 | |||
593 | static unsigned int ref_freq = 0; | ||
594 | static unsigned long loops_per_jiffy_ref = 0; | ||
595 | |||
596 | static unsigned long cpu_khz_ref = 0; | ||
597 | |||
598 | static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | ||
599 | void *data) | ||
600 | { | ||
601 | struct cpufreq_freqs *freq = data; | ||
602 | unsigned long *lpj, dummy; | ||
603 | |||
604 | if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC)) | ||
605 | return 0; | ||
606 | |||
607 | lpj = &dummy; | ||
608 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
609 | #ifdef CONFIG_SMP | ||
610 | lpj = &cpu_data[freq->cpu].loops_per_jiffy; | ||
611 | #else | ||
612 | lpj = &boot_cpu_data.loops_per_jiffy; | ||
613 | #endif | ||
614 | |||
615 | if (!ref_freq) { | ||
616 | ref_freq = freq->old; | ||
617 | loops_per_jiffy_ref = *lpj; | ||
618 | cpu_khz_ref = cpu_khz; | ||
619 | } | ||
620 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || | ||
621 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | ||
622 | (val == CPUFREQ_RESUMECHANGE)) { | ||
623 | *lpj = | ||
624 | cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); | ||
625 | |||
626 | cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); | ||
627 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
628 | vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; | ||
629 | } | ||
630 | |||
631 | set_cyc2ns_scale(cpu_khz_ref); | ||
632 | |||
633 | return 0; | ||
634 | } | ||
635 | |||
636 | static struct notifier_block time_cpufreq_notifier_block = { | ||
637 | .notifier_call = time_cpufreq_notifier | ||
638 | }; | ||
639 | |||
640 | static int __init cpufreq_tsc(void) | ||
641 | { | ||
642 | INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get); | ||
643 | if (!cpufreq_register_notifier(&time_cpufreq_notifier_block, | ||
644 | CPUFREQ_TRANSITION_NOTIFIER)) | ||
645 | cpufreq_init = 1; | ||
646 | return 0; | ||
647 | } | ||
648 | |||
649 | core_initcall(cpufreq_tsc); | ||
650 | |||
651 | #endif | ||
652 | |||
653 | /* | ||
654 | * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing | ||
655 | * it to the HPET timer of known frequency. | ||
656 | */ | ||
657 | |||
658 | #define TICK_COUNT 100000000 | ||
659 | #define TICK_MIN 5000 | ||
660 | #define MAX_READ_RETRIES 5 | ||
661 | |||
662 | /* | ||
663 | * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none | ||
664 | * occurs between the reads of the HPET and TSC. | ||
665 | */ | ||
666 | static void __init read_hpet_tsc(int *hpet, int *tsc) | ||
667 | { | ||
668 | int tsc1, tsc2, hpet1, retries = 0; | ||
669 | static int msg; | ||
670 | |||
671 | do { | ||
672 | tsc1 = get_cycles_sync(); | ||
673 | hpet1 = hpet_readl(HPET_COUNTER); | ||
674 | tsc2 = get_cycles_sync(); | ||
675 | } while (tsc2 - tsc1 > TICK_MIN && retries++ < MAX_READ_RETRIES); | ||
676 | if (retries >= MAX_READ_RETRIES && !msg++) | ||
677 | printk(KERN_WARNING | ||
678 | "hpet.c: exceeded max retries to read HPET & TSC\n"); | ||
679 | *hpet = hpet1; | ||
680 | *tsc = tsc2; | ||
681 | } | ||
682 | |||
683 | |||
684 | static unsigned int __init hpet_calibrate_tsc(void) | ||
685 | { | ||
686 | int tsc_start, hpet_start; | ||
687 | int tsc_now, hpet_now; | ||
688 | unsigned long flags; | ||
689 | |||
690 | local_irq_save(flags); | ||
691 | local_irq_disable(); | ||
692 | |||
693 | read_hpet_tsc(&hpet_start, &tsc_start); | ||
694 | |||
695 | do { | ||
696 | local_irq_disable(); | ||
697 | read_hpet_tsc(&hpet_now, &tsc_now); | ||
698 | local_irq_restore(flags); | ||
699 | } while ((tsc_now - tsc_start) < TICK_COUNT && | ||
700 | (hpet_now - hpet_start) < TICK_COUNT); | ||
701 | |||
702 | return (tsc_now - tsc_start) * 1000000000L | ||
703 | / ((hpet_now - hpet_start) * hpet_period / 1000); | ||
704 | } | ||
705 | |||
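The unit juggling in hpet_calibrate_tsc() above is easy to misread: hpet_period is in femtoseconds per HPET tick, so hpet_delta * hpet_period / 1000 is the elapsed time in picoseconds, and multiplying the TSC delta by 1e9 before dividing yields cycles per millisecond, i.e. kHz. A minimal standalone sketch of the same arithmetic; the ~14.318 MHz HPET and 2 GHz TSC figures are illustrative assumptions, not values from this patch:

#include <stdio.h>

int main(void)
{
        /* Illustrative values only: ~14.318 MHz HPET (69841279 fs/tick),
         * 1e8 TSC cycles observed over ~50 ms. */
        long long hpet_period = 69841279;   /* fs per HPET tick */
        long long hpet_delta  = 715909;     /* HPET ticks elapsed */
        long long tsc_delta   = 100000000;  /* TSC cycles elapsed */

        long long khz = tsc_delta * 1000000000LL /
                        (hpet_delta * hpet_period / 1000);
        printf("cpu_khz = %lld\n", khz);    /* ~1999999, i.e. a 2 GHz TSC */
        return 0;
}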
706 | 255 | ||
707 | /* | 256 | /* |
708 | * pit_calibrate_tsc() uses the speaker output (channel 2) of | 257 | * pit_calibrate_tsc() uses the speaker output (channel 2) of |
@@ -733,124 +282,6 @@ static unsigned int __init pit_calibrate_tsc(void) | |||
733 | return (end - start) / 50; | 282 | return (end - start) / 50; |
734 | } | 283 | } |
735 | 284 | ||
736 | #ifdef CONFIG_HPET | ||
737 | static __init int late_hpet_init(void) | ||
738 | { | ||
739 | struct hpet_data hd; | ||
740 | unsigned int ntimer; | ||
741 | |||
742 | if (!vxtime.hpet_address) | ||
743 | return 0; | ||
744 | |||
745 | memset(&hd, 0, sizeof (hd)); | ||
746 | |||
747 | ntimer = hpet_readl(HPET_ID); | ||
748 | ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT; | ||
749 | ntimer++; | ||
750 | |||
751 | /* | ||
752 | * Register with driver. | ||
753 | * Timer0 and Timer1 are used by the platform. | ||
754 | */ | ||
755 | hd.hd_phys_address = vxtime.hpet_address; | ||
756 | hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE); | ||
757 | hd.hd_nirqs = ntimer; | ||
758 | hd.hd_flags = HPET_DATA_PLATFORM; | ||
759 | hpet_reserve_timer(&hd, 0); | ||
760 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
761 | hpet_reserve_timer(&hd, 1); | ||
762 | #endif | ||
763 | hd.hd_irq[0] = HPET_LEGACY_8254; | ||
764 | hd.hd_irq[1] = HPET_LEGACY_RTC; | ||
765 | if (ntimer > 2) { | ||
766 | struct hpet *hpet; | ||
767 | struct hpet_timer *timer; | ||
768 | int i; | ||
769 | |||
770 | hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE); | ||
771 | timer = &hpet->hpet_timers[2]; | ||
772 | for (i = 2; i < ntimer; timer++, i++) | ||
773 | hd.hd_irq[i] = (timer->hpet_config & | ||
774 | Tn_INT_ROUTE_CNF_MASK) >> | ||
775 | Tn_INT_ROUTE_CNF_SHIFT; | ||
776 | |||
777 | } | ||
778 | |||
779 | hpet_alloc(&hd); | ||
780 | return 0; | ||
781 | } | ||
782 | fs_initcall(late_hpet_init); | ||
783 | #endif | ||
784 | |||
785 | static int hpet_timer_stop_set_go(unsigned long tick) | ||
786 | { | ||
787 | unsigned int cfg; | ||
788 | |||
789 | /* | ||
790 | * Stop the timers and reset the main counter. | ||
791 | */ | ||
792 | |||
793 | cfg = hpet_readl(HPET_CFG); | ||
794 | cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); | ||
795 | hpet_writel(cfg, HPET_CFG); | ||
796 | hpet_writel(0, HPET_COUNTER); | ||
797 | hpet_writel(0, HPET_COUNTER + 4); | ||
798 | |||
799 | /* | ||
800 | * Set up timer 0 as periodic, with the first interrupt at hpet_tick | ||
801 | * and the period also hpet_tick. | ||
802 | */ | ||
803 | if (hpet_use_timer) { | ||
804 | hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | | ||
805 | HPET_TN_32BIT, HPET_T0_CFG); | ||
806 | hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */ | ||
807 | hpet_writel(hpet_tick, HPET_T0_CMP); /* period */ | ||
808 | cfg |= HPET_CFG_LEGACY; | ||
809 | } | ||
810 | /* | ||
811 | * Go! | ||
812 | */ | ||
813 | |||
814 | cfg |= HPET_CFG_ENABLE; | ||
815 | hpet_writel(cfg, HPET_CFG); | ||
816 | |||
817 | return 0; | ||
818 | } | ||
819 | |||
820 | static int hpet_init(void) | ||
821 | { | ||
822 | unsigned int id; | ||
823 | |||
824 | if (!vxtime.hpet_address) | ||
825 | return -1; | ||
826 | set_fixmap_nocache(FIX_HPET_BASE, vxtime.hpet_address); | ||
827 | __set_fixmap(VSYSCALL_HPET, vxtime.hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE); | ||
828 | |||
829 | /* | ||
830 | * Read the period, compute tick and quotient. | ||
831 | */ | ||
832 | |||
833 | id = hpet_readl(HPET_ID); | ||
834 | |||
835 | if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER)) | ||
836 | return -1; | ||
837 | |||
838 | hpet_period = hpet_readl(HPET_PERIOD); | ||
839 | if (hpet_period < 100000 || hpet_period > 100000000) | ||
840 | return -1; | ||
841 | |||
842 | hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period; | ||
843 | |||
844 | hpet_use_timer = (id & HPET_ID_LEGSUP); | ||
845 | |||
846 | return hpet_timer_stop_set_go(hpet_tick); | ||
847 | } | ||
848 | |||
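The + hpet_period / 2 term in the hpet_tick computation above rounds to the nearest HPET tick instead of truncating. A quick standalone check with illustrative values (FSEC_PER_TICK here assumes HZ = 250; the HPET period is again the common 14.318 MHz case):

#include <stdio.h>

int main(void)
{
        /* FSEC_PER_TICK = FSEC_PER_SEC / HZ; with HZ = 250 that is
         * 4,000,000,000,000 fs. The HPET period is illustrative. */
        unsigned long long fsec_per_tick = 4000000000000ULL;
        unsigned long long hpet_period = 69841279ULL;

        unsigned long long truncated = fsec_per_tick / hpet_period;
        unsigned long long rounded =
                (fsec_per_tick + hpet_period / 2) / hpet_period;

        /* prints truncated=57272 rounded=57273 */
        printf("truncated=%llu rounded=%llu\n", truncated, rounded);
        return 0;
}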
849 | static int hpet_reenable(void) | ||
850 | { | ||
851 | return hpet_timer_stop_set_go(hpet_tick); | ||
852 | } | ||
853 | |||
854 | #define PIT_MODE 0x43 | 285 | #define PIT_MODE 0x43 |
855 | #define PIT_CH0 0x40 | 286 | #define PIT_CH0 0x40 |
856 | 287 | ||
@@ -878,7 +309,7 @@ void __init pit_stop_interrupt(void) | |||
878 | void __init stop_timer_interrupt(void) | 309 | void __init stop_timer_interrupt(void) |
879 | { | 310 | { |
880 | char *name; | 311 | char *name; |
881 | if (vxtime.hpet_address) { | 312 | if (hpet_address) { |
882 | name = "HPET"; | 313 | name = "HPET"; |
883 | hpet_timer_stop_set_go(0); | 314 | hpet_timer_stop_set_go(0); |
884 | } else { | 315 | } else { |
@@ -888,12 +319,6 @@ void __init stop_timer_interrupt(void) | |||
888 | printk(KERN_INFO "timer: %s interrupt stopped.\n", name); | 319 | printk(KERN_INFO "timer: %s interrupt stopped.\n", name); |
889 | } | 320 | } |
890 | 321 | ||
891 | int __init time_setup(char *str) | ||
892 | { | ||
893 | report_lost_ticks = 1; | ||
894 | return 1; | ||
895 | } | ||
896 | |||
897 | static struct irqaction irq0 = { | 322 | static struct irqaction irq0 = { |
898 | timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL | 323 | timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL |
899 | }; | 324 | }; |
@@ -901,124 +326,41 @@ static struct irqaction irq0 = { | |||
901 | void __init time_init(void) | 326 | void __init time_init(void) |
902 | { | 327 | { |
903 | if (nohpet) | 328 | if (nohpet) |
904 | vxtime.hpet_address = 0; | 329 | hpet_address = 0; |
905 | |||
906 | xtime.tv_sec = get_cmos_time(); | 330 | xtime.tv_sec = get_cmos_time(); |
907 | xtime.tv_nsec = 0; | 331 | xtime.tv_nsec = 0; |
908 | 332 | ||
909 | set_normalized_timespec(&wall_to_monotonic, | 333 | set_normalized_timespec(&wall_to_monotonic, |
910 | -xtime.tv_sec, -xtime.tv_nsec); | 334 | -xtime.tv_sec, -xtime.tv_nsec); |
911 | 335 | ||
912 | if (!hpet_init()) | 336 | if (hpet_arch_init()) |
913 | vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period; | 337 | hpet_address = 0; |
914 | else | ||
915 | vxtime.hpet_address = 0; | ||
916 | 338 | ||
917 | if (hpet_use_timer) { | 339 | if (hpet_use_timer) { |
918 | /* set tick_nsec to use the proper rate for HPET */ | 340 | /* set tick_nsec to use the proper rate for HPET */ |
919 | tick_nsec = TICK_NSEC_HPET; | 341 | tick_nsec = TICK_NSEC_HPET; |
920 | cpu_khz = hpet_calibrate_tsc(); | 342 | cpu_khz = hpet_calibrate_tsc(); |
921 | timename = "HPET"; | 343 | timename = "HPET"; |
922 | #ifdef CONFIG_X86_PM_TIMER | ||
923 | } else if (pmtmr_ioport && !vxtime.hpet_address) { | ||
924 | vxtime_hz = PM_TIMER_FREQUENCY; | ||
925 | timename = "PM"; | ||
926 | pit_init(); | ||
927 | cpu_khz = pit_calibrate_tsc(); | ||
928 | #endif | ||
929 | } else { | 344 | } else { |
930 | pit_init(); | 345 | pit_init(); |
931 | cpu_khz = pit_calibrate_tsc(); | 346 | cpu_khz = pit_calibrate_tsc(); |
932 | timename = "PIT"; | 347 | timename = "PIT"; |
933 | } | 348 | } |
934 | 349 | ||
935 | vxtime.mode = VXTIME_TSC; | ||
936 | vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz; | ||
937 | vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; | ||
938 | vxtime.last_tsc = get_cycles_sync(); | ||
939 | set_cyc2ns_scale(cpu_khz); | ||
940 | setup_irq(0, &irq0); | ||
941 | |||
942 | #ifndef CONFIG_SMP | ||
943 | time_init_gtod(); | ||
944 | #endif | ||
945 | } | ||
946 | |||
947 | /* | ||
948 | * Make an educated guess if the TSC is trustworthy and synchronized | ||
949 | * over all CPUs. | ||
950 | */ | ||
951 | __cpuinit int unsynchronized_tsc(void) | ||
952 | { | ||
953 | #ifdef CONFIG_SMP | ||
954 | if (apic_is_clustered_box()) | ||
955 | return 1; | ||
956 | #endif | ||
957 | /* Most Intel systems have synchronized TSCs except for | ||
958 | multi-node systems */ | ||
959 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { | ||
960 | #ifdef CONFIG_ACPI | ||
961 | /* But TSC doesn't tick in C3 so don't use it there */ | ||
962 | if (acpi_gbl_FADT.header.length > 0 && acpi_gbl_FADT.C3latency < 1000) | ||
963 | return 1; | ||
964 | #endif | ||
965 | return 0; | ||
966 | } | ||
967 | |||
968 | /* Assume multi-socket systems are not synchronized */ | ||
969 | return num_present_cpus() > 1; | ||
970 | } | ||
971 | |||
972 | /* | ||
973 | * Decide what mode gettimeofday should use. | ||
974 | */ | ||
975 | void time_init_gtod(void) | ||
976 | { | ||
977 | char *timetype; | ||
978 | |||
979 | if (unsynchronized_tsc()) | 350 | if (unsynchronized_tsc()) |
980 | notsc = 1; | 351 | mark_tsc_unstable(); |
981 | 352 | ||
982 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) | 353 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) |
983 | vgetcpu_mode = VGETCPU_RDTSCP; | 354 | vgetcpu_mode = VGETCPU_RDTSCP; |
984 | else | 355 | else |
985 | vgetcpu_mode = VGETCPU_LSL; | 356 | vgetcpu_mode = VGETCPU_LSL; |
986 | 357 | ||
987 | if (vxtime.hpet_address && notsc) { | 358 | set_cyc2ns_scale(cpu_khz); |
988 | timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; | ||
989 | if (hpet_use_timer) | ||
990 | vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; | ||
991 | else | ||
992 | vxtime.last = hpet_readl(HPET_COUNTER); | ||
993 | vxtime.mode = VXTIME_HPET; | ||
994 | do_gettimeoffset = do_gettimeoffset_hpet; | ||
995 | #ifdef CONFIG_X86_PM_TIMER | ||
996 | /* Using PM for gettimeofday is quite slow, but we have no other | ||
997 | choice because the TSC is too unreliable on some systems. */ | ||
998 | } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) { | ||
999 | timetype = "PM"; | ||
1000 | do_gettimeoffset = do_gettimeoffset_pm; | ||
1001 | vxtime.mode = VXTIME_PMTMR; | ||
1002 | sysctl_vsyscall = 0; | ||
1003 | printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n"); | ||
1004 | #endif | ||
1005 | } else { | ||
1006 | timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; | ||
1007 | vxtime.mode = VXTIME_TSC; | ||
1008 | } | ||
1009 | |||
1010 | printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n", | ||
1011 | vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype); | ||
1012 | printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", | 359 | printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", |
1013 | cpu_khz / 1000, cpu_khz % 1000); | 360 | cpu_khz / 1000, cpu_khz % 1000); |
1014 | vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz; | 361 | setup_irq(0, &irq0); |
1015 | vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; | ||
1016 | vxtime.last_tsc = get_cycles_sync(); | ||
1017 | |||
1018 | set_cyc2ns_scale(cpu_khz); | ||
1019 | } | 362 | } |
1020 | 363 | ||
1021 | __setup("report_lost_ticks", time_setup); | ||
1022 | 364 | ||
1023 | static long clock_cmos_diff; | 365 | static long clock_cmos_diff; |
1024 | static unsigned long sleep_start; | 366 | static unsigned long sleep_start; |
@@ -1055,7 +397,7 @@ static int timer_resume(struct sys_device *dev) | |||
1055 | sleep_length = 0; | 397 | sleep_length = 0; |
1056 | ctime = sleep_start; | 398 | ctime = sleep_start; |
1057 | } | 399 | } |
1058 | if (vxtime.hpet_address) | 400 | if (hpet_address) |
1059 | hpet_reenable(); | 401 | hpet_reenable(); |
1060 | else | 402 | else |
1061 | i8254_timer_resume(); | 403 | i8254_timer_resume(); |
@@ -1064,20 +406,8 @@ static int timer_resume(struct sys_device *dev) | |||
1064 | write_seqlock_irqsave(&xtime_lock,flags); | 406 | write_seqlock_irqsave(&xtime_lock,flags); |
1065 | xtime.tv_sec = sec; | 407 | xtime.tv_sec = sec; |
1066 | xtime.tv_nsec = 0; | 408 | xtime.tv_nsec = 0; |
1067 | if (vxtime.mode == VXTIME_HPET) { | ||
1068 | if (hpet_use_timer) | ||
1069 | vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; | ||
1070 | else | ||
1071 | vxtime.last = hpet_readl(HPET_COUNTER); | ||
1072 | #ifdef CONFIG_X86_PM_TIMER | ||
1073 | } else if (vxtime.mode == VXTIME_PMTMR) { | ||
1074 | pmtimer_resume(); | ||
1075 | #endif | ||
1076 | } else | ||
1077 | vxtime.last_tsc = get_cycles_sync(); | ||
1078 | write_sequnlock_irqrestore(&xtime_lock,flags); | ||
1079 | jiffies += sleep_length; | 409 | jiffies += sleep_length; |
1080 | monotonic_base += sleep_length * (NSEC_PER_SEC/HZ); | 410 | write_sequnlock_irqrestore(&xtime_lock,flags); |
1081 | touch_softlockup_watchdog(); | 411 | touch_softlockup_watchdog(); |
1082 | return 0; | 412 | return 0; |
1083 | } | 413 | } |
@@ -1103,270 +433,3 @@ static int time_init_device(void) | |||
1103 | } | 433 | } |
1104 | 434 | ||
1105 | device_initcall(time_init_device); | 435 | device_initcall(time_init_device); |
1106 | |||
1107 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
1108 | /* HPET in LegacyReplacement Mode eats up the RTC interrupt line. When HPET | ||
1109 | * is enabled, we support RTC interrupt functionality in software. | ||
1110 | * RTC has 3 kinds of interrupts: | ||
1111 | * 1) Update Interrupt - generate an interrupt once per second, when | ||
1112 | * the RTC clock is updated | ||
1113 | * 2) Alarm Interrupt - generate an interrupt at a specific time of day | ||
1114 | * 3) Periodic Interrupt - generate periodic interrupt, with frequencies | ||
1115 | * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2) | ||
1116 | * (1) and (2) above are implemented using polling at a frequency of | ||
1117 | * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt | ||
1118 | * overhead. (DEFAULT_RTC_INT_FREQ) | ||
1119 | * For (3), we use interrupts at 64 Hz or the user-specified periodic | ||
1120 | * frequency, whichever is higher. | ||
1121 | */ | ||
1122 | #include <linux/rtc.h> | ||
1123 | |||
1124 | #define DEFAULT_RTC_INT_FREQ 64 | ||
1125 | #define RTC_NUM_INTS 1 | ||
1126 | |||
1127 | static unsigned long UIE_on; | ||
1128 | static unsigned long prev_update_sec; | ||
1129 | |||
1130 | static unsigned long AIE_on; | ||
1131 | static struct rtc_time alarm_time; | ||
1132 | |||
1133 | static unsigned long PIE_on; | ||
1134 | static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ; | ||
1135 | static unsigned long PIE_count; | ||
1136 | |||
1137 | static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ | ||
1138 | static unsigned int hpet_t1_cmp; /* cached comparator register */ | ||
1139 | |||
1140 | int is_hpet_enabled(void) | ||
1141 | { | ||
1142 | return vxtime.hpet_address != 0; | ||
1143 | } | ||
1144 | |||
1145 | /* | ||
1146 | * Timer 1 is used for the RTC; we do not use its periodic interrupt | ||
1147 | * feature, even though HPET supports periodic interrupts on Timer 1. | ||
1148 | * The reason: setting up a periodic interrupt in HPET requires stopping | ||
1149 | * the main counter, and doing that every time someone disables/enables | ||
1150 | * the RTC would adversely affect the main kernel timer on Timer 0. | ||
1151 | * So, for the time being, simulate the periodic interrupt in software. | ||
1152 | * | ||
1153 | * hpet_rtc_timer_init() is called for the first-time setup; on | ||
1154 | * subsequent interrupts, reinit happens through hpet_rtc_timer_reinit(). | ||
1155 | */ | ||
1156 | int hpet_rtc_timer_init(void) | ||
1157 | { | ||
1158 | unsigned int cfg, cnt; | ||
1159 | unsigned long flags; | ||
1160 | |||
1161 | if (!is_hpet_enabled()) | ||
1162 | return 0; | ||
1163 | /* | ||
1164 | * Set the counter 1 and enable the interrupts. | ||
1165 | */ | ||
1166 | if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) | ||
1167 | hpet_rtc_int_freq = PIE_freq; | ||
1168 | else | ||
1169 | hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; | ||
1170 | |||
1171 | local_irq_save(flags); | ||
1172 | |||
1173 | cnt = hpet_readl(HPET_COUNTER); | ||
1174 | cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); | ||
1175 | hpet_writel(cnt, HPET_T1_CMP); | ||
1176 | hpet_t1_cmp = cnt; | ||
1177 | |||
1178 | cfg = hpet_readl(HPET_T1_CFG); | ||
1179 | cfg &= ~HPET_TN_PERIODIC; | ||
1180 | cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; | ||
1181 | hpet_writel(cfg, HPET_T1_CFG); | ||
1182 | |||
1183 | local_irq_restore(flags); | ||
1184 | |||
1185 | return 1; | ||
1186 | } | ||
1187 | |||
1188 | static void hpet_rtc_timer_reinit(void) | ||
1189 | { | ||
1190 | unsigned int cfg, cnt, ticks_per_int, lost_ints; | ||
1191 | |||
1192 | if (unlikely(!(PIE_on | AIE_on | UIE_on))) { | ||
1193 | cfg = hpet_readl(HPET_T1_CFG); | ||
1194 | cfg &= ~HPET_TN_ENABLE; | ||
1195 | hpet_writel(cfg, HPET_T1_CFG); | ||
1196 | return; | ||
1197 | } | ||
1198 | |||
1199 | if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) | ||
1200 | hpet_rtc_int_freq = PIE_freq; | ||
1201 | else | ||
1202 | hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; | ||
1203 | |||
1204 | /* It is more accurate to use the comparator value than the current count. */ | ||
1205 | ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq; | ||
1206 | hpet_t1_cmp += ticks_per_int; | ||
1207 | hpet_writel(hpet_t1_cmp, HPET_T1_CMP); | ||
1208 | |||
1209 | /* | ||
1210 | * If the interrupt handler was delayed too long, the write above tries | ||
1211 | * to schedule the next interrupt in the past and the hardware would | ||
1212 | * not interrupt until the counter had wrapped around. | ||
1213 | * So we have to check that the comparator wasn't set to a past time. | ||
1214 | */ | ||
1215 | cnt = hpet_readl(HPET_COUNTER); | ||
1216 | if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) { | ||
1217 | lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1; | ||
1218 | /* Make sure that, even with the time needed to execute | ||
1219 | * this code, the next scheduled interrupt has been moved | ||
1220 | * back to the future: */ | ||
1221 | lost_ints++; | ||
1222 | |||
1223 | hpet_t1_cmp += lost_ints * ticks_per_int; | ||
1224 | hpet_writel(hpet_t1_cmp, HPET_T1_CMP); | ||
1225 | |||
1226 | if (PIE_on) | ||
1227 | PIE_count += lost_ints; | ||
1228 | |||
1229 | if (printk_ratelimit()) | ||
1230 | printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", | ||
1231 | hpet_rtc_int_freq); | ||
1232 | } | ||
1233 | } | ||
1234 | |||
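The comparator catch-up in hpet_rtc_timer_reinit() above is easiest to follow with numbers; this is a standalone replay of the same arithmetic (all values illustrative):

#include <stdio.h>

int main(void)
{
        /* Illustrative: 64 Hz RTC interrupts, hpet_tick = 57273 and
         * HZ = 250, so ticks_per_int = 57273 * 250 / 64 = 223722. */
        unsigned int ticks_per_int = 57273 * 250 / 64;
        unsigned int hpet_t1_cmp = 1000000; /* comparator just written */
        unsigned int cnt = 1600000;         /* counter already past it */
        unsigned int lost_ints = 0;

        if ((int)(cnt - hpet_t1_cmp) > 0) {
                lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
                lost_ints++;  /* margin for the time this code itself takes */
                hpet_t1_cmp += lost_ints * ticks_per_int;
        }
        /* prints lost_ints=4 new cmp=1894888, safely ahead of cnt */
        printf("lost_ints=%u new cmp=%u\n", lost_ints, hpet_t1_cmp);
        return 0;
}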
1235 | /* | ||
1236 | * The functions below are called from the rtc driver. | ||
1237 | * Return 0 if HPET is not being used. | ||
1238 | * Otherwise do the necessary changes and return 1. | ||
1239 | */ | ||
1240 | int hpet_mask_rtc_irq_bit(unsigned long bit_mask) | ||
1241 | { | ||
1242 | if (!is_hpet_enabled()) | ||
1243 | return 0; | ||
1244 | |||
1245 | if (bit_mask & RTC_UIE) | ||
1246 | UIE_on = 0; | ||
1247 | if (bit_mask & RTC_PIE) | ||
1248 | PIE_on = 0; | ||
1249 | if (bit_mask & RTC_AIE) | ||
1250 | AIE_on = 0; | ||
1251 | |||
1252 | return 1; | ||
1253 | } | ||
1254 | |||
1255 | int hpet_set_rtc_irq_bit(unsigned long bit_mask) | ||
1256 | { | ||
1257 | int timer_init_reqd = 0; | ||
1258 | |||
1259 | if (!is_hpet_enabled()) | ||
1260 | return 0; | ||
1261 | |||
1262 | if (!(PIE_on | AIE_on | UIE_on)) | ||
1263 | timer_init_reqd = 1; | ||
1264 | |||
1265 | if (bit_mask & RTC_UIE) { | ||
1266 | UIE_on = 1; | ||
1267 | } | ||
1268 | if (bit_mask & RTC_PIE) { | ||
1269 | PIE_on = 1; | ||
1270 | PIE_count = 0; | ||
1271 | } | ||
1272 | if (bit_mask & RTC_AIE) { | ||
1273 | AIE_on = 1; | ||
1274 | } | ||
1275 | |||
1276 | if (timer_init_reqd) | ||
1277 | hpet_rtc_timer_init(); | ||
1278 | |||
1279 | return 1; | ||
1280 | } | ||
1281 | |||
1282 | int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec) | ||
1283 | { | ||
1284 | if (!is_hpet_enabled()) | ||
1285 | return 0; | ||
1286 | |||
1287 | alarm_time.tm_hour = hrs; | ||
1288 | alarm_time.tm_min = min; | ||
1289 | alarm_time.tm_sec = sec; | ||
1290 | |||
1291 | return 1; | ||
1292 | } | ||
1293 | |||
1294 | int hpet_set_periodic_freq(unsigned long freq) | ||
1295 | { | ||
1296 | if (!is_hpet_enabled()) | ||
1297 | return 0; | ||
1298 | |||
1299 | PIE_freq = freq; | ||
1300 | PIE_count = 0; | ||
1301 | |||
1302 | return 1; | ||
1303 | } | ||
1304 | |||
1305 | int hpet_rtc_dropped_irq(void) | ||
1306 | { | ||
1307 | if (!is_hpet_enabled()) | ||
1308 | return 0; | ||
1309 | |||
1310 | return 1; | ||
1311 | } | ||
1312 | |||
1313 | irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) | ||
1314 | { | ||
1315 | struct rtc_time curr_time; | ||
1316 | unsigned long rtc_int_flag = 0; | ||
1317 | int call_rtc_interrupt = 0; | ||
1318 | |||
1319 | hpet_rtc_timer_reinit(); | ||
1320 | |||
1321 | if (UIE_on | AIE_on) { | ||
1322 | rtc_get_rtc_time(&curr_time); | ||
1323 | } | ||
1324 | if (UIE_on) { | ||
1325 | if (curr_time.tm_sec != prev_update_sec) { | ||
1326 | /* Set update int info, call real rtc int routine */ | ||
1327 | call_rtc_interrupt = 1; | ||
1328 | rtc_int_flag = RTC_UF; | ||
1329 | prev_update_sec = curr_time.tm_sec; | ||
1330 | } | ||
1331 | } | ||
1332 | if (PIE_on) { | ||
1333 | PIE_count++; | ||
1334 | if (PIE_count >= hpet_rtc_int_freq/PIE_freq) { | ||
1335 | /* Set periodic int info, call real rtc int routine */ | ||
1336 | call_rtc_interrupt = 1; | ||
1337 | rtc_int_flag |= RTC_PF; | ||
1338 | PIE_count = 0; | ||
1339 | } | ||
1340 | } | ||
1341 | if (AIE_on) { | ||
1342 | if ((curr_time.tm_sec == alarm_time.tm_sec) && | ||
1343 | (curr_time.tm_min == alarm_time.tm_min) && | ||
1344 | (curr_time.tm_hour == alarm_time.tm_hour)) { | ||
1345 | /* Set alarm int info, call real rtc int routine */ | ||
1346 | call_rtc_interrupt = 1; | ||
1347 | rtc_int_flag |= RTC_AF; | ||
1348 | } | ||
1349 | } | ||
1350 | if (call_rtc_interrupt) { | ||
1351 | rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); | ||
1352 | rtc_interrupt(rtc_int_flag, dev_id); | ||
1353 | } | ||
1354 | return IRQ_HANDLED; | ||
1355 | } | ||
1356 | #endif | ||
1357 | |||
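Since the hardware interrupt in hpet_rtc_interrupt() above runs at hpet_rtc_int_freq (at least 64 Hz) while the user may have asked for a slower PIE_freq, RTC_PF is only raised on every (hpet_rtc_int_freq / PIE_freq)-th interrupt. A small sketch of that software divider, with illustrative rates:

#include <stdio.h>

int main(void)
{
        /* Timer firing at 64 Hz, user-requested 2 Hz periodic rate:
         * RTC_PF should be raised on every 32nd hardware interrupt. */
        unsigned long hpet_rtc_int_freq = 64, PIE_freq = 2, PIE_count = 0;
        int i, delivered = 0;

        for (i = 0; i < 64; i++) {          /* one simulated second */
                PIE_count++;
                if (PIE_count >= hpet_rtc_int_freq / PIE_freq) {
                        delivered++;        /* would set RTC_PF here */
                        PIE_count = 0;
                }
        }
        printf("delivered %d periodic events in one second\n", delivered);
        return 0;
}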
1358 | static int __init nohpet_setup(char *s) | ||
1359 | { | ||
1360 | nohpet = 1; | ||
1361 | return 1; | ||
1362 | } | ||
1363 | |||
1364 | __setup("nohpet", nohpet_setup); | ||
1365 | |||
1366 | int __init notsc_setup(char *s) | ||
1367 | { | ||
1368 | notsc = 1; | ||
1369 | return 1; | ||
1370 | } | ||
1371 | |||
1372 | __setup("notsc", notsc_setup); | ||
diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c new file mode 100644 index 000000000000..895831865019 --- /dev/null +++ b/arch/x86_64/kernel/tsc.c | |||
@@ -0,0 +1,226 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/sched.h> | ||
3 | #include <linux/interrupt.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/clocksource.h> | ||
6 | #include <linux/time.h> | ||
7 | #include <linux/acpi.h> | ||
8 | #include <linux/cpufreq.h> | ||
9 | |||
10 | #include <asm/timex.h> | ||
11 | |||
12 | static int notsc __initdata = 0; | ||
13 | |||
14 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ | ||
15 | EXPORT_SYMBOL(cpu_khz); | ||
16 | |||
17 | static unsigned int cyc2ns_scale __read_mostly; | ||
18 | |||
19 | void set_cyc2ns_scale(unsigned long khz) | ||
20 | { | ||
21 | cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz; | ||
22 | } | ||
23 | |||
24 | static unsigned long long cycles_2_ns(unsigned long long cyc) | ||
25 | { | ||
26 | return (cyc * cyc2ns_scale) >> NS_SCALE; | ||
27 | } | ||
28 | |||
29 | unsigned long long sched_clock(void) | ||
30 | { | ||
31 | unsigned long a = 0; | ||
32 | |||
33 | /* Could do CPU core sync here. Opteron can execute rdtsc speculatively, | ||
34 | * which means it is not completely exact and may not be monotonic | ||
35 | * between CPUs. But the errors should be too small to matter for | ||
36 | * scheduling purposes. | ||
37 | */ | ||
38 | |||
39 | rdtscll(a); | ||
40 | return cycles_2_ns(a); | ||
41 | } | ||
42 | |||
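The cyc2ns conversion above is plain fixed-point arithmetic: cyc2ns_scale holds nanoseconds-per-cycle scaled up by 2^NS_SCALE. A standalone sketch, assuming NS_SCALE is 10 as in this tree's asm/timex.h and an illustrative 2 GHz CPU:

#include <stdio.h>

#define NS_SCALE 10                 /* assumed, as in asm/timex.h */
#define NSEC_PER_MSEC 1000000ULL

int main(void)
{
        unsigned long khz = 2000000;    /* illustrative 2 GHz CPU */
        unsigned int cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz;
        unsigned long long cyc = 4000000000ULL;

        /* 4e9 cycles * 512 >> 10 = 2e9 ns: 2 seconds at 2 GHz */
        printf("scale=%u ns=%llu\n", cyc2ns_scale,
               (cyc * cyc2ns_scale) >> NS_SCALE);
        return 0;
}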
43 | static int tsc_unstable; | ||
44 | |||
45 | static inline int check_tsc_unstable(void) | ||
46 | { | ||
47 | return tsc_unstable; | ||
48 | } | ||
49 | #ifdef CONFIG_CPU_FREQ | ||
50 | |||
51 | /* Frequency scaling support. Adjust the TSC-based timer when the CPU frequency | ||
52 | * changes. | ||
53 | * | ||
54 | * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's | ||
55 | * not that important because current Opteron setups do not support | ||
56 | * scaling on SMP anyway. | ||
57 | * | ||
58 | * Should fix up last_tsc too. Currently gettimeofday in the | ||
59 | * first tick after the change will be slightly wrong. | ||
60 | */ | ||
61 | |||
62 | #include <linux/workqueue.h> | ||
63 | |||
64 | static unsigned int cpufreq_delayed_issched = 0; | ||
65 | static unsigned int cpufreq_init = 0; | ||
66 | static struct work_struct cpufreq_delayed_get_work; | ||
67 | |||
68 | static void handle_cpufreq_delayed_get(struct work_struct *v) | ||
69 | { | ||
70 | unsigned int cpu; | ||
71 | for_each_online_cpu(cpu) { | ||
72 | cpufreq_get(cpu); | ||
73 | } | ||
74 | cpufreq_delayed_issched = 0; | ||
75 | } | ||
76 | |||
77 | static unsigned int ref_freq = 0; | ||
78 | static unsigned long loops_per_jiffy_ref = 0; | ||
79 | |||
80 | static unsigned long cpu_khz_ref = 0; | ||
81 | |||
82 | static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | ||
83 | void *data) | ||
84 | { | ||
85 | struct cpufreq_freqs *freq = data; | ||
86 | unsigned long *lpj, dummy; | ||
87 | |||
88 | if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC)) | ||
89 | return 0; | ||
90 | |||
91 | lpj = &dummy; | ||
92 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
93 | #ifdef CONFIG_SMP | ||
94 | lpj = &cpu_data[freq->cpu].loops_per_jiffy; | ||
95 | #else | ||
96 | lpj = &boot_cpu_data.loops_per_jiffy; | ||
97 | #endif | ||
98 | |||
99 | if (!ref_freq) { | ||
100 | ref_freq = freq->old; | ||
101 | loops_per_jiffy_ref = *lpj; | ||
102 | cpu_khz_ref = cpu_khz; | ||
103 | } | ||
104 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || | ||
105 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | ||
106 | (val == CPUFREQ_RESUMECHANGE)) { | ||
107 | *lpj = | ||
108 | cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); | ||
109 | |||
110 | cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); | ||
111 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
112 | mark_tsc_unstable(); | ||
113 | } | ||
114 | |||
115 | set_cyc2ns_scale(cpu_khz_ref); | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | static struct notifier_block time_cpufreq_notifier_block = { | ||
121 | .notifier_call = time_cpufreq_notifier | ||
122 | }; | ||
123 | |||
124 | static int __init cpufreq_tsc(void) | ||
125 | { | ||
126 | INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get); | ||
127 | if (!cpufreq_register_notifier(&time_cpufreq_notifier_block, | ||
128 | CPUFREQ_TRANSITION_NOTIFIER)) | ||
129 | cpufreq_init = 1; | ||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | core_initcall(cpufreq_tsc); | ||
134 | |||
135 | #endif | ||
136 | |||
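The notifier above rescales cpu_khz and loops_per_jiffy proportionally to the frequency change; cpufreq_scale() is essentially old * new_freq / ref_freq done in 64-bit. A hedged sketch of the effect of halving the clock (values illustrative):

#include <stdio.h>

/* Mirrors what cpufreq_scale() computes: old * mult / div in 64-bit. */
static unsigned long scale(unsigned long old, unsigned int div,
                           unsigned int mult)
{
        return (unsigned long long)old * mult / div;
}

int main(void)
{
        unsigned int ref_freq = 2000000;    /* kHz recorded at boot */
        unsigned int new_freq = 1000000;    /* kHz after scaling down */
        unsigned long cpu_khz_ref = 2000000, lpj_ref = 4000000;

        /* prints cpu_khz=1000000 loops_per_jiffy=2000000 */
        printf("cpu_khz=%lu loops_per_jiffy=%lu\n",
               scale(cpu_khz_ref, ref_freq, new_freq),
               scale(lpj_ref, ref_freq, new_freq));
        return 0;
}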
137 | static int tsc_unstable = 0; | ||
138 | |||
139 | /* | ||
140 | * Make an educated guess if the TSC is trustworthy and synchronized | ||
141 | * over all CPUs. | ||
142 | */ | ||
143 | __cpuinit int unsynchronized_tsc(void) | ||
144 | { | ||
145 | if (tsc_unstable) | ||
146 | return 1; | ||
147 | |||
148 | #ifdef CONFIG_SMP | ||
149 | if (apic_is_clustered_box()) | ||
150 | return 1; | ||
151 | #endif | ||
152 | /* Most Intel systems have synchronized TSCs except for | ||
153 | multi-node systems */ | ||
154 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { | ||
155 | #ifdef CONFIG_ACPI | ||
156 | /* But TSC doesn't tick in C3 so don't use it there */ | ||
157 | if (acpi_gbl_FADT.header.length > 0 && acpi_gbl_FADT.C3latency < 1000) | ||
158 | return 1; | ||
159 | #endif | ||
160 | return 0; | ||
161 | } | ||
162 | |||
163 | /* Assume multi-socket systems are not synchronized */ | ||
164 | return num_present_cpus() > 1; | ||
165 | } | ||
166 | |||
167 | int __init notsc_setup(char *s) | ||
168 | { | ||
169 | notsc = 1; | ||
170 | return 1; | ||
171 | } | ||
172 | |||
173 | __setup("notsc", notsc_setup); | ||
174 | |||
175 | |||
176 | /* clock source code: */ | ||
177 | static cycle_t read_tsc(void) | ||
178 | { | ||
179 | cycle_t ret = (cycle_t)get_cycles_sync(); | ||
180 | return ret; | ||
181 | } | ||
182 | |||
183 | static cycle_t __vsyscall_fn vread_tsc(void) | ||
184 | { | ||
185 | cycle_t ret = (cycle_t)get_cycles_sync(); | ||
186 | return ret; | ||
187 | } | ||
188 | |||
189 | static struct clocksource clocksource_tsc = { | ||
190 | .name = "tsc", | ||
191 | .rating = 300, | ||
192 | .read = read_tsc, | ||
193 | .mask = CLOCKSOURCE_MASK(64), | ||
194 | .shift = 22, | ||
195 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | | ||
196 | CLOCK_SOURCE_MUST_VERIFY, | ||
197 | .vread = vread_tsc, | ||
198 | }; | ||
199 | |||
200 | void mark_tsc_unstable(void) | ||
201 | { | ||
202 | if (!tsc_unstable) { | ||
203 | tsc_unstable = 1; | ||
204 | /* Change only the rating when not registered */ | ||
205 | if (clocksource_tsc.mult) | ||
206 | clocksource_change_rating(&clocksource_tsc, 0); | ||
207 | else | ||
208 | clocksource_tsc.rating = 0; | ||
209 | } | ||
210 | } | ||
211 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | ||
212 | |||
213 | static int __init init_tsc_clocksource(void) | ||
214 | { | ||
215 | if (!notsc) { | ||
216 | clocksource_tsc.mult = clocksource_khz2mult(cpu_khz, | ||
217 | clocksource_tsc.shift); | ||
218 | if (check_tsc_unstable()) | ||
219 | clocksource_tsc.rating = 0; | ||
220 | |||
221 | return clocksource_register(&clocksource_tsc); | ||
222 | } | ||
223 | return 0; | ||
224 | } | ||
225 | |||
226 | module_init(init_tsc_clocksource); | ||
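init_tsc_clocksource() above leaves the cycles-to-nanoseconds conversion to the generic timekeeping core via the mult and shift fields; clocksource_khz2mult(khz, shift) works out to roughly (1e6 ns/ms << shift) / khz. A worked sketch for an illustrative 2 GHz TSC and the shift of 22 used above:

#include <stdio.h>

int main(void)
{
        unsigned int shift = 22, khz = 2000000;   /* illustrative 2 GHz */
        /* approximately what clocksource_khz2mult() returns: */
        unsigned long long mult = ((1000000ULL << shift) + khz / 2) / khz;
        unsigned long long cycles = 1000;

        /* generic code computes ns = (cycles * mult) >> shift:
         * 1000 cycles at 2 GHz -> 500 ns */
        printf("mult=%llu ns=%llu\n", mult, (cycles * mult) >> shift);
        return 0;
}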
diff --git a/arch/x86_64/kernel/tsc_sync.c b/arch/x86_64/kernel/tsc_sync.c new file mode 100644 index 000000000000..014f0db45dfa --- /dev/null +++ b/arch/x86_64/kernel/tsc_sync.c | |||
@@ -0,0 +1,187 @@ | |||
1 | /* | ||
2 | * arch/x86_64/kernel/tsc_sync.c: check TSC synchronization. | ||
3 | * | ||
4 | * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar | ||
5 | * | ||
6 | * We check whether all boot CPUs have their TSCs synchronized, | ||
7 | * print a warning if not and turn off the TSC clock-source. | ||
8 | * | ||
9 | * The warp-check is point-to-point between two CPUs, the CPU | ||
10 | * initiating the bootup is the 'source CPU', the freshly booting | ||
11 | * CPU is the 'target CPU'. | ||
12 | * | ||
13 | * Only two CPUs may participate - they can enter in any order. | ||
14 | * ( The serial nature of the boot logic and the CPU hotplug lock | ||
15 | * protects against more than 2 CPUs entering this code. ) | ||
16 | */ | ||
17 | #include <linux/spinlock.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/smp.h> | ||
21 | #include <linux/nmi.h> | ||
22 | #include <asm/tsc.h> | ||
23 | |||
24 | /* | ||
25 | * Entry/exit counters that make sure that both CPUs | ||
26 | * run the measurement code at once: | ||
27 | */ | ||
28 | static __cpuinitdata atomic_t start_count; | ||
29 | static __cpuinitdata atomic_t stop_count; | ||
30 | |||
31 | /* | ||
32 | * We use a raw spinlock in this exceptional case, because | ||
33 | * we want to have the fastest, inlined, non-debug version | ||
34 | * of a critical section, to be able to prove TSC time-warps: | ||
35 | */ | ||
36 | static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
37 | static __cpuinitdata cycles_t last_tsc; | ||
38 | static __cpuinitdata cycles_t max_warp; | ||
39 | static __cpuinitdata int nr_warps; | ||
40 | |||
41 | /* | ||
42 | * TSC-warp measurement loop running on both CPUs: | ||
43 | */ | ||
44 | static __cpuinit void check_tsc_warp(void) | ||
45 | { | ||
46 | cycles_t start, now, prev, end; | ||
47 | int i; | ||
48 | |||
49 | start = get_cycles_sync(); | ||
50 | /* | ||
51 | * The measurement runs for 20 msecs: | ||
52 | */ | ||
53 | end = start + cpu_khz * 20ULL; | ||
54 | now = start; | ||
55 | |||
56 | for (i = 0; ; i++) { | ||
57 | /* | ||
58 | * We take the global lock, measure TSC, save the | ||
59 | * previous TSC that was measured (possibly on | ||
60 | * another CPU) and update the previous TSC timestamp. | ||
61 | */ | ||
62 | __raw_spin_lock(&sync_lock); | ||
63 | prev = last_tsc; | ||
64 | now = get_cycles_sync(); | ||
65 | last_tsc = now; | ||
66 | __raw_spin_unlock(&sync_lock); | ||
67 | |||
68 | /* | ||
69 | * Be nice every now and then (and also check whether | ||
70 | * measurement is done [we also insert a 100 million | ||
71 | * loops safety exit, so we don't lock up in case the | ||
72 | * TSC readout is totally broken]): | ||
73 | */ | ||
74 | if (unlikely(!(i & 7))) { | ||
75 | if (now > end || i > 100000000) | ||
76 | break; | ||
77 | cpu_relax(); | ||
78 | touch_nmi_watchdog(); | ||
79 | } | ||
80 | /* | ||
81 | * Outside the critical section we can now see whether | ||
82 | * we saw a time-warp of the TSC going backwards: | ||
83 | */ | ||
84 | if (unlikely(prev > now)) { | ||
85 | __raw_spin_lock(&sync_lock); | ||
86 | max_warp = max(max_warp, prev - now); | ||
87 | nr_warps++; | ||
88 | __raw_spin_unlock(&sync_lock); | ||
89 | } | ||
90 | |||
91 | } | ||
92 | } | ||
93 | |||
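The loop above is the heart of the warp check: publish the latest TSC under a lock, and flag any observation where the previously published stamp is ahead of a fresh read. A rough userspace analogue with pthreads (a sketch only; it does not pin the threads to distinct CPUs, and __rdtsc() is the gcc/clang x86 intrinsic, not the kernel's get_cycles_sync()):

#include <pthread.h>
#include <stdio.h>
#include <x86intrin.h>          /* __rdtsc() on x86 gcc/clang */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long long last_tsc;
static int nr_warps;

static void *warp_check(void *arg)
{
        int i;
        for (i = 0; i < 1000000; i++) {
                pthread_mutex_lock(&lock);
                unsigned long long prev = last_tsc;
                unsigned long long now = __rdtsc();
                last_tsc = now;
                pthread_mutex_unlock(&lock);

                if (prev > now)         /* time appeared to run backwards */
                        __sync_fetch_and_add(&nr_warps, 1);
        }
        return NULL;
}

int main(void)
{
        pthread_t a, b;
        pthread_create(&a, NULL, warp_check, NULL);
        pthread_create(&b, NULL, warp_check, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        printf("%d warps observed\n", nr_warps);
        return 0;
}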
94 | /* | ||
95 | * Source CPU calls into this - it waits for the freshly booted | ||
96 | * target CPU to arrive and then starts the measurement: | ||
97 | */ | ||
98 | void __cpuinit check_tsc_sync_source(int cpu) | ||
99 | { | ||
100 | int cpus = 2; | ||
101 | |||
102 | /* | ||
103 | * No need to check if we already know that the TSC is not | ||
104 | * synchronized: | ||
105 | */ | ||
106 | if (unsynchronized_tsc()) | ||
107 | return; | ||
108 | |||
109 | printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", | ||
110 | smp_processor_id(), cpu); | ||
111 | |||
112 | /* | ||
113 | * Reset it - in case this is a second bootup: | ||
114 | */ | ||
115 | atomic_set(&stop_count, 0); | ||
116 | |||
117 | /* | ||
118 | * Wait for the target to arrive: | ||
119 | */ | ||
120 | while (atomic_read(&start_count) != cpus-1) | ||
121 | cpu_relax(); | ||
122 | /* | ||
123 | * Trigger the target to continue into the measurement too: | ||
124 | */ | ||
125 | atomic_inc(&start_count); | ||
126 | |||
127 | check_tsc_warp(); | ||
128 | |||
129 | while (atomic_read(&stop_count) != cpus-1) | ||
130 | cpu_relax(); | ||
131 | |||
132 | /* | ||
133 | * Reset it - just in case we boot another CPU later: | ||
134 | */ | ||
135 | atomic_set(&start_count, 0); | ||
136 | |||
137 | if (nr_warps) { | ||
138 | printk("\n"); | ||
139 | printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," | ||
140 | " turning off TSC clock.\n", max_warp); | ||
141 | mark_tsc_unstable(); | ||
142 | nr_warps = 0; | ||
143 | max_warp = 0; | ||
144 | last_tsc = 0; | ||
145 | } else { | ||
146 | printk(" passed.\n"); | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Let the target continue with the bootup: | ||
151 | */ | ||
152 | atomic_inc(&stop_count); | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * Freshly booted CPUs call into this: | ||
157 | */ | ||
158 | void __cpuinit check_tsc_sync_target(void) | ||
159 | { | ||
160 | int cpus = 2; | ||
161 | |||
162 | if (unsynchronized_tsc()) | ||
163 | return; | ||
164 | |||
165 | /* | ||
166 | * Register this CPU's participation and wait for the | ||
167 | * source CPU to start the measurement: | ||
168 | */ | ||
169 | atomic_inc(&start_count); | ||
170 | while (atomic_read(&start_count) != cpus) | ||
171 | cpu_relax(); | ||
172 | |||
173 | check_tsc_warp(); | ||
174 | |||
175 | /* | ||
176 | * Ok, we are done: | ||
177 | */ | ||
178 | atomic_inc(&stop_count); | ||
179 | |||
180 | /* | ||
181 | * Wait for the source CPU to print stuff: | ||
182 | */ | ||
183 | while (atomic_read(&stop_count) != cpus) | ||
184 | cpu_relax(); | ||
185 | } | ||
186 | #undef NR_LOOPS | ||
187 | |||
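The start_count/stop_count choreography above is a plain two-party barrier: each side announces arrival, spins until both have arrived, measures, then synchronizes again before leaving. Reduced to a userspace sketch with C11 atomics (illustrative only; the kernel version also resets the counters for later CPU hotplug):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int start_count, stop_count;

static void rendezvous(const char *who)
{
        atomic_fetch_add(&start_count, 1);
        while (atomic_load(&start_count) != 2)
                ;                       /* wait for the peer to arrive */
        printf("%s: measuring\n", who); /* check_tsc_warp() runs here */
        atomic_fetch_add(&stop_count, 1);
        while (atomic_load(&stop_count) != 2)
                ;                       /* wait for the peer to finish */
}

static void *target(void *arg)
{
        rendezvous("target");
        return NULL;
}

int main(void)
{
        pthread_t t;
        pthread_create(&t, NULL, target, NULL);
        rendezvous("source");
        pthread_join(t, NULL);
        return 0;
}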
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index c360c4225244..b73212c0a550 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S | |||
@@ -88,31 +88,25 @@ SECTIONS | |||
88 | __vsyscall_0 = VSYSCALL_VIRT_ADDR; | 88 | __vsyscall_0 = VSYSCALL_VIRT_ADDR; |
89 | 89 | ||
90 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 90 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); |
91 | .xtime_lock : AT(VLOAD(.xtime_lock)) { *(.xtime_lock) } | 91 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) } |
92 | xtime_lock = VVIRT(.xtime_lock); | 92 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); |
93 | 93 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) | |
94 | .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } | 94 | { *(.vsyscall_gtod_data) } |
95 | vxtime = VVIRT(.vxtime); | 95 | vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); |
96 | 96 | ||
97 | .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } | 97 | .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } |
98 | vgetcpu_mode = VVIRT(.vgetcpu_mode); | 98 | vgetcpu_mode = VVIRT(.vgetcpu_mode); |
99 | 99 | ||
100 | .sys_tz : AT(VLOAD(.sys_tz)) { *(.sys_tz) } | ||
101 | sys_tz = VVIRT(.sys_tz); | ||
102 | |||
103 | .sysctl_vsyscall : AT(VLOAD(.sysctl_vsyscall)) { *(.sysctl_vsyscall) } | ||
104 | sysctl_vsyscall = VVIRT(.sysctl_vsyscall); | ||
105 | |||
106 | .xtime : AT(VLOAD(.xtime)) { *(.xtime) } | ||
107 | xtime = VVIRT(.xtime); | ||
108 | |||
109 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 100 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); |
110 | .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } | 101 | .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } |
111 | jiffies = VVIRT(.jiffies); | 102 | jiffies = VVIRT(.jiffies); |
112 | 103 | ||
113 | .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { *(.vsyscall_1) } | 104 | .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) |
114 | .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { *(.vsyscall_2) } | 105 | { *(.vsyscall_1) } |
115 | .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { *(.vsyscall_3) } | 106 | .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) |
107 | { *(.vsyscall_2) } | ||
108 | .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) | ||
109 | { *(.vsyscall_3) } | ||
116 | 110 | ||
117 | . = VSYSCALL_VIRT_ADDR + 4096; | 111 | . = VSYSCALL_VIRT_ADDR + 4096; |
118 | 112 | ||
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 313dc6ad780b..180ff919eaf9 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/seqlock.h> | 26 | #include <linux/seqlock.h> |
27 | #include <linux/jiffies.h> | 27 | #include <linux/jiffies.h> |
28 | #include <linux/sysctl.h> | 28 | #include <linux/sysctl.h> |
29 | #include <linux/clocksource.h> | ||
29 | #include <linux/getcpu.h> | 30 | #include <linux/getcpu.h> |
30 | #include <linux/cpu.h> | 31 | #include <linux/cpu.h> |
31 | #include <linux/smp.h> | 32 | #include <linux/smp.h> |
@@ -34,6 +35,7 @@ | |||
34 | #include <asm/vsyscall.h> | 35 | #include <asm/vsyscall.h> |
35 | #include <asm/pgtable.h> | 36 | #include <asm/pgtable.h> |
36 | #include <asm/page.h> | 37 | #include <asm/page.h> |
38 | #include <asm/unistd.h> | ||
37 | #include <asm/fixmap.h> | 39 | #include <asm/fixmap.h> |
38 | #include <asm/errno.h> | 40 | #include <asm/errno.h> |
39 | #include <asm/io.h> | 41 | #include <asm/io.h> |
@@ -44,56 +46,41 @@ | |||
44 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 46 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
45 | #define __syscall_clobber "r11","rcx","memory" | 47 | #define __syscall_clobber "r11","rcx","memory" |
46 | 48 | ||
47 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; | 49 | struct vsyscall_gtod_data_t { |
48 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; | 50 | seqlock_t lock; |
51 | int sysctl_enabled; | ||
52 | struct timeval wall_time_tv; | ||
53 | struct timezone sys_tz; | ||
54 | cycle_t offset_base; | ||
55 | struct clocksource clock; | ||
56 | }; | ||
49 | int __vgetcpu_mode __section_vgetcpu_mode; | 57 | int __vgetcpu_mode __section_vgetcpu_mode; |
50 | 58 | ||
51 | #include <asm/unistd.h> | 59 | struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data = |
52 | |||
53 | static __always_inline void timeval_normalize(struct timeval * tv) | ||
54 | { | 60 | { |
55 | time_t __sec; | 61 | .lock = SEQLOCK_UNLOCKED, |
56 | 62 | .sysctl_enabled = 1, | |
57 | __sec = tv->tv_usec / 1000000; | 63 | }; |
58 | if (__sec) { | ||
59 | tv->tv_usec %= 1000000; | ||
60 | tv->tv_sec += __sec; | ||
61 | } | ||
62 | } | ||
63 | 64 | ||
64 | static __always_inline void do_vgettimeofday(struct timeval * tv) | 65 | void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) |
65 | { | 66 | { |
66 | long sequence, t; | 67 | unsigned long flags; |
67 | unsigned long sec, usec; | 68 | |
68 | 69 | write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); | |
69 | do { | 70 | /* copy vsyscall data */ |
70 | sequence = read_seqbegin(&__xtime_lock); | 71 | vsyscall_gtod_data.clock = *clock; |
71 | 72 | vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; | |
72 | sec = __xtime.tv_sec; | 73 | vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; |
73 | usec = __xtime.tv_nsec / 1000; | 74 | vsyscall_gtod_data.sys_tz = sys_tz; |
74 | 75 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | |
75 | if (__vxtime.mode != VXTIME_HPET) { | ||
76 | t = get_cycles_sync(); | ||
77 | if (t < __vxtime.last_tsc) | ||
78 | t = __vxtime.last_tsc; | ||
79 | usec += ((t - __vxtime.last_tsc) * | ||
80 | __vxtime.tsc_quot) >> 32; | ||
81 | /* See comment in x86_64 do_gettimeofday. */ | ||
82 | } else { | ||
83 | usec += ((readl((void __iomem *) | ||
84 | fix_to_virt(VSYSCALL_HPET) + 0xf0) - | ||
85 | __vxtime.last) * __vxtime.quot) >> 32; | ||
86 | } | ||
87 | } while (read_seqretry(&__xtime_lock, sequence)); | ||
88 | |||
89 | tv->tv_sec = sec + usec / 1000000; | ||
90 | tv->tv_usec = usec % 1000000; | ||
91 | } | 76 | } |
92 | 77 | ||
93 | /* RED-PEN may want to re-add seq locking, but then the variable should be write-once. */ | 78 | /* RED-PEN may want to re-add seq locking, but then the variable should be |
79 | * write-once. | ||
80 | */ | ||
94 | static __always_inline void do_get_tz(struct timezone * tz) | 81 | static __always_inline void do_get_tz(struct timezone * tz) |
95 | { | 82 | { |
96 | *tz = __sys_tz; | 83 | *tz = __vsyscall_gtod_data.sys_tz; |
97 | } | 84 | } |
98 | 85 | ||
99 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | 86 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) |
@@ -101,7 +88,8 @@ static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | |||
101 | int ret; | 88 | int ret; |
102 | asm volatile("vsysc2: syscall" | 89 | asm volatile("vsysc2: syscall" |
103 | : "=a" (ret) | 90 | : "=a" (ret) |
104 | : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber ); | 91 | : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) |
92 | : __syscall_clobber ); | ||
105 | return ret; | 93 | return ret; |
106 | } | 94 | } |
107 | 95 | ||
@@ -114,10 +102,44 @@ static __always_inline long time_syscall(long *t) | |||
114 | return secs; | 102 | return secs; |
115 | } | 103 | } |
116 | 104 | ||
105 | static __always_inline void do_vgettimeofday(struct timeval * tv) | ||
106 | { | ||
107 | cycle_t now, base, mask, cycle_delta; | ||
108 | unsigned long seq, mult, shift, nsec_delta; | ||
109 | cycle_t (*vread)(void); | ||
110 | do { | ||
111 | seq = read_seqbegin(&__vsyscall_gtod_data.lock); | ||
112 | |||
113 | vread = __vsyscall_gtod_data.clock.vread; | ||
114 | if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) { | ||
115 | gettimeofday(tv,0); | ||
116 | return; | ||
117 | } | ||
118 | now = vread(); | ||
119 | base = __vsyscall_gtod_data.clock.cycle_last; | ||
120 | mask = __vsyscall_gtod_data.clock.mask; | ||
121 | mult = __vsyscall_gtod_data.clock.mult; | ||
122 | shift = __vsyscall_gtod_data.clock.shift; | ||
123 | |||
124 | *tv = __vsyscall_gtod_data.wall_time_tv; | ||
125 | |||
126 | } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); | ||
127 | |||
128 | /* calculate interval: */ | ||
129 | cycle_delta = (now - base) & mask; | ||
130 | /* convert to nsecs: */ | ||
131 | nsec_delta = (cycle_delta * mult) >> shift; | ||
132 | |||
133 | /* convert to usecs and add to the timeval: */ | ||
134 | tv->tv_usec += nsec_delta / NSEC_PER_USEC; | ||
135 | while (tv->tv_usec > USEC_PER_SEC) { | ||
136 | tv->tv_sec += 1; | ||
137 | tv->tv_usec -= USEC_PER_SEC; | ||
138 | } | ||
139 | } | ||
140 | |||
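The retry loop in do_vgettimeofday() above is the classic seqlock read side: snapshot all shared fields, then re-check the sequence number and restart if a writer intervened. A minimal userspace rendering of the pattern (a sketch; the kernel's read_seqbegin()/read_seqretry() add the proper memory barriers and handle SMP details this toy version glosses over):

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint seq;                 /* odd while a writer is active */
static unsigned long long wall_sec, cycle_last;

static unsigned begin_read(void)
{
        unsigned s;
        while ((s = atomic_load(&seq)) & 1)
                ;                       /* writer in progress: spin */
        return s;
}

static int retry_read(unsigned s)
{
        return atomic_load(&seq) != s;  /* changed => snapshot is torn */
}

int main(void)
{
        unsigned s;
        unsigned long long sec, base;

        do {
                s = begin_read();
                sec = wall_sec;         /* snapshot all shared fields */
                base = cycle_last;      /* ... inside the retry loop  */
        } while (retry_read(s));

        printf("sec=%llu base=%llu\n", sec, base);
        return 0;
}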
117 | int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) | 141 | int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) |
118 | { | 142 | { |
119 | if (!__sysctl_vsyscall) | ||
120 | return gettimeofday(tv,tz); | ||
121 | if (tv) | 143 | if (tv) |
122 | do_vgettimeofday(tv); | 144 | do_vgettimeofday(tv); |
123 | if (tz) | 145 | if (tz) |
@@ -129,11 +151,11 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) | |||
129 | * unlikely */ | 151 | * unlikely */ |
130 | time_t __vsyscall(1) vtime(time_t *t) | 152 | time_t __vsyscall(1) vtime(time_t *t) |
131 | { | 153 | { |
132 | if (!__sysctl_vsyscall) | 154 | if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) |
133 | return time_syscall(t); | 155 | return time_syscall(t); |
134 | else if (t) | 156 | else if (t) |
135 | *t = __xtime.tv_sec; | 157 | *t = __vsyscall_gtod_data.wall_time_tv.tv_sec; |
136 | return __xtime.tv_sec; | 158 | return __vsyscall_gtod_data.wall_time_tv.tv_sec; |
137 | } | 159 | } |
138 | 160 | ||
139 | /* Fast way to get current CPU and node. | 161 | /* Fast way to get current CPU and node. |
@@ -210,7 +232,7 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, | |||
210 | ret = -ENOMEM; | 232 | ret = -ENOMEM; |
211 | goto out; | 233 | goto out; |
212 | } | 234 | } |
213 | if (!sysctl_vsyscall) { | 235 | if (!vsyscall_gtod_data.sysctl_enabled) { |
214 | writew(SYSCALL, map1); | 236 | writew(SYSCALL, map1); |
215 | writew(SYSCALL, map2); | 237 | writew(SYSCALL, map2); |
216 | } else { | 238 | } else { |
@@ -232,7 +254,8 @@ static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, | |||
232 | 254 | ||
233 | static ctl_table kernel_table2[] = { | 255 | static ctl_table kernel_table2[] = { |
234 | { .ctl_name = 99, .procname = "vsyscall64", | 256 | { .ctl_name = 99, .procname = "vsyscall64", |
235 | .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644, | 257 | .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), |
258 | .mode = 0644, | ||
236 | .strategy = vsyscall_sysctl_nostrat, | 259 | .strategy = vsyscall_sysctl_nostrat, |
237 | .proc_handler = vsyscall_sysctl_change }, | 260 | .proc_handler = vsyscall_sysctl_change }, |
238 | {} | 261 | {} |
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 6c6751b1405b..8206fc1ecc58 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c | |||
@@ -39,6 +39,17 @@ | |||
39 | #include <linux/moduleparam.h> | 39 | #include <linux/moduleparam.h> |
40 | #include <linux/sched.h> /* need_resched() */ | 40 | #include <linux/sched.h> /* need_resched() */ |
41 | #include <linux/latency.h> | 41 | #include <linux/latency.h> |
42 | #include <linux/clockchips.h> | ||
43 | |||
44 | /* | ||
45 | * Include the apic definitions for x86 to have the APIC timer related defines | ||
46 | * available also for UP (on SMP it gets magically included via linux/smp.h). | ||
47 | * asm/acpi.h is not an option, as it would require more include magic. Also | ||
48 | * creating an empty asm-ia64/apic.h would just trade pest vs. cholera. | ||
49 | */ | ||
50 | #ifdef CONFIG_X86 | ||
51 | #include <asm/apic.h> | ||
52 | #endif | ||
42 | 53 | ||
43 | #include <asm/io.h> | 54 | #include <asm/io.h> |
44 | #include <asm/uaccess.h> | 55 | #include <asm/uaccess.h> |
@@ -238,6 +249,81 @@ static void acpi_cstate_enter(struct acpi_processor_cx *cstate) | |||
238 | } | 249 | } |
239 | } | 250 | } |
240 | 251 | ||
252 | #ifdef ARCH_APICTIMER_STOPS_ON_C3 | ||
253 | |||
254 | /* | ||
255 | * Some BIOS implementations switch to C3 in the published C2 state. | ||
256 | * This seems to be a common problem on AMD boxen, but other vendors | ||
257 | * are affected too. We pick the most conservative approach: we assume | ||
258 | * that the local APIC stops in both C2 and C3. | ||
259 | */ | ||
260 | static void acpi_timer_check_state(int state, struct acpi_processor *pr, | ||
261 | struct acpi_processor_cx *cx) | ||
262 | { | ||
263 | struct acpi_processor_power *pwr = &pr->power; | ||
264 | |||
265 | /* | ||
266 | * Check if one of the previous states already marked the lapic | ||
267 | * unstable | ||
268 | */ | ||
269 | if (pwr->timer_broadcast_on_state < state) | ||
270 | return; | ||
271 | |||
272 | if (cx->type >= ACPI_STATE_C2) | ||
273 | pr->power.timer_broadcast_on_state = state; | ||
274 | } | ||
275 | |||
276 | static void acpi_propagate_timer_broadcast(struct acpi_processor *pr) | ||
277 | { | ||
278 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | ||
279 | unsigned long reason; | ||
280 | |||
281 | reason = pr->power.timer_broadcast_on_state < INT_MAX ? | ||
282 | CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF; | ||
283 | |||
284 | clockevents_notify(reason, &pr->id); | ||
285 | #else | ||
286 | cpumask_t mask = cpumask_of_cpu(pr->id); | ||
287 | |||
288 | if (pr->power.timer_broadcast_on_state < INT_MAX) | ||
289 | on_each_cpu(switch_APIC_timer_to_ipi, &mask, 1, 1); | ||
290 | else | ||
291 | on_each_cpu(switch_ipi_to_APIC_timer, &mask, 1, 1); | ||
292 | #endif | ||
293 | } | ||
294 | |||
295 | /* Power(C) State timer broadcast control */ | ||
296 | static void acpi_state_timer_broadcast(struct acpi_processor *pr, | ||
297 | struct acpi_processor_cx *cx, | ||
298 | int broadcast) | ||
299 | { | ||
300 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | ||
301 | |||
302 | int state = cx - pr->power.states; | ||
303 | |||
304 | if (state >= pr->power.timer_broadcast_on_state) { | ||
305 | unsigned long reason; | ||
306 | |||
307 | reason = broadcast ? CLOCK_EVT_NOTIFY_BROADCAST_ENTER : | ||
308 | CLOCK_EVT_NOTIFY_BROADCAST_EXIT; | ||
309 | clockevents_notify(reason, &pr->id); | ||
310 | } | ||
311 | #endif | ||
312 | } | ||
313 | |||
314 | #else | ||
315 | |||
316 | static void acpi_timer_check_state(int state, struct acpi_processor *pr, | ||
317 | struct acpi_processor_cx *cstate) { } | ||
318 | static void acpi_propagate_timer_broadcast(struct acpi_processor *pr) { } | ||
319 | static void acpi_state_timer_broadcast(struct acpi_processor *pr, | ||
320 | struct acpi_processor_cx *cx, | ||
321 | int broadcast) | ||
322 | { | ||
323 | } | ||
324 | |||
325 | #endif | ||
326 | |||
241 | static void acpi_processor_idle(void) | 327 | static void acpi_processor_idle(void) |
242 | { | 328 | { |
243 | struct acpi_processor *pr = NULL; | 329 | struct acpi_processor *pr = NULL; |
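
The three helpers divide the work: acpi_timer_check_state() records at verify time the shallowest state in which the local APIC timer may stop, acpi_propagate_timer_broadcast() turns the broadcast mechanism on or off accordingly, and acpi_state_timer_broadcast() brackets each deep-idle entry. Condensed from the C2/C3 hunks that follow, the per-entry pattern is:

        acpi_state_timer_broadcast(pr, cx, 1); /* BROADCAST_ENTER for this CPU */
        acpi_cstate_enter(cx);                 /* the lapic timer may stop here */
        acpi_state_timer_broadcast(pr, cx, 0); /* BROADCAST_EXIT, lapic resumes */

For states shallower than timer_broadcast_on_state the notification is a no-op, so C1-only idling pays nothing.
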
@@ -382,6 +468,7 @@ static void acpi_processor_idle(void) | |||
382 | /* Get start time (ticks) */ | 468 | /* Get start time (ticks) */ |
383 | t1 = inl(acpi_gbl_FADT.xpm_timer_block.address); | 469 | t1 = inl(acpi_gbl_FADT.xpm_timer_block.address); |
384 | /* Invoke C2 */ | 470 | /* Invoke C2 */ |
471 | acpi_state_timer_broadcast(pr, cx, 1); | ||
385 | acpi_cstate_enter(cx); | 472 | acpi_cstate_enter(cx); |
386 | /* Get end time (ticks) */ | 473 | /* Get end time (ticks) */ |
387 | t2 = inl(acpi_gbl_FADT.xpm_timer_block.address); | 474 | t2 = inl(acpi_gbl_FADT.xpm_timer_block.address); |
@@ -396,6 +483,7 @@ static void acpi_processor_idle(void) | |||
396 | /* Compute time (ticks) that we were actually asleep */ | 483 | /* Compute time (ticks) that we were actually asleep */ |
397 | sleep_ticks = | 484 | sleep_ticks = |
398 | ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD; | 485 | ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD; |
486 | acpi_state_timer_broadcast(pr, cx, 0); | ||
399 | break; | 487 | break; |
400 | 488 | ||
401 | case ACPI_STATE_C3: | 489 | case ACPI_STATE_C3: |
@@ -417,6 +505,7 @@ static void acpi_processor_idle(void) | |||
417 | /* Get start time (ticks) */ | 505 | /* Get start time (ticks) */ |
418 | t1 = inl(acpi_gbl_FADT.xpm_timer_block.address); | 506 | t1 = inl(acpi_gbl_FADT.xpm_timer_block.address); |
419 | /* Invoke C3 */ | 507 | /* Invoke C3 */ |
508 | acpi_state_timer_broadcast(pr, cx, 1); | ||
420 | acpi_cstate_enter(cx); | 509 | acpi_cstate_enter(cx); |
421 | /* Get end time (ticks) */ | 510 | /* Get end time (ticks) */ |
422 | t2 = inl(acpi_gbl_FADT.xpm_timer_block.address); | 511 | t2 = inl(acpi_gbl_FADT.xpm_timer_block.address); |
@@ -436,6 +525,7 @@ static void acpi_processor_idle(void) | |||
436 | /* Compute time (ticks) that we were actually asleep */ | 525 | /* Compute time (ticks) that we were actually asleep */ |
437 | sleep_ticks = | 526 | sleep_ticks = |
438 | ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD; | 527 | ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD; |
528 | acpi_state_timer_broadcast(pr, cx, 0); | ||
439 | break; | 529 | break; |
440 | 530 | ||
441 | default: | 531 | default: |
@@ -904,11 +994,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr) | |||
904 | unsigned int i; | 994 | unsigned int i; |
905 | unsigned int working = 0; | 995 | unsigned int working = 0; |
906 | 996 | ||
907 | #ifdef ARCH_APICTIMER_STOPS_ON_C3 | 997 | pr->power.timer_broadcast_on_state = INT_MAX; |
908 | int timer_broadcast = 0; | ||
909 | cpumask_t mask = cpumask_of_cpu(pr->id); | ||
910 | on_each_cpu(switch_ipi_to_APIC_timer, &mask, 1, 1); | ||
911 | #endif | ||
912 | 998 | ||
913 | for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) { | 999 | for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) { |
914 | struct acpi_processor_cx *cx = &pr->power.states[i]; | 1000 | struct acpi_processor_cx *cx = &pr->power.states[i]; |
@@ -920,21 +1006,14 @@ static int acpi_processor_power_verify(struct acpi_processor *pr) | |||
920 | 1006 | ||
921 | case ACPI_STATE_C2: | 1007 | case ACPI_STATE_C2: |
922 | acpi_processor_power_verify_c2(cx); | 1008 | acpi_processor_power_verify_c2(cx); |
923 | #ifdef ARCH_APICTIMER_STOPS_ON_C3 | 1009 | if (cx->valid) |
924 | /* Some AMD systems fake C3 as C2, but still | 1010 | acpi_timer_check_state(i, pr, cx); |
925 | have timer troubles */ | ||
926 | if (cx->valid && | ||
927 | boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | ||
928 | timer_broadcast++; | ||
929 | #endif | ||
930 | break; | 1011 | break; |
931 | 1012 | ||
932 | case ACPI_STATE_C3: | 1013 | case ACPI_STATE_C3: |
933 | acpi_processor_power_verify_c3(pr, cx); | 1014 | acpi_processor_power_verify_c3(pr, cx); |
934 | #ifdef ARCH_APICTIMER_STOPS_ON_C3 | ||
935 | if (cx->valid) | 1015 | if (cx->valid) |
936 | timer_broadcast++; | 1016 | acpi_timer_check_state(i, pr, cx); |
937 | #endif | ||
938 | break; | 1017 | break; |
939 | } | 1018 | } |
940 | 1019 | ||
@@ -942,10 +1021,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr) | |||
942 | working++; | 1021 | working++; |
943 | } | 1022 | } |
944 | 1023 | ||
945 | #ifdef ARCH_APICTIMER_STOPS_ON_C3 | 1024 | acpi_propagate_timer_broadcast(pr); |
946 | if (timer_broadcast) | ||
947 | on_each_cpu(switch_APIC_timer_to_ipi, &mask, 1, 1); | ||
948 | #endif | ||
949 | 1025 | ||
950 | return (working); | 1026 | return (working); |
951 | } | 1027 | } |
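
This rework replaces the boolean timer_broadcast counter and the AMD vendor check with a threshold: timer_broadcast_on_state starts at INT_MAX, meaning "never broadcast", and is lowered to the index of the first valid state of type C2 or deeper. A worked trace, assuming a table whose entries 1..3 verify as valid C1, C2 and C3:

        pr->power.timer_broadcast_on_state = INT_MAX;   /* no broadcast yet */
        /* i == 1, C1: type < ACPI_STATE_C2, threshold stays INT_MAX       */
        /* i == 2, C2: acpi_timer_check_state() lowers the threshold to 2  */
        /* i == 3, C3: threshold 2 < 3, the early return keeps it at 2     */
        acpi_propagate_timer_broadcast(pr);     /* 2 < INT_MAX -> BROADCAST_ON */
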
diff --git a/drivers/char/agp/Makefile b/drivers/char/agp/Makefile index 3e581603d0a8..a0d04a23dacd 100644 --- a/drivers/char/agp/Makefile +++ b/drivers/char/agp/Makefile | |||
@@ -1,6 +1,7 @@ | |||
1 | agpgart-y := backend.o frontend.o generic.o isoch.o | 1 | agpgart-y := backend.o frontend.o generic.o isoch.o |
2 | 2 | ||
3 | obj-$(CONFIG_AGP) += agpgart.o | 3 | obj-$(CONFIG_AGP) += agpgart.o |
4 | obj-$(CONFIG_COMPAT) += compat_ioctl.o | ||
4 | obj-$(CONFIG_AGP_ALI) += ali-agp.o | 5 | obj-$(CONFIG_AGP_ALI) += ali-agp.o |
5 | obj-$(CONFIG_AGP_ATI) += ati-agp.o | 6 | obj-$(CONFIG_AGP_ATI) += ati-agp.o |
6 | obj-$(CONFIG_AGP_AMD) += amd-k7-agp.o | 7 | obj-$(CONFIG_AGP_AMD) += amd-k7-agp.o |
diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h index 1d59e2a5b9aa..9bd68d9f0f59 100644 --- a/drivers/char/agp/agp.h +++ b/drivers/char/agp/agp.h | |||
@@ -114,6 +114,7 @@ struct agp_bridge_driver { | |||
114 | void (*free_by_type)(struct agp_memory *); | 114 | void (*free_by_type)(struct agp_memory *); |
115 | void *(*agp_alloc_page)(struct agp_bridge_data *); | 115 | void *(*agp_alloc_page)(struct agp_bridge_data *); |
116 | void (*agp_destroy_page)(void *); | 116 | void (*agp_destroy_page)(void *); |
117 | int (*agp_type_to_mask_type) (struct agp_bridge_data *, int); | ||
117 | }; | 118 | }; |
118 | 119 | ||
119 | struct agp_bridge_data { | 120 | struct agp_bridge_data { |
@@ -218,6 +219,7 @@ struct agp_bridge_data { | |||
218 | #define I810_PTE_MAIN_UNCACHED 0x00000000 | 219 | #define I810_PTE_MAIN_UNCACHED 0x00000000 |
219 | #define I810_PTE_LOCAL 0x00000002 | 220 | #define I810_PTE_LOCAL 0x00000002 |
220 | #define I810_PTE_VALID 0x00000001 | 221 | #define I810_PTE_VALID 0x00000001 |
222 | #define I830_PTE_SYSTEM_CACHED 0x00000006 | ||
221 | #define I810_SMRAM_MISCC 0x70 | 223 | #define I810_SMRAM_MISCC 0x70 |
222 | #define I810_GFX_MEM_WIN_SIZE 0x00010000 | 224 | #define I810_GFX_MEM_WIN_SIZE 0x00010000 |
223 | #define I810_GFX_MEM_WIN_32M 0x00010000 | 225 | #define I810_GFX_MEM_WIN_32M 0x00010000 |
@@ -270,8 +272,16 @@ void global_cache_flush(void); | |||
270 | void get_agp_version(struct agp_bridge_data *bridge); | 272 | void get_agp_version(struct agp_bridge_data *bridge); |
271 | unsigned long agp_generic_mask_memory(struct agp_bridge_data *bridge, | 273 | unsigned long agp_generic_mask_memory(struct agp_bridge_data *bridge, |
272 | unsigned long addr, int type); | 274 | unsigned long addr, int type); |
275 | int agp_generic_type_to_mask_type(struct agp_bridge_data *bridge, | ||
276 | int type); | ||
273 | struct agp_bridge_data *agp_generic_find_bridge(struct pci_dev *pdev); | 277 | struct agp_bridge_data *agp_generic_find_bridge(struct pci_dev *pdev); |
274 | 278 | ||
279 | /* generic functions for user-populated AGP memory types */ | ||
280 | struct agp_memory *agp_generic_alloc_user(size_t page_count, int type); | ||
281 | void agp_alloc_page_array(size_t size, struct agp_memory *mem); | ||
282 | void agp_free_page_array(struct agp_memory *mem); | ||
283 | |||
284 | |||
275 | /* generic routines for agp>=3 */ | 285 | /* generic routines for agp>=3 */ |
276 | int agp3_generic_fetch_size(void); | 286 | int agp3_generic_fetch_size(void); |
277 | void agp3_generic_tlbflush(struct agp_memory *mem); | 287 | void agp3_generic_tlbflush(struct agp_memory *mem); |
@@ -288,6 +298,8 @@ extern struct aper_size_info_16 agp3_generic_sizes[]; | |||
288 | extern int agp_off; | 298 | extern int agp_off; |
289 | extern int agp_try_unsupported_boot; | 299 | extern int agp_try_unsupported_boot; |
290 | 300 | ||
301 | long compat_agp_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | ||
302 | |||
291 | /* Chipset independent registers (from AGP Spec) */ | 303 |
292 | #define AGP_APBASE 0x10 | 304 | #define AGP_APBASE 0x10 |
293 | 305 | ||
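
The new hook translates a caller-visible memory type into an index into the driver's gatt_mask table, so a bridge can support extra types without the core knowing about them. The generic version, added in generic.c later in this patch, collapses every user type to mask 0:

        int agp_generic_type_to_mask_type(struct agp_bridge_data *bridge,
                                          int type)
        {
                if (type >= AGP_USER_TYPES)  /* user types carry no mask bits */
                        return 0;
                return type;    /* legacy types index gatt_mask directly */
        }

The insert/remove paths below first require type == mem->type and only then ask the driver for the mask index before touching the GATT.
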
diff --git a/drivers/char/agp/ali-agp.c b/drivers/char/agp/ali-agp.c index 5a31ec7c62fc..98177a93076f 100644 --- a/drivers/char/agp/ali-agp.c +++ b/drivers/char/agp/ali-agp.c | |||
@@ -214,6 +214,7 @@ static struct agp_bridge_driver ali_generic_bridge = { | |||
214 | .free_by_type = agp_generic_free_by_type, | 214 | .free_by_type = agp_generic_free_by_type, |
215 | .agp_alloc_page = agp_generic_alloc_page, | 215 | .agp_alloc_page = agp_generic_alloc_page, |
216 | .agp_destroy_page = ali_destroy_page, | 216 | .agp_destroy_page = ali_destroy_page, |
217 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
217 | }; | 218 | }; |
218 | 219 | ||
219 | static struct agp_bridge_driver ali_m1541_bridge = { | 220 | static struct agp_bridge_driver ali_m1541_bridge = { |
@@ -237,6 +238,7 @@ static struct agp_bridge_driver ali_m1541_bridge = { | |||
237 | .free_by_type = agp_generic_free_by_type, | 238 | .free_by_type = agp_generic_free_by_type, |
238 | .agp_alloc_page = m1541_alloc_page, | 239 | .agp_alloc_page = m1541_alloc_page, |
239 | .agp_destroy_page = m1541_destroy_page, | 240 | .agp_destroy_page = m1541_destroy_page, |
241 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
240 | }; | 242 | }; |
241 | 243 | ||
242 | 244 | ||
diff --git a/drivers/char/agp/alpha-agp.c b/drivers/char/agp/alpha-agp.c index b4e00a343da9..b0acf41c0db9 100644 --- a/drivers/char/agp/alpha-agp.c +++ b/drivers/char/agp/alpha-agp.c | |||
@@ -91,6 +91,9 @@ static int alpha_core_agp_insert_memory(struct agp_memory *mem, off_t pg_start, | |||
91 | int num_entries, status; | 91 | int num_entries, status; |
92 | void *temp; | 92 | void *temp; |
93 | 93 | ||
94 | if (type >= AGP_USER_TYPES || mem->type >= AGP_USER_TYPES) | ||
95 | return -EINVAL; | ||
96 | |||
94 | temp = agp_bridge->current_size; | 97 | temp = agp_bridge->current_size; |
95 | num_entries = A_SIZE_FIX(temp)->num_entries; | 98 | num_entries = A_SIZE_FIX(temp)->num_entries; |
96 | if ((pg_start + mem->page_count) > num_entries) | 99 | if ((pg_start + mem->page_count) > num_entries) |
@@ -142,6 +145,7 @@ struct agp_bridge_driver alpha_core_agp_driver = { | |||
142 | .free_by_type = agp_generic_free_by_type, | 145 | .free_by_type = agp_generic_free_by_type, |
143 | .agp_alloc_page = agp_generic_alloc_page, | 146 | .agp_alloc_page = agp_generic_alloc_page, |
144 | .agp_destroy_page = agp_generic_destroy_page, | 147 | .agp_destroy_page = agp_generic_destroy_page, |
148 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
145 | }; | 149 | }; |
146 | 150 | ||
147 | struct agp_bridge_data *alpha_bridge; | 151 | struct agp_bridge_data *alpha_bridge; |
diff --git a/drivers/char/agp/amd-k7-agp.c b/drivers/char/agp/amd-k7-agp.c index c85c8cadb6df..3d8d448bf394 100644 --- a/drivers/char/agp/amd-k7-agp.c +++ b/drivers/char/agp/amd-k7-agp.c | |||
@@ -381,6 +381,7 @@ static struct agp_bridge_driver amd_irongate_driver = { | |||
381 | .free_by_type = agp_generic_free_by_type, | 381 | .free_by_type = agp_generic_free_by_type, |
382 | .agp_alloc_page = agp_generic_alloc_page, | 382 | .agp_alloc_page = agp_generic_alloc_page, |
383 | .agp_destroy_page = agp_generic_destroy_page, | 383 | .agp_destroy_page = agp_generic_destroy_page, |
384 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
384 | }; | 385 | }; |
385 | 386 | ||
386 | static struct agp_device_ids amd_agp_device_ids[] __devinitdata = | 387 | static struct agp_device_ids amd_agp_device_ids[] __devinitdata = |
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 93d2209fee4c..636d984ed4a6 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c | |||
@@ -62,12 +62,18 @@ static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type) | |||
62 | { | 62 | { |
63 | int i, j, num_entries; | 63 | int i, j, num_entries; |
64 | long long tmp; | 64 | long long tmp; |
65 | int mask_type; | ||
66 | struct agp_bridge_data *bridge = mem->bridge; | ||
65 | u32 pte; | 67 | u32 pte; |
66 | 68 | ||
67 | num_entries = agp_num_entries(); | 69 | num_entries = agp_num_entries(); |
68 | 70 | ||
69 | if (type != 0 || mem->type != 0) | 71 | if (type != mem->type) |
70 | return -EINVAL; | 72 | return -EINVAL; |
73 | mask_type = bridge->driver->agp_type_to_mask_type(bridge, type); | ||
74 | if (mask_type != 0) | ||
75 | return -EINVAL; | ||
76 | |||
71 | 77 | ||
72 | /* Make sure we can fit the range in the gatt table. */ | 78 | /* Make sure we can fit the range in the gatt table. */ |
73 | /* FIXME: could wrap */ | 79 | /* FIXME: could wrap */ |
@@ -90,7 +96,7 @@ static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type) | |||
90 | 96 | ||
91 | for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { | 97 | for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { |
92 | tmp = agp_bridge->driver->mask_memory(agp_bridge, | 98 | tmp = agp_bridge->driver->mask_memory(agp_bridge, |
93 | mem->memory[i], mem->type); | 99 | mem->memory[i], mask_type); |
94 | 100 | ||
95 | BUG_ON(tmp & 0xffffff0000000ffcULL); | 101 | BUG_ON(tmp & 0xffffff0000000ffcULL); |
96 | pte = (tmp & 0x000000ff00000000ULL) >> 28; | 102 | pte = (tmp & 0x000000ff00000000ULL) >> 28; |
@@ -247,6 +253,7 @@ static struct agp_bridge_driver amd_8151_driver = { | |||
247 | .free_by_type = agp_generic_free_by_type, | 253 | .free_by_type = agp_generic_free_by_type, |
248 | .agp_alloc_page = agp_generic_alloc_page, | 254 | .agp_alloc_page = agp_generic_alloc_page, |
249 | .agp_destroy_page = agp_generic_destroy_page, | 255 | .agp_destroy_page = agp_generic_destroy_page, |
256 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
250 | }; | 257 | }; |
251 | 258 | ||
252 | /* Some basic sanity checks for the aperture. */ | 259 | /* Some basic sanity checks for the aperture. */ |
diff --git a/drivers/char/agp/ati-agp.c b/drivers/char/agp/ati-agp.c index 9987dc2e0c3f..77c9ad68fba9 100644 --- a/drivers/char/agp/ati-agp.c +++ b/drivers/char/agp/ati-agp.c | |||
@@ -431,6 +431,7 @@ static struct agp_bridge_driver ati_generic_bridge = { | |||
431 | .free_by_type = agp_generic_free_by_type, | 431 | .free_by_type = agp_generic_free_by_type, |
432 | .agp_alloc_page = agp_generic_alloc_page, | 432 | .agp_alloc_page = agp_generic_alloc_page, |
433 | .agp_destroy_page = agp_generic_destroy_page, | 433 | .agp_destroy_page = agp_generic_destroy_page, |
434 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
434 | }; | 435 | }; |
435 | 436 | ||
436 | 437 | ||
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c index d59e037ddd12..ebdd6dd66edb 100644 --- a/drivers/char/agp/backend.c +++ b/drivers/char/agp/backend.c | |||
@@ -43,7 +43,7 @@ | |||
43 | * fix some real stupidity. It's only by chance we can bump | 43 | * fix some real stupidity. It's only by chance we can bump |
44 | * past 0.99 at all due to some boolean logic error. */ | 44 | * past 0.99 at all due to some boolean logic error. */ |
45 | #define AGPGART_VERSION_MAJOR 0 | 45 | #define AGPGART_VERSION_MAJOR 0 |
46 | #define AGPGART_VERSION_MINOR 101 | 46 | #define AGPGART_VERSION_MINOR 102 |
47 | static const struct agp_version agp_current_version = | 47 | static const struct agp_version agp_current_version = |
48 | { | 48 | { |
49 | .major = AGPGART_VERSION_MAJOR, | 49 | .major = AGPGART_VERSION_MAJOR, |
diff --git a/drivers/char/agp/compat_ioctl.c b/drivers/char/agp/compat_ioctl.c new file mode 100644 index 000000000000..fcb4b1bf0d4e --- /dev/null +++ b/drivers/char/agp/compat_ioctl.c | |||
@@ -0,0 +1,282 @@ | |||
1 | /* | ||
2 | * AGPGART driver frontend compatibility ioctls | ||
3 | * Copyright (C) 2004 Silicon Graphics, Inc. | ||
4 | * Copyright (C) 2002-2003 Dave Jones | ||
5 | * Copyright (C) 1999 Jeff Hartmann | ||
6 | * Copyright (C) 1999 Precision Insight, Inc. | ||
7 | * Copyright (C) 1999 Xi Graphics, Inc. | ||
8 | * | ||
9 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
10 | * copy of this software and associated documentation files (the "Software"), | ||
11 | * to deal in the Software without restriction, including without limitation | ||
12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
13 | * and/or sell copies of the Software, and to permit persons to whom the | ||
14 | * Software is furnished to do so, subject to the following conditions: | ||
15 | * | ||
16 | * The above copyright notice and this permission notice shall be included | ||
17 | * in all copies or substantial portions of the Software. | ||
18 | * | ||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
22 | * JEFF HARTMANN, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, | ||
23 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
24 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE | ||
25 | * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
26 | * | ||
27 | */ | ||
28 | |||
29 | #include <linux/kernel.h> | ||
30 | #include <linux/pci.h> | ||
31 | #include <linux/agpgart.h> | ||
32 | #include <asm/uaccess.h> | ||
33 | #include "agp.h" | ||
34 | #include "compat_ioctl.h" | ||
35 | |||
36 | static int compat_agpioc_info_wrap(struct agp_file_private *priv, void __user *arg) | ||
37 | { | ||
38 | struct agp_info32 userinfo; | ||
39 | struct agp_kern_info kerninfo; | ||
40 | |||
41 | agp_copy_info(agp_bridge, &kerninfo); | ||
42 | |||
43 | userinfo.version.major = kerninfo.version.major; | ||
44 | userinfo.version.minor = kerninfo.version.minor; | ||
45 | userinfo.bridge_id = kerninfo.device->vendor | | ||
46 | (kerninfo.device->device << 16); | ||
47 | userinfo.agp_mode = kerninfo.mode; | ||
48 | userinfo.aper_base = (compat_long_t)kerninfo.aper_base; | ||
49 | userinfo.aper_size = kerninfo.aper_size; | ||
50 | userinfo.pg_total = userinfo.pg_system = kerninfo.max_memory; | ||
51 | userinfo.pg_used = kerninfo.current_memory; | ||
52 | |||
53 | if (copy_to_user(arg, &userinfo, sizeof(userinfo))) | ||
54 | return -EFAULT; | ||
55 | |||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | static int compat_agpioc_reserve_wrap(struct agp_file_private *priv, void __user *arg) | ||
60 | { | ||
61 | struct agp_region32 ureserve; | ||
62 | struct agp_region kreserve; | ||
63 | struct agp_client *client; | ||
64 | struct agp_file_private *client_priv; | ||
65 | |||
66 | DBG(""); | ||
67 | if (copy_from_user(&ureserve, arg, sizeof(ureserve))) | ||
68 | return -EFAULT; | ||
69 | |||
70 | if ((unsigned) ureserve.seg_count >= ~0U/sizeof(struct agp_segment32)) | ||
71 | return -EFAULT; | ||
72 | |||
73 | kreserve.pid = ureserve.pid; | ||
74 | kreserve.seg_count = ureserve.seg_count; | ||
75 | |||
76 | client = agp_find_client_by_pid(kreserve.pid); | ||
77 | |||
78 | if (kreserve.seg_count == 0) { | ||
79 | /* remove a client */ | ||
80 | client_priv = agp_find_private(kreserve.pid); | ||
81 | |||
82 | if (client_priv != NULL) { | ||
83 | set_bit(AGP_FF_IS_CLIENT, &client_priv->access_flags); | ||
84 | set_bit(AGP_FF_IS_VALID, &client_priv->access_flags); | ||
85 | } | ||
86 | if (client == NULL) { | ||
87 | /* client is already removed */ | ||
88 | return 0; | ||
89 | } | ||
90 | return agp_remove_client(kreserve.pid); | ||
91 | } else { | ||
92 | struct agp_segment32 *usegment; | ||
93 | struct agp_segment *ksegment; | ||
94 | int seg; | ||
95 | |||
96 | if (ureserve.seg_count >= 16384) | ||
97 | return -EINVAL; | ||
98 | |||
99 | usegment = kmalloc(sizeof(*usegment) * ureserve.seg_count, GFP_KERNEL); | ||
100 | if (!usegment) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | ksegment = kmalloc(sizeof(*ksegment) * kreserve.seg_count, GFP_KERNEL); | ||
104 | if (!ksegment) { | ||
105 | kfree(usegment); | ||
106 | return -ENOMEM; | ||
107 | } | ||
108 | |||
109 | if (copy_from_user(usegment, (void __user *) ureserve.seg_list, | ||
110 | sizeof(*usegment) * ureserve.seg_count)) { | ||
111 | kfree(usegment); | ||
112 | kfree(ksegment); | ||
113 | return -EFAULT; | ||
114 | } | ||
115 | |||
116 | for (seg = 0; seg < ureserve.seg_count; seg++) { | ||
117 | ksegment[seg].pg_start = usegment[seg].pg_start; | ||
118 | ksegment[seg].pg_count = usegment[seg].pg_count; | ||
119 | ksegment[seg].prot = usegment[seg].prot; | ||
120 | } | ||
121 | |||
122 | kfree(usegment); | ||
123 | kreserve.seg_list = ksegment; | ||
124 | |||
125 | if (client == NULL) { | ||
126 | /* Create the client and add the segment */ | ||
127 | client = agp_create_client(kreserve.pid); | ||
128 | |||
129 | if (client == NULL) { | ||
130 | kfree(ksegment); | ||
131 | return -ENOMEM; | ||
132 | } | ||
133 | client_priv = agp_find_private(kreserve.pid); | ||
134 | |||
135 | if (client_priv != NULL) { | ||
136 | set_bit(AGP_FF_IS_CLIENT, &client_priv->access_flags); | ||
137 | set_bit(AGP_FF_IS_VALID, &client_priv->access_flags); | ||
138 | } | ||
139 | } | ||
140 | return agp_create_segment(client, &kreserve); | ||
141 | } | ||
142 | /* Will never really happen */ | ||
143 | return -EINVAL; | ||
144 | } | ||
145 | |||
146 | static int compat_agpioc_allocate_wrap(struct agp_file_private *priv, void __user *arg) | ||
147 | { | ||
148 | struct agp_memory *memory; | ||
149 | struct agp_allocate32 alloc; | ||
150 | |||
151 | DBG(""); | ||
152 | if (copy_from_user(&alloc, arg, sizeof(alloc))) | ||
153 | return -EFAULT; | ||
154 | |||
155 | memory = agp_allocate_memory_wrap(alloc.pg_count, alloc.type); | ||
156 | |||
157 | if (memory == NULL) | ||
158 | return -ENOMEM; | ||
159 | |||
160 | alloc.key = memory->key; | ||
161 | alloc.physical = memory->physical; | ||
162 | |||
163 | if (copy_to_user(arg, &alloc, sizeof(alloc))) { | ||
164 | agp_free_memory_wrap(memory); | ||
165 | return -EFAULT; | ||
166 | } | ||
167 | return 0; | ||
168 | } | ||
169 | |||
170 | static int compat_agpioc_bind_wrap(struct agp_file_private *priv, void __user *arg) | ||
171 | { | ||
172 | struct agp_bind32 bind_info; | ||
173 | struct agp_memory *memory; | ||
174 | |||
175 | DBG(""); | ||
176 | if (copy_from_user(&bind_info, arg, sizeof(bind_info))) | ||
177 | return -EFAULT; | ||
178 | |||
179 | memory = agp_find_mem_by_key(bind_info.key); | ||
180 | |||
181 | if (memory == NULL) | ||
182 | return -EINVAL; | ||
183 | |||
184 | return agp_bind_memory(memory, bind_info.pg_start); | ||
185 | } | ||
186 | |||
187 | static int compat_agpioc_unbind_wrap(struct agp_file_private *priv, void __user *arg) | ||
188 | { | ||
189 | struct agp_memory *memory; | ||
190 | struct agp_unbind32 unbind; | ||
191 | |||
192 | DBG(""); | ||
193 | if (copy_from_user(&unbind, arg, sizeof(unbind))) | ||
194 | return -EFAULT; | ||
195 | |||
196 | memory = agp_find_mem_by_key(unbind.key); | ||
197 | |||
198 | if (memory == NULL) | ||
199 | return -EINVAL; | ||
200 | |||
201 | return agp_unbind_memory(memory); | ||
202 | } | ||
203 | |||
204 | long compat_agp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
205 | { | ||
206 | struct agp_file_private *curr_priv = file->private_data; | ||
207 | int ret_val = -ENOTTY; | ||
208 | |||
209 | mutex_lock(&(agp_fe.agp_mutex)); | ||
210 | |||
211 | if ((agp_fe.current_controller == NULL) && | ||
212 | (cmd != AGPIOC_ACQUIRE32)) { | ||
213 | ret_val = -EINVAL; | ||
214 | goto ioctl_out; | ||
215 | } | ||
216 | if ((agp_fe.backend_acquired != TRUE) && | ||
217 | (cmd != AGPIOC_ACQUIRE32)) { | ||
218 | ret_val = -EBUSY; | ||
219 | goto ioctl_out; | ||
220 | } | ||
221 | if (cmd != AGPIOC_ACQUIRE32) { | ||
222 | if (!(test_bit(AGP_FF_IS_CONTROLLER, &curr_priv->access_flags))) { | ||
223 | ret_val = -EPERM; | ||
224 | goto ioctl_out; | ||
225 | } | ||
226 | /* Use the original pid of the controller, | ||
227 | * in case it's threaded */ | ||
228 | |||
229 | if (agp_fe.current_controller->pid != curr_priv->my_pid) { | ||
230 | ret_val = -EBUSY; | ||
231 | goto ioctl_out; | ||
232 | } | ||
233 | } | ||
234 | |||
235 | switch (cmd) { | ||
236 | case AGPIOC_INFO32: | ||
237 | ret_val = compat_agpioc_info_wrap(curr_priv, (void __user *) arg); | ||
238 | break; | ||
239 | |||
240 | case AGPIOC_ACQUIRE32: | ||
241 | ret_val = agpioc_acquire_wrap(curr_priv); | ||
242 | break; | ||
243 | |||
244 | case AGPIOC_RELEASE32: | ||
245 | ret_val = agpioc_release_wrap(curr_priv); | ||
246 | break; | ||
247 | |||
248 | case AGPIOC_SETUP32: | ||
249 | ret_val = agpioc_setup_wrap(curr_priv, (void __user *) arg); | ||
250 | break; | ||
251 | |||
252 | case AGPIOC_RESERVE32: | ||
253 | ret_val = compat_agpioc_reserve_wrap(curr_priv, (void __user *) arg); | ||
254 | break; | ||
255 | |||
256 | case AGPIOC_PROTECT32: | ||
257 | ret_val = agpioc_protect_wrap(curr_priv); | ||
258 | break; | ||
259 | |||
260 | case AGPIOC_ALLOCATE32: | ||
261 | ret_val = compat_agpioc_allocate_wrap(curr_priv, (void __user *) arg); | ||
262 | break; | ||
263 | |||
264 | case AGPIOC_DEALLOCATE32: | ||
265 | ret_val = agpioc_deallocate_wrap(curr_priv, (int) arg); | ||
266 | break; | ||
267 | |||
268 | case AGPIOC_BIND32: | ||
269 | ret_val = compat_agpioc_bind_wrap(curr_priv, (void __user *) arg); | ||
270 | break; | ||
271 | |||
272 | case AGPIOC_UNBIND32: | ||
273 | ret_val = compat_agpioc_unbind_wrap(curr_priv, (void __user *) arg); | ||
274 | break; | ||
275 | } | ||
276 | |||
277 | ioctl_out: | ||
278 | DBG("ioctl returns %d\n", ret_val); | ||
279 | mutex_unlock(&(agp_fe.agp_mutex)); | ||
280 | return ret_val; | ||
281 | } | ||
282 | |||
diff --git a/drivers/char/agp/compat_ioctl.h b/drivers/char/agp/compat_ioctl.h new file mode 100644 index 000000000000..71939d637236 --- /dev/null +++ b/drivers/char/agp/compat_ioctl.h | |||
@@ -0,0 +1,105 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1999 Jeff Hartmann | ||
3 | * Copyright (C) 1999 Precision Insight, Inc. | ||
4 | * Copyright (C) 1999 Xi Graphics, Inc. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included | ||
14 | * in all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
17 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * JEFF HARTMANN, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, | ||
20 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
21 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE | ||
22 | * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #ifndef _AGP_COMPAT_IOCTL_H | ||
27 | #define _AGP_COMPAT_IOCTL_H | ||
28 | |||
29 | #include <linux/compat.h> | ||
30 | #include <linux/agpgart.h> | ||
31 | |||
32 | #define AGPIOC_INFO32 _IOR (AGPIOC_BASE, 0, compat_uptr_t) | ||
33 | #define AGPIOC_ACQUIRE32 _IO (AGPIOC_BASE, 1) | ||
34 | #define AGPIOC_RELEASE32 _IO (AGPIOC_BASE, 2) | ||
35 | #define AGPIOC_SETUP32 _IOW (AGPIOC_BASE, 3, compat_uptr_t) | ||
36 | #define AGPIOC_RESERVE32 _IOW (AGPIOC_BASE, 4, compat_uptr_t) | ||
37 | #define AGPIOC_PROTECT32 _IOW (AGPIOC_BASE, 5, compat_uptr_t) | ||
38 | #define AGPIOC_ALLOCATE32 _IOWR(AGPIOC_BASE, 6, compat_uptr_t) | ||
39 | #define AGPIOC_DEALLOCATE32 _IOW (AGPIOC_BASE, 7, compat_int_t) | ||
40 | #define AGPIOC_BIND32 _IOW (AGPIOC_BASE, 8, compat_uptr_t) | ||
41 | #define AGPIOC_UNBIND32 _IOW (AGPIOC_BASE, 9, compat_uptr_t) | ||
42 | |||
43 | struct agp_info32 { | ||
44 | struct agp_version version; /* version of the driver */ | ||
45 | u32 bridge_id; /* bridge vendor/device */ | ||
46 | u32 agp_mode; /* mode info of bridge */ | ||
47 | compat_long_t aper_base; /* base of aperture */ | ||
48 | compat_size_t aper_size; /* size of aperture */ | ||
49 | compat_size_t pg_total; /* max pages (swap + system) */ | ||
50 | compat_size_t pg_system; /* max pages (system) */ | ||
51 | compat_size_t pg_used; /* current pages used */ | ||
52 | }; | ||
53 | |||
54 | /* | ||
55 | * The "prot" down below needs still a "sleep" flag somehow ... | ||
56 | */ | ||
57 | struct agp_segment32 { | ||
58 | compat_off_t pg_start; /* starting page to populate */ | ||
59 | compat_size_t pg_count; /* number of pages */ | ||
60 | compat_int_t prot; /* prot flags for mmap */ | ||
61 | }; | ||
62 | |||
63 | struct agp_region32 { | ||
64 | compat_pid_t pid; /* pid of process */ | ||
65 | compat_size_t seg_count; /* number of segments */ | ||
66 | struct agp_segment32 *seg_list; | ||
67 | }; | ||
68 | |||
69 | struct agp_allocate32 { | ||
70 | compat_int_t key; /* tag of allocation */ | ||
71 | compat_size_t pg_count; /* number of pages */ | ||
72 | u32 type; /* 0 == normal, other devspec */ | ||
73 | u32 physical; /* device specific (some devices | ||
74 | * need a phys address of the | ||
75 | * actual page behind the gatt | ||
76 | * table) */ | ||
77 | }; | ||
78 | |||
79 | struct agp_bind32 { | ||
80 | compat_int_t key; /* tag of allocation */ | ||
81 | compat_off_t pg_start; /* starting page to populate */ | ||
82 | }; | ||
83 | |||
84 | struct agp_unbind32 { | ||
85 | compat_int_t key; /* tag of allocation */ | ||
86 | u32 priority; /* priority for paging out */ | ||
87 | }; | ||
88 | |||
89 | extern struct agp_front_data agp_fe; | ||
90 | |||
91 | int agpioc_acquire_wrap(struct agp_file_private *priv); | ||
92 | int agpioc_release_wrap(struct agp_file_private *priv); | ||
93 | int agpioc_protect_wrap(struct agp_file_private *priv); | ||
94 | int agpioc_setup_wrap(struct agp_file_private *priv, void __user *arg); | ||
95 | int agpioc_deallocate_wrap(struct agp_file_private *priv, int arg); | ||
96 | struct agp_file_private *agp_find_private(pid_t pid); | ||
97 | struct agp_client *agp_create_client(pid_t id); | ||
98 | int agp_remove_client(pid_t id); | ||
99 | int agp_create_segment(struct agp_client *client, struct agp_region *region); | ||
100 | void agp_free_memory_wrap(struct agp_memory *memory); | ||
101 | struct agp_memory *agp_allocate_memory_wrap(size_t pg_count, u32 type); | ||
102 | struct agp_memory *agp_find_mem_by_key(int key); | ||
103 | struct agp_client *agp_find_client_by_pid(pid_t id); | ||
104 | |||
105 | #endif /* _AGP_COMPAT_IOCTL_H */ | ||
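
A 32-bit client never uses these *32 macros directly: compiled with 4-byte pointers, the ordinary AGPIOC_* macros from linux/agpgart.h already encode the sizes that the compat_uptr_t definitions above reproduce on the 64-bit side. An illustrative client view, error handling omitted:

        #include <fcntl.h>
        #include <sys/ioctl.h>
        #include <linux/agpgart.h>

        int main(void)
        {
                struct agp_info info;
                int fd = open("/dev/agpgart", O_RDWR);

                if (fd < 0)
                        return 1;
                /* built as a 32-bit binary, this request number equals
                 * AGPIOC_INFO32, so a 64-bit kernel dispatches it through
                 * compat_agp_ioctl() */
                ioctl(fd, AGPIOC_INFO, &info);
                return 0;
        }
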
diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c index 30f730ff81c1..658cb1a72d2c 100644 --- a/drivers/char/agp/efficeon-agp.c +++ b/drivers/char/agp/efficeon-agp.c | |||
@@ -335,6 +335,7 @@ static struct agp_bridge_driver efficeon_driver = { | |||
335 | .free_by_type = agp_generic_free_by_type, | 335 | .free_by_type = agp_generic_free_by_type, |
336 | .agp_alloc_page = agp_generic_alloc_page, | 336 | .agp_alloc_page = agp_generic_alloc_page, |
337 | .agp_destroy_page = agp_generic_destroy_page, | 337 | .agp_destroy_page = agp_generic_destroy_page, |
338 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
338 | }; | 339 | }; |
339 | 340 | ||
340 | static int __devinit agp_efficeon_probe(struct pci_dev *pdev, | 341 | static int __devinit agp_efficeon_probe(struct pci_dev *pdev, |
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c index 0f2ed2aa2d81..679d7f972439 100644 --- a/drivers/char/agp/frontend.c +++ b/drivers/char/agp/frontend.c | |||
@@ -41,9 +41,9 @@ | |||
41 | #include <asm/pgtable.h> | 41 | #include <asm/pgtable.h> |
42 | #include "agp.h" | 42 | #include "agp.h" |
43 | 43 | ||
44 | static struct agp_front_data agp_fe; | 44 | struct agp_front_data agp_fe; |
45 | 45 | ||
46 | static struct agp_memory *agp_find_mem_by_key(int key) | 46 | struct agp_memory *agp_find_mem_by_key(int key) |
47 | { | 47 | { |
48 | struct agp_memory *curr; | 48 | struct agp_memory *curr; |
49 | 49 | ||
@@ -159,7 +159,7 @@ static pgprot_t agp_convert_mmap_flags(int prot) | |||
159 | return vm_get_page_prot(prot_bits); | 159 | return vm_get_page_prot(prot_bits); |
160 | } | 160 | } |
161 | 161 | ||
162 | static int agp_create_segment(struct agp_client *client, struct agp_region *region) | 162 | int agp_create_segment(struct agp_client *client, struct agp_region *region) |
163 | { | 163 | { |
164 | struct agp_segment_priv **ret_seg; | 164 | struct agp_segment_priv **ret_seg; |
165 | struct agp_segment_priv *seg; | 165 | struct agp_segment_priv *seg; |
@@ -211,7 +211,7 @@ static void agp_insert_into_pool(struct agp_memory * temp) | |||
211 | 211 | ||
212 | /* File private list routines */ | 212 | /* File private list routines */ |
213 | 213 | ||
214 | static struct agp_file_private *agp_find_private(pid_t pid) | 214 | struct agp_file_private *agp_find_private(pid_t pid) |
215 | { | 215 | { |
216 | struct agp_file_private *curr; | 216 | struct agp_file_private *curr; |
217 | 217 | ||
@@ -266,13 +266,13 @@ static void agp_remove_file_private(struct agp_file_private * priv) | |||
266 | * Wrappers for agp_free_memory & agp_allocate_memory | 266 | * Wrappers for agp_free_memory & agp_allocate_memory |
267 | * These make sure that internal lists are kept updated. | 267 | * These make sure that internal lists are kept updated. |
268 | */ | 268 | */ |
269 | static void agp_free_memory_wrap(struct agp_memory *memory) | 269 | void agp_free_memory_wrap(struct agp_memory *memory) |
270 | { | 270 | { |
271 | agp_remove_from_pool(memory); | 271 | agp_remove_from_pool(memory); |
272 | agp_free_memory(memory); | 272 | agp_free_memory(memory); |
273 | } | 273 | } |
274 | 274 | ||
275 | static struct agp_memory *agp_allocate_memory_wrap(size_t pg_count, u32 type) | 275 | struct agp_memory *agp_allocate_memory_wrap(size_t pg_count, u32 type) |
276 | { | 276 | { |
277 | struct agp_memory *memory; | 277 | struct agp_memory *memory; |
278 | 278 | ||
@@ -484,7 +484,7 @@ static struct agp_controller *agp_find_controller_for_client(pid_t id) | |||
484 | return NULL; | 484 | return NULL; |
485 | } | 485 | } |
486 | 486 | ||
487 | static struct agp_client *agp_find_client_by_pid(pid_t id) | 487 | struct agp_client *agp_find_client_by_pid(pid_t id) |
488 | { | 488 | { |
489 | struct agp_client *temp; | 489 | struct agp_client *temp; |
490 | 490 | ||
@@ -509,7 +509,7 @@ static void agp_insert_client(struct agp_client *client) | |||
509 | agp_fe.current_controller->num_clients++; | 509 | agp_fe.current_controller->num_clients++; |
510 | } | 510 | } |
511 | 511 | ||
512 | static struct agp_client *agp_create_client(pid_t id) | 512 | struct agp_client *agp_create_client(pid_t id) |
513 | { | 513 | { |
514 | struct agp_client *new_client; | 514 | struct agp_client *new_client; |
515 | 515 | ||
@@ -522,7 +522,7 @@ static struct agp_client *agp_create_client(pid_t id) | |||
522 | return new_client; | 522 | return new_client; |
523 | } | 523 | } |
524 | 524 | ||
525 | static int agp_remove_client(pid_t id) | 525 | int agp_remove_client(pid_t id) |
526 | { | 526 | { |
527 | struct agp_client *client; | 527 | struct agp_client *client; |
528 | struct agp_client *prev_client; | 528 | struct agp_client *prev_client; |
@@ -746,7 +746,7 @@ static int agpioc_info_wrap(struct agp_file_private *priv, void __user *arg) | |||
746 | return 0; | 746 | return 0; |
747 | } | 747 | } |
748 | 748 | ||
749 | static int agpioc_acquire_wrap(struct agp_file_private *priv) | 749 | int agpioc_acquire_wrap(struct agp_file_private *priv) |
750 | { | 750 | { |
751 | struct agp_controller *controller; | 751 | struct agp_controller *controller; |
752 | 752 | ||
@@ -789,14 +789,14 @@ static int agpioc_acquire_wrap(struct agp_file_private *priv) | |||
789 | return 0; | 789 | return 0; |
790 | } | 790 | } |
791 | 791 | ||
792 | static int agpioc_release_wrap(struct agp_file_private *priv) | 792 | int agpioc_release_wrap(struct agp_file_private *priv) |
793 | { | 793 | { |
794 | DBG(""); | 794 | DBG(""); |
795 | agp_controller_release_current(agp_fe.current_controller, priv); | 795 | agp_controller_release_current(agp_fe.current_controller, priv); |
796 | return 0; | 796 | return 0; |
797 | } | 797 | } |
798 | 798 | ||
799 | static int agpioc_setup_wrap(struct agp_file_private *priv, void __user *arg) | 799 | int agpioc_setup_wrap(struct agp_file_private *priv, void __user *arg) |
800 | { | 800 | { |
801 | struct agp_setup mode; | 801 | struct agp_setup mode; |
802 | 802 | ||
@@ -876,7 +876,7 @@ static int agpioc_reserve_wrap(struct agp_file_private *priv, void __user *arg) | |||
876 | return -EINVAL; | 876 | return -EINVAL; |
877 | } | 877 | } |
878 | 878 | ||
879 | static int agpioc_protect_wrap(struct agp_file_private *priv) | 879 | int agpioc_protect_wrap(struct agp_file_private *priv) |
880 | { | 880 | { |
881 | DBG(""); | 881 | DBG(""); |
882 | /* This function is not currently implemented */ | 882 | /* This function is not currently implemented */ |
@@ -892,6 +892,9 @@ static int agpioc_allocate_wrap(struct agp_file_private *priv, void __user *arg) | |||
892 | if (copy_from_user(&alloc, arg, sizeof(struct agp_allocate))) | 892 | if (copy_from_user(&alloc, arg, sizeof(struct agp_allocate))) |
893 | return -EFAULT; | 893 | return -EFAULT; |
894 | 894 | ||
895 | if (alloc.type >= AGP_USER_TYPES) | ||
896 | return -EINVAL; | ||
897 | |||
895 | memory = agp_allocate_memory_wrap(alloc.pg_count, alloc.type); | 898 | memory = agp_allocate_memory_wrap(alloc.pg_count, alloc.type); |
896 | 899 | ||
897 | if (memory == NULL) | 900 | if (memory == NULL) |
@@ -907,7 +910,7 @@ static int agpioc_allocate_wrap(struct agp_file_private *priv, void __user *arg) | |||
907 | return 0; | 910 | return 0; |
908 | } | 911 | } |
909 | 912 | ||
910 | static int agpioc_deallocate_wrap(struct agp_file_private *priv, int arg) | 913 | int agpioc_deallocate_wrap(struct agp_file_private *priv, int arg) |
911 | { | 914 | { |
912 | struct agp_memory *memory; | 915 | struct agp_memory *memory; |
913 | 916 | ||
@@ -1043,6 +1046,9 @@ static const struct file_operations agp_fops = | |||
1043 | .read = agp_read, | 1046 | .read = agp_read, |
1044 | .write = agp_write, | 1047 | .write = agp_write, |
1045 | .ioctl = agp_ioctl, | 1048 | .ioctl = agp_ioctl, |
1049 | #ifdef CONFIG_COMPAT | ||
1050 | .compat_ioctl = compat_agp_ioctl, | ||
1051 | #endif | ||
1046 | .mmap = agp_mmap, | 1052 | .mmap = agp_mmap, |
1047 | .open = agp_open, | 1053 | .open = agp_open, |
1048 | .release = agp_release, | 1054 | .release = agp_release, |
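
Hooking up the 32-bit entry point follows the standard character-driver pattern: the handler is built and wired in only when CONFIG_COMPAT is set, matching the new Makefile rule. A generic sketch with placeholder foo_* names:

        static const struct file_operations foo_fops = {
                .owner          = THIS_MODULE,
                .ioctl          = foo_ioctl,    /* native entry point */
        #ifdef CONFIG_COMPAT
                .compat_ioctl   = foo_compat_ioctl, /* 32-bit tasks, 64-bit kernel */
        #endif
                .open           = foo_open,
                .release        = foo_release,
        };
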
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index 3491d6f84bc6..7923337c3d26 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c | |||
@@ -101,6 +101,63 @@ static int agp_get_key(void) | |||
101 | return -1; | 101 | return -1; |
102 | } | 102 | } |
103 | 103 | ||
104 | /* | ||
105 | * Use kmalloc if possible for the page list. Otherwise fall back to | ||
106 | * vmalloc. This speeds things up and also saves memory for small AGP | ||
107 | * regions. | ||
108 | */ | ||
109 | |||
110 | void agp_alloc_page_array(size_t size, struct agp_memory *mem) | ||
111 | { | ||
112 | mem->memory = NULL; | ||
113 | mem->vmalloc_flag = 0; | ||
114 | |||
115 | if (size <= 2*PAGE_SIZE) | ||
116 | mem->memory = kmalloc(size, GFP_KERNEL | __GFP_NORETRY); | ||
117 | if (mem->memory == NULL) { | ||
118 | mem->memory = vmalloc(size); | ||
119 | mem->vmalloc_flag = 1; | ||
120 | } | ||
121 | } | ||
122 | EXPORT_SYMBOL(agp_alloc_page_array); | ||
123 | |||
124 | void agp_free_page_array(struct agp_memory *mem) | ||
125 | { | ||
126 | if (mem->vmalloc_flag) { | ||
127 | vfree(mem->memory); | ||
128 | } else { | ||
129 | kfree(mem->memory); | ||
130 | } | ||
131 | } | ||
132 | EXPORT_SYMBOL(agp_free_page_array); | ||
133 | |||
134 | |||
135 | static struct agp_memory *agp_create_user_memory(unsigned long num_agp_pages) | ||
136 | { | ||
137 | struct agp_memory *new; | ||
138 | unsigned long alloc_size = num_agp_pages*sizeof(struct page *); | ||
139 | |||
140 | new = kzalloc(sizeof(struct agp_memory), GFP_KERNEL); | ||
141 | if (new == NULL) | ||
142 | return NULL; | ||
143 | |||
144 | new->key = agp_get_key(); | ||
145 | |||
146 | if (new->key < 0) { | ||
147 | kfree(new); | ||
148 | return NULL; | ||
149 | } | ||
150 | |||
151 | agp_alloc_page_array(alloc_size, new); | ||
152 | |||
153 | if (new->memory == NULL) { | ||
154 | agp_free_key(new->key); | ||
155 | kfree(new); | ||
156 | return NULL; | ||
157 | } | ||
158 | new->num_scratch_pages = 0; | ||
159 | return new; | ||
160 | } | ||
104 | 161 | ||
105 | struct agp_memory *agp_create_memory(int scratch_pages) | 162 | struct agp_memory *agp_create_memory(int scratch_pages) |
106 | { | 163 | { |
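
The 2*PAGE_SIZE cutoff keeps small page lists in physically contiguous kmalloc memory while still handling large apertures. Worked numbers, assuming 4 KiB pages and 8-byte page pointers:

        /* 4 MiB region:  1024 pointers ->   8 KiB == 2*PAGE_SIZE -> kmalloc */
        /* 64 MiB region: 16384 pointers -> 128 KiB               -> vmalloc */

__GFP_NORETRY keeps the kmalloc attempt cheap: if the contiguous allocation cannot be satisfied quickly, the code falls back to vmalloc rather than retrying hard.
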
@@ -116,7 +173,8 @@ struct agp_memory *agp_create_memory(int scratch_pages) | |||
116 | kfree(new); | 173 | kfree(new); |
117 | return NULL; | 174 | return NULL; |
118 | } | 175 | } |
119 | new->memory = vmalloc(PAGE_SIZE * scratch_pages); | 176 | |
177 | agp_alloc_page_array(PAGE_SIZE * scratch_pages, new); | ||
120 | 178 | ||
121 | if (new->memory == NULL) { | 179 | if (new->memory == NULL) { |
122 | agp_free_key(new->key); | 180 | agp_free_key(new->key); |
@@ -124,6 +182,7 @@ struct agp_memory *agp_create_memory(int scratch_pages) | |||
124 | return NULL; | 182 | return NULL; |
125 | } | 183 | } |
126 | new->num_scratch_pages = scratch_pages; | 184 | new->num_scratch_pages = scratch_pages; |
185 | new->type = AGP_NORMAL_MEMORY; | ||
127 | return new; | 186 | return new; |
128 | } | 187 | } |
129 | EXPORT_SYMBOL(agp_create_memory); | 188 | EXPORT_SYMBOL(agp_create_memory); |
@@ -146,6 +205,11 @@ void agp_free_memory(struct agp_memory *curr) | |||
146 | if (curr->is_bound == TRUE) | 205 | if (curr->is_bound == TRUE) |
147 | agp_unbind_memory(curr); | 206 | agp_unbind_memory(curr); |
148 | 207 | ||
208 | if (curr->type >= AGP_USER_TYPES) { | ||
209 | agp_generic_free_by_type(curr); | ||
210 | return; | ||
211 | } | ||
212 | |||
149 | if (curr->type != 0) { | 213 | if (curr->type != 0) { |
150 | curr->bridge->driver->free_by_type(curr); | 214 | curr->bridge->driver->free_by_type(curr); |
151 | return; | 215 | return; |
@@ -157,7 +221,7 @@ void agp_free_memory(struct agp_memory *curr) | |||
157 | flush_agp_mappings(); | 221 | flush_agp_mappings(); |
158 | } | 222 | } |
159 | agp_free_key(curr->key); | 223 | agp_free_key(curr->key); |
160 | vfree(curr->memory); | 224 | agp_free_page_array(curr); |
161 | kfree(curr); | 225 | kfree(curr); |
162 | } | 226 | } |
163 | EXPORT_SYMBOL(agp_free_memory); | 227 | EXPORT_SYMBOL(agp_free_memory); |
@@ -188,6 +252,13 @@ struct agp_memory *agp_allocate_memory(struct agp_bridge_data *bridge, | |||
188 | if ((atomic_read(&bridge->current_memory_agp) + page_count) > bridge->max_memory_agp) | 252 | if ((atomic_read(&bridge->current_memory_agp) + page_count) > bridge->max_memory_agp) |
189 | return NULL; | 253 | return NULL; |
190 | 254 | ||
255 | if (type >= AGP_USER_TYPES) { | ||
256 | new = agp_generic_alloc_user(page_count, type); | ||
257 | if (new) | ||
258 | new->bridge = bridge; | ||
259 | return new; | ||
260 | } | ||
261 | |||
191 | if (type != 0) { | 262 | if (type != 0) { |
192 | new = bridge->driver->alloc_by_type(page_count, type); | 263 | new = bridge->driver->alloc_by_type(page_count, type); |
193 | if (new) | 264 | if (new) |
@@ -960,6 +1031,7 @@ int agp_generic_insert_memory(struct agp_memory * mem, off_t pg_start, int type) | |||
960 | off_t j; | 1031 | off_t j; |
961 | void *temp; | 1032 | void *temp; |
962 | struct agp_bridge_data *bridge; | 1033 | struct agp_bridge_data *bridge; |
1034 | int mask_type; | ||
963 | 1035 | ||
964 | bridge = mem->bridge; | 1036 | bridge = mem->bridge; |
965 | if (!bridge) | 1037 | if (!bridge) |
@@ -995,7 +1067,11 @@ int agp_generic_insert_memory(struct agp_memory * mem, off_t pg_start, int type) | |||
995 | num_entries -= agp_memory_reserved/PAGE_SIZE; | 1067 | num_entries -= agp_memory_reserved/PAGE_SIZE; |
996 | if (num_entries < 0) num_entries = 0; | 1068 | if (num_entries < 0) num_entries = 0; |
997 | 1069 | ||
998 | if (type != 0 || mem->type != 0) { | 1070 | if (type != mem->type) |
1071 | return -EINVAL; | ||
1072 | |||
1073 | mask_type = bridge->driver->agp_type_to_mask_type(bridge, type); | ||
1074 | if (mask_type != 0) { | ||
999 | /* The generic routines know nothing of memory types */ | 1075 | /* The generic routines know nothing of memory types */ |
1000 | return -EINVAL; | 1076 | return -EINVAL; |
1001 | } | 1077 | } |
@@ -1018,7 +1094,8 @@ int agp_generic_insert_memory(struct agp_memory * mem, off_t pg_start, int type) | |||
1018 | } | 1094 | } |
1019 | 1095 | ||
1020 | for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { | 1096 | for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { |
1021 | writel(bridge->driver->mask_memory(bridge, mem->memory[i], mem->type), bridge->gatt_table+j); | 1097 | writel(bridge->driver->mask_memory(bridge, mem->memory[i], mask_type), |
1098 | bridge->gatt_table+j); | ||
1022 | } | 1099 | } |
1023 | readl(bridge->gatt_table+j-1); /* PCI Posting. */ | 1100 | readl(bridge->gatt_table+j-1); /* PCI Posting. */ |
1024 | 1101 | ||
@@ -1032,6 +1109,7 @@ int agp_generic_remove_memory(struct agp_memory *mem, off_t pg_start, int type) | |||
1032 | { | 1109 | { |
1033 | size_t i; | 1110 | size_t i; |
1034 | struct agp_bridge_data *bridge; | 1111 | struct agp_bridge_data *bridge; |
1112 | int mask_type; | ||
1035 | 1113 | ||
1036 | bridge = mem->bridge; | 1114 | bridge = mem->bridge; |
1037 | if (!bridge) | 1115 | if (!bridge) |
@@ -1040,7 +1118,11 @@ int agp_generic_remove_memory(struct agp_memory *mem, off_t pg_start, int type) | |||
1040 | if (mem->page_count == 0) | 1118 | if (mem->page_count == 0) |
1041 | return 0; | 1119 | return 0; |
1042 | 1120 | ||
1043 | if (type != 0 || mem->type != 0) { | 1121 | if (type != mem->type) |
1122 | return -EINVAL; | ||
1123 | |||
1124 | mask_type = bridge->driver->agp_type_to_mask_type(bridge, type); | ||
1125 | if (mask_type != 0) { | ||
1044 | /* The generic routines know nothing of memory types */ | 1126 | /* The generic routines know nothing of memory types */ |
1045 | return -EINVAL; | 1127 | return -EINVAL; |
1046 | } | 1128 | } |
@@ -1056,22 +1138,40 @@ int agp_generic_remove_memory(struct agp_memory *mem, off_t pg_start, int type) | |||
1056 | } | 1138 | } |
1057 | EXPORT_SYMBOL(agp_generic_remove_memory); | 1139 | EXPORT_SYMBOL(agp_generic_remove_memory); |
1058 | 1140 | ||
1059 | |||
1060 | struct agp_memory *agp_generic_alloc_by_type(size_t page_count, int type) | 1141 | struct agp_memory *agp_generic_alloc_by_type(size_t page_count, int type) |
1061 | { | 1142 | { |
1062 | return NULL; | 1143 | return NULL; |
1063 | } | 1144 | } |
1064 | EXPORT_SYMBOL(agp_generic_alloc_by_type); | 1145 | EXPORT_SYMBOL(agp_generic_alloc_by_type); |
1065 | 1146 | ||
1066 | |||
1067 | void agp_generic_free_by_type(struct agp_memory *curr) | 1147 | void agp_generic_free_by_type(struct agp_memory *curr) |
1068 | { | 1148 | { |
1069 | vfree(curr->memory); | 1149 | agp_free_page_array(curr); |
1070 | agp_free_key(curr->key); | 1150 | agp_free_key(curr->key); |
1071 | kfree(curr); | 1151 | kfree(curr); |
1072 | } | 1152 | } |
1073 | EXPORT_SYMBOL(agp_generic_free_by_type); | 1153 | EXPORT_SYMBOL(agp_generic_free_by_type); |
1074 | 1154 | ||
1155 | struct agp_memory *agp_generic_alloc_user(size_t page_count, int type) | ||
1156 | { | ||
1157 | struct agp_memory *new; | ||
1158 | int i; | ||
1159 | int pages; | ||
1160 | |||
1161 | pages = (page_count + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE; | ||
1162 | new = agp_create_user_memory(page_count); | ||
1163 | if (new == NULL) | ||
1164 | return NULL; | ||
1165 | |||
1166 | for (i = 0; i < page_count; i++) | ||
1167 | new->memory[i] = 0; | ||
1168 | new->page_count = 0; | ||
1169 | new->type = type; | ||
1170 | new->num_scratch_pages = pages; | ||
1171 | |||
1172 | return new; | ||
1173 | } | ||
1174 | EXPORT_SYMBOL(agp_generic_alloc_user); | ||
1075 | 1175 | ||
1076 | /* | 1176 | /* |
1077 | * Basic Page Allocation Routines - | 1177 | * Basic Page Allocation Routines - |
@@ -1165,6 +1265,15 @@ unsigned long agp_generic_mask_memory(struct agp_bridge_data *bridge, | |||
1165 | } | 1265 | } |
1166 | EXPORT_SYMBOL(agp_generic_mask_memory); | 1266 | EXPORT_SYMBOL(agp_generic_mask_memory); |
1167 | 1267 | ||
1268 | int agp_generic_type_to_mask_type(struct agp_bridge_data *bridge, | ||
1269 | int type) | ||
1270 | { | ||
1271 | if (type >= AGP_USER_TYPES) | ||
1272 | return 0; | ||
1273 | return type; | ||
1274 | } | ||
1275 | EXPORT_SYMBOL(agp_generic_type_to_mask_type); | ||
1276 | |||
1168 | /* | 1277 | /* |
1169 | * These functions are implemented according to the AGPv3 spec, | 1278 | * These functions are implemented according to the AGPv3 spec, |
1170 | * which covers implementation details that had previously been | 1279 | * which covers implementation details that had previously been |
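
Taken together, the generic.c changes give user-populated types a full lifecycle: allocation short-circuits to agp_generic_alloc_user(), which builds an empty page array for the caller to fill in, and agp_free_memory() routes any type >= AGP_USER_TYPES back through agp_generic_free_by_type(). A sketch of the flow, where bridge and pg_start are assumed to come from the caller and AGP_USER_CACHED_MEMORY is the type the Intel driver below maps to cached PTEs:

        static int bind_user_pages(struct agp_bridge_data *bridge, off_t pg_start)
        {
                struct agp_memory *mem;

                mem = agp_allocate_memory(bridge, 16, AGP_USER_CACHED_MEMORY);
                if (!mem)
                        return -ENOMEM;

                /* populate mem->memory[0..15] and set mem->page_count here */

                agp_bind_memory(mem, pg_start); /* writes the masked PTEs */
                agp_unbind_memory(mem);
                agp_free_memory(mem);           /* user type -> generic free */
                return 0;
        }
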
diff --git a/drivers/char/agp/hp-agp.c b/drivers/char/agp/hp-agp.c index 907fb66ec4a9..847deabf7f9b 100644 --- a/drivers/char/agp/hp-agp.c +++ b/drivers/char/agp/hp-agp.c | |||
@@ -438,6 +438,7 @@ struct agp_bridge_driver hp_zx1_driver = { | |||
438 | .free_by_type = agp_generic_free_by_type, | 438 | .free_by_type = agp_generic_free_by_type, |
439 | .agp_alloc_page = agp_generic_alloc_page, | 439 | .agp_alloc_page = agp_generic_alloc_page, |
440 | .agp_destroy_page = agp_generic_destroy_page, | 440 | .agp_destroy_page = agp_generic_destroy_page, |
441 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
441 | .cant_use_aperture = 1, | 442 | .cant_use_aperture = 1, |
442 | }; | 443 | }; |
443 | 444 | ||
diff --git a/drivers/char/agp/i460-agp.c b/drivers/char/agp/i460-agp.c index 91769443d8fe..3e7618653abd 100644 --- a/drivers/char/agp/i460-agp.c +++ b/drivers/char/agp/i460-agp.c | |||
@@ -293,6 +293,9 @@ static int i460_insert_memory_small_io_page (struct agp_memory *mem, | |||
293 | pr_debug("i460_insert_memory_small_io_page(mem=%p, pg_start=%ld, type=%d, paddr0=0x%lx)\n", | 293 | pr_debug("i460_insert_memory_small_io_page(mem=%p, pg_start=%ld, type=%d, paddr0=0x%lx)\n", |
294 | mem, pg_start, type, mem->memory[0]); | 294 | mem, pg_start, type, mem->memory[0]); |
295 | 295 | ||
296 | if (type >= AGP_USER_TYPES || mem->type >= AGP_USER_TYPES) | ||
297 | return -EINVAL; | ||
298 | |||
296 | io_pg_start = I460_IOPAGES_PER_KPAGE * pg_start; | 299 | io_pg_start = I460_IOPAGES_PER_KPAGE * pg_start; |
297 | 300 | ||
298 | temp = agp_bridge->current_size; | 301 | temp = agp_bridge->current_size; |
@@ -396,6 +399,9 @@ static int i460_insert_memory_large_io_page (struct agp_memory *mem, | |||
396 | struct lp_desc *start, *end, *lp; | 399 | struct lp_desc *start, *end, *lp; |
397 | void *temp; | 400 | void *temp; |
398 | 401 | ||
402 | if (type >= AGP_USER_TYPES || mem->type >= AGP_USER_TYPES) | ||
403 | return -EINVAL; | ||
404 | |||
399 | temp = agp_bridge->current_size; | 405 | temp = agp_bridge->current_size; |
400 | num_entries = A_SIZE_8(temp)->num_entries; | 406 | num_entries = A_SIZE_8(temp)->num_entries; |
401 | 407 | ||
@@ -572,6 +578,7 @@ struct agp_bridge_driver intel_i460_driver = { | |||
572 | #endif | 578 | #endif |
573 | .alloc_by_type = agp_generic_alloc_by_type, | 579 | .alloc_by_type = agp_generic_alloc_by_type, |
574 | .free_by_type = agp_generic_free_by_type, | 580 | .free_by_type = agp_generic_free_by_type, |
581 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
575 | .cant_use_aperture = 1, | 582 | .cant_use_aperture = 1, |
576 | }; | 583 | }; |
577 | 584 | ||
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index a3011de51f7c..06b0bb6d982f 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/pci.h> | 6 | #include <linux/pci.h> |
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | #include <linux/kernel.h> | ||
8 | #include <linux/pagemap.h> | 9 | #include <linux/pagemap.h> |
9 | #include <linux/agp_backend.h> | 10 | #include <linux/agp_backend.h> |
10 | #include "agp.h" | 11 | #include "agp.h" |
@@ -24,6 +25,9 @@ | |||
24 | agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_HB) | 25 | agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_HB) |
25 | 26 | ||
26 | 27 | ||
28 | extern int agp_memory_reserved; | ||
29 | |||
30 | |||
27 | /* Intel 815 register */ | 31 | /* Intel 815 register */ |
28 | #define INTEL_815_APCONT 0x51 | 32 | #define INTEL_815_APCONT 0x51 |
29 | #define INTEL_815_ATTBASE_MASK ~0x1FFFFFFF | 33 | #define INTEL_815_ATTBASE_MASK ~0x1FFFFFFF |
@@ -68,12 +72,15 @@ static struct aper_size_info_fixed intel_i810_sizes[] = | |||
68 | 72 | ||
69 | #define AGP_DCACHE_MEMORY 1 | 73 | #define AGP_DCACHE_MEMORY 1 |
70 | #define AGP_PHYS_MEMORY 2 | 74 | #define AGP_PHYS_MEMORY 2 |
75 | #define INTEL_AGP_CACHED_MEMORY 3 | ||
71 | 76 | ||
72 | static struct gatt_mask intel_i810_masks[] = | 77 | static struct gatt_mask intel_i810_masks[] = |
73 | { | 78 | { |
74 | {.mask = I810_PTE_VALID, .type = 0}, | 79 | {.mask = I810_PTE_VALID, .type = 0}, |
75 | {.mask = (I810_PTE_VALID | I810_PTE_LOCAL), .type = AGP_DCACHE_MEMORY}, | 80 | {.mask = (I810_PTE_VALID | I810_PTE_LOCAL), .type = AGP_DCACHE_MEMORY}, |
76 | {.mask = I810_PTE_VALID, .type = 0} | 81 | {.mask = I810_PTE_VALID, .type = 0}, |
82 | {.mask = I810_PTE_VALID | I830_PTE_SYSTEM_CACHED, | ||
83 | .type = INTEL_AGP_CACHED_MEMORY} | ||
77 | }; | 84 | }; |
78 | 85 | ||
79 | static struct _intel_i810_private { | 86 | static struct _intel_i810_private { |
@@ -117,13 +124,15 @@ static int intel_i810_configure(void) | |||
117 | 124 | ||
118 | current_size = A_SIZE_FIX(agp_bridge->current_size); | 125 | current_size = A_SIZE_FIX(agp_bridge->current_size); |
119 | 126 | ||
120 | pci_read_config_dword(intel_i810_private.i810_dev, I810_MMADDR, &temp); | ||
121 | temp &= 0xfff80000; | ||
122 | |||
123 | intel_i810_private.registers = ioremap(temp, 128 * 4096); | ||
124 | if (!intel_i810_private.registers) { | 127 | if (!intel_i810_private.registers) { |
125 | printk(KERN_ERR PFX "Unable to remap memory.\n"); | 128 | pci_read_config_dword(intel_i810_private.i810_dev, I810_MMADDR, &temp); |
126 | return -ENOMEM; | 129 | temp &= 0xfff80000; |
130 | |||
131 | intel_i810_private.registers = ioremap(temp, 128 * 4096); | ||
132 | if (!intel_i810_private.registers) { | ||
133 | printk(KERN_ERR PFX "Unable to remap memory.\n"); | ||
134 | return -ENOMEM; | ||
135 | } | ||
127 | } | 136 | } |
128 | 137 | ||
129 | if ((readl(intel_i810_private.registers+I810_DRAM_CTL) | 138 | if ((readl(intel_i810_private.registers+I810_DRAM_CTL) |
@@ -201,62 +210,79 @@ static void i8xx_destroy_pages(void *addr) | |||
201 | atomic_dec(&agp_bridge->current_memory_agp); | 210 | atomic_dec(&agp_bridge->current_memory_agp); |
202 | } | 211 | } |
203 | 212 | ||
213 | static int intel_i830_type_to_mask_type(struct agp_bridge_data *bridge, | ||
214 | int type) | ||
215 | { | ||
216 | if (type < AGP_USER_TYPES) | ||
217 | return type; | ||
218 | else if (type == AGP_USER_CACHED_MEMORY) | ||
219 | return INTEL_AGP_CACHED_MEMORY; | ||
220 | else | ||
221 | return 0; | ||
222 | } | ||
223 | |||
204 | static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start, | 224 | static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start, |
205 | int type) | 225 | int type) |
206 | { | 226 | { |
207 | int i, j, num_entries; | 227 | int i, j, num_entries; |
208 | void *temp; | 228 | void *temp; |
229 | int ret = -EINVAL; | ||
230 | int mask_type; | ||
209 | 231 | ||
210 | if (mem->page_count == 0) | 232 | if (mem->page_count == 0) |
211 | return 0; | 233 | goto out; |
212 | 234 | ||
213 | temp = agp_bridge->current_size; | 235 | temp = agp_bridge->current_size; |
214 | num_entries = A_SIZE_FIX(temp)->num_entries; | 236 | num_entries = A_SIZE_FIX(temp)->num_entries; |
215 | 237 | ||
216 | if ((pg_start + mem->page_count) > num_entries) | 238 | if ((pg_start + mem->page_count) > num_entries) |
217 | return -EINVAL; | 239 | goto out_err; |
218 | 240 | ||
219 | for (j = pg_start; j < (pg_start + mem->page_count); j++) { | ||
220 | if (!PGE_EMPTY(agp_bridge, readl(agp_bridge->gatt_table+j))) | ||
221 | return -EBUSY; | ||
222 | } | ||
223 | 241 | ||
224 | if (type != 0 || mem->type != 0) { | 242 | for (j = pg_start; j < (pg_start + mem->page_count); j++) { |
225 | if ((type == AGP_DCACHE_MEMORY) && (mem->type == AGP_DCACHE_MEMORY)) { | 243 | if (!PGE_EMPTY(agp_bridge, readl(agp_bridge->gatt_table+j))) { |
226 | /* special insert */ | 244 | ret = -EBUSY; |
227 | if (!mem->is_flushed) { | 245 | goto out_err; |
228 | global_cache_flush(); | ||
229 | mem->is_flushed = TRUE; | ||
230 | } | ||
231 | |||
232 | for (i = pg_start; i < (pg_start + mem->page_count); i++) { | ||
233 | writel((i*4096)|I810_PTE_LOCAL|I810_PTE_VALID, intel_i810_private.registers+I810_PTE_BASE+(i*4)); | ||
234 | } | ||
235 | readl(intel_i810_private.registers+I810_PTE_BASE+((i-1)*4)); /* PCI Posting. */ | ||
236 | |||
237 | agp_bridge->driver->tlb_flush(mem); | ||
238 | return 0; | ||
239 | } | 246 | } |
240 | if ((type == AGP_PHYS_MEMORY) && (mem->type == AGP_PHYS_MEMORY)) | ||
241 | goto insert; | ||
242 | return -EINVAL; | ||
243 | } | 247 | } |
244 | 248 | ||
245 | insert: | 249 | if (type != mem->type) |
246 | if (!mem->is_flushed) { | 250 | goto out_err; |
247 | global_cache_flush(); | ||
248 | mem->is_flushed = TRUE; | ||
249 | } | ||
250 | 251 | ||
251 | for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { | 252 | mask_type = agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type); |
252 | writel(agp_bridge->driver->mask_memory(agp_bridge, | 253 | |
253 | mem->memory[i], mem->type), | 254 | switch (mask_type) { |
254 | intel_i810_private.registers+I810_PTE_BASE+(j*4)); | 255 | case AGP_DCACHE_MEMORY: |
256 | if (!mem->is_flushed) | ||
257 | global_cache_flush(); | ||
258 | for (i = pg_start; i < (pg_start + mem->page_count); i++) { | ||
259 | writel((i*4096)|I810_PTE_LOCAL|I810_PTE_VALID, | ||
260 | intel_i810_private.registers+I810_PTE_BASE+(i*4)); | ||
261 | } | ||
262 | readl(intel_i810_private.registers+I810_PTE_BASE+((i-1)*4)); | ||
263 | break; | ||
264 | case AGP_PHYS_MEMORY: | ||
265 | case AGP_NORMAL_MEMORY: | ||
266 | if (!mem->is_flushed) | ||
267 | global_cache_flush(); | ||
268 | for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { | ||
269 | writel(agp_bridge->driver->mask_memory(agp_bridge, | ||
270 | mem->memory[i], | ||
271 | mask_type), | ||
272 | intel_i810_private.registers+I810_PTE_BASE+(j*4)); | ||
273 | } | ||
274 | readl(intel_i810_private.registers+I810_PTE_BASE+((j-1)*4)); | ||
275 | break; | ||
276 | default: | ||
277 | goto out_err; | ||
255 | } | 278 | } |
256 | readl(intel_i810_private.registers+I810_PTE_BASE+((j-1)*4)); /* PCI Posting. */ | ||
257 | 279 | ||
258 | agp_bridge->driver->tlb_flush(mem); | 280 | agp_bridge->driver->tlb_flush(mem); |
259 | return 0; | 281 | out: |
282 | ret = 0; | ||
283 | out_err: | ||
284 | mem->is_flushed = 1; | ||
285 | return ret; | ||
260 | } | 286 | } |
261 | 287 | ||
262 | static int intel_i810_remove_entries(struct agp_memory *mem, off_t pg_start, | 288 | static int intel_i810_remove_entries(struct agp_memory *mem, off_t pg_start, |
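[Editor's note] The restructured insert path above replaces the old nested type checks with a driver hook plus a switch on the translated mask type. Below is a minimal, runnable user-space sketch of that mapping, with illustrative constant values rather than the real AGP header definitions:

    #include <stdio.h>

    /* Illustrative values; the real definitions live in the AGP headers. */
    #define AGP_NORMAL_MEMORY        0
    #define AGP_DCACHE_MEMORY        1
    #define AGP_PHYS_MEMORY          2
    #define INTEL_AGP_CACHED_MEMORY  3
    #define AGP_USER_TYPES           16
    #define AGP_USER_CACHED_MEMORY   (AGP_USER_TYPES + 1)

    /* Mirrors intel_i830_type_to_mask_type(): driver-private and user
     * types pass through, the user "cached" type maps onto the cached
     * PTE flavor, and anything unknown falls back to normal memory. */
    static int i830_type_to_mask_type(int type)
    {
        if (type < AGP_USER_TYPES)
            return type;
        else if (type == AGP_USER_CACHED_MEMORY)
            return INTEL_AGP_CACHED_MEMORY;
        else
            return 0;
    }

    int main(void)
    {
        int samples[] = { AGP_DCACHE_MEMORY, AGP_PHYS_MEMORY,
                          AGP_USER_CACHED_MEMORY, 99 };
        unsigned int i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
            printf("type %d -> mask type %d\n",
                   samples[i], i830_type_to_mask_type(samples[i]));
        return 0;
    }

The switch in the rewritten insert_entries routines then dispatches on the returned mask type instead of re-testing raw type pairs on every path.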
@@ -337,12 +363,11 @@ static struct agp_memory *intel_i810_alloc_by_type(size_t pg_count, int type) | |||
337 | new->type = AGP_DCACHE_MEMORY; | 363 | new->type = AGP_DCACHE_MEMORY; |
338 | new->page_count = pg_count; | 364 | new->page_count = pg_count; |
339 | new->num_scratch_pages = 0; | 365 | new->num_scratch_pages = 0; |
340 | vfree(new->memory); | 366 | agp_free_page_array(new); |
341 | return new; | 367 | return new; |
342 | } | 368 | } |
343 | if (type == AGP_PHYS_MEMORY) | 369 | if (type == AGP_PHYS_MEMORY) |
344 | return alloc_agpphysmem_i8xx(pg_count, type); | 370 | return alloc_agpphysmem_i8xx(pg_count, type); |
345 | |||
346 | return NULL; | 371 | return NULL; |
347 | } | 372 | } |
348 | 373 | ||
@@ -357,7 +382,7 @@ static void intel_i810_free_by_type(struct agp_memory *curr) | |||
357 | gart_to_virt(curr->memory[0])); | 382 | gart_to_virt(curr->memory[0])); |
358 | global_flush_tlb(); | 383 | global_flush_tlb(); |
359 | } | 384 | } |
360 | vfree(curr->memory); | 385 | agp_free_page_array(curr); |
361 | } | 386 | } |
362 | kfree(curr); | 387 | kfree(curr); |
363 | } | 388 | } |
@@ -619,9 +644,11 @@ static int intel_i830_insert_entries(struct agp_memory *mem,off_t pg_start, int | |||
619 | { | 644 | { |
620 | int i,j,num_entries; | 645 | int i,j,num_entries; |
621 | void *temp; | 646 | void *temp; |
647 | int ret = -EINVAL; | ||
648 | int mask_type; | ||
622 | 649 | ||
623 | if (mem->page_count == 0) | 650 | if (mem->page_count == 0) |
624 | return 0; | 651 | goto out; |
625 | 652 | ||
626 | temp = agp_bridge->current_size; | 653 | temp = agp_bridge->current_size; |
627 | num_entries = A_SIZE_FIX(temp)->num_entries; | 654 | num_entries = A_SIZE_FIX(temp)->num_entries; |
@@ -631,34 +658,41 @@ static int intel_i830_insert_entries(struct agp_memory *mem,off_t pg_start, int | |||
631 | pg_start,intel_i830_private.gtt_entries); | 658 | pg_start,intel_i830_private.gtt_entries); |
632 | 659 | ||
633 | printk (KERN_INFO PFX "Trying to insert into local/stolen memory\n"); | 660 | printk (KERN_INFO PFX "Trying to insert into local/stolen memory\n"); |
634 | return -EINVAL; | 661 | goto out_err; |
635 | } | 662 | } |
636 | 663 | ||
637 | if ((pg_start + mem->page_count) > num_entries) | 664 | if ((pg_start + mem->page_count) > num_entries) |
638 | return -EINVAL; | 665 | goto out_err; |
639 | 666 | ||
640 | /* The i830 can't check the GTT for entries since it's read-only, | 667 |
641 | * so we depend on the caller to make the correct offset decisions. | 668 |
642 | */ | 669 | */ |
643 | 670 | ||
644 | if ((type != 0 && type != AGP_PHYS_MEMORY) || | 671 | if (type != mem->type) |
645 | (mem->type != 0 && mem->type != AGP_PHYS_MEMORY)) | 672 | goto out_err; |
646 | return -EINVAL; | 673 | |
674 | mask_type = agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type); | ||
647 | 675 | ||
648 | if (!mem->is_flushed) { | 676 | if (mask_type != 0 && mask_type != AGP_PHYS_MEMORY && |
677 | mask_type != INTEL_AGP_CACHED_MEMORY) | ||
678 | goto out_err; | ||
679 | |||
680 | if (!mem->is_flushed) | ||
649 | global_cache_flush(); | 681 | global_cache_flush(); |
650 | mem->is_flushed = TRUE; | ||
651 | } | ||
652 | 682 | ||
653 | for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { | 683 | for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { |
654 | writel(agp_bridge->driver->mask_memory(agp_bridge, | 684 | writel(agp_bridge->driver->mask_memory(agp_bridge, |
655 | mem->memory[i], mem->type), | 685 | mem->memory[i], mask_type), |
656 | intel_i830_private.registers+I810_PTE_BASE+(j*4)); | 686 | intel_i830_private.registers+I810_PTE_BASE+(j*4)); |
657 | } | 687 | } |
658 | readl(intel_i830_private.registers+I810_PTE_BASE+((j-1)*4)); | 688 | readl(intel_i830_private.registers+I810_PTE_BASE+((j-1)*4)); |
659 | |||
660 | agp_bridge->driver->tlb_flush(mem); | 689 | agp_bridge->driver->tlb_flush(mem); |
661 | return 0; | 690 | |
691 | out: | ||
692 | ret = 0; | ||
693 | out_err: | ||
694 | mem->is_flushed = 1; | ||
695 | return ret; | ||
662 | } | 696 | } |
663 | 697 | ||
664 | static int intel_i830_remove_entries(struct agp_memory *mem,off_t pg_start, | 698 | static int intel_i830_remove_entries(struct agp_memory *mem,off_t pg_start, |
@@ -687,7 +721,6 @@ static struct agp_memory *intel_i830_alloc_by_type(size_t pg_count,int type) | |||
687 | { | 721 | { |
688 | if (type == AGP_PHYS_MEMORY) | 722 | if (type == AGP_PHYS_MEMORY) |
689 | return alloc_agpphysmem_i8xx(pg_count, type); | 723 | return alloc_agpphysmem_i8xx(pg_count, type); |
690 | |||
691 | /* always return NULL for other allocation types for now */ | 724 | /* always return NULL for other allocation types for now */ |
692 | return NULL; | 725 | return NULL; |
693 | } | 726 | } |
@@ -734,9 +767,11 @@ static int intel_i915_insert_entries(struct agp_memory *mem,off_t pg_start, | |||
734 | { | 767 | { |
735 | int i,j,num_entries; | 768 | int i,j,num_entries; |
736 | void *temp; | 769 | void *temp; |
770 | int ret = -EINVAL; | ||
771 | int mask_type; | ||
737 | 772 | ||
738 | if (mem->page_count == 0) | 773 | if (mem->page_count == 0) |
739 | return 0; | 774 | goto out; |
740 | 775 | ||
741 | temp = agp_bridge->current_size; | 776 | temp = agp_bridge->current_size; |
742 | num_entries = A_SIZE_FIX(temp)->num_entries; | 777 | num_entries = A_SIZE_FIX(temp)->num_entries; |
@@ -746,33 +781,41 @@ static int intel_i915_insert_entries(struct agp_memory *mem,off_t pg_start, | |||
746 | pg_start,intel_i830_private.gtt_entries); | 781 | pg_start,intel_i830_private.gtt_entries); |
747 | 782 | ||
748 | printk (KERN_INFO PFX "Trying to insert into local/stolen memory\n"); | 783 | printk (KERN_INFO PFX "Trying to insert into local/stolen memory\n"); |
749 | return -EINVAL; | 784 | goto out_err; |
750 | } | 785 | } |
751 | 786 | ||
752 | if ((pg_start + mem->page_count) > num_entries) | 787 | if ((pg_start + mem->page_count) > num_entries) |
753 | return -EINVAL; | 788 | goto out_err; |
754 | 789 | ||
755 | /* The i830 can't check the GTT for entries since it's read-only, | 790 | /* The i915 can't check the GTT for entries since it's read-only,
756 | * so we depend on the caller to make the correct offset decisions. | 791 | * so we depend on the caller to make the correct offset decisions.
757 | */ | 792 | */ |
758 | 793 | ||
759 | if ((type != 0 && type != AGP_PHYS_MEMORY) || | 794 | if (type != mem->type) |
760 | (mem->type != 0 && mem->type != AGP_PHYS_MEMORY)) | 795 | goto out_err; |
761 | return -EINVAL; | 796 | |
797 | mask_type = agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type); | ||
762 | 798 | ||
763 | if (!mem->is_flushed) { | 799 | if (mask_type != 0 && mask_type != AGP_PHYS_MEMORY && |
800 | mask_type != INTEL_AGP_CACHED_MEMORY) | ||
801 | goto out_err; | ||
802 | |||
803 | if (!mem->is_flushed) | ||
764 | global_cache_flush(); | 804 | global_cache_flush(); |
765 | mem->is_flushed = TRUE; | ||
766 | } | ||
767 | 805 | ||
768 | for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { | 806 | for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { |
769 | writel(agp_bridge->driver->mask_memory(agp_bridge, | 807 | writel(agp_bridge->driver->mask_memory(agp_bridge, |
770 | mem->memory[i], mem->type), intel_i830_private.gtt+j); | 808 | mem->memory[i], mask_type), intel_i830_private.gtt+j); |
771 | } | 809 | } |
772 | readl(intel_i830_private.gtt+j-1); | ||
773 | 810 | ||
811 | readl(intel_i830_private.gtt+j-1); | ||
774 | agp_bridge->driver->tlb_flush(mem); | 812 | agp_bridge->driver->tlb_flush(mem); |
775 | return 0; | 813 | |
814 | out: | ||
815 | ret = 0; | ||
816 | out_err: | ||
817 | mem->is_flushed = 1; | ||
818 | return ret; | ||
776 | } | 819 | } |
777 | 820 | ||
778 | static int intel_i915_remove_entries(struct agp_memory *mem,off_t pg_start, | 821 | static int intel_i915_remove_entries(struct agp_memory *mem,off_t pg_start, |
@@ -803,7 +846,7 @@ static int intel_i915_remove_entries(struct agp_memory *mem,off_t pg_start, | |||
803 | */ | 846 | */ |
804 | static int intel_i9xx_fetch_size(void) | 847 | static int intel_i9xx_fetch_size(void) |
805 | { | 848 | { |
806 | int num_sizes = sizeof(intel_i830_sizes) / sizeof(*intel_i830_sizes); | 849 | int num_sizes = ARRAY_SIZE(intel_i830_sizes); |
807 | int aper_size; /* size in megabytes */ | 850 | int aper_size; /* size in megabytes */ |
808 | int i; | 851 | int i; |
809 | 852 | ||
@@ -1384,6 +1427,7 @@ static struct agp_bridge_driver intel_generic_driver = { | |||
1384 | .free_by_type = agp_generic_free_by_type, | 1427 | .free_by_type = agp_generic_free_by_type, |
1385 | .agp_alloc_page = agp_generic_alloc_page, | 1428 | .agp_alloc_page = agp_generic_alloc_page, |
1386 | .agp_destroy_page = agp_generic_destroy_page, | 1429 | .agp_destroy_page = agp_generic_destroy_page, |
1430 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
1387 | }; | 1431 | }; |
1388 | 1432 | ||
1389 | static struct agp_bridge_driver intel_810_driver = { | 1433 | static struct agp_bridge_driver intel_810_driver = { |
@@ -1408,6 +1452,7 @@ static struct agp_bridge_driver intel_810_driver = { | |||
1408 | .free_by_type = intel_i810_free_by_type, | 1452 | .free_by_type = intel_i810_free_by_type, |
1409 | .agp_alloc_page = agp_generic_alloc_page, | 1453 | .agp_alloc_page = agp_generic_alloc_page, |
1410 | .agp_destroy_page = agp_generic_destroy_page, | 1454 | .agp_destroy_page = agp_generic_destroy_page, |
1455 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
1411 | }; | 1456 | }; |
1412 | 1457 | ||
1413 | static struct agp_bridge_driver intel_815_driver = { | 1458 | static struct agp_bridge_driver intel_815_driver = { |
@@ -1431,6 +1476,7 @@ static struct agp_bridge_driver intel_815_driver = { | |||
1431 | .free_by_type = agp_generic_free_by_type, | 1476 | .free_by_type = agp_generic_free_by_type, |
1432 | .agp_alloc_page = agp_generic_alloc_page, | 1477 | .agp_alloc_page = agp_generic_alloc_page, |
1433 | .agp_destroy_page = agp_generic_destroy_page, | 1478 | .agp_destroy_page = agp_generic_destroy_page, |
1479 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
1434 | }; | 1480 | }; |
1435 | 1481 | ||
1436 | static struct agp_bridge_driver intel_830_driver = { | 1482 | static struct agp_bridge_driver intel_830_driver = { |
@@ -1455,6 +1501,7 @@ static struct agp_bridge_driver intel_830_driver = { | |||
1455 | .free_by_type = intel_i810_free_by_type, | 1501 | .free_by_type = intel_i810_free_by_type, |
1456 | .agp_alloc_page = agp_generic_alloc_page, | 1502 | .agp_alloc_page = agp_generic_alloc_page, |
1457 | .agp_destroy_page = agp_generic_destroy_page, | 1503 | .agp_destroy_page = agp_generic_destroy_page, |
1504 | .agp_type_to_mask_type = intel_i830_type_to_mask_type, | ||
1458 | }; | 1505 | }; |
1459 | 1506 | ||
1460 | static struct agp_bridge_driver intel_820_driver = { | 1507 | static struct agp_bridge_driver intel_820_driver = { |
@@ -1478,6 +1525,7 @@ static struct agp_bridge_driver intel_820_driver = { | |||
1478 | .free_by_type = agp_generic_free_by_type, | 1525 | .free_by_type = agp_generic_free_by_type, |
1479 | .agp_alloc_page = agp_generic_alloc_page, | 1526 | .agp_alloc_page = agp_generic_alloc_page, |
1480 | .agp_destroy_page = agp_generic_destroy_page, | 1527 | .agp_destroy_page = agp_generic_destroy_page, |
1528 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
1481 | }; | 1529 | }; |
1482 | 1530 | ||
1483 | static struct agp_bridge_driver intel_830mp_driver = { | 1531 | static struct agp_bridge_driver intel_830mp_driver = { |
@@ -1501,6 +1549,7 @@ static struct agp_bridge_driver intel_830mp_driver = { | |||
1501 | .free_by_type = agp_generic_free_by_type, | 1549 | .free_by_type = agp_generic_free_by_type, |
1502 | .agp_alloc_page = agp_generic_alloc_page, | 1550 | .agp_alloc_page = agp_generic_alloc_page, |
1503 | .agp_destroy_page = agp_generic_destroy_page, | 1551 | .agp_destroy_page = agp_generic_destroy_page, |
1552 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
1504 | }; | 1553 | }; |
1505 | 1554 | ||
1506 | static struct agp_bridge_driver intel_840_driver = { | 1555 | static struct agp_bridge_driver intel_840_driver = { |
@@ -1524,6 +1573,7 @@ static struct agp_bridge_driver intel_840_driver = { | |||
1524 | .free_by_type = agp_generic_free_by_type, | 1573 | .free_by_type = agp_generic_free_by_type, |
1525 | .agp_alloc_page = agp_generic_alloc_page, | 1574 | .agp_alloc_page = agp_generic_alloc_page, |
1526 | .agp_destroy_page = agp_generic_destroy_page, | 1575 | .agp_destroy_page = agp_generic_destroy_page, |
1576 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
1527 | }; | 1577 | }; |
1528 | 1578 | ||
1529 | static struct agp_bridge_driver intel_845_driver = { | 1579 | static struct agp_bridge_driver intel_845_driver = { |
@@ -1547,6 +1597,7 @@ static struct agp_bridge_driver intel_845_driver = { | |||
1547 | .free_by_type = agp_generic_free_by_type, | 1597 | .free_by_type = agp_generic_free_by_type, |
1548 | .agp_alloc_page = agp_generic_alloc_page, | 1598 | .agp_alloc_page = agp_generic_alloc_page, |
1549 | .agp_destroy_page = agp_generic_destroy_page, | 1599 | .agp_destroy_page = agp_generic_destroy_page, |
1600 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
1550 | }; | 1601 | }; |
1551 | 1602 | ||
1552 | static struct agp_bridge_driver intel_850_driver = { | 1603 | static struct agp_bridge_driver intel_850_driver = { |
@@ -1570,6 +1621,7 @@ static struct agp_bridge_driver intel_850_driver = { | |||
1570 | .free_by_type = agp_generic_free_by_type, | 1621 | .free_by_type = agp_generic_free_by_type, |
1571 | .agp_alloc_page = agp_generic_alloc_page, | 1622 | .agp_alloc_page = agp_generic_alloc_page, |
1572 | .agp_destroy_page = agp_generic_destroy_page, | 1623 | .agp_destroy_page = agp_generic_destroy_page, |
1624 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
1573 | }; | 1625 | }; |
1574 | 1626 | ||
1575 | static struct agp_bridge_driver intel_860_driver = { | 1627 | static struct agp_bridge_driver intel_860_driver = { |
@@ -1593,6 +1645,7 @@ static struct agp_bridge_driver intel_860_driver = { | |||
1593 | .free_by_type = agp_generic_free_by_type, | 1645 | .free_by_type = agp_generic_free_by_type, |
1594 | .agp_alloc_page = agp_generic_alloc_page, | 1646 | .agp_alloc_page = agp_generic_alloc_page, |
1595 | .agp_destroy_page = agp_generic_destroy_page, | 1647 | .agp_destroy_page = agp_generic_destroy_page, |
1648 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
1596 | }; | 1649 | }; |
1597 | 1650 | ||
1598 | static struct agp_bridge_driver intel_915_driver = { | 1651 | static struct agp_bridge_driver intel_915_driver = { |
@@ -1617,6 +1670,7 @@ static struct agp_bridge_driver intel_915_driver = { | |||
1617 | .free_by_type = intel_i810_free_by_type, | 1670 | .free_by_type = intel_i810_free_by_type, |
1618 | .agp_alloc_page = agp_generic_alloc_page, | 1671 | .agp_alloc_page = agp_generic_alloc_page, |
1619 | .agp_destroy_page = agp_generic_destroy_page, | 1672 | .agp_destroy_page = agp_generic_destroy_page, |
1673 | .agp_type_to_mask_type = intel_i830_type_to_mask_type, | ||
1620 | }; | 1674 | }; |
1621 | 1675 | ||
1622 | static struct agp_bridge_driver intel_i965_driver = { | 1676 | static struct agp_bridge_driver intel_i965_driver = { |
@@ -1641,6 +1695,7 @@ static struct agp_bridge_driver intel_i965_driver = { | |||
1641 | .free_by_type = intel_i810_free_by_type, | 1695 | .free_by_type = intel_i810_free_by_type, |
1642 | .agp_alloc_page = agp_generic_alloc_page, | 1696 | .agp_alloc_page = agp_generic_alloc_page, |
1643 | .agp_destroy_page = agp_generic_destroy_page, | 1697 | .agp_destroy_page = agp_generic_destroy_page, |
1698 | .agp_type_to_mask_type = intel_i830_type_to_mask_type, | ||
1644 | }; | 1699 | }; |
1645 | 1700 | ||
1646 | static struct agp_bridge_driver intel_7505_driver = { | 1701 | static struct agp_bridge_driver intel_7505_driver = { |
@@ -1664,6 +1719,7 @@ static struct agp_bridge_driver intel_7505_driver = { | |||
1664 | .free_by_type = agp_generic_free_by_type, | 1719 | .free_by_type = agp_generic_free_by_type, |
1665 | .agp_alloc_page = agp_generic_alloc_page, | 1720 | .agp_alloc_page = agp_generic_alloc_page, |
1666 | .agp_destroy_page = agp_generic_destroy_page, | 1721 | .agp_destroy_page = agp_generic_destroy_page, |
1722 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
1667 | }; | 1723 | }; |
1668 | 1724 | ||
1669 | static int find_i810(u16 device) | 1725 | static int find_i810(u16 device) |
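[Editor's note] The remaining bridge drivers in this series each gain the same new ops-table member, almost always pointing at the generic helper. A compilable stand-in for that pattern follows; the struct and helper names are modeled on the patch, and the helper body is a simplification:

    #include <stdio.h>

    struct bridge_driver {
        const char *name;
        int (*type_to_mask_type)(int type);
    };

    /* Simplified stand-in for agp_generic_type_to_mask_type(); the real
     * helper also validates the type against the user-type range. */
    static int generic_type_to_mask_type(int type)
    {
        return type;
    }

    /* Every driver fills the new member, most with the generic helper. */
    static struct bridge_driver via_driver = {
        .name              = "via",
        .type_to_mask_type = generic_type_to_mask_type,
    };

    int main(void)
    {
        printf("%s: type 2 -> mask type %d\n",
               via_driver.name, via_driver.type_to_mask_type(2));
        return 0;
    }

Making the hook explicit in every ops table keeps the core code free of NULL checks on the new callback.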
diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c index df7f37b2739a..2563286b2fcf 100644 --- a/drivers/char/agp/nvidia-agp.c +++ b/drivers/char/agp/nvidia-agp.c | |||
@@ -310,6 +310,7 @@ static struct agp_bridge_driver nvidia_driver = { | |||
310 | .free_by_type = agp_generic_free_by_type, | 310 | .free_by_type = agp_generic_free_by_type, |
311 | .agp_alloc_page = agp_generic_alloc_page, | 311 | .agp_alloc_page = agp_generic_alloc_page, |
312 | .agp_destroy_page = agp_generic_destroy_page, | 312 | .agp_destroy_page = agp_generic_destroy_page, |
313 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
313 | }; | 314 | }; |
314 | 315 | ||
315 | static int __devinit agp_nvidia_probe(struct pci_dev *pdev, | 316 | static int __devinit agp_nvidia_probe(struct pci_dev *pdev, |
diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c index 17c50b0f83f0..b7b4590673ae 100644 --- a/drivers/char/agp/parisc-agp.c +++ b/drivers/char/agp/parisc-agp.c | |||
@@ -228,6 +228,7 @@ struct agp_bridge_driver parisc_agp_driver = { | |||
228 | .free_by_type = agp_generic_free_by_type, | 228 | .free_by_type = agp_generic_free_by_type, |
229 | .agp_alloc_page = agp_generic_alloc_page, | 229 | .agp_alloc_page = agp_generic_alloc_page, |
230 | .agp_destroy_page = agp_generic_destroy_page, | 230 | .agp_destroy_page = agp_generic_destroy_page, |
231 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
231 | .cant_use_aperture = 1, | 232 | .cant_use_aperture = 1, |
232 | }; | 233 | }; |
233 | 234 | ||
diff --git a/drivers/char/agp/sgi-agp.c b/drivers/char/agp/sgi-agp.c index 902648db7efa..92d1dc45b9be 100644 --- a/drivers/char/agp/sgi-agp.c +++ b/drivers/char/agp/sgi-agp.c | |||
@@ -265,6 +265,7 @@ struct agp_bridge_driver sgi_tioca_driver = { | |||
265 | .free_by_type = agp_generic_free_by_type, | 265 | .free_by_type = agp_generic_free_by_type, |
266 | .agp_alloc_page = sgi_tioca_alloc_page, | 266 | .agp_alloc_page = sgi_tioca_alloc_page, |
267 | .agp_destroy_page = agp_generic_destroy_page, | 267 | .agp_destroy_page = agp_generic_destroy_page, |
268 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
268 | .cant_use_aperture = 1, | 269 | .cant_use_aperture = 1, |
269 | .needs_scratch_page = 0, | 270 | .needs_scratch_page = 0, |
270 | .num_aperture_sizes = 1, | 271 | .num_aperture_sizes = 1, |
diff --git a/drivers/char/agp/sis-agp.c b/drivers/char/agp/sis-agp.c index a00fd48a6f05..60342b708152 100644 --- a/drivers/char/agp/sis-agp.c +++ b/drivers/char/agp/sis-agp.c | |||
@@ -140,6 +140,7 @@ static struct agp_bridge_driver sis_driver = { | |||
140 | .free_by_type = agp_generic_free_by_type, | 140 | .free_by_type = agp_generic_free_by_type, |
141 | .agp_alloc_page = agp_generic_alloc_page, | 141 | .agp_alloc_page = agp_generic_alloc_page, |
142 | .agp_destroy_page = agp_generic_destroy_page, | 142 | .agp_destroy_page = agp_generic_destroy_page, |
143 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
143 | }; | 144 | }; |
144 | 145 | ||
145 | static struct agp_device_ids sis_agp_device_ids[] __devinitdata = | 146 | static struct agp_device_ids sis_agp_device_ids[] __devinitdata = |
diff --git a/drivers/char/agp/sworks-agp.c b/drivers/char/agp/sworks-agp.c index 4f2d7d99902f..9f5ae7714f85 100644 --- a/drivers/char/agp/sworks-agp.c +++ b/drivers/char/agp/sworks-agp.c | |||
@@ -444,6 +444,7 @@ static struct agp_bridge_driver sworks_driver = { | |||
444 | .free_by_type = agp_generic_free_by_type, | 444 | .free_by_type = agp_generic_free_by_type, |
445 | .agp_alloc_page = agp_generic_alloc_page, | 445 | .agp_alloc_page = agp_generic_alloc_page, |
446 | .agp_destroy_page = agp_generic_destroy_page, | 446 | .agp_destroy_page = agp_generic_destroy_page, |
447 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
447 | }; | 448 | }; |
448 | 449 | ||
449 | static int __devinit agp_serverworks_probe(struct pci_dev *pdev, | 450 | static int __devinit agp_serverworks_probe(struct pci_dev *pdev, |
diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c index dffc19382f7e..6c45702e542c 100644 --- a/drivers/char/agp/uninorth-agp.c +++ b/drivers/char/agp/uninorth-agp.c | |||
@@ -510,6 +510,7 @@ struct agp_bridge_driver uninorth_agp_driver = { | |||
510 | .free_by_type = agp_generic_free_by_type, | 510 | .free_by_type = agp_generic_free_by_type, |
511 | .agp_alloc_page = agp_generic_alloc_page, | 511 | .agp_alloc_page = agp_generic_alloc_page, |
512 | .agp_destroy_page = agp_generic_destroy_page, | 512 | .agp_destroy_page = agp_generic_destroy_page, |
513 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
513 | .cant_use_aperture = 1, | 514 | .cant_use_aperture = 1, |
514 | }; | 515 | }; |
515 | 516 | ||
@@ -534,6 +535,7 @@ struct agp_bridge_driver u3_agp_driver = { | |||
534 | .free_by_type = agp_generic_free_by_type, | 535 | .free_by_type = agp_generic_free_by_type, |
535 | .agp_alloc_page = agp_generic_alloc_page, | 536 | .agp_alloc_page = agp_generic_alloc_page, |
536 | .agp_destroy_page = agp_generic_destroy_page, | 537 | .agp_destroy_page = agp_generic_destroy_page, |
538 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
537 | .cant_use_aperture = 1, | 539 | .cant_use_aperture = 1, |
538 | .needs_scratch_page = 1, | 540 | .needs_scratch_page = 1, |
539 | }; | 541 | }; |
diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c index 2ded7a280d7f..2e7c04370cd9 100644 --- a/drivers/char/agp/via-agp.c +++ b/drivers/char/agp/via-agp.c | |||
@@ -191,6 +191,7 @@ static struct agp_bridge_driver via_agp3_driver = { | |||
191 | .free_by_type = agp_generic_free_by_type, | 191 | .free_by_type = agp_generic_free_by_type, |
192 | .agp_alloc_page = agp_generic_alloc_page, | 192 | .agp_alloc_page = agp_generic_alloc_page, |
193 | .agp_destroy_page = agp_generic_destroy_page, | 193 | .agp_destroy_page = agp_generic_destroy_page, |
194 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
194 | }; | 195 | }; |
195 | 196 | ||
196 | static struct agp_bridge_driver via_driver = { | 197 | static struct agp_bridge_driver via_driver = { |
@@ -214,6 +215,7 @@ static struct agp_bridge_driver via_driver = { | |||
214 | .free_by_type = agp_generic_free_by_type, | 215 | .free_by_type = agp_generic_free_by_type, |
215 | .agp_alloc_page = agp_generic_alloc_page, | 216 | .agp_alloc_page = agp_generic_alloc_page, |
216 | .agp_destroy_page = agp_generic_destroy_page, | 217 | .agp_destroy_page = agp_generic_destroy_page, |
218 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, | ||
217 | }; | 219 | }; |
218 | 220 | ||
219 | static struct agp_device_ids via_agp_device_ids[] __devinitdata = | 221 | static struct agp_device_ids via_agp_device_ids[] __devinitdata = |
diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c index 1aa93a752a9c..ae76a9ffe89f 100644 --- a/drivers/char/hangcheck-timer.c +++ b/drivers/char/hangcheck-timer.c | |||
@@ -117,7 +117,7 @@ __setup("hcheck_reboot", hangcheck_parse_reboot); | |||
117 | __setup("hcheck_dump_tasks", hangcheck_parse_dump_tasks); | 117 | __setup("hcheck_dump_tasks", hangcheck_parse_dump_tasks); |
118 | #endif /* not MODULE */ | 118 | #endif /* not MODULE */ |
119 | 119 | ||
120 | #if defined(CONFIG_X86_64) || defined(CONFIG_S390) | 120 | #if defined(CONFIG_S390) |
121 | # define HAVE_MONOTONIC | 121 | # define HAVE_MONOTONIC |
122 | # define TIMER_FREQ 1000000000ULL | 122 | # define TIMER_FREQ 1000000000ULL |
123 | #elif defined(CONFIG_IA64) | 123 | #elif defined(CONFIG_IA64) |
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index be73c80d699d..1d8c4ae61551 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/workqueue.h> | 36 | #include <linux/workqueue.h> |
37 | #include <linux/kexec.h> | 37 | #include <linux/kexec.h> |
38 | #include <linux/irq.h> | 38 | #include <linux/irq.h> |
39 | #include <linux/hrtimer.h> | ||
39 | 40 | ||
40 | #include <asm/ptrace.h> | 41 | #include <asm/ptrace.h> |
41 | #include <asm/irq_regs.h> | 42 | #include <asm/irq_regs.h> |
@@ -158,6 +159,17 @@ static struct sysrq_key_op sysrq_sync_op = { | |||
158 | .enable_mask = SYSRQ_ENABLE_SYNC, | 159 | .enable_mask = SYSRQ_ENABLE_SYNC, |
159 | }; | 160 | }; |
160 | 161 | ||
162 | static void sysrq_handle_show_timers(int key, struct tty_struct *tty) | ||
163 | { | ||
164 | sysrq_timer_list_show(); | ||
165 | } | ||
166 | |||
167 | static struct sysrq_key_op sysrq_show_timers_op = { | ||
168 | .handler = sysrq_handle_show_timers, | ||
169 | .help_msg = "show-all-timers(Q)", | ||
170 | .action_msg = "Show Pending Timers", | ||
171 | }; | ||
172 | |||
161 | static void sysrq_handle_mountro(int key, struct tty_struct *tty) | 173 | static void sysrq_handle_mountro(int key, struct tty_struct *tty) |
162 | { | 174 | { |
163 | emergency_remount(); | 175 | emergency_remount(); |
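[Editor's note] The new handler slots into the previously empty 'q' entry of the 36-slot sysrq dispatch table. A user-space model of that registration and dispatch, assuming the usual 0-9/a-z index mapping:

    #include <stdio.h>

    /* 36 slots: 0-9 then a-z, matching the sysrq table layout. */
    struct key_op {
        void (*handler)(int key);
        const char *help_msg;
    };

    static void show_timers(int key)
    {
        printf("Show Pending Timers (key '%c')\n", key);
    }

    static struct key_op show_timers_op = {
        .handler  = show_timers,
        .help_msg = "show-all-timers(Q)",
    };

    static struct key_op *key_table[36];

    static int key_to_index(int key)
    {
        if (key >= '0' && key <= '9')
            return key - '0';
        if (key >= 'a' && key <= 'z')
            return key - 'a' + 10;
        return -1;
    }

    int main(void)
    {
        int idx;

        key_table[key_to_index('q')] = &show_timers_op;  /* fills 'q' */

        idx = key_to_index('q');
        if (idx >= 0 && key_table[idx] && key_table[idx]->handler)
            key_table[idx]->handler('q');
        return 0;
    }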
@@ -335,7 +347,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = { | |||
335 | /* o: This will often be registered as 'Off' at init time */ | 347 | /* o: This will often be registered as 'Off' at init time */ |
336 | NULL, /* o */ | 348 | NULL, /* o */ |
337 | &sysrq_showregs_op, /* p */ | 349 | &sysrq_showregs_op, /* p */ |
338 | NULL, /* q */ | 350 | &sysrq_show_timers_op, /* q */ |
339 | &sysrq_unraw_op, /* r */ | 351 | &sysrq_unraw_op, /* r */ |
340 | &sysrq_sync_op, /* s */ | 352 | &sysrq_sync_op, /* s */ |
341 | &sysrq_showstate_op, /* t */ | 353 | &sysrq_showstate_op, /* t */ |
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index b6bcdbbf57b3..ccaa6a39cb4b 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c | |||
@@ -16,15 +16,13 @@ | |||
16 | * This file is licensed under the GPL v2. | 16 | * This file is licensed under the GPL v2. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/acpi_pmtmr.h> | ||
19 | #include <linux/clocksource.h> | 20 | #include <linux/clocksource.h> |
20 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
21 | #include <linux/init.h> | 22 | #include <linux/init.h> |
22 | #include <linux/pci.h> | 23 | #include <linux/pci.h> |
23 | #include <asm/io.h> | 24 | #include <asm/io.h> |
24 | 25 | ||
25 | /* Number of PMTMR ticks expected during calibration run */ | ||
26 | #define PMTMR_TICKS_PER_SEC 3579545 | ||
27 | |||
28 | /* | 26 | /* |
29 | * The I/O port the PMTMR resides at. | 27 | * The I/O port the PMTMR resides at. |
30 | * The location is detected during setup_arch(), | 28 | * The location is detected during setup_arch(), |
@@ -32,15 +30,13 @@ | |||
32 | */ | 30 | */ |
33 | u32 pmtmr_ioport __read_mostly; | 31 | u32 pmtmr_ioport __read_mostly; |
34 | 32 | ||
35 | #define ACPI_PM_MASK CLOCKSOURCE_MASK(24) /* limit it to 24 bits */ | ||
36 | |||
37 | static inline u32 read_pmtmr(void) | 33 | static inline u32 read_pmtmr(void) |
38 | { | 34 | { |
39 | /* mask the output to 24 bits */ | 35 | /* mask the output to 24 bits */ |
40 | return inl(pmtmr_ioport) & ACPI_PM_MASK; | 36 | return inl(pmtmr_ioport) & ACPI_PM_MASK; |
41 | } | 37 | } |
42 | 38 | ||
43 | static cycle_t acpi_pm_read_verified(void) | 39 | u32 acpi_pm_read_verified(void) |
44 | { | 40 | { |
45 | u32 v1 = 0, v2 = 0, v3 = 0; | 41 | u32 v1 = 0, v2 = 0, v3 = 0; |
46 | 42 | ||
@@ -57,7 +53,12 @@ static cycle_t acpi_pm_read_verified(void) | |||
57 | } while (unlikely((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) | 53 | } while (unlikely((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) |
58 | || (v3 > v1 && v3 < v2))); | 54 | || (v3 > v1 && v3 < v2))); |
59 | 55 | ||
60 | return (cycle_t)v2; | 56 | return v2; |
57 | } | ||
58 | |||
59 | static cycle_t acpi_pm_read_slow(void) | ||
60 | { | ||
61 | return (cycle_t)acpi_pm_read_verified(); | ||
61 | } | 62 | } |
62 | 63 | ||
63 | static cycle_t acpi_pm_read(void) | 64 | static cycle_t acpi_pm_read(void) |
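[Editor's note] acpi_pm_read_verified() reads the timer three times and retries until the middle sample is ordered consistently with its neighbors, which filters out the single bogus reads some chipsets produce while still tolerating counter wraparound. A runnable sketch of the same filter against a fake timer (the sample values are invented):

    #include <stdio.h>
    #include <stdint.h>

    /* Stand-in for the hardware read; a PM timer on an affected
     * chipset occasionally returns a garbage sample. */
    static uint32_t fake_vals[] = { 100, 999999, 101, 102, 103, 104 };
    static unsigned fake_idx;

    static uint32_t read_pmtmr(void)
    {
        return fake_vals[fake_idx++] & 0xffffff;  /* 24-bit timer */
    }

    /* Same retry condition as acpi_pm_read_verified(): keep sampling
     * until v2 sits between v1 and v3 in rotational order. */
    static uint32_t pm_read_verified(void)
    {
        uint32_t v1, v2, v3;

        do {
            v1 = read_pmtmr();
            v2 = read_pmtmr();
            v3 = read_pmtmr();
        } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
                 || (v3 > v1 && v3 < v2));

        return v2;
    }

    int main(void)
    {
        printf("verified read: %u\n", pm_read_verified());
        return 0;
    }

The first triple (100, 999999, 101) trips the condition and is discarded; the second (102, 103, 104) passes and 103 is returned.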
@@ -72,7 +73,8 @@ static struct clocksource clocksource_acpi_pm = { | |||
72 | .mask = (cycle_t)ACPI_PM_MASK, | 73 | .mask = (cycle_t)ACPI_PM_MASK, |
73 | .mult = 0, /* to be calculated */ | 74 | .mult = 0, /* to be calculated */
74 | .shift = 22, | 75 | .shift = 22, |
75 | .is_continuous = 1, | 76 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
77 | |||
76 | }; | 78 | }; |
77 | 79 | ||
78 | 80 | ||
@@ -87,7 +89,7 @@ __setup("acpi_pm_good", acpi_pm_good_setup); | |||
87 | 89 | ||
88 | static inline void acpi_pm_need_workaround(void) | 90 | static inline void acpi_pm_need_workaround(void) |
89 | { | 91 | { |
90 | clocksource_acpi_pm.read = acpi_pm_read_verified; | 92 | clocksource_acpi_pm.read = acpi_pm_read_slow; |
91 | clocksource_acpi_pm.rating = 110; | 93 | clocksource_acpi_pm.rating = 110; |
92 | } | 94 | } |
93 | 95 | ||
diff --git a/drivers/clocksource/cyclone.c b/drivers/clocksource/cyclone.c index bf4d3d50d1c4..4f3925ceb360 100644 --- a/drivers/clocksource/cyclone.c +++ b/drivers/clocksource/cyclone.c | |||
@@ -31,7 +31,7 @@ static struct clocksource clocksource_cyclone = { | |||
31 | .mask = CYCLONE_TIMER_MASK, | 31 | .mask = CYCLONE_TIMER_MASK, |
32 | .mult = 10, | 32 | .mult = 10, |
33 | .shift = 0, | 33 | .shift = 0, |
34 | .is_continuous = 1, | 34 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
35 | }; | 35 | }; |
36 | 36 | ||
37 | static int __init init_cyclone_clocksource(void) | 37 | static int __init init_cyclone_clocksource(void) |
diff --git a/drivers/clocksource/scx200_hrt.c b/drivers/clocksource/scx200_hrt.c index 22915cc46ba7..b92da677aa5d 100644 --- a/drivers/clocksource/scx200_hrt.c +++ b/drivers/clocksource/scx200_hrt.c | |||
@@ -57,7 +57,7 @@ static struct clocksource cs_hrt = { | |||
57 | .rating = 250, | 57 | .rating = 250, |
58 | .read = read_hrt, | 58 | .read = read_hrt, |
59 | .mask = CLOCKSOURCE_MASK(32), | 59 | .mask = CLOCKSOURCE_MASK(32), |
60 | .is_continuous = 1, | 60 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
61 | /* mult, shift are set based on mhz27 flag */ | 61 | /* mult, shift are set based on mhz27 flag */ |
62 | }; | 62 | }; |
63 | 63 | ||
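[Editor's note] The clocksource hunks above all retire the dedicated is_continuous int in favor of a flags bitmask, so future properties don't each need a new struct member. A sketch of the resulting shape, with an illustrative flag value rather than the real linux/clocksource.h definition:

    #include <stdio.h>

    /* Illustrative value; the real one lives in linux/clocksource.h. */
    #define CLOCK_SOURCE_IS_CONTINUOUS  0x01

    struct clocksource_sketch {
        const char *name;
        int rating;
        unsigned long flags;   /* replaces the old "int is_continuous" */
    };

    static struct clocksource_sketch cs = {
        .name   = "acpi_pm",
        .rating = 200,
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
    };

    int main(void)
    {
        if (cs.flags & CLOCK_SOURCE_IS_CONTINUOUS)
            printf("%s is continuous\n", cs.name);
        return 0;
    }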
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 491779af8d55..d155e81b5c97 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig | |||
@@ -16,7 +16,7 @@ config CPU_FREQ | |||
16 | if CPU_FREQ | 16 | if CPU_FREQ |
17 | 17 | ||
18 | config CPU_FREQ_TABLE | 18 | config CPU_FREQ_TABLE |
19 | def_tristate m | 19 | tristate |
20 | 20 | ||
21 | config CPU_FREQ_DEBUG | 21 | config CPU_FREQ_DEBUG |
22 | bool "Enable CPUfreq debugging" | 22 | bool "Enable CPUfreq debugging" |
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a45cc89e387a..f52facc570f5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c | |||
@@ -41,8 +41,67 @@ static struct cpufreq_driver *cpufreq_driver; | |||
41 | static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS]; | 41 | static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS]; |
42 | static DEFINE_SPINLOCK(cpufreq_driver_lock); | 42 | static DEFINE_SPINLOCK(cpufreq_driver_lock); |
43 | 43 | ||
44 | /* | ||
45 | * cpu_policy_rwsem is a per-CPU reader-writer semaphore designed to cure | ||
46 | * all cpufreq/hotplug/workqueue/etc related lock issues. | ||
47 | * | ||
48 | * The rules for this semaphore: | ||
49 | * - Any routine that wants to read from the policy structure will | ||
50 | * do a down_read on this semaphore. | ||
51 | * - Any routine that will write to the policy structure and/or may take away | ||
52 | * the policy altogether (e.g. CPU hotplug), will hold this lock in write | ||
53 | * mode before doing so. | ||
54 | * | ||
55 | * Additional rules: | ||
56 | * - All holders of the lock should check to make sure that the CPU they | ||
57 | * are concerned with is online after they get the lock. | ||
58 | * - Governor routines that can be called in the cpufreq hotplug path should | ||
59 | * not take this sem, as the top-level hotplug notifier handler takes it. | ||
60 | */ | ||
61 | static DEFINE_PER_CPU(int, policy_cpu); | ||
62 | static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); | ||
63 | |||
64 | #define lock_policy_rwsem(mode, cpu) \ | ||
65 | int lock_policy_rwsem_##mode \ | ||
66 | (int cpu) \ | ||
67 | { \ | ||
68 | int policy_cpu = per_cpu(policy_cpu, cpu); \ | ||
69 | BUG_ON(policy_cpu == -1); \ | ||
70 | down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ | ||
71 | if (unlikely(!cpu_online(cpu))) { \ | ||
72 | up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ | ||
73 | return -1; \ | ||
74 | } \ | ||
75 | \ | ||
76 | return 0; \ | ||
77 | } | ||
78 | |||
79 | lock_policy_rwsem(read, cpu); | ||
80 | EXPORT_SYMBOL_GPL(lock_policy_rwsem_read); | ||
81 | |||
82 | lock_policy_rwsem(write, cpu); | ||
83 | EXPORT_SYMBOL_GPL(lock_policy_rwsem_write); | ||
84 | |||
85 | void unlock_policy_rwsem_read(int cpu) | ||
86 | { | ||
87 | int policy_cpu = per_cpu(policy_cpu, cpu); | ||
88 | BUG_ON(policy_cpu == -1); | ||
89 | up_read(&per_cpu(cpu_policy_rwsem, policy_cpu)); | ||
90 | } | ||
91 | EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read); | ||
92 | |||
93 | void unlock_policy_rwsem_write(int cpu) | ||
94 | { | ||
95 | int policy_cpu = per_cpu(policy_cpu, cpu); | ||
96 | BUG_ON(policy_cpu == -1); | ||
97 | up_write(&per_cpu(cpu_policy_rwsem, policy_cpu)); | ||
98 | } | ||
99 | EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write); | ||
100 | |||
101 | |||
44 | /* internal prototypes */ | 102 | /* internal prototypes */ |
45 | static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); | 103 | static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); |
104 | static unsigned int __cpufreq_get(unsigned int cpu); | ||
46 | static void handle_update(struct work_struct *work); | 105 | static void handle_update(struct work_struct *work); |
47 | 106 | ||
48 | /** | 107 | /** |
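[Editor's note] The lock_policy_rwsem macro above stamps out the read and write lockers from a single template. A user-space equivalent using pthread rwlocks shows the same trick; the online-CPU re-check and the per-CPU policy_cpu indirection from the patch are omitted here:

    #include <pthread.h>
    #include <stdio.h>

    #define NCPUS 4

    static pthread_rwlock_t cpu_policy_rwsem[NCPUS];

    /* One macro generates both lock helpers, as the patch does with
     * down_read()/down_write(). */
    #define DEFINE_LOCK_POLICY(mode, op)                            \
        int lock_policy_rwsem_##mode(int cpu)                       \
        {                                                           \
            return pthread_rwlock_##op(&cpu_policy_rwsem[cpu]);     \
        }

    DEFINE_LOCK_POLICY(read, rdlock)
    DEFINE_LOCK_POLICY(write, wrlock)

    static void unlock_policy_rwsem(int cpu)
    {
        pthread_rwlock_unlock(&cpu_policy_rwsem[cpu]);
    }

    int main(void)
    {
        int i;

        for (i = 0; i < NCPUS; i++)
            pthread_rwlock_init(&cpu_policy_rwsem[i], NULL);

        if (lock_policy_rwsem_write(1) == 0) {
            printf("cpu 1 policy locked for write\n");
            unlock_policy_rwsem(1);
        }
        return 0;
    }

This per-CPU rwsem replaces the old combination of lock_cpu_hotplug() and the per-policy mutex throughout the rest of the file.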
@@ -415,12 +474,8 @@ static ssize_t store_##file_name \ | |||
415 | if (ret != 1) \ | 474 | if (ret != 1) \ |
416 | return -EINVAL; \ | 475 | return -EINVAL; \ |
417 | \ | 476 | \ |
418 | lock_cpu_hotplug(); \ | ||
419 | mutex_lock(&policy->lock); \ | ||
420 | ret = __cpufreq_set_policy(policy, &new_policy); \ | 477 | ret = __cpufreq_set_policy(policy, &new_policy); \ |
421 | policy->user_policy.object = policy->object; \ | 478 | policy->user_policy.object = policy->object; \ |
422 | mutex_unlock(&policy->lock); \ | ||
423 | unlock_cpu_hotplug(); \ | ||
424 | \ | 479 | \ |
425 | return ret ? ret : count; \ | 480 | return ret ? ret : count; \ |
426 | } | 481 | } |
@@ -434,7 +489,7 @@ store_one(scaling_max_freq,max); | |||
434 | static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy, | 489 | static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy, |
435 | char *buf) | 490 | char *buf) |
436 | { | 491 | { |
437 | unsigned int cur_freq = cpufreq_get(policy->cpu); | 492 | unsigned int cur_freq = __cpufreq_get(policy->cpu); |
438 | if (!cur_freq) | 493 | if (!cur_freq) |
439 | return sprintf(buf, "<unknown>"); | 494 | return sprintf(buf, "<unknown>"); |
440 | return sprintf(buf, "%u\n", cur_freq); | 495 | return sprintf(buf, "%u\n", cur_freq); |
@@ -479,18 +534,12 @@ static ssize_t store_scaling_governor (struct cpufreq_policy * policy, | |||
479 | &new_policy.governor)) | 534 | &new_policy.governor)) |
480 | return -EINVAL; | 535 | return -EINVAL; |
481 | 536 | ||
482 | lock_cpu_hotplug(); | ||
483 | |||
484 | /* Do not use cpufreq_set_policy here or the user_policy.max | 537 | /* Do not use cpufreq_set_policy here or the user_policy.max |
485 | will be wrongly overridden */ | 538 | will be wrongly overridden */ |
486 | mutex_lock(&policy->lock); | ||
487 | ret = __cpufreq_set_policy(policy, &new_policy); | 539 | ret = __cpufreq_set_policy(policy, &new_policy); |
488 | 540 | ||
489 | policy->user_policy.policy = policy->policy; | 541 | policy->user_policy.policy = policy->policy; |
490 | policy->user_policy.governor = policy->governor; | 542 | policy->user_policy.governor = policy->governor; |
491 | mutex_unlock(&policy->lock); | ||
492 | |||
493 | unlock_cpu_hotplug(); | ||
494 | 543 | ||
495 | if (ret) | 544 | if (ret) |
496 | return ret; | 545 | return ret; |
@@ -595,11 +644,17 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr ,char * buf) | |||
595 | policy = cpufreq_cpu_get(policy->cpu); | 644 | policy = cpufreq_cpu_get(policy->cpu); |
596 | if (!policy) | 645 | if (!policy) |
597 | return -EINVAL; | 646 | return -EINVAL; |
647 | |||
648 | if (lock_policy_rwsem_read(policy->cpu) < 0) | ||
649 | return -EINVAL; | ||
650 | |||
598 | if (fattr->show) | 651 | if (fattr->show) |
599 | ret = fattr->show(policy, buf); | 652 | ret = fattr->show(policy, buf); |
600 | else | 653 | else |
601 | ret = -EIO; | 654 | ret = -EIO; |
602 | 655 | ||
656 | unlock_policy_rwsem_read(policy->cpu); | ||
657 | |||
603 | cpufreq_cpu_put(policy); | 658 | cpufreq_cpu_put(policy); |
604 | return ret; | 659 | return ret; |
605 | } | 660 | } |
@@ -613,11 +668,17 @@ static ssize_t store(struct kobject * kobj, struct attribute * attr, | |||
613 | policy = cpufreq_cpu_get(policy->cpu); | 668 | policy = cpufreq_cpu_get(policy->cpu); |
614 | if (!policy) | 669 | if (!policy) |
615 | return -EINVAL; | 670 | return -EINVAL; |
671 | |||
672 | if (lock_policy_rwsem_write(policy->cpu) < 0) | ||
673 | return -EINVAL; | ||
674 | |||
616 | if (fattr->store) | 675 | if (fattr->store) |
617 | ret = fattr->store(policy, buf, count); | 676 | ret = fattr->store(policy, buf, count); |
618 | else | 677 | else |
619 | ret = -EIO; | 678 | ret = -EIO; |
620 | 679 | ||
680 | unlock_policy_rwsem_write(policy->cpu); | ||
681 | |||
621 | cpufreq_cpu_put(policy); | 682 | cpufreq_cpu_put(policy); |
622 | return ret; | 683 | return ret; |
623 | } | 684 | } |
@@ -691,8 +752,10 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) | |||
691 | policy->cpu = cpu; | 752 | policy->cpu = cpu; |
692 | policy->cpus = cpumask_of_cpu(cpu); | 753 | policy->cpus = cpumask_of_cpu(cpu); |
693 | 754 | ||
694 | mutex_init(&policy->lock); | 755 | /* Initially set CPU itself as the policy_cpu */ |
695 | mutex_lock(&policy->lock); | 756 | per_cpu(policy_cpu, cpu) = cpu; |
757 | lock_policy_rwsem_write(cpu); | ||
758 | |||
696 | init_completion(&policy->kobj_unregister); | 759 | init_completion(&policy->kobj_unregister); |
697 | INIT_WORK(&policy->update, handle_update); | 760 | INIT_WORK(&policy->update, handle_update); |
698 | 761 | ||
@@ -702,7 +765,7 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) | |||
702 | ret = cpufreq_driver->init(policy); | 765 | ret = cpufreq_driver->init(policy); |
703 | if (ret) { | 766 | if (ret) { |
704 | dprintk("initialization failed\n"); | 767 | dprintk("initialization failed\n"); |
705 | mutex_unlock(&policy->lock); | 768 | unlock_policy_rwsem_write(cpu); |
706 | goto err_out; | 769 | goto err_out; |
707 | } | 770 | } |
708 | 771 | ||
@@ -716,6 +779,14 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) | |||
716 | */ | 779 | */ |
717 | managed_policy = cpufreq_cpu_get(j); | 780 | managed_policy = cpufreq_cpu_get(j); |
718 | if (unlikely(managed_policy)) { | 781 | if (unlikely(managed_policy)) { |
782 | |||
783 | /* Set proper policy_cpu */ | ||
784 | unlock_policy_rwsem_write(cpu); | ||
785 | per_cpu(policy_cpu, cpu) = managed_policy->cpu; | ||
786 | |||
787 | if (lock_policy_rwsem_write(cpu) < 0) | ||
788 | goto err_out_driver_exit; | ||
789 | |||
719 | spin_lock_irqsave(&cpufreq_driver_lock, flags); | 790 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
720 | managed_policy->cpus = policy->cpus; | 791 | managed_policy->cpus = policy->cpus; |
721 | cpufreq_cpu_data[cpu] = managed_policy; | 792 | cpufreq_cpu_data[cpu] = managed_policy; |
@@ -726,13 +797,13 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) | |||
726 | &managed_policy->kobj, | 797 | &managed_policy->kobj, |
727 | "cpufreq"); | 798 | "cpufreq"); |
728 | if (ret) { | 799 | if (ret) { |
729 | mutex_unlock(&policy->lock); | 800 | unlock_policy_rwsem_write(cpu); |
730 | goto err_out_driver_exit; | 801 | goto err_out_driver_exit; |
731 | } | 802 | } |
732 | 803 | ||
733 | cpufreq_debug_enable_ratelimit(); | 804 | cpufreq_debug_enable_ratelimit(); |
734 | mutex_unlock(&policy->lock); | ||
735 | ret = 0; | 805 | ret = 0; |
806 | unlock_policy_rwsem_write(cpu); | ||
736 | goto err_out_driver_exit; /* call driver->exit() */ | 807 | goto err_out_driver_exit; /* call driver->exit() */ |
737 | } | 808 | } |
738 | } | 809 | } |
@@ -746,7 +817,7 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) | |||
746 | 817 | ||
747 | ret = kobject_register(&policy->kobj); | 818 | ret = kobject_register(&policy->kobj); |
748 | if (ret) { | 819 | if (ret) { |
749 | mutex_unlock(&policy->lock); | 820 | unlock_policy_rwsem_write(cpu); |
750 | goto err_out_driver_exit; | 821 | goto err_out_driver_exit; |
751 | } | 822 | } |
752 | /* set up files for this cpu device */ | 823 | /* set up files for this cpu device */ |
@@ -761,8 +832,10 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) | |||
761 | sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); | 832 | sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); |
762 | 833 | ||
763 | spin_lock_irqsave(&cpufreq_driver_lock, flags); | 834 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
764 | for_each_cpu_mask(j, policy->cpus) | 835 | for_each_cpu_mask(j, policy->cpus) { |
765 | cpufreq_cpu_data[j] = policy; | 836 | cpufreq_cpu_data[j] = policy; |
837 | per_cpu(policy_cpu, j) = policy->cpu; | ||
838 | } | ||
766 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 839 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
767 | 840 | ||
768 | /* symlink affected CPUs */ | 841 | /* symlink affected CPUs */ |
@@ -778,14 +851,14 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) | |||
778 | ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, | 851 | ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, |
779 | "cpufreq"); | 852 | "cpufreq"); |
780 | if (ret) { | 853 | if (ret) { |
781 | mutex_unlock(&policy->lock); | 854 | unlock_policy_rwsem_write(cpu); |
782 | goto err_out_unregister; | 855 | goto err_out_unregister; |
783 | } | 856 | } |
784 | } | 857 | } |
785 | 858 | ||
786 | policy->governor = NULL; /* to ensure that the starting sequence is | 859 | policy->governor = NULL; /* to ensure that the starting sequence is
787 | * run in cpufreq_set_policy */ | 860 | * run in cpufreq_set_policy */ |
788 | mutex_unlock(&policy->lock); | 861 | unlock_policy_rwsem_write(cpu); |
789 | 862 | ||
790 | /* set default policy */ | 863 | /* set default policy */ |
791 | ret = cpufreq_set_policy(&new_policy); | 864 | ret = cpufreq_set_policy(&new_policy); |
@@ -826,11 +899,13 @@ module_out: | |||
826 | 899 | ||
827 | 900 | ||
828 | /** | 901 | /** |
829 | * cpufreq_remove_dev - remove a CPU device | 902 | * __cpufreq_remove_dev - remove a CPU device |
830 | * | 903 | * |
831 | * Removes the cpufreq interface for a CPU device. | 904 | * Removes the cpufreq interface for a CPU device. |
905 | * Caller should already have policy_rwsem in write mode for this CPU. | ||
906 | * This routine frees the rwsem before returning. | ||
832 | */ | 907 | */ |
833 | static int cpufreq_remove_dev (struct sys_device * sys_dev) | 908 | static int __cpufreq_remove_dev (struct sys_device * sys_dev) |
834 | { | 909 | { |
835 | unsigned int cpu = sys_dev->id; | 910 | unsigned int cpu = sys_dev->id; |
836 | unsigned long flags; | 911 | unsigned long flags; |
@@ -849,6 +924,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev) | |||
849 | if (!data) { | 924 | if (!data) { |
850 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 925 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
851 | cpufreq_debug_enable_ratelimit(); | 926 | cpufreq_debug_enable_ratelimit(); |
927 | unlock_policy_rwsem_write(cpu); | ||
852 | return -EINVAL; | 928 | return -EINVAL; |
853 | } | 929 | } |
854 | cpufreq_cpu_data[cpu] = NULL; | 930 | cpufreq_cpu_data[cpu] = NULL; |
@@ -865,6 +941,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev) | |||
865 | sysfs_remove_link(&sys_dev->kobj, "cpufreq"); | 941 | sysfs_remove_link(&sys_dev->kobj, "cpufreq"); |
866 | cpufreq_cpu_put(data); | 942 | cpufreq_cpu_put(data); |
867 | cpufreq_debug_enable_ratelimit(); | 943 | cpufreq_debug_enable_ratelimit(); |
944 | unlock_policy_rwsem_write(cpu); | ||
868 | return 0; | 945 | return 0; |
869 | } | 946 | } |
870 | #endif | 947 | #endif |
@@ -873,6 +950,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev) | |||
873 | if (!kobject_get(&data->kobj)) { | 950 | if (!kobject_get(&data->kobj)) { |
874 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 951 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
875 | cpufreq_debug_enable_ratelimit(); | 952 | cpufreq_debug_enable_ratelimit(); |
953 | unlock_policy_rwsem_write(cpu); | ||
876 | return -EFAULT; | 954 | return -EFAULT; |
877 | } | 955 | } |
878 | 956 | ||
@@ -906,10 +984,10 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev) | |||
906 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 984 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
907 | #endif | 985 | #endif |
908 | 986 | ||
909 | mutex_lock(&data->lock); | ||
910 | if (cpufreq_driver->target) | 987 | if (cpufreq_driver->target) |
911 | __cpufreq_governor(data, CPUFREQ_GOV_STOP); | 988 | __cpufreq_governor(data, CPUFREQ_GOV_STOP); |
912 | mutex_unlock(&data->lock); | 989 | |
990 | unlock_policy_rwsem_write(cpu); | ||
913 | 991 | ||
914 | kobject_unregister(&data->kobj); | 992 | kobject_unregister(&data->kobj); |
915 | 993 | ||
@@ -933,6 +1011,18 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev) | |||
933 | } | 1011 | } |
934 | 1012 | ||
935 | 1013 | ||
1014 | static int cpufreq_remove_dev (struct sys_device * sys_dev) | ||
1015 | { | ||
1016 | unsigned int cpu = sys_dev->id; | ||
1017 | int retval; | ||
1018 | if (unlikely(lock_policy_rwsem_write(cpu))) | ||
1019 | BUG(); | ||
1020 | |||
1021 | retval = __cpufreq_remove_dev(sys_dev); | ||
1022 | return retval; | ||
1023 | } | ||
1024 | |||
1025 | |||
936 | static void handle_update(struct work_struct *work) | 1026 | static void handle_update(struct work_struct *work) |
937 | { | 1027 | { |
938 | struct cpufreq_policy *policy = | 1028 | struct cpufreq_policy *policy = |
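[Editor's note] Note the asymmetric convention introduced above: cpufreq_remove_dev() takes the per-policy semaphore, while __cpufreq_remove_dev() drops it on every return path. A small runnable sketch of that caller-locks/callee-unlocks hand-off:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t policy_rwsem = PTHREAD_RWLOCK_INITIALIZER;

    /* Like __cpufreq_remove_dev(): assumes the caller already holds
     * the lock in write mode and releases it before returning. */
    static int __remove_dev(int cpu)
    {
        printf("tearing down policy for cpu %d\n", cpu);
        pthread_rwlock_unlock(&policy_rwsem);
        return 0;
    }

    /* Like cpufreq_remove_dev(): acquires the lock, then delegates;
     * ownership of the unlock passes to the callee. */
    static int remove_dev(int cpu)
    {
        if (pthread_rwlock_wrlock(&policy_rwsem) != 0)
            return -1;
        return __remove_dev(cpu);
    }

    int main(void)
    {
        return remove_dev(0);
    }

The convention works here because the hotplug notifier also calls __cpufreq_remove_dev() with the lock already held, but it is the kind of contract that deserves the comment the patch adds.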
@@ -980,9 +1070,12 @@ unsigned int cpufreq_quick_get(unsigned int cpu) | |||
980 | unsigned int ret_freq = 0; | 1070 | unsigned int ret_freq = 0; |
981 | 1071 | ||
982 | if (policy) { | 1072 | if (policy) { |
983 | mutex_lock(&policy->lock); | 1073 | if (unlikely(lock_policy_rwsem_read(cpu))) |
1074 | return ret_freq; | ||
1075 | |||
984 | ret_freq = policy->cur; | 1076 | ret_freq = policy->cur; |
985 | mutex_unlock(&policy->lock); | 1077 | |
1078 | unlock_policy_rwsem_read(cpu); | ||
986 | cpufreq_cpu_put(policy); | 1079 | cpufreq_cpu_put(policy); |
987 | } | 1080 | } |
988 | 1081 | ||
@@ -991,24 +1084,13 @@ unsigned int cpufreq_quick_get(unsigned int cpu) | |||
991 | EXPORT_SYMBOL(cpufreq_quick_get); | 1084 | EXPORT_SYMBOL(cpufreq_quick_get); |
992 | 1085 | ||
993 | 1086 | ||
994 | /** | 1087 | static unsigned int __cpufreq_get(unsigned int cpu) |
995 | * cpufreq_get - get the current CPU frequency (in kHz) | ||
996 | * @cpu: CPU number | ||
997 | * | ||
998 | * Get the CPU current (static) CPU frequency | ||
999 | */ | ||
1000 | unsigned int cpufreq_get(unsigned int cpu) | ||
1001 | { | 1088 | { |
1002 | struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); | 1089 | struct cpufreq_policy *policy = cpufreq_cpu_data[cpu]; |
1003 | unsigned int ret_freq = 0; | 1090 | unsigned int ret_freq = 0; |
1004 | 1091 | ||
1005 | if (!policy) | ||
1006 | return 0; | ||
1007 | |||
1008 | if (!cpufreq_driver->get) | 1092 | if (!cpufreq_driver->get) |
1009 | goto out; | 1093 | return (ret_freq); |
1010 | |||
1011 | mutex_lock(&policy->lock); | ||
1012 | 1094 | ||
1013 | ret_freq = cpufreq_driver->get(cpu); | 1095 | ret_freq = cpufreq_driver->get(cpu); |
1014 | 1096 | ||
@@ -1022,11 +1104,33 @@ unsigned int cpufreq_get(unsigned int cpu) | |||
1022 | } | 1104 | } |
1023 | } | 1105 | } |
1024 | 1106 | ||
1025 | mutex_unlock(&policy->lock); | 1107 | return (ret_freq); |
1108 | } | ||
1026 | 1109 | ||
1027 | out: | 1110 | /** |
1028 | cpufreq_cpu_put(policy); | 1111 | * cpufreq_get - get the current CPU frequency (in kHz) |
1112 | * @cpu: CPU number | ||
1113 | * | ||
1114 | * Get the CPU current (static) CPU frequency | ||
1115 | */ | ||
1116 | unsigned int cpufreq_get(unsigned int cpu) | ||
1117 | { | ||
1118 | unsigned int ret_freq = 0; | ||
1119 | struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); | ||
1120 | |||
1121 | if (!policy) | ||
1122 | goto out; | ||
1123 | |||
1124 | if (unlikely(lock_policy_rwsem_read(cpu))) | ||
1125 | goto out_policy; | ||
1126 | |||
1127 | ret_freq = __cpufreq_get(cpu); | ||
1029 | 1128 | ||
1129 | unlock_policy_rwsem_read(cpu); | ||
1130 | |||
1131 | out_policy: | ||
1132 | cpufreq_cpu_put(policy); | ||
1133 | out: | ||
1030 | return (ret_freq); | 1134 | return (ret_freq); |
1031 | } | 1135 | } |
1032 | EXPORT_SYMBOL(cpufreq_get); | 1136 | EXPORT_SYMBOL(cpufreq_get); |
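[Editor's note] cpufreq_get() is now a locking wrapper around a lockless __cpufreq_get() core, so paths like the sysfs show handler, which already hold the policy lock, can call the core directly without deadlocking. A sketch of the wrapper/core split (the frequency value is a made-up sample):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t policy_rwsem = PTHREAD_RWLOCK_INITIALIZER;
    static unsigned int cur_khz = 1600000;   /* made-up sample value */

    /* Lockless core, for callers that already hold the policy lock. */
    static unsigned int __get_freq(int cpu)
    {
        (void)cpu;
        return cur_khz;
    }

    /* Public entry point: wraps the core in the read lock. */
    static unsigned int get_freq(int cpu)
    {
        unsigned int ret;

        if (pthread_rwlock_rdlock(&policy_rwsem) != 0)
            return 0;
        ret = __get_freq(cpu);
        pthread_rwlock_unlock(&policy_rwsem);
        return ret;
    }

    int main(void)
    {
        printf("cpu0 frequency: %u kHz\n", get_freq(0));
        return 0;
    }

The same double-underscore split shows up in __cpufreq_driver_getavg() below, for the identical reason.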
@@ -1278,7 +1382,6 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier); | |||
1278 | *********************************************************************/ | 1382 | *********************************************************************/ |
1279 | 1383 | ||
1280 | 1384 | ||
1281 | /* Must be called with lock_cpu_hotplug held */ | ||
1282 | int __cpufreq_driver_target(struct cpufreq_policy *policy, | 1385 | int __cpufreq_driver_target(struct cpufreq_policy *policy, |
1283 | unsigned int target_freq, | 1386 | unsigned int target_freq, |
1284 | unsigned int relation) | 1387 | unsigned int relation) |
@@ -1304,20 +1407,19 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, | |||
1304 | if (!policy) | 1407 | if (!policy) |
1305 | return -EINVAL; | 1408 | return -EINVAL; |
1306 | 1409 | ||
1307 | lock_cpu_hotplug(); | 1410 | if (unlikely(lock_policy_rwsem_write(policy->cpu))) |
1308 | mutex_lock(&policy->lock); | 1411 | return -EINVAL; |
1309 | 1412 | ||
1310 | ret = __cpufreq_driver_target(policy, target_freq, relation); | 1413 | ret = __cpufreq_driver_target(policy, target_freq, relation); |
1311 | 1414 | ||
1312 | mutex_unlock(&policy->lock); | 1415 | unlock_policy_rwsem_write(policy->cpu); |
1313 | unlock_cpu_hotplug(); | ||
1314 | 1416 | ||
1315 | cpufreq_cpu_put(policy); | 1417 | cpufreq_cpu_put(policy); |
1316 | return ret; | 1418 | return ret; |
1317 | } | 1419 | } |
1318 | EXPORT_SYMBOL_GPL(cpufreq_driver_target); | 1420 | EXPORT_SYMBOL_GPL(cpufreq_driver_target); |
1319 | 1421 | ||
1320 | int cpufreq_driver_getavg(struct cpufreq_policy *policy) | 1422 | int __cpufreq_driver_getavg(struct cpufreq_policy *policy) |
1321 | { | 1423 | { |
1322 | int ret = 0; | 1424 | int ret = 0; |
1323 | 1425 | ||
@@ -1325,20 +1427,15 @@ int cpufreq_driver_getavg(struct cpufreq_policy *policy) | |||
1325 | if (!policy) | 1427 | if (!policy) |
1326 | return -EINVAL; | 1428 | return -EINVAL; |
1327 | 1429 | ||
1328 | mutex_lock(&policy->lock); | ||
1329 | |||
1330 | if (cpu_online(policy->cpu) && cpufreq_driver->getavg) | 1430 | if (cpu_online(policy->cpu) && cpufreq_driver->getavg) |
1331 | ret = cpufreq_driver->getavg(policy->cpu); | 1431 | ret = cpufreq_driver->getavg(policy->cpu); |
1332 | 1432 | ||
1333 | mutex_unlock(&policy->lock); | ||
1334 | |||
1335 | cpufreq_cpu_put(policy); | 1433 | cpufreq_cpu_put(policy); |
1336 | return ret; | 1434 | return ret; |
1337 | } | 1435 | } |
1338 | EXPORT_SYMBOL_GPL(cpufreq_driver_getavg); | 1436 | EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg); |
1339 | 1437 | ||
1340 | /* | 1438 | /* |
1341 | * Locking: Must be called with the lock_cpu_hotplug() lock held | ||
1342 | * when "event" is CPUFREQ_GOV_LIMITS | 1439 | * when "event" is CPUFREQ_GOV_LIMITS |
1343 | */ | 1440 | */ |
1344 | 1441 | ||
@@ -1420,9 +1517,7 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu) | |||
1420 | if (!cpu_policy) | 1517 | if (!cpu_policy) |
1421 | return -EINVAL; | 1518 | return -EINVAL; |
1422 | 1519 | ||
1423 | mutex_lock(&cpu_policy->lock); | ||
1424 | memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy)); | 1520 | memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy)); |
1425 | mutex_unlock(&cpu_policy->lock); | ||
1426 | 1521 | ||
1427 | cpufreq_cpu_put(cpu_policy); | 1522 | cpufreq_cpu_put(cpu_policy); |
1428 | return 0; | 1523 | return 0; |
@@ -1433,7 +1528,6 @@ EXPORT_SYMBOL(cpufreq_get_policy); | |||
1433 | /* | 1528 | /* |
1434 | * data : current policy. | 1529 | * data : current policy. |
1435 | * policy : policy to be set. | 1530 | * policy : policy to be set. |
1436 | * Locking: Must be called with the lock_cpu_hotplug() lock held | ||
1437 | */ | 1531 | */ |
1438 | static int __cpufreq_set_policy(struct cpufreq_policy *data, | 1532 | static int __cpufreq_set_policy(struct cpufreq_policy *data, |
1439 | struct cpufreq_policy *policy) | 1533 | struct cpufreq_policy *policy) |
@@ -1539,10 +1633,9 @@ int cpufreq_set_policy(struct cpufreq_policy *policy) | |||
1539 | if (!data) | 1633 | if (!data) |
1540 | return -EINVAL; | 1634 | return -EINVAL; |
1541 | 1635 | ||
1542 | lock_cpu_hotplug(); | 1636 | if (unlikely(lock_policy_rwsem_write(policy->cpu))) |
1637 | return -EINVAL; | ||
1543 | 1638 | ||
1544 | /* lock this CPU */ | ||
1545 | mutex_lock(&data->lock); | ||
1546 | 1639 | ||
1547 | ret = __cpufreq_set_policy(data, policy); | 1640 | ret = __cpufreq_set_policy(data, policy); |
1548 | data->user_policy.min = data->min; | 1641 | data->user_policy.min = data->min; |
@@ -1550,9 +1643,8 @@ int cpufreq_set_policy(struct cpufreq_policy *policy) | |||
1550 | data->user_policy.policy = data->policy; | 1643 | data->user_policy.policy = data->policy; |
1551 | data->user_policy.governor = data->governor; | 1644 | data->user_policy.governor = data->governor; |
1552 | 1645 | ||
1553 | mutex_unlock(&data->lock); | 1646 | unlock_policy_rwsem_write(policy->cpu); |
1554 | 1647 | ||
1555 | unlock_cpu_hotplug(); | ||
1556 | cpufreq_cpu_put(data); | 1648 | cpufreq_cpu_put(data); |
1557 | 1649 | ||
1558 | return ret; | 1650 | return ret; |
@@ -1576,8 +1668,8 @@ int cpufreq_update_policy(unsigned int cpu) | |||
1576 | if (!data) | 1668 | if (!data) |
1577 | return -ENODEV; | 1669 | return -ENODEV; |
1578 | 1670 | ||
1579 | lock_cpu_hotplug(); | 1671 | if (unlikely(lock_policy_rwsem_write(cpu))) |
1580 | mutex_lock(&data->lock); | 1672 | return -EINVAL; |
1581 | 1673 | ||
1582 | dprintk("updating policy for CPU %u\n", cpu); | 1674 | dprintk("updating policy for CPU %u\n", cpu); |
1583 | memcpy(&policy, data, sizeof(struct cpufreq_policy)); | 1675 | memcpy(&policy, data, sizeof(struct cpufreq_policy)); |
@@ -1602,8 +1694,8 @@ int cpufreq_update_policy(unsigned int cpu) | |||
1602 | 1694 | ||
1603 | ret = __cpufreq_set_policy(data, &policy); | 1695 | ret = __cpufreq_set_policy(data, &policy); |
1604 | 1696 | ||
1605 | mutex_unlock(&data->lock); | 1697 | unlock_policy_rwsem_write(cpu); |
1606 | unlock_cpu_hotplug(); | 1698 | |
1607 | cpufreq_cpu_put(data); | 1699 | cpufreq_cpu_put(data); |
1608 | return ret; | 1700 | return ret; |
1609 | } | 1701 | } |
@@ -1613,31 +1705,28 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, | |||
1613 | unsigned long action, void *hcpu) | 1705 | unsigned long action, void *hcpu) |
1614 | { | 1706 | { |
1615 | unsigned int cpu = (unsigned long)hcpu; | 1707 | unsigned int cpu = (unsigned long)hcpu; |
1616 | struct cpufreq_policy *policy; | ||
1617 | struct sys_device *sys_dev; | 1708 | struct sys_device *sys_dev; |
1709 | struct cpufreq_policy *policy; | ||
1618 | 1710 | ||
1619 | sys_dev = get_cpu_sysdev(cpu); | 1711 | sys_dev = get_cpu_sysdev(cpu); |
1620 | |||
1621 | if (sys_dev) { | 1712 | if (sys_dev) { |
1622 | switch (action) { | 1713 | switch (action) { |
1623 | case CPU_ONLINE: | 1714 | case CPU_ONLINE: |
1624 | cpufreq_add_dev(sys_dev); | 1715 | cpufreq_add_dev(sys_dev); |
1625 | break; | 1716 | break; |
1626 | case CPU_DOWN_PREPARE: | 1717 | case CPU_DOWN_PREPARE: |
1627 | /* | 1718 | if (unlikely(lock_policy_rwsem_write(cpu))) |
1628 | * We attempt to put this cpu in lowest frequency | 1719 | BUG(); |
1629 | * possible before going down. This will permit | 1720 | |
1630 | * hardware-managed P-State to switch other related | ||
1631 | * threads to min or higher speeds if possible. | ||
1632 | */ | ||
1633 | policy = cpufreq_cpu_data[cpu]; | 1721 | policy = cpufreq_cpu_data[cpu]; |
1634 | if (policy) { | 1722 | if (policy) { |
1635 | cpufreq_driver_target(policy, policy->min, | 1723 | __cpufreq_driver_target(policy, policy->min, |
1636 | CPUFREQ_RELATION_H); | 1724 | CPUFREQ_RELATION_H); |
1637 | } | 1725 | } |
1726 | __cpufreq_remove_dev(sys_dev); | ||
1638 | break; | 1727 | break; |
1639 | case CPU_DEAD: | 1728 | case CPU_DOWN_FAILED: |
1640 | cpufreq_remove_dev(sys_dev); | 1729 | cpufreq_add_dev(sys_dev); |
1641 | break; | 1730 | break; |
1642 | } | 1731 | } |
1643 | } | 1732 | } |
@@ -1751,3 +1840,16 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) | |||
1751 | return 0; | 1840 | return 0; |
1752 | } | 1841 | } |
1753 | EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); | 1842 | EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); |
1843 | |||
1844 | static int __init cpufreq_core_init(void) | ||
1845 | { | ||
1846 | int cpu; | ||
1847 | |||
1848 | for_each_possible_cpu(cpu) { | ||
1849 | per_cpu(policy_cpu, cpu) = -1; | ||
1850 | init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); | ||
1851 | } | ||
1852 | return 0; | ||
1853 | } | ||
1854 | |||
1855 | core_initcall(cpufreq_core_init); | ||
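The cpufreq hunks above all perform one conversion: lock_cpu_hotplug() plus the per-policy mutex are replaced by a per-CPU rwsem, taken after a reference on the policy and dropped in reverse order. A minimal sketch of a caller written against that convention follows; example_policy_update() is a hypothetical name, but every call it makes appears in the patch above, and the error return of lock_policy_rwsem_write() is treated as "no usable policy", matching the converted callers:

	static int example_policy_update(unsigned int cpu)
	{
		struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
		int ret;

		if (!policy)
			return -ENODEV;

		if (unlikely(lock_policy_rwsem_write(cpu))) {
			cpufreq_cpu_put(policy);
			return -EINVAL;
		}

		ret = __cpufreq_driver_target(policy, policy->min,
					      CPUFREQ_RELATION_H);

		unlock_policy_rwsem_write(cpu);
		cpufreq_cpu_put(policy);
		return ret;
	}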
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 05d6c22ba07c..26f440ccc3fb 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c | |||
@@ -429,14 +429,12 @@ static void dbs_check_cpu(int cpu) | |||
429 | static void do_dbs_timer(struct work_struct *work) | 429 | static void do_dbs_timer(struct work_struct *work) |
430 | { | 430 | { |
431 | int i; | 431 | int i; |
432 | lock_cpu_hotplug(); | ||
433 | mutex_lock(&dbs_mutex); | 432 | mutex_lock(&dbs_mutex); |
434 | for_each_online_cpu(i) | 433 | for_each_online_cpu(i) |
435 | dbs_check_cpu(i); | 434 | dbs_check_cpu(i); |
436 | schedule_delayed_work(&dbs_work, | 435 | schedule_delayed_work(&dbs_work, |
437 | usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); | 436 | usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); |
438 | mutex_unlock(&dbs_mutex); | 437 | mutex_unlock(&dbs_mutex); |
439 | unlock_cpu_hotplug(); | ||
440 | } | 438 | } |
441 | 439 | ||
442 | static inline void dbs_timer_init(void) | 440 | static inline void dbs_timer_init(void) |
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index f697449327c6..d60bcb9d14cc 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c | |||
@@ -52,19 +52,20 @@ static unsigned int def_sampling_rate; | |||
52 | static void do_dbs_timer(struct work_struct *work); | 52 | static void do_dbs_timer(struct work_struct *work); |
53 | 53 | ||
54 | /* Sampling types */ | 54 | /* Sampling types */ |
55 | enum dbs_sample {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; | 55 | enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; |
56 | 56 | ||
57 | struct cpu_dbs_info_s { | 57 | struct cpu_dbs_info_s { |
58 | cputime64_t prev_cpu_idle; | 58 | cputime64_t prev_cpu_idle; |
59 | cputime64_t prev_cpu_wall; | 59 | cputime64_t prev_cpu_wall; |
60 | struct cpufreq_policy *cur_policy; | 60 | struct cpufreq_policy *cur_policy; |
61 | struct delayed_work work; | 61 | struct delayed_work work; |
62 | enum dbs_sample sample_type; | ||
63 | unsigned int enable; | ||
64 | struct cpufreq_frequency_table *freq_table; | 62 | struct cpufreq_frequency_table *freq_table; |
65 | unsigned int freq_lo; | 63 | unsigned int freq_lo; |
66 | unsigned int freq_lo_jiffies; | 64 | unsigned int freq_lo_jiffies; |
67 | unsigned int freq_hi_jiffies; | 65 | unsigned int freq_hi_jiffies; |
66 | int cpu; | ||
67 | unsigned int enable:1, | ||
68 | sample_type:1; | ||
68 | }; | 69 | }; |
69 | static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); | 70 | static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); |
70 | 71 | ||
@@ -402,7 +403,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) | |||
402 | if (load < (dbs_tuners_ins.up_threshold - 10)) { | 403 | if (load < (dbs_tuners_ins.up_threshold - 10)) { |
403 | unsigned int freq_next, freq_cur; | 404 | unsigned int freq_next, freq_cur; |
404 | 405 | ||
405 | freq_cur = cpufreq_driver_getavg(policy); | 406 | freq_cur = __cpufreq_driver_getavg(policy); |
406 | if (!freq_cur) | 407 | if (!freq_cur) |
407 | freq_cur = policy->cur; | 408 | freq_cur = policy->cur; |
408 | 409 | ||
@@ -423,9 +424,11 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) | |||
423 | 424 | ||
424 | static void do_dbs_timer(struct work_struct *work) | 425 | static void do_dbs_timer(struct work_struct *work) |
425 | { | 426 | { |
426 | unsigned int cpu = smp_processor_id(); | 427 | struct cpu_dbs_info_s *dbs_info = |
427 | struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu); | 428 | container_of(work, struct cpu_dbs_info_s, work.work); |
428 | enum dbs_sample sample_type = dbs_info->sample_type; | 429 | unsigned int cpu = dbs_info->cpu; |
430 | int sample_type = dbs_info->sample_type; | ||
431 | |||
429 | /* We want all CPUs to do sampling nearly on same jiffy */ | 432 | /* We want all CPUs to do sampling nearly on same jiffy */ |
430 | int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); | 433 | int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); |
431 | 434 | ||
@@ -434,15 +437,19 @@ static void do_dbs_timer(struct work_struct *work) | |||
434 | 437 | ||
435 | delay -= jiffies % delay; | 438 | delay -= jiffies % delay; |
436 | 439 | ||
437 | if (!dbs_info->enable) | 440 | if (lock_policy_rwsem_write(cpu) < 0) |
441 | return; | ||
442 | |||
443 | if (!dbs_info->enable) { | ||
444 | unlock_policy_rwsem_write(cpu); | ||
438 | return; | 445 | return; |
446 | } | ||
447 | |||
439 | /* Common NORMAL_SAMPLE setup */ | 448 | /* Common NORMAL_SAMPLE setup */ |
440 | dbs_info->sample_type = DBS_NORMAL_SAMPLE; | 449 | dbs_info->sample_type = DBS_NORMAL_SAMPLE; |
441 | if (!dbs_tuners_ins.powersave_bias || | 450 | if (!dbs_tuners_ins.powersave_bias || |
442 | sample_type == DBS_NORMAL_SAMPLE) { | 451 | sample_type == DBS_NORMAL_SAMPLE) { |
443 | lock_cpu_hotplug(); | ||
444 | dbs_check_cpu(dbs_info); | 452 | dbs_check_cpu(dbs_info); |
445 | unlock_cpu_hotplug(); | ||
446 | if (dbs_info->freq_lo) { | 453 | if (dbs_info->freq_lo) { |
447 | /* Setup timer for SUB_SAMPLE */ | 454 | /* Setup timer for SUB_SAMPLE */ |
448 | dbs_info->sample_type = DBS_SUB_SAMPLE; | 455 | dbs_info->sample_type = DBS_SUB_SAMPLE; |
@@ -454,26 +461,27 @@ static void do_dbs_timer(struct work_struct *work) | |||
454 | CPUFREQ_RELATION_H); | 461 | CPUFREQ_RELATION_H); |
455 | } | 462 | } |
456 | queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); | 463 | queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); |
464 | unlock_policy_rwsem_write(cpu); | ||
457 | } | 465 | } |
458 | 466 | ||
459 | static inline void dbs_timer_init(unsigned int cpu) | 467 | static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) |
460 | { | 468 | { |
461 | struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu); | ||
462 | /* We want all CPUs to do sampling nearly on same jiffy */ | 469 | /* We want all CPUs to do sampling nearly on same jiffy */ |
463 | int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); | 470 | int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); |
464 | delay -= jiffies % delay; | 471 | delay -= jiffies % delay; |
465 | 472 | ||
473 | dbs_info->enable = 1; | ||
466 | ondemand_powersave_bias_init(); | 474 | ondemand_powersave_bias_init(); |
467 | INIT_DELAYED_WORK_NAR(&dbs_info->work, do_dbs_timer); | ||
468 | dbs_info->sample_type = DBS_NORMAL_SAMPLE; | 475 | dbs_info->sample_type = DBS_NORMAL_SAMPLE; |
469 | queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); | 476 | INIT_DELAYED_WORK_NAR(&dbs_info->work, do_dbs_timer); |
477 | queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work, | ||
478 | delay); | ||
470 | } | 479 | } |
471 | 480 | ||
472 | static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) | 481 | static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) |
473 | { | 482 | { |
474 | dbs_info->enable = 0; | 483 | dbs_info->enable = 0; |
475 | cancel_delayed_work(&dbs_info->work); | 484 | cancel_delayed_work(&dbs_info->work); |
476 | flush_workqueue(kondemand_wq); | ||
477 | } | 485 | } |
478 | 486 | ||
479 | static int cpufreq_governor_dbs(struct cpufreq_policy *policy, | 487 | static int cpufreq_governor_dbs(struct cpufreq_policy *policy, |
@@ -502,21 +510,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, | |||
502 | 510 | ||
503 | mutex_lock(&dbs_mutex); | 511 | mutex_lock(&dbs_mutex); |
504 | dbs_enable++; | 512 | dbs_enable++; |
505 | if (dbs_enable == 1) { | ||
506 | kondemand_wq = create_workqueue("kondemand"); | ||
507 | if (!kondemand_wq) { | ||
508 | printk(KERN_ERR | ||
509 | "Creation of kondemand failed\n"); | ||
510 | dbs_enable--; | ||
511 | mutex_unlock(&dbs_mutex); | ||
512 | return -ENOSPC; | ||
513 | } | ||
514 | } | ||
515 | 513 | ||
516 | rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); | 514 | rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); |
517 | if (rc) { | 515 | if (rc) { |
518 | if (dbs_enable == 1) | ||
519 | destroy_workqueue(kondemand_wq); | ||
520 | dbs_enable--; | 516 | dbs_enable--; |
521 | mutex_unlock(&dbs_mutex); | 517 | mutex_unlock(&dbs_mutex); |
522 | return rc; | 518 | return rc; |
@@ -530,7 +526,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, | |||
530 | j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j); | 526 | j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j); |
531 | j_dbs_info->prev_cpu_wall = get_jiffies_64(); | 527 | j_dbs_info->prev_cpu_wall = get_jiffies_64(); |
532 | } | 528 | } |
533 | this_dbs_info->enable = 1; | 529 | this_dbs_info->cpu = cpu; |
534 | /* | 530 | /* |
535 | * Start the timer-scheduled work when this governor | 531 |
536 | * is used for the first time | 532 |
@@ -550,7 +546,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, | |||
550 | 546 | ||
551 | dbs_tuners_ins.sampling_rate = def_sampling_rate; | 547 | dbs_tuners_ins.sampling_rate = def_sampling_rate; |
552 | } | 548 | } |
553 | dbs_timer_init(policy->cpu); | 549 | dbs_timer_init(this_dbs_info); |
554 | 550 | ||
555 | mutex_unlock(&dbs_mutex); | 551 | mutex_unlock(&dbs_mutex); |
556 | break; | 552 | break; |
@@ -560,9 +556,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, | |||
560 | dbs_timer_exit(this_dbs_info); | 556 | dbs_timer_exit(this_dbs_info); |
561 | sysfs_remove_group(&policy->kobj, &dbs_attr_group); | 557 | sysfs_remove_group(&policy->kobj, &dbs_attr_group); |
562 | dbs_enable--; | 558 | dbs_enable--; |
563 | if (dbs_enable == 0) | ||
564 | destroy_workqueue(kondemand_wq); | ||
565 | |||
566 | mutex_unlock(&dbs_mutex); | 559 | mutex_unlock(&dbs_mutex); |
567 | 560 | ||
568 | break; | 561 | break; |
@@ -591,12 +584,18 @@ static struct cpufreq_governor cpufreq_gov_dbs = { | |||
591 | 584 | ||
592 | static int __init cpufreq_gov_dbs_init(void) | 585 | static int __init cpufreq_gov_dbs_init(void) |
593 | { | 586 | { |
587 | kondemand_wq = create_workqueue("kondemand"); | ||
588 | if (!kondemand_wq) { | ||
589 | printk(KERN_ERR "Creation of kondemand failed\n"); | ||
590 | return -EFAULT; | ||
591 | } | ||
594 | return cpufreq_register_governor(&cpufreq_gov_dbs); | 592 | return cpufreq_register_governor(&cpufreq_gov_dbs); |
595 | } | 593 | } |
596 | 594 | ||
597 | static void __exit cpufreq_gov_dbs_exit(void) | 595 | static void __exit cpufreq_gov_dbs_exit(void) |
598 | { | 596 | { |
599 | cpufreq_unregister_governor(&cpufreq_gov_dbs); | 597 | cpufreq_unregister_governor(&cpufreq_gov_dbs); |
598 | destroy_workqueue(kondemand_wq); | ||
600 | } | 599 | } |
601 | 600 | ||
602 | 601 | ||
@@ -608,3 +607,4 @@ MODULE_LICENSE("GPL"); | |||
608 | 607 | ||
609 | module_init(cpufreq_gov_dbs_init); | 608 | module_init(cpufreq_gov_dbs_init); |
610 | module_exit(cpufreq_gov_dbs_exit); | 609 | module_exit(cpufreq_gov_dbs_exit); |
610 | |||
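Two details in the ondemand rework deserve isolation. The handler now derives its per-CPU state from the work item itself via container_of() instead of trusting smp_processor_id(), and the kondemand workqueue is created once at module init rather than on first governor use. A sketch of the container_of() idiom (the my_* names are hypothetical):

	static struct workqueue_struct *my_wq;

	struct my_state {
		struct delayed_work work;	/* embeds a work_struct as .work */
		int cpu;
	};

	static void my_work_fn(struct work_struct *work)
	{
		/* map the embedded work_struct back to its container */
		struct my_state *s = container_of(work, struct my_state,
						  work.work);

		/* s->cpu stays valid no matter which CPU runs the handler */
		queue_delayed_work_on(s->cpu, my_wq, &s->work, HZ);
	}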
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 91ad342a6051..d1c7cac9316c 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c | |||
@@ -370,12 +370,10 @@ __exit cpufreq_stats_exit(void) | |||
370 | cpufreq_unregister_notifier(¬ifier_trans_block, | 370 | cpufreq_unregister_notifier(¬ifier_trans_block, |
371 | CPUFREQ_TRANSITION_NOTIFIER); | 371 | CPUFREQ_TRANSITION_NOTIFIER); |
372 | unregister_hotcpu_notifier(&cpufreq_stat_cpu_notifier); | 372 | unregister_hotcpu_notifier(&cpufreq_stat_cpu_notifier); |
373 | lock_cpu_hotplug(); | ||
374 | for_each_online_cpu(cpu) { | 373 | for_each_online_cpu(cpu) { |
375 | cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, | 374 | cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, |
376 | CPU_DEAD, (void *)(long)cpu); | 375 | CPU_DEAD, (void *)(long)cpu); |
377 | } | 376 | } |
378 | unlock_cpu_hotplug(); | ||
379 | } | 377 | } |
380 | 378 | ||
381 | MODULE_AUTHOR ("Zou Nan hai <nanhai.zou@intel.com>"); | 379 | MODULE_AUTHOR ("Zou Nan hai <nanhai.zou@intel.com>"); |
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 2a4eb0bfaf30..860345c7799a 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c | |||
@@ -71,7 +71,6 @@ static int cpufreq_set(unsigned int freq, struct cpufreq_policy *policy) | |||
71 | 71 | ||
72 | dprintk("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); | 72 | dprintk("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); |
73 | 73 | ||
74 | lock_cpu_hotplug(); | ||
75 | mutex_lock(&userspace_mutex); | 74 | mutex_lock(&userspace_mutex); |
76 | if (!cpu_is_managed[policy->cpu]) | 75 | if (!cpu_is_managed[policy->cpu]) |
77 | goto err; | 76 | goto err; |
@@ -94,7 +93,6 @@ static int cpufreq_set(unsigned int freq, struct cpufreq_policy *policy) | |||
94 | 93 | ||
95 | err: | 94 | err: |
96 | mutex_unlock(&userspace_mutex); | 95 | mutex_unlock(&userspace_mutex); |
97 | unlock_cpu_hotplug(); | ||
98 | return ret; | 96 | return ret; |
99 | } | 97 | } |
100 | 98 | ||
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index cd251efda410..0a26e0663542 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c | |||
@@ -546,7 +546,7 @@ static void ads7846_rx(void *ads) | |||
546 | ts->spi->dev.bus_id, ts->tc.ignore, Rt); | 546 | ts->spi->dev.bus_id, ts->tc.ignore, Rt); |
547 | #endif | 547 | #endif |
548 | hrtimer_start(&ts->timer, ktime_set(0, TS_POLL_PERIOD), | 548 | hrtimer_start(&ts->timer, ktime_set(0, TS_POLL_PERIOD), |
549 | HRTIMER_REL); | 549 | HRTIMER_MODE_REL); |
550 | return; | 550 | return; |
551 | } | 551 | } |
552 | 552 | ||
@@ -578,7 +578,8 @@ static void ads7846_rx(void *ads) | |||
578 | #endif | 578 | #endif |
579 | } | 579 | } |
580 | 580 | ||
581 | hrtimer_start(&ts->timer, ktime_set(0, TS_POLL_PERIOD), HRTIMER_REL); | 581 | hrtimer_start(&ts->timer, ktime_set(0, TS_POLL_PERIOD), |
582 | HRTIMER_MODE_REL); | ||
582 | } | 583 | } |
583 | 584 | ||
584 | static int ads7846_debounce(void *ads, int data_idx, int *val) | 585 | static int ads7846_debounce(void *ads, int data_idx, int *val) |
@@ -667,7 +668,7 @@ static void ads7846_rx_val(void *ads) | |||
667 | status); | 668 | status); |
668 | } | 669 | } |
669 | 670 | ||
670 | static int ads7846_timer(struct hrtimer *handle) | 671 | static enum hrtimer_restart ads7846_timer(struct hrtimer *handle) |
671 | { | 672 | { |
672 | struct ads7846 *ts = container_of(handle, struct ads7846, timer); | 673 | struct ads7846 *ts = container_of(handle, struct ads7846, timer); |
673 | int status = 0; | 674 | int status = 0; |
@@ -724,7 +725,7 @@ static irqreturn_t ads7846_irq(int irq, void *handle) | |||
724 | disable_irq(ts->spi->irq); | 725 | disable_irq(ts->spi->irq); |
725 | ts->pending = 1; | 726 | ts->pending = 1; |
726 | hrtimer_start(&ts->timer, ktime_set(0, TS_POLL_DELAY), | 727 | hrtimer_start(&ts->timer, ktime_set(0, TS_POLL_DELAY), |
727 | HRTIMER_REL); | 728 | HRTIMER_MODE_REL); |
728 | } | 729 | } |
729 | } | 730 | } |
730 | spin_unlock_irqrestore(&ts->lock, flags); | 731 | spin_unlock_irqrestore(&ts->lock, flags); |
@@ -862,7 +863,7 @@ static int __devinit ads7846_probe(struct spi_device *spi) | |||
862 | ts->spi = spi; | 863 | ts->spi = spi; |
863 | ts->input = input_dev; | 864 | ts->input = input_dev; |
864 | 865 | ||
865 | hrtimer_init(&ts->timer, CLOCK_MONOTONIC, HRTIMER_REL); | 866 | hrtimer_init(&ts->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
866 | ts->timer.function = ads7846_timer; | 867 | ts->timer.function = ads7846_timer; |
867 | 868 | ||
868 | spin_lock_init(&ts->lock); | 869 | spin_lock_init(&ts->lock); |
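The ads7846 hunks track an hrtimer API change rather than a driver change: the mode constants gained a MODE infix (HRTIMER_MODE_REL, HRTIMER_MODE_ABS) and callbacks must now return enum hrtimer_restart. A minimal sketch of a one-shot timer against the updated API (the my_* names are hypothetical):

	#include <linux/hrtimer.h>
	#include <linux/ktime.h>

	static struct hrtimer my_timer;

	static enum hrtimer_restart my_timer_fn(struct hrtimer *handle)
	{
		/* do the work, then tell the core not to re-arm the timer */
		return HRTIMER_NORESTART;
	}

	static void my_timer_setup(void)
	{
		hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		my_timer.function = my_timer_fn;
		/* fire once, 1 ms from now, relative to the current time */
		hrtimer_start(&my_timer, ktime_set(0, 1000000),
			      HRTIMER_MODE_REL);
	}

The enum return type lets the compiler flag callbacks that never state whether the timer should restart, which the old int return silently allowed.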
diff --git a/drivers/isdn/gigaset/Makefile b/drivers/isdn/gigaset/Makefile index 835b806a9de7..077e297d8c72 100644 --- a/drivers/isdn/gigaset/Makefile +++ b/drivers/isdn/gigaset/Makefile | |||
@@ -5,4 +5,4 @@ ser_gigaset-y := ser-gigaset.o asyncdata.o | |||
5 | 5 | ||
6 | obj-$(CONFIG_GIGASET_M105) += usb_gigaset.o gigaset.o | 6 | obj-$(CONFIG_GIGASET_M105) += usb_gigaset.o gigaset.o |
7 | obj-$(CONFIG_GIGASET_BASE) += bas_gigaset.o gigaset.o | 7 | obj-$(CONFIG_GIGASET_BASE) += bas_gigaset.o gigaset.o |
8 | obj-$(CONFIG_GIGASET_M105) += ser_gigaset.o gigaset.o | 8 | obj-$(CONFIG_GIGASET_M101) += ser_gigaset.o gigaset.o |
diff --git a/drivers/video/s3c2410fb.c b/drivers/video/s3c2410fb.c index ccef56d0c157..ed3426062a8b 100644 --- a/drivers/video/s3c2410fb.c +++ b/drivers/video/s3c2410fb.c | |||
@@ -791,6 +791,8 @@ static int __init s3c2410fb_probe(struct platform_device *pdev) | |||
791 | 791 | ||
792 | info = fbinfo->par; | 792 | info = fbinfo->par; |
793 | info->fb = fbinfo; | 793 | info->fb = fbinfo; |
794 | info->dev = &pdev->dev; | ||
795 | |||
794 | platform_set_drvdata(pdev, fbinfo); | 796 | platform_set_drvdata(pdev, fbinfo); |
795 | 797 | ||
796 | dprintk("devinit\n"); | 798 | dprintk("devinit\n"); |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index b3609b7cdf11..403e3bad1455 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -467,6 +467,7 @@ extern struct kmem_cache *ecryptfs_header_cache_1; | |||
467 | extern struct kmem_cache *ecryptfs_header_cache_2; | 467 | extern struct kmem_cache *ecryptfs_header_cache_2; |
468 | extern struct kmem_cache *ecryptfs_xattr_cache; | 468 | extern struct kmem_cache *ecryptfs_xattr_cache; |
469 | extern struct kmem_cache *ecryptfs_lower_page_cache; | 469 | extern struct kmem_cache *ecryptfs_lower_page_cache; |
470 | extern struct kmem_cache *ecryptfs_key_record_cache; | ||
470 | 471 | ||
471 | int ecryptfs_interpose(struct dentry *hidden_dentry, | 472 | int ecryptfs_interpose(struct dentry *hidden_dentry, |
472 | struct dentry *this_dentry, struct super_block *sb, | 473 | struct dentry *this_dentry, struct super_block *sb, |
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 81156e95ef8e..b550dea8eee6 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c | |||
@@ -1638,6 +1638,8 @@ out: | |||
1638 | return rc; | 1638 | return rc; |
1639 | } | 1639 | } |
1640 | 1640 | ||
1641 | struct kmem_cache *ecryptfs_key_record_cache; | ||
1642 | |||
1641 | /** | 1643 | /** |
1642 | * ecryptfs_generate_key_packet_set | 1644 | * ecryptfs_generate_key_packet_set |
1643 | * @dest: Virtual address from which to write the key record set | 1645 | * @dest: Virtual address from which to write the key record set |
@@ -1664,50 +1666,55 @@ ecryptfs_generate_key_packet_set(char *dest_base, | |||
1664 | &ecryptfs_superblock_to_private( | 1666 | &ecryptfs_superblock_to_private( |
1665 | ecryptfs_dentry->d_sb)->mount_crypt_stat; | 1667 | ecryptfs_dentry->d_sb)->mount_crypt_stat; |
1666 | size_t written; | 1668 | size_t written; |
1667 | struct ecryptfs_key_record key_rec; | 1669 | struct ecryptfs_key_record *key_rec; |
1668 | int rc = 0; | 1670 | int rc = 0; |
1669 | 1671 | ||
1670 | (*len) = 0; | 1672 | (*len) = 0; |
1673 | key_rec = kmem_cache_alloc(ecryptfs_key_record_cache, GFP_KERNEL); | ||
1674 | if (!key_rec) { | ||
1675 | rc = -ENOMEM; | ||
1676 | goto out; | ||
1677 | } | ||
1671 | if (mount_crypt_stat->global_auth_tok) { | 1678 | if (mount_crypt_stat->global_auth_tok) { |
1672 | auth_tok = mount_crypt_stat->global_auth_tok; | 1679 | auth_tok = mount_crypt_stat->global_auth_tok; |
1673 | if (auth_tok->token_type == ECRYPTFS_PASSWORD) { | 1680 | if (auth_tok->token_type == ECRYPTFS_PASSWORD) { |
1674 | rc = write_tag_3_packet((dest_base + (*len)), | 1681 | rc = write_tag_3_packet((dest_base + (*len)), |
1675 | max, auth_tok, | 1682 | max, auth_tok, |
1676 | crypt_stat, &key_rec, | 1683 | crypt_stat, key_rec, |
1677 | &written); | 1684 | &written); |
1678 | if (rc) { | 1685 | if (rc) { |
1679 | ecryptfs_printk(KERN_WARNING, "Error " | 1686 | ecryptfs_printk(KERN_WARNING, "Error " |
1680 | "writing tag 3 packet\n"); | 1687 | "writing tag 3 packet\n"); |
1681 | goto out; | 1688 | goto out_free; |
1682 | } | 1689 | } |
1683 | (*len) += written; | 1690 | (*len) += written; |
1684 | /* Write auth tok signature packet */ | 1691 | /* Write auth tok signature packet */ |
1685 | rc = write_tag_11_packet( | 1692 | rc = write_tag_11_packet( |
1686 | (dest_base + (*len)), | 1693 | (dest_base + (*len)), |
1687 | (max - (*len)), | 1694 | (max - (*len)), |
1688 | key_rec.sig, ECRYPTFS_SIG_SIZE, &written); | 1695 | key_rec->sig, ECRYPTFS_SIG_SIZE, &written); |
1689 | if (rc) { | 1696 | if (rc) { |
1690 | ecryptfs_printk(KERN_ERR, "Error writing " | 1697 | ecryptfs_printk(KERN_ERR, "Error writing " |
1691 | "auth tok signature packet\n"); | 1698 | "auth tok signature packet\n"); |
1692 | goto out; | 1699 | goto out_free; |
1693 | } | 1700 | } |
1694 | (*len) += written; | 1701 | (*len) += written; |
1695 | } else if (auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) { | 1702 | } else if (auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) { |
1696 | rc = write_tag_1_packet(dest_base + (*len), | 1703 | rc = write_tag_1_packet(dest_base + (*len), |
1697 | max, auth_tok, | 1704 | max, auth_tok, |
1698 | crypt_stat, mount_crypt_stat, | 1705 | crypt_stat, mount_crypt_stat, |
1699 | &key_rec, &written); | 1706 | key_rec, &written); |
1700 | if (rc) { | 1707 | if (rc) { |
1701 | ecryptfs_printk(KERN_WARNING, "Error " | 1708 | ecryptfs_printk(KERN_WARNING, "Error " |
1702 | "writing tag 1 packet\n"); | 1709 | "writing tag 1 packet\n"); |
1703 | goto out; | 1710 | goto out_free; |
1704 | } | 1711 | } |
1705 | (*len) += written; | 1712 | (*len) += written; |
1706 | } else { | 1713 | } else { |
1707 | ecryptfs_printk(KERN_WARNING, "Unsupported " | 1714 | ecryptfs_printk(KERN_WARNING, "Unsupported " |
1708 | "authentication token type\n"); | 1715 | "authentication token type\n"); |
1709 | rc = -EINVAL; | 1716 | rc = -EINVAL; |
1710 | goto out; | 1717 | goto out_free; |
1711 | } | 1718 | } |
1712 | } else | 1719 | } else |
1713 | BUG(); | 1720 | BUG(); |
@@ -1717,6 +1724,9 @@ ecryptfs_generate_key_packet_set(char *dest_base, | |||
1717 | ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n"); | 1724 | ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n"); |
1718 | rc = -EIO; | 1725 | rc = -EIO; |
1719 | } | 1726 | } |
1727 | |||
1728 | out_free: | ||
1729 | kmem_cache_free(ecryptfs_key_record_cache, key_rec); | ||
1720 | out: | 1730 | out: |
1721 | if (rc) | 1731 | if (rc) |
1722 | (*len) = 0; | 1732 | (*len) = 0; |
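The keystore change is a stack-size fix: struct ecryptfs_key_record is large enough that keeping it as a local variable risks overflowing the kernel stack, so it moves into a dedicated slab cache and is allocated per call. The shape of the pattern, with the big_record names hypothetical:

	struct big_record {
		unsigned char data[1024];	/* illustrative size only */
	};

	static struct kmem_cache *big_record_cache;	/* created at init */

	static int use_big_record(void)
	{
		struct big_record *rec;		/* was a stack variable */
		int rc = 0;

		rec = kmem_cache_alloc(big_record_cache, GFP_KERNEL);
		if (!rec)
			return -ENOMEM;
		/* ... use rec exactly as the old on-stack copy ... */
		kmem_cache_free(big_record_cache, rec);
		return rc;
	}

Note how every early exit between the alloc and the free is redirected to out_free above, so the record cannot leak.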
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 26fe405a5763..80044d196fe0 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -651,6 +651,11 @@ static struct ecryptfs_cache_info { | |||
651 | .name = "ecryptfs_lower_page_cache", | 651 | .name = "ecryptfs_lower_page_cache", |
652 | .size = PAGE_CACHE_SIZE, | 652 | .size = PAGE_CACHE_SIZE, |
653 | }, | 653 | }, |
654 | { | ||
655 | .cache = &ecryptfs_key_record_cache, | ||
656 | .name = "ecryptfs_key_record_cache", | ||
657 | .size = sizeof(struct ecryptfs_key_record), | ||
658 | }, | ||
654 | }; | 659 | }; |
655 | 660 | ||
656 | static void ecryptfs_free_kmem_caches(void) | 661 | static void ecryptfs_free_kmem_caches(void) |
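Registering the new cache is a one-entry addition because ecryptfs builds all of its caches from this table at init time. The loop such a table implies looks roughly like the sketch below; the iteration, the array name ecryptfs_cache_infos, and the kmem_cache_create() arguments are assumptions, only the .cache/.name/.size fields come from the patch:

	static int example_init_caches(void)
	{
		int i;

		for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) {
			struct ecryptfs_cache_info *info =
				&ecryptfs_cache_infos[i];

			/* each entry points at the global cache variable
			 * it is responsible for filling in */
			*(info->cache) = kmem_cache_create(info->name,
					info->size, 0, SLAB_HWCACHE_ALIGN,
					NULL, NULL);
			if (!*(info->cache))
				return -ENOMEM;	/* caller unwinds */
		}
		return 0;
	}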
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 47d7e7b611f7..3baf253be95a 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
@@ -169,7 +169,8 @@ int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid) | |||
169 | if (!new_id) { | 169 | if (!new_id) { |
170 | rc = -ENOMEM; | 170 | rc = -ENOMEM; |
171 | ecryptfs_printk(KERN_ERR, "Failed to allocate memory; unable " | 171 | ecryptfs_printk(KERN_ERR, "Failed to allocate memory; unable " |
172 | "to register daemon [%d] for user\n", pid, uid); | 172 | "to register daemon [%d] for user [%d]\n", |
173 | pid, uid); | ||
173 | goto unlock; | 174 | goto unlock; |
174 | } | 175 | } |
175 | if (!ecryptfs_find_daemon_id(uid, &old_id)) { | 176 | if (!ecryptfs_find_daemon_id(uid, &old_id)) { |
diff --git a/fs/namei.c b/fs/namei.c index 161e2225c757..ee60cc4d3453 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -2688,10 +2688,11 @@ int __page_symlink(struct inode *inode, const char *symname, int len, | |||
2688 | { | 2688 | { |
2689 | struct address_space *mapping = inode->i_mapping; | 2689 | struct address_space *mapping = inode->i_mapping; |
2690 | struct page *page; | 2690 | struct page *page; |
2691 | int err = -ENOMEM; | 2691 | int err; |
2692 | char *kaddr; | 2692 | char *kaddr; |
2693 | 2693 | ||
2694 | retry: | 2694 | retry: |
2695 | err = -ENOMEM; | ||
2695 | page = find_or_create_page(mapping, 0, gfp_mask); | 2696 | page = find_or_create_page(mapping, 0, gfp_mask); |
2696 | if (!page) | 2697 | if (!page) |
2697 | goto fail; | 2698 | goto fail; |
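The namei change is subtle: on a second pass through the retry label, err may still hold an error assigned further down (after the allocation succeeded on the previous pass), so -ENOMEM has to be re-established on every pass, not just at declaration. The same pattern in isolation (try_alloc() and try_use() are hypothetical helpers):

	static int do_with_retry(void)
	{
		int err;
	retry:
		err = -ENOMEM;	/* reset on every pass through the label */
		if (!try_alloc())
			goto fail;	/* reports -ENOMEM on any pass */
		err = try_use();	/* may overwrite err */
		if (err == -EAGAIN)
			goto retry;
	fail:
		return err;
	}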
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 5d94555cdc83..832673b14587 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c | |||
@@ -61,9 +61,11 @@ | |||
61 | 61 | ||
62 | /* flags used to simulate posix default ACLs */ | 62 | /* flags used to simulate posix default ACLs */ |
63 | #define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \ | 63 | #define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \ |
64 | | NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE) | 64 | | NFS4_ACE_DIRECTORY_INHERIT_ACE) |
65 | 65 | ||
66 | #define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS | NFS4_ACE_IDENTIFIER_GROUP) | 66 | #define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS \ |
67 | | NFS4_ACE_INHERIT_ONLY_ACE \ | ||
68 | | NFS4_ACE_IDENTIFIER_GROUP) | ||
67 | 69 | ||
68 | #define MASK_EQUAL(mask1, mask2) \ | 70 | #define MASK_EQUAL(mask1, mask2) \ |
69 | ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) | 71 | ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) |
@@ -87,12 +89,19 @@ mask_from_posix(unsigned short perm, unsigned int flags) | |||
87 | } | 89 | } |
88 | 90 | ||
89 | static u32 | 91 | static u32 |
90 | deny_mask(u32 allow_mask, unsigned int flags) | 92 | deny_mask_from_posix(unsigned short perm, u32 flags) |
91 | { | 93 | { |
92 | u32 ret = ~allow_mask & ~NFS4_MASK_UNSUPP; | 94 | u32 mask = 0; |
93 | if (!(flags & NFS4_ACL_DIR)) | 95 | |
94 | ret &= ~NFS4_ACE_DELETE_CHILD; | 96 | if (perm & ACL_READ) |
95 | return ret; | 97 | mask |= NFS4_READ_MODE; |
98 | if (perm & ACL_WRITE) | ||
99 | mask |= NFS4_WRITE_MODE; | ||
100 | if ((perm & ACL_WRITE) && (flags & NFS4_ACL_DIR)) | ||
101 | mask |= NFS4_ACE_DELETE_CHILD; | ||
102 | if (perm & ACL_EXECUTE) | ||
103 | mask |= NFS4_EXECUTE_MODE; | ||
104 | return mask; | ||
96 | } | 105 | } |
97 | 106 | ||
98 | /* XXX: modify functions to return NFS errors; they're only ever | 107 | /* XXX: modify functions to return NFS errors; they're only ever |
@@ -126,108 +135,151 @@ struct ace_container { | |||
126 | }; | 135 | }; |
127 | 136 | ||
128 | static short ace2type(struct nfs4_ace *); | 137 | static short ace2type(struct nfs4_ace *); |
129 | static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int); | 138 | static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, |
130 | static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int); | 139 | unsigned int); |
131 | int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); | 140 | void nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); |
132 | static int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *); | ||
133 | 141 | ||
134 | struct nfs4_acl * | 142 | struct nfs4_acl * |
135 | nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, | 143 | nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, |
136 | unsigned int flags) | 144 | unsigned int flags) |
137 | { | 145 | { |
138 | struct nfs4_acl *acl; | 146 | struct nfs4_acl *acl; |
139 | int error = -EINVAL; | 147 | int size = 0; |
140 | 148 | ||
141 | if ((pacl != NULL && | 149 | if (pacl) { |
142 | (posix_acl_valid(pacl) < 0 || pacl->a_count == 0)) || | 150 | if (posix_acl_valid(pacl) < 0) |
143 | (dpacl != NULL && | 151 | return ERR_PTR(-EINVAL); |
144 | (posix_acl_valid(dpacl) < 0 || dpacl->a_count == 0))) | 152 | size += 2*pacl->a_count; |
145 | goto out_err; | ||
146 | |||
147 | acl = nfs4_acl_new(); | ||
148 | if (acl == NULL) { | ||
149 | error = -ENOMEM; | ||
150 | goto out_err; | ||
151 | } | 153 | } |
152 | 154 | if (dpacl) { | |
153 | if (pacl != NULL) { | 155 | if (posix_acl_valid(dpacl) < 0) |
154 | error = _posix_to_nfsv4_one(pacl, acl, | 156 | return ERR_PTR(-EINVAL); |
155 | flags & ~NFS4_ACL_TYPE_DEFAULT); | 157 | size += 2*dpacl->a_count; |
156 | if (error < 0) | ||
157 | goto out_acl; | ||
158 | } | 158 | } |
159 | 159 | ||
160 | if (dpacl != NULL) { | 160 | /* Allocate for worst case: one (deny, allow) pair each: */ |
161 | error = _posix_to_nfsv4_one(dpacl, acl, | 161 | acl = nfs4_acl_new(size); |
162 | flags | NFS4_ACL_TYPE_DEFAULT); | 162 | if (acl == NULL) |
163 | if (error < 0) | 163 | return ERR_PTR(-ENOMEM); |
164 | goto out_acl; | ||
165 | } | ||
166 | 164 | ||
167 | return acl; | 165 | if (pacl) |
166 | _posix_to_nfsv4_one(pacl, acl, flags & ~NFS4_ACL_TYPE_DEFAULT); | ||
168 | 167 | ||
169 | out_acl: | 168 | if (dpacl) |
170 | nfs4_acl_free(acl); | 169 | _posix_to_nfsv4_one(dpacl, acl, flags | NFS4_ACL_TYPE_DEFAULT); |
171 | out_err: | ||
172 | acl = ERR_PTR(error); | ||
173 | 170 | ||
174 | return acl; | 171 | return acl; |
175 | } | 172 | } |
176 | 173 | ||
177 | static int | 174 | struct posix_acl_summary { |
178 | nfs4_acl_add_pair(struct nfs4_acl *acl, int eflag, u32 mask, int whotype, | 175 | unsigned short owner; |
179 | uid_t owner, unsigned int flags) | 176 | unsigned short users; |
177 | unsigned short group; | ||
178 | unsigned short groups; | ||
179 | unsigned short other; | ||
180 | unsigned short mask; | ||
181 | }; | ||
182 | |||
183 | static void | ||
184 | summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas) | ||
180 | { | 185 | { |
181 | int error; | 186 | struct posix_acl_entry *pa, *pe; |
182 | 187 | pas->users = 0; | |
183 | error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, | 188 | pas->groups = 0; |
184 | eflag, mask, whotype, owner); | 189 | pas->mask = 07; |
185 | if (error < 0) | 190 | |
186 | return error; | 191 | pe = acl->a_entries + acl->a_count; |
187 | error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, | 192 | |
188 | eflag, deny_mask(mask, flags), whotype, owner); | 193 | FOREACH_ACL_ENTRY(pa, acl, pe) { |
189 | return error; | 194 | switch (pa->e_tag) { |
195 | case ACL_USER_OBJ: | ||
196 | pas->owner = pa->e_perm; | ||
197 | break; | ||
198 | case ACL_GROUP_OBJ: | ||
199 | pas->group = pa->e_perm; | ||
200 | break; | ||
201 | case ACL_USER: | ||
202 | pas->users |= pa->e_perm; | ||
203 | break; | ||
204 | case ACL_GROUP: | ||
205 | pas->groups |= pa->e_perm; | ||
206 | break; | ||
207 | case ACL_OTHER: | ||
208 | pas->other = pa->e_perm; | ||
209 | break; | ||
210 | case ACL_MASK: | ||
211 | pas->mask = pa->e_perm; | ||
212 | break; | ||
213 | } | ||
214 | } | ||
215 | /* We'll only care about effective permissions: */ | ||
216 | pas->users &= pas->mask; | ||
217 | pas->group &= pas->mask; | ||
218 | pas->groups &= pas->mask; | ||
190 | } | 219 | } |
191 | 220 | ||
192 | /* We assume the acl has been verified with posix_acl_valid. */ | 221 | /* We assume the acl has been verified with posix_acl_valid. */ |
193 | static int | 222 | static void |
194 | _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, | 223 | _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, |
195 | unsigned int flags) | 224 | unsigned int flags) |
196 | { | 225 | { |
197 | struct posix_acl_entry *pa, *pe, *group_owner_entry; | 226 | struct posix_acl_entry *pa, *group_owner_entry; |
198 | int error = -EINVAL; | 227 | struct nfs4_ace *ace; |
199 | u32 mask, mask_mask; | 228 | struct posix_acl_summary pas; |
229 | unsigned short deny; | ||
200 | int eflag = ((flags & NFS4_ACL_TYPE_DEFAULT) ? | 230 | int eflag = ((flags & NFS4_ACL_TYPE_DEFAULT) ? |
201 | NFS4_INHERITANCE_FLAGS : 0); | 231 | NFS4_INHERITANCE_FLAGS : 0); |
202 | 232 | ||
203 | BUG_ON(pacl->a_count < 3); | 233 | BUG_ON(pacl->a_count < 3); |
204 | pe = pacl->a_entries + pacl->a_count; | 234 | summarize_posix_acl(pacl, &pas); |
205 | pa = pe - 2; /* if mask entry exists, it's second from the last. */ | ||
206 | if (pa->e_tag == ACL_MASK) | ||
207 | mask_mask = deny_mask(mask_from_posix(pa->e_perm, flags), flags); | ||
208 | else | ||
209 | mask_mask = 0; | ||
210 | 235 | ||
211 | pa = pacl->a_entries; | 236 | pa = pacl->a_entries; |
212 | BUG_ON(pa->e_tag != ACL_USER_OBJ); | 237 | ace = acl->aces + acl->naces; |
213 | mask = mask_from_posix(pa->e_perm, flags | NFS4_ACL_OWNER); | ||
214 | error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_OWNER, 0, flags); | ||
215 | if (error < 0) | ||
216 | goto out; | ||
217 | pa++; | ||
218 | 238 | ||
219 | while (pa->e_tag == ACL_USER) { | 239 | /* We could deny everything not granted by the owner: */ |
220 | mask = mask_from_posix(pa->e_perm, flags); | 240 | deny = ~pas.owner; |
221 | error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, | 241 | /* |
222 | eflag, mask_mask, NFS4_ACL_WHO_NAMED, pa->e_id); | 242 | * but it is equivalent (and simpler) to deny only what is not |
223 | if (error < 0) | 243 | * granted by later entries: |
224 | goto out; | 244 | */ |
245 | deny &= pas.users | pas.group | pas.groups | pas.other; | ||
246 | if (deny) { | ||
247 | ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; | ||
248 | ace->flag = eflag; | ||
249 | ace->access_mask = deny_mask_from_posix(deny, flags); | ||
250 | ace->whotype = NFS4_ACL_WHO_OWNER; | ||
251 | ace++; | ||
252 | acl->naces++; | ||
253 | } | ||
225 | 254 | ||
255 | ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; | ||
256 | ace->flag = eflag; | ||
257 | ace->access_mask = mask_from_posix(pa->e_perm, flags | NFS4_ACL_OWNER); | ||
258 | ace->whotype = NFS4_ACL_WHO_OWNER; | ||
259 | ace++; | ||
260 | acl->naces++; | ||
261 | pa++; | ||
226 | 262 | ||
227 | error = nfs4_acl_add_pair(acl, eflag, mask, | 263 | while (pa->e_tag == ACL_USER) { |
228 | NFS4_ACL_WHO_NAMED, pa->e_id, flags); | 264 | deny = ~(pa->e_perm & pas.mask); |
229 | if (error < 0) | 265 | deny &= pas.groups | pas.group | pas.other; |
230 | goto out; | 266 | if (deny) { |
267 | ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; | ||
268 | ace->flag = eflag; | ||
269 | ace->access_mask = deny_mask_from_posix(deny, flags); | ||
270 | ace->whotype = NFS4_ACL_WHO_NAMED; | ||
271 | ace->who = pa->e_id; | ||
272 | ace++; | ||
273 | acl->naces++; | ||
274 | } | ||
275 | ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; | ||
276 | ace->flag = eflag; | ||
277 | ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, | ||
278 | flags); | ||
279 | ace->whotype = NFS4_ACL_WHO_NAMED; | ||
280 | ace->who = pa->e_id; | ||
281 | ace++; | ||
282 | acl->naces++; | ||
231 | pa++; | 283 | pa++; |
232 | } | 284 | } |
233 | 285 | ||
@@ -236,67 +288,65 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, | |||
236 | 288 | ||
237 | /* allow ACEs */ | 289 | /* allow ACEs */ |
238 | 290 | ||
239 | if (pacl->a_count > 3) { | ||
240 | BUG_ON(pa->e_tag != ACL_GROUP_OBJ); | ||
241 | error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, | ||
242 | NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask, | ||
243 | NFS4_ACL_WHO_GROUP, 0); | ||
244 | if (error < 0) | ||
245 | goto out; | ||
246 | } | ||
247 | group_owner_entry = pa; | 291 | group_owner_entry = pa; |
248 | mask = mask_from_posix(pa->e_perm, flags); | 292 | |
249 | error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, | 293 | ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; |
250 | NFS4_ACE_IDENTIFIER_GROUP | eflag, mask, | 294 | ace->flag = eflag; |
251 | NFS4_ACL_WHO_GROUP, 0); | 295 | ace->access_mask = mask_from_posix(pas.group, flags); |
252 | if (error < 0) | 296 | ace->whotype = NFS4_ACL_WHO_GROUP; |
253 | goto out; | 297 | ace++; |
298 | acl->naces++; | ||
254 | pa++; | 299 | pa++; |
255 | 300 | ||
256 | while (pa->e_tag == ACL_GROUP) { | 301 | while (pa->e_tag == ACL_GROUP) { |
257 | mask = mask_from_posix(pa->e_perm, flags); | 302 | ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; |
258 | error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, | 303 | ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; |
259 | NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask, | 304 | ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, |
260 | NFS4_ACL_WHO_NAMED, pa->e_id); | 305 | flags); |
261 | if (error < 0) | 306 | ace->whotype = NFS4_ACL_WHO_NAMED; |
262 | goto out; | 307 | ace->who = pa->e_id; |
263 | 308 | ace++; | |
264 | error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, | 309 | acl->naces++; |
265 | NFS4_ACE_IDENTIFIER_GROUP | eflag, mask, | ||
266 | NFS4_ACL_WHO_NAMED, pa->e_id); | ||
267 | if (error < 0) | ||
268 | goto out; | ||
269 | pa++; | 310 | pa++; |
270 | } | 311 | } |
271 | 312 | ||
272 | /* deny ACEs */ | 313 | /* deny ACEs */ |
273 | 314 | ||
274 | pa = group_owner_entry; | 315 | pa = group_owner_entry; |
275 | mask = mask_from_posix(pa->e_perm, flags); | 316 | |
276 | error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, | 317 | deny = ~pas.group & pas.other; |
277 | NFS4_ACE_IDENTIFIER_GROUP | eflag, | 318 | if (deny) { |
278 | deny_mask(mask, flags), NFS4_ACL_WHO_GROUP, 0); | 319 | ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; |
279 | if (error < 0) | 320 | ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; |
280 | goto out; | 321 | ace->access_mask = deny_mask_from_posix(deny, flags); |
322 | ace->whotype = NFS4_ACL_WHO_GROUP; | ||
323 | ace++; | ||
324 | acl->naces++; | ||
325 | } | ||
281 | pa++; | 326 | pa++; |
327 | |||
282 | while (pa->e_tag == ACL_GROUP) { | 328 | while (pa->e_tag == ACL_GROUP) { |
283 | mask = mask_from_posix(pa->e_perm, flags); | 329 | deny = ~(pa->e_perm & pas.mask); |
284 | error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, | 330 | deny &= pas.other; |
285 | NFS4_ACE_IDENTIFIER_GROUP | eflag, | 331 | if (deny) { |
286 | deny_mask(mask, flags), NFS4_ACL_WHO_NAMED, pa->e_id); | 332 | ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; |
287 | if (error < 0) | 333 | ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; |
288 | goto out; | 334 | ace->access_mask = deny_mask_from_posix(deny, flags); |
335 | ace->whotype = NFS4_ACL_WHO_NAMED; | ||
336 | ace->who = pa->e_id; | ||
337 | ace++; | ||
338 | acl->naces++; | ||
339 | } | ||
289 | pa++; | 340 | pa++; |
290 | } | 341 | } |
291 | 342 | ||
292 | if (pa->e_tag == ACL_MASK) | 343 | if (pa->e_tag == ACL_MASK) |
293 | pa++; | 344 | pa++; |
294 | BUG_ON(pa->e_tag != ACL_OTHER); | 345 | ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE; |
295 | mask = mask_from_posix(pa->e_perm, flags); | 346 | ace->flag = eflag; |
296 | error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_EVERYONE, 0, flags); | 347 | ace->access_mask = mask_from_posix(pa->e_perm, flags); |
297 | 348 | ace->whotype = NFS4_ACL_WHO_EVERYONE; | |
298 | out: | 349 | acl->naces++; |
299 | return error; | ||
300 | } | 350 | } |
301 | 351 | ||
302 | static void | 352 | static void |
@@ -342,46 +392,6 @@ sort_pacl(struct posix_acl *pacl) | |||
342 | return; | 392 | return; |
343 | } | 393 | } |
344 | 394 | ||
345 | int | ||
346 | nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, | ||
347 | struct posix_acl **dpacl, unsigned int flags) | ||
348 | { | ||
349 | struct nfs4_acl *dacl; | ||
350 | int error = -ENOMEM; | ||
351 | |||
352 | *pacl = NULL; | ||
353 | *dpacl = NULL; | ||
354 | |||
355 | dacl = nfs4_acl_new(); | ||
356 | if (dacl == NULL) | ||
357 | goto out; | ||
358 | |||
359 | error = nfs4_acl_split(acl, dacl); | ||
360 | if (error) | ||
361 | goto out_acl; | ||
362 | |||
363 | *pacl = _nfsv4_to_posix_one(acl, flags); | ||
364 | if (IS_ERR(*pacl)) { | ||
365 | error = PTR_ERR(*pacl); | ||
366 | *pacl = NULL; | ||
367 | goto out_acl; | ||
368 | } | ||
369 | |||
370 | *dpacl = _nfsv4_to_posix_one(dacl, flags); | ||
371 | if (IS_ERR(*dpacl)) { | ||
372 | error = PTR_ERR(*dpacl); | ||
373 | *dpacl = NULL; | ||
374 | } | ||
375 | out_acl: | ||
376 | if (error) { | ||
377 | posix_acl_release(*pacl); | ||
378 | *pacl = NULL; | ||
379 | } | ||
380 | nfs4_acl_free(dacl); | ||
381 | out: | ||
382 | return error; | ||
383 | } | ||
384 | |||
385 | /* | 395 | /* |
386 | * While processing the NFSv4 ACE, this maintains bitmasks representing | 396 | * While processing the NFSv4 ACE, this maintains bitmasks representing |
387 | * which permission bits have been allowed and which denied to a given | 397 | * which permission bits have been allowed and which denied to a given |
@@ -406,6 +416,7 @@ struct posix_ace_state_array { | |||
406 | * calculated so far: */ | 416 | * calculated so far: */ |
407 | 417 | ||
408 | struct posix_acl_state { | 418 | struct posix_acl_state { |
419 | int empty; | ||
409 | struct posix_ace_state owner; | 420 | struct posix_ace_state owner; |
410 | struct posix_ace_state group; | 421 | struct posix_ace_state group; |
411 | struct posix_ace_state other; | 422 | struct posix_ace_state other; |
@@ -421,6 +432,7 @@ init_state(struct posix_acl_state *state, int cnt) | |||
421 | int alloc; | 432 | int alloc; |
422 | 433 | ||
423 | memset(state, 0, sizeof(struct posix_acl_state)); | 434 | memset(state, 0, sizeof(struct posix_acl_state)); |
435 | state->empty = 1; | ||
424 | /* | 436 | /* |
425 | * In the worst case, each individual acl could be for a distinct | 437 | * In the worst case, each individual acl could be for a distinct |
426 | * named user or group, but we don't know which, so we allocate | 438 |
@@ -488,6 +500,20 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) | |||
488 | int nace; | 500 | int nace; |
489 | int i, error = 0; | 501 | int i, error = 0; |
490 | 502 | ||
503 | /* | ||
504 | * ACLs with no ACEs are treated differently in the inheritable | ||
505 | * and effective cases: when there are no inheritable ACEs, we | ||
506 | * set a zero-length default posix acl: | ||
507 | */ | ||
508 | if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) { | ||
509 | pacl = posix_acl_alloc(0, GFP_KERNEL); | ||
510 | return pacl ? pacl : ERR_PTR(-ENOMEM); | ||
511 | } | ||
512 | /* | ||
513 | * When there are no effective ACEs, the following will end | ||
514 | * up setting a 3-element effective posix ACL with all | ||
515 | * permissions zero. | ||
516 | */ | ||
491 | nace = 4 + state->users->n + state->groups->n; | 517 | nace = 4 + state->users->n + state->groups->n; |
492 | pacl = posix_acl_alloc(nace, GFP_KERNEL); | 518 | pacl = posix_acl_alloc(nace, GFP_KERNEL); |
493 | if (!pacl) | 519 | if (!pacl) |
@@ -603,6 +629,8 @@ static void process_one_v4_ace(struct posix_acl_state *state, | |||
603 | u32 mask = ace->access_mask; | 629 | u32 mask = ace->access_mask; |
604 | int i; | 630 | int i; |
605 | 631 | ||
632 | state->empty = 0; | ||
633 | |||
606 | switch (ace2type(ace)) { | 634 | switch (ace2type(ace)) { |
607 | case ACL_USER_OBJ: | 635 | case ACL_USER_OBJ: |
608 | if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { | 636 | if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { |
@@ -666,75 +694,62 @@ static void process_one_v4_ace(struct posix_acl_state *state, | |||
666 | } | 694 | } |
667 | } | 695 | } |
668 | 696 | ||
669 | static struct posix_acl * | 697 | int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, |
670 | _nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags) | 698 | struct posix_acl **dpacl, unsigned int flags) |
671 | { | 699 | { |
672 | struct posix_acl_state state; | 700 | struct posix_acl_state effective_acl_state, default_acl_state; |
673 | struct posix_acl *pacl; | ||
674 | struct nfs4_ace *ace; | 701 | struct nfs4_ace *ace; |
675 | int ret; | 702 | int ret; |
676 | 703 | ||
677 | ret = init_state(&state, n4acl->naces); | 704 | ret = init_state(&effective_acl_state, acl->naces); |
678 | if (ret) | 705 | if (ret) |
679 | return ERR_PTR(ret); | 706 | return ret; |
680 | 707 | ret = init_state(&default_acl_state, acl->naces); | |
681 | list_for_each_entry(ace, &n4acl->ace_head, l_ace) | 708 | if (ret) |
682 | process_one_v4_ace(&state, ace); | 709 | goto out_estate; |
683 | 710 | ret = -EINVAL; | |
684 | pacl = posix_state_to_acl(&state, flags); | 711 | for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { |
685 | |||
686 | free_state(&state); | ||
687 | |||
688 | if (!IS_ERR(pacl)) | ||
689 | sort_pacl(pacl); | ||
690 | return pacl; | ||
691 | } | ||
692 | |||
693 | static int | ||
694 | nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl) | ||
695 | { | ||
696 | struct list_head *h, *n; | ||
697 | struct nfs4_ace *ace; | ||
698 | int error = 0; | ||
699 | |||
700 | list_for_each_safe(h, n, &acl->ace_head) { | ||
701 | ace = list_entry(h, struct nfs4_ace, l_ace); | ||
702 | |||
703 | if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE && | 712 | if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE && |
704 | ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) | 713 | ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) |
705 | return -EINVAL; | 714 | goto out_dstate; |
706 | |||
707 | if (ace->flag & ~NFS4_SUPPORTED_FLAGS) | 715 | if (ace->flag & ~NFS4_SUPPORTED_FLAGS) |
708 | return -EINVAL; | 716 | goto out_dstate; |
709 | 717 | if ((ace->flag & NFS4_INHERITANCE_FLAGS) == 0) { | |
710 | switch (ace->flag & NFS4_INHERITANCE_FLAGS) { | 718 | process_one_v4_ace(&effective_acl_state, ace); |
711 | case 0: | ||
712 | /* Leave this ace in the effective acl: */ | ||
713 | continue; | 719 | continue; |
714 | case NFS4_INHERITANCE_FLAGS: | ||
715 | /* Add this ace to the default acl and remove it | ||
716 | * from the effective acl: */ | ||
717 | error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, | ||
718 | ace->access_mask, ace->whotype, ace->who); | ||
719 | if (error) | ||
720 | return error; | ||
721 | list_del(h); | ||
722 | kfree(ace); | ||
723 | acl->naces--; | ||
724 | break; | ||
725 | case NFS4_INHERITANCE_FLAGS & ~NFS4_ACE_INHERIT_ONLY_ACE: | ||
726 | /* Add this ace to the default, but leave it in | ||
727 | * the effective acl as well: */ | ||
728 | error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, | ||
729 | ace->access_mask, ace->whotype, ace->who); | ||
730 | if (error) | ||
731 | return error; | ||
732 | break; | ||
733 | default: | ||
734 | return -EINVAL; | ||
735 | } | 720 | } |
721 | if (!(flags & NFS4_ACL_DIR)) | ||
722 | goto out_dstate; | ||
723 | /* | ||
724 | * Note that when only one of FILE_INHERIT or DIRECTORY_INHERIT | ||
725 | * is set, we're effectively turning on the other. That's OK, | ||
726 | * according to rfc 3530. | ||
727 | */ | ||
728 | process_one_v4_ace(&default_acl_state, ace); | ||
729 | |||
730 | if (!(ace->flag & NFS4_ACE_INHERIT_ONLY_ACE)) | ||
731 | process_one_v4_ace(&effective_acl_state, ace); | ||
736 | } | 732 | } |
737 | return 0; | 733 | *pacl = posix_state_to_acl(&effective_acl_state, flags); |
734 | if (IS_ERR(*pacl)) { | ||
735 | ret = PTR_ERR(*pacl); | ||
736 | goto out_dstate; | ||
737 | } | ||
738 | *dpacl = posix_state_to_acl(&default_acl_state, | ||
739 | flags | NFS4_ACL_TYPE_DEFAULT); | ||
740 | if (IS_ERR(*dpacl)) { | ||
741 | ret = PTR_ERR(*dpacl); | ||
742 | posix_acl_release(*pacl); | ||
743 | goto out_dstate; | ||
744 | } | ||
745 | sort_pacl(*pacl); | ||
746 | sort_pacl(*dpacl); | ||
747 | ret = 0; | ||
748 | out_dstate: | ||
749 | free_state(&default_acl_state); | ||
750 | out_estate: | ||
751 | free_state(&effective_acl_state); | ||
752 | return ret; | ||
738 | } | 753 | } |
739 | 754 | ||
740 | static short | 755 | static short |
@@ -759,48 +774,22 @@ EXPORT_SYMBOL(nfs4_acl_posix_to_nfsv4); | |||
759 | EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix); | 774 | EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix); |
760 | 775 | ||
761 | struct nfs4_acl * | 776 | struct nfs4_acl * |
762 | nfs4_acl_new(void) | 777 | nfs4_acl_new(int n) |
763 | { | 778 | { |
764 | struct nfs4_acl *acl; | 779 | struct nfs4_acl *acl; |
765 | 780 | ||
766 | if ((acl = kmalloc(sizeof(*acl), GFP_KERNEL)) == NULL) | 781 | acl = kmalloc(sizeof(*acl) + n*sizeof(struct nfs4_ace), GFP_KERNEL); |
782 | if (acl == NULL) | ||
767 | return NULL; | 783 | return NULL; |
768 | |||
769 | acl->naces = 0; | 784 | acl->naces = 0; |
770 | INIT_LIST_HEAD(&acl->ace_head); | ||
771 | |||
772 | return acl; | 785 | return acl; |
773 | } | 786 | } |
774 | 787 | ||
775 | void | 788 | void |
776 | nfs4_acl_free(struct nfs4_acl *acl) | ||
777 | { | ||
778 | struct list_head *h; | ||
779 | struct nfs4_ace *ace; | ||
780 | |||
781 | if (!acl) | ||
782 | return; | ||
783 | |||
784 | while (!list_empty(&acl->ace_head)) { | ||
785 | h = acl->ace_head.next; | ||
786 | list_del(h); | ||
787 | ace = list_entry(h, struct nfs4_ace, l_ace); | ||
788 | kfree(ace); | ||
789 | } | ||
790 | |||
791 | kfree(acl); | ||
792 | |||
793 | return; | ||
794 | } | ||
795 | |||
796 | int | ||
797 | nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask, | 789 | nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask, |
798 | int whotype, uid_t who) | 790 | int whotype, uid_t who) |
799 | { | 791 | { |
800 | struct nfs4_ace *ace; | 792 | struct nfs4_ace *ace = acl->aces + acl->naces; |
801 | |||
802 | if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL) | ||
803 | return -ENOMEM; | ||
804 | 793 | ||
805 | ace->type = type; | 794 | ace->type = type; |
806 | ace->flag = flag; | 795 | ace->flag = flag; |
@@ -808,10 +797,7 @@ nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask, | |||
808 | ace->whotype = whotype; | 797 | ace->whotype = whotype; |
809 | ace->who = who; | 798 | ace->who = who; |
810 | 799 | ||
811 | list_add_tail(&ace->l_ace, &acl->ace_head); | ||
812 | acl->naces++; | 800 | acl->naces++; |
813 | |||
814 | return 0; | ||
815 | } | 801 | } |
816 | 802 | ||
817 | static struct { | 803 | static struct { |
@@ -865,7 +851,6 @@ nfs4_acl_write_who(int who, char *p) | |||
865 | } | 851 | } |
866 | 852 | ||
867 | EXPORT_SYMBOL(nfs4_acl_new); | 853 | EXPORT_SYMBOL(nfs4_acl_new); |
868 | EXPORT_SYMBOL(nfs4_acl_free); | ||
869 | EXPORT_SYMBOL(nfs4_acl_add_ace); | 854 | EXPORT_SYMBOL(nfs4_acl_add_ace); |
870 | EXPORT_SYMBOL(nfs4_acl_get_whotype); | 855 | EXPORT_SYMBOL(nfs4_acl_get_whotype); |
871 | EXPORT_SYMBOL(nfs4_acl_write_who); | 856 | EXPORT_SYMBOL(nfs4_acl_write_who); |
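The structural change behind the nfs4acl hunks: a linked list of individually kmalloc'ed ACEs becomes one allocation sized up front for the worst case, a (deny, allow) pair per POSIX entry, which is why nfs4_acl_new() now takes a count, nfs4_acl_add_ace() can no longer fail, and nfs4_acl_free() reduces to a plain kfree() and is removed. A sketch of that allocation shape, assuming struct nfs4_acl now ends in a flexible array consistent with the aces/naces usage above:

	struct example_acl {
		u32 naces;
		struct nfs4_ace aces[];		/* flexible array member */
	};

	static struct example_acl *example_acl_new(int n)
	{
		struct example_acl *acl;

		/* one kmalloc covers the header plus all n worst-case ACEs */
		acl = kmalloc(sizeof(*acl) + n * sizeof(struct nfs4_ace),
			      GFP_KERNEL);
		if (!acl)
			return NULL;
		acl->naces = 0;		/* ACEs are then appended in place */
		return acl;
	}

Appending is then plain pointer arithmetic, acl->aces + acl->naces, as the converted _posix_to_nfsv4_one() shows.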
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index f57655a7a2b6..fb14d68eacab 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -387,7 +387,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) | |||
387 | .address = (struct sockaddr *)&addr, | 387 | .address = (struct sockaddr *)&addr, |
388 | .addrsize = sizeof(addr), | 388 | .addrsize = sizeof(addr), |
389 | .timeout = &timeparms, | 389 | .timeout = &timeparms, |
390 | .servername = clp->cl_name.data, | ||
391 | .program = program, | 390 | .program = program, |
392 | .version = nfs_cb_version[1]->number, | 391 | .version = nfs_cb_version[1]->number, |
393 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ | 392 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ |
@@ -397,6 +396,7 @@ nfsd4_probe_callback(struct nfs4_client *clp) | |||
397 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], | 396 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], |
398 | .rpc_argp = clp, | 397 | .rpc_argp = clp, |
399 | }; | 398 | }; |
399 | char clientname[16]; | ||
400 | int status; | 400 | int status; |
401 | 401 | ||
402 | if (atomic_read(&cb->cb_set)) | 402 | if (atomic_read(&cb->cb_set)) |
@@ -419,6 +419,11 @@ nfsd4_probe_callback(struct nfs4_client *clp) | |||
419 | memset(program->stats, 0, sizeof(cb->cb_stat)); | 419 | memset(program->stats, 0, sizeof(cb->cb_stat)); |
420 | program->stats->program = program; | 420 | program->stats->program = program; |
421 | 421 | ||
422 | /* Just here to make some printk's more useful: */ | ||
423 | snprintf(clientname, sizeof(clientname), | ||
424 | "%u.%u.%u.%u", NIPQUAD(addr.sin_addr)); | ||
425 | args.servername = clientname; | ||
426 | |||
422 | /* Create RPC client */ | 427 | /* Create RPC client */ |
423 | cb->cb_client = rpc_create(&args); | 428 | cb->cb_client = rpc_create(&args); |
424 | if (IS_ERR(cb->cb_client)) { | 429 | if (IS_ERR(cb->cb_client)) { |
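
The 16-byte buffer is sized for the worst case "255.255.255.255" plus the
terminating NUL. NIPQUAD is the kernel macro of this era that expands a 32-bit
address into its four bytes; a close paraphrase as a standalone sketch:

    #include <stdio.h>

    /* close paraphrase of the linux/kernel.h macro of the time */
    #define NIPQUAD(addr) \
            ((unsigned char *)&(addr))[0], \
            ((unsigned char *)&(addr))[1], \
            ((unsigned char *)&(addr))[2], \
            ((unsigned char *)&(addr))[3]

    int main(void)
    {
            unsigned int addr = 0x0100007f;  /* 127.0.0.1 on a little-endian host */
            char name[16];                   /* "255.255.255.255" + NUL = 16 bytes */

            snprintf(name, sizeof(name), "%u.%u.%u.%u", NIPQUAD(addr));
            puts(name);                      /* prints 127.0.0.1 */
            return 0;
    }
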
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0efba557fb55..5d090f11f2be 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -199,24 +199,22 @@ defer_free(struct nfsd4_compoundargs *argp, | |||
199 | 199 | ||
200 | static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) | 200 | static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) |
201 | { | 201 | { |
202 | void *new = NULL; | ||
203 | if (p == argp->tmp) { | 202 | if (p == argp->tmp) { |
204 | new = kmalloc(nbytes, GFP_KERNEL); | 203 | p = kmalloc(nbytes, GFP_KERNEL); |
205 | if (!new) return NULL; | 204 | if (!p) |
206 | p = new; | 205 | return NULL; |
207 | memcpy(p, argp->tmp, nbytes); | 206 | memcpy(p, argp->tmp, nbytes); |
208 | } else { | 207 | } else { |
209 | BUG_ON(p != argp->tmpp); | 208 | BUG_ON(p != argp->tmpp); |
210 | argp->tmpp = NULL; | 209 | argp->tmpp = NULL; |
211 | } | 210 | } |
212 | if (defer_free(argp, kfree, p)) { | 211 | if (defer_free(argp, kfree, p)) { |
213 | kfree(new); | 212 | kfree(p); |
214 | return NULL; | 213 | return NULL; |
215 | } else | 214 | } else |
216 | return (char *)p; | 215 | return (char *)p; |
217 | } | 216 | } |
218 | 217 | ||
219 | |||
220 | static __be32 | 218 | static __be32 |
221 | nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) | 219 | nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) |
222 | { | 220 | { |
@@ -255,7 +253,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia | |||
255 | return status; | 253 | return status; |
256 | 254 | ||
257 | /* | 255 | /* |
258 | * According to spec, unsupported attributes return ERR_NOTSUPP; | 256 | * According to spec, unsupported attributes return ERR_ATTRNOTSUPP; |
259 | * read-only attributes return ERR_INVAL. | 257 | * read-only attributes return ERR_INVAL. |
260 | */ | 258 | */ |
261 | if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) | 259 | if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) |
@@ -273,42 +271,42 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia | |||
273 | iattr->ia_valid |= ATTR_SIZE; | 271 | iattr->ia_valid |= ATTR_SIZE; |
274 | } | 272 | } |
275 | if (bmval[0] & FATTR4_WORD0_ACL) { | 273 | if (bmval[0] & FATTR4_WORD0_ACL) { |
276 | int nace, i; | 274 | int nace; |
277 | struct nfs4_ace ace; | 275 | struct nfs4_ace *ace; |
278 | 276 | ||
279 | READ_BUF(4); len += 4; | 277 | READ_BUF(4); len += 4; |
280 | READ32(nace); | 278 | READ32(nace); |
281 | 279 | ||
282 | *acl = nfs4_acl_new(); | 280 | if (nace > NFS4_ACL_MAX) |
281 | return nfserr_resource; | ||
282 | |||
283 | *acl = nfs4_acl_new(nace); | ||
283 | if (*acl == NULL) { | 284 | if (*acl == NULL) { |
284 | host_err = -ENOMEM; | 285 | host_err = -ENOMEM; |
285 | goto out_nfserr; | 286 | goto out_nfserr; |
286 | } | 287 | } |
287 | defer_free(argp, (void (*)(const void *))nfs4_acl_free, *acl); | 288 | defer_free(argp, kfree, *acl); |
288 | 289 | ||
289 | for (i = 0; i < nace; i++) { | 290 | (*acl)->naces = nace; |
291 | for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { | ||
290 | READ_BUF(16); len += 16; | 292 | READ_BUF(16); len += 16; |
291 | READ32(ace.type); | 293 | READ32(ace->type); |
292 | READ32(ace.flag); | 294 | READ32(ace->flag); |
293 | READ32(ace.access_mask); | 295 | READ32(ace->access_mask); |
294 | READ32(dummy32); | 296 | READ32(dummy32); |
295 | READ_BUF(dummy32); | 297 | READ_BUF(dummy32); |
296 | len += XDR_QUADLEN(dummy32) << 2; | 298 | len += XDR_QUADLEN(dummy32) << 2; |
297 | READMEM(buf, dummy32); | 299 | READMEM(buf, dummy32); |
298 | ace.whotype = nfs4_acl_get_whotype(buf, dummy32); | 300 | ace->whotype = nfs4_acl_get_whotype(buf, dummy32); |
299 | host_err = 0; | 301 | host_err = 0; |
300 | if (ace.whotype != NFS4_ACL_WHO_NAMED) | 302 | if (ace->whotype != NFS4_ACL_WHO_NAMED) |
301 | ace.who = 0; | 303 | ace->who = 0; |
302 | else if (ace.flag & NFS4_ACE_IDENTIFIER_GROUP) | 304 | else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) |
303 | host_err = nfsd_map_name_to_gid(argp->rqstp, | 305 | host_err = nfsd_map_name_to_gid(argp->rqstp, |
304 | buf, dummy32, &ace.who); | 306 | buf, dummy32, &ace->who); |
305 | else | 307 | else |
306 | host_err = nfsd_map_name_to_uid(argp->rqstp, | 308 | host_err = nfsd_map_name_to_uid(argp->rqstp, |
307 | buf, dummy32, &ace.who); | 309 | buf, dummy32, &ace->who); |
308 | if (host_err) | ||
309 | goto out_nfserr; | ||
310 | host_err = nfs4_acl_add_ace(*acl, ace.type, ace.flag, | ||
311 | ace.access_mask, ace.whotype, ace.who); | ||
312 | if (host_err) | 310 | if (host_err) |
313 | goto out_nfserr; | 311 | goto out_nfserr; |
314 | } | 312 | } |
@@ -1596,7 +1594,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1596 | } | 1594 | } |
1597 | if (bmval0 & FATTR4_WORD0_ACL) { | 1595 | if (bmval0 & FATTR4_WORD0_ACL) { |
1598 | struct nfs4_ace *ace; | 1596 | struct nfs4_ace *ace; |
1599 | struct list_head *h; | ||
1600 | 1597 | ||
1601 | if (acl == NULL) { | 1598 | if (acl == NULL) { |
1602 | if ((buflen -= 4) < 0) | 1599 | if ((buflen -= 4) < 0) |
@@ -1609,9 +1606,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1609 | goto out_resource; | 1606 | goto out_resource; |
1610 | WRITE32(acl->naces); | 1607 | WRITE32(acl->naces); |
1611 | 1608 | ||
1612 | list_for_each(h, &acl->ace_head) { | 1609 | for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { |
1613 | ace = list_entry(h, struct nfs4_ace, l_ace); | ||
1614 | |||
1615 | if ((buflen -= 4*3) < 0) | 1610 | if ((buflen -= 4*3) < 0) |
1616 | goto out_resource; | 1611 | goto out_resource; |
1617 | WRITE32(ace->type); | 1612 | WRITE32(ace->type); |
@@ -1821,7 +1816,7 @@ out_acl: | |||
1821 | status = nfs_ok; | 1816 | status = nfs_ok; |
1822 | 1817 | ||
1823 | out: | 1818 | out: |
1824 | nfs4_acl_free(acl); | 1819 | kfree(acl); |
1825 | if (fhp == &tempfh) | 1820 | if (fhp == &tempfh) |
1826 | fh_put(&tempfh); | 1821 | fh_put(&tempfh); |
1827 | return status; | 1822 | return status; |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8283236c6a0f..7e6aa245b5d5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -466,7 +466,10 @@ out: | |||
466 | posix_acl_release(dpacl); | 466 | posix_acl_release(dpacl); |
467 | return (error); | 467 | return (error); |
468 | out_nfserr: | 468 | out_nfserr: |
469 | error = nfserrno(host_error); | 469 | if (host_error == -EOPNOTSUPP) |
470 | error = nfserr_attrnotsupp; | ||
471 | else | ||
472 | error = nfserrno(host_error); | ||
470 | goto out; | 473 | goto out; |
471 | } | 474 | } |
472 | 475 | ||
diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 7798d2a9f793..916c0102db5b 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h | |||
@@ -79,6 +79,7 @@ struct acpi_processor_power { | |||
79 | u32 bm_activity; | 79 | u32 bm_activity; |
80 | int count; | 80 | int count; |
81 | struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER]; | 81 | struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER]; |
82 | int timer_broadcast_on_state; | ||
82 | }; | 83 | }; |
83 | 84 | ||
84 | /* Performance Management */ | 85 | /* Performance Management */ |
diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h index 3a61206fd108..cc6b1652249a 100644 --- a/include/asm-i386/apic.h +++ b/include/asm-i386/apic.h | |||
@@ -95,9 +95,7 @@ static inline void ack_APIC_irq(void) | |||
95 | apic_write_around(APIC_EOI, 0); | 95 | apic_write_around(APIC_EOI, 0); |
96 | } | 96 | } |
97 | 97 | ||
98 | extern void (*wait_timer_tick)(void); | 98 | extern int lapic_get_maxlvt(void); |
99 | |||
100 | extern int get_maxlvt(void); | ||
101 | extern void clear_local_APIC(void); | 99 | extern void clear_local_APIC(void); |
102 | extern void connect_bsp_APIC (void); | 100 | extern void connect_bsp_APIC (void); |
103 | extern void disconnect_bsp_APIC (int virt_wire_setup); | 101 | extern void disconnect_bsp_APIC (int virt_wire_setup); |
@@ -113,14 +111,9 @@ extern void smp_local_timer_interrupt (void); | |||
113 | extern void setup_boot_APIC_clock (void); | 111 | extern void setup_boot_APIC_clock (void); |
114 | extern void setup_secondary_APIC_clock (void); | 112 | extern void setup_secondary_APIC_clock (void); |
115 | extern int APIC_init_uniprocessor (void); | 113 | extern int APIC_init_uniprocessor (void); |
116 | extern void disable_APIC_timer(void); | ||
117 | extern void enable_APIC_timer(void); | ||
118 | 114 | ||
119 | extern void enable_NMI_through_LVT0 (void * dummy); | 115 | extern void enable_NMI_through_LVT0 (void * dummy); |
120 | 116 | ||
121 | void smp_send_timer_broadcast_ipi(void); | ||
122 | void switch_APIC_timer_to_ipi(void *cpumask); | ||
123 | void switch_ipi_to_APIC_timer(void *cpumask); | ||
124 | #define ARCH_APICTIMER_STOPS_ON_C3 1 | 117 | #define ARCH_APICTIMER_STOPS_ON_C3 1 |
125 | 118 | ||
126 | extern int timer_over_8254; | 119 | extern int timer_over_8254; |
diff --git a/include/asm-i386/hpet.h b/include/asm-i386/hpet.h index e47be9a56cc2..fc03cf9de5c4 100644 --- a/include/asm-i386/hpet.h +++ b/include/asm-i386/hpet.h | |||
@@ -90,16 +90,19 @@ | |||
90 | #define HPET_MIN_PERIOD (100000UL) | 90 | #define HPET_MIN_PERIOD (100000UL) |
91 | #define HPET_TICK_RATE (HZ * 100000UL) | 91 | #define HPET_TICK_RATE (HZ * 100000UL) |
92 | 92 | ||
93 | extern unsigned long hpet_tick; /* hpet clks count per tick */ | ||
94 | extern unsigned long hpet_address; /* hpet memory map physical address */ | 93 | extern unsigned long hpet_address; /* hpet memory map physical address */ |
95 | extern int hpet_use_timer; | 94 | extern int is_hpet_enabled(void); |
96 | 95 | ||
96 | #ifdef CONFIG_X86_64 | ||
97 | extern unsigned long hpet_tick; /* hpet clks count per tick */ | ||
98 | extern int hpet_use_timer; | ||
97 | extern int hpet_rtc_timer_init(void); | 99 | extern int hpet_rtc_timer_init(void); |
98 | extern int hpet_enable(void); | 100 | extern int hpet_enable(void); |
99 | extern int hpet_reenable(void); | ||
100 | extern int is_hpet_enabled(void); | ||
101 | extern int is_hpet_capable(void); | 101 | extern int is_hpet_capable(void); |
102 | extern int hpet_readl(unsigned long a); | 102 | extern int hpet_readl(unsigned long a); |
103 | #else | ||
104 | extern int hpet_enable(void); | ||
105 | #endif | ||
103 | 106 | ||
104 | #ifdef CONFIG_HPET_EMULATE_RTC | 107 | #ifdef CONFIG_HPET_EMULATE_RTC |
105 | extern int hpet_mask_rtc_irq_bit(unsigned long bit_mask); | 108 | extern int hpet_mask_rtc_irq_bit(unsigned long bit_mask); |
@@ -110,5 +113,10 @@ extern int hpet_rtc_dropped_irq(void); | |||
110 | extern int hpet_rtc_timer_init(void); | 113 | extern int hpet_rtc_timer_init(void); |
111 | extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id); | 114 | extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id); |
112 | #endif /* CONFIG_HPET_EMULATE_RTC */ | 115 | #endif /* CONFIG_HPET_EMULATE_RTC */ |
116 | |||
117 | #else | ||
118 | |||
119 | static inline int hpet_enable(void) { return 0; } | ||
120 | |||
113 | #endif /* CONFIG_HPET_TIMER */ | 121 | #endif /* CONFIG_HPET_TIMER */ |
114 | #endif /* _I386_HPET_H */ | 122 | #endif /* _I386_HPET_H */ |
diff --git a/include/asm-i386/i8253.h b/include/asm-i386/i8253.h index 015d8df07690..6cb0dd4dcdde 100644 --- a/include/asm-i386/i8253.h +++ b/include/asm-i386/i8253.h | |||
@@ -1,6 +1,21 @@ | |||
1 | #ifndef __ASM_I8253_H__ | 1 | #ifndef __ASM_I8253_H__ |
2 | #define __ASM_I8253_H__ | 2 | #define __ASM_I8253_H__ |
3 | 3 | ||
4 | #include <linux/clockchips.h> | ||
5 | |||
4 | extern spinlock_t i8253_lock; | 6 | extern spinlock_t i8253_lock; |
5 | 7 | ||
8 | extern struct clock_event_device *global_clock_event; | ||
9 | |||
10 | /** | ||
11 | * pit_interrupt_hook - hook into timer tick | ||
13 | * | ||
14 | * Call the global clock event handler. | ||
15 | **/ | ||
16 | static inline void pit_interrupt_hook(void) | ||
17 | { | ||
18 | global_clock_event->event_handler(global_clock_event); | ||
19 | } | ||
20 | |||
6 | #endif /* __ASM_I8253_H__ */ | 21 | #endif /* __ASM_I8253_H__ */ |
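
A hedged sketch of how this hook is reached: the arch timer interrupt handler
(its name and registration are assumed here, not part of this diff) forwards
each PIT tick into the clockevents layer through the hook chain:

    /* sketch only -- handler wiring assumed */
    static irqreturn_t timer_interrupt(int irq, void *dev_id)
    {
            /* do_timer_interrupt_hook() -> pit_interrupt_hook()
             * -> global_clock_event->event_handler(global_clock_event) */
            do_timer_interrupt_hook();
            return IRQ_HANDLED;
    }
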
diff --git a/include/asm-i386/mach-default/do_timer.h b/include/asm-i386/mach-default/do_timer.h index 7d606e3364ae..56e5689863ae 100644 --- a/include/asm-i386/mach-default/do_timer.h +++ b/include/asm-i386/mach-default/do_timer.h | |||
@@ -1,86 +1,16 @@ | |||
1 | /* defines for inline arch setup functions */ | 1 | /* defines for inline arch setup functions */ |
2 | #include <linux/clockchips.h> | ||
2 | 3 | ||
3 | #include <asm/apic.h> | ||
4 | #include <asm/i8259.h> | 4 | #include <asm/i8259.h> |
5 | #include <asm/i8253.h> | ||
5 | 6 | ||
6 | /** | 7 | /** |
7 | * do_timer_interrupt_hook - hook into timer tick | 8 | * do_timer_interrupt_hook - hook into timer tick |
8 | * @regs: standard registers from interrupt | ||
9 | * | 9 | * |
10 | * Description: | 10 | * Call the PIT clock event handler; see asm/i8253.h |
11 | * This hook is called immediately after the timer interrupt is ack'd. | ||
12 | * It's primary purpose is to allow architectures that don't possess | ||
13 | * individual per CPU clocks (like the CPU APICs supply) to broadcast the | ||
14 | * timer interrupt as a means of triggering reschedules etc. | ||
15 | **/ | 11 | **/ |
16 | 12 | ||
17 | static inline void do_timer_interrupt_hook(void) | 13 | static inline void do_timer_interrupt_hook(void) |
18 | { | 14 | { |
19 | do_timer(1); | 15 | pit_interrupt_hook(); |
20 | #ifndef CONFIG_SMP | ||
21 | update_process_times(user_mode_vm(get_irq_regs())); | ||
22 | #endif | ||
23 | /* | ||
24 | * In the SMP case we use the local APIC timer interrupt to do the | ||
25 | * profiling, except when we simulate SMP mode on a uniprocessor | ||
26 | * system, in that case we have to call the local interrupt handler. | ||
27 | */ | ||
28 | #ifndef CONFIG_X86_LOCAL_APIC | ||
29 | profile_tick(CPU_PROFILING); | ||
30 | #else | ||
31 | if (!using_apic_timer) | ||
32 | smp_local_timer_interrupt(); | ||
33 | #endif | ||
34 | } | ||
35 | |||
36 | |||
37 | /* you can safely undefine this if you don't have the Neptune chipset */ | ||
38 | |||
39 | #define BUGGY_NEPTUN_TIMER | ||
40 | |||
41 | /** | ||
42 | * do_timer_overflow - process a detected timer overflow condition | ||
43 | * @count: hardware timer interrupt count on overflow | ||
44 | * | ||
45 | * Description: | ||
46 | * This call is invoked when the jiffies count has not incremented but | ||
47 | * the hardware timer interrupt has. It means that a timer tick interrupt | ||
48 | * came along while the previous one was pending, thus a tick was missed | ||
49 | **/ | ||
50 | static inline int do_timer_overflow(int count) | ||
51 | { | ||
52 | int i; | ||
53 | |||
54 | spin_lock(&i8259A_lock); | ||
55 | /* | ||
56 | * This is tricky when I/O APICs are used; | ||
57 | * see do_timer_interrupt(). | ||
58 | */ | ||
59 | i = inb(0x20); | ||
60 | spin_unlock(&i8259A_lock); | ||
61 | |||
62 | /* assumption about timer being IRQ0 */ | ||
63 | if (i & 0x01) { | ||
64 | /* | ||
65 | * We cannot detect lost timer interrupts ... | ||
66 | * well, that's why we call them lost, don't we? :) | ||
67 | * [hmm, on the Pentium and Alpha we can ... sort of] | ||
68 | */ | ||
69 | count -= LATCH; | ||
70 | } else { | ||
71 | #ifdef BUGGY_NEPTUN_TIMER | ||
72 | /* | ||
73 | * for the Neptun bug we know that the 'latch' | ||
74 | * command doesn't latch the high and low value | ||
75 | * of the counter atomically. Thus we have to | ||
76 | * substract 256 from the counter | ||
77 | * ... funny, isnt it? :) | ||
78 | */ | ||
79 | |||
80 | count -= 256; | ||
81 | #else | ||
82 | printk("do_slow_gettimeoffset(): hardware timer problem?\n"); | ||
83 | #endif | ||
84 | } | ||
85 | return count; | ||
86 | } | 16 | } |
diff --git a/include/asm-i386/mach-voyager/do_timer.h b/include/asm-i386/mach-voyager/do_timer.h index 04e69c104a74..60f9dcc15d54 100644 --- a/include/asm-i386/mach-voyager/do_timer.h +++ b/include/asm-i386/mach-voyager/do_timer.h | |||
@@ -1,25 +1,18 @@ | |||
1 | /* defines for inline arch setup functions */ | 1 | /* defines for inline arch setup functions */ |
2 | #include <linux/clockchips.h> | ||
3 | |||
2 | #include <asm/voyager.h> | 4 | #include <asm/voyager.h> |
5 | #include <asm/i8253.h> | ||
3 | 6 | ||
7 | /** | ||
8 | * do_timer_interrupt_hook - hook into timer tick | ||
10 | * | ||
11 | * Call the PIT clock event handler; see asm/i8253.h | ||
12 | **/ | ||
4 | static inline void do_timer_interrupt_hook(void) | 13 | static inline void do_timer_interrupt_hook(void) |
5 | { | 14 | { |
6 | do_timer(1); | 15 | pit_interrupt_hook(); |
7 | #ifndef CONFIG_SMP | ||
8 | update_process_times(user_mode_vm(irq_regs)); | ||
9 | #endif | ||
10 | |||
11 | voyager_timer_interrupt(); | 16 | voyager_timer_interrupt(); |
12 | } | 17 | } |
13 | 18 | ||
14 | static inline int do_timer_overflow(int count) | ||
15 | { | ||
16 | /* can't read the ISR, just assume 1 tick | ||
17 | overflow */ | ||
18 | if(count > LATCH || count < 0) { | ||
19 | printk(KERN_ERR "VOYAGER PROBLEM: count is %d, latch is %d\n", count, LATCH); | ||
20 | count = LATCH; | ||
21 | } | ||
22 | count -= LATCH; | ||
23 | |||
24 | return count; | ||
25 | } | ||
diff --git a/include/asm-i386/mpspec.h b/include/asm-i386/mpspec.h index 770bf6da8c3d..f21349399d14 100644 --- a/include/asm-i386/mpspec.h +++ b/include/asm-i386/mpspec.h | |||
@@ -23,7 +23,6 @@ extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; | |||
23 | extern int mpc_default_type; | 23 | extern int mpc_default_type; |
24 | extern unsigned long mp_lapic_addr; | 24 | extern unsigned long mp_lapic_addr; |
25 | extern int pic_mode; | 25 | extern int pic_mode; |
26 | extern int using_apic_timer; | ||
27 | 26 | ||
28 | #ifdef CONFIG_ACPI | 27 | #ifdef CONFIG_ACPI |
29 | extern void mp_register_lapic (u8 id, u8 enabled); | 28 | extern void mp_register_lapic (u8 id, u8 enabled); |
diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h index 609a3899475c..6db40d0583f1 100644 --- a/include/asm-i386/msr.h +++ b/include/asm-i386/msr.h | |||
@@ -307,4 +307,7 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val) | |||
307 | #define MSR_CORE_PERF_GLOBAL_CTRL 0x38f | 307 | #define MSR_CORE_PERF_GLOBAL_CTRL 0x38f |
308 | #define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390 | 308 | #define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390 |
309 | 309 | ||
310 | /* Geode defined MSRs */ | ||
311 | #define MSR_GEODE_BUSCONT_CONF0 0x1900 | ||
312 | |||
310 | #endif /* __ASM_MSR_H */ | 313 | #endif /* __ASM_MSR_H */ |
diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h index c13933185c1c..e997891cc7cc 100644 --- a/include/asm-i386/tsc.h +++ b/include/asm-i386/tsc.h | |||
@@ -1,48 +1 @@ | |||
1 | /* | #include <asm-x86_64/tsc.h> | |
2 | * linux/include/asm-i386/tsc.h | ||
3 | * | ||
4 | * i386 TSC related functions | ||
5 | */ | ||
6 | #ifndef _ASM_i386_TSC_H | ||
7 | #define _ASM_i386_TSC_H | ||
8 | |||
9 | #include <asm/processor.h> | ||
10 | |||
11 | /* | ||
12 | * Standard way to access the cycle counter on i586+ CPUs. | ||
13 | * Currently only used on SMP. | ||
14 | * | ||
15 | * If you really have a SMP machine with i486 chips or older, | ||
16 | * compile for that, and this will just always return zero. | ||
17 | * That's ok, it just means that the nicer scheduling heuristics | ||
18 | * won't work for you. | ||
19 | * | ||
20 | * We only use the low 32 bits, and we'd simply better make sure | ||
21 | * that we reschedule before that wraps. Scheduling at least every | ||
22 | * four billion cycles just basically sounds like a good idea, | ||
23 | * regardless of how fast the machine is. | ||
24 | */ | ||
25 | typedef unsigned long long cycles_t; | ||
26 | |||
27 | extern unsigned int cpu_khz; | ||
28 | extern unsigned int tsc_khz; | ||
29 | |||
30 | static inline cycles_t get_cycles(void) | ||
31 | { | ||
32 | unsigned long long ret = 0; | ||
33 | |||
34 | #ifndef CONFIG_X86_TSC | ||
35 | if (!cpu_has_tsc) | ||
36 | return 0; | ||
37 | #endif | ||
38 | |||
39 | #if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC) | ||
40 | rdtscll(ret); | ||
41 | #endif | ||
42 | return ret; | ||
43 | } | ||
44 | |||
45 | extern void tsc_init(void); | ||
46 | extern void mark_tsc_unstable(void); | ||
47 | |||
48 | #endif | ||
diff --git a/include/asm-x86_64/hpet.h b/include/asm-x86_64/hpet.h index b39098408b69..59a66f084611 100644 --- a/include/asm-x86_64/hpet.h +++ b/include/asm-x86_64/hpet.h | |||
@@ -56,8 +56,15 @@ | |||
56 | extern int is_hpet_enabled(void); | 56 | extern int is_hpet_enabled(void); |
57 | extern int hpet_rtc_timer_init(void); | 57 | extern int hpet_rtc_timer_init(void); |
58 | extern int apic_is_clustered_box(void); | 58 | extern int apic_is_clustered_box(void); |
59 | extern int hpet_arch_init(void); | ||
60 | extern int hpet_timer_stop_set_go(unsigned long tick); | ||
61 | extern int hpet_reenable(void); | ||
62 | extern unsigned int hpet_calibrate_tsc(void); | ||
59 | 63 | ||
60 | extern int hpet_use_timer; | 64 | extern int hpet_use_timer; |
65 | extern unsigned long hpet_address; | ||
66 | extern unsigned long hpet_period; | ||
67 | extern unsigned long hpet_tick; | ||
61 | 68 | ||
62 | #ifdef CONFIG_HPET_EMULATE_RTC | 69 | #ifdef CONFIG_HPET_EMULATE_RTC |
63 | extern int hpet_mask_rtc_irq_bit(unsigned long bit_mask); | 70 | extern int hpet_mask_rtc_irq_bit(unsigned long bit_mask); |
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index a6d2ff5c69b7..f54f3abf93ce 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h | |||
@@ -45,11 +45,7 @@ extern u32 pmtmr_ioport; | |||
45 | #else | 45 | #else |
46 | #define pmtmr_ioport 0 | 46 | #define pmtmr_ioport 0 |
47 | #endif | 47 | #endif |
48 | extern unsigned long long monotonic_base; | ||
49 | extern int sysctl_vsyscall; | ||
50 | extern int nohpet; | 48 | extern int nohpet; |
51 | extern unsigned long vxtime_hz; | ||
52 | extern void time_init_gtod(void); | ||
53 | 49 | ||
54 | extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2))); | 50 | extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2))); |
55 | 51 | ||
@@ -91,8 +87,6 @@ extern void check_efer(void); | |||
91 | 87 | ||
92 | extern int unhandled_signal(struct task_struct *tsk, int sig); | 88 | extern int unhandled_signal(struct task_struct *tsk, int sig); |
93 | 89 | ||
94 | extern int unsynchronized_tsc(void); | ||
95 | |||
96 | extern void select_idle_routine(const struct cpuinfo_x86 *c); | 90 | extern void select_idle_routine(const struct cpuinfo_x86 *c); |
97 | 91 | ||
98 | extern unsigned long table_start, table_end; | 92 | extern unsigned long table_start, table_end; |
diff --git a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h index b9e5320b7625..8c6808a3fba4 100644 --- a/include/asm-x86_64/timex.h +++ b/include/asm-x86_64/timex.h | |||
@@ -12,38 +12,21 @@ | |||
12 | #include <asm/hpet.h> | 12 | #include <asm/hpet.h> |
13 | #include <asm/system.h> | 13 | #include <asm/system.h> |
14 | #include <asm/processor.h> | 14 | #include <asm/processor.h> |
15 | #include <asm/tsc.h> | ||
15 | #include <linux/compiler.h> | 16 | #include <linux/compiler.h> |
16 | 17 | ||
17 | #define CLOCK_TICK_RATE PIT_TICK_RATE /* Underlying HZ */ | 18 | #define CLOCK_TICK_RATE PIT_TICK_RATE /* Underlying HZ */ |
18 | 19 | ||
19 | typedef unsigned long long cycles_t; | ||
20 | |||
21 | static inline cycles_t get_cycles (void) | ||
22 | { | ||
23 | unsigned long long ret; | ||
24 | |||
25 | rdtscll(ret); | ||
26 | return ret; | ||
27 | } | ||
28 | |||
29 | /* Like get_cycles, but make sure the CPU is synchronized. */ | ||
30 | static __always_inline cycles_t get_cycles_sync(void) | ||
31 | { | ||
32 | unsigned long long ret; | ||
33 | unsigned eax; | ||
34 | /* Don't do an additional sync on CPUs where we know | ||
35 | RDTSC is already synchronous. */ | ||
36 | alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC, | ||
37 | "=a" (eax), "0" (1) : "ebx","ecx","edx","memory"); | ||
38 | rdtscll(ret); | ||
39 | return ret; | ||
40 | } | ||
41 | |||
42 | extern unsigned int cpu_khz; | ||
43 | |||
44 | extern int read_current_timer(unsigned long *timer_value); | 20 | extern int read_current_timer(unsigned long *timer_value); |
45 | #define ARCH_HAS_READ_CURRENT_TIMER 1 | 21 | #define ARCH_HAS_READ_CURRENT_TIMER 1 |
46 | 22 | ||
47 | extern struct vxtime_data vxtime; | 23 | #define USEC_PER_TICK (USEC_PER_SEC / HZ) |
24 | #define NSEC_PER_TICK (NSEC_PER_SEC / HZ) | ||
25 | #define FSEC_PER_TICK (FSEC_PER_SEC / HZ) | ||
26 | |||
27 | #define NS_SCALE 10 /* 2^10, carefully chosen */ | ||
28 | #define US_SCALE 32 /* 2^32, arbitrarily chosen */ | ||
48 | 29 | ||
30 | extern void mark_tsc_unstable(void); | ||
31 | extern void set_cyc2ns_scale(unsigned long khz); | ||
49 | #endif | 32 | #endif |
diff --git a/include/asm-x86_64/tsc.h b/include/asm-x86_64/tsc.h new file mode 100644 index 000000000000..9a0a368852c7 --- /dev/null +++ b/include/asm-x86_64/tsc.h | |||
@@ -0,0 +1,66 @@ | |||
1 | /* | ||
2 | * linux/include/asm-x86_64/tsc.h | ||
3 | * | ||
4 | * x86_64 TSC related functions | ||
5 | */ | ||
6 | #ifndef _ASM_x86_64_TSC_H | ||
7 | #define _ASM_x86_64_TSC_H | ||
8 | |||
9 | #include <asm/processor.h> | ||
10 | |||
11 | /* | ||
12 | * Standard way to access the cycle counter. | ||
13 | */ | ||
14 | typedef unsigned long long cycles_t; | ||
15 | |||
16 | extern unsigned int cpu_khz; | ||
17 | extern unsigned int tsc_khz; | ||
18 | |||
19 | static inline cycles_t get_cycles(void) | ||
20 | { | ||
21 | unsigned long long ret = 0; | ||
22 | |||
23 | #ifndef CONFIG_X86_TSC | ||
24 | if (!cpu_has_tsc) | ||
25 | return 0; | ||
26 | #endif | ||
27 | |||
28 | #if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC) | ||
29 | rdtscll(ret); | ||
30 | #endif | ||
31 | return ret; | ||
32 | } | ||
33 | |||
34 | /* Like get_cycles, but make sure the CPU is synchronized. */ | ||
35 | static __always_inline cycles_t get_cycles_sync(void) | ||
36 | { | ||
37 | unsigned long long ret; | ||
38 | #ifdef X86_FEATURE_SYNC_RDTSC | ||
39 | unsigned eax; | ||
40 | |||
41 | /* | ||
42 | * Don't do an additional sync on CPUs where we know | ||
43 | * RDTSC is already synchronous: | ||
44 | */ | ||
45 | alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC, | ||
46 | "=a" (eax), "0" (1) : "ebx","ecx","edx","memory"); | ||
47 | #else | ||
48 | sync_core(); | ||
49 | #endif | ||
50 | rdtscll(ret); | ||
51 | |||
52 | return ret; | ||
53 | } | ||
54 | |||
55 | extern void tsc_init(void); | ||
56 | extern void mark_tsc_unstable(void); | ||
57 | extern int unsynchronized_tsc(void); | ||
58 | |||
59 | /* | ||
60 | * Boot-time check whether the TSCs are synchronized across | ||
61 | * all CPUs/cores: | ||
62 | */ | ||
63 | extern void check_tsc_sync_source(int cpu); | ||
64 | extern void check_tsc_sync_target(void); | ||
65 | |||
66 | #endif | ||
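
As a usage sketch (not from this commit), get_cycles_sync() brackets a
measured region, and cpu_khz -- TSC ticks per millisecond -- converts the
delta to wall time, assuming a constant-rate TSC:

    cycles_t t0, t1;
    unsigned long long ns;

    t0 = get_cycles_sync();
    critical_section();             /* code under measurement (placeholder) */
    t1 = get_cycles_sync();

    /* ticks * 1e6 / (ticks per ms) = nanoseconds */
    ns = (t1 - t0) * 1000000ULL / cpu_khz;
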
diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h index 0c7847165eae..82b4afe65c91 100644 --- a/include/asm-x86_64/vsyscall.h +++ b/include/asm-x86_64/vsyscall.h | |||
@@ -16,46 +16,27 @@ enum vsyscall_num { | |||
16 | #ifdef __KERNEL__ | 16 | #ifdef __KERNEL__ |
17 | #include <linux/seqlock.h> | 17 | #include <linux/seqlock.h> |
18 | 18 | ||
19 | #define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16))) | ||
20 | #define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16))) | 19 | #define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16))) |
21 | #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) | 20 | #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) |
22 | #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16))) | ||
23 | #define __section_sysctl_vsyscall __attribute__ ((unused, __section__ (".sysctl_vsyscall"), aligned(16))) | ||
24 | #define __section_xtime __attribute__ ((unused, __section__ (".xtime"), aligned(16))) | ||
25 | #define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(16))) | ||
26 | 21 | ||
27 | #define VXTIME_TSC 1 | 22 | /* Definitions for CONFIG_GENERIC_TIME */ |
28 | #define VXTIME_HPET 2 | 23 | #define __section_vsyscall_gtod_data __attribute__ \ |
29 | #define VXTIME_PMTMR 3 | 24 | ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) |
25 | #define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn"))) | ||
30 | 26 | ||
31 | #define VGETCPU_RDTSCP 1 | 27 | #define VGETCPU_RDTSCP 1 |
32 | #define VGETCPU_LSL 2 | 28 | #define VGETCPU_LSL 2 |
33 | 29 | ||
34 | struct vxtime_data { | ||
35 | long hpet_address; /* HPET base address */ | ||
36 | int last; | ||
37 | unsigned long last_tsc; | ||
38 | long quot; | ||
39 | long tsc_quot; | ||
40 | int mode; | ||
41 | }; | ||
42 | |||
43 | #define hpet_readl(a) readl((const void __iomem *)fix_to_virt(FIX_HPET_BASE) + a) | 30 | #define hpet_readl(a) readl((const void __iomem *)fix_to_virt(FIX_HPET_BASE) + a) |
44 | #define hpet_writel(d,a) writel(d, (void __iomem *)fix_to_virt(FIX_HPET_BASE) + a) | 31 | #define hpet_writel(d,a) writel(d, (void __iomem *)fix_to_virt(FIX_HPET_BASE) + a) |
45 | 32 | ||
46 | /* vsyscall space (readonly) */ | ||
47 | extern struct vxtime_data __vxtime; | ||
48 | extern int __vgetcpu_mode; | 33 | extern int __vgetcpu_mode; |
49 | extern struct timespec __xtime; | ||
50 | extern volatile unsigned long __jiffies; | 34 | extern volatile unsigned long __jiffies; |
51 | extern struct timezone __sys_tz; | ||
52 | extern seqlock_t __xtime_lock; | ||
53 | 35 | ||
54 | /* kernel space (writeable) */ | 36 | /* kernel space (writeable) */ |
55 | extern struct vxtime_data vxtime; | ||
56 | extern int vgetcpu_mode; | 37 | extern int vgetcpu_mode; |
57 | extern struct timezone sys_tz; | 38 | extern struct timezone sys_tz; |
58 | extern int sysctl_vsyscall; | 39 | extern struct vsyscall_gtod_data_t vsyscall_gtod_data; |
59 | 40 | ||
60 | #endif /* __KERNEL__ */ | 41 | #endif /* __KERNEL__ */ |
61 | 42 | ||
diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h new file mode 100644 index 000000000000..1d0ef1ae8036 --- /dev/null +++ b/include/linux/acpi_pmtmr.h | |||
@@ -0,0 +1,38 @@ | |||
1 | #ifndef _ACPI_PMTMR_H_ | ||
2 | #define _ACPI_PMTMR_H_ | ||
3 | |||
4 | #include <linux/clocksource.h> | ||
5 | |||
6 | /* Number of PMTMR ticks expected during calibration run */ | ||
7 | #define PMTMR_TICKS_PER_SEC 3579545 | ||
8 | |||
9 | /* limit it to 24 bits */ | ||
10 | #define ACPI_PM_MASK CLOCKSOURCE_MASK(24) | ||
11 | |||
12 | /* Overrun value */ | ||
13 | #define ACPI_PM_OVRRUN (1<<24) | ||
14 | |||
15 | #ifdef CONFIG_X86_PM_TIMER | ||
16 | |||
17 | extern u32 acpi_pm_read_verified(void); | ||
18 | extern u32 pmtmr_ioport; | ||
19 | |||
20 | static inline u32 acpi_pm_read_early(void) | ||
21 | { | ||
22 | if (!pmtmr_ioport) | ||
23 | return 0; | ||
24 | /* mask the output to 24 bits */ | ||
25 | return acpi_pm_read_verified() & ACPI_PM_MASK; | ||
26 | } | ||
27 | |||
28 | #else | ||
29 | |||
30 | static inline u32 acpi_pm_read_early(void) | ||
31 | { | ||
32 | return 0; | ||
33 | } | ||
34 | |||
35 | #endif | ||
36 | |||
37 | #endif | ||
38 | |||
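
The PM timer is a free-running 24-bit counter at PMTMR_TICKS_PER_SEC
(3.579545 MHz), so it wraps roughly every 4.69 seconds and deltas must be
reduced modulo 2^24. A sketch, assuming the two reads are less than one
ACPI_PM_OVRRUN apart:

    u32 a, b, delta;
    u64 ns;

    a = acpi_pm_read_early();
    /* ... less than ~4.69 s of work ... */
    b = acpi_pm_read_early();

    delta = (b - a) & (u32)ACPI_PM_MASK;    /* handles 24-bit wraparound */
    ns = (u64)delta * 1000000000ULL / PMTMR_TICKS_PER_SEC;
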
diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index a5c8bb5d80ba..abc521cfb084 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h | |||
@@ -87,10 +87,15 @@ struct agp_memory { | |||
87 | u32 physical; | 87 | u32 physical; |
88 | u8 is_bound; | 88 | u8 is_bound; |
89 | u8 is_flushed; | 89 | u8 is_flushed; |
90 | u8 vmalloc_flag; | ||
90 | }; | 91 | }; |
91 | 92 | ||
92 | #define AGP_NORMAL_MEMORY 0 | 93 | #define AGP_NORMAL_MEMORY 0 |
93 | 94 | ||
95 | #define AGP_USER_TYPES (1 << 16) | ||
96 | #define AGP_USER_MEMORY (AGP_USER_TYPES) | ||
97 | #define AGP_USER_CACHED_MEMORY (AGP_USER_TYPES + 1) | ||
98 | |||
94 | extern struct agp_bridge_data *agp_bridge; | 99 | extern struct agp_bridge_data *agp_bridge; |
95 | extern struct list_head agp_bridges; | 100 | extern struct list_head agp_bridges; |
96 | 101 | ||
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h new file mode 100644 index 000000000000..4ea7e7bcfafe --- /dev/null +++ b/include/linux/clockchips.h | |||
@@ -0,0 +1,142 @@ | |||
1 | /* linux/include/linux/clockchips.h | ||
2 | * | ||
3 | * This file contains the structure definitions for clockchips. | ||
4 | * | ||
5 | * If you are not a clockchip, or the time of day code, you should | ||
6 | * not be including this file! | ||
7 | */ | ||
8 | #ifndef _LINUX_CLOCKCHIPS_H | ||
9 | #define _LINUX_CLOCKCHIPS_H | ||
10 | |||
11 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | ||
12 | |||
13 | #include <linux/clocksource.h> | ||
14 | #include <linux/cpumask.h> | ||
15 | #include <linux/ktime.h> | ||
16 | #include <linux/notifier.h> | ||
17 | |||
18 | struct clock_event_device; | ||
19 | |||
20 | /* Clock event mode commands */ | ||
21 | enum clock_event_mode { | ||
22 | CLOCK_EVT_MODE_UNUSED = 0, | ||
23 | CLOCK_EVT_MODE_SHUTDOWN, | ||
24 | CLOCK_EVT_MODE_PERIODIC, | ||
25 | CLOCK_EVT_MODE_ONESHOT, | ||
26 | }; | ||
27 | |||
28 | /* Clock event notification values */ | ||
29 | enum clock_event_nofitiers { | ||
30 | CLOCK_EVT_NOTIFY_ADD, | ||
31 | CLOCK_EVT_NOTIFY_BROADCAST_ON, | ||
32 | CLOCK_EVT_NOTIFY_BROADCAST_OFF, | ||
33 | CLOCK_EVT_NOTIFY_BROADCAST_ENTER, | ||
34 | CLOCK_EVT_NOTIFY_BROADCAST_EXIT, | ||
35 | CLOCK_EVT_NOTIFY_SUSPEND, | ||
36 | CLOCK_EVT_NOTIFY_RESUME, | ||
37 | CLOCK_EVT_NOTIFY_CPU_DEAD, | ||
38 | }; | ||
39 | |||
40 | /* | ||
41 | * Clock event features | ||
42 | */ | ||
43 | #define CLOCK_EVT_FEAT_PERIODIC 0x000001 | ||
44 | #define CLOCK_EVT_FEAT_ONESHOT 0x000002 | ||
45 | /* | ||
46 | * x86(64) specific misfeatures: | ||
47 | * | ||
48 | * - Clockevent source stops in C3 State and needs broadcast support. | ||
49 | * - Local APIC timer is used as a dummy device. | ||
50 | */ | ||
51 | #define CLOCK_EVT_FEAT_C3STOP 0x000004 | ||
52 | #define CLOCK_EVT_FEAT_DUMMY 0x000008 | ||
53 | |||
54 | /** | ||
55 | * struct clock_event_device - clock event device descriptor | ||
56 | * @name: ptr to clock event name | ||
57 | * @features: clock event features (CLOCK_EVT_FEAT_* flags) | ||
58 | * @max_delta_ns: maximum delta value in ns | ||
59 | * @min_delta_ns: minimum delta value in ns | ||
60 | * @mult: nanosecond to cycles multiplier | ||
61 | * @shift: nanoseconds to cycles divisor (power of two) | ||
62 | * @rating: variable to rate clock event devices | ||
63 | * @irq: irq number (only for non cpu local devices) | ||
64 | * @cpumask: cpumask to indicate for which cpus this device works | ||
65 | * @set_next_event: set next event | ||
66 | * @set_mode: set mode function | ||
67 | * @event_handler: Assigned by the framework to be called by the low | ||
68 | * level handler of the event source | ||
69 | * @broadcast: function to broadcast events | ||
70 | * @list: list head for the management code | ||
71 | * @mode: operating mode assigned by the management code | ||
72 | * @next_event: local storage for the next event in oneshot mode | ||
73 | */ | ||
74 | struct clock_event_device { | ||
75 | const char *name; | ||
76 | unsigned int features; | ||
77 | unsigned long max_delta_ns; | ||
78 | unsigned long min_delta_ns; | ||
79 | unsigned long mult; | ||
80 | int shift; | ||
81 | int rating; | ||
82 | int irq; | ||
83 | cpumask_t cpumask; | ||
84 | int (*set_next_event)(unsigned long evt, | ||
85 | struct clock_event_device *); | ||
86 | void (*set_mode)(enum clock_event_mode mode, | ||
87 | struct clock_event_device *); | ||
88 | void (*event_handler)(struct clock_event_device *); | ||
89 | void (*broadcast)(cpumask_t mask); | ||
90 | struct list_head list; | ||
91 | enum clock_event_mode mode; | ||
92 | ktime_t next_event; | ||
93 | }; | ||
94 | |||
95 | /* | ||
96 | * Calculate a multiplication factor for scaled math, which is used to convert | ||
97 | * nanoseconds based values to clock ticks: | ||
98 | * | ||
99 | * clock_ticks = (nanoseconds * factor) >> shift. | ||
100 | * | ||
101 | * div_sc is the rearranged equation to calculate a factor from a given clock | ||
102 | * ticks / nanoseconds ratio: | ||
103 | * | ||
104 | * factor = (clock_ticks << shift) / nanoseconds | ||
105 | */ | ||
106 | static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec, | ||
107 | int shift) | ||
108 | { | ||
109 | uint64_t tmp = ((uint64_t)ticks) << shift; | ||
110 | |||
111 | do_div(tmp, nsec); | ||
112 | return (unsigned long) tmp; | ||
113 | } | ||
114 | |||
115 | /* Clock event layer functions */ | ||
116 | extern unsigned long clockevent_delta2ns(unsigned long latch, | ||
117 | struct clock_event_device *evt); | ||
118 | extern void clockevents_register_device(struct clock_event_device *dev); | ||
119 | |||
120 | extern void clockevents_exchange_device(struct clock_event_device *old, | ||
121 | struct clock_event_device *new); | ||
122 | extern | ||
123 | struct clock_event_device *clockevents_request_device(unsigned int features, | ||
124 | cpumask_t cpumask); | ||
125 | extern void clockevents_release_device(struct clock_event_device *dev); | ||
126 | extern void clockevents_set_mode(struct clock_event_device *dev, | ||
127 | enum clock_event_mode mode); | ||
128 | extern int clockevents_register_notifier(struct notifier_block *nb); | ||
129 | extern void clockevents_unregister_notifier(struct notifier_block *nb); | ||
130 | extern int clockevents_program_event(struct clock_event_device *dev, | ||
131 | ktime_t expires, ktime_t now); | ||
132 | |||
133 | extern void clockevents_notify(unsigned long reason, void *arg); | ||
134 | |||
135 | #else | ||
136 | |||
137 | static inline void clockevents_resume_events(void) { } | ||
138 | #define clockevents_notify(reason, arg) do { } while (0) | ||
139 | |||
140 | #endif | ||
141 | |||
142 | #endif | ||
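
A worked instance of the mult/shift arithmetic documented above, using the
i8253 PIT (CLOCK_TICK_RATE = 1193182 Hz) as the event source; the numbers are
illustrative only:

    /* factor = (clock_ticks << shift) / nanoseconds */
    unsigned long mult = div_sc(1193182, 1000000000UL, 32);
                         /* = (1193182 << 32) / 1e9 ~= 5124677 */

    /* clock_ticks = (nanoseconds * factor) >> shift */
    unsigned long ticks = ((u64)1000000 * mult) >> 32;
                         /* 1 ms ~= 1193 PIT ticks, matching 1.193182 MHz */
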
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1622d23a8dc3..daa4940cc0f1 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h | |||
@@ -12,11 +12,13 @@ | |||
12 | #include <linux/timex.h> | 12 | #include <linux/timex.h> |
13 | #include <linux/time.h> | 13 | #include <linux/time.h> |
14 | #include <linux/list.h> | 14 | #include <linux/list.h> |
15 | #include <linux/timer.h> | ||
15 | #include <asm/div64.h> | 16 | #include <asm/div64.h> |
16 | #include <asm/io.h> | 17 | #include <asm/io.h> |
17 | 18 | ||
18 | /* clocksource cycle base type */ | 19 | /* clocksource cycle base type */ |
19 | typedef u64 cycle_t; | 20 | typedef u64 cycle_t; |
21 | struct clocksource; | ||
20 | 22 | ||
21 | /** | 23 | /** |
22 | * struct clocksource - hardware abstraction for a free running counter | 24 | * struct clocksource - hardware abstraction for a free running counter |
@@ -44,8 +46,8 @@ typedef u64 cycle_t; | |||
44 | * subtraction of non 64 bit counters | 46 | * subtraction of non 64 bit counters |
45 | * @mult: cycle to nanosecond multiplier | 47 | * @mult: cycle to nanosecond multiplier |
46 | * @shift: cycle to nanosecond divisor (power of two) | 48 | * @shift: cycle to nanosecond divisor (power of two) |
47 | * @update_callback: called when safe to alter clocksource values | 49 | * @flags: flags describing special properties |
48 | * @is_continuous: defines if clocksource is free-running. | 50 | * @vread: vsyscall based read |
49 | * @cycle_interval: Used internally by timekeeping core, please ignore. | 51 | * @cycle_interval: Used internally by timekeeping core, please ignore. |
50 | * @xtime_interval: Used internally by timekeeping core, please ignore. | 52 | * @xtime_interval: Used internally by timekeeping core, please ignore. |
51 | */ | 53 | */ |
@@ -57,15 +59,30 @@ struct clocksource { | |||
57 | cycle_t mask; | 59 | cycle_t mask; |
58 | u32 mult; | 60 | u32 mult; |
59 | u32 shift; | 61 | u32 shift; |
60 | int (*update_callback)(void); | 62 | unsigned long flags; |
61 | int is_continuous; | 63 | cycle_t (*vread)(void); |
62 | 64 | ||
63 | /* timekeeping specific data, ignore */ | 65 | /* timekeeping specific data, ignore */ |
64 | cycle_t cycle_last, cycle_interval; | 66 | cycle_t cycle_last, cycle_interval; |
65 | u64 xtime_nsec, xtime_interval; | 67 | u64 xtime_nsec, xtime_interval; |
66 | s64 error; | 68 | s64 error; |
69 | |||
70 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG | ||
71 | /* Watchdog related data, used by the framework */ | ||
72 | struct list_head wd_list; | ||
73 | cycle_t wd_last; | ||
74 | #endif | ||
67 | }; | 75 | }; |
68 | 76 | ||
77 | /* | ||
78 | * Clock source flag bits: | ||
79 | */ | ||
80 | #define CLOCK_SOURCE_IS_CONTINUOUS 0x01 | ||
81 | #define CLOCK_SOURCE_MUST_VERIFY 0x02 | ||
82 | |||
83 | #define CLOCK_SOURCE_WATCHDOG 0x10 | ||
84 | #define CLOCK_SOURCE_VALID_FOR_HRES 0x20 | ||
85 | |||
69 | /* simplify initialization of mask field */ | 86 | /* simplify initialization of mask field */ |
70 | #define CLOCKSOURCE_MASK(bits) (cycle_t)(bits<64 ? ((1ULL<<bits)-1) : -1) | 87 | #define CLOCKSOURCE_MASK(bits) (cycle_t)(bits<64 ? ((1ULL<<bits)-1) : -1) |
71 | 88 | ||
@@ -178,8 +195,16 @@ static inline void clocksource_calculate_interval(struct clocksource *c, | |||
178 | 195 | ||
179 | 196 | ||
180 | /* used to install a new clocksource */ | 197 | /* used to install a new clocksource */ |
181 | int clocksource_register(struct clocksource*); | 198 | extern int clocksource_register(struct clocksource*); |
182 | void clocksource_reselect(void); | 199 | extern struct clocksource* clocksource_get_next(void); |
183 | struct clocksource* clocksource_get_next(void); | 200 | extern void clocksource_change_rating(struct clocksource *cs, int rating); |
201 | |||
202 | #ifdef CONFIG_GENERIC_TIME_VSYSCALL | ||
203 | extern void update_vsyscall(struct timespec *ts, struct clocksource *c); | ||
204 | #else | ||
205 | static inline void update_vsyscall(struct timespec *ts, struct clocksource *c) | ||
206 | { | ||
207 | } | ||
208 | #endif | ||
184 | 209 | ||
185 | #endif /* _LINUX_CLOCKSOURCE_H */ | 210 | #endif /* _LINUX_CLOCKSOURCE_H */ |
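
A minimal sketch of a clocksource under the reworked interface, where the
flags word replaces the old update_callback/is_continuous pair; the hardware
read accessor and its frequency are assumptions:

    static cycle_t sample_read(void)
    {
            return (cycle_t)read_hw_counter();      /* assumed accessor */
    }

    static struct clocksource sample_cs = {
            .name   = "sample",
            .rating = 200,
            .read   = sample_read,
            .mask   = CLOCKSOURCE_MASK(32),
            .shift  = 20,
            .flags  = CLOCK_SOURCE_IS_CONTINUOUS |
                      CLOCK_SOURCE_MUST_VERIFY,     /* let the watchdog check it */
    };

    /* at init: sample_cs.mult = clocksource_khz2mult(hw_khz, sample_cs.shift);
     * then clocksource_register(&sample_cs); */
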
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7f008f6bfdc3..0899e2cdcdd1 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h | |||
@@ -84,9 +84,6 @@ struct cpufreq_policy { | |||
84 | unsigned int policy; /* see above */ | 84 | unsigned int policy; /* see above */ |
85 | struct cpufreq_governor *governor; /* see below */ | 85 | struct cpufreq_governor *governor; /* see below */ |
86 | 86 | ||
87 | struct mutex lock; /* CPU ->setpolicy or ->target may | ||
88 | only be called once a time */ | ||
89 | |||
90 | struct work_struct update; /* if update_policy() needs to be | 87 | struct work_struct update; /* if update_policy() needs to be |
91 | * called, but you're in IRQ context */ | 88 | * called, but you're in IRQ context */ |
92 | 89 | ||
@@ -172,11 +169,16 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy, | |||
172 | unsigned int relation); | 169 | unsigned int relation); |
173 | 170 | ||
174 | 171 | ||
175 | extern int cpufreq_driver_getavg(struct cpufreq_policy *policy); | 172 | extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy); |
176 | 173 | ||
177 | int cpufreq_register_governor(struct cpufreq_governor *governor); | 174 | int cpufreq_register_governor(struct cpufreq_governor *governor); |
178 | void cpufreq_unregister_governor(struct cpufreq_governor *governor); | 175 | void cpufreq_unregister_governor(struct cpufreq_governor *governor); |
179 | 176 | ||
177 | int lock_policy_rwsem_read(int cpu); | ||
178 | int lock_policy_rwsem_write(int cpu); | ||
179 | void unlock_policy_rwsem_read(int cpu); | ||
180 | void unlock_policy_rwsem_write(int cpu); | ||
181 | |||
180 | 182 | ||
181 | /********************************************************************* | 183 | /********************************************************************* |
182 | * CPUFREQ DRIVER INTERFACE * | 184 | * CPUFREQ DRIVER INTERFACE * |
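
A hedged sketch of how the new per-CPU rwsem helpers are meant to bracket a
policy change, replacing the mutex that used to live inside struct
cpufreq_policy; error handling abbreviated:

    if (lock_policy_rwsem_write(cpu) < 0)
            return -EINVAL;

    ret = __cpufreq_driver_target(policy, target_freq, CPUFREQ_RELATION_L);

    unlock_policy_rwsem_write(cpu);
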
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 612472aaa79c..7803014f3a11 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h | |||
@@ -106,7 +106,7 @@ static inline void account_system_vtime(struct task_struct *tsk) | |||
106 | * always balanced, so the interrupted value of ->hardirq_context | 106 | * always balanced, so the interrupted value of ->hardirq_context |
107 | * will always be restored. | 107 | * will always be restored. |
108 | */ | 108 | */ |
109 | #define irq_enter() \ | 109 | #define __irq_enter() \ |
110 | do { \ | 110 | do { \ |
111 | account_system_vtime(current); \ | 111 | account_system_vtime(current); \ |
112 | add_preempt_count(HARDIRQ_OFFSET); \ | 112 | add_preempt_count(HARDIRQ_OFFSET); \ |
@@ -114,6 +114,11 @@ static inline void account_system_vtime(struct task_struct *tsk) | |||
114 | } while (0) | 114 | } while (0) |
115 | 115 | ||
116 | /* | 116 | /* |
117 | * Enter irq context (on NO_HZ, update jiffies): | ||
118 | */ | ||
119 | extern void irq_enter(void); | ||
120 | |||
121 | /* | ||
117 | * Exit irq context without processing softirqs: | 122 | * Exit irq context without processing softirqs: |
118 | */ | 123 | */ |
119 | #define __irq_exit() \ | 124 | #define __irq_exit() \ |
@@ -128,7 +133,7 @@ static inline void account_system_vtime(struct task_struct *tsk) | |||
128 | */ | 133 | */ |
129 | extern void irq_exit(void); | 134 | extern void irq_exit(void); |
130 | 135 | ||
131 | #define nmi_enter() do { lockdep_off(); irq_enter(); } while (0) | 136 | #define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) |
132 | #define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) | 137 | #define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) |
133 | 138 | ||
134 | #endif /* LINUX_HARDIRQ_H */ | 139 | #endif /* LINUX_HARDIRQ_H */ |
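
The split matters because the out-of-line irq_enter() can now do NO_HZ
housekeeping that must never run in NMI context, which is why nmi_enter()
keeps using __irq_enter(). Roughly, as a sketch of the matching
kernel/softirq.c change (not shown in this diff):

    void irq_enter(void)
    {
            __irq_enter();
    #ifdef CONFIG_NO_HZ
            /* catch up jiffies after an idle period */
            if (idle_cpu(smp_processor_id()))
                    tick_nohz_update_jiffies();
    #endif
    }
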
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index fca93025ab51..37f9279192a9 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h | |||
@@ -21,22 +21,72 @@ | |||
21 | #include <linux/list.h> | 21 | #include <linux/list.h> |
22 | #include <linux/wait.h> | 22 | #include <linux/wait.h> |
23 | 23 | ||
24 | struct hrtimer_clock_base; | ||
25 | struct hrtimer_cpu_base; | ||
26 | |||
24 | /* | 27 | /* |
25 | * Mode arguments of xxx_hrtimer functions: | 28 | * Mode arguments of xxx_hrtimer functions: |
26 | */ | 29 | */ |
27 | enum hrtimer_mode { | 30 | enum hrtimer_mode { |
28 | HRTIMER_ABS, /* Time value is absolute */ | 31 | HRTIMER_MODE_ABS, /* Time value is absolute */ |
29 | HRTIMER_REL, /* Time value is relative to now */ | 32 | HRTIMER_MODE_REL, /* Time value is relative to now */ |
30 | }; | 33 | }; |
31 | 34 | ||
35 | /* | ||
36 | * Return values for the callback function | ||
37 | */ | ||
32 | enum hrtimer_restart { | 38 | enum hrtimer_restart { |
33 | HRTIMER_NORESTART, | 39 | HRTIMER_NORESTART, /* Timer is not restarted */ |
34 | HRTIMER_RESTART, | 40 | HRTIMER_RESTART, /* Timer must be restarted */ |
35 | }; | 41 | }; |
36 | 42 | ||
37 | #define HRTIMER_INACTIVE ((void *)1UL) | 43 | /* |
44 | * hrtimer callback modes: | ||
45 | * | ||
46 | * HRTIMER_CB_SOFTIRQ: Callback must run in softirq context | ||
47 | * HRTIMER_CB_IRQSAFE: Callback may run in hardirq context | ||
48 | * HRTIMER_CB_IRQSAFE_NO_RESTART: Callback may run in hardirq context and | ||
49 | * does not restart the timer | ||
50 | * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: Callback must run in hardirq context | ||
51 | * Special mode for tick emulation | ||
52 | */ | ||
53 | enum hrtimer_cb_mode { | ||
54 | HRTIMER_CB_SOFTIRQ, | ||
55 | HRTIMER_CB_IRQSAFE, | ||
56 | HRTIMER_CB_IRQSAFE_NO_RESTART, | ||
57 | HRTIMER_CB_IRQSAFE_NO_SOFTIRQ, | ||
58 | }; | ||
38 | 59 | ||
39 | struct hrtimer_base; | 60 | /* |
61 | * Values to track state of the timer | ||
62 | * | ||
63 | * Possible states: | ||
64 | * | ||
65 | * 0x00 inactive | ||
66 | * 0x01 enqueued into rbtree | ||
67 | * 0x02 callback function running | ||
68 | * 0x04 callback pending (high resolution mode) | ||
69 | * | ||
70 | * Special case: | ||
71 | * 0x03 callback function running and enqueued | ||
72 | * (was requeued on another CPU) | ||
73 | * The "callback function running and enqueued" status is only possible on | ||
74 | * SMP. It happens for example when a posix timer expired and the callback | ||
75 | * queued a signal. Between dropping the lock which protects the posix timer | ||
76 | * and reacquiring the base lock of the hrtimer, another CPU can deliver the | ||
77 | * signal and rearm the timer. We have to preserve the callback running state, | ||
78 | * as otherwise the timer could be removed before the softirq code finishes the | ||
79 | * handling of the timer. | ||
80 | * | ||
81 | * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state to | ||
82 | * preserve the HRTIMER_STATE_CALLBACK bit in the above scenario. | ||
83 | * | ||
84 | * All state transitions are protected by cpu_base->lock. | ||
85 | */ | ||
86 | #define HRTIMER_STATE_INACTIVE 0x00 | ||
87 | #define HRTIMER_STATE_ENQUEUED 0x01 | ||
88 | #define HRTIMER_STATE_CALLBACK 0x02 | ||
89 | #define HRTIMER_STATE_PENDING 0x04 | ||
40 | 90 | ||
41 | /** | 91 | /** |
42 | * struct hrtimer - the basic hrtimer structure | 92 | * struct hrtimer - the basic hrtimer structure |
@@ -46,14 +96,34 @@ struct hrtimer_base; | |||
46 | * which the timer is based. | 96 | * which the timer is based. |
47 | * @function: timer expiry callback function | 97 | * @function: timer expiry callback function |
48 | * @base: pointer to the timer base (per cpu and per clock) | 98 | * @base: pointer to the timer base (per cpu and per clock) |
99 | * @state: state information (See bit values above) | ||
100 | * @cb_mode: high resolution timer feature to select the callback execution | ||
101 | * mode | ||
102 | * @cb_entry: list head to enqueue an expired timer into the callback list | ||
103 | * @start_site: timer statistics field to store the site where the timer | ||
104 | * was started | ||
105 | * @start_comm: timer statistics field to store the name of the process which | ||
106 | * started the timer | ||
107 | * @start_pid: timer statistics field to store the pid of the task which | ||
108 | * started the timer | ||
49 | * | 109 | * |
50 | * The hrtimer structure must be initialized by init_hrtimer_#CLOCKTYPE() | 110 | * The hrtimer structure must be initialized by hrtimer_init() |
51 | */ | 111 | */ |
52 | struct hrtimer { | 112 | struct hrtimer { |
53 | struct rb_node node; | 113 | struct rb_node node; |
54 | ktime_t expires; | 114 | ktime_t expires; |
55 | int (*function)(struct hrtimer *); | 115 | enum hrtimer_restart (*function)(struct hrtimer *); |
56 | struct hrtimer_base *base; | 116 | struct hrtimer_clock_base *base; |
117 | unsigned long state; | ||
118 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
119 | enum hrtimer_cb_mode cb_mode; | ||
120 | struct list_head cb_entry; | ||
121 | #endif | ||
122 | #ifdef CONFIG_TIMER_STATS | ||
123 | void *start_site; | ||
124 | char start_comm[16]; | ||
125 | int start_pid; | ||
126 | #endif | ||
57 | }; | 127 | }; |
58 | 128 | ||
59 | /** | 129 | /** |
@@ -70,37 +140,114 @@ struct hrtimer_sleeper { | |||
70 | 140 | ||
71 | /** | 141 | /** |
72 | * struct hrtimer_base - the timer base for a specific clock | 142 | * struct hrtimer_clock_base - the timer base for a specific clock |
73 | * @index: clock type index for per_cpu support when moving a timer | 143 | * @index: clock type index for per_cpu support when moving a |
74 | * to a base on another cpu. | 144 | * timer to a base on another cpu. |
75 | * @lock: lock protecting the base and associated timers | ||
76 | * @active: red black tree root node for the active timers | 145 | * @active: red black tree root node for the active timers |
77 | * @first: pointer to the timer node which expires first | 146 | * @first: pointer to the timer node which expires first |
78 | * @resolution: the resolution of the clock, in nanoseconds | 147 | * @resolution: the resolution of the clock, in nanoseconds |
79 | * @get_time: function to retrieve the current time of the clock | 148 | * @get_time: function to retrieve the current time of the clock |
80 | * @get_softirq_time: function to retrieve the current time from the softirq | 149 | * @get_softirq_time: function to retrieve the current time from the softirq |
81 | * @curr_timer: the timer which is executing a callback right now | ||
82 | * @softirq_time: the time when running the hrtimer queue in the softirq | 150 | * @softirq_time: the time when running the hrtimer queue in the softirq |
83 | * @lock_key: the lock_class_key for use with lockdep | 151 | * @cb_pending: list of timers where the callback is pending |
152 | * @offset: offset of this clock to the monotonic base | ||
153 | * @reprogram: function to reprogram the timer event | ||
84 | */ | 154 | */ |
85 | struct hrtimer_base { | 155 | struct hrtimer_clock_base { |
156 | struct hrtimer_cpu_base *cpu_base; | ||
86 | clockid_t index; | 157 | clockid_t index; |
87 | spinlock_t lock; | ||
88 | struct rb_root active; | 158 | struct rb_root active; |
89 | struct rb_node *first; | 159 | struct rb_node *first; |
90 | ktime_t resolution; | 160 | ktime_t resolution; |
91 | ktime_t (*get_time)(void); | 161 | ktime_t (*get_time)(void); |
92 | ktime_t (*get_softirq_time)(void); | 162 | ktime_t (*get_softirq_time)(void); |
93 | struct hrtimer *curr_timer; | ||
94 | ktime_t softirq_time; | 163 | ktime_t softirq_time; |
95 | struct lock_class_key lock_key; | 164 | #ifdef CONFIG_HIGH_RES_TIMERS |
165 | ktime_t offset; | ||
166 | int (*reprogram)(struct hrtimer *t, | ||
167 | struct hrtimer_clock_base *b, | ||
168 | ktime_t n); | ||
169 | #endif | ||
170 | }; | ||
171 | |||
172 | #define HRTIMER_MAX_CLOCK_BASES 2 | ||
173 | |||
174 | /* | ||
175 | * struct hrtimer_cpu_base - the per cpu clock bases | ||
176 | * @lock: lock protecting the base and associated clock bases | ||
177 | * and timers | ||
178 | * @lock_key: the lock_class_key for use with lockdep | ||
179 | * @clock_base: array of clock bases for this cpu | ||
180 | * @curr_timer: the timer which is executing a callback right now | ||
181 | * @expires_next: absolute time of the next event which was scheduled | ||
182 | * via clock_set_next_event() | ||
183 | * @hres_active: State of high resolution mode | ||
184 | * @check_clocks: Indicator; when set, re-evaluate the time source | ||
185 | * and clock event devices to decide whether high | ||
186 | * resolution mode can be activated. | ||
187 | * @cb_pending: Expired timers are moved from the rbtree to this | ||
188 | * list in the timer interrupt. The list is processed | ||
189 | * in the softirq. | ||
190 | * @nr_events: Total number of timer interrupt events | ||
191 | */ | ||
192 | struct hrtimer_cpu_base { | ||
193 | spinlock_t lock; | ||
194 | struct lock_class_key lock_key; | ||
195 | struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; | ||
196 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
197 | ktime_t expires_next; | ||
198 | int hres_active; | ||
199 | struct list_head cb_pending; | ||
200 | unsigned long nr_events; | ||
201 | #endif | ||
96 | }; | 202 | }; |
97 | 203 | ||
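
The struct split above separates the per-CPU state (lock, pending list, next event) from the per-clock state. A minimal sketch, assuming the layout above, of how code reaches the shared lock starting from a timer; this mirrors lock_hrtimer_base() later in this patch, and the helper name is made up:

static void example_lock_base(struct hrtimer *timer, unsigned long *flags)
{
	struct hrtimer_clock_base *base = timer->base;

	/* The lock moved up one level: it lives in the per-CPU
	 * container and is shared by all clock bases of that CPU. */
	spin_lock_irqsave(&base->cpu_base->lock, *flags);
	/* ... walk base->active / base->first under the lock ... */
	spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
}
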
204 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
205 | struct clock_event_device; | ||
206 | |||
207 | extern void clock_was_set(void); | ||
208 | extern void hrtimer_interrupt(struct clock_event_device *dev); | ||
209 | |||
210 | /* | ||
211 | * In high resolution mode the time reference must be read accurately | ||
212 | */ | ||
213 | static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) | ||
214 | { | ||
215 | return timer->base->get_time(); | ||
216 | } | ||
217 | |||
218 | /* | ||
219 | * The resolution of the clocks. The resolution value is returned in | ||
220 | * the clock_getres() system call to give application programmers an | ||
221 | * idea of the (in)accuracy of timers. Timer values are rounded up to | ||
222 | * this resolution values. | ||
223 | */ | ||
224 | # define KTIME_HIGH_RES (ktime_t) { .tv64 = 1 } | ||
225 | # define KTIME_MONOTONIC_RES KTIME_HIGH_RES | ||
226 | |||
227 | #else | ||
228 | |||
229 | # define KTIME_MONOTONIC_RES KTIME_LOW_RES | ||
230 | |||
98 | /* | 231 | /* |
99 | * clock_was_set() is a NOP for non-high-resolution systems. The | 232 | * clock_was_set() is a NOP for non-high-resolution systems. The |
100 | * time-sorted order guarantees that a timer does not expire early and | 233 | * time-sorted order guarantees that a timer does not expire early and |
101 | * is expired in the next softirq when the clock was advanced. | 234 | * is expired in the next softirq when the clock was advanced. |
102 | */ | 235 | */ |
103 | #define clock_was_set() do { } while (0) | 236 | static inline void clock_was_set(void) { } |
237 | |||
238 | /* | ||
239 | * In non-high-resolution mode the time reference is taken from | ||
240 | * the base softirq time variable. | ||
241 | */ | ||
242 | static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) | ||
243 | { | ||
244 | return timer->base->softirq_time; | ||
245 | } | ||
246 | |||
247 | #endif | ||
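
A typical caller of hrtimer_cb_get_time() is a periodic callback that forwards itself relative to the mode-dependent notion of "now". A hedged sketch; the interval value is a placeholder, real code would carry the period in its containing structure:

static enum hrtimer_restart example_tick_cb(struct hrtimer *timer)
{
	ktime_t example_interval = ktime_set(0, 1000000);	/* 1 ms */

	/* Forward past "now" as seen from the callback context: the
	 * accurate clock in highres mode, softirq_time otherwise. */
	hrtimer_forward(timer, hrtimer_cb_get_time(timer),
			example_interval);
	return HRTIMER_RESTART;
}
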
248 | |||
249 | extern ktime_t ktime_get(void); | ||
250 | extern ktime_t ktime_get_real(void); | ||
104 | 251 | ||
105 | /* Exported timer functions: */ | 252 | /* Exported timer functions: */ |
106 | 253 | ||
@@ -114,19 +261,33 @@ extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, | |||
114 | extern int hrtimer_cancel(struct hrtimer *timer); | 261 | extern int hrtimer_cancel(struct hrtimer *timer); |
115 | extern int hrtimer_try_to_cancel(struct hrtimer *timer); | 262 | extern int hrtimer_try_to_cancel(struct hrtimer *timer); |
116 | 263 | ||
117 | #define hrtimer_restart(timer) hrtimer_start((timer), (timer)->expires, HRTIMER_ABS) | 264 | static inline int hrtimer_restart(struct hrtimer *timer) |
265 | { | ||
266 | return hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); | ||
267 | } | ||
118 | 268 | ||
119 | /* Query timers: */ | 269 | /* Query timers: */ |
120 | extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); | 270 | extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); |
121 | extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); | 271 | extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); |
122 | 272 | ||
123 | #ifdef CONFIG_NO_IDLE_HZ | ||
124 | extern ktime_t hrtimer_get_next_event(void); | 273 | extern ktime_t hrtimer_get_next_event(void); |
125 | #endif | ||
126 | 274 | ||
275 | /* | ||
276 | * A timer is active, when it is enqueued into the rbtree or the callback | ||
277 | * function is running. | ||
278 | */ | ||
127 | static inline int hrtimer_active(const struct hrtimer *timer) | 279 | static inline int hrtimer_active(const struct hrtimer *timer) |
128 | { | 280 | { |
129 | return rb_parent(&timer->node) != &timer->node; | 281 | return timer->state != HRTIMER_STATE_INACTIVE; |
282 | } | ||
283 | |||
284 | /* | ||
285 | * Helper function to check, whether the timer is on one of the queues | ||
286 | */ | ||
287 | static inline int hrtimer_is_queued(struct hrtimer *timer) | ||
288 | { | ||
289 | return timer->state & | ||
290 | (HRTIMER_STATE_ENQUEUED | HRTIMER_STATE_PENDING); | ||
130 | } | 291 | } |
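
The two predicates differ exactly while the callback runs: the timer is then active but no longer queued. A hypothetical helper to illustrate the distinction:

static int example_remove_if_queued(struct hrtimer *timer)
{
	/* Only enqueued/pending timers can be removed without waiting;
	 * a timer in the CALLBACK state is active but not queued and
	 * needs hrtimer_cancel() to wait for the handler. */
	if (hrtimer_is_queued(timer))
		return hrtimer_try_to_cancel(timer);
	return 0;
}
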
131 | 292 | ||
132 | /* Forward a hrtimer so it expires after now: */ | 293 | /* Forward a hrtimer so it expires after now: */ |
@@ -149,4 +310,53 @@ extern void hrtimer_run_queues(void); | |||
149 | /* Bootup initialization: */ | 310 | /* Bootup initialization: */ |
150 | extern void __init hrtimers_init(void); | 311 | extern void __init hrtimers_init(void); |
151 | 312 | ||
313 | #if BITS_PER_LONG < 64 | ||
314 | extern unsigned long ktime_divns(const ktime_t kt, s64 div); | ||
315 | #else /* BITS_PER_LONG < 64 */ | ||
316 | # define ktime_divns(kt, div) (unsigned long)((kt).tv64 / (div)) | ||
317 | #endif | ||
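
ktime_divns() exists so that 32-bit configurations, which lack a fast 64-bit divide, can still compute interval counts. A sketch of the overrun-style computation it enables; this is the shape of hrtimer_forward(), not its literal body:

static unsigned long example_elapsed_periods(ktime_t now, ktime_t last,
					     ktime_t interval)
{
	ktime_t delta = ktime_sub(now, last);

	/* On 64-bit this compiles to a plain division; on 32-bit it
	 * falls back to the out-of-line ktime_divns() above. */
	return ktime_divns(delta, ktime_to_ns(interval));
}
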
318 | |||
319 | /* Show pending timers: */ | ||
320 | extern void sysrq_timer_list_show(void); | ||
321 | |||
322 | /* | ||
323 | * Timer-statistics info: | ||
324 | */ | ||
325 | #ifdef CONFIG_TIMER_STATS | ||
326 | |||
327 | extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | ||
328 | void *timerf, char * comm); | ||
329 | |||
330 | static inline void timer_stats_account_hrtimer(struct hrtimer *timer) | ||
331 | { | ||
332 | timer_stats_update_stats(timer, timer->start_pid, timer->start_site, | ||
333 | timer->function, timer->start_comm); | ||
334 | } | ||
335 | |||
336 | extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, | ||
337 | void *addr); | ||
338 | |||
339 | static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) | ||
340 | { | ||
341 | __timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0)); | ||
342 | } | ||
343 | |||
344 | static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) | ||
345 | { | ||
346 | timer->start_site = NULL; | ||
347 | } | ||
348 | #else | ||
349 | static inline void timer_stats_account_hrtimer(struct hrtimer *timer) | ||
350 | { | ||
351 | } | ||
352 | |||
353 | static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) | ||
354 | { | ||
355 | } | ||
356 | |||
357 | static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) | ||
358 | { | ||
359 | } | ||
360 | #endif | ||
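
The stats hooks are meant to bracket a timer's armed lifetime: record the start site when arming, account when the timer fires. A hedged sketch of the intended call pattern, with invented helper names:

static void example_arm(struct hrtimer *timer)
{
	/* Remembers __builtin_return_address(0) as the start site. */
	timer_stats_hrtimer_set_start_info(timer);
	/* ... enqueue the timer ... */
}

static void example_expire(struct hrtimer *timer)
{
	/* Feeds pid, start site and function into the stats table. */
	timer_stats_account_hrtimer(timer);
	timer_stats_hrtimer_clear_start_info(timer);
	/* ... run timer->function ... */
}
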
361 | |||
152 | #endif | 362 | #endif |
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5a8ba0b8ccba..e5ea1411050b 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
@@ -42,6 +42,8 @@ | |||
42 | * IRQF_SHARED - allow sharing the irq among several devices | 42 | * IRQF_SHARED - allow sharing the irq among several devices |
43 | * IRQF_PROBE_SHARED - set by callers when they expect sharing mismatches to occur | 43 | * IRQF_PROBE_SHARED - set by callers when they expect sharing mismatches to occur |
44 | * IRQF_TIMER - Flag to mark this interrupt as timer interrupt | 44 | * IRQF_TIMER - Flag to mark this interrupt as timer interrupt |
45 | * IRQF_PERCPU - Interrupt is per cpu | ||
46 | * IRQF_NOBALANCING - Flag to exclude this interrupt from irq balancing | ||
45 | */ | 47 | */ |
46 | #define IRQF_DISABLED 0x00000020 | 48 | #define IRQF_DISABLED 0x00000020 |
47 | #define IRQF_SAMPLE_RANDOM 0x00000040 | 49 | #define IRQF_SAMPLE_RANDOM 0x00000040 |
@@ -49,6 +51,7 @@ | |||
49 | #define IRQF_PROBE_SHARED 0x00000100 | 51 | #define IRQF_PROBE_SHARED 0x00000100 |
50 | #define IRQF_TIMER 0x00000200 | 52 | #define IRQF_TIMER 0x00000200 |
51 | #define IRQF_PERCPU 0x00000400 | 53 | #define IRQF_PERCPU 0x00000400 |
54 | #define IRQF_NOBALANCING 0x00000800 | ||
52 | 55 | ||
53 | /* | 56 | /* |
54 | * Migration helpers. Scheduled for removal in 1/2007 | 57 | * Migration helpers. Scheduled for removal in 1/2007 |
@@ -239,6 +242,9 @@ enum | |||
239 | BLOCK_SOFTIRQ, | 242 | BLOCK_SOFTIRQ, |
240 | TASKLET_SOFTIRQ, | 243 | TASKLET_SOFTIRQ, |
241 | SCHED_SOFTIRQ, | 244 | SCHED_SOFTIRQ, |
245 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
246 | HRTIMER_SOFTIRQ, | ||
247 | #endif | ||
242 | }; | 248 | }; |
243 | 249 | ||
244 | /* softirq mask and active fields moved to irq_cpustat_t in | 250 | /* softirq mask and active fields moved to irq_cpustat_t in |
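
IRQF_NOBALANCING lets a driver exclude its interrupt from irq balancing at request time, which the high resolution tick needs for its per-CPU timer interrupt. A hedged usage sketch; "example_timer_isr" and "dev" are placeholders:

static irqreturn_t example_timer_isr(int irq, void *dev_id)
{
	/* ... ack the hardware, run the tick ... */
	return IRQ_HANDLED;
}

static int example_setup_timer_irq(unsigned int irq, void *dev)
{
	int ret;

	ret = request_irq(irq, example_timer_isr,
			  IRQF_TIMER | IRQF_NOBALANCING,
			  "local-timer", dev);
	if (ret)
		printk(KERN_ERR "request_irq(%u) failed: %d\n", irq, ret);
	return ret;
}
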
diff --git a/include/linux/irq.h b/include/linux/irq.h index 5504b671357f..1939d42c21d2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h | |||
@@ -31,7 +31,7 @@ typedef void fastcall (*irq_flow_handler_t)(unsigned int irq, | |||
31 | /* | 31 | /* |
32 | * IRQ line status. | 32 | * IRQ line status. |
33 | * | 33 | * |
34 | * Bits 0-16 are reserved for the IRQF_* bits in linux/interrupt.h | 34 | * Bits 0-7 are reserved for the IRQF_* bits in linux/interrupt.h |
35 | * | 35 | * |
36 | * IRQ types | 36 | * IRQ types |
37 | */ | 37 | */ |
@@ -45,28 +45,30 @@ typedef void fastcall (*irq_flow_handler_t)(unsigned int irq, | |||
45 | #define IRQ_TYPE_PROBE 0x00000010 /* Probing in progress */ | 45 | #define IRQ_TYPE_PROBE 0x00000010 /* Probing in progress */ |
46 | 46 | ||
47 | /* Internal flags */ | 47 | /* Internal flags */ |
48 | #define IRQ_INPROGRESS 0x00010000 /* IRQ handler active - do not enter! */ | 48 | #define IRQ_INPROGRESS 0x00000100 /* IRQ handler active - do not enter! */ |
49 | #define IRQ_DISABLED 0x00020000 /* IRQ disabled - do not enter! */ | 49 | #define IRQ_DISABLED 0x00000200 /* IRQ disabled - do not enter! */ |
50 | #define IRQ_PENDING 0x00040000 /* IRQ pending - replay on enable */ | 50 | #define IRQ_PENDING 0x00000400 /* IRQ pending - replay on enable */ |
51 | #define IRQ_REPLAY 0x00080000 /* IRQ has been replayed but not acked yet */ | 51 | #define IRQ_REPLAY 0x00000800 /* IRQ has been replayed but not acked yet */ |
52 | #define IRQ_AUTODETECT 0x00100000 /* IRQ is being autodetected */ | 52 | #define IRQ_AUTODETECT 0x00001000 /* IRQ is being autodetected */ |
53 | #define IRQ_WAITING 0x00200000 /* IRQ not yet seen - for autodetection */ | 53 | #define IRQ_WAITING 0x00002000 /* IRQ not yet seen - for autodetection */ |
54 | #define IRQ_LEVEL 0x00400000 /* IRQ level triggered */ | 54 | #define IRQ_LEVEL 0x00004000 /* IRQ level triggered */ |
55 | #define IRQ_MASKED 0x00800000 /* IRQ masked - shouldn't be seen again */ | 55 | #define IRQ_MASKED 0x00008000 /* IRQ masked - shouldn't be seen again */ |
56 | #define IRQ_PER_CPU 0x01000000 /* IRQ is per CPU */ | 56 | #define IRQ_PER_CPU 0x00010000 /* IRQ is per CPU */ |
57 | #define IRQ_NOPROBE 0x00020000 /* IRQ is not valid for probing */ | ||
58 | #define IRQ_NOREQUEST 0x00040000 /* IRQ cannot be requested */ | ||
59 | #define IRQ_NOAUTOEN 0x00080000 /* IRQ will not be enabled on request irq */ | ||
60 | #define IRQ_WAKEUP 0x00100000 /* IRQ triggers system wakeup */ | ||
61 | #define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ | ||
62 | #define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ | ||
63 | |||
57 | #ifdef CONFIG_IRQ_PER_CPU | 64 | #ifdef CONFIG_IRQ_PER_CPU |
58 | # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) | 65 | # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) |
66 | # define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) | ||
59 | #else | 67 | #else |
60 | # define CHECK_IRQ_PER_CPU(var) 0 | 68 | # define CHECK_IRQ_PER_CPU(var) 0 |
69 | # define IRQ_NO_BALANCING_MASK IRQ_NO_BALANCING | ||
61 | #endif | 70 | #endif |
62 | 71 | ||
63 | #define IRQ_NOPROBE 0x02000000 /* IRQ is not valid for probing */ | ||
64 | #define IRQ_NOREQUEST 0x04000000 /* IRQ cannot be requested */ | ||
65 | #define IRQ_NOAUTOEN 0x08000000 /* IRQ will not be enabled on request irq */ | ||
66 | #define IRQ_DELAYED_DISABLE 0x10000000 /* IRQ disable (masking) happens delayed. */ | ||
67 | #define IRQ_WAKEUP 0x20000000 /* IRQ triggers system wakeup */ | ||
68 | #define IRQ_MOVE_PENDING 0x40000000 /* need to re-target IRQ destination */ | ||
69 | |||
70 | struct proc_dir_entry; | 72 | struct proc_dir_entry; |
71 | struct msi_desc; | 73 | struct msi_desc; |
72 | 74 | ||
@@ -127,6 +129,7 @@ struct irq_chip { | |||
127 | * | 129 | * |
128 | * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] | 130 | * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] |
129 | * @chip: low level interrupt hardware access | 131 | * @chip: low level interrupt hardware access |
132 | * @msi_desc: MSI descriptor | ||
130 | * @handler_data: per-IRQ data for the irq_chip methods | 133 | * @handler_data: per-IRQ data for the irq_chip methods |
131 | * @chip_data: platform-specific per-chip private data for the chip | 134 | * @chip_data: platform-specific per-chip private data for the chip |
132 | * methods, to allow shared chip implementations | 135 | * methods, to allow shared chip implementations |
@@ -235,11 +238,21 @@ static inline void set_pending_irq(unsigned int irq, cpumask_t mask) | |||
235 | 238 | ||
236 | #endif /* CONFIG_GENERIC_PENDING_IRQ */ | 239 | #endif /* CONFIG_GENERIC_PENDING_IRQ */ |
237 | 240 | ||
241 | extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask); | ||
242 | extern int irq_can_set_affinity(unsigned int irq); | ||
243 | |||
238 | #else /* CONFIG_SMP */ | 244 | #else /* CONFIG_SMP */ |
239 | 245 | ||
240 | #define move_native_irq(x) | 246 | #define move_native_irq(x) |
241 | #define move_masked_irq(x) | 247 | #define move_masked_irq(x) |
242 | 248 | ||
249 | static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask) | ||
250 | { | ||
251 | return -EINVAL; | ||
252 | } | ||
253 | |||
254 | static inline int irq_can_set_affinity(unsigned int irq) { return 0; } | ||
255 | |||
243 | #endif /* CONFIG_SMP */ | 256 | #endif /* CONFIG_SMP */ |
244 | 257 | ||
245 | #ifdef CONFIG_IRQBALANCE | 258 | #ifdef CONFIG_IRQBALANCE |
@@ -261,6 +274,11 @@ static inline int select_smp_affinity(unsigned int irq) | |||
261 | 274 | ||
262 | extern int no_irq_affinity; | 275 | extern int no_irq_affinity; |
263 | 276 | ||
277 | static inline int irq_balancing_disabled(unsigned int irq) | ||
278 | { | ||
279 | return irq_desc[irq].status & IRQ_NO_BALANCING_MASK; | ||
280 | } | ||
281 | |||
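
A balancing path would consult irq_balancing_disabled() before retargeting; combined with the new irq_set_affinity() helper, the skeleton looks like this (a sketch, not code from the patch):

static void example_rebalance(unsigned int irq, cpumask_t new_mask)
{
	/* Honors IRQ_NO_BALANCING and, where configured,
	 * IRQ_PER_CPU via IRQ_NO_BALANCING_MASK. */
	if (irq_balancing_disabled(irq))
		return;
	irq_set_affinity(irq, new_mask);
}
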
264 | /* Handle irq action chains: */ | 282 | /* Handle irq action chains: */ |
265 | extern int handle_IRQ_event(unsigned int irq, struct irqaction *action); | 283 | extern int handle_IRQ_event(unsigned int irq, struct irqaction *action); |
266 | 284 | ||
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 0ec6e28bccd2..c080f61fb024 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h | |||
@@ -142,13 +142,13 @@ static inline u64 get_jiffies_64(void) | |||
142 | * | 142 | * |
143 | * And some not so obvious. | 143 | * And some not so obvious. |
144 | * | 144 | * |
145 | * Note that we don't want to return MAX_LONG, because | 145 | * Note that we don't want to return LONG_MAX, because |
146 | * for various timeout reasons we often end up having | 146 | * for various timeout reasons we often end up having |
147 | * to wait "jiffies+1" in order to guarantee that we wait | 147 | * to wait "jiffies+1" in order to guarantee that we wait |
148 | * at _least_ "jiffies" - so "jiffies+1" had better still | 148 | * at _least_ "jiffies" - so "jiffies+1" had better still |
149 | * be positive. | 149 | * be positive. |
150 | */ | 150 | */ |
151 | #define MAX_JIFFY_OFFSET ((~0UL >> 1)-1) | 151 | #define MAX_JIFFY_OFFSET ((LONG_MAX >> 1)-1) |
152 | 152 | ||
153 | /* | 153 | /* |
154 | * We want to do realistic conversions of time so we need to use the same | 154 | * We want to do realistic conversions of time so we need to use the same |
@@ -259,207 +259,23 @@ static inline u64 get_jiffies_64(void) | |||
259 | #endif | 259 | #endif |
260 | 260 | ||
261 | /* | 261 | /* |
262 | * Convert jiffies to milliseconds and back. | 262 | * Convert various time units to each other: |
263 | * | ||
264 | * Avoid unnecessary multiplications/divisions in the | ||
265 | * two most common HZ cases: | ||
266 | */ | ||
267 | static inline unsigned int jiffies_to_msecs(const unsigned long j) | ||
268 | { | ||
269 | #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) | ||
270 | return (MSEC_PER_SEC / HZ) * j; | ||
271 | #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) | ||
272 | return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); | ||
273 | #else | ||
274 | return (j * MSEC_PER_SEC) / HZ; | ||
275 | #endif | ||
276 | } | ||
277 | |||
278 | static inline unsigned int jiffies_to_usecs(const unsigned long j) | ||
279 | { | ||
280 | #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) | ||
281 | return (USEC_PER_SEC / HZ) * j; | ||
282 | #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) | ||
283 | return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC); | ||
284 | #else | ||
285 | return (j * USEC_PER_SEC) / HZ; | ||
286 | #endif | ||
287 | } | ||
288 | |||
289 | static inline unsigned long msecs_to_jiffies(const unsigned int m) | ||
290 | { | ||
291 | if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) | ||
292 | return MAX_JIFFY_OFFSET; | ||
293 | #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) | ||
294 | return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); | ||
295 | #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) | ||
296 | return m * (HZ / MSEC_PER_SEC); | ||
297 | #else | ||
298 | return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC; | ||
299 | #endif | ||
300 | } | ||
301 | |||
302 | static inline unsigned long usecs_to_jiffies(const unsigned int u) | ||
303 | { | ||
304 | if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET)) | ||
305 | return MAX_JIFFY_OFFSET; | ||
306 | #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) | ||
307 | return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ); | ||
308 | #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) | ||
309 | return u * (HZ / USEC_PER_SEC); | ||
310 | #else | ||
311 | return (u * HZ + USEC_PER_SEC - 1) / USEC_PER_SEC; | ||
312 | #endif | ||
313 | } | ||
314 | |||
315 | /* | ||
316 | * The TICK_NSEC - 1 rounds up the value to the next resolution. Note | ||
317 | * that a remainder subtract here would not do the right thing as the | ||
318 | * resolution values don't fall on second boundries. I.e. the line: | ||
319 | * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding. | ||
320 | * | ||
321 | * Rather, we just shift the bits off the right. | ||
322 | * | ||
323 | * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec | ||
324 | * value to a scaled second value. | ||
325 | */ | ||
326 | static __inline__ unsigned long | ||
327 | timespec_to_jiffies(const struct timespec *value) | ||
328 | { | ||
329 | unsigned long sec = value->tv_sec; | ||
330 | long nsec = value->tv_nsec + TICK_NSEC - 1; | ||
331 | |||
332 | if (sec >= MAX_SEC_IN_JIFFIES){ | ||
333 | sec = MAX_SEC_IN_JIFFIES; | ||
334 | nsec = 0; | ||
335 | } | ||
336 | return (((u64)sec * SEC_CONVERSION) + | ||
337 | (((u64)nsec * NSEC_CONVERSION) >> | ||
338 | (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; | ||
339 | |||
340 | } | ||
341 | |||
342 | static __inline__ void | ||
343 | jiffies_to_timespec(const unsigned long jiffies, struct timespec *value) | ||
344 | { | ||
345 | /* | ||
346 | * Convert jiffies to nanoseconds and separate with | ||
347 | * one divide. | ||
348 | */ | ||
349 | u64 nsec = (u64)jiffies * TICK_NSEC; | ||
350 | value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_nsec); | ||
351 | } | ||
352 | |||
353 | /* Same for "timeval" | ||
354 | * | ||
355 | * Well, almost. The problem here is that the real system resolution is | ||
356 | * in nanoseconds and the value being converted is in micro seconds. | ||
357 | * Also for some machines (those that use HZ = 1024, in-particular), | ||
358 | * there is a LARGE error in the tick size in microseconds. | ||
359 | |||
360 | * The solution we use is to do the rounding AFTER we convert the | ||
361 | * microsecond part. Thus the USEC_ROUND, the bits to be shifted off. | ||
362 | * Instruction wise, this should cost only an additional add with carry | ||
363 | * instruction above the way it was done above. | ||
364 | */ | ||
365 | static __inline__ unsigned long | ||
366 | timeval_to_jiffies(const struct timeval *value) | ||
367 | { | ||
368 | unsigned long sec = value->tv_sec; | ||
369 | long usec = value->tv_usec; | ||
370 | |||
371 | if (sec >= MAX_SEC_IN_JIFFIES){ | ||
372 | sec = MAX_SEC_IN_JIFFIES; | ||
373 | usec = 0; | ||
374 | } | ||
375 | return (((u64)sec * SEC_CONVERSION) + | ||
376 | (((u64)usec * USEC_CONVERSION + USEC_ROUND) >> | ||
377 | (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; | ||
378 | } | ||
379 | |||
380 | static __inline__ void | ||
381 | jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) | ||
382 | { | ||
383 | /* | ||
384 | * Convert jiffies to nanoseconds and separate with | ||
385 | * one divide. | ||
386 | */ | ||
387 | u64 nsec = (u64)jiffies * TICK_NSEC; | ||
388 | long tv_usec; | ||
389 | |||
390 | value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &tv_usec); | ||
391 | tv_usec /= NSEC_PER_USEC; | ||
392 | value->tv_usec = tv_usec; | ||
393 | } | ||
394 | |||
395 | /* | ||
396 | * Convert jiffies/jiffies_64 to clock_t and back. | ||
397 | */ | 263 | */ |
398 | static inline clock_t jiffies_to_clock_t(long x) | 264 | extern unsigned int jiffies_to_msecs(const unsigned long j); |
399 | { | 265 | extern unsigned int jiffies_to_usecs(const unsigned long j); |
400 | #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 | 266 | extern unsigned long msecs_to_jiffies(const unsigned int m); |
401 | return x / (HZ / USER_HZ); | 267 | extern unsigned long usecs_to_jiffies(const unsigned int u); |
402 | #else | 268 | extern unsigned long timespec_to_jiffies(const struct timespec *value); |
403 | u64 tmp = (u64)x * TICK_NSEC; | 269 | extern void jiffies_to_timespec(const unsigned long jiffies, |
404 | do_div(tmp, (NSEC_PER_SEC / USER_HZ)); | 270 | struct timespec *value); |
405 | return (long)tmp; | 271 | extern unsigned long timeval_to_jiffies(const struct timeval *value); |
406 | #endif | 272 | extern void jiffies_to_timeval(const unsigned long jiffies, |
407 | } | 273 | struct timeval *value); |
408 | 274 | extern clock_t jiffies_to_clock_t(long x); | |
409 | static inline unsigned long clock_t_to_jiffies(unsigned long x) | 275 | extern unsigned long clock_t_to_jiffies(unsigned long x); |
410 | { | 276 | extern u64 jiffies_64_to_clock_t(u64 x); |
411 | #if (HZ % USER_HZ)==0 | 277 | extern u64 nsec_to_clock_t(u64 x); |
412 | if (x >= ~0UL / (HZ / USER_HZ)) | 278 | |
413 | return ~0UL; | 279 | #define TIMESTAMP_SIZE 30 |
414 | return x * (HZ / USER_HZ); | ||
415 | #else | ||
416 | u64 jif; | ||
417 | |||
418 | /* Don't worry about loss of precision here .. */ | ||
419 | if (x >= ~0UL / HZ * USER_HZ) | ||
420 | return ~0UL; | ||
421 | |||
422 | /* .. but do try to contain it here */ | ||
423 | jif = x * (u64) HZ; | ||
424 | do_div(jif, USER_HZ); | ||
425 | return jif; | ||
426 | #endif | ||
427 | } | ||
428 | |||
429 | static inline u64 jiffies_64_to_clock_t(u64 x) | ||
430 | { | ||
431 | #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 | ||
432 | do_div(x, HZ / USER_HZ); | ||
433 | #else | ||
434 | /* | ||
435 | * There are better ways that don't overflow early, | ||
436 | * but even this doesn't overflow in hundreds of years | ||
437 | * in 64 bits, so.. | ||
438 | */ | ||
439 | x *= TICK_NSEC; | ||
440 | do_div(x, (NSEC_PER_SEC / USER_HZ)); | ||
441 | #endif | ||
442 | return x; | ||
443 | } | ||
444 | |||
445 | static inline u64 nsec_to_clock_t(u64 x) | ||
446 | { | ||
447 | #if (NSEC_PER_SEC % USER_HZ) == 0 | ||
448 | do_div(x, (NSEC_PER_SEC / USER_HZ)); | ||
449 | #elif (USER_HZ % 512) == 0 | ||
450 | x *= USER_HZ/512; | ||
451 | do_div(x, (NSEC_PER_SEC / 512)); | ||
452 | #else | ||
453 | /* | ||
454 | * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024, | ||
455 | * overflow after 64.99 years. | ||
456 | * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ... | ||
457 | */ | ||
458 | x *= 9; | ||
459 | do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (USER_HZ/2)) | ||
460 | / USER_HZ)); | ||
461 | #endif | ||
462 | return x; | ||
463 | } | ||
464 | 280 | ||
465 | #endif | 281 | #endif |
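
Callers are unaffected by the move out of line; the usual conversion idioms still read the same. A small sketch:

static void example_sleep(void)
{
	struct timespec ts = { .tv_sec = 1, .tv_nsec = 500000000 };
	unsigned long j = msecs_to_jiffies(100);	/* 100 ms       */

	j += timespec_to_jiffies(&ts);			/* plus 1.5 s   */
	schedule_timeout_interruptible(j);		/* sleep ~1.6 s */
}
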
diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 7444a6326231..c68c7ac6b232 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h | |||
@@ -261,8 +261,7 @@ static inline s64 ktime_to_ns(const ktime_t kt) | |||
261 | * idea of the (in)accuracy of timers. Timer values are rounded up to | 261 | * idea of the (in)accuracy of timers. Timer values are rounded up to |
262 | * this resolution value. | 262 | * this resolution value. |
263 | */ | 263 | */ |
264 | #define KTIME_REALTIME_RES (ktime_t){ .tv64 = TICK_NSEC } | 264 | #define KTIME_LOW_RES (ktime_t){ .tv64 = TICK_NSEC } |
265 | #define KTIME_MONOTONIC_RES (ktime_t){ .tv64 = TICK_NSEC } | ||
266 | 265 | ||
267 | /* Get the monotonic time in timespec format: */ | 266 | /* Get the monotonic time in timespec format: */ |
268 | extern void ktime_get_ts(struct timespec *ts); | 267 | extern void ktime_get_ts(struct timespec *ts); |
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index db05182ca0e8..1be5be88debe 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h | |||
@@ -105,12 +105,11 @@ struct nfs4_ace { | |||
105 | uint32_t access_mask; | 105 | uint32_t access_mask; |
106 | int whotype; | 106 | int whotype; |
107 | uid_t who; | 107 | uid_t who; |
108 | struct list_head l_ace; | ||
109 | }; | 108 | }; |
110 | 109 | ||
111 | struct nfs4_acl { | 110 | struct nfs4_acl { |
112 | uint32_t naces; | 111 | uint32_t naces; |
113 | struct list_head ace_head; | 112 | struct nfs4_ace aces[0]; |
114 | }; | 113 | }; |
115 | 114 | ||
116 | typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; | 115 | typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; |
diff --git a/include/linux/nfs4_acl.h b/include/linux/nfs4_acl.h index 22aff4d01f20..409b6e02f337 100644 --- a/include/linux/nfs4_acl.h +++ b/include/linux/nfs4_acl.h | |||
@@ -39,9 +39,12 @@ | |||
39 | 39 | ||
40 | #include <linux/posix_acl.h> | 40 | #include <linux/posix_acl.h> |
41 | 41 | ||
42 | struct nfs4_acl *nfs4_acl_new(void); | 42 | /* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to |
43 | void nfs4_acl_free(struct nfs4_acl *); | 43 | * fit in a page: */ |
44 | int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); | 44 | #define NFS4_ACL_MAX 170 |
45 | |||
46 | struct nfs4_acl *nfs4_acl_new(int); | ||
47 | void nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); | ||
45 | int nfs4_acl_get_whotype(char *, u32); | 48 | int nfs4_acl_get_whotype(char *, u32); |
46 | int nfs4_acl_write_who(int who, char *p); | 49 | int nfs4_acl_write_who(int who, char *p); |
47 | int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group, | 50 | int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group, |
diff --git a/include/linux/tick.h b/include/linux/tick.h new file mode 100644 index 000000000000..9a7252e089b9 --- /dev/null +++ b/include/linux/tick.h | |||
@@ -0,0 +1,109 @@ | |||
1 | /* linux/include/linux/tick.h | ||
2 | * | ||
3 | * This file contains the structure definitions for tick related functions | ||
4 | * | ||
5 | */ | ||
6 | #ifndef _LINUX_TICK_H | ||
7 | #define _LINUX_TICK_H | ||
8 | |||
9 | #include <linux/clockchips.h> | ||
10 | |||
11 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | ||
12 | |||
13 | enum tick_device_mode { | ||
14 | TICKDEV_MODE_PERIODIC, | ||
15 | TICKDEV_MODE_ONESHOT, | ||
16 | }; | ||
17 | |||
18 | struct tick_device { | ||
19 | struct clock_event_device *evtdev; | ||
20 | enum tick_device_mode mode; | ||
21 | }; | ||
22 | |||
23 | enum tick_nohz_mode { | ||
24 | NOHZ_MODE_INACTIVE, | ||
25 | NOHZ_MODE_LOWRES, | ||
26 | NOHZ_MODE_HIGHRES, | ||
27 | }; | ||
28 | |||
29 | /** | ||
30 | * struct tick_sched - sched tick emulation and no idle tick control/stats | ||
31 | * @sched_timer: hrtimer to schedule the periodic tick in high | ||
32 | * resolution mode | ||
33 | * @idle_tick: Store the last idle tick expiry time when the tick | ||
34 | * timer is modified for idle sleeps. This is necessary | ||
35 | * to resume the tick timer operation in the timeline | ||
36 | * when the CPU returns from idle | ||
37 | * @tick_stopped: Indicator that the idle tick has been stopped | ||
38 | * @idle_jiffies: jiffies at the entry to idle for idle time accounting | ||
39 | * @idle_calls: Total number of idle calls | ||
40 | * @idle_sleeps: Number of idle calls, where the sched tick was stopped | ||
41 | * @idle_entrytime: Time when the idle call was entered | ||
42 | * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped | ||
43 | */ | ||
44 | struct tick_sched { | ||
45 | struct hrtimer sched_timer; | ||
46 | unsigned long check_clocks; | ||
47 | enum tick_nohz_mode nohz_mode; | ||
48 | ktime_t idle_tick; | ||
49 | int tick_stopped; | ||
50 | unsigned long idle_jiffies; | ||
51 | unsigned long idle_calls; | ||
52 | unsigned long idle_sleeps; | ||
53 | ktime_t idle_entrytime; | ||
54 | ktime_t idle_sleeptime; | ||
55 | unsigned long last_jiffies; | ||
56 | unsigned long next_jiffies; | ||
57 | ktime_t idle_expires; | ||
58 | }; | ||
59 | |||
60 | extern void __init tick_init(void); | ||
61 | extern int tick_is_oneshot_available(void); | ||
62 | extern struct tick_device *tick_get_device(int cpu); | ||
63 | |||
64 | # ifdef CONFIG_HIGH_RES_TIMERS | ||
65 | extern int tick_init_highres(void); | ||
66 | extern int tick_program_event(ktime_t expires, int force); | ||
67 | extern void tick_setup_sched_timer(void); | ||
68 | extern void tick_cancel_sched_timer(int cpu); | ||
69 | # else | ||
70 | static inline void tick_cancel_sched_timer(int cpu) { } | ||
71 | # endif /* HIGHRES */ | ||
72 | |||
73 | # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | ||
74 | extern struct tick_device *tick_get_broadcast_device(void); | ||
75 | extern cpumask_t *tick_get_broadcast_mask(void); | ||
76 | |||
77 | # ifdef CONFIG_TICK_ONESHOT | ||
78 | extern cpumask_t *tick_get_broadcast_oneshot_mask(void); | ||
79 | # endif | ||
80 | |||
81 | # endif /* BROADCAST */ | ||
82 | |||
83 | # ifdef CONFIG_TICK_ONESHOT | ||
84 | extern void tick_clock_notify(void); | ||
85 | extern int tick_check_oneshot_change(int allow_nohz); | ||
86 | extern struct tick_sched *tick_get_tick_sched(int cpu); | ||
87 | # else | ||
88 | static inline void tick_clock_notify(void) { } | ||
89 | static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } | ||
90 | # endif | ||
91 | |||
92 | #else /* CONFIG_GENERIC_CLOCKEVENTS */ | ||
93 | static inline void tick_init(void) { } | ||
94 | static inline void tick_cancel_sched_timer(int cpu) { } | ||
95 | static inline void tick_clock_notify(void) { } | ||
96 | static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } | ||
97 | #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ | ||
98 | |||
99 | # ifdef CONFIG_NO_HZ | ||
100 | extern void tick_nohz_stop_sched_tick(void); | ||
101 | extern void tick_nohz_restart_sched_tick(void); | ||
102 | extern void tick_nohz_update_jiffies(void); | ||
103 | # else | ||
104 | static inline void tick_nohz_stop_sched_tick(void) { } | ||
105 | static inline void tick_nohz_restart_sched_tick(void) { } | ||
106 | static inline void tick_nohz_update_jiffies(void) { } | ||
107 | # endif /* !NO_HZ */ | ||
108 | |||
109 | #endif | ||
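
The NO_HZ entry points are designed to bracket the architecture idle loop: stop the periodic tick when going idle, restart it on wakeup. A hedged sketch of that wiring; safe_halt() stands in for the arch-specific idle instruction, and real idle loops handle preemption more carefully:

static void example_cpu_idle(void)
{
	for (;;) {
		tick_nohz_stop_sched_tick();	/* park the tick       */
		while (!need_resched())
			safe_halt();		/* arch idle, e.g. hlt */
		tick_nohz_restart_sched_tick();	/* catch up jiffies    */
		schedule();
	}
}
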
diff --git a/include/linux/time.h b/include/linux/time.h index eceb1a59b078..8ea8dea713c7 100644 --- a/include/linux/time.h +++ b/include/linux/time.h | |||
@@ -92,6 +92,7 @@ extern struct timespec xtime; | |||
92 | extern struct timespec wall_to_monotonic; | 92 | extern struct timespec wall_to_monotonic; |
93 | extern seqlock_t xtime_lock __attribute__((weak)); | 93 | extern seqlock_t xtime_lock __attribute__((weak)); |
94 | 94 | ||
95 | extern unsigned long read_persistent_clock(void); | ||
95 | void timekeeping_init(void); | 96 | void timekeeping_init(void); |
96 | 97 | ||
97 | static inline unsigned long get_seconds(void) | 98 | static inline unsigned long get_seconds(void) |
diff --git a/include/linux/timer.h b/include/linux/timer.h index fb5edaaf0ebd..719113b652dd 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _LINUX_TIMER_H | 2 | #define _LINUX_TIMER_H |
3 | 3 | ||
4 | #include <linux/list.h> | 4 | #include <linux/list.h> |
5 | #include <linux/ktime.h> | ||
5 | #include <linux/spinlock.h> | 6 | #include <linux/spinlock.h> |
6 | #include <linux/stddef.h> | 7 | #include <linux/stddef.h> |
7 | 8 | ||
@@ -15,6 +16,11 @@ struct timer_list { | |||
15 | unsigned long data; | 16 | unsigned long data; |
16 | 17 | ||
17 | struct tvec_t_base_s *base; | 18 | struct tvec_t_base_s *base; |
19 | #ifdef CONFIG_TIMER_STATS | ||
20 | void *start_site; | ||
21 | char start_comm[16]; | ||
22 | int start_pid; | ||
23 | #endif | ||
18 | }; | 24 | }; |
19 | 25 | ||
20 | extern struct tvec_t_base_s boot_tvec_bases; | 26 | extern struct tvec_t_base_s boot_tvec_bases; |
@@ -61,7 +67,65 @@ extern int del_timer(struct timer_list * timer); | |||
61 | extern int __mod_timer(struct timer_list *timer, unsigned long expires); | 67 | extern int __mod_timer(struct timer_list *timer, unsigned long expires); |
62 | extern int mod_timer(struct timer_list *timer, unsigned long expires); | 68 | extern int mod_timer(struct timer_list *timer, unsigned long expires); |
63 | 69 | ||
70 | /* | ||
71 | * Return the time (in absolute jiffies) when the next timer-wheel timeout | ||
72 | * occurs; locks the timer base: | ||
73 | */ | ||
64 | extern unsigned long next_timer_interrupt(void); | 74 | extern unsigned long next_timer_interrupt(void); |
75 | /* | ||
76 | * Return the time (in absolute jiffies) when the next timer-wheel timeout | ||
77 | * occurs; locks the timer base and compares against the given | ||
78 | * jiffies value. | ||
79 | */ | ||
80 | extern unsigned long get_next_timer_interrupt(unsigned long now); | ||
81 | |||
82 | /* | ||
83 | * Timer-statistics info: | ||
84 | */ | ||
85 | #ifdef CONFIG_TIMER_STATS | ||
86 | |||
87 | extern void init_timer_stats(void); | ||
88 | |||
89 | extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | ||
90 | void *timerf, char * comm); | ||
91 | |||
92 | static inline void timer_stats_account_timer(struct timer_list *timer) | ||
93 | { | ||
94 | timer_stats_update_stats(timer, timer->start_pid, timer->start_site, | ||
95 | timer->function, timer->start_comm); | ||
96 | } | ||
97 | |||
98 | extern void __timer_stats_timer_set_start_info(struct timer_list *timer, | ||
99 | void *addr); | ||
100 | |||
101 | static inline void timer_stats_timer_set_start_info(struct timer_list *timer) | ||
102 | { | ||
103 | __timer_stats_timer_set_start_info(timer, __builtin_return_address(0)); | ||
104 | } | ||
105 | |||
106 | static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) | ||
107 | { | ||
108 | timer->start_site = NULL; | ||
109 | } | ||
110 | #else | ||
111 | static inline void init_timer_stats(void) | ||
112 | { | ||
113 | } | ||
114 | |||
115 | static inline void timer_stats_account_timer(struct timer_list *timer) | ||
116 | { | ||
117 | } | ||
118 | |||
119 | static inline void timer_stats_timer_set_start_info(struct timer_list *timer) | ||
120 | { | ||
121 | } | ||
122 | |||
123 | static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) | ||
124 | { | ||
125 | } | ||
126 | #endif | ||
127 | |||
128 | extern void delayed_work_timer_fn(unsigned long __data); | ||
65 | 129 | ||
66 | /** | 130 | /** |
67 | * add_timer - start a timer | 131 | * add_timer - start a timer |
@@ -96,7 +160,7 @@ static inline void add_timer(struct timer_list *timer) | |||
96 | extern void init_timers(void); | 160 | extern void init_timers(void); |
97 | extern void run_local_timers(void); | 161 | extern void run_local_timers(void); |
98 | struct hrtimer; | 162 | struct hrtimer; |
99 | extern int it_real_fn(struct hrtimer *); | 163 | extern enum hrtimer_restart it_real_fn(struct hrtimer *); |
100 | 164 | ||
101 | unsigned long __round_jiffies(unsigned long j, int cpu); | 165 | unsigned long __round_jiffies(unsigned long j, int cpu); |
102 | unsigned long __round_jiffies_relative(unsigned long j, int cpu); | 166 | unsigned long __round_jiffies_relative(unsigned long j, int cpu); |
diff --git a/include/linux/timex.h b/include/linux/timex.h index 9a24e500c311..da929dbbea2a 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h | |||
@@ -286,6 +286,13 @@ static inline void time_interpolator_update(long delta_nsec) | |||
286 | 286 | ||
287 | #define TICK_LENGTH_SHIFT 32 | 287 | #define TICK_LENGTH_SHIFT 32 |
288 | 288 | ||
289 | #ifdef CONFIG_NO_HZ | ||
290 | #define NTP_INTERVAL_FREQ (2) | ||
291 | #else | ||
292 | #define NTP_INTERVAL_FREQ (HZ) | ||
293 | #endif | ||
294 | #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) | ||
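
A worked example of what these constants come out to, assuming NSEC_PER_SEC = 1e9:

/*
 * HZ=250, !CONFIG_NO_HZ: NTP_INTERVAL_FREQ = 250,
 *                        NTP_INTERVAL_LENGTH = 1e9/250 = 4,000,000 ns
 * CONFIG_NO_HZ:          NTP_INTERVAL_FREQ = 2,
 *                        NTP_INTERVAL_LENGTH = 1e9/2 = 500,000,000 ns
 * i.e. with dynticks the timekeeping code accumulates NTP adjustments
 * in half-second chunks instead of once per (possibly absent) tick.
 */
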
295 | |||
289 | /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ | 296 | /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ |
290 | extern u64 current_tick_length(void); | 297 | extern u64 current_tick_length(void); |
291 | 298 | ||
diff --git a/init/main.c b/init/main.c index 2421e1544127..953500b02ac4 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/cpu.h> | 40 | #include <linux/cpu.h> |
41 | #include <linux/cpuset.h> | 41 | #include <linux/cpuset.h> |
42 | #include <linux/efi.h> | 42 | #include <linux/efi.h> |
43 | #include <linux/tick.h> | ||
43 | #include <linux/taskstats_kern.h> | 44 | #include <linux/taskstats_kern.h> |
44 | #include <linux/delayacct.h> | 45 | #include <linux/delayacct.h> |
45 | #include <linux/unistd.h> | 46 | #include <linux/unistd.h> |
@@ -515,6 +516,7 @@ asmlinkage void __init start_kernel(void) | |||
515 | * enable them | 516 | * enable them |
516 | */ | 517 | */ |
517 | lock_kernel(); | 518 | lock_kernel(); |
519 | tick_init(); | ||
518 | boot_cpu_init(); | 520 | boot_cpu_init(); |
519 | page_address_init(); | 521 | page_address_init(); |
520 | printk(KERN_NOTICE); | 522 | printk(KERN_NOTICE); |
diff --git a/kernel/fork.c b/kernel/fork.c index 0b6293d94d96..d154cc786489 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -858,7 +858,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts | |||
858 | init_sigpending(&sig->shared_pending); | 858 | init_sigpending(&sig->shared_pending); |
859 | INIT_LIST_HEAD(&sig->posix_timers); | 859 | INIT_LIST_HEAD(&sig->posix_timers); |
860 | 860 | ||
861 | hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL); | 861 | hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
862 | sig->it_real_incr.tv64 = 0; | 862 | sig->it_real_incr.tv64 = 0; |
863 | sig->real_timer.function = it_real_fn; | 863 | sig->real_timer.function = it_real_fn; |
864 | sig->tsk = tsk; | 864 | sig->tsk = tsk; |
diff --git a/kernel/futex.c b/kernel/futex.c index 5a737de857d3..e749e7df14b1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -1134,7 +1134,7 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1134 | 1134 | ||
1135 | if (sec != MAX_SCHEDULE_TIMEOUT) { | 1135 | if (sec != MAX_SCHEDULE_TIMEOUT) { |
1136 | to = &timeout; | 1136 | to = &timeout; |
1137 | hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS); | 1137 | hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); |
1138 | hrtimer_init_sleeper(to, current); | 1138 | hrtimer_init_sleeper(to, current); |
1139 | to->timer.expires = ktime_set(sec, nsec); | 1139 | to->timer.expires = ktime_set(sec, nsec); |
1140 | } | 1140 | } |
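
The HRTIMER_ABS/HRTIMER_REL to HRTIMER_MODE_ABS/HRTIMER_MODE_REL rename, visible in the two hunks above, touches every init/start site. A sketch of a converted caller arming a relative 10 ms timer; "example_tick_cb" refers to the hypothetical callback sketched earlier:

static void example_arm_10ms(struct hrtimer *t)
{
	hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	t->function = example_tick_cb;
	hrtimer_start(t, ktime_set(0, 10 * NSEC_PER_MSEC),
		      HRTIMER_MODE_REL);
}
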
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index f44e499e8fca..476cb0c0b4a4 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -1,8 +1,9 @@ | |||
1 | /* | 1 | /* |
2 | * linux/kernel/hrtimer.c | 2 | * linux/kernel/hrtimer.c |
3 | * | 3 | * |
4 | * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> | 4 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> |
5 | * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar | 5 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar |
6 | * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner | ||
6 | * | 7 | * |
7 | * High-resolution kernel timers | 8 | * High-resolution kernel timers |
8 | * | 9 | * |
@@ -31,12 +32,17 @@ | |||
31 | */ | 32 | */ |
32 | 33 | ||
33 | #include <linux/cpu.h> | 34 | #include <linux/cpu.h> |
35 | #include <linux/irq.h> | ||
34 | #include <linux/module.h> | 36 | #include <linux/module.h> |
35 | #include <linux/percpu.h> | 37 | #include <linux/percpu.h> |
36 | #include <linux/hrtimer.h> | 38 | #include <linux/hrtimer.h> |
37 | #include <linux/notifier.h> | 39 | #include <linux/notifier.h> |
38 | #include <linux/syscalls.h> | 40 | #include <linux/syscalls.h> |
41 | #include <linux/kallsyms.h> | ||
39 | #include <linux/interrupt.h> | 42 | #include <linux/interrupt.h> |
43 | #include <linux/tick.h> | ||
44 | #include <linux/seq_file.h> | ||
45 | #include <linux/err.h> | ||
40 | 46 | ||
41 | #include <asm/uaccess.h> | 47 | #include <asm/uaccess.h> |
42 | 48 | ||
@@ -45,7 +51,7 @@ | |||
45 | * | 51 | * |
46 | * returns the time in ktime_t format | 52 | * returns the time in ktime_t format |
47 | */ | 53 | */ |
48 | static ktime_t ktime_get(void) | 54 | ktime_t ktime_get(void) |
49 | { | 55 | { |
50 | struct timespec now; | 56 | struct timespec now; |
51 | 57 | ||
@@ -59,7 +65,7 @@ static ktime_t ktime_get(void) | |||
59 | * | 65 | * |
60 | * returns the time in ktime_t format | 66 | * returns the time in ktime_t format |
61 | */ | 67 | */ |
62 | static ktime_t ktime_get_real(void) | 68 | ktime_t ktime_get_real(void) |
63 | { | 69 | { |
64 | struct timespec now; | 70 | struct timespec now; |
65 | 71 | ||
@@ -79,21 +85,22 @@ EXPORT_SYMBOL_GPL(ktime_get_real); | |||
79 | * This ensures that we capture erroneous accesses to these clock ids | 85 | * This ensures that we capture erroneous accesses to these clock ids |
80 | * rather than moving them into the range of valid clock id's. | 86 | * rather than moving them into the range of valid clock id's. |
81 | */ | 87 | */ |
82 | 88 | DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = | |
83 | #define MAX_HRTIMER_BASES 2 | ||
84 | |||
85 | static DEFINE_PER_CPU(struct hrtimer_base, hrtimer_bases[MAX_HRTIMER_BASES]) = | ||
86 | { | 89 | { |
90 | |||
91 | .clock_base = | ||
87 | { | 92 | { |
88 | .index = CLOCK_REALTIME, | 93 | { |
89 | .get_time = &ktime_get_real, | 94 | .index = CLOCK_REALTIME, |
90 | .resolution = KTIME_REALTIME_RES, | 95 | .get_time = &ktime_get_real, |
91 | }, | 96 | .resolution = KTIME_LOW_RES, |
92 | { | 97 | }, |
93 | .index = CLOCK_MONOTONIC, | 98 | { |
94 | .get_time = &ktime_get, | 99 | .index = CLOCK_MONOTONIC, |
95 | .resolution = KTIME_MONOTONIC_RES, | 100 | .get_time = &ktime_get, |
96 | }, | 101 | .resolution = KTIME_LOW_RES, |
102 | }, | ||
103 | } | ||
97 | }; | 104 | }; |
98 | 105 | ||
99 | /** | 106 | /** |
@@ -125,20 +132,35 @@ EXPORT_SYMBOL_GPL(ktime_get_ts); | |||
125 | * Get the coarse grained time at the softirq based on xtime and | 132 | * Get the coarse grained time at the softirq based on xtime and |
126 | * wall_to_monotonic. | 133 | * wall_to_monotonic. |
127 | */ | 134 | */ |
128 | static void hrtimer_get_softirq_time(struct hrtimer_base *base) | 135 | static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) |
129 | { | 136 | { |
130 | ktime_t xtim, tomono; | 137 | ktime_t xtim, tomono; |
138 | struct timespec xts; | ||
131 | unsigned long seq; | 139 | unsigned long seq; |
132 | 140 | ||
133 | do { | 141 | do { |
134 | seq = read_seqbegin(&xtime_lock); | 142 | seq = read_seqbegin(&xtime_lock); |
135 | xtim = timespec_to_ktime(xtime); | 143 | #ifdef CONFIG_NO_HZ |
136 | tomono = timespec_to_ktime(wall_to_monotonic); | 144 | getnstimeofday(&xts); |
137 | 145 | #else | |
146 | xts = xtime; | ||
147 | #endif | ||
138 | } while (read_seqretry(&xtime_lock, seq)); | 148 | } while (read_seqretry(&xtime_lock, seq)); |
139 | 149 | ||
140 | base[CLOCK_REALTIME].softirq_time = xtim; | 150 | xtim = timespec_to_ktime(xts); |
141 | base[CLOCK_MONOTONIC].softirq_time = ktime_add(xtim, tomono); | 151 | tomono = timespec_to_ktime(wall_to_monotonic); |
152 | base->clock_base[CLOCK_REALTIME].softirq_time = xtim; | ||
153 | base->clock_base[CLOCK_MONOTONIC].softirq_time = | ||
154 | ktime_add(xtim, tomono); | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * Helper function to check, whether the timer is running the callback | ||
159 | * function | ||
160 | */ | ||
161 | static inline int hrtimer_callback_running(struct hrtimer *timer) | ||
162 | { | ||
163 | return timer->state & HRTIMER_STATE_CALLBACK; | ||
142 | } | 164 | } |
143 | 165 | ||
144 | /* | 166 | /* |
@@ -147,8 +169,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_base *base) | |||
147 | */ | 169 | */ |
148 | #ifdef CONFIG_SMP | 170 | #ifdef CONFIG_SMP |
149 | 171 | ||
150 | #define set_curr_timer(b, t) do { (b)->curr_timer = (t); } while (0) | ||
151 | |||
152 | /* | 172 | /* |
153 | * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock | 173 | * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock |
154 | * means that all timers which are tied to this base via timer->base are | 174 | * means that all timers which are tied to this base via timer->base are |
@@ -161,19 +181,20 @@ static void hrtimer_get_softirq_time(struct hrtimer_base *base) | |||
161 | * possible to set timer->base = NULL and drop the lock: the timer remains | 181 | * possible to set timer->base = NULL and drop the lock: the timer remains |
162 | * locked. | 182 | * locked. |
163 | */ | 183 | */ |
164 | static struct hrtimer_base *lock_hrtimer_base(const struct hrtimer *timer, | 184 | static |
165 | unsigned long *flags) | 185 | struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, |
186 | unsigned long *flags) | ||
166 | { | 187 | { |
167 | struct hrtimer_base *base; | 188 | struct hrtimer_clock_base *base; |
168 | 189 | ||
169 | for (;;) { | 190 | for (;;) { |
170 | base = timer->base; | 191 | base = timer->base; |
171 | if (likely(base != NULL)) { | 192 | if (likely(base != NULL)) { |
172 | spin_lock_irqsave(&base->lock, *flags); | 193 | spin_lock_irqsave(&base->cpu_base->lock, *flags); |
173 | if (likely(base == timer->base)) | 194 | if (likely(base == timer->base)) |
174 | return base; | 195 | return base; |
175 | /* The timer has migrated to another CPU: */ | 196 | /* The timer has migrated to another CPU: */ |
176 | spin_unlock_irqrestore(&base->lock, *flags); | 197 | spin_unlock_irqrestore(&base->cpu_base->lock, *flags); |
177 | } | 198 | } |
178 | cpu_relax(); | 199 | cpu_relax(); |
179 | } | 200 | } |
@@ -182,12 +203,14 @@ static struct hrtimer_base *lock_hrtimer_base(const struct hrtimer *timer, | |||
182 | /* | 203 | /* |
183 | * Switch the timer base to the current CPU when possible. | 204 | * Switch the timer base to the current CPU when possible. |
184 | */ | 205 | */ |
185 | static inline struct hrtimer_base * | 206 | static inline struct hrtimer_clock_base * |
186 | switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_base *base) | 207 | switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base) |
187 | { | 208 | { |
188 | struct hrtimer_base *new_base; | 209 | struct hrtimer_clock_base *new_base; |
210 | struct hrtimer_cpu_base *new_cpu_base; | ||
189 | 211 | ||
190 | new_base = &__get_cpu_var(hrtimer_bases)[base->index]; | 212 | new_cpu_base = &__get_cpu_var(hrtimer_bases); |
213 | new_base = &new_cpu_base->clock_base[base->index]; | ||
191 | 214 | ||
192 | if (base != new_base) { | 215 | if (base != new_base) { |
193 | /* | 216 | /* |
@@ -199,13 +222,13 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_base *base) | |||
199 | * completed. There is no conflict as we hold the lock until | 222 | * completed. There is no conflict as we hold the lock until |
200 | * the timer is enqueued. | 223 | * the timer is enqueued. |
201 | */ | 224 | */ |
202 | if (unlikely(base->curr_timer == timer)) | 225 | if (unlikely(hrtimer_callback_running(timer))) |
203 | return base; | 226 | return base; |
204 | 227 | ||
205 | /* See the comment in lock_timer_base() */ | 228 | /* See the comment in lock_timer_base() */ |
206 | timer->base = NULL; | 229 | timer->base = NULL; |
207 | spin_unlock(&base->lock); | 230 | spin_unlock(&base->cpu_base->lock); |
208 | spin_lock(&new_base->lock); | 231 | spin_lock(&new_base->cpu_base->lock); |
209 | timer->base = new_base; | 232 | timer->base = new_base; |
210 | } | 233 | } |
211 | return new_base; | 234 | return new_base; |
@@ -213,19 +236,17 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_base *base) | |||
213 | 236 | ||
214 | #else /* CONFIG_SMP */ | 237 | #else /* CONFIG_SMP */ |
215 | 238 | ||
216 | #define set_curr_timer(b, t) do { } while (0) | 239 | static inline struct hrtimer_clock_base * |
217 | |||
218 | static inline struct hrtimer_base * | ||
219 | lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) | 240 | lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) |
220 | { | 241 | { |
221 | struct hrtimer_base *base = timer->base; | 242 | struct hrtimer_clock_base *base = timer->base; |
222 | 243 | ||
223 | spin_lock_irqsave(&base->lock, *flags); | 244 | spin_lock_irqsave(&base->cpu_base->lock, *flags); |
224 | 245 | ||
225 | return base; | 246 | return base; |
226 | } | 247 | } |
227 | 248 | ||
228 | #define switch_hrtimer_base(t, b) (b) | 249 | # define switch_hrtimer_base(t, b) (b) |
229 | 250 | ||
230 | #endif /* !CONFIG_SMP */ | 251 | #endif /* !CONFIG_SMP */ |
231 | 252 | ||
@@ -256,15 +277,12 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec) | |||
256 | 277 | ||
257 | return ktime_add(kt, tmp); | 278 | return ktime_add(kt, tmp); |
258 | } | 279 | } |
259 | |||
260 | #else /* CONFIG_KTIME_SCALAR */ | ||
261 | |||
262 | # endif /* !CONFIG_KTIME_SCALAR */ | 280 | # endif /* !CONFIG_KTIME_SCALAR */ |
263 | 281 | ||
264 | /* | 282 | /* |
265 | * Divide a ktime value by a nanosecond value | 283 | * Divide a ktime value by a nanosecond value |
266 | */ | 284 | */ |
267 | static unsigned long ktime_divns(const ktime_t kt, s64 div) | 285 | unsigned long ktime_divns(const ktime_t kt, s64 div) |
268 | { | 286 | { |
269 | u64 dclc, inc, dns; | 287 | u64 dclc, inc, dns; |
270 | int sft = 0; | 288 | int sft = 0; |
@@ -281,18 +299,311 @@ static unsigned long ktime_divns(const ktime_t kt, s64 div) | |||
281 | 299 | ||
282 | return (unsigned long) dclc; | 300 | return (unsigned long) dclc; |
283 | } | 301 | } |
284 | |||
285 | #else /* BITS_PER_LONG < 64 */ | ||
286 | # define ktime_divns(kt, div) (unsigned long)((kt).tv64 / (div)) | ||
287 | #endif /* BITS_PER_LONG >= 64 */ | 302 | #endif /* BITS_PER_LONG >= 64 */ |
288 | 303 | ||
304 | /* High resolution timer related functions */ | ||
305 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
306 | |||
307 | /* | ||
308 | * High resolution timer enabled ? | ||
309 | */ | ||
310 | static int hrtimer_hres_enabled __read_mostly = 1; | ||
311 | |||
312 | /* | ||
313 | * Enable / Disable high resolution mode | ||
314 | */ | ||
315 | static int __init setup_hrtimer_hres(char *str) | ||
316 | { | ||
317 | if (!strcmp(str, "off")) | ||
318 | hrtimer_hres_enabled = 0; | ||
319 | else if (!strcmp(str, "on")) | ||
320 | hrtimer_hres_enabled = 1; | ||
321 | else | ||
322 | return 0; | ||
323 | return 1; | ||
324 | } | ||
325 | |||
326 | __setup("highres=", setup_hrtimer_hres); | ||
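
The __setup() hook means the mode can be controlled from the kernel command line, for example:

/*
 * Boot with "highres=off" to stay in low resolution mode even on
 * capable hardware, or "highres=on" (the default) to allow the switch
 * once a suitable clock event device is registered.
 */
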
327 | |||
328 | /* | ||
330 | * hrtimer_is_hres_enabled - query whether the highres mode is enabled | ||
330 | */ | ||
331 | static inline int hrtimer_is_hres_enabled(void) | ||
332 | { | ||
333 | return hrtimer_hres_enabled; | ||
334 | } | ||
335 | |||
336 | /* | ||
337 | * Is the high resolution mode active ? | ||
338 | */ | ||
339 | static inline int hrtimer_hres_active(void) | ||
340 | { | ||
341 | return __get_cpu_var(hrtimer_bases).hres_active; | ||
342 | } | ||
343 | |||
344 | /* | ||
345 | * Reprogram the event source, checking both queues for the | ||
346 | * next event | ||
347 | * Called with interrupts disabled and base->lock held | ||
348 | */ | ||
349 | static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base) | ||
350 | { | ||
351 | int i; | ||
352 | struct hrtimer_clock_base *base = cpu_base->clock_base; | ||
353 | ktime_t expires; | ||
354 | |||
355 | cpu_base->expires_next.tv64 = KTIME_MAX; | ||
356 | |||
357 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { | ||
358 | struct hrtimer *timer; | ||
359 | |||
360 | if (!base->first) | ||
361 | continue; | ||
362 | timer = rb_entry(base->first, struct hrtimer, node); | ||
363 | expires = ktime_sub(timer->expires, base->offset); | ||
364 | if (expires.tv64 < cpu_base->expires_next.tv64) | ||
365 | cpu_base->expires_next = expires; | ||
366 | } | ||
367 | |||
368 | if (cpu_base->expires_next.tv64 != KTIME_MAX) | ||
369 | tick_program_event(cpu_base->expires_next, 1); | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * Shared reprogramming for clock_realtime and clock_monotonic | ||
374 | * | ||
375 | * When a timer is enqueued and expires earlier than the already enqueued | ||
376 | * timers, we have to check, whether it expires earlier than the timer for | ||
377 | * which the clock event device was armed. | ||
378 | * | ||
379 | * Called with interrupts disabled and base->cpu_base.lock held | ||
380 | */ | ||
381 | static int hrtimer_reprogram(struct hrtimer *timer, | ||
382 | struct hrtimer_clock_base *base) | ||
383 | { | ||
384 | ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next; | ||
385 | ktime_t expires = ktime_sub(timer->expires, base->offset); | ||
386 | int res; | ||
387 | |||
388 | /* | ||
389 | * When the callback is running, we do not reprogram the clock event | ||
390 | * device. The timer callback is either running on a different CPU or | ||
392 | * the callback is executed in the hrtimer_interrupt context. The | ||
392 | * reprogramming is handled either by the softirq, which called the | ||
393 | * callback or at the end of the hrtimer_interrupt. | ||
394 | */ | ||
395 | if (hrtimer_callback_running(timer)) | ||
396 | return 0; | ||
397 | |||
398 | if (expires.tv64 >= expires_next->tv64) | ||
399 | return 0; | ||
400 | |||
401 | /* | ||
402 | * Clockevents returns -ETIME, when the event was in the past. | ||
403 | */ | ||
404 | res = tick_program_event(expires, 0); | ||
405 | if (!IS_ERR_VALUE(res)) | ||
406 | *expires_next = expires; | ||
407 | return res; | ||
408 | } | ||
409 | |||
410 | |||
411 | /* | ||
412 | * Retrigger next event is called after clock was set | ||
413 | * | ||
414 | * Called with interrupts disabled via on_each_cpu() | ||
415 | */ | ||
416 | static void retrigger_next_event(void *arg) | ||
417 | { | ||
418 | struct hrtimer_cpu_base *base; | ||
419 | struct timespec realtime_offset; | ||
420 | unsigned long seq; | ||
421 | |||
422 | if (!hrtimer_hres_active()) | ||
423 | return; | ||
424 | |||
425 | do { | ||
426 | seq = read_seqbegin(&xtime_lock); | ||
427 | set_normalized_timespec(&realtime_offset, | ||
428 | -wall_to_monotonic.tv_sec, | ||
429 | -wall_to_monotonic.tv_nsec); | ||
430 | } while (read_seqretry(&xtime_lock, seq)); | ||
431 | |||
432 | base = &__get_cpu_var(hrtimer_bases); | ||
433 | |||
434 | /* Adjust CLOCK_REALTIME offset */ | ||
435 | spin_lock(&base->lock); | ||
436 | base->clock_base[CLOCK_REALTIME].offset = | ||
437 | timespec_to_ktime(realtime_offset); | ||
438 | |||
439 | hrtimer_force_reprogram(base); | ||
440 | spin_unlock(&base->lock); | ||
441 | } | ||
442 | |||
443 | /* | ||
444 | * Clock realtime was set | ||
445 | * | ||
446 | * Change the offset of the realtime clock vs. the monotonic | ||
447 | * clock. | ||
448 | * | ||
449 | * We might have to reprogram the high resolution timer interrupt. On | ||
450 | * SMP we call the architecture specific code to retrigger _all_ high | ||
451 | * resolution timer interrupts. On UP we just disable interrupts and | ||
452 | * call the high resolution interrupt code. | ||
453 | */ | ||
454 | void clock_was_set(void) | ||
455 | { | ||
456 | /* Retrigger the CPU local events everywhere */ | ||
457 | on_each_cpu(retrigger_next_event, NULL, 0, 1); | ||
458 | } | ||
459 | |||
460 | /* | ||
461 | * Check, whether the timer is on the callback pending list | ||
462 | */ | ||
463 | static inline int hrtimer_cb_pending(const struct hrtimer *timer) | ||
464 | { | ||
465 | return timer->state & HRTIMER_STATE_PENDING; | ||
466 | } | ||
467 | |||
468 | /* | ||
469 | * Remove a timer from the callback pending list | ||
470 | */ | ||
471 | static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) | ||
472 | { | ||
473 | list_del_init(&timer->cb_entry); | ||
474 | } | ||
475 | |||
476 | /* | ||
477 | * Initialize the high resolution related parts of cpu_base | ||
478 | */ | ||
479 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) | ||
480 | { | ||
481 | base->expires_next.tv64 = KTIME_MAX; | ||
482 | base->hres_active = 0; | ||
483 | INIT_LIST_HEAD(&base->cb_pending); | ||
484 | } | ||
485 | |||
486 | /* | ||
487 | * Initialize the high resolution related parts of a hrtimer | ||
488 | */ | ||
489 | static inline void hrtimer_init_timer_hres(struct hrtimer *timer) | ||
490 | { | ||
491 | INIT_LIST_HEAD(&timer->cb_entry); | ||
492 | } | ||
493 | |||
494 | /* | ||
495 | * When high resolution timers are active, try to reprogram. Note that when | ||
496 | * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry | ||
497 | * check happens. The timer gets enqueued into the rbtree. The reprogramming | ||
498 | * and expiry check is done in the hrtimer_interrupt or in the softirq. | ||
499 | */ | ||
500 | static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | ||
501 | struct hrtimer_clock_base *base) | ||
502 | { | ||
503 | if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { | ||
504 | |||
505 | /* Timer is expired, act upon the callback mode */ | ||
506 | switch (timer->cb_mode) { | ||
507 | case HRTIMER_CB_IRQSAFE_NO_RESTART: | ||
508 | /* | ||
509 | * We can call the callback from here. No restart | ||
510 | * happens, so no danger of recursion | ||
511 | */ | ||
512 | BUG_ON(timer->function(timer) != HRTIMER_NORESTART); | ||
513 | return 1; | ||
514 | case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: | ||
515 | /* | ||
516 | * This is solely for the sched tick emulation with | ||
517 | * dynamic tick support to ensure that we do not | ||
518 | * restart the tick right on the edge and end up with | ||
519 | * the tick timer in the softirq! The calling site | ||
520 | * takes care of this. | ||
521 | */ | ||
522 | return 1; | ||
523 | case HRTIMER_CB_IRQSAFE: | ||
524 | case HRTIMER_CB_SOFTIRQ: | ||
525 | /* | ||
526 | * Move everything else into the softirq pending list! | ||
527 | */ | ||
528 | list_add_tail(&timer->cb_entry, | ||
529 | &base->cpu_base->cb_pending); | ||
530 | timer->state = HRTIMER_STATE_PENDING; | ||
531 | raise_softirq(HRTIMER_SOFTIRQ); | ||
532 | return 1; | ||
533 | default: | ||
534 | BUG(); | ||
535 | } | ||
536 | } | ||
537 | return 0; | ||
538 | } | ||
539 | |||
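The switch above boils down to a run-now-or-defer policy: a callback that is guaranteed not to rearm itself may be invoked directly from the enqueue path, everything else is pushed to the softirq. A standalone sketch of that shape (the enum and queue are made up for illustration):

    #include <stdio.h>

    enum cb_mode { CB_RUN_NOW, CB_DEFER };

    static void defer(void (*fn)(void))
    {
        /* stand-in for list_add_tail() + raise_softirq() */
        printf("deferred\n");
        (void)fn;
    }

    static void dispatch(enum cb_mode mode, void (*fn)(void))
    {
        switch (mode) {
        case CB_RUN_NOW:
            fn();           /* safe: this callback never rearms itself */
            break;
        case CB_DEFER:
            defer(fn);      /* run later, in softirq context */
            break;
        }
    }

    static void work(void) { printf("ran now\n"); }

    int main(void)
    {
        dispatch(CB_RUN_NOW, work);
        dispatch(CB_DEFER, work);
        return 0;
    }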
540 | /* | ||
541 | * Switch to high resolution mode | ||
542 | */ | ||
543 | static void hrtimer_switch_to_hres(void) | ||
544 | { | ||
545 | struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); | ||
546 | unsigned long flags; | ||
547 | |||
548 | if (base->hres_active) | ||
549 | return; | ||
550 | |||
551 | local_irq_save(flags); | ||
552 | |||
553 | if (tick_init_highres()) { | ||
554 | local_irq_restore(flags); | ||
555 | return; | ||
556 | } | ||
557 | base->hres_active = 1; | ||
558 | base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES; | ||
559 | base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES; | ||
560 | |||
561 | tick_setup_sched_timer(); | ||
562 | |||
563 | /* "Retrigger" the interrupt to get things going */ | ||
564 | retrigger_next_event(NULL); | ||
565 | local_irq_restore(flags); | ||
566 | printk(KERN_INFO "Switched to high resolution mode on CPU %d\n", | ||
567 | smp_processor_id()); | ||
568 | } | ||
569 | |||
570 | #else | ||
571 | |||
572 | static inline int hrtimer_hres_active(void) { return 0; } | ||
573 | static inline int hrtimer_is_hres_enabled(void) { return 0; } | ||
574 | static inline void hrtimer_switch_to_hres(void) { } | ||
575 | static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { } | ||
576 | static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | ||
577 | struct hrtimer_clock_base *base) | ||
578 | { | ||
579 | return 0; | ||
580 | } | ||
581 | static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; } | ||
582 | static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { } | ||
583 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } | ||
584 | static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } | ||
585 | |||
586 | #endif /* CONFIG_HIGH_RES_TIMERS */ | ||
587 | |||
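The #else block above is the usual kernel idiom for compile-time feature selection: when CONFIG_HIGH_RES_TIMERS is off, empty static inlines keep every call site free of #ifdef clutter and the optimizer removes the calls entirely. A minimal self-contained illustration (FEATURE_X is a made-up switch, not a real config option):

    #include <stdio.h>

    /* #define FEATURE_X */             /* toggle at build time */

    #ifdef FEATURE_X
    static void feature_work(void) { printf("feature active\n"); }
    static int feature_active(void) { return 1; }
    #else
    static inline void feature_work(void) { }            /* compiles away */
    static inline int feature_active(void) { return 0; }
    #endif

    int main(void)
    {
        if (feature_active())           /* dead code when disabled */
            feature_work();
        return 0;
    }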
588 | #ifdef CONFIG_TIMER_STATS | ||
589 | void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr) | ||
590 | { | ||
591 | if (timer->start_site) | ||
592 | return; | ||
593 | |||
594 | timer->start_site = addr; | ||
595 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); | ||
596 | timer->start_pid = current->pid; | ||
597 | } | ||
598 | #endif | ||
599 | |||
289 | /* | 600 | /* |
290 | * Counterpart to lock_timer_base above: | 601 | * Counterpart to lock_timer_base above: |
291 | */ | 602 | */ |
292 | static inline | 603 | static inline |
293 | void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) | 604 | void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) |
294 | { | 605 | { |
295 | spin_unlock_irqrestore(&timer->base->lock, *flags); | 606 | spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); |
296 | } | 607 | } |
297 | 608 | ||
298 | /** | 609 | /** |
@@ -342,7 +653,8 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) | |||
342 | * The timer is inserted in expiry order. Insertion into the | 653 | * The timer is inserted in expiry order. Insertion into the |
343 | * red-black tree is O(log(n)). Must hold the base lock. | 654 | * red-black tree is O(log(n)). Must hold the base lock. |
344 | */ | 655 | */ |
345 | static void enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) | 656 | static void enqueue_hrtimer(struct hrtimer *timer, |
657 | struct hrtimer_clock_base *base, int reprogram) | ||
346 | { | 658 | { |
347 | struct rb_node **link = &base->active.rb_node; | 659 | struct rb_node **link = &base->active.rb_node; |
348 | struct rb_node *parent = NULL; | 660 | struct rb_node *parent = NULL; |
@@ -368,39 +680,85 @@ static void enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) | |||
368 | * Insert the timer to the rbtree and check whether it | 680 | * Insert the timer to the rbtree and check whether it |
369 | * replaces the first pending timer | 681 | * replaces the first pending timer |
370 | */ | 682 | */ |
371 | rb_link_node(&timer->node, parent, link); | ||
372 | rb_insert_color(&timer->node, &base->active); | ||
373 | |||
374 | if (!base->first || timer->expires.tv64 < | 683 | if (!base->first || timer->expires.tv64 < |
375 | rb_entry(base->first, struct hrtimer, node)->expires.tv64) | 684 | rb_entry(base->first, struct hrtimer, node)->expires.tv64) { |
685 | /* | ||
686 | * Reprogram the clock event device. When the timer is already | ||
687 | * expired hrtimer_enqueue_reprogram has either called the | ||
688 | * callback or added it to the pending list and raised the | ||
689 | * softirq. | ||
690 | * | ||
691 | * This is a NOP for !HIGHRES | ||
692 | */ | ||
693 | if (reprogram && hrtimer_enqueue_reprogram(timer, base)) | ||
694 | return; | ||
695 | |||
376 | base->first = &timer->node; | 696 | base->first = &timer->node; |
697 | } | ||
698 | |||
699 | rb_link_node(&timer->node, parent, link); | ||
700 | rb_insert_color(&timer->node, &base->active); | ||
701 | /* | ||
702 | * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the | ||
703 | * state of a possibly running callback. | ||
704 | */ | ||
705 | timer->state |= HRTIMER_STATE_ENQUEUED; | ||
377 | } | 706 | } |
378 | 707 | ||
379 | /* | 708 | /* |
380 | * __remove_hrtimer - internal function to remove a timer | 709 | * __remove_hrtimer - internal function to remove a timer |
381 | * | 710 | * |
382 | * Caller must hold the base lock. | 711 | * Caller must hold the base lock. |
712 | * | ||
713 | * High resolution timer mode reprograms the clock event device when the | ||
714 | * timer is the one which expires next. The caller can disable this by setting | ||
715 | * reprogram to zero. This is useful when the context does a reprogramming | ||
716 | * anyway (e.g. the timer interrupt). | ||
383 | */ | 717 | */ |
384 | static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) | 718 | static void __remove_hrtimer(struct hrtimer *timer, |
719 | struct hrtimer_clock_base *base, | ||
720 | unsigned long newstate, int reprogram) | ||
385 | { | 721 | { |
386 | /* | 722 | /* High res. callback list. NOP for !HIGHRES */ |
387 | * Remove the timer from the rbtree and replace the | 723 | if (hrtimer_cb_pending(timer)) |
388 | * first entry pointer if necessary. | 724 | hrtimer_remove_cb_pending(timer); |
389 | */ | 725 | else { |
390 | if (base->first == &timer->node) | 726 | /* |
391 | base->first = rb_next(&timer->node); | 727 | * Remove the timer from the rbtree and replace the |
392 | rb_erase(&timer->node, &base->active); | 728 | * first entry pointer if necessary. |
393 | rb_set_parent(&timer->node, &timer->node); | 729 | */ |
730 | if (base->first == &timer->node) { | ||
731 | base->first = rb_next(&timer->node); | ||
732 | /* Reprogram the clock event device, if enabled */ | ||
733 | if (reprogram && hrtimer_hres_active()) | ||
734 | hrtimer_force_reprogram(base->cpu_base); | ||
735 | } | ||
736 | rb_erase(&timer->node, &base->active); | ||
737 | } | ||
738 | timer->state = newstate; | ||
394 | } | 739 | } |
395 | 740 | ||
396 | /* | 741 | /* |
397 | * remove hrtimer, called with base lock held | 742 | * remove hrtimer, called with base lock held |
398 | */ | 743 | */ |
399 | static inline int | 744 | static inline int |
400 | remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) | 745 | remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) |
401 | { | 746 | { |
402 | if (hrtimer_active(timer)) { | 747 | if (hrtimer_is_queued(timer)) { |
403 | __remove_hrtimer(timer, base); | 748 | int reprogram; |
749 | |||
750 | /* | ||
751 | * Remove the timer and force reprogramming when high | ||
752 | * resolution mode is active and the timer is on the current | ||
753 | * CPU. If we remove a timer on another CPU, reprogramming is | ||
754 | * skipped. The interrupt event on this CPU is fired and | ||
755 | * reprogramming happens in the interrupt handler. This is a | ||
756 | * rare case and less expensive than an SMP call. | ||
757 | */ | ||
758 | timer_stats_hrtimer_clear_start_info(timer); | ||
759 | reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); | ||
760 | __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, | ||
761 | reprogram); | ||
404 | return 1; | 762 | return 1; |
405 | } | 763 | } |
406 | return 0; | 764 | return 0; |
@@ -419,7 +777,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) | |||
419 | int | 777 | int |
420 | hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | 778 | hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) |
421 | { | 779 | { |
422 | struct hrtimer_base *base, *new_base; | 780 | struct hrtimer_clock_base *base, *new_base; |
423 | unsigned long flags; | 781 | unsigned long flags; |
424 | int ret; | 782 | int ret; |
425 | 783 | ||
@@ -431,7 +789,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
431 | /* Switch the timer base, if necessary: */ | 789 | /* Switch the timer base, if necessary: */ |
432 | new_base = switch_hrtimer_base(timer, base); | 790 | new_base = switch_hrtimer_base(timer, base); |
433 | 791 | ||
434 | if (mode == HRTIMER_REL) { | 792 | if (mode == HRTIMER_MODE_REL) { |
435 | tim = ktime_add(tim, new_base->get_time()); | 793 | tim = ktime_add(tim, new_base->get_time()); |
436 | /* | 794 | /* |
437 | * CONFIG_TIME_LOW_RES is a temporary way for architectures | 795 | * CONFIG_TIME_LOW_RES is a temporary way for architectures |
@@ -446,7 +804,9 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
446 | } | 804 | } |
447 | timer->expires = tim; | 805 | timer->expires = tim; |
448 | 806 | ||
449 | enqueue_hrtimer(timer, new_base); | 807 | timer_stats_hrtimer_set_start_info(timer); |
808 | |||
809 | enqueue_hrtimer(timer, new_base, base == new_base); | ||
450 | 810 | ||
451 | unlock_hrtimer_base(timer, &flags); | 811 | unlock_hrtimer_base(timer, &flags); |
452 | 812 | ||
@@ -466,13 +826,13 @@ EXPORT_SYMBOL_GPL(hrtimer_start); | |||
466 | */ | 826 | */ |
467 | int hrtimer_try_to_cancel(struct hrtimer *timer) | 827 | int hrtimer_try_to_cancel(struct hrtimer *timer) |
468 | { | 828 | { |
469 | struct hrtimer_base *base; | 829 | struct hrtimer_clock_base *base; |
470 | unsigned long flags; | 830 | unsigned long flags; |
471 | int ret = -1; | 831 | int ret = -1; |
472 | 832 | ||
473 | base = lock_hrtimer_base(timer, &flags); | 833 | base = lock_hrtimer_base(timer, &flags); |
474 | 834 | ||
475 | if (base->curr_timer != timer) | 835 | if (!hrtimer_callback_running(timer)) |
476 | ret = remove_hrtimer(timer, base); | 836 | ret = remove_hrtimer(timer, base); |
477 | 837 | ||
478 | unlock_hrtimer_base(timer, &flags); | 838 | unlock_hrtimer_base(timer, &flags); |
@@ -508,19 +868,19 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel); | |||
508 | */ | 868 | */ |
509 | ktime_t hrtimer_get_remaining(const struct hrtimer *timer) | 869 | ktime_t hrtimer_get_remaining(const struct hrtimer *timer) |
510 | { | 870 | { |
511 | struct hrtimer_base *base; | 871 | struct hrtimer_clock_base *base; |
512 | unsigned long flags; | 872 | unsigned long flags; |
513 | ktime_t rem; | 873 | ktime_t rem; |
514 | 874 | ||
515 | base = lock_hrtimer_base(timer, &flags); | 875 | base = lock_hrtimer_base(timer, &flags); |
516 | rem = ktime_sub(timer->expires, timer->base->get_time()); | 876 | rem = ktime_sub(timer->expires, base->get_time()); |
517 | unlock_hrtimer_base(timer, &flags); | 877 | unlock_hrtimer_base(timer, &flags); |
518 | 878 | ||
519 | return rem; | 879 | return rem; |
520 | } | 880 | } |
521 | EXPORT_SYMBOL_GPL(hrtimer_get_remaining); | 881 | EXPORT_SYMBOL_GPL(hrtimer_get_remaining); |
522 | 882 | ||
523 | #ifdef CONFIG_NO_IDLE_HZ | 883 | #if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ) |
524 | /** | 884 | /** |
525 | * hrtimer_get_next_event - get the time until next expiry event | 885 | * hrtimer_get_next_event - get the time until next expiry event |
526 | * | 886 | * |
@@ -529,26 +889,31 @@ EXPORT_SYMBOL_GPL(hrtimer_get_remaining); | |||
529 | */ | 889 | */ |
530 | ktime_t hrtimer_get_next_event(void) | 890 | ktime_t hrtimer_get_next_event(void) |
531 | { | 891 | { |
532 | struct hrtimer_base *base = __get_cpu_var(hrtimer_bases); | 892 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); |
893 | struct hrtimer_clock_base *base = cpu_base->clock_base; | ||
533 | ktime_t delta, mindelta = { .tv64 = KTIME_MAX }; | 894 | ktime_t delta, mindelta = { .tv64 = KTIME_MAX }; |
534 | unsigned long flags; | 895 | unsigned long flags; |
535 | int i; | 896 | int i; |
536 | 897 | ||
537 | for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) { | 898 | spin_lock_irqsave(&cpu_base->lock, flags); |
538 | struct hrtimer *timer; | ||
539 | 899 | ||
540 | spin_lock_irqsave(&base->lock, flags); | 900 | if (!hrtimer_hres_active()) { |
541 | if (!base->first) { | 901 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { |
542 | spin_unlock_irqrestore(&base->lock, flags); | 902 | struct hrtimer *timer; |
543 | continue; | 903 | |
904 | if (!base->first) | ||
905 | continue; | ||
906 | |||
907 | timer = rb_entry(base->first, struct hrtimer, node); | ||
908 | delta.tv64 = timer->expires.tv64; | ||
909 | delta = ktime_sub(delta, base->get_time()); | ||
910 | if (delta.tv64 < mindelta.tv64) | ||
911 | mindelta.tv64 = delta.tv64; | ||
544 | } | 912 | } |
545 | timer = rb_entry(base->first, struct hrtimer, node); | ||
546 | delta.tv64 = timer->expires.tv64; | ||
547 | spin_unlock_irqrestore(&base->lock, flags); | ||
548 | delta = ktime_sub(delta, base->get_time()); | ||
549 | if (delta.tv64 < mindelta.tv64) | ||
550 | mindelta.tv64 = delta.tv64; | ||
551 | } | 913 | } |
914 | |||
915 | spin_unlock_irqrestore(&cpu_base->lock, flags); | ||
916 | |||
552 | if (mindelta.tv64 < 0) | 917 | if (mindelta.tv64 < 0) |
553 | mindelta.tv64 = 0; | 918 | mindelta.tv64 = 0; |
554 | return mindelta; | 919 | return mindelta; |
@@ -564,17 +929,23 @@ ktime_t hrtimer_get_next_event(void) | |||
564 | void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | 929 | void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, |
565 | enum hrtimer_mode mode) | 930 | enum hrtimer_mode mode) |
566 | { | 931 | { |
567 | struct hrtimer_base *bases; | 932 | struct hrtimer_cpu_base *cpu_base; |
568 | 933 | ||
569 | memset(timer, 0, sizeof(struct hrtimer)); | 934 | memset(timer, 0, sizeof(struct hrtimer)); |
570 | 935 | ||
571 | bases = __raw_get_cpu_var(hrtimer_bases); | 936 | cpu_base = &__raw_get_cpu_var(hrtimer_bases); |
572 | 937 | ||
573 | if (clock_id == CLOCK_REALTIME && mode != HRTIMER_ABS) | 938 | if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS) |
574 | clock_id = CLOCK_MONOTONIC; | 939 | clock_id = CLOCK_MONOTONIC; |
575 | 940 | ||
576 | timer->base = &bases[clock_id]; | 941 | timer->base = &cpu_base->clock_base[clock_id]; |
577 | rb_set_parent(&timer->node, &timer->node); | 942 | hrtimer_init_timer_hres(timer); |
943 | |||
944 | #ifdef CONFIG_TIMER_STATS | ||
945 | timer->start_site = NULL; | ||
946 | timer->start_pid = -1; | ||
947 | memset(timer->start_comm, 0, TASK_COMM_LEN); | ||
948 | #endif | ||
578 | } | 949 | } |
579 | EXPORT_SYMBOL_GPL(hrtimer_init); | 950 | EXPORT_SYMBOL_GPL(hrtimer_init); |
580 | 951 | ||
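With the reworked types, a typical in-kernel user binds the timer to a clock base, sets the callback, and arms it relative to now; the callback's enum hrtimer_restart return value decides whether it is requeued. A hedged sketch of such a user (my_timer/my_callback/my_setup are made-up names; kernel context assumed):

    static struct hrtimer my_timer;

    static enum hrtimer_restart my_callback(struct hrtimer *t)
    {
        /* do the work; to make the timer periodic, advance it with
         * hrtimer_forward() and return HRTIMER_RESTART instead */
        return HRTIMER_NORESTART;
    }

    static void my_setup(void)
    {
        hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        my_timer.function = my_callback;
        /* fire 100 ms from now */
        hrtimer_start(&my_timer, ktime_set(0, 100 * 1000 * 1000),
                      HRTIMER_MODE_REL);
    }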
@@ -588,21 +959,159 @@ EXPORT_SYMBOL_GPL(hrtimer_init); | |||
588 | */ | 959 | */ |
589 | int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) | 960 | int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) |
590 | { | 961 | { |
591 | struct hrtimer_base *bases; | 962 | struct hrtimer_cpu_base *cpu_base; |
592 | 963 | ||
593 | bases = __raw_get_cpu_var(hrtimer_bases); | 964 | cpu_base = &__raw_get_cpu_var(hrtimer_bases); |
594 | *tp = ktime_to_timespec(bases[which_clock].resolution); | 965 | *tp = ktime_to_timespec(cpu_base->clock_base[which_clock].resolution); |
595 | 966 | ||
596 | return 0; | 967 | return 0; |
597 | } | 968 | } |
598 | EXPORT_SYMBOL_GPL(hrtimer_get_res); | 969 | EXPORT_SYMBOL_GPL(hrtimer_get_res); |
599 | 970 | ||
971 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
972 | |||
973 | /* | ||
974 | * High resolution timer interrupt | ||
975 | * Called with interrupts disabled | ||
976 | */ | ||
977 | void hrtimer_interrupt(struct clock_event_device *dev) | ||
978 | { | ||
979 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | ||
980 | struct hrtimer_clock_base *base; | ||
981 | ktime_t expires_next, now; | ||
982 | int i, raise = 0; | ||
983 | |||
984 | BUG_ON(!cpu_base->hres_active); | ||
985 | cpu_base->nr_events++; | ||
986 | dev->next_event.tv64 = KTIME_MAX; | ||
987 | |||
988 | retry: | ||
989 | now = ktime_get(); | ||
990 | |||
991 | expires_next.tv64 = KTIME_MAX; | ||
992 | |||
993 | base = cpu_base->clock_base; | ||
994 | |||
995 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | ||
996 | ktime_t basenow; | ||
997 | struct rb_node *node; | ||
998 | |||
999 | spin_lock(&cpu_base->lock); | ||
1000 | |||
1001 | basenow = ktime_add(now, base->offset); | ||
1002 | |||
1003 | while ((node = base->first)) { | ||
1004 | struct hrtimer *timer; | ||
1005 | |||
1006 | timer = rb_entry(node, struct hrtimer, node); | ||
1007 | |||
1008 | if (basenow.tv64 < timer->expires.tv64) { | ||
1009 | ktime_t expires; | ||
1010 | |||
1011 | expires = ktime_sub(timer->expires, | ||
1012 | base->offset); | ||
1013 | if (expires.tv64 < expires_next.tv64) | ||
1014 | expires_next = expires; | ||
1015 | break; | ||
1016 | } | ||
1017 | |||
1018 | /* Move softirq callbacks to the pending list */ | ||
1019 | if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { | ||
1020 | __remove_hrtimer(timer, base, | ||
1021 | HRTIMER_STATE_PENDING, 0); | ||
1022 | list_add_tail(&timer->cb_entry, | ||
1023 | &base->cpu_base->cb_pending); | ||
1024 | raise = 1; | ||
1025 | continue; | ||
1026 | } | ||
1027 | |||
1028 | __remove_hrtimer(timer, base, | ||
1029 | HRTIMER_STATE_CALLBACK, 0); | ||
1030 | timer_stats_account_hrtimer(timer); | ||
1031 | |||
1032 | /* | ||
1033 | * Note: We clear the CALLBACK bit after | ||
1034 | * enqueue_hrtimer to avoid reprogramming of | ||
1035 | * the event hardware. This happens at the end | ||
1036 | * of this function anyway. | ||
1037 | */ | ||
1038 | if (timer->function(timer) != HRTIMER_NORESTART) { | ||
1039 | BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); | ||
1040 | enqueue_hrtimer(timer, base, 0); | ||
1041 | } | ||
1042 | timer->state &= ~HRTIMER_STATE_CALLBACK; | ||
1043 | } | ||
1044 | spin_unlock(&cpu_base->lock); | ||
1045 | base++; | ||
1046 | } | ||
1047 | |||
1048 | cpu_base->expires_next = expires_next; | ||
1049 | |||
1050 | /* Reprogramming necessary? */ | ||
1051 | if (expires_next.tv64 != KTIME_MAX) { | ||
1052 | if (tick_program_event(expires_next, 0)) | ||
1053 | goto retry; | ||
1054 | } | ||
1055 | |||
1056 | /* Raise softirq? */ | ||
1057 | if (raise) | ||
1058 | raise_softirq(HRTIMER_SOFTIRQ); | ||
1059 | } | ||
1060 | |||
1061 | static void run_hrtimer_softirq(struct softirq_action *h) | ||
1062 | { | ||
1063 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | ||
1064 | |||
1065 | spin_lock_irq(&cpu_base->lock); | ||
1066 | |||
1067 | while (!list_empty(&cpu_base->cb_pending)) { | ||
1068 | enum hrtimer_restart (*fn)(struct hrtimer *); | ||
1069 | struct hrtimer *timer; | ||
1070 | int restart; | ||
1071 | |||
1072 | timer = list_entry(cpu_base->cb_pending.next, | ||
1073 | struct hrtimer, cb_entry); | ||
1074 | |||
1075 | timer_stats_account_hrtimer(timer); | ||
1076 | |||
1077 | fn = timer->function; | ||
1078 | __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); | ||
1079 | spin_unlock_irq(&cpu_base->lock); | ||
1080 | |||
1081 | restart = fn(timer); | ||
1082 | |||
1083 | spin_lock_irq(&cpu_base->lock); | ||
1084 | |||
1085 | timer->state &= ~HRTIMER_STATE_CALLBACK; | ||
1086 | if (restart == HRTIMER_RESTART) { | ||
1087 | BUG_ON(hrtimer_active(timer)); | ||
1088 | /* | ||
1089 | * Enqueue the timer, allow reprogramming of the event | ||
1090 | * device | ||
1091 | */ | ||
1092 | enqueue_hrtimer(timer, timer->base, 1); | ||
1093 | } else if (hrtimer_active(timer)) { | ||
1094 | /* | ||
1095 | * If the timer was rearmed on another CPU, reprogram | ||
1096 | * the event device. | ||
1097 | */ | ||
1098 | if (timer->base->first == &timer->node) | ||
1099 | hrtimer_reprogram(timer, timer->base); | ||
1100 | } | ||
1101 | } | ||
1102 | spin_unlock_irq(&cpu_base->lock); | ||
1103 | } | ||
1104 | |||
1105 | #endif /* CONFIG_HIGH_RES_TIMERS */ | ||
1106 | |||
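run_hrtimer_softirq() above shows the standard drop-the-lock-around-the-callback pattern: the base lock is released before the timer function runs, so the function may rearm or cancel timers without self-deadlocking, and the state is re-checked once the lock is retaken. A small pthread sketch of the same shape (list handling elided; all names hypothetical):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t base_lock = PTHREAD_MUTEX_INITIALIZER;

    static void run_one_pending(void (*cb)(void))
    {
        pthread_mutex_lock(&base_lock);
        /* ... dequeue one pending entry while holding the lock ... */
        pthread_mutex_unlock(&base_lock);   /* drop lock around callback */

        cb();                               /* may take base_lock itself */

        pthread_mutex_lock(&base_lock);
        /* ... re-check state, requeue if the callback asked for it ... */
        pthread_mutex_unlock(&base_lock);
    }

    static void my_cb(void) { printf("callback ran unlocked\n"); }

    int main(void)
    {
        run_one_pending(my_cb);
        return 0;
    }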
600 | /* | 1107 | /* |
601 | * Expire the per base hrtimer-queue: | 1108 | * Expire the per base hrtimer-queue: |
602 | */ | 1109 | */ |
603 | static inline void run_hrtimer_queue(struct hrtimer_base *base) | 1110 | static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base, |
1111 | int index) | ||
604 | { | 1112 | { |
605 | struct rb_node *node; | 1113 | struct rb_node *node; |
1114 | struct hrtimer_clock_base *base = &cpu_base->clock_base[index]; | ||
606 | 1115 | ||
607 | if (!base->first) | 1116 | if (!base->first) |
608 | return; | 1117 | return; |
@@ -610,53 +1119,72 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base) | |||
610 | if (base->get_softirq_time) | 1119 | if (base->get_softirq_time) |
611 | base->softirq_time = base->get_softirq_time(); | 1120 | base->softirq_time = base->get_softirq_time(); |
612 | 1121 | ||
613 | spin_lock_irq(&base->lock); | 1122 | spin_lock_irq(&cpu_base->lock); |
614 | 1123 | ||
615 | while ((node = base->first)) { | 1124 | while ((node = base->first)) { |
616 | struct hrtimer *timer; | 1125 | struct hrtimer *timer; |
617 | int (*fn)(struct hrtimer *); | 1126 | enum hrtimer_restart (*fn)(struct hrtimer *); |
618 | int restart; | 1127 | int restart; |
619 | 1128 | ||
620 | timer = rb_entry(node, struct hrtimer, node); | 1129 | timer = rb_entry(node, struct hrtimer, node); |
621 | if (base->softirq_time.tv64 <= timer->expires.tv64) | 1130 | if (base->softirq_time.tv64 <= timer->expires.tv64) |
622 | break; | 1131 | break; |
623 | 1132 | ||
1133 | timer_stats_account_hrtimer(timer); | ||
1134 | |||
624 | fn = timer->function; | 1135 | fn = timer->function; |
625 | set_curr_timer(base, timer); | 1136 | __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); |
626 | __remove_hrtimer(timer, base); | 1137 | spin_unlock_irq(&cpu_base->lock); |
627 | spin_unlock_irq(&base->lock); | ||
628 | 1138 | ||
629 | restart = fn(timer); | 1139 | restart = fn(timer); |
630 | 1140 | ||
631 | spin_lock_irq(&base->lock); | 1141 | spin_lock_irq(&cpu_base->lock); |
632 | 1142 | ||
1143 | timer->state &= ~HRTIMER_STATE_CALLBACK; | ||
633 | if (restart != HRTIMER_NORESTART) { | 1144 | if (restart != HRTIMER_NORESTART) { |
634 | BUG_ON(hrtimer_active(timer)); | 1145 | BUG_ON(hrtimer_active(timer)); |
635 | enqueue_hrtimer(timer, base); | 1146 | enqueue_hrtimer(timer, base, 0); |
636 | } | 1147 | } |
637 | } | 1148 | } |
638 | set_curr_timer(base, NULL); | 1149 | spin_unlock_irq(&cpu_base->lock); |
639 | spin_unlock_irq(&base->lock); | ||
640 | } | 1150 | } |
641 | 1151 | ||
642 | /* | 1152 | /* |
643 | * Called from timer softirq every jiffy, expire hrtimers: | 1153 | * Called from timer softirq every jiffy, expire hrtimers: |
1154 | * | ||
1155 | * For HRT it's the fallback code to run the softirq in the timer | ||
1156 | * softirq context in case the hrtimer initialization failed or has | ||
1157 | * not been done yet. | ||
644 | */ | 1158 | */ |
645 | void hrtimer_run_queues(void) | 1159 | void hrtimer_run_queues(void) |
646 | { | 1160 | { |
647 | struct hrtimer_base *base = __get_cpu_var(hrtimer_bases); | 1161 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); |
648 | int i; | 1162 | int i; |
649 | 1163 | ||
650 | hrtimer_get_softirq_time(base); | 1164 | if (hrtimer_hres_active()) |
1165 | return; | ||
1166 | |||
1167 | /* | ||
1168 | * This _is_ ugly: We have to check in the softirq context | ||
1169 | * whether we can switch to highres and/or nohz mode. The | ||
1170 | * clocksource switch happens in the timer interrupt with | ||
1171 | * xtime_lock held. Notification from there only sets the | ||
1172 | * check bit in the tick_oneshot code, otherwise we might | ||
1173 | * deadlock vs. xtime_lock. | ||
1174 | */ | ||
1175 | if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) | ||
1176 | hrtimer_switch_to_hres(); | ||
651 | 1177 | ||
652 | for (i = 0; i < MAX_HRTIMER_BASES; i++) | 1178 | hrtimer_get_softirq_time(cpu_base); |
653 | run_hrtimer_queue(&base[i]); | 1179 | |
1180 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) | ||
1181 | run_hrtimer_queue(cpu_base, i); | ||
654 | } | 1182 | } |
655 | 1183 | ||
656 | /* | 1184 | /* |
657 | * Sleep related functions: | 1185 | * Sleep related functions: |
658 | */ | 1186 | */ |
659 | static int hrtimer_wakeup(struct hrtimer *timer) | 1187 | static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) |
660 | { | 1188 | { |
661 | struct hrtimer_sleeper *t = | 1189 | struct hrtimer_sleeper *t = |
662 | container_of(timer, struct hrtimer_sleeper, timer); | 1190 | container_of(timer, struct hrtimer_sleeper, timer); |
@@ -673,6 +1201,9 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) | |||
673 | { | 1201 | { |
674 | sl->timer.function = hrtimer_wakeup; | 1202 | sl->timer.function = hrtimer_wakeup; |
675 | sl->task = task; | 1203 | sl->task = task; |
1204 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
1205 | sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_RESTART; | ||
1206 | #endif | ||
676 | } | 1207 | } |
677 | 1208 | ||
678 | static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) | 1209 | static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) |
@@ -683,10 +1214,11 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod | |||
683 | set_current_state(TASK_INTERRUPTIBLE); | 1214 | set_current_state(TASK_INTERRUPTIBLE); |
684 | hrtimer_start(&t->timer, t->timer.expires, mode); | 1215 | hrtimer_start(&t->timer, t->timer.expires, mode); |
685 | 1216 | ||
686 | schedule(); | 1217 | if (likely(t->task)) |
1218 | schedule(); | ||
687 | 1219 | ||
688 | hrtimer_cancel(&t->timer); | 1220 | hrtimer_cancel(&t->timer); |
689 | mode = HRTIMER_ABS; | 1221 | mode = HRTIMER_MODE_ABS; |
690 | 1222 | ||
691 | } while (t->task && !signal_pending(current)); | 1223 | } while (t->task && !signal_pending(current)); |
692 | 1224 | ||
@@ -702,10 +1234,10 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |||
702 | 1234 | ||
703 | restart->fn = do_no_restart_syscall; | 1235 | restart->fn = do_no_restart_syscall; |
704 | 1236 | ||
705 | hrtimer_init(&t.timer, restart->arg0, HRTIMER_ABS); | 1237 | hrtimer_init(&t.timer, restart->arg0, HRTIMER_MODE_ABS); |
706 | t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2; | 1238 | t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2; |
707 | 1239 | ||
708 | if (do_nanosleep(&t, HRTIMER_ABS)) | 1240 | if (do_nanosleep(&t, HRTIMER_MODE_ABS)) |
709 | return 0; | 1241 | return 0; |
710 | 1242 | ||
711 | rmtp = (struct timespec __user *) restart->arg1; | 1243 | rmtp = (struct timespec __user *) restart->arg1; |
@@ -738,7 +1270,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |||
738 | return 0; | 1270 | return 0; |
739 | 1271 | ||
740 | /* Absolute timers do not update the rmtp value and restart: */ | 1272 | /* Absolute timers do not update the rmtp value and restart: */ |
741 | if (mode == HRTIMER_ABS) | 1273 | if (mode == HRTIMER_MODE_ABS) |
742 | return -ERESTARTNOHAND; | 1274 | return -ERESTARTNOHAND; |
743 | 1275 | ||
744 | if (rmtp) { | 1276 | if (rmtp) { |
@@ -771,7 +1303,7 @@ sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) | |||
771 | if (!timespec_valid(&tu)) | 1303 | if (!timespec_valid(&tu)) |
772 | return -EINVAL; | 1304 | return -EINVAL; |
773 | 1305 | ||
774 | return hrtimer_nanosleep(&tu, rmtp, HRTIMER_REL, CLOCK_MONOTONIC); | 1306 | return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); |
775 | } | 1307 | } |
776 | 1308 | ||
777 | /* | 1309 | /* |
@@ -779,56 +1311,60 @@ sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) | |||
779 | */ | 1311 | */ |
780 | static void __devinit init_hrtimers_cpu(int cpu) | 1312 | static void __devinit init_hrtimers_cpu(int cpu) |
781 | { | 1313 | { |
782 | struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu); | 1314 | struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); |
783 | int i; | 1315 | int i; |
784 | 1316 | ||
785 | for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) { | 1317 | spin_lock_init(&cpu_base->lock); |
786 | spin_lock_init(&base->lock); | 1318 | lockdep_set_class(&cpu_base->lock, &cpu_base->lock_key); |
787 | lockdep_set_class(&base->lock, &base->lock_key); | 1319 | |
788 | } | 1320 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) |
1321 | cpu_base->clock_base[i].cpu_base = cpu_base; | ||
1322 | |||
1323 | hrtimer_init_hres(cpu_base); | ||
789 | } | 1324 | } |
790 | 1325 | ||
791 | #ifdef CONFIG_HOTPLUG_CPU | 1326 | #ifdef CONFIG_HOTPLUG_CPU |
792 | 1327 | ||
793 | static void migrate_hrtimer_list(struct hrtimer_base *old_base, | 1328 | static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, |
794 | struct hrtimer_base *new_base) | 1329 | struct hrtimer_clock_base *new_base) |
795 | { | 1330 | { |
796 | struct hrtimer *timer; | 1331 | struct hrtimer *timer; |
797 | struct rb_node *node; | 1332 | struct rb_node *node; |
798 | 1333 | ||
799 | while ((node = rb_first(&old_base->active))) { | 1334 | while ((node = rb_first(&old_base->active))) { |
800 | timer = rb_entry(node, struct hrtimer, node); | 1335 | timer = rb_entry(node, struct hrtimer, node); |
801 | __remove_hrtimer(timer, old_base); | 1336 | BUG_ON(hrtimer_callback_running(timer)); |
1337 | __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0); | ||
802 | timer->base = new_base; | 1338 | timer->base = new_base; |
803 | enqueue_hrtimer(timer, new_base); | 1339 | /* |
1340 | * Enqueue the timer. Allow reprogramming of the event device | ||
1341 | */ | ||
1342 | enqueue_hrtimer(timer, new_base, 1); | ||
804 | } | 1343 | } |
805 | } | 1344 | } |
806 | 1345 | ||
807 | static void migrate_hrtimers(int cpu) | 1346 | static void migrate_hrtimers(int cpu) |
808 | { | 1347 | { |
809 | struct hrtimer_base *old_base, *new_base; | 1348 | struct hrtimer_cpu_base *old_base, *new_base; |
810 | int i; | 1349 | int i; |
811 | 1350 | ||
812 | BUG_ON(cpu_online(cpu)); | 1351 | BUG_ON(cpu_online(cpu)); |
813 | old_base = per_cpu(hrtimer_bases, cpu); | 1352 | old_base = &per_cpu(hrtimer_bases, cpu); |
814 | new_base = get_cpu_var(hrtimer_bases); | 1353 | new_base = &get_cpu_var(hrtimer_bases); |
815 | |||
816 | local_irq_disable(); | ||
817 | 1354 | ||
818 | for (i = 0; i < MAX_HRTIMER_BASES; i++) { | 1355 | tick_cancel_sched_timer(cpu); |
819 | 1356 | ||
820 | spin_lock(&new_base->lock); | 1357 | local_irq_disable(); |
821 | spin_lock(&old_base->lock); | ||
822 | |||
823 | BUG_ON(old_base->curr_timer); | ||
824 | 1358 | ||
825 | migrate_hrtimer_list(old_base, new_base); | 1359 | spin_lock(&new_base->lock); |
1360 | spin_lock(&old_base->lock); | ||
826 | 1361 | ||
827 | spin_unlock(&old_base->lock); | 1362 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
828 | spin_unlock(&new_base->lock); | 1363 | migrate_hrtimer_list(&old_base->clock_base[i], |
829 | old_base++; | 1364 | &new_base->clock_base[i]); |
830 | new_base++; | ||
831 | } | 1365 | } |
1366 | spin_unlock(&old_base->lock); | ||
1367 | spin_unlock(&new_base->lock); | ||
832 | 1368 | ||
833 | local_irq_enable(); | 1369 | local_irq_enable(); |
834 | put_cpu_var(hrtimer_bases); | 1370 | put_cpu_var(hrtimer_bases); |
@@ -848,6 +1384,7 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, | |||
848 | 1384 | ||
849 | #ifdef CONFIG_HOTPLUG_CPU | 1385 | #ifdef CONFIG_HOTPLUG_CPU |
850 | case CPU_DEAD: | 1386 | case CPU_DEAD: |
1387 | clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu); | ||
851 | migrate_hrtimers(cpu); | 1388 | migrate_hrtimers(cpu); |
852 | break; | 1389 | break; |
853 | #endif | 1390 | #endif |
@@ -868,5 +1405,8 @@ void __init hrtimers_init(void) | |||
868 | hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, | 1405 | hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, |
869 | (void *)(long)smp_processor_id()); | 1406 | (void *)(long)smp_processor_id()); |
870 | register_cpu_notifier(&hrtimers_nb); | 1407 | register_cpu_notifier(&hrtimers_nb); |
1408 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
1409 | open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL); | ||
1410 | #endif | ||
871 | } | 1411 | } |
872 | 1412 | ||
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 475e8a71bcdc..0133f4f9e9f0 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
@@ -168,7 +168,7 @@ EXPORT_SYMBOL(set_irq_data); | |||
168 | /** | 168 | /** |
169 | * set_irq_data - set irq type data for an irq | 169 | * set_irq_data - set irq type data for an irq |
170 | * @irq: Interrupt number | 170 | * @irq: Interrupt number |
171 | * @data: Pointer to interrupt specific data | 171 | * @entry: Pointer to MSI descriptor data |
172 | * | 172 | * |
173 | * Set the hardware irq controller data for an irq | 173 | * Set the hardware irq controller data for an irq |
174 | */ | 174 | */ |
@@ -230,10 +230,6 @@ static void default_enable(unsigned int irq) | |||
230 | */ | 230 | */ |
231 | static void default_disable(unsigned int irq) | 231 | static void default_disable(unsigned int irq) |
232 | { | 232 | { |
233 | struct irq_desc *desc = irq_desc + irq; | ||
234 | |||
235 | if (!(desc->status & IRQ_DELAYED_DISABLE)) | ||
236 | desc->chip->mask(irq); | ||
237 | } | 233 | } |
238 | 234 | ||
239 | /* | 235 | /* |
@@ -298,13 +294,18 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) | |||
298 | 294 | ||
299 | if (unlikely(desc->status & IRQ_INPROGRESS)) | 295 | if (unlikely(desc->status & IRQ_INPROGRESS)) |
300 | goto out_unlock; | 296 | goto out_unlock; |
301 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); | ||
302 | kstat_cpu(cpu).irqs[irq]++; | 297 | kstat_cpu(cpu).irqs[irq]++; |
303 | 298 | ||
304 | action = desc->action; | 299 | action = desc->action; |
305 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) | 300 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) { |
301 | if (desc->chip->mask) | ||
302 | desc->chip->mask(irq); | ||
303 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); | ||
304 | desc->status |= IRQ_PENDING; | ||
306 | goto out_unlock; | 305 | goto out_unlock; |
306 | } | ||
307 | 307 | ||
308 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING | IRQ_PENDING); | ||
308 | desc->status |= IRQ_INPROGRESS; | 309 | desc->status |= IRQ_INPROGRESS; |
309 | spin_unlock(&desc->lock); | 310 | spin_unlock(&desc->lock); |
310 | 311 | ||
@@ -396,11 +397,13 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | |||
396 | 397 | ||
397 | /* | 398 | /* |
398 | * If it's disabled or no action available | 399 | * If it's disabled or no action available |
399 | * keep it masked and get out of here | 400 | * then mask it and get out of here: |
400 | */ | 401 | */ |
401 | action = desc->action; | 402 | action = desc->action; |
402 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) { | 403 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) { |
403 | desc->status |= IRQ_PENDING; | 404 | desc->status |= IRQ_PENDING; |
405 | if (desc->chip->mask) | ||
406 | desc->chip->mask(irq); | ||
404 | goto out; | 407 | goto out; |
405 | } | 408 | } |
406 | 409 | ||
@@ -562,10 +565,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | |||
562 | 565 | ||
563 | /* Uninstall? */ | 566 | /* Uninstall? */ |
564 | if (handle == handle_bad_irq) { | 567 | if (handle == handle_bad_irq) { |
565 | if (desc->chip != &no_irq_chip) { | 568 | if (desc->chip != &no_irq_chip) |
566 | desc->chip->mask(irq); | 569 | mask_ack_irq(desc, irq); |
567 | desc->chip->ack(irq); | ||
568 | } | ||
569 | desc->status |= IRQ_DISABLED; | 570 | desc->status |= IRQ_DISABLED; |
570 | desc->depth = 1; | 571 | desc->depth = 1; |
571 | } | 572 | } |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index acc5d9fe462b..5597c157442a 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -38,6 +38,46 @@ void synchronize_irq(unsigned int irq) | |||
38 | } | 38 | } |
39 | EXPORT_SYMBOL(synchronize_irq); | 39 | EXPORT_SYMBOL(synchronize_irq); |
40 | 40 | ||
41 | /** | ||
42 | * irq_can_set_affinity - Check if the affinity of a given irq can be set | ||
43 | * @irq: Interrupt to check | ||
44 | * | ||
45 | */ | ||
46 | int irq_can_set_affinity(unsigned int irq) | ||
47 | { | ||
48 | struct irq_desc *desc = irq_desc + irq; | ||
49 | |||
50 | if (CHECK_IRQ_PER_CPU(desc->status) || !desc->chip || | ||
51 | !desc->chip->set_affinity) | ||
52 | return 0; | ||
53 | |||
54 | return 1; | ||
55 | } | ||
56 | |||
57 | /** | ||
58 | * irq_set_affinity - Set the irq affinity of a given irq | ||
59 | * @irq: Interrupt to set affinity | ||
60 | * @cpumask: cpumask | ||
61 | * | ||
62 | */ | ||
63 | int irq_set_affinity(unsigned int irq, cpumask_t cpumask) | ||
64 | { | ||
65 | struct irq_desc *desc = irq_desc + irq; | ||
66 | |||
67 | if (!desc->chip->set_affinity) | ||
68 | return -EINVAL; | ||
69 | |||
70 | set_balance_irq_affinity(irq, cpumask); | ||
71 | |||
72 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
73 | set_pending_irq(irq, cpumask); | ||
74 | #else | ||
75 | desc->affinity = cpumask; | ||
76 | desc->chip->set_affinity(irq, cpumask); | ||
77 | #endif | ||
78 | return 0; | ||
79 | } | ||
80 | |||
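With irq_can_set_affinity()/irq_set_affinity() exported from the core, a driver can pin its interrupt from C code instead of writing to /proc/irq/N/smp_affinity. A hedged sketch (kernel context; MY_IRQ is a placeholder, and cpumask_of_cpu() is assumed available on this kernel vintage):

    /* Pin MY_IRQ to CPU 0, if the architecture and chip allow it. */
    if (irq_can_set_affinity(MY_IRQ))
        irq_set_affinity(MY_IRQ, cpumask_of_cpu(0));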
41 | #endif | 81 | #endif |
42 | 82 | ||
43 | /** | 83 | /** |
@@ -281,6 +321,10 @@ int setup_irq(unsigned int irq, struct irqaction *new) | |||
281 | if (new->flags & IRQF_PERCPU) | 321 | if (new->flags & IRQF_PERCPU) |
282 | desc->status |= IRQ_PER_CPU; | 322 | desc->status |= IRQ_PER_CPU; |
283 | #endif | 323 | #endif |
324 | /* Exclude IRQ from balancing */ | ||
325 | if (new->flags & IRQF_NOBALANCING) | ||
326 | desc->status |= IRQ_NO_BALANCING; | ||
327 | |||
284 | if (!shared) { | 328 | if (!shared) { |
285 | irq_chip_set_defaults(desc->chip); | 329 | irq_chip_set_defaults(desc->chip); |
286 | 330 | ||
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 6d3be06e8ce6..2db91eb54ad8 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -16,26 +16,6 @@ static struct proc_dir_entry *root_irq_dir; | |||
16 | 16 | ||
17 | #ifdef CONFIG_SMP | 17 | #ifdef CONFIG_SMP |
18 | 18 | ||
19 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
20 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | ||
21 | { | ||
22 | set_balance_irq_affinity(irq, mask_val); | ||
23 | |||
24 | /* | ||
25 | * Save these away for later use. Re-program when the | ||
26 | * interrupt is pending | ||
27 | */ | ||
28 | set_pending_irq(irq, mask_val); | ||
29 | } | ||
30 | #else | ||
31 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | ||
32 | { | ||
33 | set_balance_irq_affinity(irq, mask_val); | ||
34 | irq_desc[irq].affinity = mask_val; | ||
35 | irq_desc[irq].chip->set_affinity(irq, mask_val); | ||
36 | } | ||
37 | #endif | ||
38 | |||
39 | static int irq_affinity_read_proc(char *page, char **start, off_t off, | 19 | static int irq_affinity_read_proc(char *page, char **start, off_t off, |
40 | int count, int *eof, void *data) | 20 | int count, int *eof, void *data) |
41 | { | 21 | { |
@@ -55,7 +35,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, | |||
55 | cpumask_t new_value, tmp; | 35 | cpumask_t new_value, tmp; |
56 | 36 | ||
57 | if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || | 37 | if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || |
58 | CHECK_IRQ_PER_CPU(irq_desc[irq].status)) | 38 | irq_balancing_disabled(irq)) |
59 | return -EIO; | 39 | return -EIO; |
60 | 40 | ||
61 | err = cpumask_parse_user(buffer, count, new_value); | 41 | err = cpumask_parse_user(buffer, count, new_value); |
@@ -73,7 +53,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, | |||
73 | code to set default SMP affinity. */ | 53 | code to set default SMP affinity. */ |
74 | return select_smp_affinity(irq) ? -EINVAL : full_count; | 54 | return select_smp_affinity(irq) ? -EINVAL : full_count; |
75 | 55 | ||
76 | proc_set_irq_affinity(irq, new_value); | 56 | irq_set_affinity(irq, new_value); |
77 | 57 | ||
78 | return full_count; | 58 | return full_count; |
79 | } | 59 | } |
diff --git a/kernel/itimer.c b/kernel/itimer.c index 204ed7939e75..307c6a632ef6 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c | |||
@@ -128,18 +128,13 @@ asmlinkage long sys_getitimer(int which, struct itimerval __user *value) | |||
128 | /* | 128 | /* |
129 | * The timer is automagically restarted when interval != 0 | 129 | * The timer is automagically restarted when interval != 0 |
130 | */ | 130 | */ |
131 | int it_real_fn(struct hrtimer *timer) | 131 | enum hrtimer_restart it_real_fn(struct hrtimer *timer) |
132 | { | 132 | { |
133 | struct signal_struct *sig = | 133 | struct signal_struct *sig = |
134 | container_of(timer, struct signal_struct, real_timer); | 134 | container_of(timer, struct signal_struct, real_timer); |
135 | 135 | ||
136 | send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk); | 136 | send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk); |
137 | 137 | ||
138 | if (sig->it_real_incr.tv64 != 0) { | ||
139 | hrtimer_forward(timer, timer->base->softirq_time, | ||
140 | sig->it_real_incr); | ||
141 | return HRTIMER_RESTART; | ||
142 | } | ||
143 | return HRTIMER_NORESTART; | 138 | return HRTIMER_NORESTART; |
144 | } | 139 | } |
145 | 140 | ||
@@ -231,11 +226,14 @@ again: | |||
231 | spin_unlock_irq(&tsk->sighand->siglock); | 226 | spin_unlock_irq(&tsk->sighand->siglock); |
232 | goto again; | 227 | goto again; |
233 | } | 228 | } |
234 | tsk->signal->it_real_incr = | ||
235 | timeval_to_ktime(value->it_interval); | ||
236 | expires = timeval_to_ktime(value->it_value); | 229 | expires = timeval_to_ktime(value->it_value); |
237 | if (expires.tv64 != 0) | 230 | if (expires.tv64 != 0) { |
238 | hrtimer_start(timer, expires, HRTIMER_REL); | 231 | tsk->signal->it_real_incr = |
232 | timeval_to_ktime(value->it_interval); | ||
233 | hrtimer_start(timer, expires, HRTIMER_MODE_REL); | ||
234 | } else | ||
235 | tsk->signal->it_real_incr.tv64 = 0; | ||
236 | |||
239 | spin_unlock_irq(&tsk->sighand->siglock); | 237 | spin_unlock_irq(&tsk->sighand->siglock); |
240 | break; | 238 | break; |
241 | case ITIMER_VIRTUAL: | 239 | case ITIMER_VIRTUAL: |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 7c3e1e6dfb5b..657f77697415 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -304,7 +304,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) | |||
304 | * should be able to see it. | 304 | * should be able to see it. |
305 | */ | 305 | */ |
306 | struct task_struct *p; | 306 | struct task_struct *p; |
307 | read_lock(&tasklist_lock); | 307 | rcu_read_lock(); |
308 | p = find_task_by_pid(pid); | 308 | p = find_task_by_pid(pid); |
309 | if (p) { | 309 | if (p) { |
310 | if (CPUCLOCK_PERTHREAD(which_clock)) { | 310 | if (CPUCLOCK_PERTHREAD(which_clock)) { |
@@ -312,12 +312,17 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) | |||
312 | error = cpu_clock_sample(which_clock, | 312 | error = cpu_clock_sample(which_clock, |
313 | p, &rtn); | 313 | p, &rtn); |
314 | } | 314 | } |
315 | } else if (p->tgid == pid && p->signal) { | 315 | } else { |
316 | error = cpu_clock_sample_group(which_clock, | 316 | read_lock(&tasklist_lock); |
317 | p, &rtn); | 317 | if (p->tgid == pid && p->signal) { |
318 | error = | ||
319 | cpu_clock_sample_group(which_clock, | ||
320 | p, &rtn); | ||
321 | } | ||
322 | read_unlock(&tasklist_lock); | ||
318 | } | 323 | } |
319 | } | 324 | } |
320 | read_unlock(&tasklist_lock); | 325 | rcu_read_unlock(); |
321 | } | 326 | } |
322 | 327 | ||
323 | if (error) | 328 | if (error) |
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index a1bf61617839..44318ca71978 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -145,7 +145,7 @@ static int common_timer_set(struct k_itimer *, int, | |||
145 | struct itimerspec *, struct itimerspec *); | 145 | struct itimerspec *, struct itimerspec *); |
146 | static int common_timer_del(struct k_itimer *timer); | 146 | static int common_timer_del(struct k_itimer *timer); |
147 | 147 | ||
148 | static int posix_timer_fn(struct hrtimer *data); | 148 | static enum hrtimer_restart posix_timer_fn(struct hrtimer *data); |
149 | 149 | ||
150 | static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); | 150 | static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); |
151 | 151 | ||
@@ -334,12 +334,12 @@ EXPORT_SYMBOL_GPL(posix_timer_event); | |||
334 | 334 | ||
335 | * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. | 335 | * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. |
336 | */ | 336 | */ |
337 | static int posix_timer_fn(struct hrtimer *timer) | 337 | static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) |
338 | { | 338 | { |
339 | struct k_itimer *timr; | 339 | struct k_itimer *timr; |
340 | unsigned long flags; | 340 | unsigned long flags; |
341 | int si_private = 0; | 341 | int si_private = 0; |
342 | int ret = HRTIMER_NORESTART; | 342 | enum hrtimer_restart ret = HRTIMER_NORESTART; |
343 | 343 | ||
344 | timr = container_of(timer, struct k_itimer, it.real.timer); | 344 | timr = container_of(timer, struct k_itimer, it.real.timer); |
345 | spin_lock_irqsave(&timr->it_lock, flags); | 345 | spin_lock_irqsave(&timr->it_lock, flags); |
@@ -356,7 +356,7 @@ static int posix_timer_fn(struct hrtimer *timer) | |||
356 | if (timr->it.real.interval.tv64 != 0) { | 356 | if (timr->it.real.interval.tv64 != 0) { |
357 | timr->it_overrun += | 357 | timr->it_overrun += |
358 | hrtimer_forward(timer, | 358 | hrtimer_forward(timer, |
359 | timer->base->softirq_time, | 359 | hrtimer_cb_get_time(timer), |
360 | timr->it.real.interval); | 360 | timr->it.real.interval); |
361 | ret = HRTIMER_RESTART; | 361 | ret = HRTIMER_RESTART; |
362 | ++timr->it_requeue_pending; | 362 | ++timr->it_requeue_pending; |
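hrtimer_forward() advances the expiry in whole interval steps until it lies beyond the given time and returns how many steps it took; that count feeds the POSIX timer overrun accounting above. The real implementation uses a division rather than a loop, but the contract can be sketched in a few lines (hypothetical, ns-based):

    #include <stdio.h>

    /* Advance *expires in whole interval steps until it is after now;
     * return the number of steps taken (0 if already in the future). */
    static unsigned long forward(long long *expires, long long now,
                                 long long interval)
    {
        unsigned long steps = 0;

        while (*expires <= now) {
            *expires += interval;
            steps++;
        }
        return steps;
    }

    int main(void)
    {
        long long exp = 100;

        /* now = 350, interval = 100: expiry moves 100 -> 400 in 3 steps,
         * i.e. two expirations were missed besides the current one */
        printf("%lu %lld\n", forward(&exp, 350, 100), exp);
        return 0;
    }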
@@ -722,7 +722,7 @@ common_timer_set(struct k_itimer *timr, int flags, | |||
722 | if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) | 722 | if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) |
723 | return 0; | 723 | return 0; |
724 | 724 | ||
725 | mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL; | 725 | mode = flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL; |
726 | hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); | 726 | hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); |
727 | timr->it.real.timer.function = posix_timer_fn; | 727 | timr->it.real.timer.function = posix_timer_fn; |
728 | 728 | ||
@@ -734,7 +734,7 @@ common_timer_set(struct k_itimer *timr, int flags, | |||
734 | /* SIGEV_NONE timers are not queued ! See common_timer_get */ | 734 | /* SIGEV_NONE timers are not queued ! See common_timer_get */ |
735 | if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { | 735 | if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { |
736 | /* Setup correct expiry time for relative timers */ | 736 | /* Setup correct expiry time for relative timers */ |
737 | if (mode == HRTIMER_REL) | 737 | if (mode == HRTIMER_MODE_REL) |
738 | timer->expires = ktime_add(timer->expires, | 738 | timer->expires = ktime_add(timer->expires, |
739 | timer->base->get_time()); | 739 | timer->base->get_time()); |
740 | return 0; | 740 | return 0; |
@@ -950,7 +950,8 @@ static int common_nsleep(const clockid_t which_clock, int flags, | |||
950 | struct timespec *tsave, struct timespec __user *rmtp) | 950 | struct timespec *tsave, struct timespec __user *rmtp) |
951 | { | 951 | { |
952 | return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ? | 952 | return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ? |
953 | HRTIMER_ABS : HRTIMER_REL, which_clock); | 953 | HRTIMER_MODE_ABS : HRTIMER_MODE_REL, |
954 | which_clock); | ||
954 | } | 955 | } |
955 | 956 | ||
956 | asmlinkage long | 957 | asmlinkage long |
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 4ab17da46fd8..180978cb2f75 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
@@ -625,7 +625,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
625 | /* Setup the timer, when timeout != NULL */ | 625 | /* Setup the timer, when timeout != NULL */ |
626 | if (unlikely(timeout)) | 626 | if (unlikely(timeout)) |
627 | hrtimer_start(&timeout->timer, timeout->timer.expires, | 627 | hrtimer_start(&timeout->timer, timeout->timer.expires, |
628 | HRTIMER_ABS); | 628 | HRTIMER_MODE_ABS); |
629 | 629 | ||
630 | for (;;) { | 630 | for (;;) { |
631 | /* Try to acquire the lock: */ | 631 | /* Try to acquire the lock: */ |
diff --git a/kernel/signal.c b/kernel/signal.c index 8072e568bbe0..e2a7d4bf7d57 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -456,26 +456,50 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, | |||
456 | int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | 456 | int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) |
457 | { | 457 | { |
458 | int signr = __dequeue_signal(&tsk->pending, mask, info); | 458 | int signr = __dequeue_signal(&tsk->pending, mask, info); |
459 | if (!signr) | 459 | if (!signr) { |
460 | signr = __dequeue_signal(&tsk->signal->shared_pending, | 460 | signr = __dequeue_signal(&tsk->signal->shared_pending, |
461 | mask, info); | 461 | mask, info); |
462 | /* | ||
463 | * itimer signal ? | ||
464 | * | ||
465 | * itimers are process shared and we restart periodic | ||
466 | * itimers in the signal delivery path to prevent DoS | ||
467 | * attacks in the high resolution timer case. This is | ||
468 | * compliant with the old way of self restarting | ||
469 | * itimers, as the SIGALRM is a legacy signal and only | ||
470 | * queued once. Changing the restart behaviour to | ||
471 | * restart the timer in the signal dequeue path also | ||
472 | * reduces the timer noise on heavily loaded !highres | ||
473 | * systems. | ||
474 | */ | ||
475 | if (unlikely(signr == SIGALRM)) { | ||
476 | struct hrtimer *tmr = &tsk->signal->real_timer; | ||
477 | |||
478 | if (!hrtimer_is_queued(tmr) && | ||
479 | tsk->signal->it_real_incr.tv64 != 0) { | ||
480 | hrtimer_forward(tmr, tmr->base->get_time(), | ||
481 | tsk->signal->it_real_incr); | ||
482 | hrtimer_restart(tmr); | ||
483 | } | ||
484 | } | ||
485 | } | ||
462 | recalc_sigpending_tsk(tsk); | 486 | recalc_sigpending_tsk(tsk); |
463 | if (signr && unlikely(sig_kernel_stop(signr))) { | 487 | if (signr && unlikely(sig_kernel_stop(signr))) { |
464 | /* | 488 | /* |
465 | * Set a marker that we have dequeued a stop signal. Our | 489 | * Set a marker that we have dequeued a stop signal. Our |
466 | * caller might release the siglock and then the pending | 490 | * caller might release the siglock and then the pending |
467 | * stop signal it is about to process is no longer in the | 491 | * stop signal it is about to process is no longer in the |
468 | * pending bitmasks, but must still be cleared by a SIGCONT | 492 | * pending bitmasks, but must still be cleared by a SIGCONT |
469 | * (and overruled by a SIGKILL). So those cases clear this | 493 | * (and overruled by a SIGKILL). So those cases clear this |
470 | * shared flag after we've set it. Note that this flag may | 494 | * shared flag after we've set it. Note that this flag may |
471 | * remain set after the signal we return is ignored or | 495 | * remain set after the signal we return is ignored or |
472 | * handled. That doesn't matter because its only purpose | 496 | * handled. That doesn't matter because its only purpose |
473 | * is to alert stop-signal processing code when another | 497 | * is to alert stop-signal processing code when another |
474 | * processor has come along and cleared the flag. | 498 | * processor has come along and cleared the flag. |
475 | */ | 499 | */ |
476 | if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) | 500 | if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) |
477 | tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; | 501 | tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; |
478 | } | 502 | } |
479 | if ( signr && | 503 | if ( signr && |
480 | ((info->si_code & __SI_MASK) == __SI_TIMER) && | 504 | ((info->si_code & __SI_MASK) == __SI_TIMER) && |
481 | info->si_sys_private){ | 505 | info->si_sys_private){ |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 918e52df090e..8b75008e2bd8 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/kthread.h> | 17 | #include <linux/kthread.h> |
18 | #include <linux/rcupdate.h> | 18 | #include <linux/rcupdate.h> |
19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
20 | #include <linux/tick.h> | ||
20 | 21 | ||
21 | #include <asm/irq.h> | 22 | #include <asm/irq.h> |
22 | /* | 23 | /* |
@@ -273,6 +274,18 @@ EXPORT_SYMBOL(do_softirq); | |||
273 | 274 | ||
274 | #endif | 275 | #endif |
275 | 276 | ||
277 | /* | ||
278 | * Enter an interrupt context. | ||
279 | */ | ||
280 | void irq_enter(void) | ||
281 | { | ||
282 | __irq_enter(); | ||
283 | #ifdef CONFIG_NO_HZ | ||
284 | if (idle_cpu(smp_processor_id())) | ||
285 | tick_nohz_update_jiffies(); | ||
286 | #endif | ||
287 | } | ||
288 | |||
276 | #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED | 289 | #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED |
277 | # define invoke_softirq() __do_softirq() | 290 | # define invoke_softirq() __do_softirq() |
278 | #else | 291 | #else |
@@ -289,6 +302,12 @@ void irq_exit(void) | |||
289 | sub_preempt_count(IRQ_EXIT_OFFSET); | 302 | sub_preempt_count(IRQ_EXIT_OFFSET); |
290 | if (!in_interrupt() && local_softirq_pending()) | 303 | if (!in_interrupt() && local_softirq_pending()) |
291 | invoke_softirq(); | 304 | invoke_softirq(); |
305 | |||
306 | #ifdef CONFIG_NO_HZ | ||
307 | /* Make sure that timer wheel updates are propagated */ | ||
308 | if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) | ||
309 | tick_nohz_stop_sched_tick(); | ||
310 | #endif | ||
292 | preempt_enable_no_resched(); | 311 | preempt_enable_no_resched(); |
293 | } | 312 | } |
294 | 313 | ||
diff --git a/kernel/time.c b/kernel/time.c index 0e017bff4c19..c6c80ea5d0ea 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -470,6 +470,260 @@ struct timeval ns_to_timeval(const s64 nsec) | |||
470 | return tv; | 470 | return tv; |
471 | } | 471 | } |
472 | 472 | ||
473 | /* | ||
474 | * Convert jiffies to milliseconds and back. | ||
475 | * | ||
476 | * Avoid unnecessary multiplications/divisions in the | ||
477 | * two most common HZ cases: | ||
478 | */ | ||
479 | unsigned int jiffies_to_msecs(const unsigned long j) | ||
480 | { | ||
481 | #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) | ||
482 | return (MSEC_PER_SEC / HZ) * j; | ||
483 | #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) | ||
484 | return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); | ||
485 | #else | ||
486 | return (j * MSEC_PER_SEC) / HZ; | ||
487 | #endif | ||
488 | } | ||
489 | EXPORT_SYMBOL(jiffies_to_msecs); | ||
490 | |||
491 | unsigned int jiffies_to_usecs(const unsigned long j) | ||
492 | { | ||
493 | #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) | ||
494 | return (USEC_PER_SEC / HZ) * j; | ||
495 | #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) | ||
496 | return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC); | ||
497 | #else | ||
498 | return (j * USEC_PER_SEC) / HZ; | ||
499 | #endif | ||
500 | } | ||
501 | EXPORT_SYMBOL(jiffies_to_usecs); | ||
502 | |||
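The three preprocessor branches above pick the cheapest correct arithmetic for the compile-time HZ value. A standalone sketch of the common first branch, not part of this patch, assuming HZ=250 so one jiffy is 1000/250 = 4 ms:

    #include <stdio.h>

    #define HZ           250
    #define MSEC_PER_SEC 1000

    /* first branch: MSEC_PER_SEC is a round multiple of HZ */
    static unsigned int jiffies_to_msecs_demo(unsigned long j)
    {
            return (MSEC_PER_SEC / HZ) * j;         /* 4 ms per jiffy */
    }

    int main(void)
    {
            printf("%u\n", jiffies_to_msecs_demo(25));  /* prints 100 */
            return 0;
    }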
503 | /* | ||
504 | * When we convert to jiffies we interpret incoming values | ||
505 | * as follows: | ||
506 | * | ||
507 | * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET) | ||
508 | * | ||
509 | * - 'too large' values [that would result in larger than | ||
510 | * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. | ||
511 | * | ||
512 | * - all other values are converted to jiffies by either multiplying | ||
513 | * the input value by a factor or dividing it by a factor | ||
514 | * | ||
515 | * We must also be careful about 32-bit overflows. | ||
516 | */ | ||
517 | unsigned long msecs_to_jiffies(const unsigned int m) | ||
518 | { | ||
519 | /* | ||
520 | * A negative value means an infinite timeout: | ||
521 | */ | ||
522 | if ((int)m < 0) | ||
523 | return MAX_JIFFY_OFFSET; | ||
524 | |||
525 | #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) | ||
526 | /* | ||
527 | * HZ is equal to or smaller than 1000, and 1000 is a nice | ||
528 | * round multiple of HZ, so divide by the factor between them, | ||
529 | * but round upwards: | ||
530 | */ | ||
531 | return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); | ||
532 | #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) | ||
533 | /* | ||
534 | * HZ is larger than 1000, and HZ is a nice round multiple of | ||
535 | * 1000 - simply multiply by the factor between them. | ||
536 | * | ||
537 | * But first make sure the multiplication result cannot | ||
538 | * overflow: | ||
539 | */ | ||
540 | if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) | ||
541 | return MAX_JIFFY_OFFSET; | ||
542 | |||
543 | return m * (HZ / MSEC_PER_SEC); | ||
544 | #else | ||
545 | /* | ||
546 | * Generic case - multiply, round and divide. But first | ||
547 | * check that we wouldn't overflow if we are doing | ||
548 | * a net multiplication: | ||
549 | */ | ||
550 | if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) | ||
551 | return MAX_JIFFY_OFFSET; | ||
552 | |||
553 | return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC; | ||
554 | #endif | ||
555 | } | ||
556 | EXPORT_SYMBOL(msecs_to_jiffies); | ||
557 | |||
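The division branches round upwards so a nonzero timeout can never collapse to zero jiffies. The same rounding in isolation, again assuming HZ=250 (the demo names are illustrative):

    #define HZ           250
    #define MSEC_PER_SEC 1000

    static unsigned long msecs_to_jiffies_demo(unsigned int m)
    {
            /* round up so a nonzero request yields at least one jiffy */
            return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
    }
    /* msecs_to_jiffies_demo(1) == 1, msecs_to_jiffies_demo(9) == 3 */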
558 | unsigned long usecs_to_jiffies(const unsigned int u) | ||
559 | { | ||
560 | if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET)) | ||
561 | return MAX_JIFFY_OFFSET; | ||
562 | #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) | ||
563 | return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ); | ||
564 | #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) | ||
565 | return u * (HZ / USEC_PER_SEC); | ||
566 | #else | ||
567 | return (u * HZ + USEC_PER_SEC - 1) / USEC_PER_SEC; | ||
568 | #endif | ||
569 | } | ||
570 | EXPORT_SYMBOL(usecs_to_jiffies); | ||
571 | |||
572 | /* | ||
573 | * The TICK_NSEC - 1 rounds up the value to the next resolution. Note | ||
574 | * that a remainder subtract here would not do the right thing as the | ||
575 | * resolution values don't fall on second boundaries. I.e. the line: | ||
576 | * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding. | ||
577 | * | ||
578 | * Rather, we just shift the bits off the right. | ||
579 | * | ||
580 | * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec | ||
581 | * value to a scaled second value. | ||
582 | */ | ||
583 | unsigned long | ||
584 | timespec_to_jiffies(const struct timespec *value) | ||
585 | { | ||
586 | unsigned long sec = value->tv_sec; | ||
587 | long nsec = value->tv_nsec + TICK_NSEC - 1; | ||
588 | |||
589 | if (sec >= MAX_SEC_IN_JIFFIES){ | ||
590 | sec = MAX_SEC_IN_JIFFIES; | ||
591 | nsec = 0; | ||
592 | } | ||
593 | return (((u64)sec * SEC_CONVERSION) + | ||
594 | (((u64)nsec * NSEC_CONVERSION) >> | ||
595 | (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; | ||
596 | |||
597 | } | ||
598 | EXPORT_SYMBOL(timespec_to_jiffies); | ||
599 | |||
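The SEC_CONVERSION/NSEC_CONVERSION constants are pre-scaled fixed-point factors that let the kernel avoid a 64-bit division here. A plain reference version of the same conversion, not part of this patch, assuming TICK_NSEC = 4,000,000 (HZ=250):

    #include <stdint.h>

    struct timespec_demo { long tv_sec; long tv_nsec; };

    static unsigned long
    timespec_to_jiffies_demo(const struct timespec_demo *v)
    {
            const uint64_t tick_nsec = 4000000;     /* one jiffy at HZ=250 */
            uint64_t nsec = (uint64_t)v->tv_sec * 1000000000ULL + v->tv_nsec;

            /* round up to the next tick, matching the TICK_NSEC - 1 above */
            return (unsigned long)((nsec + tick_nsec - 1) / tick_nsec);
    }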
600 | void | ||
601 | jiffies_to_timespec(const unsigned long jiffies, struct timespec *value) | ||
602 | { | ||
603 | /* | ||
604 | * Convert jiffies to nanoseconds and separate with | ||
605 | * one divide. | ||
606 | */ | ||
607 | u64 nsec = (u64)jiffies * TICK_NSEC; | ||
608 | value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_nsec); | ||
609 | } | ||
610 | EXPORT_SYMBOL(jiffies_to_timespec); | ||
611 | |||
612 | /* Same for "timeval" | ||
613 | * | ||
614 | * Well, almost. The problem here is that the real system resolution is | ||
615 | * in nanoseconds and the value being converted is in microseconds. | ||
616 | * Also for some machines (those that use HZ = 1024, in particular), | ||
617 | * there is a LARGE error in the tick size in microseconds. | ||
618 | * | ||
619 | * The solution we use is to do the rounding AFTER we convert the | ||
620 | * microsecond part. Thus the USEC_ROUND, the bits to be shifted off. | ||
621 | * Instruction-wise, this should cost only an additional add-with-carry | ||
622 | * instruction compared to the timespec conversion above. | ||
623 | */ | ||
624 | unsigned long | ||
625 | timeval_to_jiffies(const struct timeval *value) | ||
626 | { | ||
627 | unsigned long sec = value->tv_sec; | ||
628 | long usec = value->tv_usec; | ||
629 | |||
630 | if (sec >= MAX_SEC_IN_JIFFIES){ | ||
631 | sec = MAX_SEC_IN_JIFFIES; | ||
632 | usec = 0; | ||
633 | } | ||
634 | return (((u64)sec * SEC_CONVERSION) + | ||
635 | (((u64)usec * USEC_CONVERSION + USEC_ROUND) >> | ||
636 | (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; | ||
637 | } | ||
638 | |||
639 | void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) | ||
640 | { | ||
641 | /* | ||
642 | * Convert jiffies to nanoseconds and separate with | ||
643 | * one divide. | ||
644 | */ | ||
645 | u64 nsec = (u64)jiffies * TICK_NSEC; | ||
646 | long tv_usec; | ||
647 | |||
648 | value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &tv_usec); | ||
649 | tv_usec /= NSEC_PER_USEC; | ||
650 | value->tv_usec = tv_usec; | ||
651 | } | ||
652 | |||
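div_long_long_rem splits the 64-bit nanosecond product into seconds and a sub-second remainder with a single division. An equivalent standalone computation, with HZ=250 assumed for the tick size:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t jiffies = 625;                 /* 2.5 s at HZ=250 */
            uint64_t nsec = jiffies * 4000000ULL;   /* jiffies * TICK_NSEC */

            long tv_sec  = (long)(nsec / 1000000000ULL);
            long tv_usec = (long)(nsec % 1000000000ULL) / 1000;

            printf("%ld.%06ld\n", tv_sec, tv_usec); /* prints 2.500000 */
            return 0;
    }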
653 | /* | ||
654 | * Convert jiffies/jiffies_64 to clock_t and back. | ||
655 | */ | ||
656 | clock_t jiffies_to_clock_t(long x) | ||
657 | { | ||
658 | #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 | ||
659 | return x / (HZ / USER_HZ); | ||
660 | #else | ||
661 | u64 tmp = (u64)x * TICK_NSEC; | ||
662 | do_div(tmp, (NSEC_PER_SEC / USER_HZ)); | ||
663 | return (long)tmp; | ||
664 | #endif | ||
665 | } | ||
666 | EXPORT_SYMBOL(jiffies_to_clock_t); | ||
667 | |||
668 | unsigned long clock_t_to_jiffies(unsigned long x) | ||
669 | { | ||
670 | #if (HZ % USER_HZ)==0 | ||
671 | if (x >= ~0UL / (HZ / USER_HZ)) | ||
672 | return ~0UL; | ||
673 | return x * (HZ / USER_HZ); | ||
674 | #else | ||
675 | u64 jif; | ||
676 | |||
677 | /* Don't worry about loss of precision here .. */ | ||
678 | if (x >= ~0UL / HZ * USER_HZ) | ||
679 | return ~0UL; | ||
680 | |||
681 | /* .. but do try to contain it here */ | ||
682 | jif = x * (u64) HZ; | ||
683 | do_div(jif, USER_HZ); | ||
684 | return jif; | ||
685 | #endif | ||
686 | } | ||
687 | EXPORT_SYMBOL(clock_t_to_jiffies); | ||
688 | |||
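The ~0UL comparisons above are overflow guards: they saturate the result before the multiply can wrap. The same pattern in isolation, assuming HZ=1000 and USER_HZ=100:

    #include <limits.h>

    static unsigned long clock_t_to_jiffies_demo(unsigned long x)
    {
            const unsigned long factor = 1000 / 100;    /* HZ / USER_HZ */

            if (x >= ULONG_MAX / factor)    /* x * factor would overflow */
                    return ULONG_MAX;       /* saturate, like ~0UL above */
            return x * factor;
    }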
689 | u64 jiffies_64_to_clock_t(u64 x) | ||
690 | { | ||
691 | #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 | ||
692 | do_div(x, HZ / USER_HZ); | ||
693 | #else | ||
694 | /* | ||
695 | * There are better ways that don't overflow early, | ||
696 | * but even this doesn't overflow in hundreds of years | ||
697 | * in 64 bits, so.. | ||
698 | */ | ||
699 | x *= TICK_NSEC; | ||
700 | do_div(x, (NSEC_PER_SEC / USER_HZ)); | ||
701 | #endif | ||
702 | return x; | ||
703 | } | ||
704 | |||
705 | EXPORT_SYMBOL(jiffies_64_to_clock_t); | ||
706 | |||
707 | u64 nsec_to_clock_t(u64 x) | ||
708 | { | ||
709 | #if (NSEC_PER_SEC % USER_HZ) == 0 | ||
710 | do_div(x, (NSEC_PER_SEC / USER_HZ)); | ||
711 | #elif (USER_HZ % 512) == 0 | ||
712 | x *= USER_HZ/512; | ||
713 | do_div(x, (NSEC_PER_SEC / 512)); | ||
714 | #else | ||
715 | /* | ||
716 | * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024, | ||
717 | * overflow after 64.99 years. | ||
718 | * exact for USER_HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ... | ||
719 | */ | ||
720 | x *= 9; | ||
721 | do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (USER_HZ/2)) / | ||
722 | USER_HZ)); | ||
723 | #endif | ||
724 | return x; | ||
725 | } | ||
726 | |||
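The multiply-by-9 branch works because 10^9 is not divisible by USER_HZ values such as 60, while 9 * 10^9 is. A standalone check of the arithmetic for USER_HZ = 60:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            const uint64_t NSEC_PER_SEC = 1000000000ULL;
            const uint64_t USER_HZ = 60;
            uint64_t x = 5 * NSEC_PER_SEC;              /* 5 seconds in ns */

            /* rounded divisor: (9e9 + 30) / 60 = 150000000 */
            uint64_t div = (9 * NSEC_PER_SEC + USER_HZ / 2) / USER_HZ;

            printf("%llu\n", (unsigned long long)(x * 9 / div)); /* 300 */
            return 0;
    }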
473 | #if (BITS_PER_LONG < 64) | 727 | #if (BITS_PER_LONG < 64) |
474 | u64 get_jiffies_64(void) | 728 | u64 get_jiffies_64(void) |
475 | { | 729 | { |
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig new file mode 100644 index 000000000000..f66351126544 --- /dev/null +++ b/kernel/time/Kconfig | |||
@@ -0,0 +1,25 @@ | |||
1 | # | ||
2 | # Timer subsystem related configuration options | ||
3 | # | ||
4 | config TICK_ONESHOT | ||
5 | bool | ||
6 | default n | ||
7 | |||
8 | config NO_HZ | ||
9 | bool "Tickless System (Dynamic Ticks)" | ||
10 | depends on GENERIC_TIME && GENERIC_CLOCKEVENTS | ||
11 | select TICK_ONESHOT | ||
12 | help | ||
13 | This option enables a tickless system: timer interrupts will | ||
14 | only trigger on an as-needed basis both when the system is | ||
15 | busy and when the system is idle. | ||
16 | |||
17 | config HIGH_RES_TIMERS | ||
18 | bool "High Resolution Timer Support" | ||
19 | depends on GENERIC_TIME && GENERIC_CLOCKEVENTS | ||
20 | select TICK_ONESHOT | ||
21 | help | ||
22 | This option enables high resolution timer support. If your | ||
23 | hardware is not capable of it, this option only increases | ||
24 | the size of the kernel image. | ||
25 | |||
diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 61a3907d16fb..93bccba1f265 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile | |||
@@ -1 +1,8 @@ | |||
1 | obj-y += ntp.o clocksource.o jiffies.o | 1 | obj-y += ntp.o clocksource.o jiffies.o timer_list.o |
2 | |||
3 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o | ||
4 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o | ||
5 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o | ||
6 | obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o | ||
7 | obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o | ||
8 | obj-$(CONFIG_TIMER_STATS) += timer_stats.o | ||
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c new file mode 100644 index 000000000000..67932ea78c17 --- /dev/null +++ b/kernel/time/clockevents.c | |||
@@ -0,0 +1,345 @@ | |||
1 | /* | ||
2 | * linux/kernel/time/clockevents.c | ||
3 | * | ||
4 | * This file contains functions which manage clock event devices. | ||
5 | * | ||
6 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
7 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
8 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | ||
9 | * | ||
10 | * This code is licenced under the GPL version 2. For details see | ||
11 | * kernel-base/COPYING. | ||
12 | */ | ||
13 | |||
14 | #include <linux/clockchips.h> | ||
15 | #include <linux/hrtimer.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/notifier.h> | ||
19 | #include <linux/smp.h> | ||
20 | #include <linux/sysdev.h> | ||
21 | |||
22 | /* The registered clock event devices */ | ||
23 | static LIST_HEAD(clockevent_devices); | ||
24 | static LIST_HEAD(clockevents_released); | ||
25 | |||
26 | /* Notification for clock events */ | ||
27 | static RAW_NOTIFIER_HEAD(clockevents_chain); | ||
28 | |||
29 | /* Protection for the above */ | ||
30 | static DEFINE_SPINLOCK(clockevents_lock); | ||
31 | |||
32 | /** | ||
33 | * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds | ||
34 | * @latch: value to convert | ||
35 | * @evt: pointer to clock event device descriptor | ||
36 | * | ||
37 | * Math helper, returns latch value converted to nanoseconds (bound checked) | ||
38 | */ | ||
39 | unsigned long clockevent_delta2ns(unsigned long latch, | ||
40 | struct clock_event_device *evt) | ||
41 | { | ||
42 | u64 clc = ((u64) latch << evt->shift); | ||
43 | |||
44 | do_div(clc, evt->mult); | ||
45 | if (clc < 1000) | ||
46 | clc = 1000; | ||
47 | if (clc > LONG_MAX) | ||
48 | clc = LONG_MAX; | ||
49 | |||
50 | return (unsigned long) clc; | ||
51 | } | ||
52 | |||
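mult and shift encode the device frequency as a fixed-point ratio: a latch value is ns * mult >> shift, and this helper inverts that as (latch << shift) / mult. A round-trip sketch with made-up values for a 1 MHz device:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            const uint32_t shift = 32;
            /* mult = (freq << shift) / NSEC_PER_SEC = 2^32 / 1000 */
            const uint64_t mult = ((uint64_t)1 << 32) / 1000;

            uint64_t delta_ns = 500000;                     /* 500 us    */
            uint64_t latch    = (delta_ns * mult) >> shift; /* ~500      */
            uint64_t ns_back  = (latch << shift) / mult;    /* ~500000   */

            printf("%llu %llu\n", (unsigned long long)latch,
                   (unsigned long long)ns_back);
            return 0;
    }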
53 | /** | ||
54 | * clockevents_set_mode - set the operating mode of a clock event device | ||
55 | * @dev: device to modify | ||
56 | * @mode: new mode | ||
57 | * | ||
58 | * Must be called with interrupts disabled ! | ||
59 | */ | ||
60 | void clockevents_set_mode(struct clock_event_device *dev, | ||
61 | enum clock_event_mode mode) | ||
62 | { | ||
63 | if (dev->mode != mode) { | ||
64 | dev->set_mode(mode, dev); | ||
65 | dev->mode = mode; | ||
66 | } | ||
67 | } | ||
68 | |||
69 | /** | ||
70 | * clockevents_program_event - Reprogram the clock event device. | ||
71 | * @expires: absolute expiry time (monotonic clock) | ||
72 | * | ||
73 | * Returns 0 on success, -ETIME when the event is in the past. | ||
74 | */ | ||
75 | int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, | ||
76 | ktime_t now) | ||
77 | { | ||
78 | unsigned long long clc; | ||
79 | int64_t delta; | ||
80 | |||
81 | delta = ktime_to_ns(ktime_sub(expires, now)); | ||
82 | |||
83 | if (delta <= 0) | ||
84 | return -ETIME; | ||
85 | |||
86 | dev->next_event = expires; | ||
87 | |||
88 | if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) | ||
89 | return 0; | ||
90 | |||
91 | if (delta > dev->max_delta_ns) | ||
92 | delta = dev->max_delta_ns; | ||
93 | if (delta < dev->min_delta_ns) | ||
94 | delta = dev->min_delta_ns; | ||
95 | |||
96 | clc = delta * dev->mult; | ||
97 | clc >>= dev->shift; | ||
98 | |||
99 | return dev->set_next_event((unsigned long) clc, dev); | ||
100 | } | ||
101 | |||
102 | /** | ||
103 | * clockevents_register_notifier - register a clock events change listener | ||
104 | */ | ||
105 | int clockevents_register_notifier(struct notifier_block *nb) | ||
106 | { | ||
107 | int ret; | ||
108 | |||
109 | spin_lock(&clockevents_lock); | ||
110 | ret = raw_notifier_chain_register(&clockevents_chain, nb); | ||
111 | spin_unlock(&clockevents_lock); | ||
112 | |||
113 | return ret; | ||
114 | } | ||
115 | |||
116 | /** | ||
117 | * clockevents_unregister_notifier - unregister a clock events change listener | ||
118 | */ | ||
119 | void clockevents_unregister_notifier(struct notifier_block *nb) | ||
120 | { | ||
121 | spin_lock(&clockevents_lock); | ||
122 | raw_notifier_chain_unregister(&clockevents_chain, nb); | ||
123 | spin_unlock(&clockevents_lock); | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * Notify about a clock event change. Called with clockevents_lock | ||
128 | * held. | ||
129 | */ | ||
130 | static void clockevents_do_notify(unsigned long reason, void *dev) | ||
131 | { | ||
132 | raw_notifier_call_chain(&clockevents_chain, reason, dev); | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Called after a notify add to make devices available which were | ||
137 | * released from the notifier call. | ||
138 | */ | ||
139 | static void clockevents_notify_released(void) | ||
140 | { | ||
141 | struct clock_event_device *dev; | ||
142 | |||
143 | while (!list_empty(&clockevents_released)) { | ||
144 | dev = list_entry(clockevents_released.next, | ||
145 | struct clock_event_device, list); | ||
146 | list_del(&dev->list); | ||
147 | list_add(&dev->list, &clockevent_devices); | ||
148 | clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); | ||
149 | } | ||
150 | } | ||
151 | |||
152 | /** | ||
153 | * clockevents_register_device - register a clock event device | ||
154 | * @dev: device to register | ||
155 | */ | ||
156 | void clockevents_register_device(struct clock_event_device *dev) | ||
157 | { | ||
158 | BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); | ||
159 | |||
160 | spin_lock(&clockevents_lock); | ||
161 | |||
162 | list_add(&dev->list, &clockevent_devices); | ||
163 | clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); | ||
164 | clockevents_notify_released(); | ||
165 | |||
166 | spin_unlock(&clockevents_lock); | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Noop handler when we shut down an event device | ||
171 | */ | ||
172 | static void clockevents_handle_noop(struct clock_event_device *dev) | ||
173 | { | ||
174 | } | ||
175 | |||
176 | /** | ||
177 | * clockevents_exchange_device - release and request clock devices | ||
178 | * @old: device to release (can be NULL) | ||
179 | * @new: device to request (can be NULL) | ||
180 | * | ||
181 | * Called from the notifier chain. clockevents_lock is held already | ||
182 | */ | ||
183 | void clockevents_exchange_device(struct clock_event_device *old, | ||
184 | struct clock_event_device *new) | ||
185 | { | ||
186 | unsigned long flags; | ||
187 | |||
188 | local_irq_save(flags); | ||
189 | /* | ||
190 | * Caller releases a clock event device. We queue it into the | ||
191 | * released list and do a notify add later. | ||
192 | */ | ||
193 | if (old) { | ||
194 | old->event_handler = clockevents_handle_noop; | ||
195 | clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); | ||
196 | list_del(&old->list); | ||
197 | list_add(&old->list, &clockevents_released); | ||
198 | } | ||
199 | |||
200 | if (new) { | ||
201 | BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED); | ||
202 | clockevents_set_mode(new, CLOCK_EVT_MODE_SHUTDOWN); | ||
203 | } | ||
204 | local_irq_restore(flags); | ||
205 | } | ||
206 | |||
207 | /** | ||
208 | * clockevents_request_device - request a matching clock event device | ||
209 | */ | ||
210 | struct clock_event_device *clockevents_request_device(unsigned int features, | ||
211 | cpumask_t cpumask) | ||
212 | { | ||
213 | struct clock_event_device *cur, *dev = NULL; | ||
214 | struct list_head *tmp; | ||
215 | |||
216 | spin_lock(&clockevents_lock); | ||
217 | |||
218 | list_for_each(tmp, &clockevent_devices) { | ||
219 | cur = list_entry(tmp, struct clock_event_device, list); | ||
220 | |||
221 | if ((cur->features & features) == features && | ||
222 | cpus_equal(cpumask, cur->cpumask)) { | ||
223 | if (!dev || dev->rating < cur->rating) | ||
224 | dev = cur; | ||
225 | } | ||
226 | } | ||
227 | |||
228 | clockevents_exchange_device(NULL, dev); | ||
229 | |||
230 | spin_unlock(&clockevents_lock); | ||
231 | |||
232 | return dev; | ||
233 | } | ||
234 | |||
235 | /** | ||
236 | * clockevents_release_device - release a requested clock event device | ||
237 | */ | ||
238 | void clockevents_release_device(struct clock_event_device *dev) | ||
239 | { | ||
240 | spin_lock(&clockevents_lock); | ||
241 | |||
242 | clockevents_exchange_device(dev, NULL); | ||
243 | clockevents_notify_released(); | ||
244 | |||
245 | spin_unlock(&clockevents_lock); | ||
246 | } | ||
247 | |||
248 | /** | ||
249 | * clockevents_notify - notification about relevant events | ||
250 | */ | ||
251 | void clockevents_notify(unsigned long reason, void *arg) | ||
252 | { | ||
253 | spin_lock(&clockevents_lock); | ||
254 | clockevents_do_notify(reason, arg); | ||
255 | |||
256 | switch (reason) { | ||
257 | case CLOCK_EVT_NOTIFY_CPU_DEAD: | ||
258 | /* | ||
259 | * Unregister the clock event devices which were | ||
260 | * released from the users in the notify chain. | ||
261 | */ | ||
262 | while (!list_empty(&clockevents_released)) { | ||
263 | struct clock_event_device *dev; | ||
264 | |||
265 | dev = list_entry(clockevents_released.next, | ||
266 | struct clock_event_device, list); | ||
267 | list_del(&dev->list); | ||
268 | } | ||
269 | break; | ||
270 | default: | ||
271 | break; | ||
272 | } | ||
273 | spin_unlock(&clockevents_lock); | ||
274 | } | ||
275 | EXPORT_SYMBOL_GPL(clockevents_notify); | ||
276 | |||
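How a consumer of this chain might look: a hypothetical subsystem subscribing to clockevents notifications. The callback and its names are illustrative; only clockevents_register_notifier() and the CLOCK_EVT_NOTIFY_* reasons come from this patch:

    #include <linux/clockchips.h>
    #include <linux/kernel.h>
    #include <linux/notifier.h>

    /* hypothetical listener, for illustration only */
    static int demo_clockevents_cb(struct notifier_block *nb,
                                   unsigned long reason, void *dev)
    {
            if (reason == CLOCK_EVT_NOTIFY_ADD)
                    printk(KERN_INFO "clockevents: device registered\n");
            return NOTIFY_OK;
    }

    static struct notifier_block demo_clockevents_nb = {
            .notifier_call = demo_clockevents_cb,
    };

    /* at init: clockevents_register_notifier(&demo_clockevents_nb); */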
277 | #ifdef CONFIG_SYSFS | ||
278 | |||
279 | /** | ||
280 | * clockevents_show_registered - sysfs interface for listing clockevents | ||
281 | * @dev: unused | ||
282 | * @buf: char buffer to be filled with clock events list | ||
283 | * | ||
284 | * Provides sysfs interface for listing registered clock event devices | ||
285 | */ | ||
286 | static ssize_t clockevents_show_registered(struct sys_device *dev, char *buf) | ||
287 | { | ||
288 | struct list_head *tmp; | ||
289 | char *p = buf; | ||
290 | int cpu; | ||
291 | |||
292 | spin_lock(&clockevents_lock); | ||
293 | |||
294 | list_for_each(tmp, &clockevent_devices) { | ||
295 | struct clock_event_device *ce; | ||
296 | |||
297 | ce = list_entry(tmp, struct clock_event_device, list); | ||
298 | p += sprintf(p, "%-20s F:%04x M:%d", ce->name, | ||
299 | ce->features, ce->mode); | ||
300 | p += sprintf(p, " C:"); | ||
301 | if (!cpus_equal(ce->cpumask, cpu_possible_map)) { | ||
302 | for_each_cpu_mask(cpu, ce->cpumask) | ||
303 | p += sprintf(p, " %d", cpu); | ||
304 | } else { | ||
305 | /* | ||
306 | * FIXME: Add the cpu which is handling this sucker | ||
307 | */ | ||
308 | } | ||
309 | p += sprintf(p, "\n"); | ||
310 | } | ||
311 | |||
312 | spin_unlock(&clockevents_lock); | ||
313 | |||
314 | return p - buf; | ||
315 | } | ||
316 | |||
317 | /* | ||
318 | * Sysfs setup bits: | ||
319 | */ | ||
320 | static SYSDEV_ATTR(registered, 0600, | ||
321 | clockevents_show_registered, NULL); | ||
322 | |||
323 | static struct sysdev_class clockevents_sysclass = { | ||
324 | set_kset_name("clockevents"), | ||
325 | }; | ||
326 | |||
327 | static struct sys_device clockevents_sys_device = { | ||
328 | .id = 0, | ||
329 | .cls = &clockevents_sysclass, | ||
330 | }; | ||
331 | |||
332 | static int __init clockevents_sysfs_init(void) | ||
333 | { | ||
334 | int error = sysdev_class_register(&clockevents_sysclass); | ||
335 | |||
336 | if (!error) | ||
337 | error = sysdev_register(&clockevents_sys_device); | ||
338 | if (!error) | ||
339 | error = sysdev_create_file( | ||
340 | &clockevents_sys_device, | ||
341 | &attr_registered); | ||
342 | return error; | ||
343 | } | ||
344 | device_initcall(clockevents_sysfs_init); | ||
345 | #endif | ||
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index d9ef176c4e09..193a0793af95 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/init.h> | 29 | #include <linux/init.h> |
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ | 31 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ |
32 | #include <linux/tick.h> | ||
32 | 33 | ||
33 | /* XXX - Would like a better way for initializing curr_clocksource */ | 34 | /* XXX - Would like a better way for initializing curr_clocksource */ |
34 | extern struct clocksource clocksource_jiffies; | 35 | extern struct clocksource clocksource_jiffies; |
@@ -48,6 +49,7 @@ extern struct clocksource clocksource_jiffies; | |||
48 | */ | 49 | */ |
49 | static struct clocksource *curr_clocksource = &clocksource_jiffies; | 50 | static struct clocksource *curr_clocksource = &clocksource_jiffies; |
50 | static struct clocksource *next_clocksource; | 51 | static struct clocksource *next_clocksource; |
52 | static struct clocksource *clocksource_override; | ||
51 | static LIST_HEAD(clocksource_list); | 53 | static LIST_HEAD(clocksource_list); |
52 | static DEFINE_SPINLOCK(clocksource_lock); | 54 | static DEFINE_SPINLOCK(clocksource_lock); |
53 | static char override_name[32]; | 55 | static char override_name[32]; |
@@ -62,9 +64,123 @@ static int __init clocksource_done_booting(void) | |||
62 | finished_booting = 1; | 64 | finished_booting = 1; |
63 | return 0; | 65 | return 0; |
64 | } | 66 | } |
65 | |||
66 | late_initcall(clocksource_done_booting); | 67 | late_initcall(clocksource_done_booting); |
67 | 68 | ||
69 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG | ||
70 | static LIST_HEAD(watchdog_list); | ||
71 | static struct clocksource *watchdog; | ||
72 | static struct timer_list watchdog_timer; | ||
73 | static DEFINE_SPINLOCK(watchdog_lock); | ||
74 | static cycle_t watchdog_last; | ||
75 | /* | ||
76 | * Interval: 0.5s, threshold: 0.0625s | ||
77 | */ | ||
78 | #define WATCHDOG_INTERVAL (HZ >> 1) | ||
79 | #define WATCHDOG_TRESHOLD (NSEC_PER_SEC >> 4) | ||
80 | |||
81 | static void clocksource_ratewd(struct clocksource *cs, int64_t delta) | ||
82 | { | ||
83 | if (delta > -WATCHDOG_TRESHOLD && delta < WATCHDOG_TRESHOLD) | ||
84 | return; | ||
85 | |||
86 | printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", | ||
87 | cs->name, delta); | ||
88 | cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); | ||
89 | clocksource_change_rating(cs, 0); | ||
90 | cs->flags &= ~CLOCK_SOURCE_WATCHDOG; | ||
91 | list_del(&cs->wd_list); | ||
92 | } | ||
93 | |||
94 | static void clocksource_watchdog(unsigned long data) | ||
95 | { | ||
96 | struct clocksource *cs, *tmp; | ||
97 | cycle_t csnow, wdnow; | ||
98 | int64_t wd_nsec, cs_nsec; | ||
99 | |||
100 | spin_lock(&watchdog_lock); | ||
101 | |||
102 | wdnow = watchdog->read(); | ||
103 | wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); | ||
104 | watchdog_last = wdnow; | ||
105 | |||
106 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { | ||
107 | csnow = cs->read(); | ||
108 | /* Initialized? */ | ||
109 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { | ||
110 | if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && | ||
111 | (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { | ||
112 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | ||
113 | /* | ||
114 | * We just marked the clocksource as | ||
115 | * highres-capable, notify the rest of the | ||
116 | * system as well so that we transition | ||
117 | * into high-res mode: | ||
118 | */ | ||
119 | tick_clock_notify(); | ||
120 | } | ||
121 | cs->flags |= CLOCK_SOURCE_WATCHDOG; | ||
122 | cs->wd_last = csnow; | ||
123 | } else { | ||
124 | cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask); | ||
125 | cs->wd_last = csnow; | ||
126 | /* Check the delta. Might remove from the list ! */ | ||
127 | clocksource_ratewd(cs, cs_nsec - wd_nsec); | ||
128 | } | ||
129 | } | ||
130 | |||
131 | if (!list_empty(&watchdog_list)) { | ||
132 | __mod_timer(&watchdog_timer, | ||
133 | watchdog_timer.expires + WATCHDOG_INTERVAL); | ||
134 | } | ||
135 | spin_unlock(&watchdog_lock); | ||
136 | } | ||
137 | static void clocksource_check_watchdog(struct clocksource *cs) | ||
138 | { | ||
139 | struct clocksource *cse; | ||
140 | unsigned long flags; | ||
141 | |||
142 | spin_lock_irqsave(&watchdog_lock, flags); | ||
143 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { | ||
144 | int started = !list_empty(&watchdog_list); | ||
145 | |||
146 | list_add(&cs->wd_list, &watchdog_list); | ||
147 | if (!started && watchdog) { | ||
148 | watchdog_last = watchdog->read(); | ||
149 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; | ||
150 | add_timer(&watchdog_timer); | ||
151 | } | ||
152 | } else if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) { | ||
153 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | ||
154 | |||
155 | if (!watchdog || cs->rating > watchdog->rating) { | ||
156 | if (watchdog) | ||
157 | del_timer(&watchdog_timer); | ||
158 | watchdog = cs; | ||
159 | init_timer(&watchdog_timer); | ||
160 | watchdog_timer.function = clocksource_watchdog; | ||
161 | |||
162 | /* Reset watchdog cycles */ | ||
163 | list_for_each_entry(cse, &watchdog_list, wd_list) | ||
164 | cse->flags &= ~CLOCK_SOURCE_WATCHDOG; | ||
165 | /* Start if list is not empty */ | ||
166 | if (!list_empty(&watchdog_list)) { | ||
167 | watchdog_last = watchdog->read(); | ||
168 | watchdog_timer.expires = | ||
169 | jiffies + WATCHDOG_INTERVAL; | ||
170 | add_timer(&watchdog_timer); | ||
171 | } | ||
172 | } | ||
173 | } | ||
174 | spin_unlock_irqrestore(&watchdog_lock, flags); | ||
175 | } | ||
176 | #else | ||
177 | static void clocksource_check_watchdog(struct clocksource *cs) | ||
178 | { | ||
179 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) | ||
180 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | ||
181 | } | ||
182 | #endif | ||
183 | |||
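What the watchdog compares: the cycle deltas of the watched clocksource and of the watchdog, each converted to nanoseconds through its own mult/shift pair, must agree within the threshold. A standalone sketch of that check (the names and the 1 ns/cycle mult/shift values are made up):

    #include <stdint.h>

    int64_t cyc2ns_demo(uint64_t cyc, uint32_t mult, uint32_t shift)
    {
            return (int64_t)((cyc * mult) >> shift);
    }

    int clocksource_agrees(uint64_t cs_delta, uint64_t wd_delta)
    {
            const int64_t threshold = 1000000000LL >> 4;    /* 0.0625 s */
            /* mult = 2^22, shift = 22: exactly 1 ns per cycle here */
            int64_t delta = cyc2ns_demo(cs_delta, 1 << 22, 22) -
                            cyc2ns_demo(wd_delta, 1 << 22, 22);

            return delta > -threshold && delta < threshold;
    }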
68 | /** | 184 | /** |
69 | * clocksource_get_next - Returns the selected clocksource | 185 | * clocksource_get_next - Returns the selected clocksource |
70 | * | 186 | * |
@@ -84,60 +200,54 @@ struct clocksource *clocksource_get_next(void) | |||
84 | } | 200 | } |
85 | 201 | ||
86 | /** | 202 | /** |
87 | * select_clocksource - Finds the best registered clocksource. | 203 | * select_clocksource - Selects the best registered clocksource. |
88 | * | 204 | * |
89 | * Private function. Must hold clocksource_lock when called. | 205 | * Private function. Must hold clocksource_lock when called. |
90 | * | 206 | * |
91 | * Looks through the list of registered clocksources, returning | 207 | * Select the clocksource with the best rating, or the one |
92 | * the one with the highest rating value. If there is a clocksource | 208 | * selected by the userspace override. |
93 | * name that matches the override string, it returns that clocksource. | ||
94 | */ | 209 | */ |
95 | static struct clocksource *select_clocksource(void) | 210 | static struct clocksource *select_clocksource(void) |
96 | { | 211 | { |
97 | struct clocksource *best = NULL; | 212 | struct clocksource *next; |
98 | struct list_head *tmp; | ||
99 | 213 | ||
100 | list_for_each(tmp, &clocksource_list) { | 214 | if (list_empty(&clocksource_list)) |
101 | struct clocksource *src; | 215 | return NULL; |
102 | 216 | ||
103 | src = list_entry(tmp, struct clocksource, list); | 217 | if (clocksource_override) |
104 | if (!best) | 218 | next = clocksource_override; |
105 | best = src; | 219 | else |
106 | 220 | next = list_entry(clocksource_list.next, struct clocksource, | |
107 | /* check for override: */ | 221 | list); |
108 | if (strlen(src->name) == strlen(override_name) && | 222 | |
109 | !strcmp(src->name, override_name)) { | 223 | if (next == curr_clocksource) |
110 | best = src; | 224 | return NULL; |
111 | break; | ||
112 | } | ||
113 | /* pick the highest rating: */ | ||
114 | if (src->rating > best->rating) | ||
115 | best = src; | ||
116 | } | ||
117 | 225 | ||
118 | return best; | 226 | return next; |
119 | } | 227 | } |
120 | 228 | ||
121 | /** | 229 | /* |
122 | * is_registered_source - Checks if clocksource is registered | 230 | * Enqueue the clocksource sorted by rating |
123 | * @c: pointer to a clocksource | ||
124 | * | ||
125 | * Private helper function. Must hold clocksource_lock when called. | ||
126 | * | ||
127 | * Returns one if the clocksource is already registered, zero otherwise. | ||
128 | */ | 231 | */ |
129 | static int is_registered_source(struct clocksource *c) | 232 | static int clocksource_enqueue(struct clocksource *c) |
130 | { | 233 | { |
131 | int len = strlen(c->name); | 234 | struct list_head *tmp, *entry = &clocksource_list; |
132 | struct list_head *tmp; | ||
133 | 235 | ||
134 | list_for_each(tmp, &clocksource_list) { | 236 | list_for_each(tmp, &clocksource_list) { |
135 | struct clocksource *src; | 237 | struct clocksource *cs; |
136 | 238 | ||
137 | src = list_entry(tmp, struct clocksource, list); | 239 | cs = list_entry(tmp, struct clocksource, list); |
138 | if (strlen(src->name) == len && !strcmp(src->name, c->name)) | 240 | if (cs == c) |
139 | return 1; | 241 | return -EBUSY; |
242 | /* Keep track of the place where to insert */ | ||
243 | if (cs->rating >= c->rating) | ||
244 | entry = tmp; | ||
140 | } | 245 | } |
246 | list_add(&c->list, entry); | ||
247 | |||
248 | if (strlen(c->name) == strlen(override_name) && | ||
249 | !strcmp(c->name, override_name)) | ||
250 | clocksource_override = c; | ||
141 | 251 | ||
142 | return 0; | 252 | return 0; |
143 | } | 253 | } |
@@ -150,42 +260,35 @@ static int is_registered_source(struct clocksource *c) | |||
150 | */ | 260 | */ |
151 | int clocksource_register(struct clocksource *c) | 261 | int clocksource_register(struct clocksource *c) |
152 | { | 262 | { |
153 | int ret = 0; | ||
154 | unsigned long flags; | 263 | unsigned long flags; |
264 | int ret; | ||
155 | 265 | ||
156 | spin_lock_irqsave(&clocksource_lock, flags); | 266 | spin_lock_irqsave(&clocksource_lock, flags); |
157 | /* check if clocksource is already registered */ | 267 | ret = clocksource_enqueue(c); |
158 | if (is_registered_source(c)) { | 268 | if (!ret) |
159 | printk("register_clocksource: Cannot register %s. " | ||
160 | "Already registered!", c->name); | ||
161 | ret = -EBUSY; | ||
162 | } else { | ||
163 | /* register it */ | ||
164 | list_add(&c->list, &clocksource_list); | ||
165 | /* scan the registered clocksources, and pick the best one */ | ||
166 | next_clocksource = select_clocksource(); | 269 | next_clocksource = select_clocksource(); |
167 | } | ||
168 | spin_unlock_irqrestore(&clocksource_lock, flags); | 270 | spin_unlock_irqrestore(&clocksource_lock, flags); |
271 | if (!ret) | ||
272 | clocksource_check_watchdog(c); | ||
169 | return ret; | 273 | return ret; |
170 | } | 274 | } |
171 | EXPORT_SYMBOL(clocksource_register); | 275 | EXPORT_SYMBOL(clocksource_register); |
172 | 276 | ||
173 | /** | 277 | /** |
174 | * clocksource_reselect - Rescan list for next clocksource | 278 | * clocksource_change_rating - Change the rating of a registered clocksource |
175 | * | 279 | * |
176 | * A quick helper function to be used if a clocksource changes its | ||
177 | * rating. Forces the clocksource list to be re-scanned for the best | ||
178 | * clocksource. | ||
179 | */ | 280 | */ |
180 | void clocksource_reselect(void) | 281 | void clocksource_change_rating(struct clocksource *cs, int rating) |
181 | { | 282 | { |
182 | unsigned long flags; | 283 | unsigned long flags; |
183 | 284 | ||
184 | spin_lock_irqsave(&clocksource_lock, flags); | 285 | spin_lock_irqsave(&clocksource_lock, flags); |
286 | list_del(&cs->list); | ||
287 | cs->rating = rating; | ||
288 | clocksource_enqueue(cs); | ||
185 | next_clocksource = select_clocksource(); | 289 | next_clocksource = select_clocksource(); |
186 | spin_unlock_irqrestore(&clocksource_lock, flags); | 290 | spin_unlock_irqrestore(&clocksource_lock, flags); |
187 | } | 291 | } |
188 | EXPORT_SYMBOL(clocksource_reselect); | ||
189 | 292 | ||
190 | #ifdef CONFIG_SYSFS | 293 | #ifdef CONFIG_SYSFS |
191 | /** | 294 | /** |
@@ -221,7 +324,11 @@ sysfs_show_current_clocksources(struct sys_device *dev, char *buf) | |||
221 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, | 324 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, |
222 | const char *buf, size_t count) | 325 | const char *buf, size_t count) |
223 | { | 326 | { |
327 | struct clocksource *ovr = NULL; | ||
328 | struct list_head *tmp; | ||
224 | size_t ret = count; | 329 | size_t ret = count; |
330 | int len; | ||
331 | |||
225 | /* strings from sysfs write are not 0 terminated! */ | 332 | /* strings from sysfs write are not 0 terminated! */ |
226 | if (count >= sizeof(override_name)) | 333 | if (count >= sizeof(override_name)) |
227 | return -EINVAL; | 334 | return -EINVAL; |
@@ -229,17 +336,32 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, | |||
229 | /* strip off \n: */ | 336 | /* strip off \n: */ |
230 | if (buf[count-1] == '\n') | 337 | if (buf[count-1] == '\n') |
231 | count--; | 338 | count--; |
232 | if (count < 1) | ||
233 | return -EINVAL; | ||
234 | 339 | ||
235 | spin_lock_irq(&clocksource_lock); | 340 | spin_lock_irq(&clocksource_lock); |
236 | 341 | ||
237 | /* copy the name given: */ | 342 | if (count > 0) |
238 | memcpy(override_name, buf, count); | 343 | memcpy(override_name, buf, count); |
239 | override_name[count] = 0; | 344 | override_name[count] = 0; |
240 | 345 | ||
241 | /* try to select it: */ | 346 | len = strlen(override_name); |
242 | next_clocksource = select_clocksource(); | 347 | if (len) { |
348 | ovr = clocksource_override; | ||
349 | /* try to select it: */ | ||
350 | list_for_each(tmp, &clocksource_list) { | ||
351 | struct clocksource *cs; | ||
352 | |||
353 | cs = list_entry(tmp, struct clocksource, list); | ||
354 | if (strlen(cs->name) == len && | ||
355 | !strcmp(cs->name, override_name)) | ||
356 | ovr = cs; | ||
357 | } | ||
358 | } | ||
359 | |||
360 | /* Reselect, when the override name has changed */ | ||
361 | if (ovr != clocksource_override) { | ||
362 | clocksource_override = ovr; | ||
363 | next_clocksource = select_clocksource(); | ||
364 | } | ||
243 | 365 | ||
244 | spin_unlock_irq(&clocksource_lock); | 366 | spin_unlock_irq(&clocksource_lock); |
245 | 367 | ||
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index a99b2a6e6a07..3be8da8fed7e 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c | |||
@@ -62,7 +62,6 @@ struct clocksource clocksource_jiffies = { | |||
62 | .mask = 0xffffffff, /*32bits*/ | 62 | .mask = 0xffffffff, /*32bits*/ |
63 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ | 63 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ |
64 | .shift = JIFFIES_SHIFT, | 64 | .shift = JIFFIES_SHIFT, |
65 | .is_continuous = 0, /* tick based, not free running */ | ||
66 | }; | 65 | }; |
67 | 66 | ||
68 | static int __init init_jiffies_clocksource(void) | 67 | static int __init init_jiffies_clocksource(void) |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 3afeaa3a73f9..eb12509e00bd 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -24,7 +24,7 @@ static u64 tick_length, tick_length_base; | |||
24 | 24 | ||
25 | #define MAX_TICKADJ 500 /* microsecs */ | 25 | #define MAX_TICKADJ 500 /* microsecs */ |
26 | #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ | 26 | #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ |
27 | TICK_LENGTH_SHIFT) / HZ) | 27 | TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ) |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * phase-lock loop variables | 30 | * phase-lock loop variables |
@@ -46,13 +46,17 @@ long time_adjust; | |||
46 | 46 | ||
47 | static void ntp_update_frequency(void) | 47 | static void ntp_update_frequency(void) |
48 | { | 48 | { |
49 | tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT; | 49 | u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) |
50 | tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; | 50 | << TICK_LENGTH_SHIFT; |
51 | tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); | 51 | second_length += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; |
52 | second_length += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); | ||
52 | 53 | ||
53 | do_div(tick_length_base, HZ); | 54 | tick_length_base = second_length; |
54 | 55 | ||
55 | tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT; | 56 | do_div(second_length, HZ); |
57 | tick_nsec = second_length >> TICK_LENGTH_SHIFT; | ||
58 | |||
59 | do_div(tick_length_base, NTP_INTERVAL_FREQ); | ||
56 | } | 60 | } |
57 | 61 | ||
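The point of the rework above: the length of a nominal second is computed once, then divided by HZ for tick_nsec and by NTP_INTERVAL_FREQ for the NTP accumulation interval, so the two rates no longer have to be equal. A rough numeric sketch with the scaling shift omitted, assuming tick_usec=10000, USER_HZ=100, HZ=250 and NTP_INTERVAL_FREQ equal to HZ:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long tick_usec = 10000, user_hz = 100, hz = 250;
            unsigned long long ntp_interval_freq = hz;  /* assumed default */

            /* nominal second in ns: 10000 us * 1000 * 100 = 1e9 */
            unsigned long long second_ns = tick_usec * 1000 * user_hz;

            printf("tick_nsec    = %llu\n", second_ns / hz);
            printf("ntp interval = %llu\n", second_ns / ntp_interval_freq);
            return 0;
    }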
58 | /** | 62 | /** |
@@ -162,7 +166,7 @@ void second_overflow(void) | |||
162 | tick_length -= MAX_TICKADJ_SCALED; | 166 | tick_length -= MAX_TICKADJ_SCALED; |
163 | } else { | 167 | } else { |
164 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / | 168 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / |
165 | HZ) << TICK_LENGTH_SHIFT; | 169 | NTP_INTERVAL_FREQ) << TICK_LENGTH_SHIFT; |
166 | time_adjust = 0; | 170 | time_adjust = 0; |
167 | } | 171 | } |
168 | } | 172 | } |
@@ -239,7 +243,8 @@ int do_adjtimex(struct timex *txc) | |||
239 | result = -EINVAL; | 243 | result = -EINVAL; |
240 | goto leave; | 244 | goto leave; |
241 | } | 245 | } |
242 | time_freq = ((s64)txc->freq * NSEC_PER_USEC) >> (SHIFT_USEC - SHIFT_NSEC); | 246 | time_freq = ((s64)txc->freq * NSEC_PER_USEC) |
247 | >> (SHIFT_USEC - SHIFT_NSEC); | ||
243 | } | 248 | } |
244 | 249 | ||
245 | if (txc->modes & ADJ_MAXERROR) { | 250 | if (txc->modes & ADJ_MAXERROR) { |
@@ -309,7 +314,8 @@ int do_adjtimex(struct timex *txc) | |||
309 | freq_adj += time_freq; | 314 | freq_adj += time_freq; |
310 | freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); | 315 | freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); |
311 | time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); | 316 | time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); |
312 | time_offset = (time_offset / HZ) << SHIFT_UPDATE; | 317 | time_offset = (time_offset / NTP_INTERVAL_FREQ) |
318 | << SHIFT_UPDATE; | ||
313 | } /* STA_PLL */ | 319 | } /* STA_PLL */ |
314 | } /* txc->modes & ADJ_OFFSET */ | 320 | } /* txc->modes & ADJ_OFFSET */ |
315 | if (txc->modes & ADJ_TICK) | 321 | if (txc->modes & ADJ_TICK) |
@@ -324,8 +330,10 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) | |||
324 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) | 330 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) |
325 | txc->offset = save_adjust; | 331 | txc->offset = save_adjust; |
326 | else | 332 | else |
327 | txc->offset = shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000; | 333 | txc->offset = shift_right(time_offset, SHIFT_UPDATE) |
328 | txc->freq = (time_freq / NSEC_PER_USEC) << (SHIFT_USEC - SHIFT_NSEC); | 334 | * NTP_INTERVAL_FREQ / 1000; |
335 | txc->freq = (time_freq / NSEC_PER_USEC) | ||
336 | << (SHIFT_USEC - SHIFT_NSEC); | ||
329 | txc->maxerror = time_maxerror; | 337 | txc->maxerror = time_maxerror; |
330 | txc->esterror = time_esterror; | 338 | txc->esterror = time_esterror; |
331 | txc->status = time_status; | 339 | txc->status = time_status; |
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c new file mode 100644 index 000000000000..12b3efeb9f6f --- /dev/null +++ b/kernel/time/tick-broadcast.c | |||
@@ -0,0 +1,480 @@ | |||
1 | /* | ||
2 | * linux/kernel/time/tick-broadcast.c | ||
3 | * | ||
4 | * This file contains functions which emulate a local clock-event | ||
5 | * device via a broadcast event source. | ||
6 | * | ||
7 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
8 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
9 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | ||
10 | * | ||
11 | * This code is licenced under the GPL version 2. For details see | ||
12 | * kernel-base/COPYING. | ||
13 | */ | ||
14 | #include <linux/cpu.h> | ||
15 | #include <linux/err.h> | ||
16 | #include <linux/hrtimer.h> | ||
17 | #include <linux/irq.h> | ||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/profile.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/tick.h> | ||
22 | |||
23 | #include "tick-internal.h" | ||
24 | |||
25 | /* | ||
26 | * Broadcast support for broken x86 hardware, where the local apic | ||
27 | * timer stops in C3 state. | ||
28 | */ | ||
29 | |||
30 | struct tick_device tick_broadcast_device; | ||
31 | static cpumask_t tick_broadcast_mask; | ||
32 | static DEFINE_SPINLOCK(tick_broadcast_lock); | ||
33 | |||
34 | /* | ||
35 | * Debugging: see timer_list.c | ||
36 | */ | ||
37 | struct tick_device *tick_get_broadcast_device(void) | ||
38 | { | ||
39 | return &tick_broadcast_device; | ||
40 | } | ||
41 | |||
42 | cpumask_t *tick_get_broadcast_mask(void) | ||
43 | { | ||
44 | return &tick_broadcast_mask; | ||
45 | } | ||
46 | |||
47 | /* | ||
48 | * Start the device in periodic mode | ||
49 | */ | ||
50 | static void tick_broadcast_start_periodic(struct clock_event_device *bc) | ||
51 | { | ||
52 | if (bc && bc->mode == CLOCK_EVT_MODE_SHUTDOWN) | ||
53 | tick_setup_periodic(bc, 1); | ||
54 | } | ||
55 | |||
56 | /* | ||
57 | * Check whether the device can be utilized as the broadcast device: | ||
58 | */ | ||
59 | int tick_check_broadcast_device(struct clock_event_device *dev) | ||
60 | { | ||
61 | if (tick_broadcast_device.evtdev || | ||
62 | (dev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
63 | return 0; | ||
64 | |||
65 | clockevents_exchange_device(NULL, dev); | ||
66 | tick_broadcast_device.evtdev = dev; | ||
67 | if (!cpus_empty(tick_broadcast_mask)) | ||
68 | tick_broadcast_start_periodic(dev); | ||
69 | return 1; | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * Check whether the device is the broadcast device | ||
74 | */ | ||
75 | int tick_is_broadcast_device(struct clock_event_device *dev) | ||
76 | { | ||
77 | return (dev && tick_broadcast_device.evtdev == dev); | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * Check whether the device is dysfunctional and a placeholder which | ||
82 | * needs to be handled by the broadcast device. | ||
83 | */ | ||
84 | int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) | ||
85 | { | ||
86 | unsigned long flags; | ||
87 | int ret = 0; | ||
88 | |||
89 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
90 | |||
91 | /* | ||
92 | * Devices might be registered with both periodic and oneshot | ||
93 | * mode disabled. This signals that the device needs to be | ||
94 | * operated from the broadcast device and is a placeholder for | ||
95 | * the cpu local device. | ||
96 | */ | ||
97 | if (!tick_device_is_functional(dev)) { | ||
98 | dev->event_handler = tick_handle_periodic; | ||
99 | cpu_set(cpu, tick_broadcast_mask); | ||
100 | tick_broadcast_start_periodic(tick_broadcast_device.evtdev); | ||
101 | ret = 1; | ||
102 | } | ||
103 | |||
104 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
105 | return ret; | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * Broadcast the event to the cpus, which are set in the mask | ||
110 | */ | ||
111 | int tick_do_broadcast(cpumask_t mask) | ||
112 | { | ||
113 | int ret = 0, cpu = smp_processor_id(); | ||
114 | struct tick_device *td; | ||
115 | |||
116 | /* | ||
117 | * Check whether the current cpu is in the mask | ||
118 | */ | ||
119 | if (cpu_isset(cpu, mask)) { | ||
120 | cpu_clear(cpu, mask); | ||
121 | td = &per_cpu(tick_cpu_device, cpu); | ||
122 | td->evtdev->event_handler(td->evtdev); | ||
123 | ret = 1; | ||
124 | } | ||
125 | |||
126 | if (!cpus_empty(mask)) { | ||
127 | /* | ||
128 | * It might be necessary to actually check whether the devices | ||
128 | * have different broadcast functions. For now, just use | ||
129 | * that of the first device. This works as long as we have this | ||
130 | * misfeature only on x86 (lapic). | ||
132 | */ | ||
133 | cpu = first_cpu(mask); | ||
134 | td = &per_cpu(tick_cpu_device, cpu); | ||
135 | td->evtdev->broadcast(mask); | ||
136 | ret = 1; | ||
137 | } | ||
138 | return ret; | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * Periodic broadcast: | ||
143 | * - invoke the broadcast handlers | ||
144 | */ | ||
145 | static void tick_do_periodic_broadcast(void) | ||
146 | { | ||
147 | cpumask_t mask; | ||
148 | |||
149 | spin_lock(&tick_broadcast_lock); | ||
150 | |||
151 | cpus_and(mask, cpu_online_map, tick_broadcast_mask); | ||
152 | tick_do_broadcast(mask); | ||
153 | |||
154 | spin_unlock(&tick_broadcast_lock); | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * Event handler for periodic broadcast ticks | ||
159 | */ | ||
160 | static void tick_handle_periodic_broadcast(struct clock_event_device *dev) | ||
161 | { | ||
162 | dev->next_event.tv64 = KTIME_MAX; | ||
163 | |||
164 | tick_do_periodic_broadcast(); | ||
165 | |||
166 | /* | ||
167 | * The device is in periodic mode. No reprogramming necessary: | ||
168 | */ | ||
169 | if (dev->mode == CLOCK_EVT_MODE_PERIODIC) | ||
170 | return; | ||
171 | |||
172 | /* | ||
173 | * Setup the next period for devices, which do not have | ||
174 | * periodic mode: | ||
175 | */ | ||
176 | for (;;) { | ||
177 | ktime_t next = ktime_add(dev->next_event, tick_period); | ||
178 | |||
179 | if (!clockevents_program_event(dev, next, ktime_get())) | ||
180 | return; | ||
181 | tick_do_periodic_broadcast(); | ||
182 | } | ||
183 | } | ||
184 | |||
185 | /* | ||
186 | * Powerstate information: The system enters/leaves a state, where | ||
187 | * affected devices might stop | ||
188 | */ | ||
189 | static void tick_do_broadcast_on_off(void *why) | ||
190 | { | ||
191 | struct clock_event_device *bc, *dev; | ||
192 | struct tick_device *td; | ||
193 | unsigned long flags, *reason = why; | ||
194 | int cpu; | ||
195 | |||
196 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
197 | |||
198 | cpu = smp_processor_id(); | ||
199 | td = &per_cpu(tick_cpu_device, cpu); | ||
200 | dev = td->evtdev; | ||
201 | bc = tick_broadcast_device.evtdev; | ||
202 | |||
203 | /* | ||
204 | * Is the device in broadcast mode forever or is it not | ||
205 | * affected by the powerstate? | ||
206 | */ | ||
207 | if (!dev || !tick_device_is_functional(dev) || | ||
208 | !(dev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
209 | goto out; | ||
210 | |||
211 | if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_ON) { | ||
212 | if (!cpu_isset(cpu, tick_broadcast_mask)) { | ||
213 | cpu_set(cpu, tick_broadcast_mask); | ||
214 | if (td->mode == TICKDEV_MODE_PERIODIC) | ||
215 | clockevents_set_mode(dev, | ||
216 | CLOCK_EVT_MODE_SHUTDOWN); | ||
217 | } | ||
218 | } else { | ||
219 | if (cpu_isset(cpu, tick_broadcast_mask)) { | ||
220 | cpu_clear(cpu, tick_broadcast_mask); | ||
221 | if (td->mode == TICKDEV_MODE_PERIODIC) | ||
222 | tick_setup_periodic(dev, 0); | ||
223 | } | ||
224 | } | ||
225 | |||
226 | if (cpus_empty(tick_broadcast_mask)) | ||
227 | clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); | ||
228 | else { | ||
229 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) | ||
230 | tick_broadcast_start_periodic(bc); | ||
231 | else | ||
232 | tick_broadcast_setup_oneshot(bc); | ||
233 | } | ||
234 | out: | ||
235 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
236 | } | ||
237 | |||
238 | /* | ||
239 | * Powerstate information: The system enters/leaves a state, where | ||
240 | * affected devices might stop. | ||
241 | */ | ||
242 | void tick_broadcast_on_off(unsigned long reason, int *oncpu) | ||
243 | { | ||
244 | int cpu = get_cpu(); | ||
245 | |||
246 | if (cpu == *oncpu) | ||
247 | tick_do_broadcast_on_off(&reason); | ||
248 | else | ||
249 | smp_call_function_single(*oncpu, tick_do_broadcast_on_off, | ||
250 | &reason, 1, 1); | ||
251 | put_cpu(); | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Set the periodic handler depending on broadcast on/off | ||
256 | */ | ||
257 | void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) | ||
258 | { | ||
259 | if (!broadcast) | ||
260 | dev->event_handler = tick_handle_periodic; | ||
261 | else | ||
262 | dev->event_handler = tick_handle_periodic_broadcast; | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Remove a CPU from broadcasting | ||
267 | */ | ||
268 | void tick_shutdown_broadcast(unsigned int *cpup) | ||
269 | { | ||
270 | struct clock_event_device *bc; | ||
271 | unsigned long flags; | ||
272 | unsigned int cpu = *cpup; | ||
273 | |||
274 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
275 | |||
276 | bc = tick_broadcast_device.evtdev; | ||
277 | cpu_clear(cpu, tick_broadcast_mask); | ||
278 | |||
279 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { | ||
280 | if (bc && cpus_empty(tick_broadcast_mask)) | ||
281 | clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); | ||
282 | } | ||
283 | |||
284 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
285 | } | ||
286 | |||
287 | #ifdef CONFIG_TICK_ONESHOT | ||
288 | |||
289 | static cpumask_t tick_broadcast_oneshot_mask; | ||
290 | |||
291 | /* | ||
292 | * Debugging: see timer_list.c | ||
293 | */ | ||
294 | cpumask_t *tick_get_broadcast_oneshot_mask(void) | ||
295 | { | ||
296 | return &tick_broadcast_oneshot_mask; | ||
297 | } | ||
298 | |||
299 | static int tick_broadcast_set_event(ktime_t expires, int force) | ||
300 | { | ||
301 | struct clock_event_device *bc = tick_broadcast_device.evtdev; | ||
302 | ktime_t now = ktime_get(); | ||
303 | int res; | ||
304 | |||
305 | for (;;) { | ||
306 | res = clockevents_program_event(bc, expires, now); | ||
307 | if (!res || !force) | ||
308 | return res; | ||
309 | now = ktime_get(); | ||
310 | expires = ktime_add(now, ktime_set(0, bc->min_delta_ns)); | ||
311 | } | ||
312 | } | ||
313 | |||
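The force loop above is a "program or keep trying" pattern: whenever the requested expiry is already in the past and the device returns -ETIME, it retries with the earliest expiry the hardware can still honour. A minimal model of the same loop against a fake device (everything here is illustrative):

    #include <stdint.h>

    static int64_t now_ns;                      /* fake monotonic clock */
    static const int64_t min_delta_ns = 1000;

    /* fake device: rejects deadlines that are not in the future */
    static int program(int64_t expires)
    {
            return expires > now_ns ? 0 : -1;   /* -1 stands in for -ETIME */
    }

    int program_forced(int64_t expires)
    {
            for (;;) {
                    if (!program(expires))
                            return 0;
                    now_ns += 10;               /* time advanced meanwhile */
                    expires = now_ns + min_delta_ns;
            }
    }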
314 | /* | ||
315 | * Reprogram the broadcast device: | ||
316 | * | ||
317 | * Called with tick_broadcast_lock held and interrupts disabled. | ||
318 | */ | ||
319 | static int tick_broadcast_reprogram(void) | ||
320 | { | ||
321 | ktime_t expires = { .tv64 = KTIME_MAX }; | ||
322 | struct tick_device *td; | ||
323 | int cpu; | ||
324 | |||
325 | /* | ||
326 | * Find the event which expires next: | ||
327 | */ | ||
328 | for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS; | ||
329 | cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) { | ||
330 | td = &per_cpu(tick_cpu_device, cpu); | ||
331 | if (td->evtdev->next_event.tv64 < expires.tv64) | ||
332 | expires = td->evtdev->next_event; | ||
333 | } | ||
334 | |||
335 | if (expires.tv64 == KTIME_MAX) | ||
336 | return 0; | ||
337 | |||
338 | return tick_broadcast_set_event(expires, 0); | ||
339 | } | ||
340 | |||
341 | /* | ||
342 | * Handle oneshot mode broadcasting | ||
343 | */ | ||
344 | static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) | ||
345 | { | ||
346 | struct tick_device *td; | ||
347 | cpumask_t mask; | ||
348 | ktime_t now; | ||
349 | int cpu; | ||
350 | |||
351 | spin_lock(&tick_broadcast_lock); | ||
352 | again: | ||
353 | dev->next_event.tv64 = KTIME_MAX; | ||
354 | mask = CPU_MASK_NONE; | ||
355 | now = ktime_get(); | ||
356 | /* Find all expired events */ | ||
357 | for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS; | ||
358 | cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) { | ||
359 | td = &per_cpu(tick_cpu_device, cpu); | ||
360 | if (td->evtdev->next_event.tv64 <= now.tv64) | ||
361 | cpu_set(cpu, mask); | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * Wakeup the cpus which have an expired event. The broadcast | ||
366 | * device is reprogrammed in the return from idle code. | ||
367 | */ | ||
368 | if (!tick_do_broadcast(mask)) { | ||
369 | /* | ||
370 | * The global event did not expire any CPU local | ||
371 | * events. This happens in dyntick mode, as the | ||
372 | * maximum PIT delta is quite small. | ||
373 | */ | ||
374 | if (tick_broadcast_reprogram()) | ||
375 | goto again; | ||
376 | } | ||
377 | spin_unlock(&tick_broadcast_lock); | ||
378 | } | ||
379 | |||
380 | /* | ||
381 | * Powerstate information: The system enters/leaves a state, where | ||
382 | * affected devices might stop | ||
383 | */ | ||
384 | void tick_broadcast_oneshot_control(unsigned long reason) | ||
385 | { | ||
386 | struct clock_event_device *bc, *dev; | ||
387 | struct tick_device *td; | ||
388 | unsigned long flags; | ||
389 | int cpu; | ||
390 | |||
391 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
392 | |||
393 | /* | ||
394 | * Periodic mode does not care about the enter/exit of power | ||
395 | * states | ||
396 | */ | ||
397 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) | ||
398 | goto out; | ||
399 | |||
400 | bc = tick_broadcast_device.evtdev; | ||
401 | cpu = smp_processor_id(); | ||
402 | td = &per_cpu(tick_cpu_device, cpu); | ||
403 | dev = td->evtdev; | ||
404 | |||
405 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
406 | goto out; | ||
407 | |||
408 | if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { | ||
409 | if (!cpu_isset(cpu, tick_broadcast_oneshot_mask)) { | ||
410 | cpu_set(cpu, tick_broadcast_oneshot_mask); | ||
411 | clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); | ||
412 | if (dev->next_event.tv64 < bc->next_event.tv64) | ||
413 | tick_broadcast_set_event(dev->next_event, 1); | ||
414 | } | ||
415 | } else { | ||
416 | if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) { | ||
417 | cpu_clear(cpu, tick_broadcast_oneshot_mask); | ||
418 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | ||
419 | if (dev->next_event.tv64 != KTIME_MAX) | ||
420 | tick_program_event(dev->next_event, 1); | ||
421 | } | ||
422 | } | ||
423 | |||
424 | out: | ||
425 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
426 | } | ||
427 | |||
428 | /** | ||
429 | * tick_broadcast_setup_oneshot - setup the broadcast device for oneshot | ||
430 | */ | ||
431 | void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | ||
432 | { | ||
433 | if (bc->mode != CLOCK_EVT_MODE_ONESHOT) { | ||
434 | bc->event_handler = tick_handle_oneshot_broadcast; | ||
435 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); | ||
436 | bc->next_event.tv64 = KTIME_MAX; | ||
437 | } | ||
438 | } | ||
439 | |||
440 | /* | ||
441 | * Select oneshot operating mode for the broadcast device | ||
442 | */ | ||
443 | void tick_broadcast_switch_to_oneshot(void) | ||
444 | { | ||
445 | struct clock_event_device *bc; | ||
446 | unsigned long flags; | ||
447 | |||
448 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
449 | |||
450 | tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; | ||
451 | bc = tick_broadcast_device.evtdev; | ||
452 | if (bc) | ||
453 | tick_broadcast_setup_oneshot(bc); | ||
454 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
455 | } | ||
456 | |||
457 | |||
458 | /* | ||
459 | * Remove a dead CPU from broadcasting | ||
460 | */ | ||
461 | void tick_shutdown_broadcast_oneshot(unsigned int *cpup) | ||
462 | { | ||
463 | struct clock_event_device *bc; | ||
464 | unsigned long flags; | ||
465 | unsigned int cpu = *cpup; | ||
466 | |||
467 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
468 | |||
469 | bc = tick_broadcast_device.evtdev; | ||
470 | cpu_clear(cpu, tick_broadcast_oneshot_mask); | ||
471 | |||
472 | if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) { | ||
473 | if (bc && cpus_empty(tick_broadcast_oneshot_mask)) | ||
474 | clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); | ||
475 | } | ||
476 | |||
477 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
478 | } | ||
479 | |||
480 | #endif | ||
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c new file mode 100644 index 000000000000..4500e347f1bb --- /dev/null +++ b/kernel/time/tick-common.c | |||
@@ -0,0 +1,346 @@ | |||
1 | /* | ||
2 | * linux/kernel/time/tick-common.c | ||
3 | * | ||
4 | * This file contains the base functions to manage periodic tick | ||
5 | * related events. | ||
6 | * | ||
7 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
8 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
9 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | ||
10 | * | ||
11 | * This code is licenced under the GPL version 2. For details see | ||
12 | * kernel-base/COPYING. | ||
13 | */ | ||
14 | #include <linux/cpu.h> | ||
15 | #include <linux/err.h> | ||
16 | #include <linux/hrtimer.h> | ||
17 | #include <linux/irq.h> | ||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/profile.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/tick.h> | ||
22 | |||
23 | #include "tick-internal.h" | ||
24 | |||
25 | /* | ||
26 | * Tick devices | ||
27 | */ | ||
28 | DEFINE_PER_CPU(struct tick_device, tick_cpu_device); | ||
29 | /* | ||
30 | * Tick next event: keeps track of the tick time | ||
31 | */ | ||
32 | ktime_t tick_next_period; | ||
33 | ktime_t tick_period; | ||
34 | static int tick_do_timer_cpu = -1; | ||
35 | DEFINE_SPINLOCK(tick_device_lock); | ||
36 | |||
37 | /* | ||
38 | * Debugging: see timer_list.c | ||
39 | */ | ||
40 | struct tick_device *tick_get_device(int cpu) | ||
41 | { | ||
42 | return &per_cpu(tick_cpu_device, cpu); | ||
43 | } | ||
44 | |||
45 | /** | ||
46 | * tick_is_oneshot_available - check for a oneshot capable event device | ||
47 | */ | ||
48 | int tick_is_oneshot_available(void) | ||
49 | { | ||
50 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | ||
51 | |||
52 | return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT); | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * Periodic tick | ||
57 | */ | ||
58 | static void tick_periodic(int cpu) | ||
59 | { | ||
60 | if (tick_do_timer_cpu == cpu) { | ||
61 | write_seqlock(&xtime_lock); | ||
62 | |||
63 | /* Keep track of the next tick event */ | ||
64 | tick_next_period = ktime_add(tick_next_period, tick_period); | ||
65 | |||
66 | do_timer(1); | ||
67 | write_sequnlock(&xtime_lock); | ||
68 | } | ||
69 | |||
70 | update_process_times(user_mode(get_irq_regs())); | ||
71 | profile_tick(CPU_PROFILING); | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Event handler for periodic ticks | ||
76 | */ | ||
77 | void tick_handle_periodic(struct clock_event_device *dev) | ||
78 | { | ||
79 | int cpu = smp_processor_id(); | ||
80 | |||
81 | tick_periodic(cpu); | ||
82 | |||
83 | if (dev->mode != CLOCK_EVT_MODE_ONESHOT) | ||
84 | return; | ||
85 | /* | ||
86 | * Setup the next period for devices, which do not have | ||
87 | * periodic mode: | ||
88 | */ | ||
89 | for (;;) { | ||
90 | ktime_t next = ktime_add(dev->next_event, tick_period); | ||
91 | |||
92 | if (!clockevents_program_event(dev, next, ktime_get())) | ||
93 | return; | ||
94 | tick_periodic(cpu); | ||
95 | } | ||
96 | } | ||
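The catch-up loop above leans on the return convention of clockevents_program_event(): programming an expiry that is not in the future must fail, and each failure is one missed period that gets accounted through tick_periodic() before retrying. A hedged model of that convention; the mult/shift scaling and the set_next_event() signature are assumptions about the clockevents layer of this series:

	static int model_program_event(struct clock_event_device *dev,
				       ktime_t expires, ktime_t now)
	{
		int64_t delta = ktime_to_ns(ktime_sub(expires, now));
		unsigned long long clc;

		if (delta <= 0)
			return -ETIME;	/* in the past: caller accounts a tick */

		dev->next_event = expires;
		/* scale nanoseconds to device ticks via mult/shift */
		clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
		return dev->set_next_event((unsigned long) clc, dev);
	}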
97 | |||
98 | /* | ||
99 | * Setup the device for a periodic tick | ||
100 | */ | ||
101 | void tick_setup_periodic(struct clock_event_device *dev, int broadcast) | ||
102 | { | ||
103 | tick_set_periodic_handler(dev, broadcast); | ||
104 | |||
105 | /* Broadcast setup ? */ | ||
106 | if (!tick_device_is_functional(dev)) | ||
107 | return; | ||
108 | |||
109 | if (dev->features & CLOCK_EVT_FEAT_PERIODIC) { | ||
110 | clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); | ||
111 | } else { | ||
112 | unsigned long seq; | ||
113 | ktime_t next; | ||
114 | |||
115 | do { | ||
116 | seq = read_seqbegin(&xtime_lock); | ||
117 | next = tick_next_period; | ||
118 | } while (read_seqretry(&xtime_lock, seq)); | ||
119 | |||
120 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | ||
121 | |||
122 | for (;;) { | ||
123 | if (!clockevents_program_event(dev, next, ktime_get())) | ||
124 | return; | ||
125 | next = ktime_add(next, tick_period); | ||
126 | } | ||
127 | } | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Setup the tick device | ||
132 | */ | ||
133 | static void tick_setup_device(struct tick_device *td, | ||
134 | struct clock_event_device *newdev, int cpu, | ||
135 | cpumask_t cpumask) | ||
136 | { | ||
137 | ktime_t next_event; | ||
138 | void (*handler)(struct clock_event_device *) = NULL; | ||
139 | |||
140 | /* | ||
141 | * First device setup ? | ||
142 | */ | ||
143 | if (!td->evtdev) { | ||
144 | /* | ||
145 | * If no cpu took the do_timer update, assign it to | ||
146 | * this cpu: | ||
147 | */ | ||
148 | if (tick_do_timer_cpu == -1) { | ||
149 | tick_do_timer_cpu = cpu; | ||
150 | tick_next_period = ktime_get(); | ||
151 | tick_period = ktime_set(0, NSEC_PER_SEC / HZ); | ||
152 | } | ||
153 | |||
154 | /* | ||
155 | * Startup in periodic mode first. | ||
156 | */ | ||
157 | td->mode = TICKDEV_MODE_PERIODIC; | ||
158 | } else { | ||
159 | handler = td->evtdev->event_handler; | ||
160 | next_event = td->evtdev->next_event; | ||
161 | } | ||
162 | |||
163 | td->evtdev = newdev; | ||
164 | |||
165 | /* | ||
166 | * When the device is not per cpu, pin the interrupt to the | ||
167 | * current cpu: | ||
168 | */ | ||
169 | if (!cpus_equal(newdev->cpumask, cpumask)) | ||
170 | irq_set_affinity(newdev->irq, cpumask); | ||
171 | |||
172 | /* | ||
173 | * When global broadcasting is active, check if the current | ||
174 | * device is registered as a placeholder for broadcast mode. | ||
175 | * This allows us to handle this x86 misfeature in a generic | ||
176 | * way. | ||
177 | */ | ||
178 | if (tick_device_uses_broadcast(newdev, cpu)) | ||
179 | return; | ||
180 | |||
181 | if (td->mode == TICKDEV_MODE_PERIODIC) | ||
182 | tick_setup_periodic(newdev, 0); | ||
183 | else | ||
184 | tick_setup_oneshot(newdev, handler, next_event); | ||
185 | } | ||
186 | |||
187 | /* | ||
188 | * Check, if the new registered device should be used. | ||
189 | */ | ||
190 | static int tick_check_new_device(struct clock_event_device *newdev) | ||
191 | { | ||
192 | struct clock_event_device *curdev; | ||
193 | struct tick_device *td; | ||
194 | int cpu, ret = NOTIFY_OK; | ||
195 | unsigned long flags; | ||
196 | cpumask_t cpumask; | ||
197 | |||
198 | spin_lock_irqsave(&tick_device_lock, flags); | ||
199 | |||
200 | cpu = smp_processor_id(); | ||
201 | if (!cpu_isset(cpu, newdev->cpumask)) | ||
202 | goto out; | ||
203 | |||
204 | td = &per_cpu(tick_cpu_device, cpu); | ||
205 | curdev = td->evtdev; | ||
206 | cpumask = cpumask_of_cpu(cpu); | ||
207 | |||
208 | /* cpu local device ? */ | ||
209 | if (!cpus_equal(newdev->cpumask, cpumask)) { | ||
210 | |||
211 | /* | ||
212 | * If the cpu affinity of the device interrupt cannot | ||
213 | * be set, ignore it. | ||
214 | */ | ||
215 | if (!irq_can_set_affinity(newdev->irq)) | ||
216 | goto out_bc; | ||
217 | |||
218 | /* | ||
219 | * If we have a cpu local device already, do not replace it | ||
220 | * by a non cpu local device | ||
221 | */ | ||
222 | if (curdev && cpus_equal(curdev->cpumask, cpumask)) | ||
223 | goto out_bc; | ||
224 | } | ||
225 | |||
226 | /* | ||
227 | * If we have an active device, then check the rating and the oneshot | ||
228 | * feature. | ||
229 | */ | ||
230 | if (curdev) { | ||
231 | /* | ||
232 | * Prefer oneshot capable devices ! | ||
233 | */ | ||
234 | if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) && | ||
235 | !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) | ||
236 | goto out_bc; | ||
237 | /* | ||
238 | * Check the rating | ||
239 | */ | ||
240 | if (curdev->rating >= newdev->rating) | ||
241 | goto out_bc; | ||
242 | } | ||
243 | |||
244 | /* | ||
245 | * Replace the possibly existing device by the new | ||
246 | * device. If the current device is the broadcast device, do | ||
247 | * not give it back to the clockevents layer ! | ||
248 | */ | ||
249 | if (tick_is_broadcast_device(curdev)) { | ||
250 | clockevents_set_mode(curdev, CLOCK_EVT_MODE_SHUTDOWN); | ||
251 | curdev = NULL; | ||
252 | } | ||
253 | clockevents_exchange_device(curdev, newdev); | ||
254 | tick_setup_device(td, newdev, cpu, cpumask); | ||
255 | if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) | ||
256 | tick_oneshot_notify(); | ||
257 | |||
258 | spin_unlock_irqrestore(&tick_device_lock, flags); | ||
259 | return NOTIFY_STOP; | ||
260 | |||
261 | out_bc: | ||
262 | /* | ||
263 | * Can the new device be used as a broadcast device ? | ||
264 | */ | ||
265 | if (tick_check_broadcast_device(newdev)) | ||
266 | ret = NOTIFY_STOP; | ||
267 | out: | ||
268 | spin_unlock_irqrestore(&tick_device_lock, flags); | ||
269 | |||
270 | return ret; | ||
271 | } | ||
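The selection policy above (cpu-local affinity first, then the oneshot feature, then the rating) is exercised whenever a timer driver registers its hardware. A hedged sketch of the driver side; the names, the rating value and the set_mode/set_next_event callbacks are illustrative, and a real driver would also fill in mult, shift and the min/max delta fields:

	static struct clock_event_device my_lapic_timer = {
		.name		= "my_lapic",		/* illustrative */
		.features	= CLOCK_EVT_FEAT_PERIODIC |
				  CLOCK_EVT_FEAT_ONESHOT |
				  CLOCK_EVT_FEAT_C3STOP,
		.rating		= 100,
		.irq		= -1,			/* per-cpu device */
		.set_mode	= my_lapic_set_mode,	/* hypothetical */
		.set_next_event	= my_lapic_next_event,	/* hypothetical */
	};

	static void my_lapic_register(void)
	{
		my_lapic_timer.cpumask = cpumask_of_cpu(smp_processor_id());
		/* arrives at tick_check_new_device() via CLOCK_EVT_NOTIFY_ADD */
		clockevents_register_device(&my_lapic_timer);
	}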
272 | |||
273 | /* | ||
274 | * Shutdown an event device on a given cpu: | ||
275 | * | ||
276 | * This is called on a live CPU, when a CPU is dead. So we cannot | ||
277 | * access the hardware device itself. | ||
278 | * We just set the mode and remove it from the lists. | ||
279 | */ | ||
280 | static void tick_shutdown(unsigned int *cpup) | ||
281 | { | ||
282 | struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); | ||
283 | struct clock_event_device *dev = td->evtdev; | ||
284 | unsigned long flags; | ||
285 | |||
286 | spin_lock_irqsave(&tick_device_lock, flags); | ||
287 | td->mode = TICKDEV_MODE_PERIODIC; | ||
288 | if (dev) { | ||
289 | /* | ||
290 | * Prevent the clock events layer from trying to call | ||
291 | * the set mode function! | ||
292 | */ | ||
293 | dev->mode = CLOCK_EVT_MODE_UNUSED; | ||
294 | clockevents_exchange_device(dev, NULL); | ||
295 | td->evtdev = NULL; | ||
296 | } | ||
297 | spin_unlock_irqrestore(&tick_device_lock, flags); | ||
298 | } | ||
299 | |||
300 | /* | ||
301 | * Notification about clock event devices | ||
302 | */ | ||
303 | static int tick_notify(struct notifier_block *nb, unsigned long reason, | ||
304 | void *dev) | ||
305 | { | ||
306 | switch (reason) { | ||
307 | |||
308 | case CLOCK_EVT_NOTIFY_ADD: | ||
309 | return tick_check_new_device(dev); | ||
310 | |||
311 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: | ||
312 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: | ||
313 | tick_broadcast_on_off(reason, dev); | ||
314 | break; | ||
315 | |||
316 | case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: | ||
317 | case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: | ||
318 | tick_broadcast_oneshot_control(reason); | ||
319 | break; | ||
320 | |||
321 | case CLOCK_EVT_NOTIFY_CPU_DEAD: | ||
322 | tick_shutdown_broadcast_oneshot(dev); | ||
323 | tick_shutdown_broadcast(dev); | ||
324 | tick_shutdown(dev); | ||
325 | break; | ||
326 | |||
327 | default: | ||
328 | break; | ||
329 | } | ||
330 | |||
331 | return NOTIFY_OK; | ||
332 | } | ||
333 | |||
334 | static struct notifier_block tick_notifier = { | ||
335 | .notifier_call = tick_notify, | ||
336 | }; | ||
337 | |||
338 | /** | ||
339 | * tick_init - initialize the tick control | ||
340 | * | ||
341 | * Register the notifier with the clockevents framework | ||
342 | */ | ||
343 | void __init tick_init(void) | ||
344 | { | ||
345 | clockevents_register_notifier(&tick_notifier); | ||
346 | } | ||
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h new file mode 100644 index 000000000000..54861a0f29ff --- /dev/null +++ b/kernel/time/tick-internal.h | |||
@@ -0,0 +1,110 @@ | |||
1 | /* | ||
2 | * tick internal variable and functions used by low/high res code | ||
3 | */ | ||
4 | DECLARE_PER_CPU(struct tick_device, tick_cpu_device); | ||
5 | extern spinlock_t tick_device_lock; | ||
6 | extern ktime_t tick_next_period; | ||
7 | extern ktime_t tick_period; | ||
8 | |||
9 | extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); | ||
10 | extern void tick_handle_periodic(struct clock_event_device *dev); | ||
11 | |||
12 | /* | ||
13 | * NO_HZ / high resolution timer shared code | ||
14 | */ | ||
15 | #ifdef CONFIG_TICK_ONESHOT | ||
16 | extern void tick_setup_oneshot(struct clock_event_device *newdev, | ||
17 | void (*handler)(struct clock_event_device *), | ||
18 | ktime_t nextevt); | ||
19 | extern int tick_program_event(ktime_t expires, int force); | ||
20 | extern void tick_oneshot_notify(void); | ||
21 | extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); | ||
22 | |||
23 | # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | ||
24 | extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); | ||
25 | extern void tick_broadcast_oneshot_control(unsigned long reason); | ||
26 | extern void tick_broadcast_switch_to_oneshot(void); | ||
27 | extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); | ||
28 | # else /* BROADCAST */ | ||
29 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | ||
30 | { | ||
31 | BUG(); | ||
32 | } | ||
33 | static inline void tick_broadcast_oneshot_control(unsigned long reason) { } | ||
34 | static inline void tick_broadcast_switch_to_oneshot(void) { } | ||
35 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } | ||
36 | # endif /* !BROADCAST */ | ||
37 | |||
38 | #else /* !ONESHOT */ | ||
39 | static inline | ||
40 | void tick_setup_oneshot(struct clock_event_device *newdev, | ||
41 | void (*handler)(struct clock_event_device *), | ||
42 | ktime_t nextevt) | ||
43 | { | ||
44 | BUG(); | ||
45 | } | ||
46 | static inline int tick_program_event(ktime_t expires, int force) | ||
47 | { | ||
48 | return 0; | ||
49 | } | ||
50 | static inline void tick_oneshot_notify(void) { } | ||
51 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | ||
52 | { | ||
53 | BUG(); | ||
54 | } | ||
55 | static inline void tick_broadcast_oneshot_control(unsigned long reason) { } | ||
56 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } | ||
57 | #endif /* !TICK_ONESHOT */ | ||
58 | |||
59 | /* | ||
60 | * Broadcasting support | ||
61 | */ | ||
62 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | ||
63 | extern int tick_do_broadcast(cpumask_t mask); | ||
64 | |||
65 | extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); | ||
66 | extern int tick_check_broadcast_device(struct clock_event_device *dev); | ||
67 | extern int tick_is_broadcast_device(struct clock_event_device *dev); | ||
68 | extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); | ||
69 | extern void tick_shutdown_broadcast(unsigned int *cpup); | ||
70 | |||
71 | extern void | ||
72 | tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); | ||
73 | |||
74 | #else /* !BROADCAST */ | ||
75 | |||
76 | static inline int tick_check_broadcast_device(struct clock_event_device *dev) | ||
77 | { | ||
78 | return 0; | ||
79 | } | ||
80 | |||
81 | static inline int tick_is_broadcast_device(struct clock_event_device *dev) | ||
82 | { | ||
83 | return 0; | ||
84 | } | ||
85 | static inline int tick_device_uses_broadcast(struct clock_event_device *dev, | ||
86 | int cpu) | ||
87 | { | ||
88 | return 0; | ||
89 | } | ||
90 | static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } | ||
91 | static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } | ||
92 | static inline void tick_shutdown_broadcast(unsigned int *cpup) { } | ||
93 | |||
94 | /* | ||
95 | * Set the periodic handler in non broadcast mode | ||
96 | */ | ||
97 | static inline void tick_set_periodic_handler(struct clock_event_device *dev, | ||
98 | int broadcast) | ||
99 | { | ||
100 | dev->event_handler = tick_handle_periodic; | ||
101 | } | ||
102 | #endif /* !BROADCAST */ | ||
103 | |||
104 | /* | ||
105 | * Check, if the device is functional or a dummy for broadcast | ||
106 | */ | ||
107 | static inline int tick_device_is_functional(struct clock_event_device *dev) | ||
108 | { | ||
109 | return !(dev->features & CLOCK_EVT_FEAT_DUMMY); | ||
110 | } | ||
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c new file mode 100644 index 000000000000..2e8b7ff863cc --- /dev/null +++ b/kernel/time/tick-oneshot.c | |||
@@ -0,0 +1,84 @@ | |||
1 | /* | ||
2 | * linux/kernel/time/tick-oneshot.c | ||
3 | * | ||
4 | * This file contains functions which manage high resolution tick | ||
5 | * related events. | ||
6 | * | ||
7 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
8 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
9 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | ||
10 | * | ||
11 | * This code is licenced under the GPL version 2. For details see | ||
12 | * kernel-base/COPYING. | ||
13 | */ | ||
14 | #include <linux/cpu.h> | ||
15 | #include <linux/err.h> | ||
16 | #include <linux/hrtimer.h> | ||
17 | #include <linux/irq.h> | ||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/profile.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/tick.h> | ||
22 | |||
23 | #include "tick-internal.h" | ||
24 | |||
25 | /** | ||
26 | * tick_program_event - program the cpu local tick device for the next event | ||
27 | */ | ||
28 | int tick_program_event(ktime_t expires, int force) | ||
29 | { | ||
30 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | ||
31 | ktime_t now = ktime_get(); | ||
32 | |||
33 | while (1) { | ||
34 | int ret = clockevents_program_event(dev, expires, now); | ||
35 | |||
36 | if (!ret || !force) | ||
37 | return ret; | ||
38 | now = ktime_get(); | ||
39 | expires = ktime_add(now, ktime_set(0, dev->min_delta_ns)); | ||
40 | } | ||
41 | } | ||
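The force parameter turns a programming failure into a busy retry at now + min_delta_ns, so a forced event may slip forward but is never dropped. The two caller patterns, sketched from how the rest of this series uses the function:

	/* Opportunistic: a nonzero return means the expiry already passed
	 * and the caller deals with the missed event itself: */
	if (tick_program_event(expires, 0)) {
		/* e.g. update jiffies and handle the event inline */
	}

	/* Forced: paths that must end up with an armed device, e.g. right
	 * after an event handler switch: */
	tick_program_event(expires, 1);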
42 | |||
43 | /** | ||
44 | * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz) | ||
45 | */ | ||
46 | void tick_setup_oneshot(struct clock_event_device *newdev, | ||
47 | void (*handler)(struct clock_event_device *), | ||
48 | ktime_t next_event) | ||
49 | { | ||
50 | newdev->event_handler = handler; | ||
51 | clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT); | ||
52 | clockevents_program_event(newdev, next_event, ktime_get()); | ||
53 | } | ||
54 | |||
55 | /** | ||
56 | * tick_switch_to_oneshot - switch to oneshot mode | ||
57 | */ | ||
58 | int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) | ||
59 | { | ||
60 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | ||
61 | struct clock_event_device *dev = td->evtdev; | ||
62 | |||
63 | if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) || | ||
64 | !tick_device_is_functional(dev)) | ||
65 | return -EINVAL; | ||
66 | |||
67 | td->mode = TICKDEV_MODE_ONESHOT; | ||
68 | dev->event_handler = handler; | ||
69 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | ||
70 | tick_broadcast_switch_to_oneshot(); | ||
71 | return 0; | ||
72 | } | ||
73 | |||
74 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
75 | /** | ||
76 | * tick_init_highres - switch to high resolution mode | ||
77 | * | ||
78 | * Called with interrupts disabled. | ||
79 | */ | ||
80 | int tick_init_highres(void) | ||
81 | { | ||
82 | return tick_switch_to_oneshot(hrtimer_interrupt); | ||
83 | } | ||
84 | #endif | ||
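tick_init_highres() is the hook through which the hrtimer core claims the local tick device once a suitable clocksource and clock event device are available. A hedged sketch of that caller, assuming hrtimer_switch_to_hres() is the name used on the hrtimers side of this series and omitting the base-offset bookkeeping:

	static int hrtimer_switch_to_hres(void)
	{
		unsigned long flags;

		local_irq_save(flags);
		if (tick_init_highres()) {
			/* no functional, oneshot capable device: stay lowres */
			local_irq_restore(flags);
			return 0;
		}
		/* from here on hrtimer_interrupt() drives the local tick */
		tick_setup_sched_timer();
		local_irq_restore(flags);
		return 1;
	}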
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c new file mode 100644 index 000000000000..95e41f7f850b --- /dev/null +++ b/kernel/time/tick-sched.c | |||
@@ -0,0 +1,563 @@ | |||
1 | /* | ||
2 | * linux/kernel/time/tick-sched.c | ||
3 | * | ||
4 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
6 | * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner | ||
7 | * | ||
8 | * No-idle-tick (NO_HZ) implementation for low and high resolution timers | ||
9 | * | ||
10 | * Started by: Thomas Gleixner and Ingo Molnar | ||
11 | * | ||
12 | * For licencing details see kernel-base/COPYING | ||
13 | */ | ||
14 | #include <linux/cpu.h> | ||
15 | #include <linux/err.h> | ||
16 | #include <linux/hrtimer.h> | ||
17 | #include <linux/interrupt.h> | ||
18 | #include <linux/kernel_stat.h> | ||
19 | #include <linux/percpu.h> | ||
20 | #include <linux/profile.h> | ||
21 | #include <linux/sched.h> | ||
22 | #include <linux/tick.h> | ||
23 | |||
24 | #include "tick-internal.h" | ||
25 | |||
26 | /* | ||
27 | * Per cpu nohz control structure | ||
28 | */ | ||
29 | static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); | ||
30 | |||
31 | /* | ||
32 | * The time, when the last jiffy update happened. Protected by xtime_lock. | ||
33 | */ | ||
34 | static ktime_t last_jiffies_update; | ||
35 | |||
36 | struct tick_sched *tick_get_tick_sched(int cpu) | ||
37 | { | ||
38 | return &per_cpu(tick_cpu_sched, cpu); | ||
39 | } | ||
40 | |||
41 | /* | ||
42 | * Must be called with interrupts disabled ! | ||
43 | */ | ||
44 | static void tick_do_update_jiffies64(ktime_t now) | ||
45 | { | ||
46 | unsigned long ticks = 0; | ||
47 | ktime_t delta; | ||
48 | |||
49 | /* Reevaluate with xtime_lock held */ | ||
50 | write_seqlock(&xtime_lock); | ||
51 | |||
52 | delta = ktime_sub(now, last_jiffies_update); | ||
53 | if (delta.tv64 >= tick_period.tv64) { | ||
54 | |||
55 | delta = ktime_sub(delta, tick_period); | ||
56 | last_jiffies_update = ktime_add(last_jiffies_update, | ||
57 | tick_period); | ||
58 | |||
59 | /* Slow path for long timeouts */ | ||
60 | if (unlikely(delta.tv64 >= tick_period.tv64)) { | ||
61 | s64 incr = ktime_to_ns(tick_period); | ||
62 | |||
63 | ticks = ktime_divns(delta, incr); | ||
64 | |||
65 | last_jiffies_update = ktime_add_ns(last_jiffies_update, | ||
66 | incr * ticks); | ||
67 | } | ||
68 | do_timer(++ticks); | ||
69 | } | ||
70 | write_sequnlock(&xtime_lock); | ||
71 | } | ||
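A worked example of the path above: suppose a CPU reaches this code 5.5 tick periods after last_jiffies_update. The unconditional step subtracts one period (delta becomes 4.5 periods) and advances last_jiffies_update by one period; the slow path then computes ticks = ktime_divns(4.5 periods, period) = 4 and advances last_jiffies_update by four more periods, so do_timer(++ticks) accounts all five elapsed jiffies in a single call and the remaining half period stays pending for the next update.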
72 | |||
73 | /* | ||
74 | * Initialize and return the time of the last jiffies update. | ||
75 | */ | ||
76 | static ktime_t tick_init_jiffy_update(void) | ||
77 | { | ||
78 | ktime_t period; | ||
79 | |||
80 | write_seqlock(&xtime_lock); | ||
81 | /* Did we start the jiffies update yet ? */ | ||
82 | if (last_jiffies_update.tv64 == 0) | ||
83 | last_jiffies_update = tick_next_period; | ||
84 | period = last_jiffies_update; | ||
85 | write_sequnlock(&xtime_lock); | ||
86 | return period; | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * NOHZ - aka dynamic tick functionality | ||
91 | */ | ||
92 | #ifdef CONFIG_NO_HZ | ||
93 | /* | ||
94 | * NO HZ enabled ? | ||
95 | */ | ||
96 | static int tick_nohz_enabled __read_mostly = 1; | ||
97 | |||
98 | /* | ||
99 | * Enable / Disable tickless mode | ||
100 | */ | ||
101 | static int __init setup_tick_nohz(char *str) | ||
102 | { | ||
103 | if (!strcmp(str, "off")) | ||
104 | tick_nohz_enabled = 0; | ||
105 | else if (!strcmp(str, "on")) | ||
106 | tick_nohz_enabled = 1; | ||
107 | else | ||
108 | return 0; | ||
109 | return 1; | ||
110 | } | ||
111 | |||
112 | __setup("nohz=", setup_tick_nohz); | ||
113 | |||
114 | /** | ||
115 | * tick_nohz_update_jiffies - update jiffies when idle was interrupted | ||
116 | * | ||
117 | * Called from interrupt entry when the CPU was idle | ||
118 | * | ||
119 | * In case the sched_tick was stopped on this CPU, we have to check if jiffies | ||
120 | * must be updated. Otherwise an interrupt handler could use a stale jiffy | ||
121 | * value. We do this unconditionally on any cpu, as we don't know whether the | ||
122 | * cpu which has the update task assigned is in a long sleep. | ||
123 | */ | ||
124 | void tick_nohz_update_jiffies(void) | ||
125 | { | ||
126 | int cpu = smp_processor_id(); | ||
127 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
128 | unsigned long flags; | ||
129 | ktime_t now; | ||
130 | |||
131 | if (!ts->tick_stopped) | ||
132 | return; | ||
133 | |||
134 | cpu_clear(cpu, nohz_cpu_mask); | ||
135 | now = ktime_get(); | ||
136 | |||
137 | local_irq_save(flags); | ||
138 | tick_do_update_jiffies64(now); | ||
139 | local_irq_restore(flags); | ||
140 | } | ||
141 | |||
142 | /** | ||
143 | * tick_nohz_stop_sched_tick - stop the idle tick from the idle task | ||
144 | * | ||
145 | * When the next event is more than a tick into the future, stop the idle tick | ||
146 | * Called either from the idle loop or from irq_exit() when an idle period was | ||
147 | * just interrupted by an interrupt which did not cause a reschedule. | ||
148 | */ | ||
149 | void tick_nohz_stop_sched_tick(void) | ||
150 | { | ||
151 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; | ||
152 | struct tick_sched *ts; | ||
153 | ktime_t last_update, expires, now, delta; | ||
154 | int cpu; | ||
155 | |||
156 | local_irq_save(flags); | ||
157 | |||
158 | cpu = smp_processor_id(); | ||
159 | ts = &per_cpu(tick_cpu_sched, cpu); | ||
160 | |||
161 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | ||
162 | goto end; | ||
163 | |||
164 | if (need_resched()) | ||
165 | goto end; | ||
166 | |||
167 | cpu = smp_processor_id(); | ||
168 | BUG_ON(local_softirq_pending()); | ||
169 | |||
170 | now = ktime_get(); | ||
171 | /* | ||
172 | * When called from irq_exit we need to account the idle sleep time | ||
173 | * correctly. | ||
174 | */ | ||
175 | if (ts->tick_stopped) { | ||
176 | delta = ktime_sub(now, ts->idle_entrytime); | ||
177 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | ||
178 | } | ||
179 | |||
180 | ts->idle_entrytime = now; | ||
181 | ts->idle_calls++; | ||
182 | |||
183 | /* Read jiffies and the time when jiffies were updated last */ | ||
184 | do { | ||
185 | seq = read_seqbegin(&xtime_lock); | ||
186 | last_update = last_jiffies_update; | ||
187 | last_jiffies = jiffies; | ||
188 | } while (read_seqretry(&xtime_lock, seq)); | ||
189 | |||
190 | /* Get the next timer wheel timer */ | ||
191 | next_jiffies = get_next_timer_interrupt(last_jiffies); | ||
192 | delta_jiffies = next_jiffies - last_jiffies; | ||
193 | |||
194 | /* | ||
195 | * Do not stop the tick, if we are only one jiffy off | ||
196 | * or if the cpu is required by RCU | ||
197 | */ | ||
198 | if (!ts->tick_stopped && (delta_jiffies == 1 || rcu_needs_cpu(cpu))) | ||
199 | goto out; | ||
200 | |||
201 | /* Schedule the tick, if we are at least one jiffy off */ | ||
202 | if ((long)delta_jiffies >= 1) { | ||
203 | |||
204 | if (rcu_needs_cpu(cpu)) | ||
205 | delta_jiffies = 1; | ||
206 | else | ||
207 | cpu_set(cpu, nohz_cpu_mask); | ||
208 | /* | ||
209 | * tick_nohz_stop_sched_tick() can be called several times before | ||
210 | * tick_nohz_restart_sched_tick() is called. This happens when | ||
211 | * interrupts arrive which do not cause a reschedule. In the | ||
212 | * first call we save the current tick time, so we can restart | ||
213 | * the scheduler tick in tick_nohz_restart_sched_tick(). | ||
214 | */ | ||
215 | if (!ts->tick_stopped) { | ||
216 | ts->idle_tick = ts->sched_timer.expires; | ||
217 | ts->tick_stopped = 1; | ||
218 | ts->idle_jiffies = last_jiffies; | ||
219 | } | ||
220 | /* | ||
221 | * calculate the expiry time for the next timer wheel | ||
222 | * timer | ||
223 | */ | ||
224 | expires = ktime_add_ns(last_update, tick_period.tv64 * | ||
225 | delta_jiffies); | ||
226 | ts->idle_expires = expires; | ||
227 | ts->idle_sleeps++; | ||
228 | |||
229 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | ||
230 | hrtimer_start(&ts->sched_timer, expires, | ||
231 | HRTIMER_MODE_ABS); | ||
232 | /* Check, if the timer was already in the past */ | ||
233 | if (hrtimer_active(&ts->sched_timer)) | ||
234 | goto out; | ||
235 | } else if (!tick_program_event(expires, 0)) | ||
236 | goto out; | ||
237 | /* | ||
238 | * We are past the event already. So we crossed a | ||
239 | * jiffy boundary. Update jiffies and raise the | ||
240 | * softirq. | ||
241 | */ | ||
242 | tick_do_update_jiffies64(ktime_get()); | ||
243 | cpu_clear(cpu, nohz_cpu_mask); | ||
244 | } | ||
245 | raise_softirq_irqoff(TIMER_SOFTIRQ); | ||
246 | out: | ||
247 | ts->next_jiffies = next_jiffies; | ||
248 | ts->last_jiffies = last_jiffies; | ||
249 | end: | ||
250 | local_irq_restore(flags); | ||
251 | } | ||
252 | |||
253 | /** | ||
254 | * tick_nohz_restart_sched_tick - restart the idle tick from the idle task | ||
255 | * | ||
256 | * Restart the idle tick when the CPU is woken up from idle | ||
257 | */ | ||
258 | void tick_nohz_restart_sched_tick(void) | ||
259 | { | ||
260 | int cpu = smp_processor_id(); | ||
261 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
262 | unsigned long ticks; | ||
263 | ktime_t now, delta; | ||
264 | |||
265 | if (!ts->tick_stopped) | ||
266 | return; | ||
267 | |||
268 | /* Update jiffies first */ | ||
269 | now = ktime_get(); | ||
270 | |||
271 | local_irq_disable(); | ||
272 | tick_do_update_jiffies64(now); | ||
273 | cpu_clear(cpu, nohz_cpu_mask); | ||
274 | |||
275 | /* Account the idle time */ | ||
276 | delta = ktime_sub(now, ts->idle_entrytime); | ||
277 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | ||
278 | |||
279 | /* | ||
280 | * We stopped the tick in idle. update_process_times() would miss the | ||
281 | * time we slept, as it does only a single tick of | ||
282 | * accounting. Enforce that this time is accounted to idle ! | ||
283 | */ | ||
284 | ticks = jiffies - ts->idle_jiffies; | ||
285 | /* | ||
286 | * We might be one off. Do not randomly account a huge number of ticks! | ||
287 | */ | ||
288 | if (ticks && ticks < LONG_MAX) { | ||
289 | add_preempt_count(HARDIRQ_OFFSET); | ||
290 | account_system_time(current, HARDIRQ_OFFSET, | ||
291 | jiffies_to_cputime(ticks)); | ||
292 | sub_preempt_count(HARDIRQ_OFFSET); | ||
293 | } | ||
294 | |||
295 | /* | ||
296 | * Cancel the scheduled timer and restore the tick | ||
297 | */ | ||
298 | ts->tick_stopped = 0; | ||
299 | hrtimer_cancel(&ts->sched_timer); | ||
300 | ts->sched_timer.expires = ts->idle_tick; | ||
301 | |||
302 | while (1) { | ||
303 | /* Forward the time to expire in the future */ | ||
304 | hrtimer_forward(&ts->sched_timer, now, tick_period); | ||
305 | |||
306 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | ||
307 | hrtimer_start(&ts->sched_timer, | ||
308 | ts->sched_timer.expires, | ||
309 | HRTIMER_MODE_ABS); | ||
310 | /* Check, if the timer was already in the past */ | ||
311 | if (hrtimer_active(&ts->sched_timer)) | ||
312 | break; | ||
313 | } else { | ||
314 | if (!tick_program_event(ts->sched_timer.expires, 0)) | ||
315 | break; | ||
316 | } | ||
317 | /* Update jiffies and reread time */ | ||
318 | tick_do_update_jiffies64(now); | ||
319 | now = ktime_get(); | ||
320 | } | ||
321 | local_irq_enable(); | ||
322 | } | ||
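Together the stop/restart pair brackets the architecture idle loop. A hedged sketch of the expected caller, modeled on how an arch cpu_idle() loop would adopt dynticks; safe_halt() stands in for the arch-specific wait instruction and the preemption bookkeeping is omitted:

	void cpu_idle(void)
	{
		for (;;) {
			tick_nohz_stop_sched_tick();	/* may switch the tick off */
			while (!need_resched())
				safe_halt();		/* arch-specific idle wait */
			tick_nohz_restart_sched_tick();	/* account sleep, re-arm */
			schedule();
		}
	}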
323 | |||
324 | static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) | ||
325 | { | ||
326 | hrtimer_forward(&ts->sched_timer, now, tick_period); | ||
327 | return tick_program_event(ts->sched_timer.expires, 0); | ||
328 | } | ||
329 | |||
330 | /* | ||
331 | * The nohz low res interrupt handler | ||
332 | */ | ||
333 | static void tick_nohz_handler(struct clock_event_device *dev) | ||
334 | { | ||
335 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
336 | struct pt_regs *regs = get_irq_regs(); | ||
337 | ktime_t now = ktime_get(); | ||
338 | |||
339 | dev->next_event.tv64 = KTIME_MAX; | ||
340 | |||
341 | /* Check, if the jiffies need an update */ | ||
342 | tick_do_update_jiffies64(now); | ||
343 | |||
344 | /* | ||
345 | * When we are idle and the tick is stopped, we have to touch | ||
346 | * the watchdog as we might not schedule for a really long | ||
347 | * time. This happens on completely idle SMP systems while | ||
348 | * waiting on the login prompt. We also increment the "start | ||
349 | * of idle" jiffy stamp so the idle accounting adjustment we | ||
350 | * do when we go busy again does not account too many ticks. | ||
351 | */ | ||
352 | if (ts->tick_stopped) { | ||
353 | touch_softlockup_watchdog(); | ||
354 | ts->idle_jiffies++; | ||
355 | } | ||
356 | |||
357 | update_process_times(user_mode(regs)); | ||
358 | profile_tick(CPU_PROFILING); | ||
359 | |||
360 | /* Do not restart, when we are in the idle loop */ | ||
361 | if (ts->tick_stopped) | ||
362 | return; | ||
363 | |||
364 | while (tick_nohz_reprogram(ts, now)) { | ||
365 | now = ktime_get(); | ||
366 | tick_do_update_jiffies64(now); | ||
367 | } | ||
368 | } | ||
369 | |||
370 | /** | ||
371 | * tick_nohz_switch_to_nohz - switch to nohz mode | ||
372 | */ | ||
373 | static void tick_nohz_switch_to_nohz(void) | ||
374 | { | ||
375 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
376 | ktime_t next; | ||
377 | |||
378 | if (!tick_nohz_enabled) | ||
379 | return; | ||
380 | |||
381 | local_irq_disable(); | ||
382 | if (tick_switch_to_oneshot(tick_nohz_handler)) { | ||
383 | local_irq_enable(); | ||
384 | return; | ||
385 | } | ||
386 | |||
387 | ts->nohz_mode = NOHZ_MODE_LOWRES; | ||
388 | |||
389 | /* | ||
390 | * Recycle the hrtimer in ts, so we can share the | ||
391 | * hrtimer_forward with the highres code. | ||
392 | */ | ||
393 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
394 | /* Get the next period */ | ||
395 | next = tick_init_jiffy_update(); | ||
396 | |||
397 | for (;;) { | ||
398 | ts->sched_timer.expires = next; | ||
399 | if (!tick_program_event(next, 0)) | ||
400 | break; | ||
401 | next = ktime_add(next, tick_period); | ||
402 | } | ||
403 | local_irq_enable(); | ||
404 | |||
405 | printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", | ||
406 | smp_processor_id()); | ||
407 | } | ||
408 | |||
409 | #else | ||
410 | |||
411 | static inline void tick_nohz_switch_to_nohz(void) { } | ||
412 | |||
413 | #endif /* NO_HZ */ | ||
414 | |||
415 | /* | ||
416 | * High resolution timer specific code | ||
417 | */ | ||
418 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
419 | /* | ||
420 | * We rearm the timer until we get disabled by the idle code | ||
421 | * Called with interrupts disabled and timer->base->cpu_base->lock held. | ||
422 | */ | ||
423 | static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | ||
424 | { | ||
425 | struct tick_sched *ts = | ||
426 | container_of(timer, struct tick_sched, sched_timer); | ||
427 | struct hrtimer_cpu_base *base = timer->base->cpu_base; | ||
428 | struct pt_regs *regs = get_irq_regs(); | ||
429 | ktime_t now = ktime_get(); | ||
430 | |||
431 | /* Check, if the jiffies need an update */ | ||
432 | tick_do_update_jiffies64(now); | ||
433 | |||
434 | /* | ||
435 | * Do not call update_process_times() when we are not in irq | ||
436 | * context and have no valid regs pointer | ||
437 | */ | ||
438 | if (regs) { | ||
439 | /* | ||
440 | * When we are idle and the tick is stopped, we have to touch | ||
441 | * the watchdog as we might not schedule for a really long | ||
442 | * time. This happens on completely idle SMP systems while | ||
443 | * waiting on the login prompt. We also increment the "start of | ||
444 | * idle" jiffy stamp so the idle accounting adjustment we do | ||
445 | * when we go busy again does not account too many ticks. | ||
446 | */ | ||
447 | if (ts->tick_stopped) { | ||
448 | touch_softlockup_watchdog(); | ||
449 | ts->idle_jiffies++; | ||
450 | } | ||
451 | /* | ||
452 | * update_process_times() might take tasklist_lock, hence | ||
453 | * drop the base lock. sched-tick hrtimers are per-CPU and | ||
454 | * never accessible by userspace APIs, so this is safe to do. | ||
455 | */ | ||
456 | spin_unlock(&base->lock); | ||
457 | update_process_times(user_mode(regs)); | ||
458 | profile_tick(CPU_PROFILING); | ||
459 | spin_lock(&base->lock); | ||
460 | } | ||
461 | |||
462 | /* Do not restart, when we are in the idle loop */ | ||
463 | if (ts->tick_stopped) | ||
464 | return HRTIMER_NORESTART; | ||
465 | |||
466 | hrtimer_forward(timer, now, tick_period); | ||
467 | |||
468 | return HRTIMER_RESTART; | ||
469 | } | ||
470 | |||
471 | /** | ||
472 | * tick_setup_sched_timer - setup the tick emulation timer | ||
473 | */ | ||
474 | void tick_setup_sched_timer(void) | ||
475 | { | ||
476 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
477 | ktime_t now = ktime_get(); | ||
478 | |||
479 | /* | ||
480 | * Emulate tick processing via per-CPU hrtimers: | ||
481 | */ | ||
482 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
483 | ts->sched_timer.function = tick_sched_timer; | ||
484 | ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | ||
485 | |||
486 | /* Get the next period */ | ||
487 | ts->sched_timer.expires = tick_init_jiffy_update(); | ||
488 | |||
489 | for (;;) { | ||
490 | hrtimer_forward(&ts->sched_timer, now, tick_period); | ||
491 | hrtimer_start(&ts->sched_timer, ts->sched_timer.expires, | ||
492 | HRTIMER_MODE_ABS); | ||
493 | /* Check, if the timer was already in the past */ | ||
494 | if (hrtimer_active(&ts->sched_timer)) | ||
495 | break; | ||
496 | now = ktime_get(); | ||
497 | } | ||
498 | |||
499 | #ifdef CONFIG_NO_HZ | ||
500 | if (tick_nohz_enabled) | ||
501 | ts->nohz_mode = NOHZ_MODE_HIGHRES; | ||
502 | #endif | ||
503 | } | ||
504 | |||
505 | void tick_cancel_sched_timer(int cpu) | ||
506 | { | ||
507 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
508 | |||
509 | if (ts->sched_timer.base) | ||
510 | hrtimer_cancel(&ts->sched_timer); | ||
511 | ts->tick_stopped = 0; | ||
512 | ts->nohz_mode = NOHZ_MODE_INACTIVE; | ||
513 | } | ||
514 | #endif /* HIGH_RES_TIMERS */ | ||
515 | |||
516 | /** | ||
517 | * Async notification about clocksource changes | ||
518 | */ | ||
519 | void tick_clock_notify(void) | ||
520 | { | ||
521 | int cpu; | ||
522 | |||
523 | for_each_possible_cpu(cpu) | ||
524 | set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks); | ||
525 | } | ||
526 | |||
527 | /* | ||
528 | * Async notification about clock event changes | ||
529 | */ | ||
530 | void tick_oneshot_notify(void) | ||
531 | { | ||
532 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
533 | |||
534 | set_bit(0, &ts->check_clocks); | ||
535 | } | ||
536 | |||
537 | /** | ||
538 | * Check whether a change happened which makes oneshot mode possible. | ||
539 | * | ||
540 | * Called cyclically from the hrtimer softirq (driven by the timer | ||
541 | * softirq). allow_nohz signals that we can switch into low-res nohz | ||
542 | * mode, because high resolution timers are disabled (either at compile | ||
543 | * time or at runtime). | ||
544 | */ | ||
545 | int tick_check_oneshot_change(int allow_nohz) | ||
546 | { | ||
547 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
548 | |||
549 | if (!test_and_clear_bit(0, &ts->check_clocks)) | ||
550 | return 0; | ||
551 | |||
552 | if (ts->nohz_mode != NOHZ_MODE_INACTIVE) | ||
553 | return 0; | ||
554 | |||
555 | if (!timekeeping_is_continuous() || !tick_is_oneshot_available()) | ||
556 | return 0; | ||
557 | |||
558 | if (!allow_nohz) | ||
559 | return 1; | ||
560 | |||
561 | tick_nohz_switch_to_nohz(); | ||
562 | return 0; | ||
563 | } | ||
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c new file mode 100644 index 000000000000..f82c635c3d5c --- /dev/null +++ b/kernel/time/timer_list.c | |||
@@ -0,0 +1,287 @@ | |||
1 | /* | ||
2 | * kernel/time/timer_list.c | ||
3 | * | ||
4 | * List pending timers | ||
5 | * | ||
6 | * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/proc_fs.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/spinlock.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/seq_file.h> | ||
18 | #include <linux/kallsyms.h> | ||
19 | #include <linux/tick.h> | ||
20 | |||
21 | #include <asm/uaccess.h> | ||
22 | |||
23 | typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes); | ||
24 | |||
25 | DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); | ||
26 | |||
27 | /* | ||
28 | * This allows printing both to /proc/timer_list and | ||
29 | * to the console (on SysRq-Q): | ||
30 | */ | ||
31 | #define SEQ_printf(m, x...) \ | ||
32 | do { \ | ||
33 | if (m) \ | ||
34 | seq_printf(m, x); \ | ||
35 | else \ | ||
36 | printk(x); \ | ||
37 | } while (0) | ||
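The NULL branch is what lets sysrq_timer_list_show() at the bottom of this file reuse timer_list_show() unchanged: called with m == NULL, every SEQ_printf() degrades to a printk(), so SysRq-Q dumps the same listing to the console that /proc/timer_list provides.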
38 | |||
39 | static void print_name_offset(struct seq_file *m, void *sym) | ||
40 | { | ||
41 | unsigned long addr = (unsigned long)sym; | ||
42 | char namebuf[KSYM_NAME_LEN+1]; | ||
43 | unsigned long size, offset; | ||
44 | const char *sym_name; | ||
45 | char *modname; | ||
46 | |||
47 | sym_name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf); | ||
48 | if (sym_name) | ||
49 | SEQ_printf(m, "%s", sym_name); | ||
50 | else | ||
51 | SEQ_printf(m, "<%p>", sym); | ||
52 | } | ||
53 | |||
54 | static void | ||
55 | print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now) | ||
56 | { | ||
57 | #ifdef CONFIG_TIMER_STATS | ||
58 | char tmp[TASK_COMM_LEN + 1]; | ||
59 | #endif | ||
60 | SEQ_printf(m, " #%d: ", idx); | ||
61 | print_name_offset(m, timer); | ||
62 | SEQ_printf(m, ", "); | ||
63 | print_name_offset(m, timer->function); | ||
64 | SEQ_printf(m, ", S:%02lx", timer->state); | ||
65 | #ifdef CONFIG_TIMER_STATS | ||
66 | SEQ_printf(m, ", "); | ||
67 | print_name_offset(m, timer->start_site); | ||
68 | memcpy(tmp, timer->start_comm, TASK_COMM_LEN); | ||
69 | tmp[TASK_COMM_LEN] = 0; | ||
70 | SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); | ||
71 | #endif | ||
72 | SEQ_printf(m, "\n"); | ||
73 | SEQ_printf(m, " # expires at %Ld nsecs [in %Ld nsecs]\n", | ||
74 | (unsigned long long)ktime_to_ns(timer->expires), | ||
75 | (unsigned long long)(ktime_to_ns(timer->expires) - now)); | ||
76 | } | ||
77 | |||
78 | static void | ||
79 | print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base, | ||
80 | u64 now) | ||
81 | { | ||
82 | struct hrtimer *timer, tmp; | ||
83 | unsigned long next = 0, i; | ||
84 | struct rb_node *curr; | ||
85 | unsigned long flags; | ||
86 | |||
87 | next_one: | ||
88 | i = 0; | ||
89 | spin_lock_irqsave(&base->cpu_base->lock, flags); | ||
90 | |||
91 | curr = base->first; | ||
92 | /* | ||
93 | * Crude but we have to do this O(N*N) thing, because | ||
94 | * we have to unlock the base when printing: | ||
95 | */ | ||
96 | while (curr && i < next) { | ||
97 | curr = rb_next(curr); | ||
98 | i++; | ||
99 | } | ||
100 | |||
101 | if (curr) { | ||
102 | |||
103 | timer = rb_entry(curr, struct hrtimer, node); | ||
104 | tmp = *timer; | ||
105 | spin_unlock_irqrestore(&base->cpu_base->lock, flags); | ||
106 | |||
107 | print_timer(m, &tmp, i, now); | ||
108 | next++; | ||
109 | goto next_one; | ||
110 | } | ||
111 | spin_unlock_irqrestore(&base->cpu_base->lock, flags); | ||
112 | } | ||
113 | |||
114 | static void | ||
115 | print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) | ||
116 | { | ||
117 | SEQ_printf(m, " .index: %d\n", | ||
118 | base->index); | ||
119 | SEQ_printf(m, " .resolution: %Ld nsecs\n", | ||
120 | (unsigned long long)ktime_to_ns(base->resolution)); | ||
121 | SEQ_printf(m, " .get_time: "); | ||
122 | print_name_offset(m, base->get_time); | ||
123 | SEQ_printf(m, "\n"); | ||
124 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
125 | SEQ_printf(m, " .offset: %Ld nsecs\n", | ||
126 | ktime_to_ns(base->offset)); | ||
127 | #endif | ||
128 | SEQ_printf(m, "active timers:\n"); | ||
129 | print_active_timers(m, base, now); | ||
130 | } | ||
131 | |||
132 | static void print_cpu(struct seq_file *m, int cpu, u64 now) | ||
133 | { | ||
134 | struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); | ||
135 | int i; | ||
136 | |||
137 | SEQ_printf(m, "\ncpu: %d\n", cpu); | ||
138 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | ||
139 | SEQ_printf(m, " clock %d:\n", i); | ||
140 | print_base(m, cpu_base->clock_base + i, now); | ||
141 | } | ||
142 | #define P(x) \ | ||
143 | SEQ_printf(m, " .%-15s: %Ld\n", #x, (u64)(cpu_base->x)) | ||
144 | #define P_ns(x) \ | ||
145 | SEQ_printf(m, " .%-15s: %Ld nsecs\n", #x, \ | ||
146 | (u64)(ktime_to_ns(cpu_base->x))) | ||
147 | |||
148 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
149 | P_ns(expires_next); | ||
150 | P(hres_active); | ||
151 | P(nr_events); | ||
152 | #endif | ||
153 | #undef P | ||
154 | #undef P_ns | ||
155 | |||
156 | #ifdef CONFIG_TICK_ONESHOT | ||
157 | # define P(x) \ | ||
158 | SEQ_printf(m, " .%-15s: %Ld\n", #x, (u64)(ts->x)) | ||
159 | # define P_ns(x) \ | ||
160 | SEQ_printf(m, " .%-15s: %Ld nsecs\n", #x, \ | ||
161 | (u64)(ktime_to_ns(ts->x))) | ||
162 | { | ||
163 | struct tick_sched *ts = tick_get_tick_sched(cpu); | ||
164 | P(nohz_mode); | ||
165 | P_ns(idle_tick); | ||
166 | P(tick_stopped); | ||
167 | P(idle_jiffies); | ||
168 | P(idle_calls); | ||
169 | P(idle_sleeps); | ||
170 | P_ns(idle_entrytime); | ||
171 | P_ns(idle_sleeptime); | ||
172 | P(last_jiffies); | ||
173 | P(next_jiffies); | ||
174 | P_ns(idle_expires); | ||
175 | SEQ_printf(m, "jiffies: %Ld\n", (u64)jiffies); | ||
176 | } | ||
177 | #endif | ||
178 | |||
179 | #undef P | ||
180 | #undef P_ns | ||
181 | } | ||
182 | |||
183 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | ||
184 | static void | ||
185 | print_tickdevice(struct seq_file *m, struct tick_device *td) | ||
186 | { | ||
187 | struct clock_event_device *dev = td->evtdev; | ||
188 | |||
189 | SEQ_printf(m, "\nTick Device: mode: %d\n", td->mode); | ||
190 | |||
191 | SEQ_printf(m, "Clock Event Device: "); | ||
192 | if (!dev) { | ||
193 | SEQ_printf(m, "<NULL>\n"); | ||
194 | return; | ||
195 | } | ||
196 | SEQ_printf(m, "%s\n", dev->name); | ||
197 | SEQ_printf(m, " max_delta_ns: %ld\n", dev->max_delta_ns); | ||
198 | SEQ_printf(m, " min_delta_ns: %ld\n", dev->min_delta_ns); | ||
199 | SEQ_printf(m, " mult: %ld\n", dev->mult); | ||
200 | SEQ_printf(m, " shift: %d\n", dev->shift); | ||
201 | SEQ_printf(m, " mode: %d\n", dev->mode); | ||
202 | SEQ_printf(m, " next_event: %Ld nsecs\n", | ||
203 | (unsigned long long) ktime_to_ns(dev->next_event)); | ||
204 | |||
205 | SEQ_printf(m, " set_next_event: "); | ||
206 | print_name_offset(m, dev->set_next_event); | ||
207 | SEQ_printf(m, "\n"); | ||
208 | |||
209 | SEQ_printf(m, " set_mode: "); | ||
210 | print_name_offset(m, dev->set_mode); | ||
211 | SEQ_printf(m, "\n"); | ||
212 | |||
213 | SEQ_printf(m, " event_handler: "); | ||
214 | print_name_offset(m, dev->event_handler); | ||
215 | SEQ_printf(m, "\n"); | ||
216 | } | ||
217 | |||
218 | static void timer_list_show_tickdevices(struct seq_file *m) | ||
219 | { | ||
220 | int cpu; | ||
221 | |||
222 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | ||
223 | print_tickdevice(m, tick_get_broadcast_device()); | ||
224 | SEQ_printf(m, "tick_broadcast_mask: %08lx\n", | ||
225 | tick_get_broadcast_mask()->bits[0]); | ||
226 | #ifdef CONFIG_TICK_ONESHOT | ||
227 | SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n", | ||
228 | tick_get_broadcast_oneshot_mask()->bits[0]); | ||
229 | #endif | ||
230 | SEQ_printf(m, "\n"); | ||
231 | #endif | ||
232 | for_each_online_cpu(cpu) | ||
233 | print_tickdevice(m, tick_get_device(cpu)); | ||
234 | SEQ_printf(m, "\n"); | ||
235 | } | ||
236 | #else | ||
237 | static void timer_list_show_tickdevices(struct seq_file *m) { } | ||
238 | #endif | ||
239 | |||
240 | static int timer_list_show(struct seq_file *m, void *v) | ||
241 | { | ||
242 | u64 now = ktime_to_ns(ktime_get()); | ||
243 | int cpu; | ||
244 | |||
245 | SEQ_printf(m, "Timer List Version: v0.3\n"); | ||
246 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); | ||
247 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); | ||
248 | |||
249 | for_each_online_cpu(cpu) | ||
250 | print_cpu(m, cpu, now); | ||
251 | |||
252 | SEQ_printf(m, "\n"); | ||
253 | timer_list_show_tickdevices(m); | ||
254 | |||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | void sysrq_timer_list_show(void) | ||
259 | { | ||
260 | timer_list_show(NULL, NULL); | ||
261 | } | ||
262 | |||
263 | static int timer_list_open(struct inode *inode, struct file *filp) | ||
264 | { | ||
265 | return single_open(filp, timer_list_show, NULL); | ||
266 | } | ||
267 | |||
268 | static struct file_operations timer_list_fops = { | ||
269 | .open = timer_list_open, | ||
270 | .read = seq_read, | ||
271 | .llseek = seq_lseek, | ||
272 | .release = seq_release, | ||
273 | }; | ||
274 | |||
275 | static int __init init_timer_list_procfs(void) | ||
276 | { | ||
277 | struct proc_dir_entry *pe; | ||
278 | |||
279 | pe = create_proc_entry("timer_list", 0644, NULL); | ||
280 | if (!pe) | ||
281 | return -ENOMEM; | ||
282 | |||
283 | pe->proc_fops = &timer_list_fops; | ||
284 | |||
285 | return 0; | ||
286 | } | ||
287 | __initcall(init_timer_list_procfs); | ||
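Once the entry is registered, the listing can be read with a plain cat /proc/timer_list; the 0644 mode makes the file world-readable, so no special privileges are needed to inspect pending timers and tick devices.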
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c new file mode 100644 index 000000000000..1bc4882e28e0 --- /dev/null +++ b/kernel/time/timer_stats.c | |||
@@ -0,0 +1,411 @@ | |||
1 | /* | ||
2 | * kernel/time/timer_stats.c | ||
3 | * | ||
4 | * Collect timer usage statistics. | ||
5 | * | ||
6 | * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar | ||
7 | * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | ||
8 | * | ||
9 | * timer_stats is based on timer_top, a similar functionality which was part of | ||
10 | * Con Kolivas' dyntick patch set. It was developed by Daniel Petrini at the | ||
11 | * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based | ||
12 | * on dynamic allocation of the statistics entries and linear search based | ||
13 | * lookup combined with a global lock, rather than the static array, hash | ||
14 | * and per-CPU locking which is used by timer_stats. It was written for the | ||
15 | * pre-hrtimer kernel code and therefore did not take hrtimers into account. | ||
16 | * Nevertheless it provided the base for the timer_stats implementation and | ||
17 | * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks | ||
18 | * for this effort. | ||
19 | * | ||
20 | * timer_top.c is | ||
21 | * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus | ||
22 | * Written by Daniel Petrini <d.pensator@gmail.com> | ||
23 | * timer_top.c was released under the GNU General Public License version 2 | ||
24 | * | ||
25 | * We export the addresses and counting of timer functions being called, | ||
26 | * the pid and cmdline from the owner process if applicable. | ||
27 | * | ||
28 | * Start/stop data collection: | ||
29 | * # echo 1 >/proc/timer_stats (echo 0 to stop) | ||
30 | * | ||
31 | * Display the information collected so far: | ||
32 | * # cat /proc/timer_stats | ||
33 | * | ||
34 | * This program is free software; you can redistribute it and/or modify | ||
35 | * it under the terms of the GNU General Public License version 2 as | ||
36 | * published by the Free Software Foundation. | ||
37 | */ | ||
38 | |||
39 | #include <linux/proc_fs.h> | ||
40 | #include <linux/module.h> | ||
41 | #include <linux/spinlock.h> | ||
42 | #include <linux/sched.h> | ||
43 | #include <linux/seq_file.h> | ||
44 | #include <linux/kallsyms.h> | ||
45 | |||
46 | #include <asm/uaccess.h> | ||
47 | |||
48 | /* | ||
49 | * This is our basic unit of interest: a timer expiry event identified | ||
50 | * by the timer, its start/expire functions and the PID of the task that | ||
51 | * started the timer. We count the number of times an event happens: | ||
52 | */ | ||
53 | struct entry { | ||
54 | /* | ||
55 | * Hash list: | ||
56 | */ | ||
57 | struct entry *next; | ||
58 | |||
59 | /* | ||
60 | * Hash keys: | ||
61 | */ | ||
62 | void *timer; | ||
63 | void *start_func; | ||
64 | void *expire_func; | ||
65 | pid_t pid; | ||
66 | |||
67 | /* | ||
68 | * Number of timeout events: | ||
69 | */ | ||
70 | unsigned long count; | ||
71 | |||
72 | /* | ||
73 | * We save the command-line string to preserve | ||
74 | * this information past task exit: | ||
75 | */ | ||
76 | char comm[TASK_COMM_LEN + 1]; | ||
77 | |||
78 | } ____cacheline_aligned_in_smp; | ||
79 | |||
80 | /* | ||
81 | * Spinlock protecting the tables - not taken during lookup: | ||
82 | */ | ||
83 | static DEFINE_SPINLOCK(table_lock); | ||
84 | |||
85 | /* | ||
86 | * Per-CPU lookup locks for fast hash lookup: | ||
87 | */ | ||
88 | static DEFINE_PER_CPU(spinlock_t, lookup_lock); | ||
89 | |||
90 | /* | ||
91 | * Mutex to serialize state changes with show-stats activities: | ||
92 | */ | ||
93 | static DEFINE_MUTEX(show_mutex); | ||
94 | |||
95 | /* | ||
96 | * Collection status, active/inactive: | ||
97 | */ | ||
98 | static int __read_mostly active; | ||
99 | |||
100 | /* | ||
101 | * Beginning/end timestamps of measurement: | ||
102 | */ | ||
103 | static ktime_t time_start, time_stop; | ||
104 | |||
105 | /* | ||
106 | * tstat entry structs only get allocated while collection is | ||
107 | * active and never freed during that time - this simplifies | ||
108 | * things quite a bit. | ||
109 | * | ||
110 | * They get freed when a new collection period is started. | ||
111 | */ | ||
112 | #define MAX_ENTRIES_BITS 10 | ||
113 | #define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS) | ||
114 | |||
115 | static unsigned long nr_entries; | ||
116 | static struct entry entries[MAX_ENTRIES]; | ||
117 | |||
118 | static atomic_t overflow_count; | ||
119 | |||
120 | static void reset_entries(void) | ||
121 | { | ||
122 | nr_entries = 0; | ||
123 | memset(entries, 0, sizeof(entries)); | ||
124 | atomic_set(&overflow_count, 0); | ||
125 | } | ||
126 | |||
127 | static struct entry *alloc_entry(void) | ||
128 | { | ||
129 | if (nr_entries >= MAX_ENTRIES) | ||
130 | return NULL; | ||
131 | |||
132 | return entries + nr_entries++; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * The entries are in a hash-table, for fast lookup: | ||
137 | */ | ||
138 | #define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1) | ||
139 | #define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS) | ||
140 | #define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1) | ||
141 | |||
142 | #define __tstat_hashfn(entry) \ | ||
143 | (((unsigned long)(entry)->timer ^ \ | ||
144 | (unsigned long)(entry)->start_func ^ \ | ||
145 | (unsigned long)(entry)->expire_func ^ \ | ||
146 | (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK) | ||
147 | |||
148 | #define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry)) | ||
149 | |||
150 | static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly; | ||
151 | |||
152 | static int match_entries(struct entry *entry1, struct entry *entry2) | ||
153 | { | ||
154 | return entry1->timer == entry2->timer && | ||
155 | entry1->start_func == entry2->start_func && | ||
156 | entry1->expire_func == entry2->expire_func && | ||
157 | entry1->pid == entry2->pid; | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * Look up whether an entry matching this item is present | ||
162 | * in the hash already. Must be called with irqs off and the | ||
163 | * lookup lock held: | ||
164 | */ | ||
165 | static struct entry *tstat_lookup(struct entry *entry, char *comm) | ||
166 | { | ||
167 | struct entry **head, *curr, *prev; | ||
168 | |||
169 | head = tstat_hashentry(entry); | ||
170 | curr = *head; | ||
171 | |||
172 | /* | ||
173 | * The fastpath is when the entry is already hashed; | ||
174 | * we do this with the lookup lock held, but with the | ||
175 | * table lock not held: | ||
176 | */ | ||
177 | while (curr) { | ||
178 | if (match_entries(curr, entry)) | ||
179 | return curr; | ||
180 | |||
181 | curr = curr->next; | ||
182 | } | ||
183 | /* | ||
184 | * Slowpath: allocate, set up and link a new hash entry: | ||
185 | */ | ||
186 | prev = NULL; | ||
187 | curr = *head; | ||
188 | |||
189 | spin_lock(&table_lock); | ||
190 | /* | ||
191 | * Make sure we have not raced with another CPU: | ||
192 | */ | ||
193 | while (curr) { | ||
194 | if (match_entries(curr, entry)) | ||
195 | goto out_unlock; | ||
196 | |||
197 | prev = curr; | ||
198 | curr = curr->next; | ||
199 | } | ||
200 | |||
201 | curr = alloc_entry(); | ||
202 | if (curr) { | ||
203 | *curr = *entry; | ||
204 | curr->count = 0; | ||
205 | curr->next = NULL; /* initialize before linking curr in */ | ||
206 | memcpy(curr->comm, comm, TASK_COMM_LEN); | ||
207 | if (prev) | ||
208 | prev->next = curr; | ||
209 | else | ||
210 | *head = curr; | ||
211 | } | ||
212 | out_unlock: | ||
213 | spin_unlock(&table_lock); | ||
214 | |||
215 | return curr; | ||
216 | } | ||
217 | |||
218 | /** | ||
219 | * timer_stats_update_stats - Update the statistics for a timer. | ||
220 | * @timer: pointer to either a timer_list or a hrtimer | ||
221 | * @pid: the pid of the task which set up the timer | ||
222 | * @startf: pointer to the function which did the timer setup | ||
223 | * @timerf: pointer to the timer callback function of the timer | ||
224 | * @comm: name of the process which set up the timer | ||
225 | * | ||
226 | * If the timer is already registered, the event counter is | ||
227 | * incremented; otherwise the timer is registered in a free slot. | ||
228 | */ | ||
229 | void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | ||
230 | void *timerf, char *comm) | ||
231 | { | ||
232 | /* | ||
233 | * It doesn't matter which CPU's lookup lock we take: | ||
234 | */ | ||
235 | spinlock_t *lock = &per_cpu(lookup_lock, raw_smp_processor_id()); | ||
236 | struct entry *entry, input; | ||
237 | unsigned long flags; | ||
238 | |||
239 | input.timer = timer; | ||
240 | input.start_func = startf; | ||
241 | input.expire_func = timerf; | ||
242 | input.pid = pid; | ||
243 | |||
244 | spin_lock_irqsave(lock, flags); | ||
245 | if (!active) | ||
246 | goto out_unlock; | ||
247 | |||
248 | entry = tstat_lookup(&input, comm); | ||
249 | if (likely(entry)) | ||
250 | entry->count++; | ||
251 | else | ||
252 | atomic_inc(&overflow_count); | ||
253 | |||
254 | out_unlock: | ||
255 | spin_unlock_irqrestore(lock, flags); | ||
256 | } | ||
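
The counterpart that feeds this function sits on the timer-expiry path: __run_timers(), further down in this patch, calls timer_stats_account_timer() just before dispatching the callback. That helper's definition lives in the header portion of the patch (not shown in this hunk); presumably it simply pulls the recorded start info out of the timer_list, roughly:

    /* Sketch of the expiry-side hook, assuming the start_site /
     * start_pid / start_comm fields that init_timer() zeroes below: */
    static inline void timer_stats_account_timer(struct timer_list *timer)
    {
            timer_stats_update_stats(timer, timer->start_pid,
                                     timer->start_site, timer->function,
                                     timer->start_comm);
    }
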
257 | |||
258 | static void print_name_offset(struct seq_file *m, unsigned long addr) | ||
259 | { | ||
260 | char namebuf[KSYM_NAME_LEN+1]; | ||
261 | unsigned long size, offset; | ||
262 | const char *sym_name; | ||
263 | char *modname; | ||
264 | |||
265 | sym_name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf); | ||
266 | if (sym_name) | ||
267 | seq_printf(m, "%s", sym_name); | ||
268 | else | ||
269 | seq_printf(m, "<%p>", (void *)addr); | ||
270 | } | ||
271 | |||
272 | static int tstats_show(struct seq_file *m, void *v) | ||
273 | { | ||
274 | struct timespec period; | ||
275 | struct entry *entry; | ||
276 | unsigned long ms; | ||
277 | long events = 0; | ||
278 | ktime_t time; | ||
279 | int i; | ||
280 | |||
281 | mutex_lock(&show_mutex); | ||
282 | /* | ||
283 | * If still active then calculate up to now: | ||
284 | */ | ||
285 | if (active) | ||
286 | time_stop = ktime_get(); | ||
287 | |||
288 | time = ktime_sub(time_stop, time_start); | ||
289 | |||
290 | period = ktime_to_timespec(time); | ||
291 | ms = period.tv_nsec / 1000000; | ||
292 | |||
293 | seq_puts(m, "Timer Stats Version: v0.1\n"); | ||
294 | seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); | ||
295 | if (atomic_read(&overflow_count)) | ||
296 | seq_printf(m, "Overflow: %d entries\n", | ||
297 | atomic_read(&overflow_count)); | ||
298 | |||
299 | for (i = 0; i < nr_entries; i++) { | ||
300 | entry = entries + i; | ||
301 | seq_printf(m, "%4lu, %5d %-16s ", | ||
302 | entry->count, entry->pid, entry->comm); | ||
303 | |||
304 | print_name_offset(m, (unsigned long)entry->start_func); | ||
305 | seq_puts(m, " ("); | ||
306 | print_name_offset(m, (unsigned long)entry->expire_func); | ||
307 | seq_puts(m, ")\n"); | ||
308 | |||
309 | events += entry->count; | ||
310 | } | ||
311 | |||
312 | ms += period.tv_sec * 1000; | ||
313 | if (!ms) | ||
314 | ms = 1; | ||
315 | |||
316 | if (events && period.tv_sec) | ||
317 | seq_printf(m, "%ld total events, %ld.%03ld events/sec\n", events, | ||
318 | events * 1000 / ms, (events * 1000000 / ms) % 1000); | ||
319 | else | ||
320 | seq_printf(m, "%ld total events\n", events); | ||
321 | |||
322 | mutex_unlock(&show_mutex); | ||
323 | |||
324 | return 0; | ||
325 | } | ||
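
For reference, the format that tstats_show() produces looks like the following (illustrative lines, not captured from a real system):

    Timer Stats Version: v0.1
    Sample period: 3.888 s
      15,     1 swapper          hcd_submit_urb (rh_timer_func)
       4,   959 kedac            schedule_timeout (process_timeout)
       1,     1 swapper          queue_delayed_work_on (delayed_work_timer_fn)
    20 total events, 5.144 events/sec
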
326 | |||
327 | /* | ||
328 | * After a state change, cycle each CPU's lookup lock to wait | ||
329 | * out any lookup/update activity already in flight: | ||
330 | */ | ||
331 | static void sync_access(void) | ||
332 | { | ||
333 | unsigned long flags; | ||
334 | int cpu; | ||
335 | |||
336 | for_each_online_cpu(cpu) { | ||
337 | spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags); | ||
338 | /* nothing */ | ||
339 | spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags); | ||
340 | } | ||
341 | } | ||
342 | |||
343 | static ssize_t tstats_write(struct file *file, const char __user *buf, | ||
344 | size_t count, loff_t *offs) | ||
345 | { | ||
346 | char ctl[2]; | ||
347 | |||
348 | if (count != 2 || *offs) | ||
349 | return -EINVAL; | ||
350 | |||
351 | if (copy_from_user(ctl, buf, count)) | ||
352 | return -EFAULT; | ||
353 | |||
354 | mutex_lock(&show_mutex); | ||
355 | switch (ctl[0]) { | ||
356 | case '0': | ||
357 | if (active) { | ||
358 | active = 0; | ||
359 | time_stop = ktime_get(); | ||
360 | sync_access(); | ||
361 | } | ||
362 | break; | ||
363 | case '1': | ||
364 | if (!active) { | ||
365 | reset_entries(); | ||
366 | time_start = ktime_get(); | ||
367 | active = 1; | ||
368 | } | ||
369 | break; | ||
370 | default: | ||
371 | count = -EINVAL; | ||
372 | } | ||
373 | mutex_unlock(&show_mutex); | ||
374 | |||
375 | return count; | ||
376 | } | ||
377 | |||
378 | static int tstats_open(struct inode *inode, struct file *filp) | ||
379 | { | ||
380 | return single_open(filp, tstats_show, NULL); | ||
381 | } | ||
382 | |||
383 | static struct file_operations tstats_fops = { | ||
384 | .open = tstats_open, | ||
385 | .read = seq_read, | ||
386 | .write = tstats_write, | ||
387 | .llseek = seq_lseek, | ||
388 | .release = seq_release, | ||
389 | }; | ||
390 | |||
391 | void __init init_timer_stats(void) | ||
392 | { | ||
393 | int cpu; | ||
394 | |||
395 | for_each_possible_cpu(cpu) | ||
396 | spin_lock_init(&per_cpu(lookup_lock, cpu)); | ||
397 | } | ||
398 | |||
399 | static int __init init_tstats_procfs(void) | ||
400 | { | ||
401 | struct proc_dir_entry *pe; | ||
402 | |||
403 | pe = create_proc_entry("timer_stats", 0644, NULL); | ||
404 | if (!pe) | ||
405 | return -ENOMEM; | ||
406 | |||
407 | pe->proc_fops = &tstats_fops; | ||
408 | |||
409 | return 0; | ||
410 | } | ||
411 | __initcall(init_tstats_procfs); | ||
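
Putting the pieces together, a round trip through the /proc interface from user space could look like the following sketch; the path and the '1'/'0' protocol come straight from the code above, the timings are arbitrary:

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char line[256];
            FILE *f;

            f = fopen("/proc/timer_stats", "w");
            if (!f) { perror("timer_stats"); return 1; }
            fputs("1\n", f);        /* '1' + '\n': count must be 2 */
            fclose(f);

            sleep(5);               /* let timer events accumulate */

            f = fopen("/proc/timer_stats", "w");
            if (!f) { perror("timer_stats"); return 1; }
            fputs("0\n", f);        /* stop collection */
            fclose(f);

            f = fopen("/proc/timer_stats", "r");
            if (!f) { perror("timer_stats"); return 1; }
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }
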
diff --git a/kernel/timer.c b/kernel/timer.c index 4902181e10e6..cb1b86a9c52f 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -34,6 +34,8 @@ | |||
34 | #include <linux/cpu.h> | 34 | #include <linux/cpu.h> |
35 | #include <linux/syscalls.h> | 35 | #include <linux/syscalls.h> |
36 | #include <linux/delay.h> | 36 | #include <linux/delay.h> |
37 | #include <linux/tick.h> | ||
38 | #include <linux/kallsyms.h> | ||
37 | 39 | ||
38 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
39 | #include <asm/unistd.h> | 41 | #include <asm/unistd.h> |
@@ -262,6 +264,18 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) | |||
262 | list_add_tail(&timer->entry, vec); | 264 | list_add_tail(&timer->entry, vec); |
263 | } | 265 | } |
264 | 266 | ||
267 | #ifdef CONFIG_TIMER_STATS | ||
268 | void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) | ||
269 | { | ||
270 | if (timer->start_site) | ||
271 | return; | ||
272 | |||
273 | timer->start_site = addr; | ||
274 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); | ||
275 | timer->start_pid = current->pid; | ||
276 | } | ||
277 | #endif | ||
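
The callers added below use timer_stats_timer_set_start_info() (without the leading underscores); that wrapper is defined in the header portion of this patch, presumably just capturing the caller's return address as the start site, along the lines of:

    static inline void
    timer_stats_timer_set_start_info(struct timer_list *timer)
    {
            __timer_stats_timer_set_start_info(timer,
                                    __builtin_return_address(0));
    }
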
278 | |||
265 | /** | 279 | /** |
266 | * init_timer - initialize a timer. | 280 | * init_timer - initialize a timer. |
267 | * @timer: the timer to be initialized | 281 | * @timer: the timer to be initialized |
@@ -273,11 +287,16 @@ void fastcall init_timer(struct timer_list *timer) | |||
273 | { | 287 | { |
274 | timer->entry.next = NULL; | 288 | timer->entry.next = NULL; |
275 | timer->base = __raw_get_cpu_var(tvec_bases); | 289 | timer->base = __raw_get_cpu_var(tvec_bases); |
290 | #ifdef CONFIG_TIMER_STATS | ||
291 | timer->start_site = NULL; | ||
292 | timer->start_pid = -1; | ||
293 | memset(timer->start_comm, 0, TASK_COMM_LEN); | ||
294 | #endif | ||
276 | } | 295 | } |
277 | EXPORT_SYMBOL(init_timer); | 296 | EXPORT_SYMBOL(init_timer); |
278 | 297 | ||
279 | static inline void detach_timer(struct timer_list *timer, | 298 | static inline void detach_timer(struct timer_list *timer, |
280 | int clear_pending) | 299 | int clear_pending) |
281 | { | 300 | { |
282 | struct list_head *entry = &timer->entry; | 301 | struct list_head *entry = &timer->entry; |
283 | 302 | ||
@@ -324,6 +343,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) | |||
324 | unsigned long flags; | 343 | unsigned long flags; |
325 | int ret = 0; | 344 | int ret = 0; |
326 | 345 | ||
346 | timer_stats_timer_set_start_info(timer); | ||
327 | BUG_ON(!timer->function); | 347 | BUG_ON(!timer->function); |
328 | 348 | ||
329 | base = lock_timer_base(timer, &flags); | 349 | base = lock_timer_base(timer, &flags); |
@@ -374,6 +394,7 @@ void add_timer_on(struct timer_list *timer, int cpu) | |||
374 | tvec_base_t *base = per_cpu(tvec_bases, cpu); | 394 | tvec_base_t *base = per_cpu(tvec_bases, cpu); |
375 | unsigned long flags; | 395 | unsigned long flags; |
376 | 396 | ||
397 | timer_stats_timer_set_start_info(timer); | ||
377 | BUG_ON(timer_pending(timer) || !timer->function); | 398 | BUG_ON(timer_pending(timer) || !timer->function); |
378 | spin_lock_irqsave(&base->lock, flags); | 399 | spin_lock_irqsave(&base->lock, flags); |
379 | timer->base = base; | 400 | timer->base = base; |
@@ -406,6 +427,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires) | |||
406 | { | 427 | { |
407 | BUG_ON(!timer->function); | 428 | BUG_ON(!timer->function); |
408 | 429 | ||
430 | timer_stats_timer_set_start_info(timer); | ||
409 | /* | 431 | /* |
410 | * This is a common optimization triggered by the | 432 | * This is a common optimization triggered by the |
411 | * networking code - if the timer is re-modified | 433 | * networking code - if the timer is re-modified |
@@ -436,6 +458,7 @@ int del_timer(struct timer_list *timer) | |||
436 | unsigned long flags; | 458 | unsigned long flags; |
437 | int ret = 0; | 459 | int ret = 0; |
438 | 460 | ||
461 | timer_stats_timer_clear_start_info(timer); | ||
439 | if (timer_pending(timer)) { | 462 | if (timer_pending(timer)) { |
440 | base = lock_timer_base(timer, &flags); | 463 | base = lock_timer_base(timer, &flags); |
441 | if (timer_pending(timer)) { | 464 | if (timer_pending(timer)) { |
@@ -569,6 +592,8 @@ static inline void __run_timers(tvec_base_t *base) | |||
569 | fn = timer->function; | 592 | fn = timer->function; |
570 | data = timer->data; | 593 | data = timer->data; |
571 | 594 | ||
595 | timer_stats_account_timer(timer); | ||
596 | |||
572 | set_running_timer(base, timer); | 597 | set_running_timer(base, timer); |
573 | detach_timer(timer, 1); | 598 | detach_timer(timer, 1); |
574 | spin_unlock_irq(&base->lock); | 599 | spin_unlock_irq(&base->lock); |
@@ -591,105 +616,124 @@ static inline void __run_timers(tvec_base_t *base) | |||
591 | spin_unlock_irq(&base->lock); | 616 | spin_unlock_irq(&base->lock); |
592 | } | 617 | } |
593 | 618 | ||
594 | #ifdef CONFIG_NO_IDLE_HZ | 619 | #if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ) |
595 | /* | 620 | /* |
596 | * Find out when the next timer event is due to happen. This | 621 | * Find out when the next timer event is due to happen. This |
597 | * is used on S/390 to stop all activity when a cpu is idle. | 622 | * is used on S/390 to stop all activity when a cpu is idle. |
598 | * This function needs to be called with interrupts disabled. | 623 | * This function needs to be called with interrupts disabled. |
599 | */ | 624 | */ |
600 | unsigned long next_timer_interrupt(void) | 625 | static unsigned long __next_timer_interrupt(tvec_base_t *base) |
601 | { | 626 | { |
602 | tvec_base_t *base; | 627 | unsigned long timer_jiffies = base->timer_jiffies; |
603 | struct list_head *list; | 628 | unsigned long expires = timer_jiffies + (LONG_MAX >> 1); |
629 | int index, slot, array, found = 0; | ||
604 | struct timer_list *nte; | 630 | struct timer_list *nte; |
605 | unsigned long expires; | ||
606 | unsigned long hr_expires = MAX_JIFFY_OFFSET; | ||
607 | ktime_t hr_delta; | ||
608 | tvec_t *varray[4]; | 631 | tvec_t *varray[4]; |
609 | int i, j; | ||
610 | |||
611 | hr_delta = hrtimer_get_next_event(); | ||
612 | if (hr_delta.tv64 != KTIME_MAX) { | ||
613 | struct timespec tsdelta; | ||
614 | tsdelta = ktime_to_timespec(hr_delta); | ||
615 | hr_expires = timespec_to_jiffies(&tsdelta); | ||
616 | if (hr_expires < 3) | ||
617 | return hr_expires + jiffies; | ||
618 | } | ||
619 | hr_expires += jiffies; | ||
620 | |||
621 | base = __get_cpu_var(tvec_bases); | ||
622 | spin_lock(&base->lock); | ||
623 | expires = base->timer_jiffies + (LONG_MAX >> 1); | ||
624 | list = NULL; | ||
625 | 632 | ||
626 | /* Look for timer events in tv1. */ | 633 | /* Look for timer events in tv1. */ |
627 | j = base->timer_jiffies & TVR_MASK; | 634 | index = slot = timer_jiffies & TVR_MASK; |
628 | do { | 635 | do { |
629 | list_for_each_entry(nte, base->tv1.vec + j, entry) { | 636 | list_for_each_entry(nte, base->tv1.vec + slot, entry) { |
637 | found = 1; | ||
630 | expires = nte->expires; | 638 | expires = nte->expires; |
631 | if (j < (base->timer_jiffies & TVR_MASK)) | 639 | /* Look at the cascade bucket(s)? */ |
632 | list = base->tv2.vec + (INDEX(0)); | 640 | if (!index || slot < index) |
633 | goto found; | 641 | goto cascade; |
642 | return expires; | ||
634 | } | 643 | } |
635 | j = (j + 1) & TVR_MASK; | 644 | slot = (slot + 1) & TVR_MASK; |
636 | } while (j != (base->timer_jiffies & TVR_MASK)); | 645 | } while (slot != index); |
646 | |||
647 | cascade: | ||
648 | /* Calculate the next cascade event */ | ||
649 | if (index) | ||
650 | timer_jiffies += TVR_SIZE - index; | ||
651 | timer_jiffies >>= TVR_BITS; | ||
637 | 652 | ||
638 | /* Check tv2-tv5. */ | 653 | /* Check tv2-tv5. */ |
639 | varray[0] = &base->tv2; | 654 | varray[0] = &base->tv2; |
640 | varray[1] = &base->tv3; | 655 | varray[1] = &base->tv3; |
641 | varray[2] = &base->tv4; | 656 | varray[2] = &base->tv4; |
642 | varray[3] = &base->tv5; | 657 | varray[3] = &base->tv5; |
643 | for (i = 0; i < 4; i++) { | 658 | |
644 | j = INDEX(i); | 659 | for (array = 0; array < 4; array++) { |
660 | tvec_t *varp = varray[array]; | ||
661 | |||
662 | index = slot = timer_jiffies & TVN_MASK; | ||
645 | do { | 663 | do { |
646 | if (list_empty(varray[i]->vec + j)) { | 664 | list_for_each_entry(nte, varp->vec + slot, entry) { |
647 | j = (j + 1) & TVN_MASK; | 665 | found = 1; |
648 | continue; | ||
649 | } | ||
650 | list_for_each_entry(nte, varray[i]->vec + j, entry) | ||
651 | if (time_before(nte->expires, expires)) | 666 | if (time_before(nte->expires, expires)) |
652 | expires = nte->expires; | 667 | expires = nte->expires; |
653 | if (j < (INDEX(i)) && i < 3) | 668 | } |
654 | list = varray[i + 1]->vec + (INDEX(i + 1)); | 669 | /* |
655 | goto found; | 670 | * Do we still search for the first timer or are |
656 | } while (j != (INDEX(i))); | 671 | * we looking up the cascade buckets? |
657 | } | 672 | */ |
658 | found: | 673 | if (found) { |
659 | if (list) { | 674 | /* Look at the cascade bucket(s)? */ |
660 | /* | 675 | if (!index || slot < index) |
661 | * The search wrapped. We need to look at the next list | 676 | break; |
662 | * from next tv element that would cascade into tv element | 677 | return expires; |
663 | * where we found the timer element. | 678 | } |
664 | */ | 679 | slot = (slot + 1) & TVN_MASK; |
665 | list_for_each_entry(nte, list, entry) { | 680 | } while (slot != index); |
666 | if (time_before(nte->expires, expires)) | 681 | |
667 | expires = nte->expires; | 682 | if (index) |
668 | } | 683 | timer_jiffies += TVN_SIZE - index; |
684 | timer_jiffies >>= TVN_BITS; | ||
669 | } | 685 | } |
670 | spin_unlock(&base->lock); | 686 | return expires; |
687 | } | ||
671 | 688 | ||
672 | /* | 689 | /* |
673 | * It can happen that other CPUs service timer IRQs and increment | 690 | * Check, if the next hrtimer event is before the next timer wheel |
674 | * jiffies, but we have not yet got a local timer tick to process | 691 | * event: |
675 | * the timer wheels. In that case, the expiry time can be before | 692 | */ |
676 | * jiffies, but since the high-resolution timer here is relative to | 693 | static unsigned long cmp_next_hrtimer_event(unsigned long now, |
677 | * jiffies, the default expression when high-resolution timers are | 694 | unsigned long expires) |
678 | * not active, | 695 | { |
679 | * | 696 | ktime_t hr_delta = hrtimer_get_next_event(); |
680 | * time_before(MAX_JIFFY_OFFSET + jiffies, expires) | 697 | struct timespec tsdelta; |
681 | * | 698 | |
682 | * would falsely evaluate to true. If that is the case, just | 699 | if (hr_delta.tv64 == KTIME_MAX) |
683 | * return jiffies so that we can immediately fire the local timer | 700 | return expires; |
684 | */ | ||
685 | if (time_before(expires, jiffies)) | ||
686 | return jiffies; | ||
687 | 701 | ||
688 | if (time_before(hr_expires, expires)) | 702 | if (hr_delta.tv64 <= TICK_NSEC) |
689 | return hr_expires; | 703 | return now; |
690 | 704 | ||
705 | tsdelta = ktime_to_timespec(hr_delta); | ||
706 | now += timespec_to_jiffies(&tsdelta); | ||
707 | if (time_before(now, expires)) | ||
708 | return now; | ||
691 | return expires; | 709 | return expires; |
692 | } | 710 | } |
711 | |||
712 | /** | ||
713 | * get_next_timer_interrupt - return the jiffy of the next pending timer | ||
714 | */ | ||
715 | unsigned long get_next_timer_interrupt(unsigned long now) | ||
716 | { | ||
717 | tvec_base_t *base = __get_cpu_var(tvec_bases); | ||
718 | unsigned long expires; | ||
719 | |||
720 | spin_lock(&base->lock); | ||
721 | expires = __next_timer_interrupt(base); | ||
722 | spin_unlock(&base->lock); | ||
723 | |||
724 | if (time_before_eq(expires, now)) | ||
725 | return now; | ||
726 | |||
727 | return cmp_next_hrtimer_event(now, expires); | ||
728 | } | ||
729 | |||
730 | #ifdef CONFIG_NO_IDLE_HZ | ||
731 | unsigned long next_timer_interrupt(void) | ||
732 | { | ||
733 | return get_next_timer_interrupt(jiffies); | ||
734 | } | ||
735 | #endif | ||
736 | |||
693 | #endif | 737 | #endif |
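
The cascade arithmetic in __next_timer_interrupt() is easier to follow with concrete numbers. Assuming the usual (!CONFIG_BASE_SMALL) wheel geometry of TVR_BITS = 8 and TVN_BITS = 6, this user-space sketch walks one cascade step by hand:

    #include <stdio.h>

    #define TVR_BITS 8
    #define TVR_SIZE (1UL << TVR_BITS)      /* 256 tv1 slots */
    #define TVR_MASK (TVR_SIZE - 1)
    #define TVN_BITS 6
    #define TVN_MASK ((1UL << TVN_BITS) - 1)

    int main(void)
    {
            unsigned long timer_jiffies = 1000123;

            /* tv1 is scanned starting at this slot (187 here): */
            unsigned long index = timer_jiffies & TVR_MASK;

            /* If the scan wrapped past slot 0, timers may also hide
             * in the tv2 bucket that cascades into tv1 next; advance
             * to the next tv1 wrap and shift down to the tv2 index: */
            if (index)
                    timer_jiffies += TVR_SIZE - index;
            timer_jiffies >>= TVR_BITS;

            printf("tv1 slot %lu, next tv2 slot %lu\n",
                   index, timer_jiffies & TVN_MASK);
            return 0;
    }
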
694 | 738 | ||
695 | /******************************************************************/ | 739 | /******************************************************************/ |
@@ -832,32 +876,35 @@ EXPORT_SYMBOL(do_settimeofday); | |||
832 | * | 876 | * |
833 | * Accumulates current time interval and initializes new clocksource | 877 | * Accumulates current time interval and initializes new clocksource |
834 | */ | 878 | */ |
835 | static int change_clocksource(void) | 879 | static void change_clocksource(void) |
836 | { | 880 | { |
837 | struct clocksource *new; | 881 | struct clocksource *new; |
838 | cycle_t now; | 882 | cycle_t now; |
839 | u64 nsec; | 883 | u64 nsec; |
884 | |||
840 | new = clocksource_get_next(); | 885 | new = clocksource_get_next(); |
841 | if (clock != new) { | 886 | |
842 | now = clocksource_read(new); | 887 | if (clock == new) |
843 | nsec = __get_nsec_offset(); | 888 | return; |
844 | timespec_add_ns(&xtime, nsec); | 889 | |
845 | 890 | now = clocksource_read(new); | |
846 | clock = new; | 891 | nsec = __get_nsec_offset(); |
847 | clock->cycle_last = now; | 892 | timespec_add_ns(&xtime, nsec); |
848 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", | 893 | |
849 | clock->name); | 894 | clock = new; |
850 | return 1; | 895 | clock->cycle_last = now; |
851 | } else if (clock->update_callback) { | 896 | |
852 | return clock->update_callback(); | 897 | clock->error = 0; |
853 | } | 898 | clock->xtime_nsec = 0; |
854 | return 0; | 899 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); |
900 | |||
901 | tick_clock_notify(); | ||
902 | |||
903 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", | ||
904 | clock->name); | ||
855 | } | 905 | } |
856 | #else | 906 | #else |
857 | static inline int change_clocksource(void) | 907 | static inline void change_clocksource(void) { } |
858 | { | ||
859 | return 0; | ||
860 | } | ||
861 | #endif | 908 | #endif |
862 | 909 | ||
863 | /** | 910 | /** |
@@ -871,33 +918,56 @@ int timekeeping_is_continuous(void) | |||
871 | do { | 918 | do { |
872 | seq = read_seqbegin(&xtime_lock); | 919 | seq = read_seqbegin(&xtime_lock); |
873 | 920 | ||
874 | ret = clock->is_continuous; | 921 | ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; |
875 | 922 | ||
876 | } while (read_seqretry(&xtime_lock, seq)); | 923 | } while (read_seqretry(&xtime_lock, seq)); |
877 | 924 | ||
878 | return ret; | 925 | return ret; |
879 | } | 926 | } |
880 | 927 | ||
928 | /** | ||
929 | * read_persistent_clock - Return time in seconds from the persistent clock. | ||
930 | * | ||
931 | * Weak dummy function for arches that do not yet support it. | ||
932 | * Returns seconds from epoch using the battery backed persistent clock. | ||
933 | * Returns zero if unsupported. | ||
934 | * | ||
935 | * XXX - Be sure to remove it once all arches implement it. | ||
936 | */ | ||
937 | unsigned long __attribute__((weak)) read_persistent_clock(void) | ||
938 | { | ||
939 | return 0; | ||
940 | } | ||
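
An architecture overrides the weak default simply by providing a strong definition, which wins at link time. A hypothetical sketch; mach_rtc_read_seconds() is an invented board hook, not a real API:

    extern unsigned long mach_rtc_read_seconds(void); /* invented */

    /* Hypothetical arch implementation - the strong symbol replaces
     * the weak dummy above: */
    unsigned long read_persistent_clock(void)
    {
            return mach_rtc_read_seconds();
    }
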
941 | |||
881 | /* | 942 | /* |
882 | * timekeeping_init - Initializes the clocksource and common timekeeping values | 943 | * timekeeping_init - Initializes the clocksource and common timekeeping values |
883 | */ | 944 | */ |
884 | void __init timekeeping_init(void) | 945 | void __init timekeeping_init(void) |
885 | { | 946 | { |
886 | unsigned long flags; | 947 | unsigned long flags; |
948 | unsigned long sec = read_persistent_clock(); | ||
887 | 949 | ||
888 | write_seqlock_irqsave(&xtime_lock, flags); | 950 | write_seqlock_irqsave(&xtime_lock, flags); |
889 | 951 | ||
890 | ntp_clear(); | 952 | ntp_clear(); |
891 | 953 | ||
892 | clock = clocksource_get_next(); | 954 | clock = clocksource_get_next(); |
893 | clocksource_calculate_interval(clock, tick_nsec); | 955 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); |
894 | clock->cycle_last = clocksource_read(clock); | 956 | clock->cycle_last = clocksource_read(clock); |
895 | 957 | ||
958 | xtime.tv_sec = sec; | ||
959 | xtime.tv_nsec = 0; | ||
960 | set_normalized_timespec(&wall_to_monotonic, | ||
961 | -xtime.tv_sec, -xtime.tv_nsec); | ||
962 | |||
896 | write_sequnlock_irqrestore(&xtime_lock, flags); | 963 | write_sequnlock_irqrestore(&xtime_lock, flags); |
897 | } | 964 | } |
898 | 965 | ||
899 | 966 | /* set when timekeeping is suspended */ |
900 | static int timekeeping_suspended; | 967 | static int timekeeping_suspended; |
968 | /* time in seconds when suspend began */ | ||
969 | static unsigned long timekeeping_suspend_time; | ||
970 | |||
901 | /** | 971 | /** |
902 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | 972 | * timekeeping_resume - Resumes the generic timekeeping subsystem. |
903 | * @dev: unused | 973 | * @dev: unused |
@@ -909,13 +979,26 @@ static int timekeeping_suspended; | |||
909 | static int timekeeping_resume(struct sys_device *dev) | 979 | static int timekeeping_resume(struct sys_device *dev) |
910 | { | 980 | { |
911 | unsigned long flags; | 981 | unsigned long flags; |
982 | unsigned long now = read_persistent_clock(); | ||
912 | 983 | ||
913 | write_seqlock_irqsave(&xtime_lock, flags); | 984 | write_seqlock_irqsave(&xtime_lock, flags); |
914 | /* restart the last cycle value */ | 985 | |
986 | if (now && (now > timekeeping_suspend_time)) { | ||
987 | unsigned long sleep_length = now - timekeeping_suspend_time; | ||
988 | |||
989 | xtime.tv_sec += sleep_length; | ||
990 | wall_to_monotonic.tv_sec -= sleep_length; | ||
991 | } | ||
992 | /* re-base the last cycle value */ | ||
915 | clock->cycle_last = clocksource_read(clock); | 993 | clock->cycle_last = clocksource_read(clock); |
916 | clock->error = 0; | 994 | clock->error = 0; |
917 | timekeeping_suspended = 0; | 995 | timekeeping_suspended = 0; |
918 | write_sequnlock_irqrestore(&xtime_lock, flags); | 996 | write_sequnlock_irqrestore(&xtime_lock, flags); |
997 | |||
998 | touch_softlockup_watchdog(); | ||
999 | /* Resume hrtimers */ | ||
1000 | clock_was_set(); | ||
1001 | |||
919 | return 0; | 1002 | return 0; |
920 | } | 1003 | } |
921 | 1004 | ||
@@ -925,6 +1008,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) | |||
925 | 1008 | ||
926 | write_seqlock_irqsave(&xtime_lock, flags); | 1009 | write_seqlock_irqsave(&xtime_lock, flags); |
927 | timekeeping_suspended = 1; | 1010 | timekeeping_suspended = 1; |
1011 | timekeeping_suspend_time = read_persistent_clock(); | ||
928 | write_sequnlock_irqrestore(&xtime_lock, flags); | 1012 | write_sequnlock_irqrestore(&xtime_lock, flags); |
929 | return 0; | 1013 | return 0; |
930 | } | 1014 | } |
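
The effect of this suspend/resume bookkeeping is easiest to see with numbers. A minimal user-space sketch with made-up timestamps: the wall clock jumps forward by the time spent suspended, while the sum xtime + wall_to_monotonic (the CLOCK_MONOTONIC base) is unchanged, so monotonic time stays continuous across suspend:

    #include <stdio.h>

    int main(void)
    {
            long timekeeping_suspend_time = 10000; /* read at suspend */
            long now = 10030;                      /* read at resume  */
            /* at boot, wall_to_monotonic = -xtime (see above): */
            long xtime_sec = 1171000000, wall_to_mono_sec = -1171000000;

            if (now > timekeeping_suspend_time) {
                    long sleep_length = now - timekeeping_suspend_time;

                    xtime_sec += sleep_length;         /* wall jumps  */
                    wall_to_mono_sec -= sleep_length;  /* base fixed  */
            }
            printf("slept %ld s, monotonic base %ld\n",
                   now - timekeeping_suspend_time,
                   xtime_sec + wall_to_mono_sec);
            return 0;
    }
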
@@ -1089,11 +1173,8 @@ static void update_wall_time(void) | |||
1089 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; | 1173 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; |
1090 | 1174 | ||
1091 | /* check to see if there is a new clocksource to use */ | 1175 | /* check to see if there is a new clocksource to use */ |
1092 | if (change_clocksource()) { | 1176 | change_clocksource(); |
1093 | clock->error = 0; | 1177 | update_vsyscall(&xtime, clock); |
1094 | clock->xtime_nsec = 0; | ||
1095 | clocksource_calculate_interval(clock, tick_nsec); | ||
1096 | } | ||
1097 | } | 1178 | } |
1098 | 1179 | ||
1099 | /* | 1180 | /* |
@@ -1173,7 +1254,8 @@ static void run_timer_softirq(struct softirq_action *h) | |||
1173 | { | 1254 | { |
1174 | tvec_base_t *base = __get_cpu_var(tvec_bases); | 1255 | tvec_base_t *base = __get_cpu_var(tvec_bases); |
1175 | 1256 | ||
1176 | hrtimer_run_queues(); | 1257 | hrtimer_run_queues(); |
1258 | |||
1177 | if (time_after_eq(jiffies, base->timer_jiffies)) | 1259 | if (time_after_eq(jiffies, base->timer_jiffies)) |
1178 | __run_timers(base); | 1260 | __run_timers(base); |
1179 | } | 1261 | } |
@@ -1619,6 +1701,8 @@ void __init init_timers(void) | |||
1619 | int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, | 1701 | int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, |
1620 | (void *)(long)smp_processor_id()); | 1702 | (void *)(long)smp_processor_id()); |
1621 | 1703 | ||
1704 | init_timer_stats(); | ||
1705 | |||
1622 | BUG_ON(err == NOTIFY_BAD); | 1706 | BUG_ON(err == NOTIFY_BAD); |
1623 | register_cpu_notifier(&timers_nb); | 1707 | register_cpu_notifier(&timers_nb); |
1624 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); | 1708 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); |
diff --git a/kernel/tsacct.c b/kernel/tsacct.c index baacc3691415..658f638c402c 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include <linux/acct.h> | 22 | #include <linux/acct.h> |
23 | #include <linux/jiffies.h> | 23 | #include <linux/jiffies.h> |
24 | 24 | ||
25 | |||
26 | #define USEC_PER_TICK (USEC_PER_SEC/HZ) | ||
27 | /* | 25 | /* |
28 | * fill in basic accounting fields | 26 | * fill in basic accounting fields |
29 | */ | 27 | */ |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 020d1fff57dc..b6fa5e63085d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -218,7 +218,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) | |||
218 | } | 218 | } |
219 | EXPORT_SYMBOL_GPL(queue_work); | 219 | EXPORT_SYMBOL_GPL(queue_work); |
220 | 220 | ||
221 | static void delayed_work_timer_fn(unsigned long __data) | 221 | void delayed_work_timer_fn(unsigned long __data) |
222 | { | 222 | { |
223 | struct delayed_work *dwork = (struct delayed_work *)__data; | 223 | struct delayed_work *dwork = (struct delayed_work *)__data; |
224 | struct workqueue_struct *wq = get_wq_data(&dwork->work); | 224 | struct workqueue_struct *wq = get_wq_data(&dwork->work); |
@@ -245,6 +245,7 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq, | |||
245 | struct timer_list *timer = &dwork->timer; | 245 | struct timer_list *timer = &dwork->timer; |
246 | struct work_struct *work = &dwork->work; | 246 | struct work_struct *work = &dwork->work; |
247 | 247 | ||
248 | timer_stats_timer_set_start_info(timer); | ||
248 | if (delay == 0) | 249 | if (delay == 0) |
249 | return queue_work(wq, work); | 250 | return queue_work(wq, work); |
250 | 251 | ||
@@ -593,8 +594,10 @@ EXPORT_SYMBOL(schedule_work); | |||
593 | * After waiting for a given time this puts a job in the kernel-global | 594 | * After waiting for a given time this puts a job in the kernel-global |
594 | * workqueue. | 595 | * workqueue. |
595 | */ | 596 | */ |
596 | int fastcall schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) | 597 | int fastcall schedule_delayed_work(struct delayed_work *dwork, |
598 | unsigned long delay) | ||
597 | { | 599 | { |
600 | timer_stats_timer_set_start_info(&dwork->timer); | ||
598 | return queue_delayed_work(keventd_wq, dwork, delay); | 601 | return queue_delayed_work(keventd_wq, dwork, delay); |
599 | } | 602 | } |
600 | EXPORT_SYMBOL(schedule_delayed_work); | 603 | EXPORT_SYMBOL(schedule_delayed_work); |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 63f04c15e6f5..4448f91b865c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -134,6 +134,17 @@ config SCHEDSTATS | |||
134 | application, you can say N to avoid the very slight overhead | 134 | application, you can say N to avoid the very slight overhead |
135 | this adds. | 135 | this adds. |
136 | 136 | ||
137 | config TIMER_STATS | ||
138 | bool "Collect kernel timers statistics" | ||
139 | depends on DEBUG_KERNEL && PROC_FS | ||
140 | help | ||
141 | If you say Y here, additional code will be inserted into the | ||
142 | timer routines to collect statistics about kernel timers being | ||
143 | reprogrammed. The statistics can be read from /proc/timer_stats. | ||
144 | Statistics collection is started by writing 1 to /proc/timer_stats | ||
145 | and stopped by writing 0. This feature is useful for collecting | ||
146 | information about timer usage patterns in the kernel and in userspace. | ||
147 | |||
137 | config DEBUG_SLAB | 148 | config DEBUG_SLAB |
138 | bool "Debug slab memory allocations" | 149 | bool "Debug slab memory allocations" |
139 | depends on DEBUG_KERNEL && SLAB | 150 | depends on DEBUG_KERNEL && SLAB |
diff --git a/lib/devres.c b/lib/devres.c index 2a668dd7cac7..eb38849aa717 100644 --- a/lib/devres.c +++ b/lib/devres.c | |||
@@ -274,21 +274,21 @@ int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name) | |||
274 | 274 | ||
275 | rc = pci_request_region(pdev, i, name); | 275 | rc = pci_request_region(pdev, i, name); |
276 | if (rc) | 276 | if (rc) |
277 | goto err_region; | 277 | goto err_inval; |
278 | 278 | ||
279 | rc = -ENOMEM; | 279 | rc = -ENOMEM; |
280 | if (!pcim_iomap(pdev, i, 0)) | 280 | if (!pcim_iomap(pdev, i, 0)) |
281 | goto err_iomap; | 281 | goto err_region; |
282 | } | 282 | } |
283 | 283 | ||
284 | return 0; | 284 | return 0; |
285 | 285 | ||
286 | err_iomap: | ||
287 | pcim_iounmap(pdev, iomap[i]); | ||
288 | err_region: | 286 | err_region: |
289 | pci_release_region(pdev, i); | 287 | pci_release_region(pdev, i); |
290 | err_inval: | 288 | err_inval: |
291 | while (--i >= 0) { | 289 | while (--i >= 0) { |
290 | if (!(mask & (1 << i))) | ||
291 | continue; | ||
292 | pcim_iounmap(pdev, iomap[i]); | 292 | pcim_iounmap(pdev, iomap[i]); |
293 | pci_release_region(pdev, i); | 293 | pci_release_region(pdev, i); |
294 | } | 294 | } |
diff --git a/mm/filemap.c b/mm/filemap.c index 00414849a867..d1060b8d3cd6 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -2079,21 +2079,27 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2079 | /* Limit the size of the copy to the caller's write size */ | 2079 | /* Limit the size of the copy to the caller's write size */ |
2080 | bytes = min(bytes, count); | 2080 | bytes = min(bytes, count); |
2081 | 2081 | ||
2082 | /* | 2082 | /* We only need to worry about prefaulting when writes are from |
2083 | * Limit the size of the copy to that of the current segment, | 2083 | * user-space. NFSd uses vfs_writev with several non-aligned |
2084 | * because fault_in_pages_readable() doesn't know how to walk | 2084 | * segments in the vector, and limiting to one segment at a time |
2085 | * segments. | 2085 | * is a noticeable performance regression for re-writes. |
2086 | */ | 2086 | */ |
2087 | bytes = min(bytes, cur_iov->iov_len - iov_base); | 2087 | if (!segment_eq(get_fs(), KERNEL_DS)) { |
2088 | 2088 | /* | |
2089 | /* | 2089 | * Limit the size of the copy to that of the current |
2090 | * Bring in the user page that we will copy from _first_. | 2090 | * segment, because fault_in_pages_readable() doesn't |
2091 | * Otherwise there's a nasty deadlock on copying from the | 2091 | * know how to walk segments. |
2092 | * same page as we're writing to, without it being marked | 2092 | */ |
2093 | * up-to-date. | 2093 | bytes = min(bytes, cur_iov->iov_len - iov_base); |
2094 | */ | ||
2095 | fault_in_pages_readable(buf, bytes); | ||
2096 | 2094 | ||
2095 | /* | ||
2096 | * Bring in the user page that we will copy from | ||
2097 | * _first_. Otherwise there's a nasty deadlock on | ||
2098 | * copying from the same page as we're writing to, | ||
2099 | * without it being marked up-to-date. | ||
2100 | */ | ||
2101 | fault_in_pages_readable(buf, bytes); | ||
2102 | } | ||
2097 | page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); | 2103 | page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); |
2098 | if (!page) { | 2104 | if (!page) { |
2099 | status = -ENOMEM; | 2105 | status = -ENOMEM; |