about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/.gitignore 1
-rw-r--r-- arch/x86/Kconfig 20
-rw-r--r-- arch/x86/include/asm/unistd.h 1
-rw-r--r-- arch/x86/include/asm/uv/uv_bau.h 107
-rw-r--r-- arch/x86/kernel/tsc.c 14
-rw-r--r-- arch/x86/lib/x86-opcode-map.txt 8
-rw-r--r-- arch/x86/platform/uv/tlb_uv.c 388
-rw-r--r-- fs/proc/stat.c 2
-rw-r--r-- kernel/tracepoint.c 7
9 files changed, 444 insertions, 104 deletions
diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore
index 028079065af6..7cab8c08e6d1 100644
--- a/arch/x86/.gitignore
+++ b/arch/x86/.gitignore
@@ -1,3 +1,4 @@
1boot/compressed/vmlinux 1boot/compressed/vmlinux
2tools/test_get_len 2tools/test_get_len
3tools/insn_sanity
3 4
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6c14ecd851d0..864cc6e6ac8e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -125,16 +125,6 @@ config HAVE_LATENCYTOP_SUPPORT
125config MMU 125config MMU
126 def_bool y 126 def_bool y
127 127
128config ZONE_DMA
129 bool "DMA memory allocation support" if EXPERT
130 default y
131 help
132 DMA memory allocation support allows devices with less than 32-bit
133 addressing to allocate within the first 16MB of address space.
134 Disable if no such devices will be used.
135
136 If unsure, say Y.
137
138config SBUS 128config SBUS
139 bool 129 bool
140 130
@@ -255,6 +245,16 @@ source "kernel/Kconfig.freezer"
255 245
256menu "Processor type and features" 246menu "Processor type and features"
257 247
248config ZONE_DMA
249 bool "DMA memory allocation support" if EXPERT
250 default y
251 help
252 DMA memory allocation support allows devices with less than 32-bit
253 addressing to allocate within the first 16MB of address space.
254 Disable if no such devices will be used.
255
256 If unsure, say Y.
257
258source "kernel/time/Kconfig" 258source "kernel/time/Kconfig"
259 259
260config SMP 260config SMP
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index b4a3db7ce140..21f77b89e47a 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -7,6 +7,7 @@
7# include <asm/unistd_32.h> 7# include <asm/unistd_32.h>
8# define __ARCH_WANT_IPC_PARSE_VERSION 8# define __ARCH_WANT_IPC_PARSE_VERSION
9# define __ARCH_WANT_STAT64 9# define __ARCH_WANT_STAT64
10# define __ARCH_WANT_SYS_IPC
10# define __ARCH_WANT_SYS_OLD_MMAP 11# define __ARCH_WANT_SYS_OLD_MMAP
11# define __ARCH_WANT_SYS_OLD_SELECT 12# define __ARCH_WANT_SYS_OLD_SELECT
12 13
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 8e862aaf0d90..becf47b81735 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -65,7 +65,7 @@
65 * UV2: Bit 19 selects between 65 * UV2: Bit 19 selects between
66 * (0): 10 microsecond timebase and 66 * (0): 10 microsecond timebase and
67 * (1): 80 microseconds 67 * (1): 80 microseconds
68 * we're using 655us, similar to UV1: 65 units of 10us 68 * we're using 560us, similar to UV1: 65 units of 10us
69 */ 69 */
70#define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL) 70#define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL)
71#define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (15UL) 71#define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (15UL)
@@ -167,6 +167,7 @@
167#define FLUSH_RETRY_TIMEOUT 2 167#define FLUSH_RETRY_TIMEOUT 2
168#define FLUSH_GIVEUP 3 168#define FLUSH_GIVEUP 3
169#define FLUSH_COMPLETE 4 169#define FLUSH_COMPLETE 4
170#define FLUSH_RETRY_BUSYBUG 5
170 171
171/* 172/*
172 * tuning the action when the numalink network is extremely delayed 173 * tuning the action when the numalink network is extremely delayed
@@ -235,10 +236,10 @@ struct bau_msg_payload {
235 236
236 237
237/* 238/*
238 * Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor) 239 * UV1 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
239 * see table 4.2.3.0.1 in broadcast_assist spec. 240 * see table 4.2.3.0.1 in broadcast_assist spec.
240 */ 241 */
241struct bau_msg_header { 242struct uv1_bau_msg_header {
242 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ 243 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
243 /* bits 5:0 */ 244 /* bits 5:0 */
244 unsigned int base_dest_nasid:15; /* nasid of the first bit */ 245 unsigned int base_dest_nasid:15; /* nasid of the first bit */
@@ -318,19 +319,87 @@ struct bau_msg_header {
318}; 319};
319 320
320/* 321/*
322 * UV2 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
323 * see figure 9-2 of harp_sys.pdf
324 */
325struct uv2_bau_msg_header {
326 unsigned int base_dest_nasid:15; /* nasid of the first bit */
327 /* bits 14:0 */ /* in uvhub map */
328 unsigned int dest_subnodeid:5; /* must be 0x10, for the LB */
329 /* bits 19:15 */
330 unsigned int rsvd_1:1; /* must be zero */
331 /* bit 20 */
332 /* Address bits 59:21 */
333 /* bits 25:2 of address (44:21) are payload */
334 /* these next 24 bits become bytes 12-14 of msg */
335 /* bits 28:21 land in byte 12 */
336 unsigned int replied_to:1; /* sent as 0 by the source to
337 byte 12 */
338 /* bit 21 */
339 unsigned int msg_type:3; /* software type of the
340 message */
341 /* bits 24:22 */
342 unsigned int canceled:1; /* message canceled, resource
343 is to be freed*/
344 /* bit 25 */
345 unsigned int payload_1:3; /* not currently used */
346 /* bits 28:26 */
347
348 /* bits 36:29 land in byte 13 */
349 unsigned int payload_2a:3; /* not currently used */
350 unsigned int payload_2b:5; /* not currently used */
351 /* bits 36:29 */
352
353 /* bits 44:37 land in byte 14 */
354 unsigned int payload_3:8; /* not currently used */
355 /* bits 44:37 */
356
357 unsigned int rsvd_2:7; /* reserved */
358 /* bits 51:45 */
359 unsigned int swack_flag:1; /* software acknowledge flag */
360 /* bit 52 */
361 unsigned int rsvd_3a:3; /* must be zero */
362 unsigned int rsvd_3b:8; /* must be zero */
363 unsigned int rsvd_3c:8; /* must be zero */
364 unsigned int rsvd_3d:3; /* must be zero */
365 /* bits 74:53 */
366 unsigned int fairness:3; /* usually zero */
367 /* bits 77:75 */
368
369 unsigned int sequence:16; /* message sequence number */
370 /* bits 93:78 Suppl_A */
371 unsigned int chaining:1; /* next descriptor is part of
372 this activation*/
373 /* bit 94 */
374 unsigned int multilevel:1; /* multi-level multicast
375 format */
376 /* bit 95 */
377 unsigned int rsvd_4:24; /* ordered / source node /
378 source subnode / aging
379 must be zero */
380 /* bits 119:96 */
381 unsigned int command:8; /* message type */
382 /* bits 127:120 */
383};
384
385/*
321 * The activation descriptor: 386 * The activation descriptor:
322 * The format of the message to send, plus all accompanying control 387 * The format of the message to send, plus all accompanying control
323 * Should be 64 bytes 388 * Should be 64 bytes
324 */ 389 */
325struct bau_desc { 390struct bau_desc {
326 struct pnmask distribution; 391 struct pnmask distribution;
327 /* 392 /*
328 * message template, consisting of header and payload: 393 * message template, consisting of header and payload:
329 */ 394 */
330 struct bau_msg_header header; 395 union bau_msg_header {
331 struct bau_msg_payload payload; 396 struct uv1_bau_msg_header uv1_hdr;
397 struct uv2_bau_msg_header uv2_hdr;
398 } header;
399
400 struct bau_msg_payload payload;
332}; 401};
333/* 402/* UV1:
334 * -payload-- ---------header------ 403 * -payload-- ---------header------
335 * bytes 0-11 bits 41-56 bits 58-81 404 * bytes 0-11 bits 41-56 bits 58-81
336 * A B (2) C (3) 405 * A B (2) C (3)
@@ -340,6 +409,16 @@ struct bau_desc {
340 * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector) 409 * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector)
341 * ------------payload queue----------- 410 * ------------payload queue-----------
342 */ 411 */
412/* UV2:
413 * -payload-- ---------header------
414 * bytes 0-11 bits 70-78 bits 21-44
415 * A B (2) C (3)
416 *
417 * A/B/C are moved to:
418 * A C B
419 * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector)
420 * ------------payload queue-----------
421 */
343 422
344/* 423/*
345 * The payload queue on the destination side is an array of these. 424 * The payload queue on the destination side is an array of these.
@@ -385,7 +464,6 @@ struct bau_pq_entry {
385struct msg_desc { 464struct msg_desc {
386 struct bau_pq_entry *msg; 465 struct bau_pq_entry *msg;
387 int msg_slot; 466 int msg_slot;
388 int swack_slot;
389 struct bau_pq_entry *queue_first; 467 struct bau_pq_entry *queue_first;
390 struct bau_pq_entry *queue_last; 468 struct bau_pq_entry *queue_last;
391}; 469};
@@ -405,6 +483,7 @@ struct ptc_stats {
405 requests */ 483 requests */
406 unsigned long s_stimeout; /* source side timeouts */ 484 unsigned long s_stimeout; /* source side timeouts */
407 unsigned long s_dtimeout; /* destination side timeouts */ 485 unsigned long s_dtimeout; /* destination side timeouts */
486 unsigned long s_strongnacks; /* number of strong nack's */
408 unsigned long s_time; /* time spent in sending side */ 487 unsigned long s_time; /* time spent in sending side */
409 unsigned long s_retriesok; /* successful retries */ 488 unsigned long s_retriesok; /* successful retries */
410 unsigned long s_ntargcpu; /* total number of cpu's 489 unsigned long s_ntargcpu; /* total number of cpu's
@@ -439,6 +518,9 @@ struct ptc_stats {
439 unsigned long s_retry_messages; /* retry broadcasts */ 518 unsigned long s_retry_messages; /* retry broadcasts */
440 unsigned long s_bau_reenabled; /* for bau enable/disable */ 519 unsigned long s_bau_reenabled; /* for bau enable/disable */
441 unsigned long s_bau_disabled; /* for bau enable/disable */ 520 unsigned long s_bau_disabled; /* for bau enable/disable */
521 unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */
522 unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */
523 unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */
442 /* destination statistics */ 524 /* destination statistics */
443 unsigned long d_alltlb; /* times all tlb's on this 525 unsigned long d_alltlb; /* times all tlb's on this
444 cpu were flushed */ 526 cpu were flushed */
@@ -511,9 +593,12 @@ struct bau_control {
511 short osnode; 593 short osnode;
512 short uvhub_cpu; 594 short uvhub_cpu;
513 short uvhub; 595 short uvhub;
596 short uvhub_version;
514 short cpus_in_socket; 597 short cpus_in_socket;
515 short cpus_in_uvhub; 598 short cpus_in_uvhub;
516 short partition_base_pnode; 599 short partition_base_pnode;
600 short using_desc; /* an index, like uvhub_cpu */
601 unsigned int inuse_map;
517 unsigned short message_number; 602 unsigned short message_number;
518 unsigned short uvhub_quiesce; 603 unsigned short uvhub_quiesce;
519 short socket_acknowledge_count[DEST_Q_SIZE]; 604 short socket_acknowledge_count[DEST_Q_SIZE];
@@ -531,6 +616,7 @@ struct bau_control {
531 int cong_response_us; 616 int cong_response_us;
532 int cong_reps; 617 int cong_reps;
533 int cong_period; 618 int cong_period;
619 unsigned long clocks_per_100_usec;
534 cycles_t period_time; 620 cycles_t period_time;
535 long period_requests; 621 long period_requests;
536 struct hub_and_pnode *thp; 622 struct hub_and_pnode *thp;
@@ -591,6 +677,11 @@ static inline void write_mmr_sw_ack(unsigned long mr)
591 uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr); 677 uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
592} 678}
593 679
680static inline void write_gmmr_sw_ack(int pnode, unsigned long mr)
681{
682 write_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
683}
684
594static inline unsigned long read_mmr_sw_ack(void) 685static inline unsigned long read_mmr_sw_ack(void)
595{ 686{
596 return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); 687 return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c0dd5b603749..a62c201c97ec 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -290,14 +290,15 @@ static inline int pit_verify_msb(unsigned char val)
290static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) 290static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
291{ 291{
292 int count; 292 int count;
293 u64 tsc = 0; 293 u64 tsc = 0, prev_tsc = 0;
294 294
295 for (count = 0; count < 50000; count++) { 295 for (count = 0; count < 50000; count++) {
296 if (!pit_verify_msb(val)) 296 if (!pit_verify_msb(val))
297 break; 297 break;
298 prev_tsc = tsc;
298 tsc = get_cycles(); 299 tsc = get_cycles();
299 } 300 }
300 *deltap = get_cycles() - tsc; 301 *deltap = get_cycles() - prev_tsc;
301 *tscp = tsc; 302 *tscp = tsc;
302 303
303 /* 304 /*
@@ -311,9 +312,9 @@ static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *de
311 * How many MSB values do we want to see? We aim for 312 * How many MSB values do we want to see? We aim for
312 * a maximum error rate of 500ppm (in practice the 313 * a maximum error rate of 500ppm (in practice the
313 * real error is much smaller), but refuse to spend 314 * real error is much smaller), but refuse to spend
314 * more than 25ms on it. 315 * more than 50ms on it.
315 */ 316 */
316#define MAX_QUICK_PIT_MS 25 317#define MAX_QUICK_PIT_MS 50
317#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) 318#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
318 319
319static unsigned long quick_pit_calibrate(void) 320static unsigned long quick_pit_calibrate(void)
@@ -383,15 +384,12 @@ success:
383 * 384 *
384 * As a result, we can depend on there not being 385 * As a result, we can depend on there not being
385 * any odd delays anywhere, and the TSC reads are 386 * any odd delays anywhere, and the TSC reads are
386 * reliable (within the error). We also adjust the 387 * reliable (within the error).
387 * delta to the middle of the error bars, just
388 * because it looks nicer.
389 * 388 *
390 * kHz = ticks / time-in-seconds / 1000; 389 * kHz = ticks / time-in-seconds / 1000;
391 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000 390 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
392 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000) 391 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
393 */ 392 */
394 delta += (long)(d2 - d1)/2;
395 delta *= PIT_TICK_RATE; 393 delta *= PIT_TICK_RATE;
396 do_div(delta, i*256*1000); 394 do_div(delta, i*256*1000);
397 printk("Fast TSC calibration using PIT\n"); 395 printk("Fast TSC calibration using PIT\n");
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 5b83c51c12e0..819137904428 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -219,7 +219,9 @@ ab: STOS/W/D/Q Yv,rAX
219ac: LODS/B AL,Xb 219ac: LODS/B AL,Xb
220ad: LODS/W/D/Q rAX,Xv 220ad: LODS/W/D/Q rAX,Xv
221ae: SCAS/B AL,Yb 221ae: SCAS/B AL,Yb
222af: SCAS/W/D/Q rAX,Xv 222# Note: The May 2011 Intel manual shows Xv for the second parameter of the
223# next instruction but Yv is correct
224af: SCAS/W/D/Q rAX,Yv
223# 0xb0 - 0xbf 225# 0xb0 - 0xbf
224b0: MOV AL/R8L,Ib 226b0: MOV AL/R8L,Ib
225b1: MOV CL/R9L,Ib 227b1: MOV CL/R9L,Ib
@@ -729,8 +731,8 @@ de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
729df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) 731df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
730f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) 732f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2)
731f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) 733f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2)
732f3: ANDN Gy,By,Ey (v) 734f2: ANDN Gy,By,Ey (v)
733f4: Grp17 (1A) 735f3: Grp17 (1A)
734f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) 736f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
735f6: MULX By,Gy,rDX,Ey (F2),(v) 737f6: MULX By,Gy,rDX,Ey (F2),(v)
736f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) 738f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 5b552198f774..9be4cff00a2d 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -157,13 +157,14 @@ static int __init uvhub_to_first_apicid(int uvhub)
157 * clear of the Timeout bit (as well) will free the resource. No reply will 157 * clear of the Timeout bit (as well) will free the resource. No reply will
158 * be sent (the hardware will only do one reply per message). 158 * be sent (the hardware will only do one reply per message).
159 */ 159 */
160static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp) 160static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp,
161 int do_acknowledge)
161{ 162{
162 unsigned long dw; 163 unsigned long dw;
163 struct bau_pq_entry *msg; 164 struct bau_pq_entry *msg;
164 165
165 msg = mdp->msg; 166 msg = mdp->msg;
166 if (!msg->canceled) { 167 if (!msg->canceled && do_acknowledge) {
167 dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec; 168 dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
168 write_mmr_sw_ack(dw); 169 write_mmr_sw_ack(dw);
169 } 170 }
@@ -212,8 +213,8 @@ static void bau_process_retry_msg(struct msg_desc *mdp,
212 if (mmr & (msg_res << UV_SW_ACK_NPENDING)) { 213 if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
213 unsigned long mr; 214 unsigned long mr;
214 /* 215 /*
215 * is the resource timed out? 216 * Is the resource timed out?
216 * make everyone ignore the cancelled message. 217 * Make everyone ignore the cancelled message.
217 */ 218 */
218 msg2->canceled = 1; 219 msg2->canceled = 1;
219 stat->d_canceled++; 220 stat->d_canceled++;
@@ -231,8 +232,8 @@ static void bau_process_retry_msg(struct msg_desc *mdp,
231 * Do all the things a cpu should do for a TLB shootdown message. 232 * Do all the things a cpu should do for a TLB shootdown message.
232 * Other cpu's may come here at the same time for this message. 233 * Other cpu's may come here at the same time for this message.
233 */ 234 */
234static void bau_process_message(struct msg_desc *mdp, 235static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
235 struct bau_control *bcp) 236 int do_acknowledge)
236{ 237{
237 short socket_ack_count = 0; 238 short socket_ack_count = 0;
238 short *sp; 239 short *sp;
@@ -284,8 +285,9 @@ static void bau_process_message(struct msg_desc *mdp,
284 if (msg_ack_count == bcp->cpus_in_uvhub) { 285 if (msg_ack_count == bcp->cpus_in_uvhub) {
285 /* 286 /*
286 * All cpus in uvhub saw it; reply 287 * All cpus in uvhub saw it; reply
288 * (unless we are in the UV2 workaround)
287 */ 289 */
288 reply_to_message(mdp, bcp); 290 reply_to_message(mdp, bcp, do_acknowledge);
289 } 291 }
290 } 292 }
291 293
@@ -491,27 +493,138 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,
491/* 493/*
492 * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register. 494 * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
493 */ 495 */
494static unsigned long uv2_read_status(unsigned long offset, int rshft, int cpu) 496static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
495{ 497{
496 unsigned long descriptor_status; 498 unsigned long descriptor_status;
497 unsigned long descriptor_status2; 499 unsigned long descriptor_status2;
498 500
499 descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK); 501 descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
500 descriptor_status2 = (read_mmr_uv2_status() >> cpu) & 0x1UL; 502 descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL;
501 descriptor_status = (descriptor_status << 1) | descriptor_status2; 503 descriptor_status = (descriptor_status << 1) | descriptor_status2;
502 return descriptor_status; 504 return descriptor_status;
503} 505}
504 506
507/*
508 * Return whether the status of the descriptor that is normally used for this
509 * cpu (the one indexed by its hub-relative cpu number) is busy.
510 * The status of the original 32 descriptors is always reflected in the 64
511 * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0.
512 * The bit provided by the activation_status_2 register is irrelevant to
513 * the status if it is only being tested for busy or not busy.
514 */
515int normal_busy(struct bau_control *bcp)
516{
517 int cpu = bcp->uvhub_cpu;
518 int mmr_offset;
519 int right_shift;
520
521 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
522 right_shift = cpu * UV_ACT_STATUS_SIZE;
523 return (((((read_lmmr(mmr_offset) >> right_shift) &
524 UV_ACT_STATUS_MASK)) << 1) == UV2H_DESC_BUSY);
525}
526
527/*
528 * Entered when a bau descriptor has gone into a permanent busy wait because
529 * of a hardware bug.
530 * Work around the bug.
531 */
532int handle_uv2_busy(struct bau_control *bcp)
533{
534 int busy_one = bcp->using_desc;
535 int normal = bcp->uvhub_cpu;
536 int selected = -1;
537 int i;
538 unsigned long descriptor_status;
539 unsigned long status;
540 int mmr_offset;
541 struct bau_desc *bau_desc_old;
542 struct bau_desc *bau_desc_new;
543 struct bau_control *hmaster = bcp->uvhub_master;
544 struct ptc_stats *stat = bcp->statp;
545 cycles_t ttm;
546
547 stat->s_uv2_wars++;
548 spin_lock(&hmaster->uvhub_lock);
549 /* try for the original first */
550 if (busy_one != normal) {
551 if (!normal_busy(bcp))
552 selected = normal;
553 }
554 if (selected < 0) {
555 /* can't use the normal, select an alternate */
556 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
557 descriptor_status = read_lmmr(mmr_offset);
558
559 /* scan available descriptors 32-63 */
560 for (i = 0; i < UV_CPUS_PER_AS; i++) {
561 if ((hmaster->inuse_map & (1 << i)) == 0) {
562 status = ((descriptor_status >>
563 (i * UV_ACT_STATUS_SIZE)) &
564 UV_ACT_STATUS_MASK) << 1;
565 if (status != UV2H_DESC_BUSY) {
566 selected = i + UV_CPUS_PER_AS;
567 break;
568 }
569 }
570 }
571 }
572
573 if (busy_one != normal)
574 /* mark the busy alternate as not in-use */
575 hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS));
576
577 if (selected >= 0) {
578 /* switch to the selected descriptor */
579 if (selected != normal) {
580 /* set the selected alternate as in-use */
581 hmaster->inuse_map |=
582 (1 << (selected - UV_CPUS_PER_AS));
583 if (selected > stat->s_uv2_wars_hw)
584 stat->s_uv2_wars_hw = selected;
585 }
586 bau_desc_old = bcp->descriptor_base;
587 bau_desc_old += (ITEMS_PER_DESC * busy_one);
588 bcp->using_desc = selected;
589 bau_desc_new = bcp->descriptor_base;
590 bau_desc_new += (ITEMS_PER_DESC * selected);
591 *bau_desc_new = *bau_desc_old;
592 } else {
593 /*
594 * All are busy. Wait for the normal one for this cpu to
595 * free up.
596 */
597 stat->s_uv2_war_waits++;
598 spin_unlock(&hmaster->uvhub_lock);
599 ttm = get_cycles();
600 do {
601 cpu_relax();
602 } while (normal_busy(bcp));
603 spin_lock(&hmaster->uvhub_lock);
604 /* switch to the original descriptor */
605 bcp->using_desc = normal;
606 bau_desc_old = bcp->descriptor_base;
607 bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc);
608 bcp->using_desc = (ITEMS_PER_DESC * normal);
609 bau_desc_new = bcp->descriptor_base;
610 bau_desc_new += (ITEMS_PER_DESC * normal);
611 *bau_desc_new = *bau_desc_old; /* copy the entire descriptor */
612 }
613 spin_unlock(&hmaster->uvhub_lock);
614 return FLUSH_RETRY_BUSYBUG;
615}
616
505static int uv2_wait_completion(struct bau_desc *bau_desc, 617static int uv2_wait_completion(struct bau_desc *bau_desc,
506 unsigned long mmr_offset, int right_shift, 618 unsigned long mmr_offset, int right_shift,
507 struct bau_control *bcp, long try) 619 struct bau_control *bcp, long try)
508{ 620{
509 unsigned long descriptor_stat; 621 unsigned long descriptor_stat;
510 cycles_t ttm; 622 cycles_t ttm;
511 int cpu = bcp->uvhub_cpu; 623 int desc = bcp->using_desc;
624 long busy_reps = 0;
512 struct ptc_stats *stat = bcp->statp; 625 struct ptc_stats *stat = bcp->statp;
513 626
514 descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu); 627 descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc);
515 628
516 /* spin on the status MMR, waiting for it to go idle */ 629 /* spin on the status MMR, waiting for it to go idle */
517 while (descriptor_stat != UV2H_DESC_IDLE) { 630 while (descriptor_stat != UV2H_DESC_IDLE) {
@@ -522,32 +635,35 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
522 * our message and its state will stay IDLE. 635 * our message and its state will stay IDLE.
523 */ 636 */
524 if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) || 637 if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) ||
525 (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) ||
526 (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) { 638 (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) {
527 stat->s_stimeout++; 639 stat->s_stimeout++;
528 return FLUSH_GIVEUP; 640 return FLUSH_GIVEUP;
641 } else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) {
642 stat->s_strongnacks++;
643 bcp->conseccompletes = 0;
644 return FLUSH_GIVEUP;
529 } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { 645 } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
530 stat->s_dtimeout++; 646 stat->s_dtimeout++;
531 ttm = get_cycles();
532 /*
533 * Our retries may be blocked by all destination
534 * swack resources being consumed, and a timeout
535 * pending. In that case hardware returns the
536 * ERROR that looks like a destination timeout.
537 */
538 if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
539 bcp->conseccompletes = 0;
540 return FLUSH_RETRY_PLUGGED;
541 }
542 bcp->conseccompletes = 0; 647 bcp->conseccompletes = 0;
543 return FLUSH_RETRY_TIMEOUT; 648 return FLUSH_RETRY_TIMEOUT;
544 } else { 649 } else {
650 busy_reps++;
651 if (busy_reps > 1000000) {
652 /* not to hammer on the clock */
653 busy_reps = 0;
654 ttm = get_cycles();
655 if ((ttm - bcp->send_message) >
656 (bcp->clocks_per_100_usec)) {
657 return handle_uv2_busy(bcp);
658 }
659 }
545 /* 660 /*
546 * descriptor_stat is still BUSY 661 * descriptor_stat is still BUSY
547 */ 662 */
548 cpu_relax(); 663 cpu_relax();
549 } 664 }
550 descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu); 665 descriptor_stat = uv2_read_status(mmr_offset, right_shift,
666 desc);
551 } 667 }
552 bcp->conseccompletes++; 668 bcp->conseccompletes++;
553 return FLUSH_COMPLETE; 669 return FLUSH_COMPLETE;
@@ -563,17 +679,17 @@ static int wait_completion(struct bau_desc *bau_desc,
563{ 679{
564 int right_shift; 680 int right_shift;
565 unsigned long mmr_offset; 681 unsigned long mmr_offset;
566 int cpu = bcp->uvhub_cpu; 682 int desc = bcp->using_desc;
567 683
568 if (cpu < UV_CPUS_PER_AS) { 684 if (desc < UV_CPUS_PER_AS) {
569 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; 685 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
570 right_shift = cpu * UV_ACT_STATUS_SIZE; 686 right_shift = desc * UV_ACT_STATUS_SIZE;
571 } else { 687 } else {
572 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; 688 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
573 right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE); 689 right_shift = ((desc - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
574 } 690 }
575 691
576 if (is_uv1_hub()) 692 if (bcp->uvhub_version == 1)
577 return uv1_wait_completion(bau_desc, mmr_offset, right_shift, 693 return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
578 bcp, try); 694 bcp, try);
579 else 695 else
@@ -752,19 +868,22 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
752 * Returns 1 if it gives up entirely and the original cpu mask is to be 868 * Returns 1 if it gives up entirely and the original cpu mask is to be
753 * returned to the kernel. 869 * returned to the kernel.
754 */ 870 */
755int uv_flush_send_and_wait(struct bau_desc *bau_desc, 871int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
756 struct cpumask *flush_mask, struct bau_control *bcp)
757{ 872{
758 int seq_number = 0; 873 int seq_number = 0;
759 int completion_stat = 0; 874 int completion_stat = 0;
875 int uv1 = 0;
760 long try = 0; 876 long try = 0;
761 unsigned long index; 877 unsigned long index;
762 cycles_t time1; 878 cycles_t time1;
763 cycles_t time2; 879 cycles_t time2;
764 struct ptc_stats *stat = bcp->statp; 880 struct ptc_stats *stat = bcp->statp;
765 struct bau_control *hmaster = bcp->uvhub_master; 881 struct bau_control *hmaster = bcp->uvhub_master;
882 struct uv1_bau_msg_header *uv1_hdr = NULL;
883 struct uv2_bau_msg_header *uv2_hdr = NULL;
884 struct bau_desc *bau_desc;
766 885
767 if (is_uv1_hub()) 886 if (bcp->uvhub_version == 1)
768 uv1_throttle(hmaster, stat); 887 uv1_throttle(hmaster, stat);
769 888
770 while (hmaster->uvhub_quiesce) 889 while (hmaster->uvhub_quiesce)
@@ -772,22 +891,39 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
772 891
773 time1 = get_cycles(); 892 time1 = get_cycles();
774 do { 893 do {
775 if (try == 0) { 894 bau_desc = bcp->descriptor_base;
776 bau_desc->header.msg_type = MSG_REGULAR; 895 bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
896 if (bcp->uvhub_version == 1) {
897 uv1 = 1;
898 uv1_hdr = &bau_desc->header.uv1_hdr;
899 } else
900 uv2_hdr = &bau_desc->header.uv2_hdr;
901 if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) {
902 if (uv1)
903 uv1_hdr->msg_type = MSG_REGULAR;
904 else
905 uv2_hdr->msg_type = MSG_REGULAR;
777 seq_number = bcp->message_number++; 906 seq_number = bcp->message_number++;
778 } else { 907 } else {
779 bau_desc->header.msg_type = MSG_RETRY; 908 if (uv1)
909 uv1_hdr->msg_type = MSG_RETRY;
910 else
911 uv2_hdr->msg_type = MSG_RETRY;
780 stat->s_retry_messages++; 912 stat->s_retry_messages++;
781 } 913 }
782 914
783 bau_desc->header.sequence = seq_number; 915 if (uv1)
784 index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu; 916 uv1_hdr->sequence = seq_number;
917 else
918 uv2_hdr->sequence = seq_number;
919 index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc;
785 bcp->send_message = get_cycles(); 920 bcp->send_message = get_cycles();
786 921
787 write_mmr_activation(index); 922 write_mmr_activation(index);
788 923
789 try++; 924 try++;
790 completion_stat = wait_completion(bau_desc, bcp, try); 925 completion_stat = wait_completion(bau_desc, bcp, try);
926 /* UV2: wait_completion() may change the bcp->using_desc */
791 927
792 handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat); 928 handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
793 929
@@ -798,6 +934,7 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
798 } 934 }
799 cpu_relax(); 935 cpu_relax();
800 } while ((completion_stat == FLUSH_RETRY_PLUGGED) || 936 } while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
937 (completion_stat == FLUSH_RETRY_BUSYBUG) ||
801 (completion_stat == FLUSH_RETRY_TIMEOUT)); 938 (completion_stat == FLUSH_RETRY_TIMEOUT));
802 939
803 time2 = get_cycles(); 940 time2 = get_cycles();
@@ -812,6 +949,7 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
812 record_send_stats(time1, time2, bcp, stat, completion_stat, try); 949 record_send_stats(time1, time2, bcp, stat, completion_stat, try);
813 950
814 if (completion_stat == FLUSH_GIVEUP) 951 if (completion_stat == FLUSH_GIVEUP)
952 /* FLUSH_GIVEUP will fall back to using IPI's for tlb flush */
815 return 1; 953 return 1;
816 return 0; 954 return 0;
817} 955}
@@ -967,7 +1105,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
967 stat->s_ntargself++; 1105 stat->s_ntargself++;
968 1106
969 bau_desc = bcp->descriptor_base; 1107 bau_desc = bcp->descriptor_base;
970 bau_desc += ITEMS_PER_DESC * bcp->uvhub_cpu; 1108 bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
971 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 1109 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
972 if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes)) 1110 if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
973 return NULL; 1111 return NULL;
@@ -980,13 +1118,86 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
980 * uv_flush_send_and_wait returns 0 if all cpu's were messaged, 1118 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
981 * or 1 if it gave up and the original cpumask should be returned. 1119 * or 1 if it gave up and the original cpumask should be returned.
982 */ 1120 */
983 if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp)) 1121 if (!uv_flush_send_and_wait(flush_mask, bcp))
984 return NULL; 1122 return NULL;
985 else 1123 else
986 return cpumask; 1124 return cpumask;
987} 1125}
988 1126
989/* 1127/*
1128 * Search the message queue for any 'other' message with the same software
1129 * acknowledge resource bit vector.
1130 */
1131struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
1132 struct bau_control *bcp, unsigned char swack_vec)
1133{
1134 struct bau_pq_entry *msg_next = msg + 1;
1135
1136 if (msg_next > bcp->queue_last)
1137 msg_next = bcp->queue_first;
1138 while ((msg_next->swack_vec != 0) && (msg_next != msg)) {
1139 if (msg_next->swack_vec == swack_vec)
1140 return msg_next;
1141 msg_next++;
1142 if (msg_next > bcp->queue_last)
1143 msg_next = bcp->queue_first;
1144 }
1145 return NULL;
1146}
1147
1148/*
1149 * UV2 needs to work around a bug in which an arriving message has not
1150 * set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register.
1151 * Such a message must be ignored.
1152 */
1153void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
1154{
1155 unsigned long mmr_image;
1156 unsigned char swack_vec;
1157 struct bau_pq_entry *msg = mdp->msg;
1158 struct bau_pq_entry *other_msg;
1159
1160 mmr_image = read_mmr_sw_ack();
1161 swack_vec = msg->swack_vec;
1162
1163 if ((swack_vec & mmr_image) == 0) {
1164 /*
1165 * This message was assigned a swack resource, but no
1166 * reserved acknowlegment is pending.
1167 * The bug has prevented this message from setting the MMR.
1168 * And no other message has used the same sw_ack resource.
1169 * Do the requested shootdown but do not reply to the msg.
1170 * (the 0 means make no acknowledge)
1171 */
1172 bau_process_message(mdp, bcp, 0);
1173 return;
1174 }
1175
1176 /*
1177 * Some message has set the MMR 'pending' bit; it might have been
1178 * another message. Look for that message.
1179 */
1180 other_msg = find_another_by_swack(msg, bcp, msg->swack_vec);
1181 if (other_msg) {
1182 /* There is another. Do not ack the current one. */
1183 bau_process_message(mdp, bcp, 0);
1184 /*
1185 * Let the natural processing of that message acknowledge
1186 * it. Don't get the processing of sw_ack's out of order.
1187 */
1188 return;
1189 }
1190
1191 /*
1192 * There is no other message using this sw_ack, so it is safe to
1193 * acknowledge it.
1194 */
1195 bau_process_message(mdp, bcp, 1);
1196
1197 return;
1198}
1199
1200/*
990 * The BAU message interrupt comes here. (registered by set_intr_gate) 1201 * The BAU message interrupt comes here. (registered by set_intr_gate)
991 * See entry_64.S 1202 * See entry_64.S
992 * 1203 *
@@ -1009,6 +1220,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
1009 struct ptc_stats *stat; 1220 struct ptc_stats *stat;
1010 struct msg_desc msgdesc; 1221 struct msg_desc msgdesc;
1011 1222
1223 ack_APIC_irq();
1012 time_start = get_cycles(); 1224 time_start = get_cycles();
1013 1225
1014 bcp = &per_cpu(bau_control, smp_processor_id()); 1226 bcp = &per_cpu(bau_control, smp_processor_id());
@@ -1022,9 +1234,11 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
1022 count++; 1234 count++;
1023 1235
1024 msgdesc.msg_slot = msg - msgdesc.queue_first; 1236 msgdesc.msg_slot = msg - msgdesc.queue_first;
1025 msgdesc.swack_slot = ffs(msg->swack_vec) - 1;
1026 msgdesc.msg = msg; 1237 msgdesc.msg = msg;
1027 bau_process_message(&msgdesc, bcp); 1238 if (bcp->uvhub_version == 2)
1239 process_uv2_message(&msgdesc, bcp);
1240 else
1241 bau_process_message(&msgdesc, bcp, 1);
1028 1242
1029 msg++; 1243 msg++;
1030 if (msg > msgdesc.queue_last) 1244 if (msg > msgdesc.queue_last)
@@ -1036,8 +1250,6 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
1036 stat->d_nomsg++; 1250 stat->d_nomsg++;
1037 else if (count > 1) 1251 else if (count > 1)
1038 stat->d_multmsg++; 1252 stat->d_multmsg++;
1039
1040 ack_APIC_irq();
1041} 1253}
1042 1254
1043/* 1255/*
@@ -1083,7 +1295,7 @@ static void __init enable_timeouts(void)
1083 */ 1295 */
1084 mmr_image |= (1L << SOFTACK_MSHIFT); 1296 mmr_image |= (1L << SOFTACK_MSHIFT);
1085 if (is_uv2_hub()) { 1297 if (is_uv2_hub()) {
1086 mmr_image |= (1L << UV2_LEG_SHFT); 1298 mmr_image &= ~(1L << UV2_LEG_SHFT);
1087 mmr_image |= (1L << UV2_EXT_SHFT); 1299 mmr_image |= (1L << UV2_EXT_SHFT);
1088 } 1300 }
1089 write_mmr_misc_control(pnode, mmr_image); 1301 write_mmr_misc_control(pnode, mmr_image);
@@ -1136,13 +1348,13 @@ static int ptc_seq_show(struct seq_file *file, void *data)
1136 seq_printf(file, 1348 seq_printf(file,
1137 "remotehub numuvhubs numuvhubs16 numuvhubs8 "); 1349 "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
1138 seq_printf(file, 1350 seq_printf(file,
1139 "numuvhubs4 numuvhubs2 numuvhubs1 dto retries rok "); 1351 "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok ");
1140 seq_printf(file, 1352 seq_printf(file,
1141 "resetp resett giveup sto bz throt swack recv rtime "); 1353 "resetp resett giveup sto bz throt swack recv rtime ");
1142 seq_printf(file, 1354 seq_printf(file,
1143 "all one mult none retry canc nocan reset rcan "); 1355 "all one mult none retry canc nocan reset rcan ");
1144 seq_printf(file, 1356 seq_printf(file,
1145 "disable enable\n"); 1357 "disable enable wars warshw warwaits\n");
1146 } 1358 }
1147 if (cpu < num_possible_cpus() && cpu_online(cpu)) { 1359 if (cpu < num_possible_cpus() && cpu_online(cpu)) {
1148 stat = &per_cpu(ptcstats, cpu); 1360 stat = &per_cpu(ptcstats, cpu);
@@ -1154,10 +1366,10 @@ static int ptc_seq_show(struct seq_file *file, void *data)
1154 stat->s_ntargremotes, stat->s_ntargcpu, 1366 stat->s_ntargremotes, stat->s_ntargcpu,
1155 stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, 1367 stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
1156 stat->s_ntarguvhub, stat->s_ntarguvhub16); 1368 stat->s_ntarguvhub, stat->s_ntarguvhub16);
1157 seq_printf(file, "%ld %ld %ld %ld %ld ", 1369 seq_printf(file, "%ld %ld %ld %ld %ld %ld ",
1158 stat->s_ntarguvhub8, stat->s_ntarguvhub4, 1370 stat->s_ntarguvhub8, stat->s_ntarguvhub4,
1159 stat->s_ntarguvhub2, stat->s_ntarguvhub1, 1371 stat->s_ntarguvhub2, stat->s_ntarguvhub1,
1160 stat->s_dtimeout); 1372 stat->s_dtimeout, stat->s_strongnacks);
1161 seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", 1373 seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
1162 stat->s_retry_messages, stat->s_retriesok, 1374 stat->s_retry_messages, stat->s_retriesok,
1163 stat->s_resets_plug, stat->s_resets_timeout, 1375 stat->s_resets_plug, stat->s_resets_timeout,
@@ -1173,8 +1385,10 @@ static int ptc_seq_show(struct seq_file *file, void *data)
1173 stat->d_nomsg, stat->d_retries, stat->d_canceled, 1385 stat->d_nomsg, stat->d_retries, stat->d_canceled,
1174 stat->d_nocanceled, stat->d_resets, 1386 stat->d_nocanceled, stat->d_resets,
1175 stat->d_rcanceled); 1387 stat->d_rcanceled);
1176 seq_printf(file, "%ld %ld\n", 1388 seq_printf(file, "%ld %ld %ld %ld %ld\n",
1177 stat->s_bau_disabled, stat->s_bau_reenabled); 1389 stat->s_bau_disabled, stat->s_bau_reenabled,
1390 stat->s_uv2_wars, stat->s_uv2_wars_hw,
1391 stat->s_uv2_war_waits);
1178 } 1392 }
1179 return 0; 1393 return 0;
1180} 1394}
@@ -1432,12 +1646,15 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
1432{ 1646{
1433 int i; 1647 int i;
1434 int cpu; 1648 int cpu;
1649 int uv1 = 0;
1435 unsigned long gpa; 1650 unsigned long gpa;
1436 unsigned long m; 1651 unsigned long m;
1437 unsigned long n; 1652 unsigned long n;
1438 size_t dsize; 1653 size_t dsize;
1439 struct bau_desc *bau_desc; 1654 struct bau_desc *bau_desc;
1440 struct bau_desc *bd2; 1655 struct bau_desc *bd2;
1656 struct uv1_bau_msg_header *uv1_hdr;
1657 struct uv2_bau_msg_header *uv2_hdr;
1441 struct bau_control *bcp; 1658 struct bau_control *bcp;
1442 1659
1443 /* 1660 /*
@@ -1451,6 +1668,8 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
1451 gpa = uv_gpa(bau_desc); 1668 gpa = uv_gpa(bau_desc);
1452 n = uv_gpa_to_gnode(gpa); 1669 n = uv_gpa_to_gnode(gpa);
1453 m = uv_gpa_to_offset(gpa); 1670 m = uv_gpa_to_offset(gpa);
1671 if (is_uv1_hub())
1672 uv1 = 1;
1454 1673
1455 /* the 14-bit pnode */ 1674 /* the 14-bit pnode */
1456 write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m)); 1675 write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
@@ -1461,21 +1680,33 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
1461 */ 1680 */
1462 for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) { 1681 for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) {
1463 memset(bd2, 0, sizeof(struct bau_desc)); 1682 memset(bd2, 0, sizeof(struct bau_desc));
1464 bd2->header.swack_flag = 1; 1683 if (uv1) {
1465 /* 1684 uv1_hdr = &bd2->header.uv1_hdr;
1466 * The base_dest_nasid set in the message header is the nasid 1685 uv1_hdr->swack_flag = 1;
1467 * of the first uvhub in the partition. The bit map will 1686 /*
1468 * indicate destination pnode numbers relative to that base. 1687 * The base_dest_nasid set in the message header
1469 * They may not be consecutive if nasid striding is being used. 1688 * is the nasid of the first uvhub in the partition.
1470 */ 1689 * The bit map will indicate destination pnode numbers
1471 bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); 1690 * relative to that base. They may not be consecutive
1472 bd2->header.dest_subnodeid = UV_LB_SUBNODEID; 1691 * if nasid striding is being used.
1473 bd2->header.command = UV_NET_ENDPOINT_INTD; 1692 */
1474 bd2->header.int_both = 1; 1693 uv1_hdr->base_dest_nasid =
1475 /* 1694 UV_PNODE_TO_NASID(base_pnode);
1476 * all others need to be set to zero: 1695 uv1_hdr->dest_subnodeid = UV_LB_SUBNODEID;
1477 * fairness chaining multilevel count replied_to 1696 uv1_hdr->command = UV_NET_ENDPOINT_INTD;
1478 */ 1697 uv1_hdr->int_both = 1;
1698 /*
1699 * all others need to be set to zero:
1700 * fairness chaining multilevel count replied_to
1701 */
1702 } else {
1703 uv2_hdr = &bd2->header.uv2_hdr;
1704 uv2_hdr->swack_flag = 1;
1705 uv2_hdr->base_dest_nasid =
1706 UV_PNODE_TO_NASID(base_pnode);
1707 uv2_hdr->dest_subnodeid = UV_LB_SUBNODEID;
1708 uv2_hdr->command = UV_NET_ENDPOINT_INTD;
1709 }
1479 } 1710 }
1480 for_each_present_cpu(cpu) { 1711 for_each_present_cpu(cpu) {
1481 if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu))) 1712 if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
@@ -1531,6 +1762,7 @@ static void pq_init(int node, int pnode)
1531 write_mmr_payload_first(pnode, pn_first); 1762 write_mmr_payload_first(pnode, pn_first);
1532 write_mmr_payload_tail(pnode, first); 1763 write_mmr_payload_tail(pnode, first);
1533 write_mmr_payload_last(pnode, last); 1764 write_mmr_payload_last(pnode, last);
1765 write_gmmr_sw_ack(pnode, 0xffffUL);
1534 1766
1535 /* in effect, all msg_type's are set to MSG_NOOP */ 1767 /* in effect, all msg_type's are set to MSG_NOOP */
1536 memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); 1768 memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
@@ -1584,14 +1816,14 @@ static int calculate_destination_timeout(void)
1584 ts_ns = base * mult1 * mult2; 1816 ts_ns = base * mult1 * mult2;
1585 ret = ts_ns / 1000; 1817 ret = ts_ns / 1000;
1586 } else { 1818 } else {
1587 /* 4 bits 0/1 for 10/80us, 3 bits of multiplier */ 1819 /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */
1588 mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); 1820 mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL);
1589 mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT; 1821 mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
1590 if (mmr_image & (1L << UV2_ACK_UNITS_SHFT)) 1822 if (mmr_image & (1L << UV2_ACK_UNITS_SHFT))
1591 mult1 = 80; 1823 base = 80;
1592 else 1824 else
1593 mult1 = 10; 1825 base = 10;
1594 base = mmr_image & UV2_ACK_MASK; 1826 mult1 = mmr_image & UV2_ACK_MASK;
1595 ret = mult1 * base; 1827 ret = mult1 * base;
1596 } 1828 }
1597 return ret; 1829 return ret;
@@ -1618,6 +1850,7 @@ static void __init init_per_cpu_tunables(void)
1618 bcp->cong_response_us = congested_respns_us; 1850 bcp->cong_response_us = congested_respns_us;
1619 bcp->cong_reps = congested_reps; 1851 bcp->cong_reps = congested_reps;
1620 bcp->cong_period = congested_period; 1852 bcp->cong_period = congested_period;
1853 bcp->clocks_per_100_usec = usec_2_cycles(100);
1621 } 1854 }
1622} 1855}
1623 1856
@@ -1728,8 +1961,17 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
1728 bcp->cpus_in_socket = sdp->num_cpus; 1961 bcp->cpus_in_socket = sdp->num_cpus;
1729 bcp->socket_master = *smasterp; 1962 bcp->socket_master = *smasterp;
1730 bcp->uvhub = bdp->uvhub; 1963 bcp->uvhub = bdp->uvhub;
1964 if (is_uv1_hub())
1965 bcp->uvhub_version = 1;
1966 else if (is_uv2_hub())
1967 bcp->uvhub_version = 2;
1968 else {
1969 printk(KERN_EMERG "uvhub version not 1 or 2\n");
1970 return 1;
1971 }
1731 bcp->uvhub_master = *hmasterp; 1972 bcp->uvhub_master = *hmasterp;
1732 bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; 1973 bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
1974 bcp->using_desc = bcp->uvhub_cpu;
1733 if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { 1975 if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
1734 printk(KERN_EMERG "%d cpus per uvhub invalid\n", 1976 printk(KERN_EMERG "%d cpus per uvhub invalid\n",
1735 bcp->uvhub_cpu); 1977 bcp->uvhub_cpu);
@@ -1845,6 +2087,8 @@ static int __init uv_bau_init(void)
1845 uv_base_pnode = uv_blade_to_pnode(uvhub); 2087 uv_base_pnode = uv_blade_to_pnode(uvhub);
1846 } 2088 }
1847 2089
2090 enable_timeouts();
2091
1848 if (init_per_cpu(nuvhubs, uv_base_pnode)) { 2092 if (init_per_cpu(nuvhubs, uv_base_pnode)) {
1849 nobau = 1; 2093 nobau = 1;
1850 return 0; 2094 return 0;
@@ -1855,7 +2099,6 @@ static int __init uv_bau_init(void)
1855 if (uv_blade_nr_possible_cpus(uvhub)) 2099 if (uv_blade_nr_possible_cpus(uvhub))
1856 init_uvhub(uvhub, vector, uv_base_pnode); 2100 init_uvhub(uvhub, vector, uv_base_pnode);
1857 2101
1858 enable_timeouts();
1859 alloc_intr_gate(vector, uv_bau_message_intr1); 2102 alloc_intr_gate(vector, uv_bau_message_intr1);
1860 2103
1861 for_each_possible_blade(uvhub) { 2104 for_each_possible_blade(uvhub) {
@@ -1867,7 +2110,8 @@ static int __init uv_bau_init(void)
1867 val = 1L << 63; 2110 val = 1L << 63;
1868 write_gmmr_activation(pnode, val); 2111 write_gmmr_activation(pnode, val);
1869 mmr = 1; /* should be 1 to broadcast to both sockets */ 2112 mmr = 1; /* should be 1 to broadcast to both sockets */
1870 write_mmr_data_broadcast(pnode, mmr); 2113 if (!is_uv1_hub())
2114 write_mmr_data_broadcast(pnode, mmr);
1871 } 2115 }
1872 } 2116 }
1873 2117
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index d76ca6ae2b1b..121f77cfef76 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -77,6 +77,8 @@ static int show_stat(struct seq_file *p, void *v)
77 steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; 77 steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
78 guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; 78 guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
79 guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; 79 guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
80 sum += kstat_cpu_irqs_sum(i);
81 sum += arch_irq_stat_cpu(i);
80 82
81 for (j = 0; j < NR_SOFTIRQS; j++) { 83 for (j = 0; j < NR_SOFTIRQS; j++) {
82 unsigned int softirq_stat = kstat_softirqs_cpu(j, i); 84 unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index db110b8ae030..f1539decd99d 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -634,10 +634,11 @@ static int tracepoint_module_coming(struct module *mod)
634 int ret = 0; 634 int ret = 0;
635 635
636 /* 636 /*
637 * We skip modules that tain the kernel, especially those with different 637 * We skip modules that taint the kernel, especially those with different
638 * module header (for forced load), to make sure we don't cause a crash. 638 * module headers (for forced load), to make sure we don't cause a crash.
639 * Staging and out-of-tree GPL modules are fine.
639 */ 640 */
640 if (mod->taints) 641 if (mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP)))
641 return 0; 642 return 0;
642 mutex_lock(&tracepoints_mutex); 643 mutex_lock(&tracepoints_mutex);
643 tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL); 644 tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL);