aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorDavid S. Miller <davem@sunset.davemloft.net>2007-07-13 19:03:42 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2007-07-16 07:04:40 -0400
commit4f0234f4f9da485ecb9729af1b88567700fd4767 (patch)
tree7073115c86dbf4e691ddac12f5c9ce1c58ce53be /arch
parentb3e13fbeb9ac1eb8e7b0791bf56e1775c692972b (diff)
[SPARC64]: Initial LDOM cpu hotplug support.
Only adding cpus is supported at the moment, removal will come next. When new cpus are configured, the machine description is updated. When we get the configure request we pass in a cpu mask of to-be-added cpus to the mdesc CPU node parser so it only fetches information for those cpus. That code also proceeds to update the SMT/multi-core scheduling bitmaps. cpu_up() does all the work and we return the status back over the DS channel. CPUs via dr-cpu need to be booted straight out of the hypervisor, and this requires: 1) A new trampoline mechanism. CPUs are booted straight out of the hypervisor with MMU disabled and running in physical addresses with no mappings installed in the TLB. The new hvtramp.S code sets up the critical cpu state, installs the locked TLB mappings for the kernel, and turns the MMU on. It then proceeds to follow the logic of the existing trampoline.S SMP cpu bringup code. 2) All calls into OBP have to be disallowed when domaining is enabled. Since cpus boot straight into the kernel from the hypervisor, OBP has no state about that cpu and therefore cannot handle being invoked on that cpu. Luckily it's only a handful of interfaces which can be called after the OBP device tree is obtained. For example, rebooting, halting, powering-off, and setting options node variables. CPU removal support will require some infrastructure changes here. Namely we'll have to process the requests via a true kernel thread instead of in a workqueue. workqueues run on a per-cpu thread, but when unconfiguring we might need to force the thread to execute on another cpu if the current cpu is the one being removed. Removal of a cpu also causes the kernel to destroy that cpu's workqueue running thread. Another issue on removal is that we may have interrupts still pointing to the cpu-to-be-removed. So new code will be needed to walk the active INO list and retarget those interrupts as needed. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch')
-rw-r--r--arch/sparc64/Kconfig10
-rw-r--r--arch/sparc64/kernel/Makefile3
-rw-r--r--arch/sparc64/kernel/ds.c514
-rw-r--r--arch/sparc64/kernel/hvtramp.S139
-rw-r--r--arch/sparc64/kernel/mdesc.c53
-rw-r--r--arch/sparc64/kernel/prom.c2
-rw-r--r--arch/sparc64/kernel/smp.c55
-rw-r--r--arch/sparc64/kernel/sparc64_ksyms.c4
-rw-r--r--arch/sparc64/prom/misc.c8
-rw-r--r--arch/sparc64/prom/p1275.c1
10 files changed, 666 insertions, 123 deletions
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index af59daa81058..3c2e3397caf8 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -108,6 +108,15 @@ config SECCOMP
108 108
109source kernel/Kconfig.hz 109source kernel/Kconfig.hz
110 110
111config HOTPLUG_CPU
112 bool "Support for hot-pluggable CPUs"
113 depends on SMP
114 select HOTPLUG
115 ---help---
116 Say Y here to experiment with turning CPUs off and on. CPUs
117 can be controlled through /sys/devices/system/cpu/cpu#.
118 Say N if you want to disable CPU hotplug.
119
111source "init/Kconfig" 120source "init/Kconfig"
112 121
113config SYSVIPC_COMPAT 122config SYSVIPC_COMPAT
@@ -307,6 +316,7 @@ config SUN_IO
307 316
308config SUN_LDOMS 317config SUN_LDOMS
309 bool "Sun Logical Domains support" 318 bool "Sun Logical Domains support"
319 select HOTPLUG_CPU
310 help 320 help
311 Say Y here is you want to support virtual devices via 321 Say Y here is you want to support virtual devices via
312 Logical Domains. 322 Logical Domains.
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index 70e6c501392a..62db93c148cd 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -12,7 +12,8 @@ obj-y := process.o setup.o cpu.o idprom.o \
12 irq.o ptrace.o time.o sys_sparc.o signal.o \ 12 irq.o ptrace.o time.o sys_sparc.o signal.o \
13 unaligned.o central.o pci.o starfire.o semaphore.o \ 13 unaligned.o central.o pci.o starfire.o semaphore.o \
14 power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o \ 14 power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o \
15 visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o 15 visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o \
16 hvtramp.o
16 17
17obj-$(CONFIG_STACKTRACE) += stacktrace.o 18obj-$(CONFIG_STACKTRACE) += stacktrace.o
18obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \ 19obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \
diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c
index 4e20ef232c51..b82c03a25d9c 100644
--- a/arch/sparc64/kernel/ds.c
+++ b/arch/sparc64/kernel/ds.c
@@ -12,11 +12,16 @@
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/delay.h> 13#include <linux/delay.h>
14#include <linux/mutex.h> 14#include <linux/mutex.h>
15#include <linux/workqueue.h>
16#include <linux/cpu.h>
15 17
16#include <asm/ldc.h> 18#include <asm/ldc.h>
17#include <asm/vio.h> 19#include <asm/vio.h>
18#include <asm/power.h> 20#include <asm/power.h>
19#include <asm/mdesc.h> 21#include <asm/mdesc.h>
22#include <asm/head.h>
23#include <asm/io.h>
24#include <asm/hvtramp.h>
20 25
21#define DRV_MODULE_NAME "ds" 26#define DRV_MODULE_NAME "ds"
22#define PFX DRV_MODULE_NAME ": " 27#define PFX DRV_MODULE_NAME ": "
@@ -124,7 +129,7 @@ struct ds_cap_state {
124 __u64 handle; 129 __u64 handle;
125 130
126 void (*data)(struct ldc_channel *lp, 131 void (*data)(struct ldc_channel *lp,
127 struct ds_cap_state *dp, 132 struct ds_cap_state *cp,
128 void *buf, int len); 133 void *buf, int len);
129 134
130 const char *service_id; 135 const char *service_id;
@@ -135,6 +140,91 @@ struct ds_cap_state {
135#define CAP_STATE_REGISTERED 0x02 140#define CAP_STATE_REGISTERED 0x02
136}; 141};
137 142
143static void md_update_data(struct ldc_channel *lp, struct ds_cap_state *cp,
144 void *buf, int len);
145static void domain_shutdown_data(struct ldc_channel *lp,
146 struct ds_cap_state *cp,
147 void *buf, int len);
148static void domain_panic_data(struct ldc_channel *lp,
149 struct ds_cap_state *cp,
150 void *buf, int len);
151static void dr_cpu_data(struct ldc_channel *lp,
152 struct ds_cap_state *cp,
153 void *buf, int len);
154static void ds_pri_data(struct ldc_channel *lp,
155 struct ds_cap_state *cp,
156 void *buf, int len);
157static void ds_var_data(struct ldc_channel *lp,
158 struct ds_cap_state *cp,
159 void *buf, int len);
160
161struct ds_cap_state ds_states[] = {
162 {
163 .service_id = "md-update",
164 .data = md_update_data,
165 },
166 {
167 .service_id = "domain-shutdown",
168 .data = domain_shutdown_data,
169 },
170 {
171 .service_id = "domain-panic",
172 .data = domain_panic_data,
173 },
174 {
175 .service_id = "dr-cpu",
176 .data = dr_cpu_data,
177 },
178 {
179 .service_id = "pri",
180 .data = ds_pri_data,
181 },
182 {
183 .service_id = "var-config",
184 .data = ds_var_data,
185 },
186 {
187 .service_id = "var-config-backup",
188 .data = ds_var_data,
189 },
190};
191
192static DEFINE_SPINLOCK(ds_lock);
193
194struct ds_info {
195 struct ldc_channel *lp;
196 u8 hs_state;
197#define DS_HS_START 0x01
198#define DS_HS_DONE 0x02
199
200 void *rcv_buf;
201 int rcv_buf_len;
202};
203
204static struct ds_info *ds_info;
205
206static struct ds_cap_state *find_cap(u64 handle)
207{
208 unsigned int index = handle >> 32;
209
210 if (index >= ARRAY_SIZE(ds_states))
211 return NULL;
212 return &ds_states[index];
213}
214
215static struct ds_cap_state *find_cap_by_string(const char *name)
216{
217 int i;
218
219 for (i = 0; i < ARRAY_SIZE(ds_states); i++) {
220 if (strcmp(ds_states[i].service_id, name))
221 continue;
222
223 return &ds_states[i];
224 }
225 return NULL;
226}
227
138static int ds_send(struct ldc_channel *lp, void *data, int len) 228static int ds_send(struct ldc_channel *lp, void *data, int len)
139{ 229{
140 int err, limit = 1000; 230 int err, limit = 1000;
@@ -265,36 +355,354 @@ static void domain_panic_data(struct ldc_channel *lp,
265 panic("PANIC requested by LDOM manager."); 355 panic("PANIC requested by LDOM manager.");
266} 356}
267 357
268struct ds_cpu_tag { 358struct dr_cpu_tag {
269 __u64 req_num; 359 __u64 req_num;
270 __u32 type; 360 __u32 type;
271#define DS_CPU_CONFIGURE 0x43 361#define DR_CPU_CONFIGURE 0x43
272#define DS_CPU_UNCONFIGURE 0x55 362#define DR_CPU_UNCONFIGURE 0x55
273#define DS_CPU_FORCE_UNCONFIGURE 0x46 363#define DR_CPU_FORCE_UNCONFIGURE 0x46
274#define DS_CPU_STATUS 0x53 364#define DR_CPU_STATUS 0x53
275 365
276/* Responses */ 366/* Responses */
277#define DS_CPU_OK 0x6f 367#define DR_CPU_OK 0x6f
278#define DS_CPU_ERROR 0x65 368#define DR_CPU_ERROR 0x65
279 369
280 __u32 num_records; 370 __u32 num_records;
281}; 371};
282 372
283struct ds_cpu_record { 373struct dr_cpu_resp_entry {
284 __u32 cpu_id; 374 __u32 cpu;
375 __u32 result;
376#define DR_CPU_RES_OK 0x00
377#define DR_CPU_RES_FAILURE 0x01
378#define DR_CPU_RES_BLOCKED 0x02
379#define DR_CPU_RES_CPU_NOT_RESPONDING 0x03
380#define DR_CPU_RES_NOT_IN_MD 0x04
381
382 __u32 stat;
383#define DR_CPU_STAT_NOT_PRESENT 0x00
384#define DR_CPU_STAT_UNCONFIGURED 0x01
385#define DR_CPU_STAT_CONFIGURED 0x02
386
387 __u32 str_off;
285}; 388};
286 389
390/* XXX Put this in some common place. XXX */
391static unsigned long kimage_addr_to_ra(void *p)
392{
393 unsigned long val = (unsigned long) p;
394
395 return kern_base + (val - KERNBASE);
396}
397
398void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
399{
400 extern unsigned long sparc64_ttable_tl0;
401 extern unsigned long kern_locked_tte_data;
402 extern int bigkernel;
403 struct hvtramp_descr *hdesc;
404 unsigned long trampoline_ra;
405 struct trap_per_cpu *tb;
406 u64 tte_vaddr, tte_data;
407 unsigned long hv_err;
408
409 hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL);
410 if (!hdesc) {
411 printk(KERN_ERR PFX "ldom_startcpu_cpuid: Cannot allocate "
412 "hvtramp_descr.\n");
413 return;
414 }
415
416 hdesc->cpu = cpu;
417 hdesc->num_mappings = (bigkernel ? 2 : 1);
418
419 tb = &trap_block[cpu];
420 tb->hdesc = hdesc;
421
422 hdesc->fault_info_va = (unsigned long) &tb->fault_info;
423 hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
424
425 hdesc->thread_reg = thread_reg;
426
427 tte_vaddr = (unsigned long) KERNBASE;
428 tte_data = kern_locked_tte_data;
429
430 hdesc->maps[0].vaddr = tte_vaddr;
431 hdesc->maps[0].tte = tte_data;
432 if (bigkernel) {
433 tte_vaddr += 0x400000;
434 tte_data += 0x400000;
435 hdesc->maps[1].vaddr = tte_vaddr;
436 hdesc->maps[1].tte = tte_data;
437 }
438
439 trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
440
441 hv_err = sun4v_cpu_start(cpu, trampoline_ra,
442 kimage_addr_to_ra(&sparc64_ttable_tl0),
443 __pa(hdesc));
444}
445
446/* DR cpu requests get queued onto the work list by the
447 * dr_cpu_data() callback. The list is protected by
448 * ds_lock, and processed by dr_cpu_process() in order.
449 */
450static LIST_HEAD(dr_cpu_work_list);
451
452struct dr_cpu_queue_entry {
453 struct list_head list;
454 char req[0];
455};
456
457static void __dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data)
458{
459 struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
460 struct ds_info *dp = ds_info;
461 struct {
462 struct ds_data data;
463 struct dr_cpu_tag tag;
464 } pkt;
465 int msg_len;
466
467 memset(&pkt, 0, sizeof(pkt));
468 pkt.data.tag.type = DS_DATA;
469 pkt.data.handle = cp->handle;
470 pkt.tag.req_num = tag->req_num;
471 pkt.tag.type = DR_CPU_ERROR;
472 pkt.tag.num_records = 0;
473
474 msg_len = (sizeof(struct ds_data) +
475 sizeof(struct dr_cpu_tag));
476
477 pkt.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
478
479 ds_send(dp->lp, &pkt, msg_len);
480}
481
482static void dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data)
483{
484 unsigned long flags;
485
486 spin_lock_irqsave(&ds_lock, flags);
487 __dr_cpu_send_error(cp, data);
488 spin_unlock_irqrestore(&ds_lock, flags);
489}
490
491#define CPU_SENTINEL 0xffffffff
492
493static void purge_dups(u32 *list, u32 num_ents)
494{
495 unsigned int i;
496
497 for (i = 0; i < num_ents; i++) {
498 u32 cpu = list[i];
499 unsigned int j;
500
501 if (cpu == CPU_SENTINEL)
502 continue;
503
504 for (j = i + 1; j < num_ents; j++) {
505 if (list[j] == cpu)
506 list[j] = CPU_SENTINEL;
507 }
508 }
509}
510
511static int dr_cpu_size_response(int ncpus)
512{
513 return (sizeof(struct ds_data) +
514 sizeof(struct dr_cpu_tag) +
515 (sizeof(struct dr_cpu_resp_entry) * ncpus));
516}
517
518static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
519 u64 handle, int resp_len, int ncpus,
520 cpumask_t *mask, u32 default_stat)
521{
522 struct dr_cpu_resp_entry *ent;
523 struct dr_cpu_tag *tag;
524 int i, cpu;
525
526 tag = (struct dr_cpu_tag *) (resp + 1);
527 ent = (struct dr_cpu_resp_entry *) (tag + 1);
528
529 resp->tag.type = DS_DATA;
530 resp->tag.len = resp_len - sizeof(struct ds_msg_tag);
531 resp->handle = handle;
532 tag->req_num = req_num;
533 tag->type = DR_CPU_OK;
534 tag->num_records = ncpus;
535
536 i = 0;
537 for_each_cpu_mask(cpu, *mask) {
538 ent[i].cpu = cpu;
539 ent[i].result = DR_CPU_RES_OK;
540 ent[i].stat = default_stat;
541 i++;
542 }
543 BUG_ON(i != ncpus);
544}
545
546static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus,
547 u32 res, u32 stat)
548{
549 struct dr_cpu_resp_entry *ent;
550 struct dr_cpu_tag *tag;
551 int i;
552
553 tag = (struct dr_cpu_tag *) (resp + 1);
554 ent = (struct dr_cpu_resp_entry *) (tag + 1);
555
556 for (i = 0; i < ncpus; i++) {
557 if (ent[i].cpu != cpu)
558 continue;
559 ent[i].result = res;
560 ent[i].stat = stat;
561 break;
562 }
563}
564
565static int dr_cpu_configure(struct ds_cap_state *cp, u64 req_num,
566 cpumask_t *mask)
567{
568 struct ds_data *resp;
569 int resp_len, ncpus, cpu;
570 unsigned long flags;
571
572 ncpus = cpus_weight(*mask);
573 resp_len = dr_cpu_size_response(ncpus);
574 resp = kzalloc(resp_len, GFP_KERNEL);
575 if (!resp)
576 return -ENOMEM;
577
578 dr_cpu_init_response(resp, req_num, cp->handle,
579 resp_len, ncpus, mask,
580 DR_CPU_STAT_CONFIGURED);
581
582 mdesc_fill_in_cpu_data(*mask);
583
584 for_each_cpu_mask(cpu, *mask) {
585 int err;
586
587 printk(KERN_INFO PFX "Starting cpu %d...\n", cpu);
588 err = cpu_up(cpu);
589 if (err)
590 dr_cpu_mark(resp, cpu, ncpus,
591 DR_CPU_RES_FAILURE,
592 DR_CPU_STAT_UNCONFIGURED);
593 }
594
595 spin_lock_irqsave(&ds_lock, flags);
596 ds_send(ds_info->lp, resp, resp_len);
597 spin_unlock_irqrestore(&ds_lock, flags);
598
599 kfree(resp);
600
601 return 0;
602}
603
604static int dr_cpu_unconfigure(struct ds_cap_state *cp, u64 req_num,
605 cpumask_t *mask)
606{
607 struct ds_data *resp;
608 int resp_len, ncpus;
609
610 ncpus = cpus_weight(*mask);
611 resp_len = dr_cpu_size_response(ncpus);
612 resp = kzalloc(resp_len, GFP_KERNEL);
613 if (!resp)
614 return -ENOMEM;
615
616 dr_cpu_init_response(resp, req_num, cp->handle,
617 resp_len, ncpus, mask,
618 DR_CPU_STAT_UNCONFIGURED);
619
620 kfree(resp);
621
622 return -EOPNOTSUPP;
623}
624
625static void dr_cpu_process(struct work_struct *work)
626{
627 struct dr_cpu_queue_entry *qp, *tmp;
628 struct ds_cap_state *cp;
629 unsigned long flags;
630 LIST_HEAD(todo);
631 cpumask_t mask;
632
633 cp = find_cap_by_string("dr-cpu");
634
635 spin_lock_irqsave(&ds_lock, flags);
636 list_splice(&dr_cpu_work_list, &todo);
637 spin_unlock_irqrestore(&ds_lock, flags);
638
639 list_for_each_entry_safe(qp, tmp, &todo, list) {
640 struct ds_data *data = (struct ds_data *) qp->req;
641 struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
642 u32 *cpu_list = (u32 *) (tag + 1);
643 u64 req_num = tag->req_num;
644 unsigned int i;
645 int err;
646
647 switch (tag->type) {
648 case DR_CPU_CONFIGURE:
649 case DR_CPU_UNCONFIGURE:
650 case DR_CPU_FORCE_UNCONFIGURE:
651 break;
652
653 default:
654 dr_cpu_send_error(cp, data);
655 goto next;
656 }
657
658 purge_dups(cpu_list, tag->num_records);
659
660 cpus_clear(mask);
661 for (i = 0; i < tag->num_records; i++) {
662 if (cpu_list[i] == CPU_SENTINEL)
663 continue;
664
665 if (cpu_list[i] < NR_CPUS)
666 cpu_set(cpu_list[i], mask);
667 }
668
669 if (tag->type == DR_CPU_CONFIGURE)
670 err = dr_cpu_configure(cp, req_num, &mask);
671 else
672 err = dr_cpu_unconfigure(cp, req_num, &mask);
673
674 if (err)
675 dr_cpu_send_error(cp, data);
676
677next:
678 list_del(&qp->list);
679 kfree(qp);
680 }
681}
682
683static DECLARE_WORK(dr_cpu_work, dr_cpu_process);
684
287static void dr_cpu_data(struct ldc_channel *lp, 685static void dr_cpu_data(struct ldc_channel *lp,
288 struct ds_cap_state *dp, 686 struct ds_cap_state *dp,
289 void *buf, int len) 687 void *buf, int len)
290{ 688{
689 struct dr_cpu_queue_entry *qp;
291 struct ds_data *dpkt = buf; 690 struct ds_data *dpkt = buf;
292 struct ds_cpu_tag *rp; 691 struct dr_cpu_tag *rp;
293 692
294 rp = (struct ds_cpu_tag *) (dpkt + 1); 693 rp = (struct dr_cpu_tag *) (dpkt + 1);
295 694
296 printk(KERN_ERR PFX "CPU REQ [%lx:%x], len=%d\n", 695 qp = kmalloc(sizeof(struct dr_cpu_queue_entry) + len, GFP_ATOMIC);
297 rp->req_num, rp->type, len); 696 if (!qp) {
697 struct ds_cap_state *cp;
698
699 cp = find_cap_by_string("dr-cpu");
700 __dr_cpu_send_error(cp, dpkt);
701 } else {
702 memcpy(&qp->req, buf, len);
703 list_add_tail(&qp->list, &dr_cpu_work_list);
704 schedule_work(&dr_cpu_work);
705 }
298} 706}
299 707
300struct ds_pri_msg { 708struct ds_pri_msg {
@@ -368,73 +776,6 @@ static void ds_var_data(struct ldc_channel *lp,
368 ds_var_doorbell = 1; 776 ds_var_doorbell = 1;
369} 777}
370 778
371struct ds_cap_state ds_states[] = {
372 {
373 .service_id = "md-update",
374 .data = md_update_data,
375 },
376 {
377 .service_id = "domain-shutdown",
378 .data = domain_shutdown_data,
379 },
380 {
381 .service_id = "domain-panic",
382 .data = domain_panic_data,
383 },
384 {
385 .service_id = "dr-cpu",
386 .data = dr_cpu_data,
387 },
388 {
389 .service_id = "pri",
390 .data = ds_pri_data,
391 },
392 {
393 .service_id = "var-config",
394 .data = ds_var_data,
395 },
396 {
397 .service_id = "var-config-backup",
398 .data = ds_var_data,
399 },
400};
401
402static DEFINE_SPINLOCK(ds_lock);
403
404struct ds_info {
405 struct ldc_channel *lp;
406 u8 hs_state;
407#define DS_HS_START 0x01
408#define DS_HS_DONE 0x02
409
410 void *rcv_buf;
411 int rcv_buf_len;
412};
413
414static struct ds_info *ds_info;
415
416static struct ds_cap_state *find_cap(u64 handle)
417{
418 unsigned int index = handle >> 32;
419
420 if (index >= ARRAY_SIZE(ds_states))
421 return NULL;
422 return &ds_states[index];
423}
424
425static struct ds_cap_state *find_cap_by_string(const char *name)
426{
427 int i;
428
429 for (i = 0; i < ARRAY_SIZE(ds_states); i++) {
430 if (strcmp(ds_states[i].service_id, name))
431 continue;
432
433 return &ds_states[i];
434 }
435 return NULL;
436}
437
438void ldom_set_var(const char *var, const char *value) 779void ldom_set_var(const char *var, const char *value)
439{ 780{
440 struct ds_info *dp = ds_info; 781 struct ds_info *dp = ds_info;
@@ -467,8 +808,8 @@ void ldom_set_var(const char *var, const char *value)
467 p += strlen(value) + 1; 808 p += strlen(value) + 1;
468 809
469 msg_len = (sizeof(struct ds_data) + 810 msg_len = (sizeof(struct ds_data) +
470 sizeof(struct ds_var_set_msg) + 811 sizeof(struct ds_var_set_msg) +
471 (p - base)); 812 (p - base));
472 msg_len = (msg_len + 3) & ~3; 813 msg_len = (msg_len + 3) & ~3;
473 pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag); 814 pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
474 815
@@ -520,6 +861,11 @@ void ldom_reboot(const char *boot_command)
520 sun4v_mach_sir(); 861 sun4v_mach_sir();
521} 862}
522 863
864void ldom_power_off(void)
865{
866 sun4v_mach_exit(0);
867}
868
523static void ds_conn_reset(struct ds_info *dp) 869static void ds_conn_reset(struct ds_info *dp)
524{ 870{
525 printk(KERN_ERR PFX "ds_conn_reset() from %p\n", 871 printk(KERN_ERR PFX "ds_conn_reset() from %p\n",
@@ -601,7 +947,7 @@ static int ds_handshake(struct ds_info *dp, struct ds_msg_tag *pkt)
601 np->handle); 947 np->handle);
602 return 0; 948 return 0;
603 } 949 }
604 printk(KERN_ERR PFX "Could not register %s service\n", 950 printk(KERN_INFO PFX "Could not register %s service\n",
605 cp->service_id); 951 cp->service_id);
606 cp->state = CAP_STATE_UNKNOWN; 952 cp->state = CAP_STATE_UNKNOWN;
607 } 953 }
diff --git a/arch/sparc64/kernel/hvtramp.S b/arch/sparc64/kernel/hvtramp.S
new file mode 100644
index 000000000000..76a090e2c2a8
--- /dev/null
+++ b/arch/sparc64/kernel/hvtramp.S
@@ -0,0 +1,139 @@
1/* hvtramp.S: Hypervisor start-cpu trampoline code.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <asm/thread_info.h>
7#include <asm/hypervisor.h>
8#include <asm/scratchpad.h>
9#include <asm/spitfire.h>
10#include <asm/hvtramp.h>
11#include <asm/pstate.h>
12#include <asm/ptrace.h>
13#include <asm/asi.h>
14
15 .text
16 .align 8
17 .globl hv_cpu_startup, hv_cpu_startup_end
18
19 /* This code executes directly out of the hypervisor
20 * with physical addressing (va==pa). %o0 contains
21 * our client argument which for Linux points to
22 * a descriptor data structure which defines the
23 * MMU entries we need to load up.
24 *
25 * After we set things up we enable the MMU and call
26 * into the kernel.
27 *
28 * First setup basic privileged cpu state.
29 */
30hv_cpu_startup:
31 wrpr %g0, 0, %gl
32 wrpr %g0, 15, %pil
33 wrpr %g0, 0, %canrestore
34 wrpr %g0, 0, %otherwin
35 wrpr %g0, 6, %cansave
36 wrpr %g0, 6, %cleanwin
37 wrpr %g0, 0, %cwp
38 wrpr %g0, 0, %wstate
39 wrpr %g0, 0, %tl
40
41 sethi %hi(sparc64_ttable_tl0), %g1
42 wrpr %g1, %tba
43
44 mov %o0, %l0
45
46 lduw [%l0 + HVTRAMP_DESCR_CPU], %g1
47 mov SCRATCHPAD_CPUID, %g2
48 stxa %g1, [%g2] ASI_SCRATCHPAD
49
50 ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_VA], %g2
51 stxa %g2, [%g0] ASI_SCRATCHPAD
52
53 mov 0, %l1
54 lduw [%l0 + HVTRAMP_DESCR_NUM_MAPPINGS], %l2
55 add %l0, HVTRAMP_DESCR_MAPS, %l3
56
571: ldx [%l3 + HVTRAMP_MAPPING_VADDR], %o0
58 clr %o1
59 ldx [%l3 + HVTRAMP_MAPPING_TTE], %o2
60 mov HV_MMU_IMMU | HV_MMU_DMMU, %o3
61 mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
62 ta HV_FAST_TRAP
63
64 brnz,pn %o0, 80f
65 nop
66
67 add %l1, 1, %l1
68 cmp %l1, %l2
69 blt,a,pt %xcc, 1b
70 add %l3, HVTRAMP_MAPPING_SIZE, %l3
71
72 ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_PA], %o0
73 mov HV_FAST_MMU_FAULT_AREA_CONF, %o5
74 ta HV_FAST_TRAP
75
76 brnz,pn %o0, 80f
77 nop
78
79 wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
80
81 ldx [%l0 + HVTRAMP_DESCR_THREAD_REG], %l6
82
83 mov 1, %o0
84 set 1f, %o1
85 mov HV_FAST_MMU_ENABLE, %o5
86 ta HV_FAST_TRAP
87
88 ba,pt %xcc, 80f
89 nop
90
911:
92 wr %g0, 0, %fprs
93 wr %g0, ASI_P, %asi
94
95 mov PRIMARY_CONTEXT, %g7
96 stxa %g0, [%g7] ASI_MMU
97 membar #Sync
98
99 mov SECONDARY_CONTEXT, %g7
100 stxa %g0, [%g7] ASI_MMU
101 membar #Sync
102
103 mov %l6, %g6
104 ldx [%g6 + TI_TASK], %g4
105
106 mov 1, %g5
107 sllx %g5, THREAD_SHIFT, %g5
108 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
109 add %g6, %g5, %sp
110 mov 0, %fp
111
112 call init_irqwork_curcpu
113 nop
114 call hard_smp_processor_id
115 nop
116
117 mov %o0, %o1
118 mov 0, %o0
119 mov 0, %o2
120 call sun4v_init_mondo_queues
121 mov 1, %o3
122
123 call init_cur_cpu_trap
124 mov %g6, %o0
125
126 wrpr %g0, (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE), %pstate
127
128 call smp_callin
129 nop
130 call cpu_idle
131 mov 0, %o0
132 call cpu_panic
133 nop
134
13580: ba,pt %xcc, 80b
136 nop
137
138 .align 8
139hv_cpu_startup_end:
diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c
index 9e5088d563cc..3f79940a2939 100644
--- a/arch/sparc64/kernel/mdesc.c
+++ b/arch/sparc64/kernel/mdesc.c
@@ -434,6 +434,22 @@ static void __init report_platform_properties(void)
434 if (v) 434 if (v)
435 printk("PLATFORM: max-cpus [%lu]\n", *v); 435 printk("PLATFORM: max-cpus [%lu]\n", *v);
436 436
437#ifdef CONFIG_SMP
438 {
439 int max_cpu, i;
440
441 if (v) {
442 max_cpu = *v;
443 if (max_cpu > NR_CPUS)
444 max_cpu = NR_CPUS;
445 } else {
446 max_cpu = NR_CPUS;
447 }
448 for (i = 0; i < max_cpu; i++)
449 cpu_set(i, cpu_possible_map);
450 }
451#endif
452
437 mdesc_release(hp); 453 mdesc_release(hp);
438} 454}
439 455
@@ -451,9 +467,9 @@ static int inline find_in_proplist(const char *list, const char *match, int len)
451 return 0; 467 return 0;
452} 468}
453 469
454static void __init fill_in_one_cache(cpuinfo_sparc *c, 470static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
455 struct mdesc_handle *hp, 471 struct mdesc_handle *hp,
456 u64 mp) 472 u64 mp)
457{ 473{
458 const u64 *level = mdesc_get_property(hp, mp, "level", NULL); 474 const u64 *level = mdesc_get_property(hp, mp, "level", NULL);
459 const u64 *size = mdesc_get_property(hp, mp, "size", NULL); 475 const u64 *size = mdesc_get_property(hp, mp, "size", NULL);
@@ -496,7 +512,8 @@ static void __init fill_in_one_cache(cpuinfo_sparc *c,
496 } 512 }
497} 513}
498 514
499static void __init mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id) 515static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp,
516 int core_id)
500{ 517{
501 u64 a; 518 u64 a;
502 519
@@ -529,7 +546,7 @@ static void __init mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id)
529 } 546 }
530} 547}
531 548
532static void __init set_core_ids(struct mdesc_handle *hp) 549static void __devinit set_core_ids(struct mdesc_handle *hp)
533{ 550{
534 int idx; 551 int idx;
535 u64 mp; 552 u64 mp;
@@ -554,7 +571,8 @@ static void __init set_core_ids(struct mdesc_handle *hp)
554 } 571 }
555} 572}
556 573
557static void __init mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) 574static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp,
575 int proc_id)
558{ 576{
559 u64 a; 577 u64 a;
560 578
@@ -573,8 +591,8 @@ static void __init mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
573 } 591 }
574} 592}
575 593
576static void __init __set_proc_ids(struct mdesc_handle *hp, 594static void __devinit __set_proc_ids(struct mdesc_handle *hp,
577 const char *exec_unit_name) 595 const char *exec_unit_name)
578{ 596{
579 int idx; 597 int idx;
580 u64 mp; 598 u64 mp;
@@ -595,13 +613,14 @@ static void __init __set_proc_ids(struct mdesc_handle *hp,
595 } 613 }
596} 614}
597 615
598static void __init set_proc_ids(struct mdesc_handle *hp) 616static void __devinit set_proc_ids(struct mdesc_handle *hp)
599{ 617{
600 __set_proc_ids(hp, "exec_unit"); 618 __set_proc_ids(hp, "exec_unit");
601 __set_proc_ids(hp, "exec-unit"); 619 __set_proc_ids(hp, "exec-unit");
602} 620}
603 621
604static void __init get_one_mondo_bits(const u64 *p, unsigned int *mask, unsigned char def) 622static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
623 unsigned char def)
605{ 624{
606 u64 val; 625 u64 val;
607 626
@@ -619,8 +638,8 @@ use_default:
619 *mask = ((1U << def) * 64U) - 1U; 638 *mask = ((1U << def) * 64U) - 1U;
620} 639}
621 640
622static void __init get_mondo_data(struct mdesc_handle *hp, u64 mp, 641static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
623 struct trap_per_cpu *tb) 642 struct trap_per_cpu *tb)
624{ 643{
625 const u64 *val; 644 const u64 *val;
626 645
@@ -637,7 +656,7 @@ static void __init get_mondo_data(struct mdesc_handle *hp, u64 mp,
637 get_one_mondo_bits(val, &tb->nonresum_qmask, 2); 656 get_one_mondo_bits(val, &tb->nonresum_qmask, 2);
638} 657}
639 658
640static void __init mdesc_fill_in_cpu_data(void) 659void __devinit mdesc_fill_in_cpu_data(cpumask_t mask)
641{ 660{
642 struct mdesc_handle *hp = mdesc_grab(); 661 struct mdesc_handle *hp = mdesc_grab();
643 u64 mp; 662 u64 mp;
@@ -658,6 +677,8 @@ static void __init mdesc_fill_in_cpu_data(void)
658#ifdef CONFIG_SMP 677#ifdef CONFIG_SMP
659 if (cpuid >= NR_CPUS) 678 if (cpuid >= NR_CPUS)
660 continue; 679 continue;
680 if (!cpu_isset(cpuid, mask))
681 continue;
661#else 682#else
662 /* On uniprocessor we only want the values for the 683 /* On uniprocessor we only want the values for the
663 * real physical cpu the kernel booted onto, however 684 * real physical cpu the kernel booted onto, however
@@ -696,7 +717,6 @@ static void __init mdesc_fill_in_cpu_data(void)
696 717
697#ifdef CONFIG_SMP 718#ifdef CONFIG_SMP
698 cpu_set(cpuid, cpu_present_map); 719 cpu_set(cpuid, cpu_present_map);
699 cpu_set(cpuid, phys_cpu_present_map);
700#endif 720#endif
701 721
702 c->core_id = 0; 722 c->core_id = 0;
@@ -719,6 +739,7 @@ void __init sun4v_mdesc_init(void)
719{ 739{
720 struct mdesc_handle *hp; 740 struct mdesc_handle *hp;
721 unsigned long len, real_len, status; 741 unsigned long len, real_len, status;
742 cpumask_t mask;
722 743
723 (void) sun4v_mach_desc(0UL, 0UL, &len); 744 (void) sun4v_mach_desc(0UL, 0UL, &len);
724 745
@@ -742,5 +763,7 @@ void __init sun4v_mdesc_init(void)
742 cur_mdesc = hp; 763 cur_mdesc = hp;
743 764
744 report_platform_properties(); 765 report_platform_properties();
745 mdesc_fill_in_cpu_data(); 766
767 cpus_setall(mask);
768 mdesc_fill_in_cpu_data(mask);
746} 769}
diff --git a/arch/sparc64/kernel/prom.c b/arch/sparc64/kernel/prom.c
index 61036b346664..5d220302cd50 100644
--- a/arch/sparc64/kernel/prom.c
+++ b/arch/sparc64/kernel/prom.c
@@ -1808,7 +1808,7 @@ static void __init of_fill_in_cpu_data(void)
1808 1808
1809#ifdef CONFIG_SMP 1809#ifdef CONFIG_SMP
1810 cpu_set(cpuid, cpu_present_map); 1810 cpu_set(cpuid, cpu_present_map);
1811 cpu_set(cpuid, phys_cpu_present_map); 1811 cpu_set(cpuid, cpu_possible_map);
1812#endif 1812#endif
1813 } 1813 }
1814 1814
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 40e40f968d61..315eef0869bd 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -41,6 +41,7 @@
41#include <asm/sections.h> 41#include <asm/sections.h>
42#include <asm/prom.h> 42#include <asm/prom.h>
43#include <asm/mdesc.h> 43#include <asm/mdesc.h>
44#include <asm/ldc.h>
44 45
45extern void calibrate_delay(void); 46extern void calibrate_delay(void);
46 47
@@ -49,12 +50,18 @@ int sparc64_multi_core __read_mostly;
49/* Please don't make this stuff initdata!!! --DaveM */ 50/* Please don't make this stuff initdata!!! --DaveM */
50unsigned char boot_cpu_id; 51unsigned char boot_cpu_id;
51 52
53cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
52cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; 54cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
53cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;
54cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly = 55cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly =
55 { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; 56 { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
56cpumask_t cpu_core_map[NR_CPUS] __read_mostly = 57cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
57 { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; 58 { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
59
60EXPORT_SYMBOL(cpu_possible_map);
61EXPORT_SYMBOL(cpu_online_map);
62EXPORT_SYMBOL(cpu_sibling_map);
63EXPORT_SYMBOL(cpu_core_map);
64
58static cpumask_t smp_commenced_mask; 65static cpumask_t smp_commenced_mask;
59static cpumask_t cpu_callout_map; 66static cpumask_t cpu_callout_map;
60 67
@@ -84,9 +91,10 @@ extern void setup_sparc64_timer(void);
84 91
85static volatile unsigned long callin_flag = 0; 92static volatile unsigned long callin_flag = 0;
86 93
87void __init smp_callin(void) 94void __devinit smp_callin(void)
88{ 95{
89 int cpuid = hard_smp_processor_id(); 96 int cpuid = hard_smp_processor_id();
97 struct trap_per_cpu *tb = &trap_block[cpuid];;
90 98
91 __local_per_cpu_offset = __per_cpu_offset(cpuid); 99 __local_per_cpu_offset = __per_cpu_offset(cpuid);
92 100
@@ -117,6 +125,11 @@ void __init smp_callin(void)
117 atomic_inc(&init_mm.mm_count); 125 atomic_inc(&init_mm.mm_count);
118 current->active_mm = &init_mm; 126 current->active_mm = &init_mm;
119 127
128 if (tb->hdesc) {
129 kfree(tb->hdesc);
130 tb->hdesc = NULL;
131 }
132
120 while (!cpu_isset(cpuid, smp_commenced_mask)) 133 while (!cpu_isset(cpuid, smp_commenced_mask))
121 rmb(); 134 rmb();
122 135
@@ -296,14 +309,20 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
296 /* Alloc the mondo queues, cpu will load them. */ 309 /* Alloc the mondo queues, cpu will load them. */
297 sun4v_init_mondo_queues(0, cpu, 1, 0); 310 sun4v_init_mondo_queues(0, cpu, 1, 0);
298 311
299 prom_startcpu_cpuid(cpu, entry, cookie); 312#ifdef CONFIG_SUN_LDOMS
313 if (ldom_domaining_enabled)
314 ldom_startcpu_cpuid(cpu,
315 (unsigned long) cpu_new_thread);
316 else
317#endif
318 prom_startcpu_cpuid(cpu, entry, cookie);
300 } else { 319 } else {
301 struct device_node *dp = of_find_node_by_cpuid(cpu); 320 struct device_node *dp = of_find_node_by_cpuid(cpu);
302 321
303 prom_startcpu(dp->node, entry, cookie); 322 prom_startcpu(dp->node, entry, cookie);
304 } 323 }
305 324
306 for (timeout = 0; timeout < 5000000; timeout++) { 325 for (timeout = 0; timeout < 50000; timeout++) {
307 if (callin_flag) 326 if (callin_flag)
308 break; 327 break;
309 udelay(100); 328 udelay(100);
@@ -1163,22 +1182,8 @@ int setup_profiling_timer(unsigned int multiplier)
1163 return -EINVAL; 1182 return -EINVAL;
1164} 1183}
1165 1184
1166/* Constrain the number of cpus to max_cpus. */
1167void __init smp_prepare_cpus(unsigned int max_cpus) 1185void __init smp_prepare_cpus(unsigned int max_cpus)
1168{ 1186{
1169 int i;
1170
1171 if (num_possible_cpus() > max_cpus) {
1172 for_each_possible_cpu(i) {
1173 if (i != boot_cpu_id) {
1174 cpu_clear(i, phys_cpu_present_map);
1175 cpu_clear(i, cpu_present_map);
1176 if (num_possible_cpus() <= max_cpus)
1177 break;
1178 }
1179 }
1180 }
1181
1182 cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy; 1187 cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
1183} 1188}
1184 1189
@@ -1242,6 +1247,20 @@ int __cpuinit __cpu_up(unsigned int cpu)
1242 return ret; 1247 return ret;
1243} 1248}
1244 1249
1250#ifdef CONFIG_HOTPLUG_CPU
1251int __cpu_disable(void)
1252{
1253 printk(KERN_ERR "SMP: __cpu_disable() on cpu %d\n",
1254 smp_processor_id());
1255 return -ENODEV;
1256}
1257
1258void __cpu_die(unsigned int cpu)
1259{
1260 printk(KERN_ERR "SMP: __cpu_die(%u)\n", cpu);
1261}
1262#endif
1263
1245void __init smp_cpus_done(unsigned int max_cpus) 1264void __init smp_cpus_done(unsigned int max_cpus)
1246{ 1265{
1247 unsigned long bogosum = 0; 1266 unsigned long bogosum = 0;
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 6fa761612899..51e059e36d47 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -124,10 +124,6 @@ EXPORT_SYMBOL(__write_lock);
124EXPORT_SYMBOL(__write_unlock); 124EXPORT_SYMBOL(__write_unlock);
125EXPORT_SYMBOL(__write_trylock); 125EXPORT_SYMBOL(__write_trylock);
126 126
127/* CPU online map and active count. */
128EXPORT_SYMBOL(cpu_online_map);
129EXPORT_SYMBOL(phys_cpu_present_map);
130
131EXPORT_SYMBOL(smp_call_function); 127EXPORT_SYMBOL(smp_call_function);
132#endif /* CONFIG_SMP */ 128#endif /* CONFIG_SMP */
133 129
diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c
index 72d272c9de6b..33c5b7da31e5 100644
--- a/arch/sparc64/prom/misc.c
+++ b/arch/sparc64/prom/misc.c
@@ -96,6 +96,10 @@ void prom_cmdline(void)
96 */ 96 */
97void prom_halt(void) 97void prom_halt(void)
98{ 98{
99#ifdef CONFIG_SUN_LDOMS
100 if (ldom_domaining_enabled)
101 ldom_power_off();
102#endif
99again: 103again:
100 p1275_cmd("exit", P1275_INOUT(0, 0)); 104 p1275_cmd("exit", P1275_INOUT(0, 0));
101 goto again; /* PROM is out to get me -DaveM */ 105 goto again; /* PROM is out to get me -DaveM */
@@ -103,6 +107,10 @@ again:
103 107
104void prom_halt_power_off(void) 108void prom_halt_power_off(void)
105{ 109{
110#ifdef CONFIG_SUN_LDOMS
111 if (ldom_domaining_enabled)
112 ldom_power_off();
113#endif
106 p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0)); 114 p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0));
107 115
108 /* if nothing else helps, we just halt */ 116 /* if nothing else helps, we just halt */
diff --git a/arch/sparc64/prom/p1275.c b/arch/sparc64/prom/p1275.c
index 2b32c489860c..7fcccc0e19cf 100644
--- a/arch/sparc64/prom/p1275.c
+++ b/arch/sparc64/prom/p1275.c
@@ -16,6 +16,7 @@
16#include <asm/system.h> 16#include <asm/system.h>
17#include <asm/spitfire.h> 17#include <asm/spitfire.h>
18#include <asm/pstate.h> 18#include <asm/pstate.h>
19#include <asm/ldc.h>
19 20
20struct { 21struct {
21 long prom_callback; /* 0x00 */ 22 long prom_callback; /* 0x00 */