aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc64/kernel/ds.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@sunset.davemloft.net>2007-07-13 19:03:42 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2007-07-16 07:04:40 -0400
commit4f0234f4f9da485ecb9729af1b88567700fd4767 (patch)
tree7073115c86dbf4e691ddac12f5c9ce1c58ce53be /arch/sparc64/kernel/ds.c
parentb3e13fbeb9ac1eb8e7b0791bf56e1775c692972b (diff)
[SPARC64]: Initial LDOM cpu hotplug support.
Only adding cpus is supported at the moment, removal will come next. When new cpus are configured, the machine description is updated. When we get the configure request we pass in a cpu mask of to-be-added cpus to the mdesc CPU node parser so it only fetches information for those cpus. That code also proceeds to update the SMT/multi-core scheduling bitmaps. cpu_up() does all the work and we return the status back over the DS channel. CPUs via dr-cpu need to be booted straight out of the hypervisor, and this requires: 1) A new trampoline mechanism. CPUs are booted straight out of the hypervisor with MMU disabled and running in physical addresses with no mappings installed in the TLB. The new hvtramp.S code sets up the critical cpu state, installs the locked TLB mappings for the kernel, and turns the MMU on. It then proceeds to follow the logic of the existing trampoline.S SMP cpu bringup code. 2) All calls into OBP have to be disallowed when domaining is enabled. Since cpus boot straight into the kernel from the hypervisor, OBP has no state about that cpu and therefore cannot handle being invoked on that cpu. Luckily it's only a handful of interfaces which can be called after the OBP device tree is obtained. For example, rebooting, halting, powering-off, and setting options node variables. CPU removal support will require some infrastructure changes here. Namely we'll have to process the requests via a true kernel thread instead of in a workqueue. workqueues run on a per-cpu thread, but when unconfiguring we might need to force the thread to execute on another cpu if the current cpu is the one being removed. Removal of a cpu also causes the kernel to destroy that cpu's workqueue running thread. Another issue on removal is that we may have interrupts still pointing to the cpu-to-be-removed. So new code will be needed to walk the active INO list and retarget those interrupts as needed. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc64/kernel/ds.c')
-rw-r--r--arch/sparc64/kernel/ds.c514
1 files changed, 430 insertions, 84 deletions
diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c
index 4e20ef232c51..b82c03a25d9c 100644
--- a/arch/sparc64/kernel/ds.c
+++ b/arch/sparc64/kernel/ds.c
@@ -12,11 +12,16 @@
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/delay.h> 13#include <linux/delay.h>
14#include <linux/mutex.h> 14#include <linux/mutex.h>
15#include <linux/workqueue.h>
16#include <linux/cpu.h>
15 17
16#include <asm/ldc.h> 18#include <asm/ldc.h>
17#include <asm/vio.h> 19#include <asm/vio.h>
18#include <asm/power.h> 20#include <asm/power.h>
19#include <asm/mdesc.h> 21#include <asm/mdesc.h>
22#include <asm/head.h>
23#include <asm/io.h>
24#include <asm/hvtramp.h>
20 25
21#define DRV_MODULE_NAME "ds" 26#define DRV_MODULE_NAME "ds"
22#define PFX DRV_MODULE_NAME ": " 27#define PFX DRV_MODULE_NAME ": "
@@ -124,7 +129,7 @@ struct ds_cap_state {
124 __u64 handle; 129 __u64 handle;
125 130
126 void (*data)(struct ldc_channel *lp, 131 void (*data)(struct ldc_channel *lp,
127 struct ds_cap_state *dp, 132 struct ds_cap_state *cp,
128 void *buf, int len); 133 void *buf, int len);
129 134
130 const char *service_id; 135 const char *service_id;
@@ -135,6 +140,91 @@ struct ds_cap_state {
135#define CAP_STATE_REGISTERED 0x02 140#define CAP_STATE_REGISTERED 0x02
136}; 141};
137 142
143static void md_update_data(struct ldc_channel *lp, struct ds_cap_state *cp,
144 void *buf, int len);
145static void domain_shutdown_data(struct ldc_channel *lp,
146 struct ds_cap_state *cp,
147 void *buf, int len);
148static void domain_panic_data(struct ldc_channel *lp,
149 struct ds_cap_state *cp,
150 void *buf, int len);
151static void dr_cpu_data(struct ldc_channel *lp,
152 struct ds_cap_state *cp,
153 void *buf, int len);
154static void ds_pri_data(struct ldc_channel *lp,
155 struct ds_cap_state *cp,
156 void *buf, int len);
157static void ds_var_data(struct ldc_channel *lp,
158 struct ds_cap_state *cp,
159 void *buf, int len);
160
161struct ds_cap_state ds_states[] = {
162 {
163 .service_id = "md-update",
164 .data = md_update_data,
165 },
166 {
167 .service_id = "domain-shutdown",
168 .data = domain_shutdown_data,
169 },
170 {
171 .service_id = "domain-panic",
172 .data = domain_panic_data,
173 },
174 {
175 .service_id = "dr-cpu",
176 .data = dr_cpu_data,
177 },
178 {
179 .service_id = "pri",
180 .data = ds_pri_data,
181 },
182 {
183 .service_id = "var-config",
184 .data = ds_var_data,
185 },
186 {
187 .service_id = "var-config-backup",
188 .data = ds_var_data,
189 },
190};
191
192static DEFINE_SPINLOCK(ds_lock);
193
194struct ds_info {
195 struct ldc_channel *lp;
196 u8 hs_state;
197#define DS_HS_START 0x01
198#define DS_HS_DONE 0x02
199
200 void *rcv_buf;
201 int rcv_buf_len;
202};
203
204static struct ds_info *ds_info;
205
206static struct ds_cap_state *find_cap(u64 handle)
207{
208 unsigned int index = handle >> 32;
209
210 if (index >= ARRAY_SIZE(ds_states))
211 return NULL;
212 return &ds_states[index];
213}
214
215static struct ds_cap_state *find_cap_by_string(const char *name)
216{
217 int i;
218
219 for (i = 0; i < ARRAY_SIZE(ds_states); i++) {
220 if (strcmp(ds_states[i].service_id, name))
221 continue;
222
223 return &ds_states[i];
224 }
225 return NULL;
226}
227
138static int ds_send(struct ldc_channel *lp, void *data, int len) 228static int ds_send(struct ldc_channel *lp, void *data, int len)
139{ 229{
140 int err, limit = 1000; 230 int err, limit = 1000;
@@ -265,36 +355,354 @@ static void domain_panic_data(struct ldc_channel *lp,
265 panic("PANIC requested by LDOM manager."); 355 panic("PANIC requested by LDOM manager.");
266} 356}
267 357
268struct ds_cpu_tag { 358struct dr_cpu_tag {
269 __u64 req_num; 359 __u64 req_num;
270 __u32 type; 360 __u32 type;
271#define DS_CPU_CONFIGURE 0x43 361#define DR_CPU_CONFIGURE 0x43
272#define DS_CPU_UNCONFIGURE 0x55 362#define DR_CPU_UNCONFIGURE 0x55
273#define DS_CPU_FORCE_UNCONFIGURE 0x46 363#define DR_CPU_FORCE_UNCONFIGURE 0x46
274#define DS_CPU_STATUS 0x53 364#define DR_CPU_STATUS 0x53
275 365
276/* Responses */ 366/* Responses */
277#define DS_CPU_OK 0x6f 367#define DR_CPU_OK 0x6f
278#define DS_CPU_ERROR 0x65 368#define DR_CPU_ERROR 0x65
279 369
280 __u32 num_records; 370 __u32 num_records;
281}; 371};
282 372
283struct ds_cpu_record { 373struct dr_cpu_resp_entry {
284 __u32 cpu_id; 374 __u32 cpu;
375 __u32 result;
376#define DR_CPU_RES_OK 0x00
377#define DR_CPU_RES_FAILURE 0x01
378#define DR_CPU_RES_BLOCKED 0x02
379#define DR_CPU_RES_CPU_NOT_RESPONDING 0x03
380#define DR_CPU_RES_NOT_IN_MD 0x04
381
382 __u32 stat;
383#define DR_CPU_STAT_NOT_PRESENT 0x00
384#define DR_CPU_STAT_UNCONFIGURED 0x01
385#define DR_CPU_STAT_CONFIGURED 0x02
386
387 __u32 str_off;
285}; 388};
286 389
390/* XXX Put this in some common place. XXX */
391static unsigned long kimage_addr_to_ra(void *p)
392{
393 unsigned long val = (unsigned long) p;
394
395 return kern_base + (val - KERNBASE);
396}
397
398void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
399{
400 extern unsigned long sparc64_ttable_tl0;
401 extern unsigned long kern_locked_tte_data;
402 extern int bigkernel;
403 struct hvtramp_descr *hdesc;
404 unsigned long trampoline_ra;
405 struct trap_per_cpu *tb;
406 u64 tte_vaddr, tte_data;
407 unsigned long hv_err;
408
409 hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL);
410 if (!hdesc) {
411 printk(KERN_ERR PFX "ldom_startcpu_cpuid: Cannot allocate "
412 "hvtramp_descr.\n");
413 return;
414 }
415
416 hdesc->cpu = cpu;
417 hdesc->num_mappings = (bigkernel ? 2 : 1);
418
419 tb = &trap_block[cpu];
420 tb->hdesc = hdesc;
421
422 hdesc->fault_info_va = (unsigned long) &tb->fault_info;
423 hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
424
425 hdesc->thread_reg = thread_reg;
426
427 tte_vaddr = (unsigned long) KERNBASE;
428 tte_data = kern_locked_tte_data;
429
430 hdesc->maps[0].vaddr = tte_vaddr;
431 hdesc->maps[0].tte = tte_data;
432 if (bigkernel) {
433 tte_vaddr += 0x400000;
434 tte_data += 0x400000;
435 hdesc->maps[1].vaddr = tte_vaddr;
436 hdesc->maps[1].tte = tte_data;
437 }
438
439 trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
440
441 hv_err = sun4v_cpu_start(cpu, trampoline_ra,
442 kimage_addr_to_ra(&sparc64_ttable_tl0),
443 __pa(hdesc));
444}
445
446/* DR cpu requests get queued onto the work list by the
447 * dr_cpu_data() callback. The list is protected by
448 * ds_lock, and processed by dr_cpu_process() in order.
449 */
450static LIST_HEAD(dr_cpu_work_list);
451
452struct dr_cpu_queue_entry {
453 struct list_head list;
454 char req[0];
455};
456
457static void __dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data)
458{
459 struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
460 struct ds_info *dp = ds_info;
461 struct {
462 struct ds_data data;
463 struct dr_cpu_tag tag;
464 } pkt;
465 int msg_len;
466
467 memset(&pkt, 0, sizeof(pkt));
468 pkt.data.tag.type = DS_DATA;
469 pkt.data.handle = cp->handle;
470 pkt.tag.req_num = tag->req_num;
471 pkt.tag.type = DR_CPU_ERROR;
472 pkt.tag.num_records = 0;
473
474 msg_len = (sizeof(struct ds_data) +
475 sizeof(struct dr_cpu_tag));
476
477 pkt.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
478
479 ds_send(dp->lp, &pkt, msg_len);
480}
481
482static void dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data)
483{
484 unsigned long flags;
485
486 spin_lock_irqsave(&ds_lock, flags);
487 __dr_cpu_send_error(cp, data);
488 spin_unlock_irqrestore(&ds_lock, flags);
489}
490
491#define CPU_SENTINEL 0xffffffff
492
493static void purge_dups(u32 *list, u32 num_ents)
494{
495 unsigned int i;
496
497 for (i = 0; i < num_ents; i++) {
498 u32 cpu = list[i];
499 unsigned int j;
500
501 if (cpu == CPU_SENTINEL)
502 continue;
503
504 for (j = i + 1; j < num_ents; j++) {
505 if (list[j] == cpu)
506 list[j] = CPU_SENTINEL;
507 }
508 }
509}
510
511static int dr_cpu_size_response(int ncpus)
512{
513 return (sizeof(struct ds_data) +
514 sizeof(struct dr_cpu_tag) +
515 (sizeof(struct dr_cpu_resp_entry) * ncpus));
516}
517
518static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
519 u64 handle, int resp_len, int ncpus,
520 cpumask_t *mask, u32 default_stat)
521{
522 struct dr_cpu_resp_entry *ent;
523 struct dr_cpu_tag *tag;
524 int i, cpu;
525
526 tag = (struct dr_cpu_tag *) (resp + 1);
527 ent = (struct dr_cpu_resp_entry *) (tag + 1);
528
529 resp->tag.type = DS_DATA;
530 resp->tag.len = resp_len - sizeof(struct ds_msg_tag);
531 resp->handle = handle;
532 tag->req_num = req_num;
533 tag->type = DR_CPU_OK;
534 tag->num_records = ncpus;
535
536 i = 0;
537 for_each_cpu_mask(cpu, *mask) {
538 ent[i].cpu = cpu;
539 ent[i].result = DR_CPU_RES_OK;
540 ent[i].stat = default_stat;
541 i++;
542 }
543 BUG_ON(i != ncpus);
544}
545
546static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus,
547 u32 res, u32 stat)
548{
549 struct dr_cpu_resp_entry *ent;
550 struct dr_cpu_tag *tag;
551 int i;
552
553 tag = (struct dr_cpu_tag *) (resp + 1);
554 ent = (struct dr_cpu_resp_entry *) (tag + 1);
555
556 for (i = 0; i < ncpus; i++) {
557 if (ent[i].cpu != cpu)
558 continue;
559 ent[i].result = res;
560 ent[i].stat = stat;
561 break;
562 }
563}
564
565static int dr_cpu_configure(struct ds_cap_state *cp, u64 req_num,
566 cpumask_t *mask)
567{
568 struct ds_data *resp;
569 int resp_len, ncpus, cpu;
570 unsigned long flags;
571
572 ncpus = cpus_weight(*mask);
573 resp_len = dr_cpu_size_response(ncpus);
574 resp = kzalloc(resp_len, GFP_KERNEL);
575 if (!resp)
576 return -ENOMEM;
577
578 dr_cpu_init_response(resp, req_num, cp->handle,
579 resp_len, ncpus, mask,
580 DR_CPU_STAT_CONFIGURED);
581
582 mdesc_fill_in_cpu_data(*mask);
583
584 for_each_cpu_mask(cpu, *mask) {
585 int err;
586
587 printk(KERN_INFO PFX "Starting cpu %d...\n", cpu);
588 err = cpu_up(cpu);
589 if (err)
590 dr_cpu_mark(resp, cpu, ncpus,
591 DR_CPU_RES_FAILURE,
592 DR_CPU_STAT_UNCONFIGURED);
593 }
594
595 spin_lock_irqsave(&ds_lock, flags);
596 ds_send(ds_info->lp, resp, resp_len);
597 spin_unlock_irqrestore(&ds_lock, flags);
598
599 kfree(resp);
600
601 return 0;
602}
603
604static int dr_cpu_unconfigure(struct ds_cap_state *cp, u64 req_num,
605 cpumask_t *mask)
606{
607 struct ds_data *resp;
608 int resp_len, ncpus;
609
610 ncpus = cpus_weight(*mask);
611 resp_len = dr_cpu_size_response(ncpus);
612 resp = kzalloc(resp_len, GFP_KERNEL);
613 if (!resp)
614 return -ENOMEM;
615
616 dr_cpu_init_response(resp, req_num, cp->handle,
617 resp_len, ncpus, mask,
618 DR_CPU_STAT_UNCONFIGURED);
619
620 kfree(resp);
621
622 return -EOPNOTSUPP;
623}
624
625static void dr_cpu_process(struct work_struct *work)
626{
627 struct dr_cpu_queue_entry *qp, *tmp;
628 struct ds_cap_state *cp;
629 unsigned long flags;
630 LIST_HEAD(todo);
631 cpumask_t mask;
632
633 cp = find_cap_by_string("dr-cpu");
634
635 spin_lock_irqsave(&ds_lock, flags);
636 list_splice(&dr_cpu_work_list, &todo);
637 spin_unlock_irqrestore(&ds_lock, flags);
638
639 list_for_each_entry_safe(qp, tmp, &todo, list) {
640 struct ds_data *data = (struct ds_data *) qp->req;
641 struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
642 u32 *cpu_list = (u32 *) (tag + 1);
643 u64 req_num = tag->req_num;
644 unsigned int i;
645 int err;
646
647 switch (tag->type) {
648 case DR_CPU_CONFIGURE:
649 case DR_CPU_UNCONFIGURE:
650 case DR_CPU_FORCE_UNCONFIGURE:
651 break;
652
653 default:
654 dr_cpu_send_error(cp, data);
655 goto next;
656 }
657
658 purge_dups(cpu_list, tag->num_records);
659
660 cpus_clear(mask);
661 for (i = 0; i < tag->num_records; i++) {
662 if (cpu_list[i] == CPU_SENTINEL)
663 continue;
664
665 if (cpu_list[i] < NR_CPUS)
666 cpu_set(cpu_list[i], mask);
667 }
668
669 if (tag->type == DR_CPU_CONFIGURE)
670 err = dr_cpu_configure(cp, req_num, &mask);
671 else
672 err = dr_cpu_unconfigure(cp, req_num, &mask);
673
674 if (err)
675 dr_cpu_send_error(cp, data);
676
677next:
678 list_del(&qp->list);
679 kfree(qp);
680 }
681}
682
683static DECLARE_WORK(dr_cpu_work, dr_cpu_process);
684
287static void dr_cpu_data(struct ldc_channel *lp, 685static void dr_cpu_data(struct ldc_channel *lp,
288 struct ds_cap_state *dp, 686 struct ds_cap_state *dp,
289 void *buf, int len) 687 void *buf, int len)
290{ 688{
689 struct dr_cpu_queue_entry *qp;
291 struct ds_data *dpkt = buf; 690 struct ds_data *dpkt = buf;
292 struct ds_cpu_tag *rp; 691 struct dr_cpu_tag *rp;
293 692
294 rp = (struct ds_cpu_tag *) (dpkt + 1); 693 rp = (struct dr_cpu_tag *) (dpkt + 1);
295 694
296 printk(KERN_ERR PFX "CPU REQ [%lx:%x], len=%d\n", 695 qp = kmalloc(sizeof(struct dr_cpu_queue_entry) + len, GFP_ATOMIC);
297 rp->req_num, rp->type, len); 696 if (!qp) {
697 struct ds_cap_state *cp;
698
699 cp = find_cap_by_string("dr-cpu");
700 __dr_cpu_send_error(cp, dpkt);
701 } else {
702 memcpy(&qp->req, buf, len);
703 list_add_tail(&qp->list, &dr_cpu_work_list);
704 schedule_work(&dr_cpu_work);
705 }
298} 706}
299 707
300struct ds_pri_msg { 708struct ds_pri_msg {
@@ -368,73 +776,6 @@ static void ds_var_data(struct ldc_channel *lp,
368 ds_var_doorbell = 1; 776 ds_var_doorbell = 1;
369} 777}
370 778
371struct ds_cap_state ds_states[] = {
372 {
373 .service_id = "md-update",
374 .data = md_update_data,
375 },
376 {
377 .service_id = "domain-shutdown",
378 .data = domain_shutdown_data,
379 },
380 {
381 .service_id = "domain-panic",
382 .data = domain_panic_data,
383 },
384 {
385 .service_id = "dr-cpu",
386 .data = dr_cpu_data,
387 },
388 {
389 .service_id = "pri",
390 .data = ds_pri_data,
391 },
392 {
393 .service_id = "var-config",
394 .data = ds_var_data,
395 },
396 {
397 .service_id = "var-config-backup",
398 .data = ds_var_data,
399 },
400};
401
402static DEFINE_SPINLOCK(ds_lock);
403
404struct ds_info {
405 struct ldc_channel *lp;
406 u8 hs_state;
407#define DS_HS_START 0x01
408#define DS_HS_DONE 0x02
409
410 void *rcv_buf;
411 int rcv_buf_len;
412};
413
414static struct ds_info *ds_info;
415
416static struct ds_cap_state *find_cap(u64 handle)
417{
418 unsigned int index = handle >> 32;
419
420 if (index >= ARRAY_SIZE(ds_states))
421 return NULL;
422 return &ds_states[index];
423}
424
425static struct ds_cap_state *find_cap_by_string(const char *name)
426{
427 int i;
428
429 for (i = 0; i < ARRAY_SIZE(ds_states); i++) {
430 if (strcmp(ds_states[i].service_id, name))
431 continue;
432
433 return &ds_states[i];
434 }
435 return NULL;
436}
437
438void ldom_set_var(const char *var, const char *value) 779void ldom_set_var(const char *var, const char *value)
439{ 780{
440 struct ds_info *dp = ds_info; 781 struct ds_info *dp = ds_info;
@@ -467,8 +808,8 @@ void ldom_set_var(const char *var, const char *value)
467 p += strlen(value) + 1; 808 p += strlen(value) + 1;
468 809
469 msg_len = (sizeof(struct ds_data) + 810 msg_len = (sizeof(struct ds_data) +
470 sizeof(struct ds_var_set_msg) + 811 sizeof(struct ds_var_set_msg) +
471 (p - base)); 812 (p - base));
472 msg_len = (msg_len + 3) & ~3; 813 msg_len = (msg_len + 3) & ~3;
473 pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag); 814 pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
474 815
@@ -520,6 +861,11 @@ void ldom_reboot(const char *boot_command)
520 sun4v_mach_sir(); 861 sun4v_mach_sir();
521} 862}
522 863
/* Power off the domain by calling sun4v_mach_exit() with exit code 0. */
void ldom_power_off(void)
{
	sun4v_mach_exit(0);
}
868
523static void ds_conn_reset(struct ds_info *dp) 869static void ds_conn_reset(struct ds_info *dp)
524{ 870{
525 printk(KERN_ERR PFX "ds_conn_reset() from %p\n", 871 printk(KERN_ERR PFX "ds_conn_reset() from %p\n",
@@ -601,7 +947,7 @@ static int ds_handshake(struct ds_info *dp, struct ds_msg_tag *pkt)
601 np->handle); 947 np->handle);
602 return 0; 948 return 0;
603 } 949 }
604 printk(KERN_ERR PFX "Could not register %s service\n", 950 printk(KERN_INFO PFX "Could not register %s service\n",
605 cp->service_id); 951 cp->service_id);
606 cp->state = CAP_STATE_UNKNOWN; 952 cp->state = CAP_STATE_UNKNOWN;
607 } 953 }