Diffstat (limited to 'drivers/xen')
-rw-r--r--  drivers/xen/balloon.c                       |  62
-rw-r--r--  drivers/xen/events.c                        |  87
-rw-r--r--  drivers/xen/gntdev.c                        |  39
-rw-r--r--  drivers/xen/grant-table.c                   |   6
-rw-r--r--  drivers/xen/pci.c                           | 105
-rw-r--r--  drivers/xen/swiotlb-xen.c                   |  70
-rw-r--r--  drivers/xen/xen-selfballoon.c               |  67
-rw-r--r--  drivers/xen/xenbus/xenbus_comms.c           |   4
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.c           | 101
-rw-r--r--  drivers/xen/xenbus/xenbus_probe_backend.c   |   2
-rw-r--r--  drivers/xen/xenbus/xenbus_probe_frontend.c  | 121
-rw-r--r--  drivers/xen/xenbus/xenbus_xs.c              |  17
12 files changed, 533 insertions(+), 148 deletions(-)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 5dfd8f8ff07f..5876e1ae6c2d 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -501,20 +501,24 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target);
  * alloc_xenballooned_pages - get pages that have been ballooned out
  * @nr_pages: Number of pages to get
  * @pages: pages returned
+ * @highmem: highmem or lowmem pages
  * @return 0 on success, error otherwise
  */
-int alloc_xenballooned_pages(int nr_pages, struct page** pages)
+int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
 {
 	int pgno = 0;
 	struct page* page;
 	mutex_lock(&balloon_mutex);
 	while (pgno < nr_pages) {
-		page = balloon_retrieve(true);
-		if (page) {
+		page = balloon_retrieve(highmem);
+		if (page && PageHighMem(page) == highmem) {
 			pages[pgno++] = page;
 		} else {
 			enum bp_state st;
-			st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER);
+			if (page)
+				balloon_append(page);
+			st = decrease_reservation(nr_pages - pgno,
+					highmem ? GFP_HIGHUSER : GFP_USER);
 			if (st != BP_DONE)
 				goto out_undo;
 		}
@@ -555,17 +559,40 @@ void free_xenballooned_pages(int nr_pages, struct page** pages)
 }
 EXPORT_SYMBOL(free_xenballooned_pages);
 
-static int __init balloon_init(void)
+static void __init balloon_add_region(unsigned long start_pfn,
+				      unsigned long pages)
 {
 	unsigned long pfn, extra_pfn_end;
 	struct page *page;
 
+	/*
+	 * If the amount of usable memory has been limited (e.g., with
+	 * the 'mem' command line parameter), don't add pages beyond
+	 * this limit.
+	 */
+	extra_pfn_end = min(max_pfn, start_pfn + pages);
+
+	for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
+		page = pfn_to_page(pfn);
+		/* totalram_pages and totalhigh_pages do not
+		   include the boot-time balloon extension, so
+		   don't subtract from it. */
+		__balloon_append(page);
+	}
+}
+
+static int __init balloon_init(void)
+{
+	int i;
+
 	if (!xen_domain())
 		return -ENODEV;
 
 	pr_info("xen/balloon: Initialising balloon driver.\n");
 
-	balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn;
+	balloon_stats.current_pages = xen_pv_domain()
+		? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
+		: max_pfn;
 	balloon_stats.target_pages = balloon_stats.current_pages;
 	balloon_stats.balloon_low = 0;
 	balloon_stats.balloon_high = 0;
@@ -584,24 +611,13 @@ static int __init balloon_init(void)
 #endif
 
 	/*
-	 * Initialise the balloon with excess memory space. We need
-	 * to make sure we don't add memory which doesn't exist or
-	 * logically exist. The E820 map can be trimmed to be smaller
-	 * than the amount of physical memory due to the mem= command
-	 * line parameter. And if this is a 32-bit non-HIGHMEM kernel
-	 * on a system with memory which requires highmem to access,
-	 * don't try to use it.
+	 * Initialize the balloon with pages from the extra memory
+	 * regions (see arch/x86/xen/setup.c).
 	 */
-	extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()),
-			    (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
-	for (pfn = PFN_UP(xen_extra_mem_start);
-	     pfn < extra_pfn_end;
-	     pfn++) {
-		page = pfn_to_page(pfn);
-		/* totalram_pages and totalhigh_pages do not include the boot-time
-		   balloon extension, so don't subtract from it. */
-		__balloon_append(page);
-	}
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
+		if (xen_extra_mem[i].size)
+			balloon_add_region(PFN_UP(xen_extra_mem[i].start),
+					   PFN_DOWN(xen_extra_mem[i].size));
 
 	return 0;
 }
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index da70f5c32eb9..7a55b292bf39 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -54,7 +54,7 @@
  * This lock protects updates to the following mapping and reference-count
  * arrays. The lock does not need to be acquired to read the mapping tables.
  */
-static DEFINE_SPINLOCK(irq_mapping_update_lock);
+static DEFINE_MUTEX(irq_mapping_update_lock);
 
 static LIST_HEAD(xen_irq_list_head);
 
@@ -432,7 +432,8 @@ static int __must_check xen_allocate_irq_dynamic(void)
 
 	irq = irq_alloc_desc_from(first, -1);
 
-	xen_irq_init(irq);
+	if (irq >= 0)
+		xen_irq_init(irq);
 
 	return irq;
 }
@@ -631,7 +632,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 	int irq = -1;
 	struct physdev_irq irq_op;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = find_irq_by_gsi(gsi);
 	if (irq != -1) {
@@ -684,7 +685,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 				handle_edge_irq, name);
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -710,10 +711,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 {
 	int irq, ret;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = xen_allocate_irq_dynamic();
-	if (irq == -1)
+	if (irq < 0)
 		goto out;
 
 	irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
@@ -724,12 +725,12 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 	if (ret < 0)
 		goto error_irq;
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	return irq;
 error_irq:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	xen_free_irq(irq);
-	return -1;
+	return ret;
 }
 #endif
 
@@ -740,7 +741,7 @@ int xen_destroy_irq(int irq)
 	struct irq_info *info = info_for_irq(irq);
 	int rc = -ENOENT;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	desc = irq_to_desc(irq);
 	if (!desc)
@@ -766,7 +767,7 @@ int xen_destroy_irq(int irq)
 	xen_free_irq(irq);
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	return rc;
 }
 
@@ -776,10 +777,10 @@ int xen_irq_from_pirq(unsigned pirq)
 
 	struct irq_info *info;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	list_for_each_entry(info, &xen_irq_list_head, list) {
-		if (info == NULL || info->type != IRQT_PIRQ)
+		if (info->type != IRQT_PIRQ)
 			continue;
 		irq = info->irq;
 		if (info->u.pirq.pirq == pirq)
@@ -787,7 +788,7 @@
 	}
 	irq = -1;
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -802,7 +803,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
 {
 	int irq;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = evtchn_to_irq[evtchn];
 
@@ -818,7 +819,7 @@
 	}
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -829,7 +830,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 	struct evtchn_bind_ipi bind_ipi;
 	int evtchn, irq;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = per_cpu(ipi_to_irq, cpu)[ipi];
 
@@ -853,7 +854,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 	}
 
  out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	return irq;
 }
 
@@ -872,13 +873,34 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
 	return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
 }
 
+static int find_virq(unsigned int virq, unsigned int cpu)
+{
+	struct evtchn_status status;
+	int port, rc = -ENOENT;
+
+	memset(&status, 0, sizeof(status));
+	for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
+		status.dom = DOMID_SELF;
+		status.port = port;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
+		if (rc < 0)
+			continue;
+		if (status.status != EVTCHNSTAT_virq)
+			continue;
+		if (status.u.virq == virq && status.vcpu == cpu) {
+			rc = port;
+			break;
+		}
+	}
+	return rc;
+}
 
 int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 {
 	struct evtchn_bind_virq bind_virq;
-	int evtchn, irq;
+	int evtchn, irq, ret;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = per_cpu(virq_to_irq, cpu)[virq];
 
@@ -892,10 +914,16 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 
 		bind_virq.virq = virq;
 		bind_virq.vcpu = cpu;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
-						&bind_virq) != 0)
-			BUG();
-		evtchn = bind_virq.port;
+		ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+						&bind_virq);
+		if (ret == 0)
+			evtchn = bind_virq.port;
+		else {
+			if (ret == -EEXIST)
+				ret = find_virq(virq, cpu);
+			BUG_ON(ret < 0);
+			evtchn = ret;
+		}
 
 		xen_irq_info_virq_init(cpu, irq, evtchn, virq);
 
@@ -903,7 +931,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 	}
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -913,7 +941,7 @@ static void unbind_from_irq(unsigned int irq)
 	struct evtchn_close close;
 	int evtchn = evtchn_from_irq(irq);
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	if (VALID_EVTCHN(evtchn)) {
 		close.port = evtchn;
@@ -943,7 +971,7 @@ static void unbind_from_irq(unsigned int irq)
 
 	xen_free_irq(irq);
 
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 }
 
 int bind_evtchn_to_irqhandler(unsigned int evtchn,
@@ -1279,7 +1307,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
 	   will also be masked. */
 	disable_irq(irq);
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	/* After resume the irq<->evtchn mappings are all cleared out */
 	BUG_ON(evtchn_to_irq[evtchn] != -1);
@@ -1289,7 +1317,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
 
 	xen_irq_info_evtchn_init(irq, evtchn);
 
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	/* new event channels are always bound to cpu 0 */
 	irq_set_affinity(irq, cpumask_of(0));
@@ -1670,6 +1698,7 @@ void __init xen_init_IRQ(void)
 
 	evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
				GFP_KERNEL);
+	BUG_ON(!evtchn_to_irq);
 	for (i = 0; i < NR_EVENT_CHANNELS; i++)
 		evtchn_to_irq[i] = -1;
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index f914b26cf0c2..880798aae2f2 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -83,6 +83,7 @@ struct grant_map {
83 struct ioctl_gntdev_grant_ref *grants; 83 struct ioctl_gntdev_grant_ref *grants;
84 struct gnttab_map_grant_ref *map_ops; 84 struct gnttab_map_grant_ref *map_ops;
85 struct gnttab_unmap_grant_ref *unmap_ops; 85 struct gnttab_unmap_grant_ref *unmap_ops;
86 struct gnttab_map_grant_ref *kmap_ops;
86 struct page **pages; 87 struct page **pages;
87}; 88};
88 89
@@ -116,19 +117,22 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
116 add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL); 117 add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL);
117 add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL); 118 add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL);
118 add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL); 119 add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL);
120 add->kmap_ops = kzalloc(sizeof(add->kmap_ops[0]) * count, GFP_KERNEL);
119 add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL); 121 add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL);
120 if (NULL == add->grants || 122 if (NULL == add->grants ||
121 NULL == add->map_ops || 123 NULL == add->map_ops ||
122 NULL == add->unmap_ops || 124 NULL == add->unmap_ops ||
125 NULL == add->kmap_ops ||
123 NULL == add->pages) 126 NULL == add->pages)
124 goto err; 127 goto err;
125 128
126 if (alloc_xenballooned_pages(count, add->pages)) 129 if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */))
127 goto err; 130 goto err;
128 131
129 for (i = 0; i < count; i++) { 132 for (i = 0; i < count; i++) {
130 add->map_ops[i].handle = -1; 133 add->map_ops[i].handle = -1;
131 add->unmap_ops[i].handle = -1; 134 add->unmap_ops[i].handle = -1;
135 add->kmap_ops[i].handle = -1;
132 } 136 }
133 137
134 add->index = 0; 138 add->index = 0;
@@ -142,6 +146,7 @@ err:
142 kfree(add->grants); 146 kfree(add->grants);
143 kfree(add->map_ops); 147 kfree(add->map_ops);
144 kfree(add->unmap_ops); 148 kfree(add->unmap_ops);
149 kfree(add->kmap_ops);
145 kfree(add); 150 kfree(add);
146 return NULL; 151 return NULL;
147} 152}
@@ -243,10 +248,35 @@ static int map_grant_pages(struct grant_map *map)
243 gnttab_set_unmap_op(&map->unmap_ops[i], addr, 248 gnttab_set_unmap_op(&map->unmap_ops[i], addr,
244 map->flags, -1 /* handle */); 249 map->flags, -1 /* handle */);
245 } 250 }
251 } else {
252 /*
253 * Setup the map_ops corresponding to the pte entries pointing
254 * to the kernel linear addresses of the struct pages.
255 * These ptes are completely different from the user ptes dealt
256 * with find_grant_ptes.
257 */
258 for (i = 0; i < map->count; i++) {
259 unsigned level;
260 unsigned long address = (unsigned long)
261 pfn_to_kaddr(page_to_pfn(map->pages[i]));
262 pte_t *ptep;
263 u64 pte_maddr = 0;
264 BUG_ON(PageHighMem(map->pages[i]));
265
266 ptep = lookup_address(address, &level);
267 pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
268 gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
269 map->flags |
270 GNTMAP_host_map |
271 GNTMAP_contains_pte,
272 map->grants[i].ref,
273 map->grants[i].domid);
274 }
246 } 275 }
247 276
248 pr_debug("map %d+%d\n", map->index, map->count); 277 pr_debug("map %d+%d\n", map->index, map->count);
249 err = gnttab_map_refs(map->map_ops, map->pages, map->count); 278 err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
279 map->pages, map->count);
250 if (err) 280 if (err)
251 return err; 281 return err;
252 282
@@ -462,13 +492,11 @@ static int gntdev_release(struct inode *inode, struct file *flip)
462 492
463 pr_debug("priv %p\n", priv); 493 pr_debug("priv %p\n", priv);
464 494
465 spin_lock(&priv->lock);
466 while (!list_empty(&priv->maps)) { 495 while (!list_empty(&priv->maps)) {
467 map = list_entry(priv->maps.next, struct grant_map, next); 496 map = list_entry(priv->maps.next, struct grant_map, next);
468 list_del(&map->next); 497 list_del(&map->next);
469 gntdev_put_map(map); 498 gntdev_put_map(map);
470 } 499 }
471 spin_unlock(&priv->lock);
472 500
473 if (use_ptemod) 501 if (use_ptemod)
474 mmu_notifier_unregister(&priv->mn, priv->mm); 502 mmu_notifier_unregister(&priv->mn, priv->mm);
@@ -532,10 +560,11 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
532 map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); 560 map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
533 if (map) { 561 if (map) {
534 list_del(&map->next); 562 list_del(&map->next);
535 gntdev_put_map(map);
536 err = 0; 563 err = 0;
537 } 564 }
538 spin_unlock(&priv->lock); 565 spin_unlock(&priv->lock);
566 if (map)
567 gntdev_put_map(map);
539 return err; 568 return err;
540} 569}
541 570
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 4f44b347b24a..8c71ab801756 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -448,7 +448,8 @@ unsigned int gnttab_max_grant_frames(void)
 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
 
 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
-		    struct page **pages, unsigned int count)
+		    struct gnttab_map_grant_ref *kmap_ops,
+		    struct page **pages, unsigned int count)
 {
 	int i, ret;
 	pte_t *pte;
@@ -488,8 +489,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
			 */
			return -EOPNOTSUPP;
		}
-		ret = m2p_add_override(mfn, pages[i],
-				       map_ops[i].flags & GNTMAP_contains_pte);
+		ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index cef4bafc07dc..66057075d6e2 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/pci.h>
+#include <linux/acpi.h>
 #include <xen/xen.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/xen.h>
@@ -26,26 +27,85 @@
 #include <asm/xen/hypercall.h>
 #include "../pci/pci.h"
 
+static bool __read_mostly pci_seg_supported = true;
+
 static int xen_add_device(struct device *dev)
 {
 	int r;
 	struct pci_dev *pci_dev = to_pci_dev(dev);
+#ifdef CONFIG_PCI_IOV
+	struct pci_dev *physfn = pci_dev->physfn;
+#endif
+
+	if (pci_seg_supported) {
+		struct physdev_pci_device_add add = {
+			.seg = pci_domain_nr(pci_dev->bus),
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
+#ifdef CONFIG_ACPI
+		acpi_handle handle;
+#endif
+
+#ifdef CONFIG_PCI_IOV
+		if (pci_dev->is_virtfn) {
+			add.flags = XEN_PCI_DEV_VIRTFN;
+			add.physfn.bus = physfn->bus->number;
+			add.physfn.devfn = physfn->devfn;
+		} else
+#endif
+		if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
+			add.flags = XEN_PCI_DEV_EXTFN;
+
+#ifdef CONFIG_ACPI
+		handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+		if (!handle)
+			handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+#ifdef CONFIG_PCI_IOV
+		if (!handle && pci_dev->is_virtfn)
+			handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+#endif
+		if (handle) {
+			acpi_status status;
+
+			do {
+				unsigned long long pxm;
+
+				status = acpi_evaluate_integer(handle, "_PXM",
+							       NULL, &pxm);
+				if (ACPI_SUCCESS(status)) {
+					add.optarr[0] = pxm;
+					add.flags |= XEN_PCI_DEV_PXM;
+					break;
+				}
+				status = acpi_get_parent(handle, &handle);
+			} while (ACPI_SUCCESS(status));
+		}
+#endif /* CONFIG_ACPI */
+
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
+		if (r != -ENOSYS)
+			return r;
+		pci_seg_supported = false;
+	}
 
+	if (pci_domain_nr(pci_dev->bus))
+		r = -ENOSYS;
 #ifdef CONFIG_PCI_IOV
-	if (pci_dev->is_virtfn) {
+	else if (pci_dev->is_virtfn) {
 		struct physdev_manage_pci_ext manage_pci_ext = {
 			.bus = pci_dev->bus->number,
 			.devfn = pci_dev->devfn,
 			.is_virtfn = 1,
-			.physfn.bus = pci_dev->physfn->bus->number,
-			.physfn.devfn = pci_dev->physfn->devfn,
+			.physfn.bus = physfn->bus->number,
+			.physfn.devfn = physfn->devfn,
 		};
 
 		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
 					  &manage_pci_ext);
-	} else
+	}
 #endif
-	if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
+	else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
 		struct physdev_manage_pci_ext manage_pci_ext = {
 			.bus = pci_dev->bus->number,
 			.devfn = pci_dev->devfn,
@@ -71,13 +131,27 @@ static int xen_remove_device(struct device *dev)
 {
 	int r;
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct physdev_manage_pci manage_pci;
 
-	manage_pci.bus = pci_dev->bus->number;
-	manage_pci.devfn = pci_dev->devfn;
+	if (pci_seg_supported) {
+		struct physdev_pci_device device = {
+			.seg = pci_domain_nr(pci_dev->bus),
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
 
-	r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
-				  &manage_pci);
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove,
+					  &device);
+	} else if (pci_domain_nr(pci_dev->bus))
+		r = -ENOSYS;
+	else {
+		struct physdev_manage_pci manage_pci = {
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
+
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
+					  &manage_pci);
+	}
 
 	return r;
 }
@@ -96,13 +170,16 @@ static int xen_pci_notifier(struct notifier_block *nb,
 		r = xen_remove_device(dev);
 		break;
 	default:
-		break;
+		return NOTIFY_DONE;
 	}
-
-	return r;
+	if (r)
+		dev_err(dev, "Failed to %s - passthrough or MSI/MSI-X might fail!\n",
+			action == BUS_NOTIFY_ADD_DEVICE ? "add" :
+			(action == BUS_NOTIFY_DEL_DEVICE ? "delete" : "?"));
+	return NOTIFY_OK;
 }
 
-struct notifier_block device_nb = {
+static struct notifier_block device_nb = {
 	.notifier_call = xen_pci_notifier,
 };
 
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 6e8c15a23201..c984768d98ca 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -38,6 +38,7 @@
 #include <xen/swiotlb-xen.h>
 #include <xen/page.h>
 #include <xen/xen-ops.h>
+#include <xen/hvc-console.h>
 /*
  * Used to do a quick range check in swiotlb_tbl_unmap_single and
  * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
@@ -146,8 +147,10 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
 void __init xen_swiotlb_init(int verbose)
 {
 	unsigned long bytes;
-	int rc;
+	int rc = -ENOMEM;
 	unsigned long nr_tbl;
+	char *m = NULL;
+	unsigned int repeat = 3;
 
 	nr_tbl = swioltb_nr_tbl();
 	if (nr_tbl)
@@ -156,16 +159,17 @@ void __init xen_swiotlb_init(int verbose)
 		xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
 		xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
 	}
-
+retry:
 	bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
 
 	/*
	 * Get IO TLB memory from any location.
	 */
 	xen_io_tlb_start = alloc_bootmem(bytes);
-	if (!xen_io_tlb_start)
-		panic("Cannot allocate SWIOTLB buffer");
-
+	if (!xen_io_tlb_start) {
+		m = "Cannot allocate Xen-SWIOTLB buffer!\n";
+		goto error;
+	}
 	xen_io_tlb_end = xen_io_tlb_start + bytes;
 	/*
	 * And replace that memory with pages under 4GB.
@@ -173,17 +177,28 @@ void __init xen_swiotlb_init(int verbose)
 	rc = xen_swiotlb_fixup(xen_io_tlb_start,
			       bytes,
			       xen_io_tlb_nslabs);
-	if (rc)
+	if (rc) {
+		free_bootmem(__pa(xen_io_tlb_start), bytes);
+		m = "Failed to get contiguous memory for DMA from Xen!\n"\
+		    "You either: don't have the permissions, do not have"\
+		    " enough free memory under 4GB, or the hypervisor memory"\
+		    "is too fragmented!";
 		goto error;
-
+	}
 	start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
 	swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
 
 	return;
 error:
-	panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\
-	      "We either don't have the permission or you do not have enough"\
-	      "free memory under 4GB!\n", rc);
+	if (repeat--) {
+		xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
+					(xen_io_tlb_nslabs >> 1));
+		printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n",
+		       (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+		goto retry;
+	}
+	xen_raw_printk("%s (rc:%d)", m, rc);
+	panic("%s (rc:%d)", m, rc);
 }
 
 void *
@@ -194,6 +209,8 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	int order = get_order(size);
 	u64 dma_mask = DMA_BIT_MASK(32);
 	unsigned long vstart;
+	phys_addr_t phys;
+	dma_addr_t dev_addr;
 
 	/*
	 * Ignore region specifiers - the kernel's ideas of
@@ -209,18 +226,26 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	vstart = __get_free_pages(flags, order);
 	ret = (void *)vstart;
 
+	if (!ret)
+		return ret;
+
 	if (hwdev && hwdev->coherent_dma_mask)
-		dma_mask = dma_alloc_coherent_mask(hwdev, flags);
+		dma_mask = hwdev->coherent_dma_mask;
 
-	if (ret) {
+	phys = virt_to_phys(ret);
+	dev_addr = xen_phys_to_bus(phys);
+	if (((dev_addr + size - 1 <= dma_mask)) &&
+	    !range_straddles_page_boundary(phys, size))
+		*dma_handle = dev_addr;
+	else {
 		if (xen_create_contiguous_region(vstart, order,
						 fls64(dma_mask)) != 0) {
 			free_pages(vstart, order);
 			return NULL;
 		}
-		memset(ret, 0, size);
 		*dma_handle = virt_to_machine(ret).maddr;
 	}
+	memset(ret, 0, size);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent);
@@ -230,11 +255,21 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 			  dma_addr_t dev_addr)
 {
 	int order = get_order(size);
+	phys_addr_t phys;
+	u64 dma_mask = DMA_BIT_MASK(32);
 
 	if (dma_release_from_coherent(hwdev, order, vaddr))
 		return;
 
-	xen_destroy_contiguous_region((unsigned long)vaddr, order);
+	if (hwdev && hwdev->coherent_dma_mask)
+		dma_mask = hwdev->coherent_dma_mask;
+
+	phys = virt_to_phys(vaddr);
+
+	if (((dev_addr + size - 1 > dma_mask)) ||
+	    range_straddles_page_boundary(phys, size))
+		xen_destroy_contiguous_region((unsigned long)vaddr, order);
+
 	free_pages((unsigned long)vaddr, order);
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
@@ -278,9 +313,10 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	/*
	 * Ensure that the address returned is DMA'ble
	 */
-	if (!dma_capable(dev, dev_addr, size))
-		panic("map_single: bounce buffer is not DMA'ble");
-
+	if (!dma_capable(dev, dev_addr, size)) {
+		swiotlb_tbl_unmap_single(dev, map, size, dir);
+		dev_addr = 0;
+	}
 	return dev_addr;
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 6ea852e25162..d93c70857e03 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -68,6 +68,8 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/bootmem.h>
+#include <linux/swap.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/module.h>
@@ -93,6 +95,15 @@ static unsigned int selfballoon_uphysteresis __read_mostly = 1;
 /* In HZ, controls frequency of worker invocation. */
 static unsigned int selfballoon_interval __read_mostly = 5;
 
+/*
+ * Minimum usable RAM in MB for selfballooning target for balloon.
+ * If non-zero, it is added to totalreserve_pages and self-ballooning
+ * will not balloon below the sum. If zero, a piecewise linear function
+ * is calculated as a minimum and added to totalreserve_pages. Note that
+ * setting this value indiscriminately may cause OOMs and crashes.
+ */
+static unsigned int selfballoon_min_usable_mb;
+
 static void selfballoon_process(struct work_struct *work);
 static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
 
@@ -189,20 +200,23 @@ static int __init xen_selfballooning_setup(char *s)
 __setup("selfballooning", xen_selfballooning_setup);
 #endif /* CONFIG_FRONTSWAP */
 
+#define MB2PAGES(mb)	((mb) << (20 - PAGE_SHIFT))
+
 /*
  * Use current balloon size, the goal (vm_committed_as), and hysteresis
  * parameters to set a new target balloon size
  */
 static void selfballoon_process(struct work_struct *work)
 {
-	unsigned long cur_pages, goal_pages, tgt_pages;
+	unsigned long cur_pages, goal_pages, tgt_pages, floor_pages;
+	unsigned long useful_pages;
 	bool reset_timer = false;
 
 	if (xen_selfballooning_enabled) {
-		cur_pages = balloon_stats.current_pages;
+		cur_pages = totalram_pages;
 		tgt_pages = cur_pages; /* default is no change */
 		goal_pages = percpu_counter_read_positive(&vm_committed_as) +
-			balloon_stats.current_pages - totalram_pages;
+				totalreserve_pages;
 #ifdef CONFIG_FRONTSWAP
 		/* allow space for frontswap pages to be repatriated */
 		if (frontswap_selfshrinking && frontswap_enabled)
@@ -217,7 +231,26 @@ static void selfballoon_process(struct work_struct *work)
				((goal_pages - cur_pages) /
				  selfballoon_uphysteresis);
 		/* else if cur_pages == goal_pages, no change */
-		balloon_set_new_target(tgt_pages);
+		useful_pages = max_pfn - totalreserve_pages;
+		if (selfballoon_min_usable_mb != 0)
+			floor_pages = totalreserve_pages +
+					MB2PAGES(selfballoon_min_usable_mb);
+		/* piecewise linear function ending in ~3% slope */
+		else if (useful_pages < MB2PAGES(16))
+			floor_pages = max_pfn; /* not worth ballooning */
+		else if (useful_pages < MB2PAGES(64))
+			floor_pages = totalreserve_pages + MB2PAGES(16) +
+					((useful_pages - MB2PAGES(16)) >> 1);
+		else if (useful_pages < MB2PAGES(512))
+			floor_pages = totalreserve_pages + MB2PAGES(40) +
+					((useful_pages - MB2PAGES(40)) >> 3);
+		else /* useful_pages >= MB2PAGES(512) */
+			floor_pages = totalreserve_pages + MB2PAGES(99) +
+					((useful_pages - MB2PAGES(99)) >> 5);
+		if (tgt_pages < floor_pages)
+			tgt_pages = floor_pages;
+		balloon_set_new_target(tgt_pages +
+			balloon_stats.current_pages - totalram_pages);
 		reset_timer = true;
 	}
 #ifdef CONFIG_FRONTSWAP
@@ -340,6 +373,31 @@ static ssize_t store_selfballoon_uphys(struct sys_device *dev,
 static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
		   show_selfballoon_uphys, store_selfballoon_uphys);
 
+SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n",
+		 selfballoon_min_usable_mb);
+
+static ssize_t store_selfballoon_min_usable_mb(struct sys_device *dev,
+					       struct sysdev_attribute *attr,
+					       const char *buf,
+					       size_t count)
+{
+	unsigned long val;
+	int err;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	err = strict_strtoul(buf, 10, &val);
+	if (err || val == 0)
+		return -EINVAL;
+	selfballoon_min_usable_mb = val;
+	return count;
+}
+
+static SYSDEV_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR,
+		   show_selfballoon_min_usable_mb,
+		   store_selfballoon_min_usable_mb);
+
+
 #ifdef CONFIG_FRONTSWAP
 SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
 
@@ -421,6 +479,7 @@ static struct attribute *selfballoon_attrs[] = {
 	&attr_selfballoon_interval.attr,
 	&attr_selfballoon_downhysteresis.attr,
 	&attr_selfballoon_uphysteresis.attr,
+	&attr_selfballoon_min_usable_mb.attr,
 #ifdef CONFIG_FRONTSWAP
 	&attr_frontswap_selfshrinking.attr,
 	&attr_frontswap_hysteresis.attr,
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index 090c61ee8fd0..2eff7a6aaa20 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -212,7 +212,9 @@ int xb_init_comms(void)
 		printk(KERN_WARNING "XENBUS response ring is not quiescent "
		       "(%08x:%08x): fixing up\n",
		       intf->rsp_cons, intf->rsp_prod);
-		intf->rsp_cons = intf->rsp_prod;
+		/* breaks kdump */
+		if (!reset_devices)
+			intf->rsp_cons = intf->rsp_prod;
 	}
 
 	if (xenbus_irq) {
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index bd2f90c9ac8b..cef9b0bf63d5 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -684,64 +684,74 @@ static int __init xenbus_probe_initcall(void)
 
 device_initcall(xenbus_probe_initcall);
 
-static int __init xenbus_init(void)
+/* Set up event channel for xenstored which is run as a local process
+ * (this is normally used only in dom0)
+ */
+static int __init xenstored_local_init(void)
 {
 	int err = 0;
 	unsigned long page = 0;
+	struct evtchn_alloc_unbound alloc_unbound;
 
-	DPRINTK("");
+	/* Allocate Xenstore page */
+	page = get_zeroed_page(GFP_KERNEL);
+	if (!page)
+		goto out_err;
 
-	err = -ENODEV;
-	if (!xen_domain())
-		return err;
+	xen_store_mfn = xen_start_info->store_mfn =
+		pfn_to_mfn(virt_to_phys((void *)page) >>
+			   PAGE_SHIFT);
 
-	/*
-	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
-	 */
-	if (xen_initial_domain()) {
-		struct evtchn_alloc_unbound alloc_unbound;
+	/* Next allocate a local port which xenstored can bind to */
+	alloc_unbound.dom = DOMID_SELF;
+	alloc_unbound.remote_dom = DOMID_SELF;
 
-		/* Allocate Xenstore page */
-		page = get_zeroed_page(GFP_KERNEL);
-		if (!page)
-			goto out_error;
+	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+					  &alloc_unbound);
+	if (err == -ENOSYS)
+		goto out_err;
 
-		xen_store_mfn = xen_start_info->store_mfn =
-			pfn_to_mfn(virt_to_phys((void *)page) >>
-				   PAGE_SHIFT);
+	BUG_ON(err);
+	xen_store_evtchn = xen_start_info->store_evtchn =
+		alloc_unbound.port;
 
-		/* Next allocate a local port which xenstored can bind to */
-		alloc_unbound.dom = DOMID_SELF;
-		alloc_unbound.remote_dom = 0;
+	return 0;
 
-		err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
-						  &alloc_unbound);
-		if (err == -ENOSYS)
-			goto out_error;
+ out_err:
+	if (page != 0)
+		free_page(page);
+	return err;
+}
 
-		BUG_ON(err);
-		xen_store_evtchn = xen_start_info->store_evtchn =
-			alloc_unbound.port;
+static int __init xenbus_init(void)
+{
+	int err = 0;
 
-		xen_store_interface = mfn_to_virt(xen_store_mfn);
+	if (!xen_domain())
+		return -ENODEV;
+
+	if (xen_hvm_domain()) {
+		uint64_t v = 0;
+		err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+		if (err)
+			goto out_error;
+		xen_store_evtchn = (int)v;
+		err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+		if (err)
+			goto out_error;
+		xen_store_mfn = (unsigned long)v;
+		xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
 	} else {
-		if (xen_hvm_domain()) {
-			uint64_t v = 0;
-			err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
-			if (err)
-				goto out_error;
-			xen_store_evtchn = (int)v;
-			err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+		xen_store_evtchn = xen_start_info->store_evtchn;
+		xen_store_mfn = xen_start_info->store_mfn;
+		if (xen_store_evtchn)
+			xenstored_ready = 1;
+		else {
+			err = xenstored_local_init();
 			if (err)
 				goto out_error;
-			xen_store_mfn = (unsigned long)v;
-			xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
-		} else {
-			xen_store_evtchn = xen_start_info->store_evtchn;
-			xen_store_mfn = xen_start_info->store_mfn;
-			xen_store_interface = mfn_to_virt(xen_store_mfn);
-			xenstored_ready = 1;
 		}
+		xen_store_interface = mfn_to_virt(xen_store_mfn);
 	}
 
 	/* Initialize the interface to xenstore. */
@@ -760,12 +770,7 @@ static int __init xenbus_init(void)
 	proc_mkdir("xen", NULL);
 #endif
 
-	return 0;
-
- out_error:
-	if (page != 0)
-		free_page(page);
-
+ out_error:
 	return err;
 }
 
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 60adf919d78d..32417b5064fd 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -104,8 +104,6 @@ static int xenbus_uevent_backend(struct device *dev,
 
 	xdev = to_xenbus_device(dev);
 	bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
-	if (xdev == NULL)
-		return -ENODEV;
 
 	if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
 		return -ENOMEM;
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index ed2ba474a560..540587e18a94 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -248,10 +248,131 @@ int __xenbus_register_frontend(struct xenbus_driver *drv,
 }
 EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
 
+static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
+static int backend_state;
+
+static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
+					const char **v, unsigned int l)
+{
+	xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state);
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+			v[XS_WATCH_PATH], xenbus_strstate(backend_state));
+	wake_up(&backend_state_wq);
+}
+
+static void xenbus_reset_wait_for_backend(char *be, int expected)
+{
+	long timeout;
+	timeout = wait_event_interruptible_timeout(backend_state_wq,
+			backend_state == expected, 5 * HZ);
+	if (timeout <= 0)
+		printk(KERN_INFO "XENBUS: backend %s timed out.\n", be);
+}
+
+/*
+ * Reset frontend if it is in Connected or Closed state.
+ * Wait for backend to catch up.
+ * State Connected happens during kdump, Closed after kexec.
+ */
+static void xenbus_reset_frontend(char *fe, char *be, int be_state)
+{
+	struct xenbus_watch be_watch;
+
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+			be, xenbus_strstate(be_state));
+
+	memset(&be_watch, 0, sizeof(be_watch));
+	be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be);
+	if (!be_watch.node)
+		return;
+
+	be_watch.callback = xenbus_reset_backend_state_changed;
+	backend_state = XenbusStateUnknown;
+
+	printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be);
+	register_xenbus_watch(&be_watch);
+
+	/* fall through to forward backend to state XenbusStateInitialising */
+	switch (be_state) {
+	case XenbusStateConnected:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
+		xenbus_reset_wait_for_backend(be, XenbusStateClosing);
+
+	case XenbusStateClosing:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
+		xenbus_reset_wait_for_backend(be, XenbusStateClosed);
+
+	case XenbusStateClosed:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising);
+		xenbus_reset_wait_for_backend(be, XenbusStateInitWait);
+	}
+
+	unregister_xenbus_watch(&be_watch);
+	printk(KERN_INFO "XENBUS: reconnect done on %s\n", be);
+	kfree(be_watch.node);
+}
+
+static void xenbus_check_frontend(char *class, char *dev)
+{
+	int be_state, fe_state, err;
+	char *backend, *frontend;
+
+	frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev);
+	if (!frontend)
+		return;
+
+	err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state);
+	if (err != 1)
+		goto out;
+
+	switch (fe_state) {
+	case XenbusStateConnected:
+	case XenbusStateClosed:
+		printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
+				frontend, xenbus_strstate(fe_state));
+		backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
+		if (!backend || IS_ERR(backend))
+			goto out;
+		err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state);
+		if (err == 1)
+			xenbus_reset_frontend(frontend, backend, be_state);
+		kfree(backend);
+		break;
+	default:
+		break;
+	}
+out:
+	kfree(frontend);
+}
+
+static void xenbus_reset_state(void)
+{
+	char **devclass, **dev;
+	int devclass_n, dev_n;
+	int i, j;
+
+	devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n);
+	if (IS_ERR(devclass))
+		return;
+
+	for (i = 0; i < devclass_n; i++) {
+		dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n);
+		if (IS_ERR(dev))
+			continue;
+		for (j = 0; j < dev_n; j++)
+			xenbus_check_frontend(devclass[i], dev[j]);
+		kfree(dev);
+	}
+	kfree(devclass);
+}
+
 static int frontend_probe_and_watch(struct notifier_block *notifier,
				   unsigned long event,
				   void *data)
 {
+	/* reset devices in Connected or Closed state */
+	if (xen_hvm_domain())
+		xenbus_reset_state();
 	/* Enumerate devices in xenstore and watch for changes. */
 	xenbus_probe_devices(&xenbus_frontend);
 	register_xenbus_watch(&fe_watch);
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 5534690075af..b3b8f2f3ad10 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -45,6 +45,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <xen/xenbus.h>
+#include <xen/xen.h>
 #include "xenbus_comms.h"
 
 struct xs_stored_msg {
@@ -620,6 +621,15 @@ static struct xenbus_watch *find_watch(const char *token)
 	return NULL;
 }
 
+static void xs_reset_watches(void)
+{
+	int err;
+
+	err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL));
+	if (err && err != -EEXIST)
+		printk(KERN_WARNING "xs_reset_watches failed: %d\n", err);
+}
+
 /* Register callback to watch this node. */
 int register_xenbus_watch(struct xenbus_watch *watch)
 {
@@ -638,8 +648,7 @@ int register_xenbus_watch(struct xenbus_watch *watch)
 
 	err = xs_watch(watch->node, token);
 
-	/* Ignore errors due to multiple registration. */
-	if ((err != 0) && (err != -EEXIST)) {
+	if (err) {
 		spin_lock(&watches_lock);
 		list_del(&watch->list);
 		spin_unlock(&watches_lock);
@@ -897,5 +906,9 @@ int xs_init(void)
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 
+	/* shutdown watches for kexec boot */
+	if (xen_hvm_domain())
+		xs_reset_watches();
+
 	return 0;
 }