-rw-r--r--  MAINTAINERS                                 |    9
-rw-r--r--  arch/arm/Kconfig                            |    2
-rw-r--r--  arch/arm64/Kconfig                          |    2
-rw-r--r--  arch/x86/include/asm/pgtable_types.h        |   11
-rw-r--r--  arch/x86/mm/fault.c                         |   22
-rw-r--r--  arch/x86/mm/init_32.c                       |    2
-rw-r--r--  arch/x86/mm/init_64.c                       |    2
-rw-r--r--  arch/x86/pci/i386.c                         |    2
-rw-r--r--  arch/x86/xen/efi.c                          |    2
-rw-r--r--  arch/x86/xen/enlighten.c                    |   19
-rw-r--r--  arch/x86/xen/mmu.c                          |   48
-rw-r--r--  arch/x86/xen/p2m.c                          |   23
-rw-r--r--  arch/x86/xen/p2m.h                          |   15
-rw-r--r--  arch/x86/xen/setup.c                        |  370
-rw-r--r--  arch/x86/xen/smp.c                          |   29
-rw-r--r--  arch/x86/xen/smp.h                          |    8
-rw-r--r--  arch/x86/xen/xen-head.S                     |   36
-rw-r--r--  drivers/block/xen-blkback/xenbus.c          |   11
-rw-r--r--  drivers/block/xen-blkfront.c                |    5
-rw-r--r--  drivers/char/tpm/xen-tpmfront.c             |   13
-rw-r--r--  drivers/input/misc/xen-kbdfront.c           |    5
-rw-r--r--  drivers/net/xen-netback/xenbus.c            |   10
-rw-r--r--  drivers/net/xen-netfront.c                  |   16
-rw-r--r--  drivers/pci/xen-pcifront.c                  |    6
-rw-r--r--  drivers/scsi/Kconfig                        |   10
-rw-r--r--  drivers/scsi/Makefile                       |    1
-rw-r--r--  drivers/scsi/xen-scsifront.c                | 1026
-rw-r--r--  drivers/tty/hvc/hvc_xen.c                   |    9
-rw-r--r--  drivers/video/fbdev/xen-fbfront.c           |    5
-rw-r--r--  drivers/xen/Kconfig                         |    9
-rw-r--r--  drivers/xen/Makefile                        |    1
-rw-r--r--  drivers/xen/efi.c                           |    2
-rw-r--r--  drivers/xen/events/events_base.c            |    5
-rw-r--r--  drivers/xen/grant-table.c                   |    2
-rw-r--r--  drivers/xen/xen-pciback/xenbus.c            |    6
-rw-r--r--  drivers/xen/xen-scsiback.c                  | 2126
-rw-r--r--  drivers/xen/xenbus/xenbus_client.c          |    9
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.c           |    6
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.h           |    4
-rw-r--r--  drivers/xen/xenbus/xenbus_probe_backend.c   |    8
-rw-r--r--  drivers/xen/xenbus/xenbus_probe_frontend.c  |    8
-rw-r--r--  include/xen/events.h                        |    2
-rw-r--r--  include/xen/interface/elfnote.h             |   48
-rw-r--r--  include/xen/interface/io/vscsiif.h          |  229
-rw-r--r--  include/xen/interface/xen.h                 |  272
-rw-r--r--  include/xen/xenbus.h                        |   21
46 files changed, 4214 insertions, 263 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index b28dc111d4a7..f8d882e13200 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10268,6 +10268,15 @@ S: Supported
 F:	drivers/block/xen-blkback/*
 F:	drivers/block/xen*
 
+XEN PVSCSI DRIVERS
+M:	Juergen Gross <jgross@suse.com>
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
+L:	linux-scsi@vger.kernel.org
+S:	Supported
+F:	drivers/scsi/xen-scsifront.c
+F:	drivers/xen/xen-scsiback.c
+F:	include/xen/interface/io/vscsiif.h
+
 XEN SWIOTLB SUBSYSTEM
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 18f392f8b744..89c4b5ccc68d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1779,7 +1779,7 @@ config XEN_DOM0
 	depends on XEN
 
 config XEN
-	bool "Xen guest support on ARM (EXPERIMENTAL)"
+	bool "Xen guest support on ARM"
 	depends on ARM && AEABI && OF
 	depends on CPU_V7 && !CPU_V6
 	depends on !GENERIC_ATOMIC64
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c49ca4c738bb..ac9afde76dea 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -349,7 +349,7 @@ config XEN_DOM0
 	depends on XEN
 
 config XEN
-	bool "Xen guest support on ARM64 (EXPERIMENTAL)"
+	bool "Xen guest support on ARM64"
 	depends on ARM64 && OF
 	select SWIOTLB_XEN
 	help
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0f9724c9c510..07789647bf33 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -23,7 +23,6 @@
 #define _PAGE_BIT_SPECIAL	_PAGE_BIT_SOFTW1
 #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_SOFTW1
 #define _PAGE_BIT_SPLITTING	_PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */
-#define _PAGE_BIT_IOMAP		_PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */
 #define _PAGE_BIT_HIDDEN	_PAGE_BIT_SOFTW3 /* hidden by kmemcheck */
 #define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_SOFTW3 /* software dirty tracking */
 #define _PAGE_BIT_NX		63 /* No execute: only valid after cpuid check */
@@ -52,7 +51,7 @@
 #define _PAGE_PSE	(_AT(pteval_t, 1) << _PAGE_BIT_PSE)
 #define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
 #define _PAGE_SOFTW1	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
-#define _PAGE_IOMAP	(_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
+#define _PAGE_SOFTW2	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
 #define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)
 #define _PAGE_PAT_LARGE	(_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
 #define _PAGE_SPECIAL	(_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
@@ -168,10 +167,10 @@
 #define __PAGE_KERNEL_LARGE_NOCACHE	(__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
 
-#define __PAGE_KERNEL_IO		(__PAGE_KERNEL | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_NOCACHE	(__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_UC_MINUS	(__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_WC		(__PAGE_KERNEL_WC | _PAGE_IOMAP)
+#define __PAGE_KERNEL_IO		(__PAGE_KERNEL)
+#define __PAGE_KERNEL_IO_NOCACHE	(__PAGE_KERNEL_NOCACHE)
+#define __PAGE_KERNEL_IO_UC_MINUS	(__PAGE_KERNEL_UC_MINUS)
+#define __PAGE_KERNEL_IO_WC		(__PAGE_KERNEL_WC)
 
 #define PAGE_KERNEL		__pgprot(__PAGE_KERNEL)
 #define PAGE_KERNEL_RO		__pgprot(__PAGE_KERNEL_RO)
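
With _PAGE_IOMAP removed, the __PAGE_KERNEL_IO* protections above become plain aliases of their non-IO counterparts, freeing _PAGE_BIT_SOFTW2 for other software uses. A minimal sketch of what the collapse implies (hypothetical compile-time check, not part of the patch):

    /* After the change the IO variants carry no extra flag bit. */
    BUILD_BUG_ON(__PAGE_KERNEL_IO != __PAGE_KERNEL);
    BUILD_BUG_ON(__PAGE_KERNEL_IO_NOCACHE != __PAGE_KERNEL_NOCACHE);
    BUILD_BUG_ON(__PAGE_KERNEL_IO_UC_MINUS != __PAGE_KERNEL_UC_MINUS);
    BUILD_BUG_ON(__PAGE_KERNEL_IO_WC != __PAGE_KERNEL_WC);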
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a24194681513..83bb03bfa259 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -933,8 +933,17 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
  * cross-processor TLB flush, even if no stale TLB entries exist
  * on other processors.
  *
+ * Spurious faults may only occur if the TLB contains an entry with
+ * fewer permissions than the page table entry.  Non-present (P = 0)
+ * and reserved bit (R = 1) faults are never spurious.
+ *
  * There are no security implications to leaving a stale TLB when
  * increasing the permissions on a page.
+ *
+ * Returns non-zero if a spurious fault was handled, zero otherwise.
+ *
+ * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
+ * (Optional Invalidation).
  */
 static noinline int
 spurious_fault(unsigned long error_code, unsigned long address)
@@ -945,8 +954,17 @@ spurious_fault(unsigned long error_code, unsigned long address)
 	pte_t *pte;
 	int ret;
 
-	/* Reserved-bit violation or user access to kernel space? */
-	if (error_code & (PF_USER | PF_RSVD))
+	/*
+	 * Only writes to RO or instruction fetches from NX may cause
+	 * spurious faults.
+	 *
+	 * These could be from user or supervisor accesses but the TLB
+	 * is only lazily flushed after a kernel mapping protection
+	 * change, so user accesses are not expected to cause spurious
+	 * faults.
+	 */
+	if (error_code != (PF_WRITE | PF_PROT)
+	    && error_code != (PF_INSTR | PF_PROT))
 		return 0;
 
 	pgd = init_mm.pgd + pgd_index(address);
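
The rewritten test demands an exact error-code match rather than merely excluding PF_USER and PF_RSVD. Since the PF_* flags are single bits (in this kernel: PF_PROT = 1, PF_WRITE = 2, PF_USER = 4, PF_RSVD = 8, PF_INSTR = 16), only two encodings pass; a standalone sketch of the logic:

    /* Sketch: only write-to-RO and fetch-from-NX can be spurious. */
    static int may_be_spurious(unsigned long error_code)
    {
            return error_code == (PF_WRITE | PF_PROT) ||
                   error_code == (PF_INSTR | PF_PROT);
    }

Any fault with PF_USER or PF_RSVD set, or with PF_PROT clear (not-present), is rejected up front.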
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 7d05565ba781..c8140e12816a 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -537,7 +537,7 @@ static void __init pagetable_init(void)
 	permanent_kmaps_init(pgd_base);
 }
 
-pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
+pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
 /* user-defined highmem size */
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5621c47d7a1a..5d984769cbd8 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -151,7 +151,7 @@ early_param("gbpages", parse_direct_gbpages_on);
  * around without checking the pgd every time.
  */
 
-pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
+pteval_t __supported_pte_mask __read_mostly = ~0;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
 int force_personality32;
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 2ae525e0d8ba..37c1435889ce 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -442,8 +442,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 	 */
 	prot |= _PAGE_CACHE_UC_MINUS;
 
-	prot |= _PAGE_IOMAP;	/* creating a mapping for IO */
-
 	vma->vm_page_prot = __pgprot(prot);
 
 	if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index a02e09e18f57..be14cc3e48d5 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -15,12 +15,14 @@
  * with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/bitops.h>
 #include <linux/efi.h>
 #include <linux/init.h>
 #include <linux/string.h>
 
 #include <xen/xen-ops.h>
 
+#include <asm/page.h>
 #include <asm/setup.h>
 
 void __init xen_efi_init(void)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0cb11fb5008..acb0effd8077 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1463,6 +1463,7 @@ static void __ref xen_setup_gdt(int cpu)
 	pv_cpu_ops.load_gdt = xen_load_gdt;
 }
 
+#ifdef CONFIG_XEN_PVH
 /*
  * A PV guest starts with default flags that are not set for PVH, set them
  * here asap.
@@ -1508,17 +1509,21 @@ static void __init xen_pvh_early_guest_init(void)
 		return;
 
 	xen_have_vector_callback = 1;
+
+	xen_pvh_early_cpu_init(0, false);
 	xen_pvh_set_cr_flags(0);
 
 #ifdef CONFIG_X86_32
 	BUG(); /* PVH: Implement proper support. */
 #endif
 }
+#endif    /* CONFIG_XEN_PVH */
 
 /* First C function to be called on Xen boot */
 asmlinkage __visible void __init xen_start_kernel(void)
 {
 	struct physdev_set_iopl set_iopl;
+	unsigned long initrd_start = 0;
 	int rc;
 
 	if (!xen_start_info)
@@ -1527,7 +1532,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	xen_domain_type = XEN_PV_DOMAIN;
 
 	xen_setup_features();
+#ifdef CONFIG_XEN_PVH
 	xen_pvh_early_guest_init();
+#endif
 	xen_setup_machphys_mapping();
 
 	/* Install Xen paravirt ops */
@@ -1559,8 +1566,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 #endif
 	__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
-	__supported_pte_mask |= _PAGE_IOMAP;
-
 	/*
 	 * Prevent page tables from being allocated in highmem, even
 	 * if CONFIG_HIGHPTE is enabled.
@@ -1667,10 +1672,16 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	new_cpu_data.x86_capability[0] = cpuid_edx(1);
 #endif
 
+	if (xen_start_info->mod_start) {
+		if (xen_start_info->flags & SIF_MOD_START_PFN)
+			initrd_start = PFN_PHYS(xen_start_info->mod_start);
+		else
+			initrd_start = __pa(xen_start_info->mod_start);
+	}
+
 	/* Poke various useful things into boot_params */
 	boot_params.hdr.type_of_loader = (9 << 4) | 0;
-	boot_params.hdr.ramdisk_image = xen_start_info->mod_start
-		? __pa(xen_start_info->mod_start) : 0;
+	boot_params.hdr.ramdisk_image = initrd_start;
 	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
 	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
 
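
SIF_MOD_START_PFN, advertised via the XEN_ELFNOTE_MOD_START_PFN note added in xen-head.S below, tells the guest that mod_start already holds a page frame number rather than a virtual address. A sketch of the distinction, assuming PFN_PHYS(x) expands to x << PAGE_SHIFT:

    unsigned long mod_start_to_phys(unsigned long mod_start, uint32_t flags)
    {
            if (flags & SIF_MOD_START_PFN)
                    return mod_start << PAGE_SHIFT; /* already a PFN */
            return __pa(mod_start);                 /* virtual address */
    }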
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 16fb0099b7f2..f62af7647ec9 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -399,38 +399,14 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
 		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
 			mfn = 0;
 			flags = 0;
-		} else {
-			/*
-			 * Paramount to do this test _after_ the
-			 * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
-			 * IDENTITY_FRAME_BIT resolves to true.
-			 */
-			mfn &= ~FOREIGN_FRAME_BIT;
-			if (mfn & IDENTITY_FRAME_BIT) {
-				mfn &= ~IDENTITY_FRAME_BIT;
-				flags |= _PAGE_IOMAP;
-			}
-		}
+		} else
+			mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
 		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
 	}
 
 	return val;
 }
 
-static pteval_t iomap_pte(pteval_t val)
-{
-	if (val & _PAGE_PRESENT) {
-		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
-		pteval_t flags = val & PTE_FLAGS_MASK;
-
-		/* We assume the pte frame number is a MFN, so
-		   just use it as-is. */
-		val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
-	}
-
-	return val;
-}
-
 __visible pteval_t xen_pte_val(pte_t pte)
 {
 	pteval_t pteval = pte.pte;
@@ -441,9 +417,6 @@ __visible pteval_t xen_pte_val(pte_t pte)
 		pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
 	}
 #endif
-	if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
-		return pteval;
-
 	return pte_mfn_to_pfn(pteval);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
@@ -481,7 +454,6 @@ void xen_set_pat(u64 pat)
 
 __visible pte_t xen_make_pte(pteval_t pte)
 {
-	phys_addr_t addr = (pte & PTE_PFN_MASK);
 #if 0
 	/* If Linux is trying to set a WC pte, then map to the Xen WC.
 	 * If _PAGE_PAT is set, then it probably means it is really
@@ -496,19 +468,7 @@ __visible pte_t xen_make_pte(pteval_t pte)
 		pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
 	}
 #endif
-	/*
-	 * Unprivileged domains are allowed to do IOMAPpings for
-	 * PCI passthrough, but not map ISA space.  The ISA
-	 * mappings are just dummy local mappings to keep other
-	 * parts of the kernel happy.
-	 */
-	if (unlikely(pte & _PAGE_IOMAP) &&
-	    (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
-		pte = iomap_pte(pte);
-	} else {
-		pte &= ~_PAGE_IOMAP;
-		pte = pte_pfn_to_mfn(pte);
-	}
+	pte = pte_pfn_to_mfn(pte);
 
 	return native_make_pte(pte);
 }
@@ -2091,7 +2051,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
 	default:
 		/* By default, set_fixmap is used for hardware mappings */
-		pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
+		pte = mfn_pte(phys, prot);
 		break;
 	}
 
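
Both tag bits can now be stripped with one mask because neither affects the PTE flags any more. For reference, the p2m encoding (per asm/xen/page.h) keeps the tags in the top two bits of an entry, and INVALID_P2M_ENTRY (~0UL) has both set, which is why the deleted "Paramount" comment insisted on testing INVALID first:

    #define FOREIGN_FRAME_BIT   (1UL << (BITS_PER_LONG - 1))
    #define IDENTITY_FRAME_BIT  (1UL << (BITS_PER_LONG - 2))

    /* Sketch of the simplified path. */
    static unsigned long mfn_strip_tags(unsigned long mfn)
    {
            return mfn & ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
    }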
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 3172692381ae..9f5983b01ed9 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -173,6 +173,7 @@
 #include <xen/balloon.h>
 #include <xen/grant_table.h>
 
+#include "p2m.h"
 #include "multicalls.h"
 #include "xen-ops.h"
 
@@ -180,12 +181,6 @@ static void __init m2p_override_init(void);
 
 unsigned long xen_max_p2m_pfn __read_mostly;
 
-#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
-#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
-#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))
-
-#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
-
 /* Placeholders for holes in the address space */
 static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
 static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
@@ -202,16 +197,12 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
 RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
 RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
 
-/* We might hit two boundary violations at the start and end, at max each
- * boundary violation will require three middle nodes. */
-RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3);
-
-/* When we populate back during bootup, the amount of pages can vary. The
- * max we have is seen is 395979, but that does not mean it can't be more.
- * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle
- * it can re-use Xen provided mfn_list array, so we only need to allocate at
- * most three P2M top nodes. */
-RESERVE_BRK(p2m_populated, PAGE_SIZE * 3);
+/* For each I/O range remapped we may lose up to two leaf pages for the boundary
+ * violations and three mid pages to cover up to 3GB. With
+ * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the
+ * remapped region.
+ */
+RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES);
 
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
diff --git a/arch/x86/xen/p2m.h b/arch/x86/xen/p2m.h
new file mode 100644
index 000000000000..ad8aee24ab72
--- /dev/null
+++ b/arch/x86/xen/p2m.h
@@ -0,0 +1,15 @@
+#ifndef _XEN_P2M_H
+#define _XEN_P2M_H
+
+#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
+#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
+#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))
+
+#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
+
+#define MAX_REMAP_RANGES	10
+
+extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
+						    unsigned long pfn_e);
+
+#endif	/* _XEN_P2M_H */
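
With 4 KiB pages on x86-64 each p2m level fans out 512-way, so the three-level tree covers 512^3 frames. A quick standalone check of that arithmetic:

    /* Userspace sketch; assumes PAGE_SIZE 4096 and 8-byte longs. */
    #include <assert.h>
    int main(void)
    {
            unsigned long per_page = 4096 / sizeof(unsigned long);  /* 512 */
            unsigned long max_pfn = per_page * per_page * per_page;
            assert(max_pfn == 134217728UL);  /* x 4 KiB = 512 GiB */
            return 0;
    }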
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 2e555163c2fe..af7216128d93 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -29,6 +29,7 @@
 #include <xen/features.h>
 #include "xen-ops.h"
 #include "vdso.h"
+#include "p2m.h"
 
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
@@ -46,6 +47,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
 /* Number of pages released from the initial allocation. */
 unsigned long xen_released_pages;
 
+/* Buffer used to remap identity mapped pages */
+unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
+
 /*
  * The maximum amount of extra memory compared to the base size.  The
  * main scaling factor is the size of struct page.  At extreme ratios
@@ -151,107 +155,325 @@ static unsigned long __init xen_do_chunk(unsigned long start,
 	return len;
 }
 
-static unsigned long __init xen_release_chunk(unsigned long start,
-					      unsigned long end)
-{
-	return xen_do_chunk(start, end, true);
-}
-
-static unsigned long __init xen_populate_chunk(
+/*
+ * Finds the next RAM pfn available in the E820 map after min_pfn.
+ * This function updates min_pfn with the pfn found and returns
+ * the size of that range or zero if not found.
+ */
+static unsigned long __init xen_find_pfn_range(
 	const struct e820entry *list, size_t map_size,
-	unsigned long max_pfn, unsigned long *last_pfn,
-	unsigned long credits_left)
+	unsigned long *min_pfn)
 {
 	const struct e820entry *entry;
 	unsigned int i;
 	unsigned long done = 0;
-	unsigned long dest_pfn;
 
 	for (i = 0, entry = list; i < map_size; i++, entry++) {
 		unsigned long s_pfn;
 		unsigned long e_pfn;
-		unsigned long pfns;
-		long capacity;
-
-		if (credits_left <= 0)
-			break;
 
 		if (entry->type != E820_RAM)
 			continue;
 
 		e_pfn = PFN_DOWN(entry->addr + entry->size);
 
-		/* We only care about E820 after the xen_start_info->nr_pages */
-		if (e_pfn <= max_pfn)
+		/* We only care about E820 after this */
+		if (e_pfn < *min_pfn)
 			continue;
 
 		s_pfn = PFN_UP(entry->addr);
-		/* If the E820 falls within the nr_pages, we want to start
-		 * at the nr_pages PFN.
-		 * If that would mean going past the E820 entry, skip it
+
+		/* If min_pfn falls within the E820 entry, we want to start
+		 * at the min_pfn PFN.
 		 */
-		if (s_pfn <= max_pfn) {
-			capacity = e_pfn - max_pfn;
-			dest_pfn = max_pfn;
+		if (s_pfn <= *min_pfn) {
+			done = e_pfn - *min_pfn;
 		} else {
-			capacity = e_pfn - s_pfn;
-			dest_pfn = s_pfn;
+			done = e_pfn - s_pfn;
+			*min_pfn = s_pfn;
 		}
+		break;
+	}
 
-		if (credits_left < capacity)
-			capacity = credits_left;
+	return done;
+}
 
-		pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
-		done += pfns;
-		*last_pfn = (dest_pfn + pfns);
-		if (pfns < capacity)
-			break;
-		credits_left -= pfns;
+/*
+ * This releases a chunk of memory and then does the identity map. It's used
+ * as a fallback if the remapping fails.
+ */
+static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
+	unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
+	unsigned long *released)
+{
+	WARN_ON(start_pfn > end_pfn);
+
+	/* Need to release pages first */
+	*released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true);
+	*identity += set_phys_range_identity(start_pfn, end_pfn);
+}
+
+/*
+ * Helper function to update both the p2m and m2p tables.
+ */
+static unsigned long __init xen_update_mem_tables(unsigned long pfn,
+						  unsigned long mfn)
+{
+	struct mmu_update update = {
+		.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
+		.val = pfn
+	};
+
+	/* Update p2m */
+	if (!early_set_phys_to_machine(pfn, mfn)) {
+		WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
+		     pfn, mfn);
+		return false;
 	}
-	return done;
+
+	/* Update m2p */
+	if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
+		WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
+		     mfn, pfn);
+		return false;
+	}
+
+	return true;
 }
 
-static void __init xen_set_identity_and_release_chunk(
-	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
-	unsigned long *released, unsigned long *identity)
+/*
+ * This function updates the p2m and m2p tables with an identity map from
+ * start_pfn to start_pfn+size and remaps the underlying RAM of the original
+ * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks
+ * to not exhaust the reserved brk space. Doing it in properly aligned blocks
+ * ensures we only allocate the minimum required leaf pages in the p2m table. It
+ * copies the existing mfns from the p2m table under the 1:1 map, overwrites
+ * them with the identity map and then updates the p2m and m2p tables with the
+ * remapped memory.
+ */
+static unsigned long __init xen_do_set_identity_and_remap_chunk(
+	unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
 {
-	unsigned long pfn;
+	unsigned long ident_pfn_iter, remap_pfn_iter;
+	unsigned long ident_start_pfn_align, remap_start_pfn_align;
+	unsigned long ident_end_pfn_align, remap_end_pfn_align;
+	unsigned long ident_boundary_pfn, remap_boundary_pfn;
+	unsigned long ident_cnt = 0;
+	unsigned long remap_cnt = 0;
+	unsigned long left = size;
+	unsigned long mod;
+	int i;
+
+	WARN_ON(size == 0);
+
+	BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
 
 	/*
-	 * If the PFNs are currently mapped, clear the mappings
-	 * (except for the ISA region which must be 1:1 mapped) to
-	 * release the refcounts (in Xen) on the original frames.
+	 * Determine the proper alignment to remap memory in P2M_PER_PAGE sized
+	 * blocks. We need to keep track of both the existing pfn mapping and
+	 * the new pfn remapping.
 	 */
-	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
-		pte_t pte = __pte_ma(0);
+	mod = start_pfn % P2M_PER_PAGE;
+	ident_start_pfn_align =
+		mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
+	mod = remap_pfn % P2M_PER_PAGE;
+	remap_start_pfn_align =
+		mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
+	mod = (start_pfn + size) % P2M_PER_PAGE;
+	ident_end_pfn_align = start_pfn + size - mod;
+	mod = (remap_pfn + size) % P2M_PER_PAGE;
+	remap_end_pfn_align = remap_pfn + size - mod;
+
+	/* Iterate over each p2m leaf node in each range */
+	for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align;
+	     ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align;
+	     ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) {
+		/* Check we aren't past the end */
+		BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
+		BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);
+
+		/* Save p2m mappings */
+		for (i = 0; i < P2M_PER_PAGE; i++)
+			xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i);
+
+		/* Set identity map which will free a p2m leaf */
+		ident_cnt += set_phys_range_identity(ident_pfn_iter,
+			ident_pfn_iter + P2M_PER_PAGE);
+
+#ifdef DEBUG
+		/* Helps verify a p2m leaf has been freed */
+		for (i = 0; i < P2M_PER_PAGE; i++) {
+			unsigned int pfn = ident_pfn_iter + i;
+			BUG_ON(pfn_to_mfn(pfn) != pfn);
+		}
+#endif
+		/* Now remap memory */
+		for (i = 0; i < P2M_PER_PAGE; i++) {
+			unsigned long mfn = xen_remap_buf[i];
+
+			/* This will use the p2m leaf freed above */
+			if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
+				WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
+					remap_pfn_iter + i, mfn);
+				return 0;
+			}
+
+			remap_cnt++;
+		}
 
-		if (pfn < PFN_UP(ISA_END_ADDRESS))
-			pte = mfn_pte(pfn, PAGE_KERNEL_IO);
+		left -= P2M_PER_PAGE;
+	}
 
-		(void)HYPERVISOR_update_va_mapping(
-			(unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
+	/* Max boundary space possible */
+	BUG_ON(left > (P2M_PER_PAGE - 1) * 2);
+
+	/* Now handle the boundary conditions */
+	ident_boundary_pfn = start_pfn;
+	remap_boundary_pfn = remap_pfn;
+	for (i = 0; i < left; i++) {
+		unsigned long mfn;
+
+		/* These two checks move from the start to end boundaries */
+		if (ident_boundary_pfn == ident_start_pfn_align)
+			ident_boundary_pfn = ident_pfn_iter;
+		if (remap_boundary_pfn == remap_start_pfn_align)
+			remap_boundary_pfn = remap_pfn_iter;
+
+		/* Check we aren't past the end */
+		BUG_ON(ident_boundary_pfn >= start_pfn + size);
+		BUG_ON(remap_boundary_pfn >= remap_pfn + size);
+
+		mfn = pfn_to_mfn(ident_boundary_pfn);
+
+		if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) {
+			WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
+				remap_pfn_iter + i, mfn);
+			return 0;
+		}
+		remap_cnt++;
+
+		ident_boundary_pfn++;
+		remap_boundary_pfn++;
 	}
 
-	if (start_pfn < nr_pages)
-		*released += xen_release_chunk(
-			start_pfn, min(end_pfn, nr_pages));
+	/* Finish up the identity map */
+	if (ident_start_pfn_align >= ident_end_pfn_align) {
+		/*
+		 * In this case we have an identity range which does not span an
+		 * aligned block so everything needs to be identity mapped here.
+		 * If we didn't check this we might remap too many pages since
+		 * the align boundaries are not meaningful in this case.
+		 */
+		ident_cnt += set_phys_range_identity(start_pfn,
+			start_pfn + size);
+	} else {
+		/* Remapped above so check each end of the chunk */
+		if (start_pfn < ident_start_pfn_align)
+			ident_cnt += set_phys_range_identity(start_pfn,
+				ident_start_pfn_align);
+		if (start_pfn + size > ident_pfn_iter)
+			ident_cnt += set_phys_range_identity(ident_pfn_iter,
+				start_pfn + size);
+	}
 
-	*identity += set_phys_range_identity(start_pfn, end_pfn);
+	BUG_ON(ident_cnt != size);
+	BUG_ON(remap_cnt != size);
+
+	return size;
 }
 
-static unsigned long __init xen_set_identity_and_release(
-	const struct e820entry *list, size_t map_size, unsigned long nr_pages)
+/*
+ * This function takes a contiguous pfn range that needs to be identity mapped
+ * and:
+ *
+ * 1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
+ * 2) Calls the do_ function to actually do the mapping/remapping work.
+ *
+ * The goal is to not allocate additional memory but to remap the existing
+ * pages. In the case of an error the underlying memory is simply released back
+ * to Xen and not remapped.
+ */
+static unsigned long __init xen_set_identity_and_remap_chunk(
+	const struct e820entry *list, size_t map_size, unsigned long start_pfn,
+	unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
+	unsigned long *identity, unsigned long *remapped,
+	unsigned long *released)
+{
+	unsigned long pfn;
+	unsigned long i = 0;
+	unsigned long n = end_pfn - start_pfn;
+
+	while (i < n) {
+		unsigned long cur_pfn = start_pfn + i;
+		unsigned long left = n - i;
+		unsigned long size = left;
+		unsigned long remap_range_size;
+
+		/* Do not remap pages beyond the current allocation */
+		if (cur_pfn >= nr_pages) {
+			/* Identity map remaining pages */
+			*identity += set_phys_range_identity(cur_pfn,
+				cur_pfn + size);
+			break;
+		}
+		if (cur_pfn + size > nr_pages)
+			size = nr_pages - cur_pfn;
+
+		remap_range_size = xen_find_pfn_range(list, map_size,
+						      &remap_pfn);
+		if (!remap_range_size) {
+			pr_warning("Unable to find available pfn range, not remapping identity pages\n");
+			xen_set_identity_and_release_chunk(cur_pfn,
+				cur_pfn + left, nr_pages, identity, released);
+			break;
+		}
+		/* Adjust size to fit in current e820 RAM region */
+		if (size > remap_range_size)
+			size = remap_range_size;
+
+		if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) {
+			WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n",
+				cur_pfn, size, remap_pfn);
+			xen_set_identity_and_release_chunk(cur_pfn,
+				cur_pfn + left, nr_pages, identity, released);
+			break;
+		}
+
+		/* Update variables to reflect new mappings. */
+		i += size;
+		remap_pfn += size;
+		*identity += size;
+		*remapped += size;
+	}
+
+	/*
+	 * If the PFNs are currently mapped, the VA mapping also needs
+	 * to be updated to be 1:1.
+	 */
+	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
+		(void)HYPERVISOR_update_va_mapping(
+			(unsigned long)__va(pfn << PAGE_SHIFT),
+			mfn_pte(pfn, PAGE_KERNEL_IO), 0);
+
+	return remap_pfn;
+}
+
+static unsigned long __init xen_set_identity_and_remap(
+	const struct e820entry *list, size_t map_size, unsigned long nr_pages,
+	unsigned long *released)
 {
 	phys_addr_t start = 0;
-	unsigned long released = 0;
 	unsigned long identity = 0;
+	unsigned long remapped = 0;
+	unsigned long last_pfn = nr_pages;
 	const struct e820entry *entry;
+	unsigned long num_released = 0;
 	int i;
 
 	/*
 	 * Combine non-RAM regions and gaps until a RAM region (or the
 	 * end of the map) is reached, then set the 1:1 map and
-	 * release the pages (if available) in those non-RAM regions.
+	 * remap the memory in those non-RAM regions.
 	 *
 	 * The combined non-RAM regions are rounded to a whole number
 	 * of pages so any partial pages are accessible via the 1:1
@@ -269,22 +491,24 @@ static unsigned long __init xen_set_identity_and_release(
 		end_pfn = PFN_UP(entry->addr);
 
 		if (start_pfn < end_pfn)
-			xen_set_identity_and_release_chunk(
-				start_pfn, end_pfn, nr_pages,
-				&released, &identity);
-
+			last_pfn = xen_set_identity_and_remap_chunk(
+						list, map_size, start_pfn,
+						end_pfn, nr_pages, last_pfn,
+						&identity, &remapped,
+						&num_released);
 			start = end;
 		}
 	}
 
-	if (released)
-		printk(KERN_INFO "Released %lu pages of unused memory\n", released);
-	if (identity)
-		printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
+	*released = num_released;
 
-	return released;
-}
+	pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
+	pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped,
+		last_pfn);
+	pr_info("Released %ld page(s)\n", num_released);
 
+	return last_pfn;
+}
 static unsigned long __init xen_get_max_pages(void)
 {
 	unsigned long max_pages = MAX_DOMAIN_PAGES;
@@ -347,7 +571,6 @@ char * __init xen_memory_setup(void)
 	unsigned long max_pages;
 	unsigned long last_pfn = 0;
 	unsigned long extra_pages = 0;
-	unsigned long populated;
 	int i;
 	int op;
 
@@ -392,20 +615,11 @@ char * __init xen_memory_setup(void)
 		extra_pages += max_pages - max_pfn;
 
 	/*
-	 * Set P2M for all non-RAM pages and E820 gaps to be identity
-	 * type PFNs.  Any RAM pages that would be made inaccesible by
-	 * this are first released.
+	 * Set identity map on non-RAM pages and remap the underlying RAM.
 	 */
-	xen_released_pages = xen_set_identity_and_release(
-		map, memmap.nr_entries, max_pfn);
-
-	/*
-	 * Populate back the non-RAM pages and E820 gaps that had been
-	 * released. */
-	populated = xen_populate_chunk(map, memmap.nr_entries,
-			max_pfn, &last_pfn, xen_released_pages);
+	last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
+			&xen_released_pages);
 
-	xen_released_pages -= populated;
 	extra_pages += xen_released_pages;
 
 	if (last_pfn > max_pfn) {
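
The alignment logic in xen_do_set_identity_and_remap_chunk() rounds both ranges inward to whole P2M_PER_PAGE (512-entry) leaves so complete leaf pages can be freed and reused; only the unaligned remainders are handled pfn-by-pfn in the boundary loop. A standalone sketch of the round-up it uses:

    /* Sketch: round pfn up to the next 512-pfn p2m leaf boundary. */
    static unsigned long p2m_align_up(unsigned long pfn)
    {
            unsigned long mod = pfn % 512;
            return mod ? (pfn - mod + 512) : pfn;
    }
    /* p2m_align_up(1000) == 1024, p2m_align_up(1024) == 1024 */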
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7005974c3ff3..c670d7518cf4 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -37,6 +37,7 @@
 #include <xen/hvc-console.h>
 #include "xen-ops.h"
 #include "mmu.h"
+#include "smp.h"
 
 cpumask_var_t xen_cpu_initialized_map;
 
@@ -99,10 +100,14 @@ static void cpu_bringup(void)
 	wmb();			/* make sure everything is out */
 }
 
-/* Note: cpu parameter is only relevant for PVH */
-static void cpu_bringup_and_idle(int cpu)
+/*
+ * Note: cpu parameter is only relevant for PVH. The reason for passing it
+ * is we can't do smp_processor_id until the percpu segments are loaded, for
+ * which we need the cpu number! So we pass it in rdi as first parameter.
+ */
+asmlinkage __visible void cpu_bringup_and_idle(int cpu)
 {
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_XEN_PVH
 	if (xen_feature(XENFEAT_auto_translated_physmap) &&
 	    xen_feature(XENFEAT_supervisor_mode_kernel))
 		xen_pvh_secondary_vcpu_init(cpu);
@@ -374,11 +379,10 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	ctxt->user_regs.fs = __KERNEL_PERCPU;
 	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
 #endif
-	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
-
 	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
 		ctxt->flags = VGCF_IN_KERNEL;
 		ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
 		ctxt->user_regs.ds = __USER_DS;
@@ -413,15 +417,18 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 			(unsigned long)xen_failsafe_callback;
 		ctxt->user_regs.cs = __KERNEL_CS;
 		per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
-#ifdef CONFIG_X86_32
 	}
-#else
-	} else
-		/* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with
-		 * %rdi having the cpu number - which means are passing in
-		 * as the first parameter the cpu. Subtle!
+#ifdef CONFIG_XEN_PVH
+	else {
+		/*
+		 * The vcpu comes on kernel page tables which have the NX pte
+		 * bit set. This means before DS/SS is touched, NX in
+		 * EFER must be set. Hence the following assembly glue code.
 		 */
+		ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
 		ctxt->user_regs.rdi = cpu;
+		ctxt->user_regs.rsi = true;	/* entry == true */
+	}
 #endif
 	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
 	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index c7c2d89efd76..963d62a35c82 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -8,4 +8,12 @@ extern void xen_send_IPI_allbutself(int vector);
 extern void xen_send_IPI_all(int vector);
 extern void xen_send_IPI_self(int vector);
 
+#ifdef CONFIG_XEN_PVH
+extern void xen_pvh_early_cpu_init(int cpu, bool entry);
+#else
+static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
+{
+}
+#endif
+
 #endif
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 485b69585540..674b222544b7 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -47,6 +47,41 @@ ENTRY(startup_xen)
 
 	__FINIT
 
+#ifdef CONFIG_XEN_PVH
+/*
+ * xen_pvh_early_cpu_init() - early PVH VCPU initialization
+ * @cpu:   this cpu number (%rdi)
+ * @entry: true if this is a secondary vcpu coming up on this entry
+ *         point, false if this is the boot CPU being initialized for
+ *         the first time (%rsi)
+ *
+ * Note: This is called as a function on the boot CPU, and is the entry point
+ * on the secondary CPU.
+ */
+ENTRY(xen_pvh_early_cpu_init)
+	mov     %rsi, %r11
+
+	/* Gather features to see if NX implemented. */
+	mov     $0x80000001, %eax
+	cpuid
+	mov     %edx, %esi
+
+	mov     $MSR_EFER, %ecx
+	rdmsr
+	bts     $_EFER_SCE, %eax
+
+	bt      $20, %esi
+	jnc     1f      /* No NX, skip setting it */
+	bts     $_EFER_NX, %eax
+1:	wrmsr
+#ifdef CONFIG_SMP
+	cmp     $0, %r11b
+	jne     cpu_bringup_and_idle
+#endif
+	ret
+
+#endif /* CONFIG_XEN_PVH */
+
 .pushsection .text
 	.balign PAGE_SIZE
 ENTRY(hypercall_page)
@@ -124,6 +159,7 @@ NEXT_HYPERCALL(arch_6)
 	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
 		.quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
 	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
+	ELFNOTE(Xen, XEN_ELFNOTE_MOD_START_PFN, .long 1)
 	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   _ASM_PTR __HYPERVISOR_VIRT_START)
 	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   _ASM_PTR 0)
 
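
The stub must run in assembly because the new vcpu has no usable stack or percpu state yet. Its logic, rendered as C purely for readability (EFER.SCE is bit 0, EFER.NX is bit 11, and CPUID leaf 0x80000001 reports NX in EDX bit 20):

    /* Sketch of the EFER setup done by xen_pvh_early_cpu_init. */
    uint64_t efer_bits_to_set(uint32_t cpuid_80000001_edx)
    {
            uint64_t bits = 1ULL << 0;               /* EFER.SCE */
            if (cpuid_80000001_edx & (1u << 20))     /* NX supported? */
                    bits |= 1ULL << 11;              /* EFER.NX */
            return bits;
    }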
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 3a8b810b4980..0b13b1c9a01e 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -907,22 +907,17 @@ static int connect_ring(struct backend_info *be)
 	return 0;
 }
 
-
-/* ** Driver Registration ** */
-
-
 static const struct xenbus_device_id xen_blkbk_ids[] = {
 	{ "vbd" },
 	{ "" }
 };
 
-
-static DEFINE_XENBUS_DRIVER(xen_blkbk, ,
+static struct xenbus_driver xen_blkbk_driver = {
+	.ids  = xen_blkbk_ids,
 	.probe = xen_blkbk_probe,
 	.remove = xen_blkbk_remove,
 	.otherend_changed = frontend_changed
-);
-
+};
 
 int xen_blkif_xenbus_init(void)
 {
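
This is the first of several identical conversions in this series: DEFINE_XENBUS_DRIVER() is dropped in favor of an open-coded struct xenbus_driver with designated initializers; judging from the include/xen/xenbus.h change in the diffstat, the common fields the macro used to fill in are now handled by the xenbus registration path. The pattern, shown with a hypothetical "example" driver:

    /* Before: */
    static DEFINE_XENBUS_DRIVER(example, ,
            .probe = example_probe,
    );

    /* After: */
    static struct xenbus_driver example_driver = {
            .ids = example_ids,
            .probe = example_probe,
    };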
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 5deb235bd18f..37af03e9d859 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2055,13 +2055,14 @@ static const struct xenbus_device_id blkfront_ids[] = {
2055 { "" } 2055 { "" }
2056}; 2056};
2057 2057
2058static DEFINE_XENBUS_DRIVER(blkfront, , 2058static struct xenbus_driver blkfront_driver = {
2059 .ids = blkfront_ids,
2059 .probe = blkfront_probe, 2060 .probe = blkfront_probe,
2060 .remove = blkfront_remove, 2061 .remove = blkfront_remove,
2061 .resume = blkfront_resume, 2062 .resume = blkfront_resume,
2062 .otherend_changed = blkback_changed, 2063 .otherend_changed = blkback_changed,
2063 .is_ready = blkfront_is_ready, 2064 .is_ready = blkfront_is_ready,
2064); 2065};
2065 2066
2066static int __init xlblk_init(void) 2067static int __init xlblk_init(void)
2067{ 2068{
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index 2064b4527040..441b44e54226 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -367,12 +367,13 @@ static const struct xenbus_device_id tpmfront_ids[] = {
 };
 MODULE_ALIAS("xen:vtpm");
 
-static DEFINE_XENBUS_DRIVER(tpmfront, ,
-	.probe = tpmfront_probe,
-	.remove = tpmfront_remove,
-	.resume = tpmfront_resume,
-	.otherend_changed = backend_changed,
-	);
+static struct xenbus_driver tpmfront_driver = {
+	.ids = tpmfront_ids,
+	.probe = tpmfront_probe,
+	.remove = tpmfront_remove,
+	.resume = tpmfront_resume,
+	.otherend_changed = backend_changed,
+};
 
 static int __init xen_tpmfront_init(void)
 {
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index fbfdc10573be..1af28b06c713 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -365,12 +365,13 @@ static const struct xenbus_device_id xenkbd_ids[] = {
365 { "" } 365 { "" }
366}; 366};
367 367
368static DEFINE_XENBUS_DRIVER(xenkbd, , 368static struct xenbus_driver xenkbd_driver = {
369 .ids = xenkbd_ids,
369 .probe = xenkbd_probe, 370 .probe = xenkbd_probe,
370 .remove = xenkbd_remove, 371 .remove = xenkbd_remove,
371 .resume = xenkbd_resume, 372 .resume = xenkbd_resume,
372 .otherend_changed = xenkbd_backend_changed, 373 .otherend_changed = xenkbd_backend_changed,
373); 374};
374 375
375static int __init xenkbd_init(void) 376static int __init xenkbd_init(void)
376{ 377{
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 9c47b897b6d2..8079c31ac5e6 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -937,22 +937,18 @@ static int read_xenbus_vif_flags(struct backend_info *be)
 	return 0;
 }
 
-
-/* ** Driver Registration ** */
-
-
 static const struct xenbus_device_id netback_ids[] = {
 	{ "vif" },
 	{ "" }
 };
 
-
-static DEFINE_XENBUS_DRIVER(netback, ,
+static struct xenbus_driver netback_driver = {
+	.ids = netback_ids,
 	.probe = netback_probe,
 	.remove = netback_remove,
 	.uevent = netback_uevent,
 	.otherend_changed = frontend_changed,
-);
+};
 
 int xenvif_xenbus_init(void)
 {
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index ca82f545ec2c..fa671442f420 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -2300,12 +2300,6 @@ static void xennet_sysfs_delif(struct net_device *netdev)
 
 #endif /* CONFIG_SYSFS */
 
-static const struct xenbus_device_id netfront_ids[] = {
-	{ "vif" },
-	{ "" }
-};
-
-
 static int xennet_remove(struct xenbus_device *dev)
 {
 	struct netfront_info *info = dev_get_drvdata(&dev->dev);
@@ -2338,12 +2332,18 @@ static int xennet_remove(struct xenbus_device *dev)
 	return 0;
 }
 
-static DEFINE_XENBUS_DRIVER(netfront, ,
+static const struct xenbus_device_id netfront_ids[] = {
+	{ "vif" },
+	{ "" }
+};
+
+static struct xenbus_driver netfront_driver = {
+	.ids = netfront_ids,
 	.probe = netfront_probe,
 	.remove = xennet_remove,
 	.resume = netfront_resume,
 	.otherend_changed = netback_changed,
-);
+};
 
 static int __init netif_init(void)
 {
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 53df39a22c8a..116ca3746adb 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -1136,11 +1136,13 @@ static const struct xenbus_device_id xenpci_ids[] = {
1136 {""}, 1136 {""},
1137}; 1137};
1138 1138
1139static DEFINE_XENBUS_DRIVER(xenpci, "pcifront", 1139static struct xenbus_driver xenpci_driver = {
1140 .name = "pcifront",
1141 .ids = xenpci_ids,
1140 .probe = pcifront_xenbus_probe, 1142 .probe = pcifront_xenbus_probe,
1141 .remove = pcifront_xenbus_remove, 1143 .remove = pcifront_xenbus_remove,
1142 .otherend_changed = pcifront_backend_changed, 1144 .otherend_changed = pcifront_backend_changed,
1143); 1145};
1144 1146
1145static int __init pcifront_init(void) 1147static int __init pcifront_init(void)
1146{ 1148{
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index e85e64a07d02..296619b7426c 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -587,6 +587,16 @@ config VMWARE_PVSCSI
 	  To compile this driver as a module, choose M here: the
 	  module will be called vmw_pvscsi.
 
+config XEN_SCSI_FRONTEND
+	tristate "XEN SCSI frontend driver"
+	depends on SCSI && XEN
+	select XEN_XENBUS_FRONTEND
+	help
+	  The XEN SCSI frontend driver allows the kernel to access SCSI Devices
+	  within another guest OS (usually Dom0).
+	  Only needed if the kernel is running in a XEN guest and generic
+	  SCSI access to a device is needed.
+
 config HYPERV_STORAGE
 	tristate "Microsoft Hyper-V virtual storage driver"
 	depends on SCSI && HYPERV
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 5f0d299b0093..59f1ce6df2d6 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -141,6 +141,7 @@ obj-$(CONFIG_SCSI_ESAS2R) += esas2r/
 obj-$(CONFIG_SCSI_PMCRAID)	+= pmcraid.o
 obj-$(CONFIG_SCSI_VIRTIO)	+= virtio_scsi.o
 obj-$(CONFIG_VMWARE_PVSCSI)	+= vmw_pvscsi.o
+obj-$(CONFIG_XEN_SCSI_FRONTEND)	+= xen-scsifront.o
 obj-$(CONFIG_HYPERV_STORAGE)	+= hv_storvsc.o
 
 obj-$(CONFIG_ARM)		+= arm/
diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
new file mode 100644
index 000000000000..34199d206ba6
--- /dev/null
+++ b/drivers/scsi/xen-scsifront.c
@@ -0,0 +1,1026 @@
1/*
2 * Xen SCSI frontend driver
3 *
4 * Copyright (c) 2008, FUJITSU Limited
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version 2
8 * as published by the Free Software Foundation; or, when distributed
9 * separately from the Linux kernel or incorporated into other
10 * software packages, subject to the following license:
11 *
12 * Permission is hereby granted, free of charge, to any person obtaining a copy
13 * of this source file (the "Software"), to deal in the Software without
14 * restriction, including without limitation the rights to use, copy, modify,
15 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16 * and to permit persons to whom the Software is furnished to do so, subject to
17 * the following conditions:
18 *
19 * The above copyright notice and this permission notice shall be included in
20 * all copies or substantial portions of the Software.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 * IN THE SOFTWARE.
29 */
30
31#include <linux/module.h>
32#include <linux/kernel.h>
33#include <linux/device.h>
34#include <linux/wait.h>
35#include <linux/interrupt.h>
36#include <linux/mutex.h>
37#include <linux/spinlock.h>
38#include <linux/sched.h>
39#include <linux/blkdev.h>
40#include <linux/pfn.h>
41#include <linux/slab.h>
42#include <linux/bitops.h>
43
44#include <scsi/scsi_cmnd.h>
45#include <scsi/scsi_device.h>
46#include <scsi/scsi.h>
47#include <scsi/scsi_host.h>
48
49#include <xen/xen.h>
50#include <xen/xenbus.h>
51#include <xen/grant_table.h>
52#include <xen/events.h>
53#include <xen/page.h>
54
55#include <xen/interface/grant_table.h>
56#include <xen/interface/io/vscsiif.h>
57#include <xen/interface/io/protocols.h>
58
59#include <asm/xen/hypervisor.h>
60
61
62#define GRANT_INVALID_REF 0
63
64#define VSCSIFRONT_OP_ADD_LUN 1
65#define VSCSIFRONT_OP_DEL_LUN 2
66
67/* Tuning point. */
68#define VSCSIIF_DEFAULT_CMD_PER_LUN 10
69#define VSCSIIF_MAX_TARGET 64
70#define VSCSIIF_MAX_LUN 255
71
72#define VSCSIIF_RING_SIZE __CONST_RING_SIZE(vscsiif, PAGE_SIZE)
73#define VSCSIIF_MAX_REQS VSCSIIF_RING_SIZE
74
75#define vscsiif_grants_sg(_sg) (PFN_UP((_sg) * \
76 sizeof(struct scsiif_request_segment)))
77
78struct vscsifrnt_shadow {
79 /* command between backend and frontend */
80 unsigned char act;
81 uint16_t rqid;
82
83 unsigned int nr_grants; /* number of grants in gref[] */
84 struct scsiif_request_segment *sg; /* scatter/gather elements */
85
 86 /* Used for reset and abort handling. */
87 wait_queue_head_t wq_reset; /* reset work queue */
88 int wait_reset; /* reset work queue condition */
89 int32_t rslt_reset; /* reset response status: */
90 /* SUCCESS or FAILED or: */
91#define RSLT_RESET_WAITING 0
92#define RSLT_RESET_ERR -1
93
 94 /* The scsi_cmnd handed down by the SCSI midlayer. */
95 struct scsi_cmnd *sc;
96 int gref[vscsiif_grants_sg(SG_ALL) + SG_ALL];
97};
98
99struct vscsifrnt_info {
100 struct xenbus_device *dev;
101
102 struct Scsi_Host *host;
103 int host_active;
104
105 unsigned int evtchn;
106 unsigned int irq;
107
108 grant_ref_t ring_ref;
109 struct vscsiif_front_ring ring;
110 struct vscsiif_response ring_rsp;
111
112 spinlock_t shadow_lock;
113 DECLARE_BITMAP(shadow_free_bitmap, VSCSIIF_MAX_REQS);
114 struct vscsifrnt_shadow *shadow[VSCSIIF_MAX_REQS];
115
116 wait_queue_head_t wq_sync;
117 unsigned int wait_ring_available:1;
118
119 char dev_state_path[64];
120 struct task_struct *curr;
121};
122
123static DEFINE_MUTEX(scsifront_mutex);
124
125static void scsifront_wake_up(struct vscsifrnt_info *info)
126{
127 info->wait_ring_available = 0;
128 wake_up(&info->wq_sync);
129}
130
131static int scsifront_get_rqid(struct vscsifrnt_info *info)
132{
133 unsigned long flags;
134 int free;
135
136 spin_lock_irqsave(&info->shadow_lock, flags);
137
138 free = find_first_bit(info->shadow_free_bitmap, VSCSIIF_MAX_REQS);
139 __clear_bit(free, info->shadow_free_bitmap);
140
141 spin_unlock_irqrestore(&info->shadow_lock, flags);
142
143 return free;
144}
145
146static int _scsifront_put_rqid(struct vscsifrnt_info *info, uint32_t id)
147{
148 int empty = bitmap_empty(info->shadow_free_bitmap, VSCSIIF_MAX_REQS);
149
150 __set_bit(id, info->shadow_free_bitmap);
151 info->shadow[id] = NULL;
152
153 return empty || info->wait_ring_available;
154}
155
156static void scsifront_put_rqid(struct vscsifrnt_info *info, uint32_t id)
157{
158 unsigned long flags;
159 int kick;
160
161 spin_lock_irqsave(&info->shadow_lock, flags);
162 kick = _scsifront_put_rqid(info, id);
163 spin_unlock_irqrestore(&info->shadow_lock, flags);
164
165 if (kick)
166 scsifront_wake_up(info);
167}
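/*
 * Note on the allocator above: when the bitmap is empty,
 * find_first_bit() returns its size argument (VSCSIIF_MAX_REQS), so
 * callers must treat any id >= VSCSIIF_MAX_REQS as "no free slot" --
 * scsifront_pre_req() below does exactly that before touching the
 * shadow array.
 */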
168
169static struct vscsiif_request *scsifront_pre_req(struct vscsifrnt_info *info)
170{
171 struct vscsiif_front_ring *ring = &(info->ring);
172 struct vscsiif_request *ring_req;
173 uint32_t id;
174
175 id = scsifront_get_rqid(info); /* use id in response */
176 if (id >= VSCSIIF_MAX_REQS)
177 return NULL;
178
179 ring_req = RING_GET_REQUEST(&(info->ring), ring->req_prod_pvt);
180
181 ring->req_prod_pvt++;
182
183 ring_req->rqid = (uint16_t)id;
184
185 return ring_req;
186}
187
188static void scsifront_do_request(struct vscsifrnt_info *info)
189{
190 struct vscsiif_front_ring *ring = &(info->ring);
191 int notify;
192
193 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(ring, notify);
194 if (notify)
195 notify_remote_via_irq(info->irq);
196}
197
198static void scsifront_gnttab_done(struct vscsifrnt_info *info, uint32_t id)
199{
200 struct vscsifrnt_shadow *s = info->shadow[id];
201 int i;
202
203 if (s->sc->sc_data_direction == DMA_NONE)
204 return;
205
206 for (i = 0; i < s->nr_grants; i++) {
207 if (unlikely(gnttab_query_foreign_access(s->gref[i]) != 0)) {
208 shost_printk(KERN_ALERT, info->host, KBUILD_MODNAME
209 "grant still in use by backend\n");
210 BUG();
211 }
212 gnttab_end_foreign_access(s->gref[i], 0, 0UL);
213 }
214
215 kfree(s->sg);
216}
217
218static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info,
219 struct vscsiif_response *ring_rsp)
220{
221 struct scsi_cmnd *sc;
222 uint32_t id;
223 uint8_t sense_len;
224
225 id = ring_rsp->rqid;
226 sc = info->shadow[id]->sc;
227
228 BUG_ON(sc == NULL);
229
230 scsifront_gnttab_done(info, id);
231 scsifront_put_rqid(info, id);
232
233 sc->result = ring_rsp->rslt;
234 scsi_set_resid(sc, ring_rsp->residual_len);
235
236 sense_len = min_t(uint8_t, VSCSIIF_SENSE_BUFFERSIZE,
237 ring_rsp->sense_len);
238
239 if (sense_len)
240 memcpy(sc->sense_buffer, ring_rsp->sense_buffer, sense_len);
241
242 sc->scsi_done(sc);
243}
244
245static void scsifront_sync_cmd_done(struct vscsifrnt_info *info,
246 struct vscsiif_response *ring_rsp)
247{
248 uint16_t id = ring_rsp->rqid;
249 unsigned long flags;
250 struct vscsifrnt_shadow *shadow = info->shadow[id];
251 int kick;
252
253 spin_lock_irqsave(&info->shadow_lock, flags);
254 shadow->wait_reset = 1;
255 switch (shadow->rslt_reset) {
256 case RSLT_RESET_WAITING:
257 shadow->rslt_reset = ring_rsp->rslt;
258 break;
259 case RSLT_RESET_ERR:
260 kick = _scsifront_put_rqid(info, id);
261 spin_unlock_irqrestore(&info->shadow_lock, flags);
262 kfree(shadow);
263 if (kick)
264 scsifront_wake_up(info);
265 return;
266 default:
267 shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
268 "bad reset state %d, possibly leaking %u\n",
269 shadow->rslt_reset, id);
270 break;
271 }
272 spin_unlock_irqrestore(&info->shadow_lock, flags);
273
274 wake_up(&shadow->wq_reset);
275}
276
277static int scsifront_cmd_done(struct vscsifrnt_info *info)
278{
279 struct vscsiif_response *ring_rsp;
280 RING_IDX i, rp;
281 int more_to_do = 0;
282 unsigned long flags;
283
284 spin_lock_irqsave(info->host->host_lock, flags);
285
286 rp = info->ring.sring->rsp_prod;
 287 rmb(); /* ordering required with respect to dom0 */
288 for (i = info->ring.rsp_cons; i != rp; i++) {
289
290 ring_rsp = RING_GET_RESPONSE(&info->ring, i);
291
292 if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS ||
293 test_bit(ring_rsp->rqid, info->shadow_free_bitmap),
294 "illegal rqid %u returned by backend!\n",
295 ring_rsp->rqid))
296 continue;
297
298 if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB)
299 scsifront_cdb_cmd_done(info, ring_rsp);
300 else
301 scsifront_sync_cmd_done(info, ring_rsp);
302 }
303
304 info->ring.rsp_cons = i;
305
306 if (i != info->ring.req_prod_pvt)
307 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
308 else
309 info->ring.sring->rsp_event = i + 1;
310
311 info->wait_ring_available = 0;
312
313 spin_unlock_irqrestore(info->host->host_lock, flags);
314
315 wake_up(&info->wq_sync);
316
317 return more_to_do;
318}
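/*
 * The loop above follows the standard Xen ring consumer discipline; a
 * condensed sketch of the pattern (simplified from the macros in
 * xen/interface/io/ring.h):
 *
 *	rp = sring->rsp_prod;
 *	rmb();				/* read rp before any response body */
 *	for (i = ring.rsp_cons; i != rp; i++)
 *		handle(RING_GET_RESPONSE(&ring, i));
 *	ring.rsp_cons = i;
 *	sring->rsp_event = i + 1;	/* re-arm: backend notifies when
 *					   rsp_prod next passes rsp_event */
 *
 * RING_FINAL_CHECK_FOR_RESPONSES() performs the re-arm together with a
 * race-free final check for responses that arrived in the meantime.
 */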
319
320static irqreturn_t scsifront_irq_fn(int irq, void *dev_id)
321{
322 struct vscsifrnt_info *info = dev_id;
323
324 while (scsifront_cmd_done(info))
325 /* Yield point for this unbounded loop. */
326 cond_resched();
327
328 return IRQ_HANDLED;
329}
330
331static int map_data_for_request(struct vscsifrnt_info *info,
332 struct scsi_cmnd *sc,
333 struct vscsiif_request *ring_req,
334 struct vscsifrnt_shadow *shadow)
335{
336 grant_ref_t gref_head;
337 struct page *page;
338 int err, ref, ref_cnt = 0;
339 int grant_ro = (sc->sc_data_direction == DMA_TO_DEVICE);
340 unsigned int i, off, len, bytes;
341 unsigned int data_len = scsi_bufflen(sc);
342 unsigned int data_grants = 0, seg_grants = 0;
343 struct scatterlist *sg;
344 unsigned long mfn;
345 struct scsiif_request_segment *seg;
346
347 ring_req->nr_segments = 0;
348 if (sc->sc_data_direction == DMA_NONE || !data_len)
349 return 0;
350
351 scsi_for_each_sg(sc, sg, scsi_sg_count(sc), i)
352 data_grants += PFN_UP(sg->offset + sg->length);
353
354 if (data_grants > VSCSIIF_SG_TABLESIZE) {
355 if (data_grants > info->host->sg_tablesize) {
356 shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
357 "Unable to map request_buffer for command!\n");
358 return -E2BIG;
359 }
360 seg_grants = vscsiif_grants_sg(data_grants);
361 shadow->sg = kcalloc(data_grants,
362 sizeof(struct scsiif_request_segment), GFP_ATOMIC);
363 if (!shadow->sg)
364 return -ENOMEM;
365 }
366 seg = shadow->sg ? : ring_req->seg;
367
368 err = gnttab_alloc_grant_references(seg_grants + data_grants,
369 &gref_head);
370 if (err) {
371 kfree(shadow->sg);
372 shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
373 "gnttab_alloc_grant_references() error\n");
374 return -ENOMEM;
375 }
376
377 if (seg_grants) {
378 page = virt_to_page(seg);
379 off = (unsigned long)seg & ~PAGE_MASK;
380 len = sizeof(struct scsiif_request_segment) * data_grants;
381 while (len > 0) {
382 bytes = min_t(unsigned int, len, PAGE_SIZE - off);
383
384 ref = gnttab_claim_grant_reference(&gref_head);
385 BUG_ON(ref == -ENOSPC);
386
387 mfn = pfn_to_mfn(page_to_pfn(page));
388 gnttab_grant_foreign_access_ref(ref,
389 info->dev->otherend_id, mfn, 1);
390 shadow->gref[ref_cnt] = ref;
391 ring_req->seg[ref_cnt].gref = ref;
392 ring_req->seg[ref_cnt].offset = (uint16_t)off;
393 ring_req->seg[ref_cnt].length = (uint16_t)bytes;
394
395 page++;
396 len -= bytes;
397 off = 0;
398 ref_cnt++;
399 }
400 BUG_ON(seg_grants < ref_cnt);
401 seg_grants = ref_cnt;
402 }
403
404 scsi_for_each_sg(sc, sg, scsi_sg_count(sc), i) {
405 page = sg_page(sg);
406 off = sg->offset;
407 len = sg->length;
408
409 while (len > 0 && data_len > 0) {
410 /*
 411 * The midlayer may send a scatterlist larger than
 412 * the data_len it wants transferred for certain
 413 * I/O sizes.
414 */
415 bytes = min_t(unsigned int, len, PAGE_SIZE - off);
416 bytes = min(bytes, data_len);
417
418 ref = gnttab_claim_grant_reference(&gref_head);
419 BUG_ON(ref == -ENOSPC);
420
421 mfn = pfn_to_mfn(page_to_pfn(page));
422 gnttab_grant_foreign_access_ref(ref,
423 info->dev->otherend_id, mfn, grant_ro);
424
425 shadow->gref[ref_cnt] = ref;
426 seg->gref = ref;
427 seg->offset = (uint16_t)off;
428 seg->length = (uint16_t)bytes;
429
430 page++;
431 seg++;
432 len -= bytes;
433 data_len -= bytes;
434 off = 0;
435 ref_cnt++;
436 }
437 }
438
439 if (seg_grants)
440 ring_req->nr_segments = VSCSIIF_SG_GRANT | seg_grants;
441 else
442 ring_req->nr_segments = (uint8_t)ref_cnt;
443 shadow->nr_grants = ref_cnt;
444
445 return 0;
446}
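/*
 * Grant layout produced above when a request needs more data pages
 * than the VSCSIIF_SG_TABLESIZE slots embedded in the ring request:
 * ring_req->seg[] then holds grants for the pages containing the
 * external segment list (shadow->sg), each data page gets its own
 * grant recorded in that list, and nr_segments carries
 * VSCSIIF_SG_GRANT | seg_grants so the backend knows to dereference
 * the indirect list first. Small requests keep their segments
 * directly in ring_req->seg[] with no indirection.
 */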
447
448static struct vscsiif_request *scsifront_command2ring(
449 struct vscsifrnt_info *info, struct scsi_cmnd *sc,
450 struct vscsifrnt_shadow *shadow)
451{
452 struct vscsiif_request *ring_req;
453
454 memset(shadow, 0, sizeof(*shadow));
455
456 ring_req = scsifront_pre_req(info);
457 if (!ring_req)
458 return NULL;
459
460 info->shadow[ring_req->rqid] = shadow;
461 shadow->rqid = ring_req->rqid;
462
463 ring_req->id = sc->device->id;
464 ring_req->lun = sc->device->lun;
465 ring_req->channel = sc->device->channel;
466 ring_req->cmd_len = sc->cmd_len;
467
468 BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE);
469
470 memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
471
472 ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
473 ring_req->timeout_per_command = sc->request->timeout / HZ;
474
475 return ring_req;
476}
477
478static int scsifront_queuecommand(struct Scsi_Host *shost,
479 struct scsi_cmnd *sc)
480{
481 struct vscsifrnt_info *info = shost_priv(shost);
482 struct vscsiif_request *ring_req;
483 struct vscsifrnt_shadow *shadow = scsi_cmd_priv(sc);
484 unsigned long flags;
485 int err;
486 uint16_t rqid;
487
488 spin_lock_irqsave(shost->host_lock, flags);
489 if (RING_FULL(&info->ring))
490 goto busy;
491
492 ring_req = scsifront_command2ring(info, sc, shadow);
493 if (!ring_req)
494 goto busy;
495
496 sc->result = 0;
497
498 rqid = ring_req->rqid;
499 ring_req->act = VSCSIIF_ACT_SCSI_CDB;
500
501 shadow->sc = sc;
502 shadow->act = VSCSIIF_ACT_SCSI_CDB;
503
504 err = map_data_for_request(info, sc, ring_req, shadow);
505 if (err < 0) {
506 pr_debug("%s: err %d\n", __func__, err);
507 scsifront_put_rqid(info, rqid);
508 spin_unlock_irqrestore(shost->host_lock, flags);
509 if (err == -ENOMEM)
510 return SCSI_MLQUEUE_HOST_BUSY;
511 sc->result = DID_ERROR << 16;
512 sc->scsi_done(sc);
513 return 0;
514 }
515
516 scsifront_do_request(info);
517 spin_unlock_irqrestore(shost->host_lock, flags);
518
519 return 0;
520
521busy:
522 spin_unlock_irqrestore(shost->host_lock, flags);
523 pr_debug("%s: busy\n", __func__);
524 return SCSI_MLQUEUE_HOST_BUSY;
525}
526
527/*
528 * Any exception handling (reset or abort) must be forwarded to the backend.
 529 * We have to wait until the answer is returned; it carries the result
 530 * to hand back to the requestor.
531 */
532static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act)
533{
534 struct Scsi_Host *host = sc->device->host;
535 struct vscsifrnt_info *info = shost_priv(host);
536 struct vscsifrnt_shadow *shadow, *s = scsi_cmd_priv(sc);
537 struct vscsiif_request *ring_req;
538 int err = 0;
539
540 shadow = kmalloc(sizeof(*shadow), GFP_NOIO);
541 if (!shadow)
542 return FAILED;
543
544 spin_lock_irq(host->host_lock);
545
546 for (;;) {
547 if (!RING_FULL(&info->ring)) {
548 ring_req = scsifront_command2ring(info, sc, shadow);
549 if (ring_req)
550 break;
551 }
552 if (err) {
553 spin_unlock_irq(host->host_lock);
554 kfree(shadow);
555 return FAILED;
556 }
557 info->wait_ring_available = 1;
558 spin_unlock_irq(host->host_lock);
559 err = wait_event_interruptible(info->wq_sync,
560 !info->wait_ring_available);
561 spin_lock_irq(host->host_lock);
562 }
563
564 ring_req->act = act;
565 ring_req->ref_rqid = s->rqid;
566
567 shadow->act = act;
568 shadow->rslt_reset = RSLT_RESET_WAITING;
569 init_waitqueue_head(&shadow->wq_reset);
570
571 ring_req->nr_segments = 0;
572
573 scsifront_do_request(info);
574
575 spin_unlock_irq(host->host_lock);
576 err = wait_event_interruptible(shadow->wq_reset, shadow->wait_reset);
577 spin_lock_irq(host->host_lock);
578
579 if (!err) {
580 err = shadow->rslt_reset;
581 scsifront_put_rqid(info, shadow->rqid);
582 kfree(shadow);
583 } else {
584 spin_lock(&info->shadow_lock);
585 shadow->rslt_reset = RSLT_RESET_ERR;
586 spin_unlock(&info->shadow_lock);
587 err = FAILED;
588 }
589
590 spin_unlock_irq(host->host_lock);
591 return err;
592}
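/*
 * Ownership handoff on interruption: if the wait above is interrupted,
 * the shadow is marked RSLT_RESET_ERR and deliberately not freed here.
 * When the backend's answer eventually arrives,
 * scsifront_sync_cmd_done() sees RSLT_RESET_ERR, releases the rqid and
 * frees the shadow itself, so the late response can never touch freed
 * memory.
 */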
593
594static int scsifront_eh_abort_handler(struct scsi_cmnd *sc)
595{
596 pr_debug("%s\n", __func__);
597 return scsifront_action_handler(sc, VSCSIIF_ACT_SCSI_ABORT);
598}
599
600static int scsifront_dev_reset_handler(struct scsi_cmnd *sc)
601{
602 pr_debug("%s\n", __func__);
603 return scsifront_action_handler(sc, VSCSIIF_ACT_SCSI_RESET);
604}
605
606static int scsifront_sdev_configure(struct scsi_device *sdev)
607{
608 struct vscsifrnt_info *info = shost_priv(sdev->host);
609
610 if (info && current == info->curr)
611 xenbus_printf(XBT_NIL, info->dev->nodename,
612 info->dev_state_path, "%d", XenbusStateConnected);
613
614 return 0;
615}
616
617static void scsifront_sdev_destroy(struct scsi_device *sdev)
618{
619 struct vscsifrnt_info *info = shost_priv(sdev->host);
620
621 if (info && current == info->curr)
622 xenbus_printf(XBT_NIL, info->dev->nodename,
623 info->dev_state_path, "%d", XenbusStateClosed);
624}
625
626static struct scsi_host_template scsifront_sht = {
627 .module = THIS_MODULE,
628 .name = "Xen SCSI frontend driver",
629 .queuecommand = scsifront_queuecommand,
630 .eh_abort_handler = scsifront_eh_abort_handler,
631 .eh_device_reset_handler = scsifront_dev_reset_handler,
632 .slave_configure = scsifront_sdev_configure,
633 .slave_destroy = scsifront_sdev_destroy,
634 .cmd_per_lun = VSCSIIF_DEFAULT_CMD_PER_LUN,
635 .can_queue = VSCSIIF_MAX_REQS,
636 .this_id = -1,
637 .cmd_size = sizeof(struct vscsifrnt_shadow),
638 .sg_tablesize = VSCSIIF_SG_TABLESIZE,
639 .use_clustering = DISABLE_CLUSTERING,
640 .proc_name = "scsifront",
641};
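/*
 * Note on .cmd_size: the SCSI midlayer allocates this many extra bytes
 * with every struct scsi_cmnd, so the scsi_cmd_priv(sc) calls in
 * scsifront_queuecommand() and scsifront_action_handler() obtain a
 * per-command struct vscsifrnt_shadow without a separate allocation on
 * the I/O path.
 */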
642
643static int scsifront_alloc_ring(struct vscsifrnt_info *info)
644{
645 struct xenbus_device *dev = info->dev;
646 struct vscsiif_sring *sring;
647 int err = -ENOMEM;
648
 649 /* Allocate and grant the frontend-to-backend shared ring. */
650 sring = (struct vscsiif_sring *)__get_free_page(GFP_KERNEL);
651 if (!sring) {
652 xenbus_dev_fatal(dev, err,
653 "fail to allocate shared ring (Front to Back)");
654 return err;
655 }
656 SHARED_RING_INIT(sring);
657 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
658
659 err = xenbus_grant_ring(dev, virt_to_mfn(sring));
660 if (err < 0) {
661 free_page((unsigned long)sring);
662 xenbus_dev_fatal(dev, err,
663 "fail to grant shared ring (Front to Back)");
664 return err;
665 }
666 info->ring_ref = err;
667
668 err = xenbus_alloc_evtchn(dev, &info->evtchn);
669 if (err) {
670 xenbus_dev_fatal(dev, err, "xenbus_alloc_evtchn");
671 goto free_gnttab;
672 }
673
674 err = bind_evtchn_to_irq(info->evtchn);
675 if (err <= 0) {
676 xenbus_dev_fatal(dev, err, "bind_evtchn_to_irq");
677 goto free_gnttab;
678 }
679
680 info->irq = err;
681
682 err = request_threaded_irq(info->irq, NULL, scsifront_irq_fn,
683 IRQF_ONESHOT, "scsifront", info);
684 if (err) {
685 xenbus_dev_fatal(dev, err, "request_threaded_irq");
686 goto free_irq;
687 }
688
689 return 0;
690
691/* free resource */
692free_irq:
693 unbind_from_irqhandler(info->irq, info);
694free_gnttab:
695 gnttab_end_foreign_access(info->ring_ref, 0,
696 (unsigned long)info->ring.sring);
697
698 return err;
699}
700
701static int scsifront_init_ring(struct vscsifrnt_info *info)
702{
703 struct xenbus_device *dev = info->dev;
704 struct xenbus_transaction xbt;
705 int err;
706
707 pr_debug("%s\n", __func__);
708
709 err = scsifront_alloc_ring(info);
710 if (err)
711 return err;
712 pr_debug("%s: %u %u\n", __func__, info->ring_ref, info->evtchn);
713
714again:
715 err = xenbus_transaction_start(&xbt);
716 if (err)
717 xenbus_dev_fatal(dev, err, "starting transaction");
718
719 err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u",
720 info->ring_ref);
721 if (err) {
722 xenbus_dev_fatal(dev, err, "%s", "writing ring-ref");
723 goto fail;
724 }
725
726 err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
727 info->evtchn);
728
729 if (err) {
730 xenbus_dev_fatal(dev, err, "%s", "writing event-channel");
731 goto fail;
732 }
733
734 err = xenbus_transaction_end(xbt, 0);
735 if (err) {
736 if (err == -EAGAIN)
737 goto again;
738 xenbus_dev_fatal(dev, err, "completing transaction");
739 goto free_sring;
740 }
741
742 return 0;
743
744fail:
745 xenbus_transaction_end(xbt, 1);
746free_sring:
747 unbind_from_irqhandler(info->irq, info);
748 gnttab_end_foreign_access(info->ring_ref, 0,
749 (unsigned long)info->ring.sring);
750
751 return err;
752}
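/*
 * The "again:" label above implements the canonical xenbus transaction
 * retry loop; condensed:
 *
 *	again:
 *		xenbus_transaction_start(&xbt);
 *		xenbus_printf(xbt, ...);	/* one call per key */
 *		err = xenbus_transaction_end(xbt, 0);
 *		if (err == -EAGAIN)
 *			goto again;	/* another writer raced us: redo */
 *
 * so "ring-ref" and "event-channel" become visible to the watching
 * backend atomically, never one without the other.
 */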
753
754
755static int scsifront_probe(struct xenbus_device *dev,
756 const struct xenbus_device_id *id)
757{
758 struct vscsifrnt_info *info;
759 struct Scsi_Host *host;
760 int err = -ENOMEM;
761 char name[TASK_COMM_LEN];
762
763 host = scsi_host_alloc(&scsifront_sht, sizeof(*info));
764 if (!host) {
 765 xenbus_dev_fatal(dev, err, "failed to allocate scsi host");
766 return err;
767 }
768 info = (struct vscsifrnt_info *)host->hostdata;
769
770 dev_set_drvdata(&dev->dev, info);
771 info->dev = dev;
772
773 bitmap_fill(info->shadow_free_bitmap, VSCSIIF_MAX_REQS);
774
775 err = scsifront_init_ring(info);
776 if (err) {
777 scsi_host_put(host);
778 return err;
779 }
780
781 init_waitqueue_head(&info->wq_sync);
782 spin_lock_init(&info->shadow_lock);
783
784 snprintf(name, TASK_COMM_LEN, "vscsiif.%d", host->host_no);
785
786 host->max_id = VSCSIIF_MAX_TARGET;
787 host->max_channel = 0;
788 host->max_lun = VSCSIIF_MAX_LUN;
789 host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
790 host->max_cmd_len = VSCSIIF_MAX_COMMAND_SIZE;
791
792 err = scsi_add_host(host, &dev->dev);
793 if (err) {
 794 dev_err(&dev->dev, "failed to add scsi host %d\n", err);
795 goto free_sring;
796 }
797 info->host = host;
798 info->host_active = 1;
799
800 xenbus_switch_state(dev, XenbusStateInitialised);
801
802 return 0;
803
804free_sring:
805 unbind_from_irqhandler(info->irq, info);
806 gnttab_end_foreign_access(info->ring_ref, 0,
807 (unsigned long)info->ring.sring);
808 scsi_host_put(host);
809 return err;
810}
811
812static int scsifront_remove(struct xenbus_device *dev)
813{
814 struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
815
816 pr_debug("%s: %s removed\n", __func__, dev->nodename);
817
818 mutex_lock(&scsifront_mutex);
819 if (info->host_active) {
820 /* Scsi_host not yet removed */
821 scsi_remove_host(info->host);
822 info->host_active = 0;
823 }
824 mutex_unlock(&scsifront_mutex);
825
826 gnttab_end_foreign_access(info->ring_ref, 0,
827 (unsigned long)info->ring.sring);
828 unbind_from_irqhandler(info->irq, info);
829
830 scsi_host_put(info->host);
831
832 return 0;
833}
834
835static void scsifront_disconnect(struct vscsifrnt_info *info)
836{
837 struct xenbus_device *dev = info->dev;
838 struct Scsi_Host *host = info->host;
839
840 pr_debug("%s: %s disconnect\n", __func__, dev->nodename);
841
842 /*
 843 * When this function is executed, all devices of the
 844 * frontend have already been deleted.
 845 * Therefore, there is no need to block I/O before remove_host.
846 */
847
848 mutex_lock(&scsifront_mutex);
849 if (info->host_active) {
850 scsi_remove_host(host);
851 info->host_active = 0;
852 }
853 mutex_unlock(&scsifront_mutex);
854
855 xenbus_frontend_closed(dev);
856}
857
858static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
859{
860 struct xenbus_device *dev = info->dev;
861 int i, err = 0;
862 char str[64];
863 char **dir;
864 unsigned int dir_n = 0;
865 unsigned int device_state;
866 unsigned int hst, chn, tgt, lun;
867 struct scsi_device *sdev;
868
869 dir = xenbus_directory(XBT_NIL, dev->otherend, "vscsi-devs", &dir_n);
870 if (IS_ERR(dir))
871 return;
872
873 /* mark current task as the one allowed to modify device states */
874 BUG_ON(info->curr);
875 info->curr = current;
876
877 for (i = 0; i < dir_n; i++) {
878 /* read status */
879 snprintf(str, sizeof(str), "vscsi-devs/%s/state", dir[i]);
880 err = xenbus_scanf(XBT_NIL, dev->otherend, str, "%u",
881 &device_state);
882 if (XENBUS_EXIST_ERR(err))
883 continue;
884
885 /* virtual SCSI device */
886 snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]);
887 err = xenbus_scanf(XBT_NIL, dev->otherend, str,
888 "%u:%u:%u:%u", &hst, &chn, &tgt, &lun);
889 if (XENBUS_EXIST_ERR(err))
890 continue;
891
892 /*
 893 * Front device state path, used in slave_configure, called
 894 * on successful scsi_add_device, and in slave_destroy, called
 895 * when a device is removed.
896 */
897 snprintf(info->dev_state_path, sizeof(info->dev_state_path),
898 "vscsi-devs/%s/state", dir[i]);
899
900 switch (op) {
901 case VSCSIFRONT_OP_ADD_LUN:
902 if (device_state != XenbusStateInitialised)
903 break;
904
905 if (scsi_add_device(info->host, chn, tgt, lun)) {
906 dev_err(&dev->dev, "scsi_add_device\n");
907 xenbus_printf(XBT_NIL, dev->nodename,
908 info->dev_state_path,
909 "%d", XenbusStateClosed);
910 }
911 break;
912 case VSCSIFRONT_OP_DEL_LUN:
913 if (device_state != XenbusStateClosing)
914 break;
915
916 sdev = scsi_device_lookup(info->host, chn, tgt, lun);
917 if (sdev) {
918 scsi_remove_device(sdev);
919 scsi_device_put(sdev);
920 }
921 break;
922 default:
923 break;
924 }
925 }
926
927 info->curr = NULL;
928
929 kfree(dir);
930}
931
932static void scsifront_read_backend_params(struct xenbus_device *dev,
933 struct vscsifrnt_info *info)
934{
935 unsigned int sg_grant;
936 int ret;
937 struct Scsi_Host *host = info->host;
938
939 ret = xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg-grant", "%u",
940 &sg_grant);
941 if (ret == 1 && sg_grant) {
942 sg_grant = min_t(unsigned int, sg_grant, SG_ALL);
943 sg_grant = max_t(unsigned int, sg_grant, VSCSIIF_SG_TABLESIZE);
944 host->sg_tablesize = min_t(unsigned int, sg_grant,
945 VSCSIIF_SG_TABLESIZE * PAGE_SIZE /
946 sizeof(struct scsiif_request_segment));
947 host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
948 }
949 dev_info(&dev->dev, "using up to %d SG entries\n", host->sg_tablesize);
950}
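/*
 * Worked example of the clamping above (a sketch assuming SG_ALL == 128,
 * VSCSIIF_SG_TABLESIZE == 26 and 4 KiB pages -- configuration
 * assumptions): a backend advertising feature-sg-grant = 512 yields
 *
 *	sg_grant     = min(512, 128)			= 128
 *	sg_grant     = max(128, 26)			= 128
 *	sg_tablesize = min(128, 26 * 4096 / 8)		= 128
 *	max_sectors  = (128 - 1) * 4096 / 512		= 1016
 */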
951
952static void scsifront_backend_changed(struct xenbus_device *dev,
953 enum xenbus_state backend_state)
954{
955 struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
956
957 pr_debug("%s: %p %u %u\n", __func__, dev, dev->state, backend_state);
958
959 switch (backend_state) {
960 case XenbusStateUnknown:
961 case XenbusStateInitialising:
962 case XenbusStateInitWait:
963 case XenbusStateInitialised:
964 break;
965
966 case XenbusStateConnected:
967 scsifront_read_backend_params(dev, info);
968 if (xenbus_read_driver_state(dev->nodename) ==
969 XenbusStateInitialised)
970 scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
971
972 if (dev->state != XenbusStateConnected)
973 xenbus_switch_state(dev, XenbusStateConnected);
974 break;
975
976 case XenbusStateClosed:
977 if (dev->state == XenbusStateClosed)
978 break;
979 /* Missed the backend's Closing state -- fallthrough */
980 case XenbusStateClosing:
981 scsifront_disconnect(info);
982 break;
983
984 case XenbusStateReconfiguring:
985 scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_DEL_LUN);
986 xenbus_switch_state(dev, XenbusStateReconfiguring);
987 break;
988
989 case XenbusStateReconfigured:
990 scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
991 xenbus_switch_state(dev, XenbusStateConnected);
992 break;
993 }
994}
995
996static const struct xenbus_device_id scsifront_ids[] = {
997 { "vscsi" },
998 { "" }
999};
1000
1001static struct xenbus_driver scsifront_driver = {
1002 .ids = scsifront_ids,
1003 .probe = scsifront_probe,
1004 .remove = scsifront_remove,
1005 .otherend_changed = scsifront_backend_changed,
1006};
1007
1008static int __init scsifront_init(void)
1009{
1010 if (!xen_domain())
1011 return -ENODEV;
1012
1013 return xenbus_register_frontend(&scsifront_driver);
1014}
1015module_init(scsifront_init);
1016
1017static void __exit scsifront_exit(void)
1018{
1019 xenbus_unregister_driver(&scsifront_driver);
1020}
1021module_exit(scsifront_exit);
1022
1023MODULE_DESCRIPTION("Xen SCSI frontend driver");
1024MODULE_LICENSE("GPL");
1025MODULE_ALIAS("xen:vscsi");
1026MODULE_AUTHOR("Juergen Gross <jgross@suse.com>");
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index 2967f0388d2c..f1e57425e39f 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -347,8 +347,6 @@ static int xen_console_remove(struct xencons_info *info)
347} 347}
348 348
349#ifdef CONFIG_HVC_XEN_FRONTEND 349#ifdef CONFIG_HVC_XEN_FRONTEND
350static struct xenbus_driver xencons_driver;
351
352static int xencons_remove(struct xenbus_device *dev) 350static int xencons_remove(struct xenbus_device *dev)
353{ 351{
354 return xen_console_remove(dev_get_drvdata(&dev->dev)); 352 return xen_console_remove(dev_get_drvdata(&dev->dev));
@@ -499,13 +497,14 @@ static const struct xenbus_device_id xencons_ids[] = {
499 { "" } 497 { "" }
500}; 498};
501 499
502 500static struct xenbus_driver xencons_driver = {
503static DEFINE_XENBUS_DRIVER(xencons, "xenconsole", 501 .name = "xenconsole",
502 .ids = xencons_ids,
504 .probe = xencons_probe, 503 .probe = xencons_probe,
505 .remove = xencons_remove, 504 .remove = xencons_remove,
506 .resume = xencons_resume, 505 .resume = xencons_resume,
507 .otherend_changed = xencons_backend_changed, 506 .otherend_changed = xencons_backend_changed,
508); 507};
509#endif /* CONFIG_HVC_XEN_FRONTEND */ 508#endif /* CONFIG_HVC_XEN_FRONTEND */
510 509
511static int __init xen_hvc_init(void) 510static int __init xen_hvc_init(void)
diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c
index 901014bbc821..09dc44736c1a 100644
--- a/drivers/video/fbdev/xen-fbfront.c
+++ b/drivers/video/fbdev/xen-fbfront.c
@@ -684,12 +684,13 @@ static const struct xenbus_device_id xenfb_ids[] = {
684 { "" } 684 { "" }
685}; 685};
686 686
687static DEFINE_XENBUS_DRIVER(xenfb, , 687static struct xenbus_driver xenfb_driver = {
688 .ids = xenfb_ids,
688 .probe = xenfb_probe, 689 .probe = xenfb_probe,
689 .remove = xenfb_remove, 690 .remove = xenfb_remove,
690 .resume = xenfb_resume, 691 .resume = xenfb_resume,
691 .otherend_changed = xenfb_backend_changed, 692 .otherend_changed = xenfb_backend_changed,
692); 693};
693 694
694static int __init xenfb_init(void) 695static int __init xenfb_init(void)
695{ 696{
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 8bc01838daf9..b812462083fc 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -172,6 +172,15 @@ config XEN_PCIDEV_BACKEND
172 172
173 If in doubt, say m. 173 If in doubt, say m.
174 174
175config XEN_SCSI_BACKEND
176 tristate "XEN SCSI backend driver"
177 depends on XEN && XEN_BACKEND && TARGET_CORE
178 help
 179 The SCSI backend driver allows the kernel to export its SCSI devices
180 to other guests via a high-performance shared-memory interface.
181 Only needed for systems running as XEN driver domains (e.g. Dom0) and
182 if guests need generic access to SCSI devices.
183
175config XEN_PRIVCMD 184config XEN_PRIVCMD
176 tristate 185 tristate
177 depends on XEN 186 depends on XEN
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 84044b554e33..2140398a2a8c 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_XEN_ACPI_HOTPLUG_MEMORY) += xen-acpi-memhotplug.o
36obj-$(CONFIG_XEN_ACPI_HOTPLUG_CPU) += xen-acpi-cpuhotplug.o 36obj-$(CONFIG_XEN_ACPI_HOTPLUG_CPU) += xen-acpi-cpuhotplug.o
37obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o 37obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
38obj-$(CONFIG_XEN_EFI) += efi.o 38obj-$(CONFIG_XEN_EFI) += efi.o
39obj-$(CONFIG_XEN_SCSI_BACKEND) += xen-scsiback.o
39xen-evtchn-y := evtchn.o 40xen-evtchn-y := evtchn.o
40xen-gntdev-y := gntdev.o 41xen-gntdev-y := gntdev.o
41xen-gntalloc-y := gntalloc.o 42xen-gntalloc-y := gntalloc.o
diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c
index 31f618a49661..1f850c97482f 100644
--- a/drivers/xen/efi.c
+++ b/drivers/xen/efi.c
@@ -27,6 +27,8 @@
27#include <xen/interface/platform.h> 27#include <xen/interface/platform.h>
28#include <xen/xen.h> 28#include <xen/xen.h>
29 29
30#include <asm/page.h>
31
30#include <asm/xen/hypercall.h> 32#include <asm/xen/hypercall.h>
31 33
32#define INIT_EFI_OP(name) \ 34#define INIT_EFI_OP(name) \
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 5b5c5ff273fd..b4bca2d4a7e5 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -900,8 +900,8 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
900 return irq; 900 return irq;
901} 901}
902 902
903static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, 903int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
904 unsigned int remote_port) 904 unsigned int remote_port)
905{ 905{
906 struct evtchn_bind_interdomain bind_interdomain; 906 struct evtchn_bind_interdomain bind_interdomain;
907 int err; 907 int err;
@@ -914,6 +914,7 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
914 914
915 return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); 915 return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
916} 916}
917EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq);
917 918
918static int find_virq(unsigned int virq, unsigned int cpu) 919static int find_virq(unsigned int virq, unsigned int cpu)
919{ 920{
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index c254ae036f18..7786291ba229 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -592,7 +592,7 @@ static int grow_gnttab_list(unsigned int more_frames)
592 return 0; 592 return 0;
593 593
594grow_nomem: 594grow_nomem:
595 for ( ; i >= nr_glist_frames; i--) 595 while (i-- > nr_glist_frames)
596 free_page((unsigned long) gnttab_list[i]); 596 free_page((unsigned long) gnttab_list[i]);
597 return -ENOMEM; 597 return -ENOMEM;
598} 598}
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index c214daab4829..ad8d30c088fe 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -719,11 +719,13 @@ static const struct xenbus_device_id xen_pcibk_ids[] = {
719 {""}, 719 {""},
720}; 720};
721 721
722static DEFINE_XENBUS_DRIVER(xen_pcibk, DRV_NAME, 722static struct xenbus_driver xen_pcibk_driver = {
723 .name = DRV_NAME,
724 .ids = xen_pcibk_ids,
723 .probe = xen_pcibk_xenbus_probe, 725 .probe = xen_pcibk_xenbus_probe,
724 .remove = xen_pcibk_xenbus_remove, 726 .remove = xen_pcibk_xenbus_remove,
725 .otherend_changed = xen_pcibk_frontend_changed, 727 .otherend_changed = xen_pcibk_frontend_changed,
726); 728};
727 729
728const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend; 730const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
729 731
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
new file mode 100644
index 000000000000..3e32146472a5
--- /dev/null
+++ b/drivers/xen/xen-scsiback.c
@@ -0,0 +1,2126 @@
1/*
2 * Xen SCSI backend driver
3 *
4 * Copyright (c) 2008, FUJITSU Limited
5 *
6 * Based on the blkback driver code.
 7 * Adaptation to the kernel target core infrastructure taken from vhost/scsi.c
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#include <stdarg.h>
35
36#include <linux/module.h>
37#include <linux/utsname.h>
38#include <linux/interrupt.h>
39#include <linux/slab.h>
40#include <linux/wait.h>
41#include <linux/sched.h>
42#include <linux/list.h>
43#include <linux/gfp.h>
44#include <linux/delay.h>
45#include <linux/spinlock.h>
46#include <linux/configfs.h>
47
48#include <generated/utsrelease.h>
49
50#include <scsi/scsi_dbg.h>
51#include <scsi/scsi_eh.h>
52#include <scsi/scsi_tcq.h>
53
54#include <target/target_core_base.h>
55#include <target/target_core_fabric.h>
56#include <target/target_core_configfs.h>
57#include <target/target_core_fabric_configfs.h>
58
59#include <asm/hypervisor.h>
60
61#include <xen/xen.h>
62#include <xen/balloon.h>
63#include <xen/events.h>
64#include <xen/xenbus.h>
65#include <xen/grant_table.h>
66#include <xen/page.h>
67
68#include <xen/interface/grant_table.h>
69#include <xen/interface/io/vscsiif.h>
70
71#define DPRINTK(_f, _a...) \
72 pr_debug("(file=%s, line=%d) " _f, __FILE__ , __LINE__ , ## _a)
73
74#define VSCSI_VERSION "v0.1"
75#define VSCSI_NAMELEN 32
76
77struct ids_tuple {
78 unsigned int hst; /* host */
79 unsigned int chn; /* channel */
80 unsigned int tgt; /* target */
81 unsigned int lun; /* LUN */
82};
83
84struct v2p_entry {
85 struct ids_tuple v; /* translate from */
86 struct scsiback_tpg *tpg; /* translate to */
87 unsigned int lun;
88 struct kref kref;
89 struct list_head l;
90};
91
92struct vscsibk_info {
93 struct xenbus_device *dev;
94
95 domid_t domid;
96 unsigned int irq;
97
98 struct vscsiif_back_ring ring;
99 int ring_error;
100
101 spinlock_t ring_lock;
102 atomic_t nr_unreplied_reqs;
103
104 spinlock_t v2p_lock;
105 struct list_head v2p_entry_lists;
106
107 wait_queue_head_t waiting_to_free;
108};
109
110/* theoretical maximum of grants for one request */
111#define VSCSI_MAX_GRANTS (SG_ALL + VSCSIIF_SG_TABLESIZE)
112
113/*
114 * VSCSI_GRANT_BATCH is the maximum number of grants to be processed in one
115 * call to map/unmap grants. Don't choose it too large, as there are arrays
116 * with VSCSI_GRANT_BATCH elements allocated on the stack.
117 */
118#define VSCSI_GRANT_BATCH 16
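/*
 * Rough stack-cost estimate behind the batch size (assuming a 64-bit
 * build where struct gnttab_unmap_grant_ref is about 24 bytes): a
 * batch of 16 keeps the on-stack unmap[] and pages[] arrays around
 * half a kilobyte, whereas processing all VSCSI_MAX_GRANTS (128 + 26 =
 * 154) entries at once would need several kilobytes of stack.
 */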
119
120struct vscsibk_pend {
121 uint16_t rqid;
122
123 uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE];
124 uint8_t cmd_len;
125
126 uint8_t sc_data_direction;
127 uint16_t n_sg; /* real length of SG list */
128 uint16_t n_grants; /* SG pages and potentially SG list */
129 uint32_t data_len;
130 uint32_t result;
131
132 struct vscsibk_info *info;
133 struct v2p_entry *v2p;
134 struct scatterlist *sgl;
135
136 uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
137
138 grant_handle_t grant_handles[VSCSI_MAX_GRANTS];
139 struct page *pages[VSCSI_MAX_GRANTS];
140
141 struct se_cmd se_cmd;
142};
143
144struct scsiback_tmr {
145 atomic_t tmr_complete;
146 wait_queue_head_t tmr_wait;
147};
148
149struct scsiback_nexus {
150 /* Pointer to TCM session for I_T Nexus */
151 struct se_session *tvn_se_sess;
152};
153
154struct scsiback_tport {
155 /* SCSI protocol the tport is providing */
156 u8 tport_proto_id;
157 /* Binary World Wide unique Port Name for pvscsi Target port */
158 u64 tport_wwpn;
159 /* ASCII formatted WWPN for pvscsi Target port */
160 char tport_name[VSCSI_NAMELEN];
161 /* Returned by scsiback_make_tport() */
162 struct se_wwn tport_wwn;
163};
164
165struct scsiback_tpg {
166 /* scsiback port target portal group tag for TCM */
167 u16 tport_tpgt;
168 /* track number of TPG Port/Lun Links wrt explicit I_T Nexus shutdown */
169 int tv_tpg_port_count;
170 /* xen-pvscsi references to tpg_nexus, protected by tv_tpg_mutex */
171 int tv_tpg_fe_count;
172 /* list for scsiback_list */
173 struct list_head tv_tpg_list;
174 /* Used to protect access for tpg_nexus */
175 struct mutex tv_tpg_mutex;
176 /* Pointer to the TCM pvscsi I_T Nexus for this TPG endpoint */
177 struct scsiback_nexus *tpg_nexus;
178 /* Pointer back to scsiback_tport */
179 struct scsiback_tport *tport;
180 /* Returned by scsiback_make_tpg() */
181 struct se_portal_group se_tpg;
182 /* alias used in xenstore */
183 char param_alias[VSCSI_NAMELEN];
184 /* list of info structures related to this target portal group */
185 struct list_head info_list;
186};
187
188#define SCSIBACK_INVALID_HANDLE (~0)
189
190static bool log_print_stat;
191module_param(log_print_stat, bool, 0644);
192
193static int scsiback_max_buffer_pages = 1024;
194module_param_named(max_buffer_pages, scsiback_max_buffer_pages, int, 0644);
195MODULE_PARM_DESC(max_buffer_pages,
196"Maximum number of free pages to keep in backend buffer");
197
198static struct kmem_cache *scsiback_cachep;
199static DEFINE_SPINLOCK(free_pages_lock);
200static int free_pages_num;
201static LIST_HEAD(scsiback_free_pages);
202
203/* Global spinlock to protect scsiback TPG list */
204static DEFINE_MUTEX(scsiback_mutex);
205static LIST_HEAD(scsiback_list);
206
207/* Local pointer to allocated TCM configfs fabric module */
208static struct target_fabric_configfs *scsiback_fabric_configfs;
209
210static void scsiback_get(struct vscsibk_info *info)
211{
212 atomic_inc(&info->nr_unreplied_reqs);
213}
214
215static void scsiback_put(struct vscsibk_info *info)
216{
217 if (atomic_dec_and_test(&info->nr_unreplied_reqs))
218 wake_up(&info->waiting_to_free);
219}
220
221static void put_free_pages(struct page **page, int num)
222{
223 unsigned long flags;
224 int i = free_pages_num + num, n = num;
225
226 if (num == 0)
227 return;
228 if (i > scsiback_max_buffer_pages) {
229 n = min(num, i - scsiback_max_buffer_pages);
230 free_xenballooned_pages(n, page + num - n);
231 n = num - n;
232 }
233 spin_lock_irqsave(&free_pages_lock, flags);
234 for (i = 0; i < n; i++)
235 list_add(&page[i]->lru, &scsiback_free_pages);
236 free_pages_num += n;
237 spin_unlock_irqrestore(&free_pages_lock, flags);
238}
239
240static int get_free_page(struct page **page)
241{
242 unsigned long flags;
243
244 spin_lock_irqsave(&free_pages_lock, flags);
245 if (list_empty(&scsiback_free_pages)) {
246 spin_unlock_irqrestore(&free_pages_lock, flags);
247 return alloc_xenballooned_pages(1, page, false);
248 }
249 page[0] = list_first_entry(&scsiback_free_pages, struct page, lru);
250 list_del(&page[0]->lru);
251 free_pages_num--;
252 spin_unlock_irqrestore(&free_pages_lock, flags);
253 return 0;
254}
255
256static unsigned long vaddr_page(struct page *page)
257{
258 unsigned long pfn = page_to_pfn(page);
259
260 return (unsigned long)pfn_to_kaddr(pfn);
261}
262
263static unsigned long vaddr(struct vscsibk_pend *req, int seg)
264{
265 return vaddr_page(req->pages[seg]);
266}
267
268static void scsiback_print_status(char *sense_buffer, int errors,
269 struct vscsibk_pend *pending_req)
270{
271 struct scsiback_tpg *tpg = pending_req->v2p->tpg;
272
273 pr_err("xen-pvscsi[%s:%d] cmnd[0]=%02x -> st=%02x msg=%02x host=%02x drv=%02x\n",
274 tpg->tport->tport_name, pending_req->v2p->lun,
275 pending_req->cmnd[0], status_byte(errors), msg_byte(errors),
276 host_byte(errors), driver_byte(errors));
277
278 if (CHECK_CONDITION & status_byte(errors))
279 __scsi_print_sense("xen-pvscsi", sense_buffer,
280 SCSI_SENSE_BUFFERSIZE);
281}
282
283static void scsiback_fast_flush_area(struct vscsibk_pend *req)
284{
285 struct gnttab_unmap_grant_ref unmap[VSCSI_GRANT_BATCH];
286 struct page *pages[VSCSI_GRANT_BATCH];
287 unsigned int i, invcount = 0;
288 grant_handle_t handle;
289 int err;
290
291 kfree(req->sgl);
292 req->sgl = NULL;
293 req->n_sg = 0;
294
295 if (!req->n_grants)
296 return;
297
298 for (i = 0; i < req->n_grants; i++) {
299 handle = req->grant_handles[i];
300 if (handle == SCSIBACK_INVALID_HANDLE)
301 continue;
302 gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
303 GNTMAP_host_map, handle);
304 req->grant_handles[i] = SCSIBACK_INVALID_HANDLE;
305 pages[invcount] = req->pages[i];
306 put_page(pages[invcount]);
307 invcount++;
308 if (invcount < VSCSI_GRANT_BATCH)
309 continue;
310 err = gnttab_unmap_refs(unmap, NULL, pages, invcount);
311 BUG_ON(err);
312 invcount = 0;
313 }
314
315 if (invcount) {
316 err = gnttab_unmap_refs(unmap, NULL, pages, invcount);
317 BUG_ON(err);
318 }
319
320 put_free_pages(req->pages, req->n_grants);
321 req->n_grants = 0;
322}
323
324static void scsiback_free_translation_entry(struct kref *kref)
325{
326 struct v2p_entry *entry = container_of(kref, struct v2p_entry, kref);
327 struct scsiback_tpg *tpg = entry->tpg;
328
329 mutex_lock(&tpg->tv_tpg_mutex);
330 tpg->tv_tpg_fe_count--;
331 mutex_unlock(&tpg->tv_tpg_mutex);
332
333 kfree(entry);
334}
335
336static void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
337 uint32_t resid, struct vscsibk_pend *pending_req)
338{
339 struct vscsiif_response *ring_res;
340 struct vscsibk_info *info = pending_req->info;
341 int notify;
342 struct scsi_sense_hdr sshdr;
343 unsigned long flags;
344 unsigned len;
345
346 spin_lock_irqsave(&info->ring_lock, flags);
347
348 ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt);
349 info->ring.rsp_prod_pvt++;
350
351 ring_res->rslt = result;
352 ring_res->rqid = pending_req->rqid;
353
354 if (sense_buffer != NULL &&
355 scsi_normalize_sense(sense_buffer, VSCSIIF_SENSE_BUFFERSIZE,
356 &sshdr)) {
357 len = min_t(unsigned, 8 + sense_buffer[7],
358 VSCSIIF_SENSE_BUFFERSIZE);
359 memcpy(ring_res->sense_buffer, sense_buffer, len);
360 ring_res->sense_len = len;
361 } else {
362 ring_res->sense_len = 0;
363 }
364
365 ring_res->residual_len = resid;
366
367 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&info->ring, notify);
368 spin_unlock_irqrestore(&info->ring_lock, flags);
369
370 if (notify)
371 notify_remote_via_irq(info->irq);
372
373 if (pending_req->v2p)
374 kref_put(&pending_req->v2p->kref,
375 scsiback_free_translation_entry);
376}
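/*
 * The "8 + sense_buffer[7]" above applies the SPC fixed-format sense
 * rule: byte 7 holds the ADDITIONAL SENSE LENGTH, so the valid sense
 * data is the 8-byte header plus that count, clamped to the
 * VSCSIIF_SENSE_BUFFERSIZE that fits in a ring response.
 */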
377
378static void scsiback_cmd_done(struct vscsibk_pend *pending_req)
379{
380 struct vscsibk_info *info = pending_req->info;
381 unsigned char *sense_buffer;
382 unsigned int resid;
383 int errors;
384
385 sense_buffer = pending_req->sense_buffer;
386 resid = pending_req->se_cmd.residual_count;
387 errors = pending_req->result;
388
389 if (errors && log_print_stat)
390 scsiback_print_status(sense_buffer, errors, pending_req);
391
392 scsiback_fast_flush_area(pending_req);
393 scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req);
394 scsiback_put(info);
395}
396
397static void scsiback_cmd_exec(struct vscsibk_pend *pending_req)
398{
399 struct se_cmd *se_cmd = &pending_req->se_cmd;
400 struct se_session *sess = pending_req->v2p->tpg->tpg_nexus->tvn_se_sess;
401 int rc;
402
403 memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE);
404
405 memset(se_cmd, 0, sizeof(*se_cmd));
406
407 scsiback_get(pending_req->info);
408 rc = target_submit_cmd_map_sgls(se_cmd, sess, pending_req->cmnd,
409 pending_req->sense_buffer, pending_req->v2p->lun,
410 pending_req->data_len, 0,
411 pending_req->sc_data_direction, 0,
412 pending_req->sgl, pending_req->n_sg,
413 NULL, 0, NULL, 0);
414 if (rc < 0) {
415 transport_send_check_condition_and_sense(se_cmd,
416 TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0);
417 transport_generic_free_cmd(se_cmd, 0);
418 }
419}
420
421static int scsiback_gnttab_data_map_batch(struct gnttab_map_grant_ref *map,
422 struct page **pg, grant_handle_t *grant, int cnt)
423{
424 int err, i;
425
426 if (!cnt)
427 return 0;
428
429 err = gnttab_map_refs(map, NULL, pg, cnt);
430 BUG_ON(err);
431 for (i = 0; i < cnt; i++) {
432 if (unlikely(map[i].status != GNTST_okay)) {
433 pr_err("xen-pvscsi: invalid buffer -- could not remap it\n");
434 map[i].handle = SCSIBACK_INVALID_HANDLE;
435 err = -ENOMEM;
436 } else {
437 get_page(pg[i]);
438 }
439 grant[i] = map[i].handle;
440 }
441 return err;
442}
443
444static int scsiback_gnttab_data_map_list(struct vscsibk_pend *pending_req,
445 struct scsiif_request_segment *seg, struct page **pg,
446 grant_handle_t *grant, int cnt, u32 flags)
447{
448 int mapcount = 0, i, err = 0;
449 struct gnttab_map_grant_ref map[VSCSI_GRANT_BATCH];
450 struct vscsibk_info *info = pending_req->info;
451
452 for (i = 0; i < cnt; i++) {
453 if (get_free_page(pg + mapcount)) {
454 put_free_pages(pg, mapcount);
455 pr_err("xen-pvscsi: no grant page\n");
456 return -ENOMEM;
457 }
458 gnttab_set_map_op(&map[mapcount], vaddr_page(pg[mapcount]),
459 flags, seg[i].gref, info->domid);
460 mapcount++;
461 if (mapcount < VSCSI_GRANT_BATCH)
462 continue;
463 err = scsiback_gnttab_data_map_batch(map, pg, grant, mapcount);
464 pg += mapcount;
465 grant += mapcount;
466 pending_req->n_grants += mapcount;
467 if (err)
468 return err;
469 mapcount = 0;
470 }
471 err = scsiback_gnttab_data_map_batch(map, pg, grant, mapcount);
472 pending_req->n_grants += mapcount;
473 return err;
474}
475
476static int scsiback_gnttab_data_map(struct vscsiif_request *ring_req,
477 struct vscsibk_pend *pending_req)
478{
479 u32 flags;
480 int i, err, n_segs, i_seg = 0;
481 struct page **pg;
482 struct scsiif_request_segment *seg;
483 unsigned long end_seg = 0;
484 unsigned int nr_segments = (unsigned int)ring_req->nr_segments;
485 unsigned int nr_sgl = 0;
486 struct scatterlist *sg;
487 grant_handle_t *grant;
488
489 pending_req->n_sg = 0;
490 pending_req->n_grants = 0;
491 pending_req->data_len = 0;
492
493 nr_segments &= ~VSCSIIF_SG_GRANT;
494 if (!nr_segments)
495 return 0;
496
497 if (nr_segments > VSCSIIF_SG_TABLESIZE) {
498 DPRINTK("xen-pvscsi: invalid parameter nr_seg = %d\n",
499 ring_req->nr_segments);
500 return -EINVAL;
501 }
502
503 if (ring_req->nr_segments & VSCSIIF_SG_GRANT) {
504 err = scsiback_gnttab_data_map_list(pending_req, ring_req->seg,
505 pending_req->pages, pending_req->grant_handles,
506 nr_segments, GNTMAP_host_map | GNTMAP_readonly);
507 if (err)
508 return err;
509 nr_sgl = nr_segments;
510 nr_segments = 0;
511 for (i = 0; i < nr_sgl; i++) {
512 n_segs = ring_req->seg[i].length /
513 sizeof(struct scsiif_request_segment);
514 if ((unsigned)ring_req->seg[i].offset +
515 (unsigned)ring_req->seg[i].length > PAGE_SIZE ||
516 n_segs * sizeof(struct scsiif_request_segment) !=
517 ring_req->seg[i].length)
518 return -EINVAL;
519 nr_segments += n_segs;
520 }
521 if (nr_segments > SG_ALL) {
522 DPRINTK("xen-pvscsi: invalid nr_seg = %d\n",
523 nr_segments);
524 return -EINVAL;
525 }
526 }
527
 528 /* pending_req->sgl is freed in scsiback_fast_flush_area() */
529 pending_req->sgl = kmalloc_array(nr_segments,
530 sizeof(struct scatterlist), GFP_KERNEL);
531 if (!pending_req->sgl)
532 return -ENOMEM;
533
534 sg_init_table(pending_req->sgl, nr_segments);
535 pending_req->n_sg = nr_segments;
536
537 flags = GNTMAP_host_map;
538 if (pending_req->sc_data_direction == DMA_TO_DEVICE)
539 flags |= GNTMAP_readonly;
540
541 pg = pending_req->pages + nr_sgl;
542 grant = pending_req->grant_handles + nr_sgl;
543 if (!nr_sgl) {
544 seg = ring_req->seg;
545 err = scsiback_gnttab_data_map_list(pending_req, seg,
546 pg, grant, nr_segments, flags);
547 if (err)
548 return err;
549 } else {
550 for (i = 0; i < nr_sgl; i++) {
551 seg = (struct scsiif_request_segment *)(
552 vaddr(pending_req, i) + ring_req->seg[i].offset);
553 n_segs = ring_req->seg[i].length /
554 sizeof(struct scsiif_request_segment);
555 err = scsiback_gnttab_data_map_list(pending_req, seg,
556 pg, grant, n_segs, flags);
557 if (err)
558 return err;
559 pg += n_segs;
560 grant += n_segs;
561 }
562 end_seg = vaddr(pending_req, 0) + ring_req->seg[0].offset;
563 seg = (struct scsiif_request_segment *)end_seg;
564 end_seg += ring_req->seg[0].length;
565 pg = pending_req->pages + nr_sgl;
566 }
567
568 for_each_sg(pending_req->sgl, sg, nr_segments, i) {
569 sg_set_page(sg, pg[i], seg->length, seg->offset);
570 pending_req->data_len += seg->length;
571 seg++;
572 if (nr_sgl && (unsigned long)seg >= end_seg) {
573 i_seg++;
574 end_seg = vaddr(pending_req, i_seg) +
575 ring_req->seg[i_seg].offset;
576 seg = (struct scsiif_request_segment *)end_seg;
577 end_seg += ring_req->seg[i_seg].length;
578 }
579 if (sg->offset >= PAGE_SIZE ||
580 sg->length > PAGE_SIZE ||
581 sg->offset + sg->length > PAGE_SIZE)
582 return -EINVAL;
583 }
584
585 return 0;
586}
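/*
 * Capacity sketch for the indirect path above (assuming 4 KiB pages
 * and an 8-byte struct scsiif_request_segment): one granted list page
 * holds up to 4096 / 8 = 512 segment entries, well beyond the SG_ALL
 * (128) cap enforced here, so a single indirect page normally
 * suffices and the ring slot's own seg[] only carries the grants for
 * the list pages themselves.
 */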
587
588static void scsiback_disconnect(struct vscsibk_info *info)
589{
590 wait_event(info->waiting_to_free,
591 atomic_read(&info->nr_unreplied_reqs) == 0);
592
593 unbind_from_irqhandler(info->irq, info);
594 info->irq = 0;
595 xenbus_unmap_ring_vfree(info->dev, info->ring.sring);
596}
597
598static void scsiback_device_action(struct vscsibk_pend *pending_req,
599 enum tcm_tmreq_table act, int tag)
600{
601 int rc, err = FAILED;
602 struct scsiback_tpg *tpg = pending_req->v2p->tpg;
603 struct se_cmd *se_cmd = &pending_req->se_cmd;
604 struct scsiback_tmr *tmr;
605
606 tmr = kzalloc(sizeof(struct scsiback_tmr), GFP_KERNEL);
607 if (!tmr)
608 goto out;
609
610 init_waitqueue_head(&tmr->tmr_wait);
611
612 transport_init_se_cmd(se_cmd, tpg->se_tpg.se_tpg_tfo,
613 tpg->tpg_nexus->tvn_se_sess, 0, DMA_NONE, MSG_SIMPLE_TAG,
614 &pending_req->sense_buffer[0]);
615
616 rc = core_tmr_alloc_req(se_cmd, tmr, act, GFP_KERNEL);
617 if (rc < 0)
618 goto out;
619
620 se_cmd->se_tmr_req->ref_task_tag = tag;
621
622 if (transport_lookup_tmr_lun(se_cmd, pending_req->v2p->lun) < 0)
623 goto out;
624
625 transport_generic_handle_tmr(se_cmd);
626 wait_event(tmr->tmr_wait, atomic_read(&tmr->tmr_complete));
627
628 err = (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE) ?
629 SUCCESS : FAILED;
630
631out:
632 if (tmr) {
633 transport_generic_free_cmd(&pending_req->se_cmd, 1);
634 kfree(tmr);
635 }
636
637 scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
638
639 kmem_cache_free(scsiback_cachep, pending_req);
640}
641
642/*
643 Perform virtual to physical translation
644*/
645static struct v2p_entry *scsiback_do_translation(struct vscsibk_info *info,
646 struct ids_tuple *v)
647{
648 struct v2p_entry *entry;
649 struct list_head *head = &(info->v2p_entry_lists);
650 unsigned long flags;
651
652 spin_lock_irqsave(&info->v2p_lock, flags);
653 list_for_each_entry(entry, head, l) {
654 if ((entry->v.chn == v->chn) &&
655 (entry->v.tgt == v->tgt) &&
656 (entry->v.lun == v->lun)) {
657 kref_get(&entry->kref);
658 goto out;
659 }
660 }
661 entry = NULL;
662
663out:
664 spin_unlock_irqrestore(&info->v2p_lock, flags);
665 return entry;
666}
667
668static int prepare_pending_reqs(struct vscsibk_info *info,
669 struct vscsiif_request *ring_req,
670 struct vscsibk_pend *pending_req)
671{
672 struct v2p_entry *v2p;
673 struct ids_tuple vir;
674
675 pending_req->rqid = ring_req->rqid;
676 pending_req->info = info;
677
678 vir.chn = ring_req->channel;
679 vir.tgt = ring_req->id;
680 vir.lun = ring_req->lun;
681
682 v2p = scsiback_do_translation(info, &vir);
683 if (!v2p) {
684 pending_req->v2p = NULL;
 685 DPRINTK("xen-pvscsi: translation entry doesn't exist.\n");
686 return -ENODEV;
687 }
688 pending_req->v2p = v2p;
689
690 /* request range check from frontend */
691 pending_req->sc_data_direction = ring_req->sc_data_direction;
692 if ((pending_req->sc_data_direction != DMA_BIDIRECTIONAL) &&
693 (pending_req->sc_data_direction != DMA_TO_DEVICE) &&
694 (pending_req->sc_data_direction != DMA_FROM_DEVICE) &&
695 (pending_req->sc_data_direction != DMA_NONE)) {
696 DPRINTK("xen-pvscsi: invalid parameter data_dir = %d\n",
697 pending_req->sc_data_direction);
698 return -EINVAL;
699 }
700
701 pending_req->cmd_len = ring_req->cmd_len;
702 if (pending_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE) {
703 DPRINTK("xen-pvscsi: invalid parameter cmd_len = %d\n",
704 pending_req->cmd_len);
705 return -EINVAL;
706 }
707 memcpy(pending_req->cmnd, ring_req->cmnd, pending_req->cmd_len);
708
709 return 0;
710}
711
712static int scsiback_do_cmd_fn(struct vscsibk_info *info)
713{
714 struct vscsiif_back_ring *ring = &info->ring;
715 struct vscsiif_request *ring_req;
716 struct vscsibk_pend *pending_req;
717 RING_IDX rc, rp;
718 int err, more_to_do;
719 uint32_t result;
720 uint8_t act;
721
722 rc = ring->req_cons;
723 rp = ring->sring->req_prod;
724 rmb(); /* guest system is accessing ring, too */
725
726 if (RING_REQUEST_PROD_OVERFLOW(ring, rp)) {
727 rc = ring->rsp_prod_pvt;
728 pr_warn("xen-pvscsi: Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n",
729 info->domid, rp, rc, rp - rc);
730 info->ring_error = 1;
731 return 0;
732 }
733
734 while ((rc != rp)) {
735 if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
736 break;
737 pending_req = kmem_cache_alloc(scsiback_cachep, GFP_KERNEL);
738 if (!pending_req)
739 return 1;
740
741 ring_req = RING_GET_REQUEST(ring, rc);
742 ring->req_cons = ++rc;
743
744 act = ring_req->act;
745 err = prepare_pending_reqs(info, ring_req, pending_req);
746 if (err) {
747 switch (err) {
748 case -ENODEV:
749 result = DID_NO_CONNECT;
750 break;
751 default:
752 result = DRIVER_ERROR;
753 break;
754 }
755 scsiback_do_resp_with_sense(NULL, result << 24, 0,
756 pending_req);
757 kmem_cache_free(scsiback_cachep, pending_req);
758 return 1;
759 }
760
761 switch (act) {
762 case VSCSIIF_ACT_SCSI_CDB:
763 if (scsiback_gnttab_data_map(ring_req, pending_req)) {
764 scsiback_fast_flush_area(pending_req);
765 scsiback_do_resp_with_sense(NULL,
766 DRIVER_ERROR << 24, 0, pending_req);
767 kmem_cache_free(scsiback_cachep, pending_req);
768 } else {
769 scsiback_cmd_exec(pending_req);
770 }
771 break;
772 case VSCSIIF_ACT_SCSI_ABORT:
773 scsiback_device_action(pending_req, TMR_ABORT_TASK,
774 ring_req->ref_rqid);
775 break;
776 case VSCSIIF_ACT_SCSI_RESET:
777 scsiback_device_action(pending_req, TMR_LUN_RESET, 0);
778 break;
779 default:
780 pr_err_ratelimited("xen-pvscsi: invalid request\n");
781 scsiback_do_resp_with_sense(NULL, DRIVER_ERROR << 24,
782 0, pending_req);
783 kmem_cache_free(scsiback_cachep, pending_req);
784 break;
785 }
786
787 /* Yield point for this unbounded loop. */
788 cond_resched();
789 }
790
791 RING_FINAL_CHECK_FOR_REQUESTS(&info->ring, more_to_do);
792 return more_to_do;
793}
794
795static irqreturn_t scsiback_irq_fn(int irq, void *dev_id)
796{
797 struct vscsibk_info *info = dev_id;
798
799 if (info->ring_error)
800 return IRQ_HANDLED;
801
802 while (scsiback_do_cmd_fn(info))
803 cond_resched();
804
805 return IRQ_HANDLED;
806}
807
808static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref,
809 evtchn_port_t evtchn)
810{
811 void *area;
812 struct vscsiif_sring *sring;
813 int err;
814
815 if (info->irq)
816 return -1;
817
818 err = xenbus_map_ring_valloc(info->dev, ring_ref, &area);
819 if (err)
820 return err;
821
822 sring = (struct vscsiif_sring *)area;
823 BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
824
825 err = bind_interdomain_evtchn_to_irq(info->domid, evtchn);
826 if (err < 0)
827 goto unmap_page;
828
829 info->irq = err;
830
831 err = request_threaded_irq(info->irq, NULL, scsiback_irq_fn,
832 IRQF_ONESHOT, "vscsiif-backend", info);
833 if (err)
834 goto free_irq;
835
836 return 0;
837
838free_irq:
839 unbind_from_irqhandler(info->irq, info);
840 info->irq = 0;
841unmap_page:
842 xenbus_unmap_ring_vfree(info->dev, area);
843
844 return err;
845}
846
847static int scsiback_map(struct vscsibk_info *info)
848{
849 struct xenbus_device *dev = info->dev;
850 unsigned int ring_ref, evtchn;
851 int err;
852
853 err = xenbus_gather(XBT_NIL, dev->otherend,
854 "ring-ref", "%u", &ring_ref,
855 "event-channel", "%u", &evtchn, NULL);
856 if (err) {
857 xenbus_dev_fatal(dev, err, "reading %s ring", dev->otherend);
858 return err;
859 }
860
861 return scsiback_init_sring(info, ring_ref, evtchn);
862}
863
864/*
865 Add a new translation entry
866*/
867static int scsiback_add_translation_entry(struct vscsibk_info *info,
868 char *phy, struct ids_tuple *v)
869{
870 int err = 0;
871 struct v2p_entry *entry;
872 struct v2p_entry *new;
873 struct list_head *head = &(info->v2p_entry_lists);
874 unsigned long flags;
875 char *lunp;
876 unsigned int lun;
877 struct scsiback_tpg *tpg_entry, *tpg = NULL;
878 char *error = "doesn't exist";
879
880 lunp = strrchr(phy, ':');
881 if (!lunp) {
882 pr_err("xen-pvscsi: illegal format of physical device %s\n",
883 phy);
884 return -EINVAL;
885 }
886 *lunp = 0;
887 lunp++;
888 if (kstrtouint(lunp, 10, &lun) || lun >= TRANSPORT_MAX_LUNS_PER_TPG) {
889 pr_err("xen-pvscsi: lun number not valid: %s\n", lunp);
890 return -EINVAL;
891 }
892
893 mutex_lock(&scsiback_mutex);
894 list_for_each_entry(tpg_entry, &scsiback_list, tv_tpg_list) {
895 if (!strcmp(phy, tpg_entry->tport->tport_name) ||
896 !strcmp(phy, tpg_entry->param_alias)) {
897 spin_lock(&tpg_entry->se_tpg.tpg_lun_lock);
898 if (tpg_entry->se_tpg.tpg_lun_list[lun]->lun_status ==
899 TRANSPORT_LUN_STATUS_ACTIVE) {
900 if (!tpg_entry->tpg_nexus)
901 error = "nexus undefined";
902 else
903 tpg = tpg_entry;
904 }
905 spin_unlock(&tpg_entry->se_tpg.tpg_lun_lock);
906 break;
907 }
908 }
909 if (tpg) {
910 mutex_lock(&tpg->tv_tpg_mutex);
911 tpg->tv_tpg_fe_count++;
912 mutex_unlock(&tpg->tv_tpg_mutex);
913 }
914 mutex_unlock(&scsiback_mutex);
915
916 if (!tpg) {
917 pr_err("xen-pvscsi: %s:%d %s\n", phy, lun, error);
918 return -ENODEV;
919 }
920
921 new = kmalloc(sizeof(struct v2p_entry), GFP_KERNEL);
922 if (new == NULL) {
923 err = -ENOMEM;
924 goto out_free;
925 }
926
927 spin_lock_irqsave(&info->v2p_lock, flags);
928
929	/* Check for a duplicate assignment to the same virtual ID */
930 list_for_each_entry(entry, head, l) {
931 if ((entry->v.chn == v->chn) &&
932 (entry->v.tgt == v->tgt) &&
933 (entry->v.lun == v->lun)) {
934 pr_warn("xen-pvscsi: Virtual ID is already used. Assignment was not performed.\n");
935 err = -EEXIST;
936 goto out;
937 }
938
939 }
940
941 /* Create a new translation entry and add to the list */
942 kref_init(&new->kref);
943 new->v = *v;
944 new->tpg = tpg;
945 new->lun = lun;
946 list_add_tail(&new->l, head);
947
948out:
949 spin_unlock_irqrestore(&info->v2p_lock, flags);
950
951out_free:
952 mutex_lock(&tpg->tv_tpg_mutex);
953 tpg->tv_tpg_fe_count--;
954 mutex_unlock(&tpg->tv_tpg_mutex);
955
956 if (err)
957 kfree(new);
958
959 return err;
960}
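
To make the string handling above concrete, two worked examples (device names hypothetical); note that strrchr() splits at the last colon, so colons inside an iSCSI IQN survive intact:

/*
 * "naa.60014054ac780582:3"
 *     -> phy = "naa.60014054ac780582", lun = 3
 * "iqn.2004-01.org.example:storage:1"
 *     -> phy = "iqn.2004-01.org.example:storage", lun = 1
 */
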
961
962static void __scsiback_del_translation_entry(struct v2p_entry *entry)
963{
964 list_del(&entry->l);
965 kref_put(&entry->kref, scsiback_free_translation_entry);
966}
967
968/*
969 Delete the translation entry specified
970*/
971static int scsiback_del_translation_entry(struct vscsibk_info *info,
972 struct ids_tuple *v)
973{
974 struct v2p_entry *entry;
975 struct list_head *head = &(info->v2p_entry_lists);
976 unsigned long flags;
977
978 spin_lock_irqsave(&info->v2p_lock, flags);
980	/* Find the specified translation entry */
980 list_for_each_entry(entry, head, l) {
981 if ((entry->v.chn == v->chn) &&
982 (entry->v.tgt == v->tgt) &&
983 (entry->v.lun == v->lun)) {
984 goto found;
985 }
986 }
987
988 spin_unlock_irqrestore(&info->v2p_lock, flags);
989 return 1;
990
991found:
992	/* Delete the translation entry specified */
993 __scsiback_del_translation_entry(entry);
994
995 spin_unlock_irqrestore(&info->v2p_lock, flags);
996 return 0;
997}
998
999static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state,
1000 char *phy, struct ids_tuple *vir)
1001{
1002 if (!scsiback_add_translation_entry(info, phy, vir)) {
1003 if (xenbus_printf(XBT_NIL, info->dev->nodename, state,
1004 "%d", XenbusStateInitialised)) {
1005 pr_err("xen-pvscsi: xenbus_printf error %s\n", state);
1006 scsiback_del_translation_entry(info, vir);
1007 }
1008 } else {
1009 xenbus_printf(XBT_NIL, info->dev->nodename, state,
1010 "%d", XenbusStateClosed);
1011 }
1012}
1013
1014static void scsiback_do_del_lun(struct vscsibk_info *info, const char *state,
1015 struct ids_tuple *vir)
1016{
1017 if (!scsiback_del_translation_entry(info, vir)) {
1018 if (xenbus_printf(XBT_NIL, info->dev->nodename, state,
1019 "%d", XenbusStateClosed))
1020 pr_err("xen-pvscsi: xenbus_printf error %s\n", state);
1021 }
1022}
1023
1024#define VSCSIBACK_OP_ADD_OR_DEL_LUN 1
1025#define VSCSIBACK_OP_UPDATEDEV_STATE 2
1026
1027static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op,
1028 char *ent)
1029{
1030 int err;
1031 struct ids_tuple vir;
1032 char *val;
1033 int device_state;
1034 char phy[VSCSI_NAMELEN];
1035 char str[64];
1036 char state[64];
1037 struct xenbus_device *dev = info->dev;
1038
1039 /* read status */
1040 snprintf(state, sizeof(state), "vscsi-devs/%s/state", ent);
1041 err = xenbus_scanf(XBT_NIL, dev->nodename, state, "%u", &device_state);
1042 if (XENBUS_EXIST_ERR(err))
1043 return;
1044
1045 /* physical SCSI device */
1046 snprintf(str, sizeof(str), "vscsi-devs/%s/p-dev", ent);
1047 val = xenbus_read(XBT_NIL, dev->nodename, str, NULL);
1048 if (IS_ERR(val)) {
1049 xenbus_printf(XBT_NIL, dev->nodename, state,
1050 "%d", XenbusStateClosed);
1051 return;
1052 }
1053 strlcpy(phy, val, VSCSI_NAMELEN);
1054 kfree(val);
1055
1056 /* virtual SCSI device */
1057 snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", ent);
1058 err = xenbus_scanf(XBT_NIL, dev->nodename, str, "%u:%u:%u:%u",
1059 &vir.hst, &vir.chn, &vir.tgt, &vir.lun);
1060 if (XENBUS_EXIST_ERR(err)) {
1061 xenbus_printf(XBT_NIL, dev->nodename, state,
1062 "%d", XenbusStateClosed);
1063 return;
1064 }
1065
1066 switch (op) {
1067 case VSCSIBACK_OP_ADD_OR_DEL_LUN:
1068 if (device_state == XenbusStateInitialising)
1069 scsiback_do_add_lun(info, state, phy, &vir);
1070 if (device_state == XenbusStateClosing)
1071 scsiback_do_del_lun(info, state, &vir);
1072 break;
1073
1074 case VSCSIBACK_OP_UPDATEDEV_STATE:
1075 if (device_state == XenbusStateInitialised) {
1076 /* modify vscsi-devs/dev-x/state */
1077 if (xenbus_printf(XBT_NIL, dev->nodename, state,
1078 "%d", XenbusStateConnected)) {
1079 pr_err("xen-pvscsi: xenbus_printf error %s\n",
1080 str);
1081 scsiback_del_translation_entry(info, &vir);
1082 xenbus_printf(XBT_NIL, dev->nodename, state,
1083 "%d", XenbusStateClosed);
1084 }
1085 }
1086 break;
1087	/* Additional processing can be added here when necessary. */
1088 default:
1089 break;
1090 }
1091}
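
The hotplug helpers above assume the toolstack populates per-device nodes beneath the backend's own XenStore directory. A sketch of that layout as read by scsiback_do_1lun_hotplug(); the prefix and the values shown are illustrative:

<info->dev->nodename>/
    feature-sg-grant = "<SG_ALL>"      (written by scsiback_probe())
    vscsi-devs/
        dev-0/
            p-dev = "1:0:0:0"          (physical device: h:c:t:l or a WWN)
            v-dev = "0:0:0:0"          (virtual h:c:t:l seen by the guest)
            state = "1"                (XenbusStateInitialising)
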
1092
1093static void scsiback_do_lun_hotplug(struct vscsibk_info *info, int op)
1094{
1095 int i;
1096 char **dir;
1097 unsigned int ndir = 0;
1098
1099 dir = xenbus_directory(XBT_NIL, info->dev->nodename, "vscsi-devs",
1100 &ndir);
1101 if (IS_ERR(dir))
1102 return;
1103
1104 for (i = 0; i < ndir; i++)
1105 scsiback_do_1lun_hotplug(info, op, dir[i]);
1106
1107 kfree(dir);
1108}
1109
1110static void scsiback_frontend_changed(struct xenbus_device *dev,
1111 enum xenbus_state frontend_state)
1112{
1113 struct vscsibk_info *info = dev_get_drvdata(&dev->dev);
1114
1115 switch (frontend_state) {
1116 case XenbusStateInitialising:
1117 break;
1118
1119 case XenbusStateInitialised:
1120 if (scsiback_map(info))
1121 break;
1122
1123 scsiback_do_lun_hotplug(info, VSCSIBACK_OP_ADD_OR_DEL_LUN);
1124 xenbus_switch_state(dev, XenbusStateConnected);
1125 break;
1126
1127 case XenbusStateConnected:
1128 scsiback_do_lun_hotplug(info, VSCSIBACK_OP_UPDATEDEV_STATE);
1129
1130 if (dev->state == XenbusStateConnected)
1131 break;
1132
1133 xenbus_switch_state(dev, XenbusStateConnected);
1134 break;
1135
1136 case XenbusStateClosing:
1137 if (info->irq)
1138 scsiback_disconnect(info);
1139
1140 xenbus_switch_state(dev, XenbusStateClosing);
1141 break;
1142
1143 case XenbusStateClosed:
1144 xenbus_switch_state(dev, XenbusStateClosed);
1145 if (xenbus_dev_is_online(dev))
1146 break;
1147 /* fall through if not online */
1148 case XenbusStateUnknown:
1149 device_unregister(&dev->dev);
1150 break;
1151
1152 case XenbusStateReconfiguring:
1153 scsiback_do_lun_hotplug(info, VSCSIBACK_OP_ADD_OR_DEL_LUN);
1154 xenbus_switch_state(dev, XenbusStateReconfigured);
1155
1156 break;
1157
1158 default:
1159 xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
1160 frontend_state);
1161 break;
1162 }
1163}
1164
1165/*
1166 Release the translation entry specified
1167*/
1168static void scsiback_release_translation_entry(struct vscsibk_info *info)
1169{
1170 struct v2p_entry *entry, *tmp;
1171 struct list_head *head = &(info->v2p_entry_lists);
1172 unsigned long flags;
1173
1174 spin_lock_irqsave(&info->v2p_lock, flags);
1175
1176 list_for_each_entry_safe(entry, tmp, head, l)
1177 __scsiback_del_translation_entry(entry);
1178
1179 spin_unlock_irqrestore(&info->v2p_lock, flags);
1180}
1181
1182static int scsiback_remove(struct xenbus_device *dev)
1183{
1184 struct vscsibk_info *info = dev_get_drvdata(&dev->dev);
1185
1186 if (info->irq)
1187 scsiback_disconnect(info);
1188
1189 scsiback_release_translation_entry(info);
1190
1191 dev_set_drvdata(&dev->dev, NULL);
1192
1193 return 0;
1194}
1195
1196static int scsiback_probe(struct xenbus_device *dev,
1197 const struct xenbus_device_id *id)
1198{
1199 int err;
1200
1201 struct vscsibk_info *info = kzalloc(sizeof(struct vscsibk_info),
1202 GFP_KERNEL);
1203
1204 DPRINTK("%p %d\n", dev, dev->otherend_id);
1205
1206 if (!info) {
1207 xenbus_dev_fatal(dev, -ENOMEM, "allocating backend structure");
1208 return -ENOMEM;
1209 }
1210 info->dev = dev;
1211 dev_set_drvdata(&dev->dev, info);
1212
1213 info->domid = dev->otherend_id;
1214 spin_lock_init(&info->ring_lock);
1215 info->ring_error = 0;
1216 atomic_set(&info->nr_unreplied_reqs, 0);
1217 init_waitqueue_head(&info->waiting_to_free);
1218 info->dev = dev;
1219 info->irq = 0;
1220 INIT_LIST_HEAD(&info->v2p_entry_lists);
1221 spin_lock_init(&info->v2p_lock);
1222
1223 err = xenbus_printf(XBT_NIL, dev->nodename, "feature-sg-grant", "%u",
1224 SG_ALL);
1225 if (err)
1226 xenbus_dev_error(dev, err, "writing feature-sg-grant");
1227
1228 err = xenbus_switch_state(dev, XenbusStateInitWait);
1229 if (err)
1230 goto fail;
1231
1232 return 0;
1233
1234fail:
1235 pr_warn("xen-pvscsi: %s failed\n", __func__);
1236 scsiback_remove(dev);
1237
1238 return err;
1239}
1240
1241static char *scsiback_dump_proto_id(struct scsiback_tport *tport)
1242{
1243 switch (tport->tport_proto_id) {
1244 case SCSI_PROTOCOL_SAS:
1245 return "SAS";
1246 case SCSI_PROTOCOL_FCP:
1247 return "FCP";
1248 case SCSI_PROTOCOL_ISCSI:
1249 return "iSCSI";
1250 default:
1251 break;
1252 }
1253
1254 return "Unknown";
1255}
1256
1257static u8 scsiback_get_fabric_proto_ident(struct se_portal_group *se_tpg)
1258{
1259 struct scsiback_tpg *tpg = container_of(se_tpg,
1260 struct scsiback_tpg, se_tpg);
1261 struct scsiback_tport *tport = tpg->tport;
1262
1263 switch (tport->tport_proto_id) {
1264 case SCSI_PROTOCOL_SAS:
1265 return sas_get_fabric_proto_ident(se_tpg);
1266 case SCSI_PROTOCOL_FCP:
1267 return fc_get_fabric_proto_ident(se_tpg);
1268 case SCSI_PROTOCOL_ISCSI:
1269 return iscsi_get_fabric_proto_ident(se_tpg);
1270 default:
1271 pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n",
1272 tport->tport_proto_id);
1273 break;
1274 }
1275
1276 return sas_get_fabric_proto_ident(se_tpg);
1277}
1278
1279static char *scsiback_get_fabric_wwn(struct se_portal_group *se_tpg)
1280{
1281 struct scsiback_tpg *tpg = container_of(se_tpg,
1282 struct scsiback_tpg, se_tpg);
1283 struct scsiback_tport *tport = tpg->tport;
1284
1285 return &tport->tport_name[0];
1286}
1287
1288static u16 scsiback_get_tag(struct se_portal_group *se_tpg)
1289{
1290 struct scsiback_tpg *tpg = container_of(se_tpg,
1291 struct scsiback_tpg, se_tpg);
1292 return tpg->tport_tpgt;
1293}
1294
1295static u32 scsiback_get_default_depth(struct se_portal_group *se_tpg)
1296{
1297 return 1;
1298}
1299
1300static u32
1301scsiback_get_pr_transport_id(struct se_portal_group *se_tpg,
1302 struct se_node_acl *se_nacl,
1303 struct t10_pr_registration *pr_reg,
1304 int *format_code,
1305 unsigned char *buf)
1306{
1307 struct scsiback_tpg *tpg = container_of(se_tpg,
1308 struct scsiback_tpg, se_tpg);
1309 struct scsiback_tport *tport = tpg->tport;
1310
1311 switch (tport->tport_proto_id) {
1312 case SCSI_PROTOCOL_SAS:
1313 return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
1314 format_code, buf);
1315 case SCSI_PROTOCOL_FCP:
1316 return fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
1317 format_code, buf);
1318 case SCSI_PROTOCOL_ISCSI:
1319 return iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
1320 format_code, buf);
1321 default:
1322 pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n",
1323 tport->tport_proto_id);
1324 break;
1325 }
1326
1327 return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
1328 format_code, buf);
1329}
1330
1331static u32
1332scsiback_get_pr_transport_id_len(struct se_portal_group *se_tpg,
1333 struct se_node_acl *se_nacl,
1334 struct t10_pr_registration *pr_reg,
1335 int *format_code)
1336{
1337 struct scsiback_tpg *tpg = container_of(se_tpg,
1338 struct scsiback_tpg, se_tpg);
1339 struct scsiback_tport *tport = tpg->tport;
1340
1341 switch (tport->tport_proto_id) {
1342 case SCSI_PROTOCOL_SAS:
1343 return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
1344 format_code);
1345 case SCSI_PROTOCOL_FCP:
1346 return fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
1347 format_code);
1348 case SCSI_PROTOCOL_ISCSI:
1349 return iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
1350 format_code);
1351 default:
1352 pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n",
1353 tport->tport_proto_id);
1354 break;
1355 }
1356
1357 return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
1358 format_code);
1359}
1360
1361static char *
1362scsiback_parse_pr_out_transport_id(struct se_portal_group *se_tpg,
1363 const char *buf,
1364 u32 *out_tid_len,
1365 char **port_nexus_ptr)
1366{
1367 struct scsiback_tpg *tpg = container_of(se_tpg,
1368 struct scsiback_tpg, se_tpg);
1369 struct scsiback_tport *tport = tpg->tport;
1370
1371 switch (tport->tport_proto_id) {
1372 case SCSI_PROTOCOL_SAS:
1373 return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
1374 port_nexus_ptr);
1375 case SCSI_PROTOCOL_FCP:
1376 return fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
1377 port_nexus_ptr);
1378 case SCSI_PROTOCOL_ISCSI:
1379 return iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
1380 port_nexus_ptr);
1381 default:
1382 pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n",
1383 tport->tport_proto_id);
1384 break;
1385 }
1386
1387 return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
1388 port_nexus_ptr);
1389}
1390
1391static struct se_wwn *
1392scsiback_make_tport(struct target_fabric_configfs *tf,
1393 struct config_group *group,
1394 const char *name)
1395{
1396 struct scsiback_tport *tport;
1397 char *ptr;
1398 u64 wwpn = 0;
1399 int off = 0;
1400
1401 tport = kzalloc(sizeof(struct scsiback_tport), GFP_KERNEL);
1402 if (!tport)
1403 return ERR_PTR(-ENOMEM);
1404
1405 tport->tport_wwpn = wwpn;
1406 /*
1407 * Determine the emulated Protocol Identifier and Target Port Name
1408 * based on the incoming configfs directory name.
1409 */
1410 ptr = strstr(name, "naa.");
1411 if (ptr) {
1412 tport->tport_proto_id = SCSI_PROTOCOL_SAS;
1413 goto check_len;
1414 }
1415 ptr = strstr(name, "fc.");
1416 if (ptr) {
1417 tport->tport_proto_id = SCSI_PROTOCOL_FCP;
1418 off = 3; /* Skip over "fc." */
1419 goto check_len;
1420 }
1421 ptr = strstr(name, "iqn.");
1422 if (ptr) {
1423 tport->tport_proto_id = SCSI_PROTOCOL_ISCSI;
1424 goto check_len;
1425 }
1426
1427 pr_err("Unable to locate prefix for emulated Target Port: %s\n", name);
1428 kfree(tport);
1429 return ERR_PTR(-EINVAL);
1430
1431check_len:
1432 if (strlen(name) >= VSCSI_NAMELEN) {
1433 pr_err("Emulated %s Address: %s, exceeds max: %d\n", name,
1434 scsiback_dump_proto_id(tport), VSCSI_NAMELEN);
1435 kfree(tport);
1436 return ERR_PTR(-EINVAL);
1437 }
1438 snprintf(&tport->tport_name[0], VSCSI_NAMELEN, "%s", &name[off]);
1439
1440 pr_debug("xen-pvscsi: Allocated emulated Target %s Address: %s\n",
1441 scsiback_dump_proto_id(tport), name);
1442
1443 return &tport->tport_wwn;
1444}
1445
1446static void scsiback_drop_tport(struct se_wwn *wwn)
1447{
1448 struct scsiback_tport *tport = container_of(wwn,
1449 struct scsiback_tport, tport_wwn);
1450
1451 pr_debug("xen-pvscsi: Deallocating emulated Target %s Address: %s\n",
1452 scsiback_dump_proto_id(tport), tport->tport_name);
1453
1454 kfree(tport);
1455}
1456
1457static struct se_node_acl *
1458scsiback_alloc_fabric_acl(struct se_portal_group *se_tpg)
1459{
1460 return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL);
1461}
1462
1463static void
1464scsiback_release_fabric_acl(struct se_portal_group *se_tpg,
1465 struct se_node_acl *se_nacl)
1466{
1467 kfree(se_nacl);
1468}
1469
1470static u32 scsiback_tpg_get_inst_index(struct se_portal_group *se_tpg)
1471{
1472 return 1;
1473}
1474
1475static int scsiback_check_stop_free(struct se_cmd *se_cmd)
1476{
1477 /*
1478 * Do not release struct se_cmd's containing a valid TMR
1479 * pointer. These will be released directly in scsiback_device_action()
1480 * with transport_generic_free_cmd().
1481 */
1482 if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
1483 return 0;
1484
1485 transport_generic_free_cmd(se_cmd, 0);
1486 return 1;
1487}
1488
1489static void scsiback_release_cmd(struct se_cmd *se_cmd)
1490{
1491 struct vscsibk_pend *pending_req = container_of(se_cmd,
1492 struct vscsibk_pend, se_cmd);
1493
1494 kmem_cache_free(scsiback_cachep, pending_req);
1495}
1496
1497static int scsiback_shutdown_session(struct se_session *se_sess)
1498{
1499 return 0;
1500}
1501
1502static void scsiback_close_session(struct se_session *se_sess)
1503{
1504}
1505
1506static u32 scsiback_sess_get_index(struct se_session *se_sess)
1507{
1508 return 0;
1509}
1510
1511static int scsiback_write_pending(struct se_cmd *se_cmd)
1512{
1513 /* Go ahead and process the write immediately */
1514 target_execute_cmd(se_cmd);
1515
1516 return 0;
1517}
1518
1519static int scsiback_write_pending_status(struct se_cmd *se_cmd)
1520{
1521 return 0;
1522}
1523
1524static void scsiback_set_default_node_attrs(struct se_node_acl *nacl)
1525{
1526}
1527
1528static u32 scsiback_get_task_tag(struct se_cmd *se_cmd)
1529{
1530 struct vscsibk_pend *pending_req = container_of(se_cmd,
1531 struct vscsibk_pend, se_cmd);
1532
1533 return pending_req->rqid;
1534}
1535
1536static int scsiback_get_cmd_state(struct se_cmd *se_cmd)
1537{
1538 return 0;
1539}
1540
1541static int scsiback_queue_data_in(struct se_cmd *se_cmd)
1542{
1543 struct vscsibk_pend *pending_req = container_of(se_cmd,
1544 struct vscsibk_pend, se_cmd);
1545
1546 pending_req->result = SAM_STAT_GOOD;
1547 scsiback_cmd_done(pending_req);
1548 return 0;
1549}
1550
1551static int scsiback_queue_status(struct se_cmd *se_cmd)
1552{
1553 struct vscsibk_pend *pending_req = container_of(se_cmd,
1554 struct vscsibk_pend, se_cmd);
1555
1556 if (se_cmd->sense_buffer &&
1557 ((se_cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ||
1558 (se_cmd->se_cmd_flags & SCF_EMULATED_TASK_SENSE)))
1559 pending_req->result = (DRIVER_SENSE << 24) |
1560 SAM_STAT_CHECK_CONDITION;
1561 else
1562 pending_req->result = se_cmd->scsi_status;
1563
1564 scsiback_cmd_done(pending_req);
1565 return 0;
1566}
1567
1568static void scsiback_queue_tm_rsp(struct se_cmd *se_cmd)
1569{
1570 struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
1571 struct scsiback_tmr *tmr = se_tmr->fabric_tmr_ptr;
1572
1573 atomic_set(&tmr->tmr_complete, 1);
1574 wake_up(&tmr->tmr_wait);
1575}
1576
1577static void scsiback_aborted_task(struct se_cmd *se_cmd)
1578{
1579}
1580
1581static ssize_t scsiback_tpg_param_show_alias(struct se_portal_group *se_tpg,
1582 char *page)
1583{
1584 struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg,
1585 se_tpg);
1586 ssize_t rb;
1587
1588 mutex_lock(&tpg->tv_tpg_mutex);
1589 rb = snprintf(page, PAGE_SIZE, "%s\n", tpg->param_alias);
1590 mutex_unlock(&tpg->tv_tpg_mutex);
1591
1592 return rb;
1593}
1594
1595static ssize_t scsiback_tpg_param_store_alias(struct se_portal_group *se_tpg,
1596 const char *page, size_t count)
1597{
1598 struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg,
1599 se_tpg);
1600 int len;
1601
1602 if (strlen(page) >= VSCSI_NAMELEN) {
1603 pr_err("param alias: %s, exceeds max: %d\n", page,
1604 VSCSI_NAMELEN);
1605 return -EINVAL;
1606 }
1607
1608 mutex_lock(&tpg->tv_tpg_mutex);
1609 len = snprintf(tpg->param_alias, VSCSI_NAMELEN, "%s", page);
1610 if (tpg->param_alias[len - 1] == '\n')
1611 tpg->param_alias[len - 1] = '\0';
1612 mutex_unlock(&tpg->tv_tpg_mutex);
1613
1614 return count;
1615}
1616
1617TF_TPG_PARAM_ATTR(scsiback, alias, S_IRUGO | S_IWUSR);
1618
1619static struct configfs_attribute *scsiback_param_attrs[] = {
1620 &scsiback_tpg_param_alias.attr,
1621 NULL,
1622};
1623
1624static int scsiback_make_nexus(struct scsiback_tpg *tpg,
1625 const char *name)
1626{
1627 struct se_portal_group *se_tpg;
1628 struct se_session *se_sess;
1629 struct scsiback_nexus *tv_nexus;
1630
1631 mutex_lock(&tpg->tv_tpg_mutex);
1632 if (tpg->tpg_nexus) {
1633 mutex_unlock(&tpg->tv_tpg_mutex);
1634 pr_debug("tpg->tpg_nexus already exists\n");
1635 return -EEXIST;
1636 }
1637 se_tpg = &tpg->se_tpg;
1638
1639 tv_nexus = kzalloc(sizeof(struct scsiback_nexus), GFP_KERNEL);
1640 if (!tv_nexus) {
1641 mutex_unlock(&tpg->tv_tpg_mutex);
1642 return -ENOMEM;
1643 }
1644 /*
1645 * Initialize the struct se_session pointer
1646 */
1647 tv_nexus->tvn_se_sess = transport_init_session(TARGET_PROT_NORMAL);
1648 if (IS_ERR(tv_nexus->tvn_se_sess)) {
1649 mutex_unlock(&tpg->tv_tpg_mutex);
1650 kfree(tv_nexus);
1651 return -ENOMEM;
1652 }
1653 se_sess = tv_nexus->tvn_se_sess;
1654 /*
1655	 * Since we are running in 'demo mode' this call will generate a
1656 * struct se_node_acl for the scsiback struct se_portal_group with
1657 * the SCSI Initiator port name of the passed configfs group 'name'.
1658 */
1659 tv_nexus->tvn_se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
1660 se_tpg, (unsigned char *)name);
1661 if (!tv_nexus->tvn_se_sess->se_node_acl) {
1662 mutex_unlock(&tpg->tv_tpg_mutex);
1663 pr_debug("core_tpg_check_initiator_node_acl() failed for %s\n",
1664 name);
1665 goto out;
1666 }
1667 /*
1668 * Now register the TCM pvscsi virtual I_T Nexus as active with the
1669 * call to __transport_register_session()
1670 */
1671 __transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
1672 tv_nexus->tvn_se_sess, tv_nexus);
1673 tpg->tpg_nexus = tv_nexus;
1674
1675 mutex_unlock(&tpg->tv_tpg_mutex);
1676 return 0;
1677
1678out:
1679 transport_free_session(se_sess);
1680 kfree(tv_nexus);
1681 return -ENOMEM;
1682}
1683
1684static int scsiback_drop_nexus(struct scsiback_tpg *tpg)
1685{
1686 struct se_session *se_sess;
1687 struct scsiback_nexus *tv_nexus;
1688
1689 mutex_lock(&tpg->tv_tpg_mutex);
1690 tv_nexus = tpg->tpg_nexus;
1691 if (!tv_nexus) {
1692 mutex_unlock(&tpg->tv_tpg_mutex);
1693 return -ENODEV;
1694 }
1695
1696 se_sess = tv_nexus->tvn_se_sess;
1697 if (!se_sess) {
1698 mutex_unlock(&tpg->tv_tpg_mutex);
1699 return -ENODEV;
1700 }
1701
1702 if (tpg->tv_tpg_port_count != 0) {
1703 mutex_unlock(&tpg->tv_tpg_mutex);
1704 pr_err("Unable to remove xen-pvscsi I_T Nexus with active TPG port count: %d\n",
1705 tpg->tv_tpg_port_count);
1706 return -EBUSY;
1707 }
1708
1709 if (tpg->tv_tpg_fe_count != 0) {
1710 mutex_unlock(&tpg->tv_tpg_mutex);
1711 pr_err("Unable to remove xen-pvscsi I_T Nexus with active TPG frontend count: %d\n",
1712 tpg->tv_tpg_fe_count);
1713 return -EBUSY;
1714 }
1715
1716 pr_debug("xen-pvscsi: Removing I_T Nexus to emulated %s Initiator Port: %s\n",
1717 scsiback_dump_proto_id(tpg->tport),
1718 tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
1719
1720 /*
1721 * Release the SCSI I_T Nexus to the emulated xen-pvscsi Target Port
1722 */
1723 transport_deregister_session(tv_nexus->tvn_se_sess);
1724 tpg->tpg_nexus = NULL;
1725 mutex_unlock(&tpg->tv_tpg_mutex);
1726
1727 kfree(tv_nexus);
1728 return 0;
1729}
1730
1731static ssize_t scsiback_tpg_show_nexus(struct se_portal_group *se_tpg,
1732 char *page)
1733{
1734 struct scsiback_tpg *tpg = container_of(se_tpg,
1735 struct scsiback_tpg, se_tpg);
1736 struct scsiback_nexus *tv_nexus;
1737 ssize_t ret;
1738
1739 mutex_lock(&tpg->tv_tpg_mutex);
1740 tv_nexus = tpg->tpg_nexus;
1741 if (!tv_nexus) {
1742 mutex_unlock(&tpg->tv_tpg_mutex);
1743 return -ENODEV;
1744 }
1745 ret = snprintf(page, PAGE_SIZE, "%s\n",
1746 tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
1747 mutex_unlock(&tpg->tv_tpg_mutex);
1748
1749 return ret;
1750}
1751
1752static ssize_t scsiback_tpg_store_nexus(struct se_portal_group *se_tpg,
1753 const char *page,
1754 size_t count)
1755{
1756 struct scsiback_tpg *tpg = container_of(se_tpg,
1757 struct scsiback_tpg, se_tpg);
1758 struct scsiback_tport *tport_wwn = tpg->tport;
1759 unsigned char i_port[VSCSI_NAMELEN], *ptr, *port_ptr;
1760 int ret;
1761 /*
1762	 * Shutdown the active I_T nexus if 'NULL' is passed.
1763 */
1764 if (!strncmp(page, "NULL", 4)) {
1765 ret = scsiback_drop_nexus(tpg);
1766 return (!ret) ? count : ret;
1767 }
1768 /*
1769 * Otherwise make sure the passed virtual Initiator port WWN matches
1770 * the fabric protocol_id set in scsiback_make_tport(), and call
1771 * scsiback_make_nexus().
1772 */
1773 if (strlen(page) >= VSCSI_NAMELEN) {
1774 pr_err("Emulated NAA Sas Address: %s, exceeds max: %d\n",
1775 page, VSCSI_NAMELEN);
1776 return -EINVAL;
1777 }
1778 snprintf(&i_port[0], VSCSI_NAMELEN, "%s", page);
1779
1780 ptr = strstr(i_port, "naa.");
1781 if (ptr) {
1782 if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_SAS) {
1783 pr_err("Passed SAS Initiator Port %s does not match target port protoid: %s\n",
1784 i_port, scsiback_dump_proto_id(tport_wwn));
1785 return -EINVAL;
1786 }
1787 port_ptr = &i_port[0];
1788 goto check_newline;
1789 }
1790 ptr = strstr(i_port, "fc.");
1791 if (ptr) {
1792 if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_FCP) {
1793 pr_err("Passed FCP Initiator Port %s does not match target port protoid: %s\n",
1794 i_port, scsiback_dump_proto_id(tport_wwn));
1795 return -EINVAL;
1796 }
1797 port_ptr = &i_port[3]; /* Skip over "fc." */
1798 goto check_newline;
1799 }
1800 ptr = strstr(i_port, "iqn.");
1801 if (ptr) {
1802 if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_ISCSI) {
1803 pr_err("Passed iSCSI Initiator Port %s does not match target port protoid: %s\n",
1804 i_port, scsiback_dump_proto_id(tport_wwn));
1805 return -EINVAL;
1806 }
1807 port_ptr = &i_port[0];
1808 goto check_newline;
1809 }
1810 pr_err("Unable to locate prefix for emulated Initiator Port: %s\n",
1811 i_port);
1812 return -EINVAL;
1813 /*
1814 * Clear any trailing newline for the NAA WWN
1815 */
1816check_newline:
1817 if (i_port[strlen(i_port) - 1] == '\n')
1818 i_port[strlen(i_port) - 1] = '\0';
1819
1820 ret = scsiback_make_nexus(tpg, port_ptr);
1821 if (ret < 0)
1822 return ret;
1823
1824 return count;
1825}
1826
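Putting the configfs callbacks together, here is a sketch of the directory tree this fabric module exposes under the TCM configfs root; the WWN is hypothetical and the group names follow the usual TCM conventions:

/sys/kernel/config/target/xen-pvscsi/
    naa.60014054ac780582/          (scsiback_make_tport())
        tpgt_1/                    (scsiback_make_tpg())
            nexus                  (write an initiator WWN, or "NULL" to drop it)
            param/alias            (alias matched by scsiback_add_translation_entry())
            lun/                   (LUN links; scsiback_port_link()/..._unlink())
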
1827TF_TPG_BASE_ATTR(scsiback, nexus, S_IRUGO | S_IWUSR);
1828
1829static struct configfs_attribute *scsiback_tpg_attrs[] = {
1830 &scsiback_tpg_nexus.attr,
1831 NULL,
1832};
1833
1834static ssize_t
1835scsiback_wwn_show_attr_version(struct target_fabric_configfs *tf,
1836 char *page)
1837{
1838 return sprintf(page, "xen-pvscsi fabric module %s on %s/%s on "
1839 UTS_RELEASE"\n",
1840 VSCSI_VERSION, utsname()->sysname, utsname()->machine);
1841}
1842
1843TF_WWN_ATTR_RO(scsiback, version);
1844
1845static struct configfs_attribute *scsiback_wwn_attrs[] = {
1846 &scsiback_wwn_version.attr,
1847 NULL,
1848};
1849
1850static char *scsiback_get_fabric_name(void)
1851{
1852 return "xen-pvscsi";
1853}
1854
1855static int scsiback_port_link(struct se_portal_group *se_tpg,
1856 struct se_lun *lun)
1857{
1858 struct scsiback_tpg *tpg = container_of(se_tpg,
1859 struct scsiback_tpg, se_tpg);
1860
1861 mutex_lock(&tpg->tv_tpg_mutex);
1862 tpg->tv_tpg_port_count++;
1863 mutex_unlock(&tpg->tv_tpg_mutex);
1864
1865 return 0;
1866}
1867
1868static void scsiback_port_unlink(struct se_portal_group *se_tpg,
1869 struct se_lun *lun)
1870{
1871 struct scsiback_tpg *tpg = container_of(se_tpg,
1872 struct scsiback_tpg, se_tpg);
1873
1874 mutex_lock(&tpg->tv_tpg_mutex);
1875 tpg->tv_tpg_port_count--;
1876 mutex_unlock(&tpg->tv_tpg_mutex);
1877}
1878
1879static struct se_portal_group *
1880scsiback_make_tpg(struct se_wwn *wwn,
1881 struct config_group *group,
1882 const char *name)
1883{
1884 struct scsiback_tport *tport = container_of(wwn,
1885 struct scsiback_tport, tport_wwn);
1886
1887 struct scsiback_tpg *tpg;
1888 u16 tpgt;
1889 int ret;
1890
1891 if (strstr(name, "tpgt_") != name)
1892 return ERR_PTR(-EINVAL);
1893 ret = kstrtou16(name + 5, 10, &tpgt);
1894 if (ret)
1895 return ERR_PTR(ret);
1896
1897 tpg = kzalloc(sizeof(struct scsiback_tpg), GFP_KERNEL);
1898 if (!tpg)
1899 return ERR_PTR(-ENOMEM);
1900
1901 mutex_init(&tpg->tv_tpg_mutex);
1902 INIT_LIST_HEAD(&tpg->tv_tpg_list);
1903 INIT_LIST_HEAD(&tpg->info_list);
1904 tpg->tport = tport;
1905 tpg->tport_tpgt = tpgt;
1906
1907 ret = core_tpg_register(&scsiback_fabric_configfs->tf_ops, wwn,
1908 &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL);
1909 if (ret < 0) {
1910 kfree(tpg);
1911 return NULL;
1912 }
1913 mutex_lock(&scsiback_mutex);
1914 list_add_tail(&tpg->tv_tpg_list, &scsiback_list);
1915 mutex_unlock(&scsiback_mutex);
1916
1917 return &tpg->se_tpg;
1918}
1919
1920static void scsiback_drop_tpg(struct se_portal_group *se_tpg)
1921{
1922 struct scsiback_tpg *tpg = container_of(se_tpg,
1923 struct scsiback_tpg, se_tpg);
1924
1925 mutex_lock(&scsiback_mutex);
1926 list_del(&tpg->tv_tpg_list);
1927 mutex_unlock(&scsiback_mutex);
1928 /*
1929 * Release the virtual I_T Nexus for this xen-pvscsi TPG
1930 */
1931 scsiback_drop_nexus(tpg);
1932 /*
1933	 * Deregister the se_tpg from TCM.
1934 */
1935 core_tpg_deregister(se_tpg);
1936 kfree(tpg);
1937}
1938
1939static int scsiback_check_true(struct se_portal_group *se_tpg)
1940{
1941 return 1;
1942}
1943
1944static int scsiback_check_false(struct se_portal_group *se_tpg)
1945{
1946 return 0;
1947}
1948
1949static struct target_core_fabric_ops scsiback_ops = {
1950 .get_fabric_name = scsiback_get_fabric_name,
1951 .get_fabric_proto_ident = scsiback_get_fabric_proto_ident,
1952 .tpg_get_wwn = scsiback_get_fabric_wwn,
1953 .tpg_get_tag = scsiback_get_tag,
1954 .tpg_get_default_depth = scsiback_get_default_depth,
1955 .tpg_get_pr_transport_id = scsiback_get_pr_transport_id,
1956 .tpg_get_pr_transport_id_len = scsiback_get_pr_transport_id_len,
1957 .tpg_parse_pr_out_transport_id = scsiback_parse_pr_out_transport_id,
1958 .tpg_check_demo_mode = scsiback_check_true,
1959 .tpg_check_demo_mode_cache = scsiback_check_true,
1960 .tpg_check_demo_mode_write_protect = scsiback_check_false,
1961 .tpg_check_prod_mode_write_protect = scsiback_check_false,
1962 .tpg_alloc_fabric_acl = scsiback_alloc_fabric_acl,
1963 .tpg_release_fabric_acl = scsiback_release_fabric_acl,
1964 .tpg_get_inst_index = scsiback_tpg_get_inst_index,
1965 .check_stop_free = scsiback_check_stop_free,
1966 .release_cmd = scsiback_release_cmd,
1967 .put_session = NULL,
1968 .shutdown_session = scsiback_shutdown_session,
1969 .close_session = scsiback_close_session,
1970 .sess_get_index = scsiback_sess_get_index,
1971 .sess_get_initiator_sid = NULL,
1972 .write_pending = scsiback_write_pending,
1973 .write_pending_status = scsiback_write_pending_status,
1974 .set_default_node_attributes = scsiback_set_default_node_attrs,
1975 .get_task_tag = scsiback_get_task_tag,
1976 .get_cmd_state = scsiback_get_cmd_state,
1977 .queue_data_in = scsiback_queue_data_in,
1978 .queue_status = scsiback_queue_status,
1979 .queue_tm_rsp = scsiback_queue_tm_rsp,
1980 .aborted_task = scsiback_aborted_task,
1981 /*
1982 * Setup callers for generic logic in target_core_fabric_configfs.c
1983 */
1984 .fabric_make_wwn = scsiback_make_tport,
1985 .fabric_drop_wwn = scsiback_drop_tport,
1986 .fabric_make_tpg = scsiback_make_tpg,
1987 .fabric_drop_tpg = scsiback_drop_tpg,
1988 .fabric_post_link = scsiback_port_link,
1989 .fabric_pre_unlink = scsiback_port_unlink,
1990 .fabric_make_np = NULL,
1991 .fabric_drop_np = NULL,
1992#if 0
1993 .fabric_make_nodeacl = scsiback_make_nodeacl,
1994 .fabric_drop_nodeacl = scsiback_drop_nodeacl,
1995#endif
1996};
1997
1998static int scsiback_register_configfs(void)
1999{
2000 struct target_fabric_configfs *fabric;
2001 int ret;
2002
2003 pr_debug("xen-pvscsi: fabric module %s on %s/%s on "UTS_RELEASE"\n",
2004 VSCSI_VERSION, utsname()->sysname, utsname()->machine);
2005 /*
2006 * Register the top level struct config_item_type with TCM core
2007 */
2008 fabric = target_fabric_configfs_init(THIS_MODULE, "xen-pvscsi");
2009 if (IS_ERR(fabric))
2010 return PTR_ERR(fabric);
2011
2012 /*
2013 * Setup fabric->tf_ops from our local scsiback_ops
2014 */
2015 fabric->tf_ops = scsiback_ops;
2016 /*
2017 * Setup default attribute lists for various fabric->tf_cit_tmpl
2018 */
2019 fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = scsiback_wwn_attrs;
2020 fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = scsiback_tpg_attrs;
2021 fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
2022 fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = scsiback_param_attrs;
2023 fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
2024 fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
2025 fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
2026 fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
2027 fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
2028 /*
2029 * Register the fabric for use within TCM
2030 */
2031 ret = target_fabric_configfs_register(fabric);
2032 if (ret < 0) {
2033 target_fabric_configfs_free(fabric);
2034 return ret;
2035 }
2036 /*
2037 * Setup our local pointer to *fabric
2038 */
2039 scsiback_fabric_configfs = fabric;
2040 pr_debug("xen-pvscsi: Set fabric -> scsiback_fabric_configfs\n");
2041 return 0;
2042}
2043
2044static void scsiback_deregister_configfs(void)
2045{
2046 if (!scsiback_fabric_configfs)
2047 return;
2048
2049 target_fabric_configfs_deregister(scsiback_fabric_configfs);
2050 scsiback_fabric_configfs = NULL;
2051 pr_debug("xen-pvscsi: Cleared scsiback_fabric_configfs\n");
2052}
2053
2054static const struct xenbus_device_id scsiback_ids[] = {
2055 { "vscsi" },
2056 { "" }
2057};
2058
2059static struct xenbus_driver scsiback_driver = {
2060 .ids = scsiback_ids,
2061 .probe = scsiback_probe,
2062 .remove = scsiback_remove,
2063 .otherend_changed = scsiback_frontend_changed
2064};
2065
2066static void scsiback_init_pend(void *p)
2067{
2068 struct vscsibk_pend *pend = p;
2069 int i;
2070
2071 memset(pend, 0, sizeof(*pend));
2072 for (i = 0; i < VSCSI_MAX_GRANTS; i++)
2073 pend->grant_handles[i] = SCSIBACK_INVALID_HANDLE;
2074}
2075
2076static int __init scsiback_init(void)
2077{
2078 int ret;
2079
2080 if (!xen_domain())
2081 return -ENODEV;
2082
2083 scsiback_cachep = kmem_cache_create("vscsiif_cache",
2084 sizeof(struct vscsibk_pend), 0, 0, scsiback_init_pend);
2085 if (!scsiback_cachep)
2086 return -ENOMEM;
2087
2088 ret = xenbus_register_backend(&scsiback_driver);
2089 if (ret)
2090 goto out_cache_destroy;
2091
2092 ret = scsiback_register_configfs();
2093 if (ret)
2094 goto out_unregister_xenbus;
2095
2096 return 0;
2097
2098out_unregister_xenbus:
2099 xenbus_unregister_driver(&scsiback_driver);
2100out_cache_destroy:
2101 kmem_cache_destroy(scsiback_cachep);
2102 pr_err("xen-pvscsi: %s: error %d\n", __func__, ret);
2103 return ret;
2104}
2105
2106static void __exit scsiback_exit(void)
2107{
2108 struct page *page;
2109
2110 while (free_pages_num) {
2111 if (get_free_page(&page))
2112 BUG();
2113 free_xenballooned_pages(1, &page);
2114 }
2115 scsiback_deregister_configfs();
2116 xenbus_unregister_driver(&scsiback_driver);
2117 kmem_cache_destroy(scsiback_cachep);
2118}
2119
2120module_init(scsiback_init);
2121module_exit(scsiback_exit);
2122
2123MODULE_DESCRIPTION("Xen SCSI backend driver");
2124MODULE_LICENSE("Dual BSD/GPL");
2125MODULE_ALIAS("xen-backend:vscsi");
2126MODULE_AUTHOR("Juergen Gross <jgross@suse.com>");
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 439c9dca9eee..ca744102b666 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -259,7 +259,6 @@ static char *error_path(struct xenbus_device *dev)
259static void xenbus_va_dev_error(struct xenbus_device *dev, int err, 259static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
260 const char *fmt, va_list ap) 260 const char *fmt, va_list ap)
261{ 261{
262 int ret;
263 unsigned int len; 262 unsigned int len;
264 char *printf_buffer = NULL; 263 char *printf_buffer = NULL;
265 char *path_buffer = NULL; 264 char *path_buffer = NULL;
@@ -270,9 +269,7 @@ static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
270 goto fail; 269 goto fail;
271 270
272 len = sprintf(printf_buffer, "%i ", -err); 271 len = sprintf(printf_buffer, "%i ", -err);
273 ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); 272 vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
274
275 BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1);
276 273
277 dev_err(&dev->dev, "%s\n", printf_buffer); 274 dev_err(&dev->dev, "%s\n", printf_buffer);
278 275
@@ -361,8 +358,8 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
361 * @ring_mfn: mfn of ring to grant 358 * @ring_mfn: mfn of ring to grant
362 359
363 * Grant access to the given @ring_mfn to the peer of the given device. Return 360 * Grant access to the given @ring_mfn to the peer of the given device. Return
364 * 0 on success, or -errno on error. On error, the device will switch to 361 * a grant reference on success, or -errno on error. On error, the device will
365 * XenbusStateClosing, and the error will be saved in the store. 362 * switch to XenbusStateClosing, and the error will be saved in the store.
366 */ 363 */
367int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) 364int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
368{ 365{
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 3c0a74b3e9b1..564b31584860 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -297,9 +297,13 @@ void xenbus_dev_shutdown(struct device *_dev)
297EXPORT_SYMBOL_GPL(xenbus_dev_shutdown); 297EXPORT_SYMBOL_GPL(xenbus_dev_shutdown);
298 298
299int xenbus_register_driver_common(struct xenbus_driver *drv, 299int xenbus_register_driver_common(struct xenbus_driver *drv,
300 struct xen_bus_type *bus) 300 struct xen_bus_type *bus,
301 struct module *owner, const char *mod_name)
301{ 302{
303 drv->driver.name = drv->name ? drv->name : drv->ids[0].devicetype;
302 drv->driver.bus = &bus->bus; 304 drv->driver.bus = &bus->bus;
305 drv->driver.owner = owner;
306 drv->driver.mod_name = mod_name;
303 307
304 return driver_register(&drv->driver); 308 return driver_register(&drv->driver);
305} 309}
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
index 1085ec294a19..c9ec7ca1f7ab 100644
--- a/drivers/xen/xenbus/xenbus_probe.h
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -60,7 +60,9 @@ extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
60extern int xenbus_dev_probe(struct device *_dev); 60extern int xenbus_dev_probe(struct device *_dev);
61extern int xenbus_dev_remove(struct device *_dev); 61extern int xenbus_dev_remove(struct device *_dev);
62extern int xenbus_register_driver_common(struct xenbus_driver *drv, 62extern int xenbus_register_driver_common(struct xenbus_driver *drv,
63 struct xen_bus_type *bus); 63 struct xen_bus_type *bus,
64 struct module *owner,
65 const char *mod_name);
64extern int xenbus_probe_node(struct xen_bus_type *bus, 66extern int xenbus_probe_node(struct xen_bus_type *bus,
65 const char *type, 67 const char *type,
66 const char *nodename); 68 const char *nodename);
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 5125dce11a60..04f7f85a5edf 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -234,13 +234,15 @@ int xenbus_dev_is_online(struct xenbus_device *dev)
234} 234}
235EXPORT_SYMBOL_GPL(xenbus_dev_is_online); 235EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
236 236
237int xenbus_register_backend(struct xenbus_driver *drv) 237int __xenbus_register_backend(struct xenbus_driver *drv, struct module *owner,
238 const char *mod_name)
238{ 239{
239 drv->read_otherend_details = read_frontend_details; 240 drv->read_otherend_details = read_frontend_details;
240 241
241 return xenbus_register_driver_common(drv, &xenbus_backend); 242 return xenbus_register_driver_common(drv, &xenbus_backend,
243 owner, mod_name);
242} 244}
243EXPORT_SYMBOL_GPL(xenbus_register_backend); 245EXPORT_SYMBOL_GPL(__xenbus_register_backend);
244 246
245static int backend_probe_and_watch(struct notifier_block *notifier, 247static int backend_probe_and_watch(struct notifier_block *notifier,
246 unsigned long event, 248 unsigned long event,
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index cb385c10d2b1..bcb53bdc469c 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -317,13 +317,15 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
317 print_device_status); 317 print_device_status);
318} 318}
319 319
320int xenbus_register_frontend(struct xenbus_driver *drv) 320int __xenbus_register_frontend(struct xenbus_driver *drv, struct module *owner,
321 const char *mod_name)
321{ 322{
322 int ret; 323 int ret;
323 324
324 drv->read_otherend_details = read_backend_details; 325 drv->read_otherend_details = read_backend_details;
325 326
326 ret = xenbus_register_driver_common(drv, &xenbus_frontend); 327 ret = xenbus_register_driver_common(drv, &xenbus_frontend,
328 owner, mod_name);
327 if (ret) 329 if (ret)
328 return ret; 330 return ret;
329 331
@@ -332,7 +334,7 @@ int xenbus_register_frontend(struct xenbus_driver *drv)
332 334
333 return 0; 335 return 0;
334} 336}
335EXPORT_SYMBOL_GPL(xenbus_register_frontend); 337EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
336 338
337static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); 339static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
338static int backend_state; 340static int backend_state;
diff --git a/include/xen/events.h b/include/xen/events.h
index 8bee7a75e850..5321cd9636e6 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -28,6 +28,8 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
28 unsigned long irqflags, 28 unsigned long irqflags,
29 const char *devname, 29 const char *devname,
30 void *dev_id); 30 void *dev_id);
31int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
32 unsigned int remote_port);
31int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, 33int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
32 unsigned int remote_port, 34 unsigned int remote_port,
33 irq_handler_t handler, 35 irq_handler_t handler,
diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h
index 6f4eae328ca7..f90b03454659 100644
--- a/include/xen/interface/elfnote.h
+++ b/include/xen/interface/elfnote.h
@@ -3,6 +3,24 @@
3 * 3 *
4 * Definitions used for the Xen ELF notes. 4 * Definitions used for the Xen ELF notes.
5 * 5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
6 * Copyright (c) 2006, Ian Campbell, XenSource Ltd. 24 * Copyright (c) 2006, Ian Campbell, XenSource Ltd.
7 */ 25 */
8 26
@@ -18,12 +36,13 @@
18 * 36 *
19 * LEGACY indicates the fields in the legacy __xen_guest string which 37
20 * this note type replaces. 38
39 *
40 * String values (for non-legacy) are NULL terminated ASCII, also known
41 * as ASCIZ type.
21 */ 42 */
22 43
23/* 44/*
24 * NAME=VALUE pair (string). 45 * NAME=VALUE pair (string).
25 *
26 * LEGACY: FEATURES and PAE
27 */ 46 */
28#define XEN_ELFNOTE_INFO 0 47#define XEN_ELFNOTE_INFO 0
29 48
@@ -137,10 +156,30 @@
137 156
138/* 157/*
139 * Whether or not the guest supports cooperative suspend cancellation. 158 * Whether or not the guest supports cooperative suspend cancellation.
159 * This is a numeric value.
160 *
161 * Default is 0
140 */ 162 */
141#define XEN_ELFNOTE_SUSPEND_CANCEL 14 163#define XEN_ELFNOTE_SUSPEND_CANCEL 14
142 164
143/* 165/*
166 * The (non-default) location the initial phys-to-machine map should be
167 * placed at by the hypervisor (Dom0) or the tools (DomU).
168 * The kernel must be prepared for this mapping to be established using
169 * large pages, despite such otherwise not being available to guests.
170 * The kernel must also be able to handle the page table pages used for
171 * this mapping not being accessible through the initial mapping.
172 * (Only x86-64 supports this at present.)
173 */
174#define XEN_ELFNOTE_INIT_P2M 15
175
176/*
177 * Whether or not the guest can deal with being passed an initrd not
178 * mapped through its initial page tables.
179 */
180#define XEN_ELFNOTE_MOD_START_PFN 16
181
182/*
144 * The features supported by this kernel (numeric). 183 * The features supported by this kernel (numeric).
145 * 184 *
146 * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a 185 * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a
@@ -153,6 +192,11 @@
153 */ 192 */
154#define XEN_ELFNOTE_SUPPORTED_FEATURES 17 193#define XEN_ELFNOTE_SUPPORTED_FEATURES 17
155 194
195/*
196 * The number of the highest elfnote defined.
197 */
198#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES
199
156#endif /* __XEN_PUBLIC_ELFNOTE_H__ */ 200#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
157 201
158/* 202/*
diff --git a/include/xen/interface/io/vscsiif.h b/include/xen/interface/io/vscsiif.h
new file mode 100644
index 000000000000..d07d7aca8d1c
--- /dev/null
+++ b/include/xen/interface/io/vscsiif.h
@@ -0,0 +1,229 @@
1/******************************************************************************
2 * vscsiif.h
3 *
4 * Based on the blkif.h code.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Copyright(c) FUJITSU Limited 2008.
25 */
26
27#ifndef __XEN__PUBLIC_IO_SCSI_H__
28#define __XEN__PUBLIC_IO_SCSI_H__
29
30#include "ring.h"
31#include "../grant_table.h"
32
33/*
34 * Feature and Parameter Negotiation
35 * =================================
36 * The two halves of a Xen pvSCSI driver utilize nodes within the XenStore to
37 * communicate capabilities and to negotiate operating parameters. This
38 * section enumerates these nodes which reside in the respective front and
39 * backend portions of the XenStore, following the XenBus convention.
40 *
41 * Any specified default value is in effect if the corresponding XenBus node
42 * is not present in the XenStore.
43 *
44 * XenStore nodes in sections marked "PRIVATE" are solely for use by the
45 * driver side whose XenBus tree contains them.
46 *
47 *****************************************************************************
48 * Backend XenBus Nodes
49 *****************************************************************************
50 *
51 *------------------ Backend Device Identification (PRIVATE) ------------------
52 *
53 * p-devname
54 * Values: string
55 *
56 * A free-form string used to identify the physical device (e.g. a disk name).
57 *
58 * p-dev
59 * Values: string
60 *
61 * A string specifying the backend device: either a 4-tuple "h:c:t:l"
62 * (host, controller, target, lun, all integers), or a WWN (e.g.
63 * "naa.60014054ac780582").
64 *
65 * v-dev
66 * Values: string
67 *
68 * A string specifying the frontend device in form of a 4-tuple "h:c:t:l"
69 * (host, controller, target, lun, all integers).
70 *
71 *--------------------------------- Features ---------------------------------
72 *
73 * feature-sg-grant
74 * Values: unsigned [VSCSIIF_SG_TABLESIZE...65535]
75 * Default Value: 0
76 *
77 * Specifies the maximum number of scatter/gather elements in grant pages
78 * supported. If not set, the backend supports up to VSCSIIF_SG_TABLESIZE
79 * SG elements specified directly in the request.
80 *
81 *****************************************************************************
82 * Frontend XenBus Nodes
83 *****************************************************************************
84 *
85 *----------------------- Request Transport Parameters -----------------------
86 *
87 * event-channel
88 * Values: unsigned
89 *
90 * The identifier of the Xen event channel used to signal activity
91 * in the ring buffer.
92 *
93 * ring-ref
94 * Values: unsigned
95 *
96 * The Xen grant reference granting permission for the backend to map
97 * the sole page in a single page sized ring buffer.
98 *
99 * protocol
100 * Values: string (XEN_IO_PROTO_ABI_*)
101 * Default Value: XEN_IO_PROTO_ABI_NATIVE
102 *
103 * The machine ABI rules governing the format of all ring request and
104 * response structures.
105 */
106
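
For illustration, a frontend XenStore tree matching the nodes documented above; the device path and the values are hypothetical:

device/vscsi/0/
    ring-ref      = "8"            (grant reference for the single ring page)
    event-channel = "15"           (port signalling ring activity)
    protocol      = "x86_64-abi"   (optional; defaults to XEN_IO_PROTO_ABI_NATIVE)
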
107/* Requests from the frontend to the backend */
108
109/*
110 * Request a SCSI operation specified via a CDB in vscsiif_request.cmnd.
111 * The target is specified via channel, id and lun.
112 *
113 * The operation to be performed is specified via a CDB in cmnd[], the length
114 * of the CDB is in cmd_len. sc_data_direction specifies the direction of data
115 * (to the device, from the device, or none at all).
116 *
117 * If data is to be transferred to or from the device the buffer(s) in the
118 * guest memory is/are specified via one or multiple scsiif_request_segment
119 * descriptors each specifying a memory page via a grant_ref_t, an offset into
120 * the page and the length of the area in that page. All scsiif_request_segment
121 * areas concatenated form the resulting data buffer used by the operation.
122 * If the number of scsiif_request_segment areas is not too large (less than
123 * or equal to VSCSIIF_SG_TABLESIZE) the areas can be specified directly in the
124 * seg[] array and the number of valid scsiif_request_segment elements is to be
125 * set in nr_segments.
126 *
127 * If "feature-sg-grant" in the Xenstore is set it is possible to specify more
128 * than VSCSIIF_SG_TABLESIZE scsiif_request_segment elements via indirection.
129 * The maximum number of allowed scsiif_request_segment elements is the value
130 * of the "feature-sg-grant" entry from Xenstore. When using indirection the
131 * seg[] array doesn't contain specifications of the data buffers, but
132 * references to scsiif_request_segment arrays, which in turn reference the
133 * data buffers. While nr_segments holds the number of populated seg[] entries
134 * (plus the set VSCSIIF_SG_GRANT bit), the number of scsiif_request_segment
135 * elements referencing the target data buffers is calculated from the lengths
136 * of the seg[] elements (the sum of all valid seg[].length divided by the
137 * size of one scsiif_request_segment structure).
138 */
139#define VSCSIIF_ACT_SCSI_CDB 1
140
141/*
142 * Request abort of a running operation for the specified target given by
143 * channel, id, lun and the operation's rqid in ref_rqid.
144 */
145#define VSCSIIF_ACT_SCSI_ABORT 2
146
147/*
148 * Request a device reset of the specified target (channel and id).
149 */
150#define VSCSIIF_ACT_SCSI_RESET 3
151
152/*
153 * Preset scatter/gather elements for a following request. Deprecated.
154 * Keeping the define only to avoid usage of the value "4" for other actions.
155 */
156#define VSCSIIF_ACT_SCSI_SG_PRESET 4
157
158/*
159 * Maximum scatter/gather segments per request.
160 *
161 * Considering balance between allocating at least 16 "vscsiif_request"
162 * structures on one page (4096 bytes) and the number of scatter/gather
163 * elements needed, we decided to use 26 as a magic number.
164 *
165 * If "feature-sg-grant" is set, more scatter/gather elements can be specified
166 * by placing them in one or more (up to VSCSIIF_SG_TABLESIZE) granted pages.
167 * In this case the vscsiif_request seg elements don't contain references to
168 * the user data, but to the SG elements referencing the user data.
169 */
170#define VSCSIIF_SG_TABLESIZE 26
171
172/*
173 * based on Linux kernel 2.6.18, still valid
174 * Changing these values requires support of multiple protocols via the rings
175 * as "old clients" will blindly use these values and the resulting structure
176 * sizes.
177 */
178#define VSCSIIF_MAX_COMMAND_SIZE 16
179#define VSCSIIF_SENSE_BUFFERSIZE 96
180
181struct scsiif_request_segment {
182 grant_ref_t gref;
183 uint16_t offset;
184 uint16_t length;
185};
186
187#define VSCSIIF_SG_PER_PAGE (PAGE_SIZE / sizeof(struct scsiif_request_segment))
188
189/* Size of one request is 252 bytes */
190struct vscsiif_request {
191 uint16_t rqid; /* private guest value, echoed in resp */
192 uint8_t act; /* command between backend and frontend */
193 uint8_t cmd_len; /* valid CDB bytes */
194
195 uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; /* the CDB */
196 uint16_t timeout_per_command; /* deprecated */
197 uint16_t channel, id, lun; /* (virtual) device specification */
198 uint16_t ref_rqid; /* command abort reference */
199 uint8_t sc_data_direction; /* for DMA_TO_DEVICE(1)
200 DMA_FROM_DEVICE(2)
201 DMA_NONE(3) requests */
202 uint8_t nr_segments; /* Number of pieces of scatter-gather */
203/*
204 * flag in nr_segments: SG elements via grant page
205 *
206 * If VSCSIIF_SG_GRANT is set, the low 7 bits of nr_segments specify the number
207 * of grant pages containing SG elements. Usable if "feature-sg-grant" set.
208 */
209#define VSCSIIF_SG_GRANT 0x80
210
211 struct scsiif_request_segment seg[VSCSIIF_SG_TABLESIZE];
212 uint32_t reserved[3];
213};
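
As a sketch of the indirection arithmetic documented above (the helper name is illustrative and not part of the ABI): with VSCSIIF_SG_GRANT set, seg[] entries reference pages holding scsiif_request_segment arrays, and the number of data segments falls out of the summed seg[].length values.

/*
 * Derive the number of data segments of a request according to the
 * "feature-sg-grant" rules described above.
 */
static inline unsigned int sketch_nr_data_segs(const struct vscsiif_request *req)
{
	unsigned int i, bytes = 0;

	if (!(req->nr_segments & VSCSIIF_SG_GRANT))
		return req->nr_segments;   /* seg[] references the data directly */

	/* Low 7 bits: seg[] entries referencing pages of segment arrays. */
	for (i = 0; i < (unsigned int)(req->nr_segments & ~VSCSIIF_SG_GRANT); i++)
		bytes += req->seg[i].length;

	return bytes / sizeof(struct scsiif_request_segment);
}
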
214
215/* Size of one response is 252 bytes */
216struct vscsiif_response {
217 uint16_t rqid; /* identifies request */
218 uint8_t padding;
219 uint8_t sense_len;
220 uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
221 int32_t rslt;
222 uint32_t residual_len; /* request bufflen -
223 return the value from physical device */
224 uint32_t reserved[36];
225};
226
227DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response);
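
The size comments above can be pinned down at build time. A sketch using C11 static assertions follows (in-kernel code would use BUILD_BUG_ON instead); it assumes natural alignment, under which both structures pack to exactly 252 bytes:

/*
 * 252-byte entries mean 16 requests or responses fit in a 4096-byte
 * ring page, matching the VSCSIIF_SG_TABLESIZE rationale above.
 */
_Static_assert(sizeof(struct scsiif_request_segment) == 8, "8-byte segment");
_Static_assert(sizeof(struct vscsiif_request) == 252, "252-byte request");
_Static_assert(sizeof(struct vscsiif_response) == 252, "252-byte response");
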
228
229#endif /*__XEN__PUBLIC_IO_SCSI_H__*/
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index de082130ba4b..f68719f405af 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -3,6 +3,24 @@
3 * 3 *
4 * Guest OS interface to Xen. 4 * Guest OS interface to Xen.
5 * 5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
6 * Copyright (c) 2004, K A Fraser 24 * Copyright (c) 2004, K A Fraser
7 */ 25 */
8 26
@@ -73,13 +91,23 @@
73 * VIRTUAL INTERRUPTS 91 * VIRTUAL INTERRUPTS
74 * 92 *
75 * Virtual interrupts that a guest OS may receive from Xen. 93 * Virtual interrupts that a guest OS may receive from Xen.
94 * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a
95 * global VIRQ. The former can be bound once per VCPU and cannot be re-bound.
96 * The latter can be allocated only once per guest: they must initially be
97 * allocated to VCPU0 but can subsequently be re-bound.
76 */ 98 */
77#define VIRQ_TIMER 0 /* Timebase update, and/or requested timeout. */ 99#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */
78#define VIRQ_DEBUG 1 /* Request guest to dump debug info. */ 100#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */
79#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ 101#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */
80#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ 102#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */
81#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ 103#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */
82#define VIRQ_PCPU_STATE 9 /* (DOM0) PCPU state changed */ 104#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */
105#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */
106#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */
107#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */
108#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occurred */
109#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */
110#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */
83 111
84/* Architecture-specific VIRQ definitions. */ 112/* Architecture-specific VIRQ definitions. */
85#define VIRQ_ARCH_0 16 113#define VIRQ_ARCH_0 16
@@ -92,24 +120,68 @@
92#define VIRQ_ARCH_7 23 120#define VIRQ_ARCH_7 23
93 121
94#define NR_VIRQS 24 122#define NR_VIRQS 24
123
95/* 124/*
96 * MMU-UPDATE REQUESTS 125 * enum neg_errnoval HYPERVISOR_mmu_update(const struct mmu_update reqs[],
97 * 126 * unsigned count, unsigned *done_out,
98 * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs. 127 * unsigned foreigndom)
99 * A foreigndom (FD) can be specified (or DOMID_SELF for none). 128 * @reqs is an array of mmu_update_t structures ((ptr, val) pairs).
100 * Where the FD has some effect, it is described below. 129 * @count is the length of the above array.
101 * ptr[1:0] specifies the appropriate MMU_* command. 130 * @done_out is an output parameter indicating the number of completed operations
131 * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this
132 * hypercall invocation. Can be DOMID_SELF.
133 * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced
134 * in this hypercall invocation. The value of this field
135 * (x) encodes the PFD as follows:
136 * x == 0 => PFD == DOMID_SELF
137 * x != 0 => PFD == x - 1
102 * 138 *
139 * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command.
140 * -------------
103 * ptr[1:0] == MMU_NORMAL_PT_UPDATE: 141 * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
104 * Updates an entry in a page table. If updating an L1 table, and the new 142 * Updates an entry in a page table belonging to PFD. If updating an L1 table,
105 * table entry is valid/present, the mapped frame must belong to the FD, if 143 * and the new table entry is valid/present, the mapped frame must belong to
106 * an FD has been specified. If attempting to map an I/O page then the 144 * FD. If attempting to map an I/O page then the caller assumes the privilege
107 * caller assumes the privilege of the FD. 145 * of the FD.
108 * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. 146 * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
109 * FD == DOMID_XEN: Map restricted areas of Xen's heap space. 147 * FD == DOMID_XEN: Map restricted areas of Xen's heap space.
110 * ptr[:2] -- Machine address of the page-table entry to modify. 148 * ptr[:2] -- Machine address of the page-table entry to modify.
111 * val -- Value to write. 149 * val -- Value to write.
112 * 150 *
151 * There are also certain implicit requirements when using this hypercall. The
152 * pages that make up a pagetable must be mapped read-only in the guest.
153 * This prevents uncontrolled guest updates to the pagetable. Xen strictly
154 * enforces this, and will disallow any pagetable update which will end up
155 * mapping a pagetable page RW, and will disallow using any writable page as a
156 * pagetable. In practice it means that when constructing a page table for a
157 * process, thread, etc, we MUST be very diligent in following these rules:
158 * 1). Start with top-level page (PGD or in Xen language: L4). Fill out
159 * the entries.
160 * 2). Keep on going, filling out the upper (PUD or L3), and middle (PMD
161 * or L2).
162 * 3). Start filling out the PTE table (L1) with the PTE entries. Once
163 * done, make sure to set each of those entries to RO (so writeable bit
164 * is unset). Once that has been completed, set the PMD (L2) for this
165 * PTE table as RO.
166 * 4). When completed with all of the PMD (L2) entries, and all of them have
167 * been set to RO, make sure to set RO the PUD (L3). Do the same
168 * operation on PGD (L4) pagetable entries that have a PUD (L3) entry.
169 * 5). Now before you can use those pages (so setting the cr3), you MUST also
170 * pin them so that the hypervisor can verify the entries. This is done
171 * via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame
172 *     number of the PGD (L4)). At this point the HYPERVISOR_mmuext_op(
173 * MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be
174 * issued.
175 * For 32-bit guests, the L4 is not used (as there are fewer pagetable
176 * levels), so the L3 is used instead.
177 * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE
178 * hypercall. If so desired, the OS can also try to write to the PTE
179 * and be trapped by the hypervisor (as the PTE entry is RO).
180 *
181 * To deallocate the pages, the operations are the reverse of the steps
182 * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the
183 * pagetable MUST NOT be in use (meaning that cr3 does not point to it).
184 *
113 * ptr[1:0] == MMU_MACHPHYS_UPDATE: 185 * ptr[1:0] == MMU_MACHPHYS_UPDATE:
114 * Updates an entry in the machine->pseudo-physical mapping table. 186 * Updates an entry in the machine->pseudo-physical mapping table.
115 * ptr[:2] -- Machine address within the frame whose mapping to modify. 187 * ptr[:2] -- Machine address within the frame whose mapping to modify.
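Step 5 of the construction rules above maps onto two mmuext operations. A hedged sketch using the Linux hypercall wrapper (switch_to_new_pgd() is an illustrative name; error handling is minimal):

/* Sketch: pin a freshly built L4 pagetable and make it current.
 * 'l4_mfn' is the machine frame number of the PGD; all of the
 * pagetable's pages must already be mapped RO as described above.
 */
static int switch_to_new_pgd(xen_pfn_t l4_mfn)
{
	struct mmuext_op op;
	int rc;

	op.cmd = MMUEXT_PIN_L4_TABLE;	/* hypervisor validates every entry */
	op.arg1.mfn = l4_mfn;
	rc = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
	if (rc)
		return rc;

	op.cmd = MMUEXT_NEW_BASEPTR;	/* the PV equivalent of loading cr3 */
	op.arg1.mfn = l4_mfn;
	return HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
}

Unpinning reverses this: switch to another base pointer first, then issue MMUEXT_UNPIN_TABLE on the same mfn.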
@@ -119,6 +191,72 @@
119 * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: 191 * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD:
120 * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed 192 * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed
121 * with those in @val. 193 * with those in @val.
194 *
195 * @val is usually the machine frame number along with some attributes.
196 * The attributes by default follow the architecture-defined bits, meaning that
197 * if this is an X86_64 machine and the four-level page table layout is used,
198 * the layout of val is:
199 * - 63 if set means No execute (NX)
200 * - 46-13 the machine frame number
201 * - 12 available for guest
202 * - 11 available for guest
203 * - 10 available for guest
204 * - 9 available for guest
205 * - 8 global
206 * - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages)
207 * - 6 dirty
208 * - 5 accessed
209 * - 4 page cached disabled
210 * - 3 page write through
211 * - 2 userspace accessible
212 * - 1 writeable
213 * - 0 present
214 *
215 * The one bit that does not fit with the default layout is PAGE_PSE
216 * (also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the
217 * HYPERVISOR_mmuext_op serve as the mechanism to mark a page as 4MB
218 * (or 2MB) instead of using the PAGE_PSE bit.
219 *
220 * The reason that the PAGE_PSE (bit 7) is not utilized is that Xen uses it
221 * as the Page Attribute Table (PAT) bit - for details on it please refer to
222 * Intel SDM 10.12. PAT allows setting the caching attributes of pages
223 * instead of using MTRRs.
224 *
225 * The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits):
226 * PAT4 PAT0
227 * +-----+-----+----+----+----+-----+----+----+
228 * | UC | UC- | WC | WB | UC | UC- | WC | WB | <= Linux
229 * +-----+-----+----+----+----+-----+----+----+
230 * | UC | UC- | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots)
231 * +-----+-----+----+----+----+-----+----+----+
232 * | rsv | rsv | WP | WC | UC | UC- | WT | WB | <= Xen
233 * +-----+-----+----+----+----+-----+----+----+
234 *
235 * The index into this table is formed by looking up
236 * Bit 7, Bit 4, and Bit 3 of the val entry:
237 *
238 * PAT/PSE (bit 7), PCD (bit 4), and PWT (bit 3).
239 *
240 * If all bits are off, then we are using PAT0. If bit 3 is turned on,
241 * then we are using PAT1; if bits 3 and 4 are on, then PAT3; and so on.
242 *
243 * As you can see, the Linux PAT1 translates to PAT4 under Xen, which means
244 * that a guest following Linux's PAT setup that would like to set Write
245 * Combined on pages MUST use the PAT4 entry, i.e. have Bit 7 (PAGE_PAT)
246 * set. For example, Linux only uses PAT0, PAT1, and PAT3 for
247 * caching as:
248 *
249 * WB = none (so PAT0)
250 * WC = PWT (bit 3 on)
251 * UC = PWT | PCD (bits 3 and 4 on, so PAT3).
252 *
253 * To make it work with Xen, the WC bit needs to be translated as follows:
254 *
255 * PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3
256 *
257 * And to translate back, the reverse:
258 *
259 * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7.
122 */ 260 */
123#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ 261#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
124#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ 262#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */
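The WC translation described above is a single bit move. A sketch with illustrative helper names (the kernel does the equivalent inside its pte conversion paths); the _BIT_* constants stand in for the attribute bits of val listed earlier:

#define _BIT_PWT (1UL << 3)	/* page write through */
#define _BIT_PCD (1UL << 4)	/* page cache disabled */
#define _BIT_PAT (1UL << 7)	/* PAT index bit */

/* Linux encodes WC as PWT alone (PAT1); under Xen WC lives in PAT4. */
static unsigned long pte_wc_linux_to_xen(unsigned long val)
{
	if ((val & (_BIT_PAT | _BIT_PCD | _BIT_PWT)) == _BIT_PWT)
		val = (val & ~_BIT_PWT) | _BIT_PAT;	/* PAT1 -> PAT4 */
	return val;
}

static unsigned long pte_wc_xen_to_linux(unsigned long val)
{
	if ((val & (_BIT_PAT | _BIT_PCD | _BIT_PWT)) == _BIT_PAT)
		val = (val & ~_BIT_PAT) | _BIT_PWT;	/* PAT4 -> PAT1 */
	return val;
}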
@@ -127,7 +265,12 @@
127/* 265/*
128 * MMU EXTENDED OPERATIONS 266 * MMU EXTENDED OPERATIONS
129 * 267 *
130 * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. 268 * enum neg_errnoval HYPERVISOR_mmuext_op(mmuext_op_t uops[],
269 * unsigned int count,
270 * unsigned int *pdone,
271 * unsigned int foreigndom)
272 */
273/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
131 * A foreigndom (FD) can be specified (or DOMID_SELF for none). 274 * A foreigndom (FD) can be specified (or DOMID_SELF for none).
132 * Where the FD has some effect, it is described below. 275 * Where the FD has some effect, it is described below.
133 * 276 *
@@ -164,9 +307,23 @@
164 * cmd: MMUEXT_FLUSH_CACHE 307 * cmd: MMUEXT_FLUSH_CACHE
165 * No additional arguments. Writes back and flushes cache contents. 308 * No additional arguments. Writes back and flushes cache contents.
166 * 309 *
310 * cmd: MMUEXT_FLUSH_CACHE_GLOBAL
311 * No additional arguments. Writes back and flushes cache contents
312 * on all CPUs in the system.
313 *
167 * cmd: MMUEXT_SET_LDT 314 * cmd: MMUEXT_SET_LDT
168 * linear_addr: Linear address of LDT base (NB. must be page-aligned). 315 * linear_addr: Linear address of LDT base (NB. must be page-aligned).
169 * nr_ents: Number of entries in LDT. 316 * nr_ents: Number of entries in LDT.
317 *
318 * cmd: MMUEXT_CLEAR_PAGE
319 * mfn: Machine frame number to be cleared.
320 *
321 * cmd: MMUEXT_COPY_PAGE
322 * mfn: Machine frame number of the destination page.
323 * src_mfn: Machine frame number of the source page.
324 *
325 * cmd: MMUEXT_[UN]MARK_SUPER
326 * mfn: Machine frame number of head of superpage to be [un]marked.
170 */ 327 */
171#define MMUEXT_PIN_L1_TABLE 0 328#define MMUEXT_PIN_L1_TABLE 0
172#define MMUEXT_PIN_L2_TABLE 1 329#define MMUEXT_PIN_L2_TABLE 1
@@ -183,12 +340,18 @@
183#define MMUEXT_FLUSH_CACHE 12 340#define MMUEXT_FLUSH_CACHE 12
184#define MMUEXT_SET_LDT 13 341#define MMUEXT_SET_LDT 13
185#define MMUEXT_NEW_USER_BASEPTR 15 342#define MMUEXT_NEW_USER_BASEPTR 15
343#define MMUEXT_CLEAR_PAGE 16
344#define MMUEXT_COPY_PAGE 17
345#define MMUEXT_FLUSH_CACHE_GLOBAL 18
346#define MMUEXT_MARK_SUPER 19
347#define MMUEXT_UNMARK_SUPER 20
186 348
187#ifndef __ASSEMBLY__ 349#ifndef __ASSEMBLY__
188struct mmuext_op { 350struct mmuext_op {
189 unsigned int cmd; 351 unsigned int cmd;
190 union { 352 union {
191 /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ 353 /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
354 * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */
192 xen_pfn_t mfn; 355 xen_pfn_t mfn;
193 /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ 356 /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
194 unsigned long linear_addr; 357 unsigned long linear_addr;
@@ -198,6 +361,8 @@ struct mmuext_op {
198 unsigned int nr_ents; 361 unsigned int nr_ents;
199 /* TLB_FLUSH_MULTI, INVLPG_MULTI */ 362 /* TLB_FLUSH_MULTI, INVLPG_MULTI */
200 void *vcpumask; 363 void *vcpumask;
364 /* COPY_PAGE */
365 xen_pfn_t src_mfn;
201 } arg2; 366 } arg2;
202}; 367};
203DEFINE_GUEST_HANDLE_STRUCT(mmuext_op); 368DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
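As an example of the extended arg2 union, the new MMUEXT_COPY_PAGE command pairs arg1.mfn (the destination) with arg2.src_mfn. A minimal sketch, assuming the Linux HYPERVISOR_mmuext_op wrapper:

/* Sketch: have the hypervisor copy one machine page onto another. */
static int xen_copy_mfn(xen_pfn_t dst_mfn, xen_pfn_t src_mfn)
{
	struct mmuext_op op = {
		.cmd = MMUEXT_COPY_PAGE,
		.arg1.mfn = dst_mfn,
		.arg2.src_mfn = src_mfn,
	};

	return HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
}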
@@ -225,10 +390,23 @@ DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
225 */ 390 */
226#define VMASST_CMD_enable 0 391#define VMASST_CMD_enable 0
227#define VMASST_CMD_disable 1 392#define VMASST_CMD_disable 1
393
394/* x86/32 guests: simulate full 4GB segment limits. */
228#define VMASST_TYPE_4gb_segments 0 395#define VMASST_TYPE_4gb_segments 0
396
397/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
229#define VMASST_TYPE_4gb_segments_notify 1 398#define VMASST_TYPE_4gb_segments_notify 1
399
400/*
401 * x86 guests: support writes to bottom-level PTEs.
402 * NB1. Page-directory entries cannot be written.
403 * NB2. Guest must continue to remove all writable mappings of PTEs.
404 */
230#define VMASST_TYPE_writable_pagetables 2 405#define VMASST_TYPE_writable_pagetables 2
406
407/* x86/PAE guests: support PDPTs above 4GB. */
231#define VMASST_TYPE_pae_extended_cr3 3 408#define VMASST_TYPE_pae_extended_cr3 3
409
232#define MAX_VMASST_TYPE 3 410#define MAX_VMASST_TYPE 3
233 411
234#ifndef __ASSEMBLY__ 412#ifndef __ASSEMBLY__
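A guest opts in to one of these assists with the vm_assist hypercall; for example, a sketch of enabling writable pagetables via the Linux wrapper (the illustrative caller just logs on failure):

static void enable_writable_pagetables(void)
{
	/* Xen will trap and validate each direct PTE write. */
	if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
				 VMASST_TYPE_writable_pagetables))
		pr_warn("writable-pagetable assist not available\n");
}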
@@ -260,6 +438,15 @@ typedef uint16_t domid_t;
260 */ 438 */
261#define DOMID_XEN (0x7FF2U) 439#define DOMID_XEN (0x7FF2U)
262 440
441/* DOMID_COW is used as the owner of sharable pages */
442#define DOMID_COW (0x7FF3U)
443
444/* DOMID_INVALID is used to identify pages with unknown owner. */
445#define DOMID_INVALID (0x7FF4U)
446
447/* Idle domain. */
448#define DOMID_IDLE (0x7FFFU)
449
263/* 450/*
264 * Send an array of these to HYPERVISOR_mmu_update(). 451 * Send an array of these to HYPERVISOR_mmu_update().
265 * NB. The fields are natural pointer/address size for this architecture. 452 * NB. The fields are natural pointer/address size for this architecture.
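Putting the pieces together, one validated PTE write through this interface looks roughly like the sketch below (update_one_pte() is an illustrative name; the PTE's machine address and the new value are assumed precomputed):

/* Sketch: update a single PTE via the batched hypercall.
 * ptr[1:0] selects the sub-command; MMU_NORMAL_PT_UPDATE is 0, so the
 * OR below is a no-op that documents the intent.
 */
static int update_one_pte(uint64_t pte_machine_addr, uint64_t new_val)
{
	struct mmu_update u = {
		.ptr = pte_machine_addr | MMU_NORMAL_PT_UPDATE,
		.val = new_val,
	};

	return HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
}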
@@ -272,7 +459,9 @@ DEFINE_GUEST_HANDLE_STRUCT(mmu_update);
272 459
273/* 460/*
274 * Send an array of these to HYPERVISOR_multicall(). 461 * Send an array of these to HYPERVISOR_multicall().
275 * NB. The fields are natural register size for this architecture. 462 * NB. The fields are logically the natural register size for this
463 * architecture. In cases where xen_ulong_t is larger than this then
464 * any unused bits in the upper portion must be zero.
276 */ 465 */
277struct multicall_entry { 466struct multicall_entry {
278 xen_ulong_t op; 467 xen_ulong_t op;
@@ -442,8 +631,48 @@ struct start_info {
442 unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ 631 unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */
443 unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ 632 unsigned long mod_len; /* Size (bytes) of pre-loaded module. */
444 int8_t cmd_line[MAX_GUEST_CMDLINE]; 633 int8_t cmd_line[MAX_GUEST_CMDLINE];
634 /* The pfn range here covers both page table and p->m table frames. */
635 unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */
636 unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */
445}; 637};
446 638
639/* These flags are passed in the 'flags' field of start_info_t. */
640#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
641#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
642#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */
643#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */
644#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */
645
646/*
647 * A multiboot module is a package containing modules, laid out very much
648 * like a multiboot module array. The only differences are:
649 * - the array of module descriptors is by convention simply at the beginning
650 * of the multiboot module,
651 * - addresses in the module descriptors are based on the beginning of the
652 * multiboot module,
653 * - the number of modules is determined by a termination descriptor that has
654 * mod_start == 0.
655 *
656 * This permits both building it statically and referencing it in a
657 * configuration file, and lets the PV guest easily rebase the addresses to
658 * virtual addresses while counting the number of modules at the same time.
659 */
660struct xen_multiboot_mod_list {
661 /* Address of first byte of the module */
662 uint32_t mod_start;
663 /* Address of last byte of the module (inclusive) */
664 uint32_t mod_end;
665 /* Address of zero-terminated command line */
666 uint32_t cmdline;
667 /* Unused, must be zero */
668 uint32_t pad;
669};
670/*
671 * The console structure in start_info.console.dom0
672 *
673 * This structure includes a variety of information required to
674 * have a working VGA/VESA console.
675 */
447struct dom0_vga_console_info { 676struct dom0_vga_console_info {
448 uint8_t video_type; 677 uint8_t video_type;
449#define XEN_VGATYPE_TEXT_MODE_3 0x03 678#define XEN_VGATYPE_TEXT_MODE_3 0x03
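Because the descriptor array is terminated by an entry with mod_start == 0, a guest can count the modules with a simple walk; a minimal sketch:

/* Sketch: count module descriptors in a multiboot module package.
 * 'list' points at the descriptor array at the start of the package,
 * already mapped into the guest.
 */
static unsigned int count_multiboot_mods(const struct xen_multiboot_mod_list *list)
{
	unsigned int n = 0;

	while (list[n].mod_start != 0)	/* terminating descriptor */
		n++;
	return n;
}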
@@ -484,11 +713,6 @@ struct dom0_vga_console_info {
484 } u; 713 } u;
485}; 714};
486 715
487/* These flags are passed in the 'flags' field of start_info_t. */
488#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
489#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
490#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */
491
492typedef uint64_t cpumap_t; 716typedef uint64_t cpumap_t;
493 717
494typedef uint8_t xen_domain_handle_t[16]; 718typedef uint8_t xen_domain_handle_t[16];
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 0324c6d340c1..b78f21caf55a 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -86,6 +86,7 @@ struct xenbus_device_id
86 86
87/* A xenbus driver. */ 87/* A xenbus driver. */
88struct xenbus_driver { 88struct xenbus_driver {
89 const char *name; /* defaults to ids[0].devicetype */
89 const struct xenbus_device_id *ids; 90 const struct xenbus_device_id *ids;
90 int (*probe)(struct xenbus_device *dev, 91 int (*probe)(struct xenbus_device *dev,
91 const struct xenbus_device_id *id); 92 const struct xenbus_device_id *id);
@@ -100,20 +101,22 @@ struct xenbus_driver {
100 int (*is_ready)(struct xenbus_device *dev); 101 int (*is_ready)(struct xenbus_device *dev);
101}; 102};
102 103
103#define DEFINE_XENBUS_DRIVER(var, drvname, methods...) \
104struct xenbus_driver var ## _driver = { \
105 .driver.name = drvname + 0 ?: var ## _ids->devicetype, \
106 .driver.owner = THIS_MODULE, \
107 .ids = var ## _ids, ## methods \
108}
109
110static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv) 104static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv)
111{ 105{
112 return container_of(drv, struct xenbus_driver, driver); 106 return container_of(drv, struct xenbus_driver, driver);
113} 107}
114 108
115int __must_check xenbus_register_frontend(struct xenbus_driver *); 109int __must_check __xenbus_register_frontend(struct xenbus_driver *drv,
116int __must_check xenbus_register_backend(struct xenbus_driver *); 110 struct module *owner,
111 const char *mod_name);
112int __must_check __xenbus_register_backend(struct xenbus_driver *drv,
113 struct module *owner,
114 const char *mod_name);
115
116#define xenbus_register_frontend(drv) \
117 __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME)
118#define xenbus_register_backend(drv) \
119 __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME)
117 120
118void xenbus_unregister_driver(struct xenbus_driver *drv); 121void xenbus_unregister_driver(struct xenbus_driver *drv);
119 122
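Under the new scheme a driver fills in its xenbus_driver explicitly and registers through the macro, which supplies THIS_MODULE and KBUILD_MODNAME for it. A sketch for an imaginary "demofront" frontend (all demofront_* names are illustrative):

static int demofront_probe(struct xenbus_device *dev,
			   const struct xenbus_device_id *id)
{
	return 0;	/* illustrative stub */
}

static const struct xenbus_device_id demofront_ids[] = {
	{ "demo" },
	{ "" }
};

static struct xenbus_driver demofront_driver = {
	.ids = demofront_ids,	/* .name defaults to ids[0].devicetype */
	.probe = demofront_probe,
};

static int __init demofront_init(void)
{
	return xenbus_register_frontend(&demofront_driver);
}
module_init(demofront_init);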