diff options
80 files changed, 5088 insertions, 937 deletions
diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt new file mode 100644 index 000000000000..c2321903aa09 --- /dev/null +++ b/Documentation/Intel-IOMMU.txt | |||
| @@ -0,0 +1,115 @@ | |||
| 1 | Linux IOMMU Support | ||
| 2 | =================== | ||
| 3 | |||
| 4 | The architecture spec can be obtained from the following location: | ||
| 5 | |||
| 6 | http://www.intel.com/technology/virtualization/ | ||
| 7 | |||
| 8 | This guide gives a quick cheat sheet for some basic understanding. | ||
| 9 | |||
| 10 | Some Keywords | ||
| 11 | |||
| 12 | DMAR - DMA remapping | ||
| 13 | DRHD - DMA Remapping Hardware unit Definition structure | ||
| 14 | RMRR - Reserved Memory Region Reporting structure | ||
| 15 | ZLR - Zero length reads from PCI devices | ||
| 16 | IOVA - IO Virtual address. | ||
| 17 | |||
| 18 | Basic stuff | ||
| 19 | ----------- | ||
| 20 | |||
| 21 | ACPI enumerates and lists the different DMA engines in the platform, and | ||
| 22 | device scope relationships between PCI devices and which DMA engine controls | ||
| 23 | them. | ||
| 24 | |||
| 25 | What is RMRR? | ||
| 26 | ------------- | ||
| 27 | |||
| 28 | There are some devices the BIOS controls, e.g. USB devices used to perform | ||
| 29 | PS/2 emulation. The regions of memory used for these devices are marked | ||
| 30 | reserved in the e820 map. When we turn on DMA translation, DMA to those | ||
| 31 | regions will fail. Hence the BIOS uses RMRRs to specify these regions along | ||
| 32 | with the devices that need to access them. The OS is expected to set up | ||
| 33 | unity mappings for these regions so those devices can access them. | ||
| 34 | |||
| 35 | How is IOVA generated? | ||
| 36 | --------------------- | ||
| 37 | |||
| 38 | Well-behaved drivers call pci_map_*() before sending a command to a device | ||
| 39 | that needs to perform DMA. Once the DMA is completed and the mapping is no | ||
| 40 | longer required, the driver calls pci_unmap_*() to unmap the region. | ||
| 41 | |||
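As a quick sketch of the pattern above (not from this patch; the function, device,
buffer and length names are illustrative only):

    #include <linux/pci.h>

    static void example_dma_to_device(struct pci_dev *pdev, void *buf,
                                      size_t len)
    {
            dma_addr_t handle;

            /* Allocate an IOVA and install the IOMMU mapping for it. */
            handle = pci_map_single(pdev, buf, len, PCI_DMA_TODEVICE);

            /* ... program the device with 'handle' and wait for the DMA ... */

            /* Once the DMA has completed, tear the mapping down again. */
            pci_unmap_single(pdev, handle, len, PCI_DMA_TODEVICE);
    }
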
| 42 | The Intel IOMMU driver allocates a virtual address space per domain. Each | ||
| 43 | PCIe device has its own domain (hence protection). Devices under p2p | ||
| 44 | bridges share the virtual address space with all other devices under the | ||
| 45 | same bridge, due to transaction ID aliasing at p2p bridges. | ||
| 46 | |||
| 47 | IOVA generation is pretty generic. We use the same technique as vmalloc(), | ||
| 48 | but these are not global address spaces; each domain has a separate one. | ||
| 49 | Different DMA engines may support different numbers of domains. | ||
| 50 | |||
| 51 | We also allocate guard pages with each mapping, so we can attempt to catch | ||
| 52 | any overflow that might happen. | ||
| 53 | |||
| 54 | |||
| 55 | Graphics Problems? | ||
| 56 | ------------------ | ||
| 57 | If you encounter issues with graphics devices, you can try adding the option | ||
| 58 | intel_iommu=igfx_off to turn off DMA remapping for the integrated graphics engine. | ||
| 59 | |||
| 60 | If it happens to be a PCI device covered by the INCLUDE_ALL engine, | ||
| 61 | then try enabling CONFIG_DMAR_GFX_WA to set up a 1:1 map. We hear | ||
| 62 | graphics drivers may be in the process of converting to the DMA API in the | ||
| 63 | near future, at which point this option can be yanked out. | ||
| 64 | |||
| 65 | Some exceptions to IOVA | ||
| 66 | ----------------------- | ||
| 67 | Interrupt ranges (0xfee00000 - 0xfeefffff) are not address translated. | ||
| 68 | The same is true for peer-to-peer transactions. Hence we reserve these | ||
| 69 | addresses, as well as the PCI MMIO ranges, so they are never allocated as IOVAs. | ||
| 70 | |||
| 71 | |||
| 72 | Fault reporting | ||
| 73 | --------------- | ||
| 74 | When errors are reported, the DMA engine signals via an interrupt. The | ||
| 75 | fault reason and the device that caused it are printed on the console. | ||
| 76 | |||
| 77 | See below for sample. | ||
| 78 | |||
| 79 | |||
| 80 | Boot Message Sample | ||
| 81 | ------------------- | ||
| 82 | |||
| 83 | Something like this gets printed, indicating the presence of DMAR tables | ||
| 84 | in ACPI: | ||
| 85 | |||
| 86 | ACPI: DMAR (v001 A M I OEMDMAR 0x00000001 MSFT 0x00000097) @ 0x000000007f5b5ef0 | ||
| 87 | |||
| 88 | When the DMAR table is processed and initialized by ACPI, the DRHD | ||
| 89 | locations and any RMRRs processed are printed: | ||
| 90 | |||
| 91 | ACPI DMAR:Host address width 36 | ||
| 92 | ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed90000 | ||
| 93 | ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed91000 | ||
| 94 | ACPI DMAR:DRHD (flags: 0x00000001)base: 0x00000000fed93000 | ||
| 95 | ACPI DMAR:RMRR base: 0x00000000000ed000 end: 0x00000000000effff | ||
| 96 | ACPI DMAR:RMRR base: 0x000000007f600000 end: 0x000000007fffffff | ||
| 97 | |||
| 98 | When DMAR is enabled for use, you will notice: | ||
| 99 | |||
| 100 | PCI-DMA: Using DMAR IOMMU | ||
| 101 | |||
| 102 | Fault reporting sample | ||
| 103 | ---------------------- | ||
| 104 | |||
| 105 | DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000 | ||
| 106 | DMAR:[fault reason 05] PTE Write access is not set | ||
| 107 | DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000 | ||
| 108 | DMAR:[fault reason 05] PTE Write access is not set | ||
| 109 | |||
| 110 | TBD | ||
| 111 | ---- | ||
| 112 | |||
| 113 | - For compatibility testing, we could use a unity-map domain for all devices, | ||
| 114 | i.e. provide a 1:1 map of all useful memory under a single domain for all devices. | ||
| 115 | - An API for paravirt ops, abstracting this functionality for VMM folks. | ||
diff --git a/Documentation/filesystems/Exporting b/Documentation/filesystems/Exporting index 31047e0fe14b..87019d2b5981 100644 --- a/Documentation/filesystems/Exporting +++ b/Documentation/filesystems/Exporting | |||
| @@ -2,9 +2,12 @@ | |||
| 2 | Making Filesystems Exportable | 2 | Making Filesystems Exportable |
| 3 | ============================= | 3 | ============================= |
| 4 | 4 | ||
| 5 | Most filesystem operations require a dentry (or two) as a starting | 5 | Overview |
| 6 | -------- | ||
| 7 | |||
| 8 | All filesystem operations require a dentry (or two) as a starting | ||
| 6 | point. Local applications have a reference-counted hold on suitable | 9 | point. Local applications have a reference-counted hold on suitable |
| 7 | dentrys via open file descriptors or cwd/root. However remote | 10 | dentries via open file descriptors or cwd/root. However remote |
| 8 | applications that access a filesystem via a remote filesystem protocol | 11 | applications that access a filesystem via a remote filesystem protocol |
| 9 | such as NFS may not be able to hold such a reference, and so need a | 12 | such as NFS may not be able to hold such a reference, and so need a |
| 10 | different way to refer to a particular dentry. As the alternative | 13 | different way to refer to a particular dentry. As the alternative |
| @@ -13,14 +16,14 @@ server-reboot (among other things, though these tend to be the most | |||
| 13 | problematic), there is no simple answer like 'filename'. | 16 | problematic), there is no simple answer like 'filename'. |
| 14 | 17 | ||
| 15 | The mechanism discussed here allows each filesystem implementation to | 18 | The mechanism discussed here allows each filesystem implementation to |
| 16 | specify how to generate an opaque (out side of the filesystem) byte | 19 | specify how to generate an opaque (outside of the filesystem) byte |
| 17 | string for any dentry, and how to find an appropriate dentry for any | 20 | string for any dentry, and how to find an appropriate dentry for any |
| 18 | given opaque byte string. | 21 | given opaque byte string. |
| 19 | This byte string will be called a "filehandle fragment" as it | 22 | This byte string will be called a "filehandle fragment" as it |
| 20 | corresponds to part of an NFS filehandle. | 23 | corresponds to part of an NFS filehandle. |
| 21 | 24 | ||
| 22 | A filesystem which supports the mapping between filehandle fragments | 25 | A filesystem which supports the mapping between filehandle fragments |
| 23 | and dentrys will be termed "exportable". | 26 | and dentries will be termed "exportable". |
| 24 | 27 | ||
| 25 | 28 | ||
| 26 | 29 | ||
| @@ -89,11 +92,9 @@ For a filesystem to be exportable it must: | |||
| 89 | 1/ provide the filehandle fragment routines described below. | 92 | 1/ provide the filehandle fragment routines described below. |
| 90 | 2/ make sure that d_splice_alias is used rather than d_add | 93 | 2/ make sure that d_splice_alias is used rather than d_add |
| 91 | when ->lookup finds an inode for a given parent and name. | 94 | when ->lookup finds an inode for a given parent and name. |
| 92 | Typically the ->lookup routine will end: | 95 | Typically the ->lookup routine will end with a: |
| 93 | if (inode) | 96 | |
| 94 | return d_splice(inode, dentry); | 97 | return d_splice_alias(inode, dentry); |
| 95 | d_add(dentry, inode); | ||
| 96 | return NULL; | ||
| 97 | } | 98 | } |
| 98 | 99 | ||
| 99 | 100 | ||
| @@ -101,67 +102,39 @@ For a filesystem to be exportable it must: | |||
| 101 | A file system implementation declares that instances of the filesystem | 102 | A file system implementation declares that instances of the filesystem |
| 102 | are exportable by setting the s_export_op field in the struct | 103 | are exportable by setting the s_export_op field in the struct |
| 103 | super_block. This field must point to a "struct export_operations" | 104 | super_block. This field must point to a "struct export_operations" |
| 104 | struct which could potentially be full of NULLs, though normally at | 105 | struct which has the following members: |
| 105 | least get_parent will be set. | 106 | |
| 106 | 107 | encode_fh (optional) | |
| 107 | The primary operations are decode_fh and encode_fh. | 108 | Takes a dentry and creates a filehandle fragment which can later be used |
| 108 | decode_fh takes a filehandle fragment and tries to find or create a | 109 | to find or create a dentry for the same object. The default |
| 109 | dentry for the object referred to by the filehandle. | 110 | implementation creates a filehandle fragment that encodes a 32bit inode |
| 110 | encode_fh takes a dentry and creates a filehandle fragment which can | 111 | and generation number for the inode encoded, and if necessary the |
| 111 | later be used to find/create a dentry for the same object. | 112 | same information for the parent. |
| 112 | 113 | ||
| 113 | decode_fh will probably make use of "find_exported_dentry". | 114 | fh_to_dentry (mandatory) |
| 114 | This function lives in the "exportfs" module which a filesystem does | 115 | Given a filehandle fragment, this should find the implied object and |
| 115 | not need unless it is being exported. So rather that calling | 116 | create a dentry for it (possibly with d_alloc_anon). |
| 116 | find_exported_dentry directly, each filesystem should call it through | 117 | |
| 117 | the find_exported_dentry pointer in it's export_operations table. | 118 | fh_to_parent (optional but strongly recommended) |
| 118 | This field is set correctly by the exporting agent (e.g. nfsd) when a | 119 | Given a filehandle fragment, this should find the parent of the |
| 119 | filesystem is exported, and before any export operations are called. | 120 | implied object and create a dentry for it (possibly with d_alloc_anon). |
| 120 | 121 | May fail if the filehandle fragment is too small. | |
| 121 | find_exported_dentry needs three support functions from the | 122 | |
| 122 | filesystem: | 123 | get_parent (optional but strongly recommended) |
| 123 | get_name. When given a parent dentry and a child dentry, this | 124 | When given a dentry for a directory, this should return a dentry for |
| 124 | should find a name in the directory identified by the parent | 125 | the parent. Quite possibly the parent dentry will have been allocated |
| 125 | dentry, which leads to the object identified by the child dentry. | 126 | by d_alloc_anon. The default get_parent function just returns an error |
| 126 | If no get_name function is supplied, a default implementation is | 127 | so any filehandle lookup that requires finding a parent will fail. |
| 127 | provided which uses vfs_readdir to find potential names, and | 128 | ->lookup("..") is *not* used as a default as it can leave ".." entries |
| 128 | matches inode numbers to find the correct match. | 129 | in the dcache which are too messy to work with. |
| 129 | 130 | ||
| 130 | get_parent. When given a dentry for a directory, this should return | 131 | get_name (optional) |
| 131 | a dentry for the parent. Quite possibly the parent dentry will | 132 | When given a parent dentry and a child dentry, this should find a name |
| 132 | have been allocated by d_alloc_anon. | 133 | in the directory identified by the parent dentry, which leads to the |
| 133 | The default get_parent function just returns an error so any | 134 | object identified by the child dentry. If no get_name function is |
| 134 | filehandle lookup that requires finding a parent will fail. | 135 | supplied, a default implementation is provided which uses vfs_readdir |
| 135 | ->lookup("..") is *not* used as a default as it can leave ".." | 136 | to find potential names, and matches inode numbers to find the correct |
| 136 | entries in the dcache which are too messy to work with. | 137 | match. |
| 137 | |||
| 138 | get_dentry. When given an opaque datum, this should find the | ||
| 139 | implied object and create a dentry for it (possibly with | ||
| 140 | d_alloc_anon). | ||
| 141 | The opaque datum is whatever is passed down by the decode_fh | ||
| 142 | function, and is often simply a fragment of the filehandle | ||
| 143 | fragment. | ||
| 144 | decode_fh passes two datums through find_exported_dentry. One that | ||
| 145 | should be used to identify the target object, and one that can be | ||
| 146 | used to identify the object's parent, should that be necessary. | ||
| 147 | The default get_dentry function assumes that the datum contains an | ||
| 148 | inode number and a generation number, and it attempts to get the | ||
| 149 | inode using "iget" and check it's validity by matching the | ||
| 150 | generation number. A filesystem should only depend on the default | ||
| 151 | if iget can safely be used this way. | ||
| 152 | |||
| 153 | If decode_fh and/or encode_fh are left as NULL, then default | ||
| 154 | implementations are used. These defaults are suitable for ext2 and | ||
| 155 | extremely similar filesystems (like ext3). | ||
| 156 | |||
| 157 | The default encode_fh creates a filehandle fragment from the inode | ||
| 158 | number and generation number of the target together with the inode | ||
| 159 | number and generation number of the parent (if the parent is | ||
| 160 | required). | ||
| 161 | |||
| 162 | The default decode_fh extract the target and parent datums from the | ||
| 163 | filehandle assuming the format used by the default encode_fh and | ||
| 164 | passed them to find_exported_dentry. | ||
| 165 | 138 | ||
| 166 | 139 | ||
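A rough sketch of wiring these up for a hypothetical filesystem "myfs"
(assuming the struct fid based prototypes introduced with this rework; only
fh_to_dentry is mandatory, and the lookup logic itself is stubbed out):

    #include <linux/exportfs.h>
    #include <linux/err.h>

    static struct dentry *myfs_fh_to_dentry(struct super_block *sb,
                    struct fid *fid, int fh_len, int fh_type)
    {
            /* Find the object named by fid and return a dentry for it,
             * e.g. via iget() plus d_alloc_anon(); stubbed out here. */
            return ERR_PTR(-ESTALE);
    }

    static struct export_operations myfs_export_ops = {
            .fh_to_dentry = myfs_fh_to_dentry,
    };

    /* In myfs_fill_super():  sb->s_export_op = &myfs_export_ops;  */
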
| 167 | A filehandle fragment consists of an array of 1 or more 4byte words, | 140 | A filehandle fragment consists of an array of 1 or more 4byte words, |
| @@ -172,5 +145,3 @@ generated by encode_fh, in which case it will have been padded with | |||
| 172 | nuls. Rather, the encode_fh routine should choose a "type" which | 145 | nuls. Rather, the encode_fh routine should choose a "type" which |
| 173 | indicates the decode_fh how much of the filehandle is valid, and how | 146 | indicates the decode_fh how much of the filehandle is valid, and how |
| 174 | it should be interpreted. | 147 | it should be interpreted. |
| 175 | |||
| 176 | |||
diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt index 35985b34d5a6..2f75e750e4f5 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/i386/boot.txt | |||
| @@ -168,6 +168,8 @@ Offset Proto Name Meaning | |||
| 168 | 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not | 168 | 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not |
| 169 | 0235/3 N/A pad2 Unused | 169 | 0235/3 N/A pad2 Unused |
| 170 | 0238/4 2.06+ cmdline_size Maximum size of the kernel command line | 170 | 0238/4 2.06+ cmdline_size Maximum size of the kernel command line |
| 171 | 023C/4 2.07+ hardware_subarch Hardware subarchitecture | ||
| 172 | 0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data | ||
| 171 | 173 | ||
| 172 | (1) For backwards compatibility, if the setup_sects field contains 0, the | 174 | (1) For backwards compatibility, if the setup_sects field contains 0, the |
| 173 | real value is 4. | 175 | real value is 4. |
| @@ -204,7 +206,7 @@ boot loaders can ignore those fields. | |||
| 204 | 206 | ||
| 205 | The byte order of all fields is littleendian (this is x86, after all.) | 207 | The byte order of all fields is littleendian (this is x86, after all.) |
| 206 | 208 | ||
| 207 | Field name: setup_secs | 209 | Field name: setup_sects |
| 208 | Type: read | 210 | Type: read |
| 209 | Offset/size: 0x1f1/1 | 211 | Offset/size: 0x1f1/1 |
| 210 | Protocol: ALL | 212 | Protocol: ALL |
| @@ -356,6 +358,13 @@ Protocol: 2.00+ | |||
| 356 | - If 0, the protected-mode code is loaded at 0x10000. | 358 | - If 0, the protected-mode code is loaded at 0x10000. |
| 357 | - If 1, the protected-mode code is loaded at 0x100000. | 359 | - If 1, the protected-mode code is loaded at 0x100000. |
| 358 | 360 | ||
| 361 | Bit 6 (write): KEEP_SEGMENTS | ||
| 362 | Protocol: 2.07+ | ||
| 363 | - If 0, reload the segment registers in the 32bit entry point. | ||
| 364 | - If 1, do not reload the segment registers in the 32bit entry point. | ||
| 365 | Assume that %cs %ds %ss %es are all set to flat segments with | ||
| 366 | a base of 0 (or the equivalent for their environment). | ||
| 367 | |||
| 359 | Bit 7 (write): CAN_USE_HEAP | 368 | Bit 7 (write): CAN_USE_HEAP |
| 360 | Set this bit to 1 to indicate that the value entered in the | 369 | Set this bit to 1 to indicate that the value entered in the |
| 361 | heap_end_ptr is valid. If this field is clear, some setup code | 370 | heap_end_ptr is valid. If this field is clear, some setup code |
| @@ -480,6 +489,29 @@ Protocol: 2.06+ | |||
| 480 | cmdline_size characters. With protocol version 2.05 and earlier, the | 489 | cmdline_size characters. With protocol version 2.05 and earlier, the |
| 481 | maximum size was 255. | 490 | maximum size was 255. |
| 482 | 491 | ||
| 492 | Field name: hardware_subarch | ||
| 493 | Type: write | ||
| 494 | Offset/size: 0x23c/4 | ||
| 495 | Protocol: 2.07+ | ||
| 496 | |||
| 497 | In a paravirtualized environment the low-level architectural pieces | ||
| 498 | such as interrupt handling, page table handling, and access to | ||
| 499 | processor control registers need to be done differently. | ||
| 500 |||
| 501 | This field allows the boot loader to inform the kernel that we are in | ||
| 502 | one of those environments. | ||
| 503 | |||
| 504 | 0x00000000 The default x86/PC environment | ||
| 505 | 0x00000001 lguest | ||
| 506 | 0x00000002 Xen | ||
| 507 | |||
| 508 | Field name: hardware_subarch_data | ||
| 509 | Type: write | ||
| 510 | Offset/size: 0x240/8 | ||
| 511 | Protocol: 2.07+ | ||
| 512 | |||
| 513 | A pointer to data that is specific to the hardware subarchitecture. | ||
| 514 | |||
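As a hedged illustration (not part of the protocol text; the function and
variable names are hypothetical), a boot loader targeting protocol 2.07+ might
fill these two fields like this, using the boot_params layout from
asm/bootparam.h and the lguest value from the table above:

    #include <asm/bootparam.h>

    /* bp points at the boot_params block the loader hands to the kernel */
    static void set_lguest_subarch(struct boot_params *bp, __u64 priv)
    {
            bp->hdr.hardware_subarch = 1;         /* 0x00000001 = lguest */
            bp->hdr.hardware_subarch_data = priv; /* subarch-private data */
    }
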
| 483 | 515 | ||
| 484 | **** THE KERNEL COMMAND LINE | 516 | **** THE KERNEL COMMAND LINE |
| 485 | 517 | ||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6accd360da73..b2361667839f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
| @@ -772,6 +772,23 @@ and is between 256 and 4096 characters. It is defined in the file | |||
| 772 | 772 | ||
| 773 | inttest= [IA64] | 773 | inttest= [IA64] |
| 774 | 774 | ||
| 775 | intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option | ||
| 776 | off | ||
| 777 | Disable the Intel IOMMU driver. | ||
| 778 | igfx_off [Default Off] | ||
| 779 | By default, gfx is mapped as a normal device. With | ||
| 780 | this option, if a gfx device has a dedicated DMAR | ||
| 781 | unit, that DMAR unit is left disabled (bypassed). In | ||
| 782 | this case, the gfx device will use physical addresses | ||
| 783 | for DMA. | ||
| 784 | forcedac [x86_64] | ||
| 785 | With this option the IOMMU will not try to allocate | ||
| 786 | an I/O virtual address below 32 bits, forcing dual | ||
| 787 | address cycles on the PCI bus for cards supporting | ||
| 788 | more than 32-bit addressing. The default is to look | ||
| 789 | for a translation below 32 bits and fall back to the | ||
| 790 | higher range only if none is available. | ||
| 791 | |||
| 775 | io7= [HW] IO7 for Marvel based alpha systems | 792 | io7= [HW] IO7 for Marvel based alpha systems |
| 776 | See comment before marvel_specify_io7 in | 793 | See comment before marvel_specify_io7 in |
| 777 | arch/alpha/kernel/core_marvel.c. | 794 | arch/alpha/kernel/core_marvel.c. |
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index 5fbcc22c98e9..168117bd6ee8 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt | |||
| @@ -2,7 +2,8 @@ | |||
| 2 | Memory Hotplug | 2 | Memory Hotplug |
| 3 | ============== | 3 | ============== |
| 4 | 4 | ||
| 5 | Last Updated: Jul 28 2007 | 5 | Created: Jul 28 2007 |
| 6 | Added description of the memory hotplug notifier: Oct 11 2007 | ||
| 6 | 7 | ||
| 7 | This document is about memory hotplug including how-to-use and current status. | 8 | This document is about memory hotplug including how-to-use and current status. |
| 8 | Because Memory Hotplug is still under development, contents of this text will | 9 | Because Memory Hotplug is still under development, contents of this text will |
| @@ -24,7 +25,8 @@ be changed often. | |||
| 24 | 6.1 Memory offline and ZONE_MOVABLE | 25 | 6.1 Memory offline and ZONE_MOVABLE |
| 25 | 6.2. How to offline memory | 26 | 6.2. How to offline memory |
| 26 | 7. Physical memory remove | 27 | 7. Physical memory remove |
| 27 | 8. Future Work List | 28 | 8. Memory hotplug event notifier |
| 29 | 9. Future Work List | ||
| 28 | 30 | ||
| 29 | Note(1): x86_64's has special implementation for memory hotplug. | 31 | Note(1): x86_64's has special implementation for memory hotplug. |
| 30 | This text does not describe it. | 32 | This text does not describe it. |
| @@ -307,8 +309,58 @@ Need more implementation yet.... | |||
| 307 | - Notification completion of remove works by OS to firmware. | 309 | - Notification completion of remove works by OS to firmware. |
| 308 | - Guard from remove if not yet. | 310 | - Guard from remove if not yet. |
| 309 | 311 | ||
| 312 | -------------------------------- | ||
| 313 | 8. Memory hotplug event notifier | ||
| 314 | -------------------------------- | ||
| 315 | Memory hotplug has an event notifier. There are six types of notification: | ||
| 316 | |||
| 317 | MEMORY_GOING_ONLINE | ||
| 318 | Generated before new memory becomes available in order to be able to | ||
| 319 | prepare subsystems to handle memory. The page allocator is still unable | ||
| 320 | to allocate from the new memory. | ||
| 321 | |||
| 322 | MEMORY_CANCEL_ONLINE | ||
| 323 | Generated if MEMORY_GOING_ONLINE fails. | ||
| 324 | |||
| 325 | MEMORY_ONLINE | ||
| 326 | Generated when memory has successfully been brought online. The callback may | ||
| 327 | allocate pages from the new memory. | ||
| 328 | |||
| 329 | MEMORY_GOING_OFFLINE | ||
| 330 | Generated to begin the process of offlining memory. Allocations are no | ||
| 331 | longer possible from the memory but some of the memory to be offlined | ||
| 332 | is still in use. The callback can be used to free memory known to a | ||
| 333 | subsystem from the indicated memory section. | ||
| 334 | |||
| 335 | MEMORY_CANCEL_OFFLINE | ||
| 336 | Generated if MEMORY_GOING_OFFLINE fails. Memory is available again from | ||
| 337 | the section that we attempted to offline. | ||
| 338 | |||
| 339 | MEMORY_OFFLINE | ||
| 340 | Generated after offlining memory is complete. | ||
| 341 | |||
| 342 | A callback routine can be registered by | ||
| 343 | hotplug_memory_notifier(callback_func, priority) | ||
| 344 | |||
| 345 | The second argument of the callback function (action) is one of the event | ||
| 346 | types above. The third argument is a pointer to a struct memory_notify: | ||
| 347 | |||
| 348 | struct memory_notify { | ||
| 349 | unsigned long start_pfn; | ||
| 350 | unsigned long nr_pages; | ||
| 351 | int status_change_nid; | ||
| 352 | }; | ||
| 353 | |||
| 354 | start_pfn is the first pfn of the memory being onlined/offlined. | ||
| 355 | nr_pages is the number of pages of the memory being onlined/offlined. | ||
| 356 | status_change_nid is the id of the node whose N_HIGH_MEMORY nodemask bit is | ||
| 357 | (or will be) set or cleared, i.e. a (memoryless) node gets its first memory | ||
| 358 | by onlining, or a node loses all of its memory. If this is -1, then the | ||
| 359 | nodemask status is not changed. If status_change_nid >= 0, the callback | ||
| 360 | should create/discard structures for the node as necessary. | ||
| 361 | |||
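A minimal sketch of such a callback (the subsystem is hypothetical; only two
of the six events are handled here, the rest are ignored):

    #include <linux/memory.h>
    #include <linux/notifier.h>

    static int example_mem_notify(struct notifier_block *self,
                                  unsigned long action, void *arg)
    {
            struct memory_notify *mn = arg;

            switch (action) {
            case MEM_GOING_ONLINE:
                    /* Prepare per-node structures if a node is gaining
                     * its first memory (mn->status_change_nid >= 0). */
                    break;
            case MEM_OFFLINE:
                    /* Drop anything cached for pfns in
                     * [mn->start_pfn, mn->start_pfn + mn->nr_pages). */
                    break;
            }
            return NOTIFY_OK;
    }

    /* Registration, e.g. from an __init function: */
    /* hotplug_memory_notifier(example_mem_notify, 0); */
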
| 310 | -------------- | 362 | -------------- |
| 311 | 8. Future Work | 363 | 9. Future Work |
| 312 | -------------- | 364 | -------------- |
| 313 | - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like | 365 | - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like |
| 314 | sysctl or new control file. | 366 | sysctl or new control file. |
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 8e4894b205e2..3f7ea13358e9 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c | |||
| @@ -1090,7 +1090,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e) | |||
| 1090 | 1090 | ||
| 1091 | void | 1091 | void |
| 1092 | efi_initialize_iomem_resources(struct resource *code_resource, | 1092 | efi_initialize_iomem_resources(struct resource *code_resource, |
| 1093 | struct resource *data_resource) | 1093 | struct resource *data_resource, |
| 1094 | struct resource *bss_resource) | ||
| 1094 | { | 1095 | { |
| 1095 | struct resource *res; | 1096 | struct resource *res; |
| 1096 | void *efi_map_start, *efi_map_end, *p; | 1097 | void *efi_map_start, *efi_map_end, *p; |
| @@ -1171,6 +1172,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, | |||
| 1171 | */ | 1172 | */ |
| 1172 | insert_resource(res, code_resource); | 1173 | insert_resource(res, code_resource); |
| 1173 | insert_resource(res, data_resource); | 1174 | insert_resource(res, data_resource); |
| 1175 | insert_resource(res, bss_resource); | ||
| 1174 | #ifdef CONFIG_KEXEC | 1176 | #ifdef CONFIG_KEXEC |
| 1175 | insert_resource(res, &efi_memmap_res); | 1177 | insert_resource(res, &efi_memmap_res); |
| 1176 | insert_resource(res, &boot_param_res); | 1178 | insert_resource(res, &boot_param_res); |
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index cbf67f1aa291..ae6c3c02e117 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c | |||
| @@ -90,7 +90,12 @@ static struct resource code_resource = { | |||
| 90 | .name = "Kernel code", | 90 | .name = "Kernel code", |
| 91 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | 91 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
| 92 | }; | 92 | }; |
| 93 | extern char _text[], _end[], _etext[]; | 93 | |
| 94 | static struct resource bss_resource = { | ||
| 95 | .name = "Kernel bss", | ||
| 96 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
| 97 | }; | ||
| 98 | extern char _text[], _end[], _etext[], _edata[], _bss[]; | ||
| 94 | 99 | ||
| 95 | unsigned long ia64_max_cacheline_size; | 100 | unsigned long ia64_max_cacheline_size; |
| 96 | 101 | ||
| @@ -200,8 +205,11 @@ static int __init register_memory(void) | |||
| 200 | code_resource.start = ia64_tpa(_text); | 205 | code_resource.start = ia64_tpa(_text); |
| 201 | code_resource.end = ia64_tpa(_etext) - 1; | 206 | code_resource.end = ia64_tpa(_etext) - 1; |
| 202 | data_resource.start = ia64_tpa(_etext); | 207 | data_resource.start = ia64_tpa(_etext); |
| 203 | data_resource.end = ia64_tpa(_end) - 1; | 208 | data_resource.end = ia64_tpa(_edata) - 1; |
| 204 | efi_initialize_iomem_resources(&code_resource, &data_resource); | 209 | bss_resource.start = ia64_tpa(_bss); |
| 210 | bss_resource.end = ia64_tpa(_end) - 1; | ||
| 211 | efi_initialize_iomem_resources(&code_resource, &data_resource, | ||
| 212 | &bss_resource); | ||
| 205 | 213 | ||
| 206 | return 0; | 214 | return 0; |
| 207 | } | 215 | } |
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index f35ea2237522..a0ae2e7f6cec 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S | |||
| @@ -27,13 +27,22 @@ | |||
| 27 | #include <asm/segment.h> | 27 | #include <asm/segment.h> |
| 28 | #include <asm/page.h> | 28 | #include <asm/page.h> |
| 29 | #include <asm/boot.h> | 29 | #include <asm/boot.h> |
| 30 | #include <asm/asm-offsets.h> | ||
| 30 | 31 | ||
| 31 | .section ".text.head","ax",@progbits | 32 | .section ".text.head","ax",@progbits |
| 32 | .globl startup_32 | 33 | .globl startup_32 |
| 33 | 34 | ||
| 34 | startup_32: | 35 | startup_32: |
| 35 | cld | 36 | /* check to see if KEEP_SEGMENTS flag is meaningful */ |
| 36 | cli | 37 | cmpw $0x207, BP_version(%esi) |
| 38 | jb 1f | ||
| 39 | |||
| 40 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking | ||
| 41 | * us to not reload segments */ | ||
| 42 | testb $(1<<6), BP_loadflags(%esi) | ||
| 43 | jnz 2f | ||
| 44 | |||
| 45 | 1: cli | ||
| 37 | movl $(__BOOT_DS),%eax | 46 | movl $(__BOOT_DS),%eax |
| 38 | movl %eax,%ds | 47 | movl %eax,%ds |
| 39 | movl %eax,%es | 48 | movl %eax,%es |
| @@ -41,6 +50,8 @@ startup_32: | |||
| 41 | movl %eax,%gs | 50 | movl %eax,%gs |
| 42 | movl %eax,%ss | 51 | movl %eax,%ss |
| 43 | 52 | ||
| 53 | 2: cld | ||
| 54 | |||
| 44 | /* Calculate the delta between where we were compiled to run | 55 | /* Calculate the delta between where we were compiled to run |
| 45 | * at and where we were actually loaded at. This can only be done | 56 | * at and where we were actually loaded at. This can only be done |
| 46 | * with a short local call on x86. Nothing else will tell us what | 57 | * with a short local call on x86. Nothing else will tell us what |
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c index 1dc1e19c0a9f..b74d60d1b2fa 100644 --- a/arch/x86/boot/compressed/misc_32.c +++ b/arch/x86/boot/compressed/misc_32.c | |||
| @@ -247,6 +247,9 @@ static void putstr(const char *s) | |||
| 247 | int x,y,pos; | 247 | int x,y,pos; |
| 248 | char c; | 248 | char c; |
| 249 | 249 | ||
| 250 | if (RM_SCREEN_INFO.orig_video_mode == 0 && lines == 0 && cols == 0) | ||
| 251 | return; | ||
| 252 | |||
| 250 | x = RM_SCREEN_INFO.orig_x; | 253 | x = RM_SCREEN_INFO.orig_x; |
| 251 | y = RM_SCREEN_INFO.orig_y; | 254 | y = RM_SCREEN_INFO.orig_y; |
| 252 | 255 | ||
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index f3140e596d40..8353c81c41c0 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S | |||
| @@ -119,7 +119,7 @@ _start: | |||
| 119 | # Part 2 of the header, from the old setup.S | 119 | # Part 2 of the header, from the old setup.S |
| 120 | 120 | ||
| 121 | .ascii "HdrS" # header signature | 121 | .ascii "HdrS" # header signature |
| 122 | .word 0x0206 # header version number (>= 0x0105) | 122 | .word 0x0207 # header version number (>= 0x0105) |
| 123 | # or else old loadlin-1.5 will fail) | 123 | # or else old loadlin-1.5 will fail) |
| 124 | .globl realmode_swtch | 124 | .globl realmode_swtch |
| 125 | realmode_swtch: .word 0, 0 # default_switch, SETUPSEG | 125 | realmode_swtch: .word 0, 0 # default_switch, SETUPSEG |
| @@ -214,6 +214,11 @@ cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, | |||
| 214 | #added with boot protocol | 214 | #added with boot protocol |
| 215 | #version 2.06 | 215 | #version 2.06 |
| 216 | 216 | ||
| 217 | hardware_subarch: .long 0 # subarchitecture, added with 2.07 | ||
| 218 | # default to 0 for normal x86 PC | ||
| 219 | |||
| 220 | hardware_subarch_data: .quad 0 | ||
| 221 | |||
| 217 | # End of setup header ##################################################### | 222 | # End of setup header ##################################################### |
| 218 | 223 | ||
| 219 | .section ".inittext", "ax" | 224 | .section ".inittext", "ax" |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index f1b7cdda82b3..f8764716b0c0 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <asm/fixmap.h> | 15 | #include <asm/fixmap.h> |
| 16 | #include <asm/processor.h> | 16 | #include <asm/processor.h> |
| 17 | #include <asm/thread_info.h> | 17 | #include <asm/thread_info.h> |
| 18 | #include <asm/bootparam.h> | ||
| 18 | #include <asm/elf.h> | 19 | #include <asm/elf.h> |
| 19 | 20 | ||
| 20 | #include <xen/interface/xen.h> | 21 | #include <xen/interface/xen.h> |
| @@ -146,4 +147,10 @@ void foo(void) | |||
| 146 | OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); | 147 | OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); |
| 147 | OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); | 148 | OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); |
| 148 | #endif | 149 | #endif |
| 150 | |||
| 151 | BLANK(); | ||
| 152 | OFFSET(BP_scratch, boot_params, scratch); | ||
| 153 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); | ||
| 154 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | ||
| 155 | OFFSET(BP_version, boot_params, hdr.version); | ||
| 149 | } | 156 | } |
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 58fd54eb5577..18f500d185a2 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c | |||
| @@ -51,6 +51,13 @@ struct resource code_resource = { | |||
| 51 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | 51 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
| 52 | }; | 52 | }; |
| 53 | 53 | ||
| 54 | struct resource bss_resource = { | ||
| 55 | .name = "Kernel bss", | ||
| 56 | .start = 0, | ||
| 57 | .end = 0, | ||
| 58 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
| 59 | }; | ||
| 60 | |||
| 54 | static struct resource system_rom_resource = { | 61 | static struct resource system_rom_resource = { |
| 55 | .name = "System ROM", | 62 | .name = "System ROM", |
| 56 | .start = 0xf0000, | 63 | .start = 0xf0000, |
| @@ -254,7 +261,9 @@ static void __init probe_roms(void) | |||
| 254 | * and also for regions reported as reserved by the e820. | 261 | * and also for regions reported as reserved by the e820. |
| 255 | */ | 262 | */ |
| 256 | static void __init | 263 | static void __init |
| 257 | legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) | 264 | legacy_init_iomem_resources(struct resource *code_resource, |
| 265 | struct resource *data_resource, | ||
| 266 | struct resource *bss_resource) | ||
| 258 | { | 267 | { |
| 259 | int i; | 268 | int i; |
| 260 | 269 | ||
| @@ -287,6 +296,7 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat | |||
| 287 | */ | 296 | */ |
| 288 | request_resource(res, code_resource); | 297 | request_resource(res, code_resource); |
| 289 | request_resource(res, data_resource); | 298 | request_resource(res, data_resource); |
| 299 | request_resource(res, bss_resource); | ||
| 290 | #ifdef CONFIG_KEXEC | 300 | #ifdef CONFIG_KEXEC |
| 291 | if (crashk_res.start != crashk_res.end) | 301 | if (crashk_res.start != crashk_res.end) |
| 292 | request_resource(res, &crashk_res); | 302 | request_resource(res, &crashk_res); |
| @@ -307,9 +317,11 @@ static int __init request_standard_resources(void) | |||
| 307 | 317 | ||
| 308 | printk("Setting up standard PCI resources\n"); | 318 | printk("Setting up standard PCI resources\n"); |
| 309 | if (efi_enabled) | 319 | if (efi_enabled) |
| 310 | efi_initialize_iomem_resources(&code_resource, &data_resource); | 320 | efi_initialize_iomem_resources(&code_resource, |
| 321 | &data_resource, &bss_resource); | ||
| 311 | else | 322 | else |
| 312 | legacy_init_iomem_resources(&code_resource, &data_resource); | 323 | legacy_init_iomem_resources(&code_resource, |
| 324 | &data_resource, &bss_resource); | ||
| 313 | 325 | ||
| 314 | /* EFI systems may still have VGA */ | 326 | /* EFI systems may still have VGA */ |
| 315 | request_resource(&iomem_resource, &video_ram_resource); | 327 | request_resource(&iomem_resource, &video_ram_resource); |
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 57616865d8a0..04698e0b056c 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c | |||
| @@ -47,7 +47,7 @@ unsigned long end_pfn_map; | |||
| 47 | */ | 47 | */ |
| 48 | static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; | 48 | static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; |
| 49 | 49 | ||
| 50 | extern struct resource code_resource, data_resource; | 50 | extern struct resource code_resource, data_resource, bss_resource; |
| 51 | 51 | ||
| 52 | /* Check for some hardcoded bad areas that early boot is not allowed to touch */ | 52 | /* Check for some hardcoded bad areas that early boot is not allowed to touch */ |
| 53 | static inline int bad_addr(unsigned long *addrp, unsigned long size) | 53 | static inline int bad_addr(unsigned long *addrp, unsigned long size) |
| @@ -225,6 +225,7 @@ void __init e820_reserve_resources(void) | |||
| 225 | */ | 225 | */ |
| 226 | request_resource(res, &code_resource); | 226 | request_resource(res, &code_resource); |
| 227 | request_resource(res, &data_resource); | 227 | request_resource(res, &data_resource); |
| 228 | request_resource(res, &bss_resource); | ||
| 228 | #ifdef CONFIG_KEXEC | 229 | #ifdef CONFIG_KEXEC |
| 229 | if (crashk_res.start != crashk_res.end) | 230 | if (crashk_res.start != crashk_res.end) |
| 230 | request_resource(res, &crashk_res); | 231 | request_resource(res, &crashk_res); |
| @@ -729,3 +730,22 @@ __init void e820_setup_gap(void) | |||
| 729 | printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | 730 | printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", |
| 730 | pci_mem_start, gapstart, gapsize); | 731 | pci_mem_start, gapstart, gapsize); |
| 731 | } | 732 | } |
| 733 | |||
| 734 | int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) | ||
| 735 | { | ||
| 736 | int i; | ||
| 737 | |||
| 738 | if (slot < 0 || slot >= e820.nr_map) | ||
| 739 | return -1; | ||
| 740 | for (i = slot; i < e820.nr_map; i++) { | ||
| 741 | if (e820.map[i].type != E820_RAM) | ||
| 742 | continue; | ||
| 743 | break; | ||
| 744 | } | ||
| 745 | if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) | ||
| 746 | return -1; | ||
| 747 | *addr = e820.map[i].addr; | ||
| 748 | *size = min_t(u64, e820.map[i].size + e820.map[i].addr, | ||
| 749 | max_pfn << PAGE_SHIFT) - *addr; | ||
| 750 | return i + 1; | ||
| 751 | } | ||
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index b42558c48e9d..e2be78f49399 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c | |||
| @@ -603,7 +603,8 @@ void __init efi_enter_virtual_mode(void) | |||
| 603 | 603 | ||
| 604 | void __init | 604 | void __init |
| 605 | efi_initialize_iomem_resources(struct resource *code_resource, | 605 | efi_initialize_iomem_resources(struct resource *code_resource, |
| 606 | struct resource *data_resource) | 606 | struct resource *data_resource, |
| 607 | struct resource *bss_resource) | ||
| 607 | { | 608 | { |
| 608 | struct resource *res; | 609 | struct resource *res; |
| 609 | efi_memory_desc_t *md; | 610 | efi_memory_desc_t *md; |
| @@ -675,6 +676,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, | |||
| 675 | if (md->type == EFI_CONVENTIONAL_MEMORY) { | 676 | if (md->type == EFI_CONVENTIONAL_MEMORY) { |
| 676 | request_resource(res, code_resource); | 677 | request_resource(res, code_resource); |
| 677 | request_resource(res, data_resource); | 678 | request_resource(res, data_resource); |
| 679 | request_resource(res, bss_resource); | ||
| 678 | #ifdef CONFIG_KEXEC | 680 | #ifdef CONFIG_KEXEC |
| 679 | request_resource(res, &crashk_res); | 681 | request_resource(res, &crashk_res); |
| 680 | #endif | 682 | #endif |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 39677965e161..00b1c2c56454 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
| @@ -79,22 +79,30 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_ | |||
| 79 | */ | 79 | */ |
| 80 | .section .text.head,"ax",@progbits | 80 | .section .text.head,"ax",@progbits |
| 81 | ENTRY(startup_32) | 81 | ENTRY(startup_32) |
| 82 | /* check to see if KEEP_SEGMENTS flag is meaningful */ | ||
| 83 | cmpw $0x207, BP_version(%esi) | ||
| 84 | jb 1f | ||
| 85 | |||
| 86 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking | ||
| 87 | us to not reload segments */ | ||
| 88 | testb $(1<<6), BP_loadflags(%esi) | ||
| 89 | jnz 2f | ||
| 82 | 90 | ||
| 83 | /* | 91 | /* |
| 84 | * Set segments to known values. | 92 | * Set segments to known values. |
| 85 | */ | 93 | */ |
| 86 | cld | 94 | 1: lgdt boot_gdt_descr - __PAGE_OFFSET |
| 87 | lgdt boot_gdt_descr - __PAGE_OFFSET | ||
| 88 | movl $(__BOOT_DS),%eax | 95 | movl $(__BOOT_DS),%eax |
| 89 | movl %eax,%ds | 96 | movl %eax,%ds |
| 90 | movl %eax,%es | 97 | movl %eax,%es |
| 91 | movl %eax,%fs | 98 | movl %eax,%fs |
| 92 | movl %eax,%gs | 99 | movl %eax,%gs |
| 100 | 2: | ||
| 93 | 101 | ||
| 94 | /* | 102 | /* |
| 95 | * Clear BSS first so that there are no surprises... | 103 | * Clear BSS first so that there are no surprises... |
| 96 | * No need to cld as DF is already clear from cld above... | ||
| 97 | */ | 104 | */ |
| 105 | cld | ||
| 98 | xorl %eax,%eax | 106 | xorl %eax,%eax |
| 99 | movl $__bss_start - __PAGE_OFFSET,%edi | 107 | movl $__bss_start - __PAGE_OFFSET,%edi |
| 100 | movl $__bss_stop - __PAGE_OFFSET,%ecx | 108 | movl $__bss_stop - __PAGE_OFFSET,%ecx |
| @@ -128,6 +136,35 @@ ENTRY(startup_32) | |||
| 128 | movsl | 136 | movsl |
| 129 | 1: | 137 | 1: |
| 130 | 138 | ||
| 139 | #ifdef CONFIG_PARAVIRT | ||
| 140 | cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET) | ||
| 141 | jb default_entry | ||
| 142 | |||
| 143 | /* Paravirt-compatible boot parameters. Look to see what architecture | ||
| 144 | we're booting under. */ | ||
| 145 | movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax | ||
| 146 | cmpl $num_subarch_entries, %eax | ||
| 147 | jae bad_subarch | ||
| 148 | |||
| 149 | movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax | ||
| 150 | subl $__PAGE_OFFSET, %eax | ||
| 151 | jmp *%eax | ||
| 152 | |||
| 153 | bad_subarch: | ||
| 154 | WEAK(lguest_entry) | ||
| 155 | WEAK(xen_entry) | ||
| 156 | /* Unknown implementation; there's really | ||
| 157 | nothing we can do at this point. */ | ||
| 158 | ud2a | ||
| 159 | .data | ||
| 160 | subarch_entries: | ||
| 161 | .long default_entry /* normal x86/PC */ | ||
| 162 | .long lguest_entry /* lguest hypervisor */ | ||
| 163 | .long xen_entry /* Xen hypervisor */ | ||
| 164 | num_subarch_entries = (. - subarch_entries) / 4 | ||
| 165 | .previous | ||
| 166 | #endif /* CONFIG_PARAVIRT */ | ||
| 167 | |||
| 131 | /* | 168 | /* |
| 132 | * Initialize page tables. This creates a PDE and a set of page | 169 | * Initialize page tables. This creates a PDE and a set of page |
| 133 | * tables, which are located immediately beyond _end. The variable | 170 | * tables, which are located immediately beyond _end. The variable |
| @@ -140,6 +177,7 @@ ENTRY(startup_32) | |||
| 140 | */ | 177 | */ |
| 141 | page_pde_offset = (__PAGE_OFFSET >> 20); | 178 | page_pde_offset = (__PAGE_OFFSET >> 20); |
| 142 | 179 | ||
| 180 | default_entry: | ||
| 143 | movl $(pg0 - __PAGE_OFFSET), %edi | 181 | movl $(pg0 - __PAGE_OFFSET), %edi |
| 144 | movl $(swapper_pg_dir - __PAGE_OFFSET), %edx | 182 | movl $(swapper_pg_dir - __PAGE_OFFSET), %edx |
| 145 | movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ | 183 | movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ |
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b3c2d268d708..953328b55a30 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/sysdev.h> | 31 | #include <linux/sysdev.h> |
| 32 | #include <linux/msi.h> | 32 | #include <linux/msi.h> |
| 33 | #include <linux/htirq.h> | 33 | #include <linux/htirq.h> |
| 34 | #include <linux/dmar.h> | ||
| 34 | #ifdef CONFIG_ACPI | 35 | #ifdef CONFIG_ACPI |
| 35 | #include <acpi/acpi_bus.h> | 36 | #include <acpi/acpi_bus.h> |
| 36 | #endif | 37 | #endif |
| @@ -2031,8 +2032,64 @@ void arch_teardown_msi_irq(unsigned int irq) | |||
| 2031 | destroy_irq(irq); | 2032 | destroy_irq(irq); |
| 2032 | } | 2033 | } |
| 2033 | 2034 | ||
| 2034 | #endif /* CONFIG_PCI_MSI */ | 2035 | #ifdef CONFIG_DMAR |
| 2036 | #ifdef CONFIG_SMP | ||
| 2037 | static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) | ||
| 2038 | { | ||
| 2039 | struct irq_cfg *cfg = irq_cfg + irq; | ||
| 2040 | struct msi_msg msg; | ||
| 2041 | unsigned int dest; | ||
| 2042 | cpumask_t tmp; | ||
| 2043 | |||
| 2044 | cpus_and(tmp, mask, cpu_online_map); | ||
| 2045 | if (cpus_empty(tmp)) | ||
| 2046 | return; | ||
| 2047 | |||
| 2048 | if (assign_irq_vector(irq, mask)) | ||
| 2049 | return; | ||
| 2050 | |||
| 2051 | cpus_and(tmp, cfg->domain, mask); | ||
| 2052 | dest = cpu_mask_to_apicid(tmp); | ||
| 2053 | |||
| 2054 | dmar_msi_read(irq, &msg); | ||
| 2055 | |||
| 2056 | msg.data &= ~MSI_DATA_VECTOR_MASK; | ||
| 2057 | msg.data |= MSI_DATA_VECTOR(cfg->vector); | ||
| 2058 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; | ||
| 2059 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | ||
| 2060 | |||
| 2061 | dmar_msi_write(irq, &msg); | ||
| 2062 | irq_desc[irq].affinity = mask; | ||
| 2063 | } | ||
| 2064 | #endif /* CONFIG_SMP */ | ||
| 2065 | |||
| 2066 | struct irq_chip dmar_msi_type = { | ||
| 2067 | .name = "DMAR_MSI", | ||
| 2068 | .unmask = dmar_msi_unmask, | ||
| 2069 | .mask = dmar_msi_mask, | ||
| 2070 | .ack = ack_apic_edge, | ||
| 2071 | #ifdef CONFIG_SMP | ||
| 2072 | .set_affinity = dmar_msi_set_affinity, | ||
| 2073 | #endif | ||
| 2074 | .retrigger = ioapic_retrigger_irq, | ||
| 2075 | }; | ||
| 2076 | |||
| 2077 | int arch_setup_dmar_msi(unsigned int irq) | ||
| 2078 | { | ||
| 2079 | int ret; | ||
| 2080 | struct msi_msg msg; | ||
| 2081 | |||
| 2082 | ret = msi_compose_msg(NULL, irq, &msg); | ||
| 2083 | if (ret < 0) | ||
| 2084 | return ret; | ||
| 2085 | dmar_msi_write(irq, &msg); | ||
| 2086 | set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, | ||
| 2087 | "edge"); | ||
| 2088 | return 0; | ||
| 2089 | } | ||
| 2090 | #endif | ||
| 2035 | 2091 | ||
| 2092 | #endif /* CONFIG_PCI_MSI */ | ||
| 2036 | /* | 2093 | /* |
| 2037 | * Hypertransport interrupt support | 2094 | * Hypertransport interrupt support |
| 2038 | */ | 2095 | */ |
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index afaf9f12c032..393e2725a6e3 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
| 8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
| 9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
| 10 | #include <linux/dmar.h> | ||
| 10 | #include <asm/io.h> | 11 | #include <asm/io.h> |
| 11 | #include <asm/iommu.h> | 12 | #include <asm/iommu.h> |
| 12 | #include <asm/calgary.h> | 13 | #include <asm/calgary.h> |
| @@ -305,6 +306,8 @@ void __init pci_iommu_alloc(void) | |||
| 305 | detect_calgary(); | 306 | detect_calgary(); |
| 306 | #endif | 307 | #endif |
| 307 | 308 | ||
| 309 | detect_intel_iommu(); | ||
| 310 | |||
| 308 | #ifdef CONFIG_SWIOTLB | 311 | #ifdef CONFIG_SWIOTLB |
| 309 | pci_swiotlb_init(); | 312 | pci_swiotlb_init(); |
| 310 | #endif | 313 | #endif |
| @@ -316,6 +319,8 @@ static int __init pci_iommu_init(void) | |||
| 316 | calgary_iommu_init(); | 319 | calgary_iommu_init(); |
| 317 | #endif | 320 | #endif |
| 318 | 321 | ||
| 322 | intel_iommu_init(); | ||
| 323 | |||
| 319 | #ifdef CONFIG_IOMMU | 324 | #ifdef CONFIG_IOMMU |
| 320 | gart_iommu_init(); | 325 | gart_iommu_init(); |
| 321 | #endif | 326 | #endif |
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index ba2e165a8a0f..cc0e91447b76 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c | |||
| @@ -60,6 +60,7 @@ | |||
| 60 | #include <asm/vmi.h> | 60 | #include <asm/vmi.h> |
| 61 | #include <setup_arch.h> | 61 | #include <setup_arch.h> |
| 62 | #include <bios_ebda.h> | 62 | #include <bios_ebda.h> |
| 63 | #include <asm/cacheflush.h> | ||
| 63 | 64 | ||
| 64 | /* This value is set up by the early boot code to point to the value | 65 | /* This value is set up by the early boot code to point to the value |
| 65 | immediately after the boot time page tables. It contains a *physical* | 66 | immediately after the boot time page tables. It contains a *physical* |
| @@ -73,6 +74,7 @@ int disable_pse __devinitdata = 0; | |||
| 73 | */ | 74 | */ |
| 74 | extern struct resource code_resource; | 75 | extern struct resource code_resource; |
| 75 | extern struct resource data_resource; | 76 | extern struct resource data_resource; |
| 77 | extern struct resource bss_resource; | ||
| 76 | 78 | ||
| 77 | /* cpu data as detected by the assembly code in head.S */ | 79 | /* cpu data as detected by the assembly code in head.S */ |
| 78 | struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | 80 | struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; |
| @@ -600,6 +602,8 @@ void __init setup_arch(char **cmdline_p) | |||
| 600 | code_resource.end = virt_to_phys(_etext)-1; | 602 | code_resource.end = virt_to_phys(_etext)-1; |
| 601 | data_resource.start = virt_to_phys(_etext); | 603 | data_resource.start = virt_to_phys(_etext); |
| 602 | data_resource.end = virt_to_phys(_edata)-1; | 604 | data_resource.end = virt_to_phys(_edata)-1; |
| 605 | bss_resource.start = virt_to_phys(&__bss_start); | ||
| 606 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | ||
| 603 | 607 | ||
| 604 | parse_early_param(); | 608 | parse_early_param(); |
| 605 | 609 | ||
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 31322d42eaae..e7a9e36bd52d 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
| @@ -58,6 +58,7 @@ | |||
| 58 | #include <asm/numa.h> | 58 | #include <asm/numa.h> |
| 59 | #include <asm/sections.h> | 59 | #include <asm/sections.h> |
| 60 | #include <asm/dmi.h> | 60 | #include <asm/dmi.h> |
| 61 | #include <asm/cacheflush.h> | ||
| 61 | 62 | ||
| 62 | /* | 63 | /* |
| 63 | * Machine setup.. | 64 | * Machine setup.. |
| @@ -133,6 +134,12 @@ struct resource code_resource = { | |||
| 133 | .end = 0, | 134 | .end = 0, |
| 134 | .flags = IORESOURCE_RAM, | 135 | .flags = IORESOURCE_RAM, |
| 135 | }; | 136 | }; |
| 137 | struct resource bss_resource = { | ||
| 138 | .name = "Kernel bss", | ||
| 139 | .start = 0, | ||
| 140 | .end = 0, | ||
| 141 | .flags = IORESOURCE_RAM, | ||
| 142 | }; | ||
| 136 | 143 | ||
| 137 | #ifdef CONFIG_PROC_VMCORE | 144 | #ifdef CONFIG_PROC_VMCORE |
| 138 | /* elfcorehdr= specifies the location of elf core header | 145 | /* elfcorehdr= specifies the location of elf core header |
| @@ -276,6 +283,8 @@ void __init setup_arch(char **cmdline_p) | |||
| 276 | code_resource.end = virt_to_phys(&_etext)-1; | 283 | code_resource.end = virt_to_phys(&_etext)-1; |
| 277 | data_resource.start = virt_to_phys(&_etext); | 284 | data_resource.start = virt_to_phys(&_etext); |
| 278 | data_resource.end = virt_to_phys(&_edata)-1; | 285 | data_resource.end = virt_to_phys(&_edata)-1; |
| 286 | bss_resource.start = virt_to_phys(&__bss_start); | ||
| 287 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | ||
| 279 | 288 | ||
| 280 | early_identify_cpu(&boot_cpu_data); | 289 | early_identify_cpu(&boot_cpu_data); |
| 281 | 290 | ||
diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c index c7b7dfe1d405..c40afbaaf93d 100644 --- a/arch/x86/mm/pageattr_64.c +++ b/arch/x86/mm/pageattr_64.c | |||
| @@ -61,10 +61,10 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, | |||
| 61 | return base; | 61 | return base; |
| 62 | } | 62 | } |
| 63 | 63 | ||
| 64 | static void cache_flush_page(void *adr) | 64 | void clflush_cache_range(void *adr, int size) |
| 65 | { | 65 | { |
| 66 | int i; | 66 | int i; |
| 67 | for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) | 67 | for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size) |
| 68 | clflush(adr+i); | 68 | clflush(adr+i); |
| 69 | } | 69 | } |
| 70 | 70 | ||
| @@ -80,7 +80,7 @@ static void flush_kernel_map(void *arg) | |||
| 80 | asm volatile("wbinvd" ::: "memory"); | 80 | asm volatile("wbinvd" ::: "memory"); |
| 81 | else list_for_each_entry(pg, l, lru) { | 81 | else list_for_each_entry(pg, l, lru) { |
| 82 | void *adr = page_address(pg); | 82 | void *adr = page_address(pg); |
| 83 | cache_flush_page(adr); | 83 | clflush_cache_range(adr, PAGE_SIZE); |
| 84 | } | 84 | } |
| 85 | __flush_tlb_all(); | 85 | __flush_tlb_all(); |
| 86 | } | 86 | } |
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index aab25f3ba3ce..c2d24991bb2b 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
| @@ -750,6 +750,38 @@ config PCI_DOMAINS | |||
| 750 | depends on PCI | 750 | depends on PCI |
| 751 | default y | 751 | default y |
| 752 | 752 | ||
| 753 | config DMAR | ||
| 754 | bool "Support for DMA Remapping Devices (EXPERIMENTAL)" | ||
| 755 | depends on PCI_MSI && ACPI && EXPERIMENTAL | ||
| 756 | default y | ||
| 757 | help | ||
| 758 | Support for DMA remapping (DMAR) devices enables independent | ||
| 759 | address translations for Direct Memory Access (DMA) from devices. | ||
| 760 | These DMA remapping devices are reported via ACPI tables, | ||
| 761 | along with the PCI device scope covered by each DMA | ||
| 762 | remapping device. | ||
| 763 | |||
| 764 | config DMAR_GFX_WA | ||
| 765 | bool "Support for Graphics workaround" | ||
| 766 | depends on DMAR | ||
| 767 | default y | ||
| 768 | help | ||
| 769 | Current graphics drivers tend to use physical addresses | ||
| 770 | for DMA and avoid using the DMA API. Setting this config | ||
| 771 | option permits the IOMMU driver to set up a unity map for | ||
| 772 | all OS-visible memory. Hence such drivers can continue | ||
| 773 | to use physical addresses for DMA. | ||
| 774 | |||
| 775 | config DMAR_FLOPPY_WA | ||
| 776 | bool | ||
| 777 | depends on DMAR | ||
| 778 | default y | ||
| 779 | help | ||
| 780 | Floppy disk drivers are known to bypass DMA API calls, | ||
| 781 | thereby failing to work when the IOMMU is enabled. This | ||
| 782 | workaround will set up a 1:1 mapping for the first | ||
| 783 | 16MiB to make the floppy (an ISA device) work. | ||
| 784 | |||
| 753 | source "drivers/pci/pcie/Kconfig" | 785 | source "drivers/pci/pcie/Kconfig" |
| 754 | 786 | ||
| 755 | source "drivers/pci/Kconfig" | 787 | source "drivers/pci/Kconfig" |
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c41d0728efe2..7868707c7eda 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c | |||
| @@ -137,7 +137,7 @@ static ssize_t show_mem_state(struct sys_device *dev, char *buf) | |||
| 137 | return len; | 137 | return len; |
| 138 | } | 138 | } |
| 139 | 139 | ||
| 140 | static inline int memory_notify(unsigned long val, void *v) | 140 | int memory_notify(unsigned long val, void *v) |
| 141 | { | 141 | { |
| 142 | return blocking_notifier_call_chain(&memory_chain, val, v); | 142 | return blocking_notifier_call_chain(&memory_chain, val, v); |
| 143 | } | 143 | } |
| @@ -183,7 +183,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) | |||
| 183 | break; | 183 | break; |
| 184 | case MEM_OFFLINE: | 184 | case MEM_OFFLINE: |
| 185 | mem->state = MEM_GOING_OFFLINE; | 185 | mem->state = MEM_GOING_OFFLINE; |
| 186 | memory_notify(MEM_GOING_OFFLINE, NULL); | ||
| 187 | start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; | 186 | start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; |
| 188 | ret = remove_memory(start_paddr, | 187 | ret = remove_memory(start_paddr, |
| 189 | PAGES_PER_SECTION << PAGE_SHIFT); | 188 | PAGES_PER_SECTION << PAGE_SHIFT); |
| @@ -191,7 +190,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) | |||
| 191 | mem->state = old_state; | 190 | mem->state = old_state; |
| 192 | break; | 191 | break; |
| 193 | } | 192 | } |
| 194 | memory_notify(MEM_MAPPING_INVALID, NULL); | ||
| 195 | break; | 193 | break; |
| 196 | default: | 194 | default: |
| 197 | printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", | 195 | printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", |
| @@ -199,11 +197,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) | |||
| 199 | WARN_ON(1); | 197 | WARN_ON(1); |
| 200 | ret = -EINVAL; | 198 | ret = -EINVAL; |
| 201 | } | 199 | } |
| 202 | /* | ||
| 203 | * For now, only notify on successful memory operations | ||
| 204 | */ | ||
| 205 | if (!ret) | ||
| 206 | memory_notify(action, NULL); | ||
| 207 | 200 | ||
| 208 | return ret; | 201 | return ret; |
| 209 | } | 202 | } |
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 006054a40995..555055650733 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile | |||
| @@ -20,6 +20,9 @@ obj-$(CONFIG_PCI_MSI) += msi.o | |||
| 20 | # Build the Hypertransport interrupt support | 20 | # Build the Hypertransport interrupt support |
| 21 | obj-$(CONFIG_HT_IRQ) += htirq.o | 21 | obj-$(CONFIG_HT_IRQ) += htirq.o |
| 22 | 22 | ||
| 23 | # Build Intel IOMMU support | ||
| 24 | obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o | ||
| 25 | |||
| 23 | # | 26 | # |
| 24 | # Some architectures use the generic PCI setup functions | 27 | # Some architectures use the generic PCI setup functions |
| 25 | # | 28 | # |
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c new file mode 100644 index 000000000000..5dfdfdac92e1 --- /dev/null +++ b/drivers/pci/dmar.c | |||
| @@ -0,0 +1,329 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2006, Intel Corporation. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License along with | ||
| 14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
| 15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
| 16 | * | ||
| 17 | * Copyright (C) Ashok Raj <ashok.raj@intel.com> | ||
| 18 | * Copyright (C) Shaohua Li <shaohua.li@intel.com> | ||
| 19 | * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | ||
| 20 | * | ||
| 21 | * This file implements early detection/parsing of DMA Remapping Devices | ||
| 22 | * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI | ||
| 23 | * tables. | ||
| 24 | */ | ||
| 25 | |||
| 26 | #include <linux/pci.h> | ||
| 27 | #include <linux/dmar.h> | ||
| 28 | |||
| 29 | #undef PREFIX | ||
| 30 | #define PREFIX "DMAR:" | ||
| 31 | |||
| 32 | /* No locks are needed as the DMA remapping hardware unit | ||
| 33 | * list is constructed at boot time and hotplug of | ||
| 34 | * these units is not supported by the architecture. | ||
| 35 | */ | ||
| 36 | LIST_HEAD(dmar_drhd_units); | ||
| 37 | LIST_HEAD(dmar_rmrr_units); | ||
| 38 | |||
| 39 | static struct acpi_table_header * __initdata dmar_tbl; | ||
| 40 | |||
| 41 | static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) | ||
| 42 | { | ||
| 43 | /* | ||
| 44 | * add INCLUDE_ALL at the tail, so a list scan will find it at | ||
| 45 | * the very end. | ||
| 46 | */ | ||
| 47 | if (drhd->include_all) | ||
| 48 | list_add_tail(&drhd->list, &dmar_drhd_units); | ||
| 49 | else | ||
| 50 | list_add(&drhd->list, &dmar_drhd_units); | ||
| 51 | } | ||
| 52 | |||
| 53 | static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) | ||
| 54 | { | ||
| 55 | list_add(&rmrr->list, &dmar_rmrr_units); | ||
| 56 | } | ||
| 57 | |||
| 58 | static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, | ||
| 59 | struct pci_dev **dev, u16 segment) | ||
| 60 | { | ||
| 61 | struct pci_bus *bus; | ||
| 62 | struct pci_dev *pdev = NULL; | ||
| 63 | struct acpi_dmar_pci_path *path; | ||
| 64 | int count; | ||
| 65 | |||
| 66 | bus = pci_find_bus(segment, scope->bus); | ||
| 67 | path = (struct acpi_dmar_pci_path *)(scope + 1); | ||
| 68 | count = (scope->length - sizeof(struct acpi_dmar_device_scope)) | ||
| 69 | / sizeof(struct acpi_dmar_pci_path); | ||
| 70 | |||
| 71 | while (count) { | ||
| 72 | if (pdev) | ||
| 73 | pci_dev_put(pdev); | ||
| 74 | /* | ||
| 75 | * Some BIOSes list non-existent devices in the DMAR table; | ||
| 76 | * just ignore them. | ||
| 77 | */ | ||
| 78 | if (!bus) { | ||
| 79 | printk(KERN_WARNING | ||
| 80 | PREFIX "Device scope bus [%d] not found\n", | ||
| 81 | scope->bus); | ||
| 82 | break; | ||
| 83 | } | ||
| 84 | pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn)); | ||
| 85 | if (!pdev) { | ||
| 86 | printk(KERN_WARNING PREFIX | ||
| 87 | "Device scope device [%04x:%02x:%02x.%02x] not found\n", | ||
| 88 | segment, bus->number, path->dev, path->fn); | ||
| 89 | break; | ||
| 90 | } | ||
| 91 | path++; | ||
| 92 | count--; | ||
| 93 | bus = pdev->subordinate; | ||
| 94 | } | ||
| 95 | if (!pdev) { | ||
| 96 | printk(KERN_WARNING PREFIX | ||
| 97 | "Device scope device [%04x:%02x:%02x.%02x] not found\n", | ||
| 98 | segment, scope->bus, path->dev, path->fn); | ||
| 99 | *dev = NULL; | ||
| 100 | return 0; | ||
| 101 | } | ||
| 102 | if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \ | ||
| 103 | pdev->subordinate) || (scope->entry_type == \ | ||
| 104 | ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) { | ||
| 105 | pci_dev_put(pdev); | ||
| 106 | printk(KERN_WARNING PREFIX | ||
| 107 | "Device scope type does not match for %s\n", | ||
| 108 | pci_name(pdev)); | ||
| 109 | return -EINVAL; | ||
| 110 | } | ||
| 111 | *dev = pdev; | ||
| 112 | return 0; | ||
| 113 | } | ||
| 114 | |||
| 115 | static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, | ||
| 116 | struct pci_dev ***devices, u16 segment) | ||
| 117 | { | ||
| 118 | struct acpi_dmar_device_scope *scope; | ||
| 119 | void * tmp = start; | ||
| 120 | int index; | ||
| 121 | int ret; | ||
| 122 | |||
| 123 | *cnt = 0; | ||
| 124 | while (start < end) { | ||
| 125 | scope = start; | ||
| 126 | if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || | ||
| 127 | scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) | ||
| 128 | (*cnt)++; | ||
| 129 | else | ||
| 130 | printk(KERN_WARNING PREFIX | ||
| 131 | "Unsupported device scope\n"); | ||
| 132 | start += scope->length; | ||
| 133 | } | ||
| 134 | if (*cnt == 0) | ||
| 135 | return 0; | ||
| 136 | |||
| 137 | *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL); | ||
| 138 | if (!*devices) | ||
| 139 | return -ENOMEM; | ||
| 140 | |||
| 141 | start = tmp; | ||
| 142 | index = 0; | ||
| 143 | while (start < end) { | ||
| 144 | scope = start; | ||
| 145 | if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || | ||
| 146 | scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) { | ||
| 147 | ret = dmar_parse_one_dev_scope(scope, | ||
| 148 | &(*devices)[index], segment); | ||
| 149 | if (ret) { | ||
| 150 | kfree(*devices); | ||
| 151 | return ret; | ||
| 152 | } | ||
| 153 | index++; | ||
| 154 | } | ||
| 155 | start += scope->length; | ||
| 156 | } | ||
| 157 | |||
| 158 | return 0; | ||
| 159 | } | ||
| 160 | |||
| 161 | /** | ||
| 162 | * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition | ||
| 163 | * structure, which uniquely represents one DMA remapping hardware unit | ||
| 164 | * present in the platform | ||
| 165 | */ | ||
| 166 | static int __init | ||
| 167 | dmar_parse_one_drhd(struct acpi_dmar_header *header) | ||
| 168 | { | ||
| 169 | struct acpi_dmar_hardware_unit *drhd; | ||
| 170 | struct dmar_drhd_unit *dmaru; | ||
| 171 | int ret = 0; | ||
| 172 | static int include_all; | ||
| 173 | |||
| 174 | dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); | ||
| 175 | if (!dmaru) | ||
| 176 | return -ENOMEM; | ||
| 177 | |||
| 178 | drhd = (struct acpi_dmar_hardware_unit *)header; | ||
| 179 | dmaru->reg_base_addr = drhd->address; | ||
| 180 | dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ | ||
| 181 | |||
| 182 | if (!dmaru->include_all) | ||
| 183 | ret = dmar_parse_dev_scope((void *)(drhd + 1), | ||
| 184 | ((void *)drhd) + header->length, | ||
| 185 | &dmaru->devices_cnt, &dmaru->devices, | ||
| 186 | drhd->segment); | ||
| 187 | else { | ||
| 188 | /* Only allow one INCLUDE_ALL */ | ||
| 189 | if (include_all) { | ||
| 190 | printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL " | ||
| 191 | "device scope is allowed\n"); | ||
| 192 | ret = -EINVAL; | ||
| 193 | } | ||
| 194 | include_all = 1; | ||
| 195 | } | ||
| 196 | |||
| 197 | if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) | ||
| 198 | kfree(dmaru); | ||
| 199 | else | ||
| 200 | dmar_register_drhd_unit(dmaru); | ||
| 201 | return ret; | ||
| 202 | } | ||
| 203 | |||
| 204 | static int __init | ||
| 205 | dmar_parse_one_rmrr(struct acpi_dmar_header *header) | ||
| 206 | { | ||
| 207 | struct acpi_dmar_reserved_memory *rmrr; | ||
| 208 | struct dmar_rmrr_unit *rmrru; | ||
| 209 | int ret = 0; | ||
| 210 | |||
| 211 | rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); | ||
| 212 | if (!rmrru) | ||
| 213 | return -ENOMEM; | ||
| 214 | |||
| 215 | rmrr = (struct acpi_dmar_reserved_memory *)header; | ||
| 216 | rmrru->base_address = rmrr->base_address; | ||
| 217 | rmrru->end_address = rmrr->end_address; | ||
| 218 | ret = dmar_parse_dev_scope((void *)(rmrr + 1), | ||
| 219 | ((void *)rmrr) + header->length, | ||
| 220 | &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); | ||
| 221 | |||
| 222 | if (ret || (rmrru->devices_cnt == 0)) | ||
| 223 | kfree(rmrru); | ||
| 224 | else | ||
| 225 | dmar_register_rmrr_unit(rmrru); | ||
| 226 | return ret; | ||
| 227 | } | ||
| 228 | |||
| 229 | static void __init | ||
| 230 | dmar_table_print_dmar_entry(struct acpi_dmar_header *header) | ||
| 231 | { | ||
| 232 | struct acpi_dmar_hardware_unit *drhd; | ||
| 233 | struct acpi_dmar_reserved_memory *rmrr; | ||
| 234 | |||
| 235 | switch (header->type) { | ||
| 236 | case ACPI_DMAR_TYPE_HARDWARE_UNIT: | ||
| 237 | drhd = (struct acpi_dmar_hardware_unit *)header; | ||
| 238 | printk (KERN_INFO PREFIX | ||
| 239 | "DRHD (flags: 0x%08x)base: 0x%016Lx\n", | ||
| 240 | drhd->flags, drhd->address); | ||
| 241 | break; | ||
| 242 | case ACPI_DMAR_TYPE_RESERVED_MEMORY: | ||
| 243 | rmrr = (struct acpi_dmar_reserved_memory *)header; | ||
| 244 | |||
| 245 | printk (KERN_INFO PREFIX | ||
| 246 | "RMRR base: 0x%016Lx end: 0x%016Lx\n", | ||
| 247 | rmrr->base_address, rmrr->end_address); | ||
| 248 | break; | ||
| 249 | } | ||
| 250 | } | ||
| 251 | |||
| 252 | /** | ||
| 253 | * parse_dmar_table - parses the DMA remapping reporting (DMAR) table | ||
| 254 | */ | ||
| 255 | static int __init | ||
| 256 | parse_dmar_table(void) | ||
| 257 | { | ||
| 258 | struct acpi_table_dmar *dmar; | ||
| 259 | struct acpi_dmar_header *entry_header; | ||
| 260 | int ret = 0; | ||
| 261 | |||
| 262 | dmar = (struct acpi_table_dmar *)dmar_tbl; | ||
| 263 | if (!dmar) | ||
| 264 | return -ENODEV; | ||
| 265 | |||
| 266 | if (!dmar->width) { | ||
| 267 | printk (KERN_WARNING PREFIX "Zero: Invalid DMAR haw\n"); | ||
| 268 | return -EINVAL; | ||
| 269 | } | ||
| 270 | |||
| 271 | printk (KERN_INFO PREFIX "Host address width %d\n", | ||
| 272 | dmar->width + 1); | ||
| 273 | |||
| 274 | entry_header = (struct acpi_dmar_header *)(dmar + 1); | ||
| 275 | while (((unsigned long)entry_header) < | ||
| 276 | (((unsigned long)dmar) + dmar_tbl->length)) { | ||
| 277 | dmar_table_print_dmar_entry(entry_header); | ||
| 278 | |||
| 279 | switch (entry_header->type) { | ||
| 280 | case ACPI_DMAR_TYPE_HARDWARE_UNIT: | ||
| 281 | ret = dmar_parse_one_drhd(entry_header); | ||
| 282 | break; | ||
| 283 | case ACPI_DMAR_TYPE_RESERVED_MEMORY: | ||
| 284 | ret = dmar_parse_one_rmrr(entry_header); | ||
| 285 | break; | ||
| 286 | default: | ||
| 287 | printk(KERN_WARNING PREFIX | ||
| 288 | "Unknown DMAR structure type\n"); | ||
| 289 | ret = 0; /* for forward compatibility */ | ||
| 290 | break; | ||
| 291 | } | ||
| 292 | if (ret) | ||
| 293 | break; | ||
| 294 | |||
| 295 | entry_header = ((void *)entry_header + entry_header->length); | ||
| 296 | } | ||
| 297 | return ret; | ||
| 298 | } | ||
| 299 | |||
| 300 | |||
| 301 | int __init dmar_table_init(void) | ||
| 302 | { | ||
| 303 | |||
| 304 | parse_dmar_table(); | ||
| 305 | if (list_empty(&dmar_drhd_units)) { | ||
| 306 | printk(KERN_INFO PREFIX "No DMAR devices found\n"); | ||
| 307 | return -ENODEV; | ||
| 308 | } | ||
| 309 | return 0; | ||
| 310 | } | ||
| 311 | |||
| 312 | /** | ||
| 313 | * early_dmar_detect - checks to see if the platform supports DMAR devices | ||
| 314 | */ | ||
| 315 | int __init early_dmar_detect(void) | ||
| 316 | { | ||
| 317 | acpi_status status = AE_OK; | ||
| 318 | |||
| 319 | /* if we can find the DMAR table, then there are DMAR devices */ | ||
| 320 | status = acpi_get_table(ACPI_SIG_DMAR, 0, | ||
| 321 | (struct acpi_table_header **)&dmar_tbl); | ||
| 322 | |||
| 323 | if (ACPI_SUCCESS(status) && !dmar_tbl) { | ||
| 324 | printk (KERN_WARNING PREFIX "Unable to map DMAR\n"); | ||
| 325 | status = AE_NOT_FOUND; | ||
| 326 | } | ||
| 327 | |||
| 328 | return (ACPI_SUCCESS(status) ? 1 : 0); | ||
| 329 | } | ||
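Taken together, the expected usage is that boot code (presumably in the
arch setup path, which is not shown in this file) first calls
early_dmar_detect() to probe for the ACPI DMAR table and then
dmar_table_init() to parse it into the drhd/rmrr lists. An illustrative
sketch of such a caller:

	/* Illustrative only: probe for DMAR support, then parse the table. */
	if (early_dmar_detect() && !dmar_table_init()) {
		/* dmar_drhd_units and dmar_rmrr_units are now populated */
	}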
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c new file mode 100644 index 000000000000..b3d70310af49 --- /dev/null +++ b/drivers/pci/intel-iommu.c | |||
| @@ -0,0 +1,2271 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2006, Intel Corporation. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License along with | ||
| 14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
| 15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
| 16 | * | ||
| 17 | * Copyright (C) Ashok Raj <ashok.raj@intel.com> | ||
| 18 | * Copyright (C) Shaohua Li <shaohua.li@intel.com> | ||
| 19 | * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include <linux/init.h> | ||
| 23 | #include <linux/bitmap.h> | ||
| 24 | #include <linux/slab.h> | ||
| 25 | #include <linux/irq.h> | ||
| 26 | #include <linux/interrupt.h> | ||
| 27 | #include <linux/sysdev.h> | ||
| 28 | #include <linux/spinlock.h> | ||
| 29 | #include <linux/pci.h> | ||
| 30 | #include <linux/dmar.h> | ||
| 31 | #include <linux/dma-mapping.h> | ||
| 32 | #include <linux/mempool.h> | ||
| 33 | #include "iova.h" | ||
| 34 | #include "intel-iommu.h" | ||
| 35 | #include <asm/proto.h> /* force_iommu in this header on x86-64 */ | ||
| 36 | #include <asm/cacheflush.h> | ||
| 37 | #include <asm/iommu.h> | ||
| 38 | #include "pci.h" | ||
| 39 | |||
| 40 | #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) | ||
| 41 | #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) | ||
| 42 | |||
| 43 | #define IOAPIC_RANGE_START (0xfee00000) | ||
| 44 | #define IOAPIC_RANGE_END (0xfeefffff) | ||
| 45 | #define IOVA_START_ADDR (0x1000) | ||
| 46 | |||
| 47 | #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 | ||
| 48 | |||
| 49 | #define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */ | ||
| 50 | |||
| 51 | #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) | ||
| 52 | |||
| 53 | static void domain_remove_dev_info(struct dmar_domain *domain); | ||
| 54 | |||
| 55 | static int dmar_disabled; | ||
| 56 | static int __initdata dmar_map_gfx = 1; | ||
| 57 | static int dmar_forcedac; | ||
| 58 | |||
| 59 | #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) | ||
| 60 | static DEFINE_SPINLOCK(device_domain_lock); | ||
| 61 | static LIST_HEAD(device_domain_list); | ||
| 62 | |||
| 63 | static int __init intel_iommu_setup(char *str) | ||
| 64 | { | ||
| 65 | if (!str) | ||
| 66 | return -EINVAL; | ||
| 67 | while (*str) { | ||
| 68 | if (!strncmp(str, "off", 3)) { | ||
| 69 | dmar_disabled = 1; | ||
| 70 | printk(KERN_INFO"Intel-IOMMU: disabled\n"); | ||
| 71 | } else if (!strncmp(str, "igfx_off", 8)) { | ||
| 72 | dmar_map_gfx = 0; | ||
| 73 | printk(KERN_INFO | ||
| 74 | "Intel-IOMMU: disable GFX device mapping\n"); | ||
| 75 | } else if (!strncmp(str, "forcedac", 8)) { | ||
| 76 | printk (KERN_INFO | ||
| 77 | "Intel-IOMMU: Forcing DAC for PCI devices\n"); | ||
| 78 | dmar_forcedac = 1; | ||
| 79 | } | ||
| 80 | |||
| 81 | str += strcspn(str, ","); | ||
| 82 | while (*str == ',') | ||
| 83 | str++; | ||
| 84 | } | ||
| 85 | return 0; | ||
| 86 | } | ||
| 87 | __setup("intel_iommu=", intel_iommu_setup); | ||
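Given the parser above, the recognized options ("off", "igfx_off",
"forcedac") can be combined comma-separated in a single intel_iommu=
kernel command-line parameter, for example:

	intel_iommu=igfx_off,forcedac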
| 88 | |||
| 89 | static struct kmem_cache *iommu_domain_cache; | ||
| 90 | static struct kmem_cache *iommu_devinfo_cache; | ||
| 91 | static struct kmem_cache *iommu_iova_cache; | ||
| 92 | |||
| 93 | static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep) | ||
| 94 | { | ||
| 95 | unsigned int flags; | ||
| 96 | void *vaddr; | ||
| 97 | |||
| 98 | /* trying to avoid low memory issues */ | ||
| 99 | flags = current->flags & PF_MEMALLOC; | ||
| 100 | current->flags |= PF_MEMALLOC; | ||
| 101 | vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC); | ||
| 102 | current->flags &= (~PF_MEMALLOC | flags); | ||
| 103 | return vaddr; | ||
| 104 | } | ||
| 105 | |||
| 106 | |||
| 107 | static inline void *alloc_pgtable_page(void) | ||
| 108 | { | ||
| 109 | unsigned int flags; | ||
| 110 | void *vaddr; | ||
| 111 | |||
| 112 | /* trying to avoid low memory issues */ | ||
| 113 | flags = current->flags & PF_MEMALLOC; | ||
| 114 | current->flags |= PF_MEMALLOC; | ||
| 115 | vaddr = (void *)get_zeroed_page(GFP_ATOMIC); | ||
| 116 | current->flags &= (~PF_MEMALLOC | flags); | ||
| 117 | return vaddr; | ||
| 118 | } | ||
| 119 | |||
| 120 | static inline void free_pgtable_page(void *vaddr) | ||
| 121 | { | ||
| 122 | free_page((unsigned long)vaddr); | ||
| 123 | } | ||
| 124 | |||
| 125 | static inline void *alloc_domain_mem(void) | ||
| 126 | { | ||
| 127 | return iommu_kmem_cache_alloc(iommu_domain_cache); | ||
| 128 | } | ||
| 129 | |||
| 130 | static inline void free_domain_mem(void *vaddr) | ||
| 131 | { | ||
| 132 | kmem_cache_free(iommu_domain_cache, vaddr); | ||
| 133 | } | ||
| 134 | |||
| 135 | static inline void * alloc_devinfo_mem(void) | ||
| 136 | { | ||
| 137 | return iommu_kmem_cache_alloc(iommu_devinfo_cache); | ||
| 138 | } | ||
| 139 | |||
| 140 | static inline void free_devinfo_mem(void *vaddr) | ||
| 141 | { | ||
| 142 | kmem_cache_free(iommu_devinfo_cache, vaddr); | ||
| 143 | } | ||
| 144 | |||
| 145 | struct iova *alloc_iova_mem(void) | ||
| 146 | { | ||
| 147 | return iommu_kmem_cache_alloc(iommu_iova_cache); | ||
| 148 | } | ||
| 149 | |||
| 150 | void free_iova_mem(struct iova *iova) | ||
| 151 | { | ||
| 152 | kmem_cache_free(iommu_iova_cache, iova); | ||
| 153 | } | ||
| 154 | |||
| 155 | static inline void __iommu_flush_cache( | ||
| 156 | struct intel_iommu *iommu, void *addr, int size) | ||
| 157 | { | ||
| 158 | if (!ecap_coherent(iommu->ecap)) | ||
| 159 | clflush_cache_range(addr, size); | ||
| 160 | } | ||
| 161 | |||
| 162 | /* Gets context entry for a given bus and devfn */ | ||
| 163 | static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, | ||
| 164 | u8 bus, u8 devfn) | ||
| 165 | { | ||
| 166 | struct root_entry *root; | ||
| 167 | struct context_entry *context; | ||
| 168 | unsigned long phy_addr; | ||
| 169 | unsigned long flags; | ||
| 170 | |||
| 171 | spin_lock_irqsave(&iommu->lock, flags); | ||
| 172 | root = &iommu->root_entry[bus]; | ||
| 173 | context = get_context_addr_from_root(root); | ||
| 174 | if (!context) { | ||
| 175 | context = (struct context_entry *)alloc_pgtable_page(); | ||
| 176 | if (!context) { | ||
| 177 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 178 | return NULL; | ||
| 179 | } | ||
| 180 | __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K); | ||
| 181 | phy_addr = virt_to_phys((void *)context); | ||
| 182 | set_root_value(root, phy_addr); | ||
| 183 | set_root_present(root); | ||
| 184 | __iommu_flush_cache(iommu, root, sizeof(*root)); | ||
| 185 | } | ||
| 186 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 187 | return &context[devfn]; | ||
| 188 | } | ||
| 189 | |||
| 190 | static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) | ||
| 191 | { | ||
| 192 | struct root_entry *root; | ||
| 193 | struct context_entry *context; | ||
| 194 | int ret; | ||
| 195 | unsigned long flags; | ||
| 196 | |||
| 197 | spin_lock_irqsave(&iommu->lock, flags); | ||
| 198 | root = &iommu->root_entry[bus]; | ||
| 199 | context = get_context_addr_from_root(root); | ||
| 200 | if (!context) { | ||
| 201 | ret = 0; | ||
| 202 | goto out; | ||
| 203 | } | ||
| 204 | ret = context_present(context[devfn]); | ||
| 205 | out: | ||
| 206 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 207 | return ret; | ||
| 208 | } | ||
| 209 | |||
| 210 | static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn) | ||
| 211 | { | ||
| 212 | struct root_entry *root; | ||
| 213 | struct context_entry *context; | ||
| 214 | unsigned long flags; | ||
| 215 | |||
| 216 | spin_lock_irqsave(&iommu->lock, flags); | ||
| 217 | root = &iommu->root_entry[bus]; | ||
| 218 | context = get_context_addr_from_root(root); | ||
| 219 | if (context) { | ||
| 220 | context_clear_entry(context[devfn]); | ||
| 221 | __iommu_flush_cache(iommu, &context[devfn], \ | ||
| 222 | sizeof(*context)); | ||
| 223 | } | ||
| 224 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 225 | } | ||
| 226 | |||
| 227 | static void free_context_table(struct intel_iommu *iommu) | ||
| 228 | { | ||
| 229 | struct root_entry *root; | ||
| 230 | int i; | ||
| 231 | unsigned long flags; | ||
| 232 | struct context_entry *context; | ||
| 233 | |||
| 234 | spin_lock_irqsave(&iommu->lock, flags); | ||
| 235 | if (!iommu->root_entry) { | ||
| 236 | goto out; | ||
| 237 | } | ||
| 238 | for (i = 0; i < ROOT_ENTRY_NR; i++) { | ||
| 239 | root = &iommu->root_entry[i]; | ||
| 240 | context = get_context_addr_from_root(root); | ||
| 241 | if (context) | ||
| 242 | free_pgtable_page(context); | ||
| 243 | } | ||
| 244 | free_pgtable_page(iommu->root_entry); | ||
| 245 | iommu->root_entry = NULL; | ||
| 246 | out: | ||
| 247 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 248 | } | ||
| 249 | |||
| 250 | /* page table handling */ | ||
| 251 | #define LEVEL_STRIDE (9) | ||
| 252 | #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) | ||
| 253 | |||
| 254 | static inline int agaw_to_level(int agaw) | ||
| 255 | { | ||
| 256 | return agaw + 2; | ||
| 257 | } | ||
| 258 | |||
| 259 | static inline int agaw_to_width(int agaw) | ||
| 260 | { | ||
| 261 | return 30 + agaw * LEVEL_STRIDE; | ||
| 262 | |||
| 263 | } | ||
| 264 | |||
| 265 | static inline int width_to_agaw(int width) | ||
| 266 | { | ||
| 267 | return (width - 30) / LEVEL_STRIDE; | ||
| 268 | } | ||
| 269 | |||
| 270 | static inline unsigned int level_to_offset_bits(int level) | ||
| 271 | { | ||
| 272 | return (12 + (level - 1) * LEVEL_STRIDE); | ||
| 273 | } | ||
| 274 | |||
| 275 | static inline int address_level_offset(u64 addr, int level) | ||
| 276 | { | ||
| 277 | return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK); | ||
| 278 | } | ||
| 279 | |||
| 280 | static inline u64 level_mask(int level) | ||
| 281 | { | ||
| 282 | return ((u64)-1 << level_to_offset_bits(level)); | ||
| 283 | } | ||
| 284 | |||
| 285 | static inline u64 level_size(int level) | ||
| 286 | { | ||
| 287 | return ((u64)1 << level_to_offset_bits(level)); | ||
| 288 | } | ||
| 289 | |||
| 290 | static inline u64 align_to_level(u64 addr, int level) | ||
| 291 | { | ||
| 292 | return ((addr + level_size(level) - 1) & level_mask(level)); | ||
| 293 | } | ||
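A short worked example of the level arithmetic above, with LEVEL_STRIDE
fixed at 9: for agaw == 2, agaw_to_level(2) == 4 and agaw_to_width(2) == 48,
i.e. a 48-bit domain walks a 4-level table. The per-level numbers are then:

	level_to_offset_bits(1) == 12	/* 4K leaf pages */
	level_to_offset_bits(2) == 21	/* level_size(2) == 2M */
	level_to_offset_bits(3) == 30	/* level_size(3) == 1G */
	address_level_offset(addr, n)	/* the 9-bit index at level n */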
| 294 | |||
| 295 | static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) | ||
| 296 | { | ||
| 297 | int addr_width = agaw_to_width(domain->agaw); | ||
| 298 | struct dma_pte *parent, *pte = NULL; | ||
| 299 | int level = agaw_to_level(domain->agaw); | ||
| 300 | int offset; | ||
| 301 | unsigned long flags; | ||
| 302 | |||
| 303 | BUG_ON(!domain->pgd); | ||
| 304 | |||
| 305 | addr &= (((u64)1) << addr_width) - 1; | ||
| 306 | parent = domain->pgd; | ||
| 307 | |||
| 308 | spin_lock_irqsave(&domain->mapping_lock, flags); | ||
| 309 | while (level > 0) { | ||
| 310 | void *tmp_page; | ||
| 311 | |||
| 312 | offset = address_level_offset(addr, level); | ||
| 313 | pte = &parent[offset]; | ||
| 314 | if (level == 1) | ||
| 315 | break; | ||
| 316 | |||
| 317 | if (!dma_pte_present(*pte)) { | ||
| 318 | tmp_page = alloc_pgtable_page(); | ||
| 319 | |||
| 320 | if (!tmp_page) { | ||
| 321 | spin_unlock_irqrestore(&domain->mapping_lock, | ||
| 322 | flags); | ||
| 323 | return NULL; | ||
| 324 | } | ||
| 325 | __iommu_flush_cache(domain->iommu, tmp_page, | ||
| 326 | PAGE_SIZE_4K); | ||
| 327 | dma_set_pte_addr(*pte, virt_to_phys(tmp_page)); | ||
| 328 | /* | ||
| 329 | * higher level tables always set r/w; the last level | ||
| 330 | * page table controls read/write | ||
| 331 | */ | ||
| 332 | dma_set_pte_readable(*pte); | ||
| 333 | dma_set_pte_writable(*pte); | ||
| 334 | __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); | ||
| 335 | } | ||
| 336 | parent = phys_to_virt(dma_pte_addr(*pte)); | ||
| 337 | level--; | ||
| 338 | } | ||
| 339 | |||
| 340 | spin_unlock_irqrestore(&domain->mapping_lock, flags); | ||
| 341 | return pte; | ||
| 342 | } | ||
| 343 | |||
| 344 | /* return address's pte at specific level */ | ||
| 345 | static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, | ||
| 346 | int level) | ||
| 347 | { | ||
| 348 | struct dma_pte *parent, *pte = NULL; | ||
| 349 | int total = agaw_to_level(domain->agaw); | ||
| 350 | int offset; | ||
| 351 | |||
| 352 | parent = domain->pgd; | ||
| 353 | while (level <= total) { | ||
| 354 | offset = address_level_offset(addr, total); | ||
| 355 | pte = &parent[offset]; | ||
| 356 | if (level == total) | ||
| 357 | return pte; | ||
| 358 | |||
| 359 | if (!dma_pte_present(*pte)) | ||
| 360 | break; | ||
| 361 | parent = phys_to_virt(dma_pte_addr(*pte)); | ||
| 362 | total--; | ||
| 363 | } | ||
| 364 | return NULL; | ||
| 365 | } | ||
| 366 | |||
| 367 | /* clear one page's page table */ | ||
| 368 | static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) | ||
| 369 | { | ||
| 370 | struct dma_pte *pte = NULL; | ||
| 371 | |||
| 372 | /* get last level pte */ | ||
| 373 | pte = dma_addr_level_pte(domain, addr, 1); | ||
| 374 | |||
| 375 | if (pte) { | ||
| 376 | dma_clear_pte(*pte); | ||
| 377 | __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); | ||
| 378 | } | ||
| 379 | } | ||
| 380 | |||
| 381 | /* clear last level ptes; a tlb flush should follow */ | ||
| 382 | static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) | ||
| 383 | { | ||
| 384 | int addr_width = agaw_to_width(domain->agaw); | ||
| 385 | |||
| 386 | start &= (((u64)1) << addr_width) - 1; | ||
| 387 | end &= (((u64)1) << addr_width) - 1; | ||
| 388 | /* in case it's a partial page */ | ||
| 389 | start = PAGE_ALIGN_4K(start); | ||
| 390 | end &= PAGE_MASK_4K; | ||
| 391 | |||
| 392 | /* we don't need lock here, nobody else touches the iova range */ | ||
| 393 | while (start < end) { | ||
| 394 | dma_pte_clear_one(domain, start); | ||
| 395 | start += PAGE_SIZE_4K; | ||
| 396 | } | ||
| 397 | } | ||
| 398 | |||
| 399 | /* free page table pages. last level pte should already be cleared */ | ||
| 400 | static void dma_pte_free_pagetable(struct dmar_domain *domain, | ||
| 401 | u64 start, u64 end) | ||
| 402 | { | ||
| 403 | int addr_width = agaw_to_width(domain->agaw); | ||
| 404 | struct dma_pte *pte; | ||
| 405 | int total = agaw_to_level(domain->agaw); | ||
| 406 | int level; | ||
| 407 | u64 tmp; | ||
| 408 | |||
| 409 | start &= (((u64)1) << addr_width) - 1; | ||
| 410 | end &= (((u64)1) << addr_width) - 1; | ||
| 411 | |||
| 412 | /* we don't need lock here, nobody else touches the iova range */ | ||
| 413 | level = 2; | ||
| 414 | while (level <= total) { | ||
| 415 | tmp = align_to_level(start, level); | ||
| 416 | if (tmp >= end || (tmp + level_size(level) > end)) | ||
| 417 | return; | ||
| 418 | |||
| 419 | while (tmp < end) { | ||
| 420 | pte = dma_addr_level_pte(domain, tmp, level); | ||
| 421 | if (pte) { | ||
| 422 | free_pgtable_page( | ||
| 423 | phys_to_virt(dma_pte_addr(*pte))); | ||
| 424 | dma_clear_pte(*pte); | ||
| 425 | __iommu_flush_cache(domain->iommu, | ||
| 426 | pte, sizeof(*pte)); | ||
| 427 | } | ||
| 428 | tmp += level_size(level); | ||
| 429 | } | ||
| 430 | level++; | ||
| 431 | } | ||
| 432 | /* free pgd */ | ||
| 433 | if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) { | ||
| 434 | free_pgtable_page(domain->pgd); | ||
| 435 | domain->pgd = NULL; | ||
| 436 | } | ||
| 437 | } | ||
| 438 | |||
| 439 | /* iommu handling */ | ||
| 440 | static int iommu_alloc_root_entry(struct intel_iommu *iommu) | ||
| 441 | { | ||
| 442 | struct root_entry *root; | ||
| 443 | unsigned long flags; | ||
| 444 | |||
| 445 | root = (struct root_entry *)alloc_pgtable_page(); | ||
| 446 | if (!root) | ||
| 447 | return -ENOMEM; | ||
| 448 | |||
| 449 | __iommu_flush_cache(iommu, root, PAGE_SIZE_4K); | ||
| 450 | |||
| 451 | spin_lock_irqsave(&iommu->lock, flags); | ||
| 452 | iommu->root_entry = root; | ||
| 453 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 454 | |||
| 455 | return 0; | ||
| 456 | } | ||
| 457 | |||
| 458 | #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ | ||
| 459 | {\ | ||
| 460 | unsigned long start_time = jiffies;\ | ||
| 461 | while (1) {\ | ||
| 462 | sts = op (iommu->reg + offset);\ | ||
| 463 | if (cond)\ | ||
| 464 | break;\ | ||
| 465 | if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\ | ||
| 466 | panic("DMAR hardware is malfunctioning\n");\ | ||
| 467 | cpu_relax();\ | ||
| 468 | }\ | ||
| 469 | } | ||
| 470 | |||
| 471 | static void iommu_set_root_entry(struct intel_iommu *iommu) | ||
| 472 | { | ||
| 473 | void *addr; | ||
| 474 | u32 cmd, sts; | ||
| 475 | unsigned long flag; | ||
| 476 | |||
| 477 | addr = iommu->root_entry; | ||
| 478 | |||
| 479 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
| 480 | dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); | ||
| 481 | |||
| 482 | cmd = iommu->gcmd | DMA_GCMD_SRTP; | ||
| 483 | writel(cmd, iommu->reg + DMAR_GCMD_REG); | ||
| 484 | |||
| 485 | /* Make sure hardware completes it */ | ||
| 486 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, | ||
| 487 | readl, (sts & DMA_GSTS_RTPS), sts); | ||
| 488 | |||
| 489 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
| 490 | } | ||
| 491 | |||
| 492 | static void iommu_flush_write_buffer(struct intel_iommu *iommu) | ||
| 493 | { | ||
| 494 | u32 val; | ||
| 495 | unsigned long flag; | ||
| 496 | |||
| 497 | if (!cap_rwbf(iommu->cap)) | ||
| 498 | return; | ||
| 499 | val = iommu->gcmd | DMA_GCMD_WBF; | ||
| 500 | |||
| 501 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
| 502 | writel(val, iommu->reg + DMAR_GCMD_REG); | ||
| 503 | |||
| 504 | /* Make sure hardware completes it */ | ||
| 505 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, | ||
| 506 | readl, (!(val & DMA_GSTS_WBFS)), val); | ||
| 507 | |||
| 508 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
| 509 | } | ||
| 510 | |||
| 511 | /* the return value determines whether we need a write buffer flush */ | ||
| 512 | static int __iommu_flush_context(struct intel_iommu *iommu, | ||
| 513 | u16 did, u16 source_id, u8 function_mask, u64 type, | ||
| 514 | int non_present_entry_flush) | ||
| 515 | { | ||
| 516 | u64 val = 0; | ||
| 517 | unsigned long flag; | ||
| 518 | |||
| 519 | /* | ||
| 520 | * In the non-present entry flush case, if the hardware doesn't cache | ||
| 521 | * non-present entries we do nothing, and if it does cache them, | ||
| 522 | * we flush entries of domain 0 (that domain id is used to cache | ||
| 523 | * any non-present entries) | ||
| 524 | */ | ||
| 525 | if (non_present_entry_flush) { | ||
| 526 | if (!cap_caching_mode(iommu->cap)) | ||
| 527 | return 1; | ||
| 528 | else | ||
| 529 | did = 0; | ||
| 530 | } | ||
| 531 | |||
| 532 | switch (type) { | ||
| 533 | case DMA_CCMD_GLOBAL_INVL: | ||
| 534 | val = DMA_CCMD_GLOBAL_INVL; | ||
| 535 | break; | ||
| 536 | case DMA_CCMD_DOMAIN_INVL: | ||
| 537 | val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did); | ||
| 538 | break; | ||
| 539 | case DMA_CCMD_DEVICE_INVL: | ||
| 540 | val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did) | ||
| 541 | | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask); | ||
| 542 | break; | ||
| 543 | default: | ||
| 544 | BUG(); | ||
| 545 | } | ||
| 546 | val |= DMA_CCMD_ICC; | ||
| 547 | |||
| 548 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
| 549 | dmar_writeq(iommu->reg + DMAR_CCMD_REG, val); | ||
| 550 | |||
| 551 | /* Make sure hardware completes it */ | ||
| 552 | IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, | ||
| 553 | dmar_readq, (!(val & DMA_CCMD_ICC)), val); | ||
| 554 | |||
| 555 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
| 556 | |||
| 557 | /* flushing the context entry will implicitly flush the write buffer */ | ||
| 558 | return 0; | ||
| 559 | } | ||
| 560 | |||
| 561 | static inline int iommu_flush_context_global(struct intel_iommu *iommu, | ||
| 562 | int non_present_entry_flush) | ||
| 563 | { | ||
| 564 | return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, | ||
| 565 | non_present_entry_flush); | ||
| 566 | } | ||
| 567 | |||
| 568 | static inline int iommu_flush_context_domain(struct intel_iommu *iommu, u16 did, | ||
| 569 | int non_present_entry_flush) | ||
| 570 | { | ||
| 571 | return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL, | ||
| 572 | non_present_entry_flush); | ||
| 573 | } | ||
| 574 | |||
| 575 | static inline int iommu_flush_context_device(struct intel_iommu *iommu, | ||
| 576 | u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush) | ||
| 577 | { | ||
| 578 | return __iommu_flush_context(iommu, did, source_id, function_mask, | ||
| 579 | DMA_CCMD_DEVICE_INVL, non_present_entry_flush); | ||
| 580 | } | ||
| 581 | |||
| 582 | /* the return value determines whether we need a write buffer flush */ | ||
| 583 | static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, | ||
| 584 | u64 addr, unsigned int size_order, u64 type, | ||
| 585 | int non_present_entry_flush) | ||
| 586 | { | ||
| 587 | int tlb_offset = ecap_iotlb_offset(iommu->ecap); | ||
| 588 | u64 val = 0, val_iva = 0; | ||
| 589 | unsigned long flag; | ||
| 590 | |||
| 591 | /* | ||
| 592 | * In the non-present entry flush case, if the hardware doesn't cache | ||
| 593 | * non-present entries we do nothing, and if it does cache them, | ||
| 594 | * we flush entries of domain 0 (that domain id is used to cache | ||
| 595 | * any non-present entries) | ||
| 596 | */ | ||
| 597 | if (non_present_entry_flush) { | ||
| 598 | if (!cap_caching_mode(iommu->cap)) | ||
| 599 | return 1; | ||
| 600 | else | ||
| 601 | did = 0; | ||
| 602 | } | ||
| 603 | |||
| 604 | switch (type) { | ||
| 605 | case DMA_TLB_GLOBAL_FLUSH: | ||
| 606 | /* a global flush doesn't need to set IVA_REG */ | ||
| 607 | val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; | ||
| 608 | break; | ||
| 609 | case DMA_TLB_DSI_FLUSH: | ||
| 610 | val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); | ||
| 611 | break; | ||
| 612 | case DMA_TLB_PSI_FLUSH: | ||
| 613 | val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); | ||
| 614 | /* Note: always flush non-leaf currently */ | ||
| 615 | val_iva = size_order | addr; | ||
| 616 | break; | ||
| 617 | default: | ||
| 618 | BUG(); | ||
| 619 | } | ||
| 620 | /* Note: set drain read/write */ | ||
| 621 | #if 0 | ||
| 622 | /* | ||
| 623 | * This is probably only needed to be extra safe; it looks | ||
| 624 | * like we can ignore it without any impact. | ||
| 625 | */ | ||
| 626 | if (cap_read_drain(iommu->cap)) | ||
| 627 | val |= DMA_TLB_READ_DRAIN; | ||
| 628 | #endif | ||
| 629 | if (cap_write_drain(iommu->cap)) | ||
| 630 | val |= DMA_TLB_WRITE_DRAIN; | ||
| 631 | |||
| 632 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
| 633 | /* Note: Only uses first TLB reg currently */ | ||
| 634 | if (val_iva) | ||
| 635 | dmar_writeq(iommu->reg + tlb_offset, val_iva); | ||
| 636 | dmar_writeq(iommu->reg + tlb_offset + 8, val); | ||
| 637 | |||
| 638 | /* Make sure hardware completes it */ | ||
| 639 | IOMMU_WAIT_OP(iommu, tlb_offset + 8, | ||
| 640 | dmar_readq, (!(val & DMA_TLB_IVT)), val); | ||
| 641 | |||
| 642 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
| 643 | |||
| 644 | /* check IOTLB invalidation granularity */ | ||
| 645 | if (DMA_TLB_IAIG(val) == 0) | ||
| 646 | printk(KERN_ERR"IOMMU: flush IOTLB failed\n"); | ||
| 647 | if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) | ||
| 648 | pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", | ||
| 649 | DMA_TLB_IIRG(type), DMA_TLB_IAIG(val)); | ||
| 650 | /* flushing the IOTLB will implicitly flush the write buffer */ | ||
| 651 | return 0; | ||
| 652 | } | ||
| 653 | |||
| 654 | static inline int iommu_flush_iotlb_global(struct intel_iommu *iommu, | ||
| 655 | int non_present_entry_flush) | ||
| 656 | { | ||
| 657 | return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, | ||
| 658 | non_present_entry_flush); | ||
| 659 | } | ||
| 660 | |||
| 661 | static inline int iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did, | ||
| 662 | int non_present_entry_flush) | ||
| 663 | { | ||
| 664 | return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, | ||
| 665 | non_present_entry_flush); | ||
| 666 | } | ||
| 667 | |||
| 668 | static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, | ||
| 669 | u64 addr, unsigned int pages, int non_present_entry_flush) | ||
| 670 | { | ||
| 671 | unsigned int mask; | ||
| 672 | |||
| 673 | BUG_ON(addr & (~PAGE_MASK_4K)); | ||
| 674 | BUG_ON(pages == 0); | ||
| 675 | |||
| 676 | /* Fallback to domain selective flush if no PSI support */ | ||
| 677 | if (!cap_pgsel_inv(iommu->cap)) | ||
| 678 | return iommu_flush_iotlb_dsi(iommu, did, | ||
| 679 | non_present_entry_flush); | ||
| 680 | |||
| 681 | /* | ||
| 682 | * PSI requires the flush size to be 2^x pages, with the base address | ||
| 683 | * naturally aligned to that size | ||
| 684 | */ | ||
| 685 | mask = ilog2(__roundup_pow_of_two(pages)); | ||
| 686 | /* Fallback to domain selective flush if size is too big */ | ||
| 687 | if (mask > cap_max_amask_val(iommu->cap)) | ||
| 688 | return iommu_flush_iotlb_dsi(iommu, did, | ||
| 689 | non_present_entry_flush); | ||
| 690 | |||
| 691 | return __iommu_flush_iotlb(iommu, did, addr, mask, | ||
| 692 | DMA_TLB_PSI_FLUSH, non_present_entry_flush); | ||
| 693 | } | ||
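A worked example of the PSI mask computation above: for pages == 5,
__roundup_pow_of_two(5) == 8, so mask == ilog2(8) == 3 and the hardware
invalidates a naturally aligned 2^3 == 8-page (32K) region covering the
request; had the mask exceeded cap_max_amask_val(), the code would have
fallen back to a domain-selective flush instead.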
| 694 | |||
| 695 | static int iommu_enable_translation(struct intel_iommu *iommu) | ||
| 696 | { | ||
| 697 | u32 sts; | ||
| 698 | unsigned long flags; | ||
| 699 | |||
| 700 | spin_lock_irqsave(&iommu->register_lock, flags); | ||
| 701 | writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG); | ||
| 702 | |||
| 703 | /* Make sure hardware completes it */ | ||
| 704 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, | ||
| 705 | readl, (sts & DMA_GSTS_TES), sts); | ||
| 706 | |||
| 707 | iommu->gcmd |= DMA_GCMD_TE; | ||
| 708 | spin_unlock_irqrestore(&iommu->register_lock, flags); | ||
| 709 | return 0; | ||
| 710 | } | ||
| 711 | |||
| 712 | static int iommu_disable_translation(struct intel_iommu *iommu) | ||
| 713 | { | ||
| 714 | u32 sts; | ||
| 715 | unsigned long flag; | ||
| 716 | |||
| 717 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
| 718 | iommu->gcmd &= ~DMA_GCMD_TE; | ||
| 719 | writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); | ||
| 720 | |||
| 721 | /* Make sure hardware completes it */ | ||
| 722 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, | ||
| 723 | readl, (!(sts & DMA_GSTS_TES)), sts); | ||
| 724 | |||
| 725 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
| 726 | return 0; | ||
| 727 | } | ||
| 728 | |||
| 729 | /* iommu interrupt handling. Most of it is MSI-like. */ | ||
| 730 | |||
| 731 | static char *fault_reason_strings[] = | ||
| 732 | { | ||
| 733 | "Software", | ||
| 734 | "Present bit in root entry is clear", | ||
| 735 | "Present bit in context entry is clear", | ||
| 736 | "Invalid context entry", | ||
| 737 | "Access beyond MGAW", | ||
| 738 | "PTE Write access is not set", | ||
| 739 | "PTE Read access is not set", | ||
| 740 | "Next page table ptr is invalid", | ||
| 741 | "Root table address invalid", | ||
| 742 | "Context table ptr is invalid", | ||
| 743 | "non-zero reserved fields in RTP", | ||
| 744 | "non-zero reserved fields in CTP", | ||
| 745 | "non-zero reserved fields in PTE", | ||
| 746 | "Unknown" | ||
| 747 | }; | ||
| 748 | #define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings) | ||
| 749 | |||
| 750 | char *dmar_get_fault_reason(u8 fault_reason) | ||
| 751 | { | ||
| 752 | if (fault_reason >= MAX_FAULT_REASON_IDX) | ||
| 753 | return fault_reason_strings[MAX_FAULT_REASON_IDX - 1]; | ||
| 754 | else | ||
| 755 | return fault_reason_strings[fault_reason]; | ||
| 756 | } | ||
| 757 | |||
| 758 | void dmar_msi_unmask(unsigned int irq) | ||
| 759 | { | ||
| 760 | struct intel_iommu *iommu = get_irq_data(irq); | ||
| 761 | unsigned long flag; | ||
| 762 | |||
| 763 | /* unmask it */ | ||
| 764 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
| 765 | writel(0, iommu->reg + DMAR_FECTL_REG); | ||
| 766 | /* Read a register back to flush the posted write */ | ||
| 767 | readl(iommu->reg + DMAR_FECTL_REG); | ||
| 768 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
| 769 | } | ||
| 770 | |||
| 771 | void dmar_msi_mask(unsigned int irq) | ||
| 772 | { | ||
| 773 | unsigned long flag; | ||
| 774 | struct intel_iommu *iommu = get_irq_data(irq); | ||
| 775 | |||
| 776 | /* mask it */ | ||
| 777 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
| 778 | writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG); | ||
| 779 | /* Read a register back to flush the posted write */ | ||
| 780 | readl(iommu->reg + DMAR_FECTL_REG); | ||
| 781 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
| 782 | } | ||
| 783 | |||
| 784 | void dmar_msi_write(int irq, struct msi_msg *msg) | ||
| 785 | { | ||
| 786 | struct intel_iommu *iommu = get_irq_data(irq); | ||
| 787 | unsigned long flag; | ||
| 788 | |||
| 789 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
| 790 | writel(msg->data, iommu->reg + DMAR_FEDATA_REG); | ||
| 791 | writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG); | ||
| 792 | writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG); | ||
| 793 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
| 794 | } | ||
| 795 | |||
| 796 | void dmar_msi_read(int irq, struct msi_msg *msg) | ||
| 797 | { | ||
| 798 | struct intel_iommu *iommu = get_irq_data(irq); | ||
| 799 | unsigned long flag; | ||
| 800 | |||
| 801 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
| 802 | msg->data = readl(iommu->reg + DMAR_FEDATA_REG); | ||
| 803 | msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG); | ||
| 804 | msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG); | ||
| 805 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
| 806 | } | ||
| 807 | |||
| 808 | static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type, | ||
| 809 | u8 fault_reason, u16 source_id, u64 addr) | ||
| 810 | { | ||
| 811 | char *reason; | ||
| 812 | |||
| 813 | reason = dmar_get_fault_reason(fault_reason); | ||
| 814 | |||
| 815 | printk(KERN_ERR | ||
| 816 | "DMAR:[%s] Request device [%02x:%02x.%d] " | ||
| 817 | "fault addr %llx \n" | ||
| 818 | "DMAR:[fault reason %02d] %s\n", | ||
| 819 | (type ? "DMA Read" : "DMA Write"), | ||
| 820 | (source_id >> 8), PCI_SLOT(source_id & 0xFF), | ||
| 821 | PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); | ||
| 822 | return 0; | ||
| 823 | } | ||
| 824 | |||
| 825 | #define PRIMARY_FAULT_REG_LEN (16) | ||
| 826 | static irqreturn_t iommu_page_fault(int irq, void *dev_id) | ||
| 827 | { | ||
| 828 | struct intel_iommu *iommu = dev_id; | ||
| 829 | int reg, fault_index; | ||
| 830 | u32 fault_status; | ||
| 831 | unsigned long flag; | ||
| 832 | |||
| 833 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
| 834 | fault_status = readl(iommu->reg + DMAR_FSTS_REG); | ||
| 835 | |||
| 836 | /* TBD: ignore advanced fault log currently */ | ||
| 837 | if (!(fault_status & DMA_FSTS_PPF)) | ||
| 838 | goto clear_overflow; | ||
| 839 | |||
| 840 | fault_index = dma_fsts_fault_record_index(fault_status); | ||
| 841 | reg = cap_fault_reg_offset(iommu->cap); | ||
| 842 | while (1) { | ||
| 843 | u8 fault_reason; | ||
| 844 | u16 source_id; | ||
| 845 | u64 guest_addr; | ||
| 846 | int type; | ||
| 847 | u32 data; | ||
| 848 | |||
| 849 | /* highest 32 bits */ | ||
| 850 | data = readl(iommu->reg + reg + | ||
| 851 | fault_index * PRIMARY_FAULT_REG_LEN + 12); | ||
| 852 | if (!(data & DMA_FRCD_F)) | ||
| 853 | break; | ||
| 854 | |||
| 855 | fault_reason = dma_frcd_fault_reason(data); | ||
| 856 | type = dma_frcd_type(data); | ||
| 857 | |||
| 858 | data = readl(iommu->reg + reg + | ||
| 859 | fault_index * PRIMARY_FAULT_REG_LEN + 8); | ||
| 860 | source_id = dma_frcd_source_id(data); | ||
| 861 | |||
| 862 | guest_addr = dmar_readq(iommu->reg + reg + | ||
| 863 | fault_index * PRIMARY_FAULT_REG_LEN); | ||
| 864 | guest_addr = dma_frcd_page_addr(guest_addr); | ||
| 865 | /* clear the fault */ | ||
| 866 | writel(DMA_FRCD_F, iommu->reg + reg + | ||
| 867 | fault_index * PRIMARY_FAULT_REG_LEN + 12); | ||
| 868 | |||
| 869 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
| 870 | |||
| 871 | iommu_page_fault_do_one(iommu, type, fault_reason, | ||
| 872 | source_id, guest_addr); | ||
| 873 | |||
| 874 | fault_index++; | ||
| 875 | if (fault_index >= cap_num_fault_regs(iommu->cap)) | ||
| 876 | fault_index = 0; | ||
| 877 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
| 878 | } | ||
| 879 | clear_overflow: | ||
| 880 | /* clear primary fault overflow */ | ||
| 881 | fault_status = readl(iommu->reg + DMAR_FSTS_REG); | ||
| 882 | if (fault_status & DMA_FSTS_PFO) | ||
| 883 | writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG); | ||
| 884 | |||
| 885 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
| 886 | return IRQ_HANDLED; | ||
| 887 | } | ||
| 888 | |||
| 889 | int dmar_set_interrupt(struct intel_iommu *iommu) | ||
| 890 | { | ||
| 891 | int irq, ret; | ||
| 892 | |||
| 893 | irq = create_irq(); | ||
| 894 | if (!irq) { | ||
| 895 | printk(KERN_ERR "IOMMU: no free vectors\n"); | ||
| 896 | return -EINVAL; | ||
| 897 | } | ||
| 898 | |||
| 899 | set_irq_data(irq, iommu); | ||
| 900 | iommu->irq = irq; | ||
| 901 | |||
| 902 | ret = arch_setup_dmar_msi(irq); | ||
| 903 | if (ret) { | ||
| 904 | set_irq_data(irq, NULL); | ||
| 905 | iommu->irq = 0; | ||
| 906 | destroy_irq(irq); | ||
| 907 | return ret; | ||
| 908 | } | ||
| 909 | |||
| 910 | /* Make sure the fault registers are cleared */ | ||
| 911 | iommu_page_fault(irq, iommu); | ||
| 912 | |||
| 913 | ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu); | ||
| 914 | if (ret) | ||
| 915 | printk(KERN_ERR "IOMMU: can't request irq\n"); | ||
| 916 | return ret; | ||
| 917 | } | ||
| 918 | |||
| 919 | static int iommu_init_domains(struct intel_iommu *iommu) | ||
| 920 | { | ||
| 921 | unsigned long ndomains; | ||
| 922 | unsigned long nlongs; | ||
| 923 | |||
| 924 | ndomains = cap_ndoms(iommu->cap); | ||
| 925 | pr_debug("Number of Domains supportd <%ld>\n", ndomains); | ||
| 926 | nlongs = BITS_TO_LONGS(ndomains); | ||
| 927 | |||
| 928 | /* TBD: there might be 64K domains, | ||
| 929 | * consider other allocation schemes for future chips | ||
| 930 | */ | ||
| 931 | iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL); | ||
| 932 | if (!iommu->domain_ids) { | ||
| 933 | printk(KERN_ERR "Allocating domain id array failed\n"); | ||
| 934 | return -ENOMEM; | ||
| 935 | } | ||
| 936 | iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *), | ||
| 937 | GFP_KERNEL); | ||
| 938 | if (!iommu->domains) { | ||
| 939 | printk(KERN_ERR "Allocating domain array failed\n"); | ||
| 940 | kfree(iommu->domain_ids); | ||
| 941 | return -ENOMEM; | ||
| 942 | } | ||
| 943 | |||
| 944 | /* | ||
| 945 | * if Caching mode is set, then invalid translations are tagged | ||
| 946 | * with domainid 0. Hence we need to pre-allocate it. | ||
| 947 | */ | ||
| 948 | if (cap_caching_mode(iommu->cap)) | ||
| 949 | set_bit(0, iommu->domain_ids); | ||
| 950 | return 0; | ||
| 951 | } | ||
| 952 | |||
| 953 | static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) | ||
| 954 | { | ||
| 955 | struct intel_iommu *iommu; | ||
| 956 | int ret; | ||
| 957 | int map_size; | ||
| 958 | u32 ver; | ||
| 959 | |||
| 960 | iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); | ||
| 961 | if (!iommu) | ||
| 962 | return NULL; | ||
| 963 | iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); | ||
| 964 | if (!iommu->reg) { | ||
| 965 | printk(KERN_ERR "IOMMU: can't map the region\n"); | ||
| 966 | goto error; | ||
| 967 | } | ||
| 968 | iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); | ||
| 969 | iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); | ||
| 970 | |||
| 971 | /* the registers might be more than one page */ | ||
| 972 | map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), | ||
| 973 | cap_max_fault_reg_offset(iommu->cap)); | ||
| 974 | map_size = PAGE_ALIGN_4K(map_size); | ||
| 975 | if (map_size > PAGE_SIZE_4K) { | ||
| 976 | iounmap(iommu->reg); | ||
| 977 | iommu->reg = ioremap(drhd->reg_base_addr, map_size); | ||
| 978 | if (!iommu->reg) { | ||
| 979 | printk(KERN_ERR "IOMMU: can't map the region\n"); | ||
| 980 | goto error; | ||
| 981 | } | ||
| 982 | } | ||
| 983 | |||
| 984 | ver = readl(iommu->reg + DMAR_VER_REG); | ||
| 985 | pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", | ||
| 986 | drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), | ||
| 987 | iommu->cap, iommu->ecap); | ||
| 988 | ret = iommu_init_domains(iommu); | ||
| 989 | if (ret) | ||
| 990 | goto error_unmap; | ||
| 991 | spin_lock_init(&iommu->lock); | ||
| 992 | spin_lock_init(&iommu->register_lock); | ||
| 993 | |||
| 994 | drhd->iommu = iommu; | ||
| 995 | return iommu; | ||
| 996 | error_unmap: | ||
| 997 | iounmap(iommu->reg); | ||
| 998 | iommu->reg = NULL; | ||
| 999 | error: | ||
| 1000 | kfree(iommu); | ||
| 1001 | return NULL; | ||
| 1002 | } | ||
| 1003 | |||
| 1004 | static void domain_exit(struct dmar_domain *domain); | ||
| 1005 | static void free_iommu(struct intel_iommu *iommu) | ||
| 1006 | { | ||
| 1007 | struct dmar_domain *domain; | ||
| 1008 | int i; | ||
| 1009 | |||
| 1010 | if (!iommu) | ||
| 1011 | return; | ||
| 1012 | |||
| 1013 | i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); | ||
| 1014 | for (; i < cap_ndoms(iommu->cap); ) { | ||
| 1015 | domain = iommu->domains[i]; | ||
| 1016 | clear_bit(i, iommu->domain_ids); | ||
| 1017 | domain_exit(domain); | ||
| 1018 | i = find_next_bit(iommu->domain_ids, | ||
| 1019 | cap_ndoms(iommu->cap), i+1); | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | if (iommu->gcmd & DMA_GCMD_TE) | ||
| 1023 | iommu_disable_translation(iommu); | ||
| 1024 | |||
| 1025 | if (iommu->irq) { | ||
| 1026 | set_irq_data(iommu->irq, NULL); | ||
| 1027 | /* This will mask the irq */ | ||
| 1028 | free_irq(iommu->irq, iommu); | ||
| 1029 | destroy_irq(iommu->irq); | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | kfree(iommu->domains); | ||
| 1033 | kfree(iommu->domain_ids); | ||
| 1034 | |||
| 1035 | /* free context mapping */ | ||
| 1036 | free_context_table(iommu); | ||
| 1037 | |||
| 1038 | if (iommu->reg) | ||
| 1039 | iounmap(iommu->reg); | ||
| 1040 | kfree(iommu); | ||
| 1041 | } | ||
| 1042 | |||
| 1043 | static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) | ||
| 1044 | { | ||
| 1045 | unsigned long num; | ||
| 1046 | unsigned long ndomains; | ||
| 1047 | struct dmar_domain *domain; | ||
| 1048 | unsigned long flags; | ||
| 1049 | |||
| 1050 | domain = alloc_domain_mem(); | ||
| 1051 | if (!domain) | ||
| 1052 | return NULL; | ||
| 1053 | |||
| 1054 | ndomains = cap_ndoms(iommu->cap); | ||
| 1055 | |||
| 1056 | spin_lock_irqsave(&iommu->lock, flags); | ||
| 1057 | num = find_first_zero_bit(iommu->domain_ids, ndomains); | ||
| 1058 | if (num >= ndomains) { | ||
| 1059 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 1060 | free_domain_mem(domain); | ||
| 1061 | printk(KERN_ERR "IOMMU: no free domain ids\n"); | ||
| 1062 | return NULL; | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | set_bit(num, iommu->domain_ids); | ||
| 1066 | domain->id = num; | ||
| 1067 | domain->iommu = iommu; | ||
| 1068 | iommu->domains[num] = domain; | ||
| 1069 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 1070 | |||
| 1071 | return domain; | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | static void iommu_free_domain(struct dmar_domain *domain) | ||
| 1075 | { | ||
| 1076 | unsigned long flags; | ||
| 1077 | |||
| 1078 | spin_lock_irqsave(&domain->iommu->lock, flags); | ||
| 1079 | clear_bit(domain->id, domain->iommu->domain_ids); | ||
| 1080 | spin_unlock_irqrestore(&domain->iommu->lock, flags); | ||
| 1081 | } | ||
| 1082 | |||
| 1083 | static struct iova_domain reserved_iova_list; | ||
| 1084 | |||
| 1085 | static void dmar_init_reserved_ranges(void) | ||
| 1086 | { | ||
| 1087 | struct pci_dev *pdev = NULL; | ||
| 1088 | struct iova *iova; | ||
| 1089 | int i; | ||
| 1090 | u64 addr, size; | ||
| 1091 | |||
| 1092 | init_iova_domain(&reserved_iova_list); | ||
| 1093 | |||
| 1094 | /* IOAPIC ranges shouldn't be accessed by DMA */ | ||
| 1095 | iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START), | ||
| 1096 | IOVA_PFN(IOAPIC_RANGE_END)); | ||
| 1097 | if (!iova) | ||
| 1098 | printk(KERN_ERR "Reserve IOAPIC range failed\n"); | ||
| 1099 | |||
| 1100 | /* Reserve all PCI MMIO to avoid peer-to-peer access */ | ||
| 1101 | for_each_pci_dev(pdev) { | ||
| 1102 | struct resource *r; | ||
| 1103 | |||
| 1104 | for (i = 0; i < PCI_NUM_RESOURCES; i++) { | ||
| 1105 | r = &pdev->resource[i]; | ||
| 1106 | if (!r->flags || !(r->flags & IORESOURCE_MEM)) | ||
| 1107 | continue; | ||
| 1108 | addr = r->start; | ||
| 1109 | addr &= PAGE_MASK_4K; | ||
| 1110 | size = r->end - addr; | ||
| 1111 | size = PAGE_ALIGN_4K(size); | ||
| 1112 | iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr), | ||
| 1113 | IOVA_PFN(size + addr) - 1); | ||
| 1114 | if (!iova) | ||
| 1115 | printk(KERN_ERR "Reserve iova failed\n"); | ||
| 1116 | } | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | } | ||
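A brief worked example of the reservation arithmetic above, assuming
IOVA_PFN(addr) is simply addr >> 12 for 4K pages: the IOAPIC range pins
PFNs 0xfee00 through 0xfeeff, and a 1M PCI BAR at 0xd0000000 (an assumed
example address) pins PFNs 0xd0000 through 0xd00ff, so the IOVA allocator
can never hand out addresses that would decode as peer-to-peer MMIO.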
| 1120 | |||
| 1121 | static void domain_reserve_special_ranges(struct dmar_domain *domain) | ||
| 1122 | { | ||
| 1123 | copy_reserved_iova(&reserved_iova_list, &domain->iovad); | ||
| 1124 | } | ||
| 1125 | |||
| 1126 | static inline int guestwidth_to_adjustwidth(int gaw) | ||
| 1127 | { | ||
| 1128 | int agaw; | ||
| 1129 | int r = (gaw - 12) % 9; | ||
| 1130 | |||
| 1131 | if (r == 0) | ||
| 1132 | agaw = gaw; | ||
| 1133 | else | ||
| 1134 | agaw = gaw + 9 - r; | ||
| 1135 | if (agaw > 64) | ||
| 1136 | agaw = 64; | ||
| 1137 | return agaw; | ||
| 1138 | } | ||
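A worked example of the adjusted-width rounding above: the hardware walks
page tables in 9-bit strides above the 12-bit page offset, so the guest
width is rounded up to the next value of the form 12 + 9*n. For
gaw == 36, r == (36 - 12) % 9 == 6 and agaw == 36 + 9 - 6 == 39; for
gaw == 48, r == 0 and the width is kept unchanged.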
| 1139 | |||
| 1140 | static int domain_init(struct dmar_domain *domain, int guest_width) | ||
| 1141 | { | ||
| 1142 | struct intel_iommu *iommu; | ||
| 1143 | int adjust_width, agaw; | ||
| 1144 | unsigned long sagaw; | ||
| 1145 | |||
| 1146 | init_iova_domain(&domain->iovad); | ||
| 1147 | spin_lock_init(&domain->mapping_lock); | ||
| 1148 | |||
| 1149 | domain_reserve_special_ranges(domain); | ||
| 1150 | |||
| 1151 | /* calculate AGAW */ | ||
| 1152 | iommu = domain->iommu; | ||
| 1153 | if (guest_width > cap_mgaw(iommu->cap)) | ||
| 1154 | guest_width = cap_mgaw(iommu->cap); | ||
| 1155 | domain->gaw = guest_width; | ||
| 1156 | adjust_width = guestwidth_to_adjustwidth(guest_width); | ||
| 1157 | agaw = width_to_agaw(adjust_width); | ||
| 1158 | sagaw = cap_sagaw(iommu->cap); | ||
| 1159 | if (!test_bit(agaw, &sagaw)) { | ||
| 1160 | /* hardware doesn't support it, choose a bigger one */ | ||
| 1161 | pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw); | ||
| 1162 | agaw = find_next_bit(&sagaw, 5, agaw); | ||
| 1163 | if (agaw >= 5) | ||
| 1164 | return -ENODEV; | ||
| 1165 | } | ||
| 1166 | domain->agaw = agaw; | ||
| 1167 | INIT_LIST_HEAD(&domain->devices); | ||
| 1168 | |||
| 1169 | /* always allocate the top pgd */ | ||
| 1170 | domain->pgd = (struct dma_pte *)alloc_pgtable_page(); | ||
| 1171 | if (!domain->pgd) | ||
| 1172 | return -ENOMEM; | ||
| 1173 | __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K); | ||
| 1174 | return 0; | ||
| 1175 | } | ||
| 1176 | |||
| 1177 | static void domain_exit(struct dmar_domain *domain) | ||
| 1178 | { | ||
| 1179 | u64 end; | ||
| 1180 | |||
| 1181 | /* Domain 0 is reserved, so don't process it */ | ||
| 1182 | if (!domain) | ||
| 1183 | return; | ||
| 1184 | |||
| 1185 | domain_remove_dev_info(domain); | ||
| 1186 | /* destroy iovas */ | ||
| 1187 | put_iova_domain(&domain->iovad); | ||
| 1188 | end = DOMAIN_MAX_ADDR(domain->gaw); | ||
| 1189 | end = end & PAGE_MASK_4K; | ||
| 1190 | |||
| 1191 | /* clear ptes */ | ||
| 1192 | dma_pte_clear_range(domain, 0, end); | ||
| 1193 | |||
| 1194 | /* free page tables */ | ||
| 1195 | dma_pte_free_pagetable(domain, 0, end); | ||
| 1196 | |||
| 1197 | iommu_free_domain(domain); | ||
| 1198 | free_domain_mem(domain); | ||
| 1199 | } | ||
| 1200 | |||
| 1201 | static int domain_context_mapping_one(struct dmar_domain *domain, | ||
| 1202 | u8 bus, u8 devfn) | ||
| 1203 | { | ||
| 1204 | struct context_entry *context; | ||
| 1205 | struct intel_iommu *iommu = domain->iommu; | ||
| 1206 | unsigned long flags; | ||
| 1207 | |||
| 1208 | pr_debug("Set context mapping for %02x:%02x.%d\n", | ||
| 1209 | bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); | ||
| 1210 | BUG_ON(!domain->pgd); | ||
| 1211 | context = device_to_context_entry(iommu, bus, devfn); | ||
| 1212 | if (!context) | ||
| 1213 | return -ENOMEM; | ||
| 1214 | spin_lock_irqsave(&iommu->lock, flags); | ||
| 1215 | if (context_present(*context)) { | ||
| 1216 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 1217 | return 0; | ||
| 1218 | } | ||
| 1219 | |||
| 1220 | context_set_domain_id(*context, domain->id); | ||
| 1221 | context_set_address_width(*context, domain->agaw); | ||
| 1222 | context_set_address_root(*context, virt_to_phys(domain->pgd)); | ||
| 1223 | context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); | ||
| 1224 | context_set_fault_enable(*context); | ||
| 1225 | context_set_present(*context); | ||
| 1226 | __iommu_flush_cache(iommu, context, sizeof(*context)); | ||
| 1227 | |||
| 1228 | /* it's a non-present to present mapping */ | ||
| 1229 | if (iommu_flush_context_device(iommu, domain->id, | ||
| 1230 | (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1)) | ||
| 1231 | iommu_flush_write_buffer(iommu); | ||
| 1232 | else | ||
| 1233 | iommu_flush_iotlb_dsi(iommu, 0, 0); | ||
| 1234 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 1235 | return 0; | ||
| 1236 | } | ||
| 1237 | |||
| 1238 | static int | ||
| 1239 | domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) | ||
| 1240 | { | ||
| 1241 | int ret; | ||
| 1242 | struct pci_dev *tmp, *parent; | ||
| 1243 | |||
| 1244 | ret = domain_context_mapping_one(domain, pdev->bus->number, | ||
| 1245 | pdev->devfn); | ||
| 1246 | if (ret) | ||
| 1247 | return ret; | ||
| 1248 | |||
| 1249 | /* dependent device mapping */ | ||
| 1250 | tmp = pci_find_upstream_pcie_bridge(pdev); | ||
| 1251 | if (!tmp) | ||
| 1252 | return 0; | ||
| 1253 | /* Secondary interface's bus number and devfn 0 */ | ||
| 1254 | parent = pdev->bus->self; | ||
| 1255 | while (parent != tmp) { | ||
| 1256 | ret = domain_context_mapping_one(domain, parent->bus->number, | ||
| 1257 | parent->devfn); | ||
| 1258 | if (ret) | ||
| 1259 | return ret; | ||
| 1260 | parent = parent->bus->self; | ||
| 1261 | } | ||
| 1262 | if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ | ||
| 1263 | return domain_context_mapping_one(domain, | ||
| 1264 | tmp->subordinate->number, 0); | ||
| 1265 | else /* this is a legacy PCI bridge */ | ||
| 1266 | return domain_context_mapping_one(domain, | ||
| 1267 | tmp->bus->number, tmp->devfn); | ||
| 1268 | } | ||
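/*
 * Illustrative walk (the bus numbers are hypothetical): take a
 * conventional PCI device at 03:00.0 whose bus hangs off a
 * PCIe-to-PCI bridge at 02:00.0.  The device itself is mapped first;
 * the while loop then maps any bridges between the device and the
 * PCIe bridge (none in this flat example); and since tmp->is_pcie,
 * the final call installs a context entry for (bus 3, devfn 0), the
 * secondary-interface id used for requests forwarded by the bridge.
 */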
| 1269 | |||
| 1270 | static int domain_context_mapped(struct dmar_domain *domain, | ||
| 1271 | struct pci_dev *pdev) | ||
| 1272 | { | ||
| 1273 | int ret; | ||
| 1274 | struct pci_dev *tmp, *parent; | ||
| 1275 | |||
| 1276 | ret = device_context_mapped(domain->iommu, | ||
| 1277 | pdev->bus->number, pdev->devfn); | ||
| 1278 | if (!ret) | ||
| 1279 | return ret; | ||
| 1280 | /* dependent device mapping */ | ||
| 1281 | tmp = pci_find_upstream_pcie_bridge(pdev); | ||
| 1282 | if (!tmp) | ||
| 1283 | return ret; | ||
| 1284 | /* Secondary interface's bus number and devfn 0 */ | ||
| 1285 | parent = pdev->bus->self; | ||
| 1286 | while (parent != tmp) { | ||
| 1287 | ret = device_context_mapped(domain->iommu, parent->bus->number, | ||
| 1288 | parent->devfn); | ||
| 1289 | if (!ret) | ||
| 1290 | return ret; | ||
| 1291 | parent = parent->bus->self; | ||
| 1292 | } | ||
| 1293 | if (tmp->is_pcie) | ||
| 1294 | return device_context_mapped(domain->iommu, | ||
| 1295 | tmp->subordinate->number, 0); | ||
| 1296 | else | ||
| 1297 | return device_context_mapped(domain->iommu, | ||
| 1298 | tmp->bus->number, tmp->devfn); | ||
| 1299 | } | ||
| 1300 | |||
| 1301 | static int | ||
| 1302 | domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, | ||
| 1303 | u64 hpa, size_t size, int prot) | ||
| 1304 | { | ||
| 1305 | u64 start_pfn, end_pfn; | ||
| 1306 | struct dma_pte *pte; | ||
| 1307 | int index; | ||
| 1308 | |||
| 1309 | if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) | ||
| 1310 | return -EINVAL; | ||
| 1311 | iova &= PAGE_MASK_4K; | ||
| 1312 | start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K; | ||
| 1313 | end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K; | ||
| 1314 | index = 0; | ||
| 1315 | while (start_pfn < end_pfn) { | ||
| 1316 | pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index); | ||
| 1317 | if (!pte) | ||
| 1318 | return -ENOMEM; | ||
| 1319 | /* We don't need lock here, nobody else | ||
| 1320 | * touches the iova range | ||
| 1321 | */ | ||
| 1322 | BUG_ON(dma_pte_addr(*pte)); | ||
| 1323 | dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K); | ||
| 1324 | dma_set_pte_prot(*pte, prot); | ||
| 1325 | __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); | ||
| 1326 | start_pfn++; | ||
| 1327 | index++; | ||
| 1328 | } | ||
| 1329 | return 0; | ||
| 1330 | } | ||
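/*
 * Worked example: mapping hpa 0x12340800 with size 0x1000 at iova
 * 0x4000 gives start_pfn 0x12340 and end_pfn 0x12342 because the
 * buffer straddles a page boundary; two 4K PTEs are written, covering
 * iova 0x4000-0x5fff.
 */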
| 1331 | |||
| 1332 | static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn) | ||
| 1333 | { | ||
| 1334 | clear_context_table(domain->iommu, bus, devfn); | ||
| 1335 | iommu_flush_context_global(domain->iommu, 0); | ||
| 1336 | iommu_flush_iotlb_global(domain->iommu, 0); | ||
| 1337 | } | ||
| 1338 | |||
| 1339 | static void domain_remove_dev_info(struct dmar_domain *domain) | ||
| 1340 | { | ||
| 1341 | struct device_domain_info *info; | ||
| 1342 | unsigned long flags; | ||
| 1343 | |||
| 1344 | spin_lock_irqsave(&device_domain_lock, flags); | ||
| 1345 | while (!list_empty(&domain->devices)) { | ||
| 1346 | info = list_entry(domain->devices.next, | ||
| 1347 | struct device_domain_info, link); | ||
| 1348 | list_del(&info->link); | ||
| 1349 | list_del(&info->global); | ||
| 1350 | if (info->dev) | ||
| 1351 | info->dev->dev.archdata.iommu = NULL; | ||
| 1352 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
| 1353 | |||
| 1354 | detach_domain_for_dev(info->domain, info->bus, info->devfn); | ||
| 1355 | free_devinfo_mem(info); | ||
| 1356 | |||
| 1357 | spin_lock_irqsave(&device_domain_lock, flags); | ||
| 1358 | } | ||
| 1359 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
| 1360 | } | ||
| 1361 | |||
| 1362 | /* | ||
| 1363 | * find_domain | ||
| 1364 | * Note: struct pci_dev->dev.archdata.iommu stores the domain info | ||
| 1365 | */ | ||
| 1366 | struct dmar_domain * | ||
| 1367 | find_domain(struct pci_dev *pdev) | ||
| 1368 | { | ||
| 1369 | struct device_domain_info *info; | ||
| 1370 | |||
| 1371 | /* No lock here, assumes no domain exit in normal case */ | ||
| 1372 | info = pdev->dev.archdata.iommu; | ||
| 1373 | if (info) | ||
| 1374 | return info->domain; | ||
| 1375 | return NULL; | ||
| 1376 | } | ||
| 1377 | |||
| 1378 | static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, | ||
| 1379 | struct pci_dev *dev) | ||
| 1380 | { | ||
| 1381 | int index; | ||
| 1382 | |||
| 1383 | while (dev) { | ||
| 1384 | for (index = 0; index < cnt; index++) | ||
| 1385 | if (dev == devices[index]) | ||
| 1386 | return 1; | ||
| 1387 | |||
| 1388 | /* Check our parent */ | ||
| 1389 | dev = dev->bus->self; | ||
| 1390 | } | ||
| 1391 | |||
| 1392 | return 0; | ||
| 1393 | } | ||
| 1394 | |||
| 1395 | static struct dmar_drhd_unit * | ||
| 1396 | dmar_find_matched_drhd_unit(struct pci_dev *dev) | ||
| 1397 | { | ||
| 1398 | struct dmar_drhd_unit *drhd = NULL; | ||
| 1399 | |||
| 1400 | list_for_each_entry(drhd, &dmar_drhd_units, list) { | ||
| 1401 | if (drhd->include_all || dmar_pci_device_match(drhd->devices, | ||
| 1402 | drhd->devices_cnt, dev)) | ||
| 1403 | return drhd; | ||
| 1404 | } | ||
| 1405 | |||
| 1406 | return NULL; | ||
| 1407 | } | ||
| 1408 | |||
| 1409 | /* domain is initialized */ | ||
| 1410 | static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) | ||
| 1411 | { | ||
| 1412 | struct dmar_domain *domain, *found = NULL; | ||
| 1413 | struct intel_iommu *iommu; | ||
| 1414 | struct dmar_drhd_unit *drhd; | ||
| 1415 | struct device_domain_info *info, *tmp; | ||
| 1416 | struct pci_dev *dev_tmp; | ||
| 1417 | unsigned long flags; | ||
| 1418 | int bus = 0, devfn = 0; | ||
| 1419 | |||
| 1420 | domain = find_domain(pdev); | ||
| 1421 | if (domain) | ||
| 1422 | return domain; | ||
| 1423 | |||
| 1424 | dev_tmp = pci_find_upstream_pcie_bridge(pdev); | ||
| 1425 | if (dev_tmp) { | ||
| 1426 | if (dev_tmp->is_pcie) { | ||
| 1427 | bus = dev_tmp->subordinate->number; | ||
| 1428 | devfn = 0; | ||
| 1429 | } else { | ||
| 1430 | bus = dev_tmp->bus->number; | ||
| 1431 | devfn = dev_tmp->devfn; | ||
| 1432 | } | ||
| 1433 | spin_lock_irqsave(&device_domain_lock, flags); | ||
| 1434 | list_for_each_entry(info, &device_domain_list, global) { | ||
| 1435 | if (info->bus == bus && info->devfn == devfn) { | ||
| 1436 | found = info->domain; | ||
| 1437 | break; | ||
| 1438 | } | ||
| 1439 | } | ||
| 1440 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
| 1441 | /* pcie-pci bridge already has a domain, use it */ | ||
| 1442 | if (found) { | ||
| 1443 | domain = found; | ||
| 1444 | goto found_domain; | ||
| 1445 | } | ||
| 1446 | } | ||
| 1447 | |||
| 1448 | /* Allocate new domain for the device */ | ||
| 1449 | drhd = dmar_find_matched_drhd_unit(pdev); | ||
| 1450 | if (!drhd) { | ||
| 1451 | printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n", | ||
| 1452 | pci_name(pdev)); | ||
| 1453 | return NULL; | ||
| 1454 | } | ||
| 1455 | iommu = drhd->iommu; | ||
| 1456 | |||
| 1457 | domain = iommu_alloc_domain(iommu); | ||
| 1458 | if (!domain) | ||
| 1459 | goto error; | ||
| 1460 | |||
| 1461 | if (domain_init(domain, gaw)) { | ||
| 1462 | domain_exit(domain); | ||
| 1463 | goto error; | ||
| 1464 | } | ||
| 1465 | |||
| 1466 | /* register pcie-to-pci device */ | ||
| 1467 | if (dev_tmp) { | ||
| 1468 | info = alloc_devinfo_mem(); | ||
| 1469 | if (!info) { | ||
| 1470 | domain_exit(domain); | ||
| 1471 | goto error; | ||
| 1472 | } | ||
| 1473 | info->bus = bus; | ||
| 1474 | info->devfn = devfn; | ||
| 1475 | info->dev = NULL; | ||
| 1476 | info->domain = domain; | ||
| 1477 | /* This domain is shared by devices under p2p bridge */ | ||
| 1478 | domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES; | ||
| 1479 | |||
| 1480 | /* pcie-to-pci bridge already has a domain, use it */ | ||
| 1481 | found = NULL; | ||
| 1482 | spin_lock_irqsave(&device_domain_lock, flags); | ||
| 1483 | list_for_each_entry(tmp, &device_domain_list, global) { | ||
| 1484 | if (tmp->bus == bus && tmp->devfn == devfn) { | ||
| 1485 | found = tmp->domain; | ||
| 1486 | break; | ||
| 1487 | } | ||
| 1488 | } | ||
| 1489 | if (found) { | ||
| 1490 | free_devinfo_mem(info); | ||
| 1491 | domain_exit(domain); | ||
| 1492 | domain = found; | ||
| 1493 | } else { | ||
| 1494 | list_add(&info->link, &domain->devices); | ||
| 1495 | list_add(&info->global, &device_domain_list); | ||
| 1496 | } | ||
| 1497 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
| 1498 | } | ||
| 1499 | |||
| 1500 | found_domain: | ||
| 1501 | info = alloc_devinfo_mem(); | ||
| 1502 | if (!info) | ||
| 1503 | goto error; | ||
| 1504 | info->bus = pdev->bus->number; | ||
| 1505 | info->devfn = pdev->devfn; | ||
| 1506 | info->dev = pdev; | ||
| 1507 | info->domain = domain; | ||
| 1508 | spin_lock_irqsave(&device_domain_lock, flags); | ||
| 1509 | /* somebody else raced us and set the domain first */ | ||
| 1510 | found = find_domain(pdev); | ||
| 1511 | if (found != NULL) { | ||
| 1512 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
| 1513 | if (found != domain) { | ||
| 1514 | domain_exit(domain); | ||
| 1515 | domain = found; | ||
| 1516 | } | ||
| 1517 | free_devinfo_mem(info); | ||
| 1518 | return domain; | ||
| 1519 | } | ||
| 1520 | list_add(&info->link, &domain->devices); | ||
| 1521 | list_add(&info->global, &device_domain_list); | ||
| 1522 | pdev->dev.archdata.iommu = info; | ||
| 1523 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
| 1524 | return domain; | ||
| 1525 | error: | ||
| 1526 | /* recheck here; another thread may have set it meanwhile */ | ||
| 1527 | return find_domain(pdev); | ||
| 1528 | } | ||
| 1529 | |||
| 1530 | static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end) | ||
| 1531 | { | ||
| 1532 | struct dmar_domain *domain; | ||
| 1533 | unsigned long size; | ||
| 1534 | u64 base; | ||
| 1535 | int ret; | ||
| 1536 | |||
| 1537 | printk(KERN_INFO | ||
| 1538 | "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", | ||
| 1539 | pci_name(pdev), start, end); | ||
| 1540 | /* page table init */ | ||
| 1541 | domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); | ||
| 1542 | if (!domain) | ||
| 1543 | return -ENOMEM; | ||
| 1544 | |||
| 1545 | /* The address might not be aligned */ | ||
| 1546 | base = start & PAGE_MASK_4K; | ||
| 1547 | size = end - base; | ||
| 1548 | size = PAGE_ALIGN_4K(size); | ||
| 1549 | if (!reserve_iova(&domain->iovad, IOVA_PFN(base), | ||
| 1550 | IOVA_PFN(base + size) - 1)) { | ||
| 1551 | printk(KERN_ERR "IOMMU: reserve iova failed\n"); | ||
| 1552 | ret = -ENOMEM; | ||
| 1553 | goto error; | ||
| 1554 | } | ||
| 1555 | |||
| 1556 | pr_debug("Mapping reserved region %lx@%llx for %s\n", | ||
| 1557 | size, base, pci_name(pdev)); | ||
| 1558 | /* | ||
| 1559 | * RMRR range might have overlap with physical memory range, | ||
| 1560 | * clear it first | ||
| 1561 | */ | ||
| 1562 | dma_pte_clear_range(domain, base, base + size); | ||
| 1563 | |||
| 1564 | ret = domain_page_mapping(domain, base, base, size, | ||
| 1565 | DMA_PTE_READ|DMA_PTE_WRITE); | ||
| 1566 | if (ret) | ||
| 1567 | goto error; | ||
| 1568 | |||
| 1569 | /* context entry init */ | ||
| 1570 | ret = domain_context_mapping(domain, pdev); | ||
| 1571 | if (!ret) | ||
| 1572 | return 0; | ||
| 1573 | error: | ||
| 1574 | domain_exit(domain); | ||
| 1575 | return ret; | ||
| 1577 | } | ||
| 1578 | |||
| 1579 | static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, | ||
| 1580 | struct pci_dev *pdev) | ||
| 1581 | { | ||
| 1582 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | ||
| 1583 | return 0; | ||
| 1584 | return iommu_prepare_identity_map(pdev, rmrr->base_address, | ||
| 1585 | rmrr->end_address + 1); | ||
| 1586 | } | ||
| 1587 | |||
| 1588 | #ifdef CONFIG_DMAR_GFX_WA | ||
| 1589 | extern int arch_get_ram_range(int slot, u64 *addr, u64 *size); | ||
| 1590 | static void __init iommu_prepare_gfx_mapping(void) | ||
| 1591 | { | ||
| 1592 | struct pci_dev *pdev = NULL; | ||
| 1593 | u64 base, size; | ||
| 1594 | int slot; | ||
| 1595 | int ret; | ||
| 1596 | |||
| 1597 | for_each_pci_dev(pdev) { | ||
| 1598 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO || | ||
| 1599 | !IS_GFX_DEVICE(pdev)) | ||
| 1600 | continue; | ||
| 1601 | printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n", | ||
| 1602 | pci_name(pdev)); | ||
| 1603 | slot = arch_get_ram_range(0, &base, &size); | ||
| 1604 | while (slot >= 0) { | ||
| 1605 | ret = iommu_prepare_identity_map(pdev, | ||
| 1606 | base, base + size); | ||
| 1607 | if (ret) | ||
| 1608 | goto error; | ||
| 1609 | slot = arch_get_ram_range(slot, &base, &size); | ||
| 1610 | } | ||
| 1611 | continue; | ||
| 1612 | error: | ||
| 1613 | printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); | ||
| 1614 | } | ||
| 1615 | } | ||
| 1616 | #endif | ||
| 1617 | |||
| 1618 | #ifdef CONFIG_DMAR_FLOPPY_WA | ||
| 1619 | static inline void iommu_prepare_isa(void) | ||
| 1620 | { | ||
| 1621 | struct pci_dev *pdev; | ||
| 1622 | int ret; | ||
| 1623 | |||
| 1624 | pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); | ||
| 1625 | if (!pdev) | ||
| 1626 | return; | ||
| 1627 | |||
| 1628 | printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n"); | ||
| 1629 | ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); | ||
| 1630 | |||
| 1631 | if (ret) | ||
| 1632 | printk("IOMMU: Failed to create 0-64M identity map, " | ||
| 1633 | "floppy might not work\n"); | ||
| 1635 | } | ||
| 1636 | #else | ||
| 1637 | static inline void iommu_prepare_isa(void) | ||
| 1638 | { | ||
| 1639 | return; | ||
| 1640 | } | ||
| 1641 | #endif /* !CONFIG_DMAR_FLOPPY_WA */ | ||
| 1642 | |||
| 1643 | int __init init_dmars(void) | ||
| 1644 | { | ||
| 1645 | struct dmar_drhd_unit *drhd; | ||
| 1646 | struct dmar_rmrr_unit *rmrr; | ||
| 1647 | struct pci_dev *pdev; | ||
| 1648 | struct intel_iommu *iommu; | ||
| 1649 | int ret, unit = 0; | ||
| 1650 | |||
| 1651 | /* | ||
| 1652 | * for each drhd | ||
| 1653 | * allocate root | ||
| 1654 | * initialize and program root entry to not present | ||
| 1655 | * endfor | ||
| 1656 | */ | ||
| 1657 | for_each_drhd_unit(drhd) { | ||
| 1658 | if (drhd->ignored) | ||
| 1659 | continue; | ||
| 1660 | iommu = alloc_iommu(drhd); | ||
| 1661 | if (!iommu) { | ||
| 1662 | ret = -ENOMEM; | ||
| 1663 | goto error; | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | /* | ||
| 1667 | * TBD: | ||
| 1668 | * we could share the same root & context tables | ||
| 1669 | * among all IOMMUs; needs to be split out later. | ||
| 1670 | */ | ||
| 1671 | ret = iommu_alloc_root_entry(iommu); | ||
| 1672 | if (ret) { | ||
| 1673 | printk(KERN_ERR "IOMMU: allocate root entry failed\n"); | ||
| 1674 | goto error; | ||
| 1675 | } | ||
| 1676 | } | ||
| 1677 | |||
| 1678 | /* | ||
| 1679 | * For each rmrr | ||
| 1680 | * for each dev attached to rmrr | ||
| 1681 | * do | ||
| 1682 | * locate drhd for dev, alloc domain for dev | ||
| 1683 | * allocate free domain | ||
| 1684 | * allocate page table entries for rmrr | ||
| 1685 | * if context not allocated for bus | ||
| 1686 | * allocate and init context | ||
| 1687 | * set present in root table for this bus | ||
| 1688 | * init context with domain, translation etc | ||
| 1689 | * endfor | ||
| 1690 | * endfor | ||
| 1691 | */ | ||
| 1692 | for_each_rmrr_units(rmrr) { | ||
| 1693 | int i; | ||
| 1694 | for (i = 0; i < rmrr->devices_cnt; i++) { | ||
| 1695 | pdev = rmrr->devices[i]; | ||
| 1696 | /* some BIOSes list non-existent devices in the DMAR table */ | ||
| 1697 | if (!pdev) | ||
| 1698 | continue; | ||
| 1699 | ret = iommu_prepare_rmrr_dev(rmrr, pdev); | ||
| 1700 | if (ret) | ||
| 1701 | printk(KERN_ERR | ||
| 1702 | "IOMMU: mapping reserved region failed\n"); | ||
| 1703 | } | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | iommu_prepare_gfx_mapping(); | ||
| 1707 | |||
| 1708 | iommu_prepare_isa(); | ||
| 1709 | |||
| 1710 | /* | ||
| 1711 | * for each drhd | ||
| 1712 | * enable fault log | ||
| 1713 | * global invalidate context cache | ||
| 1714 | * global invalidate iotlb | ||
| 1715 | * enable translation | ||
| 1716 | */ | ||
| 1717 | for_each_drhd_unit(drhd) { | ||
| 1718 | if (drhd->ignored) | ||
| 1719 | continue; | ||
| 1720 | iommu = drhd->iommu; | ||
| 1721 | sprintf(iommu->name, "dmar%d", unit++); | ||
| 1722 | |||
| 1723 | iommu_flush_write_buffer(iommu); | ||
| 1724 | |||
| 1725 | ret = dmar_set_interrupt(iommu); | ||
| 1726 | if (ret) | ||
| 1727 | goto error; | ||
| 1728 | |||
| 1729 | iommu_set_root_entry(iommu); | ||
| 1730 | |||
| 1731 | iommu_flush_context_global(iommu, 0); | ||
| 1732 | iommu_flush_iotlb_global(iommu, 0); | ||
| 1733 | |||
| 1734 | ret = iommu_enable_translation(iommu); | ||
| 1735 | if (ret) | ||
| 1736 | goto error; | ||
| 1737 | } | ||
| 1738 | |||
| 1739 | return 0; | ||
| 1740 | error: | ||
| 1741 | for_each_drhd_unit(drhd) { | ||
| 1742 | if (drhd->ignored) | ||
| 1743 | continue; | ||
| 1744 | iommu = drhd->iommu; | ||
| 1745 | free_iommu(iommu); | ||
| 1746 | } | ||
| 1747 | return ret; | ||
| 1748 | } | ||
| 1749 | |||
| 1750 | static inline u64 aligned_size(u64 host_addr, size_t size) | ||
| 1751 | { | ||
| 1752 | u64 addr; | ||
| 1753 | addr = (host_addr & (~PAGE_MASK_4K)) + size; | ||
| 1754 | return PAGE_ALIGN_4K(addr); | ||
| 1755 | } | ||
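/*
 * Worked example: for host_addr = 0x1234 and size = 0xf00, the page
 * offset 0x234 plus the size is 0x1134, which rounds up to 0x2000 --
 * two 4K pages are needed even though the buffer itself is smaller
 * than one page.
 */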
| 1756 | |||
| 1757 | struct iova * | ||
| 1758 | iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end) | ||
| 1759 | { | ||
| 1760 | struct iova *piova; | ||
| 1761 | |||
| 1762 | /* Make sure it's in range */ | ||
| 1763 | end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end); | ||
| 1764 | if (!size || (IOVA_START_ADDR + size > end)) | ||
| 1765 | return NULL; | ||
| 1766 | |||
| 1767 | piova = alloc_iova(&domain->iovad, | ||
| 1768 | size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1); | ||
| 1769 | return piova; | ||
| 1770 | } | ||
| 1771 | |||
| 1772 | static struct iova * | ||
| 1773 | __intel_alloc_iova(struct device *dev, struct dmar_domain *domain, | ||
| 1774 | size_t size) | ||
| 1775 | { | ||
| 1776 | struct pci_dev *pdev = to_pci_dev(dev); | ||
| 1777 | struct iova *iova = NULL; | ||
| 1778 | |||
| 1779 | if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) { | ||
| 1780 | iova = iommu_alloc_iova(domain, size, pdev->dma_mask); | ||
| 1781 | } else { | ||
| 1782 | /* | ||
| 1783 | * First try to allocate an io virtual address in | ||
| 1784 | * DMA_32BIT_MASK and if that fails then try allocating | ||
| 1785 | * from the higher range | ||
| 1786 | */ | ||
| 1787 | iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK); | ||
| 1788 | if (!iova) | ||
| 1789 | iova = iommu_alloc_iova(domain, size, pdev->dma_mask); | ||
| 1790 | } | ||
| 1791 | |||
| 1792 | if (!iova) { | ||
| 1793 | printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); | ||
| 1794 | return NULL; | ||
| 1795 | } | ||
| 1796 | |||
| 1797 | return iova; | ||
| 1798 | } | ||
| 1799 | |||
| 1800 | static struct dmar_domain * | ||
| 1801 | get_valid_domain_for_dev(struct pci_dev *pdev) | ||
| 1802 | { | ||
| 1803 | struct dmar_domain *domain; | ||
| 1804 | int ret; | ||
| 1805 | |||
| 1806 | domain = get_domain_for_dev(pdev, | ||
| 1807 | DEFAULT_DOMAIN_ADDRESS_WIDTH); | ||
| 1808 | if (!domain) { | ||
| 1809 | printk(KERN_ERR | ||
| 1810 | "Allocating domain for %s failed", pci_name(pdev)); | ||
| 1811 | return 0; | ||
| 1812 | } | ||
| 1813 | |||
| 1814 | /* make sure context mapping is ok */ | ||
| 1815 | if (unlikely(!domain_context_mapped(domain, pdev))) { | ||
| 1816 | ret = domain_context_mapping(domain, pdev); | ||
| 1817 | if (ret) { | ||
| 1818 | printk(KERN_ERR | ||
| 1819 | "Domain context map for %s failed", | ||
| 1820 | pci_name(pdev)); | ||
| 1821 | return 0; | ||
| 1822 | } | ||
| 1823 | } | ||
| 1824 | |||
| 1825 | return domain; | ||
| 1826 | } | ||
| 1827 | |||
| 1828 | static dma_addr_t intel_map_single(struct device *hwdev, void *addr, | ||
| 1829 | size_t size, int dir) | ||
| 1830 | { | ||
| 1831 | struct pci_dev *pdev = to_pci_dev(hwdev); | ||
| 1832 | int ret; | ||
| 1833 | struct dmar_domain *domain; | ||
| 1834 | unsigned long start_addr; | ||
| 1835 | struct iova *iova; | ||
| 1836 | int prot = 0; | ||
| 1837 | |||
| 1838 | BUG_ON(dir == DMA_NONE); | ||
| 1839 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | ||
| 1840 | return virt_to_bus(addr); | ||
| 1841 | |||
| 1842 | domain = get_valid_domain_for_dev(pdev); | ||
| 1843 | if (!domain) | ||
| 1844 | return 0; | ||
| 1845 | |||
| 1846 | addr = (void *)virt_to_phys(addr); | ||
| 1847 | size = aligned_size((u64)addr, size); | ||
| 1848 | |||
| 1849 | iova = __intel_alloc_iova(hwdev, domain, size); | ||
| 1850 | if (!iova) | ||
| 1851 | goto error; | ||
| 1852 | |||
| 1853 | start_addr = iova->pfn_lo << PAGE_SHIFT_4K; | ||
| 1854 | |||
| 1855 | /* | ||
| 1856 | * Check if DMAR supports zero-length reads on write only | ||
| 1857 | * mappings. | ||
| 1858 | */ | ||
| 1859 | if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || | ||
| 1860 | !cap_zlr(domain->iommu->cap)) | ||
| 1861 | prot |= DMA_PTE_READ; | ||
| 1862 | if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) | ||
| 1863 | prot |= DMA_PTE_WRITE; | ||
| 1864 | /* | ||
| 1865 | * addr - (addr + size) might be partial page, we should map the whole | ||
| 1866 | * page. Note: if two part of one page are separately mapped, we | ||
| 1867 | * might have two guest_addr mapping to the same host addr, but this | ||
| 1868 | * is not a big problem | ||
| 1869 | */ | ||
| 1870 | ret = domain_page_mapping(domain, start_addr, | ||
| 1871 | ((u64)addr) & PAGE_MASK_4K, size, prot); | ||
| 1872 | if (ret) | ||
| 1873 | goto error; | ||
| 1874 | |||
| 1875 | pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n", | ||
| 1876 | pci_name(pdev), size, (u64)addr, | ||
| 1877 | size, (u64)start_addr, dir); | ||
| 1878 | |||
| 1879 | /* it's a non-present to present mapping */ | ||
| 1880 | ret = iommu_flush_iotlb_psi(domain->iommu, domain->id, | ||
| 1881 | start_addr, size >> PAGE_SHIFT_4K, 1); | ||
| 1882 | if (ret) | ||
| 1883 | iommu_flush_write_buffer(domain->iommu); | ||
| 1884 | |||
| 1885 | return (start_addr + ((u64)addr & (~PAGE_MASK_4K))); | ||
| 1886 | |||
| 1887 | error: | ||
| 1888 | if (iova) | ||
| 1889 | __free_iova(&domain->iovad, iova); | ||
| 1890 | printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n", | ||
| 1891 | pci_name(pdev), size, (u64)addr, dir); | ||
| 1892 | return 0; | ||
| 1893 | } | ||
| 1894 | |||
| 1895 | static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, | ||
| 1896 | size_t size, int dir) | ||
| 1897 | { | ||
| 1898 | struct pci_dev *pdev = to_pci_dev(dev); | ||
| 1899 | struct dmar_domain *domain; | ||
| 1900 | unsigned long start_addr; | ||
| 1901 | struct iova *iova; | ||
| 1902 | |||
| 1903 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | ||
| 1904 | return; | ||
| 1905 | domain = find_domain(pdev); | ||
| 1906 | BUG_ON(!domain); | ||
| 1907 | |||
| 1908 | iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); | ||
| 1909 | if (!iova) | ||
| 1910 | return; | ||
| 1911 | |||
| 1912 | start_addr = iova->pfn_lo << PAGE_SHIFT_4K; | ||
| 1913 | size = aligned_size((u64)dev_addr, size); | ||
| 1914 | |||
| 1915 | pr_debug("Device %s unmapping: %lx@%llx\n", | ||
| 1916 | pci_name(pdev), size, (u64)start_addr); | ||
| 1917 | |||
| 1918 | /* clear the whole page */ | ||
| 1919 | dma_pte_clear_range(domain, start_addr, start_addr + size); | ||
| 1920 | /* free page tables */ | ||
| 1921 | dma_pte_free_pagetable(domain, start_addr, start_addr + size); | ||
| 1922 | |||
| 1923 | if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, | ||
| 1924 | size >> PAGE_SHIFT_4K, 0)) | ||
| 1925 | iommu_flush_write_buffer(domain->iommu); | ||
| 1926 | |||
| 1927 | /* free iova */ | ||
| 1928 | __free_iova(&domain->iovad, iova); | ||
| 1929 | } | ||
| 1930 | |||
| 1931 | static void * intel_alloc_coherent(struct device *hwdev, size_t size, | ||
| 1932 | dma_addr_t *dma_handle, gfp_t flags) | ||
| 1933 | { | ||
| 1934 | void *vaddr; | ||
| 1935 | int order; | ||
| 1936 | |||
| 1937 | size = PAGE_ALIGN_4K(size); | ||
| 1938 | order = get_order(size); | ||
| 1939 | flags &= ~(GFP_DMA | GFP_DMA32); | ||
| 1940 | |||
| 1941 | vaddr = (void *)__get_free_pages(flags, order); | ||
| 1942 | if (!vaddr) | ||
| 1943 | return NULL; | ||
| 1944 | memset(vaddr, 0, size); | ||
| 1945 | |||
| 1946 | *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL); | ||
| 1947 | if (*dma_handle) | ||
| 1948 | return vaddr; | ||
| 1949 | free_pages((unsigned long)vaddr, order); | ||
| 1950 | return NULL; | ||
| 1951 | } | ||
| 1952 | |||
| 1953 | static void intel_free_coherent(struct device *hwdev, size_t size, | ||
| 1954 | void *vaddr, dma_addr_t dma_handle) | ||
| 1955 | { | ||
| 1956 | int order; | ||
| 1957 | |||
| 1958 | size = PAGE_ALIGN_4K(size); | ||
| 1959 | order = get_order(size); | ||
| 1960 | |||
| 1961 | intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL); | ||
| 1962 | free_pages((unsigned long)vaddr, order); | ||
| 1963 | } | ||
| 1964 | |||
| 1965 | #define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) | ||
| 1966 | static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, | ||
| 1967 | int nelems, int dir) | ||
| 1968 | { | ||
| 1969 | int i; | ||
| 1970 | struct pci_dev *pdev = to_pci_dev(hwdev); | ||
| 1971 | struct dmar_domain *domain; | ||
| 1972 | unsigned long start_addr; | ||
| 1973 | struct iova *iova; | ||
| 1974 | size_t size = 0; | ||
| 1975 | void *addr; | ||
| 1976 | struct scatterlist *sg; | ||
| 1977 | |||
| 1978 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | ||
| 1979 | return; | ||
| 1980 | |||
| 1981 | domain = find_domain(pdev); | ||
| 1982 | |||
| 1983 | iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); | ||
| 1984 | if (!iova) | ||
| 1985 | return; | ||
| 1986 | for_each_sg(sglist, sg, nelems, i) { | ||
| 1987 | addr = SG_ENT_VIRT_ADDRESS(sg); | ||
| 1988 | size += aligned_size((u64)addr, sg->length); | ||
| 1989 | } | ||
| 1990 | |||
| 1991 | start_addr = iova->pfn_lo << PAGE_SHIFT_4K; | ||
| 1992 | |||
| 1993 | /* clear the whole page */ | ||
| 1994 | dma_pte_clear_range(domain, start_addr, start_addr + size); | ||
| 1995 | /* free page tables */ | ||
| 1996 | dma_pte_free_pagetable(domain, start_addr, start_addr + size); | ||
| 1997 | |||
| 1998 | if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, | ||
| 1999 | size >> PAGE_SHIFT_4K, 0)) | ||
| 2000 | iommu_flush_write_buffer(domain->iommu); | ||
| 2001 | |||
| 2002 | /* free iova */ | ||
| 2003 | __free_iova(&domain->iovad, iova); | ||
| 2004 | } | ||
| 2005 | |||
| 2006 | static int intel_nontranslate_map_sg(struct device *hwdev, | ||
| 2007 | struct scatterlist *sglist, int nelems, int dir) | ||
| 2008 | { | ||
| 2009 | int i; | ||
| 2010 | struct scatterlist *sg; | ||
| 2011 | |||
| 2012 | for_each_sg(sglist, sg, nelems, i) { | ||
| 2013 | BUG_ON(!sg->page); | ||
| 2014 | sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg)); | ||
| 2015 | sg->dma_length = sg->length; | ||
| 2016 | } | ||
| 2017 | return nelems; | ||
| 2018 | } | ||
| 2019 | |||
| 2020 | static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, | ||
| 2021 | int nelems, int dir) | ||
| 2022 | { | ||
| 2023 | void *addr; | ||
| 2024 | int i; | ||
| 2025 | struct pci_dev *pdev = to_pci_dev(hwdev); | ||
| 2026 | struct dmar_domain *domain; | ||
| 2027 | size_t size = 0; | ||
| 2028 | int prot = 0; | ||
| 2029 | size_t offset = 0; | ||
| 2030 | struct iova *iova = NULL; | ||
| 2031 | int ret; | ||
| 2032 | struct scatterlist *sg; | ||
| 2033 | unsigned long start_addr; | ||
| 2034 | |||
| 2035 | BUG_ON(dir == DMA_NONE); | ||
| 2036 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | ||
| 2037 | return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); | ||
| 2038 | |||
| 2039 | domain = get_valid_domain_for_dev(pdev); | ||
| 2040 | if (!domain) | ||
| 2041 | return 0; | ||
| 2042 | |||
| 2043 | for_each_sg(sglist, sg, nelems, i) { | ||
| 2044 | addr = SG_ENT_VIRT_ADDRESS(sg); | ||
| 2045 | addr = (void *)virt_to_phys(addr); | ||
| 2046 | size += aligned_size((u64)addr, sg->length); | ||
| 2047 | } | ||
| 2048 | |||
| 2049 | iova = __intel_alloc_iova(hwdev, domain, size); | ||
| 2050 | if (!iova) { | ||
| 2051 | sglist->dma_length = 0; | ||
| 2052 | return 0; | ||
| 2053 | } | ||
| 2054 | |||
| 2055 | /* | ||
| 2056 | * Check if DMAR supports zero-length reads on write only | ||
| 2057 | * mappings. | ||
| 2058 | */ | ||
| 2059 | if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || | ||
| 2060 | !cap_zlr(domain->iommu->cap)) | ||
| 2061 | prot |= DMA_PTE_READ; | ||
| 2062 | if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) | ||
| 2063 | prot |= DMA_PTE_WRITE; | ||
| 2064 | |||
| 2065 | start_addr = iova->pfn_lo << PAGE_SHIFT_4K; | ||
| 2066 | offset = 0; | ||
| 2067 | for_each_sg(sglist, sg, nelems, i) { | ||
| 2068 | addr = SG_ENT_VIRT_ADDRESS(sg); | ||
| 2069 | addr = (void *)virt_to_phys(addr); | ||
| 2070 | size = aligned_size((u64)addr, sg->length); | ||
| 2071 | ret = domain_page_mapping(domain, start_addr + offset, | ||
| 2072 | ((u64)addr) & PAGE_MASK_4K, | ||
| 2073 | size, prot); | ||
| 2074 | if (ret) { | ||
| 2075 | /* clear the page */ | ||
| 2076 | dma_pte_clear_range(domain, start_addr, | ||
| 2077 | start_addr + offset); | ||
| 2078 | /* free page tables */ | ||
| 2079 | dma_pte_free_pagetable(domain, start_addr, | ||
| 2080 | start_addr + offset); | ||
| 2081 | /* free iova */ | ||
| 2082 | __free_iova(&domain->iovad, iova); | ||
| 2083 | return 0; | ||
| 2084 | } | ||
| 2085 | sg->dma_address = start_addr + offset + | ||
| 2086 | ((u64)addr & (~PAGE_MASK_4K)); | ||
| 2087 | sg->dma_length = sg->length; | ||
| 2088 | offset += size; | ||
| 2089 | } | ||
| 2090 | |||
| 2091 | /* it's a non-present to present mapping */ | ||
| 2092 | if (iommu_flush_iotlb_psi(domain->iommu, domain->id, | ||
| 2093 | start_addr, offset >> PAGE_SHIFT_4K, 1)) | ||
| 2094 | iommu_flush_write_buffer(domain->iommu); | ||
| 2095 | return nelems; | ||
| 2096 | } | ||
| 2097 | |||
| 2098 | static struct dma_mapping_ops intel_dma_ops = { | ||
| 2099 | .alloc_coherent = intel_alloc_coherent, | ||
| 2100 | .free_coherent = intel_free_coherent, | ||
| 2101 | .map_single = intel_map_single, | ||
| 2102 | .unmap_single = intel_unmap_single, | ||
| 2103 | .map_sg = intel_map_sg, | ||
| 2104 | .unmap_sg = intel_unmap_sg, | ||
| 2105 | }; | ||
| 2106 | |||
| 2107 | static inline int iommu_domain_cache_init(void) | ||
| 2108 | { | ||
| 2109 | int ret = 0; | ||
| 2110 | |||
| 2111 | iommu_domain_cache = kmem_cache_create("iommu_domain", | ||
| 2112 | sizeof(struct dmar_domain), | ||
| 2113 | 0, | ||
| 2114 | SLAB_HWCACHE_ALIGN, | ||
| 2115 | NULL); | ||
| 2117 | if (!iommu_domain_cache) { | ||
| 2118 | printk(KERN_ERR "Couldn't create iommu_domain cache\n"); | ||
| 2119 | ret = -ENOMEM; | ||
| 2120 | } | ||
| 2121 | |||
| 2122 | return ret; | ||
| 2123 | } | ||
| 2124 | |||
| 2125 | static inline int iommu_devinfo_cache_init(void) | ||
| 2126 | { | ||
| 2127 | int ret = 0; | ||
| 2128 | |||
| 2129 | iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", | ||
| 2130 | sizeof(struct device_domain_info), | ||
| 2131 | 0, | ||
| 2132 | SLAB_HWCACHE_ALIGN, | ||
| 2133 | NULL); | ||
| 2135 | if (!iommu_devinfo_cache) { | ||
| 2136 | printk(KERN_ERR "Couldn't create devinfo cache\n"); | ||
| 2137 | ret = -ENOMEM; | ||
| 2138 | } | ||
| 2139 | |||
| 2140 | return ret; | ||
| 2141 | } | ||
| 2142 | |||
| 2143 | static inline int iommu_iova_cache_init(void) | ||
| 2144 | { | ||
| 2145 | int ret = 0; | ||
| 2146 | |||
| 2147 | iommu_iova_cache = kmem_cache_create("iommu_iova", | ||
| 2148 | sizeof(struct iova), | ||
| 2149 | 0, | ||
| 2150 | SLAB_HWCACHE_ALIGN, | ||
| 2151 | NULL); | ||
| 2153 | if (!iommu_iova_cache) { | ||
| 2154 | printk(KERN_ERR "Couldn't create iova cache\n"); | ||
| 2155 | ret = -ENOMEM; | ||
| 2156 | } | ||
| 2157 | |||
| 2158 | return ret; | ||
| 2159 | } | ||
| 2160 | |||
| 2161 | static int __init iommu_init_mempool(void) | ||
| 2162 | { | ||
| 2163 | int ret; | ||
| 2164 | ret = iommu_iova_cache_init(); | ||
| 2165 | if (ret) | ||
| 2166 | return ret; | ||
| 2167 | |||
| 2168 | ret = iommu_domain_cache_init(); | ||
| 2169 | if (ret) | ||
| 2170 | goto domain_error; | ||
| 2171 | |||
| 2172 | ret = iommu_devinfo_cache_init(); | ||
| 2173 | if (!ret) | ||
| 2174 | return ret; | ||
| 2175 | |||
| 2176 | kmem_cache_destroy(iommu_domain_cache); | ||
| 2177 | domain_error: | ||
| 2178 | kmem_cache_destroy(iommu_iova_cache); | ||
| 2179 | |||
| 2180 | return -ENOMEM; | ||
| 2181 | } | ||
| 2182 | |||
| 2183 | static void __init iommu_exit_mempool(void) | ||
| 2184 | { | ||
| 2185 | kmem_cache_destroy(iommu_devinfo_cache); | ||
| 2186 | kmem_cache_destroy(iommu_domain_cache); | ||
| 2187 | kmem_cache_destroy(iommu_iova_cache); | ||
| 2189 | } | ||
| 2190 | |||
| 2191 | void __init detect_intel_iommu(void) | ||
| 2192 | { | ||
| 2193 | if (swiotlb || no_iommu || iommu_detected || dmar_disabled) | ||
| 2194 | return; | ||
| 2195 | if (early_dmar_detect()) | ||
| 2196 | iommu_detected = 1; | ||
| 2198 | } | ||
| 2199 | |||
| 2200 | static void __init init_no_remapping_devices(void) | ||
| 2201 | { | ||
| 2202 | struct dmar_drhd_unit *drhd; | ||
| 2203 | |||
| 2204 | for_each_drhd_unit(drhd) { | ||
| 2205 | if (!drhd->include_all) { | ||
| 2206 | int i; | ||
| 2207 | for (i = 0; i < drhd->devices_cnt; i++) | ||
| 2208 | if (drhd->devices[i] != NULL) | ||
| 2209 | break; | ||
| 2210 | /* ignore DMAR unit if no pci devices exist */ | ||
| 2211 | if (i == drhd->devices_cnt) | ||
| 2212 | drhd->ignored = 1; | ||
| 2213 | } | ||
| 2214 | } | ||
| 2215 | |||
| 2216 | if (dmar_map_gfx) | ||
| 2217 | return; | ||
| 2218 | |||
| 2219 | for_each_drhd_unit(drhd) { | ||
| 2220 | int i; | ||
| 2221 | if (drhd->ignored || drhd->include_all) | ||
| 2222 | continue; | ||
| 2223 | |||
| 2224 | for (i = 0; i < drhd->devices_cnt; i++) | ||
| 2225 | if (drhd->devices[i] && | ||
| 2226 | !IS_GFX_DEVICE(drhd->devices[i])) | ||
| 2227 | break; | ||
| 2228 | |||
| 2229 | if (i < drhd->devices_cnt) | ||
| 2230 | continue; | ||
| 2231 | |||
| 2232 | /* bypass IOMMU if it is just for gfx devices */ | ||
| 2233 | drhd->ignored = 1; | ||
| 2234 | for (i = 0; i < drhd->devices_cnt; i++) { | ||
| 2235 | if (!drhd->devices[i]) | ||
| 2236 | continue; | ||
| 2237 | drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; | ||
| 2238 | } | ||
| 2239 | } | ||
| 2240 | } | ||
| 2241 | |||
| 2242 | int __init intel_iommu_init(void) | ||
| 2243 | { | ||
| 2244 | int ret = 0; | ||
| 2245 | |||
| 2246 | if (no_iommu || swiotlb || dmar_disabled) | ||
| 2247 | return -ENODEV; | ||
| 2248 | |||
| 2249 | if (dmar_table_init()) | ||
| 2250 | return -ENODEV; | ||
| 2251 | |||
| 2252 | iommu_init_mempool(); | ||
| 2253 | dmar_init_reserved_ranges(); | ||
| 2254 | |||
| 2255 | init_no_remapping_devices(); | ||
| 2256 | |||
| 2257 | ret = init_dmars(); | ||
| 2258 | if (ret) { | ||
| 2259 | printk(KERN_ERR "IOMMU: dmar init failed\n"); | ||
| 2260 | put_iova_domain(&reserved_iova_list); | ||
| 2261 | iommu_exit_mempool(); | ||
| 2262 | return ret; | ||
| 2263 | } | ||
| 2264 | printk(KERN_INFO | ||
| 2265 | "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); | ||
| 2266 | |||
| 2267 | force_iommu = 1; | ||
| 2268 | dma_ops = &intel_dma_ops; | ||
| 2269 | return 0; | ||
| 2270 | } | ||
| 2271 | |||
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h new file mode 100644 index 000000000000..ee88dd2400cb --- /dev/null +++ b/drivers/pci/intel-iommu.h | |||
| @@ -0,0 +1,325 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2006, Intel Corporation. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License along with | ||
| 14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
| 15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
| 16 | * | ||
| 17 | * Copyright (C) Ashok Raj <ashok.raj@intel.com> | ||
| 18 | * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | ||
| 19 | */ | ||
| 20 | |||
| 21 | #ifndef _INTEL_IOMMU_H_ | ||
| 22 | #define _INTEL_IOMMU_H_ | ||
| 23 | |||
| 24 | #include <linux/types.h> | ||
| 25 | #include <linux/msi.h> | ||
| 26 | #include "iova.h" | ||
| 27 | #include <linux/io.h> | ||
| 28 | |||
| 29 | /* | ||
| 30 | * Intel IOMMU register specification per version 1.0 public spec. | ||
| 31 | */ | ||
| 32 | |||
| 33 | #define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ | ||
| 34 | #define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ | ||
| 35 | #define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ | ||
| 36 | #define DMAR_GCMD_REG 0x18 /* Global command register */ | ||
| 37 | #define DMAR_GSTS_REG 0x1c /* Global status register */ | ||
| 38 | #define DMAR_RTADDR_REG 0x20 /* Root entry table */ | ||
| 39 | #define DMAR_CCMD_REG 0x28 /* Context command reg */ | ||
| 40 | #define DMAR_FSTS_REG 0x34 /* Fault Status register */ | ||
| 41 | #define DMAR_FECTL_REG 0x38 /* Fault control register */ | ||
| 42 | #define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ | ||
| 43 | #define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ | ||
| 44 | #define DMAR_FEUADDR_REG 0x44 /* Upper address register */ | ||
| 45 | #define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ | ||
| 46 | #define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ | ||
| 47 | #define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ | ||
| 48 | #define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ | ||
| 49 | #define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ | ||
| 50 | #define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ | ||
| 51 | |||
| 52 | #define OFFSET_STRIDE (9) | ||
| 53 | /* | ||
| 54 | #define dmar_readl(dmar, reg) readl(dmar + reg) | ||
| 55 | #define dmar_readq(dmar, reg) ({ \ | ||
| 56 | u32 lo, hi; \ | ||
| 57 | lo = readl(dmar + reg); \ | ||
| 58 | hi = readl(dmar + reg + 4); \ | ||
| 59 | (((u64) hi) << 32) + lo; }) | ||
| 60 | */ | ||
| 61 | static inline u64 dmar_readq(void *addr) | ||
| 62 | { | ||
| 63 | u32 lo, hi; | ||
| 64 | lo = readl(addr); | ||
| 65 | hi = readl(addr + 4); | ||
| 66 | return (((u64) hi) << 32) + lo; | ||
| 67 | } | ||
| 68 | |||
| 69 | static inline void dmar_writeq(void __iomem *addr, u64 val) | ||
| 70 | { | ||
| 71 | writel((u32)val, addr); | ||
| 72 | writel((u32)(val >> 32), addr + 4); | ||
| 73 | } | ||
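/*
 * For illustration (mirroring how struct intel_iommu is presumably
 * filled at probe time): any register wider than 32 bits is read
 * through this helper, e.g.
 *
 *	iommu->cap  = dmar_readq(iommu->reg + DMAR_CAP_REG);
 *	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
 */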
| 74 | |||
| 75 | #define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) | ||
| 76 | #define DMAR_VER_MINOR(v) ((v) & 0x0f) | ||
| 77 | |||
| 78 | /* | ||
| 79 | * Decoding Capability Register | ||
| 80 | */ | ||
| 81 | #define cap_read_drain(c) (((c) >> 55) & 1) | ||
| 82 | #define cap_write_drain(c) (((c) >> 54) & 1) | ||
| 83 | #define cap_max_amask_val(c) (((c) >> 48) & 0x3f) | ||
| 84 | #define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1) | ||
| 85 | #define cap_pgsel_inv(c) (((c) >> 39) & 1) | ||
| 86 | |||
| 87 | #define cap_super_page_val(c) (((c) >> 34) & 0xf) | ||
| 88 | #define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \ | ||
| 89 | * OFFSET_STRIDE) + 21) | ||
| 90 | |||
| 91 | #define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16) | ||
| 92 | #define cap_max_fault_reg_offset(c) \ | ||
| 93 | (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16) | ||
| 94 | |||
| 95 | #define cap_zlr(c) (((c) >> 22) & 1) | ||
| 96 | #define cap_isoch(c) (((c) >> 23) & 1) | ||
| 97 | #define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) | ||
| 98 | #define cap_sagaw(c) (((c) >> 8) & 0x1f) | ||
| 99 | #define cap_caching_mode(c) (((c) >> 7) & 1) | ||
| 100 | #define cap_phmr(c) (((c) >> 6) & 1) | ||
| 101 | #define cap_plmr(c) (((c) >> 5) & 1) | ||
| 102 | #define cap_rwbf(c) (((c) >> 4) & 1) | ||
| 103 | #define cap_afl(c) (((c) >> 3) & 1) | ||
| 104 | #define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7))) | ||
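/*
 * Worked decode (sample register value, made up): if bits 21:16 of the
 * capability register read 0x2f, cap_mgaw() yields 0x2f + 1 = 48 bits
 * of guest address width; if bits 2:0 read 2, cap_ndoms() yields
 * 1 << (4 + 2*2) = 256 supported domain-ids.
 */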
| 105 | /* | ||
| 106 | * Extended Capability Register | ||
| 107 | */ | ||
| 108 | |||
| 109 | #define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1) | ||
| 110 | #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) | ||
| 111 | #define ecap_max_iotlb_offset(e) \ | ||
| 112 | (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) | ||
| 113 | #define ecap_coherent(e) ((e) & 0x1) | ||
| 114 | |||
| 115 | |||
| 116 | /* IOTLB_REG */ | ||
| 117 | #define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) | ||
| 118 | #define DMA_TLB_DSI_FLUSH (((u64)2) << 60) | ||
| 119 | #define DMA_TLB_PSI_FLUSH (((u64)3) << 60) | ||
| 120 | #define DMA_TLB_IIRG(type) ((type >> 60) & 7) | ||
| 121 | #define DMA_TLB_IAIG(val) (((val) >> 57) & 7) | ||
| 122 | #define DMA_TLB_READ_DRAIN (((u64)1) << 49) | ||
| 123 | #define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) | ||
| 124 | #define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) | ||
| 125 | #define DMA_TLB_IVT (((u64)1) << 63) | ||
| 126 | #define DMA_TLB_IH_NONLEAF (((u64)1) << 6) | ||
| 127 | #define DMA_TLB_MAX_SIZE (0x3f) | ||
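/*
 * Sketch (command composition inferred from the macros above, not
 * quoted from the flush helpers): a domain-selective IOTLB
 * invalidation for domain 'did' would look roughly like
 *
 *	u64 val = DMA_TLB_IVT | DMA_TLB_DSI_FLUSH | DMA_TLB_DID(did);
 *
 * written to the IOTLB register, after which software polls the
 * register until hardware clears DMA_TLB_IVT to signal completion.
 */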
| 128 | |||
| 129 | /* GCMD_REG */ | ||
| 130 | #define DMA_GCMD_TE (((u32)1) << 31) | ||
| 131 | #define DMA_GCMD_SRTP (((u32)1) << 30) | ||
| 132 | #define DMA_GCMD_SFL (((u32)1) << 29) | ||
| 133 | #define DMA_GCMD_EAFL (((u32)1) << 28) | ||
| 134 | #define DMA_GCMD_WBF (((u32)1) << 27) | ||
| 135 | |||
| 136 | /* GSTS_REG */ | ||
| 137 | #define DMA_GSTS_TES (((u32)1) << 31) | ||
| 138 | #define DMA_GSTS_RTPS (((u32)1) << 30) | ||
| 139 | #define DMA_GSTS_FLS (((u32)1) << 29) | ||
| 140 | #define DMA_GSTS_AFLS (((u32)1) << 28) | ||
| 141 | #define DMA_GSTS_WBFS (((u32)1) << 27) | ||
| 142 | |||
| 143 | /* CCMD_REG */ | ||
| 144 | #define DMA_CCMD_ICC (((u64)1) << 63) | ||
| 145 | #define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) | ||
| 146 | #define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61) | ||
| 147 | #define DMA_CCMD_DEVICE_INVL (((u64)3) << 61) | ||
| 148 | #define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32) | ||
| 149 | #define DMA_CCMD_MASK_NOBIT 0 | ||
| 150 | #define DMA_CCMD_MASK_1BIT 1 | ||
| 151 | #define DMA_CCMD_MASK_2BIT 2 | ||
| 152 | #define DMA_CCMD_MASK_3BIT 3 | ||
| 153 | #define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) | ||
| 154 | #define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) | ||
| 155 | |||
| 156 | /* FECTL_REG */ | ||
| 157 | #define DMA_FECTL_IM (((u32)1) << 31) | ||
| 158 | |||
| 159 | /* FSTS_REG */ | ||
| 160 | #define DMA_FSTS_PPF ((u32)2) | ||
| 161 | #define DMA_FSTS_PFO ((u32)1) | ||
| 162 | #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) | ||
| 163 | |||
| 164 | /* FRCD_REG, 32 bits access */ | ||
| 165 | #define DMA_FRCD_F (((u32)1) << 31) | ||
| 166 | #define dma_frcd_type(d) ((d >> 30) & 1) | ||
| 167 | #define dma_frcd_fault_reason(c) (c & 0xff) | ||
| 168 | #define dma_frcd_source_id(c) (c & 0xffff) | ||
| 169 | #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ | ||
| 170 | |||
| 171 | /* | ||
| 172 | * 0: Present | ||
| 173 | * 1-11: Reserved | ||
| 174 | * 12-63: Context Ptr (12 - (haw-1)) | ||
| 175 | * 64-127: Reserved | ||
| 176 | */ | ||
| 177 | struct root_entry { | ||
| 178 | u64 val; | ||
| 179 | u64 rsvd1; | ||
| 180 | }; | ||
| 181 | #define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) | ||
| 182 | static inline bool root_present(struct root_entry *root) | ||
| 183 | { | ||
| 184 | return (root->val & 1); | ||
| 185 | } | ||
| 186 | static inline void set_root_present(struct root_entry *root) | ||
| 187 | { | ||
| 188 | root->val |= 1; | ||
| 189 | } | ||
| 190 | static inline void set_root_value(struct root_entry *root, unsigned long value) | ||
| 191 | { | ||
| 192 | root->val |= value & PAGE_MASK_4K; | ||
| 193 | } | ||
| 194 | |||
| 195 | struct context_entry; | ||
| 196 | static inline struct context_entry * | ||
| 197 | get_context_addr_from_root(struct root_entry *root) | ||
| 198 | { | ||
| 199 | return (struct context_entry *) | ||
| 200 | (root_present(root) ? phys_to_virt( | ||
| 201 | root->val & PAGE_MASK_4K) : | ||
| 202 | NULL); | ||
| 203 | } | ||
| 204 | |||
| 205 | /* | ||
| 206 | * low 64 bits: | ||
| 207 | * 0: present | ||
| 208 | * 1: fault processing disable | ||
| 209 | * 2-3: translation type | ||
| 210 | * 12-63: address space root | ||
| 211 | * high 64 bits: | ||
| 212 | * 0-2: address width | ||
| 213 | * 3-6: aval | ||
| 214 | * 8-23: domain id | ||
| 215 | */ | ||
| 216 | struct context_entry { | ||
| 217 | u64 lo; | ||
| 218 | u64 hi; | ||
| 219 | }; | ||
| 220 | #define context_present(c) ((c).lo & 1) | ||
| 221 | #define context_fault_disable(c) (((c).lo >> 1) & 1) | ||
| 222 | #define context_translation_type(c) (((c).lo >> 2) & 3) | ||
| 223 | #define context_address_root(c) ((c).lo & PAGE_MASK_4K) | ||
| 224 | #define context_address_width(c) ((c).hi & 7) | ||
| 225 | #define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) | ||
| 226 | |||
| 227 | #define context_set_present(c) do {(c).lo |= 1;} while (0) | ||
| 228 | #define context_set_fault_enable(c) \ | ||
| 229 | do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) | ||
| 230 | #define context_set_translation_type(c, val) \ | ||
| 231 | do { \ | ||
| 232 | (c).lo &= (((u64)-1) << 4) | 3; \ | ||
| 233 | (c).lo |= ((val) & 3) << 2; \ | ||
| 234 | } while (0) | ||
| 235 | #define CONTEXT_TT_MULTI_LEVEL 0 | ||
| 236 | #define context_set_address_root(c, val) \ | ||
| 237 | do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) | ||
| 238 | #define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) | ||
| 239 | #define context_set_domain_id(c, val) \ | ||
| 240 | do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) | ||
| 241 | #define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) | ||
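/*
 * Typical use of these accessors (this is the sequence
 * domain_context_mapping_one() in intel-iommu.c performs):
 *
 *	context_set_domain_id(*context, domain->id);
 *	context_set_address_width(*context, domain->agaw);
 *	context_set_address_root(*context, virt_to_phys(domain->pgd));
 *	context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(*context);
 *	context_set_present(*context);	/* present bit set last */
 */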
| 242 | |||
| 243 | /* | ||
| 244 | * 0: readable | ||
| 245 | * 1: writable | ||
| 246 | * 2-6: reserved | ||
| 247 | * 7: super page | ||
| 248 | * 8-11: available | ||
| 249 | * 12-63: Host physical address | ||
| 250 | */ | ||
| 251 | struct dma_pte { | ||
| 252 | u64 val; | ||
| 253 | }; | ||
| 254 | #define dma_clear_pte(p) do {(p).val = 0;} while (0) | ||
| 255 | |||
| 256 | #define DMA_PTE_READ (1) | ||
| 257 | #define DMA_PTE_WRITE (2) | ||
| 258 | |||
| 259 | #define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) | ||
| 260 | #define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) | ||
| 261 | #define dma_set_pte_prot(p, prot) \ | ||
| 262 | do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) | ||
| 263 | #define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) | ||
| 264 | #define dma_set_pte_addr(p, addr) do {\ | ||
| 265 | (p).val |= ((addr) & PAGE_MASK_4K); } while (0) | ||
| 266 | #define dma_pte_present(p) (((p).val & 3) != 0) | ||
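/*
 * Typical use (mirrors domain_page_mapping() in intel-iommu.c): a leaf
 * entry is filled by combining the target host page frame with the
 * protection bits,
 *
 *	dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
 *	dma_set_pte_prot(*pte, prot);
 */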
| 267 | |||
| 268 | struct intel_iommu; | ||
| 269 | |||
| 270 | struct dmar_domain { | ||
| 271 | int id; /* domain id */ | ||
| 272 | struct intel_iommu *iommu; /* back pointer to owning iommu */ | ||
| 273 | |||
| 274 | struct list_head devices; /* all devices' list */ | ||
| 275 | struct iova_domain iovad; /* iova's that belong to this domain */ | ||
| 276 | |||
| 277 | struct dma_pte *pgd; /* virtual address */ | ||
| 278 | spinlock_t mapping_lock; /* page table lock */ | ||
| 279 | int gaw; /* max guest address width */ | ||
| 280 | |||
| 281 | /* adjusted guest address width, 0 is level 2 30-bit */ | ||
| 282 | int agaw; | ||
| 283 | |||
| 284 | #define DOMAIN_FLAG_MULTIPLE_DEVICES 1 | ||
| 285 | int flags; | ||
| 286 | }; | ||
| 287 | |||
| 288 | /* PCI domain-device relationship */ | ||
| 289 | struct device_domain_info { | ||
| 290 | struct list_head link; /* link to domain siblings */ | ||
| 291 | struct list_head global; /* link to global list */ | ||
| 292 | u8 bus; /* PCI bus number */ | ||
| 293 | u8 devfn; /* PCI devfn number */ | ||
| 294 | struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ | ||
| 295 | struct dmar_domain *domain; /* pointer to domain */ | ||
| 296 | }; | ||
| 297 | |||
| 298 | extern int init_dmars(void); | ||
| 299 | |||
| 300 | struct intel_iommu { | ||
| 301 | void __iomem *reg; /* Pointer to hardware regs, virtual addr */ | ||
| 302 | u64 cap; | ||
| 303 | u64 ecap; | ||
| 304 | unsigned long *domain_ids; /* bitmap of domains */ | ||
| 305 | struct dmar_domain **domains; /* ptr to domains */ | ||
| 306 | int seg; | ||
| 307 | u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ | ||
| 308 | spinlock_t lock; /* protect context, domain ids */ | ||
| 309 | spinlock_t register_lock; /* protect register handling */ | ||
| 310 | struct root_entry *root_entry; /* virtual address */ | ||
| 311 | |||
| 312 | unsigned int irq; | ||
| 313 | unsigned char name[7]; /* Device Name */ | ||
| 314 | struct msi_msg saved_msg; | ||
| 315 | struct sys_device sysdev; | ||
| 316 | }; | ||
| 317 | |||
| 318 | #ifndef CONFIG_DMAR_GFX_WA | ||
| 319 | static inline void iommu_prepare_gfx_mapping(void) | ||
| 320 | { | ||
| 321 | return; | ||
| 322 | } | ||
| 323 | #endif /* !CONFIG_DMAR_GFX_WA */ | ||
| 324 | |||
| 325 | #endif | ||
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c new file mode 100644 index 000000000000..a84571c29360 --- /dev/null +++ b/drivers/pci/iova.c | |||
| @@ -0,0 +1,394 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2006, Intel Corporation. | ||
| 3 | * | ||
| 4 | * This file is released under the GPLv2. | ||
| 5 | * | ||
| 6 | * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include "iova.h" | ||
| 10 | |||
| 11 | void | ||
| 12 | init_iova_domain(struct iova_domain *iovad) | ||
| 13 | { | ||
| 14 | spin_lock_init(&iovad->iova_alloc_lock); | ||
| 15 | spin_lock_init(&iovad->iova_rbtree_lock); | ||
| 16 | iovad->rbroot = RB_ROOT; | ||
| 17 | iovad->cached32_node = NULL; | ||
| 19 | } | ||
| 20 | |||
| 21 | static struct rb_node * | ||
| 22 | __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) | ||
| 23 | { | ||
| 24 | if ((*limit_pfn != DMA_32BIT_PFN) || | ||
| 25 | (iovad->cached32_node == NULL)) | ||
| 26 | return rb_last(&iovad->rbroot); | ||
| 27 | else { | ||
| 28 | struct rb_node *prev_node = rb_prev(iovad->cached32_node); | ||
| 29 | struct iova *curr_iova = | ||
| 30 | container_of(iovad->cached32_node, struct iova, node); | ||
| 31 | *limit_pfn = curr_iova->pfn_lo - 1; | ||
| 32 | return prev_node; | ||
| 33 | } | ||
| 34 | } | ||
| 35 | |||
| 36 | static void | ||
| 37 | __cached_rbnode_insert_update(struct iova_domain *iovad, | ||
| 38 | unsigned long limit_pfn, struct iova *new) | ||
| 39 | { | ||
| 40 | if (limit_pfn != DMA_32BIT_PFN) | ||
| 41 | return; | ||
| 42 | iovad->cached32_node = &new->node; | ||
| 43 | } | ||
| 44 | |||
| 45 | static void | ||
| 46 | __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) | ||
| 47 | { | ||
| 48 | struct iova *cached_iova; | ||
| 49 | struct rb_node *curr; | ||
| 50 | |||
| 51 | if (!iovad->cached32_node) | ||
| 52 | return; | ||
| 53 | curr = iovad->cached32_node; | ||
| 54 | cached_iova = container_of(curr, struct iova, node); | ||
| 55 | |||
| 56 | if (free->pfn_lo >= cached_iova->pfn_lo) | ||
| 57 | iovad->cached32_node = rb_next(&free->node); | ||
| 58 | } | ||
| 59 | |||
| 60 | /* Computes the padding size required to make the | ||
| 61 | * start address naturally aligned on its size | ||
| 62 | */ | ||
| 63 | static int | ||
| 64 | iova_get_pad_size(int size, unsigned int limit_pfn) | ||
| 65 | { | ||
| 66 | unsigned int pad_size = 0; | ||
| 67 | unsigned int order = ilog2(size); | ||
| 68 | |||
| 69 | if (order) | ||
| 70 | pad_size = (limit_pfn + 1) % (1 << order); | ||
| 71 | |||
| 72 | return pad_size; | ||
| 73 | } | ||
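/*
 * Worked example: allocating size = 8 pfns (order 3) below
 * limit_pfn = 0xabcd0: pad_size = (0xabcd0 + 1) % 8 = 1, so one pfn is
 * skipped and __alloc_iova_range() below sets
 * pfn_lo = 0xabcd0 - (8 + 1) + 1 = 0xabcc8, which is naturally
 * aligned on the 8-pfn size.
 */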
| 74 | |||
| 75 | static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size, | ||
| 76 | unsigned long limit_pfn, struct iova *new, bool size_aligned) | ||
| 77 | { | ||
| 78 | struct rb_node *curr = NULL; | ||
| 79 | unsigned long flags; | ||
| 80 | unsigned long saved_pfn; | ||
| 81 | unsigned int pad_size = 0; | ||
| 82 | |||
| 83 | /* Walk the tree backwards */ | ||
| 84 | spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); | ||
| 85 | saved_pfn = limit_pfn; | ||
| 86 | curr = __get_cached_rbnode(iovad, &limit_pfn); | ||
| 87 | while (curr) { | ||
| 88 | struct iova *curr_iova = container_of(curr, struct iova, node); | ||
| 89 | if (limit_pfn < curr_iova->pfn_lo) | ||
| 90 | goto move_left; | ||
| 91 | else if (limit_pfn < curr_iova->pfn_hi) | ||
| 92 | goto adjust_limit_pfn; | ||
| 93 | else { | ||
| 94 | if (size_aligned) | ||
| 95 | pad_size = iova_get_pad_size(size, limit_pfn); | ||
| 96 | if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn) | ||
| 97 | break; /* found a free slot */ | ||
| 98 | } | ||
| 99 | adjust_limit_pfn: | ||
| 100 | limit_pfn = curr_iova->pfn_lo - 1; | ||
| 101 | move_left: | ||
| 102 | curr = rb_prev(curr); | ||
| 103 | } | ||
| 104 | |||
| 105 | if (!curr) { | ||
| 106 | if (size_aligned) | ||
| 107 | pad_size = iova_get_pad_size(size, limit_pfn); | ||
| 108 | if ((IOVA_START_PFN + size + pad_size) > limit_pfn) { | ||
| 109 | spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); | ||
| 110 | return -ENOMEM; | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | /* pfn_lo will point to a size-aligned address if size_aligned is set */ | ||
| 115 | new->pfn_lo = limit_pfn - (size + pad_size) + 1; | ||
| 116 | new->pfn_hi = new->pfn_lo + size - 1; | ||
| 117 | |||
| 118 | spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); | ||
| 119 | return 0; | ||
| 120 | } | ||
| 121 | |||
| 122 | static void | ||
| 123 | iova_insert_rbtree(struct rb_root *root, struct iova *iova) | ||
| 124 | { | ||
| 125 | struct rb_node **new = &(root->rb_node), *parent = NULL; | ||
| 126 | /* Figure out where to put new node */ | ||
| 127 | while (*new) { | ||
| 128 | struct iova *this = container_of(*new, struct iova, node); | ||
| 129 | parent = *new; | ||
| 130 | |||
| 131 | if (iova->pfn_lo < this->pfn_lo) | ||
| 132 | new = &((*new)->rb_left); | ||
| 133 | else if (iova->pfn_lo > this->pfn_lo) | ||
| 134 | new = &((*new)->rb_right); | ||
| 135 | else | ||
| 136 | BUG(); /* this should not happen */ | ||
| 137 | } | ||
| 138 | /* Add new node and rebalance tree. */ | ||
| 139 | rb_link_node(&iova->node, parent, new); | ||
| 140 | rb_insert_color(&iova->node, root); | ||
| 141 | } | ||
| 142 | |||
| 143 | /** | ||
| 144 | * alloc_iova - allocates an iova | ||
| 145 | * @iovad - iova domain in question | ||
| 146 | * @size - size to allocate, in page frames | ||
| 147 | * @limit_pfn - highest pfn the allocation may use | ||
| 148 | * @size_aligned - set if a size-aligned address range is required | ||
| 149 | * This function allocates an iova in the range IOVA_START_PFN to limit_pfn, | ||
| 150 | * searching downwards from limit_pfn. If the size_aligned | ||
| 151 | * flag is set then the allocated address iova->pfn_lo will be naturally | ||
| 152 | * aligned on roundup_power_of_two(size). | ||
| 153 | */ | ||
| 154 | struct iova * | ||
| 155 | alloc_iova(struct iova_domain *iovad, unsigned long size, | ||
| 156 | unsigned long limit_pfn, | ||
| 157 | bool size_aligned) | ||
| 158 | { | ||
| 159 | unsigned long flags; | ||
| 160 | struct iova *new_iova; | ||
| 161 | int ret; | ||
| 162 | |||
| 163 | new_iova = alloc_iova_mem(); | ||
| 164 | if (!new_iova) | ||
| 165 | return NULL; | ||
| 166 | |||
| 167 | /* If size_aligned is set then round the size up | ||
| 168 | * to the next power of two. | ||
| 169 | */ | ||
| 170 | if (size_aligned) | ||
| 171 | size = __roundup_pow_of_two(size); | ||
| 172 | |||
| 173 | spin_lock_irqsave(&iovad->iova_alloc_lock, flags); | ||
| 174 | ret = __alloc_iova_range(iovad, size, limit_pfn, new_iova, | ||
| 175 | size_aligned); | ||
| 176 | |||
| 177 | if (ret) { | ||
| 178 | spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); | ||
| 179 | free_iova_mem(new_iova); | ||
| 180 | return NULL; | ||
| 181 | } | ||
| 182 | |||
| 183 | /* Insert the new_iova into the domain rbtree while holding the rbtree lock */ | ||
| 184 | spin_lock(&iovad->iova_rbtree_lock); | ||
| 185 | iova_insert_rbtree(&iovad->rbroot, new_iova); | ||
| 186 | __cached_rbnode_insert_update(iovad, limit_pfn, new_iova); | ||
| 187 | spin_unlock(&iovad->iova_rbtree_lock); | ||
| 188 | |||
| 189 | spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); | ||
| 190 | |||
| 191 | return new_iova; | ||
| 192 | } | ||
| 193 | |||
| 194 | /** | ||
| 195 | * find_iova - finds the iova for a given pfn | ||
| 196 | * @iovad - iova domain in question. | ||
| 197 | * @pfn - page frame number | ||
| 198 | * This function finds and returns an iova belonging to the | ||
| 199 | * given domain which matches the given pfn. | ||
| 200 | */ | ||
| 201 | struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) | ||
| 202 | { | ||
| 203 | unsigned long flags; | ||
| 204 | struct rb_node *node; | ||
| 205 | |||
| 206 | /* Take the lock so that no other thread is manipulating the rbtree */ | ||
| 207 | spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); | ||
| 208 | node = iovad->rbroot.rb_node; | ||
| 209 | while (node) { | ||
| 210 | struct iova *iova = container_of(node, struct iova, node); | ||
| 211 | |||
| 212 | /* If pfn falls within iova's range, return iova */ | ||
| 213 | if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) { | ||
| 214 | spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); | ||
| 215 | /* We are not holding the lock while this iova | ||
| 216 | * is referenced by the caller as the same thread | ||
| 217 | * which called this function also calls __free_iova() | ||
| 218 | * and it is by design that only one thread can possibly | ||
| 219 | * reference a particular iova and hence no conflict. | ||
| 220 | */ | ||
| 221 | return iova; | ||
| 222 | } | ||
| 223 | |||
| 224 | if (pfn < iova->pfn_lo) | ||
| 225 | node = node->rb_left; | ||
| 226 | else if (pfn > iova->pfn_lo) | ||
| 227 | node = node->rb_right; | ||
| 228 | } | ||
| 229 | |||
| 230 | spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); | ||
| 231 | return NULL; | ||
| 232 | } | ||
| 233 | |||
| 234 | /** | ||
| 235 | * __free_iova - frees the given iova | ||
| 236 | * @iovad: iova domain in question. | ||
| 237 | * @iova: iova in question. | ||
| 238 | * Frees the given iova belonging to the given domain | ||
| 239 | */ | ||
| 240 | void | ||
| 241 | __free_iova(struct iova_domain *iovad, struct iova *iova) | ||
| 242 | { | ||
| 243 | unsigned long flags; | ||
| 244 | |||
| 245 | spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); | ||
| 246 | __cached_rbnode_delete_update(iovad, iova); | ||
| 247 | rb_erase(&iova->node, &iovad->rbroot); | ||
| 248 | spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); | ||
| 249 | free_iova_mem(iova); | ||
| 250 | } | ||
| 251 | |||
| 252 | /** | ||
| 253 | * free_iova - finds and frees the iova for a given pfn | ||
| 254 | * @iovad: - iova domain in question. | ||
| 255 | * @pfn: - pfn that was allocated previously | ||
| 256 | * This function finds the iova for a given pfn and then | ||
| 257 | * frees the iova from that domain. | ||
| 258 | */ | ||
| 259 | void | ||
| 260 | free_iova(struct iova_domain *iovad, unsigned long pfn) | ||
| 261 | { | ||
| 262 | struct iova *iova = find_iova(iovad, pfn); | ||
| 263 | if (iova) | ||
| 264 | __free_iova(iovad, iova); | ||
| 265 | |||
| 266 | } | ||
| 267 | |||
| 268 | /** | ||
| 269 | * put_iova_domain - destroys the iova domain | ||
| 270 | * @iovad: - iova domain in question. | ||
| 271 | * All the iovas in that domain are destroyed. | ||
| 272 | */ | ||
| 273 | void put_iova_domain(struct iova_domain *iovad) | ||
| 274 | { | ||
| 275 | struct rb_node *node; | ||
| 276 | unsigned long flags; | ||
| 277 | |||
| 278 | spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); | ||
| 279 | node = rb_first(&iovad->rbroot); | ||
| 280 | while (node) { | ||
| 281 | struct iova *iova = container_of(node, struct iova, node); | ||
| 282 | rb_erase(node, &iovad->rbroot); | ||
| 283 | free_iova_mem(iova); | ||
| 284 | node = rb_first(&iovad->rbroot); | ||
| 285 | } | ||
| 286 | spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); | ||
| 287 | } | ||
| 288 | |||
| 289 | static int | ||
| 290 | __is_range_overlap(struct rb_node *node, | ||
| 291 | unsigned long pfn_lo, unsigned long pfn_hi) | ||
| 292 | { | ||
| 293 | struct iova *iova = container_of(node, struct iova, node); | ||
| 294 | |||
| 295 | if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo)) | ||
| 296 | return 1; | ||
| 297 | return 0; | ||
| 298 | } | ||
| 299 | |||
| 300 | static struct iova * | ||
| 301 | __insert_new_range(struct iova_domain *iovad, | ||
| 302 | unsigned long pfn_lo, unsigned long pfn_hi) | ||
| 303 | { | ||
| 304 | struct iova *iova; | ||
| 305 | |||
| 306 | iova = alloc_iova_mem(); | ||
| 307 | if (!iova) | ||
| 308 | return iova; | ||
| 309 | |||
| 310 | iova->pfn_hi = pfn_hi; | ||
| 311 | iova->pfn_lo = pfn_lo; | ||
| 312 | iova_insert_rbtree(&iovad->rbroot, iova); | ||
| 313 | return iova; | ||
| 314 | } | ||
| 315 | |||
| 316 | static void | ||
| 317 | __adjust_overlap_range(struct iova *iova, | ||
| 318 | unsigned long *pfn_lo, unsigned long *pfn_hi) | ||
| 319 | { | ||
| 320 | if (*pfn_lo < iova->pfn_lo) | ||
| 321 | iova->pfn_lo = *pfn_lo; | ||
| 322 | if (*pfn_hi > iova->pfn_hi) | ||
| 323 | *pfn_lo = iova->pfn_hi + 1; | ||
| 324 | } | ||
| 325 | |||
| 326 | /** | ||
| 327 | * reserve_iova - reserves an iova in the given range | ||
| 328 | * @iovad: - iova domain pointer | ||
| 329 | * @pfn_lo: - lower page frame address | ||
| 330 | * @pfn_hi: - higher pfn address | ||
| 331 | * This function reserves the address range from pfn_lo to pfn_hi so | ||
| 332 | * that this range is not dished out as part of alloc_iova. | ||
| 333 | */ | ||
| 334 | struct iova * | ||
| 335 | reserve_iova(struct iova_domain *iovad, | ||
| 336 | unsigned long pfn_lo, unsigned long pfn_hi) | ||
| 337 | { | ||
| 338 | struct rb_node *node; | ||
| 339 | unsigned long flags; | ||
| 340 | struct iova *iova; | ||
| 341 | unsigned int overlap = 0; | ||
| 342 | |||
| 343 | spin_lock_irqsave(&iovad->iova_alloc_lock, flags); | ||
| 344 | spin_lock(&iovad->iova_rbtree_lock); | ||
| 345 | for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { | ||
| 346 | if (__is_range_overlap(node, pfn_lo, pfn_hi)) { | ||
| 347 | iova = container_of(node, struct iova, node); | ||
| 348 | __adjust_overlap_range(iova, &pfn_lo, &pfn_hi); | ||
| 349 | if ((pfn_lo >= iova->pfn_lo) && | ||
| 350 | (pfn_hi <= iova->pfn_hi)) | ||
| 351 | goto finish; | ||
| 352 | overlap = 1; | ||
| 353 | |||
| 354 | } else if (overlap) | ||
| 355 | break; | ||
| 356 | } | ||
| 357 | |||
| 358 | /* We are here either because this is the first reserved node | ||
| 359 | * or we need to insert the remaining non-overlapping addr range | ||
| 360 | */ | ||
| 361 | iova = __insert_new_range(iovad, pfn_lo, pfn_hi); | ||
| 362 | finish: | ||
| 363 | |||
| 364 | spin_unlock(&iovad->iova_rbtree_lock); | ||
| 365 | spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); | ||
| 366 | return iova; | ||
| 367 | } | ||
| 368 | |||
| 369 | /** | ||
| 370 | * copy_reserved_iova - copies the reserved iovas between domains | ||
| 371 | * @from: - source domain from where to copy | ||
| 372 | * @to: - destination domain where to copy | ||
| 373 | * This function copies reserved iovas from one domain to | ||
| 374 | * the other. | ||
| 375 | */ | ||
| 376 | void | ||
| 377 | copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) | ||
| 378 | { | ||
| 379 | unsigned long flags; | ||
| 380 | struct rb_node *node; | ||
| 381 | |||
| 382 | spin_lock_irqsave(&from->iova_alloc_lock, flags); | ||
| 383 | spin_lock(&from->iova_rbtree_lock); | ||
| 384 | for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { | ||
| 385 | struct iova *iova = container_of(node, struct iova, node); | ||
| 386 | struct iova *new_iova; | ||
| 387 | new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); | ||
| 388 | if (!new_iova) | ||
| 389 | printk(KERN_ERR "Reserve iova range %lx-%lx failed\n", | ||
| 390 | iova->pfn_lo, iova->pfn_hi); | ||
| 391 | } | ||
| 392 | spin_unlock(&from->iova_rbtree_lock); | ||
| 393 | spin_unlock_irqrestore(&from->iova_alloc_lock, flags); | ||
| 394 | } | ||
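A minimal usage sketch of the allocator interface above, from a hypothetical caller: the domain object, the setup and mapping helpers, and the reserved window are illustrative assumptions; only the iova.h calls are from this patch.

	#include "iova.h"

	static struct iova_domain domain;	/* hypothetical per-device domain */

	static void domain_setup(void)
	{
		init_iova_domain(&domain);
		/* Illustrative: keep a firmware-owned window out of circulation. */
		reserve_iova(&domain, IOVA_PFN(0x80000000), IOVA_PFN(0x80ffffff));
	}

	static dma_addr_t map_one_page(void)
	{
		/* One 4K page below 4GB; size_aligned requests natural alignment. */
		struct iova *iova = alloc_iova(&domain, 1, DMA_32BIT_PFN, true);

		if (!iova)
			return 0;
		/* ... program the IOMMU page tables for iova->pfn_lo here ... */
		return (dma_addr_t)iova->pfn_lo << PAGE_SHIFT_4K;
	}

	static void unmap_one_page(dma_addr_t addr)
	{
		/* ... tear down the IOMMU mapping first ... */
		free_iova(&domain, IOVA_PFN(addr));
	}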
diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h new file mode 100644 index 000000000000..ae3028d5a941 --- /dev/null +++ b/drivers/pci/iova.h | |||
| @@ -0,0 +1,63 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2006, Intel Corporation. | ||
| 3 | * | ||
| 4 | * This file is released under the GPLv2. | ||
| 5 | * | ||
| 6 | * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | ||
| 7 | * | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef _IOVA_H_ | ||
| 11 | #define _IOVA_H_ | ||
| 12 | |||
| 13 | #include <linux/types.h> | ||
| 14 | #include <linux/kernel.h> | ||
| 15 | #include <linux/rbtree.h> | ||
| 16 | #include <linux/dma-mapping.h> | ||
| 17 | |||
| 18 | /* | ||
| 19 | * We need a fixed PAGE_SIZE of 4K irrespective of | ||
| 20 | * arch PAGE_SIZE for IOMMU page tables. | ||
| 21 | */ | ||
| 22 | #define PAGE_SHIFT_4K (12) | ||
| 23 | #define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) | ||
| 24 | #define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) | ||
| 25 | #define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) | ||
| 26 | |||
| 27 | /* IO virtual address start page frame number */ | ||
| 28 | #define IOVA_START_PFN (1) | ||
| 29 | |||
| 30 | #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) | ||
| 31 | #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) | ||
| 32 | #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) | ||
| 33 | |||
| 34 | /* iova structure */ | ||
| 35 | struct iova { | ||
| 36 | struct rb_node node; | ||
| 37 | unsigned long pfn_hi; /* highest pfn dished out by the IOMMU */ | ||
| 38 | unsigned long pfn_lo; /* lowest pfn dished out by the IOMMU */ | ||
| 39 | }; | ||
| 40 | |||
| 41 | /* holds all the iova translations for a domain */ | ||
| 42 | struct iova_domain { | ||
| 43 | spinlock_t iova_alloc_lock; /* Lock to protect iova allocation */ | ||
| 44 | spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ | ||
| 45 | struct rb_root rbroot; /* iova domain rbtree root */ | ||
| 46 | struct rb_node *cached32_node; /* Save the last allocated node */ | ||
| 47 | }; | ||
| 48 | |||
| 49 | struct iova *alloc_iova_mem(void); | ||
| 50 | void free_iova_mem(struct iova *iova); | ||
| 51 | void free_iova(struct iova_domain *iovad, unsigned long pfn); | ||
| 52 | void __free_iova(struct iova_domain *iovad, struct iova *iova); | ||
| 53 | struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, | ||
| 54 | unsigned long limit_pfn, | ||
| 55 | bool size_aligned); | ||
| 56 | struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, | ||
| 57 | unsigned long pfn_hi); | ||
| 58 | void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); | ||
| 59 | void init_iova_domain(struct iova_domain *iovad); | ||
| 60 | struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); | ||
| 61 | void put_iova_domain(struct iova_domain *iovad); | ||
| 62 | |||
| 63 | #endif | ||
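Note that alloc_iova_mem()/free_iova_mem() are only declared in this header; their definitions live in the IOMMU driver proper. A plausible kmem_cache-backed sketch, assuming a cache created at driver init (names and GFP choice are illustrative, not the driver's actual code):

	#include <linux/slab.h>

	static struct kmem_cache *iova_cachep;	/* assumed created at driver init */

	struct iova *alloc_iova_mem(void)
	{
		/* GFP_ATOMIC: callers may hold spinlocks with interrupts disabled. */
		return kmem_cache_zalloc(iova_cachep, GFP_ATOMIC);
	}

	void free_iova_mem(struct iova *iova)
	{
		if (iova)
			kmem_cache_free(iova_cachep, iova);
	}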
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 6fda33de84e8..fc87e14b50de 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h | |||
| @@ -90,3 +90,4 @@ pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) | |||
| 90 | return NULL; | 90 | return NULL; |
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev); | ||
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 5db6b6690b59..463a5a9d583d 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c | |||
| @@ -837,6 +837,19 @@ static void pci_release_dev(struct device *dev) | |||
| 837 | kfree(pci_dev); | 837 | kfree(pci_dev); |
| 838 | } | 838 | } |
| 839 | 839 | ||
| 840 | static void set_pcie_port_type(struct pci_dev *pdev) | ||
| 841 | { | ||
| 842 | int pos; | ||
| 843 | u16 reg16; | ||
| 844 | |||
| 845 | pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); | ||
| 846 | if (!pos) | ||
| 847 | return; | ||
| 848 | pdev->is_pcie = 1; | ||
| 849 | pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); | ||
| 850 | pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; | ||
| 851 | } | ||
| 852 | |||
| 840 | /** | 853 | /** |
| 841 | * pci_cfg_space_size - get the configuration space size of the PCI device. | 854 | * pci_cfg_space_size - get the configuration space size of the PCI device. |
| 842 | * @dev: PCI device | 855 | * @dev: PCI device |
| @@ -951,6 +964,7 @@ pci_scan_device(struct pci_bus *bus, int devfn) | |||
| 951 | dev->device = (l >> 16) & 0xffff; | 964 | dev->device = (l >> 16) & 0xffff; |
| 952 | dev->cfg_size = pci_cfg_space_size(dev); | 965 | dev->cfg_size = pci_cfg_space_size(dev); |
| 953 | dev->error_state = pci_channel_io_normal; | 966 | dev->error_state = pci_channel_io_normal; |
| 967 | set_pcie_port_type(dev); | ||
| 954 | 968 | ||
| 955 | /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) | 969 | /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) |
| 956 | set this higher, assuming the system even supports it. */ | 970 | set this higher, assuming the system even supports it. */ |
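The >> 4 in set_pcie_port_type() extracts the Device/Port Type field from the PCI Express Capabilities register: PCI_EXP_FLAGS_TYPE is the 0x00f0 mask, so the shifted value compares directly against the PCI_EXP_TYPE_* constants. A standalone illustration (not kernel code):

	/* reg16 is the 16-bit PCI Express Capabilities register. */
	unsigned int pcie_port_type(unsigned short reg16)
	{
		return (reg16 & 0x00f0) >> 4;	/* PCI_EXP_FLAGS_TYPE */
	}
	/* A result of 0x7 (PCI_EXP_TYPE_PCI_BRIDGE) marks a PCIe-to-PCI bridge,
	 * which is exactly what pci_find_upstream_pcie_bridge() below tests for. */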
diff --git a/drivers/pci/search.c b/drivers/pci/search.c index c6e79d01ce3d..b001b5922e33 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c | |||
| @@ -14,6 +14,40 @@ | |||
| 14 | #include "pci.h" | 14 | #include "pci.h" |
| 15 | 15 | ||
| 16 | DECLARE_RWSEM(pci_bus_sem); | 16 | DECLARE_RWSEM(pci_bus_sem); |
| 17 | /* | ||
| 18 | * Find the upstream PCIe-to-PCI bridge of a PCI device. | ||
| 19 | * If the device is PCIe, return NULL. | ||
| 20 | * If the device isn't connected to a PCIe bridge (that is, its parent is a | ||
| 21 | * legacy PCI bridge and the bridge is directly connected to bus 0), return | ||
| 22 | * its parent. | ||
| 23 | */ | ||
| 24 | struct pci_dev * | ||
| 25 | pci_find_upstream_pcie_bridge(struct pci_dev *pdev) | ||
| 26 | { | ||
| 27 | struct pci_dev *tmp = NULL; | ||
| 28 | |||
| 29 | if (pdev->is_pcie) | ||
| 30 | return NULL; | ||
| 31 | while (1) { | ||
| 32 | if (!pdev->bus->self) | ||
| 33 | break; | ||
| 34 | pdev = pdev->bus->self; | ||
| 35 | /* a p2p bridge */ | ||
| 36 | if (!pdev->is_pcie) { | ||
| 37 | tmp = pdev; | ||
| 38 | continue; | ||
| 39 | } | ||
| 40 | /* PCI device should connect to a PCIE bridge */ | ||
| 41 | if (pdev->pcie_type != PCI_EXP_TYPE_PCI_BRIDGE) { | ||
| 42 | /* Busted hardware? */ | ||
| 43 | WARN_ON_ONCE(1); | ||
| 44 | return NULL; | ||
| 45 | } | ||
| 46 | return pdev; | ||
| 47 | } | ||
| 48 | |||
| 49 | return tmp; | ||
| 50 | } | ||
| 17 | 51 | ||
| 18 | static struct pci_bus *pci_do_find_bus(struct pci_bus *bus, unsigned char busnr) | 52 | static struct pci_bus *pci_do_find_bus(struct pci_bus *bus, unsigned char busnr) |
| 19 | { | 53 | { |
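The intended consumer is the DMA-remapping code: a legacy PCI device behind a PCIe-to-PCI bridge issues DMA that carries the bridge's requester ID, so the IOMMU context entry must be programmed for the bridge rather than for the device itself. A hedged caller sketch (only pci_find_upstream_pcie_bridge() is from this patch; the wrapper is hypothetical):

	#include <linux/pci.h>
	#include "pci.h"	/* pci_find_upstream_pcie_bridge() */

	static struct pci_dev *iommu_device_to_program(struct pci_dev *pdev)
	{
		struct pci_dev *bridge = pci_find_upstream_pcie_bridge(pdev);

		/* NULL means pdev is PCIe (or has no PCIe bridge upstream),
		 * so its own requester ID is what the IOMMU will see. */
		return bridge ? bridge : pdev;
	}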
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 0a3ee5a322b0..5574ba3ab1f9 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
| @@ -103,7 +103,7 @@ extern int cifs_ioctl(struct inode *inode, struct file *filep, | |||
| 103 | unsigned int command, unsigned long arg); | 103 | unsigned int command, unsigned long arg); |
| 104 | 104 | ||
| 105 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 105 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
| 106 | extern struct export_operations cifs_export_ops; | 106 | extern const struct export_operations cifs_export_ops; |
| 107 | #endif /* EXPERIMENTAL */ | 107 | #endif /* EXPERIMENTAL */ |
| 108 | 108 | ||
| 109 | #define CIFS_VERSION "1.51" | 109 | #define CIFS_VERSION "1.51" |
diff --git a/fs/cifs/export.c b/fs/cifs/export.c index d614b91caeca..75949d6a5f1b 100644 --- a/fs/cifs/export.c +++ b/fs/cifs/export.c | |||
| @@ -53,7 +53,7 @@ static struct dentry *cifs_get_parent(struct dentry *dentry) | |||
| 53 | return ERR_PTR(-EACCES); | 53 | return ERR_PTR(-EACCES); |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | struct export_operations cifs_export_ops = { | 56 | const struct export_operations cifs_export_ops = { |
| 57 | .get_parent = cifs_get_parent, | 57 | .get_parent = cifs_get_parent, |
| 58 | /* Following five export operations are unneeded so far and can default: | 58 | /* Following five export operations are unneeded so far and can default: |
| 59 | .get_dentry = | 59 | .get_dentry = |
diff --git a/fs/dcache.c b/fs/dcache.c index 2bb3f7ac683b..d9ca1e5ceb92 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -1479,6 +1479,8 @@ static void switch_names(struct dentry *dentry, struct dentry *target) | |||
| 1479 | * dentry:internal, target:external. Steal target's | 1479 | * dentry:internal, target:external. Steal target's |
| 1480 | * storage and make target internal. | 1480 | * storage and make target internal. |
| 1481 | */ | 1481 | */ |
| 1482 | memcpy(target->d_iname, dentry->d_name.name, | ||
| 1483 | dentry->d_name.len + 1); | ||
| 1482 | dentry->d_name.name = target->d_name.name; | 1484 | dentry->d_name.name = target->d_name.name; |
| 1483 | target->d_name.name = target->d_iname; | 1485 | target->d_name.name = target->d_iname; |
| 1484 | } | 1486 | } |
diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 5276b19423c1..f7f407075be1 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c | |||
| @@ -10,6 +10,8 @@ | |||
| 10 | #include <linux/string.h> | 10 | #include <linux/string.h> |
| 11 | #include <linux/efs_fs.h> | 11 | #include <linux/efs_fs.h> |
| 12 | #include <linux/smp_lock.h> | 12 | #include <linux/smp_lock.h> |
| 13 | #include <linux/exportfs.h> | ||
| 14 | |||
| 13 | 15 | ||
| 14 | static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) { | 16 | static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) { |
| 15 | struct buffer_head *bh; | 17 | struct buffer_head *bh; |
| @@ -75,13 +77,10 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei | |||
| 75 | return NULL; | 77 | return NULL; |
| 76 | } | 78 | } |
| 77 | 79 | ||
| 78 | struct dentry *efs_get_dentry(struct super_block *sb, void *vobjp) | 80 | static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, |
| 81 | u32 generation) | ||
| 79 | { | 82 | { |
| 80 | __u32 *objp = vobjp; | ||
| 81 | unsigned long ino = objp[0]; | ||
| 82 | __u32 generation = objp[1]; | ||
| 83 | struct inode *inode; | 83 | struct inode *inode; |
| 84 | struct dentry *result; | ||
| 85 | 84 | ||
| 86 | if (ino == 0) | 85 | if (ino == 0) |
| 87 | return ERR_PTR(-ESTALE); | 86 | return ERR_PTR(-ESTALE); |
| @@ -91,20 +90,25 @@ struct dentry *efs_get_dentry(struct super_block *sb, void *vobjp) | |||
| 91 | 90 | ||
| 92 | if (is_bad_inode(inode) || | 91 | if (is_bad_inode(inode) || |
| 93 | (generation && inode->i_generation != generation)) { | 92 | (generation && inode->i_generation != generation)) { |
| 94 | result = ERR_PTR(-ESTALE); | 93 | iput(inode); |
| 95 | goto out_iput; | 94 | return ERR_PTR(-ESTALE); |
| 96 | } | 95 | } |
| 97 | 96 | ||
| 98 | result = d_alloc_anon(inode); | 97 | return inode; |
| 99 | if (!result) { | 98 | } |
| 100 | result = ERR_PTR(-ENOMEM); | ||
| 101 | goto out_iput; | ||
| 102 | } | ||
| 103 | return result; | ||
| 104 | 99 | ||
| 105 | out_iput: | 100 | struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
| 106 | iput(inode); | 101 | int fh_len, int fh_type) |
| 107 | return result; | 102 | { |
| 103 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | ||
| 104 | efs_nfs_get_inode); | ||
| 105 | } | ||
| 106 | |||
| 107 | struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
| 108 | int fh_len, int fh_type) | ||
| 109 | { | ||
| 110 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
| 111 | efs_nfs_get_inode); | ||
| 108 | } | 112 | } |
| 109 | 113 | ||
| 110 | struct dentry *efs_get_parent(struct dentry *child) | 114 | struct dentry *efs_get_parent(struct dentry *child) |
diff --git a/fs/efs/super.c b/fs/efs/super.c index 25d0326c5f1c..c79bc627f107 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c | |||
| @@ -113,8 +113,9 @@ static const struct super_operations efs_superblock_operations = { | |||
| 113 | .remount_fs = efs_remount, | 113 | .remount_fs = efs_remount, |
| 114 | }; | 114 | }; |
| 115 | 115 | ||
| 116 | static struct export_operations efs_export_ops = { | 116 | static const struct export_operations efs_export_ops = { |
| 117 | .get_dentry = efs_get_dentry, | 117 | .fh_to_dentry = efs_fh_to_dentry, |
| 118 | .fh_to_parent = efs_fh_to_parent, | ||
| 118 | .get_parent = efs_get_parent, | 119 | .get_parent = efs_get_parent, |
| 119 | }; | 120 | }; |
| 120 | 121 | ||
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 8adb32a9387a..109ab5e44eca 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c | |||
| @@ -1,4 +1,13 @@ | |||
| 1 | 1 | /* | |
| 2 | * Copyright (C) Neil Brown 2002 | ||
| 3 | * Copyright (C) Christoph Hellwig 2007 | ||
| 4 | * | ||
| 5 | * This file contains the code mapping from inodes to NFS file handles, | ||
| 6 | * and for mapping back from file handles to dentries. | ||
| 7 | * | ||
| 8 | * For details on why we do all the strange and hairy things in here | ||
| 9 | * take a look at Documentation/filesystems/Exporting. | ||
| 10 | */ | ||
| 2 | #include <linux/exportfs.h> | 11 | #include <linux/exportfs.h> |
| 3 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |
| 4 | #include <linux/file.h> | 13 | #include <linux/file.h> |
| @@ -9,32 +18,19 @@ | |||
| 9 | #define dprintk(fmt, args...) do{}while(0) | 18 | #define dprintk(fmt, args...) do{}while(0) |
| 10 | 19 | ||
| 11 | 20 | ||
| 12 | static int get_name(struct dentry *dentry, char *name, | 21 | static int get_name(struct vfsmount *mnt, struct dentry *dentry, char *name, |
| 13 | struct dentry *child); | 22 | struct dentry *child); |
| 14 | 23 | ||
| 15 | 24 | ||
| 16 | static struct dentry *exportfs_get_dentry(struct super_block *sb, void *obj) | 25 | static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir, |
| 26 | char *name, struct dentry *child) | ||
| 17 | { | 27 | { |
| 18 | struct dentry *result = ERR_PTR(-ESTALE); | 28 | const struct export_operations *nop = dir->d_sb->s_export_op; |
| 19 | |||
| 20 | if (sb->s_export_op->get_dentry) { | ||
| 21 | result = sb->s_export_op->get_dentry(sb, obj); | ||
| 22 | if (!result) | ||
| 23 | result = ERR_PTR(-ESTALE); | ||
| 24 | } | ||
| 25 | |||
| 26 | return result; | ||
| 27 | } | ||
| 28 | |||
| 29 | static int exportfs_get_name(struct dentry *dir, char *name, | ||
| 30 | struct dentry *child) | ||
| 31 | { | ||
| 32 | struct export_operations *nop = dir->d_sb->s_export_op; | ||
| 33 | 29 | ||
| 34 | if (nop->get_name) | 30 | if (nop->get_name) |
| 35 | return nop->get_name(dir, name, child); | 31 | return nop->get_name(dir, name, child); |
| 36 | else | 32 | else |
| 37 | return get_name(dir, name, child); | 33 | return get_name(mnt, dir, name, child); |
| 38 | } | 34 | } |
| 39 | 35 | ||
| 40 | /* | 36 | /* |
| @@ -98,7 +94,7 @@ find_disconnected_root(struct dentry *dentry) | |||
| 98 | * It may already be, as the flag isn't always updated when connection happens. | 94 | * It may already be, as the flag isn't always updated when connection happens. |
| 99 | */ | 95 | */ |
| 100 | static int | 96 | static int |
| 101 | reconnect_path(struct super_block *sb, struct dentry *target_dir) | 97 | reconnect_path(struct vfsmount *mnt, struct dentry *target_dir) |
| 102 | { | 98 | { |
| 103 | char nbuf[NAME_MAX+1]; | 99 | char nbuf[NAME_MAX+1]; |
| 104 | int noprogress = 0; | 100 | int noprogress = 0; |
| @@ -121,7 +117,7 @@ reconnect_path(struct super_block *sb, struct dentry *target_dir) | |||
| 121 | pd->d_flags &= ~DCACHE_DISCONNECTED; | 117 | pd->d_flags &= ~DCACHE_DISCONNECTED; |
| 122 | spin_unlock(&pd->d_lock); | 118 | spin_unlock(&pd->d_lock); |
| 123 | noprogress = 0; | 119 | noprogress = 0; |
| 124 | } else if (pd == sb->s_root) { | 120 | } else if (pd == mnt->mnt_sb->s_root) { |
| 125 | printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n"); | 121 | printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n"); |
| 126 | spin_lock(&pd->d_lock); | 122 | spin_lock(&pd->d_lock); |
| 127 | pd->d_flags &= ~DCACHE_DISCONNECTED; | 123 | pd->d_flags &= ~DCACHE_DISCONNECTED; |
| @@ -147,8 +143,8 @@ reconnect_path(struct super_block *sb, struct dentry *target_dir) | |||
| 147 | struct dentry *npd; | 143 | struct dentry *npd; |
| 148 | 144 | ||
| 149 | mutex_lock(&pd->d_inode->i_mutex); | 145 | mutex_lock(&pd->d_inode->i_mutex); |
| 150 | if (sb->s_export_op->get_parent) | 146 | if (mnt->mnt_sb->s_export_op->get_parent) |
| 151 | ppd = sb->s_export_op->get_parent(pd); | 147 | ppd = mnt->mnt_sb->s_export_op->get_parent(pd); |
| 152 | mutex_unlock(&pd->d_inode->i_mutex); | 148 | mutex_unlock(&pd->d_inode->i_mutex); |
| 153 | 149 | ||
| 154 | if (IS_ERR(ppd)) { | 150 | if (IS_ERR(ppd)) { |
| @@ -161,7 +157,7 @@ reconnect_path(struct super_block *sb, struct dentry *target_dir) | |||
| 161 | 157 | ||
| 162 | dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, | 158 | dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, |
| 163 | pd->d_inode->i_ino, ppd->d_inode->i_ino); | 159 | pd->d_inode->i_ino, ppd->d_inode->i_ino); |
| 164 | err = exportfs_get_name(ppd, nbuf, pd); | 160 | err = exportfs_get_name(mnt, ppd, nbuf, pd); |
| 165 | if (err) { | 161 | if (err) { |
| 166 | dput(ppd); | 162 | dput(ppd); |
| 167 | dput(pd); | 163 | dput(pd); |
| @@ -214,125 +210,6 @@ reconnect_path(struct super_block *sb, struct dentry *target_dir) | |||
| 214 | return 0; | 210 | return 0; |
| 215 | } | 211 | } |
| 216 | 212 | ||
| 217 | /** | ||
| 218 | * find_exported_dentry - helper routine to implement export_operations->decode_fh | ||
| 219 | * @sb: The &super_block identifying the filesystem | ||
| 220 | * @obj: An opaque identifier of the object to be found - passed to | ||
| 221 | * get_inode | ||
| 222 | * @parent: An optional opqaue identifier of the parent of the object. | ||
| 223 | * @acceptable: A function used to test possible &dentries to see if they are | ||
| 224 | * acceptable | ||
| 225 | * @context: A parameter to @acceptable so that it knows on what basis to | ||
| 226 | * judge. | ||
| 227 | * | ||
| 228 | * find_exported_dentry is the central helper routine to enable file systems | ||
| 229 | * to provide the decode_fh() export_operation. It's main task is to take | ||
| 230 | * an &inode, find or create an appropriate &dentry structure, and possibly | ||
| 231 | * splice this into the dcache in the correct place. | ||
| 232 | * | ||
| 233 | * The decode_fh() operation provided by the filesystem should call | ||
| 234 | * find_exported_dentry() with the same parameters that it received except | ||
| 235 | * that instead of the file handle fragment, pointers to opaque identifiers | ||
| 236 | * for the object and optionally its parent are passed. The default decode_fh | ||
| 237 | * routine passes one pointer to the start of the filehandle fragment, and | ||
| 238 | * one 8 bytes into the fragment. It is expected that most filesystems will | ||
| 239 | * take this approach, though the offset to the parent identifier may well be | ||
| 240 | * different. | ||
| 241 | * | ||
| 242 | * find_exported_dentry() will call get_dentry to get an dentry pointer from | ||
| 243 | * the file system. If any &dentry in the d_alias list is acceptable, it will | ||
| 244 | * be returned. Otherwise find_exported_dentry() will attempt to splice a new | ||
| 245 | * &dentry into the dcache using get_name() and get_parent() to find the | ||
| 246 | * appropriate place. | ||
| 247 | */ | ||
| 248 | |||
| 249 | struct dentry * | ||
| 250 | find_exported_dentry(struct super_block *sb, void *obj, void *parent, | ||
| 251 | int (*acceptable)(void *context, struct dentry *de), | ||
| 252 | void *context) | ||
| 253 | { | ||
| 254 | struct dentry *result, *alias; | ||
| 255 | int err = -ESTALE; | ||
| 256 | |||
| 257 | /* | ||
| 258 | * Attempt to find the inode. | ||
| 259 | */ | ||
| 260 | result = exportfs_get_dentry(sb, obj); | ||
| 261 | if (IS_ERR(result)) | ||
| 262 | return result; | ||
| 263 | |||
| 264 | if (S_ISDIR(result->d_inode->i_mode)) { | ||
| 265 | if (!(result->d_flags & DCACHE_DISCONNECTED)) { | ||
| 266 | if (acceptable(context, result)) | ||
| 267 | return result; | ||
| 268 | err = -EACCES; | ||
| 269 | goto err_result; | ||
| 270 | } | ||
| 271 | |||
| 272 | err = reconnect_path(sb, result); | ||
| 273 | if (err) | ||
| 274 | goto err_result; | ||
| 275 | } else { | ||
| 276 | struct dentry *target_dir, *nresult; | ||
| 277 | char nbuf[NAME_MAX+1]; | ||
| 278 | |||
| 279 | alias = find_acceptable_alias(result, acceptable, context); | ||
| 280 | if (alias) | ||
| 281 | return alias; | ||
| 282 | |||
| 283 | if (parent == NULL) | ||
| 284 | goto err_result; | ||
| 285 | |||
| 286 | target_dir = exportfs_get_dentry(sb,parent); | ||
| 287 | if (IS_ERR(target_dir)) { | ||
| 288 | err = PTR_ERR(target_dir); | ||
| 289 | goto err_result; | ||
| 290 | } | ||
| 291 | |||
| 292 | err = reconnect_path(sb, target_dir); | ||
| 293 | if (err) { | ||
| 294 | dput(target_dir); | ||
| 295 | goto err_result; | ||
| 296 | } | ||
| 297 | |||
| 298 | /* | ||
| 299 | * As we weren't after a directory, have one more step to go. | ||
| 300 | */ | ||
| 301 | err = exportfs_get_name(target_dir, nbuf, result); | ||
| 302 | if (!err) { | ||
| 303 | mutex_lock(&target_dir->d_inode->i_mutex); | ||
| 304 | nresult = lookup_one_len(nbuf, target_dir, | ||
| 305 | strlen(nbuf)); | ||
| 306 | mutex_unlock(&target_dir->d_inode->i_mutex); | ||
| 307 | if (!IS_ERR(nresult)) { | ||
| 308 | if (nresult->d_inode) { | ||
| 309 | dput(result); | ||
| 310 | result = nresult; | ||
| 311 | } else | ||
| 312 | dput(nresult); | ||
| 313 | } | ||
| 314 | } | ||
| 315 | dput(target_dir); | ||
| 316 | } | ||
| 317 | |||
| 318 | alias = find_acceptable_alias(result, acceptable, context); | ||
| 319 | if (alias) | ||
| 320 | return alias; | ||
| 321 | |||
| 322 | /* drat - I just cannot find anything acceptable */ | ||
| 323 | dput(result); | ||
| 324 | /* It might be justifiable to return ESTALE here, | ||
| 325 | * but the filehandle at-least looks reasonable good | ||
| 326 | * and it may just be a permission problem, so returning | ||
| 327 | * -EACCESS is safer | ||
| 328 | */ | ||
| 329 | return ERR_PTR(-EACCES); | ||
| 330 | |||
| 331 | err_result: | ||
| 332 | dput(result); | ||
| 333 | return ERR_PTR(err); | ||
| 334 | } | ||
| 335 | |||
| 336 | struct getdents_callback { | 213 | struct getdents_callback { |
| 337 | char *name; /* name that was found. It already points to a | 214 | char *name; /* name that was found. It already points to a |
| 338 | buffer NAME_MAX+1 is size */ | 215 | buffer NAME_MAX+1 is size */ |
| @@ -370,8 +247,8 @@ static int filldir_one(void * __buf, const char * name, int len, | |||
| 370 | * calls readdir on the parent until it finds an entry with | 247 | * calls readdir on the parent until it finds an entry with |
| 371 | * the same inode number as the child, and returns that. | 248 | * the same inode number as the child, and returns that. |
| 372 | */ | 249 | */ |
| 373 | static int get_name(struct dentry *dentry, char *name, | 250 | static int get_name(struct vfsmount *mnt, struct dentry *dentry, |
| 374 | struct dentry *child) | 251 | char *name, struct dentry *child) |
| 375 | { | 252 | { |
| 376 | struct inode *dir = dentry->d_inode; | 253 | struct inode *dir = dentry->d_inode; |
| 377 | int error; | 254 | int error; |
| @@ -387,7 +264,7 @@ static int get_name(struct dentry *dentry, char *name, | |||
| 387 | /* | 264 | /* |
| 388 | * Open the directory ... | 265 | * Open the directory ... |
| 389 | */ | 266 | */ |
| 390 | file = dentry_open(dget(dentry), NULL, O_RDONLY); | 267 | file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY); |
| 391 | error = PTR_ERR(file); | 268 | error = PTR_ERR(file); |
| 392 | if (IS_ERR(file)) | 269 | if (IS_ERR(file)) |
| 393 | goto out; | 270 | goto out; |
| @@ -434,100 +311,177 @@ out: | |||
| 434 | * can be used to check that it is still valid. It places them in the | 311 | * can be used to check that it is still valid. It places them in the |
| 435 | * filehandle fragment where export_decode_fh expects to find them. | 312 | * filehandle fragment where export_decode_fh expects to find them. |
| 436 | */ | 313 | */ |
| 437 | static int export_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, | 314 | static int export_encode_fh(struct dentry *dentry, struct fid *fid, |
| 438 | int connectable) | 315 | int *max_len, int connectable) |
| 439 | { | 316 | { |
| 440 | struct inode * inode = dentry->d_inode; | 317 | struct inode * inode = dentry->d_inode; |
| 441 | int len = *max_len; | 318 | int len = *max_len; |
| 442 | int type = 1; | 319 | int type = FILEID_INO32_GEN; |
| 443 | 320 | ||
| 444 | if (len < 2 || (connectable && len < 4)) | 321 | if (len < 2 || (connectable && len < 4)) |
| 445 | return 255; | 322 | return 255; |
| 446 | 323 | ||
| 447 | len = 2; | 324 | len = 2; |
| 448 | fh[0] = inode->i_ino; | 325 | fid->i32.ino = inode->i_ino; |
| 449 | fh[1] = inode->i_generation; | 326 | fid->i32.gen = inode->i_generation; |
| 450 | if (connectable && !S_ISDIR(inode->i_mode)) { | 327 | if (connectable && !S_ISDIR(inode->i_mode)) { |
| 451 | struct inode *parent; | 328 | struct inode *parent; |
| 452 | 329 | ||
| 453 | spin_lock(&dentry->d_lock); | 330 | spin_lock(&dentry->d_lock); |
| 454 | parent = dentry->d_parent->d_inode; | 331 | parent = dentry->d_parent->d_inode; |
| 455 | fh[2] = parent->i_ino; | 332 | fid->i32.parent_ino = parent->i_ino; |
| 456 | fh[3] = parent->i_generation; | 333 | fid->i32.parent_gen = parent->i_generation; |
| 457 | spin_unlock(&dentry->d_lock); | 334 | spin_unlock(&dentry->d_lock); |
| 458 | len = 4; | 335 | len = 4; |
| 459 | type = 2; | 336 | type = FILEID_INO32_GEN_PARENT; |
| 460 | } | 337 | } |
| 461 | *max_len = len; | 338 | *max_len = len; |
| 462 | return type; | 339 | return type; |
| 463 | } | 340 | } |
| 464 | 341 | ||
| 465 | 342 | int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, | |
| 466 | /** | ||
| 467 | * export_decode_fh - default export_operations->decode_fh function | ||
| 468 | * @sb: The superblock | ||
| 469 | * @fh: pointer to the file handle fragment | ||
| 470 | * @fh_len: length of file handle fragment | ||
| 471 | * @acceptable: function for testing acceptability of dentrys | ||
| 472 | * @context: context for @acceptable | ||
| 473 | * | ||
| 474 | * This is the default decode_fh() function. | ||
| 475 | * a fileid_type of 1 indicates that the filehandlefragment | ||
| 476 | * just contains an object identifier understood by get_dentry. | ||
| 477 | * a fileid_type of 2 says that there is also a directory | ||
| 478 | * identifier 8 bytes in to the filehandlefragement. | ||
| 479 | */ | ||
| 480 | static struct dentry *export_decode_fh(struct super_block *sb, __u32 *fh, int fh_len, | ||
| 481 | int fileid_type, | ||
| 482 | int (*acceptable)(void *context, struct dentry *de), | ||
| 483 | void *context) | ||
| 484 | { | ||
| 485 | __u32 parent[2]; | ||
| 486 | parent[0] = parent[1] = 0; | ||
| 487 | if (fh_len < 2 || fileid_type > 2) | ||
| 488 | return NULL; | ||
| 489 | if (fileid_type == 2) { | ||
| 490 | if (fh_len > 2) parent[0] = fh[2]; | ||
| 491 | if (fh_len > 3) parent[1] = fh[3]; | ||
| 492 | } | ||
| 493 | return find_exported_dentry(sb, fh, parent, | ||
| 494 | acceptable, context); | ||
| 495 | } | ||
| 496 | |||
| 497 | int exportfs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, | ||
| 498 | int connectable) | 343 | int connectable) |
| 499 | { | 344 | { |
| 500 | struct export_operations *nop = dentry->d_sb->s_export_op; | 345 | const struct export_operations *nop = dentry->d_sb->s_export_op; |
| 501 | int error; | 346 | int error; |
| 502 | 347 | ||
| 503 | if (nop->encode_fh) | 348 | if (nop->encode_fh) |
| 504 | error = nop->encode_fh(dentry, fh, max_len, connectable); | 349 | error = nop->encode_fh(dentry, fid->raw, max_len, connectable); |
| 505 | else | 350 | else |
| 506 | error = export_encode_fh(dentry, fh, max_len, connectable); | 351 | error = export_encode_fh(dentry, fid, max_len, connectable); |
| 507 | 352 | ||
| 508 | return error; | 353 | return error; |
| 509 | } | 354 | } |
| 510 | EXPORT_SYMBOL_GPL(exportfs_encode_fh); | 355 | EXPORT_SYMBOL_GPL(exportfs_encode_fh); |
| 511 | 356 | ||
| 512 | struct dentry *exportfs_decode_fh(struct vfsmount *mnt, __u32 *fh, int fh_len, | 357 | struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, |
| 513 | int fileid_type, int (*acceptable)(void *, struct dentry *), | 358 | int fh_len, int fileid_type, |
| 514 | void *context) | 359 | int (*acceptable)(void *, struct dentry *), void *context) |
| 515 | { | 360 | { |
| 516 | struct export_operations *nop = mnt->mnt_sb->s_export_op; | 361 | const struct export_operations *nop = mnt->mnt_sb->s_export_op; |
| 517 | struct dentry *result; | 362 | struct dentry *result, *alias; |
| 363 | int err; | ||
| 518 | 364 | ||
| 519 | if (nop->decode_fh) { | 365 | /* |
| 520 | result = nop->decode_fh(mnt->mnt_sb, fh, fh_len, fileid_type, | 366 | * Try to get any dentry for the given file handle from the filesystem. |
| 521 | acceptable, context); | 367 | */ |
| 368 | result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); | ||
| 369 | if (!result) | ||
| 370 | result = ERR_PTR(-ESTALE); | ||
| 371 | if (IS_ERR(result)) | ||
| 372 | return result; | ||
| 373 | |||
| 374 | if (S_ISDIR(result->d_inode->i_mode)) { | ||
| 375 | /* | ||
| 376 | * This request is for a directory. | ||
| 377 | * | ||
| 378 | * On the positive side there is only one dentry for each | ||
| 379 | * directory inode. On the negative side this implies that we | ||
| 380 | * have to ensure our dentry is connected all the way up to the | ||
| 381 | * filesystem root. | ||
| 382 | */ | ||
| 383 | if (result->d_flags & DCACHE_DISCONNECTED) { | ||
| 384 | err = reconnect_path(mnt, result); | ||
| 385 | if (err) | ||
| 386 | goto err_result; | ||
| 387 | } | ||
| 388 | |||
| 389 | if (!acceptable(context, result)) { | ||
| 390 | err = -EACCES; | ||
| 391 | goto err_result; | ||
| 392 | } | ||
| 393 | |||
| 394 | return result; | ||
| 522 | } else { | 395 | } else { |
| 523 | result = export_decode_fh(mnt->mnt_sb, fh, fh_len, fileid_type, | 396 | /* |
| 524 | acceptable, context); | 397 | * It's not a directory. Life is a little more complicated. |
| 398 | */ | ||
| 399 | struct dentry *target_dir, *nresult; | ||
| 400 | char nbuf[NAME_MAX+1]; | ||
| 401 | |||
| 402 | /* | ||
| 403 | * See if either the dentry we just got from the filesystem | ||
| 404 | * or any alias for it is acceptable. This is always true | ||
| 405 | * if this filesystem is exported without the subtreecheck | ||
| 406 | * option. If the filesystem is exported with the subtree | ||
| 407 | * check option there's a fair chance we need to look at | ||
| 408 | * the parent directory in the file handle and make sure | ||
| 409 | * it's connected to the filesystem root. | ||
| 410 | */ | ||
| 411 | alias = find_acceptable_alias(result, acceptable, context); | ||
| 412 | if (alias) | ||
| 413 | return alias; | ||
| 414 | |||
| 415 | /* | ||
| 416 | * Try to extract a dentry for the parent directory from the | ||
| 417 | * file handle. If this fails we'll have to give up. | ||
| 418 | */ | ||
| 419 | err = -ESTALE; | ||
| 420 | if (!nop->fh_to_parent) | ||
| 421 | goto err_result; | ||
| 422 | |||
| 423 | target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, | ||
| 424 | fh_len, fileid_type); | ||
| 425 | if (!target_dir) | ||
| 426 | goto err_result; | ||
| 427 | err = PTR_ERR(target_dir); | ||
| 428 | if (IS_ERR(target_dir)) | ||
| 429 | goto err_result; | ||
| 430 | |||
| 431 | /* | ||
| 432 | * And as usual we need to make sure the parent directory is | ||
| 433 | * connected to the filesystem root. The VFS really doesn't | ||
| 434 | * like disconnected directories.. | ||
| 435 | */ | ||
| 436 | err = reconnect_path(mnt, target_dir); | ||
| 437 | if (err) { | ||
| 438 | dput(target_dir); | ||
| 439 | goto err_result; | ||
| 440 | } | ||
| 441 | |||
| 442 | /* | ||
| 443 | * Now that we've got both a well-connected parent and a | ||
| 444 | * dentry for the inode we're after, make sure that our | ||
| 445 | * inode is actually connected to the parent. | ||
| 446 | */ | ||
| 447 | err = exportfs_get_name(mnt, target_dir, nbuf, result); | ||
| 448 | if (!err) { | ||
| 449 | mutex_lock(&target_dir->d_inode->i_mutex); | ||
| 450 | nresult = lookup_one_len(nbuf, target_dir, | ||
| 451 | strlen(nbuf)); | ||
| 452 | mutex_unlock(&target_dir->d_inode->i_mutex); | ||
| 453 | if (!IS_ERR(nresult)) { | ||
| 454 | if (nresult->d_inode) { | ||
| 455 | dput(result); | ||
| 456 | result = nresult; | ||
| 457 | } else | ||
| 458 | dput(nresult); | ||
| 459 | } | ||
| 460 | } | ||
| 461 | |||
| 462 | /* | ||
| 463 | * At this point we are done with the parent, but it's pinned | ||
| 464 | * by the child dentry anyway. | ||
| 465 | */ | ||
| 466 | dput(target_dir); | ||
| 467 | |||
| 468 | /* | ||
| 469 | * And finally make sure the dentry is actually acceptable | ||
| 470 | * to NFSD. | ||
| 471 | */ | ||
| 472 | alias = find_acceptable_alias(result, acceptable, context); | ||
| 473 | if (!alias) { | ||
| 474 | err = -EACCES; | ||
| 475 | goto err_result; | ||
| 476 | } | ||
| 477 | |||
| 478 | return alias; | ||
| 525 | } | 479 | } |
| 526 | 480 | ||
| 527 | return result; | 481 | err_result: |
| 482 | dput(result); | ||
| 483 | return ERR_PTR(err); | ||
| 528 | } | 484 | } |
| 529 | EXPORT_SYMBOL_GPL(exportfs_decode_fh); | 485 | EXPORT_SYMBOL_GPL(exportfs_decode_fh); |
| 530 | 486 | ||
| 531 | EXPORT_SYMBOL(find_exported_dentry); | ||
| 532 | |||
| 533 | MODULE_LICENSE("GPL"); | 487 | MODULE_LICENSE("GPL"); |
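With the struct fid rework above, the two default handle types have a fixed layout: FILEID_INO32_GEN carries {ino, gen} and FILEID_INO32_GEN_PARENT appends {parent_ino, parent_gen}, as export_encode_fh() now writes them. A small decoding sketch under those assumptions (the helper is hypothetical; the field names match the new struct fid):

	#include <linux/kernel.h>
	#include <linux/exportfs.h>

	/* Hypothetical helper: dump the parent identifiers of a type-2 handle. */
	static void fid_show_parent(struct fid *fid, int fh_len, int fh_type)
	{
		if (fh_type == FILEID_INO32_GEN_PARENT && fh_len >= 4)
			printk(KERN_DEBUG "parent ino %u gen %u\n",
			       fid->i32.parent_ino, fid->i32.parent_gen);
	}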
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 05d9342bb64e..d868e26c15eb 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c | |||
| @@ -28,6 +28,24 @@ | |||
| 28 | 28 | ||
| 29 | typedef struct ext2_dir_entry_2 ext2_dirent; | 29 | typedef struct ext2_dir_entry_2 ext2_dirent; |
| 30 | 30 | ||
| 31 | static inline unsigned ext2_rec_len_from_disk(__le16 dlen) | ||
| 32 | { | ||
| 33 | unsigned len = le16_to_cpu(dlen); | ||
| 34 | |||
| 35 | if (len == EXT2_MAX_REC_LEN) | ||
| 36 | return 1 << 16; | ||
| 37 | return len; | ||
| 38 | } | ||
| 39 | |||
| 40 | static inline __le16 ext2_rec_len_to_disk(unsigned len) | ||
| 41 | { | ||
| 42 | if (len == (1 << 16)) | ||
| 43 | return cpu_to_le16(EXT2_MAX_REC_LEN); | ||
| 44 | else if (len > (1 << 16)) | ||
| 45 | BUG(); | ||
| 46 | return cpu_to_le16(len); | ||
| 47 | } | ||
| 48 | |||
| 31 | /* | 49 | /* |
| 32 | * ext2 uses block-sized chunks. Arguably, sector-sized ones would be | 50 | * ext2 uses block-sized chunks. Arguably, sector-sized ones would be |
| 33 | * more robust, but we have what we have | 51 | * more robust, but we have what we have |
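These helpers exist because rec_len is a __le16 on disk, yet a directory entry may span a whole block; with 64KiB blocks that length is exactly 1 << 16, which does not fit in 16 bits, so the otherwise-invalid value EXT2_MAX_REC_LEN stands in for it. A round-trip sketch (illustrative; it would live next to the helpers in fs/ext2/dir.c):

	static void rec_len_roundtrip_example(void)
	{
		/* A 65536-byte record cannot be stored directly in a __le16 ... */
		__le16 disk = ext2_rec_len_to_disk(1 << 16); /* EXT2_MAX_REC_LEN on disk */
		/* ... but it decodes back to the full block size. */
		BUG_ON(ext2_rec_len_from_disk(disk) != (1 << 16));
	}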
| @@ -106,7 +124,7 @@ static void ext2_check_page(struct page *page) | |||
| 106 | } | 124 | } |
| 107 | for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) { | 125 | for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) { |
| 108 | p = (ext2_dirent *)(kaddr + offs); | 126 | p = (ext2_dirent *)(kaddr + offs); |
| 109 | rec_len = le16_to_cpu(p->rec_len); | 127 | rec_len = ext2_rec_len_from_disk(p->rec_len); |
| 110 | 128 | ||
| 111 | if (rec_len < EXT2_DIR_REC_LEN(1)) | 129 | if (rec_len < EXT2_DIR_REC_LEN(1)) |
| 112 | goto Eshort; | 130 | goto Eshort; |
| @@ -204,7 +222,8 @@ static inline int ext2_match (int len, const char * const name, | |||
| 204 | */ | 222 | */ |
| 205 | static inline ext2_dirent *ext2_next_entry(ext2_dirent *p) | 223 | static inline ext2_dirent *ext2_next_entry(ext2_dirent *p) |
| 206 | { | 224 | { |
| 207 | return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len)); | 225 | return (ext2_dirent *)((char *)p + |
| 226 | ext2_rec_len_from_disk(p->rec_len)); | ||
| 208 | } | 227 | } |
| 209 | 228 | ||
| 210 | static inline unsigned | 229 | static inline unsigned |
| @@ -316,7 +335,7 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) | |||
| 316 | return 0; | 335 | return 0; |
| 317 | } | 336 | } |
| 318 | } | 337 | } |
| 319 | filp->f_pos += le16_to_cpu(de->rec_len); | 338 | filp->f_pos += ext2_rec_len_from_disk(de->rec_len); |
| 320 | } | 339 | } |
| 321 | ext2_put_page(page); | 340 | ext2_put_page(page); |
| 322 | } | 341 | } |
| @@ -425,7 +444,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, | |||
| 425 | { | 444 | { |
| 426 | loff_t pos = page_offset(page) + | 445 | loff_t pos = page_offset(page) + |
| 427 | (char *) de - (char *) page_address(page); | 446 | (char *) de - (char *) page_address(page); |
| 428 | unsigned len = le16_to_cpu(de->rec_len); | 447 | unsigned len = ext2_rec_len_from_disk(de->rec_len); |
| 429 | int err; | 448 | int err; |
| 430 | 449 | ||
| 431 | lock_page(page); | 450 | lock_page(page); |
| @@ -482,7 +501,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) | |||
| 482 | /* We hit i_size */ | 501 | /* We hit i_size */ |
| 483 | name_len = 0; | 502 | name_len = 0; |
| 484 | rec_len = chunk_size; | 503 | rec_len = chunk_size; |
| 485 | de->rec_len = cpu_to_le16(chunk_size); | 504 | de->rec_len = ext2_rec_len_to_disk(chunk_size); |
| 486 | de->inode = 0; | 505 | de->inode = 0; |
| 487 | goto got_it; | 506 | goto got_it; |
| 488 | } | 507 | } |
| @@ -496,7 +515,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) | |||
| 496 | if (ext2_match (namelen, name, de)) | 515 | if (ext2_match (namelen, name, de)) |
| 497 | goto out_unlock; | 516 | goto out_unlock; |
| 498 | name_len = EXT2_DIR_REC_LEN(de->name_len); | 517 | name_len = EXT2_DIR_REC_LEN(de->name_len); |
| 499 | rec_len = le16_to_cpu(de->rec_len); | 518 | rec_len = ext2_rec_len_from_disk(de->rec_len); |
| 500 | if (!de->inode && rec_len >= reclen) | 519 | if (!de->inode && rec_len >= reclen) |
| 501 | goto got_it; | 520 | goto got_it; |
| 502 | if (rec_len >= name_len + reclen) | 521 | if (rec_len >= name_len + reclen) |
| @@ -518,8 +537,8 @@ got_it: | |||
| 518 | goto out_unlock; | 537 | goto out_unlock; |
| 519 | if (de->inode) { | 538 | if (de->inode) { |
| 520 | ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len); | 539 | ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len); |
| 521 | de1->rec_len = cpu_to_le16(rec_len - name_len); | 540 | de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len); |
| 522 | de->rec_len = cpu_to_le16(name_len); | 541 | de->rec_len = ext2_rec_len_to_disk(name_len); |
| 523 | de = de1; | 542 | de = de1; |
| 524 | } | 543 | } |
| 525 | de->name_len = namelen; | 544 | de->name_len = namelen; |
| @@ -550,7 +569,8 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) | |||
| 550 | struct inode *inode = mapping->host; | 569 | struct inode *inode = mapping->host; |
| 551 | char *kaddr = page_address(page); | 570 | char *kaddr = page_address(page); |
| 552 | unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); | 571 | unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); |
| 553 | unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len); | 572 | unsigned to = ((char *)dir - kaddr) + |
| 573 | ext2_rec_len_from_disk(dir->rec_len); | ||
| 554 | loff_t pos; | 574 | loff_t pos; |
| 555 | ext2_dirent * pde = NULL; | 575 | ext2_dirent * pde = NULL; |
| 556 | ext2_dirent * de = (ext2_dirent *) (kaddr + from); | 576 | ext2_dirent * de = (ext2_dirent *) (kaddr + from); |
| @@ -574,7 +594,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) | |||
| 574 | &page, NULL); | 594 | &page, NULL); |
| 575 | BUG_ON(err); | 595 | BUG_ON(err); |
| 576 | if (pde) | 596 | if (pde) |
| 577 | pde->rec_len = cpu_to_le16(to - from); | 597 | pde->rec_len = ext2_rec_len_to_disk(to - from); |
| 578 | dir->inode = 0; | 598 | dir->inode = 0; |
| 579 | err = ext2_commit_chunk(page, pos, to - from); | 599 | err = ext2_commit_chunk(page, pos, to - from); |
| 580 | inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; | 600 | inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; |
| @@ -610,14 +630,14 @@ int ext2_make_empty(struct inode *inode, struct inode *parent) | |||
| 610 | memset(kaddr, 0, chunk_size); | 630 | memset(kaddr, 0, chunk_size); |
| 611 | de = (struct ext2_dir_entry_2 *)kaddr; | 631 | de = (struct ext2_dir_entry_2 *)kaddr; |
| 612 | de->name_len = 1; | 632 | de->name_len = 1; |
| 613 | de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1)); | 633 | de->rec_len = ext2_rec_len_to_disk(EXT2_DIR_REC_LEN(1)); |
| 614 | memcpy (de->name, ".\0\0", 4); | 634 | memcpy (de->name, ".\0\0", 4); |
| 615 | de->inode = cpu_to_le32(inode->i_ino); | 635 | de->inode = cpu_to_le32(inode->i_ino); |
| 616 | ext2_set_de_type (de, inode); | 636 | ext2_set_de_type (de, inode); |
| 617 | 637 | ||
| 618 | de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1)); | 638 | de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1)); |
| 619 | de->name_len = 2; | 639 | de->name_len = 2; |
| 620 | de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1)); | 640 | de->rec_len = ext2_rec_len_to_disk(chunk_size - EXT2_DIR_REC_LEN(1)); |
| 621 | de->inode = cpu_to_le32(parent->i_ino); | 641 | de->inode = cpu_to_le32(parent->i_ino); |
| 622 | memcpy (de->name, "..\0", 4); | 642 | memcpy (de->name, "..\0", 4); |
| 623 | ext2_set_de_type (de, inode); | 643 | ext2_set_de_type (de, inode); |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 77bd5f9262f9..154e25f13d77 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
| @@ -311,13 +311,10 @@ static const struct super_operations ext2_sops = { | |||
| 311 | #endif | 311 | #endif |
| 312 | }; | 312 | }; |
| 313 | 313 | ||
| 314 | static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp) | 314 | static struct inode *ext2_nfs_get_inode(struct super_block *sb, |
| 315 | u64 ino, u32 generation) | ||
| 315 | { | 316 | { |
| 316 | __u32 *objp = vobjp; | ||
| 317 | unsigned long ino = objp[0]; | ||
| 318 | __u32 generation = objp[1]; | ||
| 319 | struct inode *inode; | 317 | struct inode *inode; |
| 320 | struct dentry *result; | ||
| 321 | 318 | ||
| 322 | if (ino < EXT2_FIRST_INO(sb) && ino != EXT2_ROOT_INO) | 319 | if (ino < EXT2_FIRST_INO(sb) && ino != EXT2_ROOT_INO) |
| 323 | return ERR_PTR(-ESTALE); | 320 | return ERR_PTR(-ESTALE); |
| @@ -338,15 +335,21 @@ static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp) | |||
| 338 | iput(inode); | 335 | iput(inode); |
| 339 | return ERR_PTR(-ESTALE); | 336 | return ERR_PTR(-ESTALE); |
| 340 | } | 337 | } |
| 341 | /* now to find a dentry. | 338 | return inode; |
| 342 | * If possible, get a well-connected one | 339 | } |
| 343 | */ | 340 | |
| 344 | result = d_alloc_anon(inode); | 341 | static struct dentry *ext2_fh_to_dentry(struct super_block *sb, struct fid *fid, |
| 345 | if (!result) { | 342 | int fh_len, int fh_type) |
| 346 | iput(inode); | 343 | { |
| 347 | return ERR_PTR(-ENOMEM); | 344 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, |
| 348 | } | 345 | ext2_nfs_get_inode); |
| 349 | return result; | 346 | } |
| 347 | |||
| 348 | static struct dentry *ext2_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
| 349 | int fh_len, int fh_type) | ||
| 350 | { | ||
| 351 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
| 352 | ext2_nfs_get_inode); | ||
| 350 | } | 353 | } |
| 351 | 354 | ||
| 352 | /* Yes, most of these are left as NULL!! | 355 | /* Yes, most of these are left as NULL!! |
| @@ -354,9 +357,10 @@ static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp) | |||
| 354 | * systems, but can be improved upon. | 357 | * systems, but can be improved upon. |
| 355 | * Currently only get_parent is required. | 358 | * Currently only get_parent is required. |
| 356 | */ | 359 | */ |
| 357 | static struct export_operations ext2_export_ops = { | 360 | static const struct export_operations ext2_export_ops = { |
| 361 | .fh_to_dentry = ext2_fh_to_dentry, | ||
| 362 | .fh_to_parent = ext2_fh_to_parent, | ||
| 358 | .get_parent = ext2_get_parent, | 363 | .get_parent = ext2_get_parent, |
| 359 | .get_dentry = ext2_get_dentry, | ||
| 360 | }; | 364 | }; |
| 361 | 365 | ||
| 362 | static unsigned long get_sb_block(void **data) | 366 | static unsigned long get_sb_block(void **data) |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 81868c0bc40e..de55da9e28ba 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
| @@ -631,13 +631,10 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 631 | } | 631 | } |
| 632 | 632 | ||
| 633 | 633 | ||
| 634 | static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) | 634 | static struct inode *ext3_nfs_get_inode(struct super_block *sb, |
| 635 | u64 ino, u32 generation) | ||
| 635 | { | 636 | { |
| 636 | __u32 *objp = vobjp; | ||
| 637 | unsigned long ino = objp[0]; | ||
| 638 | __u32 generation = objp[1]; | ||
| 639 | struct inode *inode; | 637 | struct inode *inode; |
| 640 | struct dentry *result; | ||
| 641 | 638 | ||
| 642 | if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) | 639 | if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) |
| 643 | return ERR_PTR(-ESTALE); | 640 | return ERR_PTR(-ESTALE); |
| @@ -660,15 +657,22 @@ static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) | |||
| 660 | iput(inode); | 657 | iput(inode); |
| 661 | return ERR_PTR(-ESTALE); | 658 | return ERR_PTR(-ESTALE); |
| 662 | } | 659 | } |
| 663 | /* now to find a dentry. | 660 | |
| 664 | * If possible, get a well-connected one | 661 | return inode; |
| 665 | */ | 662 | } |
| 666 | result = d_alloc_anon(inode); | 663 | |
| 667 | if (!result) { | 664 | static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid, |
| 668 | iput(inode); | 665 | int fh_len, int fh_type) |
| 669 | return ERR_PTR(-ENOMEM); | 666 | { |
| 670 | } | 667 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, |
| 671 | return result; | 668 | ext3_nfs_get_inode); |
| 669 | } | ||
| 670 | |||
| 671 | static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
| 672 | int fh_len, int fh_type) | ||
| 673 | { | ||
| 674 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
| 675 | ext3_nfs_get_inode); | ||
| 672 | } | 676 | } |
| 673 | 677 | ||
| 674 | #ifdef CONFIG_QUOTA | 678 | #ifdef CONFIG_QUOTA |
| @@ -737,9 +741,10 @@ static const struct super_operations ext3_sops = { | |||
| 737 | #endif | 741 | #endif |
| 738 | }; | 742 | }; |
| 739 | 743 | ||
| 740 | static struct export_operations ext3_export_ops = { | 744 | static const struct export_operations ext3_export_ops = { |
| 745 | .fh_to_dentry = ext3_fh_to_dentry, | ||
| 746 | .fh_to_parent = ext3_fh_to_parent, | ||
| 741 | .get_parent = ext3_get_parent, | 747 | .get_parent = ext3_get_parent, |
| 742 | .get_dentry = ext3_get_dentry, | ||
| 743 | }; | 748 | }; |
| 744 | 749 | ||
| 745 | enum { | 750 | enum { |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b11e9e2bcd01..8031dc0e24e5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -686,13 +686,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 686 | } | 686 | } |
| 687 | 687 | ||
| 688 | 688 | ||
| 689 | static struct dentry *ext4_get_dentry(struct super_block *sb, void *vobjp) | 689 | static struct inode *ext4_nfs_get_inode(struct super_block *sb, |
| 690 | u64 ino, u32 generation) | ||
| 690 | { | 691 | { |
| 691 | __u32 *objp = vobjp; | ||
| 692 | unsigned long ino = objp[0]; | ||
| 693 | __u32 generation = objp[1]; | ||
| 694 | struct inode *inode; | 692 | struct inode *inode; |
| 695 | struct dentry *result; | ||
| 696 | 693 | ||
| 697 | if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) | 694 | if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) |
| 698 | return ERR_PTR(-ESTALE); | 695 | return ERR_PTR(-ESTALE); |
| @@ -715,15 +712,22 @@ static struct dentry *ext4_get_dentry(struct super_block *sb, void *vobjp) | |||
| 715 | iput(inode); | 712 | iput(inode); |
| 716 | return ERR_PTR(-ESTALE); | 713 | return ERR_PTR(-ESTALE); |
| 717 | } | 714 | } |
| 718 | /* now to find a dentry. | 715 | |
| 719 | * If possible, get a well-connected one | 716 | return inode; |
| 720 | */ | 717 | } |
| 721 | result = d_alloc_anon(inode); | 718 | |
| 722 | if (!result) { | 719 | static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, |
| 723 | iput(inode); | 720 | int fh_len, int fh_type) |
| 724 | return ERR_PTR(-ENOMEM); | 721 | { |
| 725 | } | 722 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, |
| 726 | return result; | 723 | ext4_nfs_get_inode); |
| 724 | } | ||
| 725 | |||
| 726 | static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
| 727 | int fh_len, int fh_type) | ||
| 728 | { | ||
| 729 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
| 730 | ext4_nfs_get_inode); | ||
| 727 | } | 731 | } |
| 728 | 732 | ||
| 729 | #ifdef CONFIG_QUOTA | 733 | #ifdef CONFIG_QUOTA |
| @@ -792,9 +796,10 @@ static const struct super_operations ext4_sops = { | |||
| 792 | #endif | 796 | #endif |
| 793 | }; | 797 | }; |
| 794 | 798 | ||
| 795 | static struct export_operations ext4_export_ops = { | 799 | static const struct export_operations ext4_export_ops = { |
| 800 | .fh_to_dentry = ext4_fh_to_dentry, | ||
| 801 | .fh_to_parent = ext4_fh_to_parent, | ||
| 796 | .get_parent = ext4_get_parent, | 802 | .get_parent = ext4_get_parent, |
| 797 | .get_dentry = ext4_get_dentry, | ||
| 798 | }; | 803 | }; |
| 799 | 804 | ||
| 800 | enum { | 805 | enum { |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index c0c5e9c55b58..920a576e1c25 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
| @@ -653,24 +653,15 @@ static const struct super_operations fat_sops = { | |||
| 653 | * of i_logstart is used to store the directory entry offset. | 653 | * of i_logstart is used to store the directory entry offset. |
| 654 | */ | 654 | */ |
| 655 | 655 | ||
| 656 | static struct dentry * | 656 | static struct dentry *fat_fh_to_dentry(struct super_block *sb, |
| 657 | fat_decode_fh(struct super_block *sb, __u32 *fh, int len, int fhtype, | 657 | struct fid *fid, int fh_len, int fh_type) |
| 658 | int (*acceptable)(void *context, struct dentry *de), | ||
| 659 | void *context) | ||
| 660 | { | ||
| 661 | if (fhtype != 3) | ||
| 662 | return ERR_PTR(-ESTALE); | ||
| 663 | if (len < 5) | ||
| 664 | return ERR_PTR(-ESTALE); | ||
| 665 | |||
| 666 | return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable, context); | ||
| 667 | } | ||
| 668 | |||
| 669 | static struct dentry *fat_get_dentry(struct super_block *sb, void *inump) | ||
| 670 | { | 658 | { |
| 671 | struct inode *inode = NULL; | 659 | struct inode *inode = NULL; |
| 672 | struct dentry *result; | 660 | struct dentry *result; |
| 673 | __u32 *fh = inump; | 661 | u32 *fh = fid->raw; |
| 662 | |||
| 663 | if (fh_len < 5 || fh_type != 3) | ||
| 664 | return NULL; | ||
| 674 | 665 | ||
| 675 | inode = iget(sb, fh[0]); | 666 | inode = iget(sb, fh[0]); |
| 676 | if (!inode || is_bad_inode(inode) || inode->i_generation != fh[1]) { | 667 | if (!inode || is_bad_inode(inode) || inode->i_generation != fh[1]) { |
| @@ -783,10 +774,9 @@ out: | |||
| 783 | return parent; | 774 | return parent; |
| 784 | } | 775 | } |
| 785 | 776 | ||
| 786 | static struct export_operations fat_export_ops = { | 777 | static const struct export_operations fat_export_ops = { |
| 787 | .decode_fh = fat_decode_fh, | ||
| 788 | .encode_fh = fat_encode_fh, | 778 | .encode_fh = fat_encode_fh, |
| 789 | .get_dentry = fat_get_dentry, | 779 | .fh_to_dentry = fat_fh_to_dentry, |
| 790 | .get_parent = fat_get_parent, | 780 | .get_parent = fat_get_parent, |
| 791 | }; | 781 | }; |
| 792 | 782 | ||
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index e2d1347796a9..b9da62348a87 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c | |||
| @@ -31,40 +31,6 @@ | |||
| 31 | #define GFS2_LARGE_FH_SIZE 8 | 31 | #define GFS2_LARGE_FH_SIZE 8 |
| 32 | #define GFS2_OLD_FH_SIZE 10 | 32 | #define GFS2_OLD_FH_SIZE 10 |
| 33 | 33 | ||
| 34 | static struct dentry *gfs2_decode_fh(struct super_block *sb, | ||
| 35 | __u32 *p, | ||
| 36 | int fh_len, | ||
| 37 | int fh_type, | ||
| 38 | int (*acceptable)(void *context, | ||
| 39 | struct dentry *dentry), | ||
| 40 | void *context) | ||
| 41 | { | ||
| 42 | __be32 *fh = (__force __be32 *)p; | ||
| 43 | struct gfs2_inum_host inum, parent; | ||
| 44 | |||
| 45 | memset(&parent, 0, sizeof(struct gfs2_inum)); | ||
| 46 | |||
| 47 | switch (fh_len) { | ||
| 48 | case GFS2_LARGE_FH_SIZE: | ||
| 49 | case GFS2_OLD_FH_SIZE: | ||
| 50 | parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; | ||
| 51 | parent.no_formal_ino |= be32_to_cpu(fh[5]); | ||
| 52 | parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; | ||
| 53 | parent.no_addr |= be32_to_cpu(fh[7]); | ||
| 54 | case GFS2_SMALL_FH_SIZE: | ||
| 55 | inum.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; | ||
| 56 | inum.no_formal_ino |= be32_to_cpu(fh[1]); | ||
| 57 | inum.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; | ||
| 58 | inum.no_addr |= be32_to_cpu(fh[3]); | ||
| 59 | break; | ||
| 60 | default: | ||
| 61 | return NULL; | ||
| 62 | } | ||
| 63 | |||
| 64 | return gfs2_export_ops.find_exported_dentry(sb, &inum, &parent, | ||
| 65 | acceptable, context); | ||
| 66 | } | ||
| 67 | |||
| 68 | static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, | 34 | static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, |
| 69 | int connectable) | 35 | int connectable) |
| 70 | { | 36 | { |
| @@ -189,10 +155,10 @@ static struct dentry *gfs2_get_parent(struct dentry *child) | |||
| 189 | return dentry; | 155 | return dentry; |
| 190 | } | 156 | } |
| 191 | 157 | ||
| 192 | static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) | 158 | static struct dentry *gfs2_get_dentry(struct super_block *sb, |
| 159 | struct gfs2_inum_host *inum) | ||
| 193 | { | 160 | { |
| 194 | struct gfs2_sbd *sdp = sb->s_fs_info; | 161 | struct gfs2_sbd *sdp = sb->s_fs_info; |
| 195 | struct gfs2_inum_host *inum = inum_obj; | ||
| 196 | struct gfs2_holder i_gh, ri_gh, rgd_gh; | 162 | struct gfs2_holder i_gh, ri_gh, rgd_gh; |
| 197 | struct gfs2_rgrpd *rgd; | 163 | struct gfs2_rgrpd *rgd; |
| 198 | struct inode *inode; | 164 | struct inode *inode; |
| @@ -289,11 +255,50 @@ fail: | |||
| 289 | return ERR_PTR(error); | 255 | return ERR_PTR(error); |
| 290 | } | 256 | } |
| 291 | 257 | ||
| 292 | struct export_operations gfs2_export_ops = { | 258 | static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, |
| 293 | .decode_fh = gfs2_decode_fh, | 259 | int fh_len, int fh_type) |
| 260 | { | ||
| 261 | struct gfs2_inum_host this; | ||
| 262 | __be32 *fh = (__force __be32 *)fid->raw; | ||
| 263 | |||
| 264 | switch (fh_type) { | ||
| 265 | case GFS2_SMALL_FH_SIZE: | ||
| 266 | case GFS2_LARGE_FH_SIZE: | ||
| 267 | case GFS2_OLD_FH_SIZE: | ||
| 268 | this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; | ||
| 269 | this.no_formal_ino |= be32_to_cpu(fh[1]); | ||
| 270 | this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; | ||
| 271 | this.no_addr |= be32_to_cpu(fh[3]); | ||
| 272 | return gfs2_get_dentry(sb, &this); | ||
| 273 | default: | ||
| 274 | return NULL; | ||
| 275 | } | ||
| 276 | } | ||
| 277 | |||
| 278 | static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
| 279 | int fh_len, int fh_type) | ||
| 280 | { | ||
| 281 | struct gfs2_inum_host parent; | ||
| 282 | __be32 *fh = (__force __be32 *)fid->raw; | ||
| 283 | |||
| 284 | switch (fh_type) { | ||
| 285 | case GFS2_LARGE_FH_SIZE: | ||
| 286 | case GFS2_OLD_FH_SIZE: | ||
| 287 | parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; | ||
| 288 | parent.no_formal_ino |= be32_to_cpu(fh[5]); | ||
| 289 | parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; | ||
| 290 | parent.no_addr |= be32_to_cpu(fh[7]); | ||
| 291 | return gfs2_get_dentry(sb, &parent); | ||
| 292 | default: | ||
| 293 | return NULL; | ||
| 294 | } | ||
| 295 | } | ||
| 296 | |||
| 297 | const struct export_operations gfs2_export_ops = { | ||
| 294 | .encode_fh = gfs2_encode_fh, | 298 | .encode_fh = gfs2_encode_fh, |
| 299 | .fh_to_dentry = gfs2_fh_to_dentry, | ||
| 300 | .fh_to_parent = gfs2_fh_to_parent, | ||
| 295 | .get_name = gfs2_get_name, | 301 | .get_name = gfs2_get_name, |
| 296 | .get_parent = gfs2_get_parent, | 302 | .get_parent = gfs2_get_parent, |
| 297 | .get_dentry = gfs2_get_dentry, | ||
| 298 | }; | 303 | }; |
| 299 | 304 | ||
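gfs2 carries 64-bit no_formal_ino and no_addr values in its handle, so each one is split across two big-endian 32-bit words, high word first, which is what gfs2_fh_to_dentry() and gfs2_fh_to_parent() reassemble above. A standalone round-trip sketch of that packing, in plain userspace C with htonl/ntohl standing in for the kernel's cpu_to_be32/be32_to_cpu:

#include <stdint.h>
#include <assert.h>
#include <arpa/inet.h>

/* Split a 64-bit value into two big-endian 32-bit words, high word first,
 * mirroring the layout gfs2_fh_to_dentry() decodes above. */
static void pack64(uint32_t fh[2], uint64_t v)
{
	fh[0] = htonl((uint32_t)(v >> 32));
	fh[1] = htonl((uint32_t)(v & 0xffffffffU));
}

static uint64_t unpack64(const uint32_t fh[2])
{
	return ((uint64_t)ntohl(fh[0]) << 32) | ntohl(fh[1]);
}

int main(void)
{
	uint32_t fh[2];

	pack64(fh, 0x123456789abcdef0ULL);
	assert(unpack64(fh) == 0x123456789abcdef0ULL);
	return 0;
}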
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h index 407029b3b2b3..da8490511836 100644 --- a/fs/gfs2/ops_fstype.h +++ b/fs/gfs2/ops_fstype.h | |||
| @@ -14,6 +14,6 @@ | |||
| 14 | 14 | ||
| 15 | extern struct file_system_type gfs2_fs_type; | 15 | extern struct file_system_type gfs2_fs_type; |
| 16 | extern struct file_system_type gfs2meta_fs_type; | 16 | extern struct file_system_type gfs2meta_fs_type; |
| 17 | extern struct export_operations gfs2_export_ops; | 17 | extern const struct export_operations gfs2_export_ops; |
| 18 | 18 | ||
| 19 | #endif /* __OPS_FSTYPE_DOT_H__ */ | 19 | #endif /* __OPS_FSTYPE_DOT_H__ */ |
diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 4af856a7fda7..29f9753ae5e5 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c | |||
| @@ -42,16 +42,6 @@ isofs_export_iget(struct super_block *sb, | |||
| 42 | return result; | 42 | return result; |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | static struct dentry * | ||
| 46 | isofs_export_get_dentry(struct super_block *sb, void *vobjp) | ||
| 47 | { | ||
| 48 | __u32 *objp = vobjp; | ||
| 49 | unsigned long block = objp[0]; | ||
| 50 | unsigned long offset = objp[1]; | ||
| 51 | __u32 generation = objp[2]; | ||
| 52 | return isofs_export_iget(sb, block, offset, generation); | ||
| 53 | } | ||
| 54 | |||
| 55 | /* This function is surprisingly simple. The trick is understanding | 45 | /* This function is surprisingly simple. The trick is understanding |
| 56 | * that "child" is always a directory. So, to find its parent, you | 46 | * that "child" is always a directory. So, to find its parent, you |
| 57 | * simply need to find its ".." entry, normalize its block and offset, | 47 | * simply need to find its ".." entry, normalize its block and offset, |
| @@ -182,43 +172,44 @@ isofs_export_encode_fh(struct dentry *dentry, | |||
| 182 | return type; | 172 | return type; |
| 183 | } | 173 | } |
| 184 | 174 | ||
| 175 | struct isofs_fid { | ||
| 176 | u32 block; | ||
| 177 | u16 offset; | ||
| 178 | u16 parent_offset; | ||
| 179 | u32 generation; | ||
| 180 | u32 parent_block; | ||
| 181 | u32 parent_generation; | ||
| 182 | }; | ||
| 185 | 183 | ||
| 186 | static struct dentry * | 184 | static struct dentry *isofs_fh_to_dentry(struct super_block *sb, |
| 187 | isofs_export_decode_fh(struct super_block *sb, | 185 | struct fid *fid, int fh_len, int fh_type) |
| 188 | __u32 *fh32, | ||
| 189 | int fh_len, | ||
| 190 | int fileid_type, | ||
| 191 | int (*acceptable)(void *context, struct dentry *de), | ||
| 192 | void *context) | ||
| 193 | { | 186 | { |
| 194 | __u16 *fh16 = (__u16*)fh32; | 187 | struct isofs_fid *ifid = (struct isofs_fid *)fid; |
| 195 | __u32 child[3]; /* The child is what triggered all this. */ | ||
| 196 | __u32 parent[3]; /* The parent is just along for the ride. */ | ||
| 197 | 188 | ||
| 198 | if (fh_len < 3 || fileid_type > 2) | 189 | if (fh_len < 3 || fh_type > 2) |
| 199 | return NULL; | 190 | return NULL; |
| 200 | 191 | ||
| 201 | child[0] = fh32[0]; | 192 | return isofs_export_iget(sb, ifid->block, ifid->offset, |
| 202 | child[1] = fh16[2]; /* fh16 [sic] */ | 193 | ifid->generation); |
| 203 | child[2] = fh32[2]; | ||
| 204 | |||
| 205 | parent[0] = 0; | ||
| 206 | parent[1] = 0; | ||
| 207 | parent[2] = 0; | ||
| 208 | if (fileid_type == 2) { | ||
| 209 | if (fh_len > 2) parent[0] = fh32[3]; | ||
| 210 | parent[1] = fh16[3]; /* fh16 [sic] */ | ||
| 211 | if (fh_len > 4) parent[2] = fh32[4]; | ||
| 212 | } | ||
| 213 | |||
| 214 | return sb->s_export_op->find_exported_dentry(sb, child, parent, | ||
| 215 | acceptable, context); | ||
| 216 | } | 194 | } |
| 217 | 195 | ||
| 196 | static struct dentry *isofs_fh_to_parent(struct super_block *sb, | ||
| 197 | struct fid *fid, int fh_len, int fh_type) | ||
| 198 | { | ||
| 199 | struct isofs_fid *ifid = (struct isofs_fid *)fid; | ||
| 200 | |||
| 201 | if (fh_type != 2) | ||
| 202 | return NULL; | ||
| 203 | |||
| 204 | return isofs_export_iget(sb, | ||
| 205 | fh_len > 2 ? ifid->parent_block : 0, | ||
| 206 | ifid->parent_offset, | ||
| 207 | fh_len > 4 ? ifid->parent_generation : 0); | ||
| 208 | } | ||
| 218 | 209 | ||
| 219 | struct export_operations isofs_export_ops = { | 210 | const struct export_operations isofs_export_ops = { |
| 220 | .decode_fh = isofs_export_decode_fh, | ||
| 221 | .encode_fh = isofs_export_encode_fh, | 211 | .encode_fh = isofs_export_encode_fh, |
| 222 | .get_dentry = isofs_export_get_dentry, | 212 | .fh_to_dentry = isofs_fh_to_dentry, |
| 213 | .fh_to_parent = isofs_fh_to_parent, | ||
| 223 | .get_parent = isofs_export_get_parent, | 214 | .get_parent = isofs_export_get_parent, |
| 224 | }; | 215 | }; |
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index a07e67b1ea7f..f3213f9f89af 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h | |||
| @@ -178,4 +178,4 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de, | |||
| 178 | extern const struct inode_operations isofs_dir_inode_operations; | 178 | extern const struct inode_operations isofs_dir_inode_operations; |
| 179 | extern const struct file_operations isofs_dir_operations; | 179 | extern const struct file_operations isofs_dir_operations; |
| 180 | extern const struct address_space_operations isofs_symlink_aops; | 180 | extern const struct address_space_operations isofs_symlink_aops; |
| 181 | extern struct export_operations isofs_export_ops; | 181 | extern const struct export_operations isofs_export_ops; |
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index f0ec72b263f1..8e2cf2cde185 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h | |||
| @@ -18,6 +18,8 @@ | |||
| 18 | #ifndef _H_JFS_INODE | 18 | #ifndef _H_JFS_INODE |
| 19 | #define _H_JFS_INODE | 19 | #define _H_JFS_INODE |
| 20 | 20 | ||
| 21 | struct fid; | ||
| 22 | |||
| 21 | extern struct inode *ialloc(struct inode *, umode_t); | 23 | extern struct inode *ialloc(struct inode *, umode_t); |
| 22 | extern int jfs_fsync(struct file *, struct dentry *, int); | 24 | extern int jfs_fsync(struct file *, struct dentry *, int); |
| 23 | extern int jfs_ioctl(struct inode *, struct file *, | 25 | extern int jfs_ioctl(struct inode *, struct file *, |
| @@ -32,7 +34,10 @@ extern void jfs_truncate_nolock(struct inode *, loff_t); | |||
| 32 | extern void jfs_free_zero_link(struct inode *); | 34 | extern void jfs_free_zero_link(struct inode *); |
| 33 | extern struct dentry *jfs_get_parent(struct dentry *dentry); | 35 | extern struct dentry *jfs_get_parent(struct dentry *dentry); |
| 34 | extern void jfs_get_inode_flags(struct jfs_inode_info *); | 36 | extern void jfs_get_inode_flags(struct jfs_inode_info *); |
| 35 | extern struct dentry *jfs_get_dentry(struct super_block *sb, void *vobjp); | 37 | extern struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
| 38 | int fh_len, int fh_type); | ||
| 39 | extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
| 40 | int fh_len, int fh_type); | ||
| 36 | extern void jfs_set_inode_flags(struct inode *); | 41 | extern void jfs_set_inode_flags(struct inode *); |
| 37 | extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); | 42 | extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); |
| 38 | 43 | ||
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 932797ba433b..4e0a8493cef6 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
| 21 | #include <linux/ctype.h> | 21 | #include <linux/ctype.h> |
| 22 | #include <linux/quotaops.h> | 22 | #include <linux/quotaops.h> |
| 23 | #include <linux/exportfs.h> | ||
| 23 | #include "jfs_incore.h" | 24 | #include "jfs_incore.h" |
| 24 | #include "jfs_superblock.h" | 25 | #include "jfs_superblock.h" |
| 25 | #include "jfs_inode.h" | 26 | #include "jfs_inode.h" |
| @@ -1477,13 +1478,10 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc | |||
| 1477 | return dentry; | 1478 | return dentry; |
| 1478 | } | 1479 | } |
| 1479 | 1480 | ||
| 1480 | struct dentry *jfs_get_dentry(struct super_block *sb, void *vobjp) | 1481 | static struct inode *jfs_nfs_get_inode(struct super_block *sb, |
| 1482 | u64 ino, u32 generation) | ||
| 1481 | { | 1483 | { |
| 1482 | __u32 *objp = vobjp; | ||
| 1483 | unsigned long ino = objp[0]; | ||
| 1484 | __u32 generation = objp[1]; | ||
| 1485 | struct inode *inode; | 1484 | struct inode *inode; |
| 1486 | struct dentry *result; | ||
| 1487 | 1485 | ||
| 1488 | if (ino == 0) | 1486 | if (ino == 0) |
| 1489 | return ERR_PTR(-ESTALE); | 1487 | return ERR_PTR(-ESTALE); |
| @@ -1493,20 +1491,25 @@ struct dentry *jfs_get_dentry(struct super_block *sb, void *vobjp) | |||
| 1493 | 1491 | ||
| 1494 | if (is_bad_inode(inode) || | 1492 | if (is_bad_inode(inode) || |
| 1495 | (generation && inode->i_generation != generation)) { | 1493 | (generation && inode->i_generation != generation)) { |
| 1496 | result = ERR_PTR(-ESTALE); | 1494 | iput(inode); |
| 1497 | goto out_iput; | 1495 | return ERR_PTR(-ESTALE); |
| 1498 | } | 1496 | } |
| 1499 | 1497 | ||
| 1500 | result = d_alloc_anon(inode); | 1498 | return inode; |
| 1501 | if (!result) { | 1499 | } |
| 1502 | result = ERR_PTR(-ENOMEM); | ||
| 1503 | goto out_iput; | ||
| 1504 | } | ||
| 1505 | return result; | ||
| 1506 | 1500 | ||
| 1507 | out_iput: | 1501 | struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
| 1508 | iput(inode); | 1502 | int fh_len, int fh_type) |
| 1509 | return result; | 1503 | { |
| 1504 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | ||
| 1505 | jfs_nfs_get_inode); | ||
| 1506 | } | ||
| 1507 | |||
| 1508 | struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
| 1509 | int fh_len, int fh_type) | ||
| 1510 | { | ||
| 1511 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
| 1512 | jfs_nfs_get_inode); | ||
| 1510 | } | 1513 | } |
| 1511 | 1514 | ||
| 1512 | struct dentry *jfs_get_parent(struct dentry *dentry) | 1515 | struct dentry *jfs_get_parent(struct dentry *dentry) |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index cff60c171943..314bb4ff1ba8 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
| @@ -48,7 +48,7 @@ MODULE_LICENSE("GPL"); | |||
| 48 | static struct kmem_cache * jfs_inode_cachep; | 48 | static struct kmem_cache * jfs_inode_cachep; |
| 49 | 49 | ||
| 50 | static const struct super_operations jfs_super_operations; | 50 | static const struct super_operations jfs_super_operations; |
| 51 | static struct export_operations jfs_export_operations; | 51 | static const struct export_operations jfs_export_operations; |
| 52 | static struct file_system_type jfs_fs_type; | 52 | static struct file_system_type jfs_fs_type; |
| 53 | 53 | ||
| 54 | #define MAX_COMMIT_THREADS 64 | 54 | #define MAX_COMMIT_THREADS 64 |
| @@ -737,8 +737,9 @@ static const struct super_operations jfs_super_operations = { | |||
| 737 | #endif | 737 | #endif |
| 738 | }; | 738 | }; |
| 739 | 739 | ||
| 740 | static struct export_operations jfs_export_operations = { | 740 | static const struct export_operations jfs_export_operations = { |
| 741 | .get_dentry = jfs_get_dentry, | 741 | .fh_to_dentry = jfs_fh_to_dentry, |
| 742 | .fh_to_parent = jfs_fh_to_parent, | ||
| 742 | .get_parent = jfs_get_parent, | 743 | .get_parent = jfs_get_parent, |
| 743 | }; | 744 | }; |
| 744 | 745 | ||
diff --git a/fs/libfs.c b/fs/libfs.c index ae51481e45e5..6e68b700958d 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <linux/mount.h> | 8 | #include <linux/mount.h> |
| 9 | #include <linux/vfs.h> | 9 | #include <linux/vfs.h> |
| 10 | #include <linux/mutex.h> | 10 | #include <linux/mutex.h> |
| 11 | #include <linux/exportfs.h> | ||
| 11 | 12 | ||
| 12 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
| 13 | 14 | ||
| @@ -678,6 +679,93 @@ out: | |||
| 678 | return ret; | 679 | return ret; |
| 679 | } | 680 | } |
| 680 | 681 | ||
| 682 | /* | ||
| 683 | * This is what d_alloc_anon should have been. Once the exportfs | ||
| 684 | * argument transition has been finished I will update d_alloc_anon | ||
| 685 | * to this prototype and this wrapper will go away. --hch | ||
| 686 | */ | ||
| 687 | static struct dentry *exportfs_d_alloc(struct inode *inode) | ||
| 688 | { | ||
| 689 | struct dentry *dentry; | ||
| 690 | |||
| 691 | if (!inode) | ||
| 692 | return NULL; | ||
| 693 | if (IS_ERR(inode)) | ||
| 694 | return ERR_PTR(PTR_ERR(inode)); | ||
| 695 | |||
| 696 | dentry = d_alloc_anon(inode); | ||
| 697 | if (!dentry) { | ||
| 698 | iput(inode); | ||
| 699 | dentry = ERR_PTR(-ENOMEM); | ||
| 700 | } | ||
| 701 | return dentry; | ||
| 702 | } | ||
| 703 | |||
| 704 | /** | ||
| 705 | * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation | ||
| 706 | * @sb: filesystem to do the file handle conversion on | ||
| 707 | * @fid: file handle to convert | ||
| 708 | * @fh_len: length of the file handle, in 32-bit words | ||
| 709 | * @fh_type: type of file handle | ||
| 710 | * @get_inode: filesystem callback to retrieve inode | ||
| 711 | * | ||
| 712 | * This function decodes @fid as long as it has one of the well-known | ||
| 713 | * Linux filehandle types and calls @get_inode on it to retrieve the | ||
| 714 | * inode for the object specified in the file handle. | ||
| 715 | */ | ||
| 716 | struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, | ||
| 717 | int fh_len, int fh_type, struct inode *(*get_inode) | ||
| 718 | (struct super_block *sb, u64 ino, u32 gen)) | ||
| 719 | { | ||
| 720 | struct inode *inode = NULL; | ||
| 721 | |||
| 722 | if (fh_len < 2) | ||
| 723 | return NULL; | ||
| 724 | |||
| 725 | switch (fh_type) { | ||
| 726 | case FILEID_INO32_GEN: | ||
| 727 | case FILEID_INO32_GEN_PARENT: | ||
| 728 | inode = get_inode(sb, fid->i32.ino, fid->i32.gen); | ||
| 729 | break; | ||
| 730 | } | ||
| 731 | |||
| 732 | return exportfs_d_alloc(inode); | ||
| 733 | } | ||
| 734 | EXPORT_SYMBOL_GPL(generic_fh_to_dentry); | ||
| 735 | |||
| 736 | /** | ||
| 737 | * generic_fh_to_parent - generic helper for the fh_to_parent export operation | ||
| 738 | * @sb: filesystem to do the file handle conversion on | ||
| 739 | * @fid: file handle to convert | ||
| 740 | * @fh_len: length of the file handle, in 32-bit words | ||
| 741 | * @fh_type: type of file handle | ||
| 742 | * @get_inode: filesystem callback to retrieve inode | ||
| 743 | * | ||
| 744 | * This function decodes @fid as long as it has one of the well-known | ||
| 745 | * Linux filehandle types and calls @get_inode on it to retrieve the | ||
| 746 | * inode for the _parent_ object, if one is specified in the | ||
| 747 | * file handle, or NULL otherwise. | ||
| 748 | */ | ||
| 749 | struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
| 750 | int fh_len, int fh_type, struct inode *(*get_inode) | ||
| 751 | (struct super_block *sb, u64 ino, u32 gen)) | ||
| 752 | { | ||
| 753 | struct inode *inode = NULL; | ||
| 754 | |||
| 755 | if (fh_len <= 2) | ||
| 756 | return NULL; | ||
| 757 | |||
| 758 | switch (fh_type) { | ||
| 759 | case FILEID_INO32_GEN_PARENT: | ||
| 760 | inode = get_inode(sb, fid->i32.parent_ino, | ||
| 761 | (fh_len > 3 ? fid->i32.parent_gen : 0)); | ||
| 762 | break; | ||
| 763 | } | ||
| 764 | |||
| 765 | return exportfs_d_alloc(inode); | ||
| 766 | } | ||
| 767 | EXPORT_SYMBOL_GPL(generic_fh_to_parent); | ||
| 768 | |||
| 681 | EXPORT_SYMBOL(dcache_dir_close); | 769 | EXPORT_SYMBOL(dcache_dir_close); |
| 682 | EXPORT_SYMBOL(dcache_dir_lseek); | 770 | EXPORT_SYMBOL(dcache_dir_lseek); |
| 683 | EXPORT_SYMBOL(dcache_dir_open); | 771 | EXPORT_SYMBOL(dcache_dir_open); |
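With generic_fh_to_dentry() and generic_fh_to_parent() available, a filesystem using the common 32-bit ino+generation handles only supplies the inode lookup itself. A minimal sketch of the resulting wiring for a hypothetical filesystem (the foofs_* names, including foofs_iget(), are invented for illustration; the ext2/ext3/ext4, jfs and ntfs hunks in this series follow exactly this shape):

#include <linux/fs.h>
#include <linux/exportfs.h>

static struct inode *foofs_nfs_get_inode(struct super_block *sb,
		u64 ino, u32 generation)
{
	struct inode *inode = foofs_iget(sb, ino);	/* fs-specific lookup */

	if (IS_ERR(inode))
		return inode;
	if (generation && inode->i_generation != generation) {
		/* the handle outlived the file it pointed to */
		iput(inode);
		return ERR_PTR(-ESTALE);
	}
	return inode;
}

static struct dentry *foofs_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
			foofs_nfs_get_inode);
}

static struct dentry *foofs_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
			foofs_nfs_get_inode);
}

static const struct export_operations foofs_export_ops = {
	.fh_to_dentry	= foofs_fh_to_dentry,
	.fh_to_parent	= foofs_fh_to_parent,
};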
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 04b266729802..66d0aeb32a47 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
| @@ -386,15 +386,13 @@ static int check_export(struct inode *inode, int flags, unsigned char *uuid) | |||
| 386 | dprintk("exp_export: export of non-dev fs without fsid\n"); | 386 | dprintk("exp_export: export of non-dev fs without fsid\n"); |
| 387 | return -EINVAL; | 387 | return -EINVAL; |
| 388 | } | 388 | } |
| 389 | if (!inode->i_sb->s_export_op) { | 389 | |
| 390 | if (!inode->i_sb->s_export_op || | ||
| 391 | !inode->i_sb->s_export_op->fh_to_dentry) { | ||
| 390 | dprintk("exp_export: export of invalid fs type.\n"); | 392 | dprintk("exp_export: export of invalid fs type.\n"); |
| 391 | return -EINVAL; | 393 | return -EINVAL; |
| 392 | } | 394 | } |
| 393 | 395 | ||
| 394 | /* Ok, we can export it */; | ||
| 395 | if (!inode->i_sb->s_export_op->find_exported_dentry) | ||
| 396 | inode->i_sb->s_export_op->find_exported_dentry = | ||
| 397 | find_exported_dentry; | ||
| 398 | return 0; | 396 | return 0; |
| 399 | 397 | ||
| 400 | } | 398 | } |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 7011d62acfc8..4f712e970584 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
| @@ -115,8 +115,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
| 115 | dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); | 115 | dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); |
| 116 | 116 | ||
| 117 | if (!fhp->fh_dentry) { | 117 | if (!fhp->fh_dentry) { |
| 118 | __u32 *datap=NULL; | 118 | struct fid *fid = NULL, sfid; |
| 119 | __u32 tfh[3]; /* filehandle fragment for oldstyle filehandles */ | ||
| 120 | int fileid_type; | 119 | int fileid_type; |
| 121 | int data_left = fh->fh_size/4; | 120 | int data_left = fh->fh_size/4; |
| 122 | 121 | ||
| @@ -128,7 +127,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
| 128 | 127 | ||
| 129 | if (fh->fh_version == 1) { | 128 | if (fh->fh_version == 1) { |
| 130 | int len; | 129 | int len; |
| 131 | datap = fh->fh_auth; | ||
| 132 | if (--data_left<0) goto out; | 130 | if (--data_left<0) goto out; |
| 133 | switch (fh->fh_auth_type) { | 131 | switch (fh->fh_auth_type) { |
| 134 | case 0: break; | 132 | case 0: break; |
| @@ -144,9 +142,11 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
| 144 | fh->fh_fsid[1] = fh->fh_fsid[2]; | 142 | fh->fh_fsid[1] = fh->fh_fsid[2]; |
| 145 | } | 143 | } |
| 146 | if ((data_left -= len)<0) goto out; | 144 | if ((data_left -= len)<0) goto out; |
| 147 | exp = rqst_exp_find(rqstp, fh->fh_fsid_type, datap); | 145 | exp = rqst_exp_find(rqstp, fh->fh_fsid_type, |
| 148 | datap += len; | 146 | fh->fh_auth); |
| 147 | fid = (struct fid *)(fh->fh_auth + len); | ||
| 149 | } else { | 148 | } else { |
| 149 | __u32 tfh[2]; | ||
| 150 | dev_t xdev; | 150 | dev_t xdev; |
| 151 | ino_t xino; | 151 | ino_t xino; |
| 152 | if (fh->fh_size != NFS_FHSIZE) | 152 | if (fh->fh_size != NFS_FHSIZE) |
| @@ -190,22 +190,22 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
| 190 | error = nfserr_badhandle; | 190 | error = nfserr_badhandle; |
| 191 | 191 | ||
| 192 | if (fh->fh_version != 1) { | 192 | if (fh->fh_version != 1) { |
| 193 | tfh[0] = fh->ofh_ino; | 193 | sfid.i32.ino = fh->ofh_ino; |
| 194 | tfh[1] = fh->ofh_generation; | 194 | sfid.i32.gen = fh->ofh_generation; |
| 195 | tfh[2] = fh->ofh_dirino; | 195 | sfid.i32.parent_ino = fh->ofh_dirino; |
| 196 | datap = tfh; | 196 | fid = &sfid; |
| 197 | data_left = 3; | 197 | data_left = 3; |
| 198 | if (fh->ofh_dirino == 0) | 198 | if (fh->ofh_dirino == 0) |
| 199 | fileid_type = 1; | 199 | fileid_type = FILEID_INO32_GEN; |
| 200 | else | 200 | else |
| 201 | fileid_type = 2; | 201 | fileid_type = FILEID_INO32_GEN_PARENT; |
| 202 | } else | 202 | } else |
| 203 | fileid_type = fh->fh_fileid_type; | 203 | fileid_type = fh->fh_fileid_type; |
| 204 | 204 | ||
| 205 | if (fileid_type == 0) | 205 | if (fileid_type == FILEID_ROOT) |
| 206 | dentry = dget(exp->ex_dentry); | 206 | dentry = dget(exp->ex_dentry); |
| 207 | else { | 207 | else { |
| 208 | dentry = exportfs_decode_fh(exp->ex_mnt, datap, | 208 | dentry = exportfs_decode_fh(exp->ex_mnt, fid, |
| 209 | data_left, fileid_type, | 209 | data_left, fileid_type, |
| 210 | nfsd_acceptable, exp); | 210 | nfsd_acceptable, exp); |
| 211 | } | 211 | } |
| @@ -286,16 +286,21 @@ out: | |||
| 286 | * an inode. In this case a call to fh_update should be made | 286 | * an inode. In this case a call to fh_update should be made |
| 287 | * before the fh goes out on the wire ... | 287 | * before the fh goes out on the wire ... |
| 288 | */ | 288 | */ |
| 289 | static inline int _fh_update(struct dentry *dentry, struct svc_export *exp, | 289 | static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, |
| 290 | __u32 *datap, int *maxsize) | 290 | struct dentry *dentry) |
| 291 | { | 291 | { |
| 292 | if (dentry == exp->ex_dentry) { | 292 | if (dentry != exp->ex_dentry) { |
| 293 | *maxsize = 0; | 293 | struct fid *fid = (struct fid *) |
| 294 | return 0; | 294 | (fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1); |
| 295 | } | 295 | int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; |
| 296 | int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK); | ||
| 296 | 297 | ||
| 297 | return exportfs_encode_fh(dentry, datap, maxsize, | 298 | fhp->fh_handle.fh_fileid_type = |
| 298 | !(exp->ex_flags & NFSEXP_NOSUBTREECHECK)); | 299 | exportfs_encode_fh(dentry, fid, &maxsize, subtreecheck); |
| 300 | fhp->fh_handle.fh_size += maxsize * 4; | ||
| 301 | } else { | ||
| 302 | fhp->fh_handle.fh_fileid_type = FILEID_ROOT; | ||
| 303 | } | ||
| 299 | } | 304 | } |
| 300 | 305 | ||
| 301 | /* | 306 | /* |
| @@ -457,12 +462,8 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, | |||
| 457 | datap += len/4; | 462 | datap += len/4; |
| 458 | fhp->fh_handle.fh_size = 4 + len; | 463 | fhp->fh_handle.fh_size = 4 + len; |
| 459 | 464 | ||
| 460 | if (inode) { | 465 | if (inode) |
| 461 | int size = (fhp->fh_maxsize-len-4)/4; | 466 | _fh_update(fhp, exp, dentry); |
| 462 | fhp->fh_handle.fh_fileid_type = | ||
| 463 | _fh_update(dentry, exp, datap, &size); | ||
| 464 | fhp->fh_handle.fh_size += size*4; | ||
| 465 | } | ||
| 466 | if (fhp->fh_handle.fh_fileid_type == 255) | 467 | if (fhp->fh_handle.fh_fileid_type == 255) |
| 467 | return nfserr_opnotsupp; | 468 | return nfserr_opnotsupp; |
| 468 | } | 469 | } |
| @@ -479,7 +480,6 @@ __be32 | |||
| 479 | fh_update(struct svc_fh *fhp) | 480 | fh_update(struct svc_fh *fhp) |
| 480 | { | 481 | { |
| 481 | struct dentry *dentry; | 482 | struct dentry *dentry; |
| 482 | __u32 *datap; | ||
| 483 | 483 | ||
| 484 | if (!fhp->fh_dentry) | 484 | if (!fhp->fh_dentry) |
| 485 | goto out_bad; | 485 | goto out_bad; |
| @@ -490,15 +490,10 @@ fh_update(struct svc_fh *fhp) | |||
| 490 | if (fhp->fh_handle.fh_version != 1) { | 490 | if (fhp->fh_handle.fh_version != 1) { |
| 491 | _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); | 491 | _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); |
| 492 | } else { | 492 | } else { |
| 493 | int size; | 493 | if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) |
| 494 | if (fhp->fh_handle.fh_fileid_type != 0) | ||
| 495 | goto out; | 494 | goto out; |
| 496 | datap = fhp->fh_handle.fh_auth+ | 495 | |
| 497 | fhp->fh_handle.fh_size/4 -1; | 496 | _fh_update(fhp, fhp->fh_export, dentry); |
| 498 | size = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; | ||
| 499 | fhp->fh_handle.fh_fileid_type = | ||
| 500 | _fh_update(dentry, fhp->fh_export, datap, &size); | ||
| 501 | fhp->fh_handle.fh_size += size*4; | ||
| 502 | if (fhp->fh_handle.fh_fileid_type == 255) | 497 | if (fhp->fh_handle.fh_fileid_type == 255) |
| 503 | return nfserr_opnotsupp; | 498 | return nfserr_opnotsupp; |
| 504 | } | 499 | } |
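On the nfsd side the net effect is that the fileid portion of a version-1 knfsd filehandle is now aliased directly as a struct fid rather than walked with a bare __u32 pointer. A rough layout sketch, as implied by fh_verify() and _fh_update() above (inferred from the code, not a wire-format spec; the fsid length varies with fh_fsid_type):

/*
 *   word 0:         fh_version, fh_auth_type, fh_fsid_type, fh_fileid_type
 *   words 1..len:   fsid, len words as determined by fh_fsid_type
 *   words len+1..:  struct fid, interpreted per fh_fileid_type:
 *                     FILEID_ROOT             - empty, the export root itself
 *                     FILEID_INO32_GEN        - i32.ino, i32.gen
 *                     FILEID_INO32_GEN_PARENT - i32.ino, i32.gen,
 *                                               i32.parent_ino[, i32.parent_gen]
 *
 * Pre-version-1 handles keep their fixed layout and are translated into a
 * stack struct fid (sfid above) before decoding.
 */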
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index e93c6142b23c..e1781c8b1650 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c | |||
| @@ -450,58 +450,40 @@ try_next: | |||
| 450 | return parent_dent; | 450 | return parent_dent; |
| 451 | } | 451 | } |
| 452 | 452 | ||
| 453 | /** | 453 | static struct inode *ntfs_nfs_get_inode(struct super_block *sb, |
| 454 | * ntfs_get_dentry - find a dentry for the inode from a file handle sub-fragment | 454 | u64 ino, u32 generation) |
| 455 | * @sb: super block identifying the mounted ntfs volume | ||
| 456 | * @fh: the file handle sub-fragment | ||
| 457 | * | ||
| 458 | * Find a dentry for the inode given a file handle sub-fragment. This function | ||
| 459 | * is called from fs/exportfs/expfs.c::find_exported_dentry() which in turn is | ||
| 460 | * called from the default ->decode_fh() which is export_decode_fh() in the | ||
| 461 | * same file. The code is closely based on the default ->get_dentry() helper | ||
| 462 | * fs/exportfs/expfs.c::get_object(). | ||
| 463 | * | ||
| 464 | * The @fh contains two 32-bit unsigned values, the first one is the inode | ||
| 465 | * number and the second one is the inode generation. | ||
| 466 | * | ||
| 467 | * Return the dentry on success or the error code on error (IS_ERR() is true). | ||
| 468 | */ | ||
| 469 | static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh) | ||
| 470 | { | 455 | { |
| 471 | struct inode *vi; | 456 | struct inode *inode; |
| 472 | struct dentry *dent; | ||
| 473 | unsigned long ino = ((u32 *)fh)[0]; | ||
| 474 | u32 gen = ((u32 *)fh)[1]; | ||
| 475 | 457 | ||
| 476 | ntfs_debug("Entering for inode 0x%lx, generation 0x%x.", ino, gen); | 458 | inode = ntfs_iget(sb, ino); |
| 477 | vi = ntfs_iget(sb, ino); | 459 | if (!IS_ERR(inode)) { |
| 478 | if (IS_ERR(vi)) { | 460 | if (is_bad_inode(inode) || inode->i_generation != generation) { |
| 479 | ntfs_error(sb, "Failed to get inode 0x%lx.", ino); | 461 | iput(inode); |
| 480 | return (struct dentry *)vi; | 462 | inode = ERR_PTR(-ESTALE); |
| 481 | } | 463 | } |
| 482 | if (unlikely(is_bad_inode(vi) || vi->i_generation != gen)) { | ||
| 483 | /* We didn't find the right inode. */ | ||
| 484 | ntfs_error(sb, "Inode 0x%lx, bad count: %d %d or version 0x%x " | ||
| 485 | "0x%x.", vi->i_ino, vi->i_nlink, | ||
| 486 | atomic_read(&vi->i_count), vi->i_generation, | ||
| 487 | gen); | ||
| 488 | iput(vi); | ||
| 489 | return ERR_PTR(-ESTALE); | ||
| 490 | } | ||
| 491 | /* Now find a dentry. If possible, get a well-connected one. */ | ||
| 492 | dent = d_alloc_anon(vi); | ||
| 493 | if (unlikely(!dent)) { | ||
| 494 | iput(vi); | ||
| 495 | return ERR_PTR(-ENOMEM); | ||
| 496 | } | 464 | } |
| 497 | ntfs_debug("Done for inode 0x%lx, generation 0x%x.", ino, gen); | 465 | |
| 498 | return dent; | 466 | return inode; |
| 467 | } | ||
| 468 | |||
| 469 | static struct dentry *ntfs_fh_to_dentry(struct super_block *sb, struct fid *fid, | ||
| 470 | int fh_len, int fh_type) | ||
| 471 | { | ||
| 472 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | ||
| 473 | ntfs_nfs_get_inode); | ||
| 474 | } | ||
| 475 | |||
| 476 | static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
| 477 | int fh_len, int fh_type) | ||
| 478 | { | ||
| 479 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
| 480 | ntfs_nfs_get_inode); | ||
| 499 | } | 481 | } |
| 500 | 482 | ||
| 501 | /** | 483 | /** |
| 502 | * Export operations allowing NFS exporting of mounted NTFS partitions. | 484 | * Export operations allowing NFS exporting of mounted NTFS partitions. |
| 503 | * | 485 | * |
| 504 | * We use the default ->decode_fh() and ->encode_fh() for now. Note that they | 486 | * We use the default ->encode_fh() for now. Note that the helpers |
| 505 | * use 32 bits to store the inode number which is an unsigned long so on 64-bit | 487 | * use 32 bits to store the inode number which is an unsigned long so on 64-bit |
| 506 | * architectures is usually 64 bits so it would all fail horribly on huge | 488 | * architectures is usually 64 bits so it would all fail horribly on huge |
| 507 | * volumes. I guess we need to define our own encode and decode fh functions | 489 | * volumes. I guess we need to define our own encode and decode fh functions |
| @@ -517,10 +499,9 @@ static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh) | |||
| 517 | * allowing the inode number 0 which is used in NTFS for the system file $MFT | 499 | * allowing the inode number 0 which is used in NTFS for the system file $MFT |
| 518 | * and due to using iget() whereas NTFS needs ntfs_iget(). | 500 | * and due to using iget() whereas NTFS needs ntfs_iget(). |
| 519 | */ | 501 | */ |
| 520 | struct export_operations ntfs_export_ops = { | 502 | const struct export_operations ntfs_export_ops = { |
| 521 | .get_parent = ntfs_get_parent, /* Find the parent of a given | 503 | .get_parent = ntfs_get_parent, /* Find the parent of a given |
| 522 | directory. */ | 504 | directory. */ |
| 523 | .get_dentry = ntfs_get_dentry, /* Find a dentry for the inode | 505 | .fh_to_dentry = ntfs_fh_to_dentry, |
| 524 | given a file handle | 506 | .fh_to_parent = ntfs_fh_to_parent, |
| 525 | sub-fragment. */ | ||
| 526 | }; | 507 | }; |
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h index d73f5a9ac341..d6a340bf80fc 100644 --- a/fs/ntfs/ntfs.h +++ b/fs/ntfs/ntfs.h | |||
| @@ -69,7 +69,7 @@ extern const struct inode_operations ntfs_dir_inode_ops; | |||
| 69 | extern const struct file_operations ntfs_empty_file_ops; | 69 | extern const struct file_operations ntfs_empty_file_ops; |
| 70 | extern const struct inode_operations ntfs_empty_inode_ops; | 70 | extern const struct inode_operations ntfs_empty_inode_ops; |
| 71 | 71 | ||
| 72 | extern struct export_operations ntfs_export_ops; | 72 | extern const struct export_operations ntfs_export_ops; |
| 73 | 73 | ||
| 74 | /** | 74 | /** |
| 75 | * NTFS_SB - return the ntfs volume given a vfs super block | 75 | * NTFS_SB - return the ntfs volume given a vfs super block |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index c3bbc198f9ce..535bfa9568a4 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
| @@ -45,9 +45,9 @@ struct ocfs2_inode_handle | |||
| 45 | u32 ih_generation; | 45 | u32 ih_generation; |
| 46 | }; | 46 | }; |
| 47 | 47 | ||
| 48 | static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp) | 48 | static struct dentry *ocfs2_get_dentry(struct super_block *sb, |
| 49 | struct ocfs2_inode_handle *handle) | ||
| 49 | { | 50 | { |
| 50 | struct ocfs2_inode_handle *handle = vobjp; | ||
| 51 | struct inode *inode; | 51 | struct inode *inode; |
| 52 | struct dentry *result; | 52 | struct dentry *result; |
| 53 | 53 | ||
| @@ -194,54 +194,37 @@ bail: | |||
| 194 | return type; | 194 | return type; |
| 195 | } | 195 | } |
| 196 | 196 | ||
| 197 | static struct dentry *ocfs2_decode_fh(struct super_block *sb, u32 *fh_in, | 197 | static struct dentry *ocfs2_fh_to_dentry(struct super_block *sb, |
| 198 | int fh_len, int fileid_type, | 198 | struct fid *fid, int fh_len, int fh_type) |
| 199 | int (*acceptable)(void *context, | ||
| 200 | struct dentry *de), | ||
| 201 | void *context) | ||
| 202 | { | 199 | { |
| 203 | struct ocfs2_inode_handle handle, parent; | 200 | struct ocfs2_inode_handle handle; |
| 204 | struct dentry *ret = NULL; | ||
| 205 | __le32 *fh = (__force __le32 *) fh_in; | ||
| 206 | |||
| 207 | mlog_entry("(0x%p, 0x%p, %d, %d, 0x%p, 0x%p)\n", | ||
| 208 | sb, fh, fh_len, fileid_type, acceptable, context); | ||
| 209 | |||
| 210 | if (fh_len < 3 || fileid_type > 2) | ||
| 211 | goto bail; | ||
| 212 | |||
| 213 | if (fileid_type == 2) { | ||
| 214 | if (fh_len < 6) | ||
| 215 | goto bail; | ||
| 216 | |||
| 217 | parent.ih_blkno = (u64)le32_to_cpu(fh[3]) << 32; | ||
| 218 | parent.ih_blkno |= (u64)le32_to_cpu(fh[4]); | ||
| 219 | parent.ih_generation = le32_to_cpu(fh[5]); | ||
| 220 | 201 | ||
| 221 | mlog(0, "Decoding parent: blkno: %llu, generation: %u\n", | 202 | if (fh_len < 3 || fh_type > 2) |
| 222 | (unsigned long long)parent.ih_blkno, | 203 | return NULL; |
| 223 | parent.ih_generation); | ||
| 224 | } | ||
| 225 | 204 | ||
| 226 | handle.ih_blkno = (u64)le32_to_cpu(fh[0]) << 32; | 205 | handle.ih_blkno = (u64)le32_to_cpu(fid->raw[0]) << 32; |
| 227 | handle.ih_blkno |= (u64)le32_to_cpu(fh[1]); | 206 | handle.ih_blkno |= (u64)le32_to_cpu(fid->raw[1]); |
| 228 | handle.ih_generation = le32_to_cpu(fh[2]); | 207 | handle.ih_generation = le32_to_cpu(fid->raw[2]); |
| 208 | return ocfs2_get_dentry(sb, &handle); | ||
| 209 | } | ||
| 229 | 210 | ||
| 230 | mlog(0, "Encoding fh: blkno: %llu, generation: %u\n", | 211 | static struct dentry *ocfs2_fh_to_parent(struct super_block *sb, |
| 231 | (unsigned long long)handle.ih_blkno, handle.ih_generation); | 212 | struct fid *fid, int fh_len, int fh_type) |
| 213 | { | ||
| 214 | struct ocfs2_inode_handle parent; | ||
| 232 | 215 | ||
| 233 | ret = ocfs2_export_ops.find_exported_dentry(sb, &handle, &parent, | 216 | if (fh_type != 2 || fh_len < 6) |
| 234 | acceptable, context); | 217 | return NULL; |
| 235 | 218 | ||
| 236 | bail: | 219 | parent.ih_blkno = (u64)le32_to_cpu(fid->raw[3]) << 32; |
| 237 | mlog_exit_ptr(ret); | 220 | parent.ih_blkno |= (u64)le32_to_cpu(fid->raw[4]); |
| 238 | return ret; | 221 | parent.ih_generation = le32_to_cpu(fid->raw[5]); |
| 222 | return ocfs2_get_dentry(sb, &parent); | ||
| 239 | } | 223 | } |
| 240 | 224 | ||
| 241 | struct export_operations ocfs2_export_ops = { | 225 | const struct export_operations ocfs2_export_ops = { |
| 242 | .decode_fh = ocfs2_decode_fh, | ||
| 243 | .encode_fh = ocfs2_encode_fh, | 226 | .encode_fh = ocfs2_encode_fh, |
| 244 | 227 | .fh_to_dentry = ocfs2_fh_to_dentry, | |
| 228 | .fh_to_parent = ocfs2_fh_to_parent, | ||
| 245 | .get_parent = ocfs2_get_parent, | 229 | .get_parent = ocfs2_get_parent, |
| 246 | .get_dentry = ocfs2_get_dentry, | ||
| 247 | }; | 230 | }; |
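ocfs2 packs its 64-bit ih_blkno as two little-endian 32-bit words followed by a generation word, the little-endian mirror of the gfs2 scheme earlier. A userspace sketch of the decode step performed by ocfs2_fh_to_dentry() above, with an identity stand-in for le32_to_cpu (valid on a little-endian host):

#include <stdint.h>

static uint32_t le32_to_cpu_sketch(uint32_t v) { return v; }

struct ocfs2_handle_sketch {
	uint64_t ih_blkno;
	uint32_t ih_generation;
};

/* Rebuild the handle from the first three raw words of the fid, as
 * ocfs2_fh_to_dentry() does: high half, low half, then generation. */
static void ocfs2_decode_sketch(struct ocfs2_handle_sketch *h,
				const uint32_t raw[3])
{
	h->ih_blkno  = (uint64_t)le32_to_cpu_sketch(raw[0]) << 32;
	h->ih_blkno |= (uint64_t)le32_to_cpu_sketch(raw[1]);
	h->ih_generation = le32_to_cpu_sketch(raw[2]);
}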
diff --git a/fs/ocfs2/export.h b/fs/ocfs2/export.h index e08bed9e45a0..41a738678c37 100644 --- a/fs/ocfs2/export.h +++ b/fs/ocfs2/export.h | |||
| @@ -28,6 +28,6 @@ | |||
| 28 | 28 | ||
| 29 | #include <linux/exportfs.h> | 29 | #include <linux/exportfs.h> |
| 30 | 30 | ||
| 31 | extern struct export_operations ocfs2_export_ops; | 31 | extern const struct export_operations ocfs2_export_ops; |
| 32 | 32 | ||
| 33 | #endif /* OCFS2_EXPORT_H */ | 33 | #endif /* OCFS2_EXPORT_H */ |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a991af96f3f0..231fd5ccadc5 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
| @@ -1515,19 +1515,20 @@ struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key) | |||
| 1515 | return inode; | 1515 | return inode; |
| 1516 | } | 1516 | } |
| 1517 | 1517 | ||
| 1518 | struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) | 1518 | static struct dentry *reiserfs_get_dentry(struct super_block *sb, |
| 1519 | u32 objectid, u32 dir_id, u32 generation) | ||
| 1520 | |||
| 1519 | { | 1521 | { |
| 1520 | __u32 *data = vobjp; | ||
| 1521 | struct cpu_key key; | 1522 | struct cpu_key key; |
| 1522 | struct dentry *result; | 1523 | struct dentry *result; |
| 1523 | struct inode *inode; | 1524 | struct inode *inode; |
| 1524 | 1525 | ||
| 1525 | key.on_disk_key.k_objectid = data[0]; | 1526 | key.on_disk_key.k_objectid = objectid; |
| 1526 | key.on_disk_key.k_dir_id = data[1]; | 1527 | key.on_disk_key.k_dir_id = dir_id; |
| 1527 | reiserfs_write_lock(sb); | 1528 | reiserfs_write_lock(sb); |
| 1528 | inode = reiserfs_iget(sb, &key); | 1529 | inode = reiserfs_iget(sb, &key); |
| 1529 | if (inode && !IS_ERR(inode) && data[2] != 0 && | 1530 | if (inode && !IS_ERR(inode) && generation != 0 && |
| 1530 | data[2] != inode->i_generation) { | 1531 | generation != inode->i_generation) { |
| 1531 | iput(inode); | 1532 | iput(inode); |
| 1532 | inode = NULL; | 1533 | inode = NULL; |
| 1533 | } | 1534 | } |
| @@ -1544,14 +1545,9 @@ struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) | |||
| 1544 | return result; | 1545 | return result; |
| 1545 | } | 1546 | } |
| 1546 | 1547 | ||
| 1547 | struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, | 1548 | struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
| 1548 | int len, int fhtype, | 1549 | int fh_len, int fh_type) |
| 1549 | int (*acceptable) (void *contect, | ||
| 1550 | struct dentry * de), | ||
| 1551 | void *context) | ||
| 1552 | { | 1550 | { |
| 1553 | __u32 obj[3], parent[3]; | ||
| 1554 | |||
| 1555 | /* fhtype happens to reflect the number of u32s encoded. | 1551 | /* fhtype happens to reflect the number of u32s encoded. |
| 1556 | * due to a bug in earlier code, fhtype might indicate there | 1552 | * due to a bug in earlier code, fhtype might indicate there |
| 1557 | * are more u32s than actually fit. | 1553 | * are more u32s than actually fit. |
| @@ -1564,32 +1560,28 @@ struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, | |||
| 1564 | * 6 - as above plus generation of directory | 1560 | * 6 - as above plus generation of directory |
| 1565 | * 6 does not fit in NFSv2 handles | 1561 | * 6 does not fit in NFSv2 handles |
| 1566 | */ | 1562 | */ |
| 1567 | if (fhtype > len) { | 1563 | if (fh_type > fh_len) { |
| 1568 | if (fhtype != 6 || len != 5) | 1564 | if (fh_type != 6 || fh_len != 5) |
| 1569 | reiserfs_warning(sb, | 1565 | reiserfs_warning(sb, |
| 1570 | "nfsd/reiserfs, fhtype=%d, len=%d - odd", | 1566 | "nfsd/reiserfs, fhtype=%d, len=%d - odd", |
| 1571 | fhtype, len); | 1567 | fh_type, fh_len); |
| 1572 | fhtype = 5; | 1568 | fh_type = 5; |
| 1573 | } | 1569 | } |
| 1574 | 1570 | ||
| 1575 | obj[0] = data[0]; | 1571 | return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1], |
| 1576 | obj[1] = data[1]; | 1572 | (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0); |
| 1577 | if (fhtype == 3 || fhtype >= 5) | 1573 | } |
| 1578 | obj[2] = data[2]; | ||
| 1579 | else | ||
| 1580 | obj[2] = 0; /* generation number */ | ||
| 1581 | 1574 | ||
| 1582 | if (fhtype >= 4) { | 1575 | struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, |
| 1583 | parent[0] = data[fhtype >= 5 ? 3 : 2]; | 1576 | int fh_len, int fh_type) |
| 1584 | parent[1] = data[fhtype >= 5 ? 4 : 3]; | 1577 | { |
| 1585 | if (fhtype == 6) | 1578 | if (fh_type < 4) |
| 1586 | parent[2] = data[5]; | 1579 | return NULL; |
| 1587 | else | 1580 | |
| 1588 | parent[2] = 0; | 1581 | return reiserfs_get_dentry(sb, |
| 1589 | } | 1582 | (fh_type >= 5) ? fid->raw[3] : fid->raw[2], |
| 1590 | return sb->s_export_op->find_exported_dentry(sb, obj, | 1583 | (fh_type >= 5) ? fid->raw[4] : fid->raw[3], |
| 1591 | fhtype < 4 ? NULL : parent, | 1584 | (fh_type == 6) ? fid->raw[5] : 0); |
| 1592 | acceptable, context); | ||
| 1593 | } | 1585 | } |
| 1594 | 1586 | ||
| 1595 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, | 1587 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, |
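For reference, the handle layouts implied by the two reiserfs decoders above, with fh_type doubling as the number of u32s encoded (inferred from the decode logic, not from separate documentation):

/*
 *   fh_type 3: raw[0]=objectid  raw[1]=dir_id  raw[2]=generation
 *   fh_type 4: raw[0]=objectid  raw[1]=dir_id
 *              raw[2]=parent objectid  raw[3]=parent dir_id
 *   fh_type 5: raw[0]=objectid  raw[1]=dir_id  raw[2]=generation
 *              raw[3]=parent objectid  raw[4]=parent dir_id
 *   fh_type 6: as type 5, plus raw[5]=parent generation
 */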
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 98c3781bc069..5cd85fe5df5d 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
| @@ -661,11 +661,11 @@ static struct quotactl_ops reiserfs_qctl_operations = { | |||
| 661 | }; | 661 | }; |
| 662 | #endif | 662 | #endif |
| 663 | 663 | ||
| 664 | static struct export_operations reiserfs_export_ops = { | 664 | static const struct export_operations reiserfs_export_ops = { |
| 665 | .encode_fh = reiserfs_encode_fh, | 665 | .encode_fh = reiserfs_encode_fh, |
| 666 | .decode_fh = reiserfs_decode_fh, | 666 | .fh_to_dentry = reiserfs_fh_to_dentry, |
| 667 | .fh_to_parent = reiserfs_fh_to_parent, | ||
| 667 | .get_parent = reiserfs_get_parent, | 668 | .get_parent = reiserfs_get_parent, |
| 668 | .get_dentry = reiserfs_get_dentry, | ||
| 669 | }; | 669 | }; |
| 670 | 670 | ||
| 671 | /* this struct is used in reiserfs_getopt () for containing the value for those | 671 | /* this struct is used in reiserfs_getopt () for containing the value for those |
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 3586c7a28d2c..15bd4948832c 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
| @@ -33,62 +33,25 @@ | |||
| 33 | static struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, }; | 33 | static struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, }; |
| 34 | 34 | ||
| 35 | /* | 35 | /* |
| 36 | * XFS encodes and decodes the fileid portion of NFS filehandles | 36 | * Note that we only accept fileids which are long enough rather than allow |
| 37 | * itself instead of letting the generic NFS code do it. This | 37 | * the parent generation number to default to zero. XFS considers zero a |
| 38 | allows filesystems with 64 bit inode numbers to be exported. | 38 | * valid generation number, not an invalid/wildcard value. |
| 39 | * | ||
| 40 | * Note that a side effect is that xfs_vget() won't be passed a | ||
| 41 | * zero inode/generation pair under normal circumstances. As | ||
| 42 | * however a malicious client could send us such data, the check | ||
| 43 | * remains in that code. | ||
| 44 | */ | 39 | */ |
| 45 | 40 | static int xfs_fileid_length(int fileid_type) | |
| 46 | STATIC struct dentry * | ||
| 47 | xfs_fs_decode_fh( | ||
| 48 | struct super_block *sb, | ||
| 49 | __u32 *fh, | ||
| 50 | int fh_len, | ||
| 51 | int fileid_type, | ||
| 52 | int (*acceptable)( | ||
| 53 | void *context, | ||
| 54 | struct dentry *de), | ||
| 55 | void *context) | ||
| 56 | { | 41 | { |
| 57 | xfs_fid_t ifid; | 42 | switch (fileid_type) { |
| 58 | xfs_fid_t pfid; | 43 | case FILEID_INO32_GEN: |
| 59 | void *parent = NULL; | 44 | return 2; |
| 60 | int is64 = 0; | 45 | case FILEID_INO32_GEN_PARENT: |
| 61 | __u32 *p = fh; | 46 | return 4; |
| 62 | 47 | case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: | |
| 63 | #if XFS_BIG_INUMS | 48 | return 3; |
| 64 | is64 = (fileid_type & XFS_FILEID_TYPE_64FLAG); | 49 | case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: |
| 65 | fileid_type &= ~XFS_FILEID_TYPE_64FLAG; | 50 | return 6; |
| 66 | #endif | ||
| 67 | |||
| 68 | /* | ||
| 69 | * Note that we only accept fileids which are long enough | ||
| 70 | * rather than allow the parent generation number to default | ||
| 71 | * to zero. XFS considers zero a valid generation number not | ||
| 72 | * an invalid/wildcard value. There's little point printk'ing | ||
| 73 | * a warning here as we don't have the client information | ||
| 74 | * which would make such a warning useful. | ||
| 75 | */ | ||
| 76 | if (fileid_type > 2 || | ||
| 77 | fh_len < xfs_fileid_length((fileid_type == 2), is64)) | ||
| 78 | return NULL; | ||
| 79 | |||
| 80 | p = xfs_fileid_decode_fid2(p, &ifid, is64); | ||
| 81 | |||
| 82 | if (fileid_type == 2) { | ||
| 83 | p = xfs_fileid_decode_fid2(p, &pfid, is64); | ||
| 84 | parent = &pfid; | ||
| 85 | } | 51 | } |
| 86 | 52 | return 255; /* invalid */ | |
| 87 | fh = (__u32 *)&ifid; | ||
| 88 | return sb->s_export_op->find_exported_dentry(sb, fh, parent, acceptable, context); | ||
| 89 | } | 53 | } |
| 90 | 54 | ||
| 91 | |||
| 92 | STATIC int | 55 | STATIC int |
| 93 | xfs_fs_encode_fh( | 56 | xfs_fs_encode_fh( |
| 94 | struct dentry *dentry, | 57 | struct dentry *dentry, |
| @@ -96,21 +59,21 @@ xfs_fs_encode_fh( | |||
| 96 | int *max_len, | 59 | int *max_len, |
| 97 | int connectable) | 60 | int connectable) |
| 98 | { | 61 | { |
| 62 | struct fid *fid = (struct fid *)fh; | ||
| 63 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; | ||
| 99 | struct inode *inode = dentry->d_inode; | 64 | struct inode *inode = dentry->d_inode; |
| 100 | int type = 1; | 65 | int fileid_type; |
| 101 | __u32 *p = fh; | ||
| 102 | int len; | 66 | int len; |
| 103 | int is64 = 0; | ||
| 104 | #if XFS_BIG_INUMS | ||
| 105 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) { | ||
| 106 | /* filesystem may contain 64bit inode numbers */ | ||
| 107 | is64 = XFS_FILEID_TYPE_64FLAG; | ||
| 108 | } | ||
| 109 | #endif | ||
| 110 | 67 | ||
| 111 | /* Directories don't need their parent encoded, they have ".." */ | 68 | /* Directories don't need their parent encoded, they have ".." */ |
| 112 | if (S_ISDIR(inode->i_mode)) | 69 | if (S_ISDIR(inode->i_mode)) |
| 113 | connectable = 0; | 70 | fileid_type = FILEID_INO32_GEN; |
| 71 | else | ||
| 72 | fileid_type = FILEID_INO32_GEN_PARENT; | ||
| 73 | |||
| 74 | /* filesystem may contain 64bit inode numbers */ | ||
| 75 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) | ||
| 76 | fileid_type |= XFS_FILEID_TYPE_64FLAG; | ||
| 114 | 77 | ||
| 115 | /* | 78 | /* |
| 116 | * Only encode if there is enough space given. In practice | 79 | * Only encode if there is enough space given. In practice |
| @@ -118,39 +81,118 @@ xfs_fs_encode_fh( | |||
| 118 | * over NFSv2 with the subtree_check export option; the other | 81 | * over NFSv2 with the subtree_check export option; the other |
| 119 | * seven combinations work. The real answer is "don't use v2". | 82 | * seven combinations work. The real answer is "don't use v2". |
| 120 | */ | 83 | */ |
| 121 | len = xfs_fileid_length(connectable, is64); | 84 | len = xfs_fileid_length(fileid_type); |
| 122 | if (*max_len < len) | 85 | if (*max_len < len) |
| 123 | return 255; | 86 | return 255; |
| 124 | *max_len = len; | 87 | *max_len = len; |
| 125 | 88 | ||
| 126 | p = xfs_fileid_encode_inode(p, inode, is64); | 89 | switch (fileid_type) { |
| 127 | if (connectable) { | 90 | case FILEID_INO32_GEN_PARENT: |
| 128 | spin_lock(&dentry->d_lock); | 91 | spin_lock(&dentry->d_lock); |
| 129 | p = xfs_fileid_encode_inode(p, dentry->d_parent->d_inode, is64); | 92 | fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino; |
| 93 | fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; | ||
| 130 | spin_unlock(&dentry->d_lock); | 94 | spin_unlock(&dentry->d_lock); |
| 131 | type = 2; | 95 | /*FALLTHRU*/ |
| 96 | case FILEID_INO32_GEN: | ||
| 97 | fid->i32.ino = inode->i_ino; | ||
| 98 | fid->i32.gen = inode->i_generation; | ||
| 99 | break; | ||
| 100 | case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: | ||
| 101 | spin_lock(&dentry->d_lock); | ||
| 102 | fid64->parent_ino = dentry->d_parent->d_inode->i_ino; | ||
| 103 | fid64->parent_gen = dentry->d_parent->d_inode->i_generation; | ||
| 104 | spin_unlock(&dentry->d_lock); | ||
| 105 | /*FALLTHRU*/ | ||
| 106 | case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: | ||
| 107 | fid64->ino = inode->i_ino; | ||
| 108 | fid64->gen = inode->i_generation; | ||
| 109 | break; | ||
| 132 | } | 110 | } |
| 133 | BUG_ON((p - fh) != len); | 111 | |
| 134 | return type | is64; | 112 | return fileid_type; |
| 135 | } | 113 | } |
| 136 | 114 | ||
| 137 | STATIC struct dentry * | 115 | STATIC struct inode * |
| 138 | xfs_fs_get_dentry( | 116 | xfs_nfs_get_inode( |
| 139 | struct super_block *sb, | 117 | struct super_block *sb, |
| 140 | void *data) | 118 | u64 ino, |
| 141 | { | 119 | u32 generation) |
| 120 | { | ||
| 121 | xfs_fid_t xfid; | ||
| 142 | bhv_vnode_t *vp; | 122 | bhv_vnode_t *vp; |
| 143 | struct inode *inode; | ||
| 144 | struct dentry *result; | ||
| 145 | int error; | 123 | int error; |
| 146 | 124 | ||
| 147 | error = xfs_vget(XFS_M(sb), &vp, data); | 125 | xfid.fid_len = sizeof(xfs_fid_t) - sizeof(xfid.fid_len); |
| 148 | if (error || vp == NULL) | 126 | xfid.fid_pad = 0; |
| 149 | return ERR_PTR(-ESTALE) ; | 127 | xfid.fid_ino = ino; |
| 128 | xfid.fid_gen = generation; | ||
| 150 | 129 | ||
| 151 | inode = vn_to_inode(vp); | 130 | error = xfs_vget(XFS_M(sb), &vp, &xfid); |
| 131 | if (error) | ||
| 132 | return ERR_PTR(-error); | ||
| 133 | |||
| 134 | return vp ? vn_to_inode(vp) : NULL; | ||
| 135 | } | ||
| 136 | |||
| 137 | STATIC struct dentry * | ||
| 138 | xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, | ||
| 139 | int fh_len, int fileid_type) | ||
| 140 | { | ||
| 141 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; | ||
| 142 | struct inode *inode = NULL; | ||
| 143 | struct dentry *result; | ||
| 144 | |||
| 145 | if (fh_len < xfs_fileid_length(fileid_type)) | ||
| 146 | return NULL; | ||
| 147 | |||
| 148 | switch (fileid_type) { | ||
| 149 | case FILEID_INO32_GEN_PARENT: | ||
| 150 | case FILEID_INO32_GEN: | ||
| 151 | inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen); | ||
| 152 | break; | ||
| 153 | case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: | ||
| 154 | case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: | ||
| 155 | inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen); | ||
| 156 | break; | ||
| 157 | } | ||
| 158 | |||
| 159 | if (!inode) | ||
| 160 | return NULL; | ||
| 161 | if (IS_ERR(inode)) | ||
| 162 | return ERR_PTR(PTR_ERR(inode)); | ||
| 163 | result = d_alloc_anon(inode); | ||
| 164 | if (!result) { | ||
| 165 | iput(inode); | ||
| 166 | return ERR_PTR(-ENOMEM); | ||
| 167 | } | ||
| 168 | return result; | ||
| 169 | } | ||
| 170 | |||
| 171 | STATIC struct dentry * | ||
| 172 | xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
| 173 | int fh_len, int fileid_type) | ||
| 174 | { | ||
| 175 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; | ||
| 176 | struct inode *inode = NULL; | ||
| 177 | struct dentry *result; | ||
| 178 | |||
| 179 | switch (fileid_type) { | ||
| 180 | case FILEID_INO32_GEN_PARENT: | ||
| 181 | inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino, | ||
| 182 | fid->i32.parent_gen); | ||
| 183 | break; | ||
| 184 | case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: | ||
| 185 | inode = xfs_nfs_get_inode(sb, fid64->parent_ino, | ||
| 186 | fid64->parent_gen); | ||
| 187 | break; | ||
| 188 | } | ||
| 189 | |||
| 190 | if (!inode) | ||
| 191 | return NULL; | ||
| 192 | if (IS_ERR(inode)) | ||
| 193 | return ERR_PTR(PTR_ERR(inode)); | ||
| 152 | result = d_alloc_anon(inode); | 194 | result = d_alloc_anon(inode); |
| 153 | if (!result) { | 195 | if (!result) { |
| 154 | iput(inode); | 196 | iput(inode); |
| 155 | return ERR_PTR(-ENOMEM); | 197 | return ERR_PTR(-ENOMEM); |
| 156 | } | 198 | } |
| @@ -178,9 +220,9 @@ xfs_fs_get_parent( | |||
| 178 | return parent; | 220 | return parent; |
| 179 | } | 221 | } |
| 180 | 222 | ||
| 181 | struct export_operations xfs_export_operations = { | 223 | const struct export_operations xfs_export_operations = { |
| 182 | .decode_fh = xfs_fs_decode_fh, | ||
| 183 | .encode_fh = xfs_fs_encode_fh, | 224 | .encode_fh = xfs_fs_encode_fh, |
| 225 | .fh_to_dentry = xfs_fs_fh_to_dentry, | ||
| 226 | .fh_to_parent = xfs_fs_fh_to_parent, | ||
| 184 | .get_parent = xfs_fs_get_parent, | 227 | .get_parent = xfs_fs_get_parent, |
| 185 | .get_dentry = xfs_fs_get_dentry, | ||
| 186 | }; | 228 | }; |
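
The switch in xfs_fileid_length() above is easy to sanity-check outside the kernel. Below is a minimal userspace sketch, not part of the patch; the FILEID_* and flag constants are copied from the new exportfs.h and xfs_export.h, and the length arithmetic mirrors the switch:

    #include <assert.h>
    #include <stdio.h>

    #define FILEID_INO32_GEN        1
    #define FILEID_INO32_GEN_PARENT 2
    #define XFS_FILEID_TYPE_64FLAG  0x80

    /* Handle payload length in u32 words, as in xfs_fileid_length(). */
    static int fileid_length(int fileid_type)
    {
            switch (fileid_type) {
            case FILEID_INO32_GEN:
                    return 2;       /* 32-bit ino + gen */
            case FILEID_INO32_GEN_PARENT:
                    return 4;       /* plus parent ino + gen */
            case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
                    return 3;       /* 64-bit ino + 32-bit gen */
            case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
                    return 6;       /* 64-bit self and parent */
            }
            return 255;             /* invalid */
    }

    int main(void)
    {
            assert(fileid_length(FILEID_INO32_GEN) * 4 == 8);
            assert(fileid_length(FILEID_INO32_GEN_PARENT |
                                 XFS_FILEID_TYPE_64FLAG) * 4 == 24);
            printf("all handle sizes check out\n");
            return 0;
    }
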
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h index 2f36071a86f7..3272b6ae7a35 100644 --- a/fs/xfs/linux-2.6/xfs_export.h +++ b/fs/xfs/linux-2.6/xfs_export.h | |||
| @@ -59,50 +59,14 @@ | |||
| 59 | * a subdirectory) or use the "fsid" export option. | 59 | * a subdirectory) or use the "fsid" export option. |
| 60 | */ | 60 | */ |
| 61 | 61 | ||
| 62 | struct xfs_fid64 { | ||
| 63 | u64 ino; | ||
| 64 | u32 gen; | ||
| 65 | u64 parent_ino; | ||
| 66 | u32 parent_gen; | ||
| 67 | } __attribute__((packed)); | ||
| 68 | |||
| 62 | /* This flag goes on the wire. Don't play with it. */ | 69 | /* This flag goes on the wire. Don't play with it. */ |
| 63 | #define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */ | 70 | #define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */ |
| 64 | 71 | ||
| 65 | /* Calculate the length in u32 units of the fileid data */ | ||
| 66 | static inline int | ||
| 67 | xfs_fileid_length(int hasparent, int is64) | ||
| 68 | { | ||
| 69 | return hasparent ? (is64 ? 6 : 4) : (is64 ? 3 : 2); | ||
| 70 | } | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Decode encoded inode information (either for the inode itself | ||
| 74 | * or the parent) into an xfs_fid_t structure. Advances and | ||
| 75 | * returns the new data pointer | ||
| 76 | */ | ||
| 77 | static inline __u32 * | ||
| 78 | xfs_fileid_decode_fid2(__u32 *p, xfs_fid_t *fid, int is64) | ||
| 79 | { | ||
| 80 | fid->fid_len = sizeof(xfs_fid_t) - sizeof(fid->fid_len); | ||
| 81 | fid->fid_pad = 0; | ||
| 82 | fid->fid_ino = *p++; | ||
| 83 | #if XFS_BIG_INUMS | ||
| 84 | if (is64) | ||
| 85 | fid->fid_ino |= (((__u64)(*p++)) << 32); | ||
| 86 | #endif | ||
| 87 | fid->fid_gen = *p++; | ||
| 88 | return p; | ||
| 89 | } | ||
| 90 | |||
| 91 | /* | ||
| 92 | * Encode inode information (either for the inode itself or the | ||
| 93 | * parent) into a fileid buffer. Advances and returns the new | ||
| 94 | * data pointer. | ||
| 95 | */ | ||
| 96 | static inline __u32 * | ||
| 97 | xfs_fileid_encode_inode(__u32 *p, struct inode *inode, int is64) | ||
| 98 | { | ||
| 99 | *p++ = (__u32)inode->i_ino; | ||
| 100 | #if XFS_BIG_INUMS | ||
| 101 | if (is64) | ||
| 102 | *p++ = (__u32)(inode->i_ino >> 32); | ||
| 103 | #endif | ||
| 104 | *p++ = inode->i_generation; | ||
| 105 | return p; | ||
| 106 | } | ||
| 107 | |||
| 108 | #endif /* __XFS_EXPORT_H__ */ | 72 | #endif /* __XFS_EXPORT_H__ */ |
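
The __attribute__((packed)) on struct xfs_fid64 above is what keeps the 64-bit handle at exactly the 6 u32 words reported for FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG. A hedged userspace check of that layout:

    #include <stdint.h>
    #include <stdio.h>

    struct xfs_fid64 {
            uint64_t ino;
            uint32_t gen;
            uint64_t parent_ino;
            uint32_t parent_gen;
    } __attribute__((packed));

    int main(void)
    {
            /* 8 + 4 + 8 + 4 = 24 bytes = 6 * sizeof(u32).  Without the
             * packed attribute the u64 members would be 8-byte aligned
             * and the struct would grow to 32 bytes. */
            printf("sizeof(xfs_fid64) = %zu\n", sizeof(struct xfs_fid64));
            return sizeof(struct xfs_fid64) == 24 ? 0 : 1;
    }
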
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index c78c23310fe8..3efcf45b14ab 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
| @@ -118,7 +118,7 @@ extern int xfs_blkdev_get(struct xfs_mount *, const char *, | |||
| 118 | extern void xfs_blkdev_put(struct block_device *); | 118 | extern void xfs_blkdev_put(struct block_device *); |
| 119 | extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); | 119 | extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); |
| 120 | 120 | ||
| 121 | extern struct export_operations xfs_export_operations; | 121 | extern const struct export_operations xfs_export_operations; |
| 122 | 122 | ||
| 123 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) | 123 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) |
| 124 | 124 | ||
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 4e5d3ca53a8e..a1b1b2ee3e51 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h | |||
| @@ -257,7 +257,8 @@ struct acpi_table_dbgp { | |||
| 257 | struct acpi_table_dmar { | 257 | struct acpi_table_dmar { |
| 258 | struct acpi_table_header header; /* Common ACPI table header */ | 258 | struct acpi_table_header header; /* Common ACPI table header */ |
| 259 | u8 width; /* Host Address Width */ | 259 | u8 width; /* Host Address Width */ |
| 260 | u8 reserved[11]; | 260 | u8 flags; |
| 261 | u8 reserved[10]; | ||
| 261 | }; | 262 | }; |
| 262 | 263 | ||
| 263 | /* DMAR subtable header */ | 264 | /* DMAR subtable header */ |
| @@ -265,8 +266,6 @@ struct acpi_table_dmar { | |||
| 265 | struct acpi_dmar_header { | 266 | struct acpi_dmar_header { |
| 266 | u16 type; | 267 | u16 type; |
| 267 | u16 length; | 268 | u16 length; |
| 268 | u8 flags; | ||
| 269 | u8 reserved[3]; | ||
| 270 | }; | 269 | }; |
| 271 | 270 | ||
| 272 | /* Values for subtable type in struct acpi_dmar_header */ | 271 | /* Values for subtable type in struct acpi_dmar_header */ |
| @@ -274,13 +273,15 @@ struct acpi_dmar_header { | |||
| 274 | enum acpi_dmar_type { | 273 | enum acpi_dmar_type { |
| 275 | ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, | 274 | ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, |
| 276 | ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, | 275 | ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, |
| 277 | ACPI_DMAR_TYPE_RESERVED = 2 /* 2 and greater are reserved */ | 276 | ACPI_DMAR_TYPE_ATSR = 2, |
| 277 | ACPI_DMAR_TYPE_RESERVED = 3 /* 3 and greater are reserved */ | ||
| 278 | }; | 278 | }; |
| 279 | 279 | ||
| 280 | struct acpi_dmar_device_scope { | 280 | struct acpi_dmar_device_scope { |
| 281 | u8 entry_type; | 281 | u8 entry_type; |
| 282 | u8 length; | 282 | u8 length; |
| 283 | u8 segment; | 283 | u16 reserved; |
| 284 | u8 enumeration_id; | ||
| 284 | u8 bus; | 285 | u8 bus; |
| 285 | }; | 286 | }; |
| 286 | 287 | ||
| @@ -290,7 +291,14 @@ enum acpi_dmar_scope_type { | |||
| 290 | ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, | 291 | ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, |
| 291 | ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, | 292 | ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, |
| 292 | ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, | 293 | ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, |
| 293 | ACPI_DMAR_SCOPE_TYPE_RESERVED = 3 /* 3 and greater are reserved */ | 294 | ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, |
| 295 | ACPI_DMAR_SCOPE_TYPE_HPET = 4, | ||
| 296 | ACPI_DMAR_SCOPE_TYPE_RESERVED = 5 /* 5 and greater are reserved */ | ||
| 297 | }; | ||
| 298 | |||
| 299 | struct acpi_dmar_pci_path { | ||
| 300 | u8 dev; | ||
| 301 | u8 fn; | ||
| 294 | }; | 302 | }; |
| 295 | 303 | ||
| 296 | /* | 304 | /* |
| @@ -301,6 +309,9 @@ enum acpi_dmar_scope_type { | |||
| 301 | 309 | ||
| 302 | struct acpi_dmar_hardware_unit { | 310 | struct acpi_dmar_hardware_unit { |
| 303 | struct acpi_dmar_header header; | 311 | struct acpi_dmar_header header; |
| 312 | u8 flags; | ||
| 313 | u8 reserved; | ||
| 314 | u16 segment; | ||
| 304 | u64 address; /* Register Base Address */ | 315 | u64 address; /* Register Base Address */ |
| 305 | }; | 316 | }; |
| 306 | 317 | ||
| @@ -312,7 +323,9 @@ struct acpi_dmar_hardware_unit { | |||
| 312 | 323 | ||
| 313 | struct acpi_dmar_reserved_memory { | 324 | struct acpi_dmar_reserved_memory { |
| 314 | struct acpi_dmar_header header; | 325 | struct acpi_dmar_header header; |
| 315 | u64 address; /* 4_k aligned base address */ | 326 | u16 reserved; |
| 327 | u16 segment; | ||
| 328 | u64 base_address; /* 4_k aligned base address */ | ||
| 316 | u64 end_address; /* 4_k aligned limit address */ | 329 | u64 end_address; /* 4_k aligned limit address */ |
| 317 | }; | 330 | }; |
| 318 | 331 | ||
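
Each DMAR remapping structure starts with the two-field acpi_dmar_header, so a parser can walk the table by type and length alone. The sketch below is illustrative only — the real parser is added elsewhere in this series (drivers/pci/dmar.c) — and the buffer bounds handling is an assumption:

    #include <stdint.h>

    struct acpi_dmar_header {
            uint16_t type;
            uint16_t length;        /* covers header + body */
    };

    enum {
            ACPI_DMAR_TYPE_HARDWARE_UNIT   = 0,
            ACPI_DMAR_TYPE_RESERVED_MEMORY = 1,
    };

    void walk_dmar(uint8_t *p, uint8_t *end)
    {
            while (p + sizeof(struct acpi_dmar_header) <= end) {
                    struct acpi_dmar_header *h = (struct acpi_dmar_header *)p;

                    if (h->length == 0 || p + h->length > end)
                            break;          /* malformed table */
                    switch (h->type) {
                    case ACPI_DMAR_TYPE_HARDWARE_UNIT:
                            /* parse struct acpi_dmar_hardware_unit */
                            break;
                    case ACPI_DMAR_TYPE_RESERVED_MEMORY:
                            /* parse struct acpi_dmar_reserved_memory */
                            break;
                    default:
                            break;          /* skip ATSR/reserved entries */
                    }
                    p += h->length;
            }
    }
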
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h index ef67b59dbdb9..dc031cf44633 100644 --- a/include/asm-x86/bootparam.h +++ b/include/asm-x86/bootparam.h | |||
| @@ -28,8 +28,9 @@ struct setup_header { | |||
| 28 | u16 kernel_version; | 28 | u16 kernel_version; |
| 29 | u8 type_of_loader; | 29 | u8 type_of_loader; |
| 30 | u8 loadflags; | 30 | u8 loadflags; |
| 31 | #define LOADED_HIGH 0x01 | 31 | #define LOADED_HIGH (1<<0) |
| 32 | #define CAN_USE_HEAP 0x80 | 32 | #define KEEP_SEGMENTS (1<<6) |
| 33 | #define CAN_USE_HEAP (1<<7) | ||
| 33 | u16 setup_move_size; | 34 | u16 setup_move_size; |
| 34 | u32 code32_start; | 35 | u32 code32_start; |
| 35 | u32 ramdisk_image; | 36 | u32 ramdisk_image; |
| @@ -41,6 +42,10 @@ struct setup_header { | |||
| 41 | u32 initrd_addr_max; | 42 | u32 initrd_addr_max; |
| 42 | u32 kernel_alignment; | 43 | u32 kernel_alignment; |
| 43 | u8 relocatable_kernel; | 44 | u8 relocatable_kernel; |
| 45 | u8 _pad2[3]; | ||
| 46 | u32 cmdline_size; | ||
| 47 | u32 hardware_subarch; | ||
| 48 | u64 hardware_subarch_data; | ||
| 44 | } __attribute__((packed)); | 49 | } __attribute__((packed)); |
| 45 | 50 | ||
| 46 | struct sys_desc_table { | 51 | struct sys_desc_table { |
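
With loadflags rewritten as shifted bits, boot code tests them directly. A small hedged sketch; the function name is illustrative, not from the patch:

    #include <stdint.h>

    #define LOADED_HIGH   (1 << 0)
    #define KEEP_SEGMENTS (1 << 6)
    #define CAN_USE_HEAP  (1 << 7)

    /* KEEP_SEGMENTS asks early boot code to leave the segment
     * registers exactly as the boot loader set them. */
    int should_reload_segments(uint8_t loadflags)
    {
            return !(loadflags & KEEP_SEGMENTS);
    }
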
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index b3d43de44c59..9411a2d3f19c 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | void global_flush_tlb(void); | 27 | void global_flush_tlb(void); |
| 28 | int change_page_attr(struct page *page, int numpages, pgprot_t prot); | 28 | int change_page_attr(struct page *page, int numpages, pgprot_t prot); |
| 29 | int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot); | 29 | int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot); |
| 30 | void clflush_cache_range(void *addr, int size); | ||
| 30 | 31 | ||
| 31 | #ifdef CONFIG_DEBUG_PAGEALLOC | 32 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| 32 | /* internal debugging function */ | 33 | /* internal debugging function */ |
diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h index d9ee5e52e91b..87a715367a1b 100644 --- a/include/asm-x86/device.h +++ b/include/asm-x86/device.h | |||
| @@ -5,6 +5,9 @@ struct dev_archdata { | |||
| 5 | #ifdef CONFIG_ACPI | 5 | #ifdef CONFIG_ACPI |
| 6 | void *acpi_handle; | 6 | void *acpi_handle; |
| 7 | #endif | 7 | #endif |
| 8 | #ifdef CONFIG_DMAR | ||
| 9 | void *iommu; /* hook for IOMMU specific extension */ | ||
| 10 | #endif | ||
| 8 | }; | 11 | }; |
| 9 | 12 | ||
| 10 | #endif /* _ASM_X86_DEVICE_H */ | 13 | #endif /* _ASM_X86_DEVICE_H */ |
diff --git a/include/linux/capability.h b/include/linux/capability.h index 7a8d7ade28a0..bb017edffd56 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h | |||
| @@ -56,10 +56,8 @@ typedef struct __user_cap_data_struct { | |||
| 56 | 56 | ||
| 57 | struct vfs_cap_data { | 57 | struct vfs_cap_data { |
| 58 | __u32 magic_etc; /* Little endian */ | 58 | __u32 magic_etc; /* Little endian */ |
| 59 | struct { | 59 | __u32 permitted; /* Little endian */ |
| 60 | __u32 permitted; /* Little endian */ | 60 | __u32 inheritable; /* Little endian */ |
| 61 | __u32 inheritable; /* Little endian */ | ||
| 62 | } data[1]; | ||
| 63 | }; | 61 | }; |
| 64 | 62 | ||
| 65 | #ifdef __KERNEL__ | 63 | #ifdef __KERNEL__ |
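
Flattening vfs_cap_data turns the security.capability xattr into three consecutive little-endian words, which is how cap_from_disk() at the end of this patch reads it. A hedged userspace sketch of the decode; the revision-mask value here is illustrative, the kernel uses VFS_CAP_REVISION_MASK and real byte-order helpers:

    #include <stdint.h>

    struct vfs_cap_data {
            uint32_t magic_etc;     /* little endian */
            uint32_t permitted;     /* little endian */
            uint32_t inheritable;   /* little endian */
    };

    /* Identity on a little-endian host; the kernel uses le32_to_cpu(). */
    static uint32_t le32(uint32_t v) { return v; }

    int decode_caps(const struct vfs_cap_data *caps,
                    uint32_t *permitted, uint32_t *inheritable)
    {
            uint32_t magic_etc = le32(caps->magic_etc);

            if ((magic_etc & 0xFF000000) == 0)  /* illustrative revision check */
                    return -1;
            *permitted = le32(caps->permitted);
            *inheritable = le32(caps->inheritable);
            return 0;
    }
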
diff --git a/include/linux/dmar.h b/include/linux/dmar.h new file mode 100644 index 000000000000..ffb6439cb5e6 --- /dev/null +++ b/include/linux/dmar.h | |||
| @@ -0,0 +1,86 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2006, Intel Corporation. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License along with | ||
| 14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
| 15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
| 16 | * | ||
| 17 | * Copyright (C) Ashok Raj <ashok.raj@intel.com> | ||
| 18 | * Copyright (C) Shaohua Li <shaohua.li@intel.com> | ||
| 19 | */ | ||
| 20 | |||
| 21 | #ifndef __DMAR_H__ | ||
| 22 | #define __DMAR_H__ | ||
| 23 | |||
| 24 | #include <linux/acpi.h> | ||
| 25 | #include <linux/types.h> | ||
| 26 | #include <linux/msi.h> | ||
| 27 | |||
| 28 | #ifdef CONFIG_DMAR | ||
| 29 | struct intel_iommu; | ||
| 30 | |||
| 31 | extern char *dmar_get_fault_reason(u8 fault_reason); | ||
| 32 | |||
| 33 | /* Can't use the common MSI interrupt functions | ||
| 34 | * since DMAR is not a pci device | ||
| 35 | */ | ||
| 36 | extern void dmar_msi_unmask(unsigned int irq); | ||
| 37 | extern void dmar_msi_mask(unsigned int irq); | ||
| 38 | extern void dmar_msi_read(int irq, struct msi_msg *msg); | ||
| 39 | extern void dmar_msi_write(int irq, struct msi_msg *msg); | ||
| 40 | extern int dmar_set_interrupt(struct intel_iommu *iommu); | ||
| 41 | extern int arch_setup_dmar_msi(unsigned int irq); | ||
| 42 | |||
| 43 | /* Intel IOMMU detection and initialization functions */ | ||
| 44 | extern void detect_intel_iommu(void); | ||
| 45 | extern int intel_iommu_init(void); | ||
| 46 | |||
| 47 | extern int dmar_table_init(void); | ||
| 48 | extern int early_dmar_detect(void); | ||
| 49 | |||
| 50 | extern struct list_head dmar_drhd_units; | ||
| 51 | extern struct list_head dmar_rmrr_units; | ||
| 52 | |||
| 53 | struct dmar_drhd_unit { | ||
| 54 | struct list_head list; /* list of drhd units */ | ||
| 55 | u64 reg_base_addr; /* register base address */ | ||
| 56 | struct pci_dev **devices; /* target device array */ | ||
| 57 | int devices_cnt; /* target device count */ | ||
| 58 | u8 ignored:1; /* ignore drhd */ | ||
| 59 | u8 include_all:1; | ||
| 60 | struct intel_iommu *iommu; | ||
| 61 | }; | ||
| 62 | |||
| 63 | struct dmar_rmrr_unit { | ||
| 64 | struct list_head list; /* list of rmrr units */ | ||
| 65 | u64 base_address; /* reserved base address */ | ||
| 66 | u64 end_address; /* reserved end address */ | ||
| 67 | struct pci_dev **devices; /* target devices */ | ||
| 68 | int devices_cnt; /* target device count */ | ||
| 69 | }; | ||
| 70 | |||
| 71 | #define for_each_drhd_unit(drhd) \ | ||
| 72 | list_for_each_entry(drhd, &dmar_drhd_units, list) | ||
| 73 | #define for_each_rmrr_units(rmrr) \ | ||
| 74 | list_for_each_entry(rmrr, &dmar_rmrr_units, list) | ||
| 75 | #else | ||
| 76 | static inline void detect_intel_iommu(void) | ||
| 77 | { | ||
| 78 | return; | ||
| 79 | } | ||
| 80 | static inline int intel_iommu_init(void) | ||
| 81 | { | ||
| 82 | return -ENODEV; | ||
| 83 | } | ||
| 84 | |||
| 85 | #endif /* !CONFIG_DMAR */ | ||
| 86 | #endif /* __DMAR_H__ */ | ||
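
After dmar_table_init() has populated dmar_drhd_units, consumers iterate it with the macro above. A hedged kernel-context sketch; the function is illustrative, not from the patch:

    #include <linux/dmar.h>
    #include <linux/init.h>
    #include <linux/kernel.h>

    static int __init count_usable_drhds(void)
    {
            struct dmar_drhd_unit *drhd;
            int count = 0;

            for_each_drhd_unit(drhd) {
                    if (drhd->ignored)
                            continue;       /* unit flagged as unusable */
                    count++;
                    printk(KERN_DEBUG "DRHD at 0x%llx%s\n",
                           (unsigned long long)drhd->reg_base_addr,
                           drhd->include_all ? " (INCLUDE_ALL)" : "");
            }
            return count;
    }
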
diff --git a/include/linux/efi.h b/include/linux/efi.h index 0b9579a4cd42..14813b595802 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h | |||
| @@ -298,7 +298,7 @@ extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, | |||
| 298 | u64 attr); | 298 | u64 attr); |
| 299 | extern int __init efi_uart_console_only (void); | 299 | extern int __init efi_uart_console_only (void); |
| 300 | extern void efi_initialize_iomem_resources(struct resource *code_resource, | 300 | extern void efi_initialize_iomem_resources(struct resource *code_resource, |
| 301 | struct resource *data_resource); | 301 | struct resource *data_resource, struct resource *bss_resource); |
| 302 | extern unsigned long efi_get_time(void); | 302 | extern unsigned long efi_get_time(void); |
| 303 | extern int efi_set_rtc_mmss(unsigned long nowtime); | 303 | extern int efi_set_rtc_mmss(unsigned long nowtime); |
| 304 | extern int is_available_memory(efi_memory_desc_t * md); | 304 | extern int is_available_memory(efi_memory_desc_t * md); |
diff --git a/include/linux/efs_fs.h b/include/linux/efs_fs.h index 16cb25cbf7c5..dd57fe523e97 100644 --- a/include/linux/efs_fs.h +++ b/include/linux/efs_fs.h | |||
| @@ -35,6 +35,7 @@ static inline struct efs_sb_info *SUPER_INFO(struct super_block *sb) | |||
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | struct statfs; | 37 | struct statfs; |
| 38 | struct fid; | ||
| 38 | 39 | ||
| 39 | extern const struct inode_operations efs_dir_inode_operations; | 40 | extern const struct inode_operations efs_dir_inode_operations; |
| 40 | extern const struct file_operations efs_dir_operations; | 41 | extern const struct file_operations efs_dir_operations; |
| @@ -45,7 +46,10 @@ extern efs_block_t efs_map_block(struct inode *, efs_block_t); | |||
| 45 | extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int); | 46 | extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int); |
| 46 | 47 | ||
| 47 | extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *); | 48 | extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *); |
| 48 | extern struct dentry *efs_get_dentry(struct super_block *sb, void *vobjp); | 49 | extern struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
| 50 | int fh_len, int fh_type); | ||
| 51 | extern struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
| 52 | int fh_len, int fh_type); | ||
| 49 | extern struct dentry *efs_get_parent(struct dentry *); | 53 | extern struct dentry *efs_get_parent(struct dentry *); |
| 50 | extern int efs_bmap(struct inode *, int); | 54 | extern int efs_bmap(struct inode *, int); |
| 51 | 55 | ||
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 8872fe8392d6..51d214138814 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h | |||
| @@ -4,9 +4,48 @@ | |||
| 4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
| 5 | 5 | ||
| 6 | struct dentry; | 6 | struct dentry; |
| 7 | struct inode; | ||
| 7 | struct super_block; | 8 | struct super_block; |
| 8 | struct vfsmount; | 9 | struct vfsmount; |
| 9 | 10 | ||
| 11 | /* | ||
| 12 | * The fileid_type identifies how the file within the filesystem is encoded. | ||
| 13 | * In theory this is freely set and parsed by the filesystem, but we try to | ||
| 14 | * stick to conventions so we can share some generic code and avoid | ||
| 15 | * confusing sniffers like ethereal/wireshark. | ||
| 16 | * | ||
| 17 | * The filesystem must not use the value '0' or '0xff'. | ||
| 18 | */ | ||
| 19 | enum fid_type { | ||
| 20 | /* | ||
| 21 | * The root, or export point, of the filesystem. | ||
| 22 | * (Never actually passed down to the filesystem.) | ||
| 23 | */ | ||
| 24 | FILEID_ROOT = 0, | ||
| 25 | |||
| 26 | /* | ||
| 27 | * 32bit inode number, 32 bit generation number. | ||
| 28 | */ | ||
| 29 | FILEID_INO32_GEN = 1, | ||
| 30 | |||
| 31 | /* | ||
| 32 | * 32bit inode number, 32 bit generation number, | ||
| 33 | * 32 bit parent directory inode number. | ||
| 34 | */ | ||
| 35 | FILEID_INO32_GEN_PARENT = 2, | ||
| 36 | }; | ||
| 37 | |||
| 38 | struct fid { | ||
| 39 | union { | ||
| 40 | struct { | ||
| 41 | u32 ino; | ||
| 42 | u32 gen; | ||
| 43 | u32 parent_ino; | ||
| 44 | u32 parent_gen; | ||
| 45 | } i32; | ||
| 46 | __u32 raw[6]; | ||
| 47 | }; | ||
| 48 | }; | ||
| 10 | 49 | ||
| 11 | /** | 50 | /** |
| 12 | * struct export_operations - for nfsd to communicate with file systems | 51 | * struct export_operations - for nfsd to communicate with file systems |
| @@ -15,43 +54,9 @@ struct vfsmount; | |||
| 15 | * @get_name: find the name for a given inode in a given directory | 54 | * @get_name: find the name for a given inode in a given directory |
| 16 | * @get_parent: find the parent of a given directory | 55 | * @get_parent: find the parent of a given directory |
| 17 | * @get_dentry: find a dentry for the inode given a file handle sub-fragment | 56 | * @get_dentry: find a dentry for the inode given a file handle sub-fragment |
| 18 | * @find_exported_dentry: | ||
| 19 | * set by the exporting module to a standard helper function. | ||
| 20 | * | ||
| 21 | * Description: | ||
| 22 | * The export_operations structure provides a means for nfsd to communicate | ||
| 23 | * with a particular exported file system - particularly enabling nfsd and | ||
| 24 | * the filesystem to co-operate when dealing with file handles. | ||
| 25 | * | ||
| 26 | * export_operations contains two basic operation for dealing with file | ||
| 27 | * handles, decode_fh() and encode_fh(), and allows for some other | ||
| 28 | * operations to be defined which standard helper routines use to get | ||
| 29 | * specific information from the filesystem. | ||
| 30 | * | ||
| 31 | * nfsd encodes information use to determine which filesystem a filehandle | ||
| 32 | * applies to in the initial part of the file handle. The remainder, termed | ||
| 33 | * a file handle fragment, is controlled completely by the filesystem. The | ||
| 34 | * standard helper routines assume that this fragment will contain one or | ||
| 35 | * two sub-fragments, one which identifies the file, and one which may be | ||
| 36 | * used to identify the (a) directory containing the file. | ||
| 37 | * | 57 | * |
| 38 | * In some situations, nfsd needs to get a dentry which is connected into a | 58 | * See Documentation/filesystems/Exporting for details on how to use |
| 39 | * specific part of the file tree. To allow for this, it passes the | 59 | * this interface correctly. |
| 40 | * function acceptable() together with a @context which can be used to see | ||
| 41 | * if the dentry is acceptable. As there can be multiple dentrys for a | ||
| 42 | * given file, the filesystem should check each one for acceptability before | ||
| 43 | * looking for the next. As soon as an acceptable one is found, it should | ||
| 44 | * be returned. | ||
| 45 | * | ||
| 46 | * decode_fh: | ||
| 47 | * @decode_fh is given a &struct super_block (@sb), a file handle fragment | ||
| 48 | * (@fh, @fh_len) and an acceptability testing function (@acceptable, | ||
| 49 | * @context). It should return a &struct dentry which refers to the same | ||
| 50 | * file that the file handle fragment refers to, and which passes the | ||
| 51 | * acceptability test. If it cannot, it should return a %NULL pointer if | ||
| 52 | * the file was found but no acceptable &dentries were available, or a | ||
| 53 | * %ERR_PTR error code indicating why it couldn't be found (e.g. %ENOENT or | ||
| 54 | * %ENOMEM). | ||
| 55 | * | 60 | * |
| 56 | * encode_fh: | 61 | * encode_fh: |
| 57 | * @encode_fh should store in the file handle fragment @fh (using at most | 62 | * @encode_fh should store in the file handle fragment @fh (using at most |
| @@ -63,6 +68,21 @@ struct vfsmount; | |||
| 63 | * the filehandle fragment. encode_fh() should return the number of bytes | 68 | * the filehandle fragment. encode_fh() should return the number of bytes |
| 64 | * stored or a negative error code such as %-ENOSPC | 69 | * stored or a negative error code such as %-ENOSPC |
| 65 | * | 70 | * |
| 71 | * fh_to_dentry: | ||
| 72 | * @fh_to_dentry is given a &struct super_block (@sb) and a file handle | ||
| 73 | * fragment (@fh, @fh_len). It should return a &struct dentry which refers | ||
| 74 | * to the same file that the file handle fragment refers to. If it cannot, | ||
| 75 | * it should return a %NULL pointer if the file was found but no acceptable | ||
| 76 | * &dentries were available, or an %ERR_PTR error code indicating why it | ||
| 77 | * couldn't be found (e.g. %ENOENT or %ENOMEM). Any suitable dentry can be | ||
| 78 | * returned including, if necessary, a new dentry created with d_alloc_root. | ||
| 79 | * The caller can then find any other extant dentries by following the | ||
| 80 | * d_alias links. | ||
| 81 | * | ||
| 82 | * fh_to_parent: | ||
| 83 | * Same as @fh_to_dentry, except that it returns a pointer to the parent | ||
| 84 | * dentry if it was encoded into the filehandle fragment by @encode_fh. | ||
| 85 | * | ||
| 66 | * get_name: | 86 | * get_name: |
| 67 | * @get_name should find a name for the given @child in the given @parent | 87 | * @get_name should find a name for the given @child in the given @parent |
| 68 | * directory. The name should be stored in the @name (with the | 88 | * directory. The name should be stored in the @name (with the |
| @@ -75,52 +95,37 @@ struct vfsmount; | |||
| 75 | * is also a directory. In the event that it cannot be found, or storage | 95 | * is also a directory. In the event that it cannot be found, or storage |
| 76 | * space cannot be allocated, a %ERR_PTR should be returned. | 96 | * space cannot be allocated, a %ERR_PTR should be returned. |
| 77 | * | 97 | * |
| 78 | * get_dentry: | ||
| 79 | * Given a &super_block (@sb) and a pointer to a file-system specific inode | ||
| 80 | * identifier, possibly an inode number, (@inump) get_dentry() should find | ||
| 81 | * the identified inode and return a dentry for that inode. Any suitable | ||
| 82 | * dentry can be returned including, if necessary, a new dentry created with | ||
| 83 | * d_alloc_root. The caller can then find any other extant dentrys by | ||
| 84 | * following the d_alias links. If a new dentry was created using | ||
| 85 | * d_alloc_root, DCACHE_NFSD_DISCONNECTED should be set, and the dentry | ||
| 86 | * should be d_rehash()ed. | ||
| 87 | * | ||
| 88 | * If the inode cannot be found, either a %NULL pointer or an %ERR_PTR code | ||
| 89 | * can be returned. The @inump will be whatever was passed to | ||
| 90 | * nfsd_find_fh_dentry() in either the @obj or @parent parameters. | ||
| 91 | * | ||
| 92 | * Locking rules: | 98 | * Locking rules: |
| 93 | * get_parent is called with child->d_inode->i_mutex down | 99 | * get_parent is called with child->d_inode->i_mutex down |
| 94 | * get_name is not (which is possibly inconsistent) | 100 | * get_name is not (which is possibly inconsistent) |
| 95 | */ | 101 | */ |
| 96 | 102 | ||
| 97 | struct export_operations { | 103 | struct export_operations { |
| 98 | struct dentry *(*decode_fh)(struct super_block *sb, __u32 *fh, | ||
| 99 | int fh_len, int fh_type, | ||
| 100 | int (*acceptable)(void *context, struct dentry *de), | ||
| 101 | void *context); | ||
| 102 | int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, | 104 | int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, |
| 103 | int connectable); | 105 | int connectable); |
| 106 | struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid, | ||
| 107 | int fh_len, int fh_type); | ||
| 108 | struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid, | ||
| 109 | int fh_len, int fh_type); | ||
| 104 | int (*get_name)(struct dentry *parent, char *name, | 110 | int (*get_name)(struct dentry *parent, char *name, |
| 105 | struct dentry *child); | 111 | struct dentry *child); |
| 106 | struct dentry * (*get_parent)(struct dentry *child); | 112 | struct dentry * (*get_parent)(struct dentry *child); |
| 107 | struct dentry * (*get_dentry)(struct super_block *sb, void *inump); | ||
| 108 | |||
| 109 | /* This is set by the exporting module to a standard helper */ | ||
| 110 | struct dentry * (*find_exported_dentry)( | ||
| 111 | struct super_block *sb, void *obj, void *parent, | ||
| 112 | int (*acceptable)(void *context, struct dentry *de), | ||
| 113 | void *context); | ||
| 114 | }; | 113 | }; |
| 115 | 114 | ||
| 116 | extern struct dentry *find_exported_dentry(struct super_block *sb, void *obj, | 115 | extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, |
| 117 | void *parent, int (*acceptable)(void *context, struct dentry *de), | 116 | int *max_len, int connectable); |
| 118 | void *context); | 117 | extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, |
| 119 | |||
| 120 | extern int exportfs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, | ||
| 121 | int connectable); | ||
| 122 | extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, __u32 *fh, | ||
| 123 | int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), | 118 | int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), |
| 124 | void *context); | 119 | void *context); |
| 125 | 120 | ||
| 121 | /* | ||
| 122 | * Generic helpers for filesystems. | ||
| 123 | */ | ||
| 124 | extern struct dentry *generic_fh_to_dentry(struct super_block *sb, | ||
| 125 | struct fid *fid, int fh_len, int fh_type, | ||
| 126 | struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); | ||
| 127 | extern struct dentry *generic_fh_to_parent(struct super_block *sb, | ||
| 128 | struct fid *fid, int fh_len, int fh_type, | ||
| 129 | struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); | ||
| 130 | |||
| 126 | #endif /* LINUX_EXPORTFS_H */ | 131 | #endif /* LINUX_EXPORTFS_H */ |
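
For a filesystem whose handles fit the FILEID_INO32_GEN(_PARENT) layouts, the two new generic helpers reduce export support to a single get_inode callback. A hedged sketch; myfs_iget() and the other myfs_ names are hypothetical:

    #include <linux/exportfs.h>
    #include <linux/fs.h>

    extern struct inode *myfs_iget(struct super_block *sb, u64 ino); /* hypothetical */

    static struct inode *myfs_nfs_get_inode(struct super_block *sb,
                                            u64 ino, u32 gen)
    {
            struct inode *inode = myfs_iget(sb, ino);

            if (inode && gen && inode->i_generation != gen) {
                    iput(inode);            /* stale: generation mismatch */
                    return NULL;
            }
            return inode;
    }

    static struct dentry *myfs_fh_to_dentry(struct super_block *sb,
                    struct fid *fid, int fh_len, int fh_type)
    {
            return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
                                        myfs_nfs_get_inode);
    }

    static struct dentry *myfs_fh_to_parent(struct super_block *sb,
                    struct fid *fid, int fh_len, int fh_type)
    {
            return generic_fh_to_parent(sb, fid, fh_len, fh_type,
                                        myfs_nfs_get_inode);
    }

    static const struct export_operations myfs_export_ops = {
            .fh_to_dentry   = myfs_fh_to_dentry,
            .fh_to_parent   = myfs_fh_to_parent,
    };
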
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index c77c3bbfe4bb..0f6c86c634fd 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h | |||
| @@ -561,6 +561,7 @@ enum { | |||
| 561 | #define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1) | 561 | #define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1) |
| 562 | #define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \ | 562 | #define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \ |
| 563 | ~EXT2_DIR_ROUND) | 563 | ~EXT2_DIR_ROUND) |
| 564 | #define EXT2_MAX_REC_LEN ((1<<16)-1) | ||
| 564 | 565 | ||
| 565 | static inline ext2_fsblk_t | 566 | static inline ext2_fsblk_t |
| 566 | ext2_group_first_block_no(struct super_block *sb, unsigned long group_no) | 567 | ext2_group_first_block_no(struct super_block *sb, unsigned long group_no) |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 50078bb30a1c..b3ec4a496d64 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -987,7 +987,7 @@ struct super_block { | |||
| 987 | const struct super_operations *s_op; | 987 | const struct super_operations *s_op; |
| 988 | struct dquot_operations *dq_op; | 988 | struct dquot_operations *dq_op; |
| 989 | struct quotactl_ops *s_qcop; | 989 | struct quotactl_ops *s_qcop; |
| 990 | struct export_operations *s_export_op; | 990 | const struct export_operations *s_export_op; |
| 991 | unsigned long s_flags; | 991 | unsigned long s_flags; |
| 992 | unsigned long s_magic; | 992 | unsigned long s_magic; |
| 993 | struct dentry *s_root; | 993 | struct dentry *s_root; |
diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 6c9873f88287..ff203dd02919 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h | |||
| @@ -34,6 +34,12 @@ | |||
| 34 | name: | 34 | name: |
| 35 | #endif | 35 | #endif |
| 36 | 36 | ||
| 37 | #ifndef WEAK | ||
| 38 | #define WEAK(name) \ | ||
| 39 | .weak name; \ | ||
| 40 | name: | ||
| 41 | #endif | ||
| 42 | |||
| 37 | #define KPROBE_ENTRY(name) \ | 43 | #define KPROBE_ENTRY(name) \ |
| 38 | .pushsection .kprobes.text, "ax"; \ | 44 | .pushsection .kprobes.text, "ax"; \ |
| 39 | ENTRY(name) | 45 | ENTRY(name) |
diff --git a/include/linux/memory.h b/include/linux/memory.h index 654ef5544878..33f0ff0cf634 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h | |||
| @@ -41,18 +41,15 @@ struct memory_block { | |||
| 41 | #define MEM_ONLINE (1<<0) /* exposed to userspace */ | 41 | #define MEM_ONLINE (1<<0) /* exposed to userspace */ |
| 42 | #define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */ | 42 | #define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */ |
| 43 | #define MEM_OFFLINE (1<<2) /* exposed to userspace */ | 43 | #define MEM_OFFLINE (1<<2) /* exposed to userspace */ |
| 44 | #define MEM_GOING_ONLINE (1<<3) | ||
| 45 | #define MEM_CANCEL_ONLINE (1<<4) | ||
| 46 | #define MEM_CANCEL_OFFLINE (1<<5) | ||
| 44 | 47 | ||
| 45 | /* | 48 | struct memory_notify { |
| 46 | * All of these states are currently kernel-internal for notifying | 49 | unsigned long start_pfn; |
| 47 | * kernel components and architectures. | 50 | unsigned long nr_pages; |
| 48 | * | 51 | int status_change_nid; |
| 49 | * For MEM_MAPPING_INVALID, all notifier chains with priority >0 | 52 | }; |
| 50 | * are called before pfn_to_page() becomes invalid. The priority=0 | ||
| 51 | * entry is reserved for the function that actually makes | ||
| 52 | * pfn_to_page() stop working. Any notifiers that want to be called | ||
| 53 | * after that should have priority <0. | ||
| 54 | */ | ||
| 55 | #define MEM_MAPPING_INVALID (1<<3) | ||
| 56 | 53 | ||
| 57 | struct notifier_block; | 54 | struct notifier_block; |
| 58 | struct mem_section; | 55 | struct mem_section; |
| @@ -69,21 +66,31 @@ static inline int register_memory_notifier(struct notifier_block *nb) | |||
| 69 | static inline void unregister_memory_notifier(struct notifier_block *nb) | 66 | static inline void unregister_memory_notifier(struct notifier_block *nb) |
| 70 | { | 67 | { |
| 71 | } | 68 | } |
| 69 | static inline int memory_notify(unsigned long val, void *v) | ||
| 70 | { | ||
| 71 | return 0; | ||
| 72 | } | ||
| 72 | #else | 73 | #else |
| 74 | extern int register_memory_notifier(struct notifier_block *nb); | ||
| 75 | extern void unregister_memory_notifier(struct notifier_block *nb); | ||
| 73 | extern int register_new_memory(struct mem_section *); | 76 | extern int register_new_memory(struct mem_section *); |
| 74 | extern int unregister_memory_section(struct mem_section *); | 77 | extern int unregister_memory_section(struct mem_section *); |
| 75 | extern int memory_dev_init(void); | 78 | extern int memory_dev_init(void); |
| 76 | extern int remove_memory_block(unsigned long, struct mem_section *, int); | 79 | extern int remove_memory_block(unsigned long, struct mem_section *, int); |
| 77 | 80 | extern int memory_notify(unsigned long val, void *v); | |
| 78 | #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) | 81 | #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) |
| 79 | 82 | ||
| 80 | 83 | ||
| 81 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ | 84 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ |
| 82 | 85 | ||
| 86 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
| 83 | #define hotplug_memory_notifier(fn, pri) { \ | 87 | #define hotplug_memory_notifier(fn, pri) { \ |
| 84 | static struct notifier_block fn##_mem_nb = \ | 88 | static struct notifier_block fn##_mem_nb = \ |
| 85 | { .notifier_call = fn, .priority = pri }; \ | 89 | { .notifier_call = fn, .priority = pri }; \ |
| 86 | register_memory_notifier(&fn##_mem_nb); \ | 90 | register_memory_notifier(&fn##_mem_nb); \ |
| 87 | } | 91 | } |
| 92 | #else | ||
| 93 | #define hotplug_memory_notifier(fn, pri) do { } while (0) | ||
| 94 | #endif | ||
| 88 | 95 | ||
| 89 | #endif /* _LINUX_MEMORY_H_ */ | 96 | #endif /* _LINUX_MEMORY_H_ */ |
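
The new MEM_GOING_*/MEM_CANCEL_* states and struct memory_notify let a subsystem veto or react to memory hot-plug; this is the same pattern slub adopts later in this series. A hedged sketch with illustrative names:

    #include <linux/init.h>
    #include <linux/kernel.h>
    #include <linux/memory.h>
    #include <linux/notifier.h>

    static int my_memory_callback(struct notifier_block *self,
                                  unsigned long action, void *arg)
    {
            struct memory_notify *m = arg;
            int ret = 0;

            switch (action) {
            case MEM_GOING_ONLINE:
                    /* Allocate per-node state here; returning an errno
                     * (e.g. -ENOMEM) vetoes the online operation. */
                    printk(KERN_DEBUG "node %d gaining memory\n",
                           m->status_change_nid);
                    break;
            case MEM_CANCEL_ONLINE:
            case MEM_OFFLINE:
                    /* Undo whatever MEM_GOING_ONLINE set up. */
                    break;
            default:
                    break;
            }
            return notifier_from_errno(ret);
    }

    static int __init my_init(void)
    {
            hotplug_memory_notifier(my_memory_callback, 0);
            return 0;
    }
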
diff --git a/include/linux/pci.h b/include/linux/pci.h index 768b93359f90..5d2281f661f7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h | |||
| @@ -141,6 +141,7 @@ struct pci_dev { | |||
| 141 | unsigned int class; /* 3 bytes: (base,sub,prog-if) */ | 141 | unsigned int class; /* 3 bytes: (base,sub,prog-if) */ |
| 142 | u8 revision; /* PCI revision, low byte of class word */ | 142 | u8 revision; /* PCI revision, low byte of class word */ |
| 143 | u8 hdr_type; /* PCI header type (`multi' flag masked out) */ | 143 | u8 hdr_type; /* PCI header type (`multi' flag masked out) */ |
| 144 | u8 pcie_type; /* PCI-E device/port type */ | ||
| 144 | u8 rom_base_reg; /* which config register controls the ROM */ | 145 | u8 rom_base_reg; /* which config register controls the ROM */ |
| 145 | u8 pin; /* which interrupt pin this device uses */ | 146 | u8 pin; /* which interrupt pin this device uses */ |
| 146 | 147 | ||
| @@ -183,6 +184,7 @@ struct pci_dev { | |||
| 183 | unsigned int msi_enabled:1; | 184 | unsigned int msi_enabled:1; |
| 184 | unsigned int msix_enabled:1; | 185 | unsigned int msix_enabled:1; |
| 185 | unsigned int is_managed:1; | 186 | unsigned int is_managed:1; |
| 187 | unsigned int is_pcie:1; | ||
| 186 | atomic_t enable_cnt; /* pci_enable_device has been called */ | 188 | atomic_t enable_cnt; /* pci_enable_device has been called */ |
| 187 | 189 | ||
| 188 | u32 saved_config_space[16]; /* config space saved at suspend time */ | 190 | u32 saved_config_space[16]; /* config space saved at suspend time */ |
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 72bfccd3da22..422eab4958a6 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h | |||
| @@ -28,6 +28,8 @@ | |||
| 28 | #include <linux/reiserfs_fs_sb.h> | 28 | #include <linux/reiserfs_fs_sb.h> |
| 29 | #endif | 29 | #endif |
| 30 | 30 | ||
| 31 | struct fid; | ||
| 32 | |||
| 31 | /* | 33 | /* |
| 32 | * include/linux/reiser_fs.h | 34 | * include/linux/reiser_fs.h |
| 33 | * | 35 | * |
| @@ -1877,12 +1879,10 @@ void reiserfs_delete_inode(struct inode *inode); | |||
| 1877 | int reiserfs_write_inode(struct inode *inode, int); | 1879 | int reiserfs_write_inode(struct inode *inode, int); |
| 1878 | int reiserfs_get_block(struct inode *inode, sector_t block, | 1880 | int reiserfs_get_block(struct inode *inode, sector_t block, |
| 1879 | struct buffer_head *bh_result, int create); | 1881 | struct buffer_head *bh_result, int create); |
| 1880 | struct dentry *reiserfs_get_dentry(struct super_block *, void *); | 1882 | struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
| 1881 | struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, | 1883 | int fh_len, int fh_type); |
| 1882 | int len, int fhtype, | 1884 | struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, |
| 1883 | int (*acceptable) (void *contect, | 1885 | int fh_len, int fh_type); |
| 1884 | struct dentry * de), | ||
| 1885 | void *context); | ||
| 1886 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, | 1886 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, |
| 1887 | int connectable); | 1887 | int connectable); |
| 1888 | 1888 | ||
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1833879f8438..3a47871a29d9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
| @@ -187,7 +187,24 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) | |||
| 187 | unsigned long onlined_pages = 0; | 187 | unsigned long onlined_pages = 0; |
| 188 | struct zone *zone; | 188 | struct zone *zone; |
| 189 | int need_zonelists_rebuild = 0; | 189 | int need_zonelists_rebuild = 0; |
| 190 | int nid; | ||
| 191 | int ret; | ||
| 192 | struct memory_notify arg; | ||
| 193 | |||
| 194 | arg.start_pfn = pfn; | ||
| 195 | arg.nr_pages = nr_pages; | ||
| 196 | arg.status_change_nid = -1; | ||
| 197 | |||
| 198 | nid = page_to_nid(pfn_to_page(pfn)); | ||
| 199 | if (node_present_pages(nid) == 0) | ||
| 200 | arg.status_change_nid = nid; | ||
| 190 | 201 | ||
| 202 | ret = memory_notify(MEM_GOING_ONLINE, &arg); | ||
| 203 | ret = notifier_to_errno(ret); | ||
| 204 | if (ret) { | ||
| 205 | memory_notify(MEM_CANCEL_ONLINE, &arg); | ||
| 206 | return ret; | ||
| 207 | } | ||
| 191 | /* | 208 | /* |
| 192 | * This doesn't need a lock to do pfn_to_page(). | 209 | * This doesn't need a lock to do pfn_to_page(). |
| 193 | * The section can't be removed here because of the | 210 | * The section can't be removed here because of the |
| @@ -222,6 +239,10 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) | |||
| 222 | build_all_zonelists(); | 239 | build_all_zonelists(); |
| 223 | vm_total_pages = nr_free_pagecache_pages(); | 240 | vm_total_pages = nr_free_pagecache_pages(); |
| 224 | writeback_set_ratelimit(); | 241 | writeback_set_ratelimit(); |
| 242 | |||
| 243 | if (onlined_pages) | ||
| 244 | memory_notify(MEM_ONLINE, &arg); | ||
| 245 | |||
| 225 | return 0; | 246 | return 0; |
| 226 | } | 247 | } |
| 227 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ | 248 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ |
| @@ -467,8 +488,9 @@ int offline_pages(unsigned long start_pfn, | |||
| 467 | { | 488 | { |
| 468 | unsigned long pfn, nr_pages, expire; | 489 | unsigned long pfn, nr_pages, expire; |
| 469 | long offlined_pages; | 490 | long offlined_pages; |
| 470 | int ret, drain, retry_max; | 491 | int ret, drain, retry_max, node; |
| 471 | struct zone *zone; | 492 | struct zone *zone; |
| 493 | struct memory_notify arg; | ||
| 472 | 494 | ||
| 473 | BUG_ON(start_pfn >= end_pfn); | 495 | BUG_ON(start_pfn >= end_pfn); |
| 474 | /* at least, alignment against pageblock is necessary */ | 496 | /* at least, alignment against pageblock is necessary */ |
| @@ -480,11 +502,27 @@ int offline_pages(unsigned long start_pfn, | |||
| 480 | we assume this for now. .*/ | 502 | we assume this for now. .*/ |
| 481 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) | 503 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) |
| 482 | return -EINVAL; | 504 | return -EINVAL; |
| 505 | |||
| 506 | zone = page_zone(pfn_to_page(start_pfn)); | ||
| 507 | node = zone_to_nid(zone); | ||
| 508 | nr_pages = end_pfn - start_pfn; | ||
| 509 | |||
| 483 | /* set above range as isolated */ | 510 | /* set above range as isolated */ |
| 484 | ret = start_isolate_page_range(start_pfn, end_pfn); | 511 | ret = start_isolate_page_range(start_pfn, end_pfn); |
| 485 | if (ret) | 512 | if (ret) |
| 486 | return ret; | 513 | return ret; |
| 487 | nr_pages = end_pfn - start_pfn; | 514 | |
| 515 | arg.start_pfn = start_pfn; | ||
| 516 | arg.nr_pages = nr_pages; | ||
| 517 | arg.status_change_nid = -1; | ||
| 518 | if (nr_pages >= node_present_pages(node)) | ||
| 519 | arg.status_change_nid = node; | ||
| 520 | |||
| 521 | ret = memory_notify(MEM_GOING_OFFLINE, &arg); | ||
| 522 | ret = notifier_to_errno(ret); | ||
| 523 | if (ret) | ||
| 524 | goto failed_removal; | ||
| 525 | |||
| 488 | pfn = start_pfn; | 526 | pfn = start_pfn; |
| 489 | expire = jiffies + timeout; | 527 | expire = jiffies + timeout; |
| 490 | drain = 0; | 528 | drain = 0; |
| @@ -539,20 +577,24 @@ repeat: | |||
| 539 | /* reset pagetype flags */ | 577 | /* reset pagetype flags */ |
| 540 | start_isolate_page_range(start_pfn, end_pfn); | 578 | start_isolate_page_range(start_pfn, end_pfn); |
| 541 | /* removal success */ | 579 | /* removal success */ |
| 542 | zone = page_zone(pfn_to_page(start_pfn)); | ||
| 543 | zone->present_pages -= offlined_pages; | 580 | zone->present_pages -= offlined_pages; |
| 544 | zone->zone_pgdat->node_present_pages -= offlined_pages; | 581 | zone->zone_pgdat->node_present_pages -= offlined_pages; |
| 545 | totalram_pages -= offlined_pages; | 582 | totalram_pages -= offlined_pages; |
| 546 | num_physpages -= offlined_pages; | 583 | num_physpages -= offlined_pages; |
| 584 | |||
| 547 | vm_total_pages = nr_free_pagecache_pages(); | 585 | vm_total_pages = nr_free_pagecache_pages(); |
| 548 | writeback_set_ratelimit(); | 586 | writeback_set_ratelimit(); |
| 587 | |||
| 588 | memory_notify(MEM_OFFLINE, &arg); | ||
| 549 | return 0; | 589 | return 0; |
| 550 | 590 | ||
| 551 | failed_removal: | 591 | failed_removal: |
| 552 | printk(KERN_INFO "memory offlining %lx to %lx failed\n", | 592 | printk(KERN_INFO "memory offlining %lx to %lx failed\n", |
| 553 | start_pfn, end_pfn); | 593 | start_pfn, end_pfn); |
| 594 | memory_notify(MEM_CANCEL_OFFLINE, &arg); | ||
| 554 | /* pushback to free area */ | 595 | /* pushback to free area */ |
| 555 | undo_isolate_page_range(start_pfn, end_pfn); | 596 | undo_isolate_page_range(start_pfn, end_pfn); |
| 597 | |||
| 556 | return ret; | 598 | return ret; |
| 557 | } | 599 | } |
| 558 | #else | 600 | #else |
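
The hunks above follow a three-phase protocol: a GOING_* event whose callbacks may veto, a CANCEL_* rollback if anything failed, and a final MEM_ONLINE/MEM_OFFLINE once the operation can no longer fail. A condensed, hedged sketch of the online side; the wrapper name is illustrative:

    #include <linux/memory.h>
    #include <linux/notifier.h>

    static int online_with_notify(struct memory_notify *arg)
    {
            int ret = memory_notify(MEM_GOING_ONLINE, arg);

            ret = notifier_to_errno(ret);
            if (ret) {
                    /* a callback vetoed: let everyone unwind */
                    memory_notify(MEM_CANCEL_ONLINE, arg);
                    return ret;
            }

            /* ... actually online the pages here ... */

            memory_notify(MEM_ONLINE, arg);         /* commit */
            return 0;
    }
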
diff --git a/mm/shmem.c b/mm/shmem.c index 289dbb0a6fd6..404e53bb2127 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
| @@ -2020,33 +2020,25 @@ static int shmem_match(struct inode *ino, void *vfh) | |||
| 2020 | return ino->i_ino == inum && fh[0] == ino->i_generation; | 2020 | return ino->i_ino == inum && fh[0] == ino->i_generation; |
| 2021 | } | 2021 | } |
| 2022 | 2022 | ||
| 2023 | static struct dentry *shmem_get_dentry(struct super_block *sb, void *vfh) | 2023 | static struct dentry *shmem_fh_to_dentry(struct super_block *sb, |
| 2024 | struct fid *fid, int fh_len, int fh_type) | ||
| 2024 | { | 2025 | { |
| 2025 | struct dentry *de = NULL; | ||
| 2026 | struct inode *inode; | 2026 | struct inode *inode; |
| 2027 | __u32 *fh = vfh; | 2027 | struct dentry *dentry = NULL; |
| 2028 | __u64 inum = fh[2]; | 2028 | u64 inum = fid->raw[2]; |
| 2029 | inum = (inum << 32) | fh[1]; | 2029 | inum = (inum << 32) | fid->raw[1]; |
| 2030 | |||
| 2031 | if (fh_len < 3) | ||
| 2032 | return NULL; | ||
| 2030 | 2033 | ||
| 2031 | inode = ilookup5(sb, (unsigned long)(inum+fh[0]), shmem_match, vfh); | 2034 | inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), |
| 2035 | shmem_match, fid->raw); | ||
| 2032 | if (inode) { | 2036 | if (inode) { |
| 2033 | de = d_find_alias(inode); | 2037 | dentry = d_find_alias(inode); |
| 2034 | iput(inode); | 2038 | iput(inode); |
| 2035 | } | 2039 | } |
| 2036 | 2040 | ||
| 2037 | return de? de: ERR_PTR(-ESTALE); | 2041 | return dentry; |
| 2038 | } | ||
| 2039 | |||
| 2040 | static struct dentry *shmem_decode_fh(struct super_block *sb, __u32 *fh, | ||
| 2041 | int len, int type, | ||
| 2042 | int (*acceptable)(void *context, struct dentry *de), | ||
| 2043 | void *context) | ||
| 2044 | { | ||
| 2045 | if (len < 3) | ||
| 2046 | return ERR_PTR(-ESTALE); | ||
| 2047 | |||
| 2048 | return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable, | ||
| 2049 | context); | ||
| 2050 | } | 2042 | } |
| 2051 | 2043 | ||
| 2052 | static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, | 2044 | static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, |
| @@ -2079,11 +2071,10 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, | |||
| 2079 | return 1; | 2071 | return 1; |
| 2080 | } | 2072 | } |
| 2081 | 2073 | ||
| 2082 | static struct export_operations shmem_export_ops = { | 2074 | static const struct export_operations shmem_export_ops = { |
| 2083 | .get_parent = shmem_get_parent, | 2075 | .get_parent = shmem_get_parent, |
| 2084 | .get_dentry = shmem_get_dentry, | ||
| 2085 | .encode_fh = shmem_encode_fh, | 2076 | .encode_fh = shmem_encode_fh, |
| 2086 | .decode_fh = shmem_decode_fh, | 2077 | .fh_to_dentry = shmem_fh_to_dentry, |
| 2087 | }; | 2078 | }; |
| 2088 | 2079 | ||
| 2089 | static int shmem_parse_options(char *options, int *mode, uid_t *uid, | 2080 | static int shmem_parse_options(char *options, int *mode, uid_t *uid, |
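
The shmem conversion keeps its historical handle layout: generation in raw[0], low 32 bits of the inode number in raw[1], high bits in raw[2]. A hedged userspace round trip of that packing:

    #include <assert.h>
    #include <stdint.h>

    static void encode(uint32_t raw[3], uint64_t ino, uint32_t gen)
    {
            raw[0] = gen;
            raw[1] = (uint32_t)ino;          /* low word */
            raw[2] = (uint32_t)(ino >> 32);  /* high word */
    }

    static uint64_t decode_ino(const uint32_t raw[3])
    {
            return ((uint64_t)raw[2] << 32) | raw[1];
    }

    int main(void)
    {
            uint32_t raw[3];

            encode(raw, 0x0123456789abcdefULL, 7);
            assert(decode_ino(raw) == 0x0123456789abcdefULL);
            assert(raw[0] == 7);
            return 0;
    }
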
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/mempolicy.h> | 20 | #include <linux/mempolicy.h> |
| 21 | #include <linux/ctype.h> | 21 | #include <linux/ctype.h> |
| 22 | #include <linux/kallsyms.h> | 22 | #include <linux/kallsyms.h> |
| 23 | #include <linux/memory.h> | ||
| 23 | 24 | ||
| 24 | /* | 25 | /* |
| 25 | * Lock order: | 26 | * Lock order: |
| @@ -2694,6 +2695,121 @@ int kmem_cache_shrink(struct kmem_cache *s) | |||
| 2694 | } | 2695 | } |
| 2695 | EXPORT_SYMBOL(kmem_cache_shrink); | 2696 | EXPORT_SYMBOL(kmem_cache_shrink); |
| 2696 | 2697 | ||
| 2698 | #if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG) | ||
| 2699 | static int slab_mem_going_offline_callback(void *arg) | ||
| 2700 | { | ||
| 2701 | struct kmem_cache *s; | ||
| 2702 | |||
| 2703 | down_read(&slub_lock); | ||
| 2704 | list_for_each_entry(s, &slab_caches, list) | ||
| 2705 | kmem_cache_shrink(s); | ||
| 2706 | up_read(&slub_lock); | ||
| 2707 | |||
| 2708 | return 0; | ||
| 2709 | } | ||
| 2710 | |||
| 2711 | static void slab_mem_offline_callback(void *arg) | ||
| 2712 | { | ||
| 2713 | struct kmem_cache_node *n; | ||
| 2714 | struct kmem_cache *s; | ||
| 2715 | struct memory_notify *marg = arg; | ||
| 2716 | int offline_node; | ||
| 2717 | |||
| 2718 | offline_node = marg->status_change_nid; | ||
| 2719 | |||
| 2720 | /* | ||
| 2721 | * If the node still has available memory, we still need its | ||
| 2722 | * kmem_cache_node, so there is nothing to free here. | ||
| 2723 | */ | ||
| 2724 | if (offline_node < 0) | ||
| 2725 | return; | ||
| 2726 | |||
| 2727 | down_read(&slub_lock); | ||
| 2728 | list_for_each_entry(s, &slab_caches, list) { | ||
| 2729 | n = get_node(s, offline_node); | ||
| 2730 | if (n) { | ||
| 2731 | /* | ||
| 2732 | * If n->nr_slabs > 0, slabs still exist on the node | ||
| 2733 | * that is going down. We were unable to free them, | ||
| 2734 | * and the offline_pages() function shouldn't have called | ||
| 2735 | * this callback, so we must fail. | ||
| 2736 | */ | ||
| 2737 | BUG_ON(atomic_read(&n->nr_slabs)); | ||
| 2738 | |||
| 2739 | s->node[offline_node] = NULL; | ||
| 2740 | kmem_cache_free(kmalloc_caches, n); | ||
| 2741 | } | ||
| 2742 | } | ||
| 2743 | up_read(&slub_lock); | ||
| 2744 | } | ||
| 2745 | |||
| 2746 | static int slab_mem_going_online_callback(void *arg) | ||
| 2747 | { | ||
| 2748 | struct kmem_cache_node *n; | ||
| 2749 | struct kmem_cache *s; | ||
| 2750 | struct memory_notify *marg = arg; | ||
| 2751 | int nid = marg->status_change_nid; | ||
| 2752 | int ret = 0; | ||
| 2753 | |||
| 2754 | /* | ||
| 2755 | * If the node's memory is already available, then kmem_cache_node is | ||
| 2756 | * already created. Nothing to do. | ||
| 2757 | */ | ||
| 2758 | if (nid < 0) | ||
| 2759 | return 0; | ||
| 2760 | |||
| 2761 | /* | ||
| 2762 | * We are bringing a node online. No memory is available yet. We must | ||
| 2763 | * allocate a kmem_cache_node structure in order to bring the node | ||
| 2764 | * online. | ||
| 2765 | */ | ||
| 2766 | down_read(&slub_lock); | ||
| 2767 | list_for_each_entry(s, &slab_caches, list) { | ||
| 2768 | /* | ||
| 2769 | * XXX: kmem_cache_alloc_node will fallback to other nodes | ||
| 2770 | * since memory is not yet available from the node that | ||
| 2771 | * is brought up. | ||
| 2772 | */ | ||
| 2773 | n = kmem_cache_alloc(kmalloc_caches, GFP_KERNEL); | ||
| 2774 | if (!n) { | ||
| 2775 | ret = -ENOMEM; | ||
| 2776 | goto out; | ||
| 2777 | } | ||
| 2778 | init_kmem_cache_node(n); | ||
| 2779 | s->node[nid] = n; | ||
| 2780 | } | ||
| 2781 | out: | ||
| 2782 | up_read(&slub_lock); | ||
| 2783 | return ret; | ||
| 2784 | } | ||
| 2785 | |||
| 2786 | static int slab_memory_callback(struct notifier_block *self, | ||
| 2787 | unsigned long action, void *arg) | ||
| 2788 | { | ||
| 2789 | int ret = 0; | ||
| 2790 | |||
| 2791 | switch (action) { | ||
| 2792 | case MEM_GOING_ONLINE: | ||
| 2793 | ret = slab_mem_going_online_callback(arg); | ||
| 2794 | break; | ||
| 2795 | case MEM_GOING_OFFLINE: | ||
| 2796 | ret = slab_mem_going_offline_callback(arg); | ||
| 2797 | break; | ||
| 2798 | case MEM_OFFLINE: | ||
| 2799 | case MEM_CANCEL_ONLINE: | ||
| 2800 | slab_mem_offline_callback(arg); | ||
| 2801 | break; | ||
| 2802 | case MEM_ONLINE: | ||
| 2803 | case MEM_CANCEL_OFFLINE: | ||
| 2804 | break; | ||
| 2805 | } | ||
| 2806 | |||
| 2807 | ret = notifier_from_errno(ret); | ||
| 2808 | return ret; | ||
| 2809 | } | ||
| 2810 | |||
| 2811 | #endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */ | ||
| 2812 | |||
| 2697 | /******************************************************************** | 2813 | /******************************************************************** |
| 2698 | * Basic setup of slabs | 2814 | * Basic setup of slabs |
| 2699 | *******************************************************************/ | 2815 | *******************************************************************/ |
| @@ -2715,6 +2831,8 @@ void __init kmem_cache_init(void) | |||
| 2715 | sizeof(struct kmem_cache_node), GFP_KERNEL); | 2831 | sizeof(struct kmem_cache_node), GFP_KERNEL); |
| 2716 | kmalloc_caches[0].refcount = -1; | 2832 | kmalloc_caches[0].refcount = -1; |
| 2717 | caches++; | 2833 | caches++; |
| 2834 | |||
| 2835 | hotplug_memory_notifier(slab_memory_callback, 1); | ||
| 2718 | #endif | 2836 | #endif |
| 2719 | 2837 | ||
| 2720 | /* Able to allocate the per node structures */ | 2838 | /* Able to allocate the per node structures */ |
diff --git a/security/commoncap.c b/security/commoncap.c index 43f902750a1b..bf67871173ef 100644 --- a/security/commoncap.c +++ b/security/commoncap.c | |||
| @@ -190,7 +190,8 @@ int cap_inode_killpriv(struct dentry *dentry) | |||
| 190 | return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); | 190 | return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); |
| 191 | } | 191 | } |
| 192 | 192 | ||
| 193 | static inline int cap_from_disk(__le32 *caps, struct linux_binprm *bprm, | 193 | static inline int cap_from_disk(struct vfs_cap_data *caps, |
| 194 | struct linux_binprm *bprm, | ||
| 194 | int size) | 195 | int size) |
| 195 | { | 196 | { |
| 196 | __u32 magic_etc; | 197 | __u32 magic_etc; |
| @@ -198,7 +199,7 @@ static inline int cap_from_disk(__le32 *caps, struct linux_binprm *bprm, | |||
| 198 | if (size != XATTR_CAPS_SZ) | 199 | if (size != XATTR_CAPS_SZ) |
| 199 | return -EINVAL; | 200 | return -EINVAL; |
| 200 | 201 | ||
| 201 | magic_etc = le32_to_cpu(caps[0]); | 202 | magic_etc = le32_to_cpu(caps->magic_etc); |
| 202 | 203 | ||
| 203 | switch ((magic_etc & VFS_CAP_REVISION_MASK)) { | 204 | switch ((magic_etc & VFS_CAP_REVISION_MASK)) { |
| 204 | case VFS_CAP_REVISION: | 205 | case VFS_CAP_REVISION: |
| @@ -206,8 +207,8 @@ static inline int cap_from_disk(__le32 *caps, struct linux_binprm *bprm, | |||
| 206 | bprm->cap_effective = true; | 207 | bprm->cap_effective = true; |
| 207 | else | 208 | else |
| 208 | bprm->cap_effective = false; | 209 | bprm->cap_effective = false; |
| 209 | bprm->cap_permitted = to_cap_t( le32_to_cpu(caps[1]) ); | 210 | bprm->cap_permitted = to_cap_t(le32_to_cpu(caps->permitted)); |
| 210 | bprm->cap_inheritable = to_cap_t( le32_to_cpu(caps[2]) ); | 211 | bprm->cap_inheritable = to_cap_t(le32_to_cpu(caps->inheritable)); |
| 211 | return 0; | 212 | return 0; |
| 212 | default: | 213 | default: |
| 213 | return -EINVAL; | 214 | return -EINVAL; |
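
The gain in this hunk is type safety: named fields of struct vfs_cap_data replace magic indices into a bare __le32 buffer. From the accesses above, the v1 on-disk layout is three little-endian 32-bit words; a sketch of that layout as implied by this patch (check include/linux/capability.h for the authoritative definition):

    /* Sketch of the v1 security.capability xattr layout. */
    struct vfs_cap_data {
    	__le32 magic_etc;	/* revision in the top byte, plus flags */
    	__le32 permitted;
    	__le32 inheritable;
    };
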
| @@ -219,7 +220,7 @@ static int get_file_caps(struct linux_binprm *bprm) | |||
| 219 | { | 220 | { |
| 220 | struct dentry *dentry; | 221 | struct dentry *dentry; |
| 221 | int rc = 0; | 222 | int rc = 0; |
| 222 | __le32 v1caps[XATTR_CAPS_SZ]; | 223 | struct vfs_cap_data incaps; |
| 223 | struct inode *inode; | 224 | struct inode *inode; |
| 224 | 225 | ||
| 225 | if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) { | 226 | if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) { |
| @@ -232,8 +233,14 @@ static int get_file_caps(struct linux_binprm *bprm) | |||
| 232 | if (!inode->i_op || !inode->i_op->getxattr) | 233 | if (!inode->i_op || !inode->i_op->getxattr) |
| 233 | goto out; | 234 | goto out; |
| 234 | 235 | ||
| 235 | rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &v1caps, | 236 | rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); |
| 236 | XATTR_CAPS_SZ); | 237 | if (rc > 0) { |
| 238 | if (rc == XATTR_CAPS_SZ) | ||
| 239 | rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, | ||
| 240 | &incaps, XATTR_CAPS_SZ); | ||
| 241 | else | ||
| 242 | rc = -EINVAL; | ||
| 243 | } | ||
| 237 | if (rc == -ENODATA || rc == -EOPNOTSUPP) { | 244 | if (rc == -ENODATA || rc == -EOPNOTSUPP) { |
| 238 | /* no data, that's ok */ | 245 | /* no data, that's ok */ |
| 239 | rc = 0; | 246 | rc = 0; |
| @@ -242,7 +249,7 @@ static int get_file_caps(struct linux_binprm *bprm) | |||
| 242 | if (rc < 0) | 249 | if (rc < 0) |
| 243 | goto out; | 250 | goto out; |
| 244 | 251 | ||
| 245 | rc = cap_from_disk(v1caps, bprm, rc); | 252 | rc = cap_from_disk(&incaps, bprm, rc); |
| 246 | if (rc) | 253 | if (rc) |
| 247 | printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", | 254 | printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", |
| 248 | __FUNCTION__, rc, bprm->filename); | 255 | __FUNCTION__, rc, bprm->filename); |
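
The reworked get_file_caps() probes the attribute size first (getxattr with a NULL buffer returns the stored length) and only reads it if it is exactly XATTR_CAPS_SZ bytes. The same probe-then-read idiom works from userspace via getxattr(2); a minimal runnable sketch:

    /* Probe-then-read of an extended attribute, mirroring the
     * kernel-side pattern above. */
    #include <sys/xattr.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, char **argv)
    {
    	const char *name = "security.capability";
    	ssize_t sz;
    	char *buf;

    	if (argc < 2)
    		return 1;

    	sz = getxattr(argv[1], name, NULL, 0);	/* size probe */
    	if (sz < 0) {
    		perror("getxattr");
    		return 1;
    	}

    	buf = malloc(sz);
    	if (!buf)
    		return 1;
    	if (getxattr(argv[1], name, buf, sz) != sz) {
    		perror("getxattr");	/* attribute changed underneath us? */
    		free(buf);
    		return 1;
    	}
    	printf("%s is %zd bytes\n", name, sz);
    	free(buf);
    	return 0;
    }
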
