diff options
80 files changed, 5088 insertions, 937 deletions
diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt new file mode 100644 index 00000000000..c2321903aa0 --- /dev/null +++ b/Documentation/Intel-IOMMU.txt | |||
@@ -0,0 +1,115 @@ | |||
1 | Linux IOMMU Support | ||
2 | =================== | ||
3 | |||
4 | The architecture spec can be obtained from the below location. | ||
5 | |||
6 | http://www.intel.com/technology/virtualization/ | ||
7 | |||
8 | This guide gives a quick cheat sheet for some basic understanding. | ||
9 | |||
10 | Some Keywords | ||
11 | |||
12 | DMAR - DMA remapping | ||
13 | DRHD - DMA Engine Reporting Structure | ||
14 | RMRR - Reserved memory Region Reporting Structure | ||
15 | ZLR - Zero length reads from PCI devices | ||
16 | IOVA - IO Virtual address. | ||
17 | |||
18 | Basic stuff | ||
19 | ----------- | ||
20 | |||
21 | ACPI enumerates and lists the different DMA engines in the platform, and | ||
22 | device scope relationships between PCI devices and which DMA engine controls | ||
23 | them. | ||
24 | |||
25 | What is RMRR? | ||
26 | ------------- | ||
27 | |||
28 | There are some devices the BIOS controls, e.g. USB devices to perform | ||
29 | PS2 emulation. The regions of memory used for these devices are marked | ||
30 | reserved in the e820 map. When we turn on DMA translation, DMA to those | ||
31 | regions will fail. Hence BIOS uses RMRR to specify these regions along with | ||
32 | devices that need to access these regions. OS is expected to setup | ||
33 | unity mappings for these regions for these devices to access these regions. | ||
34 | |||
35 | How is IOVA generated? | ||
36 | --------------------- | ||
37 | |||
38 | Well behaved drivers call pci_map_*() calls before sending command to device | ||
39 | that needs to perform DMA. Once DMA is completed and mapping is no longer | ||
40 | required, the driver performs a pci_unmap_*() call to unmap the region. | ||
41 | |||
42 | The Intel IOMMU driver allocates a virtual address per domain. Each PCIE | ||
43 | device has its own domain (hence protection). Devices under p2p bridges | ||
44 | share the virtual address with all devices under the p2p bridge due to | ||
45 | transaction id aliasing for p2p bridges. | ||
46 | |||
47 | IOVA generation is pretty generic. We used the same technique as vmalloc() | ||
48 | but these are not global address spaces, but separate for each domain. | ||
49 | Different DMA engines may support different number of domains. | ||
50 | |||
51 | We also allocate guard pages with each mapping, so we can attempt to catch | ||
52 | any overflow that might happen. | ||
53 | |||
54 | |||
55 | Graphics Problems? | ||
56 | ------------------ | ||
57 | If you encounter issues with graphics devices, you can try adding | ||
58 | option intel_iommu=igfx_off to turn off the integrated graphics engine. | ||
59 | |||
60 | If it happens to be a PCI device included in the INCLUDE_ALL Engine, | ||
61 | then try enabling CONFIG_DMAR_GFX_WA to setup a 1-1 map. We hear | ||
62 | graphics drivers may be in process of using DMA api's in the near | ||
63 | future and at that time this option can be yanked out. | ||
64 | |||
65 | Some exceptions to IOVA | ||
66 | ----------------------- | ||
67 | Interrupt ranges are not address translated, (0xfee00000 - 0xfeefffff). | ||
68 | The same is true for peer to peer transactions. Hence we reserve the | ||
69 | address from PCI MMIO ranges so they are not allocated for IOVA addresses. | ||
70 | |||
71 | |||
72 | Fault reporting | ||
73 | --------------- | ||
74 | When errors are reported, the DMA engine signals via an interrupt. The fault | ||
75 | reason and the device that caused it are printed on the console. | ||
76 | |||
77 | See below for sample. | ||
78 | |||
79 | |||
80 | Boot Message Sample | ||
81 | ------------------- | ||
82 | |||
83 | Something like this gets printed indicating presence of DMAR tables | ||
84 | in ACPI. | ||
85 | |||
86 | ACPI: DMAR (v001 A M I OEMDMAR 0x00000001 MSFT 0x00000097) @ 0x000000007f5b5ef0 | ||
87 | |||
88 | When DMAR is being processed and initialized by ACPI, prints DMAR locations | ||
89 | and any RMRR's processed. | ||
90 | |||
91 | ACPI DMAR:Host address width 36 | ||
92 | ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed90000 | ||
93 | ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed91000 | ||
94 | ACPI DMAR:DRHD (flags: 0x00000001)base: 0x00000000fed93000 | ||
95 | ACPI DMAR:RMRR base: 0x00000000000ed000 end: 0x00000000000effff | ||
96 | ACPI DMAR:RMRR base: 0x000000007f600000 end: 0x000000007fffffff | ||
97 | |||
98 | When DMAR is enabled for use, you will notice.. | ||
99 | |||
100 | PCI-DMA: Using DMAR IOMMU | ||
101 | |||
102 | Fault reporting | ||
103 | --------------- | ||
104 | |||
105 | DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000 | ||
106 | DMAR:[fault reason 05] PTE Write access is not set | ||
107 | DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000 | ||
108 | DMAR:[fault reason 05] PTE Write access is not set | ||
109 | |||
110 | TBD | ||
111 | ---- | ||
112 | |||
113 | - For compatibility testing, could use unity map domain for all devices, just | ||
114 | provide a 1-1 for all useful memory under a single domain for all devices. | ||
115 | - API for paravirt ops for abstracting functionality for VMM folks. | ||
diff --git a/Documentation/filesystems/Exporting b/Documentation/filesystems/Exporting index 31047e0fe14..87019d2b598 100644 --- a/Documentation/filesystems/Exporting +++ b/Documentation/filesystems/Exporting | |||
@@ -2,9 +2,12 @@ | |||
2 | Making Filesystems Exportable | 2 | Making Filesystems Exportable |
3 | ============================= | 3 | ============================= |
4 | 4 | ||
5 | Most filesystem operations require a dentry (or two) as a starting | 5 | Overview |
6 | -------- | ||
7 | |||
8 | All filesystem operations require a dentry (or two) as a starting | ||
6 | point. Local applications have a reference-counted hold on suitable | 9 | point. Local applications have a reference-counted hold on suitable |
7 | dentrys via open file descriptors or cwd/root. However remote | 10 | dentries via open file descriptors or cwd/root. However remote |
8 | applications that access a filesystem via a remote filesystem protocol | 11 | applications that access a filesystem via a remote filesystem protocol |
9 | such as NFS may not be able to hold such a reference, and so need a | 12 | such as NFS may not be able to hold such a reference, and so need a |
10 | different way to refer to a particular dentry. As the alternative | 13 | different way to refer to a particular dentry. As the alternative |
@@ -13,14 +16,14 @@ server-reboot (among other things, though these tend to be the most | |||
13 | problematic), there is no simple answer like 'filename'. | 16 | problematic), there is no simple answer like 'filename'. |
14 | 17 | ||
15 | The mechanism discussed here allows each filesystem implementation to | 18 | The mechanism discussed here allows each filesystem implementation to |
16 | specify how to generate an opaque (out side of the filesystem) byte | 19 | specify how to generate an opaque (outside of the filesystem) byte |
17 | string for any dentry, and how to find an appropriate dentry for any | 20 | string for any dentry, and how to find an appropriate dentry for any |
18 | given opaque byte string. | 21 | given opaque byte string. |
19 | This byte string will be called a "filehandle fragment" as it | 22 | This byte string will be called a "filehandle fragment" as it |
20 | corresponds to part of an NFS filehandle. | 23 | corresponds to part of an NFS filehandle. |
21 | 24 | ||
22 | A filesystem which supports the mapping between filehandle fragments | 25 | A filesystem which supports the mapping between filehandle fragments |
23 | and dentrys will be termed "exportable". | 26 | and dentries will be termed "exportable". |
24 | 27 | ||
25 | 28 | ||
26 | 29 | ||
@@ -89,11 +92,9 @@ For a filesystem to be exportable it must: | |||
89 | 1/ provide the filehandle fragment routines described below. | 92 | 1/ provide the filehandle fragment routines described below. |
90 | 2/ make sure that d_splice_alias is used rather than d_add | 93 | 2/ make sure that d_splice_alias is used rather than d_add |
91 | when ->lookup finds an inode for a given parent and name. | 94 | when ->lookup finds an inode for a given parent and name. |
92 | Typically the ->lookup routine will end: | 95 | Typically the ->lookup routine will end with a: |
93 | if (inode) | 96 | |
94 | return d_splice(inode, dentry); | 97 | return d_splice_alias(inode, dentry); |
95 | d_add(dentry, inode); | ||
96 | return NULL; | ||
97 | } | 98 | } |
98 | 99 | ||
99 | 100 | ||
@@ -101,67 +102,39 @@ For a filesystem to be exportable it must: | |||
101 | A file system implementation declares that instances of the filesystem | 102 | A file system implementation declares that instances of the filesystem |
102 | are exportable by setting the s_export_op field in the struct | 103 | are exportable by setting the s_export_op field in the struct |
103 | super_block. This field must point to a "struct export_operations" | 104 | super_block. This field must point to a "struct export_operations" |
104 | struct which could potentially be full of NULLs, though normally at | 105 | struct which has the following members: |
105 | least get_parent will be set. | 106 | |
106 | 107 | encode_fh (optional) | |
107 | The primary operations are decode_fh and encode_fh. | 108 | Takes a dentry and creates a filehandle fragment which can later be used |
108 | decode_fh takes a filehandle fragment and tries to find or create a | 109 | to find or create a dentry for the same object. The default |
109 | dentry for the object referred to by the filehandle. | 110 | implementation creates a filehandle fragment that encodes a 32bit inode |
110 | encode_fh takes a dentry and creates a filehandle fragment which can | 111 | and generation number for the inode encoded, and if necessary the |
111 | later be used to find/create a dentry for the same object. | 112 | same information for the parent. |
112 | 113 | ||
113 | decode_fh will probably make use of "find_exported_dentry". | 114 | fh_to_dentry (mandatory) |
114 | This function lives in the "exportfs" module which a filesystem does | 115 | Given a filehandle fragment, this should find the implied object and |
115 | not need unless it is being exported. So rather that calling | 116 | create a dentry for it (possibly with d_alloc_anon). |
116 | find_exported_dentry directly, each filesystem should call it through | 117 | |
117 | the find_exported_dentry pointer in it's export_operations table. | 118 | fh_to_parent (optional but strongly recommended) |
118 | This field is set correctly by the exporting agent (e.g. nfsd) when a | 119 | Given a filehandle fragment, this should find the parent of the |
119 | filesystem is exported, and before any export operations are called. | 120 | implied object and create a dentry for it (possibly with d_alloc_anon). |
120 | 121 | May fail if the filehandle fragment is too small. | |
121 | find_exported_dentry needs three support functions from the | 122 | |
122 | filesystem: | 123 | get_parent (optional but strongly recommended) |
123 | get_name. When given a parent dentry and a child dentry, this | 124 | When given a dentry for a directory, this should return a dentry for |
124 | should find a name in the directory identified by the parent | 125 | the parent. Quite possibly the parent dentry will have been allocated |
125 | dentry, which leads to the object identified by the child dentry. | 126 | by d_alloc_anon. The default get_parent function just returns an error |
126 | If no get_name function is supplied, a default implementation is | 127 | so any filehandle lookup that requires finding a parent will fail. |
127 | provided which uses vfs_readdir to find potential names, and | 128 | ->lookup("..") is *not* used as a default as it can leave ".." entries |
128 | matches inode numbers to find the correct match. | 129 | in the dcache which are too messy to work with. |
129 | 130 | ||
130 | get_parent. When given a dentry for a directory, this should return | 131 | get_name (optional) |
131 | a dentry for the parent. Quite possibly the parent dentry will | 132 | When given a parent dentry and a child dentry, this should find a name |
132 | have been allocated by d_alloc_anon. | 133 | in the directory identified by the parent dentry, which leads to the |
133 | The default get_parent function just returns an error so any | 134 | object identified by the child dentry. If no get_name function is |
134 | filehandle lookup that requires finding a parent will fail. | 135 | supplied, a default implementation is provided which uses vfs_readdir |
135 | ->lookup("..") is *not* used as a default as it can leave ".." | 136 | to find potential names, and matches inode numbers to find the correct |
136 | entries in the dcache which are too messy to work with. | 137 | match. |
137 | |||
138 | get_dentry. When given an opaque datum, this should find the | ||
139 | implied object and create a dentry for it (possibly with | ||
140 | d_alloc_anon). | ||
141 | The opaque datum is whatever is passed down by the decode_fh | ||
142 | function, and is often simply a fragment of the filehandle | ||
143 | fragment. | ||
144 | decode_fh passes two datums through find_exported_dentry. One that | ||
145 | should be used to identify the target object, and one that can be | ||
146 | used to identify the object's parent, should that be necessary. | ||
147 | The default get_dentry function assumes that the datum contains an | ||
148 | inode number and a generation number, and it attempts to get the | ||
149 | inode using "iget" and check it's validity by matching the | ||
150 | generation number. A filesystem should only depend on the default | ||
151 | if iget can safely be used this way. | ||
152 | |||
153 | If decode_fh and/or encode_fh are left as NULL, then default | ||
154 | implementations are used. These defaults are suitable for ext2 and | ||
155 | extremely similar filesystems (like ext3). | ||
156 | |||
157 | The default encode_fh creates a filehandle fragment from the inode | ||
158 | number and generation number of the target together with the inode | ||
159 | number and generation number of the parent (if the parent is | ||
160 | required). | ||
161 | |||
162 | The default decode_fh extract the target and parent datums from the | ||
163 | filehandle assuming the format used by the default encode_fh and | ||
164 | passed them to find_exported_dentry. | ||
165 | 138 | ||
166 | 139 | ||
167 | A filehandle fragment consists of an array of 1 or more 4byte words, | 140 | A filehandle fragment consists of an array of 1 or more 4byte words, |
@@ -172,5 +145,3 @@ generated by encode_fh, in which case it will have been padded with | |||
172 | nuls. Rather, the encode_fh routine should choose a "type" which | 145 | nuls. Rather, the encode_fh routine should choose a "type" which |
173 | indicates the decode_fh how much of the filehandle is valid, and how | 146 | indicates the decode_fh how much of the filehandle is valid, and how |
174 | it should be interpreted. | 147 | it should be interpreted. |
175 | |||
176 | |||
diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt index 35985b34d5a..2f75e750e4f 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/i386/boot.txt | |||
@@ -168,6 +168,8 @@ Offset Proto Name Meaning | |||
168 | 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not | 168 | 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not |
169 | 0235/3 N/A pad2 Unused | 169 | 0235/3 N/A pad2 Unused |
170 | 0238/4 2.06+ cmdline_size Maximum size of the kernel command line | 170 | 0238/4 2.06+ cmdline_size Maximum size of the kernel command line |
171 | 023C/4 2.07+ hardware_subarch Hardware subarchitecture | ||
172 | 0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data | ||
171 | 173 | ||
172 | (1) For backwards compatibility, if the setup_sects field contains 0, the | 174 | (1) For backwards compatibility, if the setup_sects field contains 0, the |
173 | real value is 4. | 175 | real value is 4. |
@@ -204,7 +206,7 @@ boot loaders can ignore those fields. | |||
204 | 206 | ||
205 | The byte order of all fields is littleendian (this is x86, after all.) | 207 | The byte order of all fields is littleendian (this is x86, after all.) |
206 | 208 | ||
207 | Field name: setup_secs | 209 | Field name: setup_sects |
208 | Type: read | 210 | Type: read |
209 | Offset/size: 0x1f1/1 | 211 | Offset/size: 0x1f1/1 |
210 | Protocol: ALL | 212 | Protocol: ALL |
@@ -356,6 +358,13 @@ Protocol: 2.00+ | |||
356 | - If 0, the protected-mode code is loaded at 0x10000. | 358 | - If 0, the protected-mode code is loaded at 0x10000. |
357 | - If 1, the protected-mode code is loaded at 0x100000. | 359 | - If 1, the protected-mode code is loaded at 0x100000. |
358 | 360 | ||
361 | Bit 6 (write): KEEP_SEGMENTS | ||
362 | Protocol: 2.07+ | ||
363 | - if 0, reload the segment registers in the 32bit entry point. | ||
364 | - if 1, do not reload the segment registers in the 32bit entry point. | ||
365 | Assume that %cs %ds %ss %es are all set to flat segments with | ||
366 | a base of 0 (or the equivalent for their environment). | ||
367 | |||
359 | Bit 7 (write): CAN_USE_HEAP | 368 | Bit 7 (write): CAN_USE_HEAP |
360 | Set this bit to 1 to indicate that the value entered in the | 369 | Set this bit to 1 to indicate that the value entered in the |
361 | heap_end_ptr is valid. If this field is clear, some setup code | 370 | heap_end_ptr is valid. If this field is clear, some setup code |
@@ -480,6 +489,29 @@ Protocol: 2.06+ | |||
480 | cmdline_size characters. With protocol version 2.05 and earlier, the | 489 | cmdline_size characters. With protocol version 2.05 and earlier, the |
481 | maximum size was 255. | 490 | maximum size was 255. |
482 | 491 | ||
492 | Field name: hardware_subarch | ||
493 | Type: write | ||
494 | Offset/size: 0x23c/4 | ||
495 | Protocol: 2.07+ | ||
496 | |||
497 | In a paravirtualized environment the hardware low level architectural | ||
498 | pieces such as interrupt handling, page table handling, and | ||
499 | accessing process control registers need to be done differently. | ||
500 | |||
501 | This field allows the bootloader to inform the kernel we are in one | ||
502 | of those environments. | ||
503 | |||
504 | 0x00000000 The default x86/PC environment | ||
505 | 0x00000001 lguest | ||
506 | 0x00000002 Xen | ||
507 | |||
508 | Field name: hardware_subarch_data | ||
509 | Type: write | ||
510 | Offset/size: 0x240/8 | ||
511 | Protocol: 2.07+ | ||
512 | |||
513 | A pointer to data that is specific to hardware subarch | ||
514 | |||
483 | 515 | ||
484 | **** THE KERNEL COMMAND LINE | 516 | **** THE KERNEL COMMAND LINE |
485 | 517 | ||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6accd360da7..b2361667839 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -772,6 +772,23 @@ and is between 256 and 4096 characters. It is defined in the file | |||
772 | 772 | ||
773 | inttest= [IA64] | 773 | inttest= [IA64] |
774 | 774 | ||
775 | intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option | ||
776 | off | ||
777 | Disable intel iommu driver. | ||
778 | igfx_off [Default Off] | ||
779 | By default, gfx is mapped as normal device. If a gfx | ||
780 | device has a dedicated DMAR unit, the DMAR unit is | ||
781 | bypassed by not enabling DMAR with this option. In | ||
782 | this case, gfx device will use physical address for | ||
783 | DMA. | ||
784 | forcedac [x86_64] | ||
785 | With this option iommu will not optimize to look | ||
786 | for io virtual address below 32 bit forcing dual | ||
787 | address cycle on pci bus for cards supporting greater | ||
788 | than 32 bit addressing. The default is to look | ||
789 | for translation below 32 bit and if not available | ||
790 | then look in the higher range. | ||
791 | |||
775 | io7= [HW] IO7 for Marvel based alpha systems | 792 | io7= [HW] IO7 for Marvel based alpha systems |
776 | See comment before marvel_specify_io7 in | 793 | See comment before marvel_specify_io7 in |
777 | arch/alpha/kernel/core_marvel.c. | 794 | arch/alpha/kernel/core_marvel.c. |
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index 5fbcc22c98e..168117bd6ee 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt | |||
@@ -2,7 +2,8 @@ | |||
2 | Memory Hotplug | 2 | Memory Hotplug |
3 | ============== | 3 | ============== |
4 | 4 | ||
5 | Last Updated: Jul 28 2007 | 5 | Created: Jul 28 2007 |
6 | Add description of notifier of memory hotplug Oct 11 2007 | ||
6 | 7 | ||
7 | This document is about memory hotplug including how-to-use and current status. | 8 | This document is about memory hotplug including how-to-use and current status. |
8 | Because Memory Hotplug is still under development, contents of this text will | 9 | Because Memory Hotplug is still under development, contents of this text will |
@@ -24,7 +25,8 @@ be changed often. | |||
24 | 6.1 Memory offline and ZONE_MOVABLE | 25 | 6.1 Memory offline and ZONE_MOVABLE |
25 | 6.2. How to offline memory | 26 | 6.2. How to offline memory |
26 | 7. Physical memory remove | 27 | 7. Physical memory remove |
27 | 8. Future Work List | 28 | 8. Memory hotplug event notifier |
29 | 9. Future Work List | ||
28 | 30 | ||
29 | Note(1): x86_64's has special implementation for memory hotplug. | 31 | Note(1): x86_64's has special implementation for memory hotplug. |
30 | This text does not describe it. | 32 | This text does not describe it. |
@@ -307,8 +309,58 @@ Need more implementation yet.... | |||
307 | - Notification completion of remove works by OS to firmware. | 309 | - Notification completion of remove works by OS to firmware. |
308 | - Guard from remove if not yet. | 310 | - Guard from remove if not yet. |
309 | 311 | ||
312 | -------------------------------- | ||
313 | 8. Memory hotplug event notifier | ||
314 | -------------------------------- | ||
315 | Memory hotplug has an event notifier. There are 6 types of notification. | ||
316 | |||
317 | MEMORY_GOING_ONLINE | ||
318 | Generated before new memory becomes available in order to be able to | ||
319 | prepare subsystems to handle memory. The page allocator is still unable | ||
320 | to allocate from the new memory. | ||
321 | |||
322 | MEMORY_CANCEL_ONLINE | ||
323 | Generated if MEMORY_GOING_ONLINE fails. | ||
324 | |||
325 | MEMORY_ONLINE | ||
326 | Generated when memory has been successfully brought online. The callback may | ||
327 | allocate pages from the new memory. | ||
328 | |||
329 | MEMORY_GOING_OFFLINE | ||
330 | Generated to begin the process of offlining memory. Allocations are no | ||
331 | longer possible from the memory but some of the memory to be offlined | ||
332 | is still in use. The callback can be used to free memory known to a | ||
333 | subsystem from the indicated memory section. | ||
334 | |||
335 | MEMORY_CANCEL_OFFLINE | ||
336 | Generated if MEMORY_GOING_OFFLINE fails. Memory is available again from | ||
337 | the section that we attempted to offline. | ||
338 | |||
339 | MEMORY_OFFLINE | ||
340 | Generated after offlining memory is complete. | ||
341 | |||
342 | A callback routine can be registered by | ||
343 | hotplug_memory_notifier(callback_func, priority) | ||
344 | |||
345 | The second argument of callback function (action) is event types of above. | ||
346 | The third argument is passed by pointer of struct memory_notify. | ||
347 | |||
348 | struct memory_notify { | ||
349 | unsigned long start_pfn; | ||
350 | unsigned long nr_pages; | ||
351 | int status_change_nid; | ||
352 | } | ||
353 | |||
354 | start_pfn is start_pfn of online/offline memory. | ||
355 | nr_pages is # of pages of online/offline memory. | ||
356 | status_change_nid is set node id when N_HIGH_MEMORY of nodemask is (will be) | ||
357 | set/clear. It means a new(memoryless) node gets new memory by online and a | ||
358 | node loses all memory. If this is -1, then nodemask status is not changed. | ||
359 | If status_change_nid >= 0, callback should create/discard structures for the | ||
360 | node if necessary. | ||
361 | |||
310 | -------------- | 362 | -------------- |
311 | 8. Future Work | 363 | 9. Future Work |
312 | -------------- | 364 | -------------- |
313 | - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like | 365 | - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like |
314 | sysctl or new control file. | 366 | sysctl or new control file. |
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 8e4894b205e..3f7ea13358e 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c | |||
@@ -1090,7 +1090,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e) | |||
1090 | 1090 | ||
1091 | void | 1091 | void |
1092 | efi_initialize_iomem_resources(struct resource *code_resource, | 1092 | efi_initialize_iomem_resources(struct resource *code_resource, |
1093 | struct resource *data_resource) | 1093 | struct resource *data_resource, |
1094 | struct resource *bss_resource) | ||
1094 | { | 1095 | { |
1095 | struct resource *res; | 1096 | struct resource *res; |
1096 | void *efi_map_start, *efi_map_end, *p; | 1097 | void *efi_map_start, *efi_map_end, *p; |
@@ -1171,6 +1172,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, | |||
1171 | */ | 1172 | */ |
1172 | insert_resource(res, code_resource); | 1173 | insert_resource(res, code_resource); |
1173 | insert_resource(res, data_resource); | 1174 | insert_resource(res, data_resource); |
1175 | insert_resource(res, bss_resource); | ||
1174 | #ifdef CONFIG_KEXEC | 1176 | #ifdef CONFIG_KEXEC |
1175 | insert_resource(res, &efi_memmap_res); | 1177 | insert_resource(res, &efi_memmap_res); |
1176 | insert_resource(res, &boot_param_res); | 1178 | insert_resource(res, &boot_param_res); |
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index cbf67f1aa29..ae6c3c02e11 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c | |||
@@ -90,7 +90,12 @@ static struct resource code_resource = { | |||
90 | .name = "Kernel code", | 90 | .name = "Kernel code", |
91 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | 91 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
92 | }; | 92 | }; |
93 | extern char _text[], _end[], _etext[]; | 93 | |
94 | static struct resource bss_resource = { | ||
95 | .name = "Kernel bss", | ||
96 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
97 | }; | ||
98 | extern char _text[], _end[], _etext[], _edata[], _bss[]; | ||
94 | 99 | ||
95 | unsigned long ia64_max_cacheline_size; | 100 | unsigned long ia64_max_cacheline_size; |
96 | 101 | ||
@@ -200,8 +205,11 @@ static int __init register_memory(void) | |||
200 | code_resource.start = ia64_tpa(_text); | 205 | code_resource.start = ia64_tpa(_text); |
201 | code_resource.end = ia64_tpa(_etext) - 1; | 206 | code_resource.end = ia64_tpa(_etext) - 1; |
202 | data_resource.start = ia64_tpa(_etext); | 207 | data_resource.start = ia64_tpa(_etext); |
203 | data_resource.end = ia64_tpa(_end) - 1; | 208 | data_resource.end = ia64_tpa(_edata) - 1; |
204 | efi_initialize_iomem_resources(&code_resource, &data_resource); | 209 | bss_resource.start = ia64_tpa(_bss); |
210 | bss_resource.end = ia64_tpa(_end) - 1; | ||
211 | efi_initialize_iomem_resources(&code_resource, &data_resource, | ||
212 | &bss_resource); | ||
205 | 213 | ||
206 | return 0; | 214 | return 0; |
207 | } | 215 | } |
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index f35ea223752..a0ae2e7f6ce 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S | |||
@@ -27,13 +27,22 @@ | |||
27 | #include <asm/segment.h> | 27 | #include <asm/segment.h> |
28 | #include <asm/page.h> | 28 | #include <asm/page.h> |
29 | #include <asm/boot.h> | 29 | #include <asm/boot.h> |
30 | #include <asm/asm-offsets.h> | ||
30 | 31 | ||
31 | .section ".text.head","ax",@progbits | 32 | .section ".text.head","ax",@progbits |
32 | .globl startup_32 | 33 | .globl startup_32 |
33 | 34 | ||
34 | startup_32: | 35 | startup_32: |
35 | cld | 36 | /* check to see if KEEP_SEGMENTS flag is meaningful */ |
36 | cli | 37 | cmpw $0x207, BP_version(%esi) |
38 | jb 1f | ||
39 | |||
40 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking | ||
41 | * us to not reload segments */ | ||
42 | testb $(1<<6), BP_loadflags(%esi) | ||
43 | jnz 2f | ||
44 | |||
45 | 1: cli | ||
37 | movl $(__BOOT_DS),%eax | 46 | movl $(__BOOT_DS),%eax |
38 | movl %eax,%ds | 47 | movl %eax,%ds |
39 | movl %eax,%es | 48 | movl %eax,%es |
@@ -41,6 +50,8 @@ startup_32: | |||
41 | movl %eax,%gs | 50 | movl %eax,%gs |
42 | movl %eax,%ss | 51 | movl %eax,%ss |
43 | 52 | ||
53 | 2: cld | ||
54 | |||
44 | /* Calculate the delta between where we were compiled to run | 55 | /* Calculate the delta between where we were compiled to run |
45 | * at and where we were actually loaded at. This can only be done | 56 | * at and where we were actually loaded at. This can only be done |
46 | * with a short local call on x86. Nothing else will tell us what | 57 | * with a short local call on x86. Nothing else will tell us what |
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c index 1dc1e19c0a9..b74d60d1b2f 100644 --- a/arch/x86/boot/compressed/misc_32.c +++ b/arch/x86/boot/compressed/misc_32.c | |||
@@ -247,6 +247,9 @@ static void putstr(const char *s) | |||
247 | int x,y,pos; | 247 | int x,y,pos; |
248 | char c; | 248 | char c; |
249 | 249 | ||
250 | if (RM_SCREEN_INFO.orig_video_mode == 0 && lines == 0 && cols == 0) | ||
251 | return; | ||
252 | |||
250 | x = RM_SCREEN_INFO.orig_x; | 253 | x = RM_SCREEN_INFO.orig_x; |
251 | y = RM_SCREEN_INFO.orig_y; | 254 | y = RM_SCREEN_INFO.orig_y; |
252 | 255 | ||
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index f3140e596d4..8353c81c41c 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S | |||
@@ -119,7 +119,7 @@ _start: | |||
119 | # Part 2 of the header, from the old setup.S | 119 | # Part 2 of the header, from the old setup.S |
120 | 120 | ||
121 | .ascii "HdrS" # header signature | 121 | .ascii "HdrS" # header signature |
122 | .word 0x0206 # header version number (>= 0x0105) | 122 | .word 0x0207 # header version number (>= 0x0105) |
123 | # or else old loadlin-1.5 will fail) | 123 | # or else old loadlin-1.5 will fail) |
124 | .globl realmode_swtch | 124 | .globl realmode_swtch |
125 | realmode_swtch: .word 0, 0 # default_switch, SETUPSEG | 125 | realmode_swtch: .word 0, 0 # default_switch, SETUPSEG |
@@ -214,6 +214,11 @@ cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, | |||
214 | #added with boot protocol | 214 | #added with boot protocol |
215 | #version 2.06 | 215 | #version 2.06 |
216 | 216 | ||
217 | hardware_subarch: .long 0 # subarchitecture, added with 2.07 | ||
218 | # default to 0 for normal x86 PC | ||
219 | |||
220 | hardware_subarch_data: .quad 0 | ||
221 | |||
217 | # End of setup header ##################################################### | 222 | # End of setup header ##################################################### |
218 | 223 | ||
219 | .section ".inittext", "ax" | 224 | .section ".inittext", "ax" |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index f1b7cdda82b..f8764716b0c 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <asm/fixmap.h> | 15 | #include <asm/fixmap.h> |
16 | #include <asm/processor.h> | 16 | #include <asm/processor.h> |
17 | #include <asm/thread_info.h> | 17 | #include <asm/thread_info.h> |
18 | #include <asm/bootparam.h> | ||
18 | #include <asm/elf.h> | 19 | #include <asm/elf.h> |
19 | 20 | ||
20 | #include <xen/interface/xen.h> | 21 | #include <xen/interface/xen.h> |
@@ -146,4 +147,10 @@ void foo(void) | |||
146 | OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); | 147 | OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); |
147 | OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); | 148 | OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); |
148 | #endif | 149 | #endif |
150 | |||
151 | BLANK(); | ||
152 | OFFSET(BP_scratch, boot_params, scratch); | ||
153 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); | ||
154 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | ||
155 | OFFSET(BP_version, boot_params, hdr.version); | ||
149 | } | 156 | } |
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 58fd54eb557..18f500d185a 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c | |||
@@ -51,6 +51,13 @@ struct resource code_resource = { | |||
51 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | 51 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
52 | }; | 52 | }; |
53 | 53 | ||
54 | struct resource bss_resource = { | ||
55 | .name = "Kernel bss", | ||
56 | .start = 0, | ||
57 | .end = 0, | ||
58 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
59 | }; | ||
60 | |||
54 | static struct resource system_rom_resource = { | 61 | static struct resource system_rom_resource = { |
55 | .name = "System ROM", | 62 | .name = "System ROM", |
56 | .start = 0xf0000, | 63 | .start = 0xf0000, |
@@ -254,7 +261,9 @@ static void __init probe_roms(void) | |||
254 | * and also for regions reported as reserved by the e820. | 261 | * and also for regions reported as reserved by the e820. |
255 | */ | 262 | */ |
256 | static void __init | 263 | static void __init |
257 | legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) | 264 | legacy_init_iomem_resources(struct resource *code_resource, |
265 | struct resource *data_resource, | ||
266 | struct resource *bss_resource) | ||
258 | { | 267 | { |
259 | int i; | 268 | int i; |
260 | 269 | ||
@@ -287,6 +296,7 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat | |||
287 | */ | 296 | */ |
288 | request_resource(res, code_resource); | 297 | request_resource(res, code_resource); |
289 | request_resource(res, data_resource); | 298 | request_resource(res, data_resource); |
299 | request_resource(res, bss_resource); | ||
290 | #ifdef CONFIG_KEXEC | 300 | #ifdef CONFIG_KEXEC |
291 | if (crashk_res.start != crashk_res.end) | 301 | if (crashk_res.start != crashk_res.end) |
292 | request_resource(res, &crashk_res); | 302 | request_resource(res, &crashk_res); |
@@ -307,9 +317,11 @@ static int __init request_standard_resources(void) | |||
307 | 317 | ||
308 | printk("Setting up standard PCI resources\n"); | 318 | printk("Setting up standard PCI resources\n"); |
309 | if (efi_enabled) | 319 | if (efi_enabled) |
310 | efi_initialize_iomem_resources(&code_resource, &data_resource); | 320 | efi_initialize_iomem_resources(&code_resource, |
321 | &data_resource, &bss_resource); | ||
311 | else | 322 | else |
312 | legacy_init_iomem_resources(&code_resource, &data_resource); | 323 | legacy_init_iomem_resources(&code_resource, |
324 | &data_resource, &bss_resource); | ||
313 | 325 | ||
314 | /* EFI systems may still have VGA */ | 326 | /* EFI systems may still have VGA */ |
315 | request_resource(&iomem_resource, &video_ram_resource); | 327 | request_resource(&iomem_resource, &video_ram_resource); |
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 57616865d8a..04698e0b056 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c | |||
@@ -47,7 +47,7 @@ unsigned long end_pfn_map; | |||
47 | */ | 47 | */ |
48 | static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; | 48 | static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; |
49 | 49 | ||
50 | extern struct resource code_resource, data_resource; | 50 | extern struct resource code_resource, data_resource, bss_resource; |
51 | 51 | ||
52 | /* Check for some hardcoded bad areas that early boot is not allowed to touch */ | 52 | /* Check for some hardcoded bad areas that early boot is not allowed to touch */ |
53 | static inline int bad_addr(unsigned long *addrp, unsigned long size) | 53 | static inline int bad_addr(unsigned long *addrp, unsigned long size) |
@@ -225,6 +225,7 @@ void __init e820_reserve_resources(void) | |||
225 | */ | 225 | */ |
226 | request_resource(res, &code_resource); | 226 | request_resource(res, &code_resource); |
227 | request_resource(res, &data_resource); | 227 | request_resource(res, &data_resource); |
228 | request_resource(res, &bss_resource); | ||
228 | #ifdef CONFIG_KEXEC | 229 | #ifdef CONFIG_KEXEC |
229 | if (crashk_res.start != crashk_res.end) | 230 | if (crashk_res.start != crashk_res.end) |
230 | request_resource(res, &crashk_res); | 231 | request_resource(res, &crashk_res); |
@@ -729,3 +730,22 @@ __init void e820_setup_gap(void) | |||
729 | printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | 730 | printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", |
730 | pci_mem_start, gapstart, gapsize); | 731 | pci_mem_start, gapstart, gapsize); |
731 | } | 732 | } |
733 | |||
734 | int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) | ||
735 | { | ||
736 | int i; | ||
737 | |||
738 | if (slot < 0 || slot >= e820.nr_map) | ||
739 | return -1; | ||
740 | for (i = slot; i < e820.nr_map; i++) { | ||
741 | if (e820.map[i].type != E820_RAM) | ||
742 | continue; | ||
743 | break; | ||
744 | } | ||
745 | if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) | ||
746 | return -1; | ||
747 | *addr = e820.map[i].addr; | ||
748 | *size = min_t(u64, e820.map[i].size + e820.map[i].addr, | ||
749 | max_pfn << PAGE_SHIFT) - *addr; | ||
750 | return i + 1; | ||
751 | } | ||
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index b42558c48e9..e2be78f4939 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c | |||
@@ -603,7 +603,8 @@ void __init efi_enter_virtual_mode(void) | |||
603 | 603 | ||
604 | void __init | 604 | void __init |
605 | efi_initialize_iomem_resources(struct resource *code_resource, | 605 | efi_initialize_iomem_resources(struct resource *code_resource, |
606 | struct resource *data_resource) | 606 | struct resource *data_resource, |
607 | struct resource *bss_resource) | ||
607 | { | 608 | { |
608 | struct resource *res; | 609 | struct resource *res; |
609 | efi_memory_desc_t *md; | 610 | efi_memory_desc_t *md; |
@@ -675,6 +676,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, | |||
675 | if (md->type == EFI_CONVENTIONAL_MEMORY) { | 676 | if (md->type == EFI_CONVENTIONAL_MEMORY) { |
676 | request_resource(res, code_resource); | 677 | request_resource(res, code_resource); |
677 | request_resource(res, data_resource); | 678 | request_resource(res, data_resource); |
679 | request_resource(res, bss_resource); | ||
678 | #ifdef CONFIG_KEXEC | 680 | #ifdef CONFIG_KEXEC |
679 | request_resource(res, &crashk_res); | 681 | request_resource(res, &crashk_res); |
680 | #endif | 682 | #endif |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 39677965e16..00b1c2c5645 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -79,22 +79,30 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_ | |||
79 | */ | 79 | */ |
80 | .section .text.head,"ax",@progbits | 80 | .section .text.head,"ax",@progbits |
81 | ENTRY(startup_32) | 81 | ENTRY(startup_32) |
82 | /* check to see if KEEP_SEGMENTS flag is meaningful */ | ||
83 | cmpw $0x207, BP_version(%esi) | ||
84 | jb 1f | ||
85 | |||
86 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking | ||
87 | us to not reload segments */ | ||
88 | testb $(1<<6), BP_loadflags(%esi) | ||
89 | jnz 2f | ||
82 | 90 | ||
83 | /* | 91 | /* |
84 | * Set segments to known values. | 92 | * Set segments to known values. |
85 | */ | 93 | */ |
86 | cld | 94 | 1: lgdt boot_gdt_descr - __PAGE_OFFSET |
87 | lgdt boot_gdt_descr - __PAGE_OFFSET | ||
88 | movl $(__BOOT_DS),%eax | 95 | movl $(__BOOT_DS),%eax |
89 | movl %eax,%ds | 96 | movl %eax,%ds |
90 | movl %eax,%es | 97 | movl %eax,%es |
91 | movl %eax,%fs | 98 | movl %eax,%fs |
92 | movl %eax,%gs | 99 | movl %eax,%gs |
100 | 2: | ||
93 | 101 | ||
94 | /* | 102 | /* |
95 | * Clear BSS first so that there are no surprises... | 103 | * Clear BSS first so that there are no surprises... |
96 | * No need to cld as DF is already clear from cld above... | ||
97 | */ | 104 | */ |
105 | cld | ||
98 | xorl %eax,%eax | 106 | xorl %eax,%eax |
99 | movl $__bss_start - __PAGE_OFFSET,%edi | 107 | movl $__bss_start - __PAGE_OFFSET,%edi |
100 | movl $__bss_stop - __PAGE_OFFSET,%ecx | 108 | movl $__bss_stop - __PAGE_OFFSET,%ecx |
@@ -128,6 +136,35 @@ ENTRY(startup_32) | |||
128 | movsl | 136 | movsl |
129 | 1: | 137 | 1: |
130 | 138 | ||
139 | #ifdef CONFIG_PARAVIRT | ||
140 | cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET) | ||
141 | jb default_entry | ||
142 | |||
143 | /* Paravirt-compatible boot parameters. Look to see what architecture | ||
144 | we're booting under. */ | ||
145 | movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax | ||
146 | cmpl $num_subarch_entries, %eax | ||
147 | jae bad_subarch | ||
148 | |||
149 | movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax | ||
150 | subl $__PAGE_OFFSET, %eax | ||
151 | jmp *%eax | ||
152 | |||
153 | bad_subarch: | ||
154 | WEAK(lguest_entry) | ||
155 | WEAK(xen_entry) | ||
156 | /* Unknown implementation; there's really | ||
157 | nothing we can do at this point. */ | ||
158 | ud2a | ||
159 | .data | ||
160 | subarch_entries: | ||
161 | .long default_entry /* normal x86/PC */ | ||
162 | .long lguest_entry /* lguest hypervisor */ | ||
163 | .long xen_entry /* Xen hypervisor */ | ||
164 | num_subarch_entries = (. - subarch_entries) / 4 | ||
165 | .previous | ||
166 | #endif /* CONFIG_PARAVIRT */ | ||
167 | |||
131 | /* | 168 | /* |
132 | * Initialize page tables. This creates a PDE and a set of page | 169 | * Initialize page tables. This creates a PDE and a set of page |
133 | * tables, which are located immediately beyond _end. The variable | 170 | * tables, which are located immediately beyond _end. The variable |
@@ -140,6 +177,7 @@ ENTRY(startup_32) | |||
140 | */ | 177 | */ |
141 | page_pde_offset = (__PAGE_OFFSET >> 20); | 178 | page_pde_offset = (__PAGE_OFFSET >> 20); |
142 | 179 | ||
180 | default_entry: | ||
143 | movl $(pg0 - __PAGE_OFFSET), %edi | 181 | movl $(pg0 - __PAGE_OFFSET), %edi |
144 | movl $(swapper_pg_dir - __PAGE_OFFSET), %edx | 182 | movl $(swapper_pg_dir - __PAGE_OFFSET), %edx |
145 | movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ | 183 | movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ |
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b3c2d268d70..953328b55a3 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/sysdev.h> | 31 | #include <linux/sysdev.h> |
32 | #include <linux/msi.h> | 32 | #include <linux/msi.h> |
33 | #include <linux/htirq.h> | 33 | #include <linux/htirq.h> |
34 | #include <linux/dmar.h> | ||
34 | #ifdef CONFIG_ACPI | 35 | #ifdef CONFIG_ACPI |
35 | #include <acpi/acpi_bus.h> | 36 | #include <acpi/acpi_bus.h> |
36 | #endif | 37 | #endif |
@@ -2031,8 +2032,64 @@ void arch_teardown_msi_irq(unsigned int irq) | |||
2031 | destroy_irq(irq); | 2032 | destroy_irq(irq); |
2032 | } | 2033 | } |
2033 | 2034 | ||
2034 | #endif /* CONFIG_PCI_MSI */ | 2035 | #ifdef CONFIG_DMAR |
2036 | #ifdef CONFIG_SMP | ||
2037 | static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) | ||
2038 | { | ||
2039 | struct irq_cfg *cfg = irq_cfg + irq; | ||
2040 | struct msi_msg msg; | ||
2041 | unsigned int dest; | ||
2042 | cpumask_t tmp; | ||
2043 | |||
2044 | cpus_and(tmp, mask, cpu_online_map); | ||
2045 | if (cpus_empty(tmp)) | ||
2046 | return; | ||
2047 | |||
2048 | if (assign_irq_vector(irq, mask)) | ||
2049 | return; | ||
2050 | |||
2051 | cpus_and(tmp, cfg->domain, mask); | ||
2052 | dest = cpu_mask_to_apicid(tmp); | ||
2053 | |||
2054 | dmar_msi_read(irq, &msg); | ||
2055 | |||
2056 | msg.data &= ~MSI_DATA_VECTOR_MASK; | ||
2057 | msg.data |= MSI_DATA_VECTOR(cfg->vector); | ||
2058 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; | ||
2059 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | ||
2060 | |||
2061 | dmar_msi_write(irq, &msg); | ||
2062 | irq_desc[irq].affinity = mask; | ||
2063 | } | ||
2064 | #endif /* CONFIG_SMP */ | ||
2065 | |||
2066 | struct irq_chip dmar_msi_type = { | ||
2067 | .name = "DMAR_MSI", | ||
2068 | .unmask = dmar_msi_unmask, | ||
2069 | .mask = dmar_msi_mask, | ||
2070 | .ack = ack_apic_edge, | ||
2071 | #ifdef CONFIG_SMP | ||
2072 | .set_affinity = dmar_msi_set_affinity, | ||
2073 | #endif | ||
2074 | .retrigger = ioapic_retrigger_irq, | ||
2075 | }; | ||
2076 | |||
2077 | int arch_setup_dmar_msi(unsigned int irq) | ||
2078 | { | ||
2079 | int ret; | ||
2080 | struct msi_msg msg; | ||
2081 | |||
2082 | ret = msi_compose_msg(NULL, irq, &msg); | ||
2083 | if (ret < 0) | ||
2084 | return ret; | ||
2085 | dmar_msi_write(irq, &msg); | ||
2086 | set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, | ||
2087 | "edge"); | ||
2088 | return 0; | ||
2089 | } | ||
2090 | #endif | ||
2035 | 2091 | ||
2092 | #endif /* CONFIG_PCI_MSI */ | ||
2036 | /* | 2093 | /* |
2037 | * Hypertransport interrupt support | 2094 | * Hypertransport interrupt support |
2038 | */ | 2095 | */ |
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index afaf9f12c03..393e2725a6e 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/dmar.h> | ||
10 | #include <asm/io.h> | 11 | #include <asm/io.h> |
11 | #include <asm/iommu.h> | 12 | #include <asm/iommu.h> |
12 | #include <asm/calgary.h> | 13 | #include <asm/calgary.h> |
@@ -305,6 +306,8 @@ void __init pci_iommu_alloc(void) | |||
305 | detect_calgary(); | 306 | detect_calgary(); |
306 | #endif | 307 | #endif |
307 | 308 | ||
309 | detect_intel_iommu(); | ||
310 | |||
308 | #ifdef CONFIG_SWIOTLB | 311 | #ifdef CONFIG_SWIOTLB |
309 | pci_swiotlb_init(); | 312 | pci_swiotlb_init(); |
310 | #endif | 313 | #endif |
@@ -316,6 +319,8 @@ static int __init pci_iommu_init(void) | |||
316 | calgary_iommu_init(); | 319 | calgary_iommu_init(); |
317 | #endif | 320 | #endif |
318 | 321 | ||
322 | intel_iommu_init(); | ||
323 | |||
319 | #ifdef CONFIG_IOMMU | 324 | #ifdef CONFIG_IOMMU |
320 | gart_iommu_init(); | 325 | gart_iommu_init(); |
321 | #endif | 326 | #endif |
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index ba2e165a8a0..cc0e91447b7 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include <asm/vmi.h> | 60 | #include <asm/vmi.h> |
61 | #include <setup_arch.h> | 61 | #include <setup_arch.h> |
62 | #include <bios_ebda.h> | 62 | #include <bios_ebda.h> |
63 | #include <asm/cacheflush.h> | ||
63 | 64 | ||
64 | /* This value is set up by the early boot code to point to the value | 65 | /* This value is set up by the early boot code to point to the value |
65 | immediately after the boot time page tables. It contains a *physical* | 66 | immediately after the boot time page tables. It contains a *physical* |
@@ -73,6 +74,7 @@ int disable_pse __devinitdata = 0; | |||
73 | */ | 74 | */ |
74 | extern struct resource code_resource; | 75 | extern struct resource code_resource; |
75 | extern struct resource data_resource; | 76 | extern struct resource data_resource; |
77 | extern struct resource bss_resource; | ||
76 | 78 | ||
77 | /* cpu data as detected by the assembly code in head.S */ | 79 | /* cpu data as detected by the assembly code in head.S */ |
78 | struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | 80 | struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; |
@@ -600,6 +602,8 @@ void __init setup_arch(char **cmdline_p) | |||
600 | code_resource.end = virt_to_phys(_etext)-1; | 602 | code_resource.end = virt_to_phys(_etext)-1; |
601 | data_resource.start = virt_to_phys(_etext); | 603 | data_resource.start = virt_to_phys(_etext); |
602 | data_resource.end = virt_to_phys(_edata)-1; | 604 | data_resource.end = virt_to_phys(_edata)-1; |
605 | bss_resource.start = virt_to_phys(&__bss_start); | ||
606 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | ||
603 | 607 | ||
604 | parse_early_param(); | 608 | parse_early_param(); |
605 | 609 | ||
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 31322d42eaa..e7a9e36bd52 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include <asm/numa.h> | 58 | #include <asm/numa.h> |
59 | #include <asm/sections.h> | 59 | #include <asm/sections.h> |
60 | #include <asm/dmi.h> | 60 | #include <asm/dmi.h> |
61 | #include <asm/cacheflush.h> | ||
61 | 62 | ||
62 | /* | 63 | /* |
63 | * Machine setup.. | 64 | * Machine setup.. |
@@ -133,6 +134,12 @@ struct resource code_resource = { | |||
133 | .end = 0, | 134 | .end = 0, |
134 | .flags = IORESOURCE_RAM, | 135 | .flags = IORESOURCE_RAM, |
135 | }; | 136 | }; |
137 | struct resource bss_resource = { | ||
138 | .name = "Kernel bss", | ||
139 | .start = 0, | ||
140 | .end = 0, | ||
141 | .flags = IORESOURCE_RAM, | ||
142 | }; | ||
136 | 143 | ||
137 | #ifdef CONFIG_PROC_VMCORE | 144 | #ifdef CONFIG_PROC_VMCORE |
138 | /* elfcorehdr= specifies the location of elf core header | 145 | /* elfcorehdr= specifies the location of elf core header |
@@ -276,6 +283,8 @@ void __init setup_arch(char **cmdline_p) | |||
276 | code_resource.end = virt_to_phys(&_etext)-1; | 283 | code_resource.end = virt_to_phys(&_etext)-1; |
277 | data_resource.start = virt_to_phys(&_etext); | 284 | data_resource.start = virt_to_phys(&_etext); |
278 | data_resource.end = virt_to_phys(&_edata)-1; | 285 | data_resource.end = virt_to_phys(&_edata)-1; |
286 | bss_resource.start = virt_to_phys(&__bss_start); | ||
287 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | ||
279 | 288 | ||
280 | early_identify_cpu(&boot_cpu_data); | 289 | early_identify_cpu(&boot_cpu_data); |
281 | 290 | ||
diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c index c7b7dfe1d40..c40afbaaf93 100644 --- a/arch/x86/mm/pageattr_64.c +++ b/arch/x86/mm/pageattr_64.c | |||
@@ -61,10 +61,10 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, | |||
61 | return base; | 61 | return base; |
62 | } | 62 | } |
63 | 63 | ||
64 | static void cache_flush_page(void *adr) | 64 | void clflush_cache_range(void *adr, int size) |
65 | { | 65 | { |
66 | int i; | 66 | int i; |
67 | for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) | 67 | for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size) |
68 | clflush(adr+i); | 68 | clflush(adr+i); |
69 | } | 69 | } |
70 | 70 | ||
@@ -80,7 +80,7 @@ static void flush_kernel_map(void *arg) | |||
80 | asm volatile("wbinvd" ::: "memory"); | 80 | asm volatile("wbinvd" ::: "memory"); |
81 | else list_for_each_entry(pg, l, lru) { | 81 | else list_for_each_entry(pg, l, lru) { |
82 | void *adr = page_address(pg); | 82 | void *adr = page_address(pg); |
83 | cache_flush_page(adr); | 83 | clflush_cache_range(adr, PAGE_SIZE); |
84 | } | 84 | } |
85 | __flush_tlb_all(); | 85 | __flush_tlb_all(); |
86 | } | 86 | } |
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index aab25f3ba3c..c2d24991bb2 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
@@ -750,6 +750,38 @@ config PCI_DOMAINS | |||
750 | depends on PCI | 750 | depends on PCI |
751 | default y | 751 | default y |
752 | 752 | ||
753 | config DMAR | ||
754 | bool "Support for DMA Remapping Devices (EXPERIMENTAL)" | ||
755 | depends on PCI_MSI && ACPI && EXPERIMENTAL | ||
756 | default y | ||
757 | help | ||
758 | DMA remapping (DMAR) devices support enables independent address | ||
759 | translations for Direct Memory Access (DMA) from devices. | ||
760 | These DMA remapping devices are reported via ACPI tables | ||
761 | and include PCI device scope covered by these DMA | ||
762 | remapping devices. | ||
763 | |||
764 | config DMAR_GFX_WA | ||
765 | bool "Support for Graphics workaround" | ||
766 | depends on DMAR | ||
767 | default y | ||
768 | help | ||
769 | Current graphics drivers tend to use physical addresses | ||
770 | for DMA and avoid using DMA APIs. Setting this config | ||
771 | option permits the IOMMU driver to set a unity map for | ||
772 | all the OS-visible memory. Hence the driver can continue | ||
773 | to use physical addresses for DMA. | ||
774 | |||
775 | config DMAR_FLOPPY_WA | ||
776 | bool | ||
777 | depends on DMAR | ||
778 | default y | ||
779 | help | ||
780 | Floppy disk drivers are known to bypass DMA API calls, | ||
781 | thereby failing to work when IOMMU is enabled. This | ||
782 | workaround will set up a 1:1 mapping for the first | ||
783 | 16M to make floppy (an ISA device) work. | ||
784 | |||
753 | source "drivers/pci/pcie/Kconfig" | 785 | source "drivers/pci/pcie/Kconfig" |
754 | 786 | ||
755 | source "drivers/pci/Kconfig" | 787 | source "drivers/pci/Kconfig" |
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c41d0728efe..7868707c7ed 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c | |||
@@ -137,7 +137,7 @@ static ssize_t show_mem_state(struct sys_device *dev, char *buf) | |||
137 | return len; | 137 | return len; |
138 | } | 138 | } |
139 | 139 | ||
140 | static inline int memory_notify(unsigned long val, void *v) | 140 | int memory_notify(unsigned long val, void *v) |
141 | { | 141 | { |
142 | return blocking_notifier_call_chain(&memory_chain, val, v); | 142 | return blocking_notifier_call_chain(&memory_chain, val, v); |
143 | } | 143 | } |
@@ -183,7 +183,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) | |||
183 | break; | 183 | break; |
184 | case MEM_OFFLINE: | 184 | case MEM_OFFLINE: |
185 | mem->state = MEM_GOING_OFFLINE; | 185 | mem->state = MEM_GOING_OFFLINE; |
186 | memory_notify(MEM_GOING_OFFLINE, NULL); | ||
187 | start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; | 186 | start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; |
188 | ret = remove_memory(start_paddr, | 187 | ret = remove_memory(start_paddr, |
189 | PAGES_PER_SECTION << PAGE_SHIFT); | 188 | PAGES_PER_SECTION << PAGE_SHIFT); |
@@ -191,7 +190,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) | |||
191 | mem->state = old_state; | 190 | mem->state = old_state; |
192 | break; | 191 | break; |
193 | } | 192 | } |
194 | memory_notify(MEM_MAPPING_INVALID, NULL); | ||
195 | break; | 193 | break; |
196 | default: | 194 | default: |
197 | printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", | 195 | printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", |
@@ -199,11 +197,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) | |||
199 | WARN_ON(1); | 197 | WARN_ON(1); |
200 | ret = -EINVAL; | 198 | ret = -EINVAL; |
201 | } | 199 | } |
202 | /* | ||
203 | * For now, only notify on successful memory operations | ||
204 | */ | ||
205 | if (!ret) | ||
206 | memory_notify(action, NULL); | ||
207 | 200 | ||
208 | return ret; | 201 | return ret; |
209 | } | 202 | } |
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 006054a4099..55505565073 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile | |||
@@ -20,6 +20,9 @@ obj-$(CONFIG_PCI_MSI) += msi.o | |||
20 | # Build the Hypertransport interrupt support | 20 | # Build the Hypertransport interrupt support |
21 | obj-$(CONFIG_HT_IRQ) += htirq.o | 21 | obj-$(CONFIG_HT_IRQ) += htirq.o |
22 | 22 | ||
23 | # Build Intel IOMMU support | ||
24 | obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o | ||
25 | |||
23 | # | 26 | # |
24 | # Some architectures use the generic PCI setup functions | 27 | # Some architectures use the generic PCI setup functions |
25 | # | 28 | # |
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c new file mode 100644 index 00000000000..5dfdfdac92e --- /dev/null +++ b/drivers/pci/dmar.c | |||
@@ -0,0 +1,329 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006, Intel Corporation. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License along with | ||
14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
16 | * | ||
17 | * Copyright (C) Ashok Raj <ashok.raj@intel.com> | ||
18 | * Copyright (C) Shaohua Li <shaohua.li@intel.com> | ||
19 | * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | ||
20 | * | ||
21 | * This file implements early detection/parsing of DMA Remapping Devices | ||
22 | * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI | ||
23 | * tables. | ||
24 | */ | ||
25 | |||
26 | #include <linux/pci.h> | ||
27 | #include <linux/dmar.h> | ||
28 | |||
29 | #undef PREFIX | ||
30 | #define PREFIX "DMAR:" | ||
31 | |||
32 | /* No locks are needed as the DMA remapping hardware unit | ||
33 | * list is constructed at boot time and hotplug of | ||
34 | * these units is not supported by the architecture. | ||
35 | */ | ||
36 | LIST_HEAD(dmar_drhd_units); | ||
37 | LIST_HEAD(dmar_rmrr_units); | ||
38 | |||
39 | static struct acpi_table_header * __initdata dmar_tbl; | ||
40 | |||
41 | static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) | ||
42 | { | ||
43 | /* | ||
44 | * Add INCLUDE_ALL units at the tail, so that scanning the list | ||
45 | * finds them at the very end. | ||
46 | */ | ||
47 | if (drhd->include_all) | ||
48 | list_add_tail(&drhd->list, &dmar_drhd_units); | ||
49 | else | ||
50 | list_add(&drhd->list, &dmar_drhd_units); | ||
51 | } | ||
52 | |||
53 | static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) | ||
54 | { | ||
55 | list_add(&rmrr->list, &dmar_rmrr_units); | ||
56 | } | ||
57 | |||
58 | static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, | ||
59 | struct pci_dev **dev, u16 segment) | ||
60 | { | ||
61 | struct pci_bus *bus; | ||
62 | struct pci_dev *pdev = NULL; | ||
63 | struct acpi_dmar_pci_path *path; | ||
64 | int count; | ||
65 | |||
66 | bus = pci_find_bus(segment, scope->bus); | ||
67 | path = (struct acpi_dmar_pci_path *)(scope + 1); | ||
68 | count = (scope->length - sizeof(struct acpi_dmar_device_scope)) | ||
69 | / sizeof(struct acpi_dmar_pci_path); | ||
70 | |||
71 | while (count) { | ||
72 | if (pdev) | ||
73 | pci_dev_put(pdev); | ||
74 | /* | ||
75 | * Some BIOSes list non-existent devices in the DMAR table; | ||
76 | * just ignore them. | ||
77 | */ | ||
78 | if (!bus) { | ||
79 | printk(KERN_WARNING | ||
80 | PREFIX "Device scope bus [%d] not found\n", | ||
81 | scope->bus); | ||
82 | break; | ||
83 | } | ||
84 | pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn)); | ||
85 | if (!pdev) { | ||
86 | printk(KERN_WARNING PREFIX | ||
87 | "Device scope device [%04x:%02x:%02x.%02x] not found\n", | ||
88 | segment, bus->number, path->dev, path->fn); | ||
89 | break; | ||
90 | } | ||
91 | path ++; | ||
92 | count --; | ||
93 | bus = pdev->subordinate; | ||
94 | } | ||
95 | if (!pdev) { | ||
96 | printk(KERN_WARNING PREFIX | ||
97 | "Device scope device [%04x:%02x:%02x.%02x] not found\n", | ||
98 | segment, scope->bus, path->dev, path->fn); | ||
99 | *dev = NULL; | ||
100 | return 0; | ||
101 | } | ||
102 | if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \ | ||
103 | pdev->subordinate) || (scope->entry_type == \ | ||
104 | ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) { | ||
105 | pci_dev_put(pdev); | ||
106 | printk(KERN_WARNING PREFIX | ||
107 | "Device scope type does not match for %s\n", | ||
108 | pci_name(pdev)); | ||
109 | return -EINVAL; | ||
110 | } | ||
111 | *dev = pdev; | ||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, | ||
116 | struct pci_dev ***devices, u16 segment) | ||
117 | { | ||
118 | struct acpi_dmar_device_scope *scope; | ||
119 | void * tmp = start; | ||
120 | int index; | ||
121 | int ret; | ||
122 | |||
123 | *cnt = 0; | ||
124 | while (start < end) { | ||
125 | scope = start; | ||
126 | if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || | ||
127 | scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) | ||
128 | (*cnt)++; | ||
129 | else | ||
130 | printk(KERN_WARNING PREFIX | ||
131 | "Unsupported device scope\n"); | ||
132 | start += scope->length; | ||
133 | } | ||
134 | if (*cnt == 0) | ||
135 | return 0; | ||
136 | |||
137 | *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL); | ||
138 | if (!*devices) | ||
139 | return -ENOMEM; | ||
140 | |||
141 | start = tmp; | ||
142 | index = 0; | ||
143 | while (start < end) { | ||
144 | scope = start; | ||
145 | if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || | ||
146 | scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) { | ||
147 | ret = dmar_parse_one_dev_scope(scope, | ||
148 | &(*devices)[index], segment); | ||
149 | if (ret) { | ||
150 | kfree(*devices); | ||
151 | return ret; | ||
152 | } | ||
153 | index ++; | ||
154 | } | ||
155 | start += scope->length; | ||
156 | } | ||
157 | |||
158 | return 0; | ||
159 | } | ||
160 | |||
161 | /** | ||
162 | * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition | ||
163 | * structure, which uniquely represents one DMA remapping hardware unit | ||
164 | * present in the platform | ||
165 | */ | ||
166 | static int __init | ||
167 | dmar_parse_one_drhd(struct acpi_dmar_header *header) | ||
168 | { | ||
169 | struct acpi_dmar_hardware_unit *drhd; | ||
170 | struct dmar_drhd_unit *dmaru; | ||
171 | int ret = 0; | ||
172 | static int include_all; | ||
173 | |||
174 | dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); | ||
175 | if (!dmaru) | ||
176 | return -ENOMEM; | ||
177 | |||
178 | drhd = (struct acpi_dmar_hardware_unit *)header; | ||
179 | dmaru->reg_base_addr = drhd->address; | ||
180 | dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ | ||
181 | |||
182 | if (!dmaru->include_all) | ||
183 | ret = dmar_parse_dev_scope((void *)(drhd + 1), | ||
184 | ((void *)drhd) + header->length, | ||
185 | &dmaru->devices_cnt, &dmaru->devices, | ||
186 | drhd->segment); | ||
187 | else { | ||
188 | /* Only allow one INCLUDE_ALL */ | ||
189 | if (include_all) { | ||
190 | printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL " | ||
191 | "device scope is allowed\n"); | ||
192 | ret = -EINVAL; | ||
193 | } | ||
194 | include_all = 1; | ||
195 | } | ||
196 | |||
197 | if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) | ||
198 | kfree(dmaru); | ||
199 | else | ||
200 | dmar_register_drhd_unit(dmaru); | ||
201 | return ret; | ||
202 | } | ||
203 | |||
204 | static int __init | ||
205 | dmar_parse_one_rmrr(struct acpi_dmar_header *header) | ||
206 | { | ||
207 | struct acpi_dmar_reserved_memory *rmrr; | ||
208 | struct dmar_rmrr_unit *rmrru; | ||
209 | int ret = 0; | ||
210 | |||
211 | rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); | ||
212 | if (!rmrru) | ||
213 | return -ENOMEM; | ||
214 | |||
215 | rmrr = (struct acpi_dmar_reserved_memory *)header; | ||
216 | rmrru->base_address = rmrr->base_address; | ||
217 | rmrru->end_address = rmrr->end_address; | ||
218 | ret = dmar_parse_dev_scope((void *)(rmrr + 1), | ||
219 | ((void *)rmrr) + header->length, | ||
220 | &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); | ||
221 | |||
222 | if (ret || (rmrru->devices_cnt == 0)) | ||
223 | kfree(rmrru); | ||
224 | else | ||
225 | dmar_register_rmrr_unit(rmrru); | ||
226 | return ret; | ||
227 | } | ||
228 | |||
229 | static void __init | ||
230 | dmar_table_print_dmar_entry(struct acpi_dmar_header *header) | ||
231 | { | ||
232 | struct acpi_dmar_hardware_unit *drhd; | ||
233 | struct acpi_dmar_reserved_memory *rmrr; | ||
234 | |||
235 | switch (header->type) { | ||
236 | case ACPI_DMAR_TYPE_HARDWARE_UNIT: | ||
237 | drhd = (struct acpi_dmar_hardware_unit *)header; | ||
238 | printk (KERN_INFO PREFIX | ||
239 | "DRHD (flags: 0x%08x)base: 0x%016Lx\n", | ||
240 | drhd->flags, drhd->address); | ||
241 | break; | ||
242 | case ACPI_DMAR_TYPE_RESERVED_MEMORY: | ||
243 | rmrr = (struct acpi_dmar_reserved_memory *)header; | ||
244 | |||
245 | printk (KERN_INFO PREFIX | ||
246 | "RMRR base: 0x%016Lx end: 0x%016Lx\n", | ||
247 | rmrr->base_address, rmrr->end_address); | ||
248 | break; | ||
249 | } | ||
250 | } | ||
251 | |||
252 | /** | ||
253 | * parse_dmar_table - parses the DMA reporting table | ||
254 | */ | ||
255 | static int __init | ||
256 | parse_dmar_table(void) | ||
257 | { | ||
258 | struct acpi_table_dmar *dmar; | ||
259 | struct acpi_dmar_header *entry_header; | ||
260 | int ret = 0; | ||
261 | |||
262 | dmar = (struct acpi_table_dmar *)dmar_tbl; | ||
263 | if (!dmar) | ||
264 | return -ENODEV; | ||
265 | |||
266 | if (!dmar->width) { | ||
267 | printk (KERN_WARNING PREFIX "Zero: Invalid DMAR haw\n"); | ||
268 | return -EINVAL; | ||
269 | } | ||
270 | |||
271 | printk (KERN_INFO PREFIX "Host address width %d\n", | ||
272 | dmar->width + 1); | ||
273 | |||
274 | entry_header = (struct acpi_dmar_header *)(dmar + 1); | ||
275 | while (((unsigned long)entry_header) < | ||
276 | (((unsigned long)dmar) + dmar_tbl->length)) { | ||
277 | dmar_table_print_dmar_entry(entry_header); | ||
278 | |||
279 | switch (entry_header->type) { | ||
280 | case ACPI_DMAR_TYPE_HARDWARE_UNIT: | ||
281 | ret = dmar_parse_one_drhd(entry_header); | ||
282 | break; | ||
283 | case ACPI_DMAR_TYPE_RESERVED_MEMORY: | ||
284 | ret = dmar_parse_one_rmrr(entry_header); | ||
285 | break; | ||
286 | default: | ||
287 | printk(KERN_WARNING PREFIX | ||
288 | "Unknown DMAR structure type\n"); | ||
289 | ret = 0; /* for forward compatibility */ | ||
290 | break; | ||
291 | } | ||
292 | if (ret) | ||
293 | break; | ||
294 | |||
295 | entry_header = ((void *)entry_header + entry_header->length); | ||
296 | } | ||
297 | return ret; | ||
298 | } | ||
299 | |||
300 | |||
301 | int __init dmar_table_init(void) | ||
302 | { | ||
303 | |||
304 | parse_dmar_table(); | ||
305 | if (list_empty(&dmar_drhd_units)) { | ||
306 | printk(KERN_INFO PREFIX "No DMAR devices found\n"); | ||
307 | return -ENODEV; | ||
308 | } | ||
309 | return 0; | ||
310 | } | ||
311 | |||
312 | /** | ||
313 | * early_dmar_detect - checks to see if the platform supports DMAR devices | ||
314 | */ | ||
315 | int __init early_dmar_detect(void) | ||
316 | { | ||
317 | acpi_status status = AE_OK; | ||
318 | |||
319 | /* if we could find DMAR table, then there are DMAR devices */ | ||
320 | status = acpi_get_table(ACPI_SIG_DMAR, 0, | ||
321 | (struct acpi_table_header **)&dmar_tbl); | ||
322 | |||
323 | if (ACPI_SUCCESS(status) && !dmar_tbl) { | ||
324 | printk (KERN_WARNING PREFIX "Unable to map DMAR\n"); | ||
325 | status = AE_NOT_FOUND; | ||
326 | } | ||
327 | |||
328 | return (ACPI_SUCCESS(status) ? 1 : 0); | ||
329 | } | ||
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c new file mode 100644 index 00000000000..b3d70310af4 --- /dev/null +++ b/drivers/pci/intel-iommu.c | |||
@@ -0,0 +1,2271 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006, Intel Corporation. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License along with | ||
14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
16 | * | ||
17 | * Copyright (C) Ashok Raj <ashok.raj@intel.com> | ||
18 | * Copyright (C) Shaohua Li <shaohua.li@intel.com> | ||
19 | * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | ||
20 | */ | ||
21 | |||
22 | #include <linux/init.h> | ||
23 | #include <linux/bitmap.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/irq.h> | ||
26 | #include <linux/interrupt.h> | ||
27 | #include <linux/sysdev.h> | ||
28 | #include <linux/spinlock.h> | ||
29 | #include <linux/pci.h> | ||
30 | #include <linux/dmar.h> | ||
31 | #include <linux/dma-mapping.h> | ||
32 | #include <linux/mempool.h> | ||
33 | #include "iova.h" | ||
34 | #include "intel-iommu.h" | ||
35 | #include <asm/proto.h> /* force_iommu in this header in x86-64*/ | ||
36 | #include <asm/cacheflush.h> | ||
37 | #include <asm/iommu.h> | ||
38 | #include "pci.h" | ||
39 | |||
40 | #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) | ||
41 | #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) | ||
42 | |||
43 | #define IOAPIC_RANGE_START (0xfee00000) | ||
44 | #define IOAPIC_RANGE_END (0xfeefffff) | ||
45 | #define IOVA_START_ADDR (0x1000) | ||
46 | |||
47 | #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 | ||
48 | |||
49 | #define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */ | ||
50 | |||
51 | #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) | ||
52 | |||
53 | static void domain_remove_dev_info(struct dmar_domain *domain); | ||
54 | |||
55 | static int dmar_disabled; | ||
56 | static int __initdata dmar_map_gfx = 1; | ||
57 | static int dmar_forcedac; | ||
58 | |||
59 | #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) | ||
60 | static DEFINE_SPINLOCK(device_domain_lock); | ||
61 | static LIST_HEAD(device_domain_list); | ||
62 | |||
63 | static int __init intel_iommu_setup(char *str) | ||
64 | { | ||
65 | if (!str) | ||
66 | return -EINVAL; | ||
67 | while (*str) { | ||
68 | if (!strncmp(str, "off", 3)) { | ||
69 | dmar_disabled = 1; | ||
70 | printk(KERN_INFO"Intel-IOMMU: disabled\n"); | ||
71 | } else if (!strncmp(str, "igfx_off", 8)) { | ||
72 | dmar_map_gfx = 0; | ||
73 | printk(KERN_INFO | ||
74 | "Intel-IOMMU: disable GFX device mapping\n"); | ||
75 | } else if (!strncmp(str, "forcedac", 8)) { | ||
76 | printk (KERN_INFO | ||
77 | "Intel-IOMMU: Forcing DAC for PCI devices\n"); | ||
78 | dmar_forcedac = 1; | ||
79 | } | ||
80 | |||
81 | str += strcspn(str, ","); | ||
82 | while (*str == ',') | ||
83 | str++; | ||
84 | } | ||
85 | return 0; | ||
86 | } | ||
87 | __setup("intel_iommu=", intel_iommu_setup); | ||
88 | |||
89 | static struct kmem_cache *iommu_domain_cache; | ||
90 | static struct kmem_cache *iommu_devinfo_cache; | ||
91 | static struct kmem_cache *iommu_iova_cache; | ||
92 | |||
93 | static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep) | ||
94 | { | ||
95 | unsigned int flags; | ||
96 | void *vaddr; | ||
97 | |||
98 | /* trying to avoid low memory issues */ | ||
99 | flags = current->flags & PF_MEMALLOC; | ||
100 | current->flags |= PF_MEMALLOC; | ||
101 | vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC); | ||
102 | current->flags &= (~PF_MEMALLOC | flags); | ||
103 | return vaddr; | ||
104 | } | ||
105 | |||
106 | |||
107 | static inline void *alloc_pgtable_page(void) | ||
108 | { | ||
109 | unsigned int flags; | ||
110 | void *vaddr; | ||
111 | |||
112 | /* trying to avoid low memory issues */ | ||
113 | flags = current->flags & PF_MEMALLOC; | ||
114 | current->flags |= PF_MEMALLOC; | ||
115 | vaddr = (void *)get_zeroed_page(GFP_ATOMIC); | ||
116 | current->flags &= (~PF_MEMALLOC | flags); | ||
117 | return vaddr; | ||
118 | } | ||
119 | |||
/* Release a page obtained from alloc_pgtable_page(). */
static inline void free_pgtable_page(void *vaddr)
{
	unsigned long page = (unsigned long)vaddr;

	free_page(page);
}
124 | |||
125 | static inline void *alloc_domain_mem(void) | ||
126 | { | ||
127 | return iommu_kmem_cache_alloc(iommu_domain_cache); | ||
128 | } | ||
129 | |||
130 | static inline void free_domain_mem(void *vaddr) | ||
131 | { | ||
132 | kmem_cache_free(iommu_domain_cache, vaddr); | ||
133 | } | ||
134 | |||
135 | static inline void * alloc_devinfo_mem(void) | ||
136 | { | ||
137 | return iommu_kmem_cache_alloc(iommu_devinfo_cache); | ||
138 | } | ||
139 | |||
140 | static inline void free_devinfo_mem(void *vaddr) | ||
141 | { | ||
142 | kmem_cache_free(iommu_devinfo_cache, vaddr); | ||
143 | } | ||
144 | |||
145 | struct iova *alloc_iova_mem(void) | ||
146 | { | ||
147 | return iommu_kmem_cache_alloc(iommu_iova_cache); | ||
148 | } | ||
149 | |||
150 | void free_iova_mem(struct iova *iova) | ||
151 | { | ||
152 | kmem_cache_free(iommu_iova_cache, iova); | ||
153 | } | ||
154 | |||
155 | static inline void __iommu_flush_cache( | ||
156 | struct intel_iommu *iommu, void *addr, int size) | ||
157 | { | ||
158 | if (!ecap_coherent(iommu->ecap)) | ||
159 | clflush_cache_range(addr, size); | ||
160 | } | ||
161 | |||
162 | /* Gets context entry for a given bus and devfn */ | ||
163 | static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, | ||
164 | u8 bus, u8 devfn) | ||
165 | { | ||
166 | struct root_entry *root; | ||
167 | struct context_entry *context; | ||
168 | unsigned long phy_addr; | ||
169 | unsigned long flags; | ||
170 | |||
171 | spin_lock_irqsave(&iommu->lock, flags); | ||
172 | root = &iommu->root_entry[bus]; | ||
173 | context = get_context_addr_from_root(root); | ||
174 | if (!context) { | ||
175 | context = (struct context_entry *)alloc_pgtable_page(); | ||
176 | if (!context) { | ||
177 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
178 | return NULL; | ||
179 | } | ||
180 | __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K); | ||
181 | phy_addr = virt_to_phys((void *)context); | ||
182 | set_root_value(root, phy_addr); | ||
183 | set_root_present(root); | ||
184 | __iommu_flush_cache(iommu, root, sizeof(*root)); | ||
185 | } | ||
186 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
187 | return &context[devfn]; | ||
188 | } | ||
189 | |||
190 | static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) | ||
191 | { | ||
192 | struct root_entry *root; | ||
193 | struct context_entry *context; | ||
194 | int ret; | ||
195 | unsigned long flags; | ||
196 | |||
197 | spin_lock_irqsave(&iommu->lock, flags); | ||
198 | root = &iommu->root_entry[bus]; | ||
199 | context = get_context_addr_from_root(root); | ||
200 | if (!context) { | ||
201 | ret = 0; | ||
202 | goto out; | ||
203 | } | ||
204 | ret = context_present(context[devfn]); | ||
205 | out: | ||
206 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
207 | return ret; | ||
208 | } | ||
209 | |||
210 | static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn) | ||
211 | { | ||
212 | struct root_entry *root; | ||
213 | struct context_entry *context; | ||
214 | unsigned long flags; | ||
215 | |||
216 | spin_lock_irqsave(&iommu->lock, flags); | ||
217 | root = &iommu->root_entry[bus]; | ||
218 | context = get_context_addr_from_root(root); | ||
219 | if (context) { | ||
220 | context_clear_entry(context[devfn]); | ||
221 | __iommu_flush_cache(iommu, &context[devfn], \ | ||
222 | sizeof(*context)); | ||
223 | } | ||
224 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
225 | } | ||
226 | |||
227 | static void free_context_table(struct intel_iommu *iommu) | ||
228 | { | ||
229 | struct root_entry *root; | ||
230 | int i; | ||
231 | unsigned long flags; | ||
232 | struct context_entry *context; | ||
233 | |||
234 | spin_lock_irqsave(&iommu->lock, flags); | ||
235 | if (!iommu->root_entry) { | ||
236 | goto out; | ||
237 | } | ||
238 | for (i = 0; i < ROOT_ENTRY_NR; i++) { | ||
239 | root = &iommu->root_entry[i]; | ||
240 | context = get_context_addr_from_root(root); | ||
241 | if (context) | ||
242 | free_pgtable_page(context); | ||
243 | } | ||
244 | free_pgtable_page(iommu->root_entry); | ||
245 | iommu->root_entry = NULL; | ||
246 | out: | ||
247 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
248 | } | ||
249 | |||
/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/* Number of page-table levels for an adjusted guest address width index. */
static inline int agaw_to_level(int agaw)
{
	int levels = agaw + 2;

	return levels;
}

/* Address width in bits for an agaw index: 30 bits plus one stride each. */
static inline int agaw_to_width(int agaw)
{
	int extra_bits = agaw * LEVEL_STRIDE;

	return 30 + extra_bits;
}

/* Inverse of agaw_to_width(); the division truncates. */
static inline int width_to_agaw(int width)
{
	int above_base = width - 30;

	return above_base / LEVEL_STRIDE;
}

/* Bit position where the table index for @level starts (level 1 -> 12). */
static inline unsigned int level_to_offset_bits(int level)
{
	int strides = level - 1;

	return (12 + strides * LEVEL_STRIDE);
}
274 | |||
275 | static inline int address_level_offset(u64 addr, int level) | ||
276 | { | ||
277 | return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK); | ||
278 | } | ||
279 | |||
280 | static inline u64 level_mask(int level) | ||
281 | { | ||
282 | return ((u64)-1 << level_to_offset_bits(level)); | ||
283 | } | ||
284 | |||
285 | static inline u64 level_size(int level) | ||
286 | { | ||
287 | return ((u64)1 << level_to_offset_bits(level)); | ||
288 | } | ||
289 | |||
290 | static inline u64 align_to_level(u64 addr, int level) | ||
291 | { | ||
292 | return ((addr + level_size(level) - 1) & level_mask(level)); | ||
293 | } | ||
294 | |||
295 | static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) | ||
296 | { | ||
297 | int addr_width = agaw_to_width(domain->agaw); | ||
298 | struct dma_pte *parent, *pte = NULL; | ||
299 | int level = agaw_to_level(domain->agaw); | ||
300 | int offset; | ||
301 | unsigned long flags; | ||
302 | |||
303 | BUG_ON(!domain->pgd); | ||
304 | |||
305 | addr &= (((u64)1) << addr_width) - 1; | ||
306 | parent = domain->pgd; | ||
307 | |||
308 | spin_lock_irqsave(&domain->mapping_lock, flags); | ||
309 | while (level > 0) { | ||
310 | void *tmp_page; | ||
311 | |||
312 | offset = address_level_offset(addr, level); | ||
313 | pte = &parent[offset]; | ||
314 | if (level == 1) | ||
315 | break; | ||
316 | |||
317 | if (!dma_pte_present(*pte)) { | ||
318 | tmp_page = alloc_pgtable_page(); | ||
319 | |||
320 | if (!tmp_page) { | ||
321 | spin_unlock_irqrestore(&domain->mapping_lock, | ||
322 | flags); | ||
323 | return NULL; | ||
324 | } | ||
325 | __iommu_flush_cache(domain->iommu, tmp_page, | ||
326 | PAGE_SIZE_4K); | ||
327 | dma_set_pte_addr(*pte, virt_to_phys(tmp_page)); | ||
328 | /* | ||
329 | * high level table always sets r/w, last level page | ||
330 | * table control read/write | ||
331 | */ | ||
332 | dma_set_pte_readable(*pte); | ||
333 | dma_set_pte_writable(*pte); | ||
334 | __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); | ||
335 | } | ||
336 | parent = phys_to_virt(dma_pte_addr(*pte)); | ||
337 | level--; | ||
338 | } | ||
339 | |||
340 | spin_unlock_irqrestore(&domain->mapping_lock, flags); | ||
341 | return pte; | ||
342 | } | ||
343 | |||
344 | /* return address's pte at specific level */ | ||
345 | static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, | ||
346 | int level) | ||
347 | { | ||
348 | struct dma_pte *parent, *pte = NULL; | ||
349 | int total = agaw_to_level(domain->agaw); | ||
350 | int offset; | ||
351 | |||
352 | parent = domain->pgd; | ||
353 | while (level <= total) { | ||
354 | offset = address_level_offset(addr, total); | ||
355 | pte = &parent[offset]; | ||
356 | if (level == total) | ||
357 | return pte; | ||
358 | |||
359 | if (!dma_pte_present(*pte)) | ||
360 | break; | ||
361 | parent = phys_to_virt(dma_pte_addr(*pte)); | ||
362 | total--; | ||
363 | } | ||
364 | return NULL; | ||
365 | } | ||
366 | |||
367 | /* clear one page's page table */ | ||
368 | static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) | ||
369 | { | ||
370 | struct dma_pte *pte = NULL; | ||
371 | |||
372 | /* get last level pte */ | ||
373 | pte = dma_addr_level_pte(domain, addr, 1); | ||
374 | |||
375 | if (pte) { | ||
376 | dma_clear_pte(*pte); | ||
377 | __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); | ||
378 | } | ||
379 | } | ||
380 | |||
381 | /* clear last level pte, a tlb flush should be followed */ | ||
382 | static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) | ||
383 | { | ||
384 | int addr_width = agaw_to_width(domain->agaw); | ||
385 | |||
386 | start &= (((u64)1) << addr_width) - 1; | ||
387 | end &= (((u64)1) << addr_width) - 1; | ||
388 | /* in case it's partial page */ | ||
389 | start = PAGE_ALIGN_4K(start); | ||
390 | end &= PAGE_MASK_4K; | ||
391 | |||
392 | /* we don't need lock here, nobody else touches the iova range */ | ||
393 | while (start < end) { | ||
394 | dma_pte_clear_one(domain, start); | ||
395 | start += PAGE_SIZE_4K; | ||
396 | } | ||
397 | } | ||
398 | |||
399 | /* free page table pages. last level pte should already be cleared */ | ||
400 | static void dma_pte_free_pagetable(struct dmar_domain *domain, | ||
401 | u64 start, u64 end) | ||
402 | { | ||
403 | int addr_width = agaw_to_width(domain->agaw); | ||
404 | struct dma_pte *pte; | ||
405 | int total = agaw_to_level(domain->agaw); | ||
406 | int level; | ||
407 | u64 tmp; | ||
408 | |||
409 | start &= (((u64)1) << addr_width) - 1; | ||
410 | end &= (((u64)1) << addr_width) - 1; | ||
411 | |||
412 | /* we don't need lock here, nobody else touches the iova range */ | ||
413 | level = 2; | ||
414 | while (level <= total) { | ||
415 | tmp = align_to_level(start, level); | ||
416 | if (tmp >= end || (tmp + level_size(level) > end)) | ||
417 | return; | ||
418 | |||
419 | while (tmp < end) { | ||
420 | pte = dma_addr_level_pte(domain, tmp, level); | ||
421 | if (pte) { | ||
422 | free_pgtable_page( | ||
423 | phys_to_virt(dma_pte_addr(*pte))); | ||
424 | dma_clear_pte(*pte); | ||
425 | __iommu_flush_cache(domain->iommu, | ||
426 | pte, sizeof(*pte)); | ||
427 | } | ||
428 | tmp += level_size(level); | ||
429 | } | ||
430 | level++; | ||
431 | } | ||
432 | /* free pgd */ | ||
433 | if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) { | ||
434 | free_pgtable_page(domain->pgd); | ||
435 | domain->pgd = NULL; | ||
436 | } | ||
437 | } | ||
438 | |||
439 | /* iommu handling */ | ||
440 | static int iommu_alloc_root_entry(struct intel_iommu *iommu) | ||
441 | { | ||
442 | struct root_entry *root; | ||
443 | unsigned long flags; | ||
444 | |||
445 | root = (struct root_entry *)alloc_pgtable_page(); | ||
446 | if (!root) | ||
447 | return -ENOMEM; | ||
448 | |||
449 | __iommu_flush_cache(iommu, root, PAGE_SIZE_4K); | ||
450 | |||
451 | spin_lock_irqsave(&iommu->lock, flags); | ||
452 | iommu->root_entry = root; | ||
453 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
454 | |||
455 | return 0; | ||
456 | } | ||
457 | |||
/*
 * IOMMU_WAIT_OP - poll a register until a condition holds
 * @iommu:  unit whose register window (->reg) is polled
 * @offset: register offset within that window
 * @op:     accessor used to read the register (e.g. readl, dmar_readq)
 * @cond:   expression, normally written in terms of @sts, that ends the wait
 * @sts:    caller's variable that receives each value read
 *
 * Spins with cpu_relax() between reads and panics if @cond has not become
 * true once DMAR_OPERATION_TIMEOUT jiffies have elapsed.  @cond is
 * re-evaluated after every read.
 *
 * Wrapped in do { } while (0) so that the macro expands to a single
 * statement and is safe inside unbraced if/else bodies.
 */
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
do {\
	unsigned long start_time = jiffies;\
	while (1) {\
		sts = op((iommu)->reg + (offset));\
		if (cond)\
			break;\
		if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
			panic("DMAR hardware is malfunctioning\n");\
		cpu_relax();\
	}\
} while (0)
470 | |||
471 | static void iommu_set_root_entry(struct intel_iommu *iommu) | ||
472 | { | ||
473 | void *addr; | ||
474 | u32 cmd, sts; | ||
475 | unsigned long flag; | ||
476 | |||
477 | addr = iommu->root_entry; | ||
478 | |||
479 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
480 | dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); | ||
481 | |||
482 | cmd = iommu->gcmd | DMA_GCMD_SRTP; | ||
483 | writel(cmd, iommu->reg + DMAR_GCMD_REG); | ||
484 | |||
485 | /* Make sure hardware complete it */ | ||
486 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, | ||
487 | readl, (sts & DMA_GSTS_RTPS), sts); | ||
488 | |||
489 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
490 | } | ||
491 | |||
492 | static void iommu_flush_write_buffer(struct intel_iommu *iommu) | ||
493 | { | ||
494 | u32 val; | ||
495 | unsigned long flag; | ||
496 | |||
497 | if (!cap_rwbf(iommu->cap)) | ||
498 | return; | ||
499 | val = iommu->gcmd | DMA_GCMD_WBF; | ||
500 | |||
501 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
502 | writel(val, iommu->reg + DMAR_GCMD_REG); | ||
503 | |||
504 | /* Make sure hardware complete it */ | ||
505 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, | ||
506 | readl, (!(val & DMA_GSTS_WBFS)), val); | ||
507 | |||
508 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
509 | } | ||
510 | |||
511 | /* return value determine if we need a write buffer flush */ | ||
512 | static int __iommu_flush_context(struct intel_iommu *iommu, | ||
513 | u16 did, u16 source_id, u8 function_mask, u64 type, | ||
514 | int non_present_entry_flush) | ||
515 | { | ||
516 | u64 val = 0; | ||
517 | unsigned long flag; | ||
518 | |||
519 | /* | ||
520 | * In the non-present entry flush case, if hardware doesn't cache | ||
521 | * non-present entry we do nothing and if hardware cache non-present | ||
522 | * entry, we flush entries of domain 0 (the domain id is used to cache | ||
523 | * any non-present entries) | ||
524 | */ | ||
525 | if (non_present_entry_flush) { | ||
526 | if (!cap_caching_mode(iommu->cap)) | ||
527 | return 1; | ||
528 | else | ||
529 | did = 0; | ||
530 | } | ||
531 | |||
532 | switch (type) { | ||
533 | case DMA_CCMD_GLOBAL_INVL: | ||
534 | val = DMA_CCMD_GLOBAL_INVL; | ||
535 | break; | ||
536 | case DMA_CCMD_DOMAIN_INVL: | ||
537 | val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did); | ||
538 | break; | ||
539 | case DMA_CCMD_DEVICE_INVL: | ||
540 | val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did) | ||
541 | | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask); | ||
542 | break; | ||
543 | default: | ||
544 | BUG(); | ||
545 | } | ||
546 | val |= DMA_CCMD_ICC; | ||
547 | |||
548 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
549 | dmar_writeq(iommu->reg + DMAR_CCMD_REG, val); | ||
550 | |||
551 | /* Make sure hardware complete it */ | ||
552 | IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, | ||
553 | dmar_readq, (!(val & DMA_CCMD_ICC)), val); | ||
554 | |||
555 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
556 | |||
557 | /* flush context entry will implictly flush write buffer */ | ||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | static int inline iommu_flush_context_global(struct intel_iommu *iommu, | ||
562 | int non_present_entry_flush) | ||
563 | { | ||
564 | return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, | ||
565 | non_present_entry_flush); | ||
566 | } | ||
567 | |||
568 | static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did, | ||
569 | int non_present_entry_flush) | ||
570 | { | ||
571 | return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL, | ||
572 | non_present_entry_flush); | ||
573 | } | ||
574 | |||
575 | static int inline iommu_flush_context_device(struct intel_iommu *iommu, | ||
576 | u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush) | ||
577 | { | ||
578 | return __iommu_flush_context(iommu, did, source_id, function_mask, | ||
579 | DMA_CCMD_DEVICE_INVL, non_present_entry_flush); | ||
580 | } | ||
581 | |||
582 | /* return value determine if we need a write buffer flush */ | ||
583 | static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, | ||
584 | u64 addr, unsigned int size_order, u64 type, | ||
585 | int non_present_entry_flush) | ||
586 | { | ||
587 | int tlb_offset = ecap_iotlb_offset(iommu->ecap); | ||
588 | u64 val = 0, val_iva = 0; | ||
589 | unsigned long flag; | ||
590 | |||
591 | /* | ||
592 | * In the non-present entry flush case, if hardware doesn't cache | ||
593 | * non-present entry we do nothing and if hardware cache non-present | ||
594 | * entry, we flush entries of domain 0 (the domain id is used to cache | ||
595 | * any non-present entries) | ||
596 | */ | ||
597 | if (non_present_entry_flush) { | ||
598 | if (!cap_caching_mode(iommu->cap)) | ||
599 | return 1; | ||
600 | else | ||
601 | did = 0; | ||
602 | } | ||
603 | |||
604 | switch (type) { | ||
605 | case DMA_TLB_GLOBAL_FLUSH: | ||
606 | /* global flush doesn't need set IVA_REG */ | ||
607 | val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; | ||
608 | break; | ||
609 | case DMA_TLB_DSI_FLUSH: | ||
610 | val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); | ||
611 | break; | ||
612 | case DMA_TLB_PSI_FLUSH: | ||
613 | val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); | ||
614 | /* Note: always flush non-leaf currently */ | ||
615 | val_iva = size_order | addr; | ||
616 | break; | ||
617 | default: | ||
618 | BUG(); | ||
619 | } | ||
620 | /* Note: set drain read/write */ | ||
621 | #if 0 | ||
622 | /* | ||
623 | * This is probably to be super secure.. Looks like we can | ||
624 | * ignore it without any impact. | ||
625 | */ | ||
626 | if (cap_read_drain(iommu->cap)) | ||
627 | val |= DMA_TLB_READ_DRAIN; | ||
628 | #endif | ||
629 | if (cap_write_drain(iommu->cap)) | ||
630 | val |= DMA_TLB_WRITE_DRAIN; | ||
631 | |||
632 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
633 | /* Note: Only uses first TLB reg currently */ | ||
634 | if (val_iva) | ||
635 | dmar_writeq(iommu->reg + tlb_offset, val_iva); | ||
636 | dmar_writeq(iommu->reg + tlb_offset + 8, val); | ||
637 | |||
638 | /* Make sure hardware complete it */ | ||
639 | IOMMU_WAIT_OP(iommu, tlb_offset + 8, | ||
640 | dmar_readq, (!(val & DMA_TLB_IVT)), val); | ||
641 | |||
642 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
643 | |||
644 | /* check IOTLB invalidation granularity */ | ||
645 | if (DMA_TLB_IAIG(val) == 0) | ||
646 | printk(KERN_ERR"IOMMU: flush IOTLB failed\n"); | ||
647 | if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) | ||
648 | pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", | ||
649 | DMA_TLB_IIRG(type), DMA_TLB_IAIG(val)); | ||
650 | /* flush context entry will implictly flush write buffer */ | ||
651 | return 0; | ||
652 | } | ||
653 | |||
654 | static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu, | ||
655 | int non_present_entry_flush) | ||
656 | { | ||
657 | return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, | ||
658 | non_present_entry_flush); | ||
659 | } | ||
660 | |||
661 | static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did, | ||
662 | int non_present_entry_flush) | ||
663 | { | ||
664 | return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, | ||
665 | non_present_entry_flush); | ||
666 | } | ||
667 | |||
668 | static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, | ||
669 | u64 addr, unsigned int pages, int non_present_entry_flush) | ||
670 | { | ||
671 | unsigned int mask; | ||
672 | |||
673 | BUG_ON(addr & (~PAGE_MASK_4K)); | ||
674 | BUG_ON(pages == 0); | ||
675 | |||
676 | /* Fallback to domain selective flush if no PSI support */ | ||
677 | if (!cap_pgsel_inv(iommu->cap)) | ||
678 | return iommu_flush_iotlb_dsi(iommu, did, | ||
679 | non_present_entry_flush); | ||
680 | |||
681 | /* | ||
682 | * PSI requires page size to be 2 ^ x, and the base address is naturally | ||
683 | * aligned to the size | ||
684 | */ | ||
685 | mask = ilog2(__roundup_pow_of_two(pages)); | ||
686 | /* Fallback to domain selective flush if size is too big */ | ||
687 | if (mask > cap_max_amask_val(iommu->cap)) | ||
688 | return iommu_flush_iotlb_dsi(iommu, did, | ||
689 | non_present_entry_flush); | ||
690 | |||
691 | return __iommu_flush_iotlb(iommu, did, addr, mask, | ||
692 | DMA_TLB_PSI_FLUSH, non_present_entry_flush); | ||
693 | } | ||
694 | |||
695 | static int iommu_enable_translation(struct intel_iommu *iommu) | ||
696 | { | ||
697 | u32 sts; | ||
698 | unsigned long flags; | ||
699 | |||
700 | spin_lock_irqsave(&iommu->register_lock, flags); | ||
701 | writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG); | ||
702 | |||
703 | /* Make sure hardware complete it */ | ||
704 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, | ||
705 | readl, (sts & DMA_GSTS_TES), sts); | ||
706 | |||
707 | iommu->gcmd |= DMA_GCMD_TE; | ||
708 | spin_unlock_irqrestore(&iommu->register_lock, flags); | ||
709 | return 0; | ||
710 | } | ||
711 | |||
712 | static int iommu_disable_translation(struct intel_iommu *iommu) | ||
713 | { | ||
714 | u32 sts; | ||
715 | unsigned long flag; | ||
716 | |||
717 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
718 | iommu->gcmd &= ~DMA_GCMD_TE; | ||
719 | writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); | ||
720 | |||
721 | /* Make sure hardware complete it */ | ||
722 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, | ||
723 | readl, (!(sts & DMA_GSTS_TES)), sts); | ||
724 | |||
725 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
726 | return 0; | ||
727 | } | ||
728 | |||
729 | /* iommu interrupt handling. Most stuff are MSI-like. */ | ||
730 | |||
731 | static char *fault_reason_strings[] = | ||
732 | { | ||
733 | "Software", | ||
734 | "Present bit in root entry is clear", | ||
735 | "Present bit in context entry is clear", | ||
736 | "Invalid context entry", | ||
737 | "Access beyond MGAW", | ||
738 | "PTE Write access is not set", | ||
739 | "PTE Read access is not set", | ||
740 | "Next page table ptr is invalid", | ||
741 | "Root table address invalid", | ||
742 | "Context table ptr is invalid", | ||
743 | "non-zero reserved fields in RTP", | ||
744 | "non-zero reserved fields in CTP", | ||
745 | "non-zero reserved fields in PTE", | ||
746 | "Unknown" | ||
747 | }; | ||
748 | #define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings) | ||
749 | |||
750 | char *dmar_get_fault_reason(u8 fault_reason) | ||
751 | { | ||
752 | if (fault_reason > MAX_FAULT_REASON_IDX) | ||
753 | return fault_reason_strings[MAX_FAULT_REASON_IDX]; | ||
754 | else | ||
755 | return fault_reason_strings[fault_reason]; | ||
756 | } | ||
757 | |||
758 | void dmar_msi_unmask(unsigned int irq) | ||
759 | { | ||
760 | struct intel_iommu *iommu = get_irq_data(irq); | ||
761 | unsigned long flag; | ||
762 | |||
763 | /* unmask it */ | ||
764 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
765 | writel(0, iommu->reg + DMAR_FECTL_REG); | ||
766 | /* Read a reg to force flush the post write */ | ||
767 | readl(iommu->reg + DMAR_FECTL_REG); | ||
768 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
769 | } | ||
770 | |||
771 | void dmar_msi_mask(unsigned int irq) | ||
772 | { | ||
773 | unsigned long flag; | ||
774 | struct intel_iommu *iommu = get_irq_data(irq); | ||
775 | |||
776 | /* mask it */ | ||
777 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
778 | writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG); | ||
779 | /* Read a reg to force flush the post write */ | ||
780 | readl(iommu->reg + DMAR_FECTL_REG); | ||
781 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
782 | } | ||
783 | |||
784 | void dmar_msi_write(int irq, struct msi_msg *msg) | ||
785 | { | ||
786 | struct intel_iommu *iommu = get_irq_data(irq); | ||
787 | unsigned long flag; | ||
788 | |||
789 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
790 | writel(msg->data, iommu->reg + DMAR_FEDATA_REG); | ||
791 | writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG); | ||
792 | writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG); | ||
793 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
794 | } | ||
795 | |||
796 | void dmar_msi_read(int irq, struct msi_msg *msg) | ||
797 | { | ||
798 | struct intel_iommu *iommu = get_irq_data(irq); | ||
799 | unsigned long flag; | ||
800 | |||
801 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
802 | msg->data = readl(iommu->reg + DMAR_FEDATA_REG); | ||
803 | msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG); | ||
804 | msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG); | ||
805 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
806 | } | ||
807 | |||
808 | static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type, | ||
809 | u8 fault_reason, u16 source_id, u64 addr) | ||
810 | { | ||
811 | char *reason; | ||
812 | |||
813 | reason = dmar_get_fault_reason(fault_reason); | ||
814 | |||
815 | printk(KERN_ERR | ||
816 | "DMAR:[%s] Request device [%02x:%02x.%d] " | ||
817 | "fault addr %llx \n" | ||
818 | "DMAR:[fault reason %02d] %s\n", | ||
819 | (type ? "DMA Read" : "DMA Write"), | ||
820 | (source_id >> 8), PCI_SLOT(source_id & 0xFF), | ||
821 | PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); | ||
822 | return 0; | ||
823 | } | ||
824 | |||
825 | #define PRIMARY_FAULT_REG_LEN (16) | ||
826 | static irqreturn_t iommu_page_fault(int irq, void *dev_id) | ||
827 | { | ||
828 | struct intel_iommu *iommu = dev_id; | ||
829 | int reg, fault_index; | ||
830 | u32 fault_status; | ||
831 | unsigned long flag; | ||
832 | |||
833 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
834 | fault_status = readl(iommu->reg + DMAR_FSTS_REG); | ||
835 | |||
836 | /* TBD: ignore advanced fault log currently */ | ||
837 | if (!(fault_status & DMA_FSTS_PPF)) | ||
838 | goto clear_overflow; | ||
839 | |||
840 | fault_index = dma_fsts_fault_record_index(fault_status); | ||
841 | reg = cap_fault_reg_offset(iommu->cap); | ||
842 | while (1) { | ||
843 | u8 fault_reason; | ||
844 | u16 source_id; | ||
845 | u64 guest_addr; | ||
846 | int type; | ||
847 | u32 data; | ||
848 | |||
849 | /* highest 32 bits */ | ||
850 | data = readl(iommu->reg + reg + | ||
851 | fault_index * PRIMARY_FAULT_REG_LEN + 12); | ||
852 | if (!(data & DMA_FRCD_F)) | ||
853 | break; | ||
854 | |||
855 | fault_reason = dma_frcd_fault_reason(data); | ||
856 | type = dma_frcd_type(data); | ||
857 | |||
858 | data = readl(iommu->reg + reg + | ||
859 | fault_index * PRIMARY_FAULT_REG_LEN + 8); | ||
860 | source_id = dma_frcd_source_id(data); | ||
861 | |||
862 | guest_addr = dmar_readq(iommu->reg + reg + | ||
863 | fault_index * PRIMARY_FAULT_REG_LEN); | ||
864 | guest_addr = dma_frcd_page_addr(guest_addr); | ||
865 | /* clear the fault */ | ||
866 | writel(DMA_FRCD_F, iommu->reg + reg + | ||
867 | fault_index * PRIMARY_FAULT_REG_LEN + 12); | ||
868 | |||
869 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
870 | |||
871 | iommu_page_fault_do_one(iommu, type, fault_reason, | ||
872 | source_id, guest_addr); | ||
873 | |||
874 | fault_index++; | ||
875 | if (fault_index > cap_num_fault_regs(iommu->cap)) | ||
876 | fault_index = 0; | ||
877 | spin_lock_irqsave(&iommu->register_lock, flag); | ||
878 | } | ||
879 | clear_overflow: | ||
880 | /* clear primary fault overflow */ | ||
881 | fault_status = readl(iommu->reg + DMAR_FSTS_REG); | ||
882 | if (fault_status & DMA_FSTS_PFO) | ||
883 | writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG); | ||
884 | |||
885 | spin_unlock_irqrestore(&iommu->register_lock, flag); | ||
886 | return IRQ_HANDLED; | ||
887 | } | ||
888 | |||
889 | int dmar_set_interrupt(struct intel_iommu *iommu) | ||
890 | { | ||
891 | int irq, ret; | ||
892 | |||
893 | irq = create_irq(); | ||
894 | if (!irq) { | ||
895 | printk(KERN_ERR "IOMMU: no free vectors\n"); | ||
896 | return -EINVAL; | ||
897 | } | ||
898 | |||
899 | set_irq_data(irq, iommu); | ||
900 | iommu->irq = irq; | ||
901 | |||
902 | ret = arch_setup_dmar_msi(irq); | ||
903 | if (ret) { | ||
904 | set_irq_data(irq, NULL); | ||
905 | iommu->irq = 0; | ||
906 | destroy_irq(irq); | ||
907 | return 0; | ||
908 | } | ||
909 | |||
910 | /* Force fault register is cleared */ | ||
911 | iommu_page_fault(irq, iommu); | ||
912 | |||
913 | ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu); | ||
914 | if (ret) | ||
915 | printk(KERN_ERR "IOMMU: can't request irq\n"); | ||
916 | return ret; | ||
917 | } | ||
918 | |||
919 | static int iommu_init_domains(struct intel_iommu *iommu) | ||
920 | { | ||
921 | unsigned long ndomains; | ||
922 | unsigned long nlongs; | ||
923 | |||
924 | ndomains = cap_ndoms(iommu->cap); | ||
925 | pr_debug("Number of Domains supportd <%ld>\n", ndomains); | ||
926 | nlongs = BITS_TO_LONGS(ndomains); | ||
927 | |||
928 | /* TBD: there might be 64K domains, | ||
929 | * consider other allocation for future chip | ||
930 | */ | ||
931 | iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL); | ||
932 | if (!iommu->domain_ids) { | ||
933 | printk(KERN_ERR "Allocating domain id array failed\n"); | ||
934 | return -ENOMEM; | ||
935 | } | ||
936 | iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *), | ||
937 | GFP_KERNEL); | ||
938 | if (!iommu->domains) { | ||
939 | printk(KERN_ERR "Allocating domain array failed\n"); | ||
940 | kfree(iommu->domain_ids); | ||
941 | return -ENOMEM; | ||
942 | } | ||
943 | |||
944 | /* | ||
945 | * if Caching mode is set, then invalid translations are tagged | ||
946 | * with domainid 0. Hence we need to pre-allocate it. | ||
947 | */ | ||
948 | if (cap_caching_mode(iommu->cap)) | ||
949 | set_bit(0, iommu->domain_ids); | ||
950 | return 0; | ||
951 | } | ||
952 | |||
953 | static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) | ||
954 | { | ||
955 | struct intel_iommu *iommu; | ||
956 | int ret; | ||
957 | int map_size; | ||
958 | u32 ver; | ||
959 | |||
960 | iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); | ||
961 | if (!iommu) | ||
962 | return NULL; | ||
963 | iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); | ||
964 | if (!iommu->reg) { | ||
965 | printk(KERN_ERR "IOMMU: can't map the region\n"); | ||
966 | goto error; | ||
967 | } | ||
968 | iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); | ||
969 | iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); | ||
970 | |||
971 | /* the registers might be more than one page */ | ||
972 | map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), | ||
973 | cap_max_fault_reg_offset(iommu->cap)); | ||
974 | map_size = PAGE_ALIGN_4K(map_size); | ||
975 | if (map_size > PAGE_SIZE_4K) { | ||
976 | iounmap(iommu->reg); | ||
977 | iommu->reg = ioremap(drhd->reg_base_addr, map_size); | ||
978 | if (!iommu->reg) { | ||
979 | printk(KERN_ERR "IOMMU: can't map the region\n"); | ||
980 | goto error; | ||
981 | } | ||
982 | } | ||
983 | |||
984 | ver = readl(iommu->reg + DMAR_VER_REG); | ||
985 | pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", | ||
986 | drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), | ||
987 | iommu->cap, iommu->ecap); | ||
988 | ret = iommu_init_domains(iommu); | ||
989 | if (ret) | ||
990 | goto error_unmap; | ||
991 | spin_lock_init(&iommu->lock); | ||
992 | spin_lock_init(&iommu->register_lock); | ||
993 | |||
994 | drhd->iommu = iommu; | ||
995 | return iommu; | ||
996 | error_unmap: | ||
997 | iounmap(iommu->reg); | ||
998 | iommu->reg = 0; | ||
999 | error: | ||
1000 | kfree(iommu); | ||
1001 | return NULL; | ||
1002 | } | ||
1003 | |||
1004 | static void domain_exit(struct dmar_domain *domain); | ||
1005 | static void free_iommu(struct intel_iommu *iommu) | ||
1006 | { | ||
1007 | struct dmar_domain *domain; | ||
1008 | int i; | ||
1009 | |||
1010 | if (!iommu) | ||
1011 | return; | ||
1012 | |||
1013 | i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); | ||
1014 | for (; i < cap_ndoms(iommu->cap); ) { | ||
1015 | domain = iommu->domains[i]; | ||
1016 | clear_bit(i, iommu->domain_ids); | ||
1017 | domain_exit(domain); | ||
1018 | i = find_next_bit(iommu->domain_ids, | ||
1019 | cap_ndoms(iommu->cap), i+1); | ||
1020 | } | ||
1021 | |||
1022 | if (iommu->gcmd & DMA_GCMD_TE) | ||
1023 | iommu_disable_translation(iommu); | ||
1024 | |||
1025 | if (iommu->irq) { | ||
1026 | set_irq_data(iommu->irq, NULL); | ||
1027 | /* This will mask the irq */ | ||
1028 | free_irq(iommu->irq, iommu); | ||
1029 | destroy_irq(iommu->irq); | ||
1030 | } | ||
1031 | |||
1032 | kfree(iommu->domains); | ||
1033 | kfree(iommu->domain_ids); | ||
1034 | |||
1035 | /* free context mapping */ | ||
1036 | free_context_table(iommu); | ||
1037 | |||
1038 | if (iommu->reg) | ||
1039 | iounmap(iommu->reg); | ||
1040 | kfree(iommu); | ||
1041 | } | ||
1042 | |||
1043 | static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) | ||
1044 | { | ||
1045 | unsigned long num; | ||
1046 | unsigned long ndomains; | ||
1047 | struct dmar_domain *domain; | ||
1048 | unsigned long flags; | ||
1049 | |||
1050 | domain = alloc_domain_mem(); | ||
1051 | if (!domain) | ||
1052 | return NULL; | ||
1053 | |||
1054 | ndomains = cap_ndoms(iommu->cap); | ||
1055 | |||
1056 | spin_lock_irqsave(&iommu->lock, flags); | ||
1057 | num = find_first_zero_bit(iommu->domain_ids, ndomains); | ||
1058 | if (num >= ndomains) { | ||
1059 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
1060 | free_domain_mem(domain); | ||
1061 | printk(KERN_ERR "IOMMU: no free domain ids\n"); | ||
1062 | return NULL; | ||
1063 | } | ||
1064 | |||
1065 | set_bit(num, iommu->domain_ids); | ||
1066 | domain->id = num; | ||
1067 | domain->iommu = iommu; | ||
1068 | iommu->domains[num] = domain; | ||
1069 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
1070 | |||
1071 | return domain; | ||
1072 | } | ||
1073 | |||
1074 | static void iommu_free_domain(struct dmar_domain *domain) | ||
1075 | { | ||
1076 | unsigned long flags; | ||
1077 | |||
1078 | spin_lock_irqsave(&domain->iommu->lock, flags); | ||
1079 | clear_bit(domain->id, domain->iommu->domain_ids); | ||
1080 | spin_unlock_irqrestore(&domain->iommu->lock, flags); | ||
1081 | } | ||
1082 | |||
1083 | static struct iova_domain reserved_iova_list; | ||
1084 | |||
1085 | static void dmar_init_reserved_ranges(void) | ||
1086 | { | ||
1087 | struct pci_dev *pdev = NULL; | ||
1088 | struct iova *iova; | ||
1089 | int i; | ||
1090 | u64 addr, size; | ||
1091 | |||
1092 | init_iova_domain(&reserved_iova_list); | ||
1093 | |||
1094 | /* IOAPIC ranges shouldn't be accessed by DMA */ | ||
1095 | iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START), | ||
1096 | IOVA_PFN(IOAPIC_RANGE_END)); | ||
1097 | if (!iova) | ||
1098 | printk(KERN_ERR "Reserve IOAPIC range failed\n"); | ||
1099 | |||
1100 | /* Reserve all PCI MMIO to avoid peer-to-peer access */ | ||
1101 | for_each_pci_dev(pdev) { | ||
1102 | struct resource *r; | ||
1103 | |||
1104 | for (i = 0; i < PCI_NUM_RESOURCES; i++) { | ||
1105 | r = &pdev->resource[i]; | ||
1106 | if (!r->flags || !(r->flags & IORESOURCE_MEM)) | ||
1107 | continue; | ||
1108 | addr = r->start; | ||
1109 | addr &= PAGE_MASK_4K; | ||
1110 | size = r->end - addr; | ||
1111 | size = PAGE_ALIGN_4K(size); | ||
1112 | iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr), | ||
1113 | IOVA_PFN(size + addr) - 1); | ||
1114 | if (!iova) | ||
1115 | printk(KERN_ERR "Reserve iova failed\n"); | ||
1116 | } | ||
1117 | } | ||
1118 | |||
1119 | } | ||
1120 | |||
1121 | static void domain_reserve_special_ranges(struct dmar_domain *domain) | ||
1122 | { | ||
1123 | copy_reserved_iova(&reserved_iova_list, &domain->iovad); | ||
1124 | } | ||
1125 | |||
/*
 * guestwidth_to_adjustwidth - round a guest address width up
 * @gaw: guest address width in bits
 *
 * Rounds @gaw up to the next value of the form 12 + 9 * n (when it is not
 * already of that form) and caps the result at 64.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int width = (rem == 0) ? gaw : gaw + 9 - rem;

	return (width > 64) ? 64 : width;
}
1139 | |||
1140 | static int domain_init(struct dmar_domain *domain, int guest_width) | ||
1141 | { | ||
1142 | struct intel_iommu *iommu; | ||
1143 | int adjust_width, agaw; | ||
1144 | unsigned long sagaw; | ||
1145 | |||
1146 | init_iova_domain(&domain->iovad); | ||
1147 | spin_lock_init(&domain->mapping_lock); | ||
1148 | |||
1149 | domain_reserve_special_ranges(domain); | ||
1150 | |||
1151 | /* calculate AGAW */ | ||
1152 | iommu = domain->iommu; | ||
1153 | if (guest_width > cap_mgaw(iommu->cap)) | ||
1154 | guest_width = cap_mgaw(iommu->cap); | ||
1155 | domain->gaw = guest_width; | ||
1156 | adjust_width = guestwidth_to_adjustwidth(guest_width); | ||
1157 | agaw = width_to_agaw(adjust_width); | ||
1158 | sagaw = cap_sagaw(iommu->cap); | ||
1159 | if (!test_bit(agaw, &sagaw)) { | ||
1160 | /* hardware doesn't support it, choose a bigger one */ | ||
1161 | pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw); | ||
1162 | agaw = find_next_bit(&sagaw, 5, agaw); | ||
1163 | if (agaw >= 5) | ||
1164 | return -ENODEV; | ||
1165 | } | ||
1166 | domain->agaw = agaw; | ||
1167 | INIT_LIST_HEAD(&domain->devices); | ||
1168 | |||
1169 | /* always allocate the top pgd */ | ||
1170 | domain->pgd = (struct dma_pte *)alloc_pgtable_page(); | ||
1171 | if (!domain->pgd) | ||
1172 | return -ENOMEM; | ||
1173 | __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K); | ||
1174 | return 0; | ||
1175 | } | ||
1176 | |||
1177 | static void domain_exit(struct dmar_domain *domain) | ||
1178 | { | ||
1179 | u64 end; | ||
1180 | |||
1181 | /* Domain 0 is reserved, so dont process it */ | ||
1182 | if (!domain) | ||
1183 | return; | ||
1184 | |||
1185 | domain_remove_dev_info(domain); | ||
1186 | /* destroy iovas */ | ||
1187 | put_iova_domain(&domain->iovad); | ||
1188 | end = DOMAIN_MAX_ADDR(domain->gaw); | ||
1189 | end = end & (~PAGE_MASK_4K); | ||
1190 | |||
1191 | /* clear ptes */ | ||
1192 | dma_pte_clear_range(domain, 0, end); | ||
1193 | |||
1194 | /* free page tables */ | ||
1195 | dma_pte_free_pagetable(domain, 0, end); | ||
1196 | |||
1197 | iommu_free_domain(domain); | ||
1198 | free_domain_mem(domain); | ||
1199 | } | ||
1200 | |||
1201 | static int domain_context_mapping_one(struct dmar_domain *domain, | ||
1202 | u8 bus, u8 devfn) | ||
1203 | { | ||
1204 | struct context_entry *context; | ||
1205 | struct intel_iommu *iommu = domain->iommu; | ||
1206 | unsigned long flags; | ||
1207 | |||
1208 | pr_debug("Set context mapping for %02x:%02x.%d\n", | ||
1209 | bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); | ||
1210 | BUG_ON(!domain->pgd); | ||
1211 | context = device_to_context_entry(iommu, bus, devfn); | ||
1212 | if (!context) | ||
1213 | return -ENOMEM; | ||
1214 | spin_lock_irqsave(&iommu->lock, flags); | ||
1215 | if (context_present(*context)) { | ||
1216 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
1217 | return 0; | ||
1218 | } | ||
1219 | |||
1220 | context_set_domain_id(*context, domain->id); | ||
1221 | context_set_address_width(*context, domain->agaw); | ||
1222 | context_set_address_root(*context, virt_to_phys(domain->pgd)); | ||
1223 | context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); | ||
1224 | context_set_fault_enable(*context); | ||
1225 | context_set_present(*context); | ||
1226 | __iommu_flush_cache(iommu, context, sizeof(*context)); | ||
1227 | |||
1228 | /* it's a non-present to present mapping */ | ||
1229 | if (iommu_flush_context_device(iommu, domain->id, | ||
1230 | (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1)) | ||
1231 | iommu_flush_write_buffer(iommu); | ||
1232 | else | ||
1233 | iommu_flush_iotlb_dsi(iommu, 0, 0); | ||
1234 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
1235 | return 0; | ||
1236 | } | ||
1237 | |||
1238 | static int | ||
1239 | domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) | ||
1240 | { | ||
1241 | int ret; | ||
1242 | struct pci_dev *tmp, *parent; | ||
1243 | |||
1244 | ret = domain_context_mapping_one(domain, pdev->bus->number, | ||
1245 | pdev->devfn); | ||
1246 | if (ret) | ||
1247 | return ret; | ||
1248 | |||
1249 | /* dependent device mapping */ | ||
1250 | tmp = pci_find_upstream_pcie_bridge(pdev); | ||
1251 | if (!tmp) | ||
1252 | return 0; | ||
1253 | /* Secondary interface's bus number and devfn 0 */ | ||
1254 | parent = pdev->bus->self; | ||
1255 | while (parent != tmp) { | ||
1256 | ret = domain_context_mapping_one(domain, parent->bus->number, | ||
1257 | parent->devfn); | ||
1258 | if (ret) | ||
1259 | return ret; | ||
1260 | parent = parent->bus->self; | ||
1261 | } | ||
1262 | if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ | ||
1263 | return domain_context_mapping_one(domain, | ||
1264 | tmp->subordinate->number, 0); | ||
1265 | else /* this is a legacy PCI bridge */ | ||
1266 | return domain_context_mapping_one(domain, | ||
1267 | tmp->bus->number, tmp->devfn); | ||
1268 | } | ||
1269 | |||
1270 | static int domain_context_mapped(struct dmar_domain *domain, | ||
1271 | struct pci_dev *pdev) | ||
1272 | { | ||
1273 | int ret; | ||
1274 | struct pci_dev *tmp, *parent; | ||
1275 | |||
1276 | ret = device_context_mapped(domain->iommu, | ||
1277 | pdev->bus->number, pdev->devfn); | ||
1278 | if (!ret) | ||
1279 | return ret; | ||
1280 | /* dependent device mapping */ | ||
1281 | tmp = pci_find_upstream_pcie_bridge(pdev); | ||
1282 | if (!tmp) | ||
1283 | return ret; | ||
1284 | /* Secondary interface's bus number and devfn 0 */ | ||
1285 | parent = pdev->bus->self; | ||
1286 | while (parent != tmp) { | ||
1287 | ret = device_context_mapped(domain->iommu, parent->bus->number, | ||
1288 | parent->devfn); | ||
1289 | if (!ret) | ||
1290 | return ret; | ||
1291 | parent = parent->bus->self; | ||
1292 | } | ||
1293 | if (tmp->is_pcie) | ||
1294 | return device_context_mapped(domain->iommu, | ||
1295 | tmp->subordinate->number, 0); | ||
1296 | else | ||
1297 | return device_context_mapped(domain->iommu, | ||
1298 | tmp->bus->number, tmp->devfn); | ||
1299 | } | ||
1300 | |||
1301 | static int | ||
1302 | domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, | ||
1303 | u64 hpa, size_t size, int prot) | ||
1304 | { | ||
1305 | u64 start_pfn, end_pfn; | ||
1306 | struct dma_pte *pte; | ||
1307 | int index; | ||
1308 | |||
1309 | if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) | ||
1310 | return -EINVAL; | ||
1311 | iova &= PAGE_MASK_4K; | ||
1312 | start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K; | ||
1313 | end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K; | ||
1314 | index = 0; | ||
1315 | while (start_pfn < end_pfn) { | ||
1316 | pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index); | ||
1317 | if (!pte) | ||
1318 | return -ENOMEM; | ||
1319 | /* We don't need lock here, nobody else | ||
1320 | * touches the iova range | ||
1321 | */ | ||
1322 | BUG_ON(dma_pte_addr(*pte)); | ||
1323 | dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K); | ||
1324 | dma_set_pte_prot(*pte, prot); | ||
1325 | __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); | ||
1326 | start_pfn++; | ||
1327 | index++; | ||
1328 | } | ||
1329 | return 0; | ||
1330 | } | ||
1331 | |||
1332 | static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn) | ||
1333 | { | ||
1334 | clear_context_table(domain->iommu, bus, devfn); | ||
1335 | iommu_flush_context_global(domain->iommu, 0); | ||
1336 | iommu_flush_iotlb_global(domain->iommu, 0); | ||
1337 | } | ||
1338 | |||
1339 | static void domain_remove_dev_info(struct dmar_domain *domain) | ||
1340 | { | ||
1341 | struct device_domain_info *info; | ||
1342 | unsigned long flags; | ||
1343 | |||
1344 | spin_lock_irqsave(&device_domain_lock, flags); | ||
1345 | while (!list_empty(&domain->devices)) { | ||
1346 | info = list_entry(domain->devices.next, | ||
1347 | struct device_domain_info, link); | ||
1348 | list_del(&info->link); | ||
1349 | list_del(&info->global); | ||
1350 | if (info->dev) | ||
1351 | info->dev->dev.archdata.iommu = NULL; | ||
1352 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
1353 | |||
1354 | detach_domain_for_dev(info->domain, info->bus, info->devfn); | ||
1355 | free_devinfo_mem(info); | ||
1356 | |||
1357 | spin_lock_irqsave(&device_domain_lock, flags); | ||
1358 | } | ||
1359 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
1360 | } | ||
1361 | |||
1362 | /* | ||
1363 | * find_domain | ||
1364 | * Note: we use struct pci_dev->dev.archdata.iommu stores the info | ||
1365 | */ | ||
1366 | struct dmar_domain * | ||
1367 | find_domain(struct pci_dev *pdev) | ||
1368 | { | ||
1369 | struct device_domain_info *info; | ||
1370 | |||
1371 | /* No lock here, assumes no domain exit in normal case */ | ||
1372 | info = pdev->dev.archdata.iommu; | ||
1373 | if (info) | ||
1374 | return info->domain; | ||
1375 | return NULL; | ||
1376 | } | ||
1377 | |||
1378 | static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, | ||
1379 | struct pci_dev *dev) | ||
1380 | { | ||
1381 | int index; | ||
1382 | |||
1383 | while (dev) { | ||
1384 | for (index = 0; index < cnt; index ++) | ||
1385 | if (dev == devices[index]) | ||
1386 | return 1; | ||
1387 | |||
1388 | /* Check our parent */ | ||
1389 | dev = dev->bus->self; | ||
1390 | } | ||
1391 | |||
1392 | return 0; | ||
1393 | } | ||
1394 | |||
1395 | static struct dmar_drhd_unit * | ||
1396 | dmar_find_matched_drhd_unit(struct pci_dev *dev) | ||
1397 | { | ||
1398 | struct dmar_drhd_unit *drhd = NULL; | ||
1399 | |||
1400 | list_for_each_entry(drhd, &dmar_drhd_units, list) { | ||
1401 | if (drhd->include_all || dmar_pci_device_match(drhd->devices, | ||
1402 | drhd->devices_cnt, dev)) | ||
1403 | return drhd; | ||
1404 | } | ||
1405 | |||
1406 | return NULL; | ||
1407 | } | ||
1408 | |||
1409 | /* domain is initialized */ | ||
1410 | static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) | ||
1411 | { | ||
1412 | struct dmar_domain *domain, *found = NULL; | ||
1413 | struct intel_iommu *iommu; | ||
1414 | struct dmar_drhd_unit *drhd; | ||
1415 | struct device_domain_info *info, *tmp; | ||
1416 | struct pci_dev *dev_tmp; | ||
1417 | unsigned long flags; | ||
1418 | int bus = 0, devfn = 0; | ||
1419 | |||
1420 | domain = find_domain(pdev); | ||
1421 | if (domain) | ||
1422 | return domain; | ||
1423 | |||
1424 | dev_tmp = pci_find_upstream_pcie_bridge(pdev); | ||
1425 | if (dev_tmp) { | ||
1426 | if (dev_tmp->is_pcie) { | ||
1427 | bus = dev_tmp->subordinate->number; | ||
1428 | devfn = 0; | ||
1429 | } else { | ||
1430 | bus = dev_tmp->bus->number; | ||
1431 | devfn = dev_tmp->devfn; | ||
1432 | } | ||
1433 | spin_lock_irqsave(&device_domain_lock, flags); | ||
1434 | list_for_each_entry(info, &device_domain_list, global) { | ||
1435 | if (info->bus == bus && info->devfn == devfn) { | ||
1436 | found = info->domain; | ||
1437 | break; | ||
1438 | } | ||
1439 | } | ||
1440 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
1441 | /* pcie-pci bridge already has a domain, uses it */ | ||
1442 | if (found) { | ||
1443 | domain = found; | ||
1444 | goto found_domain; | ||
1445 | } | ||
1446 | } | ||
1447 | |||
1448 | /* Allocate new domain for the device */ | ||
1449 | drhd = dmar_find_matched_drhd_unit(pdev); | ||
1450 | if (!drhd) { | ||
1451 | printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n", | ||
1452 | pci_name(pdev)); | ||
1453 | return NULL; | ||
1454 | } | ||
1455 | iommu = drhd->iommu; | ||
1456 | |||
1457 | domain = iommu_alloc_domain(iommu); | ||
1458 | if (!domain) | ||
1459 | goto error; | ||
1460 | |||
1461 | if (domain_init(domain, gaw)) { | ||
1462 | domain_exit(domain); | ||
1463 | goto error; | ||
1464 | } | ||
1465 | |||
1466 | /* register pcie-to-pci device */ | ||
1467 | if (dev_tmp) { | ||
1468 | info = alloc_devinfo_mem(); | ||
1469 | if (!info) { | ||
1470 | domain_exit(domain); | ||
1471 | goto error; | ||
1472 | } | ||
1473 | info->bus = bus; | ||
1474 | info->devfn = devfn; | ||
1475 | info->dev = NULL; | ||
1476 | info->domain = domain; | ||
1477 | /* This domain is shared by devices under p2p bridge */ | ||
1478 | domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES; | ||
1479 | |||
1480 | /* pcie-to-pci bridge already has a domain, uses it */ | ||
1481 | found = NULL; | ||
1482 | spin_lock_irqsave(&device_domain_lock, flags); | ||
1483 | list_for_each_entry(tmp, &device_domain_list, global) { | ||
1484 | if (tmp->bus == bus && tmp->devfn == devfn) { | ||
1485 | found = tmp->domain; | ||
1486 | break; | ||
1487 | } | ||
1488 | } | ||
1489 | if (found) { | ||
1490 | free_devinfo_mem(info); | ||
1491 | domain_exit(domain); | ||
1492 | domain = found; | ||
1493 | } else { | ||
1494 | list_add(&info->link, &domain->devices); | ||
1495 | list_add(&info->global, &device_domain_list); | ||
1496 | } | ||
1497 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
1498 | } | ||
1499 | |||
1500 | found_domain: | ||
1501 | info = alloc_devinfo_mem(); | ||
1502 | if (!info) | ||
1503 | goto error; | ||
1504 | info->bus = pdev->bus->number; | ||
1505 | info->devfn = pdev->devfn; | ||
1506 | info->dev = pdev; | ||
1507 | info->domain = domain; | ||
1508 | spin_lock_irqsave(&device_domain_lock, flags); | ||
1509 | /* somebody is fast */ | ||
1510 | found = find_domain(pdev); | ||
1511 | if (found != NULL) { | ||
1512 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
1513 | if (found != domain) { | ||
1514 | domain_exit(domain); | ||
1515 | domain = found; | ||
1516 | } | ||
1517 | free_devinfo_mem(info); | ||
1518 | return domain; | ||
1519 | } | ||
1520 | list_add(&info->link, &domain->devices); | ||
1521 | list_add(&info->global, &device_domain_list); | ||
1522 | pdev->dev.archdata.iommu = info; | ||
1523 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
1524 | return domain; | ||
1525 | error: | ||
1526 | /* recheck it here, maybe others set it */ | ||
1527 | return find_domain(pdev); | ||
1528 | } | ||
1529 | |||
1530 | static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end) | ||
1531 | { | ||
1532 | struct dmar_domain *domain; | ||
1533 | unsigned long size; | ||
1534 | u64 base; | ||
1535 | int ret; | ||
1536 | |||
1537 | printk(KERN_INFO | ||
1538 | "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", | ||
1539 | pci_name(pdev), start, end); | ||
1540 | /* page table init */ | ||
1541 | domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); | ||
1542 | if (!domain) | ||
1543 | return -ENOMEM; | ||
1544 | |||
1545 | /* The address might not be aligned */ | ||
1546 | base = start & PAGE_MASK_4K; | ||
1547 | size = end - base; | ||
1548 | size = PAGE_ALIGN_4K(size); | ||
1549 | if (!reserve_iova(&domain->iovad, IOVA_PFN(base), | ||
1550 | IOVA_PFN(base + size) - 1)) { | ||
1551 | printk(KERN_ERR "IOMMU: reserve iova failed\n"); | ||
1552 | ret = -ENOMEM; | ||
1553 | goto error; | ||
1554 | } | ||
1555 | |||
1556 | pr_debug("Mapping reserved region %lx@%llx for %s\n", | ||
1557 | size, base, pci_name(pdev)); | ||
1558 | /* | ||
1559 | * RMRR range might have overlap with physical memory range, | ||
1560 | * clear it first | ||
1561 | */ | ||
1562 | dma_pte_clear_range(domain, base, base + size); | ||
1563 | |||
1564 | ret = domain_page_mapping(domain, base, base, size, | ||
1565 | DMA_PTE_READ|DMA_PTE_WRITE); | ||
1566 | if (ret) | ||
1567 | goto error; | ||
1568 | |||
1569 | /* context entry init */ | ||
1570 | ret = domain_context_mapping(domain, pdev); | ||
1571 | if (!ret) | ||
1572 | return 0; | ||
1573 | error: | ||
1574 | domain_exit(domain); | ||
1575 | return ret; | ||
1576 | |||
1577 | } | ||
1578 | |||
1579 | static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, | ||
1580 | struct pci_dev *pdev) | ||
1581 | { | ||
1582 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | ||
1583 | return 0; | ||
1584 | return iommu_prepare_identity_map(pdev, rmrr->base_address, | ||
1585 | rmrr->end_address + 1); | ||
1586 | } | ||
1587 | |||
#ifdef CONFIG_DMAR_GFX_WA
extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);

/*
 * iommu_prepare_gfx_mapping - identity-map all RAM for graphics devices
 *
 * For every graphics device not tagged with DUMMY_DEVICE_DOMAIN_INFO, each
 * RAM range reported by arch_get_ram_range() is mapped 1:1. On the first
 * failing range an error is logged and the remaining ranges of that device
 * are skipped; the next device is still processed.
 */
static void __init iommu_prepare_gfx_mapping(void)
{
	struct pci_dev *pdev = NULL;
	u64 base, size;
	int slot;
	int ret;

	for_each_pci_dev(pdev) {
		if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
			continue;
		if (!IS_GFX_DEVICE(pdev))
			continue;

		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
			pci_name(pdev));

		for (slot = arch_get_ram_range(0, &base, &size);
		     slot >= 0;
		     slot = arch_get_ram_range(slot, &base, &size)) {
			ret = iommu_prepare_identity_map(pdev,
					base, base + size);
			if (ret) {
				printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
				break;
			}
		}
	}
}
#endif
1617 | |||
#ifdef CONFIG_DMAR_FLOPPY_WA
/*
 * iommu_prepare_isa - floppy workaround: identity-map the low 16M
 *
 * Looks up the first ISA bridge and sets up a 0-16M identity map for it.
 * Does nothing if no ISA bridge is found; a mapping failure is only
 * logged.
 */
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);

	/* the range attempted above is 0-16M, so report exactly that */
	if (ret)
		printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
			"floppy might not work\n");

}
#else
/* Without the floppy workaround there is nothing to prepare. */
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_DMAR_FLOPPY_WA */
1642 | |||
1643 | int __init init_dmars(void) | ||
1644 | { | ||
1645 | struct dmar_drhd_unit *drhd; | ||
1646 | struct dmar_rmrr_unit *rmrr; | ||
1647 | struct pci_dev *pdev; | ||
1648 | struct intel_iommu *iommu; | ||
1649 | int ret, unit = 0; | ||
1650 | |||
1651 | /* | ||
1652 | * for each drhd | ||
1653 | * allocate root | ||
1654 | * initialize and program root entry to not present | ||
1655 | * endfor | ||
1656 | */ | ||
1657 | for_each_drhd_unit(drhd) { | ||
1658 | if (drhd->ignored) | ||
1659 | continue; | ||
1660 | iommu = alloc_iommu(drhd); | ||
1661 | if (!iommu) { | ||
1662 | ret = -ENOMEM; | ||
1663 | goto error; | ||
1664 | } | ||
1665 | |||
1666 | /* | ||
1667 | * TBD: | ||
1668 | * we could share the same root & context tables | ||
1669 | * amoung all IOMMU's. Need to Split it later. | ||
1670 | */ | ||
1671 | ret = iommu_alloc_root_entry(iommu); | ||
1672 | if (ret) { | ||
1673 | printk(KERN_ERR "IOMMU: allocate root entry failed\n"); | ||
1674 | goto error; | ||
1675 | } | ||
1676 | } | ||
1677 | |||
1678 | /* | ||
1679 | * For each rmrr | ||
1680 | * for each dev attached to rmrr | ||
1681 | * do | ||
1682 | * locate drhd for dev, alloc domain for dev | ||
1683 | * allocate free domain | ||
1684 | * allocate page table entries for rmrr | ||
1685 | * if context not allocated for bus | ||
1686 | * allocate and init context | ||
1687 | * set present in root table for this bus | ||
1688 | * init context with domain, translation etc | ||
1689 | * endfor | ||
1690 | * endfor | ||
1691 | */ | ||
1692 | for_each_rmrr_units(rmrr) { | ||
1693 | int i; | ||
1694 | for (i = 0; i < rmrr->devices_cnt; i++) { | ||
1695 | pdev = rmrr->devices[i]; | ||
1696 | /* some BIOS lists non-exist devices in DMAR table */ | ||
1697 | if (!pdev) | ||
1698 | continue; | ||
1699 | ret = iommu_prepare_rmrr_dev(rmrr, pdev); | ||
1700 | if (ret) | ||
1701 | printk(KERN_ERR | ||
1702 | "IOMMU: mapping reserved region failed\n"); | ||
1703 | } | ||
1704 | } | ||
1705 | |||
1706 | iommu_prepare_gfx_mapping(); | ||
1707 | |||
1708 | iommu_prepare_isa(); | ||
1709 | |||
1710 | /* | ||
1711 | * for each drhd | ||
1712 | * enable fault log | ||
1713 | * global invalidate context cache | ||
1714 | * global invalidate iotlb | ||
1715 | * enable translation | ||
1716 | */ | ||
1717 | for_each_drhd_unit(drhd) { | ||
1718 | if (drhd->ignored) | ||
1719 | continue; | ||
1720 | iommu = drhd->iommu; | ||
1721 | sprintf (iommu->name, "dmar%d", unit++); | ||
1722 | |||
1723 | iommu_flush_write_buffer(iommu); | ||
1724 | |||
1725 | ret = dmar_set_interrupt(iommu); | ||
1726 | if (ret) | ||
1727 | goto error; | ||
1728 | |||
1729 | iommu_set_root_entry(iommu); | ||
1730 | |||
1731 | iommu_flush_context_global(iommu, 0); | ||
1732 | iommu_flush_iotlb_global(iommu, 0); | ||
1733 | |||
1734 | ret = iommu_enable_translation(iommu); | ||
1735 | if (ret) | ||
1736 | goto error; | ||
1737 | } | ||
1738 | |||
1739 | return 0; | ||
1740 | error: | ||
1741 | for_each_drhd_unit(drhd) { | ||
1742 | if (drhd->ignored) | ||
1743 | continue; | ||
1744 | iommu = drhd->iommu; | ||
1745 | free_iommu(iommu); | ||
1746 | } | ||
1747 | return ret; | ||
1748 | } | ||
1749 | |||
1750 | static inline u64 aligned_size(u64 host_addr, size_t size) | ||
1751 | { | ||
1752 | u64 addr; | ||
1753 | addr = (host_addr & (~PAGE_MASK_4K)) + size; | ||
1754 | return PAGE_ALIGN_4K(addr); | ||
1755 | } | ||
1756 | |||
1757 | struct iova * | ||
1758 | iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end) | ||
1759 | { | ||
1760 | struct iova *piova; | ||
1761 | |||
1762 | /* Make sure it's in range */ | ||
1763 | end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end); | ||
1764 | if (!size || (IOVA_START_ADDR + size > end)) | ||
1765 | return NULL; | ||
1766 | |||
1767 | piova = alloc_iova(&domain->iovad, | ||
1768 | size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1); | ||
1769 | return piova; | ||
1770 | } | ||
1771 | |||
1772 | static struct iova * | ||
1773 | __intel_alloc_iova(struct device *dev, struct dmar_domain *domain, | ||
1774 | size_t size) | ||
1775 | { | ||
1776 | struct pci_dev *pdev = to_pci_dev(dev); | ||
1777 | struct iova *iova = NULL; | ||
1778 | |||
1779 | if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) { | ||
1780 | iova = iommu_alloc_iova(domain, size, pdev->dma_mask); | ||
1781 | } else { | ||
1782 | /* | ||
1783 | * First try to allocate an io virtual address in | ||
1784 | * DMA_32BIT_MASK and if that fails then try allocating | ||
1785 | * from higer range | ||
1786 | */ | ||
1787 | iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK); | ||
1788 | if (!iova) | ||
1789 | iova = iommu_alloc_iova(domain, size, pdev->dma_mask); | ||
1790 | } | ||
1791 | |||
1792 | if (!iova) { | ||
1793 | printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); | ||
1794 | return NULL; | ||
1795 | } | ||
1796 | |||
1797 | return iova; | ||
1798 | } | ||
1799 | |||
1800 | static struct dmar_domain * | ||
1801 | get_valid_domain_for_dev(struct pci_dev *pdev) | ||
1802 | { | ||
1803 | struct dmar_domain *domain; | ||
1804 | int ret; | ||
1805 | |||
1806 | domain = get_domain_for_dev(pdev, | ||
1807 | DEFAULT_DOMAIN_ADDRESS_WIDTH); | ||
1808 | if (!domain) { | ||
1809 | printk(KERN_ERR | ||
1810 | "Allocating domain for %s failed", pci_name(pdev)); | ||
1811 | return 0; | ||
1812 | } | ||
1813 | |||
1814 | /* make sure context mapping is ok */ | ||
1815 | if (unlikely(!domain_context_mapped(domain, pdev))) { | ||
1816 | ret = domain_context_mapping(domain, pdev); | ||
1817 | if (ret) { | ||
1818 | printk(KERN_ERR | ||
1819 | "Domain context map for %s failed", | ||
1820 | pci_name(pdev)); | ||
1821 | return 0; | ||
1822 | } | ||
1823 | } | ||
1824 | |||
1825 | return domain; | ||
1826 | } | ||
1827 | |||
1828 | static dma_addr_t intel_map_single(struct device *hwdev, void *addr, | ||
1829 | size_t size, int dir) | ||
1830 | { | ||
1831 | struct pci_dev *pdev = to_pci_dev(hwdev); | ||
1832 | int ret; | ||
1833 | struct dmar_domain *domain; | ||
1834 | unsigned long start_addr; | ||
1835 | struct iova *iova; | ||
1836 | int prot = 0; | ||
1837 | |||
1838 | BUG_ON(dir == DMA_NONE); | ||
1839 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | ||
1840 | return virt_to_bus(addr); | ||
1841 | |||
1842 | domain = get_valid_domain_for_dev(pdev); | ||
1843 | if (!domain) | ||
1844 | return 0; | ||
1845 | |||
1846 | addr = (void *)virt_to_phys(addr); | ||
1847 | size = aligned_size((u64)addr, size); | ||
1848 | |||
1849 | iova = __intel_alloc_iova(hwdev, domain, size); | ||
1850 | if (!iova) | ||
1851 | goto error; | ||
1852 | |||
1853 | start_addr = iova->pfn_lo << PAGE_SHIFT_4K; | ||
1854 | |||
1855 | /* | ||
1856 | * Check if DMAR supports zero-length reads on write only | ||
1857 | * mappings.. | ||
1858 | */ | ||
1859 | if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ | ||
1860 | !cap_zlr(domain->iommu->cap)) | ||
1861 | prot |= DMA_PTE_READ; | ||
1862 | if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) | ||
1863 | prot |= DMA_PTE_WRITE; | ||
1864 | /* | ||
1865 | * addr - (addr + size) might be partial page, we should map the whole | ||
1866 | * page. Note: if two part of one page are separately mapped, we | ||
1867 | * might have two guest_addr mapping to the same host addr, but this | ||
1868 | * is not a big problem | ||
1869 | */ | ||
1870 | ret = domain_page_mapping(domain, start_addr, | ||
1871 | ((u64)addr) & PAGE_MASK_4K, size, prot); | ||
1872 | if (ret) | ||
1873 | goto error; | ||
1874 | |||
1875 | pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n", | ||
1876 | pci_name(pdev), size, (u64)addr, | ||
1877 | size, (u64)start_addr, dir); | ||
1878 | |||
1879 | /* it's a non-present to present mapping */ | ||
1880 | ret = iommu_flush_iotlb_psi(domain->iommu, domain->id, | ||
1881 | start_addr, size >> PAGE_SHIFT_4K, 1); | ||
1882 | if (ret) | ||
1883 | iommu_flush_write_buffer(domain->iommu); | ||
1884 | |||
1885 | return (start_addr + ((u64)addr & (~PAGE_MASK_4K))); | ||
1886 | |||
1887 | error: | ||
1888 | if (iova) | ||
1889 | __free_iova(&domain->iovad, iova); | ||
1890 | printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n", | ||
1891 | pci_name(pdev), size, (u64)addr, dir); | ||
1892 | return 0; | ||
1893 | } | ||
1894 | |||
1895 | static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, | ||
1896 | size_t size, int dir) | ||
1897 | { | ||
1898 | struct pci_dev *pdev = to_pci_dev(dev); | ||
1899 | struct dmar_domain *domain; | ||
1900 | unsigned long start_addr; | ||
1901 | struct iova *iova; | ||
1902 | |||
1903 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | ||
1904 | return; | ||
1905 | domain = find_domain(pdev); | ||
1906 | BUG_ON(!domain); | ||
1907 | |||
1908 | iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); | ||
1909 | if (!iova) | ||
1910 | return; | ||
1911 | |||
1912 | start_addr = iova->pfn_lo << PAGE_SHIFT_4K; | ||
1913 | size = aligned_size((u64)dev_addr, size); | ||
1914 | |||
1915 | pr_debug("Device %s unmapping: %lx@%llx\n", | ||
1916 | pci_name(pdev), size, (u64)start_addr); | ||
1917 | |||
1918 | /* clear the whole page */ | ||
1919 | dma_pte_clear_range(domain, start_addr, start_addr + size); | ||
1920 | /* free page tables */ | ||
1921 | dma_pte_free_pagetable(domain, start_addr, start_addr + size); | ||
1922 | |||
1923 | if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, | ||
1924 | size >> PAGE_SHIFT_4K, 0)) | ||
1925 | iommu_flush_write_buffer(domain->iommu); | ||
1926 | |||
1927 | /* free iova */ | ||
1928 | __free_iova(&domain->iovad, iova); | ||
1929 | } | ||
1930 | |||
1931 | static void * intel_alloc_coherent(struct device *hwdev, size_t size, | ||
1932 | dma_addr_t *dma_handle, gfp_t flags) | ||
1933 | { | ||
1934 | void *vaddr; | ||
1935 | int order; | ||
1936 | |||
1937 | size = PAGE_ALIGN_4K(size); | ||
1938 | order = get_order(size); | ||
1939 | flags &= ~(GFP_DMA | GFP_DMA32); | ||
1940 | |||
1941 | vaddr = (void *)__get_free_pages(flags, order); | ||
1942 | if (!vaddr) | ||
1943 | return NULL; | ||
1944 | memset(vaddr, 0, size); | ||
1945 | |||
1946 | *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL); | ||
1947 | if (*dma_handle) | ||
1948 | return vaddr; | ||
1949 | free_pages((unsigned long)vaddr, order); | ||
1950 | return NULL; | ||
1951 | } | ||
1952 | |||
1953 | static void intel_free_coherent(struct device *hwdev, size_t size, | ||
1954 | void *vaddr, dma_addr_t dma_handle) | ||
1955 | { | ||
1956 | int order; | ||
1957 | |||
1958 | size = PAGE_ALIGN_4K(size); | ||
1959 | order = get_order(size); | ||
1960 | |||
1961 | intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL); | ||
1962 | free_pages((unsigned long)vaddr, order); | ||
1963 | } | ||
1964 | |||
1965 | #define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) | ||
1966 | static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, | ||
1967 | int nelems, int dir) | ||
1968 | { | ||
1969 | int i; | ||
1970 | struct pci_dev *pdev = to_pci_dev(hwdev); | ||
1971 | struct dmar_domain *domain; | ||
1972 | unsigned long start_addr; | ||
1973 | struct iova *iova; | ||
1974 | size_t size = 0; | ||
1975 | void *addr; | ||
1976 | struct scatterlist *sg; | ||
1977 | |||
1978 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | ||
1979 | return; | ||
1980 | |||
1981 | domain = find_domain(pdev); | ||
1982 | |||
1983 | iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); | ||
1984 | if (!iova) | ||
1985 | return; | ||
1986 | for_each_sg(sglist, sg, nelems, i) { | ||
1987 | addr = SG_ENT_VIRT_ADDRESS(sg); | ||
1988 | size += aligned_size((u64)addr, sg->length); | ||
1989 | } | ||
1990 | |||
1991 | start_addr = iova->pfn_lo << PAGE_SHIFT_4K; | ||
1992 | |||
1993 | /* clear the whole page */ | ||
1994 | dma_pte_clear_range(domain, start_addr, start_addr + size); | ||
1995 | /* free page tables */ | ||
1996 | dma_pte_free_pagetable(domain, start_addr, start_addr + size); | ||
1997 | |||
1998 | if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, | ||
1999 | size >> PAGE_SHIFT_4K, 0)) | ||
2000 | iommu_flush_write_buffer(domain->iommu); | ||
2001 | |||
2002 | /* free iova */ | ||
2003 | __free_iova(&domain->iovad, iova); | ||
2004 | } | ||
2005 | |||
2006 | static int intel_nontranslate_map_sg(struct device *hddev, | ||
2007 | struct scatterlist *sglist, int nelems, int dir) | ||
2008 | { | ||
2009 | int i; | ||
2010 | struct scatterlist *sg; | ||
2011 | |||
2012 | for_each_sg(sglist, sg, nelems, i) { | ||
2013 | BUG_ON(!sg->page); | ||
2014 | sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg)); | ||
2015 | sg->dma_length = sg->length; | ||
2016 | } | ||
2017 | return nelems; | ||
2018 | } | ||
2019 | |||
2020 | static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, | ||
2021 | int nelems, int dir) | ||
2022 | { | ||
2023 | void *addr; | ||
2024 | int i; | ||
2025 | struct pci_dev *pdev = to_pci_dev(hwdev); | ||
2026 | struct dmar_domain *domain; | ||
2027 | size_t size = 0; | ||
2028 | int prot = 0; | ||
2029 | size_t offset = 0; | ||
2030 | struct iova *iova = NULL; | ||
2031 | int ret; | ||
2032 | struct scatterlist *sg; | ||
2033 | unsigned long start_addr; | ||
2034 | |||
2035 | BUG_ON(dir == DMA_NONE); | ||
2036 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | ||
2037 | return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); | ||
2038 | |||
2039 | domain = get_valid_domain_for_dev(pdev); | ||
2040 | if (!domain) | ||
2041 | return 0; | ||
2042 | |||
2043 | for_each_sg(sglist, sg, nelems, i) { | ||
2044 | addr = SG_ENT_VIRT_ADDRESS(sg); | ||
2045 | addr = (void *)virt_to_phys(addr); | ||
2046 | size += aligned_size((u64)addr, sg->length); | ||
2047 | } | ||
2048 | |||
2049 | iova = __intel_alloc_iova(hwdev, domain, size); | ||
2050 | if (!iova) { | ||
2051 | sglist->dma_length = 0; | ||
2052 | return 0; | ||
2053 | } | ||
2054 | |||
2055 | /* | ||
2056 | * Check if DMAR supports zero-length reads on write only | ||
2057 | * mappings.. | ||
2058 | */ | ||
2059 | if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ | ||
2060 | !cap_zlr(domain->iommu->cap)) | ||
2061 | prot |= DMA_PTE_READ; | ||
2062 | if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) | ||
2063 | prot |= DMA_PTE_WRITE; | ||
2064 | |||
2065 | start_addr = iova->pfn_lo << PAGE_SHIFT_4K; | ||
2066 | offset = 0; | ||
2067 | for_each_sg(sglist, sg, nelems, i) { | ||
2068 | addr = SG_ENT_VIRT_ADDRESS(sg); | ||
2069 | addr = (void *)virt_to_phys(addr); | ||
2070 | size = aligned_size((u64)addr, sg->length); | ||
2071 | ret = domain_page_mapping(domain, start_addr + offset, | ||
2072 | ((u64)addr) & PAGE_MASK_4K, | ||
2073 | size, prot); | ||
2074 | if (ret) { | ||
2075 | /* clear the page */ | ||
2076 | dma_pte_clear_range(domain, start_addr, | ||
2077 | start_addr + offset); | ||
2078 | /* free page tables */ | ||
2079 | dma_pte_free_pagetable(domain, start_addr, | ||
2080 | start_addr + offset); | ||
2081 | /* free iova */ | ||
2082 | __free_iova(&domain->iovad, iova); | ||
2083 | return 0; | ||
2084 | } | ||
2085 | sg->dma_address = start_addr + offset + | ||
2086 | ((u64)addr & (~PAGE_MASK_4K)); | ||
2087 | sg->dma_length = sg->length; | ||
2088 | offset += size; | ||
2089 | } | ||
2090 | |||
2091 | /* it's a non-present to present mapping */ | ||
2092 | if (iommu_flush_iotlb_psi(domain->iommu, domain->id, | ||
2093 | start_addr, offset >> PAGE_SHIFT_4K, 1)) | ||
2094 | iommu_flush_write_buffer(domain->iommu); | ||
2095 | return nelems; | ||
2096 | } | ||
2097 | |||
2098 | static struct dma_mapping_ops intel_dma_ops = { | ||
2099 | .alloc_coherent = intel_alloc_coherent, | ||
2100 | .free_coherent = intel_free_coherent, | ||
2101 | .map_single = intel_map_single, | ||
2102 | .unmap_single = intel_unmap_single, | ||
2103 | .map_sg = intel_map_sg, | ||
2104 | .unmap_sg = intel_unmap_sg, | ||
2105 | }; | ||
2106 | |||
2107 | static inline int iommu_domain_cache_init(void) | ||
2108 | { | ||
2109 | int ret = 0; | ||
2110 | |||
2111 | iommu_domain_cache = kmem_cache_create("iommu_domain", | ||
2112 | sizeof(struct dmar_domain), | ||
2113 | 0, | ||
2114 | SLAB_HWCACHE_ALIGN, | ||
2115 | |||
2116 | NULL); | ||
2117 | if (!iommu_domain_cache) { | ||
2118 | printk(KERN_ERR "Couldn't create iommu_domain cache\n"); | ||
2119 | ret = -ENOMEM; | ||
2120 | } | ||
2121 | |||
2122 | return ret; | ||
2123 | } | ||
2124 | |||
2125 | static inline int iommu_devinfo_cache_init(void) | ||
2126 | { | ||
2127 | int ret = 0; | ||
2128 | |||
2129 | iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", | ||
2130 | sizeof(struct device_domain_info), | ||
2131 | 0, | ||
2132 | SLAB_HWCACHE_ALIGN, | ||
2133 | |||
2134 | NULL); | ||
2135 | if (!iommu_devinfo_cache) { | ||
2136 | printk(KERN_ERR "Couldn't create devinfo cache\n"); | ||
2137 | ret = -ENOMEM; | ||
2138 | } | ||
2139 | |||
2140 | return ret; | ||
2141 | } | ||
2142 | |||
2143 | static inline int iommu_iova_cache_init(void) | ||
2144 | { | ||
2145 | int ret = 0; | ||
2146 | |||
2147 | iommu_iova_cache = kmem_cache_create("iommu_iova", | ||
2148 | sizeof(struct iova), | ||
2149 | 0, | ||
2150 | SLAB_HWCACHE_ALIGN, | ||
2151 | |||
2152 | NULL); | ||
2153 | if (!iommu_iova_cache) { | ||
2154 | printk(KERN_ERR "Couldn't create iova cache\n"); | ||
2155 | ret = -ENOMEM; | ||
2156 | } | ||
2157 | |||
2158 | return ret; | ||
2159 | } | ||
2160 | |||
2161 | static int __init iommu_init_mempool(void) | ||
2162 | { | ||
2163 | int ret; | ||
2164 | ret = iommu_iova_cache_init(); | ||
2165 | if (ret) | ||
2166 | return ret; | ||
2167 | |||
2168 | ret = iommu_domain_cache_init(); | ||
2169 | if (ret) | ||
2170 | goto domain_error; | ||
2171 | |||
2172 | ret = iommu_devinfo_cache_init(); | ||
2173 | if (!ret) | ||
2174 | return ret; | ||
2175 | |||
2176 | kmem_cache_destroy(iommu_domain_cache); | ||
2177 | domain_error: | ||
2178 | kmem_cache_destroy(iommu_iova_cache); | ||
2179 | |||
2180 | return -ENOMEM; | ||
2181 | } | ||
2182 | |||
2183 | static void __init iommu_exit_mempool(void) | ||
2184 | { | ||
2185 | kmem_cache_destroy(iommu_devinfo_cache); | ||
2186 | kmem_cache_destroy(iommu_domain_cache); | ||
2187 | kmem_cache_destroy(iommu_iova_cache); | ||
2188 | |||
2189 | } | ||
2190 | |||
2191 | void __init detect_intel_iommu(void) | ||
2192 | { | ||
2193 | if (swiotlb || no_iommu || iommu_detected || dmar_disabled) | ||
2194 | return; | ||
2195 | if (early_dmar_detect()) { | ||
2196 | iommu_detected = 1; | ||
2197 | } | ||
2198 | } | ||
2199 | |||
2200 | static void __init init_no_remapping_devices(void) | ||
2201 | { | ||
2202 | struct dmar_drhd_unit *drhd; | ||
2203 | |||
2204 | for_each_drhd_unit(drhd) { | ||
2205 | if (!drhd->include_all) { | ||
2206 | int i; | ||
2207 | for (i = 0; i < drhd->devices_cnt; i++) | ||
2208 | if (drhd->devices[i] != NULL) | ||
2209 | break; | ||
2210 | /* ignore DMAR unit if no pci devices exist */ | ||
2211 | if (i == drhd->devices_cnt) | ||
2212 | drhd->ignored = 1; | ||
2213 | } | ||
2214 | } | ||
2215 | |||
2216 | if (dmar_map_gfx) | ||
2217 | return; | ||
2218 | |||
2219 | for_each_drhd_unit(drhd) { | ||
2220 | int i; | ||
2221 | if (drhd->ignored || drhd->include_all) | ||
2222 | continue; | ||
2223 | |||
2224 | for (i = 0; i < drhd->devices_cnt; i++) | ||
2225 | if (drhd->devices[i] && | ||
2226 | !IS_GFX_DEVICE(drhd->devices[i])) | ||
2227 | break; | ||
2228 | |||
2229 | if (i < drhd->devices_cnt) | ||
2230 | continue; | ||
2231 | |||
2232 | /* bypass IOMMU if it is just for gfx devices */ | ||
2233 | drhd->ignored = 1; | ||
2234 | for (i = 0; i < drhd->devices_cnt; i++) { | ||
2235 | if (!drhd->devices[i]) | ||
2236 | continue; | ||
2237 | drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; | ||
2238 | } | ||
2239 | } | ||
2240 | } | ||
2241 | |||
2242 | int __init intel_iommu_init(void) | ||
2243 | { | ||
2244 | int ret = 0; | ||
2245 | |||
2246 | if (no_iommu || swiotlb || dmar_disabled) | ||
2247 | return -ENODEV; | ||
2248 | |||
2249 | if (dmar_table_init()) | ||
2250 | return -ENODEV; | ||
2251 | |||
2252 | iommu_init_mempool(); | ||
2253 | dmar_init_reserved_ranges(); | ||
2254 | |||
2255 | init_no_remapping_devices(); | ||
2256 | |||
2257 | ret = init_dmars(); | ||
2258 | if (ret) { | ||
2259 | printk(KERN_ERR "IOMMU: dmar init failed\n"); | ||
2260 | put_iova_domain(&reserved_iova_list); | ||
2261 | iommu_exit_mempool(); | ||
2262 | return ret; | ||
2263 | } | ||
2264 | printk(KERN_INFO | ||
2265 | "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); | ||
2266 | |||
2267 | force_iommu = 1; | ||
2268 | dma_ops = &intel_dma_ops; | ||
2269 | return 0; | ||
2270 | } | ||
2271 | |||
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h new file mode 100644 index 00000000000..ee88dd2400c --- /dev/null +++ b/drivers/pci/intel-iommu.h | |||
@@ -0,0 +1,325 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006, Intel Corporation. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License along with | ||
14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
16 | * | ||
17 | * Copyright (C) Ashok Raj <ashok.raj@intel.com> | ||
18 | * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | ||
19 | */ | ||
20 | |||
21 | #ifndef _INTEL_IOMMU_H_ | ||
22 | #define _INTEL_IOMMU_H_ | ||
23 | |||
24 | #include <linux/types.h> | ||
25 | #include <linux/msi.h> | ||
26 | #include "iova.h" | ||
27 | #include <linux/io.h> | ||
28 | |||
29 | /* | ||
30 | * Intel IOMMU register specification per version 1.0 public spec. | ||
31 | */ | ||
32 | |||
33 | #define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ | ||
34 | #define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ | ||
35 | #define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ | ||
36 | #define DMAR_GCMD_REG 0x18 /* Global command register */ | ||
37 | #define DMAR_GSTS_REG 0x1c /* Global status register */ | ||
38 | #define DMAR_RTADDR_REG 0x20 /* Root entry table */ | ||
39 | #define DMAR_CCMD_REG 0x28 /* Context command reg */ | ||
40 | #define DMAR_FSTS_REG 0x34 /* Fault Status register */ | ||
41 | #define DMAR_FECTL_REG 0x38 /* Fault control register */ | ||
42 | #define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ | ||
43 | #define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ | ||
44 | #define DMAR_FEUADDR_REG 0x44 /* Upper address register */ | ||
45 | #define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ | ||
46 | #define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ | ||
47 | #define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ | ||
48 | #define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ | ||
49 | #define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ | ||
50 | #define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ | ||
51 | |||
52 | #define OFFSET_STRIDE (9) | ||
53 | /* | ||
54 | #define dmar_readl(dmar, reg) readl(dmar + reg) | ||
55 | #define dmar_readq(dmar, reg) ({ \ | ||
56 | u32 lo, hi; \ | ||
57 | lo = readl(dmar + reg); \ | ||
58 | hi = readl(dmar + reg + 4); \ | ||
59 | (((u64) hi) << 32) + lo; }) | ||
60 | */ | ||
61 | static inline u64 dmar_readq(void *addr) | ||
62 | { | ||
63 | u32 lo, hi; | ||
64 | lo = readl(addr); | ||
65 | hi = readl(addr + 4); | ||
66 | return (((u64) hi) << 32) + lo; | ||
67 | } | ||
68 | |||
69 | static inline void dmar_writeq(void __iomem *addr, u64 val) | ||
70 | { | ||
71 | writel((u32)val, addr); | ||
72 | writel((u32)(val >> 32), addr + 4); | ||
73 | } | ||
74 | |||
/* Major/minor version nibbles of the version register value. */
#define DMAR_VER_MAJOR(v)		(((v) & 0xf0) >> 4)
#define DMAR_VER_MINOR(v)		((v) & 0x0f)

/*
 * Decoding Capability Register
 */
#define cap_read_drain(c)	(((c) >> 55) & 1)
#define cap_write_drain(c)	(((c) >> 54) & 1)
#define cap_max_amask_val(c)	(((c) >> 48) & 0x3f)
#define cap_num_fault_regs(c)	((((c) >> 40) & 0xff) + 1)
#define cap_pgsel_inv(c)	(((c) >> 39) & 1)

#define cap_super_page_val(c)	(((c) >> 34) & 0xf)
/*
 * find_first_bit() needs the address of an unsigned long, so the field is
 * copied into a temporary first; the shifted-and-masked expression itself
 * is not an lvalue and its address cannot be taken.
 */
#define cap_super_offset(c)	({					\
	unsigned long __sp_val = cap_super_page_val(c);			\
	(find_first_bit(&__sp_val, 4) * OFFSET_STRIDE) + 21; })

#define cap_fault_reg_offset(c)	((((c) >> 24) & 0x3ff) * 16)
#define cap_max_fault_reg_offset(c) \
	(cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)

#define cap_zlr(c)		(((c) >> 22) & 1)
#define cap_isoch(c)		(((c) >> 23) & 1)
#define cap_mgaw(c)		((((c) >> 16) & 0x3f) + 1)
#define cap_sagaw(c)		(((c) >> 8) & 0x1f)
#define cap_caching_mode(c)	(((c) >> 7) & 1)
#define cap_phmr(c)		(((c) >> 6) & 1)
#define cap_plmr(c)		(((c) >> 5) & 1)
#define cap_rwbf(c)		(((c) >> 4) & 1)
#define cap_afl(c)		(((c) >> 3) & 1)
#define cap_ndoms(c)		(((unsigned long)1) << (4 + 2 * ((c) & 0x7)))
/*
 * Extended Capability Register
 */

#define ecap_niotlb_iunits(e)	((((e) >> 24) & 0xff) + 1)
#define ecap_iotlb_offset(e) 	((((e) >> 8) & 0x3ff) * 16)
#define ecap_max_iotlb_offset(e) \
	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
#define ecap_coherent(e)	((e) & 0x1)
114 | |||
115 | |||
/*
 * Register field definitions.  Every macro argument is parenthesized so
 * that an expression argument (e.g. "a | b") is shifted or masked as a
 * whole.
 */

/* IOTLB_REG */
#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
#define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
#define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
#define DMA_TLB_IIRG(type) (((type) >> 60) & 7)
#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
#define DMA_TLB_READ_DRAIN (((u64)1) << 49)
#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
#define DMA_TLB_DID(id)	(((u64)((id) & 0xffff)) << 32)
#define DMA_TLB_IVT (((u64)1) << 63)
#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
#define DMA_TLB_MAX_SIZE (0x3f)

/* GCMD_REG */
#define DMA_GCMD_TE (((u32)1) << 31)
#define DMA_GCMD_SRTP (((u32)1) << 30)
#define DMA_GCMD_SFL (((u32)1) << 29)
#define DMA_GCMD_EAFL (((u32)1) << 28)
#define DMA_GCMD_WBF (((u32)1) << 27)

/* GSTS_REG */
#define DMA_GSTS_TES (((u32)1) << 31)
#define DMA_GSTS_RTPS (((u32)1) << 30)
#define DMA_GSTS_FLS (((u32)1) << 29)
#define DMA_GSTS_AFLS (((u32)1) << 28)
#define DMA_GSTS_WBFS (((u32)1) << 27)

/* CCMD_REG */
#define DMA_CCMD_ICC (((u64)1) << 63)
#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
#define DMA_CCMD_MASK_NOBIT 0
#define DMA_CCMD_MASK_1BIT 1
#define DMA_CCMD_MASK_2BIT 2
#define DMA_CCMD_MASK_3BIT 3
#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))

/* FECTL_REG */
#define DMA_FECTL_IM (((u32)1) << 31)

/* FSTS_REG */
#define DMA_FSTS_PPF ((u32)2)
#define DMA_FSTS_PFO ((u32)1)
#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)

/* FRCD_REG, 32 bits access */
#define DMA_FRCD_F (((u32)1) << 31)
#define dma_frcd_type(d) (((d) >> 30) & 1)
#define dma_frcd_fault_reason(c) ((c) & 0xff)
#define dma_frcd_source_id(c) ((c) & 0xffff)
#define dma_frcd_page_addr(d) ((d) & (((u64)-1) << 12)) /* low 64 bit */
170 | |||
171 | /* | ||
172 | * 0: Present | ||
173 | * 1-11: Reserved | ||
174 | * 12-63: Context Ptr (12 - (haw-1)) | ||
175 | * 64-127: Reserved | ||
176 | */ | ||
177 | struct root_entry { | ||
178 | u64 val; | ||
179 | u64 rsvd1; | ||
180 | }; | ||
181 | #define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) | ||
182 | static inline bool root_present(struct root_entry *root) | ||
183 | { | ||
184 | return (root->val & 1); | ||
185 | } | ||
186 | static inline void set_root_present(struct root_entry *root) | ||
187 | { | ||
188 | root->val |= 1; | ||
189 | } | ||
190 | static inline void set_root_value(struct root_entry *root, unsigned long value) | ||
191 | { | ||
192 | root->val |= value & PAGE_MASK_4K; | ||
193 | } | ||
194 | |||
195 | struct context_entry; | ||
196 | static inline struct context_entry * | ||
197 | get_context_addr_from_root(struct root_entry *root) | ||
198 | { | ||
199 | return (struct context_entry *) | ||
200 | (root_present(root)?phys_to_virt( | ||
201 | root->val & PAGE_MASK_4K): | ||
202 | NULL); | ||
203 | } | ||
204 | |||
205 | /* | ||
206 | * low 64 bits: | ||
207 | * 0: present | ||
208 | * 1: fault processing disable | ||
209 | * 2-3: translation type | ||
210 | * 12-63: address space root | ||
211 | * high 64 bits: | ||
212 | * 0-2: address width | ||
213 | * 3-6: aval | ||
214 | * 8-23: domain id | ||
215 | */ | ||
216 | struct context_entry { | ||
217 | u64 lo; | ||
218 | u64 hi; | ||
219 | }; | ||
220 | #define context_present(c) ((c).lo & 1) | ||
221 | #define context_fault_disable(c) (((c).lo >> 1) & 1) | ||
222 | #define context_translation_type(c) (((c).lo >> 2) & 3) | ||
223 | #define context_address_root(c) ((c).lo & PAGE_MASK_4K) | ||
224 | #define context_address_width(c) ((c).hi & 7) | ||
225 | #define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) | ||
226 | |||
227 | #define context_set_present(c) do {(c).lo |= 1;} while (0) | ||
228 | #define context_set_fault_enable(c) \ | ||
229 | do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) | ||
230 | #define context_set_translation_type(c, val) \ | ||
231 | do { \ | ||
232 | (c).lo &= (((u64)-1) << 4) | 3; \ | ||
233 | (c).lo |= ((val) & 3) << 2; \ | ||
234 | } while (0) | ||
235 | #define CONTEXT_TT_MULTI_LEVEL 0 | ||
236 | #define context_set_address_root(c, val) \ | ||
237 | do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) | ||
238 | #define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) | ||
239 | #define context_set_domain_id(c, val) \ | ||
240 | do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) | ||
241 | #define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) | ||
242 | |||
243 | /* | ||
244 | * 0: readable | ||
245 | * 1: writable | ||
246 | * 2-6: reserved | ||
247 | * 7: super page | ||
248 | * 8-11: available | ||
249 | * 12-63: Host physcial address | ||
250 | */ | ||
251 | struct dma_pte { | ||
252 | u64 val; | ||
253 | }; | ||
254 | #define dma_clear_pte(p) do {(p).val = 0;} while (0) | ||
255 | |||
256 | #define DMA_PTE_READ (1) | ||
257 | #define DMA_PTE_WRITE (2) | ||
258 | |||
259 | #define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) | ||
260 | #define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) | ||
261 | #define dma_set_pte_prot(p, prot) \ | ||
262 | do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) | ||
263 | #define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) | ||
264 | #define dma_set_pte_addr(p, addr) do {\ | ||
265 | (p).val |= ((addr) & PAGE_MASK_4K); } while (0) | ||
266 | #define dma_pte_present(p) (((p).val & 3) != 0) | ||
267 | |||
268 | struct intel_iommu; | ||
269 | |||
270 | struct dmar_domain { | ||
271 | int id; /* domain id */ | ||
272 | struct intel_iommu *iommu; /* back pointer to owning iommu */ | ||
273 | |||
274 | struct list_head devices; /* all devices' list */ | ||
275 | struct iova_domain iovad; /* iova's that belong to this domain */ | ||
276 | |||
277 | struct dma_pte *pgd; /* virtual address */ | ||
278 | spinlock_t mapping_lock; /* page table lock */ | ||
279 | int gaw; /* max guest address width */ | ||
280 | |||
281 | /* adjusted guest address width, 0 is level 2 30-bit */ | ||
282 | int agaw; | ||
283 | |||
284 | #define DOMAIN_FLAG_MULTIPLE_DEVICES 1 | ||
285 | int flags; | ||
286 | }; | ||
287 | |||
288 | /* PCI domain-device relationship */ | ||
289 | struct device_domain_info { | ||
290 | struct list_head link; /* link to domain siblings */ | ||
291 | struct list_head global; /* link to global list */ | ||
292 | u8 bus; /* PCI bus numer */ | ||
293 | u8 devfn; /* PCI devfn number */ | ||
294 | struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ | ||
295 | struct dmar_domain *domain; /* pointer to domain */ | ||
296 | }; | ||
297 | |||
298 | extern int init_dmars(void); | ||
299 | |||
300 | struct intel_iommu { | ||
301 | void __iomem *reg; /* Pointer to hardware regs, virtual addr */ | ||
302 | u64 cap; | ||
303 | u64 ecap; | ||
304 | unsigned long *domain_ids; /* bitmap of domains */ | ||
305 | struct dmar_domain **domains; /* ptr to domains */ | ||
306 | int seg; | ||
307 | u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ | ||
308 | spinlock_t lock; /* protect context, domain ids */ | ||
309 | spinlock_t register_lock; /* protect register handling */ | ||
310 | struct root_entry *root_entry; /* virtual address */ | ||
311 | |||
312 | unsigned int irq; | ||
313 | unsigned char name[7]; /* Device Name */ | ||
314 | struct msi_msg saved_msg; | ||
315 | struct sys_device sysdev; | ||
316 | }; | ||
317 | |||
#ifndef CONFIG_DMAR_GFX_WA
/*
 * Empty stand-in for builds without CONFIG_DMAR_GFX_WA, so callers need no
 * conditional compilation.
 */
static inline void iommu_prepare_gfx_mapping(void)
{
}
#endif /* !CONFIG_DMAR_GFX_WA */
324 | |||
325 | #endif | ||
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c new file mode 100644 index 00000000000..a84571c2936 --- /dev/null +++ b/drivers/pci/iova.c | |||
@@ -0,0 +1,394 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006, Intel Corporation. | ||
3 | * | ||
4 | * This file is released under the GPLv2. | ||
5 | * | ||
6 | * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | ||
7 | */ | ||
8 | |||
9 | #include "iova.h" | ||
10 | |||
11 | void | ||
12 | init_iova_domain(struct iova_domain *iovad) | ||
13 | { | ||
14 | spin_lock_init(&iovad->iova_alloc_lock); | ||
15 | spin_lock_init(&iovad->iova_rbtree_lock); | ||
16 | iovad->rbroot = RB_ROOT; | ||
17 | iovad->cached32_node = NULL; | ||
18 | |||
19 | } | ||
20 | |||
21 | static struct rb_node * | ||
22 | __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) | ||
23 | { | ||
24 | if ((*limit_pfn != DMA_32BIT_PFN) || | ||
25 | (iovad->cached32_node == NULL)) | ||
26 | return rb_last(&iovad->rbroot); | ||
27 | else { | ||
28 | struct rb_node *prev_node = rb_prev(iovad->cached32_node); | ||
29 | struct iova *curr_iova = | ||
30 | container_of(iovad->cached32_node, struct iova, node); | ||
31 | *limit_pfn = curr_iova->pfn_lo - 1; | ||
32 | return prev_node; | ||
33 | } | ||
34 | } | ||
35 | |||
36 | static void | ||
37 | __cached_rbnode_insert_update(struct iova_domain *iovad, | ||
38 | unsigned long limit_pfn, struct iova *new) | ||
39 | { | ||
40 | if (limit_pfn != DMA_32BIT_PFN) | ||
41 | return; | ||
42 | iovad->cached32_node = &new->node; | ||
43 | } | ||
44 | |||
45 | static void | ||
46 | __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) | ||
47 | { | ||
48 | struct iova *cached_iova; | ||
49 | struct rb_node *curr; | ||
50 | |||
51 | if (!iovad->cached32_node) | ||
52 | return; | ||
53 | curr = iovad->cached32_node; | ||
54 | cached_iova = container_of(curr, struct iova, node); | ||
55 | |||
56 | if (free->pfn_lo >= cached_iova->pfn_lo) | ||
57 | iovad->cached32_node = rb_next(&free->node); | ||
58 | } | ||
59 | |||
60 | /* Computes the padding size required to make the | ||
61 | * start address naturally aligned on its size | ||
62 | */ | ||
/*
 * Number of page frames to leave unused below @limit_pfn so that a range
 * of 2^ilog2(@size) frames ending right under the padding starts on a
 * boundary that is a multiple of that power of two.
 *
 * The parameters are unsigned long, matching what the callers pass, so the
 * size and pfn are no longer narrowed on the way in, and the modulus is
 * computed with an unsigned long shift rather than a signed int one.
 */
static int
iova_get_pad_size(unsigned long size, unsigned long limit_pfn)
{
	unsigned int order = ilog2(size);

	/* A single-frame range is always aligned. */
	if (!order)
		return 0;

	return (limit_pfn + 1) % (1UL << order);
}
74 | |||
75 | static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size, | ||
76 | unsigned long limit_pfn, struct iova *new, bool size_aligned) | ||
77 | { | ||
78 | struct rb_node *curr = NULL; | ||
79 | unsigned long flags; | ||
80 | unsigned long saved_pfn; | ||
81 | unsigned int pad_size = 0; | ||
82 | |||
83 | /* Walk the tree backwards */ | ||
84 | spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); | ||
85 | saved_pfn = limit_pfn; | ||
86 | curr = __get_cached_rbnode(iovad, &limit_pfn); | ||
87 | while (curr) { | ||
88 | struct iova *curr_iova = container_of(curr, struct iova, node); | ||
89 | if (limit_pfn < curr_iova->pfn_lo) | ||
90 | goto move_left; | ||
91 | else if (limit_pfn < curr_iova->pfn_hi) | ||
92 | goto adjust_limit_pfn; | ||
93 | else { | ||
94 | if (size_aligned) | ||
95 | pad_size = iova_get_pad_size(size, limit_pfn); | ||
96 | if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn) | ||
97 | break; /* found a free slot */ | ||
98 | } | ||
99 | adjust_limit_pfn: | ||
100 | limit_pfn = curr_iova->pfn_lo - 1; | ||
101 | move_left: | ||
102 | curr = rb_prev(curr); | ||
103 | } | ||
104 | |||
105 | if (!curr) { | ||
106 | if (size_aligned) | ||
107 | pad_size = iova_get_pad_size(size, limit_pfn); | ||
108 | if ((IOVA_START_PFN + size + pad_size) > limit_pfn) { | ||
109 | spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); | ||
110 | return -ENOMEM; | ||
111 | } | ||
112 | } | ||
113 | |||
114 | /* pfn_lo will point to size aligned address if size_aligned is set */ | ||
115 | new->pfn_lo = limit_pfn - (size + pad_size) + 1; | ||
116 | new->pfn_hi = new->pfn_lo + size - 1; | ||
117 | |||
118 | spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static void | ||
123 | iova_insert_rbtree(struct rb_root *root, struct iova *iova) | ||
124 | { | ||
125 | struct rb_node **new = &(root->rb_node), *parent = NULL; | ||
126 | /* Figure out where to put new node */ | ||
127 | while (*new) { | ||
128 | struct iova *this = container_of(*new, struct iova, node); | ||
129 | parent = *new; | ||
130 | |||
131 | if (iova->pfn_lo < this->pfn_lo) | ||
132 | new = &((*new)->rb_left); | ||
133 | else if (iova->pfn_lo > this->pfn_lo) | ||
134 | new = &((*new)->rb_right); | ||
135 | else | ||
136 | BUG(); /* this should not happen */ | ||
137 | } | ||
138 | /* Add new node and rebalance tree. */ | ||
139 | rb_link_node(&iova->node, parent, new); | ||
140 | rb_insert_color(&iova->node, root); | ||
141 | } | ||
142 | |||
143 | /** | ||
144 | * alloc_iova - allocates an iova | ||
145 | * @iovad - iova domain in question | ||
146 | * @size - size of page frames to allocate | ||
147 | * @limit_pfn - max limit address | ||
148 | * @size_aligned - set if size_aligned address range is required | ||
149 | * This function allocates an iova in the range IOVA_START_PFN to limit_pfn, | ||
150 | * searching downwards from limit_pfn rather than upwards from IOVA_START_PFN. If the size_aligned | ||
151 | * flag is set then the allocated address iova->pfn_lo will be naturally | ||
152 | * aligned on roundup_power_of_two(size). | ||
153 | */ | ||
154 | struct iova * | ||
155 | alloc_iova(struct iova_domain *iovad, unsigned long size, | ||
156 | unsigned long limit_pfn, | ||
157 | bool size_aligned) | ||
158 | { | ||
159 | unsigned long flags; | ||
160 | struct iova *new_iova; | ||
161 | int ret; | ||
162 | |||
163 | new_iova = alloc_iova_mem(); | ||
164 | if (!new_iova) | ||
165 | return NULL; | ||
166 | |||
167 | /* If size aligned is set then round the size to | ||
168 | * to next power of two. | ||
169 | */ | ||
170 | if (size_aligned) | ||
171 | size = __roundup_pow_of_two(size); | ||
172 | |||
173 | spin_lock_irqsave(&iovad->iova_alloc_lock, flags); | ||
174 | ret = __alloc_iova_range(iovad, size, limit_pfn, new_iova, | ||
175 | size_aligned); | ||
176 | |||
177 | if (ret) { | ||
178 | spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); | ||
179 | free_iova_mem(new_iova); | ||
180 | return NULL; | ||
181 | } | ||
182 | |||
183 | /* Insert the new_iova into domain rbtree by holding writer lock */ | ||
184 | spin_lock(&iovad->iova_rbtree_lock); | ||
185 | iova_insert_rbtree(&iovad->rbroot, new_iova); | ||
186 | __cached_rbnode_insert_update(iovad, limit_pfn, new_iova); | ||
187 | spin_unlock(&iovad->iova_rbtree_lock); | ||
188 | |||
189 | spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); | ||
190 | |||
191 | return new_iova; | ||
192 | } | ||
193 | |||
194 | /** | ||
195 | * find_iova - finds an iova for a given pfn | ||
196 | * @iovad - iova domain in question. | ||
197 | * @pfn - page frame number | ||
198 | * This function finds and returns an iova belonging to the | ||
199 | * given domain which matches the given pfn. | ||
200 | */ | ||
201 | struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) | ||
202 | { | ||
203 | unsigned long flags; | ||
204 | struct rb_node *node; | ||
205 | |||
206 | /* Take the lock so that no other thread is manipulating the rbtree */ | ||
207 | spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); | ||
208 | node = iovad->rbroot.rb_node; | ||
209 | while (node) { | ||
210 | struct iova *iova = container_of(node, struct iova, node); | ||
211 | |||
212 | /* If pfn falls within iova's range, return iova */ | ||
213 | if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) { | ||
214 | spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); | ||
215 | /* We are not holding the lock while this iova | ||
216 | * is referenced by the caller as the same thread | ||
217 | * which called this function also calls __free_iova() | ||
218 | * and it is by desing that only one thread can possibly | ||
219 | * reference a particular iova and hence no conflict. | ||
220 | */ | ||
221 | return iova; | ||
222 | } | ||
223 | |||
224 | if (pfn < iova->pfn_lo) | ||
225 | node = node->rb_left; | ||
226 | else if (pfn > iova->pfn_lo) | ||
227 | node = node->rb_right; | ||
228 | } | ||
229 | |||
230 | spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); | ||
231 | return NULL; | ||
232 | } | ||
233 | |||
234 | /** | ||
235 | * __free_iova - frees the given iova | ||
236 | * @iovad: iova domain in question. | ||
237 | * @iova: iova in question. | ||
238 | * Frees the given iova belonging to the given domain | ||
239 | */ | ||
240 | void | ||
241 | __free_iova(struct iova_domain *iovad, struct iova *iova) | ||
242 | { | ||
243 | unsigned long flags; | ||
244 | |||
245 | spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); | ||
246 | __cached_rbnode_delete_update(iovad, iova); | ||
247 | rb_erase(&iova->node, &iovad->rbroot); | ||
248 | spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); | ||
249 | free_iova_mem(iova); | ||
250 | } | ||
251 | |||
252 | /** | ||
253 | * free_iova - finds and frees the iova for a given pfn | ||
254 | * @iovad: - iova domain in question. | ||
255 | * @pfn: - pfn that is allocated previously | ||
256 | * This function finds an iova for a given pfn and then | ||
257 | * frees the iova from that domain. | ||
258 | */ | ||
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct iova *iova = find_iova(iovad, pfn);

	/* Nothing is allocated at this pfn: nothing to free. */
	if (!iova)
		return;

	__free_iova(iovad, iova);
}
267 | |||
268 | /** | ||
269 | * put_iova_domain - destroys the iova domain | ||
270 | * @iovad: - iova domain in question. | ||
271 | * All the iova's in that domain are destroyed. | ||
272 | */ | ||
273 | void put_iova_domain(struct iova_domain *iovad) | ||
274 | { | ||
275 | struct rb_node *node; | ||
276 | unsigned long flags; | ||
277 | |||
278 | spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); | ||
279 | node = rb_first(&iovad->rbroot); | ||
280 | while (node) { | ||
281 | struct iova *iova = container_of(node, struct iova, node); | ||
282 | rb_erase(node, &iovad->rbroot); | ||
283 | free_iova_mem(iova); | ||
284 | node = rb_first(&iovad->rbroot); | ||
285 | } | ||
286 | spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); | ||
287 | } | ||
288 | |||
289 | static int | ||
290 | __is_range_overlap(struct rb_node *node, | ||
291 | unsigned long pfn_lo, unsigned long pfn_hi) | ||
292 | { | ||
293 | struct iova *iova = container_of(node, struct iova, node); | ||
294 | |||
295 | if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo)) | ||
296 | return 1; | ||
297 | return 0; | ||
298 | } | ||
299 | |||
300 | static struct iova * | ||
301 | __insert_new_range(struct iova_domain *iovad, | ||
302 | unsigned long pfn_lo, unsigned long pfn_hi) | ||
303 | { | ||
304 | struct iova *iova; | ||
305 | |||
306 | iova = alloc_iova_mem(); | ||
307 | if (!iova) | ||
308 | return iova; | ||
309 | |||
310 | iova->pfn_hi = pfn_hi; | ||
311 | iova->pfn_lo = pfn_lo; | ||
312 | iova_insert_rbtree(&iovad->rbroot, iova); | ||
313 | return iova; | ||
314 | } | ||
315 | |||
316 | static void | ||
317 | __adjust_overlap_range(struct iova *iova, | ||
318 | unsigned long *pfn_lo, unsigned long *pfn_hi) | ||
319 | { | ||
320 | if (*pfn_lo < iova->pfn_lo) | ||
321 | iova->pfn_lo = *pfn_lo; | ||
322 | if (*pfn_hi > iova->pfn_hi) | ||
323 | *pfn_lo = iova->pfn_hi + 1; | ||
324 | } | ||
325 | |||
326 | /** | ||
327 | * reserve_iova - reserves an iova in the given range | ||
328 | * @iovad: - iova domain pointer | ||
329 | * @pfn_lo: - lower page frame address | ||
330 | * @pfn_hi:- higher pfn address | ||
331 | * This function reserves the address range from pfn_lo to pfn_hi so | ||
332 | * that this address is not dished out as part of alloc_iova. | ||
333 | */ | ||
334 | struct iova * | ||
335 | reserve_iova(struct iova_domain *iovad, | ||
336 | unsigned long pfn_lo, unsigned long pfn_hi) | ||
337 | { | ||
338 | struct rb_node *node; | ||
339 | unsigned long flags; | ||
340 | struct iova *iova; | ||
341 | unsigned int overlap = 0; | ||
342 | |||
343 | spin_lock_irqsave(&iovad->iova_alloc_lock, flags); | ||
344 | spin_lock(&iovad->iova_rbtree_lock); | ||
345 | for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { | ||
346 | if (__is_range_overlap(node, pfn_lo, pfn_hi)) { | ||
347 | iova = container_of(node, struct iova, node); | ||
348 | __adjust_overlap_range(iova, &pfn_lo, &pfn_hi); | ||
349 | if ((pfn_lo >= iova->pfn_lo) && | ||
350 | (pfn_hi <= iova->pfn_hi)) | ||
351 | goto finish; | ||
352 | overlap = 1; | ||
353 | |||
354 | } else if (overlap) | ||
355 | break; | ||
356 | } | ||
357 | |||
358 | /* We are here either becasue this is the first reserver node | ||
359 | * or need to insert remaining non overlap addr range | ||
360 | */ | ||
361 | iova = __insert_new_range(iovad, pfn_lo, pfn_hi); | ||
362 | finish: | ||
363 | |||
364 | spin_unlock(&iovad->iova_rbtree_lock); | ||
365 | spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); | ||
366 | return iova; | ||
367 | } | ||
368 | |||
369 | /** | ||
370 | * copy_reserved_iova - copies the reserved iova ranges between domains | ||
371 | * @from: - source domain from where to copy | ||
372 | * @to: - destination domain where to copy | ||
373 | * This function copies reserved iova's from one domain to | ||
374 | * the other. | ||
375 | */ | ||
376 | void | ||
377 | copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) | ||
378 | { | ||
379 | unsigned long flags; | ||
380 | struct rb_node *node; | ||
381 | |||
382 | spin_lock_irqsave(&from->iova_alloc_lock, flags); | ||
383 | spin_lock(&from->iova_rbtree_lock); | ||
384 | for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { | ||
385 | struct iova *iova = container_of(node, struct iova, node); | ||
386 | struct iova *new_iova; | ||
387 | new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); | ||
388 | if (!new_iova) | ||
389 | printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", | ||
390 | iova->pfn_lo, iova->pfn_lo); | ||
391 | } | ||
392 | spin_unlock(&from->iova_rbtree_lock); | ||
393 | spin_unlock_irqrestore(&from->iova_alloc_lock, flags); | ||
394 | } | ||
diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h new file mode 100644 index 00000000000..ae3028d5a94 --- /dev/null +++ b/drivers/pci/iova.h | |||
@@ -0,0 +1,63 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006, Intel Corporation. | ||
3 | * | ||
4 | * This file is released under the GPLv2. | ||
5 | * | ||
6 | * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #ifndef _IOVA_H_ | ||
11 | #define _IOVA_H_ | ||
12 | |||
13 | #include <linux/types.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/rbtree.h> | ||
16 | #include <linux/dma-mapping.h> | ||
17 | |||
18 | /* | ||
19 | * We need a fixed PAGE_SIZE of 4K irrespective of | ||
20 | * arch PAGE_SIZE for IOMMU page tables. | ||
21 | */ | ||
22 | #define PAGE_SHIFT_4K (12) | ||
23 | #define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) | ||
24 | #define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) | ||
25 | #define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) | ||
26 | |||
27 | /* IO virtual address start page frame number */ | ||
28 | #define IOVA_START_PFN (1) | ||
29 | |||
30 | #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) | ||
31 | #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) | ||
32 | #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) | ||
33 | |||
34 | /* iova structure */ | ||
35 | struct iova { | ||
36 | struct rb_node node; | ||
37 | unsigned long pfn_hi; /* IOMMU dish out addr hi */ | ||
38 | unsigned long pfn_lo; /* IOMMU dish out addr lo */ | ||
39 | }; | ||
40 | |||
41 | /* holds all the iova translations for a domain */ | ||
42 | struct iova_domain { | ||
43 | spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */ | ||
44 | spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ | ||
45 | struct rb_root rbroot; /* iova domain rbtree root */ | ||
46 | struct rb_node *cached32_node; /* Save last alloced node */ | ||
47 | }; | ||
48 | |||
49 | struct iova *alloc_iova_mem(void); | ||
50 | void free_iova_mem(struct iova *iova); | ||
51 | void free_iova(struct iova_domain *iovad, unsigned long pfn); | ||
52 | void __free_iova(struct iova_domain *iovad, struct iova *iova); | ||
53 | struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, | ||
54 | unsigned long limit_pfn, | ||
55 | bool size_aligned); | ||
56 | struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, | ||
57 | unsigned long pfn_hi); | ||
58 | void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); | ||
59 | void init_iova_domain(struct iova_domain *iovad); | ||
60 | struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); | ||
61 | void put_iova_domain(struct iova_domain *iovad); | ||
62 | |||
63 | #endif | ||
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 6fda33de84e..fc87e14b50d 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h | |||
@@ -90,3 +90,4 @@ pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) | |||
90 | return NULL; | 90 | return NULL; |
91 | } | 91 | } |
92 | 92 | ||
93 | struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev); | ||
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 5db6b6690b5..463a5a9d583 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c | |||
@@ -837,6 +837,19 @@ static void pci_release_dev(struct device *dev) | |||
837 | kfree(pci_dev); | 837 | kfree(pci_dev); |
838 | } | 838 | } |
839 | 839 | ||
840 | static void set_pcie_port_type(struct pci_dev *pdev) | ||
841 | { | ||
842 | int pos; | ||
843 | u16 reg16; | ||
844 | |||
845 | pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); | ||
846 | if (!pos) | ||
847 | return; | ||
848 | pdev->is_pcie = 1; | ||
849 | pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); | ||
850 | pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; | ||
851 | } | ||
852 | |||
840 | /** | 853 | /** |
841 | * pci_cfg_space_size - get the configuration space size of the PCI device. | 854 | * pci_cfg_space_size - get the configuration space size of the PCI device. |
842 | * @dev: PCI device | 855 | * @dev: PCI device |
@@ -951,6 +964,7 @@ pci_scan_device(struct pci_bus *bus, int devfn) | |||
951 | dev->device = (l >> 16) & 0xffff; | 964 | dev->device = (l >> 16) & 0xffff; |
952 | dev->cfg_size = pci_cfg_space_size(dev); | 965 | dev->cfg_size = pci_cfg_space_size(dev); |
953 | dev->error_state = pci_channel_io_normal; | 966 | dev->error_state = pci_channel_io_normal; |
967 | set_pcie_port_type(dev); | ||
954 | 968 | ||
955 | /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) | 969 | /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) |
956 | set this higher, assuming the system even supports it. */ | 970 | set this higher, assuming the system even supports it. */ |
diff --git a/drivers/pci/search.c b/drivers/pci/search.c index c6e79d01ce3..b001b5922e3 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c | |||
@@ -14,6 +14,40 @@ | |||
14 | #include "pci.h" | 14 | #include "pci.h" |
15 | 15 | ||
16 | DECLARE_RWSEM(pci_bus_sem); | 16 | DECLARE_RWSEM(pci_bus_sem); |
17 | /* | ||
18 | * find the upstream PCIE-to-PCI bridge of a PCI device | ||
19 | * if the device is PCIE, return NULL | ||
20 | * if the device isn't connected to a PCIE bridge (that is its parent is a | ||
21 | * legacy PCI bridge and the bridge is directly connected to bus 0), return its | ||
22 | * parent | ||
23 | */ | ||
24 | struct pci_dev * | ||
25 | pci_find_upstream_pcie_bridge(struct pci_dev *pdev) | ||
26 | { | ||
27 | struct pci_dev *tmp = NULL; | ||
28 | |||
29 | if (pdev->is_pcie) | ||
30 | return NULL; | ||
31 | while (1) { | ||
32 | if (!pdev->bus->self) | ||
33 | break; | ||
34 | pdev = pdev->bus->self; | ||
35 | /* a p2p bridge */ | ||
36 | if (!pdev->is_pcie) { | ||
37 | tmp = pdev; | ||
38 | continue; | ||
39 | } | ||
40 | /* PCI device should connect to a PCIE bridge */ | ||
41 | if (pdev->pcie_type != PCI_EXP_TYPE_PCI_BRIDGE) { | ||
42 | /* Busted hardware? */ | ||
43 | WARN_ON_ONCE(1); | ||
44 | return NULL; | ||
45 | } | ||
46 | return pdev; | ||
47 | } | ||
48 | |||
49 | return tmp; | ||
50 | } | ||
17 | 51 | ||
18 | static struct pci_bus *pci_do_find_bus(struct pci_bus *bus, unsigned char busnr) | 52 | static struct pci_bus *pci_do_find_bus(struct pci_bus *bus, unsigned char busnr) |
19 | { | 53 | { |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 0a3ee5a322b..5574ba3ab1f 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -103,7 +103,7 @@ extern int cifs_ioctl(struct inode *inode, struct file *filep, | |||
103 | unsigned int command, unsigned long arg); | 103 | unsigned int command, unsigned long arg); |
104 | 104 | ||
105 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 105 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
106 | extern struct export_operations cifs_export_ops; | 106 | extern const struct export_operations cifs_export_ops; |
107 | #endif /* EXPERIMENTAL */ | 107 | #endif /* EXPERIMENTAL */ |
108 | 108 | ||
109 | #define CIFS_VERSION "1.51" | 109 | #define CIFS_VERSION "1.51" |
diff --git a/fs/cifs/export.c b/fs/cifs/export.c index d614b91caec..75949d6a5f1 100644 --- a/fs/cifs/export.c +++ b/fs/cifs/export.c | |||
@@ -53,7 +53,7 @@ static struct dentry *cifs_get_parent(struct dentry *dentry) | |||
53 | return ERR_PTR(-EACCES); | 53 | return ERR_PTR(-EACCES); |
54 | } | 54 | } |
55 | 55 | ||
56 | struct export_operations cifs_export_ops = { | 56 | const struct export_operations cifs_export_ops = { |
57 | .get_parent = cifs_get_parent, | 57 | .get_parent = cifs_get_parent, |
58 | /* Following five export operations are unneeded so far and can default: | 58 | /* Following five export operations are unneeded so far and can default: |
59 | .get_dentry = | 59 | .get_dentry = |
diff --git a/fs/dcache.c b/fs/dcache.c index 2bb3f7ac683..d9ca1e5ceb9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -1479,6 +1479,8 @@ static void switch_names(struct dentry *dentry, struct dentry *target) | |||
1479 | * dentry:internal, target:external. Steal target's | 1479 | * dentry:internal, target:external. Steal target's |
1480 | * storage and make target internal. | 1480 | * storage and make target internal. |
1481 | */ | 1481 | */ |
1482 | memcpy(target->d_iname, dentry->d_name.name, | ||
1483 | dentry->d_name.len + 1); | ||
1482 | dentry->d_name.name = target->d_name.name; | 1484 | dentry->d_name.name = target->d_name.name; |
1483 | target->d_name.name = target->d_iname; | 1485 | target->d_name.name = target->d_iname; |
1484 | } | 1486 | } |
diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 5276b19423c..f7f407075be 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c | |||
@@ -10,6 +10,8 @@ | |||
10 | #include <linux/string.h> | 10 | #include <linux/string.h> |
11 | #include <linux/efs_fs.h> | 11 | #include <linux/efs_fs.h> |
12 | #include <linux/smp_lock.h> | 12 | #include <linux/smp_lock.h> |
13 | #include <linux/exportfs.h> | ||
14 | |||
13 | 15 | ||
14 | static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) { | 16 | static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) { |
15 | struct buffer_head *bh; | 17 | struct buffer_head *bh; |
@@ -75,13 +77,10 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei | |||
75 | return NULL; | 77 | return NULL; |
76 | } | 78 | } |
77 | 79 | ||
78 | struct dentry *efs_get_dentry(struct super_block *sb, void *vobjp) | 80 | static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, |
81 | u32 generation) | ||
79 | { | 82 | { |
80 | __u32 *objp = vobjp; | ||
81 | unsigned long ino = objp[0]; | ||
82 | __u32 generation = objp[1]; | ||
83 | struct inode *inode; | 83 | struct inode *inode; |
84 | struct dentry *result; | ||
85 | 84 | ||
86 | if (ino == 0) | 85 | if (ino == 0) |
87 | return ERR_PTR(-ESTALE); | 86 | return ERR_PTR(-ESTALE); |
@@ -91,20 +90,25 @@ struct dentry *efs_get_dentry(struct super_block *sb, void *vobjp) | |||
91 | 90 | ||
92 | if (is_bad_inode(inode) || | 91 | if (is_bad_inode(inode) || |
93 | (generation && inode->i_generation != generation)) { | 92 | (generation && inode->i_generation != generation)) { |
94 | result = ERR_PTR(-ESTALE); | 93 | iput(inode); |
95 | goto out_iput; | 94 | return ERR_PTR(-ESTALE); |
96 | } | 95 | } |
97 | 96 | ||
98 | result = d_alloc_anon(inode); | 97 | return inode; |
99 | if (!result) { | 98 | } |
100 | result = ERR_PTR(-ENOMEM); | ||
101 | goto out_iput; | ||
102 | } | ||
103 | return result; | ||
104 | 99 | ||
105 | out_iput: | 100 | struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
106 | iput(inode); | 101 | int fh_len, int fh_type) |
107 | return result; | 102 | { |
103 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | ||
104 | efs_nfs_get_inode); | ||
105 | } | ||
106 | |||
107 | struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
108 | int fh_len, int fh_type) | ||
109 | { | ||
110 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
111 | efs_nfs_get_inode); | ||
108 | } | 112 | } |
109 | 113 | ||
110 | struct dentry *efs_get_parent(struct dentry *child) | 114 | struct dentry *efs_get_parent(struct dentry *child) |
diff --git a/fs/efs/super.c b/fs/efs/super.c index 25d0326c5f1..c79bc627f10 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c | |||
@@ -113,8 +113,9 @@ static const struct super_operations efs_superblock_operations = { | |||
113 | .remount_fs = efs_remount, | 113 | .remount_fs = efs_remount, |
114 | }; | 114 | }; |
115 | 115 | ||
116 | static struct export_operations efs_export_ops = { | 116 | static const struct export_operations efs_export_ops = { |
117 | .get_dentry = efs_get_dentry, | 117 | .fh_to_dentry = efs_fh_to_dentry, |
118 | .fh_to_parent = efs_fh_to_parent, | ||
118 | .get_parent = efs_get_parent, | 119 | .get_parent = efs_get_parent, |
119 | }; | 120 | }; |
120 | 121 | ||
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 8adb32a9387..109ab5e44ec 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c | |||
@@ -1,4 +1,13 @@ | |||
1 | 1 | /* | |
2 | * Copyright (C) Neil Brown 2002 | ||
3 | * Copyright (C) Christoph Hellwig 2007 | ||
4 | * | ||
5 | * This file contains the code mapping from inodes to NFS file handles, | ||
6 | * and for mapping back from file handles to dentries. | ||
7 | * | ||
8 | * For details on why we do all the strange and hairy things in here | ||
9 | * take a look at Documentation/filesystems/Exporting. | ||
10 | */ | ||
2 | #include <linux/exportfs.h> | 11 | #include <linux/exportfs.h> |
3 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |
4 | #include <linux/file.h> | 13 | #include <linux/file.h> |
@@ -9,32 +18,19 @@ | |||
9 | #define dprintk(fmt, args...) do{}while(0) | 18 | #define dprintk(fmt, args...) do{}while(0) |
10 | 19 | ||
11 | 20 | ||
12 | static int get_name(struct dentry *dentry, char *name, | 21 | static int get_name(struct vfsmount *mnt, struct dentry *dentry, char *name, |
13 | struct dentry *child); | 22 | struct dentry *child); |
14 | 23 | ||
15 | 24 | ||
16 | static struct dentry *exportfs_get_dentry(struct super_block *sb, void *obj) | 25 | static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir, |
26 | char *name, struct dentry *child) | ||
17 | { | 27 | { |
18 | struct dentry *result = ERR_PTR(-ESTALE); | 28 | const struct export_operations *nop = dir->d_sb->s_export_op; |
19 | |||
20 | if (sb->s_export_op->get_dentry) { | ||
21 | result = sb->s_export_op->get_dentry(sb, obj); | ||
22 | if (!result) | ||
23 | result = ERR_PTR(-ESTALE); | ||
24 | } | ||
25 | |||
26 | return result; | ||
27 | } | ||
28 | |||
29 | static int exportfs_get_name(struct dentry *dir, char *name, | ||
30 | struct dentry *child) | ||
31 | { | ||
32 | struct export_operations *nop = dir->d_sb->s_export_op; | ||
33 | 29 | ||
34 | if (nop->get_name) | 30 | if (nop->get_name) |
35 | return nop->get_name(dir, name, child); | 31 | return nop->get_name(dir, name, child); |
36 | else | 32 | else |
37 | return get_name(dir, name, child); | 33 | return get_name(mnt, dir, name, child); |
38 | } | 34 | } |
39 | 35 | ||
40 | /* | 36 | /* |
@@ -98,7 +94,7 @@ find_disconnected_root(struct dentry *dentry) | |||
98 | * It may already be, as the flag isn't always updated when connection happens. | 94 | * It may already be, as the flag isn't always updated when connection happens. |
99 | */ | 95 | */ |
100 | static int | 96 | static int |
101 | reconnect_path(struct super_block *sb, struct dentry *target_dir) | 97 | reconnect_path(struct vfsmount *mnt, struct dentry *target_dir) |
102 | { | 98 | { |
103 | char nbuf[NAME_MAX+1]; | 99 | char nbuf[NAME_MAX+1]; |
104 | int noprogress = 0; | 100 | int noprogress = 0; |
@@ -121,7 +117,7 @@ reconnect_path(struct super_block *sb, struct dentry *target_dir) | |||
121 | pd->d_flags &= ~DCACHE_DISCONNECTED; | 117 | pd->d_flags &= ~DCACHE_DISCONNECTED; |
122 | spin_unlock(&pd->d_lock); | 118 | spin_unlock(&pd->d_lock); |
123 | noprogress = 0; | 119 | noprogress = 0; |
124 | } else if (pd == sb->s_root) { | 120 | } else if (pd == mnt->mnt_sb->s_root) { |
125 | printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n"); | 121 | printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n"); |
126 | spin_lock(&pd->d_lock); | 122 | spin_lock(&pd->d_lock); |
127 | pd->d_flags &= ~DCACHE_DISCONNECTED; | 123 | pd->d_flags &= ~DCACHE_DISCONNECTED; |
@@ -147,8 +143,8 @@ reconnect_path(struct super_block *sb, struct dentry *target_dir) | |||
147 | struct dentry *npd; | 143 | struct dentry *npd; |
148 | 144 | ||
149 | mutex_lock(&pd->d_inode->i_mutex); | 145 | mutex_lock(&pd->d_inode->i_mutex); |
150 | if (sb->s_export_op->get_parent) | 146 | if (mnt->mnt_sb->s_export_op->get_parent) |
151 | ppd = sb->s_export_op->get_parent(pd); | 147 | ppd = mnt->mnt_sb->s_export_op->get_parent(pd); |
152 | mutex_unlock(&pd->d_inode->i_mutex); | 148 | mutex_unlock(&pd->d_inode->i_mutex); |
153 | 149 | ||
154 | if (IS_ERR(ppd)) { | 150 | if (IS_ERR(ppd)) { |
@@ -161,7 +157,7 @@ reconnect_path(struct super_block *sb, struct dentry *target_dir) | |||
161 | 157 | ||
162 | dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, | 158 | dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, |
163 | pd->d_inode->i_ino, ppd->d_inode->i_ino); | 159 | pd->d_inode->i_ino, ppd->d_inode->i_ino); |
164 | err = exportfs_get_name(ppd, nbuf, pd); | 160 | err = exportfs_get_name(mnt, ppd, nbuf, pd); |
165 | if (err) { | 161 | if (err) { |
166 | dput(ppd); | 162 | dput(ppd); |
167 | dput(pd); | 163 | dput(pd); |
@@ -214,125 +210,6 @@ reconnect_path(struct super_block *sb, struct dentry *target_dir) | |||
214 | return 0; | 210 | return 0; |
215 | } | 211 | } |
216 | 212 | ||
217 | /** | ||
218 | * find_exported_dentry - helper routine to implement export_operations->decode_fh | ||
219 | * @sb: The &super_block identifying the filesystem | ||
220 | * @obj: An opaque identifier of the object to be found - passed to | ||
221 | * get_inode | ||
222 | * @parent: An optional opqaue identifier of the parent of the object. | ||
223 | * @acceptable: A function used to test possible &dentries to see if they are | ||
224 | * acceptable | ||
225 | * @context: A parameter to @acceptable so that it knows on what basis to | ||
226 | * judge. | ||
227 | * | ||
228 | * find_exported_dentry is the central helper routine to enable file systems | ||
229 | * to provide the decode_fh() export_operation. It's main task is to take | ||
230 | * an &inode, find or create an appropriate &dentry structure, and possibly | ||
231 | * splice this into the dcache in the correct place. | ||
232 | * | ||
233 | * The decode_fh() operation provided by the filesystem should call | ||
234 | * find_exported_dentry() with the same parameters that it received except | ||
235 | * that instead of the file handle fragment, pointers to opaque identifiers | ||
236 | * for the object and optionally its parent are passed. The default decode_fh | ||
237 | * routine passes one pointer to the start of the filehandle fragment, and | ||
238 | * one 8 bytes into the fragment. It is expected that most filesystems will | ||
239 | * take this approach, though the offset to the parent identifier may well be | ||
240 | * different. | ||
241 | * | ||
242 | * find_exported_dentry() will call get_dentry to get an dentry pointer from | ||
243 | * the file system. If any &dentry in the d_alias list is acceptable, it will | ||
244 | * be returned. Otherwise find_exported_dentry() will attempt to splice a new | ||
245 | * &dentry into the dcache using get_name() and get_parent() to find the | ||
246 | * appropriate place. | ||
247 | */ | ||
248 | |||
249 | struct dentry * | ||
250 | find_exported_dentry(struct super_block *sb, void *obj, void *parent, | ||
251 | int (*acceptable)(void *context, struct dentry *de), | ||
252 | void *context) | ||
253 | { | ||
254 | struct dentry *result, *alias; | ||
255 | int err = -ESTALE; | ||
256 | |||
257 | /* | ||
258 | * Attempt to find the inode. | ||
259 | */ | ||
260 | result = exportfs_get_dentry(sb, obj); | ||
261 | if (IS_ERR(result)) | ||
262 | return result; | ||
263 | |||
264 | if (S_ISDIR(result->d_inode->i_mode)) { | ||
265 | if (!(result->d_flags & DCACHE_DISCONNECTED)) { | ||
266 | if (acceptable(context, result)) | ||
267 | return result; | ||
268 | err = -EACCES; | ||
269 | goto err_result; | ||
270 | } | ||
271 | |||
272 | err = reconnect_path(sb, result); | ||
273 | if (err) | ||
274 | goto err_result; | ||
275 | } else { | ||
276 | struct dentry *target_dir, *nresult; | ||
277 | char nbuf[NAME_MAX+1]; | ||
278 | |||
279 | alias = find_acceptable_alias(result, acceptable, context); | ||
280 | if (alias) | ||
281 | return alias; | ||
282 | |||
283 | if (parent == NULL) | ||
284 | goto err_result; | ||
285 | |||
286 | target_dir = exportfs_get_dentry(sb,parent); | ||
287 | if (IS_ERR(target_dir)) { | ||
288 | err = PTR_ERR(target_dir); | ||
289 | goto err_result; | ||
290 | } | ||
291 | |||
292 | err = reconnect_path(sb, target_dir); | ||
293 | if (err) { | ||
294 | dput(target_dir); | ||
295 | goto err_result; | ||
296 | } | ||
297 | |||
298 | /* | ||
299 | * As we weren't after a directory, have one more step to go. | ||
300 | */ | ||
301 | err = exportfs_get_name(target_dir, nbuf, result); | ||
302 | if (!err) { | ||
303 | mutex_lock(&target_dir->d_inode->i_mutex); | ||
304 | nresult = lookup_one_len(nbuf, target_dir, | ||
305 | strlen(nbuf)); | ||
306 | mutex_unlock(&target_dir->d_inode->i_mutex); | ||
307 | if (!IS_ERR(nresult)) { | ||
308 | if (nresult->d_inode) { | ||
309 | dput(result); | ||
310 | result = nresult; | ||
311 | } else | ||
312 | dput(nresult); | ||
313 | } | ||
314 | } | ||
315 | dput(target_dir); | ||
316 | } | ||
317 | |||
318 | alias = find_acceptable_alias(result, acceptable, context); | ||
319 | if (alias) | ||
320 | return alias; | ||
321 | |||
322 | /* drat - I just cannot find anything acceptable */ | ||
323 | dput(result); | ||
324 | /* It might be justifiable to return ESTALE here, | ||
325 | * but the filehandle at-least looks reasonable good | ||
326 | * and it may just be a permission problem, so returning | ||
327 | * -EACCESS is safer | ||
328 | */ | ||
329 | return ERR_PTR(-EACCES); | ||
330 | |||
331 | err_result: | ||
332 | dput(result); | ||
333 | return ERR_PTR(err); | ||
334 | } | ||
335 | |||
336 | struct getdents_callback { | 213 | struct getdents_callback { |
337 | char *name; /* name that was found. It already points to a | 214 | char *name; /* name that was found. It already points to a |
338 | buffer NAME_MAX+1 is size */ | 215 | buffer NAME_MAX+1 is size */ |
@@ -370,8 +247,8 @@ static int filldir_one(void * __buf, const char * name, int len, | |||
370 | * calls readdir on the parent until it finds an entry with | 247 | * calls readdir on the parent until it finds an entry with |
371 | * the same inode number as the child, and returns that. | 248 | * the same inode number as the child, and returns that. |
372 | */ | 249 | */ |
373 | static int get_name(struct dentry *dentry, char *name, | 250 | static int get_name(struct vfsmount *mnt, struct dentry *dentry, |
374 | struct dentry *child) | 251 | char *name, struct dentry *child) |
375 | { | 252 | { |
376 | struct inode *dir = dentry->d_inode; | 253 | struct inode *dir = dentry->d_inode; |
377 | int error; | 254 | int error; |
@@ -387,7 +264,7 @@ static int get_name(struct dentry *dentry, char *name, | |||
387 | /* | 264 | /* |
388 | * Open the directory ... | 265 | * Open the directory ... |
389 | */ | 266 | */ |
390 | file = dentry_open(dget(dentry), NULL, O_RDONLY); | 267 | file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY); |
391 | error = PTR_ERR(file); | 268 | error = PTR_ERR(file); |
392 | if (IS_ERR(file)) | 269 | if (IS_ERR(file)) |
393 | goto out; | 270 | goto out; |
@@ -434,100 +311,177 @@ out: | |||
434 | * can be used to check that it is still valid. It places them in the | 311 | * can be used to check that it is still valid. It places them in the |
435 | * filehandle fragment where export_decode_fh expects to find them. | 312 | * filehandle fragment where export_decode_fh expects to find them. |
436 | */ | 313 | */ |
437 | static int export_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, | 314 | static int export_encode_fh(struct dentry *dentry, struct fid *fid, |
438 | int connectable) | 315 | int *max_len, int connectable) |
439 | { | 316 | { |
440 | struct inode * inode = dentry->d_inode; | 317 | struct inode * inode = dentry->d_inode; |
441 | int len = *max_len; | 318 | int len = *max_len; |
442 | int type = 1; | 319 | int type = FILEID_INO32_GEN; |
443 | 320 | ||
444 | if (len < 2 || (connectable && len < 4)) | 321 | if (len < 2 || (connectable && len < 4)) |
445 | return 255; | 322 | return 255; |
446 | 323 | ||
447 | len = 2; | 324 | len = 2; |
448 | fh[0] = inode->i_ino; | 325 | fid->i32.ino = inode->i_ino; |
449 | fh[1] = inode->i_generation; | 326 | fid->i32.gen = inode->i_generation; |
450 | if (connectable && !S_ISDIR(inode->i_mode)) { | 327 | if (connectable && !S_ISDIR(inode->i_mode)) { |
451 | struct inode *parent; | 328 | struct inode *parent; |
452 | 329 | ||
453 | spin_lock(&dentry->d_lock); | 330 | spin_lock(&dentry->d_lock); |
454 | parent = dentry->d_parent->d_inode; | 331 | parent = dentry->d_parent->d_inode; |
455 | fh[2] = parent->i_ino; | 332 | fid->i32.parent_ino = parent->i_ino; |
456 | fh[3] = parent->i_generation; | 333 | fid->i32.parent_gen = parent->i_generation; |
457 | spin_unlock(&dentry->d_lock); | 334 | spin_unlock(&dentry->d_lock); |
458 | len = 4; | 335 | len = 4; |
459 | type = 2; | 336 | type = FILEID_INO32_GEN_PARENT; |
460 | } | 337 | } |
461 | *max_len = len; | 338 | *max_len = len; |
462 | return type; | 339 | return type; |
463 | } | 340 | } |
464 | 341 | ||
465 | 342 | int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, | |
466 | /** | ||
467 | * export_decode_fh - default export_operations->decode_fh function | ||
468 | * @sb: The superblock | ||
469 | * @fh: pointer to the file handle fragment | ||
470 | * @fh_len: length of file handle fragment | ||
471 | * @acceptable: function for testing acceptability of dentrys | ||
472 | * @context: context for @acceptable | ||
473 | * | ||
474 | * This is the default decode_fh() function. | ||
475 | * a fileid_type of 1 indicates that the filehandlefragment | ||
476 | * just contains an object identifier understood by get_dentry. | ||
477 | * a fileid_type of 2 says that there is also a directory | ||
478 | * identifier 8 bytes in to the filehandlefragement. | ||
479 | */ | ||
480 | static struct dentry *export_decode_fh(struct super_block *sb, __u32 *fh, int fh_len, | ||
481 | int fileid_type, | ||
482 | int (*acceptable)(void *context, struct dentry *de), | ||
483 | void *context) | ||
484 | { | ||
485 | __u32 parent[2]; | ||
486 | parent[0] = parent[1] = 0; | ||
487 | if (fh_len < 2 || fileid_type > 2) | ||
488 | return NULL; | ||
489 | if (fileid_type == 2) { | ||
490 | if (fh_len > 2) parent[0] = fh[2]; | ||
491 | if (fh_len > 3) parent[1] = fh[3]; | ||
492 | } | ||
493 | return find_exported_dentry(sb, fh, parent, | ||
494 | acceptable, context); | ||
495 | } | ||
496 | |||
497 | int exportfs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, | ||
498 | int connectable) | 343 | int connectable) |
499 | { | 344 | { |
500 | struct export_operations *nop = dentry->d_sb->s_export_op; | 345 | const struct export_operations *nop = dentry->d_sb->s_export_op; |
501 | int error; | 346 | int error; |
502 | 347 | ||
503 | if (nop->encode_fh) | 348 | if (nop->encode_fh) |
504 | error = nop->encode_fh(dentry, fh, max_len, connectable); | 349 | error = nop->encode_fh(dentry, fid->raw, max_len, connectable); |
505 | else | 350 | else |
506 | error = export_encode_fh(dentry, fh, max_len, connectable); | 351 | error = export_encode_fh(dentry, fid, max_len, connectable); |
507 | 352 | ||
508 | return error; | 353 | return error; |
509 | } | 354 | } |
510 | EXPORT_SYMBOL_GPL(exportfs_encode_fh); | 355 | EXPORT_SYMBOL_GPL(exportfs_encode_fh); |
511 | 356 | ||
512 | struct dentry *exportfs_decode_fh(struct vfsmount *mnt, __u32 *fh, int fh_len, | 357 | struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, |
513 | int fileid_type, int (*acceptable)(void *, struct dentry *), | 358 | int fh_len, int fileid_type, |
514 | void *context) | 359 | int (*acceptable)(void *, struct dentry *), void *context) |
515 | { | 360 | { |
516 | struct export_operations *nop = mnt->mnt_sb->s_export_op; | 361 | const struct export_operations *nop = mnt->mnt_sb->s_export_op; |
517 | struct dentry *result; | 362 | struct dentry *result, *alias; |
363 | int err; | ||
518 | 364 | ||
519 | if (nop->decode_fh) { | 365 | /* |
520 | result = nop->decode_fh(mnt->mnt_sb, fh, fh_len, fileid_type, | 366 | * Try to get any dentry for the given file handle from the filesystem. |
521 | acceptable, context); | 367 | */ |
368 | result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); | ||
369 | if (!result) | ||
370 | result = ERR_PTR(-ESTALE); | ||
371 | if (IS_ERR(result)) | ||
372 | return result; | ||
373 | |||
374 | if (S_ISDIR(result->d_inode->i_mode)) { | ||
375 | /* | ||
376 | * This request is for a directory. | ||
377 | * | ||
378 | * On the positive side there is only one dentry for each | ||
379 | * directory inode. On the negative side this implies that we | ||
380 | * to ensure our dentry is connected all the way up to the | ||
381 | * filesystem root. | ||
382 | */ | ||
383 | if (result->d_flags & DCACHE_DISCONNECTED) { | ||
384 | err = reconnect_path(mnt, result); | ||
385 | if (err) | ||
386 | goto err_result; | ||
387 | } | ||
388 | |||
389 | if (!acceptable(context, result)) { | ||
390 | err = -EACCES; | ||
391 | goto err_result; | ||
392 | } | ||
393 | |||
394 | return result; | ||
522 | } else { | 395 | } else { |
523 | result = export_decode_fh(mnt->mnt_sb, fh, fh_len, fileid_type, | 396 | /* |
524 | acceptable, context); | 397 | * It's not a directory. Life is a little more complicated. |
398 | */ | ||
399 | struct dentry *target_dir, *nresult; | ||
400 | char nbuf[NAME_MAX+1]; | ||
401 | |||
402 | /* | ||
403 | * See if either the dentry we just got from the filesystem | ||
404 | * or any alias for it is acceptable. This is always true | ||
405 | * if this filesystem is exported without the subtreecheck | ||
406 | * option. If the filesystem is exported with the subtree | ||
407 | * check option there's a fair chance we need to look at | ||
408 | * the parent directory in the file handle and make sure | ||
409 | * it's connected to the filesystem root. | ||
410 | */ | ||
411 | alias = find_acceptable_alias(result, acceptable, context); | ||
412 | if (alias) | ||
413 | return alias; | ||
414 | |||
415 | /* | ||
416 | * Try to extract a dentry for the parent directory from the | ||
417 | * file handle. If this fails we'll have to give up. | ||
418 | */ | ||
419 | err = -ESTALE; | ||
420 | if (!nop->fh_to_parent) | ||
421 | goto err_result; | ||
422 | |||
423 | target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, | ||
424 | fh_len, fileid_type); | ||
425 | if (!target_dir) | ||
426 | goto err_result; | ||
427 | err = PTR_ERR(target_dir); | ||
428 | if (IS_ERR(target_dir)) | ||
429 | goto err_result; | ||
430 | |||
431 | /* | ||
432 | * And as usual we need to make sure the parent directory is | ||
433 | * connected to the filesystem root. The VFS really doesn't | ||
434 | * like disconnected directories.. | ||
435 | */ | ||
436 | err = reconnect_path(mnt, target_dir); | ||
437 | if (err) { | ||
438 | dput(target_dir); | ||
439 | goto err_result; | ||
440 | } | ||
441 | |||
442 | /* | ||
443 | * Now that we've got both a well-connected parent and a | ||
444 | * dentry for the inode we're after, make sure that our | ||
445 | * inode is actually connected to the parent. | ||
446 | */ | ||
447 | err = exportfs_get_name(mnt, target_dir, nbuf, result); | ||
448 | if (!err) { | ||
449 | mutex_lock(&target_dir->d_inode->i_mutex); | ||
450 | nresult = lookup_one_len(nbuf, target_dir, | ||
451 | strlen(nbuf)); | ||
452 | mutex_unlock(&target_dir->d_inode->i_mutex); | ||
453 | if (!IS_ERR(nresult)) { | ||
454 | if (nresult->d_inode) { | ||
455 | dput(result); | ||
456 | result = nresult; | ||
457 | } else | ||
458 | dput(nresult); | ||
459 | } | ||
460 | } | ||
461 | |||
462 | /* | ||
463 | * At this point we are done with the parent, but it's pinned | ||
464 | * by the child dentry anyway. | ||
465 | */ | ||
466 | dput(target_dir); | ||
467 | |||
468 | /* | ||
469 | * And finally make sure the dentry is actually acceptable | ||
470 | * to NFSD. | ||
471 | */ | ||
472 | alias = find_acceptable_alias(result, acceptable, context); | ||
473 | if (!alias) { | ||
474 | err = -EACCES; | ||
475 | goto err_result; | ||
476 | } | ||
477 | |||
478 | return alias; | ||
525 | } | 479 | } |
526 | 480 | ||
527 | return result; | 481 | err_result: |
482 | dput(result); | ||
483 | return ERR_PTR(err); | ||
528 | } | 484 | } |
529 | EXPORT_SYMBOL_GPL(exportfs_decode_fh); | 485 | EXPORT_SYMBOL_GPL(exportfs_decode_fh); |
530 | 486 | ||
531 | EXPORT_SYMBOL(find_exported_dentry); | ||
532 | |||
533 | MODULE_LICENSE("GPL"); | 487 | MODULE_LICENSE("GPL"); |
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 05d9342bb64..d868e26c15e 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c | |||
@@ -28,6 +28,24 @@ | |||
28 | 28 | ||
29 | typedef struct ext2_dir_entry_2 ext2_dirent; | 29 | typedef struct ext2_dir_entry_2 ext2_dirent; |
30 | 30 | ||
31 | static inline unsigned ext2_rec_len_from_disk(__le16 dlen) | ||
32 | { | ||
33 | unsigned len = le16_to_cpu(dlen); | ||
34 | |||
35 | if (len == EXT2_MAX_REC_LEN) | ||
36 | return 1 << 16; | ||
37 | return len; | ||
38 | } | ||
39 | |||
40 | static inline __le16 ext2_rec_len_to_disk(unsigned len) | ||
41 | { | ||
42 | if (len == (1 << 16)) | ||
43 | return cpu_to_le16(EXT2_MAX_REC_LEN); | ||
44 | else if (len > (1 << 16)) | ||
45 | BUG(); | ||
46 | return cpu_to_le16(len); | ||
47 | } | ||
48 | |||
31 | /* | 49 | /* |
32 | * ext2 uses block-sized chunks. Arguably, sector-sized ones would be | 50 | * ext2 uses block-sized chunks. Arguably, sector-sized ones would be |
33 | * more robust, but we have what we have | 51 | * more robust, but we have what we have |
@@ -106,7 +124,7 @@ static void ext2_check_page(struct page *page) | |||
106 | } | 124 | } |
107 | for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) { | 125 | for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) { |
108 | p = (ext2_dirent *)(kaddr + offs); | 126 | p = (ext2_dirent *)(kaddr + offs); |
109 | rec_len = le16_to_cpu(p->rec_len); | 127 | rec_len = ext2_rec_len_from_disk(p->rec_len); |
110 | 128 | ||
111 | if (rec_len < EXT2_DIR_REC_LEN(1)) | 129 | if (rec_len < EXT2_DIR_REC_LEN(1)) |
112 | goto Eshort; | 130 | goto Eshort; |
@@ -204,7 +222,8 @@ static inline int ext2_match (int len, const char * const name, | |||
204 | */ | 222 | */ |
205 | static inline ext2_dirent *ext2_next_entry(ext2_dirent *p) | 223 | static inline ext2_dirent *ext2_next_entry(ext2_dirent *p) |
206 | { | 224 | { |
207 | return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len)); | 225 | return (ext2_dirent *)((char *)p + |
226 | ext2_rec_len_from_disk(p->rec_len)); | ||
208 | } | 227 | } |
209 | 228 | ||
210 | static inline unsigned | 229 | static inline unsigned |
@@ -316,7 +335,7 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) | |||
316 | return 0; | 335 | return 0; |
317 | } | 336 | } |
318 | } | 337 | } |
319 | filp->f_pos += le16_to_cpu(de->rec_len); | 338 | filp->f_pos += ext2_rec_len_from_disk(de->rec_len); |
320 | } | 339 | } |
321 | ext2_put_page(page); | 340 | ext2_put_page(page); |
322 | } | 341 | } |
@@ -425,7 +444,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, | |||
425 | { | 444 | { |
426 | loff_t pos = page_offset(page) + | 445 | loff_t pos = page_offset(page) + |
427 | (char *) de - (char *) page_address(page); | 446 | (char *) de - (char *) page_address(page); |
428 | unsigned len = le16_to_cpu(de->rec_len); | 447 | unsigned len = ext2_rec_len_from_disk(de->rec_len); |
429 | int err; | 448 | int err; |
430 | 449 | ||
431 | lock_page(page); | 450 | lock_page(page); |
@@ -482,7 +501,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) | |||
482 | /* We hit i_size */ | 501 | /* We hit i_size */ |
483 | name_len = 0; | 502 | name_len = 0; |
484 | rec_len = chunk_size; | 503 | rec_len = chunk_size; |
485 | de->rec_len = cpu_to_le16(chunk_size); | 504 | de->rec_len = ext2_rec_len_to_disk(chunk_size); |
486 | de->inode = 0; | 505 | de->inode = 0; |
487 | goto got_it; | 506 | goto got_it; |
488 | } | 507 | } |
@@ -496,7 +515,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) | |||
496 | if (ext2_match (namelen, name, de)) | 515 | if (ext2_match (namelen, name, de)) |
497 | goto out_unlock; | 516 | goto out_unlock; |
498 | name_len = EXT2_DIR_REC_LEN(de->name_len); | 517 | name_len = EXT2_DIR_REC_LEN(de->name_len); |
499 | rec_len = le16_to_cpu(de->rec_len); | 518 | rec_len = ext2_rec_len_from_disk(de->rec_len); |
500 | if (!de->inode && rec_len >= reclen) | 519 | if (!de->inode && rec_len >= reclen) |
501 | goto got_it; | 520 | goto got_it; |
502 | if (rec_len >= name_len + reclen) | 521 | if (rec_len >= name_len + reclen) |
@@ -518,8 +537,8 @@ got_it: | |||
518 | goto out_unlock; | 537 | goto out_unlock; |
519 | if (de->inode) { | 538 | if (de->inode) { |
520 | ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len); | 539 | ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len); |
521 | de1->rec_len = cpu_to_le16(rec_len - name_len); | 540 | de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len); |
522 | de->rec_len = cpu_to_le16(name_len); | 541 | de->rec_len = ext2_rec_len_to_disk(name_len); |
523 | de = de1; | 542 | de = de1; |
524 | } | 543 | } |
525 | de->name_len = namelen; | 544 | de->name_len = namelen; |
@@ -550,7 +569,8 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) | |||
550 | struct inode *inode = mapping->host; | 569 | struct inode *inode = mapping->host; |
551 | char *kaddr = page_address(page); | 570 | char *kaddr = page_address(page); |
552 | unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); | 571 | unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); |
553 | unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len); | 572 | unsigned to = ((char *)dir - kaddr) + |
573 | ext2_rec_len_from_disk(dir->rec_len); | ||
554 | loff_t pos; | 574 | loff_t pos; |
555 | ext2_dirent * pde = NULL; | 575 | ext2_dirent * pde = NULL; |
556 | ext2_dirent * de = (ext2_dirent *) (kaddr + from); | 576 | ext2_dirent * de = (ext2_dirent *) (kaddr + from); |
@@ -574,7 +594,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) | |||
574 | &page, NULL); | 594 | &page, NULL); |
575 | BUG_ON(err); | 595 | BUG_ON(err); |
576 | if (pde) | 596 | if (pde) |
577 | pde->rec_len = cpu_to_le16(to - from); | 597 | pde->rec_len = ext2_rec_len_to_disk(to - from); |
578 | dir->inode = 0; | 598 | dir->inode = 0; |
579 | err = ext2_commit_chunk(page, pos, to - from); | 599 | err = ext2_commit_chunk(page, pos, to - from); |
580 | inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; | 600 | inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; |
@@ -610,14 +630,14 @@ int ext2_make_empty(struct inode *inode, struct inode *parent) | |||
610 | memset(kaddr, 0, chunk_size); | 630 | memset(kaddr, 0, chunk_size); |
611 | de = (struct ext2_dir_entry_2 *)kaddr; | 631 | de = (struct ext2_dir_entry_2 *)kaddr; |
612 | de->name_len = 1; | 632 | de->name_len = 1; |
613 | de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1)); | 633 | de->rec_len = ext2_rec_len_to_disk(EXT2_DIR_REC_LEN(1)); |
614 | memcpy (de->name, ".\0\0", 4); | 634 | memcpy (de->name, ".\0\0", 4); |
615 | de->inode = cpu_to_le32(inode->i_ino); | 635 | de->inode = cpu_to_le32(inode->i_ino); |
616 | ext2_set_de_type (de, inode); | 636 | ext2_set_de_type (de, inode); |
617 | 637 | ||
618 | de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1)); | 638 | de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1)); |
619 | de->name_len = 2; | 639 | de->name_len = 2; |
620 | de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1)); | 640 | de->rec_len = ext2_rec_len_to_disk(chunk_size - EXT2_DIR_REC_LEN(1)); |
621 | de->inode = cpu_to_le32(parent->i_ino); | 641 | de->inode = cpu_to_le32(parent->i_ino); |
622 | memcpy (de->name, "..\0", 4); | 642 | memcpy (de->name, "..\0", 4); |
623 | ext2_set_de_type (de, inode); | 643 | ext2_set_de_type (de, inode); |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 77bd5f9262f..154e25f13d7 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -311,13 +311,10 @@ static const struct super_operations ext2_sops = { | |||
311 | #endif | 311 | #endif |
312 | }; | 312 | }; |
313 | 313 | ||
314 | static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp) | 314 | static struct inode *ext2_nfs_get_inode(struct super_block *sb, |
315 | u64 ino, u32 generation) | ||
315 | { | 316 | { |
316 | __u32 *objp = vobjp; | ||
317 | unsigned long ino = objp[0]; | ||
318 | __u32 generation = objp[1]; | ||
319 | struct inode *inode; | 317 | struct inode *inode; |
320 | struct dentry *result; | ||
321 | 318 | ||
322 | if (ino < EXT2_FIRST_INO(sb) && ino != EXT2_ROOT_INO) | 319 | if (ino < EXT2_FIRST_INO(sb) && ino != EXT2_ROOT_INO) |
323 | return ERR_PTR(-ESTALE); | 320 | return ERR_PTR(-ESTALE); |
@@ -338,15 +335,21 @@ static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp) | |||
338 | iput(inode); | 335 | iput(inode); |
339 | return ERR_PTR(-ESTALE); | 336 | return ERR_PTR(-ESTALE); |
340 | } | 337 | } |
341 | /* now to find a dentry. | 338 | return inode; |
342 | * If possible, get a well-connected one | 339 | } |
343 | */ | 340 | |
344 | result = d_alloc_anon(inode); | 341 | static struct dentry *ext2_fh_to_dentry(struct super_block *sb, struct fid *fid, |
345 | if (!result) { | 342 | int fh_len, int fh_type) |
346 | iput(inode); | 343 | { |
347 | return ERR_PTR(-ENOMEM); | 344 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, |
348 | } | 345 | ext2_nfs_get_inode); |
349 | return result; | 346 | } |
347 | |||
348 | static struct dentry *ext2_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
349 | int fh_len, int fh_type) | ||
350 | { | ||
351 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
352 | ext2_nfs_get_inode); | ||
350 | } | 353 | } |
351 | 354 | ||
352 | /* Yes, most of these are left as NULL!! | 355 | /* Yes, most of these are left as NULL!! |
@@ -354,9 +357,10 @@ static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp) | |||
354 | * systems, but can be improved upon. | 357 | * systems, but can be improved upon. |
355 | * Currently only get_parent is required. | 358 | * Currently only get_parent is required. |
356 | */ | 359 | */ |
357 | static struct export_operations ext2_export_ops = { | 360 | static const struct export_operations ext2_export_ops = { |
361 | .fh_to_dentry = ext2_fh_to_dentry, | ||
362 | .fh_to_parent = ext2_fh_to_parent, | ||
358 | .get_parent = ext2_get_parent, | 363 | .get_parent = ext2_get_parent, |
359 | .get_dentry = ext2_get_dentry, | ||
360 | }; | 364 | }; |
361 | 365 | ||
362 | static unsigned long get_sb_block(void **data) | 366 | static unsigned long get_sb_block(void **data) |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 81868c0bc40..de55da9e28b 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -631,13 +631,10 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
631 | } | 631 | } |
632 | 632 | ||
633 | 633 | ||
634 | static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) | 634 | static struct inode *ext3_nfs_get_inode(struct super_block *sb, |
635 | u64 ino, u32 generation) | ||
635 | { | 636 | { |
636 | __u32 *objp = vobjp; | ||
637 | unsigned long ino = objp[0]; | ||
638 | __u32 generation = objp[1]; | ||
639 | struct inode *inode; | 637 | struct inode *inode; |
640 | struct dentry *result; | ||
641 | 638 | ||
642 | if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) | 639 | if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) |
643 | return ERR_PTR(-ESTALE); | 640 | return ERR_PTR(-ESTALE); |
@@ -660,15 +657,22 @@ static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) | |||
660 | iput(inode); | 657 | iput(inode); |
661 | return ERR_PTR(-ESTALE); | 658 | return ERR_PTR(-ESTALE); |
662 | } | 659 | } |
663 | /* now to find a dentry. | 660 | |
664 | * If possible, get a well-connected one | 661 | return inode; |
665 | */ | 662 | } |
666 | result = d_alloc_anon(inode); | 663 | |
667 | if (!result) { | 664 | static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid, |
668 | iput(inode); | 665 | int fh_len, int fh_type) |
669 | return ERR_PTR(-ENOMEM); | 666 | { |
670 | } | 667 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, |
671 | return result; | 668 | ext3_nfs_get_inode); |
669 | } | ||
670 | |||
671 | static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
672 | int fh_len, int fh_type) | ||
673 | { | ||
674 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
675 | ext3_nfs_get_inode); | ||
672 | } | 676 | } |
673 | 677 | ||
674 | #ifdef CONFIG_QUOTA | 678 | #ifdef CONFIG_QUOTA |
@@ -737,9 +741,10 @@ static const struct super_operations ext3_sops = { | |||
737 | #endif | 741 | #endif |
738 | }; | 742 | }; |
739 | 743 | ||
740 | static struct export_operations ext3_export_ops = { | 744 | static const struct export_operations ext3_export_ops = { |
745 | .fh_to_dentry = ext3_fh_to_dentry, | ||
746 | .fh_to_parent = ext3_fh_to_parent, | ||
741 | .get_parent = ext3_get_parent, | 747 | .get_parent = ext3_get_parent, |
742 | .get_dentry = ext3_get_dentry, | ||
743 | }; | 748 | }; |
744 | 749 | ||
745 | enum { | 750 | enum { |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b11e9e2bcd0..8031dc0e24e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -686,13 +686,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
686 | } | 686 | } |
687 | 687 | ||
688 | 688 | ||
689 | static struct dentry *ext4_get_dentry(struct super_block *sb, void *vobjp) | 689 | static struct inode *ext4_nfs_get_inode(struct super_block *sb, |
690 | u64 ino, u32 generation) | ||
690 | { | 691 | { |
691 | __u32 *objp = vobjp; | ||
692 | unsigned long ino = objp[0]; | ||
693 | __u32 generation = objp[1]; | ||
694 | struct inode *inode; | 692 | struct inode *inode; |
695 | struct dentry *result; | ||
696 | 693 | ||
697 | if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) | 694 | if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) |
698 | return ERR_PTR(-ESTALE); | 695 | return ERR_PTR(-ESTALE); |
@@ -715,15 +712,22 @@ static struct dentry *ext4_get_dentry(struct super_block *sb, void *vobjp) | |||
715 | iput(inode); | 712 | iput(inode); |
716 | return ERR_PTR(-ESTALE); | 713 | return ERR_PTR(-ESTALE); |
717 | } | 714 | } |
718 | /* now to find a dentry. | 715 | |
719 | * If possible, get a well-connected one | 716 | return inode; |
720 | */ | 717 | } |
721 | result = d_alloc_anon(inode); | 718 | |
722 | if (!result) { | 719 | static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, |
723 | iput(inode); | 720 | int fh_len, int fh_type) |
724 | return ERR_PTR(-ENOMEM); | 721 | { |
725 | } | 722 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, |
726 | return result; | 723 | ext4_nfs_get_inode); |
724 | } | ||
725 | |||
726 | static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
727 | int fh_len, int fh_type) | ||
728 | { | ||
729 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
730 | ext4_nfs_get_inode); | ||
727 | } | 731 | } |
728 | 732 | ||
729 | #ifdef CONFIG_QUOTA | 733 | #ifdef CONFIG_QUOTA |
@@ -792,9 +796,10 @@ static const struct super_operations ext4_sops = { | |||
792 | #endif | 796 | #endif |
793 | }; | 797 | }; |
794 | 798 | ||
795 | static struct export_operations ext4_export_ops = { | 799 | static const struct export_operations ext4_export_ops = { |
800 | .fh_to_dentry = ext4_fh_to_dentry, | ||
801 | .fh_to_parent = ext4_fh_to_parent, | ||
796 | .get_parent = ext4_get_parent, | 802 | .get_parent = ext4_get_parent, |
797 | .get_dentry = ext4_get_dentry, | ||
798 | }; | 803 | }; |
799 | 804 | ||
800 | enum { | 805 | enum { |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index c0c5e9c55b5..920a576e1c2 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -653,24 +653,15 @@ static const struct super_operations fat_sops = { | |||
653 | * of i_logstart is used to store the directory entry offset. | 653 | * of i_logstart is used to store the directory entry offset. |
654 | */ | 654 | */ |
655 | 655 | ||
656 | static struct dentry * | 656 | static struct dentry *fat_fh_to_dentry(struct super_block *sb, |
657 | fat_decode_fh(struct super_block *sb, __u32 *fh, int len, int fhtype, | 657 | struct fid *fid, int fh_len, int fh_type) |
658 | int (*acceptable)(void *context, struct dentry *de), | ||
659 | void *context) | ||
660 | { | ||
661 | if (fhtype != 3) | ||
662 | return ERR_PTR(-ESTALE); | ||
663 | if (len < 5) | ||
664 | return ERR_PTR(-ESTALE); | ||
665 | |||
666 | return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable, context); | ||
667 | } | ||
668 | |||
669 | static struct dentry *fat_get_dentry(struct super_block *sb, void *inump) | ||
670 | { | 658 | { |
671 | struct inode *inode = NULL; | 659 | struct inode *inode = NULL; |
672 | struct dentry *result; | 660 | struct dentry *result; |
673 | __u32 *fh = inump; | 661 | u32 *fh = fid->raw; |
662 | |||
663 | if (fh_len < 5 || fh_type != 3) | ||
664 | return NULL; | ||
674 | 665 | ||
675 | inode = iget(sb, fh[0]); | 666 | inode = iget(sb, fh[0]); |
676 | if (!inode || is_bad_inode(inode) || inode->i_generation != fh[1]) { | 667 | if (!inode || is_bad_inode(inode) || inode->i_generation != fh[1]) { |
@@ -783,10 +774,9 @@ out: | |||
783 | return parent; | 774 | return parent; |
784 | } | 775 | } |
785 | 776 | ||
786 | static struct export_operations fat_export_ops = { | 777 | static const struct export_operations fat_export_ops = { |
787 | .decode_fh = fat_decode_fh, | ||
788 | .encode_fh = fat_encode_fh, | 778 | .encode_fh = fat_encode_fh, |
789 | .get_dentry = fat_get_dentry, | 779 | .fh_to_dentry = fat_fh_to_dentry, |
790 | .get_parent = fat_get_parent, | 780 | .get_parent = fat_get_parent, |
791 | }; | 781 | }; |
792 | 782 | ||
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index e2d1347796a..b9da62348a8 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c | |||
@@ -31,40 +31,6 @@ | |||
31 | #define GFS2_LARGE_FH_SIZE 8 | 31 | #define GFS2_LARGE_FH_SIZE 8 |
32 | #define GFS2_OLD_FH_SIZE 10 | 32 | #define GFS2_OLD_FH_SIZE 10 |
33 | 33 | ||
34 | static struct dentry *gfs2_decode_fh(struct super_block *sb, | ||
35 | __u32 *p, | ||
36 | int fh_len, | ||
37 | int fh_type, | ||
38 | int (*acceptable)(void *context, | ||
39 | struct dentry *dentry), | ||
40 | void *context) | ||
41 | { | ||
42 | __be32 *fh = (__force __be32 *)p; | ||
43 | struct gfs2_inum_host inum, parent; | ||
44 | |||
45 | memset(&parent, 0, sizeof(struct gfs2_inum)); | ||
46 | |||
47 | switch (fh_len) { | ||
48 | case GFS2_LARGE_FH_SIZE: | ||
49 | case GFS2_OLD_FH_SIZE: | ||
50 | parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; | ||
51 | parent.no_formal_ino |= be32_to_cpu(fh[5]); | ||
52 | parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; | ||
53 | parent.no_addr |= be32_to_cpu(fh[7]); | ||
54 | case GFS2_SMALL_FH_SIZE: | ||
55 | inum.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; | ||
56 | inum.no_formal_ino |= be32_to_cpu(fh[1]); | ||
57 | inum.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; | ||
58 | inum.no_addr |= be32_to_cpu(fh[3]); | ||
59 | break; | ||
60 | default: | ||
61 | return NULL; | ||
62 | } | ||
63 | |||
64 | return gfs2_export_ops.find_exported_dentry(sb, &inum, &parent, | ||
65 | acceptable, context); | ||
66 | } | ||
67 | |||
68 | static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, | 34 | static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, |
69 | int connectable) | 35 | int connectable) |
70 | { | 36 | { |
@@ -189,10 +155,10 @@ static struct dentry *gfs2_get_parent(struct dentry *child) | |||
189 | return dentry; | 155 | return dentry; |
190 | } | 156 | } |
191 | 157 | ||
192 | static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) | 158 | static struct dentry *gfs2_get_dentry(struct super_block *sb, |
159 | struct gfs2_inum_host *inum) | ||
193 | { | 160 | { |
194 | struct gfs2_sbd *sdp = sb->s_fs_info; | 161 | struct gfs2_sbd *sdp = sb->s_fs_info; |
195 | struct gfs2_inum_host *inum = inum_obj; | ||
196 | struct gfs2_holder i_gh, ri_gh, rgd_gh; | 162 | struct gfs2_holder i_gh, ri_gh, rgd_gh; |
197 | struct gfs2_rgrpd *rgd; | 163 | struct gfs2_rgrpd *rgd; |
198 | struct inode *inode; | 164 | struct inode *inode; |
@@ -289,11 +255,50 @@ fail: | |||
289 | return ERR_PTR(error); | 255 | return ERR_PTR(error); |
290 | } | 256 | } |
291 | 257 | ||
292 | struct export_operations gfs2_export_ops = { | 258 | static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, |
293 | .decode_fh = gfs2_decode_fh, | 259 | int fh_len, int fh_type) |
260 | { | ||
261 | struct gfs2_inum_host this; | ||
262 | __be32 *fh = (__force __be32 *)fid->raw; | ||
263 | |||
264 | switch (fh_type) { | ||
265 | case GFS2_SMALL_FH_SIZE: | ||
266 | case GFS2_LARGE_FH_SIZE: | ||
267 | case GFS2_OLD_FH_SIZE: | ||
268 | this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; | ||
269 | this.no_formal_ino |= be32_to_cpu(fh[1]); | ||
270 | this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; | ||
271 | this.no_addr |= be32_to_cpu(fh[3]); | ||
272 | return gfs2_get_dentry(sb, &this); | ||
273 | default: | ||
274 | return NULL; | ||
275 | } | ||
276 | } | ||
277 | |||
278 | static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
279 | int fh_len, int fh_type) | ||
280 | { | ||
281 | struct gfs2_inum_host parent; | ||
282 | __be32 *fh = (__force __be32 *)fid->raw; | ||
283 | |||
284 | switch (fh_type) { | ||
285 | case GFS2_LARGE_FH_SIZE: | ||
286 | case GFS2_OLD_FH_SIZE: | ||
287 | parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; | ||
288 | parent.no_formal_ino |= be32_to_cpu(fh[5]); | ||
289 | parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; | ||
290 | parent.no_addr |= be32_to_cpu(fh[7]); | ||
291 | return gfs2_get_dentry(sb, &parent); | ||
292 | default: | ||
293 | return NULL; | ||
294 | } | ||
295 | } | ||
296 | |||
297 | const struct export_operations gfs2_export_ops = { | ||
294 | .encode_fh = gfs2_encode_fh, | 298 | .encode_fh = gfs2_encode_fh, |
299 | .fh_to_dentry = gfs2_fh_to_dentry, | ||
300 | .fh_to_parent = gfs2_fh_to_parent, | ||
295 | .get_name = gfs2_get_name, | 301 | .get_name = gfs2_get_name, |
296 | .get_parent = gfs2_get_parent, | 302 | .get_parent = gfs2_get_parent, |
297 | .get_dentry = gfs2_get_dentry, | ||
298 | }; | 303 | }; |
299 | 304 | ||
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h index 407029b3b2b..da849051183 100644 --- a/fs/gfs2/ops_fstype.h +++ b/fs/gfs2/ops_fstype.h | |||
@@ -14,6 +14,6 @@ | |||
14 | 14 | ||
15 | extern struct file_system_type gfs2_fs_type; | 15 | extern struct file_system_type gfs2_fs_type; |
16 | extern struct file_system_type gfs2meta_fs_type; | 16 | extern struct file_system_type gfs2meta_fs_type; |
17 | extern struct export_operations gfs2_export_ops; | 17 | extern const struct export_operations gfs2_export_ops; |
18 | 18 | ||
19 | #endif /* __OPS_FSTYPE_DOT_H__ */ | 19 | #endif /* __OPS_FSTYPE_DOT_H__ */ |
diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 4af856a7fda..29f9753ae5e 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c | |||
@@ -42,16 +42,6 @@ isofs_export_iget(struct super_block *sb, | |||
42 | return result; | 42 | return result; |
43 | } | 43 | } |
44 | 44 | ||
45 | static struct dentry * | ||
46 | isofs_export_get_dentry(struct super_block *sb, void *vobjp) | ||
47 | { | ||
48 | __u32 *objp = vobjp; | ||
49 | unsigned long block = objp[0]; | ||
50 | unsigned long offset = objp[1]; | ||
51 | __u32 generation = objp[2]; | ||
52 | return isofs_export_iget(sb, block, offset, generation); | ||
53 | } | ||
54 | |||
55 | /* This function is surprisingly simple. The trick is understanding | 45 | /* This function is surprisingly simple. The trick is understanding |
56 | * that "child" is always a directory. So, to find its parent, you | 46 | * that "child" is always a directory. So, to find its parent, you |
57 | * simply need to find its ".." entry, normalize its block and offset, | 47 | * simply need to find its ".." entry, normalize its block and offset, |
@@ -182,43 +172,44 @@ isofs_export_encode_fh(struct dentry *dentry, | |||
182 | return type; | 172 | return type; |
183 | } | 173 | } |
184 | 174 | ||
175 | struct isofs_fid { | ||
176 | u32 block; | ||
177 | u16 offset; | ||
178 | u16 parent_offset; | ||
179 | u32 generation; | ||
180 | u32 parent_block; | ||
181 | u32 parent_generation; | ||
182 | }; | ||
185 | 183 | ||
186 | static struct dentry * | 184 | static struct dentry *isofs_fh_to_dentry(struct super_block *sb, |
187 | isofs_export_decode_fh(struct super_block *sb, | 185 | struct fid *fid, int fh_len, int fh_type) |
188 | __u32 *fh32, | ||
189 | int fh_len, | ||
190 | int fileid_type, | ||
191 | int (*acceptable)(void *context, struct dentry *de), | ||
192 | void *context) | ||
193 | { | 186 | { |
194 | __u16 *fh16 = (__u16*)fh32; | 187 | struct isofs_fid *ifid = (struct isofs_fid *)fid; |
195 | __u32 child[3]; /* The child is what triggered all this. */ | ||
196 | __u32 parent[3]; /* The parent is just along for the ride. */ | ||
197 | 188 | ||
198 | if (fh_len < 3 || fileid_type > 2) | 189 | if (fh_len < 3 || fh_type > 2) |
199 | return NULL; | 190 | return NULL; |
200 | 191 | ||
201 | child[0] = fh32[0]; | 192 | return isofs_export_iget(sb, ifid->block, ifid->offset, |
202 | child[1] = fh16[2]; /* fh16 [sic] */ | 193 | ifid->generation); |
203 | child[2] = fh32[2]; | ||
204 | |||
205 | parent[0] = 0; | ||
206 | parent[1] = 0; | ||
207 | parent[2] = 0; | ||
208 | if (fileid_type == 2) { | ||
209 | if (fh_len > 2) parent[0] = fh32[3]; | ||
210 | parent[1] = fh16[3]; /* fh16 [sic] */ | ||
211 | if (fh_len > 4) parent[2] = fh32[4]; | ||
212 | } | ||
213 | |||
214 | return sb->s_export_op->find_exported_dentry(sb, child, parent, | ||
215 | acceptable, context); | ||
216 | } | 194 | } |
217 | 195 | ||
196 | static struct dentry *isofs_fh_to_parent(struct super_block *sb, | ||
197 | struct fid *fid, int fh_len, int fh_type) | ||
198 | { | ||
199 | struct isofs_fid *ifid = (struct isofs_fid *)fid; | ||
200 | |||
201 | if (fh_type != 2) | ||
202 | return NULL; | ||
203 | |||
204 | return isofs_export_iget(sb, | ||
205 | fh_len > 2 ? ifid->parent_block : 0, | ||
206 | ifid->parent_offset, | ||
207 | fh_len > 4 ? ifid->parent_generation : 0); | ||
208 | } | ||
218 | 209 | ||
219 | struct export_operations isofs_export_ops = { | 210 | const struct export_operations isofs_export_ops = { |
220 | .decode_fh = isofs_export_decode_fh, | ||
221 | .encode_fh = isofs_export_encode_fh, | 211 | .encode_fh = isofs_export_encode_fh, |
222 | .get_dentry = isofs_export_get_dentry, | 212 | .fh_to_dentry = isofs_fh_to_dentry, |
213 | .fh_to_parent = isofs_fh_to_parent, | ||
223 | .get_parent = isofs_export_get_parent, | 214 | .get_parent = isofs_export_get_parent, |
224 | }; | 215 | }; |
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index a07e67b1ea7..f3213f9f89a 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h | |||
@@ -178,4 +178,4 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de, | |||
178 | extern const struct inode_operations isofs_dir_inode_operations; | 178 | extern const struct inode_operations isofs_dir_inode_operations; |
179 | extern const struct file_operations isofs_dir_operations; | 179 | extern const struct file_operations isofs_dir_operations; |
180 | extern const struct address_space_operations isofs_symlink_aops; | 180 | extern const struct address_space_operations isofs_symlink_aops; |
181 | extern struct export_operations isofs_export_ops; | 181 | extern const struct export_operations isofs_export_ops; |
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index f0ec72b263f..8e2cf2cde18 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h | |||
@@ -18,6 +18,8 @@ | |||
18 | #ifndef _H_JFS_INODE | 18 | #ifndef _H_JFS_INODE |
19 | #define _H_JFS_INODE | 19 | #define _H_JFS_INODE |
20 | 20 | ||
21 | struct fid; | ||
22 | |||
21 | extern struct inode *ialloc(struct inode *, umode_t); | 23 | extern struct inode *ialloc(struct inode *, umode_t); |
22 | extern int jfs_fsync(struct file *, struct dentry *, int); | 24 | extern int jfs_fsync(struct file *, struct dentry *, int); |
23 | extern int jfs_ioctl(struct inode *, struct file *, | 25 | extern int jfs_ioctl(struct inode *, struct file *, |
@@ -32,7 +34,10 @@ extern void jfs_truncate_nolock(struct inode *, loff_t); | |||
32 | extern void jfs_free_zero_link(struct inode *); | 34 | extern void jfs_free_zero_link(struct inode *); |
33 | extern struct dentry *jfs_get_parent(struct dentry *dentry); | 35 | extern struct dentry *jfs_get_parent(struct dentry *dentry); |
34 | extern void jfs_get_inode_flags(struct jfs_inode_info *); | 36 | extern void jfs_get_inode_flags(struct jfs_inode_info *); |
35 | extern struct dentry *jfs_get_dentry(struct super_block *sb, void *vobjp); | 37 | extern struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
38 | int fh_len, int fh_type); | ||
39 | extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
40 | int fh_len, int fh_type); | ||
36 | extern void jfs_set_inode_flags(struct inode *); | 41 | extern void jfs_set_inode_flags(struct inode *); |
37 | extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); | 42 | extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); |
38 | 43 | ||
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 932797ba433..4e0a8493cef 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
21 | #include <linux/ctype.h> | 21 | #include <linux/ctype.h> |
22 | #include <linux/quotaops.h> | 22 | #include <linux/quotaops.h> |
23 | #include <linux/exportfs.h> | ||
23 | #include "jfs_incore.h" | 24 | #include "jfs_incore.h" |
24 | #include "jfs_superblock.h" | 25 | #include "jfs_superblock.h" |
25 | #include "jfs_inode.h" | 26 | #include "jfs_inode.h" |
@@ -1477,13 +1478,10 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc | |||
1477 | return dentry; | 1478 | return dentry; |
1478 | } | 1479 | } |
1479 | 1480 | ||
1480 | struct dentry *jfs_get_dentry(struct super_block *sb, void *vobjp) | 1481 | static struct inode *jfs_nfs_get_inode(struct super_block *sb, |
1482 | u64 ino, u32 generation) | ||
1481 | { | 1483 | { |
1482 | __u32 *objp = vobjp; | ||
1483 | unsigned long ino = objp[0]; | ||
1484 | __u32 generation = objp[1]; | ||
1485 | struct inode *inode; | 1484 | struct inode *inode; |
1486 | struct dentry *result; | ||
1487 | 1485 | ||
1488 | if (ino == 0) | 1486 | if (ino == 0) |
1489 | return ERR_PTR(-ESTALE); | 1487 | return ERR_PTR(-ESTALE); |
@@ -1493,20 +1491,25 @@ struct dentry *jfs_get_dentry(struct super_block *sb, void *vobjp) | |||
1493 | 1491 | ||
1494 | if (is_bad_inode(inode) || | 1492 | if (is_bad_inode(inode) || |
1495 | (generation && inode->i_generation != generation)) { | 1493 | (generation && inode->i_generation != generation)) { |
1496 | result = ERR_PTR(-ESTALE); | 1494 | iput(inode); |
1497 | goto out_iput; | 1495 | return ERR_PTR(-ESTALE); |
1498 | } | 1496 | } |
1499 | 1497 | ||
1500 | result = d_alloc_anon(inode); | 1498 | return inode; |
1501 | if (!result) { | 1499 | } |
1502 | result = ERR_PTR(-ENOMEM); | ||
1503 | goto out_iput; | ||
1504 | } | ||
1505 | return result; | ||
1506 | 1500 | ||
1507 | out_iput: | 1501 | struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
1508 | iput(inode); | 1502 | int fh_len, int fh_type) |
1509 | return result; | 1503 | { |
1504 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | ||
1505 | jfs_nfs_get_inode); | ||
1506 | } | ||
1507 | |||
1508 | struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
1509 | int fh_len, int fh_type) | ||
1510 | { | ||
1511 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
1512 | jfs_nfs_get_inode); | ||
1510 | } | 1513 | } |
1511 | 1514 | ||
1512 | struct dentry *jfs_get_parent(struct dentry *dentry) | 1515 | struct dentry *jfs_get_parent(struct dentry *dentry) |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index cff60c17194..314bb4ff1ba 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -48,7 +48,7 @@ MODULE_LICENSE("GPL"); | |||
48 | static struct kmem_cache * jfs_inode_cachep; | 48 | static struct kmem_cache * jfs_inode_cachep; |
49 | 49 | ||
50 | static const struct super_operations jfs_super_operations; | 50 | static const struct super_operations jfs_super_operations; |
51 | static struct export_operations jfs_export_operations; | 51 | static const struct export_operations jfs_export_operations; |
52 | static struct file_system_type jfs_fs_type; | 52 | static struct file_system_type jfs_fs_type; |
53 | 53 | ||
54 | #define MAX_COMMIT_THREADS 64 | 54 | #define MAX_COMMIT_THREADS 64 |
@@ -737,8 +737,9 @@ static const struct super_operations jfs_super_operations = { | |||
737 | #endif | 737 | #endif |
738 | }; | 738 | }; |
739 | 739 | ||
740 | static struct export_operations jfs_export_operations = { | 740 | static const struct export_operations jfs_export_operations = { |
741 | .get_dentry = jfs_get_dentry, | 741 | .fh_to_dentry = jfs_fh_to_dentry, |
742 | .fh_to_parent = jfs_fh_to_parent, | ||
742 | .get_parent = jfs_get_parent, | 743 | .get_parent = jfs_get_parent, |
743 | }; | 744 | }; |
744 | 745 | ||
diff --git a/fs/libfs.c b/fs/libfs.c index ae51481e45e..6e68b700958 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/mount.h> | 8 | #include <linux/mount.h> |
9 | #include <linux/vfs.h> | 9 | #include <linux/vfs.h> |
10 | #include <linux/mutex.h> | 10 | #include <linux/mutex.h> |
11 | #include <linux/exportfs.h> | ||
11 | 12 | ||
12 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
13 | 14 | ||
@@ -678,6 +679,93 @@ out: | |||
678 | return ret; | 679 | return ret; |
679 | } | 680 | } |
680 | 681 | ||
682 | /* | ||
683 | * This is what d_alloc_anon should have been. Once the exportfs | ||
684 | * argument transition has been finished I will update d_alloc_anon | ||
685 | * to this prototype and this wrapper will go away. --hch | ||
686 | */ | ||
687 | static struct dentry *exportfs_d_alloc(struct inode *inode) | ||
688 | { | ||
689 | struct dentry *dentry; | ||
690 | |||
691 | if (!inode) | ||
692 | return NULL; | ||
693 | if (IS_ERR(inode)) | ||
694 | return ERR_PTR(PTR_ERR(inode)); | ||
695 | |||
696 | dentry = d_alloc_anon(inode); | ||
697 | if (!dentry) { | ||
698 | iput(inode); | ||
699 | dentry = ERR_PTR(-ENOMEM); | ||
700 | } | ||
701 | return dentry; | ||
702 | } | ||
703 | |||
704 | /** | ||
705 | * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation | ||
706 | * @sb: filesystem to do the file handle conversion on | ||
707 | * @fid: file handle to convert | ||
708 | * @fh_len: length of the file handle in 32-bit words | ||
709 | * @fh_type: type of file handle | ||
710 | * @get_inode: filesystem callback to retrieve inode | ||
711 | * | ||
712 | * This function decodes @fid as long as it has one of the well-known | ||
713 | * Linux filehandle types and calls @get_inode on it to retrieve the | ||
714 | * inode for the object specified in the file handle. | ||
715 | */ | ||
716 | struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, | ||
717 | int fh_len, int fh_type, struct inode *(*get_inode) | ||
718 | (struct super_block *sb, u64 ino, u32 gen)) | ||
719 | { | ||
720 | struct inode *inode = NULL; | ||
721 | |||
722 | if (fh_len < 2) | ||
723 | return NULL; | ||
724 | |||
725 | switch (fh_type) { | ||
726 | case FILEID_INO32_GEN: | ||
727 | case FILEID_INO32_GEN_PARENT: | ||
728 | inode = get_inode(sb, fid->i32.ino, fid->i32.gen); | ||
729 | break; | ||
730 | } | ||
731 | |||
732 | return exportfs_d_alloc(inode); | ||
733 | } | ||
734 | EXPORT_SYMBOL_GPL(generic_fh_to_dentry); | ||
735 | |||
736 | /** | ||
737 | * generic_fh_to_parent - generic helper for the fh_to_parent export operation | ||
738 | * @sb: filesystem to do the file handle conversion on | ||
739 | * @fid: file handle to convert | ||
740 | * @fh_len: length of the file handle in 32-bit words | ||
741 | * @fh_type: type of file handle | ||
742 | * @get_inode: filesystem callback to retrieve inode | ||
743 | * | ||
744 | * This function decodes @fid as long as it has one of the well-known | ||
745 | * Linux filehandle types and calls @get_inode on it to retrieve the | ||
746 | * inode for the _parent_ object specified in the file handle if it | ||
747 | * is specified in the file handle, or NULL otherwise. | ||
748 | */ | ||
749 | struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
750 | int fh_len, int fh_type, struct inode *(*get_inode) | ||
751 | (struct super_block *sb, u64 ino, u32 gen)) | ||
752 | { | ||
753 | struct inode *inode = NULL; | ||
754 | |||
755 | if (fh_len <= 2) | ||
756 | return NULL; | ||
757 | |||
758 | switch (fh_type) { | ||
759 | case FILEID_INO32_GEN_PARENT: | ||
760 | inode = get_inode(sb, fid->i32.parent_ino, | ||
761 | (fh_len > 3 ? fid->i32.parent_gen : 0)); | ||
762 | break; | ||
763 | } | ||
764 | |||
765 | return exportfs_d_alloc(inode); | ||
766 | } | ||
767 | EXPORT_SYMBOL_GPL(generic_fh_to_parent); | ||
768 | |||
681 | EXPORT_SYMBOL(dcache_dir_close); | 769 | EXPORT_SYMBOL(dcache_dir_close); |
682 | EXPORT_SYMBOL(dcache_dir_lseek); | 770 | EXPORT_SYMBOL(dcache_dir_lseek); |
683 | EXPORT_SYMBOL(dcache_dir_open); | 771 | EXPORT_SYMBOL(dcache_dir_open); |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 04b26672980..66d0aeb32a4 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -386,15 +386,13 @@ static int check_export(struct inode *inode, int flags, unsigned char *uuid) | |||
386 | dprintk("exp_export: export of non-dev fs without fsid\n"); | 386 | dprintk("exp_export: export of non-dev fs without fsid\n"); |
387 | return -EINVAL; | 387 | return -EINVAL; |
388 | } | 388 | } |
389 | if (!inode->i_sb->s_export_op) { | 389 | |
390 | if (!inode->i_sb->s_export_op || | ||
391 | !inode->i_sb->s_export_op->fh_to_dentry) { | ||
390 | dprintk("exp_export: export of invalid fs type.\n"); | 392 | dprintk("exp_export: export of invalid fs type.\n"); |
391 | return -EINVAL; | 393 | return -EINVAL; |
392 | } | 394 | } |
393 | 395 | ||
394 | /* Ok, we can export it */; | ||
395 | if (!inode->i_sb->s_export_op->find_exported_dentry) | ||
396 | inode->i_sb->s_export_op->find_exported_dentry = | ||
397 | find_exported_dentry; | ||
398 | return 0; | 396 | return 0; |
399 | 397 | ||
400 | } | 398 | } |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 7011d62acfc..4f712e97058 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
@@ -115,8 +115,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
115 | dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); | 115 | dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); |
116 | 116 | ||
117 | if (!fhp->fh_dentry) { | 117 | if (!fhp->fh_dentry) { |
118 | __u32 *datap=NULL; | 118 | struct fid *fid = NULL, sfid; |
119 | __u32 tfh[3]; /* filehandle fragment for oldstyle filehandles */ | ||
120 | int fileid_type; | 119 | int fileid_type; |
121 | int data_left = fh->fh_size/4; | 120 | int data_left = fh->fh_size/4; |
122 | 121 | ||
@@ -128,7 +127,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
128 | 127 | ||
129 | if (fh->fh_version == 1) { | 128 | if (fh->fh_version == 1) { |
130 | int len; | 129 | int len; |
131 | datap = fh->fh_auth; | ||
132 | if (--data_left<0) goto out; | 130 | if (--data_left<0) goto out; |
133 | switch (fh->fh_auth_type) { | 131 | switch (fh->fh_auth_type) { |
134 | case 0: break; | 132 | case 0: break; |
@@ -144,9 +142,11 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
144 | fh->fh_fsid[1] = fh->fh_fsid[2]; | 142 | fh->fh_fsid[1] = fh->fh_fsid[2]; |
145 | } | 143 | } |
146 | if ((data_left -= len)<0) goto out; | 144 | if ((data_left -= len)<0) goto out; |
147 | exp = rqst_exp_find(rqstp, fh->fh_fsid_type, datap); | 145 | exp = rqst_exp_find(rqstp, fh->fh_fsid_type, |
148 | datap += len; | 146 | fh->fh_auth); |
147 | fid = (struct fid *)(fh->fh_auth + len); | ||
149 | } else { | 148 | } else { |
149 | __u32 tfh[2]; | ||
150 | dev_t xdev; | 150 | dev_t xdev; |
151 | ino_t xino; | 151 | ino_t xino; |
152 | if (fh->fh_size != NFS_FHSIZE) | 152 | if (fh->fh_size != NFS_FHSIZE) |
@@ -190,22 +190,22 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
190 | error = nfserr_badhandle; | 190 | error = nfserr_badhandle; |
191 | 191 | ||
192 | if (fh->fh_version != 1) { | 192 | if (fh->fh_version != 1) { |
193 | tfh[0] = fh->ofh_ino; | 193 | sfid.i32.ino = fh->ofh_ino; |
194 | tfh[1] = fh->ofh_generation; | 194 | sfid.i32.gen = fh->ofh_generation; |
195 | tfh[2] = fh->ofh_dirino; | 195 | sfid.i32.parent_ino = fh->ofh_dirino; |
196 | datap = tfh; | 196 | fid = &sfid; |
197 | data_left = 3; | 197 | data_left = 3; |
198 | if (fh->ofh_dirino == 0) | 198 | if (fh->ofh_dirino == 0) |
199 | fileid_type = 1; | 199 | fileid_type = FILEID_INO32_GEN; |
200 | else | 200 | else |
201 | fileid_type = 2; | 201 | fileid_type = FILEID_INO32_GEN_PARENT; |
202 | } else | 202 | } else |
203 | fileid_type = fh->fh_fileid_type; | 203 | fileid_type = fh->fh_fileid_type; |
204 | 204 | ||
205 | if (fileid_type == 0) | 205 | if (fileid_type == FILEID_ROOT) |
206 | dentry = dget(exp->ex_dentry); | 206 | dentry = dget(exp->ex_dentry); |
207 | else { | 207 | else { |
208 | dentry = exportfs_decode_fh(exp->ex_mnt, datap, | 208 | dentry = exportfs_decode_fh(exp->ex_mnt, fid, |
209 | data_left, fileid_type, | 209 | data_left, fileid_type, |
210 | nfsd_acceptable, exp); | 210 | nfsd_acceptable, exp); |
211 | } | 211 | } |
@@ -286,16 +286,21 @@ out: | |||
286 | * an inode. In this case a call to fh_update should be made | 286 | * an inode. In this case a call to fh_update should be made |
287 | * before the fh goes out on the wire ... | 287 | * before the fh goes out on the wire ... |
288 | */ | 288 | */ |
289 | static inline int _fh_update(struct dentry *dentry, struct svc_export *exp, | 289 | static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, |
290 | __u32 *datap, int *maxsize) | 290 | struct dentry *dentry) |
291 | { | 291 | { |
292 | if (dentry == exp->ex_dentry) { | 292 | if (dentry != exp->ex_dentry) { |
293 | *maxsize = 0; | 293 | struct fid *fid = (struct fid *) |
294 | return 0; | 294 | (fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1); |
295 | } | 295 | int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; |
296 | int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK); | ||
296 | 297 | ||
297 | return exportfs_encode_fh(dentry, datap, maxsize, | 298 | fhp->fh_handle.fh_fileid_type = |
298 | !(exp->ex_flags & NFSEXP_NOSUBTREECHECK)); | 299 | exportfs_encode_fh(dentry, fid, &maxsize, subtreecheck); |
300 | fhp->fh_handle.fh_size += maxsize * 4; | ||
301 | } else { | ||
302 | fhp->fh_handle.fh_fileid_type = FILEID_ROOT; | ||
303 | } | ||
299 | } | 304 | } |
300 | 305 | ||
301 | /* | 306 | /* |
@@ -457,12 +462,8 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, | |||
457 | datap += len/4; | 462 | datap += len/4; |
458 | fhp->fh_handle.fh_size = 4 + len; | 463 | fhp->fh_handle.fh_size = 4 + len; |
459 | 464 | ||
460 | if (inode) { | 465 | if (inode) |
461 | int size = (fhp->fh_maxsize-len-4)/4; | 466 | _fh_update(fhp, exp, dentry); |
462 | fhp->fh_handle.fh_fileid_type = | ||
463 | _fh_update(dentry, exp, datap, &size); | ||
464 | fhp->fh_handle.fh_size += size*4; | ||
465 | } | ||
466 | if (fhp->fh_handle.fh_fileid_type == 255) | 467 | if (fhp->fh_handle.fh_fileid_type == 255) |
467 | return nfserr_opnotsupp; | 468 | return nfserr_opnotsupp; |
468 | } | 469 | } |
@@ -479,7 +480,6 @@ __be32 | |||
479 | fh_update(struct svc_fh *fhp) | 480 | fh_update(struct svc_fh *fhp) |
480 | { | 481 | { |
481 | struct dentry *dentry; | 482 | struct dentry *dentry; |
482 | __u32 *datap; | ||
483 | 483 | ||
484 | if (!fhp->fh_dentry) | 484 | if (!fhp->fh_dentry) |
485 | goto out_bad; | 485 | goto out_bad; |
@@ -490,15 +490,10 @@ fh_update(struct svc_fh *fhp) | |||
490 | if (fhp->fh_handle.fh_version != 1) { | 490 | if (fhp->fh_handle.fh_version != 1) { |
491 | _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); | 491 | _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); |
492 | } else { | 492 | } else { |
493 | int size; | 493 | if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) |
494 | if (fhp->fh_handle.fh_fileid_type != 0) | ||
495 | goto out; | 494 | goto out; |
496 | datap = fhp->fh_handle.fh_auth+ | 495 | |
497 | fhp->fh_handle.fh_size/4 -1; | 496 | _fh_update(fhp, fhp->fh_export, dentry); |
498 | size = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; | ||
499 | fhp->fh_handle.fh_fileid_type = | ||
500 | _fh_update(dentry, fhp->fh_export, datap, &size); | ||
501 | fhp->fh_handle.fh_size += size*4; | ||
502 | if (fhp->fh_handle.fh_fileid_type == 255) | 497 | if (fhp->fh_handle.fh_fileid_type == 255) |
503 | return nfserr_opnotsupp; | 498 | return nfserr_opnotsupp; |
504 | } | 499 | } |
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index e93c6142b23..e1781c8b165 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c | |||
@@ -450,58 +450,40 @@ try_next: | |||
450 | return parent_dent; | 450 | return parent_dent; |
451 | } | 451 | } |
452 | 452 | ||
453 | /** | 453 | static struct inode *ntfs_nfs_get_inode(struct super_block *sb, |
454 | * ntfs_get_dentry - find a dentry for the inode from a file handle sub-fragment | 454 | u64 ino, u32 generation) |
455 | * @sb: super block identifying the mounted ntfs volume | ||
456 | * @fh: the file handle sub-fragment | ||
457 | * | ||
458 | * Find a dentry for the inode given a file handle sub-fragment. This function | ||
459 | * is called from fs/exportfs/expfs.c::find_exported_dentry() which in turn is | ||
460 | * called from the default ->decode_fh() which is export_decode_fh() in the | ||
461 | * same file. The code is closely based on the default ->get_dentry() helper | ||
462 | * fs/exportfs/expfs.c::get_object(). | ||
463 | * | ||
464 | * The @fh contains two 32-bit unsigned values, the first one is the inode | ||
465 | * number and the second one is the inode generation. | ||
466 | * | ||
467 | * Return the dentry on success or the error code on error (IS_ERR() is true). | ||
468 | */ | ||
469 | static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh) | ||
470 | { | 455 | { |
471 | struct inode *vi; | 456 | struct inode *inode; |
472 | struct dentry *dent; | ||
473 | unsigned long ino = ((u32 *)fh)[0]; | ||
474 | u32 gen = ((u32 *)fh)[1]; | ||
475 | 457 | ||
476 | ntfs_debug("Entering for inode 0x%lx, generation 0x%x.", ino, gen); | 458 | inode = ntfs_iget(sb, ino); |
477 | vi = ntfs_iget(sb, ino); | 459 | if (!IS_ERR(inode)) { |
478 | if (IS_ERR(vi)) { | 460 | if (is_bad_inode(inode) || inode->i_generation != generation) { |
479 | ntfs_error(sb, "Failed to get inode 0x%lx.", ino); | 461 | iput(inode); |
480 | return (struct dentry *)vi; | 462 | inode = ERR_PTR(-ESTALE); |
481 | } | 463 | } |
482 | if (unlikely(is_bad_inode(vi) || vi->i_generation != gen)) { | ||
483 | /* We didn't find the right inode. */ | ||
484 | ntfs_error(sb, "Inode 0x%lx, bad count: %d %d or version 0x%x " | ||
485 | "0x%x.", vi->i_ino, vi->i_nlink, | ||
486 | atomic_read(&vi->i_count), vi->i_generation, | ||
487 | gen); | ||
488 | iput(vi); | ||
489 | return ERR_PTR(-ESTALE); | ||
490 | } | ||
491 | /* Now find a dentry. If possible, get a well-connected one. */ | ||
492 | dent = d_alloc_anon(vi); | ||
493 | if (unlikely(!dent)) { | ||
494 | iput(vi); | ||
495 | return ERR_PTR(-ENOMEM); | ||
496 | } | 464 | } |
497 | ntfs_debug("Done for inode 0x%lx, generation 0x%x.", ino, gen); | 465 | |
498 | return dent; | 466 | return inode; |
467 | } | ||
468 | |||
469 | static struct dentry *ntfs_fh_to_dentry(struct super_block *sb, struct fid *fid, | ||
470 | int fh_len, int fh_type) | ||
471 | { | ||
472 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | ||
473 | ntfs_nfs_get_inode); | ||
474 | } | ||
475 | |||
476 | static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
477 | int fh_len, int fh_type) | ||
478 | { | ||
479 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
480 | ntfs_nfs_get_inode); | ||
499 | } | 481 | } |
500 | 482 | ||
501 | /** | 483 | /** |
502 | * Export operations allowing NFS exporting of mounted NTFS partitions. | 484 | * Export operations allowing NFS exporting of mounted NTFS partitions. |
503 | * | 485 | * |
504 | * We use the default ->decode_fh() and ->encode_fh() for now. Note that they | 486 | * We use the default ->encode_fh() for now. Note that they |
505 | * use 32 bits to store the inode number which is an unsigned long so on 64-bit | 487 | * use 32 bits to store the inode number which is an unsigned long so on 64-bit |
506 | * architectures is usually 64 bits so it would all fail horribly on huge | 488 | * architectures is usually 64 bits so it would all fail horribly on huge |
507 | * volumes. I guess we need to define our own encode and decode fh functions | 489 | * volumes. I guess we need to define our own encode and decode fh functions |
@@ -517,10 +499,9 @@ static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh) | |||
517 | * allowing the inode number 0 which is used in NTFS for the system file $MFT | 499 | * allowing the inode number 0 which is used in NTFS for the system file $MFT |
518 | * and due to using iget() whereas NTFS needs ntfs_iget(). | 500 | * and due to using iget() whereas NTFS needs ntfs_iget(). |
519 | */ | 501 | */ |
520 | struct export_operations ntfs_export_ops = { | 502 | const struct export_operations ntfs_export_ops = { |
521 | .get_parent = ntfs_get_parent, /* Find the parent of a given | 503 | .get_parent = ntfs_get_parent, /* Find the parent of a given |
522 | directory. */ | 504 | directory. */ |
523 | .get_dentry = ntfs_get_dentry, /* Find a dentry for the inode | 505 | .fh_to_dentry = ntfs_fh_to_dentry, |
524 | given a file handle | 506 | .fh_to_parent = ntfs_fh_to_parent, |
525 | sub-fragment. */ | ||
526 | }; | 507 | }; |
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h index d73f5a9ac34..d6a340bf80f 100644 --- a/fs/ntfs/ntfs.h +++ b/fs/ntfs/ntfs.h | |||
@@ -69,7 +69,7 @@ extern const struct inode_operations ntfs_dir_inode_ops; | |||
69 | extern const struct file_operations ntfs_empty_file_ops; | 69 | extern const struct file_operations ntfs_empty_file_ops; |
70 | extern const struct inode_operations ntfs_empty_inode_ops; | 70 | extern const struct inode_operations ntfs_empty_inode_ops; |
71 | 71 | ||
72 | extern struct export_operations ntfs_export_ops; | 72 | extern const struct export_operations ntfs_export_ops; |
73 | 73 | ||
74 | /** | 74 | /** |
75 | * NTFS_SB - return the ntfs volume given a vfs super block | 75 | * NTFS_SB - return the ntfs volume given a vfs super block |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index c3bbc198f9c..535bfa9568a 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
@@ -45,9 +45,9 @@ struct ocfs2_inode_handle | |||
45 | u32 ih_generation; | 45 | u32 ih_generation; |
46 | }; | 46 | }; |
47 | 47 | ||
48 | static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp) | 48 | static struct dentry *ocfs2_get_dentry(struct super_block *sb, |
49 | struct ocfs2_inode_handle *handle) | ||
49 | { | 50 | { |
50 | struct ocfs2_inode_handle *handle = vobjp; | ||
51 | struct inode *inode; | 51 | struct inode *inode; |
52 | struct dentry *result; | 52 | struct dentry *result; |
53 | 53 | ||
@@ -194,54 +194,37 @@ bail: | |||
194 | return type; | 194 | return type; |
195 | } | 195 | } |
196 | 196 | ||
197 | static struct dentry *ocfs2_decode_fh(struct super_block *sb, u32 *fh_in, | 197 | static struct dentry *ocfs2_fh_to_dentry(struct super_block *sb, |
198 | int fh_len, int fileid_type, | 198 | struct fid *fid, int fh_len, int fh_type) |
199 | int (*acceptable)(void *context, | ||
200 | struct dentry *de), | ||
201 | void *context) | ||
202 | { | 199 | { |
203 | struct ocfs2_inode_handle handle, parent; | 200 | struct ocfs2_inode_handle handle; |
204 | struct dentry *ret = NULL; | ||
205 | __le32 *fh = (__force __le32 *) fh_in; | ||
206 | |||
207 | mlog_entry("(0x%p, 0x%p, %d, %d, 0x%p, 0x%p)\n", | ||
208 | sb, fh, fh_len, fileid_type, acceptable, context); | ||
209 | |||
210 | if (fh_len < 3 || fileid_type > 2) | ||
211 | goto bail; | ||
212 | |||
213 | if (fileid_type == 2) { | ||
214 | if (fh_len < 6) | ||
215 | goto bail; | ||
216 | |||
217 | parent.ih_blkno = (u64)le32_to_cpu(fh[3]) << 32; | ||
218 | parent.ih_blkno |= (u64)le32_to_cpu(fh[4]); | ||
219 | parent.ih_generation = le32_to_cpu(fh[5]); | ||
220 | 201 | ||
221 | mlog(0, "Decoding parent: blkno: %llu, generation: %u\n", | 202 | if (fh_len < 3 || fh_type > 2) |
222 | (unsigned long long)parent.ih_blkno, | 203 | return NULL; |
223 | parent.ih_generation); | ||
224 | } | ||
225 | 204 | ||
226 | handle.ih_blkno = (u64)le32_to_cpu(fh[0]) << 32; | 205 | handle.ih_blkno = (u64)le32_to_cpu(fid->raw[0]) << 32; |
227 | handle.ih_blkno |= (u64)le32_to_cpu(fh[1]); | 206 | handle.ih_blkno |= (u64)le32_to_cpu(fid->raw[1]); |
228 | handle.ih_generation = le32_to_cpu(fh[2]); | 207 | handle.ih_generation = le32_to_cpu(fid->raw[2]); |
208 | return ocfs2_get_dentry(sb, &handle); | ||
209 | } | ||
229 | 210 | ||
230 | mlog(0, "Encoding fh: blkno: %llu, generation: %u\n", | 211 | static struct dentry *ocfs2_fh_to_parent(struct super_block *sb, |
231 | (unsigned long long)handle.ih_blkno, handle.ih_generation); | 212 | struct fid *fid, int fh_len, int fh_type) |
213 | { | ||
214 | struct ocfs2_inode_handle parent; | ||
232 | 215 | ||
233 | ret = ocfs2_export_ops.find_exported_dentry(sb, &handle, &parent, | 216 | if (fh_type != 2 || fh_len < 6) |
234 | acceptable, context); | 217 | return NULL; |
235 | 218 | ||
236 | bail: | 219 | parent.ih_blkno = (u64)le32_to_cpu(fid->raw[3]) << 32; |
237 | mlog_exit_ptr(ret); | 220 | parent.ih_blkno |= (u64)le32_to_cpu(fid->raw[4]); |
238 | return ret; | 221 | parent.ih_generation = le32_to_cpu(fid->raw[5]); |
222 | return ocfs2_get_dentry(sb, &parent); | ||
239 | } | 223 | } |
240 | 224 | ||
241 | struct export_operations ocfs2_export_ops = { | 225 | const struct export_operations ocfs2_export_ops = { |
242 | .decode_fh = ocfs2_decode_fh, | ||
243 | .encode_fh = ocfs2_encode_fh, | 226 | .encode_fh = ocfs2_encode_fh, |
244 | 227 | .fh_to_dentry = ocfs2_fh_to_dentry, | |
228 | .fh_to_parent = ocfs2_fh_to_parent, | ||
245 | .get_parent = ocfs2_get_parent, | 229 | .get_parent = ocfs2_get_parent, |
246 | .get_dentry = ocfs2_get_dentry, | ||
247 | }; | 230 | }; |
diff --git a/fs/ocfs2/export.h b/fs/ocfs2/export.h index e08bed9e45a..41a738678c3 100644 --- a/fs/ocfs2/export.h +++ b/fs/ocfs2/export.h | |||
@@ -28,6 +28,6 @@ | |||
28 | 28 | ||
29 | #include <linux/exportfs.h> | 29 | #include <linux/exportfs.h> |
30 | 30 | ||
31 | extern struct export_operations ocfs2_export_ops; | 31 | extern const struct export_operations ocfs2_export_ops; |
32 | 32 | ||
33 | #endif /* OCFS2_EXPORT_H */ | 33 | #endif /* OCFS2_EXPORT_H */ |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a991af96f3f..231fd5ccadc 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -1515,19 +1515,20 @@ struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key) | |||
1515 | return inode; | 1515 | return inode; |
1516 | } | 1516 | } |
1517 | 1517 | ||
1518 | struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) | 1518 | static struct dentry *reiserfs_get_dentry(struct super_block *sb, |
1519 | u32 objectid, u32 dir_id, u32 generation) | ||
1520 | |||
1519 | { | 1521 | { |
1520 | __u32 *data = vobjp; | ||
1521 | struct cpu_key key; | 1522 | struct cpu_key key; |
1522 | struct dentry *result; | 1523 | struct dentry *result; |
1523 | struct inode *inode; | 1524 | struct inode *inode; |
1524 | 1525 | ||
1525 | key.on_disk_key.k_objectid = data[0]; | 1526 | key.on_disk_key.k_objectid = objectid; |
1526 | key.on_disk_key.k_dir_id = data[1]; | 1527 | key.on_disk_key.k_dir_id = dir_id; |
1527 | reiserfs_write_lock(sb); | 1528 | reiserfs_write_lock(sb); |
1528 | inode = reiserfs_iget(sb, &key); | 1529 | inode = reiserfs_iget(sb, &key); |
1529 | if (inode && !IS_ERR(inode) && data[2] != 0 && | 1530 | if (inode && !IS_ERR(inode) && generation != 0 && |
1530 | data[2] != inode->i_generation) { | 1531 | generation != inode->i_generation) { |
1531 | iput(inode); | 1532 | iput(inode); |
1532 | inode = NULL; | 1533 | inode = NULL; |
1533 | } | 1534 | } |
@@ -1544,14 +1545,9 @@ struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) | |||
1544 | return result; | 1545 | return result; |
1545 | } | 1546 | } |
1546 | 1547 | ||
1547 | struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, | 1548 | struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
1548 | int len, int fhtype, | 1549 | int fh_len, int fh_type) |
1549 | int (*acceptable) (void *contect, | ||
1550 | struct dentry * de), | ||
1551 | void *context) | ||
1552 | { | 1550 | { |
1553 | __u32 obj[3], parent[3]; | ||
1554 | |||
1555 | /* fhtype happens to reflect the number of u32s encoded. | 1551 | /* fhtype happens to reflect the number of u32s encoded. |
1556 | * due to a bug in earlier code, fhtype might indicate there | 1552 | * due to a bug in earlier code, fhtype might indicate there |
1557 | * are more u32s then actually fitted. | 1553 | * are more u32s then actually fitted. |
@@ -1564,32 +1560,28 @@ struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, | |||
1564 | * 6 - as above plus generation of directory | 1560 | * 6 - as above plus generation of directory |
1565 | * 6 does not fit in NFSv2 handles | 1561 | * 6 does not fit in NFSv2 handles |
1566 | */ | 1562 | */ |
1567 | if (fhtype > len) { | 1563 | if (fh_type > fh_len) { |
1568 | if (fhtype != 6 || len != 5) | 1564 | if (fh_type != 6 || fh_len != 5) |
1569 | reiserfs_warning(sb, | 1565 | reiserfs_warning(sb, |
1570 | "nfsd/reiserfs, fhtype=%d, len=%d - odd", | 1566 | "nfsd/reiserfs, fhtype=%d, len=%d - odd", |
1571 | fhtype, len); | 1567 | fh_type, fh_len); |
1572 | fhtype = 5; | 1568 | fh_type = 5; |
1573 | } | 1569 | } |
1574 | 1570 | ||
1575 | obj[0] = data[0]; | 1571 | return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1], |
1576 | obj[1] = data[1]; | 1572 | (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0); |
1577 | if (fhtype == 3 || fhtype >= 5) | 1573 | } |
1578 | obj[2] = data[2]; | ||
1579 | else | ||
1580 | obj[2] = 0; /* generation number */ | ||
1581 | 1574 | ||
1582 | if (fhtype >= 4) { | 1575 | struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, |
1583 | parent[0] = data[fhtype >= 5 ? 3 : 2]; | 1576 | int fh_len, int fh_type) |
1584 | parent[1] = data[fhtype >= 5 ? 4 : 3]; | 1577 | { |
1585 | if (fhtype == 6) | 1578 | if (fh_type < 4) |
1586 | parent[2] = data[5]; | 1579 | return NULL; |
1587 | else | 1580 | |
1588 | parent[2] = 0; | 1581 | return reiserfs_get_dentry(sb, |
1589 | } | 1582 | (fh_type >= 5) ? fid->raw[3] : fid->raw[2], |
1590 | return sb->s_export_op->find_exported_dentry(sb, obj, | 1583 | (fh_type >= 5) ? fid->raw[4] : fid->raw[3], |
1591 | fhtype < 4 ? NULL : parent, | 1584 | (fh_type == 6) ? fid->raw[5] : 0); |
1592 | acceptable, context); | ||
1593 | } | 1585 | } |
1594 | 1586 | ||
1595 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, | 1587 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 98c3781bc06..5cd85fe5df5 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -661,11 +661,11 @@ static struct quotactl_ops reiserfs_qctl_operations = { | |||
661 | }; | 661 | }; |
662 | #endif | 662 | #endif |
663 | 663 | ||
664 | static struct export_operations reiserfs_export_ops = { | 664 | static const struct export_operations reiserfs_export_ops = { |
665 | .encode_fh = reiserfs_encode_fh, | 665 | .encode_fh = reiserfs_encode_fh, |
666 | .decode_fh = reiserfs_decode_fh, | 666 | .fh_to_dentry = reiserfs_fh_to_dentry, |
667 | .fh_to_parent = reiserfs_fh_to_parent, | ||
667 | .get_parent = reiserfs_get_parent, | 668 | .get_parent = reiserfs_get_parent, |
668 | .get_dentry = reiserfs_get_dentry, | ||
669 | }; | 669 | }; |
670 | 670 | ||
671 | /* this struct is used in reiserfs_getopt () for containing the value for those | 671 | /* this struct is used in reiserfs_getopt () for containing the value for those |
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 3586c7a28d2..15bd4948832 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
@@ -33,62 +33,25 @@ | |||
33 | static struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, }; | 33 | static struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, }; |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * XFS encodes and decodes the fileid portion of NFS filehandles | 36 | * Note that we only accept fileids which are long enough rather than allow |
37 | * itself instead of letting the generic NFS code do it. This | 37 | * the parent generation number to default to zero. XFS considers zero a |
38 | * allows filesystems with 64 bit inode numbers to be exported. | 38 | * valid generation number not an invalid/wildcard value. |
39 | * | ||
40 | * Note that a side effect is that xfs_vget() won't be passed a | ||
41 | * zero inode/generation pair under normal circumstances. As | ||
42 | * however a malicious client could send us such data, the check | ||
43 | * remains in that code. | ||
44 | */ | 39 | */ |
45 | 40 | static int xfs_fileid_length(int fileid_type) | |
46 | STATIC struct dentry * | ||
47 | xfs_fs_decode_fh( | ||
48 | struct super_block *sb, | ||
49 | __u32 *fh, | ||
50 | int fh_len, | ||
51 | int fileid_type, | ||
52 | int (*acceptable)( | ||
53 | void *context, | ||
54 | struct dentry *de), | ||
55 | void *context) | ||
56 | { | 41 | { |
57 | xfs_fid_t ifid; | 42 | switch (fileid_type) { |
58 | xfs_fid_t pfid; | 43 | case FILEID_INO32_GEN: |
59 | void *parent = NULL; | 44 | return 2; |
60 | int is64 = 0; | 45 | case FILEID_INO32_GEN_PARENT: |
61 | __u32 *p = fh; | 46 | return 4; |
62 | 47 | case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: | |
63 | #if XFS_BIG_INUMS | 48 | return 3; |
64 | is64 = (fileid_type & XFS_FILEID_TYPE_64FLAG); | 49 | case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: |
65 | fileid_type &= ~XFS_FILEID_TYPE_64FLAG; | 50 | return 6; |
66 | #endif | ||
67 | |||
68 | /* | ||
69 | * Note that we only accept fileids which are long enough | ||
70 | * rather than allow the parent generation number to default | ||
71 | * to zero. XFS considers zero a valid generation number not | ||
72 | * an invalid/wildcard value. There's little point printk'ing | ||
73 | * a warning here as we don't have the client information | ||
74 | * which would make such a warning useful. | ||
75 | */ | ||
76 | if (fileid_type > 2 || | ||
77 | fh_len < xfs_fileid_length((fileid_type == 2), is64)) | ||
78 | return NULL; | ||
79 | |||
80 | p = xfs_fileid_decode_fid2(p, &ifid, is64); | ||
81 | |||
82 | if (fileid_type == 2) { | ||
83 | p = xfs_fileid_decode_fid2(p, &pfid, is64); | ||
84 | parent = &pfid; | ||
85 | } | 51 | } |
86 | 52 | return 255; /* invalid */ | |
87 | fh = (__u32 *)&ifid; | ||
88 | return sb->s_export_op->find_exported_dentry(sb, fh, parent, acceptable, context); | ||
89 | } | 53 | } |
90 | 54 | ||
91 | |||
92 | STATIC int | 55 | STATIC int |
93 | xfs_fs_encode_fh( | 56 | xfs_fs_encode_fh( |
94 | struct dentry *dentry, | 57 | struct dentry *dentry, |
@@ -96,21 +59,21 @@ xfs_fs_encode_fh( | |||
96 | int *max_len, | 59 | int *max_len, |
97 | int connectable) | 60 | int connectable) |
98 | { | 61 | { |
62 | struct fid *fid = (struct fid *)fh; | ||
63 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; | ||
99 | struct inode *inode = dentry->d_inode; | 64 | struct inode *inode = dentry->d_inode; |
100 | int type = 1; | 65 | int fileid_type; |
101 | __u32 *p = fh; | ||
102 | int len; | 66 | int len; |
103 | int is64 = 0; | ||
104 | #if XFS_BIG_INUMS | ||
105 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) { | ||
106 | /* filesystem may contain 64bit inode numbers */ | ||
107 | is64 = XFS_FILEID_TYPE_64FLAG; | ||
108 | } | ||
109 | #endif | ||
110 | 67 | ||
111 | /* Directories don't need their parent encoded, they have ".." */ | 68 | /* Directories don't need their parent encoded, they have ".." */ |
112 | if (S_ISDIR(inode->i_mode)) | 69 | if (S_ISDIR(inode->i_mode)) |
113 | connectable = 0; | 70 | fileid_type = FILEID_INO32_GEN; |
71 | else | ||
72 | fileid_type = FILEID_INO32_GEN_PARENT; | ||
73 | |||
74 | /* filesystem may contain 64bit inode numbers */ | ||
75 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) | ||
76 | fileid_type |= XFS_FILEID_TYPE_64FLAG; | ||
114 | 77 | ||
115 | /* | 78 | /* |
116 | * Only encode if there is enough space given. In practice | 79 | * Only encode if there is enough space given. In practice |
@@ -118,39 +81,118 @@ xfs_fs_encode_fh( | |||
118 | * over NFSv2 with the subtree_check export option; the other | 81 | * over NFSv2 with the subtree_check export option; the other |
119 | * seven combinations work. The real answer is "don't use v2". | 82 | * seven combinations work. The real answer is "don't use v2". |
120 | */ | 83 | */ |
121 | len = xfs_fileid_length(connectable, is64); | 84 | len = xfs_fileid_length(fileid_type); |
122 | if (*max_len < len) | 85 | if (*max_len < len) |
123 | return 255; | 86 | return 255; |
124 | *max_len = len; | 87 | *max_len = len; |
125 | 88 | ||
126 | p = xfs_fileid_encode_inode(p, inode, is64); | 89 | switch (fileid_type) { |
127 | if (connectable) { | 90 | case FILEID_INO32_GEN_PARENT: |
128 | spin_lock(&dentry->d_lock); | 91 | spin_lock(&dentry->d_lock); |
129 | p = xfs_fileid_encode_inode(p, dentry->d_parent->d_inode, is64); | 92 | fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino; |
93 | fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; | ||
130 | spin_unlock(&dentry->d_lock); | 94 | spin_unlock(&dentry->d_lock); |
131 | type = 2; | 95 | /*FALLTHRU*/ |
96 | case FILEID_INO32_GEN: | ||
97 | fid->i32.ino = inode->i_ino; | ||
98 | fid->i32.gen = inode->i_generation; | ||
99 | break; | ||
100 | case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: | ||
101 | spin_lock(&dentry->d_lock); | ||
102 | fid64->parent_ino = dentry->d_parent->d_inode->i_ino; | ||
103 | fid64->parent_gen = dentry->d_parent->d_inode->i_generation; | ||
104 | spin_unlock(&dentry->d_lock); | ||
105 | /*FALLTHRU*/ | ||
106 | case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: | ||
107 | fid64->ino = inode->i_ino; | ||
108 | fid64->gen = inode->i_generation; | ||
109 | break; | ||
132 | } | 110 | } |
133 | BUG_ON((p - fh) != len); | 111 | |
134 | return type | is64; | 112 | return fileid_type; |
135 | } | 113 | } |
136 | 114 | ||
137 | STATIC struct dentry * | 115 | STATIC struct inode * |
138 | xfs_fs_get_dentry( | 116 | xfs_nfs_get_inode( |
139 | struct super_block *sb, | 117 | struct super_block *sb, |
140 | void *data) | 118 | u64 ino, |
141 | { | 119 | u32 generation) |
120 | { | ||
121 | xfs_fid_t xfid; | ||
142 | bhv_vnode_t *vp; | 122 | bhv_vnode_t *vp; |
143 | struct inode *inode; | ||
144 | struct dentry *result; | ||
145 | int error; | 123 | int error; |
146 | 124 | ||
147 | error = xfs_vget(XFS_M(sb), &vp, data); | 125 | xfid.fid_len = sizeof(xfs_fid_t) - sizeof(xfid.fid_len); |
148 | if (error || vp == NULL) | 126 | xfid.fid_pad = 0; |
149 | return ERR_PTR(-ESTALE) ; | 127 | xfid.fid_ino = ino; |
128 | xfid.fid_gen = generation; | ||
150 | 129 | ||
151 | inode = vn_to_inode(vp); | 130 | error = xfs_vget(XFS_M(sb), &vp, &xfid); |
131 | if (error) | ||
132 | return ERR_PTR(-error); | ||
133 | |||
134 | return vp ? vn_to_inode(vp) : NULL; | ||
135 | } | ||
136 | |||
137 | STATIC struct dentry * | ||
138 | xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, | ||
139 | int fh_len, int fileid_type) | ||
140 | { | ||
141 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; | ||
142 | struct inode *inode = NULL; | ||
143 | struct dentry *result; | ||
144 | |||
145 | if (fh_len < xfs_fileid_length(fileid_type)) | ||
146 | return NULL; | ||
147 | |||
148 | switch (fileid_type) { | ||
149 | case FILEID_INO32_GEN_PARENT: | ||
150 | case FILEID_INO32_GEN: | ||
151 | inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen); | ||
152 | break; | ||
153 | case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: | ||
154 | case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: | ||
155 | inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen); | ||
156 | break; | ||
157 | } | ||
158 | |||
159 | if (!inode) | ||
160 | return NULL; | ||
161 | if (IS_ERR(inode)) | ||
162 | return ERR_PTR(PTR_ERR(inode)); | ||
163 | result = d_alloc_anon(inode); | ||
164 | if (!result) { | ||
165 | iput(inode); | ||
166 | return ERR_PTR(-ENOMEM); | ||
167 | } | ||
168 | return result; | ||
169 | } | ||
170 | |||
171 | STATIC struct dentry * | ||
172 | xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
173 | int fh_len, int fileid_type) | ||
174 | { | ||
175 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; | ||
176 | struct inode *inode = NULL; | ||
177 | struct dentry *result; | ||
178 | |||
179 | switch (fileid_type) { | ||
180 | case FILEID_INO32_GEN_PARENT: | ||
181 | inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino, | ||
182 | fid->i32.parent_gen); | ||
183 | break; | ||
184 | case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: | ||
185 | inode = xfs_nfs_get_inode(sb, fid64->parent_ino, | ||
186 | fid64->parent_gen); | ||
187 | break; | ||
188 | } | ||
189 | |||
190 | if (!inode) | ||
191 | return NULL; | ||
192 | if (IS_ERR(inode)) | ||
193 | return ERR_PTR(PTR_ERR(inode)); | ||
152 | result = d_alloc_anon(inode); | 194 | result = d_alloc_anon(inode); |
153 | if (!result) { | 195 | if (!result) { |
154 | iput(inode); | 196 | iput(inode); |
155 | return ERR_PTR(-ENOMEM); | 197 | return ERR_PTR(-ENOMEM); |
156 | } | 198 | } |
@@ -178,9 +220,9 @@ xfs_fs_get_parent( | |||
178 | return parent; | 220 | return parent; |
179 | } | 221 | } |
180 | 222 | ||
181 | struct export_operations xfs_export_operations = { | 223 | const struct export_operations xfs_export_operations = { |
182 | .decode_fh = xfs_fs_decode_fh, | ||
183 | .encode_fh = xfs_fs_encode_fh, | 224 | .encode_fh = xfs_fs_encode_fh, |
225 | .fh_to_dentry = xfs_fs_fh_to_dentry, | ||
226 | .fh_to_parent = xfs_fs_fh_to_parent, | ||
184 | .get_parent = xfs_fs_get_parent, | 227 | .get_parent = xfs_fs_get_parent, |
185 | .get_dentry = xfs_fs_get_dentry, | ||
186 | }; | 228 | }; |
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h index 2f36071a86f..3272b6ae7a3 100644 --- a/fs/xfs/linux-2.6/xfs_export.h +++ b/fs/xfs/linux-2.6/xfs_export.h | |||
@@ -59,50 +59,14 @@ | |||
59 | * a subdirectory) or use the "fsid" export option. | 59 | * a subdirectory) or use the "fsid" export option. |
60 | */ | 60 | */ |
61 | 61 | ||
62 | struct xfs_fid64 { | ||
63 | u64 ino; | ||
64 | u32 gen; | ||
65 | u64 parent_ino; | ||
66 | u32 parent_gen; | ||
67 | } __attribute__((packed)); | ||
68 | |||
62 | /* This flag goes on the wire. Don't play with it. */ | 69 | /* This flag goes on the wire. Don't play with it. */ |
63 | #define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */ | 70 | #define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */ |
64 | 71 | ||
65 | /* Calculate the length in u32 units of the fileid data */ | ||
66 | static inline int | ||
67 | xfs_fileid_length(int hasparent, int is64) | ||
68 | { | ||
69 | return hasparent ? (is64 ? 6 : 4) : (is64 ? 3 : 2); | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * Decode encoded inode information (either for the inode itself | ||
74 | * or the parent) into an xfs_fid_t structure. Advances and | ||
75 | * returns the new data pointer | ||
76 | */ | ||
77 | static inline __u32 * | ||
78 | xfs_fileid_decode_fid2(__u32 *p, xfs_fid_t *fid, int is64) | ||
79 | { | ||
80 | fid->fid_len = sizeof(xfs_fid_t) - sizeof(fid->fid_len); | ||
81 | fid->fid_pad = 0; | ||
82 | fid->fid_ino = *p++; | ||
83 | #if XFS_BIG_INUMS | ||
84 | if (is64) | ||
85 | fid->fid_ino |= (((__u64)(*p++)) << 32); | ||
86 | #endif | ||
87 | fid->fid_gen = *p++; | ||
88 | return p; | ||
89 | } | ||
90 | |||
91 | /* | ||
92 | * Encode inode information (either for the inode itself or the | ||
93 | * parent) into a fileid buffer. Advances and returns the new | ||
94 | * data pointer. | ||
95 | */ | ||
96 | static inline __u32 * | ||
97 | xfs_fileid_encode_inode(__u32 *p, struct inode *inode, int is64) | ||
98 | { | ||
99 | *p++ = (__u32)inode->i_ino; | ||
100 | #if XFS_BIG_INUMS | ||
101 | if (is64) | ||
102 | *p++ = (__u32)(inode->i_ino >> 32); | ||
103 | #endif | ||
104 | *p++ = inode->i_generation; | ||
105 | return p; | ||
106 | } | ||
107 | |||
108 | #endif /* __XFS_EXPORT_H__ */ | 72 | #endif /* __XFS_EXPORT_H__ */ |
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index c78c23310fe..3efcf45b14a 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
@@ -118,7 +118,7 @@ extern int xfs_blkdev_get(struct xfs_mount *, const char *, | |||
118 | extern void xfs_blkdev_put(struct block_device *); | 118 | extern void xfs_blkdev_put(struct block_device *); |
119 | extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); | 119 | extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); |
120 | 120 | ||
121 | extern struct export_operations xfs_export_operations; | 121 | extern const struct export_operations xfs_export_operations; |
122 | 122 | ||
123 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) | 123 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) |
124 | 124 | ||
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 4e5d3ca53a8..a1b1b2ee3e5 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h | |||
@@ -257,7 +257,8 @@ struct acpi_table_dbgp { | |||
257 | struct acpi_table_dmar { | 257 | struct acpi_table_dmar { |
258 | struct acpi_table_header header; /* Common ACPI table header */ | 258 | struct acpi_table_header header; /* Common ACPI table header */ |
259 | u8 width; /* Host Address Width */ | 259 | u8 width; /* Host Address Width */ |
260 | u8 reserved[11]; | 260 | u8 flags; |
261 | u8 reserved[10]; | ||
261 | }; | 262 | }; |
262 | 263 | ||
263 | /* DMAR subtable header */ | 264 | /* DMAR subtable header */ |
@@ -265,8 +266,6 @@ struct acpi_table_dmar { | |||
265 | struct acpi_dmar_header { | 266 | struct acpi_dmar_header { |
266 | u16 type; | 267 | u16 type; |
267 | u16 length; | 268 | u16 length; |
268 | u8 flags; | ||
269 | u8 reserved[3]; | ||
270 | }; | 269 | }; |
271 | 270 | ||
272 | /* Values for subtable type in struct acpi_dmar_header */ | 271 | /* Values for subtable type in struct acpi_dmar_header */ |
@@ -274,13 +273,15 @@ struct acpi_dmar_header { | |||
274 | enum acpi_dmar_type { | 273 | enum acpi_dmar_type { |
275 | ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, | 274 | ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, |
276 | ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, | 275 | ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, |
277 | ACPI_DMAR_TYPE_RESERVED = 2 /* 2 and greater are reserved */ | 276 | ACPI_DMAR_TYPE_ATSR = 2, |
277 | ACPI_DMAR_TYPE_RESERVED = 3 /* 3 and greater are reserved */ | ||
278 | }; | 278 | }; |
279 | 279 | ||
280 | struct acpi_dmar_device_scope { | 280 | struct acpi_dmar_device_scope { |
281 | u8 entry_type; | 281 | u8 entry_type; |
282 | u8 length; | 282 | u8 length; |
283 | u8 segment; | 283 | u16 reserved; |
284 | u8 enumeration_id; | ||
284 | u8 bus; | 285 | u8 bus; |
285 | }; | 286 | }; |
286 | 287 | ||
@@ -290,7 +291,14 @@ enum acpi_dmar_scope_type { | |||
290 | ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, | 291 | ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, |
291 | ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, | 292 | ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, |
292 | ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, | 293 | ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, |
293 | ACPI_DMAR_SCOPE_TYPE_RESERVED = 3 /* 3 and greater are reserved */ | 294 | ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, |
295 | ACPI_DMAR_SCOPE_TYPE_HPET = 4, | ||
296 | ACPI_DMAR_SCOPE_TYPE_RESERVED = 5 /* 5 and greater are reserved */ | ||
297 | }; | ||
298 | |||
299 | struct acpi_dmar_pci_path { | ||
300 | u8 dev; | ||
301 | u8 fn; | ||
294 | }; | 302 | }; |
295 | 303 | ||
296 | /* | 304 | /* |
@@ -301,6 +309,9 @@ enum acpi_dmar_scope_type { | |||
301 | 309 | ||
302 | struct acpi_dmar_hardware_unit { | 310 | struct acpi_dmar_hardware_unit { |
303 | struct acpi_dmar_header header; | 311 | struct acpi_dmar_header header; |
312 | u8 flags; | ||
313 | u8 reserved; | ||
314 | u16 segment; | ||
304 | u64 address; /* Register Base Address */ | 315 | u64 address; /* Register Base Address */ |
305 | }; | 316 | }; |
306 | 317 | ||
@@ -312,7 +323,9 @@ struct acpi_dmar_hardware_unit { | |||
312 | 323 | ||
313 | struct acpi_dmar_reserved_memory { | 324 | struct acpi_dmar_reserved_memory { |
314 | struct acpi_dmar_header header; | 325 | struct acpi_dmar_header header; |
315 | u64 address; /* 4_k aligned base address */ | 326 | u16 reserved; |
327 | u16 segment; | ||
328 | u64 base_address; /* 4_k aligned base address */ | ||
316 | u64 end_address; /* 4_k aligned limit address */ | 329 | u64 end_address; /* 4_k aligned limit address */ |
317 | }; | 330 | }; |
318 | 331 | ||
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h index ef67b59dbdb..dc031cf4463 100644 --- a/include/asm-x86/bootparam.h +++ b/include/asm-x86/bootparam.h | |||
@@ -28,8 +28,9 @@ struct setup_header { | |||
28 | u16 kernel_version; | 28 | u16 kernel_version; |
29 | u8 type_of_loader; | 29 | u8 type_of_loader; |
30 | u8 loadflags; | 30 | u8 loadflags; |
31 | #define LOADED_HIGH 0x01 | 31 | #define LOADED_HIGH (1<<0) |
32 | #define CAN_USE_HEAP 0x80 | 32 | #define KEEP_SEGMENTS (1<<6) |
33 | #define CAN_USE_HEAP (1<<7) | ||
33 | u16 setup_move_size; | 34 | u16 setup_move_size; |
34 | u32 code32_start; | 35 | u32 code32_start; |
35 | u32 ramdisk_image; | 36 | u32 ramdisk_image; |
@@ -41,6 +42,10 @@ struct setup_header { | |||
41 | u32 initrd_addr_max; | 42 | u32 initrd_addr_max; |
42 | u32 kernel_alignment; | 43 | u32 kernel_alignment; |
43 | u8 relocatable_kernel; | 44 | u8 relocatable_kernel; |
45 | u8 _pad2[3]; | ||
46 | u32 cmdline_size; | ||
47 | u32 hardware_subarch; | ||
48 | u64 hardware_subarch_data; | ||
44 | } __attribute__((packed)); | 49 | } __attribute__((packed)); |
45 | 50 | ||
46 | struct sys_desc_table { | 51 | struct sys_desc_table { |
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index b3d43de44c5..9411a2d3f19 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h | |||
@@ -27,6 +27,7 @@ | |||
27 | void global_flush_tlb(void); | 27 | void global_flush_tlb(void); |
28 | int change_page_attr(struct page *page, int numpages, pgprot_t prot); | 28 | int change_page_attr(struct page *page, int numpages, pgprot_t prot); |
29 | int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot); | 29 | int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot); |
30 | void clflush_cache_range(void *addr, int size); | ||
30 | 31 | ||
31 | #ifdef CONFIG_DEBUG_PAGEALLOC | 32 | #ifdef CONFIG_DEBUG_PAGEALLOC |
32 | /* internal debugging function */ | 33 | /* internal debugging function */ |
diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h index d9ee5e52e91..87a715367a1 100644 --- a/include/asm-x86/device.h +++ b/include/asm-x86/device.h | |||
@@ -5,6 +5,9 @@ struct dev_archdata { | |||
5 | #ifdef CONFIG_ACPI | 5 | #ifdef CONFIG_ACPI |
6 | void *acpi_handle; | 6 | void *acpi_handle; |
7 | #endif | 7 | #endif |
8 | #ifdef CONFIG_DMAR | ||
9 | void *iommu; /* hook for IOMMU specific extension */ | ||
10 | #endif | ||
8 | }; | 11 | }; |
9 | 12 | ||
10 | #endif /* _ASM_X86_DEVICE_H */ | 13 | #endif /* _ASM_X86_DEVICE_H */ |
diff --git a/include/linux/capability.h b/include/linux/capability.h index 7a8d7ade28a..bb017edffd5 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h | |||
@@ -56,10 +56,8 @@ typedef struct __user_cap_data_struct { | |||
56 | 56 | ||
57 | struct vfs_cap_data { | 57 | struct vfs_cap_data { |
58 | __u32 magic_etc; /* Little endian */ | 58 | __u32 magic_etc; /* Little endian */ |
59 | struct { | 59 | __u32 permitted; /* Little endian */ |
60 | __u32 permitted; /* Little endian */ | 60 | __u32 inheritable; /* Little endian */ |
61 | __u32 inheritable; /* Little endian */ | ||
62 | } data[1]; | ||
63 | }; | 61 | }; |
64 | 62 | ||
65 | #ifdef __KERNEL__ | 63 | #ifdef __KERNEL__ |
diff --git a/include/linux/dmar.h b/include/linux/dmar.h new file mode 100644 index 00000000000..ffb6439cb5e --- /dev/null +++ b/include/linux/dmar.h | |||
@@ -0,0 +1,86 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006, Intel Corporation. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License along with | ||
14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
16 | * | ||
17 | * Copyright (C) Ashok Raj <ashok.raj@intel.com> | ||
18 | * Copyright (C) Shaohua Li <shaohua.li@intel.com> | ||
19 | */ | ||
20 | |||
21 | #ifndef __DMAR_H__ | ||
22 | #define __DMAR_H__ | ||
23 | |||
24 | #include <linux/acpi.h> | ||
25 | #include <linux/types.h> | ||
26 | #include <linux/msi.h> | ||
27 | |||
28 | #ifdef CONFIG_DMAR | ||
29 | struct intel_iommu; | ||
30 | |||
31 | extern char *dmar_get_fault_reason(u8 fault_reason); | ||
32 | |||
33 | /* Can't use the common MSI interrupt functions | ||
34 | * since DMAR is not a pci device | ||
35 | */ | ||
36 | extern void dmar_msi_unmask(unsigned int irq); | ||
37 | extern void dmar_msi_mask(unsigned int irq); | ||
38 | extern void dmar_msi_read(int irq, struct msi_msg *msg); | ||
39 | extern void dmar_msi_write(int irq, struct msi_msg *msg); | ||
40 | extern int dmar_set_interrupt(struct intel_iommu *iommu); | ||
41 | extern int arch_setup_dmar_msi(unsigned int irq); | ||
42 | |||
43 | /* Intel IOMMU detection and initialization functions */ | ||
44 | extern void detect_intel_iommu(void); | ||
45 | extern int intel_iommu_init(void); | ||
46 | |||
47 | extern int dmar_table_init(void); | ||
48 | extern int early_dmar_detect(void); | ||
49 | |||
50 | extern struct list_head dmar_drhd_units; | ||
51 | extern struct list_head dmar_rmrr_units; | ||
52 | |||
53 | struct dmar_drhd_unit { | ||
54 | struct list_head list; /* list of drhd units */ | ||
55 | u64 reg_base_addr; /* register base address*/ | ||
56 | struct pci_dev **devices; /* target device array */ | ||
57 | int devices_cnt; /* target device count */ | ||
58 | u8 ignored:1; /* ignore drhd */ | ||
59 | u8 include_all:1; | ||
60 | struct intel_iommu *iommu; | ||
61 | }; | ||
62 | |||
63 | struct dmar_rmrr_unit { | ||
64 | struct list_head list; /* list of rmrr units */ | ||
65 | u64 base_address; /* reserved base address*/ | ||
66 | u64 end_address; /* reserved end address */ | ||
67 | struct pci_dev **devices; /* target devices */ | ||
68 | int devices_cnt; /* target device count */ | ||
69 | }; | ||
70 | |||
71 | #define for_each_drhd_unit(drhd) \ | ||
72 | list_for_each_entry(drhd, &dmar_drhd_units, list) | ||
73 | #define for_each_rmrr_units(rmrr) \ | ||
74 | list_for_each_entry(rmrr, &dmar_rmrr_units, list) | ||
75 | #else | ||
76 | static inline void detect_intel_iommu(void) | ||
77 | { | ||
78 | return; | ||
79 | } | ||
80 | static inline int intel_iommu_init(void) | ||
81 | { | ||
82 | return -ENODEV; | ||
83 | } | ||
84 | |||
85 | #endif /* !CONFIG_DMAR */ | ||
86 | #endif /* __DMAR_H__ */ | ||
diff --git a/include/linux/efi.h b/include/linux/efi.h index 0b9579a4cd4..14813b59580 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h | |||
@@ -298,7 +298,7 @@ extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, | |||
298 | u64 attr); | 298 | u64 attr); |
299 | extern int __init efi_uart_console_only (void); | 299 | extern int __init efi_uart_console_only (void); |
300 | extern void efi_initialize_iomem_resources(struct resource *code_resource, | 300 | extern void efi_initialize_iomem_resources(struct resource *code_resource, |
301 | struct resource *data_resource); | 301 | struct resource *data_resource, struct resource *bss_resource); |
302 | extern unsigned long efi_get_time(void); | 302 | extern unsigned long efi_get_time(void); |
303 | extern int efi_set_rtc_mmss(unsigned long nowtime); | 303 | extern int efi_set_rtc_mmss(unsigned long nowtime); |
304 | extern int is_available_memory(efi_memory_desc_t * md); | 304 | extern int is_available_memory(efi_memory_desc_t * md); |
diff --git a/include/linux/efs_fs.h b/include/linux/efs_fs.h index 16cb25cbf7c..dd57fe523e9 100644 --- a/include/linux/efs_fs.h +++ b/include/linux/efs_fs.h | |||
@@ -35,6 +35,7 @@ static inline struct efs_sb_info *SUPER_INFO(struct super_block *sb) | |||
35 | } | 35 | } |
36 | 36 | ||
37 | struct statfs; | 37 | struct statfs; |
38 | struct fid; | ||
38 | 39 | ||
39 | extern const struct inode_operations efs_dir_inode_operations; | 40 | extern const struct inode_operations efs_dir_inode_operations; |
40 | extern const struct file_operations efs_dir_operations; | 41 | extern const struct file_operations efs_dir_operations; |
@@ -45,7 +46,10 @@ extern efs_block_t efs_map_block(struct inode *, efs_block_t); | |||
45 | extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int); | 46 | extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int); |
46 | 47 | ||
47 | extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *); | 48 | extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *); |
48 | extern struct dentry *efs_get_dentry(struct super_block *sb, void *vobjp); | 49 | extern struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
50 | int fh_len, int fh_type); | ||
51 | extern struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid, | ||
52 | int fh_len, int fh_type); | ||
49 | extern struct dentry *efs_get_parent(struct dentry *); | 53 | extern struct dentry *efs_get_parent(struct dentry *); |
50 | extern int efs_bmap(struct inode *, int); | 54 | extern int efs_bmap(struct inode *, int); |
51 | 55 | ||
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 8872fe8392d..51d21413881 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h | |||
@@ -4,9 +4,48 @@ | |||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | 5 | ||
6 | struct dentry; | 6 | struct dentry; |
7 | struct inode; | ||
7 | struct super_block; | 8 | struct super_block; |
8 | struct vfsmount; | 9 | struct vfsmount; |
9 | 10 | ||
11 | /* | ||
12 | * The fileid_type identifies how the file within the filesystem is encoded. | ||
13 | * In theory this is freely set and parsed by the filesystem, but we try to | ||
14 | * stick to conventions so we can share some generic code and don't confuse | ||
15 | * sniffers like ethereal/wireshark. | ||
16 | * | ||
17 | * The filesystem must not use the value '0' or '0xff'. | ||
18 | */ | ||
19 | enum fid_type { | ||
20 | /* | ||
21 | * The root, or export point, of the filesystem. | ||
22 | * (Never actually passed down to the filesystem. | ||
23 | */ | ||
24 | FILEID_ROOT = 0, | ||
25 | |||
26 | /* | ||
27 | * 32bit inode number, 32 bit generation number. | ||
28 | */ | ||
29 | FILEID_INO32_GEN = 1, | ||
30 | |||
31 | /* | ||
32 | * 32bit inode number, 32 bit generation number, | ||
33 | * 32 bit parent directory inode number. | ||
34 | */ | ||
35 | FILEID_INO32_GEN_PARENT = 2, | ||
36 | }; | ||
37 | |||
38 | struct fid { | ||
39 | union { | ||
40 | struct { | ||
41 | u32 ino; | ||
42 | u32 gen; | ||
43 | u32 parent_ino; | ||
44 | u32 parent_gen; | ||
45 | } i32; | ||
46 | __u32 raw[6]; | ||
47 | }; | ||
48 | }; | ||
10 | 49 | ||
11 | /** | 50 | /** |
12 | * struct export_operations - for nfsd to communicate with file systems | 51 | * struct export_operations - for nfsd to communicate with file systems |
@@ -15,43 +54,9 @@ struct vfsmount; | |||
15 | * @get_name: find the name for a given inode in a given directory | 54 | * @get_name: find the name for a given inode in a given directory |
16 | * @get_parent: find the parent of a given directory | 55 | * @get_parent: find the parent of a given directory |
17 | * @get_dentry: find a dentry for the inode given a file handle sub-fragment | 56 | * @get_dentry: find a dentry for the inode given a file handle sub-fragment |
18 | * @find_exported_dentry: | ||
19 | * set by the exporting module to a standard helper function. | ||
20 | * | ||
21 | * Description: | ||
22 | * The export_operations structure provides a means for nfsd to communicate | ||
23 | * with a particular exported file system - particularly enabling nfsd and | ||
24 | * the filesystem to co-operate when dealing with file handles. | ||
25 | * | ||
26 | * export_operations contains two basic operation for dealing with file | ||
27 | * handles, decode_fh() and encode_fh(), and allows for some other | ||
28 | * operations to be defined which standard helper routines use to get | ||
29 | * specific information from the filesystem. | ||
30 | * | ||
31 | * nfsd encodes information use to determine which filesystem a filehandle | ||
32 | * applies to in the initial part of the file handle. The remainder, termed | ||
33 | * a file handle fragment, is controlled completely by the filesystem. The | ||
34 | * standard helper routines assume that this fragment will contain one or | ||
35 | * two sub-fragments, one which identifies the file, and one which may be | ||
36 | * used to identify the (a) directory containing the file. | ||
37 | * | 57 | * |
38 | * In some situations, nfsd needs to get a dentry which is connected into a | 58 | * See Documentation/filesystems/Exporting for details on how to use |
39 | * specific part of the file tree. To allow for this, it passes the | 59 | * this interface correctly. |
40 | * function acceptable() together with a @context which can be used to see | ||
41 | * if the dentry is acceptable. As there can be multiple dentrys for a | ||
42 | * given file, the filesystem should check each one for acceptability before | ||
43 | * looking for the next. As soon as an acceptable one is found, it should | ||
44 | * be returned. | ||
45 | * | ||
46 | * decode_fh: | ||
47 | * @decode_fh is given a &struct super_block (@sb), a file handle fragment | ||
48 | * (@fh, @fh_len) and an acceptability testing function (@acceptable, | ||
49 | * @context). It should return a &struct dentry which refers to the same | ||
50 | * file that the file handle fragment refers to, and which passes the | ||
51 | * acceptability test. If it cannot, it should return a %NULL pointer if | ||
52 | * the file was found but no acceptable &dentries were available, or a | ||
53 | * %ERR_PTR error code indicating why it couldn't be found (e.g. %ENOENT or | ||
54 | * %ENOMEM). | ||
55 | * | 60 | * |
56 | * encode_fh: | 61 | * encode_fh: |
57 | * @encode_fh should store in the file handle fragment @fh (using at most | 62 | * @encode_fh should store in the file handle fragment @fh (using at most |
@@ -63,6 +68,21 @@ struct vfsmount; | |||
63 | * the filehandle fragment. encode_fh() should return the number of bytes | 68 | * the filehandle fragment. encode_fh() should return the number of bytes |
64 | * stored or a negative error code such as %-ENOSPC | 69 | * stored or a negative error code such as %-ENOSPC |
65 | * | 70 | * |
71 | * fh_to_dentry: | ||
72 | * @fh_to_dentry is given a &struct super_block (@sb) and a file handle | ||
73 | * fragment (@fh, @fh_len). It should return a &struct dentry which refers | ||
74 | * to the same file that the file handle fragment refers to. If it cannot, | ||
75 | * it should return a %NULL pointer if the file was found but no acceptable | ||
76 | * &dentries were available, or an %ERR_PTR error code indicating why it | ||
77 | * couldn't be found (e.g. %ENOENT or %ENOMEM). Any suitable dentry can be | ||
78 | * returned including, if necessary, a new dentry created with d_alloc_root. | ||
79 | * The caller can then find any other extant dentries by following the | ||
80 | * d_alias links. | ||
81 | * | ||
82 | * fh_to_parent: | ||
83 | * Same as @fh_to_dentry, except that it returns a pointer to the parent | ||
84 | * dentry if it was encoded into the filehandle fragment by @encode_fh. | ||
85 | * | ||
66 | * get_name: | 86 | * get_name: |
67 | * @get_name should find a name for the given @child in the given @parent | 87 | * @get_name should find a name for the given @child in the given @parent |
68 | * directory. The name should be stored in the @name (with the | 88 | * directory. The name should be stored in the @name (with the |
@@ -75,52 +95,37 @@ struct vfsmount; | |||
75 | * is also a directory. In the event that it cannot be found, or storage | 95 | * is also a directory. In the event that it cannot be found, or storage |
76 | * space cannot be allocated, a %ERR_PTR should be returned. | 96 | * space cannot be allocated, a %ERR_PTR should be returned. |
77 | * | 97 | * |
78 | * get_dentry: | ||
79 | * Given a &super_block (@sb) and a pointer to a file-system specific inode | ||
80 | * identifier, possibly an inode number, (@inump) get_dentry() should find | ||
81 | * the identified inode and return a dentry for that inode. Any suitable | ||
82 | * dentry can be returned including, if necessary, a new dentry created with | ||
83 | * d_alloc_root. The caller can then find any other extant dentrys by | ||
84 | * following the d_alias links. If a new dentry was created using | ||
85 | * d_alloc_root, DCACHE_NFSD_DISCONNECTED should be set, and the dentry | ||
86 | * should be d_rehash()ed. | ||
87 | * | ||
88 | * If the inode cannot be found, either a %NULL pointer or an %ERR_PTR code | ||
89 | * can be returned. The @inump will be whatever was passed to | ||
90 | * nfsd_find_fh_dentry() in either the @obj or @parent parameters. | ||
91 | * | ||
92 | * Locking rules: | 98 | * Locking rules: |
93 | * get_parent is called with child->d_inode->i_mutex down | 99 | * get_parent is called with child->d_inode->i_mutex down |
94 | * get_name is not (which is possibly inconsistent) | 100 | * get_name is not (which is possibly inconsistent) |
95 | */ | 101 | */ |
96 | 102 | ||
97 | struct export_operations { | 103 | struct export_operations { |
98 | struct dentry *(*decode_fh)(struct super_block *sb, __u32 *fh, | ||
99 | int fh_len, int fh_type, | ||
100 | int (*acceptable)(void *context, struct dentry *de), | ||
101 | void *context); | ||
102 | int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, | 104 | int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, |
103 | int connectable); | 105 | int connectable); |
106 | struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid, | ||
107 | int fh_len, int fh_type); | ||
108 | struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid, | ||
109 | int fh_len, int fh_type); | ||
104 | int (*get_name)(struct dentry *parent, char *name, | 110 | int (*get_name)(struct dentry *parent, char *name, |
105 | struct dentry *child); | 111 | struct dentry *child); |
106 | struct dentry * (*get_parent)(struct dentry *child); | 112 | struct dentry * (*get_parent)(struct dentry *child); |
107 | struct dentry * (*get_dentry)(struct super_block *sb, void *inump); | ||
108 | |||
109 | /* This is set by the exporting module to a standard helper */ | ||
110 | struct dentry * (*find_exported_dentry)( | ||
111 | struct super_block *sb, void *obj, void *parent, | ||
112 | int (*acceptable)(void *context, struct dentry *de), | ||
113 | void *context); | ||
114 | }; | 113 | }; |
115 | 114 | ||
116 | extern struct dentry *find_exported_dentry(struct super_block *sb, void *obj, | 115 | extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, |
117 | void *parent, int (*acceptable)(void *context, struct dentry *de), | 116 | int *max_len, int connectable); |
118 | void *context); | 117 | extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, |
119 | |||
120 | extern int exportfs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, | ||
121 | int connectable); | ||
122 | extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, __u32 *fh, | ||
123 | int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), | 118 | int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), |
124 | void *context); | 119 | void *context); |
125 | 120 | ||
121 | /* | ||
122 | * Generic helpers for filesystems. | ||
123 | */ | ||
124 | extern struct dentry *generic_fh_to_dentry(struct super_block *sb, | ||
125 | struct fid *fid, int fh_len, int fh_type, | ||
126 | struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); | ||
127 | extern struct dentry *generic_fh_to_parent(struct super_block *sb, | ||
128 | struct fid *fid, int fh_len, int fh_type, | ||
129 | struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); | ||
130 | |||
126 | #endif /* LINUX_EXPORTFS_H */ | 131 | #endif /* LINUX_EXPORTFS_H */ |
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index c77c3bbfe4b..0f6c86c634f 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h | |||
@@ -561,6 +561,7 @@ enum { | |||
561 | #define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1) | 561 | #define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1) |
562 | #define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \ | 562 | #define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \ |
563 | ~EXT2_DIR_ROUND) | 563 | ~EXT2_DIR_ROUND) |
564 | #define EXT2_MAX_REC_LEN ((1<<16)-1) | ||
564 | 565 | ||
565 | static inline ext2_fsblk_t | 566 | static inline ext2_fsblk_t |
566 | ext2_group_first_block_no(struct super_block *sb, unsigned long group_no) | 567 | ext2_group_first_block_no(struct super_block *sb, unsigned long group_no) |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 50078bb30a1..b3ec4a496d6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -987,7 +987,7 @@ struct super_block { | |||
987 | const struct super_operations *s_op; | 987 | const struct super_operations *s_op; |
988 | struct dquot_operations *dq_op; | 988 | struct dquot_operations *dq_op; |
989 | struct quotactl_ops *s_qcop; | 989 | struct quotactl_ops *s_qcop; |
990 | struct export_operations *s_export_op; | 990 | const struct export_operations *s_export_op; |
991 | unsigned long s_flags; | 991 | unsigned long s_flags; |
992 | unsigned long s_magic; | 992 | unsigned long s_magic; |
993 | struct dentry *s_root; | 993 | struct dentry *s_root; |
diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 6c9873f8828..ff203dd0291 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h | |||
@@ -34,6 +34,12 @@ | |||
34 | name: | 34 | name: |
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | #ifndef WEAK | ||
38 | #define WEAK(name) \ | ||
39 | .weak name; \ | ||
40 | name: | ||
41 | #endif | ||
42 | |||
37 | #define KPROBE_ENTRY(name) \ | 43 | #define KPROBE_ENTRY(name) \ |
38 | .pushsection .kprobes.text, "ax"; \ | 44 | .pushsection .kprobes.text, "ax"; \ |
39 | ENTRY(name) | 45 | ENTRY(name) |
diff --git a/include/linux/memory.h b/include/linux/memory.h index 654ef554487..33f0ff0cf63 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h | |||
@@ -41,18 +41,15 @@ struct memory_block { | |||
41 | #define MEM_ONLINE (1<<0) /* exposed to userspace */ | 41 | #define MEM_ONLINE (1<<0) /* exposed to userspace */ |
42 | #define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */ | 42 | #define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */ |
43 | #define MEM_OFFLINE (1<<2) /* exposed to userspace */ | 43 | #define MEM_OFFLINE (1<<2) /* exposed to userspace */ |
44 | #define MEM_GOING_ONLINE (1<<3) | ||
45 | #define MEM_CANCEL_ONLINE (1<<4) | ||
46 | #define MEM_CANCEL_OFFLINE (1<<5) | ||
44 | 47 | ||
45 | /* | 48 | struct memory_notify { |
46 | * All of these states are currently kernel-internal for notifying | 49 | unsigned long start_pfn; |
47 | * kernel components and architectures. | 50 | unsigned long nr_pages; |
48 | * | 51 | int status_change_nid; |
49 | * For MEM_MAPPING_INVALID, all notifier chains with priority >0 | 52 | }; |
50 | * are called before pfn_to_page() becomes invalid. The priority=0 | ||
51 | * entry is reserved for the function that actually makes | ||
52 | * pfn_to_page() stop working. Any notifiers that want to be called | ||
53 | * after that should have priority <0. | ||
54 | */ | ||
55 | #define MEM_MAPPING_INVALID (1<<3) | ||
56 | 53 | ||
57 | struct notifier_block; | 54 | struct notifier_block; |
58 | struct mem_section; | 55 | struct mem_section; |
@@ -69,21 +66,31 @@ static inline int register_memory_notifier(struct notifier_block *nb) | |||
69 | static inline void unregister_memory_notifier(struct notifier_block *nb) | 66 | static inline void unregister_memory_notifier(struct notifier_block *nb) |
70 | { | 67 | { |
71 | } | 68 | } |
69 | static inline int memory_notify(unsigned long val, void *v) | ||
70 | { | ||
71 | return 0; | ||
72 | } | ||
72 | #else | 73 | #else |
74 | extern int register_memory_notifier(struct notifier_block *nb); | ||
75 | extern void unregister_memory_notifier(struct notifier_block *nb); | ||
73 | extern int register_new_memory(struct mem_section *); | 76 | extern int register_new_memory(struct mem_section *); |
74 | extern int unregister_memory_section(struct mem_section *); | 77 | extern int unregister_memory_section(struct mem_section *); |
75 | extern int memory_dev_init(void); | 78 | extern int memory_dev_init(void); |
76 | extern int remove_memory_block(unsigned long, struct mem_section *, int); | 79 | extern int remove_memory_block(unsigned long, struct mem_section *, int); |
77 | 80 | extern int memory_notify(unsigned long val, void *v); | |
78 | #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) | 81 | #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) |
79 | 82 | ||
80 | 83 | ||
81 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ | 84 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ |
82 | 85 | ||
86 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
83 | #define hotplug_memory_notifier(fn, pri) { \ | 87 | #define hotplug_memory_notifier(fn, pri) { \ |
84 | static struct notifier_block fn##_mem_nb = \ | 88 | static struct notifier_block fn##_mem_nb = \ |
85 | { .notifier_call = fn, .priority = pri }; \ | 89 | { .notifier_call = fn, .priority = pri }; \ |
86 | register_memory_notifier(&fn##_mem_nb); \ | 90 | register_memory_notifier(&fn##_mem_nb); \ |
87 | } | 91 | } |
92 | #else | ||
93 | #define hotplug_memory_notifier(fn, pri) do { } while (0) | ||
94 | #endif | ||
88 | 95 | ||
89 | #endif /* _LINUX_MEMORY_H_ */ | 96 | #endif /* _LINUX_MEMORY_H_ */ |
diff --git a/include/linux/pci.h b/include/linux/pci.h index 768b93359f9..5d2281f661f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h | |||
@@ -141,6 +141,7 @@ struct pci_dev { | |||
141 | unsigned int class; /* 3 bytes: (base,sub,prog-if) */ | 141 | unsigned int class; /* 3 bytes: (base,sub,prog-if) */ |
142 | u8 revision; /* PCI revision, low byte of class word */ | 142 | u8 revision; /* PCI revision, low byte of class word */ |
143 | u8 hdr_type; /* PCI header type (`multi' flag masked out) */ | 143 | u8 hdr_type; /* PCI header type (`multi' flag masked out) */ |
144 | u8 pcie_type; /* PCI-E device/port type */ | ||
144 | u8 rom_base_reg; /* which config register controls the ROM */ | 145 | u8 rom_base_reg; /* which config register controls the ROM */ |
145 | u8 pin; /* which interrupt pin this device uses */ | 146 | u8 pin; /* which interrupt pin this device uses */ |
146 | 147 | ||
@@ -183,6 +184,7 @@ struct pci_dev { | |||
183 | unsigned int msi_enabled:1; | 184 | unsigned int msi_enabled:1; |
184 | unsigned int msix_enabled:1; | 185 | unsigned int msix_enabled:1; |
185 | unsigned int is_managed:1; | 186 | unsigned int is_managed:1; |
187 | unsigned int is_pcie:1; | ||
186 | atomic_t enable_cnt; /* pci_enable_device has been called */ | 188 | atomic_t enable_cnt; /* pci_enable_device has been called */ |
187 | 189 | ||
188 | u32 saved_config_space[16]; /* config space saved at suspend time */ | 190 | u32 saved_config_space[16]; /* config space saved at suspend time */ |
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 72bfccd3da2..422eab4958a 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h | |||
@@ -28,6 +28,8 @@ | |||
28 | #include <linux/reiserfs_fs_sb.h> | 28 | #include <linux/reiserfs_fs_sb.h> |
29 | #endif | 29 | #endif |
30 | 30 | ||
31 | struct fid; | ||
32 | |||
31 | /* | 33 | /* |
32 | * include/linux/reiser_fs.h | 34 | * include/linux/reiser_fs.h |
33 | * | 35 | * |
@@ -1877,12 +1879,10 @@ void reiserfs_delete_inode(struct inode *inode); | |||
1877 | int reiserfs_write_inode(struct inode *inode, int); | 1879 | int reiserfs_write_inode(struct inode *inode, int); |
1878 | int reiserfs_get_block(struct inode *inode, sector_t block, | 1880 | int reiserfs_get_block(struct inode *inode, sector_t block, |
1879 | struct buffer_head *bh_result, int create); | 1881 | struct buffer_head *bh_result, int create); |
1880 | struct dentry *reiserfs_get_dentry(struct super_block *, void *); | 1882 | struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
1881 | struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, | 1883 | int fh_len, int fh_type); |
1882 | int len, int fhtype, | 1884 | struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, |
1883 | int (*acceptable) (void *contect, | 1885 | int fh_len, int fh_type); |
1884 | struct dentry * de), | ||
1885 | void *context); | ||
1886 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, | 1886 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, |
1887 | int connectable); | 1887 | int connectable); |
1888 | 1888 | ||
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1833879f843..3a47871a29d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -187,7 +187,24 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) | |||
187 | unsigned long onlined_pages = 0; | 187 | unsigned long onlined_pages = 0; |
188 | struct zone *zone; | 188 | struct zone *zone; |
189 | int need_zonelists_rebuild = 0; | 189 | int need_zonelists_rebuild = 0; |
190 | int nid; | ||
191 | int ret; | ||
192 | struct memory_notify arg; | ||
193 | |||
194 | arg.start_pfn = pfn; | ||
195 | arg.nr_pages = nr_pages; | ||
196 | arg.status_change_nid = -1; | ||
197 | |||
198 | nid = page_to_nid(pfn_to_page(pfn)); | ||
199 | if (node_present_pages(nid) == 0) | ||
200 | arg.status_change_nid = nid; | ||
190 | 201 | ||
202 | ret = memory_notify(MEM_GOING_ONLINE, &arg); | ||
203 | ret = notifier_to_errno(ret); | ||
204 | if (ret) { | ||
205 | memory_notify(MEM_CANCEL_ONLINE, &arg); | ||
206 | return ret; | ||
207 | } | ||
191 | /* | 208 | /* |
192 | * This doesn't need a lock to do pfn_to_page(). | 209 | * This doesn't need a lock to do pfn_to_page(). |
193 | * The section can't be removed here because of the | 210 | * The section can't be removed here because of the |
@@ -222,6 +239,10 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) | |||
222 | build_all_zonelists(); | 239 | build_all_zonelists(); |
223 | vm_total_pages = nr_free_pagecache_pages(); | 240 | vm_total_pages = nr_free_pagecache_pages(); |
224 | writeback_set_ratelimit(); | 241 | writeback_set_ratelimit(); |
242 | |||
243 | if (onlined_pages) | ||
244 | memory_notify(MEM_ONLINE, &arg); | ||
245 | |||
225 | return 0; | 246 | return 0; |
226 | } | 247 | } |
227 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ | 248 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ |
@@ -467,8 +488,9 @@ int offline_pages(unsigned long start_pfn, | |||
467 | { | 488 | { |
468 | unsigned long pfn, nr_pages, expire; | 489 | unsigned long pfn, nr_pages, expire; |
469 | long offlined_pages; | 490 | long offlined_pages; |
470 | int ret, drain, retry_max; | 491 | int ret, drain, retry_max, node; |
471 | struct zone *zone; | 492 | struct zone *zone; |
493 | struct memory_notify arg; | ||
472 | 494 | ||
473 | BUG_ON(start_pfn >= end_pfn); | 495 | BUG_ON(start_pfn >= end_pfn); |
474 | /* at least, alignment against pageblock is necessary */ | 496 | /* at least, alignment against pageblock is necessary */ |
@@ -480,11 +502,27 @@ int offline_pages(unsigned long start_pfn, | |||
480 | we assume this for now. */ | 502 | we assume this for now. */ |
481 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) | 503 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) |
482 | return -EINVAL; | 504 | return -EINVAL; |
505 | |||
506 | zone = page_zone(pfn_to_page(start_pfn)); | ||
507 | node = zone_to_nid(zone); | ||
508 | nr_pages = end_pfn - start_pfn; | ||
509 | |||
483 | /* set above range as isolated */ | 510 | /* set above range as isolated */ |
484 | ret = start_isolate_page_range(start_pfn, end_pfn); | 511 | ret = start_isolate_page_range(start_pfn, end_pfn); |
485 | if (ret) | 512 | if (ret) |
486 | return ret; | 513 | return ret; |
487 | nr_pages = end_pfn - start_pfn; | 514 | |
515 | arg.start_pfn = start_pfn; | ||
516 | arg.nr_pages = nr_pages; | ||
517 | arg.status_change_nid = -1; | ||
518 | if (nr_pages >= node_present_pages(node)) | ||
519 | arg.status_change_nid = node; | ||
520 | |||
521 | ret = memory_notify(MEM_GOING_OFFLINE, &arg); | ||
522 | ret = notifier_to_errno(ret); | ||
523 | if (ret) | ||
524 | goto failed_removal; | ||
525 | |||
488 | pfn = start_pfn; | 526 | pfn = start_pfn; |
489 | expire = jiffies + timeout; | 527 | expire = jiffies + timeout; |
490 | drain = 0; | 528 | drain = 0; |
@@ -539,20 +577,24 @@ repeat: | |||
539 | /* reset pagetype flags */ | 577 | /* reset pagetype flags */ |
540 | start_isolate_page_range(start_pfn, end_pfn); | 578 | start_isolate_page_range(start_pfn, end_pfn); |
541 | /* removal success */ | 579 | /* removal success */ |
542 | zone = page_zone(pfn_to_page(start_pfn)); | ||
543 | zone->present_pages -= offlined_pages; | 580 | zone->present_pages -= offlined_pages; |
544 | zone->zone_pgdat->node_present_pages -= offlined_pages; | 581 | zone->zone_pgdat->node_present_pages -= offlined_pages; |
545 | totalram_pages -= offlined_pages; | 582 | totalram_pages -= offlined_pages; |
546 | num_physpages -= offlined_pages; | 583 | num_physpages -= offlined_pages; |
584 | |||
547 | vm_total_pages = nr_free_pagecache_pages(); | 585 | vm_total_pages = nr_free_pagecache_pages(); |
548 | writeback_set_ratelimit(); | 586 | writeback_set_ratelimit(); |
587 | |||
588 | memory_notify(MEM_OFFLINE, &arg); | ||
549 | return 0; | 589 | return 0; |
550 | 590 | ||
551 | failed_removal: | 591 | failed_removal: |
552 | printk(KERN_INFO "memory offlining %lx to %lx failed\n", | 592 | printk(KERN_INFO "memory offlining %lx to %lx failed\n", |
553 | start_pfn, end_pfn); | 593 | start_pfn, end_pfn); |
594 | memory_notify(MEM_CANCEL_OFFLINE, &arg); | ||
554 | /* pushback to free area */ | 595 | /* pushback to free area */ |
555 | undo_isolate_page_range(start_pfn, end_pfn); | 596 | undo_isolate_page_range(start_pfn, end_pfn); |
597 | |||
556 | return ret; | 598 | return ret; |
557 | } | 599 | } |
558 | #else | 600 | #else |
diff --git a/mm/shmem.c b/mm/shmem.c index 289dbb0a6fd..404e53bb212 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -2020,33 +2020,25 @@ static int shmem_match(struct inode *ino, void *vfh) | |||
2020 | return ino->i_ino == inum && fh[0] == ino->i_generation; | 2020 | return ino->i_ino == inum && fh[0] == ino->i_generation; |
2021 | } | 2021 | } |
2022 | 2022 | ||
2023 | static struct dentry *shmem_get_dentry(struct super_block *sb, void *vfh) | 2023 | static struct dentry *shmem_fh_to_dentry(struct super_block *sb, |
2024 | struct fid *fid, int fh_len, int fh_type) | ||
2024 | { | 2025 | { |
2025 | struct dentry *de = NULL; | ||
2026 | struct inode *inode; | 2026 | struct inode *inode; |
2027 | __u32 *fh = vfh; | 2027 | struct dentry *dentry = NULL; |
2028 | __u64 inum = fh[2]; | 2028 | u64 inum = fid->raw[2]; |
2029 | inum = (inum << 32) | fh[1]; | 2029 | inum = (inum << 32) | fid->raw[1]; |
2030 | |||
2031 | if (fh_len < 3) | ||
2032 | return NULL; | ||
2030 | 2033 | ||
2031 | inode = ilookup5(sb, (unsigned long)(inum+fh[0]), shmem_match, vfh); | 2034 | inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), |
2035 | shmem_match, fid->raw); | ||
2032 | if (inode) { | 2036 | if (inode) { |
2033 | de = d_find_alias(inode); | 2037 | dentry = d_find_alias(inode); |
2034 | iput(inode); | 2038 | iput(inode); |
2035 | } | 2039 | } |
2036 | 2040 | ||
2037 | return de? de: ERR_PTR(-ESTALE); | 2041 | return dentry; |
2038 | } | ||
2039 | |||
2040 | static struct dentry *shmem_decode_fh(struct super_block *sb, __u32 *fh, | ||
2041 | int len, int type, | ||
2042 | int (*acceptable)(void *context, struct dentry *de), | ||
2043 | void *context) | ||
2044 | { | ||
2045 | if (len < 3) | ||
2046 | return ERR_PTR(-ESTALE); | ||
2047 | |||
2048 | return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable, | ||
2049 | context); | ||
2050 | } | 2042 | } |
2051 | 2043 | ||
2052 | static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, | 2044 | static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, |
@@ -2079,11 +2071,10 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, | |||
2079 | return 1; | 2071 | return 1; |
2080 | } | 2072 | } |
2081 | 2073 | ||
2082 | static struct export_operations shmem_export_ops = { | 2074 | static const struct export_operations shmem_export_ops = { |
2083 | .get_parent = shmem_get_parent, | 2075 | .get_parent = shmem_get_parent, |
2084 | .get_dentry = shmem_get_dentry, | ||
2085 | .encode_fh = shmem_encode_fh, | 2076 | .encode_fh = shmem_encode_fh, |
2086 | .decode_fh = shmem_decode_fh, | 2077 | .fh_to_dentry = shmem_fh_to_dentry, |
2087 | }; | 2078 | }; |
2088 | 2079 | ||
2089 | static int shmem_parse_options(char *options, int *mode, uid_t *uid, | 2080 | static int shmem_parse_options(char *options, int *mode, uid_t *uid, |
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/mempolicy.h> | 20 | #include <linux/mempolicy.h> |
21 | #include <linux/ctype.h> | 21 | #include <linux/ctype.h> |
22 | #include <linux/kallsyms.h> | 22 | #include <linux/kallsyms.h> |
23 | #include <linux/memory.h> | ||
23 | 24 | ||
24 | /* | 25 | /* |
25 | * Lock order: | 26 | * Lock order: |
@@ -2694,6 +2695,121 @@ int kmem_cache_shrink(struct kmem_cache *s) | |||
2694 | } | 2695 | } |
2695 | EXPORT_SYMBOL(kmem_cache_shrink); | 2696 | EXPORT_SYMBOL(kmem_cache_shrink); |
2696 | 2697 | ||
2698 | #if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG) | ||
2699 | static int slab_mem_going_offline_callback(void *arg) | ||
2700 | { | ||
2701 | struct kmem_cache *s; | ||
2702 | |||
2703 | down_read(&slub_lock); | ||
2704 | list_for_each_entry(s, &slab_caches, list) | ||
2705 | kmem_cache_shrink(s); | ||
2706 | up_read(&slub_lock); | ||
2707 | |||
2708 | return 0; | ||
2709 | } | ||
2710 | |||
2711 | static void slab_mem_offline_callback(void *arg) | ||
2712 | { | ||
2713 | struct kmem_cache_node *n; | ||
2714 | struct kmem_cache *s; | ||
2715 | struct memory_notify *marg = arg; | ||
2716 | int offline_node; | ||
2717 | |||
2718 | offline_node = marg->status_change_nid; | ||
2719 | |||
2720 | /* | ||
2721 | * If the node still has available memory, we still need the | ||
2722 | * kmem_cache_node for it. | ||
2723 | */ | ||
2724 | if (offline_node < 0) | ||
2725 | return; | ||
2726 | |||
2727 | down_read(&slub_lock); | ||
2728 | list_for_each_entry(s, &slab_caches, list) { | ||
2729 | n = get_node(s, offline_node); | ||
2730 | if (n) { | ||
2731 | /* | ||
2732 | * if n->nr_slabs > 0, slabs still exist on the node | ||
2733 | * that is going down. We were unable to free them, | ||
2734 | * and offline_pages() function shouldn't call this | ||
2735 | * callback. So, we must fail. | ||
2736 | */ | ||
2737 | BUG_ON(atomic_read(&n->nr_slabs)); | ||
2738 | |||
2739 | s->node[offline_node] = NULL; | ||
2740 | kmem_cache_free(kmalloc_caches, n); | ||
2741 | } | ||
2742 | } | ||
2743 | up_read(&slub_lock); | ||
2744 | } | ||
2745 | |||
2746 | static int slab_mem_going_online_callback(void *arg) | ||
2747 | { | ||
2748 | struct kmem_cache_node *n; | ||
2749 | struct kmem_cache *s; | ||
2750 | struct memory_notify *marg = arg; | ||
2751 | int nid = marg->status_change_nid; | ||
2752 | int ret = 0; | ||
2753 | |||
2754 | /* | ||
2755 | * If the node's memory is already available, then kmem_cache_node is | ||
2756 | * already created. Nothing to do. | ||
2757 | */ | ||
2758 | if (nid < 0) | ||
2759 | return 0; | ||
2760 | |||
2761 | /* | ||
2762 | * We are bringing a node online. No memory is available yet. We must | ||
2763 | * allocate a kmem_cache_node structure in order to bring the node | ||
2764 | * online. | ||
2765 | */ | ||
2766 | down_read(&slub_lock); | ||
2767 | list_for_each_entry(s, &slab_caches, list) { | ||
2768 | /* | ||
2769 | * XXX: kmem_cache_alloc_node will fallback to other nodes | ||
2770 | * since memory is not yet available from the node that | ||
2771 | * is brought up. | ||
2772 | */ | ||
2773 | n = kmem_cache_alloc(kmalloc_caches, GFP_KERNEL); | ||
2774 | if (!n) { | ||
2775 | ret = -ENOMEM; | ||
2776 | goto out; | ||
2777 | } | ||
2778 | init_kmem_cache_node(n); | ||
2779 | s->node[nid] = n; | ||
2780 | } | ||
2781 | out: | ||
2782 | up_read(&slub_lock); | ||
2783 | return ret; | ||
2784 | } | ||
2785 | |||
2786 | static int slab_memory_callback(struct notifier_block *self, | ||
2787 | unsigned long action, void *arg) | ||
2788 | { | ||
2789 | int ret = 0; | ||
2790 | |||
2791 | switch (action) { | ||
2792 | case MEM_GOING_ONLINE: | ||
2793 | ret = slab_mem_going_online_callback(arg); | ||
2794 | break; | ||
2795 | case MEM_GOING_OFFLINE: | ||
2796 | ret = slab_mem_going_offline_callback(arg); | ||
2797 | break; | ||
2798 | case MEM_OFFLINE: | ||
2799 | case MEM_CANCEL_ONLINE: | ||
2800 | slab_mem_offline_callback(arg); | ||
2801 | break; | ||
2802 | case MEM_ONLINE: | ||
2803 | case MEM_CANCEL_OFFLINE: | ||
2804 | break; | ||
2805 | } | ||
2806 | |||
2807 | ret = notifier_from_errno(ret); | ||
2808 | return ret; | ||
2809 | } | ||
2810 | |||
2811 | #endif /* CONFIG_MEMORY_HOTPLUG */ | ||
2812 | |||
2697 | /******************************************************************** | 2813 | /******************************************************************** |
2698 | * Basic setup of slabs | 2814 | * Basic setup of slabs |
2699 | *******************************************************************/ | 2815 | *******************************************************************/ |
@@ -2715,6 +2831,8 @@ void __init kmem_cache_init(void) | |||
2715 | sizeof(struct kmem_cache_node), GFP_KERNEL); | 2831 | sizeof(struct kmem_cache_node), GFP_KERNEL); |
2716 | kmalloc_caches[0].refcount = -1; | 2832 | kmalloc_caches[0].refcount = -1; |
2717 | caches++; | 2833 | caches++; |
2834 | |||
2835 | hotplug_memory_notifier(slab_memory_callback, 1); | ||
2718 | #endif | 2836 | #endif |
2719 | 2837 | ||
2720 | /* Able to allocate the per node structures */ | 2838 | /* Able to allocate the per node structures */ |
diff --git a/security/commoncap.c b/security/commoncap.c index 43f902750a1..bf67871173e 100644 --- a/security/commoncap.c +++ b/security/commoncap.c | |||
@@ -190,7 +190,8 @@ int cap_inode_killpriv(struct dentry *dentry) | |||
190 | return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); | 190 | return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); |
191 | } | 191 | } |
192 | 192 | ||
193 | static inline int cap_from_disk(__le32 *caps, struct linux_binprm *bprm, | 193 | static inline int cap_from_disk(struct vfs_cap_data *caps, |
194 | struct linux_binprm *bprm, | ||
194 | int size) | 195 | int size) |
195 | { | 196 | { |
196 | __u32 magic_etc; | 197 | __u32 magic_etc; |
@@ -198,7 +199,7 @@ static inline int cap_from_disk(__le32 *caps, struct linux_binprm *bprm, | |||
198 | if (size != XATTR_CAPS_SZ) | 199 | if (size != XATTR_CAPS_SZ) |
199 | return -EINVAL; | 200 | return -EINVAL; |
200 | 201 | ||
201 | magic_etc = le32_to_cpu(caps[0]); | 202 | magic_etc = le32_to_cpu(caps->magic_etc); |
202 | 203 | ||
203 | switch ((magic_etc & VFS_CAP_REVISION_MASK)) { | 204 | switch ((magic_etc & VFS_CAP_REVISION_MASK)) { |
204 | case VFS_CAP_REVISION: | 205 | case VFS_CAP_REVISION: |
@@ -206,8 +207,8 @@ static inline int cap_from_disk(__le32 *caps, struct linux_binprm *bprm, | |||
206 | bprm->cap_effective = true; | 207 | bprm->cap_effective = true; |
207 | else | 208 | else |
208 | bprm->cap_effective = false; | 209 | bprm->cap_effective = false; |
209 | bprm->cap_permitted = to_cap_t( le32_to_cpu(caps[1]) ); | 210 | bprm->cap_permitted = to_cap_t(le32_to_cpu(caps->permitted)); |
210 | bprm->cap_inheritable = to_cap_t( le32_to_cpu(caps[2]) ); | 211 | bprm->cap_inheritable = to_cap_t(le32_to_cpu(caps->inheritable)); |
211 | return 0; | 212 | return 0; |
212 | default: | 213 | default: |
213 | return -EINVAL; | 214 | return -EINVAL; |
@@ -219,7 +220,7 @@ static int get_file_caps(struct linux_binprm *bprm) | |||
219 | { | 220 | { |
220 | struct dentry *dentry; | 221 | struct dentry *dentry; |
221 | int rc = 0; | 222 | int rc = 0; |
222 | __le32 v1caps[XATTR_CAPS_SZ]; | 223 | struct vfs_cap_data incaps; |
223 | struct inode *inode; | 224 | struct inode *inode; |
224 | 225 | ||
225 | if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) { | 226 | if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) { |
@@ -232,8 +233,14 @@ static int get_file_caps(struct linux_binprm *bprm) | |||
232 | if (!inode->i_op || !inode->i_op->getxattr) | 233 | if (!inode->i_op || !inode->i_op->getxattr) |
233 | goto out; | 234 | goto out; |
234 | 235 | ||
235 | rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &v1caps, | 236 | rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); |
236 | XATTR_CAPS_SZ); | 237 | if (rc > 0) { |
238 | if (rc == XATTR_CAPS_SZ) | ||
239 | rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, | ||
240 | &incaps, XATTR_CAPS_SZ); | ||
241 | else | ||
242 | rc = -EINVAL; | ||
243 | } | ||
237 | if (rc == -ENODATA || rc == -EOPNOTSUPP) { | 244 | if (rc == -ENODATA || rc == -EOPNOTSUPP) { |
238 | /* no data, that's ok */ | 245 | /* no data, that's ok */ |
239 | rc = 0; | 246 | rc = 0; |
@@ -242,7 +249,7 @@ static int get_file_caps(struct linux_binprm *bprm) | |||
242 | if (rc < 0) | 249 | if (rc < 0) |
243 | goto out; | 250 | goto out; |
244 | 251 | ||
245 | rc = cap_from_disk(v1caps, bprm, rc); | 252 | rc = cap_from_disk(&incaps, bprm, rc); |
246 | if (rc) | 253 | if (rc) |
247 | printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", | 254 | printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", |
248 | __FUNCTION__, rc, bprm->filename); | 255 | __FUNCTION__, rc, bprm->filename); |