diff options
-rw-r--r-- | Documentation/ia64/aliasing-test.c | 247 | ||||
-rw-r--r-- | Documentation/ia64/aliasing.txt | 71 |
2 files changed, 284 insertions, 34 deletions
diff --git a/Documentation/ia64/aliasing-test.c b/Documentation/ia64/aliasing-test.c new file mode 100644 index 000000000000..3153167b41c3 --- /dev/null +++ b/Documentation/ia64/aliasing-test.c | |||
@@ -0,0 +1,247 @@ | |||
1 | /* | ||
2 | * Exercise /dev/mem mmap cases that have been troublesome in the past | ||
3 | * | ||
4 | * (c) Copyright 2007 Hewlett-Packard Development Company, L.P. | ||
5 | * Bjorn Helgaas <bjorn.helgaas@hp.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <stdlib.h> | ||
13 | #include <stdio.h> | ||
14 | #include <sys/types.h> | ||
15 | #include <dirent.h> | ||
16 | #include <fcntl.h> | ||
17 | #include <fnmatch.h> | ||
18 | #include <string.h> | ||
19 | #include <sys/mman.h> | ||
20 | #include <sys/stat.h> | ||
21 | #include <unistd.h> | ||
22 | |||
23 | int sum; | ||
24 | |||
25 | int map_mem(char *path, off_t offset, size_t length, int touch) | ||
26 | { | ||
27 | int fd, rc; | ||
28 | void *addr; | ||
29 | int *c; | ||
30 | |||
31 | fd = open(path, O_RDWR); | ||
32 | if (fd == -1) { | ||
33 | perror(path); | ||
34 | return -1; | ||
35 | } | ||
36 | |||
37 | addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset); | ||
38 | if (addr == MAP_FAILED) | ||
39 | return 1; | ||
40 | |||
41 | if (touch) { | ||
42 | c = (int *) addr; | ||
43 | while (c < (int *) (offset + length)) | ||
44 | sum += *c++; | ||
45 | } | ||
46 | |||
47 | rc = munmap(addr, length); | ||
48 | if (rc == -1) { | ||
49 | perror("munmap"); | ||
50 | return -1; | ||
51 | } | ||
52 | |||
53 | close(fd); | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | int scan_sysfs(char *path, char *file, off_t offset, size_t length, int touch) | ||
58 | { | ||
59 | struct dirent **namelist; | ||
60 | char *name, *path2; | ||
61 | int i, n, r, rc, result = 0; | ||
62 | struct stat buf; | ||
63 | |||
64 | n = scandir(path, &namelist, 0, alphasort); | ||
65 | if (n < 0) { | ||
66 | perror("scandir"); | ||
67 | return -1; | ||
68 | } | ||
69 | |||
70 | for (i = 0; i < n; i++) { | ||
71 | name = namelist[i]->d_name; | ||
72 | |||
73 | if (fnmatch(".", name, 0) == 0) | ||
74 | goto skip; | ||
75 | if (fnmatch("..", name, 0) == 0) | ||
76 | goto skip; | ||
77 | |||
78 | path2 = malloc(strlen(path) + strlen(name) + 3); | ||
79 | strcpy(path2, path); | ||
80 | strcat(path2, "/"); | ||
81 | strcat(path2, name); | ||
82 | |||
83 | if (fnmatch(file, name, 0) == 0) { | ||
84 | rc = map_mem(path2, offset, length, touch); | ||
85 | if (rc == 0) | ||
86 | fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? "readable" : "mappable"); | ||
87 | else if (rc > 0) | ||
88 | fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length); | ||
89 | else { | ||
90 | fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length); | ||
91 | return rc; | ||
92 | } | ||
93 | } else { | ||
94 | r = lstat(path2, &buf); | ||
95 | if (r == 0 && S_ISDIR(buf.st_mode)) { | ||
96 | rc = scan_sysfs(path2, file, offset, length, touch); | ||
97 | if (rc < 0) | ||
98 | return rc; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | result |= rc; | ||
103 | free(path2); | ||
104 | |||
105 | skip: | ||
106 | free(namelist[i]); | ||
107 | } | ||
108 | free(namelist); | ||
109 | return rc; | ||
110 | } | ||
111 | |||
112 | char buf[1024]; | ||
113 | |||
114 | int read_rom(char *path) | ||
115 | { | ||
116 | int fd, rc; | ||
117 | size_t size = 0; | ||
118 | |||
119 | fd = open(path, O_RDWR); | ||
120 | if (fd == -1) { | ||
121 | perror(path); | ||
122 | return -1; | ||
123 | } | ||
124 | |||
125 | rc = write(fd, "1", 2); | ||
126 | if (rc <= 0) { | ||
127 | perror("write"); | ||
128 | return -1; | ||
129 | } | ||
130 | |||
131 | do { | ||
132 | rc = read(fd, buf, sizeof(buf)); | ||
133 | if (rc > 0) | ||
134 | size += rc; | ||
135 | } while (rc > 0); | ||
136 | |||
137 | close(fd); | ||
138 | return size; | ||
139 | } | ||
140 | |||
141 | int scan_rom(char *path, char *file) | ||
142 | { | ||
143 | struct dirent **namelist; | ||
144 | char *name, *path2; | ||
145 | int i, n, r, rc, result = 0; | ||
146 | struct stat buf; | ||
147 | |||
148 | n = scandir(path, &namelist, 0, alphasort); | ||
149 | if (n < 0) { | ||
150 | perror("scandir"); | ||
151 | return -1; | ||
152 | } | ||
153 | |||
154 | for (i = 0; i < n; i++) { | ||
155 | name = namelist[i]->d_name; | ||
156 | |||
157 | if (fnmatch(".", name, 0) == 0) | ||
158 | goto skip; | ||
159 | if (fnmatch("..", name, 0) == 0) | ||
160 | goto skip; | ||
161 | |||
162 | path2 = malloc(strlen(path) + strlen(name) + 3); | ||
163 | strcpy(path2, path); | ||
164 | strcat(path2, "/"); | ||
165 | strcat(path2, name); | ||
166 | |||
167 | if (fnmatch(file, name, 0) == 0) { | ||
168 | rc = read_rom(path2); | ||
169 | |||
170 | /* | ||
171 | * It's OK if the ROM is unreadable. Maybe there | ||
172 | * is no ROM, or some other error ocurred. The | ||
173 | * important thing is that no MCA happened. | ||
174 | */ | ||
175 | if (rc > 0) | ||
176 | fprintf(stderr, "PASS: %s read %ld bytes\n", path2, rc); | ||
177 | else { | ||
178 | fprintf(stderr, "PASS: %s not readable\n", path2); | ||
179 | return rc; | ||
180 | } | ||
181 | } else { | ||
182 | r = lstat(path2, &buf); | ||
183 | if (r == 0 && S_ISDIR(buf.st_mode)) { | ||
184 | rc = scan_rom(path2, file); | ||
185 | if (rc < 0) | ||
186 | return rc; | ||
187 | } | ||
188 | } | ||
189 | |||
190 | result |= rc; | ||
191 | free(path2); | ||
192 | |||
193 | skip: | ||
194 | free(namelist[i]); | ||
195 | } | ||
196 | free(namelist); | ||
197 | return rc; | ||
198 | } | ||
199 | |||
200 | main() | ||
201 | { | ||
202 | int rc; | ||
203 | |||
204 | if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0) | ||
205 | fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n"); | ||
206 | else | ||
207 | fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n"); | ||
208 | |||
209 | /* | ||
210 | * It's not safe to blindly read the VGA frame buffer. If you know | ||
211 | * how to poke the card the right way, it should respond, but it's | ||
212 | * not safe in general. Many machines, e.g., Intel chipsets, cover | ||
213 | * up a non-responding card by just returning -1, but others will | ||
214 | * report the failure as a machine check. | ||
215 | */ | ||
216 | if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0) | ||
217 | fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n"); | ||
218 | else | ||
219 | fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n"); | ||
220 | |||
221 | if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0) | ||
222 | fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n"); | ||
223 | else | ||
224 | fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n"); | ||
225 | |||
226 | /* | ||
227 | * Often you can map all the individual pieces above (0-0xA0000, | ||
228 | * 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole | ||
229 | * thing at once. This is because the individual pieces use different | ||
230 | * attributes, and there's no single attribute supported over the | ||
231 | * whole region. | ||
232 | */ | ||
233 | rc = map_mem("/dev/mem", 0, 1024*1024, 0); | ||
234 | if (rc == 0) | ||
235 | fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n"); | ||
236 | else if (rc > 0) | ||
237 | fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n"); | ||
238 | else | ||
239 | fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n"); | ||
240 | |||
241 | scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1); | ||
242 | scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0); | ||
243 | scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1); | ||
244 | scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0); | ||
245 | |||
246 | scan_rom("/sys/devices", "rom"); | ||
247 | } | ||
diff --git a/Documentation/ia64/aliasing.txt b/Documentation/ia64/aliasing.txt index 38f9a52d1820..9a431a7d0f5d 100644 --- a/Documentation/ia64/aliasing.txt +++ b/Documentation/ia64/aliasing.txt | |||
@@ -112,16 +112,6 @@ POTENTIAL ATTRIBUTE ALIASING CASES | |||
112 | 112 | ||
113 | The /dev/mem mmap constraints apply. | 113 | The /dev/mem mmap constraints apply. |
114 | 114 | ||
115 | However, since this is for mapping legacy MMIO space, WB access | ||
116 | does not make sense. This matters on machines without legacy | ||
117 | VGA support: these machines may have WB memory for the entire | ||
118 | first megabyte (or even the entire first granule). | ||
119 | |||
120 | On these machines, we could mmap legacy_mem as WB, which would | ||
121 | be safe in terms of attribute aliasing, but X has no way of | ||
122 | knowing that it is accessing regular memory, not a frame buffer, | ||
123 | so the kernel should fail the mmap rather than doing it with WB. | ||
124 | |||
125 | read/write of /dev/mem | 115 | read/write of /dev/mem |
126 | 116 | ||
127 | This uses copy_from_user(), which implicitly uses a kernel | 117 | This uses copy_from_user(), which implicitly uses a kernel |
@@ -138,14 +128,20 @@ POTENTIAL ATTRIBUTE ALIASING CASES | |||
138 | 128 | ||
139 | ioremap() | 129 | ioremap() |
140 | 130 | ||
141 | This returns a kernel identity mapping for use inside the | 131 | This returns a mapping for use inside the kernel. |
142 | kernel. | ||
143 | 132 | ||
144 | If the region is in kern_memmap, we should use the attribute | 133 | If the region is in kern_memmap, we should use the attribute |
145 | specified there. Otherwise, if the EFI memory map reports that | 134 | specified there. |
146 | the entire granule supports WB, we should use that (granules | 135 | |
147 | that are partially reserved or occupied by firmware do not appear | 136 | If the EFI memory map reports that the entire granule supports |
148 | in kern_memmap). Otherwise, we should use a UC mapping. | 137 | WB, we should use that (granules that are partially reserved |
138 | or occupied by firmware do not appear in kern_memmap). | ||
139 | |||
140 | If the granule contains non-WB memory, but we can cover the | ||
141 | region safely with kernel page table mappings, we can use | ||
142 | ioremap_page_range() as most other architectures do. | ||
143 | |||
144 | Failing all of the above, we have to fall back to a UC mapping. | ||
149 | 145 | ||
150 | PAST PROBLEM CASES | 146 | PAST PROBLEM CASES |
151 | 147 | ||
@@ -158,7 +154,7 @@ PAST PROBLEM CASES | |||
158 | succeed. It may create either WB or UC user mappings, depending | 154 | succeed. It may create either WB or UC user mappings, depending |
159 | on whether the region is in kern_memmap or the EFI memory map. | 155 | on whether the region is in kern_memmap or the EFI memory map. |
160 | 156 | ||
161 | mmap of 0x0-0xA0000 /dev/mem by "hwinfo" on HP sx1000 with VGA enabled | 157 | mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled |
162 | 158 | ||
163 | See https://bugzilla.novell.com/show_bug.cgi?id=140858. | 159 | See https://bugzilla.novell.com/show_bug.cgi?id=140858. |
164 | 160 | ||
@@ -171,28 +167,25 @@ PAST PROBLEM CASES | |||
171 | so it is safe to use WB mappings. | 167 | so it is safe to use WB mappings. |
172 | 168 | ||
173 | The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000, | 169 | The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000, |
174 | which will use a granule-sized UC mapping covering 0-0xFFFFF. This | 170 | which uses a granule-sized UC mapping. This granule will cover some |
175 | granule covers some WB-only memory, but since UC is non-speculative, | 171 | WB-only memory, but since UC is non-speculative, the processor will |
176 | the processor will never generate an uncacheable reference to the | 172 | never generate an uncacheable reference to the WB-only areas unless |
177 | WB-only areas unless the driver explicitly touches them. | 173 | the driver explicitly touches them. |
178 | 174 | ||
179 | mmap of 0x0-0xFFFFF legacy_mem by "X" | 175 | mmap of 0x0-0xFFFFF legacy_mem by "X" |
180 | 176 | ||
181 | If the EFI memory map reports this entire range as WB, there | 177 | If the EFI memory map reports that the entire range supports the |
182 | is no VGA MMIO hole, and the mmap should fail or be done with | 178 | same attributes, we can allow the mmap (and we will prefer WB if |
183 | a WB mapping. | 179 | supported, as is the case with HP sx[12]000 machines with VGA |
180 | disabled). | ||
184 | 181 | ||
185 | There's no easy way for X to determine whether the 0xA0000-0xBFFFF | 182 | If EFI reports the range as partly WB and partly UC (as on sx[12]000 |
186 | region is a frame buffer or just memory, so I think it's best to | 183 | machines with VGA enabled), we must fail the mmap because there's no |
187 | just fail this mmap request rather than using a WB mapping. As | 184 | safe attribute to use. |
188 | far as I know, there's no need to map legacy_mem with WB | ||
189 | mappings. | ||
190 | 185 | ||
191 | Otherwise, a UC mapping of the entire region is probably safe. | 186 | If EFI reports some of the range but not all (as on Intel firmware |
192 | The VGA hole means the region will not be in kern_memmap. The | 187 | that doesn't report the VGA frame buffer at all), we should fail the |
193 | HP sx1000 chipset doesn't support UC access to the memory surrounding | 188 | mmap and force the user to map just the specific region of interest. |
194 | the VGA hole, but X doesn't need that area anyway and should not | ||
195 | reference it. | ||
196 | 189 | ||
197 | mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled | 190 | mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled |
198 | 191 | ||
@@ -202,6 +195,16 @@ PAST PROBLEM CASES | |||
202 | This is a special case of the previous case, and the mmap should | 195 | This is a special case of the previous case, and the mmap should |
203 | fail for the same reason as above. | 196 | fail for the same reason as above. |
204 | 197 | ||
198 | read of /sys/devices/.../rom | ||
199 | |||
200 | For VGA devices, this may cause an ioremap() of 0xC0000. This | ||
201 | used to be done with a UC mapping, because the VGA frame buffer | ||
202 | at 0xA0000 prevents use of a WB granule. The UC mapping causes | ||
203 | an MCA on HP sx[12]000 chipsets. | ||
204 | |||
205 | We should use WB page table mappings to avoid covering the VGA | ||
206 | frame buffer. | ||
207 | |||
205 | NOTES | 208 | NOTES |
206 | 209 | ||
207 | [1] SDM rev 2.2, vol 2, sec 4.4.1. | 210 | [1] SDM rev 2.2, vol 2, sec 4.4.1. |