aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorRobert Richter <robert.richter@amd.com>2010-10-25 10:28:14 -0400
committerRobert Richter <robert.richter@amd.com>2010-10-25 10:29:12 -0400
commitdbd1e66e04558a582e673bc4a9cd933ce0228d93 (patch)
tree85f3633276282cde0a3ac558d988704eaa3e68af /drivers
parent328b8f1ba50b708a1b3c0acd7c41ee1b356822f6 (diff)
parent4a60cfa9457749f7987fd4f3c956dbba5a281129 (diff)
Merge commit 'linux-2.6/master' (early part) into oprofile/core
This branch depends on these apic patches: apic, x86: Use BIOS settings for IBS and MCE threshold interrupt LVT offsets apic, x86: Check if EILVT APIC registers are available (AMD only) Signed-off-by: Robert Richter <robert.richter@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/acpi_pad.c7
-rw-r--r--drivers/base/topology.c16
-rw-r--r--drivers/block/Kconfig17
-rw-r--r--drivers/block/Makefile1
-rw-r--r--drivers/block/rbd.c1841
-rw-r--r--drivers/block/rbd_types.h73
-rw-r--r--drivers/block/virtio_blk.c17
-rw-r--r--drivers/char/agp/Kconfig2
-rw-r--r--drivers/char/agp/amd64-agp.c39
-rw-r--r--drivers/char/agp/generic.c4
-rw-r--r--drivers/char/tpm/tpm.c22
-rw-r--r--drivers/char/virtio_console.c240
-rw-r--r--drivers/edac/Kconfig16
-rw-r--r--drivers/edac/Makefile3
-rw-r--r--drivers/edac/amd64_edac.c17
-rw-r--r--drivers/edac/amd64_edac.h5
-rw-r--r--drivers/edac/amd64_edac_dbg.c207
-rw-r--r--drivers/edac/edac_device_sysfs.c16
-rw-r--r--drivers/edac/edac_mc_sysfs.c11
-rw-r--r--drivers/edac/edac_mce_amd.c452
-rw-r--r--drivers/edac/edac_module.c79
-rw-r--r--drivers/edac/edac_module.h1
-rw-r--r--drivers/edac/edac_pci_sysfs.c10
-rw-r--r--drivers/edac/edac_stub.c51
-rw-r--r--drivers/edac/mce_amd.c680
-rw-r--r--drivers/edac/mce_amd.h (renamed from drivers/edac/edac_mce_amd.h)59
-rw-r--r--drivers/edac/mce_amd_inj.c171
-rw-r--r--drivers/firmware/Kconfig2
-rw-r--r--drivers/idle/intel_idle.c9
-rw-r--r--drivers/input/evdev.c2
-rw-r--r--drivers/input/misc/hp_sdc_rtc.c4
-rw-r--r--drivers/input/serio/hil_mlc.c6
-rw-r--r--drivers/input/serio/hp_sdc.c4
-rw-r--r--drivers/isdn/act2000/act2000.h6
-rw-r--r--drivers/isdn/hisax/config.c18
-rw-r--r--drivers/isdn/hisax/hisax.h1
-rw-r--r--drivers/macintosh/adb.c2
-rw-r--r--drivers/mfd/twl4030-irq.c4
-rw-r--r--drivers/net/3c527.c2
-rw-r--r--drivers/net/hamradio/6pack.c2
-rw-r--r--drivers/net/hamradio/mkiss.c2
-rw-r--r--drivers/net/irda/sir_dev.c2
-rw-r--r--drivers/net/ppp_async.c2
-rw-r--r--drivers/net/wan/cosa.c2
-rw-r--r--drivers/parport/share.c2
-rw-r--r--drivers/pci/dmar.c8
-rw-r--r--drivers/pci/htirq.c22
-rw-r--r--drivers/pci/intr_remapping.c212
-rw-r--r--drivers/pci/msi.c38
-rw-r--r--drivers/vhost/net.c16
-rw-r--r--drivers/vhost/vhost.c22
-rw-r--r--drivers/vhost/vhost.h10
-rw-r--r--drivers/xen/events.c23
53 files changed, 3353 insertions, 1127 deletions
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
index 6b115f6c4313..6afceb3d4034 100644
--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -30,18 +30,13 @@
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <acpi/acpi_bus.h> 31#include <acpi/acpi_bus.h>
32#include <acpi/acpi_drivers.h> 32#include <acpi/acpi_drivers.h>
33#include <asm/mwait.h>
33 34
34#define ACPI_PROCESSOR_AGGREGATOR_CLASS "acpi_pad" 35#define ACPI_PROCESSOR_AGGREGATOR_CLASS "acpi_pad"
35#define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator" 36#define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator"
36#define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80 37#define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80
37static DEFINE_MUTEX(isolated_cpus_lock); 38static DEFINE_MUTEX(isolated_cpus_lock);
38 39
39#define MWAIT_SUBSTATE_MASK (0xf)
40#define MWAIT_CSTATE_MASK (0xf)
41#define MWAIT_SUBSTATE_SIZE (4)
42#define CPUID_MWAIT_LEAF (5)
43#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
44#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
45static unsigned long power_saving_mwait_eax; 40static unsigned long power_saving_mwait_eax;
46 41
47static unsigned char tsc_detected_unstable; 42static unsigned char tsc_detected_unstable;
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 9fc630ce1ddb..f6f37a05a0c3 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -45,7 +45,8 @@ static ssize_t show_##name(struct sys_device *dev, \
45 return sprintf(buf, "%d\n", topology_##name(cpu)); \ 45 return sprintf(buf, "%d\n", topology_##name(cpu)); \
46} 46}
47 47
48#if defined(topology_thread_cpumask) || defined(topology_core_cpumask) 48#if defined(topology_thread_cpumask) || defined(topology_core_cpumask) || \
49 defined(topology_book_cpumask)
49static ssize_t show_cpumap(int type, const struct cpumask *mask, char *buf) 50static ssize_t show_cpumap(int type, const struct cpumask *mask, char *buf)
50{ 51{
51 ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; 52 ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
@@ -114,6 +115,14 @@ define_siblings_show_func(core_cpumask);
114define_one_ro_named(core_siblings, show_core_cpumask); 115define_one_ro_named(core_siblings, show_core_cpumask);
115define_one_ro_named(core_siblings_list, show_core_cpumask_list); 116define_one_ro_named(core_siblings_list, show_core_cpumask_list);
116 117
118#ifdef CONFIG_SCHED_BOOK
119define_id_show_func(book_id);
120define_one_ro(book_id);
121define_siblings_show_func(book_cpumask);
122define_one_ro_named(book_siblings, show_book_cpumask);
123define_one_ro_named(book_siblings_list, show_book_cpumask_list);
124#endif
125
117static struct attribute *default_attrs[] = { 126static struct attribute *default_attrs[] = {
118 &attr_physical_package_id.attr, 127 &attr_physical_package_id.attr,
119 &attr_core_id.attr, 128 &attr_core_id.attr,
@@ -121,6 +130,11 @@ static struct attribute *default_attrs[] = {
121 &attr_thread_siblings_list.attr, 130 &attr_thread_siblings_list.attr,
122 &attr_core_siblings.attr, 131 &attr_core_siblings.attr,
123 &attr_core_siblings_list.attr, 132 &attr_core_siblings_list.attr,
133#ifdef CONFIG_SCHED_BOOK
134 &attr_book_id.attr,
135 &attr_book_siblings.attr,
136 &attr_book_siblings_list.attr,
137#endif
124 NULL 138 NULL
125}; 139};
126 140
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index de277689da61..4b9359a6f6ca 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -488,4 +488,21 @@ config BLK_DEV_HD
488 488
489 If unsure, say N. 489 If unsure, say N.
490 490
491config BLK_DEV_RBD
492 tristate "Rados block device (RBD)"
493 depends on INET && EXPERIMENTAL && BLOCK
494 select CEPH_LIB
495 select LIBCRC32C
496 select CRYPTO_AES
497 select CRYPTO
498 default n
499 help
500 Say Y here if you want include the Rados block device, which stripes
501 a block device over objects stored in the Ceph distributed object
502 store.
503
504 More information at http://ceph.newdream.net/.
505
506 If unsure, say N.
507
491endif # BLK_DEV 508endif # BLK_DEV
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index aff5ac925c34..d7f463d6312d 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -37,5 +37,6 @@ obj-$(CONFIG_BLK_DEV_HD) += hd.o
37 37
38obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o 38obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
39obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ 39obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
40obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
40 41
41swim_mod-objs := swim.o swim_asm.o 42swim_mod-objs := swim.o swim_asm.o
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
new file mode 100644
index 000000000000..6ec9d53806c5
--- /dev/null
+++ b/drivers/block/rbd.c
@@ -0,0 +1,1841 @@
1/*
2 rbd.c -- Export ceph rados objects as a Linux block device
3
4
5 based on drivers/block/osdblk.c:
6
7 Copyright 2009 Red Hat, Inc.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING. If not, write to
20 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
21
22
23
24 Instructions for use
25 --------------------
26
27 1) Map a Linux block device to an existing rbd image.
28
29 Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name]
30
31 $ echo "192.168.0.1 name=admin rbd foo" > /sys/class/rbd/add
32
33 The snapshot name can be "-" or omitted to map the image read/write.
34
35 2) List all active blkdev<->object mappings.
36
37 In this example, we have performed step #1 twice, creating two blkdevs,
38 mapped to two separate rados objects in the rados rbd pool
39
40 $ cat /sys/class/rbd/list
41 #id major client_name pool name snap KB
42 0 254 client4143 rbd foo - 1024000
43
44 The columns, in order, are:
45 - blkdev unique id
46 - blkdev assigned major
47 - rados client id
48 - rados pool name
49 - rados block device name
50 - mapped snapshot ("-" if none)
51 - device size in KB
52
53
54 3) Create a snapshot.
55
56 Usage: <blkdev id> <snapname>
57
58 $ echo "0 mysnap" > /sys/class/rbd/snap_create
59
60
61 4) Listing a snapshot.
62
63 $ cat /sys/class/rbd/snaps_list
64 #id snap KB
65 0 - 1024000 (*)
66 0 foo 1024000
67
68 The columns, in order, are:
69 - blkdev unique id
70 - snapshot name, '-' means none (active read/write version)
71 - size of device at time of snapshot
72 - the (*) indicates this is the active version
73
74 5) Rollback to snapshot.
75
76 Usage: <blkdev id> <snapname>
77
78 $ echo "0 mysnap" > /sys/class/rbd/snap_rollback
79
80
81 6) Mapping an image using snapshot.
82
83 A snapshot mapping is read-only. This is being done by passing
84 snap=<snapname> to the options when adding a device.
85
86 $ echo "192.168.0.1 name=admin,snap=mysnap rbd foo" > /sys/class/rbd/add
87
88
89 7) Remove an active blkdev<->rbd image mapping.
90
91 In this example, we remove the mapping with blkdev unique id 1.
92
93 $ echo 1 > /sys/class/rbd/remove
94
95
96 NOTE: The actual creation and deletion of rados objects is outside the scope
97 of this driver.
98
99 */
100
101#include <linux/ceph/libceph.h>
102#include <linux/ceph/osd_client.h>
103#include <linux/ceph/mon_client.h>
104#include <linux/ceph/decode.h>
105
106#include <linux/kernel.h>
107#include <linux/device.h>
108#include <linux/module.h>
109#include <linux/fs.h>
110#include <linux/blkdev.h>
111
112#include "rbd_types.h"
113
114#define DRV_NAME "rbd"
115#define DRV_NAME_LONG "rbd (rados block device)"
116
117#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */
118
119#define RBD_MAX_MD_NAME_LEN (96 + sizeof(RBD_SUFFIX))
120#define RBD_MAX_POOL_NAME_LEN 64
121#define RBD_MAX_SNAP_NAME_LEN 32
122#define RBD_MAX_OPT_LEN 1024
123
124#define RBD_SNAP_HEAD_NAME "-"
125
126#define DEV_NAME_LEN 32
127
128/*
129 * block device image metadata (in-memory version)
130 */
131struct rbd_image_header {
132 u64 image_size;
133 char block_name[32];
134 __u8 obj_order;
135 __u8 crypt_type;
136 __u8 comp_type;
137 struct rw_semaphore snap_rwsem;
138 struct ceph_snap_context *snapc;
139 size_t snap_names_len;
140 u64 snap_seq;
141 u32 total_snaps;
142
143 char *snap_names;
144 u64 *snap_sizes;
145};
146
147/*
148 * an instance of the client. multiple devices may share a client.
149 */
150struct rbd_client {
151 struct ceph_client *client;
152 struct kref kref;
153 struct list_head node;
154};
155
156/*
157 * a single io request
158 */
159struct rbd_request {
160 struct request *rq; /* blk layer request */
161 struct bio *bio; /* cloned bio */
162 struct page **pages; /* list of used pages */
163 u64 len;
164};
165
166/*
167 * a single device
168 */
169struct rbd_device {
170 int id; /* blkdev unique id */
171
172 int major; /* blkdev assigned major */
173 struct gendisk *disk; /* blkdev's gendisk and rq */
174 struct request_queue *q;
175
176 struct ceph_client *client;
177 struct rbd_client *rbd_client;
178
179 char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
180
181 spinlock_t lock; /* queue lock */
182
183 struct rbd_image_header header;
184 char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
185 int obj_len;
186 char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
187 char pool_name[RBD_MAX_POOL_NAME_LEN];
188 int poolid;
189
190 char snap_name[RBD_MAX_SNAP_NAME_LEN];
191 u32 cur_snap; /* index+1 of current snapshot within snap context
192 0 - for the head */
193 int read_only;
194
195 struct list_head node;
196};
197
198static spinlock_t node_lock; /* protects client get/put */
199
200static struct class *class_rbd; /* /sys/class/rbd */
201static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
202static LIST_HEAD(rbd_dev_list); /* devices */
203static LIST_HEAD(rbd_client_list); /* clients */
204
205
206static int rbd_open(struct block_device *bdev, fmode_t mode)
207{
208 struct gendisk *disk = bdev->bd_disk;
209 struct rbd_device *rbd_dev = disk->private_data;
210
211 set_device_ro(bdev, rbd_dev->read_only);
212
213 if ((mode & FMODE_WRITE) && rbd_dev->read_only)
214 return -EROFS;
215
216 return 0;
217}
218
219static const struct block_device_operations rbd_bd_ops = {
220 .owner = THIS_MODULE,
221 .open = rbd_open,
222};
223
224/*
225 * Initialize an rbd client instance.
226 * We own *opt.
227 */
228static struct rbd_client *rbd_client_create(struct ceph_options *opt)
229{
230 struct rbd_client *rbdc;
231 int ret = -ENOMEM;
232
233 dout("rbd_client_create\n");
234 rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
235 if (!rbdc)
236 goto out_opt;
237
238 kref_init(&rbdc->kref);
239 INIT_LIST_HEAD(&rbdc->node);
240
241 rbdc->client = ceph_create_client(opt, rbdc);
242 if (IS_ERR(rbdc->client))
243 goto out_rbdc;
244 opt = NULL; /* Now rbdc->client is responsible for opt */
245
246 ret = ceph_open_session(rbdc->client);
247 if (ret < 0)
248 goto out_err;
249
250 spin_lock(&node_lock);
251 list_add_tail(&rbdc->node, &rbd_client_list);
252 spin_unlock(&node_lock);
253
254 dout("rbd_client_create created %p\n", rbdc);
255 return rbdc;
256
257out_err:
258 ceph_destroy_client(rbdc->client);
259out_rbdc:
260 kfree(rbdc);
261out_opt:
262 if (opt)
263 ceph_destroy_options(opt);
264 return ERR_PTR(ret);
265}
266
267/*
268 * Find a ceph client with specific addr and configuration.
269 */
270static struct rbd_client *__rbd_client_find(struct ceph_options *opt)
271{
272 struct rbd_client *client_node;
273
274 if (opt->flags & CEPH_OPT_NOSHARE)
275 return NULL;
276
277 list_for_each_entry(client_node, &rbd_client_list, node)
278 if (ceph_compare_options(opt, client_node->client) == 0)
279 return client_node;
280 return NULL;
281}
282
283/*
284 * Get a ceph client with specific addr and configuration, if one does
285 * not exist create it.
286 */
287static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr,
288 char *options)
289{
290 struct rbd_client *rbdc;
291 struct ceph_options *opt;
292 int ret;
293
294 ret = ceph_parse_options(&opt, options, mon_addr,
295 mon_addr + strlen(mon_addr), NULL, NULL);
296 if (ret < 0)
297 return ret;
298
299 spin_lock(&node_lock);
300 rbdc = __rbd_client_find(opt);
301 if (rbdc) {
302 ceph_destroy_options(opt);
303
304 /* using an existing client */
305 kref_get(&rbdc->kref);
306 rbd_dev->rbd_client = rbdc;
307 rbd_dev->client = rbdc->client;
308 spin_unlock(&node_lock);
309 return 0;
310 }
311 spin_unlock(&node_lock);
312
313 rbdc = rbd_client_create(opt);
314 if (IS_ERR(rbdc))
315 return PTR_ERR(rbdc);
316
317 rbd_dev->rbd_client = rbdc;
318 rbd_dev->client = rbdc->client;
319 return 0;
320}
321
322/*
323 * Destroy ceph client
324 */
325static void rbd_client_release(struct kref *kref)
326{
327 struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
328
329 dout("rbd_release_client %p\n", rbdc);
330 spin_lock(&node_lock);
331 list_del(&rbdc->node);
332 spin_unlock(&node_lock);
333
334 ceph_destroy_client(rbdc->client);
335 kfree(rbdc);
336}
337
338/*
339 * Drop reference to ceph client node. If it's not referenced anymore, release
340 * it.
341 */
342static void rbd_put_client(struct rbd_device *rbd_dev)
343{
344 kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
345 rbd_dev->rbd_client = NULL;
346 rbd_dev->client = NULL;
347}
348
349
350/*
351 * Create a new header structure, translate header format from the on-disk
352 * header.
353 */
354static int rbd_header_from_disk(struct rbd_image_header *header,
355 struct rbd_image_header_ondisk *ondisk,
356 int allocated_snaps,
357 gfp_t gfp_flags)
358{
359 int i;
360 u32 snap_count = le32_to_cpu(ondisk->snap_count);
361 int ret = -ENOMEM;
362
363 init_rwsem(&header->snap_rwsem);
364
365 header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
366 header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
367 snap_count *
368 sizeof(struct rbd_image_snap_ondisk),
369 gfp_flags);
370 if (!header->snapc)
371 return -ENOMEM;
372 if (snap_count) {
373 header->snap_names = kmalloc(header->snap_names_len,
374 GFP_KERNEL);
375 if (!header->snap_names)
376 goto err_snapc;
377 header->snap_sizes = kmalloc(snap_count * sizeof(u64),
378 GFP_KERNEL);
379 if (!header->snap_sizes)
380 goto err_names;
381 } else {
382 header->snap_names = NULL;
383 header->snap_sizes = NULL;
384 }
385 memcpy(header->block_name, ondisk->block_name,
386 sizeof(ondisk->block_name));
387
388 header->image_size = le64_to_cpu(ondisk->image_size);
389 header->obj_order = ondisk->options.order;
390 header->crypt_type = ondisk->options.crypt_type;
391 header->comp_type = ondisk->options.comp_type;
392
393 atomic_set(&header->snapc->nref, 1);
394 header->snap_seq = le64_to_cpu(ondisk->snap_seq);
395 header->snapc->num_snaps = snap_count;
396 header->total_snaps = snap_count;
397
398 if (snap_count &&
399 allocated_snaps == snap_count) {
400 for (i = 0; i < snap_count; i++) {
401 header->snapc->snaps[i] =
402 le64_to_cpu(ondisk->snaps[i].id);
403 header->snap_sizes[i] =
404 le64_to_cpu(ondisk->snaps[i].image_size);
405 }
406
407 /* copy snapshot names */
408 memcpy(header->snap_names, &ondisk->snaps[i],
409 header->snap_names_len);
410 }
411
412 return 0;
413
414err_names:
415 kfree(header->snap_names);
416err_snapc:
417 kfree(header->snapc);
418 return ret;
419}
420
421static int snap_index(struct rbd_image_header *header, int snap_num)
422{
423 return header->total_snaps - snap_num;
424}
425
426static u64 cur_snap_id(struct rbd_device *rbd_dev)
427{
428 struct rbd_image_header *header = &rbd_dev->header;
429
430 if (!rbd_dev->cur_snap)
431 return 0;
432
433 return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)];
434}
435
436static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
437 u64 *seq, u64 *size)
438{
439 int i;
440 char *p = header->snap_names;
441
442 for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) {
443 if (strcmp(snap_name, p) == 0)
444 break;
445 }
446 if (i == header->total_snaps)
447 return -ENOENT;
448 if (seq)
449 *seq = header->snapc->snaps[i];
450
451 if (size)
452 *size = header->snap_sizes[i];
453
454 return i;
455}
456
457static int rbd_header_set_snap(struct rbd_device *dev,
458 const char *snap_name,
459 u64 *size)
460{
461 struct rbd_image_header *header = &dev->header;
462 struct ceph_snap_context *snapc = header->snapc;
463 int ret = -ENOENT;
464
465 down_write(&header->snap_rwsem);
466
467 if (!snap_name ||
468 !*snap_name ||
469 strcmp(snap_name, "-") == 0 ||
470 strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) {
471 if (header->total_snaps)
472 snapc->seq = header->snap_seq;
473 else
474 snapc->seq = 0;
475 dev->cur_snap = 0;
476 dev->read_only = 0;
477 if (size)
478 *size = header->image_size;
479 } else {
480 ret = snap_by_name(header, snap_name, &snapc->seq, size);
481 if (ret < 0)
482 goto done;
483
484 dev->cur_snap = header->total_snaps - ret;
485 dev->read_only = 1;
486 }
487
488 ret = 0;
489done:
490 up_write(&header->snap_rwsem);
491 return ret;
492}
493
494static void rbd_header_free(struct rbd_image_header *header)
495{
496 kfree(header->snapc);
497 kfree(header->snap_names);
498 kfree(header->snap_sizes);
499}
500
501/*
502 * get the actual striped segment name, offset and length
503 */
504static u64 rbd_get_segment(struct rbd_image_header *header,
505 const char *block_name,
506 u64 ofs, u64 len,
507 char *seg_name, u64 *segofs)
508{
509 u64 seg = ofs >> header->obj_order;
510
511 if (seg_name)
512 snprintf(seg_name, RBD_MAX_SEG_NAME_LEN,
513 "%s.%012llx", block_name, seg);
514
515 ofs = ofs & ((1 << header->obj_order) - 1);
516 len = min_t(u64, len, (1 << header->obj_order) - ofs);
517
518 if (segofs)
519 *segofs = ofs;
520
521 return len;
522}
523
524/*
525 * bio helpers
526 */
527
528static void bio_chain_put(struct bio *chain)
529{
530 struct bio *tmp;
531
532 while (chain) {
533 tmp = chain;
534 chain = chain->bi_next;
535 bio_put(tmp);
536 }
537}
538
539/*
540 * zeros a bio chain, starting at specific offset
541 */
542static void zero_bio_chain(struct bio *chain, int start_ofs)
543{
544 struct bio_vec *bv;
545 unsigned long flags;
546 void *buf;
547 int i;
548 int pos = 0;
549
550 while (chain) {
551 bio_for_each_segment(bv, chain, i) {
552 if (pos + bv->bv_len > start_ofs) {
553 int remainder = max(start_ofs - pos, 0);
554 buf = bvec_kmap_irq(bv, &flags);
555 memset(buf + remainder, 0,
556 bv->bv_len - remainder);
557 bvec_kunmap_irq(buf, &flags);
558 }
559 pos += bv->bv_len;
560 }
561
562 chain = chain->bi_next;
563 }
564}
565
566/*
567 * bio_chain_clone - clone a chain of bios up to a certain length.
568 * might return a bio_pair that will need to be released.
569 */
570static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
571 struct bio_pair **bp,
572 int len, gfp_t gfpmask)
573{
574 struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL;
575 int total = 0;
576
577 if (*bp) {
578 bio_pair_release(*bp);
579 *bp = NULL;
580 }
581
582 while (old_chain && (total < len)) {
583 tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
584 if (!tmp)
585 goto err_out;
586
587 if (total + old_chain->bi_size > len) {
588 struct bio_pair *bp;
589
590 /*
591 * this split can only happen with a single paged bio,
592 * split_bio will BUG_ON if this is not the case
593 */
594 dout("bio_chain_clone split! total=%d remaining=%d"
595 "bi_size=%d\n",
596 (int)total, (int)len-total,
597 (int)old_chain->bi_size);
598
599 /* split the bio. We'll release it either in the next
600 call, or it will have to be released outside */
601 bp = bio_split(old_chain, (len - total) / 512ULL);
602 if (!bp)
603 goto err_out;
604
605 __bio_clone(tmp, &bp->bio1);
606
607 *next = &bp->bio2;
608 } else {
609 __bio_clone(tmp, old_chain);
610 *next = old_chain->bi_next;
611 }
612
613 tmp->bi_bdev = NULL;
614 gfpmask &= ~__GFP_WAIT;
615 tmp->bi_next = NULL;
616
617 if (!new_chain) {
618 new_chain = tail = tmp;
619 } else {
620 tail->bi_next = tmp;
621 tail = tmp;
622 }
623 old_chain = old_chain->bi_next;
624
625 total += tmp->bi_size;
626 }
627
628 BUG_ON(total < len);
629
630 if (tail)
631 tail->bi_next = NULL;
632
633 *old = old_chain;
634
635 return new_chain;
636
637err_out:
638 dout("bio_chain_clone with err\n");
639 bio_chain_put(new_chain);
640 return NULL;
641}
642
643/*
644 * helpers for osd request op vectors.
645 */
646static int rbd_create_rw_ops(struct ceph_osd_req_op **ops,
647 int num_ops,
648 int opcode,
649 u32 payload_len)
650{
651 *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1),
652 GFP_NOIO);
653 if (!*ops)
654 return -ENOMEM;
655 (*ops)[0].op = opcode;
656 /*
657 * op extent offset and length will be set later on
658 * in calc_raw_layout()
659 */
660 (*ops)[0].payload_len = payload_len;
661 return 0;
662}
663
664static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
665{
666 kfree(ops);
667}
668
669/*
670 * Send ceph osd request
671 */
672static int rbd_do_request(struct request *rq,
673 struct rbd_device *dev,
674 struct ceph_snap_context *snapc,
675 u64 snapid,
676 const char *obj, u64 ofs, u64 len,
677 struct bio *bio,
678 struct page **pages,
679 int num_pages,
680 int flags,
681 struct ceph_osd_req_op *ops,
682 int num_reply,
683 void (*rbd_cb)(struct ceph_osd_request *req,
684 struct ceph_msg *msg))
685{
686 struct ceph_osd_request *req;
687 struct ceph_file_layout *layout;
688 int ret;
689 u64 bno;
690 struct timespec mtime = CURRENT_TIME;
691 struct rbd_request *req_data;
692 struct ceph_osd_request_head *reqhead;
693 struct rbd_image_header *header = &dev->header;
694
695 ret = -ENOMEM;
696 req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
697 if (!req_data)
698 goto done;
699
700 dout("rbd_do_request len=%lld ofs=%lld\n", len, ofs);
701
702 down_read(&header->snap_rwsem);
703
704 req = ceph_osdc_alloc_request(&dev->client->osdc, flags,
705 snapc,
706 ops,
707 false,
708 GFP_NOIO, pages, bio);
709 if (IS_ERR(req)) {
710 up_read(&header->snap_rwsem);
711 ret = PTR_ERR(req);
712 goto done_pages;
713 }
714
715 req->r_callback = rbd_cb;
716
717 req_data->rq = rq;
718 req_data->bio = bio;
719 req_data->pages = pages;
720 req_data->len = len;
721
722 req->r_priv = req_data;
723
724 reqhead = req->r_request->front.iov_base;
725 reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
726
727 strncpy(req->r_oid, obj, sizeof(req->r_oid));
728 req->r_oid_len = strlen(req->r_oid);
729
730 layout = &req->r_file_layout;
731 memset(layout, 0, sizeof(*layout));
732 layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
733 layout->fl_stripe_count = cpu_to_le32(1);
734 layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
735 layout->fl_pg_preferred = cpu_to_le32(-1);
736 layout->fl_pg_pool = cpu_to_le32(dev->poolid);
737 ceph_calc_raw_layout(&dev->client->osdc, layout, snapid,
738 ofs, &len, &bno, req, ops);
739
740 ceph_osdc_build_request(req, ofs, &len,
741 ops,
742 snapc,
743 &mtime,
744 req->r_oid, req->r_oid_len);
745 up_read(&header->snap_rwsem);
746
747 ret = ceph_osdc_start_request(&dev->client->osdc, req, false);
748 if (ret < 0)
749 goto done_err;
750
751 if (!rbd_cb) {
752 ret = ceph_osdc_wait_request(&dev->client->osdc, req);
753 ceph_osdc_put_request(req);
754 }
755 return ret;
756
757done_err:
758 bio_chain_put(req_data->bio);
759 ceph_osdc_put_request(req);
760done_pages:
761 kfree(req_data);
762done:
763 if (rq)
764 blk_end_request(rq, ret, len);
765 return ret;
766}
767
768/*
769 * Ceph osd op callback
770 */
771static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
772{
773 struct rbd_request *req_data = req->r_priv;
774 struct ceph_osd_reply_head *replyhead;
775 struct ceph_osd_op *op;
776 __s32 rc;
777 u64 bytes;
778 int read_op;
779
780 /* parse reply */
781 replyhead = msg->front.iov_base;
782 WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
783 op = (void *)(replyhead + 1);
784 rc = le32_to_cpu(replyhead->result);
785 bytes = le64_to_cpu(op->extent.length);
786 read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ);
787
788 dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc);
789
790 if (rc == -ENOENT && read_op) {
791 zero_bio_chain(req_data->bio, 0);
792 rc = 0;
793 } else if (rc == 0 && read_op && bytes < req_data->len) {
794 zero_bio_chain(req_data->bio, bytes);
795 bytes = req_data->len;
796 }
797
798 blk_end_request(req_data->rq, rc, bytes);
799
800 if (req_data->bio)
801 bio_chain_put(req_data->bio);
802
803 ceph_osdc_put_request(req);
804 kfree(req_data);
805}
806
807/*
808 * Do a synchronous ceph osd operation
809 */
810static int rbd_req_sync_op(struct rbd_device *dev,
811 struct ceph_snap_context *snapc,
812 u64 snapid,
813 int opcode,
814 int flags,
815 struct ceph_osd_req_op *orig_ops,
816 int num_reply,
817 const char *obj,
818 u64 ofs, u64 len,
819 char *buf)
820{
821 int ret;
822 struct page **pages;
823 int num_pages;
824 struct ceph_osd_req_op *ops = orig_ops;
825 u32 payload_len;
826
827 num_pages = calc_pages_for(ofs , len);
828 pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
829 if (IS_ERR(pages))
830 return PTR_ERR(pages);
831
832 if (!orig_ops) {
833 payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0);
834 ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
835 if (ret < 0)
836 goto done;
837
838 if ((flags & CEPH_OSD_FLAG_WRITE) && buf) {
839 ret = ceph_copy_to_page_vector(pages, buf, ofs, len);
840 if (ret < 0)
841 goto done_ops;
842 }
843 }
844
845 ret = rbd_do_request(NULL, dev, snapc, snapid,
846 obj, ofs, len, NULL,
847 pages, num_pages,
848 flags,
849 ops,
850 2,
851 NULL);
852 if (ret < 0)
853 goto done_ops;
854
855 if ((flags & CEPH_OSD_FLAG_READ) && buf)
856 ret = ceph_copy_from_page_vector(pages, buf, ofs, ret);
857
858done_ops:
859 if (!orig_ops)
860 rbd_destroy_ops(ops);
861done:
862 ceph_release_page_vector(pages, num_pages);
863 return ret;
864}
865
866/*
867 * Do an asynchronous ceph osd operation
868 */
869static int rbd_do_op(struct request *rq,
870 struct rbd_device *rbd_dev ,
871 struct ceph_snap_context *snapc,
872 u64 snapid,
873 int opcode, int flags, int num_reply,
874 u64 ofs, u64 len,
875 struct bio *bio)
876{
877 char *seg_name;
878 u64 seg_ofs;
879 u64 seg_len;
880 int ret;
881 struct ceph_osd_req_op *ops;
882 u32 payload_len;
883
884 seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
885 if (!seg_name)
886 return -ENOMEM;
887
888 seg_len = rbd_get_segment(&rbd_dev->header,
889 rbd_dev->header.block_name,
890 ofs, len,
891 seg_name, &seg_ofs);
892
893 payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0);
894
895 ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
896 if (ret < 0)
897 goto done;
898
899 /* we've taken care of segment sizes earlier when we
900 cloned the bios. We should never have a segment
901 truncated at this point */
902 BUG_ON(seg_len < len);
903
904 ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
905 seg_name, seg_ofs, seg_len,
906 bio,
907 NULL, 0,
908 flags,
909 ops,
910 num_reply,
911 rbd_req_cb);
912done:
913 kfree(seg_name);
914 return ret;
915}
916
917/*
918 * Request async osd write
919 */
920static int rbd_req_write(struct request *rq,
921 struct rbd_device *rbd_dev,
922 struct ceph_snap_context *snapc,
923 u64 ofs, u64 len,
924 struct bio *bio)
925{
926 return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP,
927 CEPH_OSD_OP_WRITE,
928 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
929 2,
930 ofs, len, bio);
931}
932
933/*
934 * Request async osd read
935 */
936static int rbd_req_read(struct request *rq,
937 struct rbd_device *rbd_dev,
938 u64 snapid,
939 u64 ofs, u64 len,
940 struct bio *bio)
941{
942 return rbd_do_op(rq, rbd_dev, NULL,
943 (snapid ? snapid : CEPH_NOSNAP),
944 CEPH_OSD_OP_READ,
945 CEPH_OSD_FLAG_READ,
946 2,
947 ofs, len, bio);
948}
949
950/*
951 * Request sync osd read
952 */
953static int rbd_req_sync_read(struct rbd_device *dev,
954 struct ceph_snap_context *snapc,
955 u64 snapid,
956 const char *obj,
957 u64 ofs, u64 len,
958 char *buf)
959{
960 return rbd_req_sync_op(dev, NULL,
961 (snapid ? snapid : CEPH_NOSNAP),
962 CEPH_OSD_OP_READ,
963 CEPH_OSD_FLAG_READ,
964 NULL,
965 1, obj, ofs, len, buf);
966}
967
968/*
969 * Request sync osd read
970 */
971static int rbd_req_sync_rollback_obj(struct rbd_device *dev,
972 u64 snapid,
973 const char *obj)
974{
975 struct ceph_osd_req_op *ops;
976 int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_ROLLBACK, 0);
977 if (ret < 0)
978 return ret;
979
980 ops[0].snap.snapid = snapid;
981
982 ret = rbd_req_sync_op(dev, NULL,
983 CEPH_NOSNAP,
984 0,
985 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
986 ops,
987 1, obj, 0, 0, NULL);
988
989 rbd_destroy_ops(ops);
990
991 if (ret < 0)
992 return ret;
993
994 return ret;
995}
996
997/*
998 * Request sync osd read
999 */
1000static int rbd_req_sync_exec(struct rbd_device *dev,
1001 const char *obj,
1002 const char *cls,
1003 const char *method,
1004 const char *data,
1005 int len)
1006{
1007 struct ceph_osd_req_op *ops;
1008 int cls_len = strlen(cls);
1009 int method_len = strlen(method);
1010 int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL,
1011 cls_len + method_len + len);
1012 if (ret < 0)
1013 return ret;
1014
1015 ops[0].cls.class_name = cls;
1016 ops[0].cls.class_len = (__u8)cls_len;
1017 ops[0].cls.method_name = method;
1018 ops[0].cls.method_len = (__u8)method_len;
1019 ops[0].cls.argc = 0;
1020 ops[0].cls.indata = data;
1021 ops[0].cls.indata_len = len;
1022
1023 ret = rbd_req_sync_op(dev, NULL,
1024 CEPH_NOSNAP,
1025 0,
1026 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
1027 ops,
1028 1, obj, 0, 0, NULL);
1029
1030 rbd_destroy_ops(ops);
1031
1032 dout("cls_exec returned %d\n", ret);
1033 return ret;
1034}
1035
1036/*
1037 * block device queue callback
1038 */
1039static void rbd_rq_fn(struct request_queue *q)
1040{
1041 struct rbd_device *rbd_dev = q->queuedata;
1042 struct request *rq;
1043 struct bio_pair *bp = NULL;
1044
1045 rq = blk_fetch_request(q);
1046
1047 while (1) {
1048 struct bio *bio;
1049 struct bio *rq_bio, *next_bio = NULL;
1050 bool do_write;
1051 int size, op_size = 0;
1052 u64 ofs;
1053
1054 /* peek at request from block layer */
1055 if (!rq)
1056 break;
1057
1058 dout("fetched request\n");
1059
1060 /* filter out block requests we don't understand */
1061 if ((rq->cmd_type != REQ_TYPE_FS)) {
1062 __blk_end_request_all(rq, 0);
1063 goto next;
1064 }
1065
1066 /* deduce our operation (read, write) */
1067 do_write = (rq_data_dir(rq) == WRITE);
1068
1069 size = blk_rq_bytes(rq);
1070 ofs = blk_rq_pos(rq) * 512ULL;
1071 rq_bio = rq->bio;
1072 if (do_write && rbd_dev->read_only) {
1073 __blk_end_request_all(rq, -EROFS);
1074 goto next;
1075 }
1076
1077 spin_unlock_irq(q->queue_lock);
1078
1079 dout("%s 0x%x bytes at 0x%llx\n",
1080 do_write ? "write" : "read",
1081 size, blk_rq_pos(rq) * 512ULL);
1082
1083 do {
1084 /* a bio clone to be passed down to OSD req */
1085 dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt);
1086 op_size = rbd_get_segment(&rbd_dev->header,
1087 rbd_dev->header.block_name,
1088 ofs, size,
1089 NULL, NULL);
1090 bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
1091 op_size, GFP_ATOMIC);
1092 if (!bio) {
1093 spin_lock_irq(q->queue_lock);
1094 __blk_end_request_all(rq, -ENOMEM);
1095 goto next;
1096 }
1097
1098 /* init OSD command: write or read */
1099 if (do_write)
1100 rbd_req_write(rq, rbd_dev,
1101 rbd_dev->header.snapc,
1102 ofs,
1103 op_size, bio);
1104 else
1105 rbd_req_read(rq, rbd_dev,
1106 cur_snap_id(rbd_dev),
1107 ofs,
1108 op_size, bio);
1109
1110 size -= op_size;
1111 ofs += op_size;
1112
1113 rq_bio = next_bio;
1114 } while (size > 0);
1115
1116 if (bp)
1117 bio_pair_release(bp);
1118
1119 spin_lock_irq(q->queue_lock);
1120next:
1121 rq = blk_fetch_request(q);
1122 }
1123}
1124
1125/*
1126 * a queue callback. Makes sure that we don't create a bio that spans across
1127 * multiple osd objects. One exception would be with a single page bios,
1128 * which we handle later at bio_chain_clone
1129 */
1130static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
1131 struct bio_vec *bvec)
1132{
1133 struct rbd_device *rbd_dev = q->queuedata;
1134 unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9);
1135 sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev);
1136 unsigned int bio_sectors = bmd->bi_size >> 9;
1137 int max;
1138
1139 max = (chunk_sectors - ((sector & (chunk_sectors - 1))
1140 + bio_sectors)) << 9;
1141 if (max < 0)
1142 max = 0; /* bio_add cannot handle a negative return */
1143 if (max <= bvec->bv_len && bio_sectors == 0)
1144 return bvec->bv_len;
1145 return max;
1146}
1147
1148static void rbd_free_disk(struct rbd_device *rbd_dev)
1149{
1150 struct gendisk *disk = rbd_dev->disk;
1151
1152 if (!disk)
1153 return;
1154
1155 rbd_header_free(&rbd_dev->header);
1156
1157 if (disk->flags & GENHD_FL_UP)
1158 del_gendisk(disk);
1159 if (disk->queue)
1160 blk_cleanup_queue(disk->queue);
1161 put_disk(disk);
1162}
1163
1164/*
1165 * reload the ondisk the header
1166 */
1167static int rbd_read_header(struct rbd_device *rbd_dev,
1168 struct rbd_image_header *header)
1169{
1170 ssize_t rc;
1171 struct rbd_image_header_ondisk *dh;
1172 int snap_count = 0;
1173 u64 snap_names_len = 0;
1174
1175 while (1) {
1176 int len = sizeof(*dh) +
1177 snap_count * sizeof(struct rbd_image_snap_ondisk) +
1178 snap_names_len;
1179
1180 rc = -ENOMEM;
1181 dh = kmalloc(len, GFP_KERNEL);
1182 if (!dh)
1183 return -ENOMEM;
1184
1185 rc = rbd_req_sync_read(rbd_dev,
1186 NULL, CEPH_NOSNAP,
1187 rbd_dev->obj_md_name,
1188 0, len,
1189 (char *)dh);
1190 if (rc < 0)
1191 goto out_dh;
1192
1193 rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
1194 if (rc < 0)
1195 goto out_dh;
1196
1197 if (snap_count != header->total_snaps) {
1198 snap_count = header->total_snaps;
1199 snap_names_len = header->snap_names_len;
1200 rbd_header_free(header);
1201 kfree(dh);
1202 continue;
1203 }
1204 break;
1205 }
1206
1207out_dh:
1208 kfree(dh);
1209 return rc;
1210}
1211
1212/*
1213 * create a snapshot
1214 */
1215static int rbd_header_add_snap(struct rbd_device *dev,
1216 const char *snap_name,
1217 gfp_t gfp_flags)
1218{
1219 int name_len = strlen(snap_name);
1220 u64 new_snapid;
1221 int ret;
1222 void *data, *data_start, *data_end;
1223
1224 /* we should create a snapshot only if we're pointing at the head */
1225 if (dev->cur_snap)
1226 return -EINVAL;
1227
1228 ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid,
1229 &new_snapid);
1230 dout("created snapid=%lld\n", new_snapid);
1231 if (ret < 0)
1232 return ret;
1233
1234 data = kmalloc(name_len + 16, gfp_flags);
1235 if (!data)
1236 return -ENOMEM;
1237
1238 data_start = data;
1239 data_end = data + name_len + 16;
1240
1241 ceph_encode_string_safe(&data, data_end, snap_name, name_len, bad);
1242 ceph_encode_64_safe(&data, data_end, new_snapid, bad);
1243
1244 ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add",
1245 data_start, data - data_start);
1246
1247 kfree(data_start);
1248
1249 if (ret < 0)
1250 return ret;
1251
1252 dev->header.snapc->seq = new_snapid;
1253
1254 return 0;
1255bad:
1256 return -ERANGE;
1257}
1258
1259/*
1260 * only read the first part of the ondisk header, without the snaps info
1261 */
1262static int rbd_update_snaps(struct rbd_device *rbd_dev)
1263{
1264 int ret;
1265 struct rbd_image_header h;
1266 u64 snap_seq;
1267
1268 ret = rbd_read_header(rbd_dev, &h);
1269 if (ret < 0)
1270 return ret;
1271
1272 down_write(&rbd_dev->header.snap_rwsem);
1273
1274 snap_seq = rbd_dev->header.snapc->seq;
1275
1276 kfree(rbd_dev->header.snapc);
1277 kfree(rbd_dev->header.snap_names);
1278 kfree(rbd_dev->header.snap_sizes);
1279
1280 rbd_dev->header.total_snaps = h.total_snaps;
1281 rbd_dev->header.snapc = h.snapc;
1282 rbd_dev->header.snap_names = h.snap_names;
1283 rbd_dev->header.snap_sizes = h.snap_sizes;
1284 rbd_dev->header.snapc->seq = snap_seq;
1285
1286 up_write(&rbd_dev->header.snap_rwsem);
1287
1288 return 0;
1289}
1290
1291static int rbd_init_disk(struct rbd_device *rbd_dev)
1292{
1293 struct gendisk *disk;
1294 struct request_queue *q;
1295 int rc;
1296 u64 total_size = 0;
1297
1298 /* contact OSD, request size info about the object being mapped */
1299 rc = rbd_read_header(rbd_dev, &rbd_dev->header);
1300 if (rc)
1301 return rc;
1302
1303 rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size);
1304 if (rc)
1305 return rc;
1306
1307 /* create gendisk info */
1308 rc = -ENOMEM;
1309 disk = alloc_disk(RBD_MINORS_PER_MAJOR);
1310 if (!disk)
1311 goto out;
1312
1313 sprintf(disk->disk_name, DRV_NAME "%d", rbd_dev->id);
1314 disk->major = rbd_dev->major;
1315 disk->first_minor = 0;
1316 disk->fops = &rbd_bd_ops;
1317 disk->private_data = rbd_dev;
1318
1319 /* init rq */
1320 rc = -ENOMEM;
1321 q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
1322 if (!q)
1323 goto out_disk;
1324 blk_queue_merge_bvec(q, rbd_merge_bvec);
1325 disk->queue = q;
1326
1327 q->queuedata = rbd_dev;
1328
1329 rbd_dev->disk = disk;
1330 rbd_dev->q = q;
1331
1332 /* finally, announce the disk to the world */
1333 set_capacity(disk, total_size / 512ULL);
1334 add_disk(disk);
1335
1336 pr_info("%s: added with size 0x%llx\n",
1337 disk->disk_name, (unsigned long long)total_size);
1338 return 0;
1339
1340out_disk:
1341 put_disk(disk);
1342out:
1343 return rc;
1344}
1345
1346/********************************************************************
1347 * /sys/class/rbd/
1348 * add map rados objects to blkdev
1349 * remove unmap rados objects
1350 * list show mappings
1351 *******************************************************************/
1352
1353static void class_rbd_release(struct class *cls)
1354{
1355 kfree(cls);
1356}
1357
1358static ssize_t class_rbd_list(struct class *c,
1359 struct class_attribute *attr,
1360 char *data)
1361{
1362 int n = 0;
1363 struct list_head *tmp;
1364 int max = PAGE_SIZE;
1365
1366 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1367
1368 n += snprintf(data, max,
1369 "#id\tmajor\tclient_name\tpool\tname\tsnap\tKB\n");
1370
1371 list_for_each(tmp, &rbd_dev_list) {
1372 struct rbd_device *rbd_dev;
1373
1374 rbd_dev = list_entry(tmp, struct rbd_device, node);
1375 n += snprintf(data+n, max-n,
1376 "%d\t%d\tclient%lld\t%s\t%s\t%s\t%lld\n",
1377 rbd_dev->id,
1378 rbd_dev->major,
1379 ceph_client_id(rbd_dev->client),
1380 rbd_dev->pool_name,
1381 rbd_dev->obj, rbd_dev->snap_name,
1382 rbd_dev->header.image_size >> 10);
1383 if (n == max)
1384 break;
1385 }
1386
1387 mutex_unlock(&ctl_mutex);
1388 return n;
1389}
1390
1391static ssize_t class_rbd_add(struct class *c,
1392 struct class_attribute *attr,
1393 const char *buf, size_t count)
1394{
1395 struct ceph_osd_client *osdc;
1396 struct rbd_device *rbd_dev;
1397 ssize_t rc = -ENOMEM;
1398 int irc, new_id = 0;
1399 struct list_head *tmp;
1400 char *mon_dev_name;
1401 char *options;
1402
1403 if (!try_module_get(THIS_MODULE))
1404 return -ENODEV;
1405
1406 mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
1407 if (!mon_dev_name)
1408 goto err_out_mod;
1409
1410 options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
1411 if (!options)
1412 goto err_mon_dev;
1413
1414 /* new rbd_device object */
1415 rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
1416 if (!rbd_dev)
1417 goto err_out_opt;
1418
1419 /* static rbd_device initialization */
1420 spin_lock_init(&rbd_dev->lock);
1421 INIT_LIST_HEAD(&rbd_dev->node);
1422
1423 /* generate unique id: find highest unique id, add one */
1424 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1425
1426 list_for_each(tmp, &rbd_dev_list) {
1427 struct rbd_device *rbd_dev;
1428
1429 rbd_dev = list_entry(tmp, struct rbd_device, node);
1430 if (rbd_dev->id >= new_id)
1431 new_id = rbd_dev->id + 1;
1432 }
1433
1434 rbd_dev->id = new_id;
1435
1436 /* add to global list */
1437 list_add_tail(&rbd_dev->node, &rbd_dev_list);
1438
1439 /* parse add command */
1440 if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s "
1441 "%" __stringify(RBD_MAX_OPT_LEN) "s "
1442 "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s "
1443 "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s"
1444 "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
1445 mon_dev_name, options, rbd_dev->pool_name,
1446 rbd_dev->obj, rbd_dev->snap_name) < 4) {
1447 rc = -EINVAL;
1448 goto err_out_slot;
1449 }
1450
1451 if (rbd_dev->snap_name[0] == 0)
1452 rbd_dev->snap_name[0] = '-';
1453
1454 rbd_dev->obj_len = strlen(rbd_dev->obj);
1455 snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s",
1456 rbd_dev->obj, RBD_SUFFIX);
1457
1458 /* initialize rest of new object */
1459 snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id);
1460 rc = rbd_get_client(rbd_dev, mon_dev_name, options);
1461 if (rc < 0)
1462 goto err_out_slot;
1463
1464 mutex_unlock(&ctl_mutex);
1465
1466 /* pick the pool */
1467 osdc = &rbd_dev->client->osdc;
1468 rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
1469 if (rc < 0)
1470 goto err_out_client;
1471 rbd_dev->poolid = rc;
1472
1473 /* register our block device */
1474 irc = register_blkdev(0, rbd_dev->name);
1475 if (irc < 0) {
1476 rc = irc;
1477 goto err_out_client;
1478 }
1479 rbd_dev->major = irc;
1480
1481 /* set up and announce blkdev mapping */
1482 rc = rbd_init_disk(rbd_dev);
1483 if (rc)
1484 goto err_out_blkdev;
1485
1486 return count;
1487
1488err_out_blkdev:
1489 unregister_blkdev(rbd_dev->major, rbd_dev->name);
1490err_out_client:
1491 rbd_put_client(rbd_dev);
1492 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1493err_out_slot:
1494 list_del_init(&rbd_dev->node);
1495 mutex_unlock(&ctl_mutex);
1496
1497 kfree(rbd_dev);
1498err_out_opt:
1499 kfree(options);
1500err_mon_dev:
1501 kfree(mon_dev_name);
1502err_out_mod:
1503 dout("Error adding device %s\n", buf);
1504 module_put(THIS_MODULE);
1505 return rc;
1506}
1507
1508static struct rbd_device *__rbd_get_dev(unsigned long id)
1509{
1510 struct list_head *tmp;
1511 struct rbd_device *rbd_dev;
1512
1513 list_for_each(tmp, &rbd_dev_list) {
1514 rbd_dev = list_entry(tmp, struct rbd_device, node);
1515 if (rbd_dev->id == id)
1516 return rbd_dev;
1517 }
1518 return NULL;
1519}
1520
1521static ssize_t class_rbd_remove(struct class *c,
1522 struct class_attribute *attr,
1523 const char *buf,
1524 size_t count)
1525{
1526 struct rbd_device *rbd_dev = NULL;
1527 int target_id, rc;
1528 unsigned long ul;
1529
1530 rc = strict_strtoul(buf, 10, &ul);
1531 if (rc)
1532 return rc;
1533
1534 /* convert to int; abort if we lost anything in the conversion */
1535 target_id = (int) ul;
1536 if (target_id != ul)
1537 return -EINVAL;
1538
1539 /* remove object from list immediately */
1540 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1541
1542 rbd_dev = __rbd_get_dev(target_id);
1543 if (rbd_dev)
1544 list_del_init(&rbd_dev->node);
1545
1546 mutex_unlock(&ctl_mutex);
1547
1548 if (!rbd_dev)
1549 return -ENOENT;
1550
1551 rbd_put_client(rbd_dev);
1552
1553 /* clean up and free blkdev */
1554 rbd_free_disk(rbd_dev);
1555 unregister_blkdev(rbd_dev->major, rbd_dev->name);
1556 kfree(rbd_dev);
1557
1558 /* release module ref */
1559 module_put(THIS_MODULE);
1560
1561 return count;
1562}
1563
1564static ssize_t class_rbd_snaps_list(struct class *c,
1565 struct class_attribute *attr,
1566 char *data)
1567{
1568 struct rbd_device *rbd_dev = NULL;
1569 struct list_head *tmp;
1570 struct rbd_image_header *header;
1571 int i, n = 0, max = PAGE_SIZE;
1572 int ret;
1573
1574 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1575
1576 n += snprintf(data, max, "#id\tsnap\tKB\n");
1577
1578 list_for_each(tmp, &rbd_dev_list) {
1579 char *names, *p;
1580 struct ceph_snap_context *snapc;
1581
1582 rbd_dev = list_entry(tmp, struct rbd_device, node);
1583 header = &rbd_dev->header;
1584
1585 down_read(&header->snap_rwsem);
1586
1587 names = header->snap_names;
1588 snapc = header->snapc;
1589
1590 n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n",
1591 rbd_dev->id, RBD_SNAP_HEAD_NAME,
1592 header->image_size >> 10,
1593 (!rbd_dev->cur_snap ? " (*)" : ""));
1594 if (n == max)
1595 break;
1596
1597 p = names;
1598 for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) {
1599 n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n",
1600 rbd_dev->id, p, header->snap_sizes[i] >> 10,
1601 (rbd_dev->cur_snap &&
1602 (snap_index(header, i) == rbd_dev->cur_snap) ?
1603 " (*)" : ""));
1604 if (n == max)
1605 break;
1606 }
1607
1608 up_read(&header->snap_rwsem);
1609 }
1610
1611
1612 ret = n;
1613 mutex_unlock(&ctl_mutex);
1614 return ret;
1615}
1616
1617static ssize_t class_rbd_snaps_refresh(struct class *c,
1618 struct class_attribute *attr,
1619 const char *buf,
1620 size_t count)
1621{
1622 struct rbd_device *rbd_dev = NULL;
1623 int target_id, rc;
1624 unsigned long ul;
1625 int ret = count;
1626
1627 rc = strict_strtoul(buf, 10, &ul);
1628 if (rc)
1629 return rc;
1630
1631 /* convert to int; abort if we lost anything in the conversion */
1632 target_id = (int) ul;
1633 if (target_id != ul)
1634 return -EINVAL;
1635
1636 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1637
1638 rbd_dev = __rbd_get_dev(target_id);
1639 if (!rbd_dev) {
1640 ret = -ENOENT;
1641 goto done;
1642 }
1643
1644 rc = rbd_update_snaps(rbd_dev);
1645 if (rc < 0)
1646 ret = rc;
1647
1648done:
1649 mutex_unlock(&ctl_mutex);
1650 return ret;
1651}
1652
1653static ssize_t class_rbd_snap_create(struct class *c,
1654 struct class_attribute *attr,
1655 const char *buf,
1656 size_t count)
1657{
1658 struct rbd_device *rbd_dev = NULL;
1659 int target_id, ret;
1660 char *name;
1661
1662 name = kmalloc(RBD_MAX_SNAP_NAME_LEN + 1, GFP_KERNEL);
1663 if (!name)
1664 return -ENOMEM;
1665
1666 /* parse snaps add command */
1667 if (sscanf(buf, "%d "
1668 "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
1669 &target_id,
1670 name) != 2) {
1671 ret = -EINVAL;
1672 goto done;
1673 }
1674
1675 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1676
1677 rbd_dev = __rbd_get_dev(target_id);
1678 if (!rbd_dev) {
1679 ret = -ENOENT;
1680 goto done_unlock;
1681 }
1682
1683 ret = rbd_header_add_snap(rbd_dev,
1684 name, GFP_KERNEL);
1685 if (ret < 0)
1686 goto done_unlock;
1687
1688 ret = rbd_update_snaps(rbd_dev);
1689 if (ret < 0)
1690 goto done_unlock;
1691
1692 ret = count;
1693done_unlock:
1694 mutex_unlock(&ctl_mutex);
1695done:
1696 kfree(name);
1697 return ret;
1698}
1699
1700static ssize_t class_rbd_rollback(struct class *c,
1701 struct class_attribute *attr,
1702 const char *buf,
1703 size_t count)
1704{
1705 struct rbd_device *rbd_dev = NULL;
1706 int target_id, ret;
1707 u64 snapid;
1708 char snap_name[RBD_MAX_SNAP_NAME_LEN];
1709 u64 cur_ofs;
1710 char *seg_name;
1711
1712 /* parse snaps add command */
1713 if (sscanf(buf, "%d "
1714 "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
1715 &target_id,
1716 snap_name) != 2) {
1717 return -EINVAL;
1718 }
1719
1720 ret = -ENOMEM;
1721 seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
1722 if (!seg_name)
1723 return ret;
1724
1725 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1726
1727 rbd_dev = __rbd_get_dev(target_id);
1728 if (!rbd_dev) {
1729 ret = -ENOENT;
1730 goto done_unlock;
1731 }
1732
1733 ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL);
1734 if (ret < 0)
1735 goto done_unlock;
1736
1737 dout("snapid=%lld\n", snapid);
1738
1739 cur_ofs = 0;
1740 while (cur_ofs < rbd_dev->header.image_size) {
1741 cur_ofs += rbd_get_segment(&rbd_dev->header,
1742 rbd_dev->obj,
1743 cur_ofs, (u64)-1,
1744 seg_name, NULL);
1745 dout("seg_name=%s\n", seg_name);
1746
1747 ret = rbd_req_sync_rollback_obj(rbd_dev, snapid, seg_name);
1748 if (ret < 0)
1749 pr_warning("could not roll back obj %s err=%d\n",
1750 seg_name, ret);
1751 }
1752
1753 ret = rbd_update_snaps(rbd_dev);
1754 if (ret < 0)
1755 goto done_unlock;
1756
1757 ret = count;
1758
1759done_unlock:
1760 mutex_unlock(&ctl_mutex);
1761 kfree(seg_name);
1762
1763 return ret;
1764}
1765
1766static struct class_attribute class_rbd_attrs[] = {
1767 __ATTR(add, 0200, NULL, class_rbd_add),
1768 __ATTR(remove, 0200, NULL, class_rbd_remove),
1769 __ATTR(list, 0444, class_rbd_list, NULL),
1770 __ATTR(snaps_refresh, 0200, NULL, class_rbd_snaps_refresh),
1771 __ATTR(snap_create, 0200, NULL, class_rbd_snap_create),
1772 __ATTR(snaps_list, 0444, class_rbd_snaps_list, NULL),
1773 __ATTR(snap_rollback, 0200, NULL, class_rbd_rollback),
1774 __ATTR_NULL
1775};
1776
1777/*
1778 * create control files in sysfs
1779 * /sys/class/rbd/...
1780 */
1781static int rbd_sysfs_init(void)
1782{
1783 int ret = -ENOMEM;
1784
1785 class_rbd = kzalloc(sizeof(*class_rbd), GFP_KERNEL);
1786 if (!class_rbd)
1787 goto out;
1788
1789 class_rbd->name = DRV_NAME;
1790 class_rbd->owner = THIS_MODULE;
1791 class_rbd->class_release = class_rbd_release;
1792 class_rbd->class_attrs = class_rbd_attrs;
1793
1794 ret = class_register(class_rbd);
1795 if (ret)
1796 goto out_class;
1797 return 0;
1798
1799out_class:
1800 kfree(class_rbd);
1801 class_rbd = NULL;
1802 pr_err(DRV_NAME ": failed to create class rbd\n");
1803out:
1804 return ret;
1805}
1806
1807static void rbd_sysfs_cleanup(void)
1808{
1809 if (class_rbd)
1810 class_destroy(class_rbd);
1811 class_rbd = NULL;
1812}
1813
1814int __init rbd_init(void)
1815{
1816 int rc;
1817
1818 rc = rbd_sysfs_init();
1819 if (rc)
1820 return rc;
1821 spin_lock_init(&node_lock);
1822 pr_info("loaded " DRV_NAME_LONG "\n");
1823 return 0;
1824}
1825
1826void __exit rbd_exit(void)
1827{
1828 rbd_sysfs_cleanup();
1829}
1830
1831module_init(rbd_init);
1832module_exit(rbd_exit);
1833
1834MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
1835MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
1836MODULE_DESCRIPTION("rados block device");
1837
1838/* following authorship retained from original osdblk.c */
1839MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
1840
1841MODULE_LICENSE("GPL");
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h
new file mode 100644
index 000000000000..fc6c678aa2cb
--- /dev/null
+++ b/drivers/block/rbd_types.h
@@ -0,0 +1,73 @@
1/*
2 * Ceph - scalable distributed file system
3 *
4 * Copyright (C) 2004-2010 Sage Weil <sage@newdream.net>
5 *
6 * This is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License version 2.1, as published by the Free Software
9 * Foundation. See file COPYING.
10 *
11 */
12
13#ifndef CEPH_RBD_TYPES_H
14#define CEPH_RBD_TYPES_H
15
16#include <linux/types.h>
17
18/*
19 * rbd image 'foo' consists of objects
20 * foo.rbd - image metadata
21 * foo.00000000
22 * foo.00000001
23 * ... - data
24 */
25
26#define RBD_SUFFIX ".rbd"
27#define RBD_DIRECTORY "rbd_directory"
28#define RBD_INFO "rbd_info"
29
30#define RBD_DEFAULT_OBJ_ORDER 22 /* 4MB */
31#define RBD_MIN_OBJ_ORDER 16
32#define RBD_MAX_OBJ_ORDER 30
33
34#define RBD_MAX_OBJ_NAME_LEN 96
35#define RBD_MAX_SEG_NAME_LEN 128
36
37#define RBD_COMP_NONE 0
38#define RBD_CRYPT_NONE 0
39
40#define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n"
41#define RBD_HEADER_SIGNATURE "RBD"
42#define RBD_HEADER_VERSION "001.005"
43
44struct rbd_info {
45 __le64 max_id;
46} __attribute__ ((packed));
47
48struct rbd_image_snap_ondisk {
49 __le64 id;
50 __le64 image_size;
51} __attribute__((packed));
52
53struct rbd_image_header_ondisk {
54 char text[40];
55 char block_name[24];
56 char signature[4];
57 char version[8];
58 struct {
59 __u8 order;
60 __u8 crypt_type;
61 __u8 comp_type;
62 __u8 unused;
63 } __attribute__((packed)) options;
64 __le64 image_size;
65 __le64 snap_seq;
66 __le32 snap_count;
67 __le32 reserved;
68 __le64 snap_names_len;
69 struct rbd_image_snap_ondisk snaps[0];
70} __attribute__((packed));
71
72
73#endif
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1101e251a629..8320490226b7 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -2,7 +2,6 @@
2#include <linux/spinlock.h> 2#include <linux/spinlock.h>
3#include <linux/slab.h> 3#include <linux/slab.h>
4#include <linux/blkdev.h> 4#include <linux/blkdev.h>
5#include <linux/smp_lock.h>
6#include <linux/hdreg.h> 5#include <linux/hdreg.h>
7#include <linux/virtio.h> 6#include <linux/virtio.h>
8#include <linux/virtio_blk.h> 7#include <linux/virtio_blk.h>
@@ -222,8 +221,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
222 return err; 221 return err;
223} 222}
224 223
225static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, 224static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
226 unsigned cmd, unsigned long data) 225 unsigned int cmd, unsigned long data)
227{ 226{
228 struct gendisk *disk = bdev->bd_disk; 227 struct gendisk *disk = bdev->bd_disk;
229 struct virtio_blk *vblk = disk->private_data; 228 struct virtio_blk *vblk = disk->private_data;
@@ -238,18 +237,6 @@ static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode,
238 (void __user *)data); 237 (void __user *)data);
239} 238}
240 239
241static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
242 unsigned int cmd, unsigned long param)
243{
244 int ret;
245
246 lock_kernel();
247 ret = virtblk_locked_ioctl(bdev, mode, cmd, param);
248 unlock_kernel();
249
250 return ret;
251}
252
253/* We provide getgeo only to please some old bootloader/partitioning tools */ 240/* We provide getgeo only to please some old bootloader/partitioning tools */
254static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) 241static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
255{ 242{
diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig
index 4b66c69eaf57..5ddf67e76f8b 100644
--- a/drivers/char/agp/Kconfig
+++ b/drivers/char/agp/Kconfig
@@ -57,7 +57,7 @@ config AGP_AMD
57 57
58config AGP_AMD64 58config AGP_AMD64
59 tristate "AMD Opteron/Athlon64 on-CPU GART support" 59 tristate "AMD Opteron/Athlon64 on-CPU GART support"
60 depends on AGP && X86 && K8_NB 60 depends on AGP && X86 && AMD_NB
61 help 61 help
62 This option gives you AGP support for the GLX component of 62 This option gives you AGP support for the GLX component of
63 X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs. 63 X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs.
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 70312da4c968..42396df55556 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -15,7 +15,7 @@
15#include <linux/mmzone.h> 15#include <linux/mmzone.h>
16#include <asm/page.h> /* PAGE_SIZE */ 16#include <asm/page.h> /* PAGE_SIZE */
17#include <asm/e820.h> 17#include <asm/e820.h>
18#include <asm/k8.h> 18#include <asm/amd_nb.h>
19#include <asm/gart.h> 19#include <asm/gart.h>
20#include "agp.h" 20#include "agp.h"
21 21
@@ -124,7 +124,7 @@ static int amd64_fetch_size(void)
124 u32 temp; 124 u32 temp;
125 struct aper_size_info_32 *values; 125 struct aper_size_info_32 *values;
126 126
127 dev = k8_northbridges[0]; 127 dev = k8_northbridges.nb_misc[0];
128 if (dev==NULL) 128 if (dev==NULL)
129 return 0; 129 return 0;
130 130
@@ -181,10 +181,14 @@ static int amd_8151_configure(void)
181 unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real); 181 unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real);
182 int i; 182 int i;
183 183
184 if (!k8_northbridges.gart_supported)
185 return 0;
186
184 /* Configure AGP regs in each x86-64 host bridge. */ 187 /* Configure AGP regs in each x86-64 host bridge. */
185 for (i = 0; i < num_k8_northbridges; i++) { 188 for (i = 0; i < k8_northbridges.num; i++) {
186 agp_bridge->gart_bus_addr = 189 agp_bridge->gart_bus_addr =
187 amd64_configure(k8_northbridges[i], gatt_bus); 190 amd64_configure(k8_northbridges.nb_misc[i],
191 gatt_bus);
188 } 192 }
189 k8_flush_garts(); 193 k8_flush_garts();
190 return 0; 194 return 0;
@@ -195,11 +199,15 @@ static void amd64_cleanup(void)
195{ 199{
196 u32 tmp; 200 u32 tmp;
197 int i; 201 int i;
198 for (i = 0; i < num_k8_northbridges; i++) { 202
199 struct pci_dev *dev = k8_northbridges[i]; 203 if (!k8_northbridges.gart_supported)
204 return;
205
206 for (i = 0; i < k8_northbridges.num; i++) {
207 struct pci_dev *dev = k8_northbridges.nb_misc[i];
200 /* disable gart translation */ 208 /* disable gart translation */
201 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp); 209 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp);
202 tmp &= ~AMD64_GARTEN; 210 tmp &= ~GARTEN;
203 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, tmp); 211 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, tmp);
204 } 212 }
205} 213}
@@ -313,22 +321,25 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp,
313 if (order < 0 || !agp_aperture_valid(aper, (32*1024*1024)<<order)) 321 if (order < 0 || !agp_aperture_valid(aper, (32*1024*1024)<<order))
314 return -1; 322 return -1;
315 323
316 pci_write_config_dword(nb, AMD64_GARTAPERTURECTL, order << 1); 324 gart_set_size_and_enable(nb, order);
317 pci_write_config_dword(nb, AMD64_GARTAPERTUREBASE, aper >> 25); 325 pci_write_config_dword(nb, AMD64_GARTAPERTUREBASE, aper >> 25);
318 326
319 return 0; 327 return 0;
320} 328}
321 329
322static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr) 330static __devinit int cache_nbs(struct pci_dev *pdev, u32 cap_ptr)
323{ 331{
324 int i; 332 int i;
325 333
326 if (cache_k8_northbridges() < 0) 334 if (cache_k8_northbridges() < 0)
327 return -ENODEV; 335 return -ENODEV;
328 336
337 if (!k8_northbridges.gart_supported)
338 return -ENODEV;
339
329 i = 0; 340 i = 0;
330 for (i = 0; i < num_k8_northbridges; i++) { 341 for (i = 0; i < k8_northbridges.num; i++) {
331 struct pci_dev *dev = k8_northbridges[i]; 342 struct pci_dev *dev = k8_northbridges.nb_misc[i];
332 if (fix_northbridge(dev, pdev, cap_ptr) < 0) { 343 if (fix_northbridge(dev, pdev, cap_ptr) < 0) {
333 dev_err(&dev->dev, "no usable aperture found\n"); 344 dev_err(&dev->dev, "no usable aperture found\n");
334#ifdef __x86_64__ 345#ifdef __x86_64__
@@ -405,7 +416,8 @@ static int __devinit uli_agp_init(struct pci_dev *pdev)
405 } 416 }
406 417
407 /* shadow x86-64 registers into ULi registers */ 418 /* shadow x86-64 registers into ULi registers */
408 pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &httfea); 419 pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE,
420 &httfea);
409 421
410 /* if x86-64 aperture base is beyond 4G, exit here */ 422 /* if x86-64 aperture base is beyond 4G, exit here */
411 if ((httfea & 0x7fff) >> (32 - 25)) { 423 if ((httfea & 0x7fff) >> (32 - 25)) {
@@ -472,7 +484,8 @@ static int nforce3_agp_init(struct pci_dev *pdev)
472 pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp); 484 pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp);
473 485
474 /* shadow x86-64 registers into NVIDIA registers */ 486 /* shadow x86-64 registers into NVIDIA registers */
475 pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &apbase); 487 pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE,
488 &apbase);
476 489
477 /* if x86-64 aperture base is beyond 4G, exit here */ 490 /* if x86-64 aperture base is beyond 4G, exit here */
478 if ( (apbase & 0x7fff) >> (32 - 25) ) { 491 if ( (apbase & 0x7fff) >> (32 - 25) ) {
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index d2abf5143983..64255cef8a7d 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -984,7 +984,9 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge)
984 984
985 bridge->driver->cache_flush(); 985 bridge->driver->cache_flush();
986#ifdef CONFIG_X86 986#ifdef CONFIG_X86
987 set_memory_uc((unsigned long)table, 1 << page_order); 987 if (set_memory_uc((unsigned long)table, 1 << page_order))
988 printk(KERN_WARNING "Could not set GATT table memory to UC!");
989
988 bridge->gatt_table = (void *)table; 990 bridge->gatt_table = (void *)table;
989#else 991#else
990 bridge->gatt_table = ioremap_nocache(virt_to_phys(table), 992 bridge->gatt_table = ioremap_nocache(virt_to_phys(table),
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index 05ad4a17a28f..7c4133582dba 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -47,6 +47,16 @@ enum tpm_duration {
47#define TPM_MAX_PROTECTED_ORDINAL 12 47#define TPM_MAX_PROTECTED_ORDINAL 12
48#define TPM_PROTECTED_ORDINAL_MASK 0xFF 48#define TPM_PROTECTED_ORDINAL_MASK 0xFF
49 49
50/*
51 * Bug workaround - some TPM's don't flush the most
52 * recently changed pcr on suspend, so force the flush
53 * with an extend to the selected _unused_ non-volatile pcr.
54 */
55static int tpm_suspend_pcr;
56module_param_named(suspend_pcr, tpm_suspend_pcr, uint, 0644);
57MODULE_PARM_DESC(suspend_pcr,
58 "PCR to use for dummy writes to faciltate flush on suspend.");
59
50static LIST_HEAD(tpm_chip_list); 60static LIST_HEAD(tpm_chip_list);
51static DEFINE_SPINLOCK(driver_lock); 61static DEFINE_SPINLOCK(driver_lock);
52static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES); 62static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES);
@@ -1077,18 +1087,6 @@ static struct tpm_input_header savestate_header = {
1077 .ordinal = TPM_ORD_SAVESTATE 1087 .ordinal = TPM_ORD_SAVESTATE
1078}; 1088};
1079 1089
1080/* Bug workaround - some TPM's don't flush the most
1081 * recently changed pcr on suspend, so force the flush
1082 * with an extend to the selected _unused_ non-volatile pcr.
1083 */
1084static int tpm_suspend_pcr;
1085static int __init tpm_suspend_setup(char *str)
1086{
1087 get_option(&str, &tpm_suspend_pcr);
1088 return 1;
1089}
1090__setup("tpm_suspend_pcr=", tpm_suspend_setup);
1091
1092/* 1090/*
1093 * We are about to suspend. Save the TPM state 1091 * We are about to suspend. Save the TPM state
1094 * so that it can be restored. 1092 * so that it can be restored.
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 0f69c5ec0ecd..6c1b676643a9 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -48,6 +48,9 @@ struct ports_driver_data {
48 /* Used for exporting per-port information to debugfs */ 48 /* Used for exporting per-port information to debugfs */
49 struct dentry *debugfs_dir; 49 struct dentry *debugfs_dir;
50 50
51 /* List of all the devices we're handling */
52 struct list_head portdevs;
53
51 /* Number of devices this driver is handling */ 54 /* Number of devices this driver is handling */
52 unsigned int index; 55 unsigned int index;
53 56
@@ -108,6 +111,9 @@ struct port_buffer {
108 * ports for that device (vdev->priv). 111 * ports for that device (vdev->priv).
109 */ 112 */
110struct ports_device { 113struct ports_device {
114 /* Next portdev in the list, head is in the pdrvdata struct */
115 struct list_head list;
116
111 /* 117 /*
112 * Workqueue handlers where we process deferred work after 118 * Workqueue handlers where we process deferred work after
113 * notification 119 * notification
@@ -178,15 +184,21 @@ struct port {
178 struct console cons; 184 struct console cons;
179 185
180 /* Each port associates with a separate char device */ 186 /* Each port associates with a separate char device */
181 struct cdev cdev; 187 struct cdev *cdev;
182 struct device *dev; 188 struct device *dev;
183 189
190 /* Reference-counting to handle port hot-unplugs and file operations */
191 struct kref kref;
192
184 /* A waitqueue for poll() or blocking read operations */ 193 /* A waitqueue for poll() or blocking read operations */
185 wait_queue_head_t waitqueue; 194 wait_queue_head_t waitqueue;
186 195
187 /* The 'name' of the port that we expose via sysfs properties */ 196 /* The 'name' of the port that we expose via sysfs properties */
188 char *name; 197 char *name;
189 198
199 /* We can notify apps of host connect / disconnect events via SIGIO */
200 struct fasync_struct *async_queue;
201
190 /* The 'id' to identify the port with the Host */ 202 /* The 'id' to identify the port with the Host */
191 u32 id; 203 u32 id;
192 204
@@ -221,6 +233,41 @@ out:
221 return port; 233 return port;
222} 234}
223 235
236static struct port *find_port_by_devt_in_portdev(struct ports_device *portdev,
237 dev_t dev)
238{
239 struct port *port;
240 unsigned long flags;
241
242 spin_lock_irqsave(&portdev->ports_lock, flags);
243 list_for_each_entry(port, &portdev->ports, list)
244 if (port->cdev->dev == dev)
245 goto out;
246 port = NULL;
247out:
248 spin_unlock_irqrestore(&portdev->ports_lock, flags);
249
250 return port;
251}
252
253static struct port *find_port_by_devt(dev_t dev)
254{
255 struct ports_device *portdev;
256 struct port *port;
257 unsigned long flags;
258
259 spin_lock_irqsave(&pdrvdata_lock, flags);
260 list_for_each_entry(portdev, &pdrvdata.portdevs, list) {
261 port = find_port_by_devt_in_portdev(portdev, dev);
262 if (port)
263 goto out;
264 }
265 port = NULL;
266out:
267 spin_unlock_irqrestore(&pdrvdata_lock, flags);
268 return port;
269}
270
224static struct port *find_port_by_id(struct ports_device *portdev, u32 id) 271static struct port *find_port_by_id(struct ports_device *portdev, u32 id)
225{ 272{
226 struct port *port; 273 struct port *port;
@@ -410,7 +457,10 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id,
410static ssize_t send_control_msg(struct port *port, unsigned int event, 457static ssize_t send_control_msg(struct port *port, unsigned int event,
411 unsigned int value) 458 unsigned int value)
412{ 459{
413 return __send_control_msg(port->portdev, port->id, event, value); 460 /* Did the port get unplugged before userspace closed it? */
461 if (port->portdev)
462 return __send_control_msg(port->portdev, port->id, event, value);
463 return 0;
414} 464}
415 465
416/* Callers must take the port->outvq_lock */ 466/* Callers must take the port->outvq_lock */
@@ -525,6 +575,10 @@ static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count,
525/* The condition that must be true for polling to end */ 575/* The condition that must be true for polling to end */
526static bool will_read_block(struct port *port) 576static bool will_read_block(struct port *port)
527{ 577{
578 if (!port->guest_connected) {
579 /* Port got hot-unplugged. Let's exit. */
580 return false;
581 }
528 return !port_has_data(port) && port->host_connected; 582 return !port_has_data(port) && port->host_connected;
529} 583}
530 584
@@ -575,6 +629,9 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf,
575 if (ret < 0) 629 if (ret < 0)
576 return ret; 630 return ret;
577 } 631 }
632 /* Port got hot-unplugged. */
633 if (!port->guest_connected)
634 return -ENODEV;
578 /* 635 /*
579 * We could've received a disconnection message while we were 636 * We could've received a disconnection message while we were
580 * waiting for more data. 637 * waiting for more data.
@@ -616,6 +673,9 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
616 if (ret < 0) 673 if (ret < 0)
617 return ret; 674 return ret;
618 } 675 }
676 /* Port got hot-unplugged. */
677 if (!port->guest_connected)
678 return -ENODEV;
619 679
620 count = min((size_t)(32 * 1024), count); 680 count = min((size_t)(32 * 1024), count);
621 681
@@ -656,6 +716,10 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait)
656 port = filp->private_data; 716 port = filp->private_data;
657 poll_wait(filp, &port->waitqueue, wait); 717 poll_wait(filp, &port->waitqueue, wait);
658 718
719 if (!port->guest_connected) {
720 /* Port got unplugged */
721 return POLLHUP;
722 }
659 ret = 0; 723 ret = 0;
660 if (!will_read_block(port)) 724 if (!will_read_block(port))
661 ret |= POLLIN | POLLRDNORM; 725 ret |= POLLIN | POLLRDNORM;
@@ -667,6 +731,8 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait)
667 return ret; 731 return ret;
668} 732}
669 733
734static void remove_port(struct kref *kref);
735
670static int port_fops_release(struct inode *inode, struct file *filp) 736static int port_fops_release(struct inode *inode, struct file *filp)
671{ 737{
672 struct port *port; 738 struct port *port;
@@ -687,6 +753,16 @@ static int port_fops_release(struct inode *inode, struct file *filp)
687 reclaim_consumed_buffers(port); 753 reclaim_consumed_buffers(port);
688 spin_unlock_irq(&port->outvq_lock); 754 spin_unlock_irq(&port->outvq_lock);
689 755
756 /*
757 * Locks aren't necessary here as a port can't be opened after
758 * unplug, and if a port isn't unplugged, a kref would already
759 * exist for the port. Plus, taking ports_lock here would
760 * create a dependency on other locks taken by functions
761 * inside remove_port if we're the last holder of the port,
762 * creating many problems.
763 */
764 kref_put(&port->kref, remove_port);
765
690 return 0; 766 return 0;
691} 767}
692 768
@@ -694,22 +770,31 @@ static int port_fops_open(struct inode *inode, struct file *filp)
694{ 770{
695 struct cdev *cdev = inode->i_cdev; 771 struct cdev *cdev = inode->i_cdev;
696 struct port *port; 772 struct port *port;
773 int ret;
697 774
698 port = container_of(cdev, struct port, cdev); 775 port = find_port_by_devt(cdev->dev);
699 filp->private_data = port; 776 filp->private_data = port;
700 777
778 /* Prevent against a port getting hot-unplugged at the same time */
779 spin_lock_irq(&port->portdev->ports_lock);
780 kref_get(&port->kref);
781 spin_unlock_irq(&port->portdev->ports_lock);
782
701 /* 783 /*
702 * Don't allow opening of console port devices -- that's done 784 * Don't allow opening of console port devices -- that's done
703 * via /dev/hvc 785 * via /dev/hvc
704 */ 786 */
705 if (is_console_port(port)) 787 if (is_console_port(port)) {
706 return -ENXIO; 788 ret = -ENXIO;
789 goto out;
790 }
707 791
708 /* Allow only one process to open a particular port at a time */ 792 /* Allow only one process to open a particular port at a time */
709 spin_lock_irq(&port->inbuf_lock); 793 spin_lock_irq(&port->inbuf_lock);
710 if (port->guest_connected) { 794 if (port->guest_connected) {
711 spin_unlock_irq(&port->inbuf_lock); 795 spin_unlock_irq(&port->inbuf_lock);
712 return -EMFILE; 796 ret = -EMFILE;
797 goto out;
713 } 798 }
714 799
715 port->guest_connected = true; 800 port->guest_connected = true;
@@ -724,10 +809,23 @@ static int port_fops_open(struct inode *inode, struct file *filp)
724 reclaim_consumed_buffers(port); 809 reclaim_consumed_buffers(port);
725 spin_unlock_irq(&port->outvq_lock); 810 spin_unlock_irq(&port->outvq_lock);
726 811
812 nonseekable_open(inode, filp);
813
727 /* Notify host of port being opened */ 814 /* Notify host of port being opened */
728 send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1); 815 send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1);
729 816
730 return 0; 817 return 0;
818out:
819 kref_put(&port->kref, remove_port);
820 return ret;
821}
822
823static int port_fops_fasync(int fd, struct file *filp, int mode)
824{
825 struct port *port;
826
827 port = filp->private_data;
828 return fasync_helper(fd, filp, mode, &port->async_queue);
731} 829}
732 830
733/* 831/*
@@ -743,6 +841,8 @@ static const struct file_operations port_fops = {
743 .write = port_fops_write, 841 .write = port_fops_write,
744 .poll = port_fops_poll, 842 .poll = port_fops_poll,
745 .release = port_fops_release, 843 .release = port_fops_release,
844 .fasync = port_fops_fasync,
845 .llseek = no_llseek,
746}; 846};
747 847
748/* 848/*
@@ -1001,6 +1101,12 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock)
1001 return nr_added_bufs; 1101 return nr_added_bufs;
1002} 1102}
1003 1103
1104static void send_sigio_to_port(struct port *port)
1105{
1106 if (port->async_queue && port->guest_connected)
1107 kill_fasync(&port->async_queue, SIGIO, POLL_OUT);
1108}
1109
1004static int add_port(struct ports_device *portdev, u32 id) 1110static int add_port(struct ports_device *portdev, u32 id)
1005{ 1111{
1006 char debugfs_name[16]; 1112 char debugfs_name[16];
@@ -1015,6 +1121,7 @@ static int add_port(struct ports_device *portdev, u32 id)
1015 err = -ENOMEM; 1121 err = -ENOMEM;
1016 goto fail; 1122 goto fail;
1017 } 1123 }
1124 kref_init(&port->kref);
1018 1125
1019 port->portdev = portdev; 1126 port->portdev = portdev;
1020 port->id = id; 1127 port->id = id;
@@ -1022,6 +1129,7 @@ static int add_port(struct ports_device *portdev, u32 id)
1022 port->name = NULL; 1129 port->name = NULL;
1023 port->inbuf = NULL; 1130 port->inbuf = NULL;
1024 port->cons.hvc = NULL; 1131 port->cons.hvc = NULL;
1132 port->async_queue = NULL;
1025 1133
1026 port->cons.ws.ws_row = port->cons.ws.ws_col = 0; 1134 port->cons.ws.ws_row = port->cons.ws.ws_col = 0;
1027 1135
@@ -1032,14 +1140,20 @@ static int add_port(struct ports_device *portdev, u32 id)
1032 port->in_vq = portdev->in_vqs[port->id]; 1140 port->in_vq = portdev->in_vqs[port->id];
1033 port->out_vq = portdev->out_vqs[port->id]; 1141 port->out_vq = portdev->out_vqs[port->id];
1034 1142
1035 cdev_init(&port->cdev, &port_fops); 1143 port->cdev = cdev_alloc();
1144 if (!port->cdev) {
1145 dev_err(&port->portdev->vdev->dev, "Error allocating cdev\n");
1146 err = -ENOMEM;
1147 goto free_port;
1148 }
1149 port->cdev->ops = &port_fops;
1036 1150
1037 devt = MKDEV(portdev->chr_major, id); 1151 devt = MKDEV(portdev->chr_major, id);
1038 err = cdev_add(&port->cdev, devt, 1); 1152 err = cdev_add(port->cdev, devt, 1);
1039 if (err < 0) { 1153 if (err < 0) {
1040 dev_err(&port->portdev->vdev->dev, 1154 dev_err(&port->portdev->vdev->dev,
1041 "Error %d adding cdev for port %u\n", err, id); 1155 "Error %d adding cdev for port %u\n", err, id);
1042 goto free_port; 1156 goto free_cdev;
1043 } 1157 }
1044 port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev, 1158 port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev,
1045 devt, port, "vport%up%u", 1159 devt, port, "vport%up%u",
@@ -1104,7 +1218,7 @@ free_inbufs:
1104free_device: 1218free_device:
1105 device_destroy(pdrvdata.class, port->dev->devt); 1219 device_destroy(pdrvdata.class, port->dev->devt);
1106free_cdev: 1220free_cdev:
1107 cdev_del(&port->cdev); 1221 cdev_del(port->cdev);
1108free_port: 1222free_port:
1109 kfree(port); 1223 kfree(port);
1110fail: 1224fail:
@@ -1113,21 +1227,45 @@ fail:
1113 return err; 1227 return err;
1114} 1228}
1115 1229
1116/* Remove all port-specific data. */ 1230/* No users remain, remove all port-specific data. */
1117static int remove_port(struct port *port) 1231static void remove_port(struct kref *kref)
1232{
1233 struct port *port;
1234
1235 port = container_of(kref, struct port, kref);
1236
1237 sysfs_remove_group(&port->dev->kobj, &port_attribute_group);
1238 device_destroy(pdrvdata.class, port->dev->devt);
1239 cdev_del(port->cdev);
1240
1241 kfree(port->name);
1242
1243 debugfs_remove(port->debugfs_file);
1244
1245 kfree(port);
1246}
1247
1248/*
1249 * Port got unplugged. Remove port from portdev's list and drop the
1250 * kref reference. If no userspace has this port opened, it will
1251 * result in immediate removal the port.
1252 */
1253static void unplug_port(struct port *port)
1118{ 1254{
1119 struct port_buffer *buf; 1255 struct port_buffer *buf;
1120 1256
1257 spin_lock_irq(&port->portdev->ports_lock);
1258 list_del(&port->list);
1259 spin_unlock_irq(&port->portdev->ports_lock);
1260
1121 if (port->guest_connected) { 1261 if (port->guest_connected) {
1122 port->guest_connected = false; 1262 port->guest_connected = false;
1123 port->host_connected = false; 1263 port->host_connected = false;
1124 wake_up_interruptible(&port->waitqueue); 1264 wake_up_interruptible(&port->waitqueue);
1125 send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0);
1126 }
1127 1265
1128 spin_lock_irq(&port->portdev->ports_lock); 1266 /* Let the app know the port is going down. */
1129 list_del(&port->list); 1267 send_sigio_to_port(port);
1130 spin_unlock_irq(&port->portdev->ports_lock); 1268 }
1131 1269
1132 if (is_console_port(port)) { 1270 if (is_console_port(port)) {
1133 spin_lock_irq(&pdrvdata_lock); 1271 spin_lock_irq(&pdrvdata_lock);
@@ -1146,9 +1284,6 @@ static int remove_port(struct port *port)
1146 hvc_remove(port->cons.hvc); 1284 hvc_remove(port->cons.hvc);
1147#endif 1285#endif
1148 } 1286 }
1149 sysfs_remove_group(&port->dev->kobj, &port_attribute_group);
1150 device_destroy(pdrvdata.class, port->dev->devt);
1151 cdev_del(&port->cdev);
1152 1287
1153 /* Remove unused data this port might have received. */ 1288 /* Remove unused data this port might have received. */
1154 discard_port_data(port); 1289 discard_port_data(port);
@@ -1159,12 +1294,19 @@ static int remove_port(struct port *port)
1159 while ((buf = virtqueue_detach_unused_buf(port->in_vq))) 1294 while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
1160 free_buf(buf); 1295 free_buf(buf);
1161 1296
1162 kfree(port->name); 1297 /*
1163 1298 * We should just assume the device itself has gone off --
1164 debugfs_remove(port->debugfs_file); 1299 * else a close on an open port later will try to send out a
1300 * control message.
1301 */
1302 port->portdev = NULL;
1165 1303
1166 kfree(port); 1304 /*
1167 return 0; 1305 * Locks around here are not necessary - a port can't be
1306 * opened after we removed the port struct from ports_list
1307 * above.
1308 */
1309 kref_put(&port->kref, remove_port);
1168} 1310}
1169 1311
1170/* Any private messages that the Host and Guest want to share */ 1312/* Any private messages that the Host and Guest want to share */
@@ -1203,7 +1345,7 @@ static void handle_control_message(struct ports_device *portdev,
1203 add_port(portdev, cpkt->id); 1345 add_port(portdev, cpkt->id);
1204 break; 1346 break;
1205 case VIRTIO_CONSOLE_PORT_REMOVE: 1347 case VIRTIO_CONSOLE_PORT_REMOVE:
1206 remove_port(port); 1348 unplug_port(port);
1207 break; 1349 break;
1208 case VIRTIO_CONSOLE_CONSOLE_PORT: 1350 case VIRTIO_CONSOLE_CONSOLE_PORT:
1209 if (!cpkt->value) 1351 if (!cpkt->value)
@@ -1245,6 +1387,12 @@ static void handle_control_message(struct ports_device *portdev,
1245 spin_lock_irq(&port->outvq_lock); 1387 spin_lock_irq(&port->outvq_lock);
1246 reclaim_consumed_buffers(port); 1388 reclaim_consumed_buffers(port);
1247 spin_unlock_irq(&port->outvq_lock); 1389 spin_unlock_irq(&port->outvq_lock);
1390
1391 /*
1392 * If the guest is connected, it'll be interested in
1393 * knowing the host connection state changed.
1394 */
1395 send_sigio_to_port(port);
1248 break; 1396 break;
1249 case VIRTIO_CONSOLE_PORT_NAME: 1397 case VIRTIO_CONSOLE_PORT_NAME:
1250 /* 1398 /*
@@ -1341,6 +1489,9 @@ static void in_intr(struct virtqueue *vq)
1341 1489
1342 wake_up_interruptible(&port->waitqueue); 1490 wake_up_interruptible(&port->waitqueue);
1343 1491
1492 /* Send a SIGIO indicating new data in case the process asked for it */
1493 send_sigio_to_port(port);
1494
1344 if (is_console_port(port) && hvc_poll(port->cons.hvc)) 1495 if (is_console_port(port) && hvc_poll(port->cons.hvc))
1345 hvc_kick(); 1496 hvc_kick();
1346} 1497}
@@ -1577,6 +1728,10 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
1577 add_port(portdev, 0); 1728 add_port(portdev, 0);
1578 } 1729 }
1579 1730
1731 spin_lock_irq(&pdrvdata_lock);
1732 list_add_tail(&portdev->list, &pdrvdata.portdevs);
1733 spin_unlock_irq(&pdrvdata_lock);
1734
1580 __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, 1735 __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID,
1581 VIRTIO_CONSOLE_DEVICE_READY, 1); 1736 VIRTIO_CONSOLE_DEVICE_READY, 1);
1582 return 0; 1737 return 0;
@@ -1600,23 +1755,41 @@ static void virtcons_remove(struct virtio_device *vdev)
1600{ 1755{
1601 struct ports_device *portdev; 1756 struct ports_device *portdev;
1602 struct port *port, *port2; 1757 struct port *port, *port2;
1603 struct port_buffer *buf;
1604 unsigned int len;
1605 1758
1606 portdev = vdev->priv; 1759 portdev = vdev->priv;
1607 1760
1761 spin_lock_irq(&pdrvdata_lock);
1762 list_del(&portdev->list);
1763 spin_unlock_irq(&pdrvdata_lock);
1764
1765 /* Disable interrupts for vqs */
1766 vdev->config->reset(vdev);
1767 /* Finish up work that's lined up */
1608 cancel_work_sync(&portdev->control_work); 1768 cancel_work_sync(&portdev->control_work);
1609 1769
1610 list_for_each_entry_safe(port, port2, &portdev->ports, list) 1770 list_for_each_entry_safe(port, port2, &portdev->ports, list)
1611 remove_port(port); 1771 unplug_port(port);
1612 1772
1613 unregister_chrdev(portdev->chr_major, "virtio-portsdev"); 1773 unregister_chrdev(portdev->chr_major, "virtio-portsdev");
1614 1774
1615 while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) 1775 /*
1616 free_buf(buf); 1776 * When yanking out a device, we immediately lose the
1777 * (device-side) queues. So there's no point in keeping the
1778 * guest side around till we drop our final reference. This
1779 * also means that any ports which are in an open state will
1780 * have to just stop using the port, as the vqs are going
1781 * away.
1782 */
1783 if (use_multiport(portdev)) {
1784 struct port_buffer *buf;
1785 unsigned int len;
1617 1786
1618 while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) 1787 while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
1619 free_buf(buf); 1788 free_buf(buf);
1789
1790 while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
1791 free_buf(buf);
1792 }
1620 1793
1621 vdev->config->del_vqs(vdev); 1794 vdev->config->del_vqs(vdev);
1622 kfree(portdev->in_vqs); 1795 kfree(portdev->in_vqs);
@@ -1663,6 +1836,7 @@ static int __init init(void)
1663 PTR_ERR(pdrvdata.debugfs_dir)); 1836 PTR_ERR(pdrvdata.debugfs_dir));
1664 } 1837 }
1665 INIT_LIST_HEAD(&pdrvdata.consoles); 1838 INIT_LIST_HEAD(&pdrvdata.consoles);
1839 INIT_LIST_HEAD(&pdrvdata.portdevs);
1666 1840
1667 return register_virtio_driver(&virtio_console); 1841 return register_virtio_driver(&virtio_console);
1668} 1842}
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 70bb350de996..9dbb28b9559f 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -39,7 +39,7 @@ config EDAC_DEBUG
39 there're four debug levels (x=0,1,2,3 from low to high). 39 there're four debug levels (x=0,1,2,3 from low to high).
40 Usually you should select 'N'. 40 Usually you should select 'N'.
41 41
42 config EDAC_DECODE_MCE 42config EDAC_DECODE_MCE
43 tristate "Decode MCEs in human-readable form (only on AMD for now)" 43 tristate "Decode MCEs in human-readable form (only on AMD for now)"
44 depends on CPU_SUP_AMD && X86_MCE 44 depends on CPU_SUP_AMD && X86_MCE
45 default y 45 default y
@@ -51,6 +51,16 @@ config EDAC_DEBUG
51 which occur really early upon boot, before the module infrastructure 51 which occur really early upon boot, before the module infrastructure
52 has been initialized. 52 has been initialized.
53 53
54config EDAC_MCE_INJ
55 tristate "Simple MCE injection interface over /sysfs"
56 depends on EDAC_DECODE_MCE
57 default n
58 help
59 This is a simple interface to inject MCEs over /sysfs and test
60 the MCE decoding code in EDAC.
61
62 This is currently AMD-only.
63
54config EDAC_MM_EDAC 64config EDAC_MM_EDAC
55 tristate "Main Memory EDAC (Error Detection And Correction) reporting" 65 tristate "Main Memory EDAC (Error Detection And Correction) reporting"
56 help 66 help
@@ -66,13 +76,13 @@ config EDAC_MCE
66 76
67config EDAC_AMD64 77config EDAC_AMD64
68 tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" 78 tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
69 depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && EDAC_DECODE_MCE 79 depends on EDAC_MM_EDAC && AMD_NB && X86_64 && PCI && EDAC_DECODE_MCE
70 help 80 help
71 Support for error detection and correction on the AMD 64 81 Support for error detection and correction on the AMD 64
72 Families of Memory Controllers (K8, F10h and F11h) 82 Families of Memory Controllers (K8, F10h and F11h)
73 83
74config EDAC_AMD64_ERROR_INJECTION 84config EDAC_AMD64_ERROR_INJECTION
75 bool "Sysfs Error Injection facilities" 85 bool "Sysfs HW Error injection facilities"
76 depends on EDAC_AMD64 86 depends on EDAC_AMD64
77 help 87 help
78 Recent Opterons (Family 10h and later) provide for Memory Error 88 Recent Opterons (Family 10h and later) provide for Memory Error
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index ca6b1bb24ccc..32c7bc93c525 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -17,6 +17,9 @@ ifdef CONFIG_PCI
17edac_core-objs += edac_pci.o edac_pci_sysfs.o 17edac_core-objs += edac_pci.o edac_pci_sysfs.o
18endif 18endif
19 19
20obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o
21
22edac_mce_amd-objs := mce_amd.o
20obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o 23obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o
21 24
22obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o 25obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index e7d5d6b5dcf6..8521401bbd75 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1,5 +1,5 @@
1#include "amd64_edac.h" 1#include "amd64_edac.h"
2#include <asm/k8.h> 2#include <asm/amd_nb.h>
3 3
4static struct edac_pci_ctl_info *amd64_ctl_pci; 4static struct edac_pci_ctl_info *amd64_ctl_pci;
5 5
@@ -2073,11 +2073,18 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
2073 amd64_handle_ue(mci, info); 2073 amd64_handle_ue(mci, info);
2074} 2074}
2075 2075
2076void amd64_decode_bus_error(int node_id, struct err_regs *regs) 2076void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg)
2077{ 2077{
2078 struct mem_ctl_info *mci = mci_lookup[node_id]; 2078 struct mem_ctl_info *mci = mci_lookup[node_id];
2079 struct err_regs regs;
2079 2080
2080 __amd64_decode_bus_error(mci, regs); 2081 regs.nbsl = (u32) m->status;
2082 regs.nbsh = (u32)(m->status >> 32);
2083 regs.nbeal = (u32) m->addr;
2084 regs.nbeah = (u32)(m->addr >> 32);
2085 regs.nbcfg = nbcfg;
2086
2087 __amd64_decode_bus_error(mci, &regs);
2081 2088
2082 /* 2089 /*
2083 * Check the UE bit of the NB status high register, if set generate some 2090 * Check the UE bit of the NB status high register, if set generate some
@@ -2086,7 +2093,7 @@ void amd64_decode_bus_error(int node_id, struct err_regs *regs)
2086 * 2093 *
2087 * FIXME: this should go somewhere else, if at all. 2094 * FIXME: this should go somewhere else, if at all.
2088 */ 2095 */
2089 if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors) 2096 if (regs.nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
2090 edac_mc_handle_ue_no_info(mci, "UE bit is set"); 2097 edac_mc_handle_ue_no_info(mci, "UE bit is set");
2091 2098
2092} 2099}
@@ -2927,7 +2934,7 @@ static int __init amd64_edac_init(void)
2927 * to finish initialization of the MC instances. 2934 * to finish initialization of the MC instances.
2928 */ 2935 */
2929 err = -ENODEV; 2936 err = -ENODEV;
2930 for (nb = 0; nb < num_k8_northbridges; nb++) { 2937 for (nb = 0; nb < k8_northbridges.num; nb++) {
2931 if (!pvt_lookup[nb]) 2938 if (!pvt_lookup[nb])
2932 continue; 2939 continue;
2933 2940
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 613b9381e71a..044aee4f944d 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -72,7 +72,7 @@
72#include <linux/edac.h> 72#include <linux/edac.h>
73#include <asm/msr.h> 73#include <asm/msr.h>
74#include "edac_core.h" 74#include "edac_core.h"
75#include "edac_mce_amd.h" 75#include "mce_amd.h"
76 76
77#define amd64_printk(level, fmt, arg...) \ 77#define amd64_printk(level, fmt, arg...) \
78 edac_printk(level, "amd64", fmt, ##arg) 78 edac_printk(level, "amd64", fmt, ##arg)
@@ -482,11 +482,10 @@ extern const char *rrrr_msgs[16];
482extern const char *to_msgs[2]; 482extern const char *to_msgs[2];
483extern const char *pp_msgs[4]; 483extern const char *pp_msgs[4];
484extern const char *ii_msgs[4]; 484extern const char *ii_msgs[4];
485extern const char *ext_msgs[32];
486extern const char *htlink_msgs[8]; 485extern const char *htlink_msgs[8];
487 486
488#ifdef CONFIG_EDAC_DEBUG 487#ifdef CONFIG_EDAC_DEBUG
489#define NUM_DBG_ATTRS 9 488#define NUM_DBG_ATTRS 5
490#else 489#else
491#define NUM_DBG_ATTRS 0 490#define NUM_DBG_ATTRS 0
492#endif 491#endif
diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c
index 59cf2cf6e11e..e3562288f4ce 100644
--- a/drivers/edac/amd64_edac_dbg.c
+++ b/drivers/edac/amd64_edac_dbg.c
@@ -1,167 +1,16 @@
1#include "amd64_edac.h" 1#include "amd64_edac.h"
2 2
3/* 3#define EDAC_DCT_ATTR_SHOW(reg) \
4 * accept a hex value and store it into the virtual error register file, field: 4static ssize_t amd64_##reg##_show(struct mem_ctl_info *mci, char *data) \
5 * nbeal and nbeah. Assume virtual error values have already been set for: NBSL, 5{ \
6 * NBSH and NBCFG. Then proceed to map the error values to a MC, CSROW and 6 struct amd64_pvt *pvt = mci->pvt_info; \
7 * CHANNEL 7 return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \
8 */
9static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data,
10 size_t count)
11{
12 struct amd64_pvt *pvt = mci->pvt_info;
13 unsigned long long value;
14 int ret = 0;
15
16 ret = strict_strtoull(data, 16, &value);
17 if (ret != -EINVAL) {
18 debugf0("received NBEA= 0x%llx\n", value);
19
20 /* place the value into the virtual error packet */
21 pvt->ctl_error_info.nbeal = (u32) value;
22 value >>= 32;
23 pvt->ctl_error_info.nbeah = (u32) value;
24
25 /* Process the Mapping request */
26 /* TODO: Add race prevention */
27 amd_decode_nb_mce(pvt->mc_node_id, &pvt->ctl_error_info, 1);
28
29 return count;
30 }
31 return ret;
32}
33
34/* display back what the last NBEA (MCA NB Address (MC4_ADDR)) was written */
35static ssize_t amd64_nbea_show(struct mem_ctl_info *mci, char *data)
36{
37 struct amd64_pvt *pvt = mci->pvt_info;
38 u64 value;
39
40 value = pvt->ctl_error_info.nbeah;
41 value <<= 32;
42 value |= pvt->ctl_error_info.nbeal;
43
44 return sprintf(data, "%llx\n", value);
45}
46
47/* store the NBSL (MCA NB Status Low (MC4_STATUS)) value user desires */
48static ssize_t amd64_nbsl_store(struct mem_ctl_info *mci, const char *data,
49 size_t count)
50{
51 struct amd64_pvt *pvt = mci->pvt_info;
52 unsigned long value;
53 int ret = 0;
54
55 ret = strict_strtoul(data, 16, &value);
56 if (ret != -EINVAL) {
57 debugf0("received NBSL= 0x%lx\n", value);
58
59 pvt->ctl_error_info.nbsl = (u32) value;
60
61 return count;
62 }
63 return ret;
64}
65
66/* display back what the last NBSL value written */
67static ssize_t amd64_nbsl_show(struct mem_ctl_info *mci, char *data)
68{
69 struct amd64_pvt *pvt = mci->pvt_info;
70 u32 value;
71
72 value = pvt->ctl_error_info.nbsl;
73
74 return sprintf(data, "%x\n", value);
75}
76
77/* store the NBSH (MCA NB Status High) value user desires */
78static ssize_t amd64_nbsh_store(struct mem_ctl_info *mci, const char *data,
79 size_t count)
80{
81 struct amd64_pvt *pvt = mci->pvt_info;
82 unsigned long value;
83 int ret = 0;
84
85 ret = strict_strtoul(data, 16, &value);
86 if (ret != -EINVAL) {
87 debugf0("received NBSH= 0x%lx\n", value);
88
89 pvt->ctl_error_info.nbsh = (u32) value;
90
91 return count;
92 }
93 return ret;
94}
95
96/* display back what the last NBSH value written */
97static ssize_t amd64_nbsh_show(struct mem_ctl_info *mci, char *data)
98{
99 struct amd64_pvt *pvt = mci->pvt_info;
100 u32 value;
101
102 value = pvt->ctl_error_info.nbsh;
103
104 return sprintf(data, "%x\n", value);
105} 8}
106 9
107/* accept and store the NBCFG (MCA NB Configuration) value user desires */ 10EDAC_DCT_ATTR_SHOW(dhar);
108static ssize_t amd64_nbcfg_store(struct mem_ctl_info *mci, 11EDAC_DCT_ATTR_SHOW(dbam0);
109 const char *data, size_t count) 12EDAC_DCT_ATTR_SHOW(top_mem);
110{ 13EDAC_DCT_ATTR_SHOW(top_mem2);
111 struct amd64_pvt *pvt = mci->pvt_info;
112 unsigned long value;
113 int ret = 0;
114
115 ret = strict_strtoul(data, 16, &value);
116 if (ret != -EINVAL) {
117 debugf0("received NBCFG= 0x%lx\n", value);
118
119 pvt->ctl_error_info.nbcfg = (u32) value;
120
121 return count;
122 }
123 return ret;
124}
125
126/* various show routines for the controls of a MCI */
127static ssize_t amd64_nbcfg_show(struct mem_ctl_info *mci, char *data)
128{
129 struct amd64_pvt *pvt = mci->pvt_info;
130
131 return sprintf(data, "%x\n", pvt->ctl_error_info.nbcfg);
132}
133
134
135static ssize_t amd64_dhar_show(struct mem_ctl_info *mci, char *data)
136{
137 struct amd64_pvt *pvt = mci->pvt_info;
138
139 return sprintf(data, "%x\n", pvt->dhar);
140}
141
142
143static ssize_t amd64_dbam_show(struct mem_ctl_info *mci, char *data)
144{
145 struct amd64_pvt *pvt = mci->pvt_info;
146
147 return sprintf(data, "%x\n", pvt->dbam0);
148}
149
150
151static ssize_t amd64_topmem_show(struct mem_ctl_info *mci, char *data)
152{
153 struct amd64_pvt *pvt = mci->pvt_info;
154
155 return sprintf(data, "%llx\n", pvt->top_mem);
156}
157
158
159static ssize_t amd64_topmem2_show(struct mem_ctl_info *mci, char *data)
160{
161 struct amd64_pvt *pvt = mci->pvt_info;
162
163 return sprintf(data, "%llx\n", pvt->top_mem2);
164}
165 14
166static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data) 15static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data)
167{ 16{
@@ -182,38 +31,6 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
182 31
183 { 32 {
184 .attr = { 33 .attr = {
185 .name = "nbea_ctl",
186 .mode = (S_IRUGO | S_IWUSR)
187 },
188 .show = amd64_nbea_show,
189 .store = amd64_nbea_store,
190 },
191 {
192 .attr = {
193 .name = "nbsl_ctl",
194 .mode = (S_IRUGO | S_IWUSR)
195 },
196 .show = amd64_nbsl_show,
197 .store = amd64_nbsl_store,
198 },
199 {
200 .attr = {
201 .name = "nbsh_ctl",
202 .mode = (S_IRUGO | S_IWUSR)
203 },
204 .show = amd64_nbsh_show,
205 .store = amd64_nbsh_store,
206 },
207 {
208 .attr = {
209 .name = "nbcfg_ctl",
210 .mode = (S_IRUGO | S_IWUSR)
211 },
212 .show = amd64_nbcfg_show,
213 .store = amd64_nbcfg_store,
214 },
215 {
216 .attr = {
217 .name = "dhar", 34 .name = "dhar",
218 .mode = (S_IRUGO) 35 .mode = (S_IRUGO)
219 }, 36 },
@@ -225,7 +42,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
225 .name = "dbam", 42 .name = "dbam",
226 .mode = (S_IRUGO) 43 .mode = (S_IRUGO)
227 }, 44 },
228 .show = amd64_dbam_show, 45 .show = amd64_dbam0_show,
229 .store = NULL, 46 .store = NULL,
230 }, 47 },
231 { 48 {
@@ -233,7 +50,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
233 .name = "topmem", 50 .name = "topmem",
234 .mode = (S_IRUGO) 51 .mode = (S_IRUGO)
235 }, 52 },
236 .show = amd64_topmem_show, 53 .show = amd64_top_mem_show,
237 .store = NULL, 54 .store = NULL,
238 }, 55 },
239 { 56 {
@@ -241,7 +58,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
241 .name = "topmem2", 58 .name = "topmem2",
242 .mode = (S_IRUGO) 59 .mode = (S_IRUGO)
243 }, 60 },
244 .show = amd64_topmem2_show, 61 .show = amd64_top_mem2_show,
245 .store = NULL, 62 .store = NULL,
246 }, 63 },
247 { 64 {
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index 070968178a24..2941dca91aae 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -13,6 +13,7 @@
13#include <linux/ctype.h> 13#include <linux/ctype.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/edac.h>
16 17
17#include "edac_core.h" 18#include "edac_core.h"
18#include "edac_module.h" 19#include "edac_module.h"
@@ -235,7 +236,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
235 debugf1("%s()\n", __func__); 236 debugf1("%s()\n", __func__);
236 237
237 /* get the /sys/devices/system/edac reference */ 238 /* get the /sys/devices/system/edac reference */
238 edac_class = edac_get_edac_class(); 239 edac_class = edac_get_sysfs_class();
239 if (edac_class == NULL) { 240 if (edac_class == NULL) {
240 debugf1("%s() no edac_class error\n", __func__); 241 debugf1("%s() no edac_class error\n", __func__);
241 err = -ENODEV; 242 err = -ENODEV;
@@ -255,7 +256,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
255 256
256 if (!try_module_get(edac_dev->owner)) { 257 if (!try_module_get(edac_dev->owner)) {
257 err = -ENODEV; 258 err = -ENODEV;
258 goto err_out; 259 goto err_mod_get;
259 } 260 }
260 261
261 /* register */ 262 /* register */
@@ -282,6 +283,9 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
282err_kobj_reg: 283err_kobj_reg:
283 module_put(edac_dev->owner); 284 module_put(edac_dev->owner);
284 285
286err_mod_get:
287 edac_put_sysfs_class();
288
285err_out: 289err_out:
286 return err; 290 return err;
287} 291}
@@ -290,12 +294,11 @@ err_out:
290 * edac_device_unregister_sysfs_main_kobj: 294 * edac_device_unregister_sysfs_main_kobj:
291 * the '..../edac/<name>' kobject 295 * the '..../edac/<name>' kobject
292 */ 296 */
293void edac_device_unregister_sysfs_main_kobj( 297void edac_device_unregister_sysfs_main_kobj(struct edac_device_ctl_info *dev)
294 struct edac_device_ctl_info *edac_dev)
295{ 298{
296 debugf0("%s()\n", __func__); 299 debugf0("%s()\n", __func__);
297 debugf4("%s() name of kobject is: %s\n", 300 debugf4("%s() name of kobject is: %s\n",
298 __func__, kobject_name(&edac_dev->kobj)); 301 __func__, kobject_name(&dev->kobj));
299 302
300 /* 303 /*
301 * Unregister the edac device's kobject and 304 * Unregister the edac device's kobject and
@@ -304,7 +307,8 @@ void edac_device_unregister_sysfs_main_kobj(
304 * a) module_put() this module 307 * a) module_put() this module
305 * b) 'kfree' the memory 308 * b) 'kfree' the memory
306 */ 309 */
307 kobject_put(&edac_dev->kobj); 310 kobject_put(&dev->kobj);
311 edac_put_sysfs_class();
308} 312}
309 313
310/* edac_dev -> instance information */ 314/* edac_dev -> instance information */
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 8aad94d10c0c..a4135860149b 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/ctype.h> 12#include <linux/ctype.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/edac.h>
14#include <linux/bug.h> 15#include <linux/bug.h>
15 16
16#include "edac_core.h" 17#include "edac_core.h"
@@ -1011,13 +1012,13 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
1011 */ 1012 */
1012int edac_sysfs_setup_mc_kset(void) 1013int edac_sysfs_setup_mc_kset(void)
1013{ 1014{
1014 int err = 0; 1015 int err = -EINVAL;
1015 struct sysdev_class *edac_class; 1016 struct sysdev_class *edac_class;
1016 1017
1017 debugf1("%s()\n", __func__); 1018 debugf1("%s()\n", __func__);
1018 1019
1019 /* get the /sys/devices/system/edac class reference */ 1020 /* get the /sys/devices/system/edac class reference */
1020 edac_class = edac_get_edac_class(); 1021 edac_class = edac_get_sysfs_class();
1021 if (edac_class == NULL) { 1022 if (edac_class == NULL) {
1022 debugf1("%s() no edac_class error=%d\n", __func__, err); 1023 debugf1("%s() no edac_class error=%d\n", __func__, err);
1023 goto fail_out; 1024 goto fail_out;
@@ -1028,15 +1029,16 @@ int edac_sysfs_setup_mc_kset(void)
1028 if (!mc_kset) { 1029 if (!mc_kset) {
1029 err = -ENOMEM; 1030 err = -ENOMEM;
1030 debugf1("%s() Failed to register '.../edac/mc'\n", __func__); 1031 debugf1("%s() Failed to register '.../edac/mc'\n", __func__);
1031 goto fail_out; 1032 goto fail_kset;
1032 } 1033 }
1033 1034
1034 debugf1("%s() Registered '.../edac/mc' kobject\n", __func__); 1035 debugf1("%s() Registered '.../edac/mc' kobject\n", __func__);
1035 1036
1036 return 0; 1037 return 0;
1037 1038
1039fail_kset:
1040 edac_put_sysfs_class();
1038 1041
1039 /* error unwind stack */
1040fail_out: 1042fail_out:
1041 return err; 1043 return err;
1042} 1044}
@@ -1049,5 +1051,6 @@ fail_out:
1049void edac_sysfs_teardown_mc_kset(void) 1051void edac_sysfs_teardown_mc_kset(void)
1050{ 1052{
1051 kset_unregister(mc_kset); 1053 kset_unregister(mc_kset);
1054 edac_put_sysfs_class();
1052} 1055}
1053 1056
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
deleted file mode 100644
index 9014df6f605d..000000000000
--- a/drivers/edac/edac_mce_amd.c
+++ /dev/null
@@ -1,452 +0,0 @@
1#include <linux/module.h>
2#include "edac_mce_amd.h"
3
4static bool report_gart_errors;
5static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
6
7void amd_report_gart_errors(bool v)
8{
9 report_gart_errors = v;
10}
11EXPORT_SYMBOL_GPL(amd_report_gart_errors);
12
13void amd_register_ecc_decoder(void (*f)(int, struct err_regs *))
14{
15 nb_bus_decoder = f;
16}
17EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
18
19void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *))
20{
21 if (nb_bus_decoder) {
22 WARN_ON(nb_bus_decoder != f);
23
24 nb_bus_decoder = NULL;
25 }
26}
27EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
28
29/*
30 * string representation for the different MCA reported error types, see F3x48
31 * or MSR0000_0411.
32 */
33const char *tt_msgs[] = { /* transaction type */
34 "instruction",
35 "data",
36 "generic",
37 "reserved"
38};
39EXPORT_SYMBOL_GPL(tt_msgs);
40
41const char *ll_msgs[] = { /* cache level */
42 "L0",
43 "L1",
44 "L2",
45 "L3/generic"
46};
47EXPORT_SYMBOL_GPL(ll_msgs);
48
49const char *rrrr_msgs[] = {
50 "generic",
51 "generic read",
52 "generic write",
53 "data read",
54 "data write",
55 "inst fetch",
56 "prefetch",
57 "evict",
58 "snoop",
59 "reserved RRRR= 9",
60 "reserved RRRR= 10",
61 "reserved RRRR= 11",
62 "reserved RRRR= 12",
63 "reserved RRRR= 13",
64 "reserved RRRR= 14",
65 "reserved RRRR= 15"
66};
67EXPORT_SYMBOL_GPL(rrrr_msgs);
68
69const char *pp_msgs[] = { /* participating processor */
70 "local node originated (SRC)",
71 "local node responded to request (RES)",
72 "local node observed as 3rd party (OBS)",
73 "generic"
74};
75EXPORT_SYMBOL_GPL(pp_msgs);
76
77const char *to_msgs[] = {
78 "no timeout",
79 "timed out"
80};
81EXPORT_SYMBOL_GPL(to_msgs);
82
83const char *ii_msgs[] = { /* memory or i/o */
84 "mem access",
85 "reserved",
86 "i/o access",
87 "generic"
88};
89EXPORT_SYMBOL_GPL(ii_msgs);
90
91/*
92 * Map the 4 or 5 (family-specific) bits of Extended Error code to the
93 * string table.
94 */
95const char *ext_msgs[] = {
96 "K8 ECC error", /* 0_0000b */
97 "CRC error on link", /* 0_0001b */
98 "Sync error packets on link", /* 0_0010b */
99 "Master Abort during link operation", /* 0_0011b */
100 "Target Abort during link operation", /* 0_0100b */
101 "Invalid GART PTE entry during table walk", /* 0_0101b */
102 "Unsupported atomic RMW command received", /* 0_0110b */
103 "WDT error: NB transaction timeout", /* 0_0111b */
104 "ECC/ChipKill ECC error", /* 0_1000b */
105 "SVM DEV Error", /* 0_1001b */
106 "Link Data error", /* 0_1010b */
107 "Link/L3/Probe Filter Protocol error", /* 0_1011b */
108 "NB Internal Arrays Parity error", /* 0_1100b */
109 "DRAM Address/Control Parity error", /* 0_1101b */
110 "Link Transmission error", /* 0_1110b */
111 "GART/DEV Table Walk Data error" /* 0_1111b */
112 "Res 0x100 error", /* 1_0000b */
113 "Res 0x101 error", /* 1_0001b */
114 "Res 0x102 error", /* 1_0010b */
115 "Res 0x103 error", /* 1_0011b */
116 "Res 0x104 error", /* 1_0100b */
117 "Res 0x105 error", /* 1_0101b */
118 "Res 0x106 error", /* 1_0110b */
119 "Res 0x107 error", /* 1_0111b */
120 "Res 0x108 error", /* 1_1000b */
121 "Res 0x109 error", /* 1_1001b */
122 "Res 0x10A error", /* 1_1010b */
123 "Res 0x10B error", /* 1_1011b */
124 "ECC error in L3 Cache Data", /* 1_1100b */
125 "L3 Cache Tag error", /* 1_1101b */
126 "L3 Cache LRU Parity error", /* 1_1110b */
127 "Probe Filter error" /* 1_1111b */
128};
129EXPORT_SYMBOL_GPL(ext_msgs);
130
131static void amd_decode_dc_mce(u64 mc0_status)
132{
133 u32 ec = mc0_status & 0xffff;
134 u32 xec = (mc0_status >> 16) & 0xf;
135
136 pr_emerg("Data Cache Error");
137
138 if (xec == 1 && TLB_ERROR(ec))
139 pr_cont(": %s TLB multimatch.\n", LL_MSG(ec));
140 else if (xec == 0) {
141 if (mc0_status & (1ULL << 40))
142 pr_cont(" during Data Scrub.\n");
143 else if (TLB_ERROR(ec))
144 pr_cont(": %s TLB parity error.\n", LL_MSG(ec));
145 else if (MEM_ERROR(ec)) {
146 u8 ll = ec & 0x3;
147 u8 tt = (ec >> 2) & 0x3;
148 u8 rrrr = (ec >> 4) & 0xf;
149
150 /* see F10h BKDG (31116), Table 92. */
151 if (ll == 0x1) {
152 if (tt != 0x1)
153 goto wrong_dc_mce;
154
155 pr_cont(": Data/Tag %s error.\n", RRRR_MSG(ec));
156
157 } else if (ll == 0x2 && rrrr == 0x3)
158 pr_cont(" during L1 linefill from L2.\n");
159 else
160 goto wrong_dc_mce;
161 } else if (BUS_ERROR(ec) && boot_cpu_data.x86 == 0xf)
162 pr_cont(" during system linefill.\n");
163 else
164 goto wrong_dc_mce;
165 } else
166 goto wrong_dc_mce;
167
168 return;
169
170wrong_dc_mce:
171 pr_warning("Corrupted DC MCE info?\n");
172}
173
174static void amd_decode_ic_mce(u64 mc1_status)
175{
176 u32 ec = mc1_status & 0xffff;
177 u32 xec = (mc1_status >> 16) & 0xf;
178
179 pr_emerg("Instruction Cache Error");
180
181 if (xec == 1 && TLB_ERROR(ec))
182 pr_cont(": %s TLB multimatch.\n", LL_MSG(ec));
183 else if (xec == 0) {
184 if (TLB_ERROR(ec))
185 pr_cont(": %s TLB Parity error.\n", LL_MSG(ec));
186 else if (BUS_ERROR(ec)) {
187 if (boot_cpu_data.x86 == 0xf &&
188 (mc1_status & (1ULL << 58)))
189 pr_cont(" during system linefill.\n");
190 else
191 pr_cont(" during attempted NB data read.\n");
192 } else if (MEM_ERROR(ec)) {
193 u8 ll = ec & 0x3;
194 u8 rrrr = (ec >> 4) & 0xf;
195
196 if (ll == 0x2)
197 pr_cont(" during a linefill from L2.\n");
198 else if (ll == 0x1) {
199
200 switch (rrrr) {
201 case 0x5:
202 pr_cont(": Parity error during "
203 "data load.\n");
204 break;
205
206 case 0x7:
207 pr_cont(": Copyback Parity/Victim"
208 " error.\n");
209 break;
210
211 case 0x8:
212 pr_cont(": Tag Snoop error.\n");
213 break;
214
215 default:
216 goto wrong_ic_mce;
217 break;
218 }
219 }
220 } else
221 goto wrong_ic_mce;
222 } else
223 goto wrong_ic_mce;
224
225 return;
226
227wrong_ic_mce:
228 pr_warning("Corrupted IC MCE info?\n");
229}
230
231static void amd_decode_bu_mce(u64 mc2_status)
232{
233 u32 ec = mc2_status & 0xffff;
234 u32 xec = (mc2_status >> 16) & 0xf;
235
236 pr_emerg("Bus Unit Error");
237
238 if (xec == 0x1)
239 pr_cont(" in the write data buffers.\n");
240 else if (xec == 0x3)
241 pr_cont(" in the victim data buffers.\n");
242 else if (xec == 0x2 && MEM_ERROR(ec))
243 pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
244 else if (xec == 0x0) {
245 if (TLB_ERROR(ec))
246 pr_cont(": %s error in a Page Descriptor Cache or "
247 "Guest TLB.\n", TT_MSG(ec));
248 else if (BUS_ERROR(ec))
249 pr_cont(": %s/ECC error in data read from NB: %s.\n",
250 RRRR_MSG(ec), PP_MSG(ec));
251 else if (MEM_ERROR(ec)) {
252 u8 rrrr = (ec >> 4) & 0xf;
253
254 if (rrrr >= 0x7)
255 pr_cont(": %s error during data copyback.\n",
256 RRRR_MSG(ec));
257 else if (rrrr <= 0x1)
258 pr_cont(": %s parity/ECC error during data "
259 "access from L2.\n", RRRR_MSG(ec));
260 else
261 goto wrong_bu_mce;
262 } else
263 goto wrong_bu_mce;
264 } else
265 goto wrong_bu_mce;
266
267 return;
268
269wrong_bu_mce:
270 pr_warning("Corrupted BU MCE info?\n");
271}
272
273static void amd_decode_ls_mce(u64 mc3_status)
274{
275 u32 ec = mc3_status & 0xffff;
276 u32 xec = (mc3_status >> 16) & 0xf;
277
278 pr_emerg("Load Store Error");
279
280 if (xec == 0x0) {
281 u8 rrrr = (ec >> 4) & 0xf;
282
283 if (!BUS_ERROR(ec) || (rrrr != 0x3 && rrrr != 0x4))
284 goto wrong_ls_mce;
285
286 pr_cont(" during %s.\n", RRRR_MSG(ec));
287 }
288 return;
289
290wrong_ls_mce:
291 pr_warning("Corrupted LS MCE info?\n");
292}
293
294void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors)
295{
296 u32 ec = ERROR_CODE(regs->nbsl);
297
298 if (!handle_errors)
299 return;
300
301 /*
302 * GART TLB error reporting is disabled by default. Bail out early.
303 */
304 if (TLB_ERROR(ec) && !report_gart_errors)
305 return;
306
307 pr_emerg("Northbridge Error, node %d", node_id);
308
309 /*
310 * F10h, revD can disable ErrCpu[3:0] so check that first and also the
311 * value encoding has changed so interpret those differently
312 */
313 if ((boot_cpu_data.x86 == 0x10) &&
314 (boot_cpu_data.x86_model > 7)) {
315 if (regs->nbsh & K8_NBSH_ERR_CPU_VAL)
316 pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf));
317 } else {
318 u8 assoc_cpus = regs->nbsh & 0xf;
319
320 if (assoc_cpus > 0)
321 pr_cont(", core: %d", fls(assoc_cpus) - 1);
322
323 pr_cont("\n");
324 }
325
326 pr_emerg("%s.\n", EXT_ERR_MSG(regs->nbsl));
327
328 if (BUS_ERROR(ec) && nb_bus_decoder)
329 nb_bus_decoder(node_id, regs);
330}
331EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
332
333static void amd_decode_fr_mce(u64 mc5_status)
334{
335 /* we have only one error signature so match all fields at once. */
336 if ((mc5_status & 0xffff) == 0x0f0f)
337 pr_emerg(" FR Error: CPU Watchdog timer expire.\n");
338 else
339 pr_warning("Corrupted FR MCE info?\n");
340}
341
342static inline void amd_decode_err_code(unsigned int ec)
343{
344 if (TLB_ERROR(ec)) {
345 pr_emerg("Transaction: %s, Cache Level %s\n",
346 TT_MSG(ec), LL_MSG(ec));
347 } else if (MEM_ERROR(ec)) {
348 pr_emerg("Transaction: %s, Type: %s, Cache Level: %s",
349 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
350 } else if (BUS_ERROR(ec)) {
351 pr_emerg("Transaction type: %s(%s), %s, Cache Level: %s, "
352 "Participating Processor: %s\n",
353 RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec),
354 PP_MSG(ec));
355 } else
356 pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
357}
358
359static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
360 void *data)
361{
362 struct mce *m = (struct mce *)data;
363 struct err_regs regs;
364 int node, ecc;
365
366 pr_emerg("MC%d_STATUS: ", m->bank);
367
368 pr_cont("%sorrected error, other errors lost: %s, "
369 "CPU context corrupt: %s",
370 ((m->status & MCI_STATUS_UC) ? "Unc" : "C"),
371 ((m->status & MCI_STATUS_OVER) ? "yes" : "no"),
372 ((m->status & MCI_STATUS_PCC) ? "yes" : "no"));
373
374 /* do the two bits[14:13] together */
375 ecc = (m->status >> 45) & 0x3;
376 if (ecc)
377 pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));
378
379 pr_cont("\n");
380
381 switch (m->bank) {
382 case 0:
383 amd_decode_dc_mce(m->status);
384 break;
385
386 case 1:
387 amd_decode_ic_mce(m->status);
388 break;
389
390 case 2:
391 amd_decode_bu_mce(m->status);
392 break;
393
394 case 3:
395 amd_decode_ls_mce(m->status);
396 break;
397
398 case 4:
399 regs.nbsl = (u32) m->status;
400 regs.nbsh = (u32)(m->status >> 32);
401 regs.nbeal = (u32) m->addr;
402 regs.nbeah = (u32)(m->addr >> 32);
403 node = amd_get_nb_id(m->extcpu);
404
405 amd_decode_nb_mce(node, &regs, 1);
406 break;
407
408 case 5:
409 amd_decode_fr_mce(m->status);
410 break;
411
412 default:
413 break;
414 }
415
416 amd_decode_err_code(m->status & 0xffff);
417
418 return NOTIFY_STOP;
419}
420
421static struct notifier_block amd_mce_dec_nb = {
422 .notifier_call = amd_decode_mce,
423};
424
425static int __init mce_amd_init(void)
426{
427 /*
428 * We can decode MCEs for K8, F10h and F11h CPUs:
429 */
430 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
431 return 0;
432
433 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
434 return 0;
435
436 atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
437
438 return 0;
439}
440early_initcall(mce_amd_init);
441
442#ifdef MODULE
443static void __exit mce_amd_exit(void)
444{
445 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
446}
447
448MODULE_DESCRIPTION("AMD MCE decoder");
449MODULE_ALIAS("edac-mce-amd");
450MODULE_LICENSE("GPL");
451module_exit(mce_amd_exit);
452#endif
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c
index 7e1374afd967..be4b075c3098 100644
--- a/drivers/edac/edac_module.c
+++ b/drivers/edac/edac_module.c
@@ -27,15 +27,6 @@ EXPORT_SYMBOL_GPL(edac_debug_level);
27struct workqueue_struct *edac_workqueue; 27struct workqueue_struct *edac_workqueue;
28 28
29/* 29/*
30 * sysfs object: /sys/devices/system/edac
31 * need to export to other files in this modules
32 */
33static struct sysdev_class edac_class = {
34 .name = "edac",
35};
36static int edac_class_valid;
37
38/*
39 * edac_op_state_to_string() 30 * edac_op_state_to_string()
40 */ 31 */
41char *edac_op_state_to_string(int opstate) 32char *edac_op_state_to_string(int opstate)
@@ -55,60 +46,6 @@ char *edac_op_state_to_string(int opstate)
55} 46}
56 47
57/* 48/*
58 * edac_get_edac_class()
59 *
60 * return pointer to the edac class of 'edac'
61 */
62struct sysdev_class *edac_get_edac_class(void)
63{
64 struct sysdev_class *classptr = NULL;
65
66 if (edac_class_valid)
67 classptr = &edac_class;
68
69 return classptr;
70}
71
72/*
73 * edac_register_sysfs_edac_name()
74 *
75 * register the 'edac' into /sys/devices/system
76 *
77 * return:
78 * 0 success
79 * !0 error
80 */
81static int edac_register_sysfs_edac_name(void)
82{
83 int err;
84
85 /* create the /sys/devices/system/edac directory */
86 err = sysdev_class_register(&edac_class);
87
88 if (err) {
89 debugf1("%s() error=%d\n", __func__, err);
90 return err;
91 }
92
93 edac_class_valid = 1;
94 return 0;
95}
96
97/*
98 * sysdev_class_unregister()
99 *
100 * unregister the 'edac' from /sys/devices/system
101 */
102static void edac_unregister_sysfs_edac_name(void)
103{
104 /* only if currently registered, then unregister it */
105 if (edac_class_valid)
106 sysdev_class_unregister(&edac_class);
107
108 edac_class_valid = 0;
109}
110
111/*
112 * edac_workqueue_setup 49 * edac_workqueue_setup
113 * initialize the edac work queue for polling operations 50 * initialize the edac work queue for polling operations
114 */ 51 */
@@ -154,21 +91,11 @@ static int __init edac_init(void)
154 edac_pci_clear_parity_errors(); 91 edac_pci_clear_parity_errors();
155 92
156 /* 93 /*
157 * perform the registration of the /sys/devices/system/edac class object
158 */
159 if (edac_register_sysfs_edac_name()) {
160 edac_printk(KERN_ERR, EDAC_MC,
161 "Error initializing 'edac' kobject\n");
162 err = -ENODEV;
163 goto error;
164 }
165
166 /*
167 * now set up the mc_kset under the edac class object 94 * now set up the mc_kset under the edac class object
168 */ 95 */
169 err = edac_sysfs_setup_mc_kset(); 96 err = edac_sysfs_setup_mc_kset();
170 if (err) 97 if (err)
171 goto sysfs_setup_fail; 98 goto error;
172 99
173 /* Setup/Initialize the workq for this core */ 100 /* Setup/Initialize the workq for this core */
174 err = edac_workqueue_setup(); 101 err = edac_workqueue_setup();
@@ -183,9 +110,6 @@ static int __init edac_init(void)
183workq_fail: 110workq_fail:
184 edac_sysfs_teardown_mc_kset(); 111 edac_sysfs_teardown_mc_kset();
185 112
186sysfs_setup_fail:
187 edac_unregister_sysfs_edac_name();
188
189error: 113error:
190 return err; 114 return err;
191} 115}
@@ -201,7 +125,6 @@ static void __exit edac_exit(void)
201 /* tear down the various subsystems */ 125 /* tear down the various subsystems */
202 edac_workqueue_teardown(); 126 edac_workqueue_teardown();
203 edac_sysfs_teardown_mc_kset(); 127 edac_sysfs_teardown_mc_kset();
204 edac_unregister_sysfs_edac_name();
205} 128}
206 129
207/* 130/*
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index 233d4798c3aa..17aabb7b90ec 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -42,7 +42,6 @@ extern void edac_device_unregister_sysfs_main_kobj(
42 struct edac_device_ctl_info *edac_dev); 42 struct edac_device_ctl_info *edac_dev);
43extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev); 43extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev);
44extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev); 44extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev);
45extern struct sysdev_class *edac_get_edac_class(void);
46 45
47/* edac core workqueue: single CPU mode */ 46/* edac core workqueue: single CPU mode */
48extern struct workqueue_struct *edac_workqueue; 47extern struct workqueue_struct *edac_workqueue;
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index c39697df9cb4..023b01cb5175 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -7,7 +7,7 @@
7 * 7 *
8 */ 8 */
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/sysdev.h> 10#include <linux/edac.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/ctype.h> 12#include <linux/ctype.h>
13 13
@@ -354,7 +354,7 @@ static int edac_pci_main_kobj_setup(void)
354 /* First time, so create the main kobject and its 354 /* First time, so create the main kobject and its
355 * controls and atributes 355 * controls and atributes
356 */ 356 */
357 edac_class = edac_get_edac_class(); 357 edac_class = edac_get_sysfs_class();
358 if (edac_class == NULL) { 358 if (edac_class == NULL) {
359 debugf1("%s() no edac_class\n", __func__); 359 debugf1("%s() no edac_class\n", __func__);
360 err = -ENODEV; 360 err = -ENODEV;
@@ -368,7 +368,7 @@ static int edac_pci_main_kobj_setup(void)
368 if (!try_module_get(THIS_MODULE)) { 368 if (!try_module_get(THIS_MODULE)) {
369 debugf1("%s() try_module_get() failed\n", __func__); 369 debugf1("%s() try_module_get() failed\n", __func__);
370 err = -ENODEV; 370 err = -ENODEV;
371 goto decrement_count_fail; 371 goto mod_get_fail;
372 } 372 }
373 373
374 edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); 374 edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
@@ -403,6 +403,9 @@ kobject_init_and_add_fail:
403kzalloc_fail: 403kzalloc_fail:
404 module_put(THIS_MODULE); 404 module_put(THIS_MODULE);
405 405
406mod_get_fail:
407 edac_put_sysfs_class();
408
406decrement_count_fail: 409decrement_count_fail:
407 /* if are on this error exit, nothing to tear down */ 410 /* if are on this error exit, nothing to tear down */
408 atomic_dec(&edac_pci_sysfs_refcount); 411 atomic_dec(&edac_pci_sysfs_refcount);
@@ -429,6 +432,7 @@ static void edac_pci_main_kobj_teardown(void)
429 __func__); 432 __func__);
430 kobject_put(edac_pci_top_main_kobj); 433 kobject_put(edac_pci_top_main_kobj);
431 } 434 }
435 edac_put_sysfs_class();
432} 436}
433 437
434/* 438/*
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
index 20b428aa155e..aab970760b75 100644
--- a/drivers/edac/edac_stub.c
+++ b/drivers/edac/edac_stub.c
@@ -3,10 +3,13 @@
3 * 3 *
4 * Author: Dave Jiang <djiang@mvista.com> 4 * Author: Dave Jiang <djiang@mvista.com>
5 * 5 *
6 * 2007 (c) MontaVista Software, Inc. This file is licensed under 6 * 2007 (c) MontaVista Software, Inc.
7 * the terms of the GNU General Public License version 2. This program 7 * 2010 (c) Advanced Micro Devices Inc.
8 * is licensed "as is" without any warranty of any kind, whether express 8 * Borislav Petkov <borislav.petkov@amd.com>
9 * or implied. 9 *
10 * This file is licensed under the terms of the GNU General Public
11 * License version 2. This program is licensed "as is" without any
12 * warranty of any kind, whether express or implied.
10 * 13 *
11 */ 14 */
12#include <linux/module.h> 15#include <linux/module.h>
@@ -23,6 +26,8 @@ EXPORT_SYMBOL_GPL(edac_handlers);
23int edac_err_assert = 0; 26int edac_err_assert = 0;
24EXPORT_SYMBOL_GPL(edac_err_assert); 27EXPORT_SYMBOL_GPL(edac_err_assert);
25 28
29static atomic_t edac_class_valid = ATOMIC_INIT(0);
30
26/* 31/*
27 * called to determine if there is an EDAC driver interested in 32 * called to determine if there is an EDAC driver interested in
28 * knowing an event (such as NMI) occurred 33 * knowing an event (such as NMI) occurred
@@ -44,3 +49,41 @@ void edac_atomic_assert_error(void)
44 edac_err_assert++; 49 edac_err_assert++;
45} 50}
46EXPORT_SYMBOL_GPL(edac_atomic_assert_error); 51EXPORT_SYMBOL_GPL(edac_atomic_assert_error);
52
53/*
54 * sysfs object: /sys/devices/system/edac
55 * need to export to other files
56 */
57struct sysdev_class edac_class = {
58 .name = "edac",
59};
60EXPORT_SYMBOL_GPL(edac_class);
61
62/* return pointer to the 'edac' node in sysfs */
63struct sysdev_class *edac_get_sysfs_class(void)
64{
65 int err = 0;
66
67 if (atomic_read(&edac_class_valid))
68 goto out;
69
70 /* create the /sys/devices/system/edac directory */
71 err = sysdev_class_register(&edac_class);
72 if (err) {
73 printk(KERN_ERR "Error registering toplevel EDAC sysfs dir\n");
74 return NULL;
75 }
76
77out:
78 atomic_inc(&edac_class_valid);
79 return &edac_class;
80}
81EXPORT_SYMBOL_GPL(edac_get_sysfs_class);
82
83void edac_put_sysfs_class(void)
84{
85 /* last user unregisters it */
86 if (atomic_dec_and_test(&edac_class_valid))
87 sysdev_class_unregister(&edac_class);
88}
89EXPORT_SYMBOL_GPL(edac_put_sysfs_class);
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
new file mode 100644
index 000000000000..c0181093b490
--- /dev/null
+++ b/drivers/edac/mce_amd.c
@@ -0,0 +1,680 @@
1#include <linux/module.h>
2#include <linux/slab.h>
3
4#include "mce_amd.h"
5
6static struct amd_decoder_ops *fam_ops;
7
8static u8 nb_err_cpumask = 0xf;
9
10static bool report_gart_errors;
11static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg);
12
13void amd_report_gart_errors(bool v)
14{
15 report_gart_errors = v;
16}
17EXPORT_SYMBOL_GPL(amd_report_gart_errors);
18
19void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32))
20{
21 nb_bus_decoder = f;
22}
23EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
24
25void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32))
26{
27 if (nb_bus_decoder) {
28 WARN_ON(nb_bus_decoder != f);
29
30 nb_bus_decoder = NULL;
31 }
32}
33EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
34
35/*
36 * string representation for the different MCA reported error types, see F3x48
37 * or MSR0000_0411.
38 */
39
40/* transaction type */
41const char *tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };
42EXPORT_SYMBOL_GPL(tt_msgs);
43
44/* cache level */
45const char *ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };
46EXPORT_SYMBOL_GPL(ll_msgs);
47
48/* memory transaction type */
49const char *rrrr_msgs[] = {
50 "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
51};
52EXPORT_SYMBOL_GPL(rrrr_msgs);
53
54/* participating processor */
55const char *pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
56EXPORT_SYMBOL_GPL(pp_msgs);
57
58/* request timeout */
59const char *to_msgs[] = { "no timeout", "timed out" };
60EXPORT_SYMBOL_GPL(to_msgs);
61
62/* memory or i/o */
63const char *ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
64EXPORT_SYMBOL_GPL(ii_msgs);
65
66static const char *f10h_nb_mce_desc[] = {
67 "HT link data error",
68 "Protocol error (link, L3, probe filter, etc.)",
69 "Parity error in NB-internal arrays",
70 "Link Retry due to IO link transmission error",
71 "L3 ECC data cache error",
72 "ECC error in L3 cache tag",
73 "L3 LRU parity bits error",
74 "ECC Error in the Probe Filter directory"
75};
76
77static bool f12h_dc_mce(u16 ec)
78{
79 bool ret = false;
80
81 if (MEM_ERROR(ec)) {
82 u8 ll = ec & 0x3;
83 ret = true;
84
85 if (ll == LL_L2)
86 pr_cont("during L1 linefill from L2.\n");
87 else if (ll == LL_L1)
88 pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec));
89 else
90 ret = false;
91 }
92 return ret;
93}
94
95static bool f10h_dc_mce(u16 ec)
96{
97 u8 r4 = (ec >> 4) & 0xf;
98 u8 ll = ec & 0x3;
99
100 if (r4 == R4_GEN && ll == LL_L1) {
101 pr_cont("during data scrub.\n");
102 return true;
103 }
104 return f12h_dc_mce(ec);
105}
106
107static bool k8_dc_mce(u16 ec)
108{
109 if (BUS_ERROR(ec)) {
110 pr_cont("during system linefill.\n");
111 return true;
112 }
113
114 return f10h_dc_mce(ec);
115}
116
117static bool f14h_dc_mce(u16 ec)
118{
119 u8 r4 = (ec >> 4) & 0xf;
120 u8 ll = ec & 0x3;
121 u8 tt = (ec >> 2) & 0x3;
122 u8 ii = tt;
123 bool ret = true;
124
125 if (MEM_ERROR(ec)) {
126
127 if (tt != TT_DATA || ll != LL_L1)
128 return false;
129
130 switch (r4) {
131 case R4_DRD:
132 case R4_DWR:
133 pr_cont("Data/Tag parity error due to %s.\n",
134 (r4 == R4_DRD ? "load/hw prf" : "store"));
135 break;
136 case R4_EVICT:
137 pr_cont("Copyback parity error on a tag miss.\n");
138 break;
139 case R4_SNOOP:
140 pr_cont("Tag parity error during snoop.\n");
141 break;
142 default:
143 ret = false;
144 }
145 } else if (BUS_ERROR(ec)) {
146
147 if ((ii != II_MEM && ii != II_IO) || ll != LL_LG)
148 return false;
149
150 pr_cont("System read data error on a ");
151
152 switch (r4) {
153 case R4_RD:
154 pr_cont("TLB reload.\n");
155 break;
156 case R4_DWR:
157 pr_cont("store.\n");
158 break;
159 case R4_DRD:
160 pr_cont("load.\n");
161 break;
162 default:
163 ret = false;
164 }
165 } else {
166 ret = false;
167 }
168
169 return ret;
170}
171
172static void amd_decode_dc_mce(struct mce *m)
173{
174 u16 ec = m->status & 0xffff;
175 u8 xec = (m->status >> 16) & 0xf;
176
177 pr_emerg(HW_ERR "Data Cache Error: ");
178
179 /* TLB error signatures are the same across families */
180 if (TLB_ERROR(ec)) {
181 u8 tt = (ec >> 2) & 0x3;
182
183 if (tt == TT_DATA) {
184 pr_cont("%s TLB %s.\n", LL_MSG(ec),
185 (xec ? "multimatch" : "parity error"));
186 return;
187 }
188 else
189 goto wrong_dc_mce;
190 }
191
192 if (!fam_ops->dc_mce(ec))
193 goto wrong_dc_mce;
194
195 return;
196
197wrong_dc_mce:
198 pr_emerg(HW_ERR "Corrupted DC MCE info?\n");
199}
200
201static bool k8_ic_mce(u16 ec)
202{
203 u8 ll = ec & 0x3;
204 u8 r4 = (ec >> 4) & 0xf;
205 bool ret = true;
206
207 if (!MEM_ERROR(ec))
208 return false;
209
210 if (ll == 0x2)
211 pr_cont("during a linefill from L2.\n");
212 else if (ll == 0x1) {
213 switch (r4) {
214 case R4_IRD:
215 pr_cont("Parity error during data load.\n");
216 break;
217
218 case R4_EVICT:
219 pr_cont("Copyback Parity/Victim error.\n");
220 break;
221
222 case R4_SNOOP:
223 pr_cont("Tag Snoop error.\n");
224 break;
225
226 default:
227 ret = false;
228 break;
229 }
230 } else
231 ret = false;
232
233 return ret;
234}
235
236static bool f14h_ic_mce(u16 ec)
237{
238 u8 ll = ec & 0x3;
239 u8 tt = (ec >> 2) & 0x3;
240 u8 r4 = (ec >> 4) & 0xf;
241 bool ret = true;
242
243 if (MEM_ERROR(ec)) {
244 if (tt != 0 || ll != 1)
245 ret = false;
246
247 if (r4 == R4_IRD)
248 pr_cont("Data/tag array parity error for a tag hit.\n");
249 else if (r4 == R4_SNOOP)
250 pr_cont("Tag error during snoop/victimization.\n");
251 else
252 ret = false;
253 }
254 return ret;
255}
256
257static void amd_decode_ic_mce(struct mce *m)
258{
259 u16 ec = m->status & 0xffff;
260 u8 xec = (m->status >> 16) & 0xf;
261
262 pr_emerg(HW_ERR "Instruction Cache Error: ");
263
264 if (TLB_ERROR(ec))
265 pr_cont("%s TLB %s.\n", LL_MSG(ec),
266 (xec ? "multimatch" : "parity error"));
267 else if (BUS_ERROR(ec)) {
268 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
269
270 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
271 } else if (fam_ops->ic_mce(ec))
272 ;
273 else
274 pr_emerg(HW_ERR "Corrupted IC MCE info?\n");
275}
276
277static void amd_decode_bu_mce(struct mce *m)
278{
279 u32 ec = m->status & 0xffff;
280 u32 xec = (m->status >> 16) & 0xf;
281
282 pr_emerg(HW_ERR "Bus Unit Error");
283
284 if (xec == 0x1)
285 pr_cont(" in the write data buffers.\n");
286 else if (xec == 0x3)
287 pr_cont(" in the victim data buffers.\n");
288 else if (xec == 0x2 && MEM_ERROR(ec))
289 pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
290 else if (xec == 0x0) {
291 if (TLB_ERROR(ec))
292 pr_cont(": %s error in a Page Descriptor Cache or "
293 "Guest TLB.\n", TT_MSG(ec));
294 else if (BUS_ERROR(ec))
295 pr_cont(": %s/ECC error in data read from NB: %s.\n",
296 RRRR_MSG(ec), PP_MSG(ec));
297 else if (MEM_ERROR(ec)) {
298 u8 rrrr = (ec >> 4) & 0xf;
299
300 if (rrrr >= 0x7)
301 pr_cont(": %s error during data copyback.\n",
302 RRRR_MSG(ec));
303 else if (rrrr <= 0x1)
304 pr_cont(": %s parity/ECC error during data "
305 "access from L2.\n", RRRR_MSG(ec));
306 else
307 goto wrong_bu_mce;
308 } else
309 goto wrong_bu_mce;
310 } else
311 goto wrong_bu_mce;
312
313 return;
314
315wrong_bu_mce:
316 pr_emerg(HW_ERR "Corrupted BU MCE info?\n");
317}
318
319static void amd_decode_ls_mce(struct mce *m)
320{
321 u16 ec = m->status & 0xffff;
322 u8 xec = (m->status >> 16) & 0xf;
323
324 if (boot_cpu_data.x86 == 0x14) {
325 pr_emerg("You shouldn't be seeing an LS MCE on this cpu family,"
326 " please report on LKML.\n");
327 return;
328 }
329
330 pr_emerg(HW_ERR "Load Store Error");
331
332 if (xec == 0x0) {
333 u8 r4 = (ec >> 4) & 0xf;
334
335 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
336 goto wrong_ls_mce;
337
338 pr_cont(" during %s.\n", RRRR_MSG(ec));
339 } else
340 goto wrong_ls_mce;
341
342 return;
343
344wrong_ls_mce:
345 pr_emerg(HW_ERR "Corrupted LS MCE info?\n");
346}
347
348static bool k8_nb_mce(u16 ec, u8 xec)
349{
350 bool ret = true;
351
352 switch (xec) {
353 case 0x1:
354 pr_cont("CRC error detected on HT link.\n");
355 break;
356
357 case 0x5:
358 pr_cont("Invalid GART PTE entry during GART table walk.\n");
359 break;
360
361 case 0x6:
362 pr_cont("Unsupported atomic RMW received from an IO link.\n");
363 break;
364
365 case 0x0:
366 case 0x8:
367 if (boot_cpu_data.x86 == 0x11)
368 return false;
369
370 pr_cont("DRAM ECC error detected on the NB.\n");
371 break;
372
373 case 0xd:
374 pr_cont("Parity error on the DRAM addr/ctl signals.\n");
375 break;
376
377 default:
378 ret = false;
379 break;
380 }
381
382 return ret;
383}
384
385static bool f10h_nb_mce(u16 ec, u8 xec)
386{
387 bool ret = true;
388 u8 offset = 0;
389
390 if (k8_nb_mce(ec, xec))
391 return true;
392
393 switch(xec) {
394 case 0xa ... 0xc:
395 offset = 10;
396 break;
397
398 case 0xe:
399 offset = 11;
400 break;
401
402 case 0xf:
403 if (TLB_ERROR(ec))
404 pr_cont("GART Table Walk data error.\n");
405 else if (BUS_ERROR(ec))
406 pr_cont("DMA Exclusion Vector Table Walk error.\n");
407 else
408 ret = false;
409
410 goto out;
411 break;
412
413 case 0x1c ... 0x1f:
414 offset = 24;
415 break;
416
417 default:
418 ret = false;
419
420 goto out;
421 break;
422 }
423
424 pr_cont("%s.\n", f10h_nb_mce_desc[xec - offset]);
425
426out:
427 return ret;
428}
429
430static bool nb_noop_mce(u16 ec, u8 xec)
431{
432 return false;
433}
434
435void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg)
436{
437 u8 xec = (m->status >> 16) & 0x1f;
438 u16 ec = m->status & 0xffff;
439 u32 nbsh = (u32)(m->status >> 32);
440
441 pr_emerg(HW_ERR "Northbridge Error, node %d: ", node_id);
442
443 /*
444 * F10h, revD can disable ErrCpu[3:0] so check that first and also the
445 * value encoding has changed so interpret those differently
446 */
447 if ((boot_cpu_data.x86 == 0x10) &&
448 (boot_cpu_data.x86_model > 7)) {
449 if (nbsh & K8_NBSH_ERR_CPU_VAL)
450 pr_cont(", core: %u", (u8)(nbsh & nb_err_cpumask));
451 } else {
452 u8 assoc_cpus = nbsh & nb_err_cpumask;
453
454 if (assoc_cpus > 0)
455 pr_cont(", core: %d", fls(assoc_cpus) - 1);
456 }
457
458 switch (xec) {
459 case 0x2:
460 pr_cont("Sync error (sync packets on HT link detected).\n");
461 return;
462
463 case 0x3:
464 pr_cont("HT Master abort.\n");
465 return;
466
467 case 0x4:
468 pr_cont("HT Target abort.\n");
469 return;
470
471 case 0x7:
472 pr_cont("NB Watchdog timeout.\n");
473 return;
474
475 case 0x9:
476 pr_cont("SVM DMA Exclusion Vector error.\n");
477 return;
478
479 default:
480 break;
481 }
482
483 if (!fam_ops->nb_mce(ec, xec))
484 goto wrong_nb_mce;
485
486 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10)
487 if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder)
488 nb_bus_decoder(node_id, m, nbcfg);
489
490 return;
491
492wrong_nb_mce:
493 pr_emerg(HW_ERR "Corrupted NB MCE info?\n");
494}
495EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
496
497static void amd_decode_fr_mce(struct mce *m)
498{
499 if (boot_cpu_data.x86 == 0xf ||
500 boot_cpu_data.x86 == 0x11)
501 goto wrong_fr_mce;
502
503 /* we have only one error signature so match all fields at once. */
504 if ((m->status & 0xffff) == 0x0f0f) {
505 pr_emerg(HW_ERR "FR Error: CPU Watchdog timer expire.\n");
506 return;
507 }
508
509wrong_fr_mce:
510 pr_emerg(HW_ERR "Corrupted FR MCE info?\n");
511}
512
513static inline void amd_decode_err_code(u16 ec)
514{
515 if (TLB_ERROR(ec)) {
516 pr_emerg(HW_ERR "Transaction: %s, Cache Level: %s\n",
517 TT_MSG(ec), LL_MSG(ec));
518 } else if (MEM_ERROR(ec)) {
519 pr_emerg(HW_ERR "Transaction: %s, Type: %s, Cache Level: %s\n",
520 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
521 } else if (BUS_ERROR(ec)) {
522 pr_emerg(HW_ERR "Transaction: %s (%s), %s, Cache Level: %s, "
523 "Participating Processor: %s\n",
524 RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec),
525 PP_MSG(ec));
526 } else
527 pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec);
528}
529
530/*
531 * Filter out unwanted MCE signatures here.
532 */
533static bool amd_filter_mce(struct mce *m)
534{
535 u8 xec = (m->status >> 16) & 0x1f;
536
537 /*
538 * NB GART TLB error reporting is disabled by default.
539 */
540 if (m->bank == 4 && xec == 0x5 && !report_gart_errors)
541 return true;
542
543 return false;
544}
545
546int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
547{
548 struct mce *m = (struct mce *)data;
549 int node, ecc;
550
551 if (amd_filter_mce(m))
552 return NOTIFY_STOP;
553
554 pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank);
555
556 pr_cont("%sorrected error, other errors lost: %s, "
557 "CPU context corrupt: %s",
558 ((m->status & MCI_STATUS_UC) ? "Unc" : "C"),
559 ((m->status & MCI_STATUS_OVER) ? "yes" : "no"),
560 ((m->status & MCI_STATUS_PCC) ? "yes" : "no"));
561
562 /* do the two bits[14:13] together */
563 ecc = (m->status >> 45) & 0x3;
564 if (ecc)
565 pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));
566
567 pr_cont("\n");
568
569 switch (m->bank) {
570 case 0:
571 amd_decode_dc_mce(m);
572 break;
573
574 case 1:
575 amd_decode_ic_mce(m);
576 break;
577
578 case 2:
579 amd_decode_bu_mce(m);
580 break;
581
582 case 3:
583 amd_decode_ls_mce(m);
584 break;
585
586 case 4:
587 node = amd_get_nb_id(m->extcpu);
588 amd_decode_nb_mce(node, m, 0);
589 break;
590
591 case 5:
592 amd_decode_fr_mce(m);
593 break;
594
595 default:
596 break;
597 }
598
599 amd_decode_err_code(m->status & 0xffff);
600
601 return NOTIFY_STOP;
602}
603EXPORT_SYMBOL_GPL(amd_decode_mce);
604
605static struct notifier_block amd_mce_dec_nb = {
606 .notifier_call = amd_decode_mce,
607};
608
609static int __init mce_amd_init(void)
610{
611 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
612 return 0;
613
614 if ((boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x12) &&
615 (boot_cpu_data.x86 != 0x14 || boot_cpu_data.x86_model > 0xf))
616 return 0;
617
618 fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
619 if (!fam_ops)
620 return -ENOMEM;
621
622 switch (boot_cpu_data.x86) {
623 case 0xf:
624 fam_ops->dc_mce = k8_dc_mce;
625 fam_ops->ic_mce = k8_ic_mce;
626 fam_ops->nb_mce = k8_nb_mce;
627 break;
628
629 case 0x10:
630 fam_ops->dc_mce = f10h_dc_mce;
631 fam_ops->ic_mce = k8_ic_mce;
632 fam_ops->nb_mce = f10h_nb_mce;
633 break;
634
635 case 0x11:
636 fam_ops->dc_mce = k8_dc_mce;
637 fam_ops->ic_mce = k8_ic_mce;
638 fam_ops->nb_mce = f10h_nb_mce;
639 break;
640
641 case 0x12:
642 fam_ops->dc_mce = f12h_dc_mce;
643 fam_ops->ic_mce = k8_ic_mce;
644 fam_ops->nb_mce = nb_noop_mce;
645 break;
646
647 case 0x14:
648 nb_err_cpumask = 0x3;
649 fam_ops->dc_mce = f14h_dc_mce;
650 fam_ops->ic_mce = f14h_ic_mce;
651 fam_ops->nb_mce = nb_noop_mce;
652 break;
653
654 default:
655 printk(KERN_WARNING "Huh? What family is that: %d?!\n",
656 boot_cpu_data.x86);
657 kfree(fam_ops);
658 return -EINVAL;
659 }
660
661 pr_info("MCE: In-kernel MCE decoding enabled.\n");
662
663 atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
664
665 return 0;
666}
667early_initcall(mce_amd_init);
668
669#ifdef MODULE
670static void __exit mce_amd_exit(void)
671{
672 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
673 kfree(fam_ops);
674}
675
676MODULE_DESCRIPTION("AMD MCE decoder");
677MODULE_ALIAS("edac-mce-amd");
678MODULE_LICENSE("GPL");
679module_exit(mce_amd_exit);
680#endif
diff --git a/drivers/edac/edac_mce_amd.h b/drivers/edac/mce_amd.h
index df23ee065f79..35f6e0e3b297 100644
--- a/drivers/edac/edac_mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -1,11 +1,14 @@
1#ifndef _EDAC_MCE_AMD_H 1#ifndef _EDAC_MCE_AMD_H
2#define _EDAC_MCE_AMD_H 2#define _EDAC_MCE_AMD_H
3 3
4#include <linux/notifier.h>
5
4#include <asm/mce.h> 6#include <asm/mce.h>
5 7
8#define BIT_64(n) (U64_C(1) << (n))
9
6#define ERROR_CODE(x) ((x) & 0xffff) 10#define ERROR_CODE(x) ((x) & 0xffff)
7#define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f) 11#define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f)
8#define EXT_ERR_MSG(x) ext_msgs[EXT_ERROR_CODE(x)]
9 12
10#define LOW_SYNDROME(x) (((x) >> 15) & 0xff) 13#define LOW_SYNDROME(x) (((x) >> 15) & 0xff)
11#define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) 14#define HIGH_SYNDROME(x) (((x) >> 24) & 0xff)
@@ -20,13 +23,14 @@
20#define II_MSG(x) ii_msgs[II(x)] 23#define II_MSG(x) ii_msgs[II(x)]
21#define LL(x) (((x) >> 0) & 0x3) 24#define LL(x) (((x) >> 0) & 0x3)
22#define LL_MSG(x) ll_msgs[LL(x)] 25#define LL_MSG(x) ll_msgs[LL(x)]
23#define RRRR(x) (((x) >> 4) & 0xf)
24#define RRRR_MSG(x) rrrr_msgs[RRRR(x)]
25#define TO(x) (((x) >> 8) & 0x1) 26#define TO(x) (((x) >> 8) & 0x1)
26#define TO_MSG(x) to_msgs[TO(x)] 27#define TO_MSG(x) to_msgs[TO(x)]
27#define PP(x) (((x) >> 9) & 0x3) 28#define PP(x) (((x) >> 9) & 0x3)
28#define PP_MSG(x) pp_msgs[PP(x)] 29#define PP_MSG(x) pp_msgs[PP(x)]
29 30
31#define RRRR(x) (((x) >> 4) & 0xf)
32#define RRRR_MSG(x) ((RRRR(x) < 9) ? rrrr_msgs[RRRR(x)] : "Wrong R4!")
33
30#define K8_NBSH 0x4C 34#define K8_NBSH 0x4C
31 35
32#define K8_NBSH_VALID_BIT BIT(31) 36#define K8_NBSH_VALID_BIT BIT(31)
@@ -41,13 +45,45 @@
41#define K8_NBSH_UECC BIT(13) 45#define K8_NBSH_UECC BIT(13)
42#define K8_NBSH_ERR_SCRUBER BIT(8) 46#define K8_NBSH_ERR_SCRUBER BIT(8)
43 47
48enum tt_ids {
49 TT_INSTR = 0,
50 TT_DATA,
51 TT_GEN,
52 TT_RESV,
53};
54
55enum ll_ids {
56 LL_RESV = 0,
57 LL_L1,
58 LL_L2,
59 LL_LG,
60};
61
62enum ii_ids {
63 II_MEM = 0,
64 II_RESV,
65 II_IO,
66 II_GEN,
67};
68
69enum rrrr_ids {
70 R4_GEN = 0,
71 R4_RD,
72 R4_WR,
73 R4_DRD,
74 R4_DWR,
75 R4_IRD,
76 R4_PREF,
77 R4_EVICT,
78 R4_SNOOP,
79};
80
44extern const char *tt_msgs[]; 81extern const char *tt_msgs[];
45extern const char *ll_msgs[]; 82extern const char *ll_msgs[];
46extern const char *rrrr_msgs[]; 83extern const char *rrrr_msgs[];
47extern const char *pp_msgs[]; 84extern const char *pp_msgs[];
48extern const char *to_msgs[]; 85extern const char *to_msgs[];
49extern const char *ii_msgs[]; 86extern const char *ii_msgs[];
50extern const char *ext_msgs[];
51 87
52/* 88/*
53 * relevant NB regs 89 * relevant NB regs
@@ -60,10 +96,19 @@ struct err_regs {
60 u32 nbeal; 96 u32 nbeal;
61}; 97};
62 98
99/*
100 * per-family decoder ops
101 */
102struct amd_decoder_ops {
103 bool (*dc_mce)(u16);
104 bool (*ic_mce)(u16);
105 bool (*nb_mce)(u16, u8);
106};
63 107
64void amd_report_gart_errors(bool); 108void amd_report_gart_errors(bool);
65void amd_register_ecc_decoder(void (*f)(int, struct err_regs *)); 109void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32));
66void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *)); 110void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32));
67void amd_decode_nb_mce(int, struct err_regs *, int); 111void amd_decode_nb_mce(int, struct mce *, u32);
112int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);
68 113
69#endif /* _EDAC_MCE_AMD_H */ 114#endif /* _EDAC_MCE_AMD_H */
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
new file mode 100644
index 000000000000..8d0688f36d4c
--- /dev/null
+++ b/drivers/edac/mce_amd_inj.c
@@ -0,0 +1,171 @@
1/*
2 * A simple MCE injection facility for testing the MCE decoding code. This
3 * driver should be built as module so that it can be loaded on production
4 * kernels for testing purposes.
5 *
6 * This file may be distributed under the terms of the GNU General Public
7 * License version 2.
8 *
9 * Copyright (c) 2010: Borislav Petkov <borislav.petkov@amd.com>
10 * Advanced Micro Devices Inc.
11 */
12
13#include <linux/kobject.h>
14#include <linux/sysdev.h>
15#include <linux/edac.h>
16#include <asm/mce.h>
17
18#include "mce_amd.h"
19
20struct edac_mce_attr {
21 struct attribute attr;
22 ssize_t (*show) (struct kobject *kobj, struct edac_mce_attr *attr, char *buf);
23 ssize_t (*store)(struct kobject *kobj, struct edac_mce_attr *attr,
24 const char *buf, size_t count);
25};
26
27#define EDAC_MCE_ATTR(_name, _mode, _show, _store) \
28static struct edac_mce_attr mce_attr_##_name = __ATTR(_name, _mode, _show, _store)
29
30static struct kobject *mce_kobj;
31
32/*
33 * Collect all the MCi_XXX settings
34 */
35static struct mce i_mce;
36
37#define MCE_INJECT_STORE(reg) \
38static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \
39 struct edac_mce_attr *attr, \
40 const char *data, size_t count)\
41{ \
42 int ret = 0; \
43 unsigned long value; \
44 \
45 ret = strict_strtoul(data, 16, &value); \
46 if (ret < 0) \
47 printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \
48 \
49 i_mce.reg = value; \
50 \
51 return count; \
52}
53
54MCE_INJECT_STORE(status);
55MCE_INJECT_STORE(misc);
56MCE_INJECT_STORE(addr);
57
58#define MCE_INJECT_SHOW(reg) \
59static ssize_t edac_inject_##reg##_show(struct kobject *kobj, \
60 struct edac_mce_attr *attr, \
61 char *buf) \
62{ \
63 return sprintf(buf, "0x%016llx\n", i_mce.reg); \
64}
65
66MCE_INJECT_SHOW(status);
67MCE_INJECT_SHOW(misc);
68MCE_INJECT_SHOW(addr);
69
70EDAC_MCE_ATTR(status, 0644, edac_inject_status_show, edac_inject_status_store);
71EDAC_MCE_ATTR(misc, 0644, edac_inject_misc_show, edac_inject_misc_store);
72EDAC_MCE_ATTR(addr, 0644, edac_inject_addr_show, edac_inject_addr_store);
73
74/*
75 * This denotes into which bank we're injecting and triggers
76 * the injection, at the same time.
77 */
78static ssize_t edac_inject_bank_store(struct kobject *kobj,
79 struct edac_mce_attr *attr,
80 const char *data, size_t count)
81{
82 int ret = 0;
83 unsigned long value;
84
85 ret = strict_strtoul(data, 10, &value);
86 if (ret < 0) {
87 printk(KERN_ERR "Invalid bank value!\n");
88 return -EINVAL;
89 }
90
91 if (value > 5) {
92 printk(KERN_ERR "Non-existant MCE bank: %lu\n", value);
93 return -EINVAL;
94 }
95
96 i_mce.bank = value;
97
98 amd_decode_mce(NULL, 0, &i_mce);
99
100 return count;
101}
102
103static ssize_t edac_inject_bank_show(struct kobject *kobj,
104 struct edac_mce_attr *attr, char *buf)
105{
106 return sprintf(buf, "%d\n", i_mce.bank);
107}
108
109EDAC_MCE_ATTR(bank, 0644, edac_inject_bank_show, edac_inject_bank_store);
110
111static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc,
112 &mce_attr_addr, &mce_attr_bank
113};
114
115static int __init edac_init_mce_inject(void)
116{
117 struct sysdev_class *edac_class = NULL;
118 int i, err = 0;
119
120 edac_class = edac_get_sysfs_class();
121 if (!edac_class)
122 return -EINVAL;
123
124 mce_kobj = kobject_create_and_add("mce", &edac_class->kset.kobj);
125 if (!mce_kobj) {
126 printk(KERN_ERR "Error creating a mce kset.\n");
127 err = -ENOMEM;
128 goto err_mce_kobj;
129 }
130
131 for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) {
132 err = sysfs_create_file(mce_kobj, &sysfs_attrs[i]->attr);
133 if (err) {
134 printk(KERN_ERR "Error creating %s in sysfs.\n",
135 sysfs_attrs[i]->attr.name);
136 goto err_sysfs_create;
137 }
138 }
139 return 0;
140
141err_sysfs_create:
142 while (i-- >= 0)
143 sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
144
145 kobject_del(mce_kobj);
146
147err_mce_kobj:
148 edac_put_sysfs_class();
149
150 return err;
151}
152
153static void __exit edac_exit_mce_inject(void)
154{
155 int i;
156
157 for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++)
158 sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
159
160 kobject_del(mce_kobj);
161
162 edac_put_sysfs_class();
163}
164
165module_init(edac_init_mce_inject);
166module_exit(edac_exit_mce_inject);
167
168MODULE_LICENSE("GPL");
169MODULE_AUTHOR("Borislav Petkov <borislav.petkov@amd.com>");
170MODULE_AUTHOR("AMD Inc.");
171MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding");
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 280c9b5ad9e3..88a3ae6cd023 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -125,7 +125,7 @@ config ISCSI_IBFT_FIND
125config ISCSI_IBFT 125config ISCSI_IBFT
126 tristate "iSCSI Boot Firmware Table Attributes module" 126 tristate "iSCSI Boot Firmware Table Attributes module"
127 select ISCSI_BOOT_SYSFS 127 select ISCSI_BOOT_SYSFS
128 depends on ISCSI_IBFT_FIND && SCSI 128 depends on ISCSI_IBFT_FIND && SCSI && SCSI_LOWLEVEL
129 default n 129 default n
130 help 130 help
131 This option enables support for detection and exposing of iSCSI 131 This option enables support for detection and exposing of iSCSI
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index c37ef64d1465..cb3ccf3ed221 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -59,18 +59,11 @@
59#include <linux/hrtimer.h> /* ktime_get_real() */ 59#include <linux/hrtimer.h> /* ktime_get_real() */
60#include <trace/events/power.h> 60#include <trace/events/power.h>
61#include <linux/sched.h> 61#include <linux/sched.h>
62#include <asm/mwait.h>
62 63
63#define INTEL_IDLE_VERSION "0.4" 64#define INTEL_IDLE_VERSION "0.4"
64#define PREFIX "intel_idle: " 65#define PREFIX "intel_idle: "
65 66
66#define MWAIT_SUBSTATE_MASK (0xf)
67#define MWAIT_CSTATE_MASK (0xf)
68#define MWAIT_SUBSTATE_SIZE (4)
69#define MWAIT_MAX_NUM_CSTATES 8
70#define CPUID_MWAIT_LEAF (5)
71#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
72#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
73
74static struct cpuidle_driver intel_idle_driver = { 67static struct cpuidle_driver intel_idle_driver = {
75 .name = "intel_idle", 68 .name = "intel_idle",
76 .owner = THIS_MODULE, 69 .owner = THIS_MODULE,
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 9ddafc30f432..af9ee313c10b 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -28,7 +28,7 @@ struct evdev {
28 int minor; 28 int minor;
29 struct input_handle handle; 29 struct input_handle handle;
30 wait_queue_head_t wait; 30 wait_queue_head_t wait;
31 struct evdev_client *grab; 31 struct evdev_client __rcu *grab;
32 struct list_head client_list; 32 struct list_head client_list;
33 spinlock_t client_lock; /* protects client_list */ 33 spinlock_t client_lock; /* protects client_list */
34 struct mutex mutex; 34 struct mutex mutex;
diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c
index c19066479057..7e2c12a5b839 100644
--- a/drivers/input/misc/hp_sdc_rtc.c
+++ b/drivers/input/misc/hp_sdc_rtc.c
@@ -104,7 +104,7 @@ static int hp_sdc_rtc_do_read_bbrtc (struct rtc_time *rtctm)
104 t.endidx = 91; 104 t.endidx = 91;
105 t.seq = tseq; 105 t.seq = tseq;
106 t.act.semaphore = &tsem; 106 t.act.semaphore = &tsem;
107 init_MUTEX_LOCKED(&tsem); 107 sema_init(&tsem, 0);
108 108
109 if (hp_sdc_enqueue_transaction(&t)) return -1; 109 if (hp_sdc_enqueue_transaction(&t)) return -1;
110 110
@@ -698,7 +698,7 @@ static int __init hp_sdc_rtc_init(void)
698 return -ENODEV; 698 return -ENODEV;
699#endif 699#endif
700 700
701 init_MUTEX(&i8042tregs); 701 sema_init(&i8042tregs, 1);
702 702
703 if ((ret = hp_sdc_request_timer_irq(&hp_sdc_rtc_isr))) 703 if ((ret = hp_sdc_request_timer_irq(&hp_sdc_rtc_isr)))
704 return ret; 704 return ret;
diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c
index c92f4edfee7b..e5624d8f1709 100644
--- a/drivers/input/serio/hil_mlc.c
+++ b/drivers/input/serio/hil_mlc.c
@@ -915,15 +915,15 @@ int hil_mlc_register(hil_mlc *mlc)
915 mlc->ostarted = 0; 915 mlc->ostarted = 0;
916 916
917 rwlock_init(&mlc->lock); 917 rwlock_init(&mlc->lock);
918 init_MUTEX(&mlc->osem); 918 sema_init(&mlc->osem, 1);
919 919
920 init_MUTEX(&mlc->isem); 920 sema_init(&mlc->isem, 1);
921 mlc->icount = -1; 921 mlc->icount = -1;
922 mlc->imatch = 0; 922 mlc->imatch = 0;
923 923
924 mlc->opercnt = 0; 924 mlc->opercnt = 0;
925 925
926 init_MUTEX_LOCKED(&(mlc->csem)); 926 sema_init(&(mlc->csem), 0);
927 927
928 hil_mlc_clear_di_scratch(mlc); 928 hil_mlc_clear_di_scratch(mlc);
929 hil_mlc_clear_di_map(mlc, 0); 929 hil_mlc_clear_di_map(mlc, 0);
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
index bcc2d30ec245..8c0b51c31424 100644
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -905,7 +905,7 @@ static int __init hp_sdc_init(void)
905 ts_sync[1] = 0x0f; 905 ts_sync[1] = 0x0f;
906 ts_sync[2] = ts_sync[3] = ts_sync[4] = ts_sync[5] = 0; 906 ts_sync[2] = ts_sync[3] = ts_sync[4] = ts_sync[5] = 0;
907 t_sync.act.semaphore = &s_sync; 907 t_sync.act.semaphore = &s_sync;
908 init_MUTEX_LOCKED(&s_sync); 908 sema_init(&s_sync, 0);
909 hp_sdc_enqueue_transaction(&t_sync); 909 hp_sdc_enqueue_transaction(&t_sync);
910 down(&s_sync); /* Wait for t_sync to complete */ 910 down(&s_sync); /* Wait for t_sync to complete */
911 911
@@ -1039,7 +1039,7 @@ static int __init hp_sdc_register(void)
1039 return hp_sdc.dev_err; 1039 return hp_sdc.dev_err;
1040 } 1040 }
1041 1041
1042 init_MUTEX_LOCKED(&tq_init_sem); 1042 sema_init(&tq_init_sem, 0);
1043 1043
1044 tq_init.actidx = 0; 1044 tq_init.actidx = 0;
1045 tq_init.idx = 1; 1045 tq_init.idx = 1;
diff --git a/drivers/isdn/act2000/act2000.h b/drivers/isdn/act2000/act2000.h
index d4c50512a1ff..88c9423500d8 100644
--- a/drivers/isdn/act2000/act2000.h
+++ b/drivers/isdn/act2000/act2000.h
@@ -141,9 +141,9 @@ typedef struct irq_data_isa {
141 __u8 rcvhdr[8]; 141 __u8 rcvhdr[8];
142} irq_data_isa; 142} irq_data_isa;
143 143
144typedef union irq_data { 144typedef union act2000_irq_data {
145 irq_data_isa isa; 145 irq_data_isa isa;
146} irq_data; 146} act2000_irq_data;
147 147
148/* 148/*
149 * Per card driver data 149 * Per card driver data
@@ -176,7 +176,7 @@ typedef struct act2000_card {
176 char *status_buf_read; 176 char *status_buf_read;
177 char *status_buf_write; 177 char *status_buf_write;
178 char *status_buf_end; 178 char *status_buf_end;
179 irq_data idat; /* Data used for IRQ handler */ 179 act2000_irq_data idat; /* Data used for IRQ handler */
180 isdn_if interface; /* Interface to upper layer */ 180 isdn_if interface; /* Interface to upper layer */
181 char regname[35]; /* Name used for request_region */ 181 char regname[35]; /* Name used for request_region */
182} act2000_card; 182} act2000_card;
diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c
index 6f9afcd5ca4e..b133378d4dc9 100644
--- a/drivers/isdn/hisax/config.c
+++ b/drivers/isdn/hisax/config.c
@@ -801,6 +801,16 @@ static void closecard(int cardnr)
801 ll_unload(csta); 801 ll_unload(csta);
802} 802}
803 803
804static irqreturn_t card_irq(int intno, void *dev_id)
805{
806 struct IsdnCardState *cs = dev_id;
807 irqreturn_t ret = cs->irq_func(intno, cs);
808
809 if (ret == IRQ_HANDLED)
810 cs->irq_cnt++;
811 return ret;
812}
813
804static int init_card(struct IsdnCardState *cs) 814static int init_card(struct IsdnCardState *cs)
805{ 815{
806 int irq_cnt, cnt = 3, ret; 816 int irq_cnt, cnt = 3, ret;
@@ -809,10 +819,10 @@ static int init_card(struct IsdnCardState *cs)
809 ret = cs->cardmsg(cs, CARD_INIT, NULL); 819 ret = cs->cardmsg(cs, CARD_INIT, NULL);
810 return(ret); 820 return(ret);
811 } 821 }
812 irq_cnt = kstat_irqs(cs->irq); 822 irq_cnt = cs->irq_cnt = 0;
813 printk(KERN_INFO "%s: IRQ %d count %d\n", CardType[cs->typ], 823 printk(KERN_INFO "%s: IRQ %d count %d\n", CardType[cs->typ],
814 cs->irq, irq_cnt); 824 cs->irq, irq_cnt);
815 if (request_irq(cs->irq, cs->irq_func, cs->irq_flags, "HiSax", cs)) { 825 if (request_irq(cs->irq, card_irq, cs->irq_flags, "HiSax", cs)) {
816 printk(KERN_WARNING "HiSax: couldn't get interrupt %d\n", 826 printk(KERN_WARNING "HiSax: couldn't get interrupt %d\n",
817 cs->irq); 827 cs->irq);
818 return 1; 828 return 1;
@@ -822,8 +832,8 @@ static int init_card(struct IsdnCardState *cs)
822 /* Timeout 10ms */ 832 /* Timeout 10ms */
823 msleep(10); 833 msleep(10);
824 printk(KERN_INFO "%s: IRQ %d count %d\n", 834 printk(KERN_INFO "%s: IRQ %d count %d\n",
825 CardType[cs->typ], cs->irq, kstat_irqs(cs->irq)); 835 CardType[cs->typ], cs->irq, cs->irq_cnt);
826 if (kstat_irqs(cs->irq) == irq_cnt) { 836 if (cs->irq_cnt == irq_cnt) {
827 printk(KERN_WARNING 837 printk(KERN_WARNING
828 "%s: IRQ(%d) getting no interrupts during init %d\n", 838 "%s: IRQ(%d) getting no interrupts during init %d\n",
829 CardType[cs->typ], cs->irq, 4 - cnt); 839 CardType[cs->typ], cs->irq, 4 - cnt);
diff --git a/drivers/isdn/hisax/hisax.h b/drivers/isdn/hisax/hisax.h
index 832a87855ffb..32ab3924aa73 100644
--- a/drivers/isdn/hisax/hisax.h
+++ b/drivers/isdn/hisax/hisax.h
@@ -959,6 +959,7 @@ struct IsdnCardState {
959 u_long event; 959 u_long event;
960 struct work_struct tqueue; 960 struct work_struct tqueue;
961 struct timer_list dbusytimer; 961 struct timer_list dbusytimer;
962 unsigned int irq_cnt;
962#ifdef ERROR_STATISTIC 963#ifdef ERROR_STATISTIC
963 int err_crc; 964 int err_crc;
964 int err_tx; 965 int err_tx;
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
index 1c4ee6e77937..bf64e49d996a 100644
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -83,7 +83,7 @@ static struct adb_driver *adb_controller;
83BLOCKING_NOTIFIER_HEAD(adb_client_list); 83BLOCKING_NOTIFIER_HEAD(adb_client_list);
84static int adb_got_sleep; 84static int adb_got_sleep;
85static int adb_inited; 85static int adb_inited;
86static DECLARE_MUTEX(adb_probe_mutex); 86static DEFINE_SEMAPHORE(adb_probe_mutex);
87static int sleepy_trackpad; 87static int sleepy_trackpad;
88static int autopoll_devs; 88static int autopoll_devs;
89int __adb_probe_sync; 89int __adb_probe_sync;
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index 097f24d8bceb..b9fda7018cef 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -78,7 +78,7 @@ struct sih {
78 u8 irq_lines; /* number of supported irq lines */ 78 u8 irq_lines; /* number of supported irq lines */
79 79
80 /* SIR ignored -- set interrupt, for testing only */ 80 /* SIR ignored -- set interrupt, for testing only */
81 struct irq_data { 81 struct sih_irq_data {
82 u8 isr_offset; 82 u8 isr_offset;
83 u8 imr_offset; 83 u8 imr_offset;
84 } mask[2]; 84 } mask[2];
@@ -810,7 +810,7 @@ int twl4030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
810 twl4030_irq_chip = dummy_irq_chip; 810 twl4030_irq_chip = dummy_irq_chip;
811 twl4030_irq_chip.name = "twl4030"; 811 twl4030_irq_chip.name = "twl4030";
812 812
813 twl4030_sih_irq_chip.ack = dummy_irq_chip.ack; 813 twl4030_sih_irq_chip.irq_ack = dummy_irq_chip.irq_ack;
814 814
815 for (i = irq_base; i < irq_end; i++) { 815 for (i = irq_base; i < irq_end; i++) {
816 set_irq_chip_and_handler(i, &twl4030_irq_chip, 816 set_irq_chip_and_handler(i, &twl4030_irq_chip,
diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c
index 70705d1306b9..eca55c52bdfd 100644
--- a/drivers/net/3c527.c
+++ b/drivers/net/3c527.c
@@ -522,7 +522,7 @@ static int __init mc32_probe1(struct net_device *dev, int slot)
522 lp->tx_len = lp->exec_box->data[9]; /* Transmit list count */ 522 lp->tx_len = lp->exec_box->data[9]; /* Transmit list count */
523 lp->rx_len = lp->exec_box->data[11]; /* Receive list count */ 523 lp->rx_len = lp->exec_box->data[11]; /* Receive list count */
524 524
525 init_MUTEX_LOCKED(&lp->cmd_mutex); 525 sema_init(&lp->cmd_mutex, 0);
526 init_completion(&lp->execution_cmd); 526 init_completion(&lp->execution_cmd);
527 init_completion(&lp->xceiver_cmd); 527 init_completion(&lp->xceiver_cmd);
528 528
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 4b52c767ad05..3e5d0b6b6516 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -608,7 +608,7 @@ static int sixpack_open(struct tty_struct *tty)
608 608
609 spin_lock_init(&sp->lock); 609 spin_lock_init(&sp->lock);
610 atomic_set(&sp->refcnt, 1); 610 atomic_set(&sp->refcnt, 1);
611 init_MUTEX_LOCKED(&sp->dead_sem); 611 sema_init(&sp->dead_sem, 0);
612 612
613 /* !!! length of the buffers. MTU is IP MTU, not PACLEN! */ 613 /* !!! length of the buffers. MTU is IP MTU, not PACLEN! */
614 614
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 66e88bd59caa..4c628393c8b1 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -747,7 +747,7 @@ static int mkiss_open(struct tty_struct *tty)
747 747
748 spin_lock_init(&ax->buflock); 748 spin_lock_init(&ax->buflock);
749 atomic_set(&ax->refcnt, 1); 749 atomic_set(&ax->refcnt, 1);
750 init_MUTEX_LOCKED(&ax->dead_sem); 750 sema_init(&ax->dead_sem, 0);
751 751
752 ax->tty = tty; 752 ax->tty = tty;
753 tty->disc_data = ax; 753 tty->disc_data = ax;
diff --git a/drivers/net/irda/sir_dev.c b/drivers/net/irda/sir_dev.c
index 1b051dab7b29..51d74447f8f8 100644
--- a/drivers/net/irda/sir_dev.c
+++ b/drivers/net/irda/sir_dev.c
@@ -909,7 +909,7 @@ struct sir_dev * sirdev_get_instance(const struct sir_driver *drv, const char *n
909 dev->tx_skb = NULL; 909 dev->tx_skb = NULL;
910 910
911 spin_lock_init(&dev->tx_lock); 911 spin_lock_init(&dev->tx_lock);
912 init_MUTEX(&dev->fsm.sem); 912 sema_init(&dev->fsm.sem, 1);
913 913
914 dev->drv = drv; 914 dev->drv = drv;
915 dev->netdev = ndev; 915 dev->netdev = ndev;
diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c
index af50a530daee..78d70a6481bf 100644
--- a/drivers/net/ppp_async.c
+++ b/drivers/net/ppp_async.c
@@ -184,7 +184,7 @@ ppp_asynctty_open(struct tty_struct *tty)
184 tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap); 184 tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap);
185 185
186 atomic_set(&ap->refcnt, 1); 186 atomic_set(&ap->refcnt, 1);
187 init_MUTEX_LOCKED(&ap->dead_sem); 187 sema_init(&ap->dead_sem, 0);
188 188
189 ap->chan.private = ap; 189 ap->chan.private = ap;
190 ap->chan.ops = &async_ops; 190 ap->chan.ops = &async_ops;
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 04c6cd4333f1..10bafd59f9c3 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -575,7 +575,7 @@ static int cosa_probe(int base, int irq, int dma)
575 575
576 /* Initialize the chardev data structures */ 576 /* Initialize the chardev data structures */
577 mutex_init(&chan->rlock); 577 mutex_init(&chan->rlock);
578 init_MUTEX(&chan->wsem); 578 sema_init(&chan->wsem, 1);
579 579
580 /* Register the network interface */ 580 /* Register the network interface */
581 if (!(chan->netdev = alloc_hdlcdev(chan))) { 581 if (!(chan->netdev = alloc_hdlcdev(chan))) {
diff --git a/drivers/parport/share.c b/drivers/parport/share.c
index dffa5d4fb298..a2d9d1e59260 100644
--- a/drivers/parport/share.c
+++ b/drivers/parport/share.c
@@ -306,7 +306,7 @@ struct parport *parport_register_port(unsigned long base, int irq, int dma,
306 spin_lock_init(&tmp->pardevice_lock); 306 spin_lock_init(&tmp->pardevice_lock);
307 tmp->ieee1284.mode = IEEE1284_MODE_COMPAT; 307 tmp->ieee1284.mode = IEEE1284_MODE_COMPAT;
308 tmp->ieee1284.phase = IEEE1284_PH_FWD_IDLE; 308 tmp->ieee1284.phase = IEEE1284_PH_FWD_IDLE;
309 init_MUTEX_LOCKED (&tmp->ieee1284.irq); /* actually a semaphore at 0 */ 309 sema_init(&tmp->ieee1284.irq, 0);
310 tmp->spintime = parport_default_spintime; 310 tmp->spintime = parport_default_spintime;
311 atomic_set (&tmp->ref_count, 1); 311 atomic_set (&tmp->ref_count, 1);
312 INIT_LIST_HEAD(&tmp->full_list); 312 INIT_LIST_HEAD(&tmp->full_list);
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 0a19708074c2..3de3a436a432 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -1221,9 +1221,9 @@ const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1221 } 1221 }
1222} 1222}
1223 1223
1224void dmar_msi_unmask(unsigned int irq) 1224void dmar_msi_unmask(struct irq_data *data)
1225{ 1225{
1226 struct intel_iommu *iommu = get_irq_data(irq); 1226 struct intel_iommu *iommu = irq_data_get_irq_data(data);
1227 unsigned long flag; 1227 unsigned long flag;
1228 1228
1229 /* unmask it */ 1229 /* unmask it */
@@ -1234,10 +1234,10 @@ void dmar_msi_unmask(unsigned int irq)
1234 spin_unlock_irqrestore(&iommu->register_lock, flag); 1234 spin_unlock_irqrestore(&iommu->register_lock, flag);
1235} 1235}
1236 1236
1237void dmar_msi_mask(unsigned int irq) 1237void dmar_msi_mask(struct irq_data *data)
1238{ 1238{
1239 unsigned long flag; 1239 unsigned long flag;
1240 struct intel_iommu *iommu = get_irq_data(irq); 1240 struct intel_iommu *iommu = irq_data_get_irq_data(data);
1241 1241
1242 /* mask it */ 1242 /* mask it */
1243 spin_lock_irqsave(&iommu->register_lock, flag); 1243 spin_lock_irqsave(&iommu->register_lock, flag);
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 98abf8b91294..834842aa5bbf 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -57,28 +57,22 @@ void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
57 *msg = cfg->msg; 57 *msg = cfg->msg;
58} 58}
59 59
60void mask_ht_irq(unsigned int irq) 60void mask_ht_irq(struct irq_data *data)
61{ 61{
62 struct ht_irq_cfg *cfg; 62 struct ht_irq_cfg *cfg = irq_data_get_irq_data(data);
63 struct ht_irq_msg msg; 63 struct ht_irq_msg msg = cfg->msg;
64
65 cfg = get_irq_data(irq);
66 64
67 msg = cfg->msg;
68 msg.address_lo |= 1; 65 msg.address_lo |= 1;
69 write_ht_irq_msg(irq, &msg); 66 write_ht_irq_msg(data->irq, &msg);
70} 67}
71 68
72void unmask_ht_irq(unsigned int irq) 69void unmask_ht_irq(struct irq_data *data)
73{ 70{
74 struct ht_irq_cfg *cfg; 71 struct ht_irq_cfg *cfg = irq_data_get_irq_data(data);
75 struct ht_irq_msg msg; 72 struct ht_irq_msg msg = cfg->msg;
76
77 cfg = get_irq_data(irq);
78 73
79 msg = cfg->msg;
80 msg.address_lo &= ~1; 74 msg.address_lo &= ~1;
81 write_ht_irq_msg(irq, &msg); 75 write_ht_irq_msg(data->irq, &msg);
82} 76}
83 77
84/** 78/**
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index fd1d2867cdcc..ec87cd66f3eb 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -46,109 +46,24 @@ static __init int setup_intremap(char *str)
46} 46}
47early_param("intremap", setup_intremap); 47early_param("intremap", setup_intremap);
48 48
49struct irq_2_iommu {
50 struct intel_iommu *iommu;
51 u16 irte_index;
52 u16 sub_handle;
53 u8 irte_mask;
54};
55
56#ifdef CONFIG_GENERIC_HARDIRQS
57static struct irq_2_iommu *get_one_free_irq_2_iommu(int node)
58{
59 struct irq_2_iommu *iommu;
60
61 iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node);
62 printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node);
63
64 return iommu;
65}
66
67static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
68{
69 struct irq_desc *desc;
70
71 desc = irq_to_desc(irq);
72
73 if (WARN_ON_ONCE(!desc))
74 return NULL;
75
76 return desc->irq_2_iommu;
77}
78
79static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
80{
81 struct irq_desc *desc;
82 struct irq_2_iommu *irq_iommu;
83
84 desc = irq_to_desc(irq);
85 if (!desc) {
86 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
87 return NULL;
88 }
89
90 irq_iommu = desc->irq_2_iommu;
91
92 if (!irq_iommu)
93 desc->irq_2_iommu = get_one_free_irq_2_iommu(irq_node(irq));
94
95 return desc->irq_2_iommu;
96}
97
98#else /* !CONFIG_SPARSE_IRQ */
99
100static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
101
102static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
103{
104 if (irq < nr_irqs)
105 return &irq_2_iommuX[irq];
106
107 return NULL;
108}
109static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
110{
111 return irq_2_iommu(irq);
112}
113#endif
114
115static DEFINE_SPINLOCK(irq_2_ir_lock); 49static DEFINE_SPINLOCK(irq_2_ir_lock);
116 50
117static struct irq_2_iommu *valid_irq_2_iommu(unsigned int irq) 51static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
118{
119 struct irq_2_iommu *irq_iommu;
120
121 irq_iommu = irq_2_iommu(irq);
122
123 if (!irq_iommu)
124 return NULL;
125
126 if (!irq_iommu->iommu)
127 return NULL;
128
129 return irq_iommu;
130}
131
132int irq_remapped(int irq)
133{ 52{
134 return valid_irq_2_iommu(irq) != NULL; 53 struct irq_cfg *cfg = get_irq_chip_data(irq);
54 return cfg ? &cfg->irq_2_iommu : NULL;
135} 55}
136 56
137int get_irte(int irq, struct irte *entry) 57int get_irte(int irq, struct irte *entry)
138{ 58{
139 int index; 59 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
140 struct irq_2_iommu *irq_iommu;
141 unsigned long flags; 60 unsigned long flags;
61 int index;
142 62
143 if (!entry) 63 if (!entry || !irq_iommu)
144 return -1; 64 return -1;
145 65
146 spin_lock_irqsave(&irq_2_ir_lock, flags); 66 spin_lock_irqsave(&irq_2_ir_lock, flags);
147 irq_iommu = valid_irq_2_iommu(irq);
148 if (!irq_iommu) {
149 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
150 return -1;
151 }
152 67
153 index = irq_iommu->irte_index + irq_iommu->sub_handle; 68 index = irq_iommu->irte_index + irq_iommu->sub_handle;
154 *entry = *(irq_iommu->iommu->ir_table->base + index); 69 *entry = *(irq_iommu->iommu->ir_table->base + index);
@@ -160,20 +75,14 @@ int get_irte(int irq, struct irte *entry)
160int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) 75int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
161{ 76{
162 struct ir_table *table = iommu->ir_table; 77 struct ir_table *table = iommu->ir_table;
163 struct irq_2_iommu *irq_iommu; 78 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
164 u16 index, start_index; 79 u16 index, start_index;
165 unsigned int mask = 0; 80 unsigned int mask = 0;
166 unsigned long flags; 81 unsigned long flags;
167 int i; 82 int i;
168 83
169 if (!count) 84 if (!count || !irq_iommu)
170 return -1;
171
172#ifndef CONFIG_SPARSE_IRQ
173 /* protect irq_2_iommu_alloc later */
174 if (irq >= nr_irqs)
175 return -1; 85 return -1;
176#endif
177 86
178 /* 87 /*
179 * start the IRTE search from index 0. 88 * start the IRTE search from index 0.
@@ -214,13 +123,6 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
214 for (i = index; i < index + count; i++) 123 for (i = index; i < index + count; i++)
215 table->base[i].present = 1; 124 table->base[i].present = 1;
216 125
217 irq_iommu = irq_2_iommu_alloc(irq);
218 if (!irq_iommu) {
219 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
220 printk(KERN_ERR "can't allocate irq_2_iommu\n");
221 return -1;
222 }
223
224 irq_iommu->iommu = iommu; 126 irq_iommu->iommu = iommu;
225 irq_iommu->irte_index = index; 127 irq_iommu->irte_index = index;
226 irq_iommu->sub_handle = 0; 128 irq_iommu->sub_handle = 0;
@@ -244,17 +146,14 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
244 146
245int map_irq_to_irte_handle(int irq, u16 *sub_handle) 147int map_irq_to_irte_handle(int irq, u16 *sub_handle)
246{ 148{
247 int index; 149 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
248 struct irq_2_iommu *irq_iommu;
249 unsigned long flags; 150 unsigned long flags;
151 int index;
250 152
251 spin_lock_irqsave(&irq_2_ir_lock, flags); 153 if (!irq_iommu)
252 irq_iommu = valid_irq_2_iommu(irq);
253 if (!irq_iommu) {
254 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
255 return -1; 154 return -1;
256 }
257 155
156 spin_lock_irqsave(&irq_2_ir_lock, flags);
258 *sub_handle = irq_iommu->sub_handle; 157 *sub_handle = irq_iommu->sub_handle;
259 index = irq_iommu->irte_index; 158 index = irq_iommu->irte_index;
260 spin_unlock_irqrestore(&irq_2_ir_lock, flags); 159 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
@@ -263,18 +162,13 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle)
263 162
264int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) 163int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
265{ 164{
266 struct irq_2_iommu *irq_iommu; 165 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
267 unsigned long flags; 166 unsigned long flags;
268 167
269 spin_lock_irqsave(&irq_2_ir_lock, flags); 168 if (!irq_iommu)
270
271 irq_iommu = irq_2_iommu_alloc(irq);
272
273 if (!irq_iommu) {
274 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
275 printk(KERN_ERR "can't allocate irq_2_iommu\n");
276 return -1; 169 return -1;
277 } 170
171 spin_lock_irqsave(&irq_2_ir_lock, flags);
278 172
279 irq_iommu->iommu = iommu; 173 irq_iommu->iommu = iommu;
280 irq_iommu->irte_index = index; 174 irq_iommu->irte_index = index;
@@ -286,43 +180,18 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
286 return 0; 180 return 0;
287} 181}
288 182
289int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
290{
291 struct irq_2_iommu *irq_iommu;
292 unsigned long flags;
293
294 spin_lock_irqsave(&irq_2_ir_lock, flags);
295 irq_iommu = valid_irq_2_iommu(irq);
296 if (!irq_iommu) {
297 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
298 return -1;
299 }
300
301 irq_iommu->iommu = NULL;
302 irq_iommu->irte_index = 0;
303 irq_iommu->sub_handle = 0;
304 irq_2_iommu(irq)->irte_mask = 0;
305
306 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
307
308 return 0;
309}
310
311int modify_irte(int irq, struct irte *irte_modified) 183int modify_irte(int irq, struct irte *irte_modified)
312{ 184{
313 int rc; 185 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
314 int index;
315 struct irte *irte;
316 struct intel_iommu *iommu; 186 struct intel_iommu *iommu;
317 struct irq_2_iommu *irq_iommu;
318 unsigned long flags; 187 unsigned long flags;
188 struct irte *irte;
189 int rc, index;
319 190
320 spin_lock_irqsave(&irq_2_ir_lock, flags); 191 if (!irq_iommu)
321 irq_iommu = valid_irq_2_iommu(irq);
322 if (!irq_iommu) {
323 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
324 return -1; 192 return -1;
325 } 193
194 spin_lock_irqsave(&irq_2_ir_lock, flags);
326 195
327 iommu = irq_iommu->iommu; 196 iommu = irq_iommu->iommu;
328 197
@@ -339,31 +208,6 @@ int modify_irte(int irq, struct irte *irte_modified)
339 return rc; 208 return rc;
340} 209}
341 210
342int flush_irte(int irq)
343{
344 int rc;
345 int index;
346 struct intel_iommu *iommu;
347 struct irq_2_iommu *irq_iommu;
348 unsigned long flags;
349
350 spin_lock_irqsave(&irq_2_ir_lock, flags);
351 irq_iommu = valid_irq_2_iommu(irq);
352 if (!irq_iommu) {
353 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
354 return -1;
355 }
356
357 iommu = irq_iommu->iommu;
358
359 index = irq_iommu->irte_index + irq_iommu->sub_handle;
360
361 rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask);
362 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
363
364 return rc;
365}
366
367struct intel_iommu *map_hpet_to_ir(u8 hpet_id) 211struct intel_iommu *map_hpet_to_ir(u8 hpet_id)
368{ 212{
369 int i; 213 int i;
@@ -420,16 +264,14 @@ static int clear_entries(struct irq_2_iommu *irq_iommu)
420 264
421int free_irte(int irq) 265int free_irte(int irq)
422{ 266{
423 int rc = 0; 267 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
424 struct irq_2_iommu *irq_iommu;
425 unsigned long flags; 268 unsigned long flags;
269 int rc;
426 270
427 spin_lock_irqsave(&irq_2_ir_lock, flags); 271 if (!irq_iommu)
428 irq_iommu = valid_irq_2_iommu(irq);
429 if (!irq_iommu) {
430 spin_unlock_irqrestore(&irq_2_ir_lock, flags);
431 return -1; 272 return -1;
432 } 273
274 spin_lock_irqsave(&irq_2_ir_lock, flags);
433 275
434 rc = clear_entries(irq_iommu); 276 rc = clear_entries(irq_iommu);
435 277
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 69b7be33b3a2..5fcf5aec680f 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -170,33 +170,31 @@ static void msix_mask_irq(struct msi_desc *desc, u32 flag)
170 desc->masked = __msix_mask_irq(desc, flag); 170 desc->masked = __msix_mask_irq(desc, flag);
171} 171}
172 172
173static void msi_set_mask_bit(unsigned irq, u32 flag) 173static void msi_set_mask_bit(struct irq_data *data, u32 flag)
174{ 174{
175 struct msi_desc *desc = get_irq_msi(irq); 175 struct msi_desc *desc = irq_data_get_msi(data);
176 176
177 if (desc->msi_attrib.is_msix) { 177 if (desc->msi_attrib.is_msix) {
178 msix_mask_irq(desc, flag); 178 msix_mask_irq(desc, flag);
179 readl(desc->mask_base); /* Flush write to device */ 179 readl(desc->mask_base); /* Flush write to device */
180 } else { 180 } else {
181 unsigned offset = irq - desc->dev->irq; 181 unsigned offset = data->irq - desc->dev->irq;
182 msi_mask_irq(desc, 1 << offset, flag << offset); 182 msi_mask_irq(desc, 1 << offset, flag << offset);
183 } 183 }
184} 184}
185 185
186void mask_msi_irq(unsigned int irq) 186void mask_msi_irq(struct irq_data *data)
187{ 187{
188 msi_set_mask_bit(irq, 1); 188 msi_set_mask_bit(data, 1);
189} 189}
190 190
191void unmask_msi_irq(unsigned int irq) 191void unmask_msi_irq(struct irq_data *data)
192{ 192{
193 msi_set_mask_bit(irq, 0); 193 msi_set_mask_bit(data, 0);
194} 194}
195 195
196void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) 196void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
197{ 197{
198 struct msi_desc *entry = get_irq_desc_msi(desc);
199
200 BUG_ON(entry->dev->current_state != PCI_D0); 198 BUG_ON(entry->dev->current_state != PCI_D0);
201 199
202 if (entry->msi_attrib.is_msix) { 200 if (entry->msi_attrib.is_msix) {
@@ -227,15 +225,13 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
227 225
228void read_msi_msg(unsigned int irq, struct msi_msg *msg) 226void read_msi_msg(unsigned int irq, struct msi_msg *msg)
229{ 227{
230 struct irq_desc *desc = irq_to_desc(irq); 228 struct msi_desc *entry = get_irq_msi(irq);
231 229
232 read_msi_msg_desc(desc, msg); 230 __read_msi_msg(entry, msg);
233} 231}
234 232
235void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) 233void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
236{ 234{
237 struct msi_desc *entry = get_irq_desc_msi(desc);
238
239 /* Assert that the cache is valid, assuming that 235 /* Assert that the cache is valid, assuming that
240 * valid messages are not all-zeroes. */ 236 * valid messages are not all-zeroes. */
241 BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo | 237 BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
@@ -246,15 +242,13 @@ void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
246 242
247void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) 243void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
248{ 244{
249 struct irq_desc *desc = irq_to_desc(irq); 245 struct msi_desc *entry = get_irq_msi(irq);
250 246
251 get_cached_msi_msg_desc(desc, msg); 247 __get_cached_msi_msg(entry, msg);
252} 248}
253 249
254void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) 250void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
255{ 251{
256 struct msi_desc *entry = get_irq_desc_msi(desc);
257
258 if (entry->dev->current_state != PCI_D0) { 252 if (entry->dev->current_state != PCI_D0) {
259 /* Don't touch the hardware now */ 253 /* Don't touch the hardware now */
260 } else if (entry->msi_attrib.is_msix) { 254 } else if (entry->msi_attrib.is_msix) {
@@ -292,9 +286,9 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
292 286
293void write_msi_msg(unsigned int irq, struct msi_msg *msg) 287void write_msi_msg(unsigned int irq, struct msi_msg *msg)
294{ 288{
295 struct irq_desc *desc = irq_to_desc(irq); 289 struct msi_desc *entry = get_irq_msi(irq);
296 290
297 write_msi_msg_desc(desc, msg); 291 __write_msi_msg(entry, msg);
298} 292}
299 293
300static void free_msi_irqs(struct pci_dev *dev) 294static void free_msi_irqs(struct pci_dev *dev)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 7c8008225ee3..17927b1f9334 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -127,7 +127,10 @@ static void handle_tx(struct vhost_net *net)
127 size_t len, total_len = 0; 127 size_t len, total_len = 0;
128 int err, wmem; 128 int err, wmem;
129 size_t hdr_size; 129 size_t hdr_size;
130 struct socket *sock = rcu_dereference(vq->private_data); 130 struct socket *sock;
131
132 sock = rcu_dereference_check(vq->private_data,
133 lockdep_is_held(&vq->mutex));
131 if (!sock) 134 if (!sock)
132 return; 135 return;
133 136
@@ -582,7 +585,10 @@ static void vhost_net_disable_vq(struct vhost_net *n,
582static void vhost_net_enable_vq(struct vhost_net *n, 585static void vhost_net_enable_vq(struct vhost_net *n,
583 struct vhost_virtqueue *vq) 586 struct vhost_virtqueue *vq)
584{ 587{
585 struct socket *sock = vq->private_data; 588 struct socket *sock;
589
590 sock = rcu_dereference_protected(vq->private_data,
591 lockdep_is_held(&vq->mutex));
586 if (!sock) 592 if (!sock)
587 return; 593 return;
588 if (vq == n->vqs + VHOST_NET_VQ_TX) { 594 if (vq == n->vqs + VHOST_NET_VQ_TX) {
@@ -598,7 +604,8 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
598 struct socket *sock; 604 struct socket *sock;
599 605
600 mutex_lock(&vq->mutex); 606 mutex_lock(&vq->mutex);
601 sock = vq->private_data; 607 sock = rcu_dereference_protected(vq->private_data,
608 lockdep_is_held(&vq->mutex));
602 vhost_net_disable_vq(n, vq); 609 vhost_net_disable_vq(n, vq);
603 rcu_assign_pointer(vq->private_data, NULL); 610 rcu_assign_pointer(vq->private_data, NULL);
604 mutex_unlock(&vq->mutex); 611 mutex_unlock(&vq->mutex);
@@ -736,7 +743,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
736 } 743 }
737 744
738 /* start polling new socket */ 745 /* start polling new socket */
739 oldsock = vq->private_data; 746 oldsock = rcu_dereference_protected(vq->private_data,
747 lockdep_is_held(&vq->mutex));
740 if (sock != oldsock) { 748 if (sock != oldsock) {
741 vhost_net_disable_vq(n, vq); 749 vhost_net_disable_vq(n, vq);
742 rcu_assign_pointer(vq->private_data, sock); 750 rcu_assign_pointer(vq->private_data, sock);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index dd3d6f7406f8..8b5a1b33d0fe 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -320,7 +320,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
320 vhost_dev_cleanup(dev); 320 vhost_dev_cleanup(dev);
321 321
322 memory->nregions = 0; 322 memory->nregions = 0;
323 dev->memory = memory; 323 RCU_INIT_POINTER(dev->memory, memory);
324 return 0; 324 return 0;
325} 325}
326 326
@@ -352,8 +352,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
352 fput(dev->log_file); 352 fput(dev->log_file);
353 dev->log_file = NULL; 353 dev->log_file = NULL;
354 /* No one will access memory at this point */ 354 /* No one will access memory at this point */
355 kfree(dev->memory); 355 kfree(rcu_dereference_protected(dev->memory,
356 dev->memory = NULL; 356 lockdep_is_held(&dev->mutex)));
357 RCU_INIT_POINTER(dev->memory, NULL);
357 if (dev->mm) 358 if (dev->mm)
358 mmput(dev->mm); 359 mmput(dev->mm);
359 dev->mm = NULL; 360 dev->mm = NULL;
@@ -440,14 +441,22 @@ static int vq_access_ok(unsigned int num,
440/* Caller should have device mutex but not vq mutex */ 441/* Caller should have device mutex but not vq mutex */
441int vhost_log_access_ok(struct vhost_dev *dev) 442int vhost_log_access_ok(struct vhost_dev *dev)
442{ 443{
443 return memory_access_ok(dev, dev->memory, 1); 444 struct vhost_memory *mp;
445
446 mp = rcu_dereference_protected(dev->memory,
447 lockdep_is_held(&dev->mutex));
448 return memory_access_ok(dev, mp, 1);
444} 449}
445 450
446/* Verify access for write logging. */ 451/* Verify access for write logging. */
447/* Caller should have vq mutex and device mutex */ 452/* Caller should have vq mutex and device mutex */
448static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) 453static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
449{ 454{
450 return vq_memory_access_ok(log_base, vq->dev->memory, 455 struct vhost_memory *mp;
456
457 mp = rcu_dereference_protected(vq->dev->memory,
458 lockdep_is_held(&vq->mutex));
459 return vq_memory_access_ok(log_base, mp,
451 vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && 460 vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
452 (!vq->log_used || log_access_ok(log_base, vq->log_addr, 461 (!vq->log_used || log_access_ok(log_base, vq->log_addr,
453 sizeof *vq->used + 462 sizeof *vq->used +
@@ -487,7 +496,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
487 kfree(newmem); 496 kfree(newmem);
488 return -EFAULT; 497 return -EFAULT;
489 } 498 }
490 oldmem = d->memory; 499 oldmem = rcu_dereference_protected(d->memory,
500 lockdep_is_held(&d->mutex));
491 rcu_assign_pointer(d->memory, newmem); 501 rcu_assign_pointer(d->memory, newmem);
492 synchronize_rcu(); 502 synchronize_rcu();
493 kfree(oldmem); 503 kfree(oldmem);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index afd77295971c..af3c11ded5fd 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -106,7 +106,7 @@ struct vhost_virtqueue {
106 * vhost_work execution acts instead of rcu_read_lock() and the end of 106 * vhost_work execution acts instead of rcu_read_lock() and the end of
107 * vhost_work execution acts instead of rcu_read_lock(). 107 * vhost_work execution acts instead of rcu_read_lock().
108 * Writers use virtqueue mutex. */ 108 * Writers use virtqueue mutex. */
109 void *private_data; 109 void __rcu *private_data;
110 /* Log write descriptors */ 110 /* Log write descriptors */
111 void __user *log_base; 111 void __user *log_base;
112 struct vhost_log log[VHOST_NET_MAX_SG]; 112 struct vhost_log log[VHOST_NET_MAX_SG];
@@ -116,7 +116,7 @@ struct vhost_dev {
116 /* Readers use RCU to access memory table pointer 116 /* Readers use RCU to access memory table pointer
117 * log base pointer and features. 117 * log base pointer and features.
118 * Writers use mutex below.*/ 118 * Writers use mutex below.*/
119 struct vhost_memory *memory; 119 struct vhost_memory __rcu *memory;
120 struct mm_struct *mm; 120 struct mm_struct *mm;
121 struct mutex mutex; 121 struct mutex mutex;
122 unsigned acked_features; 122 unsigned acked_features;
@@ -173,7 +173,11 @@ enum {
173 173
174static inline int vhost_has_feature(struct vhost_dev *dev, int bit) 174static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
175{ 175{
176 unsigned acked_features = rcu_dereference(dev->acked_features); 176 unsigned acked_features;
177
178 acked_features =
179 rcu_dereference_index_check(dev->acked_features,
180 lockdep_is_held(&dev->mutex));
177 return acked_features & (1 << bit); 181 return acked_features & (1 << bit);
178} 182}
179 183
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 13365ba35218..7d24b0d94ed4 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -338,30 +338,29 @@ static void unmask_evtchn(int port)
338 338
339static int find_unbound_irq(void) 339static int find_unbound_irq(void)
340{ 340{
341 int irq; 341 struct irq_data *data;
342 struct irq_desc *desc; 342 int irq, res;
343 343
344 for (irq = 0; irq < nr_irqs; irq++) { 344 for (irq = 0; irq < nr_irqs; irq++) {
345 desc = irq_to_desc(irq); 345 data = irq_get_irq_data(irq);
346 /* only 0->15 have init'd desc; handle irq > 16 */ 346 /* only 0->15 have init'd desc; handle irq > 16 */
347 if (desc == NULL) 347 if (!data)
348 break; 348 break;
349 if (desc->chip == &no_irq_chip) 349 if (data->chip == &no_irq_chip)
350 break; 350 break;
351 if (desc->chip != &xen_dynamic_chip) 351 if (data->chip != &xen_dynamic_chip)
352 continue; 352 continue;
353 if (irq_info[irq].type == IRQT_UNBOUND) 353 if (irq_info[irq].type == IRQT_UNBOUND)
354 break; 354 return irq;
355 } 355 }
356 356
357 if (irq == nr_irqs) 357 if (irq == nr_irqs)
358 panic("No available IRQ to bind to: increase nr_irqs!\n"); 358 panic("No available IRQ to bind to: increase nr_irqs!\n");
359 359
360 desc = irq_to_desc_alloc_node(irq, 0); 360 res = irq_alloc_desc_at(irq, 0);
361 if (WARN_ON(desc == NULL))
362 return -1;
363 361
364 dynamic_irq_init_keep_chip_data(irq); 362 if (WARN_ON(res != irq))
363 return -1;
365 364
366 return irq; 365 return irq;
367} 366}
@@ -495,7 +494,7 @@ static void unbind_from_irq(unsigned int irq)
495 if (irq_info[irq].type != IRQT_UNBOUND) { 494 if (irq_info[irq].type != IRQT_UNBOUND) {
496 irq_info[irq] = mk_unbound_info(); 495 irq_info[irq] = mk_unbound_info();
497 496
498 dynamic_irq_cleanup(irq); 497 irq_free_desc(irq);
499 } 498 }
500 499
501 spin_unlock(&irq_mapping_update_lock); 500 spin_unlock(&irq_mapping_update_lock);