author		Mark Maule <maule@sgi.com>	2006-04-10 22:17:48 -0400
committer	Greg Kroah-Hartman <gregkh@suse.de>	2006-06-21 14:59:58 -0400
commit		fd58e55fcf5568e51da2ed54d7acd049c3fdb184 (patch)
tree		2cf41864d66b8db39f637549d4652c7664256155 /drivers/pci
parent		c34b4c734482dda750deb6089521f7c891b48736 (diff)
[PATCH] PCI: msi abstractions and support for altix
Abstract portions of the MSI core for platforms that do not use standard
APIC interrupt controllers.  This is implemented through a new arch-specific
msi setup routine, and a set of msi ops which can be set on a per platform
basis.

Signed-off-by: Mark Maule <maule@sgi.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
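For orientation before the diff: the patch's contract is that platform code
registers a struct msi_ops table once at init time, and the MSI core then
dispatches through it (setup() when enabling a vector, target() on affinity
changes, teardown() on free). A minimal sketch of the registration side,
assuming an APIC platform; the init function name is hypothetical, while
msi_register(), struct msi_ops and msi_apic_ops are introduced by the patch
itself:

	/* Sketch only -- not part of the patch. */
	extern struct msi_ops msi_apic_ops;	/* drivers/pci/msi-apic.c, below */

	static int example_arch_msi_init(void)	/* hypothetical arch hook */
	{
		/* From here on, drivers/pci/msi.c calls msi_ops->setup(),
		 * msi_ops->target() and msi_ops->teardown() for this platform. */
		return msi_register(&msi_apic_ops);
	}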
Diffstat (limited to 'drivers/pci')
-rw-r--r--	drivers/pci/Makefile	  6
-rw-r--r--	drivers/pci/msi-altix.c	 18
-rw-r--r--	drivers/pci/msi-apic.c	100
-rw-r--r--	drivers/pci/msi.c	212
-rw-r--r--	drivers/pci/msi.h	133
5 files changed, 325 insertions(+), 144 deletions(-)
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 6707df96893..f2d152b818f 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -26,7 +26,11 @@ obj-$(CONFIG_PPC32) += setup-irq.o
 obj-$(CONFIG_PPC64) += setup-bus.o
 obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o
 obj-$(CONFIG_X86_VISWS) += setup-irq.o
-obj-$(CONFIG_PCI_MSI) += msi.o
+
+msiobj-y := msi.o msi-apic.o
+msiobj-$(CONFIG_IA64_GENERIC) += msi-altix.o
+msiobj-$(CONFIG_IA64_SGI_SN2) += msi-altix.o
+obj-$(CONFIG_PCI_MSI) += $(msiobj-y)
 
 #
 # ACPI Related PCI FW Functions
diff --git a/drivers/pci/msi-altix.c b/drivers/pci/msi-altix.c
new file mode 100644
index 00000000000..9bd240602c1
--- /dev/null
+++ b/drivers/pci/msi-altix.c
@@ -0,0 +1,18 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <asm/errno.h>
+
+int
+sn_msi_init(void)
+{
+	/*
+	 * return error until MSI is supported on altix platforms
+	 */
+	return -EINVAL;
+}
diff --git a/drivers/pci/msi-apic.c b/drivers/pci/msi-apic.c
new file mode 100644
index 00000000000..0eb5fe9003a
--- /dev/null
+++ b/drivers/pci/msi-apic.c
@@ -0,0 +1,100 @@
+/*
+ * MSI hooks for standard x86 apic
+ */
+
+#include <linux/pci.h>
+#include <linux/irq.h>
+
+#include "msi.h"
+
+/*
+ * Shifts for APIC-based data
+ */
+
+#define MSI_DATA_VECTOR_SHIFT		0
+#define MSI_DATA_VECTOR(v)		(((u8)v) << MSI_DATA_VECTOR_SHIFT)
+
+#define MSI_DATA_DELIVERY_SHIFT		8
+#define MSI_DATA_DELIVERY_FIXED		(0 << MSI_DATA_DELIVERY_SHIFT)
+#define MSI_DATA_DELIVERY_LOWPRI	(1 << MSI_DATA_DELIVERY_SHIFT)
+
+#define MSI_DATA_LEVEL_SHIFT		14
+#define MSI_DATA_LEVEL_DEASSERT		(0 << MSI_DATA_LEVEL_SHIFT)
+#define MSI_DATA_LEVEL_ASSERT		(1 << MSI_DATA_LEVEL_SHIFT)
+
+#define MSI_DATA_TRIGGER_SHIFT		15
+#define MSI_DATA_TRIGGER_EDGE		(0 << MSI_DATA_TRIGGER_SHIFT)
+#define MSI_DATA_TRIGGER_LEVEL		(1 << MSI_DATA_TRIGGER_SHIFT)
+
+/*
+ * Shift/mask fields for APIC-based bus address
+ */
+
+#define MSI_ADDR_HEADER			0xfee00000
+
+#define MSI_ADDR_DESTID_MASK		0xfff0000f
+#define MSI_ADDR_DESTID_CPU(cpu)	((cpu) << MSI_TARGET_CPU_SHIFT)
+
+#define MSI_ADDR_DESTMODE_SHIFT		2
+#define MSI_ADDR_DESTMODE_PHYS		(0 << MSI_ADDR_DESTMODE_SHIFT)
+#define MSI_ADDR_DESTMODE_LOGIC		(1 << MSI_ADDR_DESTMODE_SHIFT)
+
+#define MSI_ADDR_REDIRECTION_SHIFT	3
+#define MSI_ADDR_REDIRECTION_CPU	(0 << MSI_ADDR_REDIRECTION_SHIFT)
+#define MSI_ADDR_REDIRECTION_LOWPRI	(1 << MSI_ADDR_REDIRECTION_SHIFT)
+
+
+static void
+msi_target_apic(unsigned int vector,
+		unsigned int dest_cpu,
+		u32 *address_hi,	/* in/out */
+		u32 *address_lo)	/* in/out */
+{
+	u32 addr = *address_lo;
+
+	addr &= MSI_ADDR_DESTID_MASK;
+	addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(dest_cpu));
+
+	*address_lo = addr;
+}
+
+static int
+msi_setup_apic(struct pci_dev *pdev,	/* unused in generic */
+		unsigned int vector,
+		u32 *address_hi,
+		u32 *address_lo,
+		u32 *data)
+{
+	unsigned long	dest_phys_id;
+
+	dest_phys_id = cpu_physical_id(first_cpu(cpu_online_map));
+
+	*address_hi = 0;
+	*address_lo =	MSI_ADDR_HEADER |
+			MSI_ADDR_DESTMODE_PHYS |
+			MSI_ADDR_REDIRECTION_CPU |
+			MSI_ADDR_DESTID_CPU(dest_phys_id);
+
+	*data = MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		MSI_DATA_DELIVERY_FIXED |
+		MSI_DATA_VECTOR(vector);
+
+	return 0;
+}
+
+static void
+msi_teardown_apic(unsigned int vector)
+{
+	return;		/* no-op */
+}
+
+/*
+ * Generic ops used on most IA archs/platforms.  Set with msi_register()
+ */
+
+struct msi_ops msi_apic_ops = {
+	.setup = msi_setup_apic,
+	.teardown = msi_teardown_apic,
+	.target = msi_target_apic,
+};
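To see how the ->target hook above is consumed, the msi.c diff that follows
rewires set_msi_affinity() into a read/rewrite/write sequence. A condensed
sketch of that sequence for the plain-MSI case, with the locking and
capability lookup elided; the helper name is hypothetical, the calls mirror
the patch:

	/* Hypothetical condensation of the new set_msi_affinity() MSI path:
	 * read the address pair back from the device, let the platform
	 * rewrite the destination, then reprogram the device. */
	static void example_retarget_msi(struct msi_desc *entry, int pos,
					 unsigned int vector, unsigned int cpu)
	{
		u32 address_hi, address_lo;

		pci_read_config_dword(entry->dev, msi_upper_address_reg(pos),
				      &address_hi);
		pci_read_config_dword(entry->dev, msi_lower_address_reg(pos),
				      &address_lo);

		msi_ops->target(vector, cpu, &address_hi, &address_lo);

		pci_write_config_dword(entry->dev, msi_upper_address_reg(pos),
				       address_hi);
		pci_write_config_dword(entry->dev, msi_lower_address_reg(pos),
				       address_lo);
	}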
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 9855c4c920b..55ff52df5fe 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -23,8 +23,6 @@
 #include "pci.h"
 #include "msi.h"
 
-#define MSI_TARGET_CPU		first_cpu(cpu_online_map)
-
 static DEFINE_SPINLOCK(msi_lock);
 static struct msi_desc* msi_desc[NR_IRQS] = { [0 ... NR_IRQS-1] = NULL };
 static kmem_cache_t* msi_cachep;
@@ -40,6 +38,15 @@ int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
 u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
 #endif
 
+static struct msi_ops *msi_ops;
+
+int
+msi_register(struct msi_ops *ops)
+{
+	msi_ops = ops;
+	return 0;
+}
+
 static void msi_cache_ctor(void *p, kmem_cache_t *cache, unsigned long flags)
 {
 	memset(p, 0, NR_IRQS * sizeof(struct msi_desc));
@@ -92,7 +99,7 @@ static void msi_set_mask_bit(unsigned int vector, int flag)
 static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask)
 {
 	struct msi_desc *entry;
-	struct msg_address address;
+	u32 address_hi, address_lo;
 	unsigned int irq = vector;
 	unsigned int dest_cpu = first_cpu(cpu_mask);
 
@@ -108,28 +115,36 @@ static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask)
 		if (!pos)
 			return;
 
+		pci_read_config_dword(entry->dev, msi_upper_address_reg(pos),
+			&address_hi);
 		pci_read_config_dword(entry->dev, msi_lower_address_reg(pos),
-			&address.lo_address.value);
-		address.lo_address.value &= MSI_ADDRESS_DEST_ID_MASK;
-		address.lo_address.value |= (cpu_physical_id(dest_cpu) <<
-			MSI_TARGET_CPU_SHIFT);
-		entry->msi_attrib.current_cpu = cpu_physical_id(dest_cpu);
+			&address_lo);
+
+		msi_ops->target(vector, dest_cpu, &address_hi, &address_lo);
+
+		pci_write_config_dword(entry->dev, msi_upper_address_reg(pos),
+			address_hi);
 		pci_write_config_dword(entry->dev, msi_lower_address_reg(pos),
-			address.lo_address.value);
+			address_lo);
 		set_native_irq_info(irq, cpu_mask);
 		break;
 	}
 	case PCI_CAP_ID_MSIX:
 	{
-		int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
-			PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET;
-
-		address.lo_address.value = readl(entry->mask_base + offset);
-		address.lo_address.value &= MSI_ADDRESS_DEST_ID_MASK;
-		address.lo_address.value |= (cpu_physical_id(dest_cpu) <<
-			MSI_TARGET_CPU_SHIFT);
-		entry->msi_attrib.current_cpu = cpu_physical_id(dest_cpu);
-		writel(address.lo_address.value, entry->mask_base + offset);
+		int offset_hi =
+			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+			PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET;
+		int offset_lo =
+			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+			PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET;
+
+		address_hi = readl(entry->mask_base + offset_hi);
+		address_lo = readl(entry->mask_base + offset_lo);
+
+		msi_ops->target(vector, dest_cpu, &address_hi, &address_lo);
+
+		writel(address_hi, entry->mask_base + offset_hi);
+		writel(address_lo, entry->mask_base + offset_lo);
 		set_native_irq_info(irq, cpu_mask);
 		break;
 	}
@@ -251,30 +266,6 @@ static struct hw_interrupt_type msi_irq_wo_maskbit_type = {
 	.set_affinity	= set_msi_affinity
 };
 
-static void msi_data_init(struct msg_data *msi_data,
-			  unsigned int vector)
-{
-	memset(msi_data, 0, sizeof(struct msg_data));
-	msi_data->vector = (u8)vector;
-	msi_data->delivery_mode = MSI_DELIVERY_MODE;
-	msi_data->level = MSI_LEVEL_MODE;
-	msi_data->trigger = MSI_TRIGGER_MODE;
-}
-
-static void msi_address_init(struct msg_address *msi_address)
-{
-	unsigned int	dest_id;
-	unsigned long	dest_phys_id = cpu_physical_id(MSI_TARGET_CPU);
-
-	memset(msi_address, 0, sizeof(struct msg_address));
-	msi_address->hi_address = (u32)0;
-	dest_id = (MSI_ADDRESS_HEADER << MSI_ADDRESS_HEADER_SHIFT);
-	msi_address->lo_address.u.dest_mode = MSI_PHYSICAL_MODE;
-	msi_address->lo_address.u.redirection_hint = MSI_REDIRECTION_HINT_MODE;
-	msi_address->lo_address.u.dest_id = dest_id;
-	msi_address->lo_address.value |= (dest_phys_id << MSI_TARGET_CPU_SHIFT);
-}
-
 static int msi_free_vector(struct pci_dev* dev, int vector, int reassign);
 static int assign_msi_vector(void)
 {
@@ -369,13 +360,29 @@ static int msi_init(void)
 		return status;
 	}
 
+	status = msi_arch_init();
+	if (status < 0) {
+		pci_msi_enable = 0;
+		printk(KERN_WARNING
+		       "PCI: MSI arch init failed.  MSI disabled.\n");
+		return status;
+	}
+
+	if (! msi_ops) {
+		printk(KERN_WARNING
+		       "PCI: MSI ops not registered. MSI disabled.\n");
+		status = -EINVAL;
+		return status;
+	}
+
+	last_alloc_vector = assign_irq_vector(AUTO_ASSIGN);
 	status = msi_cache_init();
 	if (status < 0) {
 		pci_msi_enable = 0;
 		printk(KERN_WARNING "PCI: MSI cache init failed\n");
 		return status;
 	}
-	last_alloc_vector = assign_irq_vector(AUTO_ASSIGN);
+
 	if (last_alloc_vector < 0) {
 		pci_msi_enable = 0;
 		printk(KERN_WARNING "PCI: No interrupt vectors available for MSI\n");
@@ -575,6 +582,8 @@ void pci_restore_msi_state(struct pci_dev *dev)
 int pci_save_msix_state(struct pci_dev *dev)
 {
 	int pos;
+	int temp;
+	int vector, head, tail = 0;
 	u16 control;
 	struct pci_cap_saved_state *save_state;
 
@@ -582,6 +591,7 @@ int pci_save_msix_state(struct pci_dev *dev)
 	if (pos <= 0 || dev->no_msi)
 		return 0;
 
+	/* save the capability */
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
 	if (!(control & PCI_MSIX_FLAGS_ENABLE))
 		return 0;
@@ -593,6 +603,38 @@ int pci_save_msix_state(struct pci_dev *dev)
 	}
 	*((u16 *)&save_state->data[0]) = control;
 
+	/* save the table */
+	temp = dev->irq;
+	if (msi_lookup_vector(dev, PCI_CAP_ID_MSIX)) {
+		kfree(save_state);
+		return -EINVAL;
+	}
+
+	vector = head = dev->irq;
+	while (head != tail) {
+		int j;
+		void __iomem *base;
+		struct msi_desc *entry;
+
+		entry = msi_desc[vector];
+		base = entry->mask_base;
+		j = entry->msi_attrib.entry_nr;
+
+		entry->address_lo_save =
+			readl(base + j * PCI_MSIX_ENTRY_SIZE +
+			      PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
+		entry->address_hi_save =
+			readl(base + j * PCI_MSIX_ENTRY_SIZE +
+			      PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
+		entry->data_save =
+			readl(base + j * PCI_MSIX_ENTRY_SIZE +
+			      PCI_MSIX_ENTRY_DATA_OFFSET);
+
+		tail = msi_desc[vector]->link.tail;
+		vector = tail;
+	}
+	dev->irq = temp;
+
 	disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
 	save_state->cap_nr = PCI_CAP_ID_MSIX;
 	pci_add_saved_cap(dev, save_state);
@@ -606,8 +648,6 @@ void pci_restore_msix_state(struct pci_dev *dev)
 	int vector, head, tail = 0;
 	void __iomem *base;
 	int j;
-	struct msg_address address;
-	struct msg_data data;
 	struct msi_desc *entry;
 	int temp;
 	struct pci_cap_saved_state *save_state;
@@ -633,20 +673,13 @@ void pci_restore_msix_state(struct pci_dev *dev)
 		base = entry->mask_base;
 		j = entry->msi_attrib.entry_nr;
 
-		msi_address_init(&address);
-		msi_data_init(&data, vector);
-
-		address.lo_address.value &= MSI_ADDRESS_DEST_ID_MASK;
-		address.lo_address.value |= entry->msi_attrib.current_cpu <<
-			MSI_TARGET_CPU_SHIFT;
-
-		writel(address.lo_address.value,
+		writel(entry->address_lo_save,
 			base + j * PCI_MSIX_ENTRY_SIZE +
 			PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
-		writel(address.hi_address,
+		writel(entry->address_hi_save,
 			base + j * PCI_MSIX_ENTRY_SIZE +
 			PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
-		writel(*(u32*)&data,
+		writel(entry->data_save,
 			base + j * PCI_MSIX_ENTRY_SIZE +
 			PCI_MSIX_ENTRY_DATA_OFFSET);
 
@@ -660,30 +693,32 @@ void pci_restore_msix_state(struct pci_dev *dev)
 }
 #endif
 
-static void msi_register_init(struct pci_dev *dev, struct msi_desc *entry)
+static int msi_register_init(struct pci_dev *dev, struct msi_desc *entry)
 {
-	struct msg_address address;
-	struct msg_data data;
+	int status;
+	u32 address_hi;
+	u32 address_lo;
+	u32 data;
 	int pos, vector = dev->irq;
 	u16 control;
 
 	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
+
 	/* Configure MSI capability structure */
-	msi_address_init(&address);
-	msi_data_init(&data, vector);
-	entry->msi_attrib.current_cpu = ((address.lo_address.u.dest_id >>
-				MSI_TARGET_CPU_SHIFT) & MSI_TARGET_CPU_MASK);
-	pci_write_config_dword(dev, msi_lower_address_reg(pos),
-			address.lo_address.value);
+	status = msi_ops->setup(dev, vector, &address_hi, &address_lo, &data);
+	if (status < 0)
+		return status;
+
+	pci_write_config_dword(dev, msi_lower_address_reg(pos), address_lo);
 	if (is_64bit_address(control)) {
 		pci_write_config_dword(dev,
-			msi_upper_address_reg(pos), address.hi_address);
+			msi_upper_address_reg(pos), address_hi);
 		pci_write_config_word(dev,
-			msi_data_reg(pos, 1), *((u32*)&data));
+			msi_data_reg(pos, 1), data);
 	} else
 		pci_write_config_word(dev,
-			msi_data_reg(pos, 0), *((u32*)&data));
+			msi_data_reg(pos, 0), data);
 	if (entry->msi_attrib.maskbit) {
 		unsigned int maskbits, temp;
 		/* All MSIs are unmasked by default, Mask them all */
@@ -697,6 +732,8 @@ static void msi_register_init(struct pci_dev *dev, struct msi_desc *entry)
 			msi_mask_bits_reg(pos, is_64bit_address(control)),
 			maskbits);
 	}
+
+	return 0;
 }
 
 /**
@@ -710,6 +747,7 @@ static void msi_register_init(struct pci_dev *dev, struct msi_desc *entry)
  **/
 static int msi_capability_init(struct pci_dev *dev)
 {
+	int status;
 	struct msi_desc *entry;
 	int pos, vector;
 	u16 control;
@@ -742,7 +780,12 @@ static int msi_capability_init(struct pci_dev *dev)
 	/* Replace with MSI handler */
 	irq_handler_init(PCI_CAP_ID_MSI, vector, entry->msi_attrib.maskbit);
 	/* Configure MSI capability structure */
-	msi_register_init(dev, entry);
+	status = msi_register_init(dev, entry);
+	if (status != 0) {
+		dev->irq = entry->msi_attrib.default_vector;
+		kmem_cache_free(msi_cachep, entry);
+		return status;
+	}
 
 	attach_msi_entry(entry, vector);
 	/* Set MSI enabled bits */
@@ -765,8 +808,10 @@ static int msix_capability_init(struct pci_dev *dev,
 				struct msix_entry *entries, int nvec)
 {
 	struct msi_desc *head = NULL, *tail = NULL, *entry = NULL;
-	struct msg_address address;
-	struct msg_data data;
+	u32 address_hi;
+	u32 address_lo;
+	u32 data;
+	int status;
 	int vector, pos, i, j, nr_entries, temp = 0;
 	unsigned long phys_addr;
 	u32 table_offset;
@@ -822,18 +867,20 @@ static int msix_capability_init(struct pci_dev *dev,
 		/* Replace with MSI-X handler */
 		irq_handler_init(PCI_CAP_ID_MSIX, vector, 1);
 		/* Configure MSI-X capability structure */
-		msi_address_init(&address);
-		msi_data_init(&data, vector);
-		entry->msi_attrib.current_cpu =
-			((address.lo_address.u.dest_id >>
-			MSI_TARGET_CPU_SHIFT) & MSI_TARGET_CPU_MASK);
-		writel(address.lo_address.value,
+		status = msi_ops->setup(dev, vector,
+					&address_hi,
+					&address_lo,
+					&data);
+		if (status < 0)
+			break;
+
+		writel(address_lo,
 			base + j * PCI_MSIX_ENTRY_SIZE +
 			PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
-		writel(address.hi_address,
+		writel(address_hi,
 			base + j * PCI_MSIX_ENTRY_SIZE +
 			PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
-		writel(*(u32*)&data,
+		writel(data,
 			base + j * PCI_MSIX_ENTRY_SIZE +
 			PCI_MSIX_ENTRY_DATA_OFFSET);
 		attach_msi_entry(entry, vector);
@@ -901,9 +948,10 @@ int pci_enable_msi(struct pci_dev* dev)
 			vector_irq[dev->irq] = -1;
 			nr_released_vectors--;
 			spin_unlock_irqrestore(&msi_lock, flags);
-			msi_register_init(dev, msi_desc[dev->irq]);
-			enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
-			return 0;
+			status = msi_register_init(dev, msi_desc[dev->irq]);
+			if (status == 0)
+				enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
+			return status;
 		}
 		spin_unlock_irqrestore(&msi_lock, flags);
 		dev->irq = temp;
@@ -980,6 +1028,8 @@ static int msi_free_vector(struct pci_dev* dev, int vector, int reassign)
 	void __iomem *base;
 	unsigned long flags;
 
+	msi_ops->teardown(vector);
+
 	spin_lock_irqsave(&msi_lock, flags);
 	entry = msi_desc[vector];
 	if (!entry || entry->dev != dev) {
diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h
index 4ac52d441e4..56951c39d3a 100644
--- a/drivers/pci/msi.h
+++ b/drivers/pci/msi.h
@@ -6,6 +6,68 @@
 #ifndef MSI_H
 #define MSI_H
 
+/*
+ * MSI operation vector.  Used by the msi core code (drivers/pci/msi.c)
+ * to abstract platform-specific tasks relating to MSI address generation
+ * and resource management.
+ */
+struct msi_ops {
+	/**
+	 * setup - generate an MSI bus address and data for a given vector
+	 * @pdev: PCI device context (in)
+	 * @vector: vector allocated by the msi core (in)
+	 * @addr_hi: upper 32 bits of PCI bus MSI address (out)
+	 * @addr_lo: lower 32 bits of PCI bus MSI address (out)
+	 * @data: MSI data payload (out)
+	 *
+	 * Description: The setup op is used to generate a PCI bus address and
+	 * data which the msi core will program into the card MSI capability
+	 * registers.  The setup routine is responsible for picking an initial
+	 * cpu to target the MSI at.  The setup routine is responsible for
+	 * examining pdev to determine the MSI capabilities of the card and
+	 * generating a suitable address/data.  The setup routine is
+	 * responsible for allocating and tracking any system resources it
+	 * needs to route the MSI to the cpu it picks, and for associating
+	 * those resources with the passed in vector.
+	 *
+	 * Returns 0 if the MSI address/data was successfully setup.
+	 **/
+
+	int (*setup) (struct pci_dev *pdev, unsigned int vector,
+		      u32 *addr_hi, u32 *addr_lo, u32 *data);
+
+	/**
+	 * teardown - release resources allocated by setup
+	 * @vector: vector context for resources (in)
+	 *
+	 * Description: The teardown op is used to release any resources
+	 * that were allocated in the setup routine associated with the passed
+	 * in vector.
+	 **/
+
+	void (*teardown) (unsigned int vector);
+
+	/**
+	 * target - retarget an MSI at a different cpu
+	 * @vector: vector context for resources (in)
+	 * @cpu: new cpu to direct vector at (in)
+	 * @addr_hi: new value of PCI bus upper 32 bits (in/out)
+	 * @addr_lo: new value of PCI bus lower 32 bits (in/out)
+	 *
+	 * Description: The target op is used to redirect an MSI vector
+	 * at a different cpu.  addr_hi/addr_lo coming in are the existing
+	 * values that the MSI core has programmed into the card.  The
+	 * target code is responsible for freeing any resources (if any)
+	 * associated with the old address, and generating a new PCI bus
+	 * addr_hi/addr_lo that will redirect the vector at the indicated cpu.
+	 **/
+
+	void (*target) (unsigned int vector, unsigned int cpu,
+			u32 *addr_hi, u32 *addr_lo);
+};
+
+extern int msi_register(struct msi_ops *ops);
+
 #include <asm/msi.h>
 
 /*
@@ -63,67 +125,6 @@ extern int pci_vector_resources(int last, int nr_released);
 #define msix_mask(address)		(address | PCI_MSIX_FLAGS_BITMASK)
 #define msix_is_pending(address)	(address & PCI_MSIX_FLAGS_PENDMASK)
 
-/*
- * MSI Defined Data Structures
- */
-#define MSI_ADDRESS_HEADER		0xfee
-#define MSI_ADDRESS_HEADER_SHIFT	12
-#define MSI_ADDRESS_HEADER_MASK		0xfff000
-#define MSI_ADDRESS_DEST_ID_MASK	0xfff0000f
-#define MSI_TARGET_CPU_MASK		0xff
-#define MSI_DELIVERY_MODE		0
-#define MSI_LEVEL_MODE			1	/* Edge always assert */
-#define MSI_TRIGGER_MODE		0	/* MSI is edge sensitive */
-#define MSI_PHYSICAL_MODE		0
-#define MSI_LOGICAL_MODE		1
-#define MSI_REDIRECTION_HINT_MODE	0
-
-struct msg_data {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u32	vector		:  8;
-	__u32	delivery_mode	:  3;	/* 000b: FIXED | 001b: lowest prior */
-	__u32	reserved_1	:  3;
-	__u32	level		:  1;	/* 0: deassert | 1: assert */
-	__u32	trigger		:  1;	/* 0: edge | 1: level */
-	__u32	reserved_2	: 16;
-#elif defined(__BIG_ENDIAN_BITFIELD)
-	__u32	reserved_2	: 16;
-	__u32	trigger		:  1;	/* 0: edge | 1: level */
-	__u32	level		:  1;	/* 0: deassert | 1: assert */
-	__u32	reserved_1	:  3;
-	__u32	delivery_mode	:  3;	/* 000b: FIXED | 001b: lowest prior */
-	__u32	vector		:  8;
-#else
-#error "Bitfield endianness not defined! Check your byteorder.h"
-#endif
-} __attribute__ ((packed));
-
-struct msg_address {
-	union {
-		struct {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-			__u32	reserved_1	:  2;
-			__u32	dest_mode	:  1;	/*0:physic | 1:logic */
-			__u32	redirection_hint:  1;	/*0: dedicated CPU
-							  1: lowest priority */
-			__u32	reserved_2	:  4;
-			__u32	dest_id		: 24;	/* Destination ID */
-#elif defined(__BIG_ENDIAN_BITFIELD)
-			__u32	dest_id		: 24;	/* Destination ID */
-			__u32	reserved_2	:  4;
-			__u32	redirection_hint:  1;	/*0: dedicated CPU
-							  1: lowest priority */
-			__u32	dest_mode	:  1;	/*0:physic | 1:logic */
-			__u32	reserved_1	:  2;
-#else
-#error "Bitfield endianness not defined! Check your byteorder.h"
-#endif
-		}u;
-		__u32	value;
-	}lo_address;
-	__u32	hi_address;
-} __attribute__ ((packed));
-
 struct msi_desc {
 	struct {
 		__u8	type	: 5;	/* {0: unused, 5h:MSI, 11h:MSI-X} */
@@ -132,7 +133,7 @@ struct msi_desc {
 		__u8	reserved: 1;	/* reserved */
 		__u8	entry_nr;	/* specific enabled entry */
 		__u8	default_vector;	/* default pre-assigned vector */
-		__u8	current_cpu;	/* current destination cpu */
+		__u8	unused;		/* formerly unused destination cpu*/
 	}msi_attrib;
 
 	struct {
@@ -142,6 +143,14 @@ struct msi_desc {
 
 	void __iomem *mask_base;
 	struct pci_dev *dev;
+
+#ifdef CONFIG_PM
+	/* PM save area for MSIX address/data */
+
+	u32	address_hi_save;
+	u32	address_lo_save;
+	u32	data_save;
+#endif
 };
 
 #endif /* MSI_H */
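For comparison with the stubbed sn_msi_init() above, a skeleton of what a
non-APIC platform has to supply through this interface is sketched below.
The platform name "foo" and all function bodies are hypothetical; only the
signatures and the struct msi_ops layout come from the patch:

	/* Hypothetical platform "foo": the three ops a non-APIC platform
	 * must provide.  Bodies are placeholders, not real hardware code. */
	static int
	foo_msi_setup(struct pci_dev *pdev, unsigned int vector,
		      u32 *addr_hi, u32 *addr_lo, u32 *data)
	{
		/* A real implementation allocates interrupt-fabric resources
		 * for 'vector', picks an initial target cpu, and encodes the
		 * result as a PCI bus address/data pair. */
		*addr_hi = 0;
		*addr_lo = 0;
		*data = 0;
		return -EINVAL;		/* placeholder: "not supported" */
	}

	static void
	foo_msi_teardown(unsigned int vector)
	{
		/* Release whatever foo_msi_setup() allocated for 'vector'. */
	}

	static void
	foo_msi_target(unsigned int vector, unsigned int cpu,
		       u32 *addr_hi, u32 *addr_lo)
	{
		/* Rewrite the existing address pair so 'vector' hits 'cpu'. */
	}

	static struct msi_ops foo_msi_ops = {
		.setup		= foo_msi_setup,
		.teardown	= foo_msi_teardown,
		.target		= foo_msi_target,
	};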
lass="hl kwb">uint32_t start, uint32_t end); static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f); /* Called with erase_completion_lock held */ static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c) { struct jffs2_eraseblock *ret; struct list_head *nextlist = NULL; int n = jiffies % 128; /* Pick an eraseblock to garbage collect next. This is where we'll put the clever wear-levelling algorithms. Eventually. */ /* We possibly want to favour the dirtier blocks more when the number of free blocks is low. */ again: if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) { D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n")); nextlist = &c->bad_used_list; } else if (n < 50 && !list_empty(&c->erasable_list)) { /* Note that most of them will have gone directly to be erased. So don't favour the erasable_list _too_ much. */ D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n")); nextlist = &c->erasable_list; } else if (n < 110 && !list_empty(&c->very_dirty_list)) { /* Most of the time, pick one off the very_dirty list */ D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n")); nextlist = &c->very_dirty_list; } else if (n < 126 && !list_empty(&c->dirty_list)) { D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n")); nextlist = &c->dirty_list; } else if (!list_empty(&c->clean_list)) { D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n")); nextlist = &c->clean_list; } else if (!list_empty(&c->dirty_list)) { D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n")); nextlist = &c->dirty_list; } else if (!list_empty(&c->very_dirty_list)) { D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n")); nextlist = &c->very_dirty_list; } else if (!list_empty(&c->erasable_list)) { D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n")); nextlist = &c->erasable_list; } else if (!list_empty(&c->erasable_pending_wbuf_list)) { /* There are blocks are wating for the wbuf sync */ D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n")); spin_unlock(&c->erase_completion_lock); jffs2_flush_wbuf_pad(c); spin_lock(&c->erase_completion_lock); goto again; } else { /* Eep. All were empty */ D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n")); return NULL; } ret = list_entry(nextlist->next, struct jffs2_eraseblock, list); list_del(&ret->list); c->gcblock = ret; ret->gc_node = ret->first_node; if (!ret->gc_node) { printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset); BUG(); } /* Have we accidentally picked a clean block with wasted space ? */ if (ret->wasted_size) { D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size)); ret->dirty_size += ret->wasted_size; c->wasted_size -= ret->wasted_size; c->dirty_size += ret->wasted_size; ret->wasted_size = 0; } return ret; } /* jffs2_garbage_collect_pass * Make a single attempt to progress GC. Move one node, and possibly * start erasing one eraseblock. 
*/ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) { struct jffs2_inode_info *f; struct jffs2_inode_cache *ic; struct jffs2_eraseblock *jeb; struct jffs2_raw_node_ref *raw; uint32_t gcblock_dirty; int ret = 0, inum, nlink; int xattr = 0; if (mutex_lock_interruptible(&c->alloc_sem)) return -EINTR; for (;;) { spin_lock(&c->erase_completion_lock); if (!c->unchecked_size) break; /* We can't start doing GC yet. We haven't finished checking the node CRCs etc. Do it now. */ /* checked_ino is protected by the alloc_sem */ if (c->checked_ino > c->highest_ino && xattr) { printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n", c->unchecked_size); jffs2_dbg_dump_block_lists_nolock(c); spin_unlock(&c->erase_completion_lock); mutex_unlock(&c->alloc_sem); return -ENOSPC; } spin_unlock(&c->erase_completion_lock); if (!xattr) xattr = jffs2_verify_xattr(c); spin_lock(&c->inocache_lock); ic = jffs2_get_ino_cache(c, c->checked_ino++); if (!ic) { spin_unlock(&c->inocache_lock); continue; } if (!ic->pino_nlink) { D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n", ic->ino)); spin_unlock(&c->inocache_lock); jffs2_xattr_delete_inode(c, ic); continue; } switch(ic->state) { case INO_STATE_CHECKEDABSENT: case INO_STATE_PRESENT: D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino)); spin_unlock(&c->inocache_lock); continue; case INO_STATE_GC: case INO_STATE_CHECKING: printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state); spin_unlock(&c->inocache_lock); BUG(); case INO_STATE_READING: /* We need to wait for it to finish, lest we move on and trigger the BUG() above while we haven't yet finished checking all its nodes */ D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino)); /* We need to come back again for the _same_ inode. We've made no progress in this case, but that should be OK */ c->checked_ino--; mutex_unlock(&c->alloc_sem); sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock); return 0; default: BUG(); case INO_STATE_UNCHECKED: ; } ic->state = INO_STATE_CHECKING; spin_unlock(&c->inocache_lock); D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino)); ret = jffs2_do_crccheck_inode(c, ic); if (ret) printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino); jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT); mutex_unlock(&c->alloc_sem); return ret; } /* First, work out which block we're garbage-collecting */ jeb = c->gcblock; if (!jeb) jeb = jffs2_find_gc_block(c); if (!jeb) { /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? 
*/ if (!list_empty(&c->erase_pending_list)) { spin_unlock(&c->erase_completion_lock); mutex_unlock(&c->alloc_sem); return -EAGAIN; } D1(printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n")); spin_unlock(&c->erase_completion_lock); mutex_unlock(&c->alloc_sem); return -EIO; } D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size)); D1(if (c->nextblock) printk(KERN_DEBUG "Nextblock at %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size)); if (!jeb->used_size) { mutex_unlock(&c->alloc_sem); goto eraseit; } raw = jeb->gc_node; gcblock_dirty = jeb->dirty_size; while(ref_obsolete(raw)) { D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw))); raw = ref_next(raw); if (unlikely(!raw)) { printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n"); printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n", jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size); jeb->gc_node = raw; spin_unlock(&c->erase_completion_lock); mutex_unlock(&c->alloc_sem); BUG(); } } jeb->gc_node = raw; D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw))); if (!raw->next_in_ino) { /* Inode-less node. Clean marker, snapshot or something like that */ spin_unlock(&c->erase_completion_lock); if (ref_flags(raw) == REF_PRISTINE) { /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */ jffs2_garbage_collect_pristine(c, NULL, raw); } else { /* Just mark it obsolete */ jffs2_mark_node_obsolete(c, raw); } mutex_unlock(&c->alloc_sem); goto eraseit_lock; } ic = jffs2_raw_ref_to_ic(raw); #ifdef CONFIG_JFFS2_FS_XATTR /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr. * We can decide whether this node is inode or xattr by ic->class. */ if (ic->class == RAWNODE_CLASS_XATTR_DATUM || ic->class == RAWNODE_CLASS_XATTR_REF) { spin_unlock(&c->erase_completion_lock); if (ic->class == RAWNODE_CLASS_XATTR_DATUM) { ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw); } else { ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw); } goto test_gcnode; } #endif /* We need to hold the inocache. Either the erase_completion_lock or the inocache_lock are sufficient; we trade down since the inocache_lock causes less contention. */ spin_lock(&c->inocache_lock); spin_unlock(&c->erase_completion_lock); D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino)); /* Three possibilities: 1. Inode is already in-core. We must iget it and do proper updating to its fragtree, etc. 2. Inode is not in-core, node is REF_PRISTINE. We lock the inocache to prevent a read_inode(), copy the node intact. 3. Inode is not in-core, node is not pristine. We must iget() and take the slow path. */ switch(ic->state) { case INO_STATE_CHECKEDABSENT: /* It's been checked, but it's not currently in-core. We can just copy any pristine nodes, but have to prevent anyone else from doing read_inode() while we're at it, so we set the state accordingly */ if (ref_flags(raw) == REF_PRISTINE) ic->state = INO_STATE_GC; else { D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. 
Reading.\n", ic->ino)); } break; case INO_STATE_PRESENT: /* It's in-core. GC must iget() it. */ break; case INO_STATE_UNCHECKED: case INO_STATE_CHECKING: case INO_STATE_GC: /* Should never happen. We should have finished checking by the time we actually start doing any GC, and since we're holding the alloc_sem, no other garbage collection can happen. */ printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n", ic->ino, ic->state); mutex_unlock(&c->alloc_sem); spin_unlock(&c->inocache_lock); BUG(); case INO_STATE_READING: /* Someone's currently trying to read it. We must wait for them to finish and then go through the full iget() route to do the GC. However, sometimes read_inode() needs to get the alloc_sem() (for marking nodes invalid) so we must drop the alloc_sem before sleeping. */ mutex_unlock(&c->alloc_sem); D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n", ic->ino, ic->state)); sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock); /* And because we dropped the alloc_sem we must start again from the beginning. Ponder chance of livelock here -- we're returning success without actually making any progress. Q: What are the chances that the inode is back in INO_STATE_READING again by the time we next enter this function? And that this happens enough times to cause a real delay? A: Small enough that I don't care :) */ return 0; } /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the node intact, and we don't have to muck about with the fragtree etc. because we know it's not in-core. If it _was_ in-core, we go through all the iget() crap anyway */ if (ic->state == INO_STATE_GC) { spin_unlock(&c->inocache_lock); ret = jffs2_garbage_collect_pristine(c, ic, raw); spin_lock(&c->inocache_lock); ic->state = INO_STATE_CHECKEDABSENT; wake_up(&c->inocache_wq); if (ret != -EBADFD) { spin_unlock(&c->inocache_lock); goto test_gcnode; } /* Fall through if it wanted us to, with inocache_lock held */ } /* Prevent the fairly unlikely race where the gcblock is entirely obsoleted by the final close of a file which had the only valid nodes in the block, followed by erasure, followed by freeing of the ic because the erased block(s) held _all_ the nodes of that inode.... never been seen but it's vaguely possible. */ inum = ic->ino; nlink = ic->pino_nlink; spin_unlock(&c->inocache_lock); f = jffs2_gc_fetch_inode(c, inum, !nlink); if (IS_ERR(f)) { ret = PTR_ERR(f); goto release_sem; } if (!f) { ret = 0; goto release_sem; } ret = jffs2_garbage_collect_live(c, jeb, raw, f); jffs2_gc_release_inode(c, f); test_gcnode: if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) { /* Eep. This really should never happen. GC is broken */ printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node)); ret = -ENOSPC; } release_sem: mutex_unlock(&c->alloc_sem); eraseit_lock: /* If we've finished this block, start it erasing */ spin_lock(&c->erase_completion_lock); eraseit: if (c->gcblock && !c->gcblock->used_size) { D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset)); /* We're GC'ing an empty block? 
*/ list_add_tail(&c->gcblock->list, &c->erase_pending_list); c->gcblock = NULL; c->nr_erasing_blocks++; jffs2_erase_pending_trigger(c); } spin_unlock(&c->erase_completion_lock); return ret; } static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f) { struct jffs2_node_frag *frag; struct jffs2_full_dnode *fn = NULL; struct jffs2_full_dirent *fd; uint32_t start = 0, end = 0, nrfrags = 0; int ret = 0; mutex_lock(&f->sem); /* Now we have the lock for this inode. Check that it's still the one at the head of the list. */ spin_lock(&c->erase_completion_lock); if (c->gcblock != jeb) { spin_unlock(&c->erase_completion_lock); D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n")); goto upnout; } if (ref_obsolete(raw)) { spin_unlock(&c->erase_completion_lock); D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n")); /* They'll call again */ goto upnout; } spin_unlock(&c->erase_completion_lock); /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */ if (f->metadata && f->metadata->raw == raw) { fn = f->metadata; ret = jffs2_garbage_collect_metadata(c, jeb, f, fn); goto upnout; } /* FIXME. Read node and do lookup? */ for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) { if (frag->node && frag->node->raw == raw) { fn = frag->node; end = frag->ofs + frag->size; if (!nrfrags++) start = frag->ofs; if (nrfrags == frag->node->frags) break; /* We've found them all */ } } if (fn) { if (ref_flags(raw) == REF_PRISTINE) { ret = jffs2_garbage_collect_pristine(c, f->inocache, raw); if (!ret) { /* Urgh. Return it sensibly. */ frag->node->raw = f->inocache->nodes; } if (ret != -EBADFD) goto upnout; } /* We found a datanode. Do the GC */ if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) { /* It crosses a page boundary. Therefore, it must be a hole. */ ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end); } else { /* It could still be a hole. But we GC the page this way anyway */ ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end); } goto upnout; } /* Wasn't a dnode. Try dirent */ for (fd = f->dents; fd; fd=fd->next) { if (fd->raw == raw) break; } if (fd && fd->ino) { ret = jffs2_garbage_collect_dirent(c, jeb, f, fd); } else if (fd) { ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd); } else { printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n", ref_offset(raw), f->inocache->ino); if (ref_obsolete(raw)) { printk(KERN_WARNING "But it's obsolete so we don't mind too much\n"); } else { jffs2_dbg_dump_node(c, ref_offset(raw)); BUG(); } } upnout: mutex_unlock(&f->sem); return ret; } static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, struct jffs2_raw_node_ref *raw) { union jffs2_node_union *node; size_t retlen; int ret; uint32_t phys_ofs, alloclen; uint32_t crc, rawlen; int retried = 0; D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw))); alloclen = rawlen = ref_totlen(c, c->gcblock, raw); /* Ask for a small amount of space (or the totlen if smaller) because we don't want to force wastage of the end of a block if splitting would work. 
*/ if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN) alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN; ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen); /* 'rawlen' is not the exact summary size; it is only an upper estimation */ if (ret) return ret; if (alloclen < rawlen) { /* Doesn't fit untouched. We'll go the old route and split it */ return -EBADFD; } node = kmalloc(rawlen, GFP_KERNEL); if (!node) return -ENOMEM; ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node); if (!ret && retlen != rawlen) ret = -EIO; if (ret) goto out_node; crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4); if (je32_to_cpu(node->u.hdr_crc) != crc) { printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc); goto bail; } switch(je16_to_cpu(node->u.nodetype)) { case JFFS2_NODETYPE_INODE: crc = crc32(0, node, sizeof(node->i)-8); if (je32_to_cpu(node->i.node_crc) != crc) { printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", ref_offset(raw), je32_to_cpu(node->i.node_crc), crc); goto bail; } if (je32_to_cpu(node->i.dsize)) { crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize)); if (je32_to_cpu(node->i.data_crc) != crc) { printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", ref_offset(raw), je32_to_cpu(node->i.data_crc), crc); goto bail; } } break; case JFFS2_NODETYPE_DIRENT: crc = crc32(0, node, sizeof(node->d)-8); if (je32_to_cpu(node->d.node_crc) != crc) { printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", ref_offset(raw), je32_to_cpu(node->d.node_crc), crc); goto bail; } if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) { printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw)); goto bail; } if (node->d.nsize) { crc = crc32(0, node->d.name, node->d.nsize); if (je32_to_cpu(node->d.name_crc) != crc) { printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n", ref_offset(raw), je32_to_cpu(node->d.name_crc), crc); goto bail; } } break; default: /* If it's inode-less, we don't _know_ what it is. Just copy it intact */ if (ic) { printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n", ref_offset(raw), je16_to_cpu(node->u.nodetype)); goto bail; } } /* OK, all the CRCs are good; this node can just be copied as-is. */ retry: phys_ofs = write_ofs(c); ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node); if (ret || (retlen != rawlen)) { printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. 
returned %d, retlen %zd\n", rawlen, phys_ofs, ret, retlen); if (retlen) { jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL); } else { printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs); } if (!retried) { /* Try to reallocate space and retry */ uint32_t dummy; struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size]; retried = 1; D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n")); jffs2_dbg_acct_sanity_check(c,jeb); jffs2_dbg_acct_paranoia_check(c, jeb); ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen); /* this is not the exact summary size of it, it is only an upper estimation */ if (!ret) { D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs)); jffs2_dbg_acct_sanity_check(c,jeb); jffs2_dbg_acct_paranoia_check(c, jeb); goto retry; } D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret)); } if (!ret) ret = -EIO; goto out_node; } jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic); jffs2_mark_node_obsolete(c, raw); D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw))); out_node: kfree(node); return ret; bail: ret = -EBADFD; goto out_node; } static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn) { struct jffs2_full_dnode *new_fn; struct jffs2_raw_inode ri; struct jffs2_node_frag *last_frag; union jffs2_device_node dev; char *mdata = NULL, mdatalen = 0; uint32_t alloclen, ilen; int ret; if (S_ISBLK(JFFS2_F_I_MODE(f)) || S_ISCHR(JFFS2_F_I_MODE(f)) ) { /* For these, we don't actually need to read the old node */ mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f)); mdata = (char *)&dev; D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen)); } else if (S_ISLNK(JFFS2_F_I_MODE(f))) { mdatalen = fn->size; mdata = kmalloc(fn->size, GFP_KERNEL); if (!mdata) { printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n"); return -ENOMEM; } ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen); if (ret) { printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret); kfree(mdata); return ret; } D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen)); } ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen, JFFS2_SUMMARY_INODE_SIZE); if (ret) { printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n", sizeof(ri)+ mdatalen, ret); goto out; } last_frag = frag_last(&f->fragtree); if (last_frag) /* Fetch the inode length from the fragtree rather then * from i_size since i_size may have not been updated yet */ ilen = last_frag->ofs + last_frag->size; else ilen = JFFS2_F_I_SIZE(f); memset(&ri, 0, sizeof(ri)); ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE); ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen); ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4)); ri.ino = cpu_to_je32(f->inocache->ino); ri.version = cpu_to_je32(++f->highest_version); ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f)); ri.uid = cpu_to_je16(JFFS2_F_I_UID(f)); ri.gid = cpu_to_je16(JFFS2_F_I_GID(f)); ri.isize = cpu_to_je32(ilen); ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f)); ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f)); ri.mtime = 
cpu_to_je32(JFFS2_F_I_MTIME(f)); ri.offset = cpu_to_je32(0); ri.csize = cpu_to_je32(mdatalen); ri.dsize = cpu_to_je32(mdatalen); ri.compr = JFFS2_COMPR_NONE; ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8)); ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen)); new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC); if (IS_ERR(new_fn)) { printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn)); ret = PTR_ERR(new_fn); goto out; } jffs2_mark_node_obsolete(c, fn->raw); jffs2_free_full_dnode(fn); f->metadata = new_fn; out: if (S_ISLNK(JFFS2_F_I_MODE(f))) kfree(mdata); return ret; } static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, struct jffs2_inode_info *f, struct jffs2_full_dirent *fd) { struct jffs2_full_dirent *new_fd; struct jffs2_raw_dirent rd; uint32_t alloclen; int ret; rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT); rd.nsize = strlen(fd->name); rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize); rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4)); rd.pino = cpu_to_je32(f->inocache->ino); rd.version = cpu_to_je32(++f->highest_version); rd.ino = cpu_to_je32(fd->ino); /* If the times on this inode were set by explicit utime() they can be different, so refrain from splatting them. */ if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f)) rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f)); else rd.mctime = cpu_to_je32(0); rd.type = fd->type; rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8)); rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize)); ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen, JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize)); if (ret) { printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n", sizeof(rd)+rd.nsize, ret); return ret; } new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC); if (IS_ERR(new_fd)) { printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd)); return PTR_ERR(new_fd); } jffs2_add_fd_to_list(c, new_fd, &f->dents); return 0; } static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, struct jffs2_inode_info *f, struct jffs2_full_dirent *fd) { struct jffs2_full_dirent **fdp = &f->dents; int found = 0; /* On a medium where we can't actually mark nodes obsolete pernamently, such as NAND flash, we need to work out whether this deletion dirent is still needed to actively delete a 'real' dirent with the same name that's still somewhere else on the flash. */ if (!jffs2_can_mark_obsolete(c)) { struct jffs2_raw_dirent *rd; struct jffs2_raw_node_ref *raw; int ret; size_t retlen; int name_len = strlen(fd->name); uint32_t name_crc = crc32(0, fd->name, name_len); uint32_t rawlen = ref_totlen(c, jeb, fd->raw); rd = kmalloc(rawlen, GFP_KERNEL); if (!rd) return -ENOMEM; /* Prevent the erase code from nicking the obsolete node refs while we're looking at them. I really don't like this extra lock but can't see any alternative. Suggestions on a postcard to... */ mutex_lock(&c->erase_free_sem); for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) { cond_resched(); /* We only care about obsolete ones */ if (!(ref_obsolete(raw))) continue; /* Any dirent with the same name is going to have the same length... */ if (ref_totlen(c, NULL, raw) != rawlen) continue; /* Doesn't matter if there's one in the same erase block. We're going to delete it too at the same time. 
*/ if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset)) continue; D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw))); /* This is an obsolete node belonging to the same directory, and it's of the right length. We need to take a closer look...*/ ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd); if (ret) { printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw)); /* If we can't read it, we don't need to continue to obsolete it. Continue */ continue; } if (retlen != rawlen) { printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n", retlen, rawlen, ref_offset(raw)); continue; } if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT) continue; /* If the name CRC doesn't match, skip */ if (je32_to_cpu(rd->name_crc) != name_crc) continue; /* If the name length doesn't match, or it's another deletion dirent, skip */ if (rd->nsize != name_len || !je32_to_cpu(rd->ino)) continue; /* OK, check the actual name now */ if (memcmp(rd->name, fd->name, name_len)) continue; /* OK. The name really does match. There really is still an older node on the flash which our deletion dirent obsoletes. So we have to write out a new deletion dirent to replace it */ mutex_unlock(&c->erase_free_sem); D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n", ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino))); kfree(rd); return jffs2_garbage_collect_dirent(c, jeb, f, fd); } mutex_unlock(&c->erase_free_sem); kfree(rd); } /* FIXME: If we're deleting a dirent which contains the current mtime and ctime, we should update the metadata node with those times accordingly */ /* No need for it any more. Just mark it obsolete and remove it from the list */