diff options
-rw-r--r-- | Documentation/IRQ-affinity.txt | 17 | ||||
-rw-r--r-- | Documentation/filesystems/proc.txt | 11 | ||||
-rw-r--r-- | include/linux/bitmap.h | 5 | ||||
-rw-r--r-- | include/linux/cpumask.h | 15 | ||||
-rw-r--r-- | kernel/irq/proc.c | 54 | ||||
-rw-r--r-- | lib/bitmap.c | 109 |
6 files changed, 188 insertions, 23 deletions
diff --git a/Documentation/IRQ-affinity.txt b/Documentation/IRQ-affinity.txt index b4a615b78403..7890fae18529 100644 --- a/Documentation/IRQ-affinity.txt +++ b/Documentation/IRQ-affinity.txt | |||
@@ -4,10 +4,11 @@ ChangeLog: | |||
4 | 4 | ||
5 | SMP IRQ affinity | 5 | SMP IRQ affinity |
6 | 6 | ||
7 | /proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted | 7 | /proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify |
8 | for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed | 8 | which target CPUs are permitted for a given IRQ source. It's a bitmask |
9 | to turn off all CPUs, and if an IRQ controller does not support IRQ | 9 | (smp_affinity) or cpu list (smp_affinity_list) of allowed CPUs. It's not |
10 | affinity then the value will not change from the default 0xffffffff. | 10 | allowed to turn off all CPUs, and if an IRQ controller does not support |
11 | IRQ affinity then the value will not change from the default of all cpus. | ||
11 | 12 | ||
12 | /proc/irq/default_smp_affinity specifies default affinity mask that applies | 13 | /proc/irq/default_smp_affinity specifies default affinity mask that applies |
13 | to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask | 14 | to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask |
@@ -54,3 +55,11 @@ round-trip min/avg/max = 0.1/0.5/585.4 ms | |||
54 | This time around IRQ44 was delivered only to the last four processors. | 55 | This time around IRQ44 was delivered only to the last four processors. |
55 | i.e counters for the CPU0-3 did not change. | 56 | i.e counters for the CPU0-3 did not change. |
56 | 57 | ||
58 | Here is an example of limiting that same irq (44) to cpus 1024 to 1031: | ||
59 | |||
60 | [root@moon 44]# echo 1024-1031 > smp_affinity_list | ||
61 | [root@moon 44]# cat smp_affinity_list | ||
62 | 1024-1031 | ||
63 | |||
64 | Note that to do this with a bitmask would require 32 bitmasks of zero | ||
65 | to follow the pertinent one. | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 60740e8ecb37..f48178024067 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -574,6 +574,12 @@ The contents of each smp_affinity file is the same by default: | |||
574 | > cat /proc/irq/0/smp_affinity | 574 | > cat /proc/irq/0/smp_affinity |
575 | ffffffff | 575 | ffffffff |
576 | 576 | ||
577 | There is an alternate interface, smp_affinity_list which allows specifying | ||
578 | a cpu range instead of a bitmask: | ||
579 | |||
580 | > cat /proc/irq/0/smp_affinity_list | ||
581 | 1024-1031 | ||
582 | |||
577 | The default_smp_affinity mask applies to all non-active IRQs, which are the | 583 | The default_smp_affinity mask applies to all non-active IRQs, which are the |
578 | IRQs which have not yet been allocated/activated, and hence which lack a | 584 | IRQs which have not yet been allocated/activated, and hence which lack a |
579 | /proc/irq/[0-9]* directory. | 585 | /proc/irq/[0-9]* directory. |
@@ -583,12 +589,13 @@ reports itself as being attached. This hardware locality information does not | |||
583 | include information about any possible driver locality preference. | 589 | include information about any possible driver locality preference. |
584 | 590 | ||
585 | prof_cpu_mask specifies which CPUs are to be profiled by the system wide | 591 | prof_cpu_mask specifies which CPUs are to be profiled by the system wide |
586 | profiler. Default value is ffffffff (all cpus). | 592 | profiler. Default value is ffffffff (all cpus if there are only 32 of them). |
587 | 593 | ||
588 | The way IRQs are routed is handled by the IO-APIC, and it's Round Robin | 594 | The way IRQs are routed is handled by the IO-APIC, and it's Round Robin |
589 | between all the CPUs which are allowed to handle it. As usual the kernel has | 595 | between all the CPUs which are allowed to handle it. As usual the kernel has |
590 | more info than you and does a better job than you, so the defaults are the | 596 | more info than you and does a better job than you, so the defaults are the |
591 | best choice for almost everyone. | 597 | best choice for almost everyone. [Note this applies only to those IO-APICs |
598 | that support "Round Robin" interrupt distribution.] | ||
592 | 599 | ||
593 | There are three more important subdirectories in /proc: net, scsi, and sys. | 600 | There are three more important subdirectories in /proc: net, scsi, and sys. |
594 | The general rule is that the contents, or even the existence of these | 601 | The general rule is that the contents, or even the existence of these |
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index daf8c480c786..dcafe0bf0005 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h | |||
@@ -55,7 +55,8 @@ | |||
55 | * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf | 55 | * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf |
56 | * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf | 56 | * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf |
57 | * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf | 57 | * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf |
58 | * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list | 58 | * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf |
59 | * bitmap_parselist_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf | ||
59 | * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region | 60 | * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region |
60 | * bitmap_release_region(bitmap, pos, order) Free specified bit region | 61 | * bitmap_release_region(bitmap, pos, order) Free specified bit region |
61 | * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region | 62 | * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region |
@@ -129,6 +130,8 @@ extern int bitmap_scnlistprintf(char *buf, unsigned int len, | |||
129 | const unsigned long *src, int nbits); | 130 | const unsigned long *src, int nbits); |
130 | extern int bitmap_parselist(const char *buf, unsigned long *maskp, | 131 | extern int bitmap_parselist(const char *buf, unsigned long *maskp, |
131 | int nmaskbits); | 132 | int nmaskbits); |
133 | extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, | ||
134 | unsigned long *dst, int nbits); | ||
132 | extern void bitmap_remap(unsigned long *dst, const unsigned long *src, | 135 | extern void bitmap_remap(unsigned long *dst, const unsigned long *src, |
133 | const unsigned long *old, const unsigned long *new, int bits); | 136 | const unsigned long *old, const unsigned long *new, int bits); |
134 | extern int bitmap_bitremap(int oldbit, | 137 | extern int bitmap_bitremap(int oldbit, |
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index bae6fe24d1f9..b24ac56477b4 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h | |||
@@ -547,6 +547,21 @@ static inline int cpumask_parse_user(const char __user *buf, int len, | |||
547 | } | 547 | } |
548 | 548 | ||
549 | /** | 549 | /** |
550 | * cpumask_parselist_user - extract a cpumask from a user string | ||
551 | * @buf: the buffer to extract from | ||
552 | * @len: the length of the buffer | ||
553 | * @dstp: the cpumask to set. | ||
554 | * | ||
555 | * Returns -errno, or 0 for success. | ||
556 | */ | ||
557 | static inline int cpumask_parselist_user(const char __user *buf, int len, | ||
558 | struct cpumask *dstp) | ||
559 | { | ||
560 | return bitmap_parselist_user(buf, len, cpumask_bits(dstp), | ||
561 | nr_cpumask_bits); | ||
562 | } | ||
563 | |||
564 | /** | ||
550 | * cpulist_scnprintf - print a cpumask into a string as comma-separated list | 565 | * cpulist_scnprintf - print a cpumask into a string as comma-separated list |
551 | * @buf: the buffer to sprintf into | 566 | * @buf: the buffer to sprintf into |
552 | * @len: the length of the buffer | 567 | * @len: the length of the buffer |
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 834899f2500f..64e3df6ab1ef 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir; | |||
19 | 19 | ||
20 | #ifdef CONFIG_SMP | 20 | #ifdef CONFIG_SMP |
21 | 21 | ||
22 | static int irq_affinity_proc_show(struct seq_file *m, void *v) | 22 | static int show_irq_affinity(int type, struct seq_file *m, void *v) |
23 | { | 23 | { |
24 | struct irq_desc *desc = irq_to_desc((long)m->private); | 24 | struct irq_desc *desc = irq_to_desc((long)m->private); |
25 | const struct cpumask *mask = desc->irq_data.affinity; | 25 | const struct cpumask *mask = desc->irq_data.affinity; |
@@ -28,7 +28,10 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v) | |||
28 | if (irqd_is_setaffinity_pending(&desc->irq_data)) | 28 | if (irqd_is_setaffinity_pending(&desc->irq_data)) |
29 | mask = desc->pending_mask; | 29 | mask = desc->pending_mask; |
30 | #endif | 30 | #endif |
31 | seq_cpumask(m, mask); | 31 | if (type) |
32 | seq_cpumask_list(m, mask); | ||
33 | else | ||
34 | seq_cpumask(m, mask); | ||
32 | seq_putc(m, '\n'); | 35 | seq_putc(m, '\n'); |
33 | return 0; | 36 | return 0; |
34 | } | 37 | } |
@@ -59,7 +62,18 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) | |||
59 | #endif | 62 | #endif |
60 | 63 | ||
61 | int no_irq_affinity; | 64 | int no_irq_affinity; |
62 | static ssize_t irq_affinity_proc_write(struct file *file, | 65 | static int irq_affinity_proc_show(struct seq_file *m, void *v) |
66 | { | ||
67 | return show_irq_affinity(0, m, v); | ||
68 | } | ||
69 | |||
70 | static int irq_affinity_list_proc_show(struct seq_file *m, void *v) | ||
71 | { | ||
72 | return show_irq_affinity(1, m, v); | ||
73 | } | ||
74 | |||
75 | |||
76 | static ssize_t write_irq_affinity(int type, struct file *file, | ||
63 | const char __user *buffer, size_t count, loff_t *pos) | 77 | const char __user *buffer, size_t count, loff_t *pos) |
64 | { | 78 | { |
65 | unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; | 79 | unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; |
@@ -72,7 +86,10 @@ static ssize_t irq_affinity_proc_write(struct file *file, | |||
72 | if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) | 86 | if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) |
73 | return -ENOMEM; | 87 | return -ENOMEM; |
74 | 88 | ||
75 | err = cpumask_parse_user(buffer, count, new_value); | 89 | if (type) |
90 | err = cpumask_parselist_user(buffer, count, new_value); | ||
91 | else | ||
92 | err = cpumask_parse_user(buffer, count, new_value); | ||
76 | if (err) | 93 | if (err) |
77 | goto free_cpumask; | 94 | goto free_cpumask; |
78 | 95 | ||
@@ -100,11 +117,28 @@ free_cpumask: | |||
100 | return err; | 117 | return err; |
101 | } | 118 | } |
102 | 119 | ||
120 | static ssize_t irq_affinity_proc_write(struct file *file, | ||
121 | const char __user *buffer, size_t count, loff_t *pos) | ||
122 | { | ||
123 | return write_irq_affinity(0, file, buffer, count, pos); | ||
124 | } | ||
125 | |||
126 | static ssize_t irq_affinity_list_proc_write(struct file *file, | ||
127 | const char __user *buffer, size_t count, loff_t *pos) | ||
128 | { | ||
129 | return write_irq_affinity(1, file, buffer, count, pos); | ||
130 | } | ||
131 | |||
103 | static int irq_affinity_proc_open(struct inode *inode, struct file *file) | 132 | static int irq_affinity_proc_open(struct inode *inode, struct file *file) |
104 | { | 133 | { |
105 | return single_open(file, irq_affinity_proc_show, PDE(inode)->data); | 134 | return single_open(file, irq_affinity_proc_show, PDE(inode)->data); |
106 | } | 135 | } |
107 | 136 | ||
137 | static int irq_affinity_list_proc_open(struct inode *inode, struct file *file) | ||
138 | { | ||
139 | return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data); | ||
140 | } | ||
141 | |||
108 | static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) | 142 | static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) |
109 | { | 143 | { |
110 | return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data); | 144 | return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data); |
@@ -125,6 +159,14 @@ static const struct file_operations irq_affinity_hint_proc_fops = { | |||
125 | .release = single_release, | 159 | .release = single_release, |
126 | }; | 160 | }; |
127 | 161 | ||
162 | static const struct file_operations irq_affinity_list_proc_fops = { | ||
163 | .open = irq_affinity_list_proc_open, | ||
164 | .read = seq_read, | ||
165 | .llseek = seq_lseek, | ||
166 | .release = single_release, | ||
167 | .write = irq_affinity_list_proc_write, | ||
168 | }; | ||
169 | |||
128 | static int default_affinity_show(struct seq_file *m, void *v) | 170 | static int default_affinity_show(struct seq_file *m, void *v) |
129 | { | 171 | { |
130 | seq_cpumask(m, irq_default_affinity); | 172 | seq_cpumask(m, irq_default_affinity); |
@@ -289,6 +331,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) | |||
289 | proc_create_data("affinity_hint", 0400, desc->dir, | 331 | proc_create_data("affinity_hint", 0400, desc->dir, |
290 | &irq_affinity_hint_proc_fops, (void *)(long)irq); | 332 | &irq_affinity_hint_proc_fops, (void *)(long)irq); |
291 | 333 | ||
334 | /* create /proc/irq/<irq>/smp_affinity_list */ | ||
335 | proc_create_data("smp_affinity_list", 0600, desc->dir, | ||
336 | &irq_affinity_list_proc_fops, (void *)(long)irq); | ||
337 | |||
292 | proc_create_data("node", 0444, desc->dir, | 338 | proc_create_data("node", 0444, desc->dir, |
293 | &irq_node_proc_fops, (void *)(long)irq); | 339 | &irq_node_proc_fops, (void *)(long)irq); |
294 | #endif | 340 | #endif |
diff --git a/lib/bitmap.c b/lib/bitmap.c index 91e0ccfdb424..41baf02924e6 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c | |||
@@ -571,8 +571,11 @@ int bitmap_scnlistprintf(char *buf, unsigned int buflen, | |||
571 | EXPORT_SYMBOL(bitmap_scnlistprintf); | 571 | EXPORT_SYMBOL(bitmap_scnlistprintf); |
572 | 572 | ||
573 | /** | 573 | /** |
574 | * bitmap_parselist - convert list format ASCII string to bitmap | 574 | * __bitmap_parselist - convert list format ASCII string to bitmap |
575 | * @bp: read nul-terminated user string from this buffer | 575 | * @bp: read nul-terminated user string from this buffer |
576 | * @buflen: buffer size in bytes. If string is smaller than this | ||
577 | * then it must be terminated with a \0. | ||
578 | * @is_user: location of buffer, 0 indicates kernel space | ||
576 | * @maskp: write resulting mask here | 579 | * @maskp: write resulting mask here |
577 | * @nmaskbits: number of bits in mask to be written | 580 | * @nmaskbits: number of bits in mask to be written |
578 | * | 581 | * |
@@ -587,20 +590,63 @@ EXPORT_SYMBOL(bitmap_scnlistprintf); | |||
587 | * %-EINVAL: invalid character in string | 590 | * %-EINVAL: invalid character in string |
588 | * %-ERANGE: bit number specified too large for mask | 591 | * %-ERANGE: bit number specified too large for mask |
589 | */ | 592 | */ |
590 | int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) | 593 | static int __bitmap_parselist(const char *buf, unsigned int buflen, |
594 | int is_user, unsigned long *maskp, | ||
595 | int nmaskbits) | ||
591 | { | 596 | { |
592 | unsigned a, b; | 597 | unsigned a, b; |
598 | int c, old_c, totaldigits; | ||
599 | const char __user *ubuf = buf; | ||
600 | int exp_digit, in_range; | ||
593 | 601 | ||
602 | totaldigits = c = 0; | ||
594 | bitmap_zero(maskp, nmaskbits); | 603 | bitmap_zero(maskp, nmaskbits); |
595 | do { | 604 | do { |
596 | if (!isdigit(*bp)) | 605 | exp_digit = 1; |
597 | return -EINVAL; | 606 | in_range = 0; |
598 | b = a = simple_strtoul(bp, (char **)&bp, BASEDEC); | 607 | a = b = 0; |
599 | if (*bp == '-') { | 608 | |
600 | bp++; | 609 | /* Get the next cpu# or a range of cpu#'s */ |
601 | if (!isdigit(*bp)) | 610 | while (buflen) { |
611 | old_c = c; | ||
612 | if (is_user) { | ||
613 | if (__get_user(c, ubuf++)) | ||
614 | return -EFAULT; | ||
615 | } else | ||
616 | c = *buf++; | ||
617 | buflen--; | ||
618 | if (isspace(c)) | ||
619 | continue; | ||
620 | |||
621 | /* | ||
622 | * If the last character was a space and the current | ||
623 | * character isn't '\0', we've got embedded whitespace. | ||
624 | * This is a no-no, so throw an error. | ||
625 | */ | ||
626 | if (totaldigits && c && isspace(old_c)) | ||
627 | return -EINVAL; | ||
628 | |||
629 | /* A '\0' or a ',' signal the end of a cpu# or range */ | ||
630 | if (c == '\0' || c == ',') | ||
631 | break; | ||
632 | |||
633 | if (c == '-') { | ||
634 | if (exp_digit || in_range) | ||
635 | return -EINVAL; | ||
636 | b = 0; | ||
637 | in_range = 1; | ||
638 | exp_digit = 1; | ||
639 | continue; | ||
640 | } | ||
641 | |||
642 | if (!isdigit(c)) | ||
602 | return -EINVAL; | 643 | return -EINVAL; |
603 | b = simple_strtoul(bp, (char **)&bp, BASEDEC); | 644 | |
645 | b = b * 10 + (c - '0'); | ||
646 | if (!in_range) | ||
647 | a = b; | ||
648 | exp_digit = 0; | ||
649 | totaldigits++; | ||
604 | } | 650 | } |
605 | if (!(a <= b)) | 651 | if (!(a <= b)) |
606 | return -EINVAL; | 652 | return -EINVAL; |
@@ -610,13 +656,52 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) | |||
610 | set_bit(a, maskp); | 656 | set_bit(a, maskp); |
611 | a++; | 657 | a++; |
612 | } | 658 | } |
613 | if (*bp == ',') | 659 | } while (buflen && c == ','); |
614 | bp++; | ||
615 | } while (*bp != '\0' && *bp != '\n'); | ||
616 | return 0; | 660 | return 0; |
617 | } | 661 | } |
662 | |||
663 | int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) | ||
664 | { | ||
665 | char *nl = strchr(bp, '\n'); | ||
666 | int len; | ||
667 | |||
668 | if (nl) | ||
669 | len = nl - bp; | ||
670 | else | ||
671 | len = strlen(bp); | ||
672 | |||
673 | return __bitmap_parselist(bp, len, 0, maskp, nmaskbits); | ||
674 | } | ||
618 | EXPORT_SYMBOL(bitmap_parselist); | 675 | EXPORT_SYMBOL(bitmap_parselist); |
619 | 676 | ||
677 | |||
678 | /** | ||
679 | * bitmap_parselist_user() | ||
680 | * | ||
681 | * @ubuf: pointer to user buffer containing string. | ||
682 | * @ulen: buffer size in bytes. If string is smaller than this | ||
683 | * then it must be terminated with a \0. | ||
684 | * @maskp: pointer to bitmap array that will contain result. | ||
685 | * @nmaskbits: size of bitmap, in bits. | ||
686 | * | ||
687 | * Wrapper for __bitmap_parselist(), providing it with user buffer. | ||
688 | * | ||
689 | * We cannot have this as an inline function in bitmap.h because it needs | ||
690 | * linux/uaccess.h to get the access_ok() declaration and this causes | ||
691 | * cyclic dependencies. | ||
692 | */ | ||
693 | int bitmap_parselist_user(const char __user *ubuf, | ||
694 | unsigned int ulen, unsigned long *maskp, | ||
695 | int nmaskbits) | ||
696 | { | ||
697 | if (!access_ok(VERIFY_READ, ubuf, ulen)) | ||
698 | return -EFAULT; | ||
699 | return __bitmap_parselist((const char *)ubuf, | ||
700 | ulen, 1, maskp, nmaskbits); | ||
701 | } | ||
702 | EXPORT_SYMBOL(bitmap_parselist_user); | ||
703 | |||
704 | |||
620 | /** | 705 | /** |
621 | * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap | 706 | * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap |
622 | * @buf: pointer to a bitmap | 707 | * @buf: pointer to a bitmap |