aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Travis <travis@sgi.com>2011-05-24 20:13:12 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-05-25 11:39:45 -0400
commit4b060420a596095869a6d7849caa798d23839cd1 (patch)
treeebbbc25555d0358f73527f114f78691ac849ce3e
parente50c1f609c63223adaa38f5a79b18759a00adf72 (diff)
bitmap, irq: add smp_affinity_list interface to /proc/irq
Manually adjusting the smp_affinity for IRQ's becomes unwieldy when the cpu count is large. Setting smp affinity to cpus 256 to 263 would be: echo 000000ff,00000000,00000000,00000000,00000000,00000000,00000000,00000000 > smp_affinity instead of: echo 256-263 > smp_affinity_list Think about what it looks like for cpus around say, 4088 to 4095. We already have many alternate "list" interfaces: /sys/devices/system/cpu/cpuX/indexY/shared_cpu_list /sys/devices/system/cpu/cpuX/topology/thread_siblings_list /sys/devices/system/cpu/cpuX/topology/core_siblings_list /sys/devices/system/node/nodeX/cpulist /sys/devices/pci***/***/local_cpulist Add a companion interface, smp_affinity_list to use cpu lists instead of cpu maps. This conforms to other companion interfaces where both a map and a list interface exists. This required adding a bitmap_parselist_user() function in a manner similar to the bitmap_parse_user() function. [akpm@linux-foundation.org: make __bitmap_parselist() static] Signed-off-by: Mike Travis <travis@sgi.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Jack Steiner <steiner@sgi.com> Cc: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: Andy Shevchenko <andy.shevchenko@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/IRQ-affinity.txt17
-rw-r--r--Documentation/filesystems/proc.txt11
-rw-r--r--include/linux/bitmap.h5
-rw-r--r--include/linux/cpumask.h15
-rw-r--r--kernel/irq/proc.c54
-rw-r--r--lib/bitmap.c109
6 files changed, 188 insertions, 23 deletions
diff --git a/Documentation/IRQ-affinity.txt b/Documentation/IRQ-affinity.txt
index b4a615b78403..7890fae18529 100644
--- a/Documentation/IRQ-affinity.txt
+++ b/Documentation/IRQ-affinity.txt
@@ -4,10 +4,11 @@ ChangeLog:
4 4
5SMP IRQ affinity 5SMP IRQ affinity
6 6
7/proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted 7/proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify
8for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed 8which target CPUs are permitted for a given IRQ source. It's a bitmask
9to turn off all CPUs, and if an IRQ controller does not support IRQ 9(smp_affinity) or cpu list (smp_affinity_list) of allowed CPUs. It's not
10affinity then the value will not change from the default 0xffffffff. 10allowed to turn off all CPUs, and if an IRQ controller does not support
11IRQ affinity then the value will not change from the default of all cpus.
11 12
12/proc/irq/default_smp_affinity specifies default affinity mask that applies 13/proc/irq/default_smp_affinity specifies default affinity mask that applies
13to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask 14to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask
@@ -54,3 +55,11 @@ round-trip min/avg/max = 0.1/0.5/585.4 ms
54This time around IRQ44 was delivered only to the last four processors. 55This time around IRQ44 was delivered only to the last four processors.
55i.e counters for the CPU0-3 did not change. 56i.e counters for the CPU0-3 did not change.
56 57
58Here is an example of limiting that same irq (44) to cpus 1024 to 1031:
59
60[root@moon 44]# echo 1024-1031 > smp_affinity
61[root@moon 44]# cat smp_affinity
621024-1031
63
64Note that to do this with a bitmask would require 32 bitmasks of zero
65to follow the pertinent one.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 60740e8ecb37..f48178024067 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -574,6 +574,12 @@ The contents of each smp_affinity file is the same by default:
574 > cat /proc/irq/0/smp_affinity 574 > cat /proc/irq/0/smp_affinity
575 ffffffff 575 ffffffff
576 576
577There is an alternate interface, smp_affinity_list which allows specifying
578a cpu range instead of a bitmask:
579
580 > cat /proc/irq/0/smp_affinity_list
581 1024-1031
582
577The default_smp_affinity mask applies to all non-active IRQs, which are the 583The default_smp_affinity mask applies to all non-active IRQs, which are the
578IRQs which have not yet been allocated/activated, and hence which lack a 584IRQs which have not yet been allocated/activated, and hence which lack a
579/proc/irq/[0-9]* directory. 585/proc/irq/[0-9]* directory.
@@ -583,12 +589,13 @@ reports itself as being attached. This hardware locality information does not
583include information about any possible driver locality preference. 589include information about any possible driver locality preference.
584 590
585prof_cpu_mask specifies which CPUs are to be profiled by the system wide 591prof_cpu_mask specifies which CPUs are to be profiled by the system wide
586profiler. Default value is ffffffff (all cpus). 592profiler. Default value is ffffffff (all cpus if there are only 32 of them).
587 593
588The way IRQs are routed is handled by the IO-APIC, and it's Round Robin 594The way IRQs are routed is handled by the IO-APIC, and it's Round Robin
589between all the CPUs which are allowed to handle it. As usual the kernel has 595between all the CPUs which are allowed to handle it. As usual the kernel has
590more info than you and does a better job than you, so the defaults are the 596more info than you and does a better job than you, so the defaults are the
591best choice for almost everyone. 597best choice for almost everyone. [Note this applies only to those IO-APIC's
598that support "Round Robin" interrupt distribution.]
592 599
593There are three more important subdirectories in /proc: net, scsi, and sys. 600There are three more important subdirectories in /proc: net, scsi, and sys.
594The general rule is that the contents, or even the existence of these 601The general rule is that the contents, or even the existence of these
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index daf8c480c786..dcafe0bf0005 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -55,7 +55,8 @@
55 * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf 55 * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf
56 * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf 56 * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf
57 * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf 57 * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf
58 * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list 58 * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf
59 * bitmap_parselist_user(buf, dst, nbits) Parse bitmap dst from user buf
59 * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region 60 * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region
60 * bitmap_release_region(bitmap, pos, order) Free specified bit region 61 * bitmap_release_region(bitmap, pos, order) Free specified bit region
61 * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region 62 * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region
@@ -129,6 +130,8 @@ extern int bitmap_scnlistprintf(char *buf, unsigned int len,
129 const unsigned long *src, int nbits); 130 const unsigned long *src, int nbits);
130extern int bitmap_parselist(const char *buf, unsigned long *maskp, 131extern int bitmap_parselist(const char *buf, unsigned long *maskp,
131 int nmaskbits); 132 int nmaskbits);
133extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
134 unsigned long *dst, int nbits);
132extern void bitmap_remap(unsigned long *dst, const unsigned long *src, 135extern void bitmap_remap(unsigned long *dst, const unsigned long *src,
133 const unsigned long *old, const unsigned long *new, int bits); 136 const unsigned long *old, const unsigned long *new, int bits);
134extern int bitmap_bitremap(int oldbit, 137extern int bitmap_bitremap(int oldbit,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index bae6fe24d1f9..b24ac56477b4 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -547,6 +547,21 @@ static inline int cpumask_parse_user(const char __user *buf, int len,
547} 547}
548 548
549/** 549/**
550 * cpumask_parselist_user - extract a cpumask from a user string
551 * @buf: the buffer to extract from
552 * @len: the length of the buffer
553 * @dstp: the cpumask to set.
554 *
555 * Returns -errno, or 0 for success.
556 */
557static inline int cpumask_parselist_user(const char __user *buf, int len,
558 struct cpumask *dstp)
559{
560 return bitmap_parselist_user(buf, len, cpumask_bits(dstp),
561 nr_cpumask_bits);
562}
563
564/**
550 * cpulist_scnprintf - print a cpumask into a string as comma-separated list 565 * cpulist_scnprintf - print a cpumask into a string as comma-separated list
551 * @buf: the buffer to sprintf into 566 * @buf: the buffer to sprintf into
552 * @len: the length of the buffer 567 * @len: the length of the buffer
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 834899f2500f..64e3df6ab1ef 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir;
19 19
20#ifdef CONFIG_SMP 20#ifdef CONFIG_SMP
21 21
22static int irq_affinity_proc_show(struct seq_file *m, void *v) 22static int show_irq_affinity(int type, struct seq_file *m, void *v)
23{ 23{
24 struct irq_desc *desc = irq_to_desc((long)m->private); 24 struct irq_desc *desc = irq_to_desc((long)m->private);
25 const struct cpumask *mask = desc->irq_data.affinity; 25 const struct cpumask *mask = desc->irq_data.affinity;
@@ -28,7 +28,10 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
28 if (irqd_is_setaffinity_pending(&desc->irq_data)) 28 if (irqd_is_setaffinity_pending(&desc->irq_data))
29 mask = desc->pending_mask; 29 mask = desc->pending_mask;
30#endif 30#endif
31 seq_cpumask(m, mask); 31 if (type)
32 seq_cpumask_list(m, mask);
33 else
34 seq_cpumask(m, mask);
32 seq_putc(m, '\n'); 35 seq_putc(m, '\n');
33 return 0; 36 return 0;
34} 37}
@@ -59,7 +62,18 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
59#endif 62#endif
60 63
61int no_irq_affinity; 64int no_irq_affinity;
62static ssize_t irq_affinity_proc_write(struct file *file, 65static int irq_affinity_proc_show(struct seq_file *m, void *v)
66{
67 return show_irq_affinity(0, m, v);
68}
69
70static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
71{
72 return show_irq_affinity(1, m, v);
73}
74
75
76static ssize_t write_irq_affinity(int type, struct file *file,
63 const char __user *buffer, size_t count, loff_t *pos) 77 const char __user *buffer, size_t count, loff_t *pos)
64{ 78{
65 unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; 79 unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
@@ -72,7 +86,10 @@ static ssize_t irq_affinity_proc_write(struct file *file,
72 if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) 86 if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
73 return -ENOMEM; 87 return -ENOMEM;
74 88
75 err = cpumask_parse_user(buffer, count, new_value); 89 if (type)
90 err = cpumask_parselist_user(buffer, count, new_value);
91 else
92 err = cpumask_parse_user(buffer, count, new_value);
76 if (err) 93 if (err)
77 goto free_cpumask; 94 goto free_cpumask;
78 95
@@ -100,11 +117,28 @@ free_cpumask:
100 return err; 117 return err;
101} 118}
102 119
120static ssize_t irq_affinity_proc_write(struct file *file,
121 const char __user *buffer, size_t count, loff_t *pos)
122{
123 return write_irq_affinity(0, file, buffer, count, pos);
124}
125
126static ssize_t irq_affinity_list_proc_write(struct file *file,
127 const char __user *buffer, size_t count, loff_t *pos)
128{
129 return write_irq_affinity(1, file, buffer, count, pos);
130}
131
103static int irq_affinity_proc_open(struct inode *inode, struct file *file) 132static int irq_affinity_proc_open(struct inode *inode, struct file *file)
104{ 133{
105 return single_open(file, irq_affinity_proc_show, PDE(inode)->data); 134 return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
106} 135}
107 136
137static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
138{
139 return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data);
140}
141
108static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) 142static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
109{ 143{
110 return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data); 144 return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
@@ -125,6 +159,14 @@ static const struct file_operations irq_affinity_hint_proc_fops = {
125 .release = single_release, 159 .release = single_release,
126}; 160};
127 161
162static const struct file_operations irq_affinity_list_proc_fops = {
163 .open = irq_affinity_list_proc_open,
164 .read = seq_read,
165 .llseek = seq_lseek,
166 .release = single_release,
167 .write = irq_affinity_list_proc_write,
168};
169
128static int default_affinity_show(struct seq_file *m, void *v) 170static int default_affinity_show(struct seq_file *m, void *v)
129{ 171{
130 seq_cpumask(m, irq_default_affinity); 172 seq_cpumask(m, irq_default_affinity);
@@ -289,6 +331,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
289 proc_create_data("affinity_hint", 0400, desc->dir, 331 proc_create_data("affinity_hint", 0400, desc->dir,
290 &irq_affinity_hint_proc_fops, (void *)(long)irq); 332 &irq_affinity_hint_proc_fops, (void *)(long)irq);
291 333
334 /* create /proc/irq/<irq>/smp_affinity_list */
335 proc_create_data("smp_affinity_list", 0600, desc->dir,
336 &irq_affinity_list_proc_fops, (void *)(long)irq);
337
292 proc_create_data("node", 0444, desc->dir, 338 proc_create_data("node", 0444, desc->dir,
293 &irq_node_proc_fops, (void *)(long)irq); 339 &irq_node_proc_fops, (void *)(long)irq);
294#endif 340#endif
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 91e0ccfdb424..41baf02924e6 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -571,8 +571,11 @@ int bitmap_scnlistprintf(char *buf, unsigned int buflen,
571EXPORT_SYMBOL(bitmap_scnlistprintf); 571EXPORT_SYMBOL(bitmap_scnlistprintf);
572 572
573/** 573/**
574 * bitmap_parselist - convert list format ASCII string to bitmap 574 * __bitmap_parselist - convert list format ASCII string to bitmap
575 * @bp: read nul-terminated user string from this buffer 575 * @bp: read nul-terminated user string from this buffer
576 * @buflen: buffer size in bytes. If string is smaller than this
577 * then it must be terminated with a \0.
578 * @is_user: location of buffer, 0 indicates kernel space
576 * @maskp: write resulting mask here 579 * @maskp: write resulting mask here
577 * @nmaskbits: number of bits in mask to be written 580 * @nmaskbits: number of bits in mask to be written
578 * 581 *
@@ -587,20 +590,63 @@ EXPORT_SYMBOL(bitmap_scnlistprintf);
587 * %-EINVAL: invalid character in string 590 * %-EINVAL: invalid character in string
588 * %-ERANGE: bit number specified too large for mask 591 * %-ERANGE: bit number specified too large for mask
589 */ 592 */
590int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) 593static int __bitmap_parselist(const char *buf, unsigned int buflen,
594 int is_user, unsigned long *maskp,
595 int nmaskbits)
591{ 596{
592 unsigned a, b; 597 unsigned a, b;
598 int c, old_c, totaldigits;
599 const char __user *ubuf = buf;
600 int exp_digit, in_range;
593 601
602 totaldigits = c = 0;
594 bitmap_zero(maskp, nmaskbits); 603 bitmap_zero(maskp, nmaskbits);
595 do { 604 do {
596 if (!isdigit(*bp)) 605 exp_digit = 1;
597 return -EINVAL; 606 in_range = 0;
598 b = a = simple_strtoul(bp, (char **)&bp, BASEDEC); 607 a = b = 0;
599 if (*bp == '-') { 608
600 bp++; 609 /* Get the next cpu# or a range of cpu#'s */
601 if (!isdigit(*bp)) 610 while (buflen) {
611 old_c = c;
612 if (is_user) {
613 if (__get_user(c, ubuf++))
614 return -EFAULT;
615 } else
616 c = *buf++;
617 buflen--;
618 if (isspace(c))
619 continue;
620
621 /*
622 * If the last character was a space and the current
623 * character isn't '\0', we've got embedded whitespace.
624 * This is a no-no, so throw an error.
625 */
626 if (totaldigits && c && isspace(old_c))
627 return -EINVAL;
628
629 /* A '\0' or a ',' signal the end of a cpu# or range */
630 if (c == '\0' || c == ',')
631 break;
632
633 if (c == '-') {
634 if (exp_digit || in_range)
635 return -EINVAL;
636 b = 0;
637 in_range = 1;
638 exp_digit = 1;
639 continue;
640 }
641
642 if (!isdigit(c))
602 return -EINVAL; 643 return -EINVAL;
603 b = simple_strtoul(bp, (char **)&bp, BASEDEC); 644
645 b = b * 10 + (c - '0');
646 if (!in_range)
647 a = b;
648 exp_digit = 0;
649 totaldigits++;
604 } 650 }
605 if (!(a <= b)) 651 if (!(a <= b))
606 return -EINVAL; 652 return -EINVAL;
@@ -610,13 +656,52 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
610 set_bit(a, maskp); 656 set_bit(a, maskp);
611 a++; 657 a++;
612 } 658 }
613 if (*bp == ',') 659 } while (buflen && c == ',');
614 bp++;
615 } while (*bp != '\0' && *bp != '\n');
616 return 0; 660 return 0;
617} 661}
662
663int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
664{
665 char *nl = strchr(bp, '\n');
666 int len;
667
668 if (nl)
669 len = nl - bp;
670 else
671 len = strlen(bp);
672
673 return __bitmap_parselist(bp, len, 0, maskp, nmaskbits);
674}
618EXPORT_SYMBOL(bitmap_parselist); 675EXPORT_SYMBOL(bitmap_parselist);
619 676
677
678/**
679 * bitmap_parselist_user()
680 *
681 * @ubuf: pointer to user buffer containing string.
682 * @ulen: buffer size in bytes. If string is smaller than this
683 * then it must be terminated with a \0.
684 * @maskp: pointer to bitmap array that will contain result.
685 * @nmaskbits: size of bitmap, in bits.
686 *
687 * Wrapper for bitmap_parselist(), providing it with user buffer.
688 *
689 * We cannot have this as an inline function in bitmap.h because it needs
690 * linux/uaccess.h to get the access_ok() declaration and this causes
691 * cyclic dependencies.
692 */
693int bitmap_parselist_user(const char __user *ubuf,
694 unsigned int ulen, unsigned long *maskp,
695 int nmaskbits)
696{
697 if (!access_ok(VERIFY_READ, ubuf, ulen))
698 return -EFAULT;
699 return __bitmap_parselist((const char *)ubuf,
700 ulen, 1, maskp, nmaskbits);
701}
702EXPORT_SYMBOL(bitmap_parselist_user);
703
704
620/** 705/**
621 * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap 706 * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
622 * @buf: pointer to a bitmap 707 * @buf: pointer to a bitmap