aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Jackson <pj@sgi.com>2008-04-28 05:12:29 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-28 11:58:19 -0400
commit7ea931c9fc80c4d0a4306c30ec92eb0f1d922a0b (patch)
tree4f2c85d658c75e8679e01943e3e95866b3729b54
parentf5b087b52f1710eb0bf15a2d2b030c51a6a1ca9e (diff)
mempolicy: add bitmap_onto() and bitmap_fold() operations
The following adds two more bitmap operators, bitmap_onto() and bitmap_fold(), with the usual cpumask and nodemask wrappers. The bitmap_onto() operator computes one bitmap relative to another. If the n-th bit in the origin mask is set, then the m-th bit of the destination mask will be set, where m is the position of the n-th set bit in the relative mask. The bitmap_fold() operator folds a bitmap into a second that has bit m set iff the input bitmap has some bit n set, where m == n mod sz, for the specified sz value. There are two substantive changes between this patch and its predecessor bitmap_relative: 1) Renamed bitmap_relative() to be bitmap_onto(). 2) Added bitmap_fold(). The essential motivation for bitmap_onto() is to provide a mechanism for converting a cpuset-relative CPU or Node mask to an absolute mask. Cpuset relative masks are written as if the current task were in a cpuset whose CPUs or Nodes were just the consecutive ones numbered 0..N-1, for some N. The bitmap_onto() operator is provided in anticipation of adding support for the first such cpuset relative mask, by the mbind() and set_mempolicy() system calls, using a planned flag of MPOL_F_RELATIVE_NODES. These bitmap operators (and their nodemask wrappers, in particular) will be used in code that converts the user specified cpuset relative memory policy to a specific system node numbered policy, given the current mems_allowed of the task's cpuset. Such cpuset relative mempolicies will address two deficiencies of the existing interface between cpusets and mempolicies: 1) A task cannot at present reliably establish a cpuset relative mempolicy because there is an essential race condition, in that the task's cpuset may be changed in between the time the task can query its cpuset placement, and the time the task can issue the applicable mbind or set_mempolicy system call. 
2) A task cannot at present establish what cpuset relative mempolicy it would like to have, if it is in a smaller cpuset than it might have mempolicy preferences for, because the existing interface only allows specifying mempolicies for nodes currently allowed by the cpuset. Cpuset relative mempolicies are useful for tasks that don't distinguish particularly between one CPU or Node and another, but only between how many of each are allowed, and the proper placement of threads and memory pages on the various CPUs and Nodes available. The motivation for the added bitmap_fold() can be seen in the following example. Let's say an application has specified some mempolicies that presume 16 memory nodes, including say a mempolicy that specified MPOL_F_RELATIVE_NODES (cpuset relative) nodes 12-15. Then let's say that application is crammed into a cpuset that only has 8 memory nodes, 0-7. If one just uses bitmap_onto(), this mempolicy, mapped to that cpuset, would ignore the requested relative nodes above 7, leaving it empty of nodes. That's not good; better to fold the higher nodes down, so that some nodes are included in the resulting mapped mempolicy. In this case, the mempolicy nodes 12-15 are taken modulo 8 (the weight of the mems_allowed of the confining cpuset), resulting in a mempolicy specifying nodes 4-7. Signed-off-by: Paul Jackson <pj@sgi.com> Signed-off-by: David Rientjes <rientjes@google.com> Cc: Christoph Lameter <clameter@sgi.com> Cc: Andi Kleen <ak@suse.de> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: <kosaki.motohiro@jp.fujitsu.com> Cc: <ray-lk@madrabbit.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/bitmap.h6
-rw-r--r--include/linux/cpumask.h22
-rw-r--r--include/linux/nodemask.h22
-rw-r--r--lib/bitmap.c158
4 files changed, 206 insertions, 2 deletions
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 1dbe074f1c64..43b406def35f 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -46,6 +46,8 @@
46 * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n 46 * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
47 * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) 47 * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src)
48 * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) 48 * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit)
49 * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap
50 * bitmap_fold(dst, orig, sz, nbits) dst bits = orig bits mod sz
49 * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf 51 * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf
50 * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf 52 * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf
51 * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf 53 * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf
@@ -121,6 +123,10 @@ extern void bitmap_remap(unsigned long *dst, const unsigned long *src,
121 const unsigned long *old, const unsigned long *new, int bits); 123 const unsigned long *old, const unsigned long *new, int bits);
122extern int bitmap_bitremap(int oldbit, 124extern int bitmap_bitremap(int oldbit,
123 const unsigned long *old, const unsigned long *new, int bits); 125 const unsigned long *old, const unsigned long *new, int bits);
126extern void bitmap_onto(unsigned long *dst, const unsigned long *orig,
127 const unsigned long *relmap, int bits);
128extern void bitmap_fold(unsigned long *dst, const unsigned long *orig,
129 int sz, int bits);
124extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); 130extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
125extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); 131extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
126extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); 132extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 259c8051155d..9650806fe2ea 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -14,6 +14,8 @@
14 * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. 14 * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c.
15 * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c 15 * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c
16 * For details of cpus_remap(), see bitmap_remap in lib/bitmap.c. 16 * For details of cpus_remap(), see bitmap_remap in lib/bitmap.c.
17 * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c.
18 * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c.
17 * 19 *
18 * The available cpumask operations are: 20 * The available cpumask operations are:
19 * 21 *
@@ -53,7 +55,9 @@
53 * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing 55 * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing
54 * int cpulist_parse(buf, map) Parse ascii string as cpulist 56 * int cpulist_parse(buf, map) Parse ascii string as cpulist
55 * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit) 57 * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit)
56 * int cpus_remap(dst, src, old, new) *dst = map(old, new)(src) 58 * void cpus_remap(dst, src, old, new) *dst = map(old, new)(src)
59 * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap
60 * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz
57 * 61 *
58 * for_each_cpu_mask(cpu, mask) for-loop cpu over mask 62 * for_each_cpu_mask(cpu, mask) for-loop cpu over mask
59 * 63 *
@@ -330,6 +334,22 @@ static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp,
330 bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); 334 bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits);
331} 335}
332 336
337#define cpus_onto(dst, orig, relmap) \
338 __cpus_onto(&(dst), &(orig), &(relmap), NR_CPUS)
339static inline void __cpus_onto(cpumask_t *dstp, const cpumask_t *origp,
340 const cpumask_t *relmapp, int nbits)
341{
342 bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits);
343}
344
345#define cpus_fold(dst, orig, sz) \
346 __cpus_fold(&(dst), &(orig), sz, NR_CPUS)
347static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
348 int sz, int nbits)
349{
350 bitmap_fold(dstp->bits, origp->bits, sz, nbits);
351}
352
333#if NR_CPUS > 1 353#if NR_CPUS > 1
334#define for_each_cpu_mask(cpu, mask) \ 354#define for_each_cpu_mask(cpu, mask) \
335 for ((cpu) = first_cpu(mask); \ 355 for ((cpu) = first_cpu(mask); \
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 905e18f4b412..848025cd7087 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -14,6 +14,8 @@
14 * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. 14 * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c.
15 * For details of node_remap(), see bitmap_bitremap in lib/bitmap.c. 15 * For details of node_remap(), see bitmap_bitremap in lib/bitmap.c.
16 * For details of nodes_remap(), see bitmap_remap in lib/bitmap.c. 16 * For details of nodes_remap(), see bitmap_remap in lib/bitmap.c.
17 * For details of nodes_onto(), see bitmap_onto in lib/bitmap.c.
18 * For details of nodes_fold(), see bitmap_fold in lib/bitmap.c.
17 * 19 *
18 * The available nodemask operations are: 20 * The available nodemask operations are:
19 * 21 *
@@ -55,7 +57,9 @@
55 * int nodelist_scnprintf(buf, len, mask) Format nodemask as list for printing 57 * int nodelist_scnprintf(buf, len, mask) Format nodemask as list for printing
56 * int nodelist_parse(buf, map) Parse ascii string as nodelist 58 * int nodelist_parse(buf, map) Parse ascii string as nodelist
57 * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) 59 * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit)
58 * int nodes_remap(dst, src, old, new) *dst = map(old, new)(dst) 60 * void nodes_remap(dst, src, old, new) *dst = map(old, new)(src)
61 * void nodes_onto(dst, orig, relmap) *dst = orig relative to relmap
62 * void nodes_fold(dst, orig, sz) dst bits = orig bits mod sz
59 * 63 *
60 * for_each_node_mask(node, mask) for-loop node over mask 64 * for_each_node_mask(node, mask) for-loop node over mask
61 * 65 *
@@ -326,6 +330,22 @@ static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp,
326 bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); 330 bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits);
327} 331}
328 332
333#define nodes_onto(dst, orig, relmap) \
334 __nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES)
335static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp,
336 const nodemask_t *relmapp, int nbits)
337{
338 bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits);
339}
340
341#define nodes_fold(dst, orig, sz) \
342 __nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES)
343static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp,
344 int sz, int nbits)
345{
346 bitmap_fold(dstp->bits, origp->bits, sz, nbits);
347}
348
329#if MAX_NUMNODES > 1 349#if MAX_NUMNODES > 1
330#define for_each_node_mask(node, mask) \ 350#define for_each_node_mask(node, mask) \
331 for ((node) = first_node(mask); \ 351 for ((node) = first_node(mask); \
diff --git a/lib/bitmap.c b/lib/bitmap.c
index a6939e18d7bb..c4cb48f77f0c 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -714,6 +714,164 @@ int bitmap_bitremap(int oldbit, const unsigned long *old,
714} 714}
715EXPORT_SYMBOL(bitmap_bitremap); 715EXPORT_SYMBOL(bitmap_bitremap);
716 716
717/**
718 * bitmap_onto - translate one bitmap relative to another
719 * @dst: resulting translated bitmap
720 * @orig: original untranslated bitmap
721 * @relmap: bitmap relative to which @orig is translated
722 * @bits: number of bits in each of these bitmaps
723 *
724 * Set the n-th bit of @dst iff there exists some m such that the
725 * n-th bit of @relmap is set, the m-th bit of @orig is set, and
726 * the n-th bit of @relmap is also the m-th _set_ bit of @relmap.
727 * (If you understood the previous sentence the first time you
728 * read it, you're overqualified for your current job.)
729 *
730 * In other words, @orig is mapped onto (surjectively) @dst,
731 * using the map { <n, m> | the n-th bit of @relmap is the
732 * m-th set bit of @relmap }.
733 *
734 * Any set bits in @orig above bit number W, where W is the
735 * weight of (number of set bits in) @relmap are mapped nowhere.
736 * In particular, if for all bits m set in @orig, m >= W, then
737 * @dst will end up empty. In situations where the possibility
738 * of such an empty result is not desired, one way to avoid it is
739 * to use the bitmap_fold() operator, below, to first fold the
740 * @orig bitmap over itself so that all its set bits x are in the
741 * range 0 <= x < W. The bitmap_fold() operator does this by
742 * setting the bit (m % W) in @dst, for each bit (m) set in @orig.
743 *
744 * Example [1] for bitmap_onto():
745 * Let's say @relmap has bits 30-39 set, and @orig has bits
746 * 1, 3, 5, 7, 9 and 11 set. Then on return from this routine,
747 * @dst will have bits 31, 33, 35, 37 and 39 set.
748 *
749 * When bit 0 is set in @orig, it means turn on the bit in
750 * @dst corresponding to whatever is the first bit (if any)
751 * that is turned on in @relmap. Since bit 0 was off in the
752 * above example, we leave off that bit (bit 30) in @dst.
753 *
754 * When bit 1 is set in @orig (as in the above example), it
755 * means turn on the bit in @dst corresponding to whatever
756 * is the second bit that is turned on in @relmap. The second
757 * bit in @relmap that was turned on in the above example was
758 * bit 31, so we turned on bit 31 in @dst.
759 *
760 * Similarly, we turned on bits 33, 35, 37 and 39 in @dst,
761 * because they were the 4th, 6th, 8th and 10th set bits
762 * in @relmap, and the 4th, 6th, 8th and 10th bits of
763 * @orig (i.e. bits 3, 5, 7 and 9) were also set.
764 *
765 * When bit 11 is set in @orig, it means turn on the bit in
766 * @dst corresponding to whatever is the twelfth bit that is
767 * turned on in @relmap. In the above example, there were
768 * only ten bits turned on in @relmap (30..39), so the fact that
769 * bit 11 was set in @orig had no effect on @dst.
770 *
771 * Example [2] for bitmap_fold() + bitmap_onto():
772 * Let's say @relmap has these ten bits set:
773 * 40 41 42 43 45 48 53 61 74 95
774 * (for the curious, that's 40 plus the first ten terms of the
775 * Fibonacci sequence.)
776 *
777 * Further let's say we use the following code, invoking
778 * bitmap_fold() then bitmap_onto(), as suggested above to
779 * avoid the possibility of an empty @dst result:
780 *
781 * unsigned long *tmp; // a temporary bitmap's bits
782 *
783 * bitmap_fold(tmp, orig, bitmap_weight(relmap, bits), bits);
784 * bitmap_onto(dst, tmp, relmap, bits);
785 *
786 * Then this table shows what various values of @dst would be, for
787 * various @orig's. I list the zero-based positions of each set bit.
788 * The tmp column shows the intermediate result, as computed by
789 * using bitmap_fold() to fold the @orig bitmap modulo ten
790 * (the weight of @relmap).
791 *
792 * @orig tmp @dst
793 * 0 0 40
794 * 1 1 41
795 * 9 9 95
796 * 10 0 40 (*)
797 * 1 3 5 7 1 3 5 7 41 43 48 61
798 * 0 1 2 3 4 0 1 2 3 4 40 41 42 43 45
799 * 0 9 18 27 0 9 8 7 40 61 74 95
800 * 0 10 20 30 0 40
801 * 0 11 22 33 0 1 2 3 40 41 42 43
802 * 0 12 24 36 0 2 4 6 40 42 45 53
803 * 78 102 211 1 2 8 41 42 74 (*)
804 *
805 * (*) For these marked lines, if we hadn't first done bitmap_fold()
806 * into tmp, then the @dst result would have been empty.
807 *
808 * If either of @orig or @relmap is empty (no set bits), then @dst
809 * will be returned empty.
810 *
811 * If (as explained above) the only set bits in @orig are in positions
812 * m where m >= W, (where W is the weight of @relmap) then @dst will
813 * once again be returned empty.
814 *
815 * All bits in @dst not set by the above rule are cleared.
816 */
817void bitmap_onto(unsigned long *dst, const unsigned long *orig,
818 const unsigned long *relmap, int bits)
819{
820 int n, m; /* same meaning as in above comment */
821
822 if (dst == orig) /* following doesn't handle inplace mappings */
823 return;
824 bitmap_zero(dst, bits);
825
826 /*
827 * The following code is a more efficient, but less
828 * obvious, equivalent to the loop:
829 * for (m = 0; m < bitmap_weight(relmap, bits); m++) {
830 * n = bitmap_ord_to_pos(relmap, m, bits);
831 * if (test_bit(m, orig))
832 * set_bit(n, dst);
833 * }
834 */
835
836 m = 0;
837 for (n = find_first_bit(relmap, bits);
838 n < bits;
839 n = find_next_bit(relmap, bits, n + 1)) {
840 /* m == bitmap_pos_to_ord(relmap, n, bits) */
841 if (test_bit(m, orig))
842 set_bit(n, dst);
843 m++;
844 }
845}
846EXPORT_SYMBOL(bitmap_onto);
847
848/**
849 * bitmap_fold - fold larger bitmap into smaller, modulo specified size
850 * @dst: resulting smaller bitmap
851 * @orig: original larger bitmap
852 * @sz: specified size (used as the modulus; must be nonzero)
853 * @bits: number of bits in each of these bitmaps
854 *
855 * For each set bit oldbit in @orig, set bit oldbit mod @sz in @dst.
856 * Clear all other bits in @dst. See further the comment and
857 * Example [2] for bitmap_onto() for why and how to use this.
858 */
859void bitmap_fold(unsigned long *dst, const unsigned long *orig,
860 int sz, int bits)
861{
862 int oldbit;
863
864 if (dst == orig) /* following doesn't handle inplace mappings */
865 return;
866 bitmap_zero(dst, bits);
867
868 for (oldbit = find_first_bit(orig, bits);
869 oldbit < bits;
870 oldbit = find_next_bit(orig, bits, oldbit + 1))
871 set_bit(oldbit % sz, dst);
872}
873EXPORT_SYMBOL(bitmap_fold);
874
717/* 875/*
718 * Common code for bitmap_*_region() routines. 876 * Common code for bitmap_*_region() routines.
719 * bitmap: array of unsigned longs corresponding to the bitmap 877 * bitmap: array of unsigned longs corresponding to the bitmap