author		Linus Torvalds <torvalds@linux-foundation.org>	2010-10-22 20:00:32 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-10-22 20:00:32 -0400
commit		e9dd2b6837e26fe202708cce5ea4bb4ee3e3482e (patch)
tree		f42fd892495bfc4cbb740d06b016d267c9c42d00 /block/blk-cgroup.c
parent		4f3a29dadaf999a273f1e7fe2476595d0283eef3 (diff)
parent		b4627321e18582dcbdeb45d77df29d3177107c65 (diff)
Merge branch 'for-2.6.37/core' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.37/core' of git://git.kernel.dk/linux-2.6-block: (39 commits)
  cfq-iosched: Fix a gcc 4.5 warning and put some comments
  block: Turn bvec_k{un,}map_irq() into static inline functions
  block: fix accounting bug on cross partition merges
  block: Make the integrity mapped property a bio flag
  block: Fix double free in blk_integrity_unregister
  block: Ensure physical block size is unsigned int
  blkio-throttle: Fix possible multiplication overflow in iops calculations
  blkio-throttle: limit max iops value to UINT_MAX
  blkio-throttle: There is no need to convert jiffies to milli seconds
  blkio-throttle: Fix link failure failure on i386
  blkio: Recalculate the throttled bio dispatch time upon throttle limit change
  blkio: Add root group to td->tg_list
  blkio: deletion of a cgroup was causes oops
  blkio: Do not export throttle files if CONFIG_BLK_DEV_THROTTLING=n
  block: set the bounce_pfn to the actual DMA limit rather than to max memory
  block: revert bad fix for memory hotplug causing bounces
  Fix compile error in blk-exec.c for !CONFIG_DETECT_HUNG_TASK
  block: set the bounce_pfn to the actual DMA limit rather than to max memory
  block: Prevent hang_check firing during long I/O
  cfq: improve fsync performance for small files
  ...

Fix up trivial conflicts due to __rcu sparse annotation in include/linux/genhd.h
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r--	block/blk-cgroup.c	804
1 file changed, 646 insertions(+), 158 deletions(-)
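
For blk-cgroup.c, the heart of this merge is the BLKIOFILE_* encoding added in the first hunk below: each cgroup control file packs the owning policy id into the upper 16 bits of cft->private and a per-file attribute into the lower 16, so one set of generic read/write handlers can dispatch for both the proportional-weight and throttling policies. A minimal, self-contained sketch of the packing scheme follows; the enum values are illustrative stand-ins, not the kernel's:

#include <assert.h>
#include <stdio.h>

/* Same bit layout as the BLKIOFILE_* macros in the diff below. */
#define BLKIOFILE_PRIVATE(x, val)	(((x) << 16) | (val))
#define BLKIOFILE_POLICY(val)		(((val) >> 16) & 0xffff)
#define BLKIOFILE_ATTR(val)		((val) & 0xffff)

enum { POLICY_PROP = 1, POLICY_THROTL = 2 };	/* illustrative ids */
enum { ATTR_READ_BPS = 3 };			/* illustrative attribute */

int main(void)
{
	unsigned int priv = BLKIOFILE_PRIVATE(POLICY_THROTL, ATTR_READ_BPS);

	/* A single handler can now recover both halves from cft->private. */
	assert(BLKIOFILE_POLICY(priv) == POLICY_THROTL);
	assert(BLKIOFILE_ATTR(priv) == ATTR_READ_BPS);
	printf("policy=%u attr=%u\n", BLKIOFILE_POLICY(priv),
	       BLKIOFILE_ATTR(priv));
	return 0;
}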
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 2fef1ef931a0..b1febd0f6d2a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -37,6 +37,12 @@ static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
 static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
 static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
 
+/* for encoding cft->private value on file */
+#define BLKIOFILE_PRIVATE(x, val)	(((x) << 16) | (val))
+/* What policy owns the file, proportional or throttle */
+#define BLKIOFILE_POLICY(val)		(((val) >> 16) & 0xffff)
+#define BLKIOFILE_ATTR(val)		((val) & 0xffff)
+
 struct cgroup_subsys blkio_subsys = {
 	.name = "blkio",
 	.create = blkiocg_create,
@@ -59,6 +65,27 @@ static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
 	list_add(&pn->node, &blkcg->policy_list);
 }
 
+static inline bool cftype_blkg_same_policy(struct cftype *cft,
+			struct blkio_group *blkg)
+{
+	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
+
+	if (blkg->plid == plid)
+		return 1;
+
+	return 0;
+}
+
+/* Determines if policy node matches cgroup file being accessed */
+static inline bool pn_matches_cftype(struct cftype *cft,
+			struct blkio_policy_node *pn)
+{
+	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
+	int fileid = BLKIOFILE_ATTR(cft->private);
+
+	return (plid == pn->plid && fileid == pn->fileid);
+}
+
 /* Must be called with blkcg->lock held */
 static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
 {
@@ -67,12 +94,13 @@ static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
 
 /* Must be called with blkcg->lock held */
 static struct blkio_policy_node *
-blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev)
+blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev,
+		enum blkio_policy_id plid, int fileid)
 {
 	struct blkio_policy_node *pn;
 
 	list_for_each_entry(pn, &blkcg->policy_list, node) {
-		if (pn->dev == dev)
+		if (pn->dev == dev && pn->plid == plid && pn->fileid == fileid)
 			return pn;
 	}
 
@@ -86,6 +114,67 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
 }
 EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
 
+static inline void
+blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
+{
+	struct blkio_policy_type *blkiop;
+
+	list_for_each_entry(blkiop, &blkio_list, list) {
+		/* If this policy does not own the blkg, do not send updates */
+		if (blkiop->plid != blkg->plid)
+			continue;
+		if (blkiop->ops.blkio_update_group_weight_fn)
+			blkiop->ops.blkio_update_group_weight_fn(blkg->key,
+							blkg, weight);
+	}
+}
+
+static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps,
+				int fileid)
+{
+	struct blkio_policy_type *blkiop;
+
+	list_for_each_entry(blkiop, &blkio_list, list) {
+
+		/* If this policy does not own the blkg, do not send updates */
+		if (blkiop->plid != blkg->plid)
+			continue;
+
+		if (fileid == BLKIO_THROTL_read_bps_device
+		    && blkiop->ops.blkio_update_group_read_bps_fn)
+			blkiop->ops.blkio_update_group_read_bps_fn(blkg->key,
+								blkg, bps);
+
+		if (fileid == BLKIO_THROTL_write_bps_device
+		    && blkiop->ops.blkio_update_group_write_bps_fn)
+			blkiop->ops.blkio_update_group_write_bps_fn(blkg->key,
+								blkg, bps);
+	}
+}
+
+static inline void blkio_update_group_iops(struct blkio_group *blkg,
+			unsigned int iops, int fileid)
+{
+	struct blkio_policy_type *blkiop;
+
+	list_for_each_entry(blkiop, &blkio_list, list) {
+
+		/* If this policy does not own the blkg, do not send updates */
+		if (blkiop->plid != blkg->plid)
+			continue;
+
+		if (fileid == BLKIO_THROTL_read_iops_device
+		    && blkiop->ops.blkio_update_group_read_iops_fn)
+			blkiop->ops.blkio_update_group_read_iops_fn(blkg->key,
+								blkg, iops);
+
+		if (fileid == BLKIO_THROTL_write_iops_device
+		    && blkiop->ops.blkio_update_group_write_iops_fn)
+			blkiop->ops.blkio_update_group_write_iops_fn(blkg->key,
+								blkg, iops);
+	}
+}
+
 /*
  * Add to the appropriate stat variable depending on the request type.
  * This should be called with the blkg->stats_lock held.
@@ -341,7 +430,8 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
 EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
 
 void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
-		struct blkio_group *blkg, void *key, dev_t dev)
+		struct blkio_group *blkg, void *key, dev_t dev,
+		enum blkio_policy_id plid)
 {
 	unsigned long flags;
 
@@ -350,6 +440,7 @@ void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 	rcu_assign_pointer(blkg->key, key);
 	blkg->blkcg_id = css_id(&blkcg->css);
 	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
+	blkg->plid = plid;
 	spin_unlock_irqrestore(&blkcg->lock, flags);
 	/* Need to take css reference ? */
 	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
@@ -408,51 +499,6 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
 }
 EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
 
-#define SHOW_FUNCTION(__VAR)						\
-static u64 blkiocg_##__VAR##_read(struct cgroup *cgroup,		\
-				struct cftype *cftype)			\
-{									\
-	struct blkio_cgroup *blkcg;					\
-									\
-	blkcg = cgroup_to_blkio_cgroup(cgroup);				\
-	return (u64)blkcg->__VAR;					\
-}
-
-SHOW_FUNCTION(weight);
-#undef SHOW_FUNCTION
-
-static int
-blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
-{
-	struct blkio_cgroup *blkcg;
-	struct blkio_group *blkg;
-	struct hlist_node *n;
-	struct blkio_policy_type *blkiop;
-	struct blkio_policy_node *pn;
-
-	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
-		return -EINVAL;
-
-	blkcg = cgroup_to_blkio_cgroup(cgroup);
-	spin_lock(&blkio_list_lock);
-	spin_lock_irq(&blkcg->lock);
-	blkcg->weight = (unsigned int)val;
-
-	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
-		pn = blkio_policy_search_node(blkcg, blkg->dev);
-
-		if (pn)
-			continue;
-
-		list_for_each_entry(blkiop, &blkio_list, list)
-			blkiop->ops.blkio_update_group_weight_fn(blkg,
-					blkcg->weight);
-	}
-	spin_unlock_irq(&blkcg->lock);
-	spin_unlock(&blkio_list_lock);
-	return 0;
-}
-
 static int
 blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 {
@@ -593,52 +639,6 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
 	return disk_total;
 }
 
-#define SHOW_FUNCTION_PER_GROUP(__VAR, type, show_total)		\
-static int blkiocg_##__VAR##_read(struct cgroup *cgroup,		\
-		struct cftype *cftype, struct cgroup_map_cb *cb)	\
-{									\
-	struct blkio_cgroup *blkcg;					\
-	struct blkio_group *blkg;					\
-	struct hlist_node *n;						\
-	uint64_t cgroup_total = 0;					\
-									\
-	if (!cgroup_lock_live_group(cgroup))				\
-		return -ENODEV;						\
-									\
-	blkcg = cgroup_to_blkio_cgroup(cgroup);				\
-	rcu_read_lock();						\
-	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
-		if (blkg->dev) {					\
-			spin_lock_irq(&blkg->stats_lock);		\
-			cgroup_total += blkio_get_stat(blkg, cb,	\
-						blkg->dev, type);	\
-			spin_unlock_irq(&blkg->stats_lock);		\
-		}							\
-	}								\
-	if (show_total)							\
-		cb->fill(cb, "Total", cgroup_total);			\
-	rcu_read_unlock();						\
-	cgroup_unlock();						\
-	return 0;							\
-}
-
-SHOW_FUNCTION_PER_GROUP(time, BLKIO_STAT_TIME, 0);
-SHOW_FUNCTION_PER_GROUP(sectors, BLKIO_STAT_SECTORS, 0);
-SHOW_FUNCTION_PER_GROUP(io_service_bytes, BLKIO_STAT_SERVICE_BYTES, 1);
-SHOW_FUNCTION_PER_GROUP(io_serviced, BLKIO_STAT_SERVICED, 1);
-SHOW_FUNCTION_PER_GROUP(io_service_time, BLKIO_STAT_SERVICE_TIME, 1);
-SHOW_FUNCTION_PER_GROUP(io_wait_time, BLKIO_STAT_WAIT_TIME, 1);
-SHOW_FUNCTION_PER_GROUP(io_merged, BLKIO_STAT_MERGED, 1);
-SHOW_FUNCTION_PER_GROUP(io_queued, BLKIO_STAT_QUEUED, 1);
-#ifdef CONFIG_DEBUG_BLK_CGROUP
-SHOW_FUNCTION_PER_GROUP(dequeue, BLKIO_STAT_DEQUEUE, 0);
-SHOW_FUNCTION_PER_GROUP(avg_queue_size, BLKIO_STAT_AVG_QUEUE_SIZE, 0);
-SHOW_FUNCTION_PER_GROUP(group_wait_time, BLKIO_STAT_GROUP_WAIT_TIME, 0);
-SHOW_FUNCTION_PER_GROUP(idle_time, BLKIO_STAT_IDLE_TIME, 0);
-SHOW_FUNCTION_PER_GROUP(empty_time, BLKIO_STAT_EMPTY_TIME, 0);
-#endif
-#undef SHOW_FUNCTION_PER_GROUP
-
 static int blkio_check_dev_num(dev_t dev)
 {
 	int part = 0;
@@ -652,13 +652,14 @@ static int blkio_check_dev_num(dev_t dev)
 }
 
 static int blkio_policy_parse_and_set(char *buf,
-			struct blkio_policy_node *newpn)
+			struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
 {
 	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
 	int ret;
 	unsigned long major, minor, temp;
 	int i = 0;
 	dev_t dev;
+	u64 bps, iops;
 
 	memset(s, 0, sizeof(s));
 
@@ -705,12 +706,47 @@ static int blkio_policy_parse_and_set(char *buf,
 	if (s[1] == NULL)
 		return -EINVAL;
 
-	ret = strict_strtoul(s[1], 10, &temp);
-	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
-	    temp > BLKIO_WEIGHT_MAX)
-		return -EINVAL;
+	switch (plid) {
+	case BLKIO_POLICY_PROP:
+		ret = strict_strtoul(s[1], 10, &temp);
+		if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
+		    temp > BLKIO_WEIGHT_MAX)
+			return -EINVAL;
 
-	newpn->weight = temp;
+		newpn->plid = plid;
+		newpn->fileid = fileid;
+		newpn->val.weight = temp;
+		break;
+	case BLKIO_POLICY_THROTL:
+		switch (fileid) {
+		case BLKIO_THROTL_read_bps_device:
+		case BLKIO_THROTL_write_bps_device:
+			ret = strict_strtoull(s[1], 10, &bps);
+			if (ret)
+				return -EINVAL;
+
+			newpn->plid = plid;
+			newpn->fileid = fileid;
+			newpn->val.bps = bps;
+			break;
+		case BLKIO_THROTL_read_iops_device:
+		case BLKIO_THROTL_write_iops_device:
+			ret = strict_strtoull(s[1], 10, &iops);
+			if (ret)
+				return -EINVAL;
+
+			if (iops > THROTL_IOPS_MAX)
+				return -EINVAL;
+
+			newpn->plid = plid;
+			newpn->fileid = fileid;
+			newpn->val.iops = (unsigned int)iops;
+			break;
+		}
+		break;
+	default:
+		BUG();
+	}
 
 	return 0;
 }
@@ -720,26 +756,180 @@ unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
 {
 	struct blkio_policy_node *pn;
 
-	pn = blkio_policy_search_node(blkcg, dev);
+	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
+				BLKIO_PROP_weight_device);
 	if (pn)
-		return pn->weight;
+		return pn->val.weight;
 	else
 		return blkcg->weight;
 }
 EXPORT_SYMBOL_GPL(blkcg_get_weight);
 
+uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
+{
+	struct blkio_policy_node *pn;
+
+	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
+				BLKIO_THROTL_read_bps_device);
+	if (pn)
+		return pn->val.bps;
+	else
+		return -1;
+}
+
+uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
+{
+	struct blkio_policy_node *pn;
+	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
+				BLKIO_THROTL_write_bps_device);
+	if (pn)
+		return pn->val.bps;
+	else
+		return -1;
+}
+
+unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
+{
+	struct blkio_policy_node *pn;
+
+	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
+				BLKIO_THROTL_read_iops_device);
+	if (pn)
+		return pn->val.iops;
+	else
+		return -1;
+}
+
+unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
+{
+	struct blkio_policy_node *pn;
+	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
+				BLKIO_THROTL_write_iops_device);
+	if (pn)
+		return pn->val.iops;
+	else
+		return -1;
+}
+
+/* Checks whether user asked for deleting a policy rule */
+static bool blkio_delete_rule_command(struct blkio_policy_node *pn)
+{
+	switch (pn->plid) {
+	case BLKIO_POLICY_PROP:
+		if (pn->val.weight == 0)
+			return 1;
+		break;
+	case BLKIO_POLICY_THROTL:
+		switch (pn->fileid) {
+		case BLKIO_THROTL_read_bps_device:
+		case BLKIO_THROTL_write_bps_device:
+			if (pn->val.bps == 0)
+				return 1;
+			break;
+		case BLKIO_THROTL_read_iops_device:
+		case BLKIO_THROTL_write_iops_device:
+			if (pn->val.iops == 0)
+				return 1;
+		}
+		break;
+	default:
+		BUG();
+	}
+
+	return 0;
+}
+
+static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
+					struct blkio_policy_node *newpn)
+{
+	switch (oldpn->plid) {
+	case BLKIO_POLICY_PROP:
+		oldpn->val.weight = newpn->val.weight;
+		break;
+	case BLKIO_POLICY_THROTL:
+		switch (newpn->fileid) {
+		case BLKIO_THROTL_read_bps_device:
+		case BLKIO_THROTL_write_bps_device:
+			oldpn->val.bps = newpn->val.bps;
+			break;
+		case BLKIO_THROTL_read_iops_device:
+		case BLKIO_THROTL_write_iops_device:
+			oldpn->val.iops = newpn->val.iops;
+		}
+		break;
+	default:
+		BUG();
+	}
+}
+
+/*
+ * Some rules/values in blkg have changed. Propagate those to respective
+ * policies.
+ */
+static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
+		struct blkio_group *blkg, struct blkio_policy_node *pn)
+{
+	unsigned int weight, iops;
+	u64 bps;
+
+	switch (pn->plid) {
+	case BLKIO_POLICY_PROP:
+		weight = pn->val.weight ? pn->val.weight :
+				blkcg->weight;
+		blkio_update_group_weight(blkg, weight);
+		break;
+	case BLKIO_POLICY_THROTL:
+		switch (pn->fileid) {
+		case BLKIO_THROTL_read_bps_device:
+		case BLKIO_THROTL_write_bps_device:
+			bps = pn->val.bps ? pn->val.bps : (-1);
+			blkio_update_group_bps(blkg, bps, pn->fileid);
+			break;
+		case BLKIO_THROTL_read_iops_device:
+		case BLKIO_THROTL_write_iops_device:
+			iops = pn->val.iops ? pn->val.iops : (-1);
+			blkio_update_group_iops(blkg, iops, pn->fileid);
+			break;
+		}
+		break;
+	default:
+		BUG();
+	}
+}
+
+/*
+ * A policy node rule has been updated. Propagate this update to all the
+ * block groups which might be affected by this update.
+ */
+static void blkio_update_policy_node_blkg(struct blkio_cgroup *blkcg,
+				struct blkio_policy_node *pn)
+{
+	struct blkio_group *blkg;
+	struct hlist_node *n;
+
+	spin_lock(&blkio_list_lock);
+	spin_lock_irq(&blkcg->lock);
+
+	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+		if (pn->dev != blkg->dev || pn->plid != blkg->plid)
+			continue;
+		blkio_update_blkg_policy(blkcg, blkg, pn);
+	}
+
+	spin_unlock_irq(&blkcg->lock);
+	spin_unlock(&blkio_list_lock);
+}
+
 
-static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
+static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
 			const char *buffer)
 {
 	int ret = 0;
 	char *buf;
 	struct blkio_policy_node *newpn, *pn;
 	struct blkio_cgroup *blkcg;
-	struct blkio_group *blkg;
 	int keep_newpn = 0;
-	struct hlist_node *n;
-	struct blkio_policy_type *blkiop;
+	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
+	int fileid = BLKIOFILE_ATTR(cft->private);
 
 	buf = kstrdup(buffer, GFP_KERNEL);
 	if (!buf)
@@ -751,7 +941,7 @@ static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
 		goto free_buf;
 	}
 
-	ret = blkio_policy_parse_and_set(buf, newpn);
+	ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid);
 	if (ret)
 		goto free_newpn;
 
@@ -759,9 +949,9 @@ static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
 
 	spin_lock_irq(&blkcg->lock);
 
-	pn = blkio_policy_search_node(blkcg, newpn->dev);
+	pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid);
 	if (!pn) {
-		if (newpn->weight != 0) {
+		if (!blkio_delete_rule_command(newpn)) {
 			blkio_policy_insert_node(blkcg, newpn);
 			keep_newpn = 1;
 		}
@@ -769,33 +959,17 @@ static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
 		goto update_io_group;
 	}
 
-	if (newpn->weight == 0) {
-		/* weight == 0 means deleteing a specific weight */
+	if (blkio_delete_rule_command(newpn)) {
 		blkio_policy_delete_node(pn);
 		spin_unlock_irq(&blkcg->lock);
 		goto update_io_group;
 	}
 	spin_unlock_irq(&blkcg->lock);
 
-	pn->weight = newpn->weight;
+	blkio_update_policy_rule(pn, newpn);
 
 update_io_group:
-	/* update weight for each cfqg */
-	spin_lock(&blkio_list_lock);
-	spin_lock_irq(&blkcg->lock);
-
-	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
-		if (newpn->dev == blkg->dev) {
-			list_for_each_entry(blkiop, &blkio_list, list)
-				blkiop->ops.blkio_update_group_weight_fn(blkg,
-					newpn->weight ?
-					newpn->weight :
-					blkcg->weight);
-		}
-	}
-
-	spin_unlock_irq(&blkcg->lock);
-	spin_unlock(&blkio_list_lock);
+	blkio_update_policy_node_blkg(blkcg, newpn);
 
 free_newpn:
 	if (!keep_newpn)
@@ -805,23 +979,256 @@ free_buf:
 	return ret;
 }
 
-static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft,
-			struct seq_file *m)
+static void
+blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn)
 {
-	struct blkio_cgroup *blkcg;
-	struct blkio_policy_node *pn;
+	switch (pn->plid) {
+	case BLKIO_POLICY_PROP:
+		if (pn->fileid == BLKIO_PROP_weight_device)
+			seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
+				MINOR(pn->dev), pn->val.weight);
+		break;
+	case BLKIO_POLICY_THROTL:
+		switch (pn->fileid) {
+		case BLKIO_THROTL_read_bps_device:
+		case BLKIO_THROTL_write_bps_device:
+			seq_printf(m, "%u:%u\t%llu\n", MAJOR(pn->dev),
+				MINOR(pn->dev), pn->val.bps);
+			break;
+		case BLKIO_THROTL_read_iops_device:
+		case BLKIO_THROTL_write_iops_device:
+			seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
+				MINOR(pn->dev), pn->val.iops);
+			break;
+		}
+		break;
+	default:
+		BUG();
+	}
+}
 
-	seq_printf(m, "dev\tweight\n");
+/* cgroup files which read their data from policy nodes end up here */
+static void blkio_read_policy_node_files(struct cftype *cft,
+			struct blkio_cgroup *blkcg, struct seq_file *m)
+{
+	struct blkio_policy_node *pn;
 
-	blkcg = cgroup_to_blkio_cgroup(cgrp);
 	if (!list_empty(&blkcg->policy_list)) {
 		spin_lock_irq(&blkcg->lock);
 		list_for_each_entry(pn, &blkcg->policy_list, node) {
-			seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
-				MINOR(pn->dev), pn->weight);
+			if (!pn_matches_cftype(cft, pn))
+				continue;
+			blkio_print_policy_node(m, pn);
 		}
 		spin_unlock_irq(&blkcg->lock);
 	}
+}
+
+static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
+				struct seq_file *m)
+{
+	struct blkio_cgroup *blkcg;
+	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
+	int name = BLKIOFILE_ATTR(cft->private);
+
+	blkcg = cgroup_to_blkio_cgroup(cgrp);
+
+	switch (plid) {
+	case BLKIO_POLICY_PROP:
+		switch (name) {
+		case BLKIO_PROP_weight_device:
+			blkio_read_policy_node_files(cft, blkcg, m);
+			return 0;
+		default:
+			BUG();
+		}
+		break;
+	case BLKIO_POLICY_THROTL:
+		switch (name) {
+		case BLKIO_THROTL_read_bps_device:
+		case BLKIO_THROTL_write_bps_device:
+		case BLKIO_THROTL_read_iops_device:
+		case BLKIO_THROTL_write_iops_device:
+			blkio_read_policy_node_files(cft, blkcg, m);
+			return 0;
+		default:
+			BUG();
+		}
+		break;
+	default:
+		BUG();
+	}
+
+	return 0;
+}
+
+static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
+		struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type,
+		bool show_total)
+{
+	struct blkio_group *blkg;
+	struct hlist_node *n;
+	uint64_t cgroup_total = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
+		if (blkg->dev) {
+			if (!cftype_blkg_same_policy(cft, blkg))
+				continue;
+			spin_lock_irq(&blkg->stats_lock);
+			cgroup_total += blkio_get_stat(blkg, cb, blkg->dev,
+						type);
+			spin_unlock_irq(&blkg->stats_lock);
+		}
+	}
+	if (show_total)
+		cb->fill(cb, "Total", cgroup_total);
+	rcu_read_unlock();
+	return 0;
+}
+
+/* All map-type cgroup files are serviced by this function */
+static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
+				struct cgroup_map_cb *cb)
+{
+	struct blkio_cgroup *blkcg;
+	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
+	int name = BLKIOFILE_ATTR(cft->private);
+
+	blkcg = cgroup_to_blkio_cgroup(cgrp);
+
+	switch (plid) {
+	case BLKIO_POLICY_PROP:
+		switch (name) {
+		case BLKIO_PROP_time:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_TIME, 0);
+		case BLKIO_PROP_sectors:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_SECTORS, 0);
+		case BLKIO_PROP_io_service_bytes:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_SERVICE_BYTES, 1);
+		case BLKIO_PROP_io_serviced:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_SERVICED, 1);
+		case BLKIO_PROP_io_service_time:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_SERVICE_TIME, 1);
+		case BLKIO_PROP_io_wait_time:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_WAIT_TIME, 1);
+		case BLKIO_PROP_io_merged:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_MERGED, 1);
+		case BLKIO_PROP_io_queued:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_QUEUED, 1);
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+		case BLKIO_PROP_dequeue:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_DEQUEUE, 0);
+		case BLKIO_PROP_avg_queue_size:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_AVG_QUEUE_SIZE, 0);
+		case BLKIO_PROP_group_wait_time:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_GROUP_WAIT_TIME, 0);
+		case BLKIO_PROP_idle_time:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_IDLE_TIME, 0);
+		case BLKIO_PROP_empty_time:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_EMPTY_TIME, 0);
+#endif
+		default:
+			BUG();
+		}
+		break;
+	case BLKIO_POLICY_THROTL:
+		switch (name) {
+		case BLKIO_THROTL_io_service_bytes:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_SERVICE_BYTES, 1);
+		case BLKIO_THROTL_io_serviced:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_SERVICED, 1);
+		default:
+			BUG();
+		}
+		break;
+	default:
+		BUG();
+	}
+
+	return 0;
+}
+
+static int blkio_weight_write(struct blkio_cgroup *blkcg, u64 val)
+{
+	struct blkio_group *blkg;
+	struct hlist_node *n;
+	struct blkio_policy_node *pn;
+
+	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
+		return -EINVAL;
+
+	spin_lock(&blkio_list_lock);
+	spin_lock_irq(&blkcg->lock);
+	blkcg->weight = (unsigned int)val;
+
+	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+		pn = blkio_policy_search_node(blkcg, blkg->dev,
+				BLKIO_POLICY_PROP, BLKIO_PROP_weight_device);
+		if (pn)
+			continue;
+
+		blkio_update_group_weight(blkg, blkcg->weight);
+	}
+	spin_unlock_irq(&blkcg->lock);
+	spin_unlock(&blkio_list_lock);
+	return 0;
+}
+
+static u64 blkiocg_file_read_u64(struct cgroup *cgrp, struct cftype *cft)
+{
+	struct blkio_cgroup *blkcg;
+	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
+	int name = BLKIOFILE_ATTR(cft->private);
+
+	blkcg = cgroup_to_blkio_cgroup(cgrp);
+
+	switch (plid) {
+	case BLKIO_POLICY_PROP:
+		switch (name) {
+		case BLKIO_PROP_weight:
+			return (u64)blkcg->weight;
+		}
+		break;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+static int
+blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
+{
+	struct blkio_cgroup *blkcg;
+	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
+	int name = BLKIOFILE_ATTR(cft->private);
+
+	blkcg = cgroup_to_blkio_cgroup(cgrp);
+
+	switch (plid) {
+	case BLKIO_POLICY_PROP:
+		switch (name) {
+		case BLKIO_PROP_weight:
+			return blkio_weight_write(blkcg, val);
+		}
+		break;
+	default:
+		BUG();
+	}
 
 	return 0;
 }
@@ -829,71 +1236,151 @@ static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft,
 struct cftype blkio_files[] = {
 	{
 		.name = "weight_device",
-		.read_seq_string = blkiocg_weight_device_read,
-		.write_string = blkiocg_weight_device_write,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_weight_device),
+		.read_seq_string = blkiocg_file_read,
+		.write_string = blkiocg_file_write,
 		.max_write_len = 256,
 	},
 	{
 		.name = "weight",
-		.read_u64 = blkiocg_weight_read,
-		.write_u64 = blkiocg_weight_write,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_weight),
+		.read_u64 = blkiocg_file_read_u64,
+		.write_u64 = blkiocg_file_write_u64,
 	},
 	{
 		.name = "time",
-		.read_map = blkiocg_time_read,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_time),
+		.read_map = blkiocg_file_read_map,
 	},
 	{
 		.name = "sectors",
-		.read_map = blkiocg_sectors_read,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_sectors),
+		.read_map = blkiocg_file_read_map,
 	},
 	{
 		.name = "io_service_bytes",
-		.read_map = blkiocg_io_service_bytes_read,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_io_service_bytes),
+		.read_map = blkiocg_file_read_map,
 	},
 	{
 		.name = "io_serviced",
-		.read_map = blkiocg_io_serviced_read,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_io_serviced),
+		.read_map = blkiocg_file_read_map,
 	},
 	{
 		.name = "io_service_time",
-		.read_map = blkiocg_io_service_time_read,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_io_service_time),
+		.read_map = blkiocg_file_read_map,
 	},
 	{
 		.name = "io_wait_time",
-		.read_map = blkiocg_io_wait_time_read,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_io_wait_time),
+		.read_map = blkiocg_file_read_map,
 	},
 	{
 		.name = "io_merged",
-		.read_map = blkiocg_io_merged_read,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_io_merged),
+		.read_map = blkiocg_file_read_map,
 	},
 	{
 		.name = "io_queued",
-		.read_map = blkiocg_io_queued_read,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_io_queued),
+		.read_map = blkiocg_file_read_map,
 	},
 	{
 		.name = "reset_stats",
 		.write_u64 = blkiocg_reset_stats,
 	},
+#ifdef CONFIG_BLK_DEV_THROTTLING
+	{
+		.name = "throttle.read_bps_device",
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
+				BLKIO_THROTL_read_bps_device),
+		.read_seq_string = blkiocg_file_read,
+		.write_string = blkiocg_file_write,
+		.max_write_len = 256,
+	},
+
+	{
+		.name = "throttle.write_bps_device",
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
+				BLKIO_THROTL_write_bps_device),
+		.read_seq_string = blkiocg_file_read,
+		.write_string = blkiocg_file_write,
+		.max_write_len = 256,
+	},
+
+	{
+		.name = "throttle.read_iops_device",
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
+				BLKIO_THROTL_read_iops_device),
+		.read_seq_string = blkiocg_file_read,
+		.write_string = blkiocg_file_write,
+		.max_write_len = 256,
+	},
+
+	{
+		.name = "throttle.write_iops_device",
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
+				BLKIO_THROTL_write_iops_device),
+		.read_seq_string = blkiocg_file_read,
+		.write_string = blkiocg_file_write,
+		.max_write_len = 256,
+	},
+	{
+		.name = "throttle.io_service_bytes",
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
+				BLKIO_THROTL_io_service_bytes),
+		.read_map = blkiocg_file_read_map,
+	},
+	{
+		.name = "throttle.io_serviced",
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
+				BLKIO_THROTL_io_serviced),
+		.read_map = blkiocg_file_read_map,
+	},
+#endif /* CONFIG_BLK_DEV_THROTTLING */
+
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 	{
 		.name = "avg_queue_size",
-		.read_map = blkiocg_avg_queue_size_read,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_avg_queue_size),
+		.read_map = blkiocg_file_read_map,
 	},
 	{
 		.name = "group_wait_time",
-		.read_map = blkiocg_group_wait_time_read,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_group_wait_time),
+		.read_map = blkiocg_file_read_map,
 	},
 	{
 		.name = "idle_time",
-		.read_map = blkiocg_idle_time_read,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_idle_time),
+		.read_map = blkiocg_file_read_map,
 	},
 	{
 		.name = "empty_time",
-		.read_map = blkiocg_empty_time_read,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_empty_time),
+		.read_map = blkiocg_file_read_map,
 	},
 	{
 		.name = "dequeue",
-		.read_map = blkiocg_dequeue_read,
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_dequeue),
+		.read_map = blkiocg_file_read_map,
 	},
 #endif
 };
@@ -932,13 +1419,14 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 		/*
 		 * This blkio_group is being unlinked as associated cgroup is
 		 * going away. Let all the IO controlling policies know about
-		 * this event. Currently this is static call to one io
-		 * controlling policy. Once we have more policies in place, we
-		 * need some dynamic registration of callback function.
+		 * this event.
 		 */
 		spin_lock(&blkio_list_lock);
-		list_for_each_entry(blkiop, &blkio_list, list)
+		list_for_each_entry(blkiop, &blkio_list, list) {
+			if (blkiop->plid != blkg->plid)
+				continue;
 			blkiop->ops.blkio_unlink_group_fn(key, blkg);
+		}
 		spin_unlock(&blkio_list_lock);
 	} while (1);
 
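
For reference, the new throttle files accept one rule per write in the "major:minor value" format that blkio_policy_parse_and_set() parses, and writing a value of 0 deletes an existing rule (see blkio_delete_rule_command()). A hedged usage sketch in C; the /cgroup/blkio mount point and the 8:16 device numbers are assumptions, adjust them to the local setup:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Write one "major:minor value" rule to a blkio cgroup file. */
static int set_rule(const char *file, const char *rule)
{
	char path[256];
	int fd;
	ssize_t n;

	snprintf(path, sizeof(path), "/cgroup/blkio/%s", file);
	fd = open(path, O_WRONLY);
	if (fd < 0) {
		perror("open");
		return -1;
	}
	n = write(fd, rule, strlen(rule));
	if (n < 0)
		perror("write");
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	/* Cap reads on device 8:16 at 1 MB/s ... */
	set_rule("blkio.throttle.read_bps_device", "8:16 1048576");
	/* ... and a value of 0 removes the rule again. */
	set_rule("blkio.throttle.read_bps_device", "8:16 0");
	return 0;
}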