author    Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>  2013-01-22 02:39:24 -0500
committer Gleb Natapov <gleb@redhat.com>  2013-01-29 08:38:45 -0500
commit    c45c528e899094b9049b3c900e2cf1f00aa0490c (patch)
tree      ee8562c37a74f74f9fbc30772a3bc4e7c69db8d6 /virt/kvm/kvm_main.c
parent    7b270f609982f68f2433442bf167f735e7364b06 (diff)
kvm: Handle yield_to failure return code for potential undercommit case
yield_to() returns -ESRCH when the run queue length of both the source and the
target of yield_to() is one. When we see three successive failures of
yield_to(), we assume we are in a potential undercommit case and abort from the
PLE handler. The assumption is backed by the low probability of a wrong
decision even in worst-case scenarios such as an average runqueue length
between 1 and 2.

More detail on the rationale behind using three tries: if p is the probability
of finding a run queue of length one on a particular cpu, and we do n tries,
then the probability of exiting the PLE handler is p^(n+1) [because we would
have come across one source with rq length 1 and n target cpu rqs with
length 1].

So:

  num tries    probability of aborting PLE handler (1.5x overcommit)
  1            1/4
  2            1/8
  3            1/16

We can increase this probability with more tries, but the problem is the
overhead. Also, if we have tried three times, that means we have already
iterated over 3 good eligible vcpus along with many non-eligible candidates.
In the worst case, if we iterate over all the vcpus, we lose the 1x
performance and overcommit performance also gets hit.

Note that we do not update the last boosted vcpu in failure cases.

Thanks to Avi for raising the question about aborting after the first failure
of yield_to().

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Tested-by: Chegu Vinod <chegu_vinod@hp.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
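As a quick sanity check on the table above, here is a minimal userspace sketch
(not part of the patch) that evaluates the changelog's p^(n+1) estimate,
assuming p = 1/2, which is consistent with the 1.5x-overcommit column:

/* Toy check of the abort-probability estimate from the changelog. */
#include <stdio.h>

int main(void)
{
	double p = 0.5;		/* assumed chance of finding rq length 1 at 1.5x overcommit */
	double prob = p;	/* p^1: the source rq already has length 1 */
	int n;

	for (n = 1; n <= 3; n++) {
		prob *= p;	/* p^(n+1): n failed targets plus the source */
		printf("tries = %d -> abort probability = 1/%.0f\n", n, 1.0 / prob);
	}
	return 0;
}

This prints 1/4, 1/8 and 1/16, matching the table.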
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--  virt/kvm/kvm_main.c | 26
1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index abc23e27173d..a83ca63d26fc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1694,6 +1694,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 {
 	struct pid *pid;
 	struct task_struct *task = NULL;
+	bool ret = false;
 
 	rcu_read_lock();
 	pid = rcu_dereference(target->pid);
@@ -1701,17 +1702,15 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 	task = get_pid_task(target->pid, PIDTYPE_PID);
 	rcu_read_unlock();
 	if (!task)
-		return false;
+		return ret;
 	if (task->flags & PF_VCPU) {
 		put_task_struct(task);
-		return false;
-	}
-	if (yield_to(task, 1)) {
-		put_task_struct(task);
-		return true;
+		return ret;
 	}
+	ret = yield_to(task, 1);
 	put_task_struct(task);
-	return false;
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
 
@@ -1752,12 +1751,14 @@ bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 	return eligible;
 }
 #endif
+
 void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 {
 	struct kvm *kvm = me->kvm;
 	struct kvm_vcpu *vcpu;
 	int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
 	int yielded = 0;
+	int try = 3;
 	int pass;
 	int i;
 
@@ -1769,7 +1770,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 	 * VCPU is holding the lock that we need and will release it.
 	 * We approximate round-robin by starting at the last boosted VCPU.
 	 */
-	for (pass = 0; pass < 2 && !yielded; pass++) {
+	for (pass = 0; pass < 2 && !yielded && try; pass++) {
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			if (!pass && i <= last_boosted_vcpu) {
 				i = last_boosted_vcpu;
@@ -1782,10 +1783,15 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 				continue;
 			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 				continue;
-			if (kvm_vcpu_yield_to(vcpu)) {
+
+			yielded = kvm_vcpu_yield_to(vcpu);
+			if (yielded > 0) {
 				kvm->last_boosted_vcpu = i;
-				yielded = 1;
 				break;
+			} else if (yielded < 0) {
+				try--;
+				if (!try)
+					break;
 			}
 		}
 	}
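Putting the kvm_vcpu_on_spin() hunks together: a positive return from the
yield records the boosted VCPU and stops, a negative return (yield_to()
reporting -ESRCH) consumes one of the three tries, and the handler bails out
once they are exhausted. The small userspace toy model below illustrates that
behaviour; it is not KVM code, and fake_yield_to() and NR_VCPUS are made up
for the demonstration.

/*
 * Userspace toy model of the undercommit bail-out (not KVM code).
 * fake_yield_to() stands in for kvm_vcpu_yield_to(): >0 means the target
 * was boosted, 0 means the yield did not happen, <0 mirrors yield_to()
 * returning -ESRCH because the run queues involved have length 1.
 */
#include <stdio.h>

#define NR_VCPUS 8	/* made-up guest size for the demo */

static int fake_yield_to(int vcpu)
{
	(void)vcpu;
	return -1;	/* pretend every run queue has length 1 (undercommit) */
}

int main(void)
{
	int yielded = 0;
	int try = 3;
	int pass, i;

	for (pass = 0; pass < 2 && !yielded && try; pass++) {
		for (i = 0; i < NR_VCPUS; i++) {
			yielded = fake_yield_to(i);
			if (yielded > 0) {
				printf("boosted vcpu %d\n", i);
				break;
			} else if (yielded < 0) {
				try--;
				if (!try) {
					printf("three rq-length-1 failures: assume undercommit, abort\n");
					break;
				}
			}
		}
	}
	return 0;
}

With every fake_yield_to() failing, the loop gives up after exactly three
attempts instead of scanning all vcpus on both passes, which is the overhead
the changelog is trying to avoid.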