aboutsummaryrefslogblamecommitdiffstats
path: root/arch/i386/kernel/vmitime.c
blob: 76d2adcae5a30b74ed5505544cf5c0b45d89e427 (plain) (tree)




















































































































                                                                              
                                                             































































                                                                                
                            
 
                              










































                                                                                          

                                 












































































































































































































































































                                                                                           
/*
 * VMI paravirtual timer support routines.
 *
 * Copyright (C) 2005, VMware, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Send feedback to dhecht@vmware.com
 *
 */

/*
 * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
 * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
 * See comments there for proper credits.
 */

#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/rcupdate.h>
#include <linux/clocksource.h>

#include <asm/timer.h>
#include <asm/io.h>
#include <asm/apic.h>
#include <asm/div64.h>
#include <asm/timer.h>
#include <asm/desc.h>

#include <asm/vmi.h>
#include <asm/vmi_time.h>

#include <mach_timer.h>
#include <io_ports.h>

/* Alarm delivery path used by the no-idle-hz code when it (re)programs an
 * alarm: the local APIC timer vector (LVTT) when the kernel is built with
 * local APIC support, otherwise IRQ0. */
#ifdef CONFIG_X86_LOCAL_APIC
#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
#else
#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
#endif

/* Cached VMI operations.
 * Table of hypervisor timer entry points used throughout this file
 * (get_cycle_counter, get_cycle_frequency, get_wallclock,
 * wallclock_updated, set_alarm, cancel_alarm).  It has external linkage
 * and is not filled in anywhere in this file. */
struct vmi_timer_ops vmi_timer_ops;

#ifdef CONFIG_NO_IDLE_HZ

/* /proc/sys/kernel/hz_timer state.
 * When non-zero, vmi_stop_hz_timer() returns 0 immediately and leaves the
 * periodic alarm running. */
int sysctl_hz_timer;

/* Some stats, all per cpu:
 *  vmi_idle_no_hz_irqs    - number of times vmi_reenable_hz_timer() ran.
 *  vmi_idle_no_hz_jiffies - sum of (jiffies - idle_start_jiffies) taken
 *                           each time the hz timer is re-enabled.
 *  idle_start_jiffies     - jiffies value recorded by vmi_stop_hz_timer()
 *                           when the cpu went nohz-idle. */
static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);

#endif /* CONFIG_NO_IDLE_HZ */

/* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ.
 * Can be overridden with the "vmi_timer_alarm_hz=" boot parameter. */
static int alarm_hz = CONFIG_VMI_ALARM_HZ;

/* Cache of the value get_cycle_frequency / HZ.
 * Computed once in vmi_time_init(). */
static signed long long cycles_per_jiffy;

/* Cache of the value get_cycle_frequency / alarm_hz.
 * Computed once in vmi_time_init(); used as both the first-expiry offset
 * and the period of the periodic alarms. */
static signed long long cycles_per_alarm;

/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
 * Protected by xtime_lock.  Advanced one cycles_per_jiffy at a time by
 * vmi_account_real_cycles(). */
static unsigned long long real_cycles_accounted_system;

/* The number of cycles accounted for by update_process_times(), per cpu.
 * Also advanced by the no-idle-hz idle accounting. */
static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);

/* The number of stolen cycles accounted, per cpu. */
static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);

/* Clock source.
 * read_real_cycles() is the .read hook of clocksource_vmi: it returns the
 * current value of the VMI_CYCLES_REAL counter. */
static cycle_t read_real_cycles(void)
{
	cycle_t real_now = vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);

	return real_now;
}

/* Return the current value of the VMI_CYCLES_AVAILABLE counter, the time
 * base used for per-cpu process time accounting in this file. */
static cycle_t read_available_cycles(void)
{
	cycle_t avail_now = vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);

	return avail_now;
}

/* Reader for the VMI_CYCLES_STOLEN counter.  Compiled out: nothing in
 * this file calls it (stolen time is derived as real - available in
 * vmi_account_stolen_cycles() instead). */
#if 0
static cycle_t read_stolen_cycles(void)
{
	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);
}
#endif  /*  0  */

/* Clocksource backed by the VMI real cycle counter.  Registered from
 * vmi_time_init() once .mult has been derived from the cycle frequency. */
static struct clocksource clocksource_vmi = {
	.name			= "vmi-timer",
	/* NOTE(review): 450 is presumably chosen to outrank the other
	 * clocksources available to a VMI guest -- confirm. */
	.rating			= 450,
	.read			= read_real_cycles,
	/* The counter is treated as a full 64-bit value. */
	.mask			= CLOCKSOURCE_MASK(64),
	/* Filled in by vmi_time_init() via clocksource_khz2mult(). */
	.mult			= 0, /* to be set */
	/* Shift handed to clocksource_khz2mult() together with the kHz rate. */
	.shift			= 22,
	.flags			= CLOCK_SOURCE_IS_CONTINUOUS,
};


/* Timer interrupt handler. Defined near the end of this file; declared
 * here so that vmi_timer_irq can reference it. */
static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);

/* irqaction installed on IRQ 0 by vmi_time_init() through setup_irq().
 * The initializer is positional, so the values must stay in the member
 * order of struct irqaction.
 * NOTE(review): presumably handler, flags, cpu mask, name, dev_id, next --
 * verify against the struct definition before reordering or extending. */
static struct irqaction vmi_timer_irq  = {
	vmi_timer_interrupt,
	SA_INTERRUPT,
	CPU_MASK_NONE,
	"VMI-alarm",
	NULL,
	NULL
};

/* Alarm rate.
 * Handler for the "vmi_timer_alarm_hz=" boot parameter.  A single strictly
 * positive integer replaces the default alarm_hz; anything else is ignored
 * and the default is kept.  Always returns 1. */
static int __init vmi_timer_alarm_rate_setup(char *str)
{
	int requested_hz;

	/* Reject unparsable or non-positive values, keeping the default. */
	if (get_option(&str, &requested_hz) != 1 || requested_hz <= 0)
		return 1;

	alarm_hz = requested_hz;
	printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
	return 1;
}
__setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);


/* Initialization */
/* Read the hypervisor wallclock (reported in nanoseconds) and split it
 * into the seconds / nanoseconds fields of *ts. */
static void vmi_get_wallclock_ts(struct timespec *ts)
{
	unsigned long long nsec_total = vmi_timer_ops.get_wallclock();

	/* do_div() leaves the quotient (whole seconds) in nsec_total and
	 * returns the remainder (leftover nanoseconds). */
	ts->tv_nsec = do_div(nsec_total, 1000000000);
	ts->tv_sec = nsec_total;
}

static void update_xtime_from_wallclock(void)
{
	struct timespec ts;
	vmi_get_wallclock_ts(&ts);
	do_settimeofday(&ts);
}

unsigned long vmi_get_wallclock(void)
{
	struct timespec ts;
	vmi_get_wallclock_ts(&ts);
	return ts.tv_sec;
}

/* Setting the wallclock is not implemented here: the requested time is
 * ignored and -1 is always returned. */
int vmi_set_wallclock(unsigned long now)
{
	/* 'now' is deliberately unused. */
	return -1;
}

/* Scheduler clock based on the available-cycles counter.
 * NOTE(review): this returns a raw cycle count with no unit conversion --
 * confirm that callers expect cycles rather than nanoseconds. */
unsigned long long vmi_sched_clock(void)
{
	unsigned long long avail_now = read_available_cycles();

	return avail_now;
}

/* Boot-time initialisation of the VMI timer on the boot cpu.
 *
 * With interrupts disabled this:
 *  - installs the IRQ0 handler (and the LVTT gate when a local APIC is
 *    configured),
 *  - seeds xtime / wall_to_monotonic from the hypervisor wallclock,
 *  - records the starting points for real and per-cpu time accounting,
 *  - derives cycles_per_jiffy, cycles_per_alarm and cpu_khz from the
 *    hypervisor cycle frequency,
 *  - registers the "vmi-timer" clocksource,
 *  - disables the PIT and starts a periodic alarm wired to IRQ0.
 */
void __init vmi_time_init(void)
{
	unsigned long long cycles_per_sec, cycles_per_msec;
	unsigned long flags;

	local_irq_save(flags);
	setup_irq(0, &vmi_timer_irq);
#ifdef CONFIG_X86_LOCAL_APIC
	set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
#endif

	no_sync_cmos_clock = 1;

	vmi_get_wallclock_ts(&xtime);
	set_normalized_timespec(&wall_to_monotonic,
		-xtime.tv_sec, -xtime.tv_nsec);

	/* Starting points for system-wide and boot-cpu accounting. */
	real_cycles_accounted_system = read_real_cycles();
	update_xtime_from_wallclock();
	per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();

	cycles_per_sec = vmi_timer_ops.get_cycle_frequency();

	/* do_div() divides its first argument in place. */
	cycles_per_jiffy = cycles_per_sec;
	(void)do_div(cycles_per_jiffy, HZ);
	cycles_per_alarm = cycles_per_sec;
	(void)do_div(cycles_per_alarm, alarm_hz);
	cycles_per_msec = cycles_per_sec;
	(void)do_div(cycles_per_msec, 1000);
	cpu_khz = cycles_per_msec;

	printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ; "
	       "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
	       cycles_per_alarm);

	clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
						    clocksource_vmi.shift);
	if (clocksource_register(&clocksource_vmi))
		printk(KERN_WARNING "Error registering VMITIME clocksource.\n");

	/* Disable PIT. */
	outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */

	/* Schedule the alarm.  Do this in phase with
	 * process_times_cycles_accounted_cpu to reduce the latency of
	 * calling update_process_times().  The alarm is wired to IRQ0 here;
	 * with a local APIC, vmi_timer_setup_boot_alarm() later re-wires it
	 * to LVTT. */
	vmi_timer_ops.set_alarm(
		      VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		      per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
		      cycles_per_alarm);

	local_irq_restore(flags);
}

#ifdef CONFIG_X86_LOCAL_APIC

/* Move the boot cpu's periodic alarm from IRQ0 to the local APIC timer
 * vector.  Runs with interrupts disabled for the duration of the switch
 * and unconditionally enables them again before returning. */
void __init vmi_timer_setup_boot_alarm(void)
{
	unsigned long long first_expiry;

	local_irq_disable();

	/* Route the interrupt to the correct vector. */
	apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);

	/* Drop the IRQ0 wired alarm... */
	vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);

	/* ...and start the LVTT wired one, in phase with the cycles already
	 * accounted on the boot cpu. */
	first_expiry = per_cpu(process_times_cycles_accounted_cpu, 0) +
		cycles_per_alarm;
	vmi_timer_ops.set_alarm(
		      VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		      first_expiry,
		      cycles_per_alarm);

	local_irq_enable();
}

/* Bring up time accounting on an AP of an SMP system: route LVTT to the
 * local timer vector, start this cpu's process-time accounting at the
 * current available-cycles value, and arm its periodic LVTT alarm. */
void __init vmi_timer_setup_secondary_alarm(void)
{
	const int this_cpu = smp_processor_id();
	unsigned long long first_expiry;

	/* Route the interrupt to the correct vector. */
	apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);

	per_cpu(process_times_cycles_accounted_cpu, this_cpu) =
		read_available_cycles();

	/* First alarm fires one alarm period after the accounting start. */
	first_expiry = per_cpu(process_times_cycles_accounted_cpu, this_cpu) +
		cycles_per_alarm;
	vmi_timer_ops.set_alarm(
		      VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		      first_expiry,
		      cycles_per_alarm);
}

#endif

/* Update system wide (real) time accounting (e.g. jiffies, xtime).
 *
 * @cur_real_cycles: current reading of the real cycle counter.
 *
 * Under xtime_lock, calls do_timer(1) once for every whole
 * cycles_per_jiffy that has elapsed since real_cycles_accounted_system,
 * advancing that marker as it goes.  Any remainder smaller than one jiffy
 * is left for the next call.
 *
 * If the hypervisor reports that its wallclock changed, the time of day is
 * resynchronised after xtime_lock has been released:
 * update_xtime_from_wallclock() ends up in do_settimeofday(), which takes
 * xtime_lock for writing itself, so calling it with the lock held would
 * self-deadlock on SMP.
 */
static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
{
	long long cycles_not_accounted;
	int resync_wallclock;

	write_seqlock(&xtime_lock);

	cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
	while (cycles_not_accounted >= cycles_per_jiffy) {
		/* systems wide jiffies and wallclock. */
		do_timer(1);

		cycles_not_accounted -= cycles_per_jiffy;
		real_cycles_accounted_system += cycles_per_jiffy;
	}

	/* Query the hypervisor while still serialised by xtime_lock, but
	 * defer acting on the answer until the lock is dropped. */
	resync_wallclock = (vmi_timer_ops.wallclock_updated() != 0);

	write_sequnlock(&xtime_lock);

	if (resync_wallclock)
		update_xtime_from_wallclock();
}

/* Update per-cpu process times.
 *
 * @regs: interrupted register state, used to tell user from kernel mode.
 * @cpu:  cpu whose accounting marker is advanced.
 * @cur_process_times_cycles: current available-cycles reading.
 *
 * Runs one scheduler/profiling tick for each whole cycles_per_jiffy that
 * has elapsed since this cpu's process_times_cycles_accounted_cpu marker.
 */
static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
					     unsigned long long cur_process_times_cycles)
{
	long long backlog = cur_process_times_cycles -
		per_cpu(process_times_cycles_accounted_cpu, cpu);

	for (; backlog >= cycles_per_jiffy; backlog -= cycles_per_jiffy) {
		/* Charge the tick to the current process.  This goes
		 * through the scheduler, which may decrement the timeslice
		 * and reschedule. */
		update_process_times(user_mode(regs));
		/* XXX handle /proc/profile multiplier.  */
		profile_tick(CPU_PROFILING);

		per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
	}
}

#ifdef CONFIG_NO_IDLE_HZ
/* Update per-cpu idle times.  Used when a no-hz halt is ended.
 *
 * Counts the whole jiffies of available cycles that elapsed since this
 * cpu's process_times_cycles_accounted_cpu marker, advances the marker by
 * the same amount, and charges the total to the cpu's idle task in one
 * account_steal_time() call (made even when the count is zero). */
static void vmi_account_no_hz_idle_cycles(int cpu,
					  unsigned long long cur_process_times_cycles)
{
	long long backlog;
	unsigned long idle_ticks;

	backlog = cur_process_times_cycles -
		per_cpu(process_times_cycles_accounted_cpu, cpu);

	for (idle_ticks = 0; backlog >= cycles_per_jiffy; idle_ticks++) {
		backlog -= cycles_per_jiffy;
		per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
	}

	/* Account time to the idle process. */
	account_steal_time(idle_task(cpu), jiffies_to_cputime(idle_ticks));
}
#endif

/* Update per-cpu stolen time. */
static void vmi_account_stolen_cycles(int cpu,
				      unsigned long long cur_real_cycles,
				      unsigned long long cur_avail_cycles)
{
	long long stolen_cycles_not_accounted;
	unsigned long stolen_jiffies = 0;

	if (cur_real_cycles < cur_avail_cycles)
		return;

	stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
		per_cpu(stolen_cycles_accounted_cpu, cpu);

	while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
		stolen_jiffies++;
		stolen_cycles_not_accounted -= cycles_per_jiffy;
		per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
	}
	/* HACK: pass NULL to force time onto cpustat->steal. */
	account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
}

/* Body of either IRQ0 interrupt handler (UP no local-APIC) or
 * local-APIC LVTT interrupt handler (UP & local-APIC or SMP).
 *
 * Samples the real and available cycle counters once, then feeds the same
 * pair of readings to the three accounting steps. */
static void vmi_local_timer_interrupt(int cpu)
{
	unsigned long long real_now, avail_now;

	real_now = read_real_cycles();
	avail_now = read_available_cycles();

	/* System wide (real) time state: xtime, jiffies. */
	vmi_account_real_cycles(real_now);

	/* Per-cpu process times. */
	vmi_account_process_times_cycles(get_irq_regs(), cpu, avail_now);

	/* Time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(cpu, real_now, avail_now);
}

#ifdef CONFIG_NO_IDLE_HZ

/* Try to stop the periodic (HZ) alarm on this cpu before it idles.
 * Must be called only from idle loop, with interrupts disabled.
 *
 * Returns 1 if the cpu was marked nohz-idle and its periodic alarm was
 * cancelled, 0 otherwise.  Unless sysctl_hz_timer is set (in which case
 * nothing is touched), a one-shot real-time alarm is armed in both cases. */
int vmi_stop_hz_timer(void)
{
	/* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */

	unsigned long seq, next;
	unsigned long long real_cycles_expiry;
	int cpu = smp_processor_id();
	int idle;

	BUG_ON(!irqs_disabled());
	/* The hz_timer sysctl forces the periodic tick to stay on. */
	if (sysctl_hz_timer != 0)
		return 0;

	/* Advertise this cpu as nohz-idle first, then check whether that
	 * is actually allowed; the barrier orders the mask update before
	 * the checks below. */
	cpu_set(cpu, nohz_cpu_mask);
	smp_mb();
	/* Stay ticking if RCU needs this cpu, a softirq is pending, or the
	 * next timer is already due.  'next' is only assigned by the last
	 * operand, so the failure branch resets it to jiffies. */
	if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
	    (next = next_timer_interrupt(), time_before_eq(next, jiffies))) {
		cpu_clear(cpu, nohz_cpu_mask);
		next = jiffies;
		idle = 0;
	} else
		idle = 1;

	/* Convert jiffies to the real cycle counter.  Retried under the
	 * xtime_lock read side until a consistent snapshot of
	 * real_cycles_accounted_system is obtained. */
	do {
		seq = read_seqbegin(&xtime_lock);
		real_cycles_expiry = real_cycles_accounted_system +
			(long)(next - jiffies) * cycles_per_jiffy;
	} while (read_seqretry(&xtime_lock, seq));

	/* This cpu is going idle. Disable the periodic alarm and remember
	 * when idling started (for the idle statistics). */
	if (idle) {
		vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
		per_cpu(idle_start_jiffies, cpu) = jiffies;
	}

	/* Set the real time alarm to expire at the next event. */
	vmi_timer_ops.set_alarm(
		      VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
		      real_cycles_expiry, 0);

	return idle;
}

static void vmi_reenable_hz_timer(int cpu)
{
	/* For /proc/vmi/info idle_hz stat. */
	per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
	per_cpu(vmi_idle_no_hz_irqs, cpu)++;

	/* Don't bother explicitly cancelling the one-shot alarm -- at
	 * worse we will receive a spurious timer interrupt. */
	vmi_timer_ops.set_alarm(
		      VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		      per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
		      cycles_per_alarm);
	/* Indicate this cpu is no longer nohz idle. */
	cpu_clear(cpu, nohz_cpu_mask);
}

/* Called from interrupt handlers when (local) HZ timer is disabled.
 * Catches up on the time that passed while the periodic alarm was off,
 * then turns the alarm back on.  Interrupts must be disabled. */
void vmi_account_time_restart_hz_timer(void)
{
	const int this_cpu = smp_processor_id();
	unsigned long long real_now, avail_now;

	BUG_ON(!irqs_disabled());

	/* Account the time during which the HZ timer was disabled. */
	real_now = read_real_cycles();
	avail_now = read_available_cycles();

	/* System wide (real) time state: xtime, jiffies. */
	vmi_account_real_cycles(real_now);

	/* Per-cpu idle times. */
	vmi_account_no_hz_idle_cycles(this_cpu, avail_now);

	/* Time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(this_cpu, real_now, avail_now);

	/* Reenable the hz timer. */
	vmi_reenable_hz_timer(this_cpu);
}

#endif /* CONFIG_NO_IDLE_HZ */

/* UP (and no local-APIC) VMI-timer alarm interrupt handler.
 * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after
 * APIC setup and vmi_timer_setup_boot_alarm() is called.
 * Both arguments are ignored; always returns IRQ_HANDLED. */
static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
{
	int this_cpu = smp_processor_id();

	vmi_local_timer_interrupt(this_cpu);

	return IRQ_HANDLED;
}

#ifdef CONFIG_X86_LOCAL_APIC

/* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
 * Also used in UP when CONFIG_X86_LOCAL_APIC.
 * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt.
 *
 * Installs @regs as the current irq register set for the duration of the
 * handler and restores the previous set before returning. */
void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
{
	struct pt_regs *prev_regs = set_irq_regs(regs);
	int this_cpu = smp_processor_id();

	/* The NMI deadlock-detector uses this counter. */
	per_cpu(irq_stat, this_cpu).apic_timer_irqs++;

	/*
	 * NOTE! We'd better ACK the irq immediately,
	 * because timer handling can be slow.
	 */
	ack_APIC_irq();

	/*
	 * update_process_times() expects us to have done irq_enter().
	 * Besides, if we don't, timer interrupts ignore the global
	 * interrupt lock, which is the WrongThing (tm) to do.
	 */
	irq_enter();
	vmi_local_timer_interrupt(this_cpu);
	irq_exit();

	set_irq_regs(prev_regs);
}

#endif  /* CONFIG_X86_LOCAL_APIC */