#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/pvclock_gtod.h>
#include <linux/timekeeper_internal.h>
#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/cpuid.h>
#include <xen/events.h>
#include <xen/features.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include "xen-ops.h"
/*
 * Default minimum clockevent delta. Used as the initial min_delta_ns and
 * min_delta_ticks of both the timerop and the vcpuop clockevent devices;
 * parse_xen_timer_slop() can overwrite those fields from the command line.
 */
#define TIMER_SLOP 100000
/*
 * Value subtracted from the pvclock reading in xen_sched_clock(). It is set
 * to the current clocksource value in xen_init_time_common() and recomputed
 * in xen_restore_time_memory_area().
 */
static u64 xen_sched_clock_offset __read_mostly;
/*
 * Return the TSC frequency as computed by pvclock_tsc_khz() from the time
 * info of vcpu_info[0] in the shared info page. Also force-sets the
 * X86_FEATURE_TSC_KNOWN_FREQ CPU capability.
 */
static unsigned long xen_tsc_khz(void)
{
	struct pvclock_vcpu_time_info *vcpu0_time;

	vcpu0_time = &HYPERVISOR_shared_info->vcpu_info[0].time;
	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);

	return pvclock_tsc_khz(vcpu0_time);
}
/*
 * Read the pvclock value for the CPU we are currently running on.
 *
 * Preemption is disabled (notrace variant) across both the per-CPU xen_vcpu
 * lookup and the pvclock read, and re-enabled before returning.
 */
static u64 xen_clocksource_read(void)
{
	struct pvclock_vcpu_time_info *pvti;
	u64 now;

	preempt_disable_notrace();
	pvti = &__this_cpu_read(xen_vcpu)->time;
	now = pvclock_clocksource_read(pvti);
	preempt_enable_notrace();

	return now;
}
/*
 * ->read callback of xen_clocksource. @cs is not used; the value comes
 * straight from xen_clocksource_read().
 */
static u64 xen_clocksource_get_cycles(struct clocksource *cs)
{
	u64 cycles = xen_clocksource_read();

	return cycles;
}
/*
 * Scheduler clock: this CPU's pvclock reading (the _nowd read variant)
 * minus xen_sched_clock_offset.
 *
 * Unlike xen_clocksource_read(), this function does not touch the
 * preemption count itself.
 */
static noinstr u64 xen_sched_clock(void)
{
	struct pvclock_vcpu_time_info *pvti = &__this_cpu_read(xen_vcpu)->time;
	u64 ns = pvclock_clocksource_read_nowd(pvti);

	return ns - xen_sched_clock_offset;
}
/*
 * Fill @ts with the wall-clock time, computed by pvclock_read_wallclock()
 * from the shared-info wall clock record and this CPU's vcpu time info.
 */
static void xen_read_wallclock(struct timespec64 *ts)
{
	struct pvclock_wall_clock *wc = &HYPERVISOR_shared_info->wc;
	struct pvclock_vcpu_time_info *pvti;

	/* get_cpu_var()/put_cpu_var() bracket the use of the per-CPU data. */
	pvti = &get_cpu_var(xen_vcpu)->time;
	pvclock_read_wallclock(wc, pvti, ts);
	put_cpu_var(xen_vcpu);
}
/*
 * x86_platform.get_wallclock hook (installed by xen_init_time_common()):
 * stores the Xen wall-clock time in @now.
 */
static void xen_get_wallclock(struct timespec64 *now)
{
	xen_read_wallclock(now);
}
/*
 * x86_platform.set_wallclock hook: setting the wall clock is not supported
 * through this path, so @now is ignored and -ENODEV is always returned.
 */
static int xen_set_wallclock(const struct timespec64 *now)
{
	return -ENODEV;
}
/*
 * pvclock_gtod notifier callback: hand the timekeeper's current wall time
 * to the hypervisor through a XENPF_settime64 platform op, falling back to
 * XENPF_settime32 when the 64-bit op is answered with -ENOSYS.
 *
 * @nb:      unused.
 * @was_set: non-zero forces the hypercall regardless of the rate limit.
 * @priv:    the struct timekeeper the time is taken from.
 *
 * Unless @was_set is set, nothing is sent until 11 minutes have passed
 * since the last successful update.
 *
 * Returns NOTIFY_OK when the update was skipped or succeeded, NOTIFY_BAD
 * when the hypercall failed.
 */
static int xen_pvclock_gtod_notify(struct notifier_block *nb,
				   unsigned long was_set, void *priv)
{
	/*
	 * Function-static state, accessed without locking here.
	 * NOTE(review): presumably serialised by the notifier's caller - confirm.
	 */
	static struct timespec64 next_sync;
	static bool settime64_supported = true;

	struct timekeeper *tk = priv;
	struct xen_platform_op op;
	struct timespec64 now;
	int ret;

	now.tv_sec = tk->xtime_sec;
	now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);

	/* Rate limit: not explicitly set and the next sync time not reached. */
	if (!was_set && timespec64_compare(&now, &next_sync) < 0)
		return NOTIFY_OK;

	for (;;) {
		if (settime64_supported) {
			op.cmd = XENPF_settime64;
			op.u.settime64.mbz = 0;
			op.u.settime64.secs = now.tv_sec;
			op.u.settime64.nsecs = now.tv_nsec;
			op.u.settime64.system_time = xen_clocksource_read();
		} else {
			op.cmd = XENPF_settime32;
			op.u.settime32.secs = now.tv_sec;
			op.u.settime32.nsecs = now.tv_nsec;
			op.u.settime32.system_time = xen_clocksource_read();
		}

		ret = HYPERVISOR_platform_op(&op);
		if (ret != -ENOSYS || !settime64_supported)
			break;

		/* 64-bit op unavailable: remember that and retry with settime32. */
		settime64_supported = false;
	}

	if (ret < 0)
		return NOTIFY_BAD;

	/* Schedule the next unforced update 11 minutes from now. */
	next_sync = now;
	next_sync.tv_sec += 11 * 60;

	return NOTIFY_OK;
}
/*
 * Notifier block wrapping xen_pvclock_gtod_notify(); xen_time_init()
 * registers it with pvclock_gtod_register_notifier() on the initial domain.
 */
static struct notifier_block xen_pvclock_gtod_notifier = {
.notifier_call = xen_pvclock_gtod_notify,
};
/*
 * ->enable callback of xen_clocksource: records VDSO_CLOCKMODE_PVCLOCK as
 * used via vclocks_set_used(). @cs is unused. Always returns 0.
 */
static int xen_cs_enable(struct clocksource *cs)
{
	vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK);

	return 0;
}
/*
 * The Xen pvclock clocksource. The rating below is the default;
 * xen_time_init() lowers it in some configurations before registering the
 * clocksource, and xen_setup_vsyscall_time_info() may set vdso_clock_mode.
 */
static struct clocksource xen_clocksource __read_mostly = {
	.name	= "xen",
	.rating	= 400,
	.read	= xen_clocksource_get_cycles,
	.mask	= CLOCKSOURCE_MASK(64),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
	.enable	= xen_cs_enable,
};
static s64 get_abs_timeout(unsigned long delta)
{
return xen_clocksource_read() + delta;
}
/*
 * ->set_state_shutdown for the timerop clockevent.
 *
 * Issues HYPERVISOR_set_timer_op() with a timeout of 0 and ignores its
 * result. NOTE(review): presumably a zero timeout cancels the pending
 * timer - confirm against the Xen interface. Always returns 0.
 */
static int xen_timerop_shutdown(struct clock_event_device *evt)
{
	(void)HYPERVISOR_set_timer_op(0);

	return 0;
}
static int xen_timerop_set_next_event(unsigned long delta,
struct clock_event_device *evt)
{
WARN_ON(!clockevent_state_oneshot(evt));
if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
BUG();
return 0;
}
/*
 * Template clockevent device driven by HYPERVISOR_set_timer_op(). This is
 * the default target of xen_clockevent; xen_setup_timer() copies the
 * selected template into each CPU's device.
 */
static struct clock_event_device xen_timerop_clockevent __ro_after_init = {
	.name			= "xen",
	.features		= CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns		= 0xffffffff,
	.max_delta_ticks	= 0xffffffff,
	.min_delta_ns		= TIMER_SLOP,
	.min_delta_ticks	= TIMER_SLOP,

	.mult			= 1,
	.shift			= 0,
	.rating			= 500,

	.set_state_shutdown	= xen_timerop_shutdown,
	.set_next_event		= xen_timerop_set_next_event,
};
/*
 * ->set_state_shutdown for the vcpuop clockevent: stop first the
 * single-shot and then the periodic timer of the current CPU's vcpu.
 *
 * A non-zero result from either hypercall is fatal (BUG); the periodic
 * timer is only touched once the single-shot stop succeeded.
 * Returns 0.
 */
static int xen_vcpuop_shutdown(struct clock_event_device *evt)
{
	int cpu = smp_processor_id();

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu),
			       NULL))
		BUG();

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
			       NULL))
		BUG();

	return 0;
}
/*
 * ->set_state_oneshot for the vcpuop clockevent: stop the periodic timer of
 * the current CPU's vcpu. A non-zero hypercall result is fatal (BUG).
 * Returns 0.
 */
static int xen_vcpuop_set_oneshot(struct clock_event_device *evt)
{
	int cpu = smp_processor_id();
	int rc;

	rc = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
				NULL);
	if (rc)
		BUG();

	return 0;
}
static int xen_vcpuop_set_next_event(unsigned long delta,
struct clock_event_device *evt)
{
int cpu = smp_processor_id();
struct vcpu_set_singleshot_timer single;
int ret;
WARN_ON(!clockevent_state_oneshot(evt));
single.timeout_abs_ns = get_abs_timeout(delta);
single.flags = 0;
ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu),
&single);
BUG_ON(ret != 0);
return ret;
}
/*
 * Template clockevent device driven by the VCPUOP_* timer hypercalls.
 * xen_time_init() selects it when stopping the periodic timer via
 * VCPUOP_stop_periodic_timer succeeds.
 */
static struct clock_event_device xen_vcpuop_clockevent __ro_after_init = {
	.name			= "xen",
	.features		= CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns		= 0xffffffff,
	.max_delta_ticks	= 0xffffffff,
	.min_delta_ns		= TIMER_SLOP,
	.min_delta_ticks	= TIMER_SLOP,

	.mult			= 1,
	.shift			= 0,
	.rating			= 500,

	.set_state_shutdown	= xen_vcpuop_shutdown,
	.set_state_oneshot	= xen_vcpuop_set_oneshot,
	.set_next_event		= xen_vcpuop_set_next_event,
};
/*
 * Template that xen_setup_timer() copies into each per-CPU clockevent
 * device. Defaults to the timerop variant; xen_time_init() may switch it to
 * the vcpuop variant.
 */
static const struct clock_event_device *xen_clockevent =
&xen_timerop_clockevent;
/* Per-CPU clockevent device together with storage for its IRQ name. */
struct xen_clock_event_device {
struct clock_event_device evt;
/* Filled in as "timer<cpu>" by xen_setup_timer(). */
char name[16];
};
/*
 * One instance per CPU. evt.irq starts out as -1; xen_setup_timer() and
 * xen_teardown_timer() treat only values >= 0 as a bound IRQ.
 */
static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };
/*
 * Timer VIRQ handler: forward the interrupt to the event handler of this
 * CPU's clockevent device.
 *
 * @irq and @dev_id are unused. Returns IRQ_HANDLED if a handler was
 * installed and called, IRQ_NONE otherwise.
 */
static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
	struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt);

	if (!evt->event_handler)
		return IRQ_NONE;

	evt->event_handler(evt);

	return IRQ_HANDLED;
}
/*
 * Release the timer IRQ bound for @cpu, if any, and mark the per-CPU
 * clockevent device as having no IRQ (-1). Does nothing when no IRQ is
 * recorded.
 */
void xen_teardown_timer(int cpu)
{
	struct clock_event_device *evt = &per_cpu(xen_clock_events, cpu).evt;

	if (evt->irq < 0)
		return;

	unbind_from_irqhandler(evt->irq, NULL);
	evt->irq = -1;
}
/*
 * Bind VIRQ_TIMER for @cpu to xen_timer_interrupt() and initialise that
 * CPU's clockevent device from the currently selected template.
 *
 * If an IRQ is still recorded for the CPU, a warning is emitted and the old
 * binding is torn down first.
 *
 * NOTE(review): the result of bind_virq_to_irqhandler() is stored without
 * being checked; a negative value ends up in evt->irq, which
 * xen_teardown_timer() treats as "nothing bound".
 */
void xen_setup_timer(int cpu)
{
	struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu);
	struct clock_event_device *evt = &xevt->evt;
	int irq;

	if (WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n",
		 evt->irq, cpu))
		xen_teardown_timer(cpu);

	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

	snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu);

	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
				      IRQF_PERCPU | IRQF_NOBALANCING |
				      IRQF_TIMER | IRQF_FORCE_RESUME |
				      IRQF_EARLY_RESUME,
				      xevt->name, NULL);
	(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);

	/* Start from the template, then fill in the per-CPU fields. */
	memcpy(evt, xen_clockevent, sizeof(*evt));
	evt->cpumask = cpumask_of(cpu);
	evt->irq = irq;
}
/* Register the current CPU's Xen clockevent device with the clockevents core. */
void xen_setup_cpu_clockevents(void)
{
	struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt);

	clockevents_register_device(evt);
}
/*
 * Resume hook: when the vcpuop clockevent is in use, stop the periodic
 * timer on every online CPU's vcpu again. A non-zero hypercall result is
 * fatal (BUG). Nothing is done for the timerop clockevent.
 */
void xen_timer_resume(void)
{
	int cpu;

	if (xen_clockevent != &xen_vcpuop_clockevent)
		return;

	for_each_online_cpu(cpu) {
		int rc = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
					    xen_vcpu_nr(cpu), NULL);

		if (rc)
			BUG();
	}
}
/*
 * Page registered with the hypervisor as vcpu 0's secondary time info area;
 * set by xen_setup_vsyscall_time_info() and NULL when that setup did not
 * complete.
 */
static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
/*
 * Clock value (relative to xen_sched_clock_offset) captured by
 * xen_save_time_memory_area() and used by xen_restore_time_memory_area()
 * to recompute the offset.
 */
static u64 xen_clock_value_saved;
/*
 * Save time state ahead of xen_restore_time_memory_area().
 *
 * Records the current clock value relative to xen_sched_clock_offset. If a
 * secondary time info page is registered (xen_clock != NULL), it is
 * unregistered by passing a NULL address for vcpu 0; on success the page is
 * cleared, on failure a notice is logged and the page is left untouched.
 */
void xen_save_time_memory_area(void)
{
	struct vcpu_register_time_memory_area t;
	int ret;

	xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset;

	if (!xen_clock)
		return;

	t.addr.v = NULL;

	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
	if (ret != 0)
		/* Newline-terminated so the record is complete as logged. */
		pr_notice("Cannot save secondary vcpu_time_info (err %d)\n",
			  ret);
	else
		clear_page(xen_clock);
}
/*
 * Counterpart of xen_save_time_memory_area().
 *
 * If a secondary time info page exists, register it again for vcpu 0; a
 * failure is only logged. In every case pvclock_resume() is called and
 * xen_sched_clock_offset is recomputed from the fresh clock reading and the
 * value saved earlier.
 */
void xen_restore_time_memory_area(void)
{
	struct vcpu_register_time_memory_area t;
	int ret;

	if (xen_clock) {
		t.addr.v = &xen_clock->pvti;

		ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
					 0, &t);
		if (ret != 0)
			/* Newline-terminated so the record is complete as logged. */
			pr_notice("Cannot restore secondary vcpu_time_info (err %d)\n",
				  ret);
	}

	/* pvclock_resume() runs before the clocksource is read again. */
	pvclock_resume();
	xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved;
}
static void xen_setup_vsyscall_time_info(void)
{
struct vcpu_register_time_memory_area t;
struct pvclock_vsyscall_time_info *ti;
int ret;
ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL);
if (!ti)
return;
t.addr.v = &ti->pvti;
ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
if (ret) {
pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (err %d)\n", ret);
free_page((unsigned long)ti);
return;
}
if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) {
t.addr.v = NULL;
ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
0, &t);
if (!ret)
free_page((unsigned long)ti);
pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (tsc unstable)\n");
return;
}
xen_clock = ti;
pvclock_set_pvti_cpu0_va(xen_clock);
xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
}
/*
 * Decide whether the TSC looks safe to use as a clocksource.
 *
 * Returns 0 unless the boot CPU has both X86_FEATURE_CONSTANT_TSC and
 * X86_FEATURE_NONSTOP_TSC and check_tsc_unstable() reports no problem. In
 * that case the result is whether EBX of the Xen CPUID leaf at
 * xen_cpuid_base() + 3 (sub-leaf 0) equals XEN_CPUID_TSC_MODE_NEVER_EMULATE.
 */
static int __init xen_tsc_safe_clocksource(void)
{
	u32 eax, ebx, ecx, edx;

	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ||
	    !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
	    check_tsc_unstable())
		return 0;

	cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx);

	return ebx == XEN_CPUID_TSC_MODE_NEVER_EMULATE;
}
/*
 * Boot-time initialisation of Xen timekeeping on the current CPU:
 * register the clocksource, choose the clockevent flavour, seed the system
 * time from the Xen wall clock, optionally set up the vDSO time info page,
 * and bring up runstate info, the timer IRQ and the clockevent device.
 */
static void __init xen_time_init(void)
{
	struct pvclock_vcpu_time_info *primary;
	struct timespec64 wall;
	int cpu = smp_processor_id();

	/*
	 * Lower the clocksource rating from its default on the initial domain,
	 * or when the TSC is judged safe.
	 * NOTE(review): presumably so that the tsc clocksource gets preferred -
	 * confirm against the tsc clocksource rating.
	 */
	if (xen_initial_domain())
		xen_clocksource.rating = 275;
	else if (xen_tsc_safe_clocksource())
		xen_clocksource.rating = 299;

	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);

	/* If the periodic timer can be stopped, the vcpuop interface is usable. */
	if (!HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
				NULL)) {
		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
		xen_clockevent = &xen_vcpuop_clockevent;
	}

	/* Seed the system time from the Xen wall clock. */
	xen_read_wallclock(&wall);
	do_settimeofday64(&wall);

	setup_force_cpu_cap(X86_FEATURE_TSC);

	/* Only attempt the vDSO setup if the primary time info is TSC-stable. */
	primary = &__this_cpu_read(xen_vcpu)->time;
	if (primary->flags & PVCLOCK_TSC_STABLE_BIT) {
		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
		xen_setup_vsyscall_time_info();
	}

	xen_setup_runstate_info(cpu);
	xen_setup_timer(cpu);
	xen_setup_cpu_clockevents();

	xen_time_setup_guest();

	/* Only the initial domain pushes wall-time updates to the hypervisor. */
	if (xen_initial_domain())
		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
}
/*
 * Setup shared by the PV and HVM paths: capture the sched-clock offset,
 * then install the Xen steal-clock, sched-clock, TSC calibration and
 * wall-clock read hooks.
 */
static void __init xen_init_time_common(void)
{
	/* The offset is captured before xen_sched_clock() is installed. */
	xen_sched_clock_offset = xen_clocksource_read();

	static_call_update(pv_steal_clock, xen_steal_clock);
	paravirt_set_sched_clock(xen_sched_clock);

	x86_platform.calibrate_tsc = xen_tsc_khz;
	x86_platform.get_wallclock = xen_get_wallclock;
}
/*
 * Install the Xen time hooks: run the common setup, make xen_time_init()
 * the timer init routine and turn both per-CPU clockevent setup hooks into
 * no-ops.
 */
void __init xen_init_time_ops(void)
{
	xen_init_time_common();

	x86_init.timers.timer_init = xen_time_init;
	x86_init.timers.setup_percpu_clockev = x86_init_noop;
	x86_cpuinit.setup_percpu_clockev = x86_init_noop;

	/*
	 * The initial domain keeps whatever set_wallclock hook is already
	 * installed; every other domain gets the stub returning -ENODEV.
	 */
	if (xen_initial_domain())
		return;

	x86_platform.set_wallclock = xen_set_wallclock;
}
#ifdef CONFIG_XEN_PVHVM
/*
 * Per-CPU clockevent setup hook for HVM guests: set up runstate info for
 * the current CPU and register its clockevent device. Unlike
 * xen_time_init(), this does not call xen_setup_timer().
 */
static void xen_hvm_setup_cpu_clockevents(void)
{
	xen_setup_runstate_info(smp_processor_id());
	xen_setup_cpu_clockevents();
}
/*
 * Install the Xen time hooks for an HVM guest.
 *
 * The function is safe to call more than once: it returns immediately once
 * initialisation has completed, and every early return below leaves the
 * "initialised" flag clear so that a later call can try again.
 *
 * Nothing is installed when vector callbacks are unavailable, when the
 * XENFEAT_hvm_safe_pvclock feature is missing, or while this CPU's xen_vcpu
 * pointer is still NULL.
 */
void __init xen_hvm_init_time_ops(void)
{
	static bool hvm_time_initialized;

	if (hvm_time_initialized)
		return;

	if (!xen_have_vector_callback)
		return;

	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
		/* Newline-terminated so the record is complete as logged. */
		pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer\n");
		return;
	}

	/* The clock read in xen_init_time_common() dereferences xen_vcpu. */
	if (!__this_cpu_read(xen_vcpu)) {
		pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n",
			xen_vcpu_nr(0));
		return;
	}

	xen_init_time_common();

	x86_init.timers.setup_percpu_clockev = xen_time_init;
	x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;

	x86_platform.set_wallclock = xen_set_wallclock;

	hvm_time_initialized = true;
}
#endif
/*
 * Early parameter handler for "xen_timer_slop=<value>": overrides the
 * minimum delta (both the ns and the ticks field) of the timerop and the
 * vcpuop clockevent templates.
 *
 * @ptr: the option's value string, parsed with memparse(); NULL when the
 *       option was given without "=<value>".
 *
 * Returns 0 on success, -EINVAL if no value was supplied.
 */
static int __init parse_xen_timer_slop(char *ptr)
{
	unsigned long slop;

	/* Without this check memparse() would dereference a NULL string. */
	if (!ptr)
		return -EINVAL;

	slop = memparse(ptr, NULL);

	xen_timerop_clockevent.min_delta_ns = slop;
	xen_timerop_clockevent.min_delta_ticks = slop;
	xen_vcpuop_clockevent.min_delta_ns = slop;
	xen_vcpuop_clockevent.min_delta_ticks = slop;

	return 0;
}
early_param("xen_timer_slop"