// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/uaccess.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

#include "trace.h"

static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static unsigned int host_ptimer_irq;
static u32 host_vtimer_irq_flags;
static u32 host_ptimer_irq_flags;

static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);

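/*
 * Default PPI INTIDs for each timer, following the usual GIC
 * assignments (27: virtual, 30: physical, 26: hyp physical,
 * 28: hyp virtual). Userspace can override these per-VM via
 * kvm_arm_timer_set_attr().
 */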
static const u8 default_ppi[] = {
	[TIMER_PTIMER]  = 30,
	[TIMER_VTIMER]  = 27,
	[TIMER_HPTIMER] = 26,
	[TIMER_HVTIMER] = 28,
};

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx);
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val);
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);

static struct irq_ops arch_timer_irq_ops = {
	.get_input_level = kvm_arch_timer_get_input_level,
};

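/*
 * Without nested virt, only the virtual and physical EL1 timers are
 * visible to the guest; with NV, the hyp (EL2) timers are exposed too.
 */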
static int nr_timers(struct kvm_vcpu *vcpu)
{
	if (!vcpu_has_nv(vcpu))
		return NR_KVM_EL0_TIMERS;

	return NR_KVM_TIMERS;
}

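/*
 * Accessors for the guest view of the timer control and compare
 * registers, which live in the vcpu's sysreg file; the setters
 * below mirror these getters.
 */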
u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

u64 timer_get_cval(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl);
		break;
	case TIMER_PTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTP_CTL_EL0, ctl);
		break;
	case TIMER_HVTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHV_CTL_EL2, ctl);
		break;
	case TIMER_HPTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHP_CTL_EL2, ctl);
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTV_CVAL_EL0, cval);
		break;
	case TIMER_PTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, cval);
		break;
	case TIMER_HVTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHV_CVAL_EL2, cval);
		break;
	case TIMER_HPTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, cval);
		break;
	default:
		WARN_ON(1);
	}
}

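/* Read the host physical counter, the reference for all guest timer math. */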
u64 kvm_phys_timer_read(void)
{
	return timecounter->cc->read(timecounter->cc);
}

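/*
 * Work out which timers are backed by hardware ("direct") and which must
 * be emulated with a hrtimer ("emul"): with NV, the view currently in
 * use (vEL2 or vEL1/0) gets the hardware and the other view is emulated;
 * plain VHE maps both EL1 timers directly; nVHE can only map the virtual
 * timer directly and emulates the physical one.
 */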
void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	if (vcpu_has_nv(vcpu)) {
		if (is_hyp_ctxt(vcpu)) {
			map->direct_vtimer = vcpu_hvtimer(vcpu);
			map->direct_ptimer = vcpu_hptimer(vcpu);
			map->emul_vtimer = vcpu_vtimer(vcpu);
			map->emul_ptimer = vcpu_ptimer(vcpu);
		} else {
			map->direct_vtimer = vcpu_vtimer(vcpu);
			map->direct_ptimer = vcpu_ptimer(vcpu);
			map->emul_vtimer = vcpu_hvtimer(vcpu);
			map->emul_ptimer = vcpu_hptimer(vcpu);
		}
	} else if (has_vhe()) {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = vcpu_ptimer(vcpu);
		map->emul_vtimer = NULL;
		map->emul_ptimer = NULL;
	} else {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = NULL;
		map->emul_vtimer = NULL;
		map->emul_ptimer = vcpu_ptimer(vcpu);
	}

	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
}

static inline bool userspace_irqchip(struct kvm *kvm)
{
	return unlikely(!irqchip_in_kernel(kvm));
}

static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
		      HRTIMER_MODE_ABS_HARD);
}

static void soft_timer_cancel(struct hrtimer *hrt)
{
	hrtimer_cancel(hrt);
}

static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
	struct arch_timer_context *ctx;
	struct timer_map map;

	/*
	 * We may see a timer interrupt after vcpu_put() has been called which
	 * sets the CPU's vcpu pointer to NULL, because even though the timer
	 * has been disabled in timer_save_state(), the hardware interrupt
	 * signal may not have been retired from the interrupt controller yet.
	 */
	if (!vcpu)
		return IRQ_HANDLED;

	get_timer_map(vcpu, &map);

	if (irq == host_vtimer_irq)
		ctx = map.direct_vtimer;
	else
		ctx = map.direct_ptimer;

	if (kvm_timer_should_fire(ctx))
		kvm_timer_update_irq(vcpu, true, ctx);

	if (userspace_irqchip(vcpu->kvm) &&
	    !static_branch_unlikely(&has_gic_active_state))
		disable_percpu_irq(host_vtimer_irq);

	return IRQ_HANDLED;
}

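/*
 * Return the number of nanoseconds until the guest counter (i.e. the
 * physical counter corrected by the context's offset) reaches @val,
 * or 0 if that point has already passed.
 */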
static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
				     u64 val)
{
	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	if (now < val) {
		u64 ns;

		ns = cyclecounter_cyc2ns(timecounter->cc,
					 val - now,
					 timecounter->mask,
					 &timer_ctx->ns_frac);
		return ns;
	}

	return 0;
}

static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
	return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
}

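/* A timer can only fire if it is enabled and its interrupt is not masked. */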
static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
	WARN_ON(timer_ctx && timer_ctx->loaded);
	return timer_ctx &&
	       ((timer_get_ctl(timer_ctx) &
		 (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}

static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
{
	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
		vcpu_get_flag(vcpu, IN_WFIT));
}

static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
	u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
	struct arch_timer_context *ctx;

	ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu);

	return kvm_counter_compute_delta(ctx, val);
}

/*
 * Returns the earliest expiration time in ns among guest timers.
 * Note that it will return 0 if none of the timers can fire.
 */
static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
{
	u64 min_delta = ULLONG_MAX;
	int i;

	for (i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];

		WARN(ctx->loaded, "timer %d loaded\n", i);
		if (kvm_timer_irq_can_fire(ctx))
			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
	}

	if (vcpu_has_wfit_active(vcpu))
		min_delta = min(min_delta, wfit_delay_ns(vcpu));

	/* If none of the timers can fire, return 0 */
	if (min_delta == ULLONG_MAX)
		return 0;

	return min_delta;
}

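/*
 * Expiry callback for the background timer armed in kvm_timer_blocking():
 * wake the vcpu up so that the pending timer can be handled.
 */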
static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_cpu *timer;
	struct kvm_vcpu *vcpu;
	u64 ns;

	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If we should have slept longer, restart it.
	 */
	ns = kvm_timer_earliest_exp(vcpu);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_vcpu_wake_up(vcpu);
	return HRTIMER_NORESTART;
}

static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
{
	struct arch_timer_context *ctx;
	struct kvm_vcpu *vcpu;
	u64 ns;

	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
	vcpu = timer_context_to_vcpu(ctx);

	trace_kvm_timer_hrtimer_expire(ctx);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If not ready, schedule for a later time.
	 */
	ns = kvm_timer_compute_delta(ctx);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_timer_update_irq(vcpu, true, ctx);
	return HRTIMER_NORESTART;
}

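/*
 * Compute the current output level of a timer: read the hardware if the
 * context is loaded on this CPU, otherwise compare CVAL against the
 * offset-corrected counter.
 */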
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
	enum kvm_arch_timers index;
	u64 cval, now;

	if (!timer_ctx)
		return false;

	index = arch_timer_ctx_index(timer_ctx);

	if (timer_ctx->loaded) {
		u32 cnt_ctl = 0;

		switch (index) {
		case TIMER_VTIMER:
		case TIMER_HVTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
			break;
		case TIMER_PTIMER:
		case TIMER_HPTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
			break;
		case NR_KVM_TIMERS:
			/* GCC is braindead */
			cnt_ctl = 0;
			break;
		}

		return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
		       (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
	}

	if (!kvm_timer_irq_can_fire(timer_ctx))
		return false;

	cval = timer_get_cval(timer_ctx);
	now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	return cval <= now;
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
}

/*
 * Reflect the timer output level into the kvm_run structure
 */
void kvm_timer_update_run(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the device bitmap with the timer states */
	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
				    KVM_ARM_DEV_EL1_PTIMER);
	if (kvm_timer_should_fire(vtimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
	if (kvm_timer_should_fire(ptimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}

static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
{
	/*
	 * Paper over NV2 brokenness by publishing the interrupt status
	 * bit. This still results in a poor quality of emulation (guest
	 * writes will have no effect until the next exit).
	 *
	 * But hey, it's fast, right?
	 */
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);

	if (is_hyp_ctxt(vcpu) &&
	    (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) {
		unsigned long val = timer_get_ctl(ctx);

		__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
		timer_set_ctl(ctx, val);
	}
}

static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	kvm_timer_update_status(timer_ctx, new_level);

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
				   timer_ctx->irq.level);

	if (userspace_irqchip(vcpu->kvm))
		return;

	kvm_vgic_inject_irq(vcpu->kvm, vcpu,
			    timer_irq(timer_ctx),
			    timer_ctx->irq.level,
			    timer_ctx);
}

/* Only called for a fully emulated timer */
static void timer_emulate(struct arch_timer_context *ctx)
{
	bool should_fire = kvm_timer_should_fire(ctx);

	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level)
		kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx);

	kvm_timer_update_status(ctx, should_fire);

	/*
	 * If the timer can fire now, we don't need to have a soft timer
	 * scheduled for the future. If the timer cannot fire at all,
	 * then we also don't need a soft timer.
	 */
	if (should_fire || !kvm_timer_irq_can_fire(ctx))
		return;

	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
}

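/*
 * Program the counter offsets: CNTVOFF_EL2 through a hyp call (a plain
 * function call on VHE), CNTPOFF_EL2 only when FEAT_ECV provides it.
 */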
static void set_cntvoff(u64 cntvoff)
{
	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}

static void set_cntpoff(u64 cntpoff)
{
	if (has_cntpoff())
		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}

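/*
 * Save the hardware state of a loaded timer into the vcpu's sysreg file,
 * disable the hardware timer, and zero the counter offset for the benefit
 * of whatever runs next on this CPU.
 */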
static void timer_save_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (!ctx->loaded)
		goto out;

	switch (index) {
	u64 cval;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
		cval = read_sysreg_el0(SYS_CNTV_CVAL);

		if (has_broken_cntvoff())
			cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTV_CTL);
		isb();

		/*
		 * The kernel may decide to run userspace after
		 * calling vcpu_put, so we reset cntvoff to 0 to
		 * ensure a consistent read between user accesses to
		 * the virtual counter and kernel access to the
		 * physical counter in the non-VHE case.
		 *
		 * For VHE, the virtual counter uses a fixed virtual
		 * offset of zero, so there is no need to zero the
		 * CNTVOFF_EL2 register, but this is actually useful
		 * when switching between EL1/vEL2 with NV.
		 *
		 * Do it unconditionally, as this is either unavoidable
		 * or dirt cheap.
		 */
		set_cntvoff(0);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
		cval = read_sysreg_el0(SYS_CNTP_CVAL);

		cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTP_CTL);
		isb();

		set_cntpoff(0);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_save_state(ctx);

	ctx->loaded = false;
out:
	local_irq_restore(flags);
}

/*
 * Schedule the background timer before calling kvm_vcpu_halt, so that this
 * thread is removed from its waitqueue and made runnable when there's a timer
 * interrupt to handle.
 */
static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * If no timers are capable of raising interrupts (disabled or
	 * masked), then there's no more work for us to do.
	 */
	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
	    !kvm_timer_irq_can_fire(map.emul_vtimer) &&
	    !kvm_timer_irq_can_fire(map.emul_ptimer) &&
	    !vcpu_has_wfit_active(vcpu))
		return;

	/*
	 * At least one guest timer will expire. Schedule a background timer.
	 * Set the earliest expiration time among the guest timers.
	 */
	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
}

static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

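/*
 * Mirror image of timer_save_state(): program the counter offset and
 * reload CVAL and CTL into the hardware timer backing this context.
 */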
static void timer_restore_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (ctx->loaded)
		goto out;

	switch (index) {
	u64 cval, offset;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		if (has_broken_cntvoff()) {
			set_cntvoff(0);
			cval += offset;
		} else {
			set_cntvoff(offset);
		}
		write_sysreg_el0(cval, SYS_CNTV_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		set_cntpoff(offset);
		cval += offset;
		write_sysreg_el0(cval, SYS_CNTP_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_restore_state(ctx);

	ctx->loaded = true;
out:
	local_irq_restore(flags);
}

static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
{
	int r;

	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
	WARN_ON(r);
}

static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
	bool phys_active = false;

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx);

	if (irqchip_in_kernel(vcpu->kvm))
		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));

	phys_active |= ctx->irq.level;

	set_timer_irq_phys_active(ctx, phys_active);
}

static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);

	/*
	 * When using a userspace irqchip with the architected timers and a
	 * host interrupt controller that doesn't support an active state, we
	 * must still prevent continuously exiting from the guest, and
	 * therefore mask the physical interrupt by disabling it on the host
	 * interrupt controller when the virtual level is high, such that the
	 * guest can make forward progress. Once we detect the output level
	 * being de-asserted, we unmask the interrupt again so that we exit
	 * from the guest when the timer fires.
	 */
	if (vtimer->irq.level)
		disable_percpu_irq(host_vtimer_irq);
	else
		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}

/* If _pred is true, set bit in _set, otherwise set it in _clr */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
	do {								\
		if (_pred)						\
			(_set) |= (_bit);				\
		else							\
			(_clr) |= (_bit);				\
	} while (0)

static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
					      struct timer_map *map)
{
	int hw, ret;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	/*
	 * We only ever unmap the vtimer irq on a VHE system that runs nested
	 * virtualization, in which case we have valid emul_vtimer,
	 * emul_ptimer, direct_vtimer, and direct_ptimer pointers.
	 *
	 * Since this is called from kvm_timer_vcpu_load(), a change between
	 * vEL2 and vEL1/0 will have just happened, and the timer_map will
	 * represent this, and therefore we switch the emul/direct mappings
	 * below.
	 */
	hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
	if (hw < 0) {
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));

		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_vtimer->host_timer_irq,
					    timer_irq(map->direct_vtimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_ptimer->host_timer_irq,
					    timer_irq(map->direct_ptimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
	}
}

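/*
 * Compute the CNTHCTL_EL2 trap bits required for the current context
 * (VHE only): ECV-based trapping to paper over NV2, offset correction
 * when the hardware cannot apply it, and whatever the L1 hypervisor has
 * requested for its own guest.
 */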
static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	bool tvt, tpt, tvc, tpc, tvt02, tpt02;
	u64 clr, set;

	/*
	 * No trapping gets configured here with nVHE. See
	 * __timer_enable_traps(), which is where the stuff happens.
	 */
	if (!has_vhe())
		return;

	/*
	 * Our default policy is not to trap anything. As we progress
	 * within this function, reality kicks in and we start adding
	 * traps based on emulation requirements.
	 */
	tvt = tpt = tvc = tpc = false;
	tvt02 = tpt02 = false;

	/*
	 * NV2 badly breaks the timer semantics by redirecting accesses to
	 * the EL1 timer state to memory, so let's call ECV to the rescue if
	 * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
	 *
	 * The treatment slightly varies depending on whether we run a nVHE
	 * or VHE guest: nVHE will use the _EL0 registers directly, while
	 * VHE will use the _EL02 accessors. This translates into different
	 * trap bits.
	 *
	 * None of the trapping is required when running in non-HYP context,
	 * unless required by the L1 hypervisor settings once we advertise
	 * ECV+NV in the guest, or unless we need trapping for other reasons.
	 */
	if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
		if (vcpu_el2_e2h_is_set(vcpu))
			tvt02 = tpt02 = true;
		else
			tvt = tpt = true;
	}

	/*
	 * We have two possibilities to deal with a physical offset:
	 *
	 * - Either we have CNTPOFF (yay!) or the offset is 0:
	 *   we let the guest freely access the HW
	 *
	 * - or neither of these conditions applies:
	 *   we trap accesses to the HW, but still use it
	 *   after correcting the physical offset
	 */
	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
		tpt = tpc = true;

	/*
	 * For the poor sods that could not correctly subtract one value
	 * from another, trap the full virtual timer and counter.
	 */
	if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
		tvt = tvc = true;

	/*
	 * Apply the enable bits that the guest hypervisor has requested for
	 * its own guest. We can only add traps that wouldn't have been set
	 * above.
	 * Implementation choices: we do not support NV when E2H=0 in the
	 * guest, and we don't support a configuration where E2H is writable
	 * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
	 * not both). This simplifies the handling of the EL1NV* bits.
	 */
	if (is_nested_ctxt(vcpu)) {
		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);

		/* Use the VHE format for mental sanity */
		if (!vcpu_el2_e2h_is_set(vcpu))
			val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;

		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));

		tpt02 |= (val & CNTHCTL_EL1NVPCT);
		tvt02 |= (val & CNTHCTL_EL1NVVCT);
	}

	/*
	 * Now that we have collected our requirements, compute the
	 * trap and enable bits.
	 */
	set = 0;
	clr = 0;

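	/*
	 * Beware of the polarity below: EL1PCEN/EL1PCTEN are *enable* bits
	 * (trapping happens when they are clear), so their predicates route
	 * into 'clr', while the remaining bits are genuine trap bits and
	 * route into 'set'.
	 */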
	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
	assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
	assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
	assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
	assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);

	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
	sysreg_clear_set(cnthctl_el2, clr, set);
}

void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	if (static_branch_likely(&has_gic_active_state)) {
		if (vcpu_has_nv(vcpu))
			kvm_timer_vcpu_load_nested_switch(vcpu, &map);

		kvm_timer_vcpu_load_gic(map.direct_vtimer);
		if (map.direct_ptimer)
			kvm_timer_vcpu_load_gic(map.direct_ptimer);
	} else {
		kvm_timer_vcpu_load_nogic(vcpu);
	}

	kvm_timer_unblocking(vcpu);

	timer_restore_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_restore_state(map.direct_ptimer);
	if (map.emul_vtimer)
		timer_emulate(map.emul_vtimer);
	if (map.emul_ptimer)
		timer_emulate(map.emul_ptimer);

	timer_set_traps(vcpu, &map);
}

bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool vlevel, plevel;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;

	return kvm_timer_should_fire(vtimer) != vlevel ||
	       kvm_timer_should_fire(ptimer) != plevel;
}

void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	timer_save_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_save_state(map.direct_ptimer);

	/*
	 * Cancel soft timer emulation, because the only case where we
	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
	 * in that case we already factor in the deadline for the physical
	 * timer when scheduling the bg_timer.
	 *
	 * In any case, we re-schedule the hrtimer for the physical timer when
	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
	 */
	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);

	if (kvm_vcpu_is_blocking(vcpu))
		kvm_timer_blocking(vcpu);
}

void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
{
	/*
	 * When NV2 is on, guest hypervisors have their EL1 timer register
	 * accesses redirected to the VNCR page. Any guest action taken on
	 * the timer is postponed until the next exit, leading to a very
	 * poor quality of emulation.
	 *
	 * This is an unmitigated disaster, only papered over by FEAT_ECV,
	 * which allows trapping of the timer registers even with NV2.
	 * Even so, this is still worse than FEAT_NV on its own. Meh.
	 */
	if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
		/*
		 * For a VHE guest hypervisor, the EL2 state is directly
		 * stored in the host EL1 timers, while the emulated EL1
		 * state is stored in the VNCR page. The latter could have
		 * been updated behind our back, and we must reset the
		 * emulation of the timers.
		 *
		 * A non-VHE guest hypervisor doesn't have any direct access
		 * to its timers: the EL2 registers trap despite being
		 * notionally direct (we use the EL1 HW, as for VHE), while
		 * the EL1 registers access memory.
		 *
		 * In both cases, process the emulated timers on each guest
		 * exit. Boo.
		 */
		struct timer_map map;

		get_timer_map(vcpu, &map);

		soft_timer_cancel(&map.emul_vtimer->hrtimer);
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
		timer_emulate(map.emul_vtimer);
		timer_emulate(map.emul_ptimer);
	}
}

/*
 * With a userspace irqchip we have to check if the guest de-asserted the
 * timer and if so, unmask the timer irq signal on the host interrupt
 * controller to ensure that we see future timer signals.
 */
static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	if (!kvm_timer_should_fire(vtimer)) {
		kvm_timer_update_irq(vcpu, false, vtimer);
		if (static_branch_likely(&has_gic_active_state))
			set_timer_irq_phys_active(vtimer, false);
		else
			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	}
}

void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	if (unlikely(!timer->enabled))
		return;

	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
		unmask_vtimer_irq_user(vcpu);
}

void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
	 * and to 0 for ARMv7. We provide an implementation that always
	 * resets the timer to be disabled and unmasked and is compliant with
	 * the ARMv7 architecture.
	 */
	for (int i = 0; i < nr_timers(vcpu); i++)
		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);

	/*
	 * A vcpu running at EL2 is in charge of the offset applied to
	 * the virtual timer, so use the physical VM offset, and point
	 * the vcpu offset to CNTVOFF_EL2.
	 */
	if (vcpu_has_nv(vcpu)) {
		struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;

		offs->vcpu_offset = __ctxt_sys_reg(&vcpu->arch.ctxt, CNTVOFF_EL2);
		offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
	}

	if (timer->enabled) {
		for (int i = 0; i < nr_timers(vcpu); i++)
			kvm_timer_update_irq(vcpu, false,
					     vcpu_get_timer(vcpu, i));

		if (irqchip_in_kernel(vcpu->kvm)) {
			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
			if (map.direct_ptimer)
				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
		}
	}

	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
}

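/*
 * One-time initialization of a timer context: hook it up to the VM-wide
 * counter offset, its emulation hrtimer, and the matching host PPI.
 */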
static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
{
	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
	struct kvm *kvm = vcpu->kvm;

	ctxt->timer_id = timerid;

	if (timerid == TIMER_VTIMER)
		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
	else
		ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;

	hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC,
		      HRTIMER_MODE_ABS_HARD);

	switch (timerid) {
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		ctxt->host_timer_irq = host_ptimer_irq;
		break;
	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		ctxt->host_timer_irq = host_vtimer_irq;
		break;
	}
}

void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	for (int i = 0; i < NR_KVM_TIMERS; i++)
		timer_context_init(vcpu, i);

	/* Synchronize offsets across timers of a VM if not already provided */
	if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
		timer_set_offset(vcpu_ptimer(vcpu), 0);
	}

	hrtimer_setup(&timer->bg_timer, kvm_bg_timer_expire, CLOCK_MONOTONIC,
		      HRTIMER_MODE_ABS_HARD);
}

void kvm_timer_init_vm(struct kvm *kvm)
{
	for (int i = 0; i < NR_KVM_TIMERS; i++)
		kvm->arch.timer_data.ppi[i] = default_ppi[i];
}

void kvm_timer_cpu_up(void)
{
	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	if (host_ptimer_irq)
		enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
}

void kvm_timer_cpu_down(void)
{
	disable_percpu_irq(host_vtimer_irq);
	if (host_ptimer_irq)
		disable_percpu_irq(host_ptimer_irq);
}

static u64 read_timer_ctl(struct arch_timer_context *timer)
{
	/*
	 * Set the ISTATUS bit if the timer has expired. Note that,
	 * according to ARMv8 ARM Issue A.k, the ISTATUS bit is UNKNOWN
	 * when the ENABLE bit is 0, so we choose to set ISTATUS
	 * regardless of the ENABLE bit for implementation convenience.
	 */
	u32 ctl = timer_get_ctl(timer);

	if (!kvm_timer_compute_delta(timer))
		ctl |= ARCH_TIMER_CTRL_IT_STAT;

	return ctl;
}

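/*
 * Read an emulated timer register, deriving TVAL and the ISTATUS bit
 * from CVAL and the offset-corrected counter.
 */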
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg)
{
	u64 val;

	switch (treg) {
	case TIMER_REG_TVAL:
		val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
		val = lower_32_bits(val);
		break;

	case TIMER_REG_CTL:
		val = read_timer_ctl(timer);
		break;

	case TIMER_REG_CVAL:
		val = timer_get_cval(timer);
		break;

	case TIMER_REG_CNT:
		val = kvm_phys_timer_read() - timer_get_offset(timer);
		break;

	case TIMER_REG_VOFF:
		val = *timer->offset.vcpu_offset;
		break;

	default:
		BUG();
	}

	return val;
}

u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
			      enum kvm_arch_timers tmr,
			      enum kvm_arch_timer_regs treg)
{
	struct arch_timer_context *timer;
	struct timer_map map;
	u64 val;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);

	if (timer == map.emul_vtimer || timer == map.emul_ptimer)
		return kvm_arm_timer_read(vcpu, timer, treg);

	preempt_disable();
	timer_save_state(timer);

	val = kvm_arm_timer_read(vcpu, timer, treg);

	timer_restore_state(timer);
	preempt_enable();

	return val;
}

static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	switch (treg) {
	case TIMER_REG_TVAL:
		timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
		break;

	case TIMER_REG_CTL:
		timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
		break;

	case TIMER_REG_CVAL:
		timer_set_cval(timer, val);
		break;

	case TIMER_REG_VOFF:
		*timer->offset.vcpu_offset = val;
		break;

	default:
		BUG();
	}
}

void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
				enum kvm_arch_timers tmr,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	struct arch_timer_context *timer;
	struct timer_map map;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);
	if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
		soft_timer_cancel(&timer->hrtimer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_emulate(timer);
	} else {
		preempt_disable();
		timer_save_state(timer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_restore_state(timer);
		preempt_enable();
	}
}

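/*
 * irq_chip callbacks used when the timer PPIs are stacked into the
 * "kvm-timer" domain (host GIC without HW deactivation): they track
 * whether the interrupt is currently forwarded to a vcpu so that
 * mask/ack/eoi can be redirected appropriately.
 */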
static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (vcpu)
		irqd_set_forwarded_to_vcpu(d);
	else
		irqd_clr_forwarded_to_vcpu(d);

	return 0;
}

static int timer_irq_set_irqchip_state(struct irq_data *d,
				       enum irqchip_irq_state which, bool val)
{
	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
		return irq_chip_set_parent_state(d, which, val);

	if (val)
		irq_chip_mask_parent(d);
	else
		irq_chip_unmask_parent(d);

	return 0;
}

static void timer_irq_eoi(struct irq_data *d)
{
	if (!irqd_is_forwarded_to_vcpu(d))
		irq_chip_eoi_parent(d);
}

static void timer_irq_ack(struct irq_data *d)
{
	d = d->parent_data;
	if (d->chip->irq_ack)
		d->chip->irq_ack(d);
}

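/* The irq_chip backing the stacked timer PPIs; see kvm_irq_init(). */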
static struct irq_chip timer_chip = {
	.name = "KVM",
	.irq_ack = timer_irq_ack,
	.irq_mask = irq_chip_mask_parent,
	.irq_unmask = irq_chip_unmask_parent,
	.irq_eoi = timer_irq_eoi,
	.irq_set_type = irq_chip_set_type_parent,
	.irq_set_vcpu_affinity = timer_irq_set_vcpu_affinity,
	.irq_set_irqchip_state = timer_irq_set_irqchip_state,
};

static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs, void *arg)
{
	irq_hw_number_t hwirq = (uintptr_t)arg;

	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
					     &timer_chip, NULL);
}

static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
}

static const struct irq_domain_ops timer_domain_ops = {
	.alloc = timer_irq_domain_alloc,
	.free = timer_irq_domain_free,
};

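/*
 * Timer PPIs are level triggered; if the firmware described anything
 * else, complain and fall back to level-low.
 */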
static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
	*flags = irq_get_trigger_type(virq);
	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
			virq);
		*flags = IRQF_TRIGGER_LOW;
	}
}

static int kvm_irq_init(struct arch_timer_kvm_info *info)
{
	struct irq_domain *domain = NULL;

	if (info->virtual_irq <= 0) {
		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
			info->virtual_irq);
		return -ENODEV;
	}

	host_vtimer_irq = info->virtual_irq;
	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);

	if (kvm_vgic_global_state.no_hw_deactivation) {
		struct fwnode_handle *fwnode;
		struct irq_data *data;

		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
		if (!fwnode)
			return -ENOMEM;

		/* Assume both vtimer and ptimer in the same parent */
		data = irq_get_irq_data(host_vtimer_irq);
		domain = irq_domain_create_hierarchy(data->domain, 0,
						     NR_KVM_TIMERS, fwnode,
						     &timer_domain_ops, NULL);
		if (!domain) {
			irq_domain_free_fwnode(fwnode);
			return -ENOMEM;
		}

		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
					    (void *)TIMER_VTIMER));
	}

	if (info->physical_irq > 0) {
		host_ptimer_irq = info->physical_irq;
		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);

		if (domain)
			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
						    (void *)TIMER_PTIMER));
	}

	return 0;
}

static void kvm_timer_handle_errata(void)
{
	u64 mmfr0, mmfr1, mmfr4;

	/*
	 * CNTVOFF_EL2 is broken on some implementations. For those, we trap
	 * all virtual timer/counter accesses, requiring FEAT_ECV.
	 *
	 * However, a hypervisor supporting nesting is likely to mitigate the
	 * erratum at L0, and not require other levels to mitigate it (which
	 * would otherwise be a terrible performance sink due to trap
	 * amplification).
	 *
	 * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
	 * and that NV is likely not to (because of limitations of the
	 * architecture), only enable the workaround when FEAT_VHE and
	 * FEAT_E2H0 are both detected. Time will tell if this actually holds.
	 */
	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
	if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) &&
	    !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) &&
	    SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) &&
	    (has_vhe() || has_hvhe()) &&
	    cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
		static_branch_enable(&broken_cntvoff_key);
		kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
	}
}

int __init kvm_timer_hyp_init(bool has_gic)
{
	struct arch_timer_kvm_info *info;
	int err;

	info = arch_timer_get_kvm_info();
	timecounter = &info->timecounter;

	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
	}

	err = kvm_irq_init(info);
	if (err)
		return err;

	/* First, do the virtual EL1 timer irq */

	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest vtimer", kvm_get_running_vcpus());
	if (err) {
		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
			host_vtimer_irq, err);
		return err;
	}

	if (has_gic) {
		err = irq_set_vcpu_affinity(host_vtimer_irq,
					    kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_vtimer_irq;
		}

		static_branch_enable(&has_gic_active_state);
	}

	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);

	/* Now let's do the physical EL1 timer irq */

	if (info->physical_irq > 0) {
		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
					 "kvm guest ptimer", kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
				host_ptimer_irq, err);
			goto out_free_vtimer_irq;
		}

		if (has_gic) {
			err = irq_set_vcpu_affinity(host_ptimer_irq,
						    kvm_get_running_vcpus());
			if (err) {
				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
				goto out_free_ptimer_irq;
			}
		}

		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
	} else if (has_vhe()) {
		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
			info->physical_irq);
		err = -ENODEV;
		goto out_free_vtimer_irq;
	}

	kvm_timer_handle_errata();
	return 0;

out_free_ptimer_irq:
	if (info->physical_irq > 0)
		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
out_free_vtimer_irq:
	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
	return err;
}

void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

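/*
 * Claim ownership of each timer PPI with the vgic and check that no two
 * timers share an INTID. Once valid, the mapping becomes immutable.
 */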
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
	u32 ppis = 0;
	bool valid;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;
		int irq;

		ctx = vcpu_get_timer(vcpu, i);
		irq = timer_irq(ctx);
		if (kvm_vgic_set_owner(vcpu, irq, ctx))
			break;

		/*
		 * We know by construction that we only have PPIs, so
		 * all values are less than 32.
		 */
		ppis |= BIT(irq);
	}

	valid = hweight32(ppis) == nr_timers(vcpu);

	if (valid)
		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);

	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return valid;
}

static bool kvm_arch_timer_get_input_level(int vintid)
{
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	if (WARN(!vcpu, "No vcpu context!\n"))
		return false;

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;

		ctx = vcpu_get_timer(vcpu, i);
		if (timer_irq(ctx) == vintid)
			return kvm_timer_should_fire(ctx);
	}

	/* A timer IRQ has fired, but no matching timer was found? */
	WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);

	return false;
}

int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;
	int ret;

	if (timer->enabled)
		return 0;

	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
	if (!irqchip_in_kernel(vcpu->kvm))
		goto no_vgic;

	/*
	 * At this stage, we have the guarantee that the vgic is both
	 * available and initialized.
	 */
	if (!timer_irqs_are_valid(vcpu)) {
		kvm_debug("incorrectly configured timer irqs\n");
		return -EINVAL;
	}

	get_timer_map(vcpu, &map);

	ret = kvm_vgic_map_phys_irq(vcpu,
				    map.direct_vtimer->host_timer_irq,
				    timer_irq(map.direct_vtimer),
				    &arch_timer_irq_ops);
	if (ret)
		return ret;

	if (map.direct_ptimer) {
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map.direct_ptimer->host_timer_irq,
					    timer_irq(map.direct_ptimer),
					    &arch_timer_irq_ops);
	}

	if (ret)
		return ret;

no_vgic:
	timer->enabled = 1;
	return 0;
}

/* If we have CNTPOFF, permanently set ECV to enable it */
void kvm_timer_init_vhe(void)
{
	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
		sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
}

int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	int irq, idx, ret = 0;

	if (!irqchip_in_kernel(vcpu->kvm))
		return -EINVAL;

	if (get_user(irq, uaddr))
		return -EFAULT;

	if (!(irq_is_ppi(irq)))
		return -EINVAL;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
		     &vcpu->kvm->arch.flags)) {
		ret = -EBUSY;
		goto out;
	}

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		idx = TIMER_VTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		idx = TIMER_PTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		idx = TIMER_HVTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		idx = TIMER_HPTIMER;
		break;
	default:
		ret = -ENXIO;
		goto out;
	}

	/*
	 * We cannot validate the IRQ uniqueness before we run, so take it at
	 * face value. The verdict will be given on first vcpu run, for each
	 * vcpu. Yes this is late. Blame it on the stupid API.
	 */
	vcpu->kvm->arch.timer_data.ppi[idx] = irq;

out:
	mutex_unlock(&vcpu->kvm->arch.config_lock);
	return ret;
}

int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	struct arch_timer_context *timer;
	int irq;

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		timer = vcpu_vtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		timer = vcpu_ptimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		timer = vcpu_hvtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		timer = vcpu_hptimer(vcpu);
		break;
	default:
		return -ENXIO;
	}

	irq = timer_irq(timer);
	return put_user(irq, uaddr);
}

int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		return 0;
	}

	return -ENXIO;
}

int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
				    struct kvm_arm_counter_offset *offset)
{
	int ret = 0;

	if (offset->reserved)
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (!kvm_trylock_all_vcpus(kvm)) {
		set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);

		/*
		 * If userspace decides to set the offset using this
		 * API rather than merely restoring the counter
		 * values, the offset applies to both the virtual and
		 * physical views.
		 */
		kvm->arch.timer_data.voffset = offset->counter_offset;
		kvm->arch.timer_data.poffset = offset->counter_offset;

		kvm_unlock_all_vcpus(kvm);
	} else {
		ret = -EBUSY;
	}

	mutex_unlock(&kvm->lock);

	return ret;
}