Currently, Xen will send a data abort to a guest trying to write to the
ISPENDR.
Unfortunately, recent version of Linux (at least 5.9+) will start
writing to the register if the interrupt needs to be re-triggered
(see the callback irq_retrigger). This can happen when a driver (such as
the xgbe network driver on AMD Seattle) re-enable an interrupt:
(XEN) d0v0: vGICD: unhandled word write 0x00000004000000 to ISPENDR44
[...]
[ 25.635837] Unhandled fault at 0xffff80001000522c
[...]
[ 25.818716] gic_retrigger+0x2c/0x38
[ 25.822361] irq_startup+0x78/0x138
[ 25.825920] __enable_irq+0x70/0x80
[ 25.829478] enable_irq+0x50/0xa0
[ 25.832864] xgbe_one_poll+0xc8/0xd8
[ 25.836509] net_rx_action+0x110/0x3a8
[ 25.840328] __do_softirq+0x124/0x288
[ 25.844061] irq_exit+0xe0/0xf0
[ 25.847272] __handle_domain_irq+0x68/0xc0
[ 25.851442] gic_handle_irq+0xa8/0xe0
[ 25.855171] el1_irq+0xb0/0x180
[ 25.858383] arch_cpu_idle+0x18/0x28
[ 25.862028] default_idle_call+0x24/0x5c
[ 25.866021] do_idle+0x204/0x278
[ 25.869319] cpu_startup_entry+0x24/0x68
[ 25.873313] rest_init+0xd4/0xe4
[ 25.876611] arch_call_rest_init+0x10/0x1c
[ 25.880777] start_kernel+0x5b8/0x5ec
As a consequence, the OS may become unusable.
Implementing the write part of ISPENDR is somewhat easy. For
virtual interrupt, we only need to inject the interrupt again.
For physical interrupt, we need to be more careful as the de-activation
of the virtual interrupt will be propagated to the physical distributor.
For simplicity, the physical interrupt will be set pending so the
workflow will not differ from a "real" interrupt.
Longer term, we could possible directly activate the physical interrupt
and avoid taking an exception to inject the interrupt to the domain.
(This is the approach taken by the new vGIC based on KVM).
Signed-off-by: Julien Grall <jgrall@amazon.com>
Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Release-Acked-by: Ian Jackson <iwj@xenproject.org>
(cherry picked from commit
067935804a8e7a33ff7170a2db8ce94bb46d9a63)
case VRANGE32(GICD_ISPENDR, GICD_ISPENDRN):
if ( dabt.size != DABT_WORD ) goto bad_width;
- printk(XENLOG_G_ERR
- "%pv: vGICD: unhandled word write %#"PRIregister" to ISPENDR%d\n",
- v, r, gicd_reg - GICD_ISPENDR);
- return 0;
+ rank = vgic_rank_offset(v, 1, gicd_reg - GICD_ISPENDR, DABT_WORD);
+ if ( rank == NULL ) goto write_ignore;
+
+ vgic_set_irqs_pending(v, r, rank->index);
+
+ return 1;
case VRANGE32(GICD_ICPENDR, GICD_ICPENDRN):
if ( dabt.size != DABT_WORD ) goto bad_width;
case VRANGE32(GICD_ISPENDR, GICD_ISPENDRN):
if ( dabt.size != DABT_WORD ) goto bad_width;
- printk(XENLOG_G_ERR
- "%pv: %s: unhandled word write %#"PRIregister" to ISPENDR%d\n",
- v, name, r, reg - GICD_ISPENDR);
- return 0;
+ rank = vgic_rank_offset(v, 1, reg - GICD_ISPENDR, DABT_WORD);
+ if ( rank == NULL ) goto write_ignore;
+
+ vgic_set_irqs_pending(v, r, rank->index);
+
+ return 1;
case VRANGE32(GICD_ICPENDR, GICD_ICPENDRN):
if ( dabt.size != DABT_WORD ) goto bad_width;
case VREG32(GICR_ICACTIVER0):
case VREG32(GICR_ICFGR1):
case VRANGE32(GICR_IPRIORITYR0, GICR_IPRIORITYR7):
+ case VREG32(GICR_ISPENDR0):
/*
* Above registers offset are common with GICD.
* So handle common with GICD handling
return __vgic_v3_distr_common_mmio_write("vGICR: SGI", v,
info, gicr_reg, r);
- case VREG32(GICR_ISPENDR0):
- if ( dabt.size != DABT_WORD ) goto bad_width;
- printk(XENLOG_G_ERR
- "%pv: vGICR: SGI: unhandled word write %#"PRIregister" to ISPENDR0\n",
- v, r);
- return 0;
-
case VREG32(GICR_ICPENDR0):
if ( dabt.size != DABT_WORD ) goto bad_width;
printk(XENLOG_G_ERR
}
}
+void vgic_set_irqs_pending(struct vcpu *v, uint32_t r, unsigned int rank)
+{
+ const unsigned long mask = r;
+ unsigned int i;
+ /* The first rank is always per-vCPU */
+ bool private = rank == 0;
+
+ /* LPIs will never be set pending via this function */
+ ASSERT(!is_lpi(32 * rank + 31));
+
+ for_each_set_bit( i, &mask, 32 )
+ {
+ unsigned int irq = i + 32 * rank;
+
+ if ( !private )
+ {
+ struct pending_irq *p = spi_to_pending(v->domain, irq);
+
+ /*
+ * When the domain sets the pending state for a HW interrupt on
+ * the virtual distributor, we set the pending state on the
+ * physical distributor.
+ *
+ * XXX: Investigate whether we would be able to set the
+ * physical interrupt active and save an interruption. (This
+ * is what the new vGIC does).
+ */
+ if ( p->desc != NULL )
+ {
+ unsigned long flags;
+
+ spin_lock_irqsave(&p->desc->lock, flags);
+ gic_set_pending_state(p->desc, true);
+ spin_unlock_irqrestore(&p->desc->lock, flags);
+ continue;
+ }
+ }
+
+ /*
+ * If the interrupt is per-vCPU, then we want to inject the vIRQ
+ * to v, otherwise we should let the function figuring out the
+ * correct vCPU.
+ */
+ vgic_inject_irq(v->domain, private ? v : NULL, irq, true);
+ }
+}
+
bool vgic_to_sgi(struct vcpu *v, register_t sgir, enum gic_sgi_mode irqmode,
int virq, const struct sgi_target *target)
{
extern struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq);
extern void vgic_disable_irqs(struct vcpu *v, uint32_t r, int n);
extern void vgic_enable_irqs(struct vcpu *v, uint32_t r, int n);
+extern void vgic_set_irqs_pending(struct vcpu *v, uint32_t r,
+ unsigned int rank);
extern void register_vgic_ops(struct domain *d, const struct vgic_ops *ops);
int vgic_v2_init(struct domain *d, int *mmio_count);
int vgic_v3_init(struct domain *d, int *mmio_count);