/******************************************************************************
 * arch/x86/irq.c
 *
 * Portions of this file are:
 *  Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
 */

/*
 * NOTE: the original #include targets were lost in extraction; the list below
 * is reconstructed from the identifiers used in this file and may not match
 * the upstream ordering exactly.
 */
#include <xen/config.h>
#include <xen/init.h>
#include <xen/delay.h>
#include <xen/errno.h>
#include <xen/event.h>
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/keyhandler.h>
#include <xen/compat.h>
#include <xen/iocap.h>
#include <xen/iommu.h>
#include <xen/trace.h>
#include <asm/msi.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/mach-generic/mach_apic.h>
#include <public/physdev.h>

/* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
bool_t __read_mostly opt_noirqbalance = 0;
boolean_param("noirqbalance", opt_noirqbalance);

unsigned int __read_mostly nr_irqs_gsi = 16;
unsigned int __read_mostly nr_irqs;
integer_param("nr_irqs", nr_irqs);

u8 __read_mostly *irq_vector;
struct irq_desc __read_mostly *irq_desc = NULL;

int __read_mostly *irq_status = NULL;
#define IRQ_UNUSED      (0)
#define IRQ_USED        (1)
#define IRQ_RSVD        (2)

#define IRQ_VECTOR_UNASSIGNED (0)

static DECLARE_BITMAP(used_vectors, NR_VECTORS);

struct irq_cfg __read_mostly *irq_cfg = NULL;

static DEFINE_SPINLOCK(vector_lock);

DEFINE_PER_CPU(vector_irq_t, vector_irq);

DEFINE_PER_CPU(struct cpu_user_regs *, __irq_regs);

static LIST_HEAD(irq_ratelimit_list);
static DEFINE_SPINLOCK(irq_ratelimit_lock);
static struct timer irq_ratelimit_timer;

/* irq_ratelimit: the max irq rate allowed in every 10ms, set 0 to disable */
static unsigned int __read_mostly irq_ratelimit_threshold = 10000;
integer_param("irq_ratelimit", irq_ratelimit_threshold);

/* Must be called with irqs disabled. */
void lock_vector_lock(void)
{
    /* Used so that the online set of cpus does not change
     * during assign_irq_vector.
     */
    spin_lock(&vector_lock);
}

void unlock_vector_lock(void)
{
    spin_unlock(&vector_lock);
}

static int __init __bind_irq_vector(int irq, int vector, cpumask_t cpu_mask)
{
    cpumask_t online_mask;
    int cpu;
    struct irq_cfg *cfg = irq_cfg(irq);

    BUG_ON((unsigned)irq >= nr_irqs);
    BUG_ON((unsigned)vector >= NR_VECTORS);

    cpus_and(online_mask, cpu_mask, cpu_online_map);
    if (cpus_empty(online_mask))
        return -EINVAL;
    if ((cfg->vector == vector) && cpus_equal(cfg->cpu_mask, online_mask))
        return 0;
    if (cfg->vector != IRQ_VECTOR_UNASSIGNED)
        return -EBUSY;
    for_each_cpu_mask(cpu, online_mask)
        per_cpu(vector_irq, cpu)[vector] = irq;
    cfg->vector = vector;
    cfg->cpu_mask = online_mask;
    irq_status[irq] = IRQ_USED;
    if (IO_APIC_IRQ(irq))
        irq_vector[irq] = vector;
    return 0;
}

int __init bind_irq_vector(int irq, int vector, cpumask_t cpu_mask)
{
    unsigned long flags;
    int ret;

    spin_lock_irqsave(&vector_lock, flags);
    ret = __bind_irq_vector(irq, vector, cpu_mask);
    spin_unlock_irqrestore(&vector_lock, flags);
    return ret;
}

static inline int find_unassigned_irq(void)
{
    int irq;

    for (irq = nr_irqs_gsi; irq < nr_irqs; irq++)
        if (irq_status[irq] == IRQ_UNUSED)
            return irq;
    return -ENOSPC;
}

/*
 * Dynamic irq allocation and deallocation for MSI
 */
int create_irq(void)
{
    unsigned long flags;
    int irq, ret;

    irq = -ENOSPC;

    spin_lock_irqsave(&vector_lock, flags);

    irq = find_unassigned_irq();
    if (irq < 0)
        goto out;
    ret = __assign_irq_vector(irq, irq_cfg(irq), TARGET_CPUS);
    if (ret < 0)
        irq = ret;
out:
    spin_unlock_irqrestore(&vector_lock, flags);

    return irq;
}

static void dynamic_irq_cleanup(unsigned int irq)
{
    struct irq_desc *desc = irq_to_desc(irq);
    unsigned long flags;
    struct irqaction *action;

    spin_lock_irqsave(&desc->lock, flags);
    desc->status |= IRQ_DISABLED;
    desc->handler->shutdown(irq);
    action = desc->action;
    desc->action = NULL;
    desc->depth = 1;
    desc->msi_desc = NULL;
    desc->handler = &no_irq_type;
    cpus_setall(desc->affinity);
    spin_unlock_irqrestore(&desc->lock, flags);

    /* Wait to make sure it's not being used on another CPU */
    do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );

    if (action)
        xfree(action);
}

static void init_one_irq_status(int irq);

static void __clear_irq_vector(int irq)
{
    int cpu, vector;
    cpumask_t tmp_mask;
    struct irq_cfg *cfg = irq_cfg(irq);

    BUG_ON(!cfg->vector);

    vector = cfg->vector;
    cpus_and(tmp_mask, cfg->cpu_mask, cpu_online_map);

    for_each_cpu_mask(cpu, tmp_mask)
        per_cpu(vector_irq, cpu)[vector] = -1;

    cfg->vector = IRQ_VECTOR_UNASSIGNED;
    cpus_clear(cfg->cpu_mask);
    init_one_irq_status(irq);

    if (likely(!cfg->move_in_progress))
        return;

    /* A move is in progress: also drop stale entries on the CPUs the
     * interrupt is being moved away from. */
    cpus_and(tmp_mask, cfg->old_cpu_mask, cpu_online_map);
    for_each_cpu_mask(cpu, tmp_mask) {
        for (vector = FIRST_DYNAMIC_VECTOR; vector <= LAST_DYNAMIC_VECTOR;
             vector++) {
            if (per_cpu(vector_irq, cpu)[vector] != irq)
                continue;
            per_cpu(vector_irq, cpu)[vector] = -1;
            break;
        }
    }

    cfg->move_in_progress = 0;
}

void clear_irq_vector(int irq)
{
    unsigned long flags;

    spin_lock_irqsave(&vector_lock, flags);
    __clear_irq_vector(irq);
    spin_unlock_irqrestore(&vector_lock, flags);
}

void destroy_irq(unsigned int irq)
{
    BUG_ON(!MSI_IRQ(irq));
    dynamic_irq_cleanup(irq);
    clear_irq_vector(irq);
}

int irq_to_vector(int irq)
{
    int vector = -1;
    struct irq_cfg *cfg;

    BUG_ON(irq >= nr_irqs || irq < 0);

    if (IO_APIC_IRQ(irq))
        vector = irq_vector[irq];
    else if ( MSI_IRQ(irq) )
    {
        cfg = irq_cfg(irq);
        vector = cfg->vector;
    }
    else
        vector = LEGACY_VECTOR(irq);

    return vector;
}

static void init_one_irq_desc(struct irq_desc *desc)
{
    desc->status   = IRQ_DISABLED;
    desc->handler  = &no_irq_type;
    desc->action   = NULL;
    desc->depth    = 1;
    desc->msi_desc = NULL;
    spin_lock_init(&desc->lock);
    cpus_setall(desc->affinity);
    INIT_LIST_HEAD(&desc->rl_link);
}

static void init_one_irq_status(int irq)
{
    irq_status[irq] = IRQ_UNUSED;
}

static void init_one_irq_cfg(struct irq_cfg *cfg)
{
    cfg->vector = IRQ_VECTOR_UNASSIGNED;
    cpus_clear(cfg->cpu_mask);
    cpus_clear(cfg->old_cpu_mask);
}

int init_irq_data(void)
{
    struct irq_desc *desc;
    struct irq_cfg *cfg;
    int irq, vector;

    for (vector = 0; vector < NR_VECTORS; ++vector)
        this_cpu(vector_irq)[vector] = -1;

    irq_desc = xmalloc_array(struct irq_desc, nr_irqs);
    irq_cfg = xmalloc_array(struct irq_cfg, nr_irqs);
    irq_status = xmalloc_array(int, nr_irqs);
    irq_vector = xmalloc_array(u8, nr_irqs_gsi);

    if ( !irq_desc || !irq_cfg || !irq_status || !irq_vector )
        return -ENOMEM;

    memset(irq_desc, 0, nr_irqs * sizeof(*irq_desc));
    memset(irq_cfg, 0, nr_irqs * sizeof(*irq_cfg));
    memset(irq_status, 0, nr_irqs * sizeof(*irq_status));
    memset(irq_vector, 0, nr_irqs_gsi * sizeof(*irq_vector));

    for (irq = 0; irq < nr_irqs; irq++) {
        desc = irq_to_desc(irq);
        cfg = irq_cfg(irq);
        desc->irq = irq;
        desc->chip_data = cfg;
        init_one_irq_desc(desc);
        init_one_irq_cfg(cfg);
        init_one_irq_status(irq);
    }

    /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
    set_bit(LEGACY_SYSCALL_VECTOR, used_vectors);
    set_bit(HYPERCALL_VECTOR, used_vectors);

    /* IRQ_MOVE_CLEANUP_VECTOR is used for vector cleanup. */
    set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);

    return 0;
}

static void __do_IRQ_guest(int vector);

void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { }

static void enable_none(unsigned int vector) { }

static unsigned int startup_none(unsigned int vector) { return 0; }

static void disable_none(unsigned int vector) { }

static void ack_none(unsigned int irq)
{
    ack_bad_irq(irq);
}

#define shutdown_none   disable_none
#define end_none        enable_none

hw_irq_controller no_irq_type = {
    "none",
    startup_none,
    shutdown_none,
    enable_none,
    disable_none,
    ack_none,
    end_none
};

int __assign_irq_vector(
    int irq, struct irq_cfg *cfg, const cpumask_t *mask)
{
    /*
     * NOTE! The local APIC isn't very good at handling
     * multiple interrupts a
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Global definitions for the INET interface module.
 *
 * Version:	@(#)if.h	1.0.2	04/18/93
 *
 * Authors:	Original taken from Berkeley UNIX 4.3, (c) UCB 1982-1988
 *		Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#ifndef _LINUX_IF_H
#define _LINUX_IF_H

#include <linux/types.h>		/* for "__kernel_caddr_t" et al	*/

#define	IFNAMSIZ	16

/* Standard interface flags (netdevice->flags). */
#define	IFF_UP		0x1		/* interface is up		*/
#define	IFF_BROADCAST	0x2		/* broadcast address valid	*/
#define	IFF_DEBUG	0x4		/* turn on debugging		*/
#define	IFF_LOOPBACK	0x8		/* is a loopback net		*/
#define	IFF_POINTOPOINT	0x10		/* interface has p-p link	*/
#define	IFF_NOTRAILERS	0x20		/* avoid use of trailers	*/
#define	IFF_RUNNING	0x40		/* interface RFC2863 OPER_UP	*/
#define	IFF_NOARP	0x80		/* no ARP protocol		*/
#define	IFF_PROMISC	0x100		/* receive all packets		*/
#define	IFF_ALLMULTI	0x200		/* receive all multicast packets*/

#define IFF_MASTER	0x400		/* master of a load balancer 	*/
#define IFF_SLAVE	0x800		/* slave of a load balancer	*/

#define IFF_MULTICAST	0x1000		/* Supports multicast		*/

#define IFF_PORTSEL	0x2000          /* can set media type		*/
#define IFF_AUTOMEDIA	0x4000		/* auto media select active	*/
#define IFF_DYNAMIC	0x8000		/* dialup device with changing addresses*/

#define IFF_LOWER_UP	0x10000		/* driver signals L1 up		*/
#define IFF_DORMANT	0x20000		/* driver signals dormant	*/

#define IFF_ECHO	0x40000		/* echo sent packets		*/

#define IFF_VOLATILE	(IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\
		IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)

/* Private (from user) interface flags (netdevice->priv_flags). */
#define IFF_802_1Q_VLAN 0x1             /* 802.1Q VLAN device.          */
#define IFF_EBRIDGE	0x2		/* Ethernet bridging device.	*/
#define IFF_SLAVE_INACTIVE	0x4	/* bonding slave not the curr. active */
#define IFF_MASTER_8023AD	0x8	/* bonding master, 802.3ad. 	*/
#define IFF_MASTER_ALB	0x10		/* bonding master, balance-alb.	*/
#define IFF_BONDING	0x20		/* bonding master or slave	*/
#define IFF_SLAVE_NEEDARP 0x40		/* need ARPs for validation	*/
#define IFF_ISATAP	0x80		/* ISATAP interface (RFC4214)	*/

#define IF_GET_IFACE	0x0001		/* for querying only */
#define IF_GET_PROTO	0x0002

/* For definitions see hdlc.h */
#define IF_IFACE_V35	0x1000		/* V.35 serial interface	*/
#define IF_IFACE_V24	0x1001		/* V.24 serial interface	*/
#define IF_IFACE_X21	0x1002		/* X.21 serial interface	*/
#define IF_IFACE_T1	0x1003		/* T1 telco serial interface	*/
#define IF_IFACE_E1	0x1004		/* E1 telco serial interface	*/
#define IF_IFACE_SYNC_SERIAL 0x1005	/* can't be set by software	*/
#define IF_IFACE_X21D   0x1006          /* X.21 Dual Clocking (FarSite) */

/* For definitions see hdlc.h */
#define IF_PROTO_HDLC	0x2000		/* raw HDLC protocol		*/
#define IF_PROTO_PPP	0x2001		/* PPP protocol			*/
#define IF_PROTO_CISCO	0x2002		/* Cisco HDLC protocol		*/
#define IF_PROTO_FR	0x2003		/* Frame Relay protocol		*/
#define IF_PROTO_FR_ADD_PVC 0x2004	/*    Create FR PVC		*/
#define IF_PROTO_FR_DEL_PVC 0x2005	/*    Delete FR PVC		*/
#define IF_PROTO_X25	0x2006		/* X.25				*/
#define IF_PROTO_HDLC_ETH 0x2007	/* raw HDLC, Ethernet emulation	*/
#define IF_PROTO_FR_ADD_ETH_PVC 0x2008	/*  Create FR Ethernet-bridged PVC */
#define IF_PROTO_FR_DEL_ETH_PVC 0x2009	/*  Delete FR Ethernet-bridged PVC */
#define IF_PROTO_FR_PVC	0x200A		/* for reading PVC status	*/
#define IF_PROTO_FR_ETH_PVC 0x200B
#define IF_PROTO_RAW    0x200C          /* RAW Socket                   */

/* RFC 2863 operational status */
enum {
	IF_OPER_UNKNOWN,
	IF_OPER_NOTPRESENT,
	IF_OPER_DOWN,
	IF_OPER_LOWERLAYERDOWN,
	IF_OPER_TESTING,
	IF_OPER_DORMANT,
	IF_OPER_UP,
};

/* link modes */
enum {
	IF_LINK_MODE_DEFAULT,
	IF_LINK_MODE_DORMANT,	/* limit upward transition to dormant */
};

/*
 *	Device mapping structure. I'd just gone off and designed a 
 *	beautiful scheme using only loadable modules with arguments
 *	for driver options and along come the PCMCIA people 8)
 *
 *	Ah well. The get() side of this is good for WDSETUP, and it'll
 *	be handy for debugging things. The set side is fine for now and
 *	being very small might be worth keeping for clean configuration.
 */

struct ifmap 
{
	unsigned long mem_start;
	unsigned long mem_end;
	unsigned short base_addr; 
	unsigned char irq;
	unsigned char dma;
	unsigned char port;
	/* 3 bytes spare */
};


#endif /* _LINUX_IF_H */
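/*
 * Illustrative usage sketch (not part of the original header): the IFF_*
 * interface flags defined above are what userspace reads back through the
 * standard SIOCGIFFLAGS ioctl on any socket. This minimal example assumes a
 * Linux userspace build environment and an interface named "eth0"; both the
 * interface name and the choice of an AF_INET datagram socket are assumptions
 * made purely for illustration.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
    struct ifreq ifr;
    int fd = socket(AF_INET, SOCK_DGRAM, 0);   /* any socket will do */

    if (fd < 0)
        return 1;

    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);   /* hypothetical device */

    /* Fetch netdevice->flags and test the bits defined above. */
    if (ioctl(fd, SIOCGIFFLAGS, &ifr) == 0)
        printf("eth0: %s, %s\n",
               (ifr.ifr_flags & IFF_UP) ? "up" : "down",
               (ifr.ifr_flags & IFF_RUNNING) ? "running" : "not running");

    close(fd);
    return 0;
}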
ts we already set,
             *   - re-enable the vector, and
             *   - skip the timer setup below.
             */
        }
    }

    migrate_timer(&action->eoi_timer, smp_processor_id());
    set_timer(&action->eoi_timer, NOW() + MILLISECS(1));
}

/*
 * Retrieve Xen irq-descriptor corresponding to a domain-specific irq.
 * The descriptor is returned locked. This function is safe against changes
 * to the per-domain irq-to-vector mapping.
 */
struct irq_desc *domain_spin_lock_irq_desc(
    struct domain *d, int pirq, unsigned long *pflags)
{
    int irq;
    unsigned long flags;
    struct irq_desc *desc;

    for ( ; ; )
    {
        irq = domain_pirq_to_irq(d, pirq);
        if ( irq <= 0 )
            return NULL;
        desc = irq_to_desc(irq);
        spin_lock_irqsave(&desc->lock, flags);
        if ( irq == domain_pirq_to_irq(d, pirq) )
            break;
        spin_unlock_irqrestore(&desc->lock, flags);
    }

    if ( pflags != NULL )
        *pflags = flags;

    return desc;
}

/* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
static void flush_ready_eoi(void)
{
    struct pending_eoi *peoi = this_cpu(pending_eoi);
    struct irq_desc    *desc;
    int                 irq, sp;

    ASSERT(!local_irq_is_enabled());

    sp = pending_eoi_sp(peoi);

    while ( (--sp >= 0) && peoi[sp].ready )
    {
        irq = peoi[sp].irq;
        ASSERT(irq > 0);
        desc = irq_to_desc(irq);
        spin_lock(&desc->lock);
        desc->handler->end(irq);
        spin_unlock(&desc->lock);
    }

    pending_eoi_sp(peoi) = sp+1;
}

static void __set_eoi_ready(struct irq_desc *desc)
{
    irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
    struct pending_eoi *peoi = this_cpu(pending_eoi);
    int                 irq, sp;

    irq = desc - irq_desc;

    if ( !(desc->status & IRQ_GUEST) ||
         (action->in_flight != 0) ||
         !cpu_test_and_clear(smp_processor_id(), action->cpu_eoi_map) )
        return;

    sp = pending_eoi_sp(peoi);
    do {
        ASSERT(sp > 0);
    } while ( peoi[--sp].irq != irq );
    ASSERT(!peoi[sp].ready);
    peoi[sp].ready = 1;
}

/* Mark specified IRQ as ready-for-EOI (if it really is) and attempt to EOI. */
static void set_eoi_ready(void *data)
{
    struct irq_desc *desc = data;

    ASSERT(!local_irq_is_enabled());

    spin_lock(&desc->lock);
    __set_eoi_ready(desc);
    spin_unlock(&desc->lock);

    flush_ready_eoi();
}

static void __pirq_guest_eoi(struct domain *d, int pirq)
{
    struct irq_desc    *desc;
    irq_guest_action_t *action;
    cpumask_t           cpu_eoi_map;
    int                 irq;

    ASSERT(local_irq_is_enabled());
    desc = domain_spin_lock_irq_desc(d, pirq, NULL);
    if ( desc == NULL )
        return;

    action = (irq_guest_action_t *)desc->action;
    irq = desc - irq_desc;

    if ( action->ack_type == ACKTYPE_NONE )
    {
        ASSERT(!test_bit(pirq, d->pirq_mask));
        stop_timer(&action->eoi_timer);
        _irq_guest_eoi(desc);
    }

    if ( unlikely(!test_and_clear_bit(pirq, d->pirq_mask)) ||
         unlikely(--action->in_flight != 0) )
    {
        spin_unlock_irq(&desc->lock);
        return;
    }

    if ( action->ack_type == ACKTYPE_UNMASK )
    {
        ASSERT(cpus_empty(action->cpu_eoi_map));
        desc->handler->end(irq);
        spin_unlock_irq(&desc->lock);
        return;
    }

    ASSERT(action->ack_type == ACKTYPE_EOI);

    cpu_eoi_map = action->cpu_eoi_map;

    if ( cpu_test_and_clear(smp_processor_id(), cpu_eoi_map) )
    {
        __set_eoi_ready(desc);
        spin_unlock(&desc->lock);
        flush_ready_eoi();
        local_irq_enable();
    }
    else
    {
        spin_unlock_irq(&desc->lock);
    }

    if ( !cpus_empty(cpu_eoi_map) )
        on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
}

int pirq_guest_eoi(struct domain *d, int irq)
{
    if ( (irq < 0) || (irq >= d->nr_pirqs) )
        return -EINVAL;

    __pirq_guest_eoi(d, irq);

    return 0;
}

int pirq_guest_unmask(struct domain *d)
{
    unsigned int irq, nr = d->nr_pirqs;

    for ( irq = find_first_bit(d->pirq_mask, nr);
          irq < nr;
          irq = find_next_bit(d->pirq_mask, nr, irq+1) )
    {
        if ( !test_bit(d->pirq_to_evtchn[irq],
                       &shared_info(d, evtchn_mask)) )
            __pirq_guest_eoi(d, irq);
    }

    return 0;
}

extern int ioapic_ack_new;
static int pirq_acktype(struct domain *d, int pirq)
{
    struct irq_desc *desc;
    int irq;

    irq = domain_pirq_to_irq(d, pirq);
    if ( irq <= 0 )
        return ACKTYPE_NONE;

    desc = irq_to_desc(irq);

    if ( desc->handler == &no_irq_type )
        return ACKTYPE_NONE;

    /*
     * Edge-triggered IO-APIC and LAPIC interrupts need no final
     * acknowledgement: we ACK early during interrupt processing.
     */
    if ( !strcmp(desc->handler->typename, "IO-APIC-edge") ||
         !strcmp(desc->handler->typename, "local-APIC-edge") )
        return ACKTYPE_NONE;

    /*
     * MSIs are treated as edge-triggered interrupts, except
     * when there is no proper way to mask them.
     */
    if ( desc->handler == &pci_msi_type )
        return msi_maskable_irq(desc->msi_desc) ? ACKTYPE_NONE : ACKTYPE_EOI;

    /*
     * Level-triggered IO-APIC interrupts need to be acknowledged on the CPU
     * on which they were received. This is because we tickle the LAPIC to EOI.
     */
    if ( !strcmp(desc->handler->typename, "IO-APIC-level") )
        return ioapic_ack_new ? ACKTYPE_EOI : ACKTYPE_UNMASK;

    /* Legacy PIC interrupts can be acknowledged from any CPU. */
    if ( !strcmp(desc->handler->typename, "XT-PIC") )
        return ACKTYPE_UNMASK;

    printk("Unknown PIC type '%s' for IRQ %d\n", desc->handler->typename, irq);
    BUG();

    return 0;
}

int pirq_shared(struct domain *d, int pirq)
{
    struct irq_desc    *desc;
    irq_guest_action_t *action;
    unsigned long       flags;
    int                 shared;

    desc = domain_spin_lock_irq_desc(d, pirq, &flags);
    if ( desc == NULL )
        return 0;

    action = (irq_guest_action_t *)desc->action;
    shared = ((desc->status & IRQ_GUEST) && (action->nr_guests > 1));

    spin_unlock_irqrestore(&desc->lock, flags);

    return shared;
}

int pirq_guest_bind(struct vcpu *v, int pirq, int will_share)
{
    unsigned int        irq;
    struct irq_desc    *desc;
    irq_guest_action_t *action, *newaction = NULL;
    int                 rc = 0;
    cpumask_t           cpumask = CPU_MASK_NONE;

    WARN_ON(!spin_is_locked(&v->domain->event_lock));
    BUG_ON(!local_irq_is_enabled());

 retry:
    desc = domain_spin_lock_irq_desc(v->domain, pirq, NULL);
    if ( desc == NULL )
    {
        rc = -EINVAL;
        goto out;
    }

    action = (irq_guest_action_t *)desc->action;
    irq = desc - irq_desc;

    if ( !(desc->status & IRQ_GUEST) )
    {
        if ( desc->action != NULL )
        {
            gdprintk(XENLOG_INFO,
                     "Cannot bind IRQ %d to guest. In use by '%s'.\n",
                     pirq, desc->action->name);
            rc = -EBUSY;
            goto unlock_out;
        }

        if ( newaction == NULL )
        {
            spin_unlock_irq(&desc->lock);
            if ( (newaction = xmalloc(irq_guest_action_t)) != NULL )
                goto retry;
            gdprintk(XENLOG_INFO,
                     "Cannot bind IRQ %d to guest. Out of memory.\n",
                     pirq);
            rc = -ENOMEM;
            goto out;
        }

        action = newaction;
        desc->action = (struct irqaction *)action;
        newaction = NULL;

        action->nr_guests   = 0;
        action->in_flight   = 0;
        action->shareable   = will_share;
        action->ack_type    = pirq_acktype(v->domain, pirq);
        cpus_clear(action->cpu_eoi_map);
        init_timer(&action->eoi_timer, irq_guest_eoi_timer_fn, desc, 0);

        desc->depth = 0;
        desc->status |= IRQ_GUEST;
        desc->status &= ~IRQ_DISABLED;
        desc->handler->startup(irq);

        /* Attempt to bind the interrupt target to the correct CPU. */
        cpu_set(v->processor, cpumask);
        if ( !opt_noirqbalance && (desc->handler->set_affinity != NULL) )
            desc->handler->set_affinity(irq, cpumask);
    }
    else if ( !will_share || !action->shareable )
    {
        gdprintk(XENLOG_INFO, "Cannot bind IRQ %d to guest. %s.\n",
                 pirq,
                 will_share ?
                 "Others do not share" :
                 "Will not share with others");
        rc = -EBUSY;
        goto unlock_out;
    }
    else if ( action->nr_guests == 0 )
    {
        /*
         * Indicates that an ACKTYPE_EOI interrupt is being released.
         * Wait for that to happen before continuing.
         */
        ASSERT(action->ack_type == ACKTYPE_EOI);
        ASSERT(desc->status & IRQ_DISABLED);
        spin_unlock_irq(&desc->lock);
        cpu_relax();
        goto retry;
    }

    if ( action->nr_guests == IRQ_MAX_GUESTS )
    {
        gdprintk(XENLOG_INFO, "Cannot bind IRQ %d to guest. "
                 "Already at max share.\n", pirq);
        rc = -EBUSY;
        goto unlock_out;
    }

    action->guest[action->nr_guests++] = v->domain;

    if ( action->ack_type != ACKTYPE_NONE )
        set_pirq_eoi(v->domain, pirq);
    else
        clear_pirq_eoi(v->domain, pirq);

 unlock_out:
    spin_unlock_irq(&desc->lock);
 out:
    if ( newaction != NULL )
        xfree(newaction);
    return rc;
}

static irq_guest_action_t *__pirq_guest_unbind(
    struct domain *d, int pirq, struct irq_desc *desc)
{
    unsigned int        irq;
    irq_guest_action_t *action;
    cpumask_t           cpu_eoi_map;
    int                 i;

    BUG_ON(!(desc->status & IRQ_GUEST));

    action = (irq_guest_action_t *)desc->action;
    irq = desc - irq_desc;

    if ( unlikely(action == NULL) )
    {
        dprintk(XENLOG_G_WARNING, "dom%d: pirq %d: desc->action is NULL!\n",
                d->domain_id, pirq);
        return NULL;
    }

    for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
        continue;
    BUG_ON(i == action->nr_guests);
    memmove(&action->guest[i], &action->guest[i+1],
            (action->nr_guests-i-1) * sizeof(action->guest[0]));
    action->nr_guests--;

    switch ( action->ack_type )
    {
    case ACKTYPE_UNMASK:
        if ( test_and_clear_bit(pirq, d->pirq_mask) &&
             (--action->in_flight == 0) )
            desc->handler->end(irq);
        break;
    case ACKTYPE_EOI:
        /* NB. If #guests == 0 then we clear the eoi_map later on. */
        if ( test_and_clear_bit(pirq, d->pirq_mask) &&
             (--action->in_flight == 0) &&
             (action->nr_guests != 0) )
        {
            cpu_eoi_map = action->cpu_eoi_map;
            spin_unlock_irq(&desc->lock);
            on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
            spin_lock_irq(&desc->lock);
        }
        break;
    case ACKTYPE_NONE:
        stop_timer(&action->eoi_timer);
        _irq_guest_eoi(desc);
        break;
    }

    /*
     * The guest cannot re-bind to this IRQ until this function returns. So,
     * when we have flushed this IRQ from pirq_mask, it should remain flushed.
     */
    BUG_ON(test_bit(pirq, d->pirq_mask));

    if ( action->nr_guests != 0 )
        return NULL;

    BUG_ON(action->in_flight != 0);

    /* Disabling IRQ before releasing the desc_lock avoids an IRQ storm. */
    desc->depth   = 1;
    desc->status |= IRQ_DISABLED;
    desc->handler->disable(irq);

    /*
     * Mark any remaining pending EOIs as ready to flush.
     * NOTE: We will need to make this a stronger barrier if in future we allow
     * interrupt vectors to be re-bound to a different PIC. In that case we
     * would need to flush all ready EOIs before returning as otherwise the
     * desc->handler could change and we would call the wrong 'end' hook.
     */
    cpu_eoi_map = action->cpu_eoi_map;
    if ( !cpus_empty(cpu_eoi_map) )
    {
        BUG_ON(action->ack_type != ACKTYPE_EOI);
        spin_unlock_irq(&desc->lock);
        on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 1);
        spin_lock_irq(&desc->lock);
    }

    BUG_ON(!cpus_empty(action->cpu_eoi_map));

    desc->action = NULL;
    desc->status &= ~(IRQ_GUEST|IRQ_GUEST_EOI_PENDING|IRQ_INPROGRESS);
    desc->handler->shutdown(irq);

    /* Caller frees the old guest descriptor block. */
    return action;
}

void pirq_guest_unbind(struct domain *d, int pirq)
{
    irq_guest_action_t *oldaction = NULL;
    struct irq_desc *desc;
    int irq;

    WARN_ON(!spin_is_locked(&d->event_lock));

    BUG_ON(!local_irq_is_enabled());
    desc = domain_spin_lock_irq_desc(d, pirq, NULL);

    if ( desc == NULL )
    {
        irq = -domain_pirq_to_irq(d, pirq);
        BUG_ON(irq <= 0);
        desc = irq_to_desc(irq);
        spin_lock_irq(&desc->lock);
        d->arch.pirq_irq[pirq] = d->arch.irq_pirq[irq] = 0;
    }
    else
    {
        oldaction = __pirq_guest_unbind(d, pirq, desc);
    }

    spin_unlock_irq(&desc->lock);

    if ( oldaction != NULL )
    {
        kill_timer(&oldaction->eoi_timer);
        xfree(oldaction);
    }
}

static int pirq_guest_force_unbind(struct domain *d, int irq)
{
    struct irq_desc *desc;
    irq_guest_action_t *action, *oldaction = NULL;
    int i, bound = 0;

    WARN_ON(!spin_is_locked(&d->event_lock));

    BUG_ON(!local_irq_is_enabled());
    desc = domain_spin_lock_irq_desc(d, irq, NULL);
    BUG_ON(desc == NULL);

    if ( !(desc->status & IRQ_GUEST) )
        goto out;

    action = (irq_guest_action_t *)desc->action;
    if ( unlikely(action == NULL) )
    {
        dprintk(XENLOG_G_WARNING, "dom%d: pirq %d: desc->action is NULL!\n",
                d->domain_id, irq);
        goto out;
    }

    for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
        continue;
    if ( i == action->nr_guests )
        goto out;

    bound = 1;
    oldaction = __pirq_guest_unbind(d, irq, desc);

 out:
    spin_unlock_irq(&desc->lock);

    if ( oldaction != NULL )
    {
        kill_timer(&oldaction->eoi_timer);
        xfree(oldaction);
    }

    return bound;
}

int get_free_pirq(struct domain *d, int type, int index)
{
    int i;

    ASSERT(spin_is_locked(&d->event_lock));

    if ( type == MAP_PIRQ_TYPE_GSI )
    {
        for ( i = 16; i < nr_irqs_gsi; i++ )
            if ( !d->arch.pirq_irq[i] )
            {
                if ( !is_hvm_domain(d) ||
                     d->arch.pirq_emuirq[i] == IRQ_UNBOUND )
                    break;
            }
        if ( i == nr_irqs_gsi )
            return -ENOSPC;
    }
    else
    {
        for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; i-- )
            if ( !d->arch.pirq_irq[i] )
            {
                if ( !is_hvm_domain(d) ||
                     d->arch.pirq_emuirq[i] == IRQ_UNBOUND )
                    break;
            }
        if ( i < nr_irqs_gsi )
            return -ENOSPC;
    }

    return i;
}

int map_domain_pirq(
    struct domain *d, int pirq, int irq, int type, void *data)
{
    int ret = 0;
    int old_irq, old_pirq;
    struct irq_desc *desc;
    unsigned long flags;
    struct msi_desc *msi_desc;
    struct pci_dev *pdev = NULL;

    ASSERT(spin_is_locked(&pcidevs_lock));
    ASSERT(spin_is_locked(&d->event_lock));

    if ( !IS_PRIV(current->domain) &&
         !(IS_PRIV_FOR(current->domain, d) &&
           irq_access_permitted(current->domain, pirq)) )
        return -EPERM;

    if ( pirq < 0 || pirq >= d->nr_pirqs || irq < 0 || irq >= nr_irqs )
    {
        dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or irq %d\n",
                d->domain_id, pirq, irq);
        return -EINVAL;
    }

    old_irq = domain_pirq_to_irq(d, pirq);
    old_pirq = domain_irq_to_pirq(d, irq);

    if ( (old_irq > 0 && (old_irq != irq) ) ||
         (old_pirq && (old_pirq != pirq)) )
    {
        dprintk(XENLOG_G_WARNING, "dom%d: pirq %d or irq %d already mapped\n",
                d->domain_id, pirq, irq);
        return 0;
    }

    ret = irq_permit_access(d, pirq);
    if ( ret )
    {
        dprintk(XENLOG_G_ERR, "dom%d: could not permit access to irq %d\n",
                d->domain_id, pirq);
        return ret;
    }

    desc = irq_to_desc(irq);

    if ( type == MAP_PIRQ_TYPE_MSI )
    {
        struct msi_info *msi = (struct msi_info *)data;

        ret = -ENODEV;
        if ( !cpu_has_apic )
            goto done;

        pdev = pci_get_pdev(msi->bus, msi->devfn);
        ret = pci_enable_msi(msi, &msi_desc);
        if ( ret )
            goto done;

        spin_lock_irqsave(&desc->lock, flags);

        if ( desc->handler != &no_irq_type )
            dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
                    d->domain_id, irq);
        desc->handler = &pci_msi_type;
        d->arch.pirq_irq[pirq] = irq;
        d->arch.irq_pirq[irq] = pirq;
        setup_msi_irq(pdev, msi_desc, irq);
        spin_unlock_irqrestore(&desc->lock, flags);
    }
    else
    {
        spin_lock_irqsave(&desc->lock, flags);
        d->arch.pirq_irq[pirq] = irq;
        d->arch.irq_pirq[irq] = pirq;
        spin_unlock_irqrestore(&desc->lock, flags);
    }

 done:
    return ret;
}

/* The pirq should have been unbound before this call. */
int unmap_domain_pirq(struct domain *d, int pirq)
{
    unsigned long flags;
    struct irq_desc *desc;
    int irq, ret = 0;
    bool_t forced_unbind;
    struct msi_desc *msi_desc = NULL;

    if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
        return -EINVAL;

    if ( !IS_PRIV_FOR(current->domain, d) )
        return -EINVAL;

    ASSERT(spin_is_locked(&pcidevs_lock));
    ASSERT(spin_is_locked(&d->event_lock));

    irq = domain_pirq_to_irq(d, pirq);
    if ( irq <= 0 )
    {
        dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
                d->domain_id, pirq);
        ret = -EINVAL;
        goto done;
    }

    forced_unbind = pirq_guest_force_unbind(d, pirq);
    if ( forced_unbind )
        dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n",
                d->domain_id, pirq);

    desc = irq_to_desc(irq);

    if ( (msi_desc = desc->msi_desc) != NULL )
        pci_disable_msi(msi_desc);

    spin_lock_irqsave(&desc->lock, flags);

    BUG_ON(irq != domain_pirq_to_irq(d, pirq));

    if ( !forced_unbind )
    {
        d->arch.pirq_irq[pirq] = 0;
        d->arch.irq_pirq[irq] = 0;
    }
    else
    {
        d->arch.pirq_irq[pirq] = -irq;
        d->arch.irq_pirq[irq] = -pirq;
    }

    spin_unlock_irqrestore(&desc->lock, flags);
    if (msi_desc)
        msi_free_irq(msi_desc);

    ret = irq_deny_access(d, pirq);
    if ( ret )
        dprintk(XENLOG_G_ERR, "dom%d: could not deny access to irq %d\n",
                d->domain_id, pirq);

    if ( desc->handler == &pci_msi_type )
        desc->handler = &no_irq_type;

 done:
    return ret;
}

void free_domain_pirqs(struct domain *d)
{
    int i;

    spin_lock(&pcidevs_lock);
    spin_lock(&d->event_lock);

    for ( i = 0; i < d->nr_pirqs; i++ )
        if ( d->arch.pirq_irq[i] > 0 )
            unmap_domain_pirq(d, i);

    spin_unlock(&d->event_lock);
    spin_unlock(&pcidevs_lock);
}

extern void dump_ioapic_irq_info(void);

static void dump_irqs(unsigned char key)
{
    int i, irq, pirq;
    struct irq_desc *desc;
    struct irq_cfg *cfg;
    irq_guest_action_t *action;
    struct domain *d;
    unsigned long flags;

    printk("Guest interrupt information:\n");

    for ( irq = 0; irq < nr_irqs; irq++ )
    {
        desc = irq_to_desc(irq);
        cfg = desc->chip_data;

        if ( !desc->handler || desc->handler == &no_irq_type )
            continue;

        spin_lock_irqsave(&desc->lock, flags);

        cpumask_scnprintf(keyhandler_scratch, sizeof(keyhandler_scratch),
                          desc->affinity);
        printk("   IRQ:%4d affinity:%s vec:%02x type=%-15s"
               " status=%08x ",
               irq, keyhandler_scratch, cfg->vector,
               desc->handler->typename, desc->status);

        if ( !(desc->status & IRQ_GUEST) )
            printk("mapped, unbound\n");
        else
        {
            action = (irq_guest_action_t *)desc->action;

            printk("in-flight=%d domain-list=", action->in_flight);

            for ( i = 0; i < action->nr_guests; i++ )
            {
                d = action->guest[i];
                pirq = domain_irq_to_pirq(d, irq);
                printk("%u:%3d(%c%c%c%c)",
                       d->domain_id, pirq,
                       (test_bit(d->pirq_to_evtchn[pirq],
                                 &shared_info(d, evtchn_pending)) ?
                        'P' : '-'),
                       (test_bit(d->pirq_to_evtchn[pirq] /
                                 BITS_PER_EVTCHN_WORD(d),
                                 &vcpu_info(d->vcpu[0], evtchn_pending_sel)) ?
                        'S' : '-'),
                       (test_bit(d->pirq_to_evtchn[pirq],
                                 &shared_info(d, evtchn_mask)) ?
                        'M' : '-'),
                       (test_bit(pirq, d->pirq_mask) ?
                        'M' : '-'));

                /* Separate entries with commas (no trailing comma). */
                if ( i != action->nr_guests - 1 )
                    printk(",");
            }

            printk("\n");
        }

        spin_unlock_irqrestore(&desc->lock, flags);
    }

    dump_ioapic_irq_info();
}

static struct keyhandler dump_irqs_keyhandler = {
    .diagnostic = 1,
    .u.fn = dump_irqs,
    .desc = "dump interrupt bindings"
};

static int __init setup_dump_irqs(void)
{
    register_keyhandler('i', &dump_irqs_keyhandler);
    return 0;
}
__initcall(setup_dump_irqs);

/* A cpu has been removed from cpu_online_mask. Re-set irq affinities. */
void fixup_irqs(void)
{
    unsigned int irq, sp;
    static int warned;
    struct irq_desc *desc;
    irq_guest_action_t *action;
    struct pending_eoi *peoi;

    for ( irq = 0; irq < nr_irqs; irq++ )
    {
        int break_affinity = 0;
        int set_affinity = 1;
        cpumask_t affinity;

        if ( irq == 2 )
            continue;

        desc = irq_to_desc(irq);

        spin_lock(&desc->lock);

        affinity = desc->affinity;
        if ( !desc->action || cpus_subset(affinity, cpu_online_map) )
        {
            spin_unlock(&desc->lock);
            continue;
        }

        cpus_and(affinity, affinity, cpu_online_map);
        if ( cpus_empty(affinity) )
        {
            break_affinity = 1;
            affinity = cpu_online_map;
        }

        if ( desc->handler->disable )
            desc->handler->disable(irq);

        if ( desc->handler->set_affinity )
            desc->handler->set_affinity(irq, affinity);
        else if ( !(warned++) )
            set_affinity = 0;

        if ( desc->handler->enable )
            desc->handler->enable(irq);

        spin_unlock(&desc->lock);

        if ( break_affinity && set_affinity )
            printk("Broke affinity for irq %i\n", irq);
        else if ( !set_affinity )
            printk("Cannot set affinity for irq %i\n", irq);
    }

    /* That doesn't seem sufficient. Give it 1ms. */
    local_irq_enable();
    mdelay(1);
    local_irq_disable();

    /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
    for ( irq = 0; irq < nr_irqs; irq++ )
    {
        desc = irq_to_desc(irq);
        if ( !(desc->status & IRQ_GUEST) )
            continue;
        action = (irq_guest_action_t *)desc->action;
        cpu_clear(smp_processor_id(), action->cpu_eoi_map);
    }

    /* Flush the interrupt EOI stack. */
    peoi = this_cpu(pending_eoi);
    for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
        peoi[sp].ready = 1;
    flush_ready_eoi();
}

int map_domain_emuirq_pirq(struct domain *d, int pirq, int emuirq)
{
    int old_emuirq = IRQ_UNBOUND, old_pirq = IRQ_UNBOUND;

    ASSERT(spin_is_locked(&d->event_lock));

    if ( !is_hvm_domain(d) )
        return -EINVAL;

    if ( pirq < 0 || pirq >= d->nr_pirqs ||
         emuirq == IRQ_UNBOUND || emuirq >= (int) nr_irqs )
    {
        dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or emuirq %d\n",
                d->domain_id, pirq, emuirq);
        return -EINVAL;
    }

    old_emuirq = domain_pirq_to_emuirq(d, pirq);
    if ( emuirq != IRQ_PT )
        old_pirq = domain_emuirq_to_pirq(d, emuirq);

    if ( (old_emuirq != IRQ_UNBOUND && (old_emuirq != emuirq) ) ||
         (old_pirq != IRQ_UNBOUND && (old_pirq != pirq)) )
    {
        dprintk(XENLOG_G_WARNING, "dom%d: pirq %d or emuirq %d already mapped\n",
                d->domain_id, pirq, emuirq);
        return 0;
    }

    d->arch.pirq_emuirq[pirq] = emuirq;
    /* do not store emuirq mappings for pt devices */
    if ( emuirq != IRQ_PT )
        d->arch.emuirq_pirq[emuirq] = pirq;

    return 0;
}

int unmap_domain_pirq_emuirq(struct domain *d, int pirq)
{
    int emuirq, ret = 0;

    if ( !is_hvm_domain(d) )
        return -EINVAL;

    if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
        return -EINVAL;

    ASSERT(spin_is_locked(&d->event_lock));

    emuirq = domain_pirq_to_emuirq(d, pirq);
    if ( emuirq == IRQ_UNBOUND )
    {
        dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
                d->domain_id, pirq);
        ret = -EINVAL;
        goto done;
    }

    d->arch.pirq_emuirq[pirq] = IRQ_UNBOUND;
    if ( emuirq != IRQ_PT )
        d->arch.emuirq_pirq[emuirq] = IRQ_UNBOUND;

 done:
    return ret;
}

int hvm_domain_use_pirq(struct domain *d, int pirq)
{
    int emuirq;

    if ( !is_hvm_domain(d) )
        return 0;

    emuirq = domain_pirq_to_emuirq(d, pirq);
    if ( emuirq != IRQ_UNBOUND && d->pirq_to_evtchn[pirq] != 0 )
        return 1;
    else
        return 0;
}