#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/types.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/iocap.h>
#include <xen/serial.h>
#include <asm/current.h>
#include <asm/io_apic.h>
#include <asm/msi.h>
#include <asm/hvm/irq.h>
#include <asm/hypercall.h>
#include <public/xen.h>
#include <public/physdev.h>
#include <xsm/xsm.h>
#include <asm/p2m.h>

int physdev_map_pirq(domid_t, int type, int *index, int *pirq_p,
                     struct msi_info *);
int physdev_unmap_pirq(domid_t, int pirq);

#include "x86_64/mmconfig.h"

#ifndef COMPAT
typedef long ret_t;

static int physdev_hvm_map_pirq(
    struct domain *d, int type, int *index, int *pirq)
{
    int ret = 0;

    spin_lock(&d->event_lock);
    switch ( type )
    {
    case MAP_PIRQ_TYPE_GSI: {
        struct hvm_irq_dpci *hvm_irq_dpci;
        struct hvm_girq_dpci_mapping *girq;
        uint32_t machine_gsi = 0;

        if ( *index < 0 || *index >= NR_HVM_IRQS )
        {
            ret = -EINVAL;
            break;
        }

        /* Find the machine GSI corresponding to the emulated GSI. */
        hvm_irq_dpci = domain_get_irq_dpci(d);
        if ( hvm_irq_dpci )
        {
            BUILD_BUG_ON(ARRAY_SIZE(hvm_irq_dpci->girq) < NR_HVM_IRQS);
            list_for_each_entry ( girq, &hvm_irq_dpci->girq[*index], list )
                machine_gsi = girq->machine_gsi;
        }
        /* Found one: this means we are dealing with a pt device. */
        if ( machine_gsi )
        {
            *index = domain_pirq_to_irq(d, machine_gsi);
            *pirq = machine_gsi;
            ret = (*pirq > 0) ? 0 : *pirq;
        }
        /* We didn't find any: this means we are dealing with an
         * emulated device. */
        else
        {
            if ( *pirq < 0 )
                *pirq = get_free_pirq(d, type);
            ret = map_domain_emuirq_pirq(d, *pirq, *index);
        }
        break;
    }

    default:
        ret = -EINVAL;
        dprintk(XENLOG_G_WARNING, "map type %d not supported yet\n", type);
        break;
    }
    spin_unlock(&d->event_lock);
    return ret;
}

int physdev_map_pirq(domid_t domid, int type, int *index, int *pirq_p,
                     struct msi_info *msi)
{
    struct domain *d = current->domain;
    int pirq, irq, ret = 0;
    void *map_data = NULL;

    if ( domid == DOMID_SELF && is_hvm_domain(d) )
    {
        /*
         * Only makes sense for vector-based callback, else HVM-IRQ logic
         * calls back into itself and deadlocks on hvm_domain.irq_lock.
         */
        if ( !is_hvm_pv_evtchn_domain(d) )
            return -EINVAL;

        return physdev_hvm_map_pirq(d, type, index, pirq_p);
    }

    d = rcu_lock_domain_by_any_id(domid);
    if ( d == NULL )
        return -ESRCH;

    ret = xsm_map_domain_pirq(XSM_TARGET, d);
    if ( ret )
        goto free_domain;

    /* Verify or get irq. */
    switch ( type )
    {
    case MAP_PIRQ_TYPE_GSI:
        if ( *index < 0 || *index >= nr_irqs_gsi )
        {
            dprintk(XENLOG_G_ERR, "dom%d: map invalid irq %d\n",
                    d->domain_id, *index);
            ret = -EINVAL;
            goto free_domain;
        }

        irq = domain_pirq_to_irq(current->domain, *index);
        if ( irq <= 0 )
        {
            if ( is_hardware_domain(current->domain) )
                irq = *index;
            else
            {
                dprintk(XENLOG_G_ERR, "dom%d: map pirq with incorrect irq!\n",
                        d->domain_id);
                ret = -EINVAL;
                goto free_domain;
            }
        }
        break;

    /*
     * Note: MAP_PIRQ_TYPE_MULTI_MSI deliberately jumps into the middle of
     * the MSI case via the case label inside the if() below, so both types
     * share the create_irq() path.
     */
    case MAP_PIRQ_TYPE_MSI:
        if ( !msi->table_base )
            msi->entry_nr = 1;
        irq = *index;
        if ( irq == -1 )
    case MAP_PIRQ_TYPE_MULTI_MSI:
            irq = create_irq(NUMA_NO_NODE);

        if ( irq < nr_irqs_gsi || irq >= nr_irqs )
        {
            dprintk(XENLOG_G_ERR, "dom%d: can't create irq for msi!\n",
                    d->domain_id);
            ret = -EINVAL;
            goto free_domain;
        }

        msi->irq = irq;
        map_data = msi;
        break;

    default:
        dprintk(XENLOG_G_ERR, "dom%d: wrong map_pirq type %x\n",
                d->domain_id, type);
        ret = -EINVAL;
        goto free_domain;
    }

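    /*
     * The remainder of the mapping runs under pcidevs_lock and
     * d->event_lock.  If the caller passed *pirq_p < 0, Xen picks the
     * pirq: for MAP_PIRQ_TYPE_MULTI_MSI, entry_nr is rounded up to a
     * power of two and a contiguous block of free pirqs of that size is
     * required (on failure entry_nr is shrunk to a size that would fit,
     * so the caller at least learns how many vectors could be mapped);
     * for all other types a single free pirq is chosen.  If the caller
     * passed a specific pirq, it must agree with any mapping the irq
     * already has.
     */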
    spin_lock(&pcidevs_lock);
    /* Verify or get pirq. */
    spin_lock(&d->event_lock);
    pirq = domain_irq_to_pirq(d, irq);
    if ( *pirq_p < 0 )
    {
        if ( pirq )
        {
            dprintk(XENLOG_G_ERR, "dom%d: %d:%d already mapped to %d\n",
                    d->domain_id, *index, *pirq_p, pirq);
            if ( pirq < 0 )
            {
                ret = -EBUSY;
                goto done;
            }
        }
        else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
        {
            if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
                ret = -EDOM;
            else if ( msi->entry_nr != 1 && !iommu_intremap )
                ret = -EOPNOTSUPP;
            else
            {
                while ( msi->entry_nr & (msi->entry_nr - 1) )
                    msi->entry_nr += msi->entry_nr & -msi->entry_nr;
                pirq = get_free_pirqs(d, msi->entry_nr);
                if ( pirq < 0 )
                {
                    while ( (msi->entry_nr >>= 1) > 1 )
                        if ( get_free_pirqs(d, msi->entry_nr) > 0 )
                            break;
                    dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
                            d->domain_id, msi->entry_nr << 1);
                    ret = pirq;
                }
            }
            if ( ret < 0 )
                goto done;
        }
        else
        {
            pirq = get_free_pirq(d, type);
            if ( pirq < 0 )
            {
                dprintk(XENLOG_G_ERR, "dom%d: no free pirq\n", d->domain_id);
                ret = pirq;
                goto done;
            }
        }
    }
    else
    {
        if ( pirq && pirq != *pirq_p )
        {
            dprintk(XENLOG_G_ERR, "dom%d: pirq %d conflicts with irq %d\n",
                    d->domain_id, *index, *pirq_p);
            ret = -EEXIST;
            goto done;
        }
        else
            pirq = *pirq_p;
    }

    ret = map_domain_pirq(d, pirq, irq, type, map_data);
    if ( ret == 0 )
        *pirq_p = pirq;

 done:
    spin_unlock(&d->event_lock);
    spin_unlock(&pcidevs_lock);
    if ( ret != 0 )
        switch ( type )
        {
        case MAP_PIRQ_TYPE_MSI:
            if ( *index == -1 )
        case MAP_PIRQ_TYPE_MULTI_MSI:
                destroy_irq(irq);
            break;
        }
 free_domain:
    rcu_unlock_domain(d);
    return ret;
}

int physdev_unmap_pirq(domid_t domid, int pirq)
{
    struct domain *d;
    int ret;

    d = rcu_lock_domain_by_any_id(domid);
    if ( d == NULL )
        return -ESRCH;

    ret = xsm_unmap_domain_pirq(XSM_TARGET, d);
    if ( ret )
        goto free_domain;

    if ( is_hvm_domain(d) )
    {
        spin_lock(&d->event_lock);
        if ( domain_pirq_to_emuirq(d, pirq) != IRQ_UNBOUND )
            ret = unmap_domain_pirq_emuirq(d, pirq);
        spin_unlock(&d->event_lock);
        if ( domid == DOMID_SELF || ret )
            goto free_domain;
    }

    spin_lock(&pcidevs_lock);
    spin_lock(&d->event_lock);
    ret = unmap_domain_pirq(d, pirq);
    spin_unlock(&d->event_lock);
    spin_unlock(&pcidevs_lock);

 free_domain:
    rcu_unlock_domain(d);
    return ret;
}
#endif /* COMPAT */

ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    int irq;
    ret_t ret;
    struct vcpu *v = current;

    switch ( cmd )
    {
    case PHYSDEVOP_eoi: {
        struct physdev_eoi eoi;
        struct pirq *pirq;

        ret = -EFAULT;
        if ( copy_from_guest(&eoi, arg, 1) != 0 )
            break;
        ret = -EINVAL;
        if ( eoi.irq >= v->domain->nr_pirqs )
            break;
        spin_lock(&v->domain->event_lock);
        pirq = pirq_info(v->domain, eoi.irq);
        if ( !pirq )
        {
            spin_unlock(&v->domain->event_lock);
            break;
        }
        if ( !is_hvm_domain(v->domain) &&
             v->domain->arch.pv_domain.auto_unmask )
            evtchn_unmask(pirq->evtchn);
        if ( !is_hvm_domain(v->domain) ||
             domain_pirq_to_irq(v->domain, eoi.irq) > 0 )
            pirq_guest_eoi(pirq);
        if ( is_hvm_domain(v->domain) &&
             domain_pirq_to_emuirq(v->domain, eoi.irq) > 0 )
        {
            struct hvm_irq *hvm_irq = &v->domain->arch.hvm_domain.irq;
            int gsi = domain_pirq_to_emuirq(v->domain, eoi.irq);

            /* if this is a level irq and count > 0, send another
             * notification */
            if ( gsi >= NR_ISAIRQS /* ISA irqs are edge triggered */
                 && hvm_irq->gsi_assert_count[gsi] )
                send_guest_pirq(v->domain, pirq);
        }
        spin_unlock(&v->domain->event_lock);
        ret = 0;
        break;
    }

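    /*
     * PHYSDEVOP_pirq_eoi_gmfn_v1/v2: the guest registers a writable frame
     * (by GMFN) that Xen maps globally and records as pirq_eoi_map, where
     * it indicates per pirq whether an EOI hypercall is actually needed.
     * Only one frame can be registered per domain; a second attempt fails
     * with -EBUSY via the cmpxchg below.  The v1 variant additionally keeps
     * the historical behaviour of auto-unmasking the event channel on
     * PHYSDEVOP_eoi; with v2 the guest is expected to unmask itself.
     */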
    case PHYSDEVOP_pirq_eoi_gmfn_v2:
    case PHYSDEVOP_pirq_eoi_gmfn_v1: {
        struct physdev_pirq_eoi_gmfn info;
        unsigned long mfn;
        struct page_info *page;

        ret = -EFAULT;
        if ( copy_from_guest(&info, arg, 1) != 0 )
            break;

        ret = -EINVAL;
        page = get_page_from_gfn(current->domain, info.gmfn, NULL, P2M_ALLOC);
        if ( !page )
            break;
        if ( !get_page_type(page, PGT_writable_page) )
        {
            put_page(page);
            break;
        }
        mfn = page_to_mfn(page);

        if ( cmpxchg(&v->domain->arch.pv_domain.pirq_eoi_map_mfn,
                     0, mfn) != 0 )
        {
            put_page_and_type(mfn_to_page(mfn));
            ret = -EBUSY;
            break;
        }

        v->domain->arch.pv_domain.pirq_eoi_map = map_domain_page_global(mfn);
        if ( v->domain->arch.pv_domain.pirq_eoi_map == NULL )
        {
            v->domain->arch.pv_domain.pirq_eoi_map_mfn = 0;
            put_page_and_type(mfn_to_page(mfn));
            ret = -ENOSPC;
            break;
        }
        if ( cmd == PHYSDEVOP_pirq_eoi_gmfn_v1 )
            v->domain->arch.pv_domain.auto_unmask = 1;

        ret = 0;
        break;
    }

    /* Legacy since 0x00030202. */
    case PHYSDEVOP_IRQ_UNMASK_NOTIFY: {
        ret = pirq_guest_unmask(v->domain);
        break;
    }

    case PHYSDEVOP_irq_status_query: {
        struct physdev_irq_status_query irq_status_query;

        ret = -EFAULT;
        if ( copy_from_guest(&irq_status_query, arg, 1) != 0 )
            break;
        irq = irq_status_query.irq;
        ret = -EINVAL;
        if ( (irq < 0) || (irq >= v->domain->nr_pirqs) )
            break;
        irq_status_query.flags = 0;
        if ( is_hvm_domain(v->domain) &&
             domain_pirq_to_irq(v->domain, irq) <= 0 &&
             domain_pirq_to_emuirq(v->domain, irq) == IRQ_UNBOUND )
        {
            ret = -EINVAL;
            break;
        }

        /*
         * Even edge-triggered or message-based IRQs can need masking from
         * time to time. If the guest is not dynamically checking for this
         * via the new pirq_eoi_map mechanism, it must conservatively always
         * execute the EOI hypercall. In practice, this only really makes a
         * difference for maskable MSI sources, and if those are supported
         * then dom0 is probably modern anyway.
         */
        irq_status_query.flags |= XENIRQSTAT_needs_eoi;
        if ( pirq_shared(v->domain, irq) )
            irq_status_query.flags |= XENIRQSTAT_shared;
        ret = __copy_to_guest(arg, &irq_status_query, 1) ? -EFAULT : 0;
        break;
    }

    case PHYSDEVOP_map_pirq: {
        physdev_map_pirq_t map;
        struct msi_info msi;

        ret = -EFAULT;
        if ( copy_from_guest(&map, arg, 1) != 0 )
            break;

        switch ( map.type )
        {
        case MAP_PIRQ_TYPE_MSI_SEG:
            map.type = MAP_PIRQ_TYPE_MSI;
            msi.seg = map.bus >> 16;
            break;

        case MAP_PIRQ_TYPE_MULTI_MSI:
            if ( map.table_base )
                return -EINVAL;
            msi.seg = map.bus >> 16;
            break;

        default:
            msi.seg = 0;
            break;
        }
        msi.bus = map.bus;
        msi.devfn = map.devfn;
        msi.entry_nr = map.entry_nr;
        msi.table_base = map.table_base;
        ret = physdev_map_pirq(map.domid, map.type, &map.index, &map.pirq,
                               &msi);

        if ( map.type == MAP_PIRQ_TYPE_MULTI_MSI )
            map.entry_nr = msi.entry_nr;
        if ( __copy_to_guest(arg, &map, 1) )
            ret = -EFAULT;
        break;
    }

    case PHYSDEVOP_unmap_pirq: {
        struct physdev_unmap_pirq unmap;

        ret = -EFAULT;
        if ( copy_from_guest(&unmap, arg, 1) != 0 )
            break;

        ret = physdev_unmap_pirq(unmap.domid, unmap.pirq);
        break;
    }

    case PHYSDEVOP_apic_read: {
        struct physdev_apic apic;

        ret = -EFAULT;
        if ( copy_from_guest(&apic, arg, 1) != 0 )
            break;
        ret = xsm_apic(XSM_PRIV, v->domain, cmd);
        if ( ret )
            break;
        ret = ioapic_guest_read(apic.apic_physbase, apic.reg, &apic.value);
        if ( __copy_to_guest(arg, &apic, 1) )
            ret = -EFAULT;
        break;
    }

    case PHYSDEVOP_apic_write: {
        struct physdev_apic apic;

        ret = -EFAULT;
        if ( copy_from_guest(&apic, arg, 1) != 0 )
            break;
        ret = xsm_apic(XSM_PRIV, v->domain, cmd);
        if ( ret )
            break;
        ret = ioapic_guest_write(apic.apic_physbase, apic.reg, apic.value);
        break;
    }

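    /*
     * PHYSDEVOP_alloc_irq_vector is retained essentially for compatibility
     * with older kernels: vectors are managed inside Xen, so the handler
     * below performs no allocation and simply echoes the irq back to the
     * caller as the "vector".
     */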
    case PHYSDEVOP_alloc_irq_vector: {
        struct physdev_irq irq_op;

        ret = -EFAULT;
        if ( copy_from_guest(&irq_op, arg, 1) != 0 )
            break;

        /* Use the APIC check since this dummy hypercall should still only
         * be called by the domain with access to program the ioapic */
        ret = xsm_apic(XSM_PRIV, v->domain, cmd);
        if ( ret )
            break;

        /*
         * The vector is only used by the hypervisor, and dom0 shouldn't
         * touch it in its world. Return irq_op.irq as the vector, make this
         * hypercall a no-op, and defer the actual vector allocation until
         * dom0 tries to program the ioapic entry.
         */
        irq_op.vector = irq_op.irq;
        ret = 0;

        if ( __copy_to_guest(arg, &irq_op, 1) )
            ret = -EFAULT;
        break;
    }

    case PHYSDEVOP_set_iopl: {
        struct physdev_set_iopl set_iopl;

        ret = -EFAULT;
        if ( copy_from_guest(&set_iopl, arg, 1) != 0 )
            break;
        ret = -EINVAL;
        if ( set_iopl.iopl > 3 )
            break;
        ret = 0;
        v->arch.pv_vcpu.iopl = set_iopl.iopl;
        break;
    }

    case PHYSDEVOP_set_iobitmap: {
        struct physdev_set_iobitmap set_iobitmap;

        ret = -EFAULT;
        if ( copy_from_guest(&set_iobitmap, arg, 1) != 0 )
            break;
        ret = -EINVAL;
        if ( !guest_handle_okay(set_iobitmap.bitmap, IOBMP_BYTES) ||
             (set_iobitmap.nr_ports > 65536) )
            break;
        ret = 0;
#ifndef COMPAT
        v->arch.pv_vcpu.iobmp = set_iobitmap.bitmap;
#else
        guest_from_compat_handle(v->arch.pv_vcpu.iobmp,
                                 set_iobitmap.bitmap);
#endif
        v->arch.pv_vcpu.iobmp_limit = set_iobitmap.nr_ports;
        break;
    }

    case PHYSDEVOP_manage_pci_add: {
        struct physdev_manage_pci manage_pci;

        ret = -EFAULT;
        if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
            break;

        ret = pci_add_device(0, manage_pci.bus, manage_pci.devfn, NULL);
        break;
    }

    case PHYSDEVOP_manage_pci_remove: {
        struct physdev_manage_pci manage_pci;

        ret = -EFAULT;
        if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
            break;

        ret = pci_remove_device(0, manage_pci.bus, manage_pci.devfn);
        break;
    }

    case PHYSDEVOP_manage_pci_add_ext: {
        struct physdev_manage_pci_ext manage_pci_ext;
        struct pci_dev_info pdev_info;

        ret = -EFAULT;
        if ( copy_from_guest(&manage_pci_ext, arg, 1) != 0 )
            break;

        ret = -EINVAL;
        if ( (manage_pci_ext.is_extfn > 1) || (manage_pci_ext.is_virtfn > 1) )
            break;

        pdev_info.is_extfn = manage_pci_ext.is_extfn;
        pdev_info.is_virtfn = manage_pci_ext.is_virtfn;
        pdev_info.physfn.bus = manage_pci_ext.physfn.bus;
        pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn;
        ret = pci_add_device(0, manage_pci_ext.bus, manage_pci_ext.devfn,
                             &pdev_info);
        break;
    }

    case PHYSDEVOP_pci_device_add: {
        struct physdev_pci_device_add add;
        struct pci_dev_info pdev_info;

        ret = -EFAULT;
        if ( copy_from_guest(&add, arg, 1) != 0 )
            break;

        pdev_info.is_extfn = !!(add.flags & XEN_PCI_DEV_EXTFN);
        if ( add.flags & XEN_PCI_DEV_VIRTFN )
        {
            pdev_info.is_virtfn = 1;
            pdev_info.physfn.bus = add.physfn.bus;
            pdev_info.physfn.devfn = add.physfn.devfn;
        }
        else
            pdev_info.is_virtfn = 0;
        ret = pci_add_device(add.seg, add.bus, add.devfn, &pdev_info);
        break;
    }

    case PHYSDEVOP_pci_device_remove: {
        struct physdev_pci_device dev;

        ret = -EFAULT;
        if ( copy_from_guest(&dev, arg, 1) != 0 )
            break;

        ret = pci_remove_device(dev.seg, dev.bus, dev.devfn);
        break;
    }

    case PHYSDEVOP_prepare_msix:
    case PHYSDEVOP_release_msix: {
        struct physdev_pci_device dev;

        if ( copy_from_guest(&dev, arg, 1) )
            ret = -EFAULT;
        else
            ret = pci_prepare_msix(dev.seg, dev.bus, dev.devfn,
                                   cmd != PHYSDEVOP_prepare_msix);
        break;
    }

    case PHYSDEVOP_pci_mmcfg_reserved: {
        struct physdev_pci_mmcfg_reserved info;

        ret = xsm_resource_setup_misc(XSM_PRIV);
        if ( ret )
            break;

        ret = -EFAULT;
        if ( copy_from_guest(&info, arg, 1) )
            break;

        ret = pci_mmcfg_reserved(info.address, info.segment,
                                 info.start_bus, info.end_bus, info.flags);
        break;
    }

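    /*
     * PHYSDEVOP_restore_msi and PHYSDEVOP_restore_msi_ext both ask Xen to
     * reprogram a device's MSI state (typically when the hardware domain
     * has reset or resumed the device).  The legacy variant addresses the
     * device by bus/devfn on segment 0 only; the _ext variant takes a full
     * seg/bus/devfn.
     */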
    case PHYSDEVOP_restore_msi: {
        struct physdev_restore_msi restore_msi;
        struct pci_dev *pdev;

        ret = -EFAULT;
        if ( copy_from_guest(&restore_msi, arg, 1) != 0 )
            break;

        spin_lock(&pcidevs_lock);
        pdev = pci_get_pdev(0, restore_msi.bus, restore_msi.devfn);
        ret = pdev ? pci_restore_msi_state(pdev) : -ENODEV;
        spin_unlock(&pcidevs_lock);
        break;
    }

    case PHYSDEVOP_restore_msi_ext: {
        struct physdev_pci_device dev;
        struct pci_dev *pdev;

        ret = -EFAULT;
        if ( copy_from_guest(&dev, arg, 1) != 0 )
            break;

        spin_lock(&pcidevs_lock);
        pdev = pci_get_pdev(dev.seg, dev.bus, dev.devfn);
        ret = pdev ? pci_restore_msi_state(pdev) : -ENODEV;
        spin_unlock(&pcidevs_lock);
        break;
    }

    case PHYSDEVOP_setup_gsi: {
        struct physdev_setup_gsi setup_gsi;

        ret = -EFAULT;
        if ( copy_from_guest(&setup_gsi, arg, 1) != 0 )
            break;

        ret = -EINVAL;
        if ( setup_gsi.gsi < 0 || setup_gsi.gsi >= nr_irqs_gsi )
            break;

        ret = xsm_resource_setup_gsi(XSM_PRIV, setup_gsi.gsi);
        if ( ret )
            break;

        ret = mp_register_gsi(setup_gsi.gsi, setup_gsi.triggering,
                              setup_gsi.polarity);
        break;
    }

    case PHYSDEVOP_get_free_pirq: {
        struct physdev_get_free_pirq out;
        struct domain *d = v->domain;

        ret = -EFAULT;
        if ( copy_from_guest(&out, arg, 1) != 0 )
            break;

        spin_lock(&d->event_lock);

        ret = get_free_pirq(d, out.type);
        if ( ret >= 0 )
        {
            struct pirq *info = pirq_get_info(d, ret);

            if ( info )
                info->arch.irq = PIRQ_ALLOCATED;
            else
                ret = -ENOMEM;
        }

        spin_unlock(&d->event_lock);

        if ( ret >= 0 )
        {
            out.pirq = ret;
            ret = __copy_to_guest(arg, &out, 1) ? -EFAULT : 0;
        }

        break;
    }

    case PHYSDEVOP_dbgp_op: {
        struct physdev_dbgp_op op;

        if ( !is_hardware_domain(v->domain) )
            ret = -EPERM;
        else if ( copy_from_guest(&op, arg, 1) )
            ret = -EFAULT;
        else
            ret = dbgp_op(&op);
        break;
    }

    default:
        ret = -ENOSYS;
        break;
    }

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */