From bbe4432e669ab94fc8059e7ab878cafad7b8d123 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Mar 2007 13:27:36 +0200 Subject: KVM: Use own minor number Use the minor number (232) allocated to kvm by lanana. Signed-off-by: Avi Kivity --- include/linux/miscdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 326da7d500c7..dff9ea32606a 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -29,6 +29,7 @@ #define TUN_MINOR 200 #define HPET_MINOR 228 +#define KVM_MINOR 232 struct device; -- cgit v1.2.3 From ff42697436ddf5bd026e2cb4f117656b967f0709 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 7 Mar 2007 09:29:48 +0200 Subject: KVM: Export This allows users to actually build prgrams that use kvm without the entire source tree. Signed-off-by: Avi Kivity --- include/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 4ff0f57d0add..9f05279e7dd3 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -96,6 +96,7 @@ header-y += iso_fs.h header-y += ixjuser.h header-y += jffs2.h header-y += keyctl.h +header-y += kvm.h header-y += limits.h header-y += lock_dlm_plock.h header-y += magic.h -- cgit v1.2.3 From 9a2bb7f486dc639a1cf2ad803bf2227f0dc0809d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 22 Feb 2007 12:58:31 +0200 Subject: KVM: Use a shared page for kernel/user communication when runing a vcpu Instead of passing a 'struct kvm_run' back and forth between the kernel and userspace, allocate a page and allow the user to mmap() it. This reduces needless copying and makes the interface expandable by providing lots of free space. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 1 + drivers/kvm/kvm_main.c | 54 +++++++++++++++++++++++++++++++++++++------------- include/linux/kvm.h | 6 +++--- 3 files changed, 44 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 0d122bf889db..901b8d917b55 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -228,6 +228,7 @@ struct kvm_vcpu { struct mutex mutex; int cpu; int launched; + struct kvm_run *run; int interrupt_window_open; unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 946ed86a0595..42be8a8f299d 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -355,6 +355,8 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu) kvm_mmu_destroy(vcpu); vcpu_put(vcpu); kvm_arch_ops->vcpu_free(vcpu); + free_page((unsigned long)vcpu->run); + vcpu->run = NULL; } static void kvm_free_vcpus(struct kvm *kvm) @@ -1887,6 +1889,33 @@ static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, return r; } +static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + struct kvm_vcpu *vcpu = vma->vm_file->private_data; + unsigned long pgoff; + struct page *page; + + *type = VM_FAULT_MINOR; + pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + if (pgoff != 0) + return NOPAGE_SIGBUS; + page = virt_to_page(vcpu->run); + get_page(page); + return page; +} + +static struct vm_operations_struct kvm_vcpu_vm_ops = { + .nopage = kvm_vcpu_nopage, +}; + +static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_ops = &kvm_vcpu_vm_ops; + return 0; +} + static int kvm_vcpu_release(struct inode *inode, struct file *filp) { struct kvm_vcpu *vcpu = filp->private_data; @@ -1899,6 +1928,7 @@ static struct file_operations kvm_vcpu_fops = { .release = kvm_vcpu_release, .unlocked_ioctl = kvm_vcpu_ioctl, .compat_ioctl = kvm_vcpu_ioctl, + .mmap = kvm_vcpu_mmap, }; /* @@ -1947,6 +1977,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) { int r; struct kvm_vcpu *vcpu; + struct page *page; r = -EINVAL; if (!valid_vcpu(n)) @@ -1961,6 +1992,12 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) return -EEXIST; } + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + r = -ENOMEM; + if (!page) + goto out_unlock; + vcpu->run = page_address(page); + vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, FX_IMAGE_ALIGN); vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; @@ -1990,6 +2027,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) out_free_vcpus: kvm_free_vcpu(vcpu); +out_unlock: mutex_unlock(&vcpu->mutex); out: return r; @@ -2003,21 +2041,9 @@ static long kvm_vcpu_ioctl(struct file *filp, int r = -EINVAL; switch (ioctl) { - case KVM_RUN: { - struct kvm_run kvm_run; - - r = -EFAULT; - if (copy_from_user(&kvm_run, argp, sizeof kvm_run)) - goto out; - r = kvm_vcpu_ioctl_run(vcpu, &kvm_run); - if (r < 0 && r != -EINTR) - goto out; - if (copy_to_user(argp, &kvm_run, sizeof kvm_run)) { - r = -EFAULT; - goto out; - } + case KVM_RUN: + r = kvm_vcpu_ioctl_run(vcpu, vcpu->run); break; - } case KVM_GET_REGS: { struct kvm_regs kvm_regs; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 275354ffa1cb..d88e7508ee0a 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -11,7 +11,7 @@ #include #include -#define KVM_API_VERSION 4 +#define KVM_API_VERSION 5 /* * Architectural interrupt line count, and the size of the bitmap needed @@ -49,7 +49,7 @@ enum kvm_exit_reason { KVM_EXIT_SHUTDOWN = 8, }; -/* for KVM_RUN */ +/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u32 emulated; /* skip current instruction */ @@ -233,7 +233,7 @@ struct kvm_dirty_log { /* * ioctls for vcpu fds */ -#define KVM_RUN _IOWR(KVMIO, 2, struct kvm_run) +#define KVM_RUN _IO(KVMIO, 16) #define KVM_GET_REGS _IOR(KVMIO, 3, struct kvm_regs) #define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs) #define KVM_GET_SREGS _IOR(KVMIO, 5, struct kvm_sregs) -- cgit v1.2.3 From 46fc1477887c41c8e900f2c95485e222b9a54822 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 22 Feb 2007 19:39:30 +0200 Subject: KVM: Do not communicate to userspace through cpu registers during PIO Currently when passing the a PIO emulation request to userspace, we rely on userspace updating %rax (on 'in' instructions) and %rsi/%rdi/%rcx (on string instructions). This (a) requires two extra ioctls for getting and setting the registers and (b) is unfriendly to non-x86 archs, when they get kvm ports. So fix by doing the register fixups in the kernel and passing to userspace only an abstract description of the PIO to be done. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 1 + drivers/kvm/kvm_main.c | 48 +++++++++++++++++++++++++++++++++++++++++++++--- drivers/kvm/svm.c | 2 ++ drivers/kvm/vmx.c | 2 ++ include/linux/kvm.h | 6 +++--- 5 files changed, 53 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 901b8d917b55..59cbc5b1d905 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -274,6 +274,7 @@ struct kvm_vcpu { int mmio_size; unsigned char mmio_data[8]; gpa_t mmio_phys_addr; + int pio_pending; struct { int active; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 42be8a8f299d..ff8bcfee76e5 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1504,6 +1504,44 @@ void save_msrs(struct vmx_msr_entry *e, int n) } EXPORT_SYMBOL_GPL(save_msrs); +static void complete_pio(struct kvm_vcpu *vcpu) +{ + struct kvm_io *io = &vcpu->run->io; + long delta; + + kvm_arch_ops->cache_regs(vcpu); + + if (!io->string) { + if (io->direction == KVM_EXIT_IO_IN) + memcpy(&vcpu->regs[VCPU_REGS_RAX], &io->value, + io->size); + } else { + delta = 1; + if (io->rep) { + delta *= io->count; + /* + * The size of the register should really depend on + * current address size. + */ + vcpu->regs[VCPU_REGS_RCX] -= delta; + } + if (io->string_down) + delta = -delta; + delta *= io->size; + if (io->direction == KVM_EXIT_IO_IN) + vcpu->regs[VCPU_REGS_RDI] += delta; + else + vcpu->regs[VCPU_REGS_RSI] += delta; + } + + vcpu->pio_pending = 0; + vcpu->run->io_completed = 0; + + kvm_arch_ops->decache_regs(vcpu); + + kvm_arch_ops->skip_emulated_instruction(vcpu); +} + static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -1518,9 +1556,13 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->emulated = 0; } - if (kvm_run->mmio_completed) { - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); - vcpu->mmio_read_completed = 1; + if (kvm_run->io_completed) { + if (vcpu->pio_pending) + complete_pio(vcpu); + else { + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + vcpu->mmio_read_completed = 1; + } } vcpu->mmio_needed = 0; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 6787f11738cf..c35b8c83bf3f 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1037,6 +1037,7 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->io.size = ((io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT); kvm_run->io.string = (io_info & SVM_IOIO_STR_MASK) != 0; kvm_run->io.rep = (io_info & SVM_IOIO_REP_MASK) != 0; + kvm_run->io.count = 1; if (kvm_run->io.string) { unsigned addr_mask; @@ -1056,6 +1057,7 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } else kvm_run->io.value = vcpu->svm->vmcb->save.rax; + vcpu->pio_pending = 1; return 0; } diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index a721b60f7385..4d5f40fcb651 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1459,12 +1459,14 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; kvm_run->io.rep = (exit_qualification & 32) != 0; kvm_run->io.port = exit_qualification >> 16; + kvm_run->io.count = 1; if (kvm_run->io.string) { if (!get_io_count(vcpu, &kvm_run->io.count)) return 1; kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS); } else kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */ + vcpu->pio_pending = 1; return 0; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index d88e7508ee0a..19aeb3385188 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -11,7 +11,7 @@ #include #include -#define KVM_API_VERSION 5 +#define KVM_API_VERSION 6 /* * Architectural interrupt line count, and the size of the bitmap needed @@ -53,7 +53,7 @@ enum kvm_exit_reason { struct kvm_run { /* in */ __u32 emulated; /* skip current instruction */ - __u32 mmio_completed; /* mmio request completed */ + __u32 io_completed; /* mmio/pio request completed */ __u8 request_interrupt_window; __u8 padding1[7]; @@ -80,7 +80,7 @@ struct kvm_run { __u32 error_code; } ex; /* KVM_EXIT_IO */ - struct { + struct kvm_io { #define KVM_EXIT_IO_IN 0 #define KVM_EXIT_IO_OUT 1 __u8 direction; -- cgit v1.2.3 From 06465c5a3aa9948a7b00af49cd22ed8f235cdb0f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 28 Feb 2007 20:46:53 +0200 Subject: KVM: Handle cpuid in the kernel instead of punting to userspace KVM used to handle cpuid by letting userspace decide what values to return to the guest. We now handle cpuid completely in the kernel. We still let userspace decide which values the guest will see by having userspace set up the value table beforehand (this is necessary to allow management software to set the cpu features to the least common denominator, so that live migration can work). The motivation for the change is that kvm kernel code can be impacted by cpuid features, for example the x86 emulator. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 5 ++++ drivers/kvm/kvm_main.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/kvm/svm.c | 4 +-- drivers/kvm/vmx.c | 4 +-- include/linux/kvm.h | 18 ++++++++++++- 5 files changed, 95 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 59cbc5b1d905..be3a0e7ecae4 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -55,6 +55,7 @@ #define KVM_NUM_MMU_PAGES 256 #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 +#define KVM_MAX_CPUID_ENTRIES 40 #define FX_IMAGE_SIZE 512 #define FX_IMAGE_ALIGN 16 @@ -286,6 +287,9 @@ struct kvm_vcpu { u32 ar; } tr, es, ds, fs, gs; } rmode; + + int cpuid_nent; + struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; }; struct kvm_memory_slot { @@ -446,6 +450,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, struct x86_emulate_ctxt; +void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int emulate_clts(struct kvm_vcpu *vcpu); int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index ff8bcfee76e5..caec54fbf07f 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1504,6 +1504,43 @@ void save_msrs(struct vmx_msr_entry *e, int n) } EXPORT_SYMBOL_GPL(save_msrs); +void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) +{ + int i; + u32 function; + struct kvm_cpuid_entry *e, *best; + + kvm_arch_ops->cache_regs(vcpu); + function = vcpu->regs[VCPU_REGS_RAX]; + vcpu->regs[VCPU_REGS_RAX] = 0; + vcpu->regs[VCPU_REGS_RBX] = 0; + vcpu->regs[VCPU_REGS_RCX] = 0; + vcpu->regs[VCPU_REGS_RDX] = 0; + best = NULL; + for (i = 0; i < vcpu->cpuid_nent; ++i) { + e = &vcpu->cpuid_entries[i]; + if (e->function == function) { + best = e; + break; + } + /* + * Both basic or both extended? + */ + if (((e->function ^ function) & 0x80000000) == 0) + if (!best || e->function > best->function) + best = e; + } + if (best) { + vcpu->regs[VCPU_REGS_RAX] = best->eax; + vcpu->regs[VCPU_REGS_RBX] = best->ebx; + vcpu->regs[VCPU_REGS_RCX] = best->ecx; + vcpu->regs[VCPU_REGS_RDX] = best->edx; + } + kvm_arch_ops->decache_regs(vcpu); + kvm_arch_ops->skip_emulated_instruction(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); + static void complete_pio(struct kvm_vcpu *vcpu) { struct kvm_io *io = &vcpu->run->io; @@ -2075,6 +2112,26 @@ out: return r; } +static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, + struct kvm_cpuid *cpuid, + struct kvm_cpuid_entry __user *entries) +{ + int r; + + r = -E2BIG; + if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) + goto out; + r = -EFAULT; + if (copy_from_user(&vcpu->cpuid_entries, entries, + cpuid->nent * sizeof(struct kvm_cpuid_entry))) + goto out; + vcpu->cpuid_nent = cpuid->nent; + return 0; + +out: + return r; +} + static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2181,6 +2238,18 @@ static long kvm_vcpu_ioctl(struct file *filp, case KVM_SET_MSRS: r = msr_io(vcpu, argp, do_set_msr, 0); break; + case KVM_SET_CPUID: { + struct kvm_cpuid __user *cpuid_arg = argp; + struct kvm_cpuid cpuid; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + goto out; + r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); + if (r) + goto out; + break; + } default: ; } diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index c35b8c83bf3f..d4b2936479d9 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1101,8 +1101,8 @@ static int task_switch_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_r static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; - kvm_run->exit_reason = KVM_EXIT_CPUID; - return 0; + kvm_emulate_cpuid(vcpu); + return 1; } static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 4d5f40fcb651..71410a65bb90 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1585,8 +1585,8 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - kvm_run->exit_reason = KVM_EXIT_CPUID; - return 0; + kvm_emulate_cpuid(vcpu); + return 1; } static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 19aeb3385188..15e23bc06e8b 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -41,7 +41,6 @@ enum kvm_exit_reason { KVM_EXIT_UNKNOWN = 0, KVM_EXIT_EXCEPTION = 1, KVM_EXIT_IO = 2, - KVM_EXIT_CPUID = 3, KVM_EXIT_DEBUG = 4, KVM_EXIT_HLT = 5, KVM_EXIT_MMIO = 6, @@ -210,6 +209,22 @@ struct kvm_dirty_log { }; }; +struct kvm_cpuid_entry { + __u32 function; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding; +}; + +/* for KVM_SET_CPUID */ +struct kvm_cpuid { + __u32 nent; + __u32 padding; + struct kvm_cpuid_entry entries[0]; +}; + #define KVMIO 0xAE /* @@ -243,5 +258,6 @@ struct kvm_dirty_log { #define KVM_DEBUG_GUEST _IOW(KVMIO, 9, struct kvm_debug_guest) #define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 14, struct kvm_msrs) +#define KVM_SET_CPUID _IOW(KVMIO, 17, struct kvm_cpuid) #endif -- cgit v1.2.3 From 106b552b43beac2694df5fbafc8f125a72df5f65 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 1 Mar 2007 16:20:40 +0200 Subject: KVM: Remove the 'emulated' field from the userspace interface We no longer emulate single instructions in userspace. Instead, we service mmio or pio requests. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 5 ----- include/linux/kvm.h | 3 +-- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index caec54fbf07f..5d24203afd20 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1588,11 +1588,6 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* re-sync apic's tpr */ vcpu->cr8 = kvm_run->cr8; - if (kvm_run->emulated) { - kvm_arch_ops->skip_emulated_instruction(vcpu); - kvm_run->emulated = 0; - } - if (kvm_run->io_completed) { if (vcpu->pio_pending) complete_pio(vcpu); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 15e23bc06e8b..c6dd4a79b74b 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -51,10 +51,9 @@ enum kvm_exit_reason { /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ - __u32 emulated; /* skip current instruction */ __u32 io_completed; /* mmio/pio request completed */ __u8 request_interrupt_window; - __u8 padding1[7]; + __u8 padding1[3]; /* out */ __u32 exit_type; -- cgit v1.2.3 From 2a4dac3952468157297b81ae0a29815c02ead179 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 1 Mar 2007 16:47:06 +0200 Subject: KVM: Remove minor wart from KVM_CREATE_VCPU ioctl That ioctl does not transfer any data, so it should be an _IO rather than an _IOW. Signed-off-by: Avi Kivity --- include/linux/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index c6dd4a79b74b..d89189a81aba 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -241,7 +241,7 @@ struct kvm_cpuid { * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. */ -#define KVM_CREATE_VCPU _IOW(KVMIO, 11, int) +#define KVM_CREATE_VCPU _IO(KVMIO, 11) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log) /* -- cgit v1.2.3 From 739872c56f3322c38320c7a5a543ef6f56f174bc Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 1 Mar 2007 17:20:13 +0200 Subject: KVM: Renumber ioctls The recent changes have left the ioctl numbers in complete disarray. Signed-off-by: Avi Kivity --- include/linux/kvm.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index d89189a81aba..93472daec120 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -229,34 +229,34 @@ struct kvm_cpuid { /* * ioctls for /dev/kvm fds: */ -#define KVM_GET_API_VERSION _IO(KVMIO, 1) -#define KVM_CREATE_VM _IO(KVMIO, 2) /* returns a VM fd */ -#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 15, struct kvm_msr_list) +#define KVM_GET_API_VERSION _IO(KVMIO, 0x00) +#define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */ +#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) /* * ioctls for VM fds */ -#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 10, struct kvm_memory_region) +#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. */ -#define KVM_CREATE_VCPU _IO(KVMIO, 11) -#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log) +#define KVM_CREATE_VCPU _IO(KVMIO, 0x41) +#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) /* * ioctls for vcpu fds */ -#define KVM_RUN _IO(KVMIO, 16) -#define KVM_GET_REGS _IOR(KVMIO, 3, struct kvm_regs) -#define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs) -#define KVM_GET_SREGS _IOR(KVMIO, 5, struct kvm_sregs) -#define KVM_SET_SREGS _IOW(KVMIO, 6, struct kvm_sregs) -#define KVM_TRANSLATE _IOWR(KVMIO, 7, struct kvm_translation) -#define KVM_INTERRUPT _IOW(KVMIO, 8, struct kvm_interrupt) -#define KVM_DEBUG_GUEST _IOW(KVMIO, 9, struct kvm_debug_guest) -#define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs) -#define KVM_SET_MSRS _IOW(KVMIO, 14, struct kvm_msrs) -#define KVM_SET_CPUID _IOW(KVMIO, 17, struct kvm_cpuid) +#define KVM_RUN _IO(KVMIO, 0x80) +#define KVM_GET_REGS _IOR(KVMIO, 0x81, struct kvm_regs) +#define KVM_SET_REGS _IOW(KVMIO, 0x82, struct kvm_regs) +#define KVM_GET_SREGS _IOR(KVMIO, 0x83, struct kvm_sregs) +#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) +#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) +#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) +#define KVM_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest) +#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) +#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) +#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) #endif -- cgit v1.2.3 From 5d308f4550d9dc4c236e08b0377b610b9578577b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 1 Mar 2007 17:56:20 +0200 Subject: KVM: Add method to check for backwards-compatible API extensions Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 6 ++++++ include/linux/kvm.h | 5 +++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 5d24203afd20..39cf8fd343a3 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2416,6 +2416,12 @@ static long kvm_dev_ioctl(struct file *filp, r = 0; break; } + case KVM_CHECK_EXTENSION: + /* + * No extensions defined at present. + */ + r = 0; + break; default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 93472daec120..c93cf53953af 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -232,6 +232,11 @@ struct kvm_cpuid { #define KVM_GET_API_VERSION _IO(KVMIO, 0x00) #define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */ #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) +/* + * Check if a kvm extension is available. Argument is extension number, + * return is 1 (yes) or 0 (no, sorry). + */ +#define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03) /* * ioctls for VM fds -- cgit v1.2.3 From b4e63f560beb187cffdaf706e534a1e2f9effb66 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Mar 2007 13:59:30 +0200 Subject: KVM: Allow userspace to process hypercalls which have no kernel handler This is useful for paravirtualized graphics devices, for example. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 18 +++++++++++++++++- include/linux/kvm.h | 10 +++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 39cf8fd343a3..de93117f1e97 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1203,7 +1203,16 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) } switch (nr) { default: - ; + run->hypercall.args[0] = a0; + run->hypercall.args[1] = a1; + run->hypercall.args[2] = a2; + run->hypercall.args[3] = a3; + run->hypercall.args[4] = a4; + run->hypercall.args[5] = a5; + run->hypercall.ret = ret; + run->hypercall.longmode = is_long_mode(vcpu); + kvm_arch_ops->decache_regs(vcpu); + return 0; } vcpu->regs[VCPU_REGS_RAX] = ret; kvm_arch_ops->decache_regs(vcpu); @@ -1599,6 +1608,13 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->mmio_needed = 0; + if (kvm_run->exit_type == KVM_EXIT_TYPE_VM_EXIT + && kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { + kvm_arch_ops->cache_regs(vcpu); + vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; + kvm_arch_ops->decache_regs(vcpu); + } + r = kvm_arch_ops->run(vcpu, kvm_run); vcpu_put(vcpu); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index c93cf53953af..9151ebfa22e9 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -11,7 +11,7 @@ #include #include -#define KVM_API_VERSION 6 +#define KVM_API_VERSION 7 /* * Architectural interrupt line count, and the size of the bitmap needed @@ -41,6 +41,7 @@ enum kvm_exit_reason { KVM_EXIT_UNKNOWN = 0, KVM_EXIT_EXCEPTION = 1, KVM_EXIT_IO = 2, + KVM_EXIT_HYPERCALL = 3, KVM_EXIT_DEBUG = 4, KVM_EXIT_HLT = 5, KVM_EXIT_MMIO = 6, @@ -103,6 +104,13 @@ struct kvm_run { __u32 len; __u8 is_write; } mmio; + /* KVM_EXIT_HYPERCALL */ + struct { + __u64 args[6]; + __u64 ret; + __u32 longmode; + __u32 pad; + } hypercall; }; }; -- cgit v1.2.3 From 8eb7d334bd8e693340ee198280f7d45035cdab8c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Mar 2007 14:17:08 +0200 Subject: KVM: Fold kvm_run::exit_type into kvm_run::exit_reason Currently, userspace is told about the nature of the last exit from the guest using two fields, exit_type and exit_reason, where exit_type has just two enumerations (and no need for more). So fold exit_type into exit_reason, reducing the complexity of determining what really happened. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 3 +-- drivers/kvm/svm.c | 7 +++---- drivers/kvm/vmx.c | 7 +++---- include/linux/kvm.h | 15 ++++++++------- 4 files changed, 15 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index de93117f1e97..ac44df551aa8 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1608,8 +1608,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->mmio_needed = 0; - if (kvm_run->exit_type == KVM_EXIT_TYPE_VM_EXIT - && kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { + if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { kvm_arch_ops->cache_regs(vcpu); vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; kvm_arch_ops->decache_regs(vcpu); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index d4b2936479d9..b09928f14219 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1298,8 +1298,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u32 exit_code = vcpu->svm->vmcb->control.exit_code; - kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; - if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) && exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " @@ -1609,8 +1607,9 @@ again: vcpu->svm->next_rip = 0; if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) { - kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; - kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code; + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + kvm_run->fail_entry.hardware_entry_failure_reason + = vcpu->svm->vmcb->control.exit_code; post_kvm_run_save(vcpu, kvm_run); return 0; } diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 71410a65bb90..cf9568fbe8a5 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1922,10 +1922,10 @@ again: asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); - kvm_run->exit_type = 0; if (fail) { - kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; - kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR); + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + kvm_run->fail_entry.hardware_entry_failure_reason + = vmcs_read32(VM_INSTRUCTION_ERROR); r = 0; } else { /* @@ -1935,7 +1935,6 @@ again: profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); vcpu->launched = 1; - kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; r = kvm_handle_exit(kvm_run, vcpu); if (r > 0) { /* Give scheduler a change to reschedule. */ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 9151ebfa22e9..57f47ef93829 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -11,7 +11,7 @@ #include #include -#define KVM_API_VERSION 7 +#define KVM_API_VERSION 8 /* * Architectural interrupt line count, and the size of the bitmap needed @@ -34,9 +34,6 @@ struct kvm_memory_region { #define KVM_MEM_LOG_DIRTY_PAGES 1UL -#define KVM_EXIT_TYPE_FAIL_ENTRY 1 -#define KVM_EXIT_TYPE_VM_EXIT 2 - enum kvm_exit_reason { KVM_EXIT_UNKNOWN = 0, KVM_EXIT_EXCEPTION = 1, @@ -47,6 +44,7 @@ enum kvm_exit_reason { KVM_EXIT_MMIO = 6, KVM_EXIT_IRQ_WINDOW_OPEN = 7, KVM_EXIT_SHUTDOWN = 8, + KVM_EXIT_FAIL_ENTRY = 9, }; /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ @@ -57,12 +55,11 @@ struct kvm_run { __u8 padding1[3]; /* out */ - __u32 exit_type; __u32 exit_reason; __u32 instruction_length; __u8 ready_for_interrupt_injection; __u8 if_flag; - __u16 padding2; + __u8 padding2[6]; /* in (pre_kvm_run), out (post_kvm_run) */ __u64 cr8; @@ -71,8 +68,12 @@ struct kvm_run { union { /* KVM_EXIT_UNKNOWN */ struct { - __u32 hardware_exit_reason; + __u64 hardware_exit_reason; } hw; + /* KVM_EXIT_FAIL_ENTRY */ + struct { + __u64 hardware_entry_failure_reason; + } fail_entry; /* KVM_EXIT_EXCEPTION */ struct { __u32 exception; -- cgit v1.2.3 From 1b19f3e61d7e1edb395dd64bf7d63621a37af8ca Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Mar 2007 14:24:03 +0200 Subject: KVM: Add a special exit reason when exiting due to an interrupt This is redundant, as we also return -EINTR from the ioctl, but it allows us to examine the exit_reason field on resume without seeing old data. Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 2 ++ drivers/kvm/vmx.c | 2 ++ include/linux/kvm.h | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index b09928f14219..0311665e3c83 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1619,12 +1619,14 @@ again: if (signal_pending(current)) { ++kvm_stat.signal_exits; post_kvm_run_save(vcpu, kvm_run); + kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } if (dm_request_for_irq_injection(vcpu, kvm_run)) { ++kvm_stat.request_irq_exits; post_kvm_run_save(vcpu, kvm_run); + kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } kvm_resched(vcpu); diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index cf9568fbe8a5..e69bab6d811d 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1941,12 +1941,14 @@ again: if (signal_pending(current)) { ++kvm_stat.signal_exits; post_kvm_run_save(vcpu, kvm_run); + kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } if (dm_request_for_irq_injection(vcpu, kvm_run)) { ++kvm_stat.request_irq_exits; post_kvm_run_save(vcpu, kvm_run); + kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 57f47ef93829..b3af92e7bf5d 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -11,7 +11,7 @@ #include #include -#define KVM_API_VERSION 8 +#define KVM_API_VERSION 9 /* * Architectural interrupt line count, and the size of the bitmap needed @@ -45,6 +45,7 @@ enum kvm_exit_reason { KVM_EXIT_IRQ_WINDOW_OPEN = 7, KVM_EXIT_SHUTDOWN = 8, KVM_EXIT_FAIL_ENTRY = 9, + KVM_EXIT_INTR = 10, }; /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ -- cgit v1.2.3 From 1961d276c877b99f5f16aaf36377c75e0e191c3a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 5 Mar 2007 19:46:05 +0200 Subject: KVM: Add guest mode signal mask Allow a special signal mask to be used while executing in guest mode. This allows signals to be used to interrupt a vcpu without requiring signal delivery to a userspace handler, which is quite expensive. Userspace still receives -EINTR and can get the signal via sigwait(). Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 3 +++ drivers/kvm/kvm_main.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 7 +++++++ 3 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index be3a0e7ecae4..1c4a581938bf 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -277,6 +277,9 @@ struct kvm_vcpu { gpa_t mmio_phys_addr; int pio_pending; + int sigset_active; + sigset_t sigset; + struct { int active; u8 save_iopl; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index ac44df551aa8..df85f5f65489 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1591,9 +1591,13 @@ static void complete_pio(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; + sigset_t sigsaved; vcpu_load(vcpu); + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + /* re-sync apic's tpr */ vcpu->cr8 = kvm_run->cr8; @@ -1616,6 +1620,9 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) r = kvm_arch_ops->run(vcpu, kvm_run); + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + vcpu_put(vcpu); return r; } @@ -2142,6 +2149,17 @@ out: return r; } +static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) +{ + if (sigset) { + sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); + vcpu->sigset_active = 1; + vcpu->sigset = *sigset; + } else + vcpu->sigset_active = 0; + return 0; +} + static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2260,6 +2278,29 @@ static long kvm_vcpu_ioctl(struct file *filp, goto out; break; } + case KVM_SET_SIGNAL_MASK: { + struct kvm_signal_mask __user *sigmask_arg = argp; + struct kvm_signal_mask kvm_sigmask; + sigset_t sigset, *p; + + p = NULL; + if (argp) { + r = -EFAULT; + if (copy_from_user(&kvm_sigmask, argp, + sizeof kvm_sigmask)) + goto out; + r = -EINVAL; + if (kvm_sigmask.len != sizeof sigset) + goto out; + r = -EFAULT; + if (copy_from_user(&sigset, sigmask_arg->sigset, + sizeof sigset)) + goto out; + p = &sigset; + } + r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); + break; + } default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index b3af92e7bf5d..c0d10cd8088e 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -234,6 +234,12 @@ struct kvm_cpuid { struct kvm_cpuid_entry entries[0]; }; +/* for KVM_SET_SIGNAL_MASK */ +struct kvm_signal_mask { + __u32 len; + __u8 sigset[0]; +}; + #define KVMIO 0xAE /* @@ -273,5 +279,6 @@ struct kvm_cpuid { #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) +#define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask) #endif -- cgit v1.2.3 From 07c45a366d89f8eaec5d9890e810171b408f9a52 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 7 Mar 2007 13:05:38 +0200 Subject: KVM: Allow kernel to select size of mmap() buffer This allows us to store offsets in the kernel/user kvm_run area, and be sure that userspace has them mapped. As offsets can be outside the kvm_run struct, userspace has no way of knowing how much to mmap. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 8 +++++++- include/linux/kvm.h | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index df85f5f65489..cba0b87c34e4 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2436,7 +2436,7 @@ static long kvm_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { void __user *argp = (void __user *)arg; - int r = -EINVAL; + long r = -EINVAL; switch (ioctl) { case KVM_GET_API_VERSION: @@ -2478,6 +2478,12 @@ static long kvm_dev_ioctl(struct file *filp, */ r = 0; break; + case KVM_GET_VCPU_MMAP_SIZE: + r = -EINVAL; + if (arg) + goto out; + r = PAGE_SIZE; + break; default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index c0d10cd8088e..dad90816cad8 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -253,6 +253,10 @@ struct kvm_signal_mask { * return is 1 (yes) or 0 (no, sorry). */ #define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03) +/* + * Get size for mmap(vcpu_fd) + */ +#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ /* * ioctls for VM fds -- cgit v1.2.3 From 039576c03c35e2f990ad9bb9c39e1bad3cd60d34 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 20 Mar 2007 12:46:50 +0200 Subject: KVM: Avoid guest virtual addresses in string pio userspace interface The current string pio interface communicates using guest virtual addresses, relying on userspace to translate addresses and to check permissions. This interface cannot fully support guest smp, as the check needs to take into account two pages at one in case an unaligned string transfer straddles a page boundary. Change the interface not to communicate guest addresses at all; instead use a buffer page (mmaped by userspace) and do transfers there. The kernel manages the virtual to physical translation and can perform the checks atomically by taking the appropriate locks. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 21 +++++- drivers/kvm/kvm_main.c | 183 +++++++++++++++++++++++++++++++++++++++++++++---- drivers/kvm/mmu.c | 9 +++ drivers/kvm/svm.c | 40 +++++------ drivers/kvm/vmx.c | 40 +++++------ include/linux/kvm.h | 11 +-- 6 files changed, 238 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 1c4a581938bf..7866b34b6c96 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -74,6 +74,8 @@ #define IOPL_SHIFT 12 +#define KVM_PIO_PAGE_OFFSET 1 + /* * Address types: * @@ -220,6 +222,18 @@ enum { VCPU_SREG_LDTR, }; +struct kvm_pio_request { + unsigned long count; + int cur_count; + struct page *guest_pages[2]; + unsigned guest_page_offset; + int in; + int size; + int string; + int down; + int rep; +}; + struct kvm_vcpu { struct kvm *kvm; union { @@ -275,7 +289,8 @@ struct kvm_vcpu { int mmio_size; unsigned char mmio_data[8]; gpa_t mmio_phys_addr; - int pio_pending; + struct kvm_pio_request pio; + void *pio_data; int sigset_active; sigset_t sigset; @@ -421,6 +436,7 @@ hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva); +struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); void kvm_emulator_want_group7_invlpg(void); @@ -453,6 +469,9 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, struct x86_emulate_ctxt; +int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned long count, int string, int down, + gva_t address, int rep, unsigned port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int emulate_clts(struct kvm_vcpu *vcpu); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index ba7f43a4459e..205998c141fb 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -346,6 +346,17 @@ static void kvm_free_physmem(struct kvm *kvm) kvm_free_physmem_slot(&kvm->memslots[i], NULL); } +static void free_pio_guest_pages(struct kvm_vcpu *vcpu) +{ + int i; + + for (i = 0; i < 2; ++i) + if (vcpu->pio.guest_pages[i]) { + __free_page(vcpu->pio.guest_pages[i]); + vcpu->pio.guest_pages[i] = NULL; + } +} + static void kvm_free_vcpu(struct kvm_vcpu *vcpu) { if (!vcpu->vmcs) @@ -357,6 +368,9 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu) kvm_arch_ops->vcpu_free(vcpu); free_page((unsigned long)vcpu->run); vcpu->run = NULL; + free_page((unsigned long)vcpu->pio_data); + vcpu->pio_data = NULL; + free_pio_guest_pages(vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -1550,44 +1564,168 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); -static void complete_pio(struct kvm_vcpu *vcpu) +static int pio_copy_data(struct kvm_vcpu *vcpu) { - struct kvm_io *io = &vcpu->run->io; + void *p = vcpu->pio_data; + void *q; + unsigned bytes; + int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; + + kvm_arch_ops->vcpu_put(vcpu); + q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, + PAGE_KERNEL); + if (!q) { + kvm_arch_ops->vcpu_load(vcpu); + free_pio_guest_pages(vcpu); + return -ENOMEM; + } + q += vcpu->pio.guest_page_offset; + bytes = vcpu->pio.size * vcpu->pio.cur_count; + if (vcpu->pio.in) + memcpy(q, p, bytes); + else + memcpy(p, q, bytes); + q -= vcpu->pio.guest_page_offset; + vunmap(q); + kvm_arch_ops->vcpu_load(vcpu); + free_pio_guest_pages(vcpu); + return 0; +} + +static int complete_pio(struct kvm_vcpu *vcpu) +{ + struct kvm_pio_request *io = &vcpu->pio; long delta; + int r; kvm_arch_ops->cache_regs(vcpu); if (!io->string) { - if (io->direction == KVM_EXIT_IO_IN) - memcpy(&vcpu->regs[VCPU_REGS_RAX], &io->value, + if (io->in) + memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data, io->size); } else { + if (io->in) { + r = pio_copy_data(vcpu); + if (r) { + kvm_arch_ops->cache_regs(vcpu); + return r; + } + } + delta = 1; if (io->rep) { - delta *= io->count; + delta *= io->cur_count; /* * The size of the register should really depend on * current address size. */ vcpu->regs[VCPU_REGS_RCX] -= delta; } - if (io->string_down) + if (io->down) delta = -delta; delta *= io->size; - if (io->direction == KVM_EXIT_IO_IN) + if (io->in) vcpu->regs[VCPU_REGS_RDI] += delta; else vcpu->regs[VCPU_REGS_RSI] += delta; } - vcpu->pio_pending = 0; vcpu->run->io_completed = 0; kvm_arch_ops->decache_regs(vcpu); - kvm_arch_ops->skip_emulated_instruction(vcpu); + io->count -= io->cur_count; + io->cur_count = 0; + + if (!io->count) + kvm_arch_ops->skip_emulated_instruction(vcpu); + return 0; } +int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned long count, int string, int down, + gva_t address, int rep, unsigned port) +{ + unsigned now, in_page; + int i; + int nr_pages = 1; + struct page *page; + + vcpu->run->exit_reason = KVM_EXIT_IO; + vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; + vcpu->run->io.size = size; + vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; + vcpu->run->io.count = count; + vcpu->run->io.port = port; + vcpu->pio.count = count; + vcpu->pio.cur_count = count; + vcpu->pio.size = size; + vcpu->pio.in = in; + vcpu->pio.string = string; + vcpu->pio.down = down; + vcpu->pio.guest_page_offset = offset_in_page(address); + vcpu->pio.rep = rep; + + if (!string) { + kvm_arch_ops->cache_regs(vcpu); + memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); + kvm_arch_ops->decache_regs(vcpu); + return 0; + } + + if (!count) { + kvm_arch_ops->skip_emulated_instruction(vcpu); + return 1; + } + + now = min(count, PAGE_SIZE / size); + + if (!down) + in_page = PAGE_SIZE - offset_in_page(address); + else + in_page = offset_in_page(address) + size; + now = min(count, (unsigned long)in_page / size); + if (!now) { + /* + * String I/O straddles page boundary. Pin two guest pages + * so that we satisfy atomicity constraints. Do just one + * transaction to avoid complexity. + */ + nr_pages = 2; + now = 1; + } + if (down) { + /* + * String I/O in reverse. Yuck. Kill the guest, fix later. + */ + printk(KERN_ERR "kvm: guest string pio down\n"); + inject_gp(vcpu); + return 1; + } + vcpu->run->io.count = now; + vcpu->pio.cur_count = now; + + for (i = 0; i < nr_pages; ++i) { + spin_lock(&vcpu->kvm->lock); + page = gva_to_page(vcpu, address + i * PAGE_SIZE); + if (page) + get_page(page); + vcpu->pio.guest_pages[i] = page; + spin_unlock(&vcpu->kvm->lock); + if (!page) { + inject_gp(vcpu); + free_pio_guest_pages(vcpu); + return 1; + } + } + + if (!vcpu->pio.in) + return pio_copy_data(vcpu); + return 0; +} +EXPORT_SYMBOL_GPL(kvm_setup_pio); + static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -1602,9 +1740,11 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->cr8 = kvm_run->cr8; if (kvm_run->io_completed) { - if (vcpu->pio_pending) - complete_pio(vcpu); - else { + if (vcpu->pio.cur_count) { + r = complete_pio(vcpu); + if (r) + goto out; + } else { memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); vcpu->mmio_read_completed = 1; } @@ -1620,6 +1760,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) r = kvm_arch_ops->run(vcpu, kvm_run); +out: if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -1995,9 +2136,12 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, *type = VM_FAULT_MINOR; pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - if (pgoff != 0) + if (pgoff == 0) + page = virt_to_page(vcpu->run); + else if (pgoff == KVM_PIO_PAGE_OFFSET) + page = virt_to_page(vcpu->pio_data); + else return NOPAGE_SIGBUS; - page = virt_to_page(vcpu->run); get_page(page); return page; } @@ -2094,6 +2238,12 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) goto out_unlock; vcpu->run = page_address(page); + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + r = -ENOMEM; + if (!page) + goto out_free_run; + vcpu->pio_data = page_address(page); + vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, FX_IMAGE_ALIGN); vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; @@ -2123,6 +2273,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) out_free_vcpus: kvm_free_vcpu(vcpu); +out_free_run: + free_page((unsigned long)vcpu->run); + vcpu->run = NULL; out_unlock: mutex_unlock(&vcpu->mutex); out: @@ -2491,7 +2644,7 @@ static long kvm_dev_ioctl(struct file *filp, r = -EINVAL; if (arg) goto out; - r = PAGE_SIZE; + r = 2 * PAGE_SIZE; break; default: ; diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 2d905770fd88..4843e95e54e1 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -735,6 +735,15 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva) return gpa_to_hpa(vcpu, gpa); } +struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) +{ + gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva); + + if (gpa == UNMAPPED_GVA) + return NULL; + return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT); +} + static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) { } diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 2396ada23777..64afc5cf890d 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -984,7 +984,7 @@ static int io_get_override(struct kvm_vcpu *vcpu, return 0; } -static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address) +static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address) { unsigned long addr_mask; unsigned long *reg; @@ -1028,40 +1028,38 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address) static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug? - int _in = io_info & SVM_IOIO_TYPE_MASK; + int size, down, in, string, rep; + unsigned port; + unsigned long count; + gva_t address = 0; ++kvm_stat.io_exits; vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2; - kvm_run->exit_reason = KVM_EXIT_IO; - kvm_run->io.port = io_info >> 16; - kvm_run->io.direction = (_in) ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; - kvm_run->io.size = ((io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT); - kvm_run->io.string = (io_info & SVM_IOIO_STR_MASK) != 0; - kvm_run->io.rep = (io_info & SVM_IOIO_REP_MASK) != 0; - kvm_run->io.count = 1; + in = (io_info & SVM_IOIO_TYPE_MASK) != 0; + port = io_info >> 16; + size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; + string = (io_info & SVM_IOIO_STR_MASK) != 0; + rep = (io_info & SVM_IOIO_REP_MASK) != 0; + count = 1; + down = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; - if (kvm_run->io.string) { + if (string) { unsigned addr_mask; - addr_mask = io_adress(vcpu, _in, &kvm_run->io.address); + addr_mask = io_adress(vcpu, in, &address); if (!addr_mask) { printk(KERN_DEBUG "%s: get io address failed\n", __FUNCTION__); return 1; } - if (kvm_run->io.rep) { - kvm_run->io.count - = vcpu->regs[VCPU_REGS_RCX] & addr_mask; - kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags - & X86_EFLAGS_DF) != 0; - } - } else - kvm_run->io.value = vcpu->svm->vmcb->save.rax; - vcpu->pio_pending = 1; - return 0; + if (rep) + count = vcpu->regs[VCPU_REGS_RCX] & addr_mask; + } + return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down, + address, rep, port); } static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index e69bab6d811d..0d9bf0b36d37 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1394,7 +1394,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } -static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) +static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count) { u64 inst; gva_t rip; @@ -1439,35 +1439,35 @@ static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) done: countr_size *= 8; *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size)); + //printk("cx: %lx\n", vcpu->regs[VCPU_REGS_RCX]); return 1; } static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u64 exit_qualification; + int size, down, in, string, rep; + unsigned port; + unsigned long count; + gva_t address; ++kvm_stat.io_exits; exit_qualification = vmcs_read64(EXIT_QUALIFICATION); - kvm_run->exit_reason = KVM_EXIT_IO; - if (exit_qualification & 8) - kvm_run->io.direction = KVM_EXIT_IO_IN; - else - kvm_run->io.direction = KVM_EXIT_IO_OUT; - kvm_run->io.size = (exit_qualification & 7) + 1; - kvm_run->io.string = (exit_qualification & 16) != 0; - kvm_run->io.string_down - = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; - kvm_run->io.rep = (exit_qualification & 32) != 0; - kvm_run->io.port = exit_qualification >> 16; - kvm_run->io.count = 1; - if (kvm_run->io.string) { - if (!get_io_count(vcpu, &kvm_run->io.count)) + in = (exit_qualification & 8) != 0; + size = (exit_qualification & 7) + 1; + string = (exit_qualification & 16) != 0; + down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; + count = 1; + rep = (exit_qualification & 32) != 0; + port = exit_qualification >> 16; + address = 0; + if (string) { + if (rep && !get_io_count(vcpu, &count)) return 1; - kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS); - } else - kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */ - vcpu->pio_pending = 1; - return 0; + address = vmcs_readl(GUEST_LINEAR_ADDRESS); + } + return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down, + address, rep, port); } static void diff --git a/include/linux/kvm.h b/include/linux/kvm.h index dad90816cad8..728b24cf5d77 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -86,16 +86,9 @@ struct kvm_run { #define KVM_EXIT_IO_OUT 1 __u8 direction; __u8 size; /* bytes */ - __u8 string; - __u8 string_down; - __u8 rep; - __u8 pad; __u16 port; - __u64 count; - union { - __u64 address; - __u32 value; - }; + __u32 count; + __u64 data_offset; /* relative to kvm_run start */ } io; struct { } debug; -- cgit v1.2.3 From e8207547d2f7b2f557bdb73015c1f74c32474438 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 30 Mar 2007 16:54:30 +0300 Subject: KVM: Add physical memory aliasing feature With this, we can specify that accesses to one physical memory range will be remapped to another. This is useful for the vga window at 0xa0000 which is used as a movable window into the (much larger) framebuffer. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 9 +++++ drivers/kvm/kvm_main.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/kvm.h | 10 +++++- 3 files changed, 104 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index d19985a5508a..fceeb840a255 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -51,6 +51,7 @@ #define UNMAPPED_GVA (~(gpa_t)0) #define KVM_MAX_VCPUS 1 +#define KVM_ALIAS_SLOTS 4 #define KVM_MEMORY_SLOTS 4 #define KVM_NUM_MMU_PAGES 256 #define KVM_MIN_FREE_MMU_PAGES 5 @@ -312,6 +313,12 @@ struct kvm_vcpu { struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; }; +struct kvm_mem_alias { + gfn_t base_gfn; + unsigned long npages; + gfn_t target_gfn; +}; + struct kvm_memory_slot { gfn_t base_gfn; unsigned long npages; @@ -322,6 +329,8 @@ struct kvm_memory_slot { struct kvm { spinlock_t lock; /* protects everything except vcpus */ + int naliases; + struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; int nmemslots; struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS]; /* diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index a0ec5dda3305..e495855727e2 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -846,7 +846,73 @@ out: return r; } -struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) +/* + * Set a new alias region. Aliases map a portion of physical memory into + * another portion. This is useful for memory windows, for example the PC + * VGA region. + */ +static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, + struct kvm_memory_alias *alias) +{ + int r, n; + struct kvm_mem_alias *p; + + r = -EINVAL; + /* General sanity checks */ + if (alias->memory_size & (PAGE_SIZE - 1)) + goto out; + if (alias->guest_phys_addr & (PAGE_SIZE - 1)) + goto out; + if (alias->slot >= KVM_ALIAS_SLOTS) + goto out; + if (alias->guest_phys_addr + alias->memory_size + < alias->guest_phys_addr) + goto out; + if (alias->target_phys_addr + alias->memory_size + < alias->target_phys_addr) + goto out; + + spin_lock(&kvm->lock); + + p = &kvm->aliases[alias->slot]; + p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; + p->npages = alias->memory_size >> PAGE_SHIFT; + p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; + + for (n = KVM_ALIAS_SLOTS; n > 0; --n) + if (kvm->aliases[n - 1].npages) + break; + kvm->naliases = n; + + spin_unlock(&kvm->lock); + + vcpu_load(&kvm->vcpus[0]); + spin_lock(&kvm->lock); + kvm_mmu_zap_all(&kvm->vcpus[0]); + spin_unlock(&kvm->lock); + vcpu_put(&kvm->vcpus[0]); + + return 0; + +out: + return r; +} + +static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) +{ + int i; + struct kvm_mem_alias *alias; + + for (i = 0; i < kvm->naliases; ++i) { + alias = &kvm->aliases[i]; + if (gfn >= alias->base_gfn + && gfn < alias->base_gfn + alias->npages) + return alias->target_gfn + gfn - alias->base_gfn; + } + return gfn; +} + +static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn) { int i; @@ -859,13 +925,19 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) } return NULL; } -EXPORT_SYMBOL_GPL(gfn_to_memslot); + +struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) +{ + gfn = unalias_gfn(kvm, gfn); + return __gfn_to_memslot(kvm, gfn); +} struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *slot; - slot = gfn_to_memslot(kvm, gfn); + gfn = unalias_gfn(kvm, gfn); + slot = __gfn_to_memslot(kvm, gfn); if (!slot) return NULL; return slot->phys_mem[gfn - slot->base_gfn]; @@ -2512,6 +2584,17 @@ static long kvm_vm_ioctl(struct file *filp, goto out; break; } + case KVM_SET_MEMORY_ALIAS: { + struct kvm_memory_alias alias; + + r = -EFAULT; + if (copy_from_user(&alias, argp, sizeof alias)) + goto out; + r = kvm_vm_ioctl_set_memory_alias(kvm, &alias); + if (r) + goto out; + break; + } default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 728b24cf5d77..da9b23fa4b6c 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -11,7 +11,7 @@ #include #include -#define KVM_API_VERSION 9 +#define KVM_API_VERSION 10 /* * Architectural interrupt line count, and the size of the bitmap needed @@ -33,6 +33,13 @@ struct kvm_memory_region { /* for kvm_memory_region::flags */ #define KVM_MEM_LOG_DIRTY_PAGES 1UL +struct kvm_memory_alias { + __u32 slot; /* this has a different namespace than memory slots */ + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; + __u64 target_phys_addr; +}; enum kvm_exit_reason { KVM_EXIT_UNKNOWN = 0, @@ -261,6 +268,7 @@ struct kvm_signal_mask { */ #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) +#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) /* * ioctls for vcpu fds -- cgit v1.2.3 From b8836737d92c139be770eae3d6574e33d1224caf Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 1 Apr 2007 16:34:31 +0300 Subject: KVM: Add fpu get/set operations These are really helpful when migrating an floating point app to another machine. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 17 ++++++++++ 2 files changed, 103 insertions(+) (limited to 'include/linux') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index e495855727e2..b065c4991568 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2398,6 +2398,67 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) return 0; } +/* + * fxsave fpu state. Taken from x86_64/processor.h. To be killed when + * we have asm/x86/processor.h + */ +struct fxsave { + u16 cwd; + u16 swd; + u16 twd; + u16 fop; + u64 rip; + u64 rdp; + u32 mxcsr; + u32 mxcsr_mask; + u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ +#ifdef CONFIG_X86_64 + u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ +#else + u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ +#endif +}; + +static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; + + vcpu_load(vcpu); + + memcpy(fpu->fpr, fxsave->st_space, 128); + fpu->fcw = fxsave->cwd; + fpu->fsw = fxsave->swd; + fpu->ftwx = fxsave->twd; + fpu->last_opcode = fxsave->fop; + fpu->last_ip = fxsave->rip; + fpu->last_dp = fxsave->rdp; + memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); + + vcpu_put(vcpu); + + return 0; +} + +static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; + + vcpu_load(vcpu); + + memcpy(fxsave->st_space, fpu->fpr, 128); + fxsave->cwd = fpu->fcw; + fxsave->swd = fpu->fsw; + fxsave->twd = fpu->ftwx; + fxsave->fop = fpu->last_opcode; + fxsave->rip = fpu->last_ip; + fxsave->rdp = fpu->last_dp; + memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); + + vcpu_put(vcpu); + + return 0; +} + static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2542,6 +2603,31 @@ static long kvm_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); break; } + case KVM_GET_FPU: { + struct kvm_fpu fpu; + + memset(&fpu, 0, sizeof fpu); + r = kvm_vcpu_ioctl_get_fpu(vcpu, &fpu); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user(argp, &fpu, sizeof fpu)) + goto out; + r = 0; + break; + } + case KVM_SET_FPU: { + struct kvm_fpu fpu; + + r = -EFAULT; + if (copy_from_user(&fpu, argp, sizeof fpu)) + goto out; + r = kvm_vcpu_ioctl_set_fpu(vcpu, &fpu); + if (r) + goto out; + r = 0; + break; + } default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index da9b23fa4b6c..07bf353eeb6f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -126,6 +126,21 @@ struct kvm_regs { __u64 rip, rflags; }; +/* for KVM_GET_FPU and KVM_SET_FPU */ +struct kvm_fpu { + __u8 fpr[8][16]; + __u16 fcw; + __u16 fsw; + __u8 ftwx; /* in fxsave format */ + __u8 pad1; + __u16 last_opcode; + __u64 last_ip; + __u64 last_dp; + __u8 xmm[16][16]; + __u32 mxcsr; + __u32 pad2; +}; + struct kvm_segment { __u64 base; __u32 limit; @@ -285,5 +300,7 @@ struct kvm_signal_mask { #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) #define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask) +#define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu) +#define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu) #endif -- cgit v1.2.3 From 02c83209726270ddf9597deabc45e08f6fc3942c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 29 Apr 2007 15:02:17 +0300 Subject: KVM: Don't require explicit indication of completion of mmio or pio It is illegal not to return from a pio or mmio request without completing it, as mmio or pio is an atomic operation. Therefore, we can simplify the userspace interface by avoiding the completion indication. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 44 ++++++++++++++++++++++---------------------- include/linux/kvm.h | 5 ++--- 2 files changed, 24 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index f267dbb52845..c8b8cfa332bb 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1237,8 +1237,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu, kvm_arch_ops->decache_regs(vcpu); kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags); - if (vcpu->mmio_is_write) + if (vcpu->mmio_is_write) { + vcpu->mmio_needed = 0; return EMULATE_DO_MMIO; + } return EMULATE_DONE; } @@ -1692,8 +1694,6 @@ static int complete_pio(struct kvm_vcpu *vcpu) vcpu->regs[VCPU_REGS_RSI] += delta; } - vcpu->run->io_completed = 0; - kvm_arch_ops->decache_regs(vcpu); io->count -= io->cur_count; @@ -1800,25 +1800,25 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* re-sync apic's tpr */ vcpu->cr8 = kvm_run->cr8; - if (kvm_run->io_completed) { - if (vcpu->pio.cur_count) { - r = complete_pio(vcpu); - if (r) - goto out; - } else if (!vcpu->mmio_is_write) { - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); - vcpu->mmio_read_completed = 1; - vcpu->mmio_needed = 0; - r = emulate_instruction(vcpu, kvm_run, - vcpu->mmio_fault_cr2, 0); - if (r == EMULATE_DO_MMIO) { - /* - * Read-modify-write. Back to userspace. - */ - kvm_run->exit_reason = KVM_EXIT_MMIO; - r = 0; - goto out; - } + if (vcpu->pio.cur_count) { + r = complete_pio(vcpu); + if (r) + goto out; + } + + if (vcpu->mmio_needed) { + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + vcpu->mmio_read_completed = 1; + vcpu->mmio_needed = 0; + r = emulate_instruction(vcpu, kvm_run, + vcpu->mmio_fault_cr2, 0); + if (r == EMULATE_DO_MMIO) { + /* + * Read-modify-write. Back to userspace. + */ + kvm_run->exit_reason = KVM_EXIT_MMIO; + r = 0; + goto out; } } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 07bf353eeb6f..738c2f50c774 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -11,7 +11,7 @@ #include #include -#define KVM_API_VERSION 10 +#define KVM_API_VERSION 11 /* * Architectural interrupt line count, and the size of the bitmap needed @@ -58,9 +58,8 @@ enum kvm_exit_reason { /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ - __u32 io_completed; /* mmio/pio request completed */ __u8 request_interrupt_window; - __u8 padding1[3]; + __u8 padding1[7]; /* out */ __u32 exit_reason; -- cgit v1.2.3 From 2ff81f70b56dc1cdd3bf2f08414608069db6ef1a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 29 Apr 2007 16:25:49 +0300 Subject: KVM: Remove unused 'instruction_length' As we no longer emulate in userspace, this is meaningless. We don't compute it on SVM anyway. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 1 - include/linux/kvm.h | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 5a2a68dec6bf..724db0027f00 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1783,7 +1783,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) exit_reason != EXIT_REASON_EXCEPTION_NMI ) printk(KERN_WARNING "%s: unexpected, valid vectoring info and " "exit reason is 0x%x\n", __FUNCTION__, exit_reason); - kvm_run->instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); if (exit_reason < kvm_vmx_max_exit_handlers && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 738c2f50c774..e6edca81ab84 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -11,7 +11,7 @@ #include #include -#define KVM_API_VERSION 11 +#define KVM_API_VERSION 12 /* * Architectural interrupt line count, and the size of the bitmap needed @@ -63,10 +63,9 @@ struct kvm_run { /* out */ __u32 exit_reason; - __u32 instruction_length; __u8 ready_for_interrupt_injection; __u8 if_flag; - __u8 padding2[6]; + __u8 padding2[2]; /* in (pre_kvm_run), out (post_kvm_run) */ __u64 cr8; -- cgit v1.2.3