Commit | Line | Data |
---|---|---|
0cf1bfd2 MT |
1 | /* |
2 | * KVM paravirt_ops implementation | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
17 | * | |
18 | * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | |
19 | * Copyright IBM Corporation, 2007 | |
20 | * Authors: Anthony Liguori <aliguori@us.ibm.com> | |
21 | */ | |
22 | ||
23 | #include <linux/module.h> | |
24 | #include <linux/kernel.h> | |
25 | #include <linux/kvm_para.h> | |
26 | #include <linux/cpu.h> | |
27 | #include <linux/mm.h> | |
1da8a77b | 28 | #include <linux/highmem.h> |
096d14a3 | 29 | #include <linux/hardirq.h> |
a90ede7b | 30 | #include <asm/timer.h> |
096d14a3 MT |
31 | |
32 | #define MMU_QUEUE_SIZE 1024 | |
33 | ||
34 | struct kvm_para_state { | |
35 | u8 mmu_queue[MMU_QUEUE_SIZE]; | |
36 | int mmu_queue_len; | |
096d14a3 MT |
37 | }; |
38 | ||
39 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); | |
40 | ||
41 | static struct kvm_para_state *kvm_para_state(void) | |
42 | { | |
43 | return &per_cpu(para_state, raw_smp_processor_id()); | |
44 | } | |
0cf1bfd2 MT |
45 | |
/*
 * I/O delay hook used for KVM guests.  No "IO delay" is needed on KVM, so
 * this is intentionally a no-op.
 */
static void kvm_io_delay(void)
{
	/* nothing to do */
}
52 | ||
1da8a77b MT |
53 | static void kvm_mmu_op(void *buffer, unsigned len) |
54 | { | |
55 | int r; | |
56 | unsigned long a1, a2; | |
57 | ||
58 | do { | |
59 | a1 = __pa(buffer); | |
60 | a2 = 0; /* on i386 __pa() always returns <4G */ | |
61 | r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2); | |
62 | buffer += r; | |
63 | len -= r; | |
64 | } while (len); | |
65 | } | |
66 | ||
096d14a3 MT |
67 | static void mmu_queue_flush(struct kvm_para_state *state) |
68 | { | |
69 | if (state->mmu_queue_len) { | |
70 | kvm_mmu_op(state->mmu_queue, state->mmu_queue_len); | |
71 | state->mmu_queue_len = 0; | |
72 | } | |
73 | } | |
74 | ||
75 | static void kvm_deferred_mmu_op(void *buffer, int len) | |
76 | { | |
77 | struct kvm_para_state *state = kvm_para_state(); | |
78 | ||
6ba66178 | 79 | if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) { |
096d14a3 MT |
80 | kvm_mmu_op(buffer, len); |
81 | return; | |
82 | } | |
83 | if (state->mmu_queue_len + len > sizeof state->mmu_queue) | |
84 | mmu_queue_flush(state); | |
85 | memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len); | |
86 | state->mmu_queue_len += len; | |
87 | } | |
88 | ||
1da8a77b MT |
89 | static void kvm_mmu_write(void *dest, u64 val) |
90 | { | |
91 | __u64 pte_phys; | |
92 | struct kvm_mmu_op_write_pte wpte; | |
93 | ||
94 | #ifdef CONFIG_HIGHPTE | |
95 | struct page *page; | |
96 | unsigned long dst = (unsigned long) dest; | |
97 | ||
98 | page = kmap_atomic_to_page(dest); | |
99 | pte_phys = page_to_pfn(page); | |
100 | pte_phys <<= PAGE_SHIFT; | |
101 | pte_phys += (dst & ~(PAGE_MASK)); | |
102 | #else | |
103 | pte_phys = (unsigned long)__pa(dest); | |
104 | #endif | |
105 | wpte.header.op = KVM_MMU_OP_WRITE_PTE; | |
106 | wpte.pte_val = val; | |
107 | wpte.pte_phys = pte_phys; | |
108 | ||
096d14a3 | 109 | kvm_deferred_mmu_op(&wpte, sizeof wpte); |
1da8a77b MT |
110 | } |
111 | ||
112 | /* | |
113 | * We only need to hook operations that are MMU writes. We hook these so that | |
114 | * we can use lazy MMU mode to batch these operations. We could probably | |
115 | * improve the performance of the host code if we used some of the information | |
116 | * here to simplify processing of batched writes. | |
117 | */ | |
118 | static void kvm_set_pte(pte_t *ptep, pte_t pte) | |
119 | { | |
120 | kvm_mmu_write(ptep, pte_val(pte)); | |
121 | } | |
122 | ||
123 | static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr, | |
124 | pte_t *ptep, pte_t pte) | |
125 | { | |
126 | kvm_mmu_write(ptep, pte_val(pte)); | |
127 | } | |
128 | ||
129 | static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd) | |
130 | { | |
131 | kvm_mmu_write(pmdp, pmd_val(pmd)); | |
132 | } | |
133 | ||
134 | #if PAGETABLE_LEVELS >= 3 | |
135 | #ifdef CONFIG_X86_PAE | |
136 | static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte) | |
137 | { | |
138 | kvm_mmu_write(ptep, pte_val(pte)); | |
139 | } | |
140 | ||
1da8a77b MT |
141 | static void kvm_pte_clear(struct mm_struct *mm, |
142 | unsigned long addr, pte_t *ptep) | |
143 | { | |
144 | kvm_mmu_write(ptep, 0); | |
145 | } | |
146 | ||
147 | static void kvm_pmd_clear(pmd_t *pmdp) | |
148 | { | |
149 | kvm_mmu_write(pmdp, 0); | |
150 | } | |
151 | #endif | |
152 | ||
153 | static void kvm_set_pud(pud_t *pudp, pud_t pud) | |
154 | { | |
155 | kvm_mmu_write(pudp, pud_val(pud)); | |
156 | } | |
157 | ||
158 | #if PAGETABLE_LEVELS == 4 | |
159 | static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd) | |
160 | { | |
161 | kvm_mmu_write(pgdp, pgd_val(pgd)); | |
162 | } | |
163 | #endif | |
164 | #endif /* PAGETABLE_LEVELS >= 3 */ | |
165 | ||
166 | static void kvm_flush_tlb(void) | |
167 | { | |
168 | struct kvm_mmu_op_flush_tlb ftlb = { | |
169 | .header.op = KVM_MMU_OP_FLUSH_TLB, | |
170 | }; | |
171 | ||
096d14a3 | 172 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); |
1da8a77b MT |
173 | } |
174 | ||
f8639939 | 175 | static void kvm_release_pt(unsigned long pfn) |
1da8a77b MT |
176 | { |
177 | struct kvm_mmu_op_release_pt rpt = { | |
178 | .header.op = KVM_MMU_OP_RELEASE_PT, | |
179 | .pt_phys = (u64)pfn << PAGE_SHIFT, | |
180 | }; | |
181 | ||
182 | kvm_mmu_op(&rpt, sizeof rpt); | |
183 | } | |
184 | ||
096d14a3 MT |
/*
 * lazy_mode.enter hook: nothing KVM-specific is needed on entry, so this
 * only forwards to the generic paravirt_enter_lazy_mmu().
 */
static void kvm_enter_lazy_mmu(void)
{
	paravirt_enter_lazy_mmu();
}
189 | ||
/*
 * lazy_mode.leave hook: flush this CPU's queued MMU operations first, then
 * call the generic paravirt_leave_lazy_mmu().
 */
static void kvm_leave_lazy_mmu(void)
{
	mmu_queue_flush(kvm_para_state());
	paravirt_leave_lazy_mmu();
}
197 | ||
d3ac8815 | 198 | static void __init paravirt_ops_setup(void) |
0cf1bfd2 MT |
199 | { |
200 | pv_info.name = "KVM"; | |
201 | pv_info.paravirt_enabled = 1; | |
202 | ||
203 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) | |
204 | pv_cpu_ops.io_delay = kvm_io_delay; | |
205 | ||
1da8a77b MT |
206 | if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) { |
207 | pv_mmu_ops.set_pte = kvm_set_pte; | |
208 | pv_mmu_ops.set_pte_at = kvm_set_pte_at; | |
209 | pv_mmu_ops.set_pmd = kvm_set_pmd; | |
210 | #if PAGETABLE_LEVELS >= 3 | |
211 | #ifdef CONFIG_X86_PAE | |
212 | pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; | |
1da8a77b MT |
213 | pv_mmu_ops.pte_clear = kvm_pte_clear; |
214 | pv_mmu_ops.pmd_clear = kvm_pmd_clear; | |
215 | #endif | |
216 | pv_mmu_ops.set_pud = kvm_set_pud; | |
217 | #if PAGETABLE_LEVELS == 4 | |
218 | pv_mmu_ops.set_pgd = kvm_set_pgd; | |
219 | #endif | |
220 | #endif | |
221 | pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; | |
222 | pv_mmu_ops.release_pte = kvm_release_pt; | |
223 | pv_mmu_ops.release_pmd = kvm_release_pt; | |
224 | pv_mmu_ops.release_pud = kvm_release_pt; | |
096d14a3 MT |
225 | |
226 | pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; | |
227 | pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; | |
1da8a77b | 228 | } |
a90ede7b MT |
229 | #ifdef CONFIG_X86_IO_APIC |
230 | no_timer_check = 1; | |
231 | #endif | |
0cf1bfd2 MT |
232 | } |
233 | ||
234 | void __init kvm_guest_init(void) | |
235 | { | |
236 | if (!kvm_para_available()) | |
237 | return; | |
238 | ||
239 | paravirt_ops_setup(); | |
240 | } |