Commit | Line | Data |
---|---|---|
d4c9ff2d FEL |
1 | /* |
2 | * kvm trace | |
3 | * | |
4 | * It is designed to allow debugging traces of kvm to be generated | |
5 | * on UP / SMP machines. Each trace entry can be timestamped so that | |
6 | * it's possible to reconstruct a chronological record of trace events. | |
7 | * The implementation refers to blktrace kernel support. | |
8 | * | |
9 | * Copyright (c) 2008 Intel Corporation | |
10 | * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> | |
11 | * | |
12 | * Authors: Feng(Eric) Liu, eric.e.liu@intel.com | |
13 | * | |
14 | * Date: Feb 2008 | |
15 | */ | |
16 | ||
17 | #include <linux/module.h> | |
18 | #include <linux/relay.h> | |
19 | #include <linux/debugfs.h> | |
20 | ||
21 | #include <linux/kvm_host.h> | |
22 | ||
/* Trace session states held in kvm_trace.trace_state. */
#define KVM_TRACE_STATE_RUNNING 	(1 << 0)	/* probes firing, records being written */
#define KVM_TRACE_STATE_PAUSE   	(1 << 1)	/* session kept, new writes suppressed */
#define KVM_TRACE_STATE_CLEARUP 	(1 << 2)	/* session being torn down */
26 | ||
/*
 * Per-session trace state: the relay channel that ships trace records
 * to user space, the debugfs file exposing the lost-record counter,
 * and the current session state (KVM_TRACE_STATE_*).
 */
struct kvm_trace {
	int trace_state;		/* KVM_TRACE_STATE_* flag */
	struct rchan *rchan;		/* relay channel carrying trace records */
	struct dentry *lost_file;	/* debugfs "lost_records" file */
	atomic_t lost_records;		/* records dropped because subbuffers were full */
};
/* Single global session; non-NULL once tracing has been enabled. */
static struct kvm_trace *kvm_trace;
34 | ||
/*
 * Describes one marker probe: which marker to attach to, its argument
 * format string, whether its records carry a cycle timestamp, and the
 * callback to invoke when the marker fires.
 */
struct kvm_trace_probe {
	const char *name;		/* marker name to attach to */
	const char *format;		/* marker argument format string */
	u32 cycle_in;			/* nonzero: record includes cycle counter */
	marker_probe_func *probe_func;	/* callback invoked when marker fires */
};
41 | ||
42 | static inline int calc_rec_size(int cycle, int extra) | |
43 | { | |
44 | int rec_size = KVM_TRC_HEAD_SIZE; | |
45 | ||
46 | rec_size += extra; | |
47 | return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; | |
48 | } | |
49 | ||
50 | static void kvm_add_trace(void *probe_private, void *call_data, | |
51 | const char *format, va_list *args) | |
52 | { | |
53 | struct kvm_trace_probe *p = probe_private; | |
54 | struct kvm_trace *kt = kvm_trace; | |
55 | struct kvm_trace_rec rec; | |
56 | struct kvm_vcpu *vcpu; | |
57 | int i, extra, size; | |
58 | ||
59 | if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) | |
60 | return; | |
61 | ||
62 | rec.event = va_arg(*args, u32); | |
63 | vcpu = va_arg(*args, struct kvm_vcpu *); | |
64 | rec.pid = current->tgid; | |
65 | rec.vcpu_id = vcpu->vcpu_id; | |
66 | ||
67 | extra = va_arg(*args, u32); | |
68 | WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); | |
69 | extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); | |
70 | rec.extra_u32 = extra; | |
71 | ||
72 | rec.cycle_in = p->cycle_in; | |
73 | ||
74 | if (rec.cycle_in) { | |
9ef621d3 | 75 | rec.u.cycle.cycle_u64 = get_cycles(); |
d4c9ff2d FEL |
76 | |
77 | for (i = 0; i < rec.extra_u32; i++) | |
78 | rec.u.cycle.extra_u32[i] = va_arg(*args, u32); | |
79 | } else { | |
80 | for (i = 0; i < rec.extra_u32; i++) | |
81 | rec.u.nocycle.extra_u32[i] = va_arg(*args, u32); | |
82 | } | |
83 | ||
84 | size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32)); | |
85 | relay_write(kt->rchan, &rec, size); | |
86 | } | |
87 | ||
/*
 * Markers this module hooks: entry/exit records carry a cycle
 * timestamp (cycle_in = 1), handler records do not.
 */
static struct kvm_trace_probe kvm_trace_probes[] = {
	{ "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
	{ "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
};
92 | ||
93 | static int lost_records_get(void *data, u64 *val) | |
94 | { | |
95 | struct kvm_trace *kt = data; | |
96 | ||
97 | *val = atomic_read(&kt->lost_records); | |
98 | return 0; | |
99 | } | |
100 | ||
101 | DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n"); | |
102 | ||
103 | /* | |
104 | * The relay channel is used in "no-overwrite" mode, it keeps trace of how | |
105 | * many times we encountered a full subbuffer, to tell user space app the | |
106 | * lost records there were. | |
107 | */ | |
108 | static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, | |
109 | void *prev_subbuf, size_t prev_padding) | |
110 | { | |
111 | struct kvm_trace *kt; | |
112 | ||
9ef621d3 TL |
113 | if (!relay_buf_full(buf)) { |
114 | if (!prev_subbuf) { | |
115 | /* | |
116 | * executed only once when the channel is opened | |
117 | * save metadata as first record | |
118 | */ | |
119 | subbuf_start_reserve(buf, sizeof(u32)); | |
120 | *(u32 *)subbuf = 0x12345678; | |
121 | } | |
122 | ||
d4c9ff2d | 123 | return 1; |
9ef621d3 | 124 | } |
d4c9ff2d FEL |
125 | |
126 | kt = buf->chan->private_data; | |
127 | atomic_inc(&kt->lost_records); | |
128 | ||
129 | return 0; | |
130 | } | |
131 | ||
/*
 * relay create_buf_file callback: expose each per-cpu relay buffer as
 * a debugfs file served by the generic relay file operations.
 *
 * NOTE(review): "callack" is a typo for "callback"; kept as-is here
 * because kvm_relay_callbacks references this symbol by name — rename
 * both together if cleaning this up.
 */
static struct dentry *kvm_create_buf_file_callack(const char *filename,
						 struct dentry *parent,
						 int mode,
						 struct rchan_buf *buf,
						 int *is_global)
{
	return debugfs_create_file(filename, mode, parent, buf,
				   &relay_file_operations);
}
141 | ||
/*
 * relay remove_buf_file callback: tear down the debugfs file backing
 * a relay buffer.
 */
static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
	debugfs_remove(dentry);
	return 0;
}
147 | ||
/* Callbacks handed to relay_open() for the trace channel. */
static struct rchan_callbacks kvm_relay_callbacks = {
	.subbuf_start = kvm_subbuf_start_callback,
	.create_buf_file = kvm_create_buf_file_callack,
	.remove_buf_file = kvm_remove_buf_file_callback,
};
153 | ||
154 | static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts) | |
155 | { | |
156 | struct kvm_trace *kt; | |
157 | int i, r = -ENOMEM; | |
158 | ||
159 | if (!kuts->buf_size || !kuts->buf_nr) | |
160 | return -EINVAL; | |
161 | ||
162 | kt = kzalloc(sizeof(*kt), GFP_KERNEL); | |
163 | if (!kt) | |
164 | goto err; | |
165 | ||
166 | r = -EIO; | |
167 | atomic_set(&kt->lost_records, 0); | |
76f7c879 | 168 | kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir, |
d4c9ff2d FEL |
169 | kt, &kvm_trace_lost_ops); |
170 | if (!kt->lost_file) | |
171 | goto err; | |
172 | ||
76f7c879 | 173 | kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size, |
d4c9ff2d FEL |
174 | kuts->buf_nr, &kvm_relay_callbacks, kt); |
175 | if (!kt->rchan) | |
176 | goto err; | |
177 | ||
178 | kvm_trace = kt; | |
179 | ||
180 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | |
181 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | |
182 | ||
183 | r = marker_probe_register(p->name, p->format, p->probe_func, p); | |
184 | if (r) | |
185 | printk(KERN_INFO "Unable to register probe %s\n", | |
186 | p->name); | |
187 | } | |
188 | ||
189 | kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING; | |
190 | ||
191 | return 0; | |
192 | err: | |
193 | if (kt) { | |
194 | if (kt->lost_file) | |
195 | debugfs_remove(kt->lost_file); | |
196 | if (kt->rchan) | |
197 | relay_close(kt->rchan); | |
198 | kfree(kt); | |
199 | } | |
200 | return r; | |
201 | } | |
202 | ||
203 | static int kvm_trace_enable(char __user *arg) | |
204 | { | |
205 | struct kvm_user_trace_setup kuts; | |
206 | int ret; | |
207 | ||
208 | ret = copy_from_user(&kuts, arg, sizeof(kuts)); | |
209 | if (ret) | |
210 | return -EFAULT; | |
211 | ||
212 | ret = do_kvm_trace_enable(&kuts); | |
213 | if (ret) | |
214 | return ret; | |
215 | ||
216 | return 0; | |
217 | } | |
218 | ||
219 | static int kvm_trace_pause(void) | |
220 | { | |
221 | struct kvm_trace *kt = kvm_trace; | |
222 | int r = -EINVAL; | |
223 | ||
224 | if (kt == NULL) | |
225 | return r; | |
226 | ||
227 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING) { | |
228 | kt->trace_state = KVM_TRACE_STATE_PAUSE; | |
229 | relay_flush(kt->rchan); | |
230 | r = 0; | |
231 | } | |
232 | ||
233 | return r; | |
234 | } | |
235 | ||
236 | void kvm_trace_cleanup(void) | |
237 | { | |
238 | struct kvm_trace *kt = kvm_trace; | |
239 | int i; | |
240 | ||
241 | if (kt == NULL) | |
242 | return; | |
243 | ||
244 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING || | |
245 | kt->trace_state == KVM_TRACE_STATE_PAUSE) { | |
246 | ||
247 | kt->trace_state = KVM_TRACE_STATE_CLEARUP; | |
248 | ||
249 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | |
250 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | |
251 | marker_probe_unregister(p->name, p->probe_func, p); | |
252 | } | |
253 | ||
254 | relay_close(kt->rchan); | |
255 | debugfs_remove(kt->lost_file); | |
256 | kfree(kt); | |
257 | } | |
258 | } | |
259 | ||
260 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) | |
261 | { | |
262 | void __user *argp = (void __user *)arg; | |
263 | long r = -EINVAL; | |
264 | ||
265 | if (!capable(CAP_SYS_ADMIN)) | |
266 | return -EPERM; | |
267 | ||
268 | switch (ioctl) { | |
269 | case KVM_TRACE_ENABLE: | |
270 | r = kvm_trace_enable(argp); | |
271 | break; | |
272 | case KVM_TRACE_PAUSE: | |
273 | r = kvm_trace_pause(); | |
274 | break; | |
275 | case KVM_TRACE_DISABLE: | |
276 | r = 0; | |
277 | kvm_trace_cleanup(); | |
278 | break; | |
279 | } | |
280 | ||
281 | return r; | |
282 | } |