Commit | Line | Data |
---|---|---|
c906108c | 1 | /* Simulator cache routines for CGEN simulators (and maybe others). |
c5a57081 | 2 | Copyright (C) 1996-1998, 2007-2012 Free Software Foundation, Inc. |
c906108c SS |
3 | Contributed by Cygnus Support. |
4 | ||
5 | This file is part of GDB, the GNU debugger. | |
6 | ||
7 | This program is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
4744ac1b JB |
9 | the Free Software Foundation; either version 3 of the License, or |
10 | (at your option) any later version. | |
c906108c SS |
11 | |
12 | This program is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
4744ac1b JB |
17 | You should have received a copy of the GNU General Public License |
18 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ | |
c906108c SS |
19 | |
20 | #define SCACHE_DEFINE_INLINE | |
21 | ||
22 | #include "sim-main.h" | |
23 | #ifdef HAVE_STDLIB_H | |
24 | #include <stdlib.h> | |
25 | #endif | |
26 | #include "libiberty.h" | |
27 | #include "sim-options.h" | |
28 | #include "sim-io.h" | |
29 | ||
30 | #define MAX(a,b) ((a) > (b) ? (a) : (b)) | |
31 | ||
32 | /* Unused address. */ | |
33 | #define UNUSED_ADDR 0xffffffff | |
34 | ||
35 | /* Scache configuration parameters. | |
36 | ??? Experiments to determine reasonable values is wip. | |
37 | These are just guesses. */ | |
38 | ||
39 | /* Default number of scache elements. | |
40 | The size of an element is typically 32-64 bytes, so the size of the | |
41 | default scache will be between 512K and 1M bytes. */ | |
42 | #ifdef CONFIG_SIM_CACHE_SIZE | |
43 | #define SCACHE_DEFAULT_CACHE_SIZE CONFIG_SIM_CACHE_SIZE | |
44 | #else | |
45 | #define SCACHE_DEFAULT_CACHE_SIZE 16384 | |
46 | #endif | |
47 | ||
48 | /* Minimum cache size. | |
49 | The m32r port assumes a cache size of at least 2 so it can decode both 16 | |
50 | bit insns. When compiling we need an extra for the chain entry. And this | |
51 | must be a multiple of 2. Hence 4 is the minimum (though, for those with | |
52 | featuritis or itchy pedantic bits, we could make this conditional on | |
53 | WITH_SCACHE_PBB). */ | |
54 | #define MIN_SCACHE_SIZE 4 | |
55 | ||
56 | /* Ratio of size of text section to size of scache. | |
57 | When compiling, we don't want to flush the scache more than we have to | |
58 | but we also don't want it to be exorbitantly(sp?) large. So we pick a high | |
59 | default value, then reduce it by the size of the program being simulated, | |
60 | but we don't override any value specified on the command line. | |
61 | If not specified on the command line, the size to use is computed as | |
62 | max (MIN_SCACHE_SIZE, | |
63 | min (DEFAULT_SCACHE_SIZE, | |
64 | text_size / (base_insn_size * INSN_SCACHE_RATIO))). */ | |
65 | /* ??? Interesting idea but not currently used. */ | |
66 | #define INSN_SCACHE_RATIO 4 | |
67 | ||
68 | /* Default maximum insn chain length. | |
69 | The only reason for a maximum is so we can place a maximum size on the | |
70 | profiling table. Chain lengths are determined by cti's. | |
71 | 32 is a more reasonable number, but when profiling, the before/after | |
72 | handlers take up that much more space. The scache is filled from front to | |
73 | back so all this determines is when the scache needs to be flushed. */ | |
74 | #define MAX_CHAIN_LENGTH 64 | |
75 | ||
76 | /* Default maximum hash list length. */ | |
77 | #define MAX_HASH_CHAIN_LENGTH 4 | |
78 | ||
79 | /* Minimum hash table size. */ | |
80 | #define MIN_HASH_CHAINS 32 | |
81 | ||
82 | /* Ratio of number of scache elements to number of hash lists. | |
83 | Since the user can only specify the size of the scache, we compute the | |
84 | size of the hash table as | |
85 | max (MIN_HASH_CHAINS, scache_size / SCACHE_HASH_RATIO). */ | |
86 | #define SCACHE_HASH_RATIO 8 | |
87 | ||
88 | /* Hash a PC value. | |
89 | FIXME: May wish to make the hashing architecture specific. | |
90 | FIXME: revisit */ | |
91 | #define HASH_PC(pc) (((pc) >> 2) + ((pc) >> 5)) | |
92 | ||
93 | static MODULE_INIT_FN scache_init; | |
94 | static MODULE_UNINSTALL_FN scache_uninstall; | |
95 | ||
96 | static DECLARE_OPTION_HANDLER (scache_option_handler); | |
97 | ||
98 | #define OPTION_PROFILE_SCACHE (OPTION_START + 0) | |
99 | ||
100 | static const OPTION scache_options[] = { | |
101 | { {"scache-size", optional_argument, NULL, 'c'}, | |
102 | 'c', "[SIZE]", "Specify size of simulator execution cache", | |
103 | scache_option_handler }, | |
104 | #if WITH_SCACHE_PBB | |
105 | /* ??? It might be nice to allow the user to specify the size of the hash | |
106 | table, the maximum hash list length, and the maximum chain length, but | |
107 | for now that might be more akin to featuritis. */ | |
108 | #endif | |
109 | { {"profile-scache", optional_argument, NULL, OPTION_PROFILE_SCACHE}, | |
110 | '\0', "on|off", "Perform simulator execution cache profiling", | |
111 | scache_option_handler }, | |
112 | { {NULL, no_argument, NULL, 0}, '\0', NULL, NULL, NULL } | |
113 | }; | |
114 | ||
115 | static SIM_RC | |
116 | scache_option_handler (SIM_DESC sd, sim_cpu *cpu, int opt, | |
117 | char *arg, int is_command) | |
118 | { | |
c906108c SS |
119 | switch (opt) |
120 | { | |
121 | case 'c' : | |
122 | if (WITH_SCACHE) | |
123 | { | |
124 | if (arg != NULL) | |
125 | { | |
126 | int n = strtol (arg, NULL, 0); | |
127 | if (n < MIN_SCACHE_SIZE) | |
128 | { | |
129 | sim_io_eprintf (sd, "invalid scache size `%d', must be at least 4", n); | |
130 | return SIM_RC_FAIL; | |
131 | } | |
132 | /* Ensure it's a multiple of 2. */ | |
133 | if ((n & (n - 1)) != 0) | |
134 | { | |
135 | sim_io_eprintf (sd, "scache size `%d' not a multiple of 2\n", n); | |
136 | { | |
137 | /* round up to nearest multiple of 2 */ | |
138 | int i; | |
139 | for (i = 1; i < n; i <<= 1) | |
140 | continue; | |
141 | n = i; | |
142 | } | |
143 | sim_io_eprintf (sd, "rounding scache size up to %d\n", n); | |
144 | } | |
145 | if (cpu == NULL) | |
146 | STATE_SCACHE_SIZE (sd) = n; | |
147 | else | |
148 | CPU_SCACHE_SIZE (cpu) = n; | |
149 | } | |
150 | else | |
151 | { | |
152 | if (cpu == NULL) | |
153 | STATE_SCACHE_SIZE (sd) = SCACHE_DEFAULT_CACHE_SIZE; | |
154 | else | |
155 | CPU_SCACHE_SIZE (cpu) = SCACHE_DEFAULT_CACHE_SIZE; | |
156 | } | |
157 | } | |
158 | else | |
159 | sim_io_eprintf (sd, "Simulator execution cache not enabled, `--scache-size' ignored\n"); | |
160 | break; | |
161 | ||
162 | case OPTION_PROFILE_SCACHE : | |
163 | if (WITH_SCACHE && WITH_PROFILE_SCACHE_P) | |
164 | { | |
165 | /* FIXME: handle cpu != NULL. */ | |
166 | return sim_profile_set_option (sd, "-scache", PROFILE_SCACHE_IDX, | |
167 | arg); | |
168 | } | |
169 | else | |
170 | sim_io_eprintf (sd, "Simulator cache profiling not compiled in, `--profile-scache' ignored\n"); | |
171 | break; | |
172 | } | |
173 | ||
174 | return SIM_RC_OK; | |
175 | } | |
176 | ||
177 | SIM_RC | |
178 | scache_install (SIM_DESC sd) | |
179 | { | |
180 | sim_add_option_table (sd, NULL, scache_options); | |
181 | sim_module_add_init_fn (sd, scache_init); | |
182 | sim_module_add_uninstall_fn (sd, scache_uninstall); | |
183 | ||
184 | /* This is the default, it may be overridden on the command line. */ | |
185 | STATE_SCACHE_SIZE (sd) = WITH_SCACHE; | |
186 | ||
187 | return SIM_RC_OK; | |
188 | } | |
189 | ||
190 | static SIM_RC | |
191 | scache_init (SIM_DESC sd) | |
192 | { | |
193 | int c; | |
194 | ||
195 | for (c = 0; c < MAX_NR_PROCESSORS; ++c) | |
196 | { | |
197 | SIM_CPU *cpu = STATE_CPU (sd, c); | |
198 | int elm_size = IMP_PROPS_SCACHE_ELM_SIZE (MACH_IMP_PROPS (CPU_MACH (cpu))); | |
199 | ||
200 | /* elm_size is 0 if the cpu doesn't not have scache support */ | |
201 | if (elm_size == 0) | |
202 | { | |
203 | CPU_SCACHE_SIZE (cpu) = 0; | |
204 | CPU_SCACHE_CACHE (cpu) = NULL; | |
205 | } | |
206 | else | |
207 | { | |
208 | if (CPU_SCACHE_SIZE (cpu) == 0) | |
209 | CPU_SCACHE_SIZE (cpu) = STATE_SCACHE_SIZE (sd); | |
210 | CPU_SCACHE_CACHE (cpu) = | |
211 | (SCACHE *) xmalloc (CPU_SCACHE_SIZE (cpu) * elm_size); | |
212 | #if WITH_SCACHE_PBB | |
213 | CPU_SCACHE_MAX_CHAIN_LENGTH (cpu) = MAX_CHAIN_LENGTH; | |
214 | CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu) = MAX_HASH_CHAIN_LENGTH; | |
215 | CPU_SCACHE_NUM_HASH_CHAINS (cpu) = MAX (MIN_HASH_CHAINS, | |
216 | CPU_SCACHE_SIZE (cpu) | |
217 | / SCACHE_HASH_RATIO); | |
218 | CPU_SCACHE_HASH_TABLE (cpu) = | |
219 | (SCACHE_MAP *) xmalloc (CPU_SCACHE_NUM_HASH_CHAINS (cpu) | |
220 | * CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu) | |
221 | * sizeof (SCACHE_MAP)); | |
222 | CPU_SCACHE_PBB_BEGIN (cpu) = (SCACHE *) zalloc (elm_size); | |
223 | CPU_SCACHE_CHAIN_LENGTHS (cpu) = | |
224 | (unsigned long *) zalloc ((CPU_SCACHE_MAX_CHAIN_LENGTH (cpu) + 1) | |
225 | * sizeof (long)); | |
226 | #endif | |
227 | } | |
228 | } | |
229 | ||
230 | scache_flush (sd); | |
231 | ||
232 | return SIM_RC_OK; | |
233 | } | |
234 | ||
235 | static void | |
236 | scache_uninstall (SIM_DESC sd) | |
237 | { | |
238 | int c; | |
239 | ||
240 | for (c = 0; c < MAX_NR_PROCESSORS; ++c) | |
241 | { | |
242 | SIM_CPU *cpu = STATE_CPU (sd, c); | |
243 | ||
244 | if (CPU_SCACHE_CACHE (cpu) != NULL) | |
245 | free (CPU_SCACHE_CACHE (cpu)); | |
246 | #if WITH_SCACHE_PBB | |
247 | if (CPU_SCACHE_HASH_TABLE (cpu) != NULL) | |
248 | free (CPU_SCACHE_HASH_TABLE (cpu)); | |
249 | if (CPU_SCACHE_PBB_BEGIN (cpu) != NULL) | |
250 | free (CPU_SCACHE_PBB_BEGIN (cpu)); | |
251 | if (CPU_SCACHE_CHAIN_LENGTHS (cpu) != NULL) | |
252 | free (CPU_SCACHE_CHAIN_LENGTHS (cpu)); | |
253 | #endif | |
254 | } | |
255 | } | |
256 | ||
257 | void | |
258 | scache_flush (SIM_DESC sd) | |
259 | { | |
260 | int c; | |
261 | ||
262 | for (c = 0; c < MAX_NR_PROCESSORS; ++c) | |
263 | { | |
264 | SIM_CPU *cpu = STATE_CPU (sd, c); | |
265 | scache_flush_cpu (cpu); | |
266 | } | |
267 | } | |
268 | ||
269 | void | |
270 | scache_flush_cpu (SIM_CPU *cpu) | |
271 | { | |
272 | int i,n; | |
273 | ||
274 | /* Don't bother if cache not in use. */ | |
275 | if (CPU_SCACHE_SIZE (cpu) == 0) | |
276 | return; | |
277 | ||
278 | #if WITH_SCACHE_PBB | |
279 | /* It's important that this be reasonably fast as this can be done when | |
280 | the simulation is running. */ | |
281 | CPU_SCACHE_NEXT_FREE (cpu) = CPU_SCACHE_CACHE (cpu); | |
282 | n = CPU_SCACHE_NUM_HASH_CHAINS (cpu) * CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu); | |
283 | /* ??? Might be faster to just set the first entry, then update the | |
284 | "last entry" marker during allocation. */ | |
285 | for (i = 0; i < n; ++i) | |
286 | CPU_SCACHE_HASH_TABLE (cpu) [i] . pc = UNUSED_ADDR; | |
287 | #else | |
288 | { | |
289 | int elm_size = IMP_PROPS_SCACHE_ELM_SIZE (MACH_IMP_PROPS (CPU_MACH (cpu))); | |
290 | SCACHE *sc; | |
291 | ||
292 | /* Technically, this may not be necessary, but it helps debugging. */ | |
293 | memset (CPU_SCACHE_CACHE (cpu), 0, | |
294 | CPU_SCACHE_SIZE (cpu) * elm_size); | |
295 | ||
296 | for (i = 0, sc = CPU_SCACHE_CACHE (cpu); i < CPU_SCACHE_SIZE (cpu); | |
297 | ++i, sc = (SCACHE *) ((char *) sc + elm_size)) | |
298 | { | |
299 | sc->argbuf.addr = UNUSED_ADDR; | |
300 | } | |
301 | } | |
302 | #endif | |
303 | } | |
304 | ||
305 | #if WITH_SCACHE_PBB | |
306 | ||
307 | /* Look up PC in the hash table of scache entry points. | |
308 | Returns the entry or NULL if not found. */ | |
309 | ||
310 | SCACHE * | |
311 | scache_lookup (SIM_CPU *cpu, IADDR pc) | |
312 | { | |
96baa820 JM |
313 | /* FIXME: hash computation is wrong, doesn't take into account |
314 | NUM_HASH_CHAIN_ENTRIES. A lot of the hash table will be unused! */ | |
c906108c SS |
315 | unsigned int slot = HASH_PC (pc) & (CPU_SCACHE_NUM_HASH_CHAINS (cpu) - 1); |
316 | int i, max_i = CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu); | |
317 | SCACHE_MAP *scm; | |
318 | ||
319 | /* We don't update hit/miss statistics as this is only used when recording | |
320 | branch target addresses. */ | |
321 | ||
322 | scm = & CPU_SCACHE_HASH_TABLE (cpu) [slot]; | |
323 | for (i = 0; i < max_i && scm->pc != UNUSED_ADDR; ++i, ++scm) | |
324 | { | |
325 | if (scm->pc == pc) | |
326 | return scm->sc; | |
327 | } | |
328 | return 0; | |
329 | } | |
330 | ||
331 | /* Look up PC and if not found create an entry for it. | |
332 | If found the result is a pointer to the SCACHE entry. | |
333 | If not found the result is NULL, and the address of a buffer of at least | |
334 | N entries is stored in BUFP. | |
335 | It's done this way so the caller can still distinguish found/not-found. | |
336 | If the table is full, it is emptied to make room. | |
337 | If the maximum length of a hash list is reached a random entry is thrown out | |
338 | to make room. | |
339 | ??? One might want to try to make this smarter, but let's see some | |
340 | measurable benefit first. */ | |
341 | ||
342 | SCACHE * | |
343 | scache_lookup_or_alloc (SIM_CPU *cpu, IADDR pc, int n, SCACHE **bufp) | |
344 | { | |
96baa820 JM |
345 | /* FIXME: hash computation is wrong, doesn't take into account |
346 | NUM_HASH_CHAIN_ENTRIES. A lot of the hash table will be unused! */ | |
c906108c SS |
347 | unsigned int slot = HASH_PC (pc) & (CPU_SCACHE_NUM_HASH_CHAINS (cpu) - 1); |
348 | int i, max_i = CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu); | |
349 | SCACHE_MAP *scm; | |
350 | SCACHE *sc; | |
351 | ||
352 | scm = & CPU_SCACHE_HASH_TABLE (cpu) [slot]; | |
353 | for (i = 0; i < max_i && scm->pc != UNUSED_ADDR; ++i, ++scm) | |
354 | { | |
355 | if (scm->pc == pc) | |
356 | { | |
357 | PROFILE_COUNT_SCACHE_HIT (cpu); | |
358 | return scm->sc; | |
359 | } | |
360 | } | |
361 | PROFILE_COUNT_SCACHE_MISS (cpu); | |
362 | ||
363 | /* The address we want isn't cached. Bummer. | |
364 | If the hash chain we have for this address is full, throw out an entry | |
365 | to make room. */ | |
366 | ||
367 | if (i == max_i) | |
368 | { | |
369 | /* Rather than do something sophisticated like LRU, we just throw out | |
370 | a semi-random entry. Let someone else have the joy of saying how | |
371 | wrong this is. NEXT_FREE is the entry to throw out and cycles | |
372 | through all possibilities. */ | |
373 | static int next_free = 0; | |
374 | ||
375 | scm = & CPU_SCACHE_HASH_TABLE (cpu) [slot]; | |
96baa820 | 376 | /* FIXME: This seems rather clumsy. */ |
c906108c SS |
377 | for (i = 0; i < next_free; ++i, ++scm) |
378 | continue; | |
379 | ++next_free; | |
380 | if (next_free == CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu)) | |
381 | next_free = 0; | |
382 | } | |
383 | ||
384 | /* At this point SCM points to the hash table entry to use. | |
385 | Now make sure there's room in the cache. */ | |
96baa820 JM |
386 | /* FIXME: Kinda weird to use a next_free adjusted scm when cache is |
387 | flushed. */ | |
c906108c SS |
388 | |
389 | { | |
390 | int elm_size = IMP_PROPS_SCACHE_ELM_SIZE (MACH_IMP_PROPS (CPU_MACH (cpu))); | |
391 | int elms_used = (((char *) CPU_SCACHE_NEXT_FREE (cpu) | |
392 | - (char *) CPU_SCACHE_CACHE (cpu)) | |
393 | / elm_size); | |
394 | int elms_left = CPU_SCACHE_SIZE (cpu) - elms_used; | |
395 | ||
396 | if (elms_left < n) | |
397 | { | |
398 | PROFILE_COUNT_SCACHE_FULL_FLUSH (cpu); | |
399 | scache_flush_cpu (cpu); | |
400 | } | |
401 | } | |
402 | ||
403 | sc = CPU_SCACHE_NEXT_FREE (cpu); | |
404 | scm->pc = pc; | |
405 | scm->sc = sc; | |
406 | ||
407 | *bufp = sc; | |
408 | return NULL; | |
409 | } | |
410 | ||
411 | #endif /* WITH_SCACHE_PBB */ | |
412 | ||
413 | /* Print cache access statics for CPU. */ | |
414 | ||
415 | void | |
416 | scache_print_profile (SIM_CPU *cpu, int verbose) | |
417 | { | |
418 | SIM_DESC sd = CPU_STATE (cpu); | |
419 | unsigned long hits = CPU_SCACHE_HITS (cpu); | |
420 | unsigned long misses = CPU_SCACHE_MISSES (cpu); | |
421 | char buf[20]; | |
422 | unsigned long max_val; | |
423 | unsigned long *lengths; | |
424 | int i; | |
425 | ||
426 | if (CPU_SCACHE_SIZE (cpu) == 0) | |
427 | return; | |
428 | ||
429 | sim_io_printf (sd, "Simulator Cache Statistics\n\n"); | |
430 | ||
431 | /* One could use PROFILE_LABEL_WIDTH here. I chose not to. */ | |
432 | sim_io_printf (sd, " Cache size: %s\n", | |
433 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_SIZE (cpu))); | |
434 | sim_io_printf (sd, " Hits: %s\n", | |
435 | sim_add_commas (buf, sizeof (buf), hits)); | |
436 | sim_io_printf (sd, " Misses: %s\n", | |
437 | sim_add_commas (buf, sizeof (buf), misses)); | |
438 | if (hits + misses != 0) | |
439 | sim_io_printf (sd, " Hit rate: %.2f%%\n", | |
440 | ((double) hits / ((double) hits + (double) misses)) * 100); | |
441 | ||
442 | #if WITH_SCACHE_PBB | |
443 | sim_io_printf (sd, "\n"); | |
444 | sim_io_printf (sd, " Hash table size: %s\n", | |
445 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_NUM_HASH_CHAINS (cpu))); | |
446 | sim_io_printf (sd, " Max hash list length: %s\n", | |
447 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu))); | |
448 | sim_io_printf (sd, " Max insn chain length: %s\n", | |
449 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_MAX_CHAIN_LENGTH (cpu))); | |
450 | sim_io_printf (sd, " Cache full flushes: %s\n", | |
451 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_FULL_FLUSHES (cpu))); | |
452 | sim_io_printf (sd, "\n"); | |
453 | ||
454 | if (verbose) | |
455 | { | |
456 | sim_io_printf (sd, " Insn chain lengths:\n\n"); | |
457 | max_val = 0; | |
458 | lengths = CPU_SCACHE_CHAIN_LENGTHS (cpu); | |
459 | for (i = 1; i < CPU_SCACHE_MAX_CHAIN_LENGTH (cpu); ++i) | |
460 | if (lengths[i] > max_val) | |
461 | max_val = lengths[i]; | |
462 | for (i = 1; i < CPU_SCACHE_MAX_CHAIN_LENGTH (cpu); ++i) | |
463 | { | |
464 | sim_io_printf (sd, " %2d: %*s: ", | |
465 | i, | |
466 | max_val < 10000 ? 5 : 10, | |
467 | sim_add_commas (buf, sizeof (buf), lengths[i])); | |
ef93a840 | 468 | sim_profile_print_bar (sd, cpu, PROFILE_HISTOGRAM_WIDTH, |
c906108c SS |
469 | lengths[i], max_val); |
470 | sim_io_printf (sd, "\n"); | |
471 | } | |
472 | sim_io_printf (sd, "\n"); | |
473 | } | |
474 | #endif /* WITH_SCACHE_PBB */ | |
475 | } |