Commit | Line | Data |
---|---|---|
c906108c SS |
1 | /* Simulator cache routines for CGEN simulators (and maybe others). |
2 | Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. | |
3 | Contributed by Cygnus Support. | |
4 | ||
5 | This file is part of GDB, the GNU debugger. | |
6 | ||
7 | This program is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 2, or (at your option) | |
10 | any later version. | |
11 | ||
12 | This program is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License along | |
18 | with this program; if not, write to the Free Software Foundation, Inc., | |
19 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
20 | ||
21 | #define SCACHE_DEFINE_INLINE | |
22 | ||
23 | #include "sim-main.h" | |
24 | #ifdef HAVE_STDLIB_H | |
25 | #include <stdlib.h> | |
26 | #endif | |
27 | #include "libiberty.h" | |
28 | #include "sim-options.h" | |
29 | #include "sim-io.h" | |
30 | ||
31 | #define MAX(a,b) ((a) > (b) ? (a) : (b)) | |
32 | ||
33 | /* Unused address. */ | |
34 | #define UNUSED_ADDR 0xffffffff | |
35 | ||
36 | /* Scache configuration parameters. | |
37 | ??? Experiments to determine reasonable values are a work in progress. | |
38 | These are just guesses. */ | |
39 | ||
40 | /* Default number of scache elements. | |
41 | The size of an element is typically 32-64 bytes, so the size of the | |
42 | default scache will be between 512K and 1M bytes. */ | |
43 | #ifdef CONFIG_SIM_CACHE_SIZE | |
44 | #define SCACHE_DEFAULT_CACHE_SIZE CONFIG_SIM_CACHE_SIZE | |
45 | #else | |
46 | #define SCACHE_DEFAULT_CACHE_SIZE 16384 | |
47 | #endif | |
48 | ||
49 | /* Minimum cache size. | |
50 | The m32r port assumes a cache size of at least 2 so it can decode both 16 | |
51 | bit insns. When compiling we need an extra for the chain entry. And this | |
52 | must be a multiple of 2. Hence 4 is the minimum (though, for those with | |
53 | featuritis or itchy pedantic bits, we could make this conditional on | |
54 | WITH_SCACHE_PBB). */ | |
55 | #define MIN_SCACHE_SIZE 4 | |
56 | ||
57 | /* Ratio of size of text section to size of scache. | |
58 | When compiling, we don't want to flush the scache more than we have to | |
59 | but we also don't want it to be exorbitantly large. So we pick a high | |
60 | default value, then reduce it by the size of the program being simulated, | |
61 | but we don't override any value specified on the command line. | |
62 | If not specified on the command line, the size to use is computed as | |
63 | max (MIN_SCACHE_SIZE, | |
64 | min (SCACHE_DEFAULT_CACHE_SIZE, | |
65 | text_size / (base_insn_size * INSN_SCACHE_RATIO))). */ | |
66 | /* ??? Interesting idea but not currently used. */ | |
67 | #define INSN_SCACHE_RATIO 4 | |
68 | ||
69 | /* Default maximum insn chain length. | |
70 | The only reason for a maximum is so we can place a maximum size on the | |
71 | profiling table. Chain lengths are determined by cti's. | |
72 | 32 is a more reasonable number, but when profiling, the before/after | |
73 | handlers take up that much more space. The scache is filled from front to | |
74 | back so all this determines is when the scache needs to be flushed. */ | |
75 | #define MAX_CHAIN_LENGTH 64 | |
76 | ||
77 | /* Default maximum hash list length. */ | |
78 | #define MAX_HASH_CHAIN_LENGTH 4 | |
79 | ||
80 | /* Minimum hash table size. */ | |
81 | #define MIN_HASH_CHAINS 32 | |
82 | ||
83 | /* Ratio of number of scache elements to number of hash lists. | |
84 | Since the user can only specify the size of the scache, we compute the | |
85 | size of the hash table as | |
86 | max (MIN_HASH_CHAINS, scache_size / SCACHE_HASH_RATIO). */ | |
87 | #define SCACHE_HASH_RATIO 8 | |
88 | ||
89 | /* Hash a PC value. | |
90 | FIXME: May wish to make the hashing architecture specific. | |
91 | FIXME: revisit */ | |
92 | #define HASH_PC(pc) (((pc) >> 2) + ((pc) >> 5)) | |
93 | ||
94 | static MODULE_INIT_FN scache_init; | |
95 | static MODULE_UNINSTALL_FN scache_uninstall; | |
96 | ||
97 | static DECLARE_OPTION_HANDLER (scache_option_handler); | |
98 | ||
99 | #define OPTION_PROFILE_SCACHE (OPTION_START + 0) | |
100 | ||
101 | static const OPTION scache_options[] = { | |
102 | { {"scache-size", optional_argument, NULL, 'c'}, | |
103 | 'c', "[SIZE]", "Specify size of simulator execution cache", | |
104 | scache_option_handler }, | |
105 | #if WITH_SCACHE_PBB | |
106 | /* ??? It might be nice to allow the user to specify the size of the hash | |
107 | table, the maximum hash list length, and the maximum chain length, but | |
108 | for now that might be more akin to featuritis. */ | |
109 | #endif | |
110 | { {"profile-scache", optional_argument, NULL, OPTION_PROFILE_SCACHE}, | |
111 | '\0', "on|off", "Perform simulator execution cache profiling", | |
112 | scache_option_handler }, | |
113 | { {NULL, no_argument, NULL, 0}, '\0', NULL, NULL, NULL } | |
114 | }; | |
115 | ||
116 | static SIM_RC | |
117 | scache_option_handler (SIM_DESC sd, sim_cpu *cpu, int opt, | |
118 | char *arg, int is_command) | |
119 | { | |
c906108c SS |
120 | switch (opt) |
121 | { | |
122 | case 'c' : | |
123 | if (WITH_SCACHE) | |
124 | { | |
125 | if (arg != NULL) | |
126 | { | |
127 | int n = strtol (arg, NULL, 0); | |
128 | if (n < MIN_SCACHE_SIZE) | |
129 | { | |
130 | sim_io_eprintf (sd, "invalid scache size `%d', must be at least 4", n); | |
131 | return SIM_RC_FAIL; | |
132 | } | |
133 | /* Ensure it's a multiple of 2. */ | |
134 | if ((n & (n - 1)) != 0) | |
135 | { | |
136 | sim_io_eprintf (sd, "scache size `%d' not a multiple of 2\n", n); | |
137 | { | |
138 | /* round up to nearest multiple of 2 */ | |
139 | int i; | |
140 | for (i = 1; i < n; i <<= 1) | |
141 | continue; | |
142 | n = i; | |
143 | } | |
144 | sim_io_eprintf (sd, "rounding scache size up to %d\n", n); | |
145 | } | |
146 | if (cpu == NULL) | |
147 | STATE_SCACHE_SIZE (sd) = n; | |
148 | else | |
149 | CPU_SCACHE_SIZE (cpu) = n; | |
150 | } | |
151 | else | |
152 | { | |
153 | if (cpu == NULL) | |
154 | STATE_SCACHE_SIZE (sd) = SCACHE_DEFAULT_CACHE_SIZE; | |
155 | else | |
156 | CPU_SCACHE_SIZE (cpu) = SCACHE_DEFAULT_CACHE_SIZE; | |
157 | } | |
158 | } | |
159 | else | |
160 | sim_io_eprintf (sd, "Simulator execution cache not enabled, `--scache-size' ignored\n"); | |
161 | break; | |
162 | ||
163 | case OPTION_PROFILE_SCACHE : | |
164 | if (WITH_SCACHE && WITH_PROFILE_SCACHE_P) | |
165 | { | |
166 | /* FIXME: handle cpu != NULL. */ | |
167 | return sim_profile_set_option (sd, "-scache", PROFILE_SCACHE_IDX, | |
168 | arg); | |
169 | } | |
170 | else | |
171 | sim_io_eprintf (sd, "Simulator cache profiling not compiled in, `--profile-scache' ignored\n"); | |
172 | break; | |
173 | } | |
174 | ||
175 | return SIM_RC_OK; | |
176 | } | |
177 | ||
178 | SIM_RC | |
179 | scache_install (SIM_DESC sd) | |
180 | { | |
181 | sim_add_option_table (sd, NULL, scache_options); | |
182 | sim_module_add_init_fn (sd, scache_init); | |
183 | sim_module_add_uninstall_fn (sd, scache_uninstall); | |
184 | ||
185 | /* This is the default, it may be overridden on the command line. */ | |
186 | STATE_SCACHE_SIZE (sd) = WITH_SCACHE; | |
187 | ||
188 | return SIM_RC_OK; | |
189 | } | |
190 | ||
191 | static SIM_RC | |
192 | scache_init (SIM_DESC sd) | |
193 | { | |
194 | int c; | |
195 | ||
196 | for (c = 0; c < MAX_NR_PROCESSORS; ++c) | |
197 | { | |
198 | SIM_CPU *cpu = STATE_CPU (sd, c); | |
199 | int elm_size = IMP_PROPS_SCACHE_ELM_SIZE (MACH_IMP_PROPS (CPU_MACH (cpu))); | |
200 | ||
201 | /* elm_size is 0 if the cpu doesn't not have scache support */ | |
202 | if (elm_size == 0) | |
203 | { | |
204 | CPU_SCACHE_SIZE (cpu) = 0; | |
205 | CPU_SCACHE_CACHE (cpu) = NULL; | |
206 | } | |
207 | else | |
208 | { | |
209 | if (CPU_SCACHE_SIZE (cpu) == 0) | |
210 | CPU_SCACHE_SIZE (cpu) = STATE_SCACHE_SIZE (sd); | |
211 | CPU_SCACHE_CACHE (cpu) = | |
212 | (SCACHE *) xmalloc (CPU_SCACHE_SIZE (cpu) * elm_size); | |
213 | #if WITH_SCACHE_PBB | |
214 | CPU_SCACHE_MAX_CHAIN_LENGTH (cpu) = MAX_CHAIN_LENGTH; | |
215 | CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu) = MAX_HASH_CHAIN_LENGTH; | |
216 | CPU_SCACHE_NUM_HASH_CHAINS (cpu) = MAX (MIN_HASH_CHAINS, | |
217 | CPU_SCACHE_SIZE (cpu) | |
218 | / SCACHE_HASH_RATIO); | |
219 | CPU_SCACHE_HASH_TABLE (cpu) = | |
220 | (SCACHE_MAP *) xmalloc (CPU_SCACHE_NUM_HASH_CHAINS (cpu) | |
221 | * CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu) | |
222 | * sizeof (SCACHE_MAP)); | |
223 | CPU_SCACHE_PBB_BEGIN (cpu) = (SCACHE *) zalloc (elm_size); | |
224 | CPU_SCACHE_CHAIN_LENGTHS (cpu) = | |
225 | (unsigned long *) zalloc ((CPU_SCACHE_MAX_CHAIN_LENGTH (cpu) + 1) | |
226 | * sizeof (long)); | |
227 | #endif | |
228 | } | |
229 | } | |
230 | ||
231 | scache_flush (sd); | |
232 | ||
233 | return SIM_RC_OK; | |
234 | } | |
235 | ||
236 | static void | |
237 | scache_uninstall (SIM_DESC sd) | |
238 | { | |
239 | int c; | |
240 | ||
241 | for (c = 0; c < MAX_NR_PROCESSORS; ++c) | |
242 | { | |
243 | SIM_CPU *cpu = STATE_CPU (sd, c); | |
244 | ||
245 | if (CPU_SCACHE_CACHE (cpu) != NULL) | |
246 | free (CPU_SCACHE_CACHE (cpu)); | |
247 | #if WITH_SCACHE_PBB | |
248 | if (CPU_SCACHE_HASH_TABLE (cpu) != NULL) | |
249 | free (CPU_SCACHE_HASH_TABLE (cpu)); | |
250 | if (CPU_SCACHE_PBB_BEGIN (cpu) != NULL) | |
251 | free (CPU_SCACHE_PBB_BEGIN (cpu)); | |
252 | if (CPU_SCACHE_CHAIN_LENGTHS (cpu) != NULL) | |
253 | free (CPU_SCACHE_CHAIN_LENGTHS (cpu)); | |
254 | #endif | |
255 | } | |
256 | } | |
257 | ||
258 | void | |
259 | scache_flush (SIM_DESC sd) | |
260 | { | |
261 | int c; | |
262 | ||
263 | for (c = 0; c < MAX_NR_PROCESSORS; ++c) | |
264 | { | |
265 | SIM_CPU *cpu = STATE_CPU (sd, c); | |
266 | scache_flush_cpu (cpu); | |
267 | } | |
268 | } | |
269 | ||
270 | void | |
271 | scache_flush_cpu (SIM_CPU *cpu) | |
272 | { | |
273 | int i,n; | |
274 | ||
275 | /* Don't bother if cache not in use. */ | |
276 | if (CPU_SCACHE_SIZE (cpu) == 0) | |
277 | return; | |
278 | ||
279 | #if WITH_SCACHE_PBB | |
280 | /* It's important that this be reasonably fast as this can be done when | |
281 | the simulation is running. */ | |
282 | CPU_SCACHE_NEXT_FREE (cpu) = CPU_SCACHE_CACHE (cpu); | |
283 | n = CPU_SCACHE_NUM_HASH_CHAINS (cpu) * CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu); | |
284 | /* ??? Might be faster to just set the first entry, then update the | |
285 | "last entry" marker during allocation. */ | |
286 | for (i = 0; i < n; ++i) | |
287 | CPU_SCACHE_HASH_TABLE (cpu) [i] . pc = UNUSED_ADDR; | |
288 | #else | |
289 | { | |
290 | int elm_size = IMP_PROPS_SCACHE_ELM_SIZE (MACH_IMP_PROPS (CPU_MACH (cpu))); | |
291 | SCACHE *sc; | |
292 | ||
293 | /* Technically, this may not be necessary, but it helps debugging. */ | |
294 | memset (CPU_SCACHE_CACHE (cpu), 0, | |
295 | CPU_SCACHE_SIZE (cpu) * elm_size); | |
296 | ||
297 | for (i = 0, sc = CPU_SCACHE_CACHE (cpu); i < CPU_SCACHE_SIZE (cpu); | |
298 | ++i, sc = (SCACHE *) ((char *) sc + elm_size)) | |
299 | { | |
300 | sc->argbuf.addr = UNUSED_ADDR; | |
301 | } | |
302 | } | |
303 | #endif | |
304 | } | |
305 | ||
306 | #if WITH_SCACHE_PBB | |
307 | ||
308 | /* Look up PC in the hash table of scache entry points. | |
309 | Returns the entry or NULL if not found. */ | |
310 | ||
311 | SCACHE * | |
312 | scache_lookup (SIM_CPU *cpu, IADDR pc) | |
313 | { | |
96baa820 JM |
314 | /* FIXME: hash computation is wrong, doesn't take into account |
315 | NUM_HASH_CHAIN_ENTRIES. A lot of the hash table will be unused! */ | |
c906108c SS |
316 | unsigned int slot = HASH_PC (pc) & (CPU_SCACHE_NUM_HASH_CHAINS (cpu) - 1); |
317 | int i, max_i = CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu); | |
318 | SCACHE_MAP *scm; | |
319 | ||
320 | /* We don't update hit/miss statistics as this is only used when recording | |
321 | branch target addresses. */ | |
322 | ||
323 | scm = & CPU_SCACHE_HASH_TABLE (cpu) [slot]; | |
324 | for (i = 0; i < max_i && scm->pc != UNUSED_ADDR; ++i, ++scm) | |
325 | { | |
326 | if (scm->pc == pc) | |
327 | return scm->sc; | |
328 | } | |
329 | return 0; | |
330 | } | |
331 | ||
332 | /* Look up PC and if not found create an entry for it. | |
333 | If found the result is a pointer to the SCACHE entry. | |
334 | If not found the result is NULL, and the address of a buffer of at least | |
335 | N entries is stored in BUFP. | |
336 | It's done this way so the caller can still distinguish found/not-found. | |
337 | If the table is full, it is emptied to make room. | |
338 | If the maximum length of a hash list is reached a random entry is thrown out | |
339 | to make room. | |
340 | ??? One might want to try to make this smarter, but let's see some | |
341 | measurable benefit first. */ | |
342 | ||
343 | SCACHE * | |
344 | scache_lookup_or_alloc (SIM_CPU *cpu, IADDR pc, int n, SCACHE **bufp) | |
345 | { | |
96baa820 JM |
346 | /* FIXME: hash computation is wrong, doesn't take into account |
347 | NUM_HASH_CHAIN_ENTRIES. A lot of the hash table will be unused! */ | |
c906108c SS |
348 | unsigned int slot = HASH_PC (pc) & (CPU_SCACHE_NUM_HASH_CHAINS (cpu) - 1); |
349 | int i, max_i = CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu); | |
350 | SCACHE_MAP *scm; | |
351 | SCACHE *sc; | |
352 | ||
353 | scm = & CPU_SCACHE_HASH_TABLE (cpu) [slot]; | |
354 | for (i = 0; i < max_i && scm->pc != UNUSED_ADDR; ++i, ++scm) | |
355 | { | |
356 | if (scm->pc == pc) | |
357 | { | |
358 | PROFILE_COUNT_SCACHE_HIT (cpu); | |
359 | return scm->sc; | |
360 | } | |
361 | } | |
362 | PROFILE_COUNT_SCACHE_MISS (cpu); | |
363 | ||
364 | /* The address we want isn't cached. Bummer. | |
365 | If the hash chain we have for this address is full, throw out an entry | |
366 | to make room. */ | |
367 | ||
368 | if (i == max_i) | |
369 | { | |
370 | /* Rather than do something sophisticated like LRU, we just throw out | |
371 | a semi-random entry. Let someone else have the joy of saying how | |
372 | wrong this is. NEXT_FREE is the entry to throw out and cycles | |
373 | through all possibilities. */ | |
374 | static int next_free = 0; | |
375 | ||
376 | scm = & CPU_SCACHE_HASH_TABLE (cpu) [slot]; | |
96baa820 | 377 | /* FIXME: This seems rather clumsy. */ |
c906108c SS |
378 | for (i = 0; i < next_free; ++i, ++scm) |
379 | continue; | |
380 | ++next_free; | |
381 | if (next_free == CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu)) | |
382 | next_free = 0; | |
383 | } | |
384 | ||
385 | /* At this point SCM points to the hash table entry to use. | |
386 | Now make sure there's room in the cache. */ | |
96baa820 JM |
387 | /* FIXME: Kinda weird to use a next_free adjusted scm when cache is |
388 | flushed. */ | |
c906108c SS |
389 | |
390 | { | |
391 | int elm_size = IMP_PROPS_SCACHE_ELM_SIZE (MACH_IMP_PROPS (CPU_MACH (cpu))); | |
392 | int elms_used = (((char *) CPU_SCACHE_NEXT_FREE (cpu) | |
393 | - (char *) CPU_SCACHE_CACHE (cpu)) | |
394 | / elm_size); | |
395 | int elms_left = CPU_SCACHE_SIZE (cpu) - elms_used; | |
396 | ||
397 | if (elms_left < n) | |
398 | { | |
399 | PROFILE_COUNT_SCACHE_FULL_FLUSH (cpu); | |
400 | scache_flush_cpu (cpu); | |
401 | } | |
402 | } | |
403 | ||
404 | sc = CPU_SCACHE_NEXT_FREE (cpu); | |
405 | scm->pc = pc; | |
406 | scm->sc = sc; | |
407 | ||
408 | *bufp = sc; | |
409 | return NULL; | |
410 | } | |
411 | ||
412 | #endif /* WITH_SCACHE_PBB */ | |
413 | ||
414 | /* Print cache access statics for CPU. */ | |
415 | ||
416 | void | |
417 | scache_print_profile (SIM_CPU *cpu, int verbose) | |
418 | { | |
419 | SIM_DESC sd = CPU_STATE (cpu); | |
420 | unsigned long hits = CPU_SCACHE_HITS (cpu); | |
421 | unsigned long misses = CPU_SCACHE_MISSES (cpu); | |
422 | char buf[20]; | |
423 | unsigned long max_val; | |
424 | unsigned long *lengths; | |
425 | int i; | |
426 | ||
427 | if (CPU_SCACHE_SIZE (cpu) == 0) | |
428 | return; | |
429 | ||
430 | sim_io_printf (sd, "Simulator Cache Statistics\n\n"); | |
431 | ||
432 | /* One could use PROFILE_LABEL_WIDTH here. I chose not to. */ | |
433 | sim_io_printf (sd, " Cache size: %s\n", | |
434 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_SIZE (cpu))); | |
435 | sim_io_printf (sd, " Hits: %s\n", | |
436 | sim_add_commas (buf, sizeof (buf), hits)); | |
437 | sim_io_printf (sd, " Misses: %s\n", | |
438 | sim_add_commas (buf, sizeof (buf), misses)); | |
439 | if (hits + misses != 0) | |
440 | sim_io_printf (sd, " Hit rate: %.2f%%\n", | |
441 | ((double) hits / ((double) hits + (double) misses)) * 100); | |
442 | ||
443 | #if WITH_SCACHE_PBB | |
444 | sim_io_printf (sd, "\n"); | |
445 | sim_io_printf (sd, " Hash table size: %s\n", | |
446 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_NUM_HASH_CHAINS (cpu))); | |
447 | sim_io_printf (sd, " Max hash list length: %s\n", | |
448 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu))); | |
449 | sim_io_printf (sd, " Max insn chain length: %s\n", | |
450 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_MAX_CHAIN_LENGTH (cpu))); | |
451 | sim_io_printf (sd, " Cache full flushes: %s\n", | |
452 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_FULL_FLUSHES (cpu))); | |
453 | sim_io_printf (sd, "\n"); | |
454 | ||
455 | if (verbose) | |
456 | { | |
457 | sim_io_printf (sd, " Insn chain lengths:\n\n"); | |
458 | max_val = 0; | |
459 | lengths = CPU_SCACHE_CHAIN_LENGTHS (cpu); | |
460 | for (i = 1; i < CPU_SCACHE_MAX_CHAIN_LENGTH (cpu); ++i) | |
461 | if (lengths[i] > max_val) | |
462 | max_val = lengths[i]; | |
463 | for (i = 1; i < CPU_SCACHE_MAX_CHAIN_LENGTH (cpu); ++i) | |
464 | { | |
465 | sim_io_printf (sd, " %2d: %*s: ", | |
466 | i, | |
467 | max_val < 10000 ? 5 : 10, | |
468 | sim_add_commas (buf, sizeof (buf), lengths[i])); | |
469 | sim_profile_print_bar (sd, PROFILE_HISTOGRAM_WIDTH, | |
470 | lengths[i], max_val); | |
471 | sim_io_printf (sd, "\n"); | |
472 | } | |
473 | sim_io_printf (sd, "\n"); | |
474 | } | |
475 | #endif /* WITH_SCACHE_PBB */ | |
476 | } |