| 1 | /* hash.c - hash table lookup strings - |
| 2 | Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 1998 |
| 3 | Free Software Foundation, Inc. |
| 4 | |
| 5 | This file is part of GAS, the GNU Assembler. |
| 6 | |
| 7 | GAS is free software; you can redistribute it and/or modify |
| 8 | it under the terms of the GNU General Public License as published by |
| 9 | the Free Software Foundation; either version 2, or (at your option) |
| 10 | any later version. |
| 11 | |
| 12 | GAS is distributed in the hope that it will be useful, |
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | GNU General Public License for more details. |
| 16 | |
| 17 | You should have received a copy of the GNU General Public License |
| 18 | along with GAS; see the file COPYING. If not, write to |
| 19 | the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ |
| 20 | |
| 21 | /* |
| 22 | * BUGS, GRIPES, APOLOGIA etc. |
| 23 | * |
| 24 | * A typical user doesn't need ALL this: I intend to make a library out |
| 25 | * of it one day - Dean Elsner. |
| 26 | * Also, I want to change the definition of a symbol to (address,length) |
| 27 | * so I can put arbitrary binary in the names stored. [see hsh.c for that] |
| 28 | * |
| 29 | * This slime is common coupled inside the module. Com-coupling (and other |
| 30 | * vandalism) was done to speed running time. The interfaces at the |
| 31 | * module's edges are adequately clean. |
| 32 | * |
| 33 | * There is no way to (a) run a test script through this heap and (b) |
| 34 | * compare results with previous scripts, to see if we have broken any |
| 35 | * code. Use GNU (f)utilities to do this. A few commands assist test. |
| 36 | * The testing is awkward: it tries to be both batch & interactive. |
| 37 | * For now, interactive rules! |
| 38 | */ |
| 39 | \f |
| 40 | /* |
| 41 | * The idea is to implement a symbol table. A test jig is here. |
| 42 | * Symbols are arbitrary strings; they can't contain '\0'. |
| 43 | * [See hsh.c for a more general symbol flavour.] |
| 44 | * Each symbol is associated with a char*, which can point to anything |
| 45 | * you want, allowing an arbitrary property list for each symbol. |
| 46 | * |
| 47 | * The basic operations are: |
| 48 | * |
| 49 | * new creates symbol table, returns handle |
| 50 | * find (symbol) returns char* |
| 51 | * insert (symbol,char*) error if symbol already in table |
| 52 | * delete (symbol) returns char* if symbol was in table |
| 53 | * apply so you can delete all symbols before die() |
| 54 | * die destroy symbol table (free up memory) |
| 55 | * |
| 56 | * Supplementary functions include: |
| 57 | * |
| 58 | * say how big? what % full? |
| 59 | * replace (symbol,newval) report previous value |
| 60 | * jam (symbol,value) assert symbol:=value |
| 61 | * |
| 62 | * You, the caller, have control over errors: this just reports them. |
| 63 | * |
| 64 | * This package requires malloc(), free(). |
| 65 | * Malloc(size) returns NULL or address of char[size]. |
| 66 | * Free(address) frees same. |
| 67 | */ |
| 68 | \f |
| 69 | /* |
| 70 | * The code and its structures are re-enterent. |
| 71 | * |
| 72 | * Before you do anything else, you must call hash_new() which will |
| 73 | * return the address of a hash-table-control-block. You then use |
| 74 | * this address as a handle of the symbol table by passing it to all |
| 75 | * the other hash_...() functions. The only approved way to recover |
| 76 | * the memory used by the symbol table is to call hash_die() with the |
| 77 | * handle of the symbol table. |
| 78 | * |
| 79 | * Before you call hash_die() you normally delete anything pointed to |
| 80 | * by individual symbols. After hash_die() you can't use that symbol |
| 81 | * table again. |
| 82 | * |
| 83 | * The char* you associate with a symbol may not be NULL (0) because |
| 84 | * NULL is returned whenever a symbol is not in the table. Any other |
| 85 | * value is OK, except DELETED, #defined below. |
| 86 | * |
| 87 | * When you supply a symbol string for insertion, YOU MUST PRESERVE THE |
| 88 | * STRING until that symbol is deleted from the table. The reason is that |
| 89 | * only the address you supply, NOT the symbol string itself, is stored |
| 90 | * in the symbol table. |
| 91 | * |
| 92 | * You may delete and add symbols arbitrarily. |
| 93 | * Any or all symbols may have the same 'value' (char *). In fact, these |
| 94 | * routines don't do anything with your symbol values. |
| 95 | * |
| 96 | * You have no right to know where the symbol:char* mapping is stored, |
| 97 | * because it moves around in memory; also because we may change how it |
| 98 | * works and we don't want to break your code do we? However the handle |
| 99 | * (address of struct hash_control) is never changed in |
| 100 | * the life of the symbol table. |
| 101 | * |
| 102 | * What you CAN find out about a symbol table is: |
| 103 | * how many slots are in the hash table? |
| 104 | * how many slots are filled with symbols? |
| 105 | * (total hashes,collisions) for (reads,writes) (*) |
| 106 | * All of the above values vary in time. |
| 107 | * (*) some of these numbers will not be meaningful if we change the |
| 108 | * internals. */ |
| 109 | \f |
| 110 | /* |
| 111 | * I N T E R N A L |
| 112 | * |
| 113 | * Hash table is an array of hash_entries; each entry is a pointer to a |
| 114 | * a string and a user-supplied value 1 char* wide. |
| 115 | * |
| 116 | * The array always has 2 ** n elements, n>0, n integer. |
| 117 | * There is also a 'wall' entry after the array, which is always empty |
| 118 | * and acts as a sentinel to stop running off the end of the array. |
| 119 | * When the array gets too full, we create a new array twice as large |
| 120 | * and re-hash the symbols into the new array, then forget the old array. |
| 121 | * (Of course, we copy the values into the new array before we junk the |
| 122 | * old array!) |
| 123 | * |
| 124 | */ |
| 125 | |
| 126 | #include <stdio.h> |
| 127 | |
| 128 | #ifndef FALSE |
| 129 | #define FALSE (0) |
| 130 | #define TRUE (!FALSE) |
| 131 | #endif /* no FALSE yet */ |
| 132 | |
| 133 | #include <ctype.h> |
| 134 | #define min(a, b) ((a) < (b) ? (a) : (b)) |
| 135 | |
| 136 | #include "as.h" |
| 137 | |
| 138 | #define error as_fatal |
| 139 | |
| 140 | static char _deleted_[1]; |
| 141 | #define DELETED ((PTR)_deleted_) /* guarenteed unique address */ |
| 142 | #define START_POWER (10) /* power of two: size of new hash table */ |
| 143 | |
| 144 | /* TRUE if a symbol is in entry @ ptr. */ |
| 145 | #define islive(ptr) (ptr->hash_string && ptr->hash_string!=DELETED) |
| 146 | |
| 147 | enum stat_enum { |
| 148 | /* Number of slots in hash table. The wall does not count here. |
| 149 | We expect this is always a power of 2. */ |
| 150 | STAT_SIZE = 0, |
| 151 | /* Number of hash_ask calls. */ |
| 152 | STAT_ACCESS, |
| 153 | STAT_ACCESS_w, |
| 154 | /* Number of collisions (total). This may exceed STAT_ACCESS if we |
| 155 | have lots of collisions/access. */ |
| 156 | STAT_COLLIDE, |
| 157 | STAT_COLLIDE_w, |
| 158 | /* Slots used right now. */ |
| 159 | STAT_USED, |
| 160 | /* How many string compares? */ |
| 161 | STAT_STRCMP, |
| 162 | STAT_STRCMP_w, |
| 163 | /* Size of statistics block... this must be last. */ |
| 164 | STATLENGTH |
| 165 | }; |
| 166 | #define STAT__READ (0) /* reading */ |
| 167 | #define STAT__WRITE (1) /* writing */ |
| 168 | |
| 169 | /* When we grow a hash table, by what power of two do we increase it? */ |
| 170 | #define GROW_FACTOR 1 |
| 171 | /* When should we grow it? */ |
| 172 | #define FULL_VALUE(N) ((N) / 2) |
| 173 | |
| 174 | /* #define SUSPECT to do runtime checks */ |
| 175 | /* #define TEST to be a test jig for hash...() */ |
| 176 | |
| 177 | #ifdef TEST |
| 178 | /* TEST: use smaller hash table */ |
| 179 | #undef START_POWER |
| 180 | #define START_POWER (3) |
| 181 | #undef START_SIZE |
| 182 | #define START_SIZE (8) |
| 183 | #undef START_FULL |
| 184 | #define START_FULL (4) |
| 185 | #endif |
| 186 | \f |
| 187 | struct hash_entry |
| 188 | { |
| 189 | const char *hash_string; /* points to where the symbol string is */ |
| 190 | /* NULL means slot is not used */ |
| 191 | /* DELETED means slot was deleted */ |
| 192 | PTR hash_value; /* user's datum, associated with symbol */ |
| 193 | unsigned long h; |
| 194 | }; |
| 195 | |
| 196 | struct hash_control { |
| 197 | struct hash_entry *hash_where;/* address of hash table */ |
| 198 | int hash_sizelog; /* Log of ( hash_mask + 1 ) */ |
| 199 | int hash_mask; /* masks a hash into index into table */ |
| 200 | int hash_full; /* when hash_stat[STAT_USED] exceeds this, */ |
| 201 | /* grow table */ |
| 202 | struct hash_entry *hash_wall; /* point just after last (usable) entry */ |
| 203 | /* here we have some statistics */ |
| 204 | int hash_stat[STATLENGTH]; /* lies & statistics */ |
| 205 | }; |
| 206 | \f |
| 207 | /*------------------ plan ---------------------------------- i = internal |
| 208 | |
| 209 | struct hash_control * c; |
| 210 | struct hash_entry * e; i |
| 211 | int b[z]; buffer for statistics |
| 212 | z size of b |
| 213 | char * s; symbol string (address) [ key ] |
| 214 | char * v; value string (address) [datum] |
| 215 | boolean f; TRUE if we found s in hash table i |
| 216 | char * t; error string; 0 means OK |
| 217 | int a; access type [0...n) i |
| 218 | |
| 219 | c=hash_new () create new hash_control |
| 220 | |
| 221 | hash_die (c) destroy hash_control (and hash table) |
| 222 | table should be empty. |
| 223 | doesn't check if table is empty. |
| 224 | c has no meaning after this. |
| 225 | |
| 226 | hash_say (c,b,z) report statistics of hash_control. |
| 227 | also report number of available statistics. |
| 228 | |
| 229 | v=hash_delete (c,s) delete symbol, return old value if any. |
| 230 | ask() NULL means no old value. |
| 231 | f |
| 232 | |
| 233 | v=hash_replace (c,s,v) replace old value of s with v. |
| 234 | ask() NULL means no old value: no table change. |
| 235 | f |
| 236 | |
| 237 | t=hash_insert (c,s,v) insert (s,v) in c. |
| 238 | ask() return error string. |
| 239 | f it is an error to insert if s is already |
| 240 | in table. |
| 241 | if any error, c is unchanged. |
| 242 | |
| 243 | t=hash_jam (c,s,v) assert that new value of s will be v. i |
| 244 | ask() it may decide to GROW the table. i |
| 245 | f i |
| 246 | grow() i |
| 247 | t=hash_grow (c) grow the hash table. i |
| 248 | jam() will invoke JAM. i |
| 249 | |
| 250 | ?=hash_apply (c,y) apply y() to every symbol in c. |
| 251 | y evtries visited in 'unspecified' order. |
| 252 | |
| 253 | v=hash_find (c,s) return value of s, or NULL if s not in c. |
| 254 | ask() |
| 255 | f |
| 256 | |
| 257 | f,e=hash_ask() (c,s,a) return slot where s SHOULD live. i |
| 258 | code() maintain collision stats in c. i |
| 259 | |
| 260 | .=hash_code (c,s) compute hash-code for s, i |
| 261 | from parameters of c. i |
| 262 | |
| 263 | */ |
| 264 | \f |
| 265 | /* Returned by hash_ask() to stop extra testing. hash_ask() wants to |
| 266 | return both a slot and a status. This is the status. TRUE: found |
| 267 | symbol FALSE: absent: empty or deleted slot Also returned by |
| 268 | hash_jam(). TRUE: we replaced a value FALSE: we inserted a value. */ |
| 269 | static char hash_found; |
| 270 | |
| 271 | static struct hash_entry *hash_ask PARAMS ((struct hash_control *, |
| 272 | const char *, int)); |
| 273 | static int hash_code PARAMS ((struct hash_control *, const char *)); |
| 274 | static const char *hash_grow PARAMS ((struct hash_control *)); |
| 275 | \f |
| 276 | /* Create a new hash table. Return NULL if failed; otherwise return handle |
| 277 | (address of struct hash). */ |
| 278 | struct hash_control * |
| 279 | hash_new () |
| 280 | { |
| 281 | struct hash_control *retval; |
| 282 | struct hash_entry *room; /* points to hash table */ |
| 283 | struct hash_entry *wall; |
| 284 | struct hash_entry *entry; |
| 285 | int *ip; /* scan stats block of struct hash_control */ |
| 286 | int *nd; /* limit of stats block */ |
| 287 | |
| 288 | room = (struct hash_entry *) xmalloc (sizeof (struct hash_entry) |
| 289 | /* +1 for the wall entry */ |
| 290 | * ((1 << START_POWER) + 1)); |
| 291 | retval = (struct hash_control *) xmalloc (sizeof (struct hash_control)); |
| 292 | |
| 293 | nd = retval->hash_stat + STATLENGTH; |
| 294 | for (ip = retval->hash_stat; ip < nd; ip++) |
| 295 | *ip = 0; |
| 296 | |
| 297 | retval->hash_stat[STAT_SIZE] = 1 << START_POWER; |
| 298 | retval->hash_mask = (1 << START_POWER) - 1; |
| 299 | retval->hash_sizelog = START_POWER; |
| 300 | /* works for 1's compl ok */ |
| 301 | retval->hash_where = room; |
| 302 | retval->hash_wall = |
| 303 | wall = room + (1 << START_POWER); |
| 304 | retval->hash_full = FULL_VALUE (1 << START_POWER); |
| 305 | for (entry = room; entry <= wall; entry++) |
| 306 | entry->hash_string = NULL; |
| 307 | return retval; |
| 308 | } |
| 309 | |
| 310 | /* |
| 311 | * h a s h _ d i e ( ) |
| 312 | * |
| 313 | * Table should be empty, but this is not checked. |
| 314 | * To empty the table, try hash_apply()ing a symbol deleter. |
| 315 | * Return to free memory both the hash table and it's control |
| 316 | * block. |
| 317 | * 'handle' has no meaning after this function. |
| 318 | * No errors are recoverable. |
| 319 | */ |
| 320 | void |
| 321 | hash_die (handle) |
| 322 | struct hash_control *handle; |
| 323 | { |
| 324 | free ((char *) handle->hash_where); |
| 325 | free ((char *) handle); |
| 326 | } |
| 327 | \f |
| 328 | #ifdef TEST |
| 329 | /* |
| 330 | * h a s h _ s a y ( ) |
| 331 | * |
| 332 | * Return the size of the statistics table, and as many statistics as |
| 333 | * we can until either (a) we have run out of statistics or (b) caller |
| 334 | * has run out of buffer. |
| 335 | * NOTE: hash_say treats all statistics alike. |
| 336 | * These numbers may change with time, due to insertions, deletions |
| 337 | * and expansions of the table. |
| 338 | * The first "statistic" returned is the length of hash_stat[]. |
| 339 | * Then contents of hash_stat[] are read out (in ascending order) |
| 340 | * until your buffer or hash_stat[] is exausted. |
| 341 | */ |
| 342 | static void |
| 343 | hash_say (handle, buffer, bufsiz) |
| 344 | struct hash_control *handle; |
| 345 | int buffer[ /*bufsiz*/ ]; |
| 346 | int bufsiz; |
| 347 | { |
| 348 | int *nd; /* limit of statistics block */ |
| 349 | int *ip; /* scan statistics */ |
| 350 | |
| 351 | ip = handle->hash_stat; |
| 352 | nd = ip + min (bufsiz - 1, STATLENGTH); |
| 353 | if (bufsiz > 0) /* trust nothing! bufsiz<=0 is dangerous */ |
| 354 | { |
| 355 | *buffer++ = STATLENGTH; |
| 356 | for (; ip < nd; ip++, buffer++) |
| 357 | { |
| 358 | *buffer = *ip; |
| 359 | } |
| 360 | } |
| 361 | } |
| 362 | #endif |
| 363 | \f |
| 364 | /* |
| 365 | * h a s h _ d e l e t e ( ) |
| 366 | * |
| 367 | * Try to delete a symbol from the table. |
| 368 | * If it was there, return its value (and adjust STAT_USED). |
| 369 | * Otherwise, return NULL. |
| 370 | * Anyway, the symbol is not present after this function. |
| 371 | * |
| 372 | */ |
| 373 | PTR /* NULL if string not in table, else */ |
| 374 | /* returns value of deleted symbol */ |
| 375 | hash_delete (handle, string) |
| 376 | struct hash_control *handle; |
| 377 | const char *string; |
| 378 | { |
| 379 | PTR retval; |
| 380 | struct hash_entry *entry; |
| 381 | |
| 382 | entry = hash_ask (handle, string, STAT__WRITE); |
| 383 | if (hash_found) |
| 384 | { |
| 385 | retval = entry->hash_value; |
| 386 | entry->hash_string = DELETED; |
| 387 | handle->hash_stat[STAT_USED] -= 1; |
| 388 | #ifdef SUSPECT |
| 389 | if (handle->hash_stat[STAT_USED] < 0) |
| 390 | { |
| 391 | error ("hash_delete"); |
| 392 | } |
| 393 | #endif /* def SUSPECT */ |
| 394 | } |
| 395 | else |
| 396 | { |
| 397 | retval = NULL; |
| 398 | } |
| 399 | return (retval); |
| 400 | } |
| 401 | \f |
| 402 | /* |
| 403 | * h a s h _ r e p l a c e ( ) |
| 404 | * |
| 405 | * Try to replace the old value of a symbol with a new value. |
| 406 | * Normally return the old value. |
| 407 | * Return NULL and don't change the table if the symbol is not already |
| 408 | * in the table. |
| 409 | */ |
| 410 | PTR |
| 411 | hash_replace (handle, string, value) |
| 412 | struct hash_control *handle; |
| 413 | const char *string; |
| 414 | PTR value; |
| 415 | { |
| 416 | struct hash_entry *entry; |
| 417 | char *retval; |
| 418 | |
| 419 | entry = hash_ask (handle, string, STAT__WRITE); |
| 420 | if (hash_found) |
| 421 | { |
| 422 | retval = entry->hash_value; |
| 423 | entry->hash_value = value; |
| 424 | } |
| 425 | else |
| 426 | { |
| 427 | retval = NULL; |
| 428 | } |
| 429 | ; |
| 430 | return retval; |
| 431 | } |
| 432 | \f |
| 433 | /* |
| 434 | * h a s h _ i n s e r t ( ) |
| 435 | * |
| 436 | * Insert a (symbol-string, value) into the hash table. |
| 437 | * Return an error string, 0 means OK. |
| 438 | * It is an 'error' to insert an existing symbol. |
| 439 | */ |
| 440 | |
| 441 | const char * /* return error string */ |
| 442 | hash_insert (handle, string, value) |
| 443 | struct hash_control *handle; |
| 444 | const char *string; |
| 445 | PTR value; |
| 446 | { |
| 447 | struct hash_entry *entry; |
| 448 | const char *retval; |
| 449 | |
| 450 | retval = 0; |
| 451 | if (handle->hash_stat[STAT_USED] > handle->hash_full) |
| 452 | { |
| 453 | retval = hash_grow (handle); |
| 454 | } |
| 455 | if (!retval) |
| 456 | { |
| 457 | entry = hash_ask (handle, string, STAT__WRITE); |
| 458 | if (hash_found) |
| 459 | { |
| 460 | retval = "exists"; |
| 461 | } |
| 462 | else |
| 463 | { |
| 464 | entry->hash_value = value; |
| 465 | entry->hash_string = string; |
| 466 | handle->hash_stat[STAT_USED] += 1; |
| 467 | } |
| 468 | } |
| 469 | return retval; |
| 470 | } |
| 471 | \f |
| 472 | /* |
| 473 | * h a s h _ j a m ( ) |
| 474 | * |
| 475 | * Regardless of what was in the symbol table before, after hash_jam() |
| 476 | * the named symbol has the given value. The symbol is either inserted or |
| 477 | * (its value is) replaced. |
| 478 | * An error message string is returned, 0 means OK. |
| 479 | * |
| 480 | * WARNING: this may decide to grow the hashed symbol table. |
| 481 | * To do this, we call hash_grow(), WHICH WILL recursively CALL US. |
| 482 | * |
| 483 | * We report status internally: hash_found is TRUE if we replaced, but |
| 484 | * false if we inserted. |
| 485 | */ |
| 486 | const char * |
| 487 | hash_jam (handle, string, value) |
| 488 | struct hash_control *handle; |
| 489 | const char *string; |
| 490 | PTR value; |
| 491 | { |
| 492 | const char *retval; |
| 493 | struct hash_entry *entry; |
| 494 | |
| 495 | retval = 0; |
| 496 | if (handle->hash_stat[STAT_USED] > handle->hash_full) |
| 497 | { |
| 498 | retval = hash_grow (handle); |
| 499 | } |
| 500 | if (!retval) |
| 501 | { |
| 502 | entry = hash_ask (handle, string, STAT__WRITE); |
| 503 | if (!hash_found) |
| 504 | { |
| 505 | entry->hash_string = string; |
| 506 | handle->hash_stat[STAT_USED] += 1; |
| 507 | } |
| 508 | entry->hash_value = value; |
| 509 | } |
| 510 | return retval; |
| 511 | } |
| 512 | |
| 513 | /* |
| 514 | * h a s h _ g r o w ( ) |
| 515 | * |
| 516 | * Grow a new (bigger) hash table from the old one. |
| 517 | * We choose to double the hash table's size. |
| 518 | * Return a human-scrutible error string: 0 if OK. |
| 519 | * Warning! This uses hash_jam(), which had better not recurse |
| 520 | * back here! Hash_jam() conditionally calls us, but we ALWAYS |
| 521 | * call hash_jam()! |
| 522 | * Internal. |
| 523 | */ |
| 524 | static const char * |
| 525 | hash_grow (handle) /* make a hash table grow */ |
| 526 | struct hash_control *handle; |
| 527 | { |
| 528 | struct hash_entry *newwall; |
| 529 | struct hash_entry *newwhere; |
| 530 | struct hash_entry *newtrack; |
| 531 | struct hash_entry *oldtrack; |
| 532 | struct hash_entry *oldwhere; |
| 533 | struct hash_entry *oldwall; |
| 534 | int temp; |
| 535 | int newsize; |
| 536 | const char *string; |
| 537 | const char *retval; |
| 538 | #ifdef SUSPECT |
| 539 | int oldused; |
| 540 | #endif |
| 541 | |
| 542 | /* |
| 543 | * capture info about old hash table |
| 544 | */ |
| 545 | oldwhere = handle->hash_where; |
| 546 | oldwall = handle->hash_wall; |
| 547 | #ifdef SUSPECT |
| 548 | oldused = handle->hash_stat[STAT_USED]; |
| 549 | #endif |
| 550 | /* |
| 551 | * attempt to get enough room for a hash table twice as big |
| 552 | */ |
| 553 | temp = handle->hash_stat[STAT_SIZE]; |
| 554 | newwhere = ((struct hash_entry *) |
| 555 | xmalloc ((unsigned long) ((temp << (GROW_FACTOR + 1)) |
| 556 | /* +1 for wall slot */ |
| 557 | * sizeof (struct hash_entry)))); |
| 558 | if (newwhere == NULL) |
| 559 | return "no_room"; |
| 560 | |
| 561 | /* |
| 562 | * have enough room: now we do all the work. |
| 563 | * double the size of everything in handle. |
| 564 | */ |
| 565 | handle->hash_mask = ((handle->hash_mask + 1) << GROW_FACTOR) - 1; |
| 566 | handle->hash_stat[STAT_SIZE] <<= GROW_FACTOR; |
| 567 | newsize = handle->hash_stat[STAT_SIZE]; |
| 568 | handle->hash_where = newwhere; |
| 569 | handle->hash_full <<= GROW_FACTOR; |
| 570 | handle->hash_sizelog += GROW_FACTOR; |
| 571 | handle->hash_wall = newwall = newwhere + newsize; |
| 572 | /* Set all those pesky new slots to vacant. */ |
| 573 | for (newtrack = newwhere; newtrack <= newwall; newtrack++) |
| 574 | newtrack->hash_string = NULL; |
| 575 | /* We will do a scan of the old table, the hard way, using the |
| 576 | * new control block to re-insert the data into new hash table. */ |
| 577 | handle->hash_stat[STAT_USED] = 0; |
| 578 | for (oldtrack = oldwhere; oldtrack < oldwall; oldtrack++) |
| 579 | if (((string = oldtrack->hash_string) != NULL) && string != DELETED) |
| 580 | if ((retval = hash_jam (handle, string, oldtrack->hash_value))) |
| 581 | return retval; |
| 582 | |
| 583 | #ifdef SUSPECT |
| 584 | if (handle->hash_stat[STAT_USED] != oldused) |
| 585 | return "hash_used"; |
| 586 | #endif |
| 587 | |
| 588 | /* We have a completely faked up control block. |
| 589 | Return the old hash table. */ |
| 590 | free ((char *) oldwhere); |
| 591 | |
| 592 | return 0; |
| 593 | } |
| 594 | \f |
| 595 | #ifdef TEST |
| 596 | /* |
| 597 | * h a s h _ a p p l y ( ) |
| 598 | * |
| 599 | * Use this to scan each entry in symbol table. |
| 600 | * For each symbol, this calls (applys) a nominated function supplying the |
| 601 | * symbol's value (and the symbol's name). |
| 602 | * The idea is you use this to destroy whatever is associted with |
| 603 | * any values in the table BEFORE you destroy the table with hash_die. |
| 604 | * Of course, you can use it for other jobs; whenever you need to |
| 605 | * visit all extant symbols in the table. |
| 606 | * |
| 607 | * We choose to have a call-you-back idea for two reasons: |
| 608 | * asthetic: it is a neater idea to use apply than an explicit loop |
| 609 | * sensible: if we ever had to grow the symbol table (due to insertions) |
| 610 | * then we would lose our place in the table when we re-hashed |
| 611 | * symbols into the new table in a different order. |
| 612 | * |
| 613 | * The order symbols are visited depends entirely on the hashing function. |
| 614 | * Whenever you insert a (symbol, value) you risk expanding the table. If |
| 615 | * you do expand the table, then the hashing function WILL change, so you |
| 616 | * MIGHT get a different order of symbols visited. In other words, if you |
| 617 | * want the same order of visiting symbols as the last time you used |
| 618 | * hash_apply() then you better not have done any hash_insert()s or |
| 619 | * hash_jam()s since the last time you used hash_apply(). |
| 620 | * |
| 621 | * In future we may use the value returned by your nominated function. |
| 622 | * One idea is to abort the scan if, after applying the function to a |
| 623 | * certain node, the function returns a certain code. |
| 624 | * |
| 625 | * The function you supply should be of the form: |
| 626 | * void myfunct(string,value) |
| 627 | * char * string; |* the symbol's name *| |
| 628 | * char * value; |* the symbol's value *| |
| 629 | * { |
| 630 | * |* ... *| |
| 631 | * } |
| 632 | * |
| 633 | */ |
| 634 | void |
| 635 | hash_apply (handle, function) |
| 636 | struct hash_control *handle; |
| 637 | void (*function) (); |
| 638 | { |
| 639 | struct hash_entry *entry; |
| 640 | struct hash_entry *wall; |
| 641 | |
| 642 | wall = handle->hash_wall; |
| 643 | for (entry = handle->hash_where; entry < wall; entry++) |
| 644 | { |
| 645 | if (islive (entry)) /* silly code: tests entry->string twice! */ |
| 646 | { |
| 647 | (*function) (entry->hash_string, entry->hash_value); |
| 648 | } |
| 649 | } |
| 650 | } |
| 651 | #endif |
| 652 | \f |
| 653 | /* |
| 654 | * h a s h _ f i n d ( ) |
| 655 | * |
| 656 | * Given symbol string, find value (if any). |
| 657 | * Return found value or NULL. |
| 658 | */ |
| 659 | PTR |
| 660 | hash_find (handle, string) |
| 661 | struct hash_control *handle; |
| 662 | const char *string; |
| 663 | { |
| 664 | struct hash_entry *entry; |
| 665 | |
| 666 | entry = hash_ask (handle, string, STAT__READ); |
| 667 | if (hash_found) |
| 668 | return entry->hash_value; |
| 669 | else |
| 670 | return NULL; |
| 671 | } |
| 672 | \f |
| 673 | /* |
| 674 | * h a s h _ a s k ( ) |
| 675 | * |
| 676 | * Searches for given symbol string. |
| 677 | * Return the slot where it OUGHT to live. It may be there. |
| 678 | * Return hash_found: TRUE only if symbol is in that slot. |
| 679 | * Access argument is to help keep statistics in control block. |
| 680 | * Internal. |
| 681 | */ |
| 682 | static struct hash_entry * /* string slot, may be empty or deleted */ |
| 683 | hash_ask (handle, string, access_type) |
| 684 | struct hash_control *handle; |
| 685 | const char *string; |
| 686 | int access_type; |
| 687 | { |
| 688 | const char *s; |
| 689 | struct hash_entry *slot; |
| 690 | int collision; /* count collisions */ |
| 691 | int strcmps; |
| 692 | int hcode; |
| 693 | |
| 694 | /* start looking here */ |
| 695 | hcode = hash_code (handle, string); |
| 696 | slot = handle->hash_where + (hcode & handle->hash_mask); |
| 697 | |
| 698 | handle->hash_stat[STAT_ACCESS + access_type] += 1; |
| 699 | collision = strcmps = 0; |
| 700 | hash_found = FALSE; |
| 701 | while (((s = slot->hash_string) != NULL) && s != DELETED) |
| 702 | { |
| 703 | if (string == s) |
| 704 | { |
| 705 | hash_found = TRUE; |
| 706 | break; |
| 707 | } |
| 708 | if (slot->h == (unsigned long) hcode) |
| 709 | { |
| 710 | if (!strcmp (string, s)) |
| 711 | { |
| 712 | hash_found = TRUE; |
| 713 | break; |
| 714 | } |
| 715 | strcmps++; |
| 716 | } |
| 717 | collision++; |
| 718 | slot++; |
| 719 | } |
| 720 | /* |
| 721 | * slot: return: |
| 722 | * in use: we found string slot |
| 723 | * at empty: |
| 724 | * at wall: we fell off: wrap round ???? |
| 725 | * in table: dig here slot |
| 726 | * at DELETED: dig here slot |
| 727 | */ |
| 728 | if (slot == handle->hash_wall) |
| 729 | { |
| 730 | slot = handle->hash_where;/* now look again */ |
| 731 | while (((s = slot->hash_string) != NULL) && s != DELETED) |
| 732 | { |
| 733 | if (string == s) |
| 734 | { |
| 735 | hash_found = TRUE; |
| 736 | break; |
| 737 | } |
| 738 | if (slot->h == (unsigned long) hcode) |
| 739 | { |
| 740 | if (!strcmp (string, s)) |
| 741 | { |
| 742 | hash_found = TRUE; |
| 743 | break; |
| 744 | } |
| 745 | strcmps++; |
| 746 | } |
| 747 | collision++; |
| 748 | slot++; |
| 749 | } |
| 750 | /* |
| 751 | * slot: return: |
| 752 | * in use: we found it slot |
| 753 | * empty: wall: ERROR IMPOSSIBLE !!!! |
| 754 | * in table: dig here slot |
| 755 | * DELETED:dig here slot |
| 756 | */ |
| 757 | } |
| 758 | handle->hash_stat[STAT_COLLIDE + access_type] += collision; |
| 759 | handle->hash_stat[STAT_STRCMP + access_type] += strcmps; |
| 760 | if (!hash_found) |
| 761 | slot->h = hcode; |
| 762 | return slot; /* also return hash_found */ |
| 763 | } |
| 764 | \f |
| 765 | /* |
| 766 | * h a s h _ c o d e |
| 767 | * |
| 768 | * Does hashing of symbol string to hash number. |
| 769 | * Internal. |
| 770 | */ |
| 771 | static int |
| 772 | hash_code (handle, string) |
| 773 | struct hash_control *handle; |
| 774 | const char *string; |
| 775 | { |
| 776 | #if 1 /* There seems to be some interesting property of this function |
| 777 | that prevents the bfd version below from being an adequate |
| 778 | substitute. @@ Figure out what this property is! */ |
| 779 | long h; /* hash code built here */ |
| 780 | long c; /* each character lands here */ |
| 781 | int n; /* Amount to shift h by */ |
| 782 | |
| 783 | n = (handle->hash_sizelog - 3); |
| 784 | h = 0; |
| 785 | while ((c = *string++) != 0) |
| 786 | { |
| 787 | h += c; |
| 788 | h = (h << 3) + (h >> n) + c; |
| 789 | } |
| 790 | return h; |
| 791 | #else |
| 792 | /* from bfd */ |
| 793 | unsigned long h = 0; |
| 794 | unsigned int len = 0; |
| 795 | unsigned int c; |
| 796 | |
| 797 | while ((c = *string++) != 0) |
| 798 | { |
| 799 | h += c + (c << 17); |
| 800 | h ^= h >> 2; |
| 801 | ++len; |
| 802 | } |
| 803 | h += len + (len << 17); |
| 804 | h ^= h >> 2; |
| 805 | return h; |
| 806 | #endif |
| 807 | } |
| 808 | \f |
| 809 | void |
| 810 | hash_print_statistics (file, name, h) |
| 811 | FILE *file; |
| 812 | const char *name; |
| 813 | struct hash_control *h; |
| 814 | { |
| 815 | unsigned long sz, used, pct; |
| 816 | |
| 817 | if (h == 0) |
| 818 | return; |
| 819 | |
| 820 | sz = h->hash_stat[STAT_SIZE]; |
| 821 | used = h->hash_stat[STAT_USED]; |
| 822 | pct = (used * 100 + sz / 2) / sz; |
| 823 | |
| 824 | fprintf (file, "%s hash statistics:\n\t%lu/%lu slots used (%lu%%)\n", |
| 825 | name, used, sz, pct); |
| 826 | |
| 827 | #define P(name, off) \ |
| 828 | fprintf (file, "\t%-16s %6dr + %6dw = %7d\n", name, \ |
| 829 | h->hash_stat[off+STAT__READ], \ |
| 830 | h->hash_stat[off+STAT__WRITE], \ |
| 831 | h->hash_stat[off+STAT__READ] + h->hash_stat[off+STAT__WRITE]) |
| 832 | |
| 833 | P ("accesses:", STAT_ACCESS); |
| 834 | P ("collisions:", STAT_COLLIDE); |
| 835 | P ("string compares:", STAT_STRCMP); |
| 836 | |
| 837 | #undef P |
| 838 | } |
| 839 | \f |
| 840 | /* |
| 841 | * Here is a test program to exercise above. |
| 842 | */ |
| 843 | #ifdef TEST |
| 844 | |
| 845 | #define TABLES (6) /* number of hash tables to maintain */ |
| 846 | /* (at once) in any testing */ |
| 847 | #define STATBUFSIZE (12) /* we can have 12 statistics */ |
| 848 | |
| 849 | int statbuf[STATBUFSIZE]; /* display statistics here */ |
| 850 | char answer[100]; /* human farts here */ |
| 851 | char *hashtable[TABLES]; /* we test many hash tables at once */ |
| 852 | char *h; /* points to curent hash_control */ |
| 853 | char **pp; |
| 854 | char *p; |
| 855 | char *name; |
| 856 | char *value; |
| 857 | int size; |
| 858 | int used; |
| 859 | char command; |
| 860 | int number; /* number 0:TABLES-1 of current hashed */ |
| 861 | /* symbol table */ |
| 862 | |
| 863 | main () |
| 864 | { |
| 865 | void applicatee (); |
| 866 | void destroy (); |
| 867 | char *what (); |
| 868 | int *ip; |
| 869 | |
| 870 | number = 0; |
| 871 | h = 0; |
| 872 | printf ("type h <RETURN> for help\n"); |
| 873 | for (;;) |
| 874 | { |
| 875 | printf ("hash_test command: "); |
| 876 | gets (answer); |
| 877 | command = answer[0]; |
| 878 | if (isupper (command)) |
| 879 | command = tolower (command); /* ecch! */ |
| 880 | switch (command) |
| 881 | { |
| 882 | case '#': |
| 883 | printf ("old hash table #=%d.\n", number); |
| 884 | whattable (); |
| 885 | break; |
| 886 | case '?': |
| 887 | for (pp = hashtable; pp < hashtable + TABLES; pp++) |
| 888 | { |
| 889 | printf ("address of hash table #%d control block is %xx\n" |
| 890 | ,pp - hashtable, *pp); |
| 891 | } |
| 892 | break; |
| 893 | case 'a': |
| 894 | hash_apply (h, applicatee); |
| 895 | break; |
| 896 | case 'd': |
| 897 | hash_apply (h, destroy); |
| 898 | hash_die (h); |
| 899 | break; |
| 900 | case 'f': |
| 901 | p = hash_find (h, name = what ("symbol")); |
| 902 | printf ("value of \"%s\" is \"%s\"\n", name, p ? p : "NOT-PRESENT"); |
| 903 | break; |
| 904 | case 'h': |
| 905 | printf ("# show old, select new default hash table number\n"); |
| 906 | printf ("? display all hashtable control block addresses\n"); |
| 907 | printf ("a apply a simple display-er to each symbol in table\n"); |
| 908 | printf ("d die: destroy hashtable\n"); |
| 909 | printf ("f find value of nominated symbol\n"); |
| 910 | printf ("h this help\n"); |
| 911 | printf ("i insert value into symbol\n"); |
| 912 | printf ("j jam value into symbol\n"); |
| 913 | printf ("n new hashtable\n"); |
| 914 | printf ("r replace a value with another\n"); |
| 915 | printf ("s say what %% of table is used\n"); |
| 916 | printf ("q exit this program\n"); |
| 917 | printf ("x delete a symbol from table, report its value\n"); |
| 918 | break; |
| 919 | case 'i': |
| 920 | p = hash_insert (h, name = what ("symbol"), value = what ("value")); |
| 921 | if (p) |
| 922 | { |
| 923 | printf ("symbol=\"%s\" value=\"%s\" error=%s\n", name, value, |
| 924 | p); |
| 925 | } |
| 926 | break; |
| 927 | case 'j': |
| 928 | p = hash_jam (h, name = what ("symbol"), value = what ("value")); |
| 929 | if (p) |
| 930 | { |
| 931 | printf ("symbol=\"%s\" value=\"%s\" error=%s\n", name, value, p); |
| 932 | } |
| 933 | break; |
| 934 | case 'n': |
| 935 | h = hashtable[number] = (char *) hash_new (); |
| 936 | break; |
| 937 | case 'q': |
| 938 | exit (EXIT_SUCCESS); |
| 939 | case 'r': |
| 940 | p = hash_replace (h, name = what ("symbol"), value = what ("value")); |
| 941 | printf ("old value was \"%s\"\n", p ? p : "{}"); |
| 942 | break; |
| 943 | case 's': |
| 944 | hash_say (h, statbuf, STATBUFSIZE); |
| 945 | for (ip = statbuf; ip < statbuf + STATBUFSIZE; ip++) |
| 946 | { |
| 947 | printf ("%d ", *ip); |
| 948 | } |
| 949 | printf ("\n"); |
| 950 | break; |
| 951 | case 'x': |
| 952 | p = hash_delete (h, name = what ("symbol")); |
| 953 | printf ("old value was \"%s\"\n", p ? p : "{}"); |
| 954 | break; |
| 955 | default: |
| 956 | printf ("I can't understand command \"%c\"\n", command); |
| 957 | break; |
| 958 | } |
| 959 | } |
| 960 | } |
| 961 | |
| 962 | char * |
| 963 | what (description) |
| 964 | char *description; |
| 965 | { |
| 966 | char *retval; |
| 967 | char *malloc (); |
| 968 | |
| 969 | printf (" %s : ", description); |
| 970 | gets (answer); |
| 971 | /* will one day clean up answer here */ |
| 972 | retval = malloc (strlen (answer) + 1); |
| 973 | if (!retval) |
| 974 | { |
| 975 | error ("room"); |
| 976 | } |
| 977 | (void) strcpy (retval, answer); |
| 978 | return (retval); |
| 979 | } |
| 980 | |
| 981 | void |
| 982 | destroy (string, value) |
| 983 | char *string; |
| 984 | char *value; |
| 985 | { |
| 986 | free (string); |
| 987 | free (value); |
| 988 | } |
| 989 | |
| 990 | |
| 991 | void |
| 992 | applicatee (string, value) |
| 993 | char *string; |
| 994 | char *value; |
| 995 | { |
| 996 | printf ("%.20s-%.20s\n", string, value); |
| 997 | } |
| 998 | |
| 999 | whattable () /* determine number: what hash table to use */ |
| 1000 | /* also determine h: points to hash_control */ |
| 1001 | { |
| 1002 | |
| 1003 | for (;;) |
| 1004 | { |
| 1005 | printf (" what hash table (%d:%d) ? ", 0, TABLES - 1); |
| 1006 | gets (answer); |
| 1007 | sscanf (answer, "%d", &number); |
| 1008 | if (number >= 0 && number < TABLES) |
| 1009 | { |
| 1010 | h = hashtable[number]; |
| 1011 | if (!h) |
| 1012 | { |
| 1013 | printf ("warning: current hash-table-#%d. has no hash-control\n", number); |
| 1014 | } |
| 1015 | return; |
| 1016 | } |
| 1017 | else |
| 1018 | { |
| 1019 | printf ("invalid hash table number: %d\n", number); |
| 1020 | } |
| 1021 | } |
| 1022 | } |
| 1023 | |
| 1024 | |
| 1025 | |
| 1026 | #endif /* #ifdef TEST */ |
| 1027 | |
| 1028 | /* end of hash.c */ |