1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

Prepare for backtracking.

This commit is contained in:
Bruno Haible
2003-02-20 12:21:17 +00:00
parent 1d73fbe019
commit f1da37e04b
3 changed files with 249 additions and 148 deletions

View File

@@ -1,5 +1,20 @@
2002-11-19 Bruno Haible <bruno@clisp.org>
Prepare for backtracking.
* src/search.h (Search::try_asso_value, Search::change_some_asso_value):
Remove declarations.
(Search::less_collisions, Search::collision_prior_to): New declarations.
(Search::_fewest_collisions, Search::_union_set, Search::_num_done):
Remove fields.
* src/search.cc (Search::prepare_asso_values): Don't initialize
_union_set.
(Search::try_asso_value, Search::change_some_asso_value): Remove
methods.
(Search::less_collisions, Search::collision_prior_to): New methods.
(StackEntry): New class.
(Search::find_asso_values): Reorganized to use pseudo-recursion.
(Search::~Search): Don't free _union_set.
* src/search.h (Search::find_good_asso_values): New declaration.
* src/search.cc: Add comments about the basic structure of the
algorithm.

View File

@@ -771,9 +771,6 @@ Search::prepare_asso_values ()
values. */
_collision_detector = new Bool_Array (_max_hash_value + 1);
/* Allocate scratch set. */
_union_set = new unsigned int [2 * get_max_keysig_size ()];
if (option[DEBUG])
fprintf (stderr, "total non-linked keys = %d\nmaximum associated value is %d"
"\nmaximum size of generated hash table is %d\n",
@@ -894,32 +891,17 @@ Search::sort_by_occurrence (unsigned int *set, int len) const
}
}
/* Tries various other values for _asso_values[c]. A value is successful
if, with it, the recomputed hash values for the keywords from
_head->first() to curr - inclusive - give fewer than _fewest_collisions
collisions. Up to the given number of iterations are performed.
If successful, _asso_values[c] is changed, _fewest_collisions is decreased,
and false is returned.
If all iterations are unsuccessful, _asso_values[c] is restored and
true is returned.
/* If the recomputed hash values for the keywords from _head->first() to
curr - inclusive - give fewer than collision_bound collisions, this
collision count is returned. Otherwise some value >= collision_bound
is returned.
This is called very frequently, and needs to be fast! */
inline bool
Search::try_asso_value (unsigned int c, KeywordExt *curr, int iterations)
unsigned int
Search::less_collisions (KeywordExt *curr, unsigned int collision_bound)
{
int original_value = _asso_values[c];
unsigned int collisions = 0;
/* Try many valid associated values. */
for (int i = iterations - 1; i >= 0; i--)
{
int collisions = 0;
/* Try next value. Wrap around mod _asso_value_max. */
_asso_values[c] =
(_asso_values[c] + (_jump != 0 ? _jump : rand ()))
& (_asso_value_max - 1);
/* Iteration Number array is a win, O(1) intialization time! */
/* Iteration Number array is a win, O(1) initialization time! */
_collision_detector->clear ();
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
@@ -930,35 +912,142 @@ Search::try_asso_value (unsigned int c, KeywordExt *curr, int iterations)
collides with another keyword's hash code. If we have too
many collisions, we can safely abort the fruitless loop. */
if (_collision_detector->set_bit (compute_hash (keyword))
&& ++collisions >= _fewest_collisions)
break;
&& ++collisions >= collision_bound)
return collision_bound; /* >= collision_bound */
if (keyword == curr)
return collisions; /* < collision_bound */
}
}
/* Tests whether the given keyword has the same hash value as another one
earlier in the list. If yes, this earlier keyword is returned (more
precisely, the first one of them, but it doesn't really matter which one).
If no collision is present, NULL is returned. */
KeywordExt *
Search::collision_prior_to (KeywordExt *curr)
{
_fewest_collisions = collisions;
if (option[DEBUG])
fprintf (stderr, "- resolved after %d iterations",
iterations - i);
return false;
}
for (KeywordExt_List *prior_ptr = _head;
prior_ptr->first() != curr;
prior_ptr = prior_ptr->rest())
{
KeywordExt *prior = prior_ptr->first();
if (prior->_hash_value == curr->_hash_value)
return prior;
}
return NULL;
}
/* Restore original values, no more tries. */
_asso_values[c] = original_value;
return true;
}
/* Finding good asso_values is normally straightforward, but needs
backtracking in some cases. The recurse/backtrack depth can be at most
_list_len. Since we cannot assume that the C stack is large enough,
we perform the processing without recursion, and simulate the stack.
A StackEntry is one frame of that simulated stack. Search::find_asso_values
allocates _list_len of them, one per keyword, and saves its local
variables into the current entry before advancing to the next keyword. */
struct StackEntry
{
/* The number of collisions so far. */
unsigned int _collisions_so_far;
/* Attempts to change an _asso_value[], in order to resolve a hash value
collision between the two given keywords. */
/* The current keyword. Filled in from the keyword list when the stack
is allocated. */
KeywordExt * _curr;
/* The prior keyword, with which curr collides. NULL if curr's hash value
did not collide with any earlier keyword. */
KeywordExt * _prior;
/* Scratch set. Array of 2 * get_max_keysig_size () elements, allocated
and freed by Search::find_asso_values. */
unsigned int * _union_set;
/* Number of elements of the scratch set in use, as computed by
compute_disjoint_union for _prior and _curr. */
unsigned int _union_set_length;
/* Current index into the scratch set. */
unsigned int _union_index;
/* Trying a different value for _asso_values[_c]. _c is the element
_union_set[_union_index]. */
unsigned int _c;
/* The original value of _asso_values[_c]. It is restored when all
iterations for _c have been unsuccessful. */
unsigned int _original_asso_value;
/* Remaining number of iterations. Counts down towards 0. */
int _iter;
};
/* Finds some _asso_values[] that fit. */
void
Search::change_some_asso_value (KeywordExt *prior, KeywordExt *curr)
Search::find_asso_values ()
{
/* Add one keyword after the other and see whether its hash value collides
with one of the previous hash values. If so, change some asso_values[]
entry until the number of collisions so far is reduced. Then continue
with the next keyword. */
init_asso_values ();
int iterations =
!option[FAST]
? _asso_value_max /* Try all possible values of _asso_values[c]. */
: option.get_iterations ()
? option.get_iterations ()
: keyword_list_length ();
/* Allocate stack. */
StackEntry *stack = new StackEntry[_list_len];
{
KeywordExt_List *ptr = _head;
for (int i = 0; i < _list_len; i++, ptr = ptr->rest())
{
stack[i]._curr = ptr->first();
stack[i]._union_set = new unsigned int [2 * get_max_keysig_size ()];
}
}
{
/* Current stack pointer. */
StackEntry *sp = &stack[0];
/* Local variables corresponding to *sp. */
/* The number of collisions so far. */
unsigned int collisions_so_far;
/* The current keyword. */
KeywordExt *curr;
/* The prior keyword, with which curr collides. */
KeywordExt *prior;
/* Scratch set. */
unsigned int *union_set;
unsigned int union_set_length;
/* Current index into the scratch set. */
unsigned int union_index;
/* Trying a different value for _asso_values[c]. */
unsigned int c;
/* The original value of _asso_values[c]. */
unsigned int original_asso_value;
/* Remaining number of iterations. */
int iter;
collisions_so_far = 0;
STARTOUTERLOOP:
/* Next keyword from the list. */
curr = sp->_curr;
/* Compute this keyword's hash value. */
compute_hash (curr);
/* See if it collides with a prior keyword. */
prior = collision_prior_to (curr);
if (prior != NULL)
{
collisions_so_far++;
/* Handle collision: Attempt to change an _asso_value[], in order to
resolve a hash value collision between the two given keywords. */
if (option[DEBUG])
{
fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
_num_done,
sp - stack + 1,
prior->_allchars_length, prior->_allchars,
curr->_allchars_length, curr->_allchars,
curr->_hash_value);
@@ -969,8 +1058,8 @@ Search::change_some_asso_value (KeywordExt *prior, KeywordExt *curr)
change an _asso_values[c] for a character c that contributes to the
hash functions of prior and curr with different multiplicity.
So we compute the set of such c. */
unsigned int *union_set = _union_set;
int union_set_length =
union_set = sp->_union_set;
union_set_length =
compute_disjoint_union (prior->_selchars, prior->_selchars_length,
curr->_selchars, curr->_selchars_length,
union_set);
@@ -980,29 +1069,52 @@ Search::change_some_asso_value (KeywordExt *prior, KeywordExt *curr)
collisions. */
sort_by_occurrence (union_set, union_set_length);
int iterations =
!option[FAST]
? _asso_value_max /* Try all possible values of _asso_values[c]. */
: option.get_iterations ()
? option.get_iterations ()
: keyword_list_length ();
const unsigned int *p = union_set;
int i = union_set_length;
for (; i > 0; p++, i--)
if (!try_asso_value (*p, curr, iterations))
for (union_index = 0; union_index < union_set_length; union_index++)
{
/* Good, this _asso_values[] modification reduces the number of
collisions so far.
All keyword->_hash_value up to curr - inclusive - and
_fewest_collisions have been updated. */
c = union_set[union_index];
/* Try various other values for _asso_values[c]. A value is
successful if, with it, the recomputed hash values for the
keywords from _head->first() to curr - inclusive - give fewer
than collisions_so_far collisions. Up to the given number of
iterations are performed. If successful, _asso_values[c] is
changed, collisions_so_far is decreased, and the recursion
continued. If all iterations are unsuccessful, _asso_values[c]
is restored and we backtrack, trying the next union_index. */
original_asso_value = _asso_values[c];
/* Try many valid associated values. */
for (iter = iterations; iter > 0; iter--)
{
/* Try next value. Wrap around mod _asso_value_max. */
_asso_values[c] =
(_asso_values[c] + (_jump != 0 ? _jump : rand ()))
& (_asso_value_max - 1);
unsigned int collisions =
less_collisions (curr, collisions_so_far);
if (collisions < collisions_so_far)
{
collisions_so_far = collisions;
/* Good, this _asso_values[] modification reduces the
number of collisions so far.
All keyword->_hash_value up to curr - inclusive -
have been updated. */
if (option[DEBUG])
{
fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n",
*p, p - union_set + 1, _asso_values[*p]);
fprintf (stderr, "- resolved after %d iterations by "
"changing asso_value['%c'] (char #%d) to %d\n",
iterations - iter + 1, c,
union_index + 1, _asso_values[c]);
fflush (stderr);
}
return;
goto RECURSE;
}
}
/* Restore original values, no more tries. */
_asso_values[c] = original_asso_value;
}
/* Failed to resolve a collision. */
@@ -1019,47 +1131,34 @@ Search::change_some_asso_value (KeywordExt *prior, KeywordExt *curr)
if (option[DEBUG])
{
fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
iterations, _fewest_collisions + _total_duplicates);
iterations, collisions_so_far + _total_duplicates);
fflush (stderr);
}
}
/* Finds some _asso_values[] that fit. */
void
Search::find_asso_values ()
RECURSE:
sp->_collisions_so_far = collisions_so_far;
/*sp->_curr = curr;*/ // redundant
sp->_prior = prior;
/*sp->_union_set = union_set;*/ // redundant
sp->_union_set_length = union_set_length;
sp->_union_index = union_index;
sp->_c = c;
sp->_original_asso_value = original_asso_value;
sp->_iter = iter;
sp++;
if (sp - stack < _list_len)
{
_fewest_collisions = 0;
init_asso_values ();
/* Add one keyword after the other and see whether its hash value collides
with one of the previous hash values. */
_num_done = 1;
for (KeywordExt_List *curr_ptr = _head;
curr_ptr != NULL;
curr_ptr = curr_ptr->rest(), _num_done++)
{
KeywordExt *curr = curr_ptr->first();
/* Compute this keyword's hash value. */
compute_hash (curr);
/* See if it collides with a prior keyword. */
for (KeywordExt_List *prior_ptr = _head;
prior_ptr != curr_ptr;
prior_ptr = prior_ptr->rest())
{
KeywordExt *prior = prior_ptr->first();
if (prior->_hash_value == curr->_hash_value)
{
_fewest_collisions++;
/* Handle collision. */
change_some_asso_value (prior, curr);
break;
/*collisions_so_far = sp[-1]._collisions_so_far;*/ // redundant
goto STARTOUTERLOOP;
}
}
/* Deallocate stack. */
{
for (int i = 0; i < _list_len; i++)
delete[] stack[i]._union_set;
}
delete[] stack;
}
/* Finds good _asso_values[]. */
@@ -1210,7 +1309,6 @@ Search::optimize ()
Search::~Search ()
{
delete[] _union_set;
delete _collision_detector;
delete[] _determined;
if (option[DEBUG])

View File

@@ -93,12 +93,9 @@ private:
/* Sorts the given set in increasing frequency of _occurrences[]. */
void sort_by_occurrence (unsigned int *set, int len) const;
/* Tries various other values for _asso_values[c]. */
bool try_asso_value (unsigned int c, KeywordExt *curr, int iterations);
unsigned int less_collisions (KeywordExt *curr, unsigned int collision_bound);
/* Attempts to change an _asso_value[], in order to resolve a hash value
collision between the two given keywords. */
void change_some_asso_value (KeywordExt *prior, KeywordExt *curr);
KeywordExt * collision_prior_to (KeywordExt *curr);
/* Finds some _asso_values[] that fit. */
void find_asso_values ();
@@ -164,15 +161,6 @@ private:
/* Sparse bit vector for collision detection. */
Bool_Array * _collision_detector;
/* Minimal number of collisions found so far. */
int _fewest_collisions;
/* Scratch set, used during Search::change_some_asso_value. */
unsigned int * _union_set;
/* Number of keyword being handled during Search::find_asso_values. */
int _num_done;
};
#endif