1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

Prepare for backtracking.

This commit is contained in:
Bruno Haible
2003-02-20 12:21:17 +00:00
parent 1d73fbe019
commit f1da37e04b
3 changed files with 249 additions and 148 deletions

View File

@@ -1,5 +1,20 @@
2002-11-19 Bruno Haible <bruno@clisp.org>
Prepare for backtracking.
* src/search.h (Search::try_asso_value, Search::change_some_asso_value):
Remove declarations.
(Search::less_collisions, Search::collision_prior_to): New declarations.
(Search::_fewest_collisions, Search::_union_set, Search::_num_done):
Remove fields.
* src/search.cc (Search::prepare_asso_values): Don't initialize
_union_set.
(Search::try_asso_value, Search::change_some_asso_value): Remove
methods.
(Search::less_collisions, Search::collision_prior_to): New methods.
(StackEntry): New class.
(Search::find_asso_values): Reorganized to use pseudo-recursion.
(Search::~Search): Don't free _union_set.
* src/search.h (Search::find_good_asso_values): New declaration.
* src/search.cc: Add comments about the basic structure of the
algorithm.

View File

@@ -771,9 +771,6 @@ Search::prepare_asso_values ()
values. */
_collision_detector = new Bool_Array (_max_hash_value + 1);
/* Allocate scratch set. */
_union_set = new unsigned int [2 * get_max_keysig_size ()];
if (option[DEBUG])
fprintf (stderr, "total non-linked keys = %d\nmaximum associated value is %d"
"\nmaximum size of generated hash table is %d\n",
@@ -894,91 +891,97 @@ Search::sort_by_occurrence (unsigned int *set, int len) const
}
}
/* Tries various other values for _asso_values[c]. A value is successful
if, with it, the recomputed hash values for the keywords from
_head->first() to curr - inclusive - give fewer than _fewest_collisions
collisions. Up to the given number of iterations are performed.
If successful, _asso_values[c] is changed, _fewest_collisions is decreased,
and false is returned.
If all iterations are unsuccessful, _asso_values[c] is restored and
true is returned.
/* If the recomputed hash values for the keywords from _head->first() to
curr - inclusive - give fewer than collision_bound collisions, this
collision count is returned. Otherwise some value >= collision_bound
is returned.
This is called very frequently, and needs to be fast! */
inline bool
Search::try_asso_value (unsigned int c, KeywordExt *curr, int iterations)
unsigned int
Search::less_collisions (KeywordExt *curr, unsigned int collision_bound)
{
int original_value = _asso_values[c];
unsigned int collisions = 0;
/* Try many valid associated values. */
for (int i = iterations - 1; i >= 0; i--)
/* Iteration Number array is a win, O(1) initialization time! */
_collision_detector->clear ();
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
{
int collisions = 0;
KeywordExt *keyword = ptr->first();
/* Try next value. Wrap around mod _asso_value_max. */
_asso_values[c] =
(_asso_values[c] + (_jump != 0 ? _jump : rand ()))
& (_asso_value_max - 1);
/* Compute new hash code for the keyword, and see whether it
collides with another keyword's hash code. If we have too
many collisions, we can safely abort the fruitless loop. */
if (_collision_detector->set_bit (compute_hash (keyword))
&& ++collisions >= collision_bound)
return collision_bound; /* >= collision_bound */
/* Iteration Number array is a win, O(1) intialization time! */
_collision_detector->clear ();
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
{
KeywordExt *keyword = ptr->first();
/* Compute new hash code for the keyword, and see whether it
collides with another keyword's hash code. If we have too
many collisions, we can safely abort the fruitless loop. */
if (_collision_detector->set_bit (compute_hash (keyword))
&& ++collisions >= _fewest_collisions)
break;
if (keyword == curr)
{
_fewest_collisions = collisions;
if (option[DEBUG])
fprintf (stderr, "- resolved after %d iterations",
iterations - i);
return false;
}
}
if (keyword == curr)
return collisions; /* < collision_bound */
}
/* Restore original values, no more tries. */
_asso_values[c] = original_value;
return true;
}
/* Attempts to change an _asso_value[], in order to resolve a hash value
collision between the two given keywords. */
/* Tests whether the given keyword has the same hash value as another one
earlier in the list. If yes, this earlier keyword is returned (more
precisely, the first one of them, but it doesn't really matter which one).
If no collision is present, NULL is returned. */
KeywordExt *
Search::collision_prior_to (KeywordExt *curr)
{
for (KeywordExt_List *prior_ptr = _head;
prior_ptr->first() != curr;
prior_ptr = prior_ptr->rest())
{
KeywordExt *prior = prior_ptr->first();
if (prior->_hash_value == curr->_hash_value)
return prior;
}
return NULL;
}
/* Finding good asso_values is normally straightforwards, but needs
backtracking in some cases. The recurse/backtrack depth can be at most
_list_len. Since we cannot assume that the C stack is large enough,
we perform the processing without recursion, and simulate the stack. */
struct StackEntry
{
/* The number of collisions so far. */
unsigned int _collisions_so_far;
/* The current keyword. */
KeywordExt * _curr;
/* The prior keyword, with which curr collides. */
KeywordExt * _prior;
/* Scratch set. */
unsigned int * _union_set;
unsigned int _union_set_length;
/* Current index into the scratch set. */
unsigned int _union_index;
/* Trying a different value for _asso_values[_c]. */
unsigned int _c;
/* The original value of _asso_values[_c]. */
unsigned int _original_asso_value;
/* Remaining number of iterations. */
int _iter;
};
/* Finds some _asso_values[] that fit. */
void
Search::change_some_asso_value (KeywordExt *prior, KeywordExt *curr)
Search::find_asso_values ()
{
if (option[DEBUG])
{
fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
_num_done,
prior->_allchars_length, prior->_allchars,
curr->_allchars_length, curr->_allchars,
curr->_hash_value);
fflush (stderr);
}
/* Add one keyword after the other and see whether its hash value collides
with one of the previous hash values. If so, change some asso_values[]
entry until the number of collisions so far is reduced. Then continue
with the next keyword. */
/* To achieve that the two hash values become different, we have to
change an _asso_values[c] for a character c that contributes to the
hash functions of prior and curr with different multiplicity.
So we compute the set of such c. */
unsigned int *union_set = _union_set;
int union_set_length =
compute_disjoint_union (prior->_selchars, prior->_selchars_length,
curr->_selchars, curr->_selchars_length,
union_set);
/* Sort by decreasing occurrence: Try least-used characters c first.
The idea is that this reduces the number of freshly introduced
collisions. */
sort_by_occurrence (union_set, union_set_length);
init_asso_values ();
int iterations =
!option[FAST]
@@ -987,79 +990,175 @@ Search::change_some_asso_value (KeywordExt *prior, KeywordExt *curr)
? option.get_iterations ()
: keyword_list_length ();
const unsigned int *p = union_set;
int i = union_set_length;
for (; i > 0; p++, i--)
if (!try_asso_value (*p, curr, iterations))
/* Allocate stack. */
StackEntry *stack = new StackEntry[_list_len];
{
KeywordExt_List *ptr = _head;
for (int i = 0; i < _list_len; i++, ptr = ptr->rest())
{
/* Good, this _asso_values[] modification reduces the number of
collisions so far.
All keyword->_hash_value up to curr - inclusive - and
_fewest_collisions have been updated. */
stack[i]._curr = ptr->first();
stack[i]._union_set = new unsigned int [2 * get_max_keysig_size ()];
}
}
{
/* Current stack pointer. */
StackEntry *sp = &stack[0];
/* Local variables corresponding to *sp. */
/* The number of collisions so far. */
unsigned int collisions_so_far;
/* The current keyword. */
KeywordExt *curr;
/* The prior keyword, with which curr collides. */
KeywordExt *prior;
/* Scratch set. */
unsigned int *union_set;
unsigned int union_set_length;
/* Current index into the scratch set. */
unsigned int union_index;
/* Trying a different value for _asso_values[c]. */
unsigned int c;
/* The original value of _asso_values[c]. */
unsigned int original_asso_value;
/* Remaining number of iterations. */
int iter;
collisions_so_far = 0;
STARTOUTERLOOP:
/* Next keyword from the list. */
curr = sp->_curr;
/* Compute this keyword's hash value. */
compute_hash (curr);
/* See if it collides with a prior keyword. */
prior = collision_prior_to (curr);
if (prior != NULL)
{
collisions_so_far++;
/* Handle collision: Attempt to change an _asso_value[], in order to
resolve a hash value collision between the two given keywords. */
if (option[DEBUG])
{
fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n",
*p, p - union_set + 1, _asso_values[*p]);
fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
sp - stack + 1,
prior->_allchars_length, prior->_allchars,
curr->_allchars_length, curr->_allchars,
curr->_hash_value);
fflush (stderr);
}
return;
}
/* Failed to resolve a collision. */
/* To achieve that the two hash values become different, we have to
change an _asso_values[c] for a character c that contributes to the
hash functions of prior and curr with different multiplicity.
So we compute the set of such c. */
union_set = sp->_union_set;
union_set_length =
compute_disjoint_union (prior->_selchars, prior->_selchars_length,
curr->_selchars, curr->_selchars_length,
union_set);
/* Recompute all keyword->_hash_value up to curr - inclusive -. */
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
{
KeywordExt* keyword = ptr->first();
compute_hash (keyword);
if (keyword == curr)
break;
}
/* Sort by decreasing occurrence: Try least-used characters c first.
The idea is that this reduces the number of freshly introduced
collisions. */
sort_by_occurrence (union_set, union_set_length);
if (option[DEBUG])
{
fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
iterations, _fewest_collisions + _total_duplicates);
fflush (stderr);
}
}
for (union_index = 0; union_index < union_set_length; union_index++)
{
c = union_set[union_index];
/* Finds some _asso_values[] that fit. */
/* Try various other values for _asso_values[c]. A value is
successful if, with it, the recomputed hash values for the
keywords from _head->first() to curr - inclusive - give fewer
than collisions_so_far collisions. Up to the given number of
iterations are performed. If successful, _asso_values[c] is
changed, collisions_so_far is decreased, and the recursion
continued. If all iterations are unsuccessful, _asso_values[c]
is restored and we backtrack, trying the next union_index. */
void
Search::find_asso_values ()
{
_fewest_collisions = 0;
init_asso_values ();
original_asso_value = _asso_values[c];
/* Add one keyword after the other and see whether its hash value collides
with one of the previous hash values. */
_num_done = 1;
for (KeywordExt_List *curr_ptr = _head;
curr_ptr != NULL;
curr_ptr = curr_ptr->rest(), _num_done++)
{
KeywordExt *curr = curr_ptr->first();
/* Try many valid associated values. */
for (iter = iterations; iter > 0; iter--)
{
/* Try next value. Wrap around mod _asso_value_max. */
_asso_values[c] =
(_asso_values[c] + (_jump != 0 ? _jump : rand ()))
& (_asso_value_max - 1);
/* Compute this keyword's hash value. */
compute_hash (curr);
unsigned int collisions =
less_collisions (curr, collisions_so_far);
if (collisions < collisions_so_far)
{
collisions_so_far = collisions;
/* Good, this _asso_values[] modification reduces the
number of collisions so far.
All keyword->_hash_value up to curr - inclusive -
have been updated. */
if (option[DEBUG])
{
fprintf (stderr, "- resolved after %d iterations by "
"changing asso_value['%c'] (char #%d) to %d\n",
iterations - iter + 1, c,
union_index + 1, _asso_values[c]);
fflush (stderr);
}
goto RECURSE;
}
}
/* See if it collides with a prior keyword. */
for (KeywordExt_List *prior_ptr = _head;
prior_ptr != curr_ptr;
prior_ptr = prior_ptr->rest())
{
KeywordExt *prior = prior_ptr->first();
/* Restore original values, no more tries. */
_asso_values[c] = original_asso_value;
}
if (prior->_hash_value == curr->_hash_value)
{
_fewest_collisions++;
/* Handle collision. */
change_some_asso_value (prior, curr);
/* Failed to resolve a collision. */
/* Recompute all keyword->_hash_value up to curr - inclusive -. */
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
{
KeywordExt* keyword = ptr->first();
compute_hash (keyword);
if (keyword == curr)
break;
}
}
}
}
if (option[DEBUG])
{
fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
iterations, collisions_so_far + _total_duplicates);
fflush (stderr);
}
}
RECURSE:
sp->_collisions_so_far = collisions_so_far;
/*sp->_curr = curr;*/ // redundant
sp->_prior = prior;
/*sp->_union_set = union_set;*/ // redundant
sp->_union_set_length = union_set_length;
sp->_union_index = union_index;
sp->_c = c;
sp->_original_asso_value = original_asso_value;
sp->_iter = iter;
sp++;
if (sp - stack < _list_len)
{
/*collisions_so_far = sp[-1]._collisions_so_far;*/ // redundant
goto STARTOUTERLOOP;
}
}
/* Deallocate stack. */
{
for (int i = 0; i < _list_len; i++)
delete[] stack[i]._union_set;
}
delete[] stack;
}
/* Finds good _asso_values[]. */
@@ -1210,7 +1309,6 @@ Search::optimize ()
Search::~Search ()
{
delete[] _union_set;
delete _collision_detector;
delete[] _determined;
if (option[DEBUG])

View File

@@ -93,12 +93,9 @@ private:
/* Sorts the given set in increasing frequency of _occurrences[]. */
void sort_by_occurrence (unsigned int *set, int len) const;
/* Tries various other values for _asso_values[c]. */
bool try_asso_value (unsigned int c, KeywordExt *curr, int iterations);
unsigned int less_collisions (KeywordExt *curr, unsigned int collision_bound);
/* Attempts to change an _asso_value[], in order to resolve a hash value
collision between the two given keywords. */
void change_some_asso_value (KeywordExt *prior, KeywordExt *curr);
KeywordExt * collision_prior_to (KeywordExt *curr);
/* Finds some _asso_values[] that fit. */
void find_asso_values ();
@@ -164,15 +161,6 @@ private:
/* Sparse bit vector for collision detection. */
Bool_Array * _collision_detector;
/* Minimal number of collisions found so far. */
int _fewest_collisions;
/* Scratch set, used during Search::change_some_asso_value. */
unsigned int * _union_set;
/* Number of keyword being handled during Search::find_asso_values. */
int _num_done;
};
#endif