mirror of
https://git.savannah.gnu.org/git/gperf.git
synced 2025-12-02 13:09:22 +00:00
Prepare for backtracking.
This commit is contained in:
15
ChangeLog
15
ChangeLog
@@ -1,5 +1,20 @@
|
|||||||
2002-11-19 Bruno Haible <bruno@clisp.org>
|
2002-11-19 Bruno Haible <bruno@clisp.org>
|
||||||
|
|
||||||
|
Prepare for backtracking.
|
||||||
|
* src/search.h (Search::try_asso_value, Search::change_some_asso_value):
|
||||||
|
Remove declarations.
|
||||||
|
(Search::less_collisions, Search::collision_prior_to): New declarations.
|
||||||
|
(Search::_fewest_collisions, Search::_union_set, Search::_num_done):
|
||||||
|
Remove fields.
|
||||||
|
* src/search.cc (Search::prepare_asso_values): Don't initialize
|
||||||
|
_union_set.
|
||||||
|
(Search::try_asso_value, Search::change_some_asso_value): Remove
|
||||||
|
methods.
|
||||||
|
(Search::less_collisions, Search::collision_prior_to): New methods.
|
||||||
|
(StackEntry): New class.
|
||||||
|
(Search::find_asso_values): Reorganized to use pseudo-recursion.
|
||||||
|
(Search::~Search): Don't free _union_set.
|
||||||
|
|
||||||
* src/search.h (Search::find_good_asso_values): New declaration.
|
* src/search.h (Search::find_good_asso_values): New declaration.
|
||||||
* src/search.cc: Add comments about the basic structure of the
|
* src/search.cc: Add comments about the basic structure of the
|
||||||
algorithm.
|
algorithm.
|
||||||
|
|||||||
366
src/search.cc
366
src/search.cc
@@ -771,9 +771,6 @@ Search::prepare_asso_values ()
|
|||||||
values. */
|
values. */
|
||||||
_collision_detector = new Bool_Array (_max_hash_value + 1);
|
_collision_detector = new Bool_Array (_max_hash_value + 1);
|
||||||
|
|
||||||
/* Allocate scratch set. */
|
|
||||||
_union_set = new unsigned int [2 * get_max_keysig_size ()];
|
|
||||||
|
|
||||||
if (option[DEBUG])
|
if (option[DEBUG])
|
||||||
fprintf (stderr, "total non-linked keys = %d\nmaximum associated value is %d"
|
fprintf (stderr, "total non-linked keys = %d\nmaximum associated value is %d"
|
||||||
"\nmaximum size of generated hash table is %d\n",
|
"\nmaximum size of generated hash table is %d\n",
|
||||||
@@ -894,91 +891,97 @@ Search::sort_by_occurrence (unsigned int *set, int len) const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Tries various other values for _asso_values[c]. A value is successful
|
/* If the recomputed hash values for the keywords from _head->first() to
|
||||||
if, with it, the recomputed hash values for the keywords from
|
curr - inclusive - give fewer than collision_bound collisions, this
|
||||||
_head->first() to curr - inclusive - give fewer than _fewest_collisions
|
collision count is returned. Otherwise some value >= collision_bound
|
||||||
collisions. Up to the given number of iterations are performed.
|
is returned.
|
||||||
If successful, _asso_values[c] is changed, _fewest_collisions is decreased,
|
|
||||||
and false is returned.
|
|
||||||
If all iterations are unsuccessful, _asso_values[c] is restored and
|
|
||||||
true is returned.
|
|
||||||
This is called very frequently, and needs to be fast! */
|
This is called very frequently, and needs to be fast! */
|
||||||
|
unsigned int
|
||||||
inline bool
|
Search::less_collisions (KeywordExt *curr, unsigned int collision_bound)
|
||||||
Search::try_asso_value (unsigned int c, KeywordExt *curr, int iterations)
|
|
||||||
{
|
{
|
||||||
int original_value = _asso_values[c];
|
unsigned int collisions = 0;
|
||||||
|
|
||||||
/* Try many valid associated values. */
|
/* Iteration Number array is a win, O(1) initialization time! */
|
||||||
for (int i = iterations - 1; i >= 0; i--)
|
_collision_detector->clear ();
|
||||||
|
|
||||||
|
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
|
||||||
{
|
{
|
||||||
int collisions = 0;
|
KeywordExt *keyword = ptr->first();
|
||||||
|
|
||||||
/* Try next value. Wrap around mod _asso_value_max. */
|
/* Compute new hash code for the keyword, and see whether it
|
||||||
_asso_values[c] =
|
collides with another keyword's hash code. If we have too
|
||||||
(_asso_values[c] + (_jump != 0 ? _jump : rand ()))
|
many collisions, we can safely abort the fruitless loop. */
|
||||||
& (_asso_value_max - 1);
|
if (_collision_detector->set_bit (compute_hash (keyword))
|
||||||
|
&& ++collisions >= collision_bound)
|
||||||
|
return collision_bound; /* >= collision_bound */
|
||||||
|
|
||||||
/* Iteration Number array is a win, O(1) intialization time! */
|
if (keyword == curr)
|
||||||
_collision_detector->clear ();
|
return collisions; /* < collision_bound */
|
||||||
|
|
||||||
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
|
|
||||||
{
|
|
||||||
KeywordExt *keyword = ptr->first();
|
|
||||||
|
|
||||||
/* Compute new hash code for the keyword, and see whether it
|
|
||||||
collides with another keyword's hash code. If we have too
|
|
||||||
many collisions, we can safely abort the fruitless loop. */
|
|
||||||
if (_collision_detector->set_bit (compute_hash (keyword))
|
|
||||||
&& ++collisions >= _fewest_collisions)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (keyword == curr)
|
|
||||||
{
|
|
||||||
_fewest_collisions = collisions;
|
|
||||||
if (option[DEBUG])
|
|
||||||
fprintf (stderr, "- resolved after %d iterations",
|
|
||||||
iterations - i);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Restore original values, no more tries. */
|
|
||||||
_asso_values[c] = original_value;
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Attempts to change an _asso_value[], in order to resolve a hash value
|
/* Tests whether the given keyword has the same hash value as another one
|
||||||
collision between the two given keywords. */
|
earlier in the list. If yes, this earlier keyword is returned (more
|
||||||
|
precisely, the first one of them, but it doesn't really matter which one).
|
||||||
|
If no collision is present, NULL is returned. */
|
||||||
|
KeywordExt *
|
||||||
|
Search::collision_prior_to (KeywordExt *curr)
|
||||||
|
{
|
||||||
|
for (KeywordExt_List *prior_ptr = _head;
|
||||||
|
prior_ptr->first() != curr;
|
||||||
|
prior_ptr = prior_ptr->rest())
|
||||||
|
{
|
||||||
|
KeywordExt *prior = prior_ptr->first();
|
||||||
|
|
||||||
|
if (prior->_hash_value == curr->_hash_value)
|
||||||
|
return prior;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finding good asso_values is normally straightforwards, but needs
|
||||||
|
backtracking in some cases. The recurse/backtrack depth can be at most
|
||||||
|
_list_len. Since we cannot assume that the C stack is large enough,
|
||||||
|
we perform the processing without recursion, and simulate the stack. */
|
||||||
|
struct StackEntry
|
||||||
|
{
|
||||||
|
/* The number of collisions so far. */
|
||||||
|
unsigned int _collisions_so_far;
|
||||||
|
|
||||||
|
/* The current keyword. */
|
||||||
|
KeywordExt * _curr;
|
||||||
|
|
||||||
|
/* The prior keyword, with which curr collides. */
|
||||||
|
KeywordExt * _prior;
|
||||||
|
|
||||||
|
/* Scratch set. */
|
||||||
|
unsigned int * _union_set;
|
||||||
|
unsigned int _union_set_length;
|
||||||
|
|
||||||
|
/* Current index into the scratch set. */
|
||||||
|
unsigned int _union_index;
|
||||||
|
|
||||||
|
/* Trying a different value for _asso_values[_c]. */
|
||||||
|
unsigned int _c;
|
||||||
|
|
||||||
|
/* The original value of _asso_values[_c]. */
|
||||||
|
unsigned int _original_asso_value;
|
||||||
|
|
||||||
|
/* Remaining number of iterations. */
|
||||||
|
int _iter;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Finds some _asso_values[] that fit. */
|
||||||
|
|
||||||
void
|
void
|
||||||
Search::change_some_asso_value (KeywordExt *prior, KeywordExt *curr)
|
Search::find_asso_values ()
|
||||||
{
|
{
|
||||||
if (option[DEBUG])
|
/* Add one keyword after the other and see whether its hash value collides
|
||||||
{
|
with one of the previous hash values. If so, change some asso_values[]
|
||||||
fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
|
entry until the number of collisions so far is reduced. Then continue
|
||||||
_num_done,
|
with the next keyword. */
|
||||||
prior->_allchars_length, prior->_allchars,
|
|
||||||
curr->_allchars_length, curr->_allchars,
|
|
||||||
curr->_hash_value);
|
|
||||||
fflush (stderr);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* To achieve that the two hash values become different, we have to
|
init_asso_values ();
|
||||||
change an _asso_values[c] for a character c that contributes to the
|
|
||||||
hash functions of prior and curr with different multiplicity.
|
|
||||||
So we compute the set of such c. */
|
|
||||||
unsigned int *union_set = _union_set;
|
|
||||||
int union_set_length =
|
|
||||||
compute_disjoint_union (prior->_selchars, prior->_selchars_length,
|
|
||||||
curr->_selchars, curr->_selchars_length,
|
|
||||||
union_set);
|
|
||||||
|
|
||||||
/* Sort by decreasing occurrence: Try least-used characters c first.
|
|
||||||
The idea is that this reduces the number of freshly introduced
|
|
||||||
collisions. */
|
|
||||||
sort_by_occurrence (union_set, union_set_length);
|
|
||||||
|
|
||||||
int iterations =
|
int iterations =
|
||||||
!option[FAST]
|
!option[FAST]
|
||||||
@@ -987,79 +990,175 @@ Search::change_some_asso_value (KeywordExt *prior, KeywordExt *curr)
|
|||||||
? option.get_iterations ()
|
? option.get_iterations ()
|
||||||
: keyword_list_length ();
|
: keyword_list_length ();
|
||||||
|
|
||||||
const unsigned int *p = union_set;
|
/* Allocate stack. */
|
||||||
int i = union_set_length;
|
StackEntry *stack = new StackEntry[_list_len];
|
||||||
for (; i > 0; p++, i--)
|
{
|
||||||
if (!try_asso_value (*p, curr, iterations))
|
KeywordExt_List *ptr = _head;
|
||||||
|
for (int i = 0; i < _list_len; i++, ptr = ptr->rest())
|
||||||
{
|
{
|
||||||
/* Good, this _asso_values[] modification reduces the number of
|
stack[i]._curr = ptr->first();
|
||||||
collisions so far.
|
stack[i]._union_set = new unsigned int [2 * get_max_keysig_size ()];
|
||||||
All keyword->_hash_value up to curr - inclusive - and
|
}
|
||||||
_fewest_collisions have been updated. */
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
/* Current stack pointer. */
|
||||||
|
StackEntry *sp = &stack[0];
|
||||||
|
|
||||||
|
/* Local variables corresponding to *sp. */
|
||||||
|
/* The number of collisions so far. */
|
||||||
|
unsigned int collisions_so_far;
|
||||||
|
/* The current keyword. */
|
||||||
|
KeywordExt *curr;
|
||||||
|
/* The prior keyword, with which curr collides. */
|
||||||
|
KeywordExt *prior;
|
||||||
|
/* Scratch set. */
|
||||||
|
unsigned int *union_set;
|
||||||
|
unsigned int union_set_length;
|
||||||
|
/* Current index into the scratch set. */
|
||||||
|
unsigned int union_index;
|
||||||
|
/* Trying a different value for _asso_values[c]. */
|
||||||
|
unsigned int c;
|
||||||
|
/* The original value of _asso_values[c]. */
|
||||||
|
unsigned int original_asso_value;
|
||||||
|
/* Remaining number of iterations. */
|
||||||
|
int iter;
|
||||||
|
|
||||||
|
collisions_so_far = 0;
|
||||||
|
|
||||||
|
STARTOUTERLOOP:
|
||||||
|
|
||||||
|
/* Next keyword from the list. */
|
||||||
|
curr = sp->_curr;
|
||||||
|
|
||||||
|
/* Compute this keyword's hash value. */
|
||||||
|
compute_hash (curr);
|
||||||
|
|
||||||
|
/* See if it collides with a prior keyword. */
|
||||||
|
prior = collision_prior_to (curr);
|
||||||
|
|
||||||
|
if (prior != NULL)
|
||||||
|
{
|
||||||
|
collisions_so_far++;
|
||||||
|
|
||||||
|
/* Handle collision: Attempt to change an _asso_value[], in order to
|
||||||
|
resolve a hash value collision between the two given keywords. */
|
||||||
|
|
||||||
if (option[DEBUG])
|
if (option[DEBUG])
|
||||||
{
|
{
|
||||||
fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n",
|
fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
|
||||||
*p, p - union_set + 1, _asso_values[*p]);
|
sp - stack + 1,
|
||||||
|
prior->_allchars_length, prior->_allchars,
|
||||||
|
curr->_allchars_length, curr->_allchars,
|
||||||
|
curr->_hash_value);
|
||||||
fflush (stderr);
|
fflush (stderr);
|
||||||
}
|
}
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Failed to resolve a collision. */
|
/* To achieve that the two hash values become different, we have to
|
||||||
|
change an _asso_values[c] for a character c that contributes to the
|
||||||
|
hash functions of prior and curr with different multiplicity.
|
||||||
|
So we compute the set of such c. */
|
||||||
|
union_set = sp->_union_set;
|
||||||
|
union_set_length =
|
||||||
|
compute_disjoint_union (prior->_selchars, prior->_selchars_length,
|
||||||
|
curr->_selchars, curr->_selchars_length,
|
||||||
|
union_set);
|
||||||
|
|
||||||
/* Recompute all keyword->_hash_value up to curr - inclusive -. */
|
/* Sort by decreasing occurrence: Try least-used characters c first.
|
||||||
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
|
The idea is that this reduces the number of freshly introduced
|
||||||
{
|
collisions. */
|
||||||
KeywordExt* keyword = ptr->first();
|
sort_by_occurrence (union_set, union_set_length);
|
||||||
compute_hash (keyword);
|
|
||||||
if (keyword == curr)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (option[DEBUG])
|
for (union_index = 0; union_index < union_set_length; union_index++)
|
||||||
{
|
{
|
||||||
fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
|
c = union_set[union_index];
|
||||||
iterations, _fewest_collisions + _total_duplicates);
|
|
||||||
fflush (stderr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Finds some _asso_values[] that fit. */
|
/* Try various other values for _asso_values[c]. A value is
|
||||||
|
successful if, with it, the recomputed hash values for the
|
||||||
|
keywords from _head->first() to curr - inclusive - give fewer
|
||||||
|
than collisions_so_far collisions. Up to the given number of
|
||||||
|
iterations are performed. If successful, _asso_values[c] is
|
||||||
|
changed, collisions_so_far is decreased, and the recursion
|
||||||
|
continued. If all iterations are unsuccessful, _asso_values[c]
|
||||||
|
is restored and we backtrack, trying the next union_index. */
|
||||||
|
|
||||||
void
|
original_asso_value = _asso_values[c];
|
||||||
Search::find_asso_values ()
|
|
||||||
{
|
|
||||||
_fewest_collisions = 0;
|
|
||||||
init_asso_values ();
|
|
||||||
|
|
||||||
/* Add one keyword after the other and see whether its hash value collides
|
/* Try many valid associated values. */
|
||||||
with one of the previous hash values. */
|
for (iter = iterations; iter > 0; iter--)
|
||||||
_num_done = 1;
|
{
|
||||||
for (KeywordExt_List *curr_ptr = _head;
|
/* Try next value. Wrap around mod _asso_value_max. */
|
||||||
curr_ptr != NULL;
|
_asso_values[c] =
|
||||||
curr_ptr = curr_ptr->rest(), _num_done++)
|
(_asso_values[c] + (_jump != 0 ? _jump : rand ()))
|
||||||
{
|
& (_asso_value_max - 1);
|
||||||
KeywordExt *curr = curr_ptr->first();
|
|
||||||
|
|
||||||
/* Compute this keyword's hash value. */
|
unsigned int collisions =
|
||||||
compute_hash (curr);
|
less_collisions (curr, collisions_so_far);
|
||||||
|
if (collisions < collisions_so_far)
|
||||||
|
{
|
||||||
|
collisions_so_far = collisions;
|
||||||
|
/* Good, this _asso_values[] modification reduces the
|
||||||
|
number of collisions so far.
|
||||||
|
All keyword->_hash_value up to curr - inclusive -
|
||||||
|
have been updated. */
|
||||||
|
if (option[DEBUG])
|
||||||
|
{
|
||||||
|
fprintf (stderr, "- resolved after %d iterations by "
|
||||||
|
"changing asso_value['%c'] (char #%d) to %d\n",
|
||||||
|
iterations - iter + 1, c,
|
||||||
|
union_index + 1, _asso_values[c]);
|
||||||
|
fflush (stderr);
|
||||||
|
}
|
||||||
|
goto RECURSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* See if it collides with a prior keyword. */
|
/* Restore original values, no more tries. */
|
||||||
for (KeywordExt_List *prior_ptr = _head;
|
_asso_values[c] = original_asso_value;
|
||||||
prior_ptr != curr_ptr;
|
}
|
||||||
prior_ptr = prior_ptr->rest())
|
|
||||||
{
|
|
||||||
KeywordExt *prior = prior_ptr->first();
|
|
||||||
|
|
||||||
if (prior->_hash_value == curr->_hash_value)
|
/* Failed to resolve a collision. */
|
||||||
{
|
|
||||||
_fewest_collisions++;
|
/* Recompute all keyword->_hash_value up to curr - inclusive -. */
|
||||||
/* Handle collision. */
|
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
|
||||||
change_some_asso_value (prior, curr);
|
{
|
||||||
|
KeywordExt* keyword = ptr->first();
|
||||||
|
compute_hash (keyword);
|
||||||
|
if (keyword == curr)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
if (option[DEBUG])
|
||||||
|
{
|
||||||
|
fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
|
||||||
|
iterations, collisions_so_far + _total_duplicates);
|
||||||
|
fflush (stderr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
RECURSE:
|
||||||
|
sp->_collisions_so_far = collisions_so_far;
|
||||||
|
/*sp->_curr = curr;*/ // redundant
|
||||||
|
sp->_prior = prior;
|
||||||
|
/*sp->_union_set = union_set;*/ // redundant
|
||||||
|
sp->_union_set_length = union_set_length;
|
||||||
|
sp->_union_index = union_index;
|
||||||
|
sp->_c = c;
|
||||||
|
sp->_original_asso_value = original_asso_value;
|
||||||
|
sp->_iter = iter;
|
||||||
|
sp++;
|
||||||
|
if (sp - stack < _list_len)
|
||||||
|
{
|
||||||
|
/*collisions_so_far = sp[-1]._collisions_so_far;*/ // redundant
|
||||||
|
goto STARTOUTERLOOP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Deallocate stack. */
|
||||||
|
{
|
||||||
|
for (int i = 0; i < _list_len; i++)
|
||||||
|
delete[] stack[i]._union_set;
|
||||||
|
}
|
||||||
|
delete[] stack;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Finds good _asso_values[]. */
|
/* Finds good _asso_values[]. */
|
||||||
@@ -1210,7 +1309,6 @@ Search::optimize ()
|
|||||||
|
|
||||||
Search::~Search ()
|
Search::~Search ()
|
||||||
{
|
{
|
||||||
delete[] _union_set;
|
|
||||||
delete _collision_detector;
|
delete _collision_detector;
|
||||||
delete[] _determined;
|
delete[] _determined;
|
||||||
if (option[DEBUG])
|
if (option[DEBUG])
|
||||||
|
|||||||
16
src/search.h
16
src/search.h
@@ -93,12 +93,9 @@ private:
|
|||||||
/* Sorts the given set in increasing frequency of _occurrences[]. */
|
/* Sorts the given set in increasing frequency of _occurrences[]. */
|
||||||
void sort_by_occurrence (unsigned int *set, int len) const;
|
void sort_by_occurrence (unsigned int *set, int len) const;
|
||||||
|
|
||||||
/* Tries various other values for _asso_values[c]. */
|
unsigned int less_collisions (KeywordExt *curr, unsigned int collision_bound);
|
||||||
bool try_asso_value (unsigned int c, KeywordExt *curr, int iterations);
|
|
||||||
|
|
||||||
/* Attempts to change an _asso_value[], in order to resolve a hash value
|
KeywordExt * collision_prior_to (KeywordExt *curr);
|
||||||
collision between the two given keywords. */
|
|
||||||
void change_some_asso_value (KeywordExt *prior, KeywordExt *curr);
|
|
||||||
|
|
||||||
/* Finds some _asso_values[] that fit. */
|
/* Finds some _asso_values[] that fit. */
|
||||||
void find_asso_values ();
|
void find_asso_values ();
|
||||||
@@ -164,15 +161,6 @@ private:
|
|||||||
|
|
||||||
/* Sparse bit vector for collision detection. */
|
/* Sparse bit vector for collision detection. */
|
||||||
Bool_Array * _collision_detector;
|
Bool_Array * _collision_detector;
|
||||||
|
|
||||||
/* Minimal number of collisions found so far. */
|
|
||||||
int _fewest_collisions;
|
|
||||||
|
|
||||||
/* Scratch set, used during Search::change_some_asso_value. */
|
|
||||||
unsigned int * _union_set;
|
|
||||||
|
|
||||||
/* Number of keyword being handled during Search::find_asso_values. */
|
|
||||||
int _num_done;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user