mirror of
https://git.savannah.gnu.org/git/gperf.git
synced 2025-12-02 13:09:22 +00:00
Restructure the asso_values[] searching code.
This commit is contained in:
16
ChangeLog
16
ChangeLog
@@ -1,5 +1,21 @@
|
|||||||
2002-11-03 Bruno Haible <bruno@clisp.org>
|
2002-11-03 Bruno Haible <bruno@clisp.org>
|
||||||
|
|
||||||
|
* src/search.h (Search::init_asso_values, Search::find_asso_values):
|
||||||
|
New declarations.
|
||||||
|
(Search::try_asso_value): Renamed from Search::affects_prev.
|
||||||
|
(Search::change_some_asso_value): Renamed from Search::change.
|
||||||
|
(Search::set_asso_max, Search::get_asso_max): Remove methods.
|
||||||
|
(Search::_union_set): New field.
|
||||||
|
* src/search.cc (Search::init_asso_values): New method, extracted
|
||||||
|
from Search::optimize.
|
||||||
|
(Search::try_asso_value): Renamed from Search::affects_prev. Take the
|
||||||
|
iteration count as argument.
|
||||||
|
(Search::change_some_asso_value): Renamed from Search::change. Don't
|
||||||
|
make union_set static. Don't increment _fewest_collisions here.
|
||||||
|
(Search::find_asso_values): New method, extracted from
|
||||||
|
Search::optimize.
|
||||||
|
(Search::optimize): Update.
|
||||||
|
|
||||||
* src/search.h (Search::compute_hash): Renamed from Search::hash.
|
* src/search.h (Search::compute_hash): Renamed from Search::hash.
|
||||||
(Search::compute_disjoint_union): Remove declaration.
|
(Search::compute_disjoint_union): Remove declaration.
|
||||||
(Search::sort_by_occurrence): Renamed from Search::sort_set.
|
(Search::sort_by_occurrence): Renamed from Search::sort_set.
|
||||||
|
|||||||
277
src/search.cc
277
src/search.cc
@@ -31,8 +31,7 @@
|
|||||||
#include "options.h"
|
#include "options.h"
|
||||||
#include "hash-table.h"
|
#include "hash-table.h"
|
||||||
|
|
||||||
/* Efficiently returns the least power of two greater than or equal to X! */
|
/* -------------------- Initialization and Preparation --------------------- */
|
||||||
#define POW(X) ((!X)?1:(X-=1,X|=X>>1,X|=X>>2,X|=X>>4,X|=X>>8,X|=X>>16,(++X)))
|
|
||||||
|
|
||||||
Search::Search (KeywordExt_List *list)
|
Search::Search (KeywordExt_List *list)
|
||||||
: _head (list),
|
: _head (list),
|
||||||
@@ -41,7 +40,6 @@ Search::Search (KeywordExt_List *list)
|
|||||||
_asso_values (new int[_alpha_size]),
|
_asso_values (new int[_alpha_size]),
|
||||||
_determined (new bool[_alpha_size])
|
_determined (new bool[_alpha_size])
|
||||||
{
|
{
|
||||||
memset (_asso_values, 0, _alpha_size * sizeof (_asso_values[0]));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -154,6 +152,8 @@ Search::prepare ()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------- Sorting the Keyword list ------------------------ */
|
||||||
|
|
||||||
/* Merges two sorted lists together to form one sorted list.
|
/* Merges two sorted lists together to form one sorted list.
|
||||||
The sorting criterion depends on which of _occurrence_sort and _hash_sort
|
The sorting criterion depends on which of _occurrence_sort and _hash_sort
|
||||||
is set to true. This is a kludge, but permits nice sharing of almost
|
is set to true. This is a kludge, but permits nice sharing of almost
|
||||||
@@ -230,6 +230,8 @@ Search::merge_sort (KeywordExt_List *head)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ---------------- Reordering the Keyword list (optional) ----------------- */
|
||||||
|
|
||||||
/* Computes the sum of occurrences of the _selchars of a keyword.
|
/* Computes the sum of occurrences of the _selchars of a keyword.
|
||||||
This is a kind of correlation measure: Keywords which have many
|
This is a kind of correlation measure: Keywords which have many
|
||||||
selected characters in common with other keywords have a high
|
selected characters in common with other keywords have a high
|
||||||
@@ -356,6 +358,8 @@ Search::reorder ()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
/* Returns the length of keyword list. */
|
/* Returns the length of keyword list. */
|
||||||
|
|
||||||
int
|
int
|
||||||
@@ -380,6 +384,71 @@ Search::get_max_keysig_size ()
|
|||||||
return option[ALLCHARS] ? _max_key_len : option.get_max_keysig_size ();
|
return option[ALLCHARS] ? _max_key_len : option.get_max_keysig_size ();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ---------------------- Finding good asso_values[] ----------------------- */
|
||||||
|
|
||||||
|
/* Initializes the asso_values[] related parameters and puts a first guess
|
||||||
|
into asso_values[]. */
|
||||||
|
|
||||||
|
void
|
||||||
|
Search::init_asso_values ()
|
||||||
|
{
|
||||||
|
int size_multiple = option.get_size_multiple ();
|
||||||
|
int non_linked_length = keyword_list_length ();
|
||||||
|
int asso_value_max;
|
||||||
|
|
||||||
|
if (size_multiple == 0)
|
||||||
|
asso_value_max = non_linked_length;
|
||||||
|
else if (size_multiple > 0)
|
||||||
|
asso_value_max = non_linked_length * size_multiple;
|
||||||
|
else /* if (size_multiple < 0) */
|
||||||
|
asso_value_max = non_linked_length / -size_multiple;
|
||||||
|
/* Round up to the next power of two. This makes it easy to ensure
|
||||||
|
an _asso_value[c] is >= 0 and < asso_value_max. Also, the jump value
|
||||||
|
being odd, it guarantees that Search::try_asso_value() will iterate
|
||||||
|
through different values for _asso_value[c]. */
|
||||||
|
if (asso_value_max == 0)
|
||||||
|
asso_value_max = 1;
|
||||||
|
asso_value_max |= asso_value_max >> 1;
|
||||||
|
asso_value_max |= asso_value_max >> 2;
|
||||||
|
asso_value_max |= asso_value_max >> 4;
|
||||||
|
asso_value_max |= asso_value_max >> 8;
|
||||||
|
asso_value_max |= asso_value_max >> 16;
|
||||||
|
asso_value_max++;
|
||||||
|
_asso_value_max = asso_value_max;
|
||||||
|
|
||||||
|
if (option[RANDOM])
|
||||||
|
{
|
||||||
|
srand (reinterpret_cast<long>(time (0)));
|
||||||
|
|
||||||
|
for (int i = 0; i < _alpha_size; i++)
|
||||||
|
_asso_values[i] = rand () & (asso_value_max - 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int asso_value = option.get_initial_asso_value ();
|
||||||
|
|
||||||
|
asso_value = asso_value & (_asso_value_max - 1);
|
||||||
|
for (int i = 0; i < _alpha_size; i++)
|
||||||
|
_asso_values[i] = asso_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Given the bound for _asso_values[c], we have a bound for the possible
|
||||||
|
hash values, as computed in compute_hash(). */
|
||||||
|
_max_hash_value = (option[NOLENGTH] ? 0 : max_key_length ())
|
||||||
|
+ (_asso_value_max - 1) * get_max_keysig_size ();
|
||||||
|
/* Allocate a sparse bit vector for detection of collisions of hash
|
||||||
|
values. */
|
||||||
|
_collision_detector = new Bool_Array (_max_hash_value + 1);
|
||||||
|
|
||||||
|
/* Allocate scratch set. */
|
||||||
|
_union_set = new unsigned char [2 * get_max_keysig_size ()];
|
||||||
|
|
||||||
|
if (option[DEBUG])
|
||||||
|
fprintf (stderr, "total non-linked keys = %d\nmaximum associated value is %d"
|
||||||
|
"\nmaximum size of generated hash table is %d\n",
|
||||||
|
non_linked_length, asso_value_max, _max_hash_value);
|
||||||
|
}
|
||||||
|
|
||||||
/* Computes a keyword's hash value, relative to the current _asso_values[],
|
/* Computes a keyword's hash value, relative to the current _asso_values[],
|
||||||
and stores it in keyword->_hash_value.
|
and stores it in keyword->_hash_value.
|
||||||
This is called very frequently, and needs to be fast! */
|
This is called very frequently, and needs to be fast! */
|
||||||
@@ -467,68 +536,67 @@ Search::sort_by_occurrence (unsigned char *set, int len)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Find out how character value change affects successfully hashed items.
|
/* Tries various other values for _asso_values[c]. A value is successful
|
||||||
Returns FALSE if no other hash values are affected, else returns TRUE.
|
if, with it, the recomputed hash values for the keywords from
|
||||||
Note that because option.get_asso_max() is a power of two we can guarantee
|
_head->first() to curr - inclusive - give fewer than _fewest_collisions
|
||||||
that all valid asso_values are visited without repetition since
|
collisions. Up to the given number of iterations are performed.
|
||||||
Option.Get_Jump was forced to be an odd value! */
|
If successful, _asso_values[c] is changed, _fewest_collisions is decreased,
|
||||||
|
and false is returned.
|
||||||
|
If all iterations are unsuccessful, _asso_values[c] is restored and
|
||||||
|
true is returned.
|
||||||
|
This is called very frequently, and needs to be fast! */
|
||||||
|
|
||||||
inline bool
|
inline bool
|
||||||
Search::affects_prev (unsigned char c, KeywordExt *curr)
|
Search::try_asso_value (unsigned char c, KeywordExt *curr, int iterations)
|
||||||
{
|
{
|
||||||
int original_char = _asso_values[c];
|
int original_value = _asso_values[c];
|
||||||
int total_iterations = !option[FAST]
|
|
||||||
? get_asso_max () : option.get_iterations () ? option.get_iterations () : keyword_list_length ();
|
|
||||||
|
|
||||||
/* Try all valid associated values. */
|
/* Try many valid associated values. */
|
||||||
|
for (int i = iterations - 1; i >= 0; i--)
|
||||||
for (int i = total_iterations - 1; i >= 0; i--)
|
|
||||||
{
|
{
|
||||||
int collisions = 0;
|
int collisions = 0;
|
||||||
|
|
||||||
|
/* Try next value. Wrap around mod _asso_value_max. */
|
||||||
_asso_values[c] =
|
_asso_values[c] =
|
||||||
(_asso_values[c] + (option.get_jump () ? option.get_jump () : rand ()))
|
(_asso_values[c] + (option.get_jump () ? option.get_jump () : rand ()))
|
||||||
& (get_asso_max () - 1);
|
& (_asso_value_max - 1);
|
||||||
|
|
||||||
/* Iteration Number array is a win, O(1) initialization time! */
|
/* Iteration Number array is a win, O(1) initialization time! */
|
||||||
_collision_detector->clear ();
|
_collision_detector->clear ();
|
||||||
|
|
||||||
/* See how this asso_value change affects previous keywords. If
|
|
||||||
it does better than before we'll take it! */
|
|
||||||
|
|
||||||
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
|
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
|
||||||
{
|
{
|
||||||
KeywordExt *keyword = ptr->first();
|
KeywordExt *keyword = ptr->first();
|
||||||
|
|
||||||
|
/* Compute new hash code for the keyword, and see whether it
|
||||||
|
collides with another keyword's hash code. If we have too
|
||||||
|
many collisions, we can safely abort the fruitless loop. */
|
||||||
if (_collision_detector->set_bit (compute_hash (keyword))
|
if (_collision_detector->set_bit (compute_hash (keyword))
|
||||||
&& ++collisions >= _fewest_collisions)
|
&& ++collisions >= _fewest_collisions)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (keyword == curr)
|
if (keyword == curr)
|
||||||
{
|
{
|
||||||
_fewest_collisions = collisions;
|
_fewest_collisions = collisions;
|
||||||
if (option[DEBUG])
|
if (option[DEBUG])
|
||||||
fprintf (stderr, "- resolved after %d iterations", total_iterations - i);
|
fprintf (stderr, "- resolved after %d iterations",
|
||||||
|
iterations - i);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Restore original values, no more tries. */
|
/* Restore original values, no more tries. */
|
||||||
_asso_values[c] = original_char;
|
_asso_values[c] = original_value;
|
||||||
/* If we're this far it's time to try the next character.... */
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Change a character value, try least-used characters first. */
|
/* Attempts to change an _asso_value[], in order to resolve a hash value
|
||||||
|
collision between the two given keywords. */
|
||||||
|
|
||||||
void
|
void
|
||||||
Search::change (KeywordExt *prior, KeywordExt *curr)
|
Search::change_some_asso_value (KeywordExt *prior, KeywordExt *curr)
|
||||||
{
|
{
|
||||||
static unsigned char *union_set;
|
|
||||||
int union_set_length;
|
|
||||||
|
|
||||||
if (!union_set)
|
|
||||||
union_set = new unsigned char [2 * get_max_keysig_size ()];
|
|
||||||
|
|
||||||
if (option[DEBUG])
|
if (option[DEBUG])
|
||||||
{
|
{
|
||||||
fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
|
fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
|
||||||
@@ -538,26 +606,50 @@ Search::change (KeywordExt *prior, KeywordExt *curr)
|
|||||||
curr->_hash_value);
|
curr->_hash_value);
|
||||||
fflush (stderr);
|
fflush (stderr);
|
||||||
}
|
}
|
||||||
union_set_length = compute_disjoint_union (prior->_selchars, prior->_selchars_length, curr->_selchars, curr->_selchars_length, union_set);
|
|
||||||
|
/* To achieve that the two hash values become different, we have to
|
||||||
|
change an _asso_values[c] for a character c that contributes to the
|
||||||
|
hash functions of prior and curr with different multiplicity.
|
||||||
|
So we compute the set of such c. */
|
||||||
|
unsigned char *union_set = _union_set;
|
||||||
|
int union_set_length =
|
||||||
|
compute_disjoint_union (prior->_selchars, prior->_selchars_length,
|
||||||
|
curr->_selchars, curr->_selchars_length,
|
||||||
|
union_set);
|
||||||
|
|
||||||
|
/* Sort by increasing occurrence: Try least-used characters c first.
|
||||||
|
The idea is that this reduces the number of freshly introduced
|
||||||
|
collisions. */
|
||||||
sort_by_occurrence (union_set, union_set_length);
|
sort_by_occurrence (union_set, union_set_length);
|
||||||
|
|
||||||
/* Try changing some values, if change doesn't alter other values continue normal action. */
|
int iterations =
|
||||||
_fewest_collisions++;
|
!option[FAST]
|
||||||
|
? _asso_value_max /* Try all possible values of _asso_values[c]. */
|
||||||
|
: option.get_iterations ()
|
||||||
|
? option.get_iterations ()
|
||||||
|
: keyword_list_length ();
|
||||||
|
|
||||||
const unsigned char *p = union_set;
|
const unsigned char *p = union_set;
|
||||||
int i = union_set_length;
|
int i = union_set_length;
|
||||||
for (; i > 0; p++, i--)
|
for (; i > 0; p++, i--)
|
||||||
if (!affects_prev (*p, curr))
|
if (!try_asso_value (*p, curr, iterations))
|
||||||
{
|
{
|
||||||
|
/* Good, this _asso_values[] modification reduces the number of
|
||||||
|
collisions so far.
|
||||||
|
All keyword->_hash_value up to curr - inclusive - and
|
||||||
|
_fewest_collisions have been updated. */
|
||||||
if (option[DEBUG])
|
if (option[DEBUG])
|
||||||
{
|
{
|
||||||
fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n",
|
fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n",
|
||||||
*p, p - union_set + 1, _asso_values[*p]);
|
*p, p - union_set + 1, _asso_values[*p]);
|
||||||
fflush (stderr);
|
fflush (stderr);
|
||||||
}
|
}
|
||||||
return; /* Good, doesn't affect previous hash values, we'll take it. */
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Failed to resolve a collision. */
|
||||||
|
|
||||||
|
/* Recompute all keyword->_hash_value up to curr - inclusive -. */
|
||||||
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
|
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
|
||||||
{
|
{
|
||||||
KeywordExt* keyword = ptr->first();
|
KeywordExt* keyword = ptr->first();
|
||||||
@@ -569,91 +661,78 @@ Search::change (KeywordExt *prior, KeywordExt *curr)
|
|||||||
if (option[DEBUG])
|
if (option[DEBUG])
|
||||||
{
|
{
|
||||||
fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
|
fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
|
||||||
!option[FAST] ? get_asso_max () : option.get_iterations () ? option.get_iterations () : keyword_list_length (),
|
iterations, _fewest_collisions + _total_duplicates);
|
||||||
_fewest_collisions + _total_duplicates);
|
|
||||||
fflush (stderr);
|
fflush (stderr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Finds good _asso_values[]. */
|
||||||
|
|
||||||
|
void
|
||||||
|
Search::find_asso_values ()
|
||||||
|
{
|
||||||
|
_fewest_collisions = 0;
|
||||||
|
init_asso_values ();
|
||||||
|
|
||||||
|
/* Add one keyword after the other and see whether its hash value collides
|
||||||
|
with one of the previous hash values. */
|
||||||
|
_num_done = 1;
|
||||||
|
for (KeywordExt_List *curr_ptr = _head;
|
||||||
|
curr_ptr != NULL;
|
||||||
|
curr_ptr = curr_ptr->rest(), _num_done++)
|
||||||
|
{
|
||||||
|
KeywordExt *curr = curr_ptr->first();
|
||||||
|
|
||||||
|
/* Compute this keyword's hash value. */
|
||||||
|
compute_hash (curr);
|
||||||
|
|
||||||
|
/* See if it collides with a prior keyword. */
|
||||||
|
for (KeywordExt_List *prior_ptr = _head;
|
||||||
|
prior_ptr != curr_ptr;
|
||||||
|
prior_ptr = prior_ptr->rest())
|
||||||
|
{
|
||||||
|
KeywordExt *prior = prior_ptr->first();
|
||||||
|
|
||||||
|
if (prior->_hash_value == curr->_hash_value)
|
||||||
|
{
|
||||||
|
_fewest_collisions++;
|
||||||
|
/* Handle collision. */
|
||||||
|
change_some_asso_value (prior, curr);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
/* Sorts the keys by hash value. */
|
/* Sorts the keys by hash value. */
|
||||||
|
|
||||||
void
|
void
|
||||||
Search::sort ()
|
Search::sort ()
|
||||||
{
|
{
|
||||||
_hash_sort = true;
|
_hash_sort = true;
|
||||||
_occurrence_sort = false;
|
_occurrence_sort = false;
|
||||||
|
|
||||||
_head = merge_sort (_head);
|
_head = merge_sort (_head);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Search::optimize ()
|
Search::optimize ()
|
||||||
{
|
{
|
||||||
|
/* Preparations. */
|
||||||
prepare ();
|
prepare ();
|
||||||
if (option[ORDER])
|
if (option[ORDER])
|
||||||
reorder ();
|
reorder ();
|
||||||
_num_done = 1;
|
|
||||||
_fewest_collisions = 0;
|
|
||||||
int asso_value_max = option.get_size_multiple ();
|
|
||||||
int non_linked_length = keyword_list_length ();
|
|
||||||
if (asso_value_max == 0)
|
|
||||||
asso_value_max = non_linked_length;
|
|
||||||
else if (asso_value_max > 0)
|
|
||||||
asso_value_max *= non_linked_length;
|
|
||||||
else /* if (asso_value_max < 0) */
|
|
||||||
asso_value_max = non_linked_length / -asso_value_max;
|
|
||||||
set_asso_max (POW (asso_value_max));
|
|
||||||
|
|
||||||
if (option[RANDOM])
|
/* Search for good _asso_values[]. */
|
||||||
{
|
find_asso_values ();
|
||||||
srand (reinterpret_cast<long>(time (0)));
|
|
||||||
|
|
||||||
for (int i = 0; i < _alpha_size; i++)
|
|
||||||
_asso_values[i] = rand () & (asso_value_max - 1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
int asso_value = option.get_initial_asso_value ();
|
|
||||||
|
|
||||||
if (asso_value) /* Initialize array if user requests non-zero default. */
|
|
||||||
for (int i = _alpha_size - 1; i >= 0; i--)
|
|
||||||
_asso_values[i] = asso_value & get_asso_max () - 1;
|
|
||||||
}
|
|
||||||
_max_hash_value = max_key_length () + get_asso_max () * get_max_keysig_size ();
|
|
||||||
_collision_detector = new Bool_Array (_max_hash_value + 1);
|
|
||||||
|
|
||||||
if (option[DEBUG])
|
|
||||||
fprintf (stderr, "total non-linked keys = %d\nmaximum associated value is %d"
|
|
||||||
"\nmaximum size of generated hash table is %d\n",
|
|
||||||
non_linked_length, asso_value_max, _max_hash_value);
|
|
||||||
|
|
||||||
KeywordExt_List *curr;
|
|
||||||
for (curr = _head; curr != NULL; curr = curr->rest())
|
|
||||||
{
|
|
||||||
KeywordExt *currkw = curr->first();
|
|
||||||
|
|
||||||
compute_hash (currkw);
|
|
||||||
|
|
||||||
for (KeywordExt_List *ptr = _head; ptr != curr; ptr = ptr->rest())
|
|
||||||
{
|
|
||||||
KeywordExt *ptrkw = ptr->first();
|
|
||||||
|
|
||||||
if (ptrkw->_hash_value == currkw->_hash_value)
|
|
||||||
{
|
|
||||||
change (ptrkw, currkw);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_num_done++;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Make one final check, just to make sure nothing weird happened.... */
|
/* Make one final check, just to make sure nothing weird happened.... */
|
||||||
|
|
||||||
_collision_detector->clear ();
|
_collision_detector->clear ();
|
||||||
|
for (KeywordExt_List *curr_ptr = _head; curr_ptr; curr_ptr = curr_ptr->rest())
|
||||||
for (curr = _head; curr; curr = curr->rest())
|
|
||||||
{
|
{
|
||||||
unsigned int hashcode = compute_hash (curr->first());
|
KeywordExt *curr = curr_ptr->first();
|
||||||
|
unsigned int hashcode = compute_hash (curr);
|
||||||
if (_collision_detector->set_bit (hashcode))
|
if (_collision_detector->set_bit (hashcode))
|
||||||
{
|
{
|
||||||
if (option[DUP]) /* Keep track of this number... */
|
if (option[DUP]) /* Keep track of this number... */
|
||||||
@@ -673,7 +752,7 @@ Search::optimize ()
|
|||||||
sort ();
|
sort ();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Prints out some diagnostics upon completion. */
|
/* Prints out some diagnostics upon completion. */
|
||||||
|
|
||||||
Search::~Search ()
|
Search::~Search ()
|
||||||
{
|
{
|
||||||
|
|||||||
40
src/search.h
40
src/search.h
@@ -62,15 +62,27 @@ private:
|
|||||||
/* Returns the number of key positions. */
|
/* Returns the number of key positions. */
|
||||||
int get_max_keysig_size ();
|
int get_max_keysig_size ();
|
||||||
|
|
||||||
|
/* Initializes the asso_values[] related parameters and puts a first guess
|
||||||
|
into asso_values[]. */
|
||||||
|
void init_asso_values ();
|
||||||
|
|
||||||
/* Computes a keyword's hash value, relative to the current _asso_values[],
|
/* Computes a keyword's hash value, relative to the current _asso_values[],
|
||||||
and stores it in keyword->_hash_value. */
|
and stores it in keyword->_hash_value. */
|
||||||
int compute_hash (KeywordExt *key_node);
|
int compute_hash (KeywordExt *keyword);
|
||||||
|
|
||||||
/* Sorts the given set in increasing frequency of _occurrences[]. */
|
/* Sorts the given set in increasing frequency of _occurrences[]. */
|
||||||
void sort_by_occurrence (unsigned char *set, int len);
|
void sort_by_occurrence (unsigned char *set, int len);
|
||||||
|
|
||||||
bool affects_prev (unsigned char c, KeywordExt *curr);
|
/* Tries various other values for _asso_values[c]. */
|
||||||
void change (KeywordExt *prior, KeywordExt *curr);
|
bool try_asso_value (unsigned char c, KeywordExt *curr, int iterations);
|
||||||
|
|
||||||
|
/* Attempts to change an _asso_value[], in order to resolve a hash value
|
||||||
|
collision between the two given keywords. */
|
||||||
|
void change_some_asso_value (KeywordExt *prior, KeywordExt *curr);
|
||||||
|
|
||||||
|
/* Finds good _asso_values[]. */
|
||||||
|
void find_asso_values ();
|
||||||
|
|
||||||
void sort ();
|
void sort ();
|
||||||
public:
|
public:
|
||||||
|
|
||||||
@@ -114,13 +126,23 @@ private:
|
|||||||
/* Vector used during Search::reorder(). */
|
/* Vector used during Search::reorder(). */
|
||||||
bool * const _determined;
|
bool * const _determined;
|
||||||
|
|
||||||
int _num_done; /* Number of keywords processed without a collision. */
|
/* Exclusive upper bound for every _asso_values[c]. A power of 2. */
|
||||||
int _fewest_collisions; /* Records fewest # of collisions for asso value. */
|
int _asso_value_max;
|
||||||
int _max_hash_value; /* Maximum possible hash value. */
|
|
||||||
|
/* Maximal possible hash value. */
|
||||||
|
int _max_hash_value;
|
||||||
|
|
||||||
|
/* Sparse bit vector for collision detection. */
|
||||||
Bool_Array * _collision_detector;
|
Bool_Array * _collision_detector;
|
||||||
int _size; /* Range of the hash table. */
|
|
||||||
void set_asso_max (int r) { _size = r; }
|
/* Minimal number of collisions found so far. */
|
||||||
int get_asso_max () { return _size; }
|
int _fewest_collisions;
|
||||||
|
|
||||||
|
/* Scratch set, used during Search::change_some_asso_value. */
|
||||||
|
unsigned char * _union_set;
|
||||||
|
|
||||||
|
/* Number of the keyword being handled during Search::find_asso_values. */
|
||||||
|
int _num_done;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user