1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

Fix the reorder logic.

This commit is contained in:
Bruno Haible
2002-12-31 12:53:17 +00:00
parent 1f70ea5dfd
commit 72a3884ff9
3 changed files with 94 additions and 43 deletions

View File

@@ -1,5 +1,12 @@
2002-11-03 Bruno Haible <bruno@clisp.org>
* src/search.h (Search::clear_determined): New declaration.
* src/search.cc (Search::clear_determined): New method.
(Search::already_determined): Optimize.
(Search::reorder): Even when the next keyword after the current one
is completely determined, move all determined keywords after the
current one.
Compute the occurrences after removal of duplicates, not before.
* src/keyword.h (KeywordExt::init_selchars): Remove occurrences
argument.

View File

@@ -42,7 +42,6 @@ Search::Search (KeywordExt_List *list)
_determined (new bool[_alpha_size])
{
memset (_asso_values, 0, _alpha_size * sizeof (_asso_values[0]));
memset (_determined, 0, _alpha_size * sizeof (_determined[0]));
}
void
@@ -231,7 +230,11 @@ Search::merge_sort (KeywordExt_List *head)
}
}
/* Returns the frequency of occurrence of elements in the key set. */
/* Computes the sum of occurrences of the _selchars of a keyword.
This is a kind of correlation measure: Keywords which have many
selected characters in common with other keywords have a high
occurrence sum. Keywords whose selected characters don't occur
in other keywords have a low occurrence sum. */
inline int
Search::compute_occurrence (KeywordExt *ptr)
@@ -246,43 +249,55 @@ Search::compute_occurrence (KeywordExt *ptr)
return value;
}
/* Enables the index location of all key set elements that are now
determined. */
/* Auxiliary function for reorder():
Sets all alphabet characters as undetermined. */
inline void
Search::set_determined (KeywordExt *ptr)
Search::clear_determined ()
{
const unsigned char *p = ptr->_selchars;
unsigned int i = ptr->_selchars_length;
memset (_determined, 0, _alpha_size * sizeof (_determined[0]));
}
/* Auxiliary function for reorder():
Sets all selected characters of the keyword as determined. */
inline void
Search::set_determined (KeywordExt *keyword)
{
const unsigned char *p = keyword->_selchars;
unsigned int i = keyword->_selchars_length;
for (; i > 0; p++, i--)
_determined[*p] = true;
}
/* Returns TRUE if PTR's key set is already completely determined. */
/* Auxiliary function for reorder():
Returns true if the keyword's selected characters are all determined. */
inline bool
Search::already_determined (KeywordExt *ptr)
Search::already_determined (KeywordExt *keyword)
{
bool is_determined = true;
const unsigned char *p = keyword->_selchars;
unsigned int i = keyword->_selchars_length;
for (; i > 0; p++, i--)
if (!_determined[*p])
return false;
const unsigned char *p = ptr->_selchars;
unsigned int i = ptr->_selchars_length;
for (; is_determined && i > 0; p++, i--)
is_determined = _determined[*p];
return is_determined;
return true;
}
/* Reorders the table by first sorting the list so that frequently occurring
keys appear first, and then the list is reordered so that keys whose values
are already determined will be placed towards the front of the list. This
helps prune the search time by handling inevitable collisions early in the
search process. See Cichelli's paper from Jan 1980 JACM for details.... */
/* Reorders the keyword list so as to minimize search times.
First the list is reordered so that frequently occurring keys appear first.
Then the list is reordered so that keys whose values are already determined
will be placed towards the front of the list. This helps prune the search
time by handling inevitable collisions early in the search process. See
Cichelli's paper from Jan 1980 JACM for details.... */
void
Search::reorder ()
{
KeywordExt_List *ptr;
/* Compute the _occurrence valuation of every keyword on the list. */
for (ptr = _head; ptr; ptr = ptr->rest())
{
KeywordExt *keyword = ptr->first();
@@ -290,32 +305,53 @@ Search::reorder ()
keyword->_occurrence = compute_occurrence (keyword);
}
/* Sort the list by decreasing _occurrence valuation. */
_hash_sort = false;
_occurrence_sort = true;
_head = merge_sort (_head);
for (ptr = _head; ptr->rest(); ptr = ptr->rest())
/* Reorder the list to maximize the efficiency of the search. */
/* At the beginning, consider that no asso_values[c] is fixed. */
clear_determined ();
for (ptr = _head; ptr != NULL && ptr->rest() != NULL; ptr = ptr->rest())
{
set_determined (ptr->first());
KeywordExt *keyword = ptr->first();
if (!already_determined (ptr->rest()->first()))
/* Then we'll fix asso_values[c] for all c occurring in this keyword. */
set_determined (keyword);
/* Then we wish to test for hash value collisions the remaining keywords
whose hash value is completely determined, as quickly as possible.
For this purpose, move all the completely determined keywords in the
remaining list immediately past this keyword. */
KeywordExt_List *curr_ptr;
KeywordExt_List *next_ptr; /* = curr_ptr->rest() */
for (curr_ptr = ptr, next_ptr = curr_ptr->rest();
next_ptr != NULL;
next_ptr = curr_ptr->rest())
{
KeywordExt_List *trail_ptr = ptr->rest();
KeywordExt_List *run_ptr = trail_ptr->rest();
KeywordExt *next_keyword = next_ptr->first();
for (; run_ptr; run_ptr = trail_ptr->rest())
if (already_determined (next_keyword))
{
if (already_determined (run_ptr->first()))
{
trail_ptr->rest() = run_ptr->rest();
run_ptr->rest() = ptr->rest();
ptr = ptr->rest() = run_ptr;
}
if (curr_ptr == ptr)
/* Keep next_ptr where it is. */
curr_ptr = next_ptr;
else
trail_ptr = run_ptr;
{
/* Remove next_ptr from its current list position... */
curr_ptr->rest() = next_ptr->rest();
/* ... and insert it right after ptr. */
next_ptr->rest() = ptr->rest();
ptr->rest() = next_ptr;
}
/* Advance ptr. */
ptr = ptr->rest();
}
else
curr_ptr = next_ptr;
}
}
}
@@ -427,8 +463,8 @@ Search::sort_set (unsigned char *union_set, int len)
/* Find out how character value change affects successfully hashed items.
Returns FALSE if no other hash values are affected, else returns TRUE.
Note that because Option.Get_Asso_Max is a power of two we can guarantee
that all legal Asso_Values are visited without repetition since
Note that because option.get_asso_max() is a power of two we can guarantee
that all valid asso_values are visited without repetition since
option.get_jump() was forced to be an odd value! */
inline bool
@@ -438,7 +474,7 @@ Search::affects_prev (unsigned char c, KeywordExt *curr)
int total_iterations = !option[FAST]
? get_asso_max () : option.get_iterations () ? option.get_iterations () : keyword_list_length ();
/* Try all legal associated values. */
/* Try all valid associated values. */
for (int i = total_iterations - 1; i >= 0; i--)
{
@@ -569,7 +605,7 @@ Search::optimize ()
srand (reinterpret_cast<long>(time (0)));
for (int i = 0; i < _alpha_size; i++)
_asso_values[i] = (rand () & asso_value_max - 1);
_asso_values[i] = rand () & (asso_value_max - 1);
}
else
{

View File

@@ -43,10 +43,16 @@ private:
/* Sorts a list using the recursive merge sort algorithm. */
KeywordExt_List * merge_sort (KeywordExt_List *head);
/* Computes the sum of occurrences of the _selchars of a keyword. */
int compute_occurrence (KeywordExt *ptr);
void set_determined (KeywordExt *ptr);
bool already_determined (KeywordExt *ptr);
/* Auxiliary functions used by Search::reorder(). */
void clear_determined ();
void set_determined (KeywordExt *keyword);
bool already_determined (KeywordExt *keyword);
/* Reorders the keyword list so as to minimize search times. */
void reorder ();
int keyword_list_length ();
int max_key_length ();
int get_max_keysig_size ();
@@ -95,7 +101,9 @@ private:
/* True if sorting by hash value. */
bool _hash_sort;
bool * const _determined; /* Used in function reorder, below. */
/* Vector used during Search::reorder(). */
bool * const _determined;
int _num_done; /* Number of keywords processed without a collision. */
int _fewest_collisions; /* Records fewest # of collisions for asso value. */
int _max_hash_value; /* Maximum possible hash value. */