mirror of
https://git.savannah.gnu.org/git/gperf.git
synced 2025-12-02 13:09:22 +00:00
Comments and code restructuring.
This commit is contained in:
11
ChangeLog
11
ChangeLog
@@ -1,3 +1,14 @@
|
|||||||
|
2002-11-19 Bruno Haible <bruno@clisp.org>
|
||||||
|
|
||||||
|
* src/search.h (Search::find_good_asso_values): New declaration.
|
||||||
|
* src/search.cc: Add comments about the basic structure of the
|
||||||
|
algorithm.
|
||||||
|
(Search::find_positions): Move the option[POSITIONS] test to here.
|
||||||
|
(Search::find_good_asso_values): New method, extracted from
|
||||||
|
Search::optimize.
|
||||||
|
(Search::optimize): Remove option[POSITIONS] test. Call
|
||||||
|
find_good_asso_values.
|
||||||
|
|
||||||
2002-11-17 Bruno Haible <bruno@clisp.org>
|
2002-11-17 Bruno Haible <bruno@clisp.org>
|
||||||
|
|
||||||
* src/options.cc (Options::parse_options): Include copyright notice
|
* src/options.cc (Options::parse_options): Include copyright notice
|
||||||
|
|||||||
112
src/search.cc
112
src/search.cc
@@ -34,10 +34,12 @@
|
|||||||
/* The most general form of the hash function is
|
/* The most general form of the hash function is
|
||||||
|
|
||||||
hash (keyword) = sum (asso_values[keyword[i] + alpha_inc[i]] : i in Pos)
|
hash (keyword) = sum (asso_values[keyword[i] + alpha_inc[i]] : i in Pos)
|
||||||
|
+ len (keyword)
|
||||||
|
|
||||||
where Pos is a set of byte positions,
|
where Pos is a set of byte positions,
|
||||||
each alpha_inc[i] is a nonnegative integer,
|
each alpha_inc[i] is a nonnegative integer,
|
||||||
each asso_values[c] is a nonnegative integer.
|
each asso_values[c] is a nonnegative integer,
|
||||||
|
len (keyword) is the keyword's length if !option[NOLENGTH], or 0 otherwise.
|
||||||
|
|
||||||
Theorem 1: If all keywords are different, there is a set Pos such that
|
Theorem 1: If all keywords are different, there is a set Pos such that
|
||||||
all tuples (keyword[i] : i in Pos) are different.
|
all tuples (keyword[i] : i in Pos) are different.
|
||||||
@@ -63,9 +65,23 @@
|
|||||||
|
|
||||||
Step 3 (Finding good asso_values):
|
Step 3 (Finding good asso_values):
|
||||||
Find asso_values[c] such that all hash (keyword) are different.
|
Find asso_values[c] such that all hash (keyword) are different.
|
||||||
|
|
||||||
|
In other words, each step finds a projection that is injective on the
|
||||||
|
given finite set:
|
||||||
|
proj1 : String --> Map (Pos --> N)
|
||||||
|
proj2 : Map (Pos --> N) --> Map (Pos --> N) / S(Pos)
|
||||||
|
proj3 : Map (Pos --> N) / S(Pos) --> N
|
||||||
|
where N denotes the set of nonnegative integers, and S(Pos) is the
|
||||||
|
symmetric group over Pos.
|
||||||
|
|
||||||
|
This was the theory for option[NOLENGTH]; if !option[NOLENGTH], slight
|
||||||
|
modifications apply:
|
||||||
|
proj1 : String --> Map (Pos --> N) x N
|
||||||
|
proj2 : Map (Pos --> N) x N --> Map (Pos --> N) / S(Pos) x N
|
||||||
|
proj3 : Map (Pos --> N) / S(Pos) x N --> N
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* -------------------- Initialization and Preparation --------------------- */
|
/* ==================== Initialization and Preparation ===================== */
|
||||||
|
|
||||||
Search::Search (KeywordExt_List *list)
|
Search::Search (KeywordExt_List *list)
|
||||||
: _head (list)
|
: _head (list)
|
||||||
@@ -106,7 +122,7 @@ Search::preprepare ()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------- Finding good byte positions ---------------------- */
|
/* ====================== Finding good byte positions ====================== */
|
||||||
|
|
||||||
/* Initializes each keyword's _selchars array. */
|
/* Initializes each keyword's _selchars array. */
|
||||||
void
|
void
|
||||||
@@ -124,10 +140,16 @@ Search::delete_selchars () const
|
|||||||
temp->first()->delete_selchars();
|
temp->first()->delete_selchars();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Count the duplicate keywords that occur with a given set of positions. */
|
/* Count the duplicate keywords that occur with a given set of positions.
|
||||||
|
In other words, it returns the difference
|
||||||
|
# K - # proj1 (K)
|
||||||
|
where K is the multiset of given keywords. */
|
||||||
unsigned int
|
unsigned int
|
||||||
Search::count_duplicates_tuple (const Positions& positions) const
|
Search::count_duplicates_tuple (const Positions& positions) const
|
||||||
{
|
{
|
||||||
|
/* Run through the keyword list and count the duplicates incrementally.
|
||||||
|
The result does not depend on the order of the keyword list, thanks to
|
||||||
|
the formula above. */
|
||||||
init_selchars_tuple (option[ALLCHARS], positions);
|
init_selchars_tuple (option[ALLCHARS], positions);
|
||||||
|
|
||||||
unsigned int count = 0;
|
unsigned int count = 0;
|
||||||
@@ -151,6 +173,13 @@ Search::count_duplicates_tuple (const Positions& positions) const
|
|||||||
void
|
void
|
||||||
Search::find_positions ()
|
Search::find_positions ()
|
||||||
{
|
{
|
||||||
|
/* If the user gave the key positions, we use them. */
|
||||||
|
if (option[POSITIONS])
|
||||||
|
{
|
||||||
|
_key_positions = option.get_key_positions();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/* 1. Find positions that must occur in order to distinguish duplicates. */
|
/* 1. Find positions that must occur in order to distinguish duplicates. */
|
||||||
Positions mandatory;
|
Positions mandatory;
|
||||||
|
|
||||||
@@ -302,7 +331,7 @@ Search::find_positions ()
|
|||||||
_key_positions = current;
|
_key_positions = current;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* --------------------- Finding good alpha increments --------------------- */
|
/* ===================== Finding good alpha increments ===================== */
|
||||||
|
|
||||||
/* Initializes each keyword's _selchars array. */
|
/* Initializes each keyword's _selchars array. */
|
||||||
void
|
void
|
||||||
@@ -313,10 +342,16 @@ Search::init_selchars_multiset (bool use_all_chars, const Positions& positions,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Count the duplicate keywords that occur with the given set of positions
|
/* Count the duplicate keywords that occur with the given set of positions
|
||||||
and a given alpha_inc[] array. */
|
and a given alpha_inc[] array.
|
||||||
|
In other words, it returns the difference
|
||||||
|
# K - # proj2 (proj1 (K))
|
||||||
|
where K is the multiset of given keywords. */
|
||||||
unsigned int
|
unsigned int
|
||||||
Search::count_duplicates_multiset (const unsigned int *alpha_inc) const
|
Search::count_duplicates_multiset (const unsigned int *alpha_inc) const
|
||||||
{
|
{
|
||||||
|
/* Run through the keyword list and count the duplicates incrementally.
|
||||||
|
The result does not depend on the order of the keyword list, thanks to
|
||||||
|
the formula above. */
|
||||||
init_selchars_multiset (option[ALLCHARS], _key_positions, alpha_inc);
|
init_selchars_multiset (option[ALLCHARS], _key_positions, alpha_inc);
|
||||||
|
|
||||||
unsigned int count = 0;
|
unsigned int count = 0;
|
||||||
@@ -341,7 +376,8 @@ void
|
|||||||
Search::find_alpha_inc ()
|
Search::find_alpha_inc ()
|
||||||
{
|
{
|
||||||
/* The goal is to choose _alpha_inc[] such that it doesn't introduce
|
/* The goal is to choose _alpha_inc[] such that it doesn't introduce
|
||||||
artificial duplicates. */
|
artificial duplicates.
|
||||||
|
In other words, the goal is # proj2 (proj1 (K)) = # proj1 (K). */
|
||||||
unsigned int duplicates_goal = count_duplicates_tuple (_key_positions);
|
unsigned int duplicates_goal = count_duplicates_tuple (_key_positions);
|
||||||
|
|
||||||
/* Start with zero increments. This is sufficient in most cases. */
|
/* Start with zero increments. This is sufficient in most cases. */
|
||||||
@@ -428,7 +464,7 @@ Search::find_alpha_inc ()
|
|||||||
_alpha_inc = current;
|
_alpha_inc = current;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ------------------------------------------------------------------------- */
|
/* ======================= Finding good asso_values ======================== */
|
||||||
|
|
||||||
void
|
void
|
||||||
Search::prepare ()
|
Search::prepare ()
|
||||||
@@ -988,7 +1024,7 @@ Search::change_some_asso_value (KeywordExt *prior, KeywordExt *curr)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Finds good _asso_values[]. */
|
/* Finds some _asso_values[] that fit. */
|
||||||
|
|
||||||
void
|
void
|
||||||
Search::find_asso_values ()
|
Search::find_asso_values ()
|
||||||
@@ -1026,32 +1062,11 @@ Search::find_asso_values ()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ------------------------------------------------------------------------- */
|
/* Finds good _asso_values[]. */
|
||||||
|
|
||||||
/* Comparison function for sorting by increasing _hash_value. */
|
|
||||||
static bool
|
|
||||||
less_by_hash_value (KeywordExt *keyword1, KeywordExt *keyword2)
|
|
||||||
{
|
|
||||||
return keyword1->_hash_value < keyword2->_hash_value;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Sorts the keyword list by hash value. */
|
|
||||||
|
|
||||||
void
|
void
|
||||||
Search::sort ()
|
Search::find_good_asso_values ()
|
||||||
{
|
{
|
||||||
_head = mergesort_list (_head, less_by_hash_value);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
Search::optimize ()
|
|
||||||
{
|
|
||||||
/* Preparations. */
|
|
||||||
preprepare ();
|
|
||||||
_key_positions = option.get_key_positions();
|
|
||||||
if (!option[POSITIONS])
|
|
||||||
find_positions ();
|
|
||||||
find_alpha_inc ();
|
|
||||||
prepare ();
|
prepare ();
|
||||||
if (option[ORDER])
|
if (option[ORDER])
|
||||||
reorder ();
|
reorder ();
|
||||||
@@ -1129,6 +1144,39 @@ Search::optimize ()
|
|||||||
delete[] best_asso_values;
|
delete[] best_asso_values;
|
||||||
/* The keywords' _hash_value fields are recomputed below. */
|
/* The keywords' _hash_value fields are recomputed below. */
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ========================================================================= */
|
||||||
|
|
||||||
|
/* Comparison function for sorting by increasing _hash_value. */
|
||||||
|
static bool
|
||||||
|
less_by_hash_value (KeywordExt *keyword1, KeywordExt *keyword2)
|
||||||
|
{
|
||||||
|
return keyword1->_hash_value < keyword2->_hash_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sorts the keyword list by hash value. */
|
||||||
|
|
||||||
|
void
|
||||||
|
Search::sort ()
|
||||||
|
{
|
||||||
|
_head = mergesort_list (_head, less_by_hash_value);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Search::optimize ()
|
||||||
|
{
|
||||||
|
/* Preparations. */
|
||||||
|
preprepare ();
|
||||||
|
|
||||||
|
/* Step 1: Finding good byte positions. */
|
||||||
|
find_positions ();
|
||||||
|
|
||||||
|
/* Step 2: Finding good alpha increments. */
|
||||||
|
find_alpha_inc ();
|
||||||
|
|
||||||
|
/* Step 3: Finding good asso_values. */
|
||||||
|
find_good_asso_values ();
|
||||||
|
|
||||||
/* Make one final check, just to make sure nothing weird happened.... */
|
/* Make one final check, just to make sure nothing weird happened.... */
|
||||||
_collision_detector->clear ();
|
_collision_detector->clear ();
|
||||||
|
|||||||
@@ -100,9 +100,12 @@ private:
|
|||||||
collision between the two given keywords. */
|
collision between the two given keywords. */
|
||||||
void change_some_asso_value (KeywordExt *prior, KeywordExt *curr);
|
void change_some_asso_value (KeywordExt *prior, KeywordExt *curr);
|
||||||
|
|
||||||
/* Finds good _asso_values[]. */
|
/* Finds some _asso_values[] that fit. */
|
||||||
void find_asso_values ();
|
void find_asso_values ();
|
||||||
|
|
||||||
|
/* Finds good _asso_values[]. */
|
||||||
|
void find_good_asso_values ();
|
||||||
|
|
||||||
/* Sorts the keyword list by hash value. */
|
/* Sorts the keyword list by hash value. */
|
||||||
void sort ();
|
void sort ();
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user