mirror of
https://git.savannah.gnu.org/git/gperf.git
synced 2025-12-02 13:09:22 +00:00
Cleanup the alpha_unify handling code.
This commit is contained in:
10
ChangeLog
10
ChangeLog
@@ -1,5 +1,15 @@
|
|||||||
2002-12-12 Bruno Haible <bruno@clisp.org>
|
2002-12-12 Bruno Haible <bruno@clisp.org>
|
||||||
|
|
||||||
|
* src/search.h (Search::init_selchars_tuple,
|
||||||
|
Search::count_duplicates_tuple): Add alpha_unify argument.
|
||||||
|
(Search::count_duplicates_tuple): New method declaration.
|
||||||
|
* src/search.cc (Search::init_selchars_tuple,
|
||||||
|
Search::count_duplicates_tuple): Add alpha_unify argument.
|
||||||
|
(Search::find_positions): Update.
|
||||||
|
(Search::count_duplicates_tuple): New method.
|
||||||
|
(Search::count_duplicates_multiset): Free temp alpha_unify vector.
|
||||||
|
(Search::find_alpha_inc): Call count_duplicates_tuple.
|
||||||
|
|
||||||
* src/configure.in: Add test for stack-allocated variable-size arrays.
|
* src/configure.in: Add test for stack-allocated variable-size arrays.
|
||||||
* src/config.h.in: Regenerated.
|
* src/config.h.in: Regenerated.
|
||||||
* src/search.cc: Include config.h.
|
* src/search.cc: Include config.h.
|
||||||
|
|||||||
@@ -58,7 +58,7 @@
|
|||||||
|
|
||||||
/* ================================ Theory ================================= */
|
/* ================================ Theory ================================= */
|
||||||
|
|
||||||
/* The most general form of the hash function is
|
/* The general form of the hash function is
|
||||||
|
|
||||||
hash (keyword) = sum (asso_values[keyword[i] + alpha_inc[i]] : i in Pos)
|
hash (keyword) = sum (asso_values[keyword[i] + alpha_inc[i]] : i in Pos)
|
||||||
+ len (keyword)
|
+ len (keyword)
|
||||||
@@ -75,6 +75,8 @@
|
|||||||
are nonnegative integers alpha_inc[i] such that all multisets
|
are nonnegative integers alpha_inc[i] such that all multisets
|
||||||
{keyword[i] + alpha_inc[i] : i in Pos} are different.
|
{keyword[i] + alpha_inc[i] : i in Pos} are different.
|
||||||
|
|
||||||
|
Define selchars[keyword] := {keyword[i] + alpha_inc[i] : i in Pos}.
|
||||||
|
|
||||||
Theorem 3: If all multisets selchars[keyword] are different, there are
|
Theorem 3: If all multisets selchars[keyword] are different, there are
|
||||||
nonnegative integers asso_values[c] such that all hash values
|
nonnegative integers asso_values[c] such that all hash values
|
||||||
sum (asso_values[c] : c in selchars[keyword]) are different.
|
sum (asso_values[c] : c in selchars[keyword]) are different.
|
||||||
@@ -98,14 +100,25 @@
|
|||||||
proj1 : String --> Map (Pos --> N)
|
proj1 : String --> Map (Pos --> N)
|
||||||
proj2 : Map (Pos --> N) --> Map (Pos --> N) / S(Pos)
|
proj2 : Map (Pos --> N) --> Map (Pos --> N) / S(Pos)
|
||||||
proj3 : Map (Pos --> N) / S(Pos) --> N
|
proj3 : Map (Pos --> N) / S(Pos) --> N
|
||||||
where N denotes the set of nonnegative integers, and S(Pos) is the
|
where
|
||||||
symmetric group over Pos.
|
N denotes the set of nonnegative integers,
|
||||||
|
Map (A --> B) := Hom_Set (A, B) is the set of maps from A to B, and
|
||||||
|
S(Pos) is the symmetric group over Pos.
|
||||||
|
|
||||||
This was the theory for option[NOLENGTH]; if !option[NOLENGTH], slight
|
This was the theory for option[NOLENGTH]; if !option[NOLENGTH], slight
|
||||||
modifications apply:
|
modifications apply:
|
||||||
proj1 : String --> Map (Pos --> N) x N
|
proj1 : String --> Map (Pos --> N) x N
|
||||||
proj2 : Map (Pos --> N) x N --> Map (Pos --> N) / S(Pos) x N
|
proj2 : Map (Pos --> N) x N --> Map (Pos --> N) / S(Pos) x N
|
||||||
proj3 : Map (Pos --> N) / S(Pos) x N --> N
|
proj3 : Map (Pos --> N) / S(Pos) x N --> N
|
||||||
|
|
||||||
|
For a case-insensitive hash function, the general form is
|
||||||
|
|
||||||
|
hash (keyword) =
|
||||||
|
sum (asso_values[alpha_unify[keyword[i] + alpha_inc[i]]] : i in Pos)
|
||||||
|
+ len (keyword)
|
||||||
|
|
||||||
|
where alpha_unify[c] is chosen so that an upper/lower case change in
|
||||||
|
keyword[i] doesn't change alpha_unify[keyword[i] + alpha_inc[i]].
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* ==================== Initialization and Preparation ===================== */
|
/* ==================== Initialization and Preparation ===================== */
|
||||||
@@ -118,17 +131,15 @@ Search::Search (KeywordExt_List *list)
|
|||||||
void
|
void
|
||||||
Search::prepare ()
|
Search::prepare ()
|
||||||
{
|
{
|
||||||
KeywordExt_List *temp;
|
|
||||||
|
|
||||||
/* Compute the total number of keywords. */
|
/* Compute the total number of keywords. */
|
||||||
_total_keys = 0;
|
_total_keys = 0;
|
||||||
for (temp = _head; temp; temp = temp->rest())
|
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
||||||
_total_keys++;
|
_total_keys++;
|
||||||
|
|
||||||
/* Compute the minimum and maximum keyword length. */
|
/* Compute the minimum and maximum keyword length. */
|
||||||
_max_key_len = INT_MIN;
|
_max_key_len = INT_MIN;
|
||||||
_min_key_len = INT_MAX;
|
_min_key_len = INT_MAX;
|
||||||
for (temp = _head; temp; temp = temp->rest())
|
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
||||||
{
|
{
|
||||||
KeywordExt *keyword = temp->first();
|
KeywordExt *keyword = temp->first();
|
||||||
|
|
||||||
@@ -142,16 +153,16 @@ Search::prepare ()
|
|||||||
expressions don't work correctly for looking up an empty string. */
|
expressions don't work correctly for looking up an empty string. */
|
||||||
if (_min_key_len == 0)
|
if (_min_key_len == 0)
|
||||||
{
|
{
|
||||||
fprintf (stderr, "Empty input key is not allowed.\n"
|
fprintf (stderr, "Empty input keyword is not allowed.\n"
|
||||||
"To recognize an empty input key, your code should check for\n"
|
"To recognize an empty input keyword, your code should check for\n"
|
||||||
"len == 0 before calling the gperf generated lookup function.\n");
|
"len == 0 before calling the gperf generated lookup function.\n");
|
||||||
exit (1);
|
exit (1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Exit program if the characters in the keywords are not in the required
|
/* Exit program if the characters in the keywords are not in the required
|
||||||
range. */
|
range. */
|
||||||
if (option[SEVENBIT])
|
if (option[SEVENBIT])
|
||||||
for (temp = _head; temp; temp = temp->rest())
|
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
||||||
{
|
{
|
||||||
KeywordExt *keyword = temp->first();
|
KeywordExt *keyword = temp->first();
|
||||||
|
|
||||||
@@ -185,6 +196,7 @@ Search::compute_alpha_unify () const
|
|||||||
{
|
{
|
||||||
if (option[UPPERLOWER])
|
if (option[UPPERLOWER])
|
||||||
{
|
{
|
||||||
|
/* Uppercase to lowercase mapping. */
|
||||||
unsigned int alpha_size = compute_alpha_size();
|
unsigned int alpha_size = compute_alpha_size();
|
||||||
unsigned int *alpha_unify = new unsigned int[alpha_size];
|
unsigned int *alpha_unify = new unsigned int[alpha_size];
|
||||||
for (unsigned int c = 0; c < alpha_size; c++)
|
for (unsigned int c = 0; c < alpha_size; c++)
|
||||||
@@ -194,15 +206,16 @@ Search::compute_alpha_unify () const
|
|||||||
return alpha_unify;
|
return alpha_unify;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
/* Identity mapping. */
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Initializes each keyword's _selchars array. */
|
/* Initializes each keyword's _selchars array. */
|
||||||
void
|
void
|
||||||
Search::init_selchars_tuple (const Positions& positions) const
|
Search::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) const
|
||||||
{
|
{
|
||||||
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
|
||||||
temp->first()->init_selchars_tuple(positions, _alpha_unify);
|
temp->first()->init_selchars_tuple(positions, alpha_unify);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Deletes each keyword's _selchars array. */
|
/* Deletes each keyword's _selchars array. */
|
||||||
@@ -218,12 +231,12 @@ Search::delete_selchars () const
|
|||||||
# K - # proj1 (K)
|
# K - # proj1 (K)
|
||||||
where K is the multiset of given keywords. */
|
where K is the multiset of given keywords. */
|
||||||
unsigned int
|
unsigned int
|
||||||
Search::count_duplicates_tuple (const Positions& positions) const
|
Search::count_duplicates_tuple (const Positions& positions, const unsigned int *alpha_unify) const
|
||||||
{
|
{
|
||||||
/* Run through the keyword list and count the duplicates incrementally.
|
/* Run through the keyword list and count the duplicates incrementally.
|
||||||
The result does not depend on the order of the keyword list, thanks to
|
The result does not depend on the order of the keyword list, thanks to
|
||||||
the formula above. */
|
the formula above. */
|
||||||
init_selchars_tuple (positions);
|
init_selchars_tuple (positions, alpha_unify);
|
||||||
|
|
||||||
unsigned int count = 0;
|
unsigned int count = 0;
|
||||||
{
|
{
|
||||||
@@ -253,8 +266,8 @@ Search::find_positions ()
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compute preliminary value for _alpha_unify. */
|
/* Compute preliminary alpha_unify table. */
|
||||||
_alpha_unify = compute_alpha_unify ();
|
unsigned int *alpha_unify = compute_alpha_unify ();
|
||||||
|
|
||||||
/* 1. Find positions that must occur in order to distinguish duplicates. */
|
/* 1. Find positions that must occur in order to distinguish duplicates. */
|
||||||
Positions mandatory;
|
Positions mandatory;
|
||||||
@@ -322,7 +335,8 @@ Search::find_positions ()
|
|||||||
int imax = (_max_key_len - 1 < Positions::MAX_KEY_POS - 1
|
int imax = (_max_key_len - 1 < Positions::MAX_KEY_POS - 1
|
||||||
? _max_key_len - 1 : Positions::MAX_KEY_POS - 1);
|
? _max_key_len - 1 : Positions::MAX_KEY_POS - 1);
|
||||||
Positions current = mandatory;
|
Positions current = mandatory;
|
||||||
unsigned int current_duplicates_count = count_duplicates_tuple (current);
|
unsigned int current_duplicates_count =
|
||||||
|
count_duplicates_tuple (current, alpha_unify);
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
Positions best;
|
Positions best;
|
||||||
@@ -333,7 +347,8 @@ Search::find_positions ()
|
|||||||
{
|
{
|
||||||
Positions tryal = current;
|
Positions tryal = current;
|
||||||
tryal.add (i);
|
tryal.add (i);
|
||||||
unsigned int try_duplicates_count = count_duplicates_tuple (tryal);
|
unsigned int try_duplicates_count =
|
||||||
|
count_duplicates_tuple (tryal, alpha_unify);
|
||||||
|
|
||||||
/* We prefer 'try' to 'best' if it produces less duplicates,
|
/* We prefer 'try' to 'best' if it produces less duplicates,
|
||||||
or if it produces the same number of duplicates but with
|
or if it produces the same number of duplicates but with
|
||||||
@@ -366,7 +381,8 @@ Search::find_positions ()
|
|||||||
{
|
{
|
||||||
Positions tryal = current;
|
Positions tryal = current;
|
||||||
tryal.remove (i);
|
tryal.remove (i);
|
||||||
unsigned int try_duplicates_count = count_duplicates_tuple (tryal);
|
unsigned int try_duplicates_count =
|
||||||
|
count_duplicates_tuple (tryal, alpha_unify);
|
||||||
|
|
||||||
/* We prefer 'try' to 'best' if it produces less duplicates,
|
/* We prefer 'try' to 'best' if it produces less duplicates,
|
||||||
or if it produces the same number of duplicates but with
|
or if it produces the same number of duplicates but with
|
||||||
@@ -406,7 +422,7 @@ Search::find_positions ()
|
|||||||
tryal.remove (i2);
|
tryal.remove (i2);
|
||||||
tryal.add (i3);
|
tryal.add (i3);
|
||||||
unsigned int try_duplicates_count =
|
unsigned int try_duplicates_count =
|
||||||
count_duplicates_tuple (tryal);
|
count_duplicates_tuple (tryal, alpha_unify);
|
||||||
|
|
||||||
/* We prefer 'try' to 'best' if it produces less duplicates,
|
/* We prefer 'try' to 'best' if it produces less duplicates,
|
||||||
or if it produces the same number of duplicates but with
|
or if it produces the same number of duplicates but with
|
||||||
@@ -459,8 +475,21 @@ Search::find_positions ()
|
|||||||
fprintf (stderr, "\n");
|
fprintf (stderr, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Free preliminary value for _alpha_unify. */
|
/* Free preliminary alpha_unify table. */
|
||||||
delete[] _alpha_unify;
|
delete[] alpha_unify;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Count the duplicate keywords that occur with the found set of positions.
|
||||||
|
In other words, it returns the difference
|
||||||
|
# K - # proj1 (K)
|
||||||
|
where K is the multiset of given keywords. */
|
||||||
|
unsigned int
|
||||||
|
Search::count_duplicates_tuple () const
|
||||||
|
{
|
||||||
|
unsigned int *alpha_unify = compute_alpha_unify ();
|
||||||
|
unsigned int count = count_duplicates_tuple (_key_positions, alpha_unify);
|
||||||
|
delete[] alpha_unify;
|
||||||
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ===================== Finding good alpha increments ===================== */
|
/* ===================== Finding good alpha increments ===================== */
|
||||||
@@ -558,9 +587,8 @@ Search::count_duplicates_multiset (const unsigned int *alpha_inc) const
|
|||||||
/* Run through the keyword list and count the duplicates incrementally.
|
/* Run through the keyword list and count the duplicates incrementally.
|
||||||
The result does not depend on the order of the keyword list, thanks to
|
The result does not depend on the order of the keyword list, thanks to
|
||||||
the formula above. */
|
the formula above. */
|
||||||
init_selchars_multiset (_key_positions,
|
unsigned int *alpha_unify = compute_alpha_unify (_key_positions, alpha_inc);
|
||||||
compute_alpha_unify (_key_positions, alpha_inc),
|
init_selchars_multiset (_key_positions, alpha_unify, alpha_inc);
|
||||||
alpha_inc);
|
|
||||||
|
|
||||||
unsigned int count = 0;
|
unsigned int count = 0;
|
||||||
{
|
{
|
||||||
@@ -574,6 +602,7 @@ Search::count_duplicates_multiset (const unsigned int *alpha_inc) const
|
|||||||
}
|
}
|
||||||
|
|
||||||
delete_selchars ();
|
delete_selchars ();
|
||||||
|
delete[] alpha_unify;
|
||||||
|
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
@@ -586,9 +615,7 @@ Search::find_alpha_inc ()
|
|||||||
/* The goal is to choose _alpha_inc[] such that it doesn't introduce
|
/* The goal is to choose _alpha_inc[] such that it doesn't introduce
|
||||||
artificial duplicates.
|
artificial duplicates.
|
||||||
In other words, the goal is # proj2 (proj1 (K)) = # proj1 (K). */
|
In other words, the goal is # proj2 (proj1 (K)) = # proj1 (K). */
|
||||||
_alpha_unify = compute_alpha_unify ();
|
unsigned int duplicates_goal = count_duplicates_tuple ();
|
||||||
unsigned int duplicates_goal = count_duplicates_tuple (_key_positions);
|
|
||||||
delete[] _alpha_unify;
|
|
||||||
|
|
||||||
/* Start with zero increments. This is sufficient in most cases. */
|
/* Start with zero increments. This is sufficient in most cases. */
|
||||||
unsigned int *current = new unsigned int [_max_key_len];
|
unsigned int *current = new unsigned int [_max_key_len];
|
||||||
|
|||||||
@@ -50,16 +50,19 @@ private:
|
|||||||
unsigned int * compute_alpha_unify () const;
|
unsigned int * compute_alpha_unify () const;
|
||||||
|
|
||||||
/* Initializes each keyword's _selchars array. */
|
/* Initializes each keyword's _selchars array. */
|
||||||
void init_selchars_tuple (const Positions& positions) const;
|
void init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) const;
|
||||||
/* Deletes each keyword's _selchars array. */
|
/* Deletes each keyword's _selchars array. */
|
||||||
void delete_selchars () const;
|
void delete_selchars () const;
|
||||||
|
|
||||||
/* Count the duplicate keywords that occur with a given set of positions. */
|
/* Count the duplicate keywords that occur with a given set of positions. */
|
||||||
unsigned int count_duplicates_tuple (const Positions& positions) const;
|
unsigned int count_duplicates_tuple (const Positions& positions, const unsigned int *alpha_unify) const;
|
||||||
|
|
||||||
/* Find good key positions. */
|
/* Find good key positions. */
|
||||||
void find_positions ();
|
void find_positions ();
|
||||||
|
|
||||||
|
/* Count the duplicate keywords that occur with the found set of positions. */
|
||||||
|
unsigned int count_duplicates_tuple () const;
|
||||||
|
|
||||||
/* Computes the upper bound on the indices passed to asso_values[]. */
|
/* Computes the upper bound on the indices passed to asso_values[]. */
|
||||||
unsigned int compute_alpha_size (const unsigned int *alpha_inc) const;
|
unsigned int compute_alpha_size (const unsigned int *alpha_inc) const;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user