1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

Cleanup the alpha_unify handling code.

This commit is contained in:
Bruno Haible
2003-04-11 09:10:58 +00:00
parent f619dea2f5
commit ab6f0966b7
3 changed files with 71 additions and 31 deletions

View File

@@ -1,5 +1,15 @@
2002-12-12 Bruno Haible <bruno@clisp.org>
* src/search.h (Search::init_selchars_tuple,
Search::count_duplicates_tuple): Add alpha_unify argument.
(Search::count_duplicates_tuple): New method declaration.
* src/search.cc (Search::init_selchars_tuple,
Search::count_duplicates_tuple): Add alpha_unify argument.
(Search::find_positions): Update.
(Search::count_duplicates_tuple): New method.
(Search::count_duplicates_multiset): Free temp alpha_unify vector.
(Search::find_alpha_inc): Call count_duplicates_tuple.
* src/configure.in: Add test for stack-allocated variable-size arrays.
* src/config.h.in: Regenerated.
* src/search.cc: Include config.h.

View File

@@ -58,7 +58,7 @@
/* ================================ Theory ================================= */
/* The most general form of the hash function is
/* The general form of the hash function is
hash (keyword) = sum (asso_values[keyword[i] + alpha_inc[i]] : i in Pos)
+ len (keyword)
@@ -75,6 +75,8 @@
are nonnegative integers alpha_inc[i] such that all multisets
{keyword[i] + alpha_inc[i] : i in Pos} are different.
Define selchars[keyword] := {keyword[i] + alpha_inc[i] : i in Pos}.
Theorem 3: If all multisets selchars[keyword] are different, there are
nonnegative integers asso_values[c] such that all hash values
sum (asso_values[c] : c in selchars[keyword]) are different.
@@ -98,14 +100,25 @@
proj1 : String --> Map (Pos --> N)
proj2 : Map (Pos --> N) --> Map (Pos --> N) / S(Pos)
proj3 : Map (Pos --> N) / S(Pos) --> N
where N denotes the set of nonnegative integers, and S(Pos) is the
symmetric group over Pos.
where
N denotes the set of nonnegative integers,
Map (A --> B) := Hom_Set (A, B) is the set of maps from A to B, and
S(Pos) is the symmetric group over Pos.
This was the theory for option[NOLENGTH]; if !option[NOLENGTH], slight
modifications apply:
proj1 : String --> Map (Pos --> N) x N
proj2 : Map (Pos --> N) x N --> Map (Pos --> N) / S(Pos) x N
proj3 : Map (Pos --> N) / S(Pos) x N --> N
For a case-insensitive hash function, the general form is
hash (keyword) =
sum (asso_values[alpha_unify[keyword[i] + alpha_inc[i]]] : i in Pos)
+ len (keyword)
where alpha_unify[c] is chosen so that an upper/lower case change in
keyword[i] doesn't change alpha_unify[keyword[i] + alpha_inc[i]].
*/
/* ==================== Initialization and Preparation ===================== */
@@ -118,17 +131,15 @@ Search::Search (KeywordExt_List *list)
void
Search::prepare ()
{
KeywordExt_List *temp;
/* Compute the total number of keywords. */
_total_keys = 0;
for (temp = _head; temp; temp = temp->rest())
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
_total_keys++;
/* Compute the minimum and maximum keyword length. */
_max_key_len = INT_MIN;
_min_key_len = INT_MAX;
for (temp = _head; temp; temp = temp->rest())
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
{
KeywordExt *keyword = temp->first();
@@ -142,8 +153,8 @@ Search::prepare ()
expressions don't work correctly for looking up an empty string. */
if (_min_key_len == 0)
{
fprintf (stderr, "Empty input key is not allowed.\n"
"To recognize an empty input key, your code should check for\n"
fprintf (stderr, "Empty input keyword is not allowed.\n"
"To recognize an empty input keyword, your code should check for\n"
"len == 0 before calling the gperf generated lookup function.\n");
exit (1);
}
@@ -151,7 +162,7 @@ Search::prepare ()
/* Exit program if the characters in the keywords are not in the required
range. */
if (option[SEVENBIT])
for (temp = _head; temp; temp = temp->rest())
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
{
KeywordExt *keyword = temp->first();
@@ -185,6 +196,7 @@ Search::compute_alpha_unify () const
{
if (option[UPPERLOWER])
{
/* Uppercase to lowercase mapping. */
unsigned int alpha_size = compute_alpha_size();
unsigned int *alpha_unify = new unsigned int[alpha_size];
for (unsigned int c = 0; c < alpha_size; c++)
@@ -194,15 +206,16 @@ Search::compute_alpha_unify () const
return alpha_unify;
}
else
/* Identity mapping. */
return NULL;
}
/* Initializes each keyword's _selchars array. */
void
Search::init_selchars_tuple (const Positions& positions) const
Search::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) const
{
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
temp->first()->init_selchars_tuple(positions, _alpha_unify);
temp->first()->init_selchars_tuple(positions, alpha_unify);
}
/* Deletes each keyword's _selchars array. */
@@ -218,12 +231,12 @@ Search::delete_selchars () const
# K - # proj1 (K)
where K is the multiset of given keywords. */
unsigned int
Search::count_duplicates_tuple (const Positions& positions) const
Search::count_duplicates_tuple (const Positions& positions, const unsigned int *alpha_unify) const
{
/* Run through the keyword list and count the duplicates incrementally.
The result does not depend on the order of the keyword list, thanks to
the formula above. */
init_selchars_tuple (positions);
init_selchars_tuple (positions, alpha_unify);
unsigned int count = 0;
{
@@ -253,8 +266,8 @@ Search::find_positions ()
return;
}
/* Compute preliminary value for _alpha_unify. */
_alpha_unify = compute_alpha_unify ();
/* Compute preliminary alpha_unify table. */
unsigned int *alpha_unify = compute_alpha_unify ();
/* 1. Find positions that must occur in order to distinguish duplicates. */
Positions mandatory;
@@ -322,7 +335,8 @@ Search::find_positions ()
int imax = (_max_key_len - 1 < Positions::MAX_KEY_POS - 1
? _max_key_len - 1 : Positions::MAX_KEY_POS - 1);
Positions current = mandatory;
unsigned int current_duplicates_count = count_duplicates_tuple (current);
unsigned int current_duplicates_count =
count_duplicates_tuple (current, alpha_unify);
for (;;)
{
Positions best;
@@ -333,7 +347,8 @@ Search::find_positions ()
{
Positions tryal = current;
tryal.add (i);
unsigned int try_duplicates_count = count_duplicates_tuple (tryal);
unsigned int try_duplicates_count =
count_duplicates_tuple (tryal, alpha_unify);
/* We prefer 'try' to 'best' if it produces less duplicates,
or if it produces the same number of duplicates but with
@@ -366,7 +381,8 @@ Search::find_positions ()
{
Positions tryal = current;
tryal.remove (i);
unsigned int try_duplicates_count = count_duplicates_tuple (tryal);
unsigned int try_duplicates_count =
count_duplicates_tuple (tryal, alpha_unify);
/* We prefer 'try' to 'best' if it produces less duplicates,
or if it produces the same number of duplicates but with
@@ -406,7 +422,7 @@ Search::find_positions ()
tryal.remove (i2);
tryal.add (i3);
unsigned int try_duplicates_count =
count_duplicates_tuple (tryal);
count_duplicates_tuple (tryal, alpha_unify);
/* We prefer 'try' to 'best' if it produces less duplicates,
or if it produces the same number of duplicates but with
@@ -459,8 +475,21 @@ Search::find_positions ()
fprintf (stderr, "\n");
}
/* Free preliminary value for _alpha_unify. */
delete[] _alpha_unify;
/* Free preliminary alpha_unify table. */
delete[] alpha_unify;
}
/* Count the duplicate keywords that occur with the found set of positions.
In other words, it returns the difference
# K - # proj1 (K)
where K is the multiset of given keywords. */
unsigned int
Search::count_duplicates_tuple () const
{
unsigned int *alpha_unify = compute_alpha_unify ();
unsigned int count = count_duplicates_tuple (_key_positions, alpha_unify);
delete[] alpha_unify;
return count;
}
/* ===================== Finding good alpha increments ===================== */
@@ -558,9 +587,8 @@ Search::count_duplicates_multiset (const unsigned int *alpha_inc) const
/* Run through the keyword list and count the duplicates incrementally.
The result does not depend on the order of the keyword list, thanks to
the formula above. */
init_selchars_multiset (_key_positions,
compute_alpha_unify (_key_positions, alpha_inc),
alpha_inc);
unsigned int *alpha_unify = compute_alpha_unify (_key_positions, alpha_inc);
init_selchars_multiset (_key_positions, alpha_unify, alpha_inc);
unsigned int count = 0;
{
@@ -574,6 +602,7 @@ Search::count_duplicates_multiset (const unsigned int *alpha_inc) const
}
delete_selchars ();
delete[] alpha_unify;
return count;
}
@@ -586,9 +615,7 @@ Search::find_alpha_inc ()
/* The goal is to choose _alpha_inc[] such that it doesn't introduce
artificial duplicates.
In other words, the goal is # proj2 (proj1 (K)) = # proj1 (K). */
_alpha_unify = compute_alpha_unify ();
unsigned int duplicates_goal = count_duplicates_tuple (_key_positions);
delete[] _alpha_unify;
unsigned int duplicates_goal = count_duplicates_tuple ();
/* Start with zero increments. This is sufficient in most cases. */
unsigned int *current = new unsigned int [_max_key_len];

View File

@@ -50,16 +50,19 @@ private:
unsigned int * compute_alpha_unify () const;
/* Initializes each keyword's _selchars array. */
void init_selchars_tuple (const Positions& positions) const;
void init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) const;
/* Deletes each keyword's _selchars array. */
void delete_selchars () const;
/* Count the duplicate keywords that occur with a given set of positions. */
unsigned int count_duplicates_tuple (const Positions& positions) const;
unsigned int count_duplicates_tuple (const Positions& positions, const unsigned int *alpha_unify) const;
/* Find good key positions. */
void find_positions ();
/* Count the duplicate keywords that occur with the found set of positions. */
unsigned int count_duplicates_tuple () const;
/* Computes the upper bound on the indices passed to asso_values[]. */
unsigned int compute_alpha_size (const unsigned int *alpha_inc) const;