1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

Move the handling of ALLCHARS (-k'*') into the Positions class.

This commit is contained in:
Bruno Haible
2003-04-07 09:50:11 +00:00
parent ec3d1127fa
commit 2535f34494
14 changed files with 398 additions and 241 deletions

View File

@@ -48,64 +48,62 @@ static inline void sort_char_set (unsigned int *base, int len)
}
/* Initializes selchars and selchars_length.
The hash function will be computed as
asso_values[allchars[key_pos[0]]] + asso_values[allchars[key_pos[1]]] + ...
We compute selchars as the multiset
{ allchars[key_pos[0]], allchars[key_pos[1]], ... }
so that the hash function becomes
asso_values[selchars[0]] + asso_values[selchars[1]] + ...
General idea:
The hash function will be computed as
asso_values[allchars[key_pos[0]]] +
asso_values[allchars[key_pos[1]]] + ...
We compute selchars as the multiset
{ allchars[key_pos[0]], allchars[key_pos[1]], ... }
so that the hash function becomes
asso_values[selchars[0]] + asso_values[selchars[1]] + ...
Furthermore we sort the selchars array, to ease detection of duplicates
later.
More in detail: The arguments alpha_unify (used for case-insensitive
hash functions) and alpha_inc (used to disambiguate permutations)
apply slight modifications. The hash function will be computed as
sum (j=0,1,...: k = key_pos[j]:
asso_values[alpha_unify[allchars[k]+alpha_inc[k]]])
+ (allchars_length if !option[NOLENGTH], 0 otherwise).
We compute selchars as the multiset
{ alpha_unify[allchars[k]+alpha_inc[k]] : j=0,1,..., k = key_pos[j] }
so that the hash function becomes
asso_values[selchars[0]] + asso_values[selchars[1]] + ...
+ (allchars_length if !option[NOLENGTH], 0 otherwise).
*/
unsigned int *
KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
KeywordExt::init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
{
const char *k = _allchars;
unsigned int *key_set =
new unsigned int[(use_all_chars ? _allchars_length : positions.get_size ())];
/* Iterate through the list of positions, initializing selchars
(via ptr). */
PositionIterator iter = positions.iterator(_allchars_length);
unsigned int *key_set = new unsigned int[iter.remaining()];
unsigned int *ptr = key_set;
if (use_all_chars)
/* Use all the character positions in the KEY. */
for (int i = _allchars_length; i > 0; k++, i--)
{
unsigned int c = static_cast<unsigned char>(*k);
if (alpha_inc)
c += alpha_inc[k-_allchars];
if (alpha_unify)
c = alpha_unify[c];
*ptr = c;
ptr++;
}
else
/* Only use those character positions specified by the user. */
for (int i; (i = iter.next ()) != PositionIterator::EOS; )
{
/* Iterate through the list of key_positions, initializing selchars
(via ptr). */
PositionIterator iter (positions);
for (int i; (i = iter.next ()) != PositionIterator::EOS; )
unsigned int c;
if (i == Positions::LASTCHAR)
/* Special notation for last KEY position, i.e. '$'. */
c = static_cast<unsigned char>(_allchars[_allchars_length - 1]);
else if (i < _allchars_length)
{
unsigned int c;
if (i == Positions::LASTCHAR)
/* Special notation for last KEY position, i.e. '$'. */
c = static_cast<unsigned char>(_allchars[_allchars_length - 1]);
else if (i < _allchars_length)
{
/* Within range of KEY length, so we'll keep it. */
c = static_cast<unsigned char>(_allchars[i]);
if (alpha_inc)
c += alpha_inc[i];
}
else
/* Out of range of KEY length, so we'll just skip it. */
continue;
if (alpha_unify)
c = alpha_unify[c];
*ptr = c;
ptr++;
/* Within range of KEY length, so we'll keep it. */
c = static_cast<unsigned char>(_allchars[i]);
if (alpha_inc)
c += alpha_inc[i];
}
else
/* Out of range of KEY length, the iterator should not have
produced this. */
abort ();
if (alpha_unify)
c = alpha_unify[c];
*ptr = c;
ptr++;
}
_selchars = key_set;
@@ -115,16 +113,16 @@ KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, c
}
void
KeywordExt::init_selchars_tuple (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify)
KeywordExt::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify)
{
init_selchars_low (use_all_chars, positions, alpha_unify, NULL);
init_selchars_low (positions, alpha_unify, NULL);
}
void
KeywordExt::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
KeywordExt::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
{
unsigned int *selchars =
init_selchars_low (use_all_chars, positions, alpha_unify, alpha_inc);
init_selchars_low (positions, alpha_unify, alpha_inc);
/* Sort the selchars elements alphabetically. */
sort_char_set (selchars, _selchars_length);