mirror of
https://git.savannah.gnu.org/git/gperf.git
synced 2025-12-02 13:09:22 +00:00
Move the handling of ALLCHARS (-k'*') into the Positions class.
This commit is contained in:
100
src/keyword.cc
100
src/keyword.cc
@@ -48,64 +48,62 @@ static inline void sort_char_set (unsigned int *base, int len)
|
||||
}
|
||||
|
||||
/* Initializes selchars and selchars_length.
|
||||
The hash function will be computed as
|
||||
asso_values[allchars[key_pos[0]]] + asso_values[allchars[key_pos[1]]] + ...
|
||||
We compute selchars as the multiset
|
||||
{ allchars[key_pos[0]], allchars[key_pos[1]], ... }
|
||||
so that the hash function becomes
|
||||
asso_values[selchars[0]] + asso_values[selchars[1]] + ...
|
||||
|
||||
General idea:
|
||||
The hash function will be computed as
|
||||
asso_values[allchars[key_pos[0]]] +
|
||||
asso_values[allchars[key_pos[1]]] + ...
|
||||
We compute selchars as the multiset
|
||||
{ allchars[key_pos[0]], allchars[key_pos[1]], ... }
|
||||
so that the hash function becomes
|
||||
asso_values[selchars[0]] + asso_values[selchars[1]] + ...
|
||||
Furthermore we sort the selchars array, to ease detection of duplicates
|
||||
later.
|
||||
|
||||
More in detail: The arguments alpha_unify (used for case-insensitive
|
||||
hash functions) and alpha_inc (used to disambiguate permutations)
|
||||
apply slight modifications. The hash function will be computed as
|
||||
sum (j=0,1,...: k = key_pos[j]:
|
||||
asso_values[alpha_unify[allchars[k]+alpha_inc[k]]])
|
||||
+ (allchars_length if !option[NOLENGTH], 0 otherwise).
|
||||
We compute selchars as the multiset
|
||||
{ alpha_unify[allchars[k]+alpha_inc[k]] : j=0,1,..., k = key_pos[j] }
|
||||
so that the hash function becomes
|
||||
asso_values[selchars[0]] + asso_values[selchars[1]] + ...
|
||||
+ (allchars_length if !option[NOLENGTH], 0 otherwise).
|
||||
*/
|
||||
|
||||
unsigned int *
|
||||
KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
|
||||
KeywordExt::init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
|
||||
{
|
||||
const char *k = _allchars;
|
||||
unsigned int *key_set =
|
||||
new unsigned int[(use_all_chars ? _allchars_length : positions.get_size ())];
|
||||
/* Iterate through the list of positions, initializing selchars
|
||||
(via ptr). */
|
||||
PositionIterator iter = positions.iterator(_allchars_length);
|
||||
|
||||
unsigned int *key_set = new unsigned int[iter.remaining()];
|
||||
unsigned int *ptr = key_set;
|
||||
|
||||
if (use_all_chars)
|
||||
/* Use all the character positions in the KEY. */
|
||||
for (int i = _allchars_length; i > 0; k++, i--)
|
||||
{
|
||||
unsigned int c = static_cast<unsigned char>(*k);
|
||||
if (alpha_inc)
|
||||
c += alpha_inc[k-_allchars];
|
||||
if (alpha_unify)
|
||||
c = alpha_unify[c];
|
||||
*ptr = c;
|
||||
ptr++;
|
||||
}
|
||||
else
|
||||
/* Only use those character positions specified by the user. */
|
||||
for (int i; (i = iter.next ()) != PositionIterator::EOS; )
|
||||
{
|
||||
/* Iterate through the list of key_positions, initializing selchars
|
||||
(via ptr). */
|
||||
PositionIterator iter (positions);
|
||||
|
||||
for (int i; (i = iter.next ()) != PositionIterator::EOS; )
|
||||
unsigned int c;
|
||||
if (i == Positions::LASTCHAR)
|
||||
/* Special notation for last KEY position, i.e. '$'. */
|
||||
c = static_cast<unsigned char>(_allchars[_allchars_length - 1]);
|
||||
else if (i < _allchars_length)
|
||||
{
|
||||
unsigned int c;
|
||||
if (i == Positions::LASTCHAR)
|
||||
/* Special notation for last KEY position, i.e. '$'. */
|
||||
c = static_cast<unsigned char>(_allchars[_allchars_length - 1]);
|
||||
else if (i < _allchars_length)
|
||||
{
|
||||
/* Within range of KEY length, so we'll keep it. */
|
||||
c = static_cast<unsigned char>(_allchars[i]);
|
||||
if (alpha_inc)
|
||||
c += alpha_inc[i];
|
||||
}
|
||||
else
|
||||
/* Out of range of KEY length, so we'll just skip it. */
|
||||
continue;
|
||||
if (alpha_unify)
|
||||
c = alpha_unify[c];
|
||||
*ptr = c;
|
||||
ptr++;
|
||||
/* Within range of KEY length, so we'll keep it. */
|
||||
c = static_cast<unsigned char>(_allchars[i]);
|
||||
if (alpha_inc)
|
||||
c += alpha_inc[i];
|
||||
}
|
||||
else
|
||||
/* Out of range of KEY length, the iterator should not have
|
||||
produced this. */
|
||||
abort ();
|
||||
if (alpha_unify)
|
||||
c = alpha_unify[c];
|
||||
*ptr = c;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
_selchars = key_set;
|
||||
@@ -115,16 +113,16 @@ KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, c
|
||||
}
|
||||
|
||||
void
|
||||
KeywordExt::init_selchars_tuple (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify)
|
||||
KeywordExt::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify)
|
||||
{
|
||||
init_selchars_low (use_all_chars, positions, alpha_unify, NULL);
|
||||
init_selchars_low (positions, alpha_unify, NULL);
|
||||
}
|
||||
|
||||
void
|
||||
KeywordExt::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
|
||||
KeywordExt::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
|
||||
{
|
||||
unsigned int *selchars =
|
||||
init_selchars_low (use_all_chars, positions, alpha_unify, alpha_inc);
|
||||
init_selchars_low (positions, alpha_unify, alpha_inc);
|
||||
|
||||
/* Sort the selchars elements alphabetically. */
|
||||
sort_char_set (selchars, _selchars_length);
|
||||
|
||||
Reference in New Issue
Block a user