mirror of
https://git.savannah.gnu.org/git/gperf.git
synced 2025-12-02 13:09:22 +00:00
Continuing rework.
This commit is contained in:
@@ -1,5 +1,13 @@
|
|||||||
2002-11-03 Bruno Haible <bruno@clisp.org>
|
2002-11-03 Bruno Haible <bruno@clisp.org>
|
||||||
|
|
||||||
|
* src/search.h (Search::compute_hash): Renamed from Search::hash.
|
||||||
|
(Search::compute_disjoint_union): Remove declaration.
|
||||||
|
(Search::sort_by_occurrence): Renamed from Search::sort_set.
|
||||||
|
* src/search.cc (Search::compute_hash): Renamed from Search::hash.
|
||||||
|
(compute_disjoint_union): Renamed from Search::compute_disjoint_union.
|
||||||
|
(Search::sort_by_occurrence): Renamed from Search::sort_set.
|
||||||
|
(Search::change): Simplify loop.
|
||||||
|
|
||||||
* src/search.h (Search::clear_determined): New declaration.
|
* src/search.h (Search::clear_determined): New declaration.
|
||||||
* src/search.cc (Search::clear_determined): New method.
|
* src/search.cc (Search::clear_determined): New method.
|
||||||
(Search::already_determined): Optimize.
|
(Search::already_determined): Optimize.
|
||||||
|
|||||||
@@ -356,7 +356,7 @@ Search::reorder ()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns the length of entire key list. */
|
/* Returns the length of the keyword list. */
|
||||||
|
|
||||||
int
|
int
|
||||||
Search::keyword_list_length ()
|
Search::keyword_list_length ()
|
||||||
@@ -364,7 +364,7 @@ Search::keyword_list_length ()
|
|||||||
return _list_len;
|
return _list_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns length of longest key read. */
|
/* Returns the maximum length of keywords. */
|
||||||
|
|
||||||
int
|
int
|
||||||
Search::max_key_length ()
|
Search::max_key_length ()
|
||||||
@@ -372,7 +372,7 @@ Search::max_key_length ()
|
|||||||
return _max_key_len;
|
return _max_key_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns number of key positions. */
|
/* Returns the number of key positions. */
|
||||||
|
|
||||||
int
|
int
|
||||||
Search::get_max_keysig_size ()
|
Search::get_max_keysig_size ()
|
||||||
@@ -380,34 +380,44 @@ Search::get_max_keysig_size ()
|
|||||||
return option[ALLCHARS] ? _max_key_len : option.get_max_keysig_size ();
|
return option[ALLCHARS] ? _max_key_len : option.get_max_keysig_size ();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Generate a key set's hash value. */
|
/* Computes a keyword's hash value, relative to the current _asso_values[],
|
||||||
|
and stores it in keyword->_hash_value.
|
||||||
|
This is called very frequently, and needs to be fast! */
|
||||||
|
|
||||||
inline int
|
inline int
|
||||||
Search::hash (KeywordExt *key_node)
|
Search::compute_hash (KeywordExt *keyword)
|
||||||
{
|
{
|
||||||
int sum = option[NOLENGTH] ? 0 : key_node->_allchars_length;
|
int sum = option[NOLENGTH] ? 0 : keyword->_allchars_length;
|
||||||
|
|
||||||
const unsigned char *p = key_node->_selchars;
|
const unsigned char *p = keyword->_selchars;
|
||||||
int i = key_node->_selchars_length;
|
int i = keyword->_selchars_length;
|
||||||
for (; i > 0; p++, i--)
|
for (; i > 0; p++, i--)
|
||||||
sum += _asso_values[*p];
|
sum += _asso_values[*p];
|
||||||
|
|
||||||
return key_node->_hash_value = sum;
|
return keyword->_hash_value = sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Merge two disjoint hash key multisets to form the ordered disjoint union of the sets.
|
/* Computes the disjoint union of two multisets of characters, i.e.
|
||||||
(In a multiset, an element can occur multiple times.)
|
the set of characters that are contained with a different multiplicity
|
||||||
Precondition: both set_1 and set_2 must be ordered. Returns the length
|
in set_1 and set_2. This includes those characters which are contained
|
||||||
of the combined set. */
|
in one of the sets but not both.
|
||||||
|
Both sets set_1[0..size_1-1] and set_2[0..size_2-1] are given ordered.
|
||||||
|
The result, an ordered set (not multiset!), is stored in set_3[0...].
|
||||||
|
Returns the size of the resulting set. */
|
||||||
|
|
||||||
inline int
|
inline int
|
||||||
Search::compute_disjoint_union (const unsigned char *set_1, int size_1, const unsigned char *set_2, int size_2, unsigned char *set_3)
|
compute_disjoint_union (const unsigned char *set_1, int size_1,
|
||||||
|
const unsigned char *set_2, int size_2,
|
||||||
|
unsigned char *set_3)
|
||||||
{
|
{
|
||||||
unsigned char *base = set_3;
|
unsigned char *base = set_3;
|
||||||
|
|
||||||
while (size_1 > 0 && size_2 > 0)
|
while (size_1 > 0 && size_2 > 0)
|
||||||
if (*set_1 == *set_2)
|
if (*set_1 == *set_2)
|
||||||
set_1++, size_1--, set_2++, size_2--;
|
{
|
||||||
|
set_1++, size_1--;
|
||||||
|
set_2++, size_2--;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
unsigned char next;
|
unsigned char next;
|
||||||
@@ -437,27 +447,23 @@ Search::compute_disjoint_union (const unsigned char *set_1, int size_1, const u
|
|||||||
return set_3 - base;
|
return set_3 - base;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Sort the UNION_SET in increasing frequency of occurrence.
|
/* Sorts the given set in increasing frequency of _occurrences[]. */
|
||||||
This speeds up later processing since we may assume the resulting
|
|
||||||
set (Set_3, in this case), is ordered. Uses insertion sort, since
|
|
||||||
the UNION_SET is typically short. */
|
|
||||||
|
|
||||||
inline void
|
inline void
|
||||||
Search::sort_set (unsigned char *union_set, int len)
|
Search::sort_by_occurrence (unsigned char *set, int len)
|
||||||
{
|
{
|
||||||
int i, j;
|
/* Use insertion sort, since the set is typically short. */
|
||||||
|
for (int i = 1; i < len; i++)
|
||||||
for (i = 0, j = len - 1; i < j; i++)
|
|
||||||
{
|
{
|
||||||
int curr;
|
int curr;
|
||||||
unsigned char tmp;
|
unsigned char tmp;
|
||||||
|
|
||||||
for (curr = i + 1, tmp = union_set[curr];
|
for (curr = i, tmp = set[curr];
|
||||||
curr > 0 && _occurrences[tmp] < _occurrences[union_set[curr-1]];
|
curr > 0 && _occurrences[tmp] < _occurrences[set[curr-1]];
|
||||||
curr--)
|
curr--)
|
||||||
union_set[curr] = union_set[curr - 1];
|
set[curr] = set[curr - 1];
|
||||||
|
|
||||||
union_set[curr] = tmp;
|
set[curr] = tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -493,7 +499,7 @@ Search::affects_prev (unsigned char c, KeywordExt *curr)
|
|||||||
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
|
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
|
||||||
{
|
{
|
||||||
KeywordExt *keyword = ptr->first();
|
KeywordExt *keyword = ptr->first();
|
||||||
if (_collision_detector->set_bit (hash (keyword))
|
if (_collision_detector->set_bit (compute_hash (keyword))
|
||||||
&& ++collisions >= _fewest_collisions)
|
&& ++collisions >= _fewest_collisions)
|
||||||
break;
|
break;
|
||||||
if (keyword == curr)
|
if (keyword == curr)
|
||||||
@@ -533,7 +539,7 @@ Search::change (KeywordExt *prior, KeywordExt *curr)
|
|||||||
fflush (stderr);
|
fflush (stderr);
|
||||||
}
|
}
|
||||||
union_set_length = compute_disjoint_union (prior->_selchars, prior->_selchars_length, curr->_selchars, curr->_selchars_length, union_set);
|
union_set_length = compute_disjoint_union (prior->_selchars, prior->_selchars_length, curr->_selchars, curr->_selchars_length, union_set);
|
||||||
sort_set (union_set, union_set_length);
|
sort_by_occurrence (union_set, union_set_length);
|
||||||
|
|
||||||
/* Try changing some values, if change doesn't alter other values continue normal action. */
|
/* Try changing some values, if change doesn't alter other values continue normal action. */
|
||||||
_fewest_collisions++;
|
_fewest_collisions++;
|
||||||
@@ -555,13 +561,11 @@ Search::change (KeywordExt *prior, KeywordExt *curr)
|
|||||||
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
|
for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
|
||||||
{
|
{
|
||||||
KeywordExt* keyword = ptr->first();
|
KeywordExt* keyword = ptr->first();
|
||||||
|
compute_hash (keyword);
|
||||||
if (keyword == curr)
|
if (keyword == curr)
|
||||||
break;
|
break;
|
||||||
hash (keyword);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
hash (curr);
|
|
||||||
|
|
||||||
if (option[DEBUG])
|
if (option[DEBUG])
|
||||||
{
|
{
|
||||||
fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
|
fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
|
||||||
@@ -628,7 +632,7 @@ Search::optimize ()
|
|||||||
{
|
{
|
||||||
KeywordExt *currkw = curr->first();
|
KeywordExt *currkw = curr->first();
|
||||||
|
|
||||||
hash (currkw);
|
compute_hash (currkw);
|
||||||
|
|
||||||
for (KeywordExt_List *ptr = _head; ptr != curr; ptr = ptr->rest())
|
for (KeywordExt_List *ptr = _head; ptr != curr; ptr = ptr->rest())
|
||||||
{
|
{
|
||||||
@@ -649,7 +653,7 @@ Search::optimize ()
|
|||||||
|
|
||||||
for (curr = _head; curr; curr = curr->rest())
|
for (curr = _head; curr; curr = curr->rest())
|
||||||
{
|
{
|
||||||
unsigned int hashcode = hash (curr->first());
|
unsigned int hashcode = compute_hash (curr->first());
|
||||||
if (_collision_detector->set_bit (hashcode))
|
if (_collision_detector->set_bit (hashcode))
|
||||||
{
|
{
|
||||||
if (option[DUP]) /* Keep track of this number... */
|
if (option[DUP]) /* Keep track of this number... */
|
||||||
|
|||||||
16
src/search.h
16
src/search.h
@@ -53,12 +53,22 @@ private:
|
|||||||
/* Reorders the keyword list so as to minimize search times. */
|
/* Reorders the keyword list so as to minimize search times. */
|
||||||
void reorder ();
|
void reorder ();
|
||||||
|
|
||||||
|
/* Returns the length of the keyword list. */
|
||||||
int keyword_list_length ();
|
int keyword_list_length ();
|
||||||
|
|
||||||
|
/* Returns the maximum length of keywords. */
|
||||||
int max_key_length ();
|
int max_key_length ();
|
||||||
|
|
||||||
|
/* Returns the number of key positions. */
|
||||||
int get_max_keysig_size ();
|
int get_max_keysig_size ();
|
||||||
int hash (KeywordExt *key_node);
|
|
||||||
static int compute_disjoint_union (const unsigned char *set_1, int size_1, const unsigned char *set_2, int size_2, unsigned char *set_3);
|
/* Computes a keyword's hash value, relative to the current _asso_values[],
|
||||||
void sort_set (unsigned char *union_set, int len);
|
and stores it in keyword->_hash_value. */
|
||||||
|
int compute_hash (KeywordExt *key_node);
|
||||||
|
|
||||||
|
/* Sorts the given set in increasing frequency of _occurrences[]. */
|
||||||
|
void sort_by_occurrence (unsigned char *set, int len);
|
||||||
|
|
||||||
bool affects_prev (unsigned char c, KeywordExt *curr);
|
bool affects_prev (unsigned char c, KeywordExt *curr);
|
||||||
void change (KeywordExt *prior, KeywordExt *curr);
|
void change (KeywordExt *prior, KeywordExt *curr);
|
||||||
void sort ();
|
void sort ();
|
||||||
|
|||||||
Reference in New Issue
Block a user