From 8797dd362b7ca7fe57e9716120f80943b758710a Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Mon, 4 Nov 2002 12:09:45 +0000 Subject: [PATCH] Introduce class KeywordExt. --- ChangeLog | 9 ++++++ src/gen-perf.cc | 6 ++-- src/hash-table.cc | 8 +++--- src/key-list.cc | 72 +++++++++++++++++++++++------------------------ src/keyword.cc | 10 +++++++ src/keyword.h | 28 ++++++++++++++++-- src/list-node.cc | 10 +++---- src/list-node.h | 8 +----- 8 files changed, 93 insertions(+), 58 deletions(-) diff --git a/ChangeLog b/ChangeLog index 279e3af..08a0681 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,14 @@ 2002-10-03 Bruno Haible + * src/keyword.h (KeywordExt): New class. + * src/keyword.cc (KeywordExt): New constructor. + * src/list-node.h (List_Node): Inherit from KeywordExt. + * src/list-node.cc: Update. + * src/gen-perf.cc: Update. + * src/hash-table.cc: Update. + * src/key-list.cc: Update. + (output_keyword_entry): Change argument type to KeywordExt*. + * src/keyword.h: New file. * src/keyword.cc: New file. * src/list-node.h (List_Node): Extend Keyword. diff --git a/src/gen-perf.cc b/src/gen-perf.cc index c2285b9..0dd1681 100644 --- a/src/gen-perf.cc +++ b/src/gen-perf.cc @@ -152,8 +152,8 @@ Gen_Perf::hash (List_Node *key_node) { int sum = option[NOLENGTH] ? 
0 : key_node->allchars_length; - const char *p = key_node->char_set; - int i = key_node->char_set_length; + const char *p = key_node->selchars; + int i = key_node->selchars_length; for (; i > 0; p++, i--) sum += asso_values[(unsigned char)(*p)]; @@ -227,7 +227,7 @@ Gen_Perf::change (List_Node *prior, List_Node *curr) curr->hash_value); fflush (stderr); } - union_set_length = compute_disjoint_union (prior->char_set, prior->char_set_length, curr->char_set, curr->char_set_length, union_set); + union_set_length = compute_disjoint_union (prior->selchars, prior->selchars_length, curr->selchars, curr->selchars_length, union_set); sort_set (union_set, union_set_length); /* Try changing some values, if change doesn't alter other values continue normal action. */ diff --git a/src/hash-table.cc b/src/hash-table.cc index 67575fe..c37d666 100644 --- a/src/hash-table.cc +++ b/src/hash-table.cc @@ -57,7 +57,7 @@ Hash_Table::~Hash_Table (void) if (table[i]) fprintf (stderr, "%8d, %*.*s, %.*s\n", i, - field_width, table[i]->char_set_length, table[i]->char_set, + field_width, table[i]->selchars_length, table[i]->selchars, table[i]->allchars_length, table[i]->allchars); fprintf (stderr, "\nend dumping hash table\n\n"); @@ -71,14 +71,14 @@ Hash_Table::~Hash_Table (void) List_Node * Hash_Table::insert (List_Node *item) { - unsigned hash_val = hashpjw (item->char_set, item->char_set_length); + unsigned hash_val = hashpjw (item->selchars, item->selchars_length); int probe = hash_val & (size - 1); int increment = ((hash_val ^ item->allchars_length) | 1) & (size - 1); while (table[probe]) { - if (table[probe]->char_set_length == item->char_set_length - && memcmp (table[probe]->char_set, item->char_set, item->char_set_length) == 0 + if (table[probe]->selchars_length == item->selchars_length + && memcmp (table[probe]->selchars, item->selchars, item->selchars_length) == 0 && (ignore_length || table[probe]->allchars_length == item->allchars_length)) return table[probe]; diff --git 
a/src/key-list.cc b/src/key-list.cc index 1f6046b..23d1206 100644 --- a/src/key-list.cc +++ b/src/key-list.cc @@ -405,15 +405,15 @@ Key_List::read_keys (void) total_duplicates++; list_len--; trail->next = temp->next; - temp->link = ptr->link; - ptr->link = temp; + temp->duplicate_link = ptr->duplicate_link; + ptr->duplicate_link = temp; /* Complain if user hasn't enabled the duplicate option. */ if (!option[DUP] || option[DEBUG]) fprintf (stderr, "Key link: \"%.*s\" = \"%.*s\", with key set \"%.*s\".\n", temp->allchars_length, temp->allchars, ptr->allchars_length, ptr->allchars, - temp->char_set_length, temp->char_set); + temp->selchars_length, temp->selchars); } else trail = temp; @@ -524,8 +524,8 @@ Key_List::get_occurrence (List_Node *ptr) { int value = 0; - const char *p = ptr->char_set; - unsigned int i = ptr->char_set_length; + const char *p = ptr->selchars; + unsigned int i = ptr->selchars_length; for (; i > 0; p++, i--) value += occurrences[(unsigned char)(*p)]; @@ -538,8 +538,8 @@ Key_List::get_occurrence (List_Node *ptr) inline void Key_List::set_determined (List_Node *ptr) { - const char *p = ptr->char_set; - unsigned int i = ptr->char_set_length; + const char *p = ptr->selchars; + unsigned int i = ptr->selchars_length; for (; i > 0; p++, i--) determined[(unsigned char)(*p)] = 1; } @@ -551,8 +551,8 @@ Key_List::already_determined (List_Node *ptr) { int is_determined = 1; - const char *p = ptr->char_set; - unsigned int i = ptr->char_set_length; + const char *p = ptr->selchars; + unsigned int i = ptr->selchars_length; for (; is_determined && i > 0; p++, i--) is_determined = determined[(unsigned char)(*p)]; @@ -1148,7 +1148,7 @@ Key_List::output_keylength_table (void) for (temp = head, index = 0; temp; temp = temp->next) { if (option[SWITCH] && !option[TYPE] - && !(temp->link + && !(temp->duplicate_link || (temp->next && temp->hash_value == temp->next->hash_value))) continue; @@ -1172,8 +1172,8 @@ Key_List::output_keylength_table (void) printf ("%3d", 
temp->allchars_length); /* Deal with links specially. */ - if (temp->link) // implies option[DUP] - for (List_Node *links = temp->link; links; links = links->link) + if (temp->duplicate_link) // implies option[DUP] + for (KeywordExt *links = temp->duplicate_link; links; links = links->duplicate_link) { ++index; printf (","); @@ -1193,7 +1193,7 @@ Key_List::output_keylength_table (void) /* ------------------------------------------------------------------------- */ static void -output_keyword_entry (List_Node *temp, const char *indent) +output_keyword_entry (KeywordExt *temp, const char *indent) { printf ("%s ", indent); if (option[TYPE]) @@ -1207,7 +1207,7 @@ output_keyword_entry (List_Node *temp, const char *indent) } if (option[DEBUG]) printf (" /* hash value = %d, index = %d */", - temp->hash_value, temp->index); + temp->hash_value, temp->final_index); } static void @@ -1268,7 +1268,7 @@ Key_List::output_keyword_table (void) for (temp = head, index = 0; temp; temp = temp->next) { if (option[SWITCH] && !option[TYPE] - && !(temp->link + && !(temp->duplicate_link || (temp->next && temp->hash_value == temp->next->hash_value))) continue; @@ -1283,15 +1283,15 @@ Key_List::output_keyword_table (void) index = temp->hash_value; } - temp->index = index; + temp->final_index = index; output_keyword_entry (temp, indent); /* Deal with links specially. 
*/ - if (temp->link) // implies option[DUP] - for (List_Node *links = temp->link; links; links = links->link) + if (temp->duplicate_link) // implies option[DUP] + for (KeywordExt *links = temp->duplicate_link; links; links = links->duplicate_link) { - links->index = ++index; + links->final_index = ++index; printf (",\n"); output_keyword_entry (links, indent); } @@ -1339,27 +1339,27 @@ Key_List::output_lookup_array (void) for (List_Node *temp = head; temp; temp = temp->next) { int hash_value = temp->hash_value; - lookup_array[hash_value] = temp->index; + lookup_array[hash_value] = temp->final_index; if (option[DEBUG]) fprintf (stderr, "keyword = %.*s, index = %d\n", - temp->allchars_length, temp->allchars, temp->index); - if (temp->link + temp->allchars_length, temp->allchars, temp->final_index); + if (temp->duplicate_link || (temp->next && hash_value == temp->next->hash_value)) { /* Start a duplicate entry. */ dup_ptr->hash_value = hash_value; - dup_ptr->index = temp->index; - dup_ptr->count = 1; + dup_ptr->index = temp->final_index; + dup_ptr->count = 1; for (;;) { - for (List_Node *ptr = temp->link; ptr; ptr = ptr->link) + for (KeywordExt *ptr = temp->duplicate_link; ptr; ptr = ptr->duplicate_link) { dup_ptr->count++; if (option[DEBUG]) fprintf (stderr, "static linked keyword = %.*s, index = %d\n", - ptr->allchars_length, ptr->allchars, ptr->index); + ptr->allchars_length, ptr->allchars, ptr->final_index); } if (!(temp->next && hash_value == temp->next->hash_value)) @@ -1370,7 +1370,7 @@ Key_List::output_lookup_array (void) dup_ptr->count++; if (option[DEBUG]) fprintf (stderr, "dynamic linked keyword = %.*s, index = %d\n", - temp->allchars_length, temp->allchars, temp->index); + temp->allchars_length, temp->allchars, temp->final_index); } assert (dup_ptr->count >= 2); dup_ptr++; @@ -1404,7 +1404,7 @@ Key_List::output_lookup_array (void) lookup_array_size += 2; found_i: /* Put in an indirection from dup_ptr->hash_value to i. 
- At i and i+1 store dup_ptr->index and dup_ptr->count. */ + At i and i+1 store dup_ptr->index (a final_index) and dup_ptr->count. */ assert (lookup_array[dup_ptr->hash_value] == dup_ptr->index); lookup_array[dup_ptr->hash_value] = - 1 - total_keys - i; lookup_array[i] = - total_keys + dup_ptr->index; @@ -1505,19 +1505,19 @@ output_switch_case (List_Node *list, int indent, int *jumps_away) indent, "", list->hash_value, list->allchars_length, list->allchars); if (option[DUP] - && (list->link + && (list->duplicate_link || (list->next && list->hash_value == list->next->hash_value))) { if (option[LENTABLE]) printf ("%*slengthptr = &lengthtable[%d];\n", - indent, "", list->index); + indent, "", list->final_index); printf ("%*swordptr = &%s[%d];\n", - indent, "", option.get_wordlist_name (), list->index); + indent, "", option.get_wordlist_name (), list->final_index); int count = 0; for (List_Node *temp = list; ; temp = temp->next) { - for (List_Node *links = temp; links; links = links->link) + for (KeywordExt *links = temp; links; links = links->duplicate_link) count++; if (!(temp->next && temp->hash_value == temp->next->hash_value)) break; @@ -1542,7 +1542,7 @@ output_switch_case (List_Node *list, int indent, int *jumps_away) printf ("%*sresword = ", indent, ""); if (option[TYPE]) - printf ("&%s[%d]", option.get_wordlist_name (), list->index); + printf ("&%s[%d]", option.get_wordlist_name (), list->final_index); else output_string (list->allchars, list->allchars_length); printf (";\n"); @@ -2064,12 +2064,12 @@ Key_List::dump () int field_width = option.get_max_keysig_size (); fprintf (stderr, "\nList contents are:\n(hash value, key length, index, %*s, keyword):\n", - field_width, "char_set"); + field_width, "selchars"); for (List_Node *ptr = head; ptr; ptr = ptr->next) fprintf (stderr, "%11d,%11d,%6d, %*.*s, %.*s\n", - ptr->hash_value, ptr->allchars_length, ptr->index, - field_width, ptr->char_set_length, ptr->char_set, + ptr->hash_value, ptr->allchars_length, ptr->final_index, + 
field_width, ptr->selchars_length, ptr->selchars, ptr->allchars_length, ptr->allchars); } diff --git a/src/keyword.cc b/src/keyword.cc index 3047dda..564e49d 100644 --- a/src/keyword.cc +++ b/src/keyword.cc @@ -18,6 +18,7 @@ You should have received a copy of the GNU General Public License along with GNU GPERF; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ +#include <stddef.h> #include "keyword.h" @@ -28,6 +29,15 @@ Keyword::Keyword (const char *s, int s_len, const char *r) { } + +/* KeywordExt class. */ + +KeywordExt::KeywordExt (const char *s, int s_len, const char *r) + : Keyword (s, s_len, r), duplicate_link (NULL), final_index (0) +{ +} + + /* Keyword_Factory class. */ Keyword_Factory::Keyword_Factory () {} diff --git a/src/keyword.h b/src/keyword.h index aa2ebfc..d52dce3 100644 --- a/src/keyword.h +++ b/src/keyword.h @@ -30,7 +30,7 @@ struct Keyword /* Constructor. */ Keyword (const char *allchars, int allchars_length, const char *rest); - /* Data members. */ + /* Data members defined immediately by the input file. */ /* The keyword as a string, possibly containing NUL bytes. */ const char *const allchars; const int allchars_length; @@ -38,6 +38,28 @@ struct Keyword const char *const rest; }; +/* A keyword, in the context of a given keyposition list. */ +struct KeywordExt : public Keyword +{ + /* Constructor. */ + KeywordExt (const char *allchars, int allchars_length, const char *rest); + + /* Data members depending on the keyposition list. */ + /* The selected characters that participate for the hash function, + reordered according to the keyposition list. */ + const char * selchars; + int selchars_length; + /* Chained list of keywords having the same selchars. */ + KeywordExt * duplicate_link; + + /* Data members used by the algorithm. */ + int occurrence; /* A metric for frequency of key set occurrences. */ + int hash_value; /* Hash value for the key. 
*/ + + /* Data members used by the output routines. */ + int final_index; +}; + /* A factory for creating Keyword instances. */ class Keyword_Factory { @@ -45,8 +67,8 @@ public: Keyword_Factory (); virtual ~Keyword_Factory (); /* Creates a new Keyword. */ - virtual Keyword create_keyword (const char *allchars, int allchars_length, - const char *rest) = 0; + virtual Keyword * create_keyword (const char *allchars, int allchars_length, + const char *rest) = 0; }; #endif diff --git a/src/list-node.cc b/src/list-node.cc index 26c3919..7cf9c7f 100644 --- a/src/list-node.cc +++ b/src/list-node.cc @@ -56,7 +56,7 @@ List_Node::set_sort (char *base, int len) the INDEX field to some useful value. */ List_Node::List_Node (const char *k, int len, const char *r): - Keyword (k, len, r), link (0), next (0), index (0) + KeywordExt (k, len, r), next (NULL) { char *key_set = new char[(option[ALLCHARS] ? len : option.get_max_keysig_size ())]; char *ptr = key_set; @@ -68,7 +68,7 @@ List_Node::List_Node (const char *k, int len, const char *r): else /* Only use those character positions specified by the user. */ { /* Iterate through the list of key_positions, initializing occurrences table - and char_set (via char * pointer ptr). */ + and selchars (via char * pointer ptr). */ for (option.reset (); (i = option.get ()) != EOS; ) { @@ -83,7 +83,7 @@ List_Node::List_Node (const char *k, int len, const char *r): /* Didn't get any hits and user doesn't want to consider the keylength, so there are essentially no usable hash positions! */ - if (ptr == char_set && option[NOLENGTH]) + if (ptr == selchars && option[NOLENGTH]) { fprintf (stderr, "Can't hash keyword %.*s with chosen key positions.\n", allchars_length, allchars); @@ -94,6 +94,6 @@ List_Node::List_Node (const char *k, int len, const char *r): /* Sort the KEY_SET items alphabetically. 
*/ set_sort (key_set, ptr - key_set); - char_set = key_set; - char_set_length = ptr - key_set; + selchars = key_set; + selchars_length = ptr - key_set; } diff --git a/src/list-node.h b/src/list-node.h index ce5d8a9..375cc2e 100644 --- a/src/list-node.h +++ b/src/list-node.h @@ -27,15 +27,9 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ #include "vectors.h" #include "keyword.h" -struct List_Node : public Keyword, private Vectors +struct List_Node : public KeywordExt, private Vectors { - List_Node *link; /* TRUE if key has an identical KEY_SET as another key. */ List_Node *next; /* Points to next element on the list. */ - const char *char_set; /* Set of characters to hash, specified by user. */ - int char_set_length; /* Length of char_set. */ - int hash_value; /* Hash value for the key. */ - int occurrence; /* A metric for frequency of key set occurrences. */ - int index; /* Position of this node relative to other nodes. */ List_Node (const char *key, int len, const char *rest); static void set_sort (char *base, int len);