From c0eb5203949ad864470076ec23b6f348639fad2d Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sun, 20 Aug 2000 16:50:54 +0000 Subject: [PATCH] Allow the use of embedded NULs in keys. --- ChangeLog | 44 +++++++++ lib/hash.cc | 33 +++---- lib/hash.h | 18 +--- src/gen-perf.cc | 65 ++++++++----- src/gen-perf.h | 4 +- src/hash-table.cc | 36 ++++--- src/hash-table.h | 7 +- src/key-list.cc | 241 ++++++++++++++++++++++++++++++++++++++-------- src/list-node.cc | 26 ++--- src/list-node.h | 7 +- 10 files changed, 345 insertions(+), 136 deletions(-) diff --git a/ChangeLog b/ChangeLog index ddc7af3..7656098 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,49 @@ 2000-08-20 Bruno Haible + Allow the use of embedded NULs in keys. + * lib/hash.h (hashpjw): Add a length argument. + * lib/hash.cc (hashpjw): Likewise. Don't stop when encountering a NUL + character. + * src/hash-table.h (Hash_Table constructor): Add ignore_len argument. + (Hash_Table::ignore_length): New field. + (Hash_Table::insert): Renamed from Hash_Table::operator(). Remove + ignore_length argument. + * src/hash-table.cc (NIL): Remove macro. + (Hash_Table constructor): Add ignore_len argument. Use it to + initialize ignore_length. + (Hash_Table destructor): Specify explicit length of char_set and + key. + (Hash_Table::insert): Renamed from Hash_Table::operator(). Remove + ignore_length argument. Pass explicit length to hashpjw. Compare + char_set using memcmp, not strcmp. + * src/list-node.h (List_Node): Rename field length to key_length. + New field char_set_length. + (List_Node constructor): Accept key and rest, not the entire line. + * src/list-node.cc (List_Node constructor): Accept key and rest, not + the entire line. Don't NUL terminate key and char_set. Initialize + char_set_length field. + * src/key-list.cc: Include <ctype.h>. + (parse_line): New function. + (Key_List::read_keys): Call parse_line instead of new List_Node. + Pass option[NOLENGTH] to Hash_Table constructor, not + Hash_Table::insert. 
Specify explicit length of key and char_set. + (Key_List::get_occurrence): Use explicit length of char_set. + (Key_List::set_determined): Likewise. + (Key_List::already_determined): Likewise. + (output_string): Add length argument. Output unprintable characters + using octal escape sequence. + (output_keyword_entry): Use explicit length of key. + (Key_List::output_lookup_array): Specify explicit length of key. + (output_switch_case): Likewise. + (Key_List::dump): Likewise. + * src/gen-perf.h (Gen_Perf::compute_disjoint_union): Add two length + arguments. + * src/gen-perf.cc (Gen_Perf::compute_disjoint_union): Likewise. Don't + stop when encountering NUL characters. Don't NUL terminate the + result. + (Gen_Perf::hash): Use explicit length of char_set. + (Gen_Perf::change): Specify explicit length of key. + * doc/help2man: New file, help2man version 1.022. * Makefile.devel (all): Add doc/gperf.1. (doc/gperf.1): New target. diff --git a/lib/hash.cc b/lib/hash.cc index 72a287f..b5bb4ad 100644 --- a/lib/hash.cc +++ b/lib/hash.cc @@ -1,36 +1,27 @@ /* -Copyright (C) 1990 Free Software Foundation +Copyright (C) 1990, 2000 Free Software Foundation written by Doug Lea (dl@rocky.oswego.edu) - -This file is part of the GNU C++ Library. This library is free -software; you can redistribute it and/or modify it under the terms of -the GNU Library General Public License as published by the Free -Software Foundation; either version 2 of the License, or (at your -option) any later version. This library is distributed in the hope -that it will be useful, but WITHOUT ANY WARRANTY; without even the -implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -PURPOSE. See the GNU Library General Public License for more details. -You should have received a copy of the GNU Library General Public -License along with this library; if not, write to the Free Software -Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ #include /* - some useful hash functions + Some useful hash function. + It's not a particularly good hash function (<< 5 would be better than << 4), + but people believe in it because it comes from Dragon book. */ -unsigned int hashpjw (const char* x) // From Dragon book, p436 +unsigned int +hashpjw (const char *x, unsigned int len) // From Dragon book, p436 { unsigned int h = 0; unsigned int g; - while (*x != 0) - { - h = (h << 4) + (unsigned char) *x++; - if ((g = h & 0xf0000000) != 0) - h = (h ^ (g >> 24)) ^ g; - } + for (; len > 0; len--) + { + h = (h << 4) + (unsigned char) *x++; + if ((g = h & 0xf0000000) != 0) + h = (h ^ (g >> 24)) ^ g; + } return h; } diff --git a/lib/hash.h b/lib/hash.h index a5f324b..5dbc92b 100644 --- a/lib/hash.h +++ b/lib/hash.h @@ -1,27 +1,15 @@ // This may look like C code, but it is really -*- C++ -*- /* -Copyright (C) 1988, 1992 Free Software Foundation +Copyright (C) 1988, 1992, 2000 Free Software Foundation written by Doug Lea (dl@rocky.oswego.edu) - -This file is part of the GNU C++ Library. This library is free -software; you can redistribute it and/or modify it under the terms of -the GNU Library General Public License as published by the Free -Software Foundation; either version 2 of the License, or (at your -option) any later version. This library is distributed in the hope -that it will be useful, but WITHOUT ANY WARRANTY; without even the -implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -PURPOSE. See the GNU Library General Public License for more details. -You should have received a copy of the GNU Library General Public -License along with this library; if not, write to the Free Software -Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #ifndef _hash_h #define _hash_h 1 -/* a hash function for null-terminated char* strings using the +/* a hash function for char[] arrays using the method described in Aho, Sethi, & Ullman, p 436. 
*/ -extern unsigned int hashpjw (const char*); +extern unsigned int hashpjw (const char *string, unsigned int len); #endif diff --git a/src/gen-perf.cc b/src/gen-perf.cc index 0d0ad41..0b5109d 100644 --- a/src/gen-perf.cc +++ b/src/gen-perf.cc @@ -1,6 +1,6 @@ /* Provides high-level routines to manipulate the keywork list structures the code generation output. - Copyright (C) 1989-1998 Free Software Foundation, Inc. + Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc. written by Douglas C. Schmidt (schmidt@ics.uci.edu) This file is part of GNU GPERF. @@ -81,37 +81,45 @@ Gen_Perf::Gen_Perf (void) } /* Merge two disjoint hash key multisets to form the ordered disjoint union of the sets. - (In a multiset, an element can occur multiple times). + (In a multiset, an element can occur multiple times.) Precondition: both set_1 and set_2 must be ordered. Returns the length of the combined set. */ inline int -Gen_Perf::compute_disjoint_union (const char *set_1, const char *set_2, char *set_3) +Gen_Perf::compute_disjoint_union (const char *set_1, int size_1, const char *set_2, int size_2, char *set_3) { T (Trace t ("Gen_Perf::compute_disjoint_union");) char *base = set_3; - while (*set_1 && *set_2) + while (size_1 > 0 && size_2 > 0) if (*set_1 == *set_2) - set_1++, set_2++; + set_1++, size_1--, set_2++, size_2--; else { - *set_3 = *set_1 < *set_2 ? 
*set_1++ : *set_2++; - if (set_3 == base || *set_3 != *(set_3-1)) set_3++; + char next; + if (*set_1 < *set_2) + next = *set_1++, size_1--; + else + next = *set_2++, size_2--; + if (set_3 == base || next != set_3[-1]) + *set_3++ = next; } - while (*set_1) + while (size_1 > 0) { - *set_3 = *set_1++; - if (set_3 == base || *set_3 != *(set_3-1)) set_3++; + char next; + next = *set_1++, size_1--; + if (set_3 == base || next != set_3[-1]) + *set_3++ = next; } - while (*set_2) + while (size_2 > 0) { - *set_3 = *set_2++; - if (set_3 == base || *set_3 != *(set_3-1)) set_3++; + char next; + next = *set_2++, size_2--; + if (set_3 == base || next != set_3[-1]) + *set_3++ = next; } - *set_3 = '\0'; return set_3 - base; } @@ -146,10 +154,12 @@ inline int Gen_Perf::hash (List_Node *key_node) { T (Trace t ("Gen_Perf::hash");) - int sum = option[NOLENGTH] ? 0 : key_node->length; + int sum = option[NOLENGTH] ? 0 : key_node->key_length; - for (const char *ptr = key_node->char_set; *ptr; ptr++) - sum += asso_values[(unsigned char)(*ptr)]; + const char *p = key_node->char_set; + int i = key_node->char_set_length; + for (; i > 0; p++, i--) + sum += asso_values[(unsigned char)(*p)]; return key_node->hash_value = sum; } @@ -209,28 +219,35 @@ Gen_Perf::change (List_Node *prior, List_Node *curr) { T (Trace t ("Gen_Perf::change");) static char *union_set; + int union_set_length; if (!union_set) - union_set = new char [2 * option.get_max_keysig_size () + 1]; + union_set = new char [2 * option.get_max_keysig_size ()]; if (option[DEBUG]) { - fprintf (stderr, "collision on keyword #%d, prior = \"%s\", curr = \"%s\" hash = %d\n", - num_done, prior->key, curr->key, curr->hash_value); + fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n", + num_done, + prior->key_length, prior->key, + curr->key_length, curr->key, + curr->hash_value); fflush (stderr); } - sort_set (union_set, compute_disjoint_union (prior->char_set, curr->char_set, union_set)); + 
union_set_length = compute_disjoint_union (prior->char_set, prior->char_set_length, curr->char_set, curr->char_set_length, union_set); + sort_set (union_set, union_set_length); /* Try changing some values, if change doesn't alter other values continue normal action. */ fewest_collisions++; - for (char *temp = union_set; *temp; temp++) - if (!affects_prev (*temp, curr)) + const char *p = union_set; + int i = union_set_length; + for (; i > 0; p++, i--) + if (!affects_prev (*p, curr)) { if (option[DEBUG]) { fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n", - *temp, temp - union_set + 1, asso_values[(unsigned char)(*temp)]); + *p, p - union_set + 1, asso_values[(unsigned char)(*p)]); fflush (stderr); } return; /* Good, doesn't affect previous hash values, we'll take it. */ diff --git a/src/gen-perf.h b/src/gen-perf.h index 09b8727..602d160 100644 --- a/src/gen-perf.h +++ b/src/gen-perf.h @@ -3,7 +3,7 @@ /* Provides high-level routines to manipulate the keyword list structures the code generation output. - Copyright (C) 1989-1998 Free Software Foundation, Inc. + Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc. written by Douglas C. Schmidt (schmidt@ics.uci.edu) This file is part of GNU GPERF. @@ -38,7 +38,7 @@ private: void change (List_Node *prior, List_Node *curr); int affects_prev (char c, List_Node *curr); static int hash (List_Node *key_node); - static int compute_disjoint_union (const char *set_1, const char *set_2, char *set_3); + static int compute_disjoint_union (const char *set_1, int size_1, const char *set_2, int size_2, char *set_3); static void sort_set (char *union_set, int len); public: diff --git a/src/hash-table.cc b/src/hash-table.cc index db8d6dd..a147674 100644 --- a/src/hash-table.cc +++ b/src/hash-table.cc @@ -1,5 +1,5 @@ /* Hash table for checking keyword links. Implemented using double hashing. - Copyright (C) 1989-1998 Free Software Foundation, Inc. + Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc. 
written by Douglas C. Schmidt (schmidt@ics.uci.edu) This file is part of GNU GPERF. @@ -26,8 +26,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ #include "options.h" #include "trace.h" -#define NIL(TYPE) (TYPE *)0 - /* The size of the hash table is always the smallest power of 2 >= the size indicated by the user. This allows several optimizations, including the use of double hashing and elimination of the mod instruction. @@ -37,8 +35,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ This compromises information hiding somewhat, but greatly reduces memory fragmentation, since we can now use alloca! */ -Hash_Table::Hash_Table (List_Node **table_ptr, int s): - table (table_ptr), size (s), collisions (0) +Hash_Table::Hash_Table (List_Node **table_ptr, int s, int ignore_len): + table (table_ptr), size (s), collisions (0), ignore_length (ignore_len) { T (Trace t ("Hash_Table::Hash_Table");) memset ((char *) table, 0, size * sizeof (*table)); @@ -60,8 +58,10 @@ Hash_Table::~Hash_Table (void) for (int i = size - 1; i >= 0; i--) if (table[i]) - fprintf (stderr, "%8d, %*s, %s\n", - i, field_width, table[i]->char_set, table[i]->key); + fprintf (stderr, "%8d, %*.*s, %.*s\n", + i, + field_width, table[i]->char_set_length, table[i]->char_set, + table[i]->key_length, table[i]->key); fprintf (stderr, "\nend dumping hash table\n\n"); } @@ -72,20 +72,24 @@ Hash_Table::~Hash_Table (void) Uses double hashing. 
*/ List_Node * -Hash_Table::operator() (List_Node *item, int ignore_length) +Hash_Table::insert (List_Node *item) { T (Trace t ("Hash_Table::operator()");) - unsigned hash_val = hashpjw (item->char_set); - int probe = hash_val & size - 1; - int increment = (hash_val ^ item->length | 1) & size - 1; + unsigned hash_val = hashpjw (item->char_set, item->char_set_length); + int probe = hash_val & (size - 1); + int increment = ((hash_val ^ item->key_length) | 1) & (size - 1); - while (table[probe] - && (strcmp (table[probe]->char_set, item->char_set) - || (!ignore_length && table[probe]->length != item->length))) + while (table[probe]) { + if (table[probe]->char_set_length == item->char_set_length + && memcmp (table[probe]->char_set, item->char_set, item->char_set_length) == 0 + && (ignore_length || table[probe]->key_length == item->key_length)) + return table[probe]; + collisions++; - probe = probe + increment & size - 1; + probe = (probe + increment) & (size - 1); } - return table[probe] ? table[probe] : (table[probe] = item, NIL (List_Node)); + table[probe] = item; + return (List_Node *) 0; } diff --git a/src/hash-table.h b/src/hash-table.h index 7d23999..86438d0 100644 --- a/src/hash-table.h +++ b/src/hash-table.h @@ -2,7 +2,7 @@ /* Hash table used to check for duplicate keyword entries. - Copyright (C) 1989-1998 Free Software Foundation, Inc. + Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc. written by Douglas C. Schmidt (schmidt@ics.uci.edu) This file is part of GNU GPERF. @@ -32,11 +32,12 @@ private: List_Node **table; /* Vector of pointers to linked lists of List_Node's. */ int size; /* Size of the vector. */ int collisions; /* Find out how well our double hashing is working! 
*/ + int ignore_length; public: - Hash_Table (List_Node **t, int s); + Hash_Table (List_Node **t, int s, int ignore_len); ~Hash_Table (void); - List_Node *operator () (List_Node *item, int ignore_length); + List_Node *insert (List_Node *item); }; #endif diff --git a/src/key-list.cc b/src/key-list.cc index 27a2ff8..157458c 100644 --- a/src/key-list.cc +++ b/src/key-list.cc @@ -21,6 +21,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ #include #include /* declares strncpy(), strchr() */ #include /* declares malloc(), free(), abs(), exit(), abort() */ +#include /* declares isprint() */ #include /* defines assert() */ #include /* defines SCHAR_MAX etc. */ #include "options.h" @@ -209,6 +210,148 @@ Key_List::set_output_types (void) } } +/* Extracts a key from an input line and creates a new List_Node for it. */ + +static List_Node * +parse_line (char *line, const char *delimiters) +{ + if (*line == '"') + { + /* Parse a string in ANSI C syntax. */ + char *key = new char[strlen(line)]; + char *kp = key; + const char *lp = line + 1; + + for (; *lp;) + { + char c = *lp; + + if (c == '\0') + { + fprintf (stderr, "unterminated string: %s\n", line); + exit (1); + } + else if (c == '\\') + { + c = *++lp; + switch (c) + { + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + { + int code = 0; + int count = 0; + while (count < 3 && *lp >= '0' && *lp <= '7') + { + code = (code << 3) + (*lp - '0'); + lp++; + count++; + } + if (code > UCHAR_MAX) + fprintf (stderr, "octal escape out of range: %s\n", line); + *kp = (char) code; + break; + } + case 'x': + { + int code = 0; + int count = 0; + lp++; + while ((*lp >= '0' && *lp <= '9') + || (*lp >= 'A' && *lp <= 'F') + || (*lp >= 'a' && *lp <= 'f')) + { + code = (code << 4) + + (*lp >= 'A' && *lp <= 'F' ? *lp - 'A' + 10 : + *lp >= 'a' && *lp <= 'f' ? 
*lp - 'a' + 10 : + *lp - '0'); + lp++; + count++; + } + if (count == 0) + fprintf (stderr, "hexadecimal escape without any hex digits: %s\n", line); + if (code > UCHAR_MAX) + fprintf (stderr, "hexadecimal escape out of range: %s\n", line); + *kp = (char) code; + break; + } + case '\\': case '\'': case '"': + *kp = c; + lp++; + break; + case 'n': + *kp = '\n'; + lp++; + break; + case 't': + *kp = '\t'; + lp++; + break; + case 'r': + *kp = '\r'; + lp++; + break; + case 'f': + *kp = '\f'; + lp++; + break; + case 'b': + *kp = '\b'; + lp++; + break; + case 'a': + *kp = '\a'; + lp++; + break; + case 'v': + *kp = '\v'; + lp++; + break; + default: + fprintf (stderr, "invalid escape sequence in string: %s\n", line); + exit (1); + } + } + else if (c == '"') + break; + else + { + *kp = c; + lp++; + } + kp++; + } + lp++; + if (*lp != '\0') + { + if (strchr (delimiters, *lp) == NULL) + { + fprintf (stderr, "string not followed by delimiter: %s\n", line); + exit (1); + } + lp++; + } + return new List_Node (key, kp - key, option[TYPE] ? lp : ""); + } + else + { + /* Not a string. Look for the delimiter. */ + int len = strcspn (line, delimiters); + const char *rest; + + if (line[len] == '\0') + rest = ""; + else + { + /* Quick hack to separate the key from the rest, killing the first + delimiter. */ + line[len] = '\0'; + rest = &line[len + 1]; + } + return new List_Node (line, len, option[TYPE] ? rest : ""); + } +} + /* Reads in all keys from standard input and creates a linked list pointed to by Head. This list is then quickly checked for ``links,'' i.e., unhashable elements possessing identical key sets and lengths. 
*/ @@ -235,13 +378,13 @@ Key_List::read_keys (void) const char *delimiter = option.get_delimiter (); List_Node *temp, *trail = 0; - head = new List_Node (ptr, strcspn (ptr, delimiter)); + head = parse_line (ptr, delimiter); for (temp = head; (ptr = Read_Line::get_line ()) && strcmp (ptr, "%%"); temp = temp->next) { - temp->next = new List_Node (ptr, strcspn (ptr, delimiter)); + temp->next = parse_line (ptr, delimiter); total_keys++; } @@ -266,14 +409,14 @@ Key_List::read_keys (void) #endif /* Make large hash table for efficiency. */ - Hash_Table found_link (table, table_size); + Hash_Table found_link (table, table_size, option[NOLENGTH]); /* Test whether there are any links and also set the maximum length of an identifier in the keyword list. */ for (temp = head; temp; temp = temp->next) { - List_Node *ptr = found_link (temp, option[NOLENGTH]); + List_Node *ptr = found_link.insert (temp); /* Check for links. We deal with these by building an equivalence class of all duplicate values (i.e., links) so that only 1 keyword is @@ -290,17 +433,19 @@ Key_List::read_keys (void) /* Complain if user hasn't enabled the duplicate option. */ if (!option[DUP] || option[DEBUG]) - fprintf (stderr, "Key link: \"%s\" = \"%s\", with key set \"%s\".\n", - temp->key, ptr->key, temp->char_set); + fprintf (stderr, "Key link: \"%.*s\" = \"%.*s\", with key set \"%.*s\".\n", + temp->key_length, temp->key, + ptr->key_length, ptr->key, + temp->char_set_length, temp->char_set); } else trail = temp; /* Update minimum and maximum keyword length, if needed. 
*/ - if (max_key_len < temp->length) - max_key_len = temp->length; - if (min_key_len > temp->length) - min_key_len = temp->length; + if (max_key_len < temp->key_length) + max_key_len = temp->key_length; + if (min_key_len > temp->key_length) + min_key_len = temp->key_length; } #if !LARGE_STACK_ARRAYS @@ -407,8 +552,10 @@ Key_List::get_occurrence (List_Node *ptr) T (Trace t ("Key_List::get_occurrence");) int value = 0; - for (const char *temp = ptr->char_set; *temp; temp++) - value += occurrences[(unsigned char)(*temp)]; + const char *p = ptr->char_set; + unsigned int i = ptr->char_set_length; + for (; i > 0; p++, i--) + value += occurrences[(unsigned char)(*p)]; return value; } @@ -420,8 +567,11 @@ inline void Key_List::set_determined (List_Node *ptr) { T (Trace t ("Key_List::set_determined");) - for (const char *temp = ptr->char_set; *temp; temp++) - determined[(unsigned char)(*temp)] = 1; + + const char *p = ptr->char_set; + unsigned int i = ptr->char_set_length; + for (; i > 0; p++, i--) + determined[(unsigned char)(*p)] = 1; } /* Returns TRUE if PTR's key set is already completely determined. */ @@ -432,8 +582,10 @@ Key_List::already_determined (List_Node *ptr) T (Trace t ("Key_List::already_determined");) int is_determined = 1; - for (const char *temp = ptr->char_set; is_determined && *temp; temp++) - is_determined = determined[(unsigned char)(*temp)]; + const char *p = ptr->char_set; + unsigned int i = ptr->char_set_length; + for (; is_determined && i > 0; p++, i--) + is_determined = determined[(unsigned char)(*p)]; return is_determined; } @@ -660,20 +812,30 @@ Key_List::output_constants (struct Output_Constants& style) /* ------------------------------------------------------------------------- */ /* Outputs a keyword, as a string: enclosed in double quotes, escaping - backslashes and double quote characters. */ + backslashes, double quote and unprintable characters. 
*/ static void -output_string (const char *key) +output_string (const char *key, int len) { T (Trace t ("output_string");) - char c; putchar ('"'); - while (c = *key++, c != '\0') + for (; len > 0; len--) { - if (c == '"' || c == '\\') - putchar ('\\'); - putchar (c); + unsigned char c = (unsigned char) *key++; + if (isprint (c)) + { + if (c == '"' || c == '\\') + putchar ('\\'); + putchar (c); + } + else + { + putchar ('\\'); + putchar ('0' + ((c >> 6) & 7)); + putchar ('0' + ((c >> 3) & 7)); + putchar ('0' + (c & 7)); + } } putchar ('"'); } @@ -1054,7 +1216,7 @@ Key_List::output_keylength_table (void) printf (","); if ((column++ % columns) == 0) printf("\n%s ", indent); - printf ("%3d", temp->length); + printf ("%3d", temp->key_length); /* Deal with links specially. */ if (temp->link) // implies option[DUP] @@ -1064,7 +1226,7 @@ Key_List::output_keylength_table (void) printf (","); if ((column++ % columns) == 0) printf("\n%s ", indent); - printf ("%3d", links->length); + printf ("%3d", links->key_length); } index++; @@ -1083,7 +1245,7 @@ output_keyword_entry (List_Node *temp, const char *indent) printf ("%s ", indent); if (option[TYPE]) printf ("{"); - output_string (temp->key); + output_string (temp->key, temp->key_length); if (option[TYPE]) { if (strlen (temp->rest) > 0) @@ -1238,8 +1400,8 @@ Key_List::output_lookup_array (void) int hash_value = temp->hash_value; lookup_array[hash_value] = temp->index; if (option[DEBUG]) - fprintf (stderr, "keyword = %s, index = %d\n", - temp->key, temp->index); + fprintf (stderr, "keyword = %.*s, index = %d\n", + temp->key_length, temp->key, temp->index); if (temp->link || (temp->next && hash_value == temp->next->hash_value)) { @@ -1255,8 +1417,8 @@ Key_List::output_lookup_array (void) dup_ptr->count++; if (option[DEBUG]) fprintf (stderr, - "static linked keyword = %s, index = %d\n", - ptr->key, ptr->index); + "static linked keyword = %.*s, index = %d\n", + ptr->key_length, ptr->key, ptr->index); } if (!(temp->next && 
hash_value == temp->next->hash_value)) @@ -1266,8 +1428,8 @@ Key_List::output_lookup_array (void) dup_ptr->count++; if (option[DEBUG]) - fprintf (stderr, "dynamic linked keyword = %s, index = %d\n", - temp->key, temp->index); + fprintf (stderr, "dynamic linked keyword = %.*s, index = %d\n", + temp->key_length, temp->key, temp->index); } assert (dup_ptr->count >= 2); dup_ptr++; @@ -1404,8 +1566,8 @@ output_switch_case (List_Node *list, int indent, int *jumps_away) T (Trace t ("output_switch_case");) if (option[DEBUG]) - printf ("%*s/* hash value = %4d, keyword = \"%s\" */\n", - indent, "", list->hash_value, list->key); + printf ("%*s/* hash value = %4d, keyword = \"%.*s\" */\n", + indent, "", list->hash_value, list->key_length, list->key); if (option[DUP] && (list->link @@ -1438,7 +1600,7 @@ output_switch_case (List_Node *list, int indent, int *jumps_away) { printf ("%*sif (len == %d)\n" "%*s {\n", - indent, "", list->length, + indent, "", list->key_length, indent, ""); indent += 4; } @@ -1447,7 +1609,7 @@ output_switch_case (List_Node *list, int indent, int *jumps_away) if (option[TYPE]) printf ("&%s[%d]", option.get_wordlist_name (), list->index); else - output_string (list->key); + output_string (list->key, list->key_length); printf (";\n"); printf ("%*sgoto compare;\n", indent, ""); @@ -1980,9 +2142,10 @@ Key_List::dump () field_width, "char_set"); for (List_Node *ptr = head; ptr; ptr = ptr->next) - fprintf (stderr, "%11d,%11d,%6d, %*s, %s\n", - ptr->hash_value, ptr->length, ptr->index, - field_width, ptr->char_set, ptr->key); + fprintf (stderr, "%11d,%11d,%6d, %*.*s, %.*s\n", + ptr->hash_value, ptr->key_length, ptr->index, + field_width, ptr->char_set_length, ptr->char_set, + ptr->key_length, ptr->key); } /* Simple-minded constructor action here... */ diff --git a/src/list-node.cc b/src/list-node.cc index 6c78889..1b6e142 100644 --- a/src/list-node.cc +++ b/src/list-node.cc @@ -1,5 +1,5 @@ /* Creates and initializes a new list node. 
- Copyright (C) 1989-1998 Free Software Foundation, Inc. + Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc. written by Douglas C. Schmidt (schmidt@ics.uci.edu) This file is part of GNU GPERF. @@ -57,23 +57,21 @@ List_Node::set_sort (char *base, int len) of the total number of keys seen so far. This is used to initialize the INDEX field to some useful value. */ -List_Node::List_Node (char *k, int len): link (0), next (0), - key (k), rest (option[TYPE] ? k + len + 1 : ""), length (len), index (0) +List_Node::List_Node (const char *k, int len, const char *r): + link (0), next (0), key (k), key_length (len), rest (r), index (0) { T (Trace t ("List_Node::List_Node");) - char *ptr = new char[(option[ALLCHARS] ? len : option.get_max_keysig_size ()) + 1]; - char *key_set = ptr; - k[len] = '\0'; /* Null terminate KEY to separate it from REST. */ + char *key_set = new char[(option[ALLCHARS] ? len : option.get_max_keysig_size ())]; + char *ptr = key_set; + int i; - if (option[ALLCHARS]) /* Use all the character position in the KEY. */ - for (; *k; k++, ptr++) + if (option[ALLCHARS]) /* Use all the character positions in the KEY. */ + for (i = len; i > 0; k++, ptr++, i--) ++occurrences[(unsigned char)(*ptr = *k)]; else /* Only use those character positions specified by the user. */ { - int i; - - /* Iterate thru the list of key_positions, initializing occurrences table - and char_set (via char * pointer ptr). */ + /* Iterate through the list of key_positions, initializing occurrences table + and char_set (via char * pointer ptr). */ for (option.reset (); (i = option.get ()) != EOS; ) { @@ -94,8 +92,10 @@ List_Node::List_Node (char *k, int len): link (0), next (0), exit (1); } } - *ptr = '\0'; /* Terminate this bastard.... */ + /* Sort the KEY_SET items alphabetically. 
*/ set_sort (key_set, ptr - key_set); + char_set = key_set; + char_set_length = ptr - key_set; } diff --git a/src/list-node.h b/src/list-node.h index 630d91a..3bd21b3 100644 --- a/src/list-node.h +++ b/src/list-node.h @@ -2,7 +2,7 @@ /* Data and function members for defining values and operations of a list node. - Copyright (C) 1989-1998 Free Software Foundation, Inc. + Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc. written by Douglas C. Schmidt (schmidt@ics.uci.edu) This file is part of GNU GPERF. @@ -31,14 +31,15 @@ struct List_Node : private Vectors List_Node *link; /* TRUE if key has an identical KEY_SET as another key. */ List_Node *next; /* Points to next element on the list. */ const char *key; /* Each keyword string stored here. */ + int key_length; /* Length of the key. */ const char *rest; /* Additional information for building hash function. */ const char *char_set; /* Set of characters to hash, specified by user. */ - int length; /* Length of the key. */ + int char_set_length; /* Length of char_set. */ int hash_value; /* Hash value for the key. */ int occurrence; /* A metric for frequency of key set occurrences. */ int index; /* Position of this node relative to other nodes. */ - List_Node (char *key, int len); + List_Node (const char *key, int len, const char *rest); static void set_sort (char *base, int len); };