1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 13:09:22 +00:00

Allow the use of embedded NULs in keys.

This commit is contained in:
Bruno Haible
2000-08-20 16:50:54 +00:00
parent cb286153b2
commit c0eb520394
10 changed files with 345 additions and 136 deletions

View File

@@ -1,5 +1,49 @@
2000-08-20 Bruno Haible <bruno@linuix.math.u-bordeaux.fr> 2000-08-20 Bruno Haible <bruno@linuix.math.u-bordeaux.fr>
Allow the use of embedded NULs in keys.
* lib/hash.h (hashpjw): Add a length argument.
* lib/hash.cc (hashpjw): Likewise. Don't stop when encountering a NUL
character.
* src/hash-table.h (Hash_Table constructor): Add ignore_len argument.
(Hash_Table::ignore_length): New field.
(Hash_Table::insert): Renamed from Hash_Table::operator(). Remove
ignore_length argument.
* src/hash-table.cc (NIL): Remove macro.
(Hash_Table constructor): Add ignore_len argument. Use it to
initialize ignore_length.
(Hash_Table destructor): Specify explicit length of char_set and
key.
(Hash_Table::insert): Renamed from Hash_Table::operator(). Remove
ignore_length argument. Pass explicit length to hashpjw. Compare
char_set using memcmp, not strcmp.
* src/list-node.h (List_Node): Rename field length to key_length.
New field char_set_length.
(List_Node constructor): Accept key and rest, not the entire line.
* src/list-node.cc (List_Node constructor): Accept key and rest, not
the entire line. Don't NUL terminate key and char_set. Initialize
char_set_length field.
* src/key-list.cc: Include <ctype.h>.
(parse_line): New function.
(Key_List::read_keys): Call parse_line instead of new List_Node.
Pass option[NOLENGTH] to Hash_Table constructor, not
Hash_Table::insert. Specify explicit length of key and char_set.
(Key_List::get_occurrence): Use explicit length of char_set.
(Key_List::set_determined): Likewise.
(Key_List::already_determined): Likewise.
(output_string): Add length argument. Output unprintable characters
using octal escape sequence.
(output_keyword_entry): Use explicit length of key.
(Key_List::output_lookup_array): Specify explicit length of key.
(output_switch_case): Likewise.
(Key_List::dump): Likewise.
* src/gen-perf.h (Gen_Perf::compute_disjoint_union): Add two length
arguments.
* src/gen-perf.cc (Gen_Perf::compute_disjoint_union): Likewise. Don't
stop when encountering NUL characters. Don't NUL terminate the
result.
(Gen_Perf::hash): Use explicit length of char_set.
(Gen_Perf::change): Specify explicit length of key.
* doc/help2man: New file, help2man version 1.022. * doc/help2man: New file, help2man version 1.022.
* Makefile.devel (all): Add doc/gperf.1. * Makefile.devel (all): Add doc/gperf.1.
(doc/gperf.1): New target. (doc/gperf.1): New target.

View File

@@ -1,32 +1,23 @@
/* /*
Copyright (C) 1990 Free Software Foundation Copyright (C) 1990, 2000 Free Software Foundation
written by Doug Lea (dl@rocky.oswego.edu) written by Doug Lea (dl@rocky.oswego.edu)
This file is part of the GNU C++ Library. This library is free
software; you can redistribute it and/or modify it under the terms of
the GNU Library General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version. This library is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this library; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/ */
#include <hash.h> #include <hash.h>
/* /*
some useful hash functions Some useful hash function.
It's not a particularly good hash function (<< 5 would be better than << 4),
but people believe in it because it comes from Dragon book.
*/ */
unsigned int hashpjw (const char* x) // From Dragon book, p436 unsigned int
hashpjw (const char *x, unsigned int len) // From Dragon book, p436
{ {
unsigned int h = 0; unsigned int h = 0;
unsigned int g; unsigned int g;
while (*x != 0) for (; len > 0; len--)
{ {
h = (h << 4) + (unsigned char) *x++; h = (h << 4) + (unsigned char) *x++;
if ((g = h & 0xf0000000) != 0) if ((g = h & 0xf0000000) != 0)

View File

@@ -1,27 +1,15 @@
// This may look like C code, but it is really -*- C++ -*- // This may look like C code, but it is really -*- C++ -*-
/* /*
Copyright (C) 1988, 1992 Free Software Foundation Copyright (C) 1988, 1992, 2000 Free Software Foundation
written by Doug Lea (dl@rocky.oswego.edu) written by Doug Lea (dl@rocky.oswego.edu)
This file is part of the GNU C++ Library. This library is free
software; you can redistribute it and/or modify it under the terms of
the GNU Library General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version. This library is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this library; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/ */
#ifndef _hash_h #ifndef _hash_h
#define _hash_h 1 #define _hash_h 1
/* a hash function for null-terminated char* strings using the /* a hash function for char[] arrays using the
method described in Aho, Sethi, & Ullman, p 436. */ method described in Aho, Sethi, & Ullman, p 436. */
extern unsigned int hashpjw (const char*); extern unsigned int hashpjw (const char *string, unsigned int len);
#endif #endif

View File

@@ -1,6 +1,6 @@
/* Provides high-level routines to manipulate the keyword list /* Provides high-level routines to manipulate the keyword list
structures the code generation output. structures the code generation output.
Copyright (C) 1989-1998 Free Software Foundation, Inc. Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc.
written by Douglas C. Schmidt (schmidt@ics.uci.edu) written by Douglas C. Schmidt (schmidt@ics.uci.edu)
This file is part of GNU GPERF. This file is part of GNU GPERF.
@@ -81,37 +81,45 @@ Gen_Perf::Gen_Perf (void)
} }
/* Merge two disjoint hash key multisets to form the ordered disjoint union of the sets. /* Merge two disjoint hash key multisets to form the ordered disjoint union of the sets.
(In a multiset, an element can occur multiple times). (In a multiset, an element can occur multiple times.)
Precondition: both set_1 and set_2 must be ordered. Returns the length Precondition: both set_1 and set_2 must be ordered. Returns the length
of the combined set. */ of the combined set. */
inline int inline int
Gen_Perf::compute_disjoint_union (const char *set_1, const char *set_2, char *set_3) Gen_Perf::compute_disjoint_union (const char *set_1, int size_1, const char *set_2, int size_2, char *set_3)
{ {
T (Trace t ("Gen_Perf::compute_disjoint_union");) T (Trace t ("Gen_Perf::compute_disjoint_union");)
char *base = set_3; char *base = set_3;
while (*set_1 && *set_2) while (size_1 > 0 && size_2 > 0)
if (*set_1 == *set_2) if (*set_1 == *set_2)
set_1++, set_2++; set_1++, size_1--, set_2++, size_2--;
else else
{ {
*set_3 = *set_1 < *set_2 ? *set_1++ : *set_2++; char next;
if (set_3 == base || *set_3 != *(set_3-1)) set_3++; if (*set_1 < *set_2)
next = *set_1++, size_1--;
else
next = *set_2++, size_2--;
if (set_3 == base || next != set_3[-1])
*set_3++ = next;
} }
while (*set_1) while (size_1 > 0)
{ {
*set_3 = *set_1++; char next;
if (set_3 == base || *set_3 != *(set_3-1)) set_3++; next = *set_1++, size_1--;
if (set_3 == base || next != set_3[-1])
*set_3++ = next;
} }
while (*set_2) while (size_2 > 0)
{ {
*set_3 = *set_2++; char next;
if (set_3 == base || *set_3 != *(set_3-1)) set_3++; next = *set_2++, size_2--;
if (set_3 == base || next != set_3[-1])
*set_3++ = next;
} }
*set_3 = '\0';
return set_3 - base; return set_3 - base;
} }
@@ -146,10 +154,12 @@ inline int
Gen_Perf::hash (List_Node *key_node) Gen_Perf::hash (List_Node *key_node)
{ {
T (Trace t ("Gen_Perf::hash");) T (Trace t ("Gen_Perf::hash");)
int sum = option[NOLENGTH] ? 0 : key_node->length; int sum = option[NOLENGTH] ? 0 : key_node->key_length;
for (const char *ptr = key_node->char_set; *ptr; ptr++) const char *p = key_node->char_set;
sum += asso_values[(unsigned char)(*ptr)]; int i = key_node->char_set_length;
for (; i > 0; p++, i--)
sum += asso_values[(unsigned char)(*p)];
return key_node->hash_value = sum; return key_node->hash_value = sum;
} }
@@ -209,28 +219,35 @@ Gen_Perf::change (List_Node *prior, List_Node *curr)
{ {
T (Trace t ("Gen_Perf::change");) T (Trace t ("Gen_Perf::change");)
static char *union_set; static char *union_set;
int union_set_length;
if (!union_set) if (!union_set)
union_set = new char [2 * option.get_max_keysig_size () + 1]; union_set = new char [2 * option.get_max_keysig_size ()];
if (option[DEBUG]) if (option[DEBUG])
{ {
fprintf (stderr, "collision on keyword #%d, prior = \"%s\", curr = \"%s\" hash = %d\n", fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
num_done, prior->key, curr->key, curr->hash_value); num_done,
prior->key_length, prior->key,
curr->key_length, curr->key,
curr->hash_value);
fflush (stderr); fflush (stderr);
} }
sort_set (union_set, compute_disjoint_union (prior->char_set, curr->char_set, union_set)); union_set_length = compute_disjoint_union (prior->char_set, prior->char_set_length, curr->char_set, curr->char_set_length, union_set);
sort_set (union_set, union_set_length);
/* Try changing some values, if change doesn't alter other values continue normal action. */ /* Try changing some values, if change doesn't alter other values continue normal action. */
fewest_collisions++; fewest_collisions++;
for (char *temp = union_set; *temp; temp++) const char *p = union_set;
if (!affects_prev (*temp, curr)) int i = union_set_length;
for (; i > 0; p++, i--)
if (!affects_prev (*p, curr))
{ {
if (option[DEBUG]) if (option[DEBUG])
{ {
fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n", fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n",
*temp, temp - union_set + 1, asso_values[(unsigned char)(*temp)]); *p, p - union_set + 1, asso_values[(unsigned char)(*p)]);
fflush (stderr); fflush (stderr);
} }
return; /* Good, doesn't affect previous hash values, we'll take it. */ return; /* Good, doesn't affect previous hash values, we'll take it. */

View File

@@ -3,7 +3,7 @@
/* Provides high-level routines to manipulate the keyword list /* Provides high-level routines to manipulate the keyword list
structures the code generation output. structures the code generation output.
Copyright (C) 1989-1998 Free Software Foundation, Inc. Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc.
written by Douglas C. Schmidt (schmidt@ics.uci.edu) written by Douglas C. Schmidt (schmidt@ics.uci.edu)
This file is part of GNU GPERF. This file is part of GNU GPERF.
@@ -38,7 +38,7 @@ private:
void change (List_Node *prior, List_Node *curr); void change (List_Node *prior, List_Node *curr);
int affects_prev (char c, List_Node *curr); int affects_prev (char c, List_Node *curr);
static int hash (List_Node *key_node); static int hash (List_Node *key_node);
static int compute_disjoint_union (const char *set_1, const char *set_2, char *set_3); static int compute_disjoint_union (const char *set_1, int size_1, const char *set_2, int size_2, char *set_3);
static void sort_set (char *union_set, int len); static void sort_set (char *union_set, int len);
public: public:

View File

@@ -1,5 +1,5 @@
/* Hash table for checking keyword links. Implemented using double hashing. /* Hash table for checking keyword links. Implemented using double hashing.
Copyright (C) 1989-1998 Free Software Foundation, Inc. Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc.
written by Douglas C. Schmidt (schmidt@ics.uci.edu) written by Douglas C. Schmidt (schmidt@ics.uci.edu)
This file is part of GNU GPERF. This file is part of GNU GPERF.
@@ -26,8 +26,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
#include "options.h" #include "options.h"
#include "trace.h" #include "trace.h"
#define NIL(TYPE) (TYPE *)0
/* The size of the hash table is always the smallest power of 2 >= the size /* The size of the hash table is always the smallest power of 2 >= the size
indicated by the user. This allows several optimizations, including indicated by the user. This allows several optimizations, including
the use of double hashing and elimination of the mod instruction. the use of double hashing and elimination of the mod instruction.
@@ -37,8 +35,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
This compromises information hiding somewhat, but greatly reduces This compromises information hiding somewhat, but greatly reduces
memory fragmentation, since we can now use alloca! */ memory fragmentation, since we can now use alloca! */
Hash_Table::Hash_Table (List_Node **table_ptr, int s): Hash_Table::Hash_Table (List_Node **table_ptr, int s, int ignore_len):
table (table_ptr), size (s), collisions (0) table (table_ptr), size (s), collisions (0), ignore_length (ignore_len)
{ {
T (Trace t ("Hash_Table::Hash_Table");) T (Trace t ("Hash_Table::Hash_Table");)
memset ((char *) table, 0, size * sizeof (*table)); memset ((char *) table, 0, size * sizeof (*table));
@@ -60,8 +58,10 @@ Hash_Table::~Hash_Table (void)
for (int i = size - 1; i >= 0; i--) for (int i = size - 1; i >= 0; i--)
if (table[i]) if (table[i])
fprintf (stderr, "%8d, %*s, %s\n", fprintf (stderr, "%8d, %*.*s, %.*s\n",
i, field_width, table[i]->char_set, table[i]->key); i,
field_width, table[i]->char_set_length, table[i]->char_set,
table[i]->key_length, table[i]->key);
fprintf (stderr, "\nend dumping hash table\n\n"); fprintf (stderr, "\nend dumping hash table\n\n");
} }
@@ -72,20 +72,24 @@ Hash_Table::~Hash_Table (void)
Uses double hashing. */ Uses double hashing. */
List_Node * List_Node *
Hash_Table::operator() (List_Node *item, int ignore_length) Hash_Table::insert (List_Node *item)
{ {
T (Trace t ("Hash_Table::operator()");) T (Trace t ("Hash_Table::operator()");)
unsigned hash_val = hashpjw (item->char_set); unsigned hash_val = hashpjw (item->char_set, item->char_set_length);
int probe = hash_val & size - 1; int probe = hash_val & (size - 1);
int increment = (hash_val ^ item->length | 1) & size - 1; int increment = ((hash_val ^ item->key_length) | 1) & (size - 1);
while (table[probe] while (table[probe])
&& (strcmp (table[probe]->char_set, item->char_set)
|| (!ignore_length && table[probe]->length != item->length)))
{ {
if (table[probe]->char_set_length == item->char_set_length
&& memcmp (table[probe]->char_set, item->char_set, item->char_set_length) == 0
&& (ignore_length || table[probe]->key_length == item->key_length))
return table[probe];
collisions++; collisions++;
probe = probe + increment & size - 1; probe = (probe + increment) & (size - 1);
} }
return table[probe] ? table[probe] : (table[probe] = item, NIL (List_Node)); table[probe] = item;
return (List_Node *) 0;
} }

View File

@@ -2,7 +2,7 @@
/* Hash table used to check for duplicate keyword entries. /* Hash table used to check for duplicate keyword entries.
Copyright (C) 1989-1998 Free Software Foundation, Inc. Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc.
written by Douglas C. Schmidt (schmidt@ics.uci.edu) written by Douglas C. Schmidt (schmidt@ics.uci.edu)
This file is part of GNU GPERF. This file is part of GNU GPERF.
@@ -32,11 +32,12 @@ private:
List_Node **table; /* Vector of pointers to linked lists of List_Node's. */ List_Node **table; /* Vector of pointers to linked lists of List_Node's. */
int size; /* Size of the vector. */ int size; /* Size of the vector. */
int collisions; /* Find out how well our double hashing is working! */ int collisions; /* Find out how well our double hashing is working! */
int ignore_length;
public: public:
Hash_Table (List_Node **t, int s); Hash_Table (List_Node **t, int s, int ignore_len);
~Hash_Table (void); ~Hash_Table (void);
List_Node *operator () (List_Node *item, int ignore_length); List_Node *insert (List_Node *item);
}; };
#endif #endif

View File

@@ -21,6 +21,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
#include <stdio.h> #include <stdio.h>
#include <string.h> /* declares strncpy(), strchr() */ #include <string.h> /* declares strncpy(), strchr() */
#include <stdlib.h> /* declares malloc(), free(), abs(), exit(), abort() */ #include <stdlib.h> /* declares malloc(), free(), abs(), exit(), abort() */
#include <ctype.h> /* declares isprint() */
#include <assert.h> /* defines assert() */ #include <assert.h> /* defines assert() */
#include <limits.h> /* defines SCHAR_MAX etc. */ #include <limits.h> /* defines SCHAR_MAX etc. */
#include "options.h" #include "options.h"
@@ -209,6 +210,148 @@ Key_List::set_output_types (void)
} }
} }
/* Extracts a key from an input line and creates a new List_Node for it.

   LINE is one NUL-terminated input line.  DELIMITERS is the set of
   characters that may separate the key from the rest of the line.

   Two syntaxes are accepted:

   - If LINE starts with a double quote, the key is a string literal in
     ANSI C syntax.  Octal (\ooo), hexadecimal (\x...) and the usual
     single-character escapes are decoded, so the key may contain embedded
     NUL bytes.  The decoded key is stored in a freshly allocated buffer
     and is NOT NUL terminated; its length is passed to List_Node.
     The closing quote must be followed by end of line or by a delimiter.

   - Otherwise the key is everything up to the first delimiter.  LINE is
     modified in place: the first delimiter is overwritten with NUL.

   The text after the delimiter is handed to List_Node as the "rest" only
   when option[TYPE] is set; otherwise an empty string is passed.

   Malformed strings (unterminated, invalid escape, missing delimiter)
   print a diagnostic on stderr and terminate the program with exit (1).
   Out-of-range or empty numeric escapes only print a diagnostic.  */
static List_Node *
parse_line (char *line, const char *delimiters)
{
  if (*line == '"')
    {
      /* Parse a string in ANSI C syntax.
         Every decoded byte consumes at least one input byte and the opening
         quote is skipped, so strlen (line) bytes are always sufficient.  */
      char *key = new char[strlen(line)];
      char *kp = key;
      const char *lp = line + 1;

      /* Loop unconditionally: the end of the line must be seen inside the
         loop body so that a missing closing quote is reported.  Testing *lp
         in the loop header would skip that check and let the code below
         step past the terminating NUL of LINE.  */
      for (;;)
        {
          char c = *lp;
          if (c == '\0')
            {
              fprintf (stderr, "unterminated string: %s\n", line);
              exit (1);
            }
          else if (c == '\\')
            {
              c = *++lp;
              switch (c)
                {
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                  {
                    /* Up to three octal digits; lp already points at the
                       first one.  */
                    int code = 0;
                    int count = 0;
                    while (count < 3 && *lp >= '0' && *lp <= '7')
                      {
                        code = (code << 3) + (*lp - '0');
                        lp++;
                        count++;
                      }
                    if (code > UCHAR_MAX)
                      fprintf (stderr, "octal escape out of range: %s\n", line);
                    *kp = (char) code;
                    break;
                  }
                case 'x':
                  {
                    /* Any number of hex digits following the 'x'.  */
                    int code = 0;
                    int count = 0;
                    lp++;
                    while ((*lp >= '0' && *lp <= '9')
                           || (*lp >= 'A' && *lp <= 'F')
                           || (*lp >= 'a' && *lp <= 'f'))
                      {
                        code = (code << 4)
                               + (*lp >= 'A' && *lp <= 'F' ? *lp - 'A' + 10 :
                                  *lp >= 'a' && *lp <= 'f' ? *lp - 'a' + 10 :
                                  *lp - '0');
                        lp++;
                        count++;
                      }
                    if (count == 0)
                      fprintf (stderr, "hexadecimal escape without any hex digits: %s\n", line);
                    if (code > UCHAR_MAX)
                      fprintf (stderr, "hexadecimal escape out of range: %s\n", line);
                    *kp = (char) code;
                    break;
                  }
                case '\\': case '\'': case '"':
                  *kp = c;
                  lp++;
                  break;
                case 'n':
                  *kp = '\n';
                  lp++;
                  break;
                case 't':
                  *kp = '\t';
                  lp++;
                  break;
                case 'r':
                  *kp = '\r';
                  lp++;
                  break;
                case 'f':
                  *kp = '\f';
                  lp++;
                  break;
                case 'b':
                  *kp = '\b';
                  lp++;
                  break;
                case 'a':
                  *kp = '\a';
                  lp++;
                  break;
                case 'v':
                  *kp = '\v';
                  lp++;
                  break;
                default:
                  /* Also reached when the backslash is the last character
                     of the line (c == '\0').  */
                  fprintf (stderr, "invalid escape sequence in string: %s\n", line);
                  exit (1);
                }
            }
          else if (c == '"')
            break;
          else
            {
              *kp = c;
              lp++;
            }
          kp++;
        }

      /* Skip the closing quote.  */
      lp++;
      if (*lp != '\0')
        {
          if (strchr (delimiters, *lp) == NULL)
            {
              fprintf (stderr, "string not followed by delimiter: %s\n", line);
              exit (1);
            }
          /* Skip the delimiter; lp now points at the rest of the line.  */
          lp++;
        }
      return new List_Node (key, kp - key, option[TYPE] ? lp : "");
    }
  else
    {
      /* Not a string. Look for the delimiter. */
      int len = strcspn (line, delimiters);
      const char *rest;

      if (line[len] == '\0')
        rest = "";
      else
        {
          /* Quick hack to separate the key from the rest, killing the first
             delimiter. */
          line[len] = '\0';
          rest = &line[len + 1];
        }
      return new List_Node (line, len, option[TYPE] ? rest : "");
    }
}
/* Reads in all keys from standard input and creates a linked list pointed /* Reads in all keys from standard input and creates a linked list pointed
to by Head. This list is then quickly checked for ``links,'' i.e., to by Head. This list is then quickly checked for ``links,'' i.e.,
unhashable elements possessing identical key sets and lengths. */ unhashable elements possessing identical key sets and lengths. */
@@ -235,13 +378,13 @@ Key_List::read_keys (void)
const char *delimiter = option.get_delimiter (); const char *delimiter = option.get_delimiter ();
List_Node *temp, *trail = 0; List_Node *temp, *trail = 0;
head = new List_Node (ptr, strcspn (ptr, delimiter)); head = parse_line (ptr, delimiter);
for (temp = head; for (temp = head;
(ptr = Read_Line::get_line ()) && strcmp (ptr, "%%"); (ptr = Read_Line::get_line ()) && strcmp (ptr, "%%");
temp = temp->next) temp = temp->next)
{ {
temp->next = new List_Node (ptr, strcspn (ptr, delimiter)); temp->next = parse_line (ptr, delimiter);
total_keys++; total_keys++;
} }
@@ -266,14 +409,14 @@ Key_List::read_keys (void)
#endif #endif
/* Make large hash table for efficiency. */ /* Make large hash table for efficiency. */
Hash_Table found_link (table, table_size); Hash_Table found_link (table, table_size, option[NOLENGTH]);
/* Test whether there are any links and also set the maximum length of /* Test whether there are any links and also set the maximum length of
an identifier in the keyword list. */ an identifier in the keyword list. */
for (temp = head; temp; temp = temp->next) for (temp = head; temp; temp = temp->next)
{ {
List_Node *ptr = found_link (temp, option[NOLENGTH]); List_Node *ptr = found_link.insert (temp);
/* Check for links. We deal with these by building an equivalence class /* Check for links. We deal with these by building an equivalence class
of all duplicate values (i.e., links) so that only 1 keyword is of all duplicate values (i.e., links) so that only 1 keyword is
@@ -290,17 +433,19 @@ Key_List::read_keys (void)
/* Complain if user hasn't enabled the duplicate option. */ /* Complain if user hasn't enabled the duplicate option. */
if (!option[DUP] || option[DEBUG]) if (!option[DUP] || option[DEBUG])
fprintf (stderr, "Key link: \"%s\" = \"%s\", with key set \"%s\".\n", fprintf (stderr, "Key link: \"%.*s\" = \"%.*s\", with key set \"%.*s\".\n",
temp->key, ptr->key, temp->char_set); temp->key_length, temp->key,
ptr->key_length, ptr->key,
temp->char_set_length, temp->char_set);
} }
else else
trail = temp; trail = temp;
/* Update minimum and maximum keyword length, if needed. */ /* Update minimum and maximum keyword length, if needed. */
if (max_key_len < temp->length) if (max_key_len < temp->key_length)
max_key_len = temp->length; max_key_len = temp->key_length;
if (min_key_len > temp->length) if (min_key_len > temp->key_length)
min_key_len = temp->length; min_key_len = temp->key_length;
} }
#if !LARGE_STACK_ARRAYS #if !LARGE_STACK_ARRAYS
@@ -407,8 +552,10 @@ Key_List::get_occurrence (List_Node *ptr)
T (Trace t ("Key_List::get_occurrence");) T (Trace t ("Key_List::get_occurrence");)
int value = 0; int value = 0;
for (const char *temp = ptr->char_set; *temp; temp++) const char *p = ptr->char_set;
value += occurrences[(unsigned char)(*temp)]; unsigned int i = ptr->char_set_length;
for (; i > 0; p++, i--)
value += occurrences[(unsigned char)(*p)];
return value; return value;
} }
@@ -420,8 +567,11 @@ inline void
Key_List::set_determined (List_Node *ptr) Key_List::set_determined (List_Node *ptr)
{ {
T (Trace t ("Key_List::set_determined");) T (Trace t ("Key_List::set_determined");)
for (const char *temp = ptr->char_set; *temp; temp++)
determined[(unsigned char)(*temp)] = 1; const char *p = ptr->char_set;
unsigned int i = ptr->char_set_length;
for (; i > 0; p++, i--)
determined[(unsigned char)(*p)] = 1;
} }
/* Returns TRUE if PTR's key set is already completely determined. */ /* Returns TRUE if PTR's key set is already completely determined. */
@@ -432,8 +582,10 @@ Key_List::already_determined (List_Node *ptr)
T (Trace t ("Key_List::already_determined");) T (Trace t ("Key_List::already_determined");)
int is_determined = 1; int is_determined = 1;
for (const char *temp = ptr->char_set; is_determined && *temp; temp++) const char *p = ptr->char_set;
is_determined = determined[(unsigned char)(*temp)]; unsigned int i = ptr->char_set_length;
for (; is_determined && i > 0; p++, i--)
is_determined = determined[(unsigned char)(*p)];
return is_determined; return is_determined;
} }
@@ -660,21 +812,31 @@ Key_List::output_constants (struct Output_Constants& style)
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
/* Outputs a keyword, as a string: enclosed in double quotes, escaping /* Outputs a keyword, as a string: enclosed in double quotes, escaping
backslashes and double quote characters. */ backslashes, double quote and unprintable characters. */
static void static void
output_string (const char *key) output_string (const char *key, int len)
{ {
T (Trace t ("output_string");) T (Trace t ("output_string");)
char c;
putchar ('"'); putchar ('"');
while (c = *key++, c != '\0') for (; len > 0; len--)
{
unsigned char c = (unsigned char) *key++;
if (isprint (c))
{ {
if (c == '"' || c == '\\') if (c == '"' || c == '\\')
putchar ('\\'); putchar ('\\');
putchar (c); putchar (c);
} }
else
{
putchar ('\\');
putchar ('0' + ((c >> 6) & 7));
putchar ('0' + ((c >> 3) & 7));
putchar ('0' + (c & 7));
}
}
putchar ('"'); putchar ('"');
} }
@@ -1054,7 +1216,7 @@ Key_List::output_keylength_table (void)
printf (","); printf (",");
if ((column++ % columns) == 0) if ((column++ % columns) == 0)
printf("\n%s ", indent); printf("\n%s ", indent);
printf ("%3d", temp->length); printf ("%3d", temp->key_length);
/* Deal with links specially. */ /* Deal with links specially. */
if (temp->link) // implies option[DUP] if (temp->link) // implies option[DUP]
@@ -1064,7 +1226,7 @@ Key_List::output_keylength_table (void)
printf (","); printf (",");
if ((column++ % columns) == 0) if ((column++ % columns) == 0)
printf("\n%s ", indent); printf("\n%s ", indent);
printf ("%3d", links->length); printf ("%3d", links->key_length);
} }
index++; index++;
@@ -1083,7 +1245,7 @@ output_keyword_entry (List_Node *temp, const char *indent)
printf ("%s ", indent); printf ("%s ", indent);
if (option[TYPE]) if (option[TYPE])
printf ("{"); printf ("{");
output_string (temp->key); output_string (temp->key, temp->key_length);
if (option[TYPE]) if (option[TYPE])
{ {
if (strlen (temp->rest) > 0) if (strlen (temp->rest) > 0)
@@ -1238,8 +1400,8 @@ Key_List::output_lookup_array (void)
int hash_value = temp->hash_value; int hash_value = temp->hash_value;
lookup_array[hash_value] = temp->index; lookup_array[hash_value] = temp->index;
if (option[DEBUG]) if (option[DEBUG])
fprintf (stderr, "keyword = %s, index = %d\n", fprintf (stderr, "keyword = %.*s, index = %d\n",
temp->key, temp->index); temp->key_length, temp->key, temp->index);
if (temp->link if (temp->link
|| (temp->next && hash_value == temp->next->hash_value)) || (temp->next && hash_value == temp->next->hash_value))
{ {
@@ -1255,8 +1417,8 @@ Key_List::output_lookup_array (void)
dup_ptr->count++; dup_ptr->count++;
if (option[DEBUG]) if (option[DEBUG])
fprintf (stderr, fprintf (stderr,
"static linked keyword = %s, index = %d\n", "static linked keyword = %.*s, index = %d\n",
ptr->key, ptr->index); ptr->key_length, ptr->key, ptr->index);
} }
if (!(temp->next && hash_value == temp->next->hash_value)) if (!(temp->next && hash_value == temp->next->hash_value))
@@ -1266,8 +1428,8 @@ Key_List::output_lookup_array (void)
dup_ptr->count++; dup_ptr->count++;
if (option[DEBUG]) if (option[DEBUG])
fprintf (stderr, "dynamic linked keyword = %s, index = %d\n", fprintf (stderr, "dynamic linked keyword = %.*s, index = %d\n",
temp->key, temp->index); temp->key_length, temp->key, temp->index);
} }
assert (dup_ptr->count >= 2); assert (dup_ptr->count >= 2);
dup_ptr++; dup_ptr++;
@@ -1404,8 +1566,8 @@ output_switch_case (List_Node *list, int indent, int *jumps_away)
T (Trace t ("output_switch_case");) T (Trace t ("output_switch_case");)
if (option[DEBUG]) if (option[DEBUG])
printf ("%*s/* hash value = %4d, keyword = \"%s\" */\n", printf ("%*s/* hash value = %4d, keyword = \"%.*s\" */\n",
indent, "", list->hash_value, list->key); indent, "", list->hash_value, list->key_length, list->key);
if (option[DUP] if (option[DUP]
&& (list->link && (list->link
@@ -1438,7 +1600,7 @@ output_switch_case (List_Node *list, int indent, int *jumps_away)
{ {
printf ("%*sif (len == %d)\n" printf ("%*sif (len == %d)\n"
"%*s {\n", "%*s {\n",
indent, "", list->length, indent, "", list->key_length,
indent, ""); indent, "");
indent += 4; indent += 4;
} }
@@ -1447,7 +1609,7 @@ output_switch_case (List_Node *list, int indent, int *jumps_away)
if (option[TYPE]) if (option[TYPE])
printf ("&%s[%d]", option.get_wordlist_name (), list->index); printf ("&%s[%d]", option.get_wordlist_name (), list->index);
else else
output_string (list->key); output_string (list->key, list->key_length);
printf (";\n"); printf (";\n");
printf ("%*sgoto compare;\n", printf ("%*sgoto compare;\n",
indent, ""); indent, "");
@@ -1980,9 +2142,10 @@ Key_List::dump ()
field_width, "char_set"); field_width, "char_set");
for (List_Node *ptr = head; ptr; ptr = ptr->next) for (List_Node *ptr = head; ptr; ptr = ptr->next)
fprintf (stderr, "%11d,%11d,%6d, %*s, %s\n", fprintf (stderr, "%11d,%11d,%6d, %*.*s, %.*s\n",
ptr->hash_value, ptr->length, ptr->index, ptr->hash_value, ptr->key_length, ptr->index,
field_width, ptr->char_set, ptr->key); field_width, ptr->char_set_length, ptr->char_set,
ptr->key_length, ptr->key);
} }
/* Simple-minded constructor action here... */ /* Simple-minded constructor action here... */

View File

@@ -1,5 +1,5 @@
/* Creates and initializes a new list node. /* Creates and initializes a new list node.
Copyright (C) 1989-1998 Free Software Foundation, Inc. Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc.
written by Douglas C. Schmidt (schmidt@ics.uci.edu) written by Douglas C. Schmidt (schmidt@ics.uci.edu)
This file is part of GNU GPERF. This file is part of GNU GPERF.
@@ -57,22 +57,20 @@ List_Node::set_sort (char *base, int len)
of the total number of keys seen so far. This is used to initialize of the total number of keys seen so far. This is used to initialize
the INDEX field to some useful value. */ the INDEX field to some useful value. */
List_Node::List_Node (char *k, int len): link (0), next (0), List_Node::List_Node (const char *k, int len, const char *r):
key (k), rest (option[TYPE] ? k + len + 1 : ""), length (len), index (0) link (0), next (0), key (k), key_length (len), rest (r), index (0)
{ {
T (Trace t ("List_Node::List_Node");) T (Trace t ("List_Node::List_Node");)
char *ptr = new char[(option[ALLCHARS] ? len : option.get_max_keysig_size ()) + 1]; char *key_set = new char[(option[ALLCHARS] ? len : option.get_max_keysig_size ())];
char *key_set = ptr; char *ptr = key_set;
k[len] = '\0'; /* Null terminate KEY to separate it from REST. */ int i;
if (option[ALLCHARS]) /* Use all the character position in the KEY. */ if (option[ALLCHARS]) /* Use all the character positions in the KEY. */
for (; *k; k++, ptr++) for (i = len; i > 0; k++, ptr++, i--)
++occurrences[(unsigned char)(*ptr = *k)]; ++occurrences[(unsigned char)(*ptr = *k)];
else /* Only use those character positions specified by the user. */ else /* Only use those character positions specified by the user. */
{ {
int i; /* Iterate through the list of key_positions, initializing occurrences table
/* Iterate thru the list of key_positions, initializing occurrences table
and char_set (via char * pointer ptr). */ and char_set (via char * pointer ptr). */
for (option.reset (); (i = option.get ()) != EOS; ) for (option.reset (); (i = option.get ()) != EOS; )
@@ -94,8 +92,10 @@ List_Node::List_Node (char *k, int len): link (0), next (0),
exit (1); exit (1);
} }
} }
*ptr = '\0'; /* Terminate this bastard.... */
/* Sort the KEY_SET items alphabetically. */ /* Sort the KEY_SET items alphabetically. */
set_sort (key_set, ptr - key_set); set_sort (key_set, ptr - key_set);
char_set = key_set; char_set = key_set;
char_set_length = ptr - key_set;
} }

View File

@@ -2,7 +2,7 @@
/* Data and function members for defining values and operations of a list node. /* Data and function members for defining values and operations of a list node.
Copyright (C) 1989-1998 Free Software Foundation, Inc. Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc.
written by Douglas C. Schmidt (schmidt@ics.uci.edu) written by Douglas C. Schmidt (schmidt@ics.uci.edu)
This file is part of GNU GPERF. This file is part of GNU GPERF.
@@ -31,14 +31,15 @@ struct List_Node : private Vectors
List_Node *link; /* TRUE if key has an identical KEY_SET as another key. */ List_Node *link; /* TRUE if key has an identical KEY_SET as another key. */
List_Node *next; /* Points to next element on the list. */ List_Node *next; /* Points to next element on the list. */
const char *key; /* Each keyword string stored here. */ const char *key; /* Each keyword string stored here. */
int key_length; /* Length of the key. */
const char *rest; /* Additional information for building hash function. */ const char *rest; /* Additional information for building hash function. */
const char *char_set; /* Set of characters to hash, specified by user. */ const char *char_set; /* Set of characters to hash, specified by user. */
int length; /* Length of the key. */ int char_set_length; /* Length of char_set. */
int hash_value; /* Hash value for the key. */ int hash_value; /* Hash value for the key. */
int occurrence; /* A metric for frequency of key set occurrences. */ int occurrence; /* A metric for frequency of key set occurrences. */
int index; /* Position of this node relative to other nodes. */ int index; /* Position of this node relative to other nodes. */
List_Node (char *key, int len); List_Node (const char *key, int len, const char *rest);
static void set_sort (char *base, int len); static void set_sort (char *base, int len);
}; };