1
0
mirror of https://git.savannah.gnu.org/git/gperf.git synced 2025-12-02 21:19:24 +00:00

Allow the use of embedded NULs in keys.

This commit is contained in:
Bruno Haible
2000-08-20 16:50:54 +00:00
parent cb286153b2
commit c0eb520394
10 changed files with 345 additions and 136 deletions

View File

@@ -1,5 +1,49 @@
2000-08-20 Bruno Haible <bruno@linuix.math.u-bordeaux.fr>
Allow the use of embedded NULs in keys.
* lib/hash.h (hashpjw): Add a length argument.
* lib/hash.cc (hashpjw): Likewise. Don't stop when encountering a NUL
character.
* src/hash-table.h (Hash_Table constructor): Add ignore_len argument.
(Hash_Table::ignore_length): New field.
(Hash_Table::insert): Renamed from Hash_Table::operator(). Remove
ignore_length argument.
* src/hash-table.cc (NIL): Remove macro.
(Hash_Table constructor): Add ignore_len argument. Use it to
initialize ignore_length.
(Hash_Table destructor): Specify explicit length of char_set and
key.
(Hash_Table::insert): Renamed from Hash_Table::operator(). Remove
ignore_length argument. Pass explicit length to hashpjw. Compare
char_set using memcmp, not strcmp.
* src/list-node.h (List_Node): Rename field length to key_length.
New field char_set_length.
(List_Node constructor): Accept key and rest, not the entire line.
* src/list-node.cc (List_Node constructor): Accept key and rest, not
the entire line. Don't NUL terminate key and char_set. Initialize
char_set_length field.
* src/key-list.cc: Include <ctype.h>.
(parse_line): New function.
(Key_List::read_keys): Call parse_line instead of new List_Node.
Pass option[NOLENGTH] to Hash_Table constructor, not
Hash_Table::insert. Specify explicit length of key and char_set.
(Key_List::get_occurrence): Use explicit length of char_set.
(Key_List::set_determined): Likewise.
(Key_List::already_determined): Likewise.
(output_string): Add length argument. Output unprintable characters
using octal escape sequence.
(output_keyword_entry): Use explicit length of key.
(Key_List::output_lookup_array): Specify explicit length of key.
(output_switch_case): Likewise.
(Key_List::dump): Likewise.
* src/gen-perf.h (Gen_Perf::compute_disjoint_union): Add two length
arguments.
* src/gen-perf.cc (Gen_Perf::compute_disjoint_union): Likewise. Don't
stop when encountering NUL characters. Don't NUL terminate the
result.
(Gen_Perf::hash): Use explicit length of char_set.
(Gen_Perf::change): Specify explicit length of key.
* doc/help2man: New file, help2man version 1.022.
* Makefile.devel (all): Add doc/gperf.1.
(doc/gperf.1): New target.

View File

@@ -1,32 +1,23 @@
/*
Copyright (C) 1990 Free Software Foundation
Copyright (C) 1990, 2000 Free Software Foundation
written by Doug Lea (dl@rocky.oswego.edu)
This file is part of the GNU C++ Library. This library is free
software; you can redistribute it and/or modify it under the terms of
the GNU Library General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version. This library is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this library; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <hash.h>
/*
some useful hash functions
Some useful hash function.
It's not a particularly good hash function (<< 5 would be better than << 4),
but people believe in it because it comes from Dragon book.
*/
unsigned int hashpjw (const char* x) // From Dragon book, p436
unsigned int
hashpjw (const char *x, unsigned int len) // From Dragon book, p436
{
unsigned int h = 0;
unsigned int g;
while (*x != 0)
for (; len > 0; len--)
{
h = (h << 4) + (unsigned char) *x++;
if ((g = h & 0xf0000000) != 0)

View File

@@ -1,27 +1,15 @@
// This may look like C code, but it is really -*- C++ -*-
/*
Copyright (C) 1988, 1992 Free Software Foundation
Copyright (C) 1988, 1992, 2000 Free Software Foundation
written by Doug Lea (dl@rocky.oswego.edu)
This file is part of the GNU C++ Library. This library is free
software; you can redistribute it and/or modify it under the terms of
the GNU Library General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version. This library is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this library; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _hash_h
#define _hash_h 1
/* a hash function for null-terminated char* strings using the
/* a hash function for char[] arrays using the
method described in Aho, Sethi, & Ullman, p 436. */
extern unsigned int hashpjw (const char*);
extern unsigned int hashpjw (const char *string, unsigned int len);
#endif

View File

@@ -1,6 +1,6 @@
/* Provides high-level routines to manipulate the keyword list
structures the code generation output.
Copyright (C) 1989-1998 Free Software Foundation, Inc.
Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc.
written by Douglas C. Schmidt (schmidt@ics.uci.edu)
This file is part of GNU GPERF.
@@ -81,37 +81,45 @@ Gen_Perf::Gen_Perf (void)
}
/* Merge two disjoint hash key multisets to form the ordered disjoint union of the sets.
(In a multiset, an element can occur multiple times).
(In a multiset, an element can occur multiple times.)
Precondition: both set_1 and set_2 must be ordered. Returns the length
of the combined set. */
inline int
Gen_Perf::compute_disjoint_union (const char *set_1, const char *set_2, char *set_3)
Gen_Perf::compute_disjoint_union (const char *set_1, int size_1, const char *set_2, int size_2, char *set_3)
{
T (Trace t ("Gen_Perf::compute_disjoint_union");)
char *base = set_3;
while (*set_1 && *set_2)
while (size_1 > 0 && size_2 > 0)
if (*set_1 == *set_2)
set_1++, set_2++;
set_1++, size_1--, set_2++, size_2--;
else
{
*set_3 = *set_1 < *set_2 ? *set_1++ : *set_2++;
if (set_3 == base || *set_3 != *(set_3-1)) set_3++;
char next;
if (*set_1 < *set_2)
next = *set_1++, size_1--;
else
next = *set_2++, size_2--;
if (set_3 == base || next != set_3[-1])
*set_3++ = next;
}
while (*set_1)
while (size_1 > 0)
{
*set_3 = *set_1++;
if (set_3 == base || *set_3 != *(set_3-1)) set_3++;
char next;
next = *set_1++, size_1--;
if (set_3 == base || next != set_3[-1])
*set_3++ = next;
}
while (*set_2)
while (size_2 > 0)
{
*set_3 = *set_2++;
if (set_3 == base || *set_3 != *(set_3-1)) set_3++;
char next;
next = *set_2++, size_2--;
if (set_3 == base || next != set_3[-1])
*set_3++ = next;
}
*set_3 = '\0';
return set_3 - base;
}
@@ -146,10 +154,12 @@ inline int
Gen_Perf::hash (List_Node *key_node)
{
T (Trace t ("Gen_Perf::hash");)
int sum = option[NOLENGTH] ? 0 : key_node->length;
int sum = option[NOLENGTH] ? 0 : key_node->key_length;
for (const char *ptr = key_node->char_set; *ptr; ptr++)
sum += asso_values[(unsigned char)(*ptr)];
const char *p = key_node->char_set;
int i = key_node->char_set_length;
for (; i > 0; p++, i--)
sum += asso_values[(unsigned char)(*p)];
return key_node->hash_value = sum;
}
@@ -209,28 +219,35 @@ Gen_Perf::change (List_Node *prior, List_Node *curr)
{
T (Trace t ("Gen_Perf::change");)
static char *union_set;
int union_set_length;
if (!union_set)
union_set = new char [2 * option.get_max_keysig_size () + 1];
union_set = new char [2 * option.get_max_keysig_size ()];
if (option[DEBUG])
{
fprintf (stderr, "collision on keyword #%d, prior = \"%s\", curr = \"%s\" hash = %d\n",
num_done, prior->key, curr->key, curr->hash_value);
fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
num_done,
prior->key_length, prior->key,
curr->key_length, curr->key,
curr->hash_value);
fflush (stderr);
}
sort_set (union_set, compute_disjoint_union (prior->char_set, curr->char_set, union_set));
union_set_length = compute_disjoint_union (prior->char_set, prior->char_set_length, curr->char_set, curr->char_set_length, union_set);
sort_set (union_set, union_set_length);
/* Try changing some values, if change doesn't alter other values continue normal action. */
fewest_collisions++;
for (char *temp = union_set; *temp; temp++)
if (!affects_prev (*temp, curr))
const char *p = union_set;
int i = union_set_length;
for (; i > 0; p++, i--)
if (!affects_prev (*p, curr))
{
if (option[DEBUG])
{
fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n",
*temp, temp - union_set + 1, asso_values[(unsigned char)(*temp)]);
*p, p - union_set + 1, asso_values[(unsigned char)(*p)]);
fflush (stderr);
}
return; /* Good, doesn't affect previous hash values, we'll take it. */

View File

@@ -3,7 +3,7 @@
/* Provides high-level routines to manipulate the keyword list
structures the code generation output.
Copyright (C) 1989-1998 Free Software Foundation, Inc.
Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc.
written by Douglas C. Schmidt (schmidt@ics.uci.edu)
This file is part of GNU GPERF.
@@ -38,7 +38,7 @@ private:
void change (List_Node *prior, List_Node *curr);
int affects_prev (char c, List_Node *curr);
static int hash (List_Node *key_node);
static int compute_disjoint_union (const char *set_1, const char *set_2, char *set_3);
static int compute_disjoint_union (const char *set_1, int size_1, const char *set_2, int size_2, char *set_3);
static void sort_set (char *union_set, int len);
public:

View File

@@ -1,5 +1,5 @@
/* Hash table for checking keyword links. Implemented using double hashing.
Copyright (C) 1989-1998 Free Software Foundation, Inc.
Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc.
written by Douglas C. Schmidt (schmidt@ics.uci.edu)
This file is part of GNU GPERF.
@@ -26,8 +26,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
#include "options.h"
#include "trace.h"
#define NIL(TYPE) (TYPE *)0
/* The size of the hash table is always the smallest power of 2 >= the size
indicated by the user. This allows several optimizations, including
the use of double hashing and elimination of the mod instruction.
@@ -37,8 +35,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
This compromises information hiding somewhat, but greatly reduces
memory fragmentation, since we can now use alloca! */
Hash_Table::Hash_Table (List_Node **table_ptr, int s):
table (table_ptr), size (s), collisions (0)
Hash_Table::Hash_Table (List_Node **table_ptr, int s, int ignore_len):
table (table_ptr), size (s), collisions (0), ignore_length (ignore_len)
{
T (Trace t ("Hash_Table::Hash_Table");)
memset ((char *) table, 0, size * sizeof (*table));
@@ -60,8 +58,10 @@ Hash_Table::~Hash_Table (void)
for (int i = size - 1; i >= 0; i--)
if (table[i])
fprintf (stderr, "%8d, %*s, %s\n",
i, field_width, table[i]->char_set, table[i]->key);
fprintf (stderr, "%8d, %*.*s, %.*s\n",
i,
field_width, table[i]->char_set_length, table[i]->char_set,
table[i]->key_length, table[i]->key);
fprintf (stderr, "\nend dumping hash table\n\n");
}
@@ -72,20 +72,24 @@ Hash_Table::~Hash_Table (void)
Uses double hashing. */
List_Node *
Hash_Table::operator() (List_Node *item, int ignore_length)
Hash_Table::insert (List_Node *item)
{
T (Trace t ("Hash_Table::operator()");)
unsigned hash_val = hashpjw (item->char_set);
int probe = hash_val & size - 1;
int increment = (hash_val ^ item->length | 1) & size - 1;
unsigned hash_val = hashpjw (item->char_set, item->char_set_length);
int probe = hash_val & (size - 1);
int increment = ((hash_val ^ item->key_length) | 1) & (size - 1);
while (table[probe]
&& (strcmp (table[probe]->char_set, item->char_set)
|| (!ignore_length && table[probe]->length != item->length)))
while (table[probe])
{
if (table[probe]->char_set_length == item->char_set_length
&& memcmp (table[probe]->char_set, item->char_set, item->char_set_length) == 0
&& (ignore_length || table[probe]->key_length == item->key_length))
return table[probe];
collisions++;
probe = probe + increment & size - 1;
probe = (probe + increment) & (size - 1);
}
return table[probe] ? table[probe] : (table[probe] = item, NIL (List_Node));
table[probe] = item;
return (List_Node *) 0;
}

View File

@@ -2,7 +2,7 @@
/* Hash table used to check for duplicate keyword entries.
Copyright (C) 1989-1998 Free Software Foundation, Inc.
Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc.
written by Douglas C. Schmidt (schmidt@ics.uci.edu)
This file is part of GNU GPERF.
@@ -32,11 +32,12 @@ private:
List_Node **table; /* Vector of pointers to linked lists of List_Node's. */
int size; /* Size of the vector. */
int collisions; /* Find out how well our double hashing is working! */
int ignore_length;
public:
Hash_Table (List_Node **t, int s);
Hash_Table (List_Node **t, int s, int ignore_len);
~Hash_Table (void);
List_Node *operator () (List_Node *item, int ignore_length);
List_Node *insert (List_Node *item);
};
#endif

View File

@@ -21,6 +21,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
#include <stdio.h>
#include <string.h> /* declares strncpy(), strchr() */
#include <stdlib.h> /* declares malloc(), free(), abs(), exit(), abort() */
#include <ctype.h> /* declares isprint() */
#include <assert.h> /* defines assert() */
#include <limits.h> /* defines SCHAR_MAX etc. */
#include "options.h"
@@ -209,6 +210,148 @@ Key_List::set_output_types (void)
}
}
/* Extracts a key from an input line and creates a new List_Node for it.

   LINE is one NUL-terminated input line; it may be modified in place.
   DELIMITERS is the set of characters separating the key from the rest of
   the line.

   Two key syntaxes are accepted:
     - If LINE starts with a double quote, the key is a string in ANSI C
       syntax, including octal, hexadecimal and single-character escape
       sequences.  Such a key may contain NUL bytes, which is why its length
       is passed explicitly to the List_Node constructor.
     - Otherwise the key extends up to the first delimiter character.

   The text following the delimiter is handed to the List_Node as its "rest"
   only if option[TYPE] is set; otherwise an empty string is used.

   An unterminated string, an invalid escape sequence, or a string that is
   followed by something other than a delimiter is reported on stderr and
   terminates the program with exit status 1.  Out-of-range or empty numeric
   escapes are reported on stderr but parsing continues.  */
static List_Node *
parse_line (char *line, const char *delimiters)
{
  if (*line == '"')
    {
      /* Parse a string in ANSI C syntax.  The opening quote is not copied
         and every escape sequence is at least as long as the single byte it
         produces, so strlen (line) bytes are always enough.  */
      char *key = new char[strlen(line)];
      char *kp = key;
      const char *lp = line + 1;

      /* The only regular way out of this loop is the closing quote.  The
         loop must not stop by itself at the terminating NUL: otherwise the
         "unterminated string" check below would be unreachable and the code
         after the loop would step past the end of LINE.  */
      for (;;)
        {
          char c = *lp;

          if (c == '\0')
            {
              fprintf (stderr, "unterminated string: %s\n", line);
              exit (1);
            }
          else if (c == '\\')
            {
              c = *++lp;
              switch (c)
                {
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                  {
                    /* Octal escape: at most three octal digits.  */
                    int code = 0;
                    int count = 0;
                    while (count < 3 && *lp >= '0' && *lp <= '7')
                      {
                        code = (code << 3) + (*lp - '0');
                        lp++;
                        count++;
                      }
                    if (code > UCHAR_MAX)
                      fprintf (stderr, "octal escape out of range: %s\n", line);
                    *kp = (char) code;
                    break;
                  }
                case 'x':
                  {
                    /* Hexadecimal escape: any number of hex digits.  The
                       value is accumulated as unsigned and reduced to its
                       low bits as soon as it exceeds UCHAR_MAX, so that an
                       arbitrarily long digit sequence cannot overflow; the
                       byte finally stored is unaffected by the reduction.  */
                    unsigned int code = 0;
                    int count = 0;
                    int out_of_range = 0;
                    lp++;
                    while ((*lp >= '0' && *lp <= '9')
                           || (*lp >= 'A' && *lp <= 'F')
                           || (*lp >= 'a' && *lp <= 'f'))
                      {
                        code = (code << 4)
                               + (*lp >= 'A' && *lp <= 'F' ? *lp - 'A' + 10 :
                                  *lp >= 'a' && *lp <= 'f' ? *lp - 'a' + 10 :
                                  *lp - '0');
                        if (code > UCHAR_MAX)
                          {
                            out_of_range = 1;
                            code &= UCHAR_MAX;
                          }
                        lp++;
                        count++;
                      }
                    if (count == 0)
                      fprintf (stderr, "hexadecimal escape without any hex digits: %s\n", line);
                    if (out_of_range)
                      fprintf (stderr, "hexadecimal escape out of range: %s\n", line);
                    *kp = (char) code;
                    break;
                  }
                case '\\': case '\'': case '"':
                  *kp = c;
                  lp++;
                  break;
                case 'n':
                  *kp = '\n';
                  lp++;
                  break;
                case 't':
                  *kp = '\t';
                  lp++;
                  break;
                case 'r':
                  *kp = '\r';
                  lp++;
                  break;
                case 'f':
                  *kp = '\f';
                  lp++;
                  break;
                case 'b':
                  *kp = '\b';
                  lp++;
                  break;
                case 'a':
                  *kp = '\a';
                  lp++;
                  break;
                case 'v':
                  *kp = '\v';
                  lp++;
                  break;
                default:
                  /* This also covers a backslash at the very end of the
                     line (c == '\0').  */
                  fprintf (stderr, "invalid escape sequence in string: %s\n", line);
                  exit (1);
                }
            }
          else if (c == '"')
            break;
          else
            {
              *kp = c;
              lp++;
            }
          kp++;
        }

      /* Skip the closing quote.  What follows must be either the end of the
         line or a single delimiter, which is skipped as well.  */
      lp++;
      if (*lp != '\0')
        {
          if (strchr (delimiters, *lp) == NULL)
            {
              fprintf (stderr, "string not followed by delimiter: %s\n", line);
              exit (1);
            }
          lp++;
        }
      return new List_Node (key, kp - key, option[TYPE] ? lp : "");
    }
  else
    {
      /* Not a string.  Look for the delimiter.  */
      int len = strcspn (line, delimiters);
      const char *rest;

      if (line[len] == '\0')
        rest = "";
      else
        {
          /* Quick hack to separate the key from the rest, killing the first
             delimiter.  */
          line[len] = '\0';
          rest = &line[len + 1];
        }
      return new List_Node (line, len, option[TYPE] ? rest : "");
    }
}
/* Reads in all keys from standard input and creates a linked list pointed
to by Head. This list is then quickly checked for ``links,'' i.e.,
unhashable elements possessing identical key sets and lengths. */
@@ -235,13 +378,13 @@ Key_List::read_keys (void)
const char *delimiter = option.get_delimiter ();
List_Node *temp, *trail = 0;
head = new List_Node (ptr, strcspn (ptr, delimiter));
head = parse_line (ptr, delimiter);
for (temp = head;
(ptr = Read_Line::get_line ()) && strcmp (ptr, "%%");
temp = temp->next)
{
temp->next = new List_Node (ptr, strcspn (ptr, delimiter));
temp->next = parse_line (ptr, delimiter);
total_keys++;
}
@@ -266,14 +409,14 @@ Key_List::read_keys (void)
#endif
/* Make large hash table for efficiency. */
Hash_Table found_link (table, table_size);
Hash_Table found_link (table, table_size, option[NOLENGTH]);
/* Test whether there are any links and also set the maximum length of
an identifier in the keyword list. */
for (temp = head; temp; temp = temp->next)
{
List_Node *ptr = found_link (temp, option[NOLENGTH]);
List_Node *ptr = found_link.insert (temp);
/* Check for links. We deal with these by building an equivalence class
of all duplicate values (i.e., links) so that only 1 keyword is
@@ -290,17 +433,19 @@ Key_List::read_keys (void)
/* Complain if user hasn't enabled the duplicate option. */
if (!option[DUP] || option[DEBUG])
fprintf (stderr, "Key link: \"%s\" = \"%s\", with key set \"%s\".\n",
temp->key, ptr->key, temp->char_set);
fprintf (stderr, "Key link: \"%.*s\" = \"%.*s\", with key set \"%.*s\".\n",
temp->key_length, temp->key,
ptr->key_length, ptr->key,
temp->char_set_length, temp->char_set);
}
else
trail = temp;
/* Update minimum and maximum keyword length, if needed. */
if (max_key_len < temp->length)
max_key_len = temp->length;
if (min_key_len > temp->length)
min_key_len = temp->length;
if (max_key_len < temp->key_length)
max_key_len = temp->key_length;
if (min_key_len > temp->key_length)
min_key_len = temp->key_length;
}
#if !LARGE_STACK_ARRAYS
@@ -407,8 +552,10 @@ Key_List::get_occurrence (List_Node *ptr)
T (Trace t ("Key_List::get_occurrence");)
int value = 0;
for (const char *temp = ptr->char_set; *temp; temp++)
value += occurrences[(unsigned char)(*temp)];
const char *p = ptr->char_set;
unsigned int i = ptr->char_set_length;
for (; i > 0; p++, i--)
value += occurrences[(unsigned char)(*p)];
return value;
}
@@ -420,8 +567,11 @@ inline void
Key_List::set_determined (List_Node *ptr)
{
T (Trace t ("Key_List::set_determined");)
for (const char *temp = ptr->char_set; *temp; temp++)
determined[(unsigned char)(*temp)] = 1;
const char *p = ptr->char_set;
unsigned int i = ptr->char_set_length;
for (; i > 0; p++, i--)
determined[(unsigned char)(*p)] = 1;
}
/* Returns TRUE if PTR's key set is already completely determined. */
@@ -432,8 +582,10 @@ Key_List::already_determined (List_Node *ptr)
T (Trace t ("Key_List::already_determined");)
int is_determined = 1;
for (const char *temp = ptr->char_set; is_determined && *temp; temp++)
is_determined = determined[(unsigned char)(*temp)];
const char *p = ptr->char_set;
unsigned int i = ptr->char_set_length;
for (; is_determined && i > 0; p++, i--)
is_determined = determined[(unsigned char)(*p)];
return is_determined;
}
@@ -660,21 +812,31 @@ Key_List::output_constants (struct Output_Constants& style)
/* ------------------------------------------------------------------------- */
/* Outputs a keyword, as a string: enclosed in double quotes, escaping
backslashes and double quote characters. */
backslashes, double quote and unprintable characters. */
static void
output_string (const char *key)
output_string (const char *key, int len)
{
T (Trace t ("output_string");)
char c;
putchar ('"');
while (c = *key++, c != '\0')
for (; len > 0; len--)
{
unsigned char c = (unsigned char) *key++;
if (isprint (c))
{
if (c == '"' || c == '\\')
putchar ('\\');
putchar (c);
}
else
{
putchar ('\\');
putchar ('0' + ((c >> 6) & 7));
putchar ('0' + ((c >> 3) & 7));
putchar ('0' + (c & 7));
}
}
putchar ('"');
}
@@ -1054,7 +1216,7 @@ Key_List::output_keylength_table (void)
printf (",");
if ((column++ % columns) == 0)
printf("\n%s ", indent);
printf ("%3d", temp->length);
printf ("%3d", temp->key_length);
/* Deal with links specially. */
if (temp->link) // implies option[DUP]
@@ -1064,7 +1226,7 @@ Key_List::output_keylength_table (void)
printf (",");
if ((column++ % columns) == 0)
printf("\n%s ", indent);
printf ("%3d", links->length);
printf ("%3d", links->key_length);
}
index++;
@@ -1083,7 +1245,7 @@ output_keyword_entry (List_Node *temp, const char *indent)
printf ("%s ", indent);
if (option[TYPE])
printf ("{");
output_string (temp->key);
output_string (temp->key, temp->key_length);
if (option[TYPE])
{
if (strlen (temp->rest) > 0)
@@ -1238,8 +1400,8 @@ Key_List::output_lookup_array (void)
int hash_value = temp->hash_value;
lookup_array[hash_value] = temp->index;
if (option[DEBUG])
fprintf (stderr, "keyword = %s, index = %d\n",
temp->key, temp->index);
fprintf (stderr, "keyword = %.*s, index = %d\n",
temp->key_length, temp->key, temp->index);
if (temp->link
|| (temp->next && hash_value == temp->next->hash_value))
{
@@ -1255,8 +1417,8 @@ Key_List::output_lookup_array (void)
dup_ptr->count++;
if (option[DEBUG])
fprintf (stderr,
"static linked keyword = %s, index = %d\n",
ptr->key, ptr->index);
"static linked keyword = %.*s, index = %d\n",
ptr->key_length, ptr->key, ptr->index);
}
if (!(temp->next && hash_value == temp->next->hash_value))
@@ -1266,8 +1428,8 @@ Key_List::output_lookup_array (void)
dup_ptr->count++;
if (option[DEBUG])
fprintf (stderr, "dynamic linked keyword = %s, index = %d\n",
temp->key, temp->index);
fprintf (stderr, "dynamic linked keyword = %.*s, index = %d\n",
temp->key_length, temp->key, temp->index);
}
assert (dup_ptr->count >= 2);
dup_ptr++;
@@ -1404,8 +1566,8 @@ output_switch_case (List_Node *list, int indent, int *jumps_away)
T (Trace t ("output_switch_case");)
if (option[DEBUG])
printf ("%*s/* hash value = %4d, keyword = \"%s\" */\n",
indent, "", list->hash_value, list->key);
printf ("%*s/* hash value = %4d, keyword = \"%.*s\" */\n",
indent, "", list->hash_value, list->key_length, list->key);
if (option[DUP]
&& (list->link
@@ -1438,7 +1600,7 @@ output_switch_case (List_Node *list, int indent, int *jumps_away)
{
printf ("%*sif (len == %d)\n"
"%*s {\n",
indent, "", list->length,
indent, "", list->key_length,
indent, "");
indent += 4;
}
@@ -1447,7 +1609,7 @@ output_switch_case (List_Node *list, int indent, int *jumps_away)
if (option[TYPE])
printf ("&%s[%d]", option.get_wordlist_name (), list->index);
else
output_string (list->key);
output_string (list->key, list->key_length);
printf (";\n");
printf ("%*sgoto compare;\n",
indent, "");
@@ -1980,9 +2142,10 @@ Key_List::dump ()
field_width, "char_set");
for (List_Node *ptr = head; ptr; ptr = ptr->next)
fprintf (stderr, "%11d,%11d,%6d, %*s, %s\n",
ptr->hash_value, ptr->length, ptr->index,
field_width, ptr->char_set, ptr->key);
fprintf (stderr, "%11d,%11d,%6d, %*.*s, %.*s\n",
ptr->hash_value, ptr->key_length, ptr->index,
field_width, ptr->char_set_length, ptr->char_set,
ptr->key_length, ptr->key);
}
/* Simple-minded constructor action here... */

View File

@@ -1,5 +1,5 @@
/* Creates and initializes a new list node.
Copyright (C) 1989-1998 Free Software Foundation, Inc.
Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc.
written by Douglas C. Schmidt (schmidt@ics.uci.edu)
This file is part of GNU GPERF.
@@ -57,22 +57,20 @@ List_Node::set_sort (char *base, int len)
of the total number of keys seen so far. This is used to initialize
the INDEX field to some useful value. */
List_Node::List_Node (char *k, int len): link (0), next (0),
key (k), rest (option[TYPE] ? k + len + 1 : ""), length (len), index (0)
List_Node::List_Node (const char *k, int len, const char *r):
link (0), next (0), key (k), key_length (len), rest (r), index (0)
{
T (Trace t ("List_Node::List_Node");)
char *ptr = new char[(option[ALLCHARS] ? len : option.get_max_keysig_size ()) + 1];
char *key_set = ptr;
k[len] = '\0'; /* Null terminate KEY to separate it from REST. */
char *key_set = new char[(option[ALLCHARS] ? len : option.get_max_keysig_size ())];
char *ptr = key_set;
int i;
if (option[ALLCHARS]) /* Use all the character position in the KEY. */
for (; *k; k++, ptr++)
if (option[ALLCHARS]) /* Use all the character positions in the KEY. */
for (i = len; i > 0; k++, ptr++, i--)
++occurrences[(unsigned char)(*ptr = *k)];
else /* Only use those character positions specified by the user. */
{
int i;
/* Iterate thru the list of key_positions, initializing occurrences table
/* Iterate through the list of key_positions, initializing occurrences table
and char_set (via char * pointer ptr). */
for (option.reset (); (i = option.get ()) != EOS; )
@@ -94,8 +92,10 @@ List_Node::List_Node (char *k, int len): link (0), next (0),
exit (1);
}
}
*ptr = '\0'; /* Terminate this bastard.... */
/* Sort the KEY_SET items alphabetically. */
set_sort (key_set, ptr - key_set);
char_set = key_set;
char_set_length = ptr - key_set;
}

View File

@@ -2,7 +2,7 @@
/* Data and function members for defining values and operations of a list node.
Copyright (C) 1989-1998 Free Software Foundation, Inc.
Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc.
written by Douglas C. Schmidt (schmidt@ics.uci.edu)
This file is part of GNU GPERF.
@@ -31,14 +31,15 @@ struct List_Node : private Vectors
List_Node *link; /* TRUE if key has an identical KEY_SET as another key. */
List_Node *next; /* Points to next element on the list. */
const char *key; /* Each keyword string stored here. */
int key_length; /* Length of the key. */
const char *rest; /* Additional information for building hash function. */
const char *char_set; /* Set of characters to hash, specified by user. */
int length; /* Length of the key. */
int char_set_length; /* Length of char_set. */
int hash_value; /* Hash value for the key. */
int occurrence; /* A metric for frequency of key set occurrences. */
int index; /* Position of this node relative to other nodes. */
List_Node (char *key, int len);
List_Node (const char *key, int len, const char *rest);
static void set_sort (char *base, int len);
};