Rewrite the input routines.

2025-12-02 13:09:22 +00:00 · 2003-01-22 12:03:19 +00:00
parent d3a9c2f7ad
commit 83440a2aed
18 changed files with 676 additions and 518 deletions
--- a/34
+++ b/34
@@ -1,5 +1,39 @@
 2002-11-10  Bruno Haible  <bruno@clisp.org>

+	Rewrite the input routines.
+	* src/input.h: Don't include read-line.h.
+	(Input): Don't inherit from class Read_Line.
+	(Input::read_keys, Input::strcspn, Input::set_output_types,
+	Input::get_array_type, Input::save_include_src,
+	Input::get_special_input): Remove declarations.
+	(Input::read_input): New declaration.
+	(Input::_struct_decl): Renamed from Input::_array_type.
+	(Input::_verbatim_declarations): Renamed from Input::_include_src.
+	(Input::_verbatim_code): Replaces Input::_additional_code.
+	* src/input.cc: Completely rewritten.
+	* src/output.h (Output::Output): Update the verbatim_* arguments.
+	(Output::_struct_decl): Renamed from Output::_array_type.
+	(Output::_verbatim_declarations): Renamed from Output::_include_src.
+	(Output::_verbatim_code): Replaces Output::_additional_code.
+	* src/output.cc (Output::Output): Update the verbatim_* arguments.
+	(Output::output): Output the verbatim_* code pieces with #line.
+	* src/main.cc (main): Call Input::read_input instead of
+	Input::read_keys. Update Output::Output arguments.
+	* src/read-line.h: Remove file.
+	* src/read-line.cc, src/read-line.icc: Remove files.
+	* src/Makefile.in (OBJECTS): Remove read-line.o.
+	(READ_LINE_H): Remove variable.
+	(INPUT_H): Update.
+	(read-line.o): Remove rule.
+	* doc/gperf.texi (Declarations): Correct the example.
+	(Keywords): Mention that lines starting with % are forbidden here.
+	* tests/c-parse.exp: Update.
+	* tests/cplusplus.exp: Update.
+	* tests/gpc.exp: Update.
+	* tests/java.exp: Update.
+	* tests/objc.exp: Update.
+	* tests/test-4.exp: Update.
+
 	* src/options.h (Options::get_input_file_name): New declaration.
 	(Options::_input_file_name): New field.
 	* src/options.icc (Options::get_input_file_name): New method.
--- a/3
+++ b/3
@@ -2,6 +2,9 @@ New in 2.8:

 * Added option -m/--multiple-iterations that reduces the size of the
  generated table.
+* If the input file is given by name, the output file will now contain
+  #line directives referring to the input file.
+* Bug fixes.

 New in 2.7.2:

--- a/doc/gperf.texi
+++ b/doc/gperf.texi
@@ -163,8 +163,8 @@ In addition, Adam de Boor and Nels Olson provided many tips and insights
 that greatly helped improve the quality and functionality of @code{gperf}.

@item
-A testsuite was added by Bruno Haible. He also rewrote the output
-routines for better reliability.
+A testsuite was added by Bruno Haible. He also rewrote the input routines
+and the output routines for better reliability.
@end itemize

@node Motivation, Search Structures, Contributors, Top
@@ -389,15 +389,16 @@ march,     3, 31, 31
@end group
@end example

-It is possible to omit the declaration section entirely.  In this case
+It is possible to omit the declaration section entirely, if the @samp{-t}
+option is not given.  In this case
 the input file begins directly with the first keyword line, e.g.:

@example
@group
-january,   1, 31, 31
-february,  2, 28, 29
-march,     3, 31, 31
-april,     4, 30, 30
+january
+february
+march
+april
 ...
@end group
@end example
@@ -408,7 +409,9 @@ april,     4, 30, 30
 The second input file format section contains lines of keywords and any
 associated attributes you might supply.  A line beginning with @samp{#}
 in the first column is considered a comment.  Everything following the
-@samp{#} is ignored, up to and including the following newline.
+@samp{#} is ignored, up to and including the following newline.  A line
+beginning with @samp{%} in the first column is an option declaration and
+must not occur within the keywords section.

 The first field of each non-comment line is always the keyword itself.  It
 can be given in two ways: as a simple name, i.e., without surrounding
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -61,7 +61,7 @@ SHELL = /bin/sh

 VPATH = $(srcdir)

-OBJECTS  = version.o options.o read-line.o keyword.o keyword-list.o \
+OBJECTS  = version.o options.o keyword.o keyword-list.o \
           input.o bool-array.o hash-table.o search.o output.o main.o
 LIBS     = ../lib/libgp.a @GPERF_LIBM@
 CPPFLAGS = -I. -I$(srcdir)/../lib
@@ -86,10 +86,9 @@ $(TARGETPROG): $(OBJECTS)
 CONFIG_H = config.h
 VERSION_H = version.h
 OPTIONS_H = options.h options.icc
-READ_LINE_H = read-line.h read-line.icc
 KEYWORD_H = keyword.h keyword.icc
 KEYWORD_LIST_H = keyword-list.h keyword-list.icc $(KEYWORD_H)
-INPUT_H = input.h $(READ_LINE_H) $(KEYWORD_LIST_H)
+INPUT_H = input.h $(KEYWORD_LIST_H)
 BOOL_ARRAY_H = bool-array.h bool-array.icc $(OPTIONS_H)
 HASH_TABLE_H = hash-table.h $(KEYWORD_H)
 SEARCH_H = search.h $(KEYWORD_LIST_H) $(BOOL_ARRAY_H)
@@ -99,8 +98,6 @@ version.o : version.cc $(VERSION_H)
 	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/version.cc
 options.o : options.cc $(OPTIONS_H) $(VERSION_H)
 	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/options.cc
-read-line.o : read-line.cc $(READ_LINE_H)
-	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/read-line.cc
 keyword.o : keyword.cc $(KEYWORD_H) $(OPTIONS_H)
 	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/keyword.cc
 keyword-list.o : keyword-list.cc $(KEYWORD_LIST_H)
--- a/src/input.cc
+++ b/src/input.cc
@@ -28,332 +28,569 @@
 #include <string.h> /* declares strncpy(), strchr() */
 #include <limits.h> /* defines UCHAR_MAX etc. */
 #include "options.h"
+#include "getline.h"

 Input::Input (FILE *stream, Keyword_Factory *keyword_factory)
-  : Read_Line (stream), _factory (keyword_factory)
+  : _stream (stream), _factory (keyword_factory)
 {
 }

-/* Gathers the input stream into a buffer until one of two things occur:
-
-   1. We read a '%' followed by a '%'
-   2. We read a '%' followed by a '}'
-
-   The first symbolizes the beginning of the keyword list proper,
-   The second symbolizes the end of the C source code to be generated
-   verbatim in the output file.
-
-   I assume that the keys are separated from the optional preceding struct
-   declaration by a consecutive % followed by either % or } starting in
-   the first column. The code below uses an expandible buffer to scan off
-   and return a pointer to all the code (if any) appearing before the delimiter. */
-
-const char *
-Input::get_special_input (char delimiter)
+/* Reads the entire input file.  */
+void
+Input::read_input ()
 {
-  int size  = 80;
-  char *buf = new char[size];
-  int c, i;
+  /* The input file has the following structure:
+        DECLARATIONS
+        %%
+        KEYWORDS
+        %%
+        ADDITIONAL_CODE
+     Since the DECLARATIONS and the ADDITIONAL_CODE sections are optional,
+     we have to read the entire file in the case there is only one %%
+     separator line, in order to determine whether the structure is
+        DECLARATIONS
+        %%
+        KEYWORDS
+     or
+        KEYWORDS
+        %%
+        ADDITIONAL_CODE
+     When the option -t is given or when the first section contains
+     declaration lines starting with %, we go for the first interpretation,
+     otherwise for the second interpretation.  */

-  for (i = 0; (c = getchar ()) != EOF; i++)
+  char *input = NULL;
+  size_t input_size = 0;
+  int input_length = get_delim (&input, &input_size, EOF, _stream);
+  if (input_length < 0)
    {
-      if (c == '%')
-        {
-          if ((c = getchar ()) == delimiter)
-            {
-
-              while ((c = getchar ()) != '\n')
-                ; /* discard newline */
-
-              if (i == 0)
-                return "";
-              else
-                {
-                  buf[delimiter == '%' && buf[i - 2] == ';' ? i - 2 : i - 1] = '\0';
-                  return buf;
-                }
-            }
-          else
-            buf[i++] = '%';
-        }
-      else if (i >= size) /* Yikes, time to grow the buffer! */
-        {
-          char *temp = new char[size *= 2];
-          int j;
-
-          for (j = 0; j < i; j++)
-            temp[j] = buf[j];
-
-          buf = temp;
-        }
-      buf[i] = c;
-    }
-
-  return 0;        /* Problem here. */
-}
-
-/* Stores any C text that must be included verbatim into the
-   generated code output. */
-
-const char *
-Input::save_include_src ()
-{
-  int c;
-
-  if ((c = getchar ()) != '%')
-    ungetc (c, stdin);
-  else if ((c = getchar ()) != '{')
-    {
-      fprintf (stderr, "internal error, %c != '{' on line %d in file %s", c, __LINE__, __FILE__);
+      if (ferror (_stream))
+        fprintf (stderr, "error while reading input file\n");
+      else
+        fprintf (stderr, "The input file is empty!\n");
      exit (1);
    }
-  else
-    return get_special_input ('}');
-  return "";
-}

-/* Determines from the input file whether the user wants to build a table
-   from a user-defined struct, or whether the user is content to simply
-   use the default array of keys. */
+  /* We use input_end as a limit, in order to cope with NUL bytes in the
+     input.  But note that one trailing NUL byte has been stored at
+     *input_end, so that peeking at p[1] near the end is convenient.  */
+  char *input_end = input + input_length;

-const char *
-Input::get_array_type ()
-{
-  return get_special_input ('%');
-}
+  const char *declarations;
+  const char *declarations_end;
+  const char *keywords;
+  const char *keywords_end;
+  unsigned int keywords_lineno;

-/* strcspn - find length of initial segment of S consisting entirely
-   of characters not from REJECT (borrowed from Henry Spencer's
-   ANSI string package, when GNU libc comes out I'll replace this...). */
-
-#ifndef strcspn
-inline int
-Input::strcspn (const char *s, const char *reject)
-{
-  const char *scan;
-  const char *rej_scan;
-  int   count = 0;
-
-  for (scan = s; *scan; scan++)
+  /* Break up the input into the three sections.  */
+  {
+    const char *separator[2] = { NULL, NULL };
+    unsigned int separator_lineno[2] = { 0, 0 };
+    int separators = 0;
    {
-
-      for (rej_scan = reject; *rej_scan; rej_scan++)
-        if (*scan == *rej_scan)
-          return count;
-
-      count++;
+      unsigned int lineno = 1;
+      for (const char *p = input; p < input_end; )
+        {
+          if (p[0] == '%' && p[1] == '%')
+            {
+              separator[separators] = p;
+              separator_lineno[separators] = lineno;
+              if (++separators == 2)
+                break;
+            }
+          lineno++;
+          p = (const char *) memchr (p, '\n', input_end - p);
+          if (p != NULL)
+            p++;
+          else
+            p = input_end;
+        }
    }

-  return count;
-}
-#endif
+    bool has_declarations;
+    if (separators == 1)
+      {
+        if (option[TYPE])
+          has_declarations = true;
+        else
+          {
+            has_declarations = false;
+            for (const char *p = input; p < separator[0]; )
+              {
+                if (p[0] == '%')
+                  {
+                    has_declarations = true;
+                    break;
+                  }
+                p = (const char *) memchr (p, '\n', separator[0] - p);
+                if (p != NULL)
+                  p++;
+                else
+                  p = separator[0];
+              }
+          }
+      }
+    else
+      has_declarations = (separators > 0);

-/* Sets up the Return_Type, the Struct_Tag type and the Array_Type
-   based upon various user Options. */
+    if (has_declarations)
+      {
+        declarations = input;
+        declarations_end = separator[0];
+        /* Give a warning if anything but blanks follows the %%.  */
+        bool nonempty_line = false;
+        const char *p;
+        for (p = declarations_end + 2; p < input_end; )
+          {
+            if (*p == '\n')
+              {
+                p++;
+                break;
+              }
+            if (!(*p == ' ' || *p == '\t'))
+              nonempty_line = true;
+            p++;
+          }
+        if (nonempty_line)
+          fprintf (stderr, "line %u: warning: junk after %%%% is ignored\n",
+                   separator_lineno[0]);
+        keywords = p;
+        keywords_lineno = separator_lineno[0] + 1;
+      }
+    else
+      {
+        declarations = NULL;
+        declarations_end = NULL;
+        keywords = input;
+        keywords_lineno = 1;
+      }

-void
-Input::set_output_types ()
-{
-  _array_type = NULL;
+    if (separators > (has_declarations ? 1 : 0))
+      {
+        keywords_end = separator[separators-1];
+        _verbatim_code = separator[separators-1] + 2;
+        _verbatim_code_end = input_end;
+        _verbatim_code_lineno = separator_lineno[separators-1];
+      }
+    else
+      {
+        keywords_end = input_end;
+        _verbatim_code = NULL;
+        _verbatim_code_end = NULL;
+        _verbatim_code_lineno = 0;
+      }
+  }
+
+  /* Parse the declarations section.  */
+
+  _verbatim_declarations = NULL;
+  _verbatim_declarations_end = NULL;
+  _verbatim_declarations_lineno = 0;
+  _struct_decl = NULL;
  _return_type = NULL;
  _struct_tag = NULL;
-  if (option[TYPE])
-    {
-      _array_type = get_array_type ();
-      if (!_array_type)
-        /* Something's wrong, but we'll catch it later on, in read_keys()... */
-        return;
-      /* Yow, we've got a user-defined type... */
-      int i = strcspn (_array_type, "{\n\0");
-      /* Remove trailing whitespace. */
-      while (i > 0 && strchr (" \t", _array_type[i-1]))
-        i--;
-      int struct_tag_length = i;
+  {
+    unsigned int lineno = 1;
+    char *struct_decl = NULL;
+    for (const char *p = declarations; p < declarations_end; )
+      {
+        const char *line_end;
+        line_end = (const char *) memchr (p, '\n', declarations_end - p);
+        if (line_end != NULL)
+          line_end++;
+        else
+          line_end = declarations_end;

-      /* Set `struct_tag' to a naked "struct something". */
-      char *structtag = new char[struct_tag_length + 1];
-      strncpy (structtag, _array_type, struct_tag_length);
-      structtag[struct_tag_length] = '\0';
-      _struct_tag = structtag;
-
-      /* The return type of the lookup function is "struct something *".
-         No "const" here, because if !option[CONST], some user code might want
-         to modify the structure. */
-      char *rettype = new char[struct_tag_length + 3];
-      strncpy (rettype, _array_type, struct_tag_length);
-      rettype[struct_tag_length] = ' ';
-      rettype[struct_tag_length + 1] = '*';
-      rettype[struct_tag_length + 2] = '\0';
-      _return_type = rettype;
-    }
-}
-
-/* Extracts a key from an input line and creates a new Keyword_List for
-   it. */
-
-Keyword_List *
-Input::parse_line (const char *line, const char *delimiters)
-{
-  if (*line == '"')
-    {
-      /* Parse a string in ANSI C syntax. */
-      char *key = new char[strlen(line)];
-      char *kp = key;
-      const char *lp = line + 1;
-
-      for (; *lp;)
-        {
-          char c = *lp;
-
-          if (c == '\0')
-            {
-              fprintf (stderr, "unterminated string: %s\n", line);
-              exit (1);
-            }
-          else if (c == '\\')
-            {
-              c = *++lp;
-              switch (c)
-                {
-                case '0': case '1': case '2': case '3':
-                case '4': case '5': case '6': case '7':
+        if (*p == '%')
+          {
+            if (p[1] == '{')
+              {
+                /* Handle %{.  */
+                if (_verbatim_declarations != NULL)
                  {
-                    int code = 0;
-                    int count = 0;
-                    while (count < 3 && *lp >= '0' && *lp <= '7')
-                      {
-                        code = (code << 3) + (*lp - '0');
-                        lp++;
-                        count++;
-                      }
-                    if (code > UCHAR_MAX)
-                      fprintf (stderr, "octal escape out of range: %s\n", line);
-                    *kp = static_cast<char>(code);
-                    break;
+                    fprintf (stderr, "lines %u and %u:"
+                             " only one %%{...%%} section is allowed\n",
+                             _verbatim_declarations_lineno, lineno);
+                    exit (1);
                  }
-                case 'x':
+                _verbatim_declarations = p + 2;
+                _verbatim_declarations_lineno = lineno;
+              }
+            else if (p[1] == '}')
+              {
+                /* Handle %}.  */
+                if (_verbatim_declarations == NULL)
                  {
-                    int code = 0;
-                    int count = 0;
-                    lp++;
-                    while ((*lp >= '0' && *lp <= '9')
-                           || (*lp >= 'A' && *lp <= 'F')
-                           || (*lp >= 'a' && *lp <= 'f'))
-                      {
-                        code = (code << 4)
-                               + (*lp >= 'A' && *lp <= 'F' ? *lp - 'A' + 10 :
-                                  *lp >= 'a' && *lp <= 'f' ? *lp - 'a' + 10 :
-                                  *lp - '0');
-                        lp++;
-                        count++;
-                      }
-                    if (count == 0)
-                      fprintf (stderr, "hexadecimal escape without any hex digits: %s\n", line);
-                    if (code > UCHAR_MAX)
-                      fprintf (stderr, "hexadecimal escape out of range: %s\n", line);
-                    *kp = static_cast<char>(code);
-                    break;
+                    fprintf (stderr, "line %u:"
+                             " %%} outside of %%{...%%} section\n",
+                             lineno);
+                    exit (1);
                  }
-                case '\\': case '\'': case '"':
-                  *kp = c;
-                  lp++;
-                  break;
-                case 'n':
-                  *kp = '\n';
-                  lp++;
-                  break;
-                case 't':
-                  *kp = '\t';
-                  lp++;
-                  break;
-                case 'r':
-                  *kp = '\r';
-                  lp++;
-                  break;
-                case 'f':
-                  *kp = '\f';
-                  lp++;
-                  break;
-                case 'b':
-                  *kp = '\b';
-                  lp++;
-                  break;
-                case 'a':
-                  *kp = '\a';
-                  lp++;
-                  break;
-                case 'v':
-                  *kp = '\v';
-                  lp++;
-                  break;
-                default:
-                  fprintf (stderr, "invalid escape sequence in string: %s\n", line);
-                  exit (1);
-                }
-            }
-          else if (c == '"')
-            break;
+                if (_verbatim_declarations_end != NULL)
+                  {
+                    fprintf (stderr, "line %u:"
+                             " %%{...%%} section already closed\n",
+                             lineno);
+                    exit (1);
+                  }
+                _verbatim_declarations_end = p;
+                /* Give a warning if the rest of the line is nonempty.  */
+                bool nonempty_line = false;
+                const char *q;
+                for (q = p + 2; q < line_end; q++)
+                  {
+                    if (*q == '\n')
+                      {
+                        q++;
+                        break;
+                      }
+                    if (!(*q == ' ' || *q == '\t'))
+                      nonempty_line = true;
+                  }
+                if (nonempty_line)
+                  fprintf (stderr, "line %u:"
+                           " warning: junk after %%} is ignored\n",
+                           lineno);
+              }
+            else if (_verbatim_declarations != NULL
+                     && _verbatim_declarations_end == NULL)
+              {
+                fprintf (stderr, "line %u:"
+                         " warning: %% directives are ignored"
+                         " inside the %%{...%%} section\n",
+                         lineno);
+              }
+            else
+              {
+                fprintf (stderr, "line %u: unrecognized %% directive\n",
+                         lineno);
+                exit (1);
+              }
+          }
+        else if (!(_verbatim_declarations != NULL
+                   && _verbatim_declarations_end == NULL))
+          {
+            /* Append the line to struct_decl.  */
+            size_t old_len = (struct_decl ? strlen (struct_decl) : 0);
+            size_t line_len = line_end - p;
+            size_t new_len = old_len + line_len + 1;
+            char *new_struct_decl = new char[new_len];
+            if (old_len > 0)
+              memcpy (new_struct_decl, struct_decl, old_len);
+            memcpy (new_struct_decl + old_len, p, line_len);
+            new_struct_decl[old_len + line_len] = '\0';
+            if (struct_decl)
+              delete[] struct_decl;
+            struct_decl = new_struct_decl;
+          }
+        lineno++;
+        p = line_end;
+      }
+    if (_verbatim_declarations != NULL && _verbatim_declarations_end == NULL)
+      {
+        fprintf (stderr, "line %u: unterminated %%{ section\n",
+                 _verbatim_declarations_lineno);
+        exit (1);
+      }
+
+    /* Determine _struct_decl, _return_type, _struct_tag.  */
+    if (option[TYPE])
+      {
+        if (struct_decl)
+          {
+            /* Drop leading whitespace.  */
+            while (struct_decl[0] == '\n' || struct_decl[0] == ' '
+                   || struct_decl[0] == '\t')
+              struct_decl++;
+            /* Drop trailing whitespace.  */
+            for (char *p = struct_decl + strlen (struct_decl); p > struct_decl;)
+              if (p[-1] == '\n' || p[-1] == ' ' || p[-1] == '\t')
+                *--p = '\0';
+              else
+                break;
+          }
+        if (struct_decl == NULL || struct_decl[0] == '\0')
+          {
+            fprintf (stderr, "missing struct declaration"
+                     " for option --struct-type\n");
+            exit (1);
+          }
+        if (struct_decl)
+          {
+            /* Ensure trailing semicolon.  */
+            size_t old_len = strlen (struct_decl);
+            if (struct_decl[old_len - 1] != ';')
+              {
+                char *new_struct_decl = new char[old_len + 2];
+                memcpy (new_struct_decl, struct_decl, old_len);
+                new_struct_decl[old_len] = ';';
+                new_struct_decl[old_len + 1] = '\0';
+                delete[] struct_decl;
+                struct_decl = new_struct_decl;
+              }
+          }
+        /* Set _struct_decl to the entire declaration.  */
+        _struct_decl = struct_decl;
+        /* Set _struct_tag to the naked "struct something".  */
+        const char *p;
+        for (p = struct_decl; *p && *p != '{' && *p != '\n'; p++)
+          ;
+        for (; p > struct_decl;)
+          if (p[-1] == '\n' || p[-1] == ' ' || p[-1] == '\t')
+            --p;
          else
-            {
-              *kp = c;
-              lp++;
-            }
-          kp++;
-        }
-      lp++;
-      if (*lp != '\0')
-        {
-          if (strchr (delimiters, *lp) == NULL)
-            {
-              fprintf (stderr, "string not followed by delimiter: %s\n", line);
-              exit (1);
-            }
-          lp++;
-        }
-      return new Keyword_List (_factory->create_keyword (key, kp - key, option[TYPE] ? lp : ""));
-    }
-  else
-    {
-      /* Not a string. Look for the delimiter. */
-      int len = strcspn (line, delimiters);
-      const char *rest;
+            break;
+        size_t struct_tag_length = p - struct_decl;
+        char *struct_tag = new char[struct_tag_length + 1];
+        memcpy (struct_tag, struct_decl, struct_tag_length);
+        struct_tag[struct_tag_length] = '\0';
+        _struct_tag = struct_tag;
+        /* The return type of the lookup function is "struct something *".
+           No "const" here, because if !option[CONST], some user code might
+           want to modify the structure. */
+        char *return_type = new char[struct_tag_length + 3];
+        memcpy (return_type, struct_decl, struct_tag_length);
+        return_type[struct_tag_length] = ' ';
+        return_type[struct_tag_length + 1] = '*';
+        return_type[struct_tag_length + 2] = '\0';
+        _return_type = return_type;
+      }
+  }

-      if (line[len] == '\0')
-        rest = "";
-      else
-        /* Skip the first delimiter. */
-        rest = &line[len + 1];
-      return new Keyword_List (_factory->create_keyword (line, len, option[TYPE] ? rest : ""));
-    }
+  /* Parse the keywords section.  */
+  {
+    Keyword_List **list_tail = &_head;
+    const char *delimiters = option.get_delimiters ();
+    unsigned int lineno = keywords_lineno;
+    for (const char *line = keywords; line < keywords_end; )
+      {
+        const char *line_end;
+        line_end = (const char *) memchr (line, '\n', keywords_end - line);
+        if (line_end != NULL)
+          line_end++;
+        else
+          line_end = keywords_end;
+
+        if (line[0] == '#')
+          ; /* Comment line.  */
+        else if (line[0] == '%')
+          {
+            fprintf (stderr, "line %u:"
+                     " declarations are not allowed in the keywords section.\n"
+                     "To declare a keyword starting with %%, enclose it in"
+                     " double-quotes.\n",
+                     lineno);
+            exit (1);
+          }
+        else
+          {
+            /* An input line carrying a keyword.  */
+            const char *keyword;
+            size_t keyword_length;
+            const char *rest;
+
+            if (line[0] == '"')
+              {
+                /* Parse a string in ANSI C syntax.  */
+                char *kp = new char[line_end-line];
+                keyword = kp;
+                const char *lp = line + 1;
+
+                for (;;)
+                  {
+                    if (lp == line_end)
+                      {
+                        fprintf (stderr, "line %u: unterminated string\n",
+                                 lineno);
+                        exit (1);
+                      }
+
+                    char c = *lp;
+                    if (c == '\\')
+                      {
+                        c = *++lp;
+                        switch (c)
+                          {
+                          case '0': case '1': case '2': case '3':
+                          case '4': case '5': case '6': case '7':
+                            {
+                              int code = 0;
+                              int count = 0;
+                              while (count < 3 && *lp >= '0' && *lp <= '7')
+                                {
+                                  code = (code << 3) + (*lp - '0');
+                                  lp++;
+                                  count++;
+                                }
+                              if (code > UCHAR_MAX)
+                                fprintf (stderr,
+                                         "line %u: octal escape out of range\n",
+                                         lineno);
+                              *kp = static_cast<char>(code);
+                              break;
+                            }
+                          case 'x':
+                            {
+                              int code = 0;
+                              int count = 0;
+                              lp++;
+                              while ((*lp >= '0' && *lp <= '9')
+                                     || (*lp >= 'A' && *lp <= 'F')
+                                     || (*lp >= 'a' && *lp <= 'f'))
+                                {
+                                  code = (code << 4)
+                                         + (*lp >= 'A' && *lp <= 'F'
+                                            ? *lp - 'A' + 10 :
+                                            *lp >= 'a' && *lp <= 'f'
+                                            ? *lp - 'a' + 10 :
+                                            *lp - '0');
+                                  lp++;
+                                  count++;
+                                }
+                              if (count == 0)
+                                fprintf (stderr, "line %u: hexadecimal escape"
+                                         " without any hex digits\n",
+                                         lineno);
+                              if (code > UCHAR_MAX)
+                                fprintf (stderr, "line %u: hexadecimal escape"
+                                         " out of range\n",
+                                         lineno);
+                              *kp = static_cast<char>(code);
+                              break;
+                            }
+                          case '\\': case '\'': case '"':
+                            *kp = c;
+                            lp++;
+                            break;
+                          case 'n':
+                            *kp = '\n';
+                            lp++;
+                            break;
+                          case 't':
+                            *kp = '\t';
+                            lp++;
+                            break;
+                          case 'r':
+                            *kp = '\r';
+                            lp++;
+                            break;
+                          case 'f':
+                            *kp = '\f';
+                            lp++;
+                            break;
+                          case 'b':
+                            *kp = '\b';
+                            lp++;
+                            break;
+                          case 'a':
+                            *kp = '\a';
+                            lp++;
+                            break;
+                          case 'v':
+                            *kp = '\v';
+                            lp++;
+                            break;
+                          default:
+                            fprintf (stderr, "line %u: invalid escape sequence"
+                                     " in string\n",
+                                     lineno);
+                            exit (1);
+                          }
+                      }
+                    else if (c == '"')
+                      break;
+                    else
+                      {
+                        *kp = c;
+                        lp++;
+                      }
+                    kp++;
+                  }
+                lp++;
+                if (lp < line_end && *lp != '\n')
+                  {
+                    if (strchr (delimiters, *lp) == NULL)
+                      {
+                        fprintf (stderr, "line %u: string not followed"
+                                 " by delimiter\n",
+                                 lineno);
+                        exit (1);
+                      }
+                    lp++;
+                  }
+                keyword_length = kp - keyword;
+                if (option[TYPE])
+                  {
+                    char *line_rest = new char[line_end - lp + 1];
+                    memcpy (line_rest, lp, line_end - lp);
+                    line_rest[line_end - lp -
+                              (line_end > lp && line_end[-1] == '\n' ? 1 : 0)]
+                      = '\0';
+                    rest = line_rest;
+                  }
+                else
+                  rest = "";
+              }
+            else
+              {
+                /* Not a string.  Look for the delimiter.  */
+                const char *lp = line;
+                for (;;)
+                  {
+                    if (!(lp < line_end && *lp != '\n'))
+                      {
+                        keyword = line;
+                        keyword_length = lp - line;
+                        rest = "";
+                        break;
+                      }
+                    if (strchr (delimiters, *lp) != NULL)
+                      {
+                        keyword = line;
+                        keyword_length = lp - line;
+                        lp++;
+                        if (option[TYPE])
+                          {
+                            char *line_rest = new char[line_end - lp + 1];
+                            memcpy (line_rest, lp, line_end - lp);
+                            line_rest[line_end - lp -
+                                      (line_end > lp && line_end[-1] == '\n'
+                                       ? 1 : 0)]
+                              = '\0';
+                            rest = line_rest;
+                          }
+                        else
+                          rest = "";
+                        break;
+                      }
+                    lp++;
+                  }
+              }
+
+            /* Allocate Keyword and add it to the list.  */
+            Keyword *new_kw = _factory->create_keyword (keyword, keyword_length,
+                                                        rest);
+            *list_tail = new Keyword_List (new_kw);
+            list_tail = &(*list_tail)->rest();
+          }
+
+        lineno++;
+        line = line_end;
+      }
+    *list_tail = NULL;
+
+    if (_head == NULL)
+      {
+        fprintf (stderr, "No keywords in input file!\n");
+        exit (1);
+      }
+  }
+
+  /* To be freed in the destructor.  */
+  _input = input;
 }

-void
-Input::read_keys ()
+Input::~Input ()
 {
-  char *ptr;
-
-  _include_src = save_include_src ();
-  set_output_types ();
-
-  /* Oops, problem with the input file. */
-  if (! (ptr = Read_Line::read_next_line ()))
-    {
-      fprintf (stderr, "No words in input file, did you forget to prepend %s or use -t accidentally?\n", "%%");
-      exit (1);
-    }
-
-  /* Read in all the keywords from the input file. */
-  const char *delimiters = option.get_delimiters ();
-
-  _head = parse_line (ptr, delimiters);
-
-  for (Keyword_List *temp = _head;
-       (ptr = Read_Line::read_next_line ()) && strcmp (ptr, "%%");
-       temp = temp->rest())
-    temp->rest() = parse_line (ptr, delimiters);
-
-  /* See if any additional C code is included at end of this file. */
-  _additional_code = false;
-  if (ptr)
-    _additional_code = true;
+  delete[] _input;
 }
--- a/src/input.h
+++ b/src/input.h
@@ -26,32 +26,39 @@
 #ifndef input_h
 #define input_h 1

-#include "read-line.h"
+#include <stdio.h>
 #include "keyword-list.h"

-class Input : private Read_Line
+class Input
 {
 public:
                        Input (FILE *stream, Keyword_Factory *keyword_factory);
-  void                  read_keys ();
+                        ~Input ();
+  void                  read_input ();
 private:
-#ifndef strcspn
-  static int            strcspn (const char *s, const char *reject);
-#endif
-  void                  set_output_types ();
-  const char *          get_array_type ();
-  const char *          save_include_src ();
-  const char *          get_special_input (char delimiter);
+  /* Input stream.  */
+  FILE *                _stream;
+  /* Creates the keywords.  */
+  Keyword_Factory * const _factory;
+  /* Memory block containing the entire input; released with delete[] by ~Input.  */
+  char *                _input;
 public:
-  const char *          _array_type;                           /* Pointer to the type for word list. */
-  const char *          _return_type;                          /* Pointer to return type for lookup function. */
-  const char *          _struct_tag;                           /* Shorthand for user-defined struct tag type. */
-  const char *          _include_src;                          /* C source code to be included verbatim. */
-  bool                  _additional_code;                      /* True if any additional C code is included. */
-  Keyword_Factory * const _factory;                            /* Creates the keywords. */
-  Keyword_List *        _head;                            /* Points to the head of the linked list. */
-private:
-  Keyword_List *        parse_line (const char *line, const char *delimiters);
+  /* The C code from the declarations section: start, end (exclusive), and the line number used for its #line.  */
+  const char *          _verbatim_declarations;
+  const char *          _verbatim_declarations_end;
+  unsigned int          _verbatim_declarations_lineno;
+  /* The C code from the end of the file: start, end (exclusive), and the line number used for its #line.  */
+  const char *          _verbatim_code;
+  const char *          _verbatim_code_end;
+  unsigned int          _verbatim_code_lineno;
+  /* Declaration of struct type for a keyword and its attributes.  */
+  const char *          _struct_decl;
+  /* Return type of the lookup function.  */
+  const char *          _return_type;
+  /* Shorthand for user-defined struct tag type.  */
+  const char *          _struct_tag;
+  /* List of all keywords.  */
+  Keyword_List *        _head;
 };

 #endif
--- a/src/main.cc
+++ b/src/main.cc
@@ -64,7 +64,7 @@ main (int argc, char *argv[])
  /* Initialize the keyword list.  */
  KeywordExt_Factory factory;
  Input inputter (stdin, &factory);
-  inputter.read_keys ();
+  inputter.read_input ();
  /* We can cast the keyword list to KeywordExt_List* because its list
     elements were created by KeywordExt_Factory.  */
  KeywordExt_List* list = static_cast<KeywordExt_List*>(inputter._head);
@@ -75,11 +75,15 @@ main (int argc, char *argv[])

  /* Output the hash function code.  */
  Output outputter (searcher._head,
-                    inputter._array_type,
+                    inputter._struct_decl,
                    inputter._return_type,
                    inputter._struct_tag,
-                    inputter._additional_code,
-                    inputter._include_src,
+                    inputter._verbatim_declarations,
+                    inputter._verbatim_declarations_end,
+                    inputter._verbatim_declarations_lineno,
+                    inputter._verbatim_code,
+                    inputter._verbatim_code_end,
+                    inputter._verbatim_code_lineno,
                    searcher._total_keys,
                    searcher._total_duplicates,
                    searcher._max_key_len,
--- a/src/output.cc
+++ b/src/output.cc
@@ -80,17 +80,27 @@ static const char *char_to_index;
     keyword list.  After Search::sort(), we know that they form blocks of
     consecutive list elements.
 */
-Output::Output (KeywordExt_List *head, const char *array_type,
+Output::Output (KeywordExt_List *head, const char *struct_decl,
                const char *return_type, const char *struct_tag,
-                bool additional_code, const char *include_src,
+                const char *verbatim_declarations,  /* Start of the declarations-section C code.  */
+                const char *verbatim_declarations_end,  /* Its end; nothing is printed unless start < end.  */
+                unsigned int verbatim_declarations_lineno,  /* Line number for its #line directive.  */
+                const char *verbatim_code, const char *verbatim_code_end,  /* Same kind of range, for the C code from the end of the file.  */
+                unsigned int verbatim_code_lineno,  /* Line number for its #line directive.  */
                int total_keys, int total_duplicates, int max_key_len,
                int min_key_len, int alpha_size, const int *occurrences,
                const int *asso_values)
-  : _head (head), _array_type (array_type), _return_type (return_type),
-    _struct_tag (struct_tag), _additional_code (additional_code),
-    _include_src (include_src), _total_keys (total_keys),
-    _total_duplicates (total_duplicates), _max_key_len (max_key_len),
-    _min_key_len (min_key_len), _alpha_size (alpha_size),
+  : _head (head), _struct_decl (struct_decl), _return_type (return_type),
+    _struct_tag (struct_tag),
+    _verbatim_declarations (verbatim_declarations),
+    _verbatim_declarations_end (verbatim_declarations_end),
+    _verbatim_declarations_lineno (verbatim_declarations_lineno),
+    _verbatim_code (verbatim_code),
+    _verbatim_code_end (verbatim_code_end),
+    _verbatim_code_lineno (verbatim_code_lineno),
+    _total_keys (total_keys), _total_duplicates (total_duplicates),
+    _max_key_len (max_key_len), _min_key_len (min_key_len),
+    _alpha_size (alpha_size),
    _occurrences (occurrences), _asso_values (asso_values)
 {
 }
@@ -1477,11 +1487,19 @@ Output::output ()
    printf ("C++");
  printf (" code produced by gperf version %s */\n", version_string);
  option.print_options ();
+  printf ("\n");

-  printf ("%s\n", _include_src);
+  if (_verbatim_declarations < _verbatim_declarations_end)
+    {
+      if (option.get_input_file_name ())
+        printf ("#line %u \"%s\"\n",
+                _verbatim_declarations_lineno, option.get_input_file_name ());
+      fwrite (_verbatim_declarations, 1,
+              _verbatim_declarations_end - _verbatim_declarations, stdout);
+    }

  if (option[TYPE] && !option[NOTYPE]) /* Output type declaration now, reference it later on.... */
-    printf ("%s;\n", _array_type);
+    printf ("%s\n", _struct_decl);

  if (option[INCLUDE])
    printf ("#include <string.h>\n"); /* Declare strlen(), strcmp(), strncmp(). */
@@ -1519,9 +1537,13 @@ Output::output ()

  output_lookup_function ();

-  if (_additional_code)
-    for (int c; (c = getchar ()) != EOF; putchar (c))
-      ;
+  if (_verbatim_code < _verbatim_code_end)
+    {
+      if (option.get_input_file_name ())
+        printf ("#line %u \"%s\"\n",
+                _verbatim_code_lineno, option.get_input_file_name ());
+      fwrite (_verbatim_code, 1, _verbatim_code_end - _verbatim_code, stdout);
+    }

  fflush (stdout);
 }
--- a/src/output.h
+++ b/src/output.h
@@ -37,11 +37,15 @@ class Output
 public:
  /* Constructor.  */
                        Output (KeywordExt_List *head,
-                                const char *array_type,
+                                const char *struct_decl,
                                const char *return_type,
                                const char *struct_tag,
-                                bool additional_code,
-                                const char *include_src,
+                                const char *verbatim_declarations,
+                                const char *verbatim_declarations_end,
+                                unsigned int verbatim_declarations_lineno,
+                                const char *verbatim_code,
+                                const char *verbatim_code_end,
+                                unsigned int verbatim_code_lineno,
                                int total_keys,
                                int total_duplicates,
                                int max_key_len, int min_key_len,
@@ -91,16 +95,20 @@ private:
  /* Linked list of keywords.  */
  KeywordExt_List *     _head;

-  /* Pointer to the type for word list. */
-  const char * const    _array_type;
+  /* Declaration of struct type for a keyword and its attributes.  */
+  const char * const    _struct_decl;
  /* Pointer to return type for lookup function. */
  const char *          _return_type;
  /* Shorthand for user-defined struct tag type. */
  const char *          _struct_tag;
-  /* True if any additional C code is included. */
-  bool const            _additional_code;
-  /* C source code to be included verbatim. */
-  const char * const    _include_src;
+  /* The C code from the declarations section.  */
+  const char * const    _verbatim_declarations;
+  const char * const    _verbatim_declarations_end;
+  unsigned int const    _verbatim_declarations_lineno;
+  /* The C code from the end of the file.  */
+  const char * const    _verbatim_code;
+  const char * const    _verbatim_code_end;
+  unsigned int const    _verbatim_code_lineno;
  /* Total number of keys, counting duplicates. */
  int const             _total_keys;
  /* Total number of duplicate hash values. */
--- a/src/read-line.cc
+++ b/src/read-line.cc
@@ -1,33 +0,0 @@
-/* Correctly reads an arbitrarily long string.
-
-   Copyright (C) 1989-1998, 2002 Free Software Foundation, Inc.
-   Written by Douglas C. Schmidt <schmidt@ics.uci.edu>
-   and Bruno Haible <bruno@clisp.org>.
-
-   This file is part of GNU GPERF.
-
-   GNU GPERF is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
-
-   GNU GPERF is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; see the file COPYING.
-   If not, write to the Free Software Foundation, Inc.,
-   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
-
-/* Specification. */
-#include "read-line.h"
-
-#ifndef __OPTIMIZE__
-
-#define INLINE /* not inline */
-#include "read-line.icc"
-#undef INLINE
-
-#endif /* not defined __OPTIMIZE__ */
--- a/src/read-line.h
+++ b/src/read-line.h
@@ -1,61 +0,0 @@
-/* This may look like C code, but it is really -*- C++ -*- */
-
-/* Reads arbitrarily long string from input file, returning it as a
-   dynamically allocated buffer.
-
-   Copyright (C) 1989-1998, 2002 Free Software Foundation, Inc.
-   Written by Douglas C. Schmidt <schmidt@ics.uci.edu>
-   and Bruno Haible <bruno@clisp.org>.
-
-   This file is part of GNU GPERF.
-
-   GNU GPERF is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
-
-   GNU GPERF is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; see the file COPYING.
-   If not, write to the Free Software Foundation, Inc.,
-   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
-
-#ifndef read_line_h
-#define read_line_h 1
-
-#include <stdio.h>
-#include "getline.h"
-
-/* An instance of this class is used for repeatedly reading lines of text
-   from an input stream.  */
-class Read_Line
-{
-public:
-
-  /* Initializes the instance with a given input stream.  */
-                        Read_Line (FILE *stream);
-
-  /* Reads the next line and returns it, excluding the terminating newline,
-     and ignoring lines starting with '#'.  Returns NULL on error or EOF.
-     The storage for the string is dynamically allocated and must be freed
-     through delete[].  */
-  char *                read_next_line ();
-
-private:
-  /* FILE pointer to the input stream.  */
-  FILE * const          _fp;
-};
-
-#ifdef __OPTIMIZE__
-
-#define INLINE inline
-#include "read-line.icc"
-#undef INLINE
-
-#endif
-
-#endif
--- a/src/read-line.icc
+++ b/src/read-line.icc
@@ -1,69 +0,0 @@
-/* Inline Functions for read-line.{h,cc}.
-
-   Copyright (C) 1989-1998, 2002 Free Software Foundation, Inc.
-   Written by Douglas C. Schmidt <schmidt@ics.uci.edu>
-   and Bruno Haible <bruno@clisp.org>.
-
-   This file is part of GNU GPERF.
-
-   GNU GPERF is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
-
-   GNU GPERF is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; see the file COPYING.
-   If not, write to the Free Software Foundation, Inc.,
-   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
-
-// This needs:
-//#include <stdio.h>
-//#include "getline.h"
-
-/* Initializes the instance with a given input stream.  */
-INLINE
-Read_Line::Read_Line (FILE *stream)
-  : _fp (stream)
-{
-}
-
-/* Reads the next line and returns it, excluding the terminating newline,
-   and ignoring lines starting with '#'.  Returns NULL on error or EOF.
-   The storage for the string is dynamically allocated and must be freed
-   through delete[].  */
-INLINE char *
-Read_Line::read_next_line ()
-{
-  int c;
-
-  while ((c = getc (_fp)) == '#')
-    {
-      while (c = getc (_fp), c != EOF && c != '\n')
-        ;
-
-      if (c == EOF)
-        return NULL;
-    }
-
-  if (c == EOF)
-    return NULL;
-
-  ungetc (c, stdin);
-
-  char *line = NULL;
-  size_t linesize = 0;
-  int length = get_line (&line, &linesize, _fp);
-  if (length < 0)
-    {
-      delete[] line;
-      return NULL;
-    }
-  if (length > 0 && line[length - 1] == '\n')
-    line[length - 1] = '\0';
-  return line;
-}
--- a/tests/c-parse.exp
+++ b/tests/c-parse.exp
@@ -1,5 +1,6 @@
 /* C code produced by gperf version 2.7.2 */
 /* Command-line: ../src/gperf -L C -F ', 0, 0' -p -j1 -i 1 -g -o -t -G -N is_reserved_word -k'1,3,$'  */
+
 /* Command-line: gperf -L KR-C -F ', 0, 0' -p -j1 -i 1 -g -o -t -N is_reserved_word -k1,3,$ c-parse.gperf  */ 
 struct resword { const char *name; short token; enum rid rid; };

--- a/tests/cplusplus.exp
+++ b/tests/cplusplus.exp
@@ -1,5 +1,6 @@
 /* C code produced by gperf version 2.7.2 */
 /* Command-line: ../src/gperf -L C -F ', 0, 0' -p -j1 -g -o -t -N is_reserved_word -k'1,4,7,$'  */
+
 /* Command-line: gperf -L KR-C -F ', 0, 0' -p -j1 -g -o -t -N is_reserved_word -k1,4,$,7 gplus.gperf  */
 struct resword { const char *name; short token; enum rid rid;};

--- a/tests/gpc.exp
+++ b/tests/gpc.exp
@@ -1,5 +1,6 @@
 /* C code produced by gperf version 2.7.2 */
 /* Command-line: ../src/gperf -g -o -j1 -t -p -N is_reserved_word  */
+
 /* ISO Pascal 7185 reserved words.
 *
 * For GNU Pascal compiler (GPC) by jtv@hut.fi
--- a/tests/java.exp
+++ b/tests/java.exp
@@ -1,5 +1,6 @@
 /* C code produced by gperf version 2.7.2 */
 /* Command-line: ../src/gperf -L C -F ', 0' -p -t -j1 -i 1 -g -o -N java_keyword -k'1,3,$'  */
+
 /* Keyword definition for the GNU compiler for the Java(TM) language.
   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
   Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
--- a/tests/objc.exp
+++ b/tests/objc.exp
@@ -1,5 +1,6 @@
 /* C code produced by gperf version 2.7.2 */
 /* Command-line: ../src/gperf -p -j1 -i 1 -g -o -t -N is_reserved_word -k'1,3,$'  */
+
 /* Command-line: gperf -p -j1 -i 1 -g -o -t -N is_reserved_word -k1,3,$ objc.gperf  */ 
 struct resword { char *name; short token; enum rid rid; };

--- a/tests/test-4.exp
+++ b/tests/test-4.exp
@@ -1,5 +1,6 @@
 /* C code produced by gperf version 2.7.2 */
 /* Command-line: ../src/gperf -D -p -t  */
+
 /* Command-line: gperf -L KR-C -F ', 0, 0' -p -j1 -i 1 -g -o -t -N is_reserved_word -k1,3,$ c-parse.gperf  */ 
 struct resword { const char *name; short token; enum rid rid; };