diff --git a/ChangeLog b/ChangeLog index 4f36570..2b6b146 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,39 @@ 2002-11-10 Bruno Haible + Rewrite the input routines. + * src/input.h: Don't include read-line.h. + (Input): Don't inherit from class Read_Line. + (Input::read_keys, Input::strcspn, Input::set_output_types, + Input::get_array_type, Input::save_include_src, + Input::get_special_input): Remove declarations. + (Input::read_input): New declaration. + (Input::_struct_decl): Renamed from Input::_array_type. + (Input::_verbatim_declarations): Renamed from Input::_include_src. + (Input::_verbatim_code): Replaces Input::_additional_code. + * src/input.cc: Completely rewritten. + * src/output.h (Output::Output): Update the verbatim_* arguments. + (Output::_struct_decl): Renamed from Output::_array_type. + (Output::_verbatim_declarations): Renamed from Output::_include_src. + (Output::_verbatim_code): Replaces Output::_additional_code. + * src/output.cc (Output::Output): Update the verbatim_* arguments. + (Output::output): Output the verbatim_* code pieces with #line. + * src/main.cc (main): Call Input::read_input instead of + Input::read_keys. Update Output::Output arguments. + * src/read-line.h: Remove file. + * src/read-line.cc, src/read-line.icc: Remove files. + * src/Makefile.in (OBJECTS): Remove read-line.o. + (READ_LINE_H): Remove variable. + (INPUT_H): Update. + (read-line.o): Remove rule. + * doc/gperf.texi (Declarations): Correct the example. + (Keywords): Mention that lines starting with % are forbidden here. + * tests/c-parse.exp: Update. + * tests/cplusplus.exp: Update. + * tests/gpc.exp: Update. + * tests/java.exp: Update. + * tests/objc.exp: Update. + * tests/test-4.exp: Update. + * src/options.h (Options::get_input_file_name): New declaration. (Options::_input_file_name): New field. * src/options.icc (Options::get_input_file_name): New method. diff --git a/NEWS b/NEWS index d638a60..a0f1715 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,9 @@ New in 2.8: * Added option -m/--multiple-iterations that reduces the size of the generated table. +* If the input file is given by name, the output file will now contain + #line directives referring to the input file. +* Bug fixes. New in 2.7.2: diff --git a/doc/gperf.texi b/doc/gperf.texi index 224bec6..38b6bdc 100644 --- a/doc/gperf.texi +++ b/doc/gperf.texi @@ -163,8 +163,8 @@ In addition, Adam de Boor and Nels Olson provided many tips and insights that greatly helped improve the quality and functionality of @code{gperf}. @item -A testsuite was added by Bruno Haible. He also rewrote the output -routines for better reliability. +A testsuite was added by Bruno Haible. He also rewrote the input routines +and the output routines for better reliability. @end itemize @node Motivation, Search Structures, Contributors, Top @@ -389,15 +389,16 @@ march, 3, 31, 31 @end group @end example -It is possible to omit the declaration section entirely. In this case +It is possible to omit the declaration section entirely, if the @samp{-t} +option is not given. In this case the input file begins directly with the first keyword line, e.g.: @example @group -january, 1, 31, 31 -february, 2, 28, 29 -march, 3, 31, 31 -april, 4, 30, 30 +january +february +march +april ... @end group @end example @@ -408,7 +409,9 @@ april, 4, 30, 30 The second input file format section contains lines of keywords and any associated attributes you might supply. A line beginning with @samp{#} in the first column is considered a comment. Everything following the -@samp{#} is ignored, up to and including the following newline. +@samp{#} is ignored, up to and including the following newline. A line +beginning with @samp{%} in the first column is an option declaration and +must not occur within the keywords section. The first field of each non-comment line is always the keyword itself. It can be given in two ways: as a simple name, i.e., without surrounding diff --git a/src/Makefile.in b/src/Makefile.in index c61d23e..1aecdcf 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -61,7 +61,7 @@ SHELL = /bin/sh VPATH = $(srcdir) -OBJECTS = version.o options.o read-line.o keyword.o keyword-list.o \ +OBJECTS = version.o options.o keyword.o keyword-list.o \ input.o bool-array.o hash-table.o search.o output.o main.o LIBS = ../lib/libgp.a @GPERF_LIBM@ CPPFLAGS = -I. -I$(srcdir)/../lib @@ -86,10 +86,9 @@ $(TARGETPROG): $(OBJECTS) CONFIG_H = config.h VERSION_H = version.h OPTIONS_H = options.h options.icc -READ_LINE_H = read-line.h read-line.icc KEYWORD_H = keyword.h keyword.icc KEYWORD_LIST_H = keyword-list.h keyword-list.icc $(KEYWORD_H) -INPUT_H = input.h $(READ_LINE_H) $(KEYWORD_LIST_H) +INPUT_H = input.h $(KEYWORD_LIST_H) BOOL_ARRAY_H = bool-array.h bool-array.icc $(OPTIONS_H) HASH_TABLE_H = hash-table.h $(KEYWORD_H) SEARCH_H = search.h $(KEYWORD_LIST_H) $(BOOL_ARRAY_H) @@ -99,8 +98,6 @@ version.o : version.cc $(VERSION_H) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/version.cc options.o : options.cc $(OPTIONS_H) $(VERSION_H) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/options.cc -read-line.o : read-line.cc $(READ_LINE_H) - $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/read-line.cc keyword.o : keyword.cc $(KEYWORD_H) $(OPTIONS_H) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/keyword.cc keyword-list.o : keyword-list.cc $(KEYWORD_LIST_H) diff --git a/src/input.cc b/src/input.cc index 6645570..43e502e 100644 --- a/src/input.cc +++ b/src/input.cc @@ -28,332 +28,569 @@ #include /* declares strncpy(), strchr() */ #include /* defines UCHAR_MAX etc. */ #include "options.h" +#include "getline.h" Input::Input (FILE *stream, Keyword_Factory *keyword_factory) - : Read_Line (stream), _factory (keyword_factory) + : _stream (stream), _factory (keyword_factory) { } -/* Gathers the input stream into a buffer until one of two things occur: - - 1. We read a '%' followed by a '%' - 2. We read a '%' followed by a '}' - - The first symbolizes the beginning of the keyword list proper, - The second symbolizes the end of the C source code to be generated - verbatim in the output file. - - I assume that the keys are separated from the optional preceding struct - declaration by a consecutive % followed by either % or } starting in - the first column. The code below uses an expandible buffer to scan off - and return a pointer to all the code (if any) appearing before the delimiter. */ - -const char * -Input::get_special_input (char delimiter) +/* Reads the entire input file. */ +void +Input::read_input () { - int size = 80; - char *buf = new char[size]; - int c, i; + /* The input file has the following structure: + DECLARATIONS + %% + KEYWORDS + %% + ADDITIONAL_CODE + Since the DECLARATIONS and the ADDITIONAL_CODE sections are optional, + we have to read the entire file in the case there is only one %% + separator line, in order to determine whether the structure is + DECLARATIONS + %% + KEYWORDS + or + KEYWORDS + %% + ADDITIONAL_CODE + When the option -t is given or when the first section contains + declaration lines starting with %, we go for the first interpretation, + otherwise for the second interpretation. */ - for (i = 0; (c = getchar ()) != EOF; i++) + char *input = NULL; + size_t input_size = 0; + int input_length = get_delim (&input, &input_size, EOF, _stream); + if (input_length < 0) { - if (c == '%') - { - if ((c = getchar ()) == delimiter) - { - - while ((c = getchar ()) != '\n') - ; /* discard newline */ - - if (i == 0) - return ""; - else - { - buf[delimiter == '%' && buf[i - 2] == ';' ? i - 2 : i - 1] = '\0'; - return buf; - } - } - else - buf[i++] = '%'; - } - else if (i >= size) /* Yikes, time to grow the buffer! */ - { - char *temp = new char[size *= 2]; - int j; - - for (j = 0; j < i; j++) - temp[j] = buf[j]; - - buf = temp; - } - buf[i] = c; - } - - return 0; /* Problem here. */ -} - -/* Stores any C text that must be included verbatim into the - generated code output. */ - -const char * -Input::save_include_src () -{ - int c; - - if ((c = getchar ()) != '%') - ungetc (c, stdin); - else if ((c = getchar ()) != '{') - { - fprintf (stderr, "internal error, %c != '{' on line %d in file %s", c, __LINE__, __FILE__); + if (ferror (_stream)) + fprintf (stderr, "error while reading input file\n"); + else + fprintf (stderr, "The input file is empty!\n"); exit (1); } - else - return get_special_input ('}'); - return ""; -} -/* Determines from the input file whether the user wants to build a table - from a user-defined struct, or whether the user is content to simply - use the default array of keys. */ + /* We use input_end as a limit, in order to cope with NUL bytes in the + input. But note that one trailing NUL byte has been added after + input_end, for convenience. */ + char *input_end = input + input_length; -const char * -Input::get_array_type () -{ - return get_special_input ('%'); -} + const char *declarations; + const char *declarations_end; + const char *keywords; + const char *keywords_end; + unsigned int keywords_lineno; -/* strcspn - find length of initial segment of S consisting entirely - of characters not from REJECT (borrowed from Henry Spencer's - ANSI string package, when GNU libc comes out I'll replace this...). */ - -#ifndef strcspn -inline int -Input::strcspn (const char *s, const char *reject) -{ - const char *scan; - const char *rej_scan; - int count = 0; - - for (scan = s; *scan; scan++) + /* Break up the input into the three sections. */ + { + const char *separator[2] = { NULL, NULL }; + unsigned int separator_lineno[2] = { 0, 0 }; + int separators = 0; { - - for (rej_scan = reject; *rej_scan; rej_scan++) - if (*scan == *rej_scan) - return count; - - count++; + unsigned int lineno = 1; + for (const char *p = input; p < input_end; ) + { + if (p[0] == '%' && p[1] == '%') + { + separator[separators] = p; + separator_lineno[separators] = lineno; + if (++separators == 2) + break; + } + lineno++; + p = (const char *) memchr (p, '\n', input_end - p); + if (p != NULL) + p++; + else + p = input_end; + } } - return count; -} -#endif + bool has_declarations; + if (separators == 1) + { + if (option[TYPE]) + has_declarations = true; + else + { + has_declarations = false; + for (const char *p = input; p < separator[0]; ) + { + if (p[0] == '%') + { + has_declarations = true; + break; + } + p = (const char *) memchr (p, '\n', separator[0] - p); + if (p != NULL) + p++; + else + p = separator[0]; + } + } + } + else + has_declarations = (separators > 0); -/* Sets up the Return_Type, the Struct_Tag type and the Array_Type - based upon various user Options. */ + if (has_declarations) + { + declarations = input; + declarations_end = separator[0]; + /* Give a warning if the separator line is nonempty. */ + bool nonempty_line = false; + const char *p; + for (p = declarations_end + 2; p < input_end; ) + { + if (*p == '\n') + { + p++; + break; + } + if (!(*p == ' ' || *p == '\t')) + nonempty_line = true; + p++; + } + if (nonempty_line) + fprintf (stderr, "line %u: warning: junk after %%%% is ignored\n", + separator_lineno[0]); + keywords = p; + keywords_lineno = separator_lineno[0] + 1; + } + else + { + declarations = NULL; + declarations_end = NULL; + keywords = input; + keywords_lineno = 1; + } -void -Input::set_output_types () -{ - _array_type = NULL; + if (separators > (has_declarations ? 1 : 0)) + { + keywords_end = separator[separators-1]; + _verbatim_code = separator[separators-1] + 2; + _verbatim_code_end = input_end; + _verbatim_code_lineno = separator_lineno[separators-1]; + } + else + { + keywords_end = input_end; + _verbatim_code = NULL; + _verbatim_code_end = NULL; + _verbatim_code_lineno = 0; + } + } + + /* Parse the declarations section. */ + + _verbatim_declarations = NULL; + _verbatim_declarations_end = NULL; + _verbatim_declarations_lineno = 0; + _struct_decl = NULL; _return_type = NULL; _struct_tag = NULL; - if (option[TYPE]) - { - _array_type = get_array_type (); - if (!_array_type) - /* Something's wrong, but we'll catch it later on, in read_keys()... */ - return; - /* Yow, we've got a user-defined type... */ - int i = strcspn (_array_type, "{\n\0"); - /* Remove trailing whitespace. */ - while (i > 0 && strchr (" \t", _array_type[i-1])) - i--; - int struct_tag_length = i; + { + unsigned int lineno = 1; + char *struct_decl = NULL; + for (const char *p = declarations; p < declarations_end; ) + { + const char *line_end; + line_end = (const char *) memchr (p, '\n', declarations_end - p); + if (line_end != NULL) + line_end++; + else + line_end = declarations_end; - /* Set `struct_tag' to a naked "struct something". */ - char *structtag = new char[struct_tag_length + 1]; - strncpy (structtag, _array_type, struct_tag_length); - structtag[struct_tag_length] = '\0'; - _struct_tag = structtag; - - /* The return type of the lookup function is "struct something *". - No "const" here, because if !option[CONST], some user code might want - to modify the structure. */ - char *rettype = new char[struct_tag_length + 3]; - strncpy (rettype, _array_type, struct_tag_length); - rettype[struct_tag_length] = ' '; - rettype[struct_tag_length + 1] = '*'; - rettype[struct_tag_length + 2] = '\0'; - _return_type = rettype; - } -} - -/* Extracts a key from an input line and creates a new Keyword_List for - it. */ - -Keyword_List * -Input::parse_line (const char *line, const char *delimiters) -{ - if (*line == '"') - { - /* Parse a string in ANSI C syntax. */ - char *key = new char[strlen(line)]; - char *kp = key; - const char *lp = line + 1; - - for (; *lp;) - { - char c = *lp; - - if (c == '\0') - { - fprintf (stderr, "unterminated string: %s\n", line); - exit (1); - } - else if (c == '\\') - { - c = *++lp; - switch (c) - { - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': + if (*p == '%') + { + if (p[1] == '{') + { + /* Handle %{. */ + if (_verbatim_declarations != NULL) { - int code = 0; - int count = 0; - while (count < 3 && *lp >= '0' && *lp <= '7') - { - code = (code << 3) + (*lp - '0'); - lp++; - count++; - } - if (code > UCHAR_MAX) - fprintf (stderr, "octal escape out of range: %s\n", line); - *kp = static_cast(code); - break; + fprintf (stderr, "lines %u and %u:" + " only one %%{...%%} section is allowed\n", + _verbatim_declarations_lineno, lineno); + exit (1); } - case 'x': + _verbatim_declarations = p + 2; + _verbatim_declarations_lineno = lineno; + } + else if (p[1] == '}') + { + /* Handle %}. */ + if (_verbatim_declarations == NULL) { - int code = 0; - int count = 0; - lp++; - while ((*lp >= '0' && *lp <= '9') - || (*lp >= 'A' && *lp <= 'F') - || (*lp >= 'a' && *lp <= 'f')) - { - code = (code << 4) - + (*lp >= 'A' && *lp <= 'F' ? *lp - 'A' + 10 : - *lp >= 'a' && *lp <= 'f' ? *lp - 'a' + 10 : - *lp - '0'); - lp++; - count++; - } - if (count == 0) - fprintf (stderr, "hexadecimal escape without any hex digits: %s\n", line); - if (code > UCHAR_MAX) - fprintf (stderr, "hexadecimal escape out of range: %s\n", line); - *kp = static_cast(code); - break; + fprintf (stderr, "line %u:" + " %%} outside of %%{...%%} section\n", + lineno); + exit (1); } - case '\\': case '\'': case '"': - *kp = c; - lp++; - break; - case 'n': - *kp = '\n'; - lp++; - break; - case 't': - *kp = '\t'; - lp++; - break; - case 'r': - *kp = '\r'; - lp++; - break; - case 'f': - *kp = '\f'; - lp++; - break; - case 'b': - *kp = '\b'; - lp++; - break; - case 'a': - *kp = '\a'; - lp++; - break; - case 'v': - *kp = '\v'; - lp++; - break; - default: - fprintf (stderr, "invalid escape sequence in string: %s\n", line); - exit (1); - } - } - else if (c == '"') - break; + if (_verbatim_declarations_end != NULL) + { + fprintf (stderr, "line %u:" + " %%{...%%} section already closed\n", + lineno); + exit (1); + } + _verbatim_declarations_end = p; + /* Give a warning if the rest of the line is nonempty. */ + bool nonempty_line = false; + const char *q; + for (q = p + 2; q < line_end; q++) + { + if (*q == '\n') + { + q++; + break; + } + if (!(*q == ' ' || *q == '\t')) + nonempty_line = true; + } + if (nonempty_line) + fprintf (stderr, "line %u:" + " warning: junk after %%} is ignored\n", + lineno); + } + else if (_verbatim_declarations != NULL + && _verbatim_declarations_end == NULL) + { + fprintf (stderr, "line %u:" + " warning: %% directives are ignored" + " inside the %%{...%%} section\n", + lineno); + } + else + { + fprintf (stderr, "line %u: unrecognized %% directive\n", + lineno); + exit (1); + } + } + else if (!(_verbatim_declarations != NULL + && _verbatim_declarations_end == NULL)) + { + /* Append the line to struct_decl. */ + size_t old_len = (struct_decl ? strlen (struct_decl) : 0); + size_t line_len = line_end - p; + size_t new_len = old_len + line_len + 1; + char *new_struct_decl = new char[new_len]; + if (old_len > 0) + memcpy (new_struct_decl, struct_decl, old_len); + memcpy (new_struct_decl + old_len, p, line_len); + new_struct_decl[old_len + line_len] = '\0'; + if (struct_decl) + delete[] struct_decl; + struct_decl = new_struct_decl; + } + lineno++; + p = line_end; + } + if (_verbatim_declarations != NULL && _verbatim_declarations_end == NULL) + { + fprintf (stderr, "line %u: unterminated %%{ section\n", + _verbatim_declarations_lineno); + exit (1); + } + + /* Determine _struct_decl, _return_type, _struct_tag. */ + if (option[TYPE]) + { + if (struct_decl) + { + /* Drop leading whitespace. */ + while (struct_decl[0] == '\n' || struct_decl[0] == ' ' + || struct_decl[0] == '\t') + struct_decl++; + /* Drop trailing whitespace. */ + for (char *p = struct_decl + strlen (struct_decl); p > struct_decl;) + if (p[-1] == '\n' || p[-1] == ' ' || p[-1] == '\t') + *--p = '\0'; + else + break; + } + if (struct_decl == NULL || struct_decl[0] == '\0') + { + fprintf (stderr, "missing struct declaration" + " for option --struct-type\n"); + exit (1); + } + if (struct_decl) + { + /* Ensure trailing semicolon. */ + size_t old_len = strlen (struct_decl); + if (struct_decl[old_len - 1] != ';') + { + char *new_struct_decl = new char[old_len + 2]; + memcpy (new_struct_decl, struct_decl, old_len); + new_struct_decl[old_len] = ';'; + new_struct_decl[old_len + 1] = '\0'; + delete[] struct_decl; + struct_decl = new_struct_decl; + } + } + /* Set _struct_decl to the entire declaration. */ + _struct_decl = struct_decl; + /* Set _struct_tag to the naked "struct something". */ + const char *p; + for (p = struct_decl; *p && *p != '{' && *p != '\n'; p++) + ; + for (; p > struct_decl;) + if (p[-1] == '\n' || p[-1] == ' ' || p[-1] == '\t') + --p; else - { - *kp = c; - lp++; - } - kp++; - } - lp++; - if (*lp != '\0') - { - if (strchr (delimiters, *lp) == NULL) - { - fprintf (stderr, "string not followed by delimiter: %s\n", line); - exit (1); - } - lp++; - } - return new Keyword_List (_factory->create_keyword (key, kp - key, option[TYPE] ? lp : "")); - } - else - { - /* Not a string. Look for the delimiter. */ - int len = strcspn (line, delimiters); - const char *rest; + break; + size_t struct_tag_length = p - struct_decl; + char *struct_tag = new char[struct_tag_length + 1]; + memcpy (struct_tag, struct_decl, struct_tag_length); + struct_tag[struct_tag_length] = '\0'; + _struct_tag = struct_tag; + /* The return type of the lookup function is "struct something *". + No "const" here, because if !option[CONST], some user code might + want to modify the structure. */ + char *return_type = new char[struct_tag_length + 3]; + memcpy (return_type, struct_decl, struct_tag_length); + return_type[struct_tag_length] = ' '; + return_type[struct_tag_length + 1] = '*'; + return_type[struct_tag_length + 2] = '\0'; + _return_type = return_type; + } + } - if (line[len] == '\0') - rest = ""; - else - /* Skip the first delimiter. */ - rest = &line[len + 1]; - return new Keyword_List (_factory->create_keyword (line, len, option[TYPE] ? rest : "")); - } + /* Parse the keywords section. */ + { + Keyword_List **list_tail = &_head; + const char *delimiters = option.get_delimiters (); + unsigned int lineno = keywords_lineno; + for (const char *line = keywords; line < keywords_end; ) + { + const char *line_end; + line_end = (const char *) memchr (line, '\n', keywords_end - line); + if (line_end != NULL) + line_end++; + else + line_end = keywords_end; + + if (line[0] == '#') + ; /* Comment line. */ + else if (line[0] == '%') + { + fprintf (stderr, "line %u:" + " declarations are not allowed in the keywords section.\n" + "To declare a keyword starting with %%, enclose it in" + " double-quotes.\n", + lineno); + exit (1); + } + else + { + /* An input line carrying a keyword. */ + const char *keyword; + size_t keyword_length; + const char *rest; + + if (line[0] == '"') + { + /* Parse a string in ANSI C syntax. */ + char *kp = new char[line_end-line]; + keyword = kp; + const char *lp = line + 1; + + for (;;) + { + if (lp == line_end) + { + fprintf (stderr, "line %u: unterminated string\n", + lineno); + exit (1); + } + + char c = *lp; + if (c == '\\') + { + c = *++lp; + switch (c) + { + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + { + int code = 0; + int count = 0; + while (count < 3 && *lp >= '0' && *lp <= '7') + { + code = (code << 3) + (*lp - '0'); + lp++; + count++; + } + if (code > UCHAR_MAX) + fprintf (stderr, + "line %u: octal escape out of range\n", + lineno); + *kp = static_cast(code); + break; + } + case 'x': + { + int code = 0; + int count = 0; + lp++; + while ((*lp >= '0' && *lp <= '9') + || (*lp >= 'A' && *lp <= 'F') + || (*lp >= 'a' && *lp <= 'f')) + { + code = (code << 4) + + (*lp >= 'A' && *lp <= 'F' + ? *lp - 'A' + 10 : + *lp >= 'a' && *lp <= 'f' + ? *lp - 'a' + 10 : + *lp - '0'); + lp++; + count++; + } + if (count == 0) + fprintf (stderr, "line %u: hexadecimal escape" + " without any hex digits\n", + lineno); + if (code > UCHAR_MAX) + fprintf (stderr, "line %u: hexadecimal escape" + " out of range\n", + lineno); + *kp = static_cast(code); + break; + } + case '\\': case '\'': case '"': + *kp = c; + lp++; + break; + case 'n': + *kp = '\n'; + lp++; + break; + case 't': + *kp = '\t'; + lp++; + break; + case 'r': + *kp = '\r'; + lp++; + break; + case 'f': + *kp = '\f'; + lp++; + break; + case 'b': + *kp = '\b'; + lp++; + break; + case 'a': + *kp = '\a'; + lp++; + break; + case 'v': + *kp = '\v'; + lp++; + break; + default: + fprintf (stderr, "line %u: invalid escape sequence" + " in string\n", + lineno); + exit (1); + } + } + else if (c == '"') + break; + else + { + *kp = c; + lp++; + } + kp++; + } + lp++; + if (lp < line_end && *lp != '\n') + { + if (strchr (delimiters, *lp) == NULL) + { + fprintf (stderr, "line %u: string not followed" + " by delimiter\n", + lineno); + exit (1); + } + lp++; + } + keyword_length = kp - keyword; + if (option[TYPE]) + { + char *line_rest = new char[line_end - lp + 1]; + memcpy (line_rest, lp, line_end - lp); + line_rest[line_end - lp - + (line_end > lp && line_end[-1] == '\n' ? 1 : 0)] + = '\0'; + rest = line_rest; + } + else + rest = ""; + } + else + { + /* Not a string. Look for the delimiter. */ + const char *lp = line; + for (;;) + { + if (!(lp < line_end && *lp != '\n')) + { + keyword = line; + keyword_length = lp - line; + rest = ""; + break; + } + if (strchr (delimiters, *lp) != NULL) + { + keyword = line; + keyword_length = lp - line; + lp++; + if (option[TYPE]) + { + char *line_rest = new char[line_end - lp + 1]; + memcpy (line_rest, lp, line_end - lp); + line_rest[line_end - lp - + (line_end > lp && line_end[-1] == '\n' + ? 1 : 0)] + = '\0'; + rest = line_rest; + } + else + rest = ""; + break; + } + lp++; + } + } + + /* Allocate Keyword and add it to the list. */ + Keyword *new_kw = _factory->create_keyword (keyword, keyword_length, + rest); + *list_tail = new Keyword_List (new_kw); + list_tail = &(*list_tail)->rest(); + } + + lineno++; + line = line_end; + } + *list_tail = NULL; + + if (_head == NULL) + { + fprintf (stderr, "No keywords in input file!\n"); + exit (1); + } + } + + /* To be freed in the destructor. */ + _input = input; } -void -Input::read_keys () +Input::~Input () { - char *ptr; - - _include_src = save_include_src (); - set_output_types (); - - /* Oops, problem with the input file. */ - if (! (ptr = Read_Line::read_next_line ())) - { - fprintf (stderr, "No words in input file, did you forget to prepend %s or use -t accidentally?\n", "%%"); - exit (1); - } - - /* Read in all the keywords from the input file. */ - const char *delimiters = option.get_delimiters (); - - _head = parse_line (ptr, delimiters); - - for (Keyword_List *temp = _head; - (ptr = Read_Line::read_next_line ()) && strcmp (ptr, "%%"); - temp = temp->rest()) - temp->rest() = parse_line (ptr, delimiters); - - /* See if any additional C code is included at end of this file. */ - _additional_code = false; - if (ptr) - _additional_code = true; + delete[] _input; } diff --git a/src/input.h b/src/input.h index 14b0e1e..947fd5f 100644 --- a/src/input.h +++ b/src/input.h @@ -26,32 +26,39 @@ #ifndef input_h #define input_h 1 -#include "read-line.h" +#include #include "keyword-list.h" -class Input : private Read_Line +class Input { public: Input (FILE *stream, Keyword_Factory *keyword_factory); - void read_keys (); + ~Input (); + void read_input (); private: -#ifndef strcspn - static int strcspn (const char *s, const char *reject); -#endif - void set_output_types (); - const char * get_array_type (); - const char * save_include_src (); - const char * get_special_input (char delimiter); + /* Input stream. */ + FILE * _stream; + /* Creates the keywords. */ + Keyword_Factory * const _factory; + /* Memory block containing the entire input. */ + char * _input; public: - const char * _array_type; /* Pointer to the type for word list. */ - const char * _return_type; /* Pointer to return type for lookup function. */ - const char * _struct_tag; /* Shorthand for user-defined struct tag type. */ - const char * _include_src; /* C source code to be included verbatim. */ - bool _additional_code; /* True if any additional C code is included. */ - Keyword_Factory * const _factory; /* Creates the keywords. */ - Keyword_List * _head; /* Points to the head of the linked list. */ -private: - Keyword_List * parse_line (const char *line, const char *delimiters); + /* The C code from the declarations section. */ + const char * _verbatim_declarations; + const char * _verbatim_declarations_end; + unsigned int _verbatim_declarations_lineno; + /* The C code from the end of the file. */ + const char * _verbatim_code; + const char * _verbatim_code_end; + unsigned int _verbatim_code_lineno; + /* Declaration of struct type for a keyword and its attributes. */ + const char * _struct_decl; + /* Return type of the lookup function. */ + const char * _return_type; + /* Shorthand for user-defined struct tag type. */ + const char * _struct_tag; + /* List of all keywords. */ + Keyword_List * _head; }; #endif diff --git a/src/main.cc b/src/main.cc index 22ebd46..0c03bfe 100644 --- a/src/main.cc +++ b/src/main.cc @@ -64,7 +64,7 @@ main (int argc, char *argv[]) /* Initialize the keyword list. */ KeywordExt_Factory factory; Input inputter (stdin, &factory); - inputter.read_keys (); + inputter.read_input (); /* We can cast the keyword list to KeywordExt_List* because its list elements were created by KeywordExt_Factory. */ KeywordExt_List* list = static_cast(inputter._head); @@ -75,11 +75,15 @@ main (int argc, char *argv[]) /* Output the hash function code. */ Output outputter (searcher._head, - inputter._array_type, + inputter._struct_decl, inputter._return_type, inputter._struct_tag, - inputter._additional_code, - inputter._include_src, + inputter._verbatim_declarations, + inputter._verbatim_declarations_end, + inputter._verbatim_declarations_lineno, + inputter._verbatim_code, + inputter._verbatim_code_end, + inputter._verbatim_code_lineno, searcher._total_keys, searcher._total_duplicates, searcher._max_key_len, diff --git a/src/output.cc b/src/output.cc index eac1d57..013a3ad 100644 --- a/src/output.cc +++ b/src/output.cc @@ -80,17 +80,27 @@ static const char *char_to_index; keyword list. After Search::sort(), we know that they form blocks of consecutive list elements. */ -Output::Output (KeywordExt_List *head, const char *array_type, +Output::Output (KeywordExt_List *head, const char *struct_decl, const char *return_type, const char *struct_tag, - bool additional_code, const char *include_src, + const char *verbatim_declarations, + const char *verbatim_declarations_end, + unsigned int verbatim_declarations_lineno, + const char *verbatim_code, const char *verbatim_code_end, + unsigned int verbatim_code_lineno, int total_keys, int total_duplicates, int max_key_len, int min_key_len, int alpha_size, const int *occurrences, const int *asso_values) - : _head (head), _array_type (array_type), _return_type (return_type), - _struct_tag (struct_tag), _additional_code (additional_code), - _include_src (include_src), _total_keys (total_keys), - _total_duplicates (total_duplicates), _max_key_len (max_key_len), - _min_key_len (min_key_len), _alpha_size (alpha_size), + : _head (head), _struct_decl (struct_decl), _return_type (return_type), + _struct_tag (struct_tag), + _verbatim_declarations (verbatim_declarations), + _verbatim_declarations_end (verbatim_declarations_end), + _verbatim_declarations_lineno (verbatim_declarations_lineno), + _verbatim_code (verbatim_code), + _verbatim_code_end (verbatim_code_end), + _verbatim_code_lineno (verbatim_code_lineno), + _total_keys (total_keys), _total_duplicates (total_duplicates), + _max_key_len (max_key_len), _min_key_len (min_key_len), + _alpha_size (alpha_size), _occurrences (occurrences), _asso_values (asso_values) { } @@ -1477,11 +1487,19 @@ Output::output () printf ("C++"); printf (" code produced by gperf version %s */\n", version_string); option.print_options (); + printf ("\n"); - printf ("%s\n", _include_src); + if (_verbatim_declarations < _verbatim_declarations_end) + { + if (option.get_input_file_name ()) + printf ("#line %u \"%s\"\n", + _verbatim_declarations_lineno, option.get_input_file_name ()); + fwrite (_verbatim_declarations, 1, + _verbatim_declarations_end - _verbatim_declarations, stdout); + } if (option[TYPE] && !option[NOTYPE]) /* Output type declaration now, reference it later on.... */ - printf ("%s;\n", _array_type); + printf ("%s\n", _struct_decl); if (option[INCLUDE]) printf ("#include \n"); /* Declare strlen(), strcmp(), strncmp(). */ @@ -1519,9 +1537,13 @@ Output::output () output_lookup_function (); - if (_additional_code) - for (int c; (c = getchar ()) != EOF; putchar (c)) - ; + if (_verbatim_code < _verbatim_code_end) + { + if (option.get_input_file_name ()) + printf ("#line %u \"%s\"\n", + _verbatim_code_lineno, option.get_input_file_name ()); + fwrite (_verbatim_code, 1, _verbatim_code_end - _verbatim_code, stdout); + } fflush (stdout); } diff --git a/src/output.h b/src/output.h index 57506c4..b079155 100644 --- a/src/output.h +++ b/src/output.h @@ -37,11 +37,15 @@ class Output public: /* Constructor. */ Output (KeywordExt_List *head, - const char *array_type, + const char *struct_decl, const char *return_type, const char *struct_tag, - bool additional_code, - const char *include_src, + const char *verbatim_declarations, + const char *verbatim_declarations_end, + unsigned int verbatim_declarations_lineno, + const char *verbatim_code, + const char *verbatim_code_end, + unsigned int verbatim_code_lineno, int total_keys, int total_duplicates, int max_key_len, int min_key_len, @@ -91,16 +95,20 @@ private: /* Linked list of keywords. */ KeywordExt_List * _head; - /* Pointer to the type for word list. */ - const char * const _array_type; + /* Declaration of struct type for a keyword and its attributes. */ + const char * const _struct_decl; /* Pointer to return type for lookup function. */ const char * _return_type; /* Shorthand for user-defined struct tag type. */ const char * _struct_tag; - /* True if any additional C code is included. */ - bool const _additional_code; - /* C source code to be included verbatim. */ - const char * const _include_src; + /* The C code from the declarations section. */ + const char * const _verbatim_declarations; + const char * const _verbatim_declarations_end; + unsigned int const _verbatim_declarations_lineno; + /* The C code from the end of the file. */ + const char * const _verbatim_code; + const char * const _verbatim_code_end; + unsigned int const _verbatim_code_lineno; /* Total number of keys, counting duplicates. */ int const _total_keys; /* Total number of duplicate hash values. */ diff --git a/src/read-line.cc b/src/read-line.cc deleted file mode 100644 index ba24345..0000000 --- a/src/read-line.cc +++ /dev/null @@ -1,33 +0,0 @@ -/* Correctly reads an arbitrarily long string. - - Copyright (C) 1989-1998, 2002 Free Software Foundation, Inc. - Written by Douglas C. Schmidt - and Bruno Haible . - - This file is part of GNU GPERF. - - GNU GPERF is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - GNU GPERF is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -/* Specification. */ -#include "read-line.h" - -#ifndef __OPTIMIZE__ - -#define INLINE /* not inline */ -#include "read-line.icc" -#undef INLINE - -#endif /* not defined __OPTIMIZE__ */ diff --git a/src/read-line.h b/src/read-line.h deleted file mode 100644 index d1cbdc9..0000000 --- a/src/read-line.h +++ /dev/null @@ -1,61 +0,0 @@ -/* This may look like C code, but it is really -*- C++ -*- */ - -/* Reads arbitrarily long string from input file, returning it as a - dynamically allocated buffer. - - Copyright (C) 1989-1998, 2002 Free Software Foundation, Inc. - Written by Douglas C. Schmidt - and Bruno Haible . - - This file is part of GNU GPERF. - - GNU GPERF is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - GNU GPERF is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#ifndef read_line_h -#define read_line_h 1 - -#include -#include "getline.h" - -/* An instance of this class is used for repeatedly reading lines of text - from an input stream. */ -class Read_Line -{ -public: - - /* Initializes the instance with a given input stream. */ - Read_Line (FILE *stream); - - /* Reads the next line and returns it, excluding the terminating newline, - and ignoring lines starting with '#'. Returns NULL on error or EOF. - The storage for the string is dynamically allocated and must be freed - through delete[]. */ - char * read_next_line (); - -private: - /* FILE pointer to the input stream. */ - FILE * const _fp; -}; - -#ifdef __OPTIMIZE__ - -#define INLINE inline -#include "read-line.icc" -#undef INLINE - -#endif - -#endif diff --git a/src/read-line.icc b/src/read-line.icc deleted file mode 100644 index c03d514..0000000 --- a/src/read-line.icc +++ /dev/null @@ -1,69 +0,0 @@ -/* Inline Functions for read-line.{h,cc}. - - Copyright (C) 1989-1998, 2002 Free Software Foundation, Inc. - Written by Douglas C. Schmidt - and Bruno Haible . - - This file is part of GNU GPERF. - - GNU GPERF is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - GNU GPERF is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -// This needs: -//#include -//#include "getline.h" - -/* Initializes the instance with a given input stream. */ -INLINE -Read_Line::Read_Line (FILE *stream) - : _fp (stream) -{ -} - -/* Reads the next line and returns it, excluding the terminating newline, - and ignoring lines starting with '#'. Returns NULL on error or EOF. - The storage for the string is dynamically allocated and must be freed - through delete[]. */ -INLINE char * -Read_Line::read_next_line () -{ - int c; - - while ((c = getc (_fp)) == '#') - { - while (c = getc (_fp), c != EOF && c != '\n') - ; - - if (c == EOF) - return NULL; - } - - if (c == EOF) - return NULL; - - ungetc (c, stdin); - - char *line = NULL; - size_t linesize = 0; - int length = get_line (&line, &linesize, _fp); - if (length < 0) - { - delete[] line; - return NULL; - } - if (length > 0 && line[length - 1] == '\n') - line[length - 1] = '\0'; - return line; -} diff --git a/tests/c-parse.exp b/tests/c-parse.exp index d0dc46a..9d229ba 100644 --- a/tests/c-parse.exp +++ b/tests/c-parse.exp @@ -1,5 +1,6 @@ /* C code produced by gperf version 2.7.2 */ /* Command-line: ../src/gperf -L C -F ', 0, 0' -p -j1 -i 1 -g -o -t -G -N is_reserved_word -k'1,3,$' */ + /* Command-line: gperf -L KR-C -F ', 0, 0' -p -j1 -i 1 -g -o -t -N is_reserved_word -k1,3,$ c-parse.gperf */ struct resword { const char *name; short token; enum rid rid; }; diff --git a/tests/cplusplus.exp b/tests/cplusplus.exp index 8fb6427..71c88ff 100644 --- a/tests/cplusplus.exp +++ b/tests/cplusplus.exp @@ -1,5 +1,6 @@ /* C code produced by gperf version 2.7.2 */ /* Command-line: ../src/gperf -L C -F ', 0, 0' -p -j1 -g -o -t -N is_reserved_word -k'1,4,7,$' */ + /* Command-line: gperf -L KR-C -F ', 0, 0' -p -j1 -g -o -t -N is_reserved_word -k1,4,$,7 gplus.gperf */ struct resword { const char *name; short token; enum rid rid;}; diff --git a/tests/gpc.exp b/tests/gpc.exp index d4ada45..70fd312 100644 --- a/tests/gpc.exp +++ b/tests/gpc.exp @@ -1,5 +1,6 @@ /* C code produced by gperf version 2.7.2 */ /* Command-line: ../src/gperf -g -o -j1 -t -p -N is_reserved_word */ + /* ISO Pascal 7185 reserved words. * * For GNU Pascal compiler (GPC) by jtv@hut.fi diff --git a/tests/java.exp b/tests/java.exp index d9cdf68..a711b70 100644 --- a/tests/java.exp +++ b/tests/java.exp @@ -1,5 +1,6 @@ /* C code produced by gperf version 2.7.2 */ /* Command-line: ../src/gperf -L C -F ', 0' -p -t -j1 -i 1 -g -o -N java_keyword -k'1,3,$' */ + /* Keyword definition for the GNU compiler for the Java(TM) language. Copyright (C) 1997, 1998 Free Software Foundation, Inc. Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com) diff --git a/tests/objc.exp b/tests/objc.exp index abf2e08..b2eaba4 100644 --- a/tests/objc.exp +++ b/tests/objc.exp @@ -1,5 +1,6 @@ /* C code produced by gperf version 2.7.2 */ /* Command-line: ../src/gperf -p -j1 -i 1 -g -o -t -N is_reserved_word -k'1,3,$' */ + /* Command-line: gperf -p -j1 -i 1 -g -o -t -N is_reserved_word -k1,3,$ objc.gperf */ struct resword { char *name; short token; enum rid rid; }; diff --git a/tests/test-4.exp b/tests/test-4.exp index 945331c..9e218cc 100644 --- a/tests/test-4.exp +++ b/tests/test-4.exp @@ -1,5 +1,6 @@ /* C code produced by gperf version 2.7.2 */ /* Command-line: ../src/gperf -D -p -t */ + /* Command-line: gperf -L KR-C -F ', 0, 0' -p -j1 -i 1 -g -o -t -N is_reserved_word -k1,3,$ c-parse.gperf */ struct resword { const char *name; short token; enum rid rid; };