794 lines
31 KiB
OpenEdge ABL
794 lines
31 KiB
OpenEdge ABL
[Parser::] The Parser.
|
|
|
|
To work through the program read in, assigning each line its category,
|
|
and noting down other useful information as we go.
|
|
|
|
@h Sequence of parsing.
|
|
At this point, the web has been read into memory. It's a linked list of
|
|
chapters, each of which is a linked list of sections, each of which must
|
|
be parsed in turn.
|
|
|
|
When we're done, we offer the support code for the web's programming language
|
|
a chance to do some further work, if it wants to. (This is how, for example,
|
|
function definitions are recognised in C programs.) There is no requirement
|
|
for it to do anything.
|
|
|
|
=
|
|
/* Parse each section of each chapter of the web |W| in turn, then give the
   support code for the web's main language its three later passes (types,
   functions, further parsing). |inweb_mode| is consulted further down, when
   enumerated constants are parsed. */
void Parser::parse_web(web *W, int inweb_mode) {
	chapter *C;
	section *S;
	LOOP_OVER_LINKED_LIST(C, chapter, W->chapters) {
		LOOP_OVER_LINKED_LIST(S, section, C->sections) {
			@<Parse a section@>;
		}
	}
	/* language-specific work, run only once every line has its category */
	LanguageMethods::parse_types(W, W->main_language);
	LanguageMethods::parse_functions(W, W->main_language);
	LanguageMethods::further_parsing(W, W->main_language);
}
|
|
|
|
@ The task now is to parse those source lines, categorise them, and group them
|
|
further into a linked list of paragraphs. The basic method would be simple
|
|
enough, but is made more elaborate by supporting both version 1 and version 2
|
|
markup syntax, and trying to detect incorrect uses of one within the other.
|
|
|
|
@<Parse a section@> =
|
|
int comment_mode = TRUE, extract_mode = FALSE;
|
|
int code_lcat_for_body = NO_LCAT,
|
|
code_plainness_for_body = FALSE,
|
|
hyperlink_body = FALSE;
|
|
programming_language *code_pl_for_body = NULL;
|
|
int before_bar = TRUE;
|
|
int next_par_number = 1;
|
|
paragraph *current_paragraph = NULL;
|
|
TEMPORARY_TEXT(tag_list);
|
|
for (source_line *L = S->first_line, *PL = NULL; L; PL = L, L = L->next_line) {
|
|
@<Apply tag list, if any@>;
|
|
@<Remove tag list, if any@>;
|
|
@<Detect implied paragraph breaks@>;
|
|
@<Determine category for this source line@>;
|
|
}
|
|
DISCARD_TEXT(tag_list);
|
|
@<In version 2 syntax, construe the comment under the heading as the purpose@>;
|
|
@<If the section as a whole is tagged, apply that tag to each paragraph in it@>;
|
|
|
|
@ In version 2 syntax, the notation for tags was clarified. The tag list
|
|
for a paragraph is the run of |^"This"| and |^"That"| markers at the end of
|
|
the line introducing that paragraph. They can only occur, therefore, on a
|
|
line beginning with an |@|. We extract them into a string called |tag_list|.
|
|
(The reason we can't act on them straight away, which would make for simpler
|
|
code, is that they need to be applied to a paragraph structure which doesn't
|
|
yet exist -- it will only exist when the line has been fully parsed.)
|
|
|
|
@<Remove tag list, if any@> =
	if (Str::get_first_char(L->text) == '@') {
		match_results mr = Regexp::create_mr();
		/* each pass strips one tag marker from the line and appends it to |tag_list| */
		while (Regexp::match(&mr, L->text, L"(%c*?)( *%^\"%c+?\")(%c*)")) {
			if (S->md->using_syntax < V2_SYNTAX)
				Parser::wrong_version(S->md->using_syntax, L, "tags written ^\"thus\"", V2_SYNTAX);
			WRITE_TO(tag_list, "%S", mr.exp[1]);
			/* rebuild the line from what came before and after the marker */
			Str::clear(L->text);
			Str::copy(L->text, mr.exp[0]);
			WRITE_TO(L->text, " %S", mr.exp[2]);
		}
		Regexp::dispose_of(&mr);
	}
|
|
|
|
@ And now it's later, and we can safely apply the tags. |current_paragraph|
|
|
now points to the para which was created by this line, not the one before.
|
|
|
|
@<Apply tag list, if any@> =
|
|
match_results mr = Regexp::create_mr();
|
|
while (Regexp::match(&mr, tag_list, L" *%^\"(%c+?)\" *(%c*)")) {
|
|
Tags::add_by_name(current_paragraph, mr.exp[0]);
|
|
Str::copy(tag_list, mr.exp[1]);
|
|
}
|
|
Regexp::dispose_of(&mr);
|
|
Str::clear(tag_list);
|
|
|
|
@<If the section as a whole is tagged, apply that tag to each paragraph in it@> =
	if (S->tag_with) {
		/* a section-wide tag is simply attached to every paragraph of |S| */
		paragraph *P;
		LOOP_OVER_LINKED_LIST(P, paragraph, S->paragraphs) {
			Tags::add_to_paragraph(P, S->tag_with, NULL);
		}
	}
|
|
|
|
@ The "purpose" of a section is a brief note about what it's for. In version 1
|
|
syntax, this had to be explicitly declared with a |@Purpose:| command; in
|
|
version 2 it's much tidier.
|
|
|
|
@<In version 2 syntax, construe the comment under the heading as the purpose@> =
	if (S->md->using_syntax >= V2_SYNTAX) {
		source_line *L = S->first_line;
		/* step over an inserted chapter heading line, if there is one */
		if ((L) && (L->category == CHAPTER_HEADING_LCAT)) L = L->next_line;
		/* the purpose, if any, begins on the line after the one reached */
		source_line *purpose_start = (L)?(L->next_line):NULL;
		S->sect_purpose = Parser::extract_purpose(I"", purpose_start, S, NULL);
		/* a non-empty purpose can only have come from |purpose_start| onwards */
		if (Str::len(S->sect_purpose) > 0) purpose_start->category = PURPOSE_LCAT;
	}
|
|
|
|
@ A new paragraph is implied when a macro definition begins in the middle of
|
|
what otherwise would be code, or when a paragraph and its code divider are
|
|
immediately adjacent on the same line.
|
|
|
|
@<Detect implied paragraph breaks@> =
|
|
match_results mr = Regexp::create_mr();
|
|
if ((PL) && (PL->category == CODE_BODY_LCAT) &&
|
|
(Str::get_first_char(L->text) == '@') && (Str::get_at(L->text, 1) == '<') &&
|
|
(Regexp::match(&mr, L->text, L"%c<(%c+)@> *= *")) &&
|
|
(S->md->using_syntax >= V2_SYNTAX)) {
|
|
@<Insert an implied paragraph break@>;
|
|
}
|
|
if ((PL) && (Regexp::match(&mr, L->text, L"@ *= *"))) {
|
|
Str::clear(L->text);
|
|
Str::copy(L->text, I"=");
|
|
if (S->md->using_syntax < V2_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "implied paragraph breaks", V2_SYNTAX);
|
|
@<Insert an implied paragraph break@>;
|
|
}
|
|
Regexp::dispose_of(&mr);
|
|
|
|
@ We handle implied paragraph dividers by inserting a paragraph marker and
|
|
reparsing from there.
|
|
|
|
@<Insert an implied paragraph break@> =
|
|
source_line *NL = Lines::new_source_line_in(I"@", &(L->source), S);
|
|
PL->next_line = NL;
|
|
NL->next_line = L;
|
|
L = PL;
|
|
Regexp::dispose_of(&mr);
|
|
continue;
|
|
|
|
@h Categorisation.
|
|
This is where the work is really done. We have a source line: is it comment,
|
|
code, definition, what?
|
|
|
|
@<Determine category for this source line@> =
|
|
L->is_commentary = comment_mode;
|
|
L->category = COMMENT_BODY_LCAT; /* until set otherwise down below */
|
|
L->owning_paragraph = current_paragraph;
|
|
|
|
if (L->source.line_count == 0) @<Parse the line as a probable chapter heading@>;
|
|
if (L->source.line_count <= 1) @<Parse the line as a probable section heading@>;
|
|
@<Parse the line as a possible Inweb command@>;
|
|
@<Parse the line as a possible paragraph macro definition@>;
|
|
if (Str::get_first_char(L->text) == '=') {
|
|
if (S->md->using_syntax < V2_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "column-1 '=' as code divider", V2_SYNTAX);
|
|
if (extract_mode) @<Exit extract mode@>
|
|
else @<Parse the line as an equals structural marker@>;
|
|
}
|
|
if ((Str::get_first_char(L->text) == '@') &&
|
|
(Str::get_at(L->text, 1) != '<') &&
|
|
(L->category != MACRO_DEFINITION_LCAT))
|
|
@<Parse the line as a structural marker@>;
|
|
if (comment_mode) @<This is a line destined for commentary@>;
|
|
if (comment_mode == FALSE) @<This is a line destined for the verbatim code@>;
|
|
|
|
@ This must be one of the inserted lines marking chapter headings; it doesn't
|
|
come literally from the source web.
|
|
|
|
@<Parse the line as a probable chapter heading@> =
	if (Str::eq_wide_string(L->text, L"Chapter Heading")) {
		/* a heading is commentary, and ends any code or extract in progress */
		L->category = CHAPTER_HEADING_LCAT;
		L->is_commentary = TRUE;
		extract_mode = FALSE;
		comment_mode = TRUE;
	}
|
|
|
|
@ The top line of a section gives its title; in InC, it can also give the
|
|
namespace for its functions.
|
|
|
|
@<Parse the line as a probable section heading@> =
|
|
match_results mr = Regexp::create_mr();
|
|
if (Regexp::match(&mr, L->text, L"%[(%C+)%] (%C+/%C+): (%c+).")) {
|
|
if (S->md->using_syntax >= V2_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L,
|
|
"section range in header line", V1_SYNTAX);
|
|
S->sect_namespace = Str::duplicate(mr.exp[0]);
|
|
S->md->sect_range = Str::duplicate(mr.exp[1]);
|
|
S->md->sect_title = Str::duplicate(mr.exp[2]);
|
|
L->text_operand = Str::duplicate(mr.exp[2]);
|
|
L->category = SECTION_HEADING_LCAT;
|
|
} else if (Regexp::match(&mr, L->text, L"(%C+/%C+): (%c+).")) {
|
|
if (S->md->using_syntax >= V2_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L,
|
|
"section range in header line", V1_SYNTAX);
|
|
S->md->sect_range = Str::duplicate(mr.exp[0]);
|
|
S->md->sect_title = Str::duplicate(mr.exp[1]);
|
|
L->text_operand = Str::duplicate(mr.exp[1]);
|
|
L->category = SECTION_HEADING_LCAT;
|
|
} else if (Regexp::match(&mr, L->text, L"%[(%C+::)%] (%c+).")) {
|
|
S->sect_namespace = Str::duplicate(mr.exp[0]);
|
|
S->md->sect_title = Str::duplicate(mr.exp[1]);
|
|
L->text_operand = Str::duplicate(mr.exp[1]);
|
|
L->category = SECTION_HEADING_LCAT;
|
|
} else if (Regexp::match(&mr, L->text, L"(%c+).")) {
|
|
S->md->sect_title = Str::duplicate(mr.exp[0]);
|
|
L->text_operand = Str::duplicate(mr.exp[0]);
|
|
L->category = SECTION_HEADING_LCAT;
|
|
}
|
|
Regexp::dispose_of(&mr);
|
|
|
|
@ Version 1 syntax was cluttered up with a number of hardly-used markup
|
|
syntaxes called "commands", written in double squared brackets |[[Thus]]|.
|
|
In version 2, this notation is never used.
|
|
|
|
@<Parse the line as a possible Inweb command@> =
|
|
match_results mr = Regexp::create_mr();
|
|
if (Regexp::match(&mr, L->text, L"%[%[(%c+)%]%]")) {
|
|
TEMPORARY_TEXT(full_command);
|
|
TEMPORARY_TEXT(command_text);
|
|
Str::copy(full_command, mr.exp[0]);
|
|
Str::copy(command_text, mr.exp[0]);
|
|
L->category = COMMAND_LCAT;
|
|
if (Regexp::match(&mr, command_text, L"(%c+?): *(%c+)")) {
|
|
Str::copy(command_text, mr.exp[0]);
|
|
L->text_operand = Str::duplicate(mr.exp[1]);
|
|
}
|
|
if (Str::eq_wide_string(command_text, L"Page Break")) {
|
|
if (S->md->using_syntax > V1_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "[[Page Break]]", V1_SYNTAX);
|
|
L->command_code = PAGEBREAK_CMD;
|
|
} else if (Str::eq_wide_string(command_text, L"Grammar Index"))
|
|
L->command_code = GRAMMAR_INDEX_CMD;
|
|
else if (Str::eq_wide_string(command_text, L"Tag")) {
|
|
if (S->md->using_syntax > V1_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "[[Tag...]]", V1_SYNTAX);
|
|
Tags::add_by_name(L->owning_paragraph, L->text_operand);
|
|
L->command_code = TAG_CMD;
|
|
} else if (Str::eq_wide_string(command_text, L"Figure")) {
|
|
if (S->md->using_syntax > V1_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "[[Figure...]]", V1_SYNTAX);
|
|
Tags::add_by_name(L->owning_paragraph, I"Figures");
|
|
L->command_code = FIGURE_CMD;
|
|
} else {
|
|
Main::error_in_web(I"unknown [[command]]", L);
|
|
}
|
|
L->is_commentary = TRUE;
|
|
DISCARD_TEXT(command_text);
|
|
DISCARD_TEXT(full_command);
|
|
}
|
|
Regexp::dispose_of(&mr);
|
|
|
|
@ Some paragraphs define angle-bracketed macros, and those need special
|
|
handling. We'll call these "paragraph macros".
|
|
|
|
@<Parse the line as a possible paragraph macro definition@> =
|
|
match_results mr = Regexp::create_mr();
|
|
if ((Str::get_first_char(L->text) == '@') && (Str::get_at(L->text, 1) == '<') &&
|
|
(Regexp::match(&mr, L->text, L"%c<(%c+)@> *= *"))) {
|
|
TEMPORARY_TEXT(para_macro_name);
|
|
Str::copy(para_macro_name, mr.exp[0]);
|
|
L->category = MACRO_DEFINITION_LCAT;
|
|
if (current_paragraph == NULL)
|
|
Main::error_in_web(I"<...> definition begins outside of a paragraph", L);
|
|
else Macros::create(S, current_paragraph, L, para_macro_name);
|
|
comment_mode = FALSE; extract_mode = FALSE;
|
|
L->is_commentary = FALSE;
|
|
code_lcat_for_body = CODE_BODY_LCAT; /* code follows on subsequent lines */
|
|
code_pl_for_body = NULL;
|
|
code_plainness_for_body = FALSE;
|
|
hyperlink_body = FALSE;
|
|
DISCARD_TEXT(para_macro_name);
|
|
continue;
|
|
}
|
|
Regexp::dispose_of(&mr);
|
|
|
|
@ A structural marker is introduced by an |@| in column 1, and is a structural
|
|
division in the current section.
|
|
|
|
@<Parse the line as a structural marker@> =
|
|
TEMPORARY_TEXT(command_text);
|
|
Str::copy(command_text, L->text);
|
|
Str::delete_first_character(command_text); /* i.e., strip the at-sign from the front */
|
|
TEMPORARY_TEXT(remainder);
|
|
match_results mr = Regexp::create_mr();
|
|
if (Regexp::match(&mr, command_text, L"(%C*) *(%c*?)")) {
|
|
Str::copy(command_text, mr.exp[0]);
|
|
Str::copy(remainder, mr.exp[1]);
|
|
}
|
|
@<Deal with a structural marker@>;
|
|
DISCARD_TEXT(remainder);
|
|
DISCARD_TEXT(command_text);
|
|
Regexp::dispose_of(&mr);
|
|
continue;
|
|
|
|
@ An equals sign in column 1 can just mean the end of an extract, so:
|
|
|
|
@<Exit extract mode@> =
|
|
L->category = END_EXTRACT_LCAT;
|
|
comment_mode = TRUE;
|
|
extract_mode = FALSE;
|
|
|
|
@ But more usually an equals sign in column 1 is a structural marker:
|
|
|
|
@<Parse the line as an equals structural marker@> =
|
|
L->category = BEGIN_CODE_LCAT;
|
|
L->plainer = FALSE;
|
|
code_lcat_for_body = CODE_BODY_LCAT;
|
|
code_pl_for_body = NULL;
|
|
comment_mode = FALSE;
|
|
match_results mr = Regexp::create_mr();
|
|
match_results mr2 = Regexp::create_mr();
|
|
if (Regexp::match(&mr, L->text, L"= *(%c+) *")) {
|
|
if ((current_paragraph) && (Str::eq(mr.exp[0], I"(very early code)"))) {
|
|
current_paragraph->placed_very_early = TRUE;
|
|
} else if ((current_paragraph) && (Str::eq(mr.exp[0], I"(early code)"))) {
|
|
current_paragraph->placed_early = TRUE;
|
|
} else if ((current_paragraph) &&
|
|
(Regexp::match(&mr2, mr.exp[0], L"%((%c*?) *text%)"))) {
|
|
@<Make plainer@>;
|
|
code_lcat_for_body = TEXT_EXTRACT_LCAT;
|
|
code_pl_for_body = NULL;
|
|
extract_mode = TRUE;
|
|
} else if ((current_paragraph) &&
|
|
(Regexp::match(&mr2, mr.exp[0], L"%((%c*?) *text as code%)"))) {
|
|
@<Make plainer@>;
|
|
code_lcat_for_body = TEXT_EXTRACT_LCAT;
|
|
code_pl_for_body = S->sect_language;
|
|
extract_mode = TRUE;
|
|
} else if ((current_paragraph) &&
|
|
(Regexp::match(&mr2, mr.exp[0], L"%((%c*?) *text as (%c+)%)"))) {
|
|
@<Make plainer@>;
|
|
code_lcat_for_body = TEXT_EXTRACT_LCAT;
|
|
code_pl_for_body = Languages::find_by_name(mr2.exp[1], W);
|
|
extract_mode = TRUE;
|
|
} else if ((current_paragraph) &&
|
|
(Regexp::match(&mr2, mr.exp[0], L"%((%c*?) *text from (%c+) as code%)"))) {
|
|
@<Make plainer@>;
|
|
code_pl_for_body = S->sect_language;
|
|
@<Spool from file@>;
|
|
} else if ((current_paragraph) &&
|
|
(Regexp::match(&mr2, mr.exp[0], L"%((%c*?) *text from (%c+) as (%c+)%)"))) {
|
|
@<Make plainer@>;
|
|
code_pl_for_body = Languages::find_by_name(mr2.exp[2], W);
|
|
@<Spool from file@>;
|
|
} else if ((current_paragraph) &&
|
|
(Regexp::match(&mr2, mr.exp[0], L"%((%c*?) *text from (%c+)%)"))) {
|
|
@<Make plainer@>;
|
|
code_pl_for_body = NULL;
|
|
@<Spool from file@>;
|
|
} else if ((current_paragraph) &&
|
|
(Regexp::match(&mr2, mr.exp[0], L"%(figure (%c+)%)"))) {
|
|
Tags::add_by_name(L->owning_paragraph, I"Figures");
|
|
L->command_code = FIGURE_CMD;
|
|
L->category = COMMAND_LCAT;
|
|
code_lcat_for_body = COMMENT_BODY_LCAT;
|
|
L->text_operand = Str::duplicate(mr2.exp[0]);
|
|
comment_mode = TRUE;
|
|
} else if ((current_paragraph) &&
|
|
((Regexp::match(&mr2, mr.exp[0], L"%(embedded (%C+) video (%c+)%)")) ||
|
|
(Regexp::match(&mr2, mr.exp[0], L"%(embedded (%C+) audio (%c+)%)")))) {
|
|
Tags::add_by_name(L->owning_paragraph, I"Figures");
|
|
L->command_code = EMBED_CMD;
|
|
L->category = COMMAND_LCAT;
|
|
code_lcat_for_body = COMMENT_BODY_LCAT;
|
|
L->text_operand = Str::duplicate(mr2.exp[0]);
|
|
L->text_operand2 = Str::duplicate(mr2.exp[1]);
|
|
comment_mode = TRUE;
|
|
} else {
|
|
Main::error_in_web(I"unknown bracketed annotation", L);
|
|
}
|
|
} else if (Regexp::match(&mr, L->text, L"= *%C%c*")) {
|
|
Main::error_in_web(I"unknown material after '='", L);
|
|
}
|
|
code_plainness_for_body = L->plainer;
|
|
hyperlink_body = L->enable_hyperlinks;
|
|
Regexp::dispose_of(&mr);
|
|
Regexp::dispose_of(&mr2);
|
|
continue;
|
|
|
|
@<Make plainer@> =
|
|
match_results mr3 = Regexp::create_mr();
|
|
while (TRUE) {
|
|
if (Regexp::match(&mr3, mr2.exp[0], L" *(%C+) *(%c*?)")) {
|
|
if (Str::eq(mr3.exp[0], I"undisplayed")) L->plainer = TRUE;
|
|
else if (Str::eq(mr3.exp[0], I"hyperlinked")) L->enable_hyperlinks = TRUE;
|
|
else {
|
|
Main::error_in_web(
|
|
I"only 'undisplayed' and/or 'hyperlinked' can precede 'text' here", L);
|
|
}
|
|
} else break;
|
|
Str::clear(mr2.exp[0]);
|
|
Str::copy(mr2.exp[0], mr3.exp[1]);
|
|
}
|
|
Regexp::dispose_of(&mr3);
|
|
|
|
@<Spool from file@> =
|
|
L->category = BEGIN_CODE_LCAT;
|
|
pathname *P = W->md->path_to_web;
|
|
filename *F = Filenames::from_text_relative(P, mr2.exp[1]);
|
|
linked_list *lines = Painter::lines(F);
|
|
text_stream *T;
|
|
source_line *latest = L;
|
|
LOOP_OVER_LINKED_LIST(T, text_stream, lines) {
|
|
source_line *TL = Lines::new_source_line_in(T, &(L->source), S);
|
|
TL->next_line = latest->next_line;
|
|
TL->plainer = L->plainer;
|
|
latest->next_line = TL;
|
|
latest = TL;
|
|
}
|
|
source_line *EEL = Lines::new_source_line_in(I"=", &(L->source), S);
|
|
EEL->next_line = latest->next_line;
|
|
latest->next_line = EEL;
|
|
code_lcat_for_body = TEXT_EXTRACT_LCAT;
|
|
extract_mode = TRUE;
|
|
|
|
@ So here we have the possibilities which start with a column-1 |@| sign.
|
|
There appear to be hordes of these, but in fact most of them were removed
|
|
in Inweb syntax version 2: in modern syntax, only |@d|, |@e|, |@h|, their
|
|
long forms |@define|, |@enum| and |@heading|, and plain old |@| remain.
|
|
(But |@e| has a different meaning from in version 1.)
|
|
|
|
@<Deal with a structural marker@> =
|
|
extract_mode = FALSE;
|
|
if (Str::eq_wide_string(command_text, L"Purpose:")) @<Deal with Purpose@>
|
|
else if (Str::eq_wide_string(command_text, L"Interface:")) @<Deal with Interface@>
|
|
else if (Str::eq_wide_string(command_text, L"Definitions:")) @<Deal with Definitions@>
|
|
else if (Regexp::match(&mr, command_text, L"----+")) @<Deal with the bar@>
|
|
else if ((Str::eq_wide_string(command_text, L"c")) ||
|
|
(Str::eq_wide_string(command_text, L"x")) ||
|
|
((S->md->using_syntax == V1_SYNTAX) && (Str::eq_wide_string(command_text, L"e"))))
|
|
@<Deal with the code and extract markers@>
|
|
else if (Str::eq_wide_string(command_text, L"d")) @<Deal with the define marker@>
|
|
else if (Str::eq_wide_string(command_text, L"define")) {
|
|
if (S->md->using_syntax < V2_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "'@define' for definitions (use '@d' instead)", V2_SYNTAX);
|
|
@<Deal with the define marker@>;
|
|
} else if (Str::eq_wide_string(command_text, L"default")) {
|
|
if (S->md->using_syntax < V2_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "'@default' for definitions", V2_SYNTAX);
|
|
L->default_defn = TRUE;
|
|
@<Deal with the define marker@>;
|
|
} else if (Str::eq_wide_string(command_text, L"enum")) @<Deal with the enumeration marker@>
|
|
else if ((Str::eq_wide_string(command_text, L"e")) && (S->md->using_syntax >= V2_SYNTAX))
|
|
@<Deal with the enumeration marker@>
|
|
else {
|
|
int weight = -1, new_page = FALSE;
|
|
if (Str::eq_wide_string(command_text, L"")) weight = ORDINARY_WEIGHT;
|
|
if ((Str::eq_wide_string(command_text, L"h")) || (Str::eq_wide_string(command_text, L"heading"))) {
|
|
if (S->md->using_syntax < V2_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "'@h' or '@heading' for headings (use '@p' instead)", V2_SYNTAX);
|
|
weight = SUBHEADING_WEIGHT;
|
|
}
|
|
if (Str::eq_wide_string(command_text, L"p")) {
|
|
if (S->md->using_syntax > V1_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "'@p' for headings (use '@h' instead)", V1_SYNTAX);
|
|
weight = SUBHEADING_WEIGHT;
|
|
}
|
|
if (Str::eq_wide_string(command_text, L"pp")) {
|
|
if (S->md->using_syntax > V1_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "'@pp' for super-headings", V1_SYNTAX);
|
|
weight = SUBHEADING_WEIGHT; new_page = TRUE;
|
|
}
|
|
if (weight >= 0) @<Begin a new paragraph of this weight@>
|
|
else Main::error_in_web(I"don't understand @command", L);
|
|
}
|
|
|
|
@ In version 1 syntax there were some peculiar special headings above a divider
|
|
in the file made of hyphens, called "the bar". All of that has gone in V2.
|
|
|
|
@<Deal with Purpose@> =
|
|
if (before_bar == FALSE) Main::error_in_web(I"Purpose used after bar", L);
|
|
if (S->md->using_syntax >= V2_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "'@Purpose'", V1_SYNTAX);
|
|
L->category = PURPOSE_LCAT;
|
|
L->is_commentary = TRUE;
|
|
L->text_operand = Str::duplicate(remainder);
|
|
S->sect_purpose = Parser::extract_purpose(remainder, L->next_line, L->owning_section, &L);
|
|
|
|
@<Deal with Interface@> =
|
|
if (S->md->using_syntax >= V2_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "'@Interface'", V1_SYNTAX);
|
|
if (before_bar == FALSE) Main::error_in_web(I"Interface used after bar", L);
|
|
L->category = INTERFACE_LCAT;
|
|
L->is_commentary = TRUE;
|
|
source_line *XL = L->next_line;
|
|
while ((XL) && (XL->next_line) && (XL->owning_section == L->owning_section)) {
|
|
if (Str::get_first_char(XL->text) == '@') break;
|
|
XL->category = INTERFACE_BODY_LCAT;
|
|
L = XL;
|
|
XL = XL->next_line;
|
|
}
|
|
|
|
@<Deal with Definitions@> =
|
|
if (S->md->using_syntax >= V2_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "'@Definitions' headings", V1_SYNTAX);
|
|
if (before_bar == FALSE) Main::error_in_web(I"Definitions used after bar", L);
|
|
L->category = DEFINITIONS_LCAT;
|
|
L->is_commentary = TRUE;
|
|
before_bar = TRUE;
|
|
next_par_number = 1;
|
|
|
|
@ An |@| sign in the first column, followed by a row of four or more dashes,
|
|
constitutes the optional division bar in a section.
|
|
|
|
@<Deal with the bar@> =
|
|
if (S->md->using_syntax >= V2_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "the bar '----...'", V1_SYNTAX);
|
|
if (before_bar == FALSE) Main::error_in_web(I"second bar in the same section", L);
|
|
L->category = BAR_LCAT;
|
|
L->is_commentary = TRUE;
|
|
comment_mode = TRUE;
|
|
S->barred = TRUE;
|
|
before_bar = FALSE;
|
|
next_par_number = 1;
|
|
|
|
@ In version 1, the division point where a paragraph begins to go into
|
|
verbatim code was not marked with an equals sign, but with one of the three
|
|
commands |@c| ("code"), |@e| ("early code") and |@x| ("code-like extract").
|
|
These had identical behaviour except for whether or not to tangle what
|
|
follows:
|
|
|
|
@<Deal with the code and extract markers@> =
|
|
if (S->md->using_syntax > V1_SYNTAX)
|
|
Parser::wrong_version(S->md->using_syntax, L, "'@c' and '@x'", V1_SYNTAX);
|
|
L->category = BEGIN_CODE_LCAT;
|
|
if ((Str::eq_wide_string(command_text, L"e")) && (current_paragraph))
|
|
current_paragraph->placed_early = TRUE;
|
|
if (Str::eq_wide_string(command_text, L"x")) code_lcat_for_body = TEXT_EXTRACT_LCAT;
|
|
else code_lcat_for_body = CODE_BODY_LCAT;
|
|
code_pl_for_body = NULL;
|
|
comment_mode = FALSE;
|
|
code_plainness_for_body = FALSE;
|
|
hyperlink_body = FALSE;
|
|
|
|
@ This is for |@d| and |@define|. Definitions are intended to translate to
|
|
C preprocessor macros, Inform 6 |Constant|s, and so on.
|
|
|
|
@<Deal with the define marker@> =
|
|
L->category = BEGIN_DEFINITION_LCAT;
|
|
code_lcat_for_body = CONT_DEFINITION_LCAT;
|
|
code_pl_for_body = NULL;
|
|
match_results mr = Regexp::create_mr();
|
|
if (Regexp::match(&mr, remainder, L"(%C+) (%c+)")) {
|
|
L->text_operand = Str::duplicate(mr.exp[0]); /* name of term defined */
|
|
L->text_operand2 = Str::duplicate(mr.exp[1]); /* Value */
|
|
} else {
|
|
L->text_operand = Str::duplicate(remainder); /* name of term defined */
|
|
L->text_operand2 = Str::new(); /* no value given */
|
|
}
|
|
Analyser::mark_reserved_word_at_line(L, L->text_operand, CONSTANT_COLOUR);
|
|
comment_mode = FALSE;
|
|
L->is_commentary = FALSE;
|
|
Regexp::dispose_of(&mr);
|
|
|
|
@ This is for |@e| (in version 2) and |@enum|, which makes an automatically
|
|
enumerated sort of |@d|.
|
|
|
|
@<Deal with the enumeration marker@> =
|
|
L->category = BEGIN_DEFINITION_LCAT;
|
|
text_stream *from = NULL;
|
|
match_results mr = Regexp::create_mr();
|
|
L->text_operand = Str::duplicate(remainder); /* name of term defined */
|
|
TEMPORARY_TEXT(before);
|
|
TEMPORARY_TEXT(after);
|
|
if (LanguageMethods::parse_comment(S->sect_language, L->text_operand,
|
|
before, after)) {
|
|
Str::copy(L->text_operand, before);
|
|
}
|
|
DISCARD_TEXT(before);
|
|
DISCARD_TEXT(after);
|
|
Str::trim_white_space(L->text_operand);
|
|
if (Regexp::match(&mr, L->text_operand, L"(%C+) from (%c+)")) {
|
|
from = mr.exp[1];
|
|
Str::copy(L->text_operand, mr.exp[0]);
|
|
} else if (Regexp::match(&mr, L->text_operand, L"(%C+) (%c+)")) {
|
|
Main::error_in_web(I"enumeration constants can't supply a value", L);
|
|
}
|
|
L->text_operand2 = Str::new();
|
|
if (inweb_mode == TANGLE_MODE)
|
|
Enumerations::define(L->text_operand2, L->text_operand, from, L);
|
|
Analyser::mark_reserved_word_at_line(L, L->text_operand, CONSTANT_COLOUR);
|
|
comment_mode = FALSE;
|
|
L->is_commentary = FALSE;
|
|
Regexp::dispose_of(&mr);
|
|
|
|
@ Here we handle paragraph breaks which may or may not be headings. In
|
|
version 1, |@p| was a heading, and |@pp| a grander heading, while plain |@|
|
|
is no heading at all. The use of "p" was a little confusing, and went back
|
|
to CWEB, which used the term "paragraph" differently from us: it was "p"
|
|
short for what CWEB called a "paragraph". We now use |@h| or equivalently
|
|
|@heading| for a heading.
|
|
|
|
The noteworthy thing here is the way we fool around with the text on the line
|
|
of the paragraph opening. This is one of the few cases where Inweb has
|
|
retained the stream-based style of CWEB, where escape characters can appear
|
|
anywhere in a line and line breaks are not significant. Thus
|
|
= (text)
|
|
@h The chronology of French weaving. Auguste de Papillon (1734-56) soon
|
|
=
|
|
is split into two, so that the title of the paragraph is just "The chronology
|
|
of French weaving" and the remainder,
|
|
= (text)
|
|
Auguste de Papillon (1734-56) soon
|
|
=
|
|
will be woven exactly as the succeeding lines will be.
|
|
|
|
@d ORDINARY_WEIGHT 0 /* an ordinary paragraph has this "weight" */
|
|
@d SUBHEADING_WEIGHT 1 /* a heading paragraph */
|
|
|
|
@<Begin a new paragraph of this weight@> =
|
|
comment_mode = TRUE;
|
|
L->is_commentary = TRUE;
|
|
L->category = PARAGRAPH_START_LCAT;
|
|
if (weight == SUBHEADING_WEIGHT) L->category = HEADING_START_LCAT;
|
|
L->text_operand = Str::new(); /* title */
|
|
match_results mr = Regexp::create_mr();
|
|
if ((weight == SUBHEADING_WEIGHT) && (Regexp::match(&mr, remainder, L"(%c+). (%c+)"))) {
|
|
L->text_operand = Str::duplicate(mr.exp[0]);
|
|
L->text_operand2 = Str::duplicate(mr.exp[1]);
|
|
} else if ((weight == SUBHEADING_WEIGHT) && (Regexp::match(&mr, remainder, L"(%c+). *"))) {
|
|
L->text_operand = Str::duplicate(mr.exp[0]);
|
|
L->text_operand2 = Str::new();
|
|
} else {
|
|
L->text_operand = Str::new();
|
|
L->text_operand2 = Str::duplicate(remainder);
|
|
}
|
|
@<Create a new paragraph, starting here, as new current paragraph@>;
|
|
|
|
L->owning_paragraph = current_paragraph;
|
|
W->no_paragraphs++;
|
|
Regexp::dispose_of(&mr);
|
|
|
|
@ So now it's time to create paragraph structures:
|
|
|
|
=
|
|
typedef struct paragraph {
|
|
int above_bar; /* placed above the dividing bar in its section (in Version 1 syntax) */
|
|
int placed_early; /* should appear early in the tangled code */
|
|
int placed_very_early; /* should appear very early in the tangled code */
|
|
struct text_stream *ornament; /* a "P" for a pilcrow or "S" for section-marker */
|
|
struct text_stream *paragraph_number; /* used in combination with the ornament */
|
|
int next_child_number; /* used when working out paragraph numbers */
|
|
struct paragraph *parent_paragraph; /* ditto */
|
|
|
|
int weight; /* typographic prominence: one of the |*_WEIGHT| values */
|
|
int starts_on_new_page; /* relevant for weaving to TeX only, of course */
|
|
|
|
struct para_macro *defines_macro; /* there can only be one */
|
|
struct linked_list *functions; /* of |function|: those defined in this para */
|
|
struct linked_list *structures; /* of |language_type|: similarly */
|
|
struct linked_list *taggings; /* of |paragraph_tagging| */
|
|
struct source_line *first_line_in_paragraph;
|
|
struct section *under_section;
|
|
MEMORY_MANAGEMENT
|
|
} paragraph;
|
|
|
|
@<Create a new paragraph, starting here, as new current paragraph@> =
|
|
paragraph *P = CREATE(paragraph);
|
|
if (S->md->using_syntax > V1_SYNTAX) {
|
|
P->above_bar = FALSE;
|
|
P->placed_early = FALSE;
|
|
P->placed_very_early = FALSE;
|
|
} else {
|
|
P->above_bar = before_bar;
|
|
P->placed_early = before_bar;
|
|
P->placed_very_early = FALSE;
|
|
}
|
|
if ((S->md->using_syntax == V1_SYNTAX) && (before_bar))
|
|
P->ornament = Str::duplicate(I"P");
|
|
else
|
|
P->ornament = Str::duplicate(I"S");
|
|
WRITE_TO(P->paragraph_number, "%d", next_par_number++);
|
|
P->parent_paragraph = NULL;
|
|
P->next_child_number = 1;
|
|
P->starts_on_new_page = FALSE;
|
|
P->weight = weight;
|
|
P->first_line_in_paragraph = L;
|
|
P->defines_macro = NULL;
|
|
P->functions = NEW_LINKED_LIST(function);
|
|
P->structures = NEW_LINKED_LIST(language_type);
|
|
P->taggings = NEW_LINKED_LIST(paragraph_tagging);
|
|
|
|
P->under_section = S;
|
|
S->sect_paragraphs++;
|
|
ADD_TO_LINKED_LIST(P, paragraph, S->paragraphs);
|
|
|
|
current_paragraph = P;
|
|
|
|
@ Finally, we're down to either commentary or code.
|
|
|
|
@<This is a line destined for commentary@> =
|
|
match_results mr = Regexp::create_mr();
|
|
if (Regexp::match(&mr, L->text, L">> (%c+)")) {
|
|
L->category = SOURCE_DISPLAY_LCAT;
|
|
L->text_operand = Str::duplicate(mr.exp[0]);
|
|
}
|
|
Regexp::dispose_of(&mr);
|
|
|
|
@ Note that in an |@d| definition, a blank line is treated as the end of the
|
|
definition. (This is unnecessary for C, and is a point of difference with
|
|
CWEB, but is needed for languages which don't allow multi-line definitions.)
|
|
|
|
@<This is a line destined for the verbatim code@> =
|
|
if ((L->category != BEGIN_DEFINITION_LCAT) && (L->category != COMMAND_LCAT)) {
|
|
L->category = code_lcat_for_body;
|
|
L->plainer = code_plainness_for_body;
|
|
L->enable_hyperlinks = hyperlink_body;
|
|
if (L->category == TEXT_EXTRACT_LCAT) L->colour_as = code_pl_for_body;
|
|
}
|
|
|
|
if ((L->category == CONT_DEFINITION_LCAT) && (Regexp::string_is_white_space(L->text))) {
|
|
L->category = COMMENT_BODY_LCAT;
|
|
L->is_commentary = TRUE;
|
|
code_lcat_for_body = COMMENT_BODY_LCAT;
|
|
comment_mode = TRUE;
|
|
}
|
|
|
|
LanguageMethods::subcategorise_line(S->sect_language, L);
|
|
|
|
@ The purpose text occurs just below the heading. In version 1 it's cued with
|
|
a |@Purpose:| command; in version 2 it is unmarked. The following routine
|
|
is not elegant but handles the back end of both possibilities.
|
|
|
|
=
|
|
/* Build the purpose text of section |S|: a copy of |prologue|, followed by the
   text of each line swallowed from |XL| onwards, joined by spaces and trimmed
   of white space. A line is swallowed only if it has a successor and belongs
   to |S|, and further:
   (a) when |adjust| is set (the version 1 case), if it opens with an
       alphanumeric character; |*adjust| is then moved on to each swallowed
       line in turn;
   (b) when |adjust| is |NULL| (the version 2 case), if it is currently
       categorised as commentary body.
   Swallowed lines are recategorised as purpose body. */
text_stream *Parser::extract_purpose(text_stream *prologue, source_line *XL, section *S, source_line **adjust) {
	text_stream *P = Str::duplicate(prologue);
	while ((XL) && (XL->next_line) && (XL->owning_section == S)) {
		int belongs = FALSE;
		if (adjust) {
			/* |isalnum| is defined only for values representable as an
			   unsigned char (or EOF), so wider characters are rejected
			   without being passed to it */
			int c = (int) Str::get_first_char(XL->text);
			if ((c >= 0) && (c <= 0xFF) && (isalnum(c))) belongs = TRUE;
		} else if (XL->category == COMMENT_BODY_LCAT) {
			belongs = TRUE;
		}
		if (belongs == FALSE) break;
		WRITE_TO(P, " %S", XL->text);
		XL->category = PURPOSE_BODY_LCAT;
		XL->is_commentary = TRUE;
		if (adjust) *adjust = XL;
		XL = XL->next_line;
	}
	Str::trim_white_space(P);
	return P;
}
|
|
|
|
@h Version errors.
|
|
These are not fatal (why should they be?): Inweb carries on and allows the use
|
|
of the feature despite the version mismatch. They nevertheless count as errors
|
|
when it comes to Inweb's exit code, so they will halt a make.
|
|
|
|
=
|
|
/* Issue an error at line |L| saying that |feature| belongs to syntax version
   |need|, whereas the web is written in version |using|. */
void Parser::wrong_version(int using, source_line *L, char *feature, int need) {
	TEMPORARY_TEXT(message);
	WRITE_TO(message,
		"%s is a feature available only in version %d syntax (you're using version %d)",
		feature, need, using);
	Main::error_in_web(message, L);
	DISCARD_TEXT(message);
}
|
|
|
|
@h Footnote notation.
|
|
|
|
=
|
|
/* Look in |matter| for a footnote cue: the web's "Footnote Begins Notation",
   then one or more characters, all of them digits, then its "Footnote Ends
   Notation". If one is found, set |before| to the text preceding the opening
   notation, |cue| to the digits, and |after| to the text following the
   closing notation, and return |TRUE|. Otherwise return |FALSE|, leaving
   those three texts alone. Nothing is looked for if the opening notation is
   "Off", or if either notation is empty. */
int Parser::detect_footnote(web *W, text_stream *matter, text_stream *before,
	text_stream *cue, text_stream *after) {
	text_stream *fn_on_notation =
		Bibliographic::get_datum(W->md, I"Footnote Begins Notation");
	text_stream *fn_off_notation =
		Bibliographic::get_datum(W->md, I"Footnote Ends Notation");
	if (Str::ne(fn_on_notation, I"Off") == FALSE) return FALSE;

	int N1 = Str::len(fn_on_notation);
	int N2 = Str::len(fn_off_notation);
	if ((N1 <= 0) || (N2 <= 0)) return FALSE;

	for (int i=0; i < Str::len(matter); i++) {
		if (Str::includes_at(matter, i, fn_on_notation) == FALSE) continue;
		/* try each possible closing position, leaving room for at least one cue character */
		for (int j = i + N1 + 1; j < Str::len(matter); j++) {
			if (Str::includes_at(matter, j, fn_off_notation) == FALSE) continue;
			TEMPORARY_TEXT(b);
			TEMPORARY_TEXT(c);
			TEMPORARY_TEXT(a);
			Str::substr(b, Str::start(matter), Str::at(matter, i));
			Str::substr(c, Str::at(matter, i + N1), Str::at(matter, j));
			Str::substr(a, Str::at(matter, j + N2), Str::end(matter));
			/* the candidate cue is acceptable only if it is entirely digits */
			int allow = TRUE;
			LOOP_THROUGH_TEXT(pos, c)
				if (Characters::isdigit(Str::get(pos)) == FALSE)
					allow = FALSE;
			if (allow) {
				Str::clear(before); Str::copy(before, b);
				Str::clear(cue); Str::copy(cue, c);
				Str::clear(after); Str::copy(after, a);
			}
			DISCARD_TEXT(b);
			DISCARD_TEXT(c);
			DISCARD_TEXT(a);
			if (allow) return TRUE;
		}
	}
	return FALSE;
}
|