Chapter 2: Nowebify.

This commit is contained in:
AwesomeAdam54321 2024-03-09 13:41:29 +08:00
parent 5cdfe7a60b
commit cf990fc08d
7 changed files with 311 additions and 306 deletions

View file

@ -3,14 +3,14 @@
To define sequentially numbered values for families of constants.
@ The idea here is that each enumeration set is a sequence of named constants
with a given postfix: for example, |HARRY_ST|, |NEVILLE_ST|, |ANGELINA_ST|
form the |*_ST| set. By definition, the postfix part is the portion of the
name following the final underscore, so in this case |ST|.
with a given postfix: for example, [[HARRY_ST]], [[NEVILLE_ST]], [[ANGELINA_ST]]
form the [[*_ST]] set. By definition, the postfix part is the portion of the
name following the final underscore, so in this case [[ST]].
Each set of constants begins at a given value (typically 0) and then
increments sequentially in definition order.
=
<<*>>=
typedef struct enumeration_set {
struct text_stream *postfix;
struct text_stream *stub;
@ -23,7 +23,7 @@ typedef struct enumeration_set {
@ There won't be enough sets to make a hash table worth the overhead, so
compare all against all:
=
<<*>>=
enumeration_set *Enumerations::find(text_stream *post) {
enumeration_set *es = NULL;
LOOP_OVER(es, enumeration_set)
@ -32,25 +32,25 @@ enumeration_set *Enumerations::find(text_stream *post) {
return NULL;
}
@ The following is called when an enumeration is found. If |from| has a
@ The following is called when an enumeration is found. If [[from]] has a
sensible value, this is the start of a new enumeration set; otherwise it's
a further constant in what ought to be an existing set.
=
<<*>>=
void Enumerations::define(OUTPUT_STREAM, text_stream *symbol,
text_stream *from, source_line *L) {
TEMPORARY_TEXT(pf)
@<Find the postfix in this symbol name@>;
<<Find the postfix in this symbol name>>;
enumeration_set *es = Enumerations::find(pf);
if (from == NULL) @<Continue existing set@>
else @<Begin new set@>;
if (from == NULL) <<Continue existing set>>
else <<Begin new set>>;
DISCARD_TEXT(pf)
if (es) es->last_observed_at = L;
}
@ So for instance |HARRY_ST| to |ST|:
@ So for instance [[HARRY_ST]] to [[ST]]:
@<Find the postfix in this symbol name@> =
<<Find the postfix in this symbol name>>=
match_results mr = Regexp::create_mr();
if (Regexp::match(&mr, symbol, L"%c*_(%C+?)")) Str::copy(pf, mr.exp[0]);
else {
@ -59,14 +59,14 @@ void Enumerations::define(OUTPUT_STREAM, text_stream *symbol,
}
Regexp::dispose_of(&mr);
@<Continue existing set@> =
<<Continue existing set>>=
if (es) {
if (es->stub) WRITE("(%S+", es->stub);
WRITE("%d", es->next_free_value++);
if (es->stub) WRITE(")");
} else Main::error_in_web(I"this enumeration _FAMILY is unknown", L);
@<Begin new set@> =
<<Begin new set>>=
if (es) Main::error_in_web(I"this enumeration _FAMILY already exists", L);
else {
es = CREATE(enumeration_set);
@ -88,10 +88,10 @@ void Enumerations::define(OUTPUT_STREAM, text_stream *symbol,
if (es->stub) WRITE(")");
@ For each set, a further constant is defined to give the range; for example,
we would have |NO_DEFINED_ST_VALUES| set to 3. This is notionally placed in
the code at the last line on which an |*_ST| value was defined.
we would have [[NO_DEFINED_ST_VALUES]] set to 3. This is notionally placed in
the code at the last line on which an [[*_ST]] value was defined.
=
<<*>>=
void Enumerations::define_extents(OUTPUT_STREAM, tangle_target *target, programming_language *lang) {
enumeration_set *es;
LOOP_OVER(es, enumeration_set) {

View file

@ -3,12 +3,12 @@
To store individual lines from webs, and to categorise them according
to their meaning.
@h Line storage.
@ \section{Line storage.}
In the next section, we'll read in an entire web, building its hierarchical
structure of chapters, sections and eventually paragraphs. But before we do
that, we'll define the structure used to store a single line of the web.
Because Inweb markup makes use of the special characters |@| and |=| as
Because Inweb markup makes use of the special characters [[@]] and [[=]] as
dividers, but only in column 1, the important divisions between material
all effectively occur at line boundaries -- this is a major point of
difference with, for example, CWEB, for which the source is just a stream
@ -16,19 +16,19 @@ of characters in which all white space is equivalent. Because Inweb source
is so tidily divisible into lines, we can usefully make each source line
correspond to one of these:
=
<<*>>=
typedef struct source_line {
struct text_stream *text; /* the text as read in */
struct text_stream *text_operand; /* meaning depends on category */
struct text_stream *text_operand2; /* meaning depends on category */
int category; /* what sort of line this is: an |*_LCAT| value */
int command_code; /* used only for |COMMAND_LCAT| lines: a |*_CMD| value */
int default_defn; /* used only for |BEGIN_DEFINITION_LCAT| lines */
int plainer; /* used only for |BEGIN_CODE_LCAT| lines: suppresses box */
int enable_hyperlinks; /* used only for |CODE_BODY_LCAT| lines: link URLs in weave */
struct programming_language *colour_as; /* used only for |TEXT_EXTRACT_LCAT| lines */
struct text_stream *extract_to; /* used only for |TEXT_EXTRACT_LCAT| lines */
int category; /* what sort of line this is: an [[*_LCAT]] value */
int command_code; /* used only for [[COMMAND_LCAT]] lines: a [[*_CMD]] value */
int default_defn; /* used only for [[BEGIN_DEFINITION_LCAT]] lines */
int plainer; /* used only for [[BEGIN_CODE_LCAT]] lines: suppresses box */
int enable_hyperlinks; /* used only for [[CODE_BODY_LCAT]] lines: link URLs in weave */
struct programming_language *colour_as; /* used only for [[TEXT_EXTRACT_LCAT]] lines */
struct text_stream *extract_to; /* used only for [[TEXT_EXTRACT_LCAT]] lines */
int is_commentary; /* flag */
struct language_function *function_defined; /* if any C-like function is defined on this line */
struct preform_nonterminal *preform_nonterminal_defined; /* similarly */
@ -40,10 +40,10 @@ typedef struct source_line {
struct section *owning_section; /* for interleaved title lines, it's the one about to start */
struct source_line *next_line; /* within the owning section's linked list */
struct paragraph *owning_paragraph; /* for lines falling under paragraphs; |NULL| if not */
struct paragraph *owning_paragraph; /* for lines falling under paragraphs; [[NULL]] if not */
} source_line;
@ =
<<*>>=
source_line *Lines::new_source_line_in(text_stream *line, text_file_position *tfp,
section *S) {
source_line *sl = CREATE(source_line);
@ -76,45 +76,46 @@ source_line *Lines::new_source_line_in(text_stream *line, text_file_position *tf
return sl;
}
@h Categories.
@ \section{Categories.}
The line categories are enumerated as follows. We briefly note what the text
operands (TO and TO2) are set to, if anything: most of the time they're blank.
Note that a few of these categories are needed only for the more cumbersome
version 1 syntax; version 2 removed the need for |BAR_LCAT|,
|INTERFACE_BODY_LCAT|, and |INTERFACE_LCAT|.
version 1 syntax; version 2 removed the need for [[BAR_LCAT]],
[[INTERFACE_BODY_LCAT]], and [[INTERFACE_LCAT]].
@e NO_LCAT from 0 /* (used when none has been set as yet) */
<<*>>=
enum NO_LCAT from 0 /* (used when none has been set as yet) */
@e BAR_LCAT /* a bar line |@---------------|... */
@e BEGIN_CODE_LCAT /* an |@c|, |@e| or |@x| line below which is code, early code or extract */
@e BEGIN_DEFINITION_LCAT /* an |@d| definition: TO is term, TO2 is this line's part of defn */
@e C_LIBRARY_INCLUDE_LCAT /* C-like languages only: a |#include| for an ANSI C header file */
@e CHAPTER_HEADING_LCAT /* chapter heading line inserted automatically, not read from web */
@e CODE_BODY_LCAT /* the rest of the paragraph under an |@c| or |@e| or macro definition */
@e COMMAND_LCAT /* a |[[Command]]| line, with the operand set to the |*_CMD| value */
@e COMMENT_BODY_LCAT /* text following a paragraph header, which is all comment */
@e CONT_DEFINITION_LCAT /* subsequent lines of an |@d| definition */
@e DEFINITIONS_LCAT /* line holding the |@Definitions:| heading */
@e END_EXTRACT_LCAT /* an |=| line used to mark the end of an extract */
@e FOOTNOTE_TEXT_LCAT /* the opening of the text of a footnote */
@e HEADING_START_LCAT /* |@h| paragraph start: TO is title, TO2 is rest of line */
@e INTERFACE_BODY_LCAT /* line within the interface, under this heading */
@e INTERFACE_LCAT /* line holding the |@Interface:| heading */
@e MACRO_DEFINITION_LCAT /* line on which a paragraph macro is defined with an |=| sign */
@e PARAGRAPH_START_LCAT /* simple |@| paragraph start: TO is blank, TO2 is rest of line */
@e PREFORM_GRAMMAR_LCAT /* InC only: line of Preform grammar */
@e PREFORM_LCAT /* InC only: opening line of a Preform nonterminal */
@e PURPOSE_BODY_LCAT /* continuation lines of purpose declaration */
@e PURPOSE_LCAT /* first line of purpose declaration; TO is rest of line */
@e SECTION_HEADING_LCAT /* section heading line, at top of file */
@e SOURCE_DISPLAY_LCAT /* commentary line beginning |>>| for display: TO is display text */
@e TEXT_EXTRACT_LCAT /* the rest of the paragraph under an |@x| */
@e TYPEDEF_LCAT /* C-like languages only: a |typedef| which isn't a structure definition */
enum BAR_LCAT /* a bar line [[@---------------]]... */
enum BEGIN_CODE_LCAT /* an [[@c]], [[@e]] or [[@x]] line below which is code, early code or extract */
enum BEGIN_DEFINITION_LCAT /* an [[@d]] definition: TO is term, TO2 is this line's part of defn */
enum C_LIBRARY_INCLUDE_LCAT /* C-like languages only: a [[#include]] for an ANSI C header file */
enum CHAPTER_HEADING_LCAT /* chapter heading line inserted automatically, not read from web */
enum CODE_BODY_LCAT /* the rest of the paragraph under an [[@c]] or [[@e]] or macro definition */
enum COMMAND_LCAT /* a [[[[Command]]]] line, with the operand set to the [[*_CMD]] value */
enum COMMENT_BODY_LCAT /* text following a paragraph header, which is all comment */
enum CONT_DEFINITION_LCAT /* subsequent lines of an [[@d]] definition */
enum DEFINITIONS_LCAT /* line holding the [[@Definitions:]] heading */
enum END_EXTRACT_LCAT /* an [[=]] line used to mark the end of an extract */
enum FOOTNOTE_TEXT_LCAT /* the opening of the text of a footnote */
enum HEADING_START_LCAT /* [[@h]] paragraph start: TO is title, TO2 is rest of line */
enum INTERFACE_BODY_LCAT /* line within the interface, under this heading */
enum INTERFACE_LCAT /* line holding the [[@Interface:]] heading */
enum MACRO_DEFINITION_LCAT /* line on which a paragraph macro is defined with an [[=]] sign */
enum PARAGRAPH_START_LCAT /* simple [[@]] paragraph start: TO is blank, TO2 is rest of line */
enum PREFORM_GRAMMAR_LCAT /* InC only: line of Preform grammar */
enum PREFORM_LCAT /* InC only: opening line of a Preform nonterminal */
enum PURPOSE_BODY_LCAT /* continuation lines of purpose declaration */
enum PURPOSE_LCAT /* first line of purpose declaration; TO is rest of line */
enum SECTION_HEADING_LCAT /* section heading line, at top of file */
enum SOURCE_DISPLAY_LCAT /* commentary line beginning [[>>]] for display: TO is display text */
enum TEXT_EXTRACT_LCAT /* the rest of the paragraph under an [[@x]] */
enum TYPEDEF_LCAT /* C-like languages only: a [[typedef]] which isn't a structure definition */
@ We want to print these out nicely for the sake of a |-scan| analysis run
@ We want to print these out nicely for the sake of a [[-scan]] analysis run
of Inweb:
=
<<*>>=
char *Lines::category_name(int cat) {
switch (cat) {
case NO_LCAT: return "(uncategorised)";
@ -148,22 +149,23 @@ char *Lines::category_name(int cat) {
return "(?unknown)";
}
@h Command codes.
@ \section{Command codes.}
Command-category lines are further divided up into the following. Again,
some of these fell into disuse in version 2 syntax.
@e NO_CMD from 0
@e PAGEBREAK_CMD
@e GRAMMAR_INDEX_CMD
@e FIGURE_CMD
@e AUDIO_CMD
@e VIDEO_CMD
@e DOWNLOAD_CMD
@e CAROUSEL_CMD
@e CAROUSEL_ABOVE_CMD
@e CAROUSEL_BELOW_CMD
@e CAROUSEL_UNCAPTIONED_CMD
@e CAROUSEL_END_CMD
@e EMBED_CMD
@e TAG_CMD
@e HTML_CMD
<<*>>=
enum NO_CMD from 0
enum PAGEBREAK_CMD
enum GRAMMAR_INDEX_CMD
enum FIGURE_CMD
enum AUDIO_CMD
enum VIDEO_CMD
enum DOWNLOAD_CMD
enum CAROUSEL_CMD
enum CAROUSEL_ABOVE_CMD
enum CAROUSEL_BELOW_CMD
enum CAROUSEL_UNCAPTIONED_CMD
enum CAROUSEL_END_CMD
enum EMBED_CMD
enum TAG_CMD
enum HTML_CMD

View file

@ -4,19 +4,19 @@ To manage the set of named paragraph macros in a section.
@ We store these like so:
=
<<*>>=
typedef struct para_macro {
struct text_stream *macro_name; /* usually long, like "Create a paragraph macro here" */
struct paragraph *defining_paragraph; /* as printed in small type after the name in any usage */
struct source_line *defn_start; /* it ends at the end of its defining paragraph */
struct linked_list *macro_usages; /* of |macro_usage|: only computed for weaves */
struct linked_list *macro_usages; /* of [[macro_usage]]: only computed for weaves */
CLASS_DEFINITION
} para_macro;
@ Each section has its own linked list of paragraph macros, since the scope for
the usage of these is always a single section.
=
<<*>>=
para_macro *Macros::create(section *S, paragraph *P, source_line *L, text_stream *name) {
para_macro *pmac = CREATE(para_macro);
pmac->macro_name = Str::duplicate(name);
@ -28,13 +28,13 @@ para_macro *Macros::create(section *S, paragraph *P, source_line *L, text_stream
return pmac;
}
@h Paragraph macro search.
@ \section{Paragraph macro search.}
The scope for looking up paragraph macro names is a single section, not the
entire web. So you can't expand a macro from another section, but then again,
you can use the same macro name twice in different sections; and lookup is
much faster.
=
<<*>>=
para_macro *Macros::find_by_name(text_stream *name, section *scope) {
para_macro *pmac;
LOOP_OVER_LINKED_LIST(pmac, para_macro, scope->macros)

View file

@ -15,19 +15,19 @@ We can certainly only do it if we know exactly where macros are used. This
is something we scan for on a weave, but not on a tangle; that's fine, though,
because tangled code doesn't need to know its own paragraph numbers.
=
<<*>>=
void Numbering::number_web(web *W) {
chapter *C;
section *S;
LOOP_OVER_LINKED_LIST(C, chapter, W->chapters) {
LOOP_OVER_LINKED_LIST(S, section, C->sections) {
@<Scan this section to see where paragraph macros are used@>;
@<Work out paragraph numbers within this section@>;
<<Scan this section to see where paragraph macros are used>>;
<<Work out paragraph numbers within this section>>;
}
}
}
@<Scan this section to see where paragraph macros are used@> =
<<Scan this section to see where paragraph macros are used>>=
for (source_line *L = S->first_line; L; L = L->next_line) {
TEMPORARY_TEXT(p)
Str::copy(p, L->text);
@ -41,7 +41,7 @@ void Numbering::number_web(web *W) {
Str::substr(p, Str::at(original_p, mpos + mlen), Str::end(original_p));
DISCARD_TEXT(original_p)
para_macro *pmac = Macros::find_by_name(found_macro, S);
if (pmac) @<Add a record that the macro is used in this paragraph@>;
if (pmac) <<Add a record that the macro is used in this paragraph>>;
DISCARD_TEXT(found_macro)
}
DISCARD_TEXT(p)
@ -57,14 +57,14 @@ we end up with numbers out of order, since the one after it would have to
be 1.1.1. Instead this one will be 1.1.1, to place it into the natural
lexicographic sequence.
=
<<*>>=
/* One record of a paragraph macro being used within a particular paragraph. */
typedef struct macro_usage {
struct paragraph *used_in_paragraph; /* presumably the paragraph containing the usage -- confirm against the code that creates these records */
int multiplicity; /* for example, 2 if it's used twice in this paragraph */
CLASS_DEFINITION
} macro_usage;
@<Add a record that the macro is used in this paragraph@> =
<<Add a record that the macro is used in this paragraph>>=
macro_usage *mu, *last = NULL;
LOOP_OVER_LINKED_LIST(mu, macro_usage, pmac->macro_usages) {
last = mu;
@ -84,14 +84,14 @@ paragraph defines a macro then we want it to be a child node of the
paragraph where the macro is first used; it's then a matter of filling in
other nodes a bit speculatively.
@<Work out paragraph numbers within this section@> =
@<The parent of a macro definition is the place where it's first used@>;
@<Otherwise share the parent of a following paragraph, provided it precedes us@>;
@<Create paragraph number texts@>;
@<Number the still parent-less paragraphs consecutively from 1@>;
@<Recursively derive the numbers of parented paragraphs from those of their parents@>;
<<Work out paragraph numbers within this section>>=
<<The parent of a macro definition is the place where it's first used>>;
<<Otherwise share the parent of a following paragraph, provided it precedes us>>;
<<Create paragraph number texts>>;
<<Number the still parent-less paragraphs consecutively from 1>>;
<<Recursively derive the numbers of parented paragraphs from those of their parents>>;
@<The parent of a macro definition is the place where it's first used@> =
<<The parent of a macro definition is the place where it's first used>>=
paragraph *P;
LOOP_OVER_LINKED_LIST(P, paragraph, S->paragraphs)
if (P->defines_macro) {
@ -103,7 +103,7 @@ other nodes a bit speculatively.
}
}
@<Otherwise share the parent of a following paragraph, provided it precedes us@> =
<<Otherwise share the parent of a following paragraph, provided it precedes us>>=
paragraph *P;
LOOP_OVER_LINKED_LIST(P, paragraph, S->paragraphs)
if (P->parent_paragraph == NULL)
@ -116,7 +116,7 @@ other nodes a bit speculatively.
}
}
@<Create paragraph number texts@> =
<<Create paragraph number texts>>=
paragraph *P;
LOOP_OVER_LINKED_LIST(P, paragraph, S->paragraphs)
P->paragraph_number = Str::new();
@ -125,7 +125,7 @@ other nodes a bit speculatively.
numbered 1, 2, 3, ..., and then children are numbered with suffixes .1, .2, .3,
..., under their parents.
@<Number the still parent-less paragraphs consecutively from 1@> =
<<Number the still parent-less paragraphs consecutively from 1>>=
int top_level = 1;
paragraph *P;
LOOP_OVER_LINKED_LIST(P, paragraph, S->paragraphs)
@ -135,7 +135,7 @@ numbered 1, 2, 3, ..., and then children are numbered with suffixes .1, .2, .3,
} else
Str::clear(P->paragraph_number);
@<Recursively derive the numbers of parented paragraphs from those of their parents@> =
<<Recursively derive the numbers of parented paragraphs from those of their parents>>=
paragraph *P;
LOOP_OVER_LINKED_LIST(P, paragraph, S->paragraphs)
Numbering::settle_paragraph_number(P);
@ -145,7 +145,7 @@ to end up numbered 2, because it isn't used anywhere and doesn't seem to be
in the middle of a wider description. But better to keep it in the sequence
chosen by the author, so 2 it is.
=
<<*>>=
void Numbering::settle_paragraph_number(paragraph *P) {
if (Str::len(P->paragraph_number) > 0) return;
WRITE_TO(P->paragraph_number, "X"); /* to prevent malformed sections hanging this */

View file

@ -6,7 +6,7 @@ Inweb, others manually by the author.
@ A tag really is just a textual name. Each differently-named tag leads
to one of the following being created:
=
<<*>>=
typedef struct theme_tag {
struct text_stream *tag_name;
int ifdef_positive;
@ -18,7 +18,7 @@ typedef struct theme_tag {
there's just a single namespace of all known tags. There are never very
many differently-named tags in a given web.
=
<<*>>=
theme_tag *Tags::find_by_name(text_stream *name, int creating_if_necessary) {
theme_tag *tag;
LOOP_OVER(tag, theme_tag)
@ -46,7 +46,7 @@ theme_tag *Tags::find_by_name(text_stream *name, int creating_if_necessary) {
also with a contextually relevant caption. The following records those;
they're stored as a linked list within each paragraph.
=
<<*>>=
typedef struct paragraph_tagging {
struct theme_tag *the_tag;
struct text_stream *caption;
@ -64,10 +64,10 @@ void Tags::add_to_paragraph(paragraph *P, theme_tag *tag, text_stream *caption)
}
@ Tags are created simply by being used in taggings. If the tag notation
|^"History: How tags came about"| is found, the following is called, and
the tag is |History|, the caption "How tags came about".
[[^"History: How tags came about"]] is found, the following is called, and
the tag is [[History]], the caption "How tags came about".
=
<<*>>=
theme_tag *Tags::add_by_name(paragraph *P, text_stream *text) {
if (Str::len(text) == 0) internal_error("empty tag name");
TEMPORARY_TEXT(name) Str::copy(name, text);
@ -87,7 +87,7 @@ theme_tag *Tags::add_by_name(paragraph *P, text_stream *text) {
@ If a given line is tagged with a given tag, what caption does it have?
=
<<*>>=
text_stream *Tags::retrieve_caption(paragraph *P, theme_tag *tag) {
if (tag == NULL) return NULL;
if (P) {
@ -103,7 +103,7 @@ text_stream *Tags::retrieve_caption(paragraph *P, theme_tag *tag) {
(Everything falls under the null non-tag: this ensures that a weave which
doesn't specify a tag includes every paragraph.)
=
<<*>>=
int Tags::tagged_with(paragraph *P, theme_tag *tag) {
if (tag == NULL) return TRUE;
if (P) {
@ -115,7 +115,7 @@ int Tags::tagged_with(paragraph *P, theme_tag *tag) {
return FALSE;
}
@ =
<<*>>=
void Tags::open_ifdefs(OUTPUT_STREAM, paragraph *P) {
paragraph_tagging *pt;
LOOP_OVER_LINKED_LIST(pt, paragraph_tagging, P->taggings)
@ -134,13 +134,13 @@ void Tags::close_ifdefs(OUTPUT_STREAM, paragraph *P) {
void Tags::show_endnote_on_ifdefs(heterogeneous_tree *tree, tree_node *ap, paragraph *P) {
int d = 0, sense = TRUE;
@<Show ifdef endnoting@>;
<<Show ifdef endnoting>>;
sense = FALSE;
@<Show ifdef endnoting@>;
<<Show ifdef endnoting>>;
if (d > 0) TextWeaver::commentary_text(tree, ap, I".");
}
@<Show ifdef endnoting@> =
<<Show ifdef endnoting>>=
int c = 0;
paragraph_tagging *pt;
LOOP_OVER_LINKED_LIST(pt, paragraph_tagging, P->taggings)

View file

@ -3,7 +3,7 @@
To work through the program read in, assigning each line its category,
and noting down other useful information as we go.
@h Sequence of parsing.
@ \section{Sequence of parsing.}
At this point, the web has been read into memory. It's a linked list of
chapters, each of which is a linked list of sections, each of which must
be parsed in turn.
@ -13,13 +13,13 @@ a chance to do some further work, if it wants to. (This is how, for example,
function definitions are recognised in C programs.) There is no requirement
for it to do anything.
=
<<*>>=
void Parser::parse_web(web *W, int inweb_mode) {
chapter *C;
section *S;
LOOP_OVER_LINKED_LIST(C, chapter, W->chapters)
LOOP_OVER_LINKED_LIST(S, section, C->sections)
@<Parse a section@>;
<<Parse a section>>;
LanguageMethods::parse_types(W, W->main_language);
LanguageMethods::parse_functions(W, W->main_language);
LanguageMethods::further_parsing(W, W->main_language);
@ -30,7 +30,7 @@ further into a linked list of paragraphs. The basic method would be simple
enough, but is made more elaborate by supporting both version 1 and version 2
markup syntax, and trying to detect incorrect uses of one within the other.
@<Parse a section@> =
<<Parse a section>>=
int comment_mode = TRUE, extract_mode = FALSE;
int code_lcat_for_body = NO_LCAT,
code_plainness_for_body = FALSE,
@ -42,25 +42,25 @@ markup syntax, and trying to detect incorrect uses of one within the other.
paragraph *current_paragraph = NULL;
TEMPORARY_TEXT(tag_list)
for (source_line *L = S->first_line, *PL = NULL; L; PL = L, L = L->next_line) {
@<Apply tag list, if any@>;
@<Remove tag list, if any@>;
@<Detect implied paragraph breaks@>;
@<Determine category for this source line@>;
<<Apply tag list, if any>>;
<<Remove tag list, if any>>;
<<Detect implied paragraph breaks>>;
<<Determine category for this source line>>;
}
DISCARD_TEXT(tag_list)
@<In version 2 syntax, construe the comment under the heading as the purpose@>;
@<If the section as a whole is tagged, apply that tag to each paragraph in it@>;
@<Work out footnote numbering for this section@>;
<<In version 2 syntax, construe the comment under the heading as the purpose>>;
<<If the section as a whole is tagged, apply that tag to each paragraph in it>>;
<<Work out footnote numbering for this section>>;
@ In version 2 syntax, the notation for tags was clarified. The tag list
for a paragraph is the run of |^"This"| and |^"That"| markers at the end of
for a paragraph is the run of [[^"This"]] and [[^"That"]] markers at the end of
the line introducing that paragraph. They can only occur, therefore, on a
line beginning with an |@|. We extract them into a string called |tag_list|.
line beginning with an [[@]]. We extract them into a string called [[tag_list]].
(The reason we can't act on them straight away, which would make for simpler
code, is that they need to be applied to a paragraph structure which doesn't
yet exist -- it will only exist when the line has been fully parsed.)
@<Remove tag list, if any@> =
<<Remove tag list, if any>>=
if (Str::get_first_char(L->text) == '@') {
match_results mr = Regexp::create_mr();
while (Regexp::match(&mr, L->text, L"(%c*?)( *%^\"%c+?\")(%c*)")) {
@ -73,10 +73,10 @@ yet exist -- it will only exist when the line has been fully parsed.)
Regexp::dispose_of(&mr);
}
@ And now it's later, and we can safely apply the tags. |current_paragraph|
@ And now it's later, and we can safely apply the tags. [[current_paragraph]]
now points to the para which was created by this line, not the one before.
@<Apply tag list, if any@> =
<<Apply tag list, if any>>=
match_results mr = Regexp::create_mr();
while (Regexp::match(&mr, tag_list, L" *%^\"(%c+?)\" *(%c*)")) {
Tags::add_by_name(current_paragraph, mr.exp[0]);
@ -85,7 +85,7 @@ now points to the para which was created by this line, not the one before.
Regexp::dispose_of(&mr);
Str::clear(tag_list);
@<If the section as a whole is tagged, apply that tag to each paragraph in it@> =
<<If the section as a whole is tagged, apply that tag to each paragraph in it>>=
paragraph *P;
if (S->tag_with)
LOOP_OVER_LINKED_LIST(P, paragraph, S->paragraphs)
@ -93,17 +93,17 @@ now points to the para which was created by this line, not the one before.
@ In the woven form of each section, footnotes are counted upwards from 1.
@<Work out footnote numbering for this section@> =
<<Work out footnote numbering for this section>>=
int next_footnote = 1;
paragraph *P;
LOOP_OVER_LINKED_LIST(P, paragraph, S->paragraphs)
@<Work out footnote numbering for this paragraph@>;
<<Work out footnote numbering for this paragraph>>;
@ The "purpose" of a section is a brief note about what it's for. In version 1
syntax, this had to be explicitly declared with a |@Purpose:| command; in
syntax, this had to be explicitly declared with a [[@Purpose:]] command; in
version 2 it's much tidier.
@<In version 2 syntax, construe the comment under the heading as the purpose@> =
<<In version 2 syntax, construe the comment under the heading as the purpose>>=
if (S->md->using_syntax >= V2_SYNTAX) {
source_line *L = S->first_line;
if ((L) && (L->category == CHAPTER_HEADING_LCAT)) L = L->next_line;
@ -117,27 +117,27 @@ version 2 it's much tidier.
what otherwise would be code, or when a paragraph and its code divider are
immediately adjacent on the same line.
@<Detect implied paragraph breaks@> =
<<Detect implied paragraph breaks>>=
match_results mr = Regexp::create_mr();
if ((PL) && (PL->category == CODE_BODY_LCAT) &&
(Str::get_first_char(L->text) == '@') && (Str::get_at(L->text, 1) == '<') &&
(Regexp::match(&mr, L->text, L"%c<(%c+)@> *= *")) &&
(Regexp::match(&mr, L->text, L"%c<(%c+)>> *= *")) &&
(S->md->using_syntax >= V2_SYNTAX)) {
@<Insert an implied paragraph break@>;
<<Insert an implied paragraph break>>;
}
if ((PL) && (Regexp::match(&mr, L->text, L"@ *= *"))) {
Str::clear(L->text);
Str::copy(L->text, I"=");
if (S->md->using_syntax < V2_SYNTAX)
Parser::wrong_version(S->md->using_syntax, L, "implied paragraph breaks", V2_SYNTAX);
@<Insert an implied paragraph break@>;
<<Insert an implied paragraph break>>;
}
Regexp::dispose_of(&mr);
@ We handle implied paragraph dividers by inserting a paragraph marker and
reparsing from there.
@<Insert an implied paragraph break@> =
<<Insert an implied paragraph break>>=
source_line *NL = Lines::new_source_line_in(I"@", &(L->source), S);
PL->next_line = NL;
NL->next_line = L;
@ -145,38 +145,38 @@ reparsing from there.
Regexp::dispose_of(&mr);
continue;
@h Categorisatiom.
@ \section{Categorisation.}
This is where the work is really done. We have a source line: is it comment,
code, definition, what?
@<Determine category for this source line@> =
<<Determine category for this source line>>=
L->is_commentary = comment_mode;
L->category = COMMENT_BODY_LCAT; /* until set otherwise down below */
L->owning_paragraph = current_paragraph;
if (L->source.line_count == 0) @<Parse the line as a probable chapter heading@>;
if (L->source.line_count <= 1) @<Parse the line as a probable section heading@>;
if (L->source.line_count == 0) <<Parse the line as a probable chapter heading>>;
if (L->source.line_count <= 1) <<Parse the line as a probable section heading>>;
if (extract_mode == FALSE) {
@<Parse the line as a possible Inweb command@>;
@<Parse the line as a possible paragraph macro definition@>;
<<Parse the line as a possible Inweb command>>;
<<Parse the line as a possible paragraph macro definition>>;
}
if (Str::get_first_char(L->text) == '=') {
if (S->md->using_syntax < V2_SYNTAX)
Parser::wrong_version(S->md->using_syntax, L, "column-1 '=' as code divider", V2_SYNTAX);
if (extract_mode) @<Exit extract mode@>
else @<Parse the line as an equals structural marker@>;
if (extract_mode) <<Exit extract mode>>
else <<Parse the line as an equals structural marker>>;
}
if ((Str::get_first_char(L->text) == '@') &&
(Str::get_at(L->text, 1) != '<') &&
(L->category != MACRO_DEFINITION_LCAT))
@<Parse the line as a structural marker@>;
if (comment_mode) @<This is a line destined for commentary@>;
if (comment_mode == FALSE) @<This is a line destined for the verbatim code@>;
<<Parse the line as a structural marker>>;
if (comment_mode) <<This is a line destined for commentary>>;
if (comment_mode == FALSE) <<This is a line destined for the verbatim code>>;
@ This must be one of the inserted lines marking chapter headings; it doesn't
come literally from the source web.
@<Parse the line as a probable chapter heading@> =
<<Parse the line as a probable chapter heading>>=
if (Str::eq_wide_string(L->text, L"Chapter Heading")) {
comment_mode = TRUE;
extract_mode = FALSE;
@ -188,7 +188,7 @@ come literally from the source web.
@ The top line of a section gives its title; in InC, it can also give the
namespace for its functions.
@<Parse the line as a probable section heading@> =
<<Parse the line as a probable section heading>>=
match_results mr = Regexp::create_mr();
if (Regexp::match(&mr, L->text, L"Implied Purpose: (%c+)")) {
S->sect_purpose = Str::duplicate(mr.exp[0]);
@ -230,10 +230,10 @@ namespace for its functions.
Regexp::dispose_of(&mr);
@ Version 1 syntax was cluttered up with a number of hardly-used markup
syntaxes called "commands", written in double squared brackets |[[Thus]]|.
syntaxes called "commands", written in double squared brackets [[[[Thus]]]].
In version 2, this notation is never used.
@<Parse the line as a possible Inweb command@> =
<<Parse the line as a possible Inweb command>>=
match_results mr = Regexp::create_mr();
if (Regexp::match(&mr, L->text, L"%[%[(%c+)%]%]")) {
TEMPORARY_TEXT(full_command)
@ -273,10 +273,10 @@ In version 2, this notation is never used.
@ Some paragraphs define angle-bracketed macros, and those need special
handling. We'll call these "paragraph macros".
@<Parse the line as a possible paragraph macro definition@> =
<<Parse the line as a possible paragraph macro definition>>=
match_results mr = Regexp::create_mr();
if ((Str::get_first_char(L->text) == '@') && (Str::get_at(L->text, 1) == '<') &&
(Regexp::match(&mr, L->text, L"%c<(%c+)@> *= *"))) {
(Regexp::match(&mr, L->text, L"%c<(%c+)>> *= *"))) {
TEMPORARY_TEXT(para_macro_name)
Str::copy(para_macro_name, mr.exp[0]);
L->category = MACRO_DEFINITION_LCAT;
@ -294,10 +294,10 @@ handling. We'll call these "paragraph macros".
}
Regexp::dispose_of(&mr);
@ A structural marker is introduced by an |@| in column 1, and is a structural
@ A structural marker is introduced by an [[@]] in column 1, and is a structural
division in the current section.
@<Parse the line as a structural marker@> =
<<Parse the line as a structural marker>>=
TEMPORARY_TEXT(command_text)
Str::copy(command_text, L->text);
Str::delete_first_character(command_text); /* i.e., strip the at-sign from the front */
@ -307,7 +307,7 @@ division in the current section.
Str::copy(command_text, mr.exp[0]);
Str::copy(remainder, mr.exp[1]);
}
@<Deal with a structural marker@>;
<<Deal with a structural marker>>;
DISCARD_TEXT(remainder)
DISCARD_TEXT(command_text)
Regexp::dispose_of(&mr);
@ -315,14 +315,14 @@ division in the current section.
@ An equals sign in column 1 can just mean the end of an extract, so:
@<Exit extract mode@> =
<<Exit extract mode>>=
/* An equals sign in column 1 is here closing an extract: mark this line as the
   end of the extract, turn extract mode off and turn comment mode on. */
L->category = END_EXTRACT_LCAT;
comment_mode = TRUE;
extract_mode = FALSE;
@ But more usually an equals sign in column 1 is a structural marker:
@<Parse the line as an equals structural marker@> =
<<Parse the line as an equals structural marker>>=
L->category = BEGIN_CODE_LCAT;
L->plainer = FALSE;
code_lcat_for_body = CODE_BODY_LCAT;
@ -338,47 +338,47 @@ division in the current section.
current_paragraph->placed_early = TRUE;
} else if ((current_paragraph) &&
(Regexp::match(&mr2, mr.exp[0], L"%((%c*?) *text%)"))) {
@<Make plainer@>;
<<Make plainer>>;
code_lcat_for_body = TEXT_EXTRACT_LCAT;
code_destination = NULL;
code_pl_for_body = NULL;
extract_mode = TRUE;
} else if ((current_paragraph) &&
(Regexp::match(&mr2, mr.exp[0], L"%((%c*?) *text to *(%c+)%)"))) {
@<Make plainer@>;
<<Make plainer>>;
code_lcat_for_body = TEXT_EXTRACT_LCAT;
code_destination = Str::duplicate(mr2.exp[1]);
code_pl_for_body = Languages::find_by_name(I"Extracts", W, TRUE);
extract_mode = TRUE;
} else if ((current_paragraph) &&
(Regexp::match(&mr2, mr.exp[0], L"%((%c*?) *text as code%)"))) {
@<Make plainer@>;
<<Make plainer>>;
code_lcat_for_body = TEXT_EXTRACT_LCAT;
code_destination = NULL;
code_pl_for_body = S->sect_language;
extract_mode = TRUE;
} else if ((current_paragraph) &&
(Regexp::match(&mr2, mr.exp[0], L"%((%c*?) *text as (%c+)%)"))) {
@<Make plainer@>;
<<Make plainer>>;
code_lcat_for_body = TEXT_EXTRACT_LCAT;
code_destination = NULL;
code_pl_for_body = Languages::find_by_name(mr2.exp[1], W, TRUE);
extract_mode = TRUE;
} else if ((current_paragraph) &&
(Regexp::match(&mr2, mr.exp[0], L"%((%c*?) *text from (%c+) as code%)"))) {
@<Make plainer@>;
<<Make plainer>>;
code_pl_for_body = S->sect_language;
@<Spool from file@>;
<<Spool from file>>;
} else if ((current_paragraph) &&
(Regexp::match(&mr2, mr.exp[0], L"%((%c*?) *text from (%c+) as (%c+)%)"))) {
@<Make plainer@>;
<<Make plainer>>;
code_pl_for_body = Languages::find_by_name(mr2.exp[2], W, TRUE);
@<Spool from file@>;
<<Spool from file>>;
} else if ((current_paragraph) &&
(Regexp::match(&mr2, mr.exp[0], L"%((%c*?) *text from (%c+)%)"))) {
@<Make plainer@>;
<<Make plainer>>;
code_pl_for_body = NULL;
@<Spool from file@>;
<<Spool from file>>;
} else if ((current_paragraph) &&
(Regexp::match(&mr2, mr.exp[0], L"%(figure (%c+)%)"))) {
Tags::add_by_name(L->owning_paragraph, I"Figures");
@ -490,7 +490,7 @@ division in the current section.
Regexp::dispose_of(&mr2);
continue;
@<Make plainer@> =
<<Make plainer>>=
match_results mr3 = Regexp::create_mr();
while (TRUE) {
if (Regexp::match(&mr3, mr2.exp[0], L" *(%C+) *(%c*?)")) {
@ -506,7 +506,7 @@ division in the current section.
}
Regexp::dispose_of(&mr3);
@<Spool from file@> =
<<Spool from file>>=
L->category = BEGIN_CODE_LCAT;
pathname *P = W->md->path_to_web;
if ((S->md->owning_module) && (S->md->owning_module->module_location))
@ -528,35 +528,35 @@ division in the current section.
code_lcat_for_body = TEXT_EXTRACT_LCAT;
extract_mode = TRUE;
@ So here we have the possibilities which start with a column-1 |@| sign.
@ So here we have the possibilities which start with a column-1 [[@]] sign.
There appear to be hordes of these, but in fact most of them were removed
in Inweb syntax version 2: in modern syntax, only |@d|, |@e|, |@h|, their
long forms |@define|, |@enum| and |@heading|, and plain old |@| remain.
(But |@e| has a different meaning from in version 1.)
in Inweb syntax version 2: in modern syntax, only [[@d]], [[@e]], [[@h]], their
long forms [[@define]], [[@enum]] and [[@heading]], and plain old [[@]] remain.
(But [[@e]] has a different meaning from in version 1.)
@<Deal with a structural marker@> =
<<Deal with a structural marker>>=
extract_mode = FALSE;
if (Str::eq_wide_string(command_text, L"Purpose:")) @<Deal with Purpose@>
else if (Str::eq_wide_string(command_text, L"Interface:")) @<Deal with Interface@>
else if (Str::eq_wide_string(command_text, L"Definitions:")) @<Deal with Definitions@>
else if (Regexp::match(&mr, command_text, L"----+")) @<Deal with the bar@>
if (Str::eq_wide_string(command_text, L"Purpose:")) <<Deal with Purpose>>
else if (Str::eq_wide_string(command_text, L"Interface:")) <<Deal with Interface>>
else if (Str::eq_wide_string(command_text, L"Definitions:")) <<Deal with Definitions>>
else if (Regexp::match(&mr, command_text, L"----+")) <<Deal with the bar>>
else if ((Str::eq_wide_string(command_text, L"c")) ||
(Str::eq_wide_string(command_text, L"x")) ||
((S->md->using_syntax == V1_SYNTAX) && (Str::eq_wide_string(command_text, L"e"))))
@<Deal with the code and extract markers@>
else if (Str::eq_wide_string(command_text, L"d")) @<Deal with the define marker@>
<<Deal with the code and extract markers>>
else if (Str::eq_wide_string(command_text, L"d")) <<Deal with the define marker>>
else if (Str::eq_wide_string(command_text, L"define")) {
if (S->md->using_syntax < V2_SYNTAX)
Parser::wrong_version(S->md->using_syntax, L, "'@define' for definitions (use '@d' instead)", V2_SYNTAX);
@<Deal with the define marker@>;
<<Deal with the define marker>>;
} else if (Str::eq_wide_string(command_text, L"default")) {
if (S->md->using_syntax < V2_SYNTAX)
Parser::wrong_version(S->md->using_syntax, L, "'@default' for definitions", V2_SYNTAX);
L->default_defn = TRUE;
@<Deal with the define marker@>;
} else if (Str::eq_wide_string(command_text, L"enum")) @<Deal with the enumeration marker@>
<<Deal with the define marker>>;
} else if (Str::eq_wide_string(command_text, L"enum")) <<Deal with the enumeration marker>>
else if ((Str::eq_wide_string(command_text, L"e")) && (S->md->using_syntax >= V2_SYNTAX))
@<Deal with the enumeration marker@>
<<Deal with the enumeration marker>>
else {
int weight = -1, new_page = FALSE;
if (Str::eq_wide_string(command_text, L"")) weight = ORDINARY_WEIGHT;
@ -575,14 +575,14 @@ long forms |@define|, |@enum| and |@heading|, and plain old |@| remain.
Parser::wrong_version(S->md->using_syntax, L, "'@pp' for super-headings", V1_SYNTAX);
weight = SUBHEADING_WEIGHT; new_page = TRUE;
}
if (weight >= 0) @<Begin a new paragraph of this weight@>
if (weight >= 0) <<Begin a new paragraph of this weight>>
else Main::error_in_web(I"don't understand @command", L);
}
@ In version 1 syntax there were some peculiar special headings above a divider
in the file made of hyphens, called "the bar". All of that has gone in V2.
@<Deal with Purpose@> =
<<Deal with Purpose>>=
if (before_bar == FALSE) Main::error_in_web(I"Purpose used after bar", L);
if (S->md->using_syntax >= V2_SYNTAX)
Parser::wrong_version(S->md->using_syntax, L, "'@Purpose'", V1_SYNTAX);
@ -591,7 +591,7 @@ in the file made of hyphens, called "the bar". All of that has gone in V2.
L->text_operand = Str::duplicate(remainder);
S->sect_purpose = Parser::extract_purpose(remainder, L->next_line, L->owning_section, &L);
@<Deal with Interface@> =
<<Deal with Interface>>=
if (S->md->using_syntax >= V2_SYNTAX)
Parser::wrong_version(S->md->using_syntax, L, "'@Interface'", V1_SYNTAX);
if (before_bar == FALSE) Main::error_in_web(I"Interface used after bar", L);
@ -606,7 +606,7 @@ in the file made of hyphens, called "the bar". All of that has gone in V2.
XL = XL->next_line;
}
@<Deal with Definitions@> =
<<Deal with Definitions>>=
if (S->md->using_syntax >= V2_SYNTAX)
Parser::wrong_version(S->md->using_syntax, L, "'@Definitions' headings", V1_SYNTAX);
if (before_bar == FALSE) Main::error_in_web(I"Definitions used after bar", L);
@ -616,10 +616,10 @@ in the file made of hyphens, called "the bar". All of that has gone in V2.
before_bar = TRUE;
next_par_number = 1;
@ An |@| sign in the first column, followed by a row of four or more dashes,
@ An [[@]] sign in the first column, followed by a row of four or more dashes,
constitutes the optional division bar in a section.
@<Deal with the bar@> =
<<Deal with the bar>>=
if (S->md->using_syntax >= V2_SYNTAX)
Parser::wrong_version(S->md->using_syntax, L, "the bar '----...'", V1_SYNTAX);
if (before_bar == FALSE) Main::error_in_web(I"second bar in the same section", L);
@ -633,11 +633,11 @@ constitutes the optional division bar in a section.
@ In version 1, the division point where a paragraph begins to go into
verbatim code was not marked with an equals sign, but with one of the three
commands |@c| ("code"), |@e| ("early code") and |@x| ("code-like extract").
commands [[@c]] ("code"), [[@e]] ("early code") and [[@x]] ("code-like extract").
These had identical behaviour except for whether or not to tangle what
follows:
@<Deal with the code and extract markers@> =
<<Deal with the code and extract markers>>=
if (S->md->using_syntax > V1_SYNTAX)
Parser::wrong_version(S->md->using_syntax, L, "'@c' and '@x'", V1_SYNTAX);
L->category = BEGIN_CODE_LCAT;
@ -650,10 +650,10 @@ follows:
code_plainness_for_body = FALSE;
hyperlink_body = FALSE;
@ This is for |@d| and |@define|. Definitions are intended to translate to
C preprocessor macros, Inform 6 |Constant|s, and so on.
@ This is for [[@d]] and [[@define]]. Definitions are intended to translate to
C preprocessor macros, Inform 6 [[Constant]]s, and so on.
@<Deal with the define marker@> =
<<Deal with the define marker>>=
L->category = BEGIN_DEFINITION_LCAT;
code_lcat_for_body = CONT_DEFINITION_LCAT;
code_pl_for_body = NULL;
@ -671,10 +671,10 @@ C preprocessor macros, Inform 6 |Constant|s, and so on.
L->is_commentary = FALSE;
Regexp::dispose_of(&mr);
@ This is for |@e| (in version 2) and |@enum|, which makes an automatically
enumerated sort of |@d|.
@ This is for [[@e]] (in version 2) and [[@enum]], which makes an automatically
enumerated sort of [[@d]].
@<Deal with the enumeration marker@> =
<<Deal with the enumeration marker>>=
L->category = BEGIN_DEFINITION_LCAT;
text_stream *from = NULL;
match_results mr = Regexp::create_mr();
@ -704,30 +704,31 @@ enumerated sort of |@d|.
Regexp::dispose_of(&mr);
@ Here we handle paragraph breaks which may or may not be headings. In
version 1, |@p| was a heading, and |@pp| a grander heading, while plain |@|
version 1, [[@p]] was a heading, and [[@pp]] a grander heading, while plain [[@]]
is no heading at all. The use of "p" was a little confusing, and went back
to CWEB, which used the term "paragraph" differently from us: it was "p"
short for what CWEB called a "paragraph". We now use |@h| or equivalently
|@heading| for a heading.
short for what CWEB called a "paragraph". We now use [[@h]] or equivalently
[[@heading]] for a heading.
The noteworthy thing here is the way we fool around with the text on the line
of the paragraph opening. This is one of the few cases where Inweb has
retained the stream-based style of CWEB, where escape characters can appear
anywhere in a line and line breaks are not significant. Thus
= (text)
@h The chronology of French weaving. Auguste de Papillon (1734-56) soon
=
is split into two, so that the title of the paragraph is just "The chronology
of French weaving" and the remainder,
= (text)
Auguste de Papillon (1734-56) soon
=
will be woven exactly as the succeeding lines will be.
@d ORDINARY_WEIGHT 0 /* an ordinary paragraph has this "weight" */
@d SUBHEADING_WEIGHT 1 /* a heading paragraph */
<<*>>=
#define ORDINARY_WEIGHT 0 /* an ordinary paragraph has this "weight" */
#define SUBHEADING_WEIGHT 1 /* a heading paragraph */
@<Begin a new paragraph of this weight@> =
<<Begin a new paragraph of this weight>>=
comment_mode = TRUE;
L->is_commentary = TRUE;
L->category = PARAGRAPH_START_LCAT;
@ -744,7 +745,7 @@ will be woven exactly as the succeeding lines will be.
L->text_operand = Str::new();
L->text_operand2 = Str::duplicate(remainder);
}
@<Create a new paragraph, starting here, as new current paragraph@>;
<<Create a new paragraph, starting here, as new current paragraph>>;
L->owning_paragraph = current_paragraph;
W->no_paragraphs++;
@ -752,7 +753,7 @@ will be woven exactly as the succeeding lines will be.
@ So now it's time to create paragraph structures:
=
<<*>>=
typedef struct paragraph {
int above_bar; /* placed above the dividing bar in its section (in Version 1 syntax) */
int placed_early; /* should appear early in the tangled code */
@ -764,20 +765,20 @@ typedef struct paragraph {
int next_child_number; /* used when working out paragraph numbers */
struct paragraph *parent_paragraph; /* ditto */
int weight; /* typographic prominence: one of the |*_WEIGHT| values */
int weight; /* typographic prominence: one of the [[*_WEIGHT]] values */
int starts_on_new_page; /* relevant for weaving to TeX only, of course */
struct para_macro *defines_macro; /* there can only be one */
struct linked_list *functions; /* of |function|: those defined in this para */
struct linked_list *structures; /* of |language_type|: similarly */
struct linked_list *taggings; /* of |paragraph_tagging| */
struct linked_list *footnotes; /* of |footnote| */
struct linked_list *functions; /* of [[function]]: those defined in this para */
struct linked_list *structures; /* of [[language_type]]: similarly */
struct linked_list *taggings; /* of [[paragraph_tagging]] */
struct linked_list *footnotes; /* of [[footnote]] */
struct source_line *first_line_in_paragraph;
struct section *under_section;
CLASS_DEFINITION
} paragraph;
@<Create a new paragraph, starting here, as new current paragraph@> =
<<Create a new paragraph, starting here, as new current paragraph>>=
paragraph *P = CREATE(paragraph);
if (S->md->using_syntax > V1_SYNTAX) {
P->above_bar = FALSE;
@ -816,7 +817,7 @@ typedef struct paragraph {
@ Finally, we're down to either commentary or code.
@<This is a line destined for commentary@> =
<<This is a line destined for commentary>>=
match_results mr = Regexp::create_mr();
if (Regexp::match(&mr, L->text, L">> (%c+)")) {
L->category = SOURCE_DISPLAY_LCAT;
@ -824,11 +825,11 @@ typedef struct paragraph {
}
Regexp::dispose_of(&mr);
@ Note that in an |@d| definition, a blank line is treated as the end of the
@ Note that in an [[@d]] definition, a blank line is treated as the end of the
definition. (This is unnecessary for C, and is a point of difference with
CWEB, but is needed for languages which don't allow multi-line definitions.)
@<This is a line destined for the verbatim code@> =
<<This is a line destined for the verbatim code>>=
if ((L->category != BEGIN_DEFINITION_LCAT) && (L->category != COMMAND_LCAT)) {
L->category = code_lcat_for_body;
L->plainer = code_plainness_for_body;
@ -849,10 +850,10 @@ CWEB, but is needed for languages which don't allow multi-line definitions.)
LanguageMethods::subcategorise_line(S->sect_language, L);
@ The purpose text occurs just below the heading. In version 1 it's cued with
a |@Purpose:| command; in version 2 it is unmarked. The following routine
a [[@Purpose:]] command; in version 2 it is unmarked. The following routine
is not elegant but handles the back end of both possibilities.
=
<<*>>=
text_stream *Parser::extract_purpose(text_stream *prologue, source_line *XL, section *S, source_line **adjust) {
text_stream *P = Str::duplicate(prologue);
while ((XL) && (XL->next_line) && (XL->owning_section == S) &&
@ -868,18 +869,18 @@ text_stream *Parser::extract_purpose(text_stream *prologue, source_line *XL, sec
return P;
}
@h Footnote notation.
@ \section{Footnote notation.}
=
<<*>>=
typedef struct footnote {
int footnote_cue_number; /* used only for |FOOTNOTE_TEXT_LCAT| lines */
int footnote_text_number; /* used only for |FOOTNOTE_TEXT_LCAT| lines */
int footnote_cue_number; /* used only for [[FOOTNOTE_TEXT_LCAT]] lines */
int footnote_text_number; /* used only for [[FOOTNOTE_TEXT_LCAT]] lines */
struct text_stream *cue_text;
int cued_already;
CLASS_DEFINITION
} footnote;
@<Work out footnote numbering for this paragraph@> =
<<Work out footnote numbering for this paragraph>>=
int next_footnote_in_para = 1;
footnote *current_text = NULL;
TEMPORARY_TEXT(before)
@ -895,7 +896,7 @@ typedef struct footnote {
if (Characters::is_whitespace(Str::get(pos)) == FALSE)
this_is_a_cue = TRUE;
if (this_is_a_cue == FALSE)
@<This line begins a footnote text@>;
<<This line begins a footnote text>>;
}
L->footnote_text = current_text;
}
@ -903,7 +904,7 @@ typedef struct footnote {
DISCARD_TEXT(cue)
DISCARD_TEXT(after)
@<This line begins a footnote text@> =
<<This line begins a footnote text>>=
L->category = FOOTNOTE_TEXT_LCAT;
footnote *F = CREATE(footnote);
F->footnote_cue_number = Str::atoi(cue, 0);
@ -924,7 +925,7 @@ typedef struct footnote {
@ Where:
=
<<*>>=
int Parser::detect_footnote(web *W, text_stream *matter, text_stream *before,
text_stream *cue, text_stream *after) {
text_stream *fn_on_notation =
@ -978,13 +979,14 @@ footnote *Parser::find_footnote_in_para(paragraph *P, text_stream *cue) {
return NULL;
}
@h Parsing of dimensions.
@ \section{Parsing of dimensions.}
It's possible, optionally, to specify width and height for some visual matter.
This is the syntax used.
@d POINTS_PER_CM 72
<<*>>=
#define POINTS_PER_CM 72
=
<<*>>=
text_stream *Parser::dimensions(text_stream *item, int *w, int *h, source_line *L) {
int sv = L->owning_section->md->using_syntax;
*w = -1; *h = -1;
@ -1027,12 +1029,12 @@ text_stream *Parser::dimensions(text_stream *item, int *w, int *h, source_line *
return use;
}
@h Version errors.
@ \section{Version errors.}
These are not fatal (why should they be?): Inweb carries on and allows the use
of the feature despite the version mismatch. They nevertheless count as errors
when it comes to Inweb's exit code, so they will halt a make.
=
<<*>>=
void Parser::wrong_version(int using, source_line *L, char *feature, int need) {
TEMPORARY_TEXT(warning)
WRITE_TO(warning, "%s is a feature of version %d syntax (you're using v%d)",

View file

@ -3,7 +3,7 @@
To read the Contents section of the web, and through that each of
the other sections in turn, and to collate all of this material.
@h Web semantics.
@ \section{Web semantics.}
There's normally only one web read in during a single run of Inweb, but
this might change if we ever add batch-processing in future. A web is a set
of chapters each of which is a set of sections; webs which don't obviously
@ -14,30 +14,30 @@ The program expressed by a web is output, or "tangled", to a number of
stand-alone files called "tangle targets". By default there is just one
of these.
We use the |WebMetadata::get| function of |foundation| to read the structure
of the web in from the file system. This produces a |web_md| metadata
structure for the web itself, which contains a list of |chapter_md|
structures for the chapters, each in turn containing a list of |section_md|s.
We use the [[WebMetadata::get]] function of [[foundation]] to read the structure
of the web in from the file system. This produces a [[web_md]] metadata
structure for the web itself, which contains a list of [[chapter_md]]
structures for the chapters, each in turn containing a list of [[section_md]]s.
We will imitate that structure exactly, but because we want to attach a lot
of semantics at each level, we will make a |web| with a list of |chapter|s
each of which has a list of |section|s.
of semantics at each level, we will make a [[web]] with a list of [[chapter]]s
each of which has a list of [[section]]s.
Here are the semantics for a web:
=
<<*>>=
typedef struct web {
struct web_md *md;
struct linked_list *chapters; /* of |chapter| (including Sections, Preliminaries, etc.) */
struct linked_list *chapters; /* of [[chapter]] (including Sections, Preliminaries, etc.) */
int web_extent; /* total lines in literate source, excluding contents */
int no_paragraphs; /* this will be at least 1 */
struct programming_language *main_language; /* in which most of the sections are written */
struct linked_list *tangle_targets; /* of |tangle_target| */
struct linked_list *tangle_targets; /* of [[tangle_target]] */
struct linked_list *headers; /* of |filename|: additional header files */
struct linked_list *headers; /* of [[filename]]: additional header files */
int analysed; /* has this been scanned for function usage and such? */
struct linked_list *language_types; /* of |language_type|: used only for C-like languages */
struct linked_list *language_types; /* of [[language_type]]: used only for C-like languages */
struct ebook *as_ebook; /* when being woven to an ebook */
struct pathname *redirect_weaves_to; /* ditto */
@ -47,13 +47,13 @@ typedef struct web {
@ And for a chapter:
=
<<*>>=
typedef struct chapter {
struct chapter_md *md;
struct web *owning_web;
struct linked_list *sections; /* of |section| */
struct linked_list *sections; /* of [[section]] */
struct weave_order *ch_weave; /* |NULL| unless this chapter produces a weave of its own */
struct weave_order *ch_weave; /* [[NULL]] unless this chapter produces a weave of its own */
int titling_line_inserted; /* has an interleaved chapter heading been added yet? */
struct programming_language *ch_language; /* in which this chapter is written */
CLASS_DEFINITION
@ -61,7 +61,7 @@ typedef struct chapter {
@ And lastly for a section.
=
<<*>>=
typedef struct section {
struct section_md *md;
struct web *owning_web;
@ -71,29 +71,29 @@ typedef struct section {
struct text_stream *sect_purpose; /* e.g., "To manage the zoo, and feed all penguins" */
int barred; /* if version 1 syntax, contains a dividing bar? */
struct programming_language *sect_language; /* in which this section is written */
struct tangle_target *sect_target; /* |NULL| unless this section produces a tangle of its own */
struct weave_order *sect_weave; /* |NULL| unless this section produces a weave of its own */
struct tangle_target *sect_target; /* [[NULL]] unless this section produces a tangle of its own */
struct weave_order *sect_weave; /* [[NULL]] unless this section produces a weave of its own */
int sect_extent; /* total number of lines in this section */
struct source_line *first_line; /* for efficiency's sake not held as a |linked_list|, */
struct source_line *first_line; /* for efficiency's sake not held as a [[linked_list]], */
struct source_line *last_line; /* but that's what it is, all the same */
int sect_paragraphs; /* total number of paragraphs in this section */
struct linked_list *paragraphs; /* of |paragraph|: the content of this section */
struct linked_list *paragraphs; /* of [[paragraph]]: the content of this section */
struct theme_tag *tag_with; /* automatically tag paras in this section thus */
struct linked_list *macros; /* of |para_macro|: those defined in this section */
struct linked_list *macros; /* of [[para_macro]]: those defined in this section */
int scratch_flag; /* temporary workspace */
int paused_until_at; /* ignore the top half of the file, until the first |@| sign */
int paused_until_at; /* ignore the top half of the file, until the first [[@]] sign */
int printed_number; /* temporary again: sometimes used in weaving */
CLASS_DEFINITION
} section;
@ The following routine makes the |web|-|chapter|-|section| tree out of a
|web_md|-|chapter_md|-|section_md| tree:
@ The following routine makes the [[web]]-[[chapter]]-[[section]] tree out of a
[[web_md]]-[[chapter_md]]-[[section_md]] tree:
=
<<*>>=
web_md *Reader::load_web_md(pathname *P, filename *alt_F, module_search *I,
int including_modules) {
return WebMetadata::get(P, alt_F, default_inweb_syntax, I, verbose_mode,
@ -107,14 +107,14 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I,
W->md = Reader::load_web_md(P, alt_F, I, including_modules);
tangle_target *main_target = NULL;
@<Write the Inweb Version bibliographic datum@>;
@<Initialise the rest of the web structure@>;
<<Write the Inweb Version bibliographic datum>>;
<<Initialise the rest of the web structure>>;
chapter_md *Cm;
LOOP_OVER_LINKED_LIST(Cm, chapter_md, W->md->chapters_md) {
chapter *C = CREATE(chapter);
C->md = Cm;
C->owning_web = W;
@<Initialise the rest of the chapter structure@>;
<<Initialise the rest of the chapter structure>>;
ADD_TO_LINKED_LIST(C, chapter, W->chapters);
section_md *Sm;
LOOP_OVER_LINKED_LIST(Sm, section_md, Cm->sections_md) {
@ -122,22 +122,22 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I,
S->md = Sm;
S->owning_chapter = C;
S->owning_web = W;
@<Initialise the rest of the section structure@>;
<<Initialise the rest of the section structure>>;
ADD_TO_LINKED_LIST(S, section, C->sections);
}
}
@<Add the imported headers@>;
<<Add the imported headers>>;
return W;
}
@<Write the Inweb Version bibliographic datum@> =
<<Write the Inweb Version bibliographic datum>>=
/* Set the "Inweb Version" bibliographic datum on the web's metadata, using a
   temporary text which is discarded again afterwards.
   NOTE(review): the text written is the placeholder below exactly as it appears
   in this source; presumably it is replaced by the real version number when
   this code is tangled -- confirm. */
TEMPORARY_TEXT(IB)
WRITE_TO(IB, "[[Version Number]]");
web_bibliographic_datum *bd = Bibliographic::set_datum(W->md, I"Inweb Version", IB);
/* NOTE(review): presumably this stops a web from declaring the datum itself -- confirm */
bd->declaration_permitted = FALSE;
DISCARD_TEXT(IB)
@<Initialise the rest of the web structure@> =
<<Initialise the rest of the web structure>>=
W->chapters = NEW_LINKED_LIST(chapter);
W->headers = NEW_LINKED_LIST(filename);
W->language_types = NEW_LINKED_LIST(language_type);
@ -152,7 +152,7 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I,
W->main_language = Languages::find_by_name(language_name, W, TRUE);
main_target = Reader::add_tangle_target(W, W->main_language);
@<Initialise the rest of the chapter structure@> =
<<Initialise the rest of the chapter structure>>=
C->ch_weave = NULL;
C->titling_line_inserted = FALSE;
C->sections = NEW_LINKED_LIST(section);
@ -160,7 +160,7 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I,
if (Str::len(Cm->ch_language_name) > 0)
C->ch_language = Languages::find_by_name(Cm->ch_language_name, W, TRUE);
@<Initialise the rest of the section structure@> =
<<Initialise the rest of the section structure>>=
S->sect_extent = 0;
S->first_line = NULL; S->last_line = NULL;
S->sect_paragraphs = 0;
@ -188,12 +188,12 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I,
if (Str::len(Sm->tag_name) > 0)
S->tag_with = Tags::add_by_name(NULL, Sm->tag_name);
@<Add the imported headers@> =
<<Add the imported headers>>=
/* Pass each header filename listed in the web's metadata, in turn, to
   Reader::add_imported_header for this web. */
filename *HF;
LOOP_OVER_LINKED_LIST(HF, filename, W->md->header_filenames)
Reader::add_imported_header(W, HF);
@h Web reading.
@ \section{Web reading.}
All of that ran very quickly, but now things will slow down. The next
function is where the actual contents of a web are read -- which means opening
each section and reading it line by line. We read the complete literate source
@ -201,7 +201,7 @@ of the web into memory, which is profligate, but saves time. Most of the lines
come straight from the source files, but a few chapter heading lines are
inserted if this is a multi-chapter web.
=
<<*>>=
void Reader::read_web(web *W) {
chapter *C;
section *S;
@ -215,7 +215,7 @@ void Reader::read_web(web *W) {
@ Each file, then:
=
<<*>>=
void Reader::read_file(web *W, chapter *C, filename *F, text_stream *titling_line,
section *S, int disregard_top) {
S->owning_chapter = C;
@ -226,38 +226,38 @@ void Reader::read_file(web *W, chapter *C, filename *F, text_stream *titling_lin
if ((titling_line) && (Str::len(titling_line) > 0) &&
(S->owning_chapter->titling_line_inserted == FALSE))
@<Insert an implied chapter heading@>;
<<Insert an implied chapter heading>>;
if (disregard_top)
@<Insert an implied section heading, for a single-file web@>;
<<Insert an implied section heading, for a single-file web>>;
int cl = TextFiles::read(F, FALSE, "can't open section file", TRUE,
Reader::scan_source_line, NULL, (void *) S);
if (verbose_mode) PRINT("Read section: '%S' (%d lines)\n", S->md->sect_title, cl);
}
@<Insert an implied chapter heading@> =
<<Insert an implied chapter heading>>=
S->owning_chapter->titling_line_inserted = TRUE;
TEMPORARY_TEXT(line)
text_file_position *tfp = NULL;
WRITE_TO(line, "Chapter Heading");
@<Accept this as a line belonging to this section and chapter@>;
<<Accept this as a line belonging to this section and chapter>>;
DISCARD_TEXT(line)
@<Insert an implied section heading, for a single-file web@> =
<<Insert an implied section heading, for a single-file web>>=
TEMPORARY_TEXT(line)
text_file_position *tfp = NULL;
WRITE_TO(line, "Main.");
@<Accept this as a line belonging to this section and chapter@>;
<<Accept this as a line belonging to this section and chapter>>;
Str::clear(line);
@<Accept this as a line belonging to this section and chapter@>;
<<Accept this as a line belonging to this section and chapter>>;
text_stream *purpose = Bibliographic::get_datum(W->md, I"Purpose");
if (Str::len(purpose) > 0) {
Str::clear(line);
WRITE_TO(line, "Implied Purpose: %S", purpose);
@<Accept this as a line belonging to this section and chapter@>;
<<Accept this as a line belonging to this section and chapter>>;
Str::clear(line);
@<Accept this as a line belonging to this section and chapter@>;
<<Accept this as a line belonging to this section and chapter>>;
}
DISCARD_TEXT(line)
@ -265,7 +265,7 @@ void Reader::read_file(web *W, chapter *C, filename *F, text_stream *titling_lin
trailing whitespace on a line is not significant in the language being
tangled for.
=
<<*>>=
void Reader::scan_source_line(text_stream *line, text_file_position *tfp, void *state) {
section *S = (section *) state;
int l = Str::len(line) - 1;
@ -275,10 +275,10 @@ void Reader::scan_source_line(text_stream *line, text_file_position *tfp, void *
if (Str::get_at(line, 0) == '@') S->paused_until_at = FALSE;
else return;
}
@<Accept this as a line belonging to this section and chapter@>;
<<Accept this as a line belonging to this section and chapter>>;
}
@<Accept this as a line belonging to this section and chapter@> =
<<Accept this as a line belonging to this section and chapter>>=
source_line *sl = Lines::new_source_line_in(line, tfp, S);
/* enter this in its section's linked list of lines: */
@ -289,10 +289,10 @@ void Reader::scan_source_line(text_stream *line, text_file_position *tfp, void *
/* we haven't detected paragraph boundaries yet, so: */
sl->owning_paragraph = NULL;
@h Woven and Tangled folders.
@ \section{Woven and Tangled folders.}
We abstract these in order to be able to respond well to their not existing:
=
<<*>>=
pathname *Reader::woven_folder(web *W) {
pathname *P = Pathnames::down(W->md->path_to_web, I"Woven");
if (Pathnames::create_in_file_system(P) == FALSE)
@ -306,14 +306,14 @@ pathname *Reader::tangled_folder(web *W) {
return P;
}
@h Looking up chapters and sections.
@ \section{Looking up chapters and sections.}
Given a range, which chapter or section does it correspond to? There is no
need for this to be at all quick: there are fewer than 1000 sections even
in large webs, and lookup is performed only a few times.
Note that range comparison is case sensitive.
=
<<*>>=
chapter *Reader::get_chapter_for_range(web *W, text_stream *range) {
chapter *C;
if (W)
@ -336,7 +336,7 @@ section *Reader::get_section_for_range(web *W, text_stream *range) {
@ This clumsy routine is never used in syntax version 2 or later.
=
<<*>>=
section *Reader::section_by_filename(web *W, text_stream *filename) {
chapter *C;
section *S;
@ -352,13 +352,13 @@ section *Reader::section_by_filename(web *W, text_stream *filename) {
return NULL;
}
@h Ranges and containment.
@ \section{Ranges and containment.}
This provides a sort of partial ordering on ranges, testing if the portion
of the web represented by |range1| is contained inside the portion represented
by |range2|. Note that |"0"| means the entire web, and is what the word |all|
of the web represented by [[range1]] is contained inside the portion represented
by [[range2]]. Note that [["0"]] means the entire web, and is what the word [[all]]
translates to when it's used on the command line.
=
<<*>>=
int Reader::range_within(text_stream *range1, text_stream *range2) {
if (Str::eq_wide_string(range2, L"0")) return TRUE;
if (Str::eq(range1, range2)) return TRUE;
@ -370,7 +370,7 @@ int Reader::range_within(text_stream *range1, text_stream *range2) {
return FALSE;
}
@h Tangle targets.
@ \section{Tangle targets.}
In Knuth's original conception of literate programming, a web produces
just one piece of tangled output -- the program for compilation. But this
assumes that the underlying program is so simple that it won't require
@ -380,14 +380,14 @@ web to contain multiple tangle targets, each of which contains a union of
sections. Each section belongs to exactly one tangle target; by default
a web contains just one target, which contains all of the sections.
=
<<*>>=
/* One tangle target of a web: a union of sections tangled together. */
typedef struct tangle_target {
struct programming_language *tangle_language; /* common to the entire contents */
struct hash_table symbols; /* a table of identifiable names in this program */
CLASS_DEFINITION /* project macro; its expansion is defined elsewhere and not visible here */
} tangle_target;
@ =
<<*>>=
tangle_target *Reader::add_tangle_target(web *W, programming_language *language) {
tangle_target *tt = CREATE(tangle_target);
tt->tangle_language = language;
@ -397,28 +397,29 @@ tangle_target *Reader::add_tangle_target(web *W, programming_language *language)
}
@ And the following provides a way to iterate through the lines in a tangle,
while keeping the variables |C|, |S| and |L| pointing to the current chapter,
while keeping the variables [[C]], [[S]] and [[L]] pointing to the current chapter,
section and line.
@d LOOP_WITHIN_TANGLE(C, S, T)
<<*>>=
#define LOOP_WITHIN_TANGLE(C, S, T)
LOOP_OVER_LINKED_LIST(C, chapter, W->chapters)
LOOP_OVER_LINKED_LIST(S, section, C->sections)
if (S->sect_target == T)
for (source_line *L = S->first_line; L; L = L->next_line)
@h Additional header files.
@ \section{Additional header files.}
Some C programs, in particular, may need additional header files added to
any tangle in order for them to compile. (The Inform project uses this to
get around the lack of some POSIX facilities on Windows.)
=
<<*>>=
/* Record HF as an additional header file for the web W. */
void Reader::add_imported_header(web *W, filename *HF) {
/* adds HF to the linked list W->headers */
ADD_TO_LINKED_LIST(HF, filename, W->headers);
}
@h Extent.
@ \section{Extent.}
=
<<*>>=
int Reader::web_has_one_section(web *W) {
if (WebMetadata::section_count(W->md) == 1) return TRUE;
return FALSE;
@ -426,7 +427,7 @@ int Reader::web_has_one_section(web *W) {
@ This really serves no purpose, but seems to boost morale.
=
<<*>>=
void Reader::print_web_statistics(web *W) {
PRINT("web \"%S\": ", Bibliographic::get_datum(W->md, I"Title"));
int c = WebMetadata::chapter_count(W->md);