inweb-bootstrap/Chapter_2/Line_Categories.nw

172 lines
7.7 KiB
Text
Raw Normal View History

2019-02-04 22:26:45 +00:00
[Lines::] Line Categories.
To store individual lines from webs, and to categorise them according
to their meaning.
2024-03-09 05:41:29 +00:00
@ \section{Line storage.}
2019-02-04 22:26:45 +00:00
In the next section, we'll read in an entire web, building its hierarchical
structure of chapters, sections and eventually paragraphs. But before we do
that, we'll define the structure used to store a single line of the web.
2024-03-09 05:41:29 +00:00
Because Inweb markup makes use of the special characters [[@]] and [[=]] as
2019-02-04 22:26:45 +00:00
dividers, but only in column 1, the important divisions between material
all effectively occur at line boundaries -- this is a major point of
difference with, for example, CWEB, for which the source is just a stream
of characters in which all white space is equivalent. Because Inweb source
is so tidily divisible into lines, we can usefully make each source line
correspond to one of these:
2024-03-09 05:41:29 +00:00
<<*>>=
2019-02-04 22:26:45 +00:00
/* The record kept for one line of a web. Every field below is given its
initial value by [[Lines::new_source_line_in]]; fields described as "used only
for" a category start out as [[FALSE]], [[NULL]] or [[NO_CMD]] there. */
typedef struct source_line {
struct text_stream *text; /* the text as read in */
struct text_stream *text_operand; /* meaning depends on category */
struct text_stream *text_operand2; /* meaning depends on category */
2024-03-09 05:41:29 +00:00
int category; /* what sort of line this is: an [[*_LCAT]] value */
int command_code; /* used only for [[COMMAND_LCAT]] lines: a [[*_CMD]] value */
int default_defn; /* used only for [[BEGIN_DEFINITION_LCAT]] lines */
int plainer; /* used only for [[BEGIN_CODE_LCAT]] lines: suppresses box */
int enable_hyperlinks; /* used only for [[CODE_BODY_LCAT]] lines: link URLs in weave */
struct programming_language *colour_as; /* used only for [[TEXT_EXTRACT_LCAT]] lines */
struct text_stream *extract_to; /* used only for [[TEXT_EXTRACT_LCAT]] lines */
2019-02-04 22:26:45 +00:00
int is_commentary; /* flag; NOTE(review): presumably marks commentary rather than code -- confirm */
struct language_function *function_defined; /* if any C-like function is defined on this line */
2019-02-04 22:26:45 +00:00
struct preform_nonterminal *preform_nonterminal_defined; /* similarly */
int suppress_tangling; /* if e.g., lines are tangled out of order */
int interface_line_identified; /* only relevant during parsing of Interface lines */
struct footnote *footnote_text; /* which fn this is the text of, if it is at all */
2019-02-04 22:26:45 +00:00
struct text_file_position source; /* which file this was read in from, if any */
struct section *owning_section; /* for interleaved title lines, it's the one about to start */
struct source_line *next_line; /* within the owning section's linked list */
2024-03-09 05:41:29 +00:00
struct paragraph *owning_paragraph; /* for lines falling under paragraphs; [[NULL]] if not */
2019-02-04 22:26:45 +00:00
} source_line;
2024-03-09 05:41:29 +00:00
<<*>>=
2020-04-12 16:24:23 +00:00
/* Create the record for one line of text belonging to section [[S]].
[[line]] is copied, so the caller keeps its own stream. [[tfp]] may be
[[NULL]], in which case the line is recorded as coming from nowhere. [[S]]
must not be [[NULL]], and must already sit inside a chapter and a web, since
both extent counters are incremented here. The new line is returned
uncategorised and not yet linked to any neighbour or paragraph. */
source_line *Lines::new_source_line_in(text_stream *line, text_file_position *tfp,
	section *S) {
	source_line *sl = CREATE(source_line);

	/* the text itself, and two operands which start out empty */
	sl->text = Str::duplicate(line);
	sl->text_operand = Str::new();
	sl->text_operand2 = Str::new();

	/* where the text was read from */
	if (tfp == NULL) sl->source = TextFiles::nowhere();
	else sl->source = *tfp;

	/* nothing is known about the line's meaning as yet */
	sl->category = NO_LCAT;
	sl->command_code = NO_CMD;

	/* every flag begins switched off */
	sl->default_defn = FALSE;
	sl->plainer = FALSE;
	sl->enable_hyperlinks = FALSE;
	sl->is_commentary = FALSE;
	sl->suppress_tangling = FALSE;
	sl->interface_line_identified = FALSE;

	/* and every optional association begins absent */
	sl->colour_as = NULL;
	sl->extract_to = NULL;
	sl->function_defined = NULL;
	sl->preform_nonterminal_defined = NULL;
	sl->footnote_text = NULL;
	sl->next_line = NULL;
	sl->owning_paragraph = NULL;

	/* attach to the section, counting the line there and in the web */
	sl->owning_section = S;
	S->sect_extent++;
	S->owning_chapter->owning_web->web_extent++;

	return sl;
}
2024-03-09 05:41:29 +00:00
@ \section{Categories.}
2019-02-04 22:26:45 +00:00
The line categories are enumerated as follows. We briefly note what the text
operands (TO and TO2) are set to, if anything: most of the time they're blank.
Note that a few of these categories are needed only for the more cumbersome
2024-03-09 05:41:29 +00:00
version 1 syntax; version 2 removed the need for [[BAR_LCAT]],
[[INTERFACE_BODY_LCAT]], and [[INTERFACE_LCAT]].
<<*>>=
enum NO_LCAT from 0 /* (used when none has been set as yet) */
enum BAR_LCAT /* a bar line [[@---------------]]... */
enum BEGIN_CODE_LCAT /* an [[@c]], [[@e]] or [[@x]] line below which is code, early code or extract */
enum BEGIN_DEFINITION_LCAT /* an [[@d]] definition: TO is term, TO2 is this line's part of defn */
enum C_LIBRARY_INCLUDE_LCAT /* C-like languages only: a [[#include]] for an ANSI C header file */
enum CHAPTER_HEADING_LCAT /* chapter heading line inserted automatically, not read from web */
enum CODE_BODY_LCAT /* the rest of the paragraph under an [[@c]] or [[@e]] or macro definition */
enum COMMAND_LCAT /* a [[[[Command]]]] line, with the operand set to the [[*_CMD]] value */
enum COMMENT_BODY_LCAT /* text following a paragraph header, which is all comment */
enum CONT_DEFINITION_LCAT /* subsequent lines of an [[@d]] definition */
enum DEFINITIONS_LCAT /* line holding the [[@Definitions:]] heading */
enum END_EXTRACT_LCAT /* an [[=]] line used to mark the end of an extract */
enum FOOTNOTE_TEXT_LCAT /* the opening of the text of a footnote */
enum HEADING_START_LCAT /* [[@h]] paragraph start: TO is title, TO2 is rest of line */
enum INTERFACE_BODY_LCAT /* line within the interface, under this heading */
enum INTERFACE_LCAT /* line holding the [[@Interface:]] heading */
enum MACRO_DEFINITION_LCAT /* line on which a paragraph macro is defined with an [[=]] sign */
enum PARAGRAPH_START_LCAT /* simple [[@]] paragraph start: TO is blank, TO2 is rest of line */
enum PREFORM_GRAMMAR_LCAT /* InC only: line of Preform grammar */
enum PREFORM_LCAT /* InC only: opening line of a Preform nonterminal */
enum PURPOSE_BODY_LCAT /* continuation lines of purpose declaration */
enum PURPOSE_LCAT /* first line of purpose declaration; TO is rest of line */
enum SECTION_HEADING_LCAT /* section heading line, at top of file */
enum SOURCE_DISPLAY_LCAT /* commentary line beginning [[>>]] for display: TO is display text */
enum TEXT_EXTRACT_LCAT /* the rest of the paragraph under an [[@x]] */
enum TYPEDEF_LCAT /* C-like languages only: a [[typedef]] which isn't a structure definition */
@ We want to print these out nicely for the sake of a [[-scan]] analysis run
2019-02-04 22:26:45 +00:00
of Inweb:
2024-03-09 05:41:29 +00:00
<<*>>=
2019-02-04 22:26:45 +00:00
/* Translate an [[*_LCAT]] value into a short printable name: the constant's
own name with the [[_LCAT]] suffix removed, except that [[NO_LCAT]] reads
"(uncategorised)". Any value not listed below produces "(?unknown)". The
result is a string literal, so callers must not modify or free it. */
char *Lines::category_name(int cat) {
	char *name = "(?unknown)"; /* the answer unless a case below matches */
	switch (cat) {
		case NO_LCAT:                name = "(uncategorised)";   break;
		case BAR_LCAT:               name = "BAR";               break;
		case BEGIN_CODE_LCAT:        name = "BEGIN_CODE";        break;
		case BEGIN_DEFINITION_LCAT:  name = "BEGIN_DEFINITION";  break;
		case C_LIBRARY_INCLUDE_LCAT: name = "C_LIBRARY_INCLUDE"; break;
		case CHAPTER_HEADING_LCAT:   name = "CHAPTER_HEADING";   break;
		case CODE_BODY_LCAT:         name = "CODE_BODY";         break;
		case COMMAND_LCAT:           name = "COMMAND";           break;
		case COMMENT_BODY_LCAT:      name = "COMMENT_BODY";      break;
		case CONT_DEFINITION_LCAT:   name = "CONT_DEFINITION";   break;
		case DEFINITIONS_LCAT:       name = "DEFINITIONS";       break;
		case END_EXTRACT_LCAT:       name = "END_EXTRACT";       break;
		case FOOTNOTE_TEXT_LCAT:     name = "FOOTNOTE_TEXT";     break;
		case HEADING_START_LCAT:     name = "HEADING_START";     break;
		case INTERFACE_BODY_LCAT:    name = "INTERFACE_BODY";    break;
		case INTERFACE_LCAT:         name = "INTERFACE";         break;
		case MACRO_DEFINITION_LCAT:  name = "MACRO_DEFINITION";  break;
		case PARAGRAPH_START_LCAT:   name = "PARAGRAPH_START";   break;
		case PREFORM_GRAMMAR_LCAT:   name = "PREFORM_GRAMMAR";   break;
		case PREFORM_LCAT:           name = "PREFORM";           break;
		case PURPOSE_BODY_LCAT:      name = "PURPOSE_BODY";      break;
		case PURPOSE_LCAT:           name = "PURPOSE";           break;
		case SECTION_HEADING_LCAT:   name = "SECTION_HEADING";   break;
		case SOURCE_DISPLAY_LCAT:    name = "SOURCE_DISPLAY";    break;
		case TEXT_EXTRACT_LCAT:      name = "TEXT_EXTRACT";      break;
		case TYPEDEF_LCAT:           name = "TYPEDEF";           break;
	}
	return name;
}
2024-03-09 05:41:29 +00:00
@ \section{Command codes.}
2019-02-04 22:26:45 +00:00
Command-category lines are further divided up into the following. Again,
some of these fell into disuse in version 2 syntax.
2024-03-09 05:41:29 +00:00
<<*>>=
enum NO_CMD from 0
enum PAGEBREAK_CMD
enum GRAMMAR_INDEX_CMD
enum FIGURE_CMD
enum AUDIO_CMD
enum VIDEO_CMD
enum DOWNLOAD_CMD
enum CAROUSEL_CMD
enum CAROUSEL_ABOVE_CMD
enum CAROUSEL_BELOW_CMD
enum CAROUSEL_UNCAPTIONED_CMD
enum CAROUSEL_END_CMD
enum EMBED_CMD
enum TAG_CMD
enum HTML_CMD