inweb-bootstrap/foundation-module/Chapter_8/Simple_Tangler.nw

324 lines
11 KiB
Text

[SimpleTangler::] Simple Tangler.
Unravelling (a simple version of) Inweb's literate programming notation to
access the tangled content.
@ Suppose we have a simple form of a web, in the sense of Inweb: one which
makes no use of macros, definitions or enumerations.[1] Because the syntax
used is a subset of Inweb syntax, there's no problem weaving such a web:
Inweb can be used for that. But now suppose we want to tangle the web, within
some application. We don't really want to embed the whole of Inweb into such
a program: something much simpler would surely be sufficient. And here it is.
[1] Why might we have this? Because kits of Inter code take this form.
@ The simple tangler is controlled using a parcel of settings. Note also the
[[state]], which is not used by the tangler itself, but instead allows the callback
functions to have a shared state of their own.
<<*>>=
/* The settings for one run of the simple tangler: four callback functions, a */
/* pointer for the callbacks' own use, and the location of the web. */
typedef struct simple_tangle_docket {
/* called once at the end of a tangle, with all of the text accumulated */
void (*raw_callback)(struct text_stream *, struct simple_tangle_docket *);
/* called for each braced command, with the output, command and argument; may be NULL */
void (*command_callback)(struct text_stream *, struct text_stream *,
struct text_stream *, struct simple_tangle_docket *);
/* called for each bracket-plus expression, with its contents; may be NULL */
void (*bplus_callback)(struct text_stream *, struct simple_tangle_docket *);
/* called to report a problem, with a message and a text (which may be NULL) */
void (*error_callback)(char *, struct text_stream *);
/* never read or written by the tangling code in this section */
void *state;
/* the web whose metadata is read, and whose Sections directory is searched */
struct pathname *web_path;
} simple_tangle_docket;
<<*>>=
/* Make a docket by value from its six ingredients: A, B, C and D become the raw,
   command, bracket-plus and error callbacks respectively. */
simple_tangle_docket SimpleTangler::new_docket(
	void (*A)(struct text_stream *, struct simple_tangle_docket *),
	void (*B)(struct text_stream *, struct text_stream *,
		struct text_stream *, struct simple_tangle_docket *),
	void (*C)(struct text_stream *, struct simple_tangle_docket *),
	void (*D)(char *, struct text_stream *),
	pathname *web_path, void *initial_state) {
	simple_tangle_docket fresh;

	/* where to look, and what the callbacks will share */
	fresh.web_path = web_path;
	fresh.state = initial_state;

	/* the four callbacks */
	fresh.raw_callback = A;
	fresh.command_callback = B;
	fresh.bplus_callback = C;
	fresh.error_callback = D;

	return fresh;
}
@ We can tangle either text already in memory, or a file (which the tangler
should open), or a section (which the tangler should find and open), or a
whole web of section files (ditto):
<<*>>=
/* Tangle text already held in memory: no file, section name or web is given. */
void SimpleTangler::tangle_text(simple_tangle_docket *docket, text_stream *text) {
SimpleTangler::tangle_L1(docket, text, NULL, NULL, FALSE);
}
/* Tangle the contents of the explicitly named file F. */
void SimpleTangler::tangle_file(simple_tangle_docket *docket, filename *F) {
SimpleTangler::tangle_L1(docket, NULL, F, NULL, FALSE);
}
/* Tangle one section, identified by its leafname within the docket's web. */
void SimpleTangler::tangle_section(simple_tangle_docket *docket, text_stream *leafname) {
SimpleTangler::tangle_L1(docket, NULL, NULL, leafname, FALSE);
}
/* Tangle every section of the web at the docket's web_path. */
void SimpleTangler::tangle_web(simple_tangle_docket *docket) {
SimpleTangler::tangle_L1(docket, NULL, NULL, NULL, TRUE);
}
<<*>>=
/* Level 1: gather the whole tangled output into a temporary text, then hand it
   to the docket's raw callback in a single call. */
void SimpleTangler::tangle_L1(simple_tangle_docket *docket, text_stream *text,
	filename *F, text_stream *leafname, int whole_web) {
	TEMPORARY_TEXT(tangled)
	SimpleTangler::tangle_L2(tangled, text, F, leafname, docket, whole_web);
	docket->raw_callback(tangled, docket);
	DISCARD_TEXT(tangled)
}
@ First, dispose of the "whole web" possibility.
<<*>>=
/* Level 2: either tangle the one source described by the arguments, or else
   read the web's metadata and tangle each section of each chapter in turn,
   all into the same output stream. */
void SimpleTangler::tangle_L2(OUTPUT_STREAM, text_stream *text, filename *F,
	text_stream *leafname, simple_tangle_docket *docket, int whole_web) {
	if (whole_web == FALSE) {
		/* the simple case: a single text, file or section */
		SimpleTangler::tangle_L3(OUT, text, leafname, docket, F);
		return;
	}

	web_md *metadata =
		WebMetadata::get(docket->web_path, NULL, V2_SYNTAX, NULL, FALSE, TRUE, NULL);
	chapter_md *ch_md;
	LOOP_OVER_LINKED_LIST(ch_md, chapter_md, metadata->chapters_md) {
		section_md *sec_md;
		LOOP_OVER_LINKED_LIST(sec_md, section_md, ch_md->sections_md) {
			/* each section is read from its own source file */
			SimpleTangler::tangle_L3(OUT, text, sec_md->sect_title, docket,
				sec_md->source_file_for_section);
		}
	}
}
@ When tangling a file, we begin in [[comment]] mode; when tangling text already
in memory, we begin in code mode instead.
<<*>>=
/* Level 3: tangle a single source into OUT. If a file F or a non-empty leafname
   is given, a file is opened and read; otherwise the characters come from the
   in-memory text. The locals declared here are used by name inside the two
   chunks expanded below. */
void SimpleTangler::tangle_L3(OUTPUT_STREAM, text_stream *text,
text_stream *leafname, simple_tangle_docket *docket, filename *F) {
/* TRUE while the material being read is commentary, which is not tangled */
int comment = FALSE;
/* stays NULL when tangling in-memory text, or if the file cannot be opened */
FILE *Input_File = NULL;
if ((Str::len(leafname) > 0) || (F)) {
<<Open the file>>;
/* material read from a file starts out as commentary */
comment = TRUE;
}
<<Tangle the material>>;
if (Input_File) fclose(Input_File);
}
@ Note that if we are looking for an explicit section -- say, [[Juggling.i6t]] --
from a web [[W]], we translate that into the path [[W/Sections/Juggling.i6t]].
<<Open the file>>=
/* Open the input: an explicit file F takes precedence; otherwise the leafname
   is looked for in the Sections subdirectory of the docket's web. On failure
   the error callback is told, and Input_File is left as NULL. */
if (F) {
	Input_File = Filenames::fopen(F, "r");
} else if (Str::len(leafname) > 0) {
	pathname *P = Pathnames::down(docket->web_path, I"Sections");
	Input_File = Filenames::fopen(Filenames::in(P, leafname), "r");
}
if (Input_File == NULL) {
	/* name the file in the message: the leafname if we have one, as before,
	   but otherwise the filename itself, so that the message is never
	   left with an empty name when only F was supplied */
	TEMPORARY_TEXT(unopened)
	if (Str::len(leafname) > 0) Str::copy(unopened, leafname);
	else WRITE_TO(unopened, "%f", F);
	(*(docket->error_callback))("unable to open the file '%S'", unopened);
	DISCARD_TEXT(unopened)
}
<<Tangle the material>>=
/* The main loop: read the input one character at a time, recognising heading
   lines which begin in the first column, and passing on everything which is
   not commentary. */
TEMPORARY_TEXT(command)
TEMPORARY_TEXT(argument)
/* skip_part is tested when headings are acted on, but nothing in the code
   visible here ever sets it to TRUE */
int skip_part = FALSE, extract = FALSE;
/* NOTE(review): col starts at 1 and reading a character increments it before
   the column test below, so the very first character of the input is seen
   with col equal to 2 and cannot begin a heading -- confirm this is intended */
int col = 1, cr, sfp = 0;
do {
Str::clear(command);
Str::clear(argument);
<<Read next character>>;
/* jumped to when a character read as lookahead must be processed afresh */
NewCharacter: if (cr == EOF) break;
/* an at-sign or an equals sign in the first column begins a heading line */
if (((cr == '@') || (cr == '=')) && (col == 1)) {
/* remains -1 if the line turns out not to be a recognised heading */
int inweb_syntax = -1;
if (cr == '=') <<Read the rest of line as an equals-heading>>
else <<Read the rest of line as an at-heading>>;
<<Act on the heading, going in or out of comment mode as appropriate>>;
/* in a do-while loop, this goes to the test at the bottom of the loop */
continue;
}
if (comment == FALSE) <<Deal with material which isn't commentary>>;
} while (cr != EOF);
DISCARD_TEXT(command)
DISCARD_TEXT(argument)
@ Our text files are encoded as ISO Latin-1, not as Unicode UTF-8, so ordinary
[[fgetc]] is used, and no BOM marker is parsed. Lines are assumed to be terminated
with either [[0x0a]] or [[0x0d]]. (Since blank lines are harmless, we take no
trouble over [[0a0d]] or [[0d0a]] combinations.) The built-in template files, almost
always the only ones used, are line terminated [[0x0a]] in Unix fashion.
<<Read next character>>=
/* Fetch the next character into cr, from the open file if there is one and
   otherwise from the in-memory text, where a zero character marks the end.
   With neither source, or at the end of either, cr becomes EOF. */
if (Input_File) {
	cr = fgetc(Input_File);
} else if (text) {
	cr = Str::get_at(text, sfp);
	if (cr == 0) cr = EOF;
	else sfp++;
} else {
	cr = EOF;
}
/* keep count of the column, starting again after each line terminator */
if ((cr == 10) || (cr == 13)) col = 0;
else col++;
@ Here we see the limited range of Inweb syntaxes allowed; but some [[@]] and [[=]]
commands can be used, at least.
<<*>>=
#define INWEB_PARAGRAPH_SYNTAX 1
#define INWEB_CODE_SYNTAX 2
#define INWEB_DASH_SYNTAX 3
#define INWEB_PURPOSE_SYNTAX 4
#define INWEB_FIGURE_SYNTAX 5
#define INWEB_EQUALS_SYNTAX 6
#define INWEB_EXTRACT_SYNTAX 7
<<Read the rest of line as an at-heading>>=
/* Read the rest of a line which began with an at-sign in the first column. The
   marker is whatever lies between the at-sign and the first space or line end,
   and it is classified at that point ("committed"). The text of the whole line,
   less the at-sign, is left in command. If the marker is unrecognised but made
   only of acceptable characters, it is passed through to OUT as literal text
   and reading stops there, leaving the rest of the line for the main loop. */
/* NOTE(review): an at-sign followed directly by a space has an empty marker,
   which matches none of the cases below and so takes the literal branch --
   confirm that this is what is wanted */
TEMPORARY_TEXT(at_cmd)
int committed = FALSE, unacceptable_character = FALSE;
while (TRUE) {
	<<Read next character>>;
	/* running out of input ends the marker just as a line end does */
	if ((committed == FALSE) &&
		((cr == 10) || (cr == 13) || (cr == ' ') || (cr == EOF))) {
		if (Str::eq_wide_string(at_cmd, L"p"))
			inweb_syntax = INWEB_PARAGRAPH_SYNTAX;
		else if (Str::eq_wide_string(at_cmd, L"h"))
			inweb_syntax = INWEB_PARAGRAPH_SYNTAX;
		else if (Str::eq_wide_string(at_cmd, L"c"))
			inweb_syntax = INWEB_CODE_SYNTAX;
		else if (Str::get_first_char(at_cmd) == '-')
			inweb_syntax = INWEB_DASH_SYNTAX;
		else if (Str::begins_with_wide_string(at_cmd, L"Purpose:"))
			inweb_syntax = INWEB_PURPOSE_SYNTAX;
		committed = TRUE;
		if (inweb_syntax == -1) {
			if (unacceptable_character == FALSE) {
				/* not a heading after all: reproduce what was read */
				PUT_TO(OUT, '@');
				WRITE_TO(OUT, "%S", at_cmd);
				if (cr != EOF) PUT_TO(OUT, cr); /* EOF is not a character */
				break;
			} else {
				LOG("heading begins: <%S>\n", at_cmd);
				(*(docket->error_callback))(
					"unknown '@...' marker at column 0: '%S'", at_cmd);
			}
		}
	}
	/* only letters, digits and a few punctuation marks may appear in a marker */
	if (!(((cr >= 'A') && (cr <= 'Z')) || ((cr >= 'a') && (cr <= 'z'))
		|| ((cr >= '0') && (cr <= '9'))
		|| (cr == '-') || (cr == '>') || (cr == ':') || (cr == '_')))
		unacceptable_character = TRUE;
	/* stop at the end of the line, or of the input: without the EOF test a
	   final line lacking a terminator would be read forever */
	if ((cr == 10) || (cr == 13) || (cr == EOF)) break;
	PUT_TO(at_cmd, cr);
}
Str::copy(command, at_cmd);
DISCARD_TEXT(at_cmd)
<<Read the rest of line as an equals-heading>>=
/* Read the rest of a line which began with an equals sign in the first column,
   and classify it by any bracketed annotation which follows: "text" begins an
   extract, "figure" a figure, and any other bracketed annotation is reported
   as unsupported, leaving inweb_syntax unset. With no annotation, this is a
   plain equals heading. */
TEMPORARY_TEXT(equals_cmd)
while (TRUE) {
	<<Read next character>>;
	/* stop at the end of the line, or of the input: without the EOF test a
	   final line lacking a terminator would be read forever */
	if ((cr == 10) || (cr == 13) || (cr == EOF)) break;
	PUT_TO(equals_cmd, cr);
}
match_results mr = Regexp::create_mr();
if (Regexp::match(&mr, equals_cmd, L" %(text%c*%) *")) {
	inweb_syntax = INWEB_EXTRACT_SYNTAX;
} else if (Regexp::match(&mr, equals_cmd, L" %(figure%c*%) *")) {
	inweb_syntax = INWEB_FIGURE_SYNTAX;
} else if (Regexp::match(&mr, equals_cmd, L" %(%c*%) *")) {
	(*(docket->error_callback))(
		"unsupported '= (...)' marker at column 0", NULL);
} else {
	inweb_syntax = INWEB_EQUALS_SYNTAX;
}
Regexp::dispose_of(&mr);
DISCARD_TEXT(equals_cmd)
<<Act on the heading, going in or out of comment mode as appropriate>>=
/* Change mode according to the kind of heading just read. The comment flag
   decides whether what follows is tangled; the extract flag records that we
   are inside a text extract, which a later equals heading will close. */
switch (inweb_syntax) {
	case INWEB_PARAGRAPH_SYNTAX: {
		/* the heading name is worked out only to check that it exists: it is
		   the line from position 2 onwards, less any trailing spaces, tabs
		   and full stops */
		TEMPORARY_TEXT(heading_name)
		Str::copy_tail(heading_name, command, 2);
		int c;
		while (((c = Str::get_last_char(heading_name)) != 0) &&
			((c == ' ') || (c == '\t') || (c == '.')))
			Str::delete_last_character(heading_name);
		if (Str::len(heading_name) == 0)
			(*(docket->error_callback))("Empty heading name", NULL);
		DISCARD_TEXT(heading_name)
		/* a new paragraph always begins with commentary */
		extract = FALSE;
		comment = TRUE; skip_part = FALSE;
		break;
	}
	case INWEB_CODE_SYNTAX:
		/* code begins, unless this part is being skipped */
		extract = FALSE;
		if (skip_part == FALSE) comment = FALSE;
		break;
	case INWEB_EQUALS_SYNTAX:
		if (extract) {
			/* this equals sign closes an extract: back to commentary */
			comment = TRUE; extract = FALSE;
		} else {
			/* this equals sign opens code, unless this part is being skipped */
			if (skip_part == FALSE) comment = FALSE;
		}
		break;
	case INWEB_EXTRACT_SYNTAX:
		/* the contents of a text extract are not tangled */
		comment = TRUE; extract = TRUE;
		break;
	case INWEB_DASH_SYNTAX: break;
	case INWEB_PURPOSE_SYNTAX: break;
	case INWEB_FIGURE_SYNTAX: break;
	default: break; /* no heading was recognised, so the mode is unchanged */
}
<<Deal with material which isn't commentary>>=
/* Handle one character of material to be tangled. An open brace followed by a
   dash begins a braced command, and an open bracket followed by a plus sign
   begins a bracket-plus expression, but each only if the docket supplies the
   corresponding callback; anything else is copied to OUT unchanged. Telling
   these apart needs one character of lookahead. */
if (cr == '{') {
<<Read next character>>;
if ((cr == '-') && (docket->command_callback)) {
<<Read up to the next close brace as a braced command and argument>>;
/* a command whose name begins with an exclamation mark is silently ignored */
if (Str::get_first_char(command) == '!') continue;
(*(docket->command_callback))(OUT, command, argument, docket);
continue;
} else { /* otherwise the open brace was a literal */
PUT_TO(OUT, '{');
/* the lookahead character has yet to be dealt with, so process it afresh */
goto NewCharacter;
}
}
if ((cr == '(') && (docket->bplus_callback)) {
<<Read next character>>;
if (cr == '+') {
<<Read up to the next plus close-bracket as an I7 expression>>;
continue;
} else { /* otherwise the open bracket was a literal */
PUT_TO(OUT, '(');
/* the lookahead character has yet to be dealt with, so process it afresh */
goto NewCharacter;
}
}
PUT_TO(OUT, cr);
@ And here we read a normal command. The command name must not include [[}]]
or [[:]]. If there is no [[:]] then the argument is left unset (so that it will
be the empty string: see above). The argument must not include [[}]].
<<Read up to the next close brace as a braced command and argument>>=
/* Split what lies before the close brace at its first colon: the part before
   goes into command, and the part after, later colons included, goes into
   argument. Reading also stops if the input runs out first. */
Str::clear(command);
Str::clear(argument);
text_stream *receiving = command; /* switches to argument at the first colon */
for (;;) {
	<<Read next character>>;
	if ((cr == EOF) || (cr == '}')) break;
	if ((receiving == command) && (cr == ':')) {
		receiving = argument;
		continue;
	}
	PUT_TO(receiving, cr);
}
@ And similarly, for the [[(+]] ... [[+)]] notation which was once used to mark
I7 material within I6:
<<Read up to the next plus close-bracket as an I7 expression>>=
/* Collect everything up to a plus sign immediately followed by a close bracket,
   and pass it, without that closing pair, to the bracket-plus callback. If the
   input runs out first, the callback receives whatever had been collected. */
TEMPORARY_TEXT(i7_source)
for (;;) {
	<<Read next character>>;
	if (cr == EOF) break;
	if ((cr == ')') && (Str::get_last_char(i7_source) == '+')) {
		/* the plus sign was part of the closing pair, not of the expression */
		Str::delete_last_character(i7_source);
		break;
	}
	PUT_TO(i7_source, cr);
}
docket->bplus_callback(i7_source, docket);
DISCARD_TEXT(i7_source)