Chapter 4: Nowebify.

This commit is contained in:
AwesomeAdam54321 2024-03-09 20:44:19 +08:00
parent 82d8056b15
commit e1ca0836cd
7 changed files with 612 additions and 575 deletions

View file

@ -2,7 +2,7 @@
For generic programming languages by the ACME corporation.
@h One Dozen ACME Explosive Tennis Balls.
@ \section{One Dozen ACME Explosive Tennis Balls.}
Older readers will remember that Wile E. Coyote, when wishing to frustrate
Road Runner with some ingenious device, would invariably buy it from the Acme
Corporation, which manufactured everything imaginable. See Wikipedia, "Acme
@ -12,7 +12,7 @@ For us, ACME is an imaginary programming language, providing generic support
for comments and syntax colouring. Ironically, this code grew out of a language
actually called ACME: the 6502 assembler of the same name.
=
<<*>>=
void ACMESupport::add_fallbacks(programming_language *pl) {
if (Methods::provided(pl->methods, PARSE_TYPES_PAR_MTID) == FALSE)
METHOD_ADD(pl, PARSE_TYPES_PAR_MTID, ACMESupport::parse_types);
@ -54,11 +54,11 @@ void ACMESupport::add_fallbacks(programming_language *pl) {
METHOD_ADD(pl, SYNTAX_COLOUR_WEA_MTID, ACMESupport::syntax_colour);
}
@ This utility does a very limited |WRITE|-like job. (We don't want to use
the actual |WRITE| because that would make it possible for malicious language
@ This utility does a very limited [[WRITE]]-like job. (We don't want to use
the actual [[WRITE]] because that would make it possible for malicious language
files to crash Inweb.)
=
<<*>>=
void ACMESupport::expand(OUTPUT_STREAM, text_stream *prototype, text_stream *S,
int N, filename *F) {
if (Str::len(prototype) > 0) {
@ -80,9 +80,9 @@ void ACMESupport::expand(OUTPUT_STREAM, text_stream *prototype, text_stream *S,
}
}
@h Tangling methods.
@ \section{Tangling methods.}
=
<<*>>=
void ACMESupport::shebang(programming_language *pl, text_stream *OUT, web *W,
tangle_target *target) {
ACMESupport::expand(OUT, pl->shebang, NULL, -1, NULL);
@ -158,20 +158,20 @@ void ACMESupport::comment(programming_language *pl,
}
}
@ In the following, |q_mode| is 0 outside quotes, 1 inside a character literal,
and 2 inside a string literal; |c_mode| is 0 outside comments, 1 inside a line
@ In the following, [[q_mode]] is 0 outside quotes, 1 inside a character literal,
and 2 inside a string literal; [[c_mode]] is 0 outside comments, 1 inside a line
comment, and 2 inside a multiline comment.
=
<<*>>=
int ACMESupport::parse_comment(programming_language *pl,
text_stream *line, text_stream *part_before_comment, text_stream *part_within_comment) {
int q_mode = 0, c_mode = 0, non_white_space = FALSE, c_position = -1, c_end = -1;
for (int i=0; i<Str::len(line); i++) {
wchar_t c = Str::get_at(line, i);
switch (c_mode) {
case 0: @<Outside commentary@>; break;
case 1: @<Inside a line comment@>; break;
case 2: @<Inside a multiline comment@>; break;
case 0: <<Outside commentary>>; break;
case 1: <<Inside a line comment>>; break;
case 2: <<Inside a multiline comment>>; break;
}
}
if (c_mode == 2) c_end = Str::len(line);
@ -188,22 +188,22 @@ int ACMESupport::parse_comment(programming_language *pl,
return FALSE;
}
@<Inside a multiline comment@> =
<<Inside a multiline comment>>=
if (Str::includes_at(line, i, pl->multiline_comment_close)) {
c_mode = 0; c_end = i; i += Str::len(pl->multiline_comment_close) - 1;
}
@<Inside a line comment@> =
<<Inside a line comment>>=
;
@<Outside commentary@> =
<<Outside commentary>>=
switch (q_mode) {
case 0: @<Outside quoted matter@>; break;
case 1: @<Inside a literal character@>; break;
case 2: @<Inside a literal string@>; break;
case 0: <<Outside quoted matter>>; break;
case 1: <<Inside a literal character>>; break;
case 2: <<Inside a literal string>>; break;
}
@<Outside quoted matter@> =
<<Outside quoted matter>>=
if (!(Characters::is_whitespace(c))) non_white_space = TRUE;
if (c == Str::get_first_char(pl->string_literal)) q_mode = 2;
else if (c == Str::get_first_char(pl->character_literal)) q_mode = 1;
@ -225,13 +225,13 @@ int ACMESupport::parse_comment(programming_language *pl,
}
}
@<Inside a literal character@> =
<<Inside a literal character>>=
/* Handle one character of the line while q_mode is 1, i.e., inside a character literal. */
if (!(Characters::is_whitespace(c))) non_white_space = TRUE;
/* an escape character causes the character after it to be skipped */
if (c == Str::get_first_char(pl->character_literal_escape)) i += 1;
if (c == Str::get_first_char(pl->character_literal)) q_mode = 0;
/* NOTE(review): q_mode is reset unconditionally here, which makes the conditional reset on the previous line redundant -- confirm whether this is intended */
q_mode = 0;
@<Inside a literal string@> =
<<Inside a literal string>>=
if (!(Characters::is_whitespace(c))) non_white_space = TRUE;
if (c == Str::get_first_char(pl->string_literal_escape)) i += 1;
if (c == Str::get_first_char(pl->string_literal)) q_mode = 0;
@ -239,7 +239,7 @@ int ACMESupport::parse_comment(programming_language *pl,
@
=
<<*>>=
void ACMESupport::parse_types(programming_language *self, web *W) {
if (W->main_language->type_notation[0]) {
chapter *C;
@ -258,7 +258,7 @@ void ACMESupport::parse_types(programming_language *self, web *W) {
@
=
<<*>>=
void ACMESupport::parse_functions(programming_language *self, web *W) {
if (W->main_language->function_notation[0]) {
chapter *C;
@ -278,12 +278,12 @@ void ACMESupport::parse_functions(programming_language *self, web *W) {
@ The following is an opportunity for us to scold the author for any
violation of the namespace rules. We're going to look for functions named
|Whatever::name()| whose definitions are not in the |Whatever::| section;
[[Whatever::name()]] whose definitions are not in the [[Whatever::]] section;
in other words, we police the rule that functions actually are defined in the
namespace which their names imply. This can be turned off with a special
bibliographic variable, but don't do that.
=
<<*>>=
void ACMESupport::post_analysis(programming_language *self, web *W) {
int check_namespaces = FALSE;
if (Str::eq_wide_string(Bibliographic::get_datum(W->md, I"Namespaces"), L"On"))
@ -319,7 +319,7 @@ void ACMESupport::post_analysis(programming_language *self, web *W) {
@ Having found all those functions and structure elements, we make sure they
are all known to Inweb's hash table of interesting identifiers:
=
<<*>>=
void ACMESupport::analyse_code(programming_language *self, web *W) {
language_function *fn;
LOOP_OVER(fn, language_function)
@ -337,14 +337,14 @@ void ACMESupport::analyse_code(programming_language *self, web *W) {
@ This is here so that tangling the Standard Rules extension doesn't insert
a spurious comment betraying Inweb's involvement in the process.
=
<<*>>=
int ACMESupport::suppress_disclaimer(programming_language *pl) {
return pl->suppress_disclaimer;
}
@
=
<<*>>=
void ACMESupport::begin_weave(programming_language *pl, section *S, weave_order *wv) {
reserved_word *rw;
LOOP_OVER_LINKED_LIST(rw, reserved_word, pl->reserved_words)
@ -353,7 +353,7 @@ void ACMESupport::begin_weave(programming_language *pl, section *S, weave_order
@ ACME has all of its syntax-colouring done by the default engine:
=
<<*>>=
void ACMESupport::reset_syntax_colouring(programming_language *pl) {
Painter::reset_syntax_colouring(pl);
}

View file

@ -2,10 +2,10 @@
To provide special features for the whole C family of languages.
@h What makes a language C-like?
@ \section{What makes a language C-like?}
This does:
=
<<*>>=
void CLike::make_c_like(programming_language *pl) {
METHOD_ADD(pl, PARSE_TYPES_PAR_MTID, CLike::parse_types);
METHOD_ADD(pl, PARSE_FUNCTIONS_PAR_MTID, CLike::parse_functions);
@ -15,38 +15,38 @@ void CLike::make_c_like(programming_language *pl) {
METHOD_ADD(pl, ADDITIONAL_PREDECLARATIONS_TAN_MTID, CLike::additional_predeclarations);
}
@h Parsing.
@ \section{Parsing.}
After a web has been read in and then parsed, code supporting its language
is then called to do any further parsing it might want to. The code below
is run if the language is "C-like": regular C and InC both qualify.
=
<<*>>=
void CLike::parse_types(programming_language *self, web *W) {
@<Find every typedef struct in the tangle@>;
@<Work out which structs contain which others@>;
<<Find every typedef struct in the tangle>>;
<<Work out which structs contain which others>>;
}
@ We're going to assume that the C source code uses structures looking
something like this:
= (text as C)
typedef struct fruit {
struct pip the_pips[5];
struct fruit *often_confused_with;
struct tree_species *grows_on;
int typical_weight;
} fruit;
=
which adopts the traditional layout conventions of Kernighan and Ritchie.
The structure definitions in this Inweb web all take the required form,
of course, and provide many more examples.
Note that a |fruit| structure contains a |pip| structure (in fact, five of
them), but only contains pointers to |tree_species| structures and itself.
C requires therefore that the structure definition for |pip| must occur
earlier in the code than that for |fruit|. This is a nuisance, so Inweb
Note that a [[fruit]] structure contains a [[pip]] structure (in fact, five of
them), but only contains pointers to [[tree_species]] structures and itself.
C requires therefore that the structure definition for [[pip]] must occur
earlier in the code than that for [[fruit]]. This is a nuisance, so Inweb
takes care of it automatically.
@<Find every typedef struct in the tangle@> =
<<Find every typedef struct in the tangle>>=
language_type *current_str = NULL;
chapter *C;
section *S;
@ -61,7 +61,7 @@ takes care of it automatically.
current_str->typedef_ends = L;
current_str = NULL;
} else if ((current_str) && (current_str->typedef_ends == NULL)) {
@<Work through a line in the structure definition@>;
<<Work through a line in the structure definition>>;
} else if ((Regexp::match(&mr, L->text, L"typedef %c+")) &&
(Regexp::match(&mr, L->text, L"%c+##%c+") == FALSE)) {
if (L->owning_paragraph->placed_very_early == FALSE)
@ -73,24 +73,24 @@ takes care of it automatically.
@ At this point we're reading a line within the structure's definition; for
the sake of an illustrative example, let's suppose that line is:
= (text)
unsigned long long int *val;
=
We need to extract the element name, |val|, and make a note of it.
@<Work through a line in the structure definition@> =
unsigned long long int *val;
We need to extract the element name, [[val]], and make a note of it.
<<Work through a line in the structure definition>>=
TEMPORARY_TEXT(p)
Str::copy(p, L->text);
Str::trim_white_space(p);
@<Remove C type modifiers from the front of p@>;
<<Remove C type modifiers from the front of p>>;
string_position pos = Str::start(p);
if (Str::get(pos) != '/') { /* a slash must introduce a comment here */
@<Move pos past the type name@>;
@<Move pos past any typographical type modifiers@>;
<<Move pos past the type name>>;
<<Move pos past any typographical type modifiers>>;
if (Str::in_range(pos)) {
match_results mr = Regexp::create_mr();
TEMPORARY_TEXT(elname)
@<Copy the element name into elname@>;
<<Copy the element name into elname>>;
Functions::new_element(current_str, elname, L);
DISCARD_TEXT(elname)
Regexp::dispose_of(&mr);
@ -98,9 +98,9 @@ We need to extract the element name, |val|, and make a note of it.
}
DISCARD_TEXT(p)
@ The following reduces |unsigned long long int *val;| to just |int *val;|.
@ The following reduces [[unsigned long long int *val;]] to just [[int *val;]].
@<Remove C type modifiers from the front of p@> =
<<Remove C type modifiers from the front of p>>=
wchar_t *modifier_patterns[] = {
L"(struct )(%C%c*)", L"(signed )(%C%c*)", L"(unsigned )(%C%c*)",
L"(short )(%C%c*)", L"(long )(%C%c*)", L"(static )(%C%c*)", NULL };
@ -115,40 +115,40 @@ We need to extract the element name, |val|, and make a note of it.
}
}
@ At this point |p| has been reduced to |int *val;|, but the following moves
|pos| to point to the |*|:
@ At this point [[p]] has been reduced to [[int *val;]], but the following moves
[[pos]] to point to the [[*]]:
@<Move pos past the type name@> =
<<Move pos past the type name>>=
while ((Str::get(pos)) && (Characters::is_space_or_tab(Str::get(pos)) == FALSE))
pos = Str::forward(pos);
@ And this moves it past the |*| to point to the |v| in |int *val;|:
@ And this moves it past the [[*]] to point to the [[v]] in [[int *val;]]:
@<Move pos past any typographical type modifiers@> =
while ((Characters::is_space_or_tab(Str::get(pos))) || (Str::get(pos) == '*') ||
<<Move pos past any typographical type modifiers>>=
/* Advance pos over any run of spaces, tabs, asterisks and parentheses. The
logical-or operators must be written literally as "||": they are C code,
not inline-code markup, and must not be rewritten as [[ ]] delimiters. */
while ((Characters::is_space_or_tab(Str::get(pos))) || (Str::get(pos) == '*') ||
(Str::get(pos) == '(') || (Str::get(pos) == ')')) pos = Str::forward(pos);
@ This then first copies the substring |val;| into |elname|, then cuts that
down to just the identifier characters at the front, i.e., to |val|.
@ This then first copies the substring [[val;]] into [[elname]], then cuts that
down to just the identifier characters at the front, i.e., to [[val]].
@<Copy the element name into elname@> =
<<Copy the element name into elname>>=
Str::substr(elname, pos, Str::end(p));
if (Regexp::match(&mr, elname, L"(%i+)%c*")) Str::copy(elname, mr.exp[0]);
@h Structure dependency.
We say that S depends on T if |struct S| has an element whose type is
|struct T|. That matters because if so then |struct T| has to be defined
before |struct S| in the tangled output.
@ \section{Structure dependency.}
We say that S depends on T if [[struct S]] has an element whose type is
[[struct T]]. That matters because if so then [[struct T]] has to be defined
before [[struct S]] in the tangled output.
It's important to note that [[struct S]] merely having a member of type
[[struct *T]] does not create a dependency. In the code below, because [[%i]]
matches only identifier characters and [[*]] is not one of those, a line like
It's important to note that |struct S| merely having a member of type
|struct *T| does not create a dependency. In the code below, because |%i|
matches only identifier characters and |*| is not one of those, a line like
= (text)
struct fruit *often_confused_with;
=
will not trip the switch here.
@<Work out which structs contain which others@> =
<<Work out which structs contain which others>>=
language_type *current_str;
LOOP_OVER(current_str, language_type) {
for (source_line *L = current_str->structure_header_at;
@ -156,12 +156,12 @@ will not trip the switch here.
L = L->next_line) {
match_results mr = Regexp::create_mr();
if (Regexp::match(&mr, L->text, L" struct (%i+) %i%c*"))
@<One structure appears to contain a copy of another one@>;
<<One structure appears to contain a copy of another one>>;
Regexp::dispose_of(&mr);
}
}
@<One structure appears to contain a copy of another one@> =
<<One structure appears to contain a copy of another one>>=
text_stream *used_structure = mr.exp[0];
language_type *str;
LOOP_OVER_LINKED_LIST(str, language_type, W->language_types)
@ -169,8 +169,8 @@ will not trip the switch here.
(Str::eq(used_structure, str->structure_name)))
ADD_TO_LINKED_LIST(str, language_type, current_str->incorporates);
@h Functions.
This time, we will need to keep track of |#ifdef| and |#endif| pairs
@ \section{Functions.}
This time, we will need to keep track of [[#ifdef]] and [[#endif]] pairs
in the source. This matters because we will want to predeclare functions;
but if functions are declared in conditional compilation, then their
predeclarations have to be made under the same conditions.
@ -178,9 +178,10 @@ predeclarations have to be made under the same conditions.
The following stack holds the current set of conditional compilations which the
source line being scanned lies within.
@d MAX_CONDITIONAL_COMPILATION_STACK 8
<<*>>=
#define MAX_CONDITIONAL_COMPILATION_STACK 8
=
<<*>>=
int cc_sp = 0;
source_line *cc_stack[MAX_CONDITIONAL_COMPILATION_STACK];
@ -192,14 +193,14 @@ void CLike::parse_functions(programming_language *self, web *W) {
if ((L->category == CODE_BODY_LCAT) ||
(L->category == BEGIN_DEFINITION_LCAT) ||
(L->category == CONT_DEFINITION_LCAT)) {
@<Look for conditional compilation on this line@>;
@<Look for a function definition on this line@>;
<<Look for conditional compilation on this line>>;
<<Look for a function definition on this line>>;
}
if (cc_sp > 0)
Main::error_in_web(I"program ended with conditional compilation open", NULL);
}
@<Look for conditional compilation on this line@> =
<<Look for conditional compilation on this line>>=
match_results mr = Regexp::create_mr();
if ((Regexp::match(&mr, L->text, L" *#ifn*def %c+")) ||
(Regexp::match(&mr, L->text, L" *#IFN*DEF %c+"))) {
@ -217,27 +218,27 @@ void CLike::parse_functions(programming_language *self, web *W) {
}
@ So, then, we recognise a C function as being a line which takes the form
= (text)
type identifier(args...
=
where we parse |type| only minimally. In InC (only), the identifier can
contain namespace dividers written |::|. Function declarations, we will assume,
where we parse [[type]] only minimally. In InC (only), the identifier can
contain namespace dividers written [[::]]. Function declarations, we will assume,
always begin on column 1 of their source files, and we expect them to take
modern ANSI C style, not the long-deprecated late 1970s C style.
@<Look for a function definition on this line@> =
<<Look for a function definition on this line>>=
if (!(Characters::is_space_or_tab(Str::get_first_char(L->text)))) {
TEMPORARY_TEXT(qualifiers)
TEMPORARY_TEXT(modified)
Str::copy(modified, L->text);
@<Parse past any type modifiers@>;
<<Parse past any type modifiers>>;
match_results mr = Regexp::create_mr();
if (Regexp::match(&mr, modified, L"(%i+) (%**)(%i+)%((%c*)")) {
TEMPORARY_TEXT(ftype) Str::copy(ftype, mr.exp[0]);
TEMPORARY_TEXT(asts) Str::copy(asts, mr.exp[1]);
TEMPORARY_TEXT(fname) Str::copy(fname, mr.exp[2]);
TEMPORARY_TEXT(arguments) Str::copy(arguments, mr.exp[3]);
@<A function definition was found@>;
<<A function definition was found>>;
DISCARD_TEXT(ftype)
DISCARD_TEXT(asts)
DISCARD_TEXT(fname)
@ -250,9 +251,9 @@ modern ANSI C style, not the long-deprecated late 1970s C style.
@ C has a whole soup of reserved words applying to types, but most of them
can't apply to the return type of a function. We do, however, iterate so that
forms like |static long long int| will work.
forms like [[static long long int]] will work.
@<Parse past any type modifiers@> =
<<Parse past any type modifiers>>=
wchar_t *modifier_patterns[] = {
L"(signed )(%C%c*)", L"(unsigned )(%C%c*)",
L"(short )(%C%c*)", L"(long )(%C%c*)", L"(static )(%C%c*)", NULL };
@ -269,8 +270,8 @@ forms like |static long long int| will work.
Regexp::dispose_of(&mr);
}
@<A function definition was found@> =
@<Soak up further arguments from continuation lines after the declaration@>;
<<A function definition was found>>=
<<Soak up further arguments from continuation lines after the declaration>>;
language_function *fn = Functions::new_function(fname, L);
fn->function_arguments = Str::duplicate(arguments);
WRITE_TO(fn->function_type, "%S%S %S", qualifiers, ftype, asts);
@ -279,17 +280,18 @@ forms like |static long long int| will work.
for (int i=0; i<cc_sp; i++) fn->within_conditionals[i] = cc_stack[i];
@ In some cases the function's declaration runs over several lines:
= (text as code)
void World::Subjects::make_adj_const_domain(inference_subject *infs,
instance *nc, property *prn) {
=
Having read the first line, |arguments| would contain |inference_subject *infs,|
Having read the first line, [[arguments]] would contain [[inference_subject *infs,]]
and would thus be incomplete. We continue across subsequent lines until we
reach an open brace |{|.
reach an open brace [[{]].
@d MAX_ARG_LINES 32 /* maximum number of lines over which a function's header can extend */
<<*>>=
#define MAX_ARG_LINES 32 /* maximum number of lines over which a function's header can extend */
@<Soak up further arguments from continuation lines after the declaration@> =
<<Soak up further arguments from continuation lines after the declaration>>=
source_line *AL = L;
int arg_lc = 1;
while ((AL) && (arg_lc <= MAX_ARG_LINES) && (Regexp::find_open_brace(arguments) == -1)) {
@ -307,12 +309,12 @@ reach an open brace |{|.
int n = Regexp::find_open_brace(arguments);
if (n >= 0) Str::truncate(arguments, n);
@h Subcategorisation.
@ \section{Subcategorisation.}
The following is called after the parser gives every line in the web a
category; we can, if we wish, change that for a more exotic one. We simply
look for a |#include| of one of the ANSI C standard libraries.
look for a [[#include]] of one of the ANSI C standard libraries.
=
<<*>>=
void CLike::subcategorise_code(programming_language *self, source_line *L) {
match_results mr = Regexp::create_mr();
if (Regexp::match(&mr, L->text, L"#include <(%C+)>%c*")) {
@ -330,18 +332,18 @@ void CLike::subcategorise_code(programming_language *self, source_line *L) {
Regexp::dispose_of(&mr);
}
@h Tangling extras.
@ \section{Tangling extras.}
"Additional early matter" is used for the inclusions of the ANSI library
files. We need to do that early, because otherwise types declared in them
(such as |FILE|) won't exist in time for the structure definitions we will
(such as [[FILE]]) won't exist in time for the structure definitions we will
be tangling next.
It might seem reasonable to move all |#include| files up front this way,
It might seem reasonable to move all [[#include]] files up front this way,
not just the ANSI ones. But that would defeat any conditional compilation
around the inclusions; which Inform (for instance) needs in order to make
platform-specific details to handle directories without POSIX in Windows.
=
<<*>>=
void CLike::additional_early_matter(programming_language *self, text_stream *OUT, web *W, tangle_target *target) {
chapter *C;
section *S;
@ -354,23 +356,23 @@ void CLike::additional_early_matter(programming_language *self, text_stream *OUT
}
}
@h Tangling predeclarations.
@ \section{Tangling predeclarations.}
This is where a language gets the chance to tangle predeclarations, early
on in the file. We use it first for the structures, and then the functions --
in that order since the function types likely involve the typedef names for the
structures.
=
<<*>>=
void CLike::additional_predeclarations(programming_language *self, text_stream *OUT, web *W) {
@<Predeclare the structures in a well-founded order@>;
@<Predeclare simple typedefs@>;
@<Predeclare the functions@>;
<<Predeclare the structures in a well-founded order>>;
<<Predeclare simple typedefs>>;
<<Predeclare the functions>>;
}
@ A "simple typedef" here means one that is aliasing something other than
a structure: for example |typedef unsigned int uint;| would be a simple typedef.
a structure: for example [[typedef unsigned int uint;]] would be a simple typedef.
@<Predeclare simple typedefs@> =
<<Predeclare simple typedefs>>=
chapter *C;
section *S;
LOOP_WITHIN_TANGLE(C, S, Tangler::primary_target(W))
@ -385,11 +387,11 @@ a structure: for example |typedef unsigned int uint;| would be a simple typedef.
precede outer, but we need to be careful to be terminating if the source
code we're given is not well founded because of an error by its programmer:
for example, that structure A contains B contains C contains A. We do this
with the |tangled| flag, which is |FALSE| if a structure hasn't been
started yet, |NOT_APPLICABLE| if it's in progress, and |TRUE| if it's
with the [[tangled]] flag, which is [[FALSE]] if a structure hasn't been
started yet, [[NOT_APPLICABLE]] if it's in progress, and [[TRUE]] if it's
finished.
@<Predeclare the structures in a well-founded order@> =
<<Predeclare the structures in a well-founded order>>=
language_type *str;
LOOP_OVER_LINKED_LIST(str, language_type, W->language_types)
str->tangled = FALSE;
@ -398,7 +400,7 @@ finished.
@ Using the following recursion, which is therefore terminating:
=
<<*>>=
void CLike::tangle_structure(OUTPUT_STREAM, programming_language *self, language_type *str) {
if (str->tangled != FALSE) return;
str->tangled = NOT_APPLICABLE;
@ -417,12 +419,12 @@ void CLike::tangle_structure(OUTPUT_STREAM, programming_language *self, language
}
@ Functions are rather easier to deal with. In general, if a function was
defined within some number of nested |#ifdef| or |#ifndef| directives, then
defined within some number of nested [[#ifdef]] or [[#ifndef]] directives, then
we reproduce those around the predeclaration: except, as a special trick,
if the line contains a particular comment. For example:
= (text)
#ifdef SOLARIS /* inweb: always predeclare */
=
That exempts any functions inside this condition from meeting the condition
in order to be predeclared. It's a trick used in the foundation module just
a couple of times: the idea is that although a definition of the functions
@ -431,7 +433,7 @@ provide alternative function definitions which would work without SOLARIS.
The functions therefore need predeclaration regardless, because they will
exist either way.
@<Predeclare the functions@> =
<<Predeclare the functions>>=
chapter *C;
section *S;
LOOP_WITHIN_TANGLE(C, S, Tangler::primary_target(W))
@ -467,6 +469,6 @@ exist either way.
}
}
@h Overriding regular code weaving.
@ \section{Overriding regular code weaving.}
We have the opportunity here to sidestep the regular weaving algorithm, and do
our own thing. We decline.

View file

@ -2,11 +2,11 @@
To support a modest extension of C called InC.
@h Creation.
@ \section{Creation.}
As can be seen, InC is a basically C-like language, but in addition to having
all of those methods, it has a whole lot more of its own.
=
<<*>>=
void InCSupport::add_features(programming_language *pl) {
METHOD_ADD(pl, FURTHER_PARSING_PAR_MTID, InCSupport::further_parsing);
@ -29,13 +29,13 @@ void InCSupport::add_features(programming_language *pl) {
@ We will apply this special tag wherever Preform grammar is defined:
=
<<*>>=
theme_tag *Preform_theme = NULL;
@h Parsing methods.
@ \section{Parsing methods.}
We only provide one parsing method, but it's a big one:
=
<<*>>=
preform_nonterminal *alphabetical_list_of_nonterminals = NULL;
void InCSupport::further_parsing(programming_language *self, web *W) {
@ -43,46 +43,47 @@ void InCSupport::further_parsing(programming_language *self, web *W) {
section *S;
LOOP_WITHIN_TANGLE(C, S, Tangler::primary_target(W))
if ((L->category == CODE_BODY_LCAT) || (L->category == CONT_DEFINITION_LCAT)) {
@<Detect and deal with Preform grammar@>;
@<Detect and deal with I-literals@>
<<Detect and deal with Preform grammar>>;
<<Detect and deal with I-literals>>
}
}
@h Parsing Preform grammar.
@ \section{Parsing Preform grammar.}
This is where we look for declarations of nonterminals. Very little about
the following code will make sense unless you've first read the Preform
section of the |words| module, which is what we're supporting, and seen
section of the [[words]] module, which is what we're supporting, and seen
some examples of Preform being used in the Inform source code.
In parsing, we categorise the opening lines |PREFORM_LCAT|. Subsequent lines
of grammar are |PREFORM_GRAMMAR_LCAT|; but the lines of InC code inside an
|internal| definition remain just plain |CODE_BODY_LCAT| lines.
In parsing, we categorise the opening lines [[PREFORM_LCAT]]. Subsequent lines
of grammar are [[PREFORM_GRAMMAR_LCAT]]; but the lines of InC code inside an
[[internal]] definition remain just plain [[CODE_BODY_LCAT]] lines.
@d NOT_A_NONTERMINAL -4
@d A_FLEXIBLE_NONTERMINAL -3
@d A_VORACIOUS_NONTERMINAL -2
@d A_GRAMMAR_NONTERMINAL -1
<<*>>=
#define NOT_A_NONTERMINAL -4
#define A_FLEXIBLE_NONTERMINAL -3
#define A_VORACIOUS_NONTERMINAL -2
#define A_GRAMMAR_NONTERMINAL -1
@<Detect and deal with Preform grammar@> =
<<Detect and deal with Preform grammar>>=
int form = NOT_A_NONTERMINAL; /* one of the four values above, or a non-negative word count */
TEMPORARY_TEXT(pntname)
TEMPORARY_TEXT(header)
@<Parse a Preform nonterminal header line@>;
if (form != NOT_A_NONTERMINAL) @<Record a Preform nonterminal here@>;
<<Parse a Preform nonterminal header line>>;
if (form != NOT_A_NONTERMINAL) <<Record a Preform nonterminal here>>;
DISCARD_TEXT(pntname)
DISCARD_TEXT(header)
@ The keyword |internal| can be followed by an indication of the number
@ The keyword [[internal]] can be followed by an indication of the number
of words the nonterminal will match: usually a decimal non-negative number,
but optionally a question mark |?| to indicate voracity.
but optionally a question mark [[?]] to indicate voracity.
@<Parse a Preform nonterminal header line@> =
<<Parse a Preform nonterminal header line>>=
match_results mr = Regexp::create_mr();
if (Regexp::match(&mr, L->text, L"(<%p+>) ::=%c*")) {
form = A_GRAMMAR_NONTERMINAL;
Str::copy(pntname, mr.exp[0]);
Str::copy(header, mr.exp[0]);
@<Parse the subsequent lines as Preform grammar@>;
<<Parse the subsequent lines as Preform grammar>>;
} else if (Regexp::match(&mr, L->text, L"((<%p+>) internal %?) {%c*")) {
form = A_VORACIOUS_NONTERMINAL;
Str::copy(pntname, mr.exp[1]);
@ -101,16 +102,16 @@ but optionally a question mark |?| to indicate voracity.
@ Each Preform nonterminal defined in the tangle will cause one of these
structures to be created:
=
<<*>>=
typedef struct preform_nonterminal {
struct text_stream *nt_name; /* e.g., |<action-clause>| */
struct text_stream *unangled_name; /* e.g., |action-clause| */
struct text_stream *as_C_identifier; /* e.g., |action_clause_NTM| */
struct text_stream *nt_name; /* e.g., [[<action-clause>]] */
struct text_stream *unangled_name; /* e.g., [[action-clause]] */
struct text_stream *as_C_identifier; /* e.g., [[action_clause_NTM]] */
int as_function; /* defined internally, that is, parsed by a C language_function */
int voracious; /* a voracious nonterminal: see "The English Syntax of Inform" */
int min_word_count; /* for internals only */
int max_word_count;
int takes_pointer_result; /* right-hand formula defines |*XP|, not |*X| */
int takes_pointer_result; /* right-hand formula defines [[*XP]], not [[*X]] */
struct source_line *where_defined;
struct preform_nonterminal *next_pnt_alphabetically;
CLASS_DEFINITION
@ -118,21 +119,21 @@ typedef struct preform_nonterminal {
@ We will
@<Record a Preform nonterminal here@> =
<<Record a Preform nonterminal here>>=
preform_nonterminal *pnt = CREATE(preform_nonterminal);
pnt->where_defined = L;
pnt->nt_name = Str::duplicate(pntname);
pnt->unangled_name = Str::duplicate(pntname);
pnt->as_C_identifier = Str::duplicate(pntname);
pnt->next_pnt_alphabetically = NULL;
@<Apply unangling cream to name@>;
@<Compose a C identifier for the nonterminal@>;
@<Work out the parsing characteristics of the nonterminal@>;
<<Apply unangling cream to name>>;
<<Compose a C identifier for the nonterminal>>;
<<Work out the parsing characteristics of the nonterminal>>;
@<Insertion-sort this this nonterminal into the alphabetical list@>;
@<Register the nonterminal with the line and paragraph from which it comes@>;
<<Insertion-sort this this nonterminal into the alphabetical list>>;
<<Register the nonterminal with the line and paragraph from which it comes>>;
@<Apply unangling cream to name@> =
<<Apply unangling cream to name>>=
match_results mr = Regexp::create_mr();
if (Regexp::match(&mr, pntname, L"%<(%c*)%>")) pnt->unangled_name = Str::duplicate(mr.exp[0]);
Regexp::dispose_of(&mr);
@ -142,7 +143,7 @@ will be represented by a pointer to a unique data structure for it. Inweb
automatically compiles code to create these pointers; and here's how it
works out their names.
@<Compose a C identifier for the nonterminal@> =
<<Compose a C identifier for the nonterminal>>=
Str::delete_first_character(pnt->as_C_identifier);
LOOP_THROUGH_TEXT(pos, pnt->as_C_identifier) {
if (Str::get(pos) == '-') Str::put(pos, '_');
@ -155,9 +156,10 @@ de Scudéry, published around 1650, runs to 1,954,300 words. If you can write
an Inform source text 500 times longer than that, then you may need to raise
the following definition:
@d INFINITE_WORD_COUNT 1000000000
<<*>>=
#define INFINITE_WORD_COUNT 1000000000
@<Work out the parsing characteristics of the nonterminal@> =
<<Work out the parsing characteristics of the nonterminal>>=
pnt->voracious = FALSE; if (form == A_VORACIOUS_NONTERMINAL) pnt->voracious = TRUE;
pnt->as_function = TRUE; if (form == A_GRAMMAR_NONTERMINAL) pnt->as_function = FALSE;
@ -174,7 +176,7 @@ the following definition:
pnt->min_word_count = min;
pnt->max_word_count = max;
@<Insertion-sort this this nonterminal into the alphabetical list@> =
<<Insertion-sort this this nonterminal into the alphabetical list>>=
if (alphabetical_list_of_nonterminals == NULL) alphabetical_list_of_nonterminals = pnt;
else {
int placed = FALSE;
@ -197,24 +199,24 @@ the following definition:
if (placed == FALSE) last->next_pnt_alphabetically = pnt;
}
@<Register the nonterminal with the line and paragraph from which it comes@> =
<<Register the nonterminal with the line and paragraph from which it comes>>=
/* Record on the source line which nonterminal it declares. */
L->preform_nonterminal_defined = pnt;
/* Preform_theme is set by InCSupport::new_tag_declared when a tag named
   "Preform" is declared; while it is unset (presumably NULL until then --
   TODO confirm) no tag is added to the paragraph. */
if (Preform_theme) Tags::add_to_paragraph(L->owning_paragraph, Preform_theme, NULL);
/* Lines in this category are the ones InCSupport::will_insert_in_tangle
   answers TRUE for, and so are tangled by InCSupport::insert_in_tangle. */
L->category = PREFORM_LCAT;
/* Keep a copy of header, which is supplied by the code including this
   fragment. */
L->text_operand = Str::duplicate(header);
@h Parsing the body of Preform grammar.
After a line like |<action-clause> ::=|, Preform grammar follows on subsequent
@ \section{Parsing the body of Preform grammar.}
After a line like [[<action-clause> ::=]], Preform grammar follows on subsequent
lines until we hit the end of the paragraph, or a white-space line, whichever
comes first. Each line of grammar is categorised |PREFORM_GRAMMAR_LCAT|.
comes first. Each line of grammar is categorised [[PREFORM_GRAMMAR_LCAT]].
If we have a line with an arrow, like so:
= (text)
porcupine tree ==> { 2, - }{}
=
then the text on the left goes into |text_operand| and the right into
|text_operand2|, with the arrow itself (and white space around it) cut out.
@<Parse the subsequent lines as Preform grammar@> =
porcupine tree ==> { 2, - }{}
then the text on the left goes into [[text_operand]] and the right into
[[text_operand2]], with the arrow itself (and white space around it) cut out.
<<Parse the subsequent lines as Preform grammar>>=
Tags::add_by_name(L->owning_paragraph, I"Preform");
source_line *AL;
for (AL = L; (AL) && (AL->category == CODE_BODY_LCAT); AL = AL->next_line) {
@ -228,29 +230,29 @@ then the text on the left goes into |text_operand| and the right into
AL->text_operand = AL->text;
AL->text_operand2 = Str::new();
}
@<Remove any C comment from the left side of the arrow@>;
@<Detect any nonterminal variables being set on the right side of the arrow@>;
<<Remove any C comment from the left side of the arrow>>;
<<Detect any nonterminal variables being set on the right side of the arrow>>;
Regexp::dispose_of(&mr);
}
@ In case we have a comment at the end of the grammar, like this:
= (text)
porcupine tree /* what happens now? */
=
we want to remove it. The regular expression here isn't terribly legible, but
trust me, it's correct.
@<Remove any C comment from the left side of the arrow@> =
<<Remove any C comment from the left side of the arrow>>=
/* The pattern is: captured text, then a C comment opener, any characters,
   a C comment closer, and optional trailing spaces. On a match the left-hand
   operand is replaced by a copy of the captured text only; otherwise it is
   left untouched. */
match_results mr = Regexp::create_mr();
if (Regexp::match(&mr, AL->text_operand, L"(%c*)%/%*%c*%*%/ *"))
AL->text_operand = Str::duplicate(mr.exp[0]);
Regexp::dispose_of(&mr);
@ Note that nonterminal variables are, by default, integers. If their names
are divided internally with a colon, however, as |<<structure:name>>|, then
they have the type |structure *|.
are divided internally with a colon, however, as [[<<structure:name>>]], then
they have the type [[structure *]].
@<Detect any nonterminal variables being set on the right side of the arrow@> =
<<Detect any nonterminal variables being set on the right side of the arrow>>=
TEMPORARY_TEXT(to_scan) Str::copy(to_scan, AL->text_operand2);
match_results mr = Regexp::create_mr();
while (Regexp::match(&mr, to_scan, L"%c*?<<(%P+?)>> =(%c*)")) {
@ -265,7 +267,7 @@ they have the type |structure *|.
LOOP_OVER(ntv, nonterminal_variable)
if (Str::eq(ntv->ntv_name, var_given))
break;
if (ntv == NULL) @<This one's new, so create a new nonterminal variable@>;
if (ntv == NULL) <<This one's new, so create a new nonterminal variable>>;
DISCARD_TEXT(var_given)
DISCARD_TEXT(type_given)
}
@ -273,19 +275,19 @@ they have the type |structure *|.
Regexp::dispose_of(&mr);
@ Nonterminal variables are actually just global C variables, and their C
identifiers need to avoid hyphens and colons. For example, |<<kind:ref>>|
has identifier |"kind_ref_NTMV"|. Each one is recorded in a structure thus:
identifiers need to avoid hyphens and colons. For example, [[<<kind:ref>>]]
has identifier [["kind_ref_NTMV"]]. Each one is recorded in a structure thus:
=
<<*>>=
typedef struct nonterminal_variable {
struct text_stream *ntv_name; /* e.g., |"num"| */
struct text_stream *ntv_type; /* e.g., |"int"| */
struct text_stream *ntv_identifier; /* e.g., |"num_NTMV"| */
struct text_stream *ntv_name; /* e.g., [["num"]] */
struct text_stream *ntv_type; /* e.g., [["int"]] */
struct text_stream *ntv_identifier; /* e.g., [["num_NTMV"]] */
struct source_line *first_mention; /* first usage */
CLASS_DEFINITION
} nonterminal_variable;
@<This one's new, so create a new nonterminal variable@> =
<<This one's new, so create a new nonterminal variable>>=
ntv = CREATE(nonterminal_variable);
ntv->ntv_name = Str::duplicate(var_given);
ntv->ntv_type = Str::duplicate(type_given);
@ -296,12 +298,12 @@ typedef struct nonterminal_variable {
WRITE_TO(ntv->ntv_identifier, "%S_NTMV", var_given);
ntv->first_mention = AL;
@h Parsing I-literals.
@ \section{Parsing I-literals.}
A simpler but useful further addition to C is that we recognise a new form
of string literal: |I"quartz"| makes a constant text stream with the content
of string literal: [[I"quartz"]] makes a constant text stream with the content
"quartz".
@<Detect and deal with I-literals@> =
<<Detect and deal with I-literals>>=
for (int i = 0, quoted = FALSE; i < Str::len(L->text); i++) {
if (Str::get_at(L->text, i) == '"')
if ((Str::get_at(L->text, i-1) != '\\') &&
@ -309,10 +311,10 @@ of string literal: |I"quartz"| makes a constant text stream with the content
quoted = quoted?FALSE:TRUE;
if ((fundamental_mode != WEAVE_MODE) && (quoted == FALSE) &&
(Str::get_at(L->text, i) == 'I') && (Str::get_at(L->text, i+1) == '"'))
@<This looks like an I-literal@>;
<<This looks like an I-literal>>;
}
@<This looks like an I-literal@> =
<<This looks like an I-literal>>=
TEMPORARY_TEXT(lit)
int i_was = i;
int ended = FALSE;
@ -321,14 +323,14 @@ of string literal: |I"quartz"| makes a constant text stream with the content
if (Str::get_at(L->text, i) == '"') { ended = TRUE; break; }
PUT_TO(lit, Str::get_at(L->text, i++));
}
if (ended) @<This is definitely an I-literal@>;
if (ended) <<This is definitely an I-literal>>;
DISCARD_TEXT(lit)
@ Each I-literal results in an instance of the following being created. The
I-literal |I"quartz"| would have content |quartz| and identifier something
like |TL_IS_123|.
I-literal [[I"quartz"]] would have content [[quartz]] and identifier something
like [[TL_IS_123]].
=
<<*>>=
typedef struct text_literal {
struct text_stream *tl_identifier;
struct text_stream *tl_content;
@ -336,14 +338,14 @@ typedef struct text_literal {
} text_literal;
@ So suppose we've got a line of web such as
= (text)
text_stream *T = I"quartz";
=
We create the necessary I-literal, and splice the line so that it now reads
|text_stream *T = TL_IS_123;|. (That's why we don't call any of this on a
[[text_stream *T = TL_IS_123;]]. (That's why we don't call any of this on a
weave run; we're actually amending the code of the web.)
@<This is definitely an I-literal@> =
<<This is definitely an I-literal>>=
text_literal *tl = CREATE(text_literal);
tl->tl_identifier = Str::new();
WRITE_TO(tl->tl_identifier, "TL_IS_%d", tl->allocation_id);
@ -360,11 +362,11 @@ weave run; we're actually amending the code of the web.)
DISCARD_TEXT(before)
DISCARD_TEXT(after)
@h Tangling methods.
Suppress the expansion of macros occurring on a line introduced by a |//|
@ \section{Tangling methods.}
Suppress the expansion of macros occurring on a line introduced by a [[//]]
comment. (This avoids problems when tangling code that's been commented out.)
=
<<*>>=
int InCSupport::suppress_expansion(programming_language *self, text_stream *material) {
if ((Str::get_at(material, 0) == '/') && (Str::get_at(material, 1) == '/'))
return TRUE;
@ -372,23 +374,23 @@ int InCSupport::suppress_expansion(programming_language *self, text_stream *mate
}
@ InC does three things which C doesn't: it allows the namespaced function
names like |Section::function()|; it allows Foundation-class-style string
literals marked with an I, |I"like this"|, which we will call I-literals;
names like [[Section::function()]]; it allows Foundation-class-style string
literals marked with an I, [[I"like this"]], which we will call I-literals;
and it allows Preform natural language grammar to be mixed in with code.
The following routine is a hook needed for two of these. It recognises
two special tangling commands:
(a) |[[nonterminals]]| tangles to code which initialises the Preform
(a) [[[[nonterminals]]]] tangles to code which initialises the Preform
grammar. (The grammar defines the meaning of nonterminals such as
|<sentence>|. They're not terminal in the sense that they are defined
[[<sentence>]]. They're not terminal in the sense that they are defined
as combinations of other things.) In practice, this needs to appear once
in any program using Preform. For the Inform project, that's done in the
|words| module of the Inform 7 compiler.
[[words]] module of the Inform 7 compiler.
(b) |[[textliterals]]| tangles to code which initialises the I-literals.
(b) [[[[textliterals]]]] tangles to code which initialises the I-literals.
=
<<*>>=
int InCSupport::special_tangle_command(programming_language *me, OUTPUT_STREAM, text_stream *data) {
if (Str::eq_wide_string(data, L"nonterminals")) {
WRITE("register_tangled_nonterminals();\n");
@ -404,15 +406,15 @@ int InCSupport::special_tangle_command(programming_language *me, OUTPUT_STREAM,
@ Time to predeclare things. InC is going to create a special function, right
at the end of the code, which "registers" the nonterminals, creating their
run-time data structures; we must predeclare this function. It will set values
for the pointers |action_clause_NTM|, and so on; these are global variables,
which we initially declare as |NULL|.
for the pointers [[action_clause_NTM]], and so on; these are global variables,
which we initially declare as [[NULL]].
We also declare the nonterminal variables like |kind_ref_NTMV|, initialising
all integers to zero and all pointers to |NULL|.
We also declare the nonterminal variables like [[kind_ref_NTMV]], initialising
all integers to zero and all pointers to [[NULL]].
We do something similar, but simpler, to declare text stream constants.
=
<<*>>=
void InCSupport::additional_predeclarations(programming_language *self, text_stream *OUT, web *W) {
chapter *C;
section *S;
@ -441,7 +443,7 @@ void InCSupport::additional_predeclarations(programming_language *self, text_str
@ And here are the promised routines, which appear at the very end of the code.
They make use of macros and data structures defined in the Inform 7 web.
=
<<*>>=
void InCSupport::gnabehs(programming_language *self, text_stream *OUT, web *W) {
WRITE("void register_tangled_nonterminals(void) {\n");
chapter *C;
@ -472,10 +474,10 @@ void InCSupport::gnabehs(programming_language *self, text_stream *OUT, web *W) {
@ That's it for big structural additions to the tangled C code. Now we turn
to how to tangle the lines we've given special categories to.
We need to tangle |PREFORM_LCAT| lines (those holding nonterminal declarations)
We need to tangle [[PREFORM_LCAT]] lines (those holding nonterminal declarations)
in a special way...
=
<<*>>=
int InCSupport::will_insert_in_tangle(programming_language *self, source_line *L) {
if (L->category == PREFORM_LCAT) return TRUE;
return FALSE;
@ -483,20 +485,20 @@ int InCSupport::will_insert_in_tangle(programming_language *self, source_line *L
@ ...and this is how. As can be seen, each nonterminal turns into a C function.
In the case of an internal definition, like
= (text)
<k-kind-for-template> internal {
=
we tangle this opening line to
= (text as code)
int k_kind_for_template_NTM(wording W, int *X, void **XP) {
=
that is, to a function which returns |TRUE| if it makes a match on the text
excerpt in Inform's source text, |FALSE| otherwise; if it matches and produces
an integer and/or pointer result, these are copied into |*X| and |*XP|. The
that is, to a function which returns [[TRUE]] if it makes a match on the text
excerpt in Inform's source text, [[FALSE]] otherwise; if it matches and produces
an integer and/or pointer result, these are copied into [[*X]] and [[*XP]]. The
remaining lines of the function are tangled unaltered, i.e., following the
same rules as for the body of any other C function.
=
<<*>>=
void InCSupport::insert_in_tangle(programming_language *self, text_stream *OUT, source_line *L) {
preform_nonterminal *pnt = L->preform_nonterminal_defined;
if (pnt->as_function) {
@ -505,73 +507,73 @@ void InCSupport::insert_in_tangle(programming_language *self, text_stream *OUT,
} else {
WRITE("int %SC(int *X, void **XP, int *R, void **RP, wording *FW, wording W) {\n",
pnt->as_C_identifier);
@<Compile the body of the compositor function@>;
<<Compile the body of the compositor function>>;
WRITE("}\n");
}
}
@ On the other hand, a grammar nonterminal tangles to a "compositor function".
Thus the opening line
= (text)
<action-clause> ::=
=
tangles to a function header:
= (text as code)
int action_clause_NTMC(int *X, void **XP, int *R, void **RP, wording *FW, wording W) {
=
Subsequent lines of the nonterminal are categorised |PREFORM_GRAMMAR_LCAT|
Subsequent lines of the nonterminal are categorised [[PREFORM_GRAMMAR_LCAT]]
and thus won't tangle to code at all, by the usual rules; so we tangle from
them directly here.
Composition is what happens after a successful match of the text in the
word range |W|. The idea is that, especially if the pattern was
word range [[W]]. The idea is that, especially if the pattern was
complicated, we will need to "compose" the results of parsing individual
pieces of it into a result for the whole. These partial results can be found
in the arrays |R[n]| and |RP[n]| passed as parameters; recall that every
in the arrays [[R[n]]] and [[RP[n]]] passed as parameters; recall that every
nonterminal has in principle both an integer and a pointer result, though
often one or both is undefined.
A simple example would be
= (text)
<cardinal-number> + <cardinal-number> ==> R[1] + R[2]
=
where the composition function would be called on a match of, say, "$5 + 7$",
and would find the values 5 and 7 in |R[1]| and |R[2]| respectively. It would
then add these together, store 12 in |*X|, and return |TRUE| to show that all
and would find the values 5 and 7 in [[R[1]]] and [[R[2]]] respectively. It would
then add these together, store 12 in [[*X]], and return [[TRUE]] to show that all
was well.
A more typical example, drawn from the actual Inform 7 web, is:
= (text)
<k-kind-of-kind> <k-formal-variable> ==> { - , Kinds::var_construction(R[2], RP[1]) }
=
which says that the composite result -- the right-hand formula -- is formed by
calling a particular routine on the integer result of subexpression 2
(|<k-formal-variable>|) and the pointer result of subexpression 1
(|<k-kind-of-kind>|). The answer, the composite result, that is, must be
placed in |*X| and |*XP|. (Composition functions are also allowed to
invalidate the result, by returning |FALSE|, and have other tricks up their
([[<k-formal-variable>]]) and the pointer result of subexpression 1
([[<k-kind-of-kind>]]). The answer, the composite result, that is, must be
placed in [[*X]] and [[*XP]]. (Composition functions are also allowed to
invalidate the result, by returning [[FALSE]], and have other tricks up their
sleeves, but none of that is handled by Inweb: see the Inform 7 web for more
on this.)
@<Compile the body of the compositor function@> =
<<Compile the body of the compositor function>>=
int needs_collation = FALSE;
for (source_line *AL = L->next_line;
((AL) && (AL->category == PREFORM_GRAMMAR_LCAT));
AL = AL->next_line)
if (Str::len(AL->text_operand2) > 0)
needs_collation = TRUE;
if (needs_collation) @<At least one of the grammar lines provided an arrow and formula@>
else @<None of the grammar lines provided an arrow and formula@>;
if (needs_collation) <<At least one of the grammar lines provided an arrow and formula>>
else <<None of the grammar lines provided an arrow and formula>>;
WRITE("\treturn TRUE;\n");
@ In the absence of any |==>| formulae, we simply set |*X| to the default
@ In the absence of any [[==>]] formulae, we simply set [[*X]] to the default
result supplied; this is the production number within the grammar (0 for the
first line, 1 for the second, and so on) by default, with an undefined pointer.
@<None of the grammar lines provided an arrow and formula@> =
<<None of the grammar lines provided an arrow and formula>>=
/* Writes one line of C into the tangled compositor function: the integer
   result *X is simply set to R[0]. */
WRITE("\t*X = R[0];\n");
@<At least one of the grammar lines provided an arrow and formula@> =
<<At least one of the grammar lines provided an arrow and formula>>=
WRITE("\tswitch(R[0]) {\n");
int c = 0;
for (source_line *AL = L->next_line;
@ -581,7 +583,7 @@ first line, 1 for the second, and so on) by default, with an undefined pointer.
if (Str::len(formula) > 0) {
LanguageMethods::insert_line_marker(OUT, AL->owning_section->sect_language, AL);
WRITE("\t\tcase %d: ", c);
@<Tangle the formula on the right-hand side of the arrow@>;
<<Tangle the formula on the right-hand side of the arrow>>;
WRITE(";\n");
WRITE("#pragma clang diagnostic push\n");
WRITE("#pragma clang diagnostic ignored \"-Wunreachable-code\"\n");
@ -597,19 +599,19 @@ and that it produces an integer or a pointer according to what the
non-terminal expects as its main result. But we make one exception: if
the formula begins with a paragraph macro, then it can't be an expression,
and instead we read it as code in a void context. (This code will, we
assume, set |*X| and/or |*XP| in some ingenious way of its own.)
assume, set [[*X]] and/or [[*XP]] in some ingenious way of its own.)
Within the body of the formula, we allow a pseudo-macro to work: |WR[n]|
expands to word range |n| in the match which we're compositing. This actually
Within the body of the formula, we allow a pseudo-macro to work: [[WR[n]]]
expands to word range [[n]] in the match which we're compositing. This actually
expands like so:
= (text as code)
action_clause_NTM->range_result[n]
=
which saves a good deal of typing. (A regular C preprocessor macro couldn't
easily do this, because it needs to include the identifier name of the
nonterminal being parsed.)
@<Tangle the formula on the right-hand side of the arrow@> =
<<Tangle the formula on the right-hand side of the arrow>>=
match_results mr = Regexp::create_mr();
if (Regexp::match(&mr, formula, L"{ *(%c*?) *} *(%c*)")) {
TEMPORARY_TEXT(rewritten)
@ -619,7 +621,7 @@ nonterminal being parsed.)
InCSupport::expand_formula(OUT, AL, pnt, mr.exp[1], TRUE);
DISCARD_TEXT(rewritten)
} else {
if (!Regexp::match(&mr, formula, L"@<%c*")) {
if (!Regexp::match(&mr, formula, L"<<%c*")) {
if (pnt->takes_pointer_result) WRITE("*XP = ");
else WRITE("*X = ");
}
@ -629,7 +631,7 @@ nonterminal being parsed.)
@
=
<<*>>=
void InCSupport::expand_formula(text_stream *OUT, source_line *AL, preform_nonterminal *pnt,
text_stream *formula, int full) {
TEMPORARY_TEXT(expanded)
@ -657,7 +659,7 @@ void InCSupport::expand_formula(text_stream *OUT, source_line *AL, preform_nonte
@ Going down from line level to the tangling of little excerpts of C code,
we also provide for some other special extensions to C.
=
<<*>>=
int InCSupport::tangle_line(programming_language *self, text_stream *OUT, text_stream *original) {
InCSupport::tangle_line_inner(OUT, NULL, NULL, original);
return TRUE;
@ -666,13 +668,13 @@ int InCSupport::tangle_line(programming_language *self, text_stream *OUT, text_s
void InCSupport::tangle_line_inner(text_stream *OUT, source_line *AL, preform_nonterminal *pnt, text_stream *original) {
int fcall_pos = -1;
for (int i = 0; i < Str::len(original); i++) {
@<Double-colons are namespace dividers in function names@>;
@<Long arrow and braces assigns Preform results@>;
<<Double-colons are namespace dividers in function names>>;
<<Long arrow and braces assigns Preform results>>;
if (Str::get_at(original, i) == '<') {
if (Str::get_at(original, i+1) == '<') {
@<Double-angles sometimes delimit Preform variable names@>;
<<Double-angles sometimes delimit Preform variable names>>;
} else {
@<Single-angles sometimes delimit Preform nonterminal names@>;
<<Single-angles sometimes delimit Preform nonterminal names>>;
}
}
if (i == fcall_pos) {
@ -683,35 +685,36 @@ void InCSupport::tangle_line_inner(text_stream *OUT, source_line *AL, preform_no
}
}
@ For example, a function name like |Text::Parsing::get_next| must be rewritten
as |Text__Parsing__get_next| since colons aren't valid in C identifiers. The
@ For example, a function name like [[Text::Parsing::get_next]] must be rewritten
as [[Text__Parsing__get_next]] since colons aren't valid in C identifiers. The
following is prone to all kinds of misreadings, of course; it picks up any use
of |::| between an alphanumeric character and a letter. In particular, code
of [[::]] between an alphanumeric character and a letter. In particular, code
like
= (text)
printf("Trying Text::Parsing::get_next now.\n");
=
will be rewritten as
= (text as code)
printf("Trying Text__Parsing__get_next now.\n");
=
This is probably unwanted, but it doesn't matter, because these Inform-only
extension features of Inweb aren't intended for general use: only for
Inform, where no misreadings occur.
@<Double-colons are namespace dividers in function names@> =
<<Double-colons are namespace dividers in function names>>=
/* A pair of colons at positions i and i+1 is rewritten as a double underscore
   only when the character before it is alphanumeric and the character after
   it is a letter; the extra i++ steps over the second colon before the
   enclosing loop moves on to the next character.
   NOTE(review): isalpha and isalnum are handed whatever Str::get_at returns;
   if that can be a character outside the unsigned char range, the C library
   leaves their behaviour undefined -- confirm against Str::get_at. */
if ((i > 0) && (Str::get_at(original, i) == ':') && (Str::get_at(original, i+1) == ':') &&
(isalpha(Str::get_at(original, i+2))) && (isalnum(Str::get_at(original, i-1)))) {
WRITE("__"); i++;
continue;
}
@ For example, |==> { A, B }| assigns the expressions A and B as the results
@ For example, [[==> { A, B }]] assigns the expressions A and B as the results
of parsing a Preform nonterminal.
@d MAX_PREFORM_RESULT_CLAUSES 10
<<*>>=
#define MAX_PREFORM_RESULT_CLAUSES 10
@<Long arrow and braces assigns Preform results@> =
<<Long arrow and braces assigns Preform results>>=
if ((Str::get_at(original, i) == '=') &&
(Str::get_at(original, i+1) == '=') &&
(Str::get_at(original, i+2) == '>') &&
@ -719,11 +722,11 @@ of parsing a Preform nonterminal.
(Str::get_at(original, i+4) == '{')) {
int clauses, err = FALSE;
text_stream *clause[MAX_PREFORM_RESULT_CLAUSES];
@<Find the clauses@>;
<<Find the clauses>>;
TEMPORARY_TEXT(extra)
if (clauses == 1) @<Recognise one-clause specials@>;
if (clauses == 1) <<Recognise one-clause specials>>;
if (clauses < 2) err = TRUE;
if (err == FALSE) @<Write the assignments@>;
if (err == FALSE) <<Write the assignments>>;
if (err) {
Main::error_in_web(I"malformed '{ , }' formula", AL);
if (AL == NULL) WRITE_TO(STDERR, "%S\n", original);
@ -734,7 +737,7 @@ of parsing a Preform nonterminal.
@ The clauses are a comma-separated list inside the braces, except that the
commas need to be outside of any parentheses.
@<Find the clauses@> =
<<Find the clauses>>=
clauses = 1;
clause[0] = Str::new();
int bl = 0;
@ -761,7 +764,7 @@ commas need to be outside of any parentheses.
are implemented by rewriting them in two clauses, and sometimes adding some
extra code to execute after the assignments.
@<Recognise one-clause specials@> =
<<Recognise one-clause specials>>=
if (Str::eq(clause[0], I"fail")) {
clause[1] = Str::new(); clauses = 2;
WRITE_TO(extra, "return FAIL_NONTERMINAL;");
@ -808,10 +811,10 @@ extra code to execute after the assignments.
for the current nonterminal; any subsequent clauses must specify which
variable is to be set. A dash means make no assignment.
For example, |{ R[1], - , <<to>> = R[2] }| sets |*X| to |R[1]|, does not
alter |*XP|, and sets |<<to>>| to |R[2]|.
For example, [[{ R[1], - , <<to>> = R[2] }]] sets [[*X]] to [[R[1]]], does not
alter [[*XP]], and sets [[<<to>>]] to [[R[2]]].
@<Write the assignments@> =
<<Write the assignments>>=
for (int c=0; c<clauses; c++) {
if (Str::ne(clause[c], I"-")) {
switch (c) {
@ -842,11 +845,11 @@ alter |*XP|, and sets |<<to>>| to |R[2]|.
}
@ Double angle brackets around a valid Preform variable name expand into its
C identifier; for example, |<<R>>| becomes |most_recent_result|.
We take no action if it's not a valid name, so |<<fish>>| becomes
just |<<fish>>|.
C identifier; for example, [[<<R>>]] becomes [[most_recent_result]].
We take no action if it's not a valid name, so [[<<fish>>]] becomes
just [[<<fish>>]].
@<Double-angles sometimes delimit Preform variable names@> =
<<Double-angles sometimes delimit Preform variable names>>=
match_results mr = Regexp::create_mr();
TEMPORARY_TEXT(check_this)
Str::substr(check_this, Str::at(original, i), Str::end(original));
@ -863,22 +866,22 @@ just |<<fish>>|.
DISCARD_TEXT(check_this)
Regexp::dispose_of(&mr);
@ Similarly for nonterminals; |<k-kind>| might become |k_kind_NTM|.
@ Similarly for nonterminals; [[<k-kind>]] might become [[k_kind_NTM]].
Here, though, there's a complication:
= (text)
if (<k-kind>(W)) { ...
=
must expand to:
= (text as code)
if (Text__Languages__parse_nt_against_word_range(k_kind_NTM, W, NULL, NULL)) { ...
=
This is all syntactic sugar to make it easier to see parsing in action.
Anyway, it means we have to set |fcall_pos| to remember to add in the
two |NULL| arguments when we hit the |)| a little later. We're doing all
Anyway, it means we have to set [[fcall_pos]] to remember to add in the
two [[NULL]] arguments when we hit the [[)]] a little later. We're doing all
of this fairly laxly, but as before: it only needs to work for Inform,
and Inform doesn't cause any trouble.
@<Single-angles sometimes delimit Preform nonterminal names@> =
<<Single-angles sometimes delimit Preform nonterminal names>>=
match_results mr = Regexp::create_mr();
TEMPORARY_TEXT(check_this)
Str::substr(check_this, Str::at(original, i), Str::end(original));
@ -912,7 +915,7 @@ name. They're not very efficient, but experience shows that even on a web
the size of Inform 7, there's no significant gain from speeding them up
(with, say, a hash table).
=
<<*>>=
preform_nonterminal *InCSupport::nonterminal_by_name(text_stream *name) {
preform_nonterminal *pnt;
LOOP_OVER(pnt, preform_nonterminal)
@ -921,12 +924,12 @@ preform_nonterminal *InCSupport::nonterminal_by_name(text_stream *name) {
return NULL;
}
@ The special variables |<<R>>| and |<<RP>>| hold the results,
@ The special variables [[<<R>>]] and [[<<RP>>]] hold the results,
integer and pointer, for the most recent successful match. They're defined
in the Inform 7 web (see the code for parsing text against Preform grammars),
not by Inweb.
=
<<*>>=
text_stream *InCSupport::nonterminal_variable_identifier(text_stream *name) {
if (Str::eq_wide_string(name, L"r")) return I"most_recent_result";
if (Str::eq_wide_string(name, L"rp")) return I"most_recent_result_p";
@ -944,10 +947,10 @@ simply thrown away. It doesn't appear anywhere in the C code tangled by
Inweb.
So what does happen to it? The answer is that it's transcribed into an
auxiliary file called |Syntax.preform|, which Inform, once it is compiled,
auxiliary file called [[Syntax.preform]], which Inform, once it is compiled,
will read in at run-time. This is how that happens:
=
<<*>>=
void InCSupport::additional_tangling(programming_language *self, web *W, tangle_target *target) {
if (NUMBER_CREATED(preform_nonterminal) > 0) {
pathname *P = Reader::tangled_folder(W);
@ -965,7 +968,7 @@ void InCSupport::additional_tangling(programming_language *self, web *W, tangle_
if (Bibliographic::data_exists(W->md, I"Preform Language"))
WRITE("language %S\n", Bibliographic::get_datum(W->md, I"Preform Language"));
@<Actually write out the Preform syntax@>;
<<Actually write out the Preform syntax>>;
STREAM_CLOSE(OUT);
}
}
@ -976,13 +979,13 @@ right-hand side of the arrow in a grammar line uses a paragraph macro which
mentions a problem message, then we transcribe a Preform comment to that
effect. (This really is a comment: Inform ignores it, but it makes the
file more comprehensible to human eyes.) For example,
= (text)
<article> kind ==> @<Issue C8PropertyOfKind problem@>
=
<article> kind ==> <<Issue C8PropertyOfKind problem>>
(The code in this paragraph macro will indeed issue this problem message, we
assume.)
@<Actually write out the Preform syntax@> =
<<Actually write out the Preform syntax>>=
chapter *C;
section *S;
LOOP_WITHIN_TANGLE(C, S, target)
@ -1004,13 +1007,13 @@ assume.)
}
}
@h Weaving.
@ \section{Weaving.}
The following isn't a method, but is called by the weaver directly. It adds
additional endnotes to the woven form of a paragraph which includes Preform
nonterminal definitions; it is meaningful only in the TeX format, and should
probably be dropped.
=
<<*>>=
void InCSupport::weave_grammar_index(OUTPUT_STREAM) {
WRITE("\\raggedright\\tolerance=10000");
preform_nonterminal *pnt;
@ -1022,8 +1025,8 @@ void InCSupport::weave_grammar_index(OUTPUT_STREAM) {
(pnt->as_function)?" (internal)":"",
pnt->where_defined->owning_section->md->sect_range);
int said_something = FALSE;
@<List where the nonterminal appears in other Preform declarations@>;
@<List where the nonterminal is called from Inform code@>;
<<List where the nonterminal appears in other Preform declarations>>;
<<List where the nonterminal is called from Inform code>>;
if (said_something == FALSE)
WRITE("\\par\\hangindent=3em{\\it unused}\n\n");
}
@ -1032,7 +1035,7 @@ void InCSupport::weave_grammar_index(OUTPUT_STREAM) {
WRITE("\\hrule\\smallbreak\n");
}
@<List where the nonterminal is called from Inform code@> =
<<List where the nonterminal is called from Inform code>>=
section *S;
LOOP_OVER(S, section) S->scratch_flag = FALSE;
hash_table_entry *hte = Analyser::find_hash_entry_for_section(
@ -1057,7 +1060,7 @@ void InCSupport::weave_grammar_index(OUTPUT_STREAM) {
WRITE("\n\n");
}
@<List where the nonterminal appears in other Preform declarations@> =
<<List where the nonterminal appears in other Preform declarations>>=
section *S;
LOOP_OVER(S, section) S->scratch_flag = FALSE;
hash_table_entry *hte = Analyser::find_hash_entry_for_section(
@ -1082,11 +1085,11 @@ void InCSupport::weave_grammar_index(OUTPUT_STREAM) {
WRITE("\n\n");
}
@h Weaving methods.
@ \section{Weaving methods.}
If we're weaving just a document of Preform grammar, then we skip any lines
of C code which appear in |internal| nonterminal definitions:
of C code which appear in [[internal]] nonterminal definitions:
=
<<*>>=
int skipping_internal = FALSE, preform_production_count = 0;
int InCSupport::skip_in_weaving(programming_language *self, weave_order *wv, source_line *L) {
@ -1103,7 +1106,7 @@ int InCSupport::skip_in_weaving(programming_language *self, weave_order *wv, sou
@ And here is the TeX code for displaying Preform grammar:
=
<<*>>=
int InCSupport::weave_code_line(programming_language *self, text_stream *OUT,
weave_order *wv, web *W, chapter *C, section *S, source_line *L,
text_stream *matter, text_stream *concluding_comment) {
@ -1114,17 +1117,17 @@ int InCSupport::weave_code_line(programming_language *self, text_stream *OUT,
}
@ In paragraphs where we spot Preform nonterminals being defined, we're
going to automatically apply the tag |^"Preform"|, but only if it already
going to automatically apply the tag [[^"Preform"]], but only if it already
exists. We watch for it here:
=
<<*>>=
void InCSupport::new_tag_declared(programming_language *self, theme_tag *tag) {
	/* Only a tag whose name is exactly "Preform" is of interest here: it is
	   remembered in Preform_theme, which the nonterminal-registration code
	   consults when deciding whether to tag a paragraph. Any other tag is
	   ignored. */
	if (!Str::eq_wide_string(tag->tag_name, L"Preform")) return;
	Preform_theme = tag;
}
@h Analysis methods.
@ \section{Analysis methods.}
=
<<*>>=
void InCSupport::analyse_code(programming_language *self, web *W) {
preform_nonterminal *pnt;
LOOP_OVER(pnt, preform_nonterminal)

View file

@ -3,12 +3,12 @@
To characterise the relevant differences in behaviour between the
various programming languages supported.
@h Introduction.
@ \section{Introduction.}
The conventions for writing, weaving and tangling a web are really quite
independent of the programming language being written, woven or tangled;
Knuth began literate programming with Pascal, but now uses C, and the original
Pascal webs were mechanically translated into C ones with remarkably little
fuss or bother. Modern LP tools, such as |noweb|, aim to be language-agnostic.
fuss or bother. Modern LP tools, such as [[noweb]], aim to be language-agnostic.
But of course if you act the same on all languages, you give up the benefits
which might follow from knowing something about the languages you actually
write in.
@ -23,64 +23,69 @@ all of them made from this section. That means a lot of simple wrapper routines
which don't do very much. This section may still be useful to read, since it
documents what amounts to an API.
@h Parsing methods.
@ \section{Parsing methods.}
We begin with parsing extensions. When these are used, we have already read
the web into chapters, sections and paragraphs, but for some languages we will
need a more detailed picture.
|PARSE_TYPES_PAR_MTID| gives a language to look for type declarations.
[[PARSE_TYPES_PAR_MTID]] gives a language to look for type declarations.
@e PARSE_TYPES_PAR_MTID
<<*>>=
enum PARSE_TYPES_PAR_MTID
=
<<*>>=
/* Wrapper which calls the PARSE_TYPES_PAR_MTID method on the language pl,
passing it the web W. Note that the wrapper takes the web first, whereas the
method type declared here takes the language first. */
VOID_METHOD_TYPE(PARSE_TYPES_PAR_MTID, programming_language *pl, web *W)
void LanguageMethods::parse_types(web *W, programming_language *pl) {
VOID_METHOD_CALL(pl, PARSE_TYPES_PAR_MTID, W);
}
@ |PARSE_FUNCTIONS_PAR_MTID| is, similarly, for function declarations.
@ [[PARSE_FUNCTIONS_PAR_MTID]] is, similarly, for function declarations.
@e PARSE_FUNCTIONS_PAR_MTID
<<*>>=
enum PARSE_FUNCTIONS_PAR_MTID
=
<<*>>=
/* Wrapper which calls the PARSE_FUNCTIONS_PAR_MTID method on the language pl,
passing it the web W. Note that the wrapper takes the web first, whereas the
method type declared here takes the language first. */
VOID_METHOD_TYPE(PARSE_FUNCTIONS_PAR_MTID, programming_language *pl, web *W)
void LanguageMethods::parse_functions(web *W, programming_language *pl) {
VOID_METHOD_CALL(pl, PARSE_FUNCTIONS_PAR_MTID, W);
}
@ |FURTHER_PARSING_PAR_MTID| is "further" in that it is called when the main
@ [[FURTHER_PARSING_PAR_MTID]] is "further" in that it is called when the main
parser has finished work; it typically looks over the whole web for something
of interest.
@e FURTHER_PARSING_PAR_MTID
<<*>>=
enum FURTHER_PARSING_PAR_MTID
=
<<*>>=
/* Wrapper which calls the FURTHER_PARSING_PAR_MTID method on the language pl,
passing it the web W. Note that the wrapper takes the web first, whereas the
method type declared here takes the language first. */
VOID_METHOD_TYPE(FURTHER_PARSING_PAR_MTID, programming_language *pl, web *W)
void LanguageMethods::further_parsing(web *W, programming_language *pl) {
VOID_METHOD_CALL(pl, FURTHER_PARSING_PAR_MTID, W);
}
@ |SUBCATEGORISE_LINE_PAR_MTID| looks at a single line, after the main parser
@ [[SUBCATEGORISE_LINE_PAR_MTID]] looks at a single line, after the main parser
has given it a category. The idea is not so much to second-guess the parser
(although we can) but to change to a more exotic category which it would
otherwise never produce.
@e SUBCATEGORISE_LINE_PAR_MTID
<<*>>=
enum SUBCATEGORISE_LINE_PAR_MTID
=
<<*>>=
/* Wrapper which calls the SUBCATEGORISE_LINE_PAR_MTID method on the language
pl, passing it the single source line L. Nothing is returned: any change of
category has to be made by the method itself. */
VOID_METHOD_TYPE(SUBCATEGORISE_LINE_PAR_MTID, programming_language *pl, source_line *L)
void LanguageMethods::subcategorise_line(programming_language *pl, source_line *L) {
VOID_METHOD_CALL(pl, SUBCATEGORISE_LINE_PAR_MTID, L);
}
@ Comments have different syntax in different languages. The method here is
expected to look for a comment on the |line|, and if so to return |TRUE|,
expected to look for a comment on the [[line]], and if so to return [[TRUE]],
but not before splicing the non-comment parts of the line before and
within the comment into the supplied strings.
@e PARSE_COMMENT_TAN_MTID
<<*>>=
enum PARSE_COMMENT_TAN_MTID
=
<<*>>=
INT_METHOD_TYPE(PARSE_COMMENT_TAN_MTID, programming_language *pl, text_stream *line, text_stream *before, text_stream *within)
int LanguageMethods::parse_comment(programming_language *pl,
@ -90,17 +95,18 @@ int LanguageMethods::parse_comment(programming_language *pl,
return rv;
}
@h Tangling methods.
@ \section{Tangling methods.}
We take these roughly in order of their effects on the tangled output, from
the top to the bottom of the file.
The top of the tangled file is a header called the "shebang". By default,
there's nothing there, but |SHEBANG_TAN_MTID| allows the language to add one.
For example, Perl prints |#!/usr/bin/perl| here.
there's nothing there, but [[SHEBANG_TAN_MTID]] allows the language to add one.
For example, Perl prints [[#!/usr/bin/perl]] here.
@e SHEBANG_TAN_MTID
<<*>>=
enum SHEBANG_TAN_MTID
=
<<*>>=
VOID_METHOD_TYPE(SHEBANG_TAN_MTID, programming_language *pl, text_stream *OUT, web *W, tangle_target *target)
void LanguageMethods::shebang(OUTPUT_STREAM, programming_language *pl, web *W, tangle_target *target) {
VOID_METHOD_CALL(pl, SHEBANG_TAN_MTID, OUT, W, target);
@ -109,9 +115,10 @@ void LanguageMethods::shebang(OUTPUT_STREAM, programming_language *pl, web *W, t
@ Next is the disclaimer, text warning the human reader that she is looking
at tangled (therefore not original) material.
@e SUPPRESS_DISCLAIMER_TAN_MTID
<<*>>=
enum SUPPRESS_DISCLAIMER_TAN_MTID
=
<<*>>=
INT_METHOD_TYPE(SUPPRESS_DISCLAIMER_TAN_MTID, programming_language *pl)
void LanguageMethods::disclaimer(text_stream *OUT, programming_language *pl, web *W, tangle_target *target) {
int rv = FALSE;
@ -123,25 +130,27 @@ void LanguageMethods::disclaimer(text_stream *OUT, programming_language *pl, web
@ Next comes any additional early matter which the language wishes to add
to the tangled file.
@e ADDITIONAL_EARLY_MATTER_TAN_MTID
<<*>>=
enum ADDITIONAL_EARLY_MATTER_TAN_MTID
=
<<*>>=
/* Wrapper which calls the ADDITIONAL_EARLY_MATTER_TAN_MTID method on the
language pl, passing it the output stream, the web and the tangle target.
Note that the wrapper takes OUT first, whereas the method type declared here
takes the language first. */
VOID_METHOD_TYPE(ADDITIONAL_EARLY_MATTER_TAN_MTID, programming_language *pl, text_stream *OUT, web *W, tangle_target *target)
void LanguageMethods::additional_early_matter(text_stream *OUT, programming_language *pl, web *W, tangle_target *target) {
VOID_METHOD_CALL(pl, ADDITIONAL_EARLY_MATTER_TAN_MTID, OUT, W, target);
}
@ A tangled file then normally declares "definitions". The following write a
definition of the constant named |term| as the value given. If the value spans
multiple lines, the first-line part is supplied to |START_DEFN_TAN_MTID| and
then subsequent lines are fed in order to |PROLONG_DEFN_TAN_MTID|. At the end,
|END_DEFN_TAN_MTID| is called.
definition of the constant named [[term]] as the value given. If the value spans
multiple lines, the first-line part is supplied to [[START_DEFN_TAN_MTID]] and
then subsequent lines are fed in order to [[PROLONG_DEFN_TAN_MTID]]. At the end,
[[END_DEFN_TAN_MTID]] is called.
@e START_DEFN_TAN_MTID
@e PROLONG_DEFN_TAN_MTID
@e END_DEFN_TAN_MTID
<<*>>=
enum START_DEFN_TAN_MTID
enum PROLONG_DEFN_TAN_MTID
enum END_DEFN_TAN_MTID
=
<<*>>=
INT_METHOD_TYPE(START_DEFN_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *term, text_stream *start, section *S, source_line *L)
INT_METHOD_TYPE(PROLONG_DEFN_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *more, section *S, source_line *L)
INT_METHOD_TYPE(END_DEFN_TAN_MTID, programming_language *pl, text_stream *OUT, section *S, source_line *L)
@ -171,9 +180,10 @@ void LanguageMethods::end_definition(OUTPUT_STREAM, programming_language *pl,
@ Then we have some "predeclarations"; for example, for C-like languages we
automatically predeclare all functions, obviating the need for header files.
@e ADDITIONAL_PREDECLARATIONS_TAN_MTID
<<*>>=
enum ADDITIONAL_PREDECLARATIONS_TAN_MTID
=
<<*>>=
INT_METHOD_TYPE(ADDITIONAL_PREDECLARATIONS_TAN_MTID, programming_language *pl, text_stream *OUT, web *W)
void LanguageMethods::additional_predeclarations(OUTPUT_STREAM, programming_language *pl, web *W) {
VOID_METHOD_CALL(pl, ADDITIONAL_PREDECLARATIONS_TAN_MTID, OUT, W);
@ -184,9 +194,10 @@ the more routine matter, tangling ordinary paragraphs into code.
Languages have the ability to suppress paragraph macro expansion:
@e SUPPRESS_EXPANSION_TAN_MTID
<<*>>=
enum SUPPRESS_EXPANSION_TAN_MTID
=
<<*>>=
INT_METHOD_TYPE(SUPPRESS_EXPANSION_TAN_MTID, programming_language *pl, text_stream *material)
int LanguageMethods::allow_expansion(programming_language *pl, text_stream *material) {
int rv = FALSE;
@ -195,12 +206,13 @@ int LanguageMethods::allow_expansion(programming_language *pl, text_stream *mate
}
@ Inweb supports very few "tangle commands", that is, instructions written
inside double squares |[[Thus]]|. These can be handled by attaching methods
as follows, which return |TRUE| if they recognised and acted on the command.
inside double squares [[[[Thus]]]]. These can be handled by attaching methods
as follows, which return [[TRUE]] if they recognised and acted on the command.
@e TANGLE_COMMAND_TAN_MTID
<<*>>=
enum TANGLE_COMMAND_TAN_MTID
=
<<*>>=
INT_METHOD_TYPE(TANGLE_COMMAND_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *data)
int LanguageMethods::special_tangle_command(OUTPUT_STREAM, programming_language *pl, text_stream *data) {
@ -210,14 +222,15 @@ int LanguageMethods::special_tangle_command(OUTPUT_STREAM, programming_language
}
@ The following methods make it possible for languages to tangle unorthodox
lines into code. Ordinarily, only |CODE_BODY_LCAT| lines are tangled, but
lines into code. Ordinarily, only [[CODE_BODY_LCAT]] lines are tangled, but
we can intervene to say that we want to tangle a different line; and if we
do so, we should then act on that basis.
@e WILL_TANGLE_EXTRA_LINE_TAN_MTID
@e TANGLE_EXTRA_LINE_TAN_MTID
<<*>>=
enum WILL_TANGLE_EXTRA_LINE_TAN_MTID
enum TANGLE_EXTRA_LINE_TAN_MTID
=
<<*>>=
INT_METHOD_TYPE(WILL_TANGLE_EXTRA_LINE_TAN_MTID, programming_language *pl, source_line *L)
VOID_METHOD_TYPE(TANGLE_EXTRA_LINE_TAN_MTID, programming_language *pl, text_stream *OUT, source_line *L)
int LanguageMethods::will_insert_in_tangle(programming_language *pl, source_line *L) {
@ -231,12 +244,13 @@ void LanguageMethods::insert_in_tangle(OUTPUT_STREAM, programming_language *pl,
@ In order for C compilers to report C syntax errors on the correct line,
despite rearranging by automatic tools, C conventionally recognises the
preprocessor directive |#line| to tell it that a contiguous extract follows
preprocessor directive [[#line]] to tell it that a contiguous extract follows
from the given file; we generate this automatically.
@e INSERT_LINE_MARKER_TAN_MTID
<<*>>=
enum INSERT_LINE_MARKER_TAN_MTID
=
<<*>>=
VOID_METHOD_TYPE(INSERT_LINE_MARKER_TAN_MTID, programming_language *pl, text_stream *OUT, source_line *L)
void LanguageMethods::insert_line_marker(OUTPUT_STREAM, programming_language *pl, source_line *L) {
VOID_METHOD_CALL(pl, INSERT_LINE_MARKER_TAN_MTID, OUT, L);
@ -244,12 +258,13 @@ void LanguageMethods::insert_line_marker(OUTPUT_STREAM, programming_language *pl
@ The following hooks are provided so that we can top and/or tail the expansion
of paragraph macros in the code. For example, C-like languages use this to
splice |{| and |}| around the expanded matter.
splice [[{]] and [[}]] around the expanded matter.
@e BEFORE_MACRO_EXPANSION_TAN_MTID
@e AFTER_MACRO_EXPANSION_TAN_MTID
<<*>>=
enum BEFORE_MACRO_EXPANSION_TAN_MTID
enum AFTER_MACRO_EXPANSION_TAN_MTID
=
<<*>>=
VOID_METHOD_TYPE(BEFORE_MACRO_EXPANSION_TAN_MTID, programming_language *pl, text_stream *OUT, para_macro *pmac)
VOID_METHOD_TYPE(AFTER_MACRO_EXPANSION_TAN_MTID, programming_language *pl, text_stream *OUT, para_macro *pmac)
void LanguageMethods::before_macro_expansion(OUTPUT_STREAM, programming_language *pl, para_macro *pmac) {
@ -261,12 +276,13 @@ void LanguageMethods::after_macro_expansion(OUTPUT_STREAM, programming_language
@ It's a sad necessity, but sometimes we have to unconditionally tangle code
for a preprocessor to conditionally read: that is, to tangle code which contains
|#ifdef| or similar preprocessor directive.
[[#ifdef]] or similar preprocessor directive.
@e OPEN_IFDEF_TAN_MTID
@e CLOSE_IFDEF_TAN_MTID
<<*>>=
enum OPEN_IFDEF_TAN_MTID
enum CLOSE_IFDEF_TAN_MTID
=
<<*>>=
VOID_METHOD_TYPE(OPEN_IFDEF_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *symbol, int sense)
VOID_METHOD_TYPE(CLOSE_IFDEF_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *symbol, int sense)
void LanguageMethods::open_ifdef(OUTPUT_STREAM, programming_language *pl, text_stream *symbol, int sense) {
@ -278,9 +294,10 @@ void LanguageMethods::close_ifdef(OUTPUT_STREAM, programming_language *pl, text_
@ Now a routine to tangle a comment. Languages without comments should write nothing.
@e COMMENT_TAN_MTID
<<*>>=
enum COMMENT_TAN_MTID
=
<<*>>=
VOID_METHOD_TYPE(COMMENT_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *comm)
void LanguageMethods::comment(OUTPUT_STREAM, programming_language *pl, text_stream *comm) {
VOID_METHOD_CALL(pl, COMMENT_TAN_MTID, OUT, comm);
@ -288,11 +305,12 @@ void LanguageMethods::comment(OUTPUT_STREAM, programming_language *pl, text_stre
@ The inner code tangler now acts on all code known not to contain CWEB
macros or double-square substitutions. In almost every language this simply
passes the code straight through, printing |original| to |OUT|.
passes the code straight through, printing [[original]] to [[OUT]].
@e TANGLE_LINE_UNUSUALLY_TAN_MTID
<<*>>=
enum TANGLE_LINE_UNUSUALLY_TAN_MTID
=
<<*>>=
INT_METHOD_TYPE(TANGLE_LINE_UNUSUALLY_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *original)
void LanguageMethods::tangle_line(OUTPUT_STREAM, programming_language *pl, text_stream *original) {
int rv = FALSE;
@ -302,9 +320,10 @@ void LanguageMethods::tangle_line(OUTPUT_STREAM, programming_language *pl, text_
@ We finally reach the bottom of the tangled file, a footer called the "gnabehs":
@e GNABEHS_TAN_MTID
<<*>>=
enum GNABEHS_TAN_MTID
=
<<*>>=
VOID_METHOD_TYPE(GNABEHS_TAN_MTID, programming_language *pl, text_stream *OUT, web *W)
void LanguageMethods::gnabehs(OUTPUT_STREAM, programming_language *pl, web *W) {
VOID_METHOD_CALL(pl, GNABEHS_TAN_MTID, OUT, W);
@ -314,21 +333,23 @@ void LanguageMethods::gnabehs(OUTPUT_STREAM, programming_language *pl, web *W) {
sidekick files alongside the main tangle file. This method exists to give
them the opportunity.
@e ADDITIONAL_TANGLING_TAN_MTID
<<*>>=
enum ADDITIONAL_TANGLING_TAN_MTID
=
<<*>>=
/* Wrapper which calls the ADDITIONAL_TANGLING_TAN_MTID method on the language
pl, passing it the web W and the tangle target. No output stream is supplied
to the method. */
VOID_METHOD_TYPE(ADDITIONAL_TANGLING_TAN_MTID, programming_language *pl, web *W, tangle_target *target)
void LanguageMethods::additional_tangling(programming_language *pl, web *W, tangle_target *target) {
VOID_METHOD_CALL(pl, ADDITIONAL_TANGLING_TAN_MTID, W, target);
}
@h Weaving methods.
@ \section{Weaving methods.}
This method shouldn't do any actual weaving: it should simply initialise
anything that the language in question might need later.
@e BEGIN_WEAVE_WEA_MTID
<<*>>=
enum BEGIN_WEAVE_WEA_MTID
=
<<*>>=
VOID_METHOD_TYPE(BEGIN_WEAVE_WEA_MTID, programming_language *pl, section *S, weave_order *wv)
void LanguageMethods::begin_weave(section *S, weave_order *wv) {
VOID_METHOD_CALL(S->sect_language, BEGIN_WEAVE_WEA_MTID, S, wv);
@ -336,9 +357,10 @@ void LanguageMethods::begin_weave(section *S, weave_order *wv) {
@ This method allows languages to tell the weaver to ignore certain lines.
@e SKIP_IN_WEAVING_WEA_MTID
<<*>>=
enum SKIP_IN_WEAVING_WEA_MTID
=
<<*>>=
INT_METHOD_TYPE(SKIP_IN_WEAVING_WEA_MTID, programming_language *pl, weave_order *wv, source_line *L)
int LanguageMethods::skip_in_weaving(programming_language *pl, weave_order *wv, source_line *L) {
int rv = FALSE;
@ -351,9 +373,10 @@ a comment, inside qupted text, and so on); the following method is provided
to reset that state, if so. Inweb runs it once per paragraph for safety's
sake, which minimises the knock-on effect of any colouring mistakes.
@e RESET_SYNTAX_COLOURING_WEA_MTID
<<*>>=
enum RESET_SYNTAX_COLOURING_WEA_MTID
=
<<*>>=
VOID_METHOD_TYPE(RESET_SYNTAX_COLOURING_WEA_MTID, programming_language *pl)
void LanguageMethods::reset_syntax_colouring(programming_language *pl) {
VOID_METHOD_CALL_WITHOUT_ARGUMENTS(pl, RESET_SYNTAX_COLOURING_WEA_MTID);
@ -361,9 +384,10 @@ void LanguageMethods::reset_syntax_colouring(programming_language *pl) {
@ And this is where colouring is done.
@e SYNTAX_COLOUR_WEA_MTID
<<*>>=
enum SYNTAX_COLOUR_WEA_MTID
=
<<*>>=
int colouring_state = PLAIN_COLOUR;
INT_METHOD_TYPE(SYNTAX_COLOUR_WEA_MTID, programming_language *pl,
@ -386,13 +410,14 @@ int LanguageMethods::syntax_colour(programming_language *pl,
return rv;
}
@ This method is called for each code line to be woven. If it returns |FALSE|, the
@ This method is called for each code line to be woven. If it returns [[FALSE]], the
weaver carries on in the normal way. If not, it does nothing, assuming that the
method has already woven something more attractive.
@e WEAVE_CODE_LINE_WEA_MTID
<<*>>=
enum WEAVE_CODE_LINE_WEA_MTID
=
<<*>>=
INT_METHOD_TYPE(WEAVE_CODE_LINE_WEA_MTID, programming_language *pl, text_stream *OUT, weave_order *wv, web *W,
chapter *C, section *S, source_line *L, text_stream *matter, text_stream *concluding_comment)
int LanguageMethods::weave_code_line(OUTPUT_STREAM, programming_language *pl, weave_order *wv,
@ -402,11 +427,12 @@ int LanguageMethods::weave_code_line(OUTPUT_STREAM, programming_language *pl, we
return rv;
}
@ When Inweb creates a new |^"Theme"|, it lets everybody know about that.
@ When Inweb creates a new [[^"Theme"]], it lets everybody know about that.
@e NOTIFY_NEW_TAG_WEA_MTID
<<*>>=
enum NOTIFY_NEW_TAG_WEA_MTID
=
<<*>>=
VOID_METHOD_TYPE(NOTIFY_NEW_TAG_WEA_MTID, programming_language *pl, theme_tag *tag)
void LanguageMethods::new_tag_declared(theme_tag *tag) {
programming_language *pl;
@ -414,7 +440,7 @@ void LanguageMethods::new_tag_declared(theme_tag *tag) {
VOID_METHOD_CALL(pl, NOTIFY_NEW_TAG_WEA_MTID, tag);
}
@h Analysis methods.
@ \section{Analysis methods.}
These are really a little miscellaneous, but they all have to do with looking
at the code in a web and working out what's going on, rather than producing
any weave or tangle output.
@ -425,10 +451,11 @@ are called first and last in the process, respectively. (What happens in
between is essentially that Inweb looks for identifiers, for later syntax
colouring purposes.)
@e ANALYSIS_ANA_MTID
@e POST_ANALYSIS_ANA_MTID
<<*>>=
enum ANALYSIS_ANA_MTID
enum POST_ANALYSIS_ANA_MTID
=
<<*>>=
VOID_METHOD_TYPE(ANALYSIS_ANA_MTID, programming_language *pl, web *W)
VOID_METHOD_TYPE(POST_ANALYSIS_ANA_MTID, programming_language *pl, web *W)
void LanguageMethods::early_preweave_analysis(programming_language *pl, web *W) {
@ -441,9 +468,10 @@ void LanguageMethods::late_preweave_analysis(programming_language *pl, web *W) {
@ And finally: in InC only, a few structure element names are given very slightly
special treatment, and this method decides which.
@e SHARE_ELEMENT_ANA_MTID
<<*>>=
enum SHARE_ELEMENT_ANA_MTID
=
<<*>>=
INT_METHOD_TYPE(SHARE_ELEMENT_ANA_MTID, programming_language *pl, text_stream *element_name)
int LanguageMethods::share_element(programming_language *pl, text_stream *element_name) {
int rv = FALSE;
@ -451,9 +479,9 @@ int LanguageMethods::share_element(programming_language *pl, text_stream *elemen
return rv;
}
@h What we support.
@ \section{What we support.}
=
<<*>>=
int LanguageMethods::supports_definitions(programming_language *pl) {
if (Str::len(pl->start_definition) > 0) return TRUE;
if (Str::len(pl->prolong_definition) > 0) return TRUE;

View file

@ -3,34 +3,34 @@
Defining the programming languages supported by Inweb, loading in their
definitions from files.
@h Languages.
Programming languages are identified by name: for example, |C++| or |Perl|.
@ \section{Languages.}
Programming languages are identified by name: for example, [[C++]] or [[Perl]].
@ =
<<*>>=
/* Return the programming language called lname. The first named paragraph
returns at once if a language of that name already exists; otherwise the
second looks for a definition file, trying the "Dialects" subdirectory of the
web W first (when W is non-null) and then the default directory, and reads it
in. It is a fatal error if the definition which was read declares a different
language name from the one asked for. The flag error_if_not_found is consulted
by the second paragraph when no definition file can be found. */
programming_language *Languages::find_by_name(text_stream *lname, web *W,
int error_if_not_found) {
programming_language *pl;
@<If this is the name of a language already known, return that@>;
@<Read the language definition file with this name@>;
<<If this is the name of a language already known, return that>>;
<<Read the language definition file with this name>>;
if (Str::ne(pl->language_name, lname))
Errors::fatal_with_text(
"definition of programming language '%S' is for something else", lname);
return pl;
}
@<If this is the name of a language already known, return that@> =
<<If this is the name of a language already known, return that>>=
LOOP_OVER(pl, programming_language)
if (Str::eq(lname, pl->language_name))
return pl;
@<Read the language definition file with this name@> =
<<Read the language definition file with this name>>=
filename *F = NULL;
if (W) {
pathname *P = Pathnames::down(W->md->path_to_web, I"Dialects");
@<Try P@>;
<<Try P>>;
}
pathname *P = Languages::default_directory();
@<Try P@>;
<<Try P>>;
if (F == NULL) {
if (error_if_not_found)
Errors::fatal_with_text(
@ -39,7 +39,7 @@ programming_language *Languages::find_by_name(text_stream *lname, web *W,
}
pl = Languages::read_definition(F);
@<Try P@> =
<<Try P>>=
if (F == NULL) {
TEMPORARY_TEXT(leaf)
WRITE_TO(leaf, "%S.ildf", lname);
@ -50,7 +50,7 @@ programming_language *Languages::find_by_name(text_stream *lname, web *W,
@ I'm probably showing my age here.
=
<<*>>=
/* The default language is whatever Languages::find_by_name returns for the
name "C"; the final TRUE argument is its error_if_not_found flag. */
programming_language *Languages::default(web *W) {
return Languages::find_by_name(I"C", W, TRUE);
}
@ -71,7 +71,7 @@ void Languages::show(OUTPUT_STREAM) {
Memory::I7_free(sorted_table, ARRAY_SORTING_MREASON, N*((int) sizeof(programming_language *)));
}
@ =
<<*>>=
int Languages::compare_names(const void *ent1, const void *ent2) {
text_stream *tx1 = (*((const programming_language **) ent1))->language_name;
text_stream *tx2 = (*((const programming_language **) ent2))->language_name;
@ -80,7 +80,7 @@ int Languages::compare_names(const void *ent1, const void *ent2) {
@ We can read every language in a directory:
=
<<*>>=
void Languages::read_definitions(pathname *P) {
if (P == NULL) P = Languages::default_directory();
scan_directory *D = Directories::open(P);
@ -102,7 +102,7 @@ pathname *Languages::default_directory(void) {
@ So, then, languages are defined by files which are read in, and parsed
into the following structure (one per language):
=
<<*>>=
typedef struct programming_language {
text_stream *language_name; /* identifies it: see above */
@ -139,20 +139,20 @@ typedef struct programming_language {
int suppress_disclaimer;
int C_like; /* languages with this set have access to extra features */
struct linked_list *reserved_words; /* of |reserved_word| */
struct linked_list *reserved_words; /* of [[reserved_word]] */
struct hash_table built_in_keywords;
struct colouring_language_block *program; /* algorithm for syntax colouring */
struct method_set *methods;
CLASS_DEFINITION
} programming_language;
@ This is a simple one-pass compiler. The |language_reader_state| provides
@ This is a simple one-pass compiler. The [[language_reader_state]] provides
the only state preserved as we work through line by line, except of course
that we are also working on the programming language it is |defining|. The
|current_block| is the braced block of colouring instructions we are
that we are also working on the programming language it is [[defining]]. The
[[current_block]] is the braced block of colouring instructions we are
currently inside.
=
<<*>>=
typedef struct language_reader_state {
struct programming_language *defining;
struct colouring_language_block *current_block;
@ -160,17 +160,17 @@ typedef struct language_reader_state {
/* Create a new programming_language and return it, having read its definition
from the file F. The language is first initialised by the named paragraph
below; F is then fed line by line to Languages::read_definition_line, which
shares a language_reader_state (the language being defined, and no current
colouring block to begin with); lastly, method calls are added. */
programming_language *Languages::read_definition(filename *F) {
programming_language *pl = CREATE(programming_language);
@<Initialise the language to a plain-text state@>;
<<Initialise the language to a plain-text state>>;
language_reader_state lrs;
lrs.defining = pl;
lrs.current_block = NULL;
TextFiles::read(F, FALSE, "can't open programming language definition file",
TRUE, Languages::read_definition_line, NULL, (void *) &lrs);
@<Add method calls to the language@>;
<<Add method calls to the language>>;
return pl;
}
@<Initialise the language to a plain-text state@> =
<<Initialise the language to a plain-text state>>=
pl->language_name = NULL;
pl->file_extension = NULL;
pl->supports_namespaces = FALSE;
@ -212,35 +212,35 @@ itself C-like has functionality for function and structure definitions;
the language whose name is InC gets even more, without having to ask.
Languages have effect through their method calls, which is how those
extra features are provided. The call to |ACMESupport::add_fallbacks|
extra features are provided. The call to [[ACMESupport::add_fallbacks]]
adds generic method calls to give effect to the settings in the definition.
@<Add method calls to the language@> =
<<Add method calls to the language>>=
if (pl->C_like) CLike::make_c_like(pl);
if (Str::eq(pl->language_name, I"InC")) InCSupport::add_features(pl);
ACMESupport::add_fallbacks(pl);
@ So, then, the above reads the file and feeds it line by line to this:
=
<<*>>=
/* Handle one line of a language definition file. Here v_state is the
language_reader_state supplied to TextFiles::read by the caller, cast back
from a void pointer. Blank lines and comment lines are skipped; any other
line is parsed by one of two named paragraphs, according to whether or not
we are currently inside a block of a colouring program. */
void Languages::read_definition_line(text_stream *line, text_file_position *tfp, void *v_state) {
language_reader_state *state = (language_reader_state *) v_state;
programming_language *pl = state->defining;
Str::trim_white_space(line); /* ignore trailing space */
if (Str::len(line) == 0) return; /* ignore blank lines */
if (Str::get_first_char(line) == '#') return; /* lines opening with |#| are comments */
if (Str::get_first_char(line) == '#') return; /* lines opening with [[#]] are comments */
match_results mr = Regexp::create_mr();
if (state->current_block) @<Syntax inside a colouring program@>
else @<Syntax outside a colouring program@>;
if (state->current_block) <<Syntax inside a colouring program>>
else <<Syntax outside a colouring program>>;
Regexp::dispose_of(&mr);
}
@ Outside a colouring program, you can do three things: start a program,
declare a reserved keyword, or set a key to a value.
@<Syntax outside a colouring program@> =
<<Syntax outside a colouring program>>=
if (Regexp::match(&mr, line, L"colouring {")) {
if (pl->program) Errors::in_text_file("duplicate colouring program", tfp);
pl->program = Languages::new_block(NULL, WHOLE_LINE_CRULE_RUN);
@ -323,7 +323,7 @@ declare a reserved keyword, or set a key to a value.
the entire program), open a new block to apply to each character or to
runs of a given colour, or give an if-X-then-Y rule:
@<Syntax inside a colouring program@> =
<<Syntax inside a colouring program>>=
if (Str::eq(line, I"}")) {
state->current_block = state->current_block->parent;
} else if (Regexp::match(&mr, line, L"characters {")) {
@ -379,32 +379,33 @@ runs of a given colour, or give an if-X-then-Y rule:
}
}
@h Blocks.
These are code blocks of colouring instructions. A block whose |parent| is |NULL|
@ \section{Blocks.}
These are code blocks of colouring instructions. A block whose [[parent]] is [[NULL]]
represents a complete program.
@d WHOLE_LINE_CRULE_RUN -1 /* This block applies to the whole snippet being coloured */
@d CHARACTERS_CRULE_RUN -2 /* This block applies to each character in turn */
@d CHARACTERS_IN_CRULE_RUN -3 /* This block applies to each character from a set in turn */
@d INSTANCES_CRULE_RUN -4 /* This block applies to each instance in turn */
@d MATCHES_CRULE_RUN -5 /* This block applies to each match against a regexp in turn */
@d BRACKETS_CRULE_RUN -6 /* This block applies to bracketed subexpressions in a regexp */
<<*>>=
#define WHOLE_LINE_CRULE_RUN -1 /* This block applies to the whole snippet being coloured */
#define CHARACTERS_CRULE_RUN -2 /* This block applies to each character in turn */
#define CHARACTERS_IN_CRULE_RUN -3 /* This block applies to each character from a set in turn */
#define INSTANCES_CRULE_RUN -4 /* This block applies to each instance in turn */
#define MATCHES_CRULE_RUN -5 /* This block applies to each match against a regexp in turn */
#define BRACKETS_CRULE_RUN -6 /* This block applies to bracketed subexpressions in a regexp */
=
<<*>>=
typedef struct colouring_language_block {
struct linked_list *rules; /* of |colouring_rule| */
struct colouring_language_block *parent; /* or |NULL| for the topmost one */
int run; /* one of the |*_CRULE_RUN| values, or else a colour */
struct text_stream *run_instance; /* used only for |INSTANCES_CRULE_RUN| */
struct text_stream *char_set; /* used only for |CHARACTERS_IN_CRULE_RUN| */
wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH]; /* used for |MATCHES_CRULE_RUN|, |BRACKETS_CRULE_RUN| */
struct linked_list *rules; /* of [[colouring_rule]] */
struct colouring_language_block *parent; /* or [[NULL]] for the topmost one */
int run; /* one of the [[*_CRULE_RUN]] values, or else a colour */
struct text_stream *run_instance; /* used only for [[INSTANCES_CRULE_RUN]] */
struct text_stream *char_set; /* used only for [[CHARACTERS_IN_CRULE_RUN]] */
wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH]; /* used for [[MATCHES_CRULE_RUN]], [[BRACKETS_CRULE_RUN]] */
/* workspace during painting */
struct match_results mr; /* of a regular expression */
CLASS_DEFINITION
} colouring_language_block;
@ =
<<*>>=
colouring_language_block *Languages::new_block(colouring_language_block *within, int r) {
colouring_language_block *block = CREATE(colouring_language_block);
block->rules = NEW_LINKED_LIST(colouring_rule);
@ -417,37 +418,38 @@ colouring_language_block *Languages::new_block(colouring_language_block *within,
return block;
}
@h Colouring Rules.
@ \section{Colouring Rules.}
Each individual rule has the form: if a premiss, then a conclusion. It will be
applied to a snippet of text, and the premiss can test that, together with a
little context before it (where available).
Note that rules can be unconditional, in that the premiss always passes.
@d NOT_A_RULE_PREFIX 1 /* this isn't a prefix rule */
@d UNSPACED_RULE_PREFIX 2 /* for |prefix P| */
@d SPACED_RULE_PREFIX 3 /* for |spaced prefix P| */
@d OPTIONALLY_SPACED_RULE_PREFIX 4 /* for |optionally spaced prefix P| */
@d UNSPACED_RULE_SUFFIX 5 /* for |suffix P| */
@d SPACED_RULE_SUFFIX 6 /* for |spaced suffix P| */
@d OPTIONALLY_SPACED_RULE_SUFFIX 7 /* for |optionally spaced suffix P| */
<<*>>=
#define NOT_A_RULE_PREFIX 1 /* this isn't a prefix rule */
#define UNSPACED_RULE_PREFIX 2 /* for [[prefix P]] */
#define SPACED_RULE_PREFIX 3 /* for [[spaced prefix P]] */
#define OPTIONALLY_SPACED_RULE_PREFIX 4 /* for [[optionally spaced prefix P]] */
#define UNSPACED_RULE_SUFFIX 5 /* for [[suffix P]] */
#define SPACED_RULE_SUFFIX 6 /* for [[spaced suffix P]] */
#define OPTIONALLY_SPACED_RULE_SUFFIX 7 /* for [[optionally spaced suffix P]] */
@d MAX_ILDF_REGEXP_LENGTH 64
#define MAX_ILDF_REGEXP_LENGTH 64
=
<<*>>=
typedef struct colouring_rule {
/* the premiss: */
int sense; /* |FALSE| to negate the condition */
wchar_t match_colour; /* for |coloured C|, or else |NOT_A_COLOUR| */
wchar_t match_keyword_of_colour; /* for |keyword C|, or else |NOT_A_COLOUR| */
int sense; /* [[FALSE]] to negate the condition */
wchar_t match_colour; /* for [[coloured C]], or else [[NOT_A_COLOUR]] */
wchar_t match_keyword_of_colour; /* for [[keyword C]], or else [[NOT_A_COLOUR]] */
struct text_stream *match_text; /* or length 0 to mean "anything" */
int match_prefix; /* one of the |*_RULE_PREFIX| values above */
int match_prefix; /* one of the [[*_RULE_PREFIX]] values above */
wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH];
int number; /* for |number N| rules; 0 for others */
int number_of; /* for |number N of M| rules; 0 for others */
int number; /* for [[number N]] rules; 0 for others */
int number_of; /* for [[number N of M]] rules; 0 for others */
/* the conclusion: */
struct colouring_language_block *execute_block; /* or |NULL|, in which case... */
struct colouring_language_block *execute_block; /* or [[NULL]], in which case... */
wchar_t set_to_colour; /* ...paint the snippet in this colour */
wchar_t set_prefix_to_colour; /* ...also paint this (same for suffix) */
int debug; /* ...or print debugging text to console */
@ -458,7 +460,7 @@ typedef struct colouring_rule {
CLASS_DEFINITION
} colouring_rule;
@ =
<<*>>=
colouring_rule *Languages::new_rule(colouring_language_block *within) {
if (within == NULL) internal_error("rule outside block");
colouring_rule *rule = CREATE(colouring_rule);
@ -482,18 +484,18 @@ colouring_rule *Languages::new_rule(colouring_language_block *within) {
return rule;
}
@ =
<<*>>=
void Languages::parse_rule(language_reader_state *state, text_stream *premiss,
text_stream *action, text_file_position *tfp) {
match_results mr = Regexp::create_mr();
colouring_rule *rule = Languages::new_rule(state->current_block);
Str::trim_white_space(premiss); Str::trim_white_space(action);
@<Parse the premiss@>;
@<Parse the conclusion@>;
<<Parse the premiss>>;
<<Parse the conclusion>>;
Regexp::dispose_of(&mr);
}
@<Parse the premiss@> =
<<Parse the premiss>>=
while (Regexp::match(&mr, premiss, L"not (%c+)")) {
rule->sense = (rule->sense)?FALSE:TRUE;
Str::clear(premiss); Str::copy(premiss, mr.exp[0]);
@ -533,7 +535,7 @@ void Languages::parse_rule(language_reader_state *state, text_stream *premiss,
rule->match_text = Languages::text(premiss, tfp, FALSE);
}
@<Parse the conclusion@> =
<<Parse the conclusion>>=
if (Str::eq(action, I"{")) {
rule->execute_block =
Languages::new_block(state->current_block, WHOLE_LINE_CRULE_RUN);
@ -553,10 +555,10 @@ void Languages::parse_rule(language_reader_state *state, text_stream *premiss,
Errors::in_text_file("action after '=>' illegible", tfp);
}
@h Reserved words.
Note that these can come in any colour, though usually it's |!reserved|.
@ \section{Reserved words.}
Note that these can come in any colour, though usually it's [[!reserved]].
=
<<*>>=
typedef struct reserved_word {
struct text_stream *word;
int colour;
@ -578,28 +580,29 @@ reserved_word *Languages::reserved(programming_language *pl, text_stream *W, wch
return rw;
}
@h Expressions.
@ \section{Expressions.}
Language definition files have three types of data: colours, booleans, and
text. Colours first. Note that there are two pseudo-colours used above,
but which are not expressible in the syntax of this file.
@d DEFINITION_COLOUR 'd'
@d FUNCTION_COLOUR 'f'
@d RESERVED_COLOUR 'r'
@d ELEMENT_COLOUR 'e'
@d IDENTIFIER_COLOUR 'i'
@d CHARACTER_COLOUR 'c'
@d CONSTANT_COLOUR 'n'
@d STRING_COLOUR 's'
@d PLAIN_COLOUR 'p'
@d EXTRACT_COLOUR 'x'
@d COMMENT_COLOUR '!'
@d NEWLINE_COLOUR '\n'
<<*>>=
#define DEFINITION_COLOUR 'd'
#define FUNCTION_COLOUR 'f'
#define RESERVED_COLOUR 'r'
#define ELEMENT_COLOUR 'e'
#define IDENTIFIER_COLOUR 'i'
#define CHARACTER_COLOUR 'c'
#define CONSTANT_COLOUR 'n'
#define STRING_COLOUR 's'
#define PLAIN_COLOUR 'p'
#define EXTRACT_COLOUR 'x'
#define COMMENT_COLOUR '!'
#define NEWLINE_COLOUR '\n'
@d NOT_A_COLOUR ' '
@d UNQUOTED_COLOUR '_'
#define NOT_A_COLOUR ' '
#define UNQUOTED_COLOUR '_'
=
<<*>>=
wchar_t Languages::colour(text_stream *T, text_file_position *tfp) {
if (Str::get_first_char(T) != '!') {
Errors::in_text_file("colour names must begin with !", tfp);
@ -622,9 +625,9 @@ wchar_t Languages::colour(text_stream *T, text_file_position *tfp) {
}
}
@ A boolean must be written as |true| or |false|.
@ A boolean must be written as [[true]] or [[false]].
=
<<*>>=
int Languages::boolean(text_stream *T, text_file_position *tfp) {
if (Str::eq(T, I"true")) return TRUE;
else if (Str::eq(T, I"false")) return FALSE;
@ -634,11 +637,11 @@ int Languages::boolean(text_stream *T, text_file_position *tfp) {
}
}
@ In text, |\n| represents a newline, |\s| a space and |\t| a tab. Spaces
can be given in the ordinary way inside a text in any case. |\\| is a
@ In text, [[\n]] represents a newline, [[\s]] a space and [[\t]] a tab. Spaces
can be given in the ordinary way inside a text in any case. [[\\]] is a
literal backslash.
=
<<*>>=
text_stream *Languages::text(text_stream *T, text_file_position *tfp, int allow) {
text_stream *V = Str::new();
if (Str::len(T) > 0) {
@ -725,7 +728,7 @@ text_stream *Languages::text(text_stream *T, text_file_position *tfp, int allow)
@ And regular expressions.
=
<<*>>=
void Languages::regexp(wchar_t *write_to, text_stream *T, text_file_position *tfp) {
if (write_to == NULL) internal_error("no buffer");
write_to[0] = 0;

View file

@ -3,38 +3,38 @@
A simple syntax-colouring engine.
@ This is a very simple syntax colouring algorithm. The work is done by the
function |Painter::syntax_colour|, which can in principle be applied to texts
function [[Painter::syntax_colour]], which can in principle be applied to texts
of any length. But it's usually convenient to run it on a long file one line
at a time, so that it is called repeatedly. The variable |colouring_state|
at a time, so that it is called repeatedly. The variable [[colouring_state]]
remembers where we were at the end of the previous line, so that we can pick
up again later at the start of the next.
Because of that, we need to call the following before we begin a run of calls
to |Painter::syntax_colour|:
to [[Painter::syntax_colour]]:
=
<<*>>=
/* Run counter: handed to the colouring program as its last argument and
   advanced by one each time that program is run. */
int painter_count = 1;
/* Call this before starting a run of calls to the syntax colourer: it puts the
   run counter back to 1 and the carried-over colouring state back to plain.
   The language argument is not consulted. */
void Painter::reset_syntax_colouring(programming_language *pl) {
	painter_count = 1;
	colouring_state = PLAIN_COLOUR;
}
@ As we begin, the text to colour is in |matter|, while |colouring| is an
@ As we begin, the text to colour is in [[matter]], while [[colouring]] is an
equal-length text where each character represents the colour of its
corresponding character in |matter|. For example, we might start as:
= (text as PainterOutput)
corresponding character in [[matter]]. For example, we might start as:
int x = 55;
ppppppppppp
=
with every character having |PLAIN_COLOUR|, but end up with:
= (text as PainterOutput)
with every character having [[PLAIN_COLOUR]], but end up with:
int x = 55;
rrrpipppnnp
=
We get to that by using a language's rules on literals, and then executing
its colouring program.
=
<<*>>=
int Painter::syntax_colour(programming_language *pl,
hash_table *HT, text_stream *matter, text_stream *colouring, int with_comments) {
int from = 0, to = Str::len(matter) - 1;
@ -57,12 +57,12 @@ int Painter::syntax_colour(programming_language *pl,
void Painter::syntax_colour_inner(programming_language *pl,
hash_table *HT, text_stream *matter, text_stream *colouring, int from, int to) {
@<Spot identifiers, literal text and character constants@>;
@<Spot literal numerical constants@>;
@<Now run the colouring program@>;
<<Spot identifiers, literal text and character constants>>;
<<Spot literal numerical constants>>;
<<Now run the colouring program>>;
}
@<Spot identifiers, literal text and character constants@> =
<<Spot identifiers, literal text and character constants>>=
int squote = Str::get_first_char(pl->character_literal);
int squote_escape = Str::get_first_char(pl->character_literal_escape);
int dquote = Str::get_first_char(pl->string_literal);
@ -106,7 +106,7 @@ void Painter::syntax_colour_inner(programming_language *pl,
}
}
@<Spot literal numerical constants@> =
<<Spot literal numerical constants>>=
int base = -1, dec_possible = TRUE;
for (int i=from; i <= to; i++) {
if ((Str::get_at(colouring, i) == PLAIN_COLOUR) ||
@ -148,8 +148,8 @@ void Painter::syntax_colour_inner(programming_language *pl,
case 10: if (Characters::isdigit(c)) pass = TRUE; break;
case 16: if (Characters::isdigit(c)) pass = TRUE;
int d = Characters::tolower(c);
if ((d == 'a') || (d == 'b') || (d == 'c') ||
(d == 'd') || (d == 'e') || (d == 'f')) pass = TRUE;
/* hex digits above 9: d is the lower-cased character, so a-f also covers A-F */
if ((d == 'a') || (d == 'b') || (d == 'c') ||
(d == 'd') || (d == 'e') || (d == 'f')) pass = TRUE;
break;
}
if (pass) {
@ -165,9 +165,9 @@ void Painter::syntax_colour_inner(programming_language *pl,
@ For the moment, we always adopt the C rules on identifiers: they have to
begin with an underscore or letter, then continue with underscores or
alphanumeric characters, except that if the language allows it then they
can contain a |::| namespace divider.
can contain a [[::]] namespace divider.
=
<<*>>=
int Painter::identifier_at(programming_language *pl,
text_stream *matter, text_stream *colouring, int i) {
wchar_t c = Str::get_at(matter, i);
@ -191,7 +191,7 @@ int Painter::identifier_at(programming_language *pl,
@ With those preliminaries out of the way, the language's colouring program
takes over.
@<Now run the colouring program@> =
<<Now run the colouring program>>=
if (pl->program) {
	/* pass the current run number to the program, then advance the counter */
	int run_number = painter_count++;
	Painter::execute(HT, pl->program, matter, colouring, from, to, run_number);
}
@ -200,7 +200,7 @@ whole snippet of text, or each character on its own, or each run of characters
of a given sort. Note that we work width-first, as it were: we complete each
rule across the whole snippet before moving on to the next.
=
<<*>>=
void Painter::execute(hash_table *HT, colouring_language_block *block, text_stream *matter,
text_stream *colouring, int from, int to, int N) {
if (block == NULL) internal_error("no block");
@ -281,7 +281,7 @@ void Painter::execute(hash_table *HT, colouring_language_block *block, text_stre
@ Rules have the form: if X, then Y.
=
<<*>>=
void Painter::execute_rule(hash_table *HT, colouring_rule *rule, text_stream *matter,
text_stream *colouring, int from, int to, int N) {
if (Painter::satisfies(HT, rule, matter, colouring, from, to, N) == rule->sense)
@ -290,14 +290,15 @@ void Painter::execute_rule(hash_table *HT, colouring_rule *rule, text_stream *ma
@ Here we test the "if X":
@d UNSPACED_RULE_PREFIX 2 /* for |prefix P| */
@d SPACED_RULE_PREFIX 3 /* for |spaced prefix P| */
@d OPTIONALLY_SPACED_RULE_PREFIX 4 /* for |optionally spaced prefix P| */
@d UNSPACED_RULE_SUFFIX 5 /* for |suffix P| */
@d SPACED_RULE_SUFFIX 6 /* for |spaced suffix P| */
@d OPTIONALLY_SPACED_RULE_SUFFIX 7 /* for |optionally spaced suffix P| */
<<*>>=
#define UNSPACED_RULE_PREFIX 2 /* for [[prefix P]] */
#define SPACED_RULE_PREFIX 3 /* for [[spaced prefix P]] */
#define OPTIONALLY_SPACED_RULE_PREFIX 4 /* for [[optionally spaced prefix P]] */
#define UNSPACED_RULE_SUFFIX 5 /* for [[suffix P]] */
#define SPACED_RULE_SUFFIX 6 /* for [[spaced suffix P]] */
#define OPTIONALLY_SPACED_RULE_SUFFIX 7 /* for [[optionally spaced suffix P]] */
=
<<*>>=
int Painter::satisfies(hash_table *HT, colouring_rule *rule, text_stream *matter,
text_stream *colouring, int from, int to, int N) {
if (rule->number > 0) {
@ -361,12 +362,12 @@ int Painter::satisfies(hash_table *HT, colouring_rule *rule, text_stream *matter
@ And here we carry out the "then Y":
=
<<*>>=
void Painter::follow(hash_table *HT, colouring_rule *rule, text_stream *matter,
text_stream *colouring, int from, int to) {
if (rule->execute_block)
Painter::execute(HT, rule->execute_block, matter, colouring, from, to, 0);
else if (rule->debug) @<Print some debugging text@>
else if (rule->debug) <<Print some debugging text>>
else {
if (rule->set_to_colour != NOT_A_COLOUR)
for (int i=from; i<=to; i++)
@ -377,7 +378,7 @@ void Painter::follow(hash_table *HT, colouring_rule *rule, text_stream *matter,
}
}
@<Print some debugging text@> =
<<Print some debugging text>>=
PRINT("[%d, %d] text: ", from, to);
for (int i=from; i<=to; i++)
PUT_TO(STDOUT, Str::get_at(matter, i));
@ -386,9 +387,9 @@ void Painter::follow(hash_table *HT, colouring_rule *rule, text_stream *matter,
PUT_TO(STDOUT, Str::get_at(colouring, i));
PRINT("\n");
@h Painting a file.
@ \section{Painting a file.}
=
<<*>>=
linked_list *Painter::lines(filename *F) {
linked_list *L = NEW_LINKED_LIST(text_stream);
TextFiles::read(F, FALSE, "unable to read file of textual extract", TRUE,

View file

@ -2,33 +2,33 @@
Basic support for languages to recognise structure and function declarations.
@ For each |typedef struct| we find, we will make one of these:
@ For each [[typedef struct]] we find, we will make one of these:
=
<<*>>=
typedef struct language_type {
struct text_stream *structure_name;
int tangled; /* whether the structure definition has been tangled out */
struct source_line *structure_header_at; /* opening line of |typedef| */
struct source_line *typedef_ends; /* closing line, where |}| appears */
struct linked_list *incorporates; /* of |language_type| */
struct linked_list *elements; /* of |structure_element| */
struct source_line *structure_header_at; /* opening line of [[typedef]] */
struct source_line *typedef_ends; /* closing line, where [[}]] appears */
struct linked_list *incorporates; /* of [[language_type]] */
struct linked_list *elements; /* of [[structure_element]] */
struct language_type *next_cst_alphabetically;
CLASS_DEFINITION
} language_type;
@ =
<<*>>=
language_type *first_cst_alphabetically = NULL;
language_type *Functions::new_struct(web *W, text_stream *name, source_line *L) {
language_type *str = CREATE(language_type);
@<Initialise the language type structure@>;
<<Initialise the language type structure>>;
Analyser::mark_reserved_word_at_line(L, str->structure_name, RESERVED_COLOUR);
@<Add this to the lists for its web and its paragraph@>;
@<Insertion-sort this into the alphabetical list of all structures found@>;
<<Add this to the lists for its web and its paragraph>>;
<<Insertion-sort this into the alphabetical list of all structures found>>;
return str;
}
@<Initialise the language type structure@> =
<<Initialise the language type structure>>=
str->structure_name = Str::duplicate(name);
str->structure_header_at = L;
str->tangled = FALSE;
@ -36,12 +36,12 @@ language_type *Functions::new_struct(web *W, text_stream *name, source_line *L)
str->incorporates = NEW_LINKED_LIST(language_type);
str->elements = NEW_LINKED_LIST(structure_element);
@<Add this to the lists for its web and its paragraph@> =
<<Add this to the lists for its web and its paragraph>>=
/* tag the paragraph owning line L as "Structures", then append the new type
   both to the web's list of language types and to that paragraph's list of
   structures */
Tags::add_by_name(L->owning_paragraph, I"Structures");
ADD_TO_LINKED_LIST(str, language_type, W->language_types);
ADD_TO_LINKED_LIST(str, language_type, L->owning_paragraph->structures);
@<Insertion-sort this into the alphabetical list of all structures found@> =
<<Insertion-sort this into the alphabetical list of all structures found>>=
str->next_cst_alphabetically = NULL;
if (first_cst_alphabetically == NULL) first_cst_alphabetically = str;
else {
@ -65,16 +65,16 @@ language_type *Functions::new_struct(web *W, text_stream *name, source_line *L)
if (placed == FALSE) last->next_cst_alphabetically = str;
}
@ A language can also create an instance of |structure_element| to record the
existence of the element |val|, and add it to the linked list of elements of
@ A language can also create an instance of [[structure_element]] to record the
existence of the element [[val]], and add it to the linked list of elements of
the structure being defined.
In InC, only, certain element names used often in Inform's source code are
given mildly special treatment. This doesn't amount to much. |allow_sharing|
given mildly special treatment. This doesn't amount to much. [[allow_sharing]]
has no effect on tangling, so it doesn't change the program. It simply
affects the reports in the woven code about where structures are used.
=
<<*>>=
typedef struct structure_element {
struct text_stream *element_name;
struct source_line *element_created_at;
@ -82,7 +82,7 @@ typedef struct structure_element {
CLASS_DEFINITION
} structure_element;
@ =
<<*>>=
structure_element *Functions::new_element(language_type *str, text_stream *elname,
source_line *L) {
Analyser::mark_reserved_word_at_line(L, elname, ELEMENT_COLOUR);
@ -96,7 +96,7 @@ structure_element *Functions::new_element(language_type *str, text_stream *elnam
return elt;
}
@ =
<<*>>=
language_type *Functions::find_structure(web *W, text_stream *name) {
language_type *str;
LOOP_OVER_LINKED_LIST(str, language_type, W->language_types)
@ -105,14 +105,14 @@ language_type *Functions::find_structure(web *W, text_stream *name) {
return NULL;
}
@h Functions.
@ \section{Functions.}
Each function definition found results in one of these structures being made:
=
<<*>>=
typedef struct language_function {
struct text_stream *function_name; /* e.g., |"cultivate"| */
struct text_stream *function_type; /* e.g., |"tree *"| */
struct text_stream *function_arguments; /* e.g., |"int rainfall)"|: note |)| */
struct text_stream *function_name; /* e.g., [["cultivate"]] */
struct text_stream *function_type; /* e.g., [["tree *"]] */
struct text_stream *function_arguments; /* e.g., [["int rainfall)"]]: note [[)]] */
struct source_line *function_header_at; /* where the first line of the header begins */
int within_namespace; /* written using InC namespace dividers */
int called_from_other_sections;
@ -123,23 +123,23 @@ typedef struct language_function {
CLASS_DEFINITION
} language_function;
@ =
<<*>>=
language_function *Functions::new_function(text_stream *fname, source_line *L) {
hash_table_entry *hte =
Analyser::mark_reserved_word_at_line(L, fname, FUNCTION_COLOUR);
language_function *fn = CREATE(language_function);
hte->as_function = fn;
@<Initialise the function structure@>;
@<Add the function to its paragraph and line@>;
<<Initialise the function structure>>;
<<Add the function to its paragraph and line>>;
if (L->owning_section->sect_language->supports_namespaces)
@<Check that the function has its namespace correctly declared@>;
<<Check that the function has its namespace correctly declared>>;
return fn;
}
@ Note that we take a snapshot of the conditional compilation stack as
part of the function structure. We'll need it when predeclaring the function.
@<Initialise the function structure@> =
<<Initialise the function structure>>=
fn->function_name = Str::duplicate(fname);
fn->function_arguments = Str::new();
fn->function_type = Str::new();
@ -153,12 +153,12 @@ part of the function structure. We'll need it when predeclaring the function.
fn->usage_described = TRUE;
fn->no_conditionals = 0;
@<Add the function to its paragraph and line@> =
<<Add the function to its paragraph and line>>=
/* record the definition on the line itself, and on the line's paragraph
   when it has one */
L->function_defined = fn;
paragraph *P = L->owning_paragraph;
if (P != NULL) ADD_TO_LINKED_LIST(fn, language_function, P->functions);
@<Check that the function has its namespace correctly declared@> =
<<Check that the function has its namespace correctly declared>>=
text_stream *declared_namespace = NULL;
text_stream *ambient_namespace = L->owning_section->sect_namespace;
match_results mr = Regexp::create_mr();
@ -188,7 +188,7 @@ part of the function structure. We'll need it when predeclaring the function.
@ "Elsewhere" here means "in a paragraph of code other than the one in which the
function's definition appears".
=
<<*>>=
int Functions::used_elsewhere(language_function *fn) {
paragraph *P = fn->function_header_at->owning_paragraph;
hash_table_entry *hte =
@ -205,11 +205,11 @@ int Functions::used_elsewhere(language_function *fn) {
return FALSE;
}
@h Cataloguing.
This implements the additional information in the |-structures| and |-functions|
@ \section{Cataloguing.}
This implements the additional information in the [[-structures]] and [[-functions]]
forms of section catalogue.
=
<<*>>=
void Functions::catalogue(section *S, int functions_too) {
language_type *str;
LOOP_OVER(str, language_type)