From e1ca0836cdc4279c9c0393282506a24b9b8e2633 Mon Sep 17 00:00:00 2001 From: AwesomeAdam54321 Date: Sat, 9 Mar 2024 20:44:19 +0800 Subject: [PATCH] Chapter 4: Nowebify. --- Chapter_4/{ACME Support.w => ACME_Support.nw} | 60 +-- ...C-Like Languages.w => C-Like_Languages.nw} | 188 +++++---- Chapter_4/{InC Support.w => InC_Support.nw} | 397 +++++++++--------- ...Language Methods.w => Language_Methods.nw} | 208 +++++---- ...g Languages.w => Programming_Languages.nw} | 191 ++++----- Chapter_4/{The Painter.w => The_Painter.nw} | 73 ++-- ...and Functions.w => Types_and_Functions.nw} | 70 +-- 7 files changed, 612 insertions(+), 575 deletions(-) rename Chapter_4/{ACME Support.w => ACME_Support.nw} (92%) rename Chapter_4/{C-Like Languages.w => C-Like_Languages.nw} (76%) rename Chapter_4/{InC Support.w => InC_Support.nw} (79%) rename Chapter_4/{Language Methods.w => Language_Methods.nw} (84%) rename Chapter_4/{Programming Languages.w => Programming_Languages.nw} (86%) rename Chapter_4/{The Painter.w => The_Painter.nw} (90%) rename Chapter_4/{Types and Functions.w => Types_and_Functions.nw} (80%) diff --git a/Chapter_4/ACME Support.w b/Chapter_4/ACME_Support.nw similarity index 92% rename from Chapter_4/ACME Support.w rename to Chapter_4/ACME_Support.nw index faa3dd4..a3618f5 100644 --- a/Chapter_4/ACME Support.w +++ b/Chapter_4/ACME_Support.nw @@ -2,7 +2,7 @@ For generic programming languages by the ACME corporation. -@h One Dozen ACME Explosive Tennis Balls. +@ \section{One Dozen ACME Explosive Tennis Balls.} Older readers will remember that Wile E. Coyote, when wishing to frustrate Road Runner with some ingenious device, would invariably buy it from the Acme Corporation, which manufactured everything imaginable. See Wikipedia, "Acme @@ -12,7 +12,7 @@ For us, ACME is an imaginary programming language, providing generic support for comments and syntax colouring. Ironically, this code grew out of a language actually called ACME: the 6502 assembler of the same name. 
-= +<<*>>= void ACMESupport::add_fallbacks(programming_language *pl) { if (Methods::provided(pl->methods, PARSE_TYPES_PAR_MTID) == FALSE) METHOD_ADD(pl, PARSE_TYPES_PAR_MTID, ACMESupport::parse_types); @@ -54,11 +54,11 @@ void ACMESupport::add_fallbacks(programming_language *pl) { METHOD_ADD(pl, SYNTAX_COLOUR_WEA_MTID, ACMESupport::syntax_colour); } -@ This utility does a very limited |WRITE|-like job. (We don't want to use -the actual |WRITE| because that would make it possible for malicious language +@ This utility does a very limited [[WRITE]]-like job. (We don't want to use +the actual [[WRITE]] because that would make it possible for malicious language files to crash Inweb.) -= +<<*>>= void ACMESupport::expand(OUTPUT_STREAM, text_stream *prototype, text_stream *S, int N, filename *F) { if (Str::len(prototype) > 0) { @@ -80,9 +80,9 @@ void ACMESupport::expand(OUTPUT_STREAM, text_stream *prototype, text_stream *S, } } -@h Tangling methods. +@ \section{Tangling methods.} -= +<<*>>= void ACMESupport::shebang(programming_language *pl, text_stream *OUT, web *W, tangle_target *target) { ACMESupport::expand(OUT, pl->shebang, NULL, -1, NULL); @@ -158,20 +158,20 @@ void ACMESupport::comment(programming_language *pl, } } -@ In the following, |q_mode| is 0 outside quotes, 1 inside a character literal, -and 2 inside a string literal; |c_mode| is 0 outside comments, 1 inside a line +@ In the following, [[q_mode]] is 0 outside quotes, 1 inside a character literal, +and 2 inside a string literal; [[c_mode]] is 0 outside comments, 1 inside a line comment, and 2 inside a multiline comment. 
-= +<<*>>= int ACMESupport::parse_comment(programming_language *pl, text_stream *line, text_stream *part_before_comment, text_stream *part_within_comment) { int q_mode = 0, c_mode = 0, non_white_space = FALSE, c_position = -1, c_end = -1; for (int i=0; i; break; - case 1: @; break; - case 2: @; break; + case 0: <>; break; + case 1: <>; break; + case 2: <>; break; } } if (c_mode == 2) c_end = Str::len(line); @@ -188,22 +188,22 @@ int ACMESupport::parse_comment(programming_language *pl, return FALSE; } -@ = +<>= if (Str::includes_at(line, i, pl->multiline_comment_close)) { c_mode = 0; c_end = i; i += Str::len(pl->multiline_comment_close) - 1; } -@ = +<>= ; -@ = +<>= switch (q_mode) { - case 0: @; break; - case 1: @; break; - case 2: @; break; + case 0: <>; break; + case 1: <>; break; + case 2: <>; break; } -@ = +<>= if (!(Characters::is_whitespace(c))) non_white_space = TRUE; if (c == Str::get_first_char(pl->string_literal)) q_mode = 2; else if (c == Str::get_first_char(pl->character_literal)) q_mode = 1; @@ -225,13 +225,13 @@ int ACMESupport::parse_comment(programming_language *pl, } } -@ = +<>= if (!(Characters::is_whitespace(c))) non_white_space = TRUE; if (c == Str::get_first_char(pl->character_literal_escape)) i += 1; if (c == Str::get_first_char(pl->character_literal)) q_mode = 0; q_mode = 0; -@ = +<>= if (!(Characters::is_whitespace(c))) non_white_space = TRUE; if (c == Str::get_first_char(pl->string_literal_escape)) i += 1; if (c == Str::get_first_char(pl->string_literal)) q_mode = 0; @@ -239,7 +239,7 @@ int ACMESupport::parse_comment(programming_language *pl, @ -= +<<*>>= void ACMESupport::parse_types(programming_language *self, web *W) { if (W->main_language->type_notation[0]) { chapter *C; @@ -258,7 +258,7 @@ void ACMESupport::parse_types(programming_language *self, web *W) { @ -= +<<*>>= void ACMESupport::parse_functions(programming_language *self, web *W) { if (W->main_language->function_notation[0]) { chapter *C; @@ -278,12 +278,12 @@ void 
ACMESupport::parse_functions(programming_language *self, web *W) { @ The following is an opportunity for us to scold the author for any violation of the namespace rules. We're going to look for functions named -|Whatever::name()| whose definitions are not in the |Whatever::| section; +[[Whatever::name()]] whose definitions are not in the [[Whatever::]] section; in other words, we police the rule that functions actually are defined in the namespace which their names imply. This can be turned off with a special bibliographic variable, but don't do that. -= +<<*>>= void ACMESupport::post_analysis(programming_language *self, web *W) { int check_namespaces = FALSE; if (Str::eq_wide_string(Bibliographic::get_datum(W->md, I"Namespaces"), L"On")) @@ -319,7 +319,7 @@ void ACMESupport::post_analysis(programming_language *self, web *W) { @ Having found all those functions and structure elements, we make sure they are all known to Inweb's hash table of interesting identifiers: -= +<<*>>= void ACMESupport::analyse_code(programming_language *self, web *W) { language_function *fn; LOOP_OVER(fn, language_function) @@ -337,14 +337,14 @@ void ACMESupport::analyse_code(programming_language *self, web *W) { @ This is here so that tangling the Standard Rules extension doesn't insert a spurious comment betraying Inweb's involvement in the process. 
-= +<<*>>= int ACMESupport::suppress_disclaimer(programming_language *pl) { return pl->suppress_disclaimer; } @ -= +<<*>>= void ACMESupport::begin_weave(programming_language *pl, section *S, weave_order *wv) { reserved_word *rw; LOOP_OVER_LINKED_LIST(rw, reserved_word, pl->reserved_words) @@ -353,7 +353,7 @@ void ACMESupport::begin_weave(programming_language *pl, section *S, weave_order @ ACME has all of its syntax-colouring done by the default engine: -= +<<*>>= void ACMESupport::reset_syntax_colouring(programming_language *pl) { Painter::reset_syntax_colouring(pl); } diff --git a/Chapter_4/C-Like Languages.w b/Chapter_4/C-Like_Languages.nw similarity index 76% rename from Chapter_4/C-Like Languages.w rename to Chapter_4/C-Like_Languages.nw index 78c8d9b..707ce91 100644 --- a/Chapter_4/C-Like Languages.w +++ b/Chapter_4/C-Like_Languages.nw @@ -2,10 +2,10 @@ To provide special features for the whole C family of languages. -@h What makes a language C-like? +@ \section{What makes a language C-like?} This does: -= +<<*>>= void CLike::make_c_like(programming_language *pl) { METHOD_ADD(pl, PARSE_TYPES_PAR_MTID, CLike::parse_types); METHOD_ADD(pl, PARSE_FUNCTIONS_PAR_MTID, CLike::parse_functions); @@ -15,38 +15,38 @@ void CLike::make_c_like(programming_language *pl) { METHOD_ADD(pl, ADDITIONAL_PREDECLARATIONS_TAN_MTID, CLike::additional_predeclarations); } -@h Parsing. +@ \section{Parsing.} After a web has been read in and then parsed, code supporting its language is then called to do any further parsing it might want to. The code below is run if the language is "C-like": regular C and InC both qualify. 
-= +<<*>>= void CLike::parse_types(programming_language *self, web *W) { - @; - @; + <>; + <>; } @ We're going to assume that the C source code uses structures looking something like this: -= (text as C) + typedef struct fruit { struct pip the_pips[5]; struct fruit *often_confused_with; struct tree_species *grows_on; int typical_weight; } fruit; -= + which adopts the traditional layout conventions of Kernighan and Ritchie. The structure definitions in this Inweb web all take the required form, of course, and provide many more examples. -Note that a |fruit| structure contains a |pip| structure (in fact, five of -them), but only contains pointers to |tree_species| structures and itself. -C requires therefore that the structure definition for |pip| must occur -earlier in the code than that for |fruit|. This is a nuisance, so Inweb +Note that a [[fruit]] structure contains a [[pip]] structure (in fact, five of +them), but only contains pointers to [[tree_species]] structures and itself. +C requires therefore that the structure definition for [[pip]] must occur +earlier in the code than that for [[fruit]]. This is a nuisance, so Inweb takes care of it automatically. -@ = +<>= language_type *current_str = NULL; chapter *C; section *S; @@ -61,7 +61,7 @@ takes care of it automatically. current_str->typedef_ends = L; current_str = NULL; } else if ((current_str) && (current_str->typedef_ends == NULL)) { - @; + <>; } else if ((Regexp::match(&mr, L->text, L"typedef %c+")) && (Regexp::match(&mr, L->text, L"%c+##%c+") == FALSE)) { if (L->owning_paragraph->placed_very_early == FALSE) @@ -73,24 +73,24 @@ takes care of it automatically. @ At this point we're reading a line within the structure's definition; for the sake of an illustrative example, let's suppose that line is: -= (text) - unsigned long long int *val; -= -We need to extract the element name, |val|, and make a note of it. 
-@ = + unsigned long long int *val; + +We need to extract the element name, [[val]], and make a note of it. + +<>= TEMPORARY_TEXT(p) Str::copy(p, L->text); Str::trim_white_space(p); - @; + <>; string_position pos = Str::start(p); if (Str::get(pos) != '/') { /* a slash must introduce a comment here */ - @; - @; + <>; + <>; if (Str::in_range(pos)) { match_results mr = Regexp::create_mr(); TEMPORARY_TEXT(elname) - @; + <>; Functions::new_element(current_str, elname, L); DISCARD_TEXT(elname) Regexp::dispose_of(&mr); @@ -98,9 +98,9 @@ We need to extract the element name, |val|, and make a note of it. } DISCARD_TEXT(p) -@ The following reduces |unsigned long long int *val;| to just |int *val;|. +@ The following reduces [[unsigned long long int *val;]] to just [[int *val;]]. -@ = +<>= wchar_t *modifier_patterns[] = { L"(struct )(%C%c*)", L"(signed )(%C%c*)", L"(unsigned )(%C%c*)", L"(short )(%C%c*)", L"(long )(%C%c*)", L"(static )(%C%c*)", NULL }; @@ -115,40 +115,40 @@ We need to extract the element name, |val|, and make a note of it. } } -@ At this point |p| has been reduced to |int *val;|, but the following moves -|pos| to point to the |*|: +@ At this point [[p]] has been reduced to [[int *val;]], but the following moves +[[pos]] to point to the [[*]]: -@ = +<>= while ((Str::get(pos)) && (Characters::is_space_or_tab(Str::get(pos)) == FALSE)) pos = Str::forward(pos); -@ And this moves it past the |*| to point to the |v| in |int *val;|: +@ And this moves it past the [[*]] to point to the [[v]] in [[int *val;]]: -@ = - while ((Characters::is_space_or_tab(Str::get(pos))) || (Str::get(pos) == '*') || +<>= + while ((Characters::is_space_or_tab(Str::get(pos))) || (Str::get(pos) == '*') || (Str::get(pos) == '(') || (Str::get(pos) == ')')) pos = Str::forward(pos); -@ This then first copies the substring |val;| into |elname|, then cuts that -down to just the identifier characters at the front, i.e., to |val|. 
+@ This then first copies the substring [[val;]] into [[elname]], then cuts that +down to just the identifier characters at the front, i.e., to [[val]]. -@ = +<>= Str::substr(elname, pos, Str::end(p)); if (Regexp::match(&mr, elname, L"(%i+)%c*")) Str::copy(elname, mr.exp[0]); -@h Structure dependency. -We say that S depends on T if |struct S| has an element whose type is -|struct T|. That matters because if so then |struct T| has to be defined -before |struct S| in the tangled output. +@ \section{Structure dependency.} +We say that S depends on T if [[struct S]] has an element whose type is +[[struct T]]. That matters because if so then [[struct T]] has to be defined +before [[struct S]] in the tangled output. + +It's important to note that [[struct S]] merely having a member of type +[[struct *T]] does not create a dependency. In the code below, because [[%i]] +matches only identifier characters and [[*]] is not one of those, a line like -It's important to note that |struct S| merely having a member of type -|struct *T| does not create a dependency. In the code below, because |%i| -matches only identifier characters and |*| is not one of those, a line like -= (text) struct fruit *often_confused_with; -= + will not trip the switch here. -@ = +<>= language_type *current_str; LOOP_OVER(current_str, language_type) { for (source_line *L = current_str->structure_header_at; @@ -156,12 +156,12 @@ will not trip the switch here. L = L->next_line) { match_results mr = Regexp::create_mr(); if (Regexp::match(&mr, L->text, L" struct (%i+) %i%c*")) - @; + <>; Regexp::dispose_of(&mr); } } -@ = +<>= text_stream *used_structure = mr.exp[0]; language_type *str; LOOP_OVER_LINKED_LIST(str, language_type, W->language_types) @@ -169,8 +169,8 @@ will not trip the switch here. (Str::eq(used_structure, str->structure_name))) ADD_TO_LINKED_LIST(str, language_type, current_str->incorporates); -@h Functions. 
-This time, we will need to keep track of |#ifdef| and |#endif| pairs +@ \section{Functions.} +This time, we will need to keep track of [[#ifdef]] and [[#endif]] pairs in the source. This matters because we will want to predeclare functions; but if functions are declared in conditional compilation, then their predeclarations have to be made under the same conditions. @@ -178,9 +178,10 @@ predeclarations have to be made under the same conditions. The following stack holds the current set of conditional compilations which the source line being scanned lies within. -@d MAX_CONDITIONAL_COMPILATION_STACK 8 +<<*>>= +#define MAX_CONDITIONAL_COMPILATION_STACK 8 -= +<<*>>= int cc_sp = 0; source_line *cc_stack[MAX_CONDITIONAL_COMPILATION_STACK]; @@ -192,14 +193,14 @@ void CLike::parse_functions(programming_language *self, web *W) { if ((L->category == CODE_BODY_LCAT) || (L->category == BEGIN_DEFINITION_LCAT) || (L->category == CONT_DEFINITION_LCAT)) { - @; - @; + <>; + <>; } if (cc_sp > 0) Main::error_in_web(I"program ended with conditional compilation open", NULL); } -@ = +<>= match_results mr = Regexp::create_mr(); if ((Regexp::match(&mr, L->text, L" *#ifn*def %c+")) || (Regexp::match(&mr, L->text, L" *#IFN*DEF %c+"))) { @@ -217,27 +218,27 @@ void CLike::parse_functions(programming_language *self, web *W) { } @ So, then, we recognise a C function as being a line which takes the form -= (text) + type identifier(args... -= -where we parse |type| only minimally. In InC (only), the identifier can -contain namespace dividers written |::|. Function declarations, we will assume, + +where we parse [[type]] only minimally. In InC (only), the identifier can +contain namespace dividers written [[::]]. Function declarations, we will assume, always begin on column 1 of their source files, and we expect them to take modern ANSI C style, not the long-deprecated late 1970s C style. 
-@ = +<>= if (!(Characters::is_space_or_tab(Str::get_first_char(L->text)))) { TEMPORARY_TEXT(qualifiers) TEMPORARY_TEXT(modified) Str::copy(modified, L->text); - @; + <>; match_results mr = Regexp::create_mr(); if (Regexp::match(&mr, modified, L"(%i+) (%**)(%i+)%((%c*)")) { TEMPORARY_TEXT(ftype) Str::copy(ftype, mr.exp[0]); TEMPORARY_TEXT(asts) Str::copy(asts, mr.exp[1]); TEMPORARY_TEXT(fname) Str::copy(fname, mr.exp[2]); TEMPORARY_TEXT(arguments) Str::copy(arguments, mr.exp[3]); - @; + <>; DISCARD_TEXT(ftype) DISCARD_TEXT(asts) DISCARD_TEXT(fname) @@ -250,9 +251,9 @@ modern ANSI C style, not the long-deprecated late 1970s C style. @ C has a whole soup of reserved words applying to types, but most of them can't apply to the return type of a function. We do, however, iterate so that -forms like |static long long int| will work. +forms like [[static long long int]] will work. -@ = +<>= wchar_t *modifier_patterns[] = { L"(signed )(%C%c*)", L"(unsigned )(%C%c*)", L"(short )(%C%c*)", L"(long )(%C%c*)", L"(static )(%C%c*)", NULL }; @@ -269,8 +270,8 @@ forms like |static long long int| will work. Regexp::dispose_of(&mr); } -@ = - @; +<>= + <>; language_function *fn = Functions::new_function(fname, L); fn->function_arguments = Str::duplicate(arguments); WRITE_TO(fn->function_type, "%S%S %S", qualifiers, ftype, asts); @@ -279,17 +280,18 @@ forms like |static long long int| will work. for (int i=0; iwithin_conditionals[i] = cc_stack[i]; @ In some cases the function's declaration runs over several lines: -= (text as code) + void World::Subjects::make_adj_const_domain(inference_subject *infs,| instance *nc, property *prn) {| -= -Having read the first line, |arguments| would contain |inference_subject *infs,| + +Having read the first line, [[arguments]] would contain [[inference_subject *infs,]] and would thus be incomplete. We continue across subsequent lines until we -reach an open brace |{|. +reach an open brace [[{]]. 
-@d MAX_ARG_LINES 32 /* maximum number of lines over which a function's header can extend */ +<<*>>= +#define MAX_ARG_LINES 32 /* maximum number of lines over which a function's header can extend */ -@ = +<>= source_line *AL = L; int arg_lc = 1; while ((AL) && (arg_lc <= MAX_ARG_LINES) && (Regexp::find_open_brace(arguments) == -1)) { @@ -307,12 +309,12 @@ reach an open brace |{|. int n = Regexp::find_open_brace(arguments); if (n >= 0) Str::truncate(arguments, n); -@h Subcategorisation. +@ \section{Subcategorisation.} The following is called after the parser gives every line in the web a category; we can, if we wish, change that for a more exotic one. We simply -look for a |#include| of one of the ANSI C standard libraries. +look for a [[#include]] of one of the ANSI C standard libraries. -= +<<*>>= void CLike::subcategorise_code(programming_language *self, source_line *L) { match_results mr = Regexp::create_mr(); if (Regexp::match(&mr, L->text, L"#include <(%C+)>%c*")) { @@ -330,18 +332,18 @@ void CLike::subcategorise_code(programming_language *self, source_line *L) { Regexp::dispose_of(&mr); } -@h Tangling extras. +@ \section{Tangling extras.} "Additional early matter" is used for the inclusions of the ANSI library files. We need to do that early, because otherwise types declared in them -(such as |FILE|) won't exist in time for the structure definitions we will +(such as [[FILE]]) won't exist in time for the structure definitions we will be tangling next. -It might seem reasonable to move all |#include| files up front this way, +It might seem reasonable to move all [[#include]] files up front this way, not just the ANSI ones. But that would defeat any conditional compilation around the inclusions; which Inform (for instance) needs in order to make platform-specific details to handle directories without POSIX in Windows. 
-= +<<*>>= void CLike::additional_early_matter(programming_language *self, text_stream *OUT, web *W, tangle_target *target) { chapter *C; section *S; @@ -354,23 +356,23 @@ void CLike::additional_early_matter(programming_language *self, text_stream *OUT } } -@h Tangling predeclarations. +@ \section{Tangling predeclarations.} This is where a language gets the chance to tangle predeclarations, early on in the file. We use it first for the structures, and then the functions -- in that order since the function types likely involve the typedef names for the structures. -= +<<*>>= void CLike::additional_predeclarations(programming_language *self, text_stream *OUT, web *W) { - @; - @; - @; + <>; + <>; + <>; } @ A "simple typedef" here means one that is aliasing something other than -a structure: for example |typedef unsigned int uint;| would be a simple typedef. +a structure: for example [[typedef unsigned int uint;]] would be a simple typedef. -@ = +<>= chapter *C; section *S; LOOP_WITHIN_TANGLE(C, S, Tangler::primary_target(W)) @@ -385,11 +387,11 @@ a structure: for example |typedef unsigned int uint;| would be a simple typedef. precede outer, but we need to be careful to be terminating if the source code we're given is not well founded because of an error by its programmer: for example, that structure A contains B contains C contains A. We do this -with the |tangled| flag, which is |FALSE| if a structure hasn't been -started yet, |NOT_APPLICABLE| if it's in progress, and |TRUE| if it's +with the [[tangled]] flag, which is [[FALSE]] if a structure hasn't been +started yet, [[NOT_APPLICABLE]] if it's in progress, and [[TRUE]] if it's finished. -@ = +<>= language_type *str; LOOP_OVER_LINKED_LIST(str, language_type, W->language_types) str->tangled = FALSE; @@ -398,7 +400,7 @@ finished. 
@ Using the following recursion, which is therefore terminating: -= +<<*>>= void CLike::tangle_structure(OUTPUT_STREAM, programming_language *self, language_type *str) { if (str->tangled != FALSE) return; str->tangled = NOT_APPLICABLE; @@ -417,12 +419,12 @@ void CLike::tangle_structure(OUTPUT_STREAM, programming_language *self, language } @ Functions are rather easier to deal with. In general, if a function was -defined within some number of nested |#ifdef| or |#ifndef| directives, then +defined within some number of nested [[#ifdef]] or [[#ifndef]] directives, then we reproduce those around the predeclaration: except, as a special trick, if the line contains a particular comment. For example: -= (text) + #ifdef SOLARIS /* inweb: always predeclare */ -= + That exempts any functions inside this condition from meeting the condition in order to be predeclared. It's a trick used in the foundation module just a couple of times: the idea is that although a definition of the functions @@ -431,7 +433,7 @@ provide alternative function definitions which would work without SOLARIS. The functions therefore need predeclaration regardless, because they will exist either way. -@ = +<>= chapter *C; section *S; LOOP_WITHIN_TANGLE(C, S, Tangler::primary_target(W)) @@ -467,6 +469,6 @@ exist either way. } } -@h Overriding regular code weaving. +@ \section{Overriding regular code weaving.} We have the opportunity here to sidestep the regular weaving algorithm, and do our own thing. We decline. diff --git a/Chapter_4/InC Support.w b/Chapter_4/InC_Support.nw similarity index 79% rename from Chapter_4/InC Support.w rename to Chapter_4/InC_Support.nw index f15f7bf..48e3712 100644 --- a/Chapter_4/InC Support.w +++ b/Chapter_4/InC_Support.nw @@ -2,11 +2,11 @@ To support a modest extension of C called InC. -@h Creation. +@ \section{Creation.} As can be seen, InC is a basically C-like language, but in addition to having all of those methods, it has a whole lot more of its own. 
-= +<<*>>= void InCSupport::add_features(programming_language *pl) { METHOD_ADD(pl, FURTHER_PARSING_PAR_MTID, InCSupport::further_parsing); @@ -29,13 +29,13 @@ void InCSupport::add_features(programming_language *pl) { @ We will apply this special tag wherever Preform grammar is defined: -= +<<*>>= theme_tag *Preform_theme = NULL; -@h Parsing methods. +@ \section{Parsing methods.} We only provide one parsing method, but it's a big one: -= +<<*>>= preform_nonterminal *alphabetical_list_of_nonterminals = NULL; void InCSupport::further_parsing(programming_language *self, web *W) { @@ -43,46 +43,47 @@ void InCSupport::further_parsing(programming_language *self, web *W) { section *S; LOOP_WITHIN_TANGLE(C, S, Tangler::primary_target(W)) if ((L->category == CODE_BODY_LCAT) || (L->category == CONT_DEFINITION_LCAT)) { - @; - @ + <>; + <> } } -@h Parsing Preform grammar. +@ \section{Parsing Preform grammar.} This is where we look for declarations of nonterminals. Very little about the following code will make sense unless you've first read the Preform -section of the |words| module, which is what we're supporting, and seen +section of the [[words]] module, which is what we're supporting, and seen some examples of Preform being used in the Inform source code. -In parsing, we categorise the opening lines |PREFORM_LCAT|. Subsequent lines -of grammar are |PREFORM_GRAMMAR_LCAT|; but the lines of InC code inside an -|internal| definition remain just plain |CODE_BODY_LCAT| lines. +In parsing, we categorise the opening lines [[PREFORM_LCAT]]. Subsequent lines +of grammar are [[PREFORM_GRAMMAR_LCAT]]; but the lines of InC code inside an +[[internal]] definition remain just plain [[CODE_BODY_LCAT]] lines. 
-@d NOT_A_NONTERMINAL -4 -@d A_FLEXIBLE_NONTERMINAL -3 -@d A_VORACIOUS_NONTERMINAL -2 -@d A_GRAMMAR_NONTERMINAL -1 +<<*>>= +#define NOT_A_NONTERMINAL -4 +#define A_FLEXIBLE_NONTERMINAL -3 +#define A_VORACIOUS_NONTERMINAL -2 +#define A_GRAMMAR_NONTERMINAL -1 -@ = +<>= int form = NOT_A_NONTERMINAL; /* one of the four values above, or a non-negative word count */ TEMPORARY_TEXT(pntname) TEMPORARY_TEXT(header) - @; - if (form != NOT_A_NONTERMINAL) @; + <>; + if (form != NOT_A_NONTERMINAL) <>; DISCARD_TEXT(pntname) DISCARD_TEXT(header) -@ The keyword |internal| can be followed by an indication of the number +@ The keyword [[internal]] can be followed by an indication of the number of words the nonterminal will match: usually a decimal non-negative number, -but optionally a question mark |?| to indicate voracity. +but optionally a question mark [[?]] to indicate voracity. -@ = +<>= match_results mr = Regexp::create_mr(); if (Regexp::match(&mr, L->text, L"(<%p+>) ::=%c*")) { form = A_GRAMMAR_NONTERMINAL; Str::copy(pntname, mr.exp[0]); Str::copy(header, mr.exp[0]); - @; + <>; } else if (Regexp::match(&mr, L->text, L"((<%p+>) internal %?) {%c*")) { form = A_VORACIOUS_NONTERMINAL; Str::copy(pntname, mr.exp[1]); @@ -101,16 +102,16 @@ but optionally a question mark |?| to indicate voracity. 
@ Each Preform nonterminal defined in the tangle will cause one of these structures to be created: -= +<<*>>= typedef struct preform_nonterminal { - struct text_stream *nt_name; /* e.g., || */ - struct text_stream *unangled_name; /* e.g., |action-clause| */ - struct text_stream *as_C_identifier; /* e.g., |action_clause_NTM| */ + struct text_stream *nt_name; /* e.g., [[]] */ + struct text_stream *unangled_name; /* e.g., [[action-clause]] */ + struct text_stream *as_C_identifier; /* e.g., [[action_clause_NTM]] */ int as_function; /* defined internally, that is, parsed by a C language_function */ int voracious; /* a voracious nonterminal: see "The English Syntax of Inform" */ int min_word_count; /* for internals only */ int max_word_count; - int takes_pointer_result; /* right-hand formula defines |*XP|, not |*X| */ + int takes_pointer_result; /* right-hand formula defines [[*XP]], not [[*X]] */ struct source_line *where_defined; struct preform_nonterminal *next_pnt_alphabetically; CLASS_DEFINITION @@ -118,21 +119,21 @@ typedef struct preform_nonterminal { @ We will -@ = +<>= preform_nonterminal *pnt = CREATE(preform_nonterminal); pnt->where_defined = L; pnt->nt_name = Str::duplicate(pntname); pnt->unangled_name = Str::duplicate(pntname); pnt->as_C_identifier = Str::duplicate(pntname); pnt->next_pnt_alphabetically = NULL; - @; - @; - @; + <>; + <>; + <>; - @; - @; + <>; + <>; -@ = +<>= match_results mr = Regexp::create_mr(); if (Regexp::match(&mr, pntname, L"%<(%c*)%>")) pnt->unangled_name = Str::duplicate(mr.exp[0]); Regexp::dispose_of(&mr); @@ -142,7 +143,7 @@ will be represented by a pointer to a unique data structure for it. Inweb automatically compiles code to create these pointers; and here's how it works out their names. -@ = +<>= Str::delete_first_character(pnt->as_C_identifier); LOOP_THROUGH_TEXT(pos, pnt->as_C_identifier) { if (Str::get(pos) == '-') Str::put(pos, '_'); @@ -155,9 +156,10 @@ de Scudéry, published around 1650, runs to 1,954,300 words.
If you can write an Inform source text 500 times longer than that, then you may need to raise the following definition: -@d INFINITE_WORD_COUNT 1000000000 +<<*>>= +#define INFINITE_WORD_COUNT 1000000000 -@ = +<>= pnt->voracious = FALSE; if (form == A_VORACIOUS_NONTERMINAL) pnt->voracious = TRUE; pnt->as_function = TRUE; if (form == A_GRAMMAR_NONTERMINAL) pnt->as_function = FALSE; @@ -174,7 +176,7 @@ the following definition: pnt->min_word_count = min; pnt->max_word_count = max; -@ = +<>= if (alphabetical_list_of_nonterminals == NULL) alphabetical_list_of_nonterminals = pnt; else { int placed = FALSE; @@ -197,24 +199,24 @@ the following definition: if (placed == FALSE) last->next_pnt_alphabetically = pnt; } -@ = +<>= L->preform_nonterminal_defined = pnt; if (Preform_theme) Tags::add_to_paragraph(L->owning_paragraph, Preform_theme, NULL); L->category = PREFORM_LCAT; L->text_operand = Str::duplicate(header); -@h Parsing the body of Preform grammar. -After a line like | ::=|, Preform grammar follows on subsequent +@ \section{Parsing the body of Preform grammar.} +After a line like [[ ::=]], Preform grammar follows on subsequent lines until we hit the end of the paragraph, or a white-space line, whichever -comes first. Each line of grammar is categorised |PREFORM_GRAMMAR_LCAT|. +comes first. Each line of grammar is categorised [[PREFORM_GRAMMAR_LCAT]]. If we have a line with an arrow, like so: -= (text) - porcupine tree ==> { 2, - }{} -= -then the text on the left goes into |text_operand| and the right into -|text_operand2|, with the arrow itself (and white space around it) cut out. -@ = + porcupine tree ==> { 2, - }{} + +then the text on the left goes into [[text_operand]] and the right into +[[text_operand2]], with the arrow itself (and white space around it) cut out. 
+ +<>= Tags::add_by_name(L->owning_paragraph, I"Preform"); source_line *AL; for (AL = L; (AL) && (AL->category == CODE_BODY_LCAT); AL = AL->next_line) { @@ -228,29 +230,29 @@ then the text on the left goes into |text_operand| and the right into AL->text_operand = AL->text; AL->text_operand2 = Str::new(); } - @; - @; + <>; + <>; Regexp::dispose_of(&mr); } @ In case we have a comment at the end of the grammar, like this: -= (text) + porcupine tree /* what happens now? */ -= + we want to remove it. The regular expression here isn't terribly legible, but trust me, it's correct. -@ = +<>= match_results mr = Regexp::create_mr(); if (Regexp::match(&mr, AL->text_operand, L"(%c*)%/%*%c*%*%/ *")) AL->text_operand = Str::duplicate(mr.exp[0]); Regexp::dispose_of(&mr); @ Note that nonterminal variables are, by default, integers. If their names -are divided internally with a colon, however, as |<>|, then -they have the type |structure *|. +are divided internally with a colon, however, as [[<>]], then +they have the type [[structure *]]. -@ = +<>= TEMPORARY_TEXT(to_scan) Str::copy(to_scan, AL->text_operand2); match_results mr = Regexp::create_mr(); while (Regexp::match(&mr, to_scan, L"%c*?<<(%P+?)>> =(%c*)")) { @@ -265,7 +267,7 @@ they have the type |structure *|. LOOP_OVER(ntv, nonterminal_variable) if (Str::eq(ntv->ntv_name, var_given)) break; - if (ntv == NULL) @; + if (ntv == NULL) <>; DISCARD_TEXT(var_given) DISCARD_TEXT(type_given) } @@ -273,19 +275,19 @@ they have the type |structure *|. Regexp::dispose_of(&mr); @ Nonterminal variables are actually just global C variables, and their C -identifiers need to avoid hyphens and colons. For example, |<>| -has identifier |"kind_ref_NTMV"|. Each one is recorded in a structure thus: +identifiers need to avoid hyphens and colons. For example, [[<>]] +has identifier [["kind_ref_NTMV"]]. 
Each one is recorded in a structure thus: -= +<<*>>= typedef struct nonterminal_variable { - struct text_stream *ntv_name; /* e.g., |"num"| */ - struct text_stream *ntv_type; /* e.g., |"int"| */ - struct text_stream *ntv_identifier; /* e.g., |"num_NTMV"| */ + struct text_stream *ntv_name; /* e.g., [["num"]] */ + struct text_stream *ntv_type; /* e.g., [["int"]] */ + struct text_stream *ntv_identifier; /* e.g., [["num_NTMV"]] */ struct source_line *first_mention; /* first usage */ CLASS_DEFINITION } nonterminal_variable; -@ = +<>= ntv = CREATE(nonterminal_variable); ntv->ntv_name = Str::duplicate(var_given); ntv->ntv_type = Str::duplicate(type_given); @@ -296,12 +298,12 @@ typedef struct nonterminal_variable { WRITE_TO(ntv->ntv_identifier, "%S_NTMV", var_given); ntv->first_mention = AL; -@h Parsing I-literals. +@ \section{Parsing I-literals.} A simpler but useful further addition to C is that we recognise a new form -of string literal: |I"quartz"| makes a constant text stream with the content +of string literal: [[I"quartz"]] makes a constant text stream with the content "quartz". -@ = +<>= for (int i = 0, quoted = FALSE; i < Str::len(L->text); i++) { if (Str::get_at(L->text, i) == '"') if ((Str::get_at(L->text, i-1) != '\\') && @@ -309,10 +311,10 @@ of string literal: |I"quartz"| makes a constant text stream with the content quoted = quoted?FALSE:TRUE; if ((fundamental_mode != WEAVE_MODE) && (quoted == FALSE) && (Str::get_at(L->text, i) == 'I') && (Str::get_at(L->text, i+1) == '"')) - @; + <>; } -@ = +<>= TEMPORARY_TEXT(lit) int i_was = i; int ended = FALSE; @@ -321,14 +323,14 @@ of string literal: |I"quartz"| makes a constant text stream with the content if (Str::get_at(L->text, i) == '"') { ended = TRUE; break; } PUT_TO(lit, Str::get_at(L->text, i++)); } - if (ended) @; + if (ended) <>; DISCARD_TEXT(lit) @ Each I-literal results in an instance of the following being created. 
The -I-literal |I"quartz"| would have content |quartz| and identifier something -like |TL_IS_123|. +I-literal [[I"quartz"]] would have content [[quartz]] and identifier something +like [[TL_IS_123]]. -= +<<*>>= typedef struct text_literal { struct text_stream *tl_identifier; struct text_stream *tl_content; @@ -336,14 +338,14 @@ typedef struct text_literal { } text_literal; @ So suppose we've got a line of web such as -= (text) + text_stream *T = I"quartz"; -= + We create the necessary I-literal, and splice the line so that it now reads -|text_stream *T = TL_IS_123;|. (That's why we don't call any of this on a +[[text_stream *T = TL_IS_123;]]. (That's why we don't call any of this on a weave run; we're actually amending the code of the web.) -@ = +<>= text_literal *tl = CREATE(text_literal); tl->tl_identifier = Str::new(); WRITE_TO(tl->tl_identifier, "TL_IS_%d", tl->allocation_id); @@ -360,11 +362,11 @@ weave run; we're actually amending the code of the web.) DISCARD_TEXT(before) DISCARD_TEXT(after) -@h Tangling methods. -Suppress the expansion of macros occurring on a line introduced by a |//| +@ \section{Tangling methods.} +Suppress the expansion of macros occurring on a line introduced by a [[//]] comment. (This avoids problems when tangling code that's been commented out.) 
-= +<<*>>= int InCSupport::suppress_expansion(programming_language *self, text_stream *material) { if ((Str::get_at(material, 0) == '/') && (Str::get_at(material, 1) == '/')) return TRUE; @@ -372,23 +374,23 @@ int InCSupport::suppress_expansion(programming_language *self, text_stream *mate } @ InC does three things which C doesn't: it allows the namespaced function -names like |Section::function()|; it allows Foundation-class-style string -literals marked with an I, |I"like this"|, which we will call I-literals; +names like [[Section::function()]]; it allows Foundation-class-style string +literals marked with an I, [[I"like this"]], which we will call I-literals; and it allows Preform natural language grammar to be mixed in with code. The following routine is a hook needed for two of these. It recognises two special tangling commands: -(a) |[[nonterminals]]| tangles to code which initialises the Preform +(a) [[[[nonterminals]]]] tangles to code which initialises the Preform grammar. (The grammar defines the meaning of nonterminals such as -||. They're not terminal in the sense that they are defined +[[]]. They're not terminal in the sense that they are defined as combinations of other things.) In practice, this needs to appear once in any program using Preform. For the Inform project, that's done in the -|words| module of the Inform 7 compiler. +[[words]] module of the Inform 7 compiler. -(b) |[[textliterals]]| tangles to code which initialises the I-literals. +(b) [[[[textliterals]]]] tangles to code which initialises the I-literals. -= +<<*>>= int InCSupport::special_tangle_command(programming_language *me, OUTPUT_STREAM, text_stream *data) { if (Str::eq_wide_string(data, L"nonterminals")) { WRITE("register_tangled_nonterminals();\n"); @@ -404,15 +406,15 @@ int InCSupport::special_tangle_command(programming_language *me, OUTPUT_STREAM, @ Time to predeclare things. 
InC is going to create a special function, right at the end of the code, which "registers" the nonterminals, creating their run-time data structures; we must predeclare this function. It will set values -for the pointers |action_clause_NTM|, and so on; these are global variables, -which we initially declare as |NULL|. +for the pointers [[action_clause_NTM]], and so on; these are global variables, +which we initially declare as [[NULL]]. -We also declare the nonterminal variables like |kind_ref_NTMV|, initialising -all integers to zero and all pointers to |NULL|. +We also declare the nonterminal variables like [[kind_ref_NTMV]], initialising +all integers to zero and all pointers to [[NULL]]. We do something similar, but simpler, to declare text stream constants. -= +<<*>>= void InCSupport::additional_predeclarations(programming_language *self, text_stream *OUT, web *W) { chapter *C; section *S; @@ -441,7 +443,7 @@ void InCSupport::additional_predeclarations(programming_language *self, text_str @ And here are the promised routines, which appear at the very end of the code. They make use of macros and data structures defined in the Inform 7 web. -= +<<*>>= void InCSupport::gnabehs(programming_language *self, text_stream *OUT, web *W) { WRITE("void register_tangled_nonterminals(void) {\n"); chapter *C; @@ -472,10 +474,10 @@ void InCSupport::gnabehs(programming_language *self, text_stream *OUT, web *W) { @ That's it for big structural additions to the tangled C code. Now we turn to how to tangle the lines we've given special categories to. -We need to tangle |PREFORM_LCAT| lines (those holding nonterminal declarations) +We need to tangle [[PREFORM_LCAT]] lines (those holding nonterminal declarations) in a special way... 
-= +<<*>>= int InCSupport::will_insert_in_tangle(programming_language *self, source_line *L) { if (L->category == PREFORM_LCAT) return TRUE; return FALSE; @@ -483,20 +485,20 @@ int InCSupport::will_insert_in_tangle(programming_language *self, source_line *L @ ...and this is how. As can be seen, each nonterminal turns into a C function. In the case of an internal definition, like -= (text) + internal { -= + we tangle this opening line to -= (text as code) + int k_kind_for_template_NTM(wording W, int *X, void **XP) { -= -that is, to a function which returns |TRUE| if it makes a match on the text -excerpt in Inform's source text, |FALSE| otherwise; if it matches and produces -an integer and/or pointer result, these are copied into |*X| and |*XP|. The + +that is, to a function which returns [[TRUE]] if it makes a match on the text +excerpt in Inform's source text, [[FALSE]] otherwise; if it matches and produces +an integer and/or pointer result, these are copied into [[*X]] and [[*XP]]. The remaining lines of the function are tangled unaltered, i.e., following the same rules as for the body of any other C function. -= +<<*>>= void InCSupport::insert_in_tangle(programming_language *self, text_stream *OUT, source_line *L) { preform_nonterminal *pnt = L->preform_nonterminal_defined; if (pnt->as_function) { @@ -505,73 +507,73 @@ void InCSupport::insert_in_tangle(programming_language *self, text_stream *OUT, } else { WRITE("int %SC(int *X, void **XP, int *R, void **RP, wording *FW, wording W) {\n", pnt->as_C_identifier); - @; + <>; WRITE("}\n"); } } @ On the other hand, a grammar nonterminal tangles to a "compositor function". 
Thus the opening line -= (text) + ::= -= + tangles to a function header: -= (text as code) + int action_clause_NTMC(int *X, void **XP, int *R, void **RP, wording *FW, wording W) { -= -Subsequent lines of the nonterminal are categorised |PREFORM_GRAMMAR_LCAT| + +Subsequent lines of the nonterminal are categorised [[PREFORM_GRAMMAR_LCAT]] and thus won't tangle to code at all, by the usual rules; so we tangle from them directly here. Composition is what happens after a successful match of the text in the -word range |W|. The idea is that, especially if the pattern was +word range [[W]]. The idea is that, especially if the pattern was complicated, we will need to "compose" the results of parsing individual pieces of it into a result for the whole. These partial results can be found -in the arrays |R[n]| and |RP[n]| passed as parameters; recall that every +in the arrays [[R[n]]] and [[RP[n]]] passed as parameters; recall that every nonterminal has in principle both an integer and a pointer result, though often one or both is undefined. A simple example would be -= (text) + + ==> R[1] + R[2] -= + where the composition function would be called on a match of, say, "$5 + 7$", -and would find the values 5 and 7 in |R[1]| and |R[2]| respectively. It would -then add these together, store 12 in |*X|, and return |TRUE| to show that all +and would find the values 5 and 7 in [[R[1]]] and [[R[2]]] respectively. It would +then add these together, store 12 in [[*X]], and return [[TRUE]] to show that all was well. A more typical example, drawn from the actual Inform 7 web, is: -= (text) + ==> { - , Kinds::var_construction(R[2], RP[1]) } -= + which says that the composite result -- the right-hand formula -- is formed by calling a particular routine on the integer result of subexpression 2 -(||) and the pointer result of subexpression 1 -(||). The answer, the composite result, that is, must be -placed in |*X| and |*XP|. 
(Composition functions are also allowed to -invalidate the result, by returning |FALSE|, and have other tricks up their +([[]]) and the pointer result of subexpression 1 +([[]]). The answer, the composite result, that is, must be +placed in [[*X]] and [[*XP]]. (Composition functions are also allowed to +invalidate the result, by returning [[FALSE]], and have other tricks up their sleeves, but none of that is handled by Inweb: see the Inform 7 web for more on this.) -@ = +<>= int needs_collation = FALSE; for (source_line *AL = L->next_line; ((AL) && (AL->category == PREFORM_GRAMMAR_LCAT)); AL = AL->next_line) if (Str::len(AL->text_operand2) > 0) needs_collation = TRUE; - if (needs_collation) @ - else @; + if (needs_collation) <> + else <>; WRITE("\treturn TRUE;\n"); -@ In the absence of any |==>| formulae, we simply set |*X| to the default +@ In the absence of any [[==>]] formulae, we simply set [[*X]] to the default result supplied; this is the production number within the grammar (0 for the first line, 1 for the second, and so on) by default, with an undefined pointer. -@ = +<>= WRITE("\t*X = R[0];\n"); -@ = +<>= WRITE("\tswitch(R[0]) {\n"); int c = 0; for (source_line *AL = L->next_line; @@ -581,7 +583,7 @@ first line, 1 for the second, and so on) by default, with an undefined pointer. if (Str::len(formula) > 0) { LanguageMethods::insert_line_marker(OUT, AL->owning_section->sect_language, AL); WRITE("\t\tcase %d: ", c); - @; + <>; WRITE(";\n"); WRITE("#pragma clang diagnostic push\n"); WRITE("#pragma clang diagnostic ignored \"-Wunreachable-code\"\n"); @@ -597,19 +599,19 @@ and that it produces an integer or a pointer according to what the non-terminal expects as its main result. But we make one exception: if the formula begins with a paragraph macro, then it can't be an expression, and instead we read it as code in a void context. (This code will, we -assume, set |*X| and/or |*XP| in some ingenious way of its own.) 
+assume, set [[*X]] and/or [[*XP]] in some ingenious way of its own.) -Within the body of the formula, we allow a pseudo-macro to work: |WR[n]| -expands to word range |n| in the match which we're compositing. This actually +Within the body of the formula, we allow a pseudo-macro to work: [[WR[n]]] +expands to word range [[n]] in the match which we're compositing. This actually expands like so: -= (text as code) + action_clause_NTM->range_result[n] -= + which saves a good deal of typing. (A regular C preprocessor macro couldn't easily do this, because it needs to include the identifier name of the nonterminal being parsed.) -@ = +<>= match_results mr = Regexp::create_mr(); if (Regexp::match(&mr, formula, L"{ *(%c*?) *} *(%c*)")) { TEMPORARY_TEXT(rewritten) @@ -619,7 +621,7 @@ nonterminal being parsed.) InCSupport::expand_formula(OUT, AL, pnt, mr.exp[1], TRUE); DISCARD_TEXT(rewritten) } else { - if (!Regexp::match(&mr, formula, L"@<%c*")) { + if (!Regexp::match(&mr, formula, L"<<%c*")) { if (pnt->takes_pointer_result) WRITE("*XP = "); else WRITE("*X = "); } @@ -629,7 +631,7 @@ nonterminal being parsed.) @ -= +<<*>>= void InCSupport::expand_formula(text_stream *OUT, source_line *AL, preform_nonterminal *pnt, text_stream *formula, int full) { TEMPORARY_TEXT(expanded) @@ -657,7 +659,7 @@ void InCSupport::expand_formula(text_stream *OUT, source_line *AL, preform_nonte @ Going down from line level to the tangling of little excerpts of C code, we also provide for some other special extensions to C. 
-= +<<*>>= int InCSupport::tangle_line(programming_language *self, text_stream *OUT, text_stream *original) { InCSupport::tangle_line_inner(OUT, NULL, NULL, original); return TRUE; @@ -666,13 +668,13 @@ int InCSupport::tangle_line(programming_language *self, text_stream *OUT, text_s void InCSupport::tangle_line_inner(text_stream *OUT, source_line *AL, preform_nonterminal *pnt, text_stream *original) { int fcall_pos = -1; for (int i = 0; i < Str::len(original); i++) { - @; - @; + <>; + <>; if (Str::get_at(original, i) == '<') { if (Str::get_at(original, i+1) == '<') { - @; + <>; } else { - @; + <>; } } if (i == fcall_pos) { @@ -683,35 +685,36 @@ void InCSupport::tangle_line_inner(text_stream *OUT, source_line *AL, preform_no } } -@ For example, a function name like |Text::Parsing::get_next| must be rewritten -as |Text__Parsing__get_next| since colons aren't valid in C identifiers. The +@ For example, a function name like [[Text::Parsing::get_next]] must be rewritten +as [[Text__Parsing__get_next]] since colons aren't valid in C identifiers. The following is prone to all kinds of misreadings, of course; it picks up any use -of |::| between an alphanumberic character and a letter. In particular, code +of [[::]] between an alphanumeric character and a letter. In particular, code like -= (text) + printf("Trying Text::Parsing::get_next now.\n"); -= + will be rewritten as -= (text as code) + printf("Trying Text__Parsing__get_next now.\n"); -= + This is probably unwanted, but it doesn't matter, because these Inform-only extension features of Inweb aren't intended for general use: only for Inform, where no misreadings occur.
-@ = +<>= if ((i > 0) && (Str::get_at(original, i) == ':') && (Str::get_at(original, i+1) == ':') && (isalpha(Str::get_at(original, i+2))) && (isalnum(Str::get_at(original, i-1)))) { WRITE("__"); i++; continue; } -@ For example, |==> { A, B }| assigns the expressions A and B as the results +@ For example, [[==> { A, B }]] assigns the expressions A and B as the results of parsing a Preform nonterminal. -@d MAX_PREFORM_RESULT_CLAUSES 10 +<<*>>= +#define MAX_PREFORM_RESULT_CLAUSES 10 -@ = +<>= if ((Str::get_at(original, i) == '=') && (Str::get_at(original, i+1) == '=') && (Str::get_at(original, i+2) == '>') && @@ -719,11 +722,11 @@ of parsing a Preform nonterminal. (Str::get_at(original, i+4) == '{')) { int clauses, err = FALSE; text_stream *clause[MAX_PREFORM_RESULT_CLAUSES]; - @; + <>; TEMPORARY_TEXT(extra) - if (clauses == 1) @; + if (clauses == 1) <>; if (clauses < 2) err = TRUE; - if (err == FALSE) @; + if (err == FALSE) <>; if (err) { Main::error_in_web(I"malformed '{ , }' formula", AL); if (AL == NULL) WRITE_TO(STDERR, "%S\n", original); @@ -734,7 +737,7 @@ of parsing a Preform nonterminal. @ The clauses are a comma-separated list inside the braces, except that the commas need to be outside of any parentheses. -@ = +<>= clauses = 1; clause[0] = Str::new(); int bl = 0; @@ -761,7 +764,7 @@ commas need to be outside of any parentheses. are implemented by rewriting them in two clauses, and sometimes adding some extra code to execute after the assignments. -@ = +<>= if (Str::eq(clause[0], I"fail")) { clause[1] = Str::new(); clauses = 2; WRITE_TO(extra, "return FAIL_NONTERMINAL;"); @@ -808,10 +811,10 @@ extra code to execute after the assignments. for the current nonterminal; any subsequent clauses must specify which variable is to be set. A dash means make no assignment. -For example, |{ R[1], - , <> = R[2] }| sets |*X| to |R[1]|, does not -alter |*XP|, and sets |<>| to |R[2]|. 
+For example, [[{ R[1], - , <> = R[2] }]] sets [[*X]] to [[R[1]]], does not +alter [[*XP]], and sets [[<>]] to [[R[2]]]. -@ = +<>= for (int c=0; c>| to |R[2]|. } @ Angle brackets around a valid Preform variable name expand into its -C identifier; for example, |<>| becomes |most_recent_result|. -We take no action if it's not a valid name, so |<>| becomes -just |<>|. +C identifier; for example, [[<>]] becomes [[most_recent_result]]. +We take no action if it's not a valid name, so [[<>]] becomes +just [[<>]]. -@ = +<>= match_results mr = Regexp::create_mr(); TEMPORARY_TEXT(check_this) Str::substr(check_this, Str::at(original, i), Str::end(original)); @@ -863,22 +866,22 @@ just |<>|. DISCARD_TEXT(check_this) Regexp::dispose_of(&mr); -@ Similarly for nonterminals; || might become |k_kind_NTM|. +@ Similarly for nonterminals; [[]] might become [[k_kind_NTM]]. Here, though, there's a complication: -= (text) + if ((W)) { ... -= + must expand to: -= (text as code) + if (Text__Languages__parse_nt_against_word_range(k_kind_NTM, W, NULL, NULL)) { ... -= + This is all syntactic sugar to make it easier to see parsing in action. -Anyway, it means we have to set |fcall_pos| to remember to add in the -two |NULL| arguments when we hit the |)| a little later. We're doing all +Anyway, it means we have to set [[fcall_pos]] to remember to add in the +two [[NULL]] arguments when we hit the [[)]] a little later. We're doing all of this fairly laxly, but as before: it only needs to work for Inform, and Inform doesn't cause any trouble. -@ = +<>= match_results mr = Regexp::create_mr(); TEMPORARY_TEXT(check_this) Str::substr(check_this, Str::at(original, i), Str::end(original)); @@ -912,7 +915,7 @@ name. They're not very efficient, but experience shows that even on a web the size of Inform 7, there's no significant gain from speeding them up (with, say, a hash table). 
-= +<<*>>= preform_nonterminal *InCSupport::nonterminal_by_name(text_stream *name) { preform_nonterminal *pnt; LOOP_OVER(pnt, preform_nonterminal) @@ -921,12 +924,12 @@ preform_nonterminal *InCSupport::nonterminal_by_name(text_stream *name) { return NULL; } -@ The special variables |<>| and |<>| hold the results, +@ The special variables [[<>]] and [[<>]] hold the results, integer and pointer, for the most recent successful match. They're defined in the Inform 7 web (see the code for parsing text against Preform grammars), not by Inweb. -= +<<*>>= text_stream *InCSupport::nonterminal_variable_identifier(text_stream *name) { if (Str::eq_wide_string(name, L"r")) return I"most_recent_result"; if (Str::eq_wide_string(name, L"rp")) return I"most_recent_result_p"; @@ -944,10 +947,10 @@ simply thrown away. It doesn't appear anywhere in the C code tangled by Inweb. So what does happen to it? The answer is that it's transcribed into an -auxiliary file called |Syntax.preform|, which Inform, once it is compiled, +auxiliary file called [[Syntax.preform]], which Inform, once it is compiled, will read in at run-time. This is how that happens: -= +<<*>>= void InCSupport::additional_tangling(programming_language *self, web *W, tangle_target *target) { if (NUMBER_CREATED(preform_nonterminal) > 0) { pathname *P = Reader::tangled_folder(W); @@ -965,7 +968,7 @@ void InCSupport::additional_tangling(programming_language *self, web *W, tangle_ if (Bibliographic::data_exists(W->md, I"Preform Language")) WRITE("language %S\n", Bibliographic::get_datum(W->md, I"Preform Language")); - @; + <>; STREAM_CLOSE(OUT); } } @@ -976,13 +979,13 @@ right-hand side of the arrow in a grammar line uses a paragraph macro which mentions a problem message, then we transcribe a Preform comment to that effect. (This really is a comment: Inform ignores it, but it makes the file more comprehensible to human eyes.) For example, -= (text) -
kind ==> @ -= + +
kind ==> <> + (The code in this paragraph macro will indeed issue this problem message, we assume.) -@ = +<>= chapter *C; section *S; LOOP_WITHIN_TANGLE(C, S, target) @@ -1004,13 +1007,13 @@ assume.) } } -@h Weaving. +@ \section{Weaving.} The following isn't a method, but is called by the weaver directly. It adds additional endnotes to the woven form of a paragraph which includes Preform nonterminal definitions; it is meaningful only in the TeX format, and should probably be dropped. -= +<<*>>= void InCSupport::weave_grammar_index(OUTPUT_STREAM) { WRITE("\\raggedright\\tolerance=10000"); preform_nonterminal *pnt; @@ -1022,8 +1025,8 @@ void InCSupport::weave_grammar_index(OUTPUT_STREAM) { (pnt->as_function)?" (internal)":"", pnt->where_defined->owning_section->md->sect_range); int said_something = FALSE; - @; - @; + <>; + <>; if (said_something == FALSE) WRITE("\\par\\hangindent=3em{\\it unused}\n\n"); } @@ -1032,7 +1035,7 @@ void InCSupport::weave_grammar_index(OUTPUT_STREAM) { WRITE("\\hrule\\smallbreak\n"); } -@ = +<>= section *S; LOOP_OVER(S, section) S->scratch_flag = FALSE; hash_table_entry *hte = Analyser::find_hash_entry_for_section( @@ -1057,7 +1060,7 @@ void InCSupport::weave_grammar_index(OUTPUT_STREAM) { WRITE("\n\n"); } -@ = +<>= section *S; LOOP_OVER(S, section) S->scratch_flag = FALSE; hash_table_entry *hte = Analyser::find_hash_entry_for_section( @@ -1082,11 +1085,11 @@ void InCSupport::weave_grammar_index(OUTPUT_STREAM) { WRITE("\n\n"); } -@h Weaving methods. 
+@ \section{Weaving methods.} If we're weaving just a document of Preform grammar, then we skip any lines -of C code which appear in |internal| nonterminal definitions: +of C code which appear in [[internal]] nonterminal definitions: -= +<<*>>= int skipping_internal = FALSE, preform_production_count = 0; int InCSupport::skip_in_weaving(programming_language *self, weave_order *wv, source_line *L) { @@ -1103,7 +1106,7 @@ int InCSupport::skip_in_weaving(programming_language *self, weave_order *wv, sou @ And here is the TeX code for displaying Preform grammar: -= +<<*>>= int InCSupport::weave_code_line(programming_language *self, text_stream *OUT, weave_order *wv, web *W, chapter *C, section *S, source_line *L, text_stream *matter, text_stream *concluding_comment) { @@ -1114,17 +1117,17 @@ int InCSupport::weave_code_line(programming_language *self, text_stream *OUT, } @ In paragraphs where we spot Preform nonterminals being defined, we're -going to automatically apply the tag |^"Preform"|, but only if it already +going to automatically apply the tag [[^"Preform"]], but only if it already exists. We watch for it here: -= +<<*>>= void InCSupport::new_tag_declared(programming_language *self, theme_tag *tag) { if (Str::eq_wide_string(tag->tag_name, L"Preform")) Preform_theme = tag; } -@h Analysis methods. +@ \section{Analysis methods.} -= +<<*>>= void InCSupport::analyse_code(programming_language *self, web *W) { preform_nonterminal *pnt; LOOP_OVER(pnt, preform_nonterminal) diff --git a/Chapter_4/Language Methods.w b/Chapter_4/Language_Methods.nw similarity index 84% rename from Chapter_4/Language Methods.w rename to Chapter_4/Language_Methods.nw index f70c1b4..f5cb07b 100644 --- a/Chapter_4/Language Methods.w +++ b/Chapter_4/Language_Methods.nw @@ -3,12 +3,12 @@ To characterise the relevant differences in behaviour between the various programming languages supported. -@h Introduction. 
+@ \section{Introduction.} The conventions for writing, weaving and tangling a web are really quite independent of the programming language being written, woven or tangled; Knuth began literate programming with Pascal, but now uses C, and the original Pascal webs were mechanically translated into C ones with remarkably little -fuss or bother. Modern LP tools, such as |noweb|, aim to be language-agnostic. +fuss or bother. Modern LP tools, such as [[noweb]], aim to be language-agnostic. But of course if you act the same on all languages, you give up the benefits which might follow from knowing something about the languages you actually write in. @@ -23,64 +23,69 @@ all of them made from this section. That means a lot of simple wrapper routines which don't do very much. This section may still be useful to read, since it documents what amounts to an API. -@h Parsing methods. +@ \section{Parsing methods.} We begin with parsing extensions. When these are used, we have already read the web into chapters, sections and paragraphs, but for some languages we will need a more detailed picture. -|PARSE_TYPES_PAR_MTID| gives a language to look for type declarations. +[[PARSE_TYPES_PAR_MTID]] gives a language to look for type declarations. -@e PARSE_TYPES_PAR_MTID +<<*>>= +enum PARSE_TYPES_PAR_MTID -= +<<*>>= VOID_METHOD_TYPE(PARSE_TYPES_PAR_MTID, programming_language *pl, web *W) void LanguageMethods::parse_types(web *W, programming_language *pl) { VOID_METHOD_CALL(pl, PARSE_TYPES_PAR_MTID, W); } -@ |PARSE_FUNCTIONS_PAR_MTID| is, similarly, for function declarations. +@ [[PARSE_FUNCTIONS_PAR_MTID]] is, similarly, for function declarations. 
-@e PARSE_FUNCTIONS_PAR_MTID +<<*>>= +enum PARSE_FUNCTIONS_PAR_MTID -= +<<*>>= VOID_METHOD_TYPE(PARSE_FUNCTIONS_PAR_MTID, programming_language *pl, web *W) void LanguageMethods::parse_functions(web *W, programming_language *pl) { VOID_METHOD_CALL(pl, PARSE_FUNCTIONS_PAR_MTID, W); } -@ |FURTHER_PARSING_PAR_MTID| is "further" in that it is called when the main +@ [[FURTHER_PARSING_PAR_MTID]] is "further" in that it is called when the main parser has finished work; it typically looks over the whole web for something of interest. -@e FURTHER_PARSING_PAR_MTID +<<*>>= +enum FURTHER_PARSING_PAR_MTID -= +<<*>>= VOID_METHOD_TYPE(FURTHER_PARSING_PAR_MTID, programming_language *pl, web *W) void LanguageMethods::further_parsing(web *W, programming_language *pl) { VOID_METHOD_CALL(pl, FURTHER_PARSING_PAR_MTID, W); } -@ |SUBCATEGORISE_LINE_PAR_MTID| looks at a single line, after the main parser +@ [[SUBCATEGORISE_LINE_PAR_MTID]] looks at a single line, after the main parser has given it a category. The idea is not so much to second-guess the parser (although we can) but to change to a more exotic category which it would otherwise never produce. -@e SUBCATEGORISE_LINE_PAR_MTID +<<*>>= +enum SUBCATEGORISE_LINE_PAR_MTID -= +<<*>>= VOID_METHOD_TYPE(SUBCATEGORISE_LINE_PAR_MTID, programming_language *pl, source_line *L) void LanguageMethods::subcategorise_line(programming_language *pl, source_line *L) { VOID_METHOD_CALL(pl, SUBCATEGORISE_LINE_PAR_MTID, L); } @ Comments have different syntax in different languages. The method here is -expected to look for a comment on the |line|, and if so to return |TRUE|, +expected to look for a comment on the [[line]], and if so to return [[TRUE]], but not before splicing the non-comment parts of the line before and within the comment into the supplied strings. 
-@e PARSE_COMMENT_TAN_MTID +<<*>>= +enum PARSE_COMMENT_TAN_MTID -= +<<*>>= INT_METHOD_TYPE(PARSE_COMMENT_TAN_MTID, programming_language *pl, text_stream *line, text_stream *before, text_stream *within) int LanguageMethods::parse_comment(programming_language *pl, @@ -90,17 +95,18 @@ int LanguageMethods::parse_comment(programming_language *pl, return rv; } -@h Tangling methods. +@ \section{Tangling methods.} We take these roughly in order of their effects on the tangled output, from the top to the bottom of the file. The top of the tangled file is a header called the "shebang". By default, -there's nothing there, but |SHEBANG_TAN_MTID| allows the language to add one. -For example, Perl prints |#!/usr/bin/perl| here. +there's nothing there, but [[SHEBANG_TAN_MTID]] allows the language to add one. +For example, Perl prints [[#!/usr/bin/perl]] here. -@e SHEBANG_TAN_MTID +<<*>>= +enum SHEBANG_TAN_MTID -= +<<*>>= VOID_METHOD_TYPE(SHEBANG_TAN_MTID, programming_language *pl, text_stream *OUT, web *W, tangle_target *target) void LanguageMethods::shebang(OUTPUT_STREAM, programming_language *pl, web *W, tangle_target *target) { VOID_METHOD_CALL(pl, SHEBANG_TAN_MTID, OUT, W, target); @@ -109,9 +115,10 @@ void LanguageMethods::shebang(OUTPUT_STREAM, programming_language *pl, web *W, t @ Next is the disclaimer, text warning the human reader that she is looking at tangled (therefore not original) material. -@e SUPPRESS_DISCLAIMER_TAN_MTID +<<*>>= +enum SUPPRESS_DISCLAIMER_TAN_MTID -= +<<*>>= INT_METHOD_TYPE(SUPPRESS_DISCLAIMER_TAN_MTID, programming_language *pl) void LanguageMethods::disclaimer(text_stream *OUT, programming_language *pl, web *W, tangle_target *target) { int rv = FALSE; @@ -123,25 +130,27 @@ void LanguageMethods::disclaimer(text_stream *OUT, programming_language *pl, web @ Next is the disclaimer, text warning the human reader that she is looking at tangled (therefore not original) material. 
-@e ADDITIONAL_EARLY_MATTER_TAN_MTID +<<*>>= +enum ADDITIONAL_EARLY_MATTER_TAN_MTID -= +<<*>>= VOID_METHOD_TYPE(ADDITIONAL_EARLY_MATTER_TAN_MTID, programming_language *pl, text_stream *OUT, web *W, tangle_target *target) void LanguageMethods::additional_early_matter(text_stream *OUT, programming_language *pl, web *W, tangle_target *target) { VOID_METHOD_CALL(pl, ADDITIONAL_EARLY_MATTER_TAN_MTID, OUT, W, target); } @ A tangled file then normally declares "definitions". The following write a -definition of the constant named |term| as the value given. If the value spans -multiple lines, the first-line part is supplied to |START_DEFN_TAN_MTID| and -then subsequent lines are fed in order to |PROLONG_DEFN_TAN_MTID|. At the end, -|END_DEFN_TAN_MTID| is called. +definition of the constant named [[term]] as the value given. If the value spans +multiple lines, the first-line part is supplied to [[START_DEFN_TAN_MTID]] and +then subsequent lines are fed in order to [[PROLONG_DEFN_TAN_MTID]]. At the end, +[[END_DEFN_TAN_MTID]] is called. -@e START_DEFN_TAN_MTID -@e PROLONG_DEFN_TAN_MTID -@e END_DEFN_TAN_MTID +<<*>>= +enum START_DEFN_TAN_MTID +enum PROLONG_DEFN_TAN_MTID +enum END_DEFN_TAN_MTID -= +<<*>>= INT_METHOD_TYPE(START_DEFN_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *term, text_stream *start, section *S, source_line *L) INT_METHOD_TYPE(PROLONG_DEFN_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *more, section *S, source_line *L) INT_METHOD_TYPE(END_DEFN_TAN_MTID, programming_language *pl, text_stream *OUT, section *S, source_line *L) @@ -171,9 +180,10 @@ void LanguageMethods::end_definition(OUTPUT_STREAM, programming_language *pl, @ Then we have some "predeclarations"; for example, for C-like languages we automatically predeclare all functions, obviating the need for header files. 
-@e ADDITIONAL_PREDECLARATIONS_TAN_MTID +<<*>>= +enum ADDITIONAL_PREDECLARATIONS_TAN_MTID -= +<<*>>= INT_METHOD_TYPE(ADDITIONAL_PREDECLARATIONS_TAN_MTID, programming_language *pl, text_stream *OUT, web *W) void LanguageMethods::additional_predeclarations(OUTPUT_STREAM, programming_language *pl, web *W) { VOID_METHOD_CALL(pl, ADDITIONAL_PREDECLARATIONS_TAN_MTID, OUT, W); @@ -184,9 +194,10 @@ the more routine matter, tangling ordinary paragraphs into code. Languages have the ability to suppress paragraph macro expansion: -@e SUPPRESS_EXPANSION_TAN_MTID +<<*>>= +enum SUPPRESS_EXPANSION_TAN_MTID -= +<<*>>= INT_METHOD_TYPE(SUPPRESS_EXPANSION_TAN_MTID, programming_language *pl, text_stream *material) int LanguageMethods::allow_expansion(programming_language *pl, text_stream *material) { int rv = FALSE; @@ -195,12 +206,13 @@ int LanguageMethods::allow_expansion(programming_language *pl, text_stream *mate } @ Inweb supports very few "tangle commands", that is, instructions written -inside double squares |[[Thus]]|. These can be handled by attaching methods -as follows, which return |TRUE| if they recognised and acted on the command. +inside double squares [[[[Thus]]]]. These can be handled by attaching methods +as follows, which return [[TRUE]] if they recognised and acted on the command. -@e TANGLE_COMMAND_TAN_MTID +<<*>>= +enum TANGLE_COMMAND_TAN_MTID -= +<<*>>= INT_METHOD_TYPE(TANGLE_COMMAND_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *data) int LanguageMethods::special_tangle_command(OUTPUT_STREAM, programming_language *pl, text_stream *data) { @@ -210,14 +222,15 @@ int LanguageMethods::special_tangle_command(OUTPUT_STREAM, programming_language } @ The following methods make it possible for languages to tangle unorthodox -lines into code. Ordinarily, only |CODE_BODY_LCAT| lines are tangled, but +lines into code. 
Ordinarily, only [[CODE_BODY_LCAT]] lines are tangled, but we can intervene to say that we want to tangle a different line; and if we do so, we should then act on that basis. -@e WILL_TANGLE_EXTRA_LINE_TAN_MTID -@e TANGLE_EXTRA_LINE_TAN_MTID +<<*>>= +enum WILL_TANGLE_EXTRA_LINE_TAN_MTID +enum TANGLE_EXTRA_LINE_TAN_MTID -= +<<*>>= INT_METHOD_TYPE(WILL_TANGLE_EXTRA_LINE_TAN_MTID, programming_language *pl, source_line *L) VOID_METHOD_TYPE(TANGLE_EXTRA_LINE_TAN_MTID, programming_language *pl, text_stream *OUT, source_line *L) int LanguageMethods::will_insert_in_tangle(programming_language *pl, source_line *L) { @@ -231,12 +244,13 @@ void LanguageMethods::insert_in_tangle(OUTPUT_STREAM, programming_language *pl, @ In order for C compilers to report C syntax errors on the correct line, despite rearranging by automatic tools, C conventionally recognises the -preprocessor directive |#line| to tell it that a contiguous extract follows +preprocessor directive [[#line]] to tell it that a contiguous extract follows from the given file; we generate this automatically. -@e INSERT_LINE_MARKER_TAN_MTID +<<*>>= +enum INSERT_LINE_MARKER_TAN_MTID -= +<<*>>= VOID_METHOD_TYPE(INSERT_LINE_MARKER_TAN_MTID, programming_language *pl, text_stream *OUT, source_line *L) void LanguageMethods::insert_line_marker(OUTPUT_STREAM, programming_language *pl, source_line *L) { VOID_METHOD_CALL(pl, INSERT_LINE_MARKER_TAN_MTID, OUT, L); @@ -244,12 +258,13 @@ void LanguageMethods::insert_line_marker(OUTPUT_STREAM, programming_language *pl @ The following hooks are provided so that we can top and/or tail the expansion of paragraph macros in the code. For example, C-like languages, use this to -splice |{| and |}| around the expanded matter. +splice [[{]] and [[}]] around the expanded matter. 
-@e BEFORE_MACRO_EXPANSION_TAN_MTID -@e AFTER_MACRO_EXPANSION_TAN_MTID +<<*>>= +enum BEFORE_MACRO_EXPANSION_TAN_MTID +enum AFTER_MACRO_EXPANSION_TAN_MTID -= +<<*>>= VOID_METHOD_TYPE(BEFORE_MACRO_EXPANSION_TAN_MTID, programming_language *pl, text_stream *OUT, para_macro *pmac) VOID_METHOD_TYPE(AFTER_MACRO_EXPANSION_TAN_MTID, programming_language *pl, text_stream *OUT, para_macro *pmac) void LanguageMethods::before_macro_expansion(OUTPUT_STREAM, programming_language *pl, para_macro *pmac) { @@ -261,12 +276,13 @@ void LanguageMethods::after_macro_expansion(OUTPUT_STREAM, programming_language @ It's a sad necessity, but sometimes we have to unconditionally tangle code for a preprocessor to conditionally read: that is, to tangle code which contains -|#ifdef| or similar preprocessor directive. +[[#ifdef]] or similar preprocessor directive. -@e OPEN_IFDEF_TAN_MTID -@e CLOSE_IFDEF_TAN_MTID +<<*>>= +enum OPEN_IFDEF_TAN_MTID +enum CLOSE_IFDEF_TAN_MTID -= +<<*>>= VOID_METHOD_TYPE(OPEN_IFDEF_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *symbol, int sense) VOID_METHOD_TYPE(CLOSE_IFDEF_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *symbol, int sense) void LanguageMethods::open_ifdef(OUTPUT_STREAM, programming_language *pl, text_stream *symbol, int sense) { @@ -278,9 +294,10 @@ void LanguageMethods::close_ifdef(OUTPUT_STREAM, programming_language *pl, text_ @ Now a routine to tangle a comment. Languages without comment should write nothing. 
-@e COMMENT_TAN_MTID +<<*>>= +enum COMMENT_TAN_MTID -= +<<*>>= VOID_METHOD_TYPE(COMMENT_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *comm) void LanguageMethods::comment(OUTPUT_STREAM, programming_language *pl, text_stream *comm) { VOID_METHOD_CALL(pl, COMMENT_TAN_MTID, OUT, comm); @@ -288,11 +305,12 @@ void LanguageMethods::comment(OUTPUT_STREAM, programming_language *pl, text_stre @ The inner code tangler now acts on all code known not to contain CWEB macros or double-square substitutions. In almost every language this simply -passes the code straight through, printing |original| to |OUT|. +passes the code straight through, printing [[original]] to [[OUT]]. -@e TANGLE_LINE_UNUSUALLY_TAN_MTID +<<*>>= +enum TANGLE_LINE_UNUSUALLY_TAN_MTID -= +<<*>>= INT_METHOD_TYPE(TANGLE_LINE_UNUSUALLY_TAN_MTID, programming_language *pl, text_stream *OUT, text_stream *original) void LanguageMethods::tangle_line(OUTPUT_STREAM, programming_language *pl, text_stream *original) { int rv = FALSE; @@ -302,9 +320,10 @@ void LanguageMethods::tangle_line(OUTPUT_STREAM, programming_language *pl, text_ @ We finally reach the bottom of the tangled file, a footer called the "gnabehs": -@e GNABEHS_TAN_MTID +<<*>>= +enum GNABEHS_TAN_MTID -= +<<*>>= VOID_METHOD_TYPE(GNABEHS_TAN_MTID, programming_language *pl, text_stream *OUT, web *W) void LanguageMethods::gnabehs(OUTPUT_STREAM, programming_language *pl, web *W) { VOID_METHOD_CALL(pl, GNABEHS_TAN_MTID, OUT, W); @@ -314,21 +333,23 @@ void LanguageMethods::gnabehs(OUTPUT_STREAM, programming_language *pl, web *W) { sidekick files alongside the main tangle file. This method exists to give them the opportunity. 
-@e ADDITIONAL_TANGLING_TAN_MTID +<<*>>= +enum ADDITIONAL_TANGLING_TAN_MTID -= +<<*>>= VOID_METHOD_TYPE(ADDITIONAL_TANGLING_TAN_MTID, programming_language *pl, web *W, tangle_target *target) void LanguageMethods::additional_tangling(programming_language *pl, web *W, tangle_target *target) { VOID_METHOD_CALL(pl, ADDITIONAL_TANGLING_TAN_MTID, W, target); } -@h Weaving methods. +@ \section{Weaving methods.} This metnod shouldn't do any actual weaving: it should simply initialise anything that the language in question might need later. -@e BEGIN_WEAVE_WEA_MTID +<<*>>= +enum BEGIN_WEAVE_WEA_MTID -= +<<*>>= VOID_METHOD_TYPE(BEGIN_WEAVE_WEA_MTID, programming_language *pl, section *S, weave_order *wv) void LanguageMethods::begin_weave(section *S, weave_order *wv) { VOID_METHOD_CALL(S->sect_language, BEGIN_WEAVE_WEA_MTID, S, wv); @@ -336,9 +357,10 @@ void LanguageMethods::begin_weave(section *S, weave_order *wv) { @ This method allows languages to tell the weaver to ignore certain lines. -@e SKIP_IN_WEAVING_WEA_MTID +<<*>>= +enum SKIP_IN_WEAVING_WEA_MTID -= +<<*>>= INT_METHOD_TYPE(SKIP_IN_WEAVING_WEA_MTID, programming_language *pl, weave_order *wv, source_line *L) int LanguageMethods::skip_in_weaving(programming_language *pl, weave_order *wv, source_line *L) { int rv = FALSE; @@ -351,9 +373,10 @@ a comment, inside qupted text, and so on); the following method is provided to reset that state, if so. Inweb runs it once per paragraph for safety's sake, which minimises the knock-on effect of any colouring mistakes. -@e RESET_SYNTAX_COLOURING_WEA_MTID +<<*>>= +enum RESET_SYNTAX_COLOURING_WEA_MTID -= +<<*>>= VOID_METHOD_TYPE(RESET_SYNTAX_COLOURING_WEA_MTID, programming_language *pl) void LanguageMethods::reset_syntax_colouring(programming_language *pl) { VOID_METHOD_CALL_WITHOUT_ARGUMENTS(pl, RESET_SYNTAX_COLOURING_WEA_MTID); @@ -361,9 +384,10 @@ void LanguageMethods::reset_syntax_colouring(programming_language *pl) { @ And this is where colouring is done. 
-@e SYNTAX_COLOUR_WEA_MTID +<<*>>= +enum SYNTAX_COLOUR_WEA_MTID -= +<<*>>= int colouring_state = PLAIN_COLOUR; INT_METHOD_TYPE(SYNTAX_COLOUR_WEA_MTID, programming_language *pl, @@ -386,13 +410,14 @@ int LanguageMethods::syntax_colour(programming_language *pl, return rv; } -@ This method is called for each code line to be woven. If it returns |FALSE|, the +@ This method is called for each code line to be woven. If it returns [[FALSE]], the weaver carries on in the normal way. If not, it does nothing, assuming that the method has already woven something more attractive. -@e WEAVE_CODE_LINE_WEA_MTID +<<*>>= +enum WEAVE_CODE_LINE_WEA_MTID -= +<<*>>= INT_METHOD_TYPE(WEAVE_CODE_LINE_WEA_MTID, programming_language *pl, text_stream *OUT, weave_order *wv, web *W, chapter *C, section *S, source_line *L, text_stream *matter, text_stream *concluding_comment) int LanguageMethods::weave_code_line(OUTPUT_STREAM, programming_language *pl, weave_order *wv, @@ -402,11 +427,12 @@ int LanguageMethods::weave_code_line(OUTPUT_STREAM, programming_language *pl, we return rv; } -@ When Inweb creates a new |^"Theme"|, it lets everybody know about that. +@ When Inweb creates a new [[^"Theme"]], it lets everybody know about that. -@e NOTIFY_NEW_TAG_WEA_MTID +<<*>>= +enum NOTIFY_NEW_TAG_WEA_MTID -= +<<*>>= VOID_METHOD_TYPE(NOTIFY_NEW_TAG_WEA_MTID, programming_language *pl, theme_tag *tag) void LanguageMethods::new_tag_declared(theme_tag *tag) { programming_language *pl; @@ -414,7 +440,7 @@ void LanguageMethods::new_tag_declared(theme_tag *tag) { VOID_METHOD_CALL(pl, NOTIFY_NEW_TAG_WEA_MTID, tag); } -@h Analysis methods. +@ \section{Analysis methods.} These are really a little miscellaneous, but they all have to do with looking at the code in a web and working out what's going on, rather than producing any weave or tangle output. @@ -425,10 +451,11 @@ are called first and last in the process, respectively. 
(What happens in between is essentially that Inweb looks for identifiers, for later syntax colouring purposes.) -@e ANALYSIS_ANA_MTID -@e POST_ANALYSIS_ANA_MTID +<<*>>= +enum ANALYSIS_ANA_MTID +enum POST_ANALYSIS_ANA_MTID -= +<<*>>= VOID_METHOD_TYPE(ANALYSIS_ANA_MTID, programming_language *pl, web *W) VOID_METHOD_TYPE(POST_ANALYSIS_ANA_MTID, programming_language *pl, web *W) void LanguageMethods::early_preweave_analysis(programming_language *pl, web *W) { @@ -441,9 +468,10 @@ void LanguageMethods::late_preweave_analysis(programming_language *pl, web *W) { @ And finally: in InC only, a few structure element names are given very slightly special treatment, and this method decides which. -@e SHARE_ELEMENT_ANA_MTID +<<*>>= +enum SHARE_ELEMENT_ANA_MTID -= +<<*>>= INT_METHOD_TYPE(SHARE_ELEMENT_ANA_MTID, programming_language *pl, text_stream *element_name) int LanguageMethods::share_element(programming_language *pl, text_stream *element_name) { int rv = FALSE; @@ -451,9 +479,9 @@ int LanguageMethods::share_element(programming_language *pl, text_stream *elemen return rv; } -@h What we support. +@ \section{What we support.} -= +<<*>>= int LanguageMethods::supports_definitions(programming_language *pl) { if (Str::len(pl->start_definition) > 0) return TRUE; if (Str::len(pl->prolong_definition) > 0) return TRUE; diff --git a/Chapter_4/Programming Languages.w b/Chapter_4/Programming_Languages.nw similarity index 86% rename from Chapter_4/Programming Languages.w rename to Chapter_4/Programming_Languages.nw index 8688cfa..b243899 100644 --- a/Chapter_4/Programming Languages.w +++ b/Chapter_4/Programming_Languages.nw @@ -3,34 +3,34 @@ Defining the programming languages supported by Inweb, loading in their definitions from files. -@h Languages. -Programming languages are identified by name: for example, |C++| or |Perl|. +@ \section{Languages.} +Programming languages are identified by name: for example, [[C++]] or [[Perl]]. 
-@ = +<<*>>= programming_language *Languages::find_by_name(text_stream *lname, web *W, int error_if_not_found) { programming_language *pl; - @; - @; + <>; + <>; if (Str::ne(pl->language_name, lname)) Errors::fatal_with_text( "definition of programming language '%S' is for something else", lname); return pl; } -@ = +<>= LOOP_OVER(pl, programming_language) if (Str::eq(lname, pl->language_name)) return pl; -@ = +<>= filename *F = NULL; if (W) { pathname *P = Pathnames::down(W->md->path_to_web, I"Dialects"); - @; + <>; } pathname *P = Languages::default_directory(); - @; + <>; if (F == NULL) { if (error_if_not_found) Errors::fatal_with_text( @@ -39,7 +39,7 @@ programming_language *Languages::find_by_name(text_stream *lname, web *W, } pl = Languages::read_definition(F); -@ = +<>= if (F == NULL) { TEMPORARY_TEXT(leaf) WRITE_TO(leaf, "%S.ildf", lname); @@ -50,7 +50,7 @@ programming_language *Languages::find_by_name(text_stream *lname, web *W, @ I'm probably showing my age here. -= +<<*>>= programming_language *Languages::default(web *W) { return Languages::find_by_name(I"C", W, TRUE); } @@ -71,7 +71,7 @@ void Languages::show(OUTPUT_STREAM) { Memory::I7_free(sorted_table, ARRAY_SORTING_MREASON, N*((int) sizeof(programming_language *))); } -@ = +<<*>>= int Languages::compare_names(const void *ent1, const void *ent2) { text_stream *tx1 = (*((const programming_language **) ent1))->language_name; text_stream *tx2 = (*((const programming_language **) ent2))->language_name; @@ -80,7 +80,7 @@ int Languages::compare_names(const void *ent1, const void *ent2) { @ We can read every language in a directory: -= +<<*>>= void Languages::read_definitions(pathname *P) { if (P == NULL) P = Languages::default_directory(); scan_directory *D = Directories::open(P); @@ -102,7 +102,7 @@ pathname *Languages::default_directory(void) { @ So, then, languages are defined by files which are read in, and parsed into the following structure (one per language): -= +<<*>>= typedef struct 
programming_language { text_stream *language_name; /* identifies it: see above */ @@ -139,20 +139,20 @@ typedef struct programming_language { int suppress_disclaimer; int C_like; /* languages with this set have access to extra features */ - struct linked_list *reserved_words; /* of |reserved_word| */ + struct linked_list *reserved_words; /* of [[reserved_word]] */ struct hash_table built_in_keywords; struct colouring_language_block *program; /* algorithm for syntax colouring */ struct method_set *methods; CLASS_DEFINITION } programming_language; -@ This is a simple one-pass compiler. The |language_reader_state| provides +@ This is a simple one-pass compiler. The [[language_reader_state]] provides the only state preserved as we work through line by line, except of course -that we are also working on the programming language it is |defining|. The -|current_block| is the braced block of colouring instructions we are +that we are also working on the programming language it is [[defining]]. The +[[current_block]] is the braced block of colouring instructions we are currently inside. -= +<<*>>= typedef struct language_reader_state { struct programming_language *defining; struct colouring_language_block *current_block; @@ -160,17 +160,17 @@ typedef struct language_reader_state { programming_language *Languages::read_definition(filename *F) { programming_language *pl = CREATE(programming_language); - @; + <>; language_reader_state lrs; lrs.defining = pl; lrs.current_block = NULL; TextFiles::read(F, FALSE, "can't open programming language definition file", TRUE, Languages::read_definition_line, NULL, (void *) &lrs); - @; + <>; return pl; } -@ = +<>= pl->language_name = NULL; pl->file_extension = NULL; pl->supports_namespaces = FALSE; @@ -212,35 +212,35 @@ itself C-like has functionality for function and structure definitions; the language whose name is InC gets even more, without having to ask. 
Languages have effect through their method calls, which is how those -extra features are provided. The call to |ACMESupport::add_fallbacks| +extra features are provided. The call to [[ACMESupport::add_fallbacks]] adds generic method calls to give effect to the settings in the definition. -@ = +<>= if (pl->C_like) CLike::make_c_like(pl); if (Str::eq(pl->language_name, I"InC")) InCSupport::add_features(pl); ACMESupport::add_fallbacks(pl); @ So, then, the above reads the file and feeds it line by line to this: -= +<<*>>= void Languages::read_definition_line(text_stream *line, text_file_position *tfp, void *v_state) { language_reader_state *state = (language_reader_state *) v_state; programming_language *pl = state->defining; Str::trim_white_space(line); /* ignore trailing space */ if (Str::len(line) == 0) return; /* ignore blank lines */ - if (Str::get_first_char(line) == '#') return; /* lines opening with |#| are comments */ + if (Str::get_first_char(line) == '#') return; /* lines opening with [[#]] are comments */ match_results mr = Regexp::create_mr(); - if (state->current_block) @ - else @; + if (state->current_block) <> + else <>; Regexp::dispose_of(&mr); } @ Outside a colouring program, you can do three things: start a program, declare a reserved keyword, or set a key to a value. -@ = +<>= if (Regexp::match(&mr, line, L"colouring {")) { if (pl->program) Errors::in_text_file("duplicate colouring program", tfp); pl->program = Languages::new_block(NULL, WHOLE_LINE_CRULE_RUN); @@ -323,7 +323,7 @@ declare a reserved keyword, or set a key to a value. the entire program), open a new block to apply to each character or to runs of a given colour, or give an if-X-then-Y rule: -@ = +<>= if (Str::eq(line, I"}")) { state->current_block = state->current_block->parent; } else if (Regexp::match(&mr, line, L"characters {")) { @@ -379,32 +379,33 @@ runs of a given colour, or give an if-X-then-Y rule: } } -@h Blocks. -These are code blocks of colouring instructions. 
A block whose |parent| is |NULL| +@ \section{Blocks.} +These are code blocks of colouring instructions. A block whose [[parent]] is [[NULL]] represents a complete program. -@d WHOLE_LINE_CRULE_RUN -1 /* This block applies to the whole snippet being coloured */ -@d CHARACTERS_CRULE_RUN -2 /* This block applies to each character in turn */ -@d CHARACTERS_IN_CRULE_RUN -3 /* This block applies to each character from a set in turn */ -@d INSTANCES_CRULE_RUN -4 /* This block applies to each instance in turn */ -@d MATCHES_CRULE_RUN -5 /* This block applies to each match against a regexp in turn */ -@d BRACKETS_CRULE_RUN -6 /* This block applies to bracketed subexpressions in a regexp */ +<<*>>= +#define WHOLE_LINE_CRULE_RUN -1 /* This block applies to the whole snippet being coloured */ +#define CHARACTERS_CRULE_RUN -2 /* This block applies to each character in turn */ +#define CHARACTERS_IN_CRULE_RUN -3 /* This block applies to each character from a set in turn */ +#define INSTANCES_CRULE_RUN -4 /* This block applies to each instance in turn */ +#define MATCHES_CRULE_RUN -5 /* This block applies to each match against a regexp in turn */ +#define BRACKETS_CRULE_RUN -6 /* This block applies to bracketed subexpressions in a regexp */ -= +<<*>>= typedef struct colouring_language_block { - struct linked_list *rules; /* of |colouring_rule| */ - struct colouring_language_block *parent; /* or |NULL| for the topmost one */ - int run; /* one of the |*_CRULE_RUN| values, or else a colour */ - struct text_stream *run_instance; /* used only for |INSTANCES_CRULE_RUN| */ - struct text_stream *char_set; /* used only for |CHARACTERS_IN_CRULE_RUN| */ - wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH]; /* used for |MATCHES_CRULE_RUN|, |BRACKETS_CRULE_RUN| */ + struct linked_list *rules; /* of [[colouring_rule]] */ + struct colouring_language_block *parent; /* or [[NULL]] for the topmost one */ + int run; /* one of the [[*_CRULE_RUN]] values, or else a colour */ + struct text_stream 
*run_instance; /* used only for [[INSTANCES_CRULE_RUN]] */ + struct text_stream *char_set; /* used only for [[CHARACTERS_IN_CRULE_RUN]] */ + wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH]; /* used for [[MATCHES_CRULE_RUN]], [[BRACKETS_CRULE_RUN]] */ /* workspace during painting */ struct match_results mr; /* of a regular expression */ CLASS_DEFINITION } colouring_language_block; -@ = +<<*>>= colouring_language_block *Languages::new_block(colouring_language_block *within, int r) { colouring_language_block *block = CREATE(colouring_language_block); block->rules = NEW_LINKED_LIST(colouring_rule); @@ -417,37 +418,38 @@ colouring_language_block *Languages::new_block(colouring_language_block *within, return block; } -@h Colouring Rules. +@ \section{Colouring Rules.} Each individual rule has the form: if a premiss, then a conclusion. It will be applied to a snippet of text, and the premiss can test that, together with a little context before it (where available). Note that rules can be unconditional, in that the premiss always passes. 
-@d NOT_A_RULE_PREFIX 1 /* this isn't a prefix rule */ -@d UNSPACED_RULE_PREFIX 2 /* for |prefix P| */ -@d SPACED_RULE_PREFIX 3 /* for |spaced prefix P| */ -@d OPTIONALLY_SPACED_RULE_PREFIX 4 /* for |optionally spaced prefix P| */ -@d UNSPACED_RULE_SUFFIX 5 /* for |suffix P| */ -@d SPACED_RULE_SUFFIX 6 /* for |spaced suffix P| */ -@d OPTIONALLY_SPACED_RULE_SUFFIX 7 /* for |optionally spaced suffix P| */ +<<*>>= +#define NOT_A_RULE_PREFIX 1 /* this isn't a prefix rule */ +#define UNSPACED_RULE_PREFIX 2 /* for [[prefix P]] */ +#define SPACED_RULE_PREFIX 3 /* for [[spaced prefix P]] */ +#define OPTIONALLY_SPACED_RULE_PREFIX 4 /* for [[optionally spaced prefix P]] */ +#define UNSPACED_RULE_SUFFIX 5 /* for [[suffix P]] */ +#define SPACED_RULE_SUFFIX 6 /* for [[spaced suffix P]] */ +#define OPTIONALLY_SPACED_RULE_SUFFIX 7 /* for [[optionally spaced suffix P]] */ -@d MAX_ILDF_REGEXP_LENGTH 64 +#define MAX_ILDF_REGEXP_LENGTH 64 -= +<<*>>= typedef struct colouring_rule { /* the premiss: */ - int sense; /* |FALSE| to negate the condition */ - wchar_t match_colour; /* for |coloured C|, or else |NOT_A_COLOUR| */ - wchar_t match_keyword_of_colour; /* for |keyword C|, or else |NOT_A_COLOUR| */ + int sense; /* [[FALSE]] to negate the condition */ + wchar_t match_colour; /* for [[coloured C]], or else [[NOT_A_COLOUR]] */ + wchar_t match_keyword_of_colour; /* for [[keyword C]], or else [[NOT_A_COLOUR]] */ struct text_stream *match_text; /* or length 0 to mean "anything" */ - int match_prefix; /* one of the |*_RULE_PREFIX| values above */ + int match_prefix; /* one of the [[*_RULE_PREFIX]] values above */ wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH]; - int number; /* for |number N| rules; 0 for others */ - int number_of; /* for |number N of M| rules; 0 for others */ + int number; /* for [[number N]] rules; 0 for others */ + int number_of; /* for [[number N of M]] rules; 0 for others */ /* the conclusion: */ - struct colouring_language_block *execute_block; /* or |NULL|, in which 
case... */ + struct colouring_language_block *execute_block; /* or [[NULL]], in which case... */ wchar_t set_to_colour; /* ...paint the snippet in this colour */ wchar_t set_prefix_to_colour; /* ...also paint this (same for suffix) */ int debug; /* ...or print debugging text to console */ @@ -458,7 +460,7 @@ typedef struct colouring_rule { CLASS_DEFINITION } colouring_rule; -@ = +<<*>>= colouring_rule *Languages::new_rule(colouring_language_block *within) { if (within == NULL) internal_error("rule outside block"); colouring_rule *rule = CREATE(colouring_rule); @@ -482,18 +484,18 @@ colouring_rule *Languages::new_rule(colouring_language_block *within) { return rule; } -@ = +<<*>>= void Languages::parse_rule(language_reader_state *state, text_stream *premiss, text_stream *action, text_file_position *tfp) { match_results mr = Regexp::create_mr(); colouring_rule *rule = Languages::new_rule(state->current_block); Str::trim_white_space(premiss); Str::trim_white_space(action); - @; - @; + <>; + <>; Regexp::dispose_of(&mr); } -@ = +<>= while (Regexp::match(&mr, premiss, L"not (%c+)")) { rule->sense = (rule->sense)?FALSE:TRUE; Str::clear(premiss); Str::copy(premiss, mr.exp[0]); @@ -533,7 +535,7 @@ void Languages::parse_rule(language_reader_state *state, text_stream *premiss, rule->match_text = Languages::text(premiss, tfp, FALSE); } -@ = +<>= if (Str::eq(action, I"{")) { rule->execute_block = Languages::new_block(state->current_block, WHOLE_LINE_CRULE_RUN); @@ -553,10 +555,10 @@ void Languages::parse_rule(language_reader_state *state, text_stream *premiss, Errors::in_text_file("action after '=>' illegible", tfp); } -@h Reserved words. -Note that these can come in any colour, though usually it's |!reserved|. +@ \section{Reserved words.} +Note that these can come in any colour, though usually it's [[!reserved]]. 
-= +<<*>>= typedef struct reserved_word { struct text_stream *word; int colour; @@ -578,28 +580,29 @@ reserved_word *Languages::reserved(programming_language *pl, text_stream *W, wch return rw; } -@h Expressions. +@ \section{Expressions.} Language definition files have three types of data: colours, booleans, and text. Colours first. Note that there are two pseudo-colours used above, but which are not expressible in the syntax of this file. -@d DEFINITION_COLOUR 'd' -@d FUNCTION_COLOUR 'f' -@d RESERVED_COLOUR 'r' -@d ELEMENT_COLOUR 'e' -@d IDENTIFIER_COLOUR 'i' -@d CHARACTER_COLOUR 'c' -@d CONSTANT_COLOUR 'n' -@d STRING_COLOUR 's' -@d PLAIN_COLOUR 'p' -@d EXTRACT_COLOUR 'x' -@d COMMENT_COLOUR '!' -@d NEWLINE_COLOUR '\n' +<<*>>= +#define DEFINITION_COLOUR 'd' +#define FUNCTION_COLOUR 'f' +#define RESERVED_COLOUR 'r' +#define ELEMENT_COLOUR 'e' +#define IDENTIFIER_COLOUR 'i' +#define CHARACTER_COLOUR 'c' +#define CONSTANT_COLOUR 'n' +#define STRING_COLOUR 's' +#define PLAIN_COLOUR 'p' +#define EXTRACT_COLOUR 'x' +#define COMMENT_COLOUR '!' +#define NEWLINE_COLOUR '\n' -@d NOT_A_COLOUR ' ' -@d UNQUOTED_COLOUR '_' +#define NOT_A_COLOUR ' ' +#define UNQUOTED_COLOUR '_' -= +<<*>>= wchar_t Languages::colour(text_stream *T, text_file_position *tfp) { if (Str::get_first_char(T) != '!') { Errors::in_text_file("colour names must begin with !", tfp); @@ -622,9 +625,9 @@ wchar_t Languages::colour(text_stream *T, text_file_position *tfp) { } } -@ A boolean must be written as |true| or |false|. +@ A boolean must be written as [[true]] or [[false]]. -= +<<*>>= int Languages::boolean(text_stream *T, text_file_position *tfp) { if (Str::eq(T, I"true")) return TRUE; else if (Str::eq(T, I"false")) return FALSE; @@ -634,11 +637,11 @@ int Languages::boolean(text_stream *T, text_file_position *tfp) { } } -@ In text, |\n| represents a newline, |\s| a space and |\t| a tab. Spaces -can be given in the ordinary way inside a text in any case. 
|\\| is a +@ In text, [[\n]] represents a newline, [[\s]] a space and [[\t]] a tab. Spaces +can be given in the ordinary way inside a text in any case. [[\\]] is a literal backslash. -= +<<*>>= text_stream *Languages::text(text_stream *T, text_file_position *tfp, int allow) { text_stream *V = Str::new(); if (Str::len(T) > 0) { @@ -725,7 +728,7 @@ text_stream *Languages::text(text_stream *T, text_file_position *tfp, int allow) @ And regular expressions. -= +<<*>>= void Languages::regexp(wchar_t *write_to, text_stream *T, text_file_position *tfp) { if (write_to == NULL) internal_error("no buffer"); write_to[0] = 0; diff --git a/Chapter_4/The Painter.w b/Chapter_4/The_Painter.nw similarity index 90% rename from Chapter_4/The Painter.w rename to Chapter_4/The_Painter.nw index 3560c76..1ea2405 100644 --- a/Chapter_4/The Painter.w +++ b/Chapter_4/The_Painter.nw @@ -3,38 +3,38 @@ A simple syntax-colouring engine. @ This is a very simple syntax colouring algorithm. The work is done by the -function |Painter::syntax_colour|, which can in principle be applied to texts +function [[Painter::syntax_colour]], which can in principle be applied to texts of any length. But it's usually convenient to run it on a long file one line -at a time, so that it is called repeatedly. The variable |colouring_state| +at a time, so that it is called repeatedly. The variable [[colouring_state]] remembers where we were at the end of the previous line, so that we can pick up again later at the start of the next. 
Because of that, we need to call the following before we begin a run of calls -to |Painter::syntax_colour|: +to [[Painter::syntax_colour]]: -= +<<*>>= int painter_count = 1; void Painter::reset_syntax_colouring(programming_language *pl) { colouring_state = PLAIN_COLOUR; painter_count = 1; } -@ As we begin, the text to colour is in |matter|, while |colouring| is an +@ As we begin, the text to colour is in [[matter]], while [[colouring]] is an equal-length text where each character represents the colour of its -corresponding character in |matter|. For example, we might start as: -= (text as PainterOutput) +corresponding character in [[matter]]. For example, we might start as: + int x = 55; ppppppppppp -= -with every character having |PLAIN_COLOUR|, but end up with: -= (text as PainterOutput) + +with every character having [[PLAIN_COLOUR]], but end up with: + int x = 55; rrrpipppnnp -= + We get to that by using a language's rules on literals, and then executing its colouring program. -= +<<*>>= int Painter::syntax_colour(programming_language *pl, hash_table *HT, text_stream *matter, text_stream *colouring, int with_comments) { int from = 0, to = Str::len(matter) - 1; @@ -57,12 +57,12 @@ int Painter::syntax_colour(programming_language *pl, void Painter::syntax_colour_inner(programming_language *pl, hash_table *HT, text_stream *matter, text_stream *colouring, int from, int to) { - @; - @; - @; + <>; + <>; + <>; } -@ = +<>= int squote = Str::get_first_char(pl->character_literal); int squote_escape = Str::get_first_char(pl->character_literal_escape); int dquote = Str::get_first_char(pl->string_literal); @@ -106,7 +106,7 @@ void Painter::syntax_colour_inner(programming_language *pl, } } -@ = +<>= int base = -1, dec_possible = TRUE; for (int i=from; i <= to; i++) { if ((Str::get_at(colouring, i) == PLAIN_COLOUR) || @@ -148,8 +148,8 @@ void Painter::syntax_colour_inner(programming_language *pl, case 10: if (Characters::isdigit(c)) pass = TRUE; break; case 16: if 
(Characters::isdigit(c)) pass = TRUE; int d = Characters::tolower(c); - if ((d == 'a') || (d == 'b') || (d == 'c') || - (d == 'd') || (d == 'e') || (d == 'f')) pass = TRUE; + if ((d == 'a') || (d == 'b') || (d == 'c') || + (d == 'd') || (d == 'e') || (d == 'f')) pass = TRUE; break; } if (pass) { @@ -165,9 +165,9 @@ void Painter::syntax_colour_inner(programming_language *pl, @ For the moment, we always adopt the C rules on identifiers: they have to begin with an underscore or letter, then continue with underscores or alphanumeric characters, except that if the language allows it then they -can contain a |::| namespace divider. +can contain a [[::]] namespace divider. -= +<<*>>= int Painter::identifier_at(programming_language *pl, text_stream *matter, text_stream *colouring, int i) { wchar_t c = Str::get_at(matter, i); @@ -191,7 +191,7 @@ int Painter::identifier_at(programming_language *pl, @ With those preliminaries out of the way, the language's colouring program takes over. -@ = +<>= if (pl->program) Painter::execute(HT, pl->program, matter, colouring, from, to, painter_count++); @@ -200,7 +200,7 @@ whole snippet of text, or each character on its own, or each run of characters of a given sort. Note that we work width-first, as it were: we complete each rule across the whole snippet before moving on to the next. -= +<<*>>= void Painter::execute(hash_table *HT, colouring_language_block *block, text_stream *matter, text_stream *colouring, int from, int to, int N) { if (block == NULL) internal_error("no block"); @@ -281,7 +281,7 @@ void Painter::execute(hash_table *HT, colouring_language_block *block, text_stre @ Rules have the form: if X, then Y. 
-= +<<*>>= void Painter::execute_rule(hash_table *HT, colouring_rule *rule, text_stream *matter, text_stream *colouring, int from, int to, int N) { if (Painter::satisfies(HT, rule, matter, colouring, from, to, N) == rule->sense) @@ -290,14 +290,15 @@ void Painter::execute_rule(hash_table *HT, colouring_rule *rule, text_stream *ma @ Here we test the "if X": -@d UNSPACED_RULE_PREFIX 2 /* for |prefix P| */ -@d SPACED_RULE_PREFIX 3 /* for |spaced prefix P| */ -@d OPTIONALLY_SPACED_RULE_PREFIX 4 /* for |optionally spaced prefix P| */ -@d UNSPACED_RULE_SUFFIX 5 /* for |suffix P| */ -@d SPACED_RULE_SUFFIX 6 /* for |spaced suffix P| */ -@d OPTIONALLY_SPACED_RULE_SUFFIX 7 /* for |optionally spaced suffix P| */ +<<*>>= +#define UNSPACED_RULE_PREFIX 2 /* for [[prefix P]] */ +#define SPACED_RULE_PREFIX 3 /* for [[spaced prefix P]] */ +#define OPTIONALLY_SPACED_RULE_PREFIX 4 /* for [[optionally spaced prefix P]] */ +#define UNSPACED_RULE_SUFFIX 5 /* for [[suffix P]] */ +#define SPACED_RULE_SUFFIX 6 /* for [[spaced suffix P]] */ +#define OPTIONALLY_SPACED_RULE_SUFFIX 7 /* for [[optionally spaced suffix P]] */ -= +<<*>>= int Painter::satisfies(hash_table *HT, colouring_rule *rule, text_stream *matter, text_stream *colouring, int from, int to, int N) { if (rule->number > 0) { @@ -361,12 +362,12 @@ int Painter::satisfies(hash_table *HT, colouring_rule *rule, text_stream *matter @ And here we carry out the "then Y": -= +<<*>>= void Painter::follow(hash_table *HT, colouring_rule *rule, text_stream *matter, text_stream *colouring, int from, int to) { if (rule->execute_block) Painter::execute(HT, rule->execute_block, matter, colouring, from, to, 0); - else if (rule->debug) @ + else if (rule->debug) <> else { if (rule->set_to_colour != NOT_A_COLOUR) for (int i=from; i<=to; i++) @@ -377,7 +378,7 @@ void Painter::follow(hash_table *HT, colouring_rule *rule, text_stream *matter, } } -@ = +<>= PRINT("[%d, %d] text: ", from, to); for (int i=from; i<=to; i++) PUT_TO(STDOUT, 
Str::get_at(matter, i)); @@ -386,9 +387,9 @@ void Painter::follow(hash_table *HT, colouring_rule *rule, text_stream *matter, PUT_TO(STDOUT, Str::get_at(colouring, i)); PRINT("\n"); -@h Painting a file. +@ \section{Painting a file.} -= +<<*>>= linked_list *Painter::lines(filename *F) { linked_list *L = NEW_LINKED_LIST(text_stream); TextFiles::read(F, FALSE, "unable to read file of textual extract", TRUE, diff --git a/Chapter_4/Types and Functions.w b/Chapter_4/Types_and_Functions.nw similarity index 80% rename from Chapter_4/Types and Functions.w rename to Chapter_4/Types_and_Functions.nw index f8afb1a..98b9b4c 100644 --- a/Chapter_4/Types and Functions.w +++ b/Chapter_4/Types_and_Functions.nw @@ -2,33 +2,33 @@ Basic support for languages to recognise structure and function declarations. -@ For each |typedef struct| we find, we will make one of these: +@ For each [[typedef struct]] we find, we will make one of these: -= +<<*>>= typedef struct language_type { struct text_stream *structure_name; int tangled; /* whether the structure definition has been tangled out */ - struct source_line *structure_header_at; /* opening line of |typedef| */ - struct source_line *typedef_ends; /* closing line, where |}| appears */ - struct linked_list *incorporates; /* of |language_type| */ - struct linked_list *elements; /* of |structure_element| */ + struct source_line *structure_header_at; /* opening line of [[typedef]] */ + struct source_line *typedef_ends; /* closing line, where [[}]] appears */ + struct linked_list *incorporates; /* of [[language_type]] */ + struct linked_list *elements; /* of [[structure_element]] */ struct language_type *next_cst_alphabetically; CLASS_DEFINITION } language_type; -@ = +<<*>>= language_type *first_cst_alphabetically = NULL; language_type *Functions::new_struct(web *W, text_stream *name, source_line *L) { language_type *str = CREATE(language_type); - @; + <>; Analyser::mark_reserved_word_at_line(L, str->structure_name, RESERVED_COLOUR); - @; - @; 
+ <>; + <>; return str; } -@ = +<>= str->structure_name = Str::duplicate(name); str->structure_header_at = L; str->tangled = FALSE; @@ -36,12 +36,12 @@ language_type *Functions::new_struct(web *W, text_stream *name, source_line *L) str->incorporates = NEW_LINKED_LIST(language_type); str->elements = NEW_LINKED_LIST(structure_element); -@ = +<>= Tags::add_by_name(L->owning_paragraph, I"Structures"); ADD_TO_LINKED_LIST(str, language_type, W->language_types); ADD_TO_LINKED_LIST(str, language_type, L->owning_paragraph->structures); -@ = +<>= str->next_cst_alphabetically = NULL; if (first_cst_alphabetically == NULL) first_cst_alphabetically = str; else { @@ -65,16 +65,16 @@ language_type *Functions::new_struct(web *W, text_stream *name, source_line *L) if (placed == FALSE) last->next_cst_alphabetically = str; } -@ A language can also create an instance of |structure_element| to record the -existence of the element |val|, and add it to the linked list of elements of +@ A language can also create an instance of [[structure_element]] to record the +existence of the element [[val]], and add it to the linked list of elements of the structure being defined. In InC, only, certain element names used often in Inform's source code are -given mildly special treatment. This doesn't amount to much. |allow_sharing| +given mildly special treatment. This doesn't amount to much. [[allow_sharing]] has no effect on tangling, so it doesn't change the program. It simply affects the reports in the woven code about where structures are used. 
-= +<<*>>= typedef struct structure_element { struct text_stream *element_name; struct source_line *element_created_at; @@ -82,7 +82,7 @@ typedef struct structure_element { CLASS_DEFINITION } structure_element; -@ = +<<*>>= structure_element *Functions::new_element(language_type *str, text_stream *elname, source_line *L) { Analyser::mark_reserved_word_at_line(L, elname, ELEMENT_COLOUR); @@ -96,7 +96,7 @@ structure_element *Functions::new_element(language_type *str, text_stream *elnam return elt; } -@ = +<<*>>= language_type *Functions::find_structure(web *W, text_stream *name) { language_type *str; LOOP_OVER_LINKED_LIST(str, language_type, W->language_types) @@ -105,14 +105,14 @@ language_type *Functions::find_structure(web *W, text_stream *name) { return NULL; } -@h Functions. +@ \section{Functions.} Each function definition found results in one of these structures being made: -= +<<*>>= typedef struct language_function { - struct text_stream *function_name; /* e.g., |"cultivate"| */ - struct text_stream *function_type; /* e.g., |"tree *"| */ - struct text_stream *function_arguments; /* e.g., |"int rainfall)"|: note |)| */ + struct text_stream *function_name; /* e.g., [["cultivate"]] */ + struct text_stream *function_type; /* e.g., [["tree *"]] */ + struct text_stream *function_arguments; /* e.g., [["int rainfall)"]]: note [[)]] */ struct source_line *function_header_at; /* where the first line of the header begins */ int within_namespace; /* written using InC namespace dividers */ int called_from_other_sections; @@ -123,23 +123,23 @@ typedef struct language_function { CLASS_DEFINITION } language_function; -@ = +<<*>>= language_function *Functions::new_function(text_stream *fname, source_line *L) { hash_table_entry *hte = Analyser::mark_reserved_word_at_line(L, fname, FUNCTION_COLOUR); language_function *fn = CREATE(language_function); hte->as_function = fn; - @; - @; + <>; + <>; if (L->owning_section->sect_language->supports_namespaces) - @; + <>; return fn; } @ 
Note that we take a snapshot of the conditional compilation stack as part of the function structure. We'll need it when predeclaring the function. -@ = +<>= fn->function_name = Str::duplicate(fname); fn->function_arguments = Str::new(); fn->function_type = Str::new(); @@ -153,12 +153,12 @@ part of the function structure. We'll need it when predeclaring the function. fn->usage_described = TRUE; fn->no_conditionals = 0; -@ = +<>= paragraph *P = L->owning_paragraph; if (P) ADD_TO_LINKED_LIST(fn, language_function, P->functions); L->function_defined = fn; -@ = +<>= text_stream *declared_namespace = NULL; text_stream *ambient_namespace = L->owning_section->sect_namespace; match_results mr = Regexp::create_mr(); @@ -188,7 +188,7 @@ part of the function structure. We'll need it when predeclaring the function. @ "Elsewhere" here means "in a paragraph of code other than the one in which the function's definition appears". -= +<<*>>= int Functions::used_elsewhere(language_function *fn) { paragraph *P = fn->function_header_at->owning_paragraph; hash_table_entry *hte = @@ -205,11 +205,11 @@ int Functions::used_elsewhere(language_function *fn) { return FALSE; } -@h Cataloguing. -This implements the additional information in the |-structures| and |-functions| +@ \section{Cataloguing.} +This implements the additional information in the [[-structures]] and [[-functions]] forms of section catalogue. -= +<<*>>= void Functions::catalogue(section *S, int functions_too) { language_type *str; LOOP_OVER(str, language_type)